# Pytesseract-PytesseractJs-LLM-OCR
#
# Sleeping
#
# File size: 3,175 Bytes

import os
import gradio as gr
import pytesseract

from Plan.AiLLM import llm_recognition
from Plan.pytesseractOCR import ocr_recognition
from Preprocess.preprocessImg import preprocess_image001

# Not referenced anywhere in the visible code; kept so existing references
# to the name (if any) keep working.
langs = []

# Ask the tesseract CLI once for its installed language packs.  The pipe is
# opened in a ``with`` block so it is closed (and the child process reaped)
# instead of being left for the garbage collector.
with os.popen('tesseract --list-langs') as _langs_pipe:
    # The first output line is skipped (tesseract prints a header line before
    # the language codes); blank lines are dropped so a missing or extra
    # trailing newline can neither lose nor add an entry.
    choices = [
        line for line in _langs_pipe.read().splitlines()[1:] if line.strip()
    ]

# If you don't have tesseract executable in your PATH, include the following:
# pytesseract.pytesseract.tesseract_cmd = r'<full_path_to_your_tesseract_executable>'
# Example tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract'

# Simple image to string
# print(pytesseract.image_to_string(Image.open('eurotext.png')))

# # French text image to string
# print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra'))

# # Get bounding box estimates
# print(pytesseract.image_to_boxes(Image.open('test.png')))

# # Get verbose data including boxes, confidences, line and page numbers
# print(pytesseract.image_to_data(Image.open('test.png')))

# # Get information about orientation and script detection
# print(pytesseract.image_to_osd(Image.open('test.png')))


# Full list of available languages (feeds the language dropdown in the UI).
# Copied from ``choices`` rather than spawning tesseract a second time; it is
# a separate list object so mutating one does not affect the other.
languages = list(choices)

print(' ======================================================== ')
# Disabled debugging output for the tesseract environment, left for reference.
# print(' ###### choices:' + choices)
# print(' ###### GET ENV - TESSDATA_PREFIX:' + os.getenv('TESSDATA_PREFIX'))
# print(' ###### OS - TESSDATA_PREFIX:' + os.environ['TESSDATA_PREFIX'])
# os.environ['TESSDATA_PREFIX'] = os.getenv('TESSDATA_PREFIX')
# print(' ###### Tesseract_Cmd:' + pytesseract.pytesseract.tesseract_cmd)
# pytesseract.pytesseract.tesseract_cmd = os.getenv('TESSDATA_PREFIX')
print(' ======================================================== ')


def preprocess_and_ocr(image, validation_type, language):
    """Preprocess an image, then run OCR recognition on the result.

    Args:
        image: Input image; passed unchanged to ``preprocess_image001``.
        validation_type: Forwarded unchanged to ``ocr_recognition``.
        language: Forwarded unchanged to ``ocr_recognition``.

    Returns:
        A 2-tuple ``(preprocessed_image, ocr_result)``: whatever
        ``preprocess_image001`` returned, followed by whatever
        ``ocr_recognition`` returned for it.
    """
    cleaned = preprocess_image001(image)
    # Recognition runs on the preprocessed image, not on the raw input.
    return cleaned, ocr_recognition(cleaned, validation_type, language)


def preprocess_and_llm(image, validation_type, language):
    """Preprocess an image, then run LLM recognition on the result.

    Args:
        image: Input image; passed unchanged to ``preprocess_image001``.
        validation_type: Forwarded unchanged to ``llm_recognition``.
        language: Forwarded unchanged to ``llm_recognition``.

    Returns:
        A 2-tuple ``(preprocessed_image, llm_result)``: whatever
        ``preprocess_image001`` returned, followed by whatever
        ``llm_recognition`` returned for it.
    """
    cleaned = preprocess_image001(image)
    # Recognition runs on the preprocessed image, not on the raw input.
    return cleaned, llm_recognition(cleaned, validation_type, language)


# Gradio UI: an upload/preview row, a row of options, a row of action
# buttons and a row of JSON result panels.
with gr.Blocks() as demo:
    # Row 1: the uploaded image next to the preprocessed image.
    with gr.Row():
        image_input = gr.Image(type="pil", label="上傳圖片")
        preprocess_output = gr.Image(type="pil", label="預處理後的圖片")

    # Row 2: document side to validate, and the language choice
    # (populated from the module-level ``languages`` list).
    with gr.Row():
        validation_type = gr.Dropdown(
            choices=["身分證正面", "身分證反面"],
            label="驗證類別",
        )
        language_dropdown = gr.Dropdown(
            choices=languages,
            value="chi_tra",
            label="語言",
        )

    # Row 3: one button per recognition backend.
    with gr.Row():
        ocr_button = gr.Button("使用 OCR")
        llm_button = gr.Button("使用 AI LLM")

    # Row 4: one JSON panel per recognition backend.
    with gr.Row():
        ocr_output = gr.JSON(label="OCR 解析結果")
        llm_output = gr.JSON(label="AI LLM 解析結果")

    # Both buttons read the same three inputs and refresh the shared
    # preprocessed-image preview; each fills only its own JSON panel.
    ocr_button.click(
        fn=preprocess_and_ocr,
        inputs=[image_input, validation_type, language_dropdown],
        outputs=[preprocess_output, ocr_output],
    )
    llm_button.click(
        fn=preprocess_and_llm,
        inputs=[image_input, validation_type, language_dropdown],
        outputs=[preprocess_output, llm_output],
    )

# Start the app with sharing disabled (share=False).
demo.launch(share=False)