File size: 5,735 Bytes
e79d672
8b78611
 
0058c86
 
0ff7c49
e79d672
8b78611
 
 
d6f9dda
e79d672
8b78611
b1259cb
86cdd06
0058c86
7d3d794
0058c86
c163620
0058c86
8e0e6ba
0058c86
c163620
0058c86
8e0e6ba
0058c86
c163620
0058c86
76c2592
0058c86
c163620
0058c86
b24ce56
c163620
0ff7c49
7d3d794
 
e79d672
0058c86
 
8b78611
 
 
0058c86
8b78611
0058c86
8b78611
 
0058c86
 
8b78611
0058c86
 
 
 
 
 
 
 
 
 
 
 
 
e79d672
797ab4e
2c38d24
797ab4e
 
 
 
 
8b78611
797ab4e
8b78611
 
 
797ab4e
8b78611
 
 
 
112e19a
8b78611
 
0058c86
 
 
 
 
 
8b78611
0058c86
 
8b78611
0058c86
 
 
8b78611
 
0058c86
8b78611
0058c86
8b78611
 
0058c86
 
 
8b78611
 
0058c86
8b78611
 
112e19a
297e0ee
c5ad0b2
a4f0490
d6f9dda
c5ad0b2
e3bb044
0058c86
 
 
7eb334c
 
 
 
7faa25f
7eb334c
 
0058c86
7eb334c
7d3d794
 
0058c86
 
7eb334c
8b78611
0058c86
8b78611
0058c86
 
 
 
 
 
8b78611
 
 
0058c86
b1259cb
7d3d794
8b78611
e79d672
ed2ee0e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import numpy as np
import os

# Hub repository that provides both the tokenizer and the model weights.
_GOT_CHECKPOINT = 'ucaslcl/GOT-OCR2_0'

# trust_remote_code=True lets transformers load the custom classes shipped
# inside the checkpoint repository.
tokenizer = AutoTokenizer.from_pretrained(_GOT_CHECKPOINT, trust_remote_code=True)
model = AutoModel.from_pretrained(
    _GOT_CHECKPOINT,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cuda',
    use_safetensors=True,
)
# Switch to inference mode, then make sure the module sits on the GPU.
model = model.eval()
model = model.cuda()

# Path handed to the model as ``save_render_file`` in the "format" modes and
# read back by run_GOT afterwards.
html_file = './demo.html'

@spaces.GPU
def run_GOT(image_array, got_mode, fine_grained_mode="", ocr_color="", ocr_box=""):
    """Run the GOT model on one image in the requested mode.

    Args:
        image_array: The image, forwarded unchanged to ``model.chat`` /
            ``model.chat_crop``. Despite the name, the UI in this file
            supplies a file path (``gr.Image(type="filepath")``).
        got_mode: One of the six mode labels offered by the task dropdown
            ("plain"/"format" x "texts"/"multi-crop"/"fine-grained" OCR).
        fine_grained_mode: Accepted so the signature lines up with the UI
            inputs; it is not consulted here.
        ocr_color: Forwarded as ``ocr_color`` in the fine-grained modes only.
        ocr_box: Forwarded as ``ocr_box`` in the fine-grained modes only.

    Returns:
        A ``(res, demo_html)`` pair. ``res`` is whatever the model call
        returned; ``demo_html`` is the content of the rendered HTML file for
        modes whose label contains "format", otherwise ``None``.

    Raises:
        gr.Error: If no image was supplied or ``got_mode`` is not one of the
            supported labels.
    """
    # Fail with a readable UI message instead of an error from deep inside
    # the model when the user submits without an image.
    if image_array is None:
        raise gr.Error("Please upload an image first.")

    image = image_array
    if got_mode == "plain texts OCR":
        res = model.chat(tokenizer, image, ocr_type='ocr')
    elif got_mode == "format texts OCR":
        res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
    elif got_mode == "plain multi-crop OCR":
        res = model.chat_crop(tokenizer, image, ocr_type='ocr')
    elif got_mode == "format multi-crop OCR":
        res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
    elif got_mode == "plain fine-grained OCR":
        res = model.chat(tokenizer, image, ocr_type='ocr', ocr_box=ocr_box, ocr_color=ocr_color)
    elif got_mode == "format fine-grained OCR":
        res = model.chat(tokenizer, image, ocr_type='format', ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=html_file)
    else:
        # Previously an unknown label fell through and crashed below with
        # UnboundLocalError on ``res``.
        raise gr.Error(f"Unsupported GOT mode: {got_mode!r}")

    print("res:\n", res)
    if "format" in got_mode:
        # The "format" modes asked the model to write its rendering here.
        with open(html_file, 'r') as f:
            demo_html = f.read()
        # Point the rendered page at the locally bundled script instead of
        # the CDN copy.
        # NOTE(review): the "[email protected]" fragment looks like an
        # e-mail-obfuscation artifact of a "package@version" specifier, so
        # this replace probably never matches - confirm the real CDN URL.
        demo_html = demo_html.replace("https://cdn.jsdelivr.net/npm/[email protected]/es5/bundle.js", "assets/bundle.js")
        print("demo_html: \n", demo_html)
        print(os.path.abspath(html_file))
        return res, demo_html
    return res, None

def task_update(task):
    """Build visibility updates for the three fine-grained widgets.

    The first update shows its widget only when the selected task label
    contains "fine-grained"; the other two always hide theirs. In this file
    the three updates are routed, in order, to ``fine_grained_dropdown``,
    ``color_dropdown`` and ``box_input``.

    Args:
        task: The currently selected task label.

    Returns:
        A list of three ``gr.update`` objects.
    """
    wants_fine_grained = "fine-grained" in task
    return [
        gr.update(visible=wants_fine_grained),
        gr.update(visible=False),
        gr.update(visible=False),
    ]

def fine_grained_update(task):
    """Show only the input widget that matches the fine-grained type.

    The two returned updates are routed, in order, to ``color_dropdown`` and
    ``box_input`` by the ``fine_grained_dropdown.change`` wiring in this file.

    Args:
        task: The selected fine-grained type; "box" or "color" are the
            choices the dropdown offers.

    Returns:
        A list of two ``gr.update`` objects. The widget that does not match
        the selection is hidden and its value reset to ""; for any other
        value (for example "" or ``None``) both widgets are hidden and reset.
    """
    if task == "box":
        return [
            gr.update(visible=False, value = ""),
            gr.update(visible=True),
        ]
    if task == 'color':
        return [
            gr.update(visible=True),
            gr.update(visible=False, value = ""),
        ]
    # Nothing (or something unexpected) is selected. Still return one update
    # per wired output; the implicit ``None`` returned before does not
    # satisfy a two-output event handler.
    return [
        gr.update(visible=False, value = ""),
        gr.update(visible=False, value = ""),
    ]


title_html = """
<h2> <span class="gradient-text" id="text">General OCR Theory</span><span class="plain-text">: Towards OCR-2.0 via a Unified End-to-end Model</span></h2>
<a href="https://huggingface.co/ucaslcl/GOT-OCR2_0">[😊 Hugging Face]</a> 
<a href="https://arxiv.org/abs/2409.01704">[πŸ“œ Paper]</a> 
<a href="https://github.com/Ucas-HaoranWei/GOT-OCR2.0/">[🌟 GitHub]</a> 
"""

# Build the page: header, input column, output column, rendered-HTML area,
# examples, and the event wiring between them.
with gr.Blocks() as demo:
    gr.HTML(title_html)
    gr.Markdown("""
    "🔥🔥🔥This is the official online demo of GOT-OCR-2.0 model!!!"
    
    ### Demo Guidelines
    """)
    
    with gr.Row():
        # Left column: image upload, mode selection and fine-grained options.
        with gr.Column():
            image_input = gr.Image(type="filepath", label="upload your image")
            task_dropdown = gr.Dropdown(
                choices=[
                    "plain texts OCR",
                    "format texts OCR",
                    "plain multi-crop OCR",
                    "format multi-crop OCR",
                    "plain fine-grained OCR",
                    "format fine-grained OCR",
                ],
                label="Choose one mode of GOT",
                value="plain texts OCR"
            )
            # The next three widgets start hidden; task_update and
            # fine_grained_update toggle their visibility.
            fine_grained_dropdown = gr.Dropdown(
                choices=["box", "color"],
                label="fine-grained type",
                visible=False
            )
            color_dropdown = gr.Dropdown(
                choices=["red", "green", "blue"],
                label="color list",
                visible=False
            )
            box_input = gr.Textbox(
                label="input box: [x1,y1,x2,y2]",
                placeholder="e.g., [0,0,100,100]",
                visible=False
            )
            submit_button = gr.Button("Submit")
        
        # Right column: raw text returned by the model.
        with gr.Column():
            ocr_result = gr.Textbox(label="GOT output")

    with gr.Column():
        # NOTE(review): this href is a raw filesystem path; a browser will
        # resolve it against the site origin - confirm the link is reachable.
        html_show = gr.HTML('<a href="/home/user/app/demo.html" target="_blank">Open Demo HTML</a>')
    with gr.Column():
        html_result = gr.HTML(
                label="rendered html", show_label=True)
    
    # Each example row matches the `inputs` order below:
    # image, mode, fine-grained type, color, box.
    gr.Examples(
        examples=[
            ["assets/coco.jpg", "plain texts OCR", "", "", ""],
            ["assets/en2.png", "plain texts OCR", "", "", ""],
            ["assets/eq.jpg", "format texts OCR", "", "", ""],
            ["assets/table.jpg", "format texts OCR", "", "", ""],
            ["assets/giga.jpg", "format multi-crop OCR", "", "", ""],
            ["assets/aff2.png", "plain fine-grained OCR", "box", "", "[409,763,756,891]"],
            ["assets/color.png", "plain fine-grained OCR", "color", "red", ""],
        ],
        inputs=[image_input, task_dropdown, fine_grained_dropdown, color_dropdown, box_input],
        outputs=[ocr_result, html_result],
        fn = run_GOT,
        label="examples",
    )

    # Changing the mode shows or hides the fine-grained controls.
    task_dropdown.change(
        task_update,
        inputs=[task_dropdown],
        outputs=[fine_grained_dropdown, color_dropdown, box_input]
    )
    # Changing the fine-grained type swaps between the color list and the box field.
    fine_grained_dropdown.change(
        fine_grained_update,
        inputs=[fine_grained_dropdown],
        outputs=[color_dropdown, box_input]
    )
    
    # Submit runs the model and fills both the text and the rendered-HTML outputs.
    submit_button.click(
        run_GOT,
        inputs=[image_input, task_dropdown, fine_grained_dropdown, color_dropdown, box_input],
        outputs=[ocr_result, html_result]
    )

demo.launch()