Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ from pathlib import Path
|
|
11 |
import re
|
12 |
import easyocr
|
13 |
|
|
|
14 |
tokenizer = AutoTokenizer.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, device_map='cpu')
|
15 |
model = AutoModel.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True)
|
16 |
model = model.eval().cpu()
|
@@ -28,6 +29,7 @@ def image_to_base64(image):
|
|
28 |
image.save(buffered, format="PNG")
|
29 |
return base64.b64encode(buffered.getvalue()).decode()
|
30 |
|
|
|
31 |
# @spaces.GPU
|
32 |
def run_GOT(image,language):
|
33 |
unique_id = str(uuid.uuid4())
|
@@ -58,28 +60,26 @@ def run_GOT(image,language):
|
|
58 |
if os.path.exists(image_path):
|
59 |
os.remove(image_path)
|
60 |
|
61 |
-
#
|
62 |
-
def search_keyword(text,
|
63 |
-
|
64 |
-
return '<h3 style="text-align: center;">Please enter a keyword to search.</h3>'
|
65 |
-
|
66 |
text_lower = text.lower()
|
67 |
keyword_lower = keyword.lower()
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
return
|
83 |
|
84 |
def cleanup_old_files():
|
85 |
current_time = time.time()
|
@@ -93,6 +93,7 @@ title_html = """
|
|
93 |
<p>Scan Master uses General OCR Theory (GOT), a 580M end-to-end OCR 2.0 model for English optical character recognition and EASYOCR for Hindi optical character recognition. It supports plain text ocr.</p>
|
94 |
"""
|
95 |
|
|
|
96 |
with gr.Blocks() as scan_master_web_app:
|
97 |
gr.HTML(title_html)
|
98 |
gr.Markdown("""
|
@@ -141,4 +142,4 @@ with gr.Blocks() as scan_master_web_app:
|
|
141 |
|
142 |
if __name__ == "__main__":
|
143 |
cleanup_old_files()
|
144 |
-
scan_master_web_app.launch()
|
|
|
11 |
import re
|
12 |
import easyocr
|
13 |
|
14 |
+
# OCR Model
|
15 |
tokenizer = AutoTokenizer.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, device_map='cpu')
|
16 |
model = AutoModel.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True)
|
17 |
model = model.eval().cpu()
|
|
|
29 |
image.save(buffered, format="PNG")
|
30 |
return base64.b64encode(buffered.getvalue()).decode()
|
31 |
|
32 |
+
|
33 |
# @spaces.GPU
|
34 |
def run_GOT(image,language):
|
35 |
unique_id = str(uuid.uuid4())
|
|
|
60 |
if os.path.exists(image_path):
|
61 |
os.remove(image_path)
|
62 |
|
63 |
+
# Search Functionality
|
64 |
+
def search_keyword(text, keyword):
    """Return an HTML snippet of *text* with every hit of *keyword* highlighted.

    The search is case-insensitive and treats *keyword* as a literal string,
    not as a regular expression. Each occurrence is wrapped in ``<mark>`` tags
    and the whole text is wrapped in ``<h3>``.

    Args:
        text: The text to search in (``None`` is treated as empty text).
        keyword: The literal string to look for.

    Returns:
        An HTML string: the highlighted text, a centered "Keyword not found"
        heading when there is no occurrence, or a centered prompt asking for
        a keyword when *keyword* is empty.
    """
    # Local import so the block does not depend on a file-level `import html`.
    import html

    # An empty pattern would match at every position and litter the output
    # with empty <mark> tags, so ask for a keyword instead.
    if not keyword:
        return '<h3 style="text-align: center;">Please enter a keyword to search.</h3>'

    text = text or ""

    # re.escape makes characters such as "." or "(" match literally;
    # IGNORECASE matches against the original text, so offsets stay valid and
    # the highlighted slice keeps the text's own casing.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        # Escape the text so characters like "<" or "&" are shown literally
        # instead of being interpreted as markup.
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append('<mark>' + html.escape(match.group(), quote=False) + '</mark>')
        cursor = match.end()

    if not pieces:
        return '<h3 style="text-align: center;">' + "Keyword not found" + '</h3>'

    pieces.append(html.escape(text[cursor:], quote=False))
    return '<h3>' + "".join(pieces) + '</h3>'
|
83 |
|
84 |
def cleanup_old_files():
|
85 |
current_time = time.time()
|
|
|
93 |
<p>Scan Master uses General OCR Theory (GOT), a 580M end-to-end OCR 2.0 model for English optical character recognition and EASYOCR for Hindi optical character recognition. It supports plain text ocr.</p>
|
94 |
"""
|
95 |
|
96 |
+
|
97 |
with gr.Blocks() as scan_master_web_app:
|
98 |
gr.HTML(title_html)
|
99 |
gr.Markdown("""
|
|
|
142 |
|
143 |
if __name__ == "__main__":
|
144 |
cleanup_old_files()
|
145 |
+
scan_master_web_app.launch()
|