Divyansh12 committed
Commit fa9edbf · verified · 1 Parent(s): 429f75e

Update app.py

Files changed (1)
  1. app.py +119 -49
app.py CHANGED
@@ -1,59 +1,129 @@
 
 import streamlit as st
-from PIL import Image
-import re
 from transformers import AutoModel, AutoTokenizer
-
-st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
-
-@st.cache_resource
-def load_model():
-    # Load the tokenizer and model for processing images
-    tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-    return tokenizer, model
-
-def extract_text(image, tokenizer, model):
-    # Preprocess the image and extract text using the model
-    inputs = tokenizer(images=image, return_tensors="pt")  # Adjust based on how the model expects inputs
-    generated_ids = model.generate(**inputs)
-    extracted_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return extracted_text
-
-def highlight_matches(text, keywords):
-    # Highlight keywords in the extracted text
-    pattern = re.compile(f"({re.escape(keywords)})", re.IGNORECASE)
-    highlighted_text = pattern.sub(r"<mark>\1</mark>", text)
-    return highlighted_text
-
-def main():
-    st.title("OCR Text Extractor using Qwen Model")
-
-    # Load model and tokenizer
-    tokenizer, model = load_model()
-
-    # Upload Image
-    uploaded_file = st.file_uploader("Upload an image for OCR", type=["png", "jpg", "jpeg"])
-
-    if uploaded_file:
-        image = Image.open(uploaded_file)
-        st.image(image, caption="Uploaded Image", use_column_width=True)
-
-        # Extract text from the image using the model
-        with st.spinner("Extracting text from the image..."):
-            extracted_text = extract_text(image, tokenizer, model)
-
-        st.subheader("Extracted Text")
-        st.text_area("Text from Image", extracted_text, height=300)
-
-        # Keyword search
-        st.subheader("Keyword Search")
-        keywords = st.text_input("Enter keywords to search:")
-
-        if st.button("Search"):
-            highlighted_text = highlight_matches(extracted_text, keywords)
-            st.subheader("Search Results")
-            st.markdown(highlighted_text, unsafe_allow_html=True)
-
-if __name__ == "__main__":
-    main()
+from PIL import Image
+import os
+import base64
+import uuid
+import time
+import shutil
+from pathlib import Path
+
+# Load the tokenizer and model once at startup; inference runs on CPU
+tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True)
+model = model.eval()  # evaluation mode; the model stays on the CPU
+
+# Folders for uploaded images and rendered results
+UPLOAD_FOLDER = "./uploads"
+RESULTS_FOLDER = "./results"
+
+for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
+    if not os.path.exists(folder):
+        os.makedirs(folder)
+
+# Run the GOT model in the selected mode
+def run_GOT(image, got_mode, fine_grained_mode="", ocr_color="", ocr_box=""):
+    unique_id = str(uuid.uuid4())
+    image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
+    result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
+
+    image.save(image_path)
+
+    try:
+        if got_mode == "plain texts OCR":
+            res = model.chat(tokenizer, image_path, ocr_type='ocr')
+            return res, None
+        elif got_mode == "format texts OCR":
+            res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
+        elif got_mode == "plain multi-crop OCR":
+            res = model.chat_crop(tokenizer, image_path, ocr_type='ocr')
+            return res, None
+        elif got_mode == "format multi-crop OCR":
+            res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
+        elif got_mode == "plain fine-grained OCR":
+            res = model.chat(tokenizer, image_path, ocr_type='ocr', ocr_box=ocr_box, ocr_color=ocr_color)
+            return res, None
+        elif got_mode == "format fine-grained OCR":
+            res = model.chat(tokenizer, image_path, ocr_type='format', ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
+
+        res_markdown = res
+
+        if "format" in got_mode and os.path.exists(result_path):
+            with open(result_path, 'r') as f:
+                html_content = f.read()
+            encoded_html = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')
+            iframe_src = f"data:text/html;base64,{encoded_html}"
+            iframe = f'<iframe src="{iframe_src}" width="100%" height="600px"></iframe>'
+            return res_markdown, iframe
+        else:
+            return res_markdown, None
+    except Exception as e:
+        return f"Error: {str(e)}", None
+    finally:
+        if os.path.exists(image_path):
+            os.remove(image_path)
+
+# Remove uploads and results older than one hour
+def cleanup_old_files():
+    current_time = time.time()
+    for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
+        for file_path in Path(folder).glob('*'):
+            if current_time - file_path.stat().st_mtime > 3600:  # 1 hour
+                file_path.unlink()
+
+# Streamlit app
+st.set_page_config(page_title="GOT-OCR-2.0 Demo", layout="wide")
+
+st.markdown("""
+<h2> <span style="color: #ff6600">General OCR Theory</span>: Towards OCR-2.0 via a Unified End-to-end Model</h2>
+<a href="https://huggingface.co/ucaslcl/GOT-OCR2_0">[😊 Hugging Face]</a>
+<a href="https://arxiv.org/abs/2409.01704">[📜 Paper]</a>
+<a href="https://github.com/Ucas-HaoranWei/GOT-OCR2.0/">[🌟 GitHub]</a>
+""", unsafe_allow_html=True)
+
+st.markdown("""
+🔥🔥🔥 This is the official online demo of the GOT-OCR-2.0 model!
+### Demo Guidelines
+- Upload an image below, choose one GOT mode, and click "Submit" to run the model. Images with more characters take longer to process.
+- **plain texts OCR & format texts OCR**: these two modes perform OCR on the whole image.
+- **plain multi-crop OCR & format multi-crop OCR**: for images with more complex content, these modes produce higher-quality results.
+- **plain fine-grained OCR & format fine-grained OCR**: these modes restrict OCR to fine-grained regions of the input image. A region can be given as box coordinates, or marked in red, green, or blue.
+""")
+
+uploaded_image = st.file_uploader("Upload your image", type=["png", "jpg", "jpeg"])
+
+if uploaded_image:
+    image = Image.open(uploaded_image)
+    st.image(image, caption='Uploaded Image', use_column_width=True)
+
+    got_mode = st.selectbox("Choose one mode of GOT", [
+        "plain texts OCR",
+        "format texts OCR",
+        "plain multi-crop OCR",
+        "format multi-crop OCR",
+        "plain fine-grained OCR",
+        "format fine-grained OCR",
+    ])
+
+    fine_grained_mode = None
+    ocr_color = ""
+    ocr_box = ""
+
+    if "fine-grained" in got_mode:
+        fine_grained_mode = st.selectbox("Fine-grained type", ["box", "color"])
+        if fine_grained_mode == "box":
+            ocr_box = st.text_input("Input box: [x1,y1,x2,y2]", value="[0,0,100,100]")
+        elif fine_grained_mode == "color":
+            ocr_color = st.selectbox("Color list", ["red", "green", "blue"])
+
+    if st.button("Submit"):
+        with st.spinner("Processing..."):
+            result_text, html_result = run_GOT(image, got_mode, fine_grained_mode, ocr_color, ocr_box)
+            st.text_area("GOT Output", result_text, height=200)
+
+            if html_result:
+                st.markdown(html_result, unsafe_allow_html=True)
+
+# Clean up old files on each rerun
+cleanup_old_files()
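
For reference, the rewritten app drives OCR entirely through the custom `chat`/`chat_crop` methods that ship with the checkpoint's remote code; the old version's `tokenizer(images=...)` / `model.generate` calls are not how this model is invoked. Below is a minimal sketch of the same calls outside Streamlit, assuming the remote code's dependencies (e.g. torch) are installed; `sample.png` and `./result.html` are placeholder paths, not files from this commit.

from transformers import AutoModel, AutoTokenizer

# Same loading pattern as the new app.py; trust_remote_code pulls in
# GOT's custom chat/chat_crop methods.
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True).eval()

# "plain texts OCR": OCR over the whole image.
text = model.chat(tokenizer, 'sample.png', ocr_type='ocr')

# "plain fine-grained OCR": restrict OCR to a box, using the same
# [x1,y1,x2,y2] string format the app collects from the user.
boxed = model.chat(tokenizer, 'sample.png', ocr_type='ocr', ocr_box='[0,0,100,100]')

# "format texts OCR": render the formatted result to an HTML file, which
# the app then base64-encodes into a data: URI and shows in an iframe.
formatted = model.chat(tokenizer, 'sample.png', ocr_type='format', render=True, save_render_file='./result.html')

print(text)

Rendering to a file and embedding it as a base64 data: URI lets the Space show the formatted output without serving the HTML file separately, which is why run_GOT returns the iframe markup as a string.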