Divyansh12 committed on
Commit b5ef879 · verified · 1 Parent(s): 06672c1

Update app.py

Files changed (1)
  1. app.py +67 -57
app.py CHANGED
@@ -1,34 +1,48 @@
- import os
- import streamlit as st
  from transformers import AutoModel, AutoTokenizer
  from PIL import Image
  import uuid

- # Cache the model loading function using @st.cache_resource
- @st.cache_resource
- def load_model(model_name):
-     if model_name == "OCR for english or hindi (runs on CPU)":
-         tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
-         model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-         model.eval()  # Load model on CPU
-     elif model_name == "OCR for english (runs on GPU)":
-         tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-         model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-         model.eval().cuda()  # Load model on GPU
-     return tokenizer, model

  # Function to run the GOT model for multilingual OCR
- @st.cache_data
- def run_GOT(_image, _tokenizer, _model):
      unique_id = str(uuid.uuid4())
      image_path = f"{unique_id}.png"

-     _image.save(image_path)  # Save the image using the underscore variable

      try:
-         # Use the model to extract text
-         res = _model.chat(_tokenizer, image_path, ocr_type='ocr')  # Extract plain text
-         return res
      except Exception as e:
          return f"Error: {str(e)}"
      finally:
@@ -37,48 +51,44 @@ def run_GOT(_image, _tokenizer, _model):
          os.remove(image_path)

  # Function to highlight keyword in text
- def highlight_keyword(text, keyword):
-     if keyword:
-         highlighted_text = text.replace(keyword, f"<mark>{keyword}</mark>")
-         return highlighted_text
-     return text

  # Streamlit App
- st.set_page_config(page_title="GOT-OCR Multilingual Demo", layout="wide")

- # Creating two columns
- left_col, right_col = st.columns(2)
-
- with left_col:
-     uploaded_image = st.file_uploader("Upload your image", type=["png", "jpg", "jpeg"])
-
- with right_col:
-     # Model selection in the right column
-     model_option = st.selectbox("Select Model", ["OCR for english or hindi (runs on CPU)", "OCR for english (runs on GPU)"])

  if uploaded_image:
      image = Image.open(uploaded_image)

-     with left_col:
-         st.image(image, caption='Uploaded Image', use_column_width=True)

-     with right_col:
-         if st.button("Run OCR"):
-             with st.spinner("Processing..."):
-                 # Load the selected model (cached using @st.cache_resource)
-                 tokenizer, model = load_model(model_option)
-
-                 # Run OCR and cache the result using @st.cache_data
-                 result_text = run_GOT(image, tokenizer, model)  # Pass the original image here
-
-                 if "Error" not in result_text:
-                     # Keyword input for search
-                     keyword = st.text_input("Enter a keyword to highlight")
-
-                     # Highlight keyword in the extracted text
-                     highlighted_text = highlight_keyword(result_text, keyword)
-
-                     # Display the extracted text
-                     st.markdown(highlighted_text, unsafe_allow_html=True)
-                 else:
-                     st.error(result_text)
 
 
 
  from transformers import AutoModel, AutoTokenizer
+ import streamlit as st
  from PIL import Image
+ import re
+ import os
  import uuid

+ # Load the model and tokenizer only once
+ if "model" not in st.session_state or "tokenizer" not in st.session_state:
+     @st.cache_resource
+     def load_model(model_name):
+         if model_name == "OCR for English or Hindi (CPU)":
+             tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
+             model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+             model = model.eval()
+         elif model_name == "OCR for English (GPU)":
+             tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+             model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+             model = model.eval().to('cuda')
+         return model, tokenizer
+
+     # Load and store in session state
+     model_option = st.selectbox("Select Model", ["OCR for English or Hindi (CPU)", "OCR for English (GPU)"])
+     model, tokenizer = load_model(model_option)
+     st.session_state["model"] = model
+     st.session_state["tokenizer"] = tokenizer
+ else:
+     model = st.session_state["model"]
+     tokenizer = st.session_state["tokenizer"]

  # Function to run the GOT model for multilingual OCR
+ def run_ocr(image, model, tokenizer):
      unique_id = str(uuid.uuid4())
      image_path = f"{unique_id}.png"

+     # Save image to disk
+     image.save(image_path)

      try:
+         # Use the model to extract text from the image
+         res = model.chat(tokenizer, image_path, ocr_type='ocr')
+         if isinstance(res, str):
+             return res
+         else:
+             return str(res)
      except Exception as e:
          return f"Error: {str(e)}"
      finally:

          os.remove(image_path)

  # Function to highlight keyword in text
+ def highlight_text(text, search_term):
+     if not search_term:
+         return text
+     pattern = re.compile(re.escape(search_term), re.IGNORECASE)
+     return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)

  # Streamlit App
+ st.title("GOT-OCR Multilingual Demo")
+ st.write("Upload an image for OCR")

+ # Upload image
+ uploaded_image = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])

  if uploaded_image:
      image = Image.open(uploaded_image)
+     st.image(image, caption='Uploaded Image', use_column_width=True)

+     if st.button("Run OCR"):
+         with st.spinner("Processing..."):
+             # Run OCR and store the result in session state
+             result_text = run_ocr(image, model, tokenizer)
+             if "Error" not in result_text:
+                 st.session_state["extracted_text"] = result_text  # Store the result in session state
+             else:
+                 st.error(result_text)
+
+     # Display the extracted text if it exists in session state
+     if "extracted_text" in st.session_state:
+         extracted_text = st.session_state["extracted_text"]
+
+         st.subheader("Extracted Text:")
+         st.text(extracted_text)  # Display the raw extracted text
+
+         # Keyword input for search
+         search_term = st.text_input("Enter a word or phrase to highlight:")

+         # Highlight keyword in the extracted text
+         if search_term:
+             highlighted_text = highlight_text(extracted_text, search_term)
+             # Display the highlighted text using markdown
+             st.markdown(highlighted_text, unsafe_allow_html=True)
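
For reference, the replacement highlight_text helper escapes the search term and wraps every case-insensitive match in an inline-styled span, which st.markdown then renders with unsafe_allow_html=True. A minimal standalone sketch of that behaviour is below; the sample sentence and search term are illustrative only, not taken from app.py:

import re

def highlight_text(text, search_term):
    # Return the text unchanged when no search term is given
    if not search_term:
        return text
    # Escape the term so regex metacharacters are matched literally,
    # then wrap each case-insensitive match in a highlighted span
    pattern = re.compile(re.escape(search_term), re.IGNORECASE)
    return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)

# Illustrative usage (not part of app.py)
print(highlight_text("GOT-OCR reads English and Hindi text.", "english"))
# -> GOT-OCR reads <span style="background-color: yellow;">English</span> and Hindi text.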