Divyansh12 committed on
Commit 2dd690c · verified · 1 Parent(s): c90dcb5

Update app.py

Files changed (1): app.py (+31, -55)
app.py CHANGED
@@ -1,3 +1,4 @@
+
 from transformers import AutoModel, AutoTokenizer
 import streamlit as st
 from PIL import Image
@@ -5,61 +6,44 @@ import re
 import os
 import uuid
 
+# Set the page layout to wide
+st.set_page_config(layout="wide")
+
 # Load the model and tokenizer only once
-if "model" not in st.session_state or "tokenizer" not in st.session_state:
-    @st.cache_resource
-    def load_model(model_name):
-        if model_name == "OCR for English or Hindi (CPU)":
-            tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
-            model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-            model = model.eval()
-        elif model_name == "OCR for English (GPU)":
-            tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-            model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-            model = model.eval().to('cuda')
-        return model, tokenizer
+@st.cache_resource
+def load_model(model_name):
+    if model_name == "OCR on CPU":
+        tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
+        model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
+    else:
+        tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+        model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval().to('cuda')
+    return model, tokenizer
 
-    # Load and store in session state
-    model_option = "OCR for English or Hindi (CPU)"  # Default value for loading purposes
-    model, tokenizer = load_model(model_option)
-    st.session_state["model"] = model
-    st.session_state["tokenizer"] = tokenizer
-else:
-    model = st.session_state["model"]
-    tokenizer = st.session_state["tokenizer"]
+if "model" not in st.session_state or "tokenizer" not in st.session_state:
+    model, tokenizer = load_model("OCR for English or Hindi (CPU)")
+    st.session_state.update({"model": model, "tokenizer": tokenizer})
 
 # Function to run the GOT model for multilingual OCR
 def run_ocr(image, model, tokenizer):
-    unique_id = str(uuid.uuid4())
-    image_path = f"{unique_id}.png"
-
-    # Save image to disk
+    image_path = f"{uuid.uuid4()}.png"
     image.save(image_path)
 
     try:
-        # Use the model to extract text from the image
         res = model.chat(tokenizer, image_path, ocr_type='ocr')
-        if isinstance(res, str):
-            return res
-        else:
-            return str(res)
+        return res if isinstance(res, str) else str(res)
     except Exception as e:
         return f"Error: {str(e)}"
     finally:
-        # Clean up the saved image
-        if os.path.exists(image_path):
-            os.remove(image_path)
+        os.remove(image_path)
 
 # Function to highlight keyword in text
 def highlight_text(text, search_term):
-    if not search_term:
-        return text
-    pattern = re.compile(re.escape(search_term), re.IGNORECASE)
-    return pattern.sub(lambda m: f'<span style="background-color: red;">{m.group()}</span>', text)
+    return re.sub(re.escape(search_term), lambda m: f'<span style="background-color: red;">{m.group()}</span>', text, flags=re.IGNORECASE) if search_term else text
 
 # Streamlit App
-st.title("GOT-OCR Multilingual Demo")
-st.write("Upload an image for OCR")
+st.title(":blue[Object character recognition Application]")
+st.write("Give your Image")
 
 # Create two columns
 col1, col2 = st.columns(2)
@@ -73,30 +57,22 @@ with col1:
 
 # Right column - Model selection, options, and displaying extracted text
 with col2:
-    model_option = st.selectbox("Select Model", ["OCR for English or Hindi (CPU)", "OCR for English (GPU)"])
+    model_option = st.selectbox("Select Model", ["OCR on CPU", "OCR on GPU"])
 
-    if st.button("Run OCR"):
-        with st.spinner("Processing..."):
-            # Run OCR and store the result in session state
-            if uploaded_image:
+    if st.button("DO OCR "):
+        if uploaded_image:
+            with st.spinner("Processing..."):
+                model, tokenizer = load_model(model_option)
                 result_text = run_ocr(image, model, tokenizer)
                 if "Error" not in result_text:
-                    st.session_state["extracted_text"] = result_text  # Store the result in session state
+                    st.session_state["extracted_text"] = result_text
                 else:
                     st.error(result_text)
-            else:
-                st.error("Please upload an image before running OCR.")
+        else:
+            st.error("Please upload an image before running OCR.")
 
     # Display the extracted text if it exists in session state
    if "extracted_text" in st.session_state:
-        extracted_text = st.session_state["extracted_text"]
-
-        # Keyword input for search
         search_term = st.text_input("Enter a word or phrase to highlight:")
-
-        # Highlight keyword in the extracted text
-        highlighted_text = highlight_text(extracted_text, search_term)
-
-        # Display the highlighted text using markdown
         st.subheader("Extracted Text:")
-        st.markdown(f'<div style="white-space: pre-wrap;">{highlighted_text}</div>', unsafe_allow_html=True)
+        st.markdown(f'<div style="white-space: pre-wrap;">{highlight_text(st.session_state["extracted_text"], search_term)}</div>', unsafe_allow_html=True)
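
Note on the refactor (not part of the commit): the new flow calls load_model(model_option) inside the button handler on every click and relies on Streamlit's @st.cache_resource to make that cheap, since the decorator memoizes the returned objects per distinct argument value, so each model is loaded at most once per process. A minimal sketch of that behavior, using a hypothetical load_resource stand-in rather than the real GOT models:

import streamlit as st

@st.cache_resource
def load_resource(name: str):
    # Body runs once per distinct `name`; later calls with the same
    # argument return the cached object instead of reloading it.
    print(f"loading {name}")
    return {"name": name}  # stand-in for a heavy (model, tokenizer) pair

a = load_resource("OCR on CPU")
b = load_resource("OCR on CPU")  # cache hit: no second "loading" message
assert a is b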
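
The collapsed highlight_text one-liner keeps the previous behavior: matching is case-insensitive, and the original casing of each match is preserved because the replacement uses m.group(). A quick usage sketch (the expected output in the comment is illustrative, not taken from the commit):

import re

def highlight_text(text, search_term):
    return re.sub(re.escape(search_term),
                  lambda m: f'<span style="background-color: red;">{m.group()}</span>',
                  text, flags=re.IGNORECASE) if search_term else text

print(highlight_text("Hello World", "world"))
# -> Hello <span style="background-color: red;">World</span>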