Divyansh12 committed on
Commit
2d8087a
·
verified ·
1 Parent(s): b5ef879

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -23
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from transformers import AutoModel, AutoTokenizer
2
  import streamlit as st
3
  from PIL import Image
4
  import re
@@ -20,7 +20,7 @@ if "model" not in st.session_state or "tokenizer" not in st.session_state:
20
  return model, tokenizer
21
 
22
  # Load and store in session state
23
- model_option = st.selectbox("Select Model", ["OCR for English or Hindi (CPU)", "OCR for English (GPU)"])
24
  model, tokenizer = load_model(model_option)
25
  st.session_state["model"] = model
26
  st.session_state["tokenizer"] = tokenizer
@@ -61,34 +61,42 @@ def highlight_text(text, search_term):
61
  st.title("GOT-OCR Multilingual Demo")
62
  st.write("Upload an image for OCR")
63
 
64
- # Upload image
65
- uploaded_image = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
 
 
 
 
 
 
 
66
 
67
- if uploaded_image:
68
- image = Image.open(uploaded_image)
69
- st.image(image, caption='Uploaded Image', use_column_width=True)
70
 
71
  if st.button("Run OCR"):
72
  with st.spinner("Processing..."):
73
  # Run OCR and store the result in session state
74
- result_text = run_ocr(image, model, tokenizer)
75
- if "Error" not in result_text:
76
- st.session_state["extracted_text"] = result_text # Store the result in session state
 
 
 
77
  else:
78
- st.error(result_text)
79
 
80
- # Display the extracted text if it exists in session state
81
- if "extracted_text" in st.session_state:
82
- extracted_text = st.session_state["extracted_text"]
83
 
84
- st.subheader("Extracted Text:")
85
- st.text(extracted_text) # Display the raw extracted text
86
-
87
- # Keyword input for search
88
- search_term = st.text_input("Enter a word or phrase to highlight:")
89
-
90
- # Highlight keyword in the extracted text
91
- if search_term:
92
  highlighted_text = highlight_text(extracted_text, search_term)
 
93
  # Display the highlighted text using markdown
94
- st.markdown(highlighted_text, unsafe_allow_html=True)
 
 
1
+ from transformers import AutoModel, AutoTokenizer
2
  import streamlit as st
3
  from PIL import Image
4
  import re
 
20
  return model, tokenizer
21
 
22
  # Load and store in session state
23
+ model_option = "OCR for English or Hindi (CPU)" # Default value for loading purposes
24
  model, tokenizer = load_model(model_option)
25
  st.session_state["model"] = model
26
  st.session_state["tokenizer"] = tokenizer
 
61
  st.title("GOT-OCR Multilingual Demo")
62
  st.write("Upload an image for OCR")
63
 
64
+ # Create two columns
65
+ col1, col2 = st.columns(2)
66
+
67
+ # Left column - Display the uploaded image
68
+ with col1:
69
+ uploaded_image = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
70
+ if uploaded_image:
71
+ image = Image.open(uploaded_image)
72
+ st.image(image, caption='Uploaded Image', use_column_width=True)
73
 
74
+ # Right column - Model selection, options, and displaying extracted text
75
+ with col2:
76
+ model_option = st.selectbox("Select Model", ["OCR for English or Hindi (CPU)", "OCR for English (GPU)"])
77
 
78
  if st.button("Run OCR"):
79
  with st.spinner("Processing..."):
80
  # Run OCR and store the result in session state
81
+ if uploaded_image:
82
+ result_text = run_ocr(image, model, tokenizer)
83
+ if "Error" not in result_text:
84
+ st.session_state["extracted_text"] = result_text # Store the result in session state
85
+ else:
86
+ st.error(result_text)
87
  else:
88
+ st.error("Please upload an image before running OCR.")
89
 
90
+ # Display the extracted text if it exists in session state
91
+ if "extracted_text" in st.session_state:
92
+ extracted_text = st.session_state["extracted_text"]
93
 
94
+ # Keyword input for search
95
+ search_term = st.text_input("Enter a word or phrase to highlight:")
96
+
97
+ # Highlight keyword in the extracted text
 
 
 
 
98
  highlighted_text = highlight_text(extracted_text, search_term)
99
+
100
  # Display the highlighted text using markdown
101
+ st.subheader("Extracted Text:")
102
+ st.markdown(f'<div style="white-space: pre-wrap;">{highlighted_text}</div>', unsafe_allow_html=True)