justin4602 committed
Commit 301d77a · verified · 1 Parent(s): 74519d3

Upload 3 files

Files changed (3)
  1. README.md +7 -7
  2. app.py +119 -0
  3. requirements.txt +8 -0
README.md CHANGED
@@ -1,12 +1,12 @@
 ---
-title: Ocr
-emoji: 📉
-colorFrom: yellow
-colorTo: green
+title: OCR Tool GOT OCR 2
+emoji: 🚀
+colorFrom: blue
+colorTo: blue
 sdk: streamlit
-sdk_version: 1.38.0
 app_file: app.py
 pinned: false
+short_description: Tool to extract text from images with keyword search
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# OCR-Tool-
+An OCR tool to extract text from images, with keyword search, using GOT-OCR 2.0
app.py ADDED
@@ -0,0 +1,119 @@
+from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
+import streamlit as st
+import os
+from PIL import Image
+import requests
+import torch
+import json
+from torchvision import io
+from typing import Dict
+import re
+
+@st.cache_resource
+def init_model():
+    # CPU-friendly GOT-OCR2.0 checkpoint; cached so Streamlit loads it only once.
+    tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
+    model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+    model = model.eval()
+    return model, tokenizer
+
+def init_gpu_model():
+    tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+    model = model.eval().cuda()
+    return model, tokenizer
+
+def init_qwen_model():
+    model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", device_map="cpu", torch_dtype=torch.float16)
+    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
+    return model, processor
+
+def get_qwen_op(image_file, model, processor):
+    try:
+        image = Image.open(image_file).convert('RGB')
+        conversation = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image",
+                    },
+                    {
+                        "type": "text",
+                        "text": "Extract text from this image."
+                    }
+                ]
+            }
+        ]
+        text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+        inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt")
+        inputs = {k: v.to(torch.float32) if torch.is_floating_point(v) else v for k, v in inputs.items()}
+
+        generation_config = {
+            "max_new_tokens": 32,
+            "do_sample": False,
+            "top_k": 20,
+            "top_p": 0.90,
+            "temperature": 0.4,
+            "num_return_sequences": 1,
+            "pad_token_id": processor.tokenizer.pad_token_id,
+            "eos_token_id": processor.tokenizer.eos_token_id,
+        }
+
+        output_ids = model.generate(**inputs, **generation_config)
+        # Drop the prompt tokens so only newly generated text is decoded.
+        if 'input_ids' in inputs:
+            generated_ids = output_ids[:, inputs['input_ids'].shape[1]:]
+        else:
+            generated_ids = output_ids
+
+        output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+
+        return output_text[0] if output_text else "No text extracted from the image."
+
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+
+@st.cache_data
+def get_text(image_file, _model, _tokenizer):
+    # Underscore-prefixed arguments tell Streamlit not to hash the model/tokenizer.
+    res = _model.chat(_tokenizer, image_file, ocr_type='ocr')
+    return res
+
+def highlight_text(text, search_term):
+    # Wrap case-insensitive matches of the search term in a highlighted span.
+    if not search_term:
+        return text
+    pattern = re.compile(re.escape(search_term), re.IGNORECASE)
+    return pattern.sub(lambda m: f'<span style="background-color: grey;">{m.group()}</span>', text)
+
+def save_text_to_json(file_name, text_data):
+    """Save the extracted text into a JSON file."""
+    with open(file_name, 'w') as json_file:
+        json.dump({"extracted_text": text_data}, json_file, indent=4)
+    st.success(f"Text saved to {file_name}")
+
+st.title("Extract text from images using GOT-OCR2.0, with keyword search")
+st.write("Upload an image")
+
+MODEL, TOKENIZER = init_model()
+
+image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'])
+
+if image_file:
+    if not os.path.exists("images"):
+        os.makedirs("images")
+    # Persist the upload to disk so the model can read it from a file path.
+    with open(f"images/{image_file.name}", "wb") as f:
+        f.write(image_file.getbuffer())
+
+    image_file = f"images/{image_file.name}"
+
+    text = get_text(image_file, MODEL, TOKENIZER)
+
+    print(text)
+
+    # Add search functionality
+    search_term = st.text_input("Enter a word or phrase to search:")
+    highlighted_text = highlight_text(text, search_term)
+
+    st.markdown(highlighted_text, unsafe_allow_html=True)
+
+    # Save the extracted text in JSON
+    json_file_path = f"{image_file}_extracted.json"
+    save_text_to_json(json_file_path, text)
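For anyone adapting this commit, the same OCR-plus-search flow can be exercised outside Streamlit. A minimal sketch, assuming the srimanth-d/GOT_CPU remote code exposes the chat() method exactly as get_text calls it above; the image path and search term are placeholders:

# Standalone sketch of app.py's OCR + keyword flow (no Streamlit).
# Assumes model.chat(tokenizer, path, ocr_type='ocr') as used in get_text;
# 'images/sample.jpg' and 'invoice' are placeholder inputs.
import re
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
model = AutoModel.from_pretrained(
    'srimanth-d/GOT_CPU', trust_remote_code=True,
    use_safetensors=True, pad_token_id=tokenizer.eos_token_id,
).eval()

text = model.chat(tokenizer, 'images/sample.jpg', ocr_type='ocr')
matches = [m.start() for m in re.finditer(re.escape('invoice'), text, re.IGNORECASE)]
print(text)
print(f"Offsets of 'invoice': {matches}")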
requirements.txt ADDED
@@ -0,0 +1,8 @@
+transformers==4.45.0
+streamlit==1.30.0
+torch --index-url https://download.pytorch.org/whl/cpu
+torchvision --index-url https://download.pytorch.org/whl/cpu
+tiktoken
+verovio
+accelerate==0.28.0
+Pillow==10.3.0
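torch and torchvision are pulled from the PyTorch CPU wheel index, matching the CPU-only GOT_CPU checkpoint the app loads. Once the app has processed an upload, the extracted text can be read back from the JSON sidecar written by save_text_to_json; a minimal sketch, assuming an upload named sample.jpg was processed:

# Read the sidecar JSON app.py writes next to each processed upload.
# The {"extracted_text": ...} schema comes from save_text_to_json above;
# the path below is a placeholder for a real processed upload.
import json

with open('images/sample.jpg_extracted.json') as f:
    data = json.load(f)
print(data['extracted_text'])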