GAS17 committed on
Commit
9204aaf
·
verified ·
1 Parent(s): b3e1fb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -24
app.py CHANGED
@@ -1,34 +1,49 @@
1
  import gradio as gr
2
  import io
3
- from doctr.io import DocumentFile
4
- from doctr.models import ocr_predictor
 
 
 
 
 
 
 
 
5
 
6
  # Initialize the OCR model
7
- model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
 
 
 
 
8
 
9
  def ocr_process(file):
10
- # Read the uploaded file
11
- if file.name.lower().endswith('.pdf'):
12
- doc = DocumentFile.from_pdf(file.name)
13
- else:
14
- # Assume it's an image if not PDF
15
- image_stream = io.BytesIO(file.read())
16
- doc = DocumentFile.from_images(image_stream)
17
-
18
- # Perform OCR
19
- result = model(doc)
20
-
21
- # Extract text from the result
22
- extracted_text = ""
23
- for page in result.pages:
24
- for block in page.blocks:
25
- for line in block.lines:
26
- for word in line.words:
27
- extracted_text += word.value + " "
 
 
28
  extracted_text += "\n"
29
- extracted_text += "\n"
30
-
31
- return extracted_text.strip()
 
32
 
33
  # Create Gradio interface
34
  iface = gr.Interface(
 
1
  import gradio as gr
2
  import io
3
+ import sys
4
+
5
+ try:
6
+ from doctr.io import DocumentFile
7
+ from doctr.models import ocr_predictor
8
+ except ImportError:
9
+ print("Error: Failed to import doctr. Please ensure it's installed correctly.")
10
+ print("Python version:", sys.version)
11
+ print("Python path:", sys.path)
12
+ raise
13
 
14
  # Initialize the OCR model
15
+ try:
16
+ model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
17
+ except Exception as e:
18
+ print(f"Error initializing OCR model: {e}")
19
+ raise
20
 
21
  def ocr_process(file):
22
+ try:
23
+ # Read the uploaded file
24
+ if file.name.lower().endswith('.pdf'):
25
+ doc = DocumentFile.from_pdf(file.name)
26
+ else:
27
+ # Assume it's an image if not PDF
28
+ image_stream = io.BytesIO(file.read())
29
+ doc = DocumentFile.from_images(image_stream)
30
+
31
+ # Perform OCR
32
+ result = model(doc)
33
+
34
+ # Extract text from the result
35
+ extracted_text = ""
36
+ for page in result.pages:
37
+ for block in page.blocks:
38
+ for line in block.lines:
39
+ for word in line.words:
40
+ extracted_text += word.value + " "
41
+ extracted_text += "\n"
42
  extracted_text += "\n"
43
+
44
+ return extracted_text.strip()
45
+ except Exception as e:
46
+ return f"Error processing file: {str(e)}"
47
 
48
  # Create Gradio interface
49
  iface = gr.Interface(