Genzo1010 committed on
Commit
2685d9c
·
verified ·
1 Parent(s): 0d2d871

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -80
app.py CHANGED
@@ -1,100 +1,100 @@
1
- # import gradio as gr
2
- # import requests
3
- # import os
4
- # from datasets import load_dataset, Image
5
- # from PIL import Image
6
- # from paddleocr import PaddleOCR
7
- # from doctr.io import DocumentFile
8
 
9
 
10
- # # Set environment variable for PyTorch usage
11
- # os.environ['USE_TF'] = '0' # Set TensorFlow to off
12
- # os.environ['USE_TORCH'] = '1' # Set PyTorch to on
13
 
14
- # from doctr.models import ocr_predictor
15
- # ocr_model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
16
 
17
 
18
 
19
- # """
20
- # Perform OCR with doctr
21
- # """
22
- # def ocr_with_doctr(file):
23
- # text_output = ''
24
 
25
- # # Load the document
26
- # doc = DocumentFile.from_pdf(file)
27
 
28
- # # Perform OCR
29
- # result = ocr_model(doc)
30
 
31
- # # Extract text from OCR result
32
- # for page in result.pages:
33
- # for block in page.blocks:
34
- # for line in block.lines:
35
- # text_output += " ".join([word.value for word in line.words]) + "\n"
36
 
37
- # return text_output
38
-
39
- # """
40
- # Paddle OCR
41
- # """
42
- # def ocr_with_paddle(img):
43
- # finaltext = ''
44
- # ocr = PaddleOCR(lang='en', use_angle_cls=True, use_gpu=True)
45
- # # img_path = 'exp.jpeg'
46
- # result = ocr.ocr(img)
47
 
48
- # for i in range(len(result[0])):
49
- # text = result[0][i][1][0]
50
- # finaltext += ' '+ text
51
- # return finaltext
52
-
53
- # def generate_ocr(Method, file):
54
- # text_output = ''
55
- # if isinstance(file, bytes): # Handle file uploaded as bytes
56
- # file = io.BytesIO(file)
57
-
58
- # if file.name.endswith('.pdf'):
59
- # # Perform OCR on the PDF using doctr
60
- # text_output = ocr_with_doctr(file)
61
-
62
- # else:
63
- # # Handle image file
64
- # img_np = np.array(Image.open(file))
65
- # text_output = generate_text_from_image(Method, img_np)
66
 
67
- # return text_output
68
 
69
- # def generate_text_from_image(Method, img):
70
- # text_output = ''
71
- # if Method == 'PaddleOCR':
72
- # text_output = ocr_with_paddle(img)
73
- # return text_output
74
 
75
 
76
- # import gradio as gr
77
 
78
- # image_or_pdf = gr.File(label="Upload an image or PDF")
79
- # method = gr.Radio(["PaddleOCR"], value="PaddleOCR")
80
- # output = gr.Textbox(label="Output")
81
 
82
- # demo = gr.Interface(
83
- # generate_ocr,
84
- # [method, image_or_pdf],
85
- # output,
86
- # title="Optical Character Recognition",
87
- # css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
88
- # article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
89
- # <a href="mailto:[email protected]" target="_blank">[email protected]</a>
90
- # <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>"""
91
- # )
92
 
93
- # demo.launch(share=True)
94
- import os
95
 
96
- # Disable TensorFlow to ensure PyTorch is used
97
- os.environ['USE_TF'] = '0'
98
 
99
- import torch
100
- print(torch.cuda.is_available()) # Should return True if GPU is available
 
1
# Backend selection has to happen before the first `doctr` (and `datasets`)
# import: those packages read USE_TF / USE_TORCH while they are being imported,
# so assigning the variables afterwards has no effect.
import os

os.environ['USE_TF'] = '0'  # Set TensorFlow to off
os.environ['USE_TORCH'] = '1'  # Set PyTorch to on

import io  # generate_ocr wraps raw bytes in io.BytesIO

import gradio as gr
import numpy as np  # generate_ocr converts PIL images with np.array
import requests
from datasets import load_dataset, Image
# Imported after `datasets` so that the name `Image` refers to PIL's module,
# which is what `Image.open(...)` further down requires.
from PIL import Image
from paddleocr import PaddleOCR
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# docTR pipeline (DBNet detector + CRNN recogniser), built once at import time
# and reused by ocr_with_doctr.
ocr_model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
16
 
17
 
18
 
19
+ """
20
+ Perform OCR with doctr
21
+ """
22
def ocr_with_doctr(file):
    """Run the module-level docTR model on a PDF and return the recognised text.

    `file` is passed unchanged to `DocumentFile.from_pdf`. Each recognised
    line becomes its words joined by single spaces followed by a newline;
    a document with no lines yields an empty string.
    """
    pages = DocumentFile.from_pdf(file)
    prediction = ocr_model(pages)

    # Walk pages -> blocks -> lines in reading order, one output row per line.
    rows = []
    for page in prediction.pages:
        for block in page.blocks:
            for text_line in block.lines:
                words = [word.value for word in text_line.words]
                rows.append(" ".join(words) + "\n")

    return "".join(rows)
38
+
39
+ """
40
+ Paddle OCR
41
+ """
42
def ocr_with_paddle(img):
    """Run PaddleOCR on one image and return the recognised text.

    Args:
        img: Image input forwarded unchanged to `PaddleOCR.ocr`.

    Returns:
        The recognised text fragments, each preceded by a single space
        (so a non-empty result starts with a space), or an empty string
        when nothing was recognised.
    """
    # NOTE(review): the engine is rebuilt on every call, as in the original;
    # consider creating it once if model start-up time becomes a problem.
    ocr = PaddleOCR(lang='en', use_angle_cls=True, use_gpu=True)
    result = ocr.ocr(img)

    # The first element holds the detections for the (single) input image.
    # It can be None or empty when no text is found; the previous
    # `len(result[0])` raised TypeError in the None case.
    if not result or not result[0]:
        return ''

    # Each detection is indexed as [box, (text, score)]; only the text is kept.
    return ''.join(' ' + detection[1][0] for detection in result[0])
52
+
53
def generate_ocr(Method, file):
    """Route an uploaded file to the right OCR backend and return its text.

    PDFs are processed by `ocr_with_doctr`; anything else is opened as an
    image and handed to `generate_text_from_image` together with `Method`.

    Args:
        Method: Name of the image OCR engine (only used for non-PDF input).
        file: The upload. May be None (nothing uploaded), raw bytes, a path
            (str / os.PathLike), or a file-like object exposing the path in
            its `.name` attribute.

    Returns:
        The recognised text, or an empty string when no file was supplied.
    """
    # Local imports keep this function self-contained: the module never
    # imported `io` or `numpy`, which made the original raise NameError.
    import io
    import os

    import numpy as np

    if file is None:
        return ''

    if isinstance(file, (bytes, bytearray)):
        # Raw bytes carry no file name, so detect PDFs by their magic header
        # instead of by extension.
        payload = bytes(file)
        if payload.startswith(b'%PDF'):
            return ocr_with_doctr(payload)
        image_source = io.BytesIO(payload)
    else:
        if isinstance(file, (str, os.PathLike)):
            location = os.fspath(file)
        else:
            location = getattr(file, 'name', None)

        # Case-insensitive so that "SCAN.PDF" is treated as a PDF as well.
        if isinstance(location, str) and location.lower().endswith('.pdf'):
            return ocr_with_doctr(location)

        # Fall back to the object itself when it exposes no usable path.
        image_source = location if isinstance(location, str) else file

    # The context manager releases the underlying file handle once the pixel
    # data has been copied into the array.
    with Image.open(image_source) as picture:
        img_np = np.array(picture)

    return generate_text_from_image(Method, img_np)
68
 
69
def generate_text_from_image(Method, img):
    """Dispatch an image to the OCR engine named by `Method`.

    Only 'PaddleOCR' is recognised; any other value produces an empty string.
    """
    if Method != 'PaddleOCR':
        return ''
    return ocr_with_paddle(img)
74
 
75
 
76
+ import gradio as gr
77
 
78
# --- Gradio UI ---------------------------------------------------------------
# Input widgets: the engine selector and the upload field; one text box for
# the recognised output.
image_or_pdf = gr.File(label="Upload an image or PDF")
method = gr.Radio(choices=["PaddleOCR"], value="PaddleOCR")
output = gr.Textbox(label="Output")

# The inputs list order matches generate_ocr's (Method, file) signature.
demo = gr.Interface(
    fn=generate_ocr,
    inputs=[method, image_or_pdf],
    outputs=output,
    title="Optical Character Recognition",
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
    article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
<a href="mailto:[email protected]" target="_blank">[email protected]</a>
<p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>""",
)

# Start the web server and request a public share link.
demo.launch(share=True)
94
+ # import os
95
 
96
+ # # Disable TensorFlow to ensure PyTorch is used
97
+ # os.environ['USE_TF'] = '0'
98
 
99
+ # import torch
100
+ # print(torch.cuda.is_available()) # Should return True if GPU is available