Genzo1010 committed on
Commit
9ecf60a
·
verified ·
1 Parent(s): ea17df0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -75
app.py CHANGED
@@ -1,94 +1,100 @@
1
- import gradio as gr
2
- import requests
3
- import os
4
- from datasets import load_dataset, Image
5
- from PIL import Image
6
- from paddleocr import PaddleOCR
7
- from doctr.io import DocumentFile
8
 
9
 
10
- # Set environment variable for PyTorch usage
11
- os.environ['USE_TF'] = '0' # Set TensorFlow to off
12
- os.environ['USE_TORCH'] = '1' # Set PyTorch to on
13
 
14
- from doctr.models import ocr_predictor
15
- ocr_model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
16
 
17
 
18
 
19
- """
20
- Perform OCR with doctr
21
- """
22
- def ocr_with_doctr(file):
23
- text_output = ''
24
 
25
- # Load the document
26
- doc = DocumentFile.from_pdf(file)
27
 
28
- # Perform OCR
29
- result = ocr_model(doc)
30
 
31
- # Extract text from OCR result
32
- for page in result.pages:
33
- for block in page.blocks:
34
- for line in block.lines:
35
- text_output += " ".join([word.value for word in line.words]) + "\n"
36
 
37
- return text_output
38
-
39
- """
40
- Paddle OCR
41
- """
42
- def ocr_with_paddle(img):
43
- finaltext = ''
44
- ocr = PaddleOCR(lang='en', use_angle_cls=True, use_gpu=True)
45
- # img_path = 'exp.jpeg'
46
- result = ocr.ocr(img)
47
 
48
- for i in range(len(result[0])):
49
- text = result[0][i][1][0]
50
- finaltext += ' '+ text
51
- return finaltext
52
-
53
- def generate_ocr(Method, file):
54
- text_output = ''
55
- if isinstance(file, bytes): # Handle file uploaded as bytes
56
- file = io.BytesIO(file)
57
-
58
- if file.name.endswith('.pdf'):
59
- # Perform OCR on the PDF using doctr
60
- text_output = ocr_with_doctr(file)
61
-
62
- else:
63
- # Handle image file
64
- img_np = np.array(Image.open(file))
65
- text_output = generate_text_from_image(Method, img_np)
66
 
67
- return text_output
 
 
 
 
 
 
68
 
69
- def generate_text_from_image(Method, img):
70
- text_output = ''
71
- if Method == 'PaddleOCR':
72
- text_output = ocr_with_paddle(img)
73
- return text_output
74
 
 
75
 
76
- import gradio as gr
 
 
77
 
78
- image_or_pdf = gr.File(label="Upload an image or PDF")
79
- method = gr.Radio(["PaddleOCR"], value="PaddleOCR")
80
- output = gr.Textbox(label="Output")
 
 
 
 
 
 
 
81
 
82
- demo = gr.Interface(
83
- generate_ocr,
84
- [method, image_or_pdf],
85
- output,
86
- title="Optical Character Recognition",
87
- css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
88
- article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
89
- <a href="mailto:[email protected]" target="_blank">[email protected]</a>
90
- <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>"""
91
- )
92
 
93
- demo.launch(share=True)
 
94
 
 
 
 
1
+ # import gradio as gr
2
+ # import requests
3
+ # import os
4
+ # from datasets import load_dataset, Image
5
+ # from PIL import Image
6
+ # from paddleocr import PaddleOCR
7
+ # from doctr.io import DocumentFile
8
 
9
 
10
+ # # Set environment variable for PyTorch usage
11
+ # os.environ['USE_TF'] = '0' # Set TensorFlow to off
12
+ # os.environ['USE_TORCH'] = '1' # Set PyTorch to on
13
 
14
+ # from doctr.models import ocr_predictor
15
+ # ocr_model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
16
 
17
 
18
 
19
+ # """
20
+ # Perform OCR with doctr
21
+ # """
22
+ # def ocr_with_doctr(file):
23
+ # text_output = ''
24
 
25
+ # # Load the document
26
+ # doc = DocumentFile.from_pdf(file)
27
 
28
+ # # Perform OCR
29
+ # result = ocr_model(doc)
30
 
31
+ # # Extract text from OCR result
32
+ # for page in result.pages:
33
+ # for block in page.blocks:
34
+ # for line in block.lines:
35
+ # text_output += " ".join([word.value for word in line.words]) + "\n"
36
 
37
+ # return text_output
38
+
39
+ # """
40
+ # Paddle OCR
41
+ # """
42
+ # def ocr_with_paddle(img):
43
+ # finaltext = ''
44
+ # ocr = PaddleOCR(lang='en', use_angle_cls=True, use_gpu=True)
45
+ # # img_path = 'exp.jpeg'
46
+ # result = ocr.ocr(img)
47
 
48
+ # for i in range(len(result[0])):
49
+ # text = result[0][i][1][0]
50
+ # finaltext += ' '+ text
51
+ # return finaltext
52
+
53
+ # def generate_ocr(Method, file):
54
+ # text_output = ''
55
+ # if isinstance(file, bytes): # Handle file uploaded as bytes
56
+ # file = io.BytesIO(file)
57
+
58
+ # if file.name.endswith('.pdf'):
59
+ # # Perform OCR on the PDF using doctr
60
+ # text_output = ocr_with_doctr(file)
61
+
62
+ # else:
63
+ # # Handle image file
64
+ # img_np = np.array(Image.open(file))
65
+ # text_output = generate_text_from_image(Method, img_np)
66
 
67
+ # return text_output
68
+
69
+ # def generate_text_from_image(Method, img):
70
+ # text_output = ''
71
+ # if Method == 'PaddleOCR':
72
+ # text_output = ocr_with_paddle(img)
73
+ # return text_output
74
 
 
 
 
 
 
75
 
76
+ # import gradio as gr
77
 
78
+ # image_or_pdf = gr.File(label="Upload an image or PDF")
79
+ # method = gr.Radio(["PaddleOCR"], value="PaddleOCR")
80
+ # output = gr.Textbox(label="Output")
81
 
82
+ # demo = gr.Interface(
83
+ # generate_ocr,
84
+ # [method, image_or_pdf],
85
+ # output,
86
+ # title="Optical Character Recognition",
87
+ # css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
88
+ # article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
89
+ # <a href="mailto:[email protected]" target="_blank">[email protected]</a>
90
+ # <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>"""
91
+ # )
92
 
93
+ # demo.launch(share=True)
94
+ import os
 
 
 
 
 
 
 
 
95
 
96
+ # Disable TensorFlow to ensure PyTorch is used
97
+ os.environ['USE_TF'] = '0'
98
 
99
+ import torch
100
+ print(torch.cuda.is_available()) # Should return True if GPU is available