shadabsayd
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,93 @@
from pdf2image import convert_from_path
import pandas as pd
import numpy as np
from PIL import Image
from PIL import ImageDraw
import gradio as gr
import torch
import easyocr

# Download example images used by the demo
torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/english.png', 'english.png')
torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/japanese.jpg', 'japanese.jpg')
torch.hub.download_url_to_file('https://i.imgur.com/mwQFd7G.jpeg', 'Hindi.jpeg')


def draw_boxes(image, bounds, color='yellow', width=2):
    # Outline each detected text region on the image
    draw = ImageDraw.Draw(image)
    for bound in bounds:
        p0, p1, p2, p3 = bound[0]
        draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
    return image


def inference(img, lang):
    # Run EasyOCR on a file path or numpy array and return the annotated image
    # plus a dataframe of recognised text and confidence scores
    reader = easyocr.Reader(lang)
    bounds = reader.readtext(img)
    if isinstance(img, str):
        im = Image.open(img)
    else:
        im = Image.fromarray(img)
    draw_boxes(im, bounds)
    cdf = pd.DataFrame([x[1:] for x in bounds], columns=['text', 'confidence'])
    cdf['confidence'] = cdf['confidence'].apply(lambda x: "{0:.1%}".format(x))
    return [im, cdf]


def read_from_file(file_path, lang):
    if str(file_path).lower().endswith('.pdf'):
        # Convert each page of the PDF into an image (pdf2image requires poppler)
        pages = convert_from_path(file_path)

        # Run OCR on at most the first five pages
        data = []
        imgs = []
        for n, page in enumerate(pages[:5]):
            # PIL pages are RGB; EasyOCR treats numpy input as BGR, hence the channel flip
            img, dfx = inference(np.array(page)[:, :, ::-1], lang)
            imgs.append(img)
            dfx['page'] = n
            data.append(dfx)
        data = pd.concat(data).reset_index(drop=True)
        return [imgs, data]
    else:
        img2 = np.array(Image.open(file_path))
        a, b = inference(img2, lang)
        return [[a], b]


title = 'Gradio OCR Demo'
description = 'This Gradio OCR demo supports 80+ languages. To use it, upload an image or PDF and pick one or more languages from the checkbox group, or click one of the examples to load them.'
examples = [['english.png', ['en']], ['japanese.jpg', ['ja', 'en']], ['Hindi.jpeg', ['hi', 'en']]]
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
choices = ['abq', 'ady', 'af', 'ang', 'ar', 'as', 'ava', 'az', 'be', 'bg',
           'bh', 'bho', 'bn', 'bs', 'ch_sim', 'ch_tra', 'che', 'cs', 'cy',
           'da', 'dar', 'de', 'en', 'es', 'et', 'fa', 'fr', 'ga', 'gom', 'hi',
           'hr', 'hu', 'id', 'inh', 'is', 'it', 'ja', 'kbd', 'kn', 'ko', 'ku',
           'la', 'lbe', 'lez', 'lt', 'lv', 'mah', 'mai', 'mi', 'mn', 'mr',
           'ms', 'mt', 'ne', 'new', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro',
           'ru', 'rs_cyrillic', 'rs_latin', 'sck', 'sk', 'sl', 'sq', 'sv',
           'sw', 'ta', 'tab', 'te', 'th', 'tjk', 'tl', 'tr', 'ug', 'uk', 'ur',
           'uz', 'vi']
app = gr.Interface(
    read_from_file,
    inputs=[gr.File(type='filepath', label='input file (PDF or image)'),
            gr.CheckboxGroup(choices, type="value", label='language')],
    outputs=[gr.Gallery(columns=4), 'dataframe'],
    title=title,
    description=description,
    examples=examples,
    css=css
)

# Queue requests before launching (replaces the deprecated enable_queue launch argument)
app.queue()
app.launch(debug=True)
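
Not part of the committed file, but as a quick sanity check of the functions above, the short sketch below calls read_from_file directly, without the Gradio UI. It assumes the functions are already defined in the current session (for example, pasted after the code above with the launch() call commented out), that the EasyOCR models can be downloaded, and that the poppler system package is available for pdf2image; 'scan.pdf' is a hypothetical placeholder path.

# Minimal usage sketch (assumption, not part of the commit):
# run OCR directly, with read_from_file and its helpers already defined.
images, df = read_from_file('english.png', ['en'])   # english.png is downloaded above
print(df)   # columns: 'text', 'confidence'

# PDF input: only the first five pages are OCR'd and the dataframe gains a 'page' column.
# 'scan.pdf' is a hypothetical placeholder, not a file in this Space.
# images, df = read_from_file('scan.pdf', ['en'])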