# Spaces: Runtime error
from pdf2image import convert_from_path | |
import base64 | |
from io import BytesIO | |
import pandas as pd | |
import PIL | |
import numpy as np | |
from PIL import Image | |
from PIL import ImageDraw | |
import gradio as gr | |
import torch | |
import easyocr | |
# Fetch the sample images (saved under the file names used by `examples`)
# into the working directory before the interface is built.
for _sample_url, _sample_name in (
    ('https://github.com/JaidedAI/EasyOCR/raw/master/examples/english.png', 'english.png'),
    ('https://github.com/JaidedAI/EasyOCR/raw/master/examples/japanese.jpg', 'japanese.jpg'),
    ('https://i.imgur.com/mwQFd7G.jpeg', 'Hindi.jpeg'),
):
    torch.hub.download_url_to_file(_sample_url, _sample_name)
def draw_boxes(image, bounds, color='yellow', width=2):
    """Outline each region in `bounds` on `image` and return the same image.

    Args:
        image: Image object accepted by ``ImageDraw.Draw``; it is drawn on
            in place.
        bounds: Iterable of entries whose first element unpacks into four
            corner points.
        color: Line colour handed to ``ImageDraw.line``.
        width: Line width handed to ``ImageDraw.line``.

    Returns:
        The `image` argument, after drawing.
    """
    pen = ImageDraw.Draw(image)
    for entry in bounds:
        top_left, top_right, bottom_right, bottom_left = entry[0]
        # Repeat the first corner at the end so the polyline is closed.
        ring = (top_left, top_right, bottom_right, bottom_left, top_left)
        outline = [coord for corner in ring for coord in corner]
        pen.line(outline, fill=color, width=width)
    return image
# Most recently used EasyOCR reader and the language tuple it was built for.
# Building a Reader loads the models, so consecutive calls with the same
# languages (e.g. the pages of one PDF) share a single instance. Only one
# reader is kept so memory use stays bounded.
_reader_cache = {'langs': None, 'reader': None}


def _get_reader(lang):
    """Return an ``easyocr.Reader`` for `lang`, reusing the last one if possible."""
    key = tuple(lang)
    if _reader_cache['reader'] is None or _reader_cache['langs'] != key:
        # Release the previous reader before building the next one so two
        # sets of models are not referenced at the same time.
        _reader_cache['reader'] = None
        _reader_cache['langs'] = None
        reader = easyocr.Reader(lang)
        _reader_cache['reader'] = reader
        _reader_cache['langs'] = key
    return _reader_cache['reader']


def inference(img, lang):
    """Run OCR on `img` and return the annotated image plus a results table.

    Args:
        img: Either a NumPy array (turned into an image with
            ``PIL.Image.fromarray``) or anything ``PIL.Image.open`` accepts,
            such as a file path. It is passed unchanged to ``readtext``.
        lang: Language codes passed to ``easyocr.Reader``.

    Returns:
        ``[image, table]``: the PIL image with detected regions outlined by
        ``draw_boxes``, and a DataFrame with columns ``text`` and
        ``confidence`` (confidence rendered as a percentage string such as
        ``'97.3%'``).
    """
    reader = _get_reader(lang)
    bounds = reader.readtext(img)
    # Dispatch on the input type explicitly: a blanket ``except`` here would
    # hide real failures from Image.open (missing or corrupt file).
    if isinstance(img, np.ndarray):
        im = PIL.Image.fromarray(img)
    else:
        im = PIL.Image.open(img)
    draw_boxes(im, bounds)
    # Each readtext entry is (box, text, confidence); the box is dropped.
    cdf = pd.DataFrame([x[1:] for x in bounds], columns=['text', 'confidence'])
    cdf['confidence'] = cdf['confidence'].map('{0:.1%}'.format)
    return [im, cdf]
def read_from_file(file_path, lang, max_pages=5):
    """OCR an uploaded PDF or image file.

    Args:
        file_path: Path of the uploaded file. A name ending in ``.pdf``
            (any letter case) is handled as a PDF; anything else is opened
            as an image.
        lang: Language codes forwarded to ``inference``.
        max_pages: Maximum number of leading PDF pages to process.

    Returns:
        ``[images, table]``: a list of annotated images (one per processed
        page, or a single-element list for an image file) and a DataFrame of
        the recognised text. For PDFs the table has an extra zero-based
        ``page`` column.
    """
    # Test the suffix, not a substring, so names such as "a.pdf.png" are not
    # treated as PDFs and upper-case extensions still are.
    if str(file_path).lower().endswith('.pdf'):
        print('pdf')
        # Rasterise only the pages that will be processed.
        pages = convert_from_path(file_path, last_page=max_pages)
        imgs = []
        frames = []
        for page_no, page in enumerate(pages[:max_pages]):
            # The channel axis is reversed before OCR.
            # NOTE(review): inference() also draws on this reversed array, so
            # the returned page images presumably have red/blue swapped;
            # confirm and convert back for display if so.
            img, dfx = inference(np.array(page)[:, :, ::-1], lang)
            dfx['page'] = page_no
            imgs.append(img)
            frames.append(dfx)
        if not frames:
            # pd.concat raises on an empty list; return an empty table.
            return [imgs, pd.DataFrame(columns=['text', 'confidence', 'page'])]
        data = pd.concat(frames).reset_index(drop=True)
        return [imgs, data]

    print('image')
    # Close the file handle promptly, and normalise palette / bilevel /
    # grayscale images to RGB so OCR sees pixel values, not palette indices.
    with PIL.Image.open(file_path) as src:
        img2 = np.array(src.convert('RGB'))
    a, b = inference(img2, lang)
    return [[a], b]
# Text shown at the top of the demo page.
title = 'Gradio OCR Demo'
description = 'Gradio OCR demo supports 80+ languages. To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them.'
# Each example pairs a downloaded sample file with the languages to select.
examples = [['english.png', ['en']], ['japanese.jpg', ['ja', 'en']], ['Hindi.jpeg', ['hi', 'en']]]
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
# Language codes offered in the checkbox group.
choices = ['abq', 'ady', 'af', 'ang', 'ar', 'as', 'ava', 'az', 'be', 'bg',
           'bh', 'bho', 'bn', 'bs', 'ch_sim', 'ch_tra', 'che', 'cs', 'cy',
           'da', 'dar', 'de', 'en', 'es', 'et', 'fa', 'fr', 'ga', 'gom', 'hi',
           'hr', 'hu', 'id', 'inh', 'is', 'it', 'ja', 'kbd', 'kn', 'ko', 'ku',
           'la', 'lbe', 'lez', 'lt', 'lv', 'mah', 'mai', 'mi', 'mn', 'mr',
           'ms', 'mt', 'ne', 'new', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro',
           'ru', 'rs_cyrillic', 'rs_latin', 'sck', 'sk', 'sl', 'sq', 'sv',
           'sw', 'ta', 'tab', 'te', 'th', 'tjk', 'tl', 'tr', 'ug', 'uk', 'ur',
           'uz', 'vi']
app = gr.Interface(
    read_from_file,
    inputs=[
        gr.File(type='filepath', label='input_file pdf or image'),
        gr.CheckboxGroup(choices, type="value", label='language'),
    ],
    outputs=[gr.Gallery(columns=4), 'dataframe'],
    title=title,
    description=description,
    examples=examples,
    css=css,
)
# Enable queuing through queue(): launch() does not accept an `enable_queue`
# keyword in Gradio 4, where passing it raises a TypeError at start-up.
app.queue().launch(debug=True)