# ocr-pdf-image / app.py
# Uploaded by shadabsayd ("Create app.py", commit 23bb0fc, verified)
from pdf2image import convert_from_path
import base64
from io import BytesIO
import pandas as pd
import PIL
import numpy as np
from PIL import Image
from PIL import ImageDraw
import gradio as gr
import torch
import easyocr
# Fetch the sample images referenced by the demo's `examples` list below.
_EXAMPLE_SOURCES = {
    'https://github.com/JaidedAI/EasyOCR/raw/master/examples/english.png': 'english.png',
    'https://github.com/JaidedAI/EasyOCR/raw/master/examples/japanese.jpg': 'japanese.jpg',
    'https://i.imgur.com/mwQFd7G.jpeg': 'Hindi.jpeg',
}
for _url, _filename in _EXAMPLE_SOURCES.items():
    torch.hub.download_url_to_file(_url, _filename)
def draw_boxes(image, bounds, color='yellow', width=2):
    """Outline each detected text region on *image* (mutated in place).

    Parameters
    ----------
    image : PIL.Image.Image
        Image to annotate; drawn on directly.
    bounds : list
        EasyOCR ``readtext`` results; each entry's first element is the
        four corner points of a detected text box.
    color : str
        Outline colour for ``ImageDraw.line``.
    width : int
        Stroke width in pixels.

    Returns
    -------
    PIL.Image.Image
        The same image object, returned for convenient chaining.
    """
    pen = ImageDraw.Draw(image)
    for detection in bounds:
        # Four corners of the box; repeat the first to close the outline.
        top_left, top_right, bottom_right, bottom_left = detection[0]
        pen.line(
            [*top_left, *top_right, *bottom_right, *bottom_left, *top_left],
            fill=color,
            width=width,
        )
    return image
def inference(img, lang):
    """Run EasyOCR on *img* and return the annotated image plus results.

    Parameters
    ----------
    img : str | numpy.ndarray
        Path to an image file, or an image as a NumPy array (the form
        ``read_from_file`` passes for rasterized PDF pages).
    lang : list[str]
        EasyOCR language codes, e.g. ``['en']``.

    Returns
    -------
    list
        ``[annotated_image, dataframe]`` where the dataframe has
        ``text`` and ``confidence`` (percentage string) columns.
    """
    # Kept for backward compatibility: earlier revisions exposed the last
    # processed image / OCR result as module-level globals.
    global img1, bounds
    img1 = img
    # NOTE(review): a Reader is constructed (models loaded) on every call;
    # consider caching one Reader per language tuple if this becomes hot.
    reader = easyocr.Reader(lang)
    bounds = reader.readtext(img)
    # Explicit type dispatch replaces the original bare ``except:`` which
    # silently swallowed every error (including KeyboardInterrupt).
    if isinstance(img, np.ndarray):
        im = PIL.Image.fromarray(img)
    else:
        im = PIL.Image.open(img)
    draw_boxes(im, bounds)
    cdf = pd.DataFrame([x[1:] for x in bounds], columns=['text', 'confidence'])
    cdf['confidence'] = cdf['confidence'].apply(lambda x: "{0:.1%}".format(x))
    return [im, cdf]
def read_from_file(file_path, lang, max_pages=5):
    """OCR a PDF or image file selected in the Gradio UI.

    Parameters
    ----------
    file_path : str | pathlib.Path
        Path to a ``.pdf`` file or a regular image file.
    lang : list[str]
        EasyOCR language codes.
    max_pages : int
        Maximum number of PDF pages to process (default 5, matching the
        original hard-coded limit).

    Returns
    -------
    list
        ``[images, dataframe]`` — a list of annotated page images and the
        combined OCR results (with a ``page`` column for PDFs).
    """
    # Case-insensitive extension check; the original ``'.pdf' in path`` also
    # matched names merely *containing* ".pdf" and missed upper-case ".PDF".
    if str(file_path).lower().endswith('.pdf'):
        print('pdf')
        # Rasterize every page, then OCR at most ``max_pages`` of them.
        pages = convert_from_path(file_path)
        frames = []
        imgs = []
        for page_no, page in enumerate(pages[:max_pages]):
            # pdf2image yields RGB; reverse the channel axis to BGR to match
            # what the original code fed to ``inference``.
            img, dfx = inference(np.array(page)[:, :, ::-1], lang)
            imgs.append(img)
            dfx['page'] = page_no
            frames.append(dfx)
        data = pd.concat(frames).reset_index(drop=True)
        return [imgs, data]
    print('image')
    img2 = np.array(PIL.Image.open(file_path))
    annotated, df = inference(img2, lang)
    return [[annotated], df]
# --- UI configuration -------------------------------------------------------

title = 'Gradio OCR Demo'

description = (
    'Gradio OCR demo supports 80+ languages. To use it, simply upload your '
    'image and choose a language from the dropdown menu, or click one of the '
    'examples to load them.'
)

# [input_file, language_codes] pairs shown as one-click examples in the UI.
examples = [
    ['english.png', ['en']],
    ['japanese.jpg', ['ja', 'en']],
    ['Hindi.jpeg', ['hi', 'en']],
]

# Enlarge the image panes so scanned pages are legible.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# EasyOCR language codes offered in the checkbox group.
choices = [
    'abq', 'ady', 'af', 'ang', 'ar', 'as', 'ava', 'az', 'be', 'bg', 'bh',
    'bho', 'bn', 'bs', 'ch_sim', 'ch_tra', 'che', 'cs', 'cy', 'da', 'dar',
    'de', 'en', 'es', 'et', 'fa', 'fr', 'ga', 'gom', 'hi', 'hr', 'hu', 'id',
    'inh', 'is', 'it', 'ja', 'kbd', 'kn', 'ko', 'ku', 'la', 'lbe', 'lez',
    'lt', 'lv', 'mah', 'mai', 'mi', 'mn', 'mr', 'ms', 'mt', 'ne', 'new',
    'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'ru', 'rs_cyrillic',
    'rs_latin', 'sck', 'sk', 'sl', 'sq', 'sv', 'sw', 'ta', 'tab', 'te',
    'th', 'tjk', 'tl', 'tr', 'ug', 'uk', 'ur', 'uz', 'vi',
]
# Build and launch the Gradio UI.
app = gr.Interface(
    read_from_file,
    inputs=[
        gr.File(type='filepath', label='input_file pdf or image'),
        gr.CheckboxGroup(choices, type="value", label='language'),
    ],
    outputs=[gr.Gallery(columns=4), 'dataframe'],
    title=title,
    description=description,
    examples=examples,
    css=css,
)
# ``launch(enable_queue=...)`` was deprecated in Gradio 3 and removed in
# Gradio 4 (this file already uses Gradio-4-era APIs such as
# ``gr.Gallery(columns=4)``), where it raises TypeError. Queueing now lives
# on the app object itself, preserving the original queue-enabled behaviour.
app.queue()
app.launch(debug=True)