File size: 3,439 Bytes
23bb0fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from pdf2image import convert_from_path
import base64
from io import BytesIO
import pandas as pd
import PIL
import numpy as np
from PIL import Image
from PIL import ImageDraw
import gradio as gr
import torch
import easyocr

# Fetch the example images referenced in `examples` below so the demo
# can load them from the working directory.
for _url, _fname in (
    ('https://github.com/JaidedAI/EasyOCR/raw/master/examples/english.png', 'english.png'),
    ('https://github.com/JaidedAI/EasyOCR/raw/master/examples/japanese.jpg', 'japanese.jpg'),
    ('https://i.imgur.com/mwQFd7G.jpeg', 'Hindi.jpeg'),
):
    torch.hub.download_url_to_file(_url, _fname)

def draw_boxes(image, bounds, color='yellow', width=2):
    """Outline every detected text region on *image* in place.

    Args:
        image: a PIL.Image to annotate (mutated in place).
        bounds: EasyOCR ``readtext`` results; each entry's first element is
            the four corner points of a text box.
        color: outline colour passed to ``ImageDraw.line``.
        width: line width in pixels.

    Returns:
        The same (now annotated) image, for call-chaining convenience.
    """
    pen = ImageDraw.Draw(image)
    for detection in bounds:
        corner_a, corner_b, corner_c, corner_d = detection[0]
        # Close the polygon by returning to the first corner.
        pen.line([*corner_a, *corner_b, *corner_c, *corner_d, *corner_a],
                 fill=color, width=width)
    return image


def inference(img, lang):
    """Run OCR on a single image and return it annotated with its results.

    Args:
        img: either a filesystem path to an image or a numpy array (as
            produced by ``np.array(PIL.Image...)`` in `read_from_file`).
        lang: list of EasyOCR language codes, e.g. ``['en']``.

    Returns:
        A two-element list: the PIL image with detection boxes drawn on it,
        and a DataFrame with ``text`` and percentage-formatted ``confidence``
        columns.
    """
    # NOTE(review): module-level globals kept so any external code inspecting
    # `img1`/`bounds` after a call keeps working — confirm they are needed.
    global img1, bounds
    img1 = img
    # Building a Reader per call reloads the models each time; acceptable for
    # a demo, but a cache keyed on tuple(lang) would speed repeated calls.
    reader = easyocr.Reader(lang)
    bounds = reader.readtext(img)
    # Dispatch explicitly on the input type instead of the previous bare
    # `except:`, which silently swallowed every exception type.
    if isinstance(img, np.ndarray):
        im = PIL.Image.fromarray(img)
    else:
        im = PIL.Image.open(img)
    draw_boxes(im, bounds)
    # Each readtext entry is (box, text, confidence); drop the box.
    cdf = pd.DataFrame([x[1:] for x in bounds], columns=['text', 'confidence'])
    cdf['confidence'] = cdf['confidence'].apply(lambda c: "{0:.1%}".format(c))
    return [im, cdf]

def read_from_file(file_path, lang):
    """Gradio handler: OCR an uploaded PDF (first 5 pages) or a single image.

    Args:
        file_path: path to the uploaded file (PDF or image).
        lang: list of EasyOCR language codes from the checkbox group.

    Returns:
        A two-element list: a list of annotated PIL images (one per processed
        page) and a DataFrame of all detections; for PDFs it carries an extra
        ``page`` column.
    """
    # endswith instead of the old substring test, which would misclassify
    # names like 'scan.pdf.png' as PDFs.
    if str(file_path).lower().endswith('.pdf'):
        print('pdf')
        # Rasterize each PDF page, then OCR at most the first 5 pages to
        # bound the demo's latency.
        pages = convert_from_path(file_path)
        imgs = []
        frames = []
        for page_no, page in enumerate(pages[:5]):
            # Reverse channel order (RGB -> BGR) as the original code did.
            img, dfx = inference(np.array(page)[:, :, ::-1], lang)
            imgs.append(img)
            dfx['page'] = page_no
            frames.append(dfx)
        if frames:
            data = pd.concat(frames).reset_index(drop=True)
        else:
            # Empty/zero-page PDF: pd.concat([]) would raise, so return an
            # empty table with the expected columns.
            data = pd.DataFrame(columns=['text', 'confidence', 'page'])
        return [imgs, data]
    else:
        print('image')
        pixels = np.array(PIL.Image.open(file_path))
        annotated, table = inference(pixels, lang)
        return [[annotated], table]


# UI copy and example inputs for the Gradio interface.
title = 'Gradio OCR Demo'
description = 'Gradio OCR demo supports 80+ languages. To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them.'
examples = [['english.png',['en']],['japanese.jpg',['ja', 'en']],['Hindi.jpeg',['hi', 'en']]]
# Force tall image panes so large scans remain readable.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
# EasyOCR language codes offered in the checkbox group.
choices = ['abq', 'ady', 'af', 'ang', 'ar', 'as', 'ava', 'az', 'be', 'bg',
       'bh', 'bho', 'bn', 'bs', 'ch_sim', 'ch_tra', 'che', 'cs', 'cy',
       'da', 'dar', 'de', 'en', 'es', 'et', 'fa', 'fr', 'ga', 'gom', 'hi',
       'hr', 'hu', 'id', 'inh', 'is', 'it', 'ja', 'kbd', 'kn', 'ko', 'ku',
       'la', 'lbe', 'lez', 'lt', 'lv', 'mah', 'mai', 'mi', 'mn', 'mr',
       'ms', 'mt', 'ne', 'new', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro',
       'ru', 'rs_cyrillic', 'rs_latin', 'sck', 'sk', 'sl', 'sq', 'sv',
       'sw', 'ta', 'tab', 'te', 'th', 'tjk', 'tl', 'tr', 'ug', 'uk', 'ur',
       'uz', 'vi']
# Wire the handler to a file upload + language selector, rendering results
# as an image gallery (one entry per processed page) plus a dataframe.
app = gr.Interface(
    read_from_file,
    inputs=[gr.File(type='filepath',label='input_file pdf or image'),#gr.Image(type='filepath', label='Input'),
            gr.CheckboxGroup(choices, type="value", label='language')],
    outputs=[gr.Gallery(columns=4), 'dataframe'],
    title=title,
    description=description,
    examples=examples,
    css=css
    )

# NOTE(review): `enable_queue` was removed from launch() in newer Gradio
# releases (queueing moved to Interface/Blocks.queue()) — confirm the pinned
# Gradio version still accepts it.
app.launch(debug=True,enable_queue=True)