import gradio as gr
import tempfile
import pytesseract
import fitz  # PyMuPDF, imported as fitz for backward compatibility reasons
from PIL import Image


def pdf_to_image(pdf_file, path, progress):
    """Render every page of a PDF to its own PNG file using PyMuPDF.

    Args:
        pdf_file: Uploaded file object; only its ``.name`` attribute (the
            path of the PDF on disk) is read.
        path: Directory in which the page images are written.
        progress: Gradio progress tracker; its ``tqdm`` method wraps the
            page iteration.

    Returns:
        List of PNG file paths, one per page, in page order.
    """
    fnames = []
    # Open as a context manager so the document handle is always released.
    with fitz.open(pdf_file.name) as doc:
        pages = progress.tqdm(doc, desc="Converting PDF to image")
        for index, page in enumerate(pages):
            pix = page.get_pixmap()
            # Each page needs a distinct file name; a shared name would make
            # later pages overwrite earlier ones.
            output = f"{path}/page_{index:05d}.png"
            pix.save(output)
            fnames.append(output)
    return fnames


# NOTE: ``progress=gr.Progress()`` as a default argument is kept as in the
# original signature; it is how the handler receives its progress tracker.
def tesseract_ocr(image, progress=gr.Progress()):
    """Run Tesseract OCR over every page of an uploaded PDF.

    Args:
        image: Uploaded PDF file object (despite the parameter name); it is
            passed to ``pdf_to_image``, which reads its ``.name``.
        progress: Gradio progress tracker used for both the rendering and
            the OCR loops.

    Returns:
        The recognized text of all pages, in page order, joined with
        newlines. An empty string if the PDF has no pages.
    """
    text_res = []
    # Page images live in a temporary directory that is removed on exit.
    with tempfile.TemporaryDirectory() as path:
        images = pdf_to_image(image, path, progress)
        for img_path in progress.tqdm(images, desc="Running OCR"):
            with Image.open(img_path) as img:
                text_res.append(pytesseract.image_to_string(img))
    # Return the text of every page, not only the last one processed.
    return "\n".join(text_res)


if __name__ == "__main__":
    iface = gr.Interface(
        fn=tesseract_ocr,
        inputs=[gr.File(label="PDF file")],
        outputs=gr.Textbox(label="Text"),
        title="PDF to Text Converter",
        description="Converts a PDF file to text using Tesseract OCR.",
    ).queue(concurrency_count=10)
    iface.launch()