"""Gradio app that extracts text from an uploaded PDF using Tesseract OCR."""

import os
import tempfile

import gradio as gr
import pytesseract
from pdf2image import convert_from_path


def _read_pdf_bytes(pdf_file):
    """Return the raw PDF bytes for any of the supported upload forms."""
    if isinstance(pdf_file, (bytes, bytearray, memoryview)):
        return bytes(pdf_file)
    if hasattr(pdf_file, "read"):
        return pdf_file.read()
    # Anything else is treated as a filesystem path; os.fspath raises
    # TypeError for unsupported types instead of failing obscurely later.
    with open(os.fspath(pdf_file), "rb") as src:
        return src.read()


def ocr_pdf(pdf_file):
    """Run OCR over every page of a PDF and return the concatenated text.

    Args:
        pdf_file: The uploaded PDF. With ``gr.File(type="binary")`` Gradio
            passes the file content as ``bytes``; a binary file-like object
            (anything with ``read()``) or a filesystem path is accepted too.
            ``None`` (nothing uploaded) yields an empty string.

    Returns:
        The text recognised on all pages, in page order, joined without a
        separator.
    """
    if pdf_file is None:
        # The user submitted the form without choosing a file.
        return ""

    pdf_bytes = _read_pdf_bytes(pdf_file)

    with tempfile.TemporaryDirectory() as workdir:
        # convert_from_path needs a real file on disk, so spill the upload
        # into a scratch directory that is removed when the block exits.
        pdf_path = os.path.join(workdir, "temp.pdf")
        with open(pdf_path, "wb") as f:
            f.write(pdf_bytes)

        pages = convert_from_path(pdf_path)
        return "".join(pytesseract.image_to_string(page) for page in pages)


iface = gr.Interface(
    fn=ocr_pdf,
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="PDF OCR with PyTesseract",
    description="Upload a PDF file to extract its text using PyTesseract.",
)


if __name__ == "__main__":
    iface.launch()