# Spaces: Runtime error
import gradio as gr | |
import pytesseract | |
from pdf2image import convert_from_path | |
import tempfile | |
import os | |
def ocr_pdf(pdf_file):
    """Extract text from an uploaded PDF by rasterizing it and running OCR.

    Args:
        pdf_file: The uploaded PDF. May be raw ``bytes``/``bytearray`` (what
            Gradio passes for ``gr.File(type="binary")``), a filesystem path
            (``str`` or ``os.PathLike``), an object with a ``read()`` method
            returning bytes, or ``None`` when nothing was uploaded.

    Returns:
        The OCR text of all pages concatenated in page order, or an empty
        string when ``pdf_file`` is ``None``.
    """
    # Gradio invokes the callback with None when no file has been uploaded.
    if pdf_file is None:
        return ""

    # Normalize every supported input shape to raw PDF bytes. Calling
    # ``.read()`` unconditionally fails for ``type="binary"`` uploads, which
    # arrive as plain ``bytes``.
    if isinstance(pdf_file, (bytes, bytearray)):
        data = pdf_file
    elif isinstance(pdf_file, (str, os.PathLike)):
        with open(pdf_file, "rb") as src:
            data = src.read()
    else:
        data = pdf_file.read()

    with tempfile.TemporaryDirectory() as path:
        # convert_from_path needs a file on disk, so spill the bytes into a
        # scratch directory that is removed when the block exits.
        pdf_path = os.path.join(path, "temp.pdf")
        with open(pdf_path, "wb") as f:
            f.write(data)
        images = convert_from_path(pdf_path)
        # One OCR pass per rendered page; join once instead of repeated +=.
        return "".join(pytesseract.image_to_string(image) for image in images)
# Gradio front end: a single file-upload input wired to ocr_pdf, with the
# function's return value shown in a text box.
iface = gr.Interface(
    title="PDF OCR with PyTesseract",
    description="Upload a PDF file to extract its text using PyTesseract.",
    fn=ocr_pdf,
    inputs=gr.File(type="binary", label="Upload PDF"),
    outputs=gr.Textbox(label="Extracted Text"),
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()