# Spaces: Running
import os
import tempfile
import zipfile

import fitz
import gradio as gr
from donut import DonutModel
def demo_process(input_pdf):
    """Render every page of a PDF to PNG and bundle the images into a ZIP.

    Args:
        input_pdf: Filesystem path of the uploaded PDF, or an object that
            exposes that path as ``.name``.
            NOTE(review): which of the two Gradio passes depends on the
            installed Gradio version -- confirm against the deployment.

    Returns:
        Path of a ZIP archive containing one ``<basename>_page_<n>.png``
        entry per page of the PDF, numbered from 1.

    Raises:
        ValueError: If no file was supplied.
    """
    if input_pdf is None:
        raise ValueError("No PDF file was provided.")

    # Accept both a plain path and a file-like wrapper carrying `.name`.
    pdf_path = getattr(input_pdf, "name", input_pdf)
    # splitext keeps dots inside the stem ("a.b.pdf" -> "a.b").
    basename = os.path.splitext(os.path.basename(pdf_path))[0]

    # A fresh directory per call keeps concurrent requests from colliding.
    # It is intentionally NOT removed here: the returned archive must still
    # exist when the caller reads it.
    output_dir = tempfile.mkdtemp(prefix="pdf_images_")
    zip_path = os.path.join(output_dir, f"{basename}.zip")

    pdf = fitz.open(pdf_path)
    try:
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for page_number, page in enumerate(pdf, start=1):
                # Pixmap bytes are encoded as PNG, so the entry is named .png.
                image_bytes = page.get_pixmap().tobytes("png")
                archive.writestr(
                    f"{basename}_page_{page_number}.png", image_bytes
                )
    finally:
        # Release the document even if rendering a page fails.
        pdf.close()

    return zip_path
# Donut invoice-extraction model setup (currently disabled):
# task_name = "SGSInvoice"
# task_prompt = f"<s_{task_name}>"
# model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
# model.eval()
# Gradio UI wiring: one file upload feeds demo_process, and the path it
# returns is offered back to the user as a downloadable file.
_pdf_input = gr.File(label="Upload PDF File")
_zip_output = gr.File(label="Download ZIP File")

iface = gr.Interface(
    fn=demo_process,
    inputs=_pdf_input,
    outputs=_zip_output,
    title="PDF to Images Converter",
    description=(
        "Upload a PDF file and download a ZIP file containing all the pages "
        "as images. Host it on huggingface for convenience."
    ),
)

# Start the web server for the interface.
iface.launch()