Spaces:
Running
Running
import gradio as gr | |
import fitz | |
import os | |
import zipfile | |
def process(input_pdf):
    """Render the first page of a PDF to a JPEG file and run the model on it.

    Args:
        input_pdf: Path of the uploaded PDF. It is handed to ``fitz.open``
            and ``os.path.basename``, so it must be a path-like value.

    Returns:
        The first element of the ``"predictions"`` entry returned by
        ``model.inference``.

    Raises:
        IndexError: If the document has no pages (``pdf[0]`` fails).
    """
    # Conversion of PDF to JPG images (only the first page is rendered).
    # The context manager closes the document even when rendering raises.
    with fitz.open(input_pdf) as pdf:
        first_page = pdf[0]
        pix = first_page.get_pixmap()
        image_bytes = pix.tobytes("jpg")

    temp_dir = "images"
    # splitext strips only the final extension, so dots inside the stem
    # survive ("inv.2024.pdf" -> "inv.2024").
    basename = os.path.splitext(os.path.basename(input_pdf))[0]
    # The extension needs its dot; without it the file was saved as "<name>jpg".
    image_name = basename + ".jpg"
    os.makedirs(temp_dir, exist_ok=True)
    image_path = os.path.join(temp_dir, image_name)
    with open(image_path, "wb") as f:
        f.write(image_bytes)

    # NOTE(review): inference is given the image *path*, as in the original;
    # confirm that this DonutModel build accepts a path rather than an image object.
    output = model.inference(image=image_path, prompt=task_prompt)["predictions"][0]
    return output
# Task configuration: the prompt string is derived from the task name.
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# Load the pretrained checkpoint once at start-up, then call eval() on it.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# Web UI: one PDF upload as input, JSON as output, served by `process`.
demo = gr.Interface(
    fn=process,
    inputs=gr.File(file_types=['.pdf']),
    outputs="json",
    title=f"Donut 🍩 demonstration for `{task_name}` task",
)
demo.launch()