# uartimcs's picture
# Update app.py
# d21a60a verified
# raw
# history blame contribute delete
# 892 Bytes
import gradio as gr
import fitz # PyMuPDF
from PIL import Image
from donut import DonutModel
def analysis_process(pdf_file):
    """Run the Donut model on the first page of an uploaded PDF.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input. Either an
            object exposing the uploaded file's path as ``.name`` or a plain
            path string is accepted.

    Returns:
        The first entry of the ``"predictions"`` list returned by
        ``model.inference`` for the rendered page.

    Raises:
        ValueError: If nothing was uploaded or the document has no pages.
    """
    if pdf_file is None:
        raise ValueError("No PDF file was uploaded.")

    # Accept both a file-like wrapper (path in ``.name``) and a bare path.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # The context manager guarantees the document is closed even when
    # rendering fails, so repeated requests do not leak open handles.
    with fitz.open(pdf_path) as document:
        if document.page_count == 0:
            raise ValueError("The uploaded PDF contains no pages.")

        # Only the first page is analysed.
        page = document.load_page(0)
        # Render without an alpha channel so the samples match the "RGB"
        # mode passed to Image.frombytes below.
        pix = page.get_pixmap(alpha=False)
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    # Keep a copy of the rendered page on disk, as the original code did.
    # NOTE(review): the path is fixed, so concurrent requests overwrite
    # each other's copy; the in-memory image is what the model receives.
    image_path = "result.jpg"
    image.save(image_path)

    output = model.inference(image=image, prompt=task_prompt)["predictions"][0]
    return output
# Load the pretrained Donut checkpoint once at import time and call eval()
# on it before any request is served.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# The prompt passed to model.inference is the task name wrapped as "<s_...>".
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"
# Gradio front end: one file-upload input wired to analysis_process, with the
# function's return value shown through the "json" output.
pdf_input = gr.File(label="Upload PDF")
interface = gr.Interface(fn=analysis_process, inputs=pdf_input, outputs="json")

# Start serving the app.
interface.launch()