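# (Residue from the Hugging Face file-viewer page, kept as comments so the file parses.)
# uartimcs's picture
# Update app.py
# 2085731 verified
# raw
# history blame
# 830 Bytes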
import gradio as gr
import fitz # PyMuPDF
from PIL import Image
def analysis_process(pdf_file):
    """Run the invoice-extraction model on the first page of an uploaded PDF.

    The first page is rendered to an in-memory RGB image and passed to the
    module-level ``model`` together with the module-level ``task_prompt``.

    Args:
        pdf_file: The value delivered by the Gradio ``File`` input. Either a
            filesystem path or a file-like object exposing its path as
            ``.name``.

    Returns:
        The first entry of the ``"predictions"`` list returned by
        ``model.inference``.

    Raises:
        ValueError: If no file was supplied or the PDF contains no pages.
    """
    if pdf_file is None:
        raise ValueError("No PDF file was uploaded.")

    # NOTE(review): depending on the Gradio version/configuration the File
    # input appears to deliver either a temp-file wrapper (path in ``.name``)
    # or a plain path string -- accept both.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as document:
        if document.page_count == 0:
            raise ValueError("The uploaded PDF contains no pages.")

        # Render the first page and copy its pixels into a PIL image while
        # the document is still open.
        pix = document.load_page(0).get_pixmap()
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    # Pass the PIL image itself rather than a path to a saved JPEG:
    # NOTE(review): assuming ``model`` is donut's ``DonutModel``, ``inference``
    # expects a PIL image, not a filename -- confirm against the donut API.
    # Skipping the fixed "first_page.jpg" file also avoids concurrent requests
    # overwriting each other's page image.
    output = model.inference(image=image, prompt=task_prompt)["predictions"][0]
    return output
# Load the pretrained checkpoint once at start-up and switch it to eval mode.
# NOTE(review): ``DonutModel`` is not imported in the visible part of this
# file -- confirm the import (presumably from the ``donut`` package) exists.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# Prompt handed to the model on every inference call: the task name wrapped
# as "<s_...>".
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# Web UI: a single PDF upload in, the model's prediction rendered as JSON out.
pdf_upload = gr.File(label="Upload PDF")
interface = gr.Interface(
    fn=analysis_process,
    inputs=pdf_upload,
    outputs="json",
)
interface.launch()