Spaces:
Running
Running
File size: 892 Bytes
c7f3bef 40c1877 e544363 c7f3bef 2085731 b0be0f0 40c1877 385ed2f d21a60a 40c1877 2085731 7c15745 2085731 3e75990 40c1877 2085731 f3a60f3 2085731 b215e79 40c1877 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import gradio as gr
import fitz # PyMuPDF
from PIL import Image
from donut import DonutModel
def analysis_process(pdf_file):
    """Run the Donut model on the first page of an uploaded PDF.

    Args:
        pdf_file: The upload handed over by the ``gr.File`` input. Either a
            path string or an object exposing the path as ``.name``.

    Returns:
        The first entry of the ``"predictions"`` list returned by
        ``model.inference``.

    Raises:
        ValueError: If no file was supplied or the PDF contains no pages.
    """
    if pdf_file is None:
        raise ValueError("No PDF file was uploaded.")

    # Accept both a plain path and a file wrapper carrying the path in `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as document:
        if document.page_count == 0:
            raise ValueError("The uploaded PDF has no pages.")
        # Only the first page is rendered and analysed.
        pix = document.load_page(0).get_pixmap(alpha=False)
        # Build the image while the document is still open.
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    # Kept from the original: a copy of the rendered page is written to disk.
    image_path = "result.jpg"
    image.save(image_path)

    return model.inference(image=image, prompt=task_prompt)["predictions"][0]
# Task identifier; the prompt passed to the model is derived from it.
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# Load the pretrained checkpoint at module level, then switch it to eval mode.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# Web UI: one PDF upload in, the return value of analysis_process shown as JSON.
pdf_upload = gr.File(label="Upload PDF")
interface = gr.Interface(
    fn=analysis_process,
    inputs=pdf_upload,
    outputs="json",
)
interface.launch()
|