uartimcs committed on
Commit
6041fea
1 Parent(s): 2f1e2b1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz
3
+ import os
4
+ import zipfile
5
+
6
def process(input_pdf):
    """Render the first page of a PDF to a JPEG and run the model on it.

    Args:
        input_pdf: The uploaded PDF. Either a filesystem path (``str`` or
            ``os.PathLike``) or a file wrapper that exposes the path as
            ``.name`` (presumably what the file component hands over in
            some Gradio versions -- TODO confirm against the installed one).

    Returns:
        The first element of the ``"predictions"`` entry returned by
        ``model.inference``.

    Raises:
        ValueError: If no file was supplied.
    """
    if input_pdf is None:
        raise ValueError("No PDF file was provided.")

    # Normalise the input to a plain path string. Path-like objects are
    # checked first because ``pathlib.Path.name`` is only the final component.
    if isinstance(input_pdf, (str, os.PathLike)):
        pdf_path = os.fspath(input_pdf)
    else:
        pdf_path = input_pdf.name

    # Conversion of the first PDF page to JPG bytes. The context manager
    # closes the document even when rendering raises.
    with fitz.open(pdf_path) as pdf:
        image_bytes = pdf[0].get_pixmap().tobytes("jpg")

    temp_dir = "images"
    # splitext strips only the final extension, so "a.b.pdf" -> "a.b"
    # (split('.')[0] would have truncated it to "a").
    basename = os.path.splitext(os.path.basename(pdf_path))[0]
    # The "." separator was missing before, producing names like "invoicejpg".
    image_path = os.path.join(temp_dir, basename + ".jpg")

    os.makedirs(temp_dir, exist_ok=True)
    with open(image_path, "wb") as f:
        f.write(image_bytes)

    # NOTE(review): upstream Donut's ``inference`` appears to expect a PIL
    # image for ``image``; confirm that this model accepts a file path.
    output = model.inference(image=image_path, prompt=task_prompt)["predictions"][0]

    return output
25
+
26
# Task identifier; it is embedded in the prompt token and in the demo title.
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# NOTE(review): ``DonutModel`` is not imported anywhere in the visible file;
# confirm where it is expected to come from before running this script.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# Web UI: takes a single PDF upload and shows the output of ``process`` as JSON.
demo = gr.Interface(
    fn=process,
    inputs=gr.File(file_types=['.pdf']),
    outputs="json",
    title=f"Donut 🍩 demonstration for `{task_name}` task",
)
demo.launch()
32
+
33
+