uartimcs committed on
Commit
c7f3bef
1 Parent(s): 6041fea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -33
app.py CHANGED
@@ -1,33 +1,34 @@
1
- import gradio as gr
2
- import fitz
3
- import os
4
- import zipfile
5
-
6
- def process(input_pdf):
7
- # Conversion of PDF to JPG images
8
- pdf = fitz.open(input_pdf)
9
- first_page = pdf[0]
10
- pix = first_page.get_pixmap()
11
- image_bytes = pix.tobytes("jpg")
12
- pdf.close()
13
-
14
- temp_dir = "images"
15
- basename = os.path.basename(input_pdf).split('.')[0]
16
- image_name = basename + "jpg"
17
- os.makedirs(temp_dir, exist_ok=True)
18
- with open(os.path.join(temp_dir, image_name), "wb") as f:
19
- f.write(image_bytes)
20
-
21
- image_path = os.path.join(temp_dir, image_name)
22
- output = model.inference(image=image_path, prompt=task_prompt)["predictions"][0]
23
-
24
- return output
25
-
26
- task_name = "SGSInvoice"
27
- task_prompt = f"<s_{task_name}>"
28
- model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
29
- model.eval()
30
- demo = gr.Interface(fn=process,inputs=gr.File(file_types=['.pdf']),outputs="json", title=f"Donut 🍩 demonstration for `{task_name}` task",)
31
- demo.launch()
32
-
33
-
 
 
1
+ import gradio as gr
2
+ import fitz
3
+ import os
4
+ import zipfile
5
+ from donut import DonutModel
6
+
7
+ def process(input_pdf):
8
+ # Conversion of PDF to JPG images
9
+ pdf = fitz.open(input_pdf)
10
+ first_page = pdf[0]
11
+ pix = first_page.get_pixmap()
12
+ image_bytes = pix.tobytes("jpg")
13
+ pdf.close()
14
+
15
+ temp_dir = "images"
16
+ basename = os.path.basename(input_pdf).split('.')[0]
17
+ image_name = basename + "jpg"
18
+ os.makedirs(temp_dir, exist_ok=True)
19
+ with open(os.path.join(temp_dir, image_name), "wb") as f:
20
+ f.write(image_bytes)
21
+
22
+ image_path = os.path.join(temp_dir, image_name)
23
+ output = model.inference(image=image_path, prompt=task_prompt)["predictions"][0]
24
+
25
+ return output
26
+
27
+ task_name = "SGSInvoice"
28
+ task_prompt = f"<s_{task_name}>"
29
+ model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
30
+ model.eval()
31
+ demo = gr.Interface(fn=process,inputs=gr.File(file_types=['.pdf']),outputs="json", title=f"Donut 🍩 demonstration for `{task_name}` task",)
32
+ demo.launch()
33
+
34
+