Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,33 +1,34 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import fitz
|
3 |
-
import os
|
4 |
-
import zipfile
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
model.
|
30 |
-
|
31 |
-
demo.
|
32 |
-
|
33 |
-
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import fitz
|
3 |
+
import os
|
4 |
+
import zipfile
|
5 |
+
from donut import DonutModel
|
6 |
+
|
7 |
+
def process(input_pdf):
    """Render the first page of a PDF to a JPEG and run model inference on it.

    Args:
        input_pdf: Location of the PDF to read. It is handed to ``fitz.open``
            and ``os.path.basename``, so it is presumably a filesystem path
            string -- TODO confirm what the Gradio ``File`` input passes in.

    Returns:
        The first element of the ``"predictions"`` entry returned by
        ``model.inference``.
    """
    # Conversion of PDF to JPG images
    pdf = fitz.open(input_pdf)
    try:
        first_page = pdf[0]
        pix = first_page.get_pixmap()
        image_bytes = pix.tobytes("jpg")
    finally:
        # Close the document even when page access or rendering raises.
        pdf.close()

    temp_dir = "images"
    # splitext strips only the final extension, so "inv.2023.pdf" keeps its
    # full stem instead of collapsing to "inv" and colliding with siblings.
    basename = os.path.splitext(os.path.basename(input_pdf))[0]
    # The dot is required; without it the file was written as "<stem>jpg".
    image_name = basename + ".jpg"
    os.makedirs(temp_dir, exist_ok=True)
    image_path = os.path.join(temp_dir, image_name)
    with open(image_path, "wb") as f:
        f.write(image_bytes)

    # NOTE(review): the image is passed as a path string here; confirm that
    # this build of DonutModel.inference accepts a path rather than an
    # already-loaded image object.
    output = model.inference(image=image_path, prompt=task_prompt)["predictions"][0]

    return output
|
26 |
+
|
27 |
+
# Task identifier; it is embedded in the prompt string that process() passes
# to model.inference, and in the page title below.
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# Load the pretrained checkpoint once at start-up, then call eval() on it.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# Web UI: a PDF-only file input wired to process(), result rendered as JSON.
demo = gr.Interface(
    fn=process,
    inputs=gr.File(file_types=['.pdf']),
    outputs="json",
    title=f"Donut 🍩 demonstration for `{task_name}` task",
)
demo.launch()
|
33 |
+
|
34 |
+
|