uartimcs committed on
Commit
40c1877
1 Parent(s): de759fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -43
app.py CHANGED
@@ -1,48 +1,24 @@
1
  import gradio as gr
2
- import fitz
3
- import os
4
- import zipfile
5
- from donut import DonutModel
6
 
7
- def demo_process(input_pdf):
8
-
9
- # Conversion of PDF to JPG images
10
- pdf = fitz.open(input_pdf)
11
- first_page = pdf[0]
12
- pix = first_page.get_pixmap()
13
- image_bytes = pix.tobytes("png")
14
- pdf.close()
15
-
16
- temp_dir = "images"
17
- basename = os.path.basename(input_pdf).split('.')[0]
18
- image_name = basename + ".jpg"
19
- os.makedirs(temp_dir, exist_ok=True)
20
-
21
- with open(os.path.join(temp_dir, image_name), "wb") as f:
22
- f.write(image_bytes)
23
-
24
- # zip_path = "images.zip"
25
- image_path = os.path.join(temp_dir, image_name)
26
- # output = model.inference(image=image_name, prompt=task_prompt)["predictions"][0]
27
- # with zipfile.ZipFile(zip_path, "w") as zipf:
28
- # zipf.write(os.path.join(temp_dir, image_name), image_name)
29
- # os.remove(image_path)
30
- # os.rmdir(temp_dir)
31
- image_path = Image.fromarray(image_path)
32
-
33
  return image_path
34
-
35
 
36
- task_name = "SGSInvoice"
37
- task_prompt = f"<s_{task_name}>"
38
- model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
39
- model.eval()
40
-
41
- iface = gr.Interface(
42
- fn=demo_process,
43
- inputs=gr.File(label="Upload PDF File"),
44
- outputs="image",
45
- title="PDF to Images Converter",
46
- description="Upload a PDF file and download a ZIP file containing all the pages as images. Host it on huggingface for convenience."
47
  )
48
- iface.launch()
 
 
1
  import gradio as gr
2
+ import fitz # PyMuPDF
3
+ from PIL import Image
 
 
4
 
5
+ def pdf_to_jpg(pdf_file):
6
+ # Open the PDF file
7
+ document = fitz.open(pdf_file.name)
8
+ # Get the first page
9
+ page = document.load_page(0)
10
+ # Render the page to a pixmap
11
+ pix = page.get_pixmap()
12
+ # Save the pixmap as an image
13
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
14
+ image_path = "first_page.jpg"
15
+ image.save(image_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  return image_path
 
17
 
18
+ interface = gr.Interface(
19
+ fn=pdf_to_jpg,
20
+ inputs=gr.inputs.File(label="Upload PDF"),
21
+ outputs=gr.outputs.Image(label="First Page JPG")
 
 
 
 
 
 
 
22
  )
23
+
24
+ interface.launch()