DoctorSlimm committed on
Commit
07e0b4e
·
verified ·
1 Parent(s): 34d3736

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+
4
def pdf_to_markdown(pdf_file):
    """Extract the text of every page of a PDF into a single string.

    Args:
        pdf_file: The uploaded PDF. Either an object whose ``name``
            attribute is the path handed to ``fitz.open``, or that path
            itself. ``None`` means nothing was uploaded.

    Returns:
        ``"No file uploaded."`` when ``pdf_file`` is ``None``. Otherwise the
        stripped plain text of each page, each preceded by a
        ``=== Page N`` header (1-based), with pages separated by a blank
        line.
    """
    if pdf_file is None:
        return "No file uploaded."

    # Accept both an upload object exposing ``.name`` and a bare path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # The context manager closes the document even if text extraction raises,
    # so repeated requests do not leak open file handles.
    with fitz.open(pdf_path) as doc:
        pages = [
            f"=== Page {number}\n\n{page.get_text('text').strip()}"
            for number, page in enumerate(doc, start=1)
        ]

    return "\n\n".join(pages)
17
+
18
# Gradio UI: a single PDF upload goes in, the extracted text comes out.
# NOTE(review): type="file" may be rejected by newer Gradio releases, which
# appear to expect "filepath" or "binary" -- confirm against the pinned version.
_pdf_upload = gr.File(type="file")
_markdown_box = gr.Textbox(label="Markdown Output", lines=15)

iface = gr.Interface(
    fn=pdf_to_markdown,
    inputs=_pdf_upload,
    outputs=_markdown_box,
    title="PDF to Markdown Extractor",
    description="Upload a PDF and get a copyable markdown output.",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()