# pitchdeckio / app.py
# DoctorSlimm's picture
# Update app.py
# b756001 verified
# raw
# history blame contribute delete
# 823 Bytes
import io
import os

import fitz  # PyMuPDF
import gradio as gr
def pdf_to_markdown(pdf_file):
    """Extract the text of every page of a PDF into one page-delimited string.

    Args:
        pdf_file: The uploaded PDF. Accepted forms are ``None`` (nothing was
            uploaded), raw PDF bytes (``bytes``/``bytearray``/``memoryview``),
            a filesystem path (``str`` or ``os.PathLike``), or a binary
            file-like object exposing ``read()``.

    Returns:
        ``"No file uploaded."`` when ``pdf_file`` is ``None``. Otherwise the
        stripped text of each page, each preceded by a ``=== Page N`` header
        (1-based), with pages separated by a blank line.

    Raises:
        TypeError: If ``pdf_file`` is none of the supported input kinds.
    """
    if pdf_file is None:
        return "No file uploaded."

    # Normalise the upload into keyword arguments for fitz.open() so that
    # in-memory data and on-disk files share a single extraction path.
    if isinstance(pdf_file, (bytes, bytearray, memoryview)):
        open_kwargs = {"stream": bytes(pdf_file)}
    elif isinstance(pdf_file, (str, os.PathLike)):
        open_kwargs = {"filename": os.fspath(pdf_file)}
    elif hasattr(pdf_file, "read"):
        data = pdf_file.read()
        path = getattr(pdf_file, "name", None)
        if not data and isinstance(path, str):
            # read() yields nothing when the handle was already consumed;
            # fall back to the path the object reports instead of handing
            # PyMuPDF an empty stream.
            open_kwargs = {"filename": path}
        else:
            open_kwargs = {"stream": data}
    else:
        raise TypeError(
            "pdf_file must be bytes, a path, or a file-like object, "
            f"not {type(pdf_file).__name__}"
        )

    # The context manager closes the document even if extraction fails;
    # filetype="pdf" keeps PyMuPDF from guessing the format.
    with fitz.open(filetype="pdf", **open_kwargs) as doc:
        sections = []
        for page_number, page in enumerate(doc, start=1):
            page_text = page.get_text("text").strip()
            sections.append(f"=== Page {page_number}\n\n{page_text}")

    return "\n\n".join(sections)
# Gradio Interface
def _uploaded_bytes_to_markdown(pdf_bytes):
    """Adapt the raw-bytes upload from ``gr.File`` to ``pdf_to_markdown``.

    ``pdf_to_markdown`` consumes a file-like object through ``.read()``, so
    the uploaded bytes are wrapped in an in-memory binary stream. ``None``
    (no file uploaded) is passed through unchanged.
    """
    if pdf_bytes is None:
        return pdf_to_markdown(None)
    return pdf_to_markdown(io.BytesIO(pdf_bytes))


iface = gr.Interface(
    fn=_uploaded_bytes_to_markdown,
    # type="file" is rejected with a ValueError by Gradio 4+, which only
    # allows "filepath" or "binary". "binary" is also accepted by Gradio 3.x,
    # so it is the one value that works across versions.
    inputs=gr.File(type="binary"),
    outputs=gr.Textbox(label="Markdown Output", lines=15),
    title="PDF to Markdown Extractor",
    description="Upload a PDF and get a copyable markdown output.",
)

if __name__ == "__main__":
    iface.launch()