"""Gradio app that extracts the text of an uploaded PDF, page by page."""

import os

import fitz  # PyMuPDF
import gradio as gr


def _read_pdf_bytes(pdf_file):
    """Return the raw PDF bytes for any of the supported upload shapes.

    Args:
        pdf_file: Raw bytes (what ``gr.File(type="binary")`` delivers), a
            filesystem path, or a file-like object exposing ``read()``.

    Returns:
        The PDF content as ``bytes``.
    """
    if isinstance(pdf_file, (bytes, bytearray, memoryview)):
        return bytes(pdf_file)
    if isinstance(pdf_file, (str, os.PathLike)):
        with open(pdf_file, "rb") as handle:
            return handle.read()
    # Fall back to the original contract: an object with a ``read()`` method.
    return pdf_file.read()


def pdf_to_markdown(pdf_file):
    """Extract text from a PDF and join it into one page-delimited string.

    Each page contributes a ``=== Page N`` header (1-based), a blank line and
    the page's stripped plain text; pages are separated by a blank line.

    Args:
        pdf_file: The uploaded PDF as bytes, a path, or a file-like object
            with ``read()``. ``None`` means nothing was uploaded.

    Returns:
        The combined text of all pages, or ``"No file uploaded."`` when
        ``pdf_file`` is ``None``.

    Raises:
        Whatever PyMuPDF raises when the data cannot be opened as a PDF; the
        error is left to propagate so the UI can report it.
    """
    if pdf_file is None:
        return "No file uploaded."

    # The context manager closes the document even if text extraction fails;
    # the original left it open.
    with fitz.open(stream=_read_pdf_bytes(pdf_file), filetype="pdf") as doc:
        pages = [
            f"=== Page {number}\n\n{page.get_text('text').strip()}"
            for number, page in enumerate(doc, start=1)
        ]
    return "\n\n".join(pages)


# Gradio Interface
# type="binary" hands the handler raw bytes; the previous type="file" is
# rejected by Gradio 4+, which only accepts "filepath" or "binary".
iface = gr.Interface(
    fn=pdf_to_markdown,
    inputs=gr.File(type="binary"),
    outputs=gr.Textbox(label="Markdown Output", lines=15),
    title="PDF to Markdown Extractor",
    description="Upload a PDF and get a copyable markdown output.",
)

if __name__ == "__main__":
    iface.launch()