# Spaces: Build error
import os

import fitz  # PyMuPDF
import gradio as gr
def pdf_to_markdown(pdf_file):
    """Extract the text of a PDF as one plain-text section per page.

    Args:
        pdf_file: The uploaded PDF, or ``None`` when nothing was uploaded.
            Accepted forms are raw PDF ``bytes``/``bytearray``, a filesystem
            path (``str`` or ``os.PathLike``), or a binary file-like object
            exposing ``read()``.

    Returns:
        ``"No file uploaded."`` when ``pdf_file`` is ``None``. Otherwise the
        stripped text of every page, each prefixed with ``=== Page N``
        (1-based) and a blank line, with sections separated by blank lines.
    """
    if pdf_file is None:
        return "No file uploaded."

    # The upload component may hand over a path, raw bytes, or a file
    # object depending on its ``type`` setting; accept all three.
    if isinstance(pdf_file, (str, os.PathLike)):
        doc = fitz.open(os.fspath(pdf_file), filetype="pdf")
    elif isinstance(pdf_file, (bytes, bytearray)):
        doc = fitz.open(stream=pdf_file, filetype="pdf")
    else:
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")

    # Use the document as a context manager so it is closed even when
    # text extraction raises part-way through.
    with doc:
        sections = [
            f"=== Page {number}\n\n{page.get_text('text').strip()}"
            for number, page in enumerate(doc, start=1)
        ]
    return "\n\n".join(sections)


# Gradio Interface
iface = gr.Interface(
    fn=pdf_to_markdown,
    # "binary" passes the upload to the callback as bytes. The previous
    # value "file" is only understood by Gradio 3.x; newer releases accept
    # just "filepath" or "binary", so "binary" works on both.
    inputs=gr.File(type="binary"),
    outputs=gr.Textbox(label="Markdown Output", lines=15),
    title="PDF to Markdown Extractor",
    description="Upload a PDF and get a copyable markdown output.",
)

if __name__ == "__main__":
    # Start the web UI only when executed as a script, not on import.
    iface.launch()