pdftojson / app.py
dindizz's picture
Update app.py
370754a verified
raw
history blame
900 Bytes
import json
from docling.document_converter import DocumentConverter
import gradio as gr
def pdf_to_json(url):
    """Convert the PDF found at *url* into a JSON string.

    The source is handed unchanged to ``DocumentConverter.convert`` and the
    resulting document is serialised using the first strategy that the
    document object supports:

    1. ``document.export_to_json()`` -- direct JSON export.
    2. ``document.export_to_dict()`` -- structured export, dumped with
       ``json.dumps``.
    3. A minimal ``{"title": ..., "sections": [...]}`` summary built from
       whatever ``title`` / ``sections`` attributes the document exposes.

    Args:
        url: Location of the PDF, as entered by the user.

    Returns:
        The JSON text produced by the first available strategy.

    Raises:
        Any exception raised by ``DocumentConverter.convert`` (for example
        when the source cannot be fetched or parsed) propagates unchanged.
    """
    converter = DocumentConverter()
    result = converter.convert(url)
    document = result.document

    try:
        # Attempt direct JSON export
        return document.export_to_json()
    except AttributeError:
        # Direct export is unavailable; fall through to the alternatives
        # below rather than failing the whole request.
        pass

    # Prefer the document's own dict export when it exists: it keeps the
    # full structure instead of the lossy title/sections summary.
    export_to_dict = getattr(document, "export_to_dict", None)
    if callable(export_to_dict):
        return json.dumps(export_to_dict(), indent=2)

    # Last resort: construct a small JSON summary by hand.  Both attributes
    # are looked up defensively so that a document without ``sections`` no
    # longer raises a second, uncaught AttributeError.
    content = {
        "title": getattr(document, "title", "Untitled"),
        "sections": [
            section.text for section in getattr(document, "sections", ())
        ],
    }
    return json.dumps(content, indent=2)
# Gradio UI: one text field for the PDF URL, plain-text output holding the
# JSON produced by pdf_to_json.
url_input = gr.Textbox(label="Enter PDF URL")

iface = gr.Interface(
    title="PDF to JSON Converter",
    description="Convert a PDF from a URL to JSON format.",
    fn=pdf_to_json,
    inputs=url_input,
    outputs="text",
)

# Start the web server as soon as the script runs.
iface.launch()