pdftojson / app.py
dindizz's picture
Create app.py
adab60a verified
raw
history blame
746 Bytes
import json
from docling.document_converter import DocumentConverter
import gradio as gr
def pdf_to_json(source):
    """Convert a PDF to a JSON string using Docling.

    Args:
        source: Location of the PDF, passed to ``DocumentConverter.convert``.
            The Gradio interface in this file supplies the text typed into
            its "PDF URL" box. String values are stripped of surrounding
            whitespace before conversion.

    Returns:
        A JSON string. The exporters are tried in this order:

        1. ``document.export_to_json()``;
        2. ``json.dumps(document.export_to_dict(), indent=2)`` when the
           document offers a callable ``export_to_dict``;
        3. a minimal object with ``"title"`` and ``"sections"`` keys built
           from whichever of those attributes the document actually has.

    Raises:
        ValueError: If ``source`` is an empty or whitespace-only string.
    """
    if isinstance(source, str):
        # A text box submits "" when left blank; fail early with a clear
        # message instead of passing an empty location to the converter.
        source = source.strip()
        if not source:
            raise ValueError("Please provide the URL of a PDF to convert.")

    converter = DocumentConverter()
    document = converter.convert(source).document

    try:
        return document.export_to_json()
    except AttributeError:
        # No usable export_to_json on this document; try the fallbacks below.
        pass

    # NOTE(review): export_to_dict is the dict export described in the
    # Docling documentation -- confirm against the installed version.
    export_to_dict = getattr(document, "export_to_dict", None)
    if callable(export_to_dict):
        return json.dumps(export_to_dict(), indent=2)

    # Last resort: hand-built summary. Both attributes are optional here so
    # the fallback itself cannot fail with another AttributeError just
    # because the document has no "sections".
    sections = getattr(document, "sections", None) or ()
    content = {
        "title": getattr(document, "title", "Untitled"),
        "sections": [section.text for section in sections],
    }
    return json.dumps(content, indent=2)
# Web UI: a single text box takes the PDF URL, and the JSON string returned
# by pdf_to_json is displayed as plain text.
_pdf_url_box = gr.Textbox(label="PDF URL")
iface = gr.Interface(
    title="PDF to JSON Converter",
    fn=pdf_to_json,
    inputs=_pdf_url_box,
    outputs="text",
)

# Start serving the interface as soon as this module is executed.
iface.launch()