|
import json |
|
from docling.document_converter import DocumentConverter |
|
import gradio as gr |
|
|
|
def pdf_to_json(source):
    """Convert the document at *source* with docling and return it as JSON text.

    Args:
        source: Passed to ``DocumentConverter.convert``. In this file it is
            the text typed into the "PDF URL" textbox; surrounding whitespace
            on string inputs is stripped before conversion.

    Returns:
        A JSON string describing the converted document.

    Raises:
        Whatever ``DocumentConverter.convert`` or the selected exporter raises;
        errors are not swallowed here.
    """
    # Pasted URLs frequently carry a trailing newline or space.
    if isinstance(source, str):
        source = source.strip()

    converter = DocumentConverter()
    result = converter.convert(source)

    return _document_to_json(result.document)


def _document_to_json(document):
    """Serialize a converted document to JSON using the best export available."""
    # Probe for the method rather than wrapping the call in
    # ``except AttributeError``: that would also hide AttributeErrors raised
    # *inside* a working exporter and silently degrade the output.
    export_to_json = getattr(document, "export_to_json", None)
    if callable(export_to_json):
        return export_to_json()

    # NOTE(review): docling documents expose ``export_to_dict()`` per the
    # docling docs; it is assumed to return JSON-serializable data -- confirm
    # against the installed docling version.
    export_to_dict = getattr(document, "export_to_dict", None)
    if callable(export_to_dict):
        return json.dumps(export_to_dict(), indent=2)

    # Last-resort summary. Both attributes are optional so this path never
    # raises a second AttributeError when the document lacks them.
    content = {
        "title": getattr(document, "title", "Untitled"),
        "sections": [
            section.text for section in getattr(document, "sections", [])
        ],
    }
    return json.dumps(content, indent=2)
|
|
|
|
|
# Gradio front end: a single "PDF URL" textbox feeds pdf_to_json and the
# returned JSON string is displayed as plain text.
iface = gr.Interface(
    title="PDF to JSON Converter",
    fn=pdf_to_json,
    inputs=gr.Textbox(label="PDF URL"),
    outputs="text",
)

# Start the web UI as soon as this module is executed.
iface.launch()
|
|