|
import json |
|
from docling.document_converter import DocumentConverter |
|
import gradio as gr |
|
|
|
def pdf_to_json(url, converter=None):
    """Convert the document at *url* and return it as JSON text.

    The converted document is serialised by the first strategy that applies:

    1. ``document.export_to_json()``; its return value is passed through
       unchanged.
    2. ``json.dumps(document.export_to_dict(), indent=2)`` when the document
       has a callable ``export_to_dict``.
    3. A minimal ``{"title": ..., "sections": [...]}`` summary, using
       ``"Untitled"`` and an empty list for whichever attribute is missing.

    Args:
        url: Location of the PDF. For string values, surrounding whitespace
            is stripped before conversion.
        converter: Optional object exposing ``convert(source)`` whose result
            has a ``document`` attribute. When omitted, a new
            ``DocumentConverter()`` is created for this call; pass one in to
            reuse the same converter across calls.

    Returns:
        The JSON output of the first strategy that applies.

    Raises:
        ValueError: If *url* is an empty or whitespace-only string.
    """
    if isinstance(url, str):
        # Text pasted into the UI often carries stray spaces or a newline.
        url = url.strip()
        if not url:
            # Fail fast, before a converter is constructed.
            raise ValueError("A PDF URL is required.")

    if converter is None:
        converter = DocumentConverter()
    document = converter.convert(url).document

    try:
        return document.export_to_json()
    except AttributeError:
        # Same trigger as the original fallback: no usable export_to_json().
        pass

    # NOTE(review): docling's document type appears to expose
    # export_to_dict() rather than export_to_json() -- confirm against the
    # installed docling version.
    export_to_dict = getattr(document, "export_to_dict", None)
    if callable(export_to_dict):
        return json.dumps(export_to_dict(), indent=2)

    # Last resort. Both attributes are optional here; previously a missing
    # ``sections`` raised a second AttributeError from inside the handler.
    sections = getattr(document, "sections", None) or []
    content = {
        "title": getattr(document, "title", "Untitled"),
        "sections": [section.text for section in sections],
    }
    return json.dumps(content, indent=2)
|
|
|
|
|
# Gradio front end: one text box collects the PDF URL, and whatever
# pdf_to_json returns for it is displayed as plain text.
iface = gr.Interface(
    title="PDF to JSON Converter",
    description="Convert a PDF from a URL to JSON format.",
    fn=pdf_to_json,
    inputs=gr.Textbox(label="Enter PDF URL"),
    outputs="text",
)

# Launched unconditionally when this file is executed (no __main__ guard).
iface.launch()
|
|