File size: 900 Bytes
adab60a
 
 
 
370754a
adab60a
370754a
adab60a
 
370754a
adab60a
 
370754a
adab60a
 
 
 
 
 
 
 
 
 
 
370754a
adab60a
370754a
 
adab60a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import json
from docling.document_converter import DocumentConverter
import gradio as gr

def pdf_to_json(url):
    """Convert the document at ``url`` to JSON.

    The source is handed to ``DocumentConverter().convert`` and the resulting
    ``result.document`` is serialized with the first strategy available on it:

    1. ``document.export_to_json()``, returned unchanged;
    2. ``json.dumps(document.export_to_dict(), indent=2)``;
    3. a minimal summary with the keys ``"title"`` (``"Untitled"`` when the
       document has no ``title`` attribute) and ``"sections"`` (the ``text``
       of every item in ``document.sections``).

    Args:
        url: Location of the PDF to convert. String values are stripped of
            surrounding whitespace; any other value is passed to the
            converter untouched.

    Returns:
        The JSON representation of the converted document (a ``str`` for the
        ``json.dumps`` based strategies; whatever ``export_to_json`` returns
        for the first one).

    Raises:
        ValueError: If ``url`` is ``None`` or a blank string.
        AttributeError: If the document supports none of the strategies
            above (it has no ``sections`` attribute for the last fallback).
    """
    # Reject missing input before paying for converter construction; an empty
    # text box would otherwise surface as an opaque conversion error.
    if url is None or (isinstance(url, str) and not url.strip()):
        raise ValueError("A non-empty PDF URL is required.")
    source = url.strip() if isinstance(url, str) else url

    converter = DocumentConverter()
    document = converter.convert(source).document

    # Look the exporter up explicitly rather than wrapping the call in
    # ``except AttributeError``: an AttributeError raised *inside* a working
    # exporter is a real failure and must not be masked by a lossy fallback.
    export_to_json = getattr(document, "export_to_json", None)
    if callable(export_to_json):
        return export_to_json()

    # NOTE(review): ``export_to_dict`` is the dict exporter shown in Docling's
    # documentation -- confirm it matches the installed version.
    export_to_dict = getattr(document, "export_to_dict", None)
    if callable(export_to_dict):
        return json.dumps(export_to_dict(), indent=2)

    # Last resort: title plus the text of each section.
    content = {
        "title": getattr(document, "title", "Untitled"),
        "sections": [section.text for section in document.sections],
    }
    return json.dumps(content, indent=2)

# Web UI: one text box collects the PDF URL, pdf_to_json handles the
# submission, and the output component is declared as "text".
_url_box = gr.Textbox(label="Enter PDF URL")

iface = gr.Interface(
    title="PDF to JSON Converter",
    description="Convert a PDF from a URL to JSON format.",
    fn=pdf_to_json,
    inputs=_url_box,
    outputs="text",
)

# Launch the interface as soon as this module is executed.
iface.launch()