File size: 1,338 Bytes
adab60a
392d77e
adab60a
 
392d77e
adab60a
370754a
392d77e
 
 
 
 
 
 
 
 
 
 
 
 
adab60a
 
392d77e
 
 
59dae28
 
 
 
 
392d77e
 
 
adab60a
 
 
 
370754a
adab60a
370754a
 
adab60a
 
392d77e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import json
import os
import tempfile

import gradio as gr
import requests
from docling.document_converter import DocumentConverter

def pdf_to_json(url):
    """Download the PDF at *url* and return its Docling conversion as JSON text.

    Args:
        url: Address of the PDF to fetch. Leading/trailing whitespace is
            ignored.

    Returns:
        On success, a pretty-printed JSON string of the converted document.
        On failure, a message starting with ``"Error downloading PDF:"`` or
        ``"Error processing PDF:"``. Errors are returned rather than raised
        so the caller (the Gradio UI) can display them as text.
    """
    # Reject blank input up front rather than letting the HTTP client fail
    # with a less helpful "invalid URL" style message.
    if not url or not url.strip():
        return "Error downloading PDF: no URL provided"

    try:
        # Without a timeout a stalled server would block this handler forever.
        response = requests.get(url.strip(), timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error downloading PDF: {e}"

    # This is the UI boundary, so any conversion failure is reported as text
    # instead of propagating.
    try:
        # The temporary directory (and the PDF inside it) is removed when the
        # block exits, whether conversion succeeds or fails.
        with tempfile.TemporaryDirectory() as work_dir:
            pdf_path = os.path.join(work_dir, "download.pdf")
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(response.content)

            result = DocumentConverter().convert(pdf_path)
            # Serialize while still inside the block so the source file is
            # guaranteed to exist for as long as the converter might need it.
            return json.dumps(
                result.document.export_to_dict(),
                ensure_ascii=False,
                indent=2,
            )
    except Exception as e:
        return f"Error processing PDF: {e}"

# Web UI: one textbox for the PDF URL, wired to pdf_to_json, with the
# function's returned string rendered as plain text.
iface = gr.Interface(
    pdf_to_json,
    gr.Textbox(label="Enter PDF URL"),
    "text",
    title="PDF to JSON Converter",
    description="Convert a PDF from a URL to JSON format.",
)

# Start serving the interface; share=True is forwarded to Gradio's launch().
iface.launch(share=True)