File size: 1,338 Bytes
adab60a 392d77e adab60a 392d77e adab60a 370754a 392d77e adab60a 392d77e 59dae28 392d77e adab60a 370754a adab60a 370754a adab60a 392d77e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import json
import os
import tempfile

import gradio as gr
import requests
from docling.document_converter import DocumentConverter
def pdf_to_json(url):
    """Download the PDF at *url* and return its Docling conversion as JSON text.

    Args:
        url: Address of the PDF to fetch. Surrounding whitespace is ignored.

    Returns:
        A pretty-printed JSON string of the converted document on success.
        Failures never raise; a message string is returned instead. It starts
        with "Error downloading PDF:" when the URL is blank or the HTTP
        request fails, and with "Error processing PDF:" when saving or
        converting the file fails.
    """
    # Reject blank input up front so the user gets a clear message without
    # a network round trip.
    if not url or not url.strip():
        return "Error downloading PDF: no URL provided."

    # Download the PDF file from the URL. The timeout keeps a stalled server
    # from blocking the request forever.
    request_timeout_seconds = 30
    try:
        response = requests.get(url.strip(), timeout=request_timeout_seconds)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error downloading PDF: {e}"

    temp_pdf_path = None
    try:
        # delete=False so the file can be closed before the converter opens
        # it by path; it is removed explicitly in the ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(response.content)

        # Convert the downloaded PDF file and serialise the document.
        converter = DocumentConverter()
        result = converter.convert(temp_pdf_path)
        return json.dumps(
            result.document.export_to_dict(),
            indent=2,
            ensure_ascii=False,
        )
    except Exception as e:
        # UI boundary: report any conversion failure as text rather than
        # surfacing a traceback to the user.
        return f"Error processing PDF: {e}"
    finally:
        # Always clean up the temporary file, on success and on failure.
        if temp_pdf_path is not None:
            try:
                os.remove(temp_pdf_path)
            except OSError:
                pass
# Web UI: a single textbox takes the PDF URL and the result of
# ``pdf_to_json`` is shown as plain text.
iface = gr.Interface(
    pdf_to_json,
    gr.Textbox(label="Enter PDF URL"),
    "text",
    title="PDF to JSON Converter",
    description="Convert a PDF from a URL to JSON format.",
)

# Start the app with Gradio's share option enabled.
iface.launch(share=True)
|