import json
import os
import tempfile

import gradio as gr
import requests
from docling.document_converter import DocumentConverter

# Upper bound (seconds) on the PDF download so a stalled server cannot block
# the Gradio worker forever.
DOWNLOAD_TIMEOUT_SECONDS = 30


def pdf_to_json(url):
    """Download the PDF at *url*, convert it with Docling, and describe the result.

    NOTE(review): despite the name, this currently returns a listing of the
    converted document's attribute names (``dir(result.document)``) for
    debugging purposes, not a JSON serialization.

    Args:
        url: Address of the PDF to download. Surrounding whitespace is
            ignored; ``None`` is treated as an empty string.

    Returns:
        A string: the attribute listing on success, or a message starting
        with ``"Error downloading PDF:"`` / ``"Error processing PDF:"`` on
        failure. Errors are reported in the return value (not raised) so the
        Gradio text output can display them.
    """
    # Download the PDF file from the URL.
    try:
        response = requests.get(
            (url or "").strip(), timeout=DOWNLOAD_TIMEOUT_SECONDS
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error downloading PDF: {e}"

    # delete=False: the file is closed before the converter is handed its
    # path, so removal is done explicitly in the ``finally`` below.
    temp_pdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    temp_pdf_path = temp_pdf.name
    try:
        # Save the PDF to the temporary file (closed on leaving the ``with``).
        with temp_pdf:
            temp_pdf.write(response.content)

        # Convert the downloaded PDF file.
        converter = DocumentConverter()
        result = converter.convert(temp_pdf_path)

        # Inspect the attributes of the document to understand its structure.
        document_attributes = dir(result.document)

        # Return document attributes for debugging purposes.
        return f"Document attributes: {document_attributes}"
    except Exception as e:
        # UI boundary: surface any conversion failure as text.
        return f"Error processing PDF: {e}"
    finally:
        # Best-effort cleanup so repeated requests do not pile up PDFs in the
        # temp directory; a failed removal must not mask the real result.
        try:
            os.remove(temp_pdf_path)
        except OSError:
            pass


# Gradio interface
iface = gr.Interface(
    fn=pdf_to_json,
    inputs=gr.Textbox(label="Enter PDF URL"),
    outputs="text",
    title="PDF to JSON Converter",
    description="Convert a PDF from a URL to JSON format.",
)

iface.launch(share=True)