|
import json
import os
import tempfile

import gradio as gr
import requests
from docling.document_converter import DocumentConverter
|
|
|
def pdf_to_json(url):
    """Download a PDF from *url*, convert it with docling, and describe the result.

    The PDF is fetched with ``requests``, written to a temporary ``.pdf``
    file, and passed to ``DocumentConverter.convert``. The temporary file is
    always removed before returning.

    Args:
        url: Location of the PDF to download.

    Returns:
        A string. On success it is ``"Document attributes: [...]"`` listing
        ``dir(result.document)``. If the download fails it is
        ``"Error downloading PDF: ..."``; if conversion fails it is
        ``"Error processing PDF: ..."``. Errors are returned as text rather
        than raised.

    NOTE(review): despite the function name, the success value is the list of
    attribute names on the converted document, not JSON. This looks like an
    exploratory step -- confirm the intended export call before changing it.
    """
    try:
        # A timeout keeps a stalled server from hanging the handler forever;
        # requests' Timeout is a RequestException, so it is reported below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error downloading PDF: {e}"

    # delete=False so the file survives the `with` block and can be reopened
    # by path during conversion; it is removed explicitly in `finally`.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
        temp_pdf.write(response.content)
        temp_pdf_path = temp_pdf.name

    try:
        converter = DocumentConverter()
        result = converter.convert(temp_pdf_path)
        document_attributes = dir(result.document)
        return f"Document attributes: {document_attributes}"
    except Exception as e:
        # Top-level UI boundary: surface any conversion failure as text.
        return f"Error processing PDF: {e}"
    finally:
        # Best-effort cleanup so repeated calls do not pile up temp PDFs.
        try:
            os.unlink(temp_pdf_path)
        except OSError:
            pass
|
|
|
|
|
# Build the Gradio UI: one text box for the PDF URL, plain-text output
# produced by pdf_to_json.
iface = gr.Interface(
    title="PDF to JSON Converter",
    description="Convert a PDF from a URL to JSON format.",
    fn=pdf_to_json,
    inputs=gr.Textbox(label="Enter PDF URL"),
    outputs="text",
)

# share=True asks Gradio to also create a public share link for the app.
iface.launch(share=True)
|
|