dindizz committed on
Commit
adab60a
1 Parent(s): 4c4dd04

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from docling.document_converter import DocumentConverter
3
+ import gradio as gr
4
+
5
def pdf_to_json(source):
    """Convert the PDF referenced by *source* into a JSON string.

    The document is parsed with docling's ``DocumentConverter`` and then
    serialized with the first exporter the resulting document offers:

    1. ``export_to_json()``, returned as-is when present;
    2. ``export_to_dict()``, dumped with ``json.dumps``;
    3. a minimal ``{"title", "sections"}`` summary built from whatever
       attributes the document actually has.

    Args:
        source: Location of the PDF (the UI passes the text typed into the
            "PDF URL" box). Surrounding whitespace on strings is ignored.

    Returns:
        The JSON text for the converted document.

    Raises:
        ValueError: If *source* is empty or only whitespace.
    """
    # Text pasted into a textbox often carries stray spaces or newlines.
    if isinstance(source, str):
        source = source.strip()
    if not source:
        raise ValueError("A PDF URL or path is required.")

    converter = DocumentConverter()
    document = converter.convert(source).document

    # Look the exporter up instead of wrapping the call in
    # ``except AttributeError``: that way an AttributeError raised *inside*
    # a working exporter is not mistaken for "method missing".
    export_to_json = getattr(document, "export_to_json", None)
    if callable(export_to_json):
        return export_to_json()

    # NOTE(review): docling documents ``export_to_dict()`` as the structured
    # exporter; confirm against the installed docling version.
    export_to_dict = getattr(document, "export_to_dict", None)
    if callable(export_to_dict):
        return json.dumps(export_to_dict(), indent=2)

    # Last-resort summary. Both attributes are optional here so a document
    # without ``sections`` yields an empty list instead of a second
    # AttributeError.
    content = {
        "title": getattr(document, "title", "Untitled"),
        "sections": [
            section.text for section in getattr(document, "sections", [])
        ],
    }
    return json.dumps(content, indent=2)
19
+
20
# Web UI: a single text field takes the PDF location, and the JSON produced
# by pdf_to_json is displayed as plain text.
iface = gr.Interface(
    title="PDF to JSON Converter",
    fn=pdf_to_json,
    inputs=gr.Textbox(label="PDF URL"),
    outputs="text",
)

# Start serving the interface as soon as this script runs.
iface.launch()