File size: 2,553 Bytes
4b0678e
5f1077a
c577758
 
 
 
 
 
 
 
 
 
d9c1e67
5f1077a
 
 
 
 
 
 
 
 
 
d99955f
c577758
5d2e8ec
d99955f
c577758
5f1077a
 
ba611cd
 
 
 
 
 
 
0772fb4
 
 
 
 
 
59e60e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9c1e67
 
 
 
 
 
c577758
 
5f1077a
c577758
 
 
 
 
 
 
 
 
a0e320b
c577758
59e60e9
 
 
5d2e8ec
 
ba611cd
0772fb4
d9c1e67
59e60e9
 
5f1077a
 
fac9a75
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import gradio as gr

from base_utils import (
    convert_pdf_to_image,
    extract_text_from_pdf,
    convert_doc_to_text,
    extract_text_from_docx,
    extract_text_from_ppt,
    extract_text_from_pptx,
    sanitize_list_of_lists,
    parse_url,
)

# PDF tab: hands an uploaded file to `convert_pdf_to_image` and shows the
# result in a gallery component.
pdf_to_img = gr.Interface(
    fn=convert_pdf_to_image,
    inputs=gr.File(),
    outputs=gr.Gallery(),
    api_name="pdf_to_img",
)

# PDF tab: hands an uploaded file to `extract_text_from_pdf` and shows the
# result in a textbox (with a placeholder until something is produced).
pdf_to_text = gr.Interface(
    fn=extract_text_from_pdf,
    inputs=gr.File(),
    outputs=gr.Textbox(placeholder="Extracted text will appear here"),
    api_name="pdf_to_text",
)

# Word-document tabs: one interface per helper, each mapping an uploaded
# file to a textbox. `.doc` and `.docx` use different helpers.
doc_to_text = gr.Interface(
    fn=convert_doc_to_text,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="doc_to_text",
)
docx_to_text = gr.Interface(
    fn=extract_text_from_docx,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="docx_to_text",
)

# Presentation tabs: an uploaded file goes to the matching helper and the
# result is shown in a textbox. `.ppt` and `.pptx` use different helpers.
ppt_to_text = gr.Interface(
    fn=extract_text_from_ppt,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="ppt_to_text",
)

pptx_to_text = gr.Interface(
    fn=extract_text_from_pptx,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="pptx_to_text",
)
# Text-to-JSON tab: the typed string is passed to `sanitize_list_of_lists`
# and the result is rendered by a JSON component. A single example (a list of
# question/answer pairs written as text) is offered to the user.
str_to_json = gr.Interface(
    fn=sanitize_list_of_lists,
    inputs=gr.Text(),
    outputs=gr.JSON(),
    api_name="str_to_json",
    examples=[
        """[
  ["What year was the Carthaginian Empire founded?", "Around 814 BCE"],
  ["Where was the center of the Carthaginian Empire located?", "Carthage, near present-day Tunis, Tunisia"],
  ["Which powerful ancient republic did Carthage have conflicts with?", "The Roman Republic"],
  ["Fill in the blank: Hannibal famously crossed the ________ with war elephants.", "Alps"],
  ["What were the series of conflicts between Carthage and Rome called?", "The Punic Wars"],
  ["Multiple Choice: What was a significant military advantage of Carthage? A) Strong infantry, B) Powerful navy, C) Fortified cities", "B) Powerful navy"],
  ["In what year was Carthage captured and destroyed by Rome?", "146 BCE"],
  ["What did Carthage excel in that allowed it to amass wealth?", "Maritime trade"]
]"""
    ],
)

# URL tab: plain text in, plain text out, handled by `parse_url`.
# The API route is exposed as "url_to_text" even though the variable is
# named `url_parser`.
url_parser = gr.Interface(
    fn=parse_url,
    inputs=["text"],
    outputs=["text"],
    api_name="url_to_text",
)


# Each tab is declared once as (interface, title) so the two lists handed to
# TabbedInterface cannot drift out of step with one another.
_TABS = (
    (pdf_to_img, "PDF to Image"),
    (pdf_to_text, "Extract PDF Text"),
    (doc_to_text, "Extract DOC Text"),
    (docx_to_text, "Extract DOCX Text"),
    (ppt_to_text, "Extract PPT Text"),
    (pptx_to_text, "Extract PPTX Text"),
    (url_parser, "Extract text from URL"),
    (str_to_json, "Extract Json"),
    # rmbg is not registered as a tab.
)

# Combine the individual interfaces into one app, one tab per interface.
demo = gr.TabbedInterface(
    [interface for interface, _ in _TABS],
    [title for _, title in _TABS],
)

# Start the web server on port 7860, bound to every IPv4 interface.
# NOTE: the wildcard address must be exactly "0.0.0.0" -- a trailing dot
# ("0.0.0.0.") is not a valid IPv4 literal and prevents the server from
# binding as intended.
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)