utils / app.py
not-lain's picture
add background removal & restructure code into multiple files
c577758
raw
history blame
2.73 kB
import gradio as gr
from base_utils import (
convert_pdf_to_image,
extract_text_from_pdf,
convert_doc_to_text,
extract_text_from_docx,
extract_text_from_ppt,
extract_text_from_pptx,
sanitize_list_of_lists,
parse_url,
)
from background_removal import remove_bg
# PDF -> image tab: passes an uploaded file to convert_pdf_to_image and shows
# whatever it returns in a gallery. Exposed over the API as "pdf_to_img".
pdf_to_img = gr.Interface(
    fn=convert_pdf_to_image,
    inputs=gr.File(),
    outputs=gr.Gallery(),
    api_name="pdf_to_img",
)
# PDF -> text tab: passes an uploaded file to extract_text_from_pdf and writes
# the result into a textbox. Exposed over the API as "pdf_to_text".
pdf_to_text = gr.Interface(
    fn=extract_text_from_pdf,
    inputs=gr.File(),
    outputs=gr.Textbox(placeholder="Extracted text will appear here"),
    api_name="pdf_to_text",
)
# DOC -> text tab: uploaded file in, convert_doc_to_text output in a textbox.
# Exposed over the API as "doc_to_text".
doc_to_text = gr.Interface(
    fn=convert_doc_to_text,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="doc_to_text",
)
# DOCX -> text tab: uploaded file in, extract_text_from_docx output in a
# textbox. Exposed over the API as "docx_to_text".
docx_to_text = gr.Interface(
    fn=extract_text_from_docx,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="docx_to_text",
)
# PPT -> text tab: uploaded file in, extract_text_from_ppt output in a textbox.
# Exposed over the API as "ppt_to_text".
ppt_to_text = gr.Interface(
    fn=extract_text_from_ppt,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="ppt_to_text",
)
# PPTX -> text tab: uploaded file in, extract_text_from_pptx output in a
# textbox. Exposed over the API as "pptx_to_text".
pptx_to_text = gr.Interface(
    fn=extract_text_from_pptx,
    inputs=gr.File(),
    outputs=gr.Textbox(),
    api_name="pptx_to_text",
)
# String -> JSON tab: free text in, sanitize_list_of_lists output rendered as
# JSON. Exposed over the API as "str_to_json".
# The single example is a list of [question, answer] pairs given as one raw
# string; its inner lines intentionally start at column 0 so the string's
# contents are not altered by source indentation.
str_to_json = gr.Interface(
    fn=sanitize_list_of_lists,
    inputs=gr.Text(),
    outputs=gr.JSON(),
    examples=[
        """[
["What year was the Carthaginian Empire founded?", "Around 814 BCE"],
["Where was the center of the Carthaginian Empire located?", "Carthage, near present-day Tunis, Tunisia"],
["Which powerful ancient republic did Carthage have conflicts with?", "The Roman Republic"],
["Fill in the blank: Hannibal famously crossed the ________ with war elephants.", "Alps"],
["What were the series of conflicts between Carthage and Rome called?", "The Punic Wars"],
["Multiple Choice: What was a significant military advantage of Carthage? A) Strong infantry, B) Powerful navy, C) Fortified cities", "B) Powerful navy"],
["In what year was Carthage captured and destroyed by Rome?", "146 BCE"],
["What did Carthage excel in that allowed it to amass wealth?", "Maritime trade"]
]"""
    ],
    api_name="str_to_json",
)
# URL tab: one text input handed to parse_url, one text output.
# Note the API name is "url_to_text", which differs from the variable name.
url_parser = gr.Interface(
    fn=parse_url,
    api_name="url_to_text",
    inputs=["text"],
    outputs=["text"],
)
# Background-removal tab: one image input handed to remove_bg, one image
# output. Exposed over the API as "rmbg".
rmbg = gr.Interface(
    fn=remove_bg,
    api_name="rmbg",
    inputs=["image"],
    outputs=["image"],
)
# Combine every individual interface into one tabbed app.
# The two lists are matched by position: the Nth label in tab_names titles the
# Nth entry of interface_list, so keep them in the same order when editing.
demo = gr.TabbedInterface(
    interface_list=[
        pdf_to_img,
        pdf_to_text,
        doc_to_text,
        docx_to_text,
        ppt_to_text,
        pptx_to_text,
        url_parser,
        str_to_json,
        rmbg,
    ],
    tab_names=[
        "PDF to Image",
        "Extract PDF Text",
        "Extract DOC Text",
        "Extract DOCX Text",
        "Extract PPT Text",
        "Extract PPTX Text",
        "Extract text from URL",
        "Extract Json",
        "Remove Background",
    ],
)
# Start the server on port 7860 with debug output enabled.
# server_name must be exactly "0.0.0.0" (listen on all network interfaces);
# the previous value "0.0.0.0." carried a stray trailing dot, which is not a
# valid IPv4 literal and is not recognised as the wildcard address.
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)