Spaces:
Sleeping
Sleeping
File size: 4,778 Bytes
73683aa 0cc1374 a73ec05 73683aa a73ec05 73683aa a73ec05 b07522c 73683aa b07522c 73683aa a73ec05 2dbedf0 a73ec05 a011e6d a73ec05 a011e6d 73683aa a73ec05 73683aa a73ec05 73683aa a73ec05 73683aa a73ec05 73683aa a73ec05 73683aa a73ec05 73683aa 0a681f9 0cc1374 a232e1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# app.py
import os
import gradio as gr
import logging
import tempfile
from gradio_pdf import PDF
from config import config
from model import model_initialized
from pdf_processor import to_pdf, to_markdown
from tts import text_to_speech_openai, text_to_speech_gtts
# Set up logging
logging.basicConfig(level=logging.INFO)

# Load header HTML content. The path is anchored to this file's directory so
# start-up does not depend on the process's current working directory.
with open(os.path.join(os.path.dirname(__file__), "header.html"), "r", encoding="utf-8") as file:
    header = file.read()
# Language codes offered in the "Language" dropdown, grouped by writing system
# (could also be moved to config.yaml)
latin_lang = [
    "af", "az", "bs", "cs", "cy", "da", "de", "es", "et", "fr", "ga", "hr",
    "hu", "id", "is", "it", "ku", "la", "lt", "lv", "mi", "ms", "mt", "nl",
    "no", "oc", "pi", "pl", "pt", "ro", "rs_latin", "sk", "sl", "sq", "sv",
    "sw", "tl", "tr", "uz", "vi", "french", "german",
]
arabic_lang = ["ar", "fa", "ug", "ur"]
cyrillic_lang = [
    "ru", "rs_cyrillic", "be", "bg", "uk", "mn", "abq", "ady", "kbd", "ava",
    "dar", "inh", "che", "lbe", "lez", "tab",
]
devanagari_lang = [
    "hi", "mr", "ne", "bh", "mai", "ang", "bho", "mah", "sck", "new", "gom",
    "sa", "bgc",
]
other_lang = ["ch", "en", "korean", "japan", "chinese_cht", "ta", "te", "ka"]

# Full dropdown choice list: the empty choice and "auto" first, then every group.
all_lang = [
    "",
    "auto",
    *other_lang,
    *latin_lang,
    *arabic_lang,
    *cyrillic_lang,
    *devanagari_lang,
]
# Pass an uploaded file to to_pdf so it can be shown in the PDF preview
def file_to_pdf(file_obj):
    """Return the result of ``to_pdf`` for the uploaded file.

    Args:
        file_obj: Value delivered by the upload component: either a path
            string or an object that exposes the path as ``.name``.
            ``None`` means there is no upload.

    Returns:
        Whatever ``to_pdf`` returns for the resolved path, or ``None`` when
        ``file_obj`` is ``None``.
    """
    if file_obj is None:
        return None
    # A plain ``str`` path has no ``.name`` attribute; use the value itself then.
    return to_pdf(getattr(file_obj, "name", file_obj))
# Read text aloud using OpenAI TTS (with gTTS as fallback)
def read_text(text, language="en"):
    """Synthesize speech for *text* with OpenAI TTS, falling back to gTTS.

    Args:
        text: Text to read aloud.
        language: Language code forwarded unchanged to both TTS helpers.

    Returns:
        A short status message for the UI.

    Raises:
        Exception: Whatever ``text_to_speech_gtts`` raises when the fallback
            fails as well.
    """
    # Nothing to synthesize: skip both backends instead of sending them
    # empty input.
    if not text or not text.strip():
        return "No text to read"
    try:
        text_to_speech_openai(text, language)
    except Exception as e:
        # Deliberately broad: any OpenAI failure should degrade to gTTS.
        logging.error("OpenAI TTS failed: %s. Falling back to gTTS.", e)
        text_to_speech_gtts(text, language)
    return "Audio played successfully"
# Set up the Gradio Blocks interface
with gr.Blocks() as demo:
    gr.HTML(header)
    with gr.Row():
        # Left panel: upload, conversion options, PDF preview and examples
        with gr.Column(variant='panel', scale=5):
            file_input = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
            # Positional arguments are minimum, maximum and initial value.
            max_pages = gr.Slider(1, 20, config.get("max_pages_default", 10), step=1, label='Max convert pages')
            with gr.Row():
                layout_mode = gr.Dropdown(
                    ["layoutlmv3", "doclayout_yolo"],
                    label="Layout model",
                    value=config.get("layout_model_default", "doclayout_yolo")
                )
                language = gr.Dropdown(
                    all_lang,
                    label="Language",
                    value=config.get("language_default", "auto")
                )
            with gr.Row():
                formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
                is_ocr = gr.Checkbox(label="Force enable OCR", value=False)
                table_enable = gr.Checkbox(label="Enable table recognition", value=True)
            with gr.Row():
                convert_button = gr.Button("Convert")
                clear_button = gr.ClearButton(value="Clear")
            pdf_display = PDF(label='PDF preview', interactive=False, visible=True, height=800)
            with gr.Accordion("Examples:"):
                example_root = os.path.join(os.path.dirname(__file__), "examples")
                # A missing directory yields no examples instead of aborting
                # start-up; sorting makes the listing order deterministic.
                example_names = sorted(os.listdir(example_root)) if os.path.isdir(example_root) else []
                # Match the real ".pdf" extension (any case), not merely a
                # name that happens to end in the letters "pdf".
                examples = [
                    os.path.join(example_root, name)
                    for name in example_names
                    if name.lower().endswith(".pdf")
                ]
                if examples:
                    gr.Examples(examples=examples, inputs=file_input)
        # Right panel: conversion result, rendered/raw Markdown and TTS controls
        with gr.Column(variant='panel', scale=5):
            output_file = gr.File(label="Convert result", interactive=False)
            with gr.Tabs():
                with gr.Tab("Markdown rendering"):
                    md_render = gr.Markdown(label="Markdown rendering", height=1100, show_copy_button=True, line_breaks=True)
                with gr.Tab("Markdown text"):
                    md_text = gr.TextArea(lines=45, show_copy_button=True)
            # TTS components
            # NOTE(review): the nesting level of these two widgets is an
            # assumption (placed below the tabs in the result panel) - confirm.
            read_button = gr.Button("Read Out Loud")
            read_status = gr.Textbox(label="TTS Status")
    # Define interactions
    # Refresh the PDF preview whenever the uploaded file changes.
    file_input.change(fn=file_to_pdf, inputs=file_input, outputs=pdf_display)
    convert_button.click(
        fn=to_markdown,
        inputs=[file_input, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
        outputs=[md_render, md_text, output_file, pdf_display]
    )
    read_button.click(fn=read_text, inputs=[md_text, language], outputs=read_status)
    # "Clear" also resets the TTS status so no stale message outlives the text.
    clear_button.add([file_input, md_render, pdf_display, md_text, output_file, is_ocr, read_status])
if __name__ == "__main__":
    # Start the server only when executed as a script; importing this module
    # just builds `demo`.
    demo.launch(ssr_mode=True)
|