PDF_reader

Sleeping

App Files Files Community

PDF_reader / app.py

Echo9k

Added TTS

a73ec05 3 months ago

raw

history blame

4.78 kB

	# app.py
	import os
	import gradio as gr
	import logging
	import tempfile
	from gradio_pdf import PDF
	from config import config
	from model import model_initialized
	from pdf_processor import to_pdf, to_markdown
	from tts import text_to_speech_openai, text_to_speech_gtts

	# Set up logging
	logging.basicConfig(level=logging.INFO)

	# Load the header HTML content. The path is resolved relative to this file
	# rather than the current working directory, so the app starts correctly
	# no matter which directory it is launched from.
	header_path = os.path.join(os.path.dirname(__file__), "header.html")
	with open(header_path, "r", encoding="utf-8") as header_file:
	    header = header_file.read()

	# Language codes offered in the UI, kept in separate lists by group
	# (could also be moved to config.yaml).
	latin_lang = [
	    "af", "az", "bs", "cs", "cy", "da", "de", "es", "et", "fr",
	    "ga", "hr", "hu", "id", "is", "it", "ku", "la", "lt", "lv",
	    "mi", "ms", "mt", "nl", "no", "oc", "pi", "pl", "pt", "ro",
	    "rs_latin", "sk", "sl", "sq", "sv", "sw", "tl", "tr", "uz", "vi",
	    "french", "german",
	]
	arabic_lang = ["ar", "fa", "ug", "ur"]
	cyrillic_lang = [
	    "ru", "rs_cyrillic", "be", "bg", "uk", "mn", "abq", "ady",
	    "kbd", "ava", "dar", "inh", "che", "lbe", "lez", "tab",
	]
	devanagari_lang = [
	    "hi", "mr", "ne", "bh", "mai", "ang", "bho",
	    "mah", "sck", "new", "gom", "sa", "bgc",
	]
	other_lang = ["ch", "en", "korean", "japan", "chinese_cht", "ta", "te", "ka"]

	# Full list of choices: the empty choice and "auto" come first, followed
	# by every group in a fixed order.
	all_lang = [
	    "",
	    "auto",
	    *other_lang,
	    *latin_lang,
	    *arabic_lang,
	    *cyrillic_lang,
	    *devanagari_lang,
	]

	# Define a function to convert a file to a PDF (if not already)
	def file_to_pdf(file_obj):
	    """Convert an uploaded file to a PDF, if one was provided.

	    Args:
	        file_obj: The uploaded file object, or ``None`` when nothing is
	            selected. Only its ``name`` attribute is read.

	    Returns:
	        The result of ``to_pdf(file_obj.name)``, or ``None`` when
	        ``file_obj`` is ``None``.
	    """
	    # Nothing uploaded (or the input was cleared): there is nothing to convert.
	    if file_obj is None:
	        return None
	    return to_pdf(file_obj.name)

	# Define a function to handle TTS using OpenAI (with fallback)
	def read_text(text, language="en"):
	    """Synthesize speech for ``text`` and return a status message.

	    OpenAI TTS is tried first. If it raises, the error is logged and gTTS
	    is used instead; an exception raised by the gTTS fallback propagates
	    to the caller.

	    Args:
	        text: The text to read aloud.
	        language: Language value forwarded unchanged to both TTS helpers.
	            NOTE(review): the UI wires the OCR language dropdown into this
	            parameter (values such as ``"auto"`` or ``""``) - confirm the
	            TTS helpers accept those values.

	    Returns:
	        ``"No text to read"`` when ``text`` is empty or whitespace-only,
	        otherwise ``"Audio played successfully"``.
	    """
	    # Nothing to synthesize (e.g. the button was pressed before any
	    # conversion produced text): skip both TTS back ends instead of
	    # sending them empty input.
	    if not text or not text.strip():
	        return "No text to read"

	    try:
	        text_to_speech_openai(text, language)
	    except Exception as e:
	        # Deliberate best-effort: any OpenAI failure triggers the fallback.
	        logging.error("OpenAI TTS failed: %s. Falling back to gTTS.", e)
	        text_to_speech_gtts(text, language)
	    return "Audio played successfully"

	def _list_example_pdfs(example_root):
	    """Return the sorted paths of the ``.pdf`` files directly inside ``example_root``.

	    The extension match is case-insensitive. An empty list is returned when
	    the directory does not exist, so a missing examples folder cannot break
	    UI construction.
	    """
	    if not os.path.isdir(example_root):
	        return []
	    return sorted(
	        os.path.join(example_root, f)
	        for f in os.listdir(example_root)
	        if f.lower().endswith(".pdf")
	    )


	# Set up the Gradio Blocks interface
	with gr.Blocks() as demo:
	    gr.HTML(header)
	    with gr.Row():
	        # Left panel: upload, conversion options, preview and examples.
	        with gr.Column(variant='panel', scale=5):
	            file_input = gr.File(
	                label="Please upload a PDF or image",
	                file_types=[".pdf", ".png", ".jpeg", ".jpg"],
	            )
	            max_pages = gr.Slider(
	                1,
	                20,
	                config.get("max_pages_default", 10),
	                step=1,
	                label='Max convert pages',
	            )
	            with gr.Row():
	                layout_mode = gr.Dropdown(
	                    ["layoutlmv3", "doclayout_yolo"],
	                    label="Layout model",
	                    value=config.get("layout_model_default", "doclayout_yolo"),
	                )
	                language = gr.Dropdown(
	                    all_lang,
	                    label="Language",
	                    value=config.get("language_default", "auto"),
	                )
	            with gr.Row():
	                formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
	                is_ocr = gr.Checkbox(label="Force enable OCR", value=False)
	                table_enable = gr.Checkbox(label="Enable table recognition", value=True)
	            with gr.Row():
	                convert_button = gr.Button("Convert")
	                clear_button = gr.ClearButton(value="Clear")
	            pdf_display = PDF(label='PDF preview', interactive=False, visible=True, height=800)

	            example_root = os.path.join(os.path.dirname(__file__), "examples")
	            examples = _list_example_pdfs(example_root)
	            # Only build the examples section when there is something to show.
	            if examples:
	                with gr.Accordion("Examples:"):
	                    gr.Examples(examples=examples, inputs=file_input)

	        # Right panel: conversion result, Markdown views and TTS controls.
	        with gr.Column(variant='panel', scale=5):
	            output_file = gr.File(label="Convert result", interactive=False)
	            with gr.Tabs():
	                with gr.Tab("Markdown rendering"):
	                    md_render = gr.Markdown(
	                        label="Markdown rendering",
	                        height=1100,
	                        show_copy_button=True,
	                        line_breaks=True,
	                    )
	                with gr.Tab("Markdown text"):
	                    md_text = gr.TextArea(lines=45, show_copy_button=True)
	            # TTS components
	            # NOTE(review): the nesting level of these two components was
	            # reconstructed; confirm they belong directly in this column.
	            read_button = gr.Button("Read Out Loud")
	            read_status = gr.Textbox(label="TTS Status")

	    # Define interactions
	    # Refresh the PDF preview whenever the uploaded file changes.
	    file_input.change(fn=file_to_pdf, inputs=file_input, outputs=pdf_display)

	    convert_button.click(
	        fn=to_markdown,
	        inputs=[file_input, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
	        outputs=[md_render, md_text, output_file, pdf_display],
	    )

	    # Read the Markdown text aloud and report the outcome in the status box.
	    read_button.click(fn=read_text, inputs=[md_text, language], outputs=read_status)

	    # Components reset by the "Clear" button.
	    clear_button.add([file_input, md_render, pdf_display, md_text, output_file, is_ocr])

	if __name__ == "__main__":
	    demo.launch(ssr_mode=True)