Spaces:
Sleeping
Sleeping
import gradio as gr | |
import asr | |
import tts | |
import util | |
# Speech-to-text tab: passes the recorded/uploaded clip and the selected model
# name to asr.transcribe and displays its results in two text boxes.
mms_transcribe = gr.Interface(
    fn=asr.transcribe,
    inputs=[
        gr.Audio(
            label="Record or Upload Uyghur Audio",
            sources=["microphone", "upload"],
            type="filepath",  # the handler receives a path to the audio file
        ),
        gr.Dropdown(
            # The choices are whatever iterating asr.models_info yields.
            choices=list(asr.models_info),
            label="Select a Model",
            # NOTE(review): presumably one of the entries in asr.models_info;
            # confirm against that module.
            value="Ixxan-FineTuned-MMS",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Textbox(label="Uyghur Arabic Transcription"),
        gr.Textbox(label="Uyghur Latin Transcription"),
    ],
    examples=util.asr_examples,
    description=(
        """
Transcribe Uyghur audio from a microphone or input file.
Click on examples below for sample usage.
Please keep the audio length under 10 seconds for faster processing since this space is running on CPU basic.
"""
    ),
    article=util.asr_notes,
    allow_flagging="never",
)
# Text-to-speech tab: passes the typed text and the selected model name to
# tts.synthesize and plays back the audio it produces.
mms_synthesize = gr.Interface(
    fn=tts.synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            # The choices are whatever iterating tts.models_info yields.
            choices=list(tts.models_info),
            label="Select a Model",
            # NOTE(review): presumably one of the entries in tts.models_info;
            # confirm against that module.
            value="Ixxan-FineTuned-MMS",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Audio"),
    ],
    examples=util.tts_examples,
    description=(
        """
Generate audio from input Uyghur text.
Click on examples below for sample usage.
Please keep the input text length under 200 characters for faster processing since this space is running on CPU basic.
"""
    ),
    article=util.tts_notes,
    allow_flagging="never",
)
# Combine the two interfaces into one tabbed view; tab titles pair with the
# interfaces by position.
tabbed_interface = gr.TabbedInterface(
    interface_list=[mms_transcribe, mms_synthesize],
    tab_names=["Speech-To-Text", "Text-To-Speech"],
)
# Banner markup shown above the tabs: page title, tagline, and project links.
_HEADER_MARKDOWN = """
<h1 style="text-align: center; font-size: 28px; color: #4A90E2;">
Uyghur Speech-To-Text (STT) and Text-To-Speech (TTS) Models
</h1>
<p style="text-align: center; font-size: 16px; color: #555;">
Comparisons of existing and fine-tuned speech models for transcribing and synthesizing Uyghur speech.
</p>
To learn more about Uyghur Speech Technology, please check out my [blog post](https://ixxan.github.io/blog/low-resource-speech-uyghur).
To see the model fine-tuning code, please visit my [GitHub repository](https://github.com/ixxan/ug-speech).
"""

# Top-level page layout: the banner first, then the tabbed STT/TTS interface.
with gr.Blocks() as demo:
    gr.Markdown(_HEADER_MARKDOWN)
    tabbed_interface.render()
if __name__ == "__main__":
    # Enable the request queue with explicit limits (a default concurrency
    # limit of 2 and a maximum queue size of 20), then start the app.
    demo.queue(default_concurrency_limit=2, max_size=20).launch()