"""Gradio front end for the WhisperSpeech text-to-speech demo.

Defines a helper that asks a hosted Space for synthesized speech, a helper
that runs a local pipeline object, and the Blocks UI that is launched when
this module is executed.
"""

from pathlib import Path

import gradio as gr
from gradio_client import Client

DEBUG_MODE = True
SAS_SWITCH = True  # NOTE(review): not referenced anywhere in the visible code

# Hosted Space that get_speech() sends its requests to.
WHISPERSPEECH_SPACE_URL = "https://collabora-whisperspeech.hf.space/"


def get_speech(text, voice, speaker_url="", cps=14):
    """Get the speech for a text from the hosted WhisperSpeech Space.

    For now an external Space is used to get the result. In the future our
    own model should be used to be more independent.

    Args:
        text: The text to be converted to speech.
        voice: File path of the speaker audio to be used for the speech.
        speaker_url: Alternative to ``voice``: URL of an audio file.
            Defaults to ``""``, the value previously hard-coded here.
        cps: Tempo in characters per second (the remote slider accepts
            values between 10 and 15). Defaults to 14, the value previously
            hard-coded here.

    Returns:
        Whatever the ``/whisper_speech_demo`` endpoint returns.
    """
    client = Client(WHISPERSPEECH_SPACE_URL)
    result = client.predict(
        # str in 'Enter multilingual text📝' Textbox component
        text,
        # filepath in 'Upload or Record Speaker Audio (optional)🌬️💬' Audio component
        voice,
        # str in 'alternatively, you can paste in an audio file URL:' Textbox component
        speaker_url,
        # float (numeric value between 10 and 15) in
        # 'Tempo (in characters per second)' Slider component
        cps,
        api_name="/whisper_speech_demo",
    )

    if DEBUG_MODE:
        print(result)

    return result


def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
    """Synthesize audio for ``segments`` with a local pipeline object.

    Speaker selection:
      - if ``speaker`` is a string or ``Path`` it is passed to
        ``pipe.extract_spk_emb`` to obtain the speaker embedding;
      - else, if ``speaker_url`` is provided, the embedding is extracted
        from that URL;
      - else ``pipe.default_speaker`` is used.

    Args:
        pipe: Pipeline object exposing ``extract_spk_emb``,
            ``default_speaker``, ``t2s``, ``s2a`` and ``vocoder``.
        segments: Non-empty iterable of ``(lang, text)`` pairs.
        speaker: Speaker audio path (``str`` / ``Path``) or any other value
            to fall through to ``speaker_url`` / the default speaker.
        speaker_url: URL of a speaker audio file, or a falsy value.
        cps: Tempo in characters per second, forwarded to ``t2s.generate``.

    Returns:
        The decoded audio after ``.cpu()`` has been called on it.

    Raises:
        ValueError: If ``segments`` is empty.
    """
    segments = list(segments)
    if not segments:
        # Unpacking zip(*[]) would raise an opaque ValueError; be explicit.
        raise ValueError("segments must contain at least one (lang, text) pair")

    if isinstance(speaker, (str, Path)):
        speaker = pipe.extract_spk_emb(speaker)
    elif speaker_url:
        speaker = pipe.extract_spk_emb(speaker_url)
    else:
        speaker = pipe.default_speaker

    # Transpose [(lang, text), ...] into two parallel lists.
    langs, texts = [list(column) for column in zip(*segments)]
    print(texts, langs)

    stoks = pipe.t2s.generate(texts, cps=cps, lang=langs)
    # Drop every token equal to 512.
    # NOTE(review): presumably a padding/stop token id - confirm.
    stoks = stoks[stoks != 512]
    atoks = pipe.s2a.generate(stoks, speaker.unsqueeze(0))
    audio = pipe.vocoder.decode(atoks)
    return audio.cpu()


# NOTE(review): the file's original indentation was lost; the nesting of the
# layout containers below is reconstructed from statement order - confirm.
with gr.Blocks() as demo:
    with gr.Row():
        text_input = gr.Textbox(label="Enter multilingual text📝")
        cps = gr.Slider(
            value=14,
            minimum=10,
            maximum=15,
            step=.25,
            label="Speed (in characters per second)",
        )
    with gr.Row(equal_height=True):
        speaker_input = gr.Audio(
            # The line break inside the label is kept as an explicit escape.
            label="Upload or Record Speaker Audio \n(optional)🌬️💬",
            sources=["upload", "microphone"],
            type='filepath',
        )
        url_input = gr.Textbox(
            label="alternatively, you can paste in an audio file URL:"
        )
    gr.Markdown(" \n ")  # fixes the bottom overflow from Audio
    # NOTE(review): no click handler is attached to this button in the
    # visible code, so pressing it does nothing yet.
    generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
    with gr.Column(scale=1):
        output_audio = gr.Audio(label="WhisperSpeech says…")

demo.launch(server_port=46007)