diff --git "a/tts.ipynb" "b/tts.ipynb"
--- "a/tts.ipynb"
+++ "b/tts.ipynb"
@@ -9,52 +9,70 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/homebrew/Caskroom/miniconda/base/envs/llm/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
"source": [
"import os\n",
"\n",
"import torch\n",
"import gradio as gr\n",
"from TTS.api import TTS\n",
- "os.environ[\"COQUI_TOS_AGREED\"] = \"1\""
+ "os.environ[\"COQUI_TOS_AGREED\"] = \"1\"\n",
+ "# os.environ[\"PYTORCH_ENABLE_MPS_FALLBACK\"] = \"1\""
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from collections import namedtuple\n",
"\n",
- "Voice = namedtuple('voice', ['name', 'neutral','sad','angry','happy'])\n"
+ "Voice = namedtuple('voice', ['name', 'neutral','angry'])\n"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"voices = [\n",
- " Voice('Rick', neutral='audio/rick/angry.mp3', sad=None, angry=None, happy=None),\n",
+ " Voice('Attenborough', neutral='audio/attenborough/neutral.wav', angry=None),\n",
+ " Voice('Rick', neutral='audio/rick/neutral.wav', angry=None),\n",
+ " Voice('Freeman', neutral='audio/freeman/neutral.wav', angry='audio/freeman/angry.wav'),\n",
+ " Voice('Walken', neutral='audio/walken/neutral.wav', angry=None),\n",
+ " Voice('Darth Wader', neutral='audio/darth/neutral.wav', angry=None),\n",
"]"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[voice(name='Rick', neutral='audio/rick/angry.mp3', sad=None, angry=None, happy=None)]"
+ "[voice(name='Attenborough', neutral='audio/attenborough/neutral.mp3', angry=None),\n",
+ " voice(name='Rick', neutral='audio/rick/neutral.mp3', angry=None),\n",
+ " voice(name='Freeman', neutral='audio/freeman/neutral.mp3', angry='audio/freeman/angry.mp3'),\n",
+ " voice(name='Walken', neutral='audio/walken/neutral.mp3', angry=None),\n",
+ " voice(name='Darth Wader', neutral='audio/darth/neutral.mp3', angry=None)]"
]
},
- "execution_count": 14,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -65,14 +83,32 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- " > tts_models/multilingual/multi-dataset/xtts_v1.1 is already downloaded.\n",
+ " > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/homebrew/Caskroom/miniconda/base/envs/llm/lib/python3.11/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " _torch_pytree._register_pytree_node(\n",
+ "/opt/homebrew/Caskroom/miniconda/base/envs/llm/lib/python3.11/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " _torch_pytree._register_pytree_node(\n",
+ "/opt/homebrew/Caskroom/miniconda/base/envs/llm/lib/python3.11/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " _torch_pytree._register_pytree_node(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
" > Using model: xtts\n"
]
}
@@ -80,12 +116,58 @@
"source": [
"#load model for text to speech\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
- "tts_pipelins = TTS(\"tts_models/multilingual/multi-dataset/xtts_v1.1\").to(device)"
+ "# device = \"mps\"\n",
+ "tts_pipelins = TTS(\"tts_models/multilingual/multi-dataset/xtts_v2\").to(device)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import IPython\n"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "speaker_embedding_cache = {}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def compute_speaker_embedding(voice_path: str, config, pipeline, cache):\n",
+ "    \"\"\"Return the conditioning latents for `voice_path`, computing them at most once.\n",
+ "\n",
+ "    Args:\n",
+ "        voice_path: path of the reference audio; also used as the cache key.\n",
+ "        config: object exposing `gpt_cond_len`, `gpt_cond_chunk_len`,\n",
+ "            `max_ref_len` and `sound_norm_refs`.\n",
+ "        pipeline: object whose `synthesizer.tts_model` provides\n",
+ "            `get_conditioning_latents`.\n",
+ "        cache: mutable mapping from path to previously computed latents;\n",
+ "            it is updated in place on a cache miss.\n",
+ "\n",
+ "    Returns:\n",
+ "        Whatever `get_conditioning_latents` returned for this path.\n",
+ "    \"\"\"\n",
+ "    # Cache hit: skip the model call entirely.\n",
+ "    if voice_path in cache:\n",
+ "        return cache[voice_path]\n",
+ "\n",
+ "    latents = pipeline.synthesizer.tts_model.get_conditioning_latents(\n",
+ "        audio_path=voice_path,\n",
+ "        gpt_cond_len=config.gpt_cond_len,\n",
+ "        gpt_cond_chunk_len=config.gpt_cond_chunk_len,\n",
+ "        max_ref_length=config.max_ref_len,\n",
+ "        sound_norm_refs=config.sound_norm_refs,\n",
+ "    )\n",
+ "    cache[voice_path] = latents\n",
+ "    return latents"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "out = compute_speaker_embedding(voices[0].neutral, tts_pipelins.synthesizer.tts_config, tts_pipelins, speaker_embedding_cache)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -93,29 +175,297 @@
"output_type": "stream",
"text": [
" > Text splitted to sentences.\n",
- "[\"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\"]\n",
- " > Processing time: 0.7903299331665039\n",
- " > Real-time factor: 0.11176741294459602\n"
+ "['Hey Petra, so you are hungry?', 'and you like me to prepare some strawberries for you?', 'do you like strawberries?']\n",
+ " > Processing time: 15.77448582649231\n",
+ " > Real-time factor: 1.7459813091024587\n"
]
- },
+ }
+ ],
+ "source": [
+ "out = tts_pipelins.tts(\n",
+ " \"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\",\n",
+ " speaker_wav=\"audio/freeman/neutral.wav\",\n",
+ " language=\"en\",\n",
+ " # file_path=\"out.wav\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import List\n",
+ "import time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ref_audio_path = \"audio/freeman/neutral.wav\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# `config` is only bound in a later cell, so address the same config object\n",
+ "# through the pipeline; this keeps the cell runnable on a fresh kernel.\n",
+ "tts_pipelins.synthesizer.tts_config.max_ref_len = 360"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config = tts_pipelins.synthesizer.tts_config\n",
+ "(gpt_cond_latent, speaker_embedding) = tts_pipelins.synthesizer.tts_model.get_conditioning_latents(\n",
+ " audio_path=ref_audio_path,\n",
+ " gpt_cond_len=config.gpt_cond_len,\n",
+ " gpt_cond_chunk_len=config.gpt_cond_chunk_len,\n",
+ " max_ref_length=config.max_ref_len,\n",
+ " sound_norm_refs=config.sound_norm_refs,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(gpt_cond_latent, speaker_embedding) = compute_speaker_embedding(voices[0].neutral, tts_pipelins.synthesizer.tts_config, tts_pipelins, speaker_embedding_cache)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "metadata": {},
+ "outputs": [
{
"data": {
"text/plain": [
- "'out.wav'"
+ "(205872,)"
]
},
- "execution_count": 27,
+ "execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "tts_pipelins.tts_to_file(\"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\", speaker_wav=\"audio/rick/neutral.wav\", emotion='neutral', language='en', file_path='out.wav')"
+ "np.array(out)"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 110,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "205872"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(out)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " > Text splitted to sentences.\n",
+ "['Something is up!']\n",
+ " > Processing time: 2.9515581130981445\n",
+ " > Real-time factor: 1.588292083019672\n"
+ ]
+ }
+ ],
+ "source": [
+ "out = tts(\n",
+ " tts_pipelins.synthesizer,\n",
+ " \"Something is up!\",\n",
+ " # speaker_wav=ref_audio_path,\n",
+ " language_name=\"en\",\n",
+ " speaker=None,\n",
+ " gpt_cond_latent=gpt_cond_latent,\n",
+ " speaker_embedding=speaker_embedding,\n",
+ " speed=1.1,\n",
+ " # file_path=\"out.wav\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 129,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 129,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "IPython.display.Audio(out, rate=22050)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input\n",
+ "\n",
+ "def tts(\n",
+ "    self,\n",
+ "    text: str = \"\",\n",
+ "    language_name: str = \"\",\n",
+ "    reference_wav=None,\n",
+ "    gpt_cond_latent=None,\n",
+ "    speaker_embedding=None,\n",
+ "    split_sentences: bool = True,\n",
+ "    **kwargs,\n",
+ ") -> List[int]:\n",
+ "    \"\"\"Synthesize `text` using precomputed speaker latents.\n",
+ "\n",
+ "    Every sentence is passed to `self.tts_model.inference` together with\n",
+ "    `gpt_cond_latent` and `speaker_embedding`, so the speaker conditioning\n",
+ "    does not have to be recomputed on each call.\n",
+ "\n",
+ "    Args:\n",
+ "        self: synthesizer-like object exposing `split_into_sentences`,\n",
+ "            `tts_model` and `tts_config`.\n",
+ "        text (str): input text.\n",
+ "        language_name (str): language id forwarded to the model.\n",
+ "        reference_wav: when set, the synthesis loop is skipped (voice\n",
+ "            conversion is not implemented here) and an empty list is returned.\n",
+ "        gpt_cond_latent: conditioning latent forwarded to `inference`.\n",
+ "        speaker_embedding: speaker embedding forwarded to `inference`.\n",
+ "        split_sentences (bool): split the input text into sentences. Defaults to True.\n",
+ "        **kwargs: extra keyword arguments for `tts_model.inference`; they\n",
+ "            override the default sampling settings defined below.\n",
+ "\n",
+ "    Returns:\n",
+ "        list: samples of all sentences, each followed by 10000 zero samples.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: if neither `text` nor `reference_wav` is given.\n",
+ "    \"\"\"\n",
+ "    start_time = time.time()\n",
+ "    wavs = []\n",
+ "\n",
+ "    if not text and not reference_wav:\n",
+ "        raise ValueError(\n",
+ "            \"You need to define either `text` (for sythesis) or a `reference_wav` (for voice conversion) to use the Coqui TTS API.\"\n",
+ "        )\n",
+ "\n",
+ "    # Default sampling settings; merging kwargs last lets callers override them\n",
+ "    # without triggering a duplicate-keyword TypeError.\n",
+ "    inference_kwargs = {\n",
+ "        \"temperature\": 0.75,\n",
+ "        \"length_penalty\": 1.0,\n",
+ "        \"repetition_penalty\": 10.0,\n",
+ "        \"top_k\": 50,\n",
+ "        \"top_p\": 0.85,\n",
+ "        \"do_sample\": True,\n",
+ "        **kwargs,\n",
+ "    }\n",
+ "\n",
+ "    sens = []\n",
+ "    if text:\n",
+ "        sens = [text]\n",
+ "        if split_sentences:\n",
+ "            print(\" > Text splitted to sentences.\")\n",
+ "            sens = self.split_into_sentences(text)\n",
+ "        print(sens)\n",
+ "\n",
+ "    if not reference_wav:  # not voice conversion\n",
+ "        audio_config = self.tts_config.audio\n",
+ "        for sen in sens:\n",
+ "            outputs = self.tts_model.inference(\n",
+ "                sen,\n",
+ "                language_name,\n",
+ "                gpt_cond_latent,\n",
+ "                speaker_embedding,\n",
+ "                **inference_kwargs,\n",
+ "            )\n",
+ "            waveform = outputs[\"wav\"]\n",
+ "            # Decide on the value itself instead of the undefined `use_gl` flag:\n",
+ "            # tensors are moved to host memory, arrays are used as they are.\n",
+ "            if torch.is_tensor(waveform):\n",
+ "                waveform = waveform.detach().cpu().numpy()\n",
+ "            waveform = waveform.squeeze()\n",
+ "\n",
+ "            # trim silence\n",
+ "            if \"do_trim_silence\" in audio_config and audio_config[\"do_trim_silence\"]:\n",
+ "                # Imported here because no cell of the notebook imports it.\n",
+ "                from TTS.tts.utils.synthesis import trim_silence\n",
+ "\n",
+ "                waveform = trim_silence(waveform, self.tts_model.ap)\n",
+ "\n",
+ "            wavs += list(waveform)\n",
+ "            # 10000 zero samples of silence between sentences.\n",
+ "            wavs += [0] * 10000\n",
+ "\n",
+ "    # compute stats\n",
+ "    process_time = time.time() - start_time\n",
+ "    audio_time = len(wavs) / self.tts_config.audio[\"sample_rate\"]\n",
+ "    print(f\" > Processing time: {process_time}\")\n",
+ "    # Nothing synthesized means zero audio time; skip the ratio instead of dividing by zero.\n",
+ "    if audio_time > 0:\n",
+ "        print(f\" > Real-time factor: {process_time / audio_time}\")\n",
+ "    return wavs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "type(tts_pipelins)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "IPython.display.Audio(out, rate=22050)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -141,34 +491,99 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 90,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Radio-button labels: one entry per voice and emotion that has a reference clip.\n",
+ "voice_options = []\n",
+ "for voice in voices:\n",
+ "    candidates = (\n",
+ "        (voice.neutral, f\"{voice.name} - Neutral\"),\n",
+ "        (voice.angry, f\"{voice.name} - Angry\"),\n",
+ "    )\n",
+ "    voice_options.extend(label for clip, label in candidates if clip)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def voice_from_text(voice):\n",
+ "    \"\"\"Map a radio label to the reference audio path of the matching voice.\n",
+ "\n",
+ "    Args:\n",
+ "        voice: label of the form `<name> - Neutral` or `<name> - Angry`.\n",
+ "\n",
+ "    Returns:\n",
+ "        The `neutral` or `angry` path of the matching entry in `voices`.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: if no entry in `voices` provides a clip for the label.\n",
+ "    \"\"\"\n",
+ "    for v in voices:\n",
+ "        if voice == f\"{v.name} - Neutral\" and v.neutral:\n",
+ "            return v.neutral\n",
+ "        if voice == f\"{v.name} - Angry\" and v.angry:\n",
+ "            return v.angry\n",
+ "    # Fail here instead of handing None to the embedding step further down.\n",
+ "    raise ValueError(f\"Unknown voice option: {voice!r}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def tts_gradio(text, voice, state):\n",
+ "    \"\"\"Gradio callback: synthesize `text` with the voice selected in the UI.\n",
+ "\n",
+ "    Args:\n",
+ "        text: text to speak.\n",
+ "        voice: radio label, resolved to an audio path by `voice_from_text`.\n",
+ "        state: previous interface state; only printed, never read.\n",
+ "\n",
+ "    Returns:\n",
+ "        tuple: `((sample_rate, samples), new_state)` where `samples` is a numpy\n",
+ "        array and `new_state` records the text and voice of this call.\n",
+ "    \"\"\"\n",
+ "    print(text, voice, state)\n",
+ "    synthesizer = tts_pipelins.synthesizer\n",
+ "    voice_path = voice_from_text(voice)\n",
+ "    (gpt_cond_latent, speaker_embedding) = compute_speaker_embedding(\n",
+ "        voice_path, synthesizer.tts_config, tts_pipelins, speaker_embedding_cache\n",
+ "    )\n",
+ "    out = tts(\n",
+ "        synthesizer,\n",
+ "        text,\n",
+ "        language_name=\"en\",\n",
+ "        speaker=None,\n",
+ "        gpt_cond_latent=gpt_cond_latent,\n",
+ "        speaker_embedding=speaker_embedding,\n",
+ "        speed=1.1,\n",
+ "        # file_path=\"out.wav\",\n",
+ "    )\n",
+ "    # Play back at the rate the synthesizer reports for its output rather than a\n",
+ "    # hard-coded constant; 22050 stays as fallback when the attribute is missing.\n",
+ "    # NOTE(review): presumably this differs from 22050 for xtts_v2 - confirm.\n",
+ "    sample_rate = getattr(synthesizer, \"output_sample_rate\", None) or 22050\n",
+ "    return (sample_rate, np.array(out)), dict(text=text, voice=voice)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
"metadata": {},
"outputs": [
{
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/lib/python3.10/site-packages/gradio/utils.py:924: UserWarning: Expected 1 arguments for function , received 3.\n",
- " warnings.warn(\n",
- "/opt/conda/lib/python3.10/site-packages/gradio/utils.py:932: UserWarning: Expected maximum 1 arguments for function , received 3.\n",
- " warnings.warn(\n"
- ]
- },
+ "data": {
+ "text/plain": [
+ "dict_keys(['audio/attenborough/neutral.wav'])"
+ ]
+ },
+ "execution_count": 122,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "speaker_embedding_cache.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 127,
+ "metadata": {},
+ "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
+ "This is going to be fun, let's enjoy ourselves\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
+ "Closing server running on port: 7860\n",
"Running on local URL: http://0.0.0.0:7860\n",
"\n",
- "Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024/04/18 13:48:05 [W] [service.go:132] login to server failed: dial tcp 44.237.78.176:7000: i/o timeout\n"
+ "To create a public link, set `share=True` in `launch()`.\n"
]
},
{
@@ -183,43 +598,42 @@
"metadata": {},
"output_type": "display_data"
},
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "This is going to be fun, let's enjoy ourselves Darth Wader - Neutral None\n",
+ " > Text splitted to sentences.\n",
+ "[\"This is going to be fun, let's enjoy ourselves\"]\n",
+ " > Processing time: 9.152068138122559\n",
+ " > Real-time factor: 1.8119083325456329\n"
+ ]
+ },
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Traceback (most recent call last):\n",
- " File \"/opt/conda/lib/python3.10/site-packages/gradio/queueing.py\", line 527, in process_events\n",
- " response = await route_utils.call_process_api(\n",
- " File \"/opt/conda/lib/python3.10/site-packages/gradio/route_utils.py\", line 261, in call_process_api\n",
- " output = await app.get_blocks().process_api(\n",
- " File \"/opt/conda/lib/python3.10/site-packages/gradio/blocks.py\", line 1786, in process_api\n",
- " result = await self.call_function(\n",
- " File \"/opt/conda/lib/python3.10/site-packages/gradio/blocks.py\", line 1338, in call_function\n",
- " prediction = await anyio.to_thread.run_sync(\n",
- " File \"/opt/conda/lib/python3.10/site-packages/anyio/to_thread.py\", line 56, in run_sync\n",
- " return await get_async_backend().run_sync_in_worker_thread(\n",
- " File \"/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 2144, in run_sync_in_worker_thread\n",
- " return await future\n",
- " File \"/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 851, in run\n",
- " result = context.run(func, *args)\n",
- " File \"/opt/conda/lib/python3.10/site-packages/gradio/utils.py\", line 759, in wrapper\n",
- " response = f(*args, **kwargs)\n",
- "TypeError: tts() takes 1 positional argument but 3 were given\n"
+ "/opt/homebrew/Caskroom/miniconda/base/envs/llm/lib/python3.11/site-packages/gradio/processing_utils.py:390: UserWarning: Trying to convert audio automatically from float64 to 16-bit int format.\n",
+ " warnings.warn(warning.format(data.dtype))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Keyboard interruption in main thread... closing server.\n",
- "Killing tunnel 0.0.0.0:7860 <> None\n"
+ "This is going to be fun, let's enjoy ourselves Darth Wader - Neutral {'text': \"This is going to be fun, let's enjoy ourselves\", 'voice': 'Darth Wader - Neutral'}\n",
+ " > Text splitted to sentences.\n",
+ "[\"This is going to be fun, let's enjoy ourselves\"]\n",
+ " > Processing time: 7.824646234512329\n",
+ " > Real-time factor: 1.8261372721316347\n",
+ "Keyboard interruption in main thread... closing server.\n"
]
},
{
"data": {
"text/plain": []
},
- "execution_count": 8,
+ "execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
@@ -232,28 +646,29 @@
"\n",
"\n",
"model_answer= ''\n",
- "general_context= ''\n",
+ "general_context= \"This is going to be fun, let's enjoy ourselves\"\n",
"# Define the initial state with some initial context.\n",
"print(general_context)\n",
"initial_state = {'context': general_context}\n",
"initial_context= initial_state['context']\n",
"# Create the Gradio interface.\n",
"iface = gr.Interface(\n",
- " fn=tts,\n",
+ " fn=tts_gradio,\n",
" inputs=[\n",
- " gr.Textbox(value=initial_context, visible=True),\n",
- " gr.Radio(choices=[x.name for x in voices], label='Choose a voice', value=voices[0].name, show_label=True), # Radio button for voice selection\n",
+ " gr.Textbox(value=initial_context, visible=True, label='Enter the text to be converted to speech', placeholder=\"This is going to be fun, let's enjoy ourselves\", lines=5),\n",
+ " gr.Radio(choices=voice_options, label='Choose a voice', value=voice_options[0], show_label=True), # Radio button for voice selection\n",
" gr.State() # This will keep track of the context state across interactions.\n",
" ],\n",
" outputs=[\n",
- " gr.Audio(label = 'output audio'),\n",
+ " gr.Audio(label = 'output audio', autoplay=True),\n",
" gr.State()\n",
- " ]\n",
+ " ],\n",
+ " flagging_options=['👎', '👍'],\n",
")\n",
"#close all interfaces open to make the port available\n",
"gr.close_all()\n",
"# Launch the interface.\n",
- "iface.launch(debug=True, share=True, server_name=\"0.0.0.0\", server_port=7860, ssl_verify=False)"
+ "iface.launch(debug=True, share=False, server_name=\"0.0.0.0\", server_port=7860, ssl_verify=False)"
]
},
{
@@ -280,7 +695,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.13"
+ "version": "3.11.8"
}
},
"nbformat": 4,