Spaces:
Runtime error
Runtime error
import os | |
import scipy | |
import nltk | |
import tempfile | |
import numpy as np | |
from bark.generation import preload_models, SAMPLE_RATE | |
from bark import generate_audio | |
from scipy.io import wavfile | |
import gradio as gr | |
# Fetch the sentence-tokenizer data used by nltk.sent_tokenize().
# Older NLTK releases read the 'punkt' package; newer releases read
# 'punkt_tab' instead, so request both to work with either.
nltk.download('punkt')
nltk.download('punkt_tab')

# Expose only the first GPU to CUDA.
# NOTE(review): this runs after `bark` has already been imported; confirm the
# setting is still picked up before CUDA is initialised in this environment.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load the Bark models once at start-up (presumably so the first request does
# not pay the loading cost -- see bark.generation.preload_models).
preload_models()
# Language names accepted by generate_audio_from_text, mapped to the language
# code used inside Bark voice-preset names ("v2/<code>_speaker_<n>").
_LANGUAGE_CODES = {
    "english": "en",
    "french": "fr",
    "german": "de",
    "hindi": "hi",
    "chinese": "zh",
    "italian": "it",
    "japanese": "ja",
}

# "speaker 1" .. "speaker 10" map to preset indices 0 .. 9.
_SPEAKER_INDEX = {f"speaker {number}": number - 1 for number in range(1, 11)}

# Any unrecognised speaker selection falls back to the last preset.
_FALLBACK_SPEAKER_INDEX = 9

# Length of the pause inserted between consecutive sentences.
_SENTENCE_GAP_SECONDS = 0.25


def _resolve_history_prompt(language_prompt, speaker_prompt):
    """Translate the language/speaker selections into a Bark preset name."""
    language_code = _LANGUAGE_CODES.get(language_prompt)
    if language_code is None:
        raise ValueError(f"Unsupported language selection: {language_prompt!r}")
    speaker_index = _SPEAKER_INDEX.get(speaker_prompt, _FALLBACK_SPEAKER_INDEX)
    return f"v2/{language_code}_speaker_{speaker_index}"


def generate_audio_from_text(text, language_prompt, speaker_prompt):
    """Synthesize ``text`` sentence by sentence and save it as a WAV file.

    Args:
        text: The text to speak. It is split into sentences with
            ``nltk.sent_tokenize`` and each sentence is generated separately.
        language_prompt: One of "english", "french", "german", "hindi",
            "chinese", "italian" or "japanese".
        speaker_prompt: "speaker 1" .. "speaker 10". Any other value selects
            the tenth speaker of the chosen language.

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio.
        The file is created with ``delete=False``; removing it is left to
        the caller.

    Raises:
        ValueError: If ``language_prompt`` is not a supported language, or if
            ``text`` contains nothing to synthesize.
    """
    history_prompt = _resolve_history_prompt(language_prompt, speaker_prompt)

    # Guard against empty input: np.concatenate() fails on an empty list with
    # a message that gives the user no hint about the actual problem.
    sentences = nltk.sent_tokenize(text) if text and text.strip() else []
    if not sentences:
        raise ValueError("No text to synthesize: enter at least one sentence")

    gap_samples = int(_SENTENCE_GAP_SECONDS * SAMPLE_RATE)
    pieces = []
    for sentence in sentences:
        audio_array = np.asarray(
            generate_audio(sentence, history_prompt=history_prompt)
        )
        if pieces:
            # Short pause between sentences. The dtype follows the generated
            # audio so concatenation does not upcast the samples (and thereby
            # change the sample format written to the WAV file).
            pieces.append(np.zeros(gap_samples, dtype=audio_array.dtype))
        pieces.append(audio_array)

    final_audio = np.concatenate(pieces)

    # Reserve a unique file name, then close the handle before writing so the
    # file is never open twice (which fails on some platforms).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    wavfile.write(wav_path, SAMPLE_RATE, final_audio)

    return wav_path
# Choices offered in the language and speaker dropdown menus.
language_options = [
    "english", "french", "german", "hindi",
    "chinese", "italian", "japanese",
]

# "speaker 1" through "speaker 10".
speaker_options = [f"speaker {number}" for number in range(1, 11)]
# Build the Gradio UI: a text box plus dropdown menus for language and speaker.
# The function returns the path of a WAV file, offered to the user as a
# downloadable file.
iface = gr.Interface(
    fn=generate_audio_from_text,
    inputs=[
        # `label` is the caption parameter; Textbox has no `text` argument.
        gr.Textbox(label="Enter text to convert to speech:"),
        gr.Dropdown(choices=language_options, label="Select language:"),
        gr.Dropdown(choices=speaker_options, label="Select speaker:"),
    ],
    # gr.File replaces the legacy gr.outputs.File component.
    outputs=gr.File(label="Download WAV File"),
    title="Text-to-Speech App Vertical Solution",
    # The former `timeout=300` argument was dropped: gr.Interface does not
    # define a `timeout` parameter.
)

# Enable request queuing via queue() (the `enable_queue` launch flag is
# deprecated), then start the app with debug output. Public link sharing is
# not turned on here.
iface.queue().launch(debug=True)