Spaces:
Running
Running
xJuuzouYTx
committed on
Commit
·
6f1ebe2
1
Parent(s):
1b5ebf2
[ADD] elevenlabs tts
Browse files- app.py +20 -12
- requirements.txt +1 -1
- tts/constants.py +1 -1
- tts/conversion.py +26 -7
app.py
CHANGED
@@ -7,7 +7,7 @@ from utils.model import model_downloader, get_model
|
|
7 |
import requests
|
8 |
import json
|
9 |
from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
|
10 |
-
from tts.conversion import tts_infer
|
11 |
|
12 |
api_url = "https://rvc-models-api.onrender.com/uploadfile/"
|
13 |
|
@@ -130,9 +130,11 @@ def search_model(name):
|
|
130 |
|
131 |
def update_tts_methods_voice(select_value):
|
132 |
if select_value == "Edge-tts":
|
133 |
-
return gr.update(choices=EDGE_VOICES)
|
134 |
elif select_value == "Bark-tts":
|
135 |
-
return gr.update(choices=BARK_VOICES)
|
|
|
|
|
136 |
|
137 |
with gr.Blocks() as app:
|
138 |
gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
|
@@ -163,19 +165,26 @@ with gr.Blocks() as app:
|
|
163 |
with gr.Row():
|
164 |
tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
|
165 |
|
166 |
-
with gr.
|
167 |
-
tts_method = gr.Dropdown(choices=VOICE_METHODS, value="
|
168 |
-
tts_model = gr.Dropdown(choices=
|
169 |
-
|
170 |
-
|
|
|
|
|
171 |
with gr.Row():
|
172 |
tts_vc_output1 = gr.Textbox(label="Salida")
|
173 |
tts_vc_output2 = gr.Audio(label="Audio de salida")
|
174 |
|
175 |
-
tts_btn =
|
176 |
-
tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model], outputs=[tts_vc_output1, tts_vc_output2])
|
177 |
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
gr.HTML("<h4>Buscar modelos</h4>")
|
180 |
search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
|
181 |
# Salida
|
@@ -198,7 +207,6 @@ with gr.Blocks() as app:
|
|
198 |
btn_post_model = gr.Button(value="Publicar")
|
199 |
btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
|
200 |
|
201 |
-
|
202 |
# with gr.Column():
|
203 |
# model_voice_path07 = gr.Dropdown(
|
204 |
# label=i18n("RVC Model:"),
|
|
|
7 |
import requests
|
8 |
import json
|
9 |
from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
|
10 |
+
from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES
|
11 |
|
12 |
api_url = "https://rvc-models-api.onrender.com/uploadfile/"
|
13 |
|
|
|
130 |
|
131 |
def update_tts_methods_voice(select_value):
    """Build the UI updates to apply when the selected TTS method changes.

    Returns a 3-tuple: an update carrying the voice choices for the chosen
    method, a Markdown visibility update and a Textbox visibility update.
    Both visibility updates are True only for 'ElevenLabs'. Any other,
    unrecognised method yields None.
    """
    if select_value == "Edge-tts":
        voice_choices, show_elevenlabs_extras = EDGE_VOICES, False
    elif select_value == "Bark-tts":
        voice_choices, show_elevenlabs_extras = BARK_VOICES, False
    elif select_value == 'ElevenLabs':
        voice_choices, show_elevenlabs_extras = ELEVENLABS_VOICES_NAMES, True
    else:
        # Unknown method: same result as falling off the end of the function.
        return None

    return (
        gr.update(choices=voice_choices),
        gr.Markdown.update(visible=show_elevenlabs_extras),
        gr.Textbox.update(visible=show_elevenlabs_extras),
    )
|
138 |
|
139 |
with gr.Blocks() as app:
|
140 |
gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
|
|
|
165 |
with gr.Row():
|
166 |
tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
|
167 |
|
168 |
+
with gr.Row():
|
169 |
+
tts_method = gr.Dropdown(choices=VOICE_METHODS, value="ElevenLabs", label="Método TTS:", visible=True)
|
170 |
+
tts_model = gr.Dropdown(choices=ELEVENLABS_VOICES_NAMES, label="Modelo TTS:", visible=True, interactive=True)
|
171 |
+
tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True)
|
172 |
+
|
173 |
+
tts_btn = gr.Button(value="Convertir")
|
174 |
+
|
175 |
with gr.Row():
|
176 |
tts_vc_output1 = gr.Textbox(label="Salida")
|
177 |
tts_vc_output2 = gr.Audio(label="Audio de salida")
|
178 |
|
179 |
+
tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key], outputs=[tts_vc_output1, tts_vc_output2])
|
|
|
180 |
|
181 |
+
tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
|
182 |
+
![Imgur](https://imgur.com/HH6YTu0.png)
|
183 |
+
""", visible=True)
|
184 |
+
|
185 |
+
tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key])
|
186 |
+
|
187 |
+
with gr.Tab("Modelos"):
|
188 |
gr.HTML("<h4>Buscar modelos</h4>")
|
189 |
search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
|
190 |
# Salida
|
|
|
207 |
btn_post_model = gr.Button(value="Publicar")
|
208 |
btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
|
209 |
|
|
|
210 |
# with gr.Column():
|
211 |
# model_voice_path07 = gr.Dropdown(
|
212 |
# label=i18n("RVC Model:"),
|
requirements.txt
CHANGED
@@ -27,7 +27,7 @@ Cython==0.29.30
|
|
27 |
decorator==5.1.1
|
28 |
discord.py==2.3.2
|
29 |
edge-tts==6.1.5
|
30 |
-
elevenlabs
|
31 |
entrypoints==0.4
|
32 |
exceptiongroup==1.1.3
|
33 |
executing==1.2.0
|
|
|
27 |
decorator==5.1.1
|
28 |
discord.py==2.3.2
|
29 |
edge-tts==6.1.5
|
30 |
+
elevenlabs
|
31 |
entrypoints==0.4
|
32 |
exceptiongroup==1.1.3
|
33 |
executing==1.2.0
|
tts/constants.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
VOICE_METHODS = ["Edge-tts", "
|
2 |
|
3 |
BARK_VOICES = [
|
4 |
"v2/en_speaker_0-Male",
|
|
|
1 |
+
VOICE_METHODS = ["Edge-tts", "ElevenLabs",]
|
2 |
|
3 |
BARK_VOICES = [
|
4 |
"v2/en_speaker_0-Male",
|
tts/conversion.py
CHANGED
@@ -7,6 +7,18 @@ from gtts import gTTS
|
|
7 |
import edge_tts
|
8 |
from inference import Inference
|
9 |
import asyncio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
#git+https://github.com/suno-ai/bark.git
|
11 |
# from transformers import AutoProcessor, BarkModel
|
12 |
# import nltk
|
@@ -50,16 +62,11 @@ def cast_to_device(tensor, device):
|
|
50 |
# return speech, sampling_rate
|
51 |
|
52 |
|
53 |
-
def tts_infer(tts_text, model_url, tts_method, tts_model):
|
54 |
-
print("*****************")
|
55 |
-
print(tts_text)
|
56 |
-
print(model_url)
|
57 |
if not tts_text:
|
58 |
return 'Primero escribe el texto que quieres convertir.', None
|
59 |
if not tts_model:
|
60 |
return 'Selecciona un modelo TTS antes de convertir.', None
|
61 |
-
if not model_url:
|
62 |
-
return 'Escribe la url de modelo que quieres usar antes de convertir.', None
|
63 |
|
64 |
f0_method = "harvest"
|
65 |
output_folder = "audios"
|
@@ -94,7 +101,19 @@ def tts_infer(tts_text, model_url, tts_method, tts_model):
|
|
94 |
tts.save(converted_tts_filename)
|
95 |
print("Error: Audio will be replaced.")
|
96 |
success = False
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
# elif tts_method == "Bark-tts":
|
99 |
# try:
|
100 |
# script = tts_text.replace("\n", " ").strip()
|
|
|
7 |
import edge_tts
|
8 |
from inference import Inference
|
9 |
import asyncio
|
10 |
+
from elevenlabs import voices, generate, save
|
11 |
+
|
12 |
+
ELEVENLABS_VOICES_RAW = voices()
|
13 |
+
|
14 |
+
def get_elevenlabs_voice_names():
    """Return the ``name`` of every entry in ELEVENLABS_VOICES_RAW as a list.

    The list follows the iteration order of ELEVENLABS_VOICES_RAW and is
    rebuilt on each call.
    """
    return [voice.name for voice in ELEVENLABS_VOICES_RAW]
|
19 |
+
|
20 |
+
ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
|
21 |
+
|
22 |
#git+https://github.com/suno-ai/bark.git
|
23 |
# from transformers import AutoProcessor, BarkModel
|
24 |
# import nltk
|
|
|
62 |
# return speech, sampling_rate
|
63 |
|
64 |
|
65 |
+
def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
|
|
|
|
|
|
|
66 |
if not tts_text:
|
67 |
return 'Primero escribe el texto que quieres convertir.', None
|
68 |
if not tts_model:
|
69 |
return 'Selecciona un modelo TTS antes de convertir.', None
|
|
|
|
|
70 |
|
71 |
f0_method = "harvest"
|
72 |
output_folder = "audios"
|
|
|
101 |
tts.save(converted_tts_filename)
|
102 |
print("Error: Audio will be replaced.")
|
103 |
success = False
|
104 |
+
if tts_method == 'ElevenLabs':
|
105 |
+
audio = generate(
|
106 |
+
text=tts_text,
|
107 |
+
voice=tts_model,
|
108 |
+
model="eleven_multilingual_v2",
|
109 |
+
api_key=tts_api_key
|
110 |
+
)
|
111 |
+
save(audio=audio, filename=converted_tts_filename)
|
112 |
+
success = True
|
113 |
+
|
114 |
+
if not model_url:
|
115 |
+
return 'Pon la url del modelo si quieres aplicarle otro tono.', converted_tts_filename
|
116 |
+
|
117 |
# elif tts_method == "Bark-tts":
|
118 |
# try:
|
119 |
# script = tts_text.replace("\n", " ").strip()
|