Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -6,420 +6,279 @@ import subprocess
|
|
6 |
import logging
|
7 |
from pathlib import Path
|
8 |
import torch
|
9 |
-
import torchaudio
|
10 |
import gradio as gr
|
11 |
-
from TTS.api import TTS
|
12 |
from TTS.tts.configs.xtts_config import XttsConfig
|
13 |
from TTS.tts.models.xtts import Xtts
|
14 |
from TTS.utils.generic_utils import get_user_data_dir
|
15 |
from huggingface_hub import hf_hub_download
|
16 |
import scipy.io.wavfile as wavfile
|
|
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
-
logger = logging.getLogger(__name__)
|
21 |
|
22 |
# Configuración inicial
|
23 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
|
24 |
|
25 |
-
# Configurar
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
# Suprimir warnings de DeepSpeed si no es necesario
|
30 |
-
import warnings
|
31 |
-
warnings.filterwarnings("ignore", category=FutureWarning, module="deepspeed")
|
32 |
-
warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
|
33 |
|
34 |
class PedroTTSApp:
|
35 |
def __init__(self):
|
36 |
self.model = None
|
37 |
self.config = None
|
38 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
39 |
-
logger.info(f"
|
40 |
|
41 |
-
def check_and_install(self, package):
|
42 |
-
"""Verifica e instala paquetes faltantes"""
|
43 |
-
try:
|
44 |
-
__import__(package)
|
45 |
-
except ImportError:
|
46 |
-
logger.info(f"{package} no está instalado. Instalando...")
|
47 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", package])
|
48 |
-
|
49 |
def setup_model(self):
|
50 |
"""Descarga y configura el modelo"""
|
51 |
try:
|
52 |
-
logger.info("
|
|
|
|
|
53 |
repo_id = "Blakus/Pedro_Lab_XTTS"
|
54 |
local_dir = Path(get_user_data_dir("tts")) / "tts_models--multilingual--multi-dataset--xtts_v2"
|
55 |
local_dir.mkdir(parents=True, exist_ok=True)
|
56 |
|
|
|
57 |
files_to_download = ["config.json", "model.pth", "vocab.json"]
|
58 |
|
59 |
for file_name in files_to_download:
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
#
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
|
72 |
self.config = XttsConfig()
|
73 |
-
self.config.load_json(
|
74 |
|
75 |
self.model = Xtts.init_from_config(self.config)
|
76 |
|
77 |
-
#
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
vocab_path=str(vocab_path),
|
86 |
-
eval=True,
|
87 |
-
use_deepspeed=True
|
88 |
-
)
|
89 |
-
use_deepspeed = True
|
90 |
-
except Exception as deepspeed_error:
|
91 |
-
logger.warning(f"DeepSpeed falló, cargando sin DeepSpeed: {deepspeed_error}")
|
92 |
-
self.model.load_checkpoint(
|
93 |
-
self.config,
|
94 |
-
checkpoint_path=str(checkpoint_path),
|
95 |
-
vocab_path=str(vocab_path),
|
96 |
-
eval=True,
|
97 |
-
use_deepspeed=False
|
98 |
-
)
|
99 |
-
else:
|
100 |
-
self.model.load_checkpoint(
|
101 |
-
self.config,
|
102 |
-
checkpoint_path=str(checkpoint_path),
|
103 |
-
vocab_path=str(vocab_path),
|
104 |
-
eval=True,
|
105 |
-
use_deepspeed=False
|
106 |
-
)
|
107 |
|
108 |
if self.device == "cuda" and torch.cuda.is_available():
|
109 |
self.model.cuda()
|
110 |
-
logger.info(
|
111 |
else:
|
|
|
112 |
logger.info("Modelo cargado en CPU")
|
113 |
|
|
|
|
|
114 |
except Exception as e:
|
115 |
-
logger.error(f"Error
|
116 |
raise
|
117 |
|
118 |
-
def
|
119 |
-
"""
|
120 |
-
if not text or len(text.strip()) < 2:
|
121 |
-
return False, "El texto debe tener al menos 2 caracteres."
|
122 |
-
|
123 |
-
if len(text) > 600:
|
124 |
-
return False, "El texto no puede exceder los 600 caracteres."
|
125 |
-
|
126 |
-
if not audio_file:
|
127 |
-
return False, "Debe seleccionar un audio de referencia."
|
128 |
-
|
129 |
-
# Verificar que el archivo de audio existe
|
130 |
-
if not os.path.exists(audio_file):
|
131 |
-
return False, f"El archivo de audio de referencia no existe: {audio_file}"
|
132 |
-
|
133 |
-
return True, ""
|
134 |
-
|
135 |
-
def predict(self, prompt, language, reference_audio, speed):
|
136 |
-
"""Genera la síntesis de voz"""
|
137 |
try:
|
138 |
-
#
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
if not self.model:
|
148 |
-
return None, "Modelo no cargado correctamente."
|
149 |
-
|
150 |
-
# Parámetros optimizados para mejor calidad
|
151 |
-
inference_params = {
|
152 |
-
"temperature": 0.65,
|
153 |
-
"length_penalty": 1.2,
|
154 |
-
"repetition_penalty": 2.2,
|
155 |
-
"top_k": 40,
|
156 |
-
"top_p": 0.75,
|
157 |
-
"enable_text_splitting": True,
|
158 |
-
"speed": max(0.5, min(2.0, speed)) # Asegurar rango válido
|
159 |
-
}
|
160 |
-
|
161 |
-
logger.info(f"Generando audio para: '{prompt[:50]}...' en idioma: {language}")
|
162 |
|
163 |
-
|
|
|
|
|
|
|
|
|
164 |
gpt_cond_latent, speaker_embedding = self.model.get_conditioning_latents(
|
165 |
audio_path=reference_audio
|
166 |
)
|
167 |
|
168 |
start_time = time.time()
|
169 |
-
|
170 |
# Generar audio
|
171 |
out = self.model.inference(
|
172 |
-
|
173 |
language,
|
174 |
gpt_cond_latent,
|
175 |
speaker_embedding,
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
)
|
178 |
|
179 |
inference_time = time.time() - start_time
|
180 |
|
181 |
-
#
|
182 |
timestamp = int(time.time())
|
183 |
-
output_path = f"
|
184 |
|
185 |
-
# Guardar audio con verificación
|
186 |
sample_rate = self.config.audio.get("output_sample_rate", 22050)
|
187 |
wavfile.write(output_path, sample_rate, out["wav"])
|
188 |
|
189 |
-
#
|
190 |
-
if not os.path.exists(output_path):
|
191 |
-
raise Exception("Error al guardar el archivo de audio")
|
192 |
-
|
193 |
-
# Calcular métricas
|
194 |
audio_length = len(out["wav"]) / sample_rate
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
|
203 |
-
|
204 |
-
return output_path, metrics_text
|
205 |
|
206 |
except Exception as e:
|
207 |
-
error_msg = f"Error
|
208 |
logger.error(error_msg)
|
209 |
return None, error_msg
|
210 |
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
# Inicializar la aplicación
|
215 |
-
app = PedroTTSApp()
|
216 |
-
|
217 |
-
try:
|
218 |
-
app.setup_model()
|
219 |
-
except Exception as e:
|
220 |
-
logger.error(f"Error al inicializar la aplicación: {e}")
|
221 |
-
raise
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
("Español", "es"),
|
226 |
-
("English", "en")
|
227 |
-
]
|
228 |
|
229 |
-
|
230 |
-
|
231 |
-
|
|
|
|
|
232 |
("Alegre", "alegre.wav"),
|
233 |
("Neutral Inglés", "neutral_ingles.wav")
|
234 |
]
|
235 |
|
236 |
-
# Tema personalizado
|
237 |
-
theme = gr.themes.Soft(
|
238 |
-
primary_hue="blue",
|
239 |
-
secondary_hue="gray",
|
240 |
-
).set(
|
241 |
-
body_background_fill='*neutral_50',
|
242 |
-
body_background_fill_dark='*neutral_900',
|
243 |
-
)
|
244 |
-
|
245 |
-
description = """
|
246 |
-
# 🎙️ Sintetizador de Voz - Pedro Labattaglia
|
247 |
-
|
248 |
-
Sintetizador de voz de alta calidad con la voz del reconocido locutor argentino Pedro Labattaglia.
|
249 |
-
|
250 |
-
## 📖 Instrucciones de uso:
|
251 |
-
1. **Seleccione el idioma** (Español o English)
|
252 |
-
2. **Elija un audio de referencia** que determine el tono y estilo
|
253 |
-
3. **Ajuste la velocidad** del habla según su preferencia
|
254 |
-
4. **Escriba el texto** que desea sintetizar (2-600 caracteres)
|
255 |
-
5. **Presione "Generar Voz"** y espere el resultado
|
256 |
-
|
257 |
-
> ⚡ El proceso puede tomar unos segundos dependiendo de la longitud del texto.
|
258 |
-
"""
|
259 |
-
|
260 |
# Crear interfaz
|
261 |
-
with gr.Blocks(
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
container=True,
|
272 |
-
height=300,
|
273 |
-
width=300
|
274 |
-
)
|
275 |
-
|
276 |
-
# Controles principales
|
277 |
with gr.Row():
|
278 |
with gr.Column(scale=2):
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
)
|
311 |
-
|
312 |
-
with gr.Row():
|
313 |
-
char_count = gr.Textbox(
|
314 |
-
label="Contador de caracteres",
|
315 |
-
value="0/600",
|
316 |
-
interactive=False,
|
317 |
-
scale=1
|
318 |
-
)
|
319 |
-
generate_button = gr.Button(
|
320 |
-
"🎙️ Generar Voz",
|
321 |
-
variant="primary",
|
322 |
-
scale=2,
|
323 |
-
size="lg"
|
324 |
-
)
|
325 |
-
|
326 |
with gr.Column(scale=1):
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
interactive=False
|
339 |
-
)
|
340 |
-
|
341 |
-
# Actualizar contador de caracteres
|
342 |
-
def update_char_count(text):
|
343 |
-
count = len(text) if text else 0
|
344 |
-
return f"{count}/600"
|
345 |
-
|
346 |
-
input_text.change(
|
347 |
-
update_char_count,
|
348 |
-
inputs=[input_text],
|
349 |
-
outputs=[char_count]
|
350 |
-
)
|
351 |
|
352 |
-
#
|
353 |
-
|
354 |
-
app.
|
355 |
-
inputs=[
|
356 |
-
outputs=[
|
357 |
)
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
- **Longitud máxima:** 600 caracteres por generación
|
366 |
-
- **Tiempo de procesamiento:** Variable según la longitud del texto
|
367 |
-
""")
|
368 |
|
369 |
return demo
|
370 |
|
371 |
def main():
|
372 |
-
"""Función principal"""
|
373 |
try:
|
374 |
-
|
375 |
-
os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
|
376 |
-
os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
|
377 |
|
378 |
-
# Configurar
|
379 |
-
|
380 |
-
os.environ["OMP_NUM_THREADS"] = "1"
|
381 |
-
|
382 |
-
demo = create_gradio_interface()
|
383 |
|
384 |
-
#
|
385 |
-
|
386 |
|
387 |
-
# Configuración de lanzamiento
|
388 |
-
|
|
|
|
|
|
|
389 |
"show_error": True,
|
390 |
"quiet": False,
|
391 |
-
"
|
392 |
}
|
393 |
|
394 |
-
|
395 |
-
|
396 |
-
launch_kwargs.update({
|
397 |
-
"server_name": "0.0.0.0",
|
398 |
-
"server_port": 7860,
|
399 |
-
"share": False, # No necesario en Spaces
|
400 |
-
})
|
401 |
-
else:
|
402 |
-
# Configuración para desarrollo local
|
403 |
-
launch_kwargs.update({
|
404 |
-
"server_name": "127.0.0.1",
|
405 |
-
"server_port": 7860,
|
406 |
-
"share": True, # Crear link compartible para desarrollo local
|
407 |
-
"auth": [("Pedro Labattaglia", "PL2024"), ("Invitado", "PLTTS2024")]
|
408 |
-
})
|
409 |
-
|
410 |
-
logger.info(f"Lanzando aplicación {'en Hugging Face Spaces' if is_spaces else 'localmente'}...")
|
411 |
-
demo.launch(**launch_kwargs)
|
412 |
|
413 |
except Exception as e:
|
414 |
-
logger.error(f"Error
|
415 |
-
#
|
416 |
try:
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
|
|
|
|
|
|
423 |
|
424 |
if __name__ == "__main__":
|
425 |
main()
|
|
|
6 |
import logging
|
7 |
from pathlib import Path
|
8 |
import torch
|
|
|
9 |
import gradio as gr
|
|
|
10 |
from TTS.tts.configs.xtts_config import XttsConfig
|
11 |
from TTS.tts.models.xtts import Xtts
|
12 |
from TTS.utils.generic_utils import get_user_data_dir
|
13 |
from huggingface_hub import hf_hub_download
|
14 |
import scipy.io.wavfile as wavfile
|
15 |
+
import warnings
|
16 |
|
17 |
+
# Suprimir warnings
|
18 |
+
warnings.filterwarnings("ignore")
|
|
|
19 |
|
20 |
# Configuración inicial
|
21 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
22 |
+
os.environ["OMP_NUM_THREADS"] = "1"
|
23 |
|
24 |
+
# Configurar logging
|
25 |
+
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
26 |
+
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
class PedroTTSApp:
    """XTTS voice-synthesis backend used by the Gradio interface.

    Holds the loaded model and its config. ``setup_model`` downloads and loads
    the checkpoint; ``generate_speech`` synthesizes one clip per call.
    """

    # Inclusive bounds applied to the ``speed`` argument before inference.
    MIN_SPEED = 0.5
    MAX_SPEED = 2.0
    # Maximum number of characters (after trimming) accepted per request.
    MAX_TEXT_LENGTH = 600

    def __init__(self):
        """Pick the compute device; the model itself is loaded by ``setup_model``."""
        self.model = None
        self.config = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Inicializando en dispositivo: {self.device}")

    def setup_model(self):
        """Download any missing model files and load the XTTS checkpoint.

        Files are fetched from the Hugging Face repository into the local TTS
        data directory only when they are not already present. The checkpoint
        is loaded without DeepSpeed and then moved to GPU or CPU.

        Raises:
            Exception: any download or load failure is logged (with traceback)
                and re-raised to the caller.
        """
        try:
            logger.info("Configurando modelo XTTS...")

            # Repository and local cache location
            repo_id = "Blakus/Pedro_Lab_XTTS"
            local_dir = Path(get_user_data_dir("tts")) / "tts_models--multilingual--multi-dataset--xtts_v2"
            local_dir.mkdir(parents=True, exist_ok=True)

            # Download only the files that are not already cached
            files_to_download = ["config.json", "model.pth", "vocab.json"]

            for file_name in files_to_download:
                file_path = local_dir / file_name
                if not file_path.exists():
                    logger.info(f"Descargando {file_name}...")
                    hf_hub_download(
                        repo_id=repo_id,
                        filename=file_name,
                        local_dir=str(local_dir)
                    )
                else:
                    logger.info(f"{file_name} ya existe")

            # Build the model from its config
            config_path = str(local_dir / "config.json")
            checkpoint_path = str(local_dir / "model.pth")
            vocab_path = str(local_dir / "vocab.json")

            self.config = XttsConfig()
            self.config.load_json(config_path)

            self.model = Xtts.init_from_config(self.config)

            # Load without DeepSpeed for broader compatibility
            self.model.load_checkpoint(
                self.config,
                checkpoint_path=checkpoint_path,
                vocab_path=vocab_path,
                eval=True,
                use_deepspeed=False
            )

            if self.device == "cuda" and torch.cuda.is_available():
                self.model.cuda()
                logger.info("Modelo cargado en GPU")
            else:
                self.model.cpu()
                logger.info("Modelo cargado en CPU")

            logger.info("Modelo configurado exitosamente")

        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped
            logger.exception(f"Error configurando modelo: {e}")
            raise

    def generate_speech(self, text, language, reference_audio, speed):
        """Synthesize ``text`` and write it to a WAV file.

        Args:
            text: text to speak; it is trimmed before validation, so
                surrounding whitespace does not count toward the length limit.
            language: language code passed to the model (e.g. ``"es"``).
            reference_audio: path of the reference clip used for conditioning.
            speed: speech-rate multiplier; clamped to
                ``[MIN_SPEED, MAX_SPEED]``.

        Returns:
            ``(output_path, metrics)`` on success, or ``(None, message)`` when
            validation fails or any error occurs. This method does not raise.
        """
        try:
            # Basic validation (on the trimmed text)
            text = text.strip() if text else ""
            if len(text) < 2:
                return None, "❌ El texto debe tener al menos 2 caracteres"

            if len(text) > self.MAX_TEXT_LENGTH:
                return None, "❌ El texto no puede exceder 600 caracteres"

            if not reference_audio:
                return None, "❌ Seleccione un audio de referencia"

            # Fail with a clear message instead of an AttributeError on None
            if self.model is None:
                return None, "❌ Modelo no cargado correctamente"

            if not Path(reference_audio).is_file():
                return None, f"❌ El audio de referencia no existe: {reference_audio}"

            # Keep the rate inside the supported range even for direct callers
            speed = max(self.MIN_SPEED, min(self.MAX_SPEED, float(speed)))

            logger.info(f"Generando audio para: '{text[:50]}...'")

            # Conditioning latents from the reference clip
            gpt_cond_latent, speaker_embedding = self.model.get_conditioning_latents(
                audio_path=reference_audio
            )

            start_time = time.time()

            # Run inference
            out = self.model.inference(
                text,
                language,
                gpt_cond_latent,
                speaker_embedding,
                temperature=0.7,
                length_penalty=1.0,
                repetition_penalty=2.0,
                top_k=50,
                top_p=0.8,
                speed=speed,
                enable_text_splitting=True
            )

            inference_time = time.time() - start_time

            # Save the audio. A random suffix keeps two requests that finish
            # within the same second from overwriting each other's file.
            import uuid

            timestamp = int(time.time())
            output_path = f"output_{timestamp}_{uuid.uuid4().hex[:8]}.wav"

            sample_rate = self.config.audio.get("output_sample_rate", 22050)
            wavfile.write(output_path, sample_rate, out["wav"])

            # Metrics shown to the user
            audio_length = len(out["wav"]) / sample_rate
            rtf = inference_time / audio_length if audio_length > 0 else 0

            metrics = "\n".join([
                "✅ Generación completada",
                f"🕐 Tiempo: {inference_time:.2f}s",
                f"📏 Duración: {audio_length:.2f}s",
                f"⚡ Factor RT: {rtf:.2f}x",
                f"🎵 Sample Rate: {sample_rate}Hz",
            ])

            return output_path, metrics

        except Exception as e:
            error_msg = f"❌ Error: {str(e)}"
            # Keep the traceback in the log; the user only sees the message
            logger.exception(error_msg)
            return None, error_msg
|
156 |
|
157 |
+
# Inicializar aplicación global
|
158 |
+
app = PedroTTSApp()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
|
160 |
+
def create_interface():
    """Build the Gradio Blocks UI and wire it to ``app.generate_speech``.

    The left column holds the inputs (language, reference voice, speed, text
    and the generate button); the right column holds the generated audio and a
    status box.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """

    # Dropdown options as (label, value) pairs
    languages = [("Español", "es"), ("English", "en")]
    audio_refs = [
        ("Neutral", "neutral.wav"),
        ("Serio", "serio.wav"),
        ("Alegre", "alegre.wav"),
        ("Neutral Inglés", "neutral_ingles.wav")
    ]

    # Build the interface
    with gr.Blocks(
        title="Pedro Labattaglia TTS",
        theme=gr.themes.Soft()
    ) as demo:

        gr.Markdown("""
        # 🎙️ Pedro Labattaglia - Síntesis de Voz
        Generador de voz con IA usando la voz del locutor Pedro Labattaglia
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Input controls
                language = gr.Dropdown(
                    choices=languages,
                    value="es",
                    label="🌐 Idioma"
                )

                reference = gr.Dropdown(
                    choices=audio_refs,
                    value="neutral.wav",
                    label="🎵 Estilo de voz"
                )

                # Explicit keywords: a fourth positional argument is not a
                # safe way to pass ``step`` to gr.Slider.
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="⚡ Velocidad"
                )

                text_input = gr.Textbox(
                    label="📝 Texto a sintetizar",
                    placeholder="Escriba el texto aquí...",
                    lines=4,
                    max_lines=6
                )

                generate_btn = gr.Button(
                    "🎙️ Generar Voz",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=1):
                # Results
                audio_output = gr.Audio(
                    label="🎧 Audio Generado",
                    show_download_button=True
                )

                metrics_output = gr.Textbox(
                    label="📊 Estado",
                    value="Listo para generar audio...",
                    lines=6
                )

        # Input order must match generate_speech(text, language, reference_audio, speed)
        generate_btn.click(
            fn=app.generate_speech,
            inputs=[text_input, language, reference, speed],
            outputs=[audio_output, metrics_output]
        )

        gr.Markdown("""
        ### ℹ️ Información
        - **Longitud**: 2-600 caracteres
        - **Idiomas**: Español e Inglés
        - **Calidad**: 22kHz
        """)

    return demo
|
244 |
|
245 |
def main():
    """Load the model, build the UI and launch the Gradio server.

    If any step fails, the error is logged with its traceback and a minimal
    placeholder interface is launched instead. A failure of that fallback is
    logged rather than raised, so this function never propagates an
    ``Exception``.
    """
    try:
        logger.info("Iniciando aplicación...")

        # Load the model
        app.setup_model()

        # Build the interface
        demo = create_interface()

        # Launch settings
        # NOTE(review): the original comment says share is always enabled for
        # Spaces; Gradio's docs describe share links as unsupported there, so
        # confirm this flag is still wanted.
        launch_config = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": True,
            "show_error": True,
            "quiet": False,
            "inbrowser": False
        }

        logger.info("Lanzando interfaz...")
        demo.launch(**launch_config)

    except Exception as e:
        logger.exception(f"Error crítico: {e}")
        # Try a minimal launch so the page shows something
        try:
            demo = gr.Interface(
                # The interface has one input, so the callback must accept one
                # argument; a zero-argument lambda fails on every submit.
                fn=lambda _ignored: "Error en la configuración",
                inputs=gr.Textbox("Error"),
                outputs=gr.Textbox("Error"),
                title="Error de Configuración"
            )
            demo.launch(share=True)
        except Exception:
            # Still best-effort, but no longer silent, and no longer catching
            # KeyboardInterrupt/SystemExit as the bare ``except`` did.
            logger.exception("No se pudo lanzar la interfaz de error")
|
282 |
|
283 |
if __name__ == "__main__":
|
284 |
main()
|