Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -185,9 +185,7 @@ class PedroTTSApp:
|
|
185 |
"""Cargar audios de referencia desde dataset privado"""
|
186 |
return self.load_private_reference_audios()
|
187 |
|
188 |
-
def generate_speech(self, text, language, reference_audio, speed,
|
189 |
-
temperature, length_penalty, repetition_penalty,
|
190 |
-
top_k, top_p, enable_text_splitting):
|
191 |
"""Genera el audio de voz con configuración avanzada"""
|
192 |
try:
|
193 |
if not text or len(text.strip()) < 2:
|
@@ -215,6 +213,12 @@ class PedroTTSApp:
|
|
215 |
|
216 |
start_time = time.time()
|
217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
# Usar parámetros con conversión explícita a float para evitar errores de tipo
|
219 |
out = self.model.inference(
|
220 |
text,
|
@@ -468,6 +472,18 @@ def create_interface():
|
|
468 |
info="Velocidad de reproducción del audio"
|
469 |
)
|
470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
text_input = gr.Textbox(
|
472 |
label="📝 Texto a sintetizar",
|
473 |
placeholder="Escriba el texto aquí...",
|
@@ -493,143 +509,6 @@ def create_interface():
|
|
493 |
variant="primary",
|
494 |
size="lg"
|
495 |
)
|
496 |
-
|
497 |
-
with gr.TabItem("⚙️ Configuración Avanzada"):
|
498 |
-
gr.Markdown("### 🔧 Parámetros Avanzados de Síntesis")
|
499 |
-
gr.Markdown("Ajuste estos valores para obtener diferentes características en la voz generada. ⚠️SOLO PARA USUARIOS AVANZADOS⚠️")
|
500 |
-
|
501 |
-
with gr.Row():
|
502 |
-
with gr.Column():
|
503 |
-
gr.Markdown("#### 🎚️ Parámetros de Calidad")
|
504 |
-
|
505 |
-
temperature = gr.Slider(
|
506 |
-
0.1, 1.5, 0.65,
|
507 |
-
label="🌡️ Temperatura",
|
508 |
-
info="Controla la creatividad. Valores bajos = más estable, valores altos = más expresivo"
|
509 |
-
)
|
510 |
-
|
511 |
-
length_penalty = gr.Slider(
|
512 |
-
-10.0, 10.0, 1.0,
|
513 |
-
label="📏 Length Penalty",
|
514 |
-
info="Controla la longitud de las pausas y entonación"
|
515 |
-
)
|
516 |
-
|
517 |
-
repetition_penalty = gr.Slider(
|
518 |
-
1.0, 10.0, 2.0, 0.1,
|
519 |
-
label="🔄 Repetition Penalty",
|
520 |
-
info="Evita repeticiones. Valores altos reducen repeticiones"
|
521 |
-
)
|
522 |
-
|
523 |
-
with gr.Column():
|
524 |
-
gr.Markdown("#### 🎯 Parámetros de Sampling")
|
525 |
-
|
526 |
-
top_k = gr.Slider(
|
527 |
-
1, 100, 50, 1,
|
528 |
-
label="🔝 Top-K",
|
529 |
-
info="Número de tokens más probables a considerar"
|
530 |
-
)
|
531 |
-
|
532 |
-
top_p = gr.Slider(
|
533 |
-
0.01, 1.0, 0.8, 0.01,
|
534 |
-
label="📊 Top-P (Nucleus)",
|
535 |
-
info="Probabilidad acumulada para sampling nuclear"
|
536 |
-
)
|
537 |
-
|
538 |
-
with gr.Column():
|
539 |
-
gr.Markdown("#### 🔘 Opciones de Procesamiento")
|
540 |
-
|
541 |
-
enable_text_splitting = gr.Checkbox(
|
542 |
-
value=True,
|
543 |
-
label="✂️ División por puntuación",
|
544 |
-
info="Divide texto en oraciones por puntuación para mejor entonación"
|
545 |
-
)
|
546 |
-
|
547 |
-
with gr.Row():
|
548 |
-
gr.Markdown("#### 🎛️ ¿No quieres modificar estos parámetros? No hay problema, puedes usar los presets de configuración")
|
549 |
-
|
550 |
-
with gr.Row():
|
551 |
-
conservative_btn = gr.Button(
|
552 |
-
"🛡️ Conservador",
|
553 |
-
variant="primary",
|
554 |
-
elem_classes="preset-button-selected"
|
555 |
-
)
|
556 |
-
balanced_btn = gr.Button(
|
557 |
-
"⚖️ Balanceado",
|
558 |
-
variant="secondary",
|
559 |
-
elem_classes="preset-button-unselected"
|
560 |
-
)
|
561 |
-
creative_btn = gr.Button(
|
562 |
-
"🎨 Creativo",
|
563 |
-
variant="secondary",
|
564 |
-
elem_classes="preset-button-unselected"
|
565 |
-
)
|
566 |
-
|
567 |
-
# Preset values - ensuring all values are proper floats to avoid type errors
|
568 |
-
def set_conservative():
|
569 |
-
return 0.45, 0.5, 1.8, 30.0, 0.75, True
|
570 |
-
|
571 |
-
def set_balanced():
|
572 |
-
return 0.65, 1.0, 2.0, 50.0, 0.8, True
|
573 |
-
|
574 |
-
def set_creative():
|
575 |
-
return 0.85, 1.5, 2.5, 70.0, 0.9, True
|
576 |
-
|
577 |
-
def update_preset_buttons_conservative():
|
578 |
-
return (
|
579 |
-
gr.update(variant="primary", elem_classes=["preset-button-selected"]),
|
580 |
-
gr.update(variant="secondary", elem_classes=["preset-button-unselected"]),
|
581 |
-
gr.update(variant="secondary", elem_classes=["preset-button-unselected"])
|
582 |
-
)
|
583 |
-
|
584 |
-
def update_preset_buttons_balanced():
|
585 |
-
return (
|
586 |
-
gr.update(variant="secondary", elem_classes=["preset-button-unselected"]),
|
587 |
-
gr.update(variant="primary", elem_classes=["preset-button-selected"]),
|
588 |
-
gr.update(variant="secondary", elem_classes=["preset-button-unselected"])
|
589 |
-
)
|
590 |
-
|
591 |
-
def update_preset_buttons_creative():
|
592 |
-
return (
|
593 |
-
gr.update(variant="secondary", elem_classes=["preset-button-unselected"]),
|
594 |
-
gr.update(variant="secondary", elem_classes=["preset-button-unselected"]),
|
595 |
-
gr.update(variant="primary", elem_classes=["preset-button-selected"])
|
596 |
-
)
|
597 |
-
|
598 |
-
def apply_conservative_preset():
|
599 |
-
values = set_conservative()
|
600 |
-
buttons = update_preset_buttons_conservative()
|
601 |
-
return values + buttons
|
602 |
-
|
603 |
-
def apply_balanced_preset():
|
604 |
-
values = set_balanced()
|
605 |
-
buttons = update_preset_buttons_balanced()
|
606 |
-
return values + buttons
|
607 |
-
|
608 |
-
def apply_creative_preset():
|
609 |
-
values = set_creative()
|
610 |
-
buttons = update_preset_buttons_creative()
|
611 |
-
return values + buttons
|
612 |
-
|
613 |
-
conservative_btn.click(
|
614 |
-
fn=apply_conservative_preset,
|
615 |
-
outputs=[temperature, length_penalty, repetition_penalty,
|
616 |
-
top_k, top_p, enable_text_splitting,
|
617 |
-
conservative_btn, balanced_btn, creative_btn]
|
618 |
-
)
|
619 |
-
|
620 |
-
balanced_btn.click(
|
621 |
-
fn=apply_balanced_preset,
|
622 |
-
outputs=[temperature, length_penalty, repetition_penalty,
|
623 |
-
top_k, top_p, enable_text_splitting,
|
624 |
-
conservative_btn, balanced_btn, creative_btn]
|
625 |
-
)
|
626 |
-
|
627 |
-
creative_btn.click(
|
628 |
-
fn=apply_creative_preset,
|
629 |
-
outputs=[temperature, length_penalty, repetition_penalty,
|
630 |
-
top_k, top_p, enable_text_splitting,
|
631 |
-
conservative_btn, balanced_btn, creative_btn]
|
632 |
-
)
|
633 |
|
634 |
with gr.Column(elem_classes="credits-section"):
|
635 |
gr.HTML("""
|
@@ -643,9 +522,7 @@ def create_interface():
|
|
643 |
|
644 |
generate_btn.click(
|
645 |
fn=app.generate_speech,
|
646 |
-
inputs=[text_input, language, reference, speed,
|
647 |
-
temperature, length_penalty, repetition_penalty,
|
648 |
-
top_k, top_p, enable_text_splitting],
|
649 |
outputs=[audio_output, metrics_output]
|
650 |
)
|
651 |
|
|
|
185 |
"""Cargar audios de referencia desde dataset privado"""
|
186 |
return self.load_private_reference_audios()
|
187 |
|
188 |
+
def generate_speech(self, text, language, reference_audio, speed, temperature, enable_text_splitting):
|
|
|
|
|
189 |
"""Genera el audio de voz con configuración avanzada"""
|
190 |
try:
|
191 |
if not text or len(text.strip()) < 2:
|
|
|
213 |
|
214 |
start_time = time.time()
|
215 |
|
216 |
+
# Valores fijos para los parámetros no expuestos
|
217 |
+
length_penalty = 1.0
|
218 |
+
repetition_penalty = 5.0
|
219 |
+
top_k = 50.0
|
220 |
+
top_p = 0.85
|
221 |
+
|
222 |
# Usar parámetros con conversión explícita a float para evitar errores de tipo
|
223 |
out = self.model.inference(
|
224 |
text,
|
|
|
472 |
info="Velocidad de reproducción del audio"
|
473 |
)
|
474 |
|
475 |
+
temperature = gr.Slider(
|
476 |
+
0.1, 1.5, 0.75, 0.05,
|
477 |
+
label="🎨 Creatividad",
|
478 |
+
info="🛡️ Más estable pero menos creativo/expresivo ← → 🎭 Menos estable pero más creativo/expresivo"
|
479 |
+
)
|
480 |
+
|
481 |
+
enable_text_splitting = gr.Checkbox(
|
482 |
+
value=True,
|
483 |
+
label="📖 Segmentación inteligente",
|
484 |
+
info="✅ Puede generar mejor coherencia con textos largos | ⚠️ A costa de estabilidad o pequeños errores"
|
485 |
+
)
|
486 |
+
|
487 |
text_input = gr.Textbox(
|
488 |
label="📝 Texto a sintetizar",
|
489 |
placeholder="Escriba el texto aquí...",
|
|
|
509 |
variant="primary",
|
510 |
size="lg"
|
511 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
512 |
|
513 |
with gr.Column(elem_classes="credits-section"):
|
514 |
gr.HTML("""
|
|
|
522 |
|
523 |
generate_btn.click(
|
524 |
fn=app.generate_speech,
|
525 |
+
inputs=[text_input, language, reference, speed, temperature, enable_text_splitting],
|
|
|
|
|
526 |
outputs=[audio_output, metrics_output]
|
527 |
)
|
528 |
|