XTTS_finetuned_dani

Sleeping

App Files Files Community

RedSparkie committed on Sep 20, 2024

Commit

a627d55

verified ·

1 Parent(s): 3cc5048

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -23

app.py CHANGED Viewed

@@ -3,36 +3,29 @@ import gradio as gr
 import torch
 from TTS.api import TTS
 import os
-import argparse
-import os
-import sys
 import tempfile
-import librosa.display
-import numpy as np
 import torchaudio
-import traceback
-from TTS.demos.xtts_ft_demo.utils.formatter import format_audio_list
-from TTS.demos.xtts_ft_demo.utils.gpt_train import train_gpt
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
 os.environ["COQUI_TOS_AGREED"] = "1"
 device = "cpu"
-tts = TTS("RedSparkie/danielmula").to(device)
-model_path = 'RedSparkie/danielmula/model.pth'
-config_path = 'RedSparkie/danielmula/config.json'
-vocab_path = 'RedSparkie/danielmula/vocab.json'
 def clear_gpu_cache():
-    # clear the GPU cache
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
 XTTS_MODEL = None
 def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
     global XTTS_MODEL
@@ -42,24 +35,32 @@ def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
     config = XttsConfig()
     config.load_json(xtts_config)
     XTTS_MODEL = Xtts.init_from_config(config)
-    print("Loading XTTS model! ")
     XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, use_deepspeed=False)
-    if torch.cuda.is_available():
-        XTTS_MODEL.cuda()
     print("Model Loaded!")
 def run_tts(lang, tts_text, speaker_audio_file):
     if XTTS_MODEL is None or not speaker_audio_file:
         return "You need to run the previous step to load the model !!", None, None
-    gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(audio_path=speaker_audio_file, gpt_cond_len=XTTS_MODEL.config.gpt_cond_len, max_ref_length=XTTS_MODEL.config.max_ref_len, sound_norm_refs=XTTS_MODEL.config.sound_norm_refs)
     out = XTTS_MODEL.inference(
         text=tts_text,
         language=lang,
         gpt_cond_latent=gpt_cond_latent,
         speaker_embedding=speaker_embedding,
-        temperature=XTTS_MODEL.config.temperature, # Add custom parameters here
         length_penalty=XTTS_MODEL.config.length_penalty,
         repetition_penalty=XTTS_MODEL.config.repetition_penalty,
         top_k=XTTS_MODEL.config.top_k,
@@ -70,21 +71,23 @@ def run_tts(lang, tts_text, speaker_audio_file):
         out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
         out_path = fp.name
         torchaudio.save(out_path, out["wav"], 24000)
-    print("Speech generated !")
     return out_path, speaker_audio_file
 @spaces.GPU(enable_queue=True)
 def generate(text, audio):
     load_model(model_path, config_path, vocab_path)
     out_path, speaker_audio_file = run_tts(lang='es', tts_text=text, speaker_audio_file=audio)
     return out_path
 demo = gr.Interface(
     fn=generate,
     inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
     outputs=gr.Audio(type='filepath')
 )
 demo.launch()

 import torch
 from TTS.api import TTS
 import os
 import tempfile
 import torchaudio
+from huggingface_hub import hf_hub_download
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
+# Aceptar los términos de COQUI
 os.environ["COQUI_TOS_AGREED"] = "1"
+# Definir el dispositivo como CPU
 device = "cpu"
+# Descargar archivos desde HuggingFace
+model_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="model.pth")
+config_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="config.json")
+vocab_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="vocab.json")
+# Función para limpiar la caché de GPU (no necesaria para CPU, pero la mantengo por si en el futuro usas GPU)
 def clear_gpu_cache():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
+# Cargar el modelo XTTS
 XTTS_MODEL = None
 def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
     global XTTS_MODEL
     config = XttsConfig()
     config.load_json(xtts_config)
     XTTS_MODEL = Xtts.init_from_config(config)
+    print("Loading XTTS model!")
     XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, use_deepspeed=False)
+    # No mover a GPU ya que usamos CPU
+    # if torch.cuda.is_available():
+    #     XTTS_MODEL.cuda()
     print("Model Loaded!")
+# Función para ejecutar TTS
 def run_tts(lang, tts_text, speaker_audio_file):
     if XTTS_MODEL is None or not speaker_audio_file:
         return "You need to run the previous step to load the model !!", None, None
+    gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
+        audio_path=speaker_audio_file,
+        gpt_cond_len=XTTS_MODEL.config.gpt_cond_len,
+        max_ref_length=XTTS_MODEL.config.max_ref_len,
+        sound_norm_refs=XTTS_MODEL.config.sound_norm_refs
+    )
     out = XTTS_MODEL.inference(
         text=tts_text,
         language=lang,
         gpt_cond_latent=gpt_cond_latent,
         speaker_embedding=speaker_embedding,
+        temperature=XTTS_MODEL.config.temperature,
         length_penalty=XTTS_MODEL.config.length_penalty,
         repetition_penalty=XTTS_MODEL.config.repetition_penalty,
         top_k=XTTS_MODEL.config.top_k,
         out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
         out_path = fp.name
         torchaudio.save(out_path, out["wav"], 24000)
+    print("Speech generated!")
     return out_path, speaker_audio_file
+# Definir la función para Gradio
 @spaces.GPU(enable_queue=True)
 def generate(text, audio):
     load_model(model_path, config_path, vocab_path)
     out_path, speaker_audio_file = run_tts(lang='es', tts_text=text, speaker_audio_file=audio)
     return out_path
+# Configurar la interfaz de Gradio
 demo = gr.Interface(
     fn=generate,
     inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
     outputs=gr.Audio(type='filepath')
 )
+# Lanzar la interfaz
 demo.launch()