RedSparkie committed on
Commit
a627d55
verified
1 Parent(s): 3cc5048

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -23
app.py CHANGED
@@ -3,36 +3,29 @@ import gradio as gr
3
  import torch
4
  from TTS.api import TTS
5
  import os
6
- import argparse
7
- import os
8
- import sys
9
  import tempfile
10
- import librosa.display
11
- import numpy as np
12
-
13
  import torchaudio
14
- import traceback
15
- from TTS.demos.xtts_ft_demo.utils.formatter import format_audio_list
16
- from TTS.demos.xtts_ft_demo.utils.gpt_train import train_gpt
17
-
18
  from TTS.tts.configs.xtts_config import XttsConfig
19
  from TTS.tts.models.xtts import Xtts
20
 
 
21
  os.environ["COQUI_TOS_AGREED"] = "1"
22
 
 
23
  device = "cpu"
24
 
25
- tts = TTS("RedSparkie/danielmula").to(device)
26
- model_path = 'RedSparkie/danielmula/model.pth'
27
- config_path = 'RedSparkie/danielmula/config.json'
28
- vocab_path = 'RedSparkie/danielmula/vocab.json'
29
-
30
 
 
31
  def clear_gpu_cache():
32
- # clear the GPU cache
33
  if torch.cuda.is_available():
34
  torch.cuda.empty_cache()
35
 
 
36
  XTTS_MODEL = None
37
  def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
38
  global XTTS_MODEL
@@ -42,24 +35,32 @@ def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
42
  config = XttsConfig()
43
  config.load_json(xtts_config)
44
  XTTS_MODEL = Xtts.init_from_config(config)
45
- print("Loading XTTS model! ")
46
  XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, use_deepspeed=False)
47
- if torch.cuda.is_available():
48
- XTTS_MODEL.cuda()
 
 
49
 
50
  print("Model Loaded!")
51
 
 
52
  def run_tts(lang, tts_text, speaker_audio_file):
53
  if XTTS_MODEL is None or not speaker_audio_file:
54
  return "You need to run the previous step to load the model !!", None, None
55
 
56
- gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(audio_path=speaker_audio_file, gpt_cond_len=XTTS_MODEL.config.gpt_cond_len, max_ref_length=XTTS_MODEL.config.max_ref_len, sound_norm_refs=XTTS_MODEL.config.sound_norm_refs)
 
 
 
 
 
57
  out = XTTS_MODEL.inference(
58
  text=tts_text,
59
  language=lang,
60
  gpt_cond_latent=gpt_cond_latent,
61
  speaker_embedding=speaker_embedding,
62
- temperature=XTTS_MODEL.config.temperature, # Add custom parameters here
63
  length_penalty=XTTS_MODEL.config.length_penalty,
64
  repetition_penalty=XTTS_MODEL.config.repetition_penalty,
65
  top_k=XTTS_MODEL.config.top_k,
@@ -70,21 +71,23 @@ def run_tts(lang, tts_text, speaker_audio_file):
70
  out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
71
  out_path = fp.name
72
  torchaudio.save(out_path, out["wav"], 24000)
73
- print("Speech generated !")
74
 
75
  return out_path, speaker_audio_file
76
 
77
-
78
  @spaces.GPU(enable_queue=True)
79
  def generate(text, audio):
80
  load_model(model_path, config_path, vocab_path)
81
  out_path, speaker_audio_file = run_tts(lang='es', tts_text=text, speaker_audio_file=audio)
82
  return out_path
83
 
 
84
  demo = gr.Interface(
85
  fn=generate,
86
  inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
87
  outputs=gr.Audio(type='filepath')
88
  )
89
 
 
90
  demo.launch()
 
3
  import torch
4
  from TTS.api import TTS
5
  import os
 
 
 
6
  import tempfile
 
 
 
7
  import torchaudio
8
+ from huggingface_hub import hf_hub_download
 
 
 
9
  from TTS.tts.configs.xtts_config import XttsConfig
10
  from TTS.tts.models.xtts import Xtts
11
 
12
+ # Aceptar los t茅rminos de COQUI
13
  os.environ["COQUI_TOS_AGREED"] = "1"
14
 
15
+ # Definir el dispositivo como CPU
16
  device = "cpu"
17
 
18
+ # Descargar archivos desde HuggingFace
19
+ model_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="model.pth")
20
+ config_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="config.json")
21
+ vocab_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="vocab.json")
 
22
 
23
+ # Funci贸n para limpiar la cach茅 de GPU (no necesaria para CPU, pero la mantengo por si en el futuro usas GPU)
24
  def clear_gpu_cache():
 
25
  if torch.cuda.is_available():
26
  torch.cuda.empty_cache()
27
 
28
+ # Cargar el modelo XTTS
29
  XTTS_MODEL = None
30
  def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
31
  global XTTS_MODEL
 
35
  config = XttsConfig()
36
  config.load_json(xtts_config)
37
  XTTS_MODEL = Xtts.init_from_config(config)
38
+ print("Loading XTTS model!")
39
  XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, use_deepspeed=False)
40
+
41
+ # No mover a GPU ya que usamos CPU
42
+ # if torch.cuda.is_available():
43
+ # XTTS_MODEL.cuda()
44
 
45
  print("Model Loaded!")
46
 
47
+ # Función para ejecutar TTS
48
  def run_tts(lang, tts_text, speaker_audio_file):
49
  if XTTS_MODEL is None or not speaker_audio_file:
50
  return "You need to run the previous step to load the model !!", None, None
51
 
52
+ gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
53
+ audio_path=speaker_audio_file,
54
+ gpt_cond_len=XTTS_MODEL.config.gpt_cond_len,
55
+ max_ref_length=XTTS_MODEL.config.max_ref_len,
56
+ sound_norm_refs=XTTS_MODEL.config.sound_norm_refs
57
+ )
58
  out = XTTS_MODEL.inference(
59
  text=tts_text,
60
  language=lang,
61
  gpt_cond_latent=gpt_cond_latent,
62
  speaker_embedding=speaker_embedding,
63
+ temperature=XTTS_MODEL.config.temperature,
64
  length_penalty=XTTS_MODEL.config.length_penalty,
65
  repetition_penalty=XTTS_MODEL.config.repetition_penalty,
66
  top_k=XTTS_MODEL.config.top_k,
 
71
  out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
72
  out_path = fp.name
73
  torchaudio.save(out_path, out["wav"], 24000)
74
+ print("Speech generated!")
75
 
76
  return out_path, speaker_audio_file
77
 
78
+ # Definir la funci贸n para Gradio
79
  @spaces.GPU(enable_queue=True)
80
  def generate(text, audio):
81
  load_model(model_path, config_path, vocab_path)
82
  out_path, speaker_audio_file = run_tts(lang='es', tts_text=text, speaker_audio_file=audio)
83
  return out_path
84
 
85
+ # Configurar la interfaz de Gradio
86
  demo = gr.Interface(
87
  fn=generate,
88
  inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
89
  outputs=gr.Audio(type='filepath')
90
  )
91
 
92
+ # Lanzar la interfaz
93
  demo.launch()