Flux9665 committed on
Commit c69f215 • 1 Parent(s): 9c3ebc7

remove the spaces environment, since we now have a permanent GPU

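"The spaces environment" refers to the Hugging Face ZeroGPU setup, in which the model idles on CPU between requests and each GPU-bound call is wrapped with the `spaces.GPU` decorator (visible in the removed lines below). A minimal sketch of the before/after pattern, using a hypothetical `synthesize` helper rather than the project's real classes:

```python
import torch

# ZeroGPU pattern this commit removes: the model idles on CPU and
# `@spaces.GPU` (from the `spaces` package) attaches a GPU per call.
#
#     import spaces
#
#     @spaces.GPU
#     def synthesize(model, features):
#         model.to("cuda")
#         out = model(features.to("cuda"))
#         model.to("cpu")   # hand the borrowed GPU back afterwards
#         return out

# Permanent-GPU pattern used after this commit: the model is placed on the
# GPU once, and inputs simply follow the model's device on every call.
def synthesize(model, features):
    device = next(model.parameters()).device
    with torch.inference_mode():
        return model(features.to(device))


if __name__ == "__main__":
    model = torch.nn.Linear(8, 2).to("cuda" if torch.cuda.is_available() else "cpu")
    print(synthesize(model, torch.randn(3, 8)).shape)
```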
Architectures/ControllabilityGAN/wgan/wgan_qc.py CHANGED
@@ -246,9 +246,9 @@ class WassersteinGanQuadraticCost(torch.nn.Module):
         if nograd:
             with torch.no_grad():
                 if isinstance(self.G, torch.nn.parallel.DataParallel):
-                    generated_data = self.G.module(latent_samples.to("cpu"), return_intermediate=return_intermediate)
+                    generated_data = self.G.module(latent_samples, return_intermediate=return_intermediate)
                 else:
-                    generated_data = self.G(latent_samples.to("cpu"), return_intermediate=return_intermediate)
+                    generated_data = self.G(latent_samples, return_intermediate=return_intermediate)
         else:
             generated_data = self.G(latent_samples)
         self.G.train()
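The idea behind this hunk, sketched with a hypothetical stand-in generator (not the project's WGAN): once the generator lives permanently on the GPU, the latent samples should follow the generator's device instead of being pinned to "cpu", while the DataParallel wrapper is still unwrapped so the module can be called directly.

```python
import torch


class ToyGenerator(torch.nn.Module):
    # hypothetical generator whose forward mirrors the signature used above
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Linear(16, 16)

    def forward(self, z, return_intermediate=False):
        h = self.net(z)
        return (h, z) if return_intermediate else h


def create_samples(generator, latent_samples, return_intermediate=False):
    # follow the generator's own device rather than forcing inputs onto "cpu"
    device = next(generator.parameters()).device
    latent_samples = latent_samples.to(device)
    with torch.no_grad():
        if isinstance(generator, torch.nn.parallel.DataParallel):
            # unwrap DataParallel and call the underlying module directly
            return generator.module(latent_samples, return_intermediate=return_intermediate)
        return generator(latent_samples, return_intermediate=return_intermediate)


if __name__ == "__main__":
    gen = ToyGenerator().to("cuda" if torch.cuda.is_available() else "cpu")
    print(create_samples(gen, torch.randn(4, 16)).shape)
```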
InferenceInterfaces/ToucanTTSInterface.py CHANGED
@@ -1,14 +1,12 @@
 import itertools
 import os
 import warnings
-from typing import cast
 
 import matplotlib.pyplot as plt
 import pyloudnorm
 import sounddevice
 import soundfile
 import torch
-import spaces
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
     from speechbrain.pretrained import EncoderClassifier
@@ -127,7 +125,6 @@ class ToucanTTSInterface(torch.nn.Module):
 
         self.lang_id = get_language_id(lang_id).to(self.device)
 
-    @spaces.GPU
     def forward(self,
                 text,
                 view=False,
@@ -153,19 +150,15 @@ class ToucanTTSInterface(torch.nn.Module):
             1.0 means no scaling happens, higher values increase variance of the energy curve,
             lower values decrease variance of the energy curve.
         """
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.device = device
-        self.to(device)
-
         with torch.inference_mode():
             phones = self.text2phone.string_to_tensor(text, input_phonemes=input_is_phones).to(torch.device(self.device))
             mel, durations, pitch, energy = self.phone2mel(phones,
                                                            return_duration_pitch_energy=True,
-                                                           utterance_embedding=self.default_utterance_embedding.to(device),
+                                                           utterance_embedding=self.default_utterance_embedding.to(self.device),
                                                            durations=durations,
                                                            pitch=pitch,
                                                            energy=energy,
-                                                           lang_id=self.lang_id.to(device),
+                                                           lang_id=self.lang_id.to(self.device),
                                                            duration_scaling_factor=duration_scaling_factor,
                                                            pitch_variance_scale=pitch_variance_scale,
                                                            energy_variance_scale=energy_variance_scale,
@@ -228,8 +221,7 @@ class ToucanTTSInterface(torch.nn.Module):
             if return_plot_as_filepath:
                 plt.savefig("tmp.png")
                 return wave, sr, "tmp.png"
-        self.to("cpu")
-        self.device = "cpu"
+
         return wave, sr
 
     def read_to_file(self,
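One detail about the rewritten keyword arguments above: `Tensor.to()` returns the tensor itself when it already has the requested device and dtype, so keeping the `.to(self.device)` calls on the cached utterance embedding and language id costs essentially nothing once everything sits on the permanent GPU. A tiny sketch with a plain tensor (not the project's objects):

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
embedding = torch.randn(64).to(device)

# .to() is a no-op returning the same object when device and dtype already match
assert embedding.to(device) is embedding
```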
app.py CHANGED
@@ -1,7 +1,5 @@
 import os
 
-import spaces
-
 from run_model_downloader import download_models
 
 if not os.path.exists("Models/ToucanTTS_Meta/best.pt"):
@@ -23,17 +21,12 @@ class ControllableInterface(torch.nn.Module):
 
     def __init__(self, available_artificial_voices=1000):
         super().__init__()
-        self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta", language="eng")
-        self.wgan = GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cpu")
+        self.model = ToucanTTSInterface(device="cuda", tts_model_path="Meta", language="eng")
+        self.wgan = GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cuda")
         self.generated_speaker_embeds = list()
         self.available_artificial_voices = available_artificial_voices
         self.current_language = ""
         self.current_accent = ""
-        self.device = "cpu"
-        self.model.to("cpu")
-        self.model.device = "cpu"
-        self.wgan.to("cpu")
-        self.wgan.device = "cpu"
 
     def read(self,
              prompt,
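Since the constructors above now pass device="cuda" directly, the app assumes a CUDA device exists at startup. A small sketch, not part of the commit, of a fail-fast check one could run before building the interfaces:

```python
import torch

# assumption for illustration only: the Space is expected to have a permanent GPU
if not torch.cuda.is_available():
    raise RuntimeError("No CUDA device found, but the interfaces are constructed on 'cuda'.")

print("Running on", torch.cuda.get_device_name(0))
```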