PuristanLabs1 committed on
Commit
69d2a81
·
verified ·
1 Parent(s): c611518

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -4,10 +4,11 @@ from parler_tts import ParlerTTSForConditionalGeneration
4
  from transformers import AutoTokenizer
5
  import soundfile as sf
6
  import tempfile
 
7
 
8
  # Load the model and tokenizers
9
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
10
- model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts").to(device)
11
  tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
12
  description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
13
 
@@ -35,11 +36,15 @@ def generate_description(language, gender, emotion, noise, reverb, expressivity,
35
  )
36
  return description
37
 
38
- # Generate audio function
 
39
  def generate_audio(text, description):
 
 
 
40
  # Prepare model inputs
41
- input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
42
- prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
43
 
44
  # Generate audio
45
  generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
@@ -50,6 +55,9 @@ def generate_audio(text, description):
50
  sf.write(f.name, audio_arr, model.config.sampling_rate)
51
  audio_path = f.name
52
 
 
 
 
53
  return audio_path
54
 
55
  # Gradio Interface
@@ -146,4 +154,4 @@ def app():
146
  return demo
147
 
148
  # Run the app
149
- app().launch()
 
4
  from transformers import AutoTokenizer
5
  import soundfile as sf
6
  import tempfile
7
+ import spaces # Import the spaces module for ZeroGPU compatibility
8
 
9
  # Load the model and tokenizers
10
+ device = "cuda" if torch.cuda.is_available() else "cpu"
11
+ model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts")
12
  tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
13
  description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
14
 
 
36
  )
37
  return description
38
 
39
+ # Generate audio function with GPU allocation
40
+ @spaces.GPU # Allocate GPU for the duration of this function
41
  def generate_audio(text, description):
42
+ # Move model to GPU
43
+ model.to("cuda")
44
+
45
  # Prepare model inputs
46
+ input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to("cuda")
47
+ prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to("cuda")
48
 
49
  # Generate audio
50
  generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
 
55
  sf.write(f.name, audio_arr, model.config.sampling_rate)
56
  audio_path = f.name
57
 
58
+ # Move model back to CPU
59
+ model.to("cpu")
60
+
61
  return audio_path
62
 
63
  # Gradio Interface
 
154
  return demo
155
 
156
  # Run the app
157
+ app().launch()