Spaces:
Running
on
Zero
Running
on
Zero
PuristanLabs1
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -4,10 +4,11 @@ from parler_tts import ParlerTTSForConditionalGeneration
|
|
4 |
from transformers import AutoTokenizer
|
5 |
import soundfile as sf
|
6 |
import tempfile
|
|
|
7 |
|
8 |
# Load the model and tokenizers
|
9 |
-
device = "cuda
|
10 |
-
model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts")
|
11 |
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
|
12 |
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
|
13 |
|
@@ -35,11 +36,15 @@ def generate_description(language, gender, emotion, noise, reverb, expressivity,
|
|
35 |
)
|
36 |
return description
|
37 |
|
38 |
-
# Generate audio function
|
|
|
39 |
def generate_audio(text, description):
|
|
|
|
|
|
|
40 |
# Prepare model inputs
|
41 |
-
input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(
|
42 |
-
prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(
|
43 |
|
44 |
# Generate audio
|
45 |
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
@@ -50,6 +55,9 @@ def generate_audio(text, description):
|
|
50 |
sf.write(f.name, audio_arr, model.config.sampling_rate)
|
51 |
audio_path = f.name
|
52 |
|
|
|
|
|
|
|
53 |
return audio_path
|
54 |
|
55 |
# Gradio Interface
|
@@ -146,4 +154,4 @@ def app():
|
|
146 |
return demo
|
147 |
|
148 |
# Run the app
|
149 |
-
app().launch()
|
|
|
4 |
from transformers import AutoTokenizer
|
5 |
import soundfile as sf
|
6 |
import tempfile
|
7 |
+
import spaces # Import the spaces module for ZeroGPU compatibility
|
8 |
|
9 |
# Load the model and tokenizers
|
10 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
11 |
+
model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts")
|
12 |
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
|
13 |
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
|
14 |
|
|
|
36 |
)
|
37 |
return description
|
38 |
|
39 |
+
# Generate audio function with GPU allocation
|
40 |
+
@spaces.GPU # Allocate GPU for the duration of this function
|
41 |
def generate_audio(text, description):
|
42 |
+
# Move model to GPU
|
43 |
+
model.to("cuda")
|
44 |
+
|
45 |
# Prepare model inputs
|
46 |
+
input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to("cuda")
|
47 |
+
prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to("cuda")
|
48 |
|
49 |
# Generate audio
|
50 |
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
|
|
55 |
sf.write(f.name, audio_arr, model.config.sampling_rate)
|
56 |
audio_path = f.name
|
57 |
|
58 |
+
# Move model back to CPU
|
59 |
+
model.to("cpu")
|
60 |
+
|
61 |
return audio_path
|
62 |
|
63 |
# Gradio Interface
|
|
|
154 |
return demo
|
155 |
|
156 |
# Run the app
|
157 |
+
app().launch()
|