Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -10,9 +10,7 @@ from audioldm.audio.stft import TacotronSTFT
|
|
10 |
from audioldm.variational_autoencoder import AutoencoderKL
|
11 |
from pydub import AudioSegment
|
12 |
from gradio import Markdown
|
13 |
-
|
14 |
import torch
|
15 |
-
#from diffusers.models.autoencoder_kl import AutoencoderKL
|
16 |
from diffusers.models.unet_2d_condition import UNet2DConditionModel
|
17 |
from diffusers import DiffusionPipeline,AudioPipelineOutput
|
18 |
from transformers import CLIPTextModel, T5EncoderModel, AutoModel, T5Tokenizer, T5TokenizerFast
|
@@ -20,13 +18,7 @@ from typing import Union
|
|
20 |
from diffusers.utils.torch_utils import randn_tensor
|
21 |
from tqdm import tqdm
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
class Tango2Pipeline(DiffusionPipeline):
|
28 |
-
|
29 |
-
|
30 |
def __init__(
|
31 |
self,
|
32 |
vae: AutoencoderKL,
|
@@ -44,7 +36,6 @@ class Tango2Pipeline(DiffusionPipeline):
|
|
44 |
unet=unet,
|
45 |
scheduler=scheduler
|
46 |
)
|
47 |
-
|
48 |
|
49 |
def _encode_prompt(self, prompt):
|
50 |
device = self.text_encoder.device
|
@@ -266,11 +257,10 @@ def gradio_generate(prompt, output_format, steps, guidance):
|
|
266 |
return output_filename
|
267 |
|
268 |
|
269 |
-
# Gradio input and output components
|
270 |
input_text = gr.Textbox(lines=2, label="Prompt")
|
271 |
output_format = gr.Radio(label = "Output format", info = "The file you can dowload", choices = ["mp3", "wav"], value = "wav")
|
272 |
output_audio = gr.Audio(label="Generated Audio", type="filepath")
|
273 |
-
denoising_steps = gr.Slider(minimum=100, maximum=200, value=
|
274 |
guidance_scale = gr.Slider(minimum=1, maximum=10, value=3, step=0.1, label="Guidance Scale", interactive=True)
|
275 |
|
276 |
css = """
|
@@ -279,8 +269,6 @@ footer {
|
|
279 |
}
|
280 |
"""
|
281 |
|
282 |
-
|
283 |
-
# Gradio interface
|
284 |
gr_interface = gr.Interface(
|
285 |
fn=gradio_generate,
|
286 |
inputs=[input_text, output_format, denoising_steps, guidance_scale],
|
@@ -317,5 +305,4 @@ gr_interface = gr.Interface(
|
|
317 |
cache_examples="lazy", # Turn on to cache.
|
318 |
)
|
319 |
|
320 |
-
# Launch Gradio app
|
321 |
gr_interface.queue(10).launch()
|
|
|
10 |
from audioldm.variational_autoencoder import AutoencoderKL
|
11 |
from pydub import AudioSegment
|
12 |
from gradio import Markdown
|
|
|
13 |
import torch
|
|
|
14 |
from diffusers.models.unet_2d_condition import UNet2DConditionModel
|
15 |
from diffusers import DiffusionPipeline,AudioPipelineOutput
|
16 |
from transformers import CLIPTextModel, T5EncoderModel, AutoModel, T5Tokenizer, T5TokenizerFast
|
|
|
18 |
from diffusers.utils.torch_utils import randn_tensor
|
19 |
from tqdm import tqdm
|
20 |
|
|
|
|
|
|
|
|
|
21 |
class Tango2Pipeline(DiffusionPipeline):
|
|
|
|
|
22 |
def __init__(
|
23 |
self,
|
24 |
vae: AutoencoderKL,
|
|
|
36 |
unet=unet,
|
37 |
scheduler=scheduler
|
38 |
)
|
|
|
39 |
|
40 |
def _encode_prompt(self, prompt):
|
41 |
device = self.text_encoder.device
|
|
|
257 |
return output_filename
|
258 |
|
259 |
|
|
|
260 |
input_text = gr.Textbox(lines=2, label="Prompt")
|
261 |
output_format = gr.Radio(label = "Output format", info = "The file you can dowload", choices = ["mp3", "wav"], value = "wav")
|
262 |
output_audio = gr.Audio(label="Generated Audio", type="filepath")
|
263 |
+
denoising_steps = gr.Slider(minimum=100, maximum=200, value=200, step=1, label="Steps", interactive=True)
|
264 |
guidance_scale = gr.Slider(minimum=1, maximum=10, value=3, step=0.1, label="Guidance Scale", interactive=True)
|
265 |
|
266 |
css = """
|
|
|
269 |
}
|
270 |
"""
|
271 |
|
|
|
|
|
272 |
gr_interface = gr.Interface(
|
273 |
fn=gradio_generate,
|
274 |
inputs=[input_text, output_format, denoising_steps, guidance_scale],
|
|
|
305 |
cache_examples="lazy", # Turn on to cache.
|
306 |
)
|
307 |
|
|
|
308 |
gr_interface.queue(10).launch()
|