stardate69 committed on
Commit
f02052e
·
verified ·
1 Parent(s): 960e153

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -58
app.py CHANGED
@@ -1,75 +1,55 @@
1
- import os
 
2
  import torch
3
  import soundfile as sf
4
  from huggingface_hub import login
5
  from diffusers import StableAudioPipeline
6
- import gradio as gr
7
- import spaces
8
 
9
  # Load Hugging Face token securely
10
  HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
11
  if HUGGINGFACE_TOKEN is None:
12
- raise ValueError("Missing Hugging Face token. Please set it in Spaces Secrets.")
13
  login(HUGGINGFACE_TOKEN)
14
 
15
- # Set device for PyTorch (only CPU, if no GPU is available)
16
- device = "cpu"
17
- torch_dtype = torch.float32 # Use float32 for CPU by default
18
 
19
- # Check for GPU availability
20
- if torch.cuda.is_available():
21
- device = "cuda"
22
- torch_dtype = torch.float16 # Use float16 for GPU to optimize memory usage
23
-
24
- # Load the pipeline
25
- pipe = StableAudioPipeline.from_pretrained(
26
- "stabilityai/stable-audio-open-1.0",
27
- torch_dtype=torch_dtype
28
- )
29
  pipe = pipe.to(device)
30
 
31
- # Function to generate audio
 
 
 
 
32
  @spaces.GPU
33
- def generate_audio(prompt, negative_prompt, duration, diffusion_steps, seed):
34
- generator = torch.Generator(device).manual_seed(seed)
35
- audio_output = pipe(
36
- prompt=prompt,
37
- negative_prompt=negative_prompt,
38
- num_inference_steps=int(diffusion_steps), # Number of diffusion steps
39
- audio_end_in_s=duration,
40
- num_waveforms_per_prompt=1,
41
- generator=generator
42
- ).audios
43
  output_audio = audio_output[0].T.float().cpu().numpy()
44
- output_file = "output.wav"
45
- sf.write(output_file, output_audio, pipe.vae.sampling_rate)
46
- return output_file
47
 
48
- # Gradio UI
49
- with gr.Blocks() as demo:
50
- gr.Markdown("## 🎧 Stable Audio Open - Audio Generation 🎼")
51
- gr.Markdown("### Adjust prompts, duration, and diffusion steps to control the generation!")
52
-
53
- # Input Section
54
- with gr.Row():
55
- prompt_input = gr.Textbox(label="Prompt", value="The sound of a hammer hitting a wooden surface.")
56
- negative_input = gr.Textbox(label="Negative Prompt", value="Low quality.")
57
- with gr.Row():
58
- duration_input = gr.Slider(minimum=1, maximum=10, step=0.5, value=1, label="Duration (seconds)")
59
- diffusion_steps_input = gr.Slider(minimum=1, maximum=500, step=10, value=10, label="Diffusion Steps")
60
- with gr.Row():
61
- seed_input = gr.Number(label="Random Seed", value=42)
62
-
63
- # Output Section
64
- generate_button = gr.Button("Generate Audio")
65
- output_audio = gr.Audio(label="Generated Audio", type="filepath")
66
-
67
- # Connect the function to the button click
68
- generate_button.click(
69
- generate_audio,
70
- inputs=[prompt_input, negative_input, duration_input, diffusion_steps_input, seed_input],
71
- outputs=output_audio
72
- )
73
 
74
- # Launch the app
75
- demo.launch()
 
1
+ import gradio as gr
2
+ import spaces
3
  import torch
4
  import soundfile as sf
5
  from huggingface_hub import login
6
  from diffusers import StableAudioPipeline
7
+ import os
 
8
 
9
  # Load Hugging Face token securely
10
  HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
11
  if HUGGINGFACE_TOKEN is None:
12
+ raise ValueError("Missing Hugging Face token. Please set it in Hugging Face Secrets.")
13
  login(HUGGINGFACE_TOKEN)
14
 
15
# Pick the compute device and a matching dtype: half precision when CUDA is
# available, full precision otherwise.
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# Load the StableAudio model from the Hugging Face Hub and move it onto the
# selected device in one step.
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch_dtype,
).to(device)

# Directory that receives the generated WAV files; created on startup if it
# does not exist yet.
OUTPUT_PATH = "./generated_audio"
os.makedirs(OUTPUT_PATH, exist_ok=True)
26
+
27
# Function to generate audio from prompt
@spaces.GPU
def generate_audio(prompt: str) -> str:
    """Generate audio for ``prompt``, save it as a WAV file, and return its URL.

    Runs the module-level ``pipe`` with a fixed negative prompt
    ("Low Quality") and 10 inference steps, writes the first returned
    waveform to ``OUTPUT_PATH/output.wav``, and returns a URL string built
    from that file name.

    Args:
        prompt: Text description of the audio to generate.

    Returns:
        The URL string for the generated file name.
    """
    # Fixed seed for repeatable output. The generator must be handed to the
    # pipeline; merely constructing it does not influence sampling.
    generator = torch.Generator(device).manual_seed(42)
    audio_output = pipe(
        prompt=prompt,
        negative_prompt="Low Quality",
        num_inference_steps=10,
        generator=generator,
    ).audios

    # Take the first waveform, transpose it, and convert it to a float32
    # NumPy array on the CPU before writing.
    # NOTE(review): the transpose presumably turns (channels, samples) into
    # the (samples, channels) layout soundfile expects - confirm.
    output_audio = audio_output[0].T.float().cpu().numpy()

    # The file name is constant, so every call overwrites the previous result.
    output_filename = "output.wav"
    output_path = os.path.join(OUTPUT_PATH, output_filename)
    sf.write(output_path, output_audio, pipe.vae.sampling_rate)

    # NOTE(review): the host below is still the literal placeholder
    # "<your-hf-space-name>", and nothing visible here serves an "/audio/"
    # route - replace with the real Space host and a served path before
    # relying on this URL.
    audio_url = f"https://<your-hf-space-name>.hf.space/audio/{output_filename}"

    return audio_url
44
+
45
# Gradio Interface setup: a single text prompt goes in, and the string
# returned by generate_audio is shown in a text field.
_prompt_box = gr.Textbox(label="Enter a text prompt to generate audio")
_url_box = gr.Textbox(label="Generated Audio URL")

interface = gr.Interface(
    title="StableAudioText2Speech",
    description="Generate audio from a text prompt using Hugging Face StableAudio Pipeline.",
    fn=generate_audio,
    inputs=_prompt_box,
    outputs=_url_box,
)

# Start the Gradio app, requesting a public share link.
interface.launch(share=True)