Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,75 +1,55 @@
|
|
1 |
-
import
|
|
|
2 |
import torch
|
3 |
import soundfile as sf
|
4 |
from huggingface_hub import login
|
5 |
from diffusers import StableAudioPipeline
|
6 |
-
import
|
7 |
-
import spaces
|
8 |
|
9 |
# Load Hugging Face token securely
|
10 |
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
|
11 |
if HUGGINGFACE_TOKEN is None:
|
12 |
-
raise ValueError("Missing Hugging Face token. Please set it in
|
13 |
login(HUGGINGFACE_TOKEN)
|
14 |
|
15 |
-
# Set device for PyTorch (
|
16 |
-
device = "cpu"
|
17 |
-
torch_dtype = torch.
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
-
device = "cuda"
|
22 |
-
torch_dtype = torch.float16 # Use float16 for GPU to optimize memory usage
|
23 |
-
|
24 |
-
# Load the pipeline
|
25 |
-
pipe = StableAudioPipeline.from_pretrained(
|
26 |
-
"stabilityai/stable-audio-open-1.0",
|
27 |
-
torch_dtype=torch_dtype
|
28 |
-
)
|
29 |
pipe = pipe.to(device)
|
30 |
|
31 |
-
#
|
|
|
|
|
|
|
|
|
32 |
@spaces.GPU
|
33 |
-
def generate_audio(prompt
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
audio_end_in_s=duration,
|
40 |
-
num_waveforms_per_prompt=1,
|
41 |
-
generator=generator
|
42 |
-
).audios
|
43 |
output_audio = audio_output[0].T.float().cpu().numpy()
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
|
48 |
-
#
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
seed_input = gr.Number(label="Random Seed", value=42)
|
62 |
-
|
63 |
-
# Output Section
|
64 |
-
generate_button = gr.Button("Generate Audio")
|
65 |
-
output_audio = gr.Audio(label="Generated Audio", type="filepath")
|
66 |
-
|
67 |
-
# Connect the function to the button click
|
68 |
-
generate_button.click(
|
69 |
-
generate_audio,
|
70 |
-
inputs=[prompt_input, negative_input, duration_input, diffusion_steps_input, seed_input],
|
71 |
-
outputs=output_audio
|
72 |
-
)
|
73 |
|
74 |
-
# Launch the
|
75 |
-
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import spaces
|
3 |
import torch
|
4 |
import soundfile as sf
|
5 |
from huggingface_hub import login
|
6 |
from diffusers import StableAudioPipeline
|
7 |
+
import os
|
|
|
8 |
|
9 |
# Read the Hugging Face access token from the environment and authenticate
# before the model download below is attempted.
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
if not HUGGINGFACE_TOKEN:
    # An unset variable and an empty string are equally unusable as a token,
    # so both are rejected here with the same explicit message.
    raise ValueError("Missing Hugging Face token. Please set it in Hugging Face Secrets.")
login(HUGGINGFACE_TOKEN)

# Set device for PyTorch (GPU or CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is used only on GPU; the CPU path stays in float32.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Load the StableAudio model from Hugging Face Hub and move it to the device
pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0", torch_dtype=torch_dtype)
pipe = pipe.to(device)

# Directory where generated audio files are written; created at import time
# so generate_audio can write into it without checking.
OUTPUT_PATH = "./generated_audio"
os.makedirs(OUTPUT_PATH, exist_ok=True)
|
26 |
+
|
27 |
+
# Function to generate audio from prompt
|
28 |
@spaces.GPU
def generate_audio(prompt: str):
    """Generate audio for *prompt*, write it to a WAV file and return a URL string.

    Args:
        prompt: Text description of the audio to generate.

    Returns:
        A URL string built from a fixed template and the output file name.
    """
    # Fixed seed so repeated calls are reproducible. The generator only has
    # an effect if it is actually handed to the pipeline call below.
    generator = torch.Generator(device).manual_seed(42)
    audio_output = pipe(
        prompt=prompt,
        negative_prompt="Low Quality",
        num_inference_steps=10,
        generator=generator,
    ).audios

    # Convert the first generated waveform to a float32 numpy array on the
    # CPU and save it as a WAV file.
    # NOTE(review): the transpose presumably turns (channels, samples) into
    # the (samples, channels) layout soundfile expects -- confirm.
    output_audio = audio_output[0].T.float().cpu().numpy()
    # NOTE(review): the file name is fixed, so every call overwrites the
    # previous result.
    output_filename = "output.wav"
    output_path = os.path.join(OUTPUT_PATH, output_filename)
    sf.write(output_path, output_audio, pipe.vae.sampling_rate)

    # NOTE(review): the host below is still a template placeholder and must
    # be replaced with the real Space name before this URL can resolve.
    audio_url = f"https://<your-hf-space-name>.hf.space/audio/{output_filename}"

    return audio_url
|
44 |
+
|
45 |
+
# Gradio Interface setup: one text prompt in, one text field (the URL string
# returned by generate_audio) out.
interface = gr.Interface(
    fn=generate_audio,
    inputs=gr.Textbox(label="Enter a text prompt to generate audio"),
    outputs=gr.Textbox(label="Generated Audio URL"),
    title="StableAudioText2Speech",
    description="Generate audio from a text prompt using Hugging Face StableAudio Pipeline.",
)

# Launch the Gradio interface as an HTTP endpoint.
# A public share tunnel is requested only when not running inside a Hugging
# Face Space (detected via the SPACE_ID environment variable): Spaces already
# serve the app publicly and Gradio does not support share=True there.
interface.launch(share=os.getenv("SPACE_ID") is None)
|