# stardate69's picture
# Update app.py
# 305c299 verified
# raw
# history blame
# 2.09 kB
from flask import Flask, request, jsonify, send_file
import spaces
import torch
import soundfile as sf
from huggingface_hub import login
from diffusers import StableAudioPipeline
import os
# Read the Hugging Face token from the HF_TOKEN environment variable.
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
# An unset *or empty* HF_TOKEN is treated as missing, so the problem is
# reported here with a clear message instead of surfacing inside login().
if not HUGGINGFACE_TOKEN:
    raise ValueError("Missing Hugging Face token. Please set it in Hugging Face Secrets.")
login(HUGGINGFACE_TOKEN)

# Pick the PyTorch device: CUDA when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision on the GPU, full precision on the CPU.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Load the Stable Audio pipeline from the Hugging Face Hub and move it to
# the selected device.
pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0", torch_dtype=torch_dtype)
pipe = pipe.to(device)

# Directory where generated audio files are written; created on startup.
OUTPUT_PATH = "./generated_audio"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Flask application object that the route below is registered on.
app = Flask(__name__)
# GPU-bound synthesis step, kept separate from the Flask view below.
# ``app.route`` registers the callable it is handed and returns it unchanged,
# so a decorator stacked *above* ``@app.route`` never wraps the function that
# Flask actually invokes. Decorating this helper with ``spaces.GPU`` and
# calling it from the view makes the decorator take effect on every request,
# and keeps Flask request/response objects out of the decorated callable.
@spaces.GPU
def _synthesize_waveform(prompt):
    """Run the Stable Audio pipeline for *prompt* and return the waveform.

    Args:
        prompt: Text description of the audio to generate.

    Returns:
        The first generated waveform as a float32 NumPy array on the CPU,
        transposed relative to the pipeline output.
    """
    # Seed the generator with a constant so every request uses the same seed.
    generator = torch.Generator(device).manual_seed(42)
    audio_output = pipe(
        prompt=prompt,
        negative_prompt='Low Quality',
        num_inference_steps=10,  # Number of diffusion steps
        audio_end_in_s=1,
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios
    # The pipeline may run in float16; cast to float32 before leaving torch.
    # The transpose presumably turns (channels, samples) into the
    # (samples, channels) layout soundfile writes -- TODO confirm.
    return audio_output[0].T.float().cpu().numpy()


# Route to generate audio
@app.route("/generate", methods=["GET"])
def generate_audio():
    """Generate a short audio clip from the ``prompt`` query parameter.

    Returns:
        The generated WAV file as an attachment on success; a JSON body
        ``{"error": ...}`` with status 400 when ``prompt`` is missing or
        empty, or with status 500 when generation or writing fails.
    """
    prompt = request.args.get("prompt")
    if not prompt:
        return jsonify({"error": "Missing prompt parameter"}), 400

    try:
        output_audio = _synthesize_waveform(prompt)

        # NOTE(review): every request writes the same file name, so
        # concurrent requests can overwrite each other's output.
        output_filename = "output.wav"
        output_path = os.path.join(OUTPUT_PATH, output_filename)
        sf.write(output_path, output_audio, pipe.vae.sampling_rate)

        # Return the WAV file
        return send_file(output_path, as_attachment=True)
    except Exception as e:
        # Top-level request boundary: record the traceback in the app log,
        # then report the failure to the client as JSON.
        app.logger.exception("Audio generation failed for prompt %r", prompt)
        return jsonify({"error": str(e)}), 500
# Start Flask's built-in server only when this file is executed directly.
if __name__ == "__main__":
    # Bind to all interfaces on port 7860.
    app.run(
        port=7860,
        host="0.0.0.0",
    )