File size: 2,590 Bytes
305c299 f02052e 38967a3 fb63790 38967a3 f02052e eed32d8 2d2f9c9 fb63790 38967a3 fb63790 f02052e fb63790 f02052e 050eb17 b2cb8cf 82f6dc7 fb63790 305c299 ffb8822 305c299 6f4ef81 305c299 2d2f9c9 6f4ef81 305c299 6f4ef81 c6d52c6 82f6dc7 c5d879f 305c299 6f4ef81 305c299 6f4ef81 305c299 c5d879f 305c299 2a12840 305c299 256f384 eed32d8 2d5ac46 eed32d8 026dc7a eed32d8 7acd14a 2dc16da 508d0c7 305c299 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
from flask import Flask, request, jsonify, send_file
import spaces
import torch
import soundfile as sf
from huggingface_hub import login
from diffusers import StableAudioPipeline
import os
import io
import random
# Read the Hugging Face access token from the environment so it never has to
# be hard-coded in the source.
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
# Treat an empty value the same as an unset one: an empty string is never a
# valid token, and failing here gives a clearer message than a failed login.
if not HUGGINGFACE_TOKEN:
    raise ValueError("Missing Hugging Face token. Please set it in Hugging Face Secrets.")
# Authenticate this process with the Hub before the model is downloaded below.
login(HUGGINGFACE_TOKEN)
# Pick the execution device and the matching tensor precision together:
# half precision when CUDA is available, full precision on the CPU.
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# Fetch the Stable Audio Open checkpoint once at import time and place it on
# the selected device so every request reuses the same pipeline instance.
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch_dtype,
).to(device)
# WSGI application object; the route handler below is registered on it.
app = Flask(import_name=__name__)
# Inclusive seed range accepted by torch.Generator.manual_seed; values outside
# it make manual_seed raise RuntimeError.
_MIN_SEED = -0x8000_0000_0000_0000
_MAX_SEED = 0xFFFF_FFFF_FFFF_FFFF


# Route to generate audio
@app.route("/generate", methods=["GET"])
@spaces.GPU
def generate_audio():
    """Generate an audio clip from a text prompt and return it as WAV.

    Query parameters:
        prompt: Text description of the audio to generate (required).
        seed: Integer seed for the random generator (optional). When it is
            absent, a random seed in [0, 100000] is used; werkzeug's
            ``args.get(..., type=int)`` also falls back to that default when
            the value cannot be converted to ``int``.

    Returns:
        On success, the encoded WAV data served with mimetype ``audio/wav``.
        On failure, a JSON body ``{"error": <message>}`` with status 400 when
        the prompt is missing or the seed is out of range, or status 500 when
        generation or encoding raises.
    """
    # Validate the required parameter first so bad requests return early.
    prompt = request.args.get("prompt")
    if not prompt:
        return jsonify({"error": "Missing prompt parameter"}), 400

    seed = request.args.get("seed", random.randint(0, 100000), type=int)
    # An out-of-range seed is a client error; reject it here instead of
    # letting manual_seed fail inside the try block and surface as a 500.
    if not _MIN_SEED <= seed <= _MAX_SEED:
        return jsonify({"error": "Seed parameter is out of range"}), 400

    try:
        # Seeded generator so the same prompt/seed pair is reproducible.
        generator = torch.Generator(device)
        generator.manual_seed(seed)

        audio_output = pipe(
            prompt=prompt,
            negative_prompt='Low Quality',
            num_inference_steps=10,  # Number of diffusion steps
            guidance_scale=14.0,
            audio_end_in_s=1,
            num_waveforms_per_prompt=1,
            generator=generator,
        ).audios

        # Take the first (only) waveform, transpose it and move it to a
        # float32 NumPy array on the CPU for encoding.
        # NOTE(review): presumably (channels, samples) -> (frames, channels)
        # as soundfile expects; confirm against the pipeline output.
        output_audio = audio_output[0].T.float().cpu().numpy()

        # Encode into a fresh in-memory buffer; nothing is written to disk.
        output_io = io.BytesIO()
        sf.write(output_io, output_audio, pipe.vae.sampling_rate, format="WAV")
        output_io.seek(0)  # Rewind so send_file reads from the start

        # Serve the WAV inline (as_attachment=False) with a suggested filename.
        return send_file(output_io, as_attachment=False, download_name="output.wav", mimetype='audio/wav')
    except Exception as e:
        # Top-level request boundary: record the traceback server-side, then
        # report the failure to the client in the same JSON shape as before.
        app.logger.exception("Audio generation failed (seed=%s)", seed)
        return jsonify({"error": str(e)}), 500
# Script entry point: start Flask's built-in server when executed directly.
if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    listen_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**listen_options)
|