from flask import Flask, request, jsonify, send_file
import spaces
import torch
import soundfile as sf
from huggingface_hub import login
from diffusers import StableAudioPipeline
import os
import io
import random

# Load Hugging Face token securely
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
if HUGGINGFACE_TOKEN is None:
    raise ValueError("Missing Hugging Face token. Please set it in Hugging Face Secrets.")
login(HUGGINGFACE_TOKEN)
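# Example of providing the token (a sketch, assuming a shell environment;
# replace the placeholder with your own token before launching the app):
#   export HF_TOKEN=hf_your_token_here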

# Set device for PyTorch (GPU or CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Load the StableAudio model from Hugging Face Hub
pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0", torch_dtype=torch_dtype)
pipe = pipe.to(device)

# Initialize Flask app
app = Flask(__name__)

# Route to generate audio
@app.route("/generate", methods=["GET"])
@spaces.GPU
def generate_audio():
    
    # Read query parameters; fall back to a random seed when none is supplied
    prompt = request.args.get("prompt")
    seed = request.args.get("seed", random.randint(0, 100000), type=int)

    if not prompt:
        return jsonify({"error": "Missing prompt parameter"}), 400

    try:
        # Seed the generator so results are reproducible for a given seed value
        generator = torch.Generator(device).manual_seed(seed)

        # Generate the audio with StableAudioPipeline
        audio_output = pipe(
            prompt=prompt,
            negative_prompt='Low Quality',
            num_inference_steps=10,   # Number of diffusion steps
            guidance_scale=14.0,      # Classifier-free guidance strength
            audio_end_in_s=1,         # Length of the generated clip in seconds
            num_waveforms_per_prompt=1,
            generator=generator
        ).audios
        
        # Write the waveform to an in-memory WAV buffer
        output_io = io.BytesIO()
        output_audio = audio_output[0].T.float().cpu().numpy()
        sf.write(output_io, output_audio, pipe.vae.sampling_rate, format="WAV")
        output_io.seek(0)  # Rewind the buffer so send_file reads from the start

        # Stream the generated WAV inline (not as a download attachment)
        return send_file(output_io, as_attachment=False, download_name="output.wav", mimetype="audio/wav")

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# Run the Flask app
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
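
# Example request (a sketch, assuming the app is running locally on port 7860;
# the prompt text below is only an illustration):
#   curl "http://localhost:7860/generate?prompt=gentle%20rain%20on%20a%20tin%20roof&seed=42" --output output.wav
# The response body is a short WAV clip (audio_end_in_s=1 second) at the model's native sampling rate.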