midi-audioldm / app.py
lauraibnz's picture
Update app.py
466c0fb
raw
history blame
1.27 kB
import gradio as gr
from diffusers import AudioLDMControlNetPipeline, ControlNetModel
from pretty_midi import PrettyMIDI
import torch
# Run on the GPU in half precision when CUDA is available; otherwise fall
# back to the CPU in full precision.
device, torch_dtype = (
    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
)

# Load the MIDI-conditioned ControlNet weights in the selected precision.
controlnet = ControlNetModel.from_pretrained(
    "lauraibnz/midi-audioldm",
    torch_dtype=torch_dtype,
)

# Build the AudioLDM pipeline around that ControlNet and move it onto the
# selected device.
pipe = AudioLDMControlNetPipeline.from_pretrained(
    "cvssp/audioldm-m-full",
    controlnet=controlnet,
    torch_dtype=torch_dtype,
).to(device)
def predict(prompt, midi_file=None, audio_length_in_s=5, controlnet_conditioning_scale=1.0, num_inference_steps=20):
    """Generate audio from a text prompt, conditioned on a MIDI file.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        midi_file: The uploaded MIDI file. Either a path string or an
            object exposing the file's path as ``.name``. When falsy
            (nothing uploaded), ``"test.mid"`` is loaded instead.
        audio_length_in_s: Forwarded to the pipeline as
            ``audio_length_in_s``.
        controlnet_conditioning_scale: Converted to ``float`` and forwarded
            to the pipeline as ``controlnet_conditioning_scale``.
        num_inference_steps: Forwarded to the pipeline as
            ``num_inference_steps``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is the
        hard-coded value 16000 and ``samples`` is the pipeline's ``audios``
        output, transposed.
    """
    if not midi_file:
        # Nothing uploaded: fall back to the default MIDI file.
        midi_path = "test.mid"
    elif isinstance(midi_file, str):
        # Some Gradio versions hand the upload over as a plain path string,
        # which has no ``.name`` attribute.
        midi_path = midi_file
    else:
        # Otherwise expect a file-like wrapper whose ``.name`` is the path.
        midi_path = midi_file.name

    midi = PrettyMIDI(midi_path)
    audio = pipe(
        prompt,
        midi=midi,
        audio_length_in_s=audio_length_in_s,
        num_inference_steps=num_inference_steps,
        # Coerce to float in case the UI delivers an int (e.g. 1).
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
    )
    # Return (sample rate, samples); the audios array is transposed first.
    # NOTE(review): presumably audios is (batch, samples) — confirm.
    return (16000, audio.audios.T)
# Web UI: the inputs map positionally onto predict's first four parameters
# (prompt, midi_file, audio_length_in_s, controlnet_conditioning_scale).
# num_inference_steps is not exposed, so predict uses its default for it.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.UploadButton("Upload a MIDI File", file_types=[".mid"]),
        # Minimum is 5 rather than 0: a zero-second request cannot produce
        # audio, and the AudioLDM pipeline rejects such lengths.
        gr.Slider(minimum=5, maximum=30, value=5, step=5, label="Duration (seconds)"),
        gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.1, label="Conditioning scale"),
    ],
    outputs="audio",
)

# Start the Gradio server.
demo.launch()