cache + dl
- .gitignore +1 -0
- app.py +32 -6
- model.py +14 -9
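Wrap the CVAE checkpoint load in a cached load_model() (@st.cache_resource) so the model is read from disk once per process instead of on every Streamlit rerun, cache the numpy-to-WAV conversion (@st.cache_data), and add a Download button for the generated sound. The "magical parameters" expander now exposes five sliders whose values are passed to generate() as an optional tuple, and *.pyc files are ignored.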
.gitignore ADDED
@@ -0,0 +1 @@
+*.pyc
app.py CHANGED
@@ -1,9 +1,18 @@
 import streamlit as st
 from model import generate
+import io
 import numpy as np
+from scipy.io.wavfile import write
+
+@st.cache_data
+def np_to_wav(waveform, sample_rate) -> bytes:
+    bytes_wav = bytes()
+    byte_io = io.BytesIO(bytes_wav)
+    write(byte_io, sample_rate, waveform.T)
+    return byte_io.read()
 
 if "result" not in st.session_state:
-    st.session_state["result"] =
+    st.session_state["result"] = None
 
 st.title("Sound Exploration")
 
@@ -12,7 +21,7 @@ col1, col2 = st.columns(2)
 with col1:
     instrument = st.selectbox(
         'Which instrument do you want?',
-        ('🎸 Bass', '🎺 Brass', '🪈 Flute', '🪕 Guitar', '🎹 Keyboard', '🔨 Mallet', 'Organ', 'Reed', '🎻 String', 'Synth lead', 'Vocal')
+        ('🎸 Bass', '🎺 Brass', '🪈 Flute', '🪕 Guitar', '🎹 Keyboard', '🔨 Mallet', 'Organ', '🎷 Reed', '🎻 String', '⚡ Synth lead', '🎤 Vocal')
     )
 
 with col2:
@@ -22,11 +31,28 @@ with col2:
     )
 
 with st.expander("Magical parameters 🪄"):
-
+    col1, col2 = st.columns(2)
+    with col1:
+        p1 = st.slider('p1', 0., 1., step=0.001, label_visibility='collapsed')
+        p2 = st.slider('p2', 0., 1., step=0.001, label_visibility='collapsed')
+        p3 = st.slider('p3', 0., 1., step=0.001, label_visibility='collapsed')
+    with col2:
+        p4 = st.slider('p4', 0., 1., step=0.001, label_visibility='collapsed')
+        p5 = st.slider('p5', 0., 1., step=0.001, label_visibility='collapsed')
+    use_params = st.toggle('Use magical parameters?')
+    params = (p1, p2, p3, p4, p5) if use_params else None
 
 if st.button("Generate ✨", type="primary"):
-    st.session_state["result"] = generate([instrument, instrument_t])
+    st.session_state["result"] = generate([instrument, instrument_t], params)
 
-if st.session_state["result"]
-    st.
+if st.session_state["result"] is not None:
+    col1, col2 = st.columns(2)
+    with col1:
+        st.audio(st.session_state["result"], sample_rate=16000)
+    with col2:
+        st.download_button(
+            label="Download ⬇️",
+            data=np_to_wav(st.session_state["result"], 16000),
+            file_name='result.wav',
+        )
 
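A note on the np_to_wav helper added above: scipy.io.wavfile.write seeks a file-like object back to position 0 after patching the RIFF header sizes, which is why byte_io.read() returns the complete WAV file rather than an empty string. A minimal round-trip sketch of the same pattern (the 4-second, 16 kHz mono waveform shaped (1, samples) mirrors io_features=16000*4 in model.py, but generate()'s actual output shape and dtype are assumptions here):

import io
import numpy as np
from scipy.io.wavfile import read, write

# Hypothetical stand-in for st.session_state["result"]: a 440 Hz tone,
# float32, shaped (1, samples) so that .T gives one channel per column.
waveform = np.sin(2 * np.pi * 440 * np.arange(16000 * 4) / 16000)
waveform = waveform.astype(np.float32)[None, :]

buf = io.BytesIO()
write(buf, 16000, waveform.T)   # write() rewinds buf to 0 when it finishes
wav_bytes = buf.read()          # the full RIFF/WAVE file, as in np_to_wav

rate, data = read(io.BytesIO(wav_bytes))
assert rate == 16000 and len(data) == waveform.shape[1]

Because np_to_wav is wrapped in @st.cache_data, Streamlit keys the cached bytes on a hash of the waveform argument, so reruns with the same generated result do not redo the conversion.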
model.py CHANGED
@@ -1,20 +1,25 @@
 from cvae import CVAE
 import torch
 from typing import Sequence
+import streamlit as st
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 instruments = ['bass_acoustic', 'brass_acoustic', 'flute_acoustic', 'guitar_acoustic', 'keyboard_acoustic', 'mallet_acoustic', 'organ_acoustic', 'reed_acoustic', 'string_acoustic', 'synth_lead_acoustic', 'vocal_acoustic', 'bass_synthetic', 'brass_synthetic', 'flute_synthetic', 'guitar_synthetic', 'keyboard_synthetic', 'mallet_synthetic', 'organ_synthetic', 'reed_synthetic', 'string_synthetic', 'synth_lead_synthetic', 'vocal_synthetic', 'bass_electronic', 'brass_electronic', 'flute_electronic', 'guitar_electronic', 'keyboard_electronic', 'mallet_electronic', 'organ_electronic', 'reed_electronic', 'string_electronic', 'synth_lead_electronic', 'vocal_electronic']
 
-model = CVAE.load_from_checkpoint(
-    'epoch=17-step=650718.ckpt',
-    io_channels=1,
-    io_features=16000*4,
-    latent_features=5,
-    channels=[32, 64, 128, 256, 512],
-    num_classes=len(instruments),
-    learning_rate=1e-5
-).to(device)
+@st.cache_resource
+def load_model(device):
+    return CVAE.load_from_checkpoint(
+        'epoch=17-step=650718.ckpt',
+        io_channels=1,
+        io_features=16000*4,
+        latent_features=5,
+        channels=[32, 64, 128, 256, 512],
+        num_classes=len(instruments),
+        learning_rate=1e-5
+    ).to(device)
+
+model = load_model(device)
 
 def format(text):
     text = text.split(' ')[-1]
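Two caching notes on the model side. @st.cache_resource (used for load_model above) stores one shared instance per process and hands the same torch module to every session and rerun, without trying to hash or copy it; @st.cache_data (used for np_to_wav in app.py) instead memoizes serializable return values keyed on the function arguments. Separately, the five app sliders line up with latent_features=5 in the checkpoint hyper-parameters, which suggests the "magical parameters" pin the CVAE latent code. generate() is outside this diff, so the following is only an illustrative sketch of that idea, not the repository's implementation (generate_sketch, model.decode, the [0, 1]-to-latent rescaling, and all shapes are assumptions):

import torch

def generate_sketch(class_idx: int, params=None):
    # Sample a random latent unless the user pinned it with the sliders.
    if params is None:
        z = torch.randn(1, 5, device=device)           # latent_features=5
    else:
        # Sliders are in [0, 1]; stretch to a rough N(0, 1) range.
        z = (torch.tensor([params], device=device) - 0.5) * 6.0
    with torch.no_grad():
        label = torch.tensor([class_idx], device=device)
        return model.decode(z, label).squeeze(0).cpu().numpy()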