Yann committed
Commit 86694c3 · 1 Parent(s): 2c14895

push backend

back/.gitignore ADDED
@@ -0,0 +1,142 @@
+ # Test files
+ playing/
+ test_waves/*
+ data/*
+ temp/*
+ output/*
+
+ comparison/
+ reconstruction_waves/
+ reconstruction_waves*
+
+
+ # To preserve file for Dave :/
+ librenderman.so
+
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+
+ # reference only
+ models/docs/
+
+ # large audio samples
+ audio/large
+
+ # final audio outputs
+ audio/outputs
+
+ # large datasets
+ data/large
+
+ # large saved models
+ models/saved/large
+
+ # mac
+ .DS_Store
+
+ # notebook experiments
+ notebooks/experiments
back/.vscode/launch.json ADDED
@@ -0,0 +1,57 @@
+ {
+     // Use IntelliSense to learn about possible attributes.
+     // Hover to view descriptions of existing attributes.
+     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+     "version": "0.2.0",
+     "configurations": [
+         {
+             "name": "1 - Gen Config VST",
+             "type": "python",
+             "request": "launch",
+             "module": "generators.vst_generator",
+             "justMyCode": true,
+             "args": [
+                 "generate"
+             ]
+         },
+         {
+             "name": "2 - Run VST",
+             "type": "python",
+             "request": "launch",
+             "module": "generators.vst_generator",
+             "justMyCode": true,
+             "args": [
+                 "run",
+                 "--config",
+                 "plugin_config/TAL-NoiseMaker-config.json"
+             ]
+         },
+         {
+             "name": "3 - Train Debug",
+             "type": "python",
+             "request": "launch",
+             "module": "models.spectrogram_cnn",
+             "justMyCode": true,
+             "args": [
+                 "--epoch",
+                 "2000",
+                 "--model",
+                 "C6XL"
+             ]
+         },
+         {
+             "name": "4 - Debug Launch",
+             "type": "python",
+             "request": "launch",
+             "module": "models.spectrogram_cnn",
+             "justMyCode": true,
+             "args": [
+                 "--epoch",
+                 "1",
+                 "--model",
+                 "C6XL",
+                 "--resume"
+             ]
+         }
+     ]
+ }
back/.vscode/settings.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "python.analysis.autoImportCompletions": true,
+     "python.analysis.typeCheckingMode": "off",
+     "python.analysis.fixAll": ["source.unusedImports", "source.convertImportFormat"],
+     "editor.defaultFormatter": "ms-python.black-formatter"
+ }
back/InverSynth_00006.wav ADDED
Binary file (65.6 kB). View file
 
back/README.md ADDED
@@ -0,0 +1,38 @@
+ # InverSynth Fork
+ ## AMP Team
+
+ ## Launch instructions:
+
+ *Optional: outputting your own config file for your VST*
+ ```zsh
+ python -m generators.vst_generator generate
+ ```
+ *1. Dataset creation based on a config profile*
+ ```zsh
+ python -m generators.vst_generator run --config "your_config_path.json"
+ ```
+
+ *2. Model training*
+ ```zsh
+ python -m models.spectrogram_cnn --epoch "your_epoch_number" --model C6XL
+ ```
+
+ Parameter | Default | Description
+ ---|---|---
+ `--num_examples` | `2000` | Number of examples to create
+ `--name` | `InverSynth` | Naming convention for datasets
+ `--dataset_directory` | `test_datasets` | Directory for datasets
+ `--wavefile_directory` | `test_waves` | Directory for wave files.<br>Naming convention applied automatically
+ `--length` | `1.0` | Length of each sample in seconds
+ `--sample_rate` | `16384` | Sample rate (Samples/second)
+ `--sampling_method` | `random` | Method to use for generating examples.<br>Currently only random, but may<br>include whole space later
+ Optional | |
+ `--regenerate_samples` | | Regenerate the set of points to explore if it<br>exists (will also force regenerating audio)
+ `--regenerate_audio` | | Regenerate audio files if they exist
+ `--normalise` | | Apply audio normalization
+
+ This module generates a dataset, attempting to recreate the dataset generation as defined in the [paper](paper/1812.06349.pdf).
+
+ Selecting an architecture:
+
+ - `C1`, `C2`, `C3`, `C4`, `C5`, `C6`, `C6XL`
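
For example, a full end-to-end run with explicit flags (the config path matches the bundled launch configuration; adjust paths to your setup):

```zsh
python -m generators.vst_generator run \
  --config "plugin_config/TAL-NoiseMaker-config.json" \
  --num_examples 2000 --length 1.0 --sample_rate 16384 --normalise
python -m models.spectrogram_cnn --epoch 2000 --model C6XL
```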
back/generators/__init__.py ADDED
File without changes
back/generators/generator.py ADDED
@@ -0,0 +1,327 @@
+ import argparse
+ import json
+ import os
+ import os.path
+ from typing import List
+
+ import h5py
+ import numpy as np
+ from scipy.io.wavfile import write as write_wav
+
+ from generators.parameters import *
+
+ """
+ This is a base class to derive different kinds of sound generator from (e.g.
+ custom synthesis, VST plugins)
+ """
+
+
+ class SoundGenerator:
+     """
+     This is now a wrapper round the 'real' generation function
+     to handle normalising and saving
+     """
+
+     def generate(
+         self,
+         parameters: dict,
+         filename: str,
+         length: float,
+         sample_rate: int,
+         extra: dict,
+         normalise: bool = True,
+     ) -> np.ndarray:
+         audio = self.do_generate(parameters, filename, length, sample_rate, extra)
+         if normalise:
+             peak = np.max(np.absolute(audio))
+             if peak > 0:
+                 audio = audio / peak
+         if not self.creates_wave_file():
+             self.write_file(audio, filename, sample_rate)
+         return audio
+
+     def do_generate(
+         self,
+         parameters: dict,
+         filename: str,
+         length: float,
+         sample_rate: int,
+         extra: dict,
+     ) -> np.ndarray:
+         print(
+             "Someone needs to write this method! Generating silence in {} with parameters: {}".format(
+                 filename, str(parameters)
+             )
+         )
+         return np.zeros(int(length * sample_rate))
+
+     def creates_wave_file(self) -> bool:
+         return False
+
+     # Assumes that the data is -1..1 floating point
+     def write_file(self, data: np.ndarray, filename: str, sample_rate: int):
+         # REVIEW: is this needed?
+         # int_data = (data * np.iinfo(np.int16).max).astype(int)
+         write_wav(filename, sample_rate, data)
+
+
+ """
+ This class runs through a parameter set, gets it to generate parameter settings
+ then runs the sound generator over it.
+ """
+
+
+ class DatasetCreator:
+     def __init__(
+         self,
+         name: str,
+         dataset_dir: str,
+         wave_file_dir: str,
+         parameters: ParameterSet,
+         normalise: bool = True,
+     ):
+         self.name = name
+         self.parameters = parameters
+         self.dataset_dir = dataset_dir
+         self.wave_file_dir = wave_file_dir
+         self.normalise = normalise
+         os.makedirs(dataset_dir, exist_ok=True)
+         os.makedirs(f"{wave_file_dir}/{name}", exist_ok=True)
+
+     def create_parameters(
+         self,
+         max: int = 2000,
+         method: str = "complete",
+         extra: dict = {},
+         force_create=False,
+     ) -> str:
+         filename = self.get_dataset_filename("data", "hdf5")
+         if os.path.isfile(filename) and not force_create:
+             print(
+                 "Parameter file exists, not recreating (use --regenerate_samples if you want to force)"
+             )
+             return filename
+         print("+" * 40)
+         print(f"Generating Dataset {self.name}, {max} examples")
+         print(f"Datasets: {self.dataset_dir}")
+         print("+" * 40)
+
+         # Save out the parameters first
+         self.save_parameters()
+
+         # Generate the set of samples (could switch to generators,
+         # but need to figure out arbitrary size arrays in HDF5)
+         dataset: List[Sample] = []
+         # if method == "complete":
+         #     dataset = self.parameters.recursively_generate_all()
+         # else:
+         dataset = self.parameters.sample_space(sample_size=max)
+
+         # Create the data file and add all the points to it
+         with h5py.File(filename, "w") as datafile:
+             # Figure out the sizes to store
+             records = len(dataset)
+             param_size = len(dataset[0].encode())
+
+             # Add columns to it
+             filenames = datafile.create_dataset(
+                 "files", (records,), dtype=h5py.string_dtype()
+             )
+             parameters = datafile.create_dataset(
+                 "parameters", (records,), dtype=h5py.string_dtype()
+             )
+             labels = datafile.create_dataset("labels", (records, param_size))
+             audio_exists = datafile.create_dataset(
+                 "audio_exists", (records,), dtype=np.bool_
+             )
+
+             # Generate the sample points
+             for index, point in enumerate(dataset):
+                 params = self.parameters.to_settings(point)
+                 filenames[index] = self.get_wave_filename(index)
+                 labels[index] = point.encode()
+                 parameters[index] = json.dumps(params)
+                 audio_exists[index] = False
+                 if index % 1000 == 0:
+                     print("Generating parameters for example {}".format(index))
+             datafile.flush()
+
+         return filename
+
+     def generate_audio(
+         self,
+         sound_generator: SoundGenerator,
+         length: float = 1,
+         sample_rate: int = 16384,
+         extra: dict = {},
+         dataset_filename=None,
+         force_generate=True,
+     ):
+         if dataset_filename is None:
+             dataset_filename = self.get_dataset_filename("data", "hdf5")
+
+         print("+" * 40)
+         print(
+             f"Generating Audio for Dataset {self.name} ({dataset_filename}), with {length}s at {sample_rate}/s"
+         )
+         print(f"Output waves: {self.wave_file_dir}, datasets: {self.dataset_dir}")
+         print("+" * 40)
+
+         with h5py.File(dataset_filename, "r+") as datafile:
+             for name, value in datafile.items():
+                 print(f"{name}: {value}")
+             # Get the columns
+             filenames = datafile.get("files")
+             print(filenames)
+             parameters = datafile.get("parameters")
+             print(parameters)
+             audio_exists = datafile.get("audio_exists")
+             print(audio_exists)
+
+             for index, filename in enumerate(filenames):
+                 if (
+                     audio_exists[index]
+                     and os.path.isfile(filename)
+                     and not force_generate
+                 ):
+                     print(f"Audio exists for index {index} ({filename})")
+                 else:
+                     print(f"Generating Audio for index {index} ({filename})")
+                     params = json.loads(parameters[index])
+                     audio = sound_generator.generate(
+                         params,
+                         filename,
+                         length,
+                         sample_rate,
+                         extra,
+                         normalise=self.normalise,
+                     )
+                     # `bool(ndarray)` is ambiguous for multi-sample audio;
+                     # record whether we got any signal back at all
+                     audio_exists[index] = audio is not None and np.any(audio)
+                     datafile.flush()
+                 if index % 1000 == 0:
+                     print("Generating example {}".format(index))
+
+     def save_parameters(self):
+         self.parameters.save_json(self.get_dataset_filename("params", "json"))
+         self.parameters.save(self.get_dataset_filename("params", "pckl"))
+
+     def get_dataset_filename(self, type: str, extension: str = "txt") -> str:
+         return f"{self.dataset_dir}/{self.name}_{type}.{extension}"
+
+     def get_wave_filename(self, index: int) -> str:
+         return f"{self.wave_file_dir}/{self.name}/{self.name}_{index:05d}.wav"
+
+
+ def default_generator_argparse():
+     parser = argparse.ArgumentParser(description="Dataset generation options.")
+     parser.add_argument(
+         "--num_examples",
+         type=int,
+         dest="samples",
+         action="store",
+         default=20000,
+         help="Number of examples to create",
+     )
+     parser.add_argument(
+         "--name",
+         type=str,
+         dest="name",
+         default="InverSynth",
+         help="Name of datasets to create",
+     )
+     parser.add_argument(
+         "--dataset_directory",
+         type=str,
+         dest="data_dir",
+         default="test_datasets",
+         help="Directory to put datasets",
+     )
+     parser.add_argument(
+         "--wavefile_directory",
+         type=str,
+         dest="wave_dir",
+         default="test_waves",
+         help="Directory to put wave files. Will have the dataset name appended automatically",
+     )
+     parser.add_argument(
+         "--length",
+         type=float,
+         dest="length",
+         default=1.0,
+         help="Length of each sample in seconds",
+     )
+     parser.add_argument(
+         "--sample_rate",
+         type=int,
+         dest="sample_rate",
+         default=16384,
+         help="Sample rate (Samples/second)",
+     )
+     parser.add_argument(
+         "--sampling_method",
+         type=str,
+         dest="method",
+         default="random",
+         choices=["random"],
+         help="Method to use for generating examples. Currently only random, but may include whole space later",
+     )
+     parser.add_argument(
+         "--regenerate_samples",
+         action="store_true",
+         help="Regenerate the set of points to explore if it exists (will also force regenerating audio)",
+     )
+     parser.add_argument(
+         "--regenerate_audio",
+         action="store_true",
+         help="Regenerate audio files if they exist",
+     )
+     parser.add_argument(
+         "--normalise", action="store_true", help="Apply audio normalisation"
+     )
+     return parser
+
+
+ def generate_examples(
+     gen: SoundGenerator, parameters: ParameterSet, args=None, extra={}
+ ):
+     if not args:
+         parser = default_generator_argparse()
+         args = parser.parse_args()
+
+     g = DatasetCreator(
+         name=args.name,
+         dataset_dir=args.data_dir,
+         wave_file_dir=args.wave_dir,
+         parameters=parameters,
+         normalise=args.normalise,
+     )
+
+     g.create_parameters(
+         max=args.samples, method=args.method, force_create=True
+     )
+
+     g.generate_audio(
+         sound_generator=gen,
+         length=args.length,
+         sample_rate=args.sample_rate,
+         extra=extra,
+         force_generate=args.regenerate_audio or args.regenerate_samples,
+     )
+
+
+ if __name__ == "__main__":
+     gen = SoundGenerator()
+     parameters = ParameterSet(
+         [
+             Parameter("p1", [100, 110, 120, 130, 140]),
+             Parameter("p2", [200, 220, 240, 260, 280]),
+         ]
+     )
+     g = DatasetCreator(
+         "example_generator",
+         dataset_dir="test_datasets",
+         wave_file_dir="test_waves/example/",
+         parameters=parameters,
+     )
+     # DatasetCreator has no generate_examples method; drive it directly
+     g.create_parameters(max=100)
+     g.generate_audio(sound_generator=gen)
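
As a quick illustration of the contract above (a hedged sketch, not part of this commit: `SineGenerator` and its `freq` parameter are made up), a subclass only has to override `do_generate` and return a float array; the base class then normalises the audio and writes the wave file:

```python
import numpy as np

from generators.generator import SoundGenerator


class SineGenerator(SoundGenerator):
    """Toy generator: renders a sine wave at the 'freq' parameter."""

    def do_generate(
        self, parameters: dict, filename: str, length: float, sample_rate: int, extra: dict
    ) -> np.ndarray:
        freq = parameters.get("freq", 440.0)
        t = np.arange(int(length * sample_rate)) / sample_rate
        return np.sin(2 * np.pi * freq * t)


# creates_wave_file() is False, so generate() normalises and writes sine.wav itself
audio = SineGenerator().generate({"freq": 220.0}, "sine.wav", 1.0, 16384, extra={})
```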
back/generators/parameters.py ADDED
@@ -0,0 +1,176 @@
+ import json
+ import random
+ from dataclasses import dataclass
+ from pickle import dump
+ from typing import Dict, List, Sequence, Tuple
+
+ import numpy as np
+
+ """
+ A setting for a parameter, with its one-hot encoding
+ """
+
+
+ @dataclass
+ class ParamValue:
+     name: str
+     value: float
+     encoding: List[float]
+
+
+ """
+ A sample point - the parameter values, the one-hot encoding and the audio
+ """
+
+
+ @dataclass
+ class Sample:
+     parameters: List[ParamValue]
+
+     def value_list(self) -> List[Tuple[str, float]]:
+         return [(p.name, p.value) for p in self.parameters]
+
+     def encode(self) -> List[float]:
+         return np.hstack([p.encoding for p in self.parameters])
+
+
+ class Parameter:
+     def __init__(self, name: str, levels: list, id=""):
+         self.name = name
+         self.levels = levels
+         self.id = id
+
+     def get_levels(self) -> List[ParamValue]:
+         return [self.get_value(i) for i in range(len(self.levels))]
+
+     def sample(self) -> ParamValue:
+         index: int = random.choice(range(len(self.levels)))
+         return self.get_value(index)
+
+     def get_value(self, index: int) -> ParamValue:
+         encoding = np.zeros(len(self.levels)).astype(float)
+         encoding[index] = 1.0
+         return ParamValue(
+             name=self.name,
+             # Actual value
+             value=self.levels[index],
+             # One-hot encoding
+             encoding=encoding,
+         )
+
+     def decode(self, one_hot: List[float]) -> ParamValue:
+         ind = np.array(one_hot).argmax()
+         return self.get_value(ind)
+
+     def from_output(
+         self, current_output: List[float]
+     ) -> Tuple[ParamValue, List[float]]:
+         param_data = current_output[: len(self.levels)]
+         remaining = current_output[len(self.levels) :]
+         my_val = self.decode(param_data)
+         return (my_val, remaining)
+
+     def to_json(self):
+         return {"name": self.name, "levels": self.levels, "id": self.id}
+
+
+ class ParameterSet:
+     def __init__(self, parameters: List[Parameter], fixed_parameters: dict = {}):
+         self.parameters = parameters
+         self.fixed_parameters = fixed_parameters
+
+     def sample_space(self, sample_size=2000) -> Sequence[Sample]:
+         print("Sampling {} points from parameter space".format(sample_size))
+         dataset = []
+         for i in range(sample_size):
+             params = [p.sample() for p in self.parameters]
+             dataset.append(Sample(params))
+             if i % 1000 == 0:
+                 print("Sampling iteration: {}".format(i))
+         return dataset
+
+     # Runs through the whole parameter space, setting up parameters and calling
+     # the generation function.
+     # Excuse the slightly hacky recursion - there is surely a more numpy-ish way to do it!
+     def recursively_generate_all(
+         self, parameter_list: list = None, parameter_set=None, return_list=None
+     ) -> Sequence[Sample]:
+         print("Generating entire parameter space")
+         if parameter_list is None:
+             parameter_list = self.parameters
+         # Avoid mutable default arguments: fresh lists per top-level call
+         if parameter_set is None:
+             parameter_set = []
+         if return_list is None:
+             return_list = []
+         param = parameter_list[0]
+         remaining = parameter_list[1:]
+         for p in param.levels:
+             ps = parameter_set.copy()
+             ps.append((param.name, p))
+             if len(remaining) == 0:
+                 return_list.append(ps)
+             else:
+                 self.recursively_generate_all(remaining, ps, return_list)
+         return return_list
+
+     def to_settings(self, p: Sample):
+         params = self.fixed_parameters.copy()
+         params.update(dict(p.value_list()))
+         return params
+
+     def encoding_to_settings(self, output: List[float]) -> Dict[str, float]:
+         params = self.fixed_parameters.copy()
+         for p in self.decode(output):
+             params[p.name] = p.value
+         return params
+
+     def decode(self, output: List[float]) -> List[ParamValue]:
+         values = []
+         for p in self.parameters:
+             v, output = p.from_output(output)
+             values.append(v)
+         if len(output) > 0:
+             print("Leftover output!: {}".format(output))
+         return values
+
+     def save(self, filename):
+         with open(filename, "wb") as file:
+             dump(self, file)
+
+     def save_json(self, filename):
+         data = self.to_json()
+         with open(filename, "w") as file:
+             json.dump(data, file, indent=2)
+
+     def explain(self):
+         levels = 0
+         for p in self.parameters:
+             levels += len(p.levels)
+         return {
+             "n_variable": len(self.parameters),
+             "n_fixed": len(self.fixed_parameters),
+             "levels": levels,
+         }
+
+     def to_json(self):
+         return {
+             "parameters": [p.to_json() for p in self.parameters],
+             "fixed": self.fixed_parameters,
+         }
+
+
+ """
+ Generates evenly spaced parameter values
+ paper:
+ The rest of the synthesizer parameters ranges are quantized evenly to 16
+ classes according to the following ranges ...
+ For each parameter, the first and last classes correspond to its range limits
+ """
+
+
+ def param_range(steps, min, max):
+     ext = float(max - min)
+     return [n * ext / (steps - 1) + min for n in range(steps)]
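
A quick round trip through the encoding (a hedged sketch; the parameter names are illustrative): `param_range(16, 0, 1)` quantises 0..1 into 16 evenly spaced levels, `sample_space` draws one level per parameter, and `encoding_to_settings` maps the flat one-hot vector back to values:

```python
from generators.parameters import Parameter, ParameterSet, param_range

params = ParameterSet(
    [
        Parameter("cutoff", param_range(16, 0, 1)),     # 16 classes over 0..1
        Parameter("resonance", param_range(16, 0, 1)),
    ],
    fixed_parameters={"volume": 0.9},
)

sample = params.sample_space(sample_size=1)[0]
encoded = sample.encode()                     # flat one-hot vector, length 16 + 16
print(params.encoding_to_settings(encoded))   # {'volume': 0.9, 'cutoff': ..., 'resonance': ...}
```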
back/generators/vst_generator.py ADDED
@@ -0,0 +1,235 @@
+ import json
+
+ import dawdreamer as rm
+ import librosa
+ import numpy as np
+ import pandas as pd
+
+ from generators.generator import *
+ from generators.parameters import *
+
+
+ class VSTGenerator(SoundGenerator):
+     def __init__(
+         self,
+         vst: str,
+         sample_rate,
+         randomise_non_set: bool = True,
+         randomise_all: bool = False,
+     ):
+         self.vst = vst
+         self.randomise_non_set = randomise_non_set
+         self.randomise_all = randomise_all
+         self.sample_rate = sample_rate
+         self.load_engine()
+
+     def load_engine(self):
+         print("_____ LOADING VST _______")
+         self.engine = None
+         self.synth = None
+         engine = rm.RenderEngine(self.sample_rate, 1024)
+         synth = engine.make_plugin_processor("my_synth", self.vst)
+         if synth:
+             print("Loaded {}".format(self.vst))
+             self.engine = engine
+             self.synth = synth
+         else:
+             print("Couldn't load VST {}".format(self.vst))
+         print("_____ LOADED VST _______")
+
+     def do_generate(
+         self,
+         parameters: dict,
+         filename: str,
+         length: float,
+         sample_rate: int,
+         extra: dict = {},
+     ) -> np.ndarray:
+         if not self.engine:
+             print("VST not loaded")
+             return np.zeros(5)
+         resample = False
+         if not self.sample_rate == sample_rate:
+             resample = True
+         synth = self.synth
+         engine = self.engine
+
+         ids = {p["name"]: p["id"] for p in extra["config"]["fixed_parameters"]}
+         ids.update({p["name"]: p["id"] for p in extra["config"]["parameters"]})
+
+         # if self.randomise_non_set:
+         #     new_patch = self.patch_generator.get_random_patch()
+         #     engine.set_patch(new_patch)
+
+         # Start with the synth's current patch as defaults
+         synth_params = dict(synth.get_patch())
+
+         for name, value in parameters.items():
+             synth_params[ids[name]] = value
+
+         # if self.randomise_all:
+         #     new_patch = self.patch_generator.get_random_patch()
+         #     engine.set_patch(new_patch)
+
+         note_length = length * 0.8
+         if "note_length" in extra:
+             note_length = extra["note_length"]
+
+         synth.set_patch(list(synth_params.items()))
+         synth.add_midi_note(40, 127, 0.1, note_length)
+         # don't do reverb
+         graph = [
+             (synth, []),  # synth takes no inputs, so we give an empty list.
+         ]
+
+         engine.load_graph(graph)
+         engine.render(1)
+         data = engine.get_audio()
+         df = pd.DataFrame(data)
+         try:
+             data = librosa.to_mono(data).transpose()
+         except Exception:
+             print("ERROR" * 100)
+             df = df.fillna(0)
+             data = df.to_numpy()
+             data = librosa.to_mono(data).transpose()
+
+         if librosa.util.valid_audio(data):
+             return np.array(data)
+         # Fall back to silence if the rendered audio is not valid
+         return np.zeros(int(length * sample_rate))
+
+     def create_config(self, filename="default_config.json", default_value=0.0):
+         params = []
+         fixed = []
+         for line in self.synth.get_parameters_description():
+             line["defaultValue"] = float(line["defaultValue"])
+             if line["index"] < 86:
+                 params.append(
+                     {
+                         "id": line["index"],
+                         "name": line["name"],
+                         "value": line["defaultValue"],
+                     }
+                 )
+         output = {"parameters": params, "fixed_parameters": fixed}
+         # Honour an explicit output path; otherwise derive one from the plugin name
+         out_path = filename
+         if not out_path:
+             os.makedirs("plugin_config", exist_ok=True)
+             out_path = "plugin_config/gen_config_" + str(self.vst) + ".json"
+         with open(out_path, "w") as f:
+             json.dump(output, f, indent=4)
+         return output
+
+
+ # Run the generator to create a full dataset
+ def run_generator(args):
+     note_length = args.note_length
+     if note_length < 0.0:
+         # Negative means "use the default": 80% of the sample length
+         note_length = args.length * 0.8
+
+     with open(args.config_file, "r") as f:
+         config = json.load(f)
+     sample = []
+     for p in config["parameters"]:
+         if isinstance(p["values"], str):
+             sample.append(Parameter(p["name"], param_range(16, 0, 1), p.get("id", "")))
+         elif isinstance(p["values"], list):
+             sample.append(Parameter(p["name"], p["values"], p.get("id", "")))
+
+     fixed = dict([(p["name"], p["value"]) for p in config["fixed_parameters"]])
+
+     plugin_rate = args.generate_samplerate or args.sample_rate
+
+     generate_examples(
+         gen=VSTGenerator(vst=args.plugin, sample_rate=plugin_rate),
+         parameters=ParameterSet(parameters=sample, fixed_parameters=fixed),
+         args=args,
+         extra={"note_length": note_length, "config": config},
+     )
+
+
+ # Create a blank config file based on the plugin's parameter set
+ def generate_defaults(plugin: str, output: str, default: float = 0.5):
+     gen = VSTGenerator(vst=plugin, sample_rate=16384)
+     gen.create_config(output, default_value=default)
+
+
+ # Example: python -m generators.vst_generator run --plugin /Library/Audio/Plug-Ins/VST/Lokomotiv.vst --config plugin_config/lokomotiv.json --dataset_name explore --wavefile_directory "test_waves/explore"
+
+ if __name__ == "__main__":
+     parser = default_generator_argparse()
+     parser.add_argument(
+         "command",
+         type=str,
+         choices=["run", "generate"],
+         help="action to take: run (run the generator with a config) or generate (generate a blank config file for the plugin)",
+     )
+     parser.add_argument(
+         "--plugin",
+         dest="plugin",
+         default="libTAL-NoiseMaker.so",
+         help='plugin file: .so on Linux; on macOS it is the top-level plugin dir, e.g. "/Library/Audio/Plug-Ins/VST/Lokomotiv.vst"',
+     )
+     parser.add_argument(
+         "--output", dest="outfile", help="Place to store the generated parameters file"
+     )
+     parser.add_argument("--config", dest="config_file", help="Config file to use")
+     parser.add_argument(
+         "--default_value",
+         type=float,
+         dest="default_param",
+         action="store",
+         default=0.5,
+         help="Default setting for parameters when generating a blank config",
+     )
+     parser.add_argument(
+         "--note_length",
+         type=float,
+         dest="note_length",
+         default=0.8,
+         help="Length of a note in seconds",
+     )
+     parser.add_argument(
+         "--generation_sample_rate",
+         type=int,
+         default=None,
+         dest="generate_samplerate",
+         help="Sample rate for audio generation. Defaults to the target sample rate, but some plugins (Dexed) have trouble running at our unusual sample rates. Will be resampled to the target rate after generation",
+     )
+
+     args = parser.parse_args()
+     print(args)
+     if args.command == "run":
+         run_generator(args)
+
+     if args.command == "generate":
+         generate_defaults(args.plugin, args.outfile, args.default_param)
+     quit()
+
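
For reference, the render path that `do_generate` wraps boils down to the following minimal sketch (it reuses the same dawdreamer calls as the code above; the plugin path is whatever you would pass as `--plugin`):

```python
import dawdreamer as daw
import librosa
from scipy.io.wavfile import write as write_wav

SAMPLE_RATE = 16384

engine = daw.RenderEngine(SAMPLE_RATE, 1024)
synth = engine.make_plugin_processor("my_synth", "libTAL-NoiseMaker.so")

synth.add_midi_note(40, 127, 0.1, 0.8)  # (note, velocity, start, duration)
engine.load_graph([(synth, [])])        # synth takes no inputs
engine.render(1)                        # render one second of audio

audio = librosa.to_mono(engine.get_audio()).transpose()
write_wav("render.wav", SAMPLE_RATE, audio)
```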
back/main.py ADDED
@@ -0,0 +1,179 @@
+ import os
+ import pathlib
+ import uuid
+ from contextlib import asynccontextmanager
+ from glob import glob
+
+ from fastapi import FastAPI, File, HTTPException, UploadFile
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import JSONResponse
+ from fastapi.staticfiles import StaticFiles
+ from starlette.exceptions import HTTPException as StarletteHTTPException
+
+ from models.launch import inferrence, train_model
+ from models.spectrogram_cnn import get_model
+
+ # distinguish model type for reshaping
+
+ SERVER = "http://localhost:7860/"
+
+ path = os.path.dirname(os.path.realpath(__file__))
+
+
+ def load_model_and_parameters():
+     setup = {
+         "model_name": "C6XL",
+         "dataset_name": "InverSynth",
+         "epochs": 1,
+         "dataset_dir": "test_datasets",
+         "output_dir": "output",
+         "dataset_file": None,
+         "parameters_file": None,
+         "data_format": "channels_last",
+         "run_name": None,
+         "resume": True,
+     }
+     setup["model_type"] = "STFT"
+
+     try:
+         # Load the model
+         model, parameters_file = train_model(model_callback=get_model, **setup)
+     except Exception as e:
+         print(f"Couldn't load model: {e}")
+         return None, None
+
+     return model, parameters_file
+
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     # Remove all files in the temp folder
+     tempFolderPath = os.path.join(path, "temp")
+     if os.path.exists(tempFolderPath):
+         for file_name in os.listdir(tempFolderPath):
+             file_path = os.path.join(tempFolderPath, file_name)
+             try:
+                 if os.path.isfile(file_path):
+                     os.remove(file_path)
+             except Exception as e:
+                 print(f"Error deleting file {file_path}: {e}")
+
+     if not os.path.exists(tempFolderPath):
+         os.makedirs(tempFolderPath)
+
+     yield
+
+
+ app = FastAPI(lifespan=lifespan)
+
+ str_p = str(path)
+
+
+ class SPAStaticFiles(StaticFiles):
+     async def get_response(self, path: str, scope):
+         try:
+             return await super().get_response(path, scope)
+         except (HTTPException, StarletteHTTPException) as ex:
+             if ex.status_code == 404:
+                 return await super().get_response("index.html", scope)
+             else:
+                 raise ex
+
+
+ @app.get("/download/{file_id}")
+ async def generate_audio(file_id: str):
+     try:
+         # Use glob to find files starting with the specified ID
+         matching_files = glob(f"temp/{file_id}*")
+
+         if not matching_files:
+             # Handle the case when no matching file is found
+             print(f"No file found for file ID {file_id}")
+             raise HTTPException(status_code=404, detail="File not found")
+         else:
+             # Return the first matching file
+             source_file_path = matching_files[0]
+             return JSONResponse(content={"url": f"{source_file_path}"})
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ def is_valid_audio(file_extension):
+     # Define a list of valid audio file extensions
+     valid_audio_extensions = [".mp3", ".wav", ".ogg", ".flac"]
+
+     # Check if the provided file extension is in the list of valid audio extensions
+     return file_extension.lower() in valid_audio_extensions
+
+
+ @app.post("/upload/")
+ async def upload_audio_file(file: UploadFile = File(...)):
+     try:
+         model, parameters_file = load_model_and_parameters()
+     except Exception:
+         raise HTTPException(status_code=500, detail="Couldn't load model")
+     try:
+         # Create a unique identifier for the uploaded file
+         file_id = str(uuid.uuid4())
+
+         # Extract the original file extension
+         _, file_extension = os.path.splitext(file.filename)
+
+         # Check if the file has a valid audio extension
+         if not is_valid_audio(file_extension):
+             raise HTTPException(status_code=400, detail="Invalid audio file format")
+
+         # Construct the file path with the original file extension
+         file_path = os.path.join("temp", file_id + file_extension)
+
+         with open(file_path, "wb") as audio_file:
+             audio_file.write(file.file.read())
+         output = await start_inference(
+             model=model,
+             parameters_file=parameters_file,
+             file_id=file_id,
+             file_extension=file_extension,
+         )
+         # Send a confirmation with the identifier
+         return {
+             "file_path": SERVER + output[0],
+             "csv_path": SERVER + output[1],
+             "output_file_path": SERVER + output[2],
+         }
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ async def start_inference(model, parameters_file, file_id: str, file_extension: str):
+     file_path = os.path.join("temp", file_id + file_extension)
+
+     output = inferrence(
+         model=model, parameters_file=parameters_file, file_path=file_path, file_id=file_id
+     )
+
+     return output
+
+
+ origins = ["*"]
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ app.mount(
+     "/temp", StaticFiles(directory="temp", check_dir=True, html=True), name="temp"
+ )
+ app.mount(
+     "/",
+     SPAStaticFiles(directory=f"{pathlib.PurePath(str_p).parent}/front/dist", html=True),
+     name="dist",
+ )
+
+
+ if __name__ == "__main__":
+     import uvicorn
+
+     uvicorn.run(app, host="0.0.0.0", port=7860)
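
A hedged client-side sketch of the two endpoints above (it uses the `requests` library, which is not part of this repo; field and path names match the handlers, the file name is illustrative):

```python
import requests

SERVER = "http://localhost:7860"

# POST /upload/ : upload a wav and trigger inference
with open("InverSynth_00006.wav", "rb") as f:
    r = requests.post(f"{SERVER}/upload/", files={"file": ("test.wav", f, "audio/wav")})
r.raise_for_status()
print(r.json())  # {'file_path': ..., 'csv_path': ..., 'output_file_path': ...}

# GET /download/{file_id} : look a file up by the uuid embedded in those paths
file_id = "..."  # hypothetical id
print(requests.get(f"{SERVER}/download/{file_id}").json())  # {'url': 'temp/<id>.wav'}
```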
back/models/__init__.py ADDED
File without changes
back/models/app.py ADDED
@@ -0,0 +1,645 @@
1
+ import datetime
2
+ import json
3
+ import logging
4
+ import os
5
+ from pickle import load
6
+ from typing import Callable, List
7
+ import librosa
8
+ import numpy as np
9
+ import pandas as pd
10
+ import tensorflow as tf
11
+ from tensorflow import keras
12
+ from keras import backend as K
13
+ from keras.callbacks import CSVLogger
14
+ from kapre.time_frequency import Spectrogram
15
+ from models.importer_audio import audio_importer
16
+ import dawdreamer as daw
17
+ from scipy.io import wavfile
18
+ import librosa
19
+
20
+
21
+ from generators.parameters import ParameterSet, ParamValue
22
+ from models.common.data_generator import SoundDataGenerator
23
+
24
+
25
+ weight_var = K.variable(0.)
26
+
27
+ class Weight_trans(keras.callbacks.Callback):
28
+ def __init__(self, weight_var, transition, epochs):
29
+ self.alpha = weight_var
30
+ self.transition = transition
31
+ self.epochs = epochs
32
+ def on_epoch_end(self, epoch, logs={}):
33
+ if epoch > 680:
34
+ if self.transition == "linear":
35
+ K.set_value(self.alpha, ((epoch)/(self.epochs) - 0.617)*0.00001)
36
+ tf.print(f"new weight {weight_var.numpy()}")
37
+ if self.transition == "linear2":
38
+ K.set_value(self.alpha, (1.5625*epoch - 1.0625)*0.00001)
39
+ tf.print(f"new weight {weight_var.numpy()}")
40
+ if self.transition == "log":
41
+ K.set_value(self.alpha, (1- (tf.math.log(epoch*0.001 - 0.67285)/tf.math.log(0.0005)) - 0.35)*0.00001)
42
+ tf.print("log")
43
+ if self.transition == "log2":
44
+ K.set_value(self.alpha, (1- (tf.math.log(epoch*0.001 - 0.6575)/tf.math.log(0.0005)) - 0.5)*0.00001)
45
+ tf.print("log")
46
+ if self.transition == "log3":
47
+ K.set_value(self.alpha, (1- (tf.math.log(epoch*0.001 - 0.67978)/tf.math.log(0.00000005)) - 0.5)*0.00001)
48
+ tf.print("log")
49
+ if self.transition == "square":
50
+ K.set_value(self.alpha, 4.1*tf.pow(epoch*0.001 - 0.65, 2) + 0.002)
51
+ print("exp")
52
+ if self.transition == "quad":
53
+ K.set_value(self.alpha, 33*tf.pow(epoch*0.001 - 0.65, 4) + 0.002)
54
+ print("quad")
55
+
56
+
57
+ def train_val_split(
58
+ x_train: np.ndarray, y_train: np.ndarray, split: float = 0.2,
59
+ ) -> tuple:
60
+
61
+ slice: int = int(x_train.shape[0] * split)
62
+
63
+ x_val: np.ndarray = x_train[-slice:]
64
+ y_val: np.ndarray = y_train[-slice:]
65
+
66
+ x_train = x_train[:-slice]
67
+ y_train = y_train[:-slice]
68
+
69
+ return (x_val, y_val, x_train, y_train)
70
+
71
+
72
+ """Model Utils"""
73
+
74
+
75
+ def mean_percentile_rank(y_true, y_pred, k=5):
76
+ """
77
+ @paper
78
+ The first evaluation measure is the Mean Percentile Rank
79
+ (MPR) which is computed per synthesizer parameter.
80
+ """
81
+ # TODO
82
+
83
+
84
+ def top_k_mean_accuracy(y_true, y_pred, k=5):
85
+ """
86
+ @ paper
87
+ The top-k mean accuracy is obtained by computing the top-k
88
+ accuracy for each test example and then taking the mean across
89
+ all examples. In the same manner as done in the MPR analysis,
90
+ we compute the top-k mean accuracy per synthesizer
91
+ parameter for 𝑘 = 1, ... ,5.
92
+ """
93
+ # TODO: per parameter?
94
+ original_shape = tf.shape(y_true)
95
+ y_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
96
+ y_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))
97
+ top_k = K.in_top_k(y_pred, tf.cast(tf.argmax(y_true, axis=-1), "int32"), k)
98
+ correct_pred = tf.reshape(top_k, original_shape[:-1])
99
+ return tf.reduce_mean(tf.cast(correct_pred, tf.float32))
100
+
101
+ @tf.function
102
+ def CustomLoss(y_true, y_pred):
103
+ bce = tf.keras.losses.BinaryCrossentropy()
104
+ weights = custom_spectral_loss(y_true, y_pred)
105
+ weight_shift = (1-weight_var.numpy())+(weight_var.numpy()*weights.numpy())
106
+ # tf.print(f"New weight is {weight_shift}")
107
+ loss = bce(y_true, y_pred, sample_weight=weight_shift)
108
+ return loss
109
+
110
+ @tf.function
111
+ def custom_spectral_loss(y_true, y_pred):
112
+ # tf.print("After compiling model :",tf.executing_eagerly())
113
+
114
+ y_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
115
+ y_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))
116
+
117
+
118
+ # Assuming y_true and y_pred contain parameters for audio synthesis
119
+ # Extract parameters from y_true and y_pred
120
+ with open("test_datasets/InverSynth_params.pckl", "rb") as f:
121
+ parameters: ParameterSet = load(f)
122
+
123
+ predlist_true: List[ParamValue] = parameters.decode(y_true[0])
124
+
125
+ predlist_pred: List[ParamValue] = parameters.decode(y_pred[0])
126
+
127
+ # Convert parameter lists to DataFrames
128
+ # Generate audio from parameters
129
+ audio_true, penalty = generate_audio(predlist_true)
130
+ audio_pred, penalty = generate_audio(predlist_pred)
131
+
132
+ # Compute spectrogram
133
+ if SPECTRO_TYPE == 'spectro':
134
+ spectrogram_true = tf.math.abs(tf.signal.stft(audio_true, frame_length=1024, frame_step=512))
135
+ spectrogram_pred = tf.math.abs(tf.signal.stft(audio_pred, frame_length=1024, frame_step=512))
136
+ elif SPECTRO_TYPE == 'qtrans':
137
+ spectrogram_true = librosa.amplitude_to_db(librosa.cqt(audio_true, sr=SAMPLE_RATE, hop_length=128), ref=np.max)
138
+ spectrogram_pred = librosa.amplitude_to_db(librosa.cqt(audio_pred, sr=SAMPLE_RATE, hop_length=128), ref=np.max)
139
+ elif SPECTRO_TYPE == 'mel':
140
+ mel_spect = librosa.feature.melspectrogram(audio_true, sr=SAMPLE_RATE, n_fft=2048, hop_length=1024)
141
+ spectrogram_true = librosa.power_to_db(mel_spect, ref=np.max)
142
+ mel_spect = librosa.feature.melspectrogram(audio_pred, sr=SAMPLE_RATE, n_fft=2048, hop_length=1024)
143
+ spectrogram_pred = librosa.power_to_db(mel_spect, ref=np.max)
144
+ #L1 LOSS
145
+ if LOSS_TYPE == 'L1':
146
+ spectral_loss = penalty*tf.reduce_mean(tf.abs(spectrogram_true-spectrogram_pred))
147
+ #L2 LOSS
148
+ elif LOSS_TYPE =='L2':
149
+ spectral_loss = penalty*tf.reduce_mean((spectrogram_true - spectrogram_pred)**2)
150
+ #COSINE LOSS
151
+ elif LOSS_TYPE == 'COSINE':
152
+ spectral_loss = tf.losses.cosine_distance(spectrogram_true, spectrogram_pred, weights=1.0, axis=-1)
153
+
154
+ return spectral_loss
155
+
156
+ def summarize_compile(model: keras.Model):
157
+ model.summary(line_length=80, positions=[0.33, 0.65, 0.8, 1.0], show_trainable=True, expand_nested=True)
158
+ # Specify the training configuration (optimizer, loss, metrics)
159
+ model.compile(
160
+ optimizer=keras.optimizers.Adam(), # Optimizer- Adam [14] optimizer
161
+ # Loss function to minimize
162
+ # @paper: Therefore, we converged on using sigmoid activations with binary cross entropy loss.
163
+ # loss=keras.losses.BinaryCrossentropy(),
164
+ loss=CustomLoss,
165
+ # List of metrics to monitor
166
+ metrics=[
167
+ # @paper: 1) Mean Percentile Rank?
168
+ # mean_percentile_rank,
169
+ # @paper: 2) Top-k mean accuracy based evaluation
170
+ top_k_mean_accuracy,
171
+ custom_spectral_loss,
172
+ # Extra Adding 3) spectroloss accuracy
173
+ # Extra Adding 4) combined
174
+ # @paper: 5) Mean Absolute Error based evaluation
175
+ keras.metrics.MeanAbsoluteError(),
176
+ ],
177
+ )
178
+
179
+ def fit(
180
+ model: keras.Model,
181
+ x_train: np.ndarray,
182
+ y_train: np.ndarray,
183
+ x_val: np.ndarray,
184
+ y_val: np.ndarray,
185
+ batch_size: int = 16,
186
+ epochs: int = 200,
187
+ ) -> keras.Model:
188
+
189
+ # @paper:
190
+ # with a minibatch size of 16 for
191
+ # 100 epochs. The best weights for each model were set by
192
+ # employing an early stopping procedure.
193
+ logging.info("# Fit model on training data")
194
+ history = model.fit(
195
+ x_train,
196
+ y_train,
197
+ batch_size=batch_size,
198
+ epochs=epochs,
199
+ # @paper:
200
+ # Early stopping procedure:
201
+ # We pass some validation for
202
+ # monitoring validation loss and metrics
203
+ # at the end of each epoch
204
+ validation_data=(x_val, y_val),
205
+ verbose=0,
206
+ )
207
+
208
+ # The returned "history" object holds a record
209
+ # of the loss values and metric values during training
210
+ logging.info("\nhistory dict:", history.history)
211
+
212
+ return model
213
+
214
+
215
+ def compare(target, prediction, params, precision=1, print_output=False):
216
+ if print_output and len(prediction) < 10:
217
+ print(prediction)
218
+ print("Pred: {}".format(np.round(prediction, decimals=2)))
219
+ print("PRnd: {}".format(np.round(prediction)))
220
+ print("Act : {}".format(target))
221
+ print("+" * 5)
222
+
223
+ pred: List[ParamValue] = params.decode(prediction)
224
+ act: List[ParamValue] = params.decode(target)
225
+ pred_index: List[int] = [np.array(p.encoding).argmax() for p in pred]
226
+ act_index: List[int] = [np.array(p.encoding).argmax() for p in act]
227
+ width = 8
228
+ names = "Parameter: "
229
+ act_s = "Actual: "
230
+ pred_s = "Predicted: "
231
+ pred_i = "Pred. Indx:"
232
+ act_i = "Act. Index:"
233
+ diff_i = "Index Diff:"
234
+ for p in act:
235
+ names += p.name.rjust(width)[:width]
236
+ act_s += f"{p.value:>8.2f}"
237
+ for p in pred:
238
+ pred_s += f"{p.value:>8.2f}"
239
+ for p in pred_index:
240
+ pred_i += f"{p:>8}"
241
+ for p in act_index:
242
+ act_i += f"{p:>8}"
243
+ for i in range(len(act_index)):
244
+ diff = pred_index[i] - act_index[i]
245
+ diff_i += f"{diff:>8}"
246
+ exact = 0.0
247
+ close = 0.0
248
+ n_params = len(pred_index)
249
+ for i in range(n_params):
250
+ if pred_index[i] == act_index[i]:
251
+ exact = exact + 1.0
252
+ if abs(pred_index[i] - act_index[i]) <= precision:
253
+ close = close + 1.0
254
+ exact_ratio = exact / n_params
255
+ close_ratio = close / n_params
256
+ if print_output:
257
+ print(names)
258
+ print(act_s)
259
+ print(pred_s)
260
+ print(act_i)
261
+ print(pred_i)
262
+ print(diff_i)
263
+ print("-" * 30)
264
+ return exact_ratio, close_ratio
265
+
266
+
267
+ def evaluate(
268
+ prediction: np.ndarray, x: np.ndarray, y: np.ndarray, params: ParameterSet,
269
+ ):
270
+
271
+ print("Prediction Shape: {}".format(prediction.shape))
272
+
273
+ num: int = x.shape[0]
274
+ correct: int = 0
275
+ correct_r: float = 0.0
276
+ close_r: float = 0.0
277
+ for i in range(num):
278
+ should_print = i < 5
279
+ exact, close = compare(
280
+ target=y[i],
281
+ prediction=prediction[i],
282
+ params=params,
283
+ print_output=should_print,
284
+ )
285
+ if exact == 1.0:
286
+ correct = correct + 1
287
+ correct_r += exact
288
+ close_r += close
289
+ summary = params.explain()
290
+ print(
291
+ "{} Parameters with {} levels (fixed: {})".format(
292
+ summary["n_variable"], summary["levels"], summary["n_fixed"]
293
+ )
294
+ )
295
+ print(
296
+ "Got {} out of {} ({:.1f}% perfect); Exact params: {:.1f}%, Close params: {:.1f}%".format(
297
+ correct,
298
+ num,
299
+ correct / num * 100,
300
+ correct_r / num * 100,
301
+ close_r / num * 100,
302
+ )
303
+ )
304
+
305
+
306
+ def data_format_audio(audio: np.ndarray, data_format: str) -> np.ndarray:
307
+ # `(None, n_channel, n_freq, n_time)` if `'channels_first'`,
308
+ # `(None, n_freq, n_time, n_channel)` if `'channels_last'`,
309
+
310
+ if data_format == "channels_last":
311
+ audio = audio[np.newaxis, :, np.newaxis]
312
+ else:
313
+ audio = audio[np.newaxis, np.newaxis, :]
314
+
315
+ return audio
316
+
317
+
318
+ """
319
+ Wrap up the whole training process in a standard function. Gets a callback
320
+ to actually make the model, to keep it as flexible as possible.
321
+ # Params:
322
+ # - dataset_name (dataset name)
323
+ # - model_name: (C1..C6,e2e)
324
+ # - model_callback: function taking name,inputs,outputs,data_format and returning a Keras model
325
+ # - epochs: int
326
+ # - dataset_dir: place to find input data
327
+ # - output_dir: place to put outputs
328
+ # - parameters_file (override parameters filename)
329
+ # - dataset_file (override dataset filename)
330
+ # - data_format (channels_first or channels_last)
331
+ # - run_name: to save this run as
332
+ """
333
+ #LOSS TYPE FOR CUSTOM LOSS FUNCTION
334
+ LOSS_TYPE = 'L1'
335
+ SPECTRO_TYPE = 'spectro'
336
+ PRINT = 1
337
+
338
+ #DAWDREAMER EXPORT SETTINGS
339
+ SAMPLE_RATE = 16384
340
+ BUFFER_SIZE = 1024
341
+ SYNTH_PLUGIN = "libTAL-NoiseMaker.so"
342
+
343
+ ENGINE = daw.RenderEngine(SAMPLE_RATE, BUFFER_SIZE)
344
+ SYNTH = ENGINE.make_plugin_processor("my_synth", SYNTH_PLUGIN)
345
+ SYNTH.add_midi_note(40, 127, 0, 0.8)
346
+
347
+ with open('plugin_config/TAL-NoiseMaker-config.json') as f:
348
+ data = json.load(f)
349
+
350
+ dico=[]
351
+ # Extract the key ID from the JSON data
352
+ key_id = data['parameters']
353
+ for param in key_id:
354
+ dico.append(param['id'])
355
+
356
+ DICO=dico
357
+
358
+ def train_model(
359
+ # Main options
360
+ dataset_name: str,
361
+ model_name: str,
362
+ epochs: int,
363
+ model_callback: Callable[[str, int, int, str], keras.Model],
364
+ dataset_dir: str,
365
+ output_dir: str, # Directory names
366
+ dataset_file: str = None,
367
+ parameters_file: str = None,
368
+ run_name: str = None,
369
+ data_format: str = "channels_last",
370
+ save_best: bool = True,
371
+ resume: bool = False,
372
+ checkpoint: bool = True,
373
+ model_type: str = "STFT",
374
+ ):
375
+
376
+ tf.config.run_functions_eagerly(True)
377
+ # tf.data.experimental.enable_debug_mode()
378
+ time_generated = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
379
+ if not dataset_file:
380
+ dataset_file = (
381
+ os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_data.hdf5"
382
+ )
383
+ if not parameters_file:
384
+ parameters_file = (
385
+ os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_params.pckl"
386
+ )
387
+ if not run_name:
388
+ run_name = dataset_name + "_" + model_name
389
+
390
+ model_file = f"{output_dir}/model/{run_name}_{time_generated}"
391
+ if not os.path.exists(model_file):
392
+ os.makedirs(model_file)
393
+ best_model_file = f"{output_dir}/best_checkpoint/{run_name}_best_{time_generated}"
394
+ if not os.path.exists(best_model_file):
395
+ os.makedirs(best_model_file)
396
+ if resume:
397
+
398
+ # checkpoint_model_file = f"{output_dir}/{run_name}_checkpoint_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
399
+ # history_file = f"{output_dir}/{run_name}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
400
+ checkpoint_model_file = f"{output_dir}/history/InverSynth_C6XL_checkpoint_20231201-103344"
401
+ history_file = f"{output_dir}/checkpoints/InverSynth_C6XL_20231201-103344"
402
+ else:
403
+ os.makedirs(f"{output_dir}/history", exist_ok=True)
404
+ os.makedirs(f"{output_dir}/checkpoints", exist_ok=True)
405
+ history_file = f"{output_dir}/history/{run_name}_{time_generated}"
406
+ checkpoint_model_file = f"{output_dir}/checkpoints/{run_name}_checkpoint_{time_generated}"
407
+
408
+ history_graph_file = f"{output_dir}/{run_name}.pdf"
409
+ print(tf.config.list_physical_devices('GPU'))
410
+ gpu_avail = len(tf.config.list_physical_devices('GPU')) # True/False
411
+ cuda_gpu_avail = len(tf.config.list_physical_devices('GPU')) # True/False
412
+
413
+ print("+" * 30)
414
+ print(f"++ {run_name}")
415
+ print(
416
+ f"Running model: {model_name} on dataset {dataset_file} (parameters {parameters_file}) for {epochs} epochs"
417
+ )
418
+ print(f"Saving model in {output_dir} as {model_file}")
419
+ print(f"Saving history as {history_file}")
420
+ print(f"GPU: {gpu_avail}, with CUDA: {cuda_gpu_avail}")
421
+ print("+" * 30)
422
+
423
+ os.makedirs(output_dir, exist_ok=True)
424
+
425
+ # Get training and validation generators
426
+ params = {"data_file": dataset_file, "batch_size": 64, "shuffle": True}
427
+ training_generator = SoundDataGenerator(first=0.8, **params)
428
+ validation_generator = SoundDataGenerator(last=0.2, **params)
429
+ n_samples = training_generator.get_audio_length()
430
+ print(f"get_audio_length: {n_samples}")
431
+ n_outputs = training_generator.get_label_size()
432
+
433
+ # set keras image_data_format
434
+ # NOTE: on CPU only `channels_last` is supported
435
+ physical_devices = tf.config.list_physical_devices('GPU')
436
+
437
+ keras.backend.set_image_data_format(data_format)
438
+
439
+ model: keras.Model = None
440
+ if resume and os.path.exists(checkpoint_model_file):
441
+ history = pd.read_csv(history_file)
442
+ # Note - its zero indexed in the file, but 1 indexed in the display
443
+ initial_epoch: int = max(history.iloc[:, 0]) + 1
444
+ # epochs:int = initial_epoch
445
+ print(
446
+ f"Resuming from model file: {checkpoint_model_file} after epoch {initial_epoch}"
447
+ )
448
+ model = keras.models.load_model(
449
+ checkpoint_model_file
450
+ ,
451
+ custom_objects={"top_k_mean_accuracy": top_k_mean_accuracy, "Spectrogram" : Spectrogram,
452
+ "custom_spectral_loss": custom_spectral_loss, "CustomLoss": CustomLoss
453
+ },
454
+ )
455
+ else:
456
+ model = model_callback(
457
+ model_name=model_name,
458
+ inputs=n_samples,
459
+ outputs=n_outputs,
460
+ data_format=data_format,
461
+ )
462
+ # keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_layer_activations=True)
463
+ # Summarize and compile the model
464
+ summarize_compile(model)
465
+ initial_epoch = 0
466
+ open(history_file, "w").close()
467
+
468
+ callbacks = []
469
+ best_callback = keras.callbacks.ModelCheckpoint(
470
+ filepath=best_model_file,
471
+ save_weights_only=False,
472
+ save_best_only=True,
473
+ verbose=1,
474
+ )
475
+ checkpoint_callback = keras.callbacks.ModelCheckpoint(
476
+ filepath=checkpoint_model_file,
477
+ save_weights_only=False,
478
+ save_best_only=False,
479
+ verbose=1,
480
+ )
481
+ os.makedirs(f"{output_dir}/logs", exist_ok=True)
482
+ log_dir = f"{output_dir}/logs/" + time_generated
483
+ tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True, write_images=True, profile_batch = '500,520')
484
+
485
+ if save_best:
486
+ callbacks.append(best_callback)
487
+ if checkpoint:
488
+ callbacks.append(checkpoint_callback)
489
+ callbacks.append(tensorboard_callback)
490
+ callbacks.append(CSVLogger(history_file, append=True))
491
+ callbacks.append(Weight_trans(weight_var, "log3" ,epochs))
492
+ # Parameter data - needed for decoding!
493
+
494
+ # Fit the model
495
+ history = None
496
+ try:
497
+ history = model.fit(
498
+ x=training_generator,
499
+ validation_data=validation_generator,
500
+ epochs=epochs,
501
+ callbacks=callbacks,
502
+ initial_epoch=initial_epoch,
503
+ verbose=1, # https://github.com/tensorflow/tensorflow/issues/38064
504
+ )
505
+ except Exception as e:
506
+ print(f"Something went wrong during `model.fit`: {e}")
507
+
508
+ # Save model
509
+ model.save(model_file)
510
+
511
+ # Save history
512
+ if history and not resume:
513
+ try:
514
+ hist_df = pd.DataFrame(history.history)
515
+ try:
516
+ fig = hist_df.plot(subplots=True, figsize=(8, 25))
517
+ fig[0].get_figure().savefig(history_graph_file)
518
+ except Exception as e:
519
+ print("Couldn't create history graph")
520
+ print(e)
521
+
522
+ except Exception as e:
523
+ tf.print("Couldn't save history")
524
+ print(e)
525
+
526
+ # evaluate prediction on random sample from validation set
527
+ # Parameter data - needed for decoding!
528
+ with open(parameters_file, "rb") as f:
529
+ parameters: ParameterSet = load(f)
530
+
531
+ # Shuffle data
532
+ validation_generator.on_epoch_end()
533
+ X, y = validation_generator.__getitem__(0)
534
+ X = X.reshape((len(X), 1, 16384))
535
+
536
+ # if model_type == "STFT":
537
+ # # stft expects shape (channel, sample_rate)
538
+ # X = np.moveaxis(X, 1, -1)
539
+ prediction: np.ndarray = model.predict(X)
540
+ evaluate(prediction, X, y, parameters)
541
+
542
+ print("++++" * 5)
543
+ print("Pushing to trained model")
544
+ print("++++" * 5)
545
+
546
+ valid = False
547
+ while not valid:
548
+ namefile = input("Enter .wav test file path: ")
549
+ if os.path.exists(namefile):
550
+ valid = True
551
+ else:
552
+ print("File path invalid, try again")
553
+
554
+ newpred = model.predict(audio_importer(namefile))
555
+ predlist: List[ParamValue] = parameters.decode(newpred[0])
556
+ df = pd.DataFrame(predlist)
557
+
558
+ print(df)
559
+ df = df.drop(['encoding'], axis=1)
560
+ # Save the inferred parameters as a CSV config
561
+ os.makedirs('output/wav_inferred', exist_ok=True)
562
+ head, tail = os.path.split(namefile)
563
+ print("Outputting CSV config in output/wav_inferred")
564
+ df.to_csv(f'output/wav_inferred/{tail}.csv')
566
+ #export(prediction, X, y, parameters)
567
+ # Loop through the rows of the DataFrame
568
+ i = 0
569
+ for values in df['value'].values:
570
+ # Set parameters using DataFrame values
571
+ SYNTH.set_parameter(DICO[i], values)
572
+ # (MIDI note, velocity, start, duration)
573
+ i += 1
574
+ # Setting volume to 0.9
575
+ SYNTH.set_parameter(1, 0.9)
576
+ # Set up the processing graph
577
+ graph = [
578
+ # synth takes no inputs, so we give an empty list.
579
+ (SYNTH, []),
580
+ ]
581
+
582
+ ENGINE.load_graph(graph)
583
+ ENGINE.render(1)
584
+ data = ENGINE.get_audio()
585
+ try:
586
+ data = librosa.to_mono(data).transpose()
587
+ except Exception:
588
+ tf.print("ERROR" * 100)
589
+ df = df.fillna(0)
590
+ data = df.to_numpy()
591
+ data = librosa.to_mono(data).transpose()
592
+ tf.print("crashed, nan in generation")
593
+ synth_params = dict(SYNTH.get_patch())
594
+ print(synth_params)
595
+
596
+ df = pd.DataFrame(data)
597
+
598
+ # penalty=1000000
599
+ # df = pd.DataFrame(data)
600
+ # df = df.fillna(0)
601
+ # data = df.to_numpy()
602
+
603
+
604
+ wavfile.write(f'output/wav_inferred/gen_{tail}.wav', SAMPLE_RATE, data)
605
+
606
+ def generate_audio(df_params):
607
+
608
+
609
+ # Loop through the rows of the DataFrame
610
+ i = 0
611
+ penalty=1
612
+ for param in df_params:
613
+ # Set parameters using DataFrame values
614
+ SYNTH.set_parameter(DICO[i], param.value)
615
+ # (MIDI note, velocity, start, duration)
616
+ i += 1
617
+ # Set up the processing graph
618
+ graph = [
619
+ # synth takes no inputs, so we give an empty list.
620
+ (SYNTH, []),
621
+ ]
622
+
623
+ ENGINE.load_graph(graph)
624
+ ENGINE.render(1)
625
+ data = ENGINE.get_audio()
626
+ if np.isnan(data).any():
627
+
628
+ # df = pd.DataFrame(data)
629
+ # df = df.fillna(0)
630
+ # data = df.to_numpy()
631
+
632
+ tf.print("crashed, nan in generation")
633
+ synth_params = dict(SYNTH.get_patch())
634
+ print(synth_params)
635
+ try:
636
+ data = librosa.to_mono(data).transpose()
637
+ if librosa.util.valid_audio(data):
638
+ result = np.array(data)
639
+ return result, penalty
640
+ except Exception:
641
+ tf.print("crashed, nan in generation")
642
+ raise ValueError("NaN in generation, crashed")
643
+
644
+
645
+
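
For orientation, the render path above (ENGINE/SYNTH) is the standard dawdreamer pattern. A minimal standalone sketch of the same flow, with the plugin path and output filename as placeholders:

import dawdreamer as daw
import numpy as np
from scipy.io import wavfile

SAMPLE_RATE = 16384  # matches the audio length convention used above
engine = daw.RenderEngine(SAMPLE_RATE, 1024)  # sample rate, buffer size
synth = engine.make_plugin_processor("synth", "TAL-NoiseMaker.vst3")  # placeholder plugin path
synth.add_midi_note(40, 127, 0, 0.8)  # note, velocity, start, duration
engine.load_graph([(synth, [])])  # the synth takes no audio inputs
engine.render(1)  # render one second
audio = engine.get_audio()  # (channels, samples)
mono = np.asarray(audio).mean(axis=0)  # simple mono mix-down
wavfile.write("render.wav", SAMPLE_RATE, mono)  # placeholder output path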
back/models/common/__init__.py ADDED
File without changes
back/models/common/architectures.py ADDED
@@ -0,0 +1,69 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
+ # Model architectures
5
+ @dataclass
6
+ class C:
7
+ filters: int
8
+ window_size: tuple
9
+ strides: tuple
10
+ activation: str = "relu"
11
+
12
+
13
+ """Conv 1 (2 Layers)"""
14
+ c1: C = C(38, (13, 26), (13, 26))
15
+ c1_layers: list = [c1]
16
+
17
+ """Conv 2 (3 Layers)"""
18
+ c2_layers: list = [C(35, (6, 7), (5, 6)), C(87, (6, 9), (5, 8))]
19
+
20
+ """Conv 3 (4 Layers)"""
21
+ c3_layers: list = [C(32, (4, 5), (3, 4)), C(98, (4, 6), (3, 5)), C(128, (4, 6), (3, 5))]
22
+
23
+ """Conv 4 (5 Layers)"""
24
+ c4_layers: list = [
25
+ C(32, (3, 4), (2, 3)),
26
+ C(65, (3, 4), (2, 3)),
27
+ C(105, (3, 4), (2, 3)),
28
+ C(128, (4, 5), (3, 4)),
29
+ ]
30
+
31
+ """Conv 5 (6 Layers)"""
32
+ c5_layers: list = [
33
+ C(32, (3, 3), (2, 2)),
34
+ C(98, (3, 3), (2, 2)),
35
+ C(128, (3, 4), (2, 3)),
36
+ C(128, (3, 5), (2, 4)),
37
+ C(128, (3, 3), (2, 2)),
38
+ ]
39
+
40
+ """Conv 6 (7 Layers)"""
41
+ c6_layers: list = [
42
+ C(32, (3, 3), (2, 2)),
43
+ C(71, (3, 3), (2, 2)),
44
+ C(128, (3, 4), (2, 3)),
45
+ C(128, (3, 3), (2, 2)),
46
+ C(128, (3, 3), (2, 2)),
47
+ C(128, (3, 3), (1, 2)),
48
+ ]
49
+
50
+ """Conv 6XL, 7 Layers"""
51
+ c6XL_layers: list = [
52
+ C(64, (3, 3), (2, 2)),
53
+ C(128, (3, 3), (2, 2)),
54
+ C(128, (3, 4), (2, 3)),
55
+ C(128, (3, 3), (2, 2)),
56
+ C(256, (3, 3), (2, 2)),
57
+ C(256, (3, 3), (1, 2)),
58
+ ]
59
+
60
+
61
+ layers_map: dict = {
62
+ "C1": c1_layers,
63
+ "C2": c2_layers,
64
+ "C3": c3_layers,
65
+ "C4": c4_layers,
66
+ "C5": c5_layers,
67
+ "C6": c6_layers,
68
+ "C6XL": c6XL_layers,
69
+ }
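
These tables are consumed by models/spectrogram_cnn.py later in this commit; a minimal sketch of the expansion, with a hypothetical input spectrogram shape, looks like:

from tensorflow import keras
from models.common.architectures import layers_map

x = inputs = keras.Input(shape=(257, 65, 1))  # hypothetical spectrogram shape
for c in layers_map["C4"]:
    # each C entry becomes one ReLU-activated strided Conv2D layer
    x = keras.layers.Conv2D(c.filters, c.window_size, strides=c.strides,
                            activation=c.activation, padding="same")(x)
model = keras.Model(inputs, keras.layers.Flatten()(x))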
back/models/common/data_generator.py ADDED
@@ -0,0 +1,126 @@
1
+ import h5py
2
+ import numpy as np
3
+ from scipy.io import wavfile
4
+ from tensorflow import keras
5
+
6
+
7
+ class SoundDataGenerator(keras.utils.Sequence):
8
+ "Generates data for Keras"
9
+
10
+ def __init__(
11
+ self,
12
+ data_file=None,
13
+ batch_size=32,
14
+ n_samps=16384,
15
+ shuffle=True,
16
+ last: float = 0.0,
17
+ first: float = 0.0,
18
+ channels_last=False,
19
+ for_autoencoder=False,
20
+ ):
21
+ "Initialization"
22
+ self.dim = (1, n_samps)
23
+ self.batch_size = batch_size
24
+ self.shuffle = shuffle
25
+ self.data_file = data_file
26
+ self.n_channels = 1
27
+ self.for_autoencoder = for_autoencoder
28
+ # For the E2E model, need to return channels last?
29
+ if channels_last:
30
+ self.expand_axis = 2
31
+ else:
32
+ self.expand_axis = 1
33
+
34
+ database = h5py.File(data_file, "r")
35
+
36
+ self.database = database
37
+
38
+ self.n_samps = self.read_file(0).shape[0]
39
+ print("N Samps in audio data: {}".format(self.n_samps))
40
+
41
+ # set up list of IDs from data files
42
+ n_points = len(database["files"])
43
+ self.list_IDs = range(len(database["files"]))
44
+
45
+ print(f"Number of examples in dataset: {len(self.list_IDs)}")
46
+ slice: int = 0
47
+ if last > 0.0:
48
+ slice = int(n_points * (1 - last))
49
+ self.list_IDs = self.list_IDs[slice:]
50
+ print(f"Taking Last N points: {len(self.list_IDs)}")
51
+ elif first > 0.0:
52
+ slice = int(n_points * first)
53
+ self.list_IDs = self.list_IDs[:slice]
54
+ print(f"Taking First N points: {len(self.list_IDs)}")
55
+
56
+ # set up label size from data files
57
+ self.label_size = len(database["labels"][0])
58
+ self.on_epoch_end()
59
+
60
+ def get_audio_length(self):
61
+ return self.n_samps
62
+
63
+ def get_label_size(self):
64
+ return self.label_size
65
+
66
+ def __len__(self):
67
+ "Denotes the number of batches per epoch"
68
+ return int(np.floor(len(self.list_IDs) / self.batch_size))
69
+
70
+ def __getitem__(self, index):
71
+ "Generate one batch of data"
72
+ # Generate indexes of the batch
73
+ indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]
74
+
75
+ # Find list of IDs
76
+ # list_IDs_temp = [self.list_IDs[k] for k in indexes]
77
+
78
+ # Generate data
79
+ X, y = self.__data_generation(indexes)
80
+
81
+ # print("Returning data! Got X: {}, y: {}".format(X.shape,y.shape))
82
+ return X, y
83
+
84
+ def on_epoch_end(self):
85
+ "Updates indexes after each epoch"
86
+ self.indexes = np.arange(len(self.list_IDs))
87
+ if self.shuffle is True:
88
+ np.random.shuffle(self.indexes)
89
+
90
+ # Think this makes things worse - fills up memory
91
+ # @lru_cache(maxsize=150000)
92
+ def read_file(self, index):
93
+ filename = self.database["files"][index]
94
+ fs, data = wavfile.read(filename)
95
+ return data
96
+
97
+ def __data_generation(self, list_IDs_temp):
98
+ # X : (n_samples, *dim, n_channels)
99
+ "Generates data containing batch_size samples"
100
+ # Initialization
101
+ # X = np.empty((self.batch_size, *self.dim))
102
+ # y = np.empty((self.batch_size), dtype=int)
103
+
104
+ # Generate data
105
+ X = []
106
+ y = []
107
+ for i in list_IDs_temp:
108
+ # Read labels
109
+ y.append(self.database["labels"][i])
110
+ # Load soundfile data
111
+ data = self.read_file(i)
112
+ if data.shape[0] > self.n_samps:
113
+ print(
114
+ "Warning - too many samples: {} > {}".format(
115
+ data.shape[0], self.n_samps
116
+ )
117
+ )
118
+ X.append(data[: self.n_samps])
119
+ Xd = np.expand_dims(np.vstack(X), axis=1)
120
+ # Xd = Xd.flatten()
121
+ Xd = Xd.reshape((len(X), 1, self.n_samps))
122
+ yd = np.vstack(y)
123
+
124
+ if self.for_autoencoder:
125
+ return yd, yd
126
+ return Xd, yd
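
Usage mirrors the train/validation split seen earlier in this commit; a sketch, assuming a hypothetical dataset path:

from models.common.data_generator import SoundDataGenerator

params = {"data_file": "test_datasets/InverSynth_data.hdf5",  # hypothetical path
          "batch_size": 64, "shuffle": True}
training_generator = SoundDataGenerator(first=0.8, **params)  # first 80% of examples
validation_generator = SoundDataGenerator(last=0.2, **params)  # last 20% of examples
X, y = training_generator[0]  # one batch: X is (64, 1, n_samps), y is (64, label_size)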
back/models/comparison.py ADDED
@@ -0,0 +1,144 @@
1
+ import json
2
+ import os
3
+ import pickle
4
+ import re
5
+
6
+ import h5py
7
+ import numpy as np
8
+ from scipy.io import wavfile
9
+ from scipy.io.wavfile import write as write_wav
10
+ from tensorflow import keras
11
+
12
+ from generators.generator import InverSynthGenerator, SoundGenerator, VSTGenerator
13
+ from generators.parameters import ParameterSet
14
+
15
+ """
16
+ This module generates comparisons - takes the original sound + params,
17
+ then generates a file with the predicted parameters
18
+ """
19
+
20
+
21
+ def compare(
22
+ model: keras.Model,
23
+ generator: SoundGenerator,
24
+ parameters: ParameterSet,
25
+ orig_file: str,
26
+ output_dir: str,
27
+ orig_params,
28
+ length: float,
29
+ sample_rate: int,
30
+ extra: dict = {},
31
+ ):
32
+ # (copy original file if given)
33
+ base_filename = orig_file.replace(".wav", "")
34
+ base_filename = re.sub(r".*/", "", base_filename)
35
+ copy_file: str = f"{output_dir}/{base_filename}_copy.wav"
36
+ regen_file: str = f"{output_dir}/{base_filename}_duplicate.wav"
37
+ reconstruct_file: str = f"{output_dir}/{base_filename}_reconstruct.wav"
38
+ print(f"Creating copy as {copy_file}")
39
+
40
+ # Load the wave file
41
+ fs, data = wavfile.read(orig_file)
42
+ # Copy original file to make sure
43
+ write_wav(copy_file, sample_rate, data)
44
+
45
+ # Decode original params, and regenerate output (make sure it's correct)
46
+ orig = parameters.encoding_to_settings(orig_params)
47
+ generator.generate(orig, regen_file, length, sample_rate, extra)
48
+
49
+ # Run the wavefile into the model for prediction
50
+ X = [data]
51
+ Xd = np.expand_dims(np.vstack(X), axis=2)
52
+ # Get encoded parameters out of model
53
+ result = model.predict(Xd)[0]
54
+
55
+ # Decode prediction, and reconstruct output
56
+ predicted = parameters.encoding_to_settings(result)
57
+ generator.generate(predicted, reconstruct_file, length, sample_rate, extra)
58
+
59
+
60
+ def run_comparison(
61
+ model: keras.Model,
62
+ generator: SoundGenerator,
63
+ run_name: str,
64
+ indices=None,
65
+ num_samples=10,
66
+ data_dir="./test_datasets",
67
+ output_dir="./comparison",
68
+ length=1.0,
69
+ sample_rate=16384,
70
+ shuffle=True,
71
+ extra={},
72
+ ):
73
+ # Figure out data file and params file from run name
74
+ data_file = f"{data_dir}/{run_name}_data.hdf5"
75
+ parameters_file = f"{data_dir}/{run_name}_params.pckl"
76
+ print(f"Reading parameters from {parameters_file}")
77
+ with open(parameters_file, "rb") as f:
+ parameters = pickle.load(f)
78
+
79
+ output_dir = f"{output_dir}/{run_name}/"
80
+ os.makedirs(output_dir, exist_ok=True)
81
+
82
+ database = h5py.File(data_file, "r")
83
+
84
+ if not indices:
85
+ ids = np.array(range(len(database["files"])))
86
+ if shuffle:
87
+ np.random.shuffle(ids)
88
+ indices = ids[0:num_samples]
89
+
90
+ # filename
91
+ for i in indices:
92
+ print("Looking at index: {}".format(i))
93
+ filename = database["files"][i]
94
+ labels = database["labels"][i]
95
+ compare(
96
+ model=model,
97
+ generator=generator,
98
+ parameters=parameters,
99
+ orig_file=filename,
100
+ output_dir=output_dir,
101
+ orig_params=labels,
102
+ length=length,
103
+ sample_rate=sample_rate,
104
+ extra=extra,
105
+ )
106
+ # Generate
107
+
108
+
109
+ if __name__ == "__main__":
110
+
111
+ note_length = 0.8
112
+ sample_rate = 16384
113
+
114
+ lokomotiv = True
115
+ fm = True
116
+
117
+ if lokomotiv:
118
+ from generators.vst_generator import *
119
+
120
+ run_name = "lokomotiv_full"
121
+ model_file = "output/lokomotiv_full_e2e_best.h5"
122
+ plugin = "/Library/Audio/Plug-Ins/VST/Lokomotiv.vst"
123
+ config_file = "plugin_config/lokomotiv.json"
124
+ generator = VSTGenerator(vst=plugin, sample_rate=sample_rate)
125
+ with open(config_file, "r") as f:
126
+ config = json.load(f)
127
+
128
+ model = keras.models.load_model(model_file)
129
+ run_comparison(
130
+ model,
131
+ generator,
132
+ run_name,
133
+ num_samples=100,
134
+ extra={"note_length": note_length, "config": config},
135
+ )
136
+
137
+ if fm:
138
+ from generators.fm_generator import *
139
+
140
+ run_name = "inversynth_full"
141
+ model_file = "output/inversynth_full_e2e_best.h5"
142
+ generator = InverSynthGenerator()
143
+ model = keras.models.load_model(model_file)
144
+ run_comparison(model, generator, run_name, num_samples=100)
back/models/convert_to_preset.py ADDED
@@ -0,0 +1,149 @@
1
+ import csv
2
+ import xml.etree.ElementTree as ET
3
+
4
+ def convert_csv_to_preset(path: str, output_path: str):
5
+
6
+ daw_to_preset_og = {
7
+ 'Master Volume': 'volume',
8
+ 'Filter Type': 'filtertype',
9
+ 'Filter Cutoff': 'cutoff',
10
+ 'Filter Resonance': 'resonance',
11
+ 'Filter Keyfollow': 'keyfollow',
12
+ 'Filter Contour': 'filtercontour',
13
+ 'Filter Attack': 'filterattack',
14
+ 'Filter Decay': 'filterdecay',
15
+ 'Filter Sustain': 'filtersustain',
16
+ 'Filter Release': 'filterrelease',
17
+ 'Amp Attack': 'ampattack',
18
+ 'Amp Decay': 'ampdecay',
19
+ 'Amp Sustain': 'ampsustain',
20
+ 'Amp Release': 'amprelease',
21
+ 'Osc 1 Volume': 'osc1volume',
22
+ 'Osc 2 Volume': 'osc2volume',
23
+ 'Osc 3 Volume': 'osc3volume',
24
+ 'Osc Mastertune': 'oscmastertune',
25
+ 'Osc 1 Tune': 'osc1tune',
26
+ 'Osc 2 Tune': 'osc2tune',
27
+ 'Osc 1 Fine Tune': 'osc1finetune',
28
+ 'Osc 2 Fine Tune': 'osc2finetune',
29
+ 'Osc 1 Waveform': 'osc1waveform',
30
+ 'Osc 2 Waveform': 'osc2waveform',
31
+ 'Osc Sync': 'oscsync',
32
+ 'Lfo 1 Waveform': 'lfo1waveform',
33
+ 'Lfo 2 Waveform': 'lfo2waveform',
34
+ 'Lfo 1 Rate': 'lfo1rate',
35
+ 'Lfo 2 Rate': 'lfo2rate',
36
+ 'Lfo 1 Amount': 'lfo1amount',
37
+ 'Lfo 2 Amount': 'lfo2amount',
38
+ 'Lfo 1 Destination': 'lfo1destination',
39
+ 'Lfo 2 Destination': 'lfo2destination',
40
+ 'Lfo 1 Phase': 'lfo1phase',
41
+ 'Lfo 2 Phase': 'lfo2phase',
42
+ 'Osc 2 FM': 'osc2fm',
43
+ 'Osc 2 Phase': 'osc2phase',
44
+ 'Osc 1 PW': 'osc1pw',
45
+ 'Osc 1 Phase': 'osc1phase',
46
+ 'Transpose': 'transpose',
47
+ 'Free Ad Attack': 'freeadattack',
48
+ 'Free Ad Decay': 'freeaddecay',
49
+ 'Free Ad Amount': 'freeadamount',
50
+ 'Free Ad Destination': 'freeaddestination',
51
+ 'Lfo 1 Sync': 'lfo1sync',
52
+ 'Lfo 1 Keytrigger': 'lfo1keytrigger',
53
+ 'Lfo 2 Sync': 'lfo2sync',
54
+ 'Lfo 2 Keytrigger': 'lfo2keytrigger',
55
+ 'Portamento Amount': 'portamento',
56
+ 'Portamento Mode': 'portamentomode',
57
+ 'Voices': 'voices',
58
+ 'Velocity Volume': 'velocityvolume',
59
+ 'Velocity Contour': 'velocitycontour',
60
+ 'Velocity Filter': 'velocitycutoff',
61
+ 'Pitchwheel Cutoff': 'pitchwheelcutoff',
62
+ 'Pitchwheel Pitch': 'pitchwheelpitch',
63
+ 'Ringmodulation': 'ringmodulation',
64
+ 'Chorus 1 Enable': 'chorus1enable',
65
+ 'Chorus 2 Enable': 'chorus2enable',
66
+ 'Reverb Wet': 'reverbwet',
67
+ 'Reverb Decay': 'reverbdecay',
68
+ 'Reverb Pre Delay': 'reverbpredelay',
69
+ 'Reverb High Cut': 'reverbhighcut',
70
+ 'Reverb Low Cut': 'reverblowcut',
71
+ 'Osc Bitcrusher': 'oscbitcrusher',
72
+ 'Master High Pass': 'highpass',
73
+ 'Master Detune': 'detune',
74
+ 'Vintage Noise': 'vintagenoise',
75
+ 'Envelope Destination': 'envelopeeditordest1',
76
+ 'Envelope Speed': 'envelopeeditorspeed',
77
+ 'Envelope Amount': 'envelopeeditoramount',
78
+ 'Envelope One Shot Mode': 'envelopeoneshot',
79
+ 'Envelope Fix Tempo': 'envelopefixtempo',
80
+ 'Filter Drive': 'filterdrive',
81
+ 'Delay Wet': 'delaywet',
82
+ 'Delay Time': 'delaytime',
83
+ 'Delay Sync': 'delaysync',
84
+ 'Delay x2 L': 'delayfactorl',
85
+ 'Delay x2 R': 'delayfactorr',
86
+ 'Delay High Shelf': 'delayhighshelf',
87
+ 'Delay Low Shelf': 'delaylowshelf',
88
+ 'Delay Feedback': 'delayfeedback',
89
+ }
90
+
91
+ daw_to_preset = {v: k for k, v in daw_to_preset_og.items()}
92
+
93
+ # Read CSV data from file
94
+ with open(path, 'r') as csv_file:
95
+ csv_reader = csv.DictReader(csv_file)
96
+ csv_data = list(csv_reader)
97
+
98
+ for entry in csv_data:
99
+ parameter_name = entry['name']
100
+ parameter_value_str = entry['value']
101
+
102
+
103
+ # Check if the name needs mapping
104
+ if parameter_name in daw_to_preset_og:
105
+ xml_key = daw_to_preset_og[parameter_name]
106
+
107
+ # Check if the value is numeric
108
+ try:
109
+ parameter_value = float(parameter_value_str)
110
+ except ValueError:
111
+ print(f"Skipping non-numeric value for parameter {parameter_name}: {parameter_value_str}")
112
+ continue
113
+
114
+ if xml_key in daw_to_preset:
115
+ # Update the corresponding value in the XML dictionary
116
+ daw_to_preset[xml_key] = parameter_value
117
+
118
+ print(daw_to_preset)
119
+
120
+ # Check for invalid float values and remove them from the dictionary
121
+ invalid_values = [key for key, value in daw_to_preset.items() if not isinstance(value, float)]
122
+ for key in invalid_values:
123
+ print(f"Removing attribute {key} from daw_to_preset due to invalid float value.")
124
+ daw_to_preset[key] = 0.0
125
+ # Print the updated XML dictionary
126
+ print(daw_to_preset)
127
+
128
+ # Generate XML
129
+ root = ET.Element('tal', curprogram="0", version="1.7", presetName="CH Chordionator III FN",
130
+ path="Factory Presets/CHORD/CH Chordionator III FN.noisemakerpreset")
131
+ programs = ET.SubElement(root, 'programs')
132
+ program = ET.SubElement(programs, 'program', programname="CH Chordionator III FN", unknown="0.5", volume="0.5")
133
+ # Add parameters to the XML inside the single <program> element
134
+ for param_name, param_value in daw_to_preset.items():
135
+ program.set(param_name, str(param_value))
136
+
137
+
138
+ ET.SubElement(root, 'midimap')
139
+ # Create an ElementTree object
140
+
141
+ tree = ET.ElementTree(root)
142
+
143
+ # Save the XML to a file
144
+ output_xml_path = output_path
145
+ tree.write(output_xml_path)
146
+
147
+ print(f"XML file written to {output_xml_path}")
148
+
149
+ return output_xml_path
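
A usage sketch matching how inferrence() in launch.py calls this converter (paths hypothetical):

from models.convert_to_preset import convert_csv_to_preset

preset_path = convert_csv_to_preset(
    "temp/abc123_config.csv",               # CSV written from the inferred DataFrame
    "temp/abc123_config.noisemakerpreset",  # TAL NoiseMaker preset to write
)
print(preset_path)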
back/models/importer_audio.py ADDED
@@ -0,0 +1,23 @@
1
+ import librosa.core.audio
2
+ import numpy as np
3
+ from scipy.io import wavfile
4
+
5
+
6
+ def audio_importer(filename):
7
+ # X : (n_samples, *dim, n_channels)
8
+ "Generates data containing batch_size samples"
9
+
10
+ # Generate data
11
+ X = []
12
+ # Load soundfile data
13
+ data, r = librosa.core.audio.load(filename, sr=16384, mono=True, duration=1.0)
14
+ n_samps = data.shape[0]
15
+ print("N Samps in audio data: {}".format(n_samps))
16
+ X.append(data[: n_samps])
17
+ Xd = np.expand_dims(np.vstack(X), axis=1)
18
+ Xd = Xd.reshape((len(X), 1, n_samps))
19
+
20
+ return Xd
21
+
22
+ if __name__ == "__main__":
23
+ audio_importer("C:/Users/yderre/Downloads/inver-synth-master/inver-synth-master/test_waves/InverSynth/InverSynth_00006.wav")
back/models/launch.py ADDED
@@ -0,0 +1,518 @@
1
+ import datetime
2
+ import json
3
+ import os
4
+ from pickle import load
5
+ from typing import Callable, List
6
+ import librosa
7
+ import numpy as np
8
+ import pandas as pd
9
+ import tensorflow as tf
10
+ from tensorflow import keras
11
+ from keras import backend as K
12
+ from kapre.time_frequency import Spectrogram
13
+ from models.convert_to_preset import convert_csv_to_preset
14
+ from models.importer_audio import audio_importer
15
+ import dawdreamer as daw
16
+ from scipy.io import wavfile
18
+
19
+
20
+ from generators.parameters import ParameterSet, ParamValue
21
+
22
+
23
+ weight_var = K.variable(0.0)
24
+
25
+
26
+ class Weight_trans(keras.callbacks.Callback):
27
+ def __init__(self, weight_var, transition, epochs):
28
+ self.alpha = weight_var
29
+ self.transition = transition
30
+ self.epochs = epochs
31
+
32
+ def on_epoch_end(self, epoch, logs=None):
33
+ if epoch > 680:
34
+ if self.transition == "linear":
35
+ K.set_value(self.alpha, ((epoch) / (self.epochs) - 0.617) * 0.00001)
36
+ tf.print(f"new weight {weight_var.numpy()}")
37
+ if self.transition == "linear2":
38
+ K.set_value(self.alpha, (1.5625 * epoch - 1.0625) * 0.00001)
39
+ tf.print(f"new weight {weight_var.numpy()}")
40
+ if self.transition == "log":
41
+ K.set_value(
42
+ self.alpha,
43
+ (
44
+ 1
45
+ - (tf.math.log(epoch * 0.001 - 0.67285) / tf.math.log(0.0005))
46
+ - 0.35
47
+ )
48
+ * 0.00001,
49
+ )
50
+ tf.print("log")
51
+ if self.transition == "log2":
52
+ K.set_value(
53
+ self.alpha,
54
+ (
55
+ 1
56
+ - (tf.math.log(epoch * 0.001 - 0.6575) / tf.math.log(0.0005))
57
+ - 0.5
58
+ )
59
+ * 0.00001,
60
+ )
61
+ tf.print("log")
62
+ if self.transition == "log3":
63
+ K.set_value(
64
+ self.alpha,
65
+ (
66
+ 1
67
+ - (
68
+ tf.math.log(epoch * 0.001 - 0.67978)
69
+ / tf.math.log(0.00000005)
70
+ )
71
+ - 0.5
72
+ )
73
+ * 0.00001,
74
+ )
75
+ tf.print("log")
76
+ if self.transition == "square":
77
+ K.set_value(self.alpha, 4.1 * tf.pow(epoch * 0.001 - 0.65, 2) + 0.002)
78
+ print("exp")
79
+ if self.transition == "quad":
80
+ K.set_value(self.alpha, 33 * tf.pow(epoch * 0.001 - 0.65, 4) + 0.002)
81
+ print("quad")
82
+
83
+
84
+ """Model Utils"""
85
+
86
+
87
+ def mean_percentile_rank(y_true, y_pred, k=5):
88
+ """
89
+ @paper
90
+ The first evaluation measure is the Mean Percentile Rank
91
+ (MPR) which is computed per synthesizer parameter.
92
+ """
93
+ # TODO
94
+
95
+
96
+ def top_k_mean_accuracy(y_true, y_pred, k=5):
97
+ """
98
+ @ paper
99
+ The top-k mean accuracy is obtained by computing the top-k
100
+ accuracy for each test example and then taking the mean across
101
+ all examples. In the same manner as done in the MPR analysis,
102
+ we compute the top-k mean accuracy per synthesizer
103
+ parameter for 𝑘 = 1, ... ,5.
104
+ """
105
+ # TODO: per parameter?
106
+ original_shape = tf.shape(y_true)
107
+ y_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
108
+ y_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))
109
+ top_k = K.in_top_k(y_pred, tf.cast(tf.argmax(y_true, axis=-1), "int32"), k)
110
+ correct_pred = tf.reshape(top_k, original_shape[:-1])
111
+ return tf.reduce_mean(tf.cast(correct_pred, tf.float32))
112
+
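+ # Worked example (k=2), illustrative values only:
+ # y_true = [[0, 0, 1]], y_pred = [[0.5, 0.1, 0.4]] -> the true class
+ # (index 2) is among the top-2 predictions, so the score is 1.0;
+ # with k=1 the argmax is index 0, so the score would be 0.0.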
113
+
114
+ @tf.function
115
+ def CustomLoss(y_true, y_pred):
116
+ bce = tf.keras.losses.BinaryCrossentropy()
117
+ weights = custom_spectral_loss(y_true, y_pred)
118
+ weight_shift = (1 - weight_var.numpy()) + (weight_var.numpy() * weights.numpy())
119
+ # tf.print(f"New weight is {weight_shift}")
120
+ loss = bce(y_true, y_pred, sample_weight=weight_shift)
121
+ return loss
122
+
123
+
124
+ @tf.function
125
+ def custom_spectral_loss(y_true, y_pred):
126
+ # tf.print("After compiling model :",tf.executing_eagerly())
127
+
128
+ y_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
129
+ y_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))
130
+
131
+ # Assuming y_true and y_pred contain parameters for audio synthesis
132
+ # Extract parameters from y_true and y_pred
133
+ with open("test_datasets/InverSynth_params.pckl", "rb") as f:
134
+ parameters: ParameterSet = load(f)
135
+
136
+ predlist_true: List[ParamValue] = parameters.decode(y_true[0])
137
+
138
+ predlist_pred: List[ParamValue] = parameters.decode(y_pred[0])
139
+
140
+ # Convert parameter lists to DataFrames
141
+ # Generate audio from parameters
142
+ audio_true, penalty = generate_audio(predlist_true)
143
+ audio_pred, penalty = generate_audio(predlist_pred)
144
+
145
+ # Compute spectrogram
146
+ if SPECTRO_TYPE == "spectro":
147
+ spectrogram_true = tf.math.abs(
148
+ tf.signal.stft(audio_true, frame_length=1024, frame_step=512)
149
+ )
150
+ spectrogram_pred = tf.math.abs(
151
+ tf.signal.stft(audio_pred, frame_length=1024, frame_step=512)
152
+ )
153
+ elif SPECTRO_TYPE == "qtrans":
154
+ spectrogram_true = librosa.amplitude_to_db(
155
+ librosa.cqt(audio_true, sr=SAMPLE_RATE, hop_length=128), ref=np.max
156
+ )
157
+ spectrogram_pred = librosa.amplitude_to_db(
158
+ librosa.cqt(audio_pred, sr=SAMPLE_RATE, hop_length=128), ref=np.max
159
+ )
160
+ elif SPECTRO_TYPE == "mel":
161
+ mel_spect = librosa.feature.melspectrogram(
162
+ audio_true, sr=SAMPLE_RATE, n_fft=2048, hop_length=1024
163
+ )
164
+ spectrogram_true = librosa.power_to_db(mel_spect, ref=np.max)
165
+ mel_spect = librosa.feature.melspectrogram(
166
+ audio_pred, sr=SAMPLE_RATE, n_fft=2048, hop_length=1024
167
+ )
168
+ spectrogram_pred = librosa.power_to_db(mel_spect, ref=np.max)
169
+ # L1 LOSS
170
+ if LOSS_TYPE == "L1":
171
+ spectral_loss = penalty * tf.reduce_mean(
172
+ tf.abs(spectrogram_true - spectrogram_pred)
173
+ )
174
+ # L2 LOSS
175
+ elif LOSS_TYPE == "L2":
176
+ spectral_loss = penalty * tf.reduce_mean(
177
+ (spectrogram_true - spectrogram_pred) ** 2
178
+ )
179
+ # COSINE LOSS
180
+ elif LOSS_TYPE == "COSINE":
181
+ spectral_loss = tf.losses.cosine_distance(
182
+ spectrogram_true, spectrogram_pred, weights=1.0, axis=-1
183
+ )
184
+
185
+ return spectral_loss
186
+
187
+
188
+ def compare(target, prediction, params, precision=1, print_output=False):
189
+ if print_output and len(prediction) < 10:
190
+ print(prediction)
191
+ print("Pred: {}".format(np.round(prediction, decimals=2)))
192
+ print("PRnd: {}".format(np.round(prediction)))
193
+ print("Act : {}".format(target))
194
+ print("+" * 5)
195
+
196
+ pred: List[ParamValue] = params.decode(prediction)
197
+ act: List[ParamValue] = params.decode(target)
198
+ pred_index: List[int] = [np.array(p.encoding).argmax() for p in pred]
199
+ act_index: List[int] = [np.array(p.encoding).argmax() for p in act]
200
+ width = 8
201
+ names = "Parameter: "
202
+ act_s = "Actual: "
203
+ pred_s = "Predicted: "
204
+ pred_i = "Pred. Indx:"
205
+ act_i = "Act. Index:"
206
+ diff_i = "Index Diff:"
207
+ for p in act:
208
+ names += p.name.rjust(width)[:width]
209
+ act_s += f"{p.value:>8.2f}"
210
+ for p in pred:
211
+ pred_s += f"{p.value:>8.2f}"
212
+ for p in pred_index:
213
+ pred_i += f"{p:>8}"
214
+ for p in act_index:
215
+ act_i += f"{p:>8}"
216
+ for i in range(len(act_index)):
217
+ diff = pred_index[i] - act_index[i]
218
+ diff_i += f"{diff:>8}"
219
+ exact = 0.0
220
+ close = 0.0
221
+ n_params = len(pred_index)
222
+ for i in range(n_params):
223
+ if pred_index[i] == act_index[i]:
224
+ exact = exact + 1.0
225
+ if abs(pred_index[i] - act_index[i]) <= precision:
226
+ close = close + 1.0
227
+ exact_ratio = exact / n_params
228
+ close_ratio = close / n_params
229
+ if print_output:
230
+ print(names)
231
+ print(act_s)
232
+ print(pred_s)
233
+ print(act_i)
234
+ print(pred_i)
235
+ print(diff_i)
236
+ print("-" * 30)
237
+ return exact_ratio, close_ratio
238
+
239
+
240
+ def evaluate(
241
+ prediction: np.ndarray,
242
+ x: np.ndarray,
243
+ y: np.ndarray,
244
+ params: ParameterSet,
245
+ ):
246
+ print("Prediction Shape: {}".format(prediction.shape))
247
+
248
+ num: int = x.shape[0]
249
+ correct: int = 0
250
+ correct_r: float = 0.0
251
+ close_r: float = 0.0
252
+ for i in range(num):
253
+ should_print = i < 5
254
+ exact, close = compare(
255
+ target=y[i],
256
+ prediction=prediction[i],
257
+ params=params,
258
+ print_output=should_print,
259
+ )
260
+ if exact == 1.0:
261
+ correct = correct + 1
262
+ correct_r += exact
263
+ close_r += close
264
+ summary = params.explain()
265
+ print(
266
+ "{} Parameters with {} levels (fixed: {})".format(
267
+ summary["n_variable"], summary["levels"], summary["n_fixed"]
268
+ )
269
+ )
270
+ print(
271
+ "Got {} out of {} ({:.1f}% perfect); Exact params: {:.1f}%, Close params: {:.1f}%".format(
272
+ correct,
273
+ num,
274
+ correct / num * 100,
275
+ correct_r / num * 100,
276
+ close_r / num * 100,
277
+ )
278
+ )
279
+
280
+
281
+ """
282
+ Wrap up the whole training process in a standard function. Gets a callback
283
+ to actually make the model, to keep it as flexible as possible.
284
+ # Params:
285
+ # - dataset_name (dataset name)
286
+ # - model_name: (C1..C6,e2e)
287
+ # - model_callback: function taking name,inputs,outputs,data_format and returning a Keras model
288
+ # - epochs: int
289
+ # - dataset_dir: place to find input data
290
+ # - output_dir: place to put outputs
291
+ # - parameters_file (override parameters filename)
292
+ # - dataset_file (override dataset filename)
293
+ # - data_format (channels_first or channels_last)
294
+ # - run_name: to save this run as
295
+ """
296
+ # LOSS TYPE FOR CUSTOM LOSS FUNCTION
297
+ LOSS_TYPE = "L1"
298
+ SPECTRO_TYPE = "spectro"
299
+ PRINT = 1
300
+
301
+ # DAWDREAMER EXPORT SETTINGS
302
+ SAMPLE_RATE = 16384
303
+ BUFFER_SIZE = 1024
304
+ SYNTH_PLUGIN = "TAL-NoiseMaker.vst3"
305
+
306
+ ENGINE = daw.RenderEngine(SAMPLE_RATE, BUFFER_SIZE)
307
+ SYNTH = ENGINE.make_plugin_processor("my_synth", SYNTH_PLUGIN)
308
+ SYNTH.add_midi_note(40, 127, 0, 0.8)
309
+
310
+ with open("plugin_config/TAL-NoiseMaker-config.json") as f:
311
+ data = json.load(f)
312
+
313
+ dico = []
314
+ # Extract the key ID from the JSON data
315
+ key_id = data["parameters"]
316
+ for param in key_id:
317
+ dico.append(param["id"])
318
+
319
+ DICO = dico
320
+
321
+
322
+ def train_model(
323
+ # Main options
324
+ dataset_name: str,
325
+ model_name: str,
326
+ epochs: int,
327
+ model_callback: Callable[[str, int, int, str], keras.Model],
328
+ dataset_dir: str,
329
+ output_dir: str, # Directory names
330
+ dataset_file: str = None,
331
+ parameters_file: str = None,
332
+ run_name: str = None,
333
+ data_format: str = "channels_last",
334
+ save_best: bool = True,
335
+ resume: bool = False,
336
+ checkpoint: bool = True,
337
+ model_type: str = "STFT",
338
+ ):
339
+ tf.config.run_functions_eagerly(True)
340
+ # tf.data.experimental.enable_debug_mode()
341
+ time_generated = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
342
+ if not dataset_file:
343
+ dataset_file = (
344
+ os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_data.hdf5"
345
+ )
346
+ if not parameters_file:
347
+ parameters_file = (
348
+ os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_params.pckl"
349
+ )
350
+ if not run_name:
351
+ run_name = dataset_name + "_" + model_name
352
+
353
+ model_file = f"{output_dir}/model/{run_name}_{time_generated}"
354
+ if not os.path.exists(model_file):
355
+ os.makedirs(model_file)
356
+ best_model_file = f"{output_dir}/best_checkpoint/{run_name}_best_{time_generated}"
357
+ if not os.path.exists(best_model_file):
358
+ os.makedirs(best_model_file)
359
+ if resume:
360
+ # checkpoint_model_file = f"{output_dir}/{run_name}_checkpoint_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
361
+ # history_file = f"{output_dir}/{run_name}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
362
+ checkpoint_model_file = (
363
+ f"{output_dir}/checkpoints/InverSynth_C6XL_checkpoint_20240123-100644"
364
+ )
365
+ history_file = f"{output_dir}/history/InverSynth_C6XL_20240123-100644"
366
+
367
+ print(tf.config.list_physical_devices("GPU"))
368
+ gpu_avail = len(tf.config.list_physical_devices("GPU")) # True/False
369
+ cuda_gpu_avail = len(tf.config.list_physical_devices("GPU")) # True/False
370
+
371
+ print("+" * 30)
372
+ print(f"++ {run_name}")
373
+ print(
374
+ f"Running model: {model_name} on dataset {dataset_file} (parameters {parameters_file}) for {epochs} epochs"
375
+ )
376
+ print(f"Saving model in {output_dir} as {model_file}")
377
+ print(f"Saving history as {history_file}")
378
+ print(f"GPU: {gpu_avail}, with CUDA: {cuda_gpu_avail}")
379
+ print("+" * 30)
380
+
381
+ os.makedirs(output_dir, exist_ok=True)
382
+
383
+ # Get training and validation generators
384
+ params = {"data_file": dataset_file, "batch_size": 64, "shuffle": True}
385
+
386
+ model: keras.Model = None
387
+ if resume and os.path.exists(checkpoint_model_file):
388
+ history = pd.read_csv(history_file)
389
+ # Note - it's zero-indexed in the file, but 1-indexed in the display
390
+ initial_epoch: int = max(history.iloc[:, 0]) + 1
391
+ # epochs:int = initial_epoch
392
+ print(
393
+ f"Resuming from model file: {checkpoint_model_file} after epoch {initial_epoch}"
394
+ )
395
+ model = keras.models.load_model(
396
+ checkpoint_model_file,
397
+ custom_objects={
398
+ "top_k_mean_accuracy": top_k_mean_accuracy,
399
+ "Spectrogram": Spectrogram,
400
+ "custom_spectral_loss": custom_spectral_loss,
401
+ "CustomLoss": CustomLoss,
402
+ },
403
+ )
404
+
405
+ return model, parameters_file
406
+
407
+
408
+ def inferrence(model: keras.Model, parameters_file: str, file_path: str, file_id: str):
409
+ # Start infer
410
+
411
+ with open(parameters_file, "rb") as f:
412
+ parameters: ParameterSet = load(f)
413
+
414
+ print("++++" * 5)
415
+ print("Pushing to trained model")
416
+ print("++++" * 5)
417
+
418
+ namefile = file_path
419
+ if not os.path.exists(namefile):
420
+ raise FileNotFoundError(f"File path invalid: {namefile}")
425
+
426
+ try:
427
+ newpred = model.predict(audio_importer(namefile))
428
+ except Exception as e:
429
+ raise RuntimeError(f"Prediction crashed: {e}")
430
+
431
+ predlist: List[ParamValue] = parameters.decode(newpred[0])
432
+ df = pd.DataFrame(predlist)
433
+
434
+ print(df)
435
+ df = df.drop(["encoding"], axis=1)
436
+ # saving the dataframe
437
+
438
+ print("Outputting CSV config in " + str(f"temp/"))
439
+
440
+ csv_path = str(f"temp/{file_id}_config.csv")
441
+
442
+ xml_path_wow = (f"temp/{file_id}_config.noisemakerpreset")
443
+ df.to_csv(csv_path)
444
+
445
+ xml_path = convert_csv_to_preset(csv_path, xml_path_wow)
446
+ # export(prediction, X, y, parameters)
447
+ # Loop through the rows of the DataFrame
448
+ i = 0
449
+ for values in df["value"].values:
450
+ # Set parameters using DataFrame values
451
+ SYNTH.set_parameter(DICO[i], values)
452
+ # (MIDI note, velocity, start, duration)
453
+ i += 1
454
+ # Setting volume to 0.9
455
+ SYNTH.set_parameter(1, 0.9)
456
+ # Set up the processing graph
457
+ graph = [
458
+ # synth takes no inputs, so we give an empty list.
459
+ (SYNTH, []),
460
+ ]
461
+
462
+ ENGINE.load_graph(graph)
463
+ ENGINE.render(1)
464
+ data = ENGINE.get_audio()
465
+ try:
466
+ data = librosa.to_mono(data).transpose()
467
+ except Exception:
468
+ tf.print("ERROR" * 100)
469
+ df = df.fillna(0)
470
+ data = df.to_numpy()
471
+ data = librosa.to_mono(data).transpose()
472
+ tf.print("crashed, nan in generation")
473
+ synth_params = dict(SYNTH.get_patch())
474
+ print(synth_params)
475
+
476
+ df = pd.DataFrame(data)
477
+
478
+ # penalty=1000000
479
+ # df = pd.DataFrame(data)
480
+ # df = df.fillna(0)
481
+ # data = df.to_numpy()
482
+
483
+ output_file_path = str(f"temp/{file_id}_generated.wav")
484
+
485
+ wavfile.write(output_file_path, SAMPLE_RATE, data)
486
+
487
+ return file_path, xml_path, output_file_path
488
+
489
+
490
+ def generate_audio(df_params):
491
+ # Loop through the rows of the DataFrame
492
+ i = 0
493
+ penalty = 1
494
+ for param in df_params:
495
+ # Set parameters using DataFrame values
496
+ SYNTH.set_parameter(DICO[i], param.value)
497
+ # (MIDI note, velocity, start, duration)
498
+ i += 1
499
+ # Set up the processing graph
500
+ graph = [
501
+ # synth takes no inputs, so we give an empty list.
502
+ (SYNTH, []),
503
+ ]
504
+
505
+ ENGINE.load_graph(graph)
506
+ ENGINE.render(1)
507
+ data = ENGINE.get_audio()
508
+ try:
509
+ data = librosa.to_mono(data).transpose()
510
+ except Exception:
511
+ print("ERROR" * 100)
512
+ df = pd.DataFrame(data)
513
+ df = df.fillna(0)
514
+ data = df.to_numpy()
515
+ data = librosa.to_mono(data).transpose()
516
+
517
+ result = np.array(data)
518
+ return result, penalty
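
One note on CustomLoss above: while weight_var stays at 0.0 the sample weight collapses to 1.0 and the loss is plain binary cross-entropy; the Weight_trans callback only blends the spectral term in late in training. A toy sketch of the blend (values illustrative):

w = 0.0                      # weight_var before the schedule kicks in (epoch > 680)
spectral = 2.5               # hypothetical spectral-loss value
sample_weight = (1 - w) + w * spectral
assert sample_weight == 1.0  # pure BCE until weight_var is raised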
back/models/runner.py ADDED
@@ -0,0 +1,82 @@
1
+ import argparse
2
+
3
+ from models.launch import train_model
4
+ from models.spectrogram_cnn import get_model as get_spectrogram
5
+
6
+
7
+ def standard_run_parser() -> argparse.ArgumentParser:
8
+ parser = argparse.ArgumentParser(
9
+ description="Setup and train a model, storing the output"
10
+ )
11
+ parser.add_argument(
12
+ "--model",
13
+ dest="model_name",
14
+ type=str,
15
+ choices=["C1", "C2", "C3", "C4", "C5", "C6", "C6XL", "e2e"],
16
+ default="e2e",
17
+ help="Model architecture to run",
18
+ )
19
+ parser.add_argument(
20
+ "--dataset_name",
21
+ default="InverSynth",
22
+ help='Name of the dataset to use - other filenames are generated from this. If you have a file "modelname_data.hdf5", put in "modelname"',
23
+ )
24
+ parser.add_argument(
25
+ "--epochs", type=int, default=100, help="How many epochs to run"
26
+ )
27
+ parser.add_argument(
28
+ "--dataset_dir",
29
+ default="test_datasets",
30
+ help="Directory full of datasets to use",
31
+ )
32
+ parser.add_argument(
33
+ "--output_dir",
34
+ default="output",
35
+ help="Directory to store the final model and history",
36
+ )
37
+ parser.add_argument(
38
+ "--dataset_file", default=None, help="Specify an exact dataset file to use"
39
+ )
40
+ parser.add_argument(
41
+ "--parameters_file",
42
+ default=None,
43
+ help="Specify an exact parameters file to use",
44
+ )
45
+ parser.add_argument(
46
+ "--data_format",
47
+ type=str,
48
+ choices=["channels_last", "channels_first"],
49
+ default="channels_last",
50
+ help="Image data format for Keras. If CPU only, has to be channels_last",
51
+ )
52
+ parser.add_argument(
53
+ "--run_name",
54
+ type=str,
55
+ dest="run_name",
56
+ help="Name to save the output under. Defaults to dataset_name + model",
57
+ )
58
+ parser.add_argument(
59
+ "--resume",
60
+ dest="resume",
61
+ action="store_const",
62
+ const=True,
63
+ default=False,
64
+ help="Look for a checkpoint file to resume from",
65
+ )
66
+ return parser
67
+
68
+
69
+ if __name__ == "__main__":
70
+
71
+ print("Starting model runner")
72
+ # Get a standard parser, and the arguments out of it
73
+ parser = standard_run_parser()
74
+ args = parser.parse_args()
75
+ setup = vars(args)
76
+
77
+ print("Parsed arguments")
78
+ # Figure out the model callback
79
+ model_callback = get_spectrogram
80
+
81
+ # Actually train the model
82
+ train_model(model_callback=model_callback, **setup)
back/models/spectrogram_cnn.py ADDED
@@ -0,0 +1,134 @@
1
+ import numpy as np
2
+
3
+ # import keras
4
+ from kapre.time_frequency import Spectrogram
5
+ from tensorflow import keras
6
+
7
+ from generators.generator import *
8
+ from models.common.architectures import layers_map
9
+
10
+
11
+ """
12
+ The STFT spectrogram of the input signal is fed
13
+ into a 2D CNN that predicts the synthesizer parameter
14
+ configuration. This configuration is then used to produce
15
+ a sound that is similar to the input sound.
16
+ """
17
+
18
+
19
+ """Model Architecture"""
20
+ # @ paper:
21
+ # 1 2D Strided Convolution Layer C(38,13,26,13,26)
22
+ # where C(F,K1,K2,S1,S2) stands for a ReLU activated
23
+ # 2D strided convolutional layer with F filters in size of (K1,K2)
24
+ # and strides (S1,S2).
25
+
26
+
27
+ def assemble_model(
28
+ src: np.ndarray,
29
+ n_outputs: int,
30
+ arch_layers: list,
31
+ n_dft: int = 512, # Orig:128
32
+ n_hop: int = 256, #  Orig:64
33
+ data_format: str = "channels_first",
34
+ ) -> keras.Model:
35
+
36
+ inputs = keras.Input(shape=src.shape, name="stft")
37
+
38
+ # @paper: Spectrogram based CNN that receives the (log) spectrogram matrix as input
39
+
40
+ # @kapre:
41
+ # abs(Spectrogram) in a shape of 2D data, i.e.,
42
+ # `(None, n_channel, n_freq, n_time)` if `'channels_first'`,
43
+ # `(None, n_freq, n_time, n_channel)` if `'channels_last'`,
44
+ x = Spectrogram(
45
+ n_dft=n_dft,
46
+ n_hop=n_hop,
47
+ input_shape=src.shape,
48
+ trainable_kernel=True,
49
+ name="static_stft",
50
+ image_data_format=data_format,
51
+ return_decibel_spectrogram=True,
52
+ )(inputs)
53
+
54
+ # Swaps order to match the paper?
55
+ # TODO: dig into this (GPU only?)
56
+ if data_format == "channels_first":  # (n_channel, n_freq, n_time)
57
+ x = keras.layers.Permute((1, 3, 2))(x)
58
+ else:
59
+ x = keras.layers.Permute((2, 1, 3))(x)
60
+
61
+ # x = keras.layers.Conv2D(64,(3,3),strides=(2,2),activation="relu",data_format="channels_last", padding='same')(x)
62
+ # x = keras.layers.Conv2D(128,(3,3),strides=(2,2),activation="relu",data_format="channels_last", padding='same')(x)
63
+ # x = keras.layers.Conv2D(128,(3,4),strides=(2,3),activation="relu",data_format="channels_last", padding='same')(x)
64
+ # x = keras.layers.Conv2D(128,(3,3),strides=(2,2),activation="relu",data_format="channels_last", padding='same')(x)
65
+ # x = keras.layers.Conv2D(256,(3,3),strides=(2,2),activation="relu",data_format="channels_last", padding='same')(x)
66
+ # x = keras.layers.Conv2D(256,(3,3),strides=(1,2),activation="relu",data_format="channels_last", padding='same')(x)
67
+ for arch_layer in arch_layers:
68
+ x = keras.layers.Conv2D(
69
+ arch_layer.filters,
70
+ arch_layer.window_size,
71
+ strides=arch_layer.strides,
72
+ activation=arch_layer.activation,
73
+ data_format=data_format,
74
+ padding='same'
75
+ )(x)
76
+
77
+ # Flatten down to a single dimension
78
+ x = keras.layers.Flatten()(x)
79
+
80
+ # @paper: sigmoid activations with binary cross entropy loss
81
+ # @paper: FC-512
82
+ x = keras.layers.Dense(512)(x)
83
+
84
+ # @paper: FC-368(sigmoid)
85
+ outputs = keras.layers.Dense(n_outputs, activation="sigmoid", name="predictions")(x)
86
+
87
+ return keras.Model(inputs=inputs, outputs=outputs)
88
+
89
+
90
+ """
91
+ Standard callback to get a model ready to train
92
+ """
93
+
94
+
95
+ def get_model(
96
+ model_name: str, inputs: int, outputs: int, data_format: str = "channels_last"
97
+ ) -> keras.Model:
98
+ arch_layers = layers_map.get("C1")
99
+ if model_name in layers_map:
100
+ arch_layers = layers_map.get(model_name)
101
+ else:
102
+ print(
103
+ f"Warning: {model_name} is not compatible with the spectrogram model. C1 Architecture will be used instead."
104
+ )
105
+ return assemble_model(
106
+ np.zeros([1, inputs]),
107
+ n_outputs=outputs,
108
+ arch_layers=arch_layers,
109
+ data_format=data_format,
110
+ )
111
+
112
+
113
+ if __name__ == "__main__":
114
+
115
+ from models.launch import train_model, inferrence
116
+ from models.runner import standard_run_parser
117
+
118
+ # Get a standard parser, and the arguments out of it
119
+ parser = standard_run_parser()
120
+ args = parser.parse_args()
121
+ setup = vars(args)
122
+ print(setup)
123
+ # distinguish model type for reshaping
124
+ setup["model_type"] = "STFT"
125
+ # tf.config.run_functions_eagerly(True)
126
+ # Actually train the model
127
+
128
+ model, parameters_file = train_model(model_callback=get_model, **setup)
129
+
130
+ # inferrence() also needs a .wav path and a file id; placeholders shown here
+ file_path, xml_path, output_file_path = inferrence(model, parameters_file, "test.wav", "test")
131
+
132
+ print(file_path)
133
+
134
+ print(xml_path)
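
Building one of these networks directly is a single call to get_model; a sketch using the defaults in this commit (16384 input samples; the output size depends on the parameter encoding, e.g. the paper's FC-368 layer):

from models.spectrogram_cnn import get_model

model = get_model("C6XL", inputs=16384, outputs=368, data_format="channels_last")
model.summary()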
back/output.xml ADDED
@@ -0,0 +1 @@
1
+ <tal curprogram="0" version="1.7" presetName="CH Chordionator III FN" path="Factory Presets/CHORD/CH Chordionator III FN.noisemakerpreset"><programs><program programname="CH Chordionator III FN" unknown="0.5" volume="0" -="0" filtertype="0.63636364" cutoff="0.6666666666666666" resonance="0.9333333333333333" keyfollow="0.26666666666666666" filtercontour="0.06666666666666667" filterattack="0.13333333333333333" filterdecay="0.26666666666666666" filtersustain="0.3333333333333333" filterrelease="0.13333333333333333" ampattack="0.8" ampdecay="1.0" ampsustain="0.4666666666666667" amprelease="0.13333333333333333" osc1volume="0.26666666666666666" osc2volume="0.9333333333333333" osc3volume="0.3333333333333333" oscmastertune="0" osc1tune="0.6" osc2tune="0.8" osc1finetune="0.6666666666666666" osc2finetune="0.2" osc1waveform="0.0" osc2waveform="0.5" oscsync="1.0" lfo1waveform="0.6" lfo2waveform="1.0" lfo1rate="0.6666666666666666" lfo2rate="0.9333333333333333" lfo1amount="0.06666666666666667" lfo2amount="1.0" lfo1destination="0.2857142857142857" lfo2destination="0.14285714285714285" lfo1phase="0.8666666666666667" lfo2phase="0.9333333333333333" osc2fm="0.4" osc2phase="0.4666666666666667" osc1pw="0.7333333333333333" osc1phase="0.5333333333333333" transpose="0" freeadattack="0.3333333333333333" freeaddecay="0.26666666666666666" freeadamount="0.5333333333333333" freeaddestination="0.2" lfo1sync="1.0" lfo1keytrigger="0" lfo2sync="0.0" lfo2keytrigger="0" portamento="0" portamentomode="0" voices="0" velocityvolume="0" velocitycontour="0" velocitycutoff="0" pitchwheelcutoff="0" pitchwheelpitch="0" ringmodulation="0.4666666666666667" chorus1enable="0.0" chorus2enable="1.0" reverbwet="0.4" reverbdecay="0.8666666666666667" reverbpredelay="0.5333333333333333" reverbhighcut="0.4666666666666667" reverblowcut="0.4666666666666667" oscbitcrusher="0.2" highpass="0.06666666666666667" detune="0.13333333333333333" vintagenoise="0.9333333333333333" envelopeeditordest1="0" envelopeeditorspeed="0" envelopeeditoramount="0" envelopeoneshot="0" envelopefixtempo="0" filterdrive="0.0" delaywet="0.13333333333333333" delaytime="1.0" delaysync="1.0" delayfactorl="0.0" delayfactorr="0.0" delayhighshelf="0.4666666666666667" delaylowshelf="1.0" delayfeedback="0.5333333333333333" /></programs><midimap /></tal>
back/plugin_config/TAL-NoiseMaker-config.json ADDED
@@ -0,0 +1,422 @@
1
+ {
2
+ "parameters": [
3
+ {
4
+ "id": 11,
5
+ "name": "Amp Attack",
6
+ "values": "-"
7
+ },
8
+ {
9
+ "id": 12,
10
+ "name": "Amp Decay",
11
+ "values": "-"
12
+ },
13
+ {
14
+ "id": 14,
15
+ "name": "Amp Release",
16
+ "values": "-"
17
+ },
18
+ {
19
+ "id": 13,
20
+ "name": "Amp Sustain",
21
+ "values": "-"
22
+ },
23
+ {
24
+ "id": 58,
25
+ "name": "Chorus 1 Enable",
26
+ "values": [0.0, 1.0]
27
+ },
28
+ {
29
+ "id": 59,
30
+ "name": "Chorus 2 Enable",
31
+ "values": [0.0, 1.0]
32
+ },
33
+ {
34
+ "id": 3,
35
+ "name": "Filter Cutoff",
36
+ "values": "-"
37
+ },
38
+ {
39
+ "id": 81,
40
+ "name": "Delay x2 L",
41
+ "values": [0.0, 1.0]
42
+ },
43
+ {
44
+ "id": 82,
45
+ "name": "Delay x2 R",
46
+ "values": [0.0, 1.0]
47
+ },
48
+ {
49
+ "id": 85,
50
+ "name": "Delay Feedback",
51
+ "values": "-"
52
+ },
53
+ {
54
+ "id": 83,
55
+ "name": "Delay High Shelf",
56
+ "values": "-"
57
+ },
58
+ {
59
+ "id": 84,
60
+ "name": "Delay Low Shelf",
61
+ "values": "-"
62
+ },
63
+ {
64
+ "id": 80,
65
+ "name": "Delay Sync",
66
+ "values": [0.0, 1.0]
67
+ },
68
+ {
69
+ "id": 79,
70
+ "name": "Delay Time",
71
+ "values": "-"
72
+ },
73
+ {
74
+ "id": 78,
75
+ "name": "Delay Wet",
76
+ "values": "-"
77
+ },
78
+ {
79
+ "id": 67,
80
+ "name": "Master Detune",
81
+ "values": "-"
82
+ },
83
+ {
84
+ "id": 7,
85
+ "name": "Filter Attack",
86
+ "values": "-"
87
+ },
88
+ {
89
+ "id": 6,
90
+ "name": "Filter Contour",
91
+ "values": "-"
92
+ },
93
+ {
94
+ "id": 8,
95
+ "name": "Filter Decay",
96
+ "values": "-"
97
+ },
98
+ {
99
+ "id": 77,
100
+ "name": "Filter Drive",
101
+ "values": "-"
102
+ },
103
+ {
104
+ "id": 10,
105
+ "name": "Filter Release",
106
+ "values": "-"
107
+ },
108
+ {
109
+ "id": 9,
110
+ "name": "Filter Sustain",
111
+ "values": "-"
112
+ },
113
+ {
114
+ "id": 2,
115
+ "name": "Filter Type",
116
+ "values": [0.0, 0.09090909, 0.18181818, 0.27272727, 0.36363636, 0.45454545, 0.54545455, 0.63636364, 0.72727273, 0.81818182, 0.90909091, 1.0]
117
+ },
118
+ {
119
+ "id": 43,
120
+ "name": "Free Ad Amount",
121
+ "values": "-"
122
+ },
123
+ {
124
+ "id": 41,
125
+ "name": "Free Ad Attack",
126
+ "values": "-"
127
+ },
128
+ {
129
+ "id": 42,
130
+ "name": "Free Ad Decay",
131
+ "values": "-"
132
+ },
133
+ {
134
+ "id": 44,
135
+ "name": "Free Ad Destination",
136
+ "values": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
137
+ },
138
+ {
139
+ "id": 66,
140
+ "name": "Master High Pass",
141
+ "values": "-"
142
+ },
143
+ {
144
+ "id": 5,
145
+ "name": "Filter Keyfollow",
146
+ "values": "-"
147
+ },
148
+ {
149
+ "id": 30,
150
+ "name": "Lfo 1 Amount",
151
+ "values": "-"
152
+ },
153
+ {
154
+ "id": 32,
155
+ "name": "Lfo 1 Destination",
156
+ "values": [0.0, 0.14285714285714285, 0.2857142857142857, 0.42857142857142855, 0.5714285714285714, 0.7142857142857142, 0.8571428571428571, 1.0]
157
+ },
158
+ {
159
+ "id": 34,
160
+ "name": "Lfo 1 Phase",
161
+ "values": "-"
162
+ },
163
+ {
164
+ "id": 28,
165
+ "name": "Lfo 1 Rate",
166
+ "values": "-"
167
+ },
168
+ {
169
+ "id": 45,
170
+ "name": "Lfo 1 Sync",
171
+ "values": [0.0, 1.0]
172
+ },
173
+ {
174
+ "id": 26,
175
+ "name": "Lfo 1 Waveform",
176
+ "values": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
177
+ },
178
+ {
179
+ "id": 31,
180
+ "name": "Lfo 2 Amount",
181
+ "values": "-"
182
+ },
183
+ {
184
+ "id": 33,
185
+ "name": "Lfo 2 Destination",
186
+ "values": [0.0, 0.14285714285714285, 0.2857142857142857, 0.42857142857142855, 0.5714285714285714, 0.7142857142857142, 0.8571428571428571, 1.0]
187
+ },
188
+ {
189
+ "id": 35,
190
+ "name": "Lfo 2 Phase",
191
+ "values": "-"
192
+ },
193
+ {
194
+ "id": 29,
195
+ "name": "Lfo 2 Rate",
196
+ "values": "-"
197
+ },
198
+ {
199
+ "id": 47,
200
+ "name": "Lfo 2 Sync",
201
+ "values": [0.0, 1.0]
202
+ },
203
+ {
204
+ "id": 27,
205
+ "name": "Lfo 2 Waveform",
206
+ "values": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
207
+ },
208
+ {
209
+ "id": 21,
210
+ "name": "Osc 1 Fine Tune",
211
+ "values": "-"
212
+ },
213
+ {
214
+ "id": 39,
215
+ "name": "Osc 1 Phase",
216
+ "values": "-"
217
+ },
218
+ {
219
+ "id": 38,
220
+ "name": "Osc 1 PW",
221
+ "values": "-"
222
+ },
223
+ {
224
+ "id": 19,
225
+ "name": "Osc 1 Tune",
226
+ "values": "-"
227
+ },
228
+ {
229
+ "id": 15,
230
+ "name": "Osc 1 Volume",
231
+ "values": "-"
232
+ },
233
+ {
234
+ "id": 23,
235
+ "name": "Osc 1 Waveform",
236
+ "values":[0.0, 0.5, 1.0]
237
+ },
238
+ {
239
+ "id": 22,
240
+ "name": "Osc 2 Fine Tune",
241
+ "values": "-"
242
+ },
243
+ {
244
+ "id": 36,
245
+ "name": "Osc 2 FM",
246
+ "values": "-"
247
+ },
248
+ {
249
+ "id": 37,
250
+ "name": "Osc 2 Phase",
251
+ "values": "-"
252
+ },
253
+ {
254
+ "id": 20,
255
+ "name": "Osc 2 Tune",
256
+ "values": "-"
257
+ },
258
+ {
259
+ "id": 16,
260
+ "name": "Osc 2 Volume",
261
+ "values": "-"
262
+ },
263
+ {
264
+ "id": 24,
265
+ "name": "Osc 2 Waveform",
266
+ "values":[0.0, 0.5, 1.0]
267
+ },
268
+ {
269
+ "id": 17,
270
+ "name": "Osc 3 Volume",
271
+ "values": "-"
272
+ },
273
+ {
274
+ "id": 65,
275
+ "name": "Osc Bitcrusher",
276
+ "values": "-"
277
+ },
278
+ {
279
+ "id": 25,
280
+ "name": "Osc Sync",
281
+ "values": [0.0, 1.0]
282
+ },
283
+ {
284
+ "id": 4,
285
+ "name": "Filter Resonance",
286
+ "values": "-"
287
+ },
288
+ {
289
+ "id": 61,
290
+ "name": "Reverb Decay",
291
+ "values": "-"
292
+ },
293
+ {
294
+ "id": 63,
295
+ "name": "Reverb High Cut",
296
+ "values": "-"
297
+ },
298
+ {
299
+ "id": 64,
300
+ "name": "Reverb Low Cut",
301
+ "values": "-"
302
+ },
303
+ {
304
+ "id": 62,
305
+ "name": "Reverb Pre Delay",
306
+ "values": "-"
307
+ },
308
+ {
309
+ "id": 60,
310
+ "name": "Reverb Wet",
311
+ "values": "-"
312
+ },
313
+ {
314
+ "id": 57,
315
+ "name": "Ringmodulation",
316
+ "values": "-"
317
+ },
318
+ {
319
+ "id": 68,
320
+ "name": "Vintage Noise",
321
+ "values": "-"
322
+ }
323
+ ],
324
+
325
+ "fixed_parameters": [
326
+ {
327
+ "id": 73,
328
+ "name": "Envelope Amount",
329
+ "value": 0.0
330
+ },
331
+ {
332
+ "id": 71,
333
+ "name": "Envelope Destination",
334
+ "value": 0.0
335
+ },
336
+ {
337
+ "id": 72,
338
+ "name": "Envelope Speed",
339
+ "value": 0.0
340
+ },
341
+ {
342
+ "id": 75,
343
+ "name": "Envelope Fix Tempo",
344
+ "value": 0.0
345
+ },
346
+ {
347
+ "id": 74,
348
+ "name": "Envelope One Shot Mode",
349
+ "value": 0.0
350
+ },
351
+ {
352
+ "id": 46,
353
+ "name": "Lfo 1 Keytrigger",
354
+ "value": 0.0
355
+ },
356
+ {
357
+ "id": 48,
358
+ "name": "Lfo 2 Keytrigger",
359
+ "value": 0.0
360
+ },
361
+ {
362
+ "id": 18,
363
+ "name": "Osc Mastertune",
364
+ "value": 0.5
365
+ },
366
+ {
367
+ "id": 55,
368
+ "name": "Pitchwheel Cutoff",
369
+ "value": 0.0
370
+ },
371
+ {
372
+ "id": 56,
373
+ "name": "Pitchwheel Pitch",
374
+ "value": 0.0
375
+ },
376
+ {
377
+ "id": 49,
378
+ "name": "Portamento Amount",
379
+ "value": 0.0
380
+ },
381
+ {
382
+ "id": 50,
383
+ "name": "Portamento Mode",
384
+ "value": 0.0
385
+ },
386
+ {
387
+ "id": 40,
388
+ "name": "Transpose",
389
+ "value": 0.5
390
+ },
391
+ {
392
+ "id": 53,
393
+ "name": "Velocity Contour",
394
+ "value": 0.0
395
+ },
396
+ {
397
+ "id": 54,
398
+ "name": "Velocity Filter",
399
+ "value": 0.0
400
+ },
401
+ {
402
+ "id": 52,
403
+ "name": "Velocity Volume",
404
+ "value": 0.0
405
+ },
406
+ {
407
+ "id": 51,
408
+ "name": "Voices",
409
+ "value": 0.0
410
+ },
411
+ {
412
+ "id": 1,
413
+ "name": "Master Volume",
414
+ "value": 0.6
415
+ },
416
+ {
417
+ "id": 76,
418
+ "name": "Envelope Reset",
419
+ "value": 0.0
420
+ }
421
+ ]
422
+ }
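
launch.py reads this file to build DICO, the ordered list of parameter ids passed to SYNTH.set_parameter; the extraction is just:

import json

with open("plugin_config/TAL-NoiseMaker-config.json") as f:
    config = json.load(f)
DICO = [p["id"] for p in config["parameters"]]  # ids in file order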
back/plugin_config/gen_config_libTAL-NoiseMaker.so.json ADDED
@@ -0,0 +1,435 @@
1
+ {
2
+ "parameters": [
3
+ {
4
+ "id": 0,
5
+ "name": "-",
6
+ "value": 0.0
7
+ },
8
+ {
9
+ "id": 1,
10
+ "name": "Master Volume",
11
+ "value": 0.0
12
+ },
13
+ {
14
+ "id": 2,
15
+ "name": "Filter Type",
16
+ "value": 0.0
17
+ },
18
+ {
19
+ "id": 3,
20
+ "name": "Filter Cutoff",
21
+ "value": 0.0
22
+ },
23
+ {
24
+ "id": 4,
25
+ "name": "Filter Resonance",
26
+ "value": 0.0
27
+ },
28
+ {
29
+ "id": 5,
30
+ "name": "Filter Keyfollow",
31
+ "value": 0.0
32
+ },
33
+ {
34
+ "id": 6,
35
+ "name": "Filter Contour",
36
+ "value": 0.0
37
+ },
38
+ {
39
+ "id": 7,
40
+ "name": "Filter Attack",
41
+ "value": 0.0
42
+ },
43
+ {
44
+ "id": 8,
45
+ "name": "Filter Decay",
46
+ "value": 0.0
47
+ },
48
+ {
49
+ "id": 9,
50
+ "name": "Filter Sustain",
51
+ "value": 0.0
52
+ },
53
+ {
54
+ "id": 10,
55
+ "name": "Filter Release",
56
+ "value": 0.0
57
+ },
58
+ {
59
+ "id": 11,
60
+ "name": "Amp Attack",
61
+ "value": 0.0
62
+ },
63
+ {
64
+ "id": 12,
65
+ "name": "Amp Decay",
66
+ "value": 0.0
67
+ },
68
+ {
69
+ "id": 13,
70
+ "name": "Amp Sustain",
71
+ "value": 0.0
72
+ },
73
+ {
74
+ "id": 14,
75
+ "name": "Amp Release",
76
+ "value": 0.0
77
+ },
78
+ {
79
+ "id": 15,
80
+ "name": "Osc 1 Volume",
81
+ "value": 0.0
82
+ },
83
+ {
84
+ "id": 16,
85
+ "name": "Osc 2 Volume",
86
+ "value": 0.0
87
+ },
88
+ {
89
+ "id": 17,
90
+ "name": "Osc 3 Volume",
91
+ "value": 0.0
92
+ },
93
+ {
94
+ "id": 18,
95
+ "name": "Osc Mastertune",
96
+ "value": 0.0
97
+ },
98
+ {
99
+ "id": 19,
100
+ "name": "Osc 1 Tune",
101
+ "value": 0.0
102
+ },
103
+ {
104
+ "id": 20,
105
+ "name": "Osc 2 Tune",
106
+ "value": 0.0
107
+ },
108
+ {
109
+ "id": 21,
110
+ "name": "Osc 1 Fine Tune",
111
+ "value": 0.0
112
+ },
113
+ {
114
+ "id": 22,
115
+ "name": "Osc 2 Fine Tune",
116
+ "value": 0.0
117
+ },
118
+ {
119
+ "id": 23,
120
+ "name": "Osc 1 Waveform",
121
+ "value": 0.0
122
+ },
123
+ {
124
+ "id": 24,
125
+ "name": "Osc 2 Waveform",
126
+ "value": 0.0
127
+ },
128
+ {
129
+ "id": 25,
130
+ "name": "Osc Sync",
131
+ "value": 0.0
132
+ },
133
+ {
134
+ "id": 26,
135
+ "name": "Lfo 1 Waveform",
136
+ "value": 0.0
137
+ },
138
+ {
139
+ "id": 27,
140
+ "name": "Lfo 2 Waveform",
141
+ "value": 0.0
142
+ },
143
+ {
144
+ "id": 28,
145
+ "name": "Lfo 1 Rate",
146
+ "value": 0.0
147
+ },
148
+ {
149
+ "id": 29,
150
+ "name": "Lfo 2 Rate",
151
+ "value": 0.0
152
+ },
153
+ {
154
+ "id": 30,
155
+ "name": "Lfo 1 Amount",
156
+ "value": 0.0
157
+ },
158
+ {
159
+ "id": 31,
160
+ "name": "Lfo 2 Amount",
161
+ "value": 0.0
162
+ },
163
+ {
164
+ "id": 32,
165
+ "name": "Lfo 1 Destination",
166
+ "value": 0.0
167
+ },
168
+ {
169
+ "id": 33,
170
+ "name": "Lfo 2 Destination",
171
+ "value": 0.0
172
+ },
173
+ {
174
+ "id": 34,
175
+ "name": "Lfo 1 Phase",
176
+ "value": 0.0
177
+ },
178
+ {
179
+ "id": 35,
180
+ "name": "Lfo 2 Phase",
181
+ "value": 0.0
182
+ },
183
+ {
184
+ "id": 36,
185
+ "name": "Osc 2 FM",
186
+ "value": 0.0
187
+ },
188
+ {
189
+ "id": 37,
190
+ "name": "Osc 2 Phase",
191
+ "value": 0.0
192
+ },
193
+ {
194
+ "id": 38,
195
+ "name": "Osc 1 PW",
196
+ "value": 0.0
197
+ },
198
+ {
199
+ "id": 39,
200
+ "name": "Osc 1 Phase",
201
+ "value": 0.0
202
+ },
203
+ {
204
+ "id": 40,
205
+ "name": "Transpose",
206
+ "value": 0.0
207
+ },
208
+ {
209
+ "id": 41,
210
+ "name": "Free Ad Attack",
211
+ "value": 0.0
212
+ },
213
+ {
214
+ "id": 42,
215
+ "name": "Free Ad Decay",
216
+ "value": 0.0
217
+ },
218
+ {
219
+ "id": 43,
220
+ "name": "Free Ad Amount",
221
+ "value": 0.0
222
+ },
223
+ {
224
+ "id": 44,
225
+ "name": "Free Ad Destination",
226
+ "value": 0.0
227
+ },
228
+ {
229
+ "id": 45,
230
+ "name": "Lfo 1 Sync",
231
+ "value": 0.0
232
+ },
233
+ {
234
+ "id": 46,
235
+ "name": "Lfo 1 Keytrigger",
236
+ "value": 0.0
237
+ },
238
+ {
239
+ "id": 47,
240
+ "name": "Lfo 2 Sync",
241
+ "value": 0.0
242
+ },
243
+ {
244
+ "id": 48,
245
+ "name": "Lfo 2 Keytrigger",
246
+ "value": 0.0
247
+ },
248
+ {
249
+ "id": 49,
250
+ "name": "Portamento Amount",
251
+ "value": 0.0
252
+ },
253
+ {
254
+ "id": 50,
255
+ "name": "Portamento Mode",
256
+ "value": 0.0
257
+ },
258
+ {
259
+ "id": 51,
260
+ "name": "Voices",
261
+ "value": 0.0
262
+ },
263
+ {
264
+ "id": 52,
265
+ "name": "Velocity Volume",
266
+ "value": 0.0
267
+ },
268
+ {
269
+ "id": 53,
270
+ "name": "Velocity Contour",
271
+ "value": 0.0
272
+ },
273
+ {
274
+ "id": 54,
275
+ "name": "Velocity Filter",
276
+ "value": 0.0
277
+ },
278
+ {
279
+ "id": 55,
280
+ "name": "Pitchwheel Cutoff",
281
+ "value": 0.0
282
+ },
283
+ {
284
+ "id": 56,
285
+ "name": "Pitchwheel Pitch",
286
+ "value": 0.0
287
+ },
288
+ {
289
+ "id": 57,
290
+ "name": "Ringmodulation",
291
+ "value": 0.0
292
+ },
293
+ {
294
+ "id": 58,
295
+ "name": "Chorus 1 Enable",
296
+ "value": 0.0
297
+ },
298
+ {
299
+ "id": 59,
300
+ "name": "Chorus 2 Enable",
301
+ "value": 0.0
302
+ },
303
+ {
304
+ "id": 60,
305
+ "name": "Reverb Wet",
306
+ "value": 0.0
307
+ },
308
+ {
309
+ "id": 61,
310
+ "name": "Reverb Decay",
311
+ "value": 0.0
312
+ },
313
+ {
314
+ "id": 62,
315
+ "name": "Reverb Pre Delay",
316
+ "value": 0.0
317
+ },
318
+ {
319
+ "id": 63,
320
+ "name": "Reverb High Cut",
321
+ "value": 0.0
322
+ },
323
+ {
324
+ "id": 64,
325
+ "name": "Reverb Low Cut",
326
+ "value": 0.0
327
+ },
328
+ {
329
+ "id": 65,
330
+ "name": "Osc Bitcrusher",
331
+ "value": 0.0
332
+ },
333
+ {
334
+ "id": 66,
335
+ "name": "Master High Pass",
336
+ "value": 0.0
337
+ },
338
+ {
339
+ "id": 67,
340
+ "name": "Master Detune",
341
+ "value": 0.0
342
+ },
343
+ {
344
+ "id": 68,
345
+ "name": "Vintage Noise",
346
+ "value": 0.0
347
+ },
348
+ {
349
+ "id": 69,
350
+ "name": "Panic",
351
+ "value": 0.0
352
+ },
353
+ {
354
+ "id": 70,
355
+ "name": "MIDI LEARN",
356
+ "value": 0.0
357
+ },
358
+ {
359
+ "id": 71,
360
+ "name": "Envelope Destination",
361
+ "value": 0.0
362
+ },
363
+ {
364
+ "id": 72,
365
+ "name": "Envelope Speed",
366
+ "value": 0.0
367
+ },
368
+ {
369
+ "id": 73,
370
+ "name": "Envelope Amount",
371
+ "value": 0.0
372
+ },
373
+ {
374
+ "id": 74,
375
+ "name": "Envelope One Shot Mode",
376
+ "value": 0.0
377
+ },
378
+ {
379
+ "id": 75,
380
+ "name": "Envelope Fix Tempo",
381
+ "value": 0.0
382
+ },
383
+ {
384
+ "id": 76,
385
+ "name": "Envelope Reset",
386
+ "value": 0.0
387
+ },
388
+ {
389
+ "id": 77,
390
+ "name": "Filter Drive",
391
+ "value": 0.0
392
+ },
393
+ {
394
+ "id": 78,
395
+ "name": "Delay Wet",
396
+ "value": 0.0
397
+ },
398
+ {
399
+ "id": 79,
400
+ "name": "Delay Time",
401
+ "value": 0.0
402
+ },
403
+ {
404
+ "id": 80,
405
+ "name": "Delay Sync",
406
+ "value": 0.0
407
+ },
408
+ {
409
+ "id": 81,
410
+ "name": "Delay x2 L",
411
+ "value": 0.0
412
+ },
413
+ {
414
+ "id": 82,
415
+ "name": "Delay x2 R",
416
+ "value": 0.0
417
+ },
418
+ {
419
+ "id": 83,
420
+ "name": "Delay High Shelf",
421
+ "value": 0.0
422
+ },
423
+ {
424
+ "id": 84,
425
+ "name": "Delay Low Shelf",
426
+ "value": 0.0
427
+ },
428
+ {
429
+ "id": 85,
430
+ "name": "Delay Feedback",
431
+ "value": 0.0
432
+ }
433
+ ],
434
+ "fixed_parameters": []
435
+ }
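This file looks like a raw dump of every TAL-NoiseMaker parameter exposed through dawdreamer (ids 0 to 85), with all values zeroed, presumably the starting point that gets curated into a config like the one above. A sketch of how such a dump could be produced; the output path is illustrative, and get_parameter_name is the same dawdreamer call that synth.py below relies on:

import json
import dawdreamer as daw

engine = daw.RenderEngine(44100, 128)
synth = engine.make_plugin_processor("dump", "libTAL-NoiseMaker.so")

# One zeroed entry per exposed parameter, mirroring the structure above.
dump = {
    "parameters": [
        {"id": i, "name": synth.get_parameter_name(i), "value": 0.0}
        for i in range(86)
    ],
    "fixed_parameters": [],
}
with open("plugin_config/gen_config_libTAL-NoiseMaker.so.json", "w") as f:
    json.dump(dump, f, indent=2)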
back/requirements.txt ADDED
@@ -0,0 +1,28 @@
+ wheel
+ fastapi
+ uvicorn[standard]
+ python-multipart
+ python-dotenv
+ aiofiles
+ torch
+ torchaudio
+ torchmetrics
+ torchvision
+ dataclasses
+ dawdreamer
+ matplotlib
+ pandas
+ samplerate
+ tensorboard
+ tensorflow-estimator
+ tensorflow
+ scikit-learn
+ scipy
+ numpy
+ numba
+ kapre==0.1.7
+ keras-applications
+ keras-preprocessing
+ keras
+ librosa
+ h5py
back/utils/export_to_excel.py ADDED
@@ -0,0 +1,4 @@
+ import pandas as pd
+
+ file = pd.read_csv("output/InverSynth_C6XL_20231201-103344")  # Read the dataframe
+ file.to_excel("foo.xlsx", index=False)  # Save it as Excel (requires a writer engine such as openpyxl)
back/utils/import csv.py ADDED
@@ -0,0 +1,16 @@
+ import csv
+
+ # Open the CSV file
+ with open('InverSynth_00006.wav.csv', 'r') as csvfile:
+     reader = csv.reader(csvfile)
+
+     # Skip the header row
+     next(reader)
+
+     # Loop through the rows
+     for row in reader:
+         # Get the floating-point number from the third column
+         value = float(row[2])
+
+         # Do something with the value
+         print(f"The value is {value}")
back/utils/import json.py ADDED
@@ -0,0 +1,13 @@
+ import json
+
+ # Load the JSON data from a file
+ with open('plugin_config/TAL-NoiseMaker-config.json') as f:
+     data = json.load(f)
+
+ dico = []
+ # Collect the id of every parameter listed in the config
+ params = data['parameters']
+ for param in params:
+     dico.append(param['id'])
+
+ print(dico)
back/utils/synth.py ADDED
@@ -0,0 +1,81 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ import csv
+ import json
+ from pathlib import Path
+
+ import dawdreamer as daw
+ import numpy as np
+ from scipy.io import wavfile
+
+
+ SAMPLE_RATE = 44100
+ # Parameters will undergo automation at this buffer/block size.
+ BUFFER_SIZE = 128
+ PPQN = 960  # Pulses per quarter note.
+
+ SYNTH_PLUGIN = "libTAL-NoiseMaker.so"
+ # SYNTH_PLUGIN = "C:/Program Files/Common Files/VST3/Surge Synth Team/Surge XT.vst3/Contents/x86_64-win/Surge XT.vst3"
+
+
+ def make_sine(freq: float, duration: float, sr=SAMPLE_RATE):
+     """Return a sine wave based on freq in Hz and duration in seconds."""
+     N = int(duration * sr)  # Number of samples
+     return np.sin(np.pi * 2. * freq * np.arange(N) / sr)
+
+
+ def gen():
+     engine = daw.RenderEngine(SAMPLE_RATE, BUFFER_SIZE)
+     output_dir = Path("yay")
+     output_dir.mkdir(parents=True, exist_ok=True)  # Make sure the output folder exists.
+
+     # Make a processor and give it the unique name "my_synth", which we use later.
+     synth = engine.make_plugin_processor("my_synth", SYNTH_PLUGIN)
+     assert synth.get_name() == "my_synth"
+
+     # Output the parameter names and dump them into a JSON file.
+     params = {}
+     for param in range(1, 86):
+         params[param] = synth.get_parameter_name(param)
+     with open('params_.json', 'w') as f:
+         json.dump(params, f)
+
+     # synth.load_preset("C:/Users/yderre/AppData/Roaming/ToguAudioLine/TAL-NoiseMaker/presets/Factory Presets/DRUM/DR 8bit Kick II FN.noisemakerpreset")
+
+     # Get the parameters description from the plugin.
+     parameters = synth.get_parameters_description()
+
+     # Queue a note: (MIDI note, velocity, start, duration).
+     synth.add_midi_note(40, 127, 0, 0.2)
+
+     # Set each parameter from the third column of the CSV, row by row.
+     with open('InverSynth_01998.wav.csv', 'r') as csvfile:
+         reader = csv.reader(csvfile)
+
+         # Skip the header row
+         next(reader)
+
+         i = 0
+         for row in reader:
+             # Get the floating-point number from the third column
+             value = float(row[2])
+             print(f"{parameters[i]['name']} changed from {parameters[i]['defaultValue']} to {value}")
+             synth.set_parameter(i, value)
+             i += 1
+
+     # don't do reverb
+     graph = [
+         # synth takes no inputs, so we give an empty list.
+         (synth, []),
+     ]
+
+     engine.load_graph(graph)
+     engine.render(1)
+     output = engine.get_audio()
+     wavfile.write(str(output_dir / 'test_.wav'), SAMPLE_RATE, output.transpose())
+     synth.open_editor()  # Open the editor, make changes, and close it.
+
+
+ if __name__ == "__main__":
+     gen()
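make_sine is defined in this script but never called; a minimal usage sketch (the output file name is illustrative) that writes a one-second 440 Hz reference tone, e.g. for sanity-checking the render pipeline against a known signal:

# Render a 1-second A4 test tone and save it next to the synth output.
ref = make_sine(440.0, 1.0)
wavfile.write("sine_ref.wav", SAMPLE_RATE, ref.astype(np.float32))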