Spaces:
Running
Running
Yann
commited on
Commit
·
86694c3
1
Parent(s):
2c14895
push backend
Browse files- back/.gitignore +142 -0
- back/.vscode/launch.json +57 -0
- back/.vscode/settings.json +6 -0
- back/InverSynth_00006.wav +0 -0
- back/README.md +38 -0
- back/generators/__init__.py +0 -0
- back/generators/generator.py +327 -0
- back/generators/parameters.py +176 -0
- back/generators/vst_generator.py +235 -0
- back/main.py +179 -0
- back/models/__init__.py +0 -0
- back/models/app.py +645 -0
- back/models/common/__init__.py +0 -0
- back/models/common/architectures.py +69 -0
- back/models/common/data_generator.py +126 -0
- back/models/comparison.py +144 -0
- back/models/convert_to_preset.py +149 -0
- back/models/importer_audio.py +23 -0
- back/models/launch.py +518 -0
- back/models/runner.py +82 -0
- back/models/spectrogram_cnn.py +134 -0
- back/output.xml +1 -0
- back/plugin_config/TAL-NoiseMaker-config.json +422 -0
- back/plugin_config/gen_config_libTAL-NoiseMaker.so.json +435 -0
- back/requirements.txt +28 -0
- back/utils/export_to_excel.py +4 -0
- back/utils/import csv.py +16 -0
- back/utils/import json.py +13 -0
- back/utils/synth.py +109 -0
back/.gitignore
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Test files
|
2 |
+
playing/
|
3 |
+
test_waves/*
|
4 |
+
data/*
|
5 |
+
temp/*
|
6 |
+
output/*
|
7 |
+
|
8 |
+
comparison/
|
9 |
+
reconstruction_waves/
|
10 |
+
reconstruction_waves*
|
11 |
+
|
12 |
+
|
13 |
+
# To preserve file for Dave :/
|
14 |
+
librenderman.so
|
15 |
+
|
16 |
+
|
17 |
+
# Byte-compiled / optimized / DLL files
|
18 |
+
__pycache__/
|
19 |
+
*.py[cod]
|
20 |
+
*$py.class
|
21 |
+
|
22 |
+
|
23 |
+
# C extensions
|
24 |
+
*.so
|
25 |
+
|
26 |
+
# Distribution / packaging
|
27 |
+
.Python
|
28 |
+
build/
|
29 |
+
develop-eggs/
|
30 |
+
dist/
|
31 |
+
downloads/
|
32 |
+
eggs/
|
33 |
+
.eggs/
|
34 |
+
lib/
|
35 |
+
lib64/
|
36 |
+
parts/
|
37 |
+
sdist/
|
38 |
+
var/
|
39 |
+
wheels/
|
40 |
+
*.egg-info/
|
41 |
+
.installed.cfg
|
42 |
+
*.egg
|
43 |
+
MANIFEST
|
44 |
+
|
45 |
+
# PyInstaller
|
46 |
+
# Usually these files are written by a python script from a template
|
47 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
48 |
+
*.manifest
|
49 |
+
*.spec
|
50 |
+
|
51 |
+
# Installer logs
|
52 |
+
pip-log.txt
|
53 |
+
pip-delete-this-directory.txt
|
54 |
+
|
55 |
+
# Unit test / coverage reports
|
56 |
+
htmlcov/
|
57 |
+
.tox/
|
58 |
+
.coverage
|
59 |
+
.coverage.*
|
60 |
+
.cache
|
61 |
+
nosetests.xml
|
62 |
+
coverage.xml
|
63 |
+
*.cover
|
64 |
+
.hypothesis/
|
65 |
+
.pytest_cache/
|
66 |
+
|
67 |
+
# Translations
|
68 |
+
*.mo
|
69 |
+
*.pot
|
70 |
+
|
71 |
+
# Django stuff:
|
72 |
+
*.log
|
73 |
+
local_settings.py
|
74 |
+
db.sqlite3
|
75 |
+
|
76 |
+
# Flask stuff:
|
77 |
+
instance/
|
78 |
+
.webassets-cache
|
79 |
+
|
80 |
+
# Scrapy stuff:
|
81 |
+
.scrapy
|
82 |
+
|
83 |
+
# Sphinx documentation
|
84 |
+
docs/_build/
|
85 |
+
|
86 |
+
# PyBuilder
|
87 |
+
target/
|
88 |
+
|
89 |
+
# Jupyter Notebook
|
90 |
+
.ipynb_checkpoints
|
91 |
+
|
92 |
+
# pyenv
|
93 |
+
.python-version
|
94 |
+
|
95 |
+
# celery beat schedule file
|
96 |
+
celerybeat-schedule
|
97 |
+
|
98 |
+
# SageMath parsed files
|
99 |
+
*.sage.py
|
100 |
+
|
101 |
+
# Environments
|
102 |
+
.env
|
103 |
+
.venv
|
104 |
+
env/
|
105 |
+
venv/
|
106 |
+
ENV/
|
107 |
+
env.bak/
|
108 |
+
venv.bak/
|
109 |
+
|
110 |
+
# Spyder project settings
|
111 |
+
.spyderproject
|
112 |
+
.spyproject
|
113 |
+
|
114 |
+
# Rope project settings
|
115 |
+
.ropeproject
|
116 |
+
|
117 |
+
# mkdocs documentation
|
118 |
+
/site
|
119 |
+
|
120 |
+
# mypy
|
121 |
+
.mypy_cache/
|
122 |
+
|
123 |
+
# reference only
|
124 |
+
models/docs/
|
125 |
+
|
126 |
+
# large audio samples
|
127 |
+
audio/large
|
128 |
+
|
129 |
+
# final audio outputs
|
130 |
+
audio/outputs
|
131 |
+
|
132 |
+
# large datasets
|
133 |
+
data/large
|
134 |
+
|
135 |
+
# large saved models
|
136 |
+
models/saved/large
|
137 |
+
|
138 |
+
# mac
|
139 |
+
.DS_Store
|
140 |
+
|
141 |
+
# notebook experiments
|
142 |
+
notebooks/experiments
|
back/.vscode/launch.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
// Use IntelliSense to learn about possible attributes.
|
3 |
+
// Hover to view descriptions of existing attributes.
|
4 |
+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
5 |
+
"version": "0.2.0",
|
6 |
+
"configurations": [
|
7 |
+
{
|
8 |
+
"name": "1 - Gen Config VST",
|
9 |
+
"type": "python",
|
10 |
+
"request": "launch",
|
11 |
+
"module": "generators.vst_generator",
|
12 |
+
"justMyCode": true,
|
13 |
+
"args": [
|
14 |
+
"generate"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"name": "2 - Run VST",
|
19 |
+
"type": "python",
|
20 |
+
"request": "launch",
|
21 |
+
"module": "generators.vst_generator",
|
22 |
+
"justMyCode": true,
|
23 |
+
"args": [
|
24 |
+
"run",
|
25 |
+
"--config",
|
26 |
+
"plugin_config/TAL-NoiseMaker-config.json"
|
27 |
+
]
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"name": "3 - Train Debug",
|
31 |
+
"type": "python",
|
32 |
+
"request": "launch",
|
33 |
+
"module": "models.spectrogram_cnn",
|
34 |
+
"justMyCode": true,
|
35 |
+
"args": [
|
36 |
+
"--epoch",
|
37 |
+
"2000",
|
38 |
+
"--model",
|
39 |
+
"C6XL"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"name": "4 - Debug Launch",
|
44 |
+
"type": "python",
|
45 |
+
"request": "launch",
|
46 |
+
"module": "models.spectrogram_cnn",
|
47 |
+
"justMyCode": true,
|
48 |
+
"args": [
|
49 |
+
"--epoch",
|
50 |
+
"1",
|
51 |
+
"--model",
|
52 |
+
"C6XL",
|
53 |
+
"--resume"
|
54 |
+
]
|
55 |
+
}
|
56 |
+
]
|
57 |
+
}
|
back/.vscode/settings.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"python.analysis.autoImportCompletions": true,
|
3 |
+
"python.analysis.typeCheckingMode": "off",
|
4 |
+
"python.analysis.fixAll": ["source.unusedImports", "source.convertImportFormat"],
|
5 |
+
"editor.defaultFormatter": "ms-python.black-formatter"
|
6 |
+
}
|
back/InverSynth_00006.wav
ADDED
Binary file (65.6 kB). View file
|
|
back/README.md
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Inversynth Fork
|
2 |
+
## AMP Team
|
3 |
+
|
4 |
+
## Launch instructions :
|
5 |
+
|
6 |
+
*Optional: outputting your own config file for your VST*
|
7 |
+
```zsh
|
8 |
+
python -m generators.vst_generator generate
|
9 |
+
```
|
10 |
+
*1. Dataset Creation based on config profile*
|
11 |
+
```zsh
|
12 |
+
python -m generators.vst_generator run --config "your_config_path.json"
|
13 |
+
```
|
14 |
+
|
15 |
+
*2. Model training*
|
16 |
+
```zsh
|
17 |
+
python -m models.spectrogram_cnn --epoch "your_epoch_number" --model C6XL
|
18 |
+
```
|
19 |
+
|
20 |
+
Parameter | Default | Description
|
21 |
+
---|---|---
|
22 |
+
`--num_examples` | `20000` | Number of examples to create
|
23 |
+
`--name` | `InverSynth` | Naming convention for datasets
|
24 |
+
`--dataset_directory` | `test_datasets` | Directory for datasets
|
25 |
+
`--wavefile_directory` | `test_waves` | Directory for wave files.<br>The dataset name is appended automatically
|
26 |
+
`--length` | `1.0` | Length of each sample in seconds
|
27 |
+
`--sample_rate` | `16384` | Sample rate (Samples/second)
|
28 |
+
`--sampling_method` | `random` | Method to use for generating examples.<br>Currently only random, but may<br>include whole space later
|
29 |
+
Optional |
|
30 |
+
`--regenerate_samples` | | Regenerate the set of points to explore if it<br>exists (will also force regenerating audio)
|
31 |
+
`--regenerate_audio` | | Regenerate audio files if they exist
|
32 |
+
`--normalise` | | Apply audio normalization
|
33 |
+
|
34 |
+
This module generates a dataset attempting to recreate the dataset generation<br>as defined in the [paper](paper/1812.06349.pdf)
|
35 |
+
|
36 |
+
Selecting an architecture:
|
37 |
+
|
38 |
+
- `C1`, `C2`, `C3`, `C4`, `C5`, `C6`, `C6XL`
|
back/generators/__init__.py
ADDED
File without changes
|
back/generators/generator.py
ADDED
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
# ParamValue = Tuple[str,float,List[float]]
|
4 |
+
import os
|
5 |
+
import os.path
|
6 |
+
from typing import List
|
7 |
+
|
8 |
+
import h5py
|
9 |
+
import numpy as np
|
10 |
+
from scipy.io.wavfile import write as write_wav
|
11 |
+
|
12 |
+
from generators.parameters import *
|
13 |
+
|
14 |
+
"""
|
15 |
+
This is a base class to derive different kinds of sound generator from (e.g.
|
16 |
+
custom synthesis, VST plugins)
|
17 |
+
"""
|
18 |
+
|
19 |
+
|
20 |
+
class SoundGenerator:
    """Base class to derive different kinds of sound generator from.

    Subclasses (e.g. custom synthesis, VST plugins) override
    :meth:`do_generate` to produce the raw audio. :meth:`generate` is a
    wrapper round that 'real' generation function which handles normalising
    and saving.
    """

    def generate(
        self,
        parameters: dict,
        filename: str,
        length: float,
        sample_rate: int,
        extra: dict,
        normalise: bool = True,
    ) -> np.ndarray:
        """Generate one example, optionally normalise it, and save it.

        Args:
            parameters: Parameter settings passed through to ``do_generate``.
            filename: Path of the wave file for this example.
            length: Length of the example in seconds.
            sample_rate: Sample rate in samples per second.
            extra: Additional generator-specific settings.
            normalise: When true, scale the audio so its peak magnitude is 1.

        Returns:
            The generated (and possibly normalised) audio.
        """
        audio = self.do_generate(parameters, filename, length, sample_rate, extra)
        if normalise:
            # ``peak`` rather than ``max`` so the builtin is not shadowed.
            peak = np.max(np.absolute(audio))
            # Silence is left untouched to avoid dividing by zero.
            if peak > 0:
                audio = audio / peak
        if not self.creates_wave_file():
            self.write_file(audio, filename, sample_rate)
        # The signature promises an array; previously nothing was returned,
        # so callers could never tell whether audio had been produced.
        return audio

    def do_generate(
        self,
        parameters: dict,
        filename: str,
        length: float,
        sample_rate: int,
        extra: dict,
    ) -> np.ndarray:
        """Produce the raw audio; this placeholder generates silence.

        Returns:
            ``int(length * sample_rate)`` zero samples.
        """
        print(
            "Someone needs to write this method! Generating silence in {} with parameters:{}".format(
                filename, str(parameters)
            )
        )
        return np.zeros(int(length * sample_rate))

    def creates_wave_file(self) -> bool:
        """Return whether ``do_generate`` writes the wave file itself."""
        return False

    # Assumes that the data is -1..1 floating point
    def write_file(self, data: np.ndarray, filename: str, sample_rate: int):
        """Write ``data`` to ``filename`` as a WAV file at ``sample_rate``."""
        # REVIEW: is this needed?
        # int_data = (data * np.iinfo(np.int16).max).astype(int)
        write_wav(filename, sample_rate, data)


class DatasetCreator:
    """Create a dataset from a parameter set and a sound generator.

    Runs through a parameter set, gets it to generate parameter settings,
    stores them in an HDF5 file, then runs the sound generator over them.
    """

    def __init__(
        self,
        name: str,
        dataset_dir: str,
        wave_file_dir: str,
        parameters: "ParameterSet",
        normalise: bool = True,
    ):
        """Store the configuration and create the output directories.

        Args:
            name: Dataset name, used in file names and as the wave sub-folder.
            dataset_dir: Directory that receives the dataset files.
            wave_file_dir: Directory under which ``<name>/`` holds wave files.
            parameters: Parameter set to sample from.
            normalise: Passed to ``SoundGenerator.generate`` for each example.
        """
        self.name = name
        self.parameters = parameters
        self.dataset_dir = dataset_dir
        self.wave_file_dir = wave_file_dir
        self.normalise = normalise
        os.makedirs(dataset_dir, exist_ok=True)
        os.makedirs(f"{wave_file_dir}/{name}", exist_ok=True)

    def create_parameters(
        self,
        max: int = 2000,
        method: str = "complete",
        extra: dict = None,
        force_create=False,
    ) -> str:
        """Sample the parameter space and store the points in an HDF5 file.

        Args:
            max: Number of examples to sample.
            method: Currently unused; the space is always randomly sampled.
            extra: Currently unused.
            force_create: Recreate the file even if it already exists.

        Returns:
            Path of the HDF5 dataset file.
        """
        filename = self.get_dataset_filename("data", "hdf5")
        if os.path.isfile(filename) and not force_create:
            print(
                "Parameter file exists, not recreating (use --regenerate_samples if you want to force)"
            )
            return filename
        print("+" * 40)
        print(f"Generating Dataset {self.name}, {max} examples")
        print(f"Datasets: {self.dataset_dir}")
        print("+" * 40)

        # Save out the parameters first
        self.save_parameters()

        # Generate the set of samples (could switch to generators,
        # but need to figure out arbitrary size arrays in HDF5)
        # if method == "complete":
        #     dataset = self.parameters.recursively_generate_all()
        # else:
        dataset = self.parameters.sample_space(sample_size=max)

        # Create the data file and add all the points to it
        with h5py.File(filename, "w") as datafile:
            # Figure out the sizes to store
            records = len(dataset)
            param_size = len(dataset[0].encode())

            # Add columns to it
            filenames = datafile.create_dataset(
                "files", (records,), dtype=h5py.string_dtype()
            )
            parameters = datafile.create_dataset(
                "parameters", (records,), dtype=h5py.string_dtype()
            )
            labels = datafile.create_dataset("labels", (records, param_size))
            audio_exists = datafile.create_dataset(
                "audio_exists", (records,), dtype=np.bool_
            )

            # Generate the sample points
            for index, point in enumerate(dataset):
                params = self.parameters.to_settings(point)
                filenames[index] = self.get_wave_filename(index)
                labels[index] = point.encode()
                parameters[index] = json.dumps(params)
                audio_exists[index] = False
                if index % 1000 == 0:
                    print("Generating parameters for example {}".format(index))
            datafile.flush()
            # No explicit close(): the ``with`` block closes the file.

        return filename

    def generate_audio(
        self,
        sound_generator: SoundGenerator,
        length: float = 1,
        sample_rate: int = 16384,
        extra: dict = None,
        dataset_filename=None,
        force_generate=True,
    ):
        """Generate audio for every example recorded in the dataset file.

        Args:
            sound_generator: Generator used to render each example.
            length: Length of each example in seconds.
            sample_rate: Sample rate in samples per second.
            extra: Additional settings forwarded to the generator.
            dataset_filename: HDF5 file to process; defaults to this
                dataset's own data file.
            force_generate: Regenerate audio even when it already exists.
        """
        if dataset_filename is None:
            dataset_filename = self.get_dataset_filename("data", "hdf5")
        # Fresh dict per call instead of a shared mutable default.
        extra = {} if extra is None else extra

        print("+" * 40)
        print(
            f"Generating Audio for Dataset {self.name} ({dataset_filename}), with {length}s at {sample_rate}/s"
        )
        print(f"Output waves: {self.wave_file_dir}, datasets: {self.dataset_dir}")
        print("+" * 40)

        with h5py.File(dataset_filename, "r+") as datafile:
            for name, value in datafile.items():
                print(f"{name}: {value}")
            # Get the columns
            filenames = datafile.get("files")
            print(filenames)
            parameters = datafile.get("parameters")
            print(parameters)
            audio_exists = datafile.get("audio_exists")
            print(audio_exists)

            for index, filename in enumerate(filenames):
                already_done = (
                    audio_exists[index]
                    and os.path.isfile(filename)
                    and not force_generate
                )
                if already_done:
                    print(f"Audio exists for index {index} ({filename})")
                else:
                    print(f"Generating Audio for index {index} ({filename})")
                    params = json.loads(parameters[index])
                    audio = sound_generator.generate(
                        params,
                        filename,
                        length,
                        sample_rate,
                        extra,
                        normalise=self.normalise,
                    )
                    # Truth-testing a multi-sample array raises ValueError,
                    # so test for presence and size explicitly.
                    audio_exists[index] = audio is not None and np.size(audio) > 0
                    datafile.flush()
                if index % 1000 == 0:
                    print("Generating example {}".format(index))

    def save_parameters(self):
        """Save the parameter set as both JSON and pickle files."""
        self.parameters.save_json(self.get_dataset_filename("params", "json"))
        self.parameters.save(self.get_dataset_filename("params", "pckl"))

    def get_dataset_filename(self, type: str, extension: str = "txt") -> str:
        """Return ``<dataset_dir>/<name>_<type>.<extension>``."""
        return f"{self.dataset_dir}/{self.name}_{type}.{extension}"

    def get_wave_filename(self, index: int) -> str:
        """Return the wave file path for example ``index`` (zero-padded to 5 digits)."""
        return f"{self.wave_file_dir}/{self.name}/{self.name}_{index:05d}.wav"
|
214 |
+
|
215 |
+
|
216 |
+
def default_generator_argparse():
    """Build the command-line parser shared by the dataset generators.

    Returns:
        An ``argparse.ArgumentParser`` exposing the dataset size, naming,
        output directories, audio length/sample rate, sampling method and
        the regenerate/normalise flags.
    """
    parser = argparse.ArgumentParser(
        description="Generate a dataset of synthesiser parameter samples and audio."
    )
    parser.add_argument(
        "--num_examples",
        type=int,
        dest="samples",
        action="store",
        default=20000,
        help="Number of examples to create",
    )
    parser.add_argument(
        "--name",
        type=str,
        dest="name",
        default="InverSynth",
        help="Name of datasets to create",
    )
    parser.add_argument(
        "--dataset_directory",
        type=str,
        dest="data_dir",
        default="test_datasets",
        help="Directory to put datasets",
    )
    parser.add_argument(
        "--wavefile_directory",
        type=str,
        dest="wave_dir",
        default="test_waves",
        help="Directory to put wave files. Will have the dataset name appended automatically",
    )
    parser.add_argument(
        "--length",
        type=float,
        dest="length",
        default=1.0,
        help="Length of each sample in seconds",
    )
    parser.add_argument(
        "--sample_rate",
        type=int,
        dest="sample_rate",
        default=16384,
        help="Sample rate (Samples/second)",
    )
    parser.add_argument(
        "--sampling_method",
        type=str,
        dest="method",
        default="random",
        choices=["random"],
        help="Method to use for generating examples. Currently only random, but may include whole space later",
    )
    parser.add_argument(
        "--regenerate_samples",
        action="store_true",
        help="Regenerate the set of points to explore if it exists (will also force regenerating audio)",
    )
    parser.add_argument(
        "--regenerate_audio",
        action="store_true",
        help="Regenerate audio files if they exists",
    )
    # The help text used to be a copy of --regenerate_audio's.
    parser.add_argument(
        "--normalise", action="store_true", help="Apply audio normalisation"
    )
    return parser
|
283 |
+
|
284 |
+
|
285 |
+
def generate_examples(
    gen: SoundGenerator, parameters: ParameterSet, args=None, extra=None
):
    """Create a dataset's parameter file and then render its audio.

    Args:
        gen: Sound generator used to render each example.
        parameters: Parameter set to sample from.
        args: Parsed command-line arguments; when falsy they are parsed from
            ``sys.argv`` with ``default_generator_argparse``.
        extra: Additional settings forwarded to the sound generator.
    """
    if not args:
        parser = default_generator_argparse()
        args = parser.parse_args()
    # Fresh dict per call instead of a shared mutable default.
    if extra is None:
        extra = {}

    g = DatasetCreator(
        name=args.name,
        dataset_dir=args.data_dir,
        wave_file_dir=args.wave_dir,
        parameters=parameters,
        normalise=args.normalise,
    )

    # NOTE(review): force_create is hard-coded, so the parameter file is
    # always regenerated regardless of --regenerate_samples; confirm whether
    # this should be ``args.regenerate_samples``.
    g.create_parameters(
        max=args.samples, method=args.method, force_create=True
    )

    g.generate_audio(
        sound_generator=gen,
        length=args.length,
        sample_rate=args.sample_rate,
        extra=extra,
        # Logical ``or`` states the intent; the flags are booleans.
        force_generate=args.regenerate_audio or args.regenerate_samples,
    )
|
311 |
+
|
312 |
+
|
313 |
+
if __name__ == "__main__":
    # Small smoke-test dataset rendered with the placeholder generator.
    gen = SoundGenerator()
    parameters = ParameterSet(
        [
            Parameter("p1", [100, 110, 120, 130, 140]),
            Parameter("p2", [200, 220, 240, 260, 280]),
        ]
    )
    g = DatasetCreator(
        "example_generator",
        dataset_dir="test_datasets",
        wave_file_dir="test_waves/example/",
        parameters=parameters,
    )
    # DatasetCreator has no ``generate_examples`` method (the old call raised
    # AttributeError); run its two stages directly instead.
    g.create_parameters()
    g.generate_audio(sound_generator=gen)
|
back/generators/parameters.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import random
|
3 |
+
from dataclasses import dataclass
|
4 |
+
from pickle import dump
|
5 |
+
from typing import Dict, List, Sequence, Tuple
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
"""
|
10 |
+
A setting for a parameter, with its oneHOT encoding
|
11 |
+
"""
|
12 |
+
|
13 |
+
|
14 |
+
@dataclass
class ParamValue:
    """A setting for a parameter, together with its one-hot encoding."""

    name: str  # parameter name
    value: float  # the level chosen for this setting
    encoding: List[float]  # one-hot vector marking the chosen level


@dataclass
class Sample:
    """A sample point: one ``ParamValue`` per parameter."""

    parameters: List[ParamValue]

    def value_list(self) -> List[Tuple[str, float]]:
        """Return ``(name, value)`` pairs in parameter order."""
        pairs = []
        for setting in self.parameters:
            pairs.append((setting.name, setting.value))
        return pairs

    def encode(self) -> List[float]:
        """Return all per-parameter encodings joined into one flat array."""
        encodings = [setting.encoding for setting in self.parameters]
        return np.hstack(encodings)
|
40 |
+
|
41 |
+
|
42 |
+
class Parameter:
|
43 |
+
def __init__(self, name: str, levels: list, id=""):
|
44 |
+
self.name = name
|
45 |
+
self.levels = levels
|
46 |
+
self.id = id
|
47 |
+
|
48 |
+
def get_levels(self) -> List[ParamValue]:
|
49 |
+
return [self.get_value(i) for i in range(len(self.levels))]
|
50 |
+
|
51 |
+
def sample(self) -> ParamValue:
|
52 |
+
index: int = random.choice(range(len(self.levels)))
|
53 |
+
return self.get_value(index)
|
54 |
+
|
55 |
+
def get_value(self, index: int) -> ParamValue:
|
56 |
+
encoding = np.zeros(len(self.levels)).astype(float)
|
57 |
+
encoding[index] = 1.0
|
58 |
+
return ParamValue(
|
59 |
+
name=self.name,
|
60 |
+
# Actual value
|
61 |
+
value=self.levels[index],
|
62 |
+
# One HOT encoding
|
63 |
+
encoding=encoding,
|
64 |
+
)
|
65 |
+
|
66 |
+
def decode(self, one_hot: List[float]) -> ParamValue:
|
67 |
+
ind = np.array(one_hot).argmax()
|
68 |
+
# ind = tf.cast(tf.argmax(one_hot, axis=-1), "int32")
|
69 |
+
return self.get_value(ind)
|
70 |
+
|
71 |
+
def from_output(
|
72 |
+
self, current_output: List[float]
|
73 |
+
) -> Tuple[ParamValue, List[float]]:
|
74 |
+
param_data = current_output[: len(self.levels)]
|
75 |
+
remaining = current_output[len(self.levels) :]
|
76 |
+
my_val = self.decode(param_data)
|
77 |
+
return (my_val, remaining)
|
78 |
+
|
79 |
+
def to_json(self):
|
80 |
+
return {"name": self.name, "levels": self.levels, "id": self.id}
|
81 |
+
|
82 |
+
|
83 |
+
class ParameterSet:
    """A collection of variable parameters plus a dict of fixed settings."""

    def __init__(
        self, parameters: "List[Parameter]", fixed_parameters: dict = None
    ):
        """Store the variable parameters and the fixed settings.

        Args:
            parameters: Parameters that are sampled / decoded.
            fixed_parameters: Settings included unchanged in every result.
        """
        self.parameters = parameters
        # A fresh dict per instance: a shared ``{}`` default would leak fixed
        # settings between unrelated ParameterSet objects.
        self.fixed_parameters = {} if fixed_parameters is None else fixed_parameters

    def sample_space(self, sample_size=2000) -> "Sequence[Sample]":
        """Return ``sample_size`` samples, each drawing every parameter at random."""
        print("Sampling {} points from parameter space".format(sample_size))
        dataset = []
        for i in range(sample_size):
            params = [p.sample() for p in self.parameters]
            dataset.append(Sample(params))
            if i % 1000 == 0:
                print("Sampling iteration: {}".format(i))
        return dataset

    # Runs through the whole parameter space, building every combination.
    # Excuse the slightly hacky recursion - sure there's a more numpy-ish way to do it!
    def recursively_generate_all(
        self, parameter_list: list = None, parameter_set=None, return_list=None
    ) -> list:
        """Enumerate every combination of parameter levels.

        Args:
            parameter_list: Parameters still to expand; defaults to all of
                this set's parameters.
            parameter_set: ``(name, level)`` pairs already chosen.
            return_list: List that completed combinations are appended to.

        Returns:
            ``return_list``: one list of ``(name, level)`` tuples per
            combination. An empty ``parameter_list`` adds nothing.
        """
        print("Generating entire parameter space")
        if parameter_list is None:
            parameter_list = self.parameters
        # Fresh accumulators per call: mutable defaults made repeated calls
        # pile their results onto the previous call's list.
        if parameter_set is None:
            parameter_set = []
        if return_list is None:
            return_list = []
        self._expand(parameter_list, parameter_set, return_list)
        return return_list

    def _expand(self, parameter_list: list, chosen: list, return_list: list) -> None:
        """Append every completion of ``chosen`` over ``parameter_list`` to ``return_list``."""
        if not parameter_list:
            return
        param = parameter_list[0]
        remaining = parameter_list[1:]
        for level in param.levels:
            combination = chosen + [(param.name, level)]
            if remaining:
                self._expand(remaining, combination, return_list)
            else:
                return_list.append(combination)

    def to_settings(self, p: "Sample"):
        """Return the fixed settings overlaid with the sample's values."""
        params = self.fixed_parameters.copy()
        params.update(dict(p.value_list()))
        return params

    def encoding_to_settings(self, output: "List[float]") -> "Dict[str, float]":
        """Decode an encoded output vector into a name-to-value settings dict."""
        params = self.fixed_parameters.copy()
        for p in self.decode(output):
            params[p.name] = p.value
        return params

    def decode(self, output: "List[float]") -> "List[ParamValue]":
        """Decode ``output`` parameter by parameter, in order.

        Each parameter consumes its own entries from the front of
        ``output``; anything left over at the end is reported on stdout.
        """
        values = []
        for p in self.parameters:
            v, output = p.from_output(output)
            values.append(v)
        if len(output) > 0:
            print("Leftover output!: {}".format(output))
        return values

    def save(self, filename):
        """Pickle this parameter set to ``filename``."""
        with open(filename, "wb") as file:
            dump(self, file)

    def save_json(self, filename):
        """Write the JSON form of this parameter set to ``filename``."""
        # ``payload`` rather than ``dump`` so the imported pickle ``dump``
        # is not shadowed.
        payload = self.to_json()
        with open(filename, "w") as file:
            json.dump(payload, file, indent=2)

    def explain(self):
        """Return counts of variable parameters, fixed settings and total levels."""
        levels = sum(len(p.levels) for p in self.parameters)
        return {
            "n_variable": len(self.parameters),
            "n_fixed": len(self.fixed_parameters),
            "levels": levels,
        }

    def to_json(self):
        """Return a JSON-serialisable dict of the parameters and fixed settings."""
        return {
            "parameters": [p.to_json() for p in self.parameters],
            "fixed": self.fixed_parameters,
        }
|
161 |
+
|
162 |
+
|
163 |
+
"""
|
164 |
+
Generates evenly spaced parameter values
|
165 |
+
paper:
|
166 |
+
The rest of the synthesizer parameters ranges are quantized evenly to 16
|
167 |
+
classes according to the following ranges ...
|
168 |
+
For each parameter, the first and last classes correspond to its range limits
|
169 |
+
"""
|
170 |
+
|
171 |
+
|
172 |
+
def param_range(steps, min, max):
    """Return ``steps`` evenly spaced values from ``min`` to ``max`` inclusive.

    The first value is ``min`` and, for ``steps >= 2``, the last is ``max``.
    A single step yields just ``[min]`` (previously a ZeroDivisionError);
    zero or negative ``steps`` yields an empty list.
    """
    if steps == 1:
        # One class cannot span a range; anchor it at the lower limit.
        return [float(min)]
    ext = float(max - min)
    return [n * ext / (steps - 1) + min for n in range(steps)]
|
175 |
+
|
176 |
+
|
back/generators/vst_generator.py
ADDED
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import re
|
3 |
+
|
4 |
+
import dawdreamer as rm
|
5 |
+
import librosa
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
|
10 |
+
from generators.generator import *
|
11 |
+
from generators.parameters import *
|
12 |
+
|
13 |
+
|
14 |
+
class VSTGenerator(SoundGenerator):
|
15 |
+
def __init__(
|
16 |
+
self,
|
17 |
+
vst: str,
|
18 |
+
sample_rate,
|
19 |
+
randomise_non_set: bool = True,
|
20 |
+
randomise_all: bool = False,
|
21 |
+
):
|
22 |
+
self.vst = vst
|
23 |
+
self.randomise_non_set = randomise_non_set
|
24 |
+
self.randomise_all = randomise_all
|
25 |
+
self.sample_rate = sample_rate
|
26 |
+
self.load_engine()
|
27 |
+
|
28 |
+
def load_engine(self):
|
29 |
+
print("_____ LOADING VST _______")
|
30 |
+
engine = rm.RenderEngine(self.sample_rate, 1024)
|
31 |
+
synth = engine.make_plugin_processor("my_synth", self.vst)
|
32 |
+
if synth:
|
33 |
+
print("Loaded {}".format(self.vst))
|
34 |
+
|
35 |
+
self.engine = engine
|
36 |
+
self.synth = synth
|
37 |
+
else:
|
38 |
+
print("Couldn't load VST {}".format(self.vst))
|
39 |
+
print("_____ LOADED VST _______")
|
40 |
+
|
41 |
+
# def do_sound_generation(self,parameter_set,base_filename)->np.ndarray:
|
42 |
+
def do_generate(
|
43 |
+
self,
|
44 |
+
parameters: dict,
|
45 |
+
filename: str,
|
46 |
+
length: float,
|
47 |
+
sample_rate: int,
|
48 |
+
extra: dict = {},
|
49 |
+
) -> np.ndarray:
|
50 |
+
if not self.engine:
|
51 |
+
print("VST not loaded")
|
52 |
+
return np.zeros(5)
|
53 |
+
resample = False
|
54 |
+
if not self.sample_rate == sample_rate:
|
55 |
+
resample = True
|
56 |
+
synth = self.synth
|
57 |
+
engine = self.engine
|
58 |
+
# print( synth.get_parameters_description() )
|
59 |
+
# print("Params to set:{}".format(parameters))
|
60 |
+
|
61 |
+
ids = dict([(p["name"], p["id"]) for p in extra["config"]["fixed_parameters"]])
|
62 |
+
ids.update(dict([(p["name"], p["id"]) for p in extra["config"]["parameters"]]))
|
63 |
+
|
64 |
+
# if self.randomise_non_set:
|
65 |
+
# new_patch = self.patch_generator.get_random_patch()
|
66 |
+
# engine.set_patch(new_patch)
|
67 |
+
|
68 |
+
synth_params = dict(synth.get_patch())
|
69 |
+
# Start with defaults
|
70 |
+
|
71 |
+
# if not self.randomise_non_set:
|
72 |
+
# for i in range(84):
|
73 |
+
# synth_params[i] = 0.5
|
74 |
+
|
75 |
+
for name, value in parameters.items():
|
76 |
+
synth_params[ids[name]] = value
|
77 |
+
|
78 |
+
# if self.randomise_all:
|
79 |
+
# new_patch = self.patch_generator.get_random_patch()
|
80 |
+
# engine.set_patch(new_patch)
|
81 |
+
|
82 |
+
note_length = length * 0.8
|
83 |
+
if "note_length" in extra:
|
84 |
+
note_length = extra["note_length"]
|
85 |
+
|
86 |
+
synth.set_patch(list(synth_params.items()))
|
87 |
+
synth.add_midi_note(40, 127, 0.1, note_length)
|
88 |
+
# don't do reverb
|
89 |
+
graph = [
|
90 |
+
(synth, []), # synth takes no inputs, so we give an empty list.
|
91 |
+
]
|
92 |
+
|
93 |
+
engine.load_graph(graph)
|
94 |
+
engine.render(1)
|
95 |
+
data = engine.get_audio()
|
96 |
+
df = pd.DataFrame(data)
|
97 |
+
try:
|
98 |
+
data = librosa.to_mono(data).transpose()
|
99 |
+
except:
|
100 |
+
print("ERROR" * 100)
|
101 |
+
df = df.fillna(0)
|
102 |
+
data = df.to_numpy()
|
103 |
+
data = librosa.to_mono(data).transpose()
|
104 |
+
|
105 |
+
df = pd.DataFrame(data)
|
106 |
+
if(librosa.util.valid_audio(data)):
|
107 |
+
nsamps_target = int(1.0 * sample_rate)
|
108 |
+
# print(f"Got {len(data)} frames as type {type(data)}. Target: {nsamps_target}")
|
109 |
+
result = np.array(data)
|
110 |
+
return result
|
111 |
+
# else:
|
112 |
+
# print("ERROR" * 100)
|
113 |
+
# df = df.fillna(0)
|
114 |
+
# data = df.to_numpy()
|
115 |
+
# return data
|
116 |
+
|
117 |
+
|
118 |
+
|
119 |
+
def create_config(self, filename='default_config.json', default_value=0.0):
    """Write a blank parameter config for the loaded plugin and return it.

    Every parameter reported by ``self.synth.get_parameters_description()``
    whose index is below 86 is listed under ``"parameters"`` with its default
    value converted to ``float``; ``"fixed_parameters"`` is left empty.

    The file is written to ``plugin_config/gen_config_<plugin name>.json``.

    Args:
        filename: Accepted for interface compatibility; currently unused.
        default_value: Accepted for interface compatibility; currently unused.

    Returns:
        The dict that was serialised to disk.
    """
    fixed = []
    # NOTE(review): 86 is a hard-coded cut-off on the plugin parameter index;
    # presumably the number of automatable TAL-NoiseMaker parameters - confirm.
    params = [
        {
            "id": description['index'],
            "name": description['name'],
            # Convert without mutating the description dict owned by the synth.
            "value": float(description['defaultValue']),
        }
        for description in self.synth.get_parameters_description()
        if description['index'] < 86
    ]
    output = {"parameters": params, "fixed_parameters": fixed}

    os.makedirs("plugin_config", exist_ok=True)
    # The plugin may be given as a full path (e.g. a macOS .vst bundle); only
    # its last component is usable inside a file name, otherwise the target
    # would point into directories that do not exist.
    plugin_name = os.path.basename(os.path.normpath(str(self.vst)))
    config_path = os.path.join("plugin_config", "gen_config_" + plugin_name + '.json')
    with open(config_path, "w") as f:
        json.dump(output, f, indent=4)
    return output
|
138 |
+
|
139 |
+
|
140 |
+
# Run the generator to create a full dataset
|
141 |
+
def run_generator(args):
    """Generate a full dataset from a plugin config file.

    Args:
        args: Parsed command-line namespace. This function reads
            ``note_length``, ``config_file`` and ``plugin`` and forwards the
            whole namespace to ``generate_examples``.

    The config file must contain a ``"parameters"`` list (each entry with
    ``"name"``, ``"values"`` and optionally ``"id"``) and a
    ``"fixed_parameters"`` list (each entry with ``"name"`` and ``"value"``).
    """
    note_length = args.note_length
    if note_length < 0.0:
        # A negative value means "not specified": fall back to 80% of the
        # sample length. Scaling the negative value itself would leave it
        # negative.
        # NOTE(review): assumes the shared argument parser exposes the sample
        # length as `args.length` (seconds); 1.0 is used when it does not.
        note_length = getattr(args, "length", 1.0) * 0.8

    with open(args.config_file, "r") as f:
        config = json.load(f)

    sample = []
    for p in config['parameters']:
        values = p['values']
        if isinstance(values, str):
            # Any string value requests the default 16-step range over [0, 1].
            sample.append(
                Parameter(p["name"], param_range(16, 0, 1), p.get("id", ""))
            )
        elif isinstance(values, list):
            sample.append(
                Parameter(p["name"], values, p.get("id", ""))
            )

    fixed = {p["name"]: p["value"] for p in config["fixed_parameters"]}

    # NOTE(review): the plugin is always rendered at 16384 Hz here, so
    # --generation_sample_rate currently has no effect on generation.
    generate_examples(
        gen=VSTGenerator(vst=args.plugin, sample_rate=16384),
        parameters=ParameterSet(parameters=sample, fixed_parameters=fixed),
        args=args,
        extra={"note_length": note_length, "config": config},
    )
|
172 |
+
|
173 |
+
|
174 |
+
# Create blank config file based on the plugin's parameter sets
|
175 |
+
def generate_defaults(plugin: str, output: str, default: float = 0.5):
    """Load *plugin* at 16384 Hz and have it dump a blank config.

    Args:
        plugin: Plugin file or bundle handed to ``VSTGenerator``.
        output: Forwarded to ``create_config`` as its ``filename``.
        default: Forwarded to ``create_config`` as its ``default_value``.
    """
    generator = VSTGenerator(vst=plugin, sample_rate=16384)
    generator.create_config(filename=output, default_value=default)
|
178 |
+
|
179 |
+
|
180 |
+
# Example: python -m generators.vst_generator run --plugin /Library/Audio/Plug-Ins/VST/Lokomotiv.vst --config plugin_config/lokomotiv.json --dataset_name explore --wavefile_directory "test_waves/explore"
|
181 |
+
|
182 |
+
if __name__ == "__main__":
    # Command-line entry point: extend the shared generator parser with the
    # plugin-specific options, then dispatch on the chosen command.
    parser = default_generator_argparse()
    parser.add_argument(
        "command",
        type=str,
        choices=["run", "generate"],
        help="action to take: run (run the generator with a config) or generate (generate a blank config file for the plugin)",
    )
    parser.add_argument(
        "--plugin",
        dest="plugin",
        default='libTAL-NoiseMaker.so',
        help='plugin file. .so on linux, on mac its the top level plugin dir, e.g. "/Library/Audio/Plug-Ins/VST/Lokomotiv.vst"',
    )
    parser.add_argument(
        "--output", dest="outfile", help="Place to store the generated parameters file"
    )
    parser.add_argument("--config", dest="config_file", help="Config file to use")
    parser.add_argument(
        "--default_value",
        type=float,
        dest="default_param",
        action="store",
        default=0.5,
        help="Default setting for parameters when generating a blank config",
    )
    parser.add_argument(
        "--note_length",
        type=float,
        dest="note_length",
        default=0.8,
        help="Length of a note in seconds",
    )
    parser.add_argument(
        "--generation_sample_rate",
        type=int,
        default=None,
        dest="generate_samplerate",
        help="Sample rate for audio generation. Defaults to target samplerate, but some plugins (Dexed) have trouble running a our funny sample rates. Will be resampled to the target rate after generation",
    )

    args = parser.parse_args()
    print(args)
    # `choices` restricts the command to exactly these two values, so the
    # branches are mutually exclusive. The script ends after dispatch; no
    # explicit quit() (an interactive-shell helper) is needed.
    if args.command == "run":
        run_generator(args)
    elif args.command == "generate":
        generate_defaults(args.plugin, args.outfile, args.default_param)
|
235 |
+
|
back/main.py
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from contextlib import asynccontextmanager
|
2 |
+
import uuid
|
3 |
+
from fastapi import Depends, FastAPI, File, HTTPException, UploadFile
|
4 |
+
from fastapi.responses import JSONResponse
|
5 |
+
from fastapi.staticfiles import StaticFiles
|
6 |
+
import os
|
7 |
+
from starlette.exceptions import HTTPException as StarletteHTTPException
|
8 |
+
from fastapi.middleware.cors import CORSMiddleware
|
9 |
+
import pathlib
|
10 |
+
from contextlib import asynccontextmanager
|
11 |
+
from glob import glob
|
12 |
+
|
13 |
+
|
14 |
+
from models.launch import inferrence, train_model
|
15 |
+
from models.spectrogram_cnn import get_model
|
16 |
+
|
17 |
+
# distinguish model type for reshaping
|
18 |
+
|
19 |
+
SERVER = "http://localhost:7860/"
|
20 |
+
|
21 |
+
path = os.path.dirname(os.path.realpath(__file__))
|
22 |
+
|
23 |
+
def load_model_and_parameters():
    """Build the C6XL/STFT model via ``train_model`` and return it.

    Returns:
        ``(model, parameters_file)`` as produced by ``train_model``, or
        ``(None, None)`` when ``train_model`` raises.
    """
    setup = dict(
        model_name="C6XL",
        dataset_name="InverSynth",
        epochs=1,
        dataset_dir="test_datasets",
        output_dir="output",
        dataset_file=None,
        parameters_file=None,
        data_format="channels_last",
        run_name=None,
        resume=True,
        model_type="STFT",
    )

    try:
        # Load the model
        model, parameters_file = train_model(model_callback=get_model, **setup)
    except Exception as e:
        print(f"Couldn't load model: {e}")
        return None, None
    else:
        return model, parameters_file
|
46 |
+
|
47 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: start every run with an empty ``temp`` folder.

    Regular files left in ``<module dir>/temp`` are deleted (failures are
    printed and skipped); the folder is created when it does not exist.
    Nothing is done on shutdown.
    """
    temp_dir = os.path.join(path, "temp")

    if os.path.exists(temp_dir):
        # Remove all files in the temp folder
        for entry in os.listdir(temp_dir):
            file_path = os.path.join(temp_dir, entry)
            try:
                if os.path.isfile(file_path):
                    os.remove(file_path)
            except Exception as e:
                print(f"Error deleting file {file_path}: {e}")
    else:
        os.makedirs(temp_dir)

    yield
|
67 |
+
|
68 |
+
app = FastAPI(lifespan=lifespan)
|
69 |
+
|
70 |
+
str_p = str(path)
|
71 |
+
|
72 |
+
|
73 |
+
class SPAStaticFiles(StaticFiles):
    """Static file app that answers unknown paths with ``index.html``.

    Any 404 raised while resolving a path is replaced by the response for
    ``index.html``; every other HTTP error is re-raised unchanged.
    """

    async def get_response(self, path: str, scope):
        try:
            response = await super().get_response(path, scope)
        except (HTTPException, StarletteHTTPException) as ex:
            if ex.status_code != 404:
                raise ex
            response = await super().get_response("index.html", scope)
        return response
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
+
|
86 |
+
|
87 |
+
@app.get("/download/{file_id}")
async def generate_audio(file_id: str):
    """Return the relative path of the first temp file whose name starts with *file_id*.

    Args:
        file_id: Identifier prefix of a file stored in ``temp/``.

    Returns:
        ``JSONResponse`` with ``{"url": <matching path>}``.

    Raises:
        HTTPException: 404 when no file matches, 500 when the lookup fails.
    """
    from glob import escape as glob_escape

    try:
        # `file_id` comes straight from the URL: escape it so glob
        # metacharacters in the request cannot match arbitrary files.
        matching_files = glob(f"temp/{glob_escape(file_id)}*")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    # Raised outside the try block so the 404 is not converted into a 500.
    if not matching_files:
        print(f"No file found for file ID {file_id}")
        raise HTTPException(status_code=404, detail="File not found")

    # Only the first match is reported.
    source_file_path = matching_files[0]
    return JSONResponse(content={"url": f"{source_file_path}"})
|
107 |
+
|
108 |
+
|
109 |
+
def is_valid_audio(file_extension):
    """Tell whether *file_extension* names a supported audio format.

    The comparison is case-insensitive and expects the leading dot
    (``".wav"``, not ``"wav"``).
    """
    normalized = file_extension.lower()
    return any(
        normalized == known for known in (".mp3", ".wav", ".ogg", ".flac")
    )
|
115 |
+
|
116 |
+
|
117 |
+
@app.post("/upload/")
async def upload_audio_file(file: UploadFile = File(...)):
    """Store an uploaded audio file in ``temp/`` and run inference on it.

    Args:
        file: Uploaded file; its extension must be accepted by
            ``is_valid_audio``.

    Returns:
        Dict with ``file_path``, ``csv_path`` and ``output_file_path``, each
        being ``SERVER`` followed by the matching inference output.

    Raises:
        HTTPException: 400 for an unsupported extension, 500 when the model
            cannot be loaded or anything else fails.
    """
    # NOTE(review): the model is rebuilt on every upload, which is expensive.
    try:
        model, parameters_file = load_model_and_parameters()
    except Exception as e:
        raise HTTPException(status_code=500, detail="Couldn't load model") from e
    if model is None:
        # The loader signals failure by returning (None, None).
        raise HTTPException(status_code=500, detail="Couldn't load model")

    try:
        # Create a unique identifier for the uploaded file
        file_id = str(uuid.uuid4())

        # Extract the original file extension (a missing filename yields "")
        _, file_extension = os.path.splitext(file.filename or "")

        # Check if the file has a valid audio extension
        if not is_valid_audio(file_extension):
            raise HTTPException(status_code=400, detail="Invalid audio file format")

        # Construct the file path with the original file extension
        file_path = os.path.join("temp", file_id + file_extension)

        with open(file_path, "wb") as audio_file:
            audio_file.write(file.file.read())

        output = await start_inference(
            model=model,
            parameters_file=parameters_file,
            file_id=file_id,
            file_extension=file_extension,
        )
        # Send back the URLs of the stored input and the generated outputs
        return {
            "file_path": SERVER + output[0],
            "csv_path": SERVER + output[1],
            "output_file_path": SERVER + output[2],
        }
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 400 above) intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
148 |
+
|
149 |
+
async def start_inference(model, parameters_file, file_id: str, file_extension : str):
    """Run ``inferrence`` on the uploaded file ``temp/<file_id><file_extension>``.

    Returns whatever ``inferrence`` returns.
    """
    uploaded_name = file_id + file_extension
    return inferrence(
        model=model,
        parameters_file=parameters_file,
        file_path=os.path.join("temp", uploaded_name),
        file_id=file_id,
    )
|
155 |
+
|
156 |
+
|
157 |
+
# CORS: the same wildcard list is reused for origins, methods and headers.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive - confirm this is intended outside local development.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=origins,
    allow_headers=origins,
)

# Serve uploaded and generated files from the local "temp" directory under /temp.
# check_dir=True makes the mount fail when the directory does not exist.
app.mount(
    "/temp", StaticFiles(directory="temp", check_dir=True, html=True), name="temp"
)
# Everything else is served from ../front/dist (relative to this module's
# directory) through SPAStaticFiles. Mounted last, after the API routes and /temp.
app.mount(
    "/",
    SPAStaticFiles(directory=f"{pathlib.PurePath(str_p).parent}/front/dist", html=True),
    name="dist",
)


if __name__ == "__main__":
    # Run the app directly with uvicorn on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
|
back/models/__init__.py
ADDED
File without changes
|
back/models/app.py
ADDED
@@ -0,0 +1,645 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
import json
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
from pickle import load
|
6 |
+
from typing import Callable, List
|
7 |
+
import librosa
|
8 |
+
import numpy as np
|
9 |
+
import pandas as pd
|
10 |
+
import tensorflow as tf
|
11 |
+
from tensorflow import keras
|
12 |
+
from keras import backend as K
|
13 |
+
from keras.callbacks import CSVLogger
|
14 |
+
from kapre.time_frequency import Spectrogram
|
15 |
+
from models.importer_audio import audio_importer
|
16 |
+
import dawdreamer as daw
|
17 |
+
from scipy.io import wavfile
|
18 |
+
import librosa
|
19 |
+
|
20 |
+
|
21 |
+
from generators.parameters import ParameterSet, ParamValue
|
22 |
+
from models.common.data_generator import SoundDataGenerator
|
23 |
+
|
24 |
+
|
25 |
+
weight_var = K.variable(0.)
|
26 |
+
|
27 |
+
class Weight_trans(keras.callbacks.Callback):
    """Keras callback that updates a backend weight variable at the end of epochs.

    The variable is only touched once ``epoch > 680``; the value written
    depends on the ``transition`` name ("linear", "linear2", "log", "log2",
    "log3", "square" or "quad"). Any other name leaves the variable unchanged.

    NOTE(review): the source was recovered without indentation; the transition
    branches are assumed to be nested under the ``epoch > 680`` check - confirm.
    """
    def __init__(self, weight_var, transition, epochs):
        # NOTE(review): the base Callback.__init__ is not called here.
        self.alpha = weight_var  # backend variable written by on_epoch_end
        self.transition = transition  # name of the schedule to apply
        self.epochs = epochs  # total epoch count, used by the "linear" schedule
    def on_epoch_end(self, epoch, logs={}):
        """Write the scheduled weight into ``self.alpha`` for late epochs.

        ``logs`` is accepted but never read or modified.
        """
        if epoch > 680:
            if self.transition == "linear":
                K.set_value(self.alpha, ((epoch)/(self.epochs) - 0.617)*0.00001)
                # Prints the module-level `weight_var`, not `self.alpha`.
                tf.print(f"new weight {weight_var.numpy()}")
            if self.transition == "linear2":
                K.set_value(self.alpha, (1.5625*epoch - 1.0625)*0.00001)
                tf.print(f"new weight {weight_var.numpy()}")
            if self.transition == "log":
                K.set_value(self.alpha, (1- (tf.math.log(epoch*0.001 - 0.67285)/tf.math.log(0.0005)) - 0.35)*0.00001)
                tf.print("log")
            if self.transition == "log2":
                K.set_value(self.alpha, (1- (tf.math.log(epoch*0.001 - 0.6575)/tf.math.log(0.0005)) - 0.5)*0.00001)
                tf.print("log")
            if self.transition == "log3":
                K.set_value(self.alpha, (1- (tf.math.log(epoch*0.001 - 0.67978)/tf.math.log(0.00000005)) - 0.5)*0.00001)
                tf.print("log")
            if self.transition == "square":
                K.set_value(self.alpha, 4.1*tf.pow(epoch*0.001 - 0.65, 2) + 0.002)
                # NOTE(review): the "square" schedule prints "exp".
                print("exp")
            if self.transition == "quad":
                K.set_value(self.alpha, 33*tf.pow(epoch*0.001 - 0.65, 4) + 0.002)
                print("quad")
|
55 |
+
|
56 |
+
|
57 |
+
def train_val_split(
    x_train: np.ndarray, y_train: np.ndarray, split: float = 0.2,
) -> tuple:
    """Hold out the trailing fraction of the data for validation.

    Args:
        x_train: Inputs, sliced along the first axis.
        y_train: Targets, sliced along the first axis.
        split: Fraction of ``x_train`` rows moved to the validation set; the
            row count is truncated towards zero.

    Returns:
        ``(x_val, y_val, x_train, y_train)`` - validation arrays first.
    """
    n_val: int = int(x_train.shape[0] * split)

    if n_val == 0:
        # `arr[-0:]` is the whole array and `arr[:-0]` is empty, which would
        # swap the two sets; return an empty validation set instead.
        return (x_train[:0], y_train[:0], x_train, y_train)

    x_val: np.ndarray = x_train[-n_val:]
    y_val: np.ndarray = y_train[-n_val:]

    x_train = x_train[:-n_val]
    y_train = y_train[:-n_val]

    return (x_val, y_val, x_train, y_train)
|
70 |
+
|
71 |
+
|
72 |
+
"""Model Utils"""
|
73 |
+
|
74 |
+
|
75 |
+
def mean_percentile_rank(y_true, y_pred, k=5):
    """Placeholder for the paper's Mean Percentile Rank (MPR) metric.

    @paper
    The first evaluation measure is the Mean Percentile Rank
    (MPR) which is computed per synthesizer parameter.

    Not implemented yet: the arguments are ignored and ``None`` is returned.
    """
    # TODO: implement the metric
    return None
|
82 |
+
|
83 |
+
|
84 |
+
def top_k_mean_accuracy(y_true, y_pred, k=5):
    """Mean top-*k* accuracy over all rows of the last axis.

    @paper
    The top-k mean accuracy is obtained by computing the top-k
    accuracy for each test example and then taking the mean across
    all examples. In the same manner as done in the MPR analysis,
    we compute the top-k mean accuracy per synthesizer
    parameter for k = 1, ... ,5.

    Both tensors are flattened to 2-D (rows x last axis); a row counts as
    correct when the argmax of ``y_true`` is among the ``k`` largest entries
    of ``y_pred``.
    """
    # TODO: per parameter?
    leading_dims = tf.shape(y_true)[:-1]

    flat_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
    flat_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))

    target_index = tf.cast(tf.argmax(flat_true, axis=-1), "int32")
    hits = K.in_top_k(flat_pred, target_index, k)

    # Restore the leading dimensions before averaging.
    hits = tf.reshape(hits, leading_dims)
    return tf.reduce_mean(tf.cast(hits, tf.float32))
|
100 |
+
|
101 |
+
@tf.function
def CustomLoss(y_true, y_pred):
    """Binary cross-entropy weighted by a blend of 1 and the spectral loss.

    The sample weight is ``(1 - w) + w * spectral`` where ``w`` is the current
    value of the module-level ``weight_var`` and ``spectral`` is the result of
    ``custom_spectral_loss``. Both are read with ``.numpy()``.
    """
    spectral = custom_spectral_loss(y_true, y_pred)
    alpha = weight_var.numpy()
    sample_weight = (1 - alpha) + (alpha * spectral.numpy())
    # tf.print(f"New weight is {sample_weight}")
    cross_entropy = tf.keras.losses.BinaryCrossentropy()
    return cross_entropy(y_true, y_pred, sample_weight=sample_weight)
|
109 |
+
|
110 |
+
@tf.function
def custom_spectral_loss(y_true, y_pred):
    """Spectral distance between audio rendered from true and predicted parameters.

    Both tensors are flattened to 2-D, the first row of each is decoded into
    synth parameters, audio is rendered with ``generate_audio`` and the two
    renders are compared with the spectrogram selected by ``SPECTRO_TYPE``
    and the distance selected by ``LOSS_TYPE``.

    Returns:
        The scalar spectral loss.
    """
    # tf.print("After compiling model :",tf.executing_eagerly())

    y_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
    y_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))


    # Assuming y_true and y_pred contain parameters for audio synthesis
    # Extract parameters from y_true and y_pred
    # NOTE(review): the parameter set is unpickled from a hard-coded path on
    # every call; the file must be trusted since it is loaded with pickle.
    with open("test_datasets/InverSynth_params.pckl", "rb") as f:
        parameters: ParameterSet = load(f)

    # Only the first row of each batch is decoded and rendered.
    predlist_true: List[ParamValue] = parameters.decode(y_true[0])

    predlist_pred: List[ParamValue] = parameters.decode(y_pred[0])

    # Generate audio from parameters
    # The penalty of the second call overwrites the one from the first call.
    audio_true, penalty = generate_audio(predlist_true)
    audio_pred, penalty = generate_audio(predlist_pred)

    # Compute spectrogram
    # NOTE(review): an unrecognised SPECTRO_TYPE leaves both spectrograms
    # undefined and fails below with a NameError.
    if SPECTRO_TYPE == 'spectro':
        # Magnitude STFT
        spectrogram_true = tf.math.abs(tf.signal.stft(audio_true, frame_length=1024, frame_step=512))
        spectrogram_pred = tf.math.abs(tf.signal.stft(audio_pred, frame_length=1024, frame_step=512))
    elif SPECTRO_TYPE == 'qtrans':
        # Constant-Q transform in dB, relative to each signal's maximum
        spectrogram_true = librosa.amplitude_to_db(librosa.cqt(audio_true, sr=SAMPLE_RATE, hop_length=128), ref=np.max)
        spectrogram_pred = librosa.amplitude_to_db(librosa.cqt(audio_pred, sr=SAMPLE_RATE, hop_length=128), ref=np.max)
    elif SPECTRO_TYPE == 'mel':
        # Mel power spectrogram in dB, relative to each signal's maximum
        # NOTE(review): the audio is passed positionally; newer librosa
        # releases may require `y=` here - confirm against the pinned version.
        mel_spect = librosa.feature.melspectrogram(audio_true, sr=SAMPLE_RATE, n_fft=2048, hop_length=1024)
        spectrogram_true = librosa.power_to_db(mel_spect, ref=np.max)
        mel_spect = librosa.feature.melspectrogram(audio_pred, sr=SAMPLE_RATE, n_fft=2048, hop_length=1024)
        spectrogram_pred = librosa.power_to_db(mel_spect, ref=np.max)
    # L1 loss: penalty-scaled mean absolute difference
    if LOSS_TYPE == 'L1':
        spectral_loss = penalty*tf.reduce_mean(tf.abs(spectrogram_true-spectrogram_pred))
    # L2 loss: penalty-scaled mean squared difference
    elif LOSS_TYPE =='L2':
        spectral_loss = penalty*tf.reduce_mean((spectrogram_true - spectrogram_pred)**2)
    # Cosine loss: the penalty is not applied in this branch
    elif LOSS_TYPE == 'COSINE':
        spectral_loss = tf.losses.cosine_distance(spectrogram_true, spectrogram_pred, weights=1.0, axis=-1)

    # NOTE(review): any other LOSS_TYPE leaves `spectral_loss` unbound.
    return spectral_loss
|
155 |
+
|
156 |
+
def summarize_compile(model: keras.Model):
    """Print the model summary, then compile it for training.

    The model is compiled with the Adam optimizer, ``CustomLoss`` as the loss
    and three metrics: top-k mean accuracy, the custom spectral loss and mean
    absolute error.
    """
    model.summary(
        line_length=80,
        positions=[0.33, 0.65, 0.8, 1.0],
        show_trainable=True,
        expand_nested=True,
    )

    # @paper: Adam [14] optimizer
    optimizer = keras.optimizers.Adam()

    # @paper: sigmoid activations with binary cross entropy loss; here the
    # cross entropy is wrapped by CustomLoss instead of
    # keras.losses.BinaryCrossentropy().
    loss = CustomLoss

    monitored = [
        # @paper: top-k mean accuracy based evaluation
        top_k_mean_accuracy,
        # Extra: spectral loss reported as a metric
        custom_spectral_loss,
        # @paper: mean absolute error based evaluation
        keras.metrics.MeanAbsoluteError(),
    ]

    model.compile(optimizer=optimizer, loss=loss, metrics=monitored)
|
178 |
+
|
179 |
+
def fit(
    model: keras.Model,
    x_train: np.ndarray,
    y_train: np.ndarray,
    x_val: np.ndarray,
    y_val: np.ndarray,
    batch_size: int = 16,
    epochs: int = 200,
) -> keras.Model:
    """Train *model* and return it.

    Args:
        model: Compiled model exposing a Keras-style ``fit`` method.
        x_train: Training inputs.
        y_train: Training targets.
        x_val: Validation inputs, evaluated at the end of each epoch.
        y_val: Validation targets.
        batch_size: Minibatch size (@paper: 16).
        epochs: Number of epochs to train for.

    Returns:
        The same ``model`` instance after training.
    """
    logging.info("# Fit model on training data")
    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        # @paper: validation data is passed so validation loss and metrics
        # are monitored at the end of each epoch.
        validation_data=(x_val, y_val),
        verbose=0,
    )

    # The returned "history" object holds a record of the loss values and
    # metric values during training. It is passed as a %-style argument:
    # without a placeholder the logging call cannot format the record.
    logging.info("\nhistory dict: %s", history.history)

    return model
|
213 |
+
|
214 |
+
|
215 |
+
def compare(target, prediction, params, precision=1, print_output=False):
    """Compare one predicted encoding against its target, parameter by parameter.

    Both vectors are decoded with ``params.decode``; for every decoded
    parameter the argmax of its ``encoding`` is taken as its level index.

    Args:
        target: Ground-truth encoded vector.
        prediction: Predicted encoded vector.
        params: Object whose ``decode`` returns items exposing ``name``,
            ``value`` and ``encoding``.
        precision: Largest absolute index difference still counted as close.
        print_output: Print a comparison table when true.

    Returns:
        ``(exact_ratio, close_ratio)``: the fraction of parameters whose
        predicted index equals the target index, and the fraction within
        ``precision`` of it. Both are 0.0 when nothing was decoded.
    """
    if print_output and len(prediction) < 10:
        print(prediction)
        print("Pred: {}".format(np.round(prediction, decimals=2)))
        print("PRnd: {}".format(np.round(prediction)))
        print("Act : {}".format(target))
        print("+" * 5)

    pred = params.decode(prediction)
    act = params.decode(target)
    pred_index = [np.array(p.encoding).argmax() for p in pred]
    act_index = [np.array(p.encoding).argmax() for p in act]

    n_params = len(pred_index)
    if n_params == 0:
        # Nothing to compare: avoid dividing by zero below.
        return 0.0, 0.0

    pairs = list(zip(pred_index, act_index))
    exact = float(sum(1 for p, a in pairs if p == a))
    close = float(sum(1 for p, a in pairs if abs(p - a) <= precision))
    exact_ratio = exact / n_params
    close_ratio = close / n_params

    if print_output:
        # The table is only assembled when it is actually printed.
        width = 8
        print("Parameter: " + "".join(p.name.rjust(width)[:width] for p in act))
        print("Actual:    " + "".join(f"{p.value:>8.2f}" for p in act))
        print("Predicted: " + "".join(f"{p.value:>8.2f}" for p in pred))
        print("Act. Index:" + "".join(f"{i:>8}" for i in act_index))
        print("Pred. Indx:" + "".join(f"{i:>8}" for i in pred_index))
        print("Index Diff:" + "".join(f"{p - a:>8}" for p, a in pairs))
        print("-" * 30)
    return exact_ratio, close_ratio
|
265 |
+
|
266 |
+
|
267 |
+
def evaluate(
    prediction: np.ndarray, x: np.ndarray, y: np.ndarray, params: ParameterSet,
):
    """Print summary accuracy figures for a batch of predictions.

    Each of the ``x.shape[0]`` examples is scored with ``compare`` (the first
    five are printed in detail). The report gives the number of examples with
    every parameter exact, plus the mean exact and close parameter ratios.
    """
    print("Prediction Shape: {}".format(prediction.shape))

    total: int = x.shape[0]
    perfect: int = 0
    exact_sum: float = 0.0
    close_sum: float = 0.0

    for idx in range(total):
        exact, close = compare(
            target=y[idx],
            prediction=prediction[idx],
            params=params,
            print_output=idx < 5,
        )
        if exact == 1.0:
            perfect += 1
        exact_sum += exact
        close_sum += close

    summary = params.explain()
    header = "{} Parameters with {} levels (fixed: {})".format(
        summary["n_variable"], summary["levels"], summary["n_fixed"]
    )
    print(header)

    report = "Got {} out of {} ({:.1f}% perfect); Exact params: {:.1f}%, Close params: {:.1f}%".format(
        perfect,
        total,
        perfect / total * 100,
        exact_sum / total * 100,
        close_sum / total * 100,
    )
    print(report)
|
304 |
+
|
305 |
+
|
306 |
+
def data_format_audio(audio: np.ndarray, data_format: str) -> np.ndarray:
    """Add batch and channel axes to *audio*.

    With ``"channels_last"`` new axes are inserted before and after the first
    axis (1-D input of length n becomes ``(1, n, 1)``); with any other value
    two leading axes are inserted (``(1, 1, n)``).
    """
    keep = slice(None)
    if data_format == "channels_last":
        index = (np.newaxis, keep, np.newaxis)
    else:
        index = (np.newaxis, np.newaxis, keep)
    return audio[index]
|
316 |
+
|
317 |
+
|
318 |
+
"""
|
319 |
+
Wrap up the whole training process in a standard function. Gets a callback
|
320 |
+
to actually make the model, to keep it as flexible as possible.
|
321 |
+
# Params:
|
322 |
+
# - dataset_name (dataset name)
|
323 |
+
# - model_name: (C1..C6,e2e)
|
324 |
+
# - model_callback: function taking name,inputs,outputs,data_format and returning a Keras model
|
325 |
+
# - epochs: int
|
326 |
+
# - dataset_dir: place to find input data
|
327 |
+
# - output_dir: place to put outputs
|
328 |
+
# - parameters_file (override parameters filename)
|
329 |
+
# - dataset_file (override dataset filename)
|
330 |
+
# - data_format (channels_first or channels_last)
|
331 |
+
# - run_name: to save this run as
|
332 |
+
"""
|
333 |
+
# Settings read by custom_spectral_loss.
# LOSS_TYPE: 'L1', 'L2' or 'COSINE'; SPECTRO_TYPE: 'spectro', 'qtrans' or 'mel'.
LOSS_TYPE = 'L1'
SPECTRO_TYPE = 'spectro'
PRINT = 1

# DawDreamer export settings
SAMPLE_RATE = 16384
BUFFER_SIZE = 1024
SYNTH_PLUGIN = "libTAL-NoiseMaker.so"

# The render engine and plugin are created at import time, so importing this
# module requires the plugin file to be loadable.
ENGINE = daw.RenderEngine(SAMPLE_RATE, BUFFER_SIZE)
SYNTH = ENGINE.make_plugin_processor("my_synth", SYNTH_PLUGIN)
# NOTE(review): presumably (note, velocity, start, duration) per the DawDreamer
# API, i.e. note 40 at velocity 127 from 0 s for 0.8 s - confirm.
SYNTH.add_midi_note(40, 127, 0, 0.8)

# The plugin parameter config is read relative to the current working directory.
with open('plugin_config/TAL-NoiseMaker-config.json') as f:
    data = json.load(f)

dico=[]
# Collect the "id" of every entry in the config's "parameters" list
key_id = data['parameters']
for param in key_id:
    dico.append(param['id'])

# Module-level alias of the collected parameter ids.
DICO=dico
|
357 |
+
|
358 |
+
def train_model(
    # Main options
    dataset_name: str,
    model_name: str,
    epochs: int,
    model_callback: Callable[[str, int, int, str], keras.Model],
    dataset_dir: str,
    output_dir: str,  # Directory names
    dataset_file: str = None,
    parameters_file: str = None,
    run_name: str = None,
    data_format: str = "channels_last",
    save_best: bool = True,
    resume: bool = False,
    checkpoint: bool = True,
    model_type: str = "STFT",
):
    """Train (or resume) a model, evaluate it, then resynthesise a chosen wav.

    The function runs four stages in order:

    1. resolve file names and create the output folders,
    2. build the model through ``model_callback`` (or reload a checkpoint
       when ``resume`` is set and the checkpoint exists) and fit it,
    3. save the model, plot the training history and evaluate one
       validation batch,
    4. ask on stdin for a wav file, predict its parameters, write them to
       ``output/wav_inferred/<file>.csv`` and render the prediction through
       the module-level ``SYNTH``/``ENGINE`` into
       ``output/wav_inferred/gen_<file>.wav``.

    Args:
        dataset_name: prefix of the ``*_data.hdf5`` / ``*_params.pckl`` files.
        model_name: architecture name forwarded to ``model_callback``.
        epochs: final epoch number passed to ``model.fit``.
        model_callback: called with ``model_name``, ``inputs``, ``outputs``
            and ``data_format`` keyword arguments; must return the model.
        dataset_dir: folder (relative to the working directory) holding the
            dataset files when the explicit file arguments are not given.
        output_dir: root folder for models, checkpoints, history and logs.
        dataset_file: overrides the derived dataset path.
        parameters_file: overrides the derived parameters path.
        run_name: name used in output file names; defaults to
            ``<dataset_name>_<model_name>``.
        data_format: value handed to ``keras.backend.set_image_data_format``.
        save_best: add the best-only ``ModelCheckpoint`` callback.
        resume: continue from the (hard-coded) checkpoint if it exists.
        checkpoint: add the every-epoch ``ModelCheckpoint`` callback.
        model_type: accepted for compatibility; not used by this function.

    Returns:
        None. All results are written to disk or printed.
    """
    tf.config.run_functions_eagerly(True)
    # tf.data.experimental.enable_debug_mode()
    time_generated = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    if not dataset_file:
        dataset_file = os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_data.hdf5"
    if not parameters_file:
        parameters_file = os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_params.pckl"
    if not run_name:
        run_name = dataset_name + "_" + model_name

    model_file = f"{output_dir}/model/{run_name}_{time_generated}"
    best_model_file = f"{output_dir}/best_checkpoint/{run_name}_best_{time_generated}"
    # Create every output folder up front. The history/checkpoint folders are
    # needed on the resume path too: when the checkpoint is missing we fall
    # back to a fresh model and must be able to create the history file.
    for directory in (
        model_file,
        best_model_file,
        f"{output_dir}/history",
        f"{output_dir}/checkpoints",
        f"{output_dir}/logs",
    ):
        os.makedirs(directory, exist_ok=True)

    if resume:
        # NOTE(review): these paths are hard-coded to one specific run, and the
        # `history` / `checkpoints` folders look swapped compared with the
        # non-resume branch below. Left unchanged - confirm before relying on it.
        checkpoint_model_file = f"{output_dir}/history/InverSynth_C6XL_checkpoint_20231201-103344"
        history_file = f"{output_dir}/checkpoints/InverSynth_C6XL_20231201-103344"
    else:
        history_file = f"{output_dir}/history/{run_name}_{time_generated}"
        checkpoint_model_file = f"{output_dir}/checkpoints/{run_name}_checkpoint_{time_generated}"

    history_graph_file = f"{output_dir}/{run_name}.pdf"
    gpus = tf.config.list_physical_devices('GPU')
    print(gpus)
    gpu_avail = len(gpus)  # number of visible GPUs (0 means CPU only)

    print("+" * 30)
    print(f"++ {run_name}")
    print(
        f"Running model: {model_name} on dataset {dataset_file} (parameters {parameters_file}) for {epochs} epochs"
    )
    print(f"Saving model in {output_dir} as {model_file}")
    print(f"Saving history as {history_file}")
    # Both figures come from the same device query, as before.
    print(f"GPU: {gpu_avail}, with CUDA: {gpu_avail}")
    print("+" * 30)

    # Get training and validation generators
    params = {"data_file": dataset_file, "batch_size": 64, "shuffle": True}
    training_generator = SoundDataGenerator(first=0.8, **params)
    validation_generator = SoundDataGenerator(last=0.2, **params)
    n_samples = training_generator.get_audio_length()
    print(f"get_audio_length: {n_samples}")
    n_outputs = training_generator.get_label_size()

    # set keras image_data_format
    # NOTE: on CPU only `channels_last` is supported
    keras.backend.set_image_data_format(data_format)

    model: keras.Model = None
    if resume and os.path.exists(checkpoint_model_file):
        previous_history = pd.read_csv(history_file)
        # Note - its zero indexed in the file, but 1 indexed in the display
        initial_epoch = int(max(previous_history.iloc[:, 0])) + 1
        print(
            f"Resuming from model file: {checkpoint_model_file} after epoch {initial_epoch}"
        )
        model = keras.models.load_model(
            checkpoint_model_file,
            custom_objects={
                "top_k_mean_accuracy": top_k_mean_accuracy,
                "Spectrogram": Spectrogram,
                "custom_spectral_loss": custom_spectral_loss,
                "CustomLoss": CustomLoss,
            },
        )
    else:
        model = model_callback(
            model_name=model_name,
            inputs=n_samples,
            outputs=n_outputs,
            data_format=data_format,
        )
        # keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_layer_activations=True)
        # Summarize and compile the model
        summarize_compile(model)
        initial_epoch = 0
        # Start from an empty history file; CSVLogger appends to it below.
        with open(history_file, "w"):
            pass

    best_callback = keras.callbacks.ModelCheckpoint(
        filepath=best_model_file,
        save_weights_only=False,
        save_best_only=True,
        verbose=1,
    )
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_model_file,
        save_weights_only=False,
        save_best_only=False,
        verbose=1,
    )
    log_dir = f"{output_dir}/logs/" + time_generated
    tensorboard_callback = keras.callbacks.TensorBoard(
        log_dir=log_dir,
        histogram_freq=1,
        write_graph=True,
        write_images=True,
        profile_batch='500,520',
    )

    callbacks = []
    if save_best:
        callbacks.append(best_callback)
    if checkpoint:
        callbacks.append(checkpoint_callback)
    callbacks.append(tensorboard_callback)
    callbacks.append(CSVLogger(history_file, append=True))
    callbacks.append(Weight_trans(weight_var, "log3", epochs))

    # Fit the model. A failure is reported but deliberately not re-raised so
    # that whatever was trained so far is still saved below.
    history = None
    try:
        history = model.fit(
            x=training_generator,
            validation_data=validation_generator,
            epochs=epochs,
            callbacks=callbacks,
            initial_epoch=initial_epoch,
            verbose=1,  # https://github.com/tensorflow/tensorflow/issues/38064
        )
    except Exception as e:
        print(f"Something went wrong during `model.fit`: {e}")

    # Save model
    model.save(model_file)

    # Save history
    if history is not None and not resume:
        try:
            hist_df = pd.DataFrame(history.history)
            try:
                fig = hist_df.plot(subplots=True, figsize=(8, 25))
                fig[0].get_figure().savefig(history_graph_file)
            except Exception as e:
                print("Couldn't create history graph")
                print(e)
        except Exception as e:
            tf.print("Couldn't save history")
            print(e)

    # evaluate prediction on random sample from validation set
    # Parameter data - needed for decoding!
    with open(parameters_file, "rb") as f:
        parameters = load(f)

    # Shuffle data
    validation_generator.on_epoch_end()
    X, y = validation_generator[0]
    # The original call discarded the reshape result and hard-coded 16384;
    # keep the intended (batch, 1, samples) layout using the dataset length.
    X = X.reshape((len(X), 1, n_samples))

    prediction = model.predict(X)
    evaluate(prediction, X, y, parameters)

    print("++++" * 5)
    print("Pushing to trained model")
    print("++++" * 5)

    # Keep asking until an existing path is entered.
    while True:
        namefile = input("Enter .wav test file path: ")
        if os.path.exists(namefile):
            break
        print("File Path invalid, try again ")

    newpred = model.predict(audio_importer(namefile))
    predlist = parameters.decode(newpred[0])
    df = pd.DataFrame(predlist)

    print(df)
    df = df.drop(['encoding'], axis=1)
    # saving the dataframe
    inferred_dir = 'output/wav_inferred'
    os.makedirs(inferred_dir, exist_ok=True)
    tail = os.path.basename(namefile)
    print("Outputting CSV config in " + inferred_dir)
    df.to_csv(f'{inferred_dir}/{tail}.csv')

    # The n-th predicted value goes to the plugin parameter DICO[n].
    for index, value in enumerate(df['value'].values):
        SYNTH.set_parameter(DICO[index], value)
    # Setting volume to 0.9
    SYNTH.set_parameter(1, 0.9)

    # synth takes no inputs, so we give an empty list.
    ENGINE.load_graph([(SYNTH, [])])
    ENGINE.render(1)
    data = ENGINE.get_audio()
    try:
        data = librosa.to_mono(data).transpose()
    except Exception:
        # to_mono rejects buffers that are not finite everywhere. The old
        # fallback converted the *parameter table* to audio; instead zero out
        # the non-finite samples of the rendered audio and retry.
        tf.print("ERROR" * 100)
        tf.print("crashed, nan in generation")
        print(dict(SYNTH.get_patch()))
        data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)
        data = librosa.to_mono(data).transpose()

    wavfile.write(f'{inferred_dir}/gen_{tail}.wav', SAMPLE_RATE, data)
|
605 |
+
|
606 |
+
def generate_audio(df_params):
    """Render audio from the shared synth for one set of parameter values.

    Args:
        df_params: iterable of objects exposing a ``value`` attribute. The
            n-th item is written to the plugin parameter ``DICO[n]``.

    Returns:
        ``(audio, penalty)``: the rendered audio mixed down to mono as a
        NumPy array, and the constant penalty ``1``.

    Raises:
        ValueError: if the rendered audio cannot be mixed down or does not
            pass ``librosa.util.valid_audio``; the underlying error is
            chained as the cause. (The original ``raise("...")`` raised a
            plain string, which Python turns into a ``TypeError``.)
    """
    penalty = 1
    for index, param in enumerate(df_params):
        # Set parameters using DataFrame values
        SYNTH.set_parameter(DICO[index], param.value)

    # synth takes no inputs, so we give an empty list.
    ENGINE.load_graph([(SYNTH, [])])
    ENGINE.render(1)
    data = ENGINE.get_audio()
    if np.isnan(data).any():
        # Dump the patch that produced NaNs to help reproduce the problem.
        tf.print("crashed, nan in generation")
        print(dict(SYNTH.get_patch()))

    try:
        data = librosa.to_mono(data).transpose()
        if not librosa.util.valid_audio(data):
            raise ValueError("rendered audio failed validation")
    except Exception as err:
        tf.print("crashed, nan in generation")
        raise ValueError("Nan in generation, crashed") from err

    return np.array(data), penalty
|
643 |
+
|
644 |
+
|
645 |
+
|
back/models/common/__init__.py
ADDED
File without changes
|
back/models/common/architectures.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
|
3 |
+
|
4 |
+
# Model architectures
|
5 |
+
@dataclass
class C:
    """Settings for one entry of the layer tables defined in this module.

    Presumably one convolutional layer (the tables are labelled "Conv N") -
    confirm against the code that consumes ``layers_map``.
    """

    # Filter count for the layer.
    filters: int
    # Every table entry in this module passes a 2-tuple here, e.g. (13, 26).
    window_size: tuple
    # Every table entry in this module passes a 2-tuple here, e.g. (13, 26).
    strides: tuple
    # Activation name; "relu" unless overridden.
    activation: str = "relu"
|
11 |
+
|
12 |
+
|
13 |
+
# Layer tables, one list of ``C`` entries per architecture. The headings are
# kept from the original file; the "(N Layers)" counts are one higher than the
# number of entries listed - presumably an extra layer is added by the model
# builder (TODO confirm).

# Conv 1 (2 Layers)
c1: C = C(38, (13, 26), (13, 26))
c1_layers: list = [c1]

# Conv 2 (3 Layers)
c2_layers: list = [
    C(35, (6, 7), (5, 6)),
    C(87, (6, 9), (5, 8)),
]

# Conv 3 (4 Layers)
c3_layers: list = [
    C(32, (4, 5), (3, 4)),
    C(98, (4, 6), (3, 5)),
    C(128, (4, 6), (3, 5)),
]

# Conv 4 (5 Layers)
c4_layers: list = [
    C(32, (3, 4), (2, 3)),
    C(65, (3, 4), (2, 3)),
    C(105, (3, 4), (2, 3)),
    C(128, (4, 5), (3, 4)),
]

# Conv 5 (6 Layers)
c5_layers: list = [
    C(32, (3, 3), (2, 2)),
    C(98, (3, 3), (2, 2)),
    C(128, (3, 4), (2, 3)),
    C(128, (3, 5), (2, 4)),
    C(128, (3, 3), (2, 2)),
]

# Conv 6 (7 Layers)
c6_layers: list = [
    C(32, (3, 3), (2, 2)),
    C(71, (3, 3), (2, 2)),
    C(128, (3, 4), (2, 3)),
    C(128, (3, 3), (2, 2)),
    C(128, (3, 3), (2, 2)),
    C(128, (3, 3), (1, 2)),
]

# Conv 6XL, 7 Layers
c6XL_layers: list = [
    C(64, (3, 3), (2, 2)),
    C(128, (3, 3), (2, 2)),
    C(128, (3, 4), (2, 3)),
    C(128, (3, 3), (2, 2)),
    C(256, (3, 3), (2, 2)),
    C(256, (3, 3), (1, 2)),
]

# Lookup from architecture name to its layer table.
layers_map: dict = dict(
    C1=c1_layers,
    C2=c2_layers,
    C3=c3_layers,
    C4=c4_layers,
    C5=c5_layers,
    C6=c6_layers,
    C6XL=c6XL_layers,
)
|
back/models/common/data_generator.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import h5py
|
2 |
+
import numpy as np
|
3 |
+
from scipy.io import wavfile
|
4 |
+
from tensorflow import keras
|
5 |
+
|
6 |
+
|
7 |
+
class SoundDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of (audio, labels) from an HDF5 file.

    The HDF5 file must contain a ``files`` dataset (paths of wav files) and a
    ``labels`` dataset with one label vector per file. ``first`` / ``last``
    restrict the generator to the leading / trailing fraction of the rows so
    that two generators can share one file as disjoint train/validation
    splits.
    """

    def __init__(
        self,
        data_file=None,
        batch_size=32,
        n_samps=16384,
        shuffle=True,
        last: float = 0.0,
        first: float = 0.0,
        channels_last=False,
        for_autoencoder=False,
    ):
        """Open ``data_file`` and select the rows this generator serves.

        Args:
            data_file: path of the HDF5 dataset index.
            batch_size: number of examples per batch.
            n_samps: only stored in ``self.dim``; the audio length actually
                used is read from the first wav file.
            shuffle: reshuffle the row order at the end of every epoch
                (only when this is exactly ``True``).
            last: if > 0, keep only the trailing fraction of the rows.
            first: if > 0 (and ``last`` is not), keep only the leading
                fraction of the rows.
            channels_last: stored as ``self.expand_axis`` but currently has
                no effect on the returned shape.
            for_autoencoder: return ``(labels, labels)`` instead of
                ``(audio, labels)``.
        """
        super().__init__()
        self.dim = (1, n_samps)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.data_file = data_file
        self.n_channels = 1
        self.for_autoencoder = for_autoencoder
        # For the E2E model, need to return channels last?
        self.expand_axis = 2 if channels_last else 1

        # The handle stays open for the lifetime of the generator because
        # every batch reads from it.
        self.database = h5py.File(data_file, "r")

        self.n_samps = self.read_file(0).shape[0]
        print("N Samps in audio data: {}".format(self.n_samps))

        # set up list of IDs from data files
        n_points = len(self.database["files"])
        self.list_IDs = range(n_points)

        print(f"Number of examples in dataset: {len(self.list_IDs)}")
        if last > 0.0:
            split = int(n_points * (1 - last))
            self.list_IDs = self.list_IDs[split:]
            print(f"Taking Last N points: {len(self.list_IDs)}")
        elif first > 0.0:
            split = int(n_points * first)
            self.list_IDs = self.list_IDs[:split]
            print(f"Taking First N points: {len(self.list_IDs)}")

        # set up label size from data files
        self.label_size = len(self.database["labels"][0])
        self.on_epoch_end()

    def get_audio_length(self):
        """Return the number of samples per audio example."""
        return self.n_samps

    def get_label_size(self):
        """Return the length of one label vector."""
        return self.label_size

    def __len__(self):
        """Return the number of full batches per epoch."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair."""
        start = index * self.batch_size
        positions = self.indexes[start : start + self.batch_size]

        # ``self.indexes`` holds positions *within* ``list_IDs``. They must be
        # translated to database rows; using them directly made a ``last=``
        # generator read the first rows of the file, i.e. rows that belong to
        # the ``first=`` (training) split.
        batch_ids = [self.list_IDs[int(k)] for k in positions]

        return self.__data_generation(batch_ids)

    def on_epoch_end(self):
        """Reset (and optionally reshuffle) the row order for the next epoch."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle is True:
            np.random.shuffle(self.indexes)

    # Think this makes things worse - fills up memory
    # @lru_cache(maxsize=150000)
    def read_file(self, index):
        """Read the wav file stored at row ``index`` and return its samples."""
        filename = self.database["files"][index]
        _, data = wavfile.read(filename)
        return data

    def __data_generation(self, list_IDs_temp):
        """Load the rows in ``list_IDs_temp`` as ``(X, y)`` arrays.

        ``X`` has shape ``(len(list_IDs_temp), 1, n_samps)``; ``y`` stacks
        the label vectors row-wise. Audio longer than ``n_samps`` is
        truncated with a warning.
        """
        X = []
        y = []
        for i in list_IDs_temp:
            # Read labels
            y.append(self.database["labels"][i])
            # Load soundfile data
            data = self.read_file(i)
            if data.shape[0] > self.n_samps:
                print(
                    "Warning - too many samples: {} > {}".format(
                        data.shape[0], self.n_samps
                    )
                )
            X.append(data[: self.n_samps])
        Xd = np.vstack(X).reshape((len(X), 1, self.n_samps))
        yd = np.vstack(y)

        if self.for_autoencoder:
            return yd, yd
        return Xd, yd
|
back/models/comparison.py
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import pickle
|
4 |
+
import re
|
5 |
+
|
6 |
+
import h5py
|
7 |
+
import numpy as np
|
8 |
+
from scipy.io import wavfile
|
9 |
+
from scipy.io.wavfile import write as write_wav
|
10 |
+
from tensorflow import keras
|
11 |
+
|
12 |
+
from generators.generator import InverSynthGenerator, SoundGenerator, VSTGenerator
|
13 |
+
from generators.parameters import ParameterSet
|
14 |
+
|
15 |
+
"""
|
16 |
+
This module generates comparisons - takes the original sound + params,
|
17 |
+
then generates a file with the predicted parameters
|
18 |
+
"""
|
19 |
+
|
20 |
+
|
21 |
+
def compare(
    model: keras.Model,
    generator: SoundGenerator,
    parameters: ParameterSet,
    orig_file: str,
    output_dir: str,
    orig_params,
    length: float,
    sample_rate: int,
    extra: dict = None,
):
    """Write a copy, a regeneration and a model reconstruction of one sound.

    Three files are produced in ``output_dir`` for ``orig_file``:
    ``<name>_copy.wav`` (the original samples), ``<name>_duplicate.wav``
    (generated from the original parameters) and ``<name>_reconstruct.wav``
    (generated from the parameters predicted by ``model``).

    Args:
        model: model whose ``predict`` maps audio to encoded parameters.
        generator: object whose ``generate`` renders settings to a wav file.
        parameters: provides ``encoding_to_settings`` for decoding.
        orig_file: path of the original wav file (``bytes`` paths, as
            returned by some HDF5 readers, are decoded as UTF-8).
        output_dir: folder receiving the three output files.
        orig_params: encoded parameters the original was generated from.
        length: forwarded to ``generator.generate``.
        sample_rate: forwarded to ``generator.generate``.
        extra: extra options forwarded to ``generator.generate``; defaults
            to a fresh empty dict.
    """
    if extra is None:
        # A fresh dict per call; a ``{}`` default would be shared by all calls.
        extra = {}
    if isinstance(orig_file, bytes):
        orig_file = orig_file.decode("utf-8")

    # (copy original file if given)
    base_filename = orig_file.replace(".wav", "")
    base_filename = re.sub(r".*/", "", base_filename)
    copy_file: str = f"{output_dir}/{base_filename}_copy.wav"
    regen_file: str = f"{output_dir}/{base_filename}_duplicate.wav"
    reconstruct_file: str = f"{output_dir}/{base_filename}_reconstruct.wav"
    print(f"Creating copy as {copy_file}")

    # Load the wave file
    fs, data = wavfile.read(orig_file)
    # Copy original file to make sure. The copy keeps the rate stored in the
    # file so it stays faithful even if ``sample_rate`` differs.
    write_wav(copy_file, fs, data)

    # Decode original params, and regenerate output (make sure its correct)
    orig = parameters.encoding_to_settings(orig_params)
    generator.generate(orig, regen_file, length, sample_rate, extra)

    # Run the wavefile into the model for prediction
    Xd = np.expand_dims(np.vstack([data]), axis=2)
    # Get encoded parameters out of model
    result = model.predict(Xd)[0]

    # Decode prediction, and reconstruct output
    predicted = parameters.encoding_to_settings(result)
    generator.generate(predicted, reconstruct_file, length, sample_rate, extra)
|
58 |
+
|
59 |
+
|
60 |
+
def run_comparison(
    model: keras.Model,
    generator: SoundGenerator,
    run_name: str,
    indices=None,
    num_samples=10,
    data_dir="./test_datasets",
    output_dir="./comparison",
    length=1.0,
    sample_rate=16384,
    shuffle=True,
    extra=None,
):
    """Run ``compare`` on a selection of examples from a dataset.

    Args:
        model: model handed to ``compare``.
        generator: sound generator handed to ``compare``.
        run_name: dataset prefix; ``<data_dir>/<run_name>_data.hdf5`` and
            ``<data_dir>/<run_name>_params.pckl`` are read.
        indices: dataset rows to compare. When ``None`` or empty,
            ``num_samples`` rows are picked (shuffled if ``shuffle``).
        num_samples: number of rows to pick when ``indices`` is not given.
        data_dir: folder holding the dataset files.
        output_dir: root output folder; files go to ``<output_dir>/<run_name>/``.
        length: forwarded to ``compare``.
        sample_rate: forwarded to ``compare``.
        shuffle: shuffle before picking rows.
        extra: extra options forwarded to ``compare``; defaults to a fresh
            empty dict.
    """
    if extra is None:
        extra = {}

    # Figure out data file and params file from run name
    data_file = f"{data_dir}/{run_name}_data.hdf5"
    parameters_file = f"{data_dir}/{run_name}_params.pckl"
    print(f"Reading parameters from {parameters_file}")
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    with open(parameters_file, "rb") as handle:
        parameters = pickle.load(handle)

    output_dir = f"{output_dir}/{run_name}/"
    os.makedirs(output_dir, exist_ok=True)

    with h5py.File(data_file, "r") as database:
        # ``not indices`` raises for NumPy arrays, so test explicitly.
        no_indices = indices is None or (
            hasattr(indices, "__len__") and len(indices) == 0
        )
        if no_indices:
            ids = np.arange(len(database["files"]))
            if shuffle:
                np.random.shuffle(ids)
            indices = ids[0:num_samples]

        for i in indices:
            print("Looking at index: {}".format(i))
            compare(
                model=model,
                generator=generator,
                parameters=parameters,
                orig_file=database["files"][i],
                output_dir=output_dir,
                orig_params=database["labels"][i],
                length=length,
                sample_rate=sample_rate,
                extra=extra,
            )
|
106 |
+
# Generate
|
107 |
+
|
108 |
+
|
109 |
+
if __name__ == "__main__":
    # Script entry point: runs the comparison for each enabled synth using
    # hard-coded model and plugin paths.

    note_length = 0.8
    sample_rate = 16384

    # Toggles for the two comparison runs below.
    lokomotiv = True
    fm = True

    if lokomotiv:
        # Star imports are only legal at module level, which is why this
        # block is not wrapped in a function.
        from generators.vst_generator import *

        run_name = "lokomotiv_full"
        model_file = "output/lokomotiv_full_e2e_best.h5"
        # Absolute plugin path under /Library/Audio/Plug-Ins.
        plugin = "/Library/Audio/Plug-Ins/VST/Lokomotiv.vst"
        config_file = "plugin_config/lokomotiv.json"
        generator = VSTGenerator(vst=plugin, sample_rate=sample_rate)
        with open(config_file, "r") as f:
            config = json.load(f)

        model = keras.models.load_model(model_file)
        run_comparison(
            model,
            generator,
            run_name,
            num_samples=100,
            extra={"note_length": note_length, "config": config},
        )

    if fm:
        # NOTE(review): confirm that `generators.fm_generator` exists;
        # `InverSynthGenerator` is already imported from
        # `generators.generator` at the top of this file.
        from generators.fm_generator import *

        run_name = "inversynth_full"
        model_file = "output/inversynth_full_e2e_best.h5"
        generator = InverSynthGenerator()
        model = keras.models.load_model(model_file)
        run_comparison(model, generator, run_name, num_samples=100)
|
back/models/convert_to_preset.py
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv;
|
2 |
+
import xml.etree.ElementTree as ET
|
3 |
+
|
4 |
+
def convert_csv_to_preset(path: str, output_path: str):
    """Turn a CSV of synth parameters into a TAL NoiseMaker preset XML file.

    The CSV must have ``name`` and ``value`` columns. Rows whose name is a
    known display name and whose value parses as a float are written as
    attributes of the single ``<program>`` element; every known attribute
    that did not receive a float ends up as ``0.0``.

    Returns:
        ``output_path``, the path the XML was written to.
    """
    display_to_attr = {
        'Master Volume': 'volume',
        'Filter Type': 'filtertype',
        'Filter Cutoff': 'cutoff',
        'Filter Resonance': 'resonance',
        'Filter Keyfollow': 'keyfollow',
        'Filter Contour': 'filtercontour',
        'Filter Attack': 'filterattack',
        'Filter Decay': 'filterdecay',
        'Filter Sustain': 'filtersustain',
        'Filter Release': 'filterrelease',
        'Amp Attack': 'ampattack',
        'Amp Decay': 'ampdecay',
        'Amp Sustain': 'ampsustain',
        'Amp Release': 'amprelease',
        'Osc 1 Volume': 'osc1volume',
        'Osc 2 Volume': 'osc2volume',
        'Osc 3 Volume': 'osc3volume',
        'Osc Mastertune': 'oscmastertune',
        'Osc 1 Tune': 'osc1tune',
        'Osc 2 Tune': 'osc2tune',
        'Osc 1 Fine Tune': 'osc1finetune',
        'Osc 2 Fine Tune': 'osc2finetune',
        'Osc 1 Waveform': 'osc1waveform',
        'Osc 2 Waveform': 'osc2waveform',
        'Osc Sync': 'oscsync',
        'Lfo 1 Waveform': 'lfo1waveform',
        'Lfo 2 Waveform': 'lfo2waveform',
        'Lfo 1 Rate': 'lfo1rate',
        'Lfo 2 Rate': 'lfo2rate',
        'Lfo 1 Amount': 'lfo1amount',
        'Lfo 2 Amount': 'lfo2amount',
        'Lfo 1 Destination': 'lfo1destination',
        'Lfo 2 Destination': 'lfo2destination',
        'Lfo 1 Phase': 'lfo1phase',
        'Lfo 2 Phase': 'lfo2phase',
        'Osc 2 FM': 'osc2fm',
        'Osc 2 Phase': 'osc2phase',
        'Osc 1 PW': 'osc1pw',
        'Osc 1 Phase': 'osc1phase',
        'Transpose': 'transpose',
        'Free Ad Attack': 'freeadattack',
        'Free Ad Decay': 'freeaddecay',
        'Free Ad Amount': 'freeadamount',
        'Free Ad Destination': 'freeaddestination',
        'Lfo 1 Sync': 'lfo1sync',
        'Lfo 1 Keytrigger': 'lfo1keytrigger',
        'Lfo 2 Sync': 'lfo2sync',
        'Lfo 2 Keytrigger': 'lfo2keytrigger',
        'Portamento Amount': 'portamento',
        'Portamento Mode': 'portamentomode',
        'Voices': 'voices',
        'Velocity Volume': 'velocityvolume',
        'Velocity Contour': 'velocitycontour',
        'Velocity Filter': 'velocitycutoff',
        'Pitchwheel Cutoff': 'pitchwheelcutoff',
        'Pitchwheel Pitch': 'pitchwheelpitch',
        'Ringmodulation': 'ringmodulation',
        'Chorus 1 Enable': 'chorus1enable',
        'Chorus 2 Enable': 'chorus2enable',
        'Reverb Wet': 'reverbwet',
        'Reverb Decay': 'reverbdecay',
        'Reverb Pre Delay': 'reverbpredelay',
        'Reverb High Cut': 'reverbhighcut',
        'Reverb Low Cut': 'reverblowcut',
        'Osc Bitcrusher': 'oscbitcrusher',
        'Master High Pass': 'highpass',
        'Master Detune': 'detune',
        'Vintage Noise': 'vintagenoise',
        'Envelope Destination': 'envelopeeditordest1',
        'Envelope Speed': 'envelopeeditorspeed',
        'Envelope Amount': 'envelopeeditoramount',
        'Envelope One Shot Mode': 'envelopeoneshot',
        'Envelope Fix Tempo': 'envelopefixtempo',
        'Filter Drive': 'filterdrive',
        'Delay Wet': 'delaywet',
        'Delay Time': 'delaytime',
        'Delay Sync': 'delaysync',
        'Delay x2 L': 'delayfactorl',
        'Delay x2 R': 'delayfactorr',
        'Delay High Shelf': 'delayhighshelf',
        'Delay Low Shelf': 'delaylowshelf',
        'Delay Feedback': 'delayfeedback',
    }

    # Preset attribute -> value. Each attribute starts out holding its display
    # name as a non-float placeholder, which is how "never set" is detected
    # further down.
    preset_values = dict(zip(display_to_attr.values(), display_to_attr.keys()))

    # Read CSV data from file
    with open(path, 'r') as csv_file:
        rows = list(csv.DictReader(csv_file))

    for row in rows:
        display_name = row['name']
        raw_value = row['value']

        # Rows for names we have no mapping for are ignored.
        if display_name not in display_to_attr:
            continue
        attr = display_to_attr[display_name]

        try:
            number = float(raw_value)
        except ValueError:
            print(f"Skipping non-numeric value for parameter {display_name}: {raw_value}")
            continue

        if attr in preset_values:
            preset_values[attr] = number

    print(preset_values)

    # Anything still holding a non-float placeholder is reset to 0.0.
    unset_attrs = [attr for attr, value in preset_values.items() if not isinstance(value, float)]
    for attr in unset_attrs:
        print(f"Removing attribute {attr} from daw_to_preset due to invalid float value.")
        preset_values[attr] = 0.0
    print(preset_values)

    # Generate XML: <tal><programs><program .../></programs><midimap/></tal>
    root = ET.Element(
        'tal',
        curprogram="0",
        version="1.7",
        presetName="CH Chordionator III FN",
        path="Factory Presets/CHORD/CH Chordionator III FN.noisemakerpreset",
    )
    programs = ET.SubElement(root, 'programs')
    program = ET.SubElement(
        programs,
        'program',
        programname="CH Chordionator III FN",
        unknown="0.5",
        volume="0.5",
    )
    # All parameters become attributes of the single <program> element.
    for attr, value in preset_values.items():
        program.set(attr, str(value))

    ET.SubElement(root, 'midimap')

    # Save the XML to a file
    ET.ElementTree(root).write(output_path)

    print(f"XML file written to {output_path}")

    return output_path
|
back/models/importer_audio.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import librosa.core.audio
|
2 |
+
import numpy as np
|
3 |
+
from scipy.io import wavfile
|
4 |
+
|
5 |
+
|
6 |
+
def audio_importer(filename):
    """Load an audio file as a one-item batch shaped ``(1, 1, 16384)``.

    The file is loaded through librosa with ``sr=16384``, ``mono=True`` and
    ``duration=1.0``. The result is then forced to exactly 16384 samples:
    shorter clips are zero-padded at the end and any surplus samples are
    dropped, so the batch always has the same length regardless of the
    source file.

    Args:
        filename: path of the audio file to load.

    Returns:
        NumPy array of shape ``(1, 1, 16384)`` with the dtype returned by
        librosa.
    """
    target_samps = 16384  # one second at the sample rate requested below

    # Load soundfile data
    data, _ = librosa.core.audio.load(filename, sr=16384, mono=True, duration=1.0)
    n_samps = data.shape[0]
    print("N Samps in audio data: {}".format(n_samps))

    # Fix the length: copy what fits into a zero-filled buffer.
    clip = np.zeros(target_samps, dtype=data.dtype)
    usable = min(n_samps, target_samps)
    clip[:usable] = data[:usable]

    return clip.reshape((1, 1, target_samps))
|
21 |
+
|
22 |
+
if __name__ == "__main__":
    # Manual smoke test: loads one sample file and prints its sample count.
    # NOTE(review): the path is an absolute location on one developer's
    # Windows machine and will not exist elsewhere.
    audio_importer("C:/Users/yderre/Downloads/inver-synth-master/inver-synth-master/test_waves/InverSynth/InverSynth_00006.wav")
|
back/models/launch.py
ADDED
@@ -0,0 +1,518 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
from pickle import load
|
5 |
+
from typing import Callable, List
|
6 |
+
import librosa
|
7 |
+
import numpy as np
|
8 |
+
import pandas as pd
|
9 |
+
import tensorflow as tf
|
10 |
+
from tensorflow import keras
|
11 |
+
from keras import backend as K
|
12 |
+
from kapre.time_frequency import Spectrogram
|
13 |
+
from models.convert_to_preset import convert_csv_to_preset
|
14 |
+
from models.importer_audio import audio_importer
|
15 |
+
import dawdreamer as daw
|
16 |
+
from scipy.io import wavfile
|
17 |
+
import librosa
|
18 |
+
|
19 |
+
|
20 |
+
from generators.parameters import ParameterSet, ParamValue
|
21 |
+
|
22 |
+
|
23 |
+
weight_var = K.variable(0.0)
|
24 |
+
|
25 |
+
|
26 |
+
class Weight_trans(keras.callbacks.Callback):
    """Keras callback that updates a loss-weight variable at the end of epochs.

    Nothing happens until the epoch index exceeds 680; after that the backend
    variable passed as ``weight_var`` is overwritten according to the selected
    ``transition`` curve.

    Args:
        weight_var: Backend variable to update (stored as ``self.alpha``).
        transition: One of "linear", "linear2", "log", "log2", "log3",
            "square" or "quad". Any other value leaves the variable untouched.
        epochs: Total number of epochs; only used by the "linear" curve.
    """

    def __init__(self, weight_var, transition, epochs):
        # Let the Keras base class initialise its own state first.
        super().__init__()
        self.alpha = weight_var
        self.transition = transition
        self.epochs = epochs

    def on_epoch_end(self, epoch, logs=None):
        """Update ``self.alpha`` once ``epoch`` is past the hard-coded threshold."""
        # NOTE(review): 680 and the curve constants below look tuned for one
        # specific training schedule - confirm before reusing elsewhere.
        if epoch <= 680:
            return

        transition = self.transition
        if transition == "linear":
            K.set_value(self.alpha, ((epoch) / (self.epochs) - 0.617) * 0.00001)
            # Report the variable this callback actually updated rather than
            # the module-level global.
            tf.print(f"new weight {K.get_value(self.alpha)}")
        elif transition == "linear2":
            K.set_value(self.alpha, (1.5625 * epoch - 1.0625) * 0.00001)
            tf.print(f"new weight {K.get_value(self.alpha)}")
        elif transition == "log":
            K.set_value(
                self.alpha,
                (
                    1
                    - (tf.math.log(epoch * 0.001 - 0.67285) / tf.math.log(0.0005))
                    - 0.35
                )
                * 0.00001,
            )
            tf.print("log")
        elif transition == "log2":
            K.set_value(
                self.alpha,
                (
                    1
                    - (tf.math.log(epoch * 0.001 - 0.6575) / tf.math.log(0.0005))
                    - 0.5
                )
                * 0.00001,
            )
            tf.print("log")
        elif transition == "log3":
            K.set_value(
                self.alpha,
                (
                    1
                    - (
                        tf.math.log(epoch * 0.001 - 0.67978)
                        / tf.math.log(0.00000005)
                    )
                    - 0.5
                )
                * 0.00001,
            )
            tf.print("log")
        elif transition == "square":
            K.set_value(self.alpha, 4.1 * tf.pow(epoch * 0.001 - 0.65, 2) + 0.002)
            # Label kept exactly as originally printed.
            print("exp")
        elif transition == "quad":
            K.set_value(self.alpha, 33 * tf.pow(epoch * 0.001 - 0.65, 4) + 0.002)
            print("quad")
|
82 |
+
|
83 |
+
|
84 |
+
"""Model Utils"""
|
85 |
+
|
86 |
+
|
87 |
+
def mean_percentile_rank(y_true, y_pred, k=5):
    """Placeholder for the Mean Percentile Rank (MPR) metric.

    @paper
    The first evaluation measure is the Mean Percentile Rank
    (MPR) which is computed per synthesizer parameter.

    The metric is not implemented yet: the arguments are ignored and the
    function returns ``None``.
    """
    # TODO: implement the metric.
    return None
|
94 |
+
|
95 |
+
|
96 |
+
def top_k_mean_accuracy(y_true, y_pred, k=5):
    """Return the mean top-k accuracy of ``y_pred`` against ``y_true``.

    @paper
    The top-k mean accuracy is obtained by computing the top-k
    accuracy for each test example and then taking the mean across
    all examples. In the same manner as done in the MPR analysis,
    we compute the top-k mean accuracy per synthesizer
    parameter for k = 1, ..., 5.

    Both tensors are flattened to 2-D over their last axis; the target class
    of each row is the argmax of ``y_true``.
    """
    # TODO: per parameter?
    leading_dims = tf.shape(y_true)[:-1]
    flat_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
    flat_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))

    target_index = tf.cast(tf.argmax(flat_true, axis=-1), "int32")
    hits = K.in_top_k(flat_pred, target_index, k)

    # Restore the leading dimensions before averaging the hit indicator.
    hits = tf.reshape(hits, leading_dims)
    return tf.reduce_mean(tf.cast(hits, tf.float32))
|
112 |
+
|
113 |
+
|
114 |
+
@tf.function
def CustomLoss(y_true, y_pred):
    """Binary cross-entropy weighted by a blend of 1 and the spectral loss.

    The sample weight is ``(1 - w) + w * spectral`` where ``w`` is the
    module-level ``weight_var`` and ``spectral`` is the value returned by
    ``custom_spectral_loss``.

    Args:
        y_true: Ground-truth parameter encodings.
        y_pred: Predicted parameter encodings.

    Returns:
        The weighted binary cross-entropy loss tensor.
    """
    bce = tf.keras.losses.BinaryCrossentropy()
    # Stay in tensor space: calling ``.numpy()`` only works while functions
    # are forced to run eagerly.
    spectral = tf.cast(custom_spectral_loss(y_true, y_pred), weight_var.dtype)
    # The weight is a constant scaling factor, so keep it out of the gradient
    # (the previous numpy round-trip had the same effect).
    weight_shift = tf.stop_gradient((1.0 - weight_var) + weight_var * spectral)
    # tf.print(f"New weight is {weight_shift}")
    return bce(y_true, y_pred, sample_weight=weight_shift)
|
122 |
+
|
123 |
+
|
124 |
+
# Pickled ParameterSet used to decode parameter vectors inside the loss.
_SPECTRAL_LOSS_PARAMS_FILE = "test_datasets/InverSynth_params.pckl"
_SPECTRAL_LOSS_PARAMS_CACHE = {}


def _load_spectral_loss_parameters(path=_SPECTRAL_LOSS_PARAMS_FILE):
    """Unpickle the ParameterSet at *path* once and reuse it afterwards."""
    if path not in _SPECTRAL_LOSS_PARAMS_CACHE:
        # NOTE(security): pickle can execute arbitrary code; only point this
        # at trusted files.
        with open(path, "rb") as f:
            _SPECTRAL_LOSS_PARAMS_CACHE[path] = load(f)
    return _SPECTRAL_LOSS_PARAMS_CACHE[path]


def _compute_spectrogram(audio):
    """Return the spectrogram of *audio* for the configured SPECTRO_TYPE."""
    if SPECTRO_TYPE == "spectro":
        return tf.math.abs(tf.signal.stft(audio, frame_length=1024, frame_step=512))
    if SPECTRO_TYPE == "qtrans":
        return librosa.amplitude_to_db(
            librosa.cqt(audio, sr=SAMPLE_RATE, hop_length=128), ref=np.max
        )
    if SPECTRO_TYPE == "mel":
        # Pass the signal by keyword: recent librosa releases no longer accept
        # it positionally, and the keyword form works on older ones too.
        mel_spect = librosa.feature.melspectrogram(
            y=audio, sr=SAMPLE_RATE, n_fft=2048, hop_length=1024
        )
        return librosa.power_to_db(mel_spect, ref=np.max)
    raise ValueError(f"Unknown SPECTRO_TYPE: {SPECTRO_TYPE!r}")


@tf.function
def custom_spectral_loss(y_true, y_pred):
    """Compare the audio rendered from true and predicted parameters.

    Only the first row of each (flattened) batch is decoded and rendered
    through ``generate_audio``; the two renders are turned into spectrograms
    according to ``SPECTRO_TYPE`` and compared according to ``LOSS_TYPE``.

    Args:
        y_true: Ground-truth parameter encodings.
        y_pred: Predicted parameter encodings.

    Returns:
        The spectral loss value.

    Raises:
        ValueError: If ``SPECTRO_TYPE`` or ``LOSS_TYPE`` is not a supported value.
    """
    # tf.print("After compiling model :",tf.executing_eagerly())
    y_true = tf.reshape(y_true, (-1, tf.shape(y_true)[-1]))
    y_pred = tf.reshape(y_pred, (-1, tf.shape(y_pred)[-1]))

    # Decode the encodings back into synthesizer parameter values. The
    # ParameterSet is cached so the pickle is not re-read on every loss call.
    parameters = _load_spectral_loss_parameters()
    predlist_true = parameters.decode(y_true[0])
    predlist_pred = parameters.decode(y_pred[0])

    # Render audio for both parameter sets; the penalty of the second render
    # is the one used below, as before.
    audio_true, _ = generate_audio(predlist_true)
    audio_pred, penalty = generate_audio(predlist_pred)

    spectrogram_true = _compute_spectrogram(audio_true)
    spectrogram_pred = _compute_spectrogram(audio_pred)

    if LOSS_TYPE == "L1":
        return penalty * tf.reduce_mean(tf.abs(spectrogram_true - spectrogram_pred))
    if LOSS_TYPE == "L2":
        return penalty * tf.reduce_mean((spectrogram_true - spectrogram_pred) ** 2)
    if LOSS_TYPE == "COSINE":
        # ``tf.losses`` has no ``cosine_distance`` in TF2; the function with
        # this signature lives under ``tf.compat.v1.losses``.
        # NOTE(review): that API expects unit-normalized inputs - confirm
        # whether the spectrograms should be normalized first.
        return tf.compat.v1.losses.cosine_distance(
            spectrogram_true, spectrogram_pred, weights=1.0, axis=-1
        )
    raise ValueError(f"Unknown LOSS_TYPE: {LOSS_TYPE!r}")
|
186 |
+
|
187 |
+
|
188 |
+
def compare(target, prediction, params, precision=1, print_output=False):
    """Compare one predicted parameter encoding against its target.

    Both vectors are decoded with ``params.decode``; each decoded parameter is
    reduced to the argmax index of its ``encoding``.

    Args:
        target: Ground-truth encoding for one example.
        prediction: Predicted encoding for the same example.
        params: Object whose ``decode`` method returns items exposing
            ``name``, ``value`` and ``encoding``.
        precision: Maximum index distance still counted as "close".
        print_output: When true, print a per-parameter comparison table.

    Returns:
        ``(exact_ratio, close_ratio)``: the fraction of parameters whose
        predicted index equals the target index, and the fraction whose index
        is within ``precision`` of it.
    """
    if print_output and len(prediction) < 10:
        print(prediction)
        print("Pred: {}".format(np.round(prediction, decimals=2)))
        print("PRnd: {}".format(np.round(prediction)))
        print("Act : {}".format(target))
        print("+" * 5)

    pred = params.decode(prediction)
    act = params.decode(target)
    pred_index = [np.array(p.encoding).argmax() for p in pred]
    act_index = [np.array(p.encoding).argmax() for p in act]

    # Both lists come from the same decoder, so they are walked in lockstep.
    n_params = len(pred_index)
    exact = sum(1.0 for p, a in zip(pred_index, act_index) if p == a)
    close = sum(
        1.0 for p, a in zip(pred_index, act_index) if abs(p - a) <= precision
    )
    exact_ratio = exact / n_params
    close_ratio = close / n_params

    if print_output:
        # The report rows are only built when they are going to be printed.
        # Every label is 11 characters wide so the 8-wide columns line up.
        width = 8
        print("Parameter: " + "".join(p.name.rjust(width)[:width] for p in act))
        print("Actual:    " + "".join(f"{p.value:>8.2f}" for p in act))
        print("Predicted: " + "".join(f"{p.value:>8.2f}" for p in pred))
        print("Act. Index:" + "".join(f"{a:>8}" for a in act_index))
        print("Pred. Indx:" + "".join(f"{p:>8}" for p in pred_index))
        print(
            "Index Diff:"
            + "".join(f"{p - a:>8}" for p, a in zip(pred_index, act_index))
        )
        print("-" * 30)
    return exact_ratio, close_ratio
|
238 |
+
|
239 |
+
|
240 |
+
def evaluate(
    prediction: np.ndarray,
    x: np.ndarray,
    y: np.ndarray,
    params: ParameterSet,
):
    """Print accuracy statistics for a batch of predictions.

    Each prediction is scored against its target with ``compare``; the
    comparison table is printed for the first five examples only.

    Args:
        prediction: Predicted encodings, one row per example.
        x: Model inputs; only the size of the first axis is used.
        y: Target encodings, one row per example.
        params: Parameter set used to decode encodings and to describe itself
            via ``explain()``.
    """
    print("Prediction Shape: {}".format(prediction.shape))

    num: int = x.shape[0]
    if num == 0:
        # Nothing to score; the percentages below would divide by zero.
        print("No examples to evaluate")
        return

    correct: int = 0
    correct_r: float = 0.0
    close_r: float = 0.0
    for i in range(num):
        exact, close = compare(
            target=y[i],
            prediction=prediction[i],
            params=params,
            print_output=i < 5,
        )
        # An example is "perfect" only when every parameter index matched.
        if exact == 1.0:
            correct += 1
        correct_r += exact
        close_r += close

    summary = params.explain()
    print(
        "{} Parameters with {} levels (fixed: {})".format(
            summary["n_variable"], summary["levels"], summary["n_fixed"]
        )
    )
    print(
        "Got {} out of {} ({:.1f}% perfect); Exact params: {:.1f}%, Close params: {:.1f}%".format(
            correct,
            num,
            correct / num * 100,
            correct_r / num * 100,
            close_r / num * 100,
        )
    )
|
279 |
+
|
280 |
+
|
281 |
+
"""
|
282 |
+
Wrap up the whole training process in a standard function. Gets a callback
|
283 |
+
to actually make the model, to keep it as flexible as possible.
|
284 |
+
# Params:
|
285 |
+
# - dataset_name (dataset name)
|
286 |
+
# - model_name: (C1..C6,e2e)
|
287 |
+
# - model_callback: function taking name,inputs,outputs,data_format and returning a Keras model
|
288 |
+
# - epochs: int
|
289 |
+
# - dataset_dir: place to find input data
|
290 |
+
# - output_dir: place to put outputs
|
291 |
+
# - parameters_file (override parameters filename)
|
292 |
+
# - dataset_file (override dataset filename)
|
293 |
+
# - data_format (channels_first or channels_last)
|
294 |
+
# - run_name: to save this run as
|
295 |
+
"""
|
296 |
+
# Settings read by the custom spectral loss.
LOSS_TYPE = "L1"
SPECTRO_TYPE = "spectro"
PRINT = 1

# DawDreamer render settings.
SAMPLE_RATE = 16384
BUFFER_SIZE = 1024
SYNTH_PLUGIN = "TAL-NoiseMaker.vst3"

# One shared engine/plugin pair, created at import time, with a single MIDI
# note queued: (MIDI note, velocity, start, duration).
ENGINE = daw.RenderEngine(SAMPLE_RATE, BUFFER_SIZE)
SYNTH = ENGINE.make_plugin_processor("my_synth", SYNTH_PLUGIN)
SYNTH.add_midi_note(40, 127, 0, 0.8)

with open("plugin_config/TAL-NoiseMaker-config.json") as f:
    data = json.load(f)

# Collect the "id" of every entry in the config's "parameters" list, in order.
key_id = data["parameters"]
dico = [param["id"] for param in key_id]

DICO = dico
|
320 |
+
|
321 |
+
|
322 |
+
def train_model(
    # Main options
    dataset_name: str,
    model_name: str,
    epochs: int,
    model_callback: Callable[[str, int, int, str], keras.Model],
    dataset_dir: str,
    output_dir: str,  # Directory names
    dataset_file: str = None,
    parameters_file: str = None,
    run_name: str = None,
    data_format: str = "channels_last",
    save_best: bool = True,
    resume: bool = False,
    checkpoint: bool = True,
    model_type: str = "STFT",
):
    """Resolve run paths and, when resuming, load a saved model.

    As written, this function does not train anything: ``model_callback``,
    ``data_format``, ``save_best``, ``checkpoint`` and ``model_type`` are
    accepted for interface compatibility but are not used, and ``epochs`` is
    only reported.

    Args:
        dataset_name: Base name used to derive the default dataset and
            parameter file names.
        model_name: Architecture name; used for the run name and logging.
        epochs: Number of epochs, reported in the log output.
        model_callback: Model factory (unused here).
        dataset_dir: Directory, relative to the current working directory,
            that holds the dataset files.
        output_dir: Directory under which model directories are created.
        dataset_file: Explicit dataset path overriding the derived one.
        parameters_file: Explicit parameters path overriding the derived one.
        run_name: Name for this run; defaults to ``dataset_name_model_name``.
        resume: When true, load the model from the hard-coded checkpoint path
            if that path exists.

    Returns:
        ``(model, parameters_file)``. ``model`` is ``None`` unless a
        checkpoint was loaded.
    """
    tf.config.run_functions_eagerly(True)
    # tf.data.experimental.enable_debug_mode()
    time_generated = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if not dataset_file:
        dataset_file = (
            os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_data.hdf5"
        )
    if not parameters_file:
        parameters_file = (
            os.getcwd() + "/" + dataset_dir + "/" + dataset_name + "_params.pckl"
        )
    if not run_name:
        run_name = dataset_name + "_" + model_name

    model_file = f"{output_dir}/model/{run_name}_{time_generated}"
    os.makedirs(model_file, exist_ok=True)
    best_model_file = f"{output_dir}/best_checkpoint/{run_name}_best_{time_generated}"
    os.makedirs(best_model_file, exist_ok=True)

    # Always bind these names: they are printed below even when not resuming,
    # which previously raised UnboundLocalError for resume=False.
    checkpoint_model_file = None
    history_file = None
    if resume:
        # NOTE(review): these point at one specific earlier run - confirm
        # whether they should be derived from run_name instead.
        checkpoint_model_file = (
            f"{output_dir}/checkpoints/InverSynth_C6XL_checkpoint_20240123-100644"
        )
        history_file = f"{output_dir}/history/InverSynth_C6XL_20240123-100644"

    gpus = tf.config.list_physical_devices("GPU")
    print(gpus)
    gpu_avail = len(gpus)
    cuda_gpu_avail = gpu_avail

    print("+" * 30)
    print(f"++ {run_name}")
    print(
        f"Running model: {model_name} on dataset {dataset_file} (parameters {parameters_file}) for {epochs} epochs"
    )
    print(f"Saving model in {output_dir} as {model_file}")
    print(f"Saving history as {history_file}")
    print(f"GPU: {gpu_avail}, with CUDA: {cuda_gpu_avail}")
    print("+" * 30)

    os.makedirs(output_dir, exist_ok=True)

    model: keras.Model = None
    if resume and os.path.exists(checkpoint_model_file):
        history = pd.read_csv(history_file)
        # Note - its zero indexed in the file, but 1 indexed in the display
        initial_epoch: int = max(history.iloc[:, 0]) + 1
        print(
            f"Resuming from model file: {checkpoint_model_file} after epoch {initial_epoch}"
        )
        model = keras.models.load_model(
            checkpoint_model_file,
            custom_objects={
                "top_k_mean_accuracy": top_k_mean_accuracy,
                "Spectrogram": Spectrogram,
                "custom_spectral_loss": custom_spectral_loss,
                "CustomLoss": CustomLoss,
            },
        )
    else:
        print("No checkpoint loaded; returning model=None")

    return model, parameters_file
|
406 |
+
|
407 |
+
|
408 |
+
def inferrence(model: keras.Model, parameters_file: str, file_path: str, file_id: str):
    """Predict synth parameters for one audio file and render the result.

    The prediction is decoded with the pickled parameter set, written to
    ``temp/{file_id}_config.csv``, converted to a preset via
    ``convert_csv_to_preset`` and rendered through the shared ``SYNTH`` /
    ``ENGINE`` to ``temp/{file_id}_generated.wav``.

    Args:
        model: Trained model used for the prediction.
        parameters_file: Path of the pickled ParameterSet.
        file_path: Path of the input audio file.
        file_id: Identifier used to name the files written under ``temp/``.

    Returns:
        ``(file_path, xml_path, output_file_path)``: the input path, the value
        returned by ``convert_csv_to_preset`` and the rendered WAV path.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        RuntimeError: If loading the audio or running the model fails.
    """
    # NOTE(security): pickle can execute arbitrary code; only load trusted
    # parameter files.
    with open(parameters_file, "rb") as f:
        parameters: ParameterSet = load(f)

    print("++++" * 5)
    print("Pushing to trained model")
    print("++++" * 5)

    # Raise a real exception: ``raise "text"`` itself fails with TypeError.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File Path invalid, try again: {file_path}")

    try:
        newpred = model.predict(audio_importer(str(file_path)))
    except Exception as err:
        # Keep the underlying failure attached instead of discarding it.
        raise RuntimeError("Crashed") from err

    predlist: List[ParamValue] = parameters.decode(newpred[0])
    df = pd.DataFrame(predlist)

    print(df)
    df = df.drop(["encoding"], axis=1)

    # Make sure the output directory exists before writing into it.
    os.makedirs("temp", exist_ok=True)
    print("Outputting CSV config in " + "temp/")

    csv_path = f"temp/{file_id}_config.csv"
    preset_path = f"temp/{file_id}_config.noisemakerpreset"
    df.to_csv(csv_path)

    xml_path = convert_csv_to_preset(csv_path, preset_path)

    # Push each predicted value to the plugin parameter id at the same
    # position in DICO.
    for i, value in enumerate(df["value"].values):
        SYNTH.set_parameter(DICO[i], value)
    # Setting volume to 0.9
    SYNTH.set_parameter(1, 0.9)

    # synth takes no inputs, so we give an empty list.
    ENGINE.load_graph([(SYNTH, [])])
    ENGINE.render(1)
    data = ENGINE.get_audio()
    try:
        data = librosa.to_mono(data).transpose()
    except Exception:
        tf.print("ERROR" * 100)
        # Zero the NaNs in the rendered audio and retry. The earlier fallback
        # converted the parameter DataFrame here instead of the audio.
        data = np.where(np.isnan(data), 0, data)
        data = librosa.to_mono(data).transpose()
        tf.print("crashed, nan in generation")

    synth_params = dict(SYNTH.get_patch())
    print(synth_params)

    output_file_path = f"temp/{file_id}_generated.wav"
    wavfile.write(output_file_path, SAMPLE_RATE, data)

    return file_path, xml_path, output_file_path
|
488 |
+
|
489 |
+
|
490 |
+
def generate_audio(df_params):
    """Render audio for a list of decoded parameters.

    Each item's ``value`` is sent to the shared ``SYNTH`` under the parameter
    id found at the same position in ``DICO``; the engine then renders and the
    result is mixed down to mono.

    Args:
        df_params: Iterable of decoded parameters exposing ``value``.

    Returns:
        ``(audio, penalty)``: the rendered mono audio as a numpy array and a
        penalty factor, which is always 1.
    """
    penalty = 1
    for i, param in enumerate(df_params):
        SYNTH.set_parameter(DICO[i], param.value)

    # synth takes no inputs, so we give an empty list.
    ENGINE.load_graph([(SYNTH, [])])
    ENGINE.render(1)
    data = ENGINE.get_audio()
    try:
        data = librosa.to_mono(data).transpose()
    except Exception:
        print("ERROR" * 100)
        # Zero the NaNs and retry the mix-down.
        # assumes the engine returns a 2-D (channels, samples) array - TODO confirm
        data = np.where(np.isnan(data), 0, data)
        data = librosa.to_mono(data).transpose()

    return np.array(data), penalty
|
back/models/runner.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
from models.launch import train_model
|
4 |
+
from models.spectrogram_cnn import get_model as get_spectrogram
|
5 |
+
|
6 |
+
|
7 |
+
def standard_run_parser() -> argparse.ArgumentParser:
    """Build the command-line parser shared by the model runners.

    Returns:
        A parser producing the attributes ``model_name``, ``dataset_name``,
        ``epochs``, ``dataset_dir``, ``output_dir``, ``dataset_file``,
        ``parameters_file``, ``data_format``, ``run_name`` and ``resume``.
    """
    parser = argparse.ArgumentParser(
        description="Setup and train a model, storing the output"
    )
    parser.add_argument(
        "--model",
        dest="model_name",
        type=str,
        choices=["C1", "C2", "C3", "C4", "C5", "C6", "C6XL", "e2e"],
        default="e2e",
        help="Model architecture to run",
    )
    parser.add_argument(
        "--dataset_name",
        default="InverSynth",
        help='Name of the dataset to use - other filenames are generated from this. If you have a file "modelname_data.hdf5", put in "modelname"',
    )
    parser.add_argument(
        "--epochs", type=int, default=100, help="How many epochs to run"
    )
    parser.add_argument(
        "--dataset_dir",
        default="test_datasets",
        help="Directory full of datasets to use",
    )
    parser.add_argument(
        "--output_dir",
        default="output",
        help="Directory to store the final model and history",
    )
    parser.add_argument(
        "--dataset_file", default=None, help="Specify an exact dataset file to use"
    )
    parser.add_argument(
        "--parameters_file",
        default=None,
        help="Specify an exact parameters file to use",
    )
    parser.add_argument(
        "--data_format",
        type=str,
        choices=["channels_last", "channels_first"],
        default="channels_last",
        help="Image data format for Keras. If CPU only, has to be channels_last",
    )
    parser.add_argument(
        "--run_name",
        type=str,
        dest="run_name",
        help="Name to save the output under. Defaults to dataset_name + model",
    )
    # ``store_true`` is the idiomatic spelling of store_const/const=True/
    # default=False.
    parser.add_argument(
        "--resume",
        dest="resume",
        action="store_true",
        help="Look for a checkpoint file to resume from",
    )
    return parser
|
67 |
+
|
68 |
+
|
69 |
+
if __name__ == "__main__":
    print("Starting model runner")

    # Parse the standard command-line options into keyword arguments.
    setup = vars(standard_run_parser().parse_args())
    print("Parsed arguments")

    # The spectrogram model factory is the only callback used by this runner.
    train_model(model_callback=get_spectrogram, **setup)
|
back/models/spectrogram_cnn.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
# import keras
|
4 |
+
from kapre.time_frequency import Spectrogram
|
5 |
+
from tensorflow import keras
|
6 |
+
|
7 |
+
from generators.generator import *
|
8 |
+
from models.common.architectures import layers_map
|
9 |
+
|
10 |
+
|
11 |
+
"""
|
12 |
+
The STFT spectrogram of the input signal is fed
|
13 |
+
into a 2D CNN that predicts the synthesizer parameter
|
14 |
+
configuration. This configuration is then used to produce
|
15 |
+
a sound that is similar to the input sound.
|
16 |
+
"""
|
17 |
+
|
18 |
+
|
19 |
+
"""Model Architecture"""
|
20 |
+
# @ paper:
|
21 |
+
# 1 2D Strided Convolution Layer C(38,13,26,13,26)
|
22 |
+
# where C(F,K1,K2,S1,S2) stands for a ReLU activated
|
23 |
+
# 2D strided convolutional layer with F filters in size of (K1,K2)
|
24 |
+
# and strides (S1,S2).
|
25 |
+
|
26 |
+
|
27 |
+
def assemble_model(
    src: np.ndarray,
    n_outputs: int,
    arch_layers: list,
    n_dft: int = 512,  # Orig:128
    n_hop: int = 256,  # Orig:64
    data_format: str = "channels_first",
) -> keras.Model:
    """Build the spectrogram CNN.

    The network is: input -> kapre ``Spectrogram`` (decibel output, trainable
    kernel) -> ``Permute`` -> one ``Conv2D`` per entry of ``arch_layers`` ->
    ``Flatten`` -> ``Dense(512)`` -> sigmoid ``Dense(n_outputs)``.

    Args:
        src: Example input; only its ``shape`` is used.
        n_outputs: Size of the final sigmoid layer.
        arch_layers: Layer specs exposing ``filters``, ``window_size``,
            ``strides`` and ``activation``.
        n_dft: DFT size passed to the spectrogram layer.
        n_hop: Hop size passed to the spectrogram layer.
        data_format: "channels_first" or "channels_last".

    Returns:
        The assembled (uncompiled) Keras model.
    """
    input_shape = src.shape
    inputs = keras.Input(shape=input_shape, name="stft")

    # @paper: Spectrogram based CNN that receives the (log) spectrogram matrix as input
    # @kapre: abs(Spectrogram) in a shape of 2D data, i.e.,
    #   `(None, n_channel, n_freq, n_time)` if `'channels_first'`,
    #   `(None, n_freq, n_time, n_channel)` if `'channels_last'`.
    spectrogram_layer = Spectrogram(
        n_dft=n_dft,
        n_hop=n_hop,
        input_shape=input_shape,
        trainable_kernel=True,
        name="static_stft",
        image_data_format=data_format,
        return_decibel_spectrogram=True,
    )
    x = spectrogram_layer(inputs)

    # Swap the frequency and time axes, leaving the channel axis in place.
    # TODO: dig in to this (GPU only?)
    axis_order = (1, 3, 2) if data_format == "channels_first" else (2, 1, 3)
    x = keras.layers.Permute(axis_order)(x)

    # One convolution per architecture entry, applied in order.
    for spec in arch_layers:
        conv = keras.layers.Conv2D(
            spec.filters,
            spec.window_size,
            strides=spec.strides,
            activation=spec.activation,
            data_format=data_format,
            padding='same',
        )
        x = conv(x)

    # Flatten down to a single dimension
    x = keras.layers.Flatten()(x)

    # @paper: sigmoid activations with binary cross entropy loss
    # @paper: FC-512 (no activation is set on this layer)
    x = keras.layers.Dense(512)(x)

    # @paper: FC-368(sigmoid)
    outputs = keras.layers.Dense(n_outputs, activation="sigmoid", name="predictions")(x)

    return keras.Model(inputs=inputs, outputs=outputs)
|
88 |
+
|
89 |
+
|
90 |
+
"""
|
91 |
+
Standard callback to get a model ready to train
|
92 |
+
"""
|
93 |
+
|
94 |
+
|
95 |
+
def get_model(
    model_name: str, inputs: int, outputs: int, data_format: str = "channels_last"
) -> keras.Model:
    """Return the spectrogram model for the named architecture.

    Args:
        model_name: Key into ``layers_map``; unknown names fall back to "C1"
            with a printed warning.
        inputs: Length of the input signal; the model input shape is
            ``(1, inputs)``.
        outputs: Size of the prediction layer.
        data_format: "channels_last" or "channels_first".
    """
    if model_name in layers_map:
        arch_layers = layers_map.get(model_name)
    else:
        arch_layers = layers_map.get("C1")
        print(
            f"Warning: {model_name} is not compatible with the spectrogram model. C1 Architecture will be used instead."
        )

    # Only the shape of this placeholder is read by assemble_model.
    placeholder = np.zeros([1, inputs])
    return assemble_model(
        placeholder,
        n_outputs=outputs,
        arch_layers=arch_layers,
        data_format=data_format,
    )
|
111 |
+
|
112 |
+
|
113 |
+
if __name__ == "__main__":

    from models.launch import train_model, inferrence
    from models.runner import standard_run_parser

    # Get a standard parser, and the arguments out of it
    parser = standard_run_parser()
    # inferrence() needs an input file and an id for its output names; these
    # were previously not supplied, so the call could not succeed.
    parser.add_argument(
        "--input_file",
        default=None,
        help="Audio file to run inference on after the model is loaded",
    )
    parser.add_argument(
        "--file_id",
        default="cli",
        help="Identifier used to name the files written by inference",
    )
    args = parser.parse_args()
    setup = vars(args)
    print(setup)

    # These two options belong to inference only; train_model does not accept them.
    input_file = setup.pop("input_file")
    file_id = setup.pop("file_id")

    # distinguish model type for reshaping
    setup["model_type"] = "STFT"
    # tf.config.run_functions_eagerly(True)

    model, parameters_file = train_model(model_callback=get_model, **setup)

    if input_file is None:
        print("No --input_file given; skipping inference")
    else:
        # inferrence returns three paths: input, preset and rendered audio.
        file_path, xml_path, wav_path = inferrence(
            model, parameters_file, input_file, file_id
        )

        print(file_path)
        print(xml_path)
        print(wav_path)
|
back/output.xml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
<tal curprogram="0" version="1.7" presetName="CH Chordionator III FN" path="Factory Presets/CHORD/CH Chordionator III FN.noisemakerpreset"><programs><program programname="CH Chordionator III FN" unknown="0.5" volume="0" -="0" filtertype="0.63636364" cutoff="0.6666666666666666" resonance="0.9333333333333333" keyfollow="0.26666666666666666" filtercontour="0.06666666666666667" filterattack="0.13333333333333333" filterdecay="0.26666666666666666" filtersustain="0.3333333333333333" filterrelease="0.13333333333333333" ampattack="0.8" ampdecay="1.0" ampsustain="0.4666666666666667" amprelease="0.13333333333333333" osc1volume="0.26666666666666666" osc2volume="0.9333333333333333" osc3volume="0.3333333333333333" oscmastertune="0" osc1tune="0.6" osc2tune="0.8" osc1finetune="0.6666666666666666" osc2finetune="0.2" osc1waveform="0.0" osc2waveform="0.5" oscsync="1.0" lfo1waveform="0.6" lfo2waveform="1.0" lfo1rate="0.6666666666666666" lfo2rate="0.9333333333333333" lfo1amount="0.06666666666666667" lfo2amount="1.0" lfo1destination="0.2857142857142857" lfo2destination="0.14285714285714285" lfo1phase="0.8666666666666667" lfo2phase="0.9333333333333333" osc2fm="0.4" osc2phase="0.4666666666666667" osc1pw="0.7333333333333333" osc1phase="0.5333333333333333" transpose="0" freeadattack="0.3333333333333333" freeaddecay="0.26666666666666666" freeadamount="0.5333333333333333" freeaddestination="0.2" lfo1sync="1.0" lfo1keytrigger="0" lfo2sync="0.0" lfo2keytrigger="0" portamento="0" portamentomode="0" voices="0" velocityvolume="0" velocitycontour="0" velocitycutoff="0" pitchwheelcutoff="0" pitchwheelpitch="0" ringmodulation="0.4666666666666667" chorus1enable="0.0" chorus2enable="1.0" reverbwet="0.4" reverbdecay="0.8666666666666667" reverbpredelay="0.5333333333333333" reverbhighcut="0.4666666666666667" reverblowcut="0.4666666666666667" oscbitcrusher="0.2" highpass="0.06666666666666667" detune="0.13333333333333333" vintagenoise="0.9333333333333333" envelopeeditordest1="0" envelopeeditorspeed="0" 
envelopeeditoramount="0" envelopeoneshot="0" envelopefixtempo="0" filterdrive="0.0" delaywet="0.13333333333333333" delaytime="1.0" delaysync="1.0" delayfactorl="0.0" delayfactorr="0.0" delayhighshelf="0.4666666666666667" delaylowshelf="1.0" delayfeedback="0.5333333333333333" /></programs><midimap /></tal>
|
back/plugin_config/TAL-NoiseMaker-config.json
ADDED
@@ -0,0 +1,422 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"parameters": [
|
3 |
+
{
|
4 |
+
"id": 11,
|
5 |
+
"name": "Amp Attack",
|
6 |
+
"values": "-"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"id": 12,
|
10 |
+
"name": "Amp Decay",
|
11 |
+
"values": "-"
|
12 |
+
},
|
13 |
+
{
|
14 |
+
"id": 14,
|
15 |
+
"name": "Amp Release",
|
16 |
+
"values": "-"
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"id": 13,
|
20 |
+
"name": "Amp Sustain",
|
21 |
+
"values": "-"
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"id": 58,
|
25 |
+
"name": "Chorus 1 Enable",
|
26 |
+
"values": [0.0, 1.0]
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"id": 59,
|
30 |
+
"name": "Chorus 2 Enable",
|
31 |
+
"values": [0.0, 1.0]
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"id": 3,
|
35 |
+
"name": "Filter Cutoff",
|
36 |
+
"values": "-"
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"id": 81,
|
40 |
+
"name": "Delay x2 L",
|
41 |
+
"values": [0.0, 1.0]
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"id": 82,
|
45 |
+
"name": "Delay x2 R",
|
46 |
+
"values": [0.0, 1.0]
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"id": 85,
|
50 |
+
"name": "Delay Feedback",
|
51 |
+
"values": "-"
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"id": 83,
|
55 |
+
"name": "Delay High Shelf",
|
56 |
+
"values": "-"
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"id": 84,
|
60 |
+
"name": "Delay Low Shelf",
|
61 |
+
"values": "-"
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"id": 80,
|
65 |
+
"name": "Delay Sync",
|
66 |
+
"values": [0.0, 1.0]
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"id": 79,
|
70 |
+
"name": "Delay Time",
|
71 |
+
"values": "-"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"id": 78,
|
75 |
+
"name": "Delay Wet",
|
76 |
+
"values": "-"
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"id": 67,
|
80 |
+
"name": "Master Detune",
|
81 |
+
"values": "-"
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"id": 7,
|
85 |
+
"name": "Filter Attack",
|
86 |
+
"values": "-"
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"id": 6,
|
90 |
+
"name": "Filter Contour",
|
91 |
+
"values": "-"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"id": 8,
|
95 |
+
"name": "Filter Decay",
|
96 |
+
"values": "-"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"id": 77,
|
100 |
+
"name": "Filter Drive",
|
101 |
+
"values": "-"
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"id": 10,
|
105 |
+
"name": "Filter Release",
|
106 |
+
"values": "-"
|
107 |
+
},
|
108 |
+
{
|
109 |
+
"id": 9,
|
110 |
+
"name": "Filter Sustain",
|
111 |
+
"values": "-"
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"id": 2,
|
115 |
+
"name": "Filter Type",
|
116 |
+
"values": [0.0, 0.09090909, 0.18181818, 0.27272727, 0.36363636, 0.45454545, 0.54545455, 0.63636364, 0.72727273, 0.81818182, 0.90909091, 1.0]
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"id": 43,
|
120 |
+
"name": "Free Ad Amount",
|
121 |
+
"values": "-"
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"id": 41,
|
125 |
+
"name": "Free Ad Attack",
|
126 |
+
"values": "-"
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"id": 42,
|
130 |
+
"name": "Free Ad Decay",
|
131 |
+
"values": "-"
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"id": 44,
|
135 |
+
"name": "Free Ad Destination",
|
136 |
+
"values": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"id": 66,
|
140 |
+
"name": "Master High Pass",
|
141 |
+
"values": "-"
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"id": 5,
|
145 |
+
"name": "Filter Keyfollow",
|
146 |
+
"values": "-"
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"id": 30,
|
150 |
+
"name": "Lfo 1 Amount",
|
151 |
+
"values": "-"
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"id": 32,
|
155 |
+
"name": "Lfo 1 Destination",
|
156 |
+
"values": [0.0, 0.14285714285714285, 0.2857142857142857, 0.42857142857142855, 0.5714285714285714, 0.7142857142857142, 0.8571428571428571, 1.0]
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"id": 34,
|
160 |
+
"name": "Lfo 1 Phase",
|
161 |
+
"values": "-"
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"id": 28,
|
165 |
+
"name": "Lfo 1 Rate",
|
166 |
+
"values": "-"
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"id": 45,
|
170 |
+
"name": "Lfo 1 Sync",
|
171 |
+
"values": [0.0, 1.0]
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"id": 26,
|
175 |
+
"name": "Lfo 1 Waveform",
|
176 |
+
"values": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"id": 31,
|
180 |
+
"name": "Lfo 2 Amount",
|
181 |
+
"values": "-"
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"id": 33,
|
185 |
+
"name": "Lfo 2 Destination",
|
186 |
+
"values": [0.0, 0.14285714285714285, 0.2857142857142857, 0.42857142857142855, 0.5714285714285714, 0.7142857142857142, 0.8571428571428571, 1.0]
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"id": 35,
|
190 |
+
"name": "Lfo 2 Phase",
|
191 |
+
"values": "-"
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"id": 29,
|
195 |
+
"name": "Lfo 2 Rate",
|
196 |
+
"values": "-"
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"id": 47,
|
200 |
+
"name": "Lfo 2 Sync",
|
201 |
+
"values": [0.0, 1.0]
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"id": 27,
|
205 |
+
"name": "Lfo 2 Waveform",
|
206 |
+
"values": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"id": 21,
|
210 |
+
"name": "Osc 1 Fine Tune",
|
211 |
+
"values": "-"
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"id": 39,
|
215 |
+
"name": "Osc 1 Phase",
|
216 |
+
"values": "-"
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"id": 38,
|
220 |
+
"name": "Osc 1 PW",
|
221 |
+
"values": "-"
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"id": 19,
|
225 |
+
"name": "Osc 1 Tune",
|
226 |
+
"values": "-"
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"id": 15,
|
230 |
+
"name": "Osc 1 Volume",
|
231 |
+
"values": "-"
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"id": 23,
|
235 |
+
"name": "Osc 1 Waveform",
|
236 |
+
"values":[0.0, 0.5, 1.0]
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"id": 22,
|
240 |
+
"name": "Osc 2 Fine Tune",
|
241 |
+
"values": "-"
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"id": 36,
|
245 |
+
"name": "Osc 2 FM",
|
246 |
+
"values": "-"
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"id": 37,
|
250 |
+
"name": "Osc 2 Phase",
|
251 |
+
"values": "-"
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"id": 20,
|
255 |
+
"name": "Osc 2 Tune",
|
256 |
+
"values": "-"
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"id": 16,
|
260 |
+
"name": "Osc 2 Volume",
|
261 |
+
"values": "-"
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"id": 24,
|
265 |
+
"name": "Osc 2 Waveform",
|
266 |
+
"values":[0.0, 0.5, 1.0]
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"id": 17,
|
270 |
+
"name": "Osc 3 Volume",
|
271 |
+
"values": "-"
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"id": 65,
|
275 |
+
"name": "Osc Bitcrusher",
|
276 |
+
"values": "-"
|
277 |
+
},
|
278 |
+
{
|
279 |
+
"id": 25,
|
280 |
+
"name": "Osc Sync",
|
281 |
+
"values": [0.0, 1.0]
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"id": 4,
|
285 |
+
"name": "Filter Resonance",
|
286 |
+
"values": "-"
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"id": 61,
|
290 |
+
"name": "Reverb Decay",
|
291 |
+
"values": "-"
|
292 |
+
},
|
293 |
+
{
|
294 |
+
"id": 63,
|
295 |
+
"name": "Reverb High Cut",
|
296 |
+
"values": "-"
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"id": 64,
|
300 |
+
"name": "Reverb Low Cut",
|
301 |
+
"values": "-"
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"id": 62,
|
305 |
+
"name": "Reverb Pre Delay",
|
306 |
+
"values": "-"
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"id": 60,
|
310 |
+
"name": "Reverb Wet",
|
311 |
+
"values": "-"
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"id": 57,
|
315 |
+
"name": "Ringmodulation",
|
316 |
+
"values": "-"
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"id": 68,
|
320 |
+
"name": "Vintage Noise",
|
321 |
+
"values": "-"
|
322 |
+
}
|
323 |
+
],
|
324 |
+
|
325 |
+
"fixed_parameters": [
|
326 |
+
{
|
327 |
+
"id": 73,
|
328 |
+
"name": "Envelope Amount",
|
329 |
+
"value": 0.0
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"id": 71,
|
333 |
+
"name": "Envelope Destination",
|
334 |
+
"value": 0.0
|
335 |
+
},
|
336 |
+
{
|
337 |
+
"id": 72,
|
338 |
+
"name": "Envelope Speed",
|
339 |
+
"value": 0.0
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"id": 75,
|
343 |
+
"name": "Envelope Fix Tempo",
|
344 |
+
"value": 0.0
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"id": 74,
|
348 |
+
"name": "Envelope One Shot Mode",
|
349 |
+
"value": 0.0
|
350 |
+
},
|
351 |
+
{
|
352 |
+
"id": 46,
|
353 |
+
"name": "Lfo 1 Keytrigger",
|
354 |
+
"value": 0.0
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"id": 48,
|
358 |
+
"name": "Lfo 2 Keytrigger",
|
359 |
+
"value": 0.0
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"id": 18,
|
363 |
+
"name": "Osc Mastertune",
|
364 |
+
"value": 0.5
|
365 |
+
},
|
366 |
+
{
|
367 |
+
"id": 55,
|
368 |
+
"name": "Pitchwheel Cutoff",
|
369 |
+
"value": 0.0
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"id": 56,
|
373 |
+
"name": "Pitchwheel Pitch",
|
374 |
+
"value": 0.0
|
375 |
+
},
|
376 |
+
{
|
377 |
+
"id": 49,
|
378 |
+
"name": "Portamento Amount",
|
379 |
+
"value": 0.0
|
380 |
+
},
|
381 |
+
{
|
382 |
+
"id": 50,
|
383 |
+
"name": "Portamento Mode",
|
384 |
+
"value": 0.0
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"id": 40,
|
388 |
+
"name": "Transpose",
|
389 |
+
"value": 0.5
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"id": 53,
|
393 |
+
"name": "Velocity Contour",
|
394 |
+
"value": 0.0
|
395 |
+
},
|
396 |
+
{
|
397 |
+
"id": 54,
|
398 |
+
"name": "Velocity Filter",
|
399 |
+
"value": 0.0
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"id": 52,
|
403 |
+
"name": "Velocity Volume",
|
404 |
+
"value": 0.0
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"id": 51,
|
408 |
+
"name": "Voices",
|
409 |
+
"value": 0.0
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"id": 1,
|
413 |
+
"name": "Master Volume",
|
414 |
+
"value": 0.6
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"id": 76,
|
418 |
+
"name": "Envelope Reset",
|
419 |
+
"value": 0.0
|
420 |
+
}
|
421 |
+
]
|
422 |
+
}
|
back/plugin_config/gen_config_libTAL-NoiseMaker.so.json
ADDED
@@ -0,0 +1,435 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"parameters": [
|
3 |
+
{
|
4 |
+
"id": 0,
|
5 |
+
"name": "-",
|
6 |
+
"value": 0.0
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"id": 1,
|
10 |
+
"name": "Master Volume",
|
11 |
+
"value": 0.0
|
12 |
+
},
|
13 |
+
{
|
14 |
+
"id": 2,
|
15 |
+
"name": "Filter Type",
|
16 |
+
"value": 0.0
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"id": 3,
|
20 |
+
"name": "Filter Cutoff",
|
21 |
+
"value": 0.0
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"id": 4,
|
25 |
+
"name": "Filter Resonance",
|
26 |
+
"value": 0.0
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"id": 5,
|
30 |
+
"name": "Filter Keyfollow",
|
31 |
+
"value": 0.0
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"id": 6,
|
35 |
+
"name": "Filter Contour",
|
36 |
+
"value": 0.0
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"id": 7,
|
40 |
+
"name": "Filter Attack",
|
41 |
+
"value": 0.0
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"id": 8,
|
45 |
+
"name": "Filter Decay",
|
46 |
+
"value": 0.0
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"id": 9,
|
50 |
+
"name": "Filter Sustain",
|
51 |
+
"value": 0.0
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"id": 10,
|
55 |
+
"name": "Filter Release",
|
56 |
+
"value": 0.0
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"id": 11,
|
60 |
+
"name": "Amp Attack",
|
61 |
+
"value": 0.0
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"id": 12,
|
65 |
+
"name": "Amp Decay",
|
66 |
+
"value": 0.0
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"id": 13,
|
70 |
+
"name": "Amp Sustain",
|
71 |
+
"value": 0.0
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"id": 14,
|
75 |
+
"name": "Amp Release",
|
76 |
+
"value": 0.0
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"id": 15,
|
80 |
+
"name": "Osc 1 Volume",
|
81 |
+
"value": 0.0
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"id": 16,
|
85 |
+
"name": "Osc 2 Volume",
|
86 |
+
"value": 0.0
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"id": 17,
|
90 |
+
"name": "Osc 3 Volume",
|
91 |
+
"value": 0.0
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"id": 18,
|
95 |
+
"name": "Osc Mastertune",
|
96 |
+
"value": 0.0
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"id": 19,
|
100 |
+
"name": "Osc 1 Tune",
|
101 |
+
"value": 0.0
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"id": 20,
|
105 |
+
"name": "Osc 2 Tune",
|
106 |
+
"value": 0.0
|
107 |
+
},
|
108 |
+
{
|
109 |
+
"id": 21,
|
110 |
+
"name": "Osc 1 Fine Tune",
|
111 |
+
"value": 0.0
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"id": 22,
|
115 |
+
"name": "Osc 2 Fine Tune",
|
116 |
+
"value": 0.0
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"id": 23,
|
120 |
+
"name": "Osc 1 Waveform",
|
121 |
+
"value": 0.0
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"id": 24,
|
125 |
+
"name": "Osc 2 Waveform",
|
126 |
+
"value": 0.0
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"id": 25,
|
130 |
+
"name": "Osc Sync",
|
131 |
+
"value": 0.0
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"id": 26,
|
135 |
+
"name": "Lfo 1 Waveform",
|
136 |
+
"value": 0.0
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"id": 27,
|
140 |
+
"name": "Lfo 2 Waveform",
|
141 |
+
"value": 0.0
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"id": 28,
|
145 |
+
"name": "Lfo 1 Rate",
|
146 |
+
"value": 0.0
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"id": 29,
|
150 |
+
"name": "Lfo 2 Rate",
|
151 |
+
"value": 0.0
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"id": 30,
|
155 |
+
"name": "Lfo 1 Amount",
|
156 |
+
"value": 0.0
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"id": 31,
|
160 |
+
"name": "Lfo 2 Amount",
|
161 |
+
"value": 0.0
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"id": 32,
|
165 |
+
"name": "Lfo 1 Destination",
|
166 |
+
"value": 0.0
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"id": 33,
|
170 |
+
"name": "Lfo 2 Destination",
|
171 |
+
"value": 0.0
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"id": 34,
|
175 |
+
"name": "Lfo 1 Phase",
|
176 |
+
"value": 0.0
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"id": 35,
|
180 |
+
"name": "Lfo 2 Phase",
|
181 |
+
"value": 0.0
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"id": 36,
|
185 |
+
"name": "Osc 2 FM",
|
186 |
+
"value": 0.0
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"id": 37,
|
190 |
+
"name": "Osc 2 Phase",
|
191 |
+
"value": 0.0
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"id": 38,
|
195 |
+
"name": "Osc 1 PW",
|
196 |
+
"value": 0.0
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"id": 39,
|
200 |
+
"name": "Osc 1 Phase",
|
201 |
+
"value": 0.0
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"id": 40,
|
205 |
+
"name": "Transpose",
|
206 |
+
"value": 0.0
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"id": 41,
|
210 |
+
"name": "Free Ad Attack",
|
211 |
+
"value": 0.0
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"id": 42,
|
215 |
+
"name": "Free Ad Decay",
|
216 |
+
"value": 0.0
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"id": 43,
|
220 |
+
"name": "Free Ad Amount",
|
221 |
+
"value": 0.0
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"id": 44,
|
225 |
+
"name": "Free Ad Destination",
|
226 |
+
"value": 0.0
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"id": 45,
|
230 |
+
"name": "Lfo 1 Sync",
|
231 |
+
"value": 0.0
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"id": 46,
|
235 |
+
"name": "Lfo 1 Keytrigger",
|
236 |
+
"value": 0.0
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"id": 47,
|
240 |
+
"name": "Lfo 2 Sync",
|
241 |
+
"value": 0.0
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"id": 48,
|
245 |
+
"name": "Lfo 2 Keytrigger",
|
246 |
+
"value": 0.0
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"id": 49,
|
250 |
+
"name": "Portamento Amount",
|
251 |
+
"value": 0.0
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"id": 50,
|
255 |
+
"name": "Portamento Mode",
|
256 |
+
"value": 0.0
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"id": 51,
|
260 |
+
"name": "Voices",
|
261 |
+
"value": 0.0
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"id": 52,
|
265 |
+
"name": "Velocity Volume",
|
266 |
+
"value": 0.0
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"id": 53,
|
270 |
+
"name": "Velocity Contour",
|
271 |
+
"value": 0.0
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"id": 54,
|
275 |
+
"name": "Velocity Filter",
|
276 |
+
"value": 0.0
|
277 |
+
},
|
278 |
+
{
|
279 |
+
"id": 55,
|
280 |
+
"name": "Pitchwheel Cutoff",
|
281 |
+
"value": 0.0
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"id": 56,
|
285 |
+
"name": "Pitchwheel Pitch",
|
286 |
+
"value": 0.0
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"id": 57,
|
290 |
+
"name": "Ringmodulation",
|
291 |
+
"value": 0.0
|
292 |
+
},
|
293 |
+
{
|
294 |
+
"id": 58,
|
295 |
+
"name": "Chorus 1 Enable",
|
296 |
+
"value": 0.0
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"id": 59,
|
300 |
+
"name": "Chorus 2 Enable",
|
301 |
+
"value": 0.0
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"id": 60,
|
305 |
+
"name": "Reverb Wet",
|
306 |
+
"value": 0.0
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"id": 61,
|
310 |
+
"name": "Reverb Decay",
|
311 |
+
"value": 0.0
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"id": 62,
|
315 |
+
"name": "Reverb Pre Delay",
|
316 |
+
"value": 0.0
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"id": 63,
|
320 |
+
"name": "Reverb High Cut",
|
321 |
+
"value": 0.0
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"id": 64,
|
325 |
+
"name": "Reverb Low Cut",
|
326 |
+
"value": 0.0
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"id": 65,
|
330 |
+
"name": "Osc Bitcrusher",
|
331 |
+
"value": 0.0
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"id": 66,
|
335 |
+
"name": "Master High Pass",
|
336 |
+
"value": 0.0
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"id": 67,
|
340 |
+
"name": "Master Detune",
|
341 |
+
"value": 0.0
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"id": 68,
|
345 |
+
"name": "Vintage Noise",
|
346 |
+
"value": 0.0
|
347 |
+
},
|
348 |
+
{
|
349 |
+
"id": 69,
|
350 |
+
"name": "Panic",
|
351 |
+
"value": 0.0
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"id": 70,
|
355 |
+
"name": "MIDI LEARN",
|
356 |
+
"value": 0.0
|
357 |
+
},
|
358 |
+
{
|
359 |
+
"id": 71,
|
360 |
+
"name": "Envelope Destination",
|
361 |
+
"value": 0.0
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"id": 72,
|
365 |
+
"name": "Envelope Speed",
|
366 |
+
"value": 0.0
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"id": 73,
|
370 |
+
"name": "Envelope Amount",
|
371 |
+
"value": 0.0
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"id": 74,
|
375 |
+
"name": "Envelope One Shot Mode",
|
376 |
+
"value": 0.0
|
377 |
+
},
|
378 |
+
{
|
379 |
+
"id": 75,
|
380 |
+
"name": "Envelope Fix Tempo",
|
381 |
+
"value": 0.0
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"id": 76,
|
385 |
+
"name": "Envelope Reset",
|
386 |
+
"value": 0.0
|
387 |
+
},
|
388 |
+
{
|
389 |
+
"id": 77,
|
390 |
+
"name": "Filter Drive",
|
391 |
+
"value": 0.0
|
392 |
+
},
|
393 |
+
{
|
394 |
+
"id": 78,
|
395 |
+
"name": "Delay Wet",
|
396 |
+
"value": 0.0
|
397 |
+
},
|
398 |
+
{
|
399 |
+
"id": 79,
|
400 |
+
"name": "Delay Time",
|
401 |
+
"value": 0.0
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"id": 80,
|
405 |
+
"name": "Delay Sync",
|
406 |
+
"value": 0.0
|
407 |
+
},
|
408 |
+
{
|
409 |
+
"id": 81,
|
410 |
+
"name": "Delay x2 L",
|
411 |
+
"value": 0.0
|
412 |
+
},
|
413 |
+
{
|
414 |
+
"id": 82,
|
415 |
+
"name": "Delay x2 R",
|
416 |
+
"value": 0.0
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"id": 83,
|
420 |
+
"name": "Delay High Shelf",
|
421 |
+
"value": 0.0
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"id": 84,
|
425 |
+
"name": "Delay Low Shelf",
|
426 |
+
"value": 0.0
|
427 |
+
},
|
428 |
+
{
|
429 |
+
"id": 85,
|
430 |
+
"name": "Delay Feedback",
|
431 |
+
"value": 0.0
|
432 |
+
}
|
433 |
+
],
|
434 |
+
"fixed_parameters": []
|
435 |
+
}
|
back/requirements.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wheel
|
2 |
+
fastapi
|
3 |
+
uvicorn[standard]
|
4 |
+
python-multipart
|
5 |
+
python-dotenv
|
6 |
+
aiofiles
|
7 |
+
torch
|
8 |
+
torchaudio
|
9 |
+
torchmetrics
|
10 |
+
torchvision
|
11 |
+
dataclasses
|
12 |
+
dawdreamer
|
13 |
+
matplotlib
|
14 |
+
pandas
|
15 |
+
samplerate
|
16 |
+
tensorboard
|
17 |
+
tensorflow-estimator
|
18 |
+
tensorflow
|
19 |
+
scikit-learn
|
20 |
+
scipy
|
21 |
+
numpy
|
22 |
+
numba
|
23 |
+
kapre==0.1.7
|
24 |
+
keras-applications
|
25 |
+
keras-preprocessing
|
26 |
+
keras
|
27 |
+
librosa
|
28 |
+
h5py
|
back/utils/export_to_excel.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
# One-off helper: convert a generated parameter table from CSV to an Excel workbook.
# NOTE(review): the input path has no ".csv" extension -- confirm the file name.
file = pd.read_csv("output/InverSynth_C6XL_20231201-103344")  # Read the dataframe

# index=False keeps the DataFrame's row index out of the spreadsheet.
# NOTE(review): writing .xlsx presumably needs an Excel engine such as openpyxl,
# which requirements.txt does not list -- confirm it is installed.
file.to_excel('foo.xlsx', index=False)  # Save the dataframe
|
back/utils/import csv.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
|
3 |
+
# Print the value stored in the third column of every data row of the CSV.

# Open the CSV file. newline='' is what the csv module asks for, so that
# line breaks inside quoted fields are handed to the reader untouched.
with open('InverSynth_00006.wav.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)

    # Skip the header row; the default keeps an empty file from raising
    # StopIteration here.
    next(reader, None)

    # Loop through the rows
    for row in reader:
        # Get the floating-point number from the third column
        value = float(row[2])

        # Do something with the value
        print(f"The value is {value}")
|
back/utils/import json.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
# Print the ids listed under "parameters" in the TAL-NoiseMaker config.

# Load the JSON data from a file (explicit encoding, so the result does not
# depend on the platform's default locale).
with open('plugin_config/TAL-NoiseMaker-config.json', encoding='utf-8') as f:
    data = json.load(f)

# Collect the "id" of every entry of the "parameters" list, in file order.
key_id = data['parameters']
dico = [param['id'] for param in key_id]

print(dico)
|
back/utils/synth.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
import contextlib
|
5 |
+
import glob
|
6 |
+
import io
|
7 |
+
import json
|
8 |
+
import logging
|
9 |
+
import multiprocessing
|
10 |
+
import os
|
11 |
+
import random
|
12 |
+
import time
|
13 |
+
import traceback
|
14 |
+
import sys
|
15 |
+
|
16 |
+
from pydub import AudioSegment, silence
|
17 |
+
from anyio import Path
|
18 |
+
import dawdreamer as daw
|
19 |
+
import numpy as np
|
20 |
+
from scipy.io import wavfile
|
21 |
+
from pydub import AudioSegment
|
22 |
+
from pydub.silence import split_on_silence
|
23 |
+
import tqdm
|
24 |
+
import csv
|
25 |
+
|
26 |
+
|
27 |
+
SAMPLE_RATE = 44100
|
28 |
+
# Parameters will undergo automation at this buffer/block size.
|
29 |
+
BUFFER_SIZE = 128
|
30 |
+
PPQN = 960 # Pulses per quarter note.
|
31 |
+
|
32 |
+
SYNTH_PLUGIN = "libTAL-NoiseMaker.so"
|
33 |
+
# SYNTH_PLUGIN = "C:/Program Files/Common Files/VST3/Surge Synth Team/Surge XT.vst3/Contents/x86_64-win/Surge XT.vst3"
|
34 |
+
|
35 |
+
|
36 |
+
def make_sine(freq: float, duration: float, sr=SAMPLE_RATE):
    """Build a sine tone.

    Args:
        freq: Frequency of the tone in Hz.
        duration: Length of the tone in seconds.
        sr: Sample rate in samples per second (defaults to SAMPLE_RATE).

    Returns:
        A NumPy array holding ``int(duration * sr)`` samples of
        ``sin(2 * pi * freq * k / sr)`` for ``k = 0, 1, 2, ...``.
    """
    sample_count = int(duration * sr)  # fractional samples are truncated
    sample_index = np.arange(sample_count)
    return np.sin(np.pi*2.*freq*sample_index/sr)
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
def gen():
    """Render one test note from the synth plugin using parameter values from a CSV.

    Steps, in order:
      1. Load the plugin named by ``SYNTH_PLUGIN`` into a DawDreamer render engine.
      2. Dump the names of plugin parameters 1..85 to ``params_.json``.
      3. Queue a single MIDI note, then apply the third-column value of each
         data row of ``InverSynth_01998.wav.csv`` to the plugin parameter whose
         index equals the row's position (header excluded, counting from 0).
      4. Render the graph and write the audio to ``yay/test_.wav``.
      5. Open the plugin editor.

    Takes no arguments and returns nothing. All paths are relative to the
    current working directory.
    """
    engine = daw.RenderEngine(SAMPLE_RATE, BUFFER_SIZE)
    output_dir = Path("yay")

    # Make a processor and give it the unique name "my_synth", which we use later.
    synth = engine.make_plugin_processor("my_synth", SYNTH_PLUGIN)
    assert synth.get_name() == "my_synth"

    # Dump the parameter names to a JSON file, keyed by parameter index.
    params = {param: synth.get_parameter_name(param) for param in range(1, 86)}
    with open('params_.json', 'w') as f:
        json.dump(params, f)

    # A factory preset could be loaded here with synth.load_preset(path)
    # instead of taking the values from the CSV below.

    # Get the parameters description from the plugin
    parameters = synth.get_parameters_description()

    # (MIDI note, velocity, start, duration)
    synth.add_midi_note(40, 127, 0, 0.2)

    # newline='' is what the csv module asks for when opening files for a reader.
    with open('InverSynth_01998.wav.csv', 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)

        # Skip the header row; the default keeps an empty file from raising
        # StopIteration here.
        next(reader, None)

        # Data row i drives plugin parameter i.
        for i, row in enumerate(reader):
            # Get the floating-point number from the third column
            value = float(row[2])
            print(f"{parameters[i]['name']} changed from {parameters[i]['defaultValue']} to {value} ")
            synth.set_parameter(i, value)

    # The graph holds only the synth: no effect processors (e.g. reverb) follow it.
    graph = [
        # synth takes no inputs, so we give an empty list.
        (synth, []),
    ]

    engine.load_graph(graph)
    engine.render(1)  # NOTE(review): presumably a duration of 1 second -- confirm
    output = engine.get_audio()

    # wavfile.write() does not create missing directories, so make sure the
    # target directory exists before writing.
    os.makedirs(str(output_dir), exist_ok=True)
    # NOTE(review): the transpose presumably turns (channels, samples) into the
    # (samples, channels) layout wavfile.write expects -- confirm.
    wavfile.write(
        str(output_dir/'test_.wav'), SAMPLE_RATE, output.transpose())
    synth.open_editor()  # Open the editor, make changes, and close


if __name__ == "__main__":
    gen()
|