onlybetheone committed on
Commit
a3241bc
·
verified ·
1 Parent(s): 535925d
music_tokenizer/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 8,
4
+ "batch_size": 140,
5
+ "learning_rate": 0.00002,
6
+ "adam_b1": 0.5,
7
+ "adam_b2": 0.9,
8
+ "lr_decay": 0.98,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [8,5,4,2],
12
+ "upsample_kernel_sizes": [16,11,8,4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,5,7,9,11,13],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5], [1,3,5], [1,3,5], [1,3,5]],
16
+
17
+ "segment_size": 48000,
18
+ "num_mels": 80,
19
+ "num_freq": 1024,
20
+ "n_fft": 1024,
21
+ "hop_size": 240,
22
+ "win_size": 1024,
23
+
24
+ "sampling_rate": 48000,
25
+
26
+ "n_code_groups": 2,
27
+ "n_codes": 1024,
28
+ "codebook_loss_lambda": 1.0,
29
+ "commitment_loss_lambda": 0.25,
30
+
31
+ "fmin": 0,
32
+ "fmax": 48000,
33
+ "fmax_for_loss": null,
34
+
35
+ "num_workers": 24,
36
+
37
+ "dist_config": {
38
+ "dist_backend": "nccl",
39
+ "dist_url": "tcp://localhost:54321",
40
+ "world_size": 1
41
+ }
42
+ }
music_tokenizer/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba71efdc50378baf9776d607eb11566907c3810e6f221c316719c02591135626
3
+ size 537087507
wavtokenizer/config.yaml ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pytorch_lightning==1.8.6
2
+ seed_everything: 3407
3
+ trainer:
4
+ logger:
5
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
6
+ init_args:
7
+ save_dir: ./result/
8
+ name: lightning_logs
9
+ version: null
10
+ log_graph: false
11
+ default_hp_metric: true
12
+ prefix: ''
13
+ sub_dir: null
14
+ logdir: null
15
+ comment: ''
16
+ purge_step: null
17
+ max_queue: 10
18
+ flush_secs: 120
19
+ filename_suffix: ''
20
+ write_to_disk: true
21
+ comet_config:
22
+ disabled: true
23
+ enable_checkpointing: true
24
+ callbacks:
25
+ - class_path: pytorch_lightning.callbacks.LearningRateMonitor
26
+ init_args:
27
+ logging_interval: null
28
+ log_momentum: false
29
+ - class_path: pytorch_lightning.callbacks.ModelSummary
30
+ init_args:
31
+ max_depth: 2
32
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
33
+ init_args:
34
+ dirpath: null
35
+ filename: wavtokenizer_checkpoint_{epoch}_{step}_{val_loss:.4f}
36
+ monitor: val_loss
37
+ verbose: false
38
+ save_last: true
39
+ save_top_k: 10
40
+ save_weights_only: false
41
+ mode: min
42
+ auto_insert_metric_name: true
43
+ every_n_train_steps: 1000
44
+ train_time_interval: null
45
+ every_n_epochs: null
46
+ save_on_train_epoch_end: null
47
+ - class_path: inspiremusic.wavtokenizer.decoder.helpers.GradNormCallback
48
+ default_root_dir: null
49
+ gradient_clip_val: null
50
+ gradient_clip_algorithm: null
51
+ num_nodes: 1
52
+ num_processes: null
53
+ devices: -1
54
+ gpus: null
55
+ auto_select_gpus: false
56
+ tpu_cores: null
57
+ ipus: null
58
+ enable_progress_bar: true
59
+ overfit_batches: 0.0
60
+ track_grad_norm: -1
61
+ check_val_every_n_epoch: 1
62
+ fast_dev_run: false
63
+ accumulate_grad_batches: null
64
+ max_epochs: null
65
+ min_epochs: null
66
+ max_steps: 20000000
67
+ min_steps: null
68
+ max_time: null
69
+ limit_train_batches: null
70
+ limit_val_batches: 100
71
+ limit_test_batches: null
72
+ limit_predict_batches: null
73
+ val_check_interval: null
74
+ log_every_n_steps: 1000
75
+ accelerator: gpu
76
+ strategy: ddp
77
+ sync_batchnorm: false
78
+ precision: 32
79
+ enable_model_summary: true
80
+ num_sanity_val_steps: 2
81
+ resume_from_checkpoint: null
82
+ profiler: null
83
+ benchmark: null
84
+ deterministic: null
85
+ reload_dataloaders_every_n_epochs: 0
86
+ auto_lr_find: false
87
+ replace_sampler_ddp: true
88
+ detect_anomaly: false
89
+ auto_scale_batch_size: false
90
+ plugins: null
91
+ amp_backend: native
92
+ amp_level: null
93
+ move_metrics_to_cpu: false
94
+ multiple_trainloader_mode: max_size_cycle
95
+ inference_mode: true
96
+ ckpt_path: null
97
+ data:
98
+ class_path: inspiremusic.wavtokenizer.decoder.dataset.VocosDataModule
99
+ init_args:
100
+ train_params:
101
+ filelist_path: train.scp
102
+ sampling_rate: 24000
103
+ num_samples: 72000
104
+ batch_size: 38
105
+ num_workers: 8
106
+ val_params:
107
+ filelist_path: test.scp
108
+ sampling_rate: 24000
109
+ num_samples: 72000
110
+ batch_size: 10
111
+ num_workers: 8
112
+ model:
113
+ class_path: inspiremusic.wavtokenizer.decoder.experiment.WavTokenizer
114
+ init_args:
115
+ feature_extractor:
116
+ class_path: inspiremusic.wavtokenizer.decoder.feature_extractors.EncodecFeatures
117
+ init_args:
118
+ encodec_model: encodec_24khz
119
+ bandwidths:
120
+ - 6.6
121
+ - 6.6
122
+ - 6.6
123
+ - 6.6
124
+ train_codebooks: true
125
+ num_quantizers: 1
126
+ dowmsamples:
127
+ - 8
128
+ - 5
129
+ - 4
130
+ - 2
131
+ vq_bins: 4096
132
+ vq_kmeans: 200
133
+ backbone:
134
+ class_path: inspiremusic.wavtokenizer.decoder.models.VocosBackbone
135
+ init_args:
136
+ input_channels: 512
137
+ dim: 768
138
+ intermediate_dim: 2304
139
+ num_layers: 12
140
+ layer_scale_init_value: null
141
+ adanorm_num_embeddings: 4
142
+ head:
143
+ class_path: inspiremusic.wavtokenizer.decoder.heads.ISTFTHead
144
+ init_args:
145
+ dim: 768
146
+ n_fft: 1280
147
+ hop_length: 320
148
+ padding: same
149
+ resume_config: config.yaml
150
+ resume_model: last.ckpt
151
+ sample_rate: 24000
152
+ initial_learning_rate: 0.0001
153
+ num_warmup_steps: 0
154
+ mel_loss_coeff: 45.0
155
+ mrd_loss_coeff: 1.0
156
+ pretrain_mel_steps: 0
157
+ decay_mel_coeff: false
158
+ evaluate_utmos: false
159
+ evaluate_pesq: true
160
+ evaluate_periodicty: true
161
+ resume: true
162
+
163
+
164
+
wavtokenizer/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65dc00edbd293c0b4de81045648688207e5e69f1c32025beaaba0eb273fa851c
3
+ size 1754883448