Update config.json
Browse files
- config.json +2 -2
config.json
CHANGED
@@ -22,7 +22,7 @@
|
|
22 |
"mm_use_im_start_end": false,
|
23 |
"mm_vision_select_feature": "patch",
|
24 |
"mm_vision_select_layer": -1,
|
25 |
-
"music_encoder": "./
|
26 |
"mm_vision_tower": "./pretrained/oryx_vit.pth",
|
27 |
"mm_vision_tower_lr": null,
|
28 |
"modality_max_length": "None",
|
@@ -34,7 +34,7 @@
|
|
34 |
"rope_scaling": null,
|
35 |
"rope_theta": 1000000.0,
|
36 |
"sliding_window": null,
|
37 |
-
"speech_encoder": "./
|
38 |
"speech_encoder_ds_rate": 10,
|
39 |
"speech_encoder_hidden_size": 2048,
|
40 |
"speech_encoder_type": "dual",
|
|
|
22 |
"mm_use_im_start_end": false,
|
23 |
"mm_vision_select_feature": "patch",
|
24 |
"mm_vision_select_layer": -1,
|
25 |
+
"music_encoder": "./BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt",
|
26 |
"mm_vision_tower": "./pretrained/oryx_vit.pth",
|
27 |
"mm_vision_tower_lr": null,
|
28 |
"modality_max_length": "None",
|
|
|
34 |
"rope_scaling": null,
|
35 |
"rope_theta": 1000000.0,
|
36 |
"sliding_window": null,
|
37 |
+
"speech_encoder": "./large-v3.pt",
|
38 |
"speech_encoder_ds_rate": 10,
|
39 |
"speech_encoder_hidden_size": 2048,
|
40 |
"speech_encoder_type": "dual",
|