Uploading trained model
Browse files- added_tokens.json +1 -0
- checkpoint-550/config.json +117 -0
- checkpoint-550/optimizer.pt +3 -0
- checkpoint-550/preprocessor_config.json +10 -0
- checkpoint-550/pytorch_model.bin +3 -0
- checkpoint-550/rng_state.pth +3 -0
- checkpoint-550/scaler.pt +3 -0
- checkpoint-550/scheduler.pt +3 -0
- checkpoint-550/trainer_state.json +472 -0
- checkpoint-550/training_args.bin +3 -0
- checkpoint-99/config.json +117 -0
- checkpoint-99/optimizer.pt +3 -0
- checkpoint-99/preprocessor_config.json +10 -0
- checkpoint-99/pytorch_model.bin +3 -0
- checkpoint-99/rng_state.pth +3 -0
- checkpoint-99/scaler.pt +3 -0
- checkpoint-99/scheduler.pt +3 -0
- checkpoint-99/trainer_state.json +97 -0
- checkpoint-99/training_args.bin +3 -0
- config.json +117 -0
- preprocessor_config.json +11 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
- vocab.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 43, "</s>": 44}
|
checkpoint-550/config.json
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "slplab/wav2vec2-xls-r-300m_phone-mfa_korean",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"adapter_kernel_size": 3,
|
5 |
+
"adapter_stride": 2,
|
6 |
+
"add_adapter": false,
|
7 |
+
"apply_spec_augment": true,
|
8 |
+
"architectures": [
|
9 |
+
"Wav2Vec2ForSpeechClassification"
|
10 |
+
],
|
11 |
+
"attention_dropout": 0.1,
|
12 |
+
"bos_token_id": 1,
|
13 |
+
"classifier_proj_size": 256,
|
14 |
+
"codevector_dim": 768,
|
15 |
+
"contrastive_logits_temperature": 0.1,
|
16 |
+
"conv_bias": true,
|
17 |
+
"conv_dim": [
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512,
|
21 |
+
512,
|
22 |
+
512,
|
23 |
+
512,
|
24 |
+
512
|
25 |
+
],
|
26 |
+
"conv_kernel": [
|
27 |
+
10,
|
28 |
+
3,
|
29 |
+
3,
|
30 |
+
3,
|
31 |
+
3,
|
32 |
+
2,
|
33 |
+
2
|
34 |
+
],
|
35 |
+
"conv_stride": [
|
36 |
+
5,
|
37 |
+
2,
|
38 |
+
2,
|
39 |
+
2,
|
40 |
+
2,
|
41 |
+
2,
|
42 |
+
2
|
43 |
+
],
|
44 |
+
"ctc_loss_reduction": "mean",
|
45 |
+
"ctc_zero_infinity": false,
|
46 |
+
"diversity_loss_weight": 0.1,
|
47 |
+
"do_stable_layer_norm": true,
|
48 |
+
"eos_token_id": 2,
|
49 |
+
"feat_extract_activation": "gelu",
|
50 |
+
"feat_extract_dropout": 0.0,
|
51 |
+
"feat_extract_norm": "layer",
|
52 |
+
"feat_proj_dropout": 0.1,
|
53 |
+
"feat_quantizer_dropout": 0.0,
|
54 |
+
"final_dropout": 0.0,
|
55 |
+
"finetuning_task": "wav2vec2_clf",
|
56 |
+
"gradient_checkpointing": false,
|
57 |
+
"hidden_act": "gelu",
|
58 |
+
"hidden_dropout": 0.1,
|
59 |
+
"hidden_size": 1024,
|
60 |
+
"id2label": {
|
61 |
+
"0": "LABEL_0"
|
62 |
+
},
|
63 |
+
"initializer_range": 0.02,
|
64 |
+
"intermediate_size": 4096,
|
65 |
+
"label2id": {
|
66 |
+
"LABEL_0": 0
|
67 |
+
},
|
68 |
+
"layer_norm_eps": 1e-05,
|
69 |
+
"layerdrop": 0.1,
|
70 |
+
"mask_feature_length": 10,
|
71 |
+
"mask_feature_min_masks": 0,
|
72 |
+
"mask_feature_prob": 0.0,
|
73 |
+
"mask_time_length": 10,
|
74 |
+
"mask_time_min_masks": 2,
|
75 |
+
"mask_time_prob": 0.075,
|
76 |
+
"model_type": "wav2vec2",
|
77 |
+
"num_adapter_layers": 3,
|
78 |
+
"num_attention_heads": 16,
|
79 |
+
"num_codevector_groups": 2,
|
80 |
+
"num_codevectors_per_group": 320,
|
81 |
+
"num_conv_pos_embedding_groups": 16,
|
82 |
+
"num_conv_pos_embeddings": 128,
|
83 |
+
"num_feat_extract_layers": 7,
|
84 |
+
"num_hidden_layers": 24,
|
85 |
+
"num_negatives": 100,
|
86 |
+
"output_hidden_size": 1024,
|
87 |
+
"pad_token_id": 42,
|
88 |
+
"pooling_mode": "mean",
|
89 |
+
"problem_type": "regression",
|
90 |
+
"proj_codevector_dim": 768,
|
91 |
+
"tdnn_dilation": [
|
92 |
+
1,
|
93 |
+
2,
|
94 |
+
3,
|
95 |
+
1,
|
96 |
+
1
|
97 |
+
],
|
98 |
+
"tdnn_dim": [
|
99 |
+
512,
|
100 |
+
512,
|
101 |
+
512,
|
102 |
+
512,
|
103 |
+
1500
|
104 |
+
],
|
105 |
+
"tdnn_kernel": [
|
106 |
+
5,
|
107 |
+
3,
|
108 |
+
3,
|
109 |
+
1,
|
110 |
+
1
|
111 |
+
],
|
112 |
+
"torch_dtype": "float32",
|
113 |
+
"transformers_version": "4.19.3",
|
114 |
+
"use_weighted_layer_sum": false,
|
115 |
+
"vocab_size": 45,
|
116 |
+
"xvector_output_dim": 512
|
117 |
+
}
|
checkpoint-550/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b06cc682f07e4dcf4ee4801f5cdb587ec58468fd89b8aa459aab642ffa305bc
|
3 |
+
size 2498465161
|
checkpoint-550/preprocessor_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cache_dir": "/data2/excalibur12/.cache/huggingface/datasets",
|
3 |
+
"do_normalize": true,
|
4 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
5 |
+
"feature_size": 1,
|
6 |
+
"padding_side": "right",
|
7 |
+
"padding_value": 0.0,
|
8 |
+
"return_attention_mask": true,
|
9 |
+
"sampling_rate": 16000
|
10 |
+
}
|
checkpoint-550/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc61e7cd85802dd3a58764549781397be6d31714c1a41b59e9327535093e2194
|
3 |
+
size 1266101869
|
checkpoint-550/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46015840c7dc0498fdae4bd8dce131fc8fdb7beb4db6d28e7f511ed2c19d088f
|
3 |
+
size 14567
|
checkpoint-550/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c33378e25f610e335ddba9ca3bc1885deef22bd7de80474020b185e11026b99
|
3 |
+
size 559
|
checkpoint-550/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37e8d607b82b4eb73fe403c69aa9e37348793e30ec147c856a28d72e5b4d7d9b
|
3 |
+
size 623
|
checkpoint-550/trainer_state.json
ADDED
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.30745795369148254,
|
3 |
+
"best_model_checkpoint": "asd_pronunciation_w2v_xlsr-reg/checkpoint-99",
|
4 |
+
"epoch": 49.977777777777774,
|
5 |
+
"global_step": 550,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.98,
|
12 |
+
"eval_loss": 0.8269791603088379,
|
13 |
+
"eval_mse": 0.8274638056755066,
|
14 |
+
"eval_runtime": 67.3404,
|
15 |
+
"eval_samples_per_second": 31.497,
|
16 |
+
"eval_steps_per_second": 0.995,
|
17 |
+
"step": 11
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 1.98,
|
21 |
+
"eval_loss": 0.4631172716617584,
|
22 |
+
"eval_mse": 0.46300947666168213,
|
23 |
+
"eval_runtime": 67.2163,
|
24 |
+
"eval_samples_per_second": 31.555,
|
25 |
+
"eval_steps_per_second": 0.997,
|
26 |
+
"step": 22
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 2.98,
|
30 |
+
"eval_loss": 0.351409912109375,
|
31 |
+
"eval_mse": 0.3515050709247589,
|
32 |
+
"eval_runtime": 67.5563,
|
33 |
+
"eval_samples_per_second": 31.396,
|
34 |
+
"eval_steps_per_second": 0.992,
|
35 |
+
"step": 33
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"epoch": 3.98,
|
39 |
+
"eval_loss": 0.3585782051086426,
|
40 |
+
"eval_mse": 0.35852691531181335,
|
41 |
+
"eval_runtime": 69.7976,
|
42 |
+
"eval_samples_per_second": 30.388,
|
43 |
+
"eval_steps_per_second": 0.96,
|
44 |
+
"step": 44
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 4.98,
|
48 |
+
"eval_loss": 0.4412296712398529,
|
49 |
+
"eval_mse": 0.4412075877189636,
|
50 |
+
"eval_runtime": 79.1138,
|
51 |
+
"eval_samples_per_second": 26.809,
|
52 |
+
"eval_steps_per_second": 0.847,
|
53 |
+
"step": 55
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 5.98,
|
57 |
+
"eval_loss": 0.430486798286438,
|
58 |
+
"eval_mse": 0.43049314618110657,
|
59 |
+
"eval_runtime": 72.5547,
|
60 |
+
"eval_samples_per_second": 29.233,
|
61 |
+
"eval_steps_per_second": 0.923,
|
62 |
+
"step": 66
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 6.98,
|
66 |
+
"eval_loss": 0.42916765809059143,
|
67 |
+
"eval_mse": 0.4291659891605377,
|
68 |
+
"eval_runtime": 95.5528,
|
69 |
+
"eval_samples_per_second": 22.197,
|
70 |
+
"eval_steps_per_second": 0.701,
|
71 |
+
"step": 77
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 7.98,
|
75 |
+
"eval_loss": 0.4343053996562958,
|
76 |
+
"eval_mse": 0.43428245186805725,
|
77 |
+
"eval_runtime": 66.5007,
|
78 |
+
"eval_samples_per_second": 31.894,
|
79 |
+
"eval_steps_per_second": 1.008,
|
80 |
+
"step": 88
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 8.98,
|
84 |
+
"eval_loss": 0.30745795369148254,
|
85 |
+
"eval_mse": 0.3074318766593933,
|
86 |
+
"eval_runtime": 66.2359,
|
87 |
+
"eval_samples_per_second": 32.022,
|
88 |
+
"eval_steps_per_second": 1.012,
|
89 |
+
"step": 99
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"epoch": 9.98,
|
93 |
+
"eval_loss": 0.42939332127571106,
|
94 |
+
"eval_mse": 0.4293690323829651,
|
95 |
+
"eval_runtime": 66.3865,
|
96 |
+
"eval_samples_per_second": 31.949,
|
97 |
+
"eval_steps_per_second": 1.009,
|
98 |
+
"step": 110
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 10.98,
|
102 |
+
"eval_loss": 0.3727492094039917,
|
103 |
+
"eval_mse": 0.3727482855319977,
|
104 |
+
"eval_runtime": 66.8857,
|
105 |
+
"eval_samples_per_second": 31.711,
|
106 |
+
"eval_steps_per_second": 1.002,
|
107 |
+
"step": 121
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 11.98,
|
111 |
+
"eval_loss": 0.42278197407722473,
|
112 |
+
"eval_mse": 0.42277276515960693,
|
113 |
+
"eval_runtime": 65.5197,
|
114 |
+
"eval_samples_per_second": 32.372,
|
115 |
+
"eval_steps_per_second": 1.023,
|
116 |
+
"step": 132
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 12.98,
|
120 |
+
"eval_loss": 0.4014572203159332,
|
121 |
+
"eval_mse": 0.40141549706459045,
|
122 |
+
"eval_runtime": 65.6627,
|
123 |
+
"eval_samples_per_second": 32.301,
|
124 |
+
"eval_steps_per_second": 1.02,
|
125 |
+
"step": 143
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"epoch": 13.98,
|
129 |
+
"eval_loss": 0.37818050384521484,
|
130 |
+
"eval_mse": 0.37818947434425354,
|
131 |
+
"eval_runtime": 66.0031,
|
132 |
+
"eval_samples_per_second": 32.135,
|
133 |
+
"eval_steps_per_second": 1.015,
|
134 |
+
"step": 154
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"epoch": 14.98,
|
138 |
+
"eval_loss": 0.4358225166797638,
|
139 |
+
"eval_mse": 0.43578916788101196,
|
140 |
+
"eval_runtime": 65.7571,
|
141 |
+
"eval_samples_per_second": 32.255,
|
142 |
+
"eval_steps_per_second": 1.019,
|
143 |
+
"step": 165
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"epoch": 15.98,
|
147 |
+
"eval_loss": 0.36039263010025024,
|
148 |
+
"eval_mse": 0.3603877127170563,
|
149 |
+
"eval_runtime": 67.6323,
|
150 |
+
"eval_samples_per_second": 31.361,
|
151 |
+
"eval_steps_per_second": 0.991,
|
152 |
+
"step": 176
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 16.98,
|
156 |
+
"eval_loss": 0.40446653962135315,
|
157 |
+
"eval_mse": 0.40447959303855896,
|
158 |
+
"eval_runtime": 66.1021,
|
159 |
+
"eval_samples_per_second": 32.087,
|
160 |
+
"eval_steps_per_second": 1.014,
|
161 |
+
"step": 187
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 17.98,
|
165 |
+
"eval_loss": 0.3638509213924408,
|
166 |
+
"eval_mse": 0.3638397455215454,
|
167 |
+
"eval_runtime": 66.1765,
|
168 |
+
"eval_samples_per_second": 32.051,
|
169 |
+
"eval_steps_per_second": 1.012,
|
170 |
+
"step": 198
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 18.98,
|
174 |
+
"eval_loss": 0.4001483619213104,
|
175 |
+
"eval_mse": 0.400104284286499,
|
176 |
+
"eval_runtime": 66.5235,
|
177 |
+
"eval_samples_per_second": 31.883,
|
178 |
+
"eval_steps_per_second": 1.007,
|
179 |
+
"step": 209
|
180 |
+
},
|
181 |
+
{
|
182 |
+
"epoch": 19.98,
|
183 |
+
"eval_loss": 0.3836788535118103,
|
184 |
+
"eval_mse": 0.38362112641334534,
|
185 |
+
"eval_runtime": 65.5397,
|
186 |
+
"eval_samples_per_second": 32.362,
|
187 |
+
"eval_steps_per_second": 1.022,
|
188 |
+
"step": 220
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 20.98,
|
192 |
+
"eval_loss": 0.3813478350639343,
|
193 |
+
"eval_mse": 0.3813202679157257,
|
194 |
+
"eval_runtime": 67.0847,
|
195 |
+
"eval_samples_per_second": 31.617,
|
196 |
+
"eval_steps_per_second": 0.999,
|
197 |
+
"step": 231
|
198 |
+
},
|
199 |
+
{
|
200 |
+
"epoch": 21.98,
|
201 |
+
"eval_loss": 0.3911483883857727,
|
202 |
+
"eval_mse": 0.39113306999206543,
|
203 |
+
"eval_runtime": 66.4329,
|
204 |
+
"eval_samples_per_second": 31.927,
|
205 |
+
"eval_steps_per_second": 1.009,
|
206 |
+
"step": 242
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 22.98,
|
210 |
+
"eval_loss": 0.43994417786598206,
|
211 |
+
"eval_mse": 0.4399244487285614,
|
212 |
+
"eval_runtime": 66.1358,
|
213 |
+
"eval_samples_per_second": 32.07,
|
214 |
+
"eval_steps_per_second": 1.013,
|
215 |
+
"step": 253
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"epoch": 23.98,
|
219 |
+
"eval_loss": 0.4228975772857666,
|
220 |
+
"eval_mse": 0.42288002371788025,
|
221 |
+
"eval_runtime": 66.6907,
|
222 |
+
"eval_samples_per_second": 31.804,
|
223 |
+
"eval_steps_per_second": 1.005,
|
224 |
+
"step": 264
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"epoch": 24.98,
|
228 |
+
"eval_loss": 0.4192221462726593,
|
229 |
+
"eval_mse": 0.4191807508468628,
|
230 |
+
"eval_runtime": 65.8241,
|
231 |
+
"eval_samples_per_second": 32.222,
|
232 |
+
"eval_steps_per_second": 1.018,
|
233 |
+
"step": 275
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 25.98,
|
237 |
+
"eval_loss": 0.40975797176361084,
|
238 |
+
"eval_mse": 0.4097472131252289,
|
239 |
+
"eval_runtime": 67.5766,
|
240 |
+
"eval_samples_per_second": 31.387,
|
241 |
+
"eval_steps_per_second": 0.991,
|
242 |
+
"step": 286
|
243 |
+
},
|
244 |
+
{
|
245 |
+
"epoch": 26.98,
|
246 |
+
"eval_loss": 0.3760901093482971,
|
247 |
+
"eval_mse": 0.37607377767562866,
|
248 |
+
"eval_runtime": 65.9157,
|
249 |
+
"eval_samples_per_second": 32.177,
|
250 |
+
"eval_steps_per_second": 1.016,
|
251 |
+
"step": 297
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"epoch": 27.98,
|
255 |
+
"eval_loss": 0.389096200466156,
|
256 |
+
"eval_mse": 0.3890584111213684,
|
257 |
+
"eval_runtime": 65.956,
|
258 |
+
"eval_samples_per_second": 32.158,
|
259 |
+
"eval_steps_per_second": 1.016,
|
260 |
+
"step": 308
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 28.98,
|
264 |
+
"eval_loss": 0.4243176579475403,
|
265 |
+
"eval_mse": 0.4242975115776062,
|
266 |
+
"eval_runtime": 66.4794,
|
267 |
+
"eval_samples_per_second": 31.905,
|
268 |
+
"eval_steps_per_second": 1.008,
|
269 |
+
"step": 319
|
270 |
+
},
|
271 |
+
{
|
272 |
+
"epoch": 29.98,
|
273 |
+
"eval_loss": 0.4235914349555969,
|
274 |
+
"eval_mse": 0.4235744774341583,
|
275 |
+
"eval_runtime": 66.2277,
|
276 |
+
"eval_samples_per_second": 32.026,
|
277 |
+
"eval_steps_per_second": 1.012,
|
278 |
+
"step": 330
|
279 |
+
},
|
280 |
+
{
|
281 |
+
"epoch": 30.98,
|
282 |
+
"eval_loss": 0.4235081076622009,
|
283 |
+
"eval_mse": 0.42347782850265503,
|
284 |
+
"eval_runtime": 66.6381,
|
285 |
+
"eval_samples_per_second": 31.829,
|
286 |
+
"eval_steps_per_second": 1.005,
|
287 |
+
"step": 341
|
288 |
+
},
|
289 |
+
{
|
290 |
+
"epoch": 31.98,
|
291 |
+
"eval_loss": 0.4236636757850647,
|
292 |
+
"eval_mse": 0.4236546456813812,
|
293 |
+
"eval_runtime": 65.6032,
|
294 |
+
"eval_samples_per_second": 32.331,
|
295 |
+
"eval_steps_per_second": 1.021,
|
296 |
+
"step": 352
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 32.98,
|
300 |
+
"eval_loss": 0.4269878566265106,
|
301 |
+
"eval_mse": 0.4269687533378601,
|
302 |
+
"eval_runtime": 65.9665,
|
303 |
+
"eval_samples_per_second": 32.153,
|
304 |
+
"eval_steps_per_second": 1.016,
|
305 |
+
"step": 363
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"epoch": 33.98,
|
309 |
+
"eval_loss": 0.39142194390296936,
|
310 |
+
"eval_mse": 0.3913804888725281,
|
311 |
+
"eval_runtime": 66.6217,
|
312 |
+
"eval_samples_per_second": 31.836,
|
313 |
+
"eval_steps_per_second": 1.006,
|
314 |
+
"step": 374
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 34.98,
|
318 |
+
"eval_loss": 0.3899790942668915,
|
319 |
+
"eval_mse": 0.38995301723480225,
|
320 |
+
"eval_runtime": 65.771,
|
321 |
+
"eval_samples_per_second": 32.248,
|
322 |
+
"eval_steps_per_second": 1.019,
|
323 |
+
"step": 385
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"epoch": 35.98,
|
327 |
+
"eval_loss": 0.4031297564506531,
|
328 |
+
"eval_mse": 0.4031302034854889,
|
329 |
+
"eval_runtime": 68.033,
|
330 |
+
"eval_samples_per_second": 31.176,
|
331 |
+
"eval_steps_per_second": 0.985,
|
332 |
+
"step": 396
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"epoch": 36.98,
|
336 |
+
"eval_loss": 0.373826265335083,
|
337 |
+
"eval_mse": 0.3738201856613159,
|
338 |
+
"eval_runtime": 66.1644,
|
339 |
+
"eval_samples_per_second": 32.057,
|
340 |
+
"eval_steps_per_second": 1.013,
|
341 |
+
"step": 407
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"epoch": 37.98,
|
345 |
+
"eval_loss": 0.37409740686416626,
|
346 |
+
"eval_mse": 0.37407544255256653,
|
347 |
+
"eval_runtime": 65.7007,
|
348 |
+
"eval_samples_per_second": 32.283,
|
349 |
+
"eval_steps_per_second": 1.02,
|
350 |
+
"step": 418
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"epoch": 38.98,
|
354 |
+
"eval_loss": 0.41098639369010925,
|
355 |
+
"eval_mse": 0.4109634459018707,
|
356 |
+
"eval_runtime": 65.8334,
|
357 |
+
"eval_samples_per_second": 32.218,
|
358 |
+
"eval_steps_per_second": 1.018,
|
359 |
+
"step": 429
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 39.98,
|
363 |
+
"eval_loss": 0.38580140471458435,
|
364 |
+
"eval_mse": 0.3857785165309906,
|
365 |
+
"eval_runtime": 65.912,
|
366 |
+
"eval_samples_per_second": 32.179,
|
367 |
+
"eval_steps_per_second": 1.017,
|
368 |
+
"step": 440
|
369 |
+
},
|
370 |
+
{
|
371 |
+
"epoch": 40.98,
|
372 |
+
"eval_loss": 0.40168315172195435,
|
373 |
+
"eval_mse": 0.4016563296318054,
|
374 |
+
"eval_runtime": 67.322,
|
375 |
+
"eval_samples_per_second": 31.505,
|
376 |
+
"eval_steps_per_second": 0.995,
|
377 |
+
"step": 451
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"epoch": 41.98,
|
381 |
+
"eval_loss": 0.3875749111175537,
|
382 |
+
"eval_mse": 0.3875587284564972,
|
383 |
+
"eval_runtime": 65.9445,
|
384 |
+
"eval_samples_per_second": 32.163,
|
385 |
+
"eval_steps_per_second": 1.016,
|
386 |
+
"step": 462
|
387 |
+
},
|
388 |
+
{
|
389 |
+
"epoch": 42.98,
|
390 |
+
"eval_loss": 0.401607483625412,
|
391 |
+
"eval_mse": 0.4015834629535675,
|
392 |
+
"eval_runtime": 66.235,
|
393 |
+
"eval_samples_per_second": 32.022,
|
394 |
+
"eval_steps_per_second": 1.012,
|
395 |
+
"step": 473
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"epoch": 43.98,
|
399 |
+
"eval_loss": 0.3939042389392853,
|
400 |
+
"eval_mse": 0.3938945233821869,
|
401 |
+
"eval_runtime": 66.0054,
|
402 |
+
"eval_samples_per_second": 32.134,
|
403 |
+
"eval_steps_per_second": 1.015,
|
404 |
+
"step": 484
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"epoch": 44.98,
|
408 |
+
"eval_loss": 0.40803390741348267,
|
409 |
+
"eval_mse": 0.40802931785583496,
|
410 |
+
"eval_runtime": 66.0842,
|
411 |
+
"eval_samples_per_second": 32.095,
|
412 |
+
"eval_steps_per_second": 1.014,
|
413 |
+
"step": 495
|
414 |
+
},
|
415 |
+
{
|
416 |
+
"epoch": 45.44,
|
417 |
+
"learning_rate": 3.03030303030303e-05,
|
418 |
+
"loss": 0.4034,
|
419 |
+
"step": 500
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"epoch": 45.98,
|
423 |
+
"eval_loss": 0.38819119334220886,
|
424 |
+
"eval_mse": 0.38818415999412537,
|
425 |
+
"eval_runtime": 67.7262,
|
426 |
+
"eval_samples_per_second": 31.317,
|
427 |
+
"eval_steps_per_second": 0.989,
|
428 |
+
"step": 506
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 46.98,
|
432 |
+
"eval_loss": 0.4062108099460602,
|
433 |
+
"eval_mse": 0.4061962068080902,
|
434 |
+
"eval_runtime": 63.6037,
|
435 |
+
"eval_samples_per_second": 33.347,
|
436 |
+
"eval_steps_per_second": 1.053,
|
437 |
+
"step": 517
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"epoch": 47.98,
|
441 |
+
"eval_loss": 0.38834279775619507,
|
442 |
+
"eval_mse": 0.38832658529281616,
|
443 |
+
"eval_runtime": 64.2256,
|
444 |
+
"eval_samples_per_second": 33.024,
|
445 |
+
"eval_steps_per_second": 1.043,
|
446 |
+
"step": 528
|
447 |
+
},
|
448 |
+
{
|
449 |
+
"epoch": 48.98,
|
450 |
+
"eval_loss": 0.3969601094722748,
|
451 |
+
"eval_mse": 0.3969435691833496,
|
452 |
+
"eval_runtime": 63.6409,
|
453 |
+
"eval_samples_per_second": 33.328,
|
454 |
+
"eval_steps_per_second": 1.053,
|
455 |
+
"step": 539
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"epoch": 49.98,
|
459 |
+
"eval_loss": 0.3963707685470581,
|
460 |
+
"eval_mse": 0.3963526785373688,
|
461 |
+
"eval_runtime": 63.4841,
|
462 |
+
"eval_samples_per_second": 33.41,
|
463 |
+
"eval_steps_per_second": 1.055,
|
464 |
+
"step": 550
|
465 |
+
}
|
466 |
+
],
|
467 |
+
"max_steps": 550,
|
468 |
+
"num_train_epochs": 50,
|
469 |
+
"total_flos": 1.603121844978697e+20,
|
470 |
+
"trial_name": null,
|
471 |
+
"trial_params": null
|
472 |
+
}
|
checkpoint-550/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6d18a7837d69e5d09a40f73faf5071bd1c07927dac0c1c18d211646e1533cbb
|
3 |
+
size 3247
|
checkpoint-99/config.json
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "slplab/wav2vec2-xls-r-300m_phone-mfa_korean",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"adapter_kernel_size": 3,
|
5 |
+
"adapter_stride": 2,
|
6 |
+
"add_adapter": false,
|
7 |
+
"apply_spec_augment": true,
|
8 |
+
"architectures": [
|
9 |
+
"Wav2Vec2ForSpeechClassification"
|
10 |
+
],
|
11 |
+
"attention_dropout": 0.1,
|
12 |
+
"bos_token_id": 1,
|
13 |
+
"classifier_proj_size": 256,
|
14 |
+
"codevector_dim": 768,
|
15 |
+
"contrastive_logits_temperature": 0.1,
|
16 |
+
"conv_bias": true,
|
17 |
+
"conv_dim": [
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512,
|
21 |
+
512,
|
22 |
+
512,
|
23 |
+
512,
|
24 |
+
512
|
25 |
+
],
|
26 |
+
"conv_kernel": [
|
27 |
+
10,
|
28 |
+
3,
|
29 |
+
3,
|
30 |
+
3,
|
31 |
+
3,
|
32 |
+
2,
|
33 |
+
2
|
34 |
+
],
|
35 |
+
"conv_stride": [
|
36 |
+
5,
|
37 |
+
2,
|
38 |
+
2,
|
39 |
+
2,
|
40 |
+
2,
|
41 |
+
2,
|
42 |
+
2
|
43 |
+
],
|
44 |
+
"ctc_loss_reduction": "mean",
|
45 |
+
"ctc_zero_infinity": false,
|
46 |
+
"diversity_loss_weight": 0.1,
|
47 |
+
"do_stable_layer_norm": true,
|
48 |
+
"eos_token_id": 2,
|
49 |
+
"feat_extract_activation": "gelu",
|
50 |
+
"feat_extract_dropout": 0.0,
|
51 |
+
"feat_extract_norm": "layer",
|
52 |
+
"feat_proj_dropout": 0.1,
|
53 |
+
"feat_quantizer_dropout": 0.0,
|
54 |
+
"final_dropout": 0.0,
|
55 |
+
"finetuning_task": "wav2vec2_clf",
|
56 |
+
"gradient_checkpointing": false,
|
57 |
+
"hidden_act": "gelu",
|
58 |
+
"hidden_dropout": 0.1,
|
59 |
+
"hidden_size": 1024,
|
60 |
+
"id2label": {
|
61 |
+
"0": "LABEL_0"
|
62 |
+
},
|
63 |
+
"initializer_range": 0.02,
|
64 |
+
"intermediate_size": 4096,
|
65 |
+
"label2id": {
|
66 |
+
"LABEL_0": 0
|
67 |
+
},
|
68 |
+
"layer_norm_eps": 1e-05,
|
69 |
+
"layerdrop": 0.1,
|
70 |
+
"mask_feature_length": 10,
|
71 |
+
"mask_feature_min_masks": 0,
|
72 |
+
"mask_feature_prob": 0.0,
|
73 |
+
"mask_time_length": 10,
|
74 |
+
"mask_time_min_masks": 2,
|
75 |
+
"mask_time_prob": 0.075,
|
76 |
+
"model_type": "wav2vec2",
|
77 |
+
"num_adapter_layers": 3,
|
78 |
+
"num_attention_heads": 16,
|
79 |
+
"num_codevector_groups": 2,
|
80 |
+
"num_codevectors_per_group": 320,
|
81 |
+
"num_conv_pos_embedding_groups": 16,
|
82 |
+
"num_conv_pos_embeddings": 128,
|
83 |
+
"num_feat_extract_layers": 7,
|
84 |
+
"num_hidden_layers": 24,
|
85 |
+
"num_negatives": 100,
|
86 |
+
"output_hidden_size": 1024,
|
87 |
+
"pad_token_id": 42,
|
88 |
+
"pooling_mode": "mean",
|
89 |
+
"problem_type": "regression",
|
90 |
+
"proj_codevector_dim": 768,
|
91 |
+
"tdnn_dilation": [
|
92 |
+
1,
|
93 |
+
2,
|
94 |
+
3,
|
95 |
+
1,
|
96 |
+
1
|
97 |
+
],
|
98 |
+
"tdnn_dim": [
|
99 |
+
512,
|
100 |
+
512,
|
101 |
+
512,
|
102 |
+
512,
|
103 |
+
1500
|
104 |
+
],
|
105 |
+
"tdnn_kernel": [
|
106 |
+
5,
|
107 |
+
3,
|
108 |
+
3,
|
109 |
+
1,
|
110 |
+
1
|
111 |
+
],
|
112 |
+
"torch_dtype": "float32",
|
113 |
+
"transformers_version": "4.19.3",
|
114 |
+
"use_weighted_layer_sum": false,
|
115 |
+
"vocab_size": 45,
|
116 |
+
"xvector_output_dim": 512
|
117 |
+
}
|
checkpoint-99/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e45b75190c0075591cb83659bea8786524011d16b66bc51030ee67788dcfce10
|
3 |
+
size 2498464777
|
checkpoint-99/preprocessor_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cache_dir": "/data2/excalibur12/.cache/huggingface/datasets",
|
3 |
+
"do_normalize": true,
|
4 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
5 |
+
"feature_size": 1,
|
6 |
+
"padding_side": "right",
|
7 |
+
"padding_value": 0.0,
|
8 |
+
"return_attention_mask": true,
|
9 |
+
"sampling_rate": 16000
|
10 |
+
}
|
checkpoint-99/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e47fa51983f1f4f6c591d3b181d5a8bb4d4d7b57224ffbc4f2dae26521d1b84
|
3 |
+
size 1266101869
|
checkpoint-99/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e53ff83bc5a0f7e3f45273d2de1bf2192e1300b343841603a2b096df6ad8c127
|
3 |
+
size 14567
|
checkpoint-99/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78a083ceb88b83ff7ed1f0adf6d62580b8ceb7d9c17ad4684f511424e289c436
|
3 |
+
size 559
|
checkpoint-99/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0de4bd42e78a35b7fa3b4618172ba102f4f2a9be54789adf5790bde4d76e585e
|
3 |
+
size 623
|
checkpoint-99/trainer_state.json
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.30745795369148254,
|
3 |
+
"best_model_checkpoint": "asd_pronunciation_w2v_xlsr-reg/checkpoint-99",
|
4 |
+
"epoch": 8.977777777777778,
|
5 |
+
"global_step": 99,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.98,
|
12 |
+
"eval_loss": 0.8269791603088379,
|
13 |
+
"eval_mse": 0.8274638056755066,
|
14 |
+
"eval_runtime": 67.3404,
|
15 |
+
"eval_samples_per_second": 31.497,
|
16 |
+
"eval_steps_per_second": 0.995,
|
17 |
+
"step": 11
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 1.98,
|
21 |
+
"eval_loss": 0.4631172716617584,
|
22 |
+
"eval_mse": 0.46300947666168213,
|
23 |
+
"eval_runtime": 67.2163,
|
24 |
+
"eval_samples_per_second": 31.555,
|
25 |
+
"eval_steps_per_second": 0.997,
|
26 |
+
"step": 22
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 2.98,
|
30 |
+
"eval_loss": 0.351409912109375,
|
31 |
+
"eval_mse": 0.3515050709247589,
|
32 |
+
"eval_runtime": 67.5563,
|
33 |
+
"eval_samples_per_second": 31.396,
|
34 |
+
"eval_steps_per_second": 0.992,
|
35 |
+
"step": 33
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"epoch": 3.98,
|
39 |
+
"eval_loss": 0.3585782051086426,
|
40 |
+
"eval_mse": 0.35852691531181335,
|
41 |
+
"eval_runtime": 69.7976,
|
42 |
+
"eval_samples_per_second": 30.388,
|
43 |
+
"eval_steps_per_second": 0.96,
|
44 |
+
"step": 44
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 4.98,
|
48 |
+
"eval_loss": 0.4412296712398529,
|
49 |
+
"eval_mse": 0.4412075877189636,
|
50 |
+
"eval_runtime": 79.1138,
|
51 |
+
"eval_samples_per_second": 26.809,
|
52 |
+
"eval_steps_per_second": 0.847,
|
53 |
+
"step": 55
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 5.98,
|
57 |
+
"eval_loss": 0.430486798286438,
|
58 |
+
"eval_mse": 0.43049314618110657,
|
59 |
+
"eval_runtime": 72.5547,
|
60 |
+
"eval_samples_per_second": 29.233,
|
61 |
+
"eval_steps_per_second": 0.923,
|
62 |
+
"step": 66
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 6.98,
|
66 |
+
"eval_loss": 0.42916765809059143,
|
67 |
+
"eval_mse": 0.4291659891605377,
|
68 |
+
"eval_runtime": 95.5528,
|
69 |
+
"eval_samples_per_second": 22.197,
|
70 |
+
"eval_steps_per_second": 0.701,
|
71 |
+
"step": 77
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 7.98,
|
75 |
+
"eval_loss": 0.4343053996562958,
|
76 |
+
"eval_mse": 0.43428245186805725,
|
77 |
+
"eval_runtime": 66.5007,
|
78 |
+
"eval_samples_per_second": 31.894,
|
79 |
+
"eval_steps_per_second": 1.008,
|
80 |
+
"step": 88
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 8.98,
|
84 |
+
"eval_loss": 0.30745795369148254,
|
85 |
+
"eval_mse": 0.3074318766593933,
|
86 |
+
"eval_runtime": 66.2359,
|
87 |
+
"eval_samples_per_second": 32.022,
|
88 |
+
"eval_steps_per_second": 1.012,
|
89 |
+
"step": 99
|
90 |
+
}
|
91 |
+
],
|
92 |
+
"max_steps": 550,
|
93 |
+
"num_train_epochs": 50,
|
94 |
+
"total_flos": 2.8846380565942047e+19,
|
95 |
+
"trial_name": null,
|
96 |
+
"trial_params": null
|
97 |
+
}
|
checkpoint-99/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6d18a7837d69e5d09a40f73faf5071bd1c07927dac0c1c18d211646e1533cbb
|
3 |
+
size 3247
|
config.json
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "slplab/wav2vec2-xls-r-300m_phone-mfa_korean",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"adapter_kernel_size": 3,
|
5 |
+
"adapter_stride": 2,
|
6 |
+
"add_adapter": false,
|
7 |
+
"apply_spec_augment": true,
|
8 |
+
"architectures": [
|
9 |
+
"Wav2Vec2ForSpeechClassification"
|
10 |
+
],
|
11 |
+
"attention_dropout": 0.1,
|
12 |
+
"bos_token_id": 1,
|
13 |
+
"classifier_proj_size": 256,
|
14 |
+
"codevector_dim": 768,
|
15 |
+
"contrastive_logits_temperature": 0.1,
|
16 |
+
"conv_bias": true,
|
17 |
+
"conv_dim": [
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512,
|
21 |
+
512,
|
22 |
+
512,
|
23 |
+
512,
|
24 |
+
512
|
25 |
+
],
|
26 |
+
"conv_kernel": [
|
27 |
+
10,
|
28 |
+
3,
|
29 |
+
3,
|
30 |
+
3,
|
31 |
+
3,
|
32 |
+
2,
|
33 |
+
2
|
34 |
+
],
|
35 |
+
"conv_stride": [
|
36 |
+
5,
|
37 |
+
2,
|
38 |
+
2,
|
39 |
+
2,
|
40 |
+
2,
|
41 |
+
2,
|
42 |
+
2
|
43 |
+
],
|
44 |
+
"ctc_loss_reduction": "mean",
|
45 |
+
"ctc_zero_infinity": false,
|
46 |
+
"diversity_loss_weight": 0.1,
|
47 |
+
"do_stable_layer_norm": true,
|
48 |
+
"eos_token_id": 2,
|
49 |
+
"feat_extract_activation": "gelu",
|
50 |
+
"feat_extract_dropout": 0.0,
|
51 |
+
"feat_extract_norm": "layer",
|
52 |
+
"feat_proj_dropout": 0.1,
|
53 |
+
"feat_quantizer_dropout": 0.0,
|
54 |
+
"final_dropout": 0.0,
|
55 |
+
"finetuning_task": "wav2vec2_clf",
|
56 |
+
"gradient_checkpointing": false,
|
57 |
+
"hidden_act": "gelu",
|
58 |
+
"hidden_dropout": 0.1,
|
59 |
+
"hidden_size": 1024,
|
60 |
+
"id2label": {
|
61 |
+
"0": "LABEL_0"
|
62 |
+
},
|
63 |
+
"initializer_range": 0.02,
|
64 |
+
"intermediate_size": 4096,
|
65 |
+
"label2id": {
|
66 |
+
"LABEL_0": 0
|
67 |
+
},
|
68 |
+
"layer_norm_eps": 1e-05,
|
69 |
+
"layerdrop": 0.1,
|
70 |
+
"mask_feature_length": 10,
|
71 |
+
"mask_feature_min_masks": 0,
|
72 |
+
"mask_feature_prob": 0.0,
|
73 |
+
"mask_time_length": 10,
|
74 |
+
"mask_time_min_masks": 2,
|
75 |
+
"mask_time_prob": 0.075,
|
76 |
+
"model_type": "wav2vec2",
|
77 |
+
"num_adapter_layers": 3,
|
78 |
+
"num_attention_heads": 16,
|
79 |
+
"num_codevector_groups": 2,
|
80 |
+
"num_codevectors_per_group": 320,
|
81 |
+
"num_conv_pos_embedding_groups": 16,
|
82 |
+
"num_conv_pos_embeddings": 128,
|
83 |
+
"num_feat_extract_layers": 7,
|
84 |
+
"num_hidden_layers": 24,
|
85 |
+
"num_negatives": 100,
|
86 |
+
"output_hidden_size": 1024,
|
87 |
+
"pad_token_id": 42,
|
88 |
+
"pooling_mode": "mean",
|
89 |
+
"problem_type": "regression",
|
90 |
+
"proj_codevector_dim": 768,
|
91 |
+
"tdnn_dilation": [
|
92 |
+
1,
|
93 |
+
2,
|
94 |
+
3,
|
95 |
+
1,
|
96 |
+
1
|
97 |
+
],
|
98 |
+
"tdnn_dim": [
|
99 |
+
512,
|
100 |
+
512,
|
101 |
+
512,
|
102 |
+
512,
|
103 |
+
1500
|
104 |
+
],
|
105 |
+
"tdnn_kernel": [
|
106 |
+
5,
|
107 |
+
3,
|
108 |
+
3,
|
109 |
+
1,
|
110 |
+
1
|
111 |
+
],
|
112 |
+
"torch_dtype": "float32",
|
113 |
+
"transformers_version": "4.19.3",
|
114 |
+
"use_weighted_layer_sum": false,
|
115 |
+
"vocab_size": 45,
|
116 |
+
"xvector_output_dim": 512
|
117 |
+
}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cache_dir": "/data2/excalibur12/.cache/huggingface/datasets",
|
3 |
+
"do_normalize": true,
|
4 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
5 |
+
"feature_size": 1,
|
6 |
+
"padding_side": "right",
|
7 |
+
"padding_value": 0.0,
|
8 |
+
"processor_class": "Wav2Vec2Processor",
|
9 |
+
"return_attention_mask": true,
|
10 |
+
"sampling_rate": 16000
|
11 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e47fa51983f1f4f6c591d3b181d5a8bb4d4d7b57224ffbc4f2dae26521d1b84
|
3 |
+
size 1266101869
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "word_delimiter_token": null, "phone_delimiter_token": "|", "do_phonemize": false, "phonemizer_lang": "en-us", "phonemizer_backend": "espeak", "name_or_path": "slplab/wav2vec2-xls-r-300m_phone-mfa_korean", "special_tokens_map_file": null, "tokenizer_class": "Wav2Vec2PhonemeCTCTokenizer", "processor_class": "Wav2Vec2Processor"}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6d18a7837d69e5d09a40f73faf5071bd1c07927dac0c1c18d211646e1533cbb
|
3 |
+
size 3247
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"A": 0, "B": 1, "BB": 2, "CHh": 3, "D": 4, "DD": 5, "E": 6, "EO": 7, "EU": 8, "G": 9, "GG": 10, "H": 11, "I": 12, "J": 13, "JJ": 14, "Kh": 15, "L": 16, "M": 17, "N": 18, "NG": 19, "O": 20, "Ph": 21, "R": 22, "S": 23, "SS": 24, "Th": 25, "U": 26, "[PAD]": 42, "[UNK]": 41, "euI": 27, "iA": 28, "iE": 29, "iEO": 30, "iO": 31, "iU": 32, "k": 33, "oA": 34, "oE": 35, "p": 36, "t": 37, "uEO": 38, "uI": 39, "|": 40}
|