Marcos12886
commited on
Commit
•
b73f096
1
Parent(s):
c9c4a3f
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +28 -32
- checkpoint-121/model.safetensors +1 -1
- checkpoint-121/optimizer.pt +1 -1
- checkpoint-121/scheduler.pt +1 -1
- checkpoint-121/trainer_state.json +43 -43
- checkpoint-121/training_args.bin +1 -1
- checkpoint-145/config.json +85 -0
- checkpoint-145/model.safetensors +3 -0
- checkpoint-145/optimizer.pt +3 -0
- checkpoint-145/rng_state.pth +3 -0
- checkpoint-145/scheduler.pt +3 -0
- checkpoint-145/trainer_state.json +114 -0
- checkpoint-145/training_args.bin +3 -0
- checkpoint-169/config.json +85 -0
- checkpoint-169/model.safetensors +3 -0
- checkpoint-169/optimizer.pt +3 -0
- checkpoint-169/rng_state.pth +3 -0
- checkpoint-169/scheduler.pt +3 -0
- checkpoint-169/trainer_state.json +126 -0
- checkpoint-169/training_args.bin +3 -0
- checkpoint-194/config.json +85 -0
- checkpoint-194/model.safetensors +3 -0
- checkpoint-194/optimizer.pt +3 -0
- checkpoint-194/rng_state.pth +3 -0
- checkpoint-194/scheduler.pt +3 -0
- checkpoint-194/trainer_state.json +138 -0
- checkpoint-194/training_args.bin +3 -0
- checkpoint-218/config.json +85 -0
- checkpoint-218/model.safetensors +3 -0
- checkpoint-218/optimizer.pt +3 -0
- checkpoint-218/rng_state.pth +3 -0
- checkpoint-218/scheduler.pt +3 -0
- checkpoint-218/trainer_state.json +150 -0
- checkpoint-218/training_args.bin +3 -0
- checkpoint-24/model.safetensors +1 -1
- checkpoint-24/optimizer.pt +1 -1
- checkpoint-24/rng_state.pth +1 -1
- checkpoint-24/scheduler.pt +1 -1
- checkpoint-24/trainer_state.json +13 -13
- checkpoint-24/training_args.bin +1 -1
- checkpoint-242/config.json +85 -0
- checkpoint-242/model.safetensors +3 -0
- checkpoint-242/optimizer.pt +3 -0
- checkpoint-242/rng_state.pth +3 -0
- checkpoint-242/scheduler.pt +3 -0
- checkpoint-242/trainer_state.json +162 -0
- checkpoint-242/training_args.bin +3 -0
- checkpoint-266/config.json +85 -0
- checkpoint-266/model.safetensors +3 -0
- checkpoint-266/optimizer.pt +3 -0
README.md
CHANGED
@@ -4,26 +4,14 @@ license: apache-2.0
|
|
4 |
base_model: ntu-spml/distilhubert
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
-
datasets:
|
8 |
-
- audiofolder
|
9 |
metrics:
|
10 |
- accuracy
|
|
|
|
|
|
|
11 |
model-index:
|
12 |
- name: distilhubert-finetuned-mixed-data
|
13 |
-
results:
|
14 |
-
- task:
|
15 |
-
name: Audio Classification
|
16 |
-
type: audio-classification
|
17 |
-
dataset:
|
18 |
-
name: audiofolder
|
19 |
-
type: audiofolder
|
20 |
-
config: default
|
21 |
-
split: train
|
22 |
-
args: default
|
23 |
-
metrics:
|
24 |
-
- name: Accuracy
|
25 |
-
type: accuracy
|
26 |
-
value: 0.8691275167785235
|
27 |
---
|
28 |
|
29 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -31,10 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
31 |
|
32 |
# distilhubert-finetuned-mixed-data
|
33 |
|
34 |
-
This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on
|
35 |
It achieves the following results on the evaluation set:
|
36 |
-
- Loss: 0.
|
37 |
-
- Accuracy: 0.
|
|
|
|
|
|
|
38 |
|
39 |
## Model description
|
40 |
|
@@ -61,25 +52,30 @@ The following hyperparameters were used during training:
|
|
61 |
- total_train_batch_size: 64
|
62 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
63 |
- lr_scheduler_type: cosine
|
64 |
-
- lr_scheduler_warmup_ratio: 0.
|
65 |
-
- num_epochs:
|
66 |
|
67 |
### Training results
|
68 |
|
69 |
-
| Training Loss | Epoch
|
70 |
-
|
71 |
-
| No log | 0.
|
72 |
-
| No log | 1.
|
73 |
-
| No log | 2.
|
74 |
-
| No log |
|
75 |
-
| No log | 4.
|
76 |
-
| No log | 5.
|
77 |
-
| No log | 6.
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
|
80 |
### Framework versions
|
81 |
|
82 |
- Transformers 4.44.2
|
83 |
-
- Pytorch 2.4.
|
84 |
-
- Datasets 2.21.0
|
85 |
- Tokenizers 0.19.1
|
|
|
4 |
base_model: ntu-spml/distilhubert
|
5 |
tags:
|
6 |
- generated_from_trainer
|
|
|
|
|
7 |
metrics:
|
8 |
- accuracy
|
9 |
+
- precision
|
10 |
+
- recall
|
11 |
+
- f1
|
12 |
model-index:
|
13 |
- name: distilhubert-finetuned-mixed-data
|
14 |
+
results: []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
---
|
16 |
|
17 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
19 |
|
20 |
# distilhubert-finetuned-mixed-data
|
21 |
|
22 |
+
This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
|
23 |
It achieves the following results on the evaluation set:
|
24 |
+
- Loss: 0.4765
|
25 |
+
- Accuracy: 0.8475
|
26 |
+
- Precision: 0.8497
|
27 |
+
- Recall: 0.8475
|
28 |
+
- F1: 0.8484
|
29 |
|
30 |
## Model description
|
31 |
|
|
|
52 |
- total_train_batch_size: 64
|
53 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
54 |
- lr_scheduler_type: cosine
|
55 |
+
- lr_scheduler_warmup_ratio: 0.01
|
56 |
+
- num_epochs: 15
|
57 |
|
58 |
### Training results
|
59 |
|
60 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
|
61 |
+
|:-------------:|:-------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
|
62 |
+
| No log | 0.9897 | 24 | 0.9809 | 0.6357 | 0.5920 | 0.6357 | 0.5921 |
|
63 |
+
| No log | 1.9794 | 48 | 0.7444 | 0.7158 | 0.6992 | 0.7158 | 0.6905 |
|
64 |
+
| No log | 2.9691 | 72 | 0.6172 | 0.7494 | 0.7438 | 0.7494 | 0.7449 |
|
65 |
+
| No log | 4.0 | 97 | 0.5431 | 0.7984 | 0.7918 | 0.7984 | 0.7874 |
|
66 |
+
| No log | 4.9897 | 121 | 0.5269 | 0.8010 | 0.8006 | 0.8010 | 0.7975 |
|
67 |
+
| No log | 5.9794 | 145 | 0.5811 | 0.7494 | 0.7802 | 0.7494 | 0.7551 |
|
68 |
+
| No log | 6.9691 | 169 | 0.4408 | 0.8398 | 0.8366 | 0.8398 | 0.8355 |
|
69 |
+
| No log | 8.0 | 194 | 0.4687 | 0.8191 | 0.8188 | 0.8191 | 0.8168 |
|
70 |
+
| No log | 8.9897 | 218 | 0.4364 | 0.8475 | 0.8483 | 0.8475 | 0.8474 |
|
71 |
+
| No log | 9.9794 | 242 | 0.4291 | 0.8579 | 0.8561 | 0.8579 | 0.8568 |
|
72 |
+
| No log | 10.9691 | 266 | 0.4699 | 0.8501 | 0.8582 | 0.8501 | 0.8528 |
|
73 |
+
| No log | 12.0 | 291 | 0.4862 | 0.8450 | 0.8536 | 0.8450 | 0.8480 |
|
74 |
+
| No log | 12.9897 | 315 | 0.4765 | 0.8475 | 0.8497 | 0.8475 | 0.8484 |
|
75 |
|
76 |
|
77 |
### Framework versions
|
78 |
|
79 |
- Transformers 4.44.2
|
80 |
+
- Pytorch 2.4.1+cu121
|
|
|
81 |
- Tokenizers 0.19.1
|
checkpoint-121/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61738703583616f26bc5974045f101b4badf661cb1bd5759b55ffedbf42f8309
|
3 |
size 94765560
|
checkpoint-121/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a04d7aa37706181567dea5d9c17d0a79ae70a3f43b546fd604def5a5c896f325
|
3 |
size 189556666
|
checkpoint-121/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c4a0a93ff2c02ad8dfec37403a0eff289573d406127ab0a60fb4386bc8bcb2b
|
3 |
size 1064
|
checkpoint-121/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
|
4 |
"epoch": 4.989690721649485,
|
5 |
"eval_steps": 500,
|
@@ -10,69 +10,69 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second":
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second": 28.
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
-
"eval_accuracy": 0.
|
38 |
-
"eval_f1": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_precision": 0.
|
41 |
-
"eval_recall": 0.
|
42 |
-
"eval_runtime": 1.
|
43 |
-
"eval_samples_per_second":
|
44 |
-
"eval_steps_per_second": 28.
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
-
"eval_accuracy": 0.
|
50 |
-
"eval_f1": 0.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_precision": 0.
|
53 |
-
"eval_recall": 0.
|
54 |
-
"eval_runtime": 1.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second": 28.
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
-
"eval_accuracy": 0.
|
62 |
-
"eval_f1": 0.
|
63 |
-
"eval_loss": 0.
|
64 |
-
"eval_precision": 0.
|
65 |
-
"eval_recall": 0.
|
66 |
-
"eval_runtime": 1.
|
67 |
-
"eval_samples_per_second":
|
68 |
-
"eval_steps_per_second":
|
69 |
"step": 121
|
70 |
}
|
71 |
],
|
72 |
"logging_steps": 500,
|
73 |
-
"max_steps":
|
74 |
"num_input_tokens_seen": 0,
|
75 |
-
"num_train_epochs":
|
76 |
"save_steps": 500,
|
77 |
"stateful_callbacks": {
|
78 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8010335917312662,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
|
4 |
"epoch": 4.989690721649485,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.6356589147286822,
|
14 |
+
"eval_f1": 0.5920563016978556,
|
15 |
+
"eval_loss": 0.980873703956604,
|
16 |
+
"eval_precision": 0.5920482291587493,
|
17 |
+
"eval_recall": 0.6356589147286822,
|
18 |
+
"eval_runtime": 1.6668,
|
19 |
+
"eval_samples_per_second": 232.175,
|
20 |
+
"eval_steps_per_second": 29.397,
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7157622739018088,
|
26 |
+
"eval_f1": 0.6905410405322238,
|
27 |
+
"eval_loss": 0.7444477081298828,
|
28 |
+
"eval_precision": 0.6992377248989063,
|
29 |
+
"eval_recall": 0.7157622739018088,
|
30 |
+
"eval_runtime": 1.6941,
|
31 |
+
"eval_samples_per_second": 228.443,
|
32 |
+
"eval_steps_per_second": 28.924,
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7493540051679587,
|
38 |
+
"eval_f1": 0.744898505571463,
|
39 |
+
"eval_loss": 0.6171658039093018,
|
40 |
+
"eval_precision": 0.7437592422989429,
|
41 |
+
"eval_recall": 0.7493540051679587,
|
42 |
+
"eval_runtime": 1.6943,
|
43 |
+
"eval_samples_per_second": 228.408,
|
44 |
+
"eval_steps_per_second": 28.92,
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.7984496124031008,
|
50 |
+
"eval_f1": 0.7873621619744228,
|
51 |
+
"eval_loss": 0.5430988073348999,
|
52 |
+
"eval_precision": 0.79180344284319,
|
53 |
+
"eval_recall": 0.7984496124031008,
|
54 |
+
"eval_runtime": 1.7027,
|
55 |
+
"eval_samples_per_second": 227.289,
|
56 |
+
"eval_steps_per_second": 28.778,
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.8010335917312662,
|
62 |
+
"eval_f1": 0.7974946178390901,
|
63 |
+
"eval_loss": 0.5268548130989075,
|
64 |
+
"eval_precision": 0.8005965453214461,
|
65 |
+
"eval_recall": 0.8010335917312662,
|
66 |
+
"eval_runtime": 1.6829,
|
67 |
+
"eval_samples_per_second": 229.957,
|
68 |
+
"eval_steps_per_second": 29.116,
|
69 |
"step": 121
|
70 |
}
|
71 |
],
|
72 |
"logging_steps": 500,
|
73 |
+
"max_steps": 360,
|
74 |
"num_input_tokens_seen": 0,
|
75 |
+
"num_train_epochs": 15,
|
76 |
"save_steps": 500,
|
77 |
"stateful_callbacks": {
|
78 |
"EarlyStoppingCallback": {
|
checkpoint-121/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
|
3 |
size 5240
|
checkpoint-145/config.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"finetuning_task": "audio-classification",
|
49 |
+
"hidden_act": "gelu",
|
50 |
+
"hidden_dropout": 0.1,
|
51 |
+
"hidden_size": 768,
|
52 |
+
"id2label": {
|
53 |
+
"0": "1s_normal",
|
54 |
+
"1": "1s_pain",
|
55 |
+
"2": "1s_hunger",
|
56 |
+
"3": "1s_asphyxia"
|
57 |
+
},
|
58 |
+
"initializer_range": 0.02,
|
59 |
+
"intermediate_size": 3072,
|
60 |
+
"label2id": {
|
61 |
+
"1s_asphyxia": 3,
|
62 |
+
"1s_hunger": 2,
|
63 |
+
"1s_normal": 0,
|
64 |
+
"1s_pain": 1
|
65 |
+
},
|
66 |
+
"layer_norm_eps": 1e-05,
|
67 |
+
"layerdrop": 0.0,
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_prob": 0.05,
|
74 |
+
"model_type": "hubert",
|
75 |
+
"num_attention_heads": 12,
|
76 |
+
"num_conv_pos_embedding_groups": 16,
|
77 |
+
"num_conv_pos_embeddings": 128,
|
78 |
+
"num_feat_extract_layers": 7,
|
79 |
+
"num_hidden_layers": 2,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"torch_dtype": "float32",
|
82 |
+
"transformers_version": "4.44.2",
|
83 |
+
"use_weighted_layer_sum": false,
|
84 |
+
"vocab_size": 32
|
85 |
+
}
|
checkpoint-145/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62b1cac8d8be6152086485540750316f43a58660bddd11c298bb50d0f3f5f531
|
3 |
+
size 94765560
|
checkpoint-145/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f74fa4c7ca2e76ec752e5b913444b39e61cef0b5760afb453b5c55805b47cacc
|
3 |
+
size 189556666
|
checkpoint-145/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0867bb3588983088e1ae19ae31c54b18cd181442273a375c356a0362c53a9a5
|
3 |
+
size 14308
|
checkpoint-145/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62e2160d5bd28532b533e3efabec8ccad052eb12401ff324f1c912ac3e74c929
|
3 |
+
size 1064
|
checkpoint-145/trainer_state.json
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8010335917312662,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
|
4 |
+
"epoch": 5.979381443298969,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 145,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.6356589147286822,
|
14 |
+
"eval_f1": 0.5920563016978556,
|
15 |
+
"eval_loss": 0.980873703956604,
|
16 |
+
"eval_precision": 0.5920482291587493,
|
17 |
+
"eval_recall": 0.6356589147286822,
|
18 |
+
"eval_runtime": 1.6668,
|
19 |
+
"eval_samples_per_second": 232.175,
|
20 |
+
"eval_steps_per_second": 29.397,
|
21 |
+
"step": 24
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7157622739018088,
|
26 |
+
"eval_f1": 0.6905410405322238,
|
27 |
+
"eval_loss": 0.7444477081298828,
|
28 |
+
"eval_precision": 0.6992377248989063,
|
29 |
+
"eval_recall": 0.7157622739018088,
|
30 |
+
"eval_runtime": 1.6941,
|
31 |
+
"eval_samples_per_second": 228.443,
|
32 |
+
"eval_steps_per_second": 28.924,
|
33 |
+
"step": 48
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7493540051679587,
|
38 |
+
"eval_f1": 0.744898505571463,
|
39 |
+
"eval_loss": 0.6171658039093018,
|
40 |
+
"eval_precision": 0.7437592422989429,
|
41 |
+
"eval_recall": 0.7493540051679587,
|
42 |
+
"eval_runtime": 1.6943,
|
43 |
+
"eval_samples_per_second": 228.408,
|
44 |
+
"eval_steps_per_second": 28.92,
|
45 |
+
"step": 72
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.7984496124031008,
|
50 |
+
"eval_f1": 0.7873621619744228,
|
51 |
+
"eval_loss": 0.5430988073348999,
|
52 |
+
"eval_precision": 0.79180344284319,
|
53 |
+
"eval_recall": 0.7984496124031008,
|
54 |
+
"eval_runtime": 1.7027,
|
55 |
+
"eval_samples_per_second": 227.289,
|
56 |
+
"eval_steps_per_second": 28.778,
|
57 |
+
"step": 97
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.8010335917312662,
|
62 |
+
"eval_f1": 0.7974946178390901,
|
63 |
+
"eval_loss": 0.5268548130989075,
|
64 |
+
"eval_precision": 0.8005965453214461,
|
65 |
+
"eval_recall": 0.8010335917312662,
|
66 |
+
"eval_runtime": 1.6829,
|
67 |
+
"eval_samples_per_second": 229.957,
|
68 |
+
"eval_steps_per_second": 29.116,
|
69 |
+
"step": 121
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7493540051679587,
|
74 |
+
"eval_f1": 0.7551329793037762,
|
75 |
+
"eval_loss": 0.5810549259185791,
|
76 |
+
"eval_precision": 0.7802262423287315,
|
77 |
+
"eval_recall": 0.7493540051679587,
|
78 |
+
"eval_runtime": 1.6999,
|
79 |
+
"eval_samples_per_second": 227.658,
|
80 |
+
"eval_steps_per_second": 28.825,
|
81 |
+
"step": 145
|
82 |
+
}
|
83 |
+
],
|
84 |
+
"logging_steps": 500,
|
85 |
+
"max_steps": 360,
|
86 |
+
"num_input_tokens_seen": 0,
|
87 |
+
"num_train_epochs": 15,
|
88 |
+
"save_steps": 500,
|
89 |
+
"stateful_callbacks": {
|
90 |
+
"EarlyStoppingCallback": {
|
91 |
+
"args": {
|
92 |
+
"early_stopping_patience": 3,
|
93 |
+
"early_stopping_threshold": 0.0
|
94 |
+
},
|
95 |
+
"attributes": {
|
96 |
+
"early_stopping_patience_counter": 0
|
97 |
+
}
|
98 |
+
},
|
99 |
+
"TrainerControl": {
|
100 |
+
"args": {
|
101 |
+
"should_epoch_stop": false,
|
102 |
+
"should_evaluate": false,
|
103 |
+
"should_log": false,
|
104 |
+
"should_save": true,
|
105 |
+
"should_training_stop": false
|
106 |
+
},
|
107 |
+
"attributes": {}
|
108 |
+
}
|
109 |
+
},
|
110 |
+
"total_flos": 2.10820802112e+16,
|
111 |
+
"train_batch_size": 8,
|
112 |
+
"trial_name": null,
|
113 |
+
"trial_params": null
|
114 |
+
}
|
checkpoint-145/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
|
3 |
+
size 5240
|
checkpoint-169/config.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"finetuning_task": "audio-classification",
|
49 |
+
"hidden_act": "gelu",
|
50 |
+
"hidden_dropout": 0.1,
|
51 |
+
"hidden_size": 768,
|
52 |
+
"id2label": {
|
53 |
+
"0": "1s_normal",
|
54 |
+
"1": "1s_pain",
|
55 |
+
"2": "1s_hunger",
|
56 |
+
"3": "1s_asphyxia"
|
57 |
+
},
|
58 |
+
"initializer_range": 0.02,
|
59 |
+
"intermediate_size": 3072,
|
60 |
+
"label2id": {
|
61 |
+
"1s_asphyxia": 3,
|
62 |
+
"1s_hunger": 2,
|
63 |
+
"1s_normal": 0,
|
64 |
+
"1s_pain": 1
|
65 |
+
},
|
66 |
+
"layer_norm_eps": 1e-05,
|
67 |
+
"layerdrop": 0.0,
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_prob": 0.05,
|
74 |
+
"model_type": "hubert",
|
75 |
+
"num_attention_heads": 12,
|
76 |
+
"num_conv_pos_embedding_groups": 16,
|
77 |
+
"num_conv_pos_embeddings": 128,
|
78 |
+
"num_feat_extract_layers": 7,
|
79 |
+
"num_hidden_layers": 2,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"torch_dtype": "float32",
|
82 |
+
"transformers_version": "4.44.2",
|
83 |
+
"use_weighted_layer_sum": false,
|
84 |
+
"vocab_size": 32
|
85 |
+
}
|
checkpoint-169/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:052c2fac14563eb5b0b53eb4a3f6509d46dcf61f99ea7c0e5e76df5345230eff
|
3 |
+
size 94765560
|
checkpoint-169/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:672db33956055547a36f8f888cdb0f8124de62210906cbe58b87898baf646fe9
|
3 |
+
size 189556666
|
checkpoint-169/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3d85d4b435d4ac4552bb46f32d5f63a55dbc65baaa5af6a14b6b39e968f4b8e
|
3 |
+
size 14308
|
checkpoint-169/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a897dacc1b915150336aff74c1f17b3b8504d76168703307724c32fe3bca6896
|
3 |
+
size 1064
|
checkpoint-169/trainer_state.json
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8397932816537468,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
|
4 |
+
"epoch": 6.969072164948454,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 169,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.6356589147286822,
|
14 |
+
"eval_f1": 0.5920563016978556,
|
15 |
+
"eval_loss": 0.980873703956604,
|
16 |
+
"eval_precision": 0.5920482291587493,
|
17 |
+
"eval_recall": 0.6356589147286822,
|
18 |
+
"eval_runtime": 1.6668,
|
19 |
+
"eval_samples_per_second": 232.175,
|
20 |
+
"eval_steps_per_second": 29.397,
|
21 |
+
"step": 24
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7157622739018088,
|
26 |
+
"eval_f1": 0.6905410405322238,
|
27 |
+
"eval_loss": 0.7444477081298828,
|
28 |
+
"eval_precision": 0.6992377248989063,
|
29 |
+
"eval_recall": 0.7157622739018088,
|
30 |
+
"eval_runtime": 1.6941,
|
31 |
+
"eval_samples_per_second": 228.443,
|
32 |
+
"eval_steps_per_second": 28.924,
|
33 |
+
"step": 48
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7493540051679587,
|
38 |
+
"eval_f1": 0.744898505571463,
|
39 |
+
"eval_loss": 0.6171658039093018,
|
40 |
+
"eval_precision": 0.7437592422989429,
|
41 |
+
"eval_recall": 0.7493540051679587,
|
42 |
+
"eval_runtime": 1.6943,
|
43 |
+
"eval_samples_per_second": 228.408,
|
44 |
+
"eval_steps_per_second": 28.92,
|
45 |
+
"step": 72
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.7984496124031008,
|
50 |
+
"eval_f1": 0.7873621619744228,
|
51 |
+
"eval_loss": 0.5430988073348999,
|
52 |
+
"eval_precision": 0.79180344284319,
|
53 |
+
"eval_recall": 0.7984496124031008,
|
54 |
+
"eval_runtime": 1.7027,
|
55 |
+
"eval_samples_per_second": 227.289,
|
56 |
+
"eval_steps_per_second": 28.778,
|
57 |
+
"step": 97
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.8010335917312662,
|
62 |
+
"eval_f1": 0.7974946178390901,
|
63 |
+
"eval_loss": 0.5268548130989075,
|
64 |
+
"eval_precision": 0.8005965453214461,
|
65 |
+
"eval_recall": 0.8010335917312662,
|
66 |
+
"eval_runtime": 1.6829,
|
67 |
+
"eval_samples_per_second": 229.957,
|
68 |
+
"eval_steps_per_second": 29.116,
|
69 |
+
"step": 121
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7493540051679587,
|
74 |
+
"eval_f1": 0.7551329793037762,
|
75 |
+
"eval_loss": 0.5810549259185791,
|
76 |
+
"eval_precision": 0.7802262423287315,
|
77 |
+
"eval_recall": 0.7493540051679587,
|
78 |
+
"eval_runtime": 1.6999,
|
79 |
+
"eval_samples_per_second": 227.658,
|
80 |
+
"eval_steps_per_second": 28.825,
|
81 |
+
"step": 145
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8397932816537468,
|
86 |
+
"eval_f1": 0.8354840008265724,
|
87 |
+
"eval_loss": 0.44080850481987,
|
88 |
+
"eval_precision": 0.8365717854569443,
|
89 |
+
"eval_recall": 0.8397932816537468,
|
90 |
+
"eval_runtime": 1.6785,
|
91 |
+
"eval_samples_per_second": 230.56,
|
92 |
+
"eval_steps_per_second": 29.192,
|
93 |
+
"step": 169
|
94 |
+
}
|
95 |
+
],
|
96 |
+
"logging_steps": 500,
|
97 |
+
"max_steps": 360,
|
98 |
+
"num_input_tokens_seen": 0,
|
99 |
+
"num_train_epochs": 15,
|
100 |
+
"save_steps": 500,
|
101 |
+
"stateful_callbacks": {
|
102 |
+
"EarlyStoppingCallback": {
|
103 |
+
"args": {
|
104 |
+
"early_stopping_patience": 3,
|
105 |
+
"early_stopping_threshold": 0.0
|
106 |
+
},
|
107 |
+
"attributes": {
|
108 |
+
"early_stopping_patience_counter": 0
|
109 |
+
}
|
110 |
+
},
|
111 |
+
"TrainerControl": {
|
112 |
+
"args": {
|
113 |
+
"should_epoch_stop": false,
|
114 |
+
"should_evaluate": false,
|
115 |
+
"should_log": false,
|
116 |
+
"should_save": true,
|
117 |
+
"should_training_stop": false
|
118 |
+
},
|
119 |
+
"attributes": {}
|
120 |
+
}
|
121 |
+
},
|
122 |
+
"total_flos": 2.45957602464e+16,
|
123 |
+
"train_batch_size": 8,
|
124 |
+
"trial_name": null,
|
125 |
+
"trial_params": null
|
126 |
+
}
|
checkpoint-169/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
|
3 |
+
size 5240
|
checkpoint-194/config.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"finetuning_task": "audio-classification",
|
49 |
+
"hidden_act": "gelu",
|
50 |
+
"hidden_dropout": 0.1,
|
51 |
+
"hidden_size": 768,
|
52 |
+
"id2label": {
|
53 |
+
"0": "1s_normal",
|
54 |
+
"1": "1s_pain",
|
55 |
+
"2": "1s_hunger",
|
56 |
+
"3": "1s_asphyxia"
|
57 |
+
},
|
58 |
+
"initializer_range": 0.02,
|
59 |
+
"intermediate_size": 3072,
|
60 |
+
"label2id": {
|
61 |
+
"1s_asphyxia": 3,
|
62 |
+
"1s_hunger": 2,
|
63 |
+
"1s_normal": 0,
|
64 |
+
"1s_pain": 1
|
65 |
+
},
|
66 |
+
"layer_norm_eps": 1e-05,
|
67 |
+
"layerdrop": 0.0,
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_prob": 0.05,
|
74 |
+
"model_type": "hubert",
|
75 |
+
"num_attention_heads": 12,
|
76 |
+
"num_conv_pos_embedding_groups": 16,
|
77 |
+
"num_conv_pos_embeddings": 128,
|
78 |
+
"num_feat_extract_layers": 7,
|
79 |
+
"num_hidden_layers": 2,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"torch_dtype": "float32",
|
82 |
+
"transformers_version": "4.44.2",
|
83 |
+
"use_weighted_layer_sum": false,
|
84 |
+
"vocab_size": 32
|
85 |
+
}
|
checkpoint-194/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d03ee65315496952ebb447ef8a2d41c18b1e5e1649887b43e9d670daab42cd79
|
3 |
+
size 94765560
|
checkpoint-194/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21b9a71b29339ce815e0272ac73c5e9382e8e4b94e000d00c2c15c46fcaf1bb1
|
3 |
+
size 189556666
|
checkpoint-194/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a98371a8e546887d8a0cf63952365049758fe889acd42fc72eb734ef3af0332
|
3 |
+
size 14308
|
checkpoint-194/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cf3f9f9e1b079187c143133062b7476177d05ae3b26e39797b7b8deffda481f
|
3 |
+
size 1064
|
checkpoint-194/trainer_state.json
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8397932816537468,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 194,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.6356589147286822,
|
14 |
+
"eval_f1": 0.5920563016978556,
|
15 |
+
"eval_loss": 0.980873703956604,
|
16 |
+
"eval_precision": 0.5920482291587493,
|
17 |
+
"eval_recall": 0.6356589147286822,
|
18 |
+
"eval_runtime": 1.6668,
|
19 |
+
"eval_samples_per_second": 232.175,
|
20 |
+
"eval_steps_per_second": 29.397,
|
21 |
+
"step": 24
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7157622739018088,
|
26 |
+
"eval_f1": 0.6905410405322238,
|
27 |
+
"eval_loss": 0.7444477081298828,
|
28 |
+
"eval_precision": 0.6992377248989063,
|
29 |
+
"eval_recall": 0.7157622739018088,
|
30 |
+
"eval_runtime": 1.6941,
|
31 |
+
"eval_samples_per_second": 228.443,
|
32 |
+
"eval_steps_per_second": 28.924,
|
33 |
+
"step": 48
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7493540051679587,
|
38 |
+
"eval_f1": 0.744898505571463,
|
39 |
+
"eval_loss": 0.6171658039093018,
|
40 |
+
"eval_precision": 0.7437592422989429,
|
41 |
+
"eval_recall": 0.7493540051679587,
|
42 |
+
"eval_runtime": 1.6943,
|
43 |
+
"eval_samples_per_second": 228.408,
|
44 |
+
"eval_steps_per_second": 28.92,
|
45 |
+
"step": 72
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.7984496124031008,
|
50 |
+
"eval_f1": 0.7873621619744228,
|
51 |
+
"eval_loss": 0.5430988073348999,
|
52 |
+
"eval_precision": 0.79180344284319,
|
53 |
+
"eval_recall": 0.7984496124031008,
|
54 |
+
"eval_runtime": 1.7027,
|
55 |
+
"eval_samples_per_second": 227.289,
|
56 |
+
"eval_steps_per_second": 28.778,
|
57 |
+
"step": 97
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.8010335917312662,
|
62 |
+
"eval_f1": 0.7974946178390901,
|
63 |
+
"eval_loss": 0.5268548130989075,
|
64 |
+
"eval_precision": 0.8005965453214461,
|
65 |
+
"eval_recall": 0.8010335917312662,
|
66 |
+
"eval_runtime": 1.6829,
|
67 |
+
"eval_samples_per_second": 229.957,
|
68 |
+
"eval_steps_per_second": 29.116,
|
69 |
+
"step": 121
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7493540051679587,
|
74 |
+
"eval_f1": 0.7551329793037762,
|
75 |
+
"eval_loss": 0.5810549259185791,
|
76 |
+
"eval_precision": 0.7802262423287315,
|
77 |
+
"eval_recall": 0.7493540051679587,
|
78 |
+
"eval_runtime": 1.6999,
|
79 |
+
"eval_samples_per_second": 227.658,
|
80 |
+
"eval_steps_per_second": 28.825,
|
81 |
+
"step": 145
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8397932816537468,
|
86 |
+
"eval_f1": 0.8354840008265724,
|
87 |
+
"eval_loss": 0.44080850481987,
|
88 |
+
"eval_precision": 0.8365717854569443,
|
89 |
+
"eval_recall": 0.8397932816537468,
|
90 |
+
"eval_runtime": 1.6785,
|
91 |
+
"eval_samples_per_second": 230.56,
|
92 |
+
"eval_steps_per_second": 29.192,
|
93 |
+
"step": 169
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 8.0,
|
97 |
+
"eval_accuracy": 0.8191214470284238,
|
98 |
+
"eval_f1": 0.8167837576120855,
|
99 |
+
"eval_loss": 0.46871018409729004,
|
100 |
+
"eval_precision": 0.8187643627479353,
|
101 |
+
"eval_recall": 0.8191214470284238,
|
102 |
+
"eval_runtime": 1.6719,
|
103 |
+
"eval_samples_per_second": 231.477,
|
104 |
+
"eval_steps_per_second": 29.308,
|
105 |
+
"step": 194
|
106 |
+
}
|
107 |
+
],
|
108 |
+
"logging_steps": 500,
|
109 |
+
"max_steps": 360,
|
110 |
+
"num_input_tokens_seen": 0,
|
111 |
+
"num_train_epochs": 15,
|
112 |
+
"save_steps": 500,
|
113 |
+
"stateful_callbacks": {
|
114 |
+
"EarlyStoppingCallback": {
|
115 |
+
"args": {
|
116 |
+
"early_stopping_patience": 3,
|
117 |
+
"early_stopping_threshold": 0.0
|
118 |
+
},
|
119 |
+
"attributes": {
|
120 |
+
"early_stopping_patience_counter": 0
|
121 |
+
}
|
122 |
+
},
|
123 |
+
"TrainerControl": {
|
124 |
+
"args": {
|
125 |
+
"should_epoch_stop": false,
|
126 |
+
"should_evaluate": false,
|
127 |
+
"should_log": false,
|
128 |
+
"should_save": true,
|
129 |
+
"should_training_stop": false
|
130 |
+
},
|
131 |
+
"attributes": {}
|
132 |
+
}
|
133 |
+
},
|
134 |
+
"total_flos": 2.81094402816e+16,
|
135 |
+
"train_batch_size": 8,
|
136 |
+
"trial_name": null,
|
137 |
+
"trial_params": null
|
138 |
+
}
|
checkpoint-194/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
|
3 |
+
size 5240
|
checkpoint-218/config.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"finetuning_task": "audio-classification",
|
49 |
+
"hidden_act": "gelu",
|
50 |
+
"hidden_dropout": 0.1,
|
51 |
+
"hidden_size": 768,
|
52 |
+
"id2label": {
|
53 |
+
"0": "1s_normal",
|
54 |
+
"1": "1s_pain",
|
55 |
+
"2": "1s_hunger",
|
56 |
+
"3": "1s_asphyxia"
|
57 |
+
},
|
58 |
+
"initializer_range": 0.02,
|
59 |
+
"intermediate_size": 3072,
|
60 |
+
"label2id": {
|
61 |
+
"1s_asphyxia": 3,
|
62 |
+
"1s_hunger": 2,
|
63 |
+
"1s_normal": 0,
|
64 |
+
"1s_pain": 1
|
65 |
+
},
|
66 |
+
"layer_norm_eps": 1e-05,
|
67 |
+
"layerdrop": 0.0,
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_prob": 0.05,
|
74 |
+
"model_type": "hubert",
|
75 |
+
"num_attention_heads": 12,
|
76 |
+
"num_conv_pos_embedding_groups": 16,
|
77 |
+
"num_conv_pos_embeddings": 128,
|
78 |
+
"num_feat_extract_layers": 7,
|
79 |
+
"num_hidden_layers": 2,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"torch_dtype": "float32",
|
82 |
+
"transformers_version": "4.44.2",
|
83 |
+
"use_weighted_layer_sum": false,
|
84 |
+
"vocab_size": 32
|
85 |
+
}
|
checkpoint-218/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f22baf06235c468452afb48784bcfe17b8d6b16547f92c897a6b83b9d4be1927
|
3 |
+
size 94765560
|
checkpoint-218/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b107c786ae699e053540620707226aced653806a534f7bf2c3d041d5a799d0d
|
3 |
+
size 189556666
|
checkpoint-218/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87f5981b8629d20da8d7bb452f1f8e311534ac9a1fbf9cf6ca9f99f281c14bfa
|
3 |
+
size 14308
|
checkpoint-218/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e51016c1626b13f8e0c2274df9c63eb74ef480d103cd1eaea425e0978c8cf392
|
3 |
+
size 1064
|
checkpoint-218/trainer_state.json
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8475452196382429,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-218",
|
4 |
+
"epoch": 8.989690721649485,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 218,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.6356589147286822,
|
14 |
+
"eval_f1": 0.5920563016978556,
|
15 |
+
"eval_loss": 0.980873703956604,
|
16 |
+
"eval_precision": 0.5920482291587493,
|
17 |
+
"eval_recall": 0.6356589147286822,
|
18 |
+
"eval_runtime": 1.6668,
|
19 |
+
"eval_samples_per_second": 232.175,
|
20 |
+
"eval_steps_per_second": 29.397,
|
21 |
+
"step": 24
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7157622739018088,
|
26 |
+
"eval_f1": 0.6905410405322238,
|
27 |
+
"eval_loss": 0.7444477081298828,
|
28 |
+
"eval_precision": 0.6992377248989063,
|
29 |
+
"eval_recall": 0.7157622739018088,
|
30 |
+
"eval_runtime": 1.6941,
|
31 |
+
"eval_samples_per_second": 228.443,
|
32 |
+
"eval_steps_per_second": 28.924,
|
33 |
+
"step": 48
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7493540051679587,
|
38 |
+
"eval_f1": 0.744898505571463,
|
39 |
+
"eval_loss": 0.6171658039093018,
|
40 |
+
"eval_precision": 0.7437592422989429,
|
41 |
+
"eval_recall": 0.7493540051679587,
|
42 |
+
"eval_runtime": 1.6943,
|
43 |
+
"eval_samples_per_second": 228.408,
|
44 |
+
"eval_steps_per_second": 28.92,
|
45 |
+
"step": 72
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.7984496124031008,
|
50 |
+
"eval_f1": 0.7873621619744228,
|
51 |
+
"eval_loss": 0.5430988073348999,
|
52 |
+
"eval_precision": 0.79180344284319,
|
53 |
+
"eval_recall": 0.7984496124031008,
|
54 |
+
"eval_runtime": 1.7027,
|
55 |
+
"eval_samples_per_second": 227.289,
|
56 |
+
"eval_steps_per_second": 28.778,
|
57 |
+
"step": 97
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.8010335917312662,
|
62 |
+
"eval_f1": 0.7974946178390901,
|
63 |
+
"eval_loss": 0.5268548130989075,
|
64 |
+
"eval_precision": 0.8005965453214461,
|
65 |
+
"eval_recall": 0.8010335917312662,
|
66 |
+
"eval_runtime": 1.6829,
|
67 |
+
"eval_samples_per_second": 229.957,
|
68 |
+
"eval_steps_per_second": 29.116,
|
69 |
+
"step": 121
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7493540051679587,
|
74 |
+
"eval_f1": 0.7551329793037762,
|
75 |
+
"eval_loss": 0.5810549259185791,
|
76 |
+
"eval_precision": 0.7802262423287315,
|
77 |
+
"eval_recall": 0.7493540051679587,
|
78 |
+
"eval_runtime": 1.6999,
|
79 |
+
"eval_samples_per_second": 227.658,
|
80 |
+
"eval_steps_per_second": 28.825,
|
81 |
+
"step": 145
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8397932816537468,
|
86 |
+
"eval_f1": 0.8354840008265724,
|
87 |
+
"eval_loss": 0.44080850481987,
|
88 |
+
"eval_precision": 0.8365717854569443,
|
89 |
+
"eval_recall": 0.8397932816537468,
|
90 |
+
"eval_runtime": 1.6785,
|
91 |
+
"eval_samples_per_second": 230.56,
|
92 |
+
"eval_steps_per_second": 29.192,
|
93 |
+
"step": 169
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 8.0,
|
97 |
+
"eval_accuracy": 0.8191214470284238,
|
98 |
+
"eval_f1": 0.8167837576120855,
|
99 |
+
"eval_loss": 0.46871018409729004,
|
100 |
+
"eval_precision": 0.8187643627479353,
|
101 |
+
"eval_recall": 0.8191214470284238,
|
102 |
+
"eval_runtime": 1.6719,
|
103 |
+
"eval_samples_per_second": 231.477,
|
104 |
+
"eval_steps_per_second": 29.308,
|
105 |
+
"step": 194
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 8.989690721649485,
|
109 |
+
"eval_accuracy": 0.8475452196382429,
|
110 |
+
"eval_f1": 0.8473710740005564,
|
111 |
+
"eval_loss": 0.4363822937011719,
|
112 |
+
"eval_precision": 0.8483238707679635,
|
113 |
+
"eval_recall": 0.8475452196382429,
|
114 |
+
"eval_runtime": 1.6838,
|
115 |
+
"eval_samples_per_second": 229.835,
|
116 |
+
"eval_steps_per_second": 29.101,
|
117 |
+
"step": 218
|
118 |
+
}
|
119 |
+
],
|
120 |
+
"logging_steps": 500,
|
121 |
+
"max_steps": 360,
|
122 |
+
"num_input_tokens_seen": 0,
|
123 |
+
"num_train_epochs": 15,
|
124 |
+
"save_steps": 500,
|
125 |
+
"stateful_callbacks": {
|
126 |
+
"EarlyStoppingCallback": {
|
127 |
+
"args": {
|
128 |
+
"early_stopping_patience": 3,
|
129 |
+
"early_stopping_threshold": 0.0
|
130 |
+
},
|
131 |
+
"attributes": {
|
132 |
+
"early_stopping_patience_counter": 0
|
133 |
+
}
|
134 |
+
},
|
135 |
+
"TrainerControl": {
|
136 |
+
"args": {
|
137 |
+
"should_epoch_stop": false,
|
138 |
+
"should_evaluate": false,
|
139 |
+
"should_log": false,
|
140 |
+
"should_save": true,
|
141 |
+
"should_training_stop": false
|
142 |
+
},
|
143 |
+
"attributes": {}
|
144 |
+
}
|
145 |
+
},
|
146 |
+
"total_flos": 3.16231203168e+16,
|
147 |
+
"train_batch_size": 8,
|
148 |
+
"trial_name": null,
|
149 |
+
"trial_params": null
|
150 |
+
}
|
checkpoint-218/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
|
3 |
+
size 5240
|
checkpoint-24/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a3bee6833ba8b12da8d51850147bb3c3b153eec346dfd3b0fda4ff730aa9026
|
3 |
size 94765560
|
checkpoint-24/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:856d9a47a6db002c6e55d97a99db231cee10d1fef5c0909f7d527b86c30af746
|
3 |
size 189556666
|
checkpoint-24/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:202bb63ca18cd0b022bafcf6cf19faa822d2e1dbc2cdaf6b5a9bdcbe21ca7562
|
3 |
size 14308
|
checkpoint-24/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a45410ab60db58a246b1af794984e39ea7655e4cee627c4cdfe5bd40727f72a
|
3 |
size 1064
|
checkpoint-24/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
|
4 |
"epoch": 0.9896907216494846,
|
5 |
"eval_steps": 500,
|
@@ -10,21 +10,21 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second": 232.
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
}
|
23 |
],
|
24 |
"logging_steps": 500,
|
25 |
-
"max_steps":
|
26 |
"num_input_tokens_seen": 0,
|
27 |
-
"num_train_epochs":
|
28 |
"save_steps": 500,
|
29 |
"stateful_callbacks": {
|
30 |
"EarlyStoppingCallback": {
|
@@ -42,12 +42,12 @@
|
|
42 |
"should_evaluate": false,
|
43 |
"should_log": false,
|
44 |
"should_save": true,
|
45 |
-
"should_training_stop":
|
46 |
},
|
47 |
"attributes": {}
|
48 |
}
|
49 |
},
|
50 |
-
"total_flos":
|
51 |
"train_batch_size": 8,
|
52 |
"trial_name": null,
|
53 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6356589147286822,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
|
4 |
"epoch": 0.9896907216494846,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.6356589147286822,
|
14 |
+
"eval_f1": 0.5920563016978556,
|
15 |
+
"eval_loss": 0.980873703956604,
|
16 |
+
"eval_precision": 0.5920482291587493,
|
17 |
+
"eval_recall": 0.6356589147286822,
|
18 |
+
"eval_runtime": 1.6668,
|
19 |
+
"eval_samples_per_second": 232.175,
|
20 |
+
"eval_steps_per_second": 29.397,
|
21 |
"step": 24
|
22 |
}
|
23 |
],
|
24 |
"logging_steps": 500,
|
25 |
+
"max_steps": 360,
|
26 |
"num_input_tokens_seen": 0,
|
27 |
+
"num_train_epochs": 15,
|
28 |
"save_steps": 500,
|
29 |
"stateful_callbacks": {
|
30 |
"EarlyStoppingCallback": {
|
|
|
42 |
"should_evaluate": false,
|
43 |
"should_log": false,
|
44 |
"should_save": true,
|
45 |
+
"should_training_stop": false
|
46 |
},
|
47 |
"attributes": {}
|
48 |
}
|
49 |
},
|
50 |
+
"total_flos": 3513680035200000.0,
|
51 |
"train_batch_size": 8,
|
52 |
"trial_name": null,
|
53 |
"trial_params": null
|
checkpoint-24/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
|
3 |
size 5240
|
checkpoint-242/config.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"finetuning_task": "audio-classification",
|
49 |
+
"hidden_act": "gelu",
|
50 |
+
"hidden_dropout": 0.1,
|
51 |
+
"hidden_size": 768,
|
52 |
+
"id2label": {
|
53 |
+
"0": "1s_normal",
|
54 |
+
"1": "1s_pain",
|
55 |
+
"2": "1s_hunger",
|
56 |
+
"3": "1s_asphyxia"
|
57 |
+
},
|
58 |
+
"initializer_range": 0.02,
|
59 |
+
"intermediate_size": 3072,
|
60 |
+
"label2id": {
|
61 |
+
"1s_asphyxia": 3,
|
62 |
+
"1s_hunger": 2,
|
63 |
+
"1s_normal": 0,
|
64 |
+
"1s_pain": 1
|
65 |
+
},
|
66 |
+
"layer_norm_eps": 1e-05,
|
67 |
+
"layerdrop": 0.0,
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_prob": 0.05,
|
74 |
+
"model_type": "hubert",
|
75 |
+
"num_attention_heads": 12,
|
76 |
+
"num_conv_pos_embedding_groups": 16,
|
77 |
+
"num_conv_pos_embeddings": 128,
|
78 |
+
"num_feat_extract_layers": 7,
|
79 |
+
"num_hidden_layers": 2,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"torch_dtype": "float32",
|
82 |
+
"transformers_version": "4.44.2",
|
83 |
+
"use_weighted_layer_sum": false,
|
84 |
+
"vocab_size": 32
|
85 |
+
}
|
checkpoint-242/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c3c3acfb39a1584bdad8c4452c6a99cc55d03b67d4754ef55bdc31be94eb5b7
|
3 |
+
size 94765560
|
checkpoint-242/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:529f05dfd1fdfa07d440221559587dda8eded6dd4e7f89fd777f0ac7f24f5fee
|
3 |
+
size 189556666
|
checkpoint-242/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e731e4993f8e25825c268c79f8c5e714e7bddf10c95d3c48fd5bec091ada0032
|
3 |
+
size 14308
|
checkpoint-242/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:781bb61f6baa40042311c990cc82713d83cdc179ccb43dbb9cdd148961a2e8ad
|
3 |
+
size 1064
|
checkpoint-242/trainer_state.json
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8578811369509044,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242",
|
4 |
+
"epoch": 9.97938144329897,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 242,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.6356589147286822,
|
14 |
+
"eval_f1": 0.5920563016978556,
|
15 |
+
"eval_loss": 0.980873703956604,
|
16 |
+
"eval_precision": 0.5920482291587493,
|
17 |
+
"eval_recall": 0.6356589147286822,
|
18 |
+
"eval_runtime": 1.6668,
|
19 |
+
"eval_samples_per_second": 232.175,
|
20 |
+
"eval_steps_per_second": 29.397,
|
21 |
+
"step": 24
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7157622739018088,
|
26 |
+
"eval_f1": 0.6905410405322238,
|
27 |
+
"eval_loss": 0.7444477081298828,
|
28 |
+
"eval_precision": 0.6992377248989063,
|
29 |
+
"eval_recall": 0.7157622739018088,
|
30 |
+
"eval_runtime": 1.6941,
|
31 |
+
"eval_samples_per_second": 228.443,
|
32 |
+
"eval_steps_per_second": 28.924,
|
33 |
+
"step": 48
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7493540051679587,
|
38 |
+
"eval_f1": 0.744898505571463,
|
39 |
+
"eval_loss": 0.6171658039093018,
|
40 |
+
"eval_precision": 0.7437592422989429,
|
41 |
+
"eval_recall": 0.7493540051679587,
|
42 |
+
"eval_runtime": 1.6943,
|
43 |
+
"eval_samples_per_second": 228.408,
|
44 |
+
"eval_steps_per_second": 28.92,
|
45 |
+
"step": 72
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.7984496124031008,
|
50 |
+
"eval_f1": 0.7873621619744228,
|
51 |
+
"eval_loss": 0.5430988073348999,
|
52 |
+
"eval_precision": 0.79180344284319,
|
53 |
+
"eval_recall": 0.7984496124031008,
|
54 |
+
"eval_runtime": 1.7027,
|
55 |
+
"eval_samples_per_second": 227.289,
|
56 |
+
"eval_steps_per_second": 28.778,
|
57 |
+
"step": 97
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.8010335917312662,
|
62 |
+
"eval_f1": 0.7974946178390901,
|
63 |
+
"eval_loss": 0.5268548130989075,
|
64 |
+
"eval_precision": 0.8005965453214461,
|
65 |
+
"eval_recall": 0.8010335917312662,
|
66 |
+
"eval_runtime": 1.6829,
|
67 |
+
"eval_samples_per_second": 229.957,
|
68 |
+
"eval_steps_per_second": 29.116,
|
69 |
+
"step": 121
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7493540051679587,
|
74 |
+
"eval_f1": 0.7551329793037762,
|
75 |
+
"eval_loss": 0.5810549259185791,
|
76 |
+
"eval_precision": 0.7802262423287315,
|
77 |
+
"eval_recall": 0.7493540051679587,
|
78 |
+
"eval_runtime": 1.6999,
|
79 |
+
"eval_samples_per_second": 227.658,
|
80 |
+
"eval_steps_per_second": 28.825,
|
81 |
+
"step": 145
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8397932816537468,
|
86 |
+
"eval_f1": 0.8354840008265724,
|
87 |
+
"eval_loss": 0.44080850481987,
|
88 |
+
"eval_precision": 0.8365717854569443,
|
89 |
+
"eval_recall": 0.8397932816537468,
|
90 |
+
"eval_runtime": 1.6785,
|
91 |
+
"eval_samples_per_second": 230.56,
|
92 |
+
"eval_steps_per_second": 29.192,
|
93 |
+
"step": 169
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 8.0,
|
97 |
+
"eval_accuracy": 0.8191214470284238,
|
98 |
+
"eval_f1": 0.8167837576120855,
|
99 |
+
"eval_loss": 0.46871018409729004,
|
100 |
+
"eval_precision": 0.8187643627479353,
|
101 |
+
"eval_recall": 0.8191214470284238,
|
102 |
+
"eval_runtime": 1.6719,
|
103 |
+
"eval_samples_per_second": 231.477,
|
104 |
+
"eval_steps_per_second": 29.308,
|
105 |
+
"step": 194
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 8.989690721649485,
|
109 |
+
"eval_accuracy": 0.8475452196382429,
|
110 |
+
"eval_f1": 0.8473710740005564,
|
111 |
+
"eval_loss": 0.4363822937011719,
|
112 |
+
"eval_precision": 0.8483238707679635,
|
113 |
+
"eval_recall": 0.8475452196382429,
|
114 |
+
"eval_runtime": 1.6838,
|
115 |
+
"eval_samples_per_second": 229.835,
|
116 |
+
"eval_steps_per_second": 29.101,
|
117 |
+
"step": 218
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"epoch": 9.97938144329897,
|
121 |
+
"eval_accuracy": 0.8578811369509044,
|
122 |
+
"eval_f1": 0.8567532661685897,
|
123 |
+
"eval_loss": 0.42906680703163147,
|
124 |
+
"eval_precision": 0.8560504853170988,
|
125 |
+
"eval_recall": 0.8578811369509044,
|
126 |
+
"eval_runtime": 1.6875,
|
127 |
+
"eval_samples_per_second": 229.333,
|
128 |
+
"eval_steps_per_second": 29.037,
|
129 |
+
"step": 242
|
130 |
+
}
|
131 |
+
],
|
132 |
+
"logging_steps": 500,
|
133 |
+
"max_steps": 360,
|
134 |
+
"num_input_tokens_seen": 0,
|
135 |
+
"num_train_epochs": 15,
|
136 |
+
"save_steps": 500,
|
137 |
+
"stateful_callbacks": {
|
138 |
+
"EarlyStoppingCallback": {
|
139 |
+
"args": {
|
140 |
+
"early_stopping_patience": 3,
|
141 |
+
"early_stopping_threshold": 0.0
|
142 |
+
},
|
143 |
+
"attributes": {
|
144 |
+
"early_stopping_patience_counter": 0
|
145 |
+
}
|
146 |
+
},
|
147 |
+
"TrainerControl": {
|
148 |
+
"args": {
|
149 |
+
"should_epoch_stop": false,
|
150 |
+
"should_evaluate": false,
|
151 |
+
"should_log": false,
|
152 |
+
"should_save": true,
|
153 |
+
"should_training_stop": false
|
154 |
+
},
|
155 |
+
"attributes": {}
|
156 |
+
}
|
157 |
+
},
|
158 |
+
"total_flos": 3.5136800352e+16,
|
159 |
+
"train_batch_size": 8,
|
160 |
+
"trial_name": null,
|
161 |
+
"trial_params": null
|
162 |
+
}
|
checkpoint-242/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
|
3 |
+
size 5240
|
checkpoint-266/config.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"finetuning_task": "audio-classification",
|
49 |
+
"hidden_act": "gelu",
|
50 |
+
"hidden_dropout": 0.1,
|
51 |
+
"hidden_size": 768,
|
52 |
+
"id2label": {
|
53 |
+
"0": "1s_normal",
|
54 |
+
"1": "1s_pain",
|
55 |
+
"2": "1s_hunger",
|
56 |
+
"3": "1s_asphyxia"
|
57 |
+
},
|
58 |
+
"initializer_range": 0.02,
|
59 |
+
"intermediate_size": 3072,
|
60 |
+
"label2id": {
|
61 |
+
"1s_asphyxia": 3,
|
62 |
+
"1s_hunger": 2,
|
63 |
+
"1s_normal": 0,
|
64 |
+
"1s_pain": 1
|
65 |
+
},
|
66 |
+
"layer_norm_eps": 1e-05,
|
67 |
+
"layerdrop": 0.0,
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_prob": 0.05,
|
74 |
+
"model_type": "hubert",
|
75 |
+
"num_attention_heads": 12,
|
76 |
+
"num_conv_pos_embedding_groups": 16,
|
77 |
+
"num_conv_pos_embeddings": 128,
|
78 |
+
"num_feat_extract_layers": 7,
|
79 |
+
"num_hidden_layers": 2,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"torch_dtype": "float32",
|
82 |
+
"transformers_version": "4.44.2",
|
83 |
+
"use_weighted_layer_sum": false,
|
84 |
+
"vocab_size": 32
|
85 |
+
}
|
checkpoint-266/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b40af64761e85d5fea70c649217c8cc140a31cb9ca6b3839f952a2bb9f87e0b2
|
3 |
+
size 94765560
|
checkpoint-266/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2157435343c02544baabe61713043b7b3749a01b1cc4aeeb2340f89faf040460
|
3 |
+
size 189556666
|