File size: 2,659 Bytes
a032f99
 
53f6532
 
a032f99
1d21972
0f6f358
53f6532
 
0f6f358
 
53f6532
a032f99
53f6532
 
 
a032f99
 
 
53f6532
a032f99
 
 
 
 
53f6532
 
 
 
 
 
 
 
 
a032f99
 
 
53f6532
 
a032f99
0f6f358
 
53f6532
 
0f6f358
 
53f6532
a032f99
0f6f358
 
53f6532
a032f99
 
 
53f6532
a032f99
 
53f6532
 
 
 
 
 
 
 
 
 
 
a032f99
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
{
    "mon": {
        "dataset_path": "data/baby_cry_detection",
        "output_dir": "distilhubert-finetuned-cry-detector",
        "training_args": {
            "num_train_epochs": 10,
            "learning_rate": 3e-5,
            "warmup_ratio": 0.1,
            "output_dir": "distilhubert-finetuned-cry-detector",
            "eval_strategy": "epoch",
            "save_strategy": "epoch",
            "lr_scheduler_type": "cosine_with_restarts",
            "auto_find_batch_size": true,
            "per_device_train_batch_size": 64,
            "per_device_eval_batch_size": 64,
            "gradient_accumulation_steps": 2,
            "gradient_checkpointing": true,
            "load_best_model_at_end": true,
            "greater_is_better": true,
            "metric_for_best_model": "eval_f1",
            "optim": "adamw_torch",
            "hub_strategy": "checkpoint",
            "report_to": "tensorboard",
            "full_determinism": true,
            "seed": 123,
            "data_seed": 123,
            "max_grad_norm": 0.5,
            "weight_decay": 0.02,
            "fp16": true,
            "eval_accumulation_steps": 1,
            "dataloader_num_workers": 12,
            "dataloader_pin_memory": true,
            "label_smoothing_factor": 0.1,
            "length_column_name": "input_length"
        }
    },
    "class": {
        "dataset_path": "data/mixed_data",
        "output_dir": "distilhubert-finetuned-mixed-data",
        "training_args": {
            "num_train_epochs": 45,
            "learning_rate": 8e-4,
            "warmup_ratio": 0.1,
            "output_dir": "distilhubert-finetuned-mixed-data",
            "eval_strategy": "epoch",
            "save_strategy": "epoch",
            "lr_scheduler_type": "cosine_with_restarts",
            "auto_find_batch_size": true,
            "per_device_train_batch_size": 128,
            "per_device_eval_batch_size": 128,
            "gradient_accumulation_steps": 2,
            "gradient_checkpointing": true,
            "load_best_model_at_end": true,
            "greater_is_better": true,
            "metric_for_best_model": "eval_f1",
            "optim": "adamw_torch",
            "seed": 123,
            "data_seed": 123,
            "max_grad_norm": 0.5,
            "weight_decay": 0.02,
            "fp16": true,
            "eval_accumulation_steps": 1,
            "dataloader_num_workers": 12,
            "dataloader_pin_memory": true,
            "label_smoothing_factor": 0.1,
            "length_column_name": "input_length",
            "hub_strategy": "checkpoint",
            "report_to": "tensorboard"
        }
    }
}