{
"batching": {
"batch_size": 64,
"max_tokens": 1024,
"sampling_smoothing": 1,
"sort_by_size": true
},
"decoders": {
"classification": {},
"default_decoder": {
"layers_to_use": [
-1
],
"loss_weight": 1,
"metric": "accuracy",
"topn": 1
},
"dependency": {
"arc_representation_dim": 768,
"metric": "las",
"tag_representation_dim": 256
},
"mlm": {
"metric": "perplexity"
},
"multiclas": {
"metric": "multi_acc",
"threshold": 0.7
},
"multiseq": {
"metric": "multi_acc",
"threshold": 0.7
},
"regression": {
"metric": "avg_dist"
},
"seq": {},
"seq_bio": {
"metric": "span_f1"
},
"string2string": {},
"tok": {
"pre_split": true
}
},
"default_dec_dataset_embeds_dim": 12,
"encoder": {
"dropout": 0.2,
"max_input_length": 64,
"update_weights_encoder": true
},
"random_seed": 8446,
"training": {
"keep_top_n": 1,
"learning_rate_scheduler": {
"cut_frac": 0.3,
"decay_factor": 0.38,
"discriminative_fine_tuning": true,
"gradual_unfreezing": true
},
"num_epochs": 50,
"optimizer": {
"betas": [
0.9,
0.99
],
"correct_bias": false,
"lr": 0.0015,
"weight_decay": 0.01
}
},
"transformer_model": "dbmdz/bert-base-italian-xxl-cased"
}