{
  "optimizer": "AdamW",
  "learning_rate": 0.0005,
  "adam_epsilon": 0.00001,
  "warmup_steps": 25,
  "weight_decay": 0,
  "train_batch_size": 64,
  "eval_batch_size": 64,
  "use_scheduler": true,
  "metrics": null,
  "model_name_or_path": "allegro/herbert-base-cased",
  "num_classes": 7,
  "finetune_last_n_layers": 4,
  "config_kwargs": {
    "classifier_dropout": 0.2
  },
  "task_model_kwargs": {
    "adam_epsilon": 0.00001,
    "eval_batch_size": 64,
    "learning_rate": 0.0005,
    "optimizer": "AdamW",
    "train_batch_size": 64,
    "use_scheduler": true,
    "warmup_steps": 25,
    "weight_decay": 0
  },
  "model_compile_kwargs": null,
  "evaluation_mode": "unit",
  "tagging_scheme": null,
  "ignore_index": -100,
  "downstream_model_type": "AutoModelForTokenClassification"
}