Training in progress, step 1000
{
  "best_metric": 0.3712866943650337,
  "best_model_checkpoint": "/home2/s5432073/language-tech-project/results/ltp-roberta-large-default/checkpoint-1000",
  "epoch": 5.9171597633136095,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.18,
      "learning_rate": 9.28348909657321e-06,
      "loss": 0.4444,
      "step": 200
    },
    {
      "epoch": 1.18,
      "eval_f1": 0.13967554851569386,
      "eval_f1_all": [
        0.1323529411764706,
        0.1882770870337478,
        0.0,
        0.0,
        0.49830890642615555,
        0.0,
        0.0,
        0.0,
        0.7215112736136504,
        0.6323024054982818,
        0.0,
        0.0043859649122807015,
        0.0,
        0.0,
        0.0,
        0.0,
        0.6163723916532904,
        0.0,
        0.0,
        0.0
      ],
      "eval_loss": 0.35773786902427673,
      "eval_runtime": 4.4833,
      "eval_samples_per_second": 422.907,
      "eval_steps_per_second": 13.383,
      "step": 200
    },
    {
      "epoch": 2.37,
      "learning_rate": 8.037383177570094e-06,
      "loss": 0.3287,
      "step": 400
    },
    {
      "epoch": 2.37,
      "eval_f1": 0.26059605664667,
      "eval_f1_all": [
        0.4311377245508982,
        0.33333333333333337,
        0.0,
        0.0,
        0.6143001007049347,
        0.03592814371257485,
        0.2823529411764706,
        0.0,
        0.7357440890125174,
        0.55,
        0.1650485436893204,
        0.3709677419354839,
        0.0,
        0.0,
        0.11209439528023599,
        0.0,
        0.6120218579234973,
        0.5846153846153846,
        0.034934497816593885,
        0.3494423791821561
      ],
      "eval_loss": 0.32891085743904114,
      "eval_runtime": 3.0437,
      "eval_samples_per_second": 622.918,
      "eval_steps_per_second": 19.713,
      "step": 400
    },
    {
      "epoch": 3.55,
      "learning_rate": 6.791277258566978e-06,
      "loss": 0.2952,
      "step": 600
    },
    {
      "epoch": 3.55,
      "eval_f1": 0.32608326603750615,
      "eval_f1_all": [
        0.5166240409207161,
        0.5852478839177752,
        0.0,
        0.0,
        0.6099585062240663,
        0.0588235294117647,
        0.29069767441860467,
        0.0,
        0.745417515274949,
        0.619047619047619,
        0.3628691983122363,
        0.4564564564564564,
        0.0,
        0.015625,
        0.49217935349322217,
        0.04332129963898917,
        0.6925515055467513,
        0.6268656716417911,
        0.05714285714285715,
        0.3488372093023256
      ],
      "eval_loss": 0.31139805912971497,
      "eval_runtime": 2.9889,
      "eval_samples_per_second": 634.353,
      "eval_steps_per_second": 20.074,
      "step": 600
    },
    {
      "epoch": 4.73,
      "learning_rate": 5.545171339563863e-06,
      "loss": 0.2745,
      "step": 800
    },
    {
      "epoch": 4.73,
      "eval_f1": 0.35306069701657317,
      "eval_f1_all": [
        0.5316455696202532,
        0.575,
        0.02857142857142857,
        0.125,
        0.6256306760847629,
        0.1005586592178771,
        0.32222222222222224,
        0.0,
        0.7624595469255664,
        0.6393442622950821,
        0.4034334763948497,
        0.5371900826446281,
        0.0,
        0.015503875968992248,
        0.5170630816959669,
        0.08304498269896193,
        0.6463620981387478,
        0.6822429906542056,
        0.07407407407407407,
        0.39186691312384475
      ],
      "eval_loss": 0.3067249059677124,
      "eval_runtime": 2.9637,
      "eval_samples_per_second": 639.733,
      "eval_steps_per_second": 20.245,
      "step": 800
    },
    {
      "epoch": 5.92,
      "learning_rate": 4.299065420560748e-06,
      "loss": 0.2575,
      "step": 1000
    },
    {
      "epoch": 5.92,
      "eval_f1": 0.3712866943650337,
      "eval_f1_all": [
        0.5213032581453635,
        0.5556978233034571,
        0.1081081081081081,
        0.1391304347826087,
        0.6363636363636365,
        0.1005586592178771,
        0.2840909090909091,
        0.0,
        0.7576530612244897,
        0.6426076833527357,
        0.4453441295546559,
        0.5411140583554377,
        0.0,
        0.015037593984962405,
        0.5839929639401935,
        0.1921921921921922,
        0.641573994867408,
        0.6936936936936936,
        0.12648221343873517,
        0.4407894736842105
      ],
      "eval_loss": 0.3082274794578552,
      "eval_runtime": 2.9599,
      "eval_samples_per_second": 640.56,
      "eval_steps_per_second": 20.271,
      "step": 1000
    }
  ],
  "max_steps": 1690,
  "num_train_epochs": 10,
  "total_flos": 5502978001057296.0,
  "trial_name": null,
  "trial_params": null
}
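
A minimal sketch of how this trainer state can be inspected offline, assuming only the Python standard library and that the JSON above is saved locally as trainer_state.json (the filename the Hugging Face Trainer uses for this file). In the logged numbers, eval_f1 equals the unweighted mean of the 20 values in eval_f1_all; treating those 20 values as per-label F1 scores is an assumption not stated in the file itself.

import json

# Path is an assumption; point it at wherever this checkpoint's state was saved.
with open("trainer_state.json") as f:
    state = json.load(f)

print(f"best_metric (eval_f1): {state['best_metric']:.4f}")
print(f"best checkpoint:       {state['best_model_checkpoint']}")

# Walk log_history and report the averaged F1 and loss at each evaluation step.
for entry in state["log_history"]:
    if "eval_f1" in entry:
        macro_f1 = sum(entry["eval_f1_all"]) / len(entry["eval_f1_all"])
        print(f"step {entry['step']:>5}  epoch {entry['epoch']:.2f}  "
              f"eval_f1 {entry['eval_f1']:.4f}  (recomputed mean {macro_f1:.4f})  "
              f"eval_loss {entry['eval_loss']:.4f}")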