toxic-mbert / trainer_state.json
RabidUmarell's picture
model
a6423a2
{
"best_metric": 0.49082762002944946,
"best_model_checkpoint": "models/toxic-bert-mbert/checkpoint-380",
"epoch": 0.41170097508125675,
"eval_steps": 10,
"global_step": 380,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.968e-05,
"loss": 1.4487,
"step": 10
},
{
"epoch": 0.01,
"eval_f1": 0.48475671310316976,
"eval_loss": 1.1357542276382446,
"eval_runtime": 20.5083,
"eval_samples_per_second": 270.037,
"eval_steps_per_second": 11.264,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.9280000000000002e-05,
"loss": 1.1897,
"step": 20
},
{
"epoch": 0.02,
"eval_f1": 0.48475671310316976,
"eval_loss": 1.0628113746643066,
"eval_runtime": 20.8063,
"eval_samples_per_second": 266.169,
"eval_steps_per_second": 11.102,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 1.8880000000000002e-05,
"loss": 1.0351,
"step": 30
},
{
"epoch": 0.03,
"eval_f1": 0.48475671310316976,
"eval_loss": 1.0026295185089111,
"eval_runtime": 21.4302,
"eval_samples_per_second": 258.42,
"eval_steps_per_second": 10.779,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 1.8480000000000003e-05,
"loss": 0.9274,
"step": 40
},
{
"epoch": 0.04,
"eval_f1": 0.602134407148032,
"eval_loss": 0.8651727437973022,
"eval_runtime": 21.7252,
"eval_samples_per_second": 254.911,
"eval_steps_per_second": 10.633,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 1.8080000000000003e-05,
"loss": 0.8717,
"step": 50
},
{
"epoch": 0.05,
"eval_f1": 0.5878712152620588,
"eval_loss": 0.8609752058982849,
"eval_runtime": 21.4208,
"eval_samples_per_second": 258.534,
"eval_steps_per_second": 10.784,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 1.768e-05,
"loss": 0.8187,
"step": 60
},
{
"epoch": 0.07,
"eval_f1": 0.6764570688570977,
"eval_loss": 0.7394715547561646,
"eval_runtime": 21.8392,
"eval_samples_per_second": 253.58,
"eval_steps_per_second": 10.577,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 1.732e-05,
"loss": 0.8706,
"step": 70
},
{
"epoch": 0.08,
"eval_f1": 0.6850705705176812,
"eval_loss": 0.7013543844223022,
"eval_runtime": 21.7795,
"eval_samples_per_second": 254.276,
"eval_steps_per_second": 10.606,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 1.692e-05,
"loss": 0.7463,
"step": 80
},
{
"epoch": 0.09,
"eval_f1": 0.7283246330984933,
"eval_loss": 0.665144681930542,
"eval_runtime": 21.4734,
"eval_samples_per_second": 257.901,
"eval_steps_per_second": 10.758,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 1.652e-05,
"loss": 0.7677,
"step": 90
},
{
"epoch": 0.1,
"eval_f1": 0.6548131465387051,
"eval_loss": 0.718267560005188,
"eval_runtime": 21.871,
"eval_samples_per_second": 253.212,
"eval_steps_per_second": 10.562,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 1.612e-05,
"loss": 0.6402,
"step": 100
},
{
"epoch": 0.11,
"eval_f1": 0.7153883418869857,
"eval_loss": 0.6134028434753418,
"eval_runtime": 21.3946,
"eval_samples_per_second": 258.85,
"eval_steps_per_second": 10.797,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 1.5720000000000002e-05,
"loss": 0.6408,
"step": 110
},
{
"epoch": 0.12,
"eval_f1": 0.7200057070545418,
"eval_loss": 0.6316511034965515,
"eval_runtime": 21.4218,
"eval_samples_per_second": 258.521,
"eval_steps_per_second": 10.783,
"step": 110
},
{
"epoch": 0.13,
"learning_rate": 1.5320000000000002e-05,
"loss": 0.6293,
"step": 120
},
{
"epoch": 0.13,
"eval_f1": 0.7252309612107771,
"eval_loss": 0.6177955865859985,
"eval_runtime": 22.0787,
"eval_samples_per_second": 250.831,
"eval_steps_per_second": 10.463,
"step": 120
},
{
"epoch": 0.14,
"learning_rate": 1.4920000000000001e-05,
"loss": 0.5921,
"step": 130
},
{
"epoch": 0.14,
"eval_f1": 0.718310903510847,
"eval_loss": 0.6382821798324585,
"eval_runtime": 21.4528,
"eval_samples_per_second": 258.148,
"eval_steps_per_second": 10.768,
"step": 130
},
{
"epoch": 0.15,
"learning_rate": 1.4520000000000002e-05,
"loss": 0.6829,
"step": 140
},
{
"epoch": 0.15,
"eval_f1": 0.7104650126557948,
"eval_loss": 0.6063101291656494,
"eval_runtime": 21.6996,
"eval_samples_per_second": 255.212,
"eval_steps_per_second": 10.645,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 1.412e-05,
"loss": 0.6528,
"step": 150
},
{
"epoch": 0.16,
"eval_f1": 0.7266460816131931,
"eval_loss": 0.5720272064208984,
"eval_runtime": 21.3758,
"eval_samples_per_second": 259.078,
"eval_steps_per_second": 10.807,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 1.3720000000000002e-05,
"loss": 0.5472,
"step": 160
},
{
"epoch": 0.17,
"eval_f1": 0.7174077954335052,
"eval_loss": 0.6016837358474731,
"eval_runtime": 21.9003,
"eval_samples_per_second": 252.873,
"eval_steps_per_second": 10.548,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 1.3320000000000001e-05,
"loss": 0.6625,
"step": 170
},
{
"epoch": 0.18,
"eval_f1": 0.7238055756700145,
"eval_loss": 0.5748048424720764,
"eval_runtime": 21.4241,
"eval_samples_per_second": 258.494,
"eval_steps_per_second": 10.782,
"step": 170
},
{
"epoch": 0.2,
"learning_rate": 1.2920000000000002e-05,
"loss": 0.551,
"step": 180
},
{
"epoch": 0.2,
"eval_f1": 0.7216203790293768,
"eval_loss": 0.5944197177886963,
"eval_runtime": 21.7253,
"eval_samples_per_second": 254.91,
"eval_steps_per_second": 10.633,
"step": 180
},
{
"epoch": 0.21,
"learning_rate": 1.252e-05,
"loss": 0.5633,
"step": 190
},
{
"epoch": 0.21,
"eval_f1": 0.7591239466384562,
"eval_loss": 0.5621122717857361,
"eval_runtime": 21.1932,
"eval_samples_per_second": 261.31,
"eval_steps_per_second": 10.9,
"step": 190
},
{
"epoch": 0.22,
"learning_rate": 1.2120000000000001e-05,
"loss": 0.5372,
"step": 200
},
{
"epoch": 0.22,
"eval_f1": 0.75760072154562,
"eval_loss": 0.5480858087539673,
"eval_runtime": 21.2836,
"eval_samples_per_second": 260.201,
"eval_steps_per_second": 10.853,
"step": 200
},
{
"epoch": 0.23,
"learning_rate": 1.172e-05,
"loss": 0.6353,
"step": 210
},
{
"epoch": 0.23,
"eval_f1": 0.7511938652247329,
"eval_loss": 0.5706632137298584,
"eval_runtime": 21.8422,
"eval_samples_per_second": 253.545,
"eval_steps_per_second": 10.576,
"step": 210
},
{
"epoch": 0.24,
"learning_rate": 1.132e-05,
"loss": 0.6332,
"step": 220
},
{
"epoch": 0.24,
"eval_f1": 0.7614490903091703,
"eval_loss": 0.5543012022972107,
"eval_runtime": 21.5696,
"eval_samples_per_second": 256.75,
"eval_steps_per_second": 10.71,
"step": 220
},
{
"epoch": 0.25,
"learning_rate": 1.0920000000000002e-05,
"loss": 0.5311,
"step": 230
},
{
"epoch": 0.25,
"eval_f1": 0.7432260016931946,
"eval_loss": 0.5397886633872986,
"eval_runtime": 21.3122,
"eval_samples_per_second": 259.851,
"eval_steps_per_second": 10.839,
"step": 230
},
{
"epoch": 0.26,
"learning_rate": 1.0520000000000001e-05,
"loss": 0.5791,
"step": 240
},
{
"epoch": 0.26,
"eval_f1": 0.7434033096243912,
"eval_loss": 0.5391152501106262,
"eval_runtime": 21.3796,
"eval_samples_per_second": 259.032,
"eval_steps_per_second": 10.805,
"step": 240
},
{
"epoch": 0.27,
"learning_rate": 1.0120000000000001e-05,
"loss": 0.5831,
"step": 250
},
{
"epoch": 0.27,
"eval_f1": 0.763099957359349,
"eval_loss": 0.5244932174682617,
"eval_runtime": 21.3101,
"eval_samples_per_second": 259.877,
"eval_steps_per_second": 10.84,
"step": 250
},
{
"epoch": 0.28,
"learning_rate": 9.72e-06,
"loss": 0.5453,
"step": 260
},
{
"epoch": 0.28,
"eval_f1": 0.7585661401268046,
"eval_loss": 0.5211306214332581,
"eval_runtime": 21.2639,
"eval_samples_per_second": 260.442,
"eval_steps_per_second": 10.863,
"step": 260
},
{
"epoch": 0.29,
"learning_rate": 9.32e-06,
"loss": 0.5087,
"step": 270
},
{
"epoch": 0.29,
"eval_f1": 0.7549183270549422,
"eval_loss": 0.5206575989723206,
"eval_runtime": 21.5568,
"eval_samples_per_second": 256.902,
"eval_steps_per_second": 10.716,
"step": 270
},
{
"epoch": 0.3,
"learning_rate": 8.920000000000001e-06,
"loss": 0.539,
"step": 280
},
{
"epoch": 0.3,
"eval_f1": 0.7483575758659107,
"eval_loss": 0.5601561665534973,
"eval_runtime": 21.7276,
"eval_samples_per_second": 254.883,
"eval_steps_per_second": 10.632,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 8.52e-06,
"loss": 0.502,
"step": 290
},
{
"epoch": 0.31,
"eval_f1": 0.7497368207624416,
"eval_loss": 0.5269237160682678,
"eval_runtime": 21.3645,
"eval_samples_per_second": 259.215,
"eval_steps_per_second": 10.812,
"step": 290
},
{
"epoch": 0.33,
"learning_rate": 8.120000000000002e-06,
"loss": 0.5656,
"step": 300
},
{
"epoch": 0.33,
"eval_f1": 0.7490172830029811,
"eval_loss": 0.5704778432846069,
"eval_runtime": 21.3309,
"eval_samples_per_second": 259.623,
"eval_steps_per_second": 10.829,
"step": 300
},
{
"epoch": 0.34,
"learning_rate": 7.72e-06,
"loss": 0.6157,
"step": 310
},
{
"epoch": 0.34,
"eval_f1": 0.7610313116302819,
"eval_loss": 0.5527724027633667,
"eval_runtime": 21.5886,
"eval_samples_per_second": 256.524,
"eval_steps_per_second": 10.7,
"step": 310
},
{
"epoch": 0.35,
"learning_rate": 7.32e-06,
"loss": 0.5262,
"step": 320
},
{
"epoch": 0.35,
"eval_f1": 0.7693956225207979,
"eval_loss": 0.5064041614532471,
"eval_runtime": 21.4519,
"eval_samples_per_second": 258.159,
"eval_steps_per_second": 10.768,
"step": 320
},
{
"epoch": 0.36,
"learning_rate": 6.92e-06,
"loss": 0.5032,
"step": 330
},
{
"epoch": 0.36,
"eval_f1": 0.757594480871035,
"eval_loss": 0.5091240406036377,
"eval_runtime": 21.5576,
"eval_samples_per_second": 256.894,
"eval_steps_per_second": 10.716,
"step": 330
},
{
"epoch": 0.37,
"learning_rate": 6.520000000000001e-06,
"loss": 0.4859,
"step": 340
},
{
"epoch": 0.37,
"eval_f1": 0.751697533021681,
"eval_loss": 0.5241729021072388,
"eval_runtime": 21.6466,
"eval_samples_per_second": 255.837,
"eval_steps_per_second": 10.671,
"step": 340
},
{
"epoch": 0.38,
"learning_rate": 6.120000000000001e-06,
"loss": 0.6227,
"step": 350
},
{
"epoch": 0.38,
"eval_f1": 0.7821857624888272,
"eval_loss": 0.4922301769256592,
"eval_runtime": 21.5823,
"eval_samples_per_second": 256.599,
"eval_steps_per_second": 10.703,
"step": 350
},
{
"epoch": 0.39,
"learning_rate": 5.72e-06,
"loss": 0.4927,
"step": 360
},
{
"epoch": 0.39,
"eval_f1": 0.778530969617547,
"eval_loss": 0.4920683801174164,
"eval_runtime": 21.173,
"eval_samples_per_second": 261.559,
"eval_steps_per_second": 10.91,
"step": 360
},
{
"epoch": 0.4,
"learning_rate": 5.320000000000001e-06,
"loss": 0.596,
"step": 370
},
{
"epoch": 0.4,
"eval_f1": 0.7737080595193158,
"eval_loss": 0.509250283241272,
"eval_runtime": 21.9207,
"eval_samples_per_second": 252.638,
"eval_steps_per_second": 10.538,
"step": 370
},
{
"epoch": 0.41,
"learning_rate": 4.92e-06,
"loss": 0.5932,
"step": 380
},
{
"epoch": 0.41,
"eval_f1": 0.7900070254555186,
"eval_loss": 0.49082762002944946,
"eval_runtime": 21.7381,
"eval_samples_per_second": 254.76,
"eval_steps_per_second": 10.627,
"step": 380
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 547397542755648.0,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}