{
  "best_metric": 0.49082762002944946,
  "best_model_checkpoint": "models/toxic-bert-mbert/checkpoint-380",
  "epoch": 0.41170097508125675,
  "eval_steps": 10,
  "global_step": 380,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.968e-05,
      "loss": 1.4487,
      "step": 10
    },
    {
      "epoch": 0.01,
      "eval_f1": 0.48475671310316976,
      "eval_loss": 1.1357542276382446,
      "eval_runtime": 20.5083,
      "eval_samples_per_second": 270.037,
      "eval_steps_per_second": 11.264,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9280000000000002e-05,
      "loss": 1.1897,
      "step": 20
    },
    {
      "epoch": 0.02,
      "eval_f1": 0.48475671310316976,
      "eval_loss": 1.0628113746643066,
      "eval_runtime": 20.8063,
      "eval_samples_per_second": 266.169,
      "eval_steps_per_second": 11.102,
      "step": 20
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.8880000000000002e-05,
      "loss": 1.0351,
      "step": 30
    },
    {
      "epoch": 0.03,
      "eval_f1": 0.48475671310316976,
      "eval_loss": 1.0026295185089111,
      "eval_runtime": 21.4302,
      "eval_samples_per_second": 258.42,
      "eval_steps_per_second": 10.779,
      "step": 30
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.8480000000000003e-05,
      "loss": 0.9274,
      "step": 40
    },
    {
      "epoch": 0.04,
      "eval_f1": 0.602134407148032,
      "eval_loss": 0.8651727437973022,
      "eval_runtime": 21.7252,
      "eval_samples_per_second": 254.911,
      "eval_steps_per_second": 10.633,
      "step": 40
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.8080000000000003e-05,
      "loss": 0.8717,
      "step": 50
    },
    {
      "epoch": 0.05,
      "eval_f1": 0.5878712152620588,
      "eval_loss": 0.8609752058982849,
      "eval_runtime": 21.4208,
      "eval_samples_per_second": 258.534,
      "eval_steps_per_second": 10.784,
      "step": 50
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.768e-05,
      "loss": 0.8187,
      "step": 60
    },
    {
      "epoch": 0.07,
      "eval_f1": 0.6764570688570977,
      "eval_loss": 0.7394715547561646,
      "eval_runtime": 21.8392,
      "eval_samples_per_second": 253.58,
      "eval_steps_per_second": 10.577,
      "step": 60
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.732e-05,
      "loss": 0.8706,
      "step": 70
    },
    {
      "epoch": 0.08,
      "eval_f1": 0.6850705705176812,
      "eval_loss": 0.7013543844223022,
      "eval_runtime": 21.7795,
      "eval_samples_per_second": 254.276,
      "eval_steps_per_second": 10.606,
      "step": 70
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.692e-05,
      "loss": 0.7463,
      "step": 80
    },
    {
      "epoch": 0.09,
      "eval_f1": 0.7283246330984933,
      "eval_loss": 0.665144681930542,
      "eval_runtime": 21.4734,
      "eval_samples_per_second": 257.901,
      "eval_steps_per_second": 10.758,
      "step": 80
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.652e-05,
      "loss": 0.7677,
      "step": 90
    },
    {
      "epoch": 0.1,
      "eval_f1": 0.6548131465387051,
      "eval_loss": 0.718267560005188,
      "eval_runtime": 21.871,
      "eval_samples_per_second": 253.212,
      "eval_steps_per_second": 10.562,
      "step": 90
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.612e-05,
      "loss": 0.6402,
      "step": 100
    },
    {
      "epoch": 0.11,
      "eval_f1": 0.7153883418869857,
      "eval_loss": 0.6134028434753418,
      "eval_runtime": 21.3946,
      "eval_samples_per_second": 258.85,
      "eval_steps_per_second": 10.797,
      "step": 100
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.5720000000000002e-05,
      "loss": 0.6408,
      "step": 110
    },
    {
      "epoch": 0.12,
      "eval_f1": 0.7200057070545418,
      "eval_loss": 0.6316511034965515,
      "eval_runtime": 21.4218,
      "eval_samples_per_second": 258.521,
      "eval_steps_per_second": 10.783,
      "step": 110
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.5320000000000002e-05,
      "loss": 0.6293,
      "step": 120
    },
    {
      "epoch": 0.13,
      "eval_f1": 0.7252309612107771,
      "eval_loss": 0.6177955865859985,
      "eval_runtime": 22.0787,
      "eval_samples_per_second": 250.831,
      "eval_steps_per_second": 10.463,
      "step": 120
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.4920000000000001e-05,
      "loss": 0.5921,
      "step": 130
    },
    {
      "epoch": 0.14,
      "eval_f1": 0.718310903510847,
      "eval_loss": 0.6382821798324585,
      "eval_runtime": 21.4528,
      "eval_samples_per_second": 258.148,
      "eval_steps_per_second": 10.768,
      "step": 130
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.4520000000000002e-05,
      "loss": 0.6829,
      "step": 140
    },
    {
      "epoch": 0.15,
      "eval_f1": 0.7104650126557948,
      "eval_loss": 0.6063101291656494,
      "eval_runtime": 21.6996,
      "eval_samples_per_second": 255.212,
      "eval_steps_per_second": 10.645,
      "step": 140
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.412e-05,
      "loss": 0.6528,
      "step": 150
    },
    {
      "epoch": 0.16,
      "eval_f1": 0.7266460816131931,
      "eval_loss": 0.5720272064208984,
      "eval_runtime": 21.3758,
      "eval_samples_per_second": 259.078,
      "eval_steps_per_second": 10.807,
      "step": 150
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.3720000000000002e-05,
      "loss": 0.5472,
      "step": 160
    },
    {
      "epoch": 0.17,
      "eval_f1": 0.7174077954335052,
      "eval_loss": 0.6016837358474731,
      "eval_runtime": 21.9003,
      "eval_samples_per_second": 252.873,
      "eval_steps_per_second": 10.548,
      "step": 160
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.3320000000000001e-05,
      "loss": 0.6625,
      "step": 170
    },
    {
      "epoch": 0.18,
      "eval_f1": 0.7238055756700145,
      "eval_loss": 0.5748048424720764,
      "eval_runtime": 21.4241,
      "eval_samples_per_second": 258.494,
      "eval_steps_per_second": 10.782,
      "step": 170
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.2920000000000002e-05,
      "loss": 0.551,
      "step": 180
    },
    {
      "epoch": 0.2,
      "eval_f1": 0.7216203790293768,
      "eval_loss": 0.5944197177886963,
      "eval_runtime": 21.7253,
      "eval_samples_per_second": 254.91,
      "eval_steps_per_second": 10.633,
      "step": 180
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.252e-05,
      "loss": 0.5633,
      "step": 190
    },
    {
      "epoch": 0.21,
      "eval_f1": 0.7591239466384562,
      "eval_loss": 0.5621122717857361,
      "eval_runtime": 21.1932,
      "eval_samples_per_second": 261.31,
      "eval_steps_per_second": 10.9,
      "step": 190
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.2120000000000001e-05,
      "loss": 0.5372,
      "step": 200
    },
    {
      "epoch": 0.22,
      "eval_f1": 0.75760072154562,
      "eval_loss": 0.5480858087539673,
      "eval_runtime": 21.2836,
      "eval_samples_per_second": 260.201,
      "eval_steps_per_second": 10.853,
      "step": 200
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.172e-05,
      "loss": 0.6353,
      "step": 210
    },
    {
      "epoch": 0.23,
      "eval_f1": 0.7511938652247329,
      "eval_loss": 0.5706632137298584,
      "eval_runtime": 21.8422,
      "eval_samples_per_second": 253.545,
      "eval_steps_per_second": 10.576,
      "step": 210
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.132e-05,
      "loss": 0.6332,
      "step": 220
    },
    {
      "epoch": 0.24,
      "eval_f1": 0.7614490903091703,
      "eval_loss": 0.5543012022972107,
      "eval_runtime": 21.5696,
      "eval_samples_per_second": 256.75,
      "eval_steps_per_second": 10.71,
      "step": 220
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0920000000000002e-05,
      "loss": 0.5311,
      "step": 230
    },
    {
      "epoch": 0.25,
      "eval_f1": 0.7432260016931946,
      "eval_loss": 0.5397886633872986,
      "eval_runtime": 21.3122,
      "eval_samples_per_second": 259.851,
      "eval_steps_per_second": 10.839,
      "step": 230
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.0520000000000001e-05,
      "loss": 0.5791,
      "step": 240
    },
    {
      "epoch": 0.26,
      "eval_f1": 0.7434033096243912,
      "eval_loss": 0.5391152501106262,
      "eval_runtime": 21.3796,
      "eval_samples_per_second": 259.032,
      "eval_steps_per_second": 10.805,
      "step": 240
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0120000000000001e-05,
      "loss": 0.5831,
      "step": 250
    },
    {
      "epoch": 0.27,
      "eval_f1": 0.763099957359349,
      "eval_loss": 0.5244932174682617,
      "eval_runtime": 21.3101,
      "eval_samples_per_second": 259.877,
      "eval_steps_per_second": 10.84,
      "step": 250
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.72e-06,
      "loss": 0.5453,
      "step": 260
    },
    {
      "epoch": 0.28,
      "eval_f1": 0.7585661401268046,
      "eval_loss": 0.5211306214332581,
      "eval_runtime": 21.2639,
      "eval_samples_per_second": 260.442,
      "eval_steps_per_second": 10.863,
      "step": 260
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.32e-06,
      "loss": 0.5087,
      "step": 270
    },
    {
      "epoch": 0.29,
      "eval_f1": 0.7549183270549422,
      "eval_loss": 0.5206575989723206,
      "eval_runtime": 21.5568,
      "eval_samples_per_second": 256.902,
      "eval_steps_per_second": 10.716,
      "step": 270
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.920000000000001e-06,
      "loss": 0.539,
      "step": 280
    },
    {
      "epoch": 0.3,
      "eval_f1": 0.7483575758659107,
      "eval_loss": 0.5601561665534973,
      "eval_runtime": 21.7276,
      "eval_samples_per_second": 254.883,
      "eval_steps_per_second": 10.632,
      "step": 280
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.52e-06,
      "loss": 0.502,
      "step": 290
    },
    {
      "epoch": 0.31,
      "eval_f1": 0.7497368207624416,
      "eval_loss": 0.5269237160682678,
      "eval_runtime": 21.3645,
      "eval_samples_per_second": 259.215,
      "eval_steps_per_second": 10.812,
      "step": 290
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.120000000000002e-06,
      "loss": 0.5656,
      "step": 300
    },
    {
      "epoch": 0.33,
      "eval_f1": 0.7490172830029811,
      "eval_loss": 0.5704778432846069,
      "eval_runtime": 21.3309,
      "eval_samples_per_second": 259.623,
      "eval_steps_per_second": 10.829,
      "step": 300
    },
    {
      "epoch": 0.34,
      "learning_rate": 7.72e-06,
      "loss": 0.6157,
      "step": 310
    },
    {
      "epoch": 0.34,
      "eval_f1": 0.7610313116302819,
      "eval_loss": 0.5527724027633667,
      "eval_runtime": 21.5886,
      "eval_samples_per_second": 256.524,
      "eval_steps_per_second": 10.7,
      "step": 310
    },
    {
      "epoch": 0.35,
      "learning_rate": 7.32e-06,
      "loss": 0.5262,
      "step": 320
    },
    {
      "epoch": 0.35,
      "eval_f1": 0.7693956225207979,
      "eval_loss": 0.5064041614532471,
      "eval_runtime": 21.4519,
      "eval_samples_per_second": 258.159,
      "eval_steps_per_second": 10.768,
      "step": 320
    },
    {
      "epoch": 0.36,
      "learning_rate": 6.92e-06,
      "loss": 0.5032,
      "step": 330
    },
    {
      "epoch": 0.36,
      "eval_f1": 0.757594480871035,
      "eval_loss": 0.5091240406036377,
      "eval_runtime": 21.5576,
      "eval_samples_per_second": 256.894,
      "eval_steps_per_second": 10.716,
      "step": 330
    },
    {
      "epoch": 0.37,
      "learning_rate": 6.520000000000001e-06,
      "loss": 0.4859,
      "step": 340
    },
    {
      "epoch": 0.37,
      "eval_f1": 0.751697533021681,
      "eval_loss": 0.5241729021072388,
      "eval_runtime": 21.6466,
      "eval_samples_per_second": 255.837,
      "eval_steps_per_second": 10.671,
      "step": 340
    },
    {
      "epoch": 0.38,
      "learning_rate": 6.120000000000001e-06,
      "loss": 0.6227,
      "step": 350
    },
    {
      "epoch": 0.38,
      "eval_f1": 0.7821857624888272,
      "eval_loss": 0.4922301769256592,
      "eval_runtime": 21.5823,
      "eval_samples_per_second": 256.599,
      "eval_steps_per_second": 10.703,
      "step": 350
    },
    {
      "epoch": 0.39,
      "learning_rate": 5.72e-06,
      "loss": 0.4927,
      "step": 360
    },
    {
      "epoch": 0.39,
      "eval_f1": 0.778530969617547,
      "eval_loss": 0.4920683801174164,
      "eval_runtime": 21.173,
      "eval_samples_per_second": 261.559,
      "eval_steps_per_second": 10.91,
      "step": 360
    },
    {
      "epoch": 0.4,
      "learning_rate": 5.320000000000001e-06,
      "loss": 0.596,
      "step": 370
    },
    {
      "epoch": 0.4,
      "eval_f1": 0.7737080595193158,
      "eval_loss": 0.509250283241272,
      "eval_runtime": 21.9207,
      "eval_samples_per_second": 252.638,
      "eval_steps_per_second": 10.538,
      "step": 370
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.92e-06,
      "loss": 0.5932,
      "step": 380
    },
    {
      "epoch": 0.41,
      "eval_f1": 0.7900070254555186,
      "eval_loss": 0.49082762002944946,
      "eval_runtime": 21.7381,
      "eval_samples_per_second": 254.76,
      "eval_steps_per_second": 10.627,
      "step": 380
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "total_flos": 547397542755648.0,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}