{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.12310217480508823,
  "eval_steps": 50,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008206811653672548,
      "grad_norm": 0.06318386644124985,
      "learning_rate": 4.999451708687114e-06,
      "logits/chosen": -2.1367907524108887,
      "logits/rejected": -2.4948182106018066,
      "logps/chosen": -0.291498601436615,
      "logps/rejected": -0.3196522295475006,
      "loss": 7.5728,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.4372479021549225,
      "rewards/margins": 0.04223042353987694,
      "rewards/rejected": -0.47947829961776733,
      "step": 10
    },
    {
      "epoch": 0.016413623307345096,
      "grad_norm": 0.07310314476490021,
      "learning_rate": 4.997807075247147e-06,
      "logits/chosen": -2.1456007957458496,
      "logits/rejected": -2.4455342292785645,
      "logps/chosen": -0.26213544607162476,
      "logps/rejected": -0.32332050800323486,
      "loss": 7.5298,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.3932031989097595,
      "rewards/margins": 0.09177760779857635,
      "rewards/rejected": -0.4849807620048523,
      "step": 20
    },
    {
      "epoch": 0.024620434961017644,
      "grad_norm": 0.05936102196574211,
      "learning_rate": 4.9950668210706795e-06,
      "logits/chosen": -2.0765950679779053,
      "logits/rejected": -2.485799789428711,
      "logps/chosen": -0.26631081104278564,
      "logps/rejected": -0.32647624611854553,
      "loss": 7.5208,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.39946624636650085,
      "rewards/margins": 0.09024813771247864,
      "rewards/rejected": -0.4897143840789795,
      "step": 30
    },
    {
      "epoch": 0.03282724661469019,
      "grad_norm": 0.08499134331941605,
      "learning_rate": 4.9912321481237616e-06,
      "logits/chosen": -2.0753884315490723,
      "logits/rejected": -2.441580295562744,
      "logps/chosen": -0.2749950885772705,
      "logps/rejected": -0.30180150270462036,
      "loss": 7.4229,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.41249266266822815,
      "rewards/margins": 0.04020959883928299,
      "rewards/rejected": -0.45270222425460815,
      "step": 40
    },
    {
      "epoch": 0.04103405826836274,
      "grad_norm": 0.07681389898061752,
      "learning_rate": 4.986304738420684e-06,
      "logits/chosen": -2.145660877227783,
      "logits/rejected": -2.465946912765503,
      "logps/chosen": -0.24909739196300507,
      "logps/rejected": -0.2796121835708618,
      "loss": 7.4811,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.373646080493927,
      "rewards/margins": 0.045772187411785126,
      "rewards/rejected": -0.41941824555397034,
      "step": 50
    },
    {
      "epoch": 0.04103405826836274,
      "eval_logits/chosen": -2.012000799179077,
      "eval_logits/rejected": -2.5381252765655518,
      "eval_logps/chosen": -0.24157460033893585,
      "eval_logps/rejected": -0.2957758605480194,
      "eval_loss": 0.9317650198936462,
      "eval_rewards/accuracies": 0.5252525210380554,
      "eval_rewards/chosen": -0.3623619079589844,
      "eval_rewards/margins": 0.08130191266536713,
      "eval_rewards/rejected": -0.4436637759208679,
      "eval_runtime": 26.0809,
      "eval_samples_per_second": 30.214,
      "eval_steps_per_second": 3.796,
      "step": 50
    },
    {
      "epoch": 0.04924086992203529,
      "grad_norm": 0.06638535112142563,
      "learning_rate": 4.980286753286196e-06,
      "logits/chosen": -2.145846128463745,
      "logits/rejected": -2.4077115058898926,
      "logps/chosen": -0.22265203297138214,
      "logps/rejected": -0.30774614214897156,
      "loss": 7.4605,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.3339780271053314,
      "rewards/margins": 0.1276412308216095,
      "rewards/rejected": -0.4616192877292633,
      "step": 60
    },
    {
      "epoch": 0.057447681575707836,
      "grad_norm": 0.057281140238046646,
      "learning_rate": 4.973180832407471e-06,
      "logits/chosen": -2.0021350383758545,
      "logits/rejected": -2.4299912452697754,
      "logps/chosen": -0.23488977551460266,
      "logps/rejected": -0.33270469307899475,
      "loss": 7.4257,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.3523346781730652,
      "rewards/margins": 0.14672236144542694,
      "rewards/rejected": -0.4990570545196533,
      "step": 70
    },
    {
      "epoch": 0.06565449322938038,
      "grad_norm": 0.07725922018289566,
      "learning_rate": 4.964990092676263e-06,
      "logits/chosen": -2.117995023727417,
      "logits/rejected": -2.359265089035034,
      "logps/chosen": -0.21598832309246063,
      "logps/rejected": -0.300583153963089,
      "loss": 7.4384,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.32398244738578796,
      "rewards/margins": 0.12689228355884552,
      "rewards/rejected": -0.4508747458457947,
      "step": 80
    },
    {
      "epoch": 0.07386130488305294,
      "grad_norm": 0.0598183274269104,
      "learning_rate": 4.9557181268217225e-06,
      "logits/chosen": -2.282627582550049,
      "logits/rejected": -2.441333532333374,
      "logps/chosen": -0.23655852675437927,
      "logps/rejected": -0.3246815800666809,
      "loss": 7.4584,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.3548378050327301,
      "rewards/margins": 0.13218457996845245,
      "rewards/rejected": -0.48702239990234375,
      "step": 90
    },
    {
      "epoch": 0.08206811653672548,
      "grad_norm": 0.058213479816913605,
      "learning_rate": 4.9453690018345144e-06,
      "logits/chosen": -2.1114468574523926,
      "logits/rejected": -2.5035691261291504,
      "logps/chosen": -0.23073866963386536,
      "logps/rejected": -0.29445192217826843,
      "loss": 7.4116,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.34610801935195923,
      "rewards/margins": 0.09556989371776581,
      "rewards/rejected": -0.44167789816856384,
      "step": 100
    },
    {
      "epoch": 0.08206811653672548,
      "eval_logits/chosen": -2.0183491706848145,
      "eval_logits/rejected": -2.5400593280792236,
      "eval_logps/chosen": -0.20393377542495728,
      "eval_logps/rejected": -0.2818409502506256,
      "eval_loss": 0.9129964113235474,
      "eval_rewards/accuracies": 0.5656565427780151,
      "eval_rewards/chosen": -0.3059006631374359,
      "eval_rewards/margins": 0.11686072498559952,
      "eval_rewards/rejected": -0.4227614104747772,
      "eval_runtime": 26.0825,
      "eval_samples_per_second": 30.212,
      "eval_steps_per_second": 3.796,
      "step": 100
    },
    {
      "epoch": 0.09027492819039803,
      "grad_norm": 0.06249881908297539,
      "learning_rate": 4.933947257182901e-06,
      "logits/chosen": -2.1324548721313477,
      "logits/rejected": -2.434319019317627,
      "logps/chosen": -0.22180762887001038,
      "logps/rejected": -0.28862181305885315,
      "loss": 7.3604,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.332711398601532,
      "rewards/margins": 0.10022131353616714,
      "rewards/rejected": -0.43293270468711853,
      "step": 110
    },
    {
      "epoch": 0.09848173984407058,
      "grad_norm": 0.061758093535900116,
      "learning_rate": 4.921457902821578e-06,
      "logits/chosen": -2.0597169399261475,
      "logits/rejected": -2.4386391639709473,
      "logps/chosen": -0.22720107436180115,
      "logps/rejected": -0.303659051656723,
      "loss": 7.3624,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.3408016264438629,
      "rewards/margins": 0.1146869882941246,
      "rewards/rejected": -0.4554885923862457,
      "step": 120
    },
    {
      "epoch": 0.10668855149774313,
      "grad_norm": 0.08368540555238724,
      "learning_rate": 4.907906416994146e-06,
      "logits/chosen": -2.0944437980651855,
      "logits/rejected": -2.4157254695892334,
      "logps/chosen": -0.19590887427330017,
      "logps/rejected": -0.3365771770477295,
      "loss": 7.3464,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.29386329650878906,
      "rewards/margins": 0.2110024392604828,
      "rewards/rejected": -0.5048657655715942,
      "step": 130
    },
    {
      "epoch": 0.11489536315141567,
      "grad_norm": 0.060954928398132324,
      "learning_rate": 4.893298743830168e-06,
      "logits/chosen": -2.1551766395568848,
      "logits/rejected": -2.5695576667785645,
      "logps/chosen": -0.19875812530517578,
      "logps/rejected": -0.2967599928379059,
      "loss": 7.3179,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.29813718795776367,
      "rewards/margins": 0.14700281620025635,
      "rewards/rejected": -0.4451400339603424,
      "step": 140
    },
    {
      "epoch": 0.12310217480508823,
      "grad_norm": 0.05665091797709465,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": -2.1410365104675293,
      "logits/rejected": -2.4798667430877686,
      "logps/chosen": -0.19316771626472473,
      "logps/rejected": -0.2972142696380615,
      "loss": 7.2384,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.2897515296936035,
      "rewards/margins": 0.15606984496116638,
      "rewards/rejected": -0.4458213746547699,
      "step": 150
    },
    {
      "epoch": 0.12310217480508823,
      "eval_logits/chosen": -2.0654072761535645,
      "eval_logits/rejected": -2.596571207046509,
      "eval_logps/chosen": -0.17970335483551025,
      "eval_logps/rejected": -0.2767573893070221,
      "eval_loss": 0.8982937335968018,
      "eval_rewards/accuracies": 0.5858585834503174,
      "eval_rewards/chosen": -0.269555002450943,
      "eval_rewards/margins": 0.14558106660842896,
      "eval_rewards/rejected": -0.41513609886169434,
      "eval_runtime": 26.0741,
      "eval_samples_per_second": 30.222,
      "eval_steps_per_second": 3.797,
      "step": 150
    }
  ],
  "logging_steps": 10,
  "max_steps": 1500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.5967069864617574e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
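
For quick inspection of this checkpoint's progress, the file can be parsed with a short script. The sketch below is a minimal example, not part of the training code: it assumes the file sits at checkpoint-150/trainer_state.json (adjust the path as needed), splits log_history into training and evaluation records by the presence of the "loss" versus "eval_loss" keys, and prints step, loss, reward margin, and accuracy.

```python
import json

# Hypothetical path for illustration; point it at the actual checkpoint directory.
STATE_PATH = "checkpoint-150/trainer_state.json"

with open(STATE_PATH, "r", encoding="utf-8") as f:
    state = json.load(f)

# Entries carrying "loss" come from training steps (every logging_steps=10);
# entries carrying "eval_loss" come from evaluations (every eval_steps=50).
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"{'step':>6} {'loss':>8} {'margin':>8} {'acc':>6}")
for e in train_logs:
    print(f"{e['step']:>6} {e['loss']:>8.4f} "
          f"{e['rewards/margins']:>8.4f} {e['rewards/accuracies']:>6.3f}")

print("\nEvaluation:")
for e in eval_logs:
    print(f"step {e['step']}: eval_loss={e['eval_loss']:.4f}, "
          f"margin={e['eval_rewards/margins']:.4f}, "
          f"accuracy={e['eval_rewards/accuracies']:.3f}")
```

Over these 150 of 1500 steps, the printed series shows training loss falling from about 7.57 to 7.24, eval_loss from 0.932 to 0.898, and the eval reward margin rising from 0.081 to 0.146.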