nash_dpo_iter_3 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9992429977289932,
"eval_steps": 100,
"global_step": 165,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.9411764705882356e-07,
"logits/chosen": -2.62508487701416,
"logits/rejected": -2.638840436935425,
"logps/chosen": -313.21063232421875,
"logps/rejected": -286.36663818359375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 2.9411764705882355e-06,
"logits/chosen": -2.7004079818725586,
"logits/rejected": -2.6217572689056396,
"logps/chosen": -292.9493408203125,
"logps/rejected": -278.7856140136719,
"loss": 0.6926,
"rewards/accuracies": 0.5069444179534912,
"rewards/chosen": 0.0015960136661306024,
"rewards/margins": 0.0010866459924727678,
"rewards/rejected": 0.0005093678482808173,
"step": 10
},
{
"epoch": 0.12,
"learning_rate": 4.994932636402032e-06,
"logits/chosen": -2.690582752227783,
"logits/rejected": -2.671006917953491,
"logps/chosen": -273.6416931152344,
"logps/rejected": -290.06622314453125,
"loss": 0.6854,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": 0.04393266513943672,
"rewards/margins": 0.014766323380172253,
"rewards/rejected": 0.029166344553232193,
"step": 20
},
{
"epoch": 0.18,
"learning_rate": 4.905416503522124e-06,
"logits/chosen": -2.6617255210876465,
"logits/rejected": -2.585472345352173,
"logps/chosen": -288.24456787109375,
"logps/rejected": -275.30908203125,
"loss": 0.6639,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.08098463714122772,
"rewards/margins": 0.06556984782218933,
"rewards/rejected": 0.015414801426231861,
"step": 30
},
{
"epoch": 0.24,
"learning_rate": 4.707922373336524e-06,
"logits/chosen": -2.5689034461975098,
"logits/rejected": -2.5172557830810547,
"logps/chosen": -297.8088684082031,
"logps/rejected": -299.01019287109375,
"loss": 0.6496,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.03176301717758179,
"rewards/margins": 0.09769946336746216,
"rewards/rejected": -0.12946248054504395,
"step": 40
},
{
"epoch": 0.3,
"learning_rate": 4.411315662967732e-06,
"logits/chosen": -2.543713331222534,
"logits/rejected": -2.471020221710205,
"logps/chosen": -278.70068359375,
"logps/rejected": -281.05767822265625,
"loss": 0.645,
"rewards/accuracies": 0.640625,
"rewards/chosen": 0.058358293026685715,
"rewards/margins": 0.13286305963993073,
"rewards/rejected": -0.07450475543737411,
"step": 50
},
{
"epoch": 0.36,
"learning_rate": 4.028910905897229e-06,
"logits/chosen": -2.5148937702178955,
"logits/rejected": -2.403398036956787,
"logps/chosen": -313.97503662109375,
"logps/rejected": -300.5794677734375,
"loss": 0.6317,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.1467473804950714,
"rewards/margins": 0.1627379208803177,
"rewards/rejected": -0.3094852566719055,
"step": 60
},
{
"epoch": 0.42,
"learning_rate": 3.577874068920446e-06,
"logits/chosen": -2.4615416526794434,
"logits/rejected": -2.3834948539733887,
"logps/chosen": -288.96875,
"logps/rejected": -298.4138488769531,
"loss": 0.6272,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0141445966437459,
"rewards/margins": 0.17550477385520935,
"rewards/rejected": -0.16136017441749573,
"step": 70
},
{
"epoch": 0.48,
"learning_rate": 3.0784519801008546e-06,
"logits/chosen": -2.386679172515869,
"logits/rejected": -2.308007001876831,
"logps/chosen": -303.1172180175781,
"logps/rejected": -317.23577880859375,
"loss": 0.6276,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.03143765777349472,
"rewards/margins": 0.20620755851268768,
"rewards/rejected": -0.237645223736763,
"step": 80
},
{
"epoch": 0.55,
"learning_rate": 2.553063458334059e-06,
"logits/chosen": -2.4485552310943604,
"logits/rejected": -2.3585047721862793,
"logps/chosen": -294.64202880859375,
"logps/rejected": -314.85906982421875,
"loss": 0.6264,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.18389078974723816,
"rewards/margins": 0.19124503433704376,
"rewards/rejected": -0.3751358091831207,
"step": 90
},
{
"epoch": 0.61,
"learning_rate": 2.025292943281429e-06,
"logits/chosen": -2.4612982273101807,
"logits/rejected": -2.3962552547454834,
"logps/chosen": -300.9443359375,
"logps/rejected": -299.62554931640625,
"loss": 0.6237,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.03618014603853226,
"rewards/margins": 0.2104295939207077,
"rewards/rejected": -0.24660976231098175,
"step": 100
},
{
"epoch": 0.61,
"eval_logits/chosen": -2.440356492996216,
"eval_logits/rejected": -2.3331212997436523,
"eval_logps/chosen": -314.80450439453125,
"eval_logps/rejected": -316.8028259277344,
"eval_loss": 0.6047022938728333,
"eval_rewards/accuracies": 0.6980000138282776,
"eval_rewards/chosen": -0.14116904139518738,
"eval_rewards/margins": 0.23404958844184875,
"eval_rewards/rejected": -0.37521862983703613,
"eval_runtime": 384.1798,
"eval_samples_per_second": 5.206,
"eval_steps_per_second": 0.651,
"step": 100
},
{
"epoch": 0.67,
"learning_rate": 1.5188318011445907e-06,
"logits/chosen": -2.4451894760131836,
"logits/rejected": -2.3738484382629395,
"logps/chosen": -297.38006591796875,
"logps/rejected": -310.2391662597656,
"loss": 0.6156,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.10568475723266602,
"rewards/margins": 0.23291108012199402,
"rewards/rejected": -0.33859583735466003,
"step": 110
},
{
"epoch": 0.73,
"learning_rate": 1.0564148305586296e-06,
"logits/chosen": -2.5074470043182373,
"logits/rejected": -2.3541178703308105,
"logps/chosen": -313.4942932128906,
"logps/rejected": -304.71240234375,
"loss": 0.6023,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.05554385855793953,
"rewards/margins": 0.2575618028640747,
"rewards/rejected": -0.31310564279556274,
"step": 120
},
{
"epoch": 0.79,
"learning_rate": 6.587997083462197e-07,
"logits/chosen": -2.472149133682251,
"logits/rejected": -2.410820960998535,
"logps/chosen": -306.9402770996094,
"logps/rejected": -340.479736328125,
"loss": 0.6055,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.10773856937885284,
"rewards/margins": 0.24569562077522278,
"rewards/rejected": -0.35343414545059204,
"step": 130
},
{
"epoch": 0.85,
"learning_rate": 3.438351873250492e-07,
"logits/chosen": -2.4470582008361816,
"logits/rejected": -2.354292392730713,
"logps/chosen": -300.5553283691406,
"logps/rejected": -334.2596435546875,
"loss": 0.6132,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.13135434687137604,
"rewards/margins": 0.2554669976234436,
"rewards/rejected": -0.38682132959365845,
"step": 140
},
{
"epoch": 0.91,
"learning_rate": 1.2565987432367032e-07,
"logits/chosen": -2.466301679611206,
"logits/rejected": -2.3889572620391846,
"logps/chosen": -304.37078857421875,
"logps/rejected": -318.76507568359375,
"loss": 0.6197,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.13493719696998596,
"rewards/margins": 0.24466891586780548,
"rewards/rejected": -0.379606157541275,
"step": 150
},
{
"epoch": 0.97,
"learning_rate": 1.4067554877743861e-08,
"logits/chosen": -2.437718152999878,
"logits/rejected": -2.32914662361145,
"logps/chosen": -297.7317810058594,
"logps/rejected": -310.4150085449219,
"loss": 0.5989,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.10253523290157318,
"rewards/margins": 0.2659505009651184,
"rewards/rejected": -0.3684857487678528,
"step": 160
},
{
"epoch": 1.0,
"step": 165,
"total_flos": 0.0,
"train_loss": 0.6320372126319191,
"train_runtime": 7509.7506,
"train_samples_per_second": 2.814,
"train_steps_per_second": 0.022
}
],
"logging_steps": 10,
"max_steps": 165,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
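
Note: the log above is the standard Hugging Face Trainer state for a DPO run; the "log_history" list can be read directly to inspect how the loss and reward margins evolved. Below is a minimal sketch of doing that in Python. The file path is an assumption (it presumes trainer_state.json sits in the nash_dpo_iter_3 checkpoint directory); everything else uses only the keys shown above.

    import json

    # Assumed local path to the checkpoint's trainer_state.json.
    with open("nash_dpo_iter_3/trainer_state.json") as f:
        state = json.load(f)

    # Training entries carry "loss"; evaluation entries carry "eval_loss";
    # the final summary entry carries neither and is skipped by both filters.
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    steps = [e["step"] for e in train_logs]
    loss = [e["loss"] for e in train_logs]
    margins = [e["rewards/margins"] for e in train_logs]

    print(f"final train loss: {loss[-1]:.4f}, final reward margin: {margins[-1]:.4f}")
    for e in eval_logs:
        print(f"step {e['step']}: eval_loss={e['eval_loss']:.4f}, "
              f"eval_acc={e['eval_rewards/accuracies']:.3f}")

Run against this file, the last training entry (step 160) and the single eval entry at step 100 would be printed; plotting steps against loss or margins is a straightforward extension.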