plaguss's picture
plaguss HF staff
Model save
3e6253c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 20,
"global_step": 206,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 8.9375,
"learning_rate": 2.3809523809523808e-06,
"logits/chosen": -2.7700600624084473,
"logits/rejected": -2.8606302738189697,
"logps/chosen": -421.64996337890625,
"logps/rejected": -531.4378662109375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.1,
"grad_norm": 7.375,
"learning_rate": 2.380952380952381e-05,
"logits/chosen": -2.7684054374694824,
"logits/rejected": -2.7337145805358887,
"logps/chosen": -333.7870178222656,
"logps/rejected": -312.4859313964844,
"loss": 0.6852,
"rewards/accuracies": 0.5277777910232544,
"rewards/chosen": 0.030088074505329132,
"rewards/margins": 0.01666567102074623,
"rewards/rejected": 0.013422403484582901,
"step": 10
},
{
"epoch": 0.19,
"grad_norm": 6.125,
"learning_rate": 4.761904761904762e-05,
"logits/chosen": -2.8010494709014893,
"logits/rejected": -2.79127836227417,
"logps/chosen": -331.8260498046875,
"logps/rejected": -332.01409912109375,
"loss": 0.6028,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 0.41256317496299744,
"rewards/margins": 0.21184520423412323,
"rewards/rejected": 0.2007180005311966,
"step": 20
},
{
"epoch": 0.19,
"eval_logits/chosen": -2.6401147842407227,
"eval_logits/rejected": -2.614283800125122,
"eval_logps/chosen": -324.9909973144531,
"eval_logps/rejected": -327.9555969238281,
"eval_loss": 0.5285959243774414,
"eval_rewards/accuracies": 0.8125,
"eval_rewards/chosen": 0.878866970539093,
"eval_rewards/margins": 0.43177998065948486,
"eval_rewards/rejected": 0.44708704948425293,
"eval_runtime": 114.9886,
"eval_samples_per_second": 1.6,
"eval_steps_per_second": 0.104,
"step": 20
},
{
"epoch": 0.29,
"grad_norm": 4.84375,
"learning_rate": 4.9708589101037306e-05,
"logits/chosen": -2.6763195991516113,
"logits/rejected": -2.651015043258667,
"logps/chosen": -356.8539733886719,
"logps/rejected": -363.6021423339844,
"loss": 0.4643,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.943565845489502,
"rewards/margins": 0.708370566368103,
"rewards/rejected": 0.23519524931907654,
"step": 30
},
{
"epoch": 0.39,
"grad_norm": 5.75,
"learning_rate": 4.870996167038154e-05,
"logits/chosen": -2.655568838119507,
"logits/rejected": -2.6175591945648193,
"logps/chosen": -353.34619140625,
"logps/rejected": -359.96832275390625,
"loss": 0.3363,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.6268302798271179,
"rewards/margins": 1.4118897914886475,
"rewards/rejected": -0.7850595712661743,
"step": 40
},
{
"epoch": 0.39,
"eval_logits/chosen": -2.535161256790161,
"eval_logits/rejected": -2.5076351165771484,
"eval_logps/chosen": -328.5650939941406,
"eval_logps/rejected": -343.5235900878906,
"eval_loss": 0.32319265604019165,
"eval_rewards/accuracies": 0.859375,
"eval_rewards/chosen": 0.5214586853981018,
"eval_rewards/margins": 1.6311697959899902,
"eval_rewards/rejected": -1.1097110509872437,
"eval_runtime": 114.9563,
"eval_samples_per_second": 1.601,
"eval_steps_per_second": 0.104,
"step": 40
},
{
"epoch": 0.49,
"grad_norm": 4.4375,
"learning_rate": 4.7029241811087457e-05,
"logits/chosen": -2.682722806930542,
"logits/rejected": -2.627808094024658,
"logps/chosen": -382.26690673828125,
"logps/rejected": -376.25689697265625,
"loss": 0.3043,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.3637928366661072,
"rewards/margins": 1.71381413936615,
"rewards/rejected": -1.3500211238861084,
"step": 50
},
{
"epoch": 0.58,
"grad_norm": 5.875,
"learning_rate": 4.471478077342798e-05,
"logits/chosen": -2.6791253089904785,
"logits/rejected": -2.641322374343872,
"logps/chosen": -344.8480529785156,
"logps/rejected": -372.0831298828125,
"loss": 0.2458,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.6151469349861145,
"rewards/margins": 2.2607688903808594,
"rewards/rejected": -1.6456218957901,
"step": 60
},
{
"epoch": 0.58,
"eval_logits/chosen": -2.592442274093628,
"eval_logits/rejected": -2.560177803039551,
"eval_logps/chosen": -328.04132080078125,
"eval_logps/rejected": -351.1114196777344,
"eval_loss": 0.2501268982887268,
"eval_rewards/accuracies": 0.9114583134651184,
"eval_rewards/chosen": 0.5738345980644226,
"eval_rewards/margins": 2.4423279762268066,
"eval_rewards/rejected": -1.8684934377670288,
"eval_runtime": 115.0094,
"eval_samples_per_second": 1.6,
"eval_steps_per_second": 0.104,
"step": 60
},
{
"epoch": 0.68,
"grad_norm": 4.3125,
"learning_rate": 4.1833161387527986e-05,
"logits/chosen": -2.6796765327453613,
"logits/rejected": -2.6534857749938965,
"logps/chosen": -371.1224670410156,
"logps/rejected": -358.3480529785156,
"loss": 0.2487,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -1.5859086513519287,
"rewards/margins": 2.399681329727173,
"rewards/rejected": -3.9855899810791016,
"step": 70
},
{
"epoch": 0.78,
"grad_norm": 4.4375,
"learning_rate": 3.84672825965686e-05,
"logits/chosen": -2.568530559539795,
"logits/rejected": -2.5246570110321045,
"logps/chosen": -354.64984130859375,
"logps/rejected": -360.8916931152344,
"loss": 0.2116,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.9132896661758423,
"rewards/margins": 2.766739845275879,
"rewards/rejected": -3.680030107498169,
"step": 80
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.3427236080169678,
"eval_logits/rejected": -2.3129446506500244,
"eval_logps/chosen": -340.5351257324219,
"eval_logps/rejected": -370.7005920410156,
"eval_loss": 0.19913233816623688,
"eval_rewards/accuracies": 0.9166666865348816,
"eval_rewards/chosen": -0.6755423545837402,
"eval_rewards/margins": 3.1518704891204834,
"eval_rewards/rejected": -3.8274126052856445,
"eval_runtime": 114.9725,
"eval_samples_per_second": 1.6,
"eval_steps_per_second": 0.104,
"step": 80
},
{
"epoch": 0.87,
"grad_norm": 2.0,
"learning_rate": 3.471397460512563e-05,
"logits/chosen": -2.428431272506714,
"logits/rejected": -2.410618782043457,
"logps/chosen": -361.0326232910156,
"logps/rejected": -401.57269287109375,
"loss": 0.1841,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.7255961298942566,
"rewards/margins": 3.4665279388427734,
"rewards/rejected": -4.192124366760254,
"step": 90
},
{
"epoch": 0.97,
"grad_norm": 5.4375,
"learning_rate": 3.0681213250482255e-05,
"logits/chosen": -2.3709776401519775,
"logits/rejected": -2.353501796722412,
"logps/chosen": -331.63623046875,
"logps/rejected": -377.9563903808594,
"loss": 0.1386,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.6168140172958374,
"rewards/margins": 3.699888229370117,
"rewards/rejected": -4.316702365875244,
"step": 100
},
{
"epoch": 0.97,
"eval_logits/chosen": -2.3535118103027344,
"eval_logits/rejected": -2.3131775856018066,
"eval_logps/chosen": -330.8599548339844,
"eval_logps/rejected": -362.6181335449219,
"eval_loss": 0.20019526779651642,
"eval_rewards/accuracies": 0.9375,
"eval_rewards/chosen": 0.29197368025779724,
"eval_rewards/margins": 3.311133623123169,
"eval_rewards/rejected": -3.019160032272339,
"eval_runtime": 114.9013,
"eval_samples_per_second": 1.601,
"eval_steps_per_second": 0.104,
"step": 100
},
{
"epoch": 1.07,
"grad_norm": 0.76953125,
"learning_rate": 2.648501373438142e-05,
"logits/chosen": -2.4437708854675293,
"logits/rejected": -2.4320626258850098,
"logps/chosen": -344.7204895019531,
"logps/rejected": -410.47601318359375,
"loss": 0.0711,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.051882706582546234,
"rewards/margins": 5.166382789611816,
"rewards/rejected": -5.218265533447266,
"step": 110
},
{
"epoch": 1.17,
"grad_norm": 1.4375,
"learning_rate": 2.2246093076900144e-05,
"logits/chosen": -2.430386781692505,
"logits/rejected": -2.34106183052063,
"logps/chosen": -400.32452392578125,
"logps/rejected": -425.37457275390625,
"loss": 0.0458,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.0012158155441284,
"rewards/margins": 6.266473293304443,
"rewards/rejected": -7.2676897048950195,
"step": 120
},
{
"epoch": 1.17,
"eval_logits/chosen": -2.2717294692993164,
"eval_logits/rejected": -2.2290468215942383,
"eval_logps/chosen": -347.5820007324219,
"eval_logps/rejected": -391.1982727050781,
"eval_loss": 0.17477566003799438,
"eval_rewards/accuracies": 0.9479166865348816,
"eval_rewards/chosen": -1.3802350759506226,
"eval_rewards/margins": 4.496945858001709,
"eval_rewards/rejected": -5.877180576324463,
"eval_runtime": 114.9627,
"eval_samples_per_second": 1.601,
"eval_steps_per_second": 0.104,
"step": 120
},
{
"epoch": 1.26,
"grad_norm": 1.0546875,
"learning_rate": 1.8086397307570723e-05,
"logits/chosen": -2.376091957092285,
"logits/rejected": -2.3415114879608154,
"logps/chosen": -337.0244140625,
"logps/rejected": -408.39263916015625,
"loss": 0.0283,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.006544408388435841,
"rewards/margins": 6.594322204589844,
"rewards/rejected": -6.600866794586182,
"step": 130
},
{
"epoch": 1.36,
"grad_norm": 1.3515625,
"learning_rate": 1.4125593300137766e-05,
"logits/chosen": -2.364224672317505,
"logits/rejected": -2.310997724533081,
"logps/chosen": -343.6619567871094,
"logps/rejected": -398.0953674316406,
"loss": 0.0426,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.7415364384651184,
"rewards/margins": 6.200386047363281,
"rewards/rejected": -5.4588494300842285,
"step": 140
},
{
"epoch": 1.36,
"eval_logits/chosen": -2.240306854248047,
"eval_logits/rejected": -2.195923328399658,
"eval_logps/chosen": -334.4142761230469,
"eval_logps/rejected": -375.5160217285156,
"eval_loss": 0.17553412914276123,
"eval_rewards/accuracies": 0.9375,
"eval_rewards/chosen": -0.06346017122268677,
"eval_rewards/margins": 4.2454915046691895,
"eval_rewards/rejected": -4.3089518547058105,
"eval_runtime": 115.021,
"eval_samples_per_second": 1.6,
"eval_steps_per_second": 0.104,
"step": 140
},
{
"epoch": 1.46,
"grad_norm": 0.8515625,
"learning_rate": 1.0477626174477404e-05,
"logits/chosen": -2.3424103260040283,
"logits/rejected": -2.2978971004486084,
"logps/chosen": -347.8079528808594,
"logps/rejected": -375.98260498046875,
"loss": 0.0508,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.3227364122867584,
"rewards/margins": 5.873551845550537,
"rewards/rejected": -5.550815582275391,
"step": 150
},
{
"epoch": 1.55,
"grad_norm": 1.96875,
"learning_rate": 7.247441302957858e-06,
"logits/chosen": -2.3495125770568848,
"logits/rejected": -2.307555675506592,
"logps/chosen": -331.29718017578125,
"logps/rejected": -415.452392578125,
"loss": 0.029,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.16269809007644653,
"rewards/margins": 6.475255012512207,
"rewards/rejected": -6.312556266784668,
"step": 160
},
{
"epoch": 1.55,
"eval_logits/chosen": -2.232851266860962,
"eval_logits/rejected": -2.1893069744110107,
"eval_logps/chosen": -341.7696533203125,
"eval_logps/rejected": -387.3076171875,
"eval_loss": 0.16915130615234375,
"eval_rewards/accuracies": 0.9375,
"eval_rewards/chosen": -0.7989979386329651,
"eval_rewards/margins": 4.689115524291992,
"eval_rewards/rejected": -5.4881134033203125,
"eval_runtime": 114.9918,
"eval_samples_per_second": 1.6,
"eval_steps_per_second": 0.104,
"step": 160
},
{
"epoch": 1.65,
"grad_norm": 0.345703125,
"learning_rate": 4.527965223149957e-06,
"logits/chosen": -2.407200336456299,
"logits/rejected": -2.3430123329162598,
"logps/chosen": -387.9550476074219,
"logps/rejected": -445.9234313964844,
"loss": 0.0175,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.19500017166137695,
"rewards/margins": 7.179248809814453,
"rewards/rejected": -6.984248161315918,
"step": 170
},
{
"epoch": 1.75,
"grad_norm": 3.015625,
"learning_rate": 2.397432310532133e-06,
"logits/chosen": -2.3570303916931152,
"logits/rejected": -2.300320863723755,
"logps/chosen": -367.35577392578125,
"logps/rejected": -424.9029235839844,
"loss": 0.0676,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.1322220414876938,
"rewards/margins": 6.755249977111816,
"rewards/rejected": -6.623027801513672,
"step": 180
},
{
"epoch": 1.75,
"eval_logits/chosen": -2.2314395904541016,
"eval_logits/rejected": -2.1864326000213623,
"eval_logps/chosen": -340.7237854003906,
"eval_logps/rejected": -386.9397277832031,
"eval_loss": 0.16764594614505768,
"eval_rewards/accuracies": 0.9375,
"eval_rewards/chosen": -0.6944115161895752,
"eval_rewards/margins": 4.756911754608154,
"eval_rewards/rejected": -5.45132303237915,
"eval_runtime": 114.865,
"eval_samples_per_second": 1.602,
"eval_steps_per_second": 0.104,
"step": 180
},
{
"epoch": 1.84,
"grad_norm": 0.56640625,
"learning_rate": 9.171341179489034e-07,
"logits/chosen": -2.3660504817962646,
"logits/rejected": -2.2959539890289307,
"logps/chosen": -335.60052490234375,
"logps/rejected": -383.60040283203125,
"loss": 0.0178,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.19160650670528412,
"rewards/margins": 6.720318794250488,
"rewards/rejected": -6.528712272644043,
"step": 190
},
{
"epoch": 1.94,
"grad_norm": 3.9375,
"learning_rate": 1.296561292287446e-07,
"logits/chosen": -2.3115243911743164,
"logits/rejected": -2.281430959701538,
"logps/chosen": -323.0104675292969,
"logps/rejected": -385.94757080078125,
"loss": 0.0517,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.006203270051628351,
"rewards/margins": 6.517538547515869,
"rewards/rejected": -6.523741722106934,
"step": 200
},
{
"epoch": 1.94,
"eval_logits/chosen": -2.231421709060669,
"eval_logits/rejected": -2.186391830444336,
"eval_logps/chosen": -341.20733642578125,
"eval_logps/rejected": -387.5655517578125,
"eval_loss": 0.16659200191497803,
"eval_rewards/accuracies": 0.9375,
"eval_rewards/chosen": -0.7427660822868347,
"eval_rewards/margins": 4.771137237548828,
"eval_rewards/rejected": -5.513904094696045,
"eval_runtime": 114.34,
"eval_samples_per_second": 1.609,
"eval_steps_per_second": 0.105,
"step": 200
},
{
"epoch": 2.0,
"step": 206,
"total_flos": 0.0,
"train_loss": 0.1882365908726905,
"train_runtime": 5068.0756,
"train_samples_per_second": 0.65,
"train_steps_per_second": 0.041
}
],
"logging_steps": 10,
"max_steps": 206,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}