{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 20,
  "global_step": 206,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 8.9375,
      "learning_rate": 2.3809523809523808e-06,
      "logits/chosen": -2.7700600624084473,
      "logits/rejected": -2.8606302738189697,
      "logps/chosen": -421.64996337890625,
      "logps/rejected": -531.4378662109375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.1,
      "grad_norm": 7.375,
      "learning_rate": 2.380952380952381e-05,
      "logits/chosen": -2.7684054374694824,
      "logits/rejected": -2.7337145805358887,
      "logps/chosen": -333.7870178222656,
      "logps/rejected": -312.4859313964844,
      "loss": 0.6852,
      "rewards/accuracies": 0.5277777910232544,
      "rewards/chosen": 0.030088074505329132,
      "rewards/margins": 0.01666567102074623,
      "rewards/rejected": 0.013422403484582901,
      "step": 10
    },
    {
      "epoch": 0.19,
      "grad_norm": 6.125,
      "learning_rate": 4.761904761904762e-05,
      "logits/chosen": -2.8010494709014893,
      "logits/rejected": -2.79127836227417,
      "logps/chosen": -331.8260498046875,
      "logps/rejected": -332.01409912109375,
      "loss": 0.6028,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": 0.41256317496299744,
      "rewards/margins": 0.21184520423412323,
      "rewards/rejected": 0.2007180005311966,
      "step": 20
    },
    {
      "epoch": 0.19,
      "eval_logits/chosen": -2.6401147842407227,
      "eval_logits/rejected": -2.614283800125122,
      "eval_logps/chosen": -324.9909973144531,
      "eval_logps/rejected": -327.9555969238281,
      "eval_loss": 0.5285959243774414,
      "eval_rewards/accuracies": 0.8125,
      "eval_rewards/chosen": 0.878866970539093,
      "eval_rewards/margins": 0.43177998065948486,
      "eval_rewards/rejected": 0.44708704948425293,
      "eval_runtime": 114.9886,
      "eval_samples_per_second": 1.6,
      "eval_steps_per_second": 0.104,
      "step": 20
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.84375,
      "learning_rate": 4.9708589101037306e-05,
      "logits/chosen": -2.6763195991516113,
      "logits/rejected": -2.651015043258667,
      "logps/chosen": -356.8539733886719,
      "logps/rejected": -363.6021423339844,
      "loss": 0.4643,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.943565845489502,
      "rewards/margins": 0.708370566368103,
      "rewards/rejected": 0.23519524931907654,
      "step": 30
    },
    {
      "epoch": 0.39,
      "grad_norm": 5.75,
      "learning_rate": 4.870996167038154e-05,
      "logits/chosen": -2.655568838119507,
      "logits/rejected": -2.6175591945648193,
      "logps/chosen": -353.34619140625,
      "logps/rejected": -359.96832275390625,
      "loss": 0.3363,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 0.6268302798271179,
      "rewards/margins": 1.4118897914886475,
      "rewards/rejected": -0.7850595712661743,
      "step": 40
    },
    {
      "epoch": 0.39,
      "eval_logits/chosen": -2.535161256790161,
      "eval_logits/rejected": -2.5076351165771484,
      "eval_logps/chosen": -328.5650939941406,
      "eval_logps/rejected": -343.5235900878906,
      "eval_loss": 0.32319265604019165,
      "eval_rewards/accuracies": 0.859375,
      "eval_rewards/chosen": 0.5214586853981018,
      "eval_rewards/margins": 1.6311697959899902,
      "eval_rewards/rejected": -1.1097110509872437,
      "eval_runtime": 114.9563,
      "eval_samples_per_second": 1.601,
      "eval_steps_per_second": 0.104,
      "step": 40
    },
    {
      "epoch": 0.49,
      "grad_norm": 4.4375,
      "learning_rate": 4.7029241811087457e-05,
      "logits/chosen": -2.682722806930542,
      "logits/rejected": -2.627808094024658,
      "logps/chosen": -382.26690673828125,
      "logps/rejected": -376.25689697265625,
      "loss": 0.3043,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.3637928366661072,
      "rewards/margins": 1.71381413936615,
      "rewards/rejected": -1.3500211238861084,
      "step": 50
    },
    {
      "epoch": 0.58,
      "grad_norm": 5.875,
      "learning_rate": 4.471478077342798e-05,
      "logits/chosen": -2.6791253089904785,
      "logits/rejected": -2.641322374343872,
      "logps/chosen": -344.8480529785156,
      "logps/rejected": -372.0831298828125,
      "loss": 0.2458,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 0.6151469349861145,
      "rewards/margins": 2.2607688903808594,
      "rewards/rejected": -1.6456218957901,
      "step": 60
    },
    {
      "epoch": 0.58,
      "eval_logits/chosen": -2.592442274093628,
      "eval_logits/rejected": -2.560177803039551,
      "eval_logps/chosen": -328.04132080078125,
      "eval_logps/rejected": -351.1114196777344,
      "eval_loss": 0.2501268982887268,
      "eval_rewards/accuracies": 0.9114583134651184,
      "eval_rewards/chosen": 0.5738345980644226,
      "eval_rewards/margins": 2.4423279762268066,
      "eval_rewards/rejected": -1.8684934377670288,
      "eval_runtime": 115.0094,
      "eval_samples_per_second": 1.6,
      "eval_steps_per_second": 0.104,
      "step": 60
    },
    {
      "epoch": 0.68,
      "grad_norm": 4.3125,
      "learning_rate": 4.1833161387527986e-05,
      "logits/chosen": -2.6796765327453613,
      "logits/rejected": -2.6534857749938965,
      "logps/chosen": -371.1224670410156,
      "logps/rejected": -358.3480529785156,
      "loss": 0.2487,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -1.5859086513519287,
      "rewards/margins": 2.399681329727173,
      "rewards/rejected": -3.9855899810791016,
      "step": 70
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.4375,
      "learning_rate": 3.84672825965686e-05,
      "logits/chosen": -2.568530559539795,
      "logits/rejected": -2.5246570110321045,
      "logps/chosen": -354.64984130859375,
      "logps/rejected": -360.8916931152344,
      "loss": 0.2116,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.9132896661758423,
      "rewards/margins": 2.766739845275879,
      "rewards/rejected": -3.680030107498169,
      "step": 80
    },
    {
      "epoch": 0.78,
      "eval_logits/chosen": -2.3427236080169678,
      "eval_logits/rejected": -2.3129446506500244,
      "eval_logps/chosen": -340.5351257324219,
      "eval_logps/rejected": -370.7005920410156,
      "eval_loss": 0.19913233816623688,
      "eval_rewards/accuracies": 0.9166666865348816,
      "eval_rewards/chosen": -0.6755423545837402,
      "eval_rewards/margins": 3.1518704891204834,
      "eval_rewards/rejected": -3.8274126052856445,
      "eval_runtime": 114.9725,
      "eval_samples_per_second": 1.6,
      "eval_steps_per_second": 0.104,
      "step": 80
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0,
      "learning_rate": 3.471397460512563e-05,
      "logits/chosen": -2.428431272506714,
      "logits/rejected": -2.410618782043457,
      "logps/chosen": -361.0326232910156,
      "logps/rejected": -401.57269287109375,
      "loss": 0.1841,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -0.7255961298942566,
      "rewards/margins": 3.4665279388427734,
      "rewards/rejected": -4.192124366760254,
      "step": 90
    },
    {
      "epoch": 0.97,
      "grad_norm": 5.4375,
      "learning_rate": 3.0681213250482255e-05,
      "logits/chosen": -2.3709776401519775,
      "logits/rejected": -2.353501796722412,
      "logps/chosen": -331.63623046875,
      "logps/rejected": -377.9563903808594,
      "loss": 0.1386,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -0.6168140172958374,
      "rewards/margins": 3.699888229370117,
      "rewards/rejected": -4.316702365875244,
      "step": 100
    },
    {
      "epoch": 0.97,
      "eval_logits/chosen": -2.3535118103027344,
      "eval_logits/rejected": -2.3131775856018066,
      "eval_logps/chosen": -330.8599548339844,
      "eval_logps/rejected": -362.6181335449219,
      "eval_loss": 0.20019526779651642,
      "eval_rewards/accuracies": 0.9375,
      "eval_rewards/chosen": 0.29197368025779724,
      "eval_rewards/margins": 3.311133623123169,
      "eval_rewards/rejected": -3.019160032272339,
      "eval_runtime": 114.9013,
      "eval_samples_per_second": 1.601,
      "eval_steps_per_second": 0.104,
      "step": 100
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.76953125,
      "learning_rate": 2.648501373438142e-05,
      "logits/chosen": -2.4437708854675293,
      "logits/rejected": -2.4320626258850098,
      "logps/chosen": -344.7204895019531,
      "logps/rejected": -410.47601318359375,
      "loss": 0.0711,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -0.051882706582546234,
      "rewards/margins": 5.166382789611816,
      "rewards/rejected": -5.218265533447266,
      "step": 110
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4375,
      "learning_rate": 2.2246093076900144e-05,
      "logits/chosen": -2.430386781692505,
      "logits/rejected": -2.34106183052063,
      "logps/chosen": -400.32452392578125,
      "logps/rejected": -425.37457275390625,
      "loss": 0.0458,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -1.0012158155441284,
      "rewards/margins": 6.266473293304443,
      "rewards/rejected": -7.2676897048950195,
      "step": 120
    },
    {
      "epoch": 1.17,
      "eval_logits/chosen": -2.2717294692993164,
      "eval_logits/rejected": -2.2290468215942383,
      "eval_logps/chosen": -347.5820007324219,
      "eval_logps/rejected": -391.1982727050781,
      "eval_loss": 0.17477566003799438,
      "eval_rewards/accuracies": 0.9479166865348816,
      "eval_rewards/chosen": -1.3802350759506226,
      "eval_rewards/margins": 4.496945858001709,
      "eval_rewards/rejected": -5.877180576324463,
      "eval_runtime": 114.9627,
      "eval_samples_per_second": 1.601,
      "eval_steps_per_second": 0.104,
      "step": 120
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.0546875,
      "learning_rate": 1.8086397307570723e-05,
      "logits/chosen": -2.376091957092285,
      "logits/rejected": -2.3415114879608154,
      "logps/chosen": -337.0244140625,
      "logps/rejected": -408.39263916015625,
      "loss": 0.0283,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.006544408388435841,
      "rewards/margins": 6.594322204589844,
      "rewards/rejected": -6.600866794586182,
      "step": 130
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.3515625,
      "learning_rate": 1.4125593300137766e-05,
      "logits/chosen": -2.364224672317505,
      "logits/rejected": -2.310997724533081,
      "logps/chosen": -343.6619567871094,
      "logps/rejected": -398.0953674316406,
      "loss": 0.0426,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": 0.7415364384651184,
      "rewards/margins": 6.200386047363281,
      "rewards/rejected": -5.4588494300842285,
      "step": 140
    },
    {
      "epoch": 1.36,
      "eval_logits/chosen": -2.240306854248047,
      "eval_logits/rejected": -2.195923328399658,
      "eval_logps/chosen": -334.4142761230469,
      "eval_logps/rejected": -375.5160217285156,
      "eval_loss": 0.17553412914276123,
      "eval_rewards/accuracies": 0.9375,
      "eval_rewards/chosen": -0.06346017122268677,
      "eval_rewards/margins": 4.2454915046691895,
      "eval_rewards/rejected": -4.3089518547058105,
      "eval_runtime": 115.021,
      "eval_samples_per_second": 1.6,
      "eval_steps_per_second": 0.104,
      "step": 140
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.8515625,
      "learning_rate": 1.0477626174477404e-05,
      "logits/chosen": -2.3424103260040283,
      "logits/rejected": -2.2978971004486084,
      "logps/chosen": -347.8079528808594,
      "logps/rejected": -375.98260498046875,
      "loss": 0.0508,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 0.3227364122867584,
      "rewards/margins": 5.873551845550537,
      "rewards/rejected": -5.550815582275391,
      "step": 150
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.96875,
      "learning_rate": 7.247441302957858e-06,
      "logits/chosen": -2.3495125770568848,
      "logits/rejected": -2.307555675506592,
      "logps/chosen": -331.29718017578125,
      "logps/rejected": -415.452392578125,
      "loss": 0.029,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": 0.16269809007644653,
      "rewards/margins": 6.475255012512207,
      "rewards/rejected": -6.312556266784668,
      "step": 160
    },
    {
      "epoch": 1.55,
      "eval_logits/chosen": -2.232851266860962,
      "eval_logits/rejected": -2.1893069744110107,
      "eval_logps/chosen": -341.7696533203125,
      "eval_logps/rejected": -387.3076171875,
      "eval_loss": 0.16915130615234375,
      "eval_rewards/accuracies": 0.9375,
      "eval_rewards/chosen": -0.7989979386329651,
      "eval_rewards/margins": 4.689115524291992,
      "eval_rewards/rejected": -5.4881134033203125,
      "eval_runtime": 114.9918,
      "eval_samples_per_second": 1.6,
      "eval_steps_per_second": 0.104,
      "step": 160
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.345703125,
      "learning_rate": 4.527965223149957e-06,
      "logits/chosen": -2.407200336456299,
      "logits/rejected": -2.3430123329162598,
      "logps/chosen": -387.9550476074219,
      "logps/rejected": -445.9234313964844,
      "loss": 0.0175,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": 0.19500017166137695,
      "rewards/margins": 7.179248809814453,
      "rewards/rejected": -6.984248161315918,
      "step": 170
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.015625,
      "learning_rate": 2.397432310532133e-06,
      "logits/chosen": -2.3570303916931152,
      "logits/rejected": -2.300320863723755,
      "logps/chosen": -367.35577392578125,
      "logps/rejected": -424.9029235839844,
      "loss": 0.0676,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": 0.1322220414876938,
      "rewards/margins": 6.755249977111816,
      "rewards/rejected": -6.623027801513672,
      "step": 180
    },
    {
      "epoch": 1.75,
      "eval_logits/chosen": -2.2314395904541016,
      "eval_logits/rejected": -2.1864326000213623,
      "eval_logps/chosen": -340.7237854003906,
      "eval_logps/rejected": -386.9397277832031,
      "eval_loss": 0.16764594614505768,
      "eval_rewards/accuracies": 0.9375,
      "eval_rewards/chosen": -0.6944115161895752,
      "eval_rewards/margins": 4.756911754608154,
      "eval_rewards/rejected": -5.45132303237915,
      "eval_runtime": 114.865,
      "eval_samples_per_second": 1.602,
      "eval_steps_per_second": 0.104,
      "step": 180
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.56640625,
      "learning_rate": 9.171341179489034e-07,
      "logits/chosen": -2.3660504817962646,
      "logits/rejected": -2.2959539890289307,
      "logps/chosen": -335.60052490234375,
      "logps/rejected": -383.60040283203125,
      "loss": 0.0178,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.19160650670528412,
      "rewards/margins": 6.720318794250488,
      "rewards/rejected": -6.528712272644043,
      "step": 190
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9375,
      "learning_rate": 1.296561292287446e-07,
      "logits/chosen": -2.3115243911743164,
      "logits/rejected": -2.281430959701538,
      "logps/chosen": -323.0104675292969,
      "logps/rejected": -385.94757080078125,
      "loss": 0.0517,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -0.006203270051628351,
      "rewards/margins": 6.517538547515869,
      "rewards/rejected": -6.523741722106934,
      "step": 200
    },
    {
      "epoch": 1.94,
      "eval_logits/chosen": -2.231421709060669,
      "eval_logits/rejected": -2.186391830444336,
      "eval_logps/chosen": -341.20733642578125,
      "eval_logps/rejected": -387.5655517578125,
      "eval_loss": 0.16659200191497803,
      "eval_rewards/accuracies": 0.9375,
      "eval_rewards/chosen": -0.7427660822868347,
      "eval_rewards/margins": 4.771137237548828,
      "eval_rewards/rejected": -5.513904094696045,
      "eval_runtime": 114.34,
      "eval_samples_per_second": 1.609,
      "eval_steps_per_second": 0.105,
      "step": 200
    },
    {
      "epoch": 2.0,
      "step": 206,
      "total_flos": 0.0,
      "train_loss": 0.1882365908726905,
      "train_runtime": 5068.0756,
      "train_samples_per_second": 0.65,
      "train_steps_per_second": 0.041
    }
  ],
  "logging_steps": 10,
  "max_steps": 206,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}