{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 100,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-07,
      "logits/chosen": -2.58028244972229,
      "logits/rejected": -2.5264699459075928,
      "logps/chosen": -275.8560791015625,
      "logps/rejected": -243.97787475585938,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.599268913269043,
      "logits/rejected": -2.620181083679199,
      "logps/chosen": -229.47467041015625,
      "logps/rejected": -188.52099609375,
      "loss": 0.6925,
      "rewards/accuracies": 0.4756944477558136,
      "rewards/chosen": -0.005837044212967157,
      "rewards/margins": -1.28112105812761e-05,
      "rewards/rejected": -0.005824232939630747,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989935734988098e-06,
      "logits/chosen": -2.6293463706970215,
      "logits/rejected": -2.631697654724121,
      "logps/chosen": -233.75509643554688,
      "logps/rejected": -196.51675415039062,
      "loss": 0.6879,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.056370873004198074,
      "rewards/margins": 0.016503650695085526,
      "rewards/rejected": -0.0728745236992836,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": -2.6340599060058594,
      "logits/rejected": -2.6145126819610596,
      "logps/chosen": -244.6521453857422,
      "logps/rejected": -211.09170532226562,
      "loss": 0.6767,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.04168517515063286,
      "rewards/margins": 0.03752576559782028,
      "rewards/rejected": -0.07921093702316284,
      "step": 30
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004666e-06,
      "logits/chosen": -2.5753064155578613,
      "logits/rejected": -2.546308994293213,
      "logps/chosen": -235.4735870361328,
      "logps/rejected": -216.3767852783203,
      "loss": 0.6724,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.20934326946735382,
      "rewards/margins": 0.032163362950086594,
      "rewards/rejected": -0.24150662124156952,
      "step": 40
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.3069871595684795e-06,
      "logits/chosen": -2.579021692276001,
      "logits/rejected": -2.5481619834899902,
      "logps/chosen": -238.4982452392578,
      "logps/rejected": -210.54537963867188,
      "loss": 0.6609,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.13248375058174133,
      "rewards/margins": 0.07907234132289886,
      "rewards/rejected": -0.211556077003479,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.8772424536302565e-06,
      "logits/chosen": -2.5088608264923096,
      "logits/rejected": -2.4528446197509766,
      "logps/chosen": -244.6417236328125,
      "logps/rejected": -219.36660766601562,
      "loss": 0.6588,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.10884495079517365,
      "rewards/margins": 0.0814167931675911,
      "rewards/rejected": -0.19026173651218414,
      "step": 60
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.3784370602033572e-06,
      "logits/chosen": -2.4629616737365723,
      "logits/rejected": -2.438931941986084,
      "logps/chosen": -238.8616943359375,
      "logps/rejected": -205.7681121826172,
      "loss": 0.6535,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": -0.17304827272891998,
      "rewards/margins": 0.10028227418661118,
      "rewards/rejected": -0.27333056926727295,
      "step": 70
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.835583164544139e-06,
      "logits/chosen": -2.4710023403167725,
      "logits/rejected": -2.4444005489349365,
      "logps/chosen": -239.69384765625,
      "logps/rejected": -218.24093627929688,
      "loss": 0.6514,
      "rewards/accuracies": 0.5843750238418579,
      "rewards/chosen": -0.20169004797935486,
      "rewards/margins": 0.10011257976293564,
      "rewards/rejected": -0.3018026351928711,
      "step": 80
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.2759017277414165e-06,
      "logits/chosen": -2.495241641998291,
      "logits/rejected": -2.4798271656036377,
      "logps/chosen": -267.980224609375,
      "logps/rejected": -255.43936157226562,
      "loss": 0.6519,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.25710397958755493,
      "rewards/margins": 0.0879761129617691,
      "rewards/rejected": -0.34508010745048523,
      "step": 90
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626318e-06,
      "logits/chosen": -2.4803011417388916,
      "logits/rejected": -2.4598233699798584,
      "logps/chosen": -257.54901123046875,
      "logps/rejected": -227.8817596435547,
      "loss": 0.6483,
      "rewards/accuracies": 0.640625,
      "rewards/chosen": -0.22189533710479736,
      "rewards/margins": 0.143667072057724,
      "rewards/rejected": -0.365562379360199,
      "step": 100
    },
    {
      "epoch": 0.64,
      "eval_logits/chosen": -2.4257185459136963,
      "eval_logits/rejected": -2.339790105819702,
      "eval_logps/chosen": -308.5958557128906,
      "eval_logps/rejected": -300.188232421875,
      "eval_loss": 0.6988382339477539,
      "eval_rewards/accuracies": 0.4779999852180481,
      "eval_rewards/chosen": -0.10537321120500565,
      "eval_rewards/margins": 0.0014481162652373314,
      "eval_rewards/rejected": -0.10682134330272675,
      "eval_runtime": 383.862,
      "eval_samples_per_second": 5.21,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.217751806485235e-06,
      "logits/chosen": -2.4746994972229004,
      "logits/rejected": -2.4346601963043213,
      "logps/chosen": -272.1098327636719,
      "logps/rejected": -237.3122100830078,
      "loss": 0.6401,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.21971949934959412,
      "rewards/margins": 0.1589251309633255,
      "rewards/rejected": -0.3786446452140808,
      "step": 110
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328385e-07,
      "logits/chosen": -2.441807270050049,
      "logits/rejected": -2.439669609069824,
      "logps/chosen": -260.669189453125,
      "logps/rejected": -254.50344848632812,
      "loss": 0.6552,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.26529985666275024,
      "rewards/margins": 0.10932101309299469,
      "rewards/rejected": -0.3746209144592285,
      "step": 120
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1356686569674344e-07,
      "logits/chosen": -2.4392101764678955,
      "logits/rejected": -2.375783920288086,
      "logps/chosen": -241.207275390625,
      "logps/rejected": -224.5260772705078,
      "loss": 0.6479,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.24242636561393738,
      "rewards/margins": 0.09688840806484222,
      "rewards/rejected": -0.3393147885799408,
      "step": 130
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.59412823400657e-07,
      "logits/chosen": -2.3803234100341797,
      "logits/rejected": -2.3904504776000977,
      "logps/chosen": -245.4437255859375,
      "logps/rejected": -237.4853057861328,
      "loss": 0.6359,
      "rewards/accuracies": 0.628125011920929,
      "rewards/chosen": -0.213097482919693,
      "rewards/margins": 0.13748633861541748,
      "rewards/rejected": -0.35058385133743286,
      "step": 140
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.262559558016325e-08,
      "logits/chosen": -2.4386649131774902,
      "logits/rejected": -2.3991496562957764,
      "logps/chosen": -235.7268524169922,
      "logps/rejected": -208.8386688232422,
      "loss": 0.65,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.1908179074525833,
      "rewards/margins": 0.11969475448131561,
      "rewards/rejected": -0.3105126619338989,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 0.6591962063923861,
      "train_runtime": 7161.0752,
      "train_samples_per_second": 2.793,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}