{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9991836734693877,
  "eval_steps": 100,
  "global_step": 153,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-07,
      "logits/chosen": -2.6383626461029053,
      "logits/rejected": -2.6590394973754883,
      "logps/chosen": -263.8804626464844,
      "logps/rejected": -342.5028381347656,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.692108392715454,
      "logits/rejected": -2.6769356727600098,
      "logps/chosen": -251.55091857910156,
      "logps/rejected": -279.13623046875,
      "loss": 0.693,
      "rewards/accuracies": 0.4444444477558136,
      "rewards/chosen": 0.006078645121306181,
      "rewards/margins": 0.001292299828492105,
      "rewards/rejected": 0.004786345642060041,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989490450759331e-06,
      "logits/chosen": -2.7136893272399902,
      "logits/rejected": -2.6634609699249268,
      "logps/chosen": -246.88778686523438,
      "logps/rejected": -270.17083740234375,
      "loss": 0.6909,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.009861673228442669,
      "rewards/margins": 0.002566133625805378,
      "rewards/rejected": 0.007295539136976004,
      "step": 20
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.872270441827174e-06,
      "logits/chosen": -2.7332160472869873,
      "logits/rejected": -2.6656548976898193,
      "logps/chosen": -276.451416015625,
      "logps/rejected": -291.919189453125,
      "loss": 0.6845,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.0339818000793457,
      "rewards/margins": 0.020258868113160133,
      "rewards/rejected": -0.05424066632986069,
      "step": 30
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.630851211353007e-06,
      "logits/chosen": -2.623366117477417,
      "logits/rejected": -2.5993411540985107,
      "logps/chosen": -253.49966430664062,
      "logps/rejected": -300.6990966796875,
      "loss": 0.6794,
      "rewards/accuracies": 0.590624988079071,
      "rewards/chosen": -0.047824542969465256,
      "rewards/margins": 0.032587505877017975,
      "rewards/rejected": -0.08041204512119293,
      "step": 40
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.277872161641682e-06,
      "logits/chosen": -2.695284366607666,
      "logits/rejected": -2.6641030311584473,
      "logps/chosen": -250.3493194580078,
      "logps/rejected": -272.2112731933594,
      "loss": 0.6766,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.006258129142224789,
      "rewards/margins": 0.02742757275700569,
      "rewards/rejected": -0.033685702830553055,
      "step": 50
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.831813362428005e-06,
      "logits/chosen": -2.6273930072784424,
      "logits/rejected": -2.5842039585113525,
      "logps/chosen": -262.25286865234375,
      "logps/rejected": -298.41510009765625,
      "loss": 0.667,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.15569987893104553,
      "rewards/margins": 0.06216084957122803,
      "rewards/rejected": -0.21786072850227356,
      "step": 60
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.3160280345958614e-06,
      "logits/chosen": -2.6327927112579346,
      "logits/rejected": -2.5868000984191895,
      "logps/chosen": -278.89703369140625,
      "logps/rejected": -314.3589172363281,
      "loss": 0.6649,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": -0.1321047842502594,
      "rewards/margins": 0.05293331667780876,
      "rewards/rejected": -0.18503807485103607,
      "step": 70
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.757519902117886e-06,
      "logits/chosen": -2.6241769790649414,
      "logits/rejected": -2.61041522026062,
      "logps/chosen": -260.6992492675781,
      "logps/rejected": -272.4108581542969,
      "loss": 0.6678,
      "rewards/accuracies": 0.6031249761581421,
      "rewards/chosen": -0.16336451470851898,
      "rewards/margins": 0.07086249440908432,
      "rewards/rejected": -0.2342270165681839,
      "step": 80
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.185529423440807e-06,
      "logits/chosen": -2.6284656524658203,
      "logits/rejected": -2.6176791191101074,
      "logps/chosen": -282.1419372558594,
      "logps/rejected": -331.07354736328125,
      "loss": 0.6629,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.29410403966903687,
      "rewards/margins": 0.06631916761398315,
      "rewards/rejected": -0.3604232370853424,
      "step": 90
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6300029195778454e-06,
      "logits/chosen": -2.631556749343872,
      "logits/rejected": -2.61484956741333,
      "logps/chosen": -280.27362060546875,
      "logps/rejected": -321.37884521484375,
      "loss": 0.6615,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.21054717898368835,
      "rewards/margins": 0.08309625834226608,
      "rewards/rejected": -0.2936434745788574,
      "step": 100
    },
    {
      "epoch": 0.65,
      "eval_logits/chosen": -2.6791629791259766,
      "eval_logits/rejected": -2.5814735889434814,
      "eval_logps/chosen": -310.7413024902344,
      "eval_logps/rejected": -294.6734619140625,
      "eval_loss": 0.6692679524421692,
      "eval_rewards/accuracies": 0.5519999861717224,
      "eval_rewards/chosen": -0.1884314864873886,
      "eval_rewards/margins": 0.05432567000389099,
      "eval_rewards/rejected": -0.2427571415901184,
      "eval_runtime": 395.8034,
      "eval_samples_per_second": 5.053,
      "eval_steps_per_second": 0.632,
      "step": 100
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.1200247470632394e-06,
      "logits/chosen": -2.6365230083465576,
      "logits/rejected": -2.636570453643799,
      "logps/chosen": -262.6191711425781,
      "logps/rejected": -310.3700256347656,
      "loss": 0.6646,
      "rewards/accuracies": 0.6343749761581421,
      "rewards/chosen": -0.22685377299785614,
      "rewards/margins": 0.07261677831411362,
      "rewards/rejected": -0.29947060346603394,
      "step": 110
    },
    {
      "epoch": 0.78,
      "learning_rate": 6.822945986946386e-07,
      "logits/chosen": -2.6256182193756104,
      "logits/rejected": -2.5980124473571777,
      "logps/chosen": -246.5704803466797,
      "logps/rejected": -282.092529296875,
      "loss": 0.6621,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.20391853153705597,
      "rewards/margins": 0.08593297004699707,
      "rewards/rejected": -0.28985148668289185,
      "step": 120
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.397296523427807e-07,
      "logits/chosen": -2.584958553314209,
      "logits/rejected": -2.562863349914551,
      "logps/chosen": -262.963134765625,
      "logps/rejected": -306.772216796875,
      "loss": 0.6598,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.21135267615318298,
      "rewards/margins": 0.08709158003330231,
      "rewards/rejected": -0.2984442710876465,
      "step": 130
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.102647517397798e-07,
      "logits/chosen": -2.6206483840942383,
      "logits/rejected": -2.604473352432251,
      "logps/chosen": -272.1588439941406,
      "logps/rejected": -318.1951599121094,
      "loss": 0.6636,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.21283432841300964,
      "rewards/margins": 0.10337638854980469,
      "rewards/rejected": -0.3162107467651367,
      "step": 140
    },
    {
      "epoch": 0.98,
      "learning_rate": 5.9134352763748345e-09,
      "logits/chosen": -2.6338722705841064,
      "logits/rejected": -2.62086820602417,
      "logps/chosen": -296.335205078125,
      "logps/rejected": -331.77777099609375,
      "loss": 0.6594,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.23208002746105194,
      "rewards/margins": 0.08073471486568451,
      "rewards/rejected": -0.31281474232673645,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 153,
      "total_flos": 0.0,
      "train_loss": 0.6705375785141988,
      "train_runtime": 6538.0645,
      "train_samples_per_second": 2.998,
      "train_steps_per_second": 0.023
    }
  ],
  "logging_steps": 10,
  "max_steps": 153,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}