{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991836734693877, "eval_steps": 100, "global_step": 153, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-07, "logits/chosen": -2.6383626461029053, "logits/rejected": -2.6590394973754883, "logps/chosen": -263.8804626464844, "logps/rejected": -342.5028381347656, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.07, "learning_rate": 3.125e-06, "logits/chosen": -2.692108392715454, "logits/rejected": -2.6769356727600098, "logps/chosen": -251.55091857910156, "logps/rejected": -279.13623046875, "loss": 0.693, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.006078645121306181, "rewards/margins": 0.001292299828492105, "rewards/rejected": 0.004786345642060041, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.989490450759331e-06, "logits/chosen": -2.7136893272399902, "logits/rejected": -2.6634609699249268, "logps/chosen": -246.88778686523438, "logps/rejected": -270.17083740234375, "loss": 0.6909, "rewards/accuracies": 0.5625, "rewards/chosen": 0.009861673228442669, "rewards/margins": 0.002566133625805378, "rewards/rejected": 0.007295539136976004, "step": 20 }, { "epoch": 0.2, "learning_rate": 4.872270441827174e-06, "logits/chosen": -2.7332160472869873, "logits/rejected": -2.6656548976898193, "logps/chosen": -276.451416015625, "logps/rejected": -291.919189453125, "loss": 0.6845, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0339818000793457, "rewards/margins": 0.020258868113160133, "rewards/rejected": -0.05424066632986069, "step": 30 }, { "epoch": 0.26, "learning_rate": 4.630851211353007e-06, "logits/chosen": -2.623366117477417, "logits/rejected": -2.5993411540985107, "logps/chosen": -253.49966430664062, "logps/rejected": -300.6990966796875, "loss": 0.6794, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.047824542969465256, "rewards/margins": 0.032587505877017975, "rewards/rejected": -0.08041204512119293, "step": 40 }, { "epoch": 0.33, "learning_rate": 4.277872161641682e-06, "logits/chosen": -2.695284366607666, "logits/rejected": -2.6641030311584473, "logps/chosen": -250.3493194580078, "logps/rejected": -272.2112731933594, "loss": 0.6766, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.006258129142224789, "rewards/margins": 0.02742757275700569, "rewards/rejected": -0.033685702830553055, "step": 50 }, { "epoch": 0.39, "learning_rate": 3.831813362428005e-06, "logits/chosen": -2.6273930072784424, "logits/rejected": -2.5842039585113525, "logps/chosen": -262.25286865234375, "logps/rejected": -298.41510009765625, "loss": 0.667, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.15569987893104553, "rewards/margins": 0.06216084957122803, "rewards/rejected": -0.21786072850227356, "step": 60 }, { "epoch": 0.46, "learning_rate": 3.3160280345958614e-06, "logits/chosen": -2.6327927112579346, "logits/rejected": -2.5868000984191895, "logps/chosen": -278.89703369140625, "logps/rejected": -314.3589172363281, "loss": 0.6649, "rewards/accuracies": 0.609375, "rewards/chosen": -0.1321047842502594, "rewards/margins": 0.05293331667780876, "rewards/rejected": -0.18503807485103607, "step": 70 }, { "epoch": 0.52, "learning_rate": 2.757519902117886e-06, "logits/chosen": -2.6241769790649414, "logits/rejected": -2.61041522026062, "logps/chosen": -260.6992492675781, "logps/rejected": -272.4108581542969, "loss": 0.6678, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.16336451470851898, "rewards/margins": 0.07086249440908432, "rewards/rejected": -0.2342270165681839, "step": 80 }, { "epoch": 0.59, "learning_rate": 2.185529423440807e-06, "logits/chosen": -2.6284656524658203, "logits/rejected": -2.6176791191101074, "logps/chosen": -282.1419372558594, "logps/rejected": -331.07354736328125, "loss": 0.6629, "rewards/accuracies": 0.625, "rewards/chosen": -0.29410403966903687, "rewards/margins": 0.06631916761398315, "rewards/rejected": -0.3604232370853424, "step": 90 }, { "epoch": 0.65, "learning_rate": 1.6300029195778454e-06, "logits/chosen": -2.631556749343872, "logits/rejected": -2.61484956741333, "logps/chosen": -280.27362060546875, "logps/rejected": -321.37884521484375, "loss": 0.6615, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21054717898368835, "rewards/margins": 0.08309625834226608, "rewards/rejected": -0.2936434745788574, "step": 100 }, { "epoch": 0.65, "eval_logits/chosen": -2.6791629791259766, "eval_logits/rejected": -2.5814735889434814, "eval_logps/chosen": -310.7413024902344, "eval_logps/rejected": -294.6734619140625, "eval_loss": 0.6692679524421692, "eval_rewards/accuracies": 0.5519999861717224, "eval_rewards/chosen": -0.1884314864873886, "eval_rewards/margins": 0.05432567000389099, "eval_rewards/rejected": -0.2427571415901184, "eval_runtime": 395.8034, "eval_samples_per_second": 5.053, "eval_steps_per_second": 0.632, "step": 100 }, { "epoch": 0.72, "learning_rate": 1.1200247470632394e-06, "logits/chosen": -2.6365230083465576, "logits/rejected": -2.636570453643799, "logps/chosen": -262.6191711425781, "logps/rejected": -310.3700256347656, "loss": 0.6646, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.22685377299785614, "rewards/margins": 0.07261677831411362, "rewards/rejected": -0.29947060346603394, "step": 110 }, { "epoch": 0.78, "learning_rate": 6.822945986946386e-07, "logits/chosen": -2.6256182193756104, "logits/rejected": -2.5980124473571777, "logps/chosen": -246.5704803466797, "logps/rejected": -282.092529296875, "loss": 0.6621, "rewards/accuracies": 0.625, "rewards/chosen": -0.20391853153705597, "rewards/margins": 0.08593297004699707, "rewards/rejected": -0.28985148668289185, "step": 120 }, { "epoch": 0.85, "learning_rate": 3.397296523427807e-07, "logits/chosen": -2.584958553314209, "logits/rejected": -2.562863349914551, "logps/chosen": -262.963134765625, "logps/rejected": -306.772216796875, "loss": 0.6598, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.21135267615318298, "rewards/margins": 0.08709158003330231, "rewards/rejected": -0.2984442710876465, "step": 130 }, { "epoch": 0.91, "learning_rate": 1.102647517397798e-07, "logits/chosen": -2.6206483840942383, "logits/rejected": -2.604473352432251, "logps/chosen": -272.1588439941406, "logps/rejected": -318.1951599121094, "loss": 0.6636, "rewards/accuracies": 0.65625, "rewards/chosen": -0.21283432841300964, "rewards/margins": 0.10337638854980469, "rewards/rejected": -0.3162107467651367, "step": 140 }, { "epoch": 0.98, "learning_rate": 5.9134352763748345e-09, "logits/chosen": -2.6338722705841064, "logits/rejected": -2.62086820602417, "logps/chosen": -296.335205078125, "logps/rejected": -331.77777099609375, "loss": 0.6594, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.23208002746105194, "rewards/margins": 0.08073471486568451, "rewards/rejected": -0.31281474232673645, "step": 150 }, { "epoch": 1.0, "step": 153, "total_flos": 0.0, "train_loss": 0.6705375785141988, "train_runtime": 6538.0645, "train_samples_per_second": 2.998, "train_steps_per_second": 0.023 } ], "logging_steps": 10, "max_steps": 153, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }