{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 100, "global_step": 156, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-07, "logits/chosen": -2.89351749420166, "logits/rejected": -2.7752203941345215, "logps/chosen": -345.7324523925781, "logps/rejected": -319.42047119140625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.125e-06, "logits/chosen": -2.8028833866119385, "logits/rejected": -2.7471988201141357, "logps/chosen": -255.036865234375, "logps/rejected": -252.82679748535156, "loss": 0.6928, "rewards/accuracies": 0.4583333432674408, "rewards/chosen": 0.0003066221543122083, "rewards/margins": 0.0006635435856878757, "rewards/rejected": -0.0003569214604794979, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.989935734988098e-06, "logits/chosen": -2.7710824012756348, "logits/rejected": -2.7166454792022705, "logps/chosen": -277.1798400878906, "logps/rejected": -256.997802734375, "loss": 0.6889, "rewards/accuracies": 0.65625, "rewards/chosen": 0.012738336808979511, "rewards/margins": 0.008854442276060581, "rewards/rejected": 0.00388389453291893, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.8776412907378845e-06, "logits/chosen": -2.7218027114868164, "logits/rejected": -2.658277750015259, "logps/chosen": -274.3503112792969, "logps/rejected": -246.41128540039062, "loss": 0.6795, "rewards/accuracies": 0.6875, "rewards/chosen": 0.03419749438762665, "rewards/margins": 0.027278240770101547, "rewards/rejected": 0.006919251289218664, "step": 30 }, { "epoch": 0.26, "learning_rate": 4.646121984004666e-06, "logits/chosen": -2.7631821632385254, "logits/rejected": -2.6550638675689697, "logps/chosen": -268.45220947265625, "logps/rejected": -251.58743286132812, "loss": 0.6693, "rewards/accuracies": 0.715624988079071, "rewards/chosen": 0.03486743941903114, "rewards/margins": 0.05773182958364487, "rewards/rejected": -0.022864393889904022, "step": 40 }, { "epoch": 0.32, "learning_rate": 4.3069871595684795e-06, "logits/chosen": -2.676445960998535, "logits/rejected": -2.62298321723938, "logps/chosen": -274.287841796875, "logps/rejected": -281.6821594238281, "loss": 0.6605, "rewards/accuracies": 0.671875, "rewards/chosen": -0.003023784141987562, "rewards/margins": 0.07856948673725128, "rewards/rejected": -0.08159326761960983, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -2.687849521636963, "logits/rejected": -2.6154909133911133, "logps/chosen": -287.0888366699219, "logps/rejected": -274.48822021484375, "loss": 0.6507, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.03279999643564224, "rewards/margins": 0.11418493092060089, "rewards/rejected": -0.14698493480682373, "step": 60 }, { "epoch": 0.45, "learning_rate": 3.3784370602033572e-06, "logits/chosen": -2.6265475749969482, "logits/rejected": -2.570312976837158, "logps/chosen": -304.10125732421875, "logps/rejected": -292.9505310058594, "loss": 0.6431, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.033603884279727936, "rewards/margins": 0.12847770750522614, "rewards/rejected": -0.16208159923553467, "step": 70 }, { "epoch": 0.51, "learning_rate": 2.835583164544139e-06, "logits/chosen": -2.6333694458007812, "logits/rejected": -2.5370612144470215, "logps/chosen": -302.35736083984375, "logps/rejected": -276.11175537109375, "loss": 0.634, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.04339645802974701, "rewards/margins": 0.14248773455619812, "rewards/rejected": -0.18588420748710632, "step": 80 }, { "epoch": 0.58, "learning_rate": 2.2759017277414165e-06, "logits/chosen": -2.584850311279297, "logits/rejected": -2.5956850051879883, "logps/chosen": -292.6184997558594, "logps/rejected": -311.091796875, "loss": 0.6249, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09801232814788818, "rewards/margins": 0.17943526804447174, "rewards/rejected": -0.27744758129119873, "step": 90 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -2.6319050788879395, "logits/rejected": -2.5922951698303223, "logps/chosen": -290.19976806640625, "logps/rejected": -273.79815673828125, "loss": 0.6297, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.1279720962047577, "rewards/margins": 0.16270975768566132, "rewards/rejected": -0.2906818985939026, "step": 100 }, { "epoch": 0.64, "eval_logits/chosen": -2.6261305809020996, "eval_logits/rejected": -2.541619062423706, "eval_logps/chosen": -295.3544006347656, "eval_logps/rejected": -286.08172607421875, "eval_loss": 0.6284892559051514, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -0.11314628273248672, "eval_rewards/margins": 0.1725120097398758, "eval_rewards/rejected": -0.2856582701206207, "eval_runtime": 384.1378, "eval_samples_per_second": 5.206, "eval_steps_per_second": 0.651, "step": 100 }, { "epoch": 0.7, "learning_rate": 1.217751806485235e-06, "logits/chosen": -2.6338038444519043, "logits/rejected": -2.5288445949554443, "logps/chosen": -289.0359802246094, "logps/rejected": -275.2894287109375, "loss": 0.6177, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.06765095144510269, "rewards/margins": 0.20255950093269348, "rewards/rejected": -0.2702104449272156, "step": 110 }, { "epoch": 0.77, "learning_rate": 7.723433775328385e-07, "logits/chosen": -2.552917003631592, "logits/rejected": -2.550574779510498, "logps/chosen": -274.2412109375, "logps/rejected": -288.6865539550781, "loss": 0.6251, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.10379727929830551, "rewards/margins": 0.1611437350511551, "rewards/rejected": -0.2649410367012024, "step": 120 }, { "epoch": 0.83, "learning_rate": 4.1356686569674344e-07, "logits/chosen": -2.611356735229492, "logits/rejected": -2.5477137565612793, "logps/chosen": -291.66290283203125, "logps/rejected": -301.75537109375, "loss": 0.6233, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.11176357418298721, "rewards/margins": 0.20609335601329803, "rewards/rejected": -0.31785690784454346, "step": 130 }, { "epoch": 0.9, "learning_rate": 1.59412823400657e-07, "logits/chosen": -2.6064445972442627, "logits/rejected": -2.479429244995117, "logps/chosen": -313.6204833984375, "logps/rejected": -287.5814514160156, "loss": 0.6146, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.09765791893005371, "rewards/margins": 0.21785131096839905, "rewards/rejected": -0.31550922989845276, "step": 140 }, { "epoch": 0.96, "learning_rate": 2.262559558016325e-08, "logits/chosen": -2.58495831489563, "logits/rejected": -2.4780099391937256, "logps/chosen": -300.6700134277344, "logps/rejected": -281.14935302734375, "loss": 0.6193, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.11531106382608414, "rewards/margins": 0.22770515084266663, "rewards/rejected": -0.34301620721817017, "step": 150 }, { "epoch": 1.0, "step": 156, "total_flos": 0.0, "train_loss": 0.6446946973984058, "train_runtime": 7192.2445, "train_samples_per_second": 2.781, "train_steps_per_second": 0.022 } ], "logging_steps": 10, "max_steps": 156, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }