{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 106, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 32.01167376803002, "learning_rate": 9.09090909090909e-09, "logits/chosen": -3.1918134689331055, "logits/rejected": -2.114994764328003, "logps/chosen": -43.23323059082031, "logps/rejected": -487.12335205078125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.09, "grad_norm": 26.482209257499743, "learning_rate": 9.09090909090909e-08, "logits/chosen": -2.4537441730499268, "logits/rejected": -2.188873291015625, "logps/chosen": -104.89955139160156, "logps/rejected": -340.5350341796875, "loss": 0.6929, "rewards/accuracies": 0.5069444179534912, "rewards/chosen": -0.00020233324903529137, "rewards/margins": 0.0006850131321698427, "rewards/rejected": -0.0008873462211340666, "step": 10 }, { "epoch": 0.19, "grad_norm": 27.340373748893274, "learning_rate": 9.780178907671787e-08, "logits/chosen": -2.5941455364227295, "logits/rejected": -2.2474465370178223, "logps/chosen": -112.54396057128906, "logps/rejected": -382.1304626464844, "loss": 0.6893, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.001845196820795536, "rewards/margins": 0.008222021162509918, "rewards/rejected": -0.006376823876053095, "step": 20 }, { "epoch": 0.28, "grad_norm": 29.186381338310213, "learning_rate": 9.045084971874737e-08, "logits/chosen": -2.686591625213623, "logits/rejected": -2.179025173187256, "logps/chosen": -103.99493408203125, "logps/rejected": -395.27813720703125, "loss": 0.6802, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.005808202549815178, "rewards/margins": 0.02655060589313507, "rewards/rejected": -0.020742401480674744, "step": 30 }, { "epoch": 0.38, "grad_norm": 25.724411216221245, "learning_rate": 7.871643313414717e-08, "logits/chosen": -2.667210578918457, "logits/rejected": -2.4061439037323, "logps/chosen": -100.63652038574219, "logps/rejected": -365.4666748046875, "loss": 0.6685, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.008249158971011639, "rewards/margins": 0.0507800467312336, "rewards/rejected": -0.042530883103609085, "step": 40 }, { "epoch": 0.47, "grad_norm": 27.99539110543597, "learning_rate": 6.387014543809223e-08, "logits/chosen": -2.500103235244751, "logits/rejected": -2.2492752075195312, "logps/chosen": -121.36189270019531, "logps/rejected": -427.84686279296875, "loss": 0.6553, "rewards/accuracies": 0.9375, "rewards/chosen": 0.013524128124117851, "rewards/margins": 0.08421863615512848, "rewards/rejected": -0.07069449126720428, "step": 50 }, { "epoch": 0.57, "grad_norm": 29.277621202155327, "learning_rate": 4.7520812266338875e-08, "logits/chosen": -2.5562338829040527, "logits/rejected": -2.3804821968078613, "logps/chosen": -104.37593078613281, "logps/rejected": -411.9266662597656, "loss": 0.6401, "rewards/accuracies": 0.96875, "rewards/chosen": 0.014270206913352013, "rewards/margins": 0.11923271417617798, "rewards/rejected": -0.10496251285076141, "step": 60 }, { "epoch": 0.66, "grad_norm": 28.168581664040957, "learning_rate": 3.1440137554088955e-08, "logits/chosen": -2.517213821411133, "logits/rejected": -2.3926634788513184, "logps/chosen": -96.15052795410156, "logps/rejected": -427.611083984375, "loss": 0.6226, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 0.019482703879475594, "rewards/margins": 0.1615636646747589, "rewards/rejected": -0.14208097755908966, "step": 70 }, { "epoch": 0.75, "grad_norm": 27.520854136320587, "learning_rate": 1.7370711923791564e-08, "logits/chosen": -2.592007875442505, "logits/rejected": -2.249636173248291, "logps/chosen": -104.5450668334961, "logps/rejected": -369.75421142578125, "loss": 0.6127, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.017579929903149605, "rewards/margins": 0.14949214458465576, "rewards/rejected": -0.1319122165441513, "step": 80 }, { "epoch": 0.85, "grad_norm": 27.070318418613407, "learning_rate": 6.837175952121305e-09, "logits/chosen": -2.4734714031219482, "logits/rejected": -2.3791213035583496, "logps/chosen": -112.87300109863281, "logps/rejected": -397.2712707519531, "loss": 0.6142, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.019381331279873848, "rewards/margins": 0.17430000007152557, "rewards/rejected": -0.15491867065429688, "step": 90 }, { "epoch": 0.94, "grad_norm": 25.619627780442052, "learning_rate": 9.81001706259532e-10, "logits/chosen": -2.4850308895111084, "logits/rejected": -2.2666268348693848, "logps/chosen": -106.1055908203125, "logps/rejected": -518.9412841796875, "loss": 0.6061, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.015003817155957222, "rewards/margins": 0.22597499191761017, "rewards/rejected": -0.2109711617231369, "step": 100 }, { "epoch": 1.0, "step": 106, "total_flos": 0.0, "train_loss": 0.6464094665815245, "train_runtime": 1236.0981, "train_samples_per_second": 5.464, "train_steps_per_second": 0.086 } ], "logging_steps": 10, "max_steps": 106, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }