{ "epoch": 1.9986120749479528, "eval_alpha0": 0.1956627070903778, "eval_alpha1": 0.8043374419212341, "eval_excess_loss": 0.03396472496617781, "eval_logits/chosen": 0.941495954990387, "eval_logits/rejected": 3.0511972904205322, "eval_logps/chosen": -737.6100463867188, "eval_logps/rejected": -1278.7679443359375, "eval_loss": 0.2410118132829666, "eval_rewards/accuracies": 0.8778195381164551, "eval_rewards/chosen": -3.451406478881836, "eval_rewards/margins": 5.298919200897217, "eval_rewards/rejected": -8.750325202941895, "eval_runtime": 386.05, "eval_samples": 8491, "eval_samples_per_second": 21.995, "eval_steps_per_second": 0.345, "eval_task_excess_loss0": 0.08787519361543576, "eval_task_excess_loss1": 0.03778563067191184, "eval_task_loss0": 0.5294545888900757, "eval_task_loss1": 0.1724005937576294, "total_flos": 0.0, "train_loss": 0.17575526105033026, "train_runtime": 46867.94, "train_samples": 184443, "train_samples_per_second": 7.871, "train_steps_per_second": 0.031 }