{ "epoch": 0.9994242947610823, "eval_logits/chosen": -1.1188652515411377, "eval_logits/rejected": -0.6186092495918274, "eval_logps/chosen": -639.7113037109375, "eval_logps/rejected": -952.0885009765625, "eval_loss": 0.3328971564769745, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -3.571798324584961, "eval_rewards/margins": 3.05015230178833, "eval_rewards/rejected": -6.621951103210449, "eval_runtime": 330.4232, "eval_samples": 7126, "eval_samples_per_second": 21.566, "eval_steps_per_second": 0.339 }