{ "epoch": 1.0, "eval_logits/chosen": -0.21594780683517456, "eval_logits/rejected": 2.576901435852051, "eval_logps/chosen": -609.2966918945312, "eval_logps/rejected": -7672.10595703125, "eval_loss": 7.102515519363806e-05, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -2.423149824142456, "eval_rewards/margins": 67.89002990722656, "eval_rewards/rejected": -70.31317901611328, "eval_runtime": 191.0845, "eval_samples": 3905, "eval_samples_per_second": 20.436, "eval_steps_per_second": 0.324 }