{ "epoch": 1.0, "eval_logits/chosen": -3.25435733795166, "eval_logits/rejected": -3.204742431640625, "eval_logps/chosen": -433.63043212890625, "eval_logps/rejected": -429.45819091796875, "eval_loss": 0.6412035226821899, "eval_rewards/accuracies": 0.64453125, "eval_rewards/chosen": -0.10438449680805206, "eval_rewards/margins": 0.14497718214988708, "eval_rewards/rejected": -0.24936166405677795, "eval_runtime": 8.2917, "eval_samples": 2000, "eval_samples_per_second": 241.204, "eval_steps_per_second": 3.859 }