{ "epoch": 1.0, "eval_logits/chosen": -2.715510368347168, "eval_logits/rejected": -2.6831843852996826, "eval_logps/chosen": -309.6580505371094, "eval_logps/rejected": -322.2830810546875, "eval_loss": 0.5924258232116699, "eval_rewards/accuracies": 0.7321428656578064, "eval_rewards/chosen": -0.2516312599182129, "eval_rewards/margins": 0.3497060239315033, "eval_rewards/margins_max": 1.2300177812576294, "eval_rewards/margins_min": -0.5546701550483704, "eval_rewards/margins_std": 0.6038431525230408, "eval_rewards/rejected": -0.6013372540473938, "eval_runtime": 424.8015, "eval_samples": 2000, "eval_samples_per_second": 4.708, "eval_steps_per_second": 0.148 }