{ "epoch": 2.0, "eval_logits/chosen": -2.5716235637664795, "eval_logits/rejected": -2.555190324783325, "eval_logps/chosen": -73.5245361328125, "eval_logps/rejected": -81.39889526367188, "eval_loss": 0.6463928818702698, "eval_rewards/accuracies": 0.3710317313671112, "eval_rewards/chosen": 0.48334550857543945, "eval_rewards/margins": 0.6186185479164124, "eval_rewards/rejected": -0.13527308404445648, "eval_runtime": 113.7854, "eval_samples": 2000, "eval_samples_per_second": 17.577, "eval_steps_per_second": 0.554 }