{ "epoch": 3.0, "eval_dpo_losses": 0.6100724935531616, "eval_logits/chosen": -2.5277466773986816, "eval_logits/rejected": -2.480801582336426, "eval_logps/chosen": -323.6263122558594, "eval_logps/rejected": -331.3143005371094, "eval_loss": 5.126430988311768, "eval_positive_losses": 43.18838882446289, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.39032912254333496, "eval_rewards/margins": 0.3370245695114136, "eval_rewards/margins_max": 1.416748046875, "eval_rewards/margins_min": -0.8377854228019714, "eval_rewards/margins_std": 0.7706853747367859, "eval_rewards/rejected": -0.7273536920547485, "eval_runtime": 428.2171, "eval_samples": 2000, "eval_samples_per_second": 4.671, "eval_steps_per_second": 0.292 }