{ "epoch": 3.0, "eval_dpo_losses": 0.6849049925804138, "eval_logits/chosen": -2.3206958770751953, "eval_logits/rejected": -2.278275489807129, "eval_logps/chosen": -425.83282470703125, "eval_logps/rejected": -459.3068542480469, "eval_loss": 14.385190963745117, "eval_positive_losses": 141.65965270996094, "eval_rewards/accuracies": 0.6666666865348816, "eval_rewards/chosen": -1.406116247177124, "eval_rewards/margins": 0.5951257348060608, "eval_rewards/margins_max": 2.288450241088867, "eval_rewards/margins_min": -1.0994930267333984, "eval_rewards/margins_std": 1.49783194065094, "eval_rewards/rejected": -2.00124192237854, "eval_runtime": 280.9524, "eval_samples": 2000, "eval_samples_per_second": 7.119, "eval_steps_per_second": 0.224 }