just1nseo's picture
End of training
63a2487 verified
{
"epoch": 3.0,
"eval_dpo_losses": 0.6849049925804138,
"eval_logits/chosen": -2.3206958770751953,
"eval_logits/rejected": -2.278275489807129,
"eval_logps/chosen": -425.83282470703125,
"eval_logps/rejected": -459.3068542480469,
"eval_loss": 14.385190963745117,
"eval_positive_losses": 141.65965270996094,
"eval_rewards/accuracies": 0.6666666865348816,
"eval_rewards/chosen": -1.406116247177124,
"eval_rewards/margins": 0.5951257348060608,
"eval_rewards/margins_max": 2.288450241088867,
"eval_rewards/margins_min": -1.0994930267333984,
"eval_rewards/margins_std": 1.49783194065094,
"eval_rewards/rejected": -2.00124192237854,
"eval_runtime": 280.9524,
"eval_samples": 2000,
"eval_samples_per_second": 7.119,
"eval_steps_per_second": 0.224,
"train_loss": 0.36563416943303856,
"train_runtime": 9271.2095,
"train_samples": 5678,
"train_samples_per_second": 1.837,
"train_steps_per_second": 0.115
}