just1nseo's picture
End of training
f1a6dc8 verified
{
"epoch": 1.0,
"eval_dpo_losses": 0.6446115970611572,
"eval_logits/chosen": -2.713839530944824,
"eval_logits/rejected": -2.6717445850372314,
"eval_logps/chosen": -284.5663146972656,
"eval_logps/rejected": -271.5415954589844,
"eval_loss": 1.616790771484375,
"eval_positive_losses": 8.828282356262207,
"eval_rewards/accuracies": 0.6499999761581421,
"eval_rewards/chosen": 0.0002709717955440283,
"eval_rewards/margins": 0.12989762425422668,
"eval_rewards/margins_max": 0.6433730721473694,
"eval_rewards/margins_min": -0.34936216473579407,
"eval_rewards/margins_std": 0.33273470401763916,
"eval_rewards/rejected": -0.12962664663791656,
"eval_runtime": 428.5272,
"eval_samples": 2000,
"eval_samples_per_second": 4.667,
"eval_steps_per_second": 0.292
}