just1nseo's picture
End of training
ffda7a4 verified
{
"epoch": 1.0,
"eval_logits/chosen": -1.6206190586090088,
"eval_logits/rejected": -1.5537647008895874,
"eval_logps/chosen": -463.3371887207031,
"eval_logps/rejected": -554.199951171875,
"eval_loss": 0.4948451519012451,
"eval_rewards/accuracies": 0.7484999895095825,
"eval_rewards/chosen": -1.7888187170028687,
"eval_rewards/margins": 1.0946472883224487,
"eval_rewards/margins_max": 3.58725905418396,
"eval_rewards/margins_min": -0.9700561761856079,
"eval_rewards/margins_std": 1.5435914993286133,
"eval_rewards/rejected": -2.8834662437438965,
"eval_runtime": 858.4449,
"eval_samples": 4000,
"eval_samples_per_second": 4.66,
"eval_steps_per_second": 0.291,
"train_loss": 0.5053737083043175,
"train_runtime": 67897.0602,
"train_samples": 66812,
"train_samples_per_second": 0.984,
"train_steps_per_second": 0.062
}