NicholasCorrado's picture
End of training
2eb1b8d verified
{
"epoch": 1.9986120749479528,
"eval_alpha0": 0.1956627070903778,
"eval_alpha1": 0.8043374419212341,
"eval_excess_loss": 0.03396472496617781,
"eval_logits/chosen": 0.941495954990387,
"eval_logits/rejected": 3.0511972904205322,
"eval_logps/chosen": -737.6100463867188,
"eval_logps/rejected": -1278.7679443359375,
"eval_loss": 0.2410118132829666,
"eval_rewards/accuracies": 0.8778195381164551,
"eval_rewards/chosen": -3.451406478881836,
"eval_rewards/margins": 5.298919200897217,
"eval_rewards/rejected": -8.750325202941895,
"eval_runtime": 386.05,
"eval_samples": 8491,
"eval_samples_per_second": 21.995,
"eval_steps_per_second": 0.345,
"eval_task_excess_loss0": 0.08787519361543576,
"eval_task_excess_loss1": 0.03778563067191184,
"eval_task_loss0": 0.5294545888900757,
"eval_task_loss1": 0.1724005937576294,
"total_flos": 0.0,
"train_loss": 0.17575526105033026,
"train_runtime": 46867.94,
"train_samples": 184443,
"train_samples_per_second": 7.871,
"train_steps_per_second": 0.031
}