zephyr-7b-group-dpo-full-4 / all_results.json
NicholasCorrado's picture
End of training
778d555 verified
raw
history blame contribute delete
774 Bytes
{
"epoch": 0.9994242947610823,
"eval_logits/chosen": -1.1188652515411377,
"eval_logits/rejected": -0.6186092495918274,
"eval_logps/chosen": -639.7113037109375,
"eval_logps/rejected": -952.0885009765625,
"eval_loss": 0.3328971564769745,
"eval_rewards/accuracies": 0.828125,
"eval_rewards/chosen": -3.571798324584961,
"eval_rewards/margins": 3.05015230178833,
"eval_rewards/rejected": -6.621951103210449,
"eval_runtime": 330.4232,
"eval_samples": 7126,
"eval_samples_per_second": 21.566,
"eval_steps_per_second": 0.339,
"total_flos": 0.0,
"train_loss": 0.17276417467451316,
"train_runtime": 14185.6914,
"train_samples": 111134,
"train_samples_per_second": 7.834,
"train_steps_per_second": 0.031
}