zephyr-7b-dpo-uffull-qlora-5e-7 / eval_results.json
just1nseo's picture
End of training
aeb6e5f verified
{
"epoch": 1.0,
"eval_logits/chosen": -2.715510368347168,
"eval_logits/rejected": -2.6831843852996826,
"eval_logps/chosen": -309.6580505371094,
"eval_logps/rejected": -322.2830810546875,
"eval_loss": 0.5924258232116699,
"eval_rewards/accuracies": 0.7321428656578064,
"eval_rewards/chosen": -0.2516312599182129,
"eval_rewards/margins": 0.3497060239315033,
"eval_rewards/margins_max": 1.2300177812576294,
"eval_rewards/margins_min": -0.5546701550483704,
"eval_rewards/margins_std": 0.6038431525230408,
"eval_rewards/rejected": -0.6013372540473938,
"eval_runtime": 424.8015,
"eval_samples": 2000,
"eval_samples_per_second": 4.708,
"eval_steps_per_second": 0.148
}