zephyr-dpo-qlora-gpt4-5e-6 / all_results.json
just1nseo's picture
End of training
9b5b070 verified
raw
history blame contribute delete
896 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -1.4956016540527344,
"eval_logits/rejected": -1.4366858005523682,
"eval_logps/chosen": -890.294189453125,
"eval_logps/rejected": -960.1845092773438,
"eval_loss": 0.9318017959594727,
"eval_rewards/accuracies": 0.6430000066757202,
"eval_rewards/chosen": -6.057007789611816,
"eval_rewards/margins": 0.9590479731559753,
"eval_rewards/margins_max": 4.770871162414551,
"eval_rewards/margins_min": -3.0068037509918213,
"eval_rewards/margins_std": 2.65700101852417,
"eval_rewards/rejected": -7.016055107116699,
"eval_runtime": 428.551,
"eval_samples": 2000,
"eval_samples_per_second": 4.667,
"eval_steps_per_second": 0.292,
"train_loss": 0.24729210323011372,
"train_runtime": 4030.3896,
"train_samples": 5678,
"train_samples_per_second": 1.409,
"train_steps_per_second": 0.088
}