|
{ |
|
"epoch": 3.0, |
|
"eval_dpo_losses": 0.6849049925804138, |
|
"eval_logits/chosen": -2.3206958770751953, |
|
"eval_logits/rejected": -2.278275489807129, |
|
"eval_logps/chosen": -425.83282470703125, |
|
"eval_logps/rejected": -459.3068542480469, |
|
"eval_loss": 14.385190963745117, |
|
"eval_positive_losses": 141.65965270996094, |
|
"eval_rewards/accuracies": 0.6666666865348816, |
|
"eval_rewards/chosen": -1.406116247177124, |
|
"eval_rewards/margins": 0.5951257348060608, |
|
"eval_rewards/margins_max": 2.288450241088867, |
|
"eval_rewards/margins_min": -1.0994930267333984, |
|
"eval_rewards/margins_std": 1.49783194065094, |
|
"eval_rewards/rejected": -2.00124192237854, |
|
"eval_runtime": 280.9524, |
|
"eval_samples": 2000, |
|
"eval_samples_per_second": 7.119, |
|
"eval_steps_per_second": 0.224, |
|
"train_loss": 0.36563416943303856, |
|
"train_runtime": 9271.2095, |
|
"train_samples": 5678, |
|
"train_samples_per_second": 1.837, |
|
"train_steps_per_second": 0.115 |
|
} |