{ | |
"epoch": 1.0, | |
"eval_logps/chosen": -2.491426467895508, | |
"eval_logps/rejected": -2.823484420776367, | |
"eval_loss": 3.3512649536132812, | |
"eval_rewards/accuracies": 1.0, | |
"eval_rewards/chosen": -24.914264678955078, | |
"eval_rewards/margins": 3.32058048248291, | |
"eval_rewards/rejected": -28.234844207763672, | |
"eval_runtime": 1.9721, | |
"eval_samples": 10, | |
"eval_samples_per_second": 5.071, | |
"eval_steps_per_second": 1.014, | |
"total_flos": 0.0, | |
"train_loss": 9.667842355096774, | |
"train_runtime": 2221.9837, | |
"train_samples": 4610, | |
"train_samples_per_second": 2.075, | |
"train_steps_per_second": 0.26 | |
} |