{ "epoch": 4.695652173913043, "eval_logits/chosen": -0.107421875, "eval_logits/rejected": 0.0263671875, "eval_logps/chosen": -129.0, "eval_logps/rejected": -171.0, "eval_loss": 0.5796619057655334, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.007049560546875, "eval_rewards/margins": 0.255859375, "eval_rewards/rejected": -0.26171875, "eval_runtime": 5.9126, "eval_samples_per_second": 41.268, "eval_steps_per_second": 1.353 }