{ "epoch": 1.0, "eval_logits/chosen": -1.4956016540527344, "eval_logits/rejected": -1.4366858005523682, "eval_logps/chosen": -890.294189453125, "eval_logps/rejected": -960.1845092773438, "eval_loss": 0.9318017959594727, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": -6.057007789611816, "eval_rewards/margins": 0.9590479731559753, "eval_rewards/margins_max": 4.770871162414551, "eval_rewards/margins_min": -3.0068037509918213, "eval_rewards/margins_std": 2.65700101852417, "eval_rewards/rejected": -7.016055107116699, "eval_runtime": 428.551, "eval_samples": 2000, "eval_samples_per_second": 4.667, "eval_steps_per_second": 0.292, "train_loss": 0.24729210323011372, "train_runtime": 4030.3896, "train_samples": 5678, "train_samples_per_second": 1.409, "train_steps_per_second": 0.088 }