{ "epoch": 0.9994353472614342, "eval_logits/chosen": -2.932816743850708, "eval_logits/rejected": -2.902209758758545, "eval_logps/chosen": -250.2433319091797, "eval_logps/rejected": -229.81752014160156, "eval_loss": 0.6546631455421448, "eval_rewards/accuracies": 0.591292142868042, "eval_rewards/chosen": -0.2775766849517822, "eval_rewards/margins": 0.10209956020116806, "eval_rewards/rejected": -0.3796762526035309, "eval_runtime": 230.8386, "eval_samples": 712, "eval_samples_per_second": 3.084, "eval_steps_per_second": 0.386, "total_flos": 0.0, "train_loss": 0.6718437324135991, "train_runtime": 9969.8232, "train_samples": 14167, "train_samples_per_second": 1.421, "train_steps_per_second": 0.089 }