{ "epoch": 1.0, "eval_logits/chosen": -2.7746472358703613, "eval_logits/rejected": -2.752934455871582, "eval_logps/chosen": -310.119873046875, "eval_logps/rejected": -321.6407165527344, "eval_loss": 0.5893968343734741, "eval_rewards/accuracies": 0.703499972820282, "eval_rewards/chosen": -0.27382245659828186, "eval_rewards/margins": 0.32820531725883484, "eval_rewards/rejected": -0.6020277142524719, "eval_runtime": 692.2285, "eval_samples": 2000, "eval_samples_per_second": 2.889, "eval_steps_per_second": 0.361, "total_flos": 0.0, "train_loss": 0.6164219083351729, "train_runtime": 73481.1174, "train_samples": 61134, "train_samples_per_second": 0.832, "train_steps_per_second": 0.052 }