{ "epoch": 1.0, "eval_logits/chosen": -0.043629273772239685, "eval_logits/rejected": 0.8509737253189087, "eval_logps/chosen": -301.6346435546875, "eval_logps/rejected": -350.5372314453125, "eval_loss": 0.6487784385681152, "eval_rewards/accuracies": 0.7777777910232544, "eval_rewards/chosen": -0.17524881660938263, "eval_rewards/margins": 0.7123382687568665, "eval_rewards/rejected": -0.8875871300697327, "eval_runtime": 237.6574, "eval_samples": 2000, "eval_samples_per_second": 8.415, "eval_steps_per_second": 0.265, "train_loss": 0.6847315603082285, "train_runtime": 17806.8146, "train_samples": 61135, "train_samples_per_second": 3.433, "train_steps_per_second": 0.027 }