{ "epoch": 2.0, "eval_logits/chosen": 0.7808946371078491, "eval_logits/rejected": 0.8833128213882446, "eval_logps/chosen": -261.0694274902344, "eval_logps/rejected": -233.27114868164062, "eval_loss": 0.0021080097649246454, "eval_rewards/accuracies": 0.6919999718666077, "eval_rewards/chosen": -0.008252721279859543, "eval_rewards/margins": 0.01009758934378624, "eval_rewards/rejected": -0.018350308761000633, "eval_runtime": 325.1898, "eval_samples": 2000, "eval_samples_per_second": 6.15, "eval_steps_per_second": 0.384, "train_loss": 0.0021909422920118682, "train_runtime": 18127.9992, "train_samples": 30567, "train_samples_per_second": 3.372, "train_steps_per_second": 0.053 }