{ "epoch": 1.0, "eval_logits/chosen": -3.25435733795166, "eval_logits/rejected": -3.204742431640625, "eval_logps/chosen": -433.63043212890625, "eval_logps/rejected": -429.45819091796875, "eval_loss": 0.6412035226821899, "eval_rewards/accuracies": 0.64453125, "eval_rewards/chosen": -0.10438449680805206, "eval_rewards/margins": 0.14497718214988708, "eval_rewards/rejected": -0.24936166405677795, "eval_runtime": 8.2917, "eval_samples": 2000, "eval_samples_per_second": 241.204, "eval_steps_per_second": 3.859, "total_flos": 0.0, "train_loss": 0.6571792745689967, "train_runtime": 784.6622, "train_samples": 61135, "train_samples_per_second": 77.913, "train_steps_per_second": 0.609 }