{ "epoch": 0.9994242947610823, "eval_logits/chosen": -1.1188652515411377, "eval_logits/rejected": -0.6186092495918274, "eval_logps/chosen": -639.7113037109375, "eval_logps/rejected": -952.0885009765625, "eval_loss": 0.3328971564769745, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -3.571798324584961, "eval_rewards/margins": 3.05015230178833, "eval_rewards/rejected": -6.621951103210449, "eval_runtime": 330.4232, "eval_samples": 7126, "eval_samples_per_second": 21.566, "eval_steps_per_second": 0.339, "total_flos": 0.0, "train_loss": 0.17276417467451316, "train_runtime": 14185.6914, "train_samples": 111134, "train_samples_per_second": 7.834, "train_steps_per_second": 0.031 }