{ "epoch": 19.84, "eval_logits/chosen": -1.7818838357925415, "eval_logits/rejected": -1.7583956718444824, "eval_logps/chosen": -1971.018798828125, "eval_logps/rejected": -2233.02197265625, "eval_loss": 1.56223726272583, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -17.027843475341797, "eval_rewards/margins": 2.817903757095337, "eval_rewards/rejected": -19.845748901367188, "eval_runtime": 67.4863, "eval_samples": 200, "eval_samples_per_second": 2.964, "eval_steps_per_second": 0.37 }