{ "epoch": 19.84, "eval_logits/chosen": -1.7818838357925415, "eval_logits/rejected": -1.7583956718444824, "eval_logps/chosen": -1971.018798828125, "eval_logps/rejected": -2233.02197265625, "eval_loss": 1.56223726272583, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -17.027843475341797, "eval_rewards/margins": 2.817903757095337, "eval_rewards/rejected": -19.845748901367188, "eval_runtime": 67.4863, "eval_samples": 200, "eval_samples_per_second": 2.964, "eval_steps_per_second": 0.37, "total_flos": 0.0, "train_loss": 0.13093096087141606, "train_runtime": 15450.2645, "train_samples": 1000, "train_samples_per_second": 1.294, "train_steps_per_second": 0.08 }