{ "epoch": 3.0, "eval_logits/chosen": -2.4314932823181152, "eval_logits/rejected": -2.403698682785034, "eval_logps/chosen": -326.85711669921875, "eval_logps/rejected": -393.3514709472656, "eval_loss": 0.6730212569236755, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -13.26190185546875, "eval_rewards/margins": 8.8816556930542, "eval_rewards/rejected": -22.143556594848633, "eval_runtime": 139.8238, "eval_samples": 3156, "eval_samples_per_second": 22.571, "eval_steps_per_second": 0.358, "train_loss": 0.2455477410652717, "train_runtime": 50581.1093, "train_samples": 82404, "train_samples_per_second": 4.887, "train_steps_per_second": 0.306 }