{ "epoch": 3.0, "eval_logits/chosen": -0.23369529843330383, "eval_logits/rejected": -0.23373132944107056, "eval_logps/chosen": -176.31857299804688, "eval_logps/rejected": -176.20704650878906, "eval_loss": 0.6931473612785339, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": 0.0, "eval_rewards/margins": 0.0, "eval_rewards/rejected": 0.0, "eval_runtime": 13.1063, "eval_samples_per_second": 76.299, "eval_steps_per_second": 9.537 }