{ "epoch": 0.9989701338825953, "eval_logits/chosen": -2.065095901489258, "eval_logits/rejected": -1.9689290523529053, "eval_logps/chosen": -335.3676452636719, "eval_logps/rejected": -427.32891845703125, "eval_loss": 0.5029041767120361, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.1300084590911865, "eval_rewards/margins": 0.865437388420105, "eval_rewards/rejected": -1.995445728302002, "eval_runtime": 65.1127, "eval_samples_per_second": 15.358, "eval_steps_per_second": 0.246 }