{ "epoch": 4.988190836088805, "eval_dpo_loss": 0.6770760416984558, "eval_logits": -3.2698676586151123, "eval_logps": -195.45721435546875, "eval_loss": 0.39512959122657776, "eval_objective": 0.3955548405647278, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.56611567735672, "eval_regularize": 0.3955548405647278, "eval_runtime": 259.4514, "eval_samples": 5790, "eval_samples_per_second": 22.316, "eval_steps_per_second": 0.933 }