{ "epoch": 0.9974099364257123, "eval_chosen_logps": -100.49517059326172, "eval_chosen_rewards": -0.6339624524116516, "eval_log_diff_policy": 2.7547221183776855, "eval_logits": -1.3275535106658936, "eval_logp_accuracy": 0.54347825050354, "eval_loss": 0.6549956798553467, "eval_objective": 0.6503259539604187, "eval_ranking_simple": 0.54347825050354, "eval_rejected_logps": -103.24990844726562, "eval_rejected_rewards": -0.8270438313484192, "eval_reward_accuracy": 0.6183574795722961, "eval_runtime": 579.2451, "eval_samples": 9933, "eval_samples_per_second": 17.148, "eval_steps_per_second": 0.715 }