{ "epoch": 2.987152034261242, "eval_chosen_logps": -93.77220916748047, "eval_chosen_rewards": 0.1566028594970703, "eval_log_diff_policy": 0.627028226852417, "eval_logits": -1.2008216381072998, "eval_logp_accuracy": 0.49048912525177, "eval_loss": 0.4184386134147644, "eval_objective": 0.4334131181240082, "eval_ranking_simple": 0.49048912525177, "eval_rejected_logps": -94.39924621582031, "eval_rejected_rewards": 0.08802003413438797, "eval_reward_accuracy": 0.6290760636329651, "eval_runtime": 251.8277, "eval_samples": 4407, "eval_samples_per_second": 17.5, "eval_steps_per_second": 0.731, "total_flos": 0.0, "train_loss": 0.27528571787700856, "train_runtime": 19429.5857, "train_samples": 44816, "train_samples_per_second": 6.92, "train_steps_per_second": 0.024 }