{ "epoch": 1.0, "eval_logits/chosen": -2.3130569458007812, "eval_logits/rejected": -2.4918906688690186, "eval_logps/chosen": -2.289309024810791, "eval_logps/rejected": -2.4444260597229004, "eval_loss": 3.0870368480682373, "eval_rewards/accuracies": 0.6299999952316284, "eval_rewards/chosen": -22.893089294433594, "eval_rewards/margins": 1.5511717796325684, "eval_rewards/rejected": -24.444263458251953, "eval_runtime": 12.8093, "eval_samples": 100, "eval_samples_per_second": 7.807, "eval_steps_per_second": 1.952, "total_flos": 0.0, "train_loss": 2.970746964952526, "train_runtime": 37264.6051, "train_samples": 59338, "train_samples_per_second": 1.592, "train_steps_per_second": 0.796 }