{ "epoch": 1.0, "eval_logits/chosen": -0.44726553559303284, "eval_logits/rejected": 0.045745834708213806, "eval_logps/chosen": -395.98583984375, "eval_logps/rejected": -470.54931640625, "eval_loss": 0.5046471357345581, "eval_rewards/accuracies": 0.7245509028434753, "eval_rewards/chosen": -1.1825801134109497, "eval_rewards/margins": 0.8755642771720886, "eval_rewards/rejected": -2.0581440925598145, "eval_runtime": 494.7185, "eval_samples": 2000, "eval_samples_per_second": 4.043, "eval_steps_per_second": 0.338, "train_loss": 0.5401819272219315, "train_runtime": 34352.758, "train_samples": 61135, "train_samples_per_second": 1.78, "train_steps_per_second": 0.015 }