{ "epoch": 2.9959514170040484, "eval_logits/chosen": -1.5362521409988403, "eval_logits/rejected": -1.577704906463623, "eval_logps/chosen": -191.7516632080078, "eval_logps/rejected": -226.06272888183594, "eval_loss": 0.31698280572891235, "eval_rewards/accuracies": 0.8463855385780334, "eval_rewards/chosen": -0.7307406663894653, "eval_rewards/margins": 2.3931150436401367, "eval_rewards/rejected": -3.1238558292388916, "eval_runtime": 23.3432, "eval_samples_per_second": 14.137, "eval_steps_per_second": 3.556, "total_flos": 4.5615607240812134e+17, "train_loss": 0.26195224279218965, "train_runtime": 3105.2921, "train_samples_per_second": 2.862, "train_steps_per_second": 0.357 }