{ "epoch": 0.9983492901947838, "eval_logits/chosen": -2.375783920288086, "eval_logits/rejected": -2.3758819103240967, "eval_logps/chosen": -218.0917205810547, "eval_logps/rejected": -221.27059936523438, "eval_loss": 0.6741424202919006, "eval_rewards/accuracies": 0.564338207244873, "eval_rewards/chosen": -0.5671599507331848, "eval_rewards/margins": 0.056060947477817535, "eval_rewards/rejected": -0.6232209205627441, "eval_runtime": 162.069, "eval_samples": 8652, "eval_samples_per_second": 53.385, "eval_steps_per_second": 0.839, "total_flos": 0.0, "train_loss": 0.6782766637347993, "train_runtime": 9304.817, "train_samples": 193800, "train_samples_per_second": 20.828, "train_steps_per_second": 0.041 }