{ "epoch": 1.0, "eval_logits/chosen": -0.35237225890159607, "eval_logits/rejected": -0.32851526141166687, "eval_logps/chosen": -679.8935546875, "eval_logps/rejected": -1442.5582275390625, "eval_loss": 0.27591243386268616, "eval_rewards/accuracies": 0.8920454382896423, "eval_rewards/chosen": -1.230484962463379, "eval_rewards/margins": 5.869514465332031, "eval_rewards/rejected": -7.099999904632568, "eval_runtime": 171.0958, "eval_samples": 1392, "eval_samples_per_second": 8.136, "eval_steps_per_second": 0.257, "train_loss": 0.0030385508506429234, "train_runtime": 144.6456, "train_samples": 19761, "train_samples_per_second": 136.617, "train_steps_per_second": 2.136 }