{ "epoch": 0.9963674104826155, "eval_logits/chosen": -0.7187579870223999, "eval_logits/rejected": 0.8491049408912659, "eval_logps/chosen": -712.8595581054688, "eval_logps/rejected": -1311.8641357421875, "eval_loss": 0.16912925243377686, "eval_rewards/accuracies": 0.9240196347236633, "eval_rewards/chosen": -2.9091784954071045, "eval_rewards/margins": 5.781402111053467, "eval_rewards/rejected": -8.690580368041992, "eval_runtime": 602.9119, "eval_samples": 6491, "eval_samples_per_second": 10.766, "eval_steps_per_second": 0.169, "total_flos": 0.0, "train_loss": 0.250737202167511, "train_runtime": 29028.2374, "train_samples": 123309, "train_samples_per_second": 4.248, "train_steps_per_second": 0.008 }