sanchit-gandhi's picture
Model save
0354e7d verified
{"train/loss": 0.6377, "train/grad_norm": 8.862220764160156, "train/learning_rate": 6.004792024680294e-11, "train/rewards/chosen": -0.11145105212926865, "train/rewards/rejected": -0.26377159357070923, "train/rewards/accuracies": 0.6575000286102295, "train/rewards/margins": 0.15232053399085999, "train/logps/rejected": -418.4268493652344, "train/logps/chosen": -445.8639831542969, "train/logits/rejected": -3.1062843799591064, "train/logits/chosen": -3.1527392864227295, "train/epoch": 1.0, "train/global_step": 478, "_timestamp": 1714147827.4227393, "_runtime": 791.7218701839447, "_step": 25, "eval/loss": 0.6412035226821899, "eval/runtime": 8.2917, "eval/samples_per_second": 241.204, "eval/steps_per_second": 3.859, "eval/rewards/chosen": -0.10438449680805206, "eval/rewards/rejected": -0.24936166405677795, "eval/rewards/accuracies": 0.64453125, "eval/rewards/margins": 0.14497718214988708, "eval/logps/rejected": -429.45819091796875, "eval/logps/chosen": -433.63043212890625, "eval/logits/rejected": -3.204742431640625, "eval/logits/chosen": -3.25435733795166, "train_runtime": 784.6622, "train_samples_per_second": 77.913, "train_steps_per_second": 0.609, "total_flos": 0.0, "train_loss": 0.6571792745689967}