{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9874476987447699, "eval_steps": 500, "global_step": 59, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016736401673640166, "grad_norm": 6.740061107347406, "learning_rate": 1.6666666666666664e-08, "logits/chosen": -2.7879996299743652, "logits/rejected": -2.789036750793457, "logps/chosen": -155.33309936523438, "logps/pi_response": -163.74407958984375, "logps/ref_response": -163.74407958984375, "logps/rejected": -160.54603576660156, "loss": 0.6933, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.16736401673640167, "grad_norm": 7.101331538874614, "learning_rate": 9.860114570402053e-08, "logits/chosen": -2.732710599899292, "logits/rejected": -2.7246806621551514, "logps/chosen": -148.40945434570312, "logps/pi_response": -149.57220458984375, "logps/ref_response": -149.60110473632812, "logps/rejected": -148.08311462402344, "loss": 0.6933, "rewards/accuracies": 0.4513888955116272, "rewards/chosen": 0.00027455115923658013, "rewards/margins": -0.0001711228978820145, "rewards/rejected": 0.0004456740280147642, "step": 10 }, { "epoch": 0.33472803347280333, "grad_norm": 7.337554336815785, "learning_rate": 8.374915007591053e-08, "logits/chosen": -2.6503920555114746, "logits/rejected": -2.64430570602417, "logps/chosen": -160.72381591796875, "logps/pi_response": -162.48867797851562, "logps/ref_response": -162.63189697265625, "logps/rejected": -161.467041015625, "loss": 0.6935, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0015181514900177717, "rewards/margins": -0.00011065408762078732, "rewards/rejected": 0.0016288056503981352, "step": 20 }, { "epoch": 0.502092050209205, "grad_norm": 6.797623155516534, "learning_rate": 5.738232820012406e-08, "logits/chosen": -2.6548681259155273, "logits/rejected": -2.6514317989349365, "logps/chosen": -148.1983642578125, "logps/pi_response": -147.19541931152344, "logps/ref_response": -147.06637573242188, "logps/rejected": -147.02438354492188, "loss": 0.6933, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -0.0014096560189500451, "rewards/margins": -0.000304248504107818, "rewards/rejected": -0.0011054074857383966, "step": 30 }, { "epoch": 0.6694560669456067, "grad_norm": 6.526352701379774, "learning_rate": 2.8496739886173992e-08, "logits/chosen": -2.7033307552337646, "logits/rejected": -2.697089433670044, "logps/chosen": -158.65147399902344, "logps/pi_response": -156.37876892089844, "logps/ref_response": -156.102783203125, "logps/rejected": -155.2120361328125, "loss": 0.6932, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.002376242307946086, "rewards/margins": 2.0093168132007122e-05, "rewards/rejected": -0.0023963353596627712, "step": 40 }, { "epoch": 0.8368200836820083, "grad_norm": 6.8095971653212, "learning_rate": 6.947819411632222e-09, "logits/chosen": -2.687405586242676, "logits/rejected": -2.6960949897766113, "logps/chosen": -156.6856689453125, "logps/pi_response": -156.02719116210938, "logps/ref_response": -155.70803833007812, "logps/rejected": -160.33645629882812, "loss": 0.6932, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -0.002912611234933138, "rewards/margins": 0.0002370561269344762, "rewards/rejected": -0.0031496677547693253, "step": 50 }, { "epoch": 0.9874476987447699, "step": 59, "total_flos": 0.0, "train_loss": 0.6932335586871131, "train_runtime": 2785.4094, "train_samples_per_second": 5.487, "train_steps_per_second": 0.021 } ], "logging_steps": 10, "max_steps": 59, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }