{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008, "grad_norm": 477.2496337890625, "learning_rate": 7.692307692307694e-07, "logits/chosen": 2.15625, "logits/rejected": 1.4375, "logps/chosen": -146.0, "logps/rejected": -131.0, "loss": 2.7656, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.08, "grad_norm": 1.534887996967882e-05, "learning_rate": 7.692307692307694e-06, "logits/chosen": 1.8359375, "logits/rejected": 1.1328125, "logps/chosen": -125.0, "logps/rejected": -187.0, "loss": 0.9163, "rewards/accuracies": 0.8055555820465088, "rewards/chosen": 1.46875, "rewards/margins": 8.3125, "rewards/rejected": -6.84375, "step": 10 }, { "epoch": 0.16, "grad_norm": 6.47921126473161e-12, "learning_rate": 9.903926402016153e-06, "logits/chosen": 0.3828125, "logits/rejected": 0.357421875, "logps/chosen": -160.0, "logps/rejected": -540.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 39.25, "rewards/rejected": -42.25, "step": 20 }, { "epoch": 0.24, "grad_norm": 5.994430991895002e-15, "learning_rate": 9.442228179894362e-06, "logits/chosen": -0.060791015625, "logits/rejected": -0.28515625, "logps/chosen": -264.0, "logps/rejected": -728.0, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -13.9375, "rewards/margins": 47.25, "rewards/rejected": -61.25, "step": 30 }, { "epoch": 0.32, "grad_norm": 5.643975006320093e-14, "learning_rate": 8.633301610170136e-06, "logits/chosen": 1.0703125, "logits/rejected": 0.59375, "logps/chosen": -164.0, "logps/rejected": -740.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.609375, "rewards/margins": 59.25, "rewards/rejected": -62.0, "step": 40 }, { "epoch": 0.4, "grad_norm": 2.0163520159099946e-11, "learning_rate": 7.540376726232648e-06, "logits/chosen": 1.234375, "logits/rejected": 0.6640625, "logps/chosen": -130.0, "logps/rejected": -636.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 52.0, "rewards/rejected": -53.0, "step": 50 }, { "epoch": 0.48, "grad_norm": 4.786671276360566e-15, "learning_rate": 6.248882390836135e-06, "logits/chosen": 1.359375, "logits/rejected": 0.78515625, "logps/chosen": -146.0, "logps/rejected": -660.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 52.5, "rewards/rejected": -54.0, "step": 60 }, { "epoch": 0.56, "grad_norm": 1.9845672956549099e-13, "learning_rate": 4.859768718620656e-06, "logits/chosen": 1.359375, "logits/rejected": 0.83984375, "logps/chosen": -150.0, "logps/rejected": -640.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.58203125, "rewards/margins": 52.0, "rewards/rejected": -52.5, "step": 70 }, { "epoch": 0.64, "grad_norm": 3.1451468355701492e-15, "learning_rate": 3.4816162744519266e-06, "logits/chosen": 1.3125, "logits/rejected": 0.73828125, "logps/chosen": -149.0, "logps/rejected": -688.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.97265625, "rewards/margins": 55.75, "rewards/rejected": -56.75, "step": 80 }, { "epoch": 0.72, "grad_norm": 1.8802392639811344e-13, "learning_rate": 2.2221488349019903e-06, "logits/chosen": 1.3359375, "logits/rejected": 0.79296875, "logps/chosen": -144.0, "logps/rejected": -680.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 54.5, "rewards/rejected": -56.25, "step": 90 }, { "epoch": 0.8, "grad_norm": 2.8757966674675117e-07, "learning_rate": 1.1798131208919628e-06, "logits/chosen": 1.359375, "logits/rejected": 0.76171875, "logps/chosen": -150.0, "logps/rejected": -632.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453125, "rewards/margins": 49.75, "rewards/rejected": -51.25, "step": 100 }, { "epoch": 0.88, "grad_norm": 1.703149137021777e-14, "learning_rate": 4.3608367469340553e-07, "logits/chosen": 1.34375, "logits/rejected": 0.7734375, "logps/chosen": -153.0, "logps/rejected": -628.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.078125, "rewards/margins": 50.0, "rewards/rejected": -52.0, "step": 110 }, { "epoch": 0.96, "grad_norm": 7.835807033238558e-16, "learning_rate": 4.909437331777178e-08, "logits/chosen": 1.3515625, "logits/rejected": 0.7734375, "logps/chosen": -144.0, "logps/rejected": -676.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 54.0, "rewards/rejected": -55.25, "step": 120 }, { "epoch": 1.0, "step": 125, "total_flos": 0.0, "train_loss": 0.08870122274137147, "train_runtime": 1811.5821, "train_samples_per_second": 1.104, "train_steps_per_second": 0.069 } ], "logging_steps": 10, "max_steps": 125, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }