{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.269230769230769, "eval_steps": 25, "global_step": 15, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20512820512820512, "grad_norm": 0.02463500201702118, "learning_rate": 9.890738003669029e-05, "loss": 10.3785, "step": 1 }, { "epoch": 0.20512820512820512, "eval_loss": 10.376482009887695, "eval_runtime": 0.0463, "eval_samples_per_second": 713.06, "eval_steps_per_second": 108.039, "step": 1 }, { "epoch": 0.41025641025641024, "grad_norm": 0.02375912107527256, "learning_rate": 9.567727288213005e-05, "loss": 10.3785, "step": 2 }, { "epoch": 0.6153846153846154, "grad_norm": 0.021632865071296692, "learning_rate": 9.045084971874738e-05, "loss": 10.3781, "step": 3 }, { "epoch": 0.8205128205128205, "grad_norm": 0.021732978522777557, "learning_rate": 8.345653031794292e-05, "loss": 10.3779, "step": 4 }, { "epoch": 1.0897435897435896, "grad_norm": 0.03939536213874817, "learning_rate": 7.500000000000001e-05, "loss": 19.5493, "step": 5 }, { "epoch": 1.294871794871795, "grad_norm": 0.02134210802614689, "learning_rate": 6.545084971874738e-05, "loss": 10.3206, "step": 6 }, { "epoch": 1.5, "grad_norm": 0.024313388392329216, "learning_rate": 5.522642316338268e-05, "loss": 10.4732, "step": 7 }, { "epoch": 1.7051282051282053, "grad_norm": 0.044148996472358704, "learning_rate": 4.477357683661734e-05, "loss": 10.3157, "step": 8 }, { "epoch": 1.9102564102564101, "grad_norm": 0.027150770649313927, "learning_rate": 3.4549150281252636e-05, "loss": 11.0088, "step": 9 }, { "epoch": 2.1794871794871793, "grad_norm": 0.028502710163593292, "learning_rate": 2.500000000000001e-05, "loss": 10.8882, "step": 10 }, { "epoch": 2.3846153846153846, "grad_norm": 0.031949229538440704, "learning_rate": 1.6543469682057106e-05, "loss": 10.4045, "step": 11 }, { "epoch": 2.58974358974359, "grad_norm": 0.025377444922924042, "learning_rate": 9.549150281252633e-06, "loss": 10.3581, "step": 12 }, { "epoch": 2.7948717948717947, "grad_norm": 0.0361032597720623, "learning_rate": 4.322727117869951e-06, "loss": 10.3767, "step": 13 }, { "epoch": 3.064102564102564, "grad_norm": 0.04238375648856163, "learning_rate": 1.0926199633097157e-06, "loss": 18.0558, "step": 14 }, { "epoch": 3.269230769230769, "grad_norm": 0.05153433233499527, "learning_rate": 0.0, "loss": 11.0025, "step": 15 } ], "logging_steps": 1, "max_steps": 15, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 12830417879040.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }