{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0006861181152335374, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.861181152335375e-05, "grad_norm": 2.1441073417663574, "learning_rate": 2e-05, "loss": 4.9116, "step": 1 }, { "epoch": 6.861181152335375e-05, "eval_loss": 4.604319095611572, "eval_runtime": 233.9836, "eval_samples_per_second": 26.228, "eval_steps_per_second": 13.116, "step": 1 }, { "epoch": 0.0001372236230467075, "grad_norm": 1.9458142518997192, "learning_rate": 4e-05, "loss": 4.5221, "step": 2 }, { "epoch": 0.00020583543457006125, "grad_norm": 2.0711398124694824, "learning_rate": 6e-05, "loss": 4.8797, "step": 3 }, { "epoch": 0.00020583543457006125, "eval_loss": 4.589332580566406, "eval_runtime": 231.9652, "eval_samples_per_second": 26.457, "eval_steps_per_second": 13.23, "step": 3 }, { "epoch": 0.000274447246093415, "grad_norm": 1.8909467458724976, "learning_rate": 8e-05, "loss": 4.5326, "step": 4 }, { "epoch": 0.0003430590576167687, "grad_norm": 2.2932982444763184, "learning_rate": 0.0001, "loss": 4.8023, "step": 5 }, { "epoch": 0.0004116708691401225, "grad_norm": 2.753103494644165, "learning_rate": 0.00012, "loss": 4.4773, "step": 6 }, { "epoch": 0.0004116708691401225, "eval_loss": 4.165267467498779, "eval_runtime": 232.0307, "eval_samples_per_second": 26.449, "eval_steps_per_second": 13.227, "step": 6 }, { "epoch": 0.0004802826806634762, "grad_norm": 2.441800355911255, "learning_rate": 0.00014, "loss": 3.8999, "step": 7 }, { "epoch": 0.00054889449218683, "grad_norm": 3.2049639225006104, "learning_rate": 0.00016, "loss": 3.5331, "step": 8 }, { "epoch": 0.0006175063037101837, "grad_norm": 2.61136531829834, "learning_rate": 0.00018, "loss": 3.4902, "step": 9 }, { "epoch": 0.0006175063037101837, "eval_loss": 2.9883694648742676, "eval_runtime": 232.1027, "eval_samples_per_second": 26.441, "eval_steps_per_second": 13.223, "step": 9 }, { "epoch": 0.0006861181152335374, "grad_norm": 2.8295400142669678, "learning_rate": 0.0002, "loss": 2.9575, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2570388167983104.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }