{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9487179487179487, "eval_steps": 500, "global_step": 38, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05128205128205128, "grad_norm": 6.611382418524136, "learning_rate": 5e-05, "loss": 0.9748, "step": 1 }, { "epoch": 0.10256410256410256, "grad_norm": 6.255827210322037, "learning_rate": 4.9909937213563165e-05, "loss": 0.9645, "step": 2 }, { "epoch": 0.15384615384615385, "grad_norm": 26.37694268451028, "learning_rate": 4.9640397758692715e-05, "loss": 1.8679, "step": 3 }, { "epoch": 0.20512820512820512, "grad_norm": 109.62841403244713, "learning_rate": 4.9193323673337476e-05, "loss": 3.1823, "step": 4 }, { "epoch": 0.2564102564102564, "grad_norm": 38.27279611720776, "learning_rate": 4.857193613652711e-05, "loss": 1.8243, "step": 5 }, { "epoch": 0.3076923076923077, "grad_norm": 52.16666128950054, "learning_rate": 4.77807122597034e-05, "loss": 2.3408, "step": 6 }, { "epoch": 0.358974358974359, "grad_norm": 61.865694873216064, "learning_rate": 4.6825352829029705e-05, "loss": 1.668, "step": 7 }, { "epoch": 0.41025641025641024, "grad_norm": 20.412323559915432, "learning_rate": 4.571274123109606e-05, "loss": 1.2475, "step": 8 }, { "epoch": 0.46153846153846156, "grad_norm": 6.613572126150075, "learning_rate": 4.445089385796099e-05, "loss": 0.937, "step": 9 }, { "epoch": 0.5128205128205128, "grad_norm": 6.2563622738947045, "learning_rate": 4.3048902348863116e-05, "loss": 0.8008, "step": 10 }, { "epoch": 0.5641025641025641, "grad_norm": 7.2077871299442755, "learning_rate": 4.151686808475204e-05, "loss": 0.7078, "step": 11 }, { "epoch": 0.6153846153846154, "grad_norm": 2.815837188824988, "learning_rate": 3.986582940760717e-05, "loss": 0.5816, "step": 12 }, { "epoch": 0.6666666666666666, "grad_norm": 3.486576911271355, "learning_rate": 3.8107682088930794e-05, "loss": 0.4899, "step": 13 }, { "epoch": 0.717948717948718, "grad_norm": 2.149756304629691, "learning_rate": 3.6255093620441834e-05, "loss": 0.491, "step": 14 }, { "epoch": 0.7692307692307693, "grad_norm": 2.053499289405444, "learning_rate": 3.432141194450772e-05, "loss": 0.4575, "step": 15 }, { "epoch": 0.8205128205128205, "grad_norm": 1.395636933291467, "learning_rate": 3.232056928191376e-05, "loss": 0.3981, "step": 16 }, { "epoch": 0.8717948717948718, "grad_norm": 1.017073513763627, "learning_rate": 3.0266981749893157e-05, "loss": 0.3686, "step": 17 }, { "epoch": 0.9230769230769231, "grad_norm": 1.4739148894658656, "learning_rate": 2.8175445493671972e-05, "loss": 0.4269, "step": 18 }, { "epoch": 0.9743589743589743, "grad_norm": 1.3136690374942346, "learning_rate": 2.606103007990371e-05, "loss": 0.3809, "step": 19 }, { "epoch": 1.0256410256410255, "grad_norm": 1.5835385453547504, "learning_rate": 2.39389699200963e-05, "loss": 0.5439, "step": 20 }, { "epoch": 1.0769230769230769, "grad_norm": 0.8971814869025673, "learning_rate": 2.182455450632803e-05, "loss": 0.3104, "step": 21 }, { "epoch": 1.1282051282051282, "grad_norm": 0.8492410740355226, "learning_rate": 1.973301825010685e-05, "loss": 0.2981, "step": 22 }, { "epoch": 1.1794871794871795, "grad_norm": 0.740051068350938, "learning_rate": 1.7679430718086243e-05, "loss": 0.2724, "step": 23 }, { "epoch": 1.2307692307692308, "grad_norm": 0.6880281330130831, "learning_rate": 1.567858805549229e-05, "loss": 0.2526, "step": 24 }, { "epoch": 1.282051282051282, "grad_norm": 0.6787402589568466, "learning_rate": 1.3744906379558165e-05, "loss": 0.2757, "step": 25 }, { "epoch": 1.3333333333333333, "grad_norm": 0.67758309834235, "learning_rate": 1.1892317911069212e-05, "loss": 0.2696, "step": 26 }, { "epoch": 1.3846153846153846, "grad_norm": 0.674443102358302, "learning_rate": 1.0134170592392836e-05, "loss": 0.2805, "step": 27 }, { "epoch": 1.435897435897436, "grad_norm": 0.5633358189387688, "learning_rate": 8.483131915247968e-06, "loss": 0.2503, "step": 28 }, { "epoch": 1.4871794871794872, "grad_norm": 0.6197473329771566, "learning_rate": 6.951097651136889e-06, "loss": 0.2578, "step": 29 }, { "epoch": 1.5384615384615383, "grad_norm": 0.5580527713592528, "learning_rate": 5.549106142039018e-06, "loss": 0.2553, "step": 30 }, { "epoch": 1.5897435897435899, "grad_norm": 0.5149565392248769, "learning_rate": 4.2872587689039484e-06, "loss": 0.2405, "step": 31 }, { "epoch": 1.641025641025641, "grad_norm": 0.5766230877779212, "learning_rate": 3.1746471709702964e-06, "loss": 0.2889, "step": 32 }, { "epoch": 1.6923076923076923, "grad_norm": 0.5729826484566335, "learning_rate": 2.219287740296605e-06, "loss": 0.257, "step": 33 }, { "epoch": 1.7435897435897436, "grad_norm": 0.5287896884964262, "learning_rate": 1.428063863472895e-06, "loss": 0.2621, "step": 34 }, { "epoch": 1.7948717948717947, "grad_norm": 0.4759737147838452, "learning_rate": 8.066763266625282e-07, "loss": 0.2433, "step": 35 }, { "epoch": 1.8461538461538463, "grad_norm": 0.5048702889164741, "learning_rate": 3.5960224130728857e-07, "loss": 0.2462, "step": 36 }, { "epoch": 1.8974358974358974, "grad_norm": 0.4819356178933713, "learning_rate": 9.006278643683696e-08, "loss": 0.2415, "step": 37 }, { "epoch": 1.9487179487179487, "grad_norm": 0.43218352237890395, "learning_rate": 0.0, "loss": 0.2067, "step": 38 }, { "epoch": 1.9487179487179487, "step": 38, "total_flos": 6321437491200.0, "train_loss": 0.6674525247592675, "train_runtime": 535.3552, "train_samples_per_second": 1.143, "train_steps_per_second": 0.071 } ], "logging_steps": 1, "max_steps": 38, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6321437491200.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }