{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4149, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024102193299590263, "grad_norm": 0.1189291775226593, "learning_rate": 2.0000000000000003e-06, "loss": 2.4342, "step": 100 }, { "epoch": 0.048204386599180526, "grad_norm": 0.13796567916870117, "learning_rate": 4.000000000000001e-06, "loss": 2.4322, "step": 200 }, { "epoch": 0.07230657989877079, "grad_norm": 0.19356612861156464, "learning_rate": 6e-06, "loss": 2.4096, "step": 300 }, { "epoch": 0.09640877319836105, "grad_norm": 0.2834133803844452, "learning_rate": 8.000000000000001e-06, "loss": 2.407, "step": 400 }, { "epoch": 0.12051096649795132, "grad_norm": 0.3791491389274597, "learning_rate": 1e-05, "loss": 2.3248, "step": 500 }, { "epoch": 0.14461315979754158, "grad_norm": 0.4329169988632202, "learning_rate": 1.2e-05, "loss": 2.2811, "step": 600 }, { "epoch": 0.16871535309713184, "grad_norm": 0.46981674432754517, "learning_rate": 1.4e-05, "loss": 2.305, "step": 700 }, { "epoch": 0.1928175463967221, "grad_norm": 0.5296657085418701, "learning_rate": 1.6000000000000003e-05, "loss": 2.3034, "step": 800 }, { "epoch": 0.21691973969631237, "grad_norm": 0.5638027191162109, "learning_rate": 1.8e-05, "loss": 2.2357, "step": 900 }, { "epoch": 0.24102193299590263, "grad_norm": 0.5962812900543213, "learning_rate": 2e-05, "loss": 2.2149, "step": 1000 }, { "epoch": 0.26512412629549287, "grad_norm": 0.6268550753593445, "learning_rate": 1.9950276214176393e-05, "loss": 2.1829, "step": 1100 }, { "epoch": 0.28922631959508316, "grad_norm": 0.7216062545776367, "learning_rate": 1.9801599347680883e-05, "loss": 2.1726, "step": 1200 }, { "epoch": 0.3133285128946734, "grad_norm": 0.6300708651542664, "learning_rate": 1.9555447955846792e-05, "loss": 2.1723, "step": 1300 }, { "epoch": 0.3374307061942637, "grad_norm": 0.7944507598876953, "learning_rate": 1.921426995449166e-05, "loss": 2.1556, "step": 1400 }, { "epoch": 0.3615328994938539, "grad_norm": 0.8489624261856079, "learning_rate": 1.8781458275988913e-05, "loss": 2.1597, "step": 1500 }, { "epoch": 0.3856350927934442, "grad_norm": 0.7369367480278015, "learning_rate": 1.826131712737932e-05, "loss": 2.1143, "step": 1600 }, { "epoch": 0.40973728609303445, "grad_norm": 0.7708919644355774, "learning_rate": 1.7659019186077174e-05, "loss": 2.1062, "step": 1700 }, { "epoch": 0.43383947939262474, "grad_norm": 0.8765296936035156, "learning_rate": 1.6980554158849546e-05, "loss": 2.1099, "step": 1800 }, { "epoch": 0.457941672692215, "grad_norm": 0.8643397092819214, "learning_rate": 1.6232669215636963e-05, "loss": 2.0938, "step": 1900 }, { "epoch": 0.48204386599180526, "grad_norm": 0.7830623984336853, "learning_rate": 1.5422801890586833e-05, "loss": 2.052, "step": 2000 }, { "epoch": 0.5061460592913956, "grad_norm": 1.1112042665481567, "learning_rate": 1.4559006117582424e-05, "loss": 2.1028, "step": 2100 }, { "epoch": 0.5302482525909857, "grad_norm": 1.3099650144577026, "learning_rate": 1.3649872135826173e-05, "loss": 2.0898, "step": 2200 }, { "epoch": 0.554350445890576, "grad_norm": 0.9661689400672913, "learning_rate": 1.2704441061996842e-05, "loss": 2.036, "step": 2300 }, { "epoch": 0.5784526391901663, "grad_norm": 0.8178704977035522, "learning_rate": 1.1732114978539648e-05, "loss": 2.0622, "step": 2400 }, { "epoch": 0.6025548324897566, "grad_norm": 0.8422364592552185, "learning_rate": 1.0742563432239503e-05, "loss": 2.0805, "step": 2500 }, { "epoch": 0.6266570257893468, "grad_norm": 0.980856716632843, "learning_rate": 9.745627272926332e-06, "loss": 2.0643, "step": 2600 }, { "epoch": 0.6507592190889371, "grad_norm": 0.8345414996147156, "learning_rate": 8.751220788613237e-06, "loss": 2.0526, "step": 2700 }, { "epoch": 0.6748614123885274, "grad_norm": 0.8472179770469666, "learning_rate": 7.769233110309735e-06, "loss": 2.0522, "step": 2800 }, { "epoch": 0.6989636056881177, "grad_norm": 0.8088483810424805, "learning_rate": 6.809429867015307e-06, "loss": 2.1132, "step": 2900 }, { "epoch": 0.7230657989877078, "grad_norm": 0.9939411878585815, "learning_rate": 5.881356068910429e-06, "loss": 2.0592, "step": 3000 }, { "epoch": 0.7471679922872981, "grad_norm": 1.0879184007644653, "learning_rate": 4.994241184548192e-06, "loss": 1.9955, "step": 3100 }, { "epoch": 0.7712701855868884, "grad_norm": 1.0115540027618408, "learning_rate": 4.1569073560307905e-06, "loss": 2.0433, "step": 3200 }, { "epoch": 0.7953723788864787, "grad_norm": 0.9652523398399353, "learning_rate": 3.3776816649486378e-06, "loss": 2.0405, "step": 3300 }, { "epoch": 0.8194745721860689, "grad_norm": 0.9643158316612244, "learning_rate": 2.6643133215760586e-06, "loss": 2.0431, "step": 3400 }, { "epoch": 0.8435767654856592, "grad_norm": 0.9680646657943726, "learning_rate": 2.0238966008568905e-06, "loss": 2.0118, "step": 3500 }, { "epoch": 0.8676789587852495, "grad_norm": 0.9375040531158447, "learning_rate": 1.4628002915629202e-06, "loss": 2.062, "step": 3600 }, { "epoch": 0.8917811520848398, "grad_norm": 0.9199696183204651, "learning_rate": 9.866043602360909e-07, "loss": 2.0442, "step": 3700 }, { "epoch": 0.91588334538443, "grad_norm": 0.7687466144561768, "learning_rate": 6.000444597762811e-07, "loss": 2.0474, "step": 3800 }, { "epoch": 0.9399855386840202, "grad_norm": 1.007958173751831, "learning_rate": 3.069648345231813e-07, "loss": 2.0126, "step": 3900 }, { "epoch": 0.9640877319836105, "grad_norm": 0.9788452982902527, "learning_rate": 1.1028009017986174e-07, "loss": 2.0637, "step": 4000 }, { "epoch": 0.9881899252832008, "grad_norm": 1.100760817527771, "learning_rate": 1.1946208766822066e-08, "loss": 2.0634, "step": 4100 }, { "epoch": 1.0, "step": 4149, "total_flos": 7.557299011584e+16, "train_loss": 2.140284038331314, "train_runtime": 1100.1009, "train_samples_per_second": 7.543, "train_steps_per_second": 3.771 } ], "logging_steps": 100, "max_steps": 4149, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.557299011584e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }