{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3657457189463597, "eval_steps": 500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.314914378927195e-06, "grad_norm": 2702.876220703125, "learning_rate": 3.6573769292663306e-10, "loss": 185.8854, "step": 1 }, { "epoch": 0.007314914378927195, "grad_norm": 25910.705078125, "learning_rate": 3.6573769292663306e-07, "loss": 48.434, "step": 1000 }, { "epoch": 0.01462982875785439, "grad_norm": 3.9086620807647705, "learning_rate": 7.314753858532661e-07, "loss": 17.9804, "step": 2000 }, { "epoch": 0.021944743136781585, "grad_norm": 17.556304931640625, "learning_rate": 1.097213078779899e-06, "loss": 10.5262, "step": 3000 }, { "epoch": 0.02925965751570878, "grad_norm": 4689.71240234375, "learning_rate": 1.4629507717065323e-06, "loss": 11.6651, "step": 4000 }, { "epoch": 0.03657457189463598, "grad_norm": 0.0065016308799386024, "learning_rate": 1.8286884646331652e-06, "loss": 9.4746, "step": 5000 }, { "epoch": 0.04388948627356317, "grad_norm": 0.060599055141210556, "learning_rate": 2.194426157559798e-06, "loss": 7.1303, "step": 6000 }, { "epoch": 0.051204400652490364, "grad_norm": 1028.9453125, "learning_rate": 2.560163850486431e-06, "loss": 8.917, "step": 7000 }, { "epoch": 0.05851931503141756, "grad_norm": 5.463726043701172, "learning_rate": 2.9259015434130645e-06, "loss": 8.9339, "step": 8000 }, { "epoch": 0.06583422941034475, "grad_norm": 0.3307730257511139, "learning_rate": 3.2916392363396975e-06, "loss": 8.746, "step": 9000 }, { "epoch": 0.07314914378927195, "grad_norm": 0.003347629914060235, "learning_rate": 3.6573769292663304e-06, "loss": 9.4711, "step": 10000 }, { "epoch": 0.08046405816819914, "grad_norm": 0.006155295763164759, "learning_rate": 4.023114622192964e-06, "loss": 7.647, "step": 11000 }, { "epoch": 0.08777897254712634, "grad_norm": 0.0019895241130143404, "learning_rate": 4.388852315119596e-06, "loss": 5.7327, "step": 12000 }, { "epoch": 0.09509388692605353, "grad_norm": 0.004527593031525612, "learning_rate": 4.75459000804623e-06, "loss": 5.5751, "step": 13000 }, { "epoch": 0.10240880130498073, "grad_norm": 0.009128883481025696, "learning_rate": 4.986629929451543e-06, "loss": 8.6148, "step": 14000 }, { "epoch": 0.10972371568390792, "grad_norm": 0.012683026492595673, "learning_rate": 4.9459914171462015e-06, "loss": 8.5558, "step": 15000 }, { "epoch": 0.11703863006283512, "grad_norm": 0.0028167981654405594, "learning_rate": 4.90535290484086e-06, "loss": 6.0433, "step": 16000 }, { "epoch": 0.1243535444417623, "grad_norm": 0.0011900264071300626, "learning_rate": 4.864714392535519e-06, "loss": 6.9084, "step": 17000 }, { "epoch": 0.1316684588206895, "grad_norm": 0.025524910539388657, "learning_rate": 4.824075880230177e-06, "loss": 6.7333, "step": 18000 }, { "epoch": 0.1389833731996167, "grad_norm": 0.027619725093245506, "learning_rate": 4.783437367924835e-06, "loss": 4.1436, "step": 19000 }, { "epoch": 0.1462982875785439, "grad_norm": 0.012698939070105553, "learning_rate": 4.742798855619494e-06, "loss": 5.397, "step": 20000 }, { "epoch": 0.15361320195747108, "grad_norm": 0.031142177060246468, "learning_rate": 4.702160343314152e-06, "loss": 5.5156, "step": 21000 }, { "epoch": 0.16092811633639828, "grad_norm": 0.0004189134924672544, "learning_rate": 4.66152183100881e-06, "loss": 4.8633, "step": 22000 }, { "epoch": 0.16824303071532548, "grad_norm": 0.0103899035602808, "learning_rate": 4.620883318703469e-06, "loss": 7.2146, "step": 23000 }, { "epoch": 0.17555794509425268, "grad_norm": 1158.8017578125, "learning_rate": 4.580244806398127e-06, "loss": 5.7667, "step": 24000 }, { "epoch": 0.18287285947317986, "grad_norm": 0.03399639576673508, "learning_rate": 4.5396062940927856e-06, "loss": 5.0472, "step": 25000 }, { "epoch": 0.19018777385210706, "grad_norm": 0.017644532024860382, "learning_rate": 4.4989677817874446e-06, "loss": 4.8188, "step": 26000 }, { "epoch": 0.19750268823103426, "grad_norm": 0.00010079160711029544, "learning_rate": 4.4583292694821035e-06, "loss": 5.7598, "step": 27000 }, { "epoch": 0.20481760260996146, "grad_norm": 0.009618501178920269, "learning_rate": 4.417690757176762e-06, "loss": 4.683, "step": 28000 }, { "epoch": 0.21213251698888866, "grad_norm": 0.018396975472569466, "learning_rate": 4.377052244871421e-06, "loss": 5.7816, "step": 29000 }, { "epoch": 0.21944743136781583, "grad_norm": 0.026549218222498894, "learning_rate": 4.336413732566079e-06, "loss": 5.5149, "step": 30000 }, { "epoch": 0.22676234574674303, "grad_norm": 0.01402178592979908, "learning_rate": 4.295775220260737e-06, "loss": 6.1021, "step": 31000 }, { "epoch": 0.23407726012567023, "grad_norm": 2950.190185546875, "learning_rate": 4.255136707955396e-06, "loss": 5.1742, "step": 32000 }, { "epoch": 0.24139217450459743, "grad_norm": 0.01243713591247797, "learning_rate": 4.214498195650054e-06, "loss": 4.8856, "step": 33000 }, { "epoch": 0.2487070888835246, "grad_norm": 0.00121857482008636, "learning_rate": 4.173859683344712e-06, "loss": 4.0296, "step": 34000 }, { "epoch": 0.25602200326245184, "grad_norm": 0.021528728306293488, "learning_rate": 4.133221171039371e-06, "loss": 3.7989, "step": 35000 }, { "epoch": 0.263336917641379, "grad_norm": 0.067794568836689, "learning_rate": 4.092582658734029e-06, "loss": 4.8373, "step": 36000 }, { "epoch": 0.2706518320203062, "grad_norm": 83.66200256347656, "learning_rate": 4.0519441464286876e-06, "loss": 3.2441, "step": 37000 }, { "epoch": 0.2779667463992334, "grad_norm": 0.00217541866004467, "learning_rate": 4.0113056341233466e-06, "loss": 3.5578, "step": 38000 }, { "epoch": 0.2852816607781606, "grad_norm": 0.0008728219545446336, "learning_rate": 3.970667121818005e-06, "loss": 2.6644, "step": 39000 }, { "epoch": 0.2925965751570878, "grad_norm": 0.027021408081054688, "learning_rate": 3.930028609512664e-06, "loss": 3.7778, "step": 40000 }, { "epoch": 0.299911489536015, "grad_norm": 0.001312136766500771, "learning_rate": 3.889390097207322e-06, "loss": 4.2509, "step": 41000 }, { "epoch": 0.30722640391494216, "grad_norm": 0.011145360767841339, "learning_rate": 3.84875158490198e-06, "loss": 4.6084, "step": 42000 }, { "epoch": 0.3145413182938694, "grad_norm": 289.0566711425781, "learning_rate": 3.8081130725966386e-06, "loss": 3.3176, "step": 43000 }, { "epoch": 0.32185623267279656, "grad_norm": 0.006871068850159645, "learning_rate": 3.767474560291297e-06, "loss": 3.0108, "step": 44000 }, { "epoch": 0.32917114705172373, "grad_norm": 0.002771923318505287, "learning_rate": 3.7268360479859557e-06, "loss": 3.5366, "step": 45000 }, { "epoch": 0.33648606143065096, "grad_norm": 0.0030192858539521694, "learning_rate": 3.686197535680614e-06, "loss": 3.3997, "step": 46000 }, { "epoch": 0.34380097580957814, "grad_norm": 3926.616943359375, "learning_rate": 3.6455590233752724e-06, "loss": 3.5042, "step": 47000 }, { "epoch": 0.35111589018850536, "grad_norm": 0.000544128124602139, "learning_rate": 3.604920511069931e-06, "loss": 2.9953, "step": 48000 }, { "epoch": 0.35843080456743254, "grad_norm": 14144.9677734375, "learning_rate": 3.564281998764589e-06, "loss": 2.9742, "step": 49000 }, { "epoch": 0.3657457189463597, "grad_norm": 0.0021343908738344908, "learning_rate": 3.5236434864592477e-06, "loss": 3.4145, "step": 50000 } ], "logging_steps": 1000, "max_steps": 136707, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }