{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3657457189463597,
"eval_steps": 500,
"global_step": 50000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 7.314914378927195e-06,
"grad_norm": 2702.876220703125,
"learning_rate": 3.6573769292663306e-10,
"loss": 185.8854,
"step": 1
},
{
"epoch": 0.007314914378927195,
"grad_norm": 25910.705078125,
"learning_rate": 3.6573769292663306e-07,
"loss": 48.434,
"step": 1000
},
{
"epoch": 0.01462982875785439,
"grad_norm": 3.9086620807647705,
"learning_rate": 7.314753858532661e-07,
"loss": 17.9804,
"step": 2000
},
{
"epoch": 0.021944743136781585,
"grad_norm": 17.556304931640625,
"learning_rate": 1.097213078779899e-06,
"loss": 10.5262,
"step": 3000
},
{
"epoch": 0.02925965751570878,
"grad_norm": 4689.71240234375,
"learning_rate": 1.4629507717065323e-06,
"loss": 11.6651,
"step": 4000
},
{
"epoch": 0.03657457189463598,
"grad_norm": 0.0065016308799386024,
"learning_rate": 1.8286884646331652e-06,
"loss": 9.4746,
"step": 5000
},
{
"epoch": 0.04388948627356317,
"grad_norm": 0.060599055141210556,
"learning_rate": 2.194426157559798e-06,
"loss": 7.1303,
"step": 6000
},
{
"epoch": 0.051204400652490364,
"grad_norm": 1028.9453125,
"learning_rate": 2.560163850486431e-06,
"loss": 8.917,
"step": 7000
},
{
"epoch": 0.05851931503141756,
"grad_norm": 5.463726043701172,
"learning_rate": 2.9259015434130645e-06,
"loss": 8.9339,
"step": 8000
},
{
"epoch": 0.06583422941034475,
"grad_norm": 0.3307730257511139,
"learning_rate": 3.2916392363396975e-06,
"loss": 8.746,
"step": 9000
},
{
"epoch": 0.07314914378927195,
"grad_norm": 0.003347629914060235,
"learning_rate": 3.6573769292663304e-06,
"loss": 9.4711,
"step": 10000
},
{
"epoch": 0.08046405816819914,
"grad_norm": 0.006155295763164759,
"learning_rate": 4.023114622192964e-06,
"loss": 7.647,
"step": 11000
},
{
"epoch": 0.08777897254712634,
"grad_norm": 0.0019895241130143404,
"learning_rate": 4.388852315119596e-06,
"loss": 5.7327,
"step": 12000
},
{
"epoch": 0.09509388692605353,
"grad_norm": 0.004527593031525612,
"learning_rate": 4.75459000804623e-06,
"loss": 5.5751,
"step": 13000
},
{
"epoch": 0.10240880130498073,
"grad_norm": 0.009128883481025696,
"learning_rate": 4.986629929451543e-06,
"loss": 8.6148,
"step": 14000
},
{
"epoch": 0.10972371568390792,
"grad_norm": 0.012683026492595673,
"learning_rate": 4.9459914171462015e-06,
"loss": 8.5558,
"step": 15000
},
{
"epoch": 0.11703863006283512,
"grad_norm": 0.0028167981654405594,
"learning_rate": 4.90535290484086e-06,
"loss": 6.0433,
"step": 16000
},
{
"epoch": 0.1243535444417623,
"grad_norm": 0.0011900264071300626,
"learning_rate": 4.864714392535519e-06,
"loss": 6.9084,
"step": 17000
},
{
"epoch": 0.1316684588206895,
"grad_norm": 0.025524910539388657,
"learning_rate": 4.824075880230177e-06,
"loss": 6.7333,
"step": 18000
},
{
"epoch": 0.1389833731996167,
"grad_norm": 0.027619725093245506,
"learning_rate": 4.783437367924835e-06,
"loss": 4.1436,
"step": 19000
},
{
"epoch": 0.1462982875785439,
"grad_norm": 0.012698939070105553,
"learning_rate": 4.742798855619494e-06,
"loss": 5.397,
"step": 20000
},
{
"epoch": 0.15361320195747108,
"grad_norm": 0.031142177060246468,
"learning_rate": 4.702160343314152e-06,
"loss": 5.5156,
"step": 21000
},
{
"epoch": 0.16092811633639828,
"grad_norm": 0.0004189134924672544,
"learning_rate": 4.66152183100881e-06,
"loss": 4.8633,
"step": 22000
},
{
"epoch": 0.16824303071532548,
"grad_norm": 0.0103899035602808,
"learning_rate": 4.620883318703469e-06,
"loss": 7.2146,
"step": 23000
},
{
"epoch": 0.17555794509425268,
"grad_norm": 1158.8017578125,
"learning_rate": 4.580244806398127e-06,
"loss": 5.7667,
"step": 24000
},
{
"epoch": 0.18287285947317986,
"grad_norm": 0.03399639576673508,
"learning_rate": 4.5396062940927856e-06,
"loss": 5.0472,
"step": 25000
},
{
"epoch": 0.19018777385210706,
"grad_norm": 0.017644532024860382,
"learning_rate": 4.4989677817874446e-06,
"loss": 4.8188,
"step": 26000
},
{
"epoch": 0.19750268823103426,
"grad_norm": 0.00010079160711029544,
"learning_rate": 4.4583292694821035e-06,
"loss": 5.7598,
"step": 27000
},
{
"epoch": 0.20481760260996146,
"grad_norm": 0.009618501178920269,
"learning_rate": 4.417690757176762e-06,
"loss": 4.683,
"step": 28000
},
{
"epoch": 0.21213251698888866,
"grad_norm": 0.018396975472569466,
"learning_rate": 4.377052244871421e-06,
"loss": 5.7816,
"step": 29000
},
{
"epoch": 0.21944743136781583,
"grad_norm": 0.026549218222498894,
"learning_rate": 4.336413732566079e-06,
"loss": 5.5149,
"step": 30000
},
{
"epoch": 0.22676234574674303,
"grad_norm": 0.01402178592979908,
"learning_rate": 4.295775220260737e-06,
"loss": 6.1021,
"step": 31000
},
{
"epoch": 0.23407726012567023,
"grad_norm": 2950.190185546875,
"learning_rate": 4.255136707955396e-06,
"loss": 5.1742,
"step": 32000
},
{
"epoch": 0.24139217450459743,
"grad_norm": 0.01243713591247797,
"learning_rate": 4.214498195650054e-06,
"loss": 4.8856,
"step": 33000
},
{
"epoch": 0.2487070888835246,
"grad_norm": 0.00121857482008636,
"learning_rate": 4.173859683344712e-06,
"loss": 4.0296,
"step": 34000
},
{
"epoch": 0.25602200326245184,
"grad_norm": 0.021528728306293488,
"learning_rate": 4.133221171039371e-06,
"loss": 3.7989,
"step": 35000
},
{
"epoch": 0.263336917641379,
"grad_norm": 0.067794568836689,
"learning_rate": 4.092582658734029e-06,
"loss": 4.8373,
"step": 36000
},
{
"epoch": 0.2706518320203062,
"grad_norm": 83.66200256347656,
"learning_rate": 4.0519441464286876e-06,
"loss": 3.2441,
"step": 37000
},
{
"epoch": 0.2779667463992334,
"grad_norm": 0.00217541866004467,
"learning_rate": 4.0113056341233466e-06,
"loss": 3.5578,
"step": 38000
},
{
"epoch": 0.2852816607781606,
"grad_norm": 0.0008728219545446336,
"learning_rate": 3.970667121818005e-06,
"loss": 2.6644,
"step": 39000
},
{
"epoch": 0.2925965751570878,
"grad_norm": 0.027021408081054688,
"learning_rate": 3.930028609512664e-06,
"loss": 3.7778,
"step": 40000
},
{
"epoch": 0.299911489536015,
"grad_norm": 0.001312136766500771,
"learning_rate": 3.889390097207322e-06,
"loss": 4.2509,
"step": 41000
},
{
"epoch": 0.30722640391494216,
"grad_norm": 0.011145360767841339,
"learning_rate": 3.84875158490198e-06,
"loss": 4.6084,
"step": 42000
},
{
"epoch": 0.3145413182938694,
"grad_norm": 289.0566711425781,
"learning_rate": 3.8081130725966386e-06,
"loss": 3.3176,
"step": 43000
},
{
"epoch": 0.32185623267279656,
"grad_norm": 0.006871068850159645,
"learning_rate": 3.767474560291297e-06,
"loss": 3.0108,
"step": 44000
},
{
"epoch": 0.32917114705172373,
"grad_norm": 0.002771923318505287,
"learning_rate": 3.7268360479859557e-06,
"loss": 3.5366,
"step": 45000
},
{
"epoch": 0.33648606143065096,
"grad_norm": 0.0030192858539521694,
"learning_rate": 3.686197535680614e-06,
"loss": 3.3997,
"step": 46000
},
{
"epoch": 0.34380097580957814,
"grad_norm": 3926.616943359375,
"learning_rate": 3.6455590233752724e-06,
"loss": 3.5042,
"step": 47000
},
{
"epoch": 0.35111589018850536,
"grad_norm": 0.000544128124602139,
"learning_rate": 3.604920511069931e-06,
"loss": 2.9953,
"step": 48000
},
{
"epoch": 0.35843080456743254,
"grad_norm": 14144.9677734375,
"learning_rate": 3.564281998764589e-06,
"loss": 2.9742,
"step": 49000
},
{
"epoch": 0.3657457189463597,
"grad_norm": 0.0021343908738344908,
"learning_rate": 3.5236434864592477e-06,
"loss": 3.4145,
"step": 50000
}
],
"logging_steps": 1000,
"max_steps": 136707,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50000,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}