|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998933522929257, |
|
"eval_steps": 500, |
|
"global_step": 6327, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004739898092191018, |
|
"grad_norm": 0.36863938554596193, |
|
"learning_rate": 9.999938362758687e-05, |
|
"loss": 2.2013, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.009479796184382036, |
|
"grad_norm": 0.500081400365978, |
|
"learning_rate": 9.999753452554404e-05, |
|
"loss": 2.0963, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.014219694276573054, |
|
"grad_norm": 0.4147328978072978, |
|
"learning_rate": 9.999445273946093e-05, |
|
"loss": 2.1148, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.018959592368764072, |
|
"grad_norm": 0.35930434339916095, |
|
"learning_rate": 9.999013834531869e-05, |
|
"loss": 2.1787, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02369949046095509, |
|
"grad_norm": 0.5423960911916873, |
|
"learning_rate": 9.998459144948825e-05, |
|
"loss": 2.1055, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.028439388553146108, |
|
"grad_norm": 0.40880961489218204, |
|
"learning_rate": 9.997781218872771e-05, |
|
"loss": 2.1723, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.033179286645337126, |
|
"grad_norm": 0.43317175193770346, |
|
"learning_rate": 9.99698007301791e-05, |
|
"loss": 2.0316, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.037919184737528144, |
|
"grad_norm": 0.37892996565691084, |
|
"learning_rate": 9.996055727136406e-05, |
|
"loss": 2.0171, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04265908282971916, |
|
"grad_norm": 0.43747110352943336, |
|
"learning_rate": 9.995008204017915e-05, |
|
"loss": 2.0887, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04739898092191018, |
|
"grad_norm": 0.39459537932523525, |
|
"learning_rate": 9.993837529489007e-05, |
|
"loss": 2.03, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0521388790141012, |
|
"grad_norm": 0.3839963642801344, |
|
"learning_rate": 9.992543732412544e-05, |
|
"loss": 1.9306, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.056878777106292217, |
|
"grad_norm": 0.40520894461995377, |
|
"learning_rate": 9.99112684468696e-05, |
|
"loss": 2.0425, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.061618675198483235, |
|
"grad_norm": 0.390483069303289, |
|
"learning_rate": 9.989586901245472e-05, |
|
"loss": 2.087, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06635857329067425, |
|
"grad_norm": 0.4180445176279502, |
|
"learning_rate": 9.987923940055228e-05, |
|
"loss": 2.0679, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07109847138286526, |
|
"grad_norm": 0.49880430744694115, |
|
"learning_rate": 9.986138002116364e-05, |
|
"loss": 2.0628, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07583836947505629, |
|
"grad_norm": 0.4427166571522091, |
|
"learning_rate": 9.984229131460996e-05, |
|
"loss": 2.0681, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0805782675672473, |
|
"grad_norm": 0.38471776319499607, |
|
"learning_rate": 9.982197375152129e-05, |
|
"loss": 2.0019, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08531816565943832, |
|
"grad_norm": 0.4628072900469101, |
|
"learning_rate": 9.980042783282509e-05, |
|
"loss": 1.9909, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09005806375162934, |
|
"grad_norm": 0.3939907069527393, |
|
"learning_rate": 9.977765408973374e-05, |
|
"loss": 2.0713, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09479796184382036, |
|
"grad_norm": 0.4184843314019155, |
|
"learning_rate": 9.97536530837315e-05, |
|
"loss": 1.9729, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09953785993601137, |
|
"grad_norm": 0.6336861212047761, |
|
"learning_rate": 9.97284254065607e-05, |
|
"loss": 2.0278, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1042777580282024, |
|
"grad_norm": 0.39737439720337403, |
|
"learning_rate": 9.970197168020713e-05, |
|
"loss": 2.0603, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.10901765612039341, |
|
"grad_norm": 0.4161581082817388, |
|
"learning_rate": 9.967429255688468e-05, |
|
"loss": 2.0308, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11375755421258443, |
|
"grad_norm": 0.4122381540422074, |
|
"learning_rate": 9.964538871901923e-05, |
|
"loss": 2.1011, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.11849745230477544, |
|
"grad_norm": 0.40792411841005016, |
|
"learning_rate": 9.961526087923193e-05, |
|
"loss": 2.0535, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12323735039696647, |
|
"grad_norm": 0.4298298302428991, |
|
"learning_rate": 9.958390978032157e-05, |
|
"loss": 1.9882, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.12797724848915748, |
|
"grad_norm": 0.3932772338211237, |
|
"learning_rate": 9.955133619524623e-05, |
|
"loss": 2.0703, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1327171465813485, |
|
"grad_norm": 0.4304879913642714, |
|
"learning_rate": 9.951754092710429e-05, |
|
"loss": 2.0661, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.13745704467353953, |
|
"grad_norm": 0.3933942358750948, |
|
"learning_rate": 9.948252480911458e-05, |
|
"loss": 1.9941, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14219694276573053, |
|
"grad_norm": 0.3876898041012675, |
|
"learning_rate": 9.944628870459587e-05, |
|
"loss": 2.001, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14693684085792155, |
|
"grad_norm": 0.39971149840828696, |
|
"learning_rate": 9.940883350694556e-05, |
|
"loss": 1.9889, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.15167673895011258, |
|
"grad_norm": 0.4322868983437022, |
|
"learning_rate": 9.93701601396177e-05, |
|
"loss": 2.019, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1564166370423036, |
|
"grad_norm": 0.40679391432223605, |
|
"learning_rate": 9.933026955610014e-05, |
|
"loss": 2.0402, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1611565351344946, |
|
"grad_norm": 0.40265148647862, |
|
"learning_rate": 9.928916273989108e-05, |
|
"loss": 1.9488, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.16589643322668562, |
|
"grad_norm": 0.4119893126018702, |
|
"learning_rate": 9.924684070447483e-05, |
|
"loss": 2.0143, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17063633131887665, |
|
"grad_norm": 0.41446853317804655, |
|
"learning_rate": 9.92033044932968e-05, |
|
"loss": 1.9393, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.17537622941106767, |
|
"grad_norm": 0.4775440242382454, |
|
"learning_rate": 9.915855517973776e-05, |
|
"loss": 1.9899, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.18011612750325867, |
|
"grad_norm": 0.41303403265485017, |
|
"learning_rate": 9.91125938670874e-05, |
|
"loss": 2.0431, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1848560255954497, |
|
"grad_norm": 0.381415505593885, |
|
"learning_rate": 9.906542168851715e-05, |
|
"loss": 1.9778, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.18959592368764072, |
|
"grad_norm": 0.45202098843075295, |
|
"learning_rate": 9.901703980705219e-05, |
|
"loss": 2.0098, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19433582177983175, |
|
"grad_norm": 0.38808197740496003, |
|
"learning_rate": 9.896744941554279e-05, |
|
"loss": 1.9467, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.19907571987202274, |
|
"grad_norm": 0.40860216072850924, |
|
"learning_rate": 9.891665173663492e-05, |
|
"loss": 2.0267, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.20381561796421377, |
|
"grad_norm": 0.4068044305771888, |
|
"learning_rate": 9.886464802274009e-05, |
|
"loss": 2.0872, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2085555160564048, |
|
"grad_norm": 0.43039544158069454, |
|
"learning_rate": 9.88114395560045e-05, |
|
"loss": 2.0094, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.21329541414859582, |
|
"grad_norm": 0.37668435282131046, |
|
"learning_rate": 9.875702764827737e-05, |
|
"loss": 2.0032, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.21803531224078682, |
|
"grad_norm": 0.4289799607032317, |
|
"learning_rate": 9.87014136410787e-05, |
|
"loss": 1.9535, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.22277521033297784, |
|
"grad_norm": 0.416501457655663, |
|
"learning_rate": 9.864459890556604e-05, |
|
"loss": 2.0246, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.22751510842516887, |
|
"grad_norm": 0.42709577377722036, |
|
"learning_rate": 9.858658484250082e-05, |
|
"loss": 1.9675, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.23225500651735986, |
|
"grad_norm": 0.38491345570315816, |
|
"learning_rate": 9.852737288221378e-05, |
|
"loss": 1.9768, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2369949046095509, |
|
"grad_norm": 0.4331220698731146, |
|
"learning_rate": 9.846696448456967e-05, |
|
"loss": 1.96, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2417348027017419, |
|
"grad_norm": 0.5157356350680703, |
|
"learning_rate": 9.840536113893129e-05, |
|
"loss": 2.0168, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.24647470079393294, |
|
"grad_norm": 0.42673885807943607, |
|
"learning_rate": 9.834256436412272e-05, |
|
"loss": 1.9192, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.25121459888612396, |
|
"grad_norm": 0.399056341637914, |
|
"learning_rate": 9.827857570839198e-05, |
|
"loss": 2.009, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.25595449697831496, |
|
"grad_norm": 0.38514488410609315, |
|
"learning_rate": 9.821339674937274e-05, |
|
"loss": 2.0237, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.26069439507050596, |
|
"grad_norm": 0.43535566879213633, |
|
"learning_rate": 9.814702909404547e-05, |
|
"loss": 1.9746, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.265434293162697, |
|
"grad_norm": 0.4277848981360601, |
|
"learning_rate": 9.807947437869788e-05, |
|
"loss": 2.0008, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.270174191254888, |
|
"grad_norm": 0.42806115487352164, |
|
"learning_rate": 9.801073426888447e-05, |
|
"loss": 2.0819, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.27491408934707906, |
|
"grad_norm": 0.36287005859609833, |
|
"learning_rate": 9.794081045938554e-05, |
|
"loss": 2.0256, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.27965398743927006, |
|
"grad_norm": 0.467970576527151, |
|
"learning_rate": 9.786970467416538e-05, |
|
"loss": 2.0221, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.28439388553146105, |
|
"grad_norm": 0.37993477630266503, |
|
"learning_rate": 9.779741866632977e-05, |
|
"loss": 1.9589, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2891337836236521, |
|
"grad_norm": 0.44198107142469956, |
|
"learning_rate": 9.772395421808274e-05, |
|
"loss": 2.0035, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2938736817158431, |
|
"grad_norm": 0.44573447679188816, |
|
"learning_rate": 9.764931314068267e-05, |
|
"loss": 1.9909, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2986135798080341, |
|
"grad_norm": 0.4731340699659092, |
|
"learning_rate": 9.757349727439759e-05, |
|
"loss": 2.0103, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.30335347790022515, |
|
"grad_norm": 0.3963283837850387, |
|
"learning_rate": 9.749650848845984e-05, |
|
"loss": 2.0639, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.30809337599241615, |
|
"grad_norm": 0.3884422717238912, |
|
"learning_rate": 9.741834868101998e-05, |
|
"loss": 2.0342, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3128332740846072, |
|
"grad_norm": 0.42096628799860736, |
|
"learning_rate": 9.733901977909997e-05, |
|
"loss": 2.0037, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3175731721767982, |
|
"grad_norm": 0.3922372868315195, |
|
"learning_rate": 9.725852373854568e-05, |
|
"loss": 2.0327, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3223130702689892, |
|
"grad_norm": 0.37724258160489493, |
|
"learning_rate": 9.717686254397866e-05, |
|
"loss": 1.9996, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.32705296836118025, |
|
"grad_norm": 0.36849429342184464, |
|
"learning_rate": 9.70940382087472e-05, |
|
"loss": 1.9789, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.33179286645337125, |
|
"grad_norm": 0.38001698944458373, |
|
"learning_rate": 9.701005277487673e-05, |
|
"loss": 1.8886, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.33653276454556225, |
|
"grad_norm": 0.4434394537121414, |
|
"learning_rate": 9.692490831301944e-05, |
|
"loss": 2.0773, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3412726626377533, |
|
"grad_norm": 0.44409242659624243, |
|
"learning_rate": 9.683860692240321e-05, |
|
"loss": 1.9944, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3460125607299443, |
|
"grad_norm": 0.3706038723114169, |
|
"learning_rate": 9.675115073077989e-05, |
|
"loss": 1.9399, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.35075245882213535, |
|
"grad_norm": 0.3775340444246396, |
|
"learning_rate": 9.666254189437286e-05, |
|
"loss": 2.0434, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.35549235691432635, |
|
"grad_norm": 0.39740898678838216, |
|
"learning_rate": 9.657278259782378e-05, |
|
"loss": 2.0483, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.36023225500651734, |
|
"grad_norm": 0.3856650140837026, |
|
"learning_rate": 9.648187505413886e-05, |
|
"loss": 1.9621, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3649721530987084, |
|
"grad_norm": 0.49084336306431187, |
|
"learning_rate": 9.638982150463415e-05, |
|
"loss": 1.9878, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3697120511908994, |
|
"grad_norm": 0.41318948101107866, |
|
"learning_rate": 9.629662421888039e-05, |
|
"loss": 2.0805, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3744519492830904, |
|
"grad_norm": 0.402590356367594, |
|
"learning_rate": 9.620228549464703e-05, |
|
"loss": 2.0258, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.37919184737528144, |
|
"grad_norm": 0.4461694641117838, |
|
"learning_rate": 9.610680765784556e-05, |
|
"loss": 1.9692, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.38393174546747244, |
|
"grad_norm": 0.41581795351534184, |
|
"learning_rate": 9.601019306247215e-05, |
|
"loss": 2.022, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.3886716435596635, |
|
"grad_norm": 0.4182347418587252, |
|
"learning_rate": 9.591244409054965e-05, |
|
"loss": 1.9989, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.3934115416518545, |
|
"grad_norm": 0.36463111311757684, |
|
"learning_rate": 9.581356315206885e-05, |
|
"loss": 2.0483, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3981514397440455, |
|
"grad_norm": 0.4636476781338481, |
|
"learning_rate": 9.571355268492907e-05, |
|
"loss": 1.9491, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.40289133783623654, |
|
"grad_norm": 0.43027600259738763, |
|
"learning_rate": 9.561241515487802e-05, |
|
"loss": 1.9423, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.40763123592842754, |
|
"grad_norm": 0.43322329785996827, |
|
"learning_rate": 9.551015305545104e-05, |
|
"loss": 1.9349, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"grad_norm": 0.3900423005352424, |
|
"learning_rate": 9.540676890790962e-05, |
|
"loss": 1.9571, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4171110321128096, |
|
"grad_norm": 0.3736027589992883, |
|
"learning_rate": 9.53022652611792e-05, |
|
"loss": 2.033, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4218509302050006, |
|
"grad_norm": 0.4412678924097936, |
|
"learning_rate": 9.519664469178638e-05, |
|
"loss": 1.9928, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.42659082829719164, |
|
"grad_norm": 0.36064586995797043, |
|
"learning_rate": 9.508990980379537e-05, |
|
"loss": 2.0181, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.43133072638938263, |
|
"grad_norm": 0.36982453028008294, |
|
"learning_rate": 9.498206322874381e-05, |
|
"loss": 2.0118, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.43607062448157363, |
|
"grad_norm": 0.4936789348648113, |
|
"learning_rate": 9.487310762557784e-05, |
|
"loss": 2.0388, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4408105225737647, |
|
"grad_norm": 0.4192120475618224, |
|
"learning_rate": 9.476304568058657e-05, |
|
"loss": 2.0001, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4455504206659557, |
|
"grad_norm": 0.4212248975591549, |
|
"learning_rate": 9.465188010733586e-05, |
|
"loss": 2.0464, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4502903187581467, |
|
"grad_norm": 0.4111853146435081, |
|
"learning_rate": 9.453961364660143e-05, |
|
"loss": 2.0118, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.45503021685033773, |
|
"grad_norm": 0.3911083150496816, |
|
"learning_rate": 9.442624906630124e-05, |
|
"loss": 1.9256, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.45977011494252873, |
|
"grad_norm": 0.4275198886604283, |
|
"learning_rate": 9.431178916142731e-05, |
|
"loss": 2.0142, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4645100130347197, |
|
"grad_norm": 0.41213645663674664, |
|
"learning_rate": 9.419623675397672e-05, |
|
"loss": 1.9863, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4692499111269108, |
|
"grad_norm": 0.39744532831875506, |
|
"learning_rate": 9.407959469288214e-05, |
|
"loss": 1.963, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.4739898092191018, |
|
"grad_norm": 0.40358506493166846, |
|
"learning_rate": 9.396186585394153e-05, |
|
"loss": 1.9724, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.47872970731129283, |
|
"grad_norm": 0.3715075397009002, |
|
"learning_rate": 9.384305313974719e-05, |
|
"loss": 1.9564, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4834696054034838, |
|
"grad_norm": 0.41249417731334614, |
|
"learning_rate": 9.372315947961434e-05, |
|
"loss": 2.0089, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4882095034956748, |
|
"grad_norm": 0.4477075629260475, |
|
"learning_rate": 9.360218782950873e-05, |
|
"loss": 2.0249, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4929494015878659, |
|
"grad_norm": 0.41335031918044873, |
|
"learning_rate": 9.34801411719739e-05, |
|
"loss": 2.0439, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4976892996800569, |
|
"grad_norm": 0.4023689824634566, |
|
"learning_rate": 9.335702251605756e-05, |
|
"loss": 2.0278, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5024291977722479, |
|
"grad_norm": 0.37476123227339486, |
|
"learning_rate": 9.32328348972374e-05, |
|
"loss": 2.0854, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5071690958644389, |
|
"grad_norm": 0.3680109272331818, |
|
"learning_rate": 9.310758137734634e-05, |
|
"loss": 2.0505, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5119089939566299, |
|
"grad_norm": 0.47590335433852127, |
|
"learning_rate": 9.298126504449697e-05, |
|
"loss": 1.9342, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5166488920488209, |
|
"grad_norm": 0.443747158773761, |
|
"learning_rate": 9.285388901300537e-05, |
|
"loss": 2.0338, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5213887901410119, |
|
"grad_norm": 0.4300619230217585, |
|
"learning_rate": 9.272545642331443e-05, |
|
"loss": 1.9431, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.526128688233203, |
|
"grad_norm": 0.4068927208227842, |
|
"learning_rate": 9.259597044191636e-05, |
|
"loss": 1.9639, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.530868586325394, |
|
"grad_norm": 0.3904780080331756, |
|
"learning_rate": 9.246543426127463e-05, |
|
"loss": 2.044, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.535608484417585, |
|
"grad_norm": 0.4074988084895911, |
|
"learning_rate": 9.233385109974528e-05, |
|
"loss": 1.9209, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.540348382509776, |
|
"grad_norm": 0.48971289458578504, |
|
"learning_rate": 9.220122420149753e-05, |
|
"loss": 1.9405, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.545088280601967, |
|
"grad_norm": 0.4560990819156225, |
|
"learning_rate": 9.206755683643383e-05, |
|
"loss": 1.9754, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5498281786941581, |
|
"grad_norm": 0.4953771996336736, |
|
"learning_rate": 9.193285230010923e-05, |
|
"loss": 1.9832, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5545680767863491, |
|
"grad_norm": 0.452270837264993, |
|
"learning_rate": 9.179711391365016e-05, |
|
"loss": 2.0267, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5593079748785401, |
|
"grad_norm": 0.38839940667413064, |
|
"learning_rate": 9.166034502367246e-05, |
|
"loss": 2.0303, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5640478729707311, |
|
"grad_norm": 0.4434400621892702, |
|
"learning_rate": 9.152254900219899e-05, |
|
"loss": 2.019, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5687877710629221, |
|
"grad_norm": 0.4265655972195879, |
|
"learning_rate": 9.138372924657638e-05, |
|
"loss": 1.9578, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5735276691551132, |
|
"grad_norm": 0.37712073893593084, |
|
"learning_rate": 9.124388917939135e-05, |
|
"loss": 1.9002, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5782675672473042, |
|
"grad_norm": 0.3967821230664083, |
|
"learning_rate": 9.110303224838628e-05, |
|
"loss": 1.9982, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5830074653394952, |
|
"grad_norm": 0.4225910574667248, |
|
"learning_rate": 9.096116192637424e-05, |
|
"loss": 1.9999, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5877473634316862, |
|
"grad_norm": 0.46005143244561764, |
|
"learning_rate": 9.081828171115334e-05, |
|
"loss": 1.9269, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5924872615238772, |
|
"grad_norm": 0.41650738683050376, |
|
"learning_rate": 9.067439512542048e-05, |
|
"loss": 2.0138, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5972271596160682, |
|
"grad_norm": 0.4595664788322495, |
|
"learning_rate": 9.052950571668457e-05, |
|
"loss": 1.8902, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6019670577082593, |
|
"grad_norm": 0.47181766838174233, |
|
"learning_rate": 9.038361705717897e-05, |
|
"loss": 2.0354, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6067069558004503, |
|
"grad_norm": 0.4016620461236779, |
|
"learning_rate": 9.023673274377349e-05, |
|
"loss": 2.0428, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6114468538926413, |
|
"grad_norm": 0.44582424551905314, |
|
"learning_rate": 9.00888563978857e-05, |
|
"loss": 1.9205, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6161867519848323, |
|
"grad_norm": 0.4731092970060822, |
|
"learning_rate": 8.993999166539155e-05, |
|
"loss": 1.9468, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6209266500770233, |
|
"grad_norm": 0.41403788063445784, |
|
"learning_rate": 8.979014221653569e-05, |
|
"loss": 1.967, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6256665481692144, |
|
"grad_norm": 0.3824681634104647, |
|
"learning_rate": 8.963931174584072e-05, |
|
"loss": 1.9764, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6304064462614054, |
|
"grad_norm": 0.3979138111413701, |
|
"learning_rate": 8.94875039720163e-05, |
|
"loss": 2.0262, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6351463443535964, |
|
"grad_norm": 0.41027150705022153, |
|
"learning_rate": 8.93347226378674e-05, |
|
"loss": 1.9379, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6398862424457874, |
|
"grad_norm": 0.46333301444068553, |
|
"learning_rate": 8.9180971510202e-05, |
|
"loss": 1.9551, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6446261405379784, |
|
"grad_norm": 0.39959859369206574, |
|
"learning_rate": 8.902625437973823e-05, |
|
"loss": 1.9199, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6493660386301695, |
|
"grad_norm": 0.42731835258341894, |
|
"learning_rate": 8.887057506101096e-05, |
|
"loss": 2.0178, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6541059367223605, |
|
"grad_norm": 0.43891265274307517, |
|
"learning_rate": 8.871393739227764e-05, |
|
"loss": 1.9369, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6588458348145515, |
|
"grad_norm": 0.4314210574368562, |
|
"learning_rate": 8.855634523542384e-05, |
|
"loss": 2.0049, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6635857329067425, |
|
"grad_norm": 0.44613138847149775, |
|
"learning_rate": 8.839780247586785e-05, |
|
"loss": 1.9509, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6683256309989335, |
|
"grad_norm": 0.4379460820834945, |
|
"learning_rate": 8.823831302246498e-05, |
|
"loss": 1.9541, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.6730655290911245, |
|
"grad_norm": 0.3682639471382051, |
|
"learning_rate": 8.807788080741124e-05, |
|
"loss": 2.0064, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6778054271833156, |
|
"grad_norm": 0.3981445155765943, |
|
"learning_rate": 8.791650978614627e-05, |
|
"loss": 1.9151, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.6825453252755066, |
|
"grad_norm": 0.3868845773205047, |
|
"learning_rate": 8.77542039372559e-05, |
|
"loss": 2.0033, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6872852233676976, |
|
"grad_norm": 0.4065050795968265, |
|
"learning_rate": 8.759096726237406e-05, |
|
"loss": 1.9333, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6920251214598886, |
|
"grad_norm": 0.4019451177579478, |
|
"learning_rate": 8.742680378608405e-05, |
|
"loss": 1.9738, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6967650195520796, |
|
"grad_norm": 0.40929290402886576, |
|
"learning_rate": 8.726171755581943e-05, |
|
"loss": 1.9054, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7015049176442707, |
|
"grad_norm": 0.4521322208310143, |
|
"learning_rate": 8.709571264176409e-05, |
|
"loss": 2.038, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7062448157364617, |
|
"grad_norm": 0.4152045328204035, |
|
"learning_rate": 8.692879313675201e-05, |
|
"loss": 2.0632, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7109847138286527, |
|
"grad_norm": 0.4153887781497306, |
|
"learning_rate": 8.676096315616633e-05, |
|
"loss": 1.9658, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7157246119208437, |
|
"grad_norm": 0.4421939758182222, |
|
"learning_rate": 8.659222683783785e-05, |
|
"loss": 1.9318, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7204645100130347, |
|
"grad_norm": 0.40964882006156955, |
|
"learning_rate": 8.642258834194306e-05, |
|
"loss": 1.9843, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7252044081052257, |
|
"grad_norm": 0.4083908197791484, |
|
"learning_rate": 8.625205185090148e-05, |
|
"loss": 1.9828, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7299443061974168, |
|
"grad_norm": 0.39713303306109243, |
|
"learning_rate": 8.608062156927267e-05, |
|
"loss": 1.9957, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7346842042896078, |
|
"grad_norm": 0.3984748196137378, |
|
"learning_rate": 8.59083017236525e-05, |
|
"loss": 1.9756, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7394241023817988, |
|
"grad_norm": 0.3801131175331665, |
|
"learning_rate": 8.57350965625689e-05, |
|
"loss": 2.0876, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7441640004739898, |
|
"grad_norm": 0.40526485533564677, |
|
"learning_rate": 8.556101035637723e-05, |
|
"loss": 1.9273, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7489038985661808, |
|
"grad_norm": 0.43256807999674307, |
|
"learning_rate": 8.538604739715487e-05, |
|
"loss": 1.9965, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7536437966583719, |
|
"grad_norm": 0.4089571388848955, |
|
"learning_rate": 8.521021199859547e-05, |
|
"loss": 1.9838, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7583836947505629, |
|
"grad_norm": 0.43989226476544846, |
|
"learning_rate": 8.503350849590261e-05, |
|
"loss": 2.0101, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7631235928427539, |
|
"grad_norm": 0.4312349465343795, |
|
"learning_rate": 8.485594124568286e-05, |
|
"loss": 2.0024, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.7678634909349449, |
|
"grad_norm": 0.42870468778423404, |
|
"learning_rate": 8.467751462583837e-05, |
|
"loss": 1.9171, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7726033890271359, |
|
"grad_norm": 0.37297491856173187, |
|
"learning_rate": 8.449823303545902e-05, |
|
"loss": 1.9234, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.777343287119327, |
|
"grad_norm": 0.43903627896277525, |
|
"learning_rate": 8.431810089471386e-05, |
|
"loss": 2.0138, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.782083185211518, |
|
"grad_norm": 0.4356441070614573, |
|
"learning_rate": 8.413712264474218e-05, |
|
"loss": 1.9822, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.786823083303709, |
|
"grad_norm": 0.42844869008890196, |
|
"learning_rate": 8.395530274754401e-05, |
|
"loss": 1.9615, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7915629813959, |
|
"grad_norm": 0.442280918540681, |
|
"learning_rate": 8.377264568587012e-05, |
|
"loss": 1.9835, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.796302879488091, |
|
"grad_norm": 0.42858220049882395, |
|
"learning_rate": 8.358915596311143e-05, |
|
"loss": 1.9043, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.801042777580282, |
|
"grad_norm": 0.388683268775689, |
|
"learning_rate": 8.340483810318809e-05, |
|
"loss": 2.0451, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8057826756724731, |
|
"grad_norm": 0.4116698984896444, |
|
"learning_rate": 8.321969665043785e-05, |
|
"loss": 1.9792, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8105225737646641, |
|
"grad_norm": 0.40384036708963345, |
|
"learning_rate": 8.303373616950408e-05, |
|
"loss": 1.8407, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8152624718568551, |
|
"grad_norm": 0.4680015183031998, |
|
"learning_rate": 8.28469612452232e-05, |
|
"loss": 1.9616, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8200023699490461, |
|
"grad_norm": 0.43443236620799985, |
|
"learning_rate": 8.265937648251162e-05, |
|
"loss": 1.9879, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"grad_norm": 0.4892981794701289, |
|
"learning_rate": 8.247098650625229e-05, |
|
"loss": 1.9988, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.8294821661334282, |
|
"grad_norm": 0.41120558715230104, |
|
"learning_rate": 8.228179596118055e-05, |
|
"loss": 2.0057, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8342220642256192, |
|
"grad_norm": 0.3856884225256909, |
|
"learning_rate": 8.209180951176972e-05, |
|
"loss": 2.0345, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.8389619623178102, |
|
"grad_norm": 0.43262267182183567, |
|
"learning_rate": 8.190103184211606e-05, |
|
"loss": 2.0506, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.8437018604100012, |
|
"grad_norm": 0.46227543956491046, |
|
"learning_rate": 8.170946765582327e-05, |
|
"loss": 1.9537, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.8484417585021922, |
|
"grad_norm": 0.41122944892391, |
|
"learning_rate": 8.151712167588654e-05, |
|
"loss": 1.9481, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.8531816565943833, |
|
"grad_norm": 0.4762971181475547, |
|
"learning_rate": 8.13239986445761e-05, |
|
"loss": 1.969, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8579215546865743, |
|
"grad_norm": 0.41348450657088276, |
|
"learning_rate": 8.113010332332032e-05, |
|
"loss": 2.0127, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8626614527787653, |
|
"grad_norm": 0.41355376759860496, |
|
"learning_rate": 8.093544049258826e-05, |
|
"loss": 1.9378, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8674013508709563, |
|
"grad_norm": 0.4739386141603482, |
|
"learning_rate": 8.074001495177187e-05, |
|
"loss": 1.9548, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.8721412489631473, |
|
"grad_norm": 0.4067937473126016, |
|
"learning_rate": 8.054383151906766e-05, |
|
"loss": 1.9588, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8768811470553383, |
|
"grad_norm": 0.4603727127637402, |
|
"learning_rate": 8.034689503135783e-05, |
|
"loss": 1.9616, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8816210451475294, |
|
"grad_norm": 0.404919540874673, |
|
"learning_rate": 8.014921034409115e-05, |
|
"loss": 1.9476, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8863609432397204, |
|
"grad_norm": 0.39850400899429533, |
|
"learning_rate": 7.99507823311631e-05, |
|
"loss": 1.9603, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8911008413319114, |
|
"grad_norm": 0.48693274229874695, |
|
"learning_rate": 7.97516158847958e-05, |
|
"loss": 2.0121, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8958407394241024, |
|
"grad_norm": 0.45401122715232545, |
|
"learning_rate": 7.955171591541739e-05, |
|
"loss": 1.8593, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9005806375162934, |
|
"grad_norm": 0.38605278944495364, |
|
"learning_rate": 7.935108735154094e-05, |
|
"loss": 1.9199, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9053205356084845, |
|
"grad_norm": 0.4453838492498413, |
|
"learning_rate": 7.914973513964291e-05, |
|
"loss": 1.9354, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9100604337006755, |
|
"grad_norm": 0.4123431078009058, |
|
"learning_rate": 7.894766424404126e-05, |
|
"loss": 1.9807, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.9148003317928665, |
|
"grad_norm": 0.43369573713775106, |
|
"learning_rate": 7.874487964677301e-05, |
|
"loss": 1.9707, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9195402298850575, |
|
"grad_norm": 0.3949770503185179, |
|
"learning_rate": 7.854138634747145e-05, |
|
"loss": 1.9742, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9242801279772485, |
|
"grad_norm": 0.4224215984268503, |
|
"learning_rate": 7.833718936324277e-05, |
|
"loss": 1.9465, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9290200260694395, |
|
"grad_norm": 0.5228997588486322, |
|
"learning_rate": 7.813229372854251e-05, |
|
"loss": 1.9454, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.9337599241616306, |
|
"grad_norm": 0.42165180512522465, |
|
"learning_rate": 7.792670449505135e-05, |
|
"loss": 1.9175, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9384998222538216, |
|
"grad_norm": 0.40378336800384856, |
|
"learning_rate": 7.772042673155055e-05, |
|
"loss": 1.9237, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9432397203460126, |
|
"grad_norm": 0.45740238886085255, |
|
"learning_rate": 7.751346552379706e-05, |
|
"loss": 1.9752, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9479796184382036, |
|
"grad_norm": 0.39149703066060726, |
|
"learning_rate": 7.730582597439799e-05, |
|
"loss": 1.98, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9527195165303946, |
|
"grad_norm": 0.4198989958604622, |
|
"learning_rate": 7.709751320268499e-05, |
|
"loss": 1.9937, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9574594146225857, |
|
"grad_norm": 0.45036655944797305, |
|
"learning_rate": 7.688853234458786e-05, |
|
"loss": 1.9439, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9621993127147767, |
|
"grad_norm": 0.47886989965002774, |
|
"learning_rate": 7.667888855250806e-05, |
|
"loss": 1.8984, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.9669392108069677, |
|
"grad_norm": 0.4485436591345206, |
|
"learning_rate": 7.646858699519158e-05, |
|
"loss": 1.9997, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9716791088991587, |
|
"grad_norm": 0.4089350286618743, |
|
"learning_rate": 7.625763285760154e-05, |
|
"loss": 2.0561, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9764190069913496, |
|
"grad_norm": 0.5012148973934161, |
|
"learning_rate": 7.604603134079039e-05, |
|
"loss": 1.9108, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9811589050835408, |
|
"grad_norm": 0.4193397192808331, |
|
"learning_rate": 7.583378766177163e-05, |
|
"loss": 2.0375, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9858988031757318, |
|
"grad_norm": 0.3996742152514563, |
|
"learning_rate": 7.56209070533912e-05, |
|
"loss": 1.8992, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9906387012679227, |
|
"grad_norm": 0.43312783729617976, |
|
"learning_rate": 7.540739476419847e-05, |
|
"loss": 2.0202, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9953785993601137, |
|
"grad_norm": 0.47876561721756805, |
|
"learning_rate": 7.519325605831684e-05, |
|
"loss": 1.9258, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0001184974523047, |
|
"grad_norm": 0.40845159679128945, |
|
"learning_rate": 7.497849621531396e-05, |
|
"loss": 1.8963, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0048583955444959, |
|
"grad_norm": 0.4911320886031023, |
|
"learning_rate": 7.476312053007151e-05, |
|
"loss": 1.8763, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.0095982936366867, |
|
"grad_norm": 0.4341191300612264, |
|
"learning_rate": 7.454713431265475e-05, |
|
"loss": 1.9345, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.0143381917288778, |
|
"grad_norm": 0.44526984352662835, |
|
"learning_rate": 7.43305428881815e-05, |
|
"loss": 1.9666, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.019078089821069, |
|
"grad_norm": 0.45021419491727926, |
|
"learning_rate": 7.411335159669093e-05, |
|
"loss": 1.9683, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.0238179879132598, |
|
"grad_norm": 0.46367987121746707, |
|
"learning_rate": 7.389556579301186e-05, |
|
"loss": 1.884, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.028557886005451, |
|
"grad_norm": 0.518631039907863, |
|
"learning_rate": 7.367719084663074e-05, |
|
"loss": 1.8473, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.0332977840976418, |
|
"grad_norm": 0.4686244164357671, |
|
"learning_rate": 7.345823214155927e-05, |
|
"loss": 1.8894, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.038037682189833, |
|
"grad_norm": 0.5124536145999882, |
|
"learning_rate": 7.323869507620169e-05, |
|
"loss": 1.886, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.0427775802820238, |
|
"grad_norm": 0.428865165913033, |
|
"learning_rate": 7.30185850632216e-05, |
|
"loss": 1.8934, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.047517478374215, |
|
"grad_norm": 0.4575909980653946, |
|
"learning_rate": 7.27979075294086e-05, |
|
"loss": 1.8793, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.052257376466406, |
|
"grad_norm": 0.46819042427920937, |
|
"learning_rate": 7.257666791554448e-05, |
|
"loss": 1.9177, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.056997274558597, |
|
"grad_norm": 0.5869490097444697, |
|
"learning_rate": 7.2354871676269e-05, |
|
"loss": 1.8888, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.061737172650788, |
|
"grad_norm": 0.4407701363338049, |
|
"learning_rate": 7.213252427994547e-05, |
|
"loss": 1.9145, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.066477070742979, |
|
"grad_norm": 0.5471189926425418, |
|
"learning_rate": 7.1909631208526e-05, |
|
"loss": 1.8647, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.07121696883517, |
|
"grad_norm": 0.45247580903783674, |
|
"learning_rate": 7.168619795741616e-05, |
|
"loss": 1.8793, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.0759568669273611, |
|
"grad_norm": 0.5394937103937341, |
|
"learning_rate": 7.146223003533964e-05, |
|
"loss": 1.9394, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.080696765019552, |
|
"grad_norm": 0.5010981958648577, |
|
"learning_rate": 7.12377329642024e-05, |
|
"loss": 1.8009, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.0854366631117431, |
|
"grad_norm": 0.49455090224086273, |
|
"learning_rate": 7.101271227895646e-05, |
|
"loss": 1.9877, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.090176561203934, |
|
"grad_norm": 0.4487359249312413, |
|
"learning_rate": 7.07871735274636e-05, |
|
"loss": 1.8578, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.0949164592961251, |
|
"grad_norm": 0.5006725728639967, |
|
"learning_rate": 7.056112227035831e-05, |
|
"loss": 1.9142, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.0996563573883162, |
|
"grad_norm": 0.46840477309344347, |
|
"learning_rate": 7.033456408091103e-05, |
|
"loss": 1.9178, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.1043962554805071, |
|
"grad_norm": 0.44881264282080685, |
|
"learning_rate": 7.010750454489042e-05, |
|
"loss": 1.9011, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.1091361535726982, |
|
"grad_norm": 0.4914874135601711, |
|
"learning_rate": 6.987994926042588e-05, |
|
"loss": 1.8817, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.1138760516648891, |
|
"grad_norm": 0.4875786937414022, |
|
"learning_rate": 6.965190383786938e-05, |
|
"loss": 1.9151, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.1186159497570802, |
|
"grad_norm": 0.47374621253430516, |
|
"learning_rate": 6.942337389965722e-05, |
|
"loss": 1.8652, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.1233558478492713, |
|
"grad_norm": 0.45812614575538185, |
|
"learning_rate": 6.919436508017139e-05, |
|
"loss": 1.9191, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.1280957459414622, |
|
"grad_norm": 0.5233924389852819, |
|
"learning_rate": 6.896488302560062e-05, |
|
"loss": 1.8944, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.1328356440336533, |
|
"grad_norm": 0.4760349705385804, |
|
"learning_rate": 6.873493339380125e-05, |
|
"loss": 1.8896, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.1375755421258442, |
|
"grad_norm": 0.47170548205722757, |
|
"learning_rate": 6.850452185415763e-05, |
|
"loss": 1.8436, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.1423154402180353, |
|
"grad_norm": 0.4742928761569321, |
|
"learning_rate": 6.827365408744244e-05, |
|
"loss": 1.938, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.1470553383102264, |
|
"grad_norm": 0.5423850691494456, |
|
"learning_rate": 6.804233578567658e-05, |
|
"loss": 1.8889, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.1517952364024173, |
|
"grad_norm": 0.48227588856524584, |
|
"learning_rate": 6.781057265198885e-05, |
|
"loss": 1.9094, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.1565351344946084, |
|
"grad_norm": 0.45425361404028264, |
|
"learning_rate": 6.75783704004753e-05, |
|
"loss": 1.859, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.1612750325867993, |
|
"grad_norm": 0.4433613473826934, |
|
"learning_rate": 6.734573475605846e-05, |
|
"loss": 1.9084, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.1660149306789904, |
|
"grad_norm": 0.4943942467439202, |
|
"learning_rate": 6.711267145434603e-05, |
|
"loss": 1.9647, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.1707548287711815, |
|
"grad_norm": 0.4577985217898985, |
|
"learning_rate": 6.687918624148963e-05, |
|
"loss": 1.8903, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.1754947268633724, |
|
"grad_norm": 0.5864019689805202, |
|
"learning_rate": 6.664528487404298e-05, |
|
"loss": 1.8431, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.1802346249555635, |
|
"grad_norm": 0.4979542549244347, |
|
"learning_rate": 6.641097311882015e-05, |
|
"loss": 1.9381, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.1849745230477544, |
|
"grad_norm": 0.5142117151718176, |
|
"learning_rate": 6.617625675275317e-05, |
|
"loss": 1.8608, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.1897144211399455, |
|
"grad_norm": 0.5179927851112526, |
|
"learning_rate": 6.59411415627498e-05, |
|
"loss": 1.9493, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.1944543192321366, |
|
"grad_norm": 0.5221841655224025, |
|
"learning_rate": 6.570563334555068e-05, |
|
"loss": 1.8724, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.1991942173243275, |
|
"grad_norm": 0.4985837837212232, |
|
"learning_rate": 6.546973790758655e-05, |
|
"loss": 1.952, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.2039341154165186, |
|
"grad_norm": 0.5552319456240327, |
|
"learning_rate": 6.523346106483504e-05, |
|
"loss": 1.9397, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.2086740135087095, |
|
"grad_norm": 0.4769628041892156, |
|
"learning_rate": 6.499680864267725e-05, |
|
"loss": 2.0053, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.2134139116009006, |
|
"grad_norm": 0.4516518959319936, |
|
"learning_rate": 6.475978647575416e-05, |
|
"loss": 1.9402, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.2181538096930915, |
|
"grad_norm": 0.4913816447981876, |
|
"learning_rate": 6.452240040782276e-05, |
|
"loss": 1.8451, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.2228937077852826, |
|
"grad_norm": 0.4748765999127487, |
|
"learning_rate": 6.4284656291612e-05, |
|
"loss": 1.9117, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.2276336058774737, |
|
"grad_norm": 0.5114110285568767, |
|
"learning_rate": 6.404655998867848e-05, |
|
"loss": 1.8831, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.2323735039696646, |
|
"grad_norm": 0.47839985560769943, |
|
"learning_rate": 6.380811736926188e-05, |
|
"loss": 1.8627, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.2371134020618557, |
|
"grad_norm": 0.5355232832118345, |
|
"learning_rate": 6.356933431214034e-05, |
|
"loss": 1.9189, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.2418533001540466, |
|
"grad_norm": 0.4895001261750141, |
|
"learning_rate": 6.33302167044854e-05, |
|
"loss": 1.9699, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.2465931982462377, |
|
"grad_norm": 0.4635882938471385, |
|
"learning_rate": 6.309077044171694e-05, |
|
"loss": 1.8779, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.2513330963384286, |
|
"grad_norm": 0.45916609044978873, |
|
"learning_rate": 6.285100142735782e-05, |
|
"loss": 1.8527, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.2560729944306197, |
|
"grad_norm": 0.46784246908879684, |
|
"learning_rate": 6.261091557288826e-05, |
|
"loss": 1.8844, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.2608128925228108, |
|
"grad_norm": 0.5131345820024794, |
|
"learning_rate": 6.237051879760014e-05, |
|
"loss": 1.8402, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.2655527906150017, |
|
"grad_norm": 0.5766279369511716, |
|
"learning_rate": 6.21298170284511e-05, |
|
"loss": 1.8558, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.2702926887071928, |
|
"grad_norm": 0.48863073587665085, |
|
"learning_rate": 6.188881619991834e-05, |
|
"loss": 1.9337, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.2750325867993837, |
|
"grad_norm": 0.5958235159214345, |
|
"learning_rate": 6.164752225385235e-05, |
|
"loss": 1.9018, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.2797724848915748, |
|
"grad_norm": 0.5127854587716114, |
|
"learning_rate": 6.140594113933042e-05, |
|
"loss": 1.928, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.284512382983766, |
|
"grad_norm": 0.4918233056408275, |
|
"learning_rate": 6.116407881250994e-05, |
|
"loss": 1.9623, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.2892522810759568, |
|
"grad_norm": 0.4759408966884228, |
|
"learning_rate": 6.0921941236481505e-05, |
|
"loss": 1.876, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.293992179168148, |
|
"grad_norm": 0.49692255085585224, |
|
"learning_rate": 6.067953438112205e-05, |
|
"loss": 1.871, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.2987320772603388, |
|
"grad_norm": 0.51069268079758, |
|
"learning_rate": 6.043686422294747e-05, |
|
"loss": 1.9503, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.30347197535253, |
|
"grad_norm": 0.4848235028179103, |
|
"learning_rate": 6.019393674496543e-05, |
|
"loss": 1.9636, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.308211873444721, |
|
"grad_norm": 0.7269161906292443, |
|
"learning_rate": 5.995075793652775e-05, |
|
"loss": 1.8818, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.312951771536912, |
|
"grad_norm": 0.46011103384366614, |
|
"learning_rate": 5.9707333793182794e-05, |
|
"loss": 1.9123, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.317691669629103, |
|
"grad_norm": 0.5009880993886451, |
|
"learning_rate": 5.946367031652761e-05, |
|
"loss": 1.9407, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.3224315677212939, |
|
"grad_norm": 0.5049332736921734, |
|
"learning_rate": 5.921977351406004e-05, |
|
"loss": 1.8624, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.327171465813485, |
|
"grad_norm": 0.4984446750273935, |
|
"learning_rate": 5.8975649399030485e-05, |
|
"loss": 1.8407, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.331911363905676, |
|
"grad_norm": 0.5202629992326526, |
|
"learning_rate": 5.873130399029374e-05, |
|
"loss": 1.8723, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.336651261997867, |
|
"grad_norm": 0.57260787674711, |
|
"learning_rate": 5.8486743312160584e-05, |
|
"loss": 1.9077, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.341391160090058, |
|
"grad_norm": 0.47793956835922086, |
|
"learning_rate": 5.824197339424923e-05, |
|
"loss": 1.9855, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.346131058182249, |
|
"grad_norm": 0.4699288477951403, |
|
"learning_rate": 5.799700027133666e-05, |
|
"loss": 1.9131, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.35087095627444, |
|
"grad_norm": 0.504238497502292, |
|
"learning_rate": 5.7751829983209896e-05, |
|
"loss": 1.9438, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.3556108543666312, |
|
"grad_norm": 0.4814570049600418, |
|
"learning_rate": 5.750646857451701e-05, |
|
"loss": 1.9549, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.360350752458822, |
|
"grad_norm": 0.5038793494327912, |
|
"learning_rate": 5.726092209461814e-05, |
|
"loss": 1.9016, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.3650906505510132, |
|
"grad_norm": 0.5240318677978467, |
|
"learning_rate": 5.701519659743636e-05, |
|
"loss": 1.9323, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.369830548643204, |
|
"grad_norm": 0.5135642745972475, |
|
"learning_rate": 5.6769298141308345e-05, |
|
"loss": 1.8633, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.3745704467353952, |
|
"grad_norm": 0.5115968529507217, |
|
"learning_rate": 5.652323278883511e-05, |
|
"loss": 1.8486, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 0.4973184073827783, |
|
"learning_rate": 5.6277006606732465e-05, |
|
"loss": 1.9067, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.3840502429197772, |
|
"grad_norm": 0.48576803898302945, |
|
"learning_rate": 5.603062566568144e-05, |
|
"loss": 1.9167, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.3887901410119683, |
|
"grad_norm": 0.532613823404453, |
|
"learning_rate": 5.5784096040178624e-05, |
|
"loss": 1.916, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.3935300391041592, |
|
"grad_norm": 0.5402345956070669, |
|
"learning_rate": 5.5537423808386457e-05, |
|
"loss": 1.9193, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.3982699371963503, |
|
"grad_norm": 0.4920153790997806, |
|
"learning_rate": 5.5290615051983276e-05, |
|
"loss": 1.8214, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.4030098352885414, |
|
"grad_norm": 0.5305053717830343, |
|
"learning_rate": 5.504367585601342e-05, |
|
"loss": 1.8724, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.4077497333807323, |
|
"grad_norm": 0.5348665608450567, |
|
"learning_rate": 5.479661230873723e-05, |
|
"loss": 1.9576, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.4124896314729234, |
|
"grad_norm": 0.5212184732874925, |
|
"learning_rate": 5.4549430501480895e-05, |
|
"loss": 1.9409, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.4172295295651143, |
|
"grad_norm": 0.513803010422433, |
|
"learning_rate": 5.43021365284863e-05, |
|
"loss": 1.8691, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.4219694276573054, |
|
"grad_norm": 0.5405731422319697, |
|
"learning_rate": 5.405473648676074e-05, |
|
"loss": 1.9071, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.4267093257494965, |
|
"grad_norm": 0.5828580104321831, |
|
"learning_rate": 5.380723647592668e-05, |
|
"loss": 1.8781, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.4314492238416874, |
|
"grad_norm": 0.4730373307838654, |
|
"learning_rate": 5.3559642598071244e-05, |
|
"loss": 1.9514, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.4361891219338785, |
|
"grad_norm": 0.5098706245647135, |
|
"learning_rate": 5.3311960957595885e-05, |
|
"loss": 1.9019, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.4409290200260694, |
|
"grad_norm": 0.4902558604014986, |
|
"learning_rate": 5.306419766106582e-05, |
|
"loss": 1.8003, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.4456689181182605, |
|
"grad_norm": 0.5662981198334492, |
|
"learning_rate": 5.2816358817059483e-05, |
|
"loss": 1.9584, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.4504088162104516, |
|
"grad_norm": 0.5080795735549143, |
|
"learning_rate": 5.2568450536017946e-05, |
|
"loss": 1.8299, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.4551487143026425, |
|
"grad_norm": 0.4883320170692768, |
|
"learning_rate": 5.23204789300942e-05, |
|
"loss": 1.8948, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.4598886123948336, |
|
"grad_norm": 0.5018665885085004, |
|
"learning_rate": 5.207245011300256e-05, |
|
"loss": 1.9096, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.4646285104870245, |
|
"grad_norm": 0.49985987707909735, |
|
"learning_rate": 5.182437019986781e-05, |
|
"loss": 1.8725, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.4693684085792156, |
|
"grad_norm": 0.5501802725606001, |
|
"learning_rate": 5.157624530707457e-05, |
|
"loss": 1.852, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.4741083066714067, |
|
"grad_norm": 0.5050415458131547, |
|
"learning_rate": 5.132808155211637e-05, |
|
"loss": 1.9234, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.4788482047635976, |
|
"grad_norm": 0.5388328369977669, |
|
"learning_rate": 5.107988505344493e-05, |
|
"loss": 1.8503, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.4835881028557887, |
|
"grad_norm": 0.5294932998067775, |
|
"learning_rate": 5.083166193031924e-05, |
|
"loss": 1.8602, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.4883280009479796, |
|
"grad_norm": 0.5081432892581731, |
|
"learning_rate": 5.058341830265473e-05, |
|
"loss": 1.8916, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.4930678990401707, |
|
"grad_norm": 0.48231454449779565, |
|
"learning_rate": 5.033516029087231e-05, |
|
"loss": 1.9268, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.4978077971323618, |
|
"grad_norm": 0.5031248301603529, |
|
"learning_rate": 5.008689401574762e-05, |
|
"loss": 1.8619, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.5025476952245527, |
|
"grad_norm": 0.48955254310210605, |
|
"learning_rate": 4.983862559825994e-05, |
|
"loss": 1.9342, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.5072875933167436, |
|
"grad_norm": 0.5786990144175583, |
|
"learning_rate": 4.959036115944146e-05, |
|
"loss": 1.9487, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.5120274914089347, |
|
"grad_norm": 0.5204059056090741, |
|
"learning_rate": 4.93421068202262e-05, |
|
"loss": 1.9237, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.5167673895011258, |
|
"grad_norm": 0.5063131987653341, |
|
"learning_rate": 4.909386870129921e-05, |
|
"loss": 1.9752, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.5215072875933169, |
|
"grad_norm": 0.48289993909064316, |
|
"learning_rate": 4.884565292294563e-05, |
|
"loss": 1.8891, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.5262471856855078, |
|
"grad_norm": 0.5172395191973475, |
|
"learning_rate": 4.859746560489979e-05, |
|
"loss": 1.8907, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.5309870837776987, |
|
"grad_norm": 0.4807916914066212, |
|
"learning_rate": 4.834931286619432e-05, |
|
"loss": 1.9074, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.5357269818698898, |
|
"grad_norm": 0.5144939695987174, |
|
"learning_rate": 4.810120082500934e-05, |
|
"loss": 1.8338, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.5404668799620809, |
|
"grad_norm": 0.5199756044880577, |
|
"learning_rate": 4.785313559852156e-05, |
|
"loss": 1.965, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.545206778054272, |
|
"grad_norm": 0.5415928562917922, |
|
"learning_rate": 4.7605123302753433e-05, |
|
"loss": 1.8472, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.5499466761464629, |
|
"grad_norm": 0.5335132590972799, |
|
"learning_rate": 4.735717005242248e-05, |
|
"loss": 1.8558, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.5546865742386538, |
|
"grad_norm": 0.5581108907205053, |
|
"learning_rate": 4.710928196079042e-05, |
|
"loss": 1.8794, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.5594264723308449, |
|
"grad_norm": 0.5335645184315633, |
|
"learning_rate": 4.6861465139512475e-05, |
|
"loss": 1.8271, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.564166370423036, |
|
"grad_norm": 0.5470177997128685, |
|
"learning_rate": 4.661372569848678e-05, |
|
"loss": 1.8935, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.568906268515227, |
|
"grad_norm": 0.5362519757955545, |
|
"learning_rate": 4.636606974570361e-05, |
|
"loss": 1.8072, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.573646166607418, |
|
"grad_norm": 0.6040810957613818, |
|
"learning_rate": 4.611850338709482e-05, |
|
"loss": 1.7864, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.5783860646996088, |
|
"grad_norm": 0.5318403452991018, |
|
"learning_rate": 4.5871032726383386e-05, |
|
"loss": 1.8524, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.5831259627918, |
|
"grad_norm": 0.5512446332300014, |
|
"learning_rate": 4.562366386493286e-05, |
|
"loss": 1.8972, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.587865860883991, |
|
"grad_norm": 0.5083043080271707, |
|
"learning_rate": 4.537640290159688e-05, |
|
"loss": 1.7909, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.5926057589761822, |
|
"grad_norm": 0.516558139348224, |
|
"learning_rate": 4.512925593256895e-05, |
|
"loss": 1.9006, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.597345657068373, |
|
"grad_norm": 0.5406712324925647, |
|
"learning_rate": 4.4882229051232e-05, |
|
"loss": 1.9456, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.602085555160564, |
|
"grad_norm": 0.5537236012465999, |
|
"learning_rate": 4.463532834800825e-05, |
|
"loss": 1.8696, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.606825453252755, |
|
"grad_norm": 0.5501268633544832, |
|
"learning_rate": 4.438855991020896e-05, |
|
"loss": 1.9089, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.6115653513449462, |
|
"grad_norm": 0.5642376324584947, |
|
"learning_rate": 4.414192982188446e-05, |
|
"loss": 1.868, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.616305249437137, |
|
"grad_norm": 0.49603254737837815, |
|
"learning_rate": 4.3895444163674006e-05, |
|
"loss": 1.9261, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.6210451475293282, |
|
"grad_norm": 0.5264212888797052, |
|
"learning_rate": 4.364910901265606e-05, |
|
"loss": 1.9271, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.625785045621519, |
|
"grad_norm": 0.5165427594444576, |
|
"learning_rate": 4.340293044219825e-05, |
|
"loss": 1.8798, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.6305249437137102, |
|
"grad_norm": 0.5111756681074762, |
|
"learning_rate": 4.315691452180777e-05, |
|
"loss": 1.8821, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.6352648418059013, |
|
"grad_norm": 0.5353729238490614, |
|
"learning_rate": 4.2911067316981656e-05, |
|
"loss": 1.9193, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.6400047398980921, |
|
"grad_norm": 0.5427362289483532, |
|
"learning_rate": 4.2665394889057325e-05, |
|
"loss": 1.8648, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.6447446379902833, |
|
"grad_norm": 0.5316532712452083, |
|
"learning_rate": 4.2419903295063045e-05, |
|
"loss": 1.8696, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.6494845360824741, |
|
"grad_norm": 0.5445515739019248, |
|
"learning_rate": 4.2174598587568706e-05, |
|
"loss": 1.7773, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.6542244341746652, |
|
"grad_norm": 0.515985891781636, |
|
"learning_rate": 4.192948681453645e-05, |
|
"loss": 1.9528, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.6589643322668564, |
|
"grad_norm": 0.533497568011406, |
|
"learning_rate": 4.168457401917169e-05, |
|
"loss": 1.9089, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.6637042303590472, |
|
"grad_norm": 0.5034380410666982, |
|
"learning_rate": 4.1439866239774065e-05, |
|
"loss": 1.902, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.6684441284512384, |
|
"grad_norm": 0.5008886693586585, |
|
"learning_rate": 4.119536950958853e-05, |
|
"loss": 1.8597, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.6731840265434292, |
|
"grad_norm": 0.5042866133180605, |
|
"learning_rate": 4.095108985665668e-05, |
|
"loss": 1.941, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.6779239246356203, |
|
"grad_norm": 0.4894456961892347, |
|
"learning_rate": 4.070703330366809e-05, |
|
"loss": 1.8749, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.6826638227278115, |
|
"grad_norm": 0.5304927617260963, |
|
"learning_rate": 4.0463205867811834e-05, |
|
"loss": 1.9169, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.6874037208200023, |
|
"grad_norm": 0.5192399220515885, |
|
"learning_rate": 4.0219613560628074e-05, |
|
"loss": 1.8853, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.6921436189121932, |
|
"grad_norm": 0.5436581114459818, |
|
"learning_rate": 3.997626238785997e-05, |
|
"loss": 1.9093, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.6968835170043843, |
|
"grad_norm": 0.5671093634463978, |
|
"learning_rate": 3.973315834930549e-05, |
|
"loss": 1.8667, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.7016234150965754, |
|
"grad_norm": 0.5505401718757482, |
|
"learning_rate": 3.949030743866955e-05, |
|
"loss": 1.8701, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.7063633131887666, |
|
"grad_norm": 0.5107784655812311, |
|
"learning_rate": 3.924771564341621e-05, |
|
"loss": 1.8796, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.7111032112809574, |
|
"grad_norm": 0.5123424894974382, |
|
"learning_rate": 3.900538894462112e-05, |
|
"loss": 1.9345, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.7158431093731483, |
|
"grad_norm": 0.5975803333556319, |
|
"learning_rate": 3.876333331682394e-05, |
|
"loss": 1.9071, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.7205830074653394, |
|
"grad_norm": 0.5607215795184285, |
|
"learning_rate": 3.8521554727881115e-05, |
|
"loss": 1.8444, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.7253229055575305, |
|
"grad_norm": 0.5812681320546813, |
|
"learning_rate": 3.828005913881876e-05, |
|
"loss": 1.8783, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.7300628036497216, |
|
"grad_norm": 0.5809996822930421, |
|
"learning_rate": 3.803885250368562e-05, |
|
"loss": 1.8667, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.7348027017419125, |
|
"grad_norm": 0.5264379258394054, |
|
"learning_rate": 3.7797940769406324e-05, |
|
"loss": 1.8832, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.7395425998341034, |
|
"grad_norm": 0.5452547674401557, |
|
"learning_rate": 3.755732987563476e-05, |
|
"loss": 1.9126, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.7442824979262945, |
|
"grad_norm": 0.5573756045226962, |
|
"learning_rate": 3.731702575460763e-05, |
|
"loss": 1.9267, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.7490223960184856, |
|
"grad_norm": 0.5891329270301621, |
|
"learning_rate": 3.707703433099815e-05, |
|
"loss": 1.8927, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.7537622941106767, |
|
"grad_norm": 0.5379354015536967, |
|
"learning_rate": 3.683736152177005e-05, |
|
"loss": 1.8829, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.7585021922028676, |
|
"grad_norm": 0.584902744080287, |
|
"learning_rate": 3.659801323603163e-05, |
|
"loss": 1.9032, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.7632420902950585, |
|
"grad_norm": 0.47271945766863005, |
|
"learning_rate": 3.63589953748901e-05, |
|
"loss": 1.8634, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.7679819883872496, |
|
"grad_norm": 0.5602358756096469, |
|
"learning_rate": 3.612031383130612e-05, |
|
"loss": 1.8436, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.7727218864794407, |
|
"grad_norm": 0.5171084893952771, |
|
"learning_rate": 3.5881974489948456e-05, |
|
"loss": 1.8279, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.7774617845716318, |
|
"grad_norm": 0.5085114117110985, |
|
"learning_rate": 3.564398322704887e-05, |
|
"loss": 1.8842, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.7822016826638227, |
|
"grad_norm": 0.5395255555244833, |
|
"learning_rate": 3.5406345910257346e-05, |
|
"loss": 1.8974, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.7869415807560136, |
|
"grad_norm": 0.5256917642696852, |
|
"learning_rate": 3.5169068398497344e-05, |
|
"loss": 1.9247, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.7916814788482047, |
|
"grad_norm": 0.5297510632715654, |
|
"learning_rate": 3.493215654182134e-05, |
|
"loss": 1.8941, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.7964213769403958, |
|
"grad_norm": 0.4887292770108947, |
|
"learning_rate": 3.4695616181266674e-05, |
|
"loss": 1.8662, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.801161275032587, |
|
"grad_norm": 0.605286928037954, |
|
"learning_rate": 3.445945314871144e-05, |
|
"loss": 1.7946, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.8059011731247778, |
|
"grad_norm": 0.5534598174424521, |
|
"learning_rate": 3.422367326673079e-05, |
|
"loss": 1.9319, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.8106410712169687, |
|
"grad_norm": 0.516541325820194, |
|
"learning_rate": 3.398828234845331e-05, |
|
"loss": 1.9102, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.8153809693091598, |
|
"grad_norm": 0.5316375380294128, |
|
"learning_rate": 3.3753286197417714e-05, |
|
"loss": 1.9137, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.820120867401351, |
|
"grad_norm": 0.5048711282201915, |
|
"learning_rate": 3.3518690607429784e-05, |
|
"loss": 1.8643, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.824860765493542, |
|
"grad_norm": 0.5407400572506997, |
|
"learning_rate": 3.3284501362419566e-05, |
|
"loss": 1.8524, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.829600663585733, |
|
"grad_norm": 0.5444240928370307, |
|
"learning_rate": 3.305072423629862e-05, |
|
"loss": 1.9604, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.8343405616779238, |
|
"grad_norm": 0.5259735881080222, |
|
"learning_rate": 3.281736499281783e-05, |
|
"loss": 1.8699, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.839080459770115, |
|
"grad_norm": 0.5412391021904834, |
|
"learning_rate": 3.2584429385425163e-05, |
|
"loss": 1.9233, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.843820357862306, |
|
"grad_norm": 0.581528749881215, |
|
"learning_rate": 3.235192315712394e-05, |
|
"loss": 1.9037, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.8485602559544971, |
|
"grad_norm": 0.486599214527775, |
|
"learning_rate": 3.211985204033114e-05, |
|
"loss": 1.881, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.853300154046688, |
|
"grad_norm": 0.5732281840924196, |
|
"learning_rate": 3.188822175673618e-05, |
|
"loss": 1.9289, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.858040052138879, |
|
"grad_norm": 0.5393218742500727, |
|
"learning_rate": 3.165703801715969e-05, |
|
"loss": 1.8178, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.86277995023107, |
|
"grad_norm": 0.5317421200650526, |
|
"learning_rate": 3.142630652141286e-05, |
|
"loss": 1.7813, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.8675198483232611, |
|
"grad_norm": 0.4707578563318653, |
|
"learning_rate": 3.119603295815685e-05, |
|
"loss": 1.8928, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.8722597464154522, |
|
"grad_norm": 0.503217338566424, |
|
"learning_rate": 3.096622300476253e-05, |
|
"loss": 1.9702, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.8769996445076431, |
|
"grad_norm": 0.5191335631232252, |
|
"learning_rate": 3.07368823271705e-05, |
|
"loss": 1.8832, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.881739542599834, |
|
"grad_norm": 0.5929718795388419, |
|
"learning_rate": 3.050801657975147e-05, |
|
"loss": 1.9705, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.8864794406920251, |
|
"grad_norm": 0.5203449537199084, |
|
"learning_rate": 3.0279631405166754e-05, |
|
"loss": 1.8005, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.8912193387842162, |
|
"grad_norm": 0.6060740003713215, |
|
"learning_rate": 3.0051732434229184e-05, |
|
"loss": 1.8802, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.895959236876407, |
|
"grad_norm": 0.5254251326665124, |
|
"learning_rate": 2.9824325285764332e-05, |
|
"loss": 1.9063, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.9006991349685982, |
|
"grad_norm": 0.5412654814841995, |
|
"learning_rate": 2.9597415566471874e-05, |
|
"loss": 1.7974, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.905439033060789, |
|
"grad_norm": 0.6096977687423671, |
|
"learning_rate": 2.9371008870787474e-05, |
|
"loss": 1.8789, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.9101789311529802, |
|
"grad_norm": 0.5751076752952912, |
|
"learning_rate": 2.914511078074481e-05, |
|
"loss": 1.9147, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.9149188292451713, |
|
"grad_norm": 0.5596872085857021, |
|
"learning_rate": 2.891972686583791e-05, |
|
"loss": 1.8939, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.9196587273373622, |
|
"grad_norm": 0.5205001238706851, |
|
"learning_rate": 2.8694862682883866e-05, |
|
"loss": 1.8675, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.9243986254295533, |
|
"grad_norm": 0.6060966652232279, |
|
"learning_rate": 2.8470523775885816e-05, |
|
"loss": 1.8542, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.9291385235217442, |
|
"grad_norm": 0.5060927602134601, |
|
"learning_rate": 2.824671567589635e-05, |
|
"loss": 1.9095, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.9338784216139353, |
|
"grad_norm": 0.527071756794979, |
|
"learning_rate": 2.8023443900880984e-05, |
|
"loss": 1.8144, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.9386183197061264, |
|
"grad_norm": 0.6186591144971271, |
|
"learning_rate": 2.780071395558222e-05, |
|
"loss": 1.9328, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.9433582177983173, |
|
"grad_norm": 0.5084958011646354, |
|
"learning_rate": 2.757853133138382e-05, |
|
"loss": 1.8292, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.9480981158905084, |
|
"grad_norm": 0.5671058444452819, |
|
"learning_rate": 2.7356901506175426e-05, |
|
"loss": 1.8621, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.9528380139826993, |
|
"grad_norm": 0.6077250993929268, |
|
"learning_rate": 2.7135829944217406e-05, |
|
"loss": 1.8969, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.9575779120748904, |
|
"grad_norm": 0.5478709269890887, |
|
"learning_rate": 2.6915322096006244e-05, |
|
"loss": 1.9648, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.9623178101670815, |
|
"grad_norm": 0.5304846907499281, |
|
"learning_rate": 2.6695383398140155e-05, |
|
"loss": 1.8867, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.9670577082592724, |
|
"grad_norm": 0.5084950385451593, |
|
"learning_rate": 2.6476019273184938e-05, |
|
"loss": 1.8987, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.9717976063514633, |
|
"grad_norm": 0.5881914443826771, |
|
"learning_rate": 2.6257235129540424e-05, |
|
"loss": 1.8718, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.9765375044436544, |
|
"grad_norm": 0.5557425542971698, |
|
"learning_rate": 2.603903636130701e-05, |
|
"loss": 1.8204, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.9812774025358455, |
|
"grad_norm": 0.5235298330164154, |
|
"learning_rate": 2.5821428348152788e-05, |
|
"loss": 1.915, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.9860173006280366, |
|
"grad_norm": 0.6107709148392828, |
|
"learning_rate": 2.560441645518078e-05, |
|
"loss": 1.8223, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.9907571987202275, |
|
"grad_norm": 0.5614697856069703, |
|
"learning_rate": 2.538800603279673e-05, |
|
"loss": 1.8439, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.9954970968124184, |
|
"grad_norm": 0.5563269995130558, |
|
"learning_rate": 2.5172202416577236e-05, |
|
"loss": 1.8982, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.0002369949046095, |
|
"grad_norm": 0.5673849628756762, |
|
"learning_rate": 2.4957010927138136e-05, |
|
"loss": 1.8956, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.0049768929968006, |
|
"grad_norm": 0.5274159605663582, |
|
"learning_rate": 2.4742436870003326e-05, |
|
"loss": 1.8572, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.0097167910889917, |
|
"grad_norm": 0.5388999304024686, |
|
"learning_rate": 2.452848553547396e-05, |
|
"loss": 1.8441, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.014456689181183, |
|
"grad_norm": 0.5715679686982497, |
|
"learning_rate": 2.431516219849809e-05, |
|
"loss": 1.838, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.0191965872733735, |
|
"grad_norm": 0.5795119843431206, |
|
"learning_rate": 2.4102472118540487e-05, |
|
"loss": 1.8329, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.0239364853655646, |
|
"grad_norm": 0.5503184533431318, |
|
"learning_rate": 2.3890420539453057e-05, |
|
"loss": 1.8733, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.0286763834577557, |
|
"grad_norm": 0.54871121092008, |
|
"learning_rate": 2.3679012689345558e-05, |
|
"loss": 1.8601, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.033416281549947, |
|
"grad_norm": 0.5879797146794722, |
|
"learning_rate": 2.3468253780456678e-05, |
|
"loss": 1.7751, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.038156179642138, |
|
"grad_norm": 0.5510154682184406, |
|
"learning_rate": 2.3258149009025482e-05, |
|
"loss": 1.827, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.0428960777343286, |
|
"grad_norm": 0.513792181350148, |
|
"learning_rate": 2.3048703555163357e-05, |
|
"loss": 1.8474, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.0476359758265197, |
|
"grad_norm": 0.5489219942664323, |
|
"learning_rate": 2.2839922582726336e-05, |
|
"loss": 1.8862, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.052375873918711, |
|
"grad_norm": 0.6504687065880719, |
|
"learning_rate": 2.2631811239187646e-05, |
|
"loss": 1.7984, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.057115772010902, |
|
"grad_norm": 0.6130904570523673, |
|
"learning_rate": 2.2424374655510965e-05, |
|
"loss": 1.7921, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 0.6408124203446663, |
|
"learning_rate": 2.2217617946023765e-05, |
|
"loss": 1.8592, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.0665955681952837, |
|
"grad_norm": 0.6181447797115482, |
|
"learning_rate": 2.201154620829137e-05, |
|
"loss": 1.8067, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.071335466287475, |
|
"grad_norm": 0.5627617017019729, |
|
"learning_rate": 2.1806164522991118e-05, |
|
"loss": 1.7701, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.076075364379666, |
|
"grad_norm": 0.5510540438192786, |
|
"learning_rate": 2.1601477953787214e-05, |
|
"loss": 1.857, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.080815262471857, |
|
"grad_norm": 0.6083237779423979, |
|
"learning_rate": 2.1397491547205807e-05, |
|
"loss": 1.7601, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.0855551605640477, |
|
"grad_norm": 0.6047311337345246, |
|
"learning_rate": 2.119421033251071e-05, |
|
"loss": 1.8347, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.0902950586562388, |
|
"grad_norm": 0.5662369508712475, |
|
"learning_rate": 2.0991639321579214e-05, |
|
"loss": 1.8545, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.09503495674843, |
|
"grad_norm": 0.5935079368512177, |
|
"learning_rate": 2.078978350877862e-05, |
|
"loss": 1.879, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.099774854840621, |
|
"grad_norm": 0.571586984028468, |
|
"learning_rate": 2.058864787084309e-05, |
|
"loss": 1.7671, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.104514752932812, |
|
"grad_norm": 0.5682037137995106, |
|
"learning_rate": 2.0388237366751006e-05, |
|
"loss": 1.865, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.1092546510250028, |
|
"grad_norm": 0.5490908649638305, |
|
"learning_rate": 2.018855693760257e-05, |
|
"loss": 1.78, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.113994549117194, |
|
"grad_norm": 0.6176356249016943, |
|
"learning_rate": 1.998961150649814e-05, |
|
"loss": 1.8435, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.118734447209385, |
|
"grad_norm": 0.5319868348925916, |
|
"learning_rate": 1.9791405978416694e-05, |
|
"loss": 1.8981, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.123474345301576, |
|
"grad_norm": 0.5752723871436735, |
|
"learning_rate": 1.9593945240095052e-05, |
|
"loss": 1.7755, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.128214243393767, |
|
"grad_norm": 0.6366681694521167, |
|
"learning_rate": 1.9397234159907275e-05, |
|
"loss": 1.8707, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.132954141485958, |
|
"grad_norm": 0.5901487974014347, |
|
"learning_rate": 1.920127758774466e-05, |
|
"loss": 1.8256, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.137694039578149, |
|
"grad_norm": 0.5888105104943471, |
|
"learning_rate": 1.9006080354896267e-05, |
|
"loss": 1.8357, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.14243393767034, |
|
"grad_norm": 0.5878169661429707, |
|
"learning_rate": 1.8811647273929628e-05, |
|
"loss": 1.8241, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.147173835762531, |
|
"grad_norm": 0.5581948418607748, |
|
"learning_rate": 1.8617983138572277e-05, |
|
"loss": 1.848, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.1519137338547223, |
|
"grad_norm": 0.6137321662868356, |
|
"learning_rate": 1.8425092723593395e-05, |
|
"loss": 1.78, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.156653631946913, |
|
"grad_norm": 0.558081495592443, |
|
"learning_rate": 1.823298078468624e-05, |
|
"loss": 1.8153, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.161393530039104, |
|
"grad_norm": 0.6039625325723422, |
|
"learning_rate": 1.8041652058350767e-05, |
|
"loss": 1.8416, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.166133428131295, |
|
"grad_norm": 0.6295821331128388, |
|
"learning_rate": 1.785111126177691e-05, |
|
"loss": 1.7953, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.1708733262234863, |
|
"grad_norm": 0.5911527371211652, |
|
"learning_rate": 1.7661363092728307e-05, |
|
"loss": 1.7851, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.1756132243156774, |
|
"grad_norm": 0.565852777352692, |
|
"learning_rate": 1.7472412229426455e-05, |
|
"loss": 1.8101, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.180353122407868, |
|
"grad_norm": 0.5656454600563583, |
|
"learning_rate": 1.7284263330435317e-05, |
|
"loss": 1.917, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.185093020500059, |
|
"grad_norm": 0.6035646498858932, |
|
"learning_rate": 1.709692103454651e-05, |
|
"loss": 1.8168, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.1898329185922503, |
|
"grad_norm": 0.5477939270708279, |
|
"learning_rate": 1.6910389960664992e-05, |
|
"loss": 1.777, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.1945728166844414, |
|
"grad_norm": 0.5898939001383526, |
|
"learning_rate": 1.672467470769507e-05, |
|
"loss": 1.7575, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.1993127147766325, |
|
"grad_norm": 0.544798273283213, |
|
"learning_rate": 1.6539779854427074e-05, |
|
"loss": 1.8834, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.204052612868823, |
|
"grad_norm": 0.610618761949142, |
|
"learning_rate": 1.6355709959424487e-05, |
|
"loss": 1.8785, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.2087925109610143, |
|
"grad_norm": 0.6064522176814057, |
|
"learning_rate": 1.6172469560911553e-05, |
|
"loss": 1.7854, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.2135324090532054, |
|
"grad_norm": 0.6022849345976745, |
|
"learning_rate": 1.599006317666131e-05, |
|
"loss": 1.8497, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.2182723071453965, |
|
"grad_norm": 0.5926151325695663, |
|
"learning_rate": 1.5808495303884297e-05, |
|
"loss": 1.8184, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.2230122052375876, |
|
"grad_norm": 0.5740462281531319, |
|
"learning_rate": 1.562777041911761e-05, |
|
"loss": 1.8073, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.2277521033297782, |
|
"grad_norm": 0.595274030679382, |
|
"learning_rate": 1.5447892978114592e-05, |
|
"loss": 1.8095, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.2324920014219694, |
|
"grad_norm": 0.5805561493774153, |
|
"learning_rate": 1.526886741573496e-05, |
|
"loss": 1.7907, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.2372318995141605, |
|
"grad_norm": 0.6585750772533296, |
|
"learning_rate": 1.5090698145835413e-05, |
|
"loss": 1.8081, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.2419717976063516, |
|
"grad_norm": 0.7616121844460758, |
|
"learning_rate": 1.491338956116085e-05, |
|
"loss": 1.8571, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.2467116956985427, |
|
"grad_norm": 0.6037559488690589, |
|
"learning_rate": 1.473694603323611e-05, |
|
"loss": 1.8194, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.2514515937907333, |
|
"grad_norm": 0.6412117105060221, |
|
"learning_rate": 1.4561371912258098e-05, |
|
"loss": 1.7447, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.2561914918829244, |
|
"grad_norm": 0.6178165307415238, |
|
"learning_rate": 1.4386671526988593e-05, |
|
"loss": 1.8047, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.2609313899751156, |
|
"grad_norm": 0.5887211775830831, |
|
"learning_rate": 1.421284918464752e-05, |
|
"loss": 1.8309, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.2656712880673067, |
|
"grad_norm": 0.6715832023904247, |
|
"learning_rate": 1.4039909170806764e-05, |
|
"loss": 1.7598, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.2704111861594978, |
|
"grad_norm": 0.5565711226911474, |
|
"learning_rate": 1.386785574928446e-05, |
|
"loss": 1.8042, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.2751510842516884, |
|
"grad_norm": 1.0370061435438975, |
|
"learning_rate": 1.3696693162039893e-05, |
|
"loss": 1.8418, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.2798909823438795, |
|
"grad_norm": 0.619379427966442, |
|
"learning_rate": 1.3526425629068967e-05, |
|
"loss": 1.8709, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.2846308804360707, |
|
"grad_norm": 0.6181820044240368, |
|
"learning_rate": 1.3357057348300067e-05, |
|
"loss": 1.8222, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.2893707785282618, |
|
"grad_norm": 0.6447967865409838, |
|
"learning_rate": 1.318859249549066e-05, |
|
"loss": 1.8183, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.294110676620453, |
|
"grad_norm": 0.6058171204419526, |
|
"learning_rate": 1.3021035224124224e-05, |
|
"loss": 1.805, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.2988505747126435, |
|
"grad_norm": 0.5434323398332925, |
|
"learning_rate": 1.2854389665307975e-05, |
|
"loss": 1.7541, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.3035904728048346, |
|
"grad_norm": 0.6113667985824829, |
|
"learning_rate": 1.2688659927670915e-05, |
|
"loss": 1.758, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.3083303708970258, |
|
"grad_norm": 0.5720767875706882, |
|
"learning_rate": 1.2523850097262563e-05, |
|
"loss": 1.8322, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.313070268989217, |
|
"grad_norm": 0.5628951626795141, |
|
"learning_rate": 1.2359964237452238e-05, |
|
"loss": 1.7798, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.317810167081408, |
|
"grad_norm": 0.6094150987430762, |
|
"learning_rate": 1.219700638882888e-05, |
|
"loss": 1.7842, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.3225500651735986, |
|
"grad_norm": 0.6036779282592939, |
|
"learning_rate": 1.2034980569101367e-05, |
|
"loss": 1.8383, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.3272899632657897, |
|
"grad_norm": 0.6175747345768624, |
|
"learning_rate": 1.1873890772999502e-05, |
|
"loss": 1.9046, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.332029861357981, |
|
"grad_norm": 0.5564649373869762, |
|
"learning_rate": 1.1713740972175574e-05, |
|
"loss": 1.8104, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.336769759450172, |
|
"grad_norm": 0.6441404862225901, |
|
"learning_rate": 1.155453511510633e-05, |
|
"loss": 1.7864, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.341509657542363, |
|
"grad_norm": 0.6927623121031959, |
|
"learning_rate": 1.1396277126995707e-05, |
|
"loss": 1.829, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.3462495556345537, |
|
"grad_norm": 0.6537904475611329, |
|
"learning_rate": 1.1238970909677993e-05, |
|
"loss": 1.8655, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.350989453726745, |
|
"grad_norm": 0.5779494171909159, |
|
"learning_rate": 1.1082620341521766e-05, |
|
"loss": 1.7482, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.355729351818936, |
|
"grad_norm": 0.6161830958900923, |
|
"learning_rate": 1.0927229277334061e-05, |
|
"loss": 1.7789, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.360469249911127, |
|
"grad_norm": 0.5946038603032194, |
|
"learning_rate": 1.0772801548265498e-05, |
|
"loss": 1.8189, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.365209148003318, |
|
"grad_norm": 0.6072288944056834, |
|
"learning_rate": 1.0619340961715746e-05, |
|
"loss": 1.8588, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.369949046095509, |
|
"grad_norm": 0.5882805952028816, |
|
"learning_rate": 1.0466851301239711e-05, |
|
"loss": 1.8238, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.3746889441877, |
|
"grad_norm": 0.6288910196539964, |
|
"learning_rate": 1.0315336326454161e-05, |
|
"loss": 1.7055, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.379428842279891, |
|
"grad_norm": 0.6043835236662759, |
|
"learning_rate": 1.0164799772945149e-05, |
|
"loss": 1.8134, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.384168740372082, |
|
"grad_norm": 0.5821262142704368, |
|
"learning_rate": 1.0015245352175811e-05, |
|
"loss": 1.797, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.3889086384642733, |
|
"grad_norm": 0.6369667143877562, |
|
"learning_rate": 9.866676751394927e-06, |
|
"loss": 1.8199, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.393648536556464, |
|
"grad_norm": 0.5924507902566707, |
|
"learning_rate": 9.719097633545975e-06, |
|
"loss": 1.8524, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.398388434648655, |
|
"grad_norm": 0.5762513665027686, |
|
"learning_rate": 9.572511637176811e-06, |
|
"loss": 1.8428, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.403128332740846, |
|
"grad_norm": 0.5799149040724592, |
|
"learning_rate": 9.426922376350028e-06, |
|
"loss": 1.8463, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.4078682308330372, |
|
"grad_norm": 0.5898000658332848, |
|
"learning_rate": 9.282333440553804e-06, |
|
"loss": 1.7772, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.4126081289252284, |
|
"grad_norm": 0.5967206158269678, |
|
"learning_rate": 9.13874839461336e-06, |
|
"loss": 1.8234, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.417348027017419, |
|
"grad_norm": 0.6245591569289297, |
|
"learning_rate": 8.996170778603153e-06, |
|
"loss": 1.8047, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.42208792510961, |
|
"grad_norm": 0.5981945344970201, |
|
"learning_rate": 8.854604107759568e-06, |
|
"loss": 1.8429, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.4268278232018012, |
|
"grad_norm": 0.6112665064763977, |
|
"learning_rate": 8.714051872394213e-06, |
|
"loss": 1.7746, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.4315677212939923, |
|
"grad_norm": 0.5847743009358597, |
|
"learning_rate": 8.574517537807897e-06, |
|
"loss": 1.7703, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.436307619386183, |
|
"grad_norm": 0.5617053604855574, |
|
"learning_rate": 8.436004544205217e-06, |
|
"loss": 1.8498, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.441047517478374, |
|
"grad_norm": 0.5947168640425712, |
|
"learning_rate": 8.2985163066097e-06, |
|
"loss": 1.8439, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.4457874155705652, |
|
"grad_norm": 0.6456439652584188, |
|
"learning_rate": 8.162056214779618e-06, |
|
"loss": 1.8125, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.4505273136627563, |
|
"grad_norm": 0.6053385247801931, |
|
"learning_rate": 8.02662763312439e-06, |
|
"loss": 1.8193, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.4552672117549474, |
|
"grad_norm": 0.6364991896683941, |
|
"learning_rate": 7.89223390062172e-06, |
|
"loss": 1.8081, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.460007109847138, |
|
"grad_norm": 0.630663938586301, |
|
"learning_rate": 7.758878330735142e-06, |
|
"loss": 1.8317, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.464747007939329, |
|
"grad_norm": 0.6625585293729884, |
|
"learning_rate": 7.626564211332465e-06, |
|
"loss": 1.7914, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.4694869060315203, |
|
"grad_norm": 0.6132933711832741, |
|
"learning_rate": 7.49529480460458e-06, |
|
"loss": 1.8072, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.4742268041237114, |
|
"grad_norm": 0.6723366054843423, |
|
"learning_rate": 7.3650733469851574e-06, |
|
"loss": 1.8693, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.4789667022159025, |
|
"grad_norm": 0.5948715205500895, |
|
"learning_rate": 7.235903049070742e-06, |
|
"loss": 1.7441, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.483706600308093, |
|
"grad_norm": 0.602660875671921, |
|
"learning_rate": 7.1077870955416685e-06, |
|
"loss": 1.8301, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.4884464984002843, |
|
"grad_norm": 0.6657860629895173, |
|
"learning_rate": 6.98072864508349e-06, |
|
"loss": 1.7357, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.4931863964924754, |
|
"grad_norm": 0.6400301583474429, |
|
"learning_rate": 6.854730830309203e-06, |
|
"loss": 1.8309, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.4979262945846665, |
|
"grad_norm": 0.6519457597490862, |
|
"learning_rate": 6.729796757681861e-06, |
|
"loss": 1.8622, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.502666192676857, |
|
"grad_norm": 0.6018425213466797, |
|
"learning_rate": 6.605929507438108e-06, |
|
"loss": 1.8124, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.5074060907690483, |
|
"grad_norm": 0.6356535657958864, |
|
"learning_rate": 6.4831321335121706e-06, |
|
"loss": 1.8493, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.5121459888612394, |
|
"grad_norm": 0.5933711757944313, |
|
"learning_rate": 6.361407663460612e-06, |
|
"loss": 1.8152, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.5168858869534305, |
|
"grad_norm": 0.6176252282132866, |
|
"learning_rate": 6.240759098387628e-06, |
|
"loss": 1.7796, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.5216257850456216, |
|
"grad_norm": 0.6035543936375999, |
|
"learning_rate": 6.12118941287112e-06, |
|
"loss": 1.8072, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.5263656831378123, |
|
"grad_norm": 0.6423602506797493, |
|
"learning_rate": 6.002701554889306e-06, |
|
"loss": 1.8894, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.5311055812300034, |
|
"grad_norm": 0.6166718860982423, |
|
"learning_rate": 5.885298445748072e-06, |
|
"loss": 1.8476, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.5358454793221945, |
|
"grad_norm": 0.6250486214392823, |
|
"learning_rate": 5.768982980008924e-06, |
|
"loss": 1.8044, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.5405853774143856, |
|
"grad_norm": 0.6409013217160432, |
|
"learning_rate": 5.653758025417616e-06, |
|
"loss": 1.7732, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.5453252755065767, |
|
"grad_norm": 0.5853729101352203, |
|
"learning_rate": 5.5396264228335e-06, |
|
"loss": 1.816, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.5500651735987674, |
|
"grad_norm": 0.6674717253505213, |
|
"learning_rate": 5.42659098615943e-06, |
|
"loss": 1.828, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.5548050716909585, |
|
"grad_norm": 0.6079460431124653, |
|
"learning_rate": 5.314654502272393e-06, |
|
"loss": 1.8305, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.5595449697831496, |
|
"grad_norm": 0.6132271739956523, |
|
"learning_rate": 5.203819730954806e-06, |
|
"loss": 1.9389, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.5642848678753407, |
|
"grad_norm": 0.6412964569520792, |
|
"learning_rate": 5.094089404826513e-06, |
|
"loss": 1.8878, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.569024765967532, |
|
"grad_norm": 0.6314773808659059, |
|
"learning_rate": 4.985466229277331e-06, |
|
"loss": 1.7996, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.5737646640597225, |
|
"grad_norm": 0.6019377364178156, |
|
"learning_rate": 4.877952882400411e-06, |
|
"loss": 1.8326, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.5785045621519136, |
|
"grad_norm": 0.6375177888153616, |
|
"learning_rate": 4.771552014926206e-06, |
|
"loss": 1.8313, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.5832444602441047, |
|
"grad_norm": 0.6184290636855982, |
|
"learning_rate": 4.666266250157097e-06, |
|
"loss": 1.8408, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.587984358336296, |
|
"grad_norm": 0.6145812896553856, |
|
"learning_rate": 4.562098183902713e-06, |
|
"loss": 1.7928, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.592724256428487, |
|
"grad_norm": 0.5863286484938057, |
|
"learning_rate": 4.459050384415941e-06, |
|
"loss": 1.7671, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.5974641545206776, |
|
"grad_norm": 0.5908385265300592, |
|
"learning_rate": 4.357125392329636e-06, |
|
"loss": 1.8528, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.6022040526128687, |
|
"grad_norm": 0.6315835702501038, |
|
"learning_rate": 4.256325720593912e-06, |
|
"loss": 1.8952, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.60694395070506, |
|
"grad_norm": 0.5905062832031487, |
|
"learning_rate": 4.15665385441425e-06, |
|
"loss": 1.8604, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.611683848797251, |
|
"grad_norm": 0.568727331363524, |
|
"learning_rate": 4.0581122511901934e-06, |
|
"loss": 1.8351, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.616423746889442, |
|
"grad_norm": 0.6400621125560388, |
|
"learning_rate": 3.960703340454791e-06, |
|
"loss": 1.857, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.6211636449816327, |
|
"grad_norm": 0.6844853412168999, |
|
"learning_rate": 3.864429523814644e-06, |
|
"loss": 1.8371, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.625903543073824, |
|
"grad_norm": 0.6040727492768455, |
|
"learning_rate": 3.7692931748907425e-06, |
|
"loss": 1.8582, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.630643441166015, |
|
"grad_norm": 0.6488970700922259, |
|
"learning_rate": 3.675296639259912e-06, |
|
"loss": 1.8466, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.635383339258206, |
|
"grad_norm": 0.606860701135619, |
|
"learning_rate": 3.5824422343970267e-06, |
|
"loss": 1.8823, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.640123237350397, |
|
"grad_norm": 0.6107041616886252, |
|
"learning_rate": 3.4907322496178397e-06, |
|
"loss": 1.7635, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.6448631354425878, |
|
"grad_norm": 0.6205661299793865, |
|
"learning_rate": 3.4001689460225195e-06, |
|
"loss": 1.7604, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.649603033534779, |
|
"grad_norm": 0.6114908815089501, |
|
"learning_rate": 3.3107545564399434e-06, |
|
"loss": 1.8452, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.65434293162697, |
|
"grad_norm": 0.621202845423754, |
|
"learning_rate": 3.2224912853726476e-06, |
|
"loss": 1.8557, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.659082829719161, |
|
"grad_norm": 0.6376438148340446, |
|
"learning_rate": 3.1353813089424424e-06, |
|
"loss": 1.8295, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.663822727811352, |
|
"grad_norm": 0.6085163299666503, |
|
"learning_rate": 3.0494267748367723e-06, |
|
"loss": 1.7302, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.668562625903543, |
|
"grad_norm": 0.6330680248898437, |
|
"learning_rate": 2.9646298022557915e-06, |
|
"loss": 1.7756, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.673302523995734, |
|
"grad_norm": 0.6575109357986112, |
|
"learning_rate": 2.8809924818600952e-06, |
|
"loss": 1.7728, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.678042422087925, |
|
"grad_norm": 0.5972530598708538, |
|
"learning_rate": 2.7985168757191482e-06, |
|
"loss": 1.7927, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.682782320180116, |
|
"grad_norm": 0.6505229836146454, |
|
"learning_rate": 2.7172050172604824e-06, |
|
"loss": 1.768, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.6875222182723073, |
|
"grad_norm": 0.6339702452986381, |
|
"learning_rate": 2.63705891121957e-06, |
|
"loss": 1.7756, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.692262116364498, |
|
"grad_norm": 0.6729168831182509, |
|
"learning_rate": 2.5580805335903457e-06, |
|
"loss": 1.8363, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.697002014456689, |
|
"grad_norm": 0.6421591660117998, |
|
"learning_rate": 2.4802718315765527e-06, |
|
"loss": 1.7585, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.70174191254888, |
|
"grad_norm": 0.5993295713871896, |
|
"learning_rate": 2.403634723543674e-06, |
|
"loss": 1.8379, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.7064818106410713, |
|
"grad_norm": 0.5931932390101198, |
|
"learning_rate": 2.3281710989716933e-06, |
|
"loss": 1.8127, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.7112217087332624, |
|
"grad_norm": 0.6007499215207198, |
|
"learning_rate": 2.2538828184084595e-06, |
|
"loss": 1.7643, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.715961606825453, |
|
"grad_norm": 0.6294360874753062, |
|
"learning_rate": 2.1807717134238347e-06, |
|
"loss": 1.8007, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.720701504917644, |
|
"grad_norm": 0.6305932589800126, |
|
"learning_rate": 2.1088395865645537e-06, |
|
"loss": 1.802, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.7254414030098353, |
|
"grad_norm": 0.6091954631732173, |
|
"learning_rate": 2.038088211309769e-06, |
|
"loss": 1.7978, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.7301813011020264, |
|
"grad_norm": 0.6353525285344948, |
|
"learning_rate": 1.968519332027302e-06, |
|
"loss": 1.8641, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.7349211991942175, |
|
"grad_norm": 0.5869911293052614, |
|
"learning_rate": 1.9001346639306805e-06, |
|
"loss": 1.876, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.739661097286408, |
|
"grad_norm": 0.6462140073621514, |
|
"learning_rate": 1.8329358930368245e-06, |
|
"loss": 1.7947, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.7444009953785993, |
|
"grad_norm": 0.6298906028352366, |
|
"learning_rate": 1.7669246761244763e-06, |
|
"loss": 1.7983, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.7491408934707904, |
|
"grad_norm": 0.6351921002703318, |
|
"learning_rate": 1.7021026406933427e-06, |
|
"loss": 1.7563, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.7538807915629815, |
|
"grad_norm": 0.6081707137727146, |
|
"learning_rate": 1.638471384924012e-06, |
|
"loss": 1.8005, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.7586206896551726, |
|
"grad_norm": 0.6527854672102444, |
|
"learning_rate": 1.5760324776385171e-06, |
|
"loss": 1.8228, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.7633605877473633, |
|
"grad_norm": 0.6207692422398574, |
|
"learning_rate": 1.5147874582616518e-06, |
|
"loss": 1.8751, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.7681004858395544, |
|
"grad_norm": 0.6078351786970941, |
|
"learning_rate": 1.4547378367830267e-06, |
|
"loss": 1.854, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.7728403839317455, |
|
"grad_norm": 0.5914179875660134, |
|
"learning_rate": 1.3958850937198453e-06, |
|
"loss": 1.8771, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.7775802820239366, |
|
"grad_norm": 0.6150352638939602, |
|
"learning_rate": 1.3382306800804045e-06, |
|
"loss": 1.7422, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.7823201801161277, |
|
"grad_norm": 0.6205091178728268, |
|
"learning_rate": 1.2817760173282954e-06, |
|
"loss": 1.8005, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.7870600782083184, |
|
"grad_norm": 0.6352299718478237, |
|
"learning_rate": 1.2265224973474042e-06, |
|
"loss": 1.7703, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.7917999763005095, |
|
"grad_norm": 0.6466624089179797, |
|
"learning_rate": 1.1724714824075333e-06, |
|
"loss": 1.8315, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.7965398743927006, |
|
"grad_norm": 0.5968151491811187, |
|
"learning_rate": 1.1196243051308787e-06, |
|
"loss": 1.9011, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.8012797724848917, |
|
"grad_norm": 0.6310690230989541, |
|
"learning_rate": 1.0679822684591112e-06, |
|
"loss": 1.8434, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.806019670577083, |
|
"grad_norm": 0.6459331883257132, |
|
"learning_rate": 1.0175466456213034e-06, |
|
"loss": 1.7773, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.8107595686692735, |
|
"grad_norm": 0.6898338914840095, |
|
"learning_rate": 9.683186801025256e-07, |
|
"loss": 1.8417, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.8154994667614646, |
|
"grad_norm": 0.6097250867359322, |
|
"learning_rate": 9.202995856131769e-07, |
|
"loss": 1.8076, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.8202393648536557, |
|
"grad_norm": 0.6610392263190566, |
|
"learning_rate": 8.734905460590581e-07, |
|
"loss": 1.7511, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.824979262945847, |
|
"grad_norm": 0.6070988311686517, |
|
"learning_rate": 8.278927155121851e-07, |
|
"loss": 1.8309, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.829719161038038, |
|
"grad_norm": 0.6261583831010433, |
|
"learning_rate": 7.835072181823666e-07, |
|
"loss": 1.8377, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.8344590591302286, |
|
"grad_norm": 0.6243423055956993, |
|
"learning_rate": 7.403351483894427e-07, |
|
"loss": 1.7941, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.8391989572224197, |
|
"grad_norm": 0.702784469663522, |
|
"learning_rate": 6.983775705363238e-07, |
|
"loss": 1.8042, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.8439388553146108, |
|
"grad_norm": 0.5996597981711203, |
|
"learning_rate": 6.576355190827499e-07, |
|
"loss": 1.8512, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.848678753406802, |
|
"grad_norm": 0.5539803926109534, |
|
"learning_rate": 6.181099985197947e-07, |
|
"loss": 1.8558, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.853418651498993, |
|
"grad_norm": 0.5462268948543724, |
|
"learning_rate": 5.798019833450629e-07, |
|
"loss": 1.7838, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.8581585495911837, |
|
"grad_norm": 0.6522918616165346, |
|
"learning_rate": 5.4271241803871e-07, |
|
"loss": 1.8523, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.8628984476833748, |
|
"grad_norm": 0.6013569849197028, |
|
"learning_rate": 5.068422170401377e-07, |
|
"loss": 1.8239, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.867638345775566, |
|
"grad_norm": 0.6217056805780841, |
|
"learning_rate": 4.72192264725424e-07, |
|
"loss": 1.8316, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.872378243867757, |
|
"grad_norm": 0.6047869013985818, |
|
"learning_rate": 4.387634153855791e-07, |
|
"loss": 1.8189, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.877118141959948, |
|
"grad_norm": 0.6730414277089524, |
|
"learning_rate": 4.065564932054067e-07, |
|
"loss": 1.7824, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.8818580400521387, |
|
"grad_norm": 0.612791047561647, |
|
"learning_rate": 3.755722922432481e-07, |
|
"loss": 1.7867, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.88659793814433, |
|
"grad_norm": 0.6615842561782111, |
|
"learning_rate": 3.4581157641137563e-07, |
|
"loss": 1.8359, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.891337836236521, |
|
"grad_norm": 0.6358101876016702, |
|
"learning_rate": 3.1727507945714663e-07, |
|
"loss": 1.8628, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.896077734328712, |
|
"grad_norm": 0.5951921137175086, |
|
"learning_rate": 2.8996350494495116e-07, |
|
"loss": 1.8516, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.900817632420903, |
|
"grad_norm": 0.6310271682459363, |
|
"learning_rate": 2.6387752623883156e-07, |
|
"loss": 1.8437, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.905557530513094, |
|
"grad_norm": 0.6305755436522482, |
|
"learning_rate": 2.390177864858956e-07, |
|
"loss": 1.8514, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.910297428605285, |
|
"grad_norm": 0.6404150710185624, |
|
"learning_rate": 2.1538489860044587e-07, |
|
"loss": 1.8186, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.915037326697476, |
|
"grad_norm": 0.6158013141692098, |
|
"learning_rate": 1.92979445248892e-07, |
|
"loss": 1.8083, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.919777224789667, |
|
"grad_norm": 0.6416671093424775, |
|
"learning_rate": 1.7180197883537308e-07, |
|
"loss": 1.7786, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.9245171228818583, |
|
"grad_norm": 0.5582605199061633, |
|
"learning_rate": 1.518530214881242e-07, |
|
"loss": 1.7976, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.929257020974049, |
|
"grad_norm": 0.6106802327952866, |
|
"learning_rate": 1.3313306504663115e-07, |
|
"loss": 1.7604, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.93399691906624, |
|
"grad_norm": 0.649320638486437, |
|
"learning_rate": 1.1564257104947352e-07, |
|
"loss": 1.8441, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.938736817158431, |
|
"grad_norm": 0.5884577603080124, |
|
"learning_rate": 9.938197072298372e-08, |
|
"loss": 1.8196, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.9434767152506223, |
|
"grad_norm": 0.6392485935256708, |
|
"learning_rate": 8.435166497057222e-08, |
|
"loss": 1.857, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.9482166133428134, |
|
"grad_norm": 0.6506401892518179, |
|
"learning_rate": 7.055202436287433e-08, |
|
"loss": 1.7725, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.952956511435004, |
|
"grad_norm": 0.6149298488489828, |
|
"learning_rate": 5.7983389128596355e-08, |
|
"loss": 1.8946, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.957696409527195, |
|
"grad_norm": 0.5722181216171393, |
|
"learning_rate": 4.664606914615011e-08, |
|
"loss": 1.8542, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.9624363076193863, |
|
"grad_norm": 0.6428450313630513, |
|
"learning_rate": 3.654034393598127e-08, |
|
"loss": 1.824, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.9671762057115774, |
|
"grad_norm": 0.6329021168786573, |
|
"learning_rate": 2.766646265369155e-08, |
|
"loss": 1.8012, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.9719161038037685, |
|
"grad_norm": 0.6406715656233972, |
|
"learning_rate": 2.0024644083921352e-08, |
|
"loss": 1.8472, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.976656001895959, |
|
"grad_norm": 0.5842266635593326, |
|
"learning_rate": 1.3615076634898582e-08, |
|
"loss": 1.8102, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.9813958999881502, |
|
"grad_norm": 0.6430039656205391, |
|
"learning_rate": 8.437918333864536e-09, |
|
"loss": 1.7935, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.9861357980803414, |
|
"grad_norm": 0.6055802510109696, |
|
"learning_rate": 4.493296823104842e-09, |
|
"loss": 1.8425, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.990875696172532, |
|
"grad_norm": 0.5757552404684133, |
|
"learning_rate": 1.781309356863048e-09, |
|
"loss": 1.8636, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.9956155942647236, |
|
"grad_norm": 0.602338679600079, |
|
"learning_rate": 3.0202279890922947e-10, |
|
"loss": 1.7555, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.998933522929257, |
|
"step": 6327, |
|
"total_flos": 3180599149854720.0, |
|
"train_loss": 1.9022130669246677, |
|
"train_runtime": 57110.8809, |
|
"train_samples_per_second": 0.887, |
|
"train_steps_per_second": 0.111 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6327, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3180599149854720.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|