diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,218363 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9999935863106213, + "eval_steps": 61000, + "global_step": 311832, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.413689378609705e-05, + "grad_norm": 8.135562896728516, + "learning_rate": 3.206772703950744e-09, + "loss": 0.8416, + "step": 10 + }, + { + "epoch": 0.0001282737875721941, + "grad_norm": 10.716862678527832, + "learning_rate": 6.413545407901488e-09, + "loss": 0.9044, + "step": 20 + }, + { + "epoch": 0.00019241068135829114, + "grad_norm": 11.06715202331543, + "learning_rate": 9.620318111852233e-09, + "loss": 0.8359, + "step": 30 + }, + { + "epoch": 0.0002565475751443882, + "grad_norm": 9.63078498840332, + "learning_rate": 1.2827090815802977e-08, + "loss": 0.8421, + "step": 40 + }, + { + "epoch": 0.0003206844689304852, + "grad_norm": 8.548972129821777, + "learning_rate": 1.6033863519753723e-08, + "loss": 0.8503, + "step": 50 + }, + { + "epoch": 0.0003848213627165823, + "grad_norm": 10.621478080749512, + "learning_rate": 1.9240636223704465e-08, + "loss": 0.8608, + "step": 60 + }, + { + "epoch": 0.0004489582565026793, + "grad_norm": 10.499129295349121, + "learning_rate": 2.2447408927655208e-08, + "loss": 0.8388, + "step": 70 + }, + { + "epoch": 0.0005130951502887764, + "grad_norm": 11.931020736694336, + "learning_rate": 2.5654181631605953e-08, + "loss": 0.8689, + "step": 80 + }, + { + "epoch": 0.0005772320440748734, + "grad_norm": 11.505675315856934, + "learning_rate": 2.88609543355567e-08, + "loss": 0.835, + "step": 90 + }, + { + "epoch": 0.0006413689378609704, + "grad_norm": 6.3639044761657715, + "learning_rate": 3.2067727039507445e-08, + "loss": 0.8218, + "step": 100 + }, + { + "epoch": 0.0007055058316470676, + "grad_norm": 10.279727935791016, + "learning_rate": 3.527449974345819e-08, + "loss": 0.8279, + "step": 110 + }, + { + "epoch": 0.0007696427254331646, + "grad_norm": 9.912506103515625, + "learning_rate": 3.848127244740893e-08, + "loss": 0.8666, + "step": 120 + }, + { + "epoch": 0.0008337796192192616, + "grad_norm": 9.041036605834961, + "learning_rate": 4.168804515135967e-08, + "loss": 0.8098, + "step": 130 + }, + { + "epoch": 0.0008979165130053586, + "grad_norm": 10.479462623596191, + "learning_rate": 4.4894817855310415e-08, + "loss": 0.8174, + "step": 140 + }, + { + "epoch": 0.0009620534067914557, + "grad_norm": 11.549273490905762, + "learning_rate": 4.8101590559261164e-08, + "loss": 0.8904, + "step": 150 + }, + { + "epoch": 0.0010261903005775528, + "grad_norm": 9.424370765686035, + "learning_rate": 5.130836326321191e-08, + "loss": 0.8845, + "step": 160 + }, + { + "epoch": 0.0010903271943636498, + "grad_norm": 10.978484153747559, + "learning_rate": 5.451513596716265e-08, + "loss": 0.8073, + "step": 170 + }, + { + "epoch": 0.0011544640881497468, + "grad_norm": 9.741132736206055, + "learning_rate": 5.77219086711134e-08, + "loss": 0.8366, + "step": 180 + }, + { + "epoch": 0.0012186009819358439, + "grad_norm": 6.865855693817139, + "learning_rate": 6.092868137506415e-08, + "loss": 0.7748, + "step": 190 + }, + { + "epoch": 0.0012827378757219409, + "grad_norm": 10.588968276977539, + "learning_rate": 6.413545407901489e-08, + "loss": 0.8293, + "step": 200 + }, + { + "epoch": 0.0013468747695080379, + "grad_norm": 10.359200477600098, + "learning_rate": 6.734222678296562e-08, + "loss": 0.7546, + "step": 210 + }, + { + "epoch": 0.001411011663294135, + "grad_norm": 10.029732704162598, + "learning_rate": 7.054899948691638e-08, + "loss": 0.8056, + "step": 220 + }, + { + "epoch": 0.0014751485570802321, + "grad_norm": 8.809959411621094, + "learning_rate": 7.375577219086712e-08, + "loss": 0.7782, + "step": 230 + }, + { + "epoch": 0.0015392854508663291, + "grad_norm": 7.988217830657959, + "learning_rate": 7.696254489481786e-08, + "loss": 0.7392, + "step": 240 + }, + { + "epoch": 0.0016034223446524261, + "grad_norm": 7.067934513092041, + "learning_rate": 8.01693175987686e-08, + "loss": 0.7242, + "step": 250 + }, + { + "epoch": 0.0016675592384385231, + "grad_norm": 7.659463882446289, + "learning_rate": 8.337609030271935e-08, + "loss": 0.7337, + "step": 260 + }, + { + "epoch": 0.0017316961322246202, + "grad_norm": 9.248407363891602, + "learning_rate": 8.658286300667009e-08, + "loss": 0.7197, + "step": 270 + }, + { + "epoch": 0.0017958330260107172, + "grad_norm": 9.64366626739502, + "learning_rate": 8.978963571062083e-08, + "loss": 0.7122, + "step": 280 + }, + { + "epoch": 0.0018599699197968144, + "grad_norm": 7.405231475830078, + "learning_rate": 9.299640841457157e-08, + "loss": 0.6541, + "step": 290 + }, + { + "epoch": 0.0019241068135829114, + "grad_norm": 7.820592403411865, + "learning_rate": 9.620318111852233e-08, + "loss": 0.6595, + "step": 300 + }, + { + "epoch": 0.001988243707369008, + "grad_norm": 6.586963653564453, + "learning_rate": 9.940995382247307e-08, + "loss": 0.629, + "step": 310 + }, + { + "epoch": 0.0020523806011551056, + "grad_norm": 5.655336380004883, + "learning_rate": 1.0261672652642381e-07, + "loss": 0.5769, + "step": 320 + }, + { + "epoch": 0.0021165174949412027, + "grad_norm": 4.75510835647583, + "learning_rate": 1.0582349923037456e-07, + "loss": 0.5499, + "step": 330 + }, + { + "epoch": 0.0021806543887272997, + "grad_norm": 5.1090288162231445, + "learning_rate": 1.090302719343253e-07, + "loss": 0.5097, + "step": 340 + }, + { + "epoch": 0.0022447912825133967, + "grad_norm": 5.625272274017334, + "learning_rate": 1.1223704463827605e-07, + "loss": 0.4836, + "step": 350 + }, + { + "epoch": 0.0023089281762994937, + "grad_norm": 5.049161434173584, + "learning_rate": 1.154438173422268e-07, + "loss": 0.4603, + "step": 360 + }, + { + "epoch": 0.0023730650700855907, + "grad_norm": 5.61590051651001, + "learning_rate": 1.1865059004617754e-07, + "loss": 0.4457, + "step": 370 + }, + { + "epoch": 0.0024372019638716877, + "grad_norm": 4.5563435554504395, + "learning_rate": 1.218573627501283e-07, + "loss": 0.4312, + "step": 380 + }, + { + "epoch": 0.0025013388576577847, + "grad_norm": 4.078986167907715, + "learning_rate": 1.2506413545407904e-07, + "loss": 0.3961, + "step": 390 + }, + { + "epoch": 0.0025654757514438817, + "grad_norm": 4.474985122680664, + "learning_rate": 1.2827090815802978e-07, + "loss": 0.3504, + "step": 400 + }, + { + "epoch": 0.0026296126452299787, + "grad_norm": 3.7105531692504883, + "learning_rate": 1.3147768086198052e-07, + "loss": 0.3325, + "step": 410 + }, + { + "epoch": 0.0026937495390160758, + "grad_norm": 3.7654149532318115, + "learning_rate": 1.3468445356593124e-07, + "loss": 0.2948, + "step": 420 + }, + { + "epoch": 0.0027578864328021728, + "grad_norm": 3.5739786624908447, + "learning_rate": 1.37891226269882e-07, + "loss": 0.2721, + "step": 430 + }, + { + "epoch": 0.00282202332658827, + "grad_norm": 2.772843599319458, + "learning_rate": 1.4109799897383275e-07, + "loss": 0.25, + "step": 440 + }, + { + "epoch": 0.0028861602203743672, + "grad_norm": 2.9764015674591064, + "learning_rate": 1.443047716777835e-07, + "loss": 0.2425, + "step": 450 + }, + { + "epoch": 0.0029502971141604642, + "grad_norm": 2.5800020694732666, + "learning_rate": 1.4751154438173424e-07, + "loss": 0.2328, + "step": 460 + }, + { + "epoch": 0.0030144340079465612, + "grad_norm": 2.62642765045166, + "learning_rate": 1.5071831708568498e-07, + "loss": 0.2177, + "step": 470 + }, + { + "epoch": 0.0030785709017326583, + "grad_norm": 3.1305058002471924, + "learning_rate": 1.5392508978963572e-07, + "loss": 0.2358, + "step": 480 + }, + { + "epoch": 0.0031427077955187553, + "grad_norm": 2.686131000518799, + "learning_rate": 1.5713186249358646e-07, + "loss": 0.2118, + "step": 490 + }, + { + "epoch": 0.0032068446893048523, + "grad_norm": 2.276582717895508, + "learning_rate": 1.603386351975372e-07, + "loss": 0.2095, + "step": 500 + }, + { + "epoch": 0.0032709815830909493, + "grad_norm": 2.1569366455078125, + "learning_rate": 1.6354540790148795e-07, + "loss": 0.2041, + "step": 510 + }, + { + "epoch": 0.0033351184768770463, + "grad_norm": 2.888230800628662, + "learning_rate": 1.667521806054387e-07, + "loss": 0.1833, + "step": 520 + }, + { + "epoch": 0.0033992553706631433, + "grad_norm": 2.5815255641937256, + "learning_rate": 1.6995895330938946e-07, + "loss": 0.1653, + "step": 530 + }, + { + "epoch": 0.0034633922644492403, + "grad_norm": 2.9071788787841797, + "learning_rate": 1.7316572601334018e-07, + "loss": 0.1935, + "step": 540 + }, + { + "epoch": 0.0035275291582353373, + "grad_norm": 2.7453720569610596, + "learning_rate": 1.7637249871729094e-07, + "loss": 0.1791, + "step": 550 + }, + { + "epoch": 0.0035916660520214343, + "grad_norm": 2.607252836227417, + "learning_rate": 1.7957927142124166e-07, + "loss": 0.1677, + "step": 560 + }, + { + "epoch": 0.0036558029458075318, + "grad_norm": 2.0972280502319336, + "learning_rate": 1.8278604412519243e-07, + "loss": 0.161, + "step": 570 + }, + { + "epoch": 0.003719939839593629, + "grad_norm": 2.4978232383728027, + "learning_rate": 1.8599281682914315e-07, + "loss": 0.1847, + "step": 580 + }, + { + "epoch": 0.003784076733379726, + "grad_norm": 3.0713841915130615, + "learning_rate": 1.8919958953309391e-07, + "loss": 0.1549, + "step": 590 + }, + { + "epoch": 0.003848213627165823, + "grad_norm": 2.3856425285339355, + "learning_rate": 1.9240636223704466e-07, + "loss": 0.143, + "step": 600 + }, + { + "epoch": 0.00391235052095192, + "grad_norm": 2.8760671615600586, + "learning_rate": 1.956131349409954e-07, + "loss": 0.1642, + "step": 610 + }, + { + "epoch": 0.003976487414738016, + "grad_norm": 2.787055253982544, + "learning_rate": 1.9881990764494614e-07, + "loss": 0.1705, + "step": 620 + }, + { + "epoch": 0.004040624308524114, + "grad_norm": 2.4248359203338623, + "learning_rate": 2.020266803488969e-07, + "loss": 0.1677, + "step": 630 + }, + { + "epoch": 0.004104761202310211, + "grad_norm": 2.2203550338745117, + "learning_rate": 2.0523345305284763e-07, + "loss": 0.1379, + "step": 640 + }, + { + "epoch": 0.004168898096096308, + "grad_norm": 2.3956847190856934, + "learning_rate": 2.084402257567984e-07, + "loss": 0.1413, + "step": 650 + }, + { + "epoch": 0.004233034989882405, + "grad_norm": 2.7636780738830566, + "learning_rate": 2.116469984607491e-07, + "loss": 0.1464, + "step": 660 + }, + { + "epoch": 0.004297171883668502, + "grad_norm": 2.452714204788208, + "learning_rate": 2.1485377116469985e-07, + "loss": 0.1429, + "step": 670 + }, + { + "epoch": 0.004361308777454599, + "grad_norm": 2.292991876602173, + "learning_rate": 2.180605438686506e-07, + "loss": 0.1403, + "step": 680 + }, + { + "epoch": 0.004425445671240696, + "grad_norm": 3.0862314701080322, + "learning_rate": 2.2126731657260134e-07, + "loss": 0.1445, + "step": 690 + }, + { + "epoch": 0.004489582565026793, + "grad_norm": 2.497093439102173, + "learning_rate": 2.244740892765521e-07, + "loss": 0.1285, + "step": 700 + }, + { + "epoch": 0.00455371945881289, + "grad_norm": 2.4969825744628906, + "learning_rate": 2.2768086198050282e-07, + "loss": 0.1384, + "step": 710 + }, + { + "epoch": 0.004617856352598987, + "grad_norm": 2.2161011695861816, + "learning_rate": 2.308876346844536e-07, + "loss": 0.1266, + "step": 720 + }, + { + "epoch": 0.004681993246385084, + "grad_norm": 2.202615737915039, + "learning_rate": 2.340944073884043e-07, + "loss": 0.1246, + "step": 730 + }, + { + "epoch": 0.004746130140171181, + "grad_norm": 2.171642541885376, + "learning_rate": 2.3730118009235508e-07, + "loss": 0.1377, + "step": 740 + }, + { + "epoch": 0.004810267033957279, + "grad_norm": 2.4430935382843018, + "learning_rate": 2.405079527963058e-07, + "loss": 0.116, + "step": 750 + }, + { + "epoch": 0.004874403927743375, + "grad_norm": 2.2906432151794434, + "learning_rate": 2.437147255002566e-07, + "loss": 0.1166, + "step": 760 + }, + { + "epoch": 0.004938540821529473, + "grad_norm": 2.0091114044189453, + "learning_rate": 2.469214982042073e-07, + "loss": 0.1279, + "step": 770 + }, + { + "epoch": 0.0050026777153155694, + "grad_norm": 2.5473973751068115, + "learning_rate": 2.501282709081581e-07, + "loss": 0.1176, + "step": 780 + }, + { + "epoch": 0.005066814609101667, + "grad_norm": 2.719332218170166, + "learning_rate": 2.5333504361210877e-07, + "loss": 0.1131, + "step": 790 + }, + { + "epoch": 0.0051309515028877635, + "grad_norm": 2.3760056495666504, + "learning_rate": 2.5654181631605956e-07, + "loss": 0.1032, + "step": 800 + }, + { + "epoch": 0.005195088396673861, + "grad_norm": 3.086043119430542, + "learning_rate": 2.597485890200103e-07, + "loss": 0.1031, + "step": 810 + }, + { + "epoch": 0.0052592252904599575, + "grad_norm": 2.121891498565674, + "learning_rate": 2.6295536172396105e-07, + "loss": 0.1154, + "step": 820 + }, + { + "epoch": 0.005323362184246055, + "grad_norm": 2.0646345615386963, + "learning_rate": 2.661621344279118e-07, + "loss": 0.0958, + "step": 830 + }, + { + "epoch": 0.0053874990780321515, + "grad_norm": 2.8303093910217285, + "learning_rate": 2.693689071318625e-07, + "loss": 0.1153, + "step": 840 + }, + { + "epoch": 0.005451635971818249, + "grad_norm": 2.213944673538208, + "learning_rate": 2.7257567983581327e-07, + "loss": 0.1195, + "step": 850 + }, + { + "epoch": 0.0055157728656043455, + "grad_norm": 2.0894155502319336, + "learning_rate": 2.75782452539764e-07, + "loss": 0.0937, + "step": 860 + }, + { + "epoch": 0.005579909759390443, + "grad_norm": 1.789013147354126, + "learning_rate": 2.7898922524371476e-07, + "loss": 0.1072, + "step": 870 + }, + { + "epoch": 0.00564404665317654, + "grad_norm": 2.3917477130889893, + "learning_rate": 2.821959979476655e-07, + "loss": 0.114, + "step": 880 + }, + { + "epoch": 0.005708183546962637, + "grad_norm": 3.153083086013794, + "learning_rate": 2.8540277065161624e-07, + "loss": 0.1051, + "step": 890 + }, + { + "epoch": 0.0057723204407487344, + "grad_norm": 2.2773597240448, + "learning_rate": 2.88609543355567e-07, + "loss": 0.0967, + "step": 900 + }, + { + "epoch": 0.005836457334534831, + "grad_norm": 3.3676769733428955, + "learning_rate": 2.9181631605951773e-07, + "loss": 0.1016, + "step": 910 + }, + { + "epoch": 0.0059005942283209285, + "grad_norm": 2.569157600402832, + "learning_rate": 2.9502308876346847e-07, + "loss": 0.1016, + "step": 920 + }, + { + "epoch": 0.005964731122107025, + "grad_norm": 2.6093432903289795, + "learning_rate": 2.982298614674192e-07, + "loss": 0.0815, + "step": 930 + }, + { + "epoch": 0.0060288680158931225, + "grad_norm": 2.2556352615356445, + "learning_rate": 3.0143663417136996e-07, + "loss": 0.0999, + "step": 940 + }, + { + "epoch": 0.006093004909679219, + "grad_norm": 2.0448052883148193, + "learning_rate": 3.046434068753207e-07, + "loss": 0.0885, + "step": 950 + }, + { + "epoch": 0.0061571418034653165, + "grad_norm": 2.736361503601074, + "learning_rate": 3.0785017957927144e-07, + "loss": 0.0933, + "step": 960 + }, + { + "epoch": 0.006221278697251413, + "grad_norm": 2.21384596824646, + "learning_rate": 3.110569522832222e-07, + "loss": 0.0926, + "step": 970 + }, + { + "epoch": 0.0062854155910375105, + "grad_norm": 2.22341251373291, + "learning_rate": 3.142637249871729e-07, + "loss": 0.0842, + "step": 980 + }, + { + "epoch": 0.006349552484823607, + "grad_norm": 1.8333430290222168, + "learning_rate": 3.1747049769112367e-07, + "loss": 0.0882, + "step": 990 + }, + { + "epoch": 0.0064136893786097045, + "grad_norm": 2.1448442935943604, + "learning_rate": 3.206772703950744e-07, + "loss": 0.0885, + "step": 1000 + }, + { + "epoch": 0.006477826272395802, + "grad_norm": 2.026063919067383, + "learning_rate": 3.238840430990252e-07, + "loss": 0.0797, + "step": 1010 + }, + { + "epoch": 0.006541963166181899, + "grad_norm": 2.610535144805908, + "learning_rate": 3.270908158029759e-07, + "loss": 0.0939, + "step": 1020 + }, + { + "epoch": 0.006606100059967996, + "grad_norm": 2.1977763175964355, + "learning_rate": 3.3029758850692664e-07, + "loss": 0.0874, + "step": 1030 + }, + { + "epoch": 0.006670236953754093, + "grad_norm": 2.115877389907837, + "learning_rate": 3.335043612108774e-07, + "loss": 0.1032, + "step": 1040 + }, + { + "epoch": 0.00673437384754019, + "grad_norm": 2.53721022605896, + "learning_rate": 3.367111339148282e-07, + "loss": 0.1079, + "step": 1050 + }, + { + "epoch": 0.006798510741326287, + "grad_norm": 2.2047951221466064, + "learning_rate": 3.399179066187789e-07, + "loss": 0.0902, + "step": 1060 + }, + { + "epoch": 0.006862647635112384, + "grad_norm": 1.9997676610946655, + "learning_rate": 3.431246793227296e-07, + "loss": 0.0878, + "step": 1070 + }, + { + "epoch": 0.006926784528898481, + "grad_norm": 2.194669723510742, + "learning_rate": 3.4633145202668035e-07, + "loss": 0.0795, + "step": 1080 + }, + { + "epoch": 0.006990921422684578, + "grad_norm": 2.847025156021118, + "learning_rate": 3.4953822473063115e-07, + "loss": 0.0812, + "step": 1090 + }, + { + "epoch": 0.007055058316470675, + "grad_norm": 2.1623098850250244, + "learning_rate": 3.527449974345819e-07, + "loss": 0.0733, + "step": 1100 + }, + { + "epoch": 0.007119195210256772, + "grad_norm": 2.1313631534576416, + "learning_rate": 3.559517701385326e-07, + "loss": 0.0887, + "step": 1110 + }, + { + "epoch": 0.007183332104042869, + "grad_norm": 1.838502049446106, + "learning_rate": 3.591585428424833e-07, + "loss": 0.0816, + "step": 1120 + }, + { + "epoch": 0.007247468997828966, + "grad_norm": 2.4724252223968506, + "learning_rate": 3.623653155464341e-07, + "loss": 0.0701, + "step": 1130 + }, + { + "epoch": 0.0073116058916150636, + "grad_norm": 2.5112111568450928, + "learning_rate": 3.6557208825038486e-07, + "loss": 0.0797, + "step": 1140 + }, + { + "epoch": 0.00737574278540116, + "grad_norm": 2.5839927196502686, + "learning_rate": 3.687788609543356e-07, + "loss": 0.0855, + "step": 1150 + }, + { + "epoch": 0.007439879679187258, + "grad_norm": 3.895423650741577, + "learning_rate": 3.719856336582863e-07, + "loss": 0.0889, + "step": 1160 + }, + { + "epoch": 0.007504016572973354, + "grad_norm": 2.3129162788391113, + "learning_rate": 3.7519240636223703e-07, + "loss": 0.0737, + "step": 1170 + }, + { + "epoch": 0.007568153466759452, + "grad_norm": 2.2384352684020996, + "learning_rate": 3.7839917906618783e-07, + "loss": 0.0725, + "step": 1180 + }, + { + "epoch": 0.007632290360545548, + "grad_norm": 2.1032588481903076, + "learning_rate": 3.8160595177013857e-07, + "loss": 0.0948, + "step": 1190 + }, + { + "epoch": 0.007696427254331646, + "grad_norm": 2.255089044570923, + "learning_rate": 3.848127244740893e-07, + "loss": 0.0798, + "step": 1200 + }, + { + "epoch": 0.007760564148117742, + "grad_norm": 1.7414604425430298, + "learning_rate": 3.8801949717804e-07, + "loss": 0.0772, + "step": 1210 + }, + { + "epoch": 0.00782470104190384, + "grad_norm": 1.687619924545288, + "learning_rate": 3.912262698819908e-07, + "loss": 0.0706, + "step": 1220 + }, + { + "epoch": 0.007888837935689936, + "grad_norm": 2.5747528076171875, + "learning_rate": 3.9443304258594154e-07, + "loss": 0.0759, + "step": 1230 + }, + { + "epoch": 0.007952974829476033, + "grad_norm": 2.255894660949707, + "learning_rate": 3.976398152898923e-07, + "loss": 0.0845, + "step": 1240 + }, + { + "epoch": 0.008017111723262131, + "grad_norm": 2.367706775665283, + "learning_rate": 4.0084658799384303e-07, + "loss": 0.092, + "step": 1250 + }, + { + "epoch": 0.008081248617048228, + "grad_norm": 1.8855806589126587, + "learning_rate": 4.040533606977938e-07, + "loss": 0.0684, + "step": 1260 + }, + { + "epoch": 0.008145385510834324, + "grad_norm": 2.4973232746124268, + "learning_rate": 4.072601334017445e-07, + "loss": 0.0626, + "step": 1270 + }, + { + "epoch": 0.008209522404620423, + "grad_norm": 2.1072261333465576, + "learning_rate": 4.1046690610569525e-07, + "loss": 0.0799, + "step": 1280 + }, + { + "epoch": 0.00827365929840652, + "grad_norm": 1.9451370239257812, + "learning_rate": 4.13673678809646e-07, + "loss": 0.0763, + "step": 1290 + }, + { + "epoch": 0.008337796192192616, + "grad_norm": 2.526870012283325, + "learning_rate": 4.168804515135968e-07, + "loss": 0.0799, + "step": 1300 + }, + { + "epoch": 0.008401933085978712, + "grad_norm": 1.7936248779296875, + "learning_rate": 4.2008722421754754e-07, + "loss": 0.0657, + "step": 1310 + }, + { + "epoch": 0.00846606997976481, + "grad_norm": 1.7973401546478271, + "learning_rate": 4.232939969214982e-07, + "loss": 0.0768, + "step": 1320 + }, + { + "epoch": 0.008530206873550907, + "grad_norm": 2.051457643508911, + "learning_rate": 4.2650076962544897e-07, + "loss": 0.0839, + "step": 1330 + }, + { + "epoch": 0.008594343767337004, + "grad_norm": 1.8935335874557495, + "learning_rate": 4.297075423293997e-07, + "loss": 0.0722, + "step": 1340 + }, + { + "epoch": 0.0086584806611231, + "grad_norm": 1.8838813304901123, + "learning_rate": 4.329143150333505e-07, + "loss": 0.0655, + "step": 1350 + }, + { + "epoch": 0.008722617554909199, + "grad_norm": 1.9510728120803833, + "learning_rate": 4.361210877373012e-07, + "loss": 0.0616, + "step": 1360 + }, + { + "epoch": 0.008786754448695295, + "grad_norm": 1.8975841999053955, + "learning_rate": 4.3932786044125194e-07, + "loss": 0.0611, + "step": 1370 + }, + { + "epoch": 0.008850891342481392, + "grad_norm": 1.757426142692566, + "learning_rate": 4.425346331452027e-07, + "loss": 0.0648, + "step": 1380 + }, + { + "epoch": 0.00891502823626749, + "grad_norm": 2.4363205432891846, + "learning_rate": 4.457414058491535e-07, + "loss": 0.0687, + "step": 1390 + }, + { + "epoch": 0.008979165130053587, + "grad_norm": 2.466268301010132, + "learning_rate": 4.489481785531042e-07, + "loss": 0.0593, + "step": 1400 + }, + { + "epoch": 0.009043302023839683, + "grad_norm": 2.0895450115203857, + "learning_rate": 4.521549512570549e-07, + "loss": 0.0619, + "step": 1410 + }, + { + "epoch": 0.00910743891762578, + "grad_norm": 1.737841248512268, + "learning_rate": 4.5536172396100565e-07, + "loss": 0.066, + "step": 1420 + }, + { + "epoch": 0.009171575811411878, + "grad_norm": 1.9468801021575928, + "learning_rate": 4.5856849666495645e-07, + "loss": 0.0703, + "step": 1430 + }, + { + "epoch": 0.009235712705197975, + "grad_norm": 1.5404072999954224, + "learning_rate": 4.617752693689072e-07, + "loss": 0.0702, + "step": 1440 + }, + { + "epoch": 0.009299849598984071, + "grad_norm": 1.623038411140442, + "learning_rate": 4.6498204207285793e-07, + "loss": 0.0684, + "step": 1450 + }, + { + "epoch": 0.009363986492770168, + "grad_norm": 1.524778127670288, + "learning_rate": 4.681888147768086e-07, + "loss": 0.0503, + "step": 1460 + }, + { + "epoch": 0.009428123386556266, + "grad_norm": 2.2616991996765137, + "learning_rate": 4.713955874807594e-07, + "loss": 0.0728, + "step": 1470 + }, + { + "epoch": 0.009492260280342363, + "grad_norm": 1.9728755950927734, + "learning_rate": 4.7460236018471016e-07, + "loss": 0.0634, + "step": 1480 + }, + { + "epoch": 0.00955639717412846, + "grad_norm": 2.3513500690460205, + "learning_rate": 4.778091328886609e-07, + "loss": 0.076, + "step": 1490 + }, + { + "epoch": 0.009620534067914558, + "grad_norm": 2.413816452026367, + "learning_rate": 4.810159055926116e-07, + "loss": 0.0559, + "step": 1500 + }, + { + "epoch": 0.009684670961700654, + "grad_norm": 2.188338041305542, + "learning_rate": 4.842226782965624e-07, + "loss": 0.0679, + "step": 1510 + }, + { + "epoch": 0.00974880785548675, + "grad_norm": 1.5445910692214966, + "learning_rate": 4.874294510005132e-07, + "loss": 0.0616, + "step": 1520 + }, + { + "epoch": 0.009812944749272847, + "grad_norm": 1.6505457162857056, + "learning_rate": 4.906362237044639e-07, + "loss": 0.0559, + "step": 1530 + }, + { + "epoch": 0.009877081643058946, + "grad_norm": 2.2405667304992676, + "learning_rate": 4.938429964084146e-07, + "loss": 0.0518, + "step": 1540 + }, + { + "epoch": 0.009941218536845042, + "grad_norm": 1.8405488729476929, + "learning_rate": 4.970497691123654e-07, + "loss": 0.0571, + "step": 1550 + }, + { + "epoch": 0.010005355430631139, + "grad_norm": 1.7272032499313354, + "learning_rate": 5.002565418163162e-07, + "loss": 0.0643, + "step": 1560 + }, + { + "epoch": 0.010069492324417235, + "grad_norm": 2.216449499130249, + "learning_rate": 5.034633145202668e-07, + "loss": 0.0602, + "step": 1570 + }, + { + "epoch": 0.010133629218203334, + "grad_norm": 1.555444598197937, + "learning_rate": 5.066700872242175e-07, + "loss": 0.0601, + "step": 1580 + }, + { + "epoch": 0.01019776611198943, + "grad_norm": 1.8560059070587158, + "learning_rate": 5.098768599281683e-07, + "loss": 0.0646, + "step": 1590 + }, + { + "epoch": 0.010261903005775527, + "grad_norm": 1.8203860521316528, + "learning_rate": 5.130836326321191e-07, + "loss": 0.0587, + "step": 1600 + }, + { + "epoch": 0.010326039899561624, + "grad_norm": 1.401741623878479, + "learning_rate": 5.162904053360698e-07, + "loss": 0.0666, + "step": 1610 + }, + { + "epoch": 0.010390176793347722, + "grad_norm": 2.429461717605591, + "learning_rate": 5.194971780400206e-07, + "loss": 0.0671, + "step": 1620 + }, + { + "epoch": 0.010454313687133818, + "grad_norm": 1.3045133352279663, + "learning_rate": 5.227039507439713e-07, + "loss": 0.0552, + "step": 1630 + }, + { + "epoch": 0.010518450580919915, + "grad_norm": 1.1228832006454468, + "learning_rate": 5.259107234479221e-07, + "loss": 0.0569, + "step": 1640 + }, + { + "epoch": 0.010582587474706013, + "grad_norm": 2.608595848083496, + "learning_rate": 5.291174961518728e-07, + "loss": 0.0614, + "step": 1650 + }, + { + "epoch": 0.01064672436849211, + "grad_norm": 2.572727918624878, + "learning_rate": 5.323242688558236e-07, + "loss": 0.0639, + "step": 1660 + }, + { + "epoch": 0.010710861262278206, + "grad_norm": 1.7884821891784668, + "learning_rate": 5.355310415597743e-07, + "loss": 0.0607, + "step": 1670 + }, + { + "epoch": 0.010774998156064303, + "grad_norm": 2.0818710327148438, + "learning_rate": 5.38737814263725e-07, + "loss": 0.0455, + "step": 1680 + }, + { + "epoch": 0.010839135049850401, + "grad_norm": 1.8627077341079712, + "learning_rate": 5.419445869676758e-07, + "loss": 0.0551, + "step": 1690 + }, + { + "epoch": 0.010903271943636498, + "grad_norm": 1.7816085815429688, + "learning_rate": 5.451513596716265e-07, + "loss": 0.0463, + "step": 1700 + }, + { + "epoch": 0.010967408837422594, + "grad_norm": 1.6360572576522827, + "learning_rate": 5.483581323755772e-07, + "loss": 0.0448, + "step": 1710 + }, + { + "epoch": 0.011031545731208691, + "grad_norm": 1.8366321325302124, + "learning_rate": 5.51564905079528e-07, + "loss": 0.0543, + "step": 1720 + }, + { + "epoch": 0.01109568262499479, + "grad_norm": 1.3522605895996094, + "learning_rate": 5.547716777834788e-07, + "loss": 0.0576, + "step": 1730 + }, + { + "epoch": 0.011159819518780886, + "grad_norm": 1.83017897605896, + "learning_rate": 5.579784504874295e-07, + "loss": 0.0636, + "step": 1740 + }, + { + "epoch": 0.011223956412566983, + "grad_norm": 1.6681979894638062, + "learning_rate": 5.611852231913802e-07, + "loss": 0.063, + "step": 1750 + }, + { + "epoch": 0.01128809330635308, + "grad_norm": 1.7300772666931152, + "learning_rate": 5.64391995895331e-07, + "loss": 0.0581, + "step": 1760 + }, + { + "epoch": 0.011352230200139177, + "grad_norm": 1.3762904405593872, + "learning_rate": 5.675987685992818e-07, + "loss": 0.0606, + "step": 1770 + }, + { + "epoch": 0.011416367093925274, + "grad_norm": 1.9628626108169556, + "learning_rate": 5.708055413032325e-07, + "loss": 0.0671, + "step": 1780 + }, + { + "epoch": 0.01148050398771137, + "grad_norm": 1.9082118272781372, + "learning_rate": 5.740123140071832e-07, + "loss": 0.0609, + "step": 1790 + }, + { + "epoch": 0.011544640881497469, + "grad_norm": 2.207084894180298, + "learning_rate": 5.77219086711134e-07, + "loss": 0.0571, + "step": 1800 + }, + { + "epoch": 0.011608777775283565, + "grad_norm": 1.3231322765350342, + "learning_rate": 5.804258594150848e-07, + "loss": 0.0496, + "step": 1810 + }, + { + "epoch": 0.011672914669069662, + "grad_norm": 1.5998111963272095, + "learning_rate": 5.836326321190355e-07, + "loss": 0.0606, + "step": 1820 + }, + { + "epoch": 0.011737051562855759, + "grad_norm": 1.6842598915100098, + "learning_rate": 5.868394048229861e-07, + "loss": 0.0557, + "step": 1830 + }, + { + "epoch": 0.011801188456641857, + "grad_norm": 1.4744997024536133, + "learning_rate": 5.900461775269369e-07, + "loss": 0.0567, + "step": 1840 + }, + { + "epoch": 0.011865325350427953, + "grad_norm": 2.192277431488037, + "learning_rate": 5.932529502308876e-07, + "loss": 0.0528, + "step": 1850 + }, + { + "epoch": 0.01192946224421405, + "grad_norm": 1.7011133432388306, + "learning_rate": 5.964597229348384e-07, + "loss": 0.0459, + "step": 1860 + }, + { + "epoch": 0.011993599138000147, + "grad_norm": 1.7284241914749146, + "learning_rate": 5.996664956387892e-07, + "loss": 0.0522, + "step": 1870 + }, + { + "epoch": 0.012057736031786245, + "grad_norm": 1.853323221206665, + "learning_rate": 6.028732683427399e-07, + "loss": 0.0577, + "step": 1880 + }, + { + "epoch": 0.012121872925572342, + "grad_norm": 1.6866708993911743, + "learning_rate": 6.060800410466906e-07, + "loss": 0.051, + "step": 1890 + }, + { + "epoch": 0.012186009819358438, + "grad_norm": 2.820150852203369, + "learning_rate": 6.092868137506414e-07, + "loss": 0.0478, + "step": 1900 + }, + { + "epoch": 0.012250146713144536, + "grad_norm": 1.3693703413009644, + "learning_rate": 6.124935864545922e-07, + "loss": 0.0417, + "step": 1910 + }, + { + "epoch": 0.012314283606930633, + "grad_norm": 1.4814716577529907, + "learning_rate": 6.157003591585429e-07, + "loss": 0.0447, + "step": 1920 + }, + { + "epoch": 0.01237842050071673, + "grad_norm": 1.7265548706054688, + "learning_rate": 6.189071318624936e-07, + "loss": 0.0519, + "step": 1930 + }, + { + "epoch": 0.012442557394502826, + "grad_norm": 1.7359428405761719, + "learning_rate": 6.221139045664444e-07, + "loss": 0.0465, + "step": 1940 + }, + { + "epoch": 0.012506694288288924, + "grad_norm": 1.5335360765457153, + "learning_rate": 6.253206772703952e-07, + "loss": 0.0476, + "step": 1950 + }, + { + "epoch": 0.012570831182075021, + "grad_norm": 1.7577869892120361, + "learning_rate": 6.285274499743459e-07, + "loss": 0.0431, + "step": 1960 + }, + { + "epoch": 0.012634968075861118, + "grad_norm": 1.253122091293335, + "learning_rate": 6.317342226782965e-07, + "loss": 0.0475, + "step": 1970 + }, + { + "epoch": 0.012699104969647214, + "grad_norm": 1.7545239925384521, + "learning_rate": 6.349409953822473e-07, + "loss": 0.0523, + "step": 1980 + }, + { + "epoch": 0.012763241863433313, + "grad_norm": 1.6145719289779663, + "learning_rate": 6.38147768086198e-07, + "loss": 0.0532, + "step": 1990 + }, + { + "epoch": 0.012827378757219409, + "grad_norm": 1.6796982288360596, + "learning_rate": 6.413545407901488e-07, + "loss": 0.0468, + "step": 2000 + }, + { + "epoch": 0.012891515651005506, + "grad_norm": 1.7424479722976685, + "learning_rate": 6.445613134940996e-07, + "loss": 0.0436, + "step": 2010 + }, + { + "epoch": 0.012955652544791604, + "grad_norm": 2.4510207176208496, + "learning_rate": 6.477680861980504e-07, + "loss": 0.0602, + "step": 2020 + }, + { + "epoch": 0.0130197894385777, + "grad_norm": 1.4376516342163086, + "learning_rate": 6.509748589020011e-07, + "loss": 0.0463, + "step": 2030 + }, + { + "epoch": 0.013083926332363797, + "grad_norm": 2.1743013858795166, + "learning_rate": 6.541816316059518e-07, + "loss": 0.0424, + "step": 2040 + }, + { + "epoch": 0.013148063226149894, + "grad_norm": 1.9213758707046509, + "learning_rate": 6.573884043099026e-07, + "loss": 0.0475, + "step": 2050 + }, + { + "epoch": 0.013212200119935992, + "grad_norm": 2.2999606132507324, + "learning_rate": 6.605951770138533e-07, + "loss": 0.0678, + "step": 2060 + }, + { + "epoch": 0.013276337013722089, + "grad_norm": 1.6947029829025269, + "learning_rate": 6.63801949717804e-07, + "loss": 0.0496, + "step": 2070 + }, + { + "epoch": 0.013340473907508185, + "grad_norm": 2.068711996078491, + "learning_rate": 6.670087224217548e-07, + "loss": 0.0513, + "step": 2080 + }, + { + "epoch": 0.013404610801294282, + "grad_norm": 2.0679399967193604, + "learning_rate": 6.702154951257056e-07, + "loss": 0.0608, + "step": 2090 + }, + { + "epoch": 0.01346874769508038, + "grad_norm": 1.390113115310669, + "learning_rate": 6.734222678296564e-07, + "loss": 0.0445, + "step": 2100 + }, + { + "epoch": 0.013532884588866477, + "grad_norm": 2.04164457321167, + "learning_rate": 6.76629040533607e-07, + "loss": 0.05, + "step": 2110 + }, + { + "epoch": 0.013597021482652573, + "grad_norm": 3.0909676551818848, + "learning_rate": 6.798358132375578e-07, + "loss": 0.048, + "step": 2120 + }, + { + "epoch": 0.013661158376438672, + "grad_norm": 1.5751962661743164, + "learning_rate": 6.830425859415085e-07, + "loss": 0.0506, + "step": 2130 + }, + { + "epoch": 0.013725295270224768, + "grad_norm": 1.8469059467315674, + "learning_rate": 6.862493586454592e-07, + "loss": 0.0482, + "step": 2140 + }, + { + "epoch": 0.013789432164010865, + "grad_norm": 2.0632050037384033, + "learning_rate": 6.8945613134941e-07, + "loss": 0.047, + "step": 2150 + }, + { + "epoch": 0.013853569057796961, + "grad_norm": 1.7224044799804688, + "learning_rate": 6.926629040533607e-07, + "loss": 0.0413, + "step": 2160 + }, + { + "epoch": 0.01391770595158306, + "grad_norm": 1.2005095481872559, + "learning_rate": 6.958696767573114e-07, + "loss": 0.0468, + "step": 2170 + }, + { + "epoch": 0.013981842845369156, + "grad_norm": 1.6894320249557495, + "learning_rate": 6.990764494612623e-07, + "loss": 0.0497, + "step": 2180 + }, + { + "epoch": 0.014045979739155253, + "grad_norm": 2.376833915710449, + "learning_rate": 7.02283222165213e-07, + "loss": 0.0418, + "step": 2190 + }, + { + "epoch": 0.01411011663294135, + "grad_norm": 1.680874228477478, + "learning_rate": 7.054899948691638e-07, + "loss": 0.0498, + "step": 2200 + }, + { + "epoch": 0.014174253526727448, + "grad_norm": 1.6105834245681763, + "learning_rate": 7.086967675731145e-07, + "loss": 0.0487, + "step": 2210 + }, + { + "epoch": 0.014238390420513544, + "grad_norm": 1.9710890054702759, + "learning_rate": 7.119035402770652e-07, + "loss": 0.0497, + "step": 2220 + }, + { + "epoch": 0.01430252731429964, + "grad_norm": 1.7088145017623901, + "learning_rate": 7.15110312981016e-07, + "loss": 0.0385, + "step": 2230 + }, + { + "epoch": 0.014366664208085737, + "grad_norm": 2.4289820194244385, + "learning_rate": 7.183170856849666e-07, + "loss": 0.0516, + "step": 2240 + }, + { + "epoch": 0.014430801101871836, + "grad_norm": 1.693321943283081, + "learning_rate": 7.215238583889174e-07, + "loss": 0.0435, + "step": 2250 + }, + { + "epoch": 0.014494937995657932, + "grad_norm": 1.9192099571228027, + "learning_rate": 7.247306310928682e-07, + "loss": 0.0482, + "step": 2260 + }, + { + "epoch": 0.014559074889444029, + "grad_norm": 0.8561009764671326, + "learning_rate": 7.27937403796819e-07, + "loss": 0.0493, + "step": 2270 + }, + { + "epoch": 0.014623211783230127, + "grad_norm": 1.7811331748962402, + "learning_rate": 7.311441765007697e-07, + "loss": 0.049, + "step": 2280 + }, + { + "epoch": 0.014687348677016224, + "grad_norm": 1.2994056940078735, + "learning_rate": 7.343509492047204e-07, + "loss": 0.0409, + "step": 2290 + }, + { + "epoch": 0.01475148557080232, + "grad_norm": 1.252750039100647, + "learning_rate": 7.375577219086712e-07, + "loss": 0.042, + "step": 2300 + }, + { + "epoch": 0.014815622464588417, + "grad_norm": 1.558959722518921, + "learning_rate": 7.407644946126219e-07, + "loss": 0.0445, + "step": 2310 + }, + { + "epoch": 0.014879759358374515, + "grad_norm": 1.7213983535766602, + "learning_rate": 7.439712673165726e-07, + "loss": 0.0394, + "step": 2320 + }, + { + "epoch": 0.014943896252160612, + "grad_norm": 1.8647485971450806, + "learning_rate": 7.471780400205234e-07, + "loss": 0.0441, + "step": 2330 + }, + { + "epoch": 0.015008033145946708, + "grad_norm": 2.38940167427063, + "learning_rate": 7.503848127244741e-07, + "loss": 0.0403, + "step": 2340 + }, + { + "epoch": 0.015072170039732805, + "grad_norm": 1.6554349660873413, + "learning_rate": 7.53591585428425e-07, + "loss": 0.0441, + "step": 2350 + }, + { + "epoch": 0.015136306933518903, + "grad_norm": 1.9792137145996094, + "learning_rate": 7.567983581323757e-07, + "loss": 0.0478, + "step": 2360 + }, + { + "epoch": 0.015200443827305, + "grad_norm": 1.3561503887176514, + "learning_rate": 7.600051308363265e-07, + "loss": 0.0414, + "step": 2370 + }, + { + "epoch": 0.015264580721091096, + "grad_norm": 2.6571028232574463, + "learning_rate": 7.632119035402771e-07, + "loss": 0.0393, + "step": 2380 + }, + { + "epoch": 0.015328717614877195, + "grad_norm": 1.7432042360305786, + "learning_rate": 7.664186762442278e-07, + "loss": 0.0366, + "step": 2390 + }, + { + "epoch": 0.015392854508663291, + "grad_norm": 1.9216645956039429, + "learning_rate": 7.696254489481786e-07, + "loss": 0.0399, + "step": 2400 + }, + { + "epoch": 0.015456991402449388, + "grad_norm": 1.4957715272903442, + "learning_rate": 7.728322216521293e-07, + "loss": 0.0412, + "step": 2410 + }, + { + "epoch": 0.015521128296235484, + "grad_norm": 2.0664377212524414, + "learning_rate": 7.7603899435608e-07, + "loss": 0.0315, + "step": 2420 + }, + { + "epoch": 0.015585265190021583, + "grad_norm": 1.9414721727371216, + "learning_rate": 7.792457670600309e-07, + "loss": 0.047, + "step": 2430 + }, + { + "epoch": 0.01564940208380768, + "grad_norm": 1.556045413017273, + "learning_rate": 7.824525397639816e-07, + "loss": 0.0401, + "step": 2440 + }, + { + "epoch": 0.015713538977593776, + "grad_norm": 1.55962336063385, + "learning_rate": 7.856593124679324e-07, + "loss": 0.0421, + "step": 2450 + }, + { + "epoch": 0.015777675871379872, + "grad_norm": 1.3714858293533325, + "learning_rate": 7.888660851718831e-07, + "loss": 0.0382, + "step": 2460 + }, + { + "epoch": 0.01584181276516597, + "grad_norm": 1.7481050491333008, + "learning_rate": 7.920728578758338e-07, + "loss": 0.0564, + "step": 2470 + }, + { + "epoch": 0.015905949658952066, + "grad_norm": 1.4160561561584473, + "learning_rate": 7.952796305797846e-07, + "loss": 0.0358, + "step": 2480 + }, + { + "epoch": 0.015970086552738166, + "grad_norm": 2.030376434326172, + "learning_rate": 7.984864032837353e-07, + "loss": 0.0425, + "step": 2490 + }, + { + "epoch": 0.016034223446524262, + "grad_norm": 1.5082240104675293, + "learning_rate": 8.016931759876861e-07, + "loss": 0.0461, + "step": 2500 + }, + { + "epoch": 0.01609836034031036, + "grad_norm": 1.9544262886047363, + "learning_rate": 8.048999486916367e-07, + "loss": 0.0383, + "step": 2510 + }, + { + "epoch": 0.016162497234096455, + "grad_norm": 1.6944468021392822, + "learning_rate": 8.081067213955876e-07, + "loss": 0.046, + "step": 2520 + }, + { + "epoch": 0.016226634127882552, + "grad_norm": 1.5525201559066772, + "learning_rate": 8.113134940995383e-07, + "loss": 0.0356, + "step": 2530 + }, + { + "epoch": 0.01629077102166865, + "grad_norm": 2.0058481693267822, + "learning_rate": 8.14520266803489e-07, + "loss": 0.0449, + "step": 2540 + }, + { + "epoch": 0.016354907915454745, + "grad_norm": 1.2101256847381592, + "learning_rate": 8.177270395074398e-07, + "loss": 0.0329, + "step": 2550 + }, + { + "epoch": 0.016419044809240845, + "grad_norm": 1.590275764465332, + "learning_rate": 8.209338122113905e-07, + "loss": 0.0438, + "step": 2560 + }, + { + "epoch": 0.016483181703026942, + "grad_norm": 1.4366024732589722, + "learning_rate": 8.241405849153412e-07, + "loss": 0.0358, + "step": 2570 + }, + { + "epoch": 0.01654731859681304, + "grad_norm": 1.3212709426879883, + "learning_rate": 8.27347357619292e-07, + "loss": 0.0422, + "step": 2580 + }, + { + "epoch": 0.016611455490599135, + "grad_norm": 1.3149542808532715, + "learning_rate": 8.305541303232427e-07, + "loss": 0.0451, + "step": 2590 + }, + { + "epoch": 0.01667559238438523, + "grad_norm": 2.0486605167388916, + "learning_rate": 8.337609030271936e-07, + "loss": 0.0303, + "step": 2600 + }, + { + "epoch": 0.016739729278171328, + "grad_norm": 2.1981372833251953, + "learning_rate": 8.369676757311443e-07, + "loss": 0.0503, + "step": 2610 + }, + { + "epoch": 0.016803866171957425, + "grad_norm": 1.6597967147827148, + "learning_rate": 8.401744484350951e-07, + "loss": 0.0362, + "step": 2620 + }, + { + "epoch": 0.016868003065743525, + "grad_norm": 2.1460511684417725, + "learning_rate": 8.433812211390458e-07, + "loss": 0.0434, + "step": 2630 + }, + { + "epoch": 0.01693213995952962, + "grad_norm": 1.2565027475357056, + "learning_rate": 8.465879938429964e-07, + "loss": 0.0464, + "step": 2640 + }, + { + "epoch": 0.016996276853315718, + "grad_norm": 1.4828057289123535, + "learning_rate": 8.497947665469472e-07, + "loss": 0.0373, + "step": 2650 + }, + { + "epoch": 0.017060413747101814, + "grad_norm": 2.4982426166534424, + "learning_rate": 8.530015392508979e-07, + "loss": 0.0374, + "step": 2660 + }, + { + "epoch": 0.01712455064088791, + "grad_norm": 1.8210854530334473, + "learning_rate": 8.562083119548486e-07, + "loss": 0.0446, + "step": 2670 + }, + { + "epoch": 0.017188687534674008, + "grad_norm": 1.7910974025726318, + "learning_rate": 8.594150846587994e-07, + "loss": 0.0382, + "step": 2680 + }, + { + "epoch": 0.017252824428460104, + "grad_norm": 1.5497280359268188, + "learning_rate": 8.626218573627502e-07, + "loss": 0.0439, + "step": 2690 + }, + { + "epoch": 0.0173169613222462, + "grad_norm": 2.107048273086548, + "learning_rate": 8.65828630066701e-07, + "loss": 0.0323, + "step": 2700 + }, + { + "epoch": 0.0173810982160323, + "grad_norm": 1.7939436435699463, + "learning_rate": 8.690354027706517e-07, + "loss": 0.0375, + "step": 2710 + }, + { + "epoch": 0.017445235109818397, + "grad_norm": 1.5770498514175415, + "learning_rate": 8.722421754746024e-07, + "loss": 0.0365, + "step": 2720 + }, + { + "epoch": 0.017509372003604494, + "grad_norm": 2.1870534420013428, + "learning_rate": 8.754489481785532e-07, + "loss": 0.0436, + "step": 2730 + }, + { + "epoch": 0.01757350889739059, + "grad_norm": 2.0507476329803467, + "learning_rate": 8.786557208825039e-07, + "loss": 0.0301, + "step": 2740 + }, + { + "epoch": 0.017637645791176687, + "grad_norm": 1.1799434423446655, + "learning_rate": 8.818624935864547e-07, + "loss": 0.0411, + "step": 2750 + }, + { + "epoch": 0.017701782684962784, + "grad_norm": 1.364424467086792, + "learning_rate": 8.850692662904054e-07, + "loss": 0.0413, + "step": 2760 + }, + { + "epoch": 0.01776591957874888, + "grad_norm": 2.17989182472229, + "learning_rate": 8.882760389943563e-07, + "loss": 0.0446, + "step": 2770 + }, + { + "epoch": 0.01783005647253498, + "grad_norm": 1.3833954334259033, + "learning_rate": 8.91482811698307e-07, + "loss": 0.0448, + "step": 2780 + }, + { + "epoch": 0.017894193366321077, + "grad_norm": 1.3813868761062622, + "learning_rate": 8.946895844022576e-07, + "loss": 0.056, + "step": 2790 + }, + { + "epoch": 0.017958330260107173, + "grad_norm": 1.6611545085906982, + "learning_rate": 8.978963571062084e-07, + "loss": 0.0317, + "step": 2800 + }, + { + "epoch": 0.01802246715389327, + "grad_norm": 2.107382297515869, + "learning_rate": 9.011031298101591e-07, + "loss": 0.0386, + "step": 2810 + }, + { + "epoch": 0.018086604047679367, + "grad_norm": 1.9452520608901978, + "learning_rate": 9.043099025141098e-07, + "loss": 0.0334, + "step": 2820 + }, + { + "epoch": 0.018150740941465463, + "grad_norm": 1.8797872066497803, + "learning_rate": 9.075166752180606e-07, + "loss": 0.0378, + "step": 2830 + }, + { + "epoch": 0.01821487783525156, + "grad_norm": 1.349656105041504, + "learning_rate": 9.107234479220113e-07, + "loss": 0.0408, + "step": 2840 + }, + { + "epoch": 0.018279014729037656, + "grad_norm": 1.657008409500122, + "learning_rate": 9.13930220625962e-07, + "loss": 0.0432, + "step": 2850 + }, + { + "epoch": 0.018343151622823756, + "grad_norm": 1.0578302145004272, + "learning_rate": 9.171369933299129e-07, + "loss": 0.0468, + "step": 2860 + }, + { + "epoch": 0.018407288516609853, + "grad_norm": 1.3269705772399902, + "learning_rate": 9.203437660338637e-07, + "loss": 0.0331, + "step": 2870 + }, + { + "epoch": 0.01847142541039595, + "grad_norm": 1.4782181978225708, + "learning_rate": 9.235505387378144e-07, + "loss": 0.0297, + "step": 2880 + }, + { + "epoch": 0.018535562304182046, + "grad_norm": 1.494398832321167, + "learning_rate": 9.267573114417651e-07, + "loss": 0.0348, + "step": 2890 + }, + { + "epoch": 0.018599699197968143, + "grad_norm": 2.166395664215088, + "learning_rate": 9.299640841457159e-07, + "loss": 0.0366, + "step": 2900 + }, + { + "epoch": 0.01866383609175424, + "grad_norm": 1.7287582159042358, + "learning_rate": 9.331708568496666e-07, + "loss": 0.036, + "step": 2910 + }, + { + "epoch": 0.018727972985540336, + "grad_norm": 1.187628149986267, + "learning_rate": 9.363776295536172e-07, + "loss": 0.0344, + "step": 2920 + }, + { + "epoch": 0.018792109879326436, + "grad_norm": 2.1952199935913086, + "learning_rate": 9.39584402257568e-07, + "loss": 0.0374, + "step": 2930 + }, + { + "epoch": 0.018856246773112532, + "grad_norm": 1.5708807706832886, + "learning_rate": 9.427911749615188e-07, + "loss": 0.0244, + "step": 2940 + }, + { + "epoch": 0.01892038366689863, + "grad_norm": 1.7028510570526123, + "learning_rate": 9.459979476654696e-07, + "loss": 0.0365, + "step": 2950 + }, + { + "epoch": 0.018984520560684726, + "grad_norm": 1.1860458850860596, + "learning_rate": 9.492047203694203e-07, + "loss": 0.0328, + "step": 2960 + }, + { + "epoch": 0.019048657454470822, + "grad_norm": 1.1099803447723389, + "learning_rate": 9.52411493073371e-07, + "loss": 0.0321, + "step": 2970 + }, + { + "epoch": 0.01911279434825692, + "grad_norm": 1.7563613653182983, + "learning_rate": 9.556182657773218e-07, + "loss": 0.0387, + "step": 2980 + }, + { + "epoch": 0.019176931242043015, + "grad_norm": 2.043030023574829, + "learning_rate": 9.588250384812725e-07, + "loss": 0.036, + "step": 2990 + }, + { + "epoch": 0.019241068135829115, + "grad_norm": 1.5777499675750732, + "learning_rate": 9.620318111852232e-07, + "loss": 0.0326, + "step": 3000 + }, + { + "epoch": 0.019305205029615212, + "grad_norm": 1.2317931652069092, + "learning_rate": 9.652385838891739e-07, + "loss": 0.0302, + "step": 3010 + }, + { + "epoch": 0.01936934192340131, + "grad_norm": 1.5755513906478882, + "learning_rate": 9.684453565931248e-07, + "loss": 0.0379, + "step": 3020 + }, + { + "epoch": 0.019433478817187405, + "grad_norm": 1.1730719804763794, + "learning_rate": 9.716521292970755e-07, + "loss": 0.0357, + "step": 3030 + }, + { + "epoch": 0.0194976157109735, + "grad_norm": 1.3751786947250366, + "learning_rate": 9.748589020010264e-07, + "loss": 0.0379, + "step": 3040 + }, + { + "epoch": 0.019561752604759598, + "grad_norm": 2.1120567321777344, + "learning_rate": 9.78065674704977e-07, + "loss": 0.0403, + "step": 3050 + }, + { + "epoch": 0.019625889498545695, + "grad_norm": 1.4736151695251465, + "learning_rate": 9.812724474089277e-07, + "loss": 0.0334, + "step": 3060 + }, + { + "epoch": 0.01969002639233179, + "grad_norm": 1.5400184392929077, + "learning_rate": 9.844792201128784e-07, + "loss": 0.0396, + "step": 3070 + }, + { + "epoch": 0.01975416328611789, + "grad_norm": 1.8855392932891846, + "learning_rate": 9.876859928168291e-07, + "loss": 0.041, + "step": 3080 + }, + { + "epoch": 0.019818300179903988, + "grad_norm": 1.3872394561767578, + "learning_rate": 9.9089276552078e-07, + "loss": 0.0366, + "step": 3090 + }, + { + "epoch": 0.019882437073690085, + "grad_norm": 1.054284930229187, + "learning_rate": 9.940995382247307e-07, + "loss": 0.0335, + "step": 3100 + }, + { + "epoch": 0.01994657396747618, + "grad_norm": 1.2524304389953613, + "learning_rate": 9.973063109286816e-07, + "loss": 0.038, + "step": 3110 + }, + { + "epoch": 0.020010710861262278, + "grad_norm": 1.533583641052246, + "learning_rate": 1.0005130836326323e-06, + "loss": 0.0359, + "step": 3120 + }, + { + "epoch": 0.020074847755048374, + "grad_norm": 2.047041177749634, + "learning_rate": 1.003719856336583e-06, + "loss": 0.0408, + "step": 3130 + }, + { + "epoch": 0.02013898464883447, + "grad_norm": 1.6743892431259155, + "learning_rate": 1.0069266290405337e-06, + "loss": 0.0356, + "step": 3140 + }, + { + "epoch": 0.02020312154262057, + "grad_norm": 2.2678470611572266, + "learning_rate": 1.0101334017444844e-06, + "loss": 0.0396, + "step": 3150 + }, + { + "epoch": 0.020267258436406668, + "grad_norm": 1.4740620851516724, + "learning_rate": 1.013340174448435e-06, + "loss": 0.0378, + "step": 3160 + }, + { + "epoch": 0.020331395330192764, + "grad_norm": 1.1527410745620728, + "learning_rate": 1.016546947152386e-06, + "loss": 0.0336, + "step": 3170 + }, + { + "epoch": 0.02039553222397886, + "grad_norm": 2.1838512420654297, + "learning_rate": 1.0197537198563367e-06, + "loss": 0.0389, + "step": 3180 + }, + { + "epoch": 0.020459669117764957, + "grad_norm": 1.4129743576049805, + "learning_rate": 1.0229604925602873e-06, + "loss": 0.0349, + "step": 3190 + }, + { + "epoch": 0.020523806011551054, + "grad_norm": 2.70485782623291, + "learning_rate": 1.0261672652642382e-06, + "loss": 0.0437, + "step": 3200 + }, + { + "epoch": 0.02058794290533715, + "grad_norm": 1.1545413732528687, + "learning_rate": 1.029374037968189e-06, + "loss": 0.0251, + "step": 3210 + }, + { + "epoch": 0.020652079799123247, + "grad_norm": 1.4345098733901978, + "learning_rate": 1.0325808106721396e-06, + "loss": 0.0284, + "step": 3220 + }, + { + "epoch": 0.020716216692909347, + "grad_norm": 1.5682227611541748, + "learning_rate": 1.0357875833760903e-06, + "loss": 0.0338, + "step": 3230 + }, + { + "epoch": 0.020780353586695444, + "grad_norm": 1.8983999490737915, + "learning_rate": 1.0389943560800412e-06, + "loss": 0.0323, + "step": 3240 + }, + { + "epoch": 0.02084449048048154, + "grad_norm": 1.4826823472976685, + "learning_rate": 1.042201128783992e-06, + "loss": 0.0341, + "step": 3250 + }, + { + "epoch": 0.020908627374267637, + "grad_norm": 1.2763713598251343, + "learning_rate": 1.0454079014879426e-06, + "loss": 0.0297, + "step": 3260 + }, + { + "epoch": 0.020972764268053733, + "grad_norm": 1.163649559020996, + "learning_rate": 1.0486146741918933e-06, + "loss": 0.0289, + "step": 3270 + }, + { + "epoch": 0.02103690116183983, + "grad_norm": 1.3910245895385742, + "learning_rate": 1.0518214468958442e-06, + "loss": 0.0332, + "step": 3280 + }, + { + "epoch": 0.021101038055625927, + "grad_norm": 1.4478743076324463, + "learning_rate": 1.0550282195997949e-06, + "loss": 0.0317, + "step": 3290 + }, + { + "epoch": 0.021165174949412027, + "grad_norm": 1.6459060907363892, + "learning_rate": 1.0582349923037456e-06, + "loss": 0.0372, + "step": 3300 + }, + { + "epoch": 0.021229311843198123, + "grad_norm": 1.8430840969085693, + "learning_rate": 1.0614417650076963e-06, + "loss": 0.0353, + "step": 3310 + }, + { + "epoch": 0.02129344873698422, + "grad_norm": 1.2712984085083008, + "learning_rate": 1.0646485377116472e-06, + "loss": 0.031, + "step": 3320 + }, + { + "epoch": 0.021357585630770316, + "grad_norm": 0.8528488278388977, + "learning_rate": 1.0678553104155978e-06, + "loss": 0.0284, + "step": 3330 + }, + { + "epoch": 0.021421722524556413, + "grad_norm": 1.7615848779678345, + "learning_rate": 1.0710620831195485e-06, + "loss": 0.0329, + "step": 3340 + }, + { + "epoch": 0.02148585941834251, + "grad_norm": 1.3932099342346191, + "learning_rate": 1.0742688558234992e-06, + "loss": 0.0307, + "step": 3350 + }, + { + "epoch": 0.021549996312128606, + "grad_norm": 2.1969845294952393, + "learning_rate": 1.07747562852745e-06, + "loss": 0.0379, + "step": 3360 + }, + { + "epoch": 0.021614133205914706, + "grad_norm": 1.2978613376617432, + "learning_rate": 1.0806824012314008e-06, + "loss": 0.033, + "step": 3370 + }, + { + "epoch": 0.021678270099700803, + "grad_norm": 1.0141751766204834, + "learning_rate": 1.0838891739353515e-06, + "loss": 0.0255, + "step": 3380 + }, + { + "epoch": 0.0217424069934869, + "grad_norm": 1.300687551498413, + "learning_rate": 1.0870959466393024e-06, + "loss": 0.0352, + "step": 3390 + }, + { + "epoch": 0.021806543887272996, + "grad_norm": 1.0667314529418945, + "learning_rate": 1.090302719343253e-06, + "loss": 0.0275, + "step": 3400 + }, + { + "epoch": 0.021870680781059092, + "grad_norm": 1.2791436910629272, + "learning_rate": 1.0935094920472038e-06, + "loss": 0.0253, + "step": 3410 + }, + { + "epoch": 0.02193481767484519, + "grad_norm": 0.9602054953575134, + "learning_rate": 1.0967162647511545e-06, + "loss": 0.0305, + "step": 3420 + }, + { + "epoch": 0.021998954568631286, + "grad_norm": 1.0475029945373535, + "learning_rate": 1.0999230374551052e-06, + "loss": 0.0272, + "step": 3430 + }, + { + "epoch": 0.022063091462417382, + "grad_norm": 1.3703036308288574, + "learning_rate": 1.103129810159056e-06, + "loss": 0.0386, + "step": 3440 + }, + { + "epoch": 0.022127228356203482, + "grad_norm": 2.4703800678253174, + "learning_rate": 1.1063365828630068e-06, + "loss": 0.0375, + "step": 3450 + }, + { + "epoch": 0.02219136524998958, + "grad_norm": 1.3595285415649414, + "learning_rate": 1.1095433555669577e-06, + "loss": 0.0309, + "step": 3460 + }, + { + "epoch": 0.022255502143775675, + "grad_norm": 1.59342360496521, + "learning_rate": 1.1127501282709083e-06, + "loss": 0.029, + "step": 3470 + }, + { + "epoch": 0.022319639037561772, + "grad_norm": 1.3593720197677612, + "learning_rate": 1.115956900974859e-06, + "loss": 0.0272, + "step": 3480 + }, + { + "epoch": 0.02238377593134787, + "grad_norm": 1.0497242212295532, + "learning_rate": 1.1191636736788097e-06, + "loss": 0.0307, + "step": 3490 + }, + { + "epoch": 0.022447912825133965, + "grad_norm": 1.5057237148284912, + "learning_rate": 1.1223704463827604e-06, + "loss": 0.0308, + "step": 3500 + }, + { + "epoch": 0.02251204971892006, + "grad_norm": 1.297191858291626, + "learning_rate": 1.125577219086711e-06, + "loss": 0.0308, + "step": 3510 + }, + { + "epoch": 0.02257618661270616, + "grad_norm": 2.359473705291748, + "learning_rate": 1.128783991790662e-06, + "loss": 0.0302, + "step": 3520 + }, + { + "epoch": 0.022640323506492258, + "grad_norm": 0.7025953531265259, + "learning_rate": 1.1319907644946127e-06, + "loss": 0.0259, + "step": 3530 + }, + { + "epoch": 0.022704460400278355, + "grad_norm": 1.1531574726104736, + "learning_rate": 1.1351975371985636e-06, + "loss": 0.0314, + "step": 3540 + }, + { + "epoch": 0.02276859729406445, + "grad_norm": 1.433619499206543, + "learning_rate": 1.1384043099025143e-06, + "loss": 0.0261, + "step": 3550 + }, + { + "epoch": 0.022832734187850548, + "grad_norm": 1.3180760145187378, + "learning_rate": 1.141611082606465e-06, + "loss": 0.039, + "step": 3560 + }, + { + "epoch": 0.022896871081636645, + "grad_norm": 1.2552984952926636, + "learning_rate": 1.1448178553104157e-06, + "loss": 0.0258, + "step": 3570 + }, + { + "epoch": 0.02296100797542274, + "grad_norm": 1.1624325513839722, + "learning_rate": 1.1480246280143664e-06, + "loss": 0.0282, + "step": 3580 + }, + { + "epoch": 0.023025144869208838, + "grad_norm": 1.4660444259643555, + "learning_rate": 1.1512314007183173e-06, + "loss": 0.0325, + "step": 3590 + }, + { + "epoch": 0.023089281762994938, + "grad_norm": 1.164898157119751, + "learning_rate": 1.154438173422268e-06, + "loss": 0.0251, + "step": 3600 + }, + { + "epoch": 0.023153418656781034, + "grad_norm": 1.5619678497314453, + "learning_rate": 1.1576449461262186e-06, + "loss": 0.0462, + "step": 3610 + }, + { + "epoch": 0.02321755555056713, + "grad_norm": 1.7733670473098755, + "learning_rate": 1.1608517188301695e-06, + "loss": 0.0273, + "step": 3620 + }, + { + "epoch": 0.023281692444353227, + "grad_norm": 0.8263606429100037, + "learning_rate": 1.1640584915341202e-06, + "loss": 0.0287, + "step": 3630 + }, + { + "epoch": 0.023345829338139324, + "grad_norm": 1.2657470703125, + "learning_rate": 1.167265264238071e-06, + "loss": 0.0251, + "step": 3640 + }, + { + "epoch": 0.02340996623192542, + "grad_norm": 1.2587162256240845, + "learning_rate": 1.1704720369420216e-06, + "loss": 0.0193, + "step": 3650 + }, + { + "epoch": 0.023474103125711517, + "grad_norm": 1.990098237991333, + "learning_rate": 1.1736788096459723e-06, + "loss": 0.0254, + "step": 3660 + }, + { + "epoch": 0.023538240019497617, + "grad_norm": 1.408094882965088, + "learning_rate": 1.1768855823499232e-06, + "loss": 0.0362, + "step": 3670 + }, + { + "epoch": 0.023602376913283714, + "grad_norm": 1.239940881729126, + "learning_rate": 1.1800923550538739e-06, + "loss": 0.0329, + "step": 3680 + }, + { + "epoch": 0.02366651380706981, + "grad_norm": 1.7103601694107056, + "learning_rate": 1.1832991277578246e-06, + "loss": 0.0265, + "step": 3690 + }, + { + "epoch": 0.023730650700855907, + "grad_norm": 1.2067160606384277, + "learning_rate": 1.1865059004617753e-06, + "loss": 0.0315, + "step": 3700 + }, + { + "epoch": 0.023794787594642004, + "grad_norm": 1.611244559288025, + "learning_rate": 1.1897126731657262e-06, + "loss": 0.0269, + "step": 3710 + }, + { + "epoch": 0.0238589244884281, + "grad_norm": 1.1752269268035889, + "learning_rate": 1.1929194458696769e-06, + "loss": 0.031, + "step": 3720 + }, + { + "epoch": 0.023923061382214197, + "grad_norm": 1.7413352727890015, + "learning_rate": 1.1961262185736275e-06, + "loss": 0.0338, + "step": 3730 + }, + { + "epoch": 0.023987198276000293, + "grad_norm": 2.1114962100982666, + "learning_rate": 1.1993329912775784e-06, + "loss": 0.0289, + "step": 3740 + }, + { + "epoch": 0.024051335169786393, + "grad_norm": 1.0259640216827393, + "learning_rate": 1.2025397639815291e-06, + "loss": 0.0278, + "step": 3750 + }, + { + "epoch": 0.02411547206357249, + "grad_norm": 1.528367519378662, + "learning_rate": 1.2057465366854798e-06, + "loss": 0.0253, + "step": 3760 + }, + { + "epoch": 0.024179608957358587, + "grad_norm": 1.4296187162399292, + "learning_rate": 1.2089533093894305e-06, + "loss": 0.034, + "step": 3770 + }, + { + "epoch": 0.024243745851144683, + "grad_norm": 1.101138710975647, + "learning_rate": 1.2121600820933812e-06, + "loss": 0.0231, + "step": 3780 + }, + { + "epoch": 0.02430788274493078, + "grad_norm": 1.2148758172988892, + "learning_rate": 1.215366854797332e-06, + "loss": 0.0276, + "step": 3790 + }, + { + "epoch": 0.024372019638716876, + "grad_norm": 1.8424838781356812, + "learning_rate": 1.2185736275012828e-06, + "loss": 0.0307, + "step": 3800 + }, + { + "epoch": 0.024436156532502973, + "grad_norm": 1.2761074304580688, + "learning_rate": 1.2217804002052335e-06, + "loss": 0.0237, + "step": 3810 + }, + { + "epoch": 0.024500293426289073, + "grad_norm": 1.4775094985961914, + "learning_rate": 1.2249871729091844e-06, + "loss": 0.0278, + "step": 3820 + }, + { + "epoch": 0.02456443032007517, + "grad_norm": 1.41787850856781, + "learning_rate": 1.228193945613135e-06, + "loss": 0.0287, + "step": 3830 + }, + { + "epoch": 0.024628567213861266, + "grad_norm": 1.7441461086273193, + "learning_rate": 1.2314007183170858e-06, + "loss": 0.0253, + "step": 3840 + }, + { + "epoch": 0.024692704107647363, + "grad_norm": 1.940899133682251, + "learning_rate": 1.2346074910210365e-06, + "loss": 0.0321, + "step": 3850 + }, + { + "epoch": 0.02475684100143346, + "grad_norm": 1.5329341888427734, + "learning_rate": 1.2378142637249871e-06, + "loss": 0.0305, + "step": 3860 + }, + { + "epoch": 0.024820977895219556, + "grad_norm": 1.282849907875061, + "learning_rate": 1.241021036428938e-06, + "loss": 0.0242, + "step": 3870 + }, + { + "epoch": 0.024885114789005652, + "grad_norm": 1.5139777660369873, + "learning_rate": 1.2442278091328887e-06, + "loss": 0.0276, + "step": 3880 + }, + { + "epoch": 0.024949251682791752, + "grad_norm": 1.3489099740982056, + "learning_rate": 1.2474345818368396e-06, + "loss": 0.0414, + "step": 3890 + }, + { + "epoch": 0.02501338857657785, + "grad_norm": 1.1435086727142334, + "learning_rate": 1.2506413545407903e-06, + "loss": 0.0242, + "step": 3900 + }, + { + "epoch": 0.025077525470363946, + "grad_norm": 1.0997439622879028, + "learning_rate": 1.2538481272447408e-06, + "loss": 0.0245, + "step": 3910 + }, + { + "epoch": 0.025141662364150042, + "grad_norm": 0.8081037998199463, + "learning_rate": 1.2570548999486917e-06, + "loss": 0.0241, + "step": 3920 + }, + { + "epoch": 0.02520579925793614, + "grad_norm": 0.8284711241722107, + "learning_rate": 1.2602616726526426e-06, + "loss": 0.0339, + "step": 3930 + }, + { + "epoch": 0.025269936151722235, + "grad_norm": 1.4088741540908813, + "learning_rate": 1.263468445356593e-06, + "loss": 0.0267, + "step": 3940 + }, + { + "epoch": 0.025334073045508332, + "grad_norm": 1.1950079202651978, + "learning_rate": 1.266675218060544e-06, + "loss": 0.0279, + "step": 3950 + }, + { + "epoch": 0.02539820993929443, + "grad_norm": 1.3671220541000366, + "learning_rate": 1.2698819907644947e-06, + "loss": 0.0245, + "step": 3960 + }, + { + "epoch": 0.02546234683308053, + "grad_norm": 2.05141282081604, + "learning_rate": 1.2730887634684456e-06, + "loss": 0.0278, + "step": 3970 + }, + { + "epoch": 0.025526483726866625, + "grad_norm": 1.7943063974380493, + "learning_rate": 1.276295536172396e-06, + "loss": 0.0254, + "step": 3980 + }, + { + "epoch": 0.02559062062065272, + "grad_norm": 1.4211649894714355, + "learning_rate": 1.279502308876347e-06, + "loss": 0.0219, + "step": 3990 + }, + { + "epoch": 0.025654757514438818, + "grad_norm": 1.2778598070144653, + "learning_rate": 1.2827090815802976e-06, + "loss": 0.0274, + "step": 4000 + }, + { + "epoch": 0.025718894408224915, + "grad_norm": 1.6716960668563843, + "learning_rate": 1.2859158542842483e-06, + "loss": 0.0225, + "step": 4010 + }, + { + "epoch": 0.02578303130201101, + "grad_norm": 1.0186187028884888, + "learning_rate": 1.2891226269881992e-06, + "loss": 0.0251, + "step": 4020 + }, + { + "epoch": 0.025847168195797108, + "grad_norm": 1.4226250648498535, + "learning_rate": 1.29232939969215e-06, + "loss": 0.0341, + "step": 4030 + }, + { + "epoch": 0.025911305089583208, + "grad_norm": 1.6474844217300415, + "learning_rate": 1.2955361723961008e-06, + "loss": 0.0277, + "step": 4040 + }, + { + "epoch": 0.025975441983369305, + "grad_norm": 1.2548375129699707, + "learning_rate": 1.2987429451000513e-06, + "loss": 0.0319, + "step": 4050 + }, + { + "epoch": 0.0260395788771554, + "grad_norm": 1.264318585395813, + "learning_rate": 1.3019497178040022e-06, + "loss": 0.0255, + "step": 4060 + }, + { + "epoch": 0.026103715770941498, + "grad_norm": 2.2901430130004883, + "learning_rate": 1.305156490507953e-06, + "loss": 0.0341, + "step": 4070 + }, + { + "epoch": 0.026167852664727594, + "grad_norm": 1.6619858741760254, + "learning_rate": 1.3083632632119036e-06, + "loss": 0.0309, + "step": 4080 + }, + { + "epoch": 0.02623198955851369, + "grad_norm": 0.9754308462142944, + "learning_rate": 1.3115700359158545e-06, + "loss": 0.0285, + "step": 4090 + }, + { + "epoch": 0.026296126452299787, + "grad_norm": 1.2731003761291504, + "learning_rate": 1.3147768086198052e-06, + "loss": 0.0277, + "step": 4100 + }, + { + "epoch": 0.026360263346085884, + "grad_norm": 1.1012314558029175, + "learning_rate": 1.317983581323756e-06, + "loss": 0.0286, + "step": 4110 + }, + { + "epoch": 0.026424400239871984, + "grad_norm": 1.2308241128921509, + "learning_rate": 1.3211903540277066e-06, + "loss": 0.0292, + "step": 4120 + }, + { + "epoch": 0.02648853713365808, + "grad_norm": 1.3766791820526123, + "learning_rate": 1.3243971267316575e-06, + "loss": 0.0317, + "step": 4130 + }, + { + "epoch": 0.026552674027444177, + "grad_norm": 1.2170155048370361, + "learning_rate": 1.327603899435608e-06, + "loss": 0.0266, + "step": 4140 + }, + { + "epoch": 0.026616810921230274, + "grad_norm": 1.0523362159729004, + "learning_rate": 1.3308106721395588e-06, + "loss": 0.033, + "step": 4150 + }, + { + "epoch": 0.02668094781501637, + "grad_norm": 1.247113585472107, + "learning_rate": 1.3340174448435095e-06, + "loss": 0.0362, + "step": 4160 + }, + { + "epoch": 0.026745084708802467, + "grad_norm": 1.7340353727340698, + "learning_rate": 1.3372242175474604e-06, + "loss": 0.029, + "step": 4170 + }, + { + "epoch": 0.026809221602588564, + "grad_norm": 1.5098899602890015, + "learning_rate": 1.3404309902514111e-06, + "loss": 0.0365, + "step": 4180 + }, + { + "epoch": 0.026873358496374664, + "grad_norm": 1.6181445121765137, + "learning_rate": 1.3436377629553618e-06, + "loss": 0.029, + "step": 4190 + }, + { + "epoch": 0.02693749539016076, + "grad_norm": 0.9857799410820007, + "learning_rate": 1.3468445356593127e-06, + "loss": 0.0231, + "step": 4200 + }, + { + "epoch": 0.027001632283946857, + "grad_norm": 1.0968294143676758, + "learning_rate": 1.3500513083632632e-06, + "loss": 0.0285, + "step": 4210 + }, + { + "epoch": 0.027065769177732953, + "grad_norm": 0.9057127833366394, + "learning_rate": 1.353258081067214e-06, + "loss": 0.0182, + "step": 4220 + }, + { + "epoch": 0.02712990607151905, + "grad_norm": 1.425401210784912, + "learning_rate": 1.3564648537711648e-06, + "loss": 0.031, + "step": 4230 + }, + { + "epoch": 0.027194042965305146, + "grad_norm": 1.5106675624847412, + "learning_rate": 1.3596716264751157e-06, + "loss": 0.0348, + "step": 4240 + }, + { + "epoch": 0.027258179859091243, + "grad_norm": 1.6456857919692993, + "learning_rate": 1.3628783991790662e-06, + "loss": 0.0271, + "step": 4250 + }, + { + "epoch": 0.027322316752877343, + "grad_norm": 1.3953471183776855, + "learning_rate": 1.366085171883017e-06, + "loss": 0.0261, + "step": 4260 + }, + { + "epoch": 0.02738645364666344, + "grad_norm": 1.5983757972717285, + "learning_rate": 1.369291944586968e-06, + "loss": 0.025, + "step": 4270 + }, + { + "epoch": 0.027450590540449536, + "grad_norm": 1.4413799047470093, + "learning_rate": 1.3724987172909184e-06, + "loss": 0.0254, + "step": 4280 + }, + { + "epoch": 0.027514727434235633, + "grad_norm": 1.8395564556121826, + "learning_rate": 1.3757054899948693e-06, + "loss": 0.0265, + "step": 4290 + }, + { + "epoch": 0.02757886432802173, + "grad_norm": 3.691532850265503, + "learning_rate": 1.37891226269882e-06, + "loss": 0.0357, + "step": 4300 + }, + { + "epoch": 0.027643001221807826, + "grad_norm": 2.7965006828308105, + "learning_rate": 1.3821190354027707e-06, + "loss": 0.0271, + "step": 4310 + }, + { + "epoch": 0.027707138115593923, + "grad_norm": 0.9594042301177979, + "learning_rate": 1.3853258081067214e-06, + "loss": 0.0251, + "step": 4320 + }, + { + "epoch": 0.02777127500938002, + "grad_norm": 0.89903724193573, + "learning_rate": 1.3885325808106723e-06, + "loss": 0.0211, + "step": 4330 + }, + { + "epoch": 0.02783541190316612, + "grad_norm": 1.770849347114563, + "learning_rate": 1.3917393535146228e-06, + "loss": 0.0229, + "step": 4340 + }, + { + "epoch": 0.027899548796952216, + "grad_norm": 1.7908738851547241, + "learning_rate": 1.3949461262185737e-06, + "loss": 0.0293, + "step": 4350 + }, + { + "epoch": 0.027963685690738312, + "grad_norm": 1.2615981101989746, + "learning_rate": 1.3981528989225246e-06, + "loss": 0.0345, + "step": 4360 + }, + { + "epoch": 0.02802782258452441, + "grad_norm": 1.5795953273773193, + "learning_rate": 1.4013596716264753e-06, + "loss": 0.0277, + "step": 4370 + }, + { + "epoch": 0.028091959478310505, + "grad_norm": 0.9272065758705139, + "learning_rate": 1.404566444330426e-06, + "loss": 0.024, + "step": 4380 + }, + { + "epoch": 0.028156096372096602, + "grad_norm": 1.234574794769287, + "learning_rate": 1.4077732170343767e-06, + "loss": 0.0179, + "step": 4390 + }, + { + "epoch": 0.0282202332658827, + "grad_norm": 0.859353244304657, + "learning_rate": 1.4109799897383276e-06, + "loss": 0.0282, + "step": 4400 + }, + { + "epoch": 0.0282843701596688, + "grad_norm": 1.1903260946273804, + "learning_rate": 1.414186762442278e-06, + "loss": 0.0219, + "step": 4410 + }, + { + "epoch": 0.028348507053454895, + "grad_norm": 1.5210109949111938, + "learning_rate": 1.417393535146229e-06, + "loss": 0.0208, + "step": 4420 + }, + { + "epoch": 0.028412643947240992, + "grad_norm": 1.2154312133789062, + "learning_rate": 1.4206003078501798e-06, + "loss": 0.0203, + "step": 4430 + }, + { + "epoch": 0.02847678084102709, + "grad_norm": 1.0572129487991333, + "learning_rate": 1.4238070805541303e-06, + "loss": 0.0273, + "step": 4440 + }, + { + "epoch": 0.028540917734813185, + "grad_norm": 2.9614217281341553, + "learning_rate": 1.4270138532580812e-06, + "loss": 0.0287, + "step": 4450 + }, + { + "epoch": 0.02860505462859928, + "grad_norm": 0.9641210436820984, + "learning_rate": 1.430220625962032e-06, + "loss": 0.0226, + "step": 4460 + }, + { + "epoch": 0.028669191522385378, + "grad_norm": 1.0650194883346558, + "learning_rate": 1.4334273986659828e-06, + "loss": 0.0249, + "step": 4470 + }, + { + "epoch": 0.028733328416171475, + "grad_norm": 1.7810701131820679, + "learning_rate": 1.4366341713699333e-06, + "loss": 0.0241, + "step": 4480 + }, + { + "epoch": 0.028797465309957575, + "grad_norm": 1.532281517982483, + "learning_rate": 1.4398409440738842e-06, + "loss": 0.0277, + "step": 4490 + }, + { + "epoch": 0.02886160220374367, + "grad_norm": 1.1858774423599243, + "learning_rate": 1.4430477167778349e-06, + "loss": 0.0276, + "step": 4500 + }, + { + "epoch": 0.028925739097529768, + "grad_norm": 1.3994327783584595, + "learning_rate": 1.4462544894817856e-06, + "loss": 0.0299, + "step": 4510 + }, + { + "epoch": 0.028989875991315864, + "grad_norm": 1.0907750129699707, + "learning_rate": 1.4494612621857365e-06, + "loss": 0.023, + "step": 4520 + }, + { + "epoch": 0.02905401288510196, + "grad_norm": 0.8516006469726562, + "learning_rate": 1.4526680348896872e-06, + "loss": 0.0283, + "step": 4530 + }, + { + "epoch": 0.029118149778888058, + "grad_norm": 1.7701914310455322, + "learning_rate": 1.455874807593638e-06, + "loss": 0.0251, + "step": 4540 + }, + { + "epoch": 0.029182286672674154, + "grad_norm": 2.0644941329956055, + "learning_rate": 1.4590815802975885e-06, + "loss": 0.0264, + "step": 4550 + }, + { + "epoch": 0.029246423566460254, + "grad_norm": 1.278272271156311, + "learning_rate": 1.4622883530015394e-06, + "loss": 0.0317, + "step": 4560 + }, + { + "epoch": 0.02931056046024635, + "grad_norm": 1.3904606103897095, + "learning_rate": 1.46549512570549e-06, + "loss": 0.0269, + "step": 4570 + }, + { + "epoch": 0.029374697354032447, + "grad_norm": 1.9059582948684692, + "learning_rate": 1.4687018984094408e-06, + "loss": 0.0311, + "step": 4580 + }, + { + "epoch": 0.029438834247818544, + "grad_norm": 1.3313368558883667, + "learning_rate": 1.4719086711133915e-06, + "loss": 0.0305, + "step": 4590 + }, + { + "epoch": 0.02950297114160464, + "grad_norm": 1.8630759716033936, + "learning_rate": 1.4751154438173424e-06, + "loss": 0.0232, + "step": 4600 + }, + { + "epoch": 0.029567108035390737, + "grad_norm": 1.6356956958770752, + "learning_rate": 1.4783222165212933e-06, + "loss": 0.027, + "step": 4610 + }, + { + "epoch": 0.029631244929176834, + "grad_norm": 0.6639159917831421, + "learning_rate": 1.4815289892252438e-06, + "loss": 0.0244, + "step": 4620 + }, + { + "epoch": 0.029695381822962934, + "grad_norm": 1.786681890487671, + "learning_rate": 1.4847357619291947e-06, + "loss": 0.0266, + "step": 4630 + }, + { + "epoch": 0.02975951871674903, + "grad_norm": 1.597255825996399, + "learning_rate": 1.4879425346331452e-06, + "loss": 0.0262, + "step": 4640 + }, + { + "epoch": 0.029823655610535127, + "grad_norm": 1.2564306259155273, + "learning_rate": 1.491149307337096e-06, + "loss": 0.0245, + "step": 4650 + }, + { + "epoch": 0.029887792504321224, + "grad_norm": 1.2096034288406372, + "learning_rate": 1.4943560800410468e-06, + "loss": 0.0245, + "step": 4660 + }, + { + "epoch": 0.02995192939810732, + "grad_norm": 1.2752938270568848, + "learning_rate": 1.4975628527449977e-06, + "loss": 0.0198, + "step": 4670 + }, + { + "epoch": 0.030016066291893417, + "grad_norm": 1.5911647081375122, + "learning_rate": 1.5007696254489481e-06, + "loss": 0.0248, + "step": 4680 + }, + { + "epoch": 0.030080203185679513, + "grad_norm": 2.105846643447876, + "learning_rate": 1.503976398152899e-06, + "loss": 0.0173, + "step": 4690 + }, + { + "epoch": 0.03014434007946561, + "grad_norm": 0.8787049055099487, + "learning_rate": 1.50718317085685e-06, + "loss": 0.0312, + "step": 4700 + }, + { + "epoch": 0.03020847697325171, + "grad_norm": 1.3518826961517334, + "learning_rate": 1.5103899435608004e-06, + "loss": 0.0251, + "step": 4710 + }, + { + "epoch": 0.030272613867037806, + "grad_norm": 0.5953378677368164, + "learning_rate": 1.5135967162647513e-06, + "loss": 0.0268, + "step": 4720 + }, + { + "epoch": 0.030336750760823903, + "grad_norm": 0.946031928062439, + "learning_rate": 1.516803488968702e-06, + "loss": 0.0207, + "step": 4730 + }, + { + "epoch": 0.03040088765461, + "grad_norm": 1.2910767793655396, + "learning_rate": 1.520010261672653e-06, + "loss": 0.0288, + "step": 4740 + }, + { + "epoch": 0.030465024548396096, + "grad_norm": 0.6571576595306396, + "learning_rate": 1.5232170343766034e-06, + "loss": 0.017, + "step": 4750 + }, + { + "epoch": 0.030529161442182193, + "grad_norm": 0.7259587645530701, + "learning_rate": 1.5264238070805543e-06, + "loss": 0.0227, + "step": 4760 + }, + { + "epoch": 0.03059329833596829, + "grad_norm": 1.0711289644241333, + "learning_rate": 1.5296305797845052e-06, + "loss": 0.0287, + "step": 4770 + }, + { + "epoch": 0.03065743522975439, + "grad_norm": 1.5687236785888672, + "learning_rate": 1.5328373524884557e-06, + "loss": 0.0354, + "step": 4780 + }, + { + "epoch": 0.030721572123540486, + "grad_norm": 1.2231394052505493, + "learning_rate": 1.5360441251924066e-06, + "loss": 0.0215, + "step": 4790 + }, + { + "epoch": 0.030785709017326583, + "grad_norm": 1.2256968021392822, + "learning_rate": 1.5392508978963573e-06, + "loss": 0.0213, + "step": 4800 + }, + { + "epoch": 0.03084984591111268, + "grad_norm": 1.2588187456130981, + "learning_rate": 1.542457670600308e-06, + "loss": 0.0177, + "step": 4810 + }, + { + "epoch": 0.030913982804898776, + "grad_norm": 1.4336833953857422, + "learning_rate": 1.5456644433042586e-06, + "loss": 0.0205, + "step": 4820 + }, + { + "epoch": 0.030978119698684872, + "grad_norm": 0.8296056389808655, + "learning_rate": 1.5488712160082095e-06, + "loss": 0.0283, + "step": 4830 + }, + { + "epoch": 0.03104225659247097, + "grad_norm": 1.141822099685669, + "learning_rate": 1.55207798871216e-06, + "loss": 0.0304, + "step": 4840 + }, + { + "epoch": 0.031106393486257065, + "grad_norm": 0.9502159357070923, + "learning_rate": 1.555284761416111e-06, + "loss": 0.0209, + "step": 4850 + }, + { + "epoch": 0.031170530380043165, + "grad_norm": 1.7776676416397095, + "learning_rate": 1.5584915341200618e-06, + "loss": 0.0323, + "step": 4860 + }, + { + "epoch": 0.031234667273829262, + "grad_norm": 0.9667540788650513, + "learning_rate": 1.5616983068240125e-06, + "loss": 0.018, + "step": 4870 + }, + { + "epoch": 0.03129880416761536, + "grad_norm": 1.545588731765747, + "learning_rate": 1.5649050795279632e-06, + "loss": 0.0201, + "step": 4880 + }, + { + "epoch": 0.03136294106140146, + "grad_norm": 0.8739742636680603, + "learning_rate": 1.5681118522319139e-06, + "loss": 0.0236, + "step": 4890 + }, + { + "epoch": 0.03142707795518755, + "grad_norm": 1.9178887605667114, + "learning_rate": 1.5713186249358648e-06, + "loss": 0.0263, + "step": 4900 + }, + { + "epoch": 0.03149121484897365, + "grad_norm": 1.7376662492752075, + "learning_rate": 1.5745253976398153e-06, + "loss": 0.0173, + "step": 4910 + }, + { + "epoch": 0.031555351742759745, + "grad_norm": 1.293989658355713, + "learning_rate": 1.5777321703437662e-06, + "loss": 0.0283, + "step": 4920 + }, + { + "epoch": 0.031619488636545845, + "grad_norm": 1.388981819152832, + "learning_rate": 1.5809389430477169e-06, + "loss": 0.0188, + "step": 4930 + }, + { + "epoch": 0.03168362553033194, + "grad_norm": 1.5035390853881836, + "learning_rate": 1.5841457157516675e-06, + "loss": 0.0219, + "step": 4940 + }, + { + "epoch": 0.03174776242411804, + "grad_norm": 1.5644978284835815, + "learning_rate": 1.5873524884556184e-06, + "loss": 0.0296, + "step": 4950 + }, + { + "epoch": 0.03181189931790413, + "grad_norm": 0.9671863317489624, + "learning_rate": 1.5905592611595691e-06, + "loss": 0.0357, + "step": 4960 + }, + { + "epoch": 0.03187603621169023, + "grad_norm": 1.728955864906311, + "learning_rate": 1.59376603386352e-06, + "loss": 0.0257, + "step": 4970 + }, + { + "epoch": 0.03194017310547633, + "grad_norm": 0.9994457960128784, + "learning_rate": 1.5969728065674705e-06, + "loss": 0.0256, + "step": 4980 + }, + { + "epoch": 0.032004309999262424, + "grad_norm": 1.5000872611999512, + "learning_rate": 1.6001795792714214e-06, + "loss": 0.0213, + "step": 4990 + }, + { + "epoch": 0.032068446893048524, + "grad_norm": 1.2088123559951782, + "learning_rate": 1.6033863519753721e-06, + "loss": 0.0179, + "step": 5000 + }, + { + "epoch": 0.03213258378683462, + "grad_norm": 1.0315769910812378, + "learning_rate": 1.6065931246793228e-06, + "loss": 0.0192, + "step": 5010 + }, + { + "epoch": 0.03219672068062072, + "grad_norm": 1.391422986984253, + "learning_rate": 1.6097998973832735e-06, + "loss": 0.0285, + "step": 5020 + }, + { + "epoch": 0.03226085757440681, + "grad_norm": 1.2687987089157104, + "learning_rate": 1.6130066700872244e-06, + "loss": 0.0255, + "step": 5030 + }, + { + "epoch": 0.03232499446819291, + "grad_norm": 0.7762933373451233, + "learning_rate": 1.6162134427911753e-06, + "loss": 0.0217, + "step": 5040 + }, + { + "epoch": 0.03238913136197901, + "grad_norm": 1.1362630128860474, + "learning_rate": 1.6194202154951258e-06, + "loss": 0.0219, + "step": 5050 + }, + { + "epoch": 0.032453268255765104, + "grad_norm": 0.9996119141578674, + "learning_rate": 1.6226269881990767e-06, + "loss": 0.0227, + "step": 5060 + }, + { + "epoch": 0.032517405149551204, + "grad_norm": 1.7603073120117188, + "learning_rate": 1.6258337609030271e-06, + "loss": 0.0244, + "step": 5070 + }, + { + "epoch": 0.0325815420433373, + "grad_norm": 1.5313562154769897, + "learning_rate": 1.629040533606978e-06, + "loss": 0.0316, + "step": 5080 + }, + { + "epoch": 0.0326456789371234, + "grad_norm": 1.6863077878952026, + "learning_rate": 1.6322473063109287e-06, + "loss": 0.0243, + "step": 5090 + }, + { + "epoch": 0.03270981583090949, + "grad_norm": 0.6779791116714478, + "learning_rate": 1.6354540790148796e-06, + "loss": 0.0169, + "step": 5100 + }, + { + "epoch": 0.03277395272469559, + "grad_norm": 1.0107567310333252, + "learning_rate": 1.6386608517188305e-06, + "loss": 0.0222, + "step": 5110 + }, + { + "epoch": 0.03283808961848169, + "grad_norm": 1.782139778137207, + "learning_rate": 1.641867624422781e-06, + "loss": 0.0238, + "step": 5120 + }, + { + "epoch": 0.03290222651226778, + "grad_norm": 0.9885055422782898, + "learning_rate": 1.645074397126732e-06, + "loss": 0.0219, + "step": 5130 + }, + { + "epoch": 0.032966363406053883, + "grad_norm": 1.1917113065719604, + "learning_rate": 1.6482811698306824e-06, + "loss": 0.0196, + "step": 5140 + }, + { + "epoch": 0.03303050029983998, + "grad_norm": 1.1517614126205444, + "learning_rate": 1.6514879425346333e-06, + "loss": 0.0287, + "step": 5150 + }, + { + "epoch": 0.03309463719362608, + "grad_norm": 1.6611928939819336, + "learning_rate": 1.654694715238584e-06, + "loss": 0.0179, + "step": 5160 + }, + { + "epoch": 0.03315877408741217, + "grad_norm": 0.7962343096733093, + "learning_rate": 1.6579014879425349e-06, + "loss": 0.0241, + "step": 5170 + }, + { + "epoch": 0.03322291098119827, + "grad_norm": 1.4753403663635254, + "learning_rate": 1.6611082606464854e-06, + "loss": 0.0274, + "step": 5180 + }, + { + "epoch": 0.03328704787498437, + "grad_norm": 1.1891019344329834, + "learning_rate": 1.6643150333504363e-06, + "loss": 0.0172, + "step": 5190 + }, + { + "epoch": 0.03335118476877046, + "grad_norm": 1.0140347480773926, + "learning_rate": 1.6675218060543872e-06, + "loss": 0.0236, + "step": 5200 + }, + { + "epoch": 0.03341532166255656, + "grad_norm": 1.0874922275543213, + "learning_rate": 1.6707285787583376e-06, + "loss": 0.0189, + "step": 5210 + }, + { + "epoch": 0.033479458556342656, + "grad_norm": 1.6356042623519897, + "learning_rate": 1.6739353514622885e-06, + "loss": 0.0189, + "step": 5220 + }, + { + "epoch": 0.033543595450128756, + "grad_norm": 1.688215732574463, + "learning_rate": 1.6771421241662392e-06, + "loss": 0.025, + "step": 5230 + }, + { + "epoch": 0.03360773234391485, + "grad_norm": 1.1185798645019531, + "learning_rate": 1.6803488968701901e-06, + "loss": 0.0189, + "step": 5240 + }, + { + "epoch": 0.03367186923770095, + "grad_norm": 1.1338945627212524, + "learning_rate": 1.6835556695741406e-06, + "loss": 0.0224, + "step": 5250 + }, + { + "epoch": 0.03373600613148705, + "grad_norm": 1.0788078308105469, + "learning_rate": 1.6867624422780915e-06, + "loss": 0.023, + "step": 5260 + }, + { + "epoch": 0.03380014302527314, + "grad_norm": 1.0811059474945068, + "learning_rate": 1.689969214982042e-06, + "loss": 0.0236, + "step": 5270 + }, + { + "epoch": 0.03386427991905924, + "grad_norm": 0.7724621891975403, + "learning_rate": 1.693175987685993e-06, + "loss": 0.0196, + "step": 5280 + }, + { + "epoch": 0.033928416812845336, + "grad_norm": 0.8252857327461243, + "learning_rate": 1.6963827603899438e-06, + "loss": 0.0274, + "step": 5290 + }, + { + "epoch": 0.033992553706631436, + "grad_norm": 1.3175960779190063, + "learning_rate": 1.6995895330938945e-06, + "loss": 0.0184, + "step": 5300 + }, + { + "epoch": 0.03405669060041753, + "grad_norm": 1.2741248607635498, + "learning_rate": 1.7027963057978452e-06, + "loss": 0.0275, + "step": 5310 + }, + { + "epoch": 0.03412082749420363, + "grad_norm": 0.8108416795730591, + "learning_rate": 1.7060030785017959e-06, + "loss": 0.0129, + "step": 5320 + }, + { + "epoch": 0.03418496438798972, + "grad_norm": 1.051501989364624, + "learning_rate": 1.7092098512057468e-06, + "loss": 0.0199, + "step": 5330 + }, + { + "epoch": 0.03424910128177582, + "grad_norm": 1.7660843133926392, + "learning_rate": 1.7124166239096972e-06, + "loss": 0.0239, + "step": 5340 + }, + { + "epoch": 0.03431323817556192, + "grad_norm": 1.312746524810791, + "learning_rate": 1.7156233966136481e-06, + "loss": 0.0257, + "step": 5350 + }, + { + "epoch": 0.034377375069348015, + "grad_norm": 1.3452777862548828, + "learning_rate": 1.7188301693175988e-06, + "loss": 0.021, + "step": 5360 + }, + { + "epoch": 0.034441511963134115, + "grad_norm": 0.9213612079620361, + "learning_rate": 1.7220369420215497e-06, + "loss": 0.0184, + "step": 5370 + }, + { + "epoch": 0.03450564885692021, + "grad_norm": 1.207118272781372, + "learning_rate": 1.7252437147255004e-06, + "loss": 0.0214, + "step": 5380 + }, + { + "epoch": 0.03456978575070631, + "grad_norm": 0.9979588389396667, + "learning_rate": 1.7284504874294511e-06, + "loss": 0.0211, + "step": 5390 + }, + { + "epoch": 0.0346339226444924, + "grad_norm": 1.328615665435791, + "learning_rate": 1.731657260133402e-06, + "loss": 0.0302, + "step": 5400 + }, + { + "epoch": 0.0346980595382785, + "grad_norm": 1.2631783485412598, + "learning_rate": 1.7348640328373525e-06, + "loss": 0.0198, + "step": 5410 + }, + { + "epoch": 0.0347621964320646, + "grad_norm": 1.2121427059173584, + "learning_rate": 1.7380708055413034e-06, + "loss": 0.028, + "step": 5420 + }, + { + "epoch": 0.034826333325850695, + "grad_norm": 1.0202001333236694, + "learning_rate": 1.741277578245254e-06, + "loss": 0.0274, + "step": 5430 + }, + { + "epoch": 0.034890470219636795, + "grad_norm": 1.1604700088500977, + "learning_rate": 1.7444843509492048e-06, + "loss": 0.0269, + "step": 5440 + }, + { + "epoch": 0.03495460711342289, + "grad_norm": 0.8548974990844727, + "learning_rate": 1.7476911236531557e-06, + "loss": 0.0269, + "step": 5450 + }, + { + "epoch": 0.03501874400720899, + "grad_norm": 3.13950252532959, + "learning_rate": 1.7508978963571064e-06, + "loss": 0.016, + "step": 5460 + }, + { + "epoch": 0.03508288090099508, + "grad_norm": 1.0822423696517944, + "learning_rate": 1.7541046690610573e-06, + "loss": 0.0175, + "step": 5470 + }, + { + "epoch": 0.03514701779478118, + "grad_norm": 1.2122656106948853, + "learning_rate": 1.7573114417650077e-06, + "loss": 0.0189, + "step": 5480 + }, + { + "epoch": 0.03521115468856728, + "grad_norm": 1.1777442693710327, + "learning_rate": 1.7605182144689587e-06, + "loss": 0.018, + "step": 5490 + }, + { + "epoch": 0.035275291582353374, + "grad_norm": 1.0098204612731934, + "learning_rate": 1.7637249871729093e-06, + "loss": 0.0185, + "step": 5500 + }, + { + "epoch": 0.035339428476139474, + "grad_norm": 0.7142383456230164, + "learning_rate": 1.76693175987686e-06, + "loss": 0.0184, + "step": 5510 + }, + { + "epoch": 0.03540356536992557, + "grad_norm": 0.5789303183555603, + "learning_rate": 1.7701385325808107e-06, + "loss": 0.0119, + "step": 5520 + }, + { + "epoch": 0.03546770226371167, + "grad_norm": 1.0011595487594604, + "learning_rate": 1.7733453052847616e-06, + "loss": 0.0236, + "step": 5530 + }, + { + "epoch": 0.03553183915749776, + "grad_norm": 1.1528468132019043, + "learning_rate": 1.7765520779887125e-06, + "loss": 0.0287, + "step": 5540 + }, + { + "epoch": 0.03559597605128386, + "grad_norm": 1.0052666664123535, + "learning_rate": 1.779758850692663e-06, + "loss": 0.0201, + "step": 5550 + }, + { + "epoch": 0.03566011294506996, + "grad_norm": 1.2844178676605225, + "learning_rate": 1.782965623396614e-06, + "loss": 0.0259, + "step": 5560 + }, + { + "epoch": 0.035724249838856054, + "grad_norm": 0.8071389198303223, + "learning_rate": 1.7861723961005644e-06, + "loss": 0.0261, + "step": 5570 + }, + { + "epoch": 0.035788386732642154, + "grad_norm": 1.3172229528427124, + "learning_rate": 1.7893791688045153e-06, + "loss": 0.0193, + "step": 5580 + }, + { + "epoch": 0.03585252362642825, + "grad_norm": 0.5351840257644653, + "learning_rate": 1.792585941508466e-06, + "loss": 0.0221, + "step": 5590 + }, + { + "epoch": 0.03591666052021435, + "grad_norm": 1.4215500354766846, + "learning_rate": 1.7957927142124169e-06, + "loss": 0.0213, + "step": 5600 + }, + { + "epoch": 0.03598079741400044, + "grad_norm": 1.0777472257614136, + "learning_rate": 1.7989994869163673e-06, + "loss": 0.0207, + "step": 5610 + }, + { + "epoch": 0.03604493430778654, + "grad_norm": 2.632100820541382, + "learning_rate": 1.8022062596203183e-06, + "loss": 0.0265, + "step": 5620 + }, + { + "epoch": 0.03610907120157264, + "grad_norm": 1.2422752380371094, + "learning_rate": 1.8054130323242692e-06, + "loss": 0.0182, + "step": 5630 + }, + { + "epoch": 0.03617320809535873, + "grad_norm": 1.2604893445968628, + "learning_rate": 1.8086198050282196e-06, + "loss": 0.019, + "step": 5640 + }, + { + "epoch": 0.03623734498914483, + "grad_norm": 0.9792515635490417, + "learning_rate": 1.8118265777321705e-06, + "loss": 0.0235, + "step": 5650 + }, + { + "epoch": 0.036301481882930926, + "grad_norm": 0.6919666528701782, + "learning_rate": 1.8150333504361212e-06, + "loss": 0.0166, + "step": 5660 + }, + { + "epoch": 0.036365618776717026, + "grad_norm": 0.734473705291748, + "learning_rate": 1.8182401231400721e-06, + "loss": 0.0206, + "step": 5670 + }, + { + "epoch": 0.03642975567050312, + "grad_norm": 1.429874062538147, + "learning_rate": 1.8214468958440226e-06, + "loss": 0.0199, + "step": 5680 + }, + { + "epoch": 0.03649389256428922, + "grad_norm": 1.1574482917785645, + "learning_rate": 1.8246536685479735e-06, + "loss": 0.0174, + "step": 5690 + }, + { + "epoch": 0.03655802945807531, + "grad_norm": 0.7550815939903259, + "learning_rate": 1.827860441251924e-06, + "loss": 0.0229, + "step": 5700 + }, + { + "epoch": 0.03662216635186141, + "grad_norm": 0.7385497093200684, + "learning_rate": 1.8310672139558749e-06, + "loss": 0.0283, + "step": 5710 + }, + { + "epoch": 0.03668630324564751, + "grad_norm": 0.99532550573349, + "learning_rate": 1.8342739866598258e-06, + "loss": 0.0173, + "step": 5720 + }, + { + "epoch": 0.036750440139433606, + "grad_norm": 0.7917321920394897, + "learning_rate": 1.8374807593637765e-06, + "loss": 0.0208, + "step": 5730 + }, + { + "epoch": 0.036814577033219706, + "grad_norm": 1.0840858221054077, + "learning_rate": 1.8406875320677274e-06, + "loss": 0.0201, + "step": 5740 + }, + { + "epoch": 0.0368787139270058, + "grad_norm": 0.8415246605873108, + "learning_rate": 1.8438943047716779e-06, + "loss": 0.025, + "step": 5750 + }, + { + "epoch": 0.0369428508207919, + "grad_norm": 1.3072212934494019, + "learning_rate": 1.8471010774756288e-06, + "loss": 0.0176, + "step": 5760 + }, + { + "epoch": 0.03700698771457799, + "grad_norm": 1.1021171808242798, + "learning_rate": 1.8503078501795792e-06, + "loss": 0.0181, + "step": 5770 + }, + { + "epoch": 0.03707112460836409, + "grad_norm": 0.6983956098556519, + "learning_rate": 1.8535146228835301e-06, + "loss": 0.0235, + "step": 5780 + }, + { + "epoch": 0.03713526150215019, + "grad_norm": 1.2423068284988403, + "learning_rate": 1.856721395587481e-06, + "loss": 0.0182, + "step": 5790 + }, + { + "epoch": 0.037199398395936285, + "grad_norm": 2.747347354888916, + "learning_rate": 1.8599281682914317e-06, + "loss": 0.0209, + "step": 5800 + }, + { + "epoch": 0.037263535289722385, + "grad_norm": 1.6623913049697876, + "learning_rate": 1.8631349409953824e-06, + "loss": 0.02, + "step": 5810 + }, + { + "epoch": 0.03732767218350848, + "grad_norm": 0.9565622806549072, + "learning_rate": 1.866341713699333e-06, + "loss": 0.0154, + "step": 5820 + }, + { + "epoch": 0.03739180907729458, + "grad_norm": 0.966018795967102, + "learning_rate": 1.869548486403284e-06, + "loss": 0.021, + "step": 5830 + }, + { + "epoch": 0.03745594597108067, + "grad_norm": 1.0688713788986206, + "learning_rate": 1.8727552591072345e-06, + "loss": 0.0247, + "step": 5840 + }, + { + "epoch": 0.03752008286486677, + "grad_norm": 1.0856199264526367, + "learning_rate": 1.8759620318111854e-06, + "loss": 0.0235, + "step": 5850 + }, + { + "epoch": 0.03758421975865287, + "grad_norm": 1.232641339302063, + "learning_rate": 1.879168804515136e-06, + "loss": 0.0224, + "step": 5860 + }, + { + "epoch": 0.037648356652438965, + "grad_norm": 0.9010834097862244, + "learning_rate": 1.882375577219087e-06, + "loss": 0.0235, + "step": 5870 + }, + { + "epoch": 0.037712493546225065, + "grad_norm": 0.8574920296669006, + "learning_rate": 1.8855823499230377e-06, + "loss": 0.0229, + "step": 5880 + }, + { + "epoch": 0.03777663044001116, + "grad_norm": 0.9682068824768066, + "learning_rate": 1.8887891226269884e-06, + "loss": 0.0189, + "step": 5890 + }, + { + "epoch": 0.03784076733379726, + "grad_norm": 1.4444180727005005, + "learning_rate": 1.8919958953309393e-06, + "loss": 0.025, + "step": 5900 + }, + { + "epoch": 0.03790490422758335, + "grad_norm": 0.8763965964317322, + "learning_rate": 1.8952026680348897e-06, + "loss": 0.0216, + "step": 5910 + }, + { + "epoch": 0.03796904112136945, + "grad_norm": 1.5463361740112305, + "learning_rate": 1.8984094407388406e-06, + "loss": 0.0192, + "step": 5920 + }, + { + "epoch": 0.03803317801515555, + "grad_norm": 0.7160257697105408, + "learning_rate": 1.9016162134427913e-06, + "loss": 0.0173, + "step": 5930 + }, + { + "epoch": 0.038097314908941644, + "grad_norm": 2.2366437911987305, + "learning_rate": 1.904822986146742e-06, + "loss": 0.0241, + "step": 5940 + }, + { + "epoch": 0.038161451802727744, + "grad_norm": 1.0212844610214233, + "learning_rate": 1.9080297588506925e-06, + "loss": 0.0186, + "step": 5950 + }, + { + "epoch": 0.03822558869651384, + "grad_norm": 1.2753028869628906, + "learning_rate": 1.9112365315546436e-06, + "loss": 0.0189, + "step": 5960 + }, + { + "epoch": 0.03828972559029994, + "grad_norm": 1.4174270629882812, + "learning_rate": 1.9144433042585943e-06, + "loss": 0.0208, + "step": 5970 + }, + { + "epoch": 0.03835386248408603, + "grad_norm": 1.3357608318328857, + "learning_rate": 1.917650076962545e-06, + "loss": 0.0195, + "step": 5980 + }, + { + "epoch": 0.03841799937787213, + "grad_norm": 1.2511043548583984, + "learning_rate": 1.9208568496664957e-06, + "loss": 0.0167, + "step": 5990 + }, + { + "epoch": 0.03848213627165823, + "grad_norm": 0.9986559748649597, + "learning_rate": 1.9240636223704464e-06, + "loss": 0.0183, + "step": 6000 + }, + { + "epoch": 0.038546273165444324, + "grad_norm": 1.2135497331619263, + "learning_rate": 1.9272703950743975e-06, + "loss": 0.0202, + "step": 6010 + }, + { + "epoch": 0.038610410059230424, + "grad_norm": 0.6283160448074341, + "learning_rate": 1.9304771677783477e-06, + "loss": 0.0216, + "step": 6020 + }, + { + "epoch": 0.03867454695301652, + "grad_norm": 0.8352381587028503, + "learning_rate": 1.933683940482299e-06, + "loss": 0.0216, + "step": 6030 + }, + { + "epoch": 0.03873868384680262, + "grad_norm": 0.8876038789749146, + "learning_rate": 1.9368907131862495e-06, + "loss": 0.0202, + "step": 6040 + }, + { + "epoch": 0.03880282074058871, + "grad_norm": 1.5208381414413452, + "learning_rate": 1.9400974858902002e-06, + "loss": 0.0218, + "step": 6050 + }, + { + "epoch": 0.03886695763437481, + "grad_norm": 0.8314782977104187, + "learning_rate": 1.943304258594151e-06, + "loss": 0.0154, + "step": 6060 + }, + { + "epoch": 0.0389310945281609, + "grad_norm": 0.7073200345039368, + "learning_rate": 1.9465110312981016e-06, + "loss": 0.02, + "step": 6070 + }, + { + "epoch": 0.038995231421947, + "grad_norm": 0.969404935836792, + "learning_rate": 1.9497178040020527e-06, + "loss": 0.0219, + "step": 6080 + }, + { + "epoch": 0.0390593683157331, + "grad_norm": 1.1272072792053223, + "learning_rate": 1.952924576706003e-06, + "loss": 0.0236, + "step": 6090 + }, + { + "epoch": 0.039123505209519197, + "grad_norm": 1.5298181772232056, + "learning_rate": 1.956131349409954e-06, + "loss": 0.0248, + "step": 6100 + }, + { + "epoch": 0.0391876421033053, + "grad_norm": 1.1177719831466675, + "learning_rate": 1.959338122113905e-06, + "loss": 0.0212, + "step": 6110 + }, + { + "epoch": 0.03925177899709139, + "grad_norm": 1.3308947086334229, + "learning_rate": 1.9625448948178555e-06, + "loss": 0.0261, + "step": 6120 + }, + { + "epoch": 0.03931591589087749, + "grad_norm": 0.9389868974685669, + "learning_rate": 1.965751667521806e-06, + "loss": 0.0164, + "step": 6130 + }, + { + "epoch": 0.03938005278466358, + "grad_norm": 1.041259527206421, + "learning_rate": 1.968958440225757e-06, + "loss": 0.0268, + "step": 6140 + }, + { + "epoch": 0.03944418967844968, + "grad_norm": 0.7473919987678528, + "learning_rate": 1.972165212929708e-06, + "loss": 0.0268, + "step": 6150 + }, + { + "epoch": 0.03950832657223578, + "grad_norm": 1.006229043006897, + "learning_rate": 1.9753719856336582e-06, + "loss": 0.0238, + "step": 6160 + }, + { + "epoch": 0.039572463466021876, + "grad_norm": 1.1355164051055908, + "learning_rate": 1.9785787583376094e-06, + "loss": 0.0203, + "step": 6170 + }, + { + "epoch": 0.039636600359807976, + "grad_norm": 1.5693013668060303, + "learning_rate": 1.98178553104156e-06, + "loss": 0.0193, + "step": 6180 + }, + { + "epoch": 0.03970073725359407, + "grad_norm": 1.2391225099563599, + "learning_rate": 1.9849923037455107e-06, + "loss": 0.0234, + "step": 6190 + }, + { + "epoch": 0.03976487414738017, + "grad_norm": 1.2182879447937012, + "learning_rate": 1.9881990764494614e-06, + "loss": 0.0211, + "step": 6200 + }, + { + "epoch": 0.03982901104116626, + "grad_norm": 1.1928573846817017, + "learning_rate": 1.991405849153412e-06, + "loss": 0.0235, + "step": 6210 + }, + { + "epoch": 0.03989314793495236, + "grad_norm": 1.5095957517623901, + "learning_rate": 1.9946126218573632e-06, + "loss": 0.0201, + "step": 6220 + }, + { + "epoch": 0.03995728482873846, + "grad_norm": 1.1891467571258545, + "learning_rate": 1.9978193945613135e-06, + "loss": 0.022, + "step": 6230 + }, + { + "epoch": 0.040021421722524556, + "grad_norm": 1.2522825002670288, + "learning_rate": 2.0010261672652646e-06, + "loss": 0.025, + "step": 6240 + }, + { + "epoch": 0.040085558616310656, + "grad_norm": 1.1242830753326416, + "learning_rate": 2.0042329399692153e-06, + "loss": 0.0231, + "step": 6250 + }, + { + "epoch": 0.04014969551009675, + "grad_norm": 1.4165798425674438, + "learning_rate": 2.007439712673166e-06, + "loss": 0.0193, + "step": 6260 + }, + { + "epoch": 0.04021383240388285, + "grad_norm": 1.4682064056396484, + "learning_rate": 2.0106464853771167e-06, + "loss": 0.0254, + "step": 6270 + }, + { + "epoch": 0.04027796929766894, + "grad_norm": 0.9028723835945129, + "learning_rate": 2.0138532580810674e-06, + "loss": 0.0229, + "step": 6280 + }, + { + "epoch": 0.04034210619145504, + "grad_norm": 0.7347221374511719, + "learning_rate": 2.017060030785018e-06, + "loss": 0.0204, + "step": 6290 + }, + { + "epoch": 0.04040624308524114, + "grad_norm": 1.0344412326812744, + "learning_rate": 2.0202668034889687e-06, + "loss": 0.0249, + "step": 6300 + }, + { + "epoch": 0.040470379979027235, + "grad_norm": 1.3016290664672852, + "learning_rate": 2.02347357619292e-06, + "loss": 0.0179, + "step": 6310 + }, + { + "epoch": 0.040534516872813335, + "grad_norm": 1.060326337814331, + "learning_rate": 2.02668034889687e-06, + "loss": 0.0171, + "step": 6320 + }, + { + "epoch": 0.04059865376659943, + "grad_norm": 1.2995046377182007, + "learning_rate": 2.0298871216008212e-06, + "loss": 0.0211, + "step": 6330 + }, + { + "epoch": 0.04066279066038553, + "grad_norm": 1.4574737548828125, + "learning_rate": 2.033093894304772e-06, + "loss": 0.0196, + "step": 6340 + }, + { + "epoch": 0.04072692755417162, + "grad_norm": 1.034865379333496, + "learning_rate": 2.0363006670087226e-06, + "loss": 0.0203, + "step": 6350 + }, + { + "epoch": 0.04079106444795772, + "grad_norm": 1.1399013996124268, + "learning_rate": 2.0395074397126733e-06, + "loss": 0.0237, + "step": 6360 + }, + { + "epoch": 0.04085520134174382, + "grad_norm": 1.309984803199768, + "learning_rate": 2.042714212416624e-06, + "loss": 0.0205, + "step": 6370 + }, + { + "epoch": 0.040919338235529915, + "grad_norm": 0.6266277432441711, + "learning_rate": 2.0459209851205747e-06, + "loss": 0.0134, + "step": 6380 + }, + { + "epoch": 0.040983475129316015, + "grad_norm": 0.8712389469146729, + "learning_rate": 2.0491277578245254e-06, + "loss": 0.021, + "step": 6390 + }, + { + "epoch": 0.04104761202310211, + "grad_norm": 0.8083272576332092, + "learning_rate": 2.0523345305284765e-06, + "loss": 0.0185, + "step": 6400 + }, + { + "epoch": 0.04111174891688821, + "grad_norm": 0.7754104733467102, + "learning_rate": 2.055541303232427e-06, + "loss": 0.0282, + "step": 6410 + }, + { + "epoch": 0.0411758858106743, + "grad_norm": 0.6917300224304199, + "learning_rate": 2.058748075936378e-06, + "loss": 0.0195, + "step": 6420 + }, + { + "epoch": 0.0412400227044604, + "grad_norm": 1.2205440998077393, + "learning_rate": 2.0619548486403286e-06, + "loss": 0.0155, + "step": 6430 + }, + { + "epoch": 0.041304159598246494, + "grad_norm": 1.1273776292800903, + "learning_rate": 2.0651616213442792e-06, + "loss": 0.0199, + "step": 6440 + }, + { + "epoch": 0.041368296492032594, + "grad_norm": 1.1309514045715332, + "learning_rate": 2.06836839404823e-06, + "loss": 0.0207, + "step": 6450 + }, + { + "epoch": 0.041432433385818694, + "grad_norm": 1.0427113771438599, + "learning_rate": 2.0715751667521806e-06, + "loss": 0.0162, + "step": 6460 + }, + { + "epoch": 0.04149657027960479, + "grad_norm": 1.0652873516082764, + "learning_rate": 2.0747819394561317e-06, + "loss": 0.0195, + "step": 6470 + }, + { + "epoch": 0.04156070717339089, + "grad_norm": 1.2061506509780884, + "learning_rate": 2.0779887121600824e-06, + "loss": 0.0219, + "step": 6480 + }, + { + "epoch": 0.04162484406717698, + "grad_norm": 1.1749380826950073, + "learning_rate": 2.081195484864033e-06, + "loss": 0.0235, + "step": 6490 + }, + { + "epoch": 0.04168898096096308, + "grad_norm": 0.9566291570663452, + "learning_rate": 2.084402257567984e-06, + "loss": 0.0186, + "step": 6500 + }, + { + "epoch": 0.041753117854749174, + "grad_norm": 1.0603986978530884, + "learning_rate": 2.0876090302719345e-06, + "loss": 0.0205, + "step": 6510 + }, + { + "epoch": 0.041817254748535274, + "grad_norm": 1.050813913345337, + "learning_rate": 2.090815802975885e-06, + "loss": 0.0223, + "step": 6520 + }, + { + "epoch": 0.041881391642321374, + "grad_norm": 0.7652897238731384, + "learning_rate": 2.094022575679836e-06, + "loss": 0.0172, + "step": 6530 + }, + { + "epoch": 0.04194552853610747, + "grad_norm": 0.9831271171569824, + "learning_rate": 2.0972293483837866e-06, + "loss": 0.0229, + "step": 6540 + }, + { + "epoch": 0.04200966542989357, + "grad_norm": 0.8050175309181213, + "learning_rate": 2.1004361210877377e-06, + "loss": 0.017, + "step": 6550 + }, + { + "epoch": 0.04207380232367966, + "grad_norm": 1.143938660621643, + "learning_rate": 2.1036428937916884e-06, + "loss": 0.0158, + "step": 6560 + }, + { + "epoch": 0.04213793921746576, + "grad_norm": 0.8601158857345581, + "learning_rate": 2.106849666495639e-06, + "loss": 0.0229, + "step": 6570 + }, + { + "epoch": 0.04220207611125185, + "grad_norm": 0.7796095013618469, + "learning_rate": 2.1100564391995897e-06, + "loss": 0.0211, + "step": 6580 + }, + { + "epoch": 0.04226621300503795, + "grad_norm": 0.9684072732925415, + "learning_rate": 2.1132632119035404e-06, + "loss": 0.0188, + "step": 6590 + }, + { + "epoch": 0.04233034989882405, + "grad_norm": 0.705844521522522, + "learning_rate": 2.116469984607491e-06, + "loss": 0.0204, + "step": 6600 + }, + { + "epoch": 0.042394486792610146, + "grad_norm": 1.1430692672729492, + "learning_rate": 2.119676757311442e-06, + "loss": 0.0183, + "step": 6610 + }, + { + "epoch": 0.042458623686396246, + "grad_norm": 0.8286201357841492, + "learning_rate": 2.1228835300153925e-06, + "loss": 0.0205, + "step": 6620 + }, + { + "epoch": 0.04252276058018234, + "grad_norm": 1.245947241783142, + "learning_rate": 2.126090302719343e-06, + "loss": 0.0303, + "step": 6630 + }, + { + "epoch": 0.04258689747396844, + "grad_norm": 2.2712762355804443, + "learning_rate": 2.1292970754232943e-06, + "loss": 0.0269, + "step": 6640 + }, + { + "epoch": 0.04265103436775453, + "grad_norm": 0.8857012987136841, + "learning_rate": 2.132503848127245e-06, + "loss": 0.0182, + "step": 6650 + }, + { + "epoch": 0.04271517126154063, + "grad_norm": 1.162306547164917, + "learning_rate": 2.1357106208311957e-06, + "loss": 0.022, + "step": 6660 + }, + { + "epoch": 0.04277930815532673, + "grad_norm": 0.4371137022972107, + "learning_rate": 2.1389173935351464e-06, + "loss": 0.0205, + "step": 6670 + }, + { + "epoch": 0.042843445049112826, + "grad_norm": 0.761978805065155, + "learning_rate": 2.142124166239097e-06, + "loss": 0.0203, + "step": 6680 + }, + { + "epoch": 0.042907581942898926, + "grad_norm": 0.7236703038215637, + "learning_rate": 2.1453309389430478e-06, + "loss": 0.0199, + "step": 6690 + }, + { + "epoch": 0.04297171883668502, + "grad_norm": 0.8764296770095825, + "learning_rate": 2.1485377116469984e-06, + "loss": 0.02, + "step": 6700 + }, + { + "epoch": 0.04303585573047112, + "grad_norm": 0.6730541586875916, + "learning_rate": 2.1517444843509496e-06, + "loss": 0.0194, + "step": 6710 + }, + { + "epoch": 0.04309999262425721, + "grad_norm": 1.8022714853286743, + "learning_rate": 2.1549512570549e-06, + "loss": 0.019, + "step": 6720 + }, + { + "epoch": 0.04316412951804331, + "grad_norm": 0.8802977800369263, + "learning_rate": 2.158158029758851e-06, + "loss": 0.0149, + "step": 6730 + }, + { + "epoch": 0.04322826641182941, + "grad_norm": 1.2370784282684326, + "learning_rate": 2.1613648024628016e-06, + "loss": 0.0172, + "step": 6740 + }, + { + "epoch": 0.043292403305615505, + "grad_norm": 1.1011195182800293, + "learning_rate": 2.1645715751667523e-06, + "loss": 0.0253, + "step": 6750 + }, + { + "epoch": 0.043356540199401605, + "grad_norm": 0.8227287530899048, + "learning_rate": 2.167778347870703e-06, + "loss": 0.0185, + "step": 6760 + }, + { + "epoch": 0.0434206770931877, + "grad_norm": 0.6026843190193176, + "learning_rate": 2.1709851205746537e-06, + "loss": 0.0148, + "step": 6770 + }, + { + "epoch": 0.0434848139869738, + "grad_norm": 1.1184115409851074, + "learning_rate": 2.174191893278605e-06, + "loss": 0.0178, + "step": 6780 + }, + { + "epoch": 0.04354895088075989, + "grad_norm": 0.9370375275611877, + "learning_rate": 2.177398665982555e-06, + "loss": 0.0209, + "step": 6790 + }, + { + "epoch": 0.04361308777454599, + "grad_norm": 1.184848427772522, + "learning_rate": 2.180605438686506e-06, + "loss": 0.024, + "step": 6800 + }, + { + "epoch": 0.043677224668332085, + "grad_norm": 0.7680546045303345, + "learning_rate": 2.183812211390457e-06, + "loss": 0.0112, + "step": 6810 + }, + { + "epoch": 0.043741361562118185, + "grad_norm": 1.324097990989685, + "learning_rate": 2.1870189840944076e-06, + "loss": 0.0226, + "step": 6820 + }, + { + "epoch": 0.043805498455904285, + "grad_norm": 0.9823472499847412, + "learning_rate": 2.1902257567983583e-06, + "loss": 0.025, + "step": 6830 + }, + { + "epoch": 0.04386963534969038, + "grad_norm": 1.1413768529891968, + "learning_rate": 2.193432529502309e-06, + "loss": 0.0247, + "step": 6840 + }, + { + "epoch": 0.04393377224347648, + "grad_norm": 1.236708402633667, + "learning_rate": 2.19663930220626e-06, + "loss": 0.0223, + "step": 6850 + }, + { + "epoch": 0.04399790913726257, + "grad_norm": 0.8317570090293884, + "learning_rate": 2.1998460749102103e-06, + "loss": 0.0145, + "step": 6860 + }, + { + "epoch": 0.04406204603104867, + "grad_norm": 1.6575742959976196, + "learning_rate": 2.2030528476141614e-06, + "loss": 0.0226, + "step": 6870 + }, + { + "epoch": 0.044126182924834764, + "grad_norm": 0.9479625821113586, + "learning_rate": 2.206259620318112e-06, + "loss": 0.0168, + "step": 6880 + }, + { + "epoch": 0.044190319818620864, + "grad_norm": 0.414814293384552, + "learning_rate": 2.209466393022063e-06, + "loss": 0.0202, + "step": 6890 + }, + { + "epoch": 0.044254456712406964, + "grad_norm": 1.0208877325057983, + "learning_rate": 2.2126731657260135e-06, + "loss": 0.0172, + "step": 6900 + }, + { + "epoch": 0.04431859360619306, + "grad_norm": 0.67899489402771, + "learning_rate": 2.215879938429964e-06, + "loss": 0.0195, + "step": 6910 + }, + { + "epoch": 0.04438273049997916, + "grad_norm": 1.2453488111495972, + "learning_rate": 2.2190867111339153e-06, + "loss": 0.0213, + "step": 6920 + }, + { + "epoch": 0.04444686739376525, + "grad_norm": 1.481083631515503, + "learning_rate": 2.2222934838378656e-06, + "loss": 0.0172, + "step": 6930 + }, + { + "epoch": 0.04451100428755135, + "grad_norm": 0.8547831177711487, + "learning_rate": 2.2255002565418167e-06, + "loss": 0.0157, + "step": 6940 + }, + { + "epoch": 0.044575141181337444, + "grad_norm": 1.2726802825927734, + "learning_rate": 2.228707029245767e-06, + "loss": 0.0189, + "step": 6950 + }, + { + "epoch": 0.044639278075123544, + "grad_norm": 0.828345000743866, + "learning_rate": 2.231913801949718e-06, + "loss": 0.0186, + "step": 6960 + }, + { + "epoch": 0.044703414968909644, + "grad_norm": 0.7094138264656067, + "learning_rate": 2.2351205746536688e-06, + "loss": 0.0208, + "step": 6970 + }, + { + "epoch": 0.04476755186269574, + "grad_norm": 0.6570111513137817, + "learning_rate": 2.2383273473576194e-06, + "loss": 0.0172, + "step": 6980 + }, + { + "epoch": 0.04483168875648184, + "grad_norm": 1.5432921648025513, + "learning_rate": 2.24153412006157e-06, + "loss": 0.0245, + "step": 6990 + }, + { + "epoch": 0.04489582565026793, + "grad_norm": 0.7850481271743774, + "learning_rate": 2.244740892765521e-06, + "loss": 0.0207, + "step": 7000 + }, + { + "epoch": 0.04495996254405403, + "grad_norm": 0.7977728247642517, + "learning_rate": 2.247947665469472e-06, + "loss": 0.0119, + "step": 7010 + }, + { + "epoch": 0.04502409943784012, + "grad_norm": 1.1667042970657349, + "learning_rate": 2.251154438173422e-06, + "loss": 0.015, + "step": 7020 + }, + { + "epoch": 0.04508823633162622, + "grad_norm": 1.0411245822906494, + "learning_rate": 2.2543612108773733e-06, + "loss": 0.0194, + "step": 7030 + }, + { + "epoch": 0.04515237322541232, + "grad_norm": 1.2678431272506714, + "learning_rate": 2.257567983581324e-06, + "loss": 0.0217, + "step": 7040 + }, + { + "epoch": 0.045216510119198416, + "grad_norm": 1.2046818733215332, + "learning_rate": 2.2607747562852747e-06, + "loss": 0.0197, + "step": 7050 + }, + { + "epoch": 0.045280647012984516, + "grad_norm": 1.0384752750396729, + "learning_rate": 2.2639815289892254e-06, + "loss": 0.019, + "step": 7060 + }, + { + "epoch": 0.04534478390677061, + "grad_norm": 0.8046727776527405, + "learning_rate": 2.267188301693176e-06, + "loss": 0.0198, + "step": 7070 + }, + { + "epoch": 0.04540892080055671, + "grad_norm": 1.2377238273620605, + "learning_rate": 2.270395074397127e-06, + "loss": 0.0196, + "step": 7080 + }, + { + "epoch": 0.0454730576943428, + "grad_norm": 1.2584959268569946, + "learning_rate": 2.2736018471010775e-06, + "loss": 0.0176, + "step": 7090 + }, + { + "epoch": 0.0455371945881289, + "grad_norm": 0.8299155831336975, + "learning_rate": 2.2768086198050286e-06, + "loss": 0.0201, + "step": 7100 + }, + { + "epoch": 0.045601331481914996, + "grad_norm": 0.7531854510307312, + "learning_rate": 2.2800153925089793e-06, + "loss": 0.0155, + "step": 7110 + }, + { + "epoch": 0.045665468375701096, + "grad_norm": 0.6106268763542175, + "learning_rate": 2.28322216521293e-06, + "loss": 0.0162, + "step": 7120 + }, + { + "epoch": 0.045729605269487196, + "grad_norm": 1.0959511995315552, + "learning_rate": 2.2864289379168806e-06, + "loss": 0.0154, + "step": 7130 + }, + { + "epoch": 0.04579374216327329, + "grad_norm": 1.0702574253082275, + "learning_rate": 2.2896357106208313e-06, + "loss": 0.0205, + "step": 7140 + }, + { + "epoch": 0.04585787905705939, + "grad_norm": 0.9860323667526245, + "learning_rate": 2.2928424833247824e-06, + "loss": 0.019, + "step": 7150 + }, + { + "epoch": 0.04592201595084548, + "grad_norm": 0.8199664950370789, + "learning_rate": 2.2960492560287327e-06, + "loss": 0.017, + "step": 7160 + }, + { + "epoch": 0.04598615284463158, + "grad_norm": 0.6780887842178345, + "learning_rate": 2.299256028732684e-06, + "loss": 0.0198, + "step": 7170 + }, + { + "epoch": 0.046050289738417675, + "grad_norm": 0.9349753260612488, + "learning_rate": 2.3024628014366345e-06, + "loss": 0.0147, + "step": 7180 + }, + { + "epoch": 0.046114426632203775, + "grad_norm": 0.8048481941223145, + "learning_rate": 2.305669574140585e-06, + "loss": 0.0195, + "step": 7190 + }, + { + "epoch": 0.046178563525989876, + "grad_norm": 0.797756016254425, + "learning_rate": 2.308876346844536e-06, + "loss": 0.0211, + "step": 7200 + }, + { + "epoch": 0.04624270041977597, + "grad_norm": 1.1557680368423462, + "learning_rate": 2.3120831195484866e-06, + "loss": 0.0184, + "step": 7210 + }, + { + "epoch": 0.04630683731356207, + "grad_norm": 1.015023946762085, + "learning_rate": 2.3152898922524373e-06, + "loss": 0.02, + "step": 7220 + }, + { + "epoch": 0.04637097420734816, + "grad_norm": 0.8543024063110352, + "learning_rate": 2.318496664956388e-06, + "loss": 0.0208, + "step": 7230 + }, + { + "epoch": 0.04643511110113426, + "grad_norm": 1.2135494947433472, + "learning_rate": 2.321703437660339e-06, + "loss": 0.0171, + "step": 7240 + }, + { + "epoch": 0.046499247994920355, + "grad_norm": 1.1175837516784668, + "learning_rate": 2.3249102103642893e-06, + "loss": 0.018, + "step": 7250 + }, + { + "epoch": 0.046563384888706455, + "grad_norm": 0.9123100638389587, + "learning_rate": 2.3281169830682404e-06, + "loss": 0.0145, + "step": 7260 + }, + { + "epoch": 0.046627521782492555, + "grad_norm": 0.9876992702484131, + "learning_rate": 2.331323755772191e-06, + "loss": 0.0151, + "step": 7270 + }, + { + "epoch": 0.04669165867627865, + "grad_norm": 1.081762671470642, + "learning_rate": 2.334530528476142e-06, + "loss": 0.0172, + "step": 7280 + }, + { + "epoch": 0.04675579557006475, + "grad_norm": 1.0402169227600098, + "learning_rate": 2.3377373011800925e-06, + "loss": 0.0153, + "step": 7290 + }, + { + "epoch": 0.04681993246385084, + "grad_norm": 0.9691343307495117, + "learning_rate": 2.340944073884043e-06, + "loss": 0.0135, + "step": 7300 + }, + { + "epoch": 0.04688406935763694, + "grad_norm": 0.9390423893928528, + "learning_rate": 2.344150846587994e-06, + "loss": 0.0135, + "step": 7310 + }, + { + "epoch": 0.046948206251423034, + "grad_norm": 0.5622203946113586, + "learning_rate": 2.3473576192919446e-06, + "loss": 0.0112, + "step": 7320 + }, + { + "epoch": 0.047012343145209134, + "grad_norm": 0.5227202773094177, + "learning_rate": 2.3505643919958957e-06, + "loss": 0.0112, + "step": 7330 + }, + { + "epoch": 0.047076480038995235, + "grad_norm": 0.6646885275840759, + "learning_rate": 2.3537711646998464e-06, + "loss": 0.0171, + "step": 7340 + }, + { + "epoch": 0.04714061693278133, + "grad_norm": 0.8758254051208496, + "learning_rate": 2.356977937403797e-06, + "loss": 0.0159, + "step": 7350 + }, + { + "epoch": 0.04720475382656743, + "grad_norm": 1.2100036144256592, + "learning_rate": 2.3601847101077478e-06, + "loss": 0.016, + "step": 7360 + }, + { + "epoch": 0.04726889072035352, + "grad_norm": 0.970321774482727, + "learning_rate": 2.3633914828116985e-06, + "loss": 0.0196, + "step": 7370 + }, + { + "epoch": 0.04733302761413962, + "grad_norm": 0.9241225719451904, + "learning_rate": 2.366598255515649e-06, + "loss": 0.0155, + "step": 7380 + }, + { + "epoch": 0.047397164507925714, + "grad_norm": 1.0202387571334839, + "learning_rate": 2.3698050282196e-06, + "loss": 0.0138, + "step": 7390 + }, + { + "epoch": 0.047461301401711814, + "grad_norm": 1.1858984231948853, + "learning_rate": 2.3730118009235505e-06, + "loss": 0.0143, + "step": 7400 + }, + { + "epoch": 0.047525438295497914, + "grad_norm": 0.733315646648407, + "learning_rate": 2.3762185736275016e-06, + "loss": 0.018, + "step": 7410 + }, + { + "epoch": 0.04758957518928401, + "grad_norm": 1.1660308837890625, + "learning_rate": 2.3794253463314523e-06, + "loss": 0.0162, + "step": 7420 + }, + { + "epoch": 0.04765371208307011, + "grad_norm": 0.8735769987106323, + "learning_rate": 2.382632119035403e-06, + "loss": 0.0196, + "step": 7430 + }, + { + "epoch": 0.0477178489768562, + "grad_norm": 1.218178629875183, + "learning_rate": 2.3858388917393537e-06, + "loss": 0.0167, + "step": 7440 + }, + { + "epoch": 0.0477819858706423, + "grad_norm": 1.3256758451461792, + "learning_rate": 2.3890456644433044e-06, + "loss": 0.013, + "step": 7450 + }, + { + "epoch": 0.04784612276442839, + "grad_norm": 1.5111521482467651, + "learning_rate": 2.392252437147255e-06, + "loss": 0.0195, + "step": 7460 + }, + { + "epoch": 0.047910259658214494, + "grad_norm": 0.35723578929901123, + "learning_rate": 2.3954592098512058e-06, + "loss": 0.0157, + "step": 7470 + }, + { + "epoch": 0.04797439655200059, + "grad_norm": 0.7256841063499451, + "learning_rate": 2.398665982555157e-06, + "loss": 0.0187, + "step": 7480 + }, + { + "epoch": 0.04803853344578669, + "grad_norm": 0.9517735242843628, + "learning_rate": 2.4018727552591076e-06, + "loss": 0.0141, + "step": 7490 + }, + { + "epoch": 0.04810267033957279, + "grad_norm": 0.8512372970581055, + "learning_rate": 2.4050795279630583e-06, + "loss": 0.0224, + "step": 7500 + }, + { + "epoch": 0.04816680723335888, + "grad_norm": 1.0928106307983398, + "learning_rate": 2.408286300667009e-06, + "loss": 0.0144, + "step": 7510 + }, + { + "epoch": 0.04823094412714498, + "grad_norm": 0.7627331614494324, + "learning_rate": 2.4114930733709596e-06, + "loss": 0.0183, + "step": 7520 + }, + { + "epoch": 0.04829508102093107, + "grad_norm": 0.8778024315834045, + "learning_rate": 2.4146998460749103e-06, + "loss": 0.0182, + "step": 7530 + }, + { + "epoch": 0.04835921791471717, + "grad_norm": 0.993202805519104, + "learning_rate": 2.417906618778861e-06, + "loss": 0.0145, + "step": 7540 + }, + { + "epoch": 0.048423354808503266, + "grad_norm": 0.9249763488769531, + "learning_rate": 2.421113391482812e-06, + "loss": 0.0183, + "step": 7550 + }, + { + "epoch": 0.048487491702289366, + "grad_norm": 0.865367591381073, + "learning_rate": 2.4243201641867624e-06, + "loss": 0.0219, + "step": 7560 + }, + { + "epoch": 0.048551628596075466, + "grad_norm": 0.7539327144622803, + "learning_rate": 2.4275269368907135e-06, + "loss": 0.0181, + "step": 7570 + }, + { + "epoch": 0.04861576548986156, + "grad_norm": 0.7270578742027283, + "learning_rate": 2.430733709594664e-06, + "loss": 0.0147, + "step": 7580 + }, + { + "epoch": 0.04867990238364766, + "grad_norm": 0.9653283357620239, + "learning_rate": 2.433940482298615e-06, + "loss": 0.021, + "step": 7590 + }, + { + "epoch": 0.04874403927743375, + "grad_norm": 0.9090064764022827, + "learning_rate": 2.4371472550025656e-06, + "loss": 0.0199, + "step": 7600 + }, + { + "epoch": 0.04880817617121985, + "grad_norm": 0.9785999655723572, + "learning_rate": 2.4403540277065163e-06, + "loss": 0.0127, + "step": 7610 + }, + { + "epoch": 0.048872313065005946, + "grad_norm": 0.8310036063194275, + "learning_rate": 2.443560800410467e-06, + "loss": 0.0118, + "step": 7620 + }, + { + "epoch": 0.048936449958792046, + "grad_norm": 0.9784269332885742, + "learning_rate": 2.4467675731144177e-06, + "loss": 0.0148, + "step": 7630 + }, + { + "epoch": 0.049000586852578146, + "grad_norm": 0.8932363986968994, + "learning_rate": 2.4499743458183688e-06, + "loss": 0.0193, + "step": 7640 + }, + { + "epoch": 0.04906472374636424, + "grad_norm": 0.9446199536323547, + "learning_rate": 2.453181118522319e-06, + "loss": 0.0151, + "step": 7650 + }, + { + "epoch": 0.04912886064015034, + "grad_norm": 1.0221552848815918, + "learning_rate": 2.45638789122627e-06, + "loss": 0.0181, + "step": 7660 + }, + { + "epoch": 0.04919299753393643, + "grad_norm": 0.6242429614067078, + "learning_rate": 2.459594663930221e-06, + "loss": 0.0144, + "step": 7670 + }, + { + "epoch": 0.04925713442772253, + "grad_norm": 1.2173144817352295, + "learning_rate": 2.4628014366341715e-06, + "loss": 0.0122, + "step": 7680 + }, + { + "epoch": 0.049321271321508625, + "grad_norm": 0.6649882197380066, + "learning_rate": 2.4660082093381222e-06, + "loss": 0.0148, + "step": 7690 + }, + { + "epoch": 0.049385408215294725, + "grad_norm": 1.4457169771194458, + "learning_rate": 2.469214982042073e-06, + "loss": 0.0186, + "step": 7700 + }, + { + "epoch": 0.049449545109080825, + "grad_norm": 0.8608556389808655, + "learning_rate": 2.472421754746024e-06, + "loss": 0.0184, + "step": 7710 + }, + { + "epoch": 0.04951368200286692, + "grad_norm": 1.0127040147781372, + "learning_rate": 2.4756285274499743e-06, + "loss": 0.0193, + "step": 7720 + }, + { + "epoch": 0.04957781889665302, + "grad_norm": 1.072650671005249, + "learning_rate": 2.4788353001539254e-06, + "loss": 0.0184, + "step": 7730 + }, + { + "epoch": 0.04964195579043911, + "grad_norm": 1.0660983324050903, + "learning_rate": 2.482042072857876e-06, + "loss": 0.0184, + "step": 7740 + }, + { + "epoch": 0.04970609268422521, + "grad_norm": 0.8504105806350708, + "learning_rate": 2.4852488455618268e-06, + "loss": 0.0156, + "step": 7750 + }, + { + "epoch": 0.049770229578011305, + "grad_norm": 0.5149958729743958, + "learning_rate": 2.4884556182657775e-06, + "loss": 0.0155, + "step": 7760 + }, + { + "epoch": 0.049834366471797405, + "grad_norm": 1.1049935817718506, + "learning_rate": 2.491662390969728e-06, + "loss": 0.0188, + "step": 7770 + }, + { + "epoch": 0.049898503365583505, + "grad_norm": 1.0066155195236206, + "learning_rate": 2.4948691636736793e-06, + "loss": 0.0151, + "step": 7780 + }, + { + "epoch": 0.0499626402593696, + "grad_norm": 0.8193107843399048, + "learning_rate": 2.4980759363776295e-06, + "loss": 0.0154, + "step": 7790 + }, + { + "epoch": 0.0500267771531557, + "grad_norm": 1.3258004188537598, + "learning_rate": 2.5012827090815806e-06, + "loss": 0.0194, + "step": 7800 + }, + { + "epoch": 0.05009091404694179, + "grad_norm": 1.3659716844558716, + "learning_rate": 2.5044894817855313e-06, + "loss": 0.0195, + "step": 7810 + }, + { + "epoch": 0.05015505094072789, + "grad_norm": 0.598633885383606, + "learning_rate": 2.5076962544894816e-06, + "loss": 0.0164, + "step": 7820 + }, + { + "epoch": 0.050219187834513984, + "grad_norm": 0.8737356662750244, + "learning_rate": 2.5109030271934327e-06, + "loss": 0.0114, + "step": 7830 + }, + { + "epoch": 0.050283324728300084, + "grad_norm": 1.0397603511810303, + "learning_rate": 2.5141097998973834e-06, + "loss": 0.0151, + "step": 7840 + }, + { + "epoch": 0.05034746162208618, + "grad_norm": 0.946444571018219, + "learning_rate": 2.5173165726013345e-06, + "loss": 0.02, + "step": 7850 + }, + { + "epoch": 0.05041159851587228, + "grad_norm": 0.6684794425964355, + "learning_rate": 2.520523345305285e-06, + "loss": 0.0155, + "step": 7860 + }, + { + "epoch": 0.05047573540965838, + "grad_norm": 1.3526276350021362, + "learning_rate": 2.5237301180092355e-06, + "loss": 0.015, + "step": 7870 + }, + { + "epoch": 0.05053987230344447, + "grad_norm": 1.45792555809021, + "learning_rate": 2.526936890713186e-06, + "loss": 0.0116, + "step": 7880 + }, + { + "epoch": 0.05060400919723057, + "grad_norm": 0.5818025469779968, + "learning_rate": 2.5301436634171373e-06, + "loss": 0.0152, + "step": 7890 + }, + { + "epoch": 0.050668146091016664, + "grad_norm": 1.4632349014282227, + "learning_rate": 2.533350436121088e-06, + "loss": 0.0138, + "step": 7900 + }, + { + "epoch": 0.050732282984802764, + "grad_norm": 1.1218039989471436, + "learning_rate": 2.5365572088250382e-06, + "loss": 0.0145, + "step": 7910 + }, + { + "epoch": 0.05079641987858886, + "grad_norm": 1.6577773094177246, + "learning_rate": 2.5397639815289893e-06, + "loss": 0.0165, + "step": 7920 + }, + { + "epoch": 0.05086055677237496, + "grad_norm": 1.0832455158233643, + "learning_rate": 2.54297075423294e-06, + "loss": 0.015, + "step": 7930 + }, + { + "epoch": 0.05092469366616106, + "grad_norm": 0.9392962455749512, + "learning_rate": 2.546177526936891e-06, + "loss": 0.0275, + "step": 7940 + }, + { + "epoch": 0.05098883055994715, + "grad_norm": 0.7276794910430908, + "learning_rate": 2.549384299640842e-06, + "loss": 0.0215, + "step": 7950 + }, + { + "epoch": 0.05105296745373325, + "grad_norm": 0.7038756012916565, + "learning_rate": 2.552591072344792e-06, + "loss": 0.0187, + "step": 7960 + }, + { + "epoch": 0.05111710434751934, + "grad_norm": 1.1616355180740356, + "learning_rate": 2.5557978450487432e-06, + "loss": 0.0137, + "step": 7970 + }, + { + "epoch": 0.05118124124130544, + "grad_norm": 1.0356817245483398, + "learning_rate": 2.559004617752694e-06, + "loss": 0.0181, + "step": 7980 + }, + { + "epoch": 0.051245378135091536, + "grad_norm": 1.3789634704589844, + "learning_rate": 2.5622113904566446e-06, + "loss": 0.019, + "step": 7990 + }, + { + "epoch": 0.051309515028877636, + "grad_norm": 1.080942988395691, + "learning_rate": 2.5654181631605953e-06, + "loss": 0.0144, + "step": 8000 + }, + { + "epoch": 0.051373651922663736, + "grad_norm": 0.87821364402771, + "learning_rate": 2.568624935864546e-06, + "loss": 0.014, + "step": 8010 + }, + { + "epoch": 0.05143778881644983, + "grad_norm": 0.784485936164856, + "learning_rate": 2.5718317085684967e-06, + "loss": 0.0146, + "step": 8020 + }, + { + "epoch": 0.05150192571023593, + "grad_norm": 0.8742894530296326, + "learning_rate": 2.5750384812724478e-06, + "loss": 0.0162, + "step": 8030 + }, + { + "epoch": 0.05156606260402202, + "grad_norm": 0.8528547883033752, + "learning_rate": 2.5782452539763985e-06, + "loss": 0.0178, + "step": 8040 + }, + { + "epoch": 0.05163019949780812, + "grad_norm": 1.3443467617034912, + "learning_rate": 2.5814520266803487e-06, + "loss": 0.0154, + "step": 8050 + }, + { + "epoch": 0.051694336391594216, + "grad_norm": 0.8554036021232605, + "learning_rate": 2.5846587993843e-06, + "loss": 0.0194, + "step": 8060 + }, + { + "epoch": 0.051758473285380316, + "grad_norm": 1.0235211849212646, + "learning_rate": 2.5878655720882505e-06, + "loss": 0.0138, + "step": 8070 + }, + { + "epoch": 0.051822610179166416, + "grad_norm": 0.93379807472229, + "learning_rate": 2.5910723447922017e-06, + "loss": 0.0143, + "step": 8080 + }, + { + "epoch": 0.05188674707295251, + "grad_norm": 0.8252114057540894, + "learning_rate": 2.5942791174961523e-06, + "loss": 0.018, + "step": 8090 + }, + { + "epoch": 0.05195088396673861, + "grad_norm": 0.7677938342094421, + "learning_rate": 2.5974858902001026e-06, + "loss": 0.0117, + "step": 8100 + }, + { + "epoch": 0.0520150208605247, + "grad_norm": 1.0340348482131958, + "learning_rate": 2.6006926629040537e-06, + "loss": 0.0165, + "step": 8110 + }, + { + "epoch": 0.0520791577543108, + "grad_norm": 0.7640451192855835, + "learning_rate": 2.6038994356080044e-06, + "loss": 0.0188, + "step": 8120 + }, + { + "epoch": 0.052143294648096895, + "grad_norm": 0.7858720421791077, + "learning_rate": 2.607106208311955e-06, + "loss": 0.0168, + "step": 8130 + }, + { + "epoch": 0.052207431541882995, + "grad_norm": 0.6935146450996399, + "learning_rate": 2.610312981015906e-06, + "loss": 0.017, + "step": 8140 + }, + { + "epoch": 0.052271568435669095, + "grad_norm": 0.6653669476509094, + "learning_rate": 2.6135197537198565e-06, + "loss": 0.018, + "step": 8150 + }, + { + "epoch": 0.05233570532945519, + "grad_norm": 0.853435754776001, + "learning_rate": 2.616726526423807e-06, + "loss": 0.0139, + "step": 8160 + }, + { + "epoch": 0.05239984222324129, + "grad_norm": 1.3172193765640259, + "learning_rate": 2.6199332991277583e-06, + "loss": 0.0171, + "step": 8170 + }, + { + "epoch": 0.05246397911702738, + "grad_norm": 0.8625347018241882, + "learning_rate": 2.623140071831709e-06, + "loss": 0.0136, + "step": 8180 + }, + { + "epoch": 0.05252811601081348, + "grad_norm": 1.255388617515564, + "learning_rate": 2.6263468445356592e-06, + "loss": 0.0157, + "step": 8190 + }, + { + "epoch": 0.052592252904599575, + "grad_norm": 0.9203137159347534, + "learning_rate": 2.6295536172396104e-06, + "loss": 0.0158, + "step": 8200 + }, + { + "epoch": 0.052656389798385675, + "grad_norm": 1.5580936670303345, + "learning_rate": 2.632760389943561e-06, + "loss": 0.0172, + "step": 8210 + }, + { + "epoch": 0.05272052669217177, + "grad_norm": 0.9016255140304565, + "learning_rate": 2.635967162647512e-06, + "loss": 0.0173, + "step": 8220 + }, + { + "epoch": 0.05278466358595787, + "grad_norm": 0.9950858950614929, + "learning_rate": 2.6391739353514624e-06, + "loss": 0.0167, + "step": 8230 + }, + { + "epoch": 0.05284880047974397, + "grad_norm": 0.932772159576416, + "learning_rate": 2.642380708055413e-06, + "loss": 0.0156, + "step": 8240 + }, + { + "epoch": 0.05291293737353006, + "grad_norm": 1.7568084001541138, + "learning_rate": 2.645587480759364e-06, + "loss": 0.0186, + "step": 8250 + }, + { + "epoch": 0.05297707426731616, + "grad_norm": 0.7419828176498413, + "learning_rate": 2.648794253463315e-06, + "loss": 0.0122, + "step": 8260 + }, + { + "epoch": 0.053041211161102254, + "grad_norm": 0.6230597496032715, + "learning_rate": 2.6520010261672656e-06, + "loss": 0.0108, + "step": 8270 + }, + { + "epoch": 0.053105348054888354, + "grad_norm": 0.6508496999740601, + "learning_rate": 2.655207798871216e-06, + "loss": 0.0157, + "step": 8280 + }, + { + "epoch": 0.05316948494867445, + "grad_norm": 0.5818604826927185, + "learning_rate": 2.658414571575167e-06, + "loss": 0.0222, + "step": 8290 + }, + { + "epoch": 0.05323362184246055, + "grad_norm": 0.8822751641273499, + "learning_rate": 2.6616213442791177e-06, + "loss": 0.0178, + "step": 8300 + }, + { + "epoch": 0.05329775873624665, + "grad_norm": 1.0668436288833618, + "learning_rate": 2.6648281169830688e-06, + "loss": 0.0123, + "step": 8310 + }, + { + "epoch": 0.05336189563003274, + "grad_norm": 0.8772680759429932, + "learning_rate": 2.668034889687019e-06, + "loss": 0.0173, + "step": 8320 + }, + { + "epoch": 0.05342603252381884, + "grad_norm": 1.3241474628448486, + "learning_rate": 2.6712416623909697e-06, + "loss": 0.0198, + "step": 8330 + }, + { + "epoch": 0.053490169417604934, + "grad_norm": 1.102897047996521, + "learning_rate": 2.674448435094921e-06, + "loss": 0.0154, + "step": 8340 + }, + { + "epoch": 0.053554306311391034, + "grad_norm": 0.7951586842536926, + "learning_rate": 2.6776552077988715e-06, + "loss": 0.0171, + "step": 8350 + }, + { + "epoch": 0.05361844320517713, + "grad_norm": 1.398962378501892, + "learning_rate": 2.6808619805028222e-06, + "loss": 0.0138, + "step": 8360 + }, + { + "epoch": 0.05368258009896323, + "grad_norm": 1.0954784154891968, + "learning_rate": 2.684068753206773e-06, + "loss": 0.0173, + "step": 8370 + }, + { + "epoch": 0.05374671699274933, + "grad_norm": 0.9455815553665161, + "learning_rate": 2.6872755259107236e-06, + "loss": 0.0165, + "step": 8380 + }, + { + "epoch": 0.05381085388653542, + "grad_norm": 0.6192914843559265, + "learning_rate": 2.6904822986146743e-06, + "loss": 0.0127, + "step": 8390 + }, + { + "epoch": 0.05387499078032152, + "grad_norm": 0.47157925367355347, + "learning_rate": 2.6936890713186254e-06, + "loss": 0.0136, + "step": 8400 + }, + { + "epoch": 0.05393912767410761, + "grad_norm": 1.3008977174758911, + "learning_rate": 2.6968958440225757e-06, + "loss": 0.0172, + "step": 8410 + }, + { + "epoch": 0.05400326456789371, + "grad_norm": 0.40314775705337524, + "learning_rate": 2.7001026167265264e-06, + "loss": 0.0177, + "step": 8420 + }, + { + "epoch": 0.05406740146167981, + "grad_norm": 0.7646990418434143, + "learning_rate": 2.7033093894304775e-06, + "loss": 0.0151, + "step": 8430 + }, + { + "epoch": 0.05413153835546591, + "grad_norm": 0.6532096862792969, + "learning_rate": 2.706516162134428e-06, + "loss": 0.0137, + "step": 8440 + }, + { + "epoch": 0.05419567524925201, + "grad_norm": 0.6426984071731567, + "learning_rate": 2.7097229348383793e-06, + "loss": 0.016, + "step": 8450 + }, + { + "epoch": 0.0542598121430381, + "grad_norm": 0.6302536725997925, + "learning_rate": 2.7129297075423296e-06, + "loss": 0.0216, + "step": 8460 + }, + { + "epoch": 0.0543239490368242, + "grad_norm": 0.5971202850341797, + "learning_rate": 2.7161364802462802e-06, + "loss": 0.0155, + "step": 8470 + }, + { + "epoch": 0.05438808593061029, + "grad_norm": 0.7935372591018677, + "learning_rate": 2.7193432529502314e-06, + "loss": 0.0181, + "step": 8480 + }, + { + "epoch": 0.05445222282439639, + "grad_norm": 0.7118707299232483, + "learning_rate": 2.722550025654182e-06, + "loss": 0.0173, + "step": 8490 + }, + { + "epoch": 0.054516359718182486, + "grad_norm": 0.8970438241958618, + "learning_rate": 2.7257567983581323e-06, + "loss": 0.0162, + "step": 8500 + }, + { + "epoch": 0.054580496611968586, + "grad_norm": 1.0925875902175903, + "learning_rate": 2.728963571062083e-06, + "loss": 0.017, + "step": 8510 + }, + { + "epoch": 0.054644633505754686, + "grad_norm": 1.1510151624679565, + "learning_rate": 2.732170343766034e-06, + "loss": 0.0117, + "step": 8520 + }, + { + "epoch": 0.05470877039954078, + "grad_norm": 0.77836674451828, + "learning_rate": 2.735377116469985e-06, + "loss": 0.0126, + "step": 8530 + }, + { + "epoch": 0.05477290729332688, + "grad_norm": 0.48642775416374207, + "learning_rate": 2.738583889173936e-06, + "loss": 0.0168, + "step": 8540 + }, + { + "epoch": 0.05483704418711297, + "grad_norm": 0.5595241189002991, + "learning_rate": 2.741790661877886e-06, + "loss": 0.0112, + "step": 8550 + }, + { + "epoch": 0.05490118108089907, + "grad_norm": 0.9097594618797302, + "learning_rate": 2.744997434581837e-06, + "loss": 0.0111, + "step": 8560 + }, + { + "epoch": 0.054965317974685166, + "grad_norm": 1.075947880744934, + "learning_rate": 2.748204207285788e-06, + "loss": 0.0155, + "step": 8570 + }, + { + "epoch": 0.055029454868471266, + "grad_norm": 0.7200561165809631, + "learning_rate": 2.7514109799897387e-06, + "loss": 0.0168, + "step": 8580 + }, + { + "epoch": 0.05509359176225736, + "grad_norm": 1.1899163722991943, + "learning_rate": 2.754617752693689e-06, + "loss": 0.0228, + "step": 8590 + }, + { + "epoch": 0.05515772865604346, + "grad_norm": 0.45061996579170227, + "learning_rate": 2.75782452539764e-06, + "loss": 0.0154, + "step": 8600 + }, + { + "epoch": 0.05522186554982956, + "grad_norm": 0.6197959184646606, + "learning_rate": 2.7610312981015907e-06, + "loss": 0.0134, + "step": 8610 + }, + { + "epoch": 0.05528600244361565, + "grad_norm": 1.1750388145446777, + "learning_rate": 2.7642380708055414e-06, + "loss": 0.0208, + "step": 8620 + }, + { + "epoch": 0.05535013933740175, + "grad_norm": 0.905691921710968, + "learning_rate": 2.7674448435094925e-06, + "loss": 0.0189, + "step": 8630 + }, + { + "epoch": 0.055414276231187845, + "grad_norm": 0.9650370478630066, + "learning_rate": 2.770651616213443e-06, + "loss": 0.0203, + "step": 8640 + }, + { + "epoch": 0.055478413124973945, + "grad_norm": 0.8335312604904175, + "learning_rate": 2.7738583889173935e-06, + "loss": 0.0107, + "step": 8650 + }, + { + "epoch": 0.05554255001876004, + "grad_norm": 1.6039456129074097, + "learning_rate": 2.7770651616213446e-06, + "loss": 0.0221, + "step": 8660 + }, + { + "epoch": 0.05560668691254614, + "grad_norm": 0.6446152925491333, + "learning_rate": 2.7802719343252953e-06, + "loss": 0.0137, + "step": 8670 + }, + { + "epoch": 0.05567082380633224, + "grad_norm": 0.7407627701759338, + "learning_rate": 2.7834787070292456e-06, + "loss": 0.0217, + "step": 8680 + }, + { + "epoch": 0.05573496070011833, + "grad_norm": 0.4696844518184662, + "learning_rate": 2.7866854797331967e-06, + "loss": 0.0113, + "step": 8690 + }, + { + "epoch": 0.05579909759390443, + "grad_norm": 0.8112123012542725, + "learning_rate": 2.7898922524371474e-06, + "loss": 0.0192, + "step": 8700 + }, + { + "epoch": 0.055863234487690525, + "grad_norm": 0.8872155547142029, + "learning_rate": 2.7930990251410985e-06, + "loss": 0.0194, + "step": 8710 + }, + { + "epoch": 0.055927371381476625, + "grad_norm": 0.4063515365123749, + "learning_rate": 2.796305797845049e-06, + "loss": 0.0194, + "step": 8720 + }, + { + "epoch": 0.05599150827526272, + "grad_norm": 0.9252082705497742, + "learning_rate": 2.7995125705489994e-06, + "loss": 0.0178, + "step": 8730 + }, + { + "epoch": 0.05605564516904882, + "grad_norm": 1.4230228662490845, + "learning_rate": 2.8027193432529506e-06, + "loss": 0.015, + "step": 8740 + }, + { + "epoch": 0.05611978206283492, + "grad_norm": 0.4911647439002991, + "learning_rate": 2.8059261159569012e-06, + "loss": 0.0159, + "step": 8750 + }, + { + "epoch": 0.05618391895662101, + "grad_norm": 0.9393802881240845, + "learning_rate": 2.809132888660852e-06, + "loss": 0.0204, + "step": 8760 + }, + { + "epoch": 0.05624805585040711, + "grad_norm": 1.24632728099823, + "learning_rate": 2.812339661364803e-06, + "loss": 0.0177, + "step": 8770 + }, + { + "epoch": 0.056312192744193204, + "grad_norm": 1.1230436563491821, + "learning_rate": 2.8155464340687533e-06, + "loss": 0.0217, + "step": 8780 + }, + { + "epoch": 0.056376329637979304, + "grad_norm": 0.7382339239120483, + "learning_rate": 2.818753206772704e-06, + "loss": 0.0141, + "step": 8790 + }, + { + "epoch": 0.0564404665317654, + "grad_norm": 1.1875547170639038, + "learning_rate": 2.821959979476655e-06, + "loss": 0.0145, + "step": 8800 + }, + { + "epoch": 0.0565046034255515, + "grad_norm": 0.7362310290336609, + "learning_rate": 2.825166752180606e-06, + "loss": 0.0158, + "step": 8810 + }, + { + "epoch": 0.0565687403193376, + "grad_norm": 0.9873781800270081, + "learning_rate": 2.828373524884556e-06, + "loss": 0.01, + "step": 8820 + }, + { + "epoch": 0.05663287721312369, + "grad_norm": 0.7471084594726562, + "learning_rate": 2.831580297588507e-06, + "loss": 0.0145, + "step": 8830 + }, + { + "epoch": 0.05669701410690979, + "grad_norm": 0.8579428791999817, + "learning_rate": 2.834787070292458e-06, + "loss": 0.0176, + "step": 8840 + }, + { + "epoch": 0.056761151000695884, + "grad_norm": 0.5471231341362, + "learning_rate": 2.837993842996409e-06, + "loss": 0.016, + "step": 8850 + }, + { + "epoch": 0.056825287894481984, + "grad_norm": 0.9654415845870972, + "learning_rate": 2.8412006157003597e-06, + "loss": 0.0157, + "step": 8860 + }, + { + "epoch": 0.05688942478826808, + "grad_norm": 0.9503028392791748, + "learning_rate": 2.84440738840431e-06, + "loss": 0.0147, + "step": 8870 + }, + { + "epoch": 0.05695356168205418, + "grad_norm": 1.0068310499191284, + "learning_rate": 2.8476141611082606e-06, + "loss": 0.0099, + "step": 8880 + }, + { + "epoch": 0.05701769857584028, + "grad_norm": 0.6317100524902344, + "learning_rate": 2.8508209338122117e-06, + "loss": 0.0128, + "step": 8890 + }, + { + "epoch": 0.05708183546962637, + "grad_norm": 0.96009361743927, + "learning_rate": 2.8540277065161624e-06, + "loss": 0.018, + "step": 8900 + }, + { + "epoch": 0.05714597236341247, + "grad_norm": 0.7821346521377563, + "learning_rate": 2.8572344792201127e-06, + "loss": 0.0136, + "step": 8910 + }, + { + "epoch": 0.05721010925719856, + "grad_norm": 0.7975736260414124, + "learning_rate": 2.860441251924064e-06, + "loss": 0.0219, + "step": 8920 + }, + { + "epoch": 0.05727424615098466, + "grad_norm": 0.9637255072593689, + "learning_rate": 2.8636480246280145e-06, + "loss": 0.019, + "step": 8930 + }, + { + "epoch": 0.057338383044770756, + "grad_norm": 1.2077337503433228, + "learning_rate": 2.8668547973319656e-06, + "loss": 0.012, + "step": 8940 + }, + { + "epoch": 0.057402519938556856, + "grad_norm": 0.8413091897964478, + "learning_rate": 2.8700615700359163e-06, + "loss": 0.0136, + "step": 8950 + }, + { + "epoch": 0.05746665683234295, + "grad_norm": 0.570277750492096, + "learning_rate": 2.8732683427398666e-06, + "loss": 0.0122, + "step": 8960 + }, + { + "epoch": 0.05753079372612905, + "grad_norm": 1.1096901893615723, + "learning_rate": 2.8764751154438177e-06, + "loss": 0.0133, + "step": 8970 + }, + { + "epoch": 0.05759493061991515, + "grad_norm": 0.5581759214401245, + "learning_rate": 2.8796818881477684e-06, + "loss": 0.013, + "step": 8980 + }, + { + "epoch": 0.05765906751370124, + "grad_norm": 0.8480189442634583, + "learning_rate": 2.882888660851719e-06, + "loss": 0.0161, + "step": 8990 + }, + { + "epoch": 0.05772320440748734, + "grad_norm": 1.3270263671875, + "learning_rate": 2.8860954335556698e-06, + "loss": 0.0144, + "step": 9000 + }, + { + "epoch": 0.057787341301273436, + "grad_norm": 1.3251687288284302, + "learning_rate": 2.8893022062596204e-06, + "loss": 0.0166, + "step": 9010 + }, + { + "epoch": 0.057851478195059536, + "grad_norm": 0.2769591212272644, + "learning_rate": 2.892508978963571e-06, + "loss": 0.0167, + "step": 9020 + }, + { + "epoch": 0.05791561508884563, + "grad_norm": 0.5797544121742249, + "learning_rate": 2.8957157516675222e-06, + "loss": 0.0174, + "step": 9030 + }, + { + "epoch": 0.05797975198263173, + "grad_norm": 0.8181819319725037, + "learning_rate": 2.898922524371473e-06, + "loss": 0.0233, + "step": 9040 + }, + { + "epoch": 0.05804388887641783, + "grad_norm": 0.547584593296051, + "learning_rate": 2.902129297075423e-06, + "loss": 0.0113, + "step": 9050 + }, + { + "epoch": 0.05810802577020392, + "grad_norm": 1.119734525680542, + "learning_rate": 2.9053360697793743e-06, + "loss": 0.0111, + "step": 9060 + }, + { + "epoch": 0.05817216266399002, + "grad_norm": 0.8142489790916443, + "learning_rate": 2.908542842483325e-06, + "loss": 0.0146, + "step": 9070 + }, + { + "epoch": 0.058236299557776115, + "grad_norm": 0.8385538458824158, + "learning_rate": 2.911749615187276e-06, + "loss": 0.013, + "step": 9080 + }, + { + "epoch": 0.058300436451562215, + "grad_norm": 0.585768461227417, + "learning_rate": 2.9149563878912264e-06, + "loss": 0.0129, + "step": 9090 + }, + { + "epoch": 0.05836457334534831, + "grad_norm": 0.6681596636772156, + "learning_rate": 2.918163160595177e-06, + "loss": 0.0105, + "step": 9100 + }, + { + "epoch": 0.05842871023913441, + "grad_norm": 0.8642899394035339, + "learning_rate": 2.921369933299128e-06, + "loss": 0.0193, + "step": 9110 + }, + { + "epoch": 0.05849284713292051, + "grad_norm": 1.0386979579925537, + "learning_rate": 2.924576706003079e-06, + "loss": 0.0158, + "step": 9120 + }, + { + "epoch": 0.0585569840267066, + "grad_norm": 0.6511367559432983, + "learning_rate": 2.9277834787070296e-06, + "loss": 0.0154, + "step": 9130 + }, + { + "epoch": 0.0586211209204927, + "grad_norm": 0.3735872805118561, + "learning_rate": 2.93099025141098e-06, + "loss": 0.0094, + "step": 9140 + }, + { + "epoch": 0.058685257814278795, + "grad_norm": 0.6131263375282288, + "learning_rate": 2.934197024114931e-06, + "loss": 0.015, + "step": 9150 + }, + { + "epoch": 0.058749394708064895, + "grad_norm": 0.6784162521362305, + "learning_rate": 2.9374037968188816e-06, + "loss": 0.0117, + "step": 9160 + }, + { + "epoch": 0.05881353160185099, + "grad_norm": 0.806260347366333, + "learning_rate": 2.9406105695228327e-06, + "loss": 0.0163, + "step": 9170 + }, + { + "epoch": 0.05887766849563709, + "grad_norm": 1.063302993774414, + "learning_rate": 2.943817342226783e-06, + "loss": 0.0142, + "step": 9180 + }, + { + "epoch": 0.05894180538942319, + "grad_norm": 0.3902188539505005, + "learning_rate": 2.9470241149307337e-06, + "loss": 0.0138, + "step": 9190 + }, + { + "epoch": 0.05900594228320928, + "grad_norm": 0.5390955805778503, + "learning_rate": 2.950230887634685e-06, + "loss": 0.0143, + "step": 9200 + }, + { + "epoch": 0.05907007917699538, + "grad_norm": 0.5154760479927063, + "learning_rate": 2.9534376603386355e-06, + "loss": 0.0154, + "step": 9210 + }, + { + "epoch": 0.059134216070781474, + "grad_norm": 0.9843395948410034, + "learning_rate": 2.9566444330425866e-06, + "loss": 0.0176, + "step": 9220 + }, + { + "epoch": 0.059198352964567574, + "grad_norm": 0.8421329855918884, + "learning_rate": 2.959851205746537e-06, + "loss": 0.0159, + "step": 9230 + }, + { + "epoch": 0.05926248985835367, + "grad_norm": 0.8714247345924377, + "learning_rate": 2.9630579784504876e-06, + "loss": 0.0177, + "step": 9240 + }, + { + "epoch": 0.05932662675213977, + "grad_norm": 0.9572415351867676, + "learning_rate": 2.9662647511544383e-06, + "loss": 0.0159, + "step": 9250 + }, + { + "epoch": 0.05939076364592587, + "grad_norm": 0.38546404242515564, + "learning_rate": 2.9694715238583894e-06, + "loss": 0.0132, + "step": 9260 + }, + { + "epoch": 0.05945490053971196, + "grad_norm": 0.9735164046287537, + "learning_rate": 2.9726782965623396e-06, + "loss": 0.011, + "step": 9270 + }, + { + "epoch": 0.05951903743349806, + "grad_norm": 0.7716529965400696, + "learning_rate": 2.9758850692662903e-06, + "loss": 0.0162, + "step": 9280 + }, + { + "epoch": 0.059583174327284154, + "grad_norm": 0.7090407013893127, + "learning_rate": 2.9790918419702414e-06, + "loss": 0.015, + "step": 9290 + }, + { + "epoch": 0.059647311221070254, + "grad_norm": 0.7737558484077454, + "learning_rate": 2.982298614674192e-06, + "loss": 0.0131, + "step": 9300 + }, + { + "epoch": 0.05971144811485635, + "grad_norm": 0.61954665184021, + "learning_rate": 2.9855053873781432e-06, + "loss": 0.0171, + "step": 9310 + }, + { + "epoch": 0.05977558500864245, + "grad_norm": 0.8703621625900269, + "learning_rate": 2.9887121600820935e-06, + "loss": 0.0161, + "step": 9320 + }, + { + "epoch": 0.05983972190242854, + "grad_norm": 0.6462575793266296, + "learning_rate": 2.991918932786044e-06, + "loss": 0.0083, + "step": 9330 + }, + { + "epoch": 0.05990385879621464, + "grad_norm": 0.275501549243927, + "learning_rate": 2.9951257054899953e-06, + "loss": 0.0109, + "step": 9340 + }, + { + "epoch": 0.05996799569000074, + "grad_norm": 1.0189841985702515, + "learning_rate": 2.998332478193946e-06, + "loss": 0.014, + "step": 9350 + }, + { + "epoch": 0.06003213258378683, + "grad_norm": 1.032149076461792, + "learning_rate": 3.0015392508978963e-06, + "loss": 0.0169, + "step": 9360 + }, + { + "epoch": 0.06009626947757293, + "grad_norm": 0.9786964654922485, + "learning_rate": 3.0047460236018474e-06, + "loss": 0.0165, + "step": 9370 + }, + { + "epoch": 0.060160406371359026, + "grad_norm": 0.7089354991912842, + "learning_rate": 3.007952796305798e-06, + "loss": 0.013, + "step": 9380 + }, + { + "epoch": 0.060224543265145127, + "grad_norm": 0.9810829162597656, + "learning_rate": 3.0111595690097488e-06, + "loss": 0.0143, + "step": 9390 + }, + { + "epoch": 0.06028868015893122, + "grad_norm": 1.4317636489868164, + "learning_rate": 3.0143663417137e-06, + "loss": 0.0158, + "step": 9400 + }, + { + "epoch": 0.06035281705271732, + "grad_norm": 1.0124961137771606, + "learning_rate": 3.01757311441765e-06, + "loss": 0.0169, + "step": 9410 + }, + { + "epoch": 0.06041695394650342, + "grad_norm": 0.7623961567878723, + "learning_rate": 3.020779887121601e-06, + "loss": 0.0181, + "step": 9420 + }, + { + "epoch": 0.06048109084028951, + "grad_norm": 0.6873654127120972, + "learning_rate": 3.023986659825552e-06, + "loss": 0.0145, + "step": 9430 + }, + { + "epoch": 0.06054522773407561, + "grad_norm": 0.7793513536453247, + "learning_rate": 3.0271934325295026e-06, + "loss": 0.0144, + "step": 9440 + }, + { + "epoch": 0.060609364627861706, + "grad_norm": 0.5543414354324341, + "learning_rate": 3.0304002052334537e-06, + "loss": 0.0155, + "step": 9450 + }, + { + "epoch": 0.060673501521647806, + "grad_norm": 0.34898149967193604, + "learning_rate": 3.033606977937404e-06, + "loss": 0.0144, + "step": 9460 + }, + { + "epoch": 0.0607376384154339, + "grad_norm": 0.7438449263572693, + "learning_rate": 3.0368137506413547e-06, + "loss": 0.0207, + "step": 9470 + }, + { + "epoch": 0.06080177530922, + "grad_norm": 0.5092421770095825, + "learning_rate": 3.040020523345306e-06, + "loss": 0.0124, + "step": 9480 + }, + { + "epoch": 0.0608659122030061, + "grad_norm": 0.7169559597969055, + "learning_rate": 3.0432272960492565e-06, + "loss": 0.011, + "step": 9490 + }, + { + "epoch": 0.06093004909679219, + "grad_norm": 1.0686838626861572, + "learning_rate": 3.0464340687532068e-06, + "loss": 0.015, + "step": 9500 + }, + { + "epoch": 0.06099418599057829, + "grad_norm": 1.1948643922805786, + "learning_rate": 3.0496408414571575e-06, + "loss": 0.019, + "step": 9510 + }, + { + "epoch": 0.061058322884364385, + "grad_norm": 0.9161044359207153, + "learning_rate": 3.0528476141611086e-06, + "loss": 0.0152, + "step": 9520 + }, + { + "epoch": 0.061122459778150486, + "grad_norm": 0.5058609843254089, + "learning_rate": 3.0560543868650593e-06, + "loss": 0.0132, + "step": 9530 + }, + { + "epoch": 0.06118659667193658, + "grad_norm": 1.7881057262420654, + "learning_rate": 3.0592611595690104e-06, + "loss": 0.0128, + "step": 9540 + }, + { + "epoch": 0.06125073356572268, + "grad_norm": 0.7522915005683899, + "learning_rate": 3.0624679322729606e-06, + "loss": 0.0193, + "step": 9550 + }, + { + "epoch": 0.06131487045950878, + "grad_norm": 1.0016465187072754, + "learning_rate": 3.0656747049769113e-06, + "loss": 0.0142, + "step": 9560 + }, + { + "epoch": 0.06137900735329487, + "grad_norm": 0.5280004739761353, + "learning_rate": 3.0688814776808624e-06, + "loss": 0.016, + "step": 9570 + }, + { + "epoch": 0.06144314424708097, + "grad_norm": 0.850499153137207, + "learning_rate": 3.072088250384813e-06, + "loss": 0.0147, + "step": 9580 + }, + { + "epoch": 0.061507281140867065, + "grad_norm": 0.8008180856704712, + "learning_rate": 3.0752950230887634e-06, + "loss": 0.0084, + "step": 9590 + }, + { + "epoch": 0.061571418034653165, + "grad_norm": 0.9171088337898254, + "learning_rate": 3.0785017957927145e-06, + "loss": 0.0124, + "step": 9600 + }, + { + "epoch": 0.06163555492843926, + "grad_norm": 0.8631775975227356, + "learning_rate": 3.081708568496665e-06, + "loss": 0.0124, + "step": 9610 + }, + { + "epoch": 0.06169969182222536, + "grad_norm": 0.7938309907913208, + "learning_rate": 3.084915341200616e-06, + "loss": 0.0184, + "step": 9620 + }, + { + "epoch": 0.06176382871601146, + "grad_norm": 1.385733962059021, + "learning_rate": 3.088122113904567e-06, + "loss": 0.0196, + "step": 9630 + }, + { + "epoch": 0.06182796560979755, + "grad_norm": 1.151713252067566, + "learning_rate": 3.0913288866085173e-06, + "loss": 0.0177, + "step": 9640 + }, + { + "epoch": 0.06189210250358365, + "grad_norm": 0.8653784394264221, + "learning_rate": 3.094535659312468e-06, + "loss": 0.02, + "step": 9650 + }, + { + "epoch": 0.061956239397369745, + "grad_norm": 0.7082491517066956, + "learning_rate": 3.097742432016419e-06, + "loss": 0.0186, + "step": 9660 + }, + { + "epoch": 0.062020376291155845, + "grad_norm": 1.227051854133606, + "learning_rate": 3.1009492047203698e-06, + "loss": 0.0119, + "step": 9670 + }, + { + "epoch": 0.06208451318494194, + "grad_norm": 0.8086697459220886, + "learning_rate": 3.10415597742432e-06, + "loss": 0.0126, + "step": 9680 + }, + { + "epoch": 0.06214865007872804, + "grad_norm": 0.7787764072418213, + "learning_rate": 3.107362750128271e-06, + "loss": 0.0108, + "step": 9690 + }, + { + "epoch": 0.06221278697251413, + "grad_norm": 1.1162656545639038, + "learning_rate": 3.110569522832222e-06, + "loss": 0.0135, + "step": 9700 + }, + { + "epoch": 0.06227692386630023, + "grad_norm": 0.9261019229888916, + "learning_rate": 3.113776295536173e-06, + "loss": 0.0122, + "step": 9710 + }, + { + "epoch": 0.06234106076008633, + "grad_norm": 0.7241477370262146, + "learning_rate": 3.1169830682401236e-06, + "loss": 0.0154, + "step": 9720 + }, + { + "epoch": 0.062405197653872424, + "grad_norm": 0.6688586473464966, + "learning_rate": 3.120189840944074e-06, + "loss": 0.014, + "step": 9730 + }, + { + "epoch": 0.062469334547658524, + "grad_norm": 0.9395526051521301, + "learning_rate": 3.123396613648025e-06, + "loss": 0.0115, + "step": 9740 + }, + { + "epoch": 0.06253347144144462, + "grad_norm": 1.0174471139907837, + "learning_rate": 3.1266033863519757e-06, + "loss": 0.0148, + "step": 9750 + }, + { + "epoch": 0.06259760833523072, + "grad_norm": 0.7402105927467346, + "learning_rate": 3.1298101590559264e-06, + "loss": 0.0154, + "step": 9760 + }, + { + "epoch": 0.06266174522901681, + "grad_norm": 0.7139943242073059, + "learning_rate": 3.1330169317598767e-06, + "loss": 0.0156, + "step": 9770 + }, + { + "epoch": 0.06272588212280292, + "grad_norm": 0.7116230726242065, + "learning_rate": 3.1362237044638278e-06, + "loss": 0.0159, + "step": 9780 + }, + { + "epoch": 0.06279001901658901, + "grad_norm": 1.0926690101623535, + "learning_rate": 3.1394304771677785e-06, + "loss": 0.0123, + "step": 9790 + }, + { + "epoch": 0.0628541559103751, + "grad_norm": 0.7361863851547241, + "learning_rate": 3.1426372498717296e-06, + "loss": 0.0119, + "step": 9800 + }, + { + "epoch": 0.0629182928041612, + "grad_norm": 1.3561779260635376, + "learning_rate": 3.1458440225756803e-06, + "loss": 0.0155, + "step": 9810 + }, + { + "epoch": 0.0629824296979473, + "grad_norm": 1.122456431388855, + "learning_rate": 3.1490507952796305e-06, + "loss": 0.0176, + "step": 9820 + }, + { + "epoch": 0.0630465665917334, + "grad_norm": 1.144529938697815, + "learning_rate": 3.1522575679835816e-06, + "loss": 0.0144, + "step": 9830 + }, + { + "epoch": 0.06311070348551949, + "grad_norm": 1.3597661256790161, + "learning_rate": 3.1554643406875323e-06, + "loss": 0.0171, + "step": 9840 + }, + { + "epoch": 0.06317484037930558, + "grad_norm": 0.7596487402915955, + "learning_rate": 3.1586711133914834e-06, + "loss": 0.0159, + "step": 9850 + }, + { + "epoch": 0.06323897727309169, + "grad_norm": 0.6753873825073242, + "learning_rate": 3.1618778860954337e-06, + "loss": 0.016, + "step": 9860 + }, + { + "epoch": 0.06330311416687778, + "grad_norm": 0.7201382517814636, + "learning_rate": 3.1650846587993844e-06, + "loss": 0.0122, + "step": 9870 + }, + { + "epoch": 0.06336725106066388, + "grad_norm": 0.5358558297157288, + "learning_rate": 3.168291431503335e-06, + "loss": 0.0162, + "step": 9880 + }, + { + "epoch": 0.06343138795444998, + "grad_norm": 0.7316409945487976, + "learning_rate": 3.171498204207286e-06, + "loss": 0.0127, + "step": 9890 + }, + { + "epoch": 0.06349552484823608, + "grad_norm": 0.6747068166732788, + "learning_rate": 3.174704976911237e-06, + "loss": 0.0134, + "step": 9900 + }, + { + "epoch": 0.06355966174202217, + "grad_norm": 0.730777382850647, + "learning_rate": 3.177911749615187e-06, + "loss": 0.0156, + "step": 9910 + }, + { + "epoch": 0.06362379863580826, + "grad_norm": 0.7868751287460327, + "learning_rate": 3.1811185223191383e-06, + "loss": 0.0126, + "step": 9920 + }, + { + "epoch": 0.06368793552959437, + "grad_norm": 1.0477957725524902, + "learning_rate": 3.184325295023089e-06, + "loss": 0.018, + "step": 9930 + }, + { + "epoch": 0.06375207242338046, + "grad_norm": 0.6357901692390442, + "learning_rate": 3.18753206772704e-06, + "loss": 0.0156, + "step": 9940 + }, + { + "epoch": 0.06381620931716656, + "grad_norm": 1.0937377214431763, + "learning_rate": 3.1907388404309903e-06, + "loss": 0.0124, + "step": 9950 + }, + { + "epoch": 0.06388034621095266, + "grad_norm": 0.5854929089546204, + "learning_rate": 3.193945613134941e-06, + "loss": 0.0166, + "step": 9960 + }, + { + "epoch": 0.06394448310473876, + "grad_norm": 0.7687313556671143, + "learning_rate": 3.197152385838892e-06, + "loss": 0.0194, + "step": 9970 + }, + { + "epoch": 0.06400861999852485, + "grad_norm": 0.7688573002815247, + "learning_rate": 3.200359158542843e-06, + "loss": 0.0131, + "step": 9980 + }, + { + "epoch": 0.06407275689231094, + "grad_norm": 1.1176494359970093, + "learning_rate": 3.2035659312467935e-06, + "loss": 0.016, + "step": 9990 + }, + { + "epoch": 0.06413689378609705, + "grad_norm": 0.5380460023880005, + "learning_rate": 3.2067727039507442e-06, + "loss": 0.0192, + "step": 10000 + }, + { + "epoch": 0.06420103067988314, + "grad_norm": 0.764922559261322, + "learning_rate": 3.209979476654695e-06, + "loss": 0.0149, + "step": 10010 + }, + { + "epoch": 0.06426516757366924, + "grad_norm": 0.9032078981399536, + "learning_rate": 3.2131862493586456e-06, + "loss": 0.0104, + "step": 10020 + }, + { + "epoch": 0.06432930446745534, + "grad_norm": 0.678203284740448, + "learning_rate": 3.2163930220625967e-06, + "loss": 0.0155, + "step": 10030 + }, + { + "epoch": 0.06439344136124144, + "grad_norm": 1.1898125410079956, + "learning_rate": 3.219599794766547e-06, + "loss": 0.0171, + "step": 10040 + }, + { + "epoch": 0.06445757825502753, + "grad_norm": 1.0876600742340088, + "learning_rate": 3.2228065674704977e-06, + "loss": 0.0163, + "step": 10050 + }, + { + "epoch": 0.06452171514881362, + "grad_norm": 0.40626418590545654, + "learning_rate": 3.2260133401744488e-06, + "loss": 0.0156, + "step": 10060 + }, + { + "epoch": 0.06458585204259973, + "grad_norm": 0.5721704363822937, + "learning_rate": 3.2292201128783995e-06, + "loss": 0.0128, + "step": 10070 + }, + { + "epoch": 0.06464998893638582, + "grad_norm": 1.0086840391159058, + "learning_rate": 3.2324268855823506e-06, + "loss": 0.0173, + "step": 10080 + }, + { + "epoch": 0.06471412583017191, + "grad_norm": 0.7133669853210449, + "learning_rate": 3.235633658286301e-06, + "loss": 0.0164, + "step": 10090 + }, + { + "epoch": 0.06477826272395802, + "grad_norm": 0.36082619428634644, + "learning_rate": 3.2388404309902515e-06, + "loss": 0.0138, + "step": 10100 + }, + { + "epoch": 0.06484239961774411, + "grad_norm": 0.7135457992553711, + "learning_rate": 3.2420472036942026e-06, + "loss": 0.0128, + "step": 10110 + }, + { + "epoch": 0.06490653651153021, + "grad_norm": 0.5620806217193604, + "learning_rate": 3.2452539763981533e-06, + "loss": 0.0114, + "step": 10120 + }, + { + "epoch": 0.0649706734053163, + "grad_norm": 0.7917492389678955, + "learning_rate": 3.248460749102104e-06, + "loss": 0.0137, + "step": 10130 + }, + { + "epoch": 0.06503481029910241, + "grad_norm": 0.8087460398674011, + "learning_rate": 3.2516675218060543e-06, + "loss": 0.0079, + "step": 10140 + }, + { + "epoch": 0.0650989471928885, + "grad_norm": 0.7026078104972839, + "learning_rate": 3.2548742945100054e-06, + "loss": 0.0139, + "step": 10150 + }, + { + "epoch": 0.0651630840866746, + "grad_norm": 0.4520063102245331, + "learning_rate": 3.258081067213956e-06, + "loss": 0.0125, + "step": 10160 + }, + { + "epoch": 0.0652272209804607, + "grad_norm": 0.7053927183151245, + "learning_rate": 3.261287839917907e-06, + "loss": 0.014, + "step": 10170 + }, + { + "epoch": 0.0652913578742468, + "grad_norm": 0.6258370280265808, + "learning_rate": 3.2644946126218575e-06, + "loss": 0.0128, + "step": 10180 + }, + { + "epoch": 0.06535549476803289, + "grad_norm": 0.3676292598247528, + "learning_rate": 3.267701385325808e-06, + "loss": 0.0114, + "step": 10190 + }, + { + "epoch": 0.06541963166181898, + "grad_norm": 0.8146242499351501, + "learning_rate": 3.2709081580297593e-06, + "loss": 0.0117, + "step": 10200 + }, + { + "epoch": 0.06548376855560509, + "grad_norm": 0.7208275198936462, + "learning_rate": 3.27411493073371e-06, + "loss": 0.0102, + "step": 10210 + }, + { + "epoch": 0.06554790544939118, + "grad_norm": 0.8345526456832886, + "learning_rate": 3.277321703437661e-06, + "loss": 0.0167, + "step": 10220 + }, + { + "epoch": 0.06561204234317727, + "grad_norm": 0.7537758946418762, + "learning_rate": 3.2805284761416113e-06, + "loss": 0.0167, + "step": 10230 + }, + { + "epoch": 0.06567617923696338, + "grad_norm": 1.0677050352096558, + "learning_rate": 3.283735248845562e-06, + "loss": 0.0152, + "step": 10240 + }, + { + "epoch": 0.06574031613074947, + "grad_norm": 0.5786572098731995, + "learning_rate": 3.2869420215495127e-06, + "loss": 0.0128, + "step": 10250 + }, + { + "epoch": 0.06580445302453557, + "grad_norm": 0.6964079737663269, + "learning_rate": 3.290148794253464e-06, + "loss": 0.0168, + "step": 10260 + }, + { + "epoch": 0.06586858991832166, + "grad_norm": 0.7183060646057129, + "learning_rate": 3.293355566957414e-06, + "loss": 0.015, + "step": 10270 + }, + { + "epoch": 0.06593272681210777, + "grad_norm": 0.7558525204658508, + "learning_rate": 3.296562339661365e-06, + "loss": 0.013, + "step": 10280 + }, + { + "epoch": 0.06599686370589386, + "grad_norm": 0.6874273419380188, + "learning_rate": 3.299769112365316e-06, + "loss": 0.0135, + "step": 10290 + }, + { + "epoch": 0.06606100059967995, + "grad_norm": 0.6457377672195435, + "learning_rate": 3.3029758850692666e-06, + "loss": 0.0117, + "step": 10300 + }, + { + "epoch": 0.06612513749346606, + "grad_norm": 1.20498526096344, + "learning_rate": 3.3061826577732177e-06, + "loss": 0.0151, + "step": 10310 + }, + { + "epoch": 0.06618927438725215, + "grad_norm": 0.7991650700569153, + "learning_rate": 3.309389430477168e-06, + "loss": 0.0171, + "step": 10320 + }, + { + "epoch": 0.06625341128103825, + "grad_norm": 0.8451363444328308, + "learning_rate": 3.3125962031811187e-06, + "loss": 0.0136, + "step": 10330 + }, + { + "epoch": 0.06631754817482434, + "grad_norm": 0.6329699158668518, + "learning_rate": 3.3158029758850698e-06, + "loss": 0.0135, + "step": 10340 + }, + { + "epoch": 0.06638168506861045, + "grad_norm": 1.151140570640564, + "learning_rate": 3.3190097485890205e-06, + "loss": 0.0127, + "step": 10350 + }, + { + "epoch": 0.06644582196239654, + "grad_norm": 0.7782281637191772, + "learning_rate": 3.3222165212929707e-06, + "loss": 0.0175, + "step": 10360 + }, + { + "epoch": 0.06650995885618263, + "grad_norm": 0.5035085082054138, + "learning_rate": 3.325423293996922e-06, + "loss": 0.0108, + "step": 10370 + }, + { + "epoch": 0.06657409574996874, + "grad_norm": 0.6519835591316223, + "learning_rate": 3.3286300667008725e-06, + "loss": 0.0133, + "step": 10380 + }, + { + "epoch": 0.06663823264375483, + "grad_norm": 0.8411291241645813, + "learning_rate": 3.3318368394048232e-06, + "loss": 0.0133, + "step": 10390 + }, + { + "epoch": 0.06670236953754093, + "grad_norm": 0.943038821220398, + "learning_rate": 3.3350436121087743e-06, + "loss": 0.0109, + "step": 10400 + }, + { + "epoch": 0.06676650643132702, + "grad_norm": 0.9379919767379761, + "learning_rate": 3.3382503848127246e-06, + "loss": 0.014, + "step": 10410 + }, + { + "epoch": 0.06683064332511313, + "grad_norm": 0.35660219192504883, + "learning_rate": 3.3414571575166753e-06, + "loss": 0.0145, + "step": 10420 + }, + { + "epoch": 0.06689478021889922, + "grad_norm": 0.6724391579627991, + "learning_rate": 3.3446639302206264e-06, + "loss": 0.0147, + "step": 10430 + }, + { + "epoch": 0.06695891711268531, + "grad_norm": 0.7699723839759827, + "learning_rate": 3.347870702924577e-06, + "loss": 0.0142, + "step": 10440 + }, + { + "epoch": 0.06702305400647142, + "grad_norm": 0.9424741268157959, + "learning_rate": 3.3510774756285274e-06, + "loss": 0.0152, + "step": 10450 + }, + { + "epoch": 0.06708719090025751, + "grad_norm": 1.0261833667755127, + "learning_rate": 3.3542842483324785e-06, + "loss": 0.017, + "step": 10460 + }, + { + "epoch": 0.0671513277940436, + "grad_norm": 0.460065096616745, + "learning_rate": 3.357491021036429e-06, + "loss": 0.011, + "step": 10470 + }, + { + "epoch": 0.0672154646878297, + "grad_norm": 0.33964595198631287, + "learning_rate": 3.3606977937403803e-06, + "loss": 0.0178, + "step": 10480 + }, + { + "epoch": 0.0672796015816158, + "grad_norm": 0.44423216581344604, + "learning_rate": 3.363904566444331e-06, + "loss": 0.0125, + "step": 10490 + }, + { + "epoch": 0.0673437384754019, + "grad_norm": 0.8569762110710144, + "learning_rate": 3.3671113391482812e-06, + "loss": 0.0079, + "step": 10500 + }, + { + "epoch": 0.06740787536918799, + "grad_norm": 0.7153170108795166, + "learning_rate": 3.370318111852232e-06, + "loss": 0.0174, + "step": 10510 + }, + { + "epoch": 0.0674720122629741, + "grad_norm": 0.6602340936660767, + "learning_rate": 3.373524884556183e-06, + "loss": 0.0116, + "step": 10520 + }, + { + "epoch": 0.06753614915676019, + "grad_norm": 1.0937711000442505, + "learning_rate": 3.3767316572601337e-06, + "loss": 0.0103, + "step": 10530 + }, + { + "epoch": 0.06760028605054628, + "grad_norm": 0.680429220199585, + "learning_rate": 3.379938429964084e-06, + "loss": 0.0161, + "step": 10540 + }, + { + "epoch": 0.06766442294433238, + "grad_norm": 0.8192482590675354, + "learning_rate": 3.383145202668035e-06, + "loss": 0.0171, + "step": 10550 + }, + { + "epoch": 0.06772855983811849, + "grad_norm": 0.48834553360939026, + "learning_rate": 3.386351975371986e-06, + "loss": 0.019, + "step": 10560 + }, + { + "epoch": 0.06779269673190458, + "grad_norm": 0.8775561451911926, + "learning_rate": 3.389558748075937e-06, + "loss": 0.0157, + "step": 10570 + }, + { + "epoch": 0.06785683362569067, + "grad_norm": 0.7037835717201233, + "learning_rate": 3.3927655207798876e-06, + "loss": 0.0122, + "step": 10580 + }, + { + "epoch": 0.06792097051947676, + "grad_norm": 0.9321784973144531, + "learning_rate": 3.395972293483838e-06, + "loss": 0.0138, + "step": 10590 + }, + { + "epoch": 0.06798510741326287, + "grad_norm": 0.6849508881568909, + "learning_rate": 3.399179066187789e-06, + "loss": 0.0103, + "step": 10600 + }, + { + "epoch": 0.06804924430704896, + "grad_norm": 0.6341637969017029, + "learning_rate": 3.4023858388917397e-06, + "loss": 0.0137, + "step": 10610 + }, + { + "epoch": 0.06811338120083506, + "grad_norm": 0.5561732053756714, + "learning_rate": 3.4055926115956904e-06, + "loss": 0.0169, + "step": 10620 + }, + { + "epoch": 0.06817751809462116, + "grad_norm": 0.7867043614387512, + "learning_rate": 3.408799384299641e-06, + "loss": 0.0142, + "step": 10630 + }, + { + "epoch": 0.06824165498840726, + "grad_norm": 0.40953245759010315, + "learning_rate": 3.4120061570035917e-06, + "loss": 0.0149, + "step": 10640 + }, + { + "epoch": 0.06830579188219335, + "grad_norm": 1.0042667388916016, + "learning_rate": 3.4152129297075424e-06, + "loss": 0.0178, + "step": 10650 + }, + { + "epoch": 0.06836992877597944, + "grad_norm": 0.5220630764961243, + "learning_rate": 3.4184197024114935e-06, + "loss": 0.011, + "step": 10660 + }, + { + "epoch": 0.06843406566976555, + "grad_norm": 0.8816062808036804, + "learning_rate": 3.4216264751154442e-06, + "loss": 0.0162, + "step": 10670 + }, + { + "epoch": 0.06849820256355164, + "grad_norm": 0.3584265112876892, + "learning_rate": 3.4248332478193945e-06, + "loss": 0.0123, + "step": 10680 + }, + { + "epoch": 0.06856233945733774, + "grad_norm": 0.9158117175102234, + "learning_rate": 3.4280400205233456e-06, + "loss": 0.0125, + "step": 10690 + }, + { + "epoch": 0.06862647635112384, + "grad_norm": 0.6624887585639954, + "learning_rate": 3.4312467932272963e-06, + "loss": 0.0131, + "step": 10700 + }, + { + "epoch": 0.06869061324490994, + "grad_norm": 0.5710936188697815, + "learning_rate": 3.4344535659312474e-06, + "loss": 0.0129, + "step": 10710 + }, + { + "epoch": 0.06875475013869603, + "grad_norm": 0.5341808199882507, + "learning_rate": 3.4376603386351977e-06, + "loss": 0.0189, + "step": 10720 + }, + { + "epoch": 0.06881888703248212, + "grad_norm": 0.9151763916015625, + "learning_rate": 3.4408671113391484e-06, + "loss": 0.0122, + "step": 10730 + }, + { + "epoch": 0.06888302392626823, + "grad_norm": 1.0330543518066406, + "learning_rate": 3.4440738840430995e-06, + "loss": 0.0101, + "step": 10740 + }, + { + "epoch": 0.06894716082005432, + "grad_norm": 0.3488490879535675, + "learning_rate": 3.44728065674705e-06, + "loss": 0.0128, + "step": 10750 + }, + { + "epoch": 0.06901129771384042, + "grad_norm": 0.4824540615081787, + "learning_rate": 3.450487429451001e-06, + "loss": 0.0149, + "step": 10760 + }, + { + "epoch": 0.06907543460762652, + "grad_norm": 0.7239876389503479, + "learning_rate": 3.453694202154951e-06, + "loss": 0.0111, + "step": 10770 + }, + { + "epoch": 0.06913957150141262, + "grad_norm": 0.7549890875816345, + "learning_rate": 3.4569009748589022e-06, + "loss": 0.0149, + "step": 10780 + }, + { + "epoch": 0.06920370839519871, + "grad_norm": 0.44856587052345276, + "learning_rate": 3.460107747562853e-06, + "loss": 0.0122, + "step": 10790 + }, + { + "epoch": 0.0692678452889848, + "grad_norm": 0.9687082171440125, + "learning_rate": 3.463314520266804e-06, + "loss": 0.0144, + "step": 10800 + }, + { + "epoch": 0.06933198218277091, + "grad_norm": 0.6999725699424744, + "learning_rate": 3.4665212929707547e-06, + "loss": 0.0116, + "step": 10810 + }, + { + "epoch": 0.069396119076557, + "grad_norm": 0.7693178653717041, + "learning_rate": 3.469728065674705e-06, + "loss": 0.0151, + "step": 10820 + }, + { + "epoch": 0.0694602559703431, + "grad_norm": 0.5353623032569885, + "learning_rate": 3.472934838378656e-06, + "loss": 0.0128, + "step": 10830 + }, + { + "epoch": 0.0695243928641292, + "grad_norm": 0.7831609845161438, + "learning_rate": 3.476141611082607e-06, + "loss": 0.0128, + "step": 10840 + }, + { + "epoch": 0.0695885297579153, + "grad_norm": 0.5129367113113403, + "learning_rate": 3.479348383786558e-06, + "loss": 0.0169, + "step": 10850 + }, + { + "epoch": 0.06965266665170139, + "grad_norm": 0.5755190849304199, + "learning_rate": 3.482555156490508e-06, + "loss": 0.0134, + "step": 10860 + }, + { + "epoch": 0.06971680354548748, + "grad_norm": 0.9079647660255432, + "learning_rate": 3.485761929194459e-06, + "loss": 0.0134, + "step": 10870 + }, + { + "epoch": 0.06978094043927359, + "grad_norm": 0.889003574848175, + "learning_rate": 3.4889687018984096e-06, + "loss": 0.0169, + "step": 10880 + }, + { + "epoch": 0.06984507733305968, + "grad_norm": 0.8207578659057617, + "learning_rate": 3.4921754746023607e-06, + "loss": 0.0159, + "step": 10890 + }, + { + "epoch": 0.06990921422684578, + "grad_norm": 0.5410775542259216, + "learning_rate": 3.4953822473063114e-06, + "loss": 0.0145, + "step": 10900 + }, + { + "epoch": 0.06997335112063188, + "grad_norm": 0.848727822303772, + "learning_rate": 3.4985890200102616e-06, + "loss": 0.0172, + "step": 10910 + }, + { + "epoch": 0.07003748801441798, + "grad_norm": 0.6241754293441772, + "learning_rate": 3.5017957927142127e-06, + "loss": 0.0127, + "step": 10920 + }, + { + "epoch": 0.07010162490820407, + "grad_norm": 0.6075505614280701, + "learning_rate": 3.5050025654181634e-06, + "loss": 0.011, + "step": 10930 + }, + { + "epoch": 0.07016576180199016, + "grad_norm": 0.86773282289505, + "learning_rate": 3.5082093381221145e-06, + "loss": 0.0142, + "step": 10940 + }, + { + "epoch": 0.07022989869577627, + "grad_norm": 0.6941466927528381, + "learning_rate": 3.511416110826065e-06, + "loss": 0.0133, + "step": 10950 + }, + { + "epoch": 0.07029403558956236, + "grad_norm": 0.7797922492027283, + "learning_rate": 3.5146228835300155e-06, + "loss": 0.01, + "step": 10960 + }, + { + "epoch": 0.07035817248334846, + "grad_norm": 0.8995891809463501, + "learning_rate": 3.5178296562339666e-06, + "loss": 0.0129, + "step": 10970 + }, + { + "epoch": 0.07042230937713456, + "grad_norm": 0.8827101588249207, + "learning_rate": 3.5210364289379173e-06, + "loss": 0.0116, + "step": 10980 + }, + { + "epoch": 0.07048644627092066, + "grad_norm": 0.3860144317150116, + "learning_rate": 3.524243201641868e-06, + "loss": 0.0109, + "step": 10990 + }, + { + "epoch": 0.07055058316470675, + "grad_norm": 0.4791621267795563, + "learning_rate": 3.5274499743458187e-06, + "loss": 0.0109, + "step": 11000 + }, + { + "epoch": 0.07061472005849284, + "grad_norm": 0.6825062036514282, + "learning_rate": 3.5306567470497694e-06, + "loss": 0.0121, + "step": 11010 + }, + { + "epoch": 0.07067885695227895, + "grad_norm": 1.4695130586624146, + "learning_rate": 3.53386351975372e-06, + "loss": 0.0131, + "step": 11020 + }, + { + "epoch": 0.07074299384606504, + "grad_norm": 0.32003363966941833, + "learning_rate": 3.537070292457671e-06, + "loss": 0.0148, + "step": 11030 + }, + { + "epoch": 0.07080713073985113, + "grad_norm": 0.6566299200057983, + "learning_rate": 3.5402770651616214e-06, + "loss": 0.0124, + "step": 11040 + }, + { + "epoch": 0.07087126763363724, + "grad_norm": 0.7709537744522095, + "learning_rate": 3.543483837865572e-06, + "loss": 0.0137, + "step": 11050 + }, + { + "epoch": 0.07093540452742333, + "grad_norm": 0.46081745624542236, + "learning_rate": 3.5466906105695232e-06, + "loss": 0.0108, + "step": 11060 + }, + { + "epoch": 0.07099954142120943, + "grad_norm": 0.5471722483634949, + "learning_rate": 3.549897383273474e-06, + "loss": 0.0115, + "step": 11070 + }, + { + "epoch": 0.07106367831499552, + "grad_norm": 0.9127947092056274, + "learning_rate": 3.553104155977425e-06, + "loss": 0.016, + "step": 11080 + }, + { + "epoch": 0.07112781520878163, + "grad_norm": 0.6626812219619751, + "learning_rate": 3.5563109286813753e-06, + "loss": 0.0165, + "step": 11090 + }, + { + "epoch": 0.07119195210256772, + "grad_norm": 0.9916837215423584, + "learning_rate": 3.559517701385326e-06, + "loss": 0.0157, + "step": 11100 + }, + { + "epoch": 0.07125608899635381, + "grad_norm": 0.5472110509872437, + "learning_rate": 3.562724474089277e-06, + "loss": 0.0111, + "step": 11110 + }, + { + "epoch": 0.07132022589013992, + "grad_norm": 0.5907249450683594, + "learning_rate": 3.565931246793228e-06, + "loss": 0.0133, + "step": 11120 + }, + { + "epoch": 0.07138436278392601, + "grad_norm": 0.7342523336410522, + "learning_rate": 3.569138019497178e-06, + "loss": 0.013, + "step": 11130 + }, + { + "epoch": 0.07144849967771211, + "grad_norm": 0.8366237282752991, + "learning_rate": 3.5723447922011288e-06, + "loss": 0.0086, + "step": 11140 + }, + { + "epoch": 0.0715126365714982, + "grad_norm": 0.23374402523040771, + "learning_rate": 3.57555156490508e-06, + "loss": 0.0106, + "step": 11150 + }, + { + "epoch": 0.07157677346528431, + "grad_norm": 0.6893095970153809, + "learning_rate": 3.5787583376090306e-06, + "loss": 0.0168, + "step": 11160 + }, + { + "epoch": 0.0716409103590704, + "grad_norm": 0.6772871017456055, + "learning_rate": 3.5819651103129817e-06, + "loss": 0.0111, + "step": 11170 + }, + { + "epoch": 0.0717050472528565, + "grad_norm": 0.7183294296264648, + "learning_rate": 3.585171883016932e-06, + "loss": 0.0182, + "step": 11180 + }, + { + "epoch": 0.0717691841466426, + "grad_norm": 0.7910516262054443, + "learning_rate": 3.5883786557208826e-06, + "loss": 0.0176, + "step": 11190 + }, + { + "epoch": 0.0718333210404287, + "grad_norm": 0.6576840877532959, + "learning_rate": 3.5915854284248337e-06, + "loss": 0.0107, + "step": 11200 + }, + { + "epoch": 0.07189745793421479, + "grad_norm": 0.470316618680954, + "learning_rate": 3.5947922011287844e-06, + "loss": 0.0083, + "step": 11210 + }, + { + "epoch": 0.07196159482800088, + "grad_norm": 0.8724121451377869, + "learning_rate": 3.5979989738327347e-06, + "loss": 0.0135, + "step": 11220 + }, + { + "epoch": 0.07202573172178699, + "grad_norm": 0.9997464418411255, + "learning_rate": 3.601205746536686e-06, + "loss": 0.0134, + "step": 11230 + }, + { + "epoch": 0.07208986861557308, + "grad_norm": 0.8020079731941223, + "learning_rate": 3.6044125192406365e-06, + "loss": 0.0157, + "step": 11240 + }, + { + "epoch": 0.07215400550935917, + "grad_norm": 0.3713897466659546, + "learning_rate": 3.607619291944587e-06, + "loss": 0.0083, + "step": 11250 + }, + { + "epoch": 0.07221814240314528, + "grad_norm": 0.7209635972976685, + "learning_rate": 3.6108260646485383e-06, + "loss": 0.0122, + "step": 11260 + }, + { + "epoch": 0.07228227929693137, + "grad_norm": 0.40024682879447937, + "learning_rate": 3.6140328373524886e-06, + "loss": 0.0126, + "step": 11270 + }, + { + "epoch": 0.07234641619071747, + "grad_norm": 0.8981276154518127, + "learning_rate": 3.6172396100564393e-06, + "loss": 0.018, + "step": 11280 + }, + { + "epoch": 0.07241055308450356, + "grad_norm": 0.822223424911499, + "learning_rate": 3.6204463827603904e-06, + "loss": 0.0131, + "step": 11290 + }, + { + "epoch": 0.07247468997828967, + "grad_norm": 0.5042352676391602, + "learning_rate": 3.623653155464341e-06, + "loss": 0.0098, + "step": 11300 + }, + { + "epoch": 0.07253882687207576, + "grad_norm": 0.5179206728935242, + "learning_rate": 3.6268599281682913e-06, + "loss": 0.0114, + "step": 11310 + }, + { + "epoch": 0.07260296376586185, + "grad_norm": 0.6678454875946045, + "learning_rate": 3.6300667008722424e-06, + "loss": 0.0153, + "step": 11320 + }, + { + "epoch": 0.07266710065964795, + "grad_norm": 0.52106773853302, + "learning_rate": 3.633273473576193e-06, + "loss": 0.0116, + "step": 11330 + }, + { + "epoch": 0.07273123755343405, + "grad_norm": 0.9093338847160339, + "learning_rate": 3.6364802462801442e-06, + "loss": 0.0111, + "step": 11340 + }, + { + "epoch": 0.07279537444722015, + "grad_norm": 0.8999060392379761, + "learning_rate": 3.639687018984095e-06, + "loss": 0.0171, + "step": 11350 + }, + { + "epoch": 0.07285951134100624, + "grad_norm": 1.9983782768249512, + "learning_rate": 3.642893791688045e-06, + "loss": 0.0146, + "step": 11360 + }, + { + "epoch": 0.07292364823479235, + "grad_norm": 0.3759327530860901, + "learning_rate": 3.6461005643919963e-06, + "loss": 0.0124, + "step": 11370 + }, + { + "epoch": 0.07298778512857844, + "grad_norm": 0.7182804942131042, + "learning_rate": 3.649307337095947e-06, + "loss": 0.0125, + "step": 11380 + }, + { + "epoch": 0.07305192202236453, + "grad_norm": 0.8969981670379639, + "learning_rate": 3.6525141097998977e-06, + "loss": 0.0152, + "step": 11390 + }, + { + "epoch": 0.07311605891615063, + "grad_norm": 0.5347842574119568, + "learning_rate": 3.655720882503848e-06, + "loss": 0.0108, + "step": 11400 + }, + { + "epoch": 0.07318019580993673, + "grad_norm": 0.6039052605628967, + "learning_rate": 3.658927655207799e-06, + "loss": 0.0134, + "step": 11410 + }, + { + "epoch": 0.07324433270372283, + "grad_norm": 0.42864710092544556, + "learning_rate": 3.6621344279117498e-06, + "loss": 0.0145, + "step": 11420 + }, + { + "epoch": 0.07330846959750892, + "grad_norm": 1.079379677772522, + "learning_rate": 3.665341200615701e-06, + "loss": 0.011, + "step": 11430 + }, + { + "epoch": 0.07337260649129503, + "grad_norm": 0.4444161355495453, + "learning_rate": 3.6685479733196516e-06, + "loss": 0.0144, + "step": 11440 + }, + { + "epoch": 0.07343674338508112, + "grad_norm": 0.4957866370677948, + "learning_rate": 3.671754746023602e-06, + "loss": 0.0124, + "step": 11450 + }, + { + "epoch": 0.07350088027886721, + "grad_norm": 1.4191908836364746, + "learning_rate": 3.674961518727553e-06, + "loss": 0.0153, + "step": 11460 + }, + { + "epoch": 0.0735650171726533, + "grad_norm": 0.5625303387641907, + "learning_rate": 3.6781682914315036e-06, + "loss": 0.0142, + "step": 11470 + }, + { + "epoch": 0.07362915406643941, + "grad_norm": 1.1107635498046875, + "learning_rate": 3.6813750641354547e-06, + "loss": 0.0149, + "step": 11480 + }, + { + "epoch": 0.0736932909602255, + "grad_norm": 0.32591041922569275, + "learning_rate": 3.6845818368394054e-06, + "loss": 0.0133, + "step": 11490 + }, + { + "epoch": 0.0737574278540116, + "grad_norm": 0.37027707695961, + "learning_rate": 3.6877886095433557e-06, + "loss": 0.0137, + "step": 11500 + }, + { + "epoch": 0.0738215647477977, + "grad_norm": 0.7116591334342957, + "learning_rate": 3.6909953822473064e-06, + "loss": 0.0098, + "step": 11510 + }, + { + "epoch": 0.0738857016415838, + "grad_norm": 0.4879796802997589, + "learning_rate": 3.6942021549512575e-06, + "loss": 0.0121, + "step": 11520 + }, + { + "epoch": 0.07394983853536989, + "grad_norm": 0.46357476711273193, + "learning_rate": 3.697408927655208e-06, + "loss": 0.008, + "step": 11530 + }, + { + "epoch": 0.07401397542915598, + "grad_norm": 0.6622515320777893, + "learning_rate": 3.7006157003591585e-06, + "loss": 0.0087, + "step": 11540 + }, + { + "epoch": 0.07407811232294209, + "grad_norm": 1.0612263679504395, + "learning_rate": 3.7038224730631096e-06, + "loss": 0.0114, + "step": 11550 + }, + { + "epoch": 0.07414224921672818, + "grad_norm": 0.630497932434082, + "learning_rate": 3.7070292457670603e-06, + "loss": 0.0146, + "step": 11560 + }, + { + "epoch": 0.07420638611051428, + "grad_norm": 0.5127402544021606, + "learning_rate": 3.7102360184710114e-06, + "loss": 0.0094, + "step": 11570 + }, + { + "epoch": 0.07427052300430038, + "grad_norm": 0.39021891355514526, + "learning_rate": 3.713442791174962e-06, + "loss": 0.0112, + "step": 11580 + }, + { + "epoch": 0.07433465989808648, + "grad_norm": 0.6863677501678467, + "learning_rate": 3.7166495638789123e-06, + "loss": 0.0153, + "step": 11590 + }, + { + "epoch": 0.07439879679187257, + "grad_norm": 0.7049649953842163, + "learning_rate": 3.7198563365828634e-06, + "loss": 0.0145, + "step": 11600 + }, + { + "epoch": 0.07446293368565866, + "grad_norm": 0.5492684841156006, + "learning_rate": 3.723063109286814e-06, + "loss": 0.0098, + "step": 11610 + }, + { + "epoch": 0.07452707057944477, + "grad_norm": 0.5002531409263611, + "learning_rate": 3.726269881990765e-06, + "loss": 0.0093, + "step": 11620 + }, + { + "epoch": 0.07459120747323086, + "grad_norm": 0.783821702003479, + "learning_rate": 3.7294766546947155e-06, + "loss": 0.0128, + "step": 11630 + }, + { + "epoch": 0.07465534436701696, + "grad_norm": 0.539421021938324, + "learning_rate": 3.732683427398666e-06, + "loss": 0.0145, + "step": 11640 + }, + { + "epoch": 0.07471948126080306, + "grad_norm": 0.740747332572937, + "learning_rate": 3.735890200102617e-06, + "loss": 0.0136, + "step": 11650 + }, + { + "epoch": 0.07478361815458916, + "grad_norm": 0.5146269798278809, + "learning_rate": 3.739096972806568e-06, + "loss": 0.0139, + "step": 11660 + }, + { + "epoch": 0.07484775504837525, + "grad_norm": 0.39279046654701233, + "learning_rate": 3.7423037455105187e-06, + "loss": 0.0117, + "step": 11670 + }, + { + "epoch": 0.07491189194216134, + "grad_norm": 0.3348548114299774, + "learning_rate": 3.745510518214469e-06, + "loss": 0.0144, + "step": 11680 + }, + { + "epoch": 0.07497602883594745, + "grad_norm": 0.9481940865516663, + "learning_rate": 3.74871729091842e-06, + "loss": 0.0122, + "step": 11690 + }, + { + "epoch": 0.07504016572973354, + "grad_norm": 0.5905875563621521, + "learning_rate": 3.7519240636223708e-06, + "loss": 0.0084, + "step": 11700 + }, + { + "epoch": 0.07510430262351964, + "grad_norm": 0.7137616276741028, + "learning_rate": 3.755130836326322e-06, + "loss": 0.0129, + "step": 11710 + }, + { + "epoch": 0.07516843951730574, + "grad_norm": 0.6572869420051575, + "learning_rate": 3.758337609030272e-06, + "loss": 0.0177, + "step": 11720 + }, + { + "epoch": 0.07523257641109184, + "grad_norm": 0.6707942485809326, + "learning_rate": 3.761544381734223e-06, + "loss": 0.0119, + "step": 11730 + }, + { + "epoch": 0.07529671330487793, + "grad_norm": 0.3048764169216156, + "learning_rate": 3.764751154438174e-06, + "loss": 0.0164, + "step": 11740 + }, + { + "epoch": 0.07536085019866402, + "grad_norm": 0.8465550541877747, + "learning_rate": 3.7679579271421246e-06, + "loss": 0.0121, + "step": 11750 + }, + { + "epoch": 0.07542498709245013, + "grad_norm": 1.304787516593933, + "learning_rate": 3.7711646998460753e-06, + "loss": 0.0099, + "step": 11760 + }, + { + "epoch": 0.07548912398623622, + "grad_norm": 0.7193863987922668, + "learning_rate": 3.7743714725500256e-06, + "loss": 0.0106, + "step": 11770 + }, + { + "epoch": 0.07555326088002232, + "grad_norm": 0.6778226494789124, + "learning_rate": 3.7775782452539767e-06, + "loss": 0.01, + "step": 11780 + }, + { + "epoch": 0.07561739777380842, + "grad_norm": 0.7179293036460876, + "learning_rate": 3.7807850179579274e-06, + "loss": 0.0138, + "step": 11790 + }, + { + "epoch": 0.07568153466759452, + "grad_norm": 0.3168581426143646, + "learning_rate": 3.7839917906618785e-06, + "loss": 0.0111, + "step": 11800 + }, + { + "epoch": 0.07574567156138061, + "grad_norm": 0.6792516708374023, + "learning_rate": 3.7871985633658288e-06, + "loss": 0.0124, + "step": 11810 + }, + { + "epoch": 0.0758098084551667, + "grad_norm": 0.9062817096710205, + "learning_rate": 3.7904053360697795e-06, + "loss": 0.0097, + "step": 11820 + }, + { + "epoch": 0.07587394534895281, + "grad_norm": 0.5379997491836548, + "learning_rate": 3.7936121087737306e-06, + "loss": 0.0115, + "step": 11830 + }, + { + "epoch": 0.0759380822427389, + "grad_norm": 0.6321361064910889, + "learning_rate": 3.7968188814776813e-06, + "loss": 0.0112, + "step": 11840 + }, + { + "epoch": 0.076002219136525, + "grad_norm": 0.6613107919692993, + "learning_rate": 3.800025654181632e-06, + "loss": 0.0107, + "step": 11850 + }, + { + "epoch": 0.0760663560303111, + "grad_norm": 0.433931440114975, + "learning_rate": 3.8032324268855826e-06, + "loss": 0.013, + "step": 11860 + }, + { + "epoch": 0.0761304929240972, + "grad_norm": 0.5778363347053528, + "learning_rate": 3.8064391995895333e-06, + "loss": 0.016, + "step": 11870 + }, + { + "epoch": 0.07619462981788329, + "grad_norm": 0.47336652874946594, + "learning_rate": 3.809645972293484e-06, + "loss": 0.0091, + "step": 11880 + }, + { + "epoch": 0.07625876671166938, + "grad_norm": 0.691404402256012, + "learning_rate": 3.812852744997435e-06, + "loss": 0.0092, + "step": 11890 + }, + { + "epoch": 0.07632290360545549, + "grad_norm": 0.5509531497955322, + "learning_rate": 3.816059517701385e-06, + "loss": 0.0118, + "step": 11900 + }, + { + "epoch": 0.07638704049924158, + "grad_norm": 1.3395763635635376, + "learning_rate": 3.819266290405336e-06, + "loss": 0.0117, + "step": 11910 + }, + { + "epoch": 0.07645117739302768, + "grad_norm": 0.6070330739021301, + "learning_rate": 3.822473063109287e-06, + "loss": 0.0102, + "step": 11920 + }, + { + "epoch": 0.07651531428681378, + "grad_norm": 9.7800931930542, + "learning_rate": 3.825679835813238e-06, + "loss": 0.01, + "step": 11930 + }, + { + "epoch": 0.07657945118059988, + "grad_norm": 0.5915313959121704, + "learning_rate": 3.828886608517189e-06, + "loss": 0.016, + "step": 11940 + }, + { + "epoch": 0.07664358807438597, + "grad_norm": 0.6098341345787048, + "learning_rate": 3.832093381221139e-06, + "loss": 0.0144, + "step": 11950 + }, + { + "epoch": 0.07670772496817206, + "grad_norm": 0.6589644551277161, + "learning_rate": 3.83530015392509e-06, + "loss": 0.0141, + "step": 11960 + }, + { + "epoch": 0.07677186186195817, + "grad_norm": 0.26208600401878357, + "learning_rate": 3.838506926629041e-06, + "loss": 0.01, + "step": 11970 + }, + { + "epoch": 0.07683599875574426, + "grad_norm": 0.5078624486923218, + "learning_rate": 3.841713699332991e-06, + "loss": 0.0149, + "step": 11980 + }, + { + "epoch": 0.07690013564953035, + "grad_norm": 0.7186983227729797, + "learning_rate": 3.8449204720369425e-06, + "loss": 0.0086, + "step": 11990 + }, + { + "epoch": 0.07696427254331646, + "grad_norm": 0.6783637404441833, + "learning_rate": 3.848127244740893e-06, + "loss": 0.0142, + "step": 12000 + }, + { + "epoch": 0.07702840943710255, + "grad_norm": 0.7996017932891846, + "learning_rate": 3.851334017444844e-06, + "loss": 0.0129, + "step": 12010 + }, + { + "epoch": 0.07709254633088865, + "grad_norm": 1.3191332817077637, + "learning_rate": 3.854540790148795e-06, + "loss": 0.0099, + "step": 12020 + }, + { + "epoch": 0.07715668322467474, + "grad_norm": 1.0860892534255981, + "learning_rate": 3.857747562852745e-06, + "loss": 0.0115, + "step": 12030 + }, + { + "epoch": 0.07722082011846085, + "grad_norm": 0.4692663550376892, + "learning_rate": 3.8609543355566955e-06, + "loss": 0.0081, + "step": 12040 + }, + { + "epoch": 0.07728495701224694, + "grad_norm": 0.9405308961868286, + "learning_rate": 3.864161108260647e-06, + "loss": 0.0146, + "step": 12050 + }, + { + "epoch": 0.07734909390603303, + "grad_norm": 0.7099402546882629, + "learning_rate": 3.867367880964598e-06, + "loss": 0.012, + "step": 12060 + }, + { + "epoch": 0.07741323079981913, + "grad_norm": 0.7617799639701843, + "learning_rate": 3.870574653668549e-06, + "loss": 0.0142, + "step": 12070 + }, + { + "epoch": 0.07747736769360523, + "grad_norm": 0.8338746428489685, + "learning_rate": 3.873781426372499e-06, + "loss": 0.0111, + "step": 12080 + }, + { + "epoch": 0.07754150458739133, + "grad_norm": 0.9001340270042419, + "learning_rate": 3.876988199076449e-06, + "loss": 0.0103, + "step": 12090 + }, + { + "epoch": 0.07760564148117742, + "grad_norm": 0.6787715554237366, + "learning_rate": 3.8801949717804005e-06, + "loss": 0.008, + "step": 12100 + }, + { + "epoch": 0.07766977837496353, + "grad_norm": 0.7984828948974609, + "learning_rate": 3.883401744484352e-06, + "loss": 0.0122, + "step": 12110 + }, + { + "epoch": 0.07773391526874962, + "grad_norm": 0.8640044331550598, + "learning_rate": 3.886608517188302e-06, + "loss": 0.0121, + "step": 12120 + }, + { + "epoch": 0.07779805216253571, + "grad_norm": 0.4400997459888458, + "learning_rate": 3.889815289892253e-06, + "loss": 0.0149, + "step": 12130 + }, + { + "epoch": 0.0778621890563218, + "grad_norm": 1.0008609294891357, + "learning_rate": 3.893022062596203e-06, + "loss": 0.0121, + "step": 12140 + }, + { + "epoch": 0.07792632595010791, + "grad_norm": 0.7383760809898376, + "learning_rate": 3.896228835300154e-06, + "loss": 0.0111, + "step": 12150 + }, + { + "epoch": 0.077990462843894, + "grad_norm": 0.9170994758605957, + "learning_rate": 3.8994356080041054e-06, + "loss": 0.0099, + "step": 12160 + }, + { + "epoch": 0.0780545997376801, + "grad_norm": 0.700387179851532, + "learning_rate": 3.902642380708056e-06, + "loss": 0.0127, + "step": 12170 + }, + { + "epoch": 0.0781187366314662, + "grad_norm": 0.6357445120811462, + "learning_rate": 3.905849153412006e-06, + "loss": 0.0157, + "step": 12180 + }, + { + "epoch": 0.0781828735252523, + "grad_norm": 0.8678653836250305, + "learning_rate": 3.909055926115957e-06, + "loss": 0.0124, + "step": 12190 + }, + { + "epoch": 0.07824701041903839, + "grad_norm": 0.6583905220031738, + "learning_rate": 3.912262698819908e-06, + "loss": 0.0129, + "step": 12200 + }, + { + "epoch": 0.07831114731282449, + "grad_norm": 0.4002678692340851, + "learning_rate": 3.915469471523859e-06, + "loss": 0.0091, + "step": 12210 + }, + { + "epoch": 0.0783752842066106, + "grad_norm": 0.7340361475944519, + "learning_rate": 3.91867624422781e-06, + "loss": 0.011, + "step": 12220 + }, + { + "epoch": 0.07843942110039669, + "grad_norm": 0.5798015594482422, + "learning_rate": 3.92188301693176e-06, + "loss": 0.0159, + "step": 12230 + }, + { + "epoch": 0.07850355799418278, + "grad_norm": 0.6820700764656067, + "learning_rate": 3.925089789635711e-06, + "loss": 0.0111, + "step": 12240 + }, + { + "epoch": 0.07856769488796889, + "grad_norm": 0.5610462427139282, + "learning_rate": 3.928296562339662e-06, + "loss": 0.0089, + "step": 12250 + }, + { + "epoch": 0.07863183178175498, + "grad_norm": 0.39736512303352356, + "learning_rate": 3.931503335043612e-06, + "loss": 0.0104, + "step": 12260 + }, + { + "epoch": 0.07869596867554107, + "grad_norm": 0.6950397491455078, + "learning_rate": 3.934710107747563e-06, + "loss": 0.0101, + "step": 12270 + }, + { + "epoch": 0.07876010556932717, + "grad_norm": 0.8704506754875183, + "learning_rate": 3.937916880451514e-06, + "loss": 0.0131, + "step": 12280 + }, + { + "epoch": 0.07882424246311327, + "grad_norm": 0.7454317808151245, + "learning_rate": 3.941123653155465e-06, + "loss": 0.0112, + "step": 12290 + }, + { + "epoch": 0.07888837935689937, + "grad_norm": 0.33978649973869324, + "learning_rate": 3.944330425859416e-06, + "loss": 0.0105, + "step": 12300 + }, + { + "epoch": 0.07895251625068546, + "grad_norm": 0.8149883151054382, + "learning_rate": 3.947537198563366e-06, + "loss": 0.0177, + "step": 12310 + }, + { + "epoch": 0.07901665314447157, + "grad_norm": 0.5741153955459595, + "learning_rate": 3.9507439712673165e-06, + "loss": 0.0118, + "step": 12320 + }, + { + "epoch": 0.07908079003825766, + "grad_norm": 0.8584007620811462, + "learning_rate": 3.953950743971268e-06, + "loss": 0.0122, + "step": 12330 + }, + { + "epoch": 0.07914492693204375, + "grad_norm": 0.5068797469139099, + "learning_rate": 3.957157516675219e-06, + "loss": 0.0124, + "step": 12340 + }, + { + "epoch": 0.07920906382582985, + "grad_norm": 0.36949658393859863, + "learning_rate": 3.960364289379169e-06, + "loss": 0.011, + "step": 12350 + }, + { + "epoch": 0.07927320071961595, + "grad_norm": 0.41789624094963074, + "learning_rate": 3.96357106208312e-06, + "loss": 0.0119, + "step": 12360 + }, + { + "epoch": 0.07933733761340205, + "grad_norm": 0.1528804749250412, + "learning_rate": 3.96677783478707e-06, + "loss": 0.0132, + "step": 12370 + }, + { + "epoch": 0.07940147450718814, + "grad_norm": 0.3877411186695099, + "learning_rate": 3.9699846074910215e-06, + "loss": 0.0154, + "step": 12380 + }, + { + "epoch": 0.07946561140097425, + "grad_norm": 0.5584845542907715, + "learning_rate": 3.973191380194973e-06, + "loss": 0.0127, + "step": 12390 + }, + { + "epoch": 0.07952974829476034, + "grad_norm": 0.28894999623298645, + "learning_rate": 3.976398152898923e-06, + "loss": 0.0127, + "step": 12400 + }, + { + "epoch": 0.07959388518854643, + "grad_norm": 0.7544544339179993, + "learning_rate": 3.979604925602873e-06, + "loss": 0.0134, + "step": 12410 + }, + { + "epoch": 0.07965802208233252, + "grad_norm": 0.5420180559158325, + "learning_rate": 3.982811698306824e-06, + "loss": 0.0142, + "step": 12420 + }, + { + "epoch": 0.07972215897611863, + "grad_norm": 0.4947992265224457, + "learning_rate": 3.986018471010775e-06, + "loss": 0.0099, + "step": 12430 + }, + { + "epoch": 0.07978629586990472, + "grad_norm": 0.8791900873184204, + "learning_rate": 3.9892252437147265e-06, + "loss": 0.0146, + "step": 12440 + }, + { + "epoch": 0.07985043276369082, + "grad_norm": 0.5872790217399597, + "learning_rate": 3.992432016418677e-06, + "loss": 0.0094, + "step": 12450 + }, + { + "epoch": 0.07991456965747692, + "grad_norm": 1.1887428760528564, + "learning_rate": 3.995638789122627e-06, + "loss": 0.0113, + "step": 12460 + }, + { + "epoch": 0.07997870655126302, + "grad_norm": 0.6307410001754761, + "learning_rate": 3.998845561826578e-06, + "loss": 0.009, + "step": 12470 + }, + { + "epoch": 0.08004284344504911, + "grad_norm": 0.4119446277618408, + "learning_rate": 4.002052334530529e-06, + "loss": 0.0143, + "step": 12480 + }, + { + "epoch": 0.0801069803388352, + "grad_norm": 0.6567076444625854, + "learning_rate": 4.0052591072344795e-06, + "loss": 0.013, + "step": 12490 + }, + { + "epoch": 0.08017111723262131, + "grad_norm": 0.7034739851951599, + "learning_rate": 4.008465879938431e-06, + "loss": 0.01, + "step": 12500 + }, + { + "epoch": 0.0802352541264074, + "grad_norm": 0.5242000222206116, + "learning_rate": 4.011672652642381e-06, + "loss": 0.013, + "step": 12510 + }, + { + "epoch": 0.0802993910201935, + "grad_norm": 0.644496500492096, + "learning_rate": 4.014879425346332e-06, + "loss": 0.0132, + "step": 12520 + }, + { + "epoch": 0.0803635279139796, + "grad_norm": 0.5115908980369568, + "learning_rate": 4.018086198050283e-06, + "loss": 0.0111, + "step": 12530 + }, + { + "epoch": 0.0804276648077657, + "grad_norm": 0.1372889280319214, + "learning_rate": 4.021292970754233e-06, + "loss": 0.0139, + "step": 12540 + }, + { + "epoch": 0.08049180170155179, + "grad_norm": 0.8904168009757996, + "learning_rate": 4.024499743458184e-06, + "loss": 0.0118, + "step": 12550 + }, + { + "epoch": 0.08055593859533788, + "grad_norm": 0.7952111959457397, + "learning_rate": 4.027706516162135e-06, + "loss": 0.0114, + "step": 12560 + }, + { + "epoch": 0.08062007548912399, + "grad_norm": 0.4509839713573456, + "learning_rate": 4.030913288866086e-06, + "loss": 0.0096, + "step": 12570 + }, + { + "epoch": 0.08068421238291008, + "grad_norm": 0.4335967004299164, + "learning_rate": 4.034120061570036e-06, + "loss": 0.0089, + "step": 12580 + }, + { + "epoch": 0.08074834927669618, + "grad_norm": 0.6105585694313049, + "learning_rate": 4.037326834273987e-06, + "loss": 0.0143, + "step": 12590 + }, + { + "epoch": 0.08081248617048228, + "grad_norm": 0.5058387517929077, + "learning_rate": 4.0405336069779375e-06, + "loss": 0.011, + "step": 12600 + }, + { + "epoch": 0.08087662306426838, + "grad_norm": 0.7142662405967712, + "learning_rate": 4.043740379681889e-06, + "loss": 0.0159, + "step": 12610 + }, + { + "epoch": 0.08094075995805447, + "grad_norm": 0.6708266735076904, + "learning_rate": 4.04694715238584e-06, + "loss": 0.0107, + "step": 12620 + }, + { + "epoch": 0.08100489685184056, + "grad_norm": 0.49649274349212646, + "learning_rate": 4.05015392508979e-06, + "loss": 0.0099, + "step": 12630 + }, + { + "epoch": 0.08106903374562667, + "grad_norm": 0.737576425075531, + "learning_rate": 4.05336069779374e-06, + "loss": 0.0097, + "step": 12640 + }, + { + "epoch": 0.08113317063941276, + "grad_norm": 0.6056515574455261, + "learning_rate": 4.056567470497691e-06, + "loss": 0.0156, + "step": 12650 + }, + { + "epoch": 0.08119730753319886, + "grad_norm": 0.6571008563041687, + "learning_rate": 4.0597742432016425e-06, + "loss": 0.0085, + "step": 12660 + }, + { + "epoch": 0.08126144442698496, + "grad_norm": 0.7000786662101746, + "learning_rate": 4.062981015905593e-06, + "loss": 0.0163, + "step": 12670 + }, + { + "epoch": 0.08132558132077106, + "grad_norm": 0.9011369943618774, + "learning_rate": 4.066187788609544e-06, + "loss": 0.0098, + "step": 12680 + }, + { + "epoch": 0.08138971821455715, + "grad_norm": 0.9465958476066589, + "learning_rate": 4.069394561313494e-06, + "loss": 0.0093, + "step": 12690 + }, + { + "epoch": 0.08145385510834324, + "grad_norm": 0.7725071310997009, + "learning_rate": 4.072601334017445e-06, + "loss": 0.0149, + "step": 12700 + }, + { + "epoch": 0.08151799200212935, + "grad_norm": 0.2763436734676361, + "learning_rate": 4.075808106721396e-06, + "loss": 0.0111, + "step": 12710 + }, + { + "epoch": 0.08158212889591544, + "grad_norm": 0.7053089141845703, + "learning_rate": 4.079014879425347e-06, + "loss": 0.0087, + "step": 12720 + }, + { + "epoch": 0.08164626578970154, + "grad_norm": 0.6975532174110413, + "learning_rate": 4.082221652129298e-06, + "loss": 0.0115, + "step": 12730 + }, + { + "epoch": 0.08171040268348764, + "grad_norm": 0.3494133949279785, + "learning_rate": 4.085428424833248e-06, + "loss": 0.01, + "step": 12740 + }, + { + "epoch": 0.08177453957727374, + "grad_norm": 0.6423804759979248, + "learning_rate": 4.088635197537199e-06, + "loss": 0.0122, + "step": 12750 + }, + { + "epoch": 0.08183867647105983, + "grad_norm": 0.5009698271751404, + "learning_rate": 4.091841970241149e-06, + "loss": 0.0092, + "step": 12760 + }, + { + "epoch": 0.08190281336484592, + "grad_norm": 0.36906102299690247, + "learning_rate": 4.0950487429451005e-06, + "loss": 0.0079, + "step": 12770 + }, + { + "epoch": 0.08196695025863203, + "grad_norm": 0.6030924320220947, + "learning_rate": 4.098255515649051e-06, + "loss": 0.014, + "step": 12780 + }, + { + "epoch": 0.08203108715241812, + "grad_norm": 0.6758723855018616, + "learning_rate": 4.101462288353002e-06, + "loss": 0.0129, + "step": 12790 + }, + { + "epoch": 0.08209522404620422, + "grad_norm": 0.4944661855697632, + "learning_rate": 4.104669061056953e-06, + "loss": 0.0101, + "step": 12800 + }, + { + "epoch": 0.08215936093999031, + "grad_norm": 0.4592941403388977, + "learning_rate": 4.107875833760903e-06, + "loss": 0.0112, + "step": 12810 + }, + { + "epoch": 0.08222349783377642, + "grad_norm": 0.9778560996055603, + "learning_rate": 4.111082606464854e-06, + "loss": 0.0123, + "step": 12820 + }, + { + "epoch": 0.08228763472756251, + "grad_norm": 0.9011563658714294, + "learning_rate": 4.114289379168805e-06, + "loss": 0.0093, + "step": 12830 + }, + { + "epoch": 0.0823517716213486, + "grad_norm": 0.7265444397926331, + "learning_rate": 4.117496151872756e-06, + "loss": 0.0122, + "step": 12840 + }, + { + "epoch": 0.08241590851513471, + "grad_norm": 0.29877620935440063, + "learning_rate": 4.120702924576706e-06, + "loss": 0.0109, + "step": 12850 + }, + { + "epoch": 0.0824800454089208, + "grad_norm": 0.8235241770744324, + "learning_rate": 4.123909697280657e-06, + "loss": 0.0106, + "step": 12860 + }, + { + "epoch": 0.0825441823027069, + "grad_norm": 0.7094138860702515, + "learning_rate": 4.127116469984608e-06, + "loss": 0.0087, + "step": 12870 + }, + { + "epoch": 0.08260831919649299, + "grad_norm": 0.6995874047279358, + "learning_rate": 4.1303232426885585e-06, + "loss": 0.0107, + "step": 12880 + }, + { + "epoch": 0.0826724560902791, + "grad_norm": 0.9243060350418091, + "learning_rate": 4.13353001539251e-06, + "loss": 0.0139, + "step": 12890 + }, + { + "epoch": 0.08273659298406519, + "grad_norm": 0.4975259006023407, + "learning_rate": 4.13673678809646e-06, + "loss": 0.0127, + "step": 12900 + }, + { + "epoch": 0.08280072987785128, + "grad_norm": 0.6469900012016296, + "learning_rate": 4.139943560800411e-06, + "loss": 0.0167, + "step": 12910 + }, + { + "epoch": 0.08286486677163739, + "grad_norm": 0.7069517970085144, + "learning_rate": 4.143150333504361e-06, + "loss": 0.0114, + "step": 12920 + }, + { + "epoch": 0.08292900366542348, + "grad_norm": 0.690664529800415, + "learning_rate": 4.146357106208312e-06, + "loss": 0.0157, + "step": 12930 + }, + { + "epoch": 0.08299314055920957, + "grad_norm": 0.48529475927352905, + "learning_rate": 4.1495638789122635e-06, + "loss": 0.015, + "step": 12940 + }, + { + "epoch": 0.08305727745299567, + "grad_norm": 0.3557664155960083, + "learning_rate": 4.152770651616214e-06, + "loss": 0.0097, + "step": 12950 + }, + { + "epoch": 0.08312141434678177, + "grad_norm": 0.7546350955963135, + "learning_rate": 4.155977424320165e-06, + "loss": 0.0131, + "step": 12960 + }, + { + "epoch": 0.08318555124056787, + "grad_norm": 0.6388615965843201, + "learning_rate": 4.159184197024115e-06, + "loss": 0.0113, + "step": 12970 + }, + { + "epoch": 0.08324968813435396, + "grad_norm": 0.41935300827026367, + "learning_rate": 4.162390969728066e-06, + "loss": 0.0105, + "step": 12980 + }, + { + "epoch": 0.08331382502814007, + "grad_norm": 0.8952448964118958, + "learning_rate": 4.1655977424320165e-06, + "loss": 0.012, + "step": 12990 + }, + { + "epoch": 0.08337796192192616, + "grad_norm": 1.4644192457199097, + "learning_rate": 4.168804515135968e-06, + "loss": 0.0117, + "step": 13000 + }, + { + "epoch": 0.08344209881571225, + "grad_norm": 0.7456547021865845, + "learning_rate": 4.172011287839918e-06, + "loss": 0.0084, + "step": 13010 + }, + { + "epoch": 0.08350623570949835, + "grad_norm": 0.5758523344993591, + "learning_rate": 4.175218060543869e-06, + "loss": 0.0144, + "step": 13020 + }, + { + "epoch": 0.08357037260328445, + "grad_norm": 0.4921818971633911, + "learning_rate": 4.17842483324782e-06, + "loss": 0.0104, + "step": 13030 + }, + { + "epoch": 0.08363450949707055, + "grad_norm": 0.3467360734939575, + "learning_rate": 4.18163160595177e-06, + "loss": 0.012, + "step": 13040 + }, + { + "epoch": 0.08369864639085664, + "grad_norm": 0.9223970174789429, + "learning_rate": 4.1848383786557215e-06, + "loss": 0.0113, + "step": 13050 + }, + { + "epoch": 0.08376278328464275, + "grad_norm": 0.6632959246635437, + "learning_rate": 4.188045151359672e-06, + "loss": 0.0158, + "step": 13060 + }, + { + "epoch": 0.08382692017842884, + "grad_norm": 0.754456102848053, + "learning_rate": 4.191251924063623e-06, + "loss": 0.0139, + "step": 13070 + }, + { + "epoch": 0.08389105707221493, + "grad_norm": 0.7275996804237366, + "learning_rate": 4.194458696767573e-06, + "loss": 0.0101, + "step": 13080 + }, + { + "epoch": 0.08395519396600103, + "grad_norm": 0.7685105204582214, + "learning_rate": 4.197665469471524e-06, + "loss": 0.0131, + "step": 13090 + }, + { + "epoch": 0.08401933085978713, + "grad_norm": 0.4401727616786957, + "learning_rate": 4.200872242175475e-06, + "loss": 0.0107, + "step": 13100 + }, + { + "epoch": 0.08408346775357323, + "grad_norm": 0.9203099012374878, + "learning_rate": 4.204079014879426e-06, + "loss": 0.0132, + "step": 13110 + }, + { + "epoch": 0.08414760464735932, + "grad_norm": 0.4970671236515045, + "learning_rate": 4.207285787583377e-06, + "loss": 0.0128, + "step": 13120 + }, + { + "epoch": 0.08421174154114543, + "grad_norm": 0.4000563621520996, + "learning_rate": 4.210492560287327e-06, + "loss": 0.0114, + "step": 13130 + }, + { + "epoch": 0.08427587843493152, + "grad_norm": 0.5417554378509521, + "learning_rate": 4.213699332991278e-06, + "loss": 0.0117, + "step": 13140 + }, + { + "epoch": 0.08434001532871761, + "grad_norm": 0.47524988651275635, + "learning_rate": 4.216906105695228e-06, + "loss": 0.0079, + "step": 13150 + }, + { + "epoch": 0.0844041522225037, + "grad_norm": 0.502637505531311, + "learning_rate": 4.2201128783991795e-06, + "loss": 0.0124, + "step": 13160 + }, + { + "epoch": 0.08446828911628981, + "grad_norm": 0.45672890543937683, + "learning_rate": 4.22331965110313e-06, + "loss": 0.0117, + "step": 13170 + }, + { + "epoch": 0.0845324260100759, + "grad_norm": 0.5679007172584534, + "learning_rate": 4.226526423807081e-06, + "loss": 0.0093, + "step": 13180 + }, + { + "epoch": 0.084596562903862, + "grad_norm": 0.5174263715744019, + "learning_rate": 4.229733196511032e-06, + "loss": 0.0087, + "step": 13190 + }, + { + "epoch": 0.0846606997976481, + "grad_norm": 0.6781324148178101, + "learning_rate": 4.232939969214982e-06, + "loss": 0.0082, + "step": 13200 + }, + { + "epoch": 0.0847248366914342, + "grad_norm": 0.8433681130409241, + "learning_rate": 4.236146741918933e-06, + "loss": 0.0116, + "step": 13210 + }, + { + "epoch": 0.08478897358522029, + "grad_norm": 0.4254860579967499, + "learning_rate": 4.239353514622884e-06, + "loss": 0.0111, + "step": 13220 + }, + { + "epoch": 0.08485311047900639, + "grad_norm": 0.4683290719985962, + "learning_rate": 4.242560287326835e-06, + "loss": 0.008, + "step": 13230 + }, + { + "epoch": 0.08491724737279249, + "grad_norm": 0.9889957308769226, + "learning_rate": 4.245767060030785e-06, + "loss": 0.0127, + "step": 13240 + }, + { + "epoch": 0.08498138426657859, + "grad_norm": 0.9414776563644409, + "learning_rate": 4.248973832734736e-06, + "loss": 0.0111, + "step": 13250 + }, + { + "epoch": 0.08504552116036468, + "grad_norm": 0.6853829622268677, + "learning_rate": 4.252180605438686e-06, + "loss": 0.0146, + "step": 13260 + }, + { + "epoch": 0.08510965805415079, + "grad_norm": 0.8550167679786682, + "learning_rate": 4.2553873781426375e-06, + "loss": 0.0153, + "step": 13270 + }, + { + "epoch": 0.08517379494793688, + "grad_norm": 0.49315908551216125, + "learning_rate": 4.258594150846589e-06, + "loss": 0.0079, + "step": 13280 + }, + { + "epoch": 0.08523793184172297, + "grad_norm": 0.842653751373291, + "learning_rate": 4.261800923550539e-06, + "loss": 0.0131, + "step": 13290 + }, + { + "epoch": 0.08530206873550907, + "grad_norm": 0.841410219669342, + "learning_rate": 4.26500769625449e-06, + "loss": 0.0104, + "step": 13300 + }, + { + "epoch": 0.08536620562929517, + "grad_norm": 0.7665841579437256, + "learning_rate": 4.26821446895844e-06, + "loss": 0.0107, + "step": 13310 + }, + { + "epoch": 0.08543034252308127, + "grad_norm": 0.44842883944511414, + "learning_rate": 4.271421241662391e-06, + "loss": 0.0143, + "step": 13320 + }, + { + "epoch": 0.08549447941686736, + "grad_norm": 0.47715622186660767, + "learning_rate": 4.2746280143663425e-06, + "loss": 0.0125, + "step": 13330 + }, + { + "epoch": 0.08555861631065347, + "grad_norm": 0.6261366009712219, + "learning_rate": 4.277834787070293e-06, + "loss": 0.008, + "step": 13340 + }, + { + "epoch": 0.08562275320443956, + "grad_norm": 0.6199816465377808, + "learning_rate": 4.281041559774243e-06, + "loss": 0.0099, + "step": 13350 + }, + { + "epoch": 0.08568689009822565, + "grad_norm": 0.6077964901924133, + "learning_rate": 4.284248332478194e-06, + "loss": 0.0094, + "step": 13360 + }, + { + "epoch": 0.08575102699201174, + "grad_norm": 0.5811752676963806, + "learning_rate": 4.287455105182145e-06, + "loss": 0.0094, + "step": 13370 + }, + { + "epoch": 0.08581516388579785, + "grad_norm": 0.5186547636985779, + "learning_rate": 4.2906618778860955e-06, + "loss": 0.0089, + "step": 13380 + }, + { + "epoch": 0.08587930077958394, + "grad_norm": 0.6199879050254822, + "learning_rate": 4.293868650590047e-06, + "loss": 0.0131, + "step": 13390 + }, + { + "epoch": 0.08594343767337004, + "grad_norm": 0.5385717153549194, + "learning_rate": 4.297075423293997e-06, + "loss": 0.0155, + "step": 13400 + }, + { + "epoch": 0.08600757456715614, + "grad_norm": 0.5268189907073975, + "learning_rate": 4.300282195997948e-06, + "loss": 0.0094, + "step": 13410 + }, + { + "epoch": 0.08607171146094224, + "grad_norm": 1.222302794456482, + "learning_rate": 4.303488968701899e-06, + "loss": 0.0145, + "step": 13420 + }, + { + "epoch": 0.08613584835472833, + "grad_norm": 0.36908653378486633, + "learning_rate": 4.306695741405849e-06, + "loss": 0.0103, + "step": 13430 + }, + { + "epoch": 0.08619998524851442, + "grad_norm": 1.2218557596206665, + "learning_rate": 4.3099025141098e-06, + "loss": 0.0094, + "step": 13440 + }, + { + "epoch": 0.08626412214230053, + "grad_norm": 1.115786075592041, + "learning_rate": 4.313109286813751e-06, + "loss": 0.0106, + "step": 13450 + }, + { + "epoch": 0.08632825903608662, + "grad_norm": 0.48821571469306946, + "learning_rate": 4.316316059517702e-06, + "loss": 0.0102, + "step": 13460 + }, + { + "epoch": 0.08639239592987272, + "grad_norm": 0.3381451964378357, + "learning_rate": 4.319522832221653e-06, + "loss": 0.0078, + "step": 13470 + }, + { + "epoch": 0.08645653282365882, + "grad_norm": 0.682102382183075, + "learning_rate": 4.322729604925603e-06, + "loss": 0.01, + "step": 13480 + }, + { + "epoch": 0.08652066971744492, + "grad_norm": 0.3813456892967224, + "learning_rate": 4.3259363776295535e-06, + "loss": 0.0113, + "step": 13490 + }, + { + "epoch": 0.08658480661123101, + "grad_norm": 0.4198649823665619, + "learning_rate": 4.329143150333505e-06, + "loss": 0.0084, + "step": 13500 + }, + { + "epoch": 0.0866489435050171, + "grad_norm": 0.5584994554519653, + "learning_rate": 4.332349923037456e-06, + "loss": 0.0132, + "step": 13510 + }, + { + "epoch": 0.08671308039880321, + "grad_norm": 0.490875780582428, + "learning_rate": 4.335556695741406e-06, + "loss": 0.0111, + "step": 13520 + }, + { + "epoch": 0.0867772172925893, + "grad_norm": 0.3352775275707245, + "learning_rate": 4.338763468445356e-06, + "loss": 0.011, + "step": 13530 + }, + { + "epoch": 0.0868413541863754, + "grad_norm": 0.7659197449684143, + "learning_rate": 4.341970241149307e-06, + "loss": 0.0131, + "step": 13540 + }, + { + "epoch": 0.08690549108016149, + "grad_norm": 0.5991475582122803, + "learning_rate": 4.3451770138532585e-06, + "loss": 0.0102, + "step": 13550 + }, + { + "epoch": 0.0869696279739476, + "grad_norm": 0.6653827428817749, + "learning_rate": 4.34838378655721e-06, + "loss": 0.009, + "step": 13560 + }, + { + "epoch": 0.08703376486773369, + "grad_norm": 0.39620816707611084, + "learning_rate": 4.35159055926116e-06, + "loss": 0.0076, + "step": 13570 + }, + { + "epoch": 0.08709790176151978, + "grad_norm": 1.3507369756698608, + "learning_rate": 4.35479733196511e-06, + "loss": 0.0089, + "step": 13580 + }, + { + "epoch": 0.08716203865530589, + "grad_norm": 0.7978771924972534, + "learning_rate": 4.358004104669061e-06, + "loss": 0.0097, + "step": 13590 + }, + { + "epoch": 0.08722617554909198, + "grad_norm": 0.5015578269958496, + "learning_rate": 4.361210877373012e-06, + "loss": 0.0126, + "step": 13600 + }, + { + "epoch": 0.08729031244287808, + "grad_norm": 0.8807877898216248, + "learning_rate": 4.364417650076963e-06, + "loss": 0.0109, + "step": 13610 + }, + { + "epoch": 0.08735444933666417, + "grad_norm": 0.7356600761413574, + "learning_rate": 4.367624422780914e-06, + "loss": 0.0103, + "step": 13620 + }, + { + "epoch": 0.08741858623045028, + "grad_norm": 0.839003324508667, + "learning_rate": 4.370831195484864e-06, + "loss": 0.012, + "step": 13630 + }, + { + "epoch": 0.08748272312423637, + "grad_norm": 0.6434532403945923, + "learning_rate": 4.374037968188815e-06, + "loss": 0.0135, + "step": 13640 + }, + { + "epoch": 0.08754686001802246, + "grad_norm": 0.8763306140899658, + "learning_rate": 4.377244740892766e-06, + "loss": 0.0102, + "step": 13650 + }, + { + "epoch": 0.08761099691180857, + "grad_norm": 0.4847932457923889, + "learning_rate": 4.3804515135967165e-06, + "loss": 0.0129, + "step": 13660 + }, + { + "epoch": 0.08767513380559466, + "grad_norm": 0.3382943868637085, + "learning_rate": 4.383658286300667e-06, + "loss": 0.0087, + "step": 13670 + }, + { + "epoch": 0.08773927069938076, + "grad_norm": 0.5592327117919922, + "learning_rate": 4.386865059004618e-06, + "loss": 0.008, + "step": 13680 + }, + { + "epoch": 0.08780340759316685, + "grad_norm": 1.143094539642334, + "learning_rate": 4.390071831708569e-06, + "loss": 0.0128, + "step": 13690 + }, + { + "epoch": 0.08786754448695296, + "grad_norm": 0.4922727644443512, + "learning_rate": 4.39327860441252e-06, + "loss": 0.0093, + "step": 13700 + }, + { + "epoch": 0.08793168138073905, + "grad_norm": 0.504297137260437, + "learning_rate": 4.39648537711647e-06, + "loss": 0.0118, + "step": 13710 + }, + { + "epoch": 0.08799581827452514, + "grad_norm": 0.5197631120681763, + "learning_rate": 4.399692149820421e-06, + "loss": 0.0098, + "step": 13720 + }, + { + "epoch": 0.08805995516831125, + "grad_norm": 0.8234639167785645, + "learning_rate": 4.402898922524372e-06, + "loss": 0.0112, + "step": 13730 + }, + { + "epoch": 0.08812409206209734, + "grad_norm": 0.1754753142595291, + "learning_rate": 4.406105695228323e-06, + "loss": 0.0056, + "step": 13740 + }, + { + "epoch": 0.08818822895588344, + "grad_norm": 0.4347337484359741, + "learning_rate": 4.409312467932273e-06, + "loss": 0.0097, + "step": 13750 + }, + { + "epoch": 0.08825236584966953, + "grad_norm": 0.875468909740448, + "learning_rate": 4.412519240636224e-06, + "loss": 0.0118, + "step": 13760 + }, + { + "epoch": 0.08831650274345564, + "grad_norm": 0.7216804623603821, + "learning_rate": 4.4157260133401745e-06, + "loss": 0.0107, + "step": 13770 + }, + { + "epoch": 0.08838063963724173, + "grad_norm": 0.7757262587547302, + "learning_rate": 4.418932786044126e-06, + "loss": 0.0146, + "step": 13780 + }, + { + "epoch": 0.08844477653102782, + "grad_norm": 0.23120538890361786, + "learning_rate": 4.422139558748077e-06, + "loss": 0.0069, + "step": 13790 + }, + { + "epoch": 0.08850891342481393, + "grad_norm": 0.4751850664615631, + "learning_rate": 4.425346331452027e-06, + "loss": 0.0133, + "step": 13800 + }, + { + "epoch": 0.08857305031860002, + "grad_norm": 0.7578360438346863, + "learning_rate": 4.428553104155977e-06, + "loss": 0.0221, + "step": 13810 + }, + { + "epoch": 0.08863718721238611, + "grad_norm": 0.5103495121002197, + "learning_rate": 4.431759876859928e-06, + "loss": 0.0081, + "step": 13820 + }, + { + "epoch": 0.08870132410617221, + "grad_norm": 0.7559943795204163, + "learning_rate": 4.4349666495638795e-06, + "loss": 0.0161, + "step": 13830 + }, + { + "epoch": 0.08876546099995831, + "grad_norm": 0.6206265687942505, + "learning_rate": 4.438173422267831e-06, + "loss": 0.0163, + "step": 13840 + }, + { + "epoch": 0.08882959789374441, + "grad_norm": 0.5653325915336609, + "learning_rate": 4.441380194971781e-06, + "loss": 0.0116, + "step": 13850 + }, + { + "epoch": 0.0888937347875305, + "grad_norm": 0.28707340359687805, + "learning_rate": 4.444586967675731e-06, + "loss": 0.0078, + "step": 13860 + }, + { + "epoch": 0.08895787168131661, + "grad_norm": 0.45919936895370483, + "learning_rate": 4.447793740379682e-06, + "loss": 0.0127, + "step": 13870 + }, + { + "epoch": 0.0890220085751027, + "grad_norm": 0.4506450593471527, + "learning_rate": 4.451000513083633e-06, + "loss": 0.0092, + "step": 13880 + }, + { + "epoch": 0.0890861454688888, + "grad_norm": 0.8949733376502991, + "learning_rate": 4.454207285787584e-06, + "loss": 0.0094, + "step": 13890 + }, + { + "epoch": 0.08915028236267489, + "grad_norm": 0.4191751480102539, + "learning_rate": 4.457414058491534e-06, + "loss": 0.0109, + "step": 13900 + }, + { + "epoch": 0.089214419256461, + "grad_norm": 0.8228646516799927, + "learning_rate": 4.460620831195485e-06, + "loss": 0.0099, + "step": 13910 + }, + { + "epoch": 0.08927855615024709, + "grad_norm": 0.8701620697975159, + "learning_rate": 4.463827603899436e-06, + "loss": 0.0129, + "step": 13920 + }, + { + "epoch": 0.08934269304403318, + "grad_norm": 0.6736899614334106, + "learning_rate": 4.467034376603387e-06, + "loss": 0.0072, + "step": 13930 + }, + { + "epoch": 0.08940682993781929, + "grad_norm": 0.4631296396255493, + "learning_rate": 4.4702411493073375e-06, + "loss": 0.0123, + "step": 13940 + }, + { + "epoch": 0.08947096683160538, + "grad_norm": 1.0359212160110474, + "learning_rate": 4.473447922011288e-06, + "loss": 0.0125, + "step": 13950 + }, + { + "epoch": 0.08953510372539147, + "grad_norm": 0.6013842821121216, + "learning_rate": 4.476654694715239e-06, + "loss": 0.012, + "step": 13960 + }, + { + "epoch": 0.08959924061917757, + "grad_norm": 0.4382389485836029, + "learning_rate": 4.47986146741919e-06, + "loss": 0.0132, + "step": 13970 + }, + { + "epoch": 0.08966337751296367, + "grad_norm": 0.5488568544387817, + "learning_rate": 4.48306824012314e-06, + "loss": 0.0105, + "step": 13980 + }, + { + "epoch": 0.08972751440674977, + "grad_norm": 0.7292414903640747, + "learning_rate": 4.486275012827091e-06, + "loss": 0.0119, + "step": 13990 + }, + { + "epoch": 0.08979165130053586, + "grad_norm": 0.930444598197937, + "learning_rate": 4.489481785531042e-06, + "loss": 0.0086, + "step": 14000 + }, + { + "epoch": 0.08985578819432197, + "grad_norm": 0.6052126884460449, + "learning_rate": 4.492688558234993e-06, + "loss": 0.0104, + "step": 14010 + }, + { + "epoch": 0.08991992508810806, + "grad_norm": 0.46579042077064514, + "learning_rate": 4.495895330938944e-06, + "loss": 0.0082, + "step": 14020 + }, + { + "epoch": 0.08998406198189415, + "grad_norm": 0.8902080655097961, + "learning_rate": 4.499102103642894e-06, + "loss": 0.0098, + "step": 14030 + }, + { + "epoch": 0.09004819887568025, + "grad_norm": 0.9307649731636047, + "learning_rate": 4.502308876346844e-06, + "loss": 0.0131, + "step": 14040 + }, + { + "epoch": 0.09011233576946635, + "grad_norm": 0.33789774775505066, + "learning_rate": 4.5055156490507955e-06, + "loss": 0.0101, + "step": 14050 + }, + { + "epoch": 0.09017647266325245, + "grad_norm": 0.8060634136199951, + "learning_rate": 4.508722421754747e-06, + "loss": 0.0096, + "step": 14060 + }, + { + "epoch": 0.09024060955703854, + "grad_norm": 0.5572049021720886, + "learning_rate": 4.511929194458698e-06, + "loss": 0.0084, + "step": 14070 + }, + { + "epoch": 0.09030474645082465, + "grad_norm": 0.6087406277656555, + "learning_rate": 4.515135967162648e-06, + "loss": 0.0089, + "step": 14080 + }, + { + "epoch": 0.09036888334461074, + "grad_norm": 0.681932270526886, + "learning_rate": 4.518342739866598e-06, + "loss": 0.0132, + "step": 14090 + }, + { + "epoch": 0.09043302023839683, + "grad_norm": 0.38711613416671753, + "learning_rate": 4.521549512570549e-06, + "loss": 0.0086, + "step": 14100 + }, + { + "epoch": 0.09049715713218293, + "grad_norm": 0.24457596242427826, + "learning_rate": 4.5247562852745005e-06, + "loss": 0.0125, + "step": 14110 + }, + { + "epoch": 0.09056129402596903, + "grad_norm": 0.3963066339492798, + "learning_rate": 4.527963057978451e-06, + "loss": 0.0091, + "step": 14120 + }, + { + "epoch": 0.09062543091975513, + "grad_norm": 0.30504074692726135, + "learning_rate": 4.531169830682401e-06, + "loss": 0.0109, + "step": 14130 + }, + { + "epoch": 0.09068956781354122, + "grad_norm": 0.5058887004852295, + "learning_rate": 4.534376603386352e-06, + "loss": 0.0176, + "step": 14140 + }, + { + "epoch": 0.09075370470732733, + "grad_norm": 0.29121342301368713, + "learning_rate": 4.537583376090303e-06, + "loss": 0.0088, + "step": 14150 + }, + { + "epoch": 0.09081784160111342, + "grad_norm": 0.9375796318054199, + "learning_rate": 4.540790148794254e-06, + "loss": 0.0138, + "step": 14160 + }, + { + "epoch": 0.09088197849489951, + "grad_norm": 1.0603824853897095, + "learning_rate": 4.543996921498205e-06, + "loss": 0.0111, + "step": 14170 + }, + { + "epoch": 0.0909461153886856, + "grad_norm": 0.8005550503730774, + "learning_rate": 4.547203694202155e-06, + "loss": 0.014, + "step": 14180 + }, + { + "epoch": 0.09101025228247171, + "grad_norm": 0.4290587604045868, + "learning_rate": 4.550410466906106e-06, + "loss": 0.0159, + "step": 14190 + }, + { + "epoch": 0.0910743891762578, + "grad_norm": 0.436431884765625, + "learning_rate": 4.553617239610057e-06, + "loss": 0.0112, + "step": 14200 + }, + { + "epoch": 0.0911385260700439, + "grad_norm": 0.5074294209480286, + "learning_rate": 4.556824012314007e-06, + "loss": 0.0081, + "step": 14210 + }, + { + "epoch": 0.09120266296382999, + "grad_norm": 0.5892575979232788, + "learning_rate": 4.5600307850179585e-06, + "loss": 0.0094, + "step": 14220 + }, + { + "epoch": 0.0912667998576161, + "grad_norm": 0.7842941284179688, + "learning_rate": 4.563237557721909e-06, + "loss": 0.0091, + "step": 14230 + }, + { + "epoch": 0.09133093675140219, + "grad_norm": 0.582675039768219, + "learning_rate": 4.56644433042586e-06, + "loss": 0.0164, + "step": 14240 + }, + { + "epoch": 0.09139507364518829, + "grad_norm": 0.7669952511787415, + "learning_rate": 4.569651103129811e-06, + "loss": 0.0146, + "step": 14250 + }, + { + "epoch": 0.09145921053897439, + "grad_norm": 0.6250030398368835, + "learning_rate": 4.572857875833761e-06, + "loss": 0.0111, + "step": 14260 + }, + { + "epoch": 0.09152334743276049, + "grad_norm": 0.7588163018226624, + "learning_rate": 4.5760646485377115e-06, + "loss": 0.0114, + "step": 14270 + }, + { + "epoch": 0.09158748432654658, + "grad_norm": 0.6219152808189392, + "learning_rate": 4.579271421241663e-06, + "loss": 0.0107, + "step": 14280 + }, + { + "epoch": 0.09165162122033267, + "grad_norm": 0.585282027721405, + "learning_rate": 4.582478193945614e-06, + "loss": 0.0141, + "step": 14290 + }, + { + "epoch": 0.09171575811411878, + "grad_norm": 0.5579394698143005, + "learning_rate": 4.585684966649565e-06, + "loss": 0.0095, + "step": 14300 + }, + { + "epoch": 0.09177989500790487, + "grad_norm": 0.5351959466934204, + "learning_rate": 4.588891739353515e-06, + "loss": 0.0121, + "step": 14310 + }, + { + "epoch": 0.09184403190169096, + "grad_norm": 0.49338340759277344, + "learning_rate": 4.592098512057465e-06, + "loss": 0.0131, + "step": 14320 + }, + { + "epoch": 0.09190816879547707, + "grad_norm": 0.5949311256408691, + "learning_rate": 4.5953052847614165e-06, + "loss": 0.0108, + "step": 14330 + }, + { + "epoch": 0.09197230568926316, + "grad_norm": 0.5237823128700256, + "learning_rate": 4.598512057465368e-06, + "loss": 0.0128, + "step": 14340 + }, + { + "epoch": 0.09203644258304926, + "grad_norm": 0.5870658755302429, + "learning_rate": 4.601718830169318e-06, + "loss": 0.0088, + "step": 14350 + }, + { + "epoch": 0.09210057947683535, + "grad_norm": 0.9670231938362122, + "learning_rate": 4.604925602873269e-06, + "loss": 0.0159, + "step": 14360 + }, + { + "epoch": 0.09216471637062146, + "grad_norm": 0.5894929766654968, + "learning_rate": 4.608132375577219e-06, + "loss": 0.0106, + "step": 14370 + }, + { + "epoch": 0.09222885326440755, + "grad_norm": 0.38806742429733276, + "learning_rate": 4.61133914828117e-06, + "loss": 0.0112, + "step": 14380 + }, + { + "epoch": 0.09229299015819364, + "grad_norm": 0.5953904986381531, + "learning_rate": 4.6145459209851215e-06, + "loss": 0.0101, + "step": 14390 + }, + { + "epoch": 0.09235712705197975, + "grad_norm": 0.5008606910705566, + "learning_rate": 4.617752693689072e-06, + "loss": 0.012, + "step": 14400 + }, + { + "epoch": 0.09242126394576584, + "grad_norm": 1.0356851816177368, + "learning_rate": 4.620959466393022e-06, + "loss": 0.0082, + "step": 14410 + }, + { + "epoch": 0.09248540083955194, + "grad_norm": 0.5471305251121521, + "learning_rate": 4.624166239096973e-06, + "loss": 0.0083, + "step": 14420 + }, + { + "epoch": 0.09254953773333803, + "grad_norm": 0.4250575304031372, + "learning_rate": 4.627373011800924e-06, + "loss": 0.018, + "step": 14430 + }, + { + "epoch": 0.09261367462712414, + "grad_norm": 0.44478684663772583, + "learning_rate": 4.6305797845048745e-06, + "loss": 0.0105, + "step": 14440 + }, + { + "epoch": 0.09267781152091023, + "grad_norm": 0.6541079878807068, + "learning_rate": 4.633786557208826e-06, + "loss": 0.009, + "step": 14450 + }, + { + "epoch": 0.09274194841469632, + "grad_norm": 0.3483266234397888, + "learning_rate": 4.636993329912776e-06, + "loss": 0.0097, + "step": 14460 + }, + { + "epoch": 0.09280608530848243, + "grad_norm": 0.34588584303855896, + "learning_rate": 4.640200102616727e-06, + "loss": 0.0065, + "step": 14470 + }, + { + "epoch": 0.09287022220226852, + "grad_norm": 0.26781347393989563, + "learning_rate": 4.643406875320678e-06, + "loss": 0.0093, + "step": 14480 + }, + { + "epoch": 0.09293435909605462, + "grad_norm": 0.6617163419723511, + "learning_rate": 4.646613648024628e-06, + "loss": 0.0112, + "step": 14490 + }, + { + "epoch": 0.09299849598984071, + "grad_norm": 0.6089116930961609, + "learning_rate": 4.649820420728579e-06, + "loss": 0.0171, + "step": 14500 + }, + { + "epoch": 0.09306263288362682, + "grad_norm": 0.2882973253726959, + "learning_rate": 4.65302719343253e-06, + "loss": 0.0104, + "step": 14510 + }, + { + "epoch": 0.09312676977741291, + "grad_norm": 0.4431282877922058, + "learning_rate": 4.656233966136481e-06, + "loss": 0.01, + "step": 14520 + }, + { + "epoch": 0.093190906671199, + "grad_norm": 0.6200217008590698, + "learning_rate": 4.659440738840431e-06, + "loss": 0.0099, + "step": 14530 + }, + { + "epoch": 0.09325504356498511, + "grad_norm": 0.8319776654243469, + "learning_rate": 4.662647511544382e-06, + "loss": 0.0122, + "step": 14540 + }, + { + "epoch": 0.0933191804587712, + "grad_norm": 0.5067435503005981, + "learning_rate": 4.6658542842483325e-06, + "loss": 0.013, + "step": 14550 + }, + { + "epoch": 0.0933833173525573, + "grad_norm": 0.5280515551567078, + "learning_rate": 4.669061056952284e-06, + "loss": 0.0099, + "step": 14560 + }, + { + "epoch": 0.09344745424634339, + "grad_norm": 0.6483435034751892, + "learning_rate": 4.672267829656235e-06, + "loss": 0.0134, + "step": 14570 + }, + { + "epoch": 0.0935115911401295, + "grad_norm": 0.42229729890823364, + "learning_rate": 4.675474602360185e-06, + "loss": 0.0096, + "step": 14580 + }, + { + "epoch": 0.09357572803391559, + "grad_norm": 0.7056224346160889, + "learning_rate": 4.678681375064136e-06, + "loss": 0.011, + "step": 14590 + }, + { + "epoch": 0.09363986492770168, + "grad_norm": 0.636827826499939, + "learning_rate": 4.681888147768086e-06, + "loss": 0.0087, + "step": 14600 + }, + { + "epoch": 0.09370400182148779, + "grad_norm": 0.660778820514679, + "learning_rate": 4.6850949204720375e-06, + "loss": 0.008, + "step": 14610 + }, + { + "epoch": 0.09376813871527388, + "grad_norm": 0.4916780889034271, + "learning_rate": 4.688301693175988e-06, + "loss": 0.0078, + "step": 14620 + }, + { + "epoch": 0.09383227560905998, + "grad_norm": 1.1069315671920776, + "learning_rate": 4.691508465879939e-06, + "loss": 0.0141, + "step": 14630 + }, + { + "epoch": 0.09389641250284607, + "grad_norm": 0.9133396744728088, + "learning_rate": 4.694715238583889e-06, + "loss": 0.0077, + "step": 14640 + }, + { + "epoch": 0.09396054939663218, + "grad_norm": 0.42861101031303406, + "learning_rate": 4.69792201128784e-06, + "loss": 0.0116, + "step": 14650 + }, + { + "epoch": 0.09402468629041827, + "grad_norm": 0.5793526768684387, + "learning_rate": 4.701128783991791e-06, + "loss": 0.0138, + "step": 14660 + }, + { + "epoch": 0.09408882318420436, + "grad_norm": 0.24592435359954834, + "learning_rate": 4.704335556695742e-06, + "loss": 0.0145, + "step": 14670 + }, + { + "epoch": 0.09415296007799047, + "grad_norm": 0.4596893787384033, + "learning_rate": 4.707542329399693e-06, + "loss": 0.0143, + "step": 14680 + }, + { + "epoch": 0.09421709697177656, + "grad_norm": 0.25949400663375854, + "learning_rate": 4.710749102103643e-06, + "loss": 0.0094, + "step": 14690 + }, + { + "epoch": 0.09428123386556266, + "grad_norm": 0.7687262892723083, + "learning_rate": 4.713955874807594e-06, + "loss": 0.0148, + "step": 14700 + }, + { + "epoch": 0.09434537075934875, + "grad_norm": 1.0646096467971802, + "learning_rate": 4.717162647511544e-06, + "loss": 0.0146, + "step": 14710 + }, + { + "epoch": 0.09440950765313486, + "grad_norm": 0.5260186195373535, + "learning_rate": 4.7203694202154955e-06, + "loss": 0.0097, + "step": 14720 + }, + { + "epoch": 0.09447364454692095, + "grad_norm": 0.5607770681381226, + "learning_rate": 4.723576192919447e-06, + "loss": 0.0103, + "step": 14730 + }, + { + "epoch": 0.09453778144070704, + "grad_norm": 0.7789020538330078, + "learning_rate": 4.726782965623397e-06, + "loss": 0.0144, + "step": 14740 + }, + { + "epoch": 0.09460191833449315, + "grad_norm": 0.48097801208496094, + "learning_rate": 4.729989738327348e-06, + "loss": 0.0083, + "step": 14750 + }, + { + "epoch": 0.09466605522827924, + "grad_norm": 0.668645441532135, + "learning_rate": 4.733196511031298e-06, + "loss": 0.013, + "step": 14760 + }, + { + "epoch": 0.09473019212206533, + "grad_norm": 0.4790743291378021, + "learning_rate": 4.736403283735249e-06, + "loss": 0.0097, + "step": 14770 + }, + { + "epoch": 0.09479432901585143, + "grad_norm": 1.270419716835022, + "learning_rate": 4.7396100564392e-06, + "loss": 0.0091, + "step": 14780 + }, + { + "epoch": 0.09485846590963753, + "grad_norm": 0.5246705412864685, + "learning_rate": 4.742816829143151e-06, + "loss": 0.016, + "step": 14790 + }, + { + "epoch": 0.09492260280342363, + "grad_norm": 0.3491441607475281, + "learning_rate": 4.746023601847101e-06, + "loss": 0.013, + "step": 14800 + }, + { + "epoch": 0.09498673969720972, + "grad_norm": 0.49751850962638855, + "learning_rate": 4.749230374551052e-06, + "loss": 0.0086, + "step": 14810 + }, + { + "epoch": 0.09505087659099583, + "grad_norm": 0.7094282507896423, + "learning_rate": 4.752437147255003e-06, + "loss": 0.0166, + "step": 14820 + }, + { + "epoch": 0.09511501348478192, + "grad_norm": 0.5809402465820312, + "learning_rate": 4.7556439199589535e-06, + "loss": 0.0067, + "step": 14830 + }, + { + "epoch": 0.09517915037856801, + "grad_norm": 0.3063122034072876, + "learning_rate": 4.758850692662905e-06, + "loss": 0.0059, + "step": 14840 + }, + { + "epoch": 0.09524328727235411, + "grad_norm": 0.8257580399513245, + "learning_rate": 4.762057465366855e-06, + "loss": 0.008, + "step": 14850 + }, + { + "epoch": 0.09530742416614021, + "grad_norm": 0.5764210820198059, + "learning_rate": 4.765264238070806e-06, + "loss": 0.0099, + "step": 14860 + }, + { + "epoch": 0.09537156105992631, + "grad_norm": 0.4129992723464966, + "learning_rate": 4.768471010774756e-06, + "loss": 0.0087, + "step": 14870 + }, + { + "epoch": 0.0954356979537124, + "grad_norm": 0.4546699821949005, + "learning_rate": 4.771677783478707e-06, + "loss": 0.012, + "step": 14880 + }, + { + "epoch": 0.09549983484749851, + "grad_norm": 0.5095817446708679, + "learning_rate": 4.774884556182658e-06, + "loss": 0.014, + "step": 14890 + }, + { + "epoch": 0.0955639717412846, + "grad_norm": 0.6998573541641235, + "learning_rate": 4.778091328886609e-06, + "loss": 0.0123, + "step": 14900 + }, + { + "epoch": 0.0956281086350707, + "grad_norm": 0.30628302693367004, + "learning_rate": 4.78129810159056e-06, + "loss": 0.0096, + "step": 14910 + }, + { + "epoch": 0.09569224552885679, + "grad_norm": 0.42637142539024353, + "learning_rate": 4.78450487429451e-06, + "loss": 0.0073, + "step": 14920 + }, + { + "epoch": 0.0957563824226429, + "grad_norm": 0.4169028401374817, + "learning_rate": 4.787711646998461e-06, + "loss": 0.0083, + "step": 14930 + }, + { + "epoch": 0.09582051931642899, + "grad_norm": 0.6567114591598511, + "learning_rate": 4.7909184197024116e-06, + "loss": 0.0117, + "step": 14940 + }, + { + "epoch": 0.09588465621021508, + "grad_norm": 0.3499446213245392, + "learning_rate": 4.794125192406363e-06, + "loss": 0.0184, + "step": 14950 + }, + { + "epoch": 0.09594879310400117, + "grad_norm": 0.7126962542533875, + "learning_rate": 4.797331965110314e-06, + "loss": 0.0161, + "step": 14960 + }, + { + "epoch": 0.09601292999778728, + "grad_norm": 0.5603629350662231, + "learning_rate": 4.800538737814264e-06, + "loss": 0.0094, + "step": 14970 + }, + { + "epoch": 0.09607706689157337, + "grad_norm": 0.6502920389175415, + "learning_rate": 4.803745510518215e-06, + "loss": 0.0083, + "step": 14980 + }, + { + "epoch": 0.09614120378535947, + "grad_norm": 0.5913206338882446, + "learning_rate": 4.806952283222165e-06, + "loss": 0.0095, + "step": 14990 + }, + { + "epoch": 0.09620534067914557, + "grad_norm": 0.809626579284668, + "learning_rate": 4.8101590559261165e-06, + "loss": 0.0097, + "step": 15000 + }, + { + "epoch": 0.09626947757293167, + "grad_norm": 0.544957160949707, + "learning_rate": 4.813365828630067e-06, + "loss": 0.0137, + "step": 15010 + }, + { + "epoch": 0.09633361446671776, + "grad_norm": 0.7568247318267822, + "learning_rate": 4.816572601334018e-06, + "loss": 0.0137, + "step": 15020 + }, + { + "epoch": 0.09639775136050385, + "grad_norm": 0.6684417724609375, + "learning_rate": 4.819779374037968e-06, + "loss": 0.0097, + "step": 15030 + }, + { + "epoch": 0.09646188825428996, + "grad_norm": 0.2786485254764557, + "learning_rate": 4.822986146741919e-06, + "loss": 0.0089, + "step": 15040 + }, + { + "epoch": 0.09652602514807605, + "grad_norm": 0.5178800225257874, + "learning_rate": 4.82619291944587e-06, + "loss": 0.0108, + "step": 15050 + }, + { + "epoch": 0.09659016204186215, + "grad_norm": 0.27339088916778564, + "learning_rate": 4.829399692149821e-06, + "loss": 0.0088, + "step": 15060 + }, + { + "epoch": 0.09665429893564825, + "grad_norm": 0.39263832569122314, + "learning_rate": 4.832606464853772e-06, + "loss": 0.0081, + "step": 15070 + }, + { + "epoch": 0.09671843582943435, + "grad_norm": 0.7981494069099426, + "learning_rate": 4.835813237557722e-06, + "loss": 0.0115, + "step": 15080 + }, + { + "epoch": 0.09678257272322044, + "grad_norm": 0.8040106296539307, + "learning_rate": 4.839020010261673e-06, + "loss": 0.0087, + "step": 15090 + }, + { + "epoch": 0.09684670961700653, + "grad_norm": 0.39302074909210205, + "learning_rate": 4.842226782965624e-06, + "loss": 0.0134, + "step": 15100 + }, + { + "epoch": 0.09691084651079264, + "grad_norm": 0.26070883870124817, + "learning_rate": 4.8454335556695745e-06, + "loss": 0.011, + "step": 15110 + }, + { + "epoch": 0.09697498340457873, + "grad_norm": 0.4337320327758789, + "learning_rate": 4.848640328373525e-06, + "loss": 0.0092, + "step": 15120 + }, + { + "epoch": 0.09703912029836483, + "grad_norm": 0.7534663677215576, + "learning_rate": 4.851847101077476e-06, + "loss": 0.0131, + "step": 15130 + }, + { + "epoch": 0.09710325719215093, + "grad_norm": 0.891177237033844, + "learning_rate": 4.855053873781427e-06, + "loss": 0.0096, + "step": 15140 + }, + { + "epoch": 0.09716739408593703, + "grad_norm": 0.6958470344543457, + "learning_rate": 4.858260646485377e-06, + "loss": 0.0123, + "step": 15150 + }, + { + "epoch": 0.09723153097972312, + "grad_norm": 0.5443466305732727, + "learning_rate": 4.861467419189328e-06, + "loss": 0.0115, + "step": 15160 + }, + { + "epoch": 0.09729566787350921, + "grad_norm": 0.3978094160556793, + "learning_rate": 4.864674191893279e-06, + "loss": 0.01, + "step": 15170 + }, + { + "epoch": 0.09735980476729532, + "grad_norm": 0.6599048972129822, + "learning_rate": 4.86788096459723e-06, + "loss": 0.0092, + "step": 15180 + }, + { + "epoch": 0.09742394166108141, + "grad_norm": 0.490038126707077, + "learning_rate": 4.871087737301181e-06, + "loss": 0.0075, + "step": 15190 + }, + { + "epoch": 0.0974880785548675, + "grad_norm": 0.5247456431388855, + "learning_rate": 4.874294510005131e-06, + "loss": 0.0101, + "step": 15200 + }, + { + "epoch": 0.09755221544865361, + "grad_norm": 0.7498074769973755, + "learning_rate": 4.8775012827090814e-06, + "loss": 0.0076, + "step": 15210 + }, + { + "epoch": 0.0976163523424397, + "grad_norm": 0.5493730902671814, + "learning_rate": 4.8807080554130326e-06, + "loss": 0.0084, + "step": 15220 + }, + { + "epoch": 0.0976804892362258, + "grad_norm": 0.5536153316497803, + "learning_rate": 4.883914828116984e-06, + "loss": 0.0116, + "step": 15230 + }, + { + "epoch": 0.09774462613001189, + "grad_norm": 0.5220369100570679, + "learning_rate": 4.887121600820934e-06, + "loss": 0.0066, + "step": 15240 + }, + { + "epoch": 0.097808763023798, + "grad_norm": 0.8064947724342346, + "learning_rate": 4.890328373524885e-06, + "loss": 0.0128, + "step": 15250 + }, + { + "epoch": 0.09787289991758409, + "grad_norm": 0.42613735795021057, + "learning_rate": 4.893535146228835e-06, + "loss": 0.0115, + "step": 15260 + }, + { + "epoch": 0.09793703681137018, + "grad_norm": 0.5674329996109009, + "learning_rate": 4.8967419189327864e-06, + "loss": 0.0127, + "step": 15270 + }, + { + "epoch": 0.09800117370515629, + "grad_norm": 0.30216166377067566, + "learning_rate": 4.8999486916367375e-06, + "loss": 0.0109, + "step": 15280 + }, + { + "epoch": 0.09806531059894238, + "grad_norm": 0.6081557273864746, + "learning_rate": 4.903155464340688e-06, + "loss": 0.014, + "step": 15290 + }, + { + "epoch": 0.09812944749272848, + "grad_norm": 0.8605742454528809, + "learning_rate": 4.906362237044638e-06, + "loss": 0.0115, + "step": 15300 + }, + { + "epoch": 0.09819358438651457, + "grad_norm": 0.5217424631118774, + "learning_rate": 4.909569009748589e-06, + "loss": 0.0076, + "step": 15310 + }, + { + "epoch": 0.09825772128030068, + "grad_norm": 0.6119788885116577, + "learning_rate": 4.91277578245254e-06, + "loss": 0.0125, + "step": 15320 + }, + { + "epoch": 0.09832185817408677, + "grad_norm": 0.6533595323562622, + "learning_rate": 4.915982555156491e-06, + "loss": 0.0101, + "step": 15330 + }, + { + "epoch": 0.09838599506787286, + "grad_norm": 0.30561283230781555, + "learning_rate": 4.919189327860442e-06, + "loss": 0.0084, + "step": 15340 + }, + { + "epoch": 0.09845013196165897, + "grad_norm": 0.4871523678302765, + "learning_rate": 4.922396100564392e-06, + "loss": 0.0095, + "step": 15350 + }, + { + "epoch": 0.09851426885544506, + "grad_norm": 0.6592676043510437, + "learning_rate": 4.925602873268343e-06, + "loss": 0.0114, + "step": 15360 + }, + { + "epoch": 0.09857840574923116, + "grad_norm": 0.9625756144523621, + "learning_rate": 4.928809645972294e-06, + "loss": 0.0124, + "step": 15370 + }, + { + "epoch": 0.09864254264301725, + "grad_norm": 0.4479202628135681, + "learning_rate": 4.9320164186762444e-06, + "loss": 0.0118, + "step": 15380 + }, + { + "epoch": 0.09870667953680336, + "grad_norm": 0.7261309623718262, + "learning_rate": 4.935223191380195e-06, + "loss": 0.0095, + "step": 15390 + }, + { + "epoch": 0.09877081643058945, + "grad_norm": 0.31453683972358704, + "learning_rate": 4.938429964084146e-06, + "loss": 0.0104, + "step": 15400 + }, + { + "epoch": 0.09883495332437554, + "grad_norm": 0.833005428314209, + "learning_rate": 4.941636736788097e-06, + "loss": 0.0108, + "step": 15410 + }, + { + "epoch": 0.09889909021816165, + "grad_norm": 0.22341343760490417, + "learning_rate": 4.944843509492048e-06, + "loss": 0.0119, + "step": 15420 + }, + { + "epoch": 0.09896322711194774, + "grad_norm": 0.23929423093795776, + "learning_rate": 4.948050282195998e-06, + "loss": 0.0095, + "step": 15430 + }, + { + "epoch": 0.09902736400573384, + "grad_norm": 0.6590454578399658, + "learning_rate": 4.9512570548999486e-06, + "loss": 0.0076, + "step": 15440 + }, + { + "epoch": 0.09909150089951993, + "grad_norm": 0.7365688681602478, + "learning_rate": 4.9544638276039e-06, + "loss": 0.0091, + "step": 15450 + }, + { + "epoch": 0.09915563779330604, + "grad_norm": 0.48849421739578247, + "learning_rate": 4.957670600307851e-06, + "loss": 0.0084, + "step": 15460 + }, + { + "epoch": 0.09921977468709213, + "grad_norm": 1.1581774950027466, + "learning_rate": 4.960877373011802e-06, + "loss": 0.0113, + "step": 15470 + }, + { + "epoch": 0.09928391158087822, + "grad_norm": 0.6785595417022705, + "learning_rate": 4.964084145715752e-06, + "loss": 0.0077, + "step": 15480 + }, + { + "epoch": 0.09934804847466433, + "grad_norm": 0.6807507276535034, + "learning_rate": 4.9672909184197024e-06, + "loss": 0.0095, + "step": 15490 + }, + { + "epoch": 0.09941218536845042, + "grad_norm": 1.0348047018051147, + "learning_rate": 4.9704976911236536e-06, + "loss": 0.0127, + "step": 15500 + }, + { + "epoch": 0.09947632226223652, + "grad_norm": 0.5778086185455322, + "learning_rate": 4.973704463827605e-06, + "loss": 0.0101, + "step": 15510 + }, + { + "epoch": 0.09954045915602261, + "grad_norm": 0.6600560545921326, + "learning_rate": 4.976911236531555e-06, + "loss": 0.0091, + "step": 15520 + }, + { + "epoch": 0.09960459604980872, + "grad_norm": 0.6662725210189819, + "learning_rate": 4.980118009235505e-06, + "loss": 0.0128, + "step": 15530 + }, + { + "epoch": 0.09966873294359481, + "grad_norm": 0.3879687786102295, + "learning_rate": 4.983324781939456e-06, + "loss": 0.0094, + "step": 15540 + }, + { + "epoch": 0.0997328698373809, + "grad_norm": 0.3806883990764618, + "learning_rate": 4.9865315546434074e-06, + "loss": 0.0082, + "step": 15550 + }, + { + "epoch": 0.09979700673116701, + "grad_norm": 0.4219231903553009, + "learning_rate": 4.9897383273473585e-06, + "loss": 0.0081, + "step": 15560 + }, + { + "epoch": 0.0998611436249531, + "grad_norm": 0.428092896938324, + "learning_rate": 4.992945100051309e-06, + "loss": 0.0097, + "step": 15570 + }, + { + "epoch": 0.0999252805187392, + "grad_norm": 0.8046025633811951, + "learning_rate": 4.996151872755259e-06, + "loss": 0.0121, + "step": 15580 + }, + { + "epoch": 0.09998941741252529, + "grad_norm": 0.5787171125411987, + "learning_rate": 4.99935864545921e-06, + "loss": 0.0104, + "step": 15590 + }, + { + "epoch": 0.1000535543063114, + "grad_norm": 0.3348299264907837, + "learning_rate": 5.002565418163161e-06, + "loss": 0.0102, + "step": 15600 + }, + { + "epoch": 0.10011769120009749, + "grad_norm": 0.3324357569217682, + "learning_rate": 5.0057721908671116e-06, + "loss": 0.0061, + "step": 15610 + }, + { + "epoch": 0.10018182809388358, + "grad_norm": 0.9565490484237671, + "learning_rate": 5.008978963571063e-06, + "loss": 0.0176, + "step": 15620 + }, + { + "epoch": 0.10024596498766969, + "grad_norm": 0.6187155842781067, + "learning_rate": 5.012185736275014e-06, + "loss": 0.0103, + "step": 15630 + }, + { + "epoch": 0.10031010188145578, + "grad_norm": 0.6529510617256165, + "learning_rate": 5.015392508978963e-06, + "loss": 0.0067, + "step": 15640 + }, + { + "epoch": 0.10037423877524188, + "grad_norm": 0.6332399845123291, + "learning_rate": 5.018599281682914e-06, + "loss": 0.0108, + "step": 15650 + }, + { + "epoch": 0.10043837566902797, + "grad_norm": 0.5133268237113953, + "learning_rate": 5.0218060543868654e-06, + "loss": 0.0089, + "step": 15660 + }, + { + "epoch": 0.10050251256281408, + "grad_norm": 0.34205323457717896, + "learning_rate": 5.025012827090816e-06, + "loss": 0.0112, + "step": 15670 + }, + { + "epoch": 0.10056664945660017, + "grad_norm": 0.6201695203781128, + "learning_rate": 5.028219599794767e-06, + "loss": 0.0108, + "step": 15680 + }, + { + "epoch": 0.10063078635038626, + "grad_norm": 0.30734115839004517, + "learning_rate": 5.031426372498718e-06, + "loss": 0.0176, + "step": 15690 + }, + { + "epoch": 0.10069492324417235, + "grad_norm": 0.5728802680969238, + "learning_rate": 5.034633145202669e-06, + "loss": 0.0128, + "step": 15700 + }, + { + "epoch": 0.10075906013795846, + "grad_norm": 0.5966327786445618, + "learning_rate": 5.037839917906619e-06, + "loss": 0.0142, + "step": 15710 + }, + { + "epoch": 0.10082319703174455, + "grad_norm": 0.3857520818710327, + "learning_rate": 5.04104669061057e-06, + "loss": 0.007, + "step": 15720 + }, + { + "epoch": 0.10088733392553065, + "grad_norm": 0.8242173790931702, + "learning_rate": 5.04425346331452e-06, + "loss": 0.0073, + "step": 15730 + }, + { + "epoch": 0.10095147081931675, + "grad_norm": 0.3519569933414459, + "learning_rate": 5.047460236018471e-06, + "loss": 0.0085, + "step": 15740 + }, + { + "epoch": 0.10101560771310285, + "grad_norm": 0.5322737097740173, + "learning_rate": 5.050667008722422e-06, + "loss": 0.0097, + "step": 15750 + }, + { + "epoch": 0.10107974460688894, + "grad_norm": 0.6807852387428284, + "learning_rate": 5.053873781426372e-06, + "loss": 0.0127, + "step": 15760 + }, + { + "epoch": 0.10114388150067503, + "grad_norm": 0.7069212198257446, + "learning_rate": 5.0570805541303234e-06, + "loss": 0.0091, + "step": 15770 + }, + { + "epoch": 0.10120801839446114, + "grad_norm": 0.6726041436195374, + "learning_rate": 5.0602873268342746e-06, + "loss": 0.0104, + "step": 15780 + }, + { + "epoch": 0.10127215528824723, + "grad_norm": 0.5097774267196655, + "learning_rate": 5.063494099538226e-06, + "loss": 0.0128, + "step": 15790 + }, + { + "epoch": 0.10133629218203333, + "grad_norm": 0.5260890126228333, + "learning_rate": 5.066700872242176e-06, + "loss": 0.0062, + "step": 15800 + }, + { + "epoch": 0.10140042907581943, + "grad_norm": 0.41690969467163086, + "learning_rate": 5.069907644946127e-06, + "loss": 0.0102, + "step": 15810 + }, + { + "epoch": 0.10146456596960553, + "grad_norm": 0.41804683208465576, + "learning_rate": 5.0731144176500765e-06, + "loss": 0.0134, + "step": 15820 + }, + { + "epoch": 0.10152870286339162, + "grad_norm": 0.3769904375076294, + "learning_rate": 5.076321190354028e-06, + "loss": 0.0101, + "step": 15830 + }, + { + "epoch": 0.10159283975717771, + "grad_norm": 1.0027804374694824, + "learning_rate": 5.079527963057979e-06, + "loss": 0.0114, + "step": 15840 + }, + { + "epoch": 0.10165697665096382, + "grad_norm": 0.556800365447998, + "learning_rate": 5.08273473576193e-06, + "loss": 0.0086, + "step": 15850 + }, + { + "epoch": 0.10172111354474991, + "grad_norm": 0.4666617512702942, + "learning_rate": 5.08594150846588e-06, + "loss": 0.0112, + "step": 15860 + }, + { + "epoch": 0.101785250438536, + "grad_norm": 0.542410135269165, + "learning_rate": 5.089148281169831e-06, + "loss": 0.007, + "step": 15870 + }, + { + "epoch": 0.10184938733232211, + "grad_norm": 0.6153366565704346, + "learning_rate": 5.092355053873782e-06, + "loss": 0.0073, + "step": 15880 + }, + { + "epoch": 0.1019135242261082, + "grad_norm": 0.44405901432037354, + "learning_rate": 5.0955618265777326e-06, + "loss": 0.007, + "step": 15890 + }, + { + "epoch": 0.1019776611198943, + "grad_norm": 0.3870573937892914, + "learning_rate": 5.098768599281684e-06, + "loss": 0.0132, + "step": 15900 + }, + { + "epoch": 0.1020417980136804, + "grad_norm": 0.5652061104774475, + "learning_rate": 5.101975371985634e-06, + "loss": 0.0105, + "step": 15910 + }, + { + "epoch": 0.1021059349074665, + "grad_norm": 0.8508217930793762, + "learning_rate": 5.105182144689584e-06, + "loss": 0.0135, + "step": 15920 + }, + { + "epoch": 0.1021700718012526, + "grad_norm": 0.3459599018096924, + "learning_rate": 5.108388917393535e-06, + "loss": 0.0086, + "step": 15930 + }, + { + "epoch": 0.10223420869503869, + "grad_norm": 0.5796306133270264, + "learning_rate": 5.1115956900974864e-06, + "loss": 0.0129, + "step": 15940 + }, + { + "epoch": 0.1022983455888248, + "grad_norm": 0.5738620162010193, + "learning_rate": 5.114802462801437e-06, + "loss": 0.0147, + "step": 15950 + }, + { + "epoch": 0.10236248248261089, + "grad_norm": 0.3462698459625244, + "learning_rate": 5.118009235505388e-06, + "loss": 0.0112, + "step": 15960 + }, + { + "epoch": 0.10242661937639698, + "grad_norm": 0.519221305847168, + "learning_rate": 5.121216008209339e-06, + "loss": 0.008, + "step": 15970 + }, + { + "epoch": 0.10249075627018307, + "grad_norm": 0.42029598355293274, + "learning_rate": 5.124422780913289e-06, + "loss": 0.0092, + "step": 15980 + }, + { + "epoch": 0.10255489316396918, + "grad_norm": 0.4115634262561798, + "learning_rate": 5.12762955361724e-06, + "loss": 0.0066, + "step": 15990 + }, + { + "epoch": 0.10261903005775527, + "grad_norm": 0.9606252908706665, + "learning_rate": 5.130836326321191e-06, + "loss": 0.0098, + "step": 16000 + }, + { + "epoch": 0.10268316695154137, + "grad_norm": 0.40013089776039124, + "learning_rate": 5.134043099025141e-06, + "loss": 0.0131, + "step": 16010 + }, + { + "epoch": 0.10274730384532747, + "grad_norm": 0.724273681640625, + "learning_rate": 5.137249871729092e-06, + "loss": 0.0101, + "step": 16020 + }, + { + "epoch": 0.10281144073911357, + "grad_norm": 0.4710618853569031, + "learning_rate": 5.140456644433043e-06, + "loss": 0.0125, + "step": 16030 + }, + { + "epoch": 0.10287557763289966, + "grad_norm": 0.40870100259780884, + "learning_rate": 5.143663417136993e-06, + "loss": 0.0092, + "step": 16040 + }, + { + "epoch": 0.10293971452668575, + "grad_norm": 0.640579342842102, + "learning_rate": 5.1468701898409444e-06, + "loss": 0.0112, + "step": 16050 + }, + { + "epoch": 0.10300385142047186, + "grad_norm": 0.5337666273117065, + "learning_rate": 5.1500769625448956e-06, + "loss": 0.0107, + "step": 16060 + }, + { + "epoch": 0.10306798831425795, + "grad_norm": 0.42939725518226624, + "learning_rate": 5.153283735248847e-06, + "loss": 0.011, + "step": 16070 + }, + { + "epoch": 0.10313212520804405, + "grad_norm": 0.532259464263916, + "learning_rate": 5.156490507952797e-06, + "loss": 0.0129, + "step": 16080 + }, + { + "epoch": 0.10319626210183015, + "grad_norm": 0.5188320279121399, + "learning_rate": 5.159697280656748e-06, + "loss": 0.0092, + "step": 16090 + }, + { + "epoch": 0.10326039899561625, + "grad_norm": 0.5226402282714844, + "learning_rate": 5.1629040533606975e-06, + "loss": 0.0085, + "step": 16100 + }, + { + "epoch": 0.10332453588940234, + "grad_norm": 0.45598104596138, + "learning_rate": 5.166110826064649e-06, + "loss": 0.0106, + "step": 16110 + }, + { + "epoch": 0.10338867278318843, + "grad_norm": 0.3412771224975586, + "learning_rate": 5.1693175987686e-06, + "loss": 0.0109, + "step": 16120 + }, + { + "epoch": 0.10345280967697454, + "grad_norm": 0.22648046910762787, + "learning_rate": 5.17252437147255e-06, + "loss": 0.0098, + "step": 16130 + }, + { + "epoch": 0.10351694657076063, + "grad_norm": 0.5391767024993896, + "learning_rate": 5.175731144176501e-06, + "loss": 0.0084, + "step": 16140 + }, + { + "epoch": 0.10358108346454672, + "grad_norm": 0.7075507640838623, + "learning_rate": 5.178937916880452e-06, + "loss": 0.0132, + "step": 16150 + }, + { + "epoch": 0.10364522035833283, + "grad_norm": 0.6682233214378357, + "learning_rate": 5.182144689584403e-06, + "loss": 0.0115, + "step": 16160 + }, + { + "epoch": 0.10370935725211892, + "grad_norm": 0.4709302484989166, + "learning_rate": 5.1853514622883536e-06, + "loss": 0.0093, + "step": 16170 + }, + { + "epoch": 0.10377349414590502, + "grad_norm": 0.4433741867542267, + "learning_rate": 5.188558234992305e-06, + "loss": 0.0088, + "step": 16180 + }, + { + "epoch": 0.10383763103969111, + "grad_norm": 0.6409521102905273, + "learning_rate": 5.191765007696254e-06, + "loss": 0.0117, + "step": 16190 + }, + { + "epoch": 0.10390176793347722, + "grad_norm": 0.639723539352417, + "learning_rate": 5.194971780400205e-06, + "loss": 0.0115, + "step": 16200 + }, + { + "epoch": 0.10396590482726331, + "grad_norm": 0.46562427282333374, + "learning_rate": 5.198178553104156e-06, + "loss": 0.0077, + "step": 16210 + }, + { + "epoch": 0.1040300417210494, + "grad_norm": 0.44589903950691223, + "learning_rate": 5.2013853258081074e-06, + "loss": 0.0073, + "step": 16220 + }, + { + "epoch": 0.10409417861483551, + "grad_norm": 0.5284266471862793, + "learning_rate": 5.204592098512058e-06, + "loss": 0.0109, + "step": 16230 + }, + { + "epoch": 0.1041583155086216, + "grad_norm": 0.4344564974308014, + "learning_rate": 5.207798871216009e-06, + "loss": 0.0144, + "step": 16240 + }, + { + "epoch": 0.1042224524024077, + "grad_norm": 0.5707141757011414, + "learning_rate": 5.21100564391996e-06, + "loss": 0.0086, + "step": 16250 + }, + { + "epoch": 0.10428658929619379, + "grad_norm": 0.43890857696533203, + "learning_rate": 5.21421241662391e-06, + "loss": 0.0098, + "step": 16260 + }, + { + "epoch": 0.1043507261899799, + "grad_norm": 0.2899274528026581, + "learning_rate": 5.217419189327861e-06, + "loss": 0.0078, + "step": 16270 + }, + { + "epoch": 0.10441486308376599, + "grad_norm": 0.5155450701713562, + "learning_rate": 5.220625962031812e-06, + "loss": 0.0104, + "step": 16280 + }, + { + "epoch": 0.10447899997755208, + "grad_norm": 0.3467402756214142, + "learning_rate": 5.223832734735762e-06, + "loss": 0.0078, + "step": 16290 + }, + { + "epoch": 0.10454313687133819, + "grad_norm": 0.389515221118927, + "learning_rate": 5.227039507439713e-06, + "loss": 0.0126, + "step": 16300 + }, + { + "epoch": 0.10460727376512428, + "grad_norm": 0.8770573735237122, + "learning_rate": 5.230246280143664e-06, + "loss": 0.0102, + "step": 16310 + }, + { + "epoch": 0.10467141065891038, + "grad_norm": 0.3336693048477173, + "learning_rate": 5.233453052847614e-06, + "loss": 0.0102, + "step": 16320 + }, + { + "epoch": 0.10473554755269647, + "grad_norm": 0.46077433228492737, + "learning_rate": 5.2366598255515655e-06, + "loss": 0.0101, + "step": 16330 + }, + { + "epoch": 0.10479968444648258, + "grad_norm": 0.29403117299079895, + "learning_rate": 5.2398665982555166e-06, + "loss": 0.0131, + "step": 16340 + }, + { + "epoch": 0.10486382134026867, + "grad_norm": 0.600862443447113, + "learning_rate": 5.243073370959467e-06, + "loss": 0.0126, + "step": 16350 + }, + { + "epoch": 0.10492795823405476, + "grad_norm": 0.3670068383216858, + "learning_rate": 5.246280143663418e-06, + "loss": 0.0077, + "step": 16360 + }, + { + "epoch": 0.10499209512784087, + "grad_norm": 0.5938912034034729, + "learning_rate": 5.249486916367368e-06, + "loss": 0.0082, + "step": 16370 + }, + { + "epoch": 0.10505623202162696, + "grad_norm": 0.5170342326164246, + "learning_rate": 5.2526936890713185e-06, + "loss": 0.0091, + "step": 16380 + }, + { + "epoch": 0.10512036891541306, + "grad_norm": 0.534372866153717, + "learning_rate": 5.25590046177527e-06, + "loss": 0.0106, + "step": 16390 + }, + { + "epoch": 0.10518450580919915, + "grad_norm": 0.49109458923339844, + "learning_rate": 5.259107234479221e-06, + "loss": 0.0087, + "step": 16400 + }, + { + "epoch": 0.10524864270298526, + "grad_norm": 0.6332600712776184, + "learning_rate": 5.262314007183171e-06, + "loss": 0.0104, + "step": 16410 + }, + { + "epoch": 0.10531277959677135, + "grad_norm": 0.2460511326789856, + "learning_rate": 5.265520779887122e-06, + "loss": 0.0065, + "step": 16420 + }, + { + "epoch": 0.10537691649055744, + "grad_norm": 0.3532591164112091, + "learning_rate": 5.268727552591073e-06, + "loss": 0.0121, + "step": 16430 + }, + { + "epoch": 0.10544105338434354, + "grad_norm": 0.3050209879875183, + "learning_rate": 5.271934325295024e-06, + "loss": 0.0341, + "step": 16440 + }, + { + "epoch": 0.10550519027812964, + "grad_norm": 0.44340020418167114, + "learning_rate": 5.2751410979989746e-06, + "loss": 0.0117, + "step": 16450 + }, + { + "epoch": 0.10556932717191574, + "grad_norm": 0.5648112297058105, + "learning_rate": 5.278347870702925e-06, + "loss": 0.0082, + "step": 16460 + }, + { + "epoch": 0.10563346406570183, + "grad_norm": 1.0201709270477295, + "learning_rate": 5.281554643406875e-06, + "loss": 0.0091, + "step": 16470 + }, + { + "epoch": 0.10569760095948794, + "grad_norm": 0.5138731002807617, + "learning_rate": 5.284761416110826e-06, + "loss": 0.0093, + "step": 16480 + }, + { + "epoch": 0.10576173785327403, + "grad_norm": 0.3440841734409332, + "learning_rate": 5.287968188814777e-06, + "loss": 0.0114, + "step": 16490 + }, + { + "epoch": 0.10582587474706012, + "grad_norm": 0.37862738966941833, + "learning_rate": 5.291174961518728e-06, + "loss": 0.0092, + "step": 16500 + }, + { + "epoch": 0.10589001164084622, + "grad_norm": 0.7785543203353882, + "learning_rate": 5.294381734222679e-06, + "loss": 0.0078, + "step": 16510 + }, + { + "epoch": 0.10595414853463232, + "grad_norm": 0.5919600129127502, + "learning_rate": 5.29758850692663e-06, + "loss": 0.0093, + "step": 16520 + }, + { + "epoch": 0.10601828542841842, + "grad_norm": 0.4332359731197357, + "learning_rate": 5.300795279630581e-06, + "loss": 0.0105, + "step": 16530 + }, + { + "epoch": 0.10608242232220451, + "grad_norm": 0.5844279527664185, + "learning_rate": 5.304002052334531e-06, + "loss": 0.011, + "step": 16540 + }, + { + "epoch": 0.10614655921599062, + "grad_norm": 0.7590405941009521, + "learning_rate": 5.3072088250384815e-06, + "loss": 0.0064, + "step": 16550 + }, + { + "epoch": 0.10621069610977671, + "grad_norm": 0.4074823260307312, + "learning_rate": 5.310415597742432e-06, + "loss": 0.0125, + "step": 16560 + }, + { + "epoch": 0.1062748330035628, + "grad_norm": 0.6540939807891846, + "learning_rate": 5.313622370446383e-06, + "loss": 0.0121, + "step": 16570 + }, + { + "epoch": 0.1063389698973489, + "grad_norm": 0.8707407116889954, + "learning_rate": 5.316829143150334e-06, + "loss": 0.0082, + "step": 16580 + }, + { + "epoch": 0.106403106791135, + "grad_norm": 0.6418952345848083, + "learning_rate": 5.320035915854285e-06, + "loss": 0.0131, + "step": 16590 + }, + { + "epoch": 0.1064672436849211, + "grad_norm": 0.5803223848342896, + "learning_rate": 5.323242688558235e-06, + "loss": 0.0094, + "step": 16600 + }, + { + "epoch": 0.10653138057870719, + "grad_norm": 0.6605579257011414, + "learning_rate": 5.3264494612621865e-06, + "loss": 0.0086, + "step": 16610 + }, + { + "epoch": 0.1065955174724933, + "grad_norm": 0.6360081434249878, + "learning_rate": 5.3296562339661376e-06, + "loss": 0.012, + "step": 16620 + }, + { + "epoch": 0.10665965436627939, + "grad_norm": 0.5654777884483337, + "learning_rate": 5.332863006670088e-06, + "loss": 0.0117, + "step": 16630 + }, + { + "epoch": 0.10672379126006548, + "grad_norm": 0.6596087217330933, + "learning_rate": 5.336069779374038e-06, + "loss": 0.012, + "step": 16640 + }, + { + "epoch": 0.10678792815385157, + "grad_norm": 0.7838721871376038, + "learning_rate": 5.339276552077988e-06, + "loss": 0.0157, + "step": 16650 + }, + { + "epoch": 0.10685206504763768, + "grad_norm": 0.420419305562973, + "learning_rate": 5.3424833247819395e-06, + "loss": 0.009, + "step": 16660 + }, + { + "epoch": 0.10691620194142377, + "grad_norm": 0.40264374017715454, + "learning_rate": 5.345690097485891e-06, + "loss": 0.0111, + "step": 16670 + }, + { + "epoch": 0.10698033883520987, + "grad_norm": 0.32008737325668335, + "learning_rate": 5.348896870189842e-06, + "loss": 0.0062, + "step": 16680 + }, + { + "epoch": 0.10704447572899597, + "grad_norm": 0.6018548011779785, + "learning_rate": 5.352103642893792e-06, + "loss": 0.0091, + "step": 16690 + }, + { + "epoch": 0.10710861262278207, + "grad_norm": 0.3959011733531952, + "learning_rate": 5.355310415597743e-06, + "loss": 0.0077, + "step": 16700 + }, + { + "epoch": 0.10717274951656816, + "grad_norm": 0.6982358694076538, + "learning_rate": 5.358517188301694e-06, + "loss": 0.0115, + "step": 16710 + }, + { + "epoch": 0.10723688641035425, + "grad_norm": 0.34304675459861755, + "learning_rate": 5.3617239610056445e-06, + "loss": 0.0108, + "step": 16720 + }, + { + "epoch": 0.10730102330414036, + "grad_norm": 0.67317795753479, + "learning_rate": 5.364930733709595e-06, + "loss": 0.0123, + "step": 16730 + }, + { + "epoch": 0.10736516019792645, + "grad_norm": 0.4527972340583801, + "learning_rate": 5.368137506413546e-06, + "loss": 0.0096, + "step": 16740 + }, + { + "epoch": 0.10742929709171255, + "grad_norm": 0.8204785585403442, + "learning_rate": 5.371344279117496e-06, + "loss": 0.01, + "step": 16750 + }, + { + "epoch": 0.10749343398549865, + "grad_norm": 0.1776057481765747, + "learning_rate": 5.374551051821447e-06, + "loss": 0.0073, + "step": 16760 + }, + { + "epoch": 0.10755757087928475, + "grad_norm": 0.36770233511924744, + "learning_rate": 5.377757824525398e-06, + "loss": 0.0095, + "step": 16770 + }, + { + "epoch": 0.10762170777307084, + "grad_norm": 0.4649524688720703, + "learning_rate": 5.380964597229349e-06, + "loss": 0.0077, + "step": 16780 + }, + { + "epoch": 0.10768584466685693, + "grad_norm": 0.4981425702571869, + "learning_rate": 5.3841713699333e-06, + "loss": 0.0089, + "step": 16790 + }, + { + "epoch": 0.10774998156064304, + "grad_norm": 0.5665914416313171, + "learning_rate": 5.387378142637251e-06, + "loss": 0.011, + "step": 16800 + }, + { + "epoch": 0.10781411845442913, + "grad_norm": 0.6016936302185059, + "learning_rate": 5.390584915341202e-06, + "loss": 0.0091, + "step": 16810 + }, + { + "epoch": 0.10787825534821523, + "grad_norm": 0.3479364812374115, + "learning_rate": 5.393791688045151e-06, + "loss": 0.009, + "step": 16820 + }, + { + "epoch": 0.10794239224200133, + "grad_norm": 0.40164706110954285, + "learning_rate": 5.3969984607491025e-06, + "loss": 0.0101, + "step": 16830 + }, + { + "epoch": 0.10800652913578743, + "grad_norm": 0.24736908078193665, + "learning_rate": 5.400205233453053e-06, + "loss": 0.0087, + "step": 16840 + }, + { + "epoch": 0.10807066602957352, + "grad_norm": 0.5277517437934875, + "learning_rate": 5.403412006157004e-06, + "loss": 0.0099, + "step": 16850 + }, + { + "epoch": 0.10813480292335961, + "grad_norm": 0.5787237882614136, + "learning_rate": 5.406618778860955e-06, + "loss": 0.0118, + "step": 16860 + }, + { + "epoch": 0.10819893981714572, + "grad_norm": 0.6116265654563904, + "learning_rate": 5.409825551564905e-06, + "loss": 0.0116, + "step": 16870 + }, + { + "epoch": 0.10826307671093181, + "grad_norm": 0.5526065826416016, + "learning_rate": 5.413032324268856e-06, + "loss": 0.0083, + "step": 16880 + }, + { + "epoch": 0.1083272136047179, + "grad_norm": 0.5182403922080994, + "learning_rate": 5.4162390969728075e-06, + "loss": 0.012, + "step": 16890 + }, + { + "epoch": 0.10839135049850401, + "grad_norm": 0.5181537866592407, + "learning_rate": 5.4194458696767586e-06, + "loss": 0.0089, + "step": 16900 + }, + { + "epoch": 0.1084554873922901, + "grad_norm": 0.6523613333702087, + "learning_rate": 5.422652642380708e-06, + "loss": 0.013, + "step": 16910 + }, + { + "epoch": 0.1085196242860762, + "grad_norm": 0.12294157594442368, + "learning_rate": 5.425859415084659e-06, + "loss": 0.0081, + "step": 16920 + }, + { + "epoch": 0.10858376117986229, + "grad_norm": 0.5861759781837463, + "learning_rate": 5.429066187788609e-06, + "loss": 0.0073, + "step": 16930 + }, + { + "epoch": 0.1086478980736484, + "grad_norm": 0.5482618808746338, + "learning_rate": 5.4322729604925605e-06, + "loss": 0.0097, + "step": 16940 + }, + { + "epoch": 0.10871203496743449, + "grad_norm": 0.5014786124229431, + "learning_rate": 5.435479733196512e-06, + "loss": 0.0126, + "step": 16950 + }, + { + "epoch": 0.10877617186122059, + "grad_norm": 0.31347060203552246, + "learning_rate": 5.438686505900463e-06, + "loss": 0.0136, + "step": 16960 + }, + { + "epoch": 0.10884030875500669, + "grad_norm": 0.5638201832771301, + "learning_rate": 5.441893278604413e-06, + "loss": 0.0088, + "step": 16970 + }, + { + "epoch": 0.10890444564879279, + "grad_norm": 1.0175175666809082, + "learning_rate": 5.445100051308364e-06, + "loss": 0.0106, + "step": 16980 + }, + { + "epoch": 0.10896858254257888, + "grad_norm": 0.6966453194618225, + "learning_rate": 5.448306824012315e-06, + "loss": 0.0131, + "step": 16990 + }, + { + "epoch": 0.10903271943636497, + "grad_norm": 0.4546926021575928, + "learning_rate": 5.451513596716265e-06, + "loss": 0.0073, + "step": 17000 + }, + { + "epoch": 0.10909685633015108, + "grad_norm": 0.4377993941307068, + "learning_rate": 5.454720369420216e-06, + "loss": 0.0167, + "step": 17010 + }, + { + "epoch": 0.10916099322393717, + "grad_norm": 1.2318954467773438, + "learning_rate": 5.457927142124166e-06, + "loss": 0.0159, + "step": 17020 + }, + { + "epoch": 0.10922513011772327, + "grad_norm": 0.6596897840499878, + "learning_rate": 5.461133914828117e-06, + "loss": 0.0073, + "step": 17030 + }, + { + "epoch": 0.10928926701150937, + "grad_norm": 0.694681704044342, + "learning_rate": 5.464340687532068e-06, + "loss": 0.011, + "step": 17040 + }, + { + "epoch": 0.10935340390529547, + "grad_norm": 0.24846361577510834, + "learning_rate": 5.467547460236019e-06, + "loss": 0.0084, + "step": 17050 + }, + { + "epoch": 0.10941754079908156, + "grad_norm": 0.630030632019043, + "learning_rate": 5.47075423293997e-06, + "loss": 0.0076, + "step": 17060 + }, + { + "epoch": 0.10948167769286765, + "grad_norm": 0.8209272027015686, + "learning_rate": 5.473961005643921e-06, + "loss": 0.0131, + "step": 17070 + }, + { + "epoch": 0.10954581458665376, + "grad_norm": 0.5075275897979736, + "learning_rate": 5.477167778347872e-06, + "loss": 0.0107, + "step": 17080 + }, + { + "epoch": 0.10960995148043985, + "grad_norm": 0.5664962530136108, + "learning_rate": 5.480374551051821e-06, + "loss": 0.0128, + "step": 17090 + }, + { + "epoch": 0.10967408837422594, + "grad_norm": 0.5662223100662231, + "learning_rate": 5.483581323755772e-06, + "loss": 0.0081, + "step": 17100 + }, + { + "epoch": 0.10973822526801205, + "grad_norm": 0.5928486585617065, + "learning_rate": 5.4867880964597235e-06, + "loss": 0.0114, + "step": 17110 + }, + { + "epoch": 0.10980236216179814, + "grad_norm": 0.4811129868030548, + "learning_rate": 5.489994869163674e-06, + "loss": 0.0074, + "step": 17120 + }, + { + "epoch": 0.10986649905558424, + "grad_norm": 0.3544856607913971, + "learning_rate": 5.493201641867625e-06, + "loss": 0.0088, + "step": 17130 + }, + { + "epoch": 0.10993063594937033, + "grad_norm": 0.45926907658576965, + "learning_rate": 5.496408414571576e-06, + "loss": 0.0093, + "step": 17140 + }, + { + "epoch": 0.10999477284315644, + "grad_norm": 0.6390130519866943, + "learning_rate": 5.499615187275526e-06, + "loss": 0.0061, + "step": 17150 + }, + { + "epoch": 0.11005890973694253, + "grad_norm": 0.7102174758911133, + "learning_rate": 5.502821959979477e-06, + "loss": 0.0121, + "step": 17160 + }, + { + "epoch": 0.11012304663072862, + "grad_norm": 0.5696839094161987, + "learning_rate": 5.5060287326834285e-06, + "loss": 0.0081, + "step": 17170 + }, + { + "epoch": 0.11018718352451472, + "grad_norm": 0.4979040026664734, + "learning_rate": 5.509235505387378e-06, + "loss": 0.0139, + "step": 17180 + }, + { + "epoch": 0.11025132041830082, + "grad_norm": 0.4420786201953888, + "learning_rate": 5.512442278091329e-06, + "loss": 0.0124, + "step": 17190 + }, + { + "epoch": 0.11031545731208692, + "grad_norm": 0.3444763422012329, + "learning_rate": 5.51564905079528e-06, + "loss": 0.0082, + "step": 17200 + }, + { + "epoch": 0.11037959420587301, + "grad_norm": 0.638555645942688, + "learning_rate": 5.51885582349923e-06, + "loss": 0.0126, + "step": 17210 + }, + { + "epoch": 0.11044373109965912, + "grad_norm": 0.5583418607711792, + "learning_rate": 5.5220625962031815e-06, + "loss": 0.0094, + "step": 17220 + }, + { + "epoch": 0.11050786799344521, + "grad_norm": 0.8487377166748047, + "learning_rate": 5.525269368907133e-06, + "loss": 0.0128, + "step": 17230 + }, + { + "epoch": 0.1105720048872313, + "grad_norm": 0.3511020541191101, + "learning_rate": 5.528476141611083e-06, + "loss": 0.0081, + "step": 17240 + }, + { + "epoch": 0.1106361417810174, + "grad_norm": 0.4752103388309479, + "learning_rate": 5.531682914315034e-06, + "loss": 0.0121, + "step": 17250 + }, + { + "epoch": 0.1107002786748035, + "grad_norm": 0.2458493560552597, + "learning_rate": 5.534889687018985e-06, + "loss": 0.0089, + "step": 17260 + }, + { + "epoch": 0.1107644155685896, + "grad_norm": 0.2905209958553314, + "learning_rate": 5.5380964597229345e-06, + "loss": 0.0097, + "step": 17270 + }, + { + "epoch": 0.11082855246237569, + "grad_norm": 0.46376416087150574, + "learning_rate": 5.541303232426886e-06, + "loss": 0.0097, + "step": 17280 + }, + { + "epoch": 0.1108926893561618, + "grad_norm": 0.3083602786064148, + "learning_rate": 5.544510005130837e-06, + "loss": 0.0076, + "step": 17290 + }, + { + "epoch": 0.11095682624994789, + "grad_norm": 0.7128134965896606, + "learning_rate": 5.547716777834787e-06, + "loss": 0.0126, + "step": 17300 + }, + { + "epoch": 0.11102096314373398, + "grad_norm": 0.772965133190155, + "learning_rate": 5.550923550538738e-06, + "loss": 0.0113, + "step": 17310 + }, + { + "epoch": 0.11108510003752008, + "grad_norm": 0.5096204280853271, + "learning_rate": 5.554130323242689e-06, + "loss": 0.0109, + "step": 17320 + }, + { + "epoch": 0.11114923693130618, + "grad_norm": 0.5388255715370178, + "learning_rate": 5.55733709594664e-06, + "loss": 0.0085, + "step": 17330 + }, + { + "epoch": 0.11121337382509228, + "grad_norm": 0.7835232019424438, + "learning_rate": 5.560543868650591e-06, + "loss": 0.009, + "step": 17340 + }, + { + "epoch": 0.11127751071887837, + "grad_norm": 0.38351520895957947, + "learning_rate": 5.563750641354542e-06, + "loss": 0.0086, + "step": 17350 + }, + { + "epoch": 0.11134164761266448, + "grad_norm": 0.4709744155406952, + "learning_rate": 5.566957414058491e-06, + "loss": 0.009, + "step": 17360 + }, + { + "epoch": 0.11140578450645057, + "grad_norm": 0.46643105149269104, + "learning_rate": 5.570164186762442e-06, + "loss": 0.0091, + "step": 17370 + }, + { + "epoch": 0.11146992140023666, + "grad_norm": 0.4783753454685211, + "learning_rate": 5.573370959466393e-06, + "loss": 0.0061, + "step": 17380 + }, + { + "epoch": 0.11153405829402276, + "grad_norm": 0.2818965017795563, + "learning_rate": 5.576577732170344e-06, + "loss": 0.0101, + "step": 17390 + }, + { + "epoch": 0.11159819518780886, + "grad_norm": 0.3929568827152252, + "learning_rate": 5.579784504874295e-06, + "loss": 0.0058, + "step": 17400 + }, + { + "epoch": 0.11166233208159496, + "grad_norm": 0.3165569007396698, + "learning_rate": 5.582991277578246e-06, + "loss": 0.0107, + "step": 17410 + }, + { + "epoch": 0.11172646897538105, + "grad_norm": 0.37585151195526123, + "learning_rate": 5.586198050282197e-06, + "loss": 0.0096, + "step": 17420 + }, + { + "epoch": 0.11179060586916716, + "grad_norm": 0.3941322863101959, + "learning_rate": 5.589404822986147e-06, + "loss": 0.01, + "step": 17430 + }, + { + "epoch": 0.11185474276295325, + "grad_norm": 0.38753095269203186, + "learning_rate": 5.592611595690098e-06, + "loss": 0.0072, + "step": 17440 + }, + { + "epoch": 0.11191887965673934, + "grad_norm": 0.6736488938331604, + "learning_rate": 5.5958183683940495e-06, + "loss": 0.0106, + "step": 17450 + }, + { + "epoch": 0.11198301655052544, + "grad_norm": 0.45821017026901245, + "learning_rate": 5.599025141097999e-06, + "loss": 0.0115, + "step": 17460 + }, + { + "epoch": 0.11204715344431154, + "grad_norm": 0.3283272981643677, + "learning_rate": 5.60223191380195e-06, + "loss": 0.0088, + "step": 17470 + }, + { + "epoch": 0.11211129033809764, + "grad_norm": 0.7205197215080261, + "learning_rate": 5.605438686505901e-06, + "loss": 0.0086, + "step": 17480 + }, + { + "epoch": 0.11217542723188373, + "grad_norm": 0.6044069528579712, + "learning_rate": 5.608645459209851e-06, + "loss": 0.0074, + "step": 17490 + }, + { + "epoch": 0.11223956412566984, + "grad_norm": 0.39601925015449524, + "learning_rate": 5.6118522319138025e-06, + "loss": 0.0064, + "step": 17500 + }, + { + "epoch": 0.11230370101945593, + "grad_norm": 0.21867237985134125, + "learning_rate": 5.615059004617754e-06, + "loss": 0.0087, + "step": 17510 + }, + { + "epoch": 0.11236783791324202, + "grad_norm": 0.1866510808467865, + "learning_rate": 5.618265777321704e-06, + "loss": 0.0075, + "step": 17520 + }, + { + "epoch": 0.11243197480702811, + "grad_norm": 0.4482560455799103, + "learning_rate": 5.621472550025655e-06, + "loss": 0.0118, + "step": 17530 + }, + { + "epoch": 0.11249611170081422, + "grad_norm": 0.6520987153053284, + "learning_rate": 5.624679322729606e-06, + "loss": 0.0086, + "step": 17540 + }, + { + "epoch": 0.11256024859460032, + "grad_norm": 0.4319920837879181, + "learning_rate": 5.6278860954335555e-06, + "loss": 0.0047, + "step": 17550 + }, + { + "epoch": 0.11262438548838641, + "grad_norm": 0.8728720545768738, + "learning_rate": 5.631092868137507e-06, + "loss": 0.0112, + "step": 17560 + }, + { + "epoch": 0.11268852238217252, + "grad_norm": 0.29425284266471863, + "learning_rate": 5.634299640841458e-06, + "loss": 0.0082, + "step": 17570 + }, + { + "epoch": 0.11275265927595861, + "grad_norm": 0.5924003720283508, + "learning_rate": 5.637506413545408e-06, + "loss": 0.0089, + "step": 17580 + }, + { + "epoch": 0.1128167961697447, + "grad_norm": 0.6752727031707764, + "learning_rate": 5.640713186249359e-06, + "loss": 0.009, + "step": 17590 + }, + { + "epoch": 0.1128809330635308, + "grad_norm": 0.5387188196182251, + "learning_rate": 5.64391995895331e-06, + "loss": 0.0127, + "step": 17600 + }, + { + "epoch": 0.1129450699573169, + "grad_norm": 0.38861316442489624, + "learning_rate": 5.6471267316572605e-06, + "loss": 0.0095, + "step": 17610 + }, + { + "epoch": 0.113009206851103, + "grad_norm": 0.7571175694465637, + "learning_rate": 5.650333504361212e-06, + "loss": 0.0092, + "step": 17620 + }, + { + "epoch": 0.11307334374488909, + "grad_norm": 0.10430482029914856, + "learning_rate": 5.653540277065163e-06, + "loss": 0.0079, + "step": 17630 + }, + { + "epoch": 0.1131374806386752, + "grad_norm": 0.27501529455184937, + "learning_rate": 5.656747049769112e-06, + "loss": 0.0076, + "step": 17640 + }, + { + "epoch": 0.11320161753246129, + "grad_norm": 0.18850356340408325, + "learning_rate": 5.659953822473063e-06, + "loss": 0.0071, + "step": 17650 + }, + { + "epoch": 0.11326575442624738, + "grad_norm": 0.7970367074012756, + "learning_rate": 5.663160595177014e-06, + "loss": 0.0102, + "step": 17660 + }, + { + "epoch": 0.11332989132003347, + "grad_norm": 0.5236055254936218, + "learning_rate": 5.666367367880965e-06, + "loss": 0.0086, + "step": 17670 + }, + { + "epoch": 0.11339402821381958, + "grad_norm": 0.45878875255584717, + "learning_rate": 5.669574140584916e-06, + "loss": 0.0083, + "step": 17680 + }, + { + "epoch": 0.11345816510760567, + "grad_norm": 0.3952488303184509, + "learning_rate": 5.672780913288867e-06, + "loss": 0.0059, + "step": 17690 + }, + { + "epoch": 0.11352230200139177, + "grad_norm": 0.28443267941474915, + "learning_rate": 5.675987685992818e-06, + "loss": 0.0111, + "step": 17700 + }, + { + "epoch": 0.11358643889517787, + "grad_norm": 0.5733482837677002, + "learning_rate": 5.679194458696768e-06, + "loss": 0.0112, + "step": 17710 + }, + { + "epoch": 0.11365057578896397, + "grad_norm": 0.6616878509521484, + "learning_rate": 5.682401231400719e-06, + "loss": 0.0093, + "step": 17720 + }, + { + "epoch": 0.11371471268275006, + "grad_norm": 0.4348270893096924, + "learning_rate": 5.685608004104669e-06, + "loss": 0.0063, + "step": 17730 + }, + { + "epoch": 0.11377884957653615, + "grad_norm": 0.2959847152233124, + "learning_rate": 5.68881477680862e-06, + "loss": 0.006, + "step": 17740 + }, + { + "epoch": 0.11384298647032226, + "grad_norm": 0.814254641532898, + "learning_rate": 5.692021549512571e-06, + "loss": 0.0112, + "step": 17750 + }, + { + "epoch": 0.11390712336410835, + "grad_norm": 0.40536028146743774, + "learning_rate": 5.695228322216521e-06, + "loss": 0.0125, + "step": 17760 + }, + { + "epoch": 0.11397126025789445, + "grad_norm": 0.3108545243740082, + "learning_rate": 5.698435094920472e-06, + "loss": 0.0095, + "step": 17770 + }, + { + "epoch": 0.11403539715168055, + "grad_norm": 0.09603261202573776, + "learning_rate": 5.7016418676244235e-06, + "loss": 0.0065, + "step": 17780 + }, + { + "epoch": 0.11409953404546665, + "grad_norm": 1.1272550821304321, + "learning_rate": 5.704848640328375e-06, + "loss": 0.0143, + "step": 17790 + }, + { + "epoch": 0.11416367093925274, + "grad_norm": 0.629813551902771, + "learning_rate": 5.708055413032325e-06, + "loss": 0.0089, + "step": 17800 + }, + { + "epoch": 0.11422780783303883, + "grad_norm": 0.2537789046764374, + "learning_rate": 5.711262185736276e-06, + "loss": 0.0089, + "step": 17810 + }, + { + "epoch": 0.11429194472682494, + "grad_norm": 0.7877181768417358, + "learning_rate": 5.714468958440225e-06, + "loss": 0.0093, + "step": 17820 + }, + { + "epoch": 0.11435608162061103, + "grad_norm": 0.498452365398407, + "learning_rate": 5.7176757311441765e-06, + "loss": 0.0138, + "step": 17830 + }, + { + "epoch": 0.11442021851439713, + "grad_norm": 0.41156306862831116, + "learning_rate": 5.720882503848128e-06, + "loss": 0.0083, + "step": 17840 + }, + { + "epoch": 0.11448435540818323, + "grad_norm": 0.23975320160388947, + "learning_rate": 5.724089276552079e-06, + "loss": 0.0092, + "step": 17850 + }, + { + "epoch": 0.11454849230196933, + "grad_norm": 0.5480472445487976, + "learning_rate": 5.727296049256029e-06, + "loss": 0.0095, + "step": 17860 + }, + { + "epoch": 0.11461262919575542, + "grad_norm": 0.6654664874076843, + "learning_rate": 5.73050282195998e-06, + "loss": 0.0119, + "step": 17870 + }, + { + "epoch": 0.11467676608954151, + "grad_norm": 0.3661085367202759, + "learning_rate": 5.733709594663931e-06, + "loss": 0.0105, + "step": 17880 + }, + { + "epoch": 0.11474090298332762, + "grad_norm": 0.34792476892471313, + "learning_rate": 5.7369163673678815e-06, + "loss": 0.0104, + "step": 17890 + }, + { + "epoch": 0.11480503987711371, + "grad_norm": 0.8125250339508057, + "learning_rate": 5.740123140071833e-06, + "loss": 0.0086, + "step": 17900 + }, + { + "epoch": 0.1148691767708998, + "grad_norm": 0.4975816309452057, + "learning_rate": 5.743329912775782e-06, + "loss": 0.0091, + "step": 17910 + }, + { + "epoch": 0.1149333136646859, + "grad_norm": 0.6501782536506653, + "learning_rate": 5.746536685479733e-06, + "loss": 0.0095, + "step": 17920 + }, + { + "epoch": 0.114997450558472, + "grad_norm": 0.38356733322143555, + "learning_rate": 5.749743458183684e-06, + "loss": 0.0116, + "step": 17930 + }, + { + "epoch": 0.1150615874522581, + "grad_norm": 0.6213951706886292, + "learning_rate": 5.752950230887635e-06, + "loss": 0.0111, + "step": 17940 + }, + { + "epoch": 0.11512572434604419, + "grad_norm": 0.43015363812446594, + "learning_rate": 5.756157003591586e-06, + "loss": 0.009, + "step": 17950 + }, + { + "epoch": 0.1151898612398303, + "grad_norm": 0.1405862420797348, + "learning_rate": 5.759363776295537e-06, + "loss": 0.0115, + "step": 17960 + }, + { + "epoch": 0.11525399813361639, + "grad_norm": 0.4237961769104004, + "learning_rate": 5.762570548999488e-06, + "loss": 0.0084, + "step": 17970 + }, + { + "epoch": 0.11531813502740249, + "grad_norm": 0.9272621870040894, + "learning_rate": 5.765777321703438e-06, + "loss": 0.0079, + "step": 17980 + }, + { + "epoch": 0.11538227192118858, + "grad_norm": 0.24716678261756897, + "learning_rate": 5.768984094407389e-06, + "loss": 0.0095, + "step": 17990 + }, + { + "epoch": 0.11544640881497469, + "grad_norm": 0.4450685679912567, + "learning_rate": 5.7721908671113395e-06, + "loss": 0.0083, + "step": 18000 + }, + { + "epoch": 0.11551054570876078, + "grad_norm": 0.5911607146263123, + "learning_rate": 5.77539763981529e-06, + "loss": 0.0093, + "step": 18010 + }, + { + "epoch": 0.11557468260254687, + "grad_norm": 0.382109671831131, + "learning_rate": 5.778604412519241e-06, + "loss": 0.0062, + "step": 18020 + }, + { + "epoch": 0.11563881949633298, + "grad_norm": 0.4133998453617096, + "learning_rate": 5.781811185223192e-06, + "loss": 0.0075, + "step": 18030 + }, + { + "epoch": 0.11570295639011907, + "grad_norm": 0.38716354966163635, + "learning_rate": 5.785017957927142e-06, + "loss": 0.008, + "step": 18040 + }, + { + "epoch": 0.11576709328390516, + "grad_norm": 0.4410455822944641, + "learning_rate": 5.788224730631093e-06, + "loss": 0.007, + "step": 18050 + }, + { + "epoch": 0.11583123017769126, + "grad_norm": 0.7385231852531433, + "learning_rate": 5.7914315033350445e-06, + "loss": 0.0108, + "step": 18060 + }, + { + "epoch": 0.11589536707147736, + "grad_norm": 1.0614713430404663, + "learning_rate": 5.794638276038996e-06, + "loss": 0.0151, + "step": 18070 + }, + { + "epoch": 0.11595950396526346, + "grad_norm": 0.44553372263908386, + "learning_rate": 5.797845048742946e-06, + "loss": 0.0069, + "step": 18080 + }, + { + "epoch": 0.11602364085904955, + "grad_norm": 0.4055965840816498, + "learning_rate": 5.801051821446896e-06, + "loss": 0.0118, + "step": 18090 + }, + { + "epoch": 0.11608777775283566, + "grad_norm": 0.5591070652008057, + "learning_rate": 5.804258594150846e-06, + "loss": 0.0126, + "step": 18100 + }, + { + "epoch": 0.11615191464662175, + "grad_norm": 0.6480048298835754, + "learning_rate": 5.8074653668547975e-06, + "loss": 0.0077, + "step": 18110 + }, + { + "epoch": 0.11621605154040784, + "grad_norm": 0.34888118505477905, + "learning_rate": 5.810672139558749e-06, + "loss": 0.0123, + "step": 18120 + }, + { + "epoch": 0.11628018843419394, + "grad_norm": 0.7395862340927124, + "learning_rate": 5.813878912262699e-06, + "loss": 0.0102, + "step": 18130 + }, + { + "epoch": 0.11634432532798004, + "grad_norm": 0.34636107087135315, + "learning_rate": 5.81708568496665e-06, + "loss": 0.0075, + "step": 18140 + }, + { + "epoch": 0.11640846222176614, + "grad_norm": 0.6415095329284668, + "learning_rate": 5.820292457670601e-06, + "loss": 0.0116, + "step": 18150 + }, + { + "epoch": 0.11647259911555223, + "grad_norm": 0.2900804579257965, + "learning_rate": 5.823499230374552e-06, + "loss": 0.0098, + "step": 18160 + }, + { + "epoch": 0.11653673600933834, + "grad_norm": 0.28599801659584045, + "learning_rate": 5.8267060030785025e-06, + "loss": 0.0104, + "step": 18170 + }, + { + "epoch": 0.11660087290312443, + "grad_norm": 1.0962491035461426, + "learning_rate": 5.829912775782453e-06, + "loss": 0.0078, + "step": 18180 + }, + { + "epoch": 0.11666500979691052, + "grad_norm": 0.8533971309661865, + "learning_rate": 5.833119548486403e-06, + "loss": 0.0096, + "step": 18190 + }, + { + "epoch": 0.11672914669069662, + "grad_norm": 0.47534847259521484, + "learning_rate": 5.836326321190354e-06, + "loss": 0.0094, + "step": 18200 + }, + { + "epoch": 0.11679328358448272, + "grad_norm": 0.3004346489906311, + "learning_rate": 5.839533093894305e-06, + "loss": 0.0078, + "step": 18210 + }, + { + "epoch": 0.11685742047826882, + "grad_norm": 0.4630396366119385, + "learning_rate": 5.842739866598256e-06, + "loss": 0.0086, + "step": 18220 + }, + { + "epoch": 0.11692155737205491, + "grad_norm": 0.5481510162353516, + "learning_rate": 5.845946639302207e-06, + "loss": 0.0122, + "step": 18230 + }, + { + "epoch": 0.11698569426584102, + "grad_norm": 0.3296847641468048, + "learning_rate": 5.849153412006158e-06, + "loss": 0.009, + "step": 18240 + }, + { + "epoch": 0.11704983115962711, + "grad_norm": 0.6358070969581604, + "learning_rate": 5.852360184710109e-06, + "loss": 0.0112, + "step": 18250 + }, + { + "epoch": 0.1171139680534132, + "grad_norm": 0.3781026005744934, + "learning_rate": 5.855566957414059e-06, + "loss": 0.0095, + "step": 18260 + }, + { + "epoch": 0.1171781049471993, + "grad_norm": 0.4986782968044281, + "learning_rate": 5.858773730118009e-06, + "loss": 0.0087, + "step": 18270 + }, + { + "epoch": 0.1172422418409854, + "grad_norm": 0.21493154764175415, + "learning_rate": 5.86198050282196e-06, + "loss": 0.0103, + "step": 18280 + }, + { + "epoch": 0.1173063787347715, + "grad_norm": 0.24460344016551971, + "learning_rate": 5.865187275525911e-06, + "loss": 0.009, + "step": 18290 + }, + { + "epoch": 0.11737051562855759, + "grad_norm": 0.372915655374527, + "learning_rate": 5.868394048229862e-06, + "loss": 0.0081, + "step": 18300 + }, + { + "epoch": 0.1174346525223437, + "grad_norm": 0.8102298974990845, + "learning_rate": 5.871600820933813e-06, + "loss": 0.0105, + "step": 18310 + }, + { + "epoch": 0.11749878941612979, + "grad_norm": 0.6621906161308289, + "learning_rate": 5.874807593637763e-06, + "loss": 0.0131, + "step": 18320 + }, + { + "epoch": 0.11756292630991588, + "grad_norm": 0.6036761403083801, + "learning_rate": 5.878014366341714e-06, + "loss": 0.0095, + "step": 18330 + }, + { + "epoch": 0.11762706320370198, + "grad_norm": 0.5530111193656921, + "learning_rate": 5.8812211390456655e-06, + "loss": 0.0106, + "step": 18340 + }, + { + "epoch": 0.11769120009748808, + "grad_norm": 0.40899187326431274, + "learning_rate": 5.884427911749616e-06, + "loss": 0.0077, + "step": 18350 + }, + { + "epoch": 0.11775533699127418, + "grad_norm": 0.6335707902908325, + "learning_rate": 5.887634684453566e-06, + "loss": 0.0092, + "step": 18360 + }, + { + "epoch": 0.11781947388506027, + "grad_norm": 0.5672249794006348, + "learning_rate": 5.890841457157517e-06, + "loss": 0.0116, + "step": 18370 + }, + { + "epoch": 0.11788361077884638, + "grad_norm": 0.4070172905921936, + "learning_rate": 5.894048229861467e-06, + "loss": 0.0127, + "step": 18380 + }, + { + "epoch": 0.11794774767263247, + "grad_norm": 0.4748009145259857, + "learning_rate": 5.8972550025654185e-06, + "loss": 0.0111, + "step": 18390 + }, + { + "epoch": 0.11801188456641856, + "grad_norm": 0.6402952671051025, + "learning_rate": 5.90046177526937e-06, + "loss": 0.0129, + "step": 18400 + }, + { + "epoch": 0.11807602146020466, + "grad_norm": 0.706387460231781, + "learning_rate": 5.90366854797332e-06, + "loss": 0.0122, + "step": 18410 + }, + { + "epoch": 0.11814015835399076, + "grad_norm": 1.0082502365112305, + "learning_rate": 5.906875320677271e-06, + "loss": 0.007, + "step": 18420 + }, + { + "epoch": 0.11820429524777686, + "grad_norm": 0.30910611152648926, + "learning_rate": 5.910082093381222e-06, + "loss": 0.0085, + "step": 18430 + }, + { + "epoch": 0.11826843214156295, + "grad_norm": 0.7558024525642395, + "learning_rate": 5.913288866085173e-06, + "loss": 0.0082, + "step": 18440 + }, + { + "epoch": 0.11833256903534906, + "grad_norm": 0.5140365958213806, + "learning_rate": 5.916495638789123e-06, + "loss": 0.0082, + "step": 18450 + }, + { + "epoch": 0.11839670592913515, + "grad_norm": 0.46017810702323914, + "learning_rate": 5.919702411493074e-06, + "loss": 0.0094, + "step": 18460 + }, + { + "epoch": 0.11846084282292124, + "grad_norm": 0.40746715664863586, + "learning_rate": 5.922909184197024e-06, + "loss": 0.0104, + "step": 18470 + }, + { + "epoch": 0.11852497971670733, + "grad_norm": 0.6365993022918701, + "learning_rate": 5.926115956900975e-06, + "loss": 0.0124, + "step": 18480 + }, + { + "epoch": 0.11858911661049344, + "grad_norm": 0.47270020842552185, + "learning_rate": 5.929322729604926e-06, + "loss": 0.0069, + "step": 18490 + }, + { + "epoch": 0.11865325350427953, + "grad_norm": 0.4353969097137451, + "learning_rate": 5.9325295023088765e-06, + "loss": 0.0093, + "step": 18500 + }, + { + "epoch": 0.11871739039806563, + "grad_norm": 0.7127542495727539, + "learning_rate": 5.935736275012828e-06, + "loss": 0.0087, + "step": 18510 + }, + { + "epoch": 0.11878152729185174, + "grad_norm": 0.3726551830768585, + "learning_rate": 5.938943047716779e-06, + "loss": 0.0065, + "step": 18520 + }, + { + "epoch": 0.11884566418563783, + "grad_norm": 0.8096532821655273, + "learning_rate": 5.94214982042073e-06, + "loss": 0.0101, + "step": 18530 + }, + { + "epoch": 0.11890980107942392, + "grad_norm": 0.38727736473083496, + "learning_rate": 5.945356593124679e-06, + "loss": 0.0148, + "step": 18540 + }, + { + "epoch": 0.11897393797321001, + "grad_norm": 0.3246394097805023, + "learning_rate": 5.94856336582863e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 0.11903807486699612, + "grad_norm": 0.3038940131664276, + "learning_rate": 5.951770138532581e-06, + "loss": 0.0101, + "step": 18560 + }, + { + "epoch": 0.11910221176078221, + "grad_norm": 0.8246645927429199, + "learning_rate": 5.954976911236532e-06, + "loss": 0.0117, + "step": 18570 + }, + { + "epoch": 0.11916634865456831, + "grad_norm": 0.45428499579429626, + "learning_rate": 5.958183683940483e-06, + "loss": 0.0119, + "step": 18580 + }, + { + "epoch": 0.1192304855483544, + "grad_norm": 0.7145015001296997, + "learning_rate": 5.961390456644434e-06, + "loss": 0.0061, + "step": 18590 + }, + { + "epoch": 0.11929462244214051, + "grad_norm": 0.6768432259559631, + "learning_rate": 5.964597229348384e-06, + "loss": 0.0087, + "step": 18600 + }, + { + "epoch": 0.1193587593359266, + "grad_norm": 0.7962307929992676, + "learning_rate": 5.967804002052335e-06, + "loss": 0.0094, + "step": 18610 + }, + { + "epoch": 0.1194228962297127, + "grad_norm": 0.40162888169288635, + "learning_rate": 5.9710107747562865e-06, + "loss": 0.011, + "step": 18620 + }, + { + "epoch": 0.1194870331234988, + "grad_norm": 0.6926724910736084, + "learning_rate": 5.974217547460236e-06, + "loss": 0.0105, + "step": 18630 + }, + { + "epoch": 0.1195511700172849, + "grad_norm": 0.4717659652233124, + "learning_rate": 5.977424320164187e-06, + "loss": 0.0086, + "step": 18640 + }, + { + "epoch": 0.11961530691107099, + "grad_norm": 0.7448294162750244, + "learning_rate": 5.980631092868137e-06, + "loss": 0.0092, + "step": 18650 + }, + { + "epoch": 0.11967944380485708, + "grad_norm": 0.590056836605072, + "learning_rate": 5.983837865572088e-06, + "loss": 0.0093, + "step": 18660 + }, + { + "epoch": 0.11974358069864319, + "grad_norm": 0.7150057554244995, + "learning_rate": 5.9870446382760395e-06, + "loss": 0.0106, + "step": 18670 + }, + { + "epoch": 0.11980771759242928, + "grad_norm": 0.5308887958526611, + "learning_rate": 5.990251410979991e-06, + "loss": 0.0119, + "step": 18680 + }, + { + "epoch": 0.11987185448621537, + "grad_norm": 0.503163754940033, + "learning_rate": 5.993458183683941e-06, + "loss": 0.0076, + "step": 18690 + }, + { + "epoch": 0.11993599138000148, + "grad_norm": 0.6761953234672546, + "learning_rate": 5.996664956387892e-06, + "loss": 0.009, + "step": 18700 + }, + { + "epoch": 0.12000012827378757, + "grad_norm": 0.4515625536441803, + "learning_rate": 5.999871729091843e-06, + "loss": 0.0169, + "step": 18710 + }, + { + "epoch": 0.12006426516757367, + "grad_norm": 0.42195725440979004, + "learning_rate": 6.0030785017957925e-06, + "loss": 0.0081, + "step": 18720 + }, + { + "epoch": 0.12012840206135976, + "grad_norm": 0.21539296209812164, + "learning_rate": 6.006285274499744e-06, + "loss": 0.0097, + "step": 18730 + }, + { + "epoch": 0.12019253895514587, + "grad_norm": 0.7816265225410461, + "learning_rate": 6.009492047203695e-06, + "loss": 0.0137, + "step": 18740 + }, + { + "epoch": 0.12025667584893196, + "grad_norm": 0.4108607769012451, + "learning_rate": 6.012698819907645e-06, + "loss": 0.008, + "step": 18750 + }, + { + "epoch": 0.12032081274271805, + "grad_norm": 0.6747710704803467, + "learning_rate": 6.015905592611596e-06, + "loss": 0.0098, + "step": 18760 + }, + { + "epoch": 0.12038494963650416, + "grad_norm": 0.4920329749584198, + "learning_rate": 6.019112365315547e-06, + "loss": 0.0082, + "step": 18770 + }, + { + "epoch": 0.12044908653029025, + "grad_norm": 0.46777355670928955, + "learning_rate": 6.0223191380194975e-06, + "loss": 0.0101, + "step": 18780 + }, + { + "epoch": 0.12051322342407635, + "grad_norm": 0.5914434790611267, + "learning_rate": 6.025525910723449e-06, + "loss": 0.0118, + "step": 18790 + }, + { + "epoch": 0.12057736031786244, + "grad_norm": 0.5254697799682617, + "learning_rate": 6.0287326834274e-06, + "loss": 0.0072, + "step": 18800 + }, + { + "epoch": 0.12064149721164855, + "grad_norm": 0.6814507246017456, + "learning_rate": 6.031939456131351e-06, + "loss": 0.009, + "step": 18810 + }, + { + "epoch": 0.12070563410543464, + "grad_norm": 0.4084748923778534, + "learning_rate": 6.0351462288353e-06, + "loss": 0.0088, + "step": 18820 + }, + { + "epoch": 0.12076977099922073, + "grad_norm": 0.3717684745788574, + "learning_rate": 6.038353001539251e-06, + "loss": 0.0063, + "step": 18830 + }, + { + "epoch": 0.12083390789300684, + "grad_norm": 0.6668663024902344, + "learning_rate": 6.041559774243202e-06, + "loss": 0.0115, + "step": 18840 + }, + { + "epoch": 0.12089804478679293, + "grad_norm": 0.49359339475631714, + "learning_rate": 6.044766546947153e-06, + "loss": 0.0064, + "step": 18850 + }, + { + "epoch": 0.12096218168057903, + "grad_norm": 0.7852402329444885, + "learning_rate": 6.047973319651104e-06, + "loss": 0.0077, + "step": 18860 + }, + { + "epoch": 0.12102631857436512, + "grad_norm": 0.5898550748825073, + "learning_rate": 6.051180092355054e-06, + "loss": 0.0087, + "step": 18870 + }, + { + "epoch": 0.12109045546815123, + "grad_norm": 0.46745458245277405, + "learning_rate": 6.054386865059005e-06, + "loss": 0.0078, + "step": 18880 + }, + { + "epoch": 0.12115459236193732, + "grad_norm": 0.5510157942771912, + "learning_rate": 6.057593637762956e-06, + "loss": 0.0105, + "step": 18890 + }, + { + "epoch": 0.12121872925572341, + "grad_norm": 0.6177038550376892, + "learning_rate": 6.0608004104669075e-06, + "loss": 0.0103, + "step": 18900 + }, + { + "epoch": 0.12128286614950952, + "grad_norm": 0.18779337406158447, + "learning_rate": 6.064007183170857e-06, + "loss": 0.0062, + "step": 18910 + }, + { + "epoch": 0.12134700304329561, + "grad_norm": 0.5417159199714661, + "learning_rate": 6.067213955874808e-06, + "loss": 0.008, + "step": 18920 + }, + { + "epoch": 0.1214111399370817, + "grad_norm": 0.25111818313598633, + "learning_rate": 6.070420728578758e-06, + "loss": 0.0056, + "step": 18930 + }, + { + "epoch": 0.1214752768308678, + "grad_norm": 0.21491633355617523, + "learning_rate": 6.073627501282709e-06, + "loss": 0.0081, + "step": 18940 + }, + { + "epoch": 0.1215394137246539, + "grad_norm": 0.5052981972694397, + "learning_rate": 6.0768342739866605e-06, + "loss": 0.0075, + "step": 18950 + }, + { + "epoch": 0.12160355061844, + "grad_norm": 0.7487515211105347, + "learning_rate": 6.080041046690612e-06, + "loss": 0.0069, + "step": 18960 + }, + { + "epoch": 0.12166768751222609, + "grad_norm": 0.7578110694885254, + "learning_rate": 6.083247819394562e-06, + "loss": 0.0089, + "step": 18970 + }, + { + "epoch": 0.1217318244060122, + "grad_norm": 0.18499760329723358, + "learning_rate": 6.086454592098513e-06, + "loss": 0.0092, + "step": 18980 + }, + { + "epoch": 0.12179596129979829, + "grad_norm": 0.33362337946891785, + "learning_rate": 6.089661364802464e-06, + "loss": 0.0133, + "step": 18990 + }, + { + "epoch": 0.12186009819358438, + "grad_norm": 0.8806824088096619, + "learning_rate": 6.0928681375064135e-06, + "loss": 0.0083, + "step": 19000 + }, + { + "epoch": 0.12192423508737048, + "grad_norm": 0.30265992879867554, + "learning_rate": 6.096074910210365e-06, + "loss": 0.0111, + "step": 19010 + }, + { + "epoch": 0.12198837198115658, + "grad_norm": 0.4938734471797943, + "learning_rate": 6.099281682914315e-06, + "loss": 0.0087, + "step": 19020 + }, + { + "epoch": 0.12205250887494268, + "grad_norm": 0.9325695037841797, + "learning_rate": 6.102488455618266e-06, + "loss": 0.0142, + "step": 19030 + }, + { + "epoch": 0.12211664576872877, + "grad_norm": 0.20962482690811157, + "learning_rate": 6.105695228322217e-06, + "loss": 0.0085, + "step": 19040 + }, + { + "epoch": 0.12218078266251488, + "grad_norm": 0.37175944447517395, + "learning_rate": 6.108902001026168e-06, + "loss": 0.0083, + "step": 19050 + }, + { + "epoch": 0.12224491955630097, + "grad_norm": 0.3731585144996643, + "learning_rate": 6.1121087737301185e-06, + "loss": 0.0056, + "step": 19060 + }, + { + "epoch": 0.12230905645008706, + "grad_norm": 0.9835683107376099, + "learning_rate": 6.11531554643407e-06, + "loss": 0.0102, + "step": 19070 + }, + { + "epoch": 0.12237319334387316, + "grad_norm": 0.548466145992279, + "learning_rate": 6.118522319138021e-06, + "loss": 0.01, + "step": 19080 + }, + { + "epoch": 0.12243733023765926, + "grad_norm": 0.1948307752609253, + "learning_rate": 6.12172909184197e-06, + "loss": 0.008, + "step": 19090 + }, + { + "epoch": 0.12250146713144536, + "grad_norm": 0.28201523423194885, + "learning_rate": 6.124935864545921e-06, + "loss": 0.008, + "step": 19100 + }, + { + "epoch": 0.12256560402523145, + "grad_norm": 0.35290905833244324, + "learning_rate": 6.128142637249872e-06, + "loss": 0.0074, + "step": 19110 + }, + { + "epoch": 0.12262974091901756, + "grad_norm": 0.45489513874053955, + "learning_rate": 6.131349409953823e-06, + "loss": 0.0123, + "step": 19120 + }, + { + "epoch": 0.12269387781280365, + "grad_norm": 0.23540635406970978, + "learning_rate": 6.134556182657774e-06, + "loss": 0.009, + "step": 19130 + }, + { + "epoch": 0.12275801470658974, + "grad_norm": 0.33407628536224365, + "learning_rate": 6.137762955361725e-06, + "loss": 0.0103, + "step": 19140 + }, + { + "epoch": 0.12282215160037584, + "grad_norm": 0.6046065092086792, + "learning_rate": 6.140969728065675e-06, + "loss": 0.0083, + "step": 19150 + }, + { + "epoch": 0.12288628849416194, + "grad_norm": 0.2274067997932434, + "learning_rate": 6.144176500769626e-06, + "loss": 0.0065, + "step": 19160 + }, + { + "epoch": 0.12295042538794804, + "grad_norm": 0.2769779562950134, + "learning_rate": 6.147383273473577e-06, + "loss": 0.0086, + "step": 19170 + }, + { + "epoch": 0.12301456228173413, + "grad_norm": 0.7009875774383545, + "learning_rate": 6.150590046177527e-06, + "loss": 0.0067, + "step": 19180 + }, + { + "epoch": 0.12307869917552024, + "grad_norm": 0.6626484394073486, + "learning_rate": 6.153796818881478e-06, + "loss": 0.023, + "step": 19190 + }, + { + "epoch": 0.12314283606930633, + "grad_norm": 0.43245089054107666, + "learning_rate": 6.157003591585429e-06, + "loss": 0.0103, + "step": 19200 + }, + { + "epoch": 0.12320697296309242, + "grad_norm": 0.43614763021469116, + "learning_rate": 6.160210364289379e-06, + "loss": 0.0076, + "step": 19210 + }, + { + "epoch": 0.12327110985687852, + "grad_norm": 0.5906426310539246, + "learning_rate": 6.16341713699333e-06, + "loss": 0.0117, + "step": 19220 + }, + { + "epoch": 0.12333524675066462, + "grad_norm": 0.40125492215156555, + "learning_rate": 6.1666239096972815e-06, + "loss": 0.0064, + "step": 19230 + }, + { + "epoch": 0.12339938364445072, + "grad_norm": 0.6000845432281494, + "learning_rate": 6.169830682401232e-06, + "loss": 0.0071, + "step": 19240 + }, + { + "epoch": 0.12346352053823681, + "grad_norm": 0.6457440257072449, + "learning_rate": 6.173037455105183e-06, + "loss": 0.0074, + "step": 19250 + }, + { + "epoch": 0.12352765743202292, + "grad_norm": 0.29348060488700867, + "learning_rate": 6.176244227809134e-06, + "loss": 0.0065, + "step": 19260 + }, + { + "epoch": 0.12359179432580901, + "grad_norm": 0.5351219177246094, + "learning_rate": 6.1794510005130834e-06, + "loss": 0.0086, + "step": 19270 + }, + { + "epoch": 0.1236559312195951, + "grad_norm": 0.6515761613845825, + "learning_rate": 6.1826577732170345e-06, + "loss": 0.0105, + "step": 19280 + }, + { + "epoch": 0.1237200681133812, + "grad_norm": 0.3923013508319855, + "learning_rate": 6.185864545920986e-06, + "loss": 0.0096, + "step": 19290 + }, + { + "epoch": 0.1237842050071673, + "grad_norm": 0.48430967330932617, + "learning_rate": 6.189071318624936e-06, + "loss": 0.0058, + "step": 19300 + }, + { + "epoch": 0.1238483419009534, + "grad_norm": 0.24645860493183136, + "learning_rate": 6.192278091328887e-06, + "loss": 0.0073, + "step": 19310 + }, + { + "epoch": 0.12391247879473949, + "grad_norm": 0.27493560314178467, + "learning_rate": 6.195484864032838e-06, + "loss": 0.0062, + "step": 19320 + }, + { + "epoch": 0.12397661568852558, + "grad_norm": 0.48881635069847107, + "learning_rate": 6.198691636736789e-06, + "loss": 0.008, + "step": 19330 + }, + { + "epoch": 0.12404075258231169, + "grad_norm": 0.5472639799118042, + "learning_rate": 6.2018984094407395e-06, + "loss": 0.0108, + "step": 19340 + }, + { + "epoch": 0.12410488947609778, + "grad_norm": 0.5145623683929443, + "learning_rate": 6.205105182144691e-06, + "loss": 0.01, + "step": 19350 + }, + { + "epoch": 0.12416902636988388, + "grad_norm": 0.6084321141242981, + "learning_rate": 6.20831195484864e-06, + "loss": 0.0101, + "step": 19360 + }, + { + "epoch": 0.12423316326366998, + "grad_norm": 0.11005605757236481, + "learning_rate": 6.211518727552591e-06, + "loss": 0.0054, + "step": 19370 + }, + { + "epoch": 0.12429730015745608, + "grad_norm": 0.46959608793258667, + "learning_rate": 6.214725500256542e-06, + "loss": 0.0072, + "step": 19380 + }, + { + "epoch": 0.12436143705124217, + "grad_norm": 0.7657153606414795, + "learning_rate": 6.2179322729604926e-06, + "loss": 0.0095, + "step": 19390 + }, + { + "epoch": 0.12442557394502826, + "grad_norm": 0.552920401096344, + "learning_rate": 6.221139045664444e-06, + "loss": 0.0088, + "step": 19400 + }, + { + "epoch": 0.12448971083881437, + "grad_norm": 0.5344424247741699, + "learning_rate": 6.224345818368395e-06, + "loss": 0.0085, + "step": 19410 + }, + { + "epoch": 0.12455384773260046, + "grad_norm": 0.3879581689834595, + "learning_rate": 6.227552591072346e-06, + "loss": 0.0079, + "step": 19420 + }, + { + "epoch": 0.12461798462638655, + "grad_norm": 0.521867036819458, + "learning_rate": 6.230759363776296e-06, + "loss": 0.0208, + "step": 19430 + }, + { + "epoch": 0.12468212152017266, + "grad_norm": 0.6008889079093933, + "learning_rate": 6.233966136480247e-06, + "loss": 0.0084, + "step": 19440 + }, + { + "epoch": 0.12474625841395875, + "grad_norm": 0.25370872020721436, + "learning_rate": 6.237172909184197e-06, + "loss": 0.0095, + "step": 19450 + }, + { + "epoch": 0.12481039530774485, + "grad_norm": 0.3903500437736511, + "learning_rate": 6.240379681888148e-06, + "loss": 0.011, + "step": 19460 + }, + { + "epoch": 0.12487453220153094, + "grad_norm": 0.3653107285499573, + "learning_rate": 6.243586454592099e-06, + "loss": 0.0052, + "step": 19470 + }, + { + "epoch": 0.12493866909531705, + "grad_norm": 0.25027909874916077, + "learning_rate": 6.24679322729605e-06, + "loss": 0.0106, + "step": 19480 + }, + { + "epoch": 0.12500280598910316, + "grad_norm": 0.2770934998989105, + "learning_rate": 6.25e-06, + "loss": 0.0063, + "step": 19490 + }, + { + "epoch": 0.12506694288288925, + "grad_norm": 0.6216777563095093, + "learning_rate": 6.253206772703951e-06, + "loss": 0.0081, + "step": 19500 + }, + { + "epoch": 0.12513107977667534, + "grad_norm": 0.46903955936431885, + "learning_rate": 6.2564135454079025e-06, + "loss": 0.0116, + "step": 19510 + }, + { + "epoch": 0.12519521667046143, + "grad_norm": 0.5048975348472595, + "learning_rate": 6.259620318111853e-06, + "loss": 0.0094, + "step": 19520 + }, + { + "epoch": 0.12525935356424753, + "grad_norm": 0.6152871251106262, + "learning_rate": 6.262827090815804e-06, + "loss": 0.0125, + "step": 19530 + }, + { + "epoch": 0.12532349045803362, + "grad_norm": 0.5237569808959961, + "learning_rate": 6.266033863519753e-06, + "loss": 0.0143, + "step": 19540 + }, + { + "epoch": 0.1253876273518197, + "grad_norm": 0.6239390969276428, + "learning_rate": 6.2692406362237044e-06, + "loss": 0.0111, + "step": 19550 + }, + { + "epoch": 0.12545176424560583, + "grad_norm": 0.6508604288101196, + "learning_rate": 6.2724474089276556e-06, + "loss": 0.0093, + "step": 19560 + }, + { + "epoch": 0.12551590113939193, + "grad_norm": 0.29125237464904785, + "learning_rate": 6.275654181631607e-06, + "loss": 0.0084, + "step": 19570 + }, + { + "epoch": 0.12558003803317802, + "grad_norm": 0.5597888827323914, + "learning_rate": 6.278860954335557e-06, + "loss": 0.0094, + "step": 19580 + }, + { + "epoch": 0.12564417492696411, + "grad_norm": 0.3374530076980591, + "learning_rate": 6.282067727039508e-06, + "loss": 0.0069, + "step": 19590 + }, + { + "epoch": 0.1257083118207502, + "grad_norm": 0.6125375628471375, + "learning_rate": 6.285274499743459e-06, + "loss": 0.0111, + "step": 19600 + }, + { + "epoch": 0.1257724487145363, + "grad_norm": 0.40054383873939514, + "learning_rate": 6.288481272447409e-06, + "loss": 0.0066, + "step": 19610 + }, + { + "epoch": 0.1258365856083224, + "grad_norm": 0.18805621564388275, + "learning_rate": 6.2916880451513605e-06, + "loss": 0.0095, + "step": 19620 + }, + { + "epoch": 0.12590072250210851, + "grad_norm": 0.34585040807724, + "learning_rate": 6.294894817855311e-06, + "loss": 0.0071, + "step": 19630 + }, + { + "epoch": 0.1259648593958946, + "grad_norm": 0.5074530839920044, + "learning_rate": 6.298101590559261e-06, + "loss": 0.0055, + "step": 19640 + }, + { + "epoch": 0.1260289962896807, + "grad_norm": 0.27524951100349426, + "learning_rate": 6.301308363263212e-06, + "loss": 0.007, + "step": 19650 + }, + { + "epoch": 0.1260931331834668, + "grad_norm": 0.4609309434890747, + "learning_rate": 6.304515135967163e-06, + "loss": 0.0065, + "step": 19660 + }, + { + "epoch": 0.1261572700772529, + "grad_norm": 0.46933484077453613, + "learning_rate": 6.3077219086711136e-06, + "loss": 0.0091, + "step": 19670 + }, + { + "epoch": 0.12622140697103898, + "grad_norm": 0.542717456817627, + "learning_rate": 6.310928681375065e-06, + "loss": 0.0097, + "step": 19680 + }, + { + "epoch": 0.12628554386482507, + "grad_norm": 0.3351193368434906, + "learning_rate": 6.314135454079016e-06, + "loss": 0.0067, + "step": 19690 + }, + { + "epoch": 0.12634968075861117, + "grad_norm": 0.495302677154541, + "learning_rate": 6.317342226782967e-06, + "loss": 0.0101, + "step": 19700 + }, + { + "epoch": 0.1264138176523973, + "grad_norm": 0.20892836153507233, + "learning_rate": 6.320548999486917e-06, + "loss": 0.0071, + "step": 19710 + }, + { + "epoch": 0.12647795454618338, + "grad_norm": 0.417507529258728, + "learning_rate": 6.3237557721908674e-06, + "loss": 0.0106, + "step": 19720 + }, + { + "epoch": 0.12654209143996947, + "grad_norm": 0.17814181745052338, + "learning_rate": 6.326962544894818e-06, + "loss": 0.0088, + "step": 19730 + }, + { + "epoch": 0.12660622833375557, + "grad_norm": 0.524132251739502, + "learning_rate": 6.330169317598769e-06, + "loss": 0.0077, + "step": 19740 + }, + { + "epoch": 0.12667036522754166, + "grad_norm": 1.6826726198196411, + "learning_rate": 6.33337609030272e-06, + "loss": 0.0126, + "step": 19750 + }, + { + "epoch": 0.12673450212132775, + "grad_norm": 0.45619019865989685, + "learning_rate": 6.33658286300667e-06, + "loss": 0.0099, + "step": 19760 + }, + { + "epoch": 0.12679863901511385, + "grad_norm": 0.6970486640930176, + "learning_rate": 6.339789635710621e-06, + "loss": 0.0101, + "step": 19770 + }, + { + "epoch": 0.12686277590889997, + "grad_norm": 0.5018978118896484, + "learning_rate": 6.342996408414572e-06, + "loss": 0.0086, + "step": 19780 + }, + { + "epoch": 0.12692691280268606, + "grad_norm": 0.8621292114257812, + "learning_rate": 6.3462031811185235e-06, + "loss": 0.0192, + "step": 19790 + }, + { + "epoch": 0.12699104969647215, + "grad_norm": 0.6635532975196838, + "learning_rate": 6.349409953822474e-06, + "loss": 0.0082, + "step": 19800 + }, + { + "epoch": 0.12705518659025825, + "grad_norm": 0.18149831891059875, + "learning_rate": 6.352616726526424e-06, + "loss": 0.0096, + "step": 19810 + }, + { + "epoch": 0.12711932348404434, + "grad_norm": 0.22729986906051636, + "learning_rate": 6.355823499230374e-06, + "loss": 0.0109, + "step": 19820 + }, + { + "epoch": 0.12718346037783043, + "grad_norm": 0.6454578042030334, + "learning_rate": 6.3590302719343254e-06, + "loss": 0.0081, + "step": 19830 + }, + { + "epoch": 0.12724759727161652, + "grad_norm": 0.4148944020271301, + "learning_rate": 6.3622370446382766e-06, + "loss": 0.0118, + "step": 19840 + }, + { + "epoch": 0.12731173416540265, + "grad_norm": 0.40859290957450867, + "learning_rate": 6.365443817342228e-06, + "loss": 0.0095, + "step": 19850 + }, + { + "epoch": 0.12737587105918874, + "grad_norm": 0.3300305902957916, + "learning_rate": 6.368650590046178e-06, + "loss": 0.009, + "step": 19860 + }, + { + "epoch": 0.12744000795297483, + "grad_norm": 0.5568016767501831, + "learning_rate": 6.371857362750129e-06, + "loss": 0.0101, + "step": 19870 + }, + { + "epoch": 0.12750414484676093, + "grad_norm": 0.9923796653747559, + "learning_rate": 6.37506413545408e-06, + "loss": 0.0124, + "step": 19880 + }, + { + "epoch": 0.12756828174054702, + "grad_norm": 0.5063279867172241, + "learning_rate": 6.3782709081580304e-06, + "loss": 0.0097, + "step": 19890 + }, + { + "epoch": 0.1276324186343331, + "grad_norm": 0.6805953979492188, + "learning_rate": 6.381477680861981e-06, + "loss": 0.0103, + "step": 19900 + }, + { + "epoch": 0.1276965555281192, + "grad_norm": 0.19922682642936707, + "learning_rate": 6.384684453565931e-06, + "loss": 0.0096, + "step": 19910 + }, + { + "epoch": 0.12776069242190533, + "grad_norm": 0.5955575108528137, + "learning_rate": 6.387891226269882e-06, + "loss": 0.0098, + "step": 19920 + }, + { + "epoch": 0.12782482931569142, + "grad_norm": 0.872285008430481, + "learning_rate": 6.391097998973833e-06, + "loss": 0.0091, + "step": 19930 + }, + { + "epoch": 0.1278889662094775, + "grad_norm": 0.9698531627655029, + "learning_rate": 6.394304771677784e-06, + "loss": 0.0094, + "step": 19940 + }, + { + "epoch": 0.1279531031032636, + "grad_norm": 0.3564271330833435, + "learning_rate": 6.3975115443817346e-06, + "loss": 0.0066, + "step": 19950 + }, + { + "epoch": 0.1280172399970497, + "grad_norm": 0.6537336707115173, + "learning_rate": 6.400718317085686e-06, + "loss": 0.0096, + "step": 19960 + }, + { + "epoch": 0.1280813768908358, + "grad_norm": 0.30640843510627747, + "learning_rate": 6.403925089789637e-06, + "loss": 0.0057, + "step": 19970 + }, + { + "epoch": 0.12814551378462188, + "grad_norm": 0.5473002791404724, + "learning_rate": 6.407131862493587e-06, + "loss": 0.0079, + "step": 19980 + }, + { + "epoch": 0.128209650678408, + "grad_norm": 0.5697849988937378, + "learning_rate": 6.410338635197537e-06, + "loss": 0.0116, + "step": 19990 + }, + { + "epoch": 0.1282737875721941, + "grad_norm": 0.3163318932056427, + "learning_rate": 6.4135454079014884e-06, + "loss": 0.0116, + "step": 20000 + }, + { + "epoch": 0.1283379244659802, + "grad_norm": 0.408660352230072, + "learning_rate": 6.416752180605439e-06, + "loss": 0.0094, + "step": 20010 + }, + { + "epoch": 0.12840206135976628, + "grad_norm": 0.40231066942214966, + "learning_rate": 6.41995895330939e-06, + "loss": 0.0077, + "step": 20020 + }, + { + "epoch": 0.12846619825355238, + "grad_norm": 0.38235190510749817, + "learning_rate": 6.423165726013341e-06, + "loss": 0.0059, + "step": 20030 + }, + { + "epoch": 0.12853033514733847, + "grad_norm": 0.6513561010360718, + "learning_rate": 6.426372498717291e-06, + "loss": 0.0101, + "step": 20040 + }, + { + "epoch": 0.12859447204112456, + "grad_norm": 0.6265757083892822, + "learning_rate": 6.429579271421242e-06, + "loss": 0.0067, + "step": 20050 + }, + { + "epoch": 0.12865860893491068, + "grad_norm": 0.17988623678684235, + "learning_rate": 6.432786044125193e-06, + "loss": 0.0079, + "step": 20060 + }, + { + "epoch": 0.12872274582869678, + "grad_norm": 0.347770631313324, + "learning_rate": 6.4359928168291445e-06, + "loss": 0.0088, + "step": 20070 + }, + { + "epoch": 0.12878688272248287, + "grad_norm": 0.302829384803772, + "learning_rate": 6.439199589533094e-06, + "loss": 0.0099, + "step": 20080 + }, + { + "epoch": 0.12885101961626896, + "grad_norm": 0.717654824256897, + "learning_rate": 6.442406362237045e-06, + "loss": 0.0112, + "step": 20090 + }, + { + "epoch": 0.12891515651005506, + "grad_norm": 0.38352274894714355, + "learning_rate": 6.445613134940995e-06, + "loss": 0.0151, + "step": 20100 + }, + { + "epoch": 0.12897929340384115, + "grad_norm": 0.6565334796905518, + "learning_rate": 6.4488199076449464e-06, + "loss": 0.0129, + "step": 20110 + }, + { + "epoch": 0.12904343029762724, + "grad_norm": 0.38155457377433777, + "learning_rate": 6.4520266803488976e-06, + "loss": 0.0085, + "step": 20120 + }, + { + "epoch": 0.12910756719141336, + "grad_norm": 0.37201428413391113, + "learning_rate": 6.455233453052848e-06, + "loss": 0.012, + "step": 20130 + }, + { + "epoch": 0.12917170408519946, + "grad_norm": 0.2879607081413269, + "learning_rate": 6.458440225756799e-06, + "loss": 0.0079, + "step": 20140 + }, + { + "epoch": 0.12923584097898555, + "grad_norm": 0.5509543418884277, + "learning_rate": 6.46164699846075e-06, + "loss": 0.0077, + "step": 20150 + }, + { + "epoch": 0.12929997787277164, + "grad_norm": 0.39315474033355713, + "learning_rate": 6.464853771164701e-06, + "loss": 0.0096, + "step": 20160 + }, + { + "epoch": 0.12936411476655774, + "grad_norm": 0.5400700569152832, + "learning_rate": 6.468060543868651e-06, + "loss": 0.0067, + "step": 20170 + }, + { + "epoch": 0.12942825166034383, + "grad_norm": 0.5487892627716064, + "learning_rate": 6.471267316572602e-06, + "loss": 0.0079, + "step": 20180 + }, + { + "epoch": 0.12949238855412992, + "grad_norm": 0.7011224031448364, + "learning_rate": 6.474474089276552e-06, + "loss": 0.0152, + "step": 20190 + }, + { + "epoch": 0.12955652544791604, + "grad_norm": 0.45283573865890503, + "learning_rate": 6.477680861980503e-06, + "loss": 0.0059, + "step": 20200 + }, + { + "epoch": 0.12962066234170214, + "grad_norm": 0.8114439845085144, + "learning_rate": 6.480887634684454e-06, + "loss": 0.0114, + "step": 20210 + }, + { + "epoch": 0.12968479923548823, + "grad_norm": 0.42993730306625366, + "learning_rate": 6.484094407388405e-06, + "loss": 0.0073, + "step": 20220 + }, + { + "epoch": 0.12974893612927432, + "grad_norm": 0.30626970529556274, + "learning_rate": 6.4873011800923556e-06, + "loss": 0.0091, + "step": 20230 + }, + { + "epoch": 0.12981307302306042, + "grad_norm": 0.41342639923095703, + "learning_rate": 6.490507952796307e-06, + "loss": 0.006, + "step": 20240 + }, + { + "epoch": 0.1298772099168465, + "grad_norm": 0.691035807132721, + "learning_rate": 6.493714725500258e-06, + "loss": 0.0187, + "step": 20250 + }, + { + "epoch": 0.1299413468106326, + "grad_norm": 0.6163961887359619, + "learning_rate": 6.496921498204208e-06, + "loss": 0.008, + "step": 20260 + }, + { + "epoch": 0.13000548370441872, + "grad_norm": 0.29074427485466003, + "learning_rate": 6.500128270908158e-06, + "loss": 0.0076, + "step": 20270 + }, + { + "epoch": 0.13006962059820482, + "grad_norm": 0.38345515727996826, + "learning_rate": 6.503335043612109e-06, + "loss": 0.0076, + "step": 20280 + }, + { + "epoch": 0.1301337574919909, + "grad_norm": 0.510400652885437, + "learning_rate": 6.50654181631606e-06, + "loss": 0.0078, + "step": 20290 + }, + { + "epoch": 0.130197894385777, + "grad_norm": 0.3353305459022522, + "learning_rate": 6.509748589020011e-06, + "loss": 0.0107, + "step": 20300 + }, + { + "epoch": 0.1302620312795631, + "grad_norm": 0.20535027980804443, + "learning_rate": 6.512955361723962e-06, + "loss": 0.0066, + "step": 20310 + }, + { + "epoch": 0.1303261681733492, + "grad_norm": 0.6824355721473694, + "learning_rate": 6.516162134427912e-06, + "loss": 0.0091, + "step": 20320 + }, + { + "epoch": 0.13039030506713528, + "grad_norm": 0.3136247992515564, + "learning_rate": 6.519368907131863e-06, + "loss": 0.0069, + "step": 20330 + }, + { + "epoch": 0.1304544419609214, + "grad_norm": 0.4937147796154022, + "learning_rate": 6.522575679835814e-06, + "loss": 0.0086, + "step": 20340 + }, + { + "epoch": 0.1305185788547075, + "grad_norm": 0.3378153443336487, + "learning_rate": 6.525782452539765e-06, + "loss": 0.0058, + "step": 20350 + }, + { + "epoch": 0.1305827157484936, + "grad_norm": 0.6026080250740051, + "learning_rate": 6.528989225243715e-06, + "loss": 0.0125, + "step": 20360 + }, + { + "epoch": 0.13064685264227968, + "grad_norm": 0.25930720567703247, + "learning_rate": 6.532195997947666e-06, + "loss": 0.0068, + "step": 20370 + }, + { + "epoch": 0.13071098953606577, + "grad_norm": 0.46107766032218933, + "learning_rate": 6.535402770651616e-06, + "loss": 0.0104, + "step": 20380 + }, + { + "epoch": 0.13077512642985187, + "grad_norm": 0.47708791494369507, + "learning_rate": 6.5386095433555674e-06, + "loss": 0.0074, + "step": 20390 + }, + { + "epoch": 0.13083926332363796, + "grad_norm": 0.7677677869796753, + "learning_rate": 6.5418163160595186e-06, + "loss": 0.0092, + "step": 20400 + }, + { + "epoch": 0.13090340021742408, + "grad_norm": 0.3615362346172333, + "learning_rate": 6.545023088763469e-06, + "loss": 0.0092, + "step": 20410 + }, + { + "epoch": 0.13096753711121017, + "grad_norm": 0.49676114320755005, + "learning_rate": 6.54822986146742e-06, + "loss": 0.0054, + "step": 20420 + }, + { + "epoch": 0.13103167400499627, + "grad_norm": 0.47132083773612976, + "learning_rate": 6.551436634171371e-06, + "loss": 0.0087, + "step": 20430 + }, + { + "epoch": 0.13109581089878236, + "grad_norm": 0.4946648180484772, + "learning_rate": 6.554643406875322e-06, + "loss": 0.01, + "step": 20440 + }, + { + "epoch": 0.13115994779256845, + "grad_norm": 0.41357100009918213, + "learning_rate": 6.557850179579272e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 0.13122408468635455, + "grad_norm": 0.7563621401786804, + "learning_rate": 6.561056952283223e-06, + "loss": 0.0103, + "step": 20460 + }, + { + "epoch": 0.13128822158014064, + "grad_norm": 0.39744338393211365, + "learning_rate": 6.564263724987173e-06, + "loss": 0.0081, + "step": 20470 + }, + { + "epoch": 0.13135235847392676, + "grad_norm": 0.43926945328712463, + "learning_rate": 6.567470497691124e-06, + "loss": 0.0094, + "step": 20480 + }, + { + "epoch": 0.13141649536771285, + "grad_norm": 0.3090771436691284, + "learning_rate": 6.570677270395075e-06, + "loss": 0.0086, + "step": 20490 + }, + { + "epoch": 0.13148063226149895, + "grad_norm": 0.5703118443489075, + "learning_rate": 6.5738840430990255e-06, + "loss": 0.0087, + "step": 20500 + }, + { + "epoch": 0.13154476915528504, + "grad_norm": 0.4070364534854889, + "learning_rate": 6.5770908158029766e-06, + "loss": 0.0081, + "step": 20510 + }, + { + "epoch": 0.13160890604907113, + "grad_norm": 0.9661247730255127, + "learning_rate": 6.580297588506928e-06, + "loss": 0.0106, + "step": 20520 + }, + { + "epoch": 0.13167304294285723, + "grad_norm": 0.43930143117904663, + "learning_rate": 6.583504361210879e-06, + "loss": 0.0097, + "step": 20530 + }, + { + "epoch": 0.13173717983664332, + "grad_norm": 0.6114511489868164, + "learning_rate": 6.586711133914828e-06, + "loss": 0.0082, + "step": 20540 + }, + { + "epoch": 0.13180131673042944, + "grad_norm": 0.46881040930747986, + "learning_rate": 6.589917906618779e-06, + "loss": 0.0083, + "step": 20550 + }, + { + "epoch": 0.13186545362421553, + "grad_norm": 0.6186842322349548, + "learning_rate": 6.59312467932273e-06, + "loss": 0.01, + "step": 20560 + }, + { + "epoch": 0.13192959051800163, + "grad_norm": 0.0744633749127388, + "learning_rate": 6.596331452026681e-06, + "loss": 0.0078, + "step": 20570 + }, + { + "epoch": 0.13199372741178772, + "grad_norm": 0.3463693857192993, + "learning_rate": 6.599538224730632e-06, + "loss": 0.0055, + "step": 20580 + }, + { + "epoch": 0.1320578643055738, + "grad_norm": 0.49897441267967224, + "learning_rate": 6.602744997434583e-06, + "loss": 0.0091, + "step": 20590 + }, + { + "epoch": 0.1321220011993599, + "grad_norm": 0.4889982044696808, + "learning_rate": 6.605951770138533e-06, + "loss": 0.0094, + "step": 20600 + }, + { + "epoch": 0.132186138093146, + "grad_norm": 0.4408458173274994, + "learning_rate": 6.609158542842484e-06, + "loss": 0.0093, + "step": 20610 + }, + { + "epoch": 0.13225027498693212, + "grad_norm": 0.8227345943450928, + "learning_rate": 6.612365315546435e-06, + "loss": 0.0129, + "step": 20620 + }, + { + "epoch": 0.1323144118807182, + "grad_norm": 0.33699744939804077, + "learning_rate": 6.615572088250385e-06, + "loss": 0.0093, + "step": 20630 + }, + { + "epoch": 0.1323785487745043, + "grad_norm": 0.28413769602775574, + "learning_rate": 6.618778860954336e-06, + "loss": 0.0067, + "step": 20640 + }, + { + "epoch": 0.1324426856682904, + "grad_norm": 1.1441147327423096, + "learning_rate": 6.621985633658286e-06, + "loss": 0.0107, + "step": 20650 + }, + { + "epoch": 0.1325068225620765, + "grad_norm": 0.26640936732292175, + "learning_rate": 6.625192406362237e-06, + "loss": 0.0072, + "step": 20660 + }, + { + "epoch": 0.13257095945586259, + "grad_norm": 0.322395384311676, + "learning_rate": 6.6283991790661884e-06, + "loss": 0.007, + "step": 20670 + }, + { + "epoch": 0.13263509634964868, + "grad_norm": 0.727817177772522, + "learning_rate": 6.6316059517701396e-06, + "loss": 0.0095, + "step": 20680 + }, + { + "epoch": 0.1326992332434348, + "grad_norm": 0.5444375872612, + "learning_rate": 6.63481272447409e-06, + "loss": 0.0076, + "step": 20690 + }, + { + "epoch": 0.1327633701372209, + "grad_norm": 0.5482738018035889, + "learning_rate": 6.638019497178041e-06, + "loss": 0.0071, + "step": 20700 + }, + { + "epoch": 0.13282750703100699, + "grad_norm": 0.38756558299064636, + "learning_rate": 6.641226269881992e-06, + "loss": 0.0057, + "step": 20710 + }, + { + "epoch": 0.13289164392479308, + "grad_norm": 0.4515531063079834, + "learning_rate": 6.6444330425859415e-06, + "loss": 0.0098, + "step": 20720 + }, + { + "epoch": 0.13295578081857917, + "grad_norm": 0.3611808717250824, + "learning_rate": 6.647639815289893e-06, + "loss": 0.0066, + "step": 20730 + }, + { + "epoch": 0.13301991771236527, + "grad_norm": 0.49165627360343933, + "learning_rate": 6.650846587993844e-06, + "loss": 0.0098, + "step": 20740 + }, + { + "epoch": 0.13308405460615136, + "grad_norm": 0.20099137723445892, + "learning_rate": 6.654053360697794e-06, + "loss": 0.0073, + "step": 20750 + }, + { + "epoch": 0.13314819149993748, + "grad_norm": 0.2407764047384262, + "learning_rate": 6.657260133401745e-06, + "loss": 0.0099, + "step": 20760 + }, + { + "epoch": 0.13321232839372357, + "grad_norm": 0.21744117140769958, + "learning_rate": 6.660466906105696e-06, + "loss": 0.0085, + "step": 20770 + }, + { + "epoch": 0.13327646528750967, + "grad_norm": 0.5023742914199829, + "learning_rate": 6.6636736788096465e-06, + "loss": 0.0141, + "step": 20780 + }, + { + "epoch": 0.13334060218129576, + "grad_norm": 0.2848564684391022, + "learning_rate": 6.6668804515135976e-06, + "loss": 0.009, + "step": 20790 + }, + { + "epoch": 0.13340473907508185, + "grad_norm": 0.46269291639328003, + "learning_rate": 6.670087224217549e-06, + "loss": 0.0105, + "step": 20800 + }, + { + "epoch": 0.13346887596886794, + "grad_norm": 0.2645798623561859, + "learning_rate": 6.673293996921498e-06, + "loss": 0.0048, + "step": 20810 + }, + { + "epoch": 0.13353301286265404, + "grad_norm": 0.7819306254386902, + "learning_rate": 6.676500769625449e-06, + "loss": 0.0073, + "step": 20820 + }, + { + "epoch": 0.13359714975644016, + "grad_norm": 0.5828709006309509, + "learning_rate": 6.6797075423294e-06, + "loss": 0.0105, + "step": 20830 + }, + { + "epoch": 0.13366128665022625, + "grad_norm": 0.18396398425102234, + "learning_rate": 6.682914315033351e-06, + "loss": 0.0074, + "step": 20840 + }, + { + "epoch": 0.13372542354401235, + "grad_norm": 0.34209463000297546, + "learning_rate": 6.686121087737302e-06, + "loss": 0.0058, + "step": 20850 + }, + { + "epoch": 0.13378956043779844, + "grad_norm": 0.6025264859199524, + "learning_rate": 6.689327860441253e-06, + "loss": 0.0063, + "step": 20860 + }, + { + "epoch": 0.13385369733158453, + "grad_norm": 0.7551537156105042, + "learning_rate": 6.692534633145203e-06, + "loss": 0.0081, + "step": 20870 + }, + { + "epoch": 0.13391783422537062, + "grad_norm": 0.7358047962188721, + "learning_rate": 6.695741405849154e-06, + "loss": 0.0136, + "step": 20880 + }, + { + "epoch": 0.13398197111915672, + "grad_norm": 0.5618805289268494, + "learning_rate": 6.698948178553105e-06, + "loss": 0.01, + "step": 20890 + }, + { + "epoch": 0.13404610801294284, + "grad_norm": 0.47844335436820984, + "learning_rate": 6.702154951257055e-06, + "loss": 0.0097, + "step": 20900 + }, + { + "epoch": 0.13411024490672893, + "grad_norm": 0.4649708569049835, + "learning_rate": 6.705361723961006e-06, + "loss": 0.0122, + "step": 20910 + }, + { + "epoch": 0.13417438180051502, + "grad_norm": 0.2919611632823944, + "learning_rate": 6.708568496664957e-06, + "loss": 0.0081, + "step": 20920 + }, + { + "epoch": 0.13423851869430112, + "grad_norm": 0.5163574814796448, + "learning_rate": 6.711775269368907e-06, + "loss": 0.0063, + "step": 20930 + }, + { + "epoch": 0.1343026555880872, + "grad_norm": 0.38330158591270447, + "learning_rate": 6.714982042072858e-06, + "loss": 0.0084, + "step": 20940 + }, + { + "epoch": 0.1343667924818733, + "grad_norm": 0.3098152279853821, + "learning_rate": 6.7181888147768094e-06, + "loss": 0.0108, + "step": 20950 + }, + { + "epoch": 0.1344309293756594, + "grad_norm": 0.5839094519615173, + "learning_rate": 6.7213955874807606e-06, + "loss": 0.0096, + "step": 20960 + }, + { + "epoch": 0.13449506626944552, + "grad_norm": 0.226688951253891, + "learning_rate": 6.724602360184711e-06, + "loss": 0.0122, + "step": 20970 + }, + { + "epoch": 0.1345592031632316, + "grad_norm": 0.7050507068634033, + "learning_rate": 6.727809132888662e-06, + "loss": 0.0094, + "step": 20980 + }, + { + "epoch": 0.1346233400570177, + "grad_norm": 0.34695059061050415, + "learning_rate": 6.731015905592611e-06, + "loss": 0.0078, + "step": 20990 + }, + { + "epoch": 0.1346874769508038, + "grad_norm": 0.34674152731895447, + "learning_rate": 6.7342226782965625e-06, + "loss": 0.0087, + "step": 21000 + }, + { + "epoch": 0.1347516138445899, + "grad_norm": 0.3843696117401123, + "learning_rate": 6.737429451000514e-06, + "loss": 0.0058, + "step": 21010 + }, + { + "epoch": 0.13481575073837598, + "grad_norm": 0.14192050695419312, + "learning_rate": 6.740636223704464e-06, + "loss": 0.0113, + "step": 21020 + }, + { + "epoch": 0.13487988763216208, + "grad_norm": 0.35683688521385193, + "learning_rate": 6.743842996408415e-06, + "loss": 0.0078, + "step": 21030 + }, + { + "epoch": 0.1349440245259482, + "grad_norm": 0.1909664124250412, + "learning_rate": 6.747049769112366e-06, + "loss": 0.0084, + "step": 21040 + }, + { + "epoch": 0.1350081614197343, + "grad_norm": 0.36528199911117554, + "learning_rate": 6.750256541816317e-06, + "loss": 0.01, + "step": 21050 + }, + { + "epoch": 0.13507229831352038, + "grad_norm": 0.26217061281204224, + "learning_rate": 6.7534633145202675e-06, + "loss": 0.0058, + "step": 21060 + }, + { + "epoch": 0.13513643520730648, + "grad_norm": 0.26856938004493713, + "learning_rate": 6.7566700872242186e-06, + "loss": 0.011, + "step": 21070 + }, + { + "epoch": 0.13520057210109257, + "grad_norm": 0.5780160427093506, + "learning_rate": 6.759876859928168e-06, + "loss": 0.0097, + "step": 21080 + }, + { + "epoch": 0.13526470899487866, + "grad_norm": 0.6351631283760071, + "learning_rate": 6.763083632632119e-06, + "loss": 0.0113, + "step": 21090 + }, + { + "epoch": 0.13532884588866476, + "grad_norm": 0.4127849340438843, + "learning_rate": 6.76629040533607e-06, + "loss": 0.0118, + "step": 21100 + }, + { + "epoch": 0.13539298278245088, + "grad_norm": 0.5747127532958984, + "learning_rate": 6.769497178040021e-06, + "loss": 0.0082, + "step": 21110 + }, + { + "epoch": 0.13545711967623697, + "grad_norm": 0.36885106563568115, + "learning_rate": 6.772703950743972e-06, + "loss": 0.0093, + "step": 21120 + }, + { + "epoch": 0.13552125657002306, + "grad_norm": 0.557569682598114, + "learning_rate": 6.775910723447923e-06, + "loss": 0.01, + "step": 21130 + }, + { + "epoch": 0.13558539346380916, + "grad_norm": 0.4298759996891022, + "learning_rate": 6.779117496151874e-06, + "loss": 0.0068, + "step": 21140 + }, + { + "epoch": 0.13564953035759525, + "grad_norm": 0.22811418771743774, + "learning_rate": 6.782324268855824e-06, + "loss": 0.0066, + "step": 21150 + }, + { + "epoch": 0.13571366725138134, + "grad_norm": 0.5950798392295837, + "learning_rate": 6.785531041559775e-06, + "loss": 0.0069, + "step": 21160 + }, + { + "epoch": 0.13577780414516744, + "grad_norm": 0.3545633554458618, + "learning_rate": 6.788737814263725e-06, + "loss": 0.0097, + "step": 21170 + }, + { + "epoch": 0.13584194103895353, + "grad_norm": 0.40008631348609924, + "learning_rate": 6.791944586967676e-06, + "loss": 0.0114, + "step": 21180 + }, + { + "epoch": 0.13590607793273965, + "grad_norm": 0.7489058971405029, + "learning_rate": 6.795151359671627e-06, + "loss": 0.0095, + "step": 21190 + }, + { + "epoch": 0.13597021482652574, + "grad_norm": 0.5277951955795288, + "learning_rate": 6.798358132375578e-06, + "loss": 0.0085, + "step": 21200 + }, + { + "epoch": 0.13603435172031184, + "grad_norm": 0.4878092408180237, + "learning_rate": 6.801564905079528e-06, + "loss": 0.0094, + "step": 21210 + }, + { + "epoch": 0.13609848861409793, + "grad_norm": 0.5894260406494141, + "learning_rate": 6.804771677783479e-06, + "loss": 0.0089, + "step": 21220 + }, + { + "epoch": 0.13616262550788402, + "grad_norm": 0.42668089270591736, + "learning_rate": 6.8079784504874305e-06, + "loss": 0.0082, + "step": 21230 + }, + { + "epoch": 0.13622676240167012, + "grad_norm": 0.18119743466377258, + "learning_rate": 6.811185223191381e-06, + "loss": 0.0073, + "step": 21240 + }, + { + "epoch": 0.1362908992954562, + "grad_norm": 0.49351224303245544, + "learning_rate": 6.814391995895332e-06, + "loss": 0.0093, + "step": 21250 + }, + { + "epoch": 0.13635503618924233, + "grad_norm": 0.4177885353565216, + "learning_rate": 6.817598768599282e-06, + "loss": 0.0095, + "step": 21260 + }, + { + "epoch": 0.13641917308302842, + "grad_norm": 0.5884034633636475, + "learning_rate": 6.820805541303232e-06, + "loss": 0.0065, + "step": 21270 + }, + { + "epoch": 0.13648330997681452, + "grad_norm": 0.34864699840545654, + "learning_rate": 6.8240123140071835e-06, + "loss": 0.0093, + "step": 21280 + }, + { + "epoch": 0.1365474468706006, + "grad_norm": 0.4722350537776947, + "learning_rate": 6.827219086711135e-06, + "loss": 0.0114, + "step": 21290 + }, + { + "epoch": 0.1366115837643867, + "grad_norm": 0.29134905338287354, + "learning_rate": 6.830425859415085e-06, + "loss": 0.0178, + "step": 21300 + }, + { + "epoch": 0.1366757206581728, + "grad_norm": 0.2316485494375229, + "learning_rate": 6.833632632119036e-06, + "loss": 0.0075, + "step": 21310 + }, + { + "epoch": 0.1367398575519589, + "grad_norm": 0.3858979344367981, + "learning_rate": 6.836839404822987e-06, + "loss": 0.0066, + "step": 21320 + }, + { + "epoch": 0.136803994445745, + "grad_norm": 0.30195894837379456, + "learning_rate": 6.840046177526938e-06, + "loss": 0.0086, + "step": 21330 + }, + { + "epoch": 0.1368681313395311, + "grad_norm": 0.39725568890571594, + "learning_rate": 6.8432529502308885e-06, + "loss": 0.0064, + "step": 21340 + }, + { + "epoch": 0.1369322682333172, + "grad_norm": 0.44647717475891113, + "learning_rate": 6.846459722934839e-06, + "loss": 0.0074, + "step": 21350 + }, + { + "epoch": 0.1369964051271033, + "grad_norm": 0.19102302193641663, + "learning_rate": 6.849666495638789e-06, + "loss": 0.0053, + "step": 21360 + }, + { + "epoch": 0.13706054202088938, + "grad_norm": 0.3509165048599243, + "learning_rate": 6.85287326834274e-06, + "loss": 0.0121, + "step": 21370 + }, + { + "epoch": 0.13712467891467547, + "grad_norm": 0.3943997621536255, + "learning_rate": 6.856080041046691e-06, + "loss": 0.0073, + "step": 21380 + }, + { + "epoch": 0.13718881580846157, + "grad_norm": 0.29147347807884216, + "learning_rate": 6.8592868137506415e-06, + "loss": 0.0088, + "step": 21390 + }, + { + "epoch": 0.1372529527022477, + "grad_norm": 0.4324707090854645, + "learning_rate": 6.862493586454593e-06, + "loss": 0.0108, + "step": 21400 + }, + { + "epoch": 0.13731708959603378, + "grad_norm": 0.5054298043251038, + "learning_rate": 6.865700359158544e-06, + "loss": 0.0067, + "step": 21410 + }, + { + "epoch": 0.13738122648981987, + "grad_norm": 0.3810000717639923, + "learning_rate": 6.868907131862495e-06, + "loss": 0.0118, + "step": 21420 + }, + { + "epoch": 0.13744536338360597, + "grad_norm": 0.6489289402961731, + "learning_rate": 6.872113904566445e-06, + "loss": 0.0089, + "step": 21430 + }, + { + "epoch": 0.13750950027739206, + "grad_norm": 0.5280774831771851, + "learning_rate": 6.875320677270395e-06, + "loss": 0.0132, + "step": 21440 + }, + { + "epoch": 0.13757363717117815, + "grad_norm": 0.5226761698722839, + "learning_rate": 6.878527449974346e-06, + "loss": 0.0094, + "step": 21450 + }, + { + "epoch": 0.13763777406496425, + "grad_norm": 0.3569224178791046, + "learning_rate": 6.881734222678297e-06, + "loss": 0.0079, + "step": 21460 + }, + { + "epoch": 0.13770191095875037, + "grad_norm": 0.35034656524658203, + "learning_rate": 6.884940995382248e-06, + "loss": 0.009, + "step": 21470 + }, + { + "epoch": 0.13776604785253646, + "grad_norm": 0.4666964113712311, + "learning_rate": 6.888147768086199e-06, + "loss": 0.0128, + "step": 21480 + }, + { + "epoch": 0.13783018474632255, + "grad_norm": 0.49302372336387634, + "learning_rate": 6.891354540790149e-06, + "loss": 0.0087, + "step": 21490 + }, + { + "epoch": 0.13789432164010865, + "grad_norm": 0.44650712609291077, + "learning_rate": 6.8945613134941e-06, + "loss": 0.0075, + "step": 21500 + }, + { + "epoch": 0.13795845853389474, + "grad_norm": 0.26706868410110474, + "learning_rate": 6.8977680861980515e-06, + "loss": 0.0086, + "step": 21510 + }, + { + "epoch": 0.13802259542768083, + "grad_norm": 0.6490288972854614, + "learning_rate": 6.900974858902002e-06, + "loss": 0.0102, + "step": 21520 + }, + { + "epoch": 0.13808673232146693, + "grad_norm": 0.48663124442100525, + "learning_rate": 6.904181631605952e-06, + "loss": 0.0053, + "step": 21530 + }, + { + "epoch": 0.13815086921525305, + "grad_norm": 0.4921961724758148, + "learning_rate": 6.907388404309902e-06, + "loss": 0.0074, + "step": 21540 + }, + { + "epoch": 0.13821500610903914, + "grad_norm": 0.48281329870224, + "learning_rate": 6.910595177013853e-06, + "loss": 0.0067, + "step": 21550 + }, + { + "epoch": 0.13827914300282523, + "grad_norm": 0.4729806184768677, + "learning_rate": 6.9138019497178045e-06, + "loss": 0.0106, + "step": 21560 + }, + { + "epoch": 0.13834327989661133, + "grad_norm": 0.5231598615646362, + "learning_rate": 6.917008722421756e-06, + "loss": 0.0107, + "step": 21570 + }, + { + "epoch": 0.13840741679039742, + "grad_norm": 0.2413243055343628, + "learning_rate": 6.920215495125706e-06, + "loss": 0.0091, + "step": 21580 + }, + { + "epoch": 0.1384715536841835, + "grad_norm": 0.49566343426704407, + "learning_rate": 6.923422267829657e-06, + "loss": 0.0066, + "step": 21590 + }, + { + "epoch": 0.1385356905779696, + "grad_norm": 0.4083350896835327, + "learning_rate": 6.926629040533608e-06, + "loss": 0.008, + "step": 21600 + }, + { + "epoch": 0.13859982747175573, + "grad_norm": 0.3106785714626312, + "learning_rate": 6.929835813237558e-06, + "loss": 0.0051, + "step": 21610 + }, + { + "epoch": 0.13866396436554182, + "grad_norm": 0.5513443350791931, + "learning_rate": 6.9330425859415095e-06, + "loss": 0.0125, + "step": 21620 + }, + { + "epoch": 0.1387281012593279, + "grad_norm": 0.607825756072998, + "learning_rate": 6.93624935864546e-06, + "loss": 0.0099, + "step": 21630 + }, + { + "epoch": 0.138792238153114, + "grad_norm": 0.30102694034576416, + "learning_rate": 6.93945613134941e-06, + "loss": 0.0077, + "step": 21640 + }, + { + "epoch": 0.1388563750469001, + "grad_norm": 0.3206464350223541, + "learning_rate": 6.942662904053361e-06, + "loss": 0.0072, + "step": 21650 + }, + { + "epoch": 0.1389205119406862, + "grad_norm": 0.8913500308990479, + "learning_rate": 6.945869676757312e-06, + "loss": 0.0079, + "step": 21660 + }, + { + "epoch": 0.13898464883447229, + "grad_norm": 0.4227841794490814, + "learning_rate": 6.9490764494612625e-06, + "loss": 0.01, + "step": 21670 + }, + { + "epoch": 0.1390487857282584, + "grad_norm": 0.4441201388835907, + "learning_rate": 6.952283222165214e-06, + "loss": 0.0101, + "step": 21680 + }, + { + "epoch": 0.1391129226220445, + "grad_norm": 0.40509462356567383, + "learning_rate": 6.955489994869165e-06, + "loss": 0.009, + "step": 21690 + }, + { + "epoch": 0.1391770595158306, + "grad_norm": 0.35223808884620667, + "learning_rate": 6.958696767573116e-06, + "loss": 0.0073, + "step": 21700 + }, + { + "epoch": 0.13924119640961669, + "grad_norm": 0.3851562738418579, + "learning_rate": 6.961903540277066e-06, + "loss": 0.0076, + "step": 21710 + }, + { + "epoch": 0.13930533330340278, + "grad_norm": 0.3918079733848572, + "learning_rate": 6.965110312981016e-06, + "loss": 0.0078, + "step": 21720 + }, + { + "epoch": 0.13936947019718887, + "grad_norm": 0.5268648862838745, + "learning_rate": 6.968317085684967e-06, + "loss": 0.0114, + "step": 21730 + }, + { + "epoch": 0.13943360709097496, + "grad_norm": 0.38811802864074707, + "learning_rate": 6.971523858388918e-06, + "loss": 0.0079, + "step": 21740 + }, + { + "epoch": 0.13949774398476109, + "grad_norm": 0.5009214878082275, + "learning_rate": 6.974730631092869e-06, + "loss": 0.0123, + "step": 21750 + }, + { + "epoch": 0.13956188087854718, + "grad_norm": 0.36382487416267395, + "learning_rate": 6.977937403796819e-06, + "loss": 0.0095, + "step": 21760 + }, + { + "epoch": 0.13962601777233327, + "grad_norm": 0.40182816982269287, + "learning_rate": 6.98114417650077e-06, + "loss": 0.0121, + "step": 21770 + }, + { + "epoch": 0.13969015466611936, + "grad_norm": 0.23861971497535706, + "learning_rate": 6.984350949204721e-06, + "loss": 0.0086, + "step": 21780 + }, + { + "epoch": 0.13975429155990546, + "grad_norm": 0.4996737837791443, + "learning_rate": 6.9875577219086725e-06, + "loss": 0.0066, + "step": 21790 + }, + { + "epoch": 0.13981842845369155, + "grad_norm": 0.5609036087989807, + "learning_rate": 6.990764494612623e-06, + "loss": 0.0071, + "step": 21800 + }, + { + "epoch": 0.13988256534747764, + "grad_norm": 0.2084120810031891, + "learning_rate": 6.993971267316573e-06, + "loss": 0.0059, + "step": 21810 + }, + { + "epoch": 0.13994670224126377, + "grad_norm": 0.3993605673313141, + "learning_rate": 6.997178040020523e-06, + "loss": 0.0087, + "step": 21820 + }, + { + "epoch": 0.14001083913504986, + "grad_norm": 0.315652996301651, + "learning_rate": 7.000384812724474e-06, + "loss": 0.0086, + "step": 21830 + }, + { + "epoch": 0.14007497602883595, + "grad_norm": 0.12413597851991653, + "learning_rate": 7.0035915854284255e-06, + "loss": 0.0098, + "step": 21840 + }, + { + "epoch": 0.14013911292262204, + "grad_norm": 0.21881459653377533, + "learning_rate": 7.006798358132377e-06, + "loss": 0.0084, + "step": 21850 + }, + { + "epoch": 0.14020324981640814, + "grad_norm": 0.4514821469783783, + "learning_rate": 7.010005130836327e-06, + "loss": 0.0078, + "step": 21860 + }, + { + "epoch": 0.14026738671019423, + "grad_norm": 0.4085347652435303, + "learning_rate": 7.013211903540278e-06, + "loss": 0.0081, + "step": 21870 + }, + { + "epoch": 0.14033152360398032, + "grad_norm": 0.24071069061756134, + "learning_rate": 7.016418676244229e-06, + "loss": 0.0111, + "step": 21880 + }, + { + "epoch": 0.14039566049776644, + "grad_norm": 0.30820876359939575, + "learning_rate": 7.019625448948179e-06, + "loss": 0.0094, + "step": 21890 + }, + { + "epoch": 0.14045979739155254, + "grad_norm": 0.484391987323761, + "learning_rate": 7.02283222165213e-06, + "loss": 0.0085, + "step": 21900 + }, + { + "epoch": 0.14052393428533863, + "grad_norm": 0.5085844993591309, + "learning_rate": 7.02603899435608e-06, + "loss": 0.0071, + "step": 21910 + }, + { + "epoch": 0.14058807117912472, + "grad_norm": 0.5190669894218445, + "learning_rate": 7.029245767060031e-06, + "loss": 0.0102, + "step": 21920 + }, + { + "epoch": 0.14065220807291082, + "grad_norm": 0.7314053773880005, + "learning_rate": 7.032452539763982e-06, + "loss": 0.0097, + "step": 21930 + }, + { + "epoch": 0.1407163449666969, + "grad_norm": 0.15084406733512878, + "learning_rate": 7.035659312467933e-06, + "loss": 0.0069, + "step": 21940 + }, + { + "epoch": 0.140780481860483, + "grad_norm": 0.5159353017807007, + "learning_rate": 7.0388660851718835e-06, + "loss": 0.0121, + "step": 21950 + }, + { + "epoch": 0.14084461875426912, + "grad_norm": 0.3132694661617279, + "learning_rate": 7.042072857875835e-06, + "loss": 0.0112, + "step": 21960 + }, + { + "epoch": 0.14090875564805522, + "grad_norm": 0.20876124501228333, + "learning_rate": 7.045279630579786e-06, + "loss": 0.0067, + "step": 21970 + }, + { + "epoch": 0.1409728925418413, + "grad_norm": 0.3610847592353821, + "learning_rate": 7.048486403283736e-06, + "loss": 0.0053, + "step": 21980 + }, + { + "epoch": 0.1410370294356274, + "grad_norm": 0.30084025859832764, + "learning_rate": 7.051693175987686e-06, + "loss": 0.0084, + "step": 21990 + }, + { + "epoch": 0.1411011663294135, + "grad_norm": 1.0451889038085938, + "learning_rate": 7.054899948691637e-06, + "loss": 0.0101, + "step": 22000 + }, + { + "epoch": 0.1411653032231996, + "grad_norm": 0.397225558757782, + "learning_rate": 7.058106721395588e-06, + "loss": 0.0059, + "step": 22010 + }, + { + "epoch": 0.14122944011698568, + "grad_norm": 0.37977731227874756, + "learning_rate": 7.061313494099539e-06, + "loss": 0.0078, + "step": 22020 + }, + { + "epoch": 0.1412935770107718, + "grad_norm": 0.5413793921470642, + "learning_rate": 7.06452026680349e-06, + "loss": 0.0058, + "step": 22030 + }, + { + "epoch": 0.1413577139045579, + "grad_norm": 0.5112346410751343, + "learning_rate": 7.06772703950744e-06, + "loss": 0.009, + "step": 22040 + }, + { + "epoch": 0.141421850798344, + "grad_norm": 0.15585067868232727, + "learning_rate": 7.070933812211391e-06, + "loss": 0.0078, + "step": 22050 + }, + { + "epoch": 0.14148598769213008, + "grad_norm": 0.6915168166160583, + "learning_rate": 7.074140584915342e-06, + "loss": 0.0058, + "step": 22060 + }, + { + "epoch": 0.14155012458591618, + "grad_norm": 0.38445475697517395, + "learning_rate": 7.0773473576192935e-06, + "loss": 0.0077, + "step": 22070 + }, + { + "epoch": 0.14161426147970227, + "grad_norm": 0.2983399033546448, + "learning_rate": 7.080554130323243e-06, + "loss": 0.0065, + "step": 22080 + }, + { + "epoch": 0.14167839837348836, + "grad_norm": 0.3959824740886688, + "learning_rate": 7.083760903027194e-06, + "loss": 0.0085, + "step": 22090 + }, + { + "epoch": 0.14174253526727448, + "grad_norm": 0.2227490246295929, + "learning_rate": 7.086967675731144e-06, + "loss": 0.0083, + "step": 22100 + }, + { + "epoch": 0.14180667216106058, + "grad_norm": 0.3485599756240845, + "learning_rate": 7.090174448435095e-06, + "loss": 0.0068, + "step": 22110 + }, + { + "epoch": 0.14187080905484667, + "grad_norm": 0.5139448642730713, + "learning_rate": 7.0933812211390465e-06, + "loss": 0.0069, + "step": 22120 + }, + { + "epoch": 0.14193494594863276, + "grad_norm": 0.16740815341472626, + "learning_rate": 7.096587993842997e-06, + "loss": 0.0063, + "step": 22130 + }, + { + "epoch": 0.14199908284241886, + "grad_norm": 0.5977540612220764, + "learning_rate": 7.099794766546948e-06, + "loss": 0.0072, + "step": 22140 + }, + { + "epoch": 0.14206321973620495, + "grad_norm": 0.6218705177307129, + "learning_rate": 7.103001539250899e-06, + "loss": 0.0075, + "step": 22150 + }, + { + "epoch": 0.14212735662999104, + "grad_norm": 0.605303168296814, + "learning_rate": 7.10620831195485e-06, + "loss": 0.0101, + "step": 22160 + }, + { + "epoch": 0.14219149352377716, + "grad_norm": 0.309969961643219, + "learning_rate": 7.1094150846587995e-06, + "loss": 0.0056, + "step": 22170 + }, + { + "epoch": 0.14225563041756326, + "grad_norm": 0.3917008638381958, + "learning_rate": 7.112621857362751e-06, + "loss": 0.0096, + "step": 22180 + }, + { + "epoch": 0.14231976731134935, + "grad_norm": 0.2769995331764221, + "learning_rate": 7.115828630066701e-06, + "loss": 0.0092, + "step": 22190 + }, + { + "epoch": 0.14238390420513544, + "grad_norm": 0.28504928946495056, + "learning_rate": 7.119035402770652e-06, + "loss": 0.0061, + "step": 22200 + }, + { + "epoch": 0.14244804109892154, + "grad_norm": 0.28232064843177795, + "learning_rate": 7.122242175474603e-06, + "loss": 0.0061, + "step": 22210 + }, + { + "epoch": 0.14251217799270763, + "grad_norm": 1.2043057680130005, + "learning_rate": 7.125448948178554e-06, + "loss": 0.0068, + "step": 22220 + }, + { + "epoch": 0.14257631488649372, + "grad_norm": 0.6536058783531189, + "learning_rate": 7.1286557208825045e-06, + "loss": 0.0076, + "step": 22230 + }, + { + "epoch": 0.14264045178027984, + "grad_norm": 0.3707522451877594, + "learning_rate": 7.131862493586456e-06, + "loss": 0.0142, + "step": 22240 + }, + { + "epoch": 0.14270458867406594, + "grad_norm": 0.480421245098114, + "learning_rate": 7.135069266290407e-06, + "loss": 0.0107, + "step": 22250 + }, + { + "epoch": 0.14276872556785203, + "grad_norm": 0.3505784571170807, + "learning_rate": 7.138276038994356e-06, + "loss": 0.0094, + "step": 22260 + }, + { + "epoch": 0.14283286246163812, + "grad_norm": 0.46982210874557495, + "learning_rate": 7.141482811698307e-06, + "loss": 0.0128, + "step": 22270 + }, + { + "epoch": 0.14289699935542421, + "grad_norm": 0.2160736620426178, + "learning_rate": 7.1446895844022575e-06, + "loss": 0.0078, + "step": 22280 + }, + { + "epoch": 0.1429611362492103, + "grad_norm": 0.08916353434324265, + "learning_rate": 7.147896357106209e-06, + "loss": 0.0083, + "step": 22290 + }, + { + "epoch": 0.1430252731429964, + "grad_norm": 0.332522988319397, + "learning_rate": 7.15110312981016e-06, + "loss": 0.0082, + "step": 22300 + }, + { + "epoch": 0.14308941003678252, + "grad_norm": 0.7146239876747131, + "learning_rate": 7.154309902514111e-06, + "loss": 0.0097, + "step": 22310 + }, + { + "epoch": 0.14315354693056861, + "grad_norm": 0.5682917237281799, + "learning_rate": 7.157516675218061e-06, + "loss": 0.0105, + "step": 22320 + }, + { + "epoch": 0.1432176838243547, + "grad_norm": 0.14967156946659088, + "learning_rate": 7.160723447922012e-06, + "loss": 0.008, + "step": 22330 + }, + { + "epoch": 0.1432818207181408, + "grad_norm": 0.814206063747406, + "learning_rate": 7.163930220625963e-06, + "loss": 0.0064, + "step": 22340 + }, + { + "epoch": 0.1433459576119269, + "grad_norm": 0.36975303292274475, + "learning_rate": 7.167136993329913e-06, + "loss": 0.0072, + "step": 22350 + }, + { + "epoch": 0.143410094505713, + "grad_norm": 0.18659432232379913, + "learning_rate": 7.170343766033864e-06, + "loss": 0.0071, + "step": 22360 + }, + { + "epoch": 0.14347423139949908, + "grad_norm": 0.4955224096775055, + "learning_rate": 7.173550538737815e-06, + "loss": 0.0087, + "step": 22370 + }, + { + "epoch": 0.1435383682932852, + "grad_norm": 0.46361032128334045, + "learning_rate": 7.176757311441765e-06, + "loss": 0.0062, + "step": 22380 + }, + { + "epoch": 0.1436025051870713, + "grad_norm": 0.24322505295276642, + "learning_rate": 7.179964084145716e-06, + "loss": 0.0076, + "step": 22390 + }, + { + "epoch": 0.1436666420808574, + "grad_norm": 0.8162360787391663, + "learning_rate": 7.1831708568496675e-06, + "loss": 0.0098, + "step": 22400 + }, + { + "epoch": 0.14373077897464348, + "grad_norm": 0.4669942259788513, + "learning_rate": 7.186377629553618e-06, + "loss": 0.0075, + "step": 22410 + }, + { + "epoch": 0.14379491586842957, + "grad_norm": 0.2904738783836365, + "learning_rate": 7.189584402257569e-06, + "loss": 0.0091, + "step": 22420 + }, + { + "epoch": 0.14385905276221567, + "grad_norm": 0.6553630828857422, + "learning_rate": 7.19279117496152e-06, + "loss": 0.0089, + "step": 22430 + }, + { + "epoch": 0.14392318965600176, + "grad_norm": 0.31168097257614136, + "learning_rate": 7.195997947665469e-06, + "loss": 0.0077, + "step": 22440 + }, + { + "epoch": 0.14398732654978788, + "grad_norm": 0.2955361604690552, + "learning_rate": 7.1992047203694205e-06, + "loss": 0.0064, + "step": 22450 + }, + { + "epoch": 0.14405146344357397, + "grad_norm": 0.2961823642253876, + "learning_rate": 7.202411493073372e-06, + "loss": 0.0103, + "step": 22460 + }, + { + "epoch": 0.14411560033736007, + "grad_norm": 0.4810275435447693, + "learning_rate": 7.205618265777322e-06, + "loss": 0.0077, + "step": 22470 + }, + { + "epoch": 0.14417973723114616, + "grad_norm": 0.3276253342628479, + "learning_rate": 7.208825038481273e-06, + "loss": 0.0065, + "step": 22480 + }, + { + "epoch": 0.14424387412493225, + "grad_norm": 0.26311808824539185, + "learning_rate": 7.212031811185224e-06, + "loss": 0.007, + "step": 22490 + }, + { + "epoch": 0.14430801101871835, + "grad_norm": 0.3071235716342926, + "learning_rate": 7.215238583889174e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 0.14437214791250444, + "grad_norm": 0.2706223130226135, + "learning_rate": 7.2184453565931255e-06, + "loss": 0.0079, + "step": 22510 + }, + { + "epoch": 0.14443628480629056, + "grad_norm": 0.2890281677246094, + "learning_rate": 7.221652129297077e-06, + "loss": 0.0072, + "step": 22520 + }, + { + "epoch": 0.14450042170007665, + "grad_norm": 0.3435359001159668, + "learning_rate": 7.224858902001026e-06, + "loss": 0.0065, + "step": 22530 + }, + { + "epoch": 0.14456455859386275, + "grad_norm": 0.368247926235199, + "learning_rate": 7.228065674704977e-06, + "loss": 0.0071, + "step": 22540 + }, + { + "epoch": 0.14462869548764884, + "grad_norm": 0.0901537537574768, + "learning_rate": 7.231272447408928e-06, + "loss": 0.0084, + "step": 22550 + }, + { + "epoch": 0.14469283238143493, + "grad_norm": 0.47297295928001404, + "learning_rate": 7.2344792201128785e-06, + "loss": 0.0072, + "step": 22560 + }, + { + "epoch": 0.14475696927522103, + "grad_norm": 0.3122113049030304, + "learning_rate": 7.23768599281683e-06, + "loss": 0.0071, + "step": 22570 + }, + { + "epoch": 0.14482110616900712, + "grad_norm": 0.4448734223842621, + "learning_rate": 7.240892765520781e-06, + "loss": 0.006, + "step": 22580 + }, + { + "epoch": 0.14488524306279324, + "grad_norm": 0.47366371750831604, + "learning_rate": 7.244099538224732e-06, + "loss": 0.0061, + "step": 22590 + }, + { + "epoch": 0.14494937995657933, + "grad_norm": 0.3275550603866577, + "learning_rate": 7.247306310928682e-06, + "loss": 0.0083, + "step": 22600 + }, + { + "epoch": 0.14501351685036543, + "grad_norm": 0.2917988896369934, + "learning_rate": 7.250513083632633e-06, + "loss": 0.0073, + "step": 22610 + }, + { + "epoch": 0.14507765374415152, + "grad_norm": 0.5993563532829285, + "learning_rate": 7.253719856336583e-06, + "loss": 0.0057, + "step": 22620 + }, + { + "epoch": 0.1451417906379376, + "grad_norm": 0.6318184733390808, + "learning_rate": 7.256926629040534e-06, + "loss": 0.0074, + "step": 22630 + }, + { + "epoch": 0.1452059275317237, + "grad_norm": 0.25389841198921204, + "learning_rate": 7.260133401744485e-06, + "loss": 0.0064, + "step": 22640 + }, + { + "epoch": 0.1452700644255098, + "grad_norm": 0.4760797619819641, + "learning_rate": 7.263340174448435e-06, + "loss": 0.0113, + "step": 22650 + }, + { + "epoch": 0.1453342013192959, + "grad_norm": 0.5239165425300598, + "learning_rate": 7.266546947152386e-06, + "loss": 0.0078, + "step": 22660 + }, + { + "epoch": 0.145398338213082, + "grad_norm": 0.3103371262550354, + "learning_rate": 7.269753719856337e-06, + "loss": 0.0082, + "step": 22670 + }, + { + "epoch": 0.1454624751068681, + "grad_norm": 0.28552502393722534, + "learning_rate": 7.2729604925602885e-06, + "loss": 0.0065, + "step": 22680 + }, + { + "epoch": 0.1455266120006542, + "grad_norm": 0.48143863677978516, + "learning_rate": 7.276167265264239e-06, + "loss": 0.0092, + "step": 22690 + }, + { + "epoch": 0.1455907488944403, + "grad_norm": 0.44797706604003906, + "learning_rate": 7.27937403796819e-06, + "loss": 0.0094, + "step": 22700 + }, + { + "epoch": 0.14565488578822638, + "grad_norm": 0.6186256408691406, + "learning_rate": 7.282580810672139e-06, + "loss": 0.0081, + "step": 22710 + }, + { + "epoch": 0.14571902268201248, + "grad_norm": 0.3706546127796173, + "learning_rate": 7.28578758337609e-06, + "loss": 0.0091, + "step": 22720 + }, + { + "epoch": 0.14578315957579857, + "grad_norm": 0.4360228180885315, + "learning_rate": 7.2889943560800415e-06, + "loss": 0.0085, + "step": 22730 + }, + { + "epoch": 0.1458472964695847, + "grad_norm": 0.2986323833465576, + "learning_rate": 7.292201128783993e-06, + "loss": 0.0071, + "step": 22740 + }, + { + "epoch": 0.14591143336337078, + "grad_norm": 0.35248422622680664, + "learning_rate": 7.295407901487943e-06, + "loss": 0.0076, + "step": 22750 + }, + { + "epoch": 0.14597557025715688, + "grad_norm": 0.24654223024845123, + "learning_rate": 7.298614674191894e-06, + "loss": 0.008, + "step": 22760 + }, + { + "epoch": 0.14603970715094297, + "grad_norm": 0.09405659139156342, + "learning_rate": 7.301821446895845e-06, + "loss": 0.0062, + "step": 22770 + }, + { + "epoch": 0.14610384404472906, + "grad_norm": 0.3926496207714081, + "learning_rate": 7.305028219599795e-06, + "loss": 0.0107, + "step": 22780 + }, + { + "epoch": 0.14616798093851516, + "grad_norm": 0.5553023219108582, + "learning_rate": 7.3082349923037465e-06, + "loss": 0.0088, + "step": 22790 + }, + { + "epoch": 0.14623211783230125, + "grad_norm": 0.479950875043869, + "learning_rate": 7.311441765007696e-06, + "loss": 0.0062, + "step": 22800 + }, + { + "epoch": 0.14629625472608737, + "grad_norm": 0.23034152388572693, + "learning_rate": 7.314648537711647e-06, + "loss": 0.0121, + "step": 22810 + }, + { + "epoch": 0.14636039161987346, + "grad_norm": 0.4224400520324707, + "learning_rate": 7.317855310415598e-06, + "loss": 0.0103, + "step": 22820 + }, + { + "epoch": 0.14642452851365956, + "grad_norm": 0.31793448328971863, + "learning_rate": 7.321062083119549e-06, + "loss": 0.0073, + "step": 22830 + }, + { + "epoch": 0.14648866540744565, + "grad_norm": 0.46968749165534973, + "learning_rate": 7.3242688558234995e-06, + "loss": 0.0094, + "step": 22840 + }, + { + "epoch": 0.14655280230123174, + "grad_norm": 0.5646508932113647, + "learning_rate": 7.327475628527451e-06, + "loss": 0.0094, + "step": 22850 + }, + { + "epoch": 0.14661693919501784, + "grad_norm": 0.7915888428688049, + "learning_rate": 7.330682401231402e-06, + "loss": 0.0061, + "step": 22860 + }, + { + "epoch": 0.14668107608880393, + "grad_norm": 0.410305380821228, + "learning_rate": 7.333889173935352e-06, + "loss": 0.0086, + "step": 22870 + }, + { + "epoch": 0.14674521298259005, + "grad_norm": 0.6306430697441101, + "learning_rate": 7.337095946639303e-06, + "loss": 0.0085, + "step": 22880 + }, + { + "epoch": 0.14680934987637614, + "grad_norm": 0.27923277020454407, + "learning_rate": 7.340302719343253e-06, + "loss": 0.0137, + "step": 22890 + }, + { + "epoch": 0.14687348677016224, + "grad_norm": 0.384808212518692, + "learning_rate": 7.343509492047204e-06, + "loss": 0.0051, + "step": 22900 + }, + { + "epoch": 0.14693762366394833, + "grad_norm": 0.5522543787956238, + "learning_rate": 7.346716264751155e-06, + "loss": 0.0067, + "step": 22910 + }, + { + "epoch": 0.14700176055773442, + "grad_norm": 0.41765451431274414, + "learning_rate": 7.349923037455106e-06, + "loss": 0.0062, + "step": 22920 + }, + { + "epoch": 0.14706589745152052, + "grad_norm": 0.2115785926580429, + "learning_rate": 7.353129810159056e-06, + "loss": 0.0093, + "step": 22930 + }, + { + "epoch": 0.1471300343453066, + "grad_norm": 0.46930214762687683, + "learning_rate": 7.356336582863007e-06, + "loss": 0.0088, + "step": 22940 + }, + { + "epoch": 0.14719417123909273, + "grad_norm": 0.4150533080101013, + "learning_rate": 7.359543355566958e-06, + "loss": 0.0083, + "step": 22950 + }, + { + "epoch": 0.14725830813287882, + "grad_norm": 0.3615683913230896, + "learning_rate": 7.3627501282709095e-06, + "loss": 0.0086, + "step": 22960 + }, + { + "epoch": 0.14732244502666492, + "grad_norm": 0.32483333349227905, + "learning_rate": 7.36595690097486e-06, + "loss": 0.0054, + "step": 22970 + }, + { + "epoch": 0.147386581920451, + "grad_norm": 0.46817225217819214, + "learning_rate": 7.369163673678811e-06, + "loss": 0.0044, + "step": 22980 + }, + { + "epoch": 0.1474507188142371, + "grad_norm": 0.2266695201396942, + "learning_rate": 7.37237044638276e-06, + "loss": 0.0061, + "step": 22990 + }, + { + "epoch": 0.1475148557080232, + "grad_norm": 0.293531209230423, + "learning_rate": 7.375577219086711e-06, + "loss": 0.01, + "step": 23000 + }, + { + "epoch": 0.1475789926018093, + "grad_norm": 0.6825623512268066, + "learning_rate": 7.3787839917906625e-06, + "loss": 0.008, + "step": 23010 + }, + { + "epoch": 0.1476431294955954, + "grad_norm": 0.5003395080566406, + "learning_rate": 7.381990764494613e-06, + "loss": 0.0074, + "step": 23020 + }, + { + "epoch": 0.1477072663893815, + "grad_norm": 0.17529790103435516, + "learning_rate": 7.385197537198564e-06, + "loss": 0.0083, + "step": 23030 + }, + { + "epoch": 0.1477714032831676, + "grad_norm": 0.32682469487190247, + "learning_rate": 7.388404309902515e-06, + "loss": 0.0072, + "step": 23040 + }, + { + "epoch": 0.1478355401769537, + "grad_norm": 0.41206035017967224, + "learning_rate": 7.391611082606466e-06, + "loss": 0.0094, + "step": 23050 + }, + { + "epoch": 0.14789967707073978, + "grad_norm": 0.3241424262523651, + "learning_rate": 7.394817855310416e-06, + "loss": 0.0148, + "step": 23060 + }, + { + "epoch": 0.14796381396452588, + "grad_norm": 0.7326326966285706, + "learning_rate": 7.3980246280143675e-06, + "loss": 0.0098, + "step": 23070 + }, + { + "epoch": 0.14802795085831197, + "grad_norm": 0.5066380500793457, + "learning_rate": 7.401231400718317e-06, + "loss": 0.0104, + "step": 23080 + }, + { + "epoch": 0.1480920877520981, + "grad_norm": 0.42016395926475525, + "learning_rate": 7.404438173422268e-06, + "loss": 0.007, + "step": 23090 + }, + { + "epoch": 0.14815622464588418, + "grad_norm": 0.346256285905838, + "learning_rate": 7.407644946126219e-06, + "loss": 0.01, + "step": 23100 + }, + { + "epoch": 0.14822036153967028, + "grad_norm": 0.2951318323612213, + "learning_rate": 7.41085171883017e-06, + "loss": 0.009, + "step": 23110 + }, + { + "epoch": 0.14828449843345637, + "grad_norm": 0.4058953523635864, + "learning_rate": 7.4140584915341205e-06, + "loss": 0.01, + "step": 23120 + }, + { + "epoch": 0.14834863532724246, + "grad_norm": 0.482075572013855, + "learning_rate": 7.417265264238072e-06, + "loss": 0.0098, + "step": 23130 + }, + { + "epoch": 0.14841277222102855, + "grad_norm": 0.2302228808403015, + "learning_rate": 7.420472036942023e-06, + "loss": 0.0083, + "step": 23140 + }, + { + "epoch": 0.14847690911481465, + "grad_norm": 0.46454161405563354, + "learning_rate": 7.423678809645973e-06, + "loss": 0.0095, + "step": 23150 + }, + { + "epoch": 0.14854104600860077, + "grad_norm": 0.42806532979011536, + "learning_rate": 7.426885582349924e-06, + "loss": 0.0075, + "step": 23160 + }, + { + "epoch": 0.14860518290238686, + "grad_norm": 0.7626487016677856, + "learning_rate": 7.4300923550538735e-06, + "loss": 0.0086, + "step": 23170 + }, + { + "epoch": 0.14866931979617296, + "grad_norm": 0.292229026556015, + "learning_rate": 7.433299127757825e-06, + "loss": 0.0071, + "step": 23180 + }, + { + "epoch": 0.14873345668995905, + "grad_norm": 0.24265456199645996, + "learning_rate": 7.436505900461776e-06, + "loss": 0.0069, + "step": 23190 + }, + { + "epoch": 0.14879759358374514, + "grad_norm": 0.3216264843940735, + "learning_rate": 7.439712673165727e-06, + "loss": 0.0053, + "step": 23200 + }, + { + "epoch": 0.14886173047753123, + "grad_norm": 0.4742765724658966, + "learning_rate": 7.442919445869677e-06, + "loss": 0.0083, + "step": 23210 + }, + { + "epoch": 0.14892586737131733, + "grad_norm": 0.41154447197914124, + "learning_rate": 7.446126218573628e-06, + "loss": 0.0126, + "step": 23220 + }, + { + "epoch": 0.14899000426510345, + "grad_norm": 0.43802884221076965, + "learning_rate": 7.449332991277579e-06, + "loss": 0.0052, + "step": 23230 + }, + { + "epoch": 0.14905414115888954, + "grad_norm": 0.34096428751945496, + "learning_rate": 7.45253976398153e-06, + "loss": 0.0087, + "step": 23240 + }, + { + "epoch": 0.14911827805267563, + "grad_norm": 0.49796128273010254, + "learning_rate": 7.455746536685481e-06, + "loss": 0.0061, + "step": 23250 + }, + { + "epoch": 0.14918241494646173, + "grad_norm": 0.3448033928871155, + "learning_rate": 7.458953309389431e-06, + "loss": 0.0092, + "step": 23260 + }, + { + "epoch": 0.14924655184024782, + "grad_norm": 0.4206487238407135, + "learning_rate": 7.462160082093381e-06, + "loss": 0.0087, + "step": 23270 + }, + { + "epoch": 0.14931068873403391, + "grad_norm": 0.13565437495708466, + "learning_rate": 7.465366854797332e-06, + "loss": 0.0071, + "step": 23280 + }, + { + "epoch": 0.14937482562782, + "grad_norm": 0.47501200437545776, + "learning_rate": 7.4685736275012835e-06, + "loss": 0.009, + "step": 23290 + }, + { + "epoch": 0.14943896252160613, + "grad_norm": 1.0063420534133911, + "learning_rate": 7.471780400205234e-06, + "loss": 0.0072, + "step": 23300 + }, + { + "epoch": 0.14950309941539222, + "grad_norm": 0.23026001453399658, + "learning_rate": 7.474987172909185e-06, + "loss": 0.006, + "step": 23310 + }, + { + "epoch": 0.14956723630917831, + "grad_norm": 0.7847120761871338, + "learning_rate": 7.478193945613136e-06, + "loss": 0.0076, + "step": 23320 + }, + { + "epoch": 0.1496313732029644, + "grad_norm": 0.36820703744888306, + "learning_rate": 7.481400718317086e-06, + "loss": 0.0139, + "step": 23330 + }, + { + "epoch": 0.1496955100967505, + "grad_norm": 0.21741189062595367, + "learning_rate": 7.484607491021037e-06, + "loss": 0.0118, + "step": 23340 + }, + { + "epoch": 0.1497596469905366, + "grad_norm": 0.2632524371147156, + "learning_rate": 7.487814263724988e-06, + "loss": 0.006, + "step": 23350 + }, + { + "epoch": 0.1498237838843227, + "grad_norm": 0.5101386904716492, + "learning_rate": 7.491021036428938e-06, + "loss": 0.0068, + "step": 23360 + }, + { + "epoch": 0.1498879207781088, + "grad_norm": 0.30875131487846375, + "learning_rate": 7.494227809132889e-06, + "loss": 0.0096, + "step": 23370 + }, + { + "epoch": 0.1499520576718949, + "grad_norm": 0.4480379819869995, + "learning_rate": 7.49743458183684e-06, + "loss": 0.0082, + "step": 23380 + }, + { + "epoch": 0.150016194565681, + "grad_norm": 0.514462947845459, + "learning_rate": 7.50064135454079e-06, + "loss": 0.013, + "step": 23390 + }, + { + "epoch": 0.1500803314594671, + "grad_norm": 0.4939626455307007, + "learning_rate": 7.5038481272447415e-06, + "loss": 0.0106, + "step": 23400 + }, + { + "epoch": 0.15014446835325318, + "grad_norm": 0.3024665415287018, + "learning_rate": 7.507054899948693e-06, + "loss": 0.0092, + "step": 23410 + }, + { + "epoch": 0.15020860524703927, + "grad_norm": 0.6919910311698914, + "learning_rate": 7.510261672652644e-06, + "loss": 0.0087, + "step": 23420 + }, + { + "epoch": 0.15027274214082537, + "grad_norm": 0.09423622488975525, + "learning_rate": 7.513468445356594e-06, + "loss": 0.0067, + "step": 23430 + }, + { + "epoch": 0.1503368790346115, + "grad_norm": 0.26807406544685364, + "learning_rate": 7.516675218060544e-06, + "loss": 0.0058, + "step": 23440 + }, + { + "epoch": 0.15040101592839758, + "grad_norm": 0.5968019962310791, + "learning_rate": 7.5198819907644946e-06, + "loss": 0.006, + "step": 23450 + }, + { + "epoch": 0.15046515282218367, + "grad_norm": 0.42045214772224426, + "learning_rate": 7.523088763468446e-06, + "loss": 0.0094, + "step": 23460 + }, + { + "epoch": 0.15052928971596977, + "grad_norm": 0.2310306280851364, + "learning_rate": 7.526295536172397e-06, + "loss": 0.0071, + "step": 23470 + }, + { + "epoch": 0.15059342660975586, + "grad_norm": 0.4738796651363373, + "learning_rate": 7.529502308876348e-06, + "loss": 0.0077, + "step": 23480 + }, + { + "epoch": 0.15065756350354195, + "grad_norm": 0.37021204829216003, + "learning_rate": 7.532709081580298e-06, + "loss": 0.0082, + "step": 23490 + }, + { + "epoch": 0.15072170039732805, + "grad_norm": 1.2935945987701416, + "learning_rate": 7.535915854284249e-06, + "loss": 0.0115, + "step": 23500 + }, + { + "epoch": 0.15078583729111417, + "grad_norm": 0.4377444088459015, + "learning_rate": 7.5391226269882e-06, + "loss": 0.0108, + "step": 23510 + }, + { + "epoch": 0.15084997418490026, + "grad_norm": 0.24022506177425385, + "learning_rate": 7.542329399692151e-06, + "loss": 0.008, + "step": 23520 + }, + { + "epoch": 0.15091411107868635, + "grad_norm": 0.38265594840049744, + "learning_rate": 7.545536172396101e-06, + "loss": 0.0082, + "step": 23530 + }, + { + "epoch": 0.15097824797247245, + "grad_norm": 0.14455340802669525, + "learning_rate": 7.548742945100051e-06, + "loss": 0.0107, + "step": 23540 + }, + { + "epoch": 0.15104238486625854, + "grad_norm": 0.37466180324554443, + "learning_rate": 7.551949717804002e-06, + "loss": 0.009, + "step": 23550 + }, + { + "epoch": 0.15110652176004463, + "grad_norm": 0.5194727778434753, + "learning_rate": 7.555156490507953e-06, + "loss": 0.0081, + "step": 23560 + }, + { + "epoch": 0.15117065865383073, + "grad_norm": 0.44248753786087036, + "learning_rate": 7.5583632632119045e-06, + "loss": 0.008, + "step": 23570 + }, + { + "epoch": 0.15123479554761685, + "grad_norm": 0.16237616539001465, + "learning_rate": 7.561570035915855e-06, + "loss": 0.0048, + "step": 23580 + }, + { + "epoch": 0.15129893244140294, + "grad_norm": 0.2832142114639282, + "learning_rate": 7.564776808619806e-06, + "loss": 0.0075, + "step": 23590 + }, + { + "epoch": 0.15136306933518903, + "grad_norm": 0.2390090376138687, + "learning_rate": 7.567983581323757e-06, + "loss": 0.0051, + "step": 23600 + }, + { + "epoch": 0.15142720622897513, + "grad_norm": 0.27855297923088074, + "learning_rate": 7.571190354027707e-06, + "loss": 0.0072, + "step": 23610 + }, + { + "epoch": 0.15149134312276122, + "grad_norm": 0.9485284686088562, + "learning_rate": 7.5743971267316575e-06, + "loss": 0.0068, + "step": 23620 + }, + { + "epoch": 0.1515554800165473, + "grad_norm": 0.44275709986686707, + "learning_rate": 7.577603899435609e-06, + "loss": 0.0063, + "step": 23630 + }, + { + "epoch": 0.1516196169103334, + "grad_norm": 0.5310981273651123, + "learning_rate": 7.580810672139559e-06, + "loss": 0.0071, + "step": 23640 + }, + { + "epoch": 0.15168375380411953, + "grad_norm": 0.5224945545196533, + "learning_rate": 7.58401744484351e-06, + "loss": 0.0101, + "step": 23650 + }, + { + "epoch": 0.15174789069790562, + "grad_norm": 0.202161505818367, + "learning_rate": 7.587224217547461e-06, + "loss": 0.0061, + "step": 23660 + }, + { + "epoch": 0.1518120275916917, + "grad_norm": 0.5099035501480103, + "learning_rate": 7.590430990251411e-06, + "loss": 0.0075, + "step": 23670 + }, + { + "epoch": 0.1518761644854778, + "grad_norm": 0.6963080763816833, + "learning_rate": 7.5936377629553625e-06, + "loss": 0.0073, + "step": 23680 + }, + { + "epoch": 0.1519403013792639, + "grad_norm": 0.5661160349845886, + "learning_rate": 7.596844535659314e-06, + "loss": 0.0101, + "step": 23690 + }, + { + "epoch": 0.15200443827305, + "grad_norm": 0.1545230895280838, + "learning_rate": 7.600051308363264e-06, + "loss": 0.0094, + "step": 23700 + }, + { + "epoch": 0.15206857516683608, + "grad_norm": 0.38300302624702454, + "learning_rate": 7.603258081067214e-06, + "loss": 0.0125, + "step": 23710 + }, + { + "epoch": 0.1521327120606222, + "grad_norm": 0.3603207468986511, + "learning_rate": 7.606464853771165e-06, + "loss": 0.0086, + "step": 23720 + }, + { + "epoch": 0.1521968489544083, + "grad_norm": 0.40290364623069763, + "learning_rate": 7.6096716264751156e-06, + "loss": 0.0059, + "step": 23730 + }, + { + "epoch": 0.1522609858481944, + "grad_norm": 0.4015500545501709, + "learning_rate": 7.612878399179067e-06, + "loss": 0.0065, + "step": 23740 + }, + { + "epoch": 0.15232512274198048, + "grad_norm": 0.19602881371974945, + "learning_rate": 7.616085171883018e-06, + "loss": 0.0063, + "step": 23750 + }, + { + "epoch": 0.15238925963576658, + "grad_norm": 0.4438900947570801, + "learning_rate": 7.619291944586968e-06, + "loss": 0.0064, + "step": 23760 + }, + { + "epoch": 0.15245339652955267, + "grad_norm": 0.30078378319740295, + "learning_rate": 7.622498717290919e-06, + "loss": 0.0067, + "step": 23770 + }, + { + "epoch": 0.15251753342333876, + "grad_norm": 0.13329555094242096, + "learning_rate": 7.62570548999487e-06, + "loss": 0.0071, + "step": 23780 + }, + { + "epoch": 0.15258167031712488, + "grad_norm": 0.38784217834472656, + "learning_rate": 7.628912262698821e-06, + "loss": 0.0067, + "step": 23790 + }, + { + "epoch": 0.15264580721091098, + "grad_norm": 0.4762949049472809, + "learning_rate": 7.63211903540277e-06, + "loss": 0.01, + "step": 23800 + }, + { + "epoch": 0.15270994410469707, + "grad_norm": 0.3756033778190613, + "learning_rate": 7.635325808106721e-06, + "loss": 0.0105, + "step": 23810 + }, + { + "epoch": 0.15277408099848316, + "grad_norm": 0.24478082358837128, + "learning_rate": 7.638532580810672e-06, + "loss": 0.0071, + "step": 23820 + }, + { + "epoch": 0.15283821789226926, + "grad_norm": 0.3203602433204651, + "learning_rate": 7.641739353514623e-06, + "loss": 0.0079, + "step": 23830 + }, + { + "epoch": 0.15290235478605535, + "grad_norm": 0.22945840656757355, + "learning_rate": 7.644946126218574e-06, + "loss": 0.0044, + "step": 23840 + }, + { + "epoch": 0.15296649167984144, + "grad_norm": 0.2977195680141449, + "learning_rate": 7.648152898922526e-06, + "loss": 0.0056, + "step": 23850 + }, + { + "epoch": 0.15303062857362756, + "grad_norm": 0.38903388381004333, + "learning_rate": 7.651359671626477e-06, + "loss": 0.0085, + "step": 23860 + }, + { + "epoch": 0.15309476546741366, + "grad_norm": 0.42640334367752075, + "learning_rate": 7.654566444330426e-06, + "loss": 0.0158, + "step": 23870 + }, + { + "epoch": 0.15315890236119975, + "grad_norm": 0.41411834955215454, + "learning_rate": 7.657773217034377e-06, + "loss": 0.0062, + "step": 23880 + }, + { + "epoch": 0.15322303925498584, + "grad_norm": 0.6199022531509399, + "learning_rate": 7.660979989738328e-06, + "loss": 0.0068, + "step": 23890 + }, + { + "epoch": 0.15328717614877194, + "grad_norm": 0.4416908323764801, + "learning_rate": 7.664186762442278e-06, + "loss": 0.008, + "step": 23900 + }, + { + "epoch": 0.15335131304255803, + "grad_norm": 0.4486631751060486, + "learning_rate": 7.667393535146229e-06, + "loss": 0.0115, + "step": 23910 + }, + { + "epoch": 0.15341544993634412, + "grad_norm": 0.20878660678863525, + "learning_rate": 7.67060030785018e-06, + "loss": 0.0072, + "step": 23920 + }, + { + "epoch": 0.15347958683013024, + "grad_norm": 0.24708037078380585, + "learning_rate": 7.673807080554131e-06, + "loss": 0.0076, + "step": 23930 + }, + { + "epoch": 0.15354372372391634, + "grad_norm": 0.48210886120796204, + "learning_rate": 7.677013853258082e-06, + "loss": 0.007, + "step": 23940 + }, + { + "epoch": 0.15360786061770243, + "grad_norm": 0.2741550803184509, + "learning_rate": 7.680220625962033e-06, + "loss": 0.0128, + "step": 23950 + }, + { + "epoch": 0.15367199751148852, + "grad_norm": 0.39950141310691833, + "learning_rate": 7.683427398665983e-06, + "loss": 0.0064, + "step": 23960 + }, + { + "epoch": 0.15373613440527462, + "grad_norm": 0.5735801458358765, + "learning_rate": 7.686634171369934e-06, + "loss": 0.0088, + "step": 23970 + }, + { + "epoch": 0.1538002712990607, + "grad_norm": 0.512901246547699, + "learning_rate": 7.689840944073885e-06, + "loss": 0.0072, + "step": 23980 + }, + { + "epoch": 0.1538644081928468, + "grad_norm": 0.40003034472465515, + "learning_rate": 7.693047716777834e-06, + "loss": 0.0063, + "step": 23990 + }, + { + "epoch": 0.15392854508663292, + "grad_norm": 0.3761098384857178, + "learning_rate": 7.696254489481785e-06, + "loss": 0.0082, + "step": 24000 + }, + { + "epoch": 0.15399268198041902, + "grad_norm": 0.18713784217834473, + "learning_rate": 7.699461262185737e-06, + "loss": 0.0093, + "step": 24010 + }, + { + "epoch": 0.1540568188742051, + "grad_norm": 0.21950750052928925, + "learning_rate": 7.702668034889688e-06, + "loss": 0.007, + "step": 24020 + }, + { + "epoch": 0.1541209557679912, + "grad_norm": 0.04866539686918259, + "learning_rate": 7.705874807593639e-06, + "loss": 0.0102, + "step": 24030 + }, + { + "epoch": 0.1541850926617773, + "grad_norm": 0.4602527916431427, + "learning_rate": 7.70908158029759e-06, + "loss": 0.0085, + "step": 24040 + }, + { + "epoch": 0.1542492295555634, + "grad_norm": 0.293916791677475, + "learning_rate": 7.712288353001541e-06, + "loss": 0.0079, + "step": 24050 + }, + { + "epoch": 0.15431336644934948, + "grad_norm": 0.3953401446342468, + "learning_rate": 7.71549512570549e-06, + "loss": 0.0079, + "step": 24060 + }, + { + "epoch": 0.15437750334313557, + "grad_norm": 0.5296419858932495, + "learning_rate": 7.718701898409442e-06, + "loss": 0.0112, + "step": 24070 + }, + { + "epoch": 0.1544416402369217, + "grad_norm": 0.3289147615432739, + "learning_rate": 7.721908671113391e-06, + "loss": 0.0078, + "step": 24080 + }, + { + "epoch": 0.1545057771307078, + "grad_norm": 0.3659955561161041, + "learning_rate": 7.725115443817342e-06, + "loss": 0.009, + "step": 24090 + }, + { + "epoch": 0.15456991402449388, + "grad_norm": 0.6170902848243713, + "learning_rate": 7.728322216521293e-06, + "loss": 0.0072, + "step": 24100 + }, + { + "epoch": 0.15463405091827997, + "grad_norm": 0.4635654389858246, + "learning_rate": 7.731528989225244e-06, + "loss": 0.0109, + "step": 24110 + }, + { + "epoch": 0.15469818781206607, + "grad_norm": 0.3954100012779236, + "learning_rate": 7.734735761929195e-06, + "loss": 0.011, + "step": 24120 + }, + { + "epoch": 0.15476232470585216, + "grad_norm": 0.4503888487815857, + "learning_rate": 7.737942534633147e-06, + "loss": 0.0079, + "step": 24130 + }, + { + "epoch": 0.15482646159963825, + "grad_norm": 0.03226611018180847, + "learning_rate": 7.741149307337098e-06, + "loss": 0.0079, + "step": 24140 + }, + { + "epoch": 0.15489059849342438, + "grad_norm": 0.17917682230472565, + "learning_rate": 7.744356080041047e-06, + "loss": 0.0117, + "step": 24150 + }, + { + "epoch": 0.15495473538721047, + "grad_norm": 0.5344177484512329, + "learning_rate": 7.747562852744998e-06, + "loss": 0.0074, + "step": 24160 + }, + { + "epoch": 0.15501887228099656, + "grad_norm": 0.42366012930870056, + "learning_rate": 7.750769625448948e-06, + "loss": 0.007, + "step": 24170 + }, + { + "epoch": 0.15508300917478265, + "grad_norm": 0.2631521224975586, + "learning_rate": 7.753976398152899e-06, + "loss": 0.0077, + "step": 24180 + }, + { + "epoch": 0.15514714606856875, + "grad_norm": 0.6201837062835693, + "learning_rate": 7.75718317085685e-06, + "loss": 0.006, + "step": 24190 + }, + { + "epoch": 0.15521128296235484, + "grad_norm": 0.4369581341743469, + "learning_rate": 7.760389943560801e-06, + "loss": 0.0069, + "step": 24200 + }, + { + "epoch": 0.15527541985614093, + "grad_norm": 0.1973450779914856, + "learning_rate": 7.763596716264752e-06, + "loss": 0.0084, + "step": 24210 + }, + { + "epoch": 0.15533955674992705, + "grad_norm": 0.3332526981830597, + "learning_rate": 7.766803488968703e-06, + "loss": 0.0105, + "step": 24220 + }, + { + "epoch": 0.15540369364371315, + "grad_norm": 0.23551328480243683, + "learning_rate": 7.770010261672654e-06, + "loss": 0.008, + "step": 24230 + }, + { + "epoch": 0.15546783053749924, + "grad_norm": 0.3230539560317993, + "learning_rate": 7.773217034376604e-06, + "loss": 0.0064, + "step": 24240 + }, + { + "epoch": 0.15553196743128533, + "grad_norm": 0.8207498788833618, + "learning_rate": 7.776423807080555e-06, + "loss": 0.0074, + "step": 24250 + }, + { + "epoch": 0.15559610432507143, + "grad_norm": 0.457095742225647, + "learning_rate": 7.779630579784506e-06, + "loss": 0.0074, + "step": 24260 + }, + { + "epoch": 0.15566024121885752, + "grad_norm": 0.8549022078514099, + "learning_rate": 7.782837352488455e-06, + "loss": 0.0081, + "step": 24270 + }, + { + "epoch": 0.1557243781126436, + "grad_norm": 0.39721837639808655, + "learning_rate": 7.786044125192406e-06, + "loss": 0.0062, + "step": 24280 + }, + { + "epoch": 0.15578851500642973, + "grad_norm": 0.2372387945652008, + "learning_rate": 7.789250897896358e-06, + "loss": 0.0101, + "step": 24290 + }, + { + "epoch": 0.15585265190021583, + "grad_norm": 0.3453238308429718, + "learning_rate": 7.792457670600309e-06, + "loss": 0.0066, + "step": 24300 + }, + { + "epoch": 0.15591678879400192, + "grad_norm": 0.70710289478302, + "learning_rate": 7.79566444330426e-06, + "loss": 0.0058, + "step": 24310 + }, + { + "epoch": 0.155980925687788, + "grad_norm": 0.37608903646469116, + "learning_rate": 7.798871216008211e-06, + "loss": 0.0066, + "step": 24320 + }, + { + "epoch": 0.1560450625815741, + "grad_norm": 0.3334524929523468, + "learning_rate": 7.80207798871216e-06, + "loss": 0.0073, + "step": 24330 + }, + { + "epoch": 0.1561091994753602, + "grad_norm": 0.44738757610321045, + "learning_rate": 7.805284761416111e-06, + "loss": 0.0089, + "step": 24340 + }, + { + "epoch": 0.1561733363691463, + "grad_norm": 0.5732530355453491, + "learning_rate": 7.808491534120063e-06, + "loss": 0.0092, + "step": 24350 + }, + { + "epoch": 0.1562374732629324, + "grad_norm": 0.3681492805480957, + "learning_rate": 7.811698306824012e-06, + "loss": 0.0067, + "step": 24360 + }, + { + "epoch": 0.1563016101567185, + "grad_norm": 0.30083394050598145, + "learning_rate": 7.814905079527963e-06, + "loss": 0.0073, + "step": 24370 + }, + { + "epoch": 0.1563657470505046, + "grad_norm": 0.3898124098777771, + "learning_rate": 7.818111852231914e-06, + "loss": 0.0068, + "step": 24380 + }, + { + "epoch": 0.1564298839442907, + "grad_norm": 0.4957955777645111, + "learning_rate": 7.821318624935865e-06, + "loss": 0.0074, + "step": 24390 + }, + { + "epoch": 0.15649402083807679, + "grad_norm": 0.538674533367157, + "learning_rate": 7.824525397639816e-06, + "loss": 0.0076, + "step": 24400 + }, + { + "epoch": 0.15655815773186288, + "grad_norm": 0.510328471660614, + "learning_rate": 7.827732170343768e-06, + "loss": 0.0077, + "step": 24410 + }, + { + "epoch": 0.15662229462564897, + "grad_norm": 0.9383547306060791, + "learning_rate": 7.830938943047719e-06, + "loss": 0.0086, + "step": 24420 + }, + { + "epoch": 0.1566864315194351, + "grad_norm": 0.37724044919013977, + "learning_rate": 7.834145715751668e-06, + "loss": 0.0064, + "step": 24430 + }, + { + "epoch": 0.1567505684132212, + "grad_norm": 0.4641571342945099, + "learning_rate": 7.83735248845562e-06, + "loss": 0.0066, + "step": 24440 + }, + { + "epoch": 0.15681470530700728, + "grad_norm": 0.3275459110736847, + "learning_rate": 7.840559261159569e-06, + "loss": 0.0075, + "step": 24450 + }, + { + "epoch": 0.15687884220079337, + "grad_norm": 0.2929161489009857, + "learning_rate": 7.84376603386352e-06, + "loss": 0.0069, + "step": 24460 + }, + { + "epoch": 0.15694297909457947, + "grad_norm": 0.35929611325263977, + "learning_rate": 7.84697280656747e-06, + "loss": 0.0054, + "step": 24470 + }, + { + "epoch": 0.15700711598836556, + "grad_norm": 0.6709334254264832, + "learning_rate": 7.850179579271422e-06, + "loss": 0.0088, + "step": 24480 + }, + { + "epoch": 0.15707125288215165, + "grad_norm": 0.2960211932659149, + "learning_rate": 7.853386351975373e-06, + "loss": 0.0078, + "step": 24490 + }, + { + "epoch": 0.15713538977593777, + "grad_norm": 0.39208248257637024, + "learning_rate": 7.856593124679324e-06, + "loss": 0.0063, + "step": 24500 + }, + { + "epoch": 0.15719952666972387, + "grad_norm": 0.276226282119751, + "learning_rate": 7.859799897383275e-06, + "loss": 0.005, + "step": 24510 + }, + { + "epoch": 0.15726366356350996, + "grad_norm": 0.20240812003612518, + "learning_rate": 7.863006670087225e-06, + "loss": 0.0146, + "step": 24520 + }, + { + "epoch": 0.15732780045729605, + "grad_norm": 0.5281372666358948, + "learning_rate": 7.866213442791176e-06, + "loss": 0.0092, + "step": 24530 + }, + { + "epoch": 0.15739193735108215, + "grad_norm": 0.750211238861084, + "learning_rate": 7.869420215495125e-06, + "loss": 0.0057, + "step": 24540 + }, + { + "epoch": 0.15745607424486824, + "grad_norm": 0.4016280174255371, + "learning_rate": 7.872626988199076e-06, + "loss": 0.0064, + "step": 24550 + }, + { + "epoch": 0.15752021113865433, + "grad_norm": 0.3035252094268799, + "learning_rate": 7.875833760903027e-06, + "loss": 0.0068, + "step": 24560 + }, + { + "epoch": 0.15758434803244045, + "grad_norm": 0.4220190942287445, + "learning_rate": 7.879040533606979e-06, + "loss": 0.0075, + "step": 24570 + }, + { + "epoch": 0.15764848492622655, + "grad_norm": 0.29820379614830017, + "learning_rate": 7.88224730631093e-06, + "loss": 0.0056, + "step": 24580 + }, + { + "epoch": 0.15771262182001264, + "grad_norm": 0.2260063886642456, + "learning_rate": 7.88545407901488e-06, + "loss": 0.0115, + "step": 24590 + }, + { + "epoch": 0.15777675871379873, + "grad_norm": 0.2958219051361084, + "learning_rate": 7.888660851718832e-06, + "loss": 0.0064, + "step": 24600 + }, + { + "epoch": 0.15784089560758482, + "grad_norm": 0.29102352261543274, + "learning_rate": 7.891867624422781e-06, + "loss": 0.0068, + "step": 24610 + }, + { + "epoch": 0.15790503250137092, + "grad_norm": 0.32563960552215576, + "learning_rate": 7.895074397126732e-06, + "loss": 0.0086, + "step": 24620 + }, + { + "epoch": 0.157969169395157, + "grad_norm": 0.38926222920417786, + "learning_rate": 7.898281169830684e-06, + "loss": 0.0086, + "step": 24630 + }, + { + "epoch": 0.15803330628894313, + "grad_norm": 0.27023231983184814, + "learning_rate": 7.901487942534633e-06, + "loss": 0.0069, + "step": 24640 + }, + { + "epoch": 0.15809744318272922, + "grad_norm": 0.23996005952358246, + "learning_rate": 7.904694715238584e-06, + "loss": 0.0075, + "step": 24650 + }, + { + "epoch": 0.15816158007651532, + "grad_norm": 0.423515647649765, + "learning_rate": 7.907901487942535e-06, + "loss": 0.0064, + "step": 24660 + }, + { + "epoch": 0.1582257169703014, + "grad_norm": 0.37240225076675415, + "learning_rate": 7.911108260646486e-06, + "loss": 0.0076, + "step": 24670 + }, + { + "epoch": 0.1582898538640875, + "grad_norm": 0.41030654311180115, + "learning_rate": 7.914315033350437e-06, + "loss": 0.0103, + "step": 24680 + }, + { + "epoch": 0.1583539907578736, + "grad_norm": 0.5382036566734314, + "learning_rate": 7.917521806054389e-06, + "loss": 0.0061, + "step": 24690 + }, + { + "epoch": 0.1584181276516597, + "grad_norm": 0.29591771960258484, + "learning_rate": 7.920728578758338e-06, + "loss": 0.0078, + "step": 24700 + }, + { + "epoch": 0.1584822645454458, + "grad_norm": 0.580381453037262, + "learning_rate": 7.923935351462289e-06, + "loss": 0.0054, + "step": 24710 + }, + { + "epoch": 0.1585464014392319, + "grad_norm": 0.7036392688751221, + "learning_rate": 7.92714212416624e-06, + "loss": 0.0122, + "step": 24720 + }, + { + "epoch": 0.158610538333018, + "grad_norm": 0.6105961799621582, + "learning_rate": 7.93034889687019e-06, + "loss": 0.0088, + "step": 24730 + }, + { + "epoch": 0.1586746752268041, + "grad_norm": 0.22060944139957428, + "learning_rate": 7.93355566957414e-06, + "loss": 0.0033, + "step": 24740 + }, + { + "epoch": 0.15873881212059018, + "grad_norm": 0.35518988966941833, + "learning_rate": 7.936762442278092e-06, + "loss": 0.0081, + "step": 24750 + }, + { + "epoch": 0.15880294901437628, + "grad_norm": 0.4817931652069092, + "learning_rate": 7.939969214982043e-06, + "loss": 0.0064, + "step": 24760 + }, + { + "epoch": 0.15886708590816237, + "grad_norm": 0.18447251617908478, + "learning_rate": 7.943175987685994e-06, + "loss": 0.0098, + "step": 24770 + }, + { + "epoch": 0.1589312228019485, + "grad_norm": 0.6530489325523376, + "learning_rate": 7.946382760389945e-06, + "loss": 0.0086, + "step": 24780 + }, + { + "epoch": 0.15899535969573458, + "grad_norm": 0.37190836668014526, + "learning_rate": 7.949589533093896e-06, + "loss": 0.0113, + "step": 24790 + }, + { + "epoch": 0.15905949658952068, + "grad_norm": 0.367475688457489, + "learning_rate": 7.952796305797846e-06, + "loss": 0.0055, + "step": 24800 + }, + { + "epoch": 0.15912363348330677, + "grad_norm": 0.3782992362976074, + "learning_rate": 7.956003078501797e-06, + "loss": 0.0038, + "step": 24810 + }, + { + "epoch": 0.15918777037709286, + "grad_norm": 0.08628425747156143, + "learning_rate": 7.959209851205746e-06, + "loss": 0.0073, + "step": 24820 + }, + { + "epoch": 0.15925190727087896, + "grad_norm": 0.3039507269859314, + "learning_rate": 7.962416623909697e-06, + "loss": 0.0071, + "step": 24830 + }, + { + "epoch": 0.15931604416466505, + "grad_norm": 0.3761020004749298, + "learning_rate": 7.965623396613648e-06, + "loss": 0.0097, + "step": 24840 + }, + { + "epoch": 0.15938018105845117, + "grad_norm": 0.2636759877204895, + "learning_rate": 7.9688301693176e-06, + "loss": 0.0072, + "step": 24850 + }, + { + "epoch": 0.15944431795223726, + "grad_norm": 0.16530971229076385, + "learning_rate": 7.97203694202155e-06, + "loss": 0.007, + "step": 24860 + }, + { + "epoch": 0.15950845484602336, + "grad_norm": 0.5165784955024719, + "learning_rate": 7.975243714725502e-06, + "loss": 0.0104, + "step": 24870 + }, + { + "epoch": 0.15957259173980945, + "grad_norm": 0.22670114040374756, + "learning_rate": 7.978450487429453e-06, + "loss": 0.0068, + "step": 24880 + }, + { + "epoch": 0.15963672863359554, + "grad_norm": 0.3174070417881012, + "learning_rate": 7.981657260133402e-06, + "loss": 0.0075, + "step": 24890 + }, + { + "epoch": 0.15970086552738164, + "grad_norm": 0.090137779712677, + "learning_rate": 7.984864032837353e-06, + "loss": 0.0046, + "step": 24900 + }, + { + "epoch": 0.15976500242116773, + "grad_norm": 0.4895990192890167, + "learning_rate": 7.988070805541303e-06, + "loss": 0.0096, + "step": 24910 + }, + { + "epoch": 0.15982913931495385, + "grad_norm": 0.42553743720054626, + "learning_rate": 7.991277578245254e-06, + "loss": 0.0072, + "step": 24920 + }, + { + "epoch": 0.15989327620873994, + "grad_norm": 0.570526123046875, + "learning_rate": 7.994484350949205e-06, + "loss": 0.0089, + "step": 24930 + }, + { + "epoch": 0.15995741310252604, + "grad_norm": 0.15483412146568298, + "learning_rate": 7.997691123653156e-06, + "loss": 0.0051, + "step": 24940 + }, + { + "epoch": 0.16002154999631213, + "grad_norm": 0.2970651686191559, + "learning_rate": 8.000897896357107e-06, + "loss": 0.0093, + "step": 24950 + }, + { + "epoch": 0.16008568689009822, + "grad_norm": 0.6350005269050598, + "learning_rate": 8.004104669061058e-06, + "loss": 0.0055, + "step": 24960 + }, + { + "epoch": 0.16014982378388432, + "grad_norm": 0.2805918753147125, + "learning_rate": 8.00731144176501e-06, + "loss": 0.0089, + "step": 24970 + }, + { + "epoch": 0.1602139606776704, + "grad_norm": 1.4337726831436157, + "learning_rate": 8.010518214468959e-06, + "loss": 0.0081, + "step": 24980 + }, + { + "epoch": 0.16027809757145653, + "grad_norm": 0.1360992044210434, + "learning_rate": 8.01372498717291e-06, + "loss": 0.0077, + "step": 24990 + }, + { + "epoch": 0.16034223446524262, + "grad_norm": 0.3743704557418823, + "learning_rate": 8.016931759876861e-06, + "loss": 0.0069, + "step": 25000 + }, + { + "epoch": 0.16040637135902872, + "grad_norm": 0.25996172428131104, + "learning_rate": 8.02013853258081e-06, + "loss": 0.0095, + "step": 25010 + }, + { + "epoch": 0.1604705082528148, + "grad_norm": 0.3653346598148346, + "learning_rate": 8.023345305284762e-06, + "loss": 0.0068, + "step": 25020 + }, + { + "epoch": 0.1605346451466009, + "grad_norm": 0.2275839000940323, + "learning_rate": 8.026552077988713e-06, + "loss": 0.0095, + "step": 25030 + }, + { + "epoch": 0.160598782040387, + "grad_norm": 0.4239732027053833, + "learning_rate": 8.029758850692664e-06, + "loss": 0.0111, + "step": 25040 + }, + { + "epoch": 0.1606629189341731, + "grad_norm": 0.3384075164794922, + "learning_rate": 8.032965623396615e-06, + "loss": 0.0076, + "step": 25050 + }, + { + "epoch": 0.1607270558279592, + "grad_norm": 0.2895222306251526, + "learning_rate": 8.036172396100566e-06, + "loss": 0.0097, + "step": 25060 + }, + { + "epoch": 0.1607911927217453, + "grad_norm": 0.16481779515743256, + "learning_rate": 8.039379168804516e-06, + "loss": 0.0097, + "step": 25070 + }, + { + "epoch": 0.1608553296155314, + "grad_norm": 0.4353906512260437, + "learning_rate": 8.042585941508467e-06, + "loss": 0.0131, + "step": 25080 + }, + { + "epoch": 0.1609194665093175, + "grad_norm": 0.4619003236293793, + "learning_rate": 8.045792714212418e-06, + "loss": 0.0078, + "step": 25090 + }, + { + "epoch": 0.16098360340310358, + "grad_norm": 0.4785225987434387, + "learning_rate": 8.048999486916367e-06, + "loss": 0.0074, + "step": 25100 + }, + { + "epoch": 0.16104774029688967, + "grad_norm": 0.3400071859359741, + "learning_rate": 8.052206259620318e-06, + "loss": 0.0099, + "step": 25110 + }, + { + "epoch": 0.16111187719067577, + "grad_norm": 0.212716743350029, + "learning_rate": 8.05541303232427e-06, + "loss": 0.0109, + "step": 25120 + }, + { + "epoch": 0.1611760140844619, + "grad_norm": 0.13593250513076782, + "learning_rate": 8.05861980502822e-06, + "loss": 0.0071, + "step": 25130 + }, + { + "epoch": 0.16124015097824798, + "grad_norm": 0.3976585865020752, + "learning_rate": 8.061826577732172e-06, + "loss": 0.0098, + "step": 25140 + }, + { + "epoch": 0.16130428787203407, + "grad_norm": 0.4405446946620941, + "learning_rate": 8.065033350436123e-06, + "loss": 0.0065, + "step": 25150 + }, + { + "epoch": 0.16136842476582017, + "grad_norm": 0.4052540063858032, + "learning_rate": 8.068240123140072e-06, + "loss": 0.0068, + "step": 25160 + }, + { + "epoch": 0.16143256165960626, + "grad_norm": 0.48169589042663574, + "learning_rate": 8.071446895844023e-06, + "loss": 0.0094, + "step": 25170 + }, + { + "epoch": 0.16149669855339235, + "grad_norm": 0.6314558386802673, + "learning_rate": 8.074653668547974e-06, + "loss": 0.0096, + "step": 25180 + }, + { + "epoch": 0.16156083544717845, + "grad_norm": 0.3743741810321808, + "learning_rate": 8.077860441251924e-06, + "loss": 0.009, + "step": 25190 + }, + { + "epoch": 0.16162497234096457, + "grad_norm": 0.49911192059516907, + "learning_rate": 8.081067213955875e-06, + "loss": 0.0084, + "step": 25200 + }, + { + "epoch": 0.16168910923475066, + "grad_norm": 0.3587149381637573, + "learning_rate": 8.084273986659826e-06, + "loss": 0.0078, + "step": 25210 + }, + { + "epoch": 0.16175324612853675, + "grad_norm": 0.38517385721206665, + "learning_rate": 8.087480759363777e-06, + "loss": 0.012, + "step": 25220 + }, + { + "epoch": 0.16181738302232285, + "grad_norm": 0.62278813123703, + "learning_rate": 8.090687532067728e-06, + "loss": 0.0114, + "step": 25230 + }, + { + "epoch": 0.16188151991610894, + "grad_norm": 0.4665565490722656, + "learning_rate": 8.09389430477168e-06, + "loss": 0.0067, + "step": 25240 + }, + { + "epoch": 0.16194565680989503, + "grad_norm": 0.2966174781322479, + "learning_rate": 8.097101077475629e-06, + "loss": 0.008, + "step": 25250 + }, + { + "epoch": 0.16200979370368113, + "grad_norm": 0.6743813753128052, + "learning_rate": 8.10030785017958e-06, + "loss": 0.007, + "step": 25260 + }, + { + "epoch": 0.16207393059746725, + "grad_norm": 0.5041316151618958, + "learning_rate": 8.103514622883531e-06, + "loss": 0.0064, + "step": 25270 + }, + { + "epoch": 0.16213806749125334, + "grad_norm": 0.20831072330474854, + "learning_rate": 8.10672139558748e-06, + "loss": 0.0062, + "step": 25280 + }, + { + "epoch": 0.16220220438503943, + "grad_norm": 0.3768036365509033, + "learning_rate": 8.109928168291432e-06, + "loss": 0.0061, + "step": 25290 + }, + { + "epoch": 0.16226634127882553, + "grad_norm": 0.32609325647354126, + "learning_rate": 8.113134940995383e-06, + "loss": 0.01, + "step": 25300 + }, + { + "epoch": 0.16233047817261162, + "grad_norm": 0.4458755552768707, + "learning_rate": 8.116341713699334e-06, + "loss": 0.0069, + "step": 25310 + }, + { + "epoch": 0.1623946150663977, + "grad_norm": 0.30376890301704407, + "learning_rate": 8.119548486403285e-06, + "loss": 0.0055, + "step": 25320 + }, + { + "epoch": 0.1624587519601838, + "grad_norm": 0.3464112877845764, + "learning_rate": 8.122755259107236e-06, + "loss": 0.0078, + "step": 25330 + }, + { + "epoch": 0.16252288885396993, + "grad_norm": 0.2156023532152176, + "learning_rate": 8.125962031811185e-06, + "loss": 0.0102, + "step": 25340 + }, + { + "epoch": 0.16258702574775602, + "grad_norm": 0.761702835559845, + "learning_rate": 8.129168804515137e-06, + "loss": 0.0064, + "step": 25350 + }, + { + "epoch": 0.1626511626415421, + "grad_norm": 0.150650754570961, + "learning_rate": 8.132375577219088e-06, + "loss": 0.0059, + "step": 25360 + }, + { + "epoch": 0.1627152995353282, + "grad_norm": 0.4692308008670807, + "learning_rate": 8.135582349923039e-06, + "loss": 0.01, + "step": 25370 + }, + { + "epoch": 0.1627794364291143, + "grad_norm": 0.4848043918609619, + "learning_rate": 8.138789122626988e-06, + "loss": 0.0068, + "step": 25380 + }, + { + "epoch": 0.1628435733229004, + "grad_norm": 0.23428279161453247, + "learning_rate": 8.14199589533094e-06, + "loss": 0.0076, + "step": 25390 + }, + { + "epoch": 0.16290771021668649, + "grad_norm": 0.7011100053787231, + "learning_rate": 8.14520266803489e-06, + "loss": 0.0074, + "step": 25400 + }, + { + "epoch": 0.1629718471104726, + "grad_norm": 0.2428915947675705, + "learning_rate": 8.148409440738842e-06, + "loss": 0.0054, + "step": 25410 + }, + { + "epoch": 0.1630359840042587, + "grad_norm": 0.25832948088645935, + "learning_rate": 8.151616213442793e-06, + "loss": 0.008, + "step": 25420 + }, + { + "epoch": 0.1631001208980448, + "grad_norm": 0.5887409448623657, + "learning_rate": 8.154822986146742e-06, + "loss": 0.006, + "step": 25430 + }, + { + "epoch": 0.16316425779183089, + "grad_norm": 0.31074729561805725, + "learning_rate": 8.158029758850693e-06, + "loss": 0.0048, + "step": 25440 + }, + { + "epoch": 0.16322839468561698, + "grad_norm": 0.21727532148361206, + "learning_rate": 8.161236531554644e-06, + "loss": 0.0064, + "step": 25450 + }, + { + "epoch": 0.16329253157940307, + "grad_norm": 0.26646608114242554, + "learning_rate": 8.164443304258595e-06, + "loss": 0.0085, + "step": 25460 + }, + { + "epoch": 0.16335666847318916, + "grad_norm": 0.6402791738510132, + "learning_rate": 8.167650076962545e-06, + "loss": 0.0065, + "step": 25470 + }, + { + "epoch": 0.16342080536697529, + "grad_norm": 0.2471948117017746, + "learning_rate": 8.170856849666496e-06, + "loss": 0.006, + "step": 25480 + }, + { + "epoch": 0.16348494226076138, + "grad_norm": 0.36383992433547974, + "learning_rate": 8.174063622370447e-06, + "loss": 0.007, + "step": 25490 + }, + { + "epoch": 0.16354907915454747, + "grad_norm": 0.3720337450504303, + "learning_rate": 8.177270395074398e-06, + "loss": 0.0084, + "step": 25500 + }, + { + "epoch": 0.16361321604833357, + "grad_norm": 0.42418551445007324, + "learning_rate": 8.18047716777835e-06, + "loss": 0.0088, + "step": 25510 + }, + { + "epoch": 0.16367735294211966, + "grad_norm": 0.35232800245285034, + "learning_rate": 8.183683940482299e-06, + "loss": 0.0063, + "step": 25520 + }, + { + "epoch": 0.16374148983590575, + "grad_norm": 0.34052902460098267, + "learning_rate": 8.18689071318625e-06, + "loss": 0.0083, + "step": 25530 + }, + { + "epoch": 0.16380562672969184, + "grad_norm": 0.43512094020843506, + "learning_rate": 8.190097485890201e-06, + "loss": 0.0104, + "step": 25540 + }, + { + "epoch": 0.16386976362347794, + "grad_norm": 0.548888087272644, + "learning_rate": 8.193304258594152e-06, + "loss": 0.0119, + "step": 25550 + }, + { + "epoch": 0.16393390051726406, + "grad_norm": 0.48956605792045593, + "learning_rate": 8.196511031298101e-06, + "loss": 0.0064, + "step": 25560 + }, + { + "epoch": 0.16399803741105015, + "grad_norm": 0.20779581367969513, + "learning_rate": 8.199717804002053e-06, + "loss": 0.0075, + "step": 25570 + }, + { + "epoch": 0.16406217430483624, + "grad_norm": 0.3951849937438965, + "learning_rate": 8.202924576706004e-06, + "loss": 0.0105, + "step": 25580 + }, + { + "epoch": 0.16412631119862234, + "grad_norm": 0.31702741980552673, + "learning_rate": 8.206131349409955e-06, + "loss": 0.0081, + "step": 25590 + }, + { + "epoch": 0.16419044809240843, + "grad_norm": 0.2765990197658539, + "learning_rate": 8.209338122113906e-06, + "loss": 0.0114, + "step": 25600 + }, + { + "epoch": 0.16425458498619452, + "grad_norm": 0.31762373447418213, + "learning_rate": 8.212544894817855e-06, + "loss": 0.0052, + "step": 25610 + }, + { + "epoch": 0.16431872187998062, + "grad_norm": 0.3267388939857483, + "learning_rate": 8.215751667521806e-06, + "loss": 0.0101, + "step": 25620 + }, + { + "epoch": 0.16438285877376674, + "grad_norm": 0.5397911071777344, + "learning_rate": 8.218958440225758e-06, + "loss": 0.0071, + "step": 25630 + }, + { + "epoch": 0.16444699566755283, + "grad_norm": 0.5041503310203552, + "learning_rate": 8.222165212929709e-06, + "loss": 0.0094, + "step": 25640 + }, + { + "epoch": 0.16451113256133892, + "grad_norm": 0.2852059602737427, + "learning_rate": 8.225371985633658e-06, + "loss": 0.0059, + "step": 25650 + }, + { + "epoch": 0.16457526945512502, + "grad_norm": 0.3237536549568176, + "learning_rate": 8.22857875833761e-06, + "loss": 0.0052, + "step": 25660 + }, + { + "epoch": 0.1646394063489111, + "grad_norm": 0.4522680342197418, + "learning_rate": 8.23178553104156e-06, + "loss": 0.0081, + "step": 25670 + }, + { + "epoch": 0.1647035432426972, + "grad_norm": 0.24704231321811676, + "learning_rate": 8.234992303745511e-06, + "loss": 0.0079, + "step": 25680 + }, + { + "epoch": 0.1647676801364833, + "grad_norm": 0.5617715120315552, + "learning_rate": 8.238199076449463e-06, + "loss": 0.0092, + "step": 25690 + }, + { + "epoch": 0.16483181703026942, + "grad_norm": 0.32703539729118347, + "learning_rate": 8.241405849153412e-06, + "loss": 0.0086, + "step": 25700 + }, + { + "epoch": 0.1648959539240555, + "grad_norm": 0.3462812900543213, + "learning_rate": 8.244612621857363e-06, + "loss": 0.0074, + "step": 25710 + }, + { + "epoch": 0.1649600908178416, + "grad_norm": 0.24970732629299164, + "learning_rate": 8.247819394561314e-06, + "loss": 0.0091, + "step": 25720 + }, + { + "epoch": 0.1650242277116277, + "grad_norm": 0.3338163495063782, + "learning_rate": 8.251026167265265e-06, + "loss": 0.0074, + "step": 25730 + }, + { + "epoch": 0.1650883646054138, + "grad_norm": 0.20319034159183502, + "learning_rate": 8.254232939969216e-06, + "loss": 0.0088, + "step": 25740 + }, + { + "epoch": 0.16515250149919988, + "grad_norm": 0.4668441414833069, + "learning_rate": 8.257439712673166e-06, + "loss": 0.0056, + "step": 25750 + }, + { + "epoch": 0.16521663839298598, + "grad_norm": 0.3017856776714325, + "learning_rate": 8.260646485377117e-06, + "loss": 0.0099, + "step": 25760 + }, + { + "epoch": 0.1652807752867721, + "grad_norm": 0.6447399258613586, + "learning_rate": 8.263853258081068e-06, + "loss": 0.0089, + "step": 25770 + }, + { + "epoch": 0.1653449121805582, + "grad_norm": 0.5781249403953552, + "learning_rate": 8.26706003078502e-06, + "loss": 0.0089, + "step": 25780 + }, + { + "epoch": 0.16540904907434428, + "grad_norm": 0.20946453511714935, + "learning_rate": 8.27026680348897e-06, + "loss": 0.0063, + "step": 25790 + }, + { + "epoch": 0.16547318596813038, + "grad_norm": 0.4722101390361786, + "learning_rate": 8.27347357619292e-06, + "loss": 0.0086, + "step": 25800 + }, + { + "epoch": 0.16553732286191647, + "grad_norm": 0.284199595451355, + "learning_rate": 8.27668034889687e-06, + "loss": 0.0065, + "step": 25810 + }, + { + "epoch": 0.16560145975570256, + "grad_norm": 0.38956043124198914, + "learning_rate": 8.279887121600822e-06, + "loss": 0.0087, + "step": 25820 + }, + { + "epoch": 0.16566559664948866, + "grad_norm": 0.32880067825317383, + "learning_rate": 8.283093894304773e-06, + "loss": 0.0071, + "step": 25830 + }, + { + "epoch": 0.16572973354327478, + "grad_norm": 0.5591426491737366, + "learning_rate": 8.286300667008722e-06, + "loss": 0.0068, + "step": 25840 + }, + { + "epoch": 0.16579387043706087, + "grad_norm": 0.2781529724597931, + "learning_rate": 8.289507439712674e-06, + "loss": 0.0054, + "step": 25850 + }, + { + "epoch": 0.16585800733084696, + "grad_norm": 0.6771969795227051, + "learning_rate": 8.292714212416625e-06, + "loss": 0.0089, + "step": 25860 + }, + { + "epoch": 0.16592214422463306, + "grad_norm": 0.2917311489582062, + "learning_rate": 8.295920985120576e-06, + "loss": 0.0068, + "step": 25870 + }, + { + "epoch": 0.16598628111841915, + "grad_norm": 0.12180343270301819, + "learning_rate": 8.299127757824527e-06, + "loss": 0.0088, + "step": 25880 + }, + { + "epoch": 0.16605041801220524, + "grad_norm": 0.2622659206390381, + "learning_rate": 8.302334530528476e-06, + "loss": 0.0073, + "step": 25890 + }, + { + "epoch": 0.16611455490599134, + "grad_norm": 0.5130379796028137, + "learning_rate": 8.305541303232427e-06, + "loss": 0.0096, + "step": 25900 + }, + { + "epoch": 0.16617869179977746, + "grad_norm": 0.3362933397293091, + "learning_rate": 8.308748075936379e-06, + "loss": 0.0059, + "step": 25910 + }, + { + "epoch": 0.16624282869356355, + "grad_norm": 0.4755385220050812, + "learning_rate": 8.31195484864033e-06, + "loss": 0.0123, + "step": 25920 + }, + { + "epoch": 0.16630696558734964, + "grad_norm": 0.35895073413848877, + "learning_rate": 8.315161621344279e-06, + "loss": 0.0078, + "step": 25930 + }, + { + "epoch": 0.16637110248113574, + "grad_norm": 0.35405483841896057, + "learning_rate": 8.31836839404823e-06, + "loss": 0.0075, + "step": 25940 + }, + { + "epoch": 0.16643523937492183, + "grad_norm": 0.3995170295238495, + "learning_rate": 8.321575166752181e-06, + "loss": 0.007, + "step": 25950 + }, + { + "epoch": 0.16649937626870792, + "grad_norm": 0.3175564706325531, + "learning_rate": 8.324781939456132e-06, + "loss": 0.0061, + "step": 25960 + }, + { + "epoch": 0.16656351316249401, + "grad_norm": 0.4694744944572449, + "learning_rate": 8.327988712160084e-06, + "loss": 0.0072, + "step": 25970 + }, + { + "epoch": 0.16662765005628014, + "grad_norm": 0.3355288505554199, + "learning_rate": 8.331195484864033e-06, + "loss": 0.0064, + "step": 25980 + }, + { + "epoch": 0.16669178695006623, + "grad_norm": 0.4550934135913849, + "learning_rate": 8.334402257567984e-06, + "loss": 0.0066, + "step": 25990 + }, + { + "epoch": 0.16675592384385232, + "grad_norm": 0.47918909788131714, + "learning_rate": 8.337609030271935e-06, + "loss": 0.0088, + "step": 26000 + }, + { + "epoch": 0.16682006073763841, + "grad_norm": 0.4235347807407379, + "learning_rate": 8.340815802975886e-06, + "loss": 0.0064, + "step": 26010 + }, + { + "epoch": 0.1668841976314245, + "grad_norm": 0.45766931772232056, + "learning_rate": 8.344022575679836e-06, + "loss": 0.0066, + "step": 26020 + }, + { + "epoch": 0.1669483345252106, + "grad_norm": 0.5351958274841309, + "learning_rate": 8.347229348383787e-06, + "loss": 0.0076, + "step": 26030 + }, + { + "epoch": 0.1670124714189967, + "grad_norm": 0.6322855949401855, + "learning_rate": 8.350436121087738e-06, + "loss": 0.0095, + "step": 26040 + }, + { + "epoch": 0.16707660831278281, + "grad_norm": 0.3424733281135559, + "learning_rate": 8.353642893791689e-06, + "loss": 0.0071, + "step": 26050 + }, + { + "epoch": 0.1671407452065689, + "grad_norm": 0.13436731696128845, + "learning_rate": 8.35684966649564e-06, + "loss": 0.0064, + "step": 26060 + }, + { + "epoch": 0.167204882100355, + "grad_norm": 0.4394909143447876, + "learning_rate": 8.36005643919959e-06, + "loss": 0.0061, + "step": 26070 + }, + { + "epoch": 0.1672690189941411, + "grad_norm": 0.7153190970420837, + "learning_rate": 8.36326321190354e-06, + "loss": 0.0057, + "step": 26080 + }, + { + "epoch": 0.1673331558879272, + "grad_norm": 0.43987026810646057, + "learning_rate": 8.366469984607492e-06, + "loss": 0.0113, + "step": 26090 + }, + { + "epoch": 0.16739729278171328, + "grad_norm": 0.2604074776172638, + "learning_rate": 8.369676757311443e-06, + "loss": 0.0078, + "step": 26100 + }, + { + "epoch": 0.16746142967549937, + "grad_norm": 0.41117510199546814, + "learning_rate": 8.372883530015392e-06, + "loss": 0.0123, + "step": 26110 + }, + { + "epoch": 0.1675255665692855, + "grad_norm": 0.5456597805023193, + "learning_rate": 8.376090302719343e-06, + "loss": 0.006, + "step": 26120 + }, + { + "epoch": 0.1675897034630716, + "grad_norm": 0.32660984992980957, + "learning_rate": 8.379297075423295e-06, + "loss": 0.0091, + "step": 26130 + }, + { + "epoch": 0.16765384035685768, + "grad_norm": 0.3434271514415741, + "learning_rate": 8.382503848127246e-06, + "loss": 0.0081, + "step": 26140 + }, + { + "epoch": 0.16771797725064377, + "grad_norm": 0.4495420455932617, + "learning_rate": 8.385710620831197e-06, + "loss": 0.0149, + "step": 26150 + }, + { + "epoch": 0.16778211414442987, + "grad_norm": 0.14051438868045807, + "learning_rate": 8.388917393535146e-06, + "loss": 0.0075, + "step": 26160 + }, + { + "epoch": 0.16784625103821596, + "grad_norm": 0.3010246455669403, + "learning_rate": 8.392124166239097e-06, + "loss": 0.0077, + "step": 26170 + }, + { + "epoch": 0.16791038793200205, + "grad_norm": 0.4757477939128876, + "learning_rate": 8.395330938943048e-06, + "loss": 0.0075, + "step": 26180 + }, + { + "epoch": 0.16797452482578817, + "grad_norm": 0.40908652544021606, + "learning_rate": 8.398537711647e-06, + "loss": 0.0111, + "step": 26190 + }, + { + "epoch": 0.16803866171957427, + "grad_norm": 0.5626053214073181, + "learning_rate": 8.40174448435095e-06, + "loss": 0.0057, + "step": 26200 + }, + { + "epoch": 0.16810279861336036, + "grad_norm": 0.5374211668968201, + "learning_rate": 8.4049512570549e-06, + "loss": 0.0068, + "step": 26210 + }, + { + "epoch": 0.16816693550714645, + "grad_norm": 0.5747615694999695, + "learning_rate": 8.408158029758851e-06, + "loss": 0.0065, + "step": 26220 + }, + { + "epoch": 0.16823107240093255, + "grad_norm": 0.3368001878261566, + "learning_rate": 8.411364802462802e-06, + "loss": 0.0073, + "step": 26230 + }, + { + "epoch": 0.16829520929471864, + "grad_norm": 0.29381346702575684, + "learning_rate": 8.414571575166753e-06, + "loss": 0.0101, + "step": 26240 + }, + { + "epoch": 0.16835934618850473, + "grad_norm": 0.1899987906217575, + "learning_rate": 8.417778347870703e-06, + "loss": 0.006, + "step": 26250 + }, + { + "epoch": 0.16842348308229085, + "grad_norm": 0.29310908913612366, + "learning_rate": 8.420985120574654e-06, + "loss": 0.0079, + "step": 26260 + }, + { + "epoch": 0.16848761997607695, + "grad_norm": 0.33029741048812866, + "learning_rate": 8.424191893278605e-06, + "loss": 0.0076, + "step": 26270 + }, + { + "epoch": 0.16855175686986304, + "grad_norm": 0.4537966549396515, + "learning_rate": 8.427398665982556e-06, + "loss": 0.0033, + "step": 26280 + }, + { + "epoch": 0.16861589376364913, + "grad_norm": 0.1576673835515976, + "learning_rate": 8.430605438686507e-06, + "loss": 0.0047, + "step": 26290 + }, + { + "epoch": 0.16868003065743523, + "grad_norm": 0.40747445821762085, + "learning_rate": 8.433812211390457e-06, + "loss": 0.0053, + "step": 26300 + }, + { + "epoch": 0.16874416755122132, + "grad_norm": 0.49658462405204773, + "learning_rate": 8.437018984094408e-06, + "loss": 0.0113, + "step": 26310 + }, + { + "epoch": 0.1688083044450074, + "grad_norm": 0.4060763418674469, + "learning_rate": 8.440225756798359e-06, + "loss": 0.0079, + "step": 26320 + }, + { + "epoch": 0.16887244133879353, + "grad_norm": 0.31417739391326904, + "learning_rate": 8.44343252950231e-06, + "loss": 0.0086, + "step": 26330 + }, + { + "epoch": 0.16893657823257963, + "grad_norm": 0.36448004841804504, + "learning_rate": 8.44663930220626e-06, + "loss": 0.0073, + "step": 26340 + }, + { + "epoch": 0.16900071512636572, + "grad_norm": 0.3465425968170166, + "learning_rate": 8.44984607491021e-06, + "loss": 0.006, + "step": 26350 + }, + { + "epoch": 0.1690648520201518, + "grad_norm": 0.4072333574295044, + "learning_rate": 8.453052847614162e-06, + "loss": 0.0061, + "step": 26360 + }, + { + "epoch": 0.1691289889139379, + "grad_norm": 0.2462216317653656, + "learning_rate": 8.456259620318113e-06, + "loss": 0.0056, + "step": 26370 + }, + { + "epoch": 0.169193125807724, + "grad_norm": 0.46497079730033875, + "learning_rate": 8.459466393022064e-06, + "loss": 0.0062, + "step": 26380 + }, + { + "epoch": 0.1692572627015101, + "grad_norm": 0.33011871576309204, + "learning_rate": 8.462673165726013e-06, + "loss": 0.0083, + "step": 26390 + }, + { + "epoch": 0.1693213995952962, + "grad_norm": 0.577763020992279, + "learning_rate": 8.465879938429964e-06, + "loss": 0.0064, + "step": 26400 + }, + { + "epoch": 0.1693855364890823, + "grad_norm": 0.34762248396873474, + "learning_rate": 8.469086711133916e-06, + "loss": 0.0075, + "step": 26410 + }, + { + "epoch": 0.1694496733828684, + "grad_norm": 0.47082164883613586, + "learning_rate": 8.472293483837867e-06, + "loss": 0.0056, + "step": 26420 + }, + { + "epoch": 0.1695138102766545, + "grad_norm": 0.7073124051094055, + "learning_rate": 8.475500256541816e-06, + "loss": 0.0085, + "step": 26430 + }, + { + "epoch": 0.16957794717044058, + "grad_norm": 0.5106498003005981, + "learning_rate": 8.478707029245767e-06, + "loss": 0.0131, + "step": 26440 + }, + { + "epoch": 0.16964208406422668, + "grad_norm": 0.6935898661613464, + "learning_rate": 8.481913801949718e-06, + "loss": 0.0064, + "step": 26450 + }, + { + "epoch": 0.16970622095801277, + "grad_norm": 0.45264732837677, + "learning_rate": 8.48512057465367e-06, + "loss": 0.0056, + "step": 26460 + }, + { + "epoch": 0.1697703578517989, + "grad_norm": 0.38825467228889465, + "learning_rate": 8.48832734735762e-06, + "loss": 0.0053, + "step": 26470 + }, + { + "epoch": 0.16983449474558499, + "grad_norm": 0.7266507744789124, + "learning_rate": 8.49153412006157e-06, + "loss": 0.0095, + "step": 26480 + }, + { + "epoch": 0.16989863163937108, + "grad_norm": 0.07236064970493317, + "learning_rate": 8.494740892765521e-06, + "loss": 0.0061, + "step": 26490 + }, + { + "epoch": 0.16996276853315717, + "grad_norm": 0.1668139547109604, + "learning_rate": 8.497947665469472e-06, + "loss": 0.0064, + "step": 26500 + }, + { + "epoch": 0.17002690542694326, + "grad_norm": 0.33534303307533264, + "learning_rate": 8.501154438173423e-06, + "loss": 0.0069, + "step": 26510 + }, + { + "epoch": 0.17009104232072936, + "grad_norm": 0.2349112182855606, + "learning_rate": 8.504361210877373e-06, + "loss": 0.008, + "step": 26520 + }, + { + "epoch": 0.17015517921451545, + "grad_norm": 0.4541345536708832, + "learning_rate": 8.507567983581324e-06, + "loss": 0.0078, + "step": 26530 + }, + { + "epoch": 0.17021931610830157, + "grad_norm": 0.39967626333236694, + "learning_rate": 8.510774756285275e-06, + "loss": 0.0063, + "step": 26540 + }, + { + "epoch": 0.17028345300208766, + "grad_norm": 0.7688847780227661, + "learning_rate": 8.513981528989226e-06, + "loss": 0.0085, + "step": 26550 + }, + { + "epoch": 0.17034758989587376, + "grad_norm": 0.3346398174762726, + "learning_rate": 8.517188301693177e-06, + "loss": 0.006, + "step": 26560 + }, + { + "epoch": 0.17041172678965985, + "grad_norm": 0.6446905732154846, + "learning_rate": 8.520395074397128e-06, + "loss": 0.009, + "step": 26570 + }, + { + "epoch": 0.17047586368344594, + "grad_norm": 0.38544151186943054, + "learning_rate": 8.523601847101078e-06, + "loss": 0.0089, + "step": 26580 + }, + { + "epoch": 0.17054000057723204, + "grad_norm": 0.24830207228660583, + "learning_rate": 8.526808619805029e-06, + "loss": 0.0113, + "step": 26590 + }, + { + "epoch": 0.17060413747101813, + "grad_norm": 0.40618616342544556, + "learning_rate": 8.53001539250898e-06, + "loss": 0.0049, + "step": 26600 + }, + { + "epoch": 0.17066827436480425, + "grad_norm": 0.2531229555606842, + "learning_rate": 8.53322216521293e-06, + "loss": 0.0065, + "step": 26610 + }, + { + "epoch": 0.17073241125859034, + "grad_norm": 0.17107687890529633, + "learning_rate": 8.53642893791688e-06, + "loss": 0.0055, + "step": 26620 + }, + { + "epoch": 0.17079654815237644, + "grad_norm": 0.674441933631897, + "learning_rate": 8.539635710620832e-06, + "loss": 0.0084, + "step": 26630 + }, + { + "epoch": 0.17086068504616253, + "grad_norm": 0.5298900604248047, + "learning_rate": 8.542842483324783e-06, + "loss": 0.0073, + "step": 26640 + }, + { + "epoch": 0.17092482193994862, + "grad_norm": 0.13486960530281067, + "learning_rate": 8.546049256028734e-06, + "loss": 0.0063, + "step": 26650 + }, + { + "epoch": 0.17098895883373472, + "grad_norm": 0.2705175280570984, + "learning_rate": 8.549256028732685e-06, + "loss": 0.0089, + "step": 26660 + }, + { + "epoch": 0.1710530957275208, + "grad_norm": 0.34008297324180603, + "learning_rate": 8.552462801436634e-06, + "loss": 0.0105, + "step": 26670 + }, + { + "epoch": 0.17111723262130693, + "grad_norm": 0.20941926538944244, + "learning_rate": 8.555669574140585e-06, + "loss": 0.0046, + "step": 26680 + }, + { + "epoch": 0.17118136951509302, + "grad_norm": 0.3021007478237152, + "learning_rate": 8.558876346844537e-06, + "loss": 0.007, + "step": 26690 + }, + { + "epoch": 0.17124550640887912, + "grad_norm": 0.5261565446853638, + "learning_rate": 8.562083119548486e-06, + "loss": 0.0107, + "step": 26700 + }, + { + "epoch": 0.1713096433026652, + "grad_norm": 0.34425294399261475, + "learning_rate": 8.565289892252437e-06, + "loss": 0.0067, + "step": 26710 + }, + { + "epoch": 0.1713737801964513, + "grad_norm": 0.09715772420167923, + "learning_rate": 8.568496664956388e-06, + "loss": 0.005, + "step": 26720 + }, + { + "epoch": 0.1714379170902374, + "grad_norm": 0.4063803553581238, + "learning_rate": 8.57170343766034e-06, + "loss": 0.0101, + "step": 26730 + }, + { + "epoch": 0.1715020539840235, + "grad_norm": 0.21971246600151062, + "learning_rate": 8.57491021036429e-06, + "loss": 0.0075, + "step": 26740 + }, + { + "epoch": 0.1715661908778096, + "grad_norm": 0.1712648570537567, + "learning_rate": 8.578116983068242e-06, + "loss": 0.005, + "step": 26750 + }, + { + "epoch": 0.1716303277715957, + "grad_norm": 0.19962908327579498, + "learning_rate": 8.581323755772191e-06, + "loss": 0.012, + "step": 26760 + }, + { + "epoch": 0.1716944646653818, + "grad_norm": 0.3044357895851135, + "learning_rate": 8.584530528476142e-06, + "loss": 0.0063, + "step": 26770 + }, + { + "epoch": 0.1717586015591679, + "grad_norm": 0.3212033212184906, + "learning_rate": 8.587737301180093e-06, + "loss": 0.0094, + "step": 26780 + }, + { + "epoch": 0.17182273845295398, + "grad_norm": 0.10855749994516373, + "learning_rate": 8.590944073884043e-06, + "loss": 0.0102, + "step": 26790 + }, + { + "epoch": 0.17188687534674008, + "grad_norm": 0.18229839205741882, + "learning_rate": 8.594150846587994e-06, + "loss": 0.0063, + "step": 26800 + }, + { + "epoch": 0.17195101224052617, + "grad_norm": 0.30547040700912476, + "learning_rate": 8.597357619291945e-06, + "loss": 0.0096, + "step": 26810 + }, + { + "epoch": 0.1720151491343123, + "grad_norm": 0.5064559578895569, + "learning_rate": 8.600564391995896e-06, + "loss": 0.0121, + "step": 26820 + }, + { + "epoch": 0.17207928602809838, + "grad_norm": 0.4874636232852936, + "learning_rate": 8.603771164699847e-06, + "loss": 0.0087, + "step": 26830 + }, + { + "epoch": 0.17214342292188448, + "grad_norm": 0.4293060600757599, + "learning_rate": 8.606977937403798e-06, + "loss": 0.0082, + "step": 26840 + }, + { + "epoch": 0.17220755981567057, + "grad_norm": 0.4626399874687195, + "learning_rate": 8.610184710107748e-06, + "loss": 0.0098, + "step": 26850 + }, + { + "epoch": 0.17227169670945666, + "grad_norm": 0.34719598293304443, + "learning_rate": 8.613391482811699e-06, + "loss": 0.0091, + "step": 26860 + }, + { + "epoch": 0.17233583360324276, + "grad_norm": 0.45511436462402344, + "learning_rate": 8.61659825551565e-06, + "loss": 0.0083, + "step": 26870 + }, + { + "epoch": 0.17239997049702885, + "grad_norm": 0.3324184715747833, + "learning_rate": 8.6198050282196e-06, + "loss": 0.0076, + "step": 26880 + }, + { + "epoch": 0.17246410739081497, + "grad_norm": 0.42451098561286926, + "learning_rate": 8.62301180092355e-06, + "loss": 0.0068, + "step": 26890 + }, + { + "epoch": 0.17252824428460106, + "grad_norm": 0.501450777053833, + "learning_rate": 8.626218573627502e-06, + "loss": 0.0095, + "step": 26900 + }, + { + "epoch": 0.17259238117838716, + "grad_norm": 0.2772112488746643, + "learning_rate": 8.629425346331453e-06, + "loss": 0.0077, + "step": 26910 + }, + { + "epoch": 0.17265651807217325, + "grad_norm": 0.24453534185886383, + "learning_rate": 8.632632119035404e-06, + "loss": 0.0059, + "step": 26920 + }, + { + "epoch": 0.17272065496595934, + "grad_norm": 0.26350727677345276, + "learning_rate": 8.635838891739355e-06, + "loss": 0.006, + "step": 26930 + }, + { + "epoch": 0.17278479185974543, + "grad_norm": 0.20161627233028412, + "learning_rate": 8.639045664443306e-06, + "loss": 0.0054, + "step": 26940 + }, + { + "epoch": 0.17284892875353153, + "grad_norm": 0.25609177350997925, + "learning_rate": 8.642252437147255e-06, + "loss": 0.0062, + "step": 26950 + }, + { + "epoch": 0.17291306564731765, + "grad_norm": 0.43955254554748535, + "learning_rate": 8.645459209851206e-06, + "loss": 0.0081, + "step": 26960 + }, + { + "epoch": 0.17297720254110374, + "grad_norm": 0.4165267050266266, + "learning_rate": 8.648665982555156e-06, + "loss": 0.008, + "step": 26970 + }, + { + "epoch": 0.17304133943488983, + "grad_norm": 0.3899036645889282, + "learning_rate": 8.651872755259107e-06, + "loss": 0.0068, + "step": 26980 + }, + { + "epoch": 0.17310547632867593, + "grad_norm": 0.3477489650249481, + "learning_rate": 8.655079527963058e-06, + "loss": 0.0043, + "step": 26990 + }, + { + "epoch": 0.17316961322246202, + "grad_norm": 0.6160642504692078, + "learning_rate": 8.65828630066701e-06, + "loss": 0.005, + "step": 27000 + }, + { + "epoch": 0.17323375011624811, + "grad_norm": 0.20395542681217194, + "learning_rate": 8.66149307337096e-06, + "loss": 0.005, + "step": 27010 + }, + { + "epoch": 0.1732978870100342, + "grad_norm": 0.5390647649765015, + "learning_rate": 8.664699846074911e-06, + "loss": 0.0074, + "step": 27020 + }, + { + "epoch": 0.1733620239038203, + "grad_norm": 0.2438899427652359, + "learning_rate": 8.667906618778863e-06, + "loss": 0.0086, + "step": 27030 + }, + { + "epoch": 0.17342616079760642, + "grad_norm": 0.3125239908695221, + "learning_rate": 8.671113391482812e-06, + "loss": 0.0121, + "step": 27040 + }, + { + "epoch": 0.17349029769139251, + "grad_norm": 0.41965559124946594, + "learning_rate": 8.674320164186763e-06, + "loss": 0.0091, + "step": 27050 + }, + { + "epoch": 0.1735544345851786, + "grad_norm": 0.7748135924339294, + "learning_rate": 8.677526936890713e-06, + "loss": 0.008, + "step": 27060 + }, + { + "epoch": 0.1736185714789647, + "grad_norm": 0.29431599378585815, + "learning_rate": 8.680733709594664e-06, + "loss": 0.0108, + "step": 27070 + }, + { + "epoch": 0.1736827083727508, + "grad_norm": 0.1197415143251419, + "learning_rate": 8.683940482298615e-06, + "loss": 0.0047, + "step": 27080 + }, + { + "epoch": 0.1737468452665369, + "grad_norm": 0.40981724858283997, + "learning_rate": 8.687147255002566e-06, + "loss": 0.0081, + "step": 27090 + }, + { + "epoch": 0.17381098216032298, + "grad_norm": 0.08727839589118958, + "learning_rate": 8.690354027706517e-06, + "loss": 0.0063, + "step": 27100 + }, + { + "epoch": 0.1738751190541091, + "grad_norm": 0.44716620445251465, + "learning_rate": 8.693560800410468e-06, + "loss": 0.0087, + "step": 27110 + }, + { + "epoch": 0.1739392559478952, + "grad_norm": 0.47064927220344543, + "learning_rate": 8.69676757311442e-06, + "loss": 0.008, + "step": 27120 + }, + { + "epoch": 0.1740033928416813, + "grad_norm": 0.4345616400241852, + "learning_rate": 8.699974345818369e-06, + "loss": 0.008, + "step": 27130 + }, + { + "epoch": 0.17406752973546738, + "grad_norm": 0.40541645884513855, + "learning_rate": 8.70318111852232e-06, + "loss": 0.0055, + "step": 27140 + }, + { + "epoch": 0.17413166662925347, + "grad_norm": 0.6267321705818176, + "learning_rate": 8.706387891226271e-06, + "loss": 0.0105, + "step": 27150 + }, + { + "epoch": 0.17419580352303957, + "grad_norm": 0.2842670977115631, + "learning_rate": 8.70959466393022e-06, + "loss": 0.0095, + "step": 27160 + }, + { + "epoch": 0.17425994041682566, + "grad_norm": 0.36302152276039124, + "learning_rate": 8.712801436634171e-06, + "loss": 0.0066, + "step": 27170 + }, + { + "epoch": 0.17432407731061178, + "grad_norm": 0.2736494243144989, + "learning_rate": 8.716008209338123e-06, + "loss": 0.0109, + "step": 27180 + }, + { + "epoch": 0.17438821420439787, + "grad_norm": 0.46017733216285706, + "learning_rate": 8.719214982042074e-06, + "loss": 0.0089, + "step": 27190 + }, + { + "epoch": 0.17445235109818397, + "grad_norm": 0.4222848117351532, + "learning_rate": 8.722421754746025e-06, + "loss": 0.0079, + "step": 27200 + }, + { + "epoch": 0.17451648799197006, + "grad_norm": 0.5578427910804749, + "learning_rate": 8.725628527449976e-06, + "loss": 0.0087, + "step": 27210 + }, + { + "epoch": 0.17458062488575615, + "grad_norm": 0.1966858059167862, + "learning_rate": 8.728835300153925e-06, + "loss": 0.0113, + "step": 27220 + }, + { + "epoch": 0.17464476177954225, + "grad_norm": 0.6067239046096802, + "learning_rate": 8.732042072857876e-06, + "loss": 0.009, + "step": 27230 + }, + { + "epoch": 0.17470889867332834, + "grad_norm": 0.5752755999565125, + "learning_rate": 8.735248845561828e-06, + "loss": 0.007, + "step": 27240 + }, + { + "epoch": 0.17477303556711446, + "grad_norm": 0.15181779861450195, + "learning_rate": 8.738455618265777e-06, + "loss": 0.0077, + "step": 27250 + }, + { + "epoch": 0.17483717246090055, + "grad_norm": 0.3850315809249878, + "learning_rate": 8.741662390969728e-06, + "loss": 0.0062, + "step": 27260 + }, + { + "epoch": 0.17490130935468665, + "grad_norm": 0.40511053800582886, + "learning_rate": 8.744869163673679e-06, + "loss": 0.0097, + "step": 27270 + }, + { + "epoch": 0.17496544624847274, + "grad_norm": 0.23062027990818024, + "learning_rate": 8.74807593637763e-06, + "loss": 0.0052, + "step": 27280 + }, + { + "epoch": 0.17502958314225883, + "grad_norm": 0.2363414466381073, + "learning_rate": 8.751282709081581e-06, + "loss": 0.0068, + "step": 27290 + }, + { + "epoch": 0.17509372003604493, + "grad_norm": 0.4977990388870239, + "learning_rate": 8.754489481785532e-06, + "loss": 0.009, + "step": 27300 + }, + { + "epoch": 0.17515785692983102, + "grad_norm": 0.6856558322906494, + "learning_rate": 8.757696254489484e-06, + "loss": 0.0074, + "step": 27310 + }, + { + "epoch": 0.17522199382361714, + "grad_norm": 1.0530060529708862, + "learning_rate": 8.760903027193433e-06, + "loss": 0.0116, + "step": 27320 + }, + { + "epoch": 0.17528613071740323, + "grad_norm": 0.6366263031959534, + "learning_rate": 8.764109799897384e-06, + "loss": 0.008, + "step": 27330 + }, + { + "epoch": 0.17535026761118933, + "grad_norm": 0.35337623953819275, + "learning_rate": 8.767316572601334e-06, + "loss": 0.0059, + "step": 27340 + }, + { + "epoch": 0.17541440450497542, + "grad_norm": 0.9613513946533203, + "learning_rate": 8.770523345305285e-06, + "loss": 0.0095, + "step": 27350 + }, + { + "epoch": 0.1754785413987615, + "grad_norm": 0.2593524754047394, + "learning_rate": 8.773730118009236e-06, + "loss": 0.0064, + "step": 27360 + }, + { + "epoch": 0.1755426782925476, + "grad_norm": 0.5186986327171326, + "learning_rate": 8.776936890713187e-06, + "loss": 0.007, + "step": 27370 + }, + { + "epoch": 0.1756068151863337, + "grad_norm": 0.22691476345062256, + "learning_rate": 8.780143663417138e-06, + "loss": 0.0074, + "step": 27380 + }, + { + "epoch": 0.17567095208011982, + "grad_norm": 0.4468889832496643, + "learning_rate": 8.783350436121089e-06, + "loss": 0.0078, + "step": 27390 + }, + { + "epoch": 0.1757350889739059, + "grad_norm": 0.8645966649055481, + "learning_rate": 8.78655720882504e-06, + "loss": 0.006, + "step": 27400 + }, + { + "epoch": 0.175799225867692, + "grad_norm": 0.5231471061706543, + "learning_rate": 8.78976398152899e-06, + "loss": 0.0081, + "step": 27410 + }, + { + "epoch": 0.1758633627614781, + "grad_norm": 0.39743897318840027, + "learning_rate": 8.79297075423294e-06, + "loss": 0.0077, + "step": 27420 + }, + { + "epoch": 0.1759274996552642, + "grad_norm": 0.49018046259880066, + "learning_rate": 8.79617752693689e-06, + "loss": 0.0065, + "step": 27430 + }, + { + "epoch": 0.17599163654905028, + "grad_norm": 0.4271479845046997, + "learning_rate": 8.799384299640841e-06, + "loss": 0.0059, + "step": 27440 + }, + { + "epoch": 0.17605577344283638, + "grad_norm": 0.3524341285228729, + "learning_rate": 8.802591072344792e-06, + "loss": 0.0111, + "step": 27450 + }, + { + "epoch": 0.1761199103366225, + "grad_norm": 0.4612078070640564, + "learning_rate": 8.805797845048744e-06, + "loss": 0.0074, + "step": 27460 + }, + { + "epoch": 0.1761840472304086, + "grad_norm": 0.24353353679180145, + "learning_rate": 8.809004617752695e-06, + "loss": 0.0104, + "step": 27470 + }, + { + "epoch": 0.17624818412419468, + "grad_norm": 0.13203102350234985, + "learning_rate": 8.812211390456646e-06, + "loss": 0.0069, + "step": 27480 + }, + { + "epoch": 0.17631232101798078, + "grad_norm": 0.09631278365850449, + "learning_rate": 8.815418163160597e-06, + "loss": 0.0074, + "step": 27490 + }, + { + "epoch": 0.17637645791176687, + "grad_norm": 0.308401495218277, + "learning_rate": 8.818624935864546e-06, + "loss": 0.0062, + "step": 27500 + }, + { + "epoch": 0.17644059480555296, + "grad_norm": 0.13009114563465118, + "learning_rate": 8.821831708568497e-06, + "loss": 0.0083, + "step": 27510 + }, + { + "epoch": 0.17650473169933906, + "grad_norm": 0.1699720323085785, + "learning_rate": 8.825038481272449e-06, + "loss": 0.0057, + "step": 27520 + }, + { + "epoch": 0.17656886859312518, + "grad_norm": 0.03352305293083191, + "learning_rate": 8.828245253976398e-06, + "loss": 0.0049, + "step": 27530 + }, + { + "epoch": 0.17663300548691127, + "grad_norm": 0.4160129427909851, + "learning_rate": 8.831452026680349e-06, + "loss": 0.0064, + "step": 27540 + }, + { + "epoch": 0.17669714238069736, + "grad_norm": 0.27879947423934937, + "learning_rate": 8.8346587993843e-06, + "loss": 0.0097, + "step": 27550 + }, + { + "epoch": 0.17676127927448346, + "grad_norm": 0.19119217991828918, + "learning_rate": 8.837865572088251e-06, + "loss": 0.0076, + "step": 27560 + }, + { + "epoch": 0.17682541616826955, + "grad_norm": 0.26554617285728455, + "learning_rate": 8.841072344792202e-06, + "loss": 0.0078, + "step": 27570 + }, + { + "epoch": 0.17688955306205564, + "grad_norm": 0.4061427116394043, + "learning_rate": 8.844279117496153e-06, + "loss": 0.0051, + "step": 27580 + }, + { + "epoch": 0.17695368995584174, + "grad_norm": 0.13531051576137543, + "learning_rate": 8.847485890200103e-06, + "loss": 0.0102, + "step": 27590 + }, + { + "epoch": 0.17701782684962786, + "grad_norm": 0.31303972005844116, + "learning_rate": 8.850692662904054e-06, + "loss": 0.005, + "step": 27600 + }, + { + "epoch": 0.17708196374341395, + "grad_norm": 0.42270204424858093, + "learning_rate": 8.853899435608005e-06, + "loss": 0.0103, + "step": 27610 + }, + { + "epoch": 0.17714610063720004, + "grad_norm": 0.22162435948848724, + "learning_rate": 8.857106208311955e-06, + "loss": 0.0064, + "step": 27620 + }, + { + "epoch": 0.17721023753098614, + "grad_norm": 0.16792258620262146, + "learning_rate": 8.860312981015906e-06, + "loss": 0.006, + "step": 27630 + }, + { + "epoch": 0.17727437442477223, + "grad_norm": 0.32242026925086975, + "learning_rate": 8.863519753719857e-06, + "loss": 0.0054, + "step": 27640 + }, + { + "epoch": 0.17733851131855832, + "grad_norm": 0.48972517251968384, + "learning_rate": 8.866726526423808e-06, + "loss": 0.0058, + "step": 27650 + }, + { + "epoch": 0.17740264821234442, + "grad_norm": 0.36505600810050964, + "learning_rate": 8.869933299127759e-06, + "loss": 0.0094, + "step": 27660 + }, + { + "epoch": 0.17746678510613054, + "grad_norm": 0.2752530574798584, + "learning_rate": 8.87314007183171e-06, + "loss": 0.007, + "step": 27670 + }, + { + "epoch": 0.17753092199991663, + "grad_norm": 0.22372488677501678, + "learning_rate": 8.876346844535661e-06, + "loss": 0.0054, + "step": 27680 + }, + { + "epoch": 0.17759505889370272, + "grad_norm": 0.6196951270103455, + "learning_rate": 8.87955361723961e-06, + "loss": 0.0079, + "step": 27690 + }, + { + "epoch": 0.17765919578748882, + "grad_norm": 0.3792284429073334, + "learning_rate": 8.882760389943562e-06, + "loss": 0.0084, + "step": 27700 + }, + { + "epoch": 0.1777233326812749, + "grad_norm": 0.33533328771591187, + "learning_rate": 8.885967162647511e-06, + "loss": 0.007, + "step": 27710 + }, + { + "epoch": 0.177787469575061, + "grad_norm": 0.20695863664150238, + "learning_rate": 8.889173935351462e-06, + "loss": 0.0079, + "step": 27720 + }, + { + "epoch": 0.1778516064688471, + "grad_norm": 0.3518396317958832, + "learning_rate": 8.892380708055413e-06, + "loss": 0.0072, + "step": 27730 + }, + { + "epoch": 0.17791574336263322, + "grad_norm": 0.24414193630218506, + "learning_rate": 8.895587480759365e-06, + "loss": 0.0044, + "step": 27740 + }, + { + "epoch": 0.1779798802564193, + "grad_norm": 0.4045836925506592, + "learning_rate": 8.898794253463316e-06, + "loss": 0.0085, + "step": 27750 + }, + { + "epoch": 0.1780440171502054, + "grad_norm": 0.30575981736183167, + "learning_rate": 8.902001026167267e-06, + "loss": 0.0083, + "step": 27760 + }, + { + "epoch": 0.1781081540439915, + "grad_norm": 0.5452970266342163, + "learning_rate": 8.905207798871218e-06, + "loss": 0.0082, + "step": 27770 + }, + { + "epoch": 0.1781722909377776, + "grad_norm": 0.31361040472984314, + "learning_rate": 8.908414571575167e-06, + "loss": 0.005, + "step": 27780 + }, + { + "epoch": 0.17823642783156368, + "grad_norm": 0.2590939402580261, + "learning_rate": 8.911621344279118e-06, + "loss": 0.0074, + "step": 27790 + }, + { + "epoch": 0.17830056472534977, + "grad_norm": 0.3190052807331085, + "learning_rate": 8.914828116983068e-06, + "loss": 0.0064, + "step": 27800 + }, + { + "epoch": 0.1783647016191359, + "grad_norm": 0.35626527667045593, + "learning_rate": 8.918034889687019e-06, + "loss": 0.0077, + "step": 27810 + }, + { + "epoch": 0.178428838512922, + "grad_norm": 0.40019169449806213, + "learning_rate": 8.92124166239097e-06, + "loss": 0.0075, + "step": 27820 + }, + { + "epoch": 0.17849297540670808, + "grad_norm": 0.2987542450428009, + "learning_rate": 8.924448435094921e-06, + "loss": 0.0095, + "step": 27830 + }, + { + "epoch": 0.17855711230049418, + "grad_norm": 0.4885701835155487, + "learning_rate": 8.927655207798872e-06, + "loss": 0.0078, + "step": 27840 + }, + { + "epoch": 0.17862124919428027, + "grad_norm": 0.5281029939651489, + "learning_rate": 8.930861980502823e-06, + "loss": 0.0041, + "step": 27850 + }, + { + "epoch": 0.17868538608806636, + "grad_norm": 0.5410029292106628, + "learning_rate": 8.934068753206774e-06, + "loss": 0.0106, + "step": 27860 + }, + { + "epoch": 0.17874952298185245, + "grad_norm": 0.3174389898777008, + "learning_rate": 8.937275525910724e-06, + "loss": 0.006, + "step": 27870 + }, + { + "epoch": 0.17881365987563858, + "grad_norm": 0.3071681559085846, + "learning_rate": 8.940482298614675e-06, + "loss": 0.0093, + "step": 27880 + }, + { + "epoch": 0.17887779676942467, + "grad_norm": 0.2326798141002655, + "learning_rate": 8.943689071318626e-06, + "loss": 0.0067, + "step": 27890 + }, + { + "epoch": 0.17894193366321076, + "grad_norm": 0.27320408821105957, + "learning_rate": 8.946895844022576e-06, + "loss": 0.0111, + "step": 27900 + }, + { + "epoch": 0.17900607055699685, + "grad_norm": 0.3682153522968292, + "learning_rate": 8.950102616726527e-06, + "loss": 0.0052, + "step": 27910 + }, + { + "epoch": 0.17907020745078295, + "grad_norm": 0.20906396210193634, + "learning_rate": 8.953309389430478e-06, + "loss": 0.0097, + "step": 27920 + }, + { + "epoch": 0.17913434434456904, + "grad_norm": 0.5595477819442749, + "learning_rate": 8.956516162134429e-06, + "loss": 0.0104, + "step": 27930 + }, + { + "epoch": 0.17919848123835513, + "grad_norm": 0.5162923336029053, + "learning_rate": 8.95972293483838e-06, + "loss": 0.0073, + "step": 27940 + }, + { + "epoch": 0.17926261813214125, + "grad_norm": 0.463011771440506, + "learning_rate": 8.962929707542331e-06, + "loss": 0.0066, + "step": 27950 + }, + { + "epoch": 0.17932675502592735, + "grad_norm": 0.30121463537216187, + "learning_rate": 8.96613648024628e-06, + "loss": 0.0037, + "step": 27960 + }, + { + "epoch": 0.17939089191971344, + "grad_norm": 0.3653218746185303, + "learning_rate": 8.969343252950232e-06, + "loss": 0.0098, + "step": 27970 + }, + { + "epoch": 0.17945502881349953, + "grad_norm": 0.3108031451702118, + "learning_rate": 8.972550025654183e-06, + "loss": 0.007, + "step": 27980 + }, + { + "epoch": 0.17951916570728563, + "grad_norm": 0.12764820456504822, + "learning_rate": 8.975756798358132e-06, + "loss": 0.0049, + "step": 27990 + }, + { + "epoch": 0.17958330260107172, + "grad_norm": 0.336069792509079, + "learning_rate": 8.978963571062083e-06, + "loss": 0.0046, + "step": 28000 + }, + { + "epoch": 0.1796474394948578, + "grad_norm": 0.522538959980011, + "learning_rate": 8.982170343766034e-06, + "loss": 0.0094, + "step": 28010 + }, + { + "epoch": 0.17971157638864393, + "grad_norm": 0.35158011317253113, + "learning_rate": 8.985377116469986e-06, + "loss": 0.0061, + "step": 28020 + }, + { + "epoch": 0.17977571328243003, + "grad_norm": 0.2685913145542145, + "learning_rate": 8.988583889173937e-06, + "loss": 0.0075, + "step": 28030 + }, + { + "epoch": 0.17983985017621612, + "grad_norm": 0.21148598194122314, + "learning_rate": 8.991790661877888e-06, + "loss": 0.0048, + "step": 28040 + }, + { + "epoch": 0.1799039870700022, + "grad_norm": 0.08513572067022324, + "learning_rate": 8.994997434581839e-06, + "loss": 0.0087, + "step": 28050 + }, + { + "epoch": 0.1799681239637883, + "grad_norm": 0.39910271763801575, + "learning_rate": 8.998204207285788e-06, + "loss": 0.0066, + "step": 28060 + }, + { + "epoch": 0.1800322608575744, + "grad_norm": 0.40334391593933105, + "learning_rate": 9.00141097998974e-06, + "loss": 0.0076, + "step": 28070 + }, + { + "epoch": 0.1800963977513605, + "grad_norm": 0.20072820782661438, + "learning_rate": 9.004617752693689e-06, + "loss": 0.0067, + "step": 28080 + }, + { + "epoch": 0.1801605346451466, + "grad_norm": 0.6156127452850342, + "learning_rate": 9.00782452539764e-06, + "loss": 0.008, + "step": 28090 + }, + { + "epoch": 0.1802246715389327, + "grad_norm": 0.5517195463180542, + "learning_rate": 9.011031298101591e-06, + "loss": 0.0079, + "step": 28100 + }, + { + "epoch": 0.1802888084327188, + "grad_norm": 0.3041163980960846, + "learning_rate": 9.014238070805542e-06, + "loss": 0.0073, + "step": 28110 + }, + { + "epoch": 0.1803529453265049, + "grad_norm": 0.21832886338233948, + "learning_rate": 9.017444843509493e-06, + "loss": 0.0056, + "step": 28120 + }, + { + "epoch": 0.180417082220291, + "grad_norm": 0.2679687440395355, + "learning_rate": 9.020651616213444e-06, + "loss": 0.0059, + "step": 28130 + }, + { + "epoch": 0.18048121911407708, + "grad_norm": 0.12943078577518463, + "learning_rate": 9.023858388917395e-06, + "loss": 0.0046, + "step": 28140 + }, + { + "epoch": 0.18054535600786317, + "grad_norm": 0.39300015568733215, + "learning_rate": 9.027065161621345e-06, + "loss": 0.0067, + "step": 28150 + }, + { + "epoch": 0.1806094929016493, + "grad_norm": 0.0813639909029007, + "learning_rate": 9.030271934325296e-06, + "loss": 0.0081, + "step": 28160 + }, + { + "epoch": 0.1806736297954354, + "grad_norm": 0.2814038097858429, + "learning_rate": 9.033478707029245e-06, + "loss": 0.0094, + "step": 28170 + }, + { + "epoch": 0.18073776668922148, + "grad_norm": 0.2565455734729767, + "learning_rate": 9.036685479733197e-06, + "loss": 0.0041, + "step": 28180 + }, + { + "epoch": 0.18080190358300757, + "grad_norm": 0.4624595046043396, + "learning_rate": 9.039892252437148e-06, + "loss": 0.0047, + "step": 28190 + }, + { + "epoch": 0.18086604047679367, + "grad_norm": 0.3168759346008301, + "learning_rate": 9.043099025141099e-06, + "loss": 0.0066, + "step": 28200 + }, + { + "epoch": 0.18093017737057976, + "grad_norm": 0.06198474392294884, + "learning_rate": 9.04630579784505e-06, + "loss": 0.0069, + "step": 28210 + }, + { + "epoch": 0.18099431426436585, + "grad_norm": 0.14223305881023407, + "learning_rate": 9.049512570549001e-06, + "loss": 0.0071, + "step": 28220 + }, + { + "epoch": 0.18105845115815197, + "grad_norm": 0.2489401400089264, + "learning_rate": 9.052719343252952e-06, + "loss": 0.0085, + "step": 28230 + }, + { + "epoch": 0.18112258805193807, + "grad_norm": 0.22115850448608398, + "learning_rate": 9.055926115956902e-06, + "loss": 0.0069, + "step": 28240 + }, + { + "epoch": 0.18118672494572416, + "grad_norm": 0.3180225193500519, + "learning_rate": 9.059132888660853e-06, + "loss": 0.0091, + "step": 28250 + }, + { + "epoch": 0.18125086183951025, + "grad_norm": 0.2616806626319885, + "learning_rate": 9.062339661364802e-06, + "loss": 0.011, + "step": 28260 + }, + { + "epoch": 0.18131499873329635, + "grad_norm": 0.33274537324905396, + "learning_rate": 9.065546434068753e-06, + "loss": 0.0087, + "step": 28270 + }, + { + "epoch": 0.18137913562708244, + "grad_norm": 0.35504183173179626, + "learning_rate": 9.068753206772704e-06, + "loss": 0.0101, + "step": 28280 + }, + { + "epoch": 0.18144327252086853, + "grad_norm": 0.5860472917556763, + "learning_rate": 9.071959979476655e-06, + "loss": 0.0061, + "step": 28290 + }, + { + "epoch": 0.18150740941465465, + "grad_norm": 0.2541690170764923, + "learning_rate": 9.075166752180607e-06, + "loss": 0.0095, + "step": 28300 + }, + { + "epoch": 0.18157154630844075, + "grad_norm": 0.3021218776702881, + "learning_rate": 9.078373524884558e-06, + "loss": 0.0059, + "step": 28310 + }, + { + "epoch": 0.18163568320222684, + "grad_norm": 0.5009143948554993, + "learning_rate": 9.081580297588509e-06, + "loss": 0.0057, + "step": 28320 + }, + { + "epoch": 0.18169982009601293, + "grad_norm": 0.13357484340667725, + "learning_rate": 9.084787070292458e-06, + "loss": 0.0079, + "step": 28330 + }, + { + "epoch": 0.18176395698979902, + "grad_norm": 0.29337722063064575, + "learning_rate": 9.08799384299641e-06, + "loss": 0.0069, + "step": 28340 + }, + { + "epoch": 0.18182809388358512, + "grad_norm": 0.3855084180831909, + "learning_rate": 9.09120061570036e-06, + "loss": 0.0061, + "step": 28350 + }, + { + "epoch": 0.1818922307773712, + "grad_norm": 0.548573911190033, + "learning_rate": 9.09440738840431e-06, + "loss": 0.0072, + "step": 28360 + }, + { + "epoch": 0.18195636767115733, + "grad_norm": 0.8141303062438965, + "learning_rate": 9.097614161108261e-06, + "loss": 0.0085, + "step": 28370 + }, + { + "epoch": 0.18202050456494343, + "grad_norm": 0.3887251317501068, + "learning_rate": 9.100820933812212e-06, + "loss": 0.0118, + "step": 28380 + }, + { + "epoch": 0.18208464145872952, + "grad_norm": 0.033384427428245544, + "learning_rate": 9.104027706516163e-06, + "loss": 0.0081, + "step": 28390 + }, + { + "epoch": 0.1821487783525156, + "grad_norm": 0.17048637568950653, + "learning_rate": 9.107234479220114e-06, + "loss": 0.0074, + "step": 28400 + }, + { + "epoch": 0.1822129152463017, + "grad_norm": 0.2683856189250946, + "learning_rate": 9.110441251924065e-06, + "loss": 0.0048, + "step": 28410 + }, + { + "epoch": 0.1822770521400878, + "grad_norm": 0.3027239143848419, + "learning_rate": 9.113648024628015e-06, + "loss": 0.0069, + "step": 28420 + }, + { + "epoch": 0.1823411890338739, + "grad_norm": 0.5860539078712463, + "learning_rate": 9.116854797331966e-06, + "loss": 0.012, + "step": 28430 + }, + { + "epoch": 0.18240532592765998, + "grad_norm": 0.675926923751831, + "learning_rate": 9.120061570035917e-06, + "loss": 0.0078, + "step": 28440 + }, + { + "epoch": 0.1824694628214461, + "grad_norm": 0.4775112271308899, + "learning_rate": 9.123268342739866e-06, + "loss": 0.0055, + "step": 28450 + }, + { + "epoch": 0.1825335997152322, + "grad_norm": 0.41792356967926025, + "learning_rate": 9.126475115443818e-06, + "loss": 0.0085, + "step": 28460 + }, + { + "epoch": 0.1825977366090183, + "grad_norm": 0.22479628026485443, + "learning_rate": 9.129681888147769e-06, + "loss": 0.0063, + "step": 28470 + }, + { + "epoch": 0.18266187350280438, + "grad_norm": 0.7009286284446716, + "learning_rate": 9.13288866085172e-06, + "loss": 0.0077, + "step": 28480 + }, + { + "epoch": 0.18272601039659048, + "grad_norm": 0.33191394805908203, + "learning_rate": 9.136095433555671e-06, + "loss": 0.0095, + "step": 28490 + }, + { + "epoch": 0.18279014729037657, + "grad_norm": 0.655703067779541, + "learning_rate": 9.139302206259622e-06, + "loss": 0.019, + "step": 28500 + }, + { + "epoch": 0.18285428418416266, + "grad_norm": 0.43687865138053894, + "learning_rate": 9.142508978963571e-06, + "loss": 0.0065, + "step": 28510 + }, + { + "epoch": 0.18291842107794878, + "grad_norm": 0.1936979442834854, + "learning_rate": 9.145715751667523e-06, + "loss": 0.0075, + "step": 28520 + }, + { + "epoch": 0.18298255797173488, + "grad_norm": 0.27804601192474365, + "learning_rate": 9.148922524371474e-06, + "loss": 0.0078, + "step": 28530 + }, + { + "epoch": 0.18304669486552097, + "grad_norm": 0.34704890847206116, + "learning_rate": 9.152129297075423e-06, + "loss": 0.0062, + "step": 28540 + }, + { + "epoch": 0.18311083175930706, + "grad_norm": 0.27939388155937195, + "learning_rate": 9.155336069779374e-06, + "loss": 0.0062, + "step": 28550 + }, + { + "epoch": 0.18317496865309316, + "grad_norm": 0.222448468208313, + "learning_rate": 9.158542842483325e-06, + "loss": 0.0054, + "step": 28560 + }, + { + "epoch": 0.18323910554687925, + "grad_norm": 0.12520845234394073, + "learning_rate": 9.161749615187276e-06, + "loss": 0.0112, + "step": 28570 + }, + { + "epoch": 0.18330324244066534, + "grad_norm": 0.39360687136650085, + "learning_rate": 9.164956387891228e-06, + "loss": 0.0111, + "step": 28580 + }, + { + "epoch": 0.18336737933445146, + "grad_norm": 0.32524222135543823, + "learning_rate": 9.168163160595179e-06, + "loss": 0.0089, + "step": 28590 + }, + { + "epoch": 0.18343151622823756, + "grad_norm": 0.4135376513004303, + "learning_rate": 9.17136993329913e-06, + "loss": 0.005, + "step": 28600 + }, + { + "epoch": 0.18349565312202365, + "grad_norm": 0.39116549491882324, + "learning_rate": 9.17457670600308e-06, + "loss": 0.0065, + "step": 28610 + }, + { + "epoch": 0.18355979001580974, + "grad_norm": 0.25644803047180176, + "learning_rate": 9.17778347870703e-06, + "loss": 0.0089, + "step": 28620 + }, + { + "epoch": 0.18362392690959584, + "grad_norm": 0.41352519392967224, + "learning_rate": 9.18099025141098e-06, + "loss": 0.0133, + "step": 28630 + }, + { + "epoch": 0.18368806380338193, + "grad_norm": 0.756648600101471, + "learning_rate": 9.18419702411493e-06, + "loss": 0.007, + "step": 28640 + }, + { + "epoch": 0.18375220069716802, + "grad_norm": 0.48206591606140137, + "learning_rate": 9.187403796818882e-06, + "loss": 0.0061, + "step": 28650 + }, + { + "epoch": 0.18381633759095414, + "grad_norm": 0.47298529744148254, + "learning_rate": 9.190610569522833e-06, + "loss": 0.0055, + "step": 28660 + }, + { + "epoch": 0.18388047448474024, + "grad_norm": 0.4739581346511841, + "learning_rate": 9.193817342226784e-06, + "loss": 0.0066, + "step": 28670 + }, + { + "epoch": 0.18394461137852633, + "grad_norm": 0.39162930846214294, + "learning_rate": 9.197024114930735e-06, + "loss": 0.007, + "step": 28680 + }, + { + "epoch": 0.18400874827231242, + "grad_norm": 0.37037205696105957, + "learning_rate": 9.200230887634686e-06, + "loss": 0.0064, + "step": 28690 + }, + { + "epoch": 0.18407288516609852, + "grad_norm": 0.22075816988945007, + "learning_rate": 9.203437660338636e-06, + "loss": 0.006, + "step": 28700 + }, + { + "epoch": 0.1841370220598846, + "grad_norm": 0.212757408618927, + "learning_rate": 9.206644433042587e-06, + "loss": 0.0087, + "step": 28710 + }, + { + "epoch": 0.1842011589536707, + "grad_norm": 0.4505358040332794, + "learning_rate": 9.209851205746538e-06, + "loss": 0.0065, + "step": 28720 + }, + { + "epoch": 0.18426529584745682, + "grad_norm": 0.6356704235076904, + "learning_rate": 9.213057978450487e-06, + "loss": 0.0058, + "step": 28730 + }, + { + "epoch": 0.18432943274124292, + "grad_norm": 0.4008321464061737, + "learning_rate": 9.216264751154439e-06, + "loss": 0.0073, + "step": 28740 + }, + { + "epoch": 0.184393569635029, + "grad_norm": 0.43277493119239807, + "learning_rate": 9.21947152385839e-06, + "loss": 0.0118, + "step": 28750 + }, + { + "epoch": 0.1844577065288151, + "grad_norm": 0.3399185240268707, + "learning_rate": 9.22267829656234e-06, + "loss": 0.0087, + "step": 28760 + }, + { + "epoch": 0.1845218434226012, + "grad_norm": 0.5668254494667053, + "learning_rate": 9.225885069266292e-06, + "loss": 0.0061, + "step": 28770 + }, + { + "epoch": 0.1845859803163873, + "grad_norm": 0.5704218149185181, + "learning_rate": 9.229091841970243e-06, + "loss": 0.006, + "step": 28780 + }, + { + "epoch": 0.18465011721017338, + "grad_norm": 0.6402566432952881, + "learning_rate": 9.232298614674192e-06, + "loss": 0.0075, + "step": 28790 + }, + { + "epoch": 0.1847142541039595, + "grad_norm": 0.14951485395431519, + "learning_rate": 9.235505387378144e-06, + "loss": 0.0072, + "step": 28800 + }, + { + "epoch": 0.1847783909977456, + "grad_norm": 0.5898628234863281, + "learning_rate": 9.238712160082095e-06, + "loss": 0.0099, + "step": 28810 + }, + { + "epoch": 0.1848425278915317, + "grad_norm": 0.49422821402549744, + "learning_rate": 9.241918932786044e-06, + "loss": 0.0055, + "step": 28820 + }, + { + "epoch": 0.18490666478531778, + "grad_norm": 0.2988821268081665, + "learning_rate": 9.245125705489995e-06, + "loss": 0.0057, + "step": 28830 + }, + { + "epoch": 0.18497080167910387, + "grad_norm": 0.4461117684841156, + "learning_rate": 9.248332478193946e-06, + "loss": 0.0079, + "step": 28840 + }, + { + "epoch": 0.18503493857288997, + "grad_norm": 0.43934503197669983, + "learning_rate": 9.251539250897897e-06, + "loss": 0.0114, + "step": 28850 + }, + { + "epoch": 0.18509907546667606, + "grad_norm": 0.3138737976551056, + "learning_rate": 9.254746023601849e-06, + "loss": 0.0082, + "step": 28860 + }, + { + "epoch": 0.18516321236046218, + "grad_norm": 0.6254568099975586, + "learning_rate": 9.2579527963058e-06, + "loss": 0.0081, + "step": 28870 + }, + { + "epoch": 0.18522734925424827, + "grad_norm": 0.5110483765602112, + "learning_rate": 9.261159569009749e-06, + "loss": 0.006, + "step": 28880 + }, + { + "epoch": 0.18529148614803437, + "grad_norm": 0.3529542088508606, + "learning_rate": 9.2643663417137e-06, + "loss": 0.0072, + "step": 28890 + }, + { + "epoch": 0.18535562304182046, + "grad_norm": 0.5061513781547546, + "learning_rate": 9.267573114417651e-06, + "loss": 0.0049, + "step": 28900 + }, + { + "epoch": 0.18541975993560655, + "grad_norm": 0.4316900074481964, + "learning_rate": 9.2707798871216e-06, + "loss": 0.0083, + "step": 28910 + }, + { + "epoch": 0.18548389682939265, + "grad_norm": 0.6369470953941345, + "learning_rate": 9.273986659825552e-06, + "loss": 0.0096, + "step": 28920 + }, + { + "epoch": 0.18554803372317874, + "grad_norm": 0.25403037667274475, + "learning_rate": 9.277193432529503e-06, + "loss": 0.0107, + "step": 28930 + }, + { + "epoch": 0.18561217061696486, + "grad_norm": 0.278654545545578, + "learning_rate": 9.280400205233454e-06, + "loss": 0.01, + "step": 28940 + }, + { + "epoch": 0.18567630751075095, + "grad_norm": 0.20908258855342865, + "learning_rate": 9.283606977937405e-06, + "loss": 0.0067, + "step": 28950 + }, + { + "epoch": 0.18574044440453705, + "grad_norm": 0.23901280760765076, + "learning_rate": 9.286813750641356e-06, + "loss": 0.0064, + "step": 28960 + }, + { + "epoch": 0.18580458129832314, + "grad_norm": 0.4733389616012573, + "learning_rate": 9.290020523345306e-06, + "loss": 0.0083, + "step": 28970 + }, + { + "epoch": 0.18586871819210923, + "grad_norm": 0.31443294882774353, + "learning_rate": 9.293227296049257e-06, + "loss": 0.0071, + "step": 28980 + }, + { + "epoch": 0.18593285508589533, + "grad_norm": 0.27634161710739136, + "learning_rate": 9.296434068753208e-06, + "loss": 0.0056, + "step": 28990 + }, + { + "epoch": 0.18599699197968142, + "grad_norm": 0.25937992334365845, + "learning_rate": 9.299640841457157e-06, + "loss": 0.0046, + "step": 29000 + }, + { + "epoch": 0.18606112887346754, + "grad_norm": 0.29781779646873474, + "learning_rate": 9.302847614161108e-06, + "loss": 0.0076, + "step": 29010 + }, + { + "epoch": 0.18612526576725363, + "grad_norm": 0.2678763270378113, + "learning_rate": 9.30605438686506e-06, + "loss": 0.0068, + "step": 29020 + }, + { + "epoch": 0.18618940266103973, + "grad_norm": 0.29163113236427307, + "learning_rate": 9.30926115956901e-06, + "loss": 0.0073, + "step": 29030 + }, + { + "epoch": 0.18625353955482582, + "grad_norm": 0.27826422452926636, + "learning_rate": 9.312467932272962e-06, + "loss": 0.0067, + "step": 29040 + }, + { + "epoch": 0.1863176764486119, + "grad_norm": 0.36537352204322815, + "learning_rate": 9.315674704976913e-06, + "loss": 0.0063, + "step": 29050 + }, + { + "epoch": 0.186381813342398, + "grad_norm": 0.5334638357162476, + "learning_rate": 9.318881477680862e-06, + "loss": 0.0057, + "step": 29060 + }, + { + "epoch": 0.1864459502361841, + "grad_norm": 0.453370600938797, + "learning_rate": 9.322088250384813e-06, + "loss": 0.0065, + "step": 29070 + }, + { + "epoch": 0.18651008712997022, + "grad_norm": 0.47658535838127136, + "learning_rate": 9.325295023088765e-06, + "loss": 0.0057, + "step": 29080 + }, + { + "epoch": 0.1865742240237563, + "grad_norm": 0.09743408113718033, + "learning_rate": 9.328501795792716e-06, + "loss": 0.0048, + "step": 29090 + }, + { + "epoch": 0.1866383609175424, + "grad_norm": 0.4015127420425415, + "learning_rate": 9.331708568496665e-06, + "loss": 0.0069, + "step": 29100 + }, + { + "epoch": 0.1867024978113285, + "grad_norm": 0.36822250485420227, + "learning_rate": 9.334915341200616e-06, + "loss": 0.0062, + "step": 29110 + }, + { + "epoch": 0.1867666347051146, + "grad_norm": 0.26752158999443054, + "learning_rate": 9.338122113904567e-06, + "loss": 0.0039, + "step": 29120 + }, + { + "epoch": 0.18683077159890069, + "grad_norm": 0.1569271683692932, + "learning_rate": 9.341328886608518e-06, + "loss": 0.0067, + "step": 29130 + }, + { + "epoch": 0.18689490849268678, + "grad_norm": 0.11092264950275421, + "learning_rate": 9.34453565931247e-06, + "loss": 0.0049, + "step": 29140 + }, + { + "epoch": 0.1869590453864729, + "grad_norm": 0.20999155938625336, + "learning_rate": 9.347742432016419e-06, + "loss": 0.0071, + "step": 29150 + }, + { + "epoch": 0.187023182280259, + "grad_norm": 0.22878888249397278, + "learning_rate": 9.35094920472037e-06, + "loss": 0.0057, + "step": 29160 + }, + { + "epoch": 0.18708731917404509, + "grad_norm": 0.5292526483535767, + "learning_rate": 9.354155977424321e-06, + "loss": 0.0099, + "step": 29170 + }, + { + "epoch": 0.18715145606783118, + "grad_norm": 0.250640869140625, + "learning_rate": 9.357362750128272e-06, + "loss": 0.0047, + "step": 29180 + }, + { + "epoch": 0.18721559296161727, + "grad_norm": 0.4628705680370331, + "learning_rate": 9.360569522832222e-06, + "loss": 0.0109, + "step": 29190 + }, + { + "epoch": 0.18727972985540337, + "grad_norm": 0.08864600211381912, + "learning_rate": 9.363776295536173e-06, + "loss": 0.0052, + "step": 29200 + }, + { + "epoch": 0.18734386674918946, + "grad_norm": 0.4074329137802124, + "learning_rate": 9.366983068240124e-06, + "loss": 0.0042, + "step": 29210 + }, + { + "epoch": 0.18740800364297558, + "grad_norm": 0.08895924687385559, + "learning_rate": 9.370189840944075e-06, + "loss": 0.0045, + "step": 29220 + }, + { + "epoch": 0.18747214053676167, + "grad_norm": 0.39348146319389343, + "learning_rate": 9.373396613648026e-06, + "loss": 0.0092, + "step": 29230 + }, + { + "epoch": 0.18753627743054777, + "grad_norm": 0.7161470055580139, + "learning_rate": 9.376603386351976e-06, + "loss": 0.0056, + "step": 29240 + }, + { + "epoch": 0.18760041432433386, + "grad_norm": 0.2930874526500702, + "learning_rate": 9.379810159055927e-06, + "loss": 0.0089, + "step": 29250 + }, + { + "epoch": 0.18766455121811995, + "grad_norm": 0.47388607263565063, + "learning_rate": 9.383016931759878e-06, + "loss": 0.0089, + "step": 29260 + }, + { + "epoch": 0.18772868811190604, + "grad_norm": 0.4754874110221863, + "learning_rate": 9.386223704463829e-06, + "loss": 0.0053, + "step": 29270 + }, + { + "epoch": 0.18779282500569214, + "grad_norm": 0.026910239830613136, + "learning_rate": 9.389430477167778e-06, + "loss": 0.0066, + "step": 29280 + }, + { + "epoch": 0.18785696189947826, + "grad_norm": 0.26451611518859863, + "learning_rate": 9.39263724987173e-06, + "loss": 0.0115, + "step": 29290 + }, + { + "epoch": 0.18792109879326435, + "grad_norm": 0.36335819959640503, + "learning_rate": 9.39584402257568e-06, + "loss": 0.0112, + "step": 29300 + }, + { + "epoch": 0.18798523568705044, + "grad_norm": 0.3709248900413513, + "learning_rate": 9.399050795279632e-06, + "loss": 0.0073, + "step": 29310 + }, + { + "epoch": 0.18804937258083654, + "grad_norm": 0.6065011620521545, + "learning_rate": 9.402257567983583e-06, + "loss": 0.0063, + "step": 29320 + }, + { + "epoch": 0.18811350947462263, + "grad_norm": 0.5511764883995056, + "learning_rate": 9.405464340687532e-06, + "loss": 0.0054, + "step": 29330 + }, + { + "epoch": 0.18817764636840872, + "grad_norm": 0.5607746839523315, + "learning_rate": 9.408671113391483e-06, + "loss": 0.0048, + "step": 29340 + }, + { + "epoch": 0.18824178326219482, + "grad_norm": 0.1629454493522644, + "learning_rate": 9.411877886095434e-06, + "loss": 0.0062, + "step": 29350 + }, + { + "epoch": 0.18830592015598094, + "grad_norm": 0.114433154463768, + "learning_rate": 9.415084658799386e-06, + "loss": 0.0087, + "step": 29360 + }, + { + "epoch": 0.18837005704976703, + "grad_norm": 0.21146710216999054, + "learning_rate": 9.418291431503335e-06, + "loss": 0.0073, + "step": 29370 + }, + { + "epoch": 0.18843419394355312, + "grad_norm": 0.28075212240219116, + "learning_rate": 9.421498204207286e-06, + "loss": 0.0075, + "step": 29380 + }, + { + "epoch": 0.18849833083733922, + "grad_norm": 0.6241480708122253, + "learning_rate": 9.424704976911237e-06, + "loss": 0.0093, + "step": 29390 + }, + { + "epoch": 0.1885624677311253, + "grad_norm": 0.4124491810798645, + "learning_rate": 9.427911749615188e-06, + "loss": 0.0053, + "step": 29400 + }, + { + "epoch": 0.1886266046249114, + "grad_norm": 0.2754786014556885, + "learning_rate": 9.43111852231914e-06, + "loss": 0.0062, + "step": 29410 + }, + { + "epoch": 0.1886907415186975, + "grad_norm": 0.566390872001648, + "learning_rate": 9.434325295023089e-06, + "loss": 0.0103, + "step": 29420 + }, + { + "epoch": 0.18875487841248362, + "grad_norm": 0.538544774055481, + "learning_rate": 9.43753206772704e-06, + "loss": 0.0083, + "step": 29430 + }, + { + "epoch": 0.1888190153062697, + "grad_norm": 0.2464735358953476, + "learning_rate": 9.440738840430991e-06, + "loss": 0.0056, + "step": 29440 + }, + { + "epoch": 0.1888831522000558, + "grad_norm": 0.17093630135059357, + "learning_rate": 9.443945613134942e-06, + "loss": 0.005, + "step": 29450 + }, + { + "epoch": 0.1889472890938419, + "grad_norm": 0.6086510419845581, + "learning_rate": 9.447152385838893e-06, + "loss": 0.0076, + "step": 29460 + }, + { + "epoch": 0.189011425987628, + "grad_norm": 0.3101136088371277, + "learning_rate": 9.450359158542843e-06, + "loss": 0.0062, + "step": 29470 + }, + { + "epoch": 0.18907556288141408, + "grad_norm": 0.18571898341178894, + "learning_rate": 9.453565931246794e-06, + "loss": 0.005, + "step": 29480 + }, + { + "epoch": 0.18913969977520018, + "grad_norm": 0.14237219095230103, + "learning_rate": 9.456772703950745e-06, + "loss": 0.0049, + "step": 29490 + }, + { + "epoch": 0.1892038366689863, + "grad_norm": 0.22469183802604675, + "learning_rate": 9.459979476654696e-06, + "loss": 0.0111, + "step": 29500 + }, + { + "epoch": 0.1892679735627724, + "grad_norm": 0.08391445875167847, + "learning_rate": 9.463186249358645e-06, + "loss": 0.0087, + "step": 29510 + }, + { + "epoch": 0.18933211045655848, + "grad_norm": 0.3000529110431671, + "learning_rate": 9.466393022062597e-06, + "loss": 0.0062, + "step": 29520 + }, + { + "epoch": 0.18939624735034458, + "grad_norm": 0.3420923352241516, + "learning_rate": 9.469599794766548e-06, + "loss": 0.0078, + "step": 29530 + }, + { + "epoch": 0.18946038424413067, + "grad_norm": 0.38952866196632385, + "learning_rate": 9.472806567470499e-06, + "loss": 0.0116, + "step": 29540 + }, + { + "epoch": 0.18952452113791676, + "grad_norm": 0.39586111903190613, + "learning_rate": 9.47601334017445e-06, + "loss": 0.0068, + "step": 29550 + }, + { + "epoch": 0.18958865803170286, + "grad_norm": 0.26339074969291687, + "learning_rate": 9.4792201128784e-06, + "loss": 0.0049, + "step": 29560 + }, + { + "epoch": 0.18965279492548898, + "grad_norm": 0.2637289762496948, + "learning_rate": 9.48242688558235e-06, + "loss": 0.0044, + "step": 29570 + }, + { + "epoch": 0.18971693181927507, + "grad_norm": 0.28041067719459534, + "learning_rate": 9.485633658286302e-06, + "loss": 0.0051, + "step": 29580 + }, + { + "epoch": 0.18978106871306116, + "grad_norm": 0.30262765288352966, + "learning_rate": 9.488840430990253e-06, + "loss": 0.0122, + "step": 29590 + }, + { + "epoch": 0.18984520560684726, + "grad_norm": 0.27783453464508057, + "learning_rate": 9.492047203694202e-06, + "loss": 0.0102, + "step": 29600 + }, + { + "epoch": 0.18990934250063335, + "grad_norm": 0.3596213161945343, + "learning_rate": 9.495253976398153e-06, + "loss": 0.0065, + "step": 29610 + }, + { + "epoch": 0.18997347939441944, + "grad_norm": 0.26507431268692017, + "learning_rate": 9.498460749102104e-06, + "loss": 0.0077, + "step": 29620 + }, + { + "epoch": 0.19003761628820554, + "grad_norm": 0.32735857367515564, + "learning_rate": 9.501667521806055e-06, + "loss": 0.0063, + "step": 29630 + }, + { + "epoch": 0.19010175318199166, + "grad_norm": 0.4577884376049042, + "learning_rate": 9.504874294510007e-06, + "loss": 0.0086, + "step": 29640 + }, + { + "epoch": 0.19016589007577775, + "grad_norm": 0.48929426074028015, + "learning_rate": 9.508081067213956e-06, + "loss": 0.0059, + "step": 29650 + }, + { + "epoch": 0.19023002696956384, + "grad_norm": 0.3111748993396759, + "learning_rate": 9.511287839917907e-06, + "loss": 0.0054, + "step": 29660 + }, + { + "epoch": 0.19029416386334994, + "grad_norm": 0.3823695778846741, + "learning_rate": 9.514494612621858e-06, + "loss": 0.0117, + "step": 29670 + }, + { + "epoch": 0.19035830075713603, + "grad_norm": 0.11964625865221024, + "learning_rate": 9.51770138532581e-06, + "loss": 0.0058, + "step": 29680 + }, + { + "epoch": 0.19042243765092212, + "grad_norm": 0.16966375708580017, + "learning_rate": 9.520908158029759e-06, + "loss": 0.0085, + "step": 29690 + }, + { + "epoch": 0.19048657454470821, + "grad_norm": 0.5360361933708191, + "learning_rate": 9.52411493073371e-06, + "loss": 0.0117, + "step": 29700 + }, + { + "epoch": 0.19055071143849434, + "grad_norm": 0.3585972785949707, + "learning_rate": 9.527321703437661e-06, + "loss": 0.0061, + "step": 29710 + }, + { + "epoch": 0.19061484833228043, + "grad_norm": 0.15603461861610413, + "learning_rate": 9.530528476141612e-06, + "loss": 0.0041, + "step": 29720 + }, + { + "epoch": 0.19067898522606652, + "grad_norm": 0.3922441601753235, + "learning_rate": 9.533735248845563e-06, + "loss": 0.005, + "step": 29730 + }, + { + "epoch": 0.19074312211985262, + "grad_norm": 0.2955012619495392, + "learning_rate": 9.536942021549513e-06, + "loss": 0.0053, + "step": 29740 + }, + { + "epoch": 0.1908072590136387, + "grad_norm": 0.24606548249721527, + "learning_rate": 9.540148794253464e-06, + "loss": 0.0049, + "step": 29750 + }, + { + "epoch": 0.1908713959074248, + "grad_norm": 0.4470388889312744, + "learning_rate": 9.543355566957415e-06, + "loss": 0.007, + "step": 29760 + }, + { + "epoch": 0.1909355328012109, + "grad_norm": 0.30352315306663513, + "learning_rate": 9.546562339661366e-06, + "loss": 0.0083, + "step": 29770 + }, + { + "epoch": 0.19099966969499702, + "grad_norm": 0.439728707075119, + "learning_rate": 9.549769112365315e-06, + "loss": 0.0051, + "step": 29780 + }, + { + "epoch": 0.1910638065887831, + "grad_norm": 0.4757135510444641, + "learning_rate": 9.552975885069266e-06, + "loss": 0.0056, + "step": 29790 + }, + { + "epoch": 0.1911279434825692, + "grad_norm": 0.37213197350502014, + "learning_rate": 9.556182657773218e-06, + "loss": 0.0047, + "step": 29800 + }, + { + "epoch": 0.1911920803763553, + "grad_norm": 0.4364951252937317, + "learning_rate": 9.559389430477169e-06, + "loss": 0.0091, + "step": 29810 + }, + { + "epoch": 0.1912562172701414, + "grad_norm": 0.3021470606327057, + "learning_rate": 9.56259620318112e-06, + "loss": 0.0078, + "step": 29820 + }, + { + "epoch": 0.19132035416392748, + "grad_norm": 0.25977569818496704, + "learning_rate": 9.565802975885071e-06, + "loss": 0.0091, + "step": 29830 + }, + { + "epoch": 0.19138449105771357, + "grad_norm": 0.3275696039199829, + "learning_rate": 9.56900974858902e-06, + "loss": 0.0061, + "step": 29840 + }, + { + "epoch": 0.1914486279514997, + "grad_norm": 0.3866559565067291, + "learning_rate": 9.572216521292971e-06, + "loss": 0.0061, + "step": 29850 + }, + { + "epoch": 0.1915127648452858, + "grad_norm": 0.3434855341911316, + "learning_rate": 9.575423293996923e-06, + "loss": 0.0062, + "step": 29860 + }, + { + "epoch": 0.19157690173907188, + "grad_norm": 0.3796941637992859, + "learning_rate": 9.578630066700872e-06, + "loss": 0.011, + "step": 29870 + }, + { + "epoch": 0.19164103863285797, + "grad_norm": 0.6490646600723267, + "learning_rate": 9.581836839404823e-06, + "loss": 0.0081, + "step": 29880 + }, + { + "epoch": 0.19170517552664407, + "grad_norm": 0.2909601330757141, + "learning_rate": 9.585043612108774e-06, + "loss": 0.0072, + "step": 29890 + }, + { + "epoch": 0.19176931242043016, + "grad_norm": 0.44852450489997864, + "learning_rate": 9.588250384812725e-06, + "loss": 0.0079, + "step": 29900 + }, + { + "epoch": 0.19183344931421625, + "grad_norm": 0.6037708520889282, + "learning_rate": 9.591457157516676e-06, + "loss": 0.007, + "step": 29910 + }, + { + "epoch": 0.19189758620800235, + "grad_norm": 0.20274221897125244, + "learning_rate": 9.594663930220628e-06, + "loss": 0.007, + "step": 29920 + }, + { + "epoch": 0.19196172310178847, + "grad_norm": 0.2396126240491867, + "learning_rate": 9.597870702924577e-06, + "loss": 0.0062, + "step": 29930 + }, + { + "epoch": 0.19202585999557456, + "grad_norm": 0.50922691822052, + "learning_rate": 9.601077475628528e-06, + "loss": 0.0059, + "step": 29940 + }, + { + "epoch": 0.19208999688936065, + "grad_norm": 0.5943179130554199, + "learning_rate": 9.60428424833248e-06, + "loss": 0.0086, + "step": 29950 + }, + { + "epoch": 0.19215413378314675, + "grad_norm": 0.11651797592639923, + "learning_rate": 9.60749102103643e-06, + "loss": 0.0064, + "step": 29960 + }, + { + "epoch": 0.19221827067693284, + "grad_norm": 0.3450041711330414, + "learning_rate": 9.61069779374038e-06, + "loss": 0.0077, + "step": 29970 + }, + { + "epoch": 0.19228240757071893, + "grad_norm": 0.3452431261539459, + "learning_rate": 9.61390456644433e-06, + "loss": 0.0077, + "step": 29980 + }, + { + "epoch": 0.19234654446450503, + "grad_norm": 0.5599872469902039, + "learning_rate": 9.617111339148282e-06, + "loss": 0.0062, + "step": 29990 + }, + { + "epoch": 0.19241068135829115, + "grad_norm": 0.11537335813045502, + "learning_rate": 9.620318111852233e-06, + "loss": 0.0053, + "step": 30000 + }, + { + "epoch": 0.19247481825207724, + "grad_norm": 0.2817453444004059, + "learning_rate": 9.623524884556184e-06, + "loss": 0.0092, + "step": 30010 + }, + { + "epoch": 0.19253895514586333, + "grad_norm": 0.37333157658576965, + "learning_rate": 9.626731657260134e-06, + "loss": 0.0106, + "step": 30020 + }, + { + "epoch": 0.19260309203964943, + "grad_norm": 0.45478856563568115, + "learning_rate": 9.629938429964085e-06, + "loss": 0.0055, + "step": 30030 + }, + { + "epoch": 0.19266722893343552, + "grad_norm": 0.2714729905128479, + "learning_rate": 9.633145202668036e-06, + "loss": 0.0063, + "step": 30040 + }, + { + "epoch": 0.1927313658272216, + "grad_norm": 0.2567165791988373, + "learning_rate": 9.636351975371987e-06, + "loss": 0.0088, + "step": 30050 + }, + { + "epoch": 0.1927955027210077, + "grad_norm": 0.24161703884601593, + "learning_rate": 9.639558748075936e-06, + "loss": 0.0078, + "step": 30060 + }, + { + "epoch": 0.19285963961479383, + "grad_norm": 0.5712881088256836, + "learning_rate": 9.642765520779887e-06, + "loss": 0.0061, + "step": 30070 + }, + { + "epoch": 0.19292377650857992, + "grad_norm": 0.4481494426727295, + "learning_rate": 9.645972293483839e-06, + "loss": 0.0061, + "step": 30080 + }, + { + "epoch": 0.192987913402366, + "grad_norm": 0.41341614723205566, + "learning_rate": 9.64917906618779e-06, + "loss": 0.006, + "step": 30090 + }, + { + "epoch": 0.1930520502961521, + "grad_norm": 0.5246740579605103, + "learning_rate": 9.65238583889174e-06, + "loss": 0.0068, + "step": 30100 + }, + { + "epoch": 0.1931161871899382, + "grad_norm": 0.271650493144989, + "learning_rate": 9.65559261159569e-06, + "loss": 0.0087, + "step": 30110 + }, + { + "epoch": 0.1931803240837243, + "grad_norm": 0.32311326265335083, + "learning_rate": 9.658799384299641e-06, + "loss": 0.0053, + "step": 30120 + }, + { + "epoch": 0.19324446097751038, + "grad_norm": 0.48358315229415894, + "learning_rate": 9.662006157003592e-06, + "loss": 0.0072, + "step": 30130 + }, + { + "epoch": 0.1933085978712965, + "grad_norm": 0.2274123579263687, + "learning_rate": 9.665212929707544e-06, + "loss": 0.0061, + "step": 30140 + }, + { + "epoch": 0.1933727347650826, + "grad_norm": 0.6029508709907532, + "learning_rate": 9.668419702411493e-06, + "loss": 0.0095, + "step": 30150 + }, + { + "epoch": 0.1934368716588687, + "grad_norm": 0.6068460941314697, + "learning_rate": 9.671626475115444e-06, + "loss": 0.0048, + "step": 30160 + }, + { + "epoch": 0.19350100855265479, + "grad_norm": 0.16866932809352875, + "learning_rate": 9.674833247819395e-06, + "loss": 0.0098, + "step": 30170 + }, + { + "epoch": 0.19356514544644088, + "grad_norm": 0.5950133204460144, + "learning_rate": 9.678040020523346e-06, + "loss": 0.0102, + "step": 30180 + }, + { + "epoch": 0.19362928234022697, + "grad_norm": 0.18013131618499756, + "learning_rate": 9.681246793227297e-06, + "loss": 0.0121, + "step": 30190 + }, + { + "epoch": 0.19369341923401306, + "grad_norm": 0.5216000080108643, + "learning_rate": 9.684453565931249e-06, + "loss": 0.0082, + "step": 30200 + }, + { + "epoch": 0.19375755612779919, + "grad_norm": 0.22303879261016846, + "learning_rate": 9.687660338635198e-06, + "loss": 0.0071, + "step": 30210 + }, + { + "epoch": 0.19382169302158528, + "grad_norm": 0.3154642581939697, + "learning_rate": 9.690867111339149e-06, + "loss": 0.0093, + "step": 30220 + }, + { + "epoch": 0.19388582991537137, + "grad_norm": 0.24289870262145996, + "learning_rate": 9.6940738840431e-06, + "loss": 0.006, + "step": 30230 + }, + { + "epoch": 0.19394996680915746, + "grad_norm": 0.32565346360206604, + "learning_rate": 9.69728065674705e-06, + "loss": 0.0088, + "step": 30240 + }, + { + "epoch": 0.19401410370294356, + "grad_norm": 0.2683010399341583, + "learning_rate": 9.700487429451e-06, + "loss": 0.005, + "step": 30250 + }, + { + "epoch": 0.19407824059672965, + "grad_norm": 0.26952335238456726, + "learning_rate": 9.703694202154952e-06, + "loss": 0.0058, + "step": 30260 + }, + { + "epoch": 0.19414237749051574, + "grad_norm": 0.25967660546302795, + "learning_rate": 9.706900974858903e-06, + "loss": 0.0062, + "step": 30270 + }, + { + "epoch": 0.19420651438430186, + "grad_norm": 0.15537609159946442, + "learning_rate": 9.710107747562854e-06, + "loss": 0.005, + "step": 30280 + }, + { + "epoch": 0.19427065127808796, + "grad_norm": 0.4608062505722046, + "learning_rate": 9.713314520266805e-06, + "loss": 0.0074, + "step": 30290 + }, + { + "epoch": 0.19433478817187405, + "grad_norm": 0.4129866063594818, + "learning_rate": 9.716521292970755e-06, + "loss": 0.0102, + "step": 30300 + }, + { + "epoch": 0.19439892506566014, + "grad_norm": 0.5129449367523193, + "learning_rate": 9.719728065674706e-06, + "loss": 0.0077, + "step": 30310 + }, + { + "epoch": 0.19446306195944624, + "grad_norm": 0.33677932620048523, + "learning_rate": 9.722934838378657e-06, + "loss": 0.0083, + "step": 30320 + }, + { + "epoch": 0.19452719885323233, + "grad_norm": 0.8424015641212463, + "learning_rate": 9.726141611082606e-06, + "loss": 0.0074, + "step": 30330 + }, + { + "epoch": 0.19459133574701842, + "grad_norm": 0.29892176389694214, + "learning_rate": 9.729348383786557e-06, + "loss": 0.0074, + "step": 30340 + }, + { + "epoch": 0.19465547264080454, + "grad_norm": 0.2060842663049698, + "learning_rate": 9.732555156490508e-06, + "loss": 0.007, + "step": 30350 + }, + { + "epoch": 0.19471960953459064, + "grad_norm": 1.0599238872528076, + "learning_rate": 9.73576192919446e-06, + "loss": 0.0091, + "step": 30360 + }, + { + "epoch": 0.19478374642837673, + "grad_norm": 0.1715250313282013, + "learning_rate": 9.73896870189841e-06, + "loss": 0.0114, + "step": 30370 + }, + { + "epoch": 0.19484788332216282, + "grad_norm": 0.2989075481891632, + "learning_rate": 9.742175474602362e-06, + "loss": 0.0059, + "step": 30380 + }, + { + "epoch": 0.19491202021594892, + "grad_norm": 0.34631791710853577, + "learning_rate": 9.745382247306311e-06, + "loss": 0.007, + "step": 30390 + }, + { + "epoch": 0.194976157109735, + "grad_norm": 0.15520182251930237, + "learning_rate": 9.748589020010262e-06, + "loss": 0.005, + "step": 30400 + }, + { + "epoch": 0.1950402940035211, + "grad_norm": 0.6479896306991577, + "learning_rate": 9.751795792714213e-06, + "loss": 0.0051, + "step": 30410 + }, + { + "epoch": 0.19510443089730722, + "grad_norm": 0.38978007435798645, + "learning_rate": 9.755002565418163e-06, + "loss": 0.0062, + "step": 30420 + }, + { + "epoch": 0.19516856779109332, + "grad_norm": 0.29684966802597046, + "learning_rate": 9.758209338122114e-06, + "loss": 0.0044, + "step": 30430 + }, + { + "epoch": 0.1952327046848794, + "grad_norm": 0.61577969789505, + "learning_rate": 9.761416110826065e-06, + "loss": 0.008, + "step": 30440 + }, + { + "epoch": 0.1952968415786655, + "grad_norm": 0.2743522822856903, + "learning_rate": 9.764622883530016e-06, + "loss": 0.0061, + "step": 30450 + }, + { + "epoch": 0.1953609784724516, + "grad_norm": 0.10397171974182129, + "learning_rate": 9.767829656233967e-06, + "loss": 0.0081, + "step": 30460 + }, + { + "epoch": 0.1954251153662377, + "grad_norm": 0.2834140956401825, + "learning_rate": 9.771036428937918e-06, + "loss": 0.01, + "step": 30470 + }, + { + "epoch": 0.19548925226002378, + "grad_norm": 0.191796213388443, + "learning_rate": 9.774243201641868e-06, + "loss": 0.0069, + "step": 30480 + }, + { + "epoch": 0.1955533891538099, + "grad_norm": 0.1656953990459442, + "learning_rate": 9.777449974345819e-06, + "loss": 0.0064, + "step": 30490 + }, + { + "epoch": 0.195617526047596, + "grad_norm": 0.2885013222694397, + "learning_rate": 9.78065674704977e-06, + "loss": 0.0094, + "step": 30500 + }, + { + "epoch": 0.1956816629413821, + "grad_norm": 0.41713154315948486, + "learning_rate": 9.78386351975372e-06, + "loss": 0.0071, + "step": 30510 + }, + { + "epoch": 0.19574579983516818, + "grad_norm": 0.3589705228805542, + "learning_rate": 9.78707029245767e-06, + "loss": 0.0098, + "step": 30520 + }, + { + "epoch": 0.19580993672895428, + "grad_norm": 0.19352507591247559, + "learning_rate": 9.790277065161622e-06, + "loss": 0.0074, + "step": 30530 + }, + { + "epoch": 0.19587407362274037, + "grad_norm": 0.8712581396102905, + "learning_rate": 9.793483837865573e-06, + "loss": 0.009, + "step": 30540 + }, + { + "epoch": 0.19593821051652646, + "grad_norm": 0.21297553181648254, + "learning_rate": 9.796690610569524e-06, + "loss": 0.0128, + "step": 30550 + }, + { + "epoch": 0.19600234741031258, + "grad_norm": 0.40038996934890747, + "learning_rate": 9.799897383273475e-06, + "loss": 0.0078, + "step": 30560 + }, + { + "epoch": 0.19606648430409868, + "grad_norm": 0.4325382113456726, + "learning_rate": 9.803104155977426e-06, + "loss": 0.0099, + "step": 30570 + }, + { + "epoch": 0.19613062119788477, + "grad_norm": 0.19598670303821564, + "learning_rate": 9.806310928681376e-06, + "loss": 0.01, + "step": 30580 + }, + { + "epoch": 0.19619475809167086, + "grad_norm": 0.27098730206489563, + "learning_rate": 9.809517701385327e-06, + "loss": 0.0053, + "step": 30590 + }, + { + "epoch": 0.19625889498545696, + "grad_norm": 0.1887778639793396, + "learning_rate": 9.812724474089276e-06, + "loss": 0.0066, + "step": 30600 + }, + { + "epoch": 0.19632303187924305, + "grad_norm": 0.4308469593524933, + "learning_rate": 9.815931246793227e-06, + "loss": 0.0082, + "step": 30610 + }, + { + "epoch": 0.19638716877302914, + "grad_norm": 0.2181924432516098, + "learning_rate": 9.819138019497178e-06, + "loss": 0.0076, + "step": 30620 + }, + { + "epoch": 0.19645130566681526, + "grad_norm": 0.38740473985671997, + "learning_rate": 9.82234479220113e-06, + "loss": 0.0061, + "step": 30630 + }, + { + "epoch": 0.19651544256060136, + "grad_norm": 0.36458855867385864, + "learning_rate": 9.82555156490508e-06, + "loss": 0.0055, + "step": 30640 + }, + { + "epoch": 0.19657957945438745, + "grad_norm": 0.21326607465744019, + "learning_rate": 9.828758337609032e-06, + "loss": 0.0071, + "step": 30650 + }, + { + "epoch": 0.19664371634817354, + "grad_norm": 0.2693590819835663, + "learning_rate": 9.831965110312983e-06, + "loss": 0.0074, + "step": 30660 + }, + { + "epoch": 0.19670785324195963, + "grad_norm": 0.3972683250904083, + "learning_rate": 9.835171883016932e-06, + "loss": 0.0054, + "step": 30670 + }, + { + "epoch": 0.19677199013574573, + "grad_norm": 0.23552118241786957, + "learning_rate": 9.838378655720883e-06, + "loss": 0.0067, + "step": 30680 + }, + { + "epoch": 0.19683612702953182, + "grad_norm": 0.18564055860042572, + "learning_rate": 9.841585428424833e-06, + "loss": 0.0116, + "step": 30690 + }, + { + "epoch": 0.19690026392331794, + "grad_norm": 0.2473290115594864, + "learning_rate": 9.844792201128784e-06, + "loss": 0.0068, + "step": 30700 + }, + { + "epoch": 0.19696440081710404, + "grad_norm": 0.13441959023475647, + "learning_rate": 9.847998973832735e-06, + "loss": 0.0057, + "step": 30710 + }, + { + "epoch": 0.19702853771089013, + "grad_norm": 0.24626748263835907, + "learning_rate": 9.851205746536686e-06, + "loss": 0.0061, + "step": 30720 + }, + { + "epoch": 0.19709267460467622, + "grad_norm": 0.2407292276620865, + "learning_rate": 9.854412519240637e-06, + "loss": 0.0091, + "step": 30730 + }, + { + "epoch": 0.19715681149846231, + "grad_norm": 0.31166961789131165, + "learning_rate": 9.857619291944588e-06, + "loss": 0.0081, + "step": 30740 + }, + { + "epoch": 0.1972209483922484, + "grad_norm": 0.18418951332569122, + "learning_rate": 9.86082606464854e-06, + "loss": 0.0063, + "step": 30750 + }, + { + "epoch": 0.1972850852860345, + "grad_norm": 0.5213150382041931, + "learning_rate": 9.864032837352489e-06, + "loss": 0.0039, + "step": 30760 + }, + { + "epoch": 0.19734922217982062, + "grad_norm": 0.657940685749054, + "learning_rate": 9.86723961005644e-06, + "loss": 0.0064, + "step": 30770 + }, + { + "epoch": 0.19741335907360671, + "grad_norm": 0.24551035463809967, + "learning_rate": 9.87044638276039e-06, + "loss": 0.0088, + "step": 30780 + }, + { + "epoch": 0.1974774959673928, + "grad_norm": 0.31944477558135986, + "learning_rate": 9.87365315546434e-06, + "loss": 0.0087, + "step": 30790 + }, + { + "epoch": 0.1975416328611789, + "grad_norm": 0.6363940834999084, + "learning_rate": 9.876859928168292e-06, + "loss": 0.0073, + "step": 30800 + }, + { + "epoch": 0.197605769754965, + "grad_norm": 0.37719208002090454, + "learning_rate": 9.880066700872243e-06, + "loss": 0.0061, + "step": 30810 + }, + { + "epoch": 0.1976699066487511, + "grad_norm": 0.09619004279375076, + "learning_rate": 9.883273473576194e-06, + "loss": 0.007, + "step": 30820 + }, + { + "epoch": 0.19773404354253718, + "grad_norm": 0.48498639464378357, + "learning_rate": 9.886480246280145e-06, + "loss": 0.0068, + "step": 30830 + }, + { + "epoch": 0.1977981804363233, + "grad_norm": 0.96433025598526, + "learning_rate": 9.889687018984096e-06, + "loss": 0.0084, + "step": 30840 + }, + { + "epoch": 0.1978623173301094, + "grad_norm": 0.48657941818237305, + "learning_rate": 9.892893791688045e-06, + "loss": 0.0089, + "step": 30850 + }, + { + "epoch": 0.1979264542238955, + "grad_norm": 0.5844976902008057, + "learning_rate": 9.896100564391997e-06, + "loss": 0.007, + "step": 30860 + }, + { + "epoch": 0.19799059111768158, + "grad_norm": 0.34406018257141113, + "learning_rate": 9.899307337095948e-06, + "loss": 0.0055, + "step": 30870 + }, + { + "epoch": 0.19805472801146767, + "grad_norm": 0.481023907661438, + "learning_rate": 9.902514109799897e-06, + "loss": 0.01, + "step": 30880 + }, + { + "epoch": 0.19811886490525377, + "grad_norm": 0.11957148462533951, + "learning_rate": 9.905720882503848e-06, + "loss": 0.0095, + "step": 30890 + }, + { + "epoch": 0.19818300179903986, + "grad_norm": 0.5999955534934998, + "learning_rate": 9.9089276552078e-06, + "loss": 0.0078, + "step": 30900 + }, + { + "epoch": 0.19824713869282598, + "grad_norm": 0.3113568127155304, + "learning_rate": 9.91213442791175e-06, + "loss": 0.0061, + "step": 30910 + }, + { + "epoch": 0.19831127558661207, + "grad_norm": 0.39044150710105896, + "learning_rate": 9.915341200615702e-06, + "loss": 0.0054, + "step": 30920 + }, + { + "epoch": 0.19837541248039817, + "grad_norm": 0.5722967386245728, + "learning_rate": 9.918547973319653e-06, + "loss": 0.009, + "step": 30930 + }, + { + "epoch": 0.19843954937418426, + "grad_norm": 0.35687652230262756, + "learning_rate": 9.921754746023604e-06, + "loss": 0.0052, + "step": 30940 + }, + { + "epoch": 0.19850368626797035, + "grad_norm": 0.5795350074768066, + "learning_rate": 9.924961518727553e-06, + "loss": 0.0059, + "step": 30950 + }, + { + "epoch": 0.19856782316175645, + "grad_norm": 0.4220397174358368, + "learning_rate": 9.928168291431504e-06, + "loss": 0.0104, + "step": 30960 + }, + { + "epoch": 0.19863196005554254, + "grad_norm": 0.1680145561695099, + "learning_rate": 9.931375064135454e-06, + "loss": 0.0064, + "step": 30970 + }, + { + "epoch": 0.19869609694932866, + "grad_norm": 0.4890241026878357, + "learning_rate": 9.934581836839405e-06, + "loss": 0.0074, + "step": 30980 + }, + { + "epoch": 0.19876023384311475, + "grad_norm": 0.1341424286365509, + "learning_rate": 9.937788609543356e-06, + "loss": 0.0068, + "step": 30990 + }, + { + "epoch": 0.19882437073690085, + "grad_norm": 0.29779118299484253, + "learning_rate": 9.940995382247307e-06, + "loss": 0.0042, + "step": 31000 + }, + { + "epoch": 0.19888850763068694, + "grad_norm": 0.1636631190776825, + "learning_rate": 9.944202154951258e-06, + "loss": 0.0062, + "step": 31010 + }, + { + "epoch": 0.19895264452447303, + "grad_norm": 0.18600665032863617, + "learning_rate": 9.94740892765521e-06, + "loss": 0.0055, + "step": 31020 + }, + { + "epoch": 0.19901678141825913, + "grad_norm": 0.5404052734375, + "learning_rate": 9.95061570035916e-06, + "loss": 0.0066, + "step": 31030 + }, + { + "epoch": 0.19908091831204522, + "grad_norm": 0.12097226083278656, + "learning_rate": 9.95382247306311e-06, + "loss": 0.0035, + "step": 31040 + }, + { + "epoch": 0.19914505520583134, + "grad_norm": 0.14039276540279388, + "learning_rate": 9.957029245767061e-06, + "loss": 0.0076, + "step": 31050 + }, + { + "epoch": 0.19920919209961743, + "grad_norm": 0.3843585252761841, + "learning_rate": 9.96023601847101e-06, + "loss": 0.007, + "step": 31060 + }, + { + "epoch": 0.19927332899340353, + "grad_norm": 0.3972879648208618, + "learning_rate": 9.963442791174962e-06, + "loss": 0.0053, + "step": 31070 + }, + { + "epoch": 0.19933746588718962, + "grad_norm": 0.3833004832267761, + "learning_rate": 9.966649563878913e-06, + "loss": 0.0063, + "step": 31080 + }, + { + "epoch": 0.1994016027809757, + "grad_norm": 0.37778857350349426, + "learning_rate": 9.969856336582864e-06, + "loss": 0.0059, + "step": 31090 + }, + { + "epoch": 0.1994657396747618, + "grad_norm": 0.3843926191329956, + "learning_rate": 9.973063109286815e-06, + "loss": 0.0098, + "step": 31100 + }, + { + "epoch": 0.1995298765685479, + "grad_norm": 0.2783735990524292, + "learning_rate": 9.976269881990766e-06, + "loss": 0.0097, + "step": 31110 + }, + { + "epoch": 0.19959401346233402, + "grad_norm": 0.33093342185020447, + "learning_rate": 9.979476654694717e-06, + "loss": 0.0062, + "step": 31120 + }, + { + "epoch": 0.1996581503561201, + "grad_norm": 0.27166324853897095, + "learning_rate": 9.982683427398667e-06, + "loss": 0.0059, + "step": 31130 + }, + { + "epoch": 0.1997222872499062, + "grad_norm": 0.48899003863334656, + "learning_rate": 9.985890200102618e-06, + "loss": 0.0088, + "step": 31140 + }, + { + "epoch": 0.1997864241436923, + "grad_norm": 0.4287528991699219, + "learning_rate": 9.989096972806567e-06, + "loss": 0.0089, + "step": 31150 + }, + { + "epoch": 0.1998505610374784, + "grad_norm": 0.37305253744125366, + "learning_rate": 9.992303745510518e-06, + "loss": 0.0071, + "step": 31160 + }, + { + "epoch": 0.19991469793126448, + "grad_norm": 0.38936832547187805, + "learning_rate": 9.99551051821447e-06, + "loss": 0.0046, + "step": 31170 + }, + { + "epoch": 0.19997883482505058, + "grad_norm": 0.03365279734134674, + "learning_rate": 9.99871729091842e-06, + "loss": 0.0062, + "step": 31180 + }, + { + "epoch": 0.2000429717188367, + "grad_norm": 0.13471749424934387, + "learning_rate": 9.999999988722357e-06, + "loss": 0.0072, + "step": 31190 + }, + { + "epoch": 0.2001071086126228, + "grad_norm": 0.3005850911140442, + "learning_rate": 9.999999919803426e-06, + "loss": 0.0045, + "step": 31200 + }, + { + "epoch": 0.20017124550640888, + "grad_norm": 0.37838831543922424, + "learning_rate": 9.999999788230924e-06, + "loss": 0.0068, + "step": 31210 + }, + { + "epoch": 0.20023538240019498, + "grad_norm": 0.27392446994781494, + "learning_rate": 9.99999959400485e-06, + "loss": 0.0085, + "step": 31220 + }, + { + "epoch": 0.20029951929398107, + "grad_norm": 0.22249607741832733, + "learning_rate": 9.999999337125208e-06, + "loss": 0.0076, + "step": 31230 + }, + { + "epoch": 0.20036365618776716, + "grad_norm": 0.5627263188362122, + "learning_rate": 9.999999017592e-06, + "loss": 0.0081, + "step": 31240 + }, + { + "epoch": 0.20042779308155326, + "grad_norm": 0.32999005913734436, + "learning_rate": 9.999998635405232e-06, + "loss": 0.0076, + "step": 31250 + }, + { + "epoch": 0.20049192997533938, + "grad_norm": 0.21928158402442932, + "learning_rate": 9.999998190564907e-06, + "loss": 0.0093, + "step": 31260 + }, + { + "epoch": 0.20055606686912547, + "grad_norm": 0.3306523263454437, + "learning_rate": 9.99999768307103e-06, + "loss": 0.0075, + "step": 31270 + }, + { + "epoch": 0.20062020376291156, + "grad_norm": 0.2662363648414612, + "learning_rate": 9.999997112923611e-06, + "loss": 0.0052, + "step": 31280 + }, + { + "epoch": 0.20068434065669766, + "grad_norm": 0.2550363838672638, + "learning_rate": 9.999996480122654e-06, + "loss": 0.0078, + "step": 31290 + }, + { + "epoch": 0.20074847755048375, + "grad_norm": 0.530947208404541, + "learning_rate": 9.999995784668167e-06, + "loss": 0.0079, + "step": 31300 + }, + { + "epoch": 0.20081261444426984, + "grad_norm": 0.2493947297334671, + "learning_rate": 9.99999502656016e-06, + "loss": 0.007, + "step": 31310 + }, + { + "epoch": 0.20087675133805594, + "grad_norm": 0.3358502984046936, + "learning_rate": 9.999994205798643e-06, + "loss": 0.0078, + "step": 31320 + }, + { + "epoch": 0.20094088823184206, + "grad_norm": 0.2657833993434906, + "learning_rate": 9.999993322383621e-06, + "loss": 0.0082, + "step": 31330 + }, + { + "epoch": 0.20100502512562815, + "grad_norm": 0.9135074019432068, + "learning_rate": 9.999992376315113e-06, + "loss": 0.0065, + "step": 31340 + }, + { + "epoch": 0.20106916201941424, + "grad_norm": 0.47939422726631165, + "learning_rate": 9.999991367593127e-06, + "loss": 0.0084, + "step": 31350 + }, + { + "epoch": 0.20113329891320034, + "grad_norm": 0.21267169713974, + "learning_rate": 9.999990296217674e-06, + "loss": 0.0048, + "step": 31360 + }, + { + "epoch": 0.20119743580698643, + "grad_norm": 0.2891674339771271, + "learning_rate": 9.99998916218877e-06, + "loss": 0.0112, + "step": 31370 + }, + { + "epoch": 0.20126157270077252, + "grad_norm": 0.33775970339775085, + "learning_rate": 9.999987965506428e-06, + "loss": 0.0078, + "step": 31380 + }, + { + "epoch": 0.20132570959455862, + "grad_norm": 0.2934929430484772, + "learning_rate": 9.999986706170664e-06, + "loss": 0.0063, + "step": 31390 + }, + { + "epoch": 0.2013898464883447, + "grad_norm": 0.3903428912162781, + "learning_rate": 9.999985384181492e-06, + "loss": 0.0081, + "step": 31400 + }, + { + "epoch": 0.20145398338213083, + "grad_norm": 0.3547932207584381, + "learning_rate": 9.999983999538932e-06, + "loss": 0.0086, + "step": 31410 + }, + { + "epoch": 0.20151812027591692, + "grad_norm": 0.5304170846939087, + "learning_rate": 9.999982552242998e-06, + "loss": 0.0068, + "step": 31420 + }, + { + "epoch": 0.20158225716970302, + "grad_norm": 0.6028372645378113, + "learning_rate": 9.999981042293707e-06, + "loss": 0.008, + "step": 31430 + }, + { + "epoch": 0.2016463940634891, + "grad_norm": 0.06574447453022003, + "learning_rate": 9.999979469691082e-06, + "loss": 0.0045, + "step": 31440 + }, + { + "epoch": 0.2017105309572752, + "grad_norm": 0.35700806975364685, + "learning_rate": 9.999977834435141e-06, + "loss": 0.0063, + "step": 31450 + }, + { + "epoch": 0.2017746678510613, + "grad_norm": 0.1741345375776291, + "learning_rate": 9.999976136525904e-06, + "loss": 0.0059, + "step": 31460 + }, + { + "epoch": 0.2018388047448474, + "grad_norm": 0.3376474976539612, + "learning_rate": 9.999974375963393e-06, + "loss": 0.0071, + "step": 31470 + }, + { + "epoch": 0.2019029416386335, + "grad_norm": 0.33258095383644104, + "learning_rate": 9.999972552747629e-06, + "loss": 0.0098, + "step": 31480 + }, + { + "epoch": 0.2019670785324196, + "grad_norm": 0.5604947805404663, + "learning_rate": 9.999970666878635e-06, + "loss": 0.0068, + "step": 31490 + }, + { + "epoch": 0.2020312154262057, + "grad_norm": 0.2540450692176819, + "learning_rate": 9.999968718356437e-06, + "loss": 0.0036, + "step": 31500 + }, + { + "epoch": 0.2020953523199918, + "grad_norm": 0.21161718666553497, + "learning_rate": 9.999966707181055e-06, + "loss": 0.0088, + "step": 31510 + }, + { + "epoch": 0.20215948921377788, + "grad_norm": 0.5596034526824951, + "learning_rate": 9.999964633352519e-06, + "loss": 0.0082, + "step": 31520 + }, + { + "epoch": 0.20222362610756398, + "grad_norm": 0.3756974935531616, + "learning_rate": 9.999962496870852e-06, + "loss": 0.0276, + "step": 31530 + }, + { + "epoch": 0.20228776300135007, + "grad_norm": 0.3303465247154236, + "learning_rate": 9.99996029773608e-06, + "loss": 0.0067, + "step": 31540 + }, + { + "epoch": 0.2023518998951362, + "grad_norm": 0.07992622256278992, + "learning_rate": 9.999958035948233e-06, + "loss": 0.0086, + "step": 31550 + }, + { + "epoch": 0.20241603678892228, + "grad_norm": 0.4249248802661896, + "learning_rate": 9.999955711507338e-06, + "loss": 0.005, + "step": 31560 + }, + { + "epoch": 0.20248017368270838, + "grad_norm": 0.20567820966243744, + "learning_rate": 9.999953324413428e-06, + "loss": 0.004, + "step": 31570 + }, + { + "epoch": 0.20254431057649447, + "grad_norm": 0.8298195600509644, + "learning_rate": 9.999950874666525e-06, + "loss": 0.0086, + "step": 31580 + }, + { + "epoch": 0.20260844747028056, + "grad_norm": 0.2139284908771515, + "learning_rate": 9.999948362266666e-06, + "loss": 0.008, + "step": 31590 + }, + { + "epoch": 0.20267258436406665, + "grad_norm": 0.49156635999679565, + "learning_rate": 9.99994578721388e-06, + "loss": 0.006, + "step": 31600 + }, + { + "epoch": 0.20273672125785275, + "grad_norm": 0.177789106965065, + "learning_rate": 9.999943149508201e-06, + "loss": 0.005, + "step": 31610 + }, + { + "epoch": 0.20280085815163887, + "grad_norm": 0.4381600618362427, + "learning_rate": 9.999940449149659e-06, + "loss": 0.0076, + "step": 31620 + }, + { + "epoch": 0.20286499504542496, + "grad_norm": 0.242206871509552, + "learning_rate": 9.999937686138292e-06, + "loss": 0.0052, + "step": 31630 + }, + { + "epoch": 0.20292913193921105, + "grad_norm": 0.6824889779090881, + "learning_rate": 9.999934860474132e-06, + "loss": 0.0093, + "step": 31640 + }, + { + "epoch": 0.20299326883299715, + "grad_norm": 0.524763286113739, + "learning_rate": 9.999931972157214e-06, + "loss": 0.0063, + "step": 31650 + }, + { + "epoch": 0.20305740572678324, + "grad_norm": 0.39510250091552734, + "learning_rate": 9.999929021187575e-06, + "loss": 0.0048, + "step": 31660 + }, + { + "epoch": 0.20312154262056933, + "grad_norm": 0.3214946985244751, + "learning_rate": 9.999926007565253e-06, + "loss": 0.0096, + "step": 31670 + }, + { + "epoch": 0.20318567951435543, + "grad_norm": 0.38387608528137207, + "learning_rate": 9.999922931290282e-06, + "loss": 0.0054, + "step": 31680 + }, + { + "epoch": 0.20324981640814155, + "grad_norm": 0.4253934323787689, + "learning_rate": 9.999919792362708e-06, + "loss": 0.0068, + "step": 31690 + }, + { + "epoch": 0.20331395330192764, + "grad_norm": 0.2738664150238037, + "learning_rate": 9.999916590782561e-06, + "loss": 0.0054, + "step": 31700 + }, + { + "epoch": 0.20337809019571373, + "grad_norm": 0.2658179700374603, + "learning_rate": 9.999913326549888e-06, + "loss": 0.0054, + "step": 31710 + }, + { + "epoch": 0.20344222708949983, + "grad_norm": 0.5510397553443909, + "learning_rate": 9.999909999664727e-06, + "loss": 0.0082, + "step": 31720 + }, + { + "epoch": 0.20350636398328592, + "grad_norm": 0.17201349139213562, + "learning_rate": 9.999906610127119e-06, + "loss": 0.0081, + "step": 31730 + }, + { + "epoch": 0.203570500877072, + "grad_norm": 0.4930127263069153, + "learning_rate": 9.99990315793711e-06, + "loss": 0.0082, + "step": 31740 + }, + { + "epoch": 0.2036346377708581, + "grad_norm": 0.23746931552886963, + "learning_rate": 9.99989964309474e-06, + "loss": 0.0068, + "step": 31750 + }, + { + "epoch": 0.20369877466464423, + "grad_norm": 0.27894327044487, + "learning_rate": 9.999896065600054e-06, + "loss": 0.0062, + "step": 31760 + }, + { + "epoch": 0.20376291155843032, + "grad_norm": 0.28237441182136536, + "learning_rate": 9.999892425453098e-06, + "loss": 0.0094, + "step": 31770 + }, + { + "epoch": 0.2038270484522164, + "grad_norm": 0.41106197237968445, + "learning_rate": 9.999888722653917e-06, + "loss": 0.005, + "step": 31780 + }, + { + "epoch": 0.2038911853460025, + "grad_norm": 0.2211090624332428, + "learning_rate": 9.999884957202555e-06, + "loss": 0.0069, + "step": 31790 + }, + { + "epoch": 0.2039553222397886, + "grad_norm": 0.3440389633178711, + "learning_rate": 9.999881129099062e-06, + "loss": 0.0104, + "step": 31800 + }, + { + "epoch": 0.2040194591335747, + "grad_norm": 0.5909982323646545, + "learning_rate": 9.999877238343485e-06, + "loss": 0.009, + "step": 31810 + }, + { + "epoch": 0.2040835960273608, + "grad_norm": 0.4488292336463928, + "learning_rate": 9.999873284935873e-06, + "loss": 0.0091, + "step": 31820 + }, + { + "epoch": 0.2041477329211469, + "grad_norm": 0.3296765387058258, + "learning_rate": 9.999869268876275e-06, + "loss": 0.0045, + "step": 31830 + }, + { + "epoch": 0.204211869814933, + "grad_norm": 0.4224696457386017, + "learning_rate": 9.999865190164741e-06, + "loss": 0.0194, + "step": 31840 + }, + { + "epoch": 0.2042760067087191, + "grad_norm": 0.1356227546930313, + "learning_rate": 9.999861048801324e-06, + "loss": 0.008, + "step": 31850 + }, + { + "epoch": 0.2043401436025052, + "grad_norm": 0.8743406534194946, + "learning_rate": 9.999856844786076e-06, + "loss": 0.0065, + "step": 31860 + }, + { + "epoch": 0.20440428049629128, + "grad_norm": 0.707042932510376, + "learning_rate": 9.999852578119046e-06, + "loss": 0.0059, + "step": 31870 + }, + { + "epoch": 0.20446841739007737, + "grad_norm": 0.33693328499794006, + "learning_rate": 9.99984824880029e-06, + "loss": 0.0051, + "step": 31880 + }, + { + "epoch": 0.20453255428386347, + "grad_norm": 0.6988204717636108, + "learning_rate": 9.999843856829862e-06, + "loss": 0.0065, + "step": 31890 + }, + { + "epoch": 0.2045966911776496, + "grad_norm": 0.6744667291641235, + "learning_rate": 9.999839402207819e-06, + "loss": 0.0111, + "step": 31900 + }, + { + "epoch": 0.20466082807143568, + "grad_norm": 0.658195436000824, + "learning_rate": 9.999834884934214e-06, + "loss": 0.0048, + "step": 31910 + }, + { + "epoch": 0.20472496496522177, + "grad_norm": 0.3740374743938446, + "learning_rate": 9.999830305009104e-06, + "loss": 0.0065, + "step": 31920 + }, + { + "epoch": 0.20478910185900787, + "grad_norm": 0.36082711815834045, + "learning_rate": 9.999825662432547e-06, + "loss": 0.0072, + "step": 31930 + }, + { + "epoch": 0.20485323875279396, + "grad_norm": 0.40542277693748474, + "learning_rate": 9.999820957204602e-06, + "loss": 0.0065, + "step": 31940 + }, + { + "epoch": 0.20491737564658005, + "grad_norm": 0.9706642031669617, + "learning_rate": 9.999816189325327e-06, + "loss": 0.0106, + "step": 31950 + }, + { + "epoch": 0.20498151254036615, + "grad_norm": 0.3222975730895996, + "learning_rate": 9.999811358794781e-06, + "loss": 0.0083, + "step": 31960 + }, + { + "epoch": 0.20504564943415227, + "grad_norm": 0.11145718395709991, + "learning_rate": 9.999806465613027e-06, + "loss": 0.0068, + "step": 31970 + }, + { + "epoch": 0.20510978632793836, + "grad_norm": 0.4002038836479187, + "learning_rate": 9.999801509780123e-06, + "loss": 0.0052, + "step": 31980 + }, + { + "epoch": 0.20517392322172445, + "grad_norm": 0.29977932572364807, + "learning_rate": 9.999796491296134e-06, + "loss": 0.0053, + "step": 31990 + }, + { + "epoch": 0.20523806011551055, + "grad_norm": 0.3992215692996979, + "learning_rate": 9.999791410161121e-06, + "loss": 0.0051, + "step": 32000 + }, + { + "epoch": 0.20530219700929664, + "grad_norm": 0.24485594034194946, + "learning_rate": 9.999786266375147e-06, + "loss": 0.0066, + "step": 32010 + }, + { + "epoch": 0.20536633390308273, + "grad_norm": 0.36206868290901184, + "learning_rate": 9.99978105993828e-06, + "loss": 0.009, + "step": 32020 + }, + { + "epoch": 0.20543047079686882, + "grad_norm": 0.5218985080718994, + "learning_rate": 9.999775790850582e-06, + "loss": 0.0096, + "step": 32030 + }, + { + "epoch": 0.20549460769065495, + "grad_norm": 0.1210787445306778, + "learning_rate": 9.99977045911212e-06, + "loss": 0.0062, + "step": 32040 + }, + { + "epoch": 0.20555874458444104, + "grad_norm": 0.18466708064079285, + "learning_rate": 9.999765064722961e-06, + "loss": 0.0069, + "step": 32050 + }, + { + "epoch": 0.20562288147822713, + "grad_norm": 0.5108962655067444, + "learning_rate": 9.999759607683173e-06, + "loss": 0.0075, + "step": 32060 + }, + { + "epoch": 0.20568701837201323, + "grad_norm": 0.2827768623828888, + "learning_rate": 9.999754087992823e-06, + "loss": 0.0082, + "step": 32070 + }, + { + "epoch": 0.20575115526579932, + "grad_norm": 0.5526918172836304, + "learning_rate": 9.999748505651981e-06, + "loss": 0.0091, + "step": 32080 + }, + { + "epoch": 0.2058152921595854, + "grad_norm": 0.19573715329170227, + "learning_rate": 9.999742860660716e-06, + "loss": 0.0045, + "step": 32090 + }, + { + "epoch": 0.2058794290533715, + "grad_norm": 0.3106071352958679, + "learning_rate": 9.999737153019102e-06, + "loss": 0.0105, + "step": 32100 + }, + { + "epoch": 0.20594356594715763, + "grad_norm": 0.26938295364379883, + "learning_rate": 9.999731382727207e-06, + "loss": 0.0082, + "step": 32110 + }, + { + "epoch": 0.20600770284094372, + "grad_norm": 0.3470536470413208, + "learning_rate": 9.999725549785107e-06, + "loss": 0.0072, + "step": 32120 + }, + { + "epoch": 0.2060718397347298, + "grad_norm": 0.2686624825000763, + "learning_rate": 9.99971965419287e-06, + "loss": 0.0068, + "step": 32130 + }, + { + "epoch": 0.2061359766285159, + "grad_norm": 0.23761259019374847, + "learning_rate": 9.999713695950573e-06, + "loss": 0.01, + "step": 32140 + }, + { + "epoch": 0.206200113522302, + "grad_norm": 0.188786581158638, + "learning_rate": 9.999707675058289e-06, + "loss": 0.0047, + "step": 32150 + }, + { + "epoch": 0.2062642504160881, + "grad_norm": 0.39413657784461975, + "learning_rate": 9.999701591516095e-06, + "loss": 0.0058, + "step": 32160 + }, + { + "epoch": 0.20632838730987418, + "grad_norm": 0.6398225426673889, + "learning_rate": 9.999695445324069e-06, + "loss": 0.0075, + "step": 32170 + }, + { + "epoch": 0.2063925242036603, + "grad_norm": 0.6331750750541687, + "learning_rate": 9.999689236482283e-06, + "loss": 0.0039, + "step": 32180 + }, + { + "epoch": 0.2064566610974464, + "grad_norm": 0.21261058747768402, + "learning_rate": 9.99968296499082e-06, + "loss": 0.0084, + "step": 32190 + }, + { + "epoch": 0.2065207979912325, + "grad_norm": 0.2594454884529114, + "learning_rate": 9.999676630849756e-06, + "loss": 0.0091, + "step": 32200 + }, + { + "epoch": 0.20658493488501858, + "grad_norm": 0.3002074956893921, + "learning_rate": 9.999670234059167e-06, + "loss": 0.0049, + "step": 32210 + }, + { + "epoch": 0.20664907177880468, + "grad_norm": 0.40826472640037537, + "learning_rate": 9.99966377461914e-06, + "loss": 0.0086, + "step": 32220 + }, + { + "epoch": 0.20671320867259077, + "grad_norm": 0.6430691480636597, + "learning_rate": 9.999657252529754e-06, + "loss": 0.0072, + "step": 32230 + }, + { + "epoch": 0.20677734556637686, + "grad_norm": 0.4967564642429352, + "learning_rate": 9.999650667791087e-06, + "loss": 0.009, + "step": 32240 + }, + { + "epoch": 0.20684148246016298, + "grad_norm": 0.3127409815788269, + "learning_rate": 9.999644020403225e-06, + "loss": 0.0078, + "step": 32250 + }, + { + "epoch": 0.20690561935394908, + "grad_norm": 0.22701984643936157, + "learning_rate": 9.999637310366248e-06, + "loss": 0.0066, + "step": 32260 + }, + { + "epoch": 0.20696975624773517, + "grad_norm": 0.33977678418159485, + "learning_rate": 9.999630537680245e-06, + "loss": 0.0067, + "step": 32270 + }, + { + "epoch": 0.20703389314152126, + "grad_norm": 0.260306715965271, + "learning_rate": 9.999623702345296e-06, + "loss": 0.0056, + "step": 32280 + }, + { + "epoch": 0.20709803003530736, + "grad_norm": 0.2731505334377289, + "learning_rate": 9.999616804361491e-06, + "loss": 0.0046, + "step": 32290 + }, + { + "epoch": 0.20716216692909345, + "grad_norm": 0.2968423664569855, + "learning_rate": 9.999609843728914e-06, + "loss": 0.0114, + "step": 32300 + }, + { + "epoch": 0.20722630382287954, + "grad_norm": 0.2855728566646576, + "learning_rate": 9.999602820447651e-06, + "loss": 0.0069, + "step": 32310 + }, + { + "epoch": 0.20729044071666566, + "grad_norm": 0.15290455520153046, + "learning_rate": 9.999595734517793e-06, + "loss": 0.0026, + "step": 32320 + }, + { + "epoch": 0.20735457761045176, + "grad_norm": 0.5125821232795715, + "learning_rate": 9.999588585939426e-06, + "loss": 0.0051, + "step": 32330 + }, + { + "epoch": 0.20741871450423785, + "grad_norm": 0.7281843423843384, + "learning_rate": 9.99958137471264e-06, + "loss": 0.004, + "step": 32340 + }, + { + "epoch": 0.20748285139802394, + "grad_norm": 0.3147353231906891, + "learning_rate": 9.99957410083753e-06, + "loss": 0.0072, + "step": 32350 + }, + { + "epoch": 0.20754698829181004, + "grad_norm": 0.24015134572982788, + "learning_rate": 9.999566764314182e-06, + "loss": 0.0053, + "step": 32360 + }, + { + "epoch": 0.20761112518559613, + "grad_norm": 0.20664353668689728, + "learning_rate": 9.999559365142688e-06, + "loss": 0.0062, + "step": 32370 + }, + { + "epoch": 0.20767526207938222, + "grad_norm": 0.24446997046470642, + "learning_rate": 9.999551903323141e-06, + "loss": 0.0063, + "step": 32380 + }, + { + "epoch": 0.20773939897316834, + "grad_norm": 0.14070431888103485, + "learning_rate": 9.999544378855637e-06, + "loss": 0.0044, + "step": 32390 + }, + { + "epoch": 0.20780353586695444, + "grad_norm": 0.8572035431861877, + "learning_rate": 9.99953679174027e-06, + "loss": 0.0085, + "step": 32400 + }, + { + "epoch": 0.20786767276074053, + "grad_norm": 0.28308725357055664, + "learning_rate": 9.999529141977132e-06, + "loss": 0.0053, + "step": 32410 + }, + { + "epoch": 0.20793180965452662, + "grad_norm": 0.06351161003112793, + "learning_rate": 9.99952142956632e-06, + "loss": 0.0081, + "step": 32420 + }, + { + "epoch": 0.20799594654831272, + "grad_norm": 0.36633729934692383, + "learning_rate": 9.999513654507934e-06, + "loss": 0.0043, + "step": 32430 + }, + { + "epoch": 0.2080600834420988, + "grad_norm": 0.4253617525100708, + "learning_rate": 9.999505816802069e-06, + "loss": 0.0048, + "step": 32440 + }, + { + "epoch": 0.2081242203358849, + "grad_norm": 0.06172889843583107, + "learning_rate": 9.99949791644882e-06, + "loss": 0.0077, + "step": 32450 + }, + { + "epoch": 0.20818835722967102, + "grad_norm": 0.18378232419490814, + "learning_rate": 9.999489953448291e-06, + "loss": 0.0063, + "step": 32460 + }, + { + "epoch": 0.20825249412345712, + "grad_norm": 0.22072811424732208, + "learning_rate": 9.999481927800579e-06, + "loss": 0.0072, + "step": 32470 + }, + { + "epoch": 0.2083166310172432, + "grad_norm": 0.5811721086502075, + "learning_rate": 9.999473839505786e-06, + "loss": 0.0079, + "step": 32480 + }, + { + "epoch": 0.2083807679110293, + "grad_norm": 0.32655444741249084, + "learning_rate": 9.999465688564014e-06, + "loss": 0.0094, + "step": 32490 + }, + { + "epoch": 0.2084449048048154, + "grad_norm": 0.08320006728172302, + "learning_rate": 9.999457474975362e-06, + "loss": 0.0069, + "step": 32500 + }, + { + "epoch": 0.2085090416986015, + "grad_norm": 0.29463374614715576, + "learning_rate": 9.999449198739935e-06, + "loss": 0.0061, + "step": 32510 + }, + { + "epoch": 0.20857317859238758, + "grad_norm": 0.19419896602630615, + "learning_rate": 9.999440859857836e-06, + "loss": 0.0049, + "step": 32520 + }, + { + "epoch": 0.2086373154861737, + "grad_norm": 0.2826046645641327, + "learning_rate": 9.99943245832917e-06, + "loss": 0.0064, + "step": 32530 + }, + { + "epoch": 0.2087014523799598, + "grad_norm": 0.45521748065948486, + "learning_rate": 9.999423994154043e-06, + "loss": 0.0071, + "step": 32540 + }, + { + "epoch": 0.2087655892737459, + "grad_norm": 0.34207192063331604, + "learning_rate": 9.99941546733256e-06, + "loss": 0.0073, + "step": 32550 + }, + { + "epoch": 0.20882972616753198, + "grad_norm": 0.1273985207080841, + "learning_rate": 9.99940687786483e-06, + "loss": 0.0059, + "step": 32560 + }, + { + "epoch": 0.20889386306131807, + "grad_norm": 0.19721442461013794, + "learning_rate": 9.999398225750956e-06, + "loss": 0.0055, + "step": 32570 + }, + { + "epoch": 0.20895799995510417, + "grad_norm": 0.3487097918987274, + "learning_rate": 9.999389510991051e-06, + "loss": 0.0061, + "step": 32580 + }, + { + "epoch": 0.20902213684889026, + "grad_norm": 0.29255926609039307, + "learning_rate": 9.999380733585222e-06, + "loss": 0.0087, + "step": 32590 + }, + { + "epoch": 0.20908627374267638, + "grad_norm": 0.28362610936164856, + "learning_rate": 9.999371893533579e-06, + "loss": 0.0052, + "step": 32600 + }, + { + "epoch": 0.20915041063646247, + "grad_norm": 0.45663630962371826, + "learning_rate": 9.999362990836233e-06, + "loss": 0.006, + "step": 32610 + }, + { + "epoch": 0.20921454753024857, + "grad_norm": 0.2847580015659332, + "learning_rate": 9.999354025493297e-06, + "loss": 0.0056, + "step": 32620 + }, + { + "epoch": 0.20927868442403466, + "grad_norm": 0.24016176164150238, + "learning_rate": 9.99934499750488e-06, + "loss": 0.0071, + "step": 32630 + }, + { + "epoch": 0.20934282131782075, + "grad_norm": 0.0941351130604744, + "learning_rate": 9.999335906871099e-06, + "loss": 0.0073, + "step": 32640 + }, + { + "epoch": 0.20940695821160685, + "grad_norm": 0.4059191346168518, + "learning_rate": 9.999326753592066e-06, + "loss": 0.0057, + "step": 32650 + }, + { + "epoch": 0.20947109510539294, + "grad_norm": 0.32373708486557007, + "learning_rate": 9.999317537667894e-06, + "loss": 0.0077, + "step": 32660 + }, + { + "epoch": 0.20953523199917906, + "grad_norm": 0.30504995584487915, + "learning_rate": 9.999308259098703e-06, + "loss": 0.0077, + "step": 32670 + }, + { + "epoch": 0.20959936889296515, + "grad_norm": 0.24574518203735352, + "learning_rate": 9.999298917884606e-06, + "loss": 0.0059, + "step": 32680 + }, + { + "epoch": 0.20966350578675125, + "grad_norm": 0.23023582994937897, + "learning_rate": 9.999289514025718e-06, + "loss": 0.0076, + "step": 32690 + }, + { + "epoch": 0.20972764268053734, + "grad_norm": 0.27018046379089355, + "learning_rate": 9.999280047522161e-06, + "loss": 0.0065, + "step": 32700 + }, + { + "epoch": 0.20979177957432343, + "grad_norm": 0.2876282036304474, + "learning_rate": 9.999270518374054e-06, + "loss": 0.0117, + "step": 32710 + }, + { + "epoch": 0.20985591646810953, + "grad_norm": 0.16353587806224823, + "learning_rate": 9.999260926581513e-06, + "loss": 0.0042, + "step": 32720 + }, + { + "epoch": 0.20992005336189562, + "grad_norm": 0.35183534026145935, + "learning_rate": 9.99925127214466e-06, + "loss": 0.0063, + "step": 32730 + }, + { + "epoch": 0.20998419025568174, + "grad_norm": 0.4123291075229645, + "learning_rate": 9.999241555063614e-06, + "loss": 0.008, + "step": 32740 + }, + { + "epoch": 0.21004832714946783, + "grad_norm": 0.37130826711654663, + "learning_rate": 9.999231775338499e-06, + "loss": 0.0072, + "step": 32750 + }, + { + "epoch": 0.21011246404325393, + "grad_norm": 0.16660301387310028, + "learning_rate": 9.999221932969439e-06, + "loss": 0.0054, + "step": 32760 + }, + { + "epoch": 0.21017660093704002, + "grad_norm": 0.35891416668891907, + "learning_rate": 9.999212027956553e-06, + "loss": 0.0039, + "step": 32770 + }, + { + "epoch": 0.2102407378308261, + "grad_norm": 0.44732221961021423, + "learning_rate": 9.999202060299968e-06, + "loss": 0.0138, + "step": 32780 + }, + { + "epoch": 0.2103048747246122, + "grad_norm": 0.41310566663742065, + "learning_rate": 9.999192029999809e-06, + "loss": 0.0073, + "step": 32790 + }, + { + "epoch": 0.2103690116183983, + "grad_norm": 0.33074861764907837, + "learning_rate": 9.999181937056199e-06, + "loss": 0.006, + "step": 32800 + }, + { + "epoch": 0.2104331485121844, + "grad_norm": 0.14894649386405945, + "learning_rate": 9.999171781469268e-06, + "loss": 0.008, + "step": 32810 + }, + { + "epoch": 0.2104972854059705, + "grad_norm": 0.1162961795926094, + "learning_rate": 9.999161563239143e-06, + "loss": 0.0059, + "step": 32820 + }, + { + "epoch": 0.2105614222997566, + "grad_norm": 0.25399044156074524, + "learning_rate": 9.999151282365948e-06, + "loss": 0.0046, + "step": 32830 + }, + { + "epoch": 0.2106255591935427, + "grad_norm": 0.5153788924217224, + "learning_rate": 9.999140938849816e-06, + "loss": 0.0093, + "step": 32840 + }, + { + "epoch": 0.2106896960873288, + "grad_norm": 0.8990302681922913, + "learning_rate": 9.999130532690876e-06, + "loss": 0.0093, + "step": 32850 + }, + { + "epoch": 0.21075383298111489, + "grad_norm": 0.33208703994750977, + "learning_rate": 9.999120063889258e-06, + "loss": 0.0085, + "step": 32860 + }, + { + "epoch": 0.21081796987490098, + "grad_norm": 0.3936298191547394, + "learning_rate": 9.99910953244509e-06, + "loss": 0.0061, + "step": 32870 + }, + { + "epoch": 0.21088210676868707, + "grad_norm": 0.3910579979419708, + "learning_rate": 9.999098938358508e-06, + "loss": 0.0081, + "step": 32880 + }, + { + "epoch": 0.2109462436624732, + "grad_norm": 0.1852560192346573, + "learning_rate": 9.999088281629645e-06, + "loss": 0.0054, + "step": 32890 + }, + { + "epoch": 0.21101038055625929, + "grad_norm": 0.3421950936317444, + "learning_rate": 9.999077562258632e-06, + "loss": 0.0069, + "step": 32900 + }, + { + "epoch": 0.21107451745004538, + "grad_norm": 0.0992131233215332, + "learning_rate": 9.999066780245605e-06, + "loss": 0.0069, + "step": 32910 + }, + { + "epoch": 0.21113865434383147, + "grad_norm": 0.08349548280239105, + "learning_rate": 9.999055935590697e-06, + "loss": 0.0054, + "step": 32920 + }, + { + "epoch": 0.21120279123761757, + "grad_norm": 0.14864757657051086, + "learning_rate": 9.999045028294045e-06, + "loss": 0.0084, + "step": 32930 + }, + { + "epoch": 0.21126692813140366, + "grad_norm": 0.19409166276454926, + "learning_rate": 9.999034058355788e-06, + "loss": 0.0042, + "step": 32940 + }, + { + "epoch": 0.21133106502518975, + "grad_norm": 0.46400272846221924, + "learning_rate": 9.99902302577606e-06, + "loss": 0.0071, + "step": 32950 + }, + { + "epoch": 0.21139520191897587, + "grad_norm": 0.3607134222984314, + "learning_rate": 9.999011930555002e-06, + "loss": 0.0061, + "step": 32960 + }, + { + "epoch": 0.21145933881276197, + "grad_norm": 0.3315497934818268, + "learning_rate": 9.99900077269275e-06, + "loss": 0.0052, + "step": 32970 + }, + { + "epoch": 0.21152347570654806, + "grad_norm": 0.21923942863941193, + "learning_rate": 9.998989552189449e-06, + "loss": 0.0062, + "step": 32980 + }, + { + "epoch": 0.21158761260033415, + "grad_norm": 0.5289713740348816, + "learning_rate": 9.998978269045233e-06, + "loss": 0.0089, + "step": 32990 + }, + { + "epoch": 0.21165174949412024, + "grad_norm": 0.3155423104763031, + "learning_rate": 9.998966923260247e-06, + "loss": 0.0095, + "step": 33000 + }, + { + "epoch": 0.21171588638790634, + "grad_norm": 0.365531325340271, + "learning_rate": 9.998955514834634e-06, + "loss": 0.0055, + "step": 33010 + }, + { + "epoch": 0.21178002328169243, + "grad_norm": 0.30708417296409607, + "learning_rate": 9.998944043768534e-06, + "loss": 0.0064, + "step": 33020 + }, + { + "epoch": 0.21184416017547855, + "grad_norm": 0.3104497790336609, + "learning_rate": 9.998932510062093e-06, + "loss": 0.0062, + "step": 33030 + }, + { + "epoch": 0.21190829706926465, + "grad_norm": 0.5184586644172668, + "learning_rate": 9.998920913715457e-06, + "loss": 0.0055, + "step": 33040 + }, + { + "epoch": 0.21197243396305074, + "grad_norm": 0.2403479516506195, + "learning_rate": 9.998909254728767e-06, + "loss": 0.0052, + "step": 33050 + }, + { + "epoch": 0.21203657085683683, + "grad_norm": 0.47688665986061096, + "learning_rate": 9.998897533102173e-06, + "loss": 0.0062, + "step": 33060 + }, + { + "epoch": 0.21210070775062292, + "grad_norm": 0.4534326493740082, + "learning_rate": 9.998885748835819e-06, + "loss": 0.0065, + "step": 33070 + }, + { + "epoch": 0.21216484464440902, + "grad_norm": 0.3673833906650543, + "learning_rate": 9.998873901929853e-06, + "loss": 0.007, + "step": 33080 + }, + { + "epoch": 0.2122289815381951, + "grad_norm": 0.16532112658023834, + "learning_rate": 9.998861992384426e-06, + "loss": 0.007, + "step": 33090 + }, + { + "epoch": 0.21229311843198123, + "grad_norm": 0.368022084236145, + "learning_rate": 9.998850020199686e-06, + "loss": 0.0064, + "step": 33100 + }, + { + "epoch": 0.21235725532576732, + "grad_norm": 0.21564172208309174, + "learning_rate": 9.99883798537578e-06, + "loss": 0.0064, + "step": 33110 + }, + { + "epoch": 0.21242139221955342, + "grad_norm": 0.29615518450737, + "learning_rate": 9.998825887912865e-06, + "loss": 0.0091, + "step": 33120 + }, + { + "epoch": 0.2124855291133395, + "grad_norm": 0.07216177135705948, + "learning_rate": 9.998813727811085e-06, + "loss": 0.0059, + "step": 33130 + }, + { + "epoch": 0.2125496660071256, + "grad_norm": 0.17017757892608643, + "learning_rate": 9.998801505070599e-06, + "loss": 0.0062, + "step": 33140 + }, + { + "epoch": 0.2126138029009117, + "grad_norm": 0.26409536600112915, + "learning_rate": 9.998789219691557e-06, + "loss": 0.0058, + "step": 33150 + }, + { + "epoch": 0.2126779397946978, + "grad_norm": 0.1319057047367096, + "learning_rate": 9.998776871674114e-06, + "loss": 0.0043, + "step": 33160 + }, + { + "epoch": 0.2127420766884839, + "grad_norm": 0.4250667989253998, + "learning_rate": 9.998764461018423e-06, + "loss": 0.0059, + "step": 33170 + }, + { + "epoch": 0.21280621358227, + "grad_norm": 0.2796914577484131, + "learning_rate": 9.998751987724643e-06, + "loss": 0.0062, + "step": 33180 + }, + { + "epoch": 0.2128703504760561, + "grad_norm": 0.20251691341400146, + "learning_rate": 9.998739451792927e-06, + "loss": 0.0063, + "step": 33190 + }, + { + "epoch": 0.2129344873698422, + "grad_norm": 0.2854151129722595, + "learning_rate": 9.998726853223432e-06, + "loss": 0.0045, + "step": 33200 + }, + { + "epoch": 0.21299862426362828, + "grad_norm": 0.17091292142868042, + "learning_rate": 9.998714192016318e-06, + "loss": 0.007, + "step": 33210 + }, + { + "epoch": 0.21306276115741438, + "grad_norm": 0.12076331675052643, + "learning_rate": 9.998701468171743e-06, + "loss": 0.0048, + "step": 33220 + }, + { + "epoch": 0.21312689805120047, + "grad_norm": 0.3442078232765198, + "learning_rate": 9.998688681689865e-06, + "loss": 0.0058, + "step": 33230 + }, + { + "epoch": 0.2131910349449866, + "grad_norm": 0.35692065954208374, + "learning_rate": 9.998675832570845e-06, + "loss": 0.0053, + "step": 33240 + }, + { + "epoch": 0.21325517183877268, + "grad_norm": 0.31049343943595886, + "learning_rate": 9.998662920814846e-06, + "loss": 0.0063, + "step": 33250 + }, + { + "epoch": 0.21331930873255878, + "grad_norm": 0.4583049416542053, + "learning_rate": 9.998649946422028e-06, + "loss": 0.008, + "step": 33260 + }, + { + "epoch": 0.21338344562634487, + "grad_norm": 0.22530090808868408, + "learning_rate": 9.998636909392551e-06, + "loss": 0.006, + "step": 33270 + }, + { + "epoch": 0.21344758252013096, + "grad_norm": 0.3867437541484833, + "learning_rate": 9.998623809726585e-06, + "loss": 0.0053, + "step": 33280 + }, + { + "epoch": 0.21351171941391706, + "grad_norm": 0.5634160041809082, + "learning_rate": 9.998610647424287e-06, + "loss": 0.0091, + "step": 33290 + }, + { + "epoch": 0.21357585630770315, + "grad_norm": 0.16131380200386047, + "learning_rate": 9.998597422485826e-06, + "loss": 0.0112, + "step": 33300 + }, + { + "epoch": 0.21363999320148927, + "grad_norm": 0.8143040537834167, + "learning_rate": 9.998584134911368e-06, + "loss": 0.0063, + "step": 33310 + }, + { + "epoch": 0.21370413009527536, + "grad_norm": 0.1807277947664261, + "learning_rate": 9.998570784701077e-06, + "loss": 0.0061, + "step": 33320 + }, + { + "epoch": 0.21376826698906146, + "grad_norm": 0.36162883043289185, + "learning_rate": 9.998557371855123e-06, + "loss": 0.0064, + "step": 33330 + }, + { + "epoch": 0.21383240388284755, + "grad_norm": 0.25059476494789124, + "learning_rate": 9.998543896373672e-06, + "loss": 0.0086, + "step": 33340 + }, + { + "epoch": 0.21389654077663364, + "grad_norm": 0.4012435972690582, + "learning_rate": 9.998530358256893e-06, + "loss": 0.0071, + "step": 33350 + }, + { + "epoch": 0.21396067767041974, + "grad_norm": 0.2773272395133972, + "learning_rate": 9.998516757504958e-06, + "loss": 0.0071, + "step": 33360 + }, + { + "epoch": 0.21402481456420583, + "grad_norm": 0.2923520803451538, + "learning_rate": 9.998503094118033e-06, + "loss": 0.0085, + "step": 33370 + }, + { + "epoch": 0.21408895145799195, + "grad_norm": 0.3366430103778839, + "learning_rate": 9.998489368096293e-06, + "loss": 0.0058, + "step": 33380 + }, + { + "epoch": 0.21415308835177804, + "grad_norm": 0.1886427104473114, + "learning_rate": 9.998475579439909e-06, + "loss": 0.0058, + "step": 33390 + }, + { + "epoch": 0.21421722524556414, + "grad_norm": 0.4118272364139557, + "learning_rate": 9.998461728149053e-06, + "loss": 0.0075, + "step": 33400 + }, + { + "epoch": 0.21428136213935023, + "grad_norm": 0.32713785767555237, + "learning_rate": 9.9984478142239e-06, + "loss": 0.0049, + "step": 33410 + }, + { + "epoch": 0.21434549903313632, + "grad_norm": 0.21947899460792542, + "learning_rate": 9.998433837664623e-06, + "loss": 0.0064, + "step": 33420 + }, + { + "epoch": 0.21440963592692242, + "grad_norm": 0.6944822669029236, + "learning_rate": 9.998419798471399e-06, + "loss": 0.0084, + "step": 33430 + }, + { + "epoch": 0.2144737728207085, + "grad_norm": 0.05396656319499016, + "learning_rate": 9.998405696644402e-06, + "loss": 0.0043, + "step": 33440 + }, + { + "epoch": 0.21453790971449463, + "grad_norm": 0.37956616282463074, + "learning_rate": 9.998391532183809e-06, + "loss": 0.0075, + "step": 33450 + }, + { + "epoch": 0.21460204660828072, + "grad_norm": 0.3165275454521179, + "learning_rate": 9.998377305089797e-06, + "loss": 0.0045, + "step": 33460 + }, + { + "epoch": 0.21466618350206682, + "grad_norm": 0.3055875301361084, + "learning_rate": 9.998363015362546e-06, + "loss": 0.0034, + "step": 33470 + }, + { + "epoch": 0.2147303203958529, + "grad_norm": 0.30824825167655945, + "learning_rate": 9.998348663002234e-06, + "loss": 0.0073, + "step": 33480 + }, + { + "epoch": 0.214794457289639, + "grad_norm": 0.08666949719190598, + "learning_rate": 9.998334248009041e-06, + "loss": 0.0096, + "step": 33490 + }, + { + "epoch": 0.2148585941834251, + "grad_norm": 0.08656428754329681, + "learning_rate": 9.998319770383148e-06, + "loss": 0.0076, + "step": 33500 + }, + { + "epoch": 0.2149227310772112, + "grad_norm": 0.3311026096343994, + "learning_rate": 9.998305230124736e-06, + "loss": 0.0071, + "step": 33510 + }, + { + "epoch": 0.2149868679709973, + "grad_norm": 0.1963089257478714, + "learning_rate": 9.998290627233986e-06, + "loss": 0.0041, + "step": 33520 + }, + { + "epoch": 0.2150510048647834, + "grad_norm": 0.23175717890262604, + "learning_rate": 9.998275961711083e-06, + "loss": 0.0047, + "step": 33530 + }, + { + "epoch": 0.2151151417585695, + "grad_norm": 0.14738452434539795, + "learning_rate": 9.99826123355621e-06, + "loss": 0.0103, + "step": 33540 + }, + { + "epoch": 0.2151792786523556, + "grad_norm": 0.3874845802783966, + "learning_rate": 9.998246442769552e-06, + "loss": 0.007, + "step": 33550 + }, + { + "epoch": 0.21524341554614168, + "grad_norm": 0.20483845472335815, + "learning_rate": 9.998231589351295e-06, + "loss": 0.0066, + "step": 33560 + }, + { + "epoch": 0.21530755243992777, + "grad_norm": 0.5111588835716248, + "learning_rate": 9.998216673301622e-06, + "loss": 0.0093, + "step": 33570 + }, + { + "epoch": 0.21537168933371387, + "grad_norm": 0.48205098509788513, + "learning_rate": 9.998201694620722e-06, + "loss": 0.0093, + "step": 33580 + }, + { + "epoch": 0.2154358262275, + "grad_norm": 0.3506639301776886, + "learning_rate": 9.998186653308784e-06, + "loss": 0.0045, + "step": 33590 + }, + { + "epoch": 0.21549996312128608, + "grad_norm": 0.15712834894657135, + "learning_rate": 9.998171549365992e-06, + "loss": 0.0093, + "step": 33600 + }, + { + "epoch": 0.21556410001507217, + "grad_norm": 0.46175265312194824, + "learning_rate": 9.99815638279254e-06, + "loss": 0.0104, + "step": 33610 + }, + { + "epoch": 0.21562823690885827, + "grad_norm": 0.2752005159854889, + "learning_rate": 9.998141153588619e-06, + "loss": 0.0055, + "step": 33620 + }, + { + "epoch": 0.21569237380264436, + "grad_norm": 0.17104874551296234, + "learning_rate": 9.998125861754414e-06, + "loss": 0.0095, + "step": 33630 + }, + { + "epoch": 0.21575651069643045, + "grad_norm": 0.1689721643924713, + "learning_rate": 9.998110507290122e-06, + "loss": 0.006, + "step": 33640 + }, + { + "epoch": 0.21582064759021655, + "grad_norm": 0.3183901906013489, + "learning_rate": 9.998095090195932e-06, + "loss": 0.006, + "step": 33650 + }, + { + "epoch": 0.21588478448400267, + "grad_norm": 0.47291427850723267, + "learning_rate": 9.998079610472038e-06, + "loss": 0.0067, + "step": 33660 + }, + { + "epoch": 0.21594892137778876, + "grad_norm": 0.4215930700302124, + "learning_rate": 9.998064068118635e-06, + "loss": 0.0043, + "step": 33670 + }, + { + "epoch": 0.21601305827157485, + "grad_norm": 0.2335570752620697, + "learning_rate": 9.998048463135916e-06, + "loss": 0.0052, + "step": 33680 + }, + { + "epoch": 0.21607719516536095, + "grad_norm": 0.3899843692779541, + "learning_rate": 9.99803279552408e-06, + "loss": 0.0063, + "step": 33690 + }, + { + "epoch": 0.21614133205914704, + "grad_norm": 0.4035715162754059, + "learning_rate": 9.998017065283321e-06, + "loss": 0.0071, + "step": 33700 + }, + { + "epoch": 0.21620546895293313, + "grad_norm": 0.18261416256427765, + "learning_rate": 9.998001272413833e-06, + "loss": 0.0062, + "step": 33710 + }, + { + "epoch": 0.21626960584671923, + "grad_norm": 0.5986291170120239, + "learning_rate": 9.99798541691582e-06, + "loss": 0.0048, + "step": 33720 + }, + { + "epoch": 0.21633374274050535, + "grad_norm": 0.3167121112346649, + "learning_rate": 9.997969498789478e-06, + "loss": 0.0053, + "step": 33730 + }, + { + "epoch": 0.21639787963429144, + "grad_norm": 0.1511198729276657, + "learning_rate": 9.997953518035004e-06, + "loss": 0.0079, + "step": 33740 + }, + { + "epoch": 0.21646201652807753, + "grad_norm": 1.4042974710464478, + "learning_rate": 9.997937474652602e-06, + "loss": 0.0063, + "step": 33750 + }, + { + "epoch": 0.21652615342186363, + "grad_norm": 0.27419793605804443, + "learning_rate": 9.997921368642471e-06, + "loss": 0.0115, + "step": 33760 + }, + { + "epoch": 0.21659029031564972, + "grad_norm": 0.2841130495071411, + "learning_rate": 9.997905200004813e-06, + "loss": 0.007, + "step": 33770 + }, + { + "epoch": 0.2166544272094358, + "grad_norm": 0.13329987227916718, + "learning_rate": 9.99788896873983e-06, + "loss": 0.0065, + "step": 33780 + }, + { + "epoch": 0.2167185641032219, + "grad_norm": 0.33807572722435, + "learning_rate": 9.997872674847729e-06, + "loss": 0.0115, + "step": 33790 + }, + { + "epoch": 0.21678270099700803, + "grad_norm": 0.27996155619621277, + "learning_rate": 9.99785631832871e-06, + "loss": 0.0069, + "step": 33800 + }, + { + "epoch": 0.21684683789079412, + "grad_norm": 0.13712485134601593, + "learning_rate": 9.997839899182978e-06, + "loss": 0.0076, + "step": 33810 + }, + { + "epoch": 0.2169109747845802, + "grad_norm": 0.23094645142555237, + "learning_rate": 9.99782341741074e-06, + "loss": 0.0069, + "step": 33820 + }, + { + "epoch": 0.2169751116783663, + "grad_norm": 0.18428556621074677, + "learning_rate": 9.997806873012206e-06, + "loss": 0.0077, + "step": 33830 + }, + { + "epoch": 0.2170392485721524, + "grad_norm": 0.4980505704879761, + "learning_rate": 9.997790265987579e-06, + "loss": 0.0072, + "step": 33840 + }, + { + "epoch": 0.2171033854659385, + "grad_norm": 0.22734621167182922, + "learning_rate": 9.997773596337065e-06, + "loss": 0.0079, + "step": 33850 + }, + { + "epoch": 0.21716752235972459, + "grad_norm": 0.479688435792923, + "learning_rate": 9.997756864060878e-06, + "loss": 0.0066, + "step": 33860 + }, + { + "epoch": 0.2172316592535107, + "grad_norm": 0.38074034452438354, + "learning_rate": 9.997740069159227e-06, + "loss": 0.0063, + "step": 33870 + }, + { + "epoch": 0.2172957961472968, + "grad_norm": 0.2710610330104828, + "learning_rate": 9.997723211632319e-06, + "loss": 0.0047, + "step": 33880 + }, + { + "epoch": 0.2173599330410829, + "grad_norm": 0.2434622347354889, + "learning_rate": 9.997706291480369e-06, + "loss": 0.0085, + "step": 33890 + }, + { + "epoch": 0.21742406993486899, + "grad_norm": 0.35984596610069275, + "learning_rate": 9.997689308703586e-06, + "loss": 0.0106, + "step": 33900 + }, + { + "epoch": 0.21748820682865508, + "grad_norm": 0.14613239467144012, + "learning_rate": 9.997672263302187e-06, + "loss": 0.0044, + "step": 33910 + }, + { + "epoch": 0.21755234372244117, + "grad_norm": 0.2217494696378708, + "learning_rate": 9.99765515527638e-06, + "loss": 0.0081, + "step": 33920 + }, + { + "epoch": 0.21761648061622726, + "grad_norm": 0.39456450939178467, + "learning_rate": 9.997637984626382e-06, + "loss": 0.0044, + "step": 33930 + }, + { + "epoch": 0.21768061751001339, + "grad_norm": 0.16979649662971497, + "learning_rate": 9.997620751352409e-06, + "loss": 0.0058, + "step": 33940 + }, + { + "epoch": 0.21774475440379948, + "grad_norm": 0.5155903697013855, + "learning_rate": 9.997603455454676e-06, + "loss": 0.0075, + "step": 33950 + }, + { + "epoch": 0.21780889129758557, + "grad_norm": 0.26639315485954285, + "learning_rate": 9.9975860969334e-06, + "loss": 0.006, + "step": 33960 + }, + { + "epoch": 0.21787302819137166, + "grad_norm": 0.46729981899261475, + "learning_rate": 9.997568675788801e-06, + "loss": 0.0071, + "step": 33970 + }, + { + "epoch": 0.21793716508515776, + "grad_norm": 0.26663681864738464, + "learning_rate": 9.997551192021092e-06, + "loss": 0.0041, + "step": 33980 + }, + { + "epoch": 0.21800130197894385, + "grad_norm": 0.3270251154899597, + "learning_rate": 9.997533645630495e-06, + "loss": 0.0054, + "step": 33990 + }, + { + "epoch": 0.21806543887272994, + "grad_norm": 0.23128148913383484, + "learning_rate": 9.99751603661723e-06, + "loss": 0.0053, + "step": 34000 + }, + { + "epoch": 0.21812957576651607, + "grad_norm": 0.3948480188846588, + "learning_rate": 9.997498364981516e-06, + "loss": 0.0077, + "step": 34010 + }, + { + "epoch": 0.21819371266030216, + "grad_norm": 0.2914036810398102, + "learning_rate": 9.997480630723578e-06, + "loss": 0.0063, + "step": 34020 + }, + { + "epoch": 0.21825784955408825, + "grad_norm": 0.2660025656223297, + "learning_rate": 9.997462833843636e-06, + "loss": 0.0054, + "step": 34030 + }, + { + "epoch": 0.21832198644787434, + "grad_norm": 0.26283159852027893, + "learning_rate": 9.997444974341912e-06, + "loss": 0.0063, + "step": 34040 + }, + { + "epoch": 0.21838612334166044, + "grad_norm": 0.4147302806377411, + "learning_rate": 9.997427052218632e-06, + "loss": 0.0101, + "step": 34050 + }, + { + "epoch": 0.21845026023544653, + "grad_norm": 0.21125684678554535, + "learning_rate": 9.997409067474018e-06, + "loss": 0.0046, + "step": 34060 + }, + { + "epoch": 0.21851439712923262, + "grad_norm": 0.1434185951948166, + "learning_rate": 9.997391020108298e-06, + "loss": 0.0049, + "step": 34070 + }, + { + "epoch": 0.21857853402301874, + "grad_norm": 0.20431987941265106, + "learning_rate": 9.997372910121696e-06, + "loss": 0.005, + "step": 34080 + }, + { + "epoch": 0.21864267091680484, + "grad_norm": 0.3831230700016022, + "learning_rate": 9.99735473751444e-06, + "loss": 0.0045, + "step": 34090 + }, + { + "epoch": 0.21870680781059093, + "grad_norm": 0.3873409330844879, + "learning_rate": 9.997336502286758e-06, + "loss": 0.0055, + "step": 34100 + }, + { + "epoch": 0.21877094470437702, + "grad_norm": 0.47922199964523315, + "learning_rate": 9.997318204438879e-06, + "loss": 0.0086, + "step": 34110 + }, + { + "epoch": 0.21883508159816312, + "grad_norm": 0.19254538416862488, + "learning_rate": 9.997299843971029e-06, + "loss": 0.005, + "step": 34120 + }, + { + "epoch": 0.2188992184919492, + "grad_norm": 0.28071871399879456, + "learning_rate": 9.997281420883443e-06, + "loss": 0.0048, + "step": 34130 + }, + { + "epoch": 0.2189633553857353, + "grad_norm": 0.46463486552238464, + "learning_rate": 9.997262935176347e-06, + "loss": 0.0056, + "step": 34140 + }, + { + "epoch": 0.21902749227952142, + "grad_norm": 0.30839383602142334, + "learning_rate": 9.997244386849976e-06, + "loss": 0.0056, + "step": 34150 + }, + { + "epoch": 0.21909162917330752, + "grad_norm": 0.3349360227584839, + "learning_rate": 9.997225775904561e-06, + "loss": 0.005, + "step": 34160 + }, + { + "epoch": 0.2191557660670936, + "grad_norm": 0.24762451648712158, + "learning_rate": 9.997207102340336e-06, + "loss": 0.0065, + "step": 34170 + }, + { + "epoch": 0.2192199029608797, + "grad_norm": 0.26319772005081177, + "learning_rate": 9.997188366157536e-06, + "loss": 0.006, + "step": 34180 + }, + { + "epoch": 0.2192840398546658, + "grad_norm": 0.4329639673233032, + "learning_rate": 9.997169567356392e-06, + "loss": 0.0053, + "step": 34190 + }, + { + "epoch": 0.2193481767484519, + "grad_norm": 0.23011185228824615, + "learning_rate": 9.997150705937141e-06, + "loss": 0.0046, + "step": 34200 + }, + { + "epoch": 0.21941231364223798, + "grad_norm": 0.19329330325126648, + "learning_rate": 9.997131781900023e-06, + "loss": 0.0125, + "step": 34210 + }, + { + "epoch": 0.2194764505360241, + "grad_norm": 0.07114830613136292, + "learning_rate": 9.997112795245272e-06, + "loss": 0.0054, + "step": 34220 + }, + { + "epoch": 0.2195405874298102, + "grad_norm": 0.40303030610084534, + "learning_rate": 9.997093745973125e-06, + "loss": 0.0082, + "step": 34230 + }, + { + "epoch": 0.2196047243235963, + "grad_norm": 0.6453074812889099, + "learning_rate": 9.997074634083823e-06, + "loss": 0.009, + "step": 34240 + }, + { + "epoch": 0.21966886121738238, + "grad_norm": 0.4790237545967102, + "learning_rate": 9.997055459577605e-06, + "loss": 0.0057, + "step": 34250 + }, + { + "epoch": 0.21973299811116848, + "grad_norm": 0.36487022042274475, + "learning_rate": 9.997036222454709e-06, + "loss": 0.0069, + "step": 34260 + }, + { + "epoch": 0.21979713500495457, + "grad_norm": 0.3129631578922272, + "learning_rate": 9.99701692271538e-06, + "loss": 0.0088, + "step": 34270 + }, + { + "epoch": 0.21986127189874066, + "grad_norm": 0.3710615634918213, + "learning_rate": 9.996997560359855e-06, + "loss": 0.0067, + "step": 34280 + }, + { + "epoch": 0.21992540879252676, + "grad_norm": 0.2760692238807678, + "learning_rate": 9.99697813538838e-06, + "loss": 0.0092, + "step": 34290 + }, + { + "epoch": 0.21998954568631288, + "grad_norm": 0.3143475651741028, + "learning_rate": 9.996958647801199e-06, + "loss": 0.0065, + "step": 34300 + }, + { + "epoch": 0.22005368258009897, + "grad_norm": 0.4855823814868927, + "learning_rate": 9.996939097598554e-06, + "loss": 0.0083, + "step": 34310 + }, + { + "epoch": 0.22011781947388506, + "grad_norm": 0.19354212284088135, + "learning_rate": 9.996919484780692e-06, + "loss": 0.0047, + "step": 34320 + }, + { + "epoch": 0.22018195636767116, + "grad_norm": 0.4365869164466858, + "learning_rate": 9.996899809347855e-06, + "loss": 0.0064, + "step": 34330 + }, + { + "epoch": 0.22024609326145725, + "grad_norm": 0.3569786250591278, + "learning_rate": 9.996880071300294e-06, + "loss": 0.006, + "step": 34340 + }, + { + "epoch": 0.22031023015524334, + "grad_norm": 0.5439212918281555, + "learning_rate": 9.996860270638255e-06, + "loss": 0.0091, + "step": 34350 + }, + { + "epoch": 0.22037436704902943, + "grad_norm": 0.22508126497268677, + "learning_rate": 9.996840407361983e-06, + "loss": 0.0086, + "step": 34360 + }, + { + "epoch": 0.22043850394281556, + "grad_norm": 0.40013641119003296, + "learning_rate": 9.996820481471734e-06, + "loss": 0.0051, + "step": 34370 + }, + { + "epoch": 0.22050264083660165, + "grad_norm": 0.10357312113046646, + "learning_rate": 9.996800492967748e-06, + "loss": 0.0048, + "step": 34380 + }, + { + "epoch": 0.22056677773038774, + "grad_norm": 0.212563619017601, + "learning_rate": 9.996780441850284e-06, + "loss": 0.0057, + "step": 34390 + }, + { + "epoch": 0.22063091462417384, + "grad_norm": 0.4786578416824341, + "learning_rate": 9.99676032811959e-06, + "loss": 0.0074, + "step": 34400 + }, + { + "epoch": 0.22069505151795993, + "grad_norm": 0.14567528665065765, + "learning_rate": 9.996740151775917e-06, + "loss": 0.0058, + "step": 34410 + }, + { + "epoch": 0.22075918841174602, + "grad_norm": 0.6325238347053528, + "learning_rate": 9.996719912819519e-06, + "loss": 0.0072, + "step": 34420 + }, + { + "epoch": 0.22082332530553211, + "grad_norm": 0.22808292508125305, + "learning_rate": 9.996699611250648e-06, + "loss": 0.0084, + "step": 34430 + }, + { + "epoch": 0.22088746219931824, + "grad_norm": 0.2220643162727356, + "learning_rate": 9.996679247069561e-06, + "loss": 0.0063, + "step": 34440 + }, + { + "epoch": 0.22095159909310433, + "grad_norm": 0.20701898634433746, + "learning_rate": 9.996658820276512e-06, + "loss": 0.0069, + "step": 34450 + }, + { + "epoch": 0.22101573598689042, + "grad_norm": 0.16193971037864685, + "learning_rate": 9.996638330871758e-06, + "loss": 0.0073, + "step": 34460 + }, + { + "epoch": 0.22107987288067651, + "grad_norm": 0.31429579854011536, + "learning_rate": 9.996617778855553e-06, + "loss": 0.0043, + "step": 34470 + }, + { + "epoch": 0.2211440097744626, + "grad_norm": 0.17158426344394684, + "learning_rate": 9.996597164228157e-06, + "loss": 0.0077, + "step": 34480 + }, + { + "epoch": 0.2212081466682487, + "grad_norm": 0.29266834259033203, + "learning_rate": 9.996576486989827e-06, + "loss": 0.0063, + "step": 34490 + }, + { + "epoch": 0.2212722835620348, + "grad_norm": 0.5624863505363464, + "learning_rate": 9.996555747140824e-06, + "loss": 0.0069, + "step": 34500 + }, + { + "epoch": 0.22133642045582091, + "grad_norm": 0.26331159472465515, + "learning_rate": 9.996534944681404e-06, + "loss": 0.0052, + "step": 34510 + }, + { + "epoch": 0.221400557349607, + "grad_norm": 0.17117565870285034, + "learning_rate": 9.996514079611832e-06, + "loss": 0.0061, + "step": 34520 + }, + { + "epoch": 0.2214646942433931, + "grad_norm": 0.20332291722297668, + "learning_rate": 9.996493151932367e-06, + "loss": 0.0054, + "step": 34530 + }, + { + "epoch": 0.2215288311371792, + "grad_norm": 0.2634843587875366, + "learning_rate": 9.996472161643273e-06, + "loss": 0.0068, + "step": 34540 + }, + { + "epoch": 0.2215929680309653, + "grad_norm": 0.4386470317840576, + "learning_rate": 9.996451108744811e-06, + "loss": 0.0041, + "step": 34550 + }, + { + "epoch": 0.22165710492475138, + "grad_norm": 0.4105231761932373, + "learning_rate": 9.996429993237245e-06, + "loss": 0.0049, + "step": 34560 + }, + { + "epoch": 0.22172124181853747, + "grad_norm": 0.2908138334751129, + "learning_rate": 9.996408815120842e-06, + "loss": 0.0061, + "step": 34570 + }, + { + "epoch": 0.2217853787123236, + "grad_norm": 0.29927143454551697, + "learning_rate": 9.996387574395863e-06, + "loss": 0.005, + "step": 34580 + }, + { + "epoch": 0.2218495156061097, + "grad_norm": 0.29443830251693726, + "learning_rate": 9.996366271062578e-06, + "loss": 0.0051, + "step": 34590 + }, + { + "epoch": 0.22191365249989578, + "grad_norm": 0.13422074913978577, + "learning_rate": 9.996344905121255e-06, + "loss": 0.0057, + "step": 34600 + }, + { + "epoch": 0.22197778939368187, + "grad_norm": 0.39134156703948975, + "learning_rate": 9.996323476572157e-06, + "loss": 0.0081, + "step": 34610 + }, + { + "epoch": 0.22204192628746797, + "grad_norm": 0.22255438566207886, + "learning_rate": 9.996301985415556e-06, + "loss": 0.0118, + "step": 34620 + }, + { + "epoch": 0.22210606318125406, + "grad_norm": 0.35640034079551697, + "learning_rate": 9.99628043165172e-06, + "loss": 0.0052, + "step": 34630 + }, + { + "epoch": 0.22217020007504015, + "grad_norm": 0.5733597874641418, + "learning_rate": 9.996258815280919e-06, + "loss": 0.0058, + "step": 34640 + }, + { + "epoch": 0.22223433696882627, + "grad_norm": 0.32103994488716125, + "learning_rate": 9.996237136303426e-06, + "loss": 0.0076, + "step": 34650 + }, + { + "epoch": 0.22229847386261237, + "grad_norm": 0.5610170960426331, + "learning_rate": 9.996215394719509e-06, + "loss": 0.0083, + "step": 34660 + }, + { + "epoch": 0.22236261075639846, + "grad_norm": 0.28157588839530945, + "learning_rate": 9.996193590529445e-06, + "loss": 0.009, + "step": 34670 + }, + { + "epoch": 0.22242674765018455, + "grad_norm": 0.379728764295578, + "learning_rate": 9.9961717237335e-06, + "loss": 0.0071, + "step": 34680 + }, + { + "epoch": 0.22249088454397065, + "grad_norm": 0.3714889585971832, + "learning_rate": 9.996149794331955e-06, + "loss": 0.0083, + "step": 34690 + }, + { + "epoch": 0.22255502143775674, + "grad_norm": 0.1807042509317398, + "learning_rate": 9.996127802325084e-06, + "loss": 0.0042, + "step": 34700 + }, + { + "epoch": 0.22261915833154283, + "grad_norm": 0.5503183007240295, + "learning_rate": 9.996105747713158e-06, + "loss": 0.01, + "step": 34710 + }, + { + "epoch": 0.22268329522532895, + "grad_norm": 0.39598900079727173, + "learning_rate": 9.99608363049646e-06, + "loss": 0.0087, + "step": 34720 + }, + { + "epoch": 0.22274743211911505, + "grad_norm": 0.4356114864349365, + "learning_rate": 9.996061450675261e-06, + "loss": 0.006, + "step": 34730 + }, + { + "epoch": 0.22281156901290114, + "grad_norm": 0.15179894864559174, + "learning_rate": 9.996039208249842e-06, + "loss": 0.0079, + "step": 34740 + }, + { + "epoch": 0.22287570590668723, + "grad_norm": 0.25440022349357605, + "learning_rate": 9.99601690322048e-06, + "loss": 0.0071, + "step": 34750 + }, + { + "epoch": 0.22293984280047333, + "grad_norm": 0.18919359147548676, + "learning_rate": 9.995994535587456e-06, + "loss": 0.008, + "step": 34760 + }, + { + "epoch": 0.22300397969425942, + "grad_norm": 0.9403988122940063, + "learning_rate": 9.99597210535105e-06, + "loss": 0.0104, + "step": 34770 + }, + { + "epoch": 0.2230681165880455, + "grad_norm": 0.4549175500869751, + "learning_rate": 9.995949612511545e-06, + "loss": 0.0047, + "step": 34780 + }, + { + "epoch": 0.22313225348183163, + "grad_norm": 0.4007059335708618, + "learning_rate": 9.995927057069218e-06, + "loss": 0.0054, + "step": 34790 + }, + { + "epoch": 0.22319639037561773, + "grad_norm": 0.31075727939605713, + "learning_rate": 9.995904439024356e-06, + "loss": 0.0041, + "step": 34800 + }, + { + "epoch": 0.22326052726940382, + "grad_norm": 0.3774067163467407, + "learning_rate": 9.99588175837724e-06, + "loss": 0.0065, + "step": 34810 + }, + { + "epoch": 0.2233246641631899, + "grad_norm": 0.21462136507034302, + "learning_rate": 9.995859015128156e-06, + "loss": 0.0093, + "step": 34820 + }, + { + "epoch": 0.223388801056976, + "grad_norm": 0.2967641055583954, + "learning_rate": 9.995836209277388e-06, + "loss": 0.0073, + "step": 34830 + }, + { + "epoch": 0.2234529379507621, + "grad_norm": 0.3427407145500183, + "learning_rate": 9.99581334082522e-06, + "loss": 0.0047, + "step": 34840 + }, + { + "epoch": 0.2235170748445482, + "grad_norm": 0.2846042513847351, + "learning_rate": 9.995790409771942e-06, + "loss": 0.0053, + "step": 34850 + }, + { + "epoch": 0.2235812117383343, + "grad_norm": 0.23080459237098694, + "learning_rate": 9.99576741611784e-06, + "loss": 0.0066, + "step": 34860 + }, + { + "epoch": 0.2236453486321204, + "grad_norm": 0.41316619515419006, + "learning_rate": 9.995744359863201e-06, + "loss": 0.0075, + "step": 34870 + }, + { + "epoch": 0.2237094855259065, + "grad_norm": 0.22799259424209595, + "learning_rate": 9.995721241008315e-06, + "loss": 0.0048, + "step": 34880 + }, + { + "epoch": 0.2237736224196926, + "grad_norm": 0.3459303081035614, + "learning_rate": 9.995698059553471e-06, + "loss": 0.0037, + "step": 34890 + }, + { + "epoch": 0.22383775931347868, + "grad_norm": 0.19496068358421326, + "learning_rate": 9.995674815498961e-06, + "loss": 0.0072, + "step": 34900 + }, + { + "epoch": 0.22390189620726478, + "grad_norm": 0.162460595369339, + "learning_rate": 9.995651508845074e-06, + "loss": 0.005, + "step": 34910 + }, + { + "epoch": 0.22396603310105087, + "grad_norm": 0.3905678391456604, + "learning_rate": 9.995628139592103e-06, + "loss": 0.0067, + "step": 34920 + }, + { + "epoch": 0.224030169994837, + "grad_norm": 0.3376942574977875, + "learning_rate": 9.995604707740343e-06, + "loss": 0.0046, + "step": 34930 + }, + { + "epoch": 0.22409430688862308, + "grad_norm": 0.21789802610874176, + "learning_rate": 9.995581213290085e-06, + "loss": 0.0054, + "step": 34940 + }, + { + "epoch": 0.22415844378240918, + "grad_norm": 0.45708635449409485, + "learning_rate": 9.995557656241624e-06, + "loss": 0.01, + "step": 34950 + }, + { + "epoch": 0.22422258067619527, + "grad_norm": 0.30806267261505127, + "learning_rate": 9.995534036595256e-06, + "loss": 0.0046, + "step": 34960 + }, + { + "epoch": 0.22428671756998136, + "grad_norm": 0.40519043803215027, + "learning_rate": 9.995510354351275e-06, + "loss": 0.0075, + "step": 34970 + }, + { + "epoch": 0.22435085446376746, + "grad_norm": 0.3858379125595093, + "learning_rate": 9.995486609509978e-06, + "loss": 0.0074, + "step": 34980 + }, + { + "epoch": 0.22441499135755355, + "grad_norm": 0.1513359248638153, + "learning_rate": 9.995462802071666e-06, + "loss": 0.0048, + "step": 34990 + }, + { + "epoch": 0.22447912825133967, + "grad_norm": 0.1579079031944275, + "learning_rate": 9.995438932036634e-06, + "loss": 0.0045, + "step": 35000 + }, + { + "epoch": 0.22454326514512576, + "grad_norm": 0.38252583146095276, + "learning_rate": 9.995414999405182e-06, + "loss": 0.0082, + "step": 35010 + }, + { + "epoch": 0.22460740203891186, + "grad_norm": 0.17620162665843964, + "learning_rate": 9.99539100417761e-06, + "loss": 0.0071, + "step": 35020 + }, + { + "epoch": 0.22467153893269795, + "grad_norm": 0.4304888844490051, + "learning_rate": 9.995366946354218e-06, + "loss": 0.006, + "step": 35030 + }, + { + "epoch": 0.22473567582648404, + "grad_norm": 0.17609496414661407, + "learning_rate": 9.995342825935309e-06, + "loss": 0.0052, + "step": 35040 + }, + { + "epoch": 0.22479981272027014, + "grad_norm": 0.294415146112442, + "learning_rate": 9.995318642921183e-06, + "loss": 0.0104, + "step": 35050 + }, + { + "epoch": 0.22486394961405623, + "grad_norm": 0.3411390483379364, + "learning_rate": 9.995294397312145e-06, + "loss": 0.0052, + "step": 35060 + }, + { + "epoch": 0.22492808650784235, + "grad_norm": 0.45513299107551575, + "learning_rate": 9.995270089108498e-06, + "loss": 0.0044, + "step": 35070 + }, + { + "epoch": 0.22499222340162844, + "grad_norm": 0.15879599750041962, + "learning_rate": 9.995245718310546e-06, + "loss": 0.0041, + "step": 35080 + }, + { + "epoch": 0.22505636029541454, + "grad_norm": 0.10694967210292816, + "learning_rate": 9.995221284918596e-06, + "loss": 0.0056, + "step": 35090 + }, + { + "epoch": 0.22512049718920063, + "grad_norm": 0.2496778517961502, + "learning_rate": 9.995196788932955e-06, + "loss": 0.0086, + "step": 35100 + }, + { + "epoch": 0.22518463408298672, + "grad_norm": 0.2647297978401184, + "learning_rate": 9.995172230353924e-06, + "loss": 0.0045, + "step": 35110 + }, + { + "epoch": 0.22524877097677282, + "grad_norm": 0.5168192982673645, + "learning_rate": 9.995147609181819e-06, + "loss": 0.0061, + "step": 35120 + }, + { + "epoch": 0.2253129078705589, + "grad_norm": 0.2683510184288025, + "learning_rate": 9.995122925416943e-06, + "loss": 0.0046, + "step": 35130 + }, + { + "epoch": 0.22537704476434503, + "grad_norm": 0.2050730139017105, + "learning_rate": 9.995098179059605e-06, + "loss": 0.0069, + "step": 35140 + }, + { + "epoch": 0.22544118165813112, + "grad_norm": 0.27218306064605713, + "learning_rate": 9.99507337011012e-06, + "loss": 0.0062, + "step": 35150 + }, + { + "epoch": 0.22550531855191722, + "grad_norm": 0.08528287708759308, + "learning_rate": 9.995048498568793e-06, + "loss": 0.0041, + "step": 35160 + }, + { + "epoch": 0.2255694554457033, + "grad_norm": 0.20639532804489136, + "learning_rate": 9.99502356443594e-06, + "loss": 0.0062, + "step": 35170 + }, + { + "epoch": 0.2256335923394894, + "grad_norm": 0.15732760727405548, + "learning_rate": 9.99499856771187e-06, + "loss": 0.0036, + "step": 35180 + }, + { + "epoch": 0.2256977292332755, + "grad_norm": 0.23593586683273315, + "learning_rate": 9.994973508396901e-06, + "loss": 0.0045, + "step": 35190 + }, + { + "epoch": 0.2257618661270616, + "grad_norm": 0.3014291524887085, + "learning_rate": 9.994948386491343e-06, + "loss": 0.0069, + "step": 35200 + }, + { + "epoch": 0.2258260030208477, + "grad_norm": 0.37358778715133667, + "learning_rate": 9.99492320199551e-06, + "loss": 0.0077, + "step": 35210 + }, + { + "epoch": 0.2258901399146338, + "grad_norm": 0.09573258459568024, + "learning_rate": 9.994897954909722e-06, + "loss": 0.0079, + "step": 35220 + }, + { + "epoch": 0.2259542768084199, + "grad_norm": 0.22441881895065308, + "learning_rate": 9.994872645234293e-06, + "loss": 0.0046, + "step": 35230 + }, + { + "epoch": 0.226018413702206, + "grad_norm": 0.3367602527141571, + "learning_rate": 9.994847272969538e-06, + "loss": 0.0052, + "step": 35240 + }, + { + "epoch": 0.22608255059599208, + "grad_norm": 0.4609188735485077, + "learning_rate": 9.994821838115778e-06, + "loss": 0.0058, + "step": 35250 + }, + { + "epoch": 0.22614668748977818, + "grad_norm": 0.36705097556114197, + "learning_rate": 9.99479634067333e-06, + "loss": 0.0054, + "step": 35260 + }, + { + "epoch": 0.22621082438356427, + "grad_norm": 0.07663752138614655, + "learning_rate": 9.994770780642514e-06, + "loss": 0.0053, + "step": 35270 + }, + { + "epoch": 0.2262749612773504, + "grad_norm": 0.40321534872055054, + "learning_rate": 9.994745158023652e-06, + "loss": 0.0081, + "step": 35280 + }, + { + "epoch": 0.22633909817113648, + "grad_norm": 0.47762489318847656, + "learning_rate": 9.99471947281706e-06, + "loss": 0.0102, + "step": 35290 + }, + { + "epoch": 0.22640323506492258, + "grad_norm": 0.37097352743148804, + "learning_rate": 9.994693725023067e-06, + "loss": 0.0042, + "step": 35300 + }, + { + "epoch": 0.22646737195870867, + "grad_norm": 0.33247286081314087, + "learning_rate": 9.994667914641991e-06, + "loss": 0.0053, + "step": 35310 + }, + { + "epoch": 0.22653150885249476, + "grad_norm": 0.18313194811344147, + "learning_rate": 9.994642041674156e-06, + "loss": 0.009, + "step": 35320 + }, + { + "epoch": 0.22659564574628085, + "grad_norm": 0.12064996361732483, + "learning_rate": 9.994616106119885e-06, + "loss": 0.0034, + "step": 35330 + }, + { + "epoch": 0.22665978264006695, + "grad_norm": 0.09952157735824585, + "learning_rate": 9.994590107979508e-06, + "loss": 0.0063, + "step": 35340 + }, + { + "epoch": 0.22672391953385307, + "grad_norm": 0.3346708416938782, + "learning_rate": 9.994564047253346e-06, + "loss": 0.0068, + "step": 35350 + }, + { + "epoch": 0.22678805642763916, + "grad_norm": 0.16705653071403503, + "learning_rate": 9.994537923941725e-06, + "loss": 0.0065, + "step": 35360 + }, + { + "epoch": 0.22685219332142526, + "grad_norm": 0.24314428865909576, + "learning_rate": 9.994511738044976e-06, + "loss": 0.0067, + "step": 35370 + }, + { + "epoch": 0.22691633021521135, + "grad_norm": 0.28826290369033813, + "learning_rate": 9.994485489563425e-06, + "loss": 0.0043, + "step": 35380 + }, + { + "epoch": 0.22698046710899744, + "grad_norm": 0.2939811050891876, + "learning_rate": 9.994459178497403e-06, + "loss": 0.0053, + "step": 35390 + }, + { + "epoch": 0.22704460400278353, + "grad_norm": 0.21459046006202698, + "learning_rate": 9.994432804847235e-06, + "loss": 0.0077, + "step": 35400 + }, + { + "epoch": 0.22710874089656963, + "grad_norm": 0.3848009407520294, + "learning_rate": 9.994406368613257e-06, + "loss": 0.0071, + "step": 35410 + }, + { + "epoch": 0.22717287779035575, + "grad_norm": 0.31907474994659424, + "learning_rate": 9.994379869795797e-06, + "loss": 0.0056, + "step": 35420 + }, + { + "epoch": 0.22723701468414184, + "grad_norm": 0.6475202441215515, + "learning_rate": 9.994353308395186e-06, + "loss": 0.0059, + "step": 35430 + }, + { + "epoch": 0.22730115157792793, + "grad_norm": 0.23448844254016876, + "learning_rate": 9.99432668441176e-06, + "loss": 0.0062, + "step": 35440 + }, + { + "epoch": 0.22736528847171403, + "grad_norm": 0.18885894119739532, + "learning_rate": 9.994299997845851e-06, + "loss": 0.007, + "step": 35450 + }, + { + "epoch": 0.22742942536550012, + "grad_norm": 0.30449241399765015, + "learning_rate": 9.994273248697794e-06, + "loss": 0.0067, + "step": 35460 + }, + { + "epoch": 0.22749356225928621, + "grad_norm": 0.2622857987880707, + "learning_rate": 9.994246436967925e-06, + "loss": 0.0097, + "step": 35470 + }, + { + "epoch": 0.2275576991530723, + "grad_norm": 0.2114802598953247, + "learning_rate": 9.994219562656577e-06, + "loss": 0.0071, + "step": 35480 + }, + { + "epoch": 0.22762183604685843, + "grad_norm": 0.2560284435749054, + "learning_rate": 9.99419262576409e-06, + "loss": 0.0071, + "step": 35490 + }, + { + "epoch": 0.22768597294064452, + "grad_norm": 0.2646476626396179, + "learning_rate": 9.994165626290799e-06, + "loss": 0.008, + "step": 35500 + }, + { + "epoch": 0.22775010983443061, + "grad_norm": 0.1781512349843979, + "learning_rate": 9.994138564237044e-06, + "loss": 0.0055, + "step": 35510 + }, + { + "epoch": 0.2278142467282167, + "grad_norm": 0.18580837547779083, + "learning_rate": 9.994111439603164e-06, + "loss": 0.0045, + "step": 35520 + }, + { + "epoch": 0.2278783836220028, + "grad_norm": 0.32868054509162903, + "learning_rate": 9.994084252389497e-06, + "loss": 0.0034, + "step": 35530 + }, + { + "epoch": 0.2279425205157889, + "grad_norm": 0.3121737539768219, + "learning_rate": 9.994057002596387e-06, + "loss": 0.0075, + "step": 35540 + }, + { + "epoch": 0.228006657409575, + "grad_norm": 0.4381645917892456, + "learning_rate": 9.994029690224173e-06, + "loss": 0.0097, + "step": 35550 + }, + { + "epoch": 0.2280707943033611, + "grad_norm": 0.26858723163604736, + "learning_rate": 9.994002315273196e-06, + "loss": 0.0071, + "step": 35560 + }, + { + "epoch": 0.2281349311971472, + "grad_norm": 0.19299057126045227, + "learning_rate": 9.993974877743802e-06, + "loss": 0.0079, + "step": 35570 + }, + { + "epoch": 0.2281990680909333, + "grad_norm": 0.39440590143203735, + "learning_rate": 9.993947377636332e-06, + "loss": 0.0081, + "step": 35580 + }, + { + "epoch": 0.2282632049847194, + "grad_norm": 0.11068882048130035, + "learning_rate": 9.993919814951135e-06, + "loss": 0.0061, + "step": 35590 + }, + { + "epoch": 0.22832734187850548, + "grad_norm": 0.27898338437080383, + "learning_rate": 9.993892189688552e-06, + "loss": 0.0069, + "step": 35600 + }, + { + "epoch": 0.22839147877229157, + "grad_norm": 0.28347986936569214, + "learning_rate": 9.993864501848933e-06, + "loss": 0.004, + "step": 35610 + }, + { + "epoch": 0.22845561566607767, + "grad_norm": 0.16570374369621277, + "learning_rate": 9.99383675143262e-06, + "loss": 0.0055, + "step": 35620 + }, + { + "epoch": 0.2285197525598638, + "grad_norm": 0.6783402562141418, + "learning_rate": 9.993808938439965e-06, + "loss": 0.0062, + "step": 35630 + }, + { + "epoch": 0.22858388945364988, + "grad_norm": 0.3398296535015106, + "learning_rate": 9.993781062871316e-06, + "loss": 0.0069, + "step": 35640 + }, + { + "epoch": 0.22864802634743597, + "grad_norm": 0.3182373046875, + "learning_rate": 9.993753124727018e-06, + "loss": 0.0054, + "step": 35650 + }, + { + "epoch": 0.22871216324122207, + "grad_norm": 0.0939110517501831, + "learning_rate": 9.993725124007427e-06, + "loss": 0.0059, + "step": 35660 + }, + { + "epoch": 0.22877630013500816, + "grad_norm": 0.33174213767051697, + "learning_rate": 9.99369706071289e-06, + "loss": 0.0042, + "step": 35670 + }, + { + "epoch": 0.22884043702879425, + "grad_norm": 0.08618547767400742, + "learning_rate": 9.99366893484376e-06, + "loss": 0.0038, + "step": 35680 + }, + { + "epoch": 0.22890457392258035, + "grad_norm": 0.34387779235839844, + "learning_rate": 9.99364074640039e-06, + "loss": 0.0084, + "step": 35690 + }, + { + "epoch": 0.22896871081636647, + "grad_norm": 0.22006486356258392, + "learning_rate": 9.993612495383131e-06, + "loss": 0.0053, + "step": 35700 + }, + { + "epoch": 0.22903284771015256, + "grad_norm": 0.15135833621025085, + "learning_rate": 9.99358418179234e-06, + "loss": 0.0064, + "step": 35710 + }, + { + "epoch": 0.22909698460393865, + "grad_norm": 0.27555614709854126, + "learning_rate": 9.99355580562837e-06, + "loss": 0.0063, + "step": 35720 + }, + { + "epoch": 0.22916112149772475, + "grad_norm": 0.3538702130317688, + "learning_rate": 9.993527366891578e-06, + "loss": 0.0088, + "step": 35730 + }, + { + "epoch": 0.22922525839151084, + "grad_norm": 0.19812867045402527, + "learning_rate": 9.993498865582318e-06, + "loss": 0.0054, + "step": 35740 + }, + { + "epoch": 0.22928939528529693, + "grad_norm": 0.23928295075893402, + "learning_rate": 9.993470301700948e-06, + "loss": 0.0064, + "step": 35750 + }, + { + "epoch": 0.22935353217908303, + "grad_norm": 0.2500852346420288, + "learning_rate": 9.993441675247829e-06, + "loss": 0.0071, + "step": 35760 + }, + { + "epoch": 0.22941766907286912, + "grad_norm": 0.2701261043548584, + "learning_rate": 9.993412986223313e-06, + "loss": 0.0048, + "step": 35770 + }, + { + "epoch": 0.22948180596665524, + "grad_norm": 0.23142360150814056, + "learning_rate": 9.993384234627765e-06, + "loss": 0.0068, + "step": 35780 + }, + { + "epoch": 0.22954594286044133, + "grad_norm": 0.543775200843811, + "learning_rate": 9.993355420461546e-06, + "loss": 0.0053, + "step": 35790 + }, + { + "epoch": 0.22961007975422743, + "grad_norm": 0.17761576175689697, + "learning_rate": 9.993326543725012e-06, + "loss": 0.0041, + "step": 35800 + }, + { + "epoch": 0.22967421664801352, + "grad_norm": 0.06380312889814377, + "learning_rate": 9.99329760441853e-06, + "loss": 0.0048, + "step": 35810 + }, + { + "epoch": 0.2297383535417996, + "grad_norm": 0.23243442177772522, + "learning_rate": 9.99326860254246e-06, + "loss": 0.0064, + "step": 35820 + }, + { + "epoch": 0.2298024904355857, + "grad_norm": 0.33521875739097595, + "learning_rate": 9.993239538097164e-06, + "loss": 0.0059, + "step": 35830 + }, + { + "epoch": 0.2298666273293718, + "grad_norm": 0.14889921247959137, + "learning_rate": 9.99321041108301e-06, + "loss": 0.0028, + "step": 35840 + }, + { + "epoch": 0.22993076422315792, + "grad_norm": 0.49306491017341614, + "learning_rate": 9.993181221500358e-06, + "loss": 0.0065, + "step": 35850 + }, + { + "epoch": 0.229994901116944, + "grad_norm": 0.231283500790596, + "learning_rate": 9.99315196934958e-06, + "loss": 0.0048, + "step": 35860 + }, + { + "epoch": 0.2300590380107301, + "grad_norm": 0.10951454937458038, + "learning_rate": 9.993122654631037e-06, + "loss": 0.0086, + "step": 35870 + }, + { + "epoch": 0.2301231749045162, + "grad_norm": 0.21918267011642456, + "learning_rate": 9.993093277345101e-06, + "loss": 0.0035, + "step": 35880 + }, + { + "epoch": 0.2301873117983023, + "grad_norm": 0.26090681552886963, + "learning_rate": 9.993063837492136e-06, + "loss": 0.0068, + "step": 35890 + }, + { + "epoch": 0.23025144869208838, + "grad_norm": 0.17361082136631012, + "learning_rate": 9.993034335072514e-06, + "loss": 0.0065, + "step": 35900 + }, + { + "epoch": 0.23031558558587448, + "grad_norm": 0.20613285899162292, + "learning_rate": 9.993004770086602e-06, + "loss": 0.0073, + "step": 35910 + }, + { + "epoch": 0.2303797224796606, + "grad_norm": 0.2803209722042084, + "learning_rate": 9.992975142534771e-06, + "loss": 0.0053, + "step": 35920 + }, + { + "epoch": 0.2304438593734467, + "grad_norm": 0.16461430490016937, + "learning_rate": 9.992945452417396e-06, + "loss": 0.0068, + "step": 35930 + }, + { + "epoch": 0.23050799626723278, + "grad_norm": 0.3616570830345154, + "learning_rate": 9.992915699734843e-06, + "loss": 0.0068, + "step": 35940 + }, + { + "epoch": 0.23057213316101888, + "grad_norm": 1.1020573377609253, + "learning_rate": 9.992885884487488e-06, + "loss": 0.0089, + "step": 35950 + }, + { + "epoch": 0.23063627005480497, + "grad_norm": 0.2968498170375824, + "learning_rate": 9.992856006675706e-06, + "loss": 0.0054, + "step": 35960 + }, + { + "epoch": 0.23070040694859106, + "grad_norm": 0.8094943761825562, + "learning_rate": 9.99282606629987e-06, + "loss": 0.0075, + "step": 35970 + }, + { + "epoch": 0.23076454384237716, + "grad_norm": 0.528534471988678, + "learning_rate": 9.992796063360354e-06, + "loss": 0.0042, + "step": 35980 + }, + { + "epoch": 0.23082868073616328, + "grad_norm": 0.27213770151138306, + "learning_rate": 9.992765997857536e-06, + "loss": 0.0111, + "step": 35990 + }, + { + "epoch": 0.23089281762994937, + "grad_norm": 0.3508973717689514, + "learning_rate": 9.992735869791792e-06, + "loss": 0.0102, + "step": 36000 + }, + { + "epoch": 0.23095695452373546, + "grad_norm": 0.2607177495956421, + "learning_rate": 9.9927056791635e-06, + "loss": 0.0056, + "step": 36010 + }, + { + "epoch": 0.23102109141752156, + "grad_norm": 0.2572328746318817, + "learning_rate": 9.992675425973035e-06, + "loss": 0.0059, + "step": 36020 + }, + { + "epoch": 0.23108522831130765, + "grad_norm": 0.10305281728506088, + "learning_rate": 9.99264511022078e-06, + "loss": 0.0068, + "step": 36030 + }, + { + "epoch": 0.23114936520509374, + "grad_norm": 0.29231107234954834, + "learning_rate": 9.992614731907114e-06, + "loss": 0.0048, + "step": 36040 + }, + { + "epoch": 0.23121350209887984, + "grad_norm": 0.4050886332988739, + "learning_rate": 9.992584291032417e-06, + "loss": 0.0056, + "step": 36050 + }, + { + "epoch": 0.23127763899266596, + "grad_norm": 0.7118836641311646, + "learning_rate": 9.99255378759707e-06, + "loss": 0.0133, + "step": 36060 + }, + { + "epoch": 0.23134177588645205, + "grad_norm": 0.22194357216358185, + "learning_rate": 9.992523221601457e-06, + "loss": 0.0057, + "step": 36070 + }, + { + "epoch": 0.23140591278023814, + "grad_norm": 0.14988495409488678, + "learning_rate": 9.992492593045958e-06, + "loss": 0.0079, + "step": 36080 + }, + { + "epoch": 0.23147004967402424, + "grad_norm": 0.24507054686546326, + "learning_rate": 9.992461901930961e-06, + "loss": 0.0064, + "step": 36090 + }, + { + "epoch": 0.23153418656781033, + "grad_norm": 0.14381228387355804, + "learning_rate": 9.992431148256847e-06, + "loss": 0.007, + "step": 36100 + }, + { + "epoch": 0.23159832346159642, + "grad_norm": 0.5826447606086731, + "learning_rate": 9.992400332024004e-06, + "loss": 0.0114, + "step": 36110 + }, + { + "epoch": 0.23166246035538252, + "grad_norm": 0.30199071764945984, + "learning_rate": 9.992369453232815e-06, + "loss": 0.0066, + "step": 36120 + }, + { + "epoch": 0.23172659724916864, + "grad_norm": 0.41684022545814514, + "learning_rate": 9.99233851188367e-06, + "loss": 0.0038, + "step": 36130 + }, + { + "epoch": 0.23179073414295473, + "grad_norm": 0.10940320044755936, + "learning_rate": 9.992307507976955e-06, + "loss": 0.0084, + "step": 36140 + }, + { + "epoch": 0.23185487103674082, + "grad_norm": 0.41128668189048767, + "learning_rate": 9.992276441513061e-06, + "loss": 0.0064, + "step": 36150 + }, + { + "epoch": 0.23191900793052692, + "grad_norm": 0.6398462653160095, + "learning_rate": 9.992245312492372e-06, + "loss": 0.009, + "step": 36160 + }, + { + "epoch": 0.231983144824313, + "grad_norm": 0.5770807266235352, + "learning_rate": 9.992214120915282e-06, + "loss": 0.0055, + "step": 36170 + }, + { + "epoch": 0.2320472817180991, + "grad_norm": 0.46604737639427185, + "learning_rate": 9.992182866782183e-06, + "loss": 0.008, + "step": 36180 + }, + { + "epoch": 0.2321114186118852, + "grad_norm": 0.25087466835975647, + "learning_rate": 9.992151550093464e-06, + "loss": 0.0047, + "step": 36190 + }, + { + "epoch": 0.23217555550567132, + "grad_norm": 0.11956658959388733, + "learning_rate": 9.992120170849517e-06, + "loss": 0.0062, + "step": 36200 + }, + { + "epoch": 0.2322396923994574, + "grad_norm": 0.2919653058052063, + "learning_rate": 9.992088729050737e-06, + "loss": 0.0073, + "step": 36210 + }, + { + "epoch": 0.2323038292932435, + "grad_norm": 0.26989543437957764, + "learning_rate": 9.992057224697518e-06, + "loss": 0.0054, + "step": 36220 + }, + { + "epoch": 0.2323679661870296, + "grad_norm": 0.1296006292104721, + "learning_rate": 9.992025657790254e-06, + "loss": 0.0044, + "step": 36230 + }, + { + "epoch": 0.2324321030808157, + "grad_norm": 0.10228066891431808, + "learning_rate": 9.991994028329341e-06, + "loss": 0.0059, + "step": 36240 + }, + { + "epoch": 0.23249623997460178, + "grad_norm": 0.23078081011772156, + "learning_rate": 9.991962336315175e-06, + "loss": 0.0042, + "step": 36250 + }, + { + "epoch": 0.23256037686838787, + "grad_norm": 0.27274227142333984, + "learning_rate": 9.991930581748154e-06, + "loss": 0.0079, + "step": 36260 + }, + { + "epoch": 0.232624513762174, + "grad_norm": 0.17164266109466553, + "learning_rate": 9.991898764628674e-06, + "loss": 0.0056, + "step": 36270 + }, + { + "epoch": 0.2326886506559601, + "grad_norm": 0.1612294614315033, + "learning_rate": 9.991866884957135e-06, + "loss": 0.0055, + "step": 36280 + }, + { + "epoch": 0.23275278754974618, + "grad_norm": 0.31363794207572937, + "learning_rate": 9.991834942733936e-06, + "loss": 0.0064, + "step": 36290 + }, + { + "epoch": 0.23281692444353227, + "grad_norm": 0.3872830867767334, + "learning_rate": 9.991802937959478e-06, + "loss": 0.0058, + "step": 36300 + }, + { + "epoch": 0.23288106133731837, + "grad_norm": 0.43687549233436584, + "learning_rate": 9.991770870634163e-06, + "loss": 0.0066, + "step": 36310 + }, + { + "epoch": 0.23294519823110446, + "grad_norm": 0.5026020407676697, + "learning_rate": 9.991738740758388e-06, + "loss": 0.0066, + "step": 36320 + }, + { + "epoch": 0.23300933512489055, + "grad_norm": 0.66605144739151, + "learning_rate": 9.991706548332563e-06, + "loss": 0.0043, + "step": 36330 + }, + { + "epoch": 0.23307347201867668, + "grad_norm": 0.2193024456501007, + "learning_rate": 9.991674293357085e-06, + "loss": 0.0046, + "step": 36340 + }, + { + "epoch": 0.23313760891246277, + "grad_norm": 0.2898307144641876, + "learning_rate": 9.99164197583236e-06, + "loss": 0.0041, + "step": 36350 + }, + { + "epoch": 0.23320174580624886, + "grad_norm": 0.3485690653324127, + "learning_rate": 9.991609595758796e-06, + "loss": 0.0066, + "step": 36360 + }, + { + "epoch": 0.23326588270003495, + "grad_norm": 0.4199317991733551, + "learning_rate": 9.991577153136794e-06, + "loss": 0.0057, + "step": 36370 + }, + { + "epoch": 0.23333001959382105, + "grad_norm": 0.634312093257904, + "learning_rate": 9.991544647966765e-06, + "loss": 0.0038, + "step": 36380 + }, + { + "epoch": 0.23339415648760714, + "grad_norm": 0.32351168990135193, + "learning_rate": 9.991512080249111e-06, + "loss": 0.0055, + "step": 36390 + }, + { + "epoch": 0.23345829338139323, + "grad_norm": 0.16520749032497406, + "learning_rate": 9.991479449984246e-06, + "loss": 0.0037, + "step": 36400 + }, + { + "epoch": 0.23352243027517935, + "grad_norm": 0.4310011863708496, + "learning_rate": 9.991446757172575e-06, + "loss": 0.0052, + "step": 36410 + }, + { + "epoch": 0.23358656716896545, + "grad_norm": 0.3078899085521698, + "learning_rate": 9.99141400181451e-06, + "loss": 0.0061, + "step": 36420 + }, + { + "epoch": 0.23365070406275154, + "grad_norm": 0.49376317858695984, + "learning_rate": 9.991381183910459e-06, + "loss": 0.0059, + "step": 36430 + }, + { + "epoch": 0.23371484095653763, + "grad_norm": 0.3923306167125702, + "learning_rate": 9.991348303460834e-06, + "loss": 0.0077, + "step": 36440 + }, + { + "epoch": 0.23377897785032373, + "grad_norm": 0.25772348046302795, + "learning_rate": 9.99131536046605e-06, + "loss": 0.0044, + "step": 36450 + }, + { + "epoch": 0.23384311474410982, + "grad_norm": 0.244273841381073, + "learning_rate": 9.991282354926515e-06, + "loss": 0.0056, + "step": 36460 + }, + { + "epoch": 0.2339072516378959, + "grad_norm": 0.27689307928085327, + "learning_rate": 9.991249286842645e-06, + "loss": 0.005, + "step": 36470 + }, + { + "epoch": 0.23397138853168203, + "grad_norm": 0.14308667182922363, + "learning_rate": 9.991216156214855e-06, + "loss": 0.0068, + "step": 36480 + }, + { + "epoch": 0.23403552542546813, + "grad_norm": 0.2195776104927063, + "learning_rate": 9.991182963043558e-06, + "loss": 0.0045, + "step": 36490 + }, + { + "epoch": 0.23409966231925422, + "grad_norm": 0.20184378325939178, + "learning_rate": 9.991149707329173e-06, + "loss": 0.0088, + "step": 36500 + }, + { + "epoch": 0.2341637992130403, + "grad_norm": 0.13293959200382233, + "learning_rate": 9.991116389072115e-06, + "loss": 0.0048, + "step": 36510 + }, + { + "epoch": 0.2342279361068264, + "grad_norm": 0.10092591494321823, + "learning_rate": 9.991083008272801e-06, + "loss": 0.0081, + "step": 36520 + }, + { + "epoch": 0.2342920730006125, + "grad_norm": 0.3733716309070587, + "learning_rate": 9.99104956493165e-06, + "loss": 0.0082, + "step": 36530 + }, + { + "epoch": 0.2343562098943986, + "grad_norm": 0.5411534309387207, + "learning_rate": 9.991016059049081e-06, + "loss": 0.0125, + "step": 36540 + }, + { + "epoch": 0.2344203467881847, + "grad_norm": 0.4143863022327423, + "learning_rate": 9.990982490625514e-06, + "loss": 0.0065, + "step": 36550 + }, + { + "epoch": 0.2344844836819708, + "grad_norm": 0.15255770087242126, + "learning_rate": 9.990948859661367e-06, + "loss": 0.0066, + "step": 36560 + }, + { + "epoch": 0.2345486205757569, + "grad_norm": 0.1986321359872818, + "learning_rate": 9.990915166157067e-06, + "loss": 0.0075, + "step": 36570 + }, + { + "epoch": 0.234612757469543, + "grad_norm": 0.26919758319854736, + "learning_rate": 9.990881410113032e-06, + "loss": 0.0057, + "step": 36580 + }, + { + "epoch": 0.23467689436332909, + "grad_norm": 0.24907876551151276, + "learning_rate": 9.990847591529685e-06, + "loss": 0.0037, + "step": 36590 + }, + { + "epoch": 0.23474103125711518, + "grad_norm": 0.13349837064743042, + "learning_rate": 9.990813710407453e-06, + "loss": 0.0054, + "step": 36600 + }, + { + "epoch": 0.23480516815090127, + "grad_norm": 0.1132342666387558, + "learning_rate": 9.990779766746757e-06, + "loss": 0.0044, + "step": 36610 + }, + { + "epoch": 0.2348693050446874, + "grad_norm": 0.446804016828537, + "learning_rate": 9.990745760548023e-06, + "loss": 0.0064, + "step": 36620 + }, + { + "epoch": 0.2349334419384735, + "grad_norm": 0.40333107113838196, + "learning_rate": 9.990711691811679e-06, + "loss": 0.0064, + "step": 36630 + }, + { + "epoch": 0.23499757883225958, + "grad_norm": 0.23259688913822174, + "learning_rate": 9.990677560538149e-06, + "loss": 0.0059, + "step": 36640 + }, + { + "epoch": 0.23506171572604567, + "grad_norm": 0.3403295874595642, + "learning_rate": 9.990643366727863e-06, + "loss": 0.0042, + "step": 36650 + }, + { + "epoch": 0.23512585261983177, + "grad_norm": 0.21020762622356415, + "learning_rate": 9.99060911038125e-06, + "loss": 0.0078, + "step": 36660 + }, + { + "epoch": 0.23518998951361786, + "grad_norm": 0.170172318816185, + "learning_rate": 9.990574791498737e-06, + "loss": 0.0073, + "step": 36670 + }, + { + "epoch": 0.23525412640740395, + "grad_norm": 0.17274317145347595, + "learning_rate": 9.990540410080755e-06, + "loss": 0.0051, + "step": 36680 + }, + { + "epoch": 0.23531826330119007, + "grad_norm": 0.1390146017074585, + "learning_rate": 9.990505966127735e-06, + "loss": 0.0067, + "step": 36690 + }, + { + "epoch": 0.23538240019497617, + "grad_norm": 0.34374529123306274, + "learning_rate": 9.990471459640109e-06, + "loss": 0.0078, + "step": 36700 + }, + { + "epoch": 0.23544653708876226, + "grad_norm": 0.4592181146144867, + "learning_rate": 9.99043689061831e-06, + "loss": 0.0063, + "step": 36710 + }, + { + "epoch": 0.23551067398254835, + "grad_norm": 0.3880027234554291, + "learning_rate": 9.99040225906277e-06, + "loss": 0.0054, + "step": 36720 + }, + { + "epoch": 0.23557481087633445, + "grad_norm": 0.11227881163358688, + "learning_rate": 9.990367564973922e-06, + "loss": 0.0058, + "step": 36730 + }, + { + "epoch": 0.23563894777012054, + "grad_norm": 0.3381422460079193, + "learning_rate": 9.990332808352203e-06, + "loss": 0.0056, + "step": 36740 + }, + { + "epoch": 0.23570308466390663, + "grad_norm": 0.18786120414733887, + "learning_rate": 9.990297989198046e-06, + "loss": 0.0043, + "step": 36750 + }, + { + "epoch": 0.23576722155769275, + "grad_norm": 0.2107134759426117, + "learning_rate": 9.990263107511888e-06, + "loss": 0.007, + "step": 36760 + }, + { + "epoch": 0.23583135845147885, + "grad_norm": 0.22882787883281708, + "learning_rate": 9.990228163294169e-06, + "loss": 0.0057, + "step": 36770 + }, + { + "epoch": 0.23589549534526494, + "grad_norm": 0.28724777698516846, + "learning_rate": 9.990193156545323e-06, + "loss": 0.0055, + "step": 36780 + }, + { + "epoch": 0.23595963223905103, + "grad_norm": 0.45361918210983276, + "learning_rate": 9.990158087265791e-06, + "loss": 0.006, + "step": 36790 + }, + { + "epoch": 0.23602376913283712, + "grad_norm": 0.23359541594982147, + "learning_rate": 9.990122955456012e-06, + "loss": 0.0086, + "step": 36800 + }, + { + "epoch": 0.23608790602662322, + "grad_norm": 0.1540917605161667, + "learning_rate": 9.990087761116426e-06, + "loss": 0.0054, + "step": 36810 + }, + { + "epoch": 0.2361520429204093, + "grad_norm": 0.37155643105506897, + "learning_rate": 9.990052504247473e-06, + "loss": 0.0054, + "step": 36820 + }, + { + "epoch": 0.23621617981419543, + "grad_norm": 0.07868891954421997, + "learning_rate": 9.990017184849597e-06, + "loss": 0.0045, + "step": 36830 + }, + { + "epoch": 0.23628031670798152, + "grad_norm": 0.1814006119966507, + "learning_rate": 9.989981802923238e-06, + "loss": 0.0055, + "step": 36840 + }, + { + "epoch": 0.23634445360176762, + "grad_norm": 0.3288971483707428, + "learning_rate": 9.989946358468843e-06, + "loss": 0.0071, + "step": 36850 + }, + { + "epoch": 0.2364085904955537, + "grad_norm": 0.25540879368782043, + "learning_rate": 9.989910851486853e-06, + "loss": 0.0047, + "step": 36860 + }, + { + "epoch": 0.2364727273893398, + "grad_norm": 0.18015730381011963, + "learning_rate": 9.989875281977713e-06, + "loss": 0.0057, + "step": 36870 + }, + { + "epoch": 0.2365368642831259, + "grad_norm": 0.26143142580986023, + "learning_rate": 9.989839649941867e-06, + "loss": 0.0071, + "step": 36880 + }, + { + "epoch": 0.236601001176912, + "grad_norm": 0.16240955889225006, + "learning_rate": 9.98980395537977e-06, + "loss": 0.0087, + "step": 36890 + }, + { + "epoch": 0.2366651380706981, + "grad_norm": 0.3834659159183502, + "learning_rate": 9.989768198291858e-06, + "loss": 0.0082, + "step": 36900 + }, + { + "epoch": 0.2367292749644842, + "grad_norm": 0.16273871064186096, + "learning_rate": 9.989732378678587e-06, + "loss": 0.0035, + "step": 36910 + }, + { + "epoch": 0.2367934118582703, + "grad_norm": 0.2915313243865967, + "learning_rate": 9.989696496540401e-06, + "loss": 0.0065, + "step": 36920 + }, + { + "epoch": 0.2368575487520564, + "grad_norm": 0.2651917338371277, + "learning_rate": 9.989660551877755e-06, + "loss": 0.0044, + "step": 36930 + }, + { + "epoch": 0.23692168564584248, + "grad_norm": 0.312575101852417, + "learning_rate": 9.989624544691095e-06, + "loss": 0.007, + "step": 36940 + }, + { + "epoch": 0.23698582253962858, + "grad_norm": 0.049576129764318466, + "learning_rate": 9.989588474980873e-06, + "loss": 0.0068, + "step": 36950 + }, + { + "epoch": 0.23704995943341467, + "grad_norm": 0.13811209797859192, + "learning_rate": 9.989552342747542e-06, + "loss": 0.0035, + "step": 36960 + }, + { + "epoch": 0.2371140963272008, + "grad_norm": 0.1147325187921524, + "learning_rate": 9.989516147991552e-06, + "loss": 0.0187, + "step": 36970 + }, + { + "epoch": 0.23717823322098688, + "grad_norm": 0.4486019015312195, + "learning_rate": 9.989479890713361e-06, + "loss": 0.0066, + "step": 36980 + }, + { + "epoch": 0.23724237011477298, + "grad_norm": 0.13639159500598907, + "learning_rate": 9.989443570913419e-06, + "loss": 0.0043, + "step": 36990 + }, + { + "epoch": 0.23730650700855907, + "grad_norm": 0.531525731086731, + "learning_rate": 9.989407188592186e-06, + "loss": 0.0057, + "step": 37000 + }, + { + "epoch": 0.23737064390234516, + "grad_norm": 0.34439024329185486, + "learning_rate": 9.989370743750113e-06, + "loss": 0.0063, + "step": 37010 + }, + { + "epoch": 0.23743478079613126, + "grad_norm": 0.36032649874687195, + "learning_rate": 9.98933423638766e-06, + "loss": 0.0045, + "step": 37020 + }, + { + "epoch": 0.23749891768991735, + "grad_norm": 0.3823663294315338, + "learning_rate": 9.989297666505282e-06, + "loss": 0.0056, + "step": 37030 + }, + { + "epoch": 0.23756305458370347, + "grad_norm": 0.2125890702009201, + "learning_rate": 9.98926103410344e-06, + "loss": 0.0049, + "step": 37040 + }, + { + "epoch": 0.23762719147748956, + "grad_norm": 0.32678574323654175, + "learning_rate": 9.98922433918259e-06, + "loss": 0.0053, + "step": 37050 + }, + { + "epoch": 0.23769132837127566, + "grad_norm": 0.20189277827739716, + "learning_rate": 9.989187581743194e-06, + "loss": 0.0038, + "step": 37060 + }, + { + "epoch": 0.23775546526506175, + "grad_norm": 0.25536254048347473, + "learning_rate": 9.98915076178571e-06, + "loss": 0.0069, + "step": 37070 + }, + { + "epoch": 0.23781960215884784, + "grad_norm": 0.441403865814209, + "learning_rate": 9.989113879310604e-06, + "loss": 0.0056, + "step": 37080 + }, + { + "epoch": 0.23788373905263394, + "grad_norm": 0.31718453764915466, + "learning_rate": 9.989076934318334e-06, + "loss": 0.004, + "step": 37090 + }, + { + "epoch": 0.23794787594642003, + "grad_norm": 0.4428984224796295, + "learning_rate": 9.989039926809363e-06, + "loss": 0.0036, + "step": 37100 + }, + { + "epoch": 0.23801201284020615, + "grad_norm": 0.05789393559098244, + "learning_rate": 9.989002856784159e-06, + "loss": 0.0023, + "step": 37110 + }, + { + "epoch": 0.23807614973399224, + "grad_norm": 0.49261847138404846, + "learning_rate": 9.988965724243183e-06, + "loss": 0.0056, + "step": 37120 + }, + { + "epoch": 0.23814028662777834, + "grad_norm": 0.36797863245010376, + "learning_rate": 9.9889285291869e-06, + "loss": 0.0059, + "step": 37130 + }, + { + "epoch": 0.23820442352156443, + "grad_norm": 0.2522566020488739, + "learning_rate": 9.988891271615777e-06, + "loss": 0.0062, + "step": 37140 + }, + { + "epoch": 0.23826856041535052, + "grad_norm": 0.43269291520118713, + "learning_rate": 9.98885395153028e-06, + "loss": 0.0061, + "step": 37150 + }, + { + "epoch": 0.23833269730913662, + "grad_norm": 0.3380693197250366, + "learning_rate": 9.988816568930879e-06, + "loss": 0.0083, + "step": 37160 + }, + { + "epoch": 0.2383968342029227, + "grad_norm": 0.1102917343378067, + "learning_rate": 9.98877912381804e-06, + "loss": 0.0096, + "step": 37170 + }, + { + "epoch": 0.2384609710967088, + "grad_norm": 0.1320537030696869, + "learning_rate": 9.988741616192234e-06, + "loss": 0.0099, + "step": 37180 + }, + { + "epoch": 0.23852510799049492, + "grad_norm": 0.24336595833301544, + "learning_rate": 9.98870404605393e-06, + "loss": 0.006, + "step": 37190 + }, + { + "epoch": 0.23858924488428102, + "grad_norm": 0.2764676511287689, + "learning_rate": 9.9886664134036e-06, + "loss": 0.0082, + "step": 37200 + }, + { + "epoch": 0.2386533817780671, + "grad_norm": 0.24236895143985748, + "learning_rate": 9.988628718241712e-06, + "loss": 0.0063, + "step": 37210 + }, + { + "epoch": 0.2387175186718532, + "grad_norm": 0.2600942552089691, + "learning_rate": 9.988590960568743e-06, + "loss": 0.0059, + "step": 37220 + }, + { + "epoch": 0.2387816555656393, + "grad_norm": 0.1426173597574234, + "learning_rate": 9.988553140385163e-06, + "loss": 0.0048, + "step": 37230 + }, + { + "epoch": 0.2388457924594254, + "grad_norm": 0.31119289994239807, + "learning_rate": 9.988515257691446e-06, + "loss": 0.009, + "step": 37240 + }, + { + "epoch": 0.23890992935321148, + "grad_norm": 0.26968684792518616, + "learning_rate": 9.988477312488069e-06, + "loss": 0.004, + "step": 37250 + }, + { + "epoch": 0.2389740662469976, + "grad_norm": 0.3957492411136627, + "learning_rate": 9.988439304775506e-06, + "loss": 0.0045, + "step": 37260 + }, + { + "epoch": 0.2390382031407837, + "grad_norm": 0.10733895003795624, + "learning_rate": 9.988401234554233e-06, + "loss": 0.0042, + "step": 37270 + }, + { + "epoch": 0.2391023400345698, + "grad_norm": 0.3841085135936737, + "learning_rate": 9.988363101824727e-06, + "loss": 0.0053, + "step": 37280 + }, + { + "epoch": 0.23916647692835588, + "grad_norm": 0.33194515109062195, + "learning_rate": 9.988324906587467e-06, + "loss": 0.0049, + "step": 37290 + }, + { + "epoch": 0.23923061382214197, + "grad_norm": 0.4566282033920288, + "learning_rate": 9.98828664884293e-06, + "loss": 0.0054, + "step": 37300 + }, + { + "epoch": 0.23929475071592807, + "grad_norm": 0.17479823529720306, + "learning_rate": 9.988248328591597e-06, + "loss": 0.0058, + "step": 37310 + }, + { + "epoch": 0.23935888760971416, + "grad_norm": 0.2093168944120407, + "learning_rate": 9.988209945833947e-06, + "loss": 0.0079, + "step": 37320 + }, + { + "epoch": 0.23942302450350028, + "grad_norm": 0.511039137840271, + "learning_rate": 9.98817150057046e-06, + "loss": 0.0047, + "step": 37330 + }, + { + "epoch": 0.23948716139728637, + "grad_norm": 0.2602264881134033, + "learning_rate": 9.988132992801623e-06, + "loss": 0.0076, + "step": 37340 + }, + { + "epoch": 0.23955129829107247, + "grad_norm": 0.2345946729183197, + "learning_rate": 9.98809442252791e-06, + "loss": 0.0039, + "step": 37350 + }, + { + "epoch": 0.23961543518485856, + "grad_norm": 0.3673745095729828, + "learning_rate": 9.98805578974981e-06, + "loss": 0.0059, + "step": 37360 + }, + { + "epoch": 0.23967957207864465, + "grad_norm": 0.424146831035614, + "learning_rate": 9.988017094467809e-06, + "loss": 0.0044, + "step": 37370 + }, + { + "epoch": 0.23974370897243075, + "grad_norm": 0.13633568584918976, + "learning_rate": 9.987978336682388e-06, + "loss": 0.007, + "step": 37380 + }, + { + "epoch": 0.23980784586621684, + "grad_norm": 0.20849905908107758, + "learning_rate": 9.987939516394032e-06, + "loss": 0.0048, + "step": 37390 + }, + { + "epoch": 0.23987198276000296, + "grad_norm": 0.3548513352870941, + "learning_rate": 9.98790063360323e-06, + "loss": 0.0059, + "step": 37400 + }, + { + "epoch": 0.23993611965378905, + "grad_norm": 0.5116329193115234, + "learning_rate": 9.987861688310467e-06, + "loss": 0.008, + "step": 37410 + }, + { + "epoch": 0.24000025654757515, + "grad_norm": 0.28186020255088806, + "learning_rate": 9.987822680516232e-06, + "loss": 0.0035, + "step": 37420 + }, + { + "epoch": 0.24006439344136124, + "grad_norm": 0.24905213713645935, + "learning_rate": 9.987783610221015e-06, + "loss": 0.0054, + "step": 37430 + }, + { + "epoch": 0.24012853033514733, + "grad_norm": 0.15364207327365875, + "learning_rate": 9.987744477425306e-06, + "loss": 0.0055, + "step": 37440 + }, + { + "epoch": 0.24019266722893343, + "grad_norm": 0.24339143931865692, + "learning_rate": 9.987705282129593e-06, + "loss": 0.0077, + "step": 37450 + }, + { + "epoch": 0.24025680412271952, + "grad_norm": 0.07584648579359055, + "learning_rate": 9.987666024334368e-06, + "loss": 0.008, + "step": 37460 + }, + { + "epoch": 0.24032094101650564, + "grad_norm": 0.24445678293704987, + "learning_rate": 9.987626704040123e-06, + "loss": 0.0044, + "step": 37470 + }, + { + "epoch": 0.24038507791029173, + "grad_norm": 0.1876668930053711, + "learning_rate": 9.98758732124735e-06, + "loss": 0.0048, + "step": 37480 + }, + { + "epoch": 0.24044921480407783, + "grad_norm": 0.3630131185054779, + "learning_rate": 9.987547875956545e-06, + "loss": 0.0053, + "step": 37490 + }, + { + "epoch": 0.24051335169786392, + "grad_norm": 0.2916017174720764, + "learning_rate": 9.987508368168199e-06, + "loss": 0.0075, + "step": 37500 + }, + { + "epoch": 0.24057748859165, + "grad_norm": 0.5088058710098267, + "learning_rate": 9.98746879788281e-06, + "loss": 0.0088, + "step": 37510 + }, + { + "epoch": 0.2406416254854361, + "grad_norm": 0.5135015249252319, + "learning_rate": 9.987429165100873e-06, + "loss": 0.0065, + "step": 37520 + }, + { + "epoch": 0.2407057623792222, + "grad_norm": 0.36910855770111084, + "learning_rate": 9.98738946982288e-06, + "loss": 0.0084, + "step": 37530 + }, + { + "epoch": 0.24076989927300832, + "grad_norm": 0.20661070942878723, + "learning_rate": 9.987349712049337e-06, + "loss": 0.006, + "step": 37540 + }, + { + "epoch": 0.2408340361667944, + "grad_norm": 0.21455512940883636, + "learning_rate": 9.987309891780737e-06, + "loss": 0.0055, + "step": 37550 + }, + { + "epoch": 0.2408981730605805, + "grad_norm": 0.24813826382160187, + "learning_rate": 9.987270009017578e-06, + "loss": 0.0039, + "step": 37560 + }, + { + "epoch": 0.2409623099543666, + "grad_norm": 0.1844363510608673, + "learning_rate": 9.987230063760363e-06, + "loss": 0.0058, + "step": 37570 + }, + { + "epoch": 0.2410264468481527, + "grad_norm": 0.5201581120491028, + "learning_rate": 9.98719005600959e-06, + "loss": 0.0052, + "step": 37580 + }, + { + "epoch": 0.24109058374193879, + "grad_norm": 0.33404937386512756, + "learning_rate": 9.987149985765762e-06, + "loss": 0.006, + "step": 37590 + }, + { + "epoch": 0.24115472063572488, + "grad_norm": 0.18592841923236847, + "learning_rate": 9.987109853029379e-06, + "loss": 0.0107, + "step": 37600 + }, + { + "epoch": 0.241218857529511, + "grad_norm": 0.3620583117008209, + "learning_rate": 9.987069657800947e-06, + "loss": 0.0051, + "step": 37610 + }, + { + "epoch": 0.2412829944232971, + "grad_norm": 0.4495629668235779, + "learning_rate": 9.987029400080968e-06, + "loss": 0.0055, + "step": 37620 + }, + { + "epoch": 0.24134713131708319, + "grad_norm": 0.17936894297599792, + "learning_rate": 9.986989079869944e-06, + "loss": 0.0066, + "step": 37630 + }, + { + "epoch": 0.24141126821086928, + "grad_norm": 0.0979299545288086, + "learning_rate": 9.986948697168386e-06, + "loss": 0.0048, + "step": 37640 + }, + { + "epoch": 0.24147540510465537, + "grad_norm": 0.2822265625, + "learning_rate": 9.986908251976794e-06, + "loss": 0.0063, + "step": 37650 + }, + { + "epoch": 0.24153954199844146, + "grad_norm": 0.360325425863266, + "learning_rate": 9.986867744295678e-06, + "loss": 0.0076, + "step": 37660 + }, + { + "epoch": 0.24160367889222756, + "grad_norm": 0.3158207833766937, + "learning_rate": 9.986827174125547e-06, + "loss": 0.0041, + "step": 37670 + }, + { + "epoch": 0.24166781578601368, + "grad_norm": 0.203022301197052, + "learning_rate": 9.986786541466907e-06, + "loss": 0.0039, + "step": 37680 + }, + { + "epoch": 0.24173195267979977, + "grad_norm": 0.09906020760536194, + "learning_rate": 9.986745846320266e-06, + "loss": 0.0051, + "step": 37690 + }, + { + "epoch": 0.24179608957358587, + "grad_norm": 0.29850175976753235, + "learning_rate": 9.986705088686136e-06, + "loss": 0.0055, + "step": 37700 + }, + { + "epoch": 0.24186022646737196, + "grad_norm": 0.2408210039138794, + "learning_rate": 9.986664268565027e-06, + "loss": 0.0064, + "step": 37710 + }, + { + "epoch": 0.24192436336115805, + "grad_norm": 0.21141502261161804, + "learning_rate": 9.986623385957452e-06, + "loss": 0.0045, + "step": 37720 + }, + { + "epoch": 0.24198850025494414, + "grad_norm": 0.26400238275527954, + "learning_rate": 9.986582440863923e-06, + "loss": 0.0048, + "step": 37730 + }, + { + "epoch": 0.24205263714873024, + "grad_norm": 0.33857569098472595, + "learning_rate": 9.98654143328495e-06, + "loss": 0.0039, + "step": 37740 + }, + { + "epoch": 0.24211677404251636, + "grad_norm": 0.25933921337127686, + "learning_rate": 9.98650036322105e-06, + "loss": 0.0041, + "step": 37750 + }, + { + "epoch": 0.24218091093630245, + "grad_norm": 0.16440115869045258, + "learning_rate": 9.986459230672736e-06, + "loss": 0.0039, + "step": 37760 + }, + { + "epoch": 0.24224504783008854, + "grad_norm": 0.11378602683544159, + "learning_rate": 9.986418035640523e-06, + "loss": 0.0072, + "step": 37770 + }, + { + "epoch": 0.24230918472387464, + "grad_norm": 0.1978156566619873, + "learning_rate": 9.98637677812493e-06, + "loss": 0.0037, + "step": 37780 + }, + { + "epoch": 0.24237332161766073, + "grad_norm": 0.24849757552146912, + "learning_rate": 9.986335458126472e-06, + "loss": 0.006, + "step": 37790 + }, + { + "epoch": 0.24243745851144682, + "grad_norm": 0.25591734051704407, + "learning_rate": 9.986294075645668e-06, + "loss": 0.0057, + "step": 37800 + }, + { + "epoch": 0.24250159540523292, + "grad_norm": 0.10914437472820282, + "learning_rate": 9.986252630683035e-06, + "loss": 0.005, + "step": 37810 + }, + { + "epoch": 0.24256573229901904, + "grad_norm": 0.33318716287612915, + "learning_rate": 9.986211123239092e-06, + "loss": 0.0077, + "step": 37820 + }, + { + "epoch": 0.24262986919280513, + "grad_norm": 0.20896196365356445, + "learning_rate": 9.98616955331436e-06, + "loss": 0.004, + "step": 37830 + }, + { + "epoch": 0.24269400608659122, + "grad_norm": 0.525875449180603, + "learning_rate": 9.986127920909361e-06, + "loss": 0.0115, + "step": 37840 + }, + { + "epoch": 0.24275814298037732, + "grad_norm": 0.25195562839508057, + "learning_rate": 9.986086226024615e-06, + "loss": 0.0113, + "step": 37850 + }, + { + "epoch": 0.2428222798741634, + "grad_norm": 0.48433178663253784, + "learning_rate": 9.986044468660646e-06, + "loss": 0.007, + "step": 37860 + }, + { + "epoch": 0.2428864167679495, + "grad_norm": 0.19795604050159454, + "learning_rate": 9.986002648817976e-06, + "loss": 0.0052, + "step": 37870 + }, + { + "epoch": 0.2429505536617356, + "grad_norm": 0.1475193351507187, + "learning_rate": 9.985960766497129e-06, + "loss": 0.006, + "step": 37880 + }, + { + "epoch": 0.24301469055552172, + "grad_norm": 0.1122908964753151, + "learning_rate": 9.98591882169863e-06, + "loss": 0.0049, + "step": 37890 + }, + { + "epoch": 0.2430788274493078, + "grad_norm": 0.22580060362815857, + "learning_rate": 9.985876814423005e-06, + "loss": 0.0068, + "step": 37900 + }, + { + "epoch": 0.2431429643430939, + "grad_norm": 0.17058011889457703, + "learning_rate": 9.985834744670779e-06, + "loss": 0.0056, + "step": 37910 + }, + { + "epoch": 0.24320710123688, + "grad_norm": 0.46447932720184326, + "learning_rate": 9.985792612442482e-06, + "loss": 0.0087, + "step": 37920 + }, + { + "epoch": 0.2432712381306661, + "grad_norm": 0.07212353497743607, + "learning_rate": 9.98575041773864e-06, + "loss": 0.0051, + "step": 37930 + }, + { + "epoch": 0.24333537502445218, + "grad_norm": 0.2533189356327057, + "learning_rate": 9.985708160559781e-06, + "loss": 0.0063, + "step": 37940 + }, + { + "epoch": 0.24339951191823828, + "grad_norm": 0.2570257782936096, + "learning_rate": 9.985665840906437e-06, + "loss": 0.0099, + "step": 37950 + }, + { + "epoch": 0.2434636488120244, + "grad_norm": 0.2666739821434021, + "learning_rate": 9.985623458779136e-06, + "loss": 0.0043, + "step": 37960 + }, + { + "epoch": 0.2435277857058105, + "grad_norm": 0.31614717841148376, + "learning_rate": 9.98558101417841e-06, + "loss": 0.0043, + "step": 37970 + }, + { + "epoch": 0.24359192259959658, + "grad_norm": 0.4189772605895996, + "learning_rate": 9.98553850710479e-06, + "loss": 0.0085, + "step": 37980 + }, + { + "epoch": 0.24365605949338268, + "grad_norm": 0.07213608175516129, + "learning_rate": 9.98549593755881e-06, + "loss": 0.0079, + "step": 37990 + }, + { + "epoch": 0.24372019638716877, + "grad_norm": 0.45046406984329224, + "learning_rate": 9.985453305541002e-06, + "loss": 0.0059, + "step": 38000 + }, + { + "epoch": 0.24378433328095486, + "grad_norm": 0.2924140393733978, + "learning_rate": 9.985410611051903e-06, + "loss": 0.0068, + "step": 38010 + }, + { + "epoch": 0.24384847017474096, + "grad_norm": 0.13637924194335938, + "learning_rate": 9.985367854092044e-06, + "loss": 0.0043, + "step": 38020 + }, + { + "epoch": 0.24391260706852708, + "grad_norm": 0.07379632443189621, + "learning_rate": 9.985325034661965e-06, + "loss": 0.0041, + "step": 38030 + }, + { + "epoch": 0.24397674396231317, + "grad_norm": 0.14553886651992798, + "learning_rate": 9.985282152762199e-06, + "loss": 0.0055, + "step": 38040 + }, + { + "epoch": 0.24404088085609926, + "grad_norm": 0.12040738761425018, + "learning_rate": 9.985239208393285e-06, + "loss": 0.0041, + "step": 38050 + }, + { + "epoch": 0.24410501774988536, + "grad_norm": 0.3329266905784607, + "learning_rate": 9.98519620155576e-06, + "loss": 0.0095, + "step": 38060 + }, + { + "epoch": 0.24416915464367145, + "grad_norm": 0.2862496078014374, + "learning_rate": 9.985153132250165e-06, + "loss": 0.0057, + "step": 38070 + }, + { + "epoch": 0.24423329153745754, + "grad_norm": 0.16985680162906647, + "learning_rate": 9.985110000477036e-06, + "loss": 0.0046, + "step": 38080 + }, + { + "epoch": 0.24429742843124364, + "grad_norm": 0.532107949256897, + "learning_rate": 9.985066806236919e-06, + "loss": 0.0056, + "step": 38090 + }, + { + "epoch": 0.24436156532502976, + "grad_norm": 0.28466662764549255, + "learning_rate": 9.985023549530351e-06, + "loss": 0.0072, + "step": 38100 + }, + { + "epoch": 0.24442570221881585, + "grad_norm": 0.1789170801639557, + "learning_rate": 9.984980230357875e-06, + "loss": 0.0043, + "step": 38110 + }, + { + "epoch": 0.24448983911260194, + "grad_norm": 0.18516568839550018, + "learning_rate": 9.984936848720032e-06, + "loss": 0.0087, + "step": 38120 + }, + { + "epoch": 0.24455397600638804, + "grad_norm": 0.29143643379211426, + "learning_rate": 9.98489340461737e-06, + "loss": 0.0065, + "step": 38130 + }, + { + "epoch": 0.24461811290017413, + "grad_norm": 0.2915891706943512, + "learning_rate": 9.98484989805043e-06, + "loss": 0.0092, + "step": 38140 + }, + { + "epoch": 0.24468224979396022, + "grad_norm": 0.16607245802879333, + "learning_rate": 9.984806329019761e-06, + "loss": 0.006, + "step": 38150 + }, + { + "epoch": 0.24474638668774631, + "grad_norm": 0.21408230066299438, + "learning_rate": 9.984762697525903e-06, + "loss": 0.0027, + "step": 38160 + }, + { + "epoch": 0.24481052358153244, + "grad_norm": 0.28983208537101746, + "learning_rate": 9.984719003569407e-06, + "loss": 0.0064, + "step": 38170 + }, + { + "epoch": 0.24487466047531853, + "grad_norm": 0.3070462942123413, + "learning_rate": 9.984675247150818e-06, + "loss": 0.0099, + "step": 38180 + }, + { + "epoch": 0.24493879736910462, + "grad_norm": 0.1305485963821411, + "learning_rate": 9.984631428270687e-06, + "loss": 0.0051, + "step": 38190 + }, + { + "epoch": 0.24500293426289071, + "grad_norm": 0.21113575994968414, + "learning_rate": 9.984587546929562e-06, + "loss": 0.0072, + "step": 38200 + }, + { + "epoch": 0.2450670711566768, + "grad_norm": 0.19226740300655365, + "learning_rate": 9.984543603127992e-06, + "loss": 0.0042, + "step": 38210 + }, + { + "epoch": 0.2451312080504629, + "grad_norm": 0.36949214339256287, + "learning_rate": 9.984499596866528e-06, + "loss": 0.0062, + "step": 38220 + }, + { + "epoch": 0.245195344944249, + "grad_norm": 0.5497113466262817, + "learning_rate": 9.984455528145723e-06, + "loss": 0.0061, + "step": 38230 + }, + { + "epoch": 0.24525948183803511, + "grad_norm": 0.21563158929347992, + "learning_rate": 9.984411396966125e-06, + "loss": 0.0055, + "step": 38240 + }, + { + "epoch": 0.2453236187318212, + "grad_norm": 0.19525288045406342, + "learning_rate": 9.984367203328292e-06, + "loss": 0.0064, + "step": 38250 + }, + { + "epoch": 0.2453877556256073, + "grad_norm": 0.32135501503944397, + "learning_rate": 9.984322947232776e-06, + "loss": 0.0063, + "step": 38260 + }, + { + "epoch": 0.2454518925193934, + "grad_norm": 0.43099191784858704, + "learning_rate": 9.98427862868013e-06, + "loss": 0.0087, + "step": 38270 + }, + { + "epoch": 0.2455160294131795, + "grad_norm": 0.5807035565376282, + "learning_rate": 9.984234247670912e-06, + "loss": 0.0058, + "step": 38280 + }, + { + "epoch": 0.24558016630696558, + "grad_norm": 0.24664615094661713, + "learning_rate": 9.984189804205676e-06, + "loss": 0.0037, + "step": 38290 + }, + { + "epoch": 0.24564430320075167, + "grad_norm": 0.4822005331516266, + "learning_rate": 9.98414529828498e-06, + "loss": 0.0117, + "step": 38300 + }, + { + "epoch": 0.2457084400945378, + "grad_norm": 0.22811010479927063, + "learning_rate": 9.98410072990938e-06, + "loss": 0.0057, + "step": 38310 + }, + { + "epoch": 0.2457725769883239, + "grad_norm": 0.2250605970621109, + "learning_rate": 9.984056099079437e-06, + "loss": 0.011, + "step": 38320 + }, + { + "epoch": 0.24583671388210998, + "grad_norm": 0.48564302921295166, + "learning_rate": 9.984011405795709e-06, + "loss": 0.0077, + "step": 38330 + }, + { + "epoch": 0.24590085077589607, + "grad_norm": 0.5844722986221313, + "learning_rate": 9.983966650058756e-06, + "loss": 0.009, + "step": 38340 + }, + { + "epoch": 0.24596498766968217, + "grad_norm": 0.20418652892112732, + "learning_rate": 9.983921831869137e-06, + "loss": 0.0058, + "step": 38350 + }, + { + "epoch": 0.24602912456346826, + "grad_norm": 0.39494210481643677, + "learning_rate": 9.983876951227418e-06, + "loss": 0.0057, + "step": 38360 + }, + { + "epoch": 0.24609326145725435, + "grad_norm": 0.13403719663619995, + "learning_rate": 9.983832008134157e-06, + "loss": 0.0058, + "step": 38370 + }, + { + "epoch": 0.24615739835104047, + "grad_norm": 0.2099473774433136, + "learning_rate": 9.98378700258992e-06, + "loss": 0.0042, + "step": 38380 + }, + { + "epoch": 0.24622153524482657, + "grad_norm": 0.26247361302375793, + "learning_rate": 9.98374193459527e-06, + "loss": 0.0084, + "step": 38390 + }, + { + "epoch": 0.24628567213861266, + "grad_norm": 0.36011576652526855, + "learning_rate": 9.983696804150772e-06, + "loss": 0.0056, + "step": 38400 + }, + { + "epoch": 0.24634980903239875, + "grad_norm": 0.2732531726360321, + "learning_rate": 9.98365161125699e-06, + "loss": 0.0051, + "step": 38410 + }, + { + "epoch": 0.24641394592618485, + "grad_norm": 0.0891374871134758, + "learning_rate": 9.983606355914493e-06, + "loss": 0.0055, + "step": 38420 + }, + { + "epoch": 0.24647808281997094, + "grad_norm": 0.092893585562706, + "learning_rate": 9.983561038123847e-06, + "loss": 0.0033, + "step": 38430 + }, + { + "epoch": 0.24654221971375703, + "grad_norm": 0.21625855565071106, + "learning_rate": 9.98351565788562e-06, + "loss": 0.0072, + "step": 38440 + }, + { + "epoch": 0.24660635660754315, + "grad_norm": 0.12428835779428482, + "learning_rate": 9.983470215200377e-06, + "loss": 0.0063, + "step": 38450 + }, + { + "epoch": 0.24667049350132925, + "grad_norm": 0.11295032501220703, + "learning_rate": 9.983424710068693e-06, + "loss": 0.0079, + "step": 38460 + }, + { + "epoch": 0.24673463039511534, + "grad_norm": 0.20468109846115112, + "learning_rate": 9.983379142491134e-06, + "loss": 0.0051, + "step": 38470 + }, + { + "epoch": 0.24679876728890143, + "grad_norm": 0.3960379958152771, + "learning_rate": 9.983333512468274e-06, + "loss": 0.0052, + "step": 38480 + }, + { + "epoch": 0.24686290418268753, + "grad_norm": 0.20143310725688934, + "learning_rate": 9.983287820000684e-06, + "loss": 0.0062, + "step": 38490 + }, + { + "epoch": 0.24692704107647362, + "grad_norm": 0.2517850399017334, + "learning_rate": 9.983242065088937e-06, + "loss": 0.0071, + "step": 38500 + }, + { + "epoch": 0.2469911779702597, + "grad_norm": 0.710372269153595, + "learning_rate": 9.983196247733602e-06, + "loss": 0.005, + "step": 38510 + }, + { + "epoch": 0.24705531486404583, + "grad_norm": 0.2682972848415375, + "learning_rate": 9.983150367935259e-06, + "loss": 0.0038, + "step": 38520 + }, + { + "epoch": 0.24711945175783193, + "grad_norm": 0.058020081371068954, + "learning_rate": 9.98310442569448e-06, + "loss": 0.0049, + "step": 38530 + }, + { + "epoch": 0.24718358865161802, + "grad_norm": 0.47107598185539246, + "learning_rate": 9.983058421011842e-06, + "loss": 0.0052, + "step": 38540 + }, + { + "epoch": 0.2472477255454041, + "grad_norm": 0.2273666262626648, + "learning_rate": 9.98301235388792e-06, + "loss": 0.0052, + "step": 38550 + }, + { + "epoch": 0.2473118624391902, + "grad_norm": 0.16423803567886353, + "learning_rate": 9.982966224323292e-06, + "loss": 0.0068, + "step": 38560 + }, + { + "epoch": 0.2473759993329763, + "grad_norm": 0.33906319737434387, + "learning_rate": 9.982920032318536e-06, + "loss": 0.0057, + "step": 38570 + }, + { + "epoch": 0.2474401362267624, + "grad_norm": 0.3201685845851898, + "learning_rate": 9.982873777874228e-06, + "loss": 0.0056, + "step": 38580 + }, + { + "epoch": 0.2475042731205485, + "grad_norm": 0.18730387091636658, + "learning_rate": 9.982827460990955e-06, + "loss": 0.0034, + "step": 38590 + }, + { + "epoch": 0.2475684100143346, + "grad_norm": 0.4003932774066925, + "learning_rate": 9.98278108166929e-06, + "loss": 0.0086, + "step": 38600 + }, + { + "epoch": 0.2476325469081207, + "grad_norm": 0.11151555180549622, + "learning_rate": 9.982734639909819e-06, + "loss": 0.0044, + "step": 38610 + }, + { + "epoch": 0.2476966838019068, + "grad_norm": 0.438439279794693, + "learning_rate": 9.982688135713119e-06, + "loss": 0.0048, + "step": 38620 + }, + { + "epoch": 0.24776082069569288, + "grad_norm": 0.3096461594104767, + "learning_rate": 9.982641569079777e-06, + "loss": 0.0088, + "step": 38630 + }, + { + "epoch": 0.24782495758947898, + "grad_norm": 0.23310524225234985, + "learning_rate": 9.982594940010375e-06, + "loss": 0.0075, + "step": 38640 + }, + { + "epoch": 0.24788909448326507, + "grad_norm": 0.1498975306749344, + "learning_rate": 9.982548248505497e-06, + "loss": 0.0067, + "step": 38650 + }, + { + "epoch": 0.24795323137705116, + "grad_norm": 0.2816608250141144, + "learning_rate": 9.98250149456573e-06, + "loss": 0.0062, + "step": 38660 + }, + { + "epoch": 0.24801736827083729, + "grad_norm": 0.05591500177979469, + "learning_rate": 9.982454678191658e-06, + "loss": 0.0045, + "step": 38670 + }, + { + "epoch": 0.24808150516462338, + "grad_norm": 0.5578827857971191, + "learning_rate": 9.982407799383866e-06, + "loss": 0.005, + "step": 38680 + }, + { + "epoch": 0.24814564205840947, + "grad_norm": 0.3391058146953583, + "learning_rate": 9.982360858142945e-06, + "loss": 0.0055, + "step": 38690 + }, + { + "epoch": 0.24820977895219556, + "grad_norm": 0.19329701364040375, + "learning_rate": 9.982313854469481e-06, + "loss": 0.0053, + "step": 38700 + }, + { + "epoch": 0.24827391584598166, + "grad_norm": 0.10662292689085007, + "learning_rate": 9.982266788364064e-06, + "loss": 0.0054, + "step": 38710 + }, + { + "epoch": 0.24833805273976775, + "grad_norm": 0.18978509306907654, + "learning_rate": 9.982219659827283e-06, + "loss": 0.0073, + "step": 38720 + }, + { + "epoch": 0.24840218963355384, + "grad_norm": 0.35354286432266235, + "learning_rate": 9.98217246885973e-06, + "loss": 0.0067, + "step": 38730 + }, + { + "epoch": 0.24846632652733996, + "grad_norm": 0.16298361122608185, + "learning_rate": 9.982125215461992e-06, + "loss": 0.0049, + "step": 38740 + }, + { + "epoch": 0.24853046342112606, + "grad_norm": 0.15791991353034973, + "learning_rate": 9.982077899634668e-06, + "loss": 0.0078, + "step": 38750 + }, + { + "epoch": 0.24859460031491215, + "grad_norm": 0.21328802406787872, + "learning_rate": 9.982030521378346e-06, + "loss": 0.0054, + "step": 38760 + }, + { + "epoch": 0.24865873720869824, + "grad_norm": 0.12775328755378723, + "learning_rate": 9.98198308069362e-06, + "loss": 0.0084, + "step": 38770 + }, + { + "epoch": 0.24872287410248434, + "grad_norm": 0.2946627140045166, + "learning_rate": 9.981935577581088e-06, + "loss": 0.0097, + "step": 38780 + }, + { + "epoch": 0.24878701099627043, + "grad_norm": 0.20051661133766174, + "learning_rate": 9.981888012041341e-06, + "loss": 0.0041, + "step": 38790 + }, + { + "epoch": 0.24885114789005652, + "grad_norm": 0.2328706979751587, + "learning_rate": 9.981840384074977e-06, + "loss": 0.0061, + "step": 38800 + }, + { + "epoch": 0.24891528478384264, + "grad_norm": 0.18127407133579254, + "learning_rate": 9.981792693682592e-06, + "loss": 0.0116, + "step": 38810 + }, + { + "epoch": 0.24897942167762874, + "grad_norm": 0.31884151697158813, + "learning_rate": 9.981744940864786e-06, + "loss": 0.0053, + "step": 38820 + }, + { + "epoch": 0.24904355857141483, + "grad_norm": 0.31905224919319153, + "learning_rate": 9.981697125622153e-06, + "loss": 0.0084, + "step": 38830 + }, + { + "epoch": 0.24910769546520092, + "grad_norm": 0.30726176500320435, + "learning_rate": 9.981649247955296e-06, + "loss": 0.0073, + "step": 38840 + }, + { + "epoch": 0.24917183235898702, + "grad_norm": 0.3163350224494934, + "learning_rate": 9.981601307864811e-06, + "loss": 0.0054, + "step": 38850 + }, + { + "epoch": 0.2492359692527731, + "grad_norm": 0.10571905225515366, + "learning_rate": 9.981553305351306e-06, + "loss": 0.004, + "step": 38860 + }, + { + "epoch": 0.2493001061465592, + "grad_norm": 0.20794761180877686, + "learning_rate": 9.981505240415375e-06, + "loss": 0.0066, + "step": 38870 + }, + { + "epoch": 0.24936424304034532, + "grad_norm": 0.29438677430152893, + "learning_rate": 9.981457113057623e-06, + "loss": 0.0061, + "step": 38880 + }, + { + "epoch": 0.24942837993413142, + "grad_norm": 0.16410395503044128, + "learning_rate": 9.981408923278654e-06, + "loss": 0.0034, + "step": 38890 + }, + { + "epoch": 0.2494925168279175, + "grad_norm": 0.12223443388938904, + "learning_rate": 9.98136067107907e-06, + "loss": 0.0075, + "step": 38900 + }, + { + "epoch": 0.2495566537217036, + "grad_norm": 0.16954652965068817, + "learning_rate": 9.981312356459479e-06, + "loss": 0.0058, + "step": 38910 + }, + { + "epoch": 0.2496207906154897, + "grad_norm": 0.40928614139556885, + "learning_rate": 9.981263979420481e-06, + "loss": 0.0056, + "step": 38920 + }, + { + "epoch": 0.2496849275092758, + "grad_norm": 0.1617717742919922, + "learning_rate": 9.981215539962687e-06, + "loss": 0.0039, + "step": 38930 + }, + { + "epoch": 0.24974906440306188, + "grad_norm": 0.18253344297409058, + "learning_rate": 9.981167038086701e-06, + "loss": 0.0048, + "step": 38940 + }, + { + "epoch": 0.249813201296848, + "grad_norm": 0.3788161277770996, + "learning_rate": 9.981118473793135e-06, + "loss": 0.0061, + "step": 38950 + }, + { + "epoch": 0.2498773381906341, + "grad_norm": 0.5014322996139526, + "learning_rate": 9.981069847082592e-06, + "loss": 0.0074, + "step": 38960 + }, + { + "epoch": 0.2499414750844202, + "grad_norm": 0.11842537671327591, + "learning_rate": 9.981021157955684e-06, + "loss": 0.0048, + "step": 38970 + }, + { + "epoch": 0.2500056119782063, + "grad_norm": 0.3788881301879883, + "learning_rate": 9.980972406413022e-06, + "loss": 0.0069, + "step": 38980 + }, + { + "epoch": 0.2500697488719924, + "grad_norm": 0.21051029860973358, + "learning_rate": 9.980923592455214e-06, + "loss": 0.0045, + "step": 38990 + }, + { + "epoch": 0.2501338857657785, + "grad_norm": 0.1375262290239334, + "learning_rate": 9.980874716082875e-06, + "loss": 0.0039, + "step": 39000 + }, + { + "epoch": 0.2501980226595646, + "grad_norm": 0.31067126989364624, + "learning_rate": 9.980825777296617e-06, + "loss": 0.0079, + "step": 39010 + }, + { + "epoch": 0.2502621595533507, + "grad_norm": 0.5094921588897705, + "learning_rate": 9.98077677609705e-06, + "loss": 0.0071, + "step": 39020 + }, + { + "epoch": 0.2503262964471368, + "grad_norm": 0.13262316584587097, + "learning_rate": 9.980727712484792e-06, + "loss": 0.0066, + "step": 39030 + }, + { + "epoch": 0.25039043334092287, + "grad_norm": 0.32757246494293213, + "learning_rate": 9.980678586460455e-06, + "loss": 0.0082, + "step": 39040 + }, + { + "epoch": 0.25045457023470896, + "grad_norm": 0.29297035932540894, + "learning_rate": 9.980629398024658e-06, + "loss": 0.0079, + "step": 39050 + }, + { + "epoch": 0.25051870712849506, + "grad_norm": 0.18163636326789856, + "learning_rate": 9.980580147178011e-06, + "loss": 0.0047, + "step": 39060 + }, + { + "epoch": 0.25058284402228115, + "grad_norm": 0.21941807866096497, + "learning_rate": 9.980530833921139e-06, + "loss": 0.0058, + "step": 39070 + }, + { + "epoch": 0.25064698091606724, + "grad_norm": 0.37371599674224854, + "learning_rate": 9.980481458254654e-06, + "loss": 0.0085, + "step": 39080 + }, + { + "epoch": 0.25071111780985333, + "grad_norm": 0.027556492015719414, + "learning_rate": 9.980432020179179e-06, + "loss": 0.0066, + "step": 39090 + }, + { + "epoch": 0.2507752547036394, + "grad_norm": 0.20441341400146484, + "learning_rate": 9.98038251969533e-06, + "loss": 0.0079, + "step": 39100 + }, + { + "epoch": 0.2508393915974255, + "grad_norm": 0.25886884331703186, + "learning_rate": 9.980332956803727e-06, + "loss": 0.0073, + "step": 39110 + }, + { + "epoch": 0.25090352849121167, + "grad_norm": 0.3851901888847351, + "learning_rate": 9.980283331504994e-06, + "loss": 0.0043, + "step": 39120 + }, + { + "epoch": 0.25096766538499776, + "grad_norm": 0.15157650411128998, + "learning_rate": 9.980233643799751e-06, + "loss": 0.0058, + "step": 39130 + }, + { + "epoch": 0.25103180227878386, + "grad_norm": 0.3190149962902069, + "learning_rate": 9.980183893688621e-06, + "loss": 0.0069, + "step": 39140 + }, + { + "epoch": 0.25109593917256995, + "grad_norm": 0.39699846506118774, + "learning_rate": 9.98013408117223e-06, + "loss": 0.0054, + "step": 39150 + }, + { + "epoch": 0.25116007606635604, + "grad_norm": 0.09573323279619217, + "learning_rate": 9.980084206251197e-06, + "loss": 0.0044, + "step": 39160 + }, + { + "epoch": 0.25122421296014213, + "grad_norm": 0.29710081219673157, + "learning_rate": 9.980034268926151e-06, + "loss": 0.0055, + "step": 39170 + }, + { + "epoch": 0.25128834985392823, + "grad_norm": 0.07126680016517639, + "learning_rate": 9.979984269197715e-06, + "loss": 0.0032, + "step": 39180 + }, + { + "epoch": 0.2513524867477143, + "grad_norm": 0.11775899678468704, + "learning_rate": 9.97993420706652e-06, + "loss": 0.0047, + "step": 39190 + }, + { + "epoch": 0.2514166236415004, + "grad_norm": 0.4710961580276489, + "learning_rate": 9.979884082533188e-06, + "loss": 0.0034, + "step": 39200 + }, + { + "epoch": 0.2514807605352865, + "grad_norm": 0.22415786981582642, + "learning_rate": 9.97983389559835e-06, + "loss": 0.0051, + "step": 39210 + }, + { + "epoch": 0.2515448974290726, + "grad_norm": 0.30657926201820374, + "learning_rate": 9.979783646262633e-06, + "loss": 0.0069, + "step": 39220 + }, + { + "epoch": 0.2516090343228587, + "grad_norm": 0.14880214631557465, + "learning_rate": 9.97973333452667e-06, + "loss": 0.0059, + "step": 39230 + }, + { + "epoch": 0.2516731712166448, + "grad_norm": 0.194294273853302, + "learning_rate": 9.979682960391086e-06, + "loss": 0.0059, + "step": 39240 + }, + { + "epoch": 0.2517373081104309, + "grad_norm": 0.2686821520328522, + "learning_rate": 9.97963252385652e-06, + "loss": 0.0055, + "step": 39250 + }, + { + "epoch": 0.25180144500421703, + "grad_norm": 0.5156296491622925, + "learning_rate": 9.979582024923596e-06, + "loss": 0.0072, + "step": 39260 + }, + { + "epoch": 0.2518655818980031, + "grad_norm": 0.3677895963191986, + "learning_rate": 9.979531463592951e-06, + "loss": 0.0036, + "step": 39270 + }, + { + "epoch": 0.2519297187917892, + "grad_norm": 0.1801011562347412, + "learning_rate": 9.979480839865219e-06, + "loss": 0.0054, + "step": 39280 + }, + { + "epoch": 0.2519938556855753, + "grad_norm": 0.26837414503097534, + "learning_rate": 9.979430153741034e-06, + "loss": 0.0035, + "step": 39290 + }, + { + "epoch": 0.2520579925793614, + "grad_norm": 0.21830004453659058, + "learning_rate": 9.979379405221028e-06, + "loss": 0.0047, + "step": 39300 + }, + { + "epoch": 0.2521221294731475, + "grad_norm": 0.5536841750144958, + "learning_rate": 9.979328594305842e-06, + "loss": 0.004, + "step": 39310 + }, + { + "epoch": 0.2521862663669336, + "grad_norm": 0.29669874906539917, + "learning_rate": 9.979277720996108e-06, + "loss": 0.0075, + "step": 39320 + }, + { + "epoch": 0.2522504032607197, + "grad_norm": 0.163956880569458, + "learning_rate": 9.979226785292465e-06, + "loss": 0.0052, + "step": 39330 + }, + { + "epoch": 0.2523145401545058, + "grad_norm": 0.3207421898841858, + "learning_rate": 9.979175787195556e-06, + "loss": 0.0067, + "step": 39340 + }, + { + "epoch": 0.25237867704829187, + "grad_norm": 0.3795740604400635, + "learning_rate": 9.979124726706011e-06, + "loss": 0.0052, + "step": 39350 + }, + { + "epoch": 0.25244281394207796, + "grad_norm": 0.22112007439136505, + "learning_rate": 9.979073603824477e-06, + "loss": 0.0054, + "step": 39360 + }, + { + "epoch": 0.25250695083586405, + "grad_norm": 0.2207915037870407, + "learning_rate": 9.979022418551592e-06, + "loss": 0.0075, + "step": 39370 + }, + { + "epoch": 0.25257108772965015, + "grad_norm": 0.2956860363483429, + "learning_rate": 9.978971170887997e-06, + "loss": 0.0047, + "step": 39380 + }, + { + "epoch": 0.25263522462343624, + "grad_norm": 0.24269051849842072, + "learning_rate": 9.978919860834337e-06, + "loss": 0.007, + "step": 39390 + }, + { + "epoch": 0.25269936151722233, + "grad_norm": 0.2062823325395584, + "learning_rate": 9.97886848839125e-06, + "loss": 0.0044, + "step": 39400 + }, + { + "epoch": 0.2527634984110085, + "grad_norm": 0.299936443567276, + "learning_rate": 9.978817053559384e-06, + "loss": 0.0057, + "step": 39410 + }, + { + "epoch": 0.2528276353047946, + "grad_norm": 0.17424818873405457, + "learning_rate": 9.97876555633938e-06, + "loss": 0.006, + "step": 39420 + }, + { + "epoch": 0.25289177219858067, + "grad_norm": 0.526239812374115, + "learning_rate": 9.978713996731888e-06, + "loss": 0.0072, + "step": 39430 + }, + { + "epoch": 0.25295590909236676, + "grad_norm": 0.2941557466983795, + "learning_rate": 9.978662374737551e-06, + "loss": 0.006, + "step": 39440 + }, + { + "epoch": 0.25302004598615285, + "grad_norm": 0.57313472032547, + "learning_rate": 9.978610690357014e-06, + "loss": 0.0055, + "step": 39450 + }, + { + "epoch": 0.25308418287993895, + "grad_norm": 0.36666339635849, + "learning_rate": 9.978558943590929e-06, + "loss": 0.0067, + "step": 39460 + }, + { + "epoch": 0.25314831977372504, + "grad_norm": 0.24211323261260986, + "learning_rate": 9.978507134439941e-06, + "loss": 0.0037, + "step": 39470 + }, + { + "epoch": 0.25321245666751113, + "grad_norm": 0.36444950103759766, + "learning_rate": 9.978455262904703e-06, + "loss": 0.0066, + "step": 39480 + }, + { + "epoch": 0.2532765935612972, + "grad_norm": 0.293956458568573, + "learning_rate": 9.97840332898586e-06, + "loss": 0.0088, + "step": 39490 + }, + { + "epoch": 0.2533407304550833, + "grad_norm": 0.27882516384124756, + "learning_rate": 9.978351332684066e-06, + "loss": 0.0066, + "step": 39500 + }, + { + "epoch": 0.2534048673488694, + "grad_norm": 0.1138320341706276, + "learning_rate": 9.978299273999972e-06, + "loss": 0.0071, + "step": 39510 + }, + { + "epoch": 0.2534690042426555, + "grad_norm": 0.2853226065635681, + "learning_rate": 9.97824715293423e-06, + "loss": 0.0056, + "step": 39520 + }, + { + "epoch": 0.2535331411364416, + "grad_norm": 0.05086997523903847, + "learning_rate": 9.978194969487493e-06, + "loss": 0.0057, + "step": 39530 + }, + { + "epoch": 0.2535972780302277, + "grad_norm": 0.17800335586071014, + "learning_rate": 9.978142723660415e-06, + "loss": 0.0045, + "step": 39540 + }, + { + "epoch": 0.25366141492401384, + "grad_norm": 0.337404727935791, + "learning_rate": 9.978090415453651e-06, + "loss": 0.0056, + "step": 39550 + }, + { + "epoch": 0.25372555181779993, + "grad_norm": 0.2746542692184448, + "learning_rate": 9.978038044867858e-06, + "loss": 0.0059, + "step": 39560 + }, + { + "epoch": 0.253789688711586, + "grad_norm": 0.8087136745452881, + "learning_rate": 9.977985611903688e-06, + "loss": 0.0056, + "step": 39570 + }, + { + "epoch": 0.2538538256053721, + "grad_norm": 0.07329525798559189, + "learning_rate": 9.977933116561801e-06, + "loss": 0.0051, + "step": 39580 + }, + { + "epoch": 0.2539179624991582, + "grad_norm": 0.16835616528987885, + "learning_rate": 9.977880558842857e-06, + "loss": 0.005, + "step": 39590 + }, + { + "epoch": 0.2539820993929443, + "grad_norm": 0.22268901765346527, + "learning_rate": 9.977827938747508e-06, + "loss": 0.0055, + "step": 39600 + }, + { + "epoch": 0.2540462362867304, + "grad_norm": 0.18357685208320618, + "learning_rate": 9.977775256276421e-06, + "loss": 0.0067, + "step": 39610 + }, + { + "epoch": 0.2541103731805165, + "grad_norm": 0.24646100401878357, + "learning_rate": 9.97772251143025e-06, + "loss": 0.0064, + "step": 39620 + }, + { + "epoch": 0.2541745100743026, + "grad_norm": 0.249356210231781, + "learning_rate": 9.97766970420966e-06, + "loss": 0.0047, + "step": 39630 + }, + { + "epoch": 0.2542386469680887, + "grad_norm": 0.25969377160072327, + "learning_rate": 9.977616834615308e-06, + "loss": 0.0042, + "step": 39640 + }, + { + "epoch": 0.25430278386187477, + "grad_norm": 0.15693524479866028, + "learning_rate": 9.977563902647865e-06, + "loss": 0.0052, + "step": 39650 + }, + { + "epoch": 0.25436692075566086, + "grad_norm": 0.3180239200592041, + "learning_rate": 9.977510908307985e-06, + "loss": 0.0072, + "step": 39660 + }, + { + "epoch": 0.25443105764944696, + "grad_norm": 0.3399795591831207, + "learning_rate": 9.977457851596337e-06, + "loss": 0.004, + "step": 39670 + }, + { + "epoch": 0.25449519454323305, + "grad_norm": 0.07352469861507416, + "learning_rate": 9.977404732513584e-06, + "loss": 0.0061, + "step": 39680 + }, + { + "epoch": 0.2545593314370192, + "grad_norm": 0.3775775134563446, + "learning_rate": 9.977351551060392e-06, + "loss": 0.0076, + "step": 39690 + }, + { + "epoch": 0.2546234683308053, + "grad_norm": 0.15693242847919464, + "learning_rate": 9.97729830723743e-06, + "loss": 0.0086, + "step": 39700 + }, + { + "epoch": 0.2546876052245914, + "grad_norm": 0.29596182703971863, + "learning_rate": 9.977245001045362e-06, + "loss": 0.0053, + "step": 39710 + }, + { + "epoch": 0.2547517421183775, + "grad_norm": 0.12882474064826965, + "learning_rate": 9.977191632484857e-06, + "loss": 0.0067, + "step": 39720 + }, + { + "epoch": 0.25481587901216357, + "grad_norm": 0.3904571831226349, + "learning_rate": 9.977138201556583e-06, + "loss": 0.0048, + "step": 39730 + }, + { + "epoch": 0.25488001590594966, + "grad_norm": 0.2068895399570465, + "learning_rate": 9.977084708261211e-06, + "loss": 0.0072, + "step": 39740 + }, + { + "epoch": 0.25494415279973576, + "grad_norm": 0.10844743996858597, + "learning_rate": 9.97703115259941e-06, + "loss": 0.0064, + "step": 39750 + }, + { + "epoch": 0.25500828969352185, + "grad_norm": 0.19294798374176025, + "learning_rate": 9.976977534571853e-06, + "loss": 0.005, + "step": 39760 + }, + { + "epoch": 0.25507242658730794, + "grad_norm": 0.2761460840702057, + "learning_rate": 9.976923854179207e-06, + "loss": 0.0062, + "step": 39770 + }, + { + "epoch": 0.25513656348109404, + "grad_norm": 0.1745172142982483, + "learning_rate": 9.976870111422152e-06, + "loss": 0.0052, + "step": 39780 + }, + { + "epoch": 0.25520070037488013, + "grad_norm": 0.11423773318529129, + "learning_rate": 9.976816306301355e-06, + "loss": 0.0075, + "step": 39790 + }, + { + "epoch": 0.2552648372686662, + "grad_norm": 0.12186460942029953, + "learning_rate": 9.976762438817494e-06, + "loss": 0.0053, + "step": 39800 + }, + { + "epoch": 0.2553289741624523, + "grad_norm": 0.15649254620075226, + "learning_rate": 9.976708508971242e-06, + "loss": 0.0047, + "step": 39810 + }, + { + "epoch": 0.2553931110562384, + "grad_norm": 0.13888505101203918, + "learning_rate": 9.976654516763278e-06, + "loss": 0.0075, + "step": 39820 + }, + { + "epoch": 0.25545724795002456, + "grad_norm": 0.18757475912570953, + "learning_rate": 9.976600462194273e-06, + "loss": 0.0057, + "step": 39830 + }, + { + "epoch": 0.25552138484381065, + "grad_norm": 0.34580785036087036, + "learning_rate": 9.97654634526491e-06, + "loss": 0.0058, + "step": 39840 + }, + { + "epoch": 0.25558552173759674, + "grad_norm": 0.5235243439674377, + "learning_rate": 9.976492165975864e-06, + "loss": 0.007, + "step": 39850 + }, + { + "epoch": 0.25564965863138284, + "grad_norm": 0.3960067927837372, + "learning_rate": 9.976437924327813e-06, + "loss": 0.0049, + "step": 39860 + }, + { + "epoch": 0.25571379552516893, + "grad_norm": 0.33573824167251587, + "learning_rate": 9.97638362032144e-06, + "loss": 0.0059, + "step": 39870 + }, + { + "epoch": 0.255777932418955, + "grad_norm": 0.22803282737731934, + "learning_rate": 9.976329253957422e-06, + "loss": 0.0057, + "step": 39880 + }, + { + "epoch": 0.2558420693127411, + "grad_norm": 0.03270663693547249, + "learning_rate": 9.976274825236443e-06, + "loss": 0.0032, + "step": 39890 + }, + { + "epoch": 0.2559062062065272, + "grad_norm": 0.25057414174079895, + "learning_rate": 9.976220334159183e-06, + "loss": 0.0058, + "step": 39900 + }, + { + "epoch": 0.2559703431003133, + "grad_norm": 0.2685162425041199, + "learning_rate": 9.976165780726328e-06, + "loss": 0.0039, + "step": 39910 + }, + { + "epoch": 0.2560344799940994, + "grad_norm": 0.3304634988307953, + "learning_rate": 9.976111164938558e-06, + "loss": 0.0044, + "step": 39920 + }, + { + "epoch": 0.2560986168878855, + "grad_norm": 0.2324581891298294, + "learning_rate": 9.976056486796557e-06, + "loss": 0.0084, + "step": 39930 + }, + { + "epoch": 0.2561627537816716, + "grad_norm": 0.4089649021625519, + "learning_rate": 9.976001746301014e-06, + "loss": 0.0047, + "step": 39940 + }, + { + "epoch": 0.2562268906754577, + "grad_norm": 0.23112742602825165, + "learning_rate": 9.975946943452613e-06, + "loss": 0.0056, + "step": 39950 + }, + { + "epoch": 0.25629102756924377, + "grad_norm": 0.3333764374256134, + "learning_rate": 9.97589207825204e-06, + "loss": 0.0081, + "step": 39960 + }, + { + "epoch": 0.2563551644630299, + "grad_norm": 0.30956459045410156, + "learning_rate": 9.975837150699984e-06, + "loss": 0.0052, + "step": 39970 + }, + { + "epoch": 0.256419301356816, + "grad_norm": 0.41459202766418457, + "learning_rate": 9.975782160797129e-06, + "loss": 0.0056, + "step": 39980 + }, + { + "epoch": 0.2564834382506021, + "grad_norm": 0.1197529062628746, + "learning_rate": 9.975727108544171e-06, + "loss": 0.0038, + "step": 39990 + }, + { + "epoch": 0.2565475751443882, + "grad_norm": 0.09705403447151184, + "learning_rate": 9.975671993941795e-06, + "loss": 0.0044, + "step": 40000 + }, + { + "epoch": 0.2566117120381743, + "grad_norm": 0.448026180267334, + "learning_rate": 9.975616816990691e-06, + "loss": 0.004, + "step": 40010 + }, + { + "epoch": 0.2566758489319604, + "grad_norm": 0.1669142097234726, + "learning_rate": 9.975561577691555e-06, + "loss": 0.0087, + "step": 40020 + }, + { + "epoch": 0.2567399858257465, + "grad_norm": 0.33060795068740845, + "learning_rate": 9.975506276045076e-06, + "loss": 0.0078, + "step": 40030 + }, + { + "epoch": 0.25680412271953257, + "grad_norm": 0.10030943900346756, + "learning_rate": 9.975450912051946e-06, + "loss": 0.0041, + "step": 40040 + }, + { + "epoch": 0.25686825961331866, + "grad_norm": 0.35985592007637024, + "learning_rate": 9.97539548571286e-06, + "loss": 0.0069, + "step": 40050 + }, + { + "epoch": 0.25693239650710475, + "grad_norm": 0.148360937833786, + "learning_rate": 9.975339997028516e-06, + "loss": 0.0061, + "step": 40060 + }, + { + "epoch": 0.25699653340089085, + "grad_norm": 0.2986155152320862, + "learning_rate": 9.975284445999602e-06, + "loss": 0.0056, + "step": 40070 + }, + { + "epoch": 0.25706067029467694, + "grad_norm": 0.20368216931819916, + "learning_rate": 9.97522883262682e-06, + "loss": 0.0069, + "step": 40080 + }, + { + "epoch": 0.25712480718846303, + "grad_norm": 0.4418974220752716, + "learning_rate": 9.975173156910865e-06, + "loss": 0.0067, + "step": 40090 + }, + { + "epoch": 0.2571889440822491, + "grad_norm": 0.45510780811309814, + "learning_rate": 9.975117418852434e-06, + "loss": 0.0048, + "step": 40100 + }, + { + "epoch": 0.2572530809760353, + "grad_norm": 0.3036324977874756, + "learning_rate": 9.975061618452228e-06, + "loss": 0.0051, + "step": 40110 + }, + { + "epoch": 0.25731721786982137, + "grad_norm": 0.28871700167655945, + "learning_rate": 9.975005755710943e-06, + "loss": 0.004, + "step": 40120 + }, + { + "epoch": 0.25738135476360746, + "grad_norm": 0.11629597097635269, + "learning_rate": 9.974949830629279e-06, + "loss": 0.0067, + "step": 40130 + }, + { + "epoch": 0.25744549165739355, + "grad_norm": 0.19242048263549805, + "learning_rate": 9.97489384320794e-06, + "loss": 0.0068, + "step": 40140 + }, + { + "epoch": 0.25750962855117965, + "grad_norm": 0.07171380519866943, + "learning_rate": 9.974837793447625e-06, + "loss": 0.0032, + "step": 40150 + }, + { + "epoch": 0.25757376544496574, + "grad_norm": 0.3329668343067169, + "learning_rate": 9.974781681349037e-06, + "loss": 0.0065, + "step": 40160 + }, + { + "epoch": 0.25763790233875183, + "grad_norm": 0.3116569221019745, + "learning_rate": 9.974725506912879e-06, + "loss": 0.0085, + "step": 40170 + }, + { + "epoch": 0.2577020392325379, + "grad_norm": 0.21919970214366913, + "learning_rate": 9.974669270139854e-06, + "loss": 0.0084, + "step": 40180 + }, + { + "epoch": 0.257766176126324, + "grad_norm": 0.1700431853532791, + "learning_rate": 9.974612971030668e-06, + "loss": 0.0051, + "step": 40190 + }, + { + "epoch": 0.2578303130201101, + "grad_norm": 0.09541097283363342, + "learning_rate": 9.974556609586028e-06, + "loss": 0.0037, + "step": 40200 + }, + { + "epoch": 0.2578944499138962, + "grad_norm": 0.12062127888202667, + "learning_rate": 9.974500185806637e-06, + "loss": 0.0035, + "step": 40210 + }, + { + "epoch": 0.2579585868076823, + "grad_norm": 0.3263625502586365, + "learning_rate": 9.974443699693204e-06, + "loss": 0.0068, + "step": 40220 + }, + { + "epoch": 0.2580227237014684, + "grad_norm": 0.3020590841770172, + "learning_rate": 9.974387151246437e-06, + "loss": 0.0109, + "step": 40230 + }, + { + "epoch": 0.2580868605952545, + "grad_norm": 0.46705231070518494, + "learning_rate": 9.97433054046704e-06, + "loss": 0.0103, + "step": 40240 + }, + { + "epoch": 0.25815099748904063, + "grad_norm": 0.1899949014186859, + "learning_rate": 9.97427386735573e-06, + "loss": 0.0058, + "step": 40250 + }, + { + "epoch": 0.2582151343828267, + "grad_norm": 0.18206621706485748, + "learning_rate": 9.974217131913213e-06, + "loss": 0.0028, + "step": 40260 + }, + { + "epoch": 0.2582792712766128, + "grad_norm": 0.3200024664402008, + "learning_rate": 9.974160334140199e-06, + "loss": 0.0083, + "step": 40270 + }, + { + "epoch": 0.2583434081703989, + "grad_norm": 0.4450424611568451, + "learning_rate": 9.974103474037403e-06, + "loss": 0.0075, + "step": 40280 + }, + { + "epoch": 0.258407545064185, + "grad_norm": 0.13085024058818817, + "learning_rate": 9.974046551605533e-06, + "loss": 0.0051, + "step": 40290 + }, + { + "epoch": 0.2584716819579711, + "grad_norm": 0.27274298667907715, + "learning_rate": 9.973989566845307e-06, + "loss": 0.0052, + "step": 40300 + }, + { + "epoch": 0.2585358188517572, + "grad_norm": 0.36792632937431335, + "learning_rate": 9.973932519757437e-06, + "loss": 0.0064, + "step": 40310 + }, + { + "epoch": 0.2585999557455433, + "grad_norm": 0.20151731371879578, + "learning_rate": 9.973875410342636e-06, + "loss": 0.0071, + "step": 40320 + }, + { + "epoch": 0.2586640926393294, + "grad_norm": 0.2218293994665146, + "learning_rate": 9.973818238601623e-06, + "loss": 0.0041, + "step": 40330 + }, + { + "epoch": 0.2587282295331155, + "grad_norm": 0.19607120752334595, + "learning_rate": 9.97376100453511e-06, + "loss": 0.0055, + "step": 40340 + }, + { + "epoch": 0.25879236642690157, + "grad_norm": 0.44621092081069946, + "learning_rate": 9.973703708143819e-06, + "loss": 0.0038, + "step": 40350 + }, + { + "epoch": 0.25885650332068766, + "grad_norm": 0.7802656292915344, + "learning_rate": 9.973646349428465e-06, + "loss": 0.0064, + "step": 40360 + }, + { + "epoch": 0.25892064021447375, + "grad_norm": 0.13441035151481628, + "learning_rate": 9.973588928389768e-06, + "loss": 0.005, + "step": 40370 + }, + { + "epoch": 0.25898477710825984, + "grad_norm": 0.19954079389572144, + "learning_rate": 9.973531445028448e-06, + "loss": 0.0034, + "step": 40380 + }, + { + "epoch": 0.259048914002046, + "grad_norm": 0.17247618734836578, + "learning_rate": 9.973473899345226e-06, + "loss": 0.0067, + "step": 40390 + }, + { + "epoch": 0.2591130508958321, + "grad_norm": 0.3464730381965637, + "learning_rate": 9.973416291340818e-06, + "loss": 0.0103, + "step": 40400 + }, + { + "epoch": 0.2591771877896182, + "grad_norm": 0.14043326675891876, + "learning_rate": 9.973358621015951e-06, + "loss": 0.006, + "step": 40410 + }, + { + "epoch": 0.2592413246834043, + "grad_norm": 0.20075811445713043, + "learning_rate": 9.973300888371345e-06, + "loss": 0.0069, + "step": 40420 + }, + { + "epoch": 0.25930546157719037, + "grad_norm": 0.4112997055053711, + "learning_rate": 9.973243093407726e-06, + "loss": 0.0074, + "step": 40430 + }, + { + "epoch": 0.25936959847097646, + "grad_norm": 0.5253247618675232, + "learning_rate": 9.973185236125815e-06, + "loss": 0.0074, + "step": 40440 + }, + { + "epoch": 0.25943373536476255, + "grad_norm": 0.11537769436836243, + "learning_rate": 9.973127316526341e-06, + "loss": 0.0079, + "step": 40450 + }, + { + "epoch": 0.25949787225854865, + "grad_norm": 0.3097275495529175, + "learning_rate": 9.973069334610027e-06, + "loss": 0.0094, + "step": 40460 + }, + { + "epoch": 0.25956200915233474, + "grad_norm": 0.26016682386398315, + "learning_rate": 9.973011290377598e-06, + "loss": 0.0083, + "step": 40470 + }, + { + "epoch": 0.25962614604612083, + "grad_norm": 0.34955739974975586, + "learning_rate": 9.972953183829786e-06, + "loss": 0.0053, + "step": 40480 + }, + { + "epoch": 0.2596902829399069, + "grad_norm": 0.1267748326063156, + "learning_rate": 9.972895014967315e-06, + "loss": 0.0054, + "step": 40490 + }, + { + "epoch": 0.259754419833693, + "grad_norm": 0.21058782935142517, + "learning_rate": 9.972836783790915e-06, + "loss": 0.0046, + "step": 40500 + }, + { + "epoch": 0.2598185567274791, + "grad_norm": 0.17960603535175323, + "learning_rate": 9.972778490301317e-06, + "loss": 0.0065, + "step": 40510 + }, + { + "epoch": 0.2598826936212652, + "grad_norm": 0.14524775743484497, + "learning_rate": 9.97272013449925e-06, + "loss": 0.0067, + "step": 40520 + }, + { + "epoch": 0.25994683051505135, + "grad_norm": 0.3339650332927704, + "learning_rate": 9.972661716385446e-06, + "loss": 0.0033, + "step": 40530 + }, + { + "epoch": 0.26001096740883745, + "grad_norm": 0.2901301383972168, + "learning_rate": 9.972603235960636e-06, + "loss": 0.0066, + "step": 40540 + }, + { + "epoch": 0.26007510430262354, + "grad_norm": 0.25822460651397705, + "learning_rate": 9.972544693225554e-06, + "loss": 0.0052, + "step": 40550 + }, + { + "epoch": 0.26013924119640963, + "grad_norm": 0.47759416699409485, + "learning_rate": 9.972486088180936e-06, + "loss": 0.0054, + "step": 40560 + }, + { + "epoch": 0.2602033780901957, + "grad_norm": 0.05683809518814087, + "learning_rate": 9.97242742082751e-06, + "loss": 0.0047, + "step": 40570 + }, + { + "epoch": 0.2602675149839818, + "grad_norm": 0.597055971622467, + "learning_rate": 9.972368691166014e-06, + "loss": 0.0157, + "step": 40580 + }, + { + "epoch": 0.2603316518777679, + "grad_norm": 0.2670753002166748, + "learning_rate": 9.972309899197187e-06, + "loss": 0.0055, + "step": 40590 + }, + { + "epoch": 0.260395788771554, + "grad_norm": 0.0818215012550354, + "learning_rate": 9.972251044921764e-06, + "loss": 0.0065, + "step": 40600 + }, + { + "epoch": 0.2604599256653401, + "grad_norm": 0.229543998837471, + "learning_rate": 9.97219212834048e-06, + "loss": 0.0044, + "step": 40610 + }, + { + "epoch": 0.2605240625591262, + "grad_norm": 0.21901388466358185, + "learning_rate": 9.972133149454075e-06, + "loss": 0.0068, + "step": 40620 + }, + { + "epoch": 0.2605881994529123, + "grad_norm": 0.3773210048675537, + "learning_rate": 9.972074108263289e-06, + "loss": 0.0072, + "step": 40630 + }, + { + "epoch": 0.2606523363466984, + "grad_norm": 0.2899719774723053, + "learning_rate": 9.97201500476886e-06, + "loss": 0.0058, + "step": 40640 + }, + { + "epoch": 0.26071647324048447, + "grad_norm": 0.14465318620204926, + "learning_rate": 9.97195583897153e-06, + "loss": 0.0043, + "step": 40650 + }, + { + "epoch": 0.26078061013427056, + "grad_norm": 0.2485213279724121, + "learning_rate": 9.971896610872041e-06, + "loss": 0.0047, + "step": 40660 + }, + { + "epoch": 0.2608447470280567, + "grad_norm": 0.27826058864593506, + "learning_rate": 9.971837320471132e-06, + "loss": 0.005, + "step": 40670 + }, + { + "epoch": 0.2609088839218428, + "grad_norm": 0.21804702281951904, + "learning_rate": 9.971777967769549e-06, + "loss": 0.0047, + "step": 40680 + }, + { + "epoch": 0.2609730208156289, + "grad_norm": 0.22855201363563538, + "learning_rate": 9.971718552768035e-06, + "loss": 0.0057, + "step": 40690 + }, + { + "epoch": 0.261037157709415, + "grad_norm": 0.298373818397522, + "learning_rate": 9.971659075467335e-06, + "loss": 0.0039, + "step": 40700 + }, + { + "epoch": 0.2611012946032011, + "grad_norm": 0.15485908091068268, + "learning_rate": 9.971599535868193e-06, + "loss": 0.0059, + "step": 40710 + }, + { + "epoch": 0.2611654314969872, + "grad_norm": 0.24103794991970062, + "learning_rate": 9.971539933971355e-06, + "loss": 0.005, + "step": 40720 + }, + { + "epoch": 0.26122956839077327, + "grad_norm": 0.199654221534729, + "learning_rate": 9.971480269777569e-06, + "loss": 0.0051, + "step": 40730 + }, + { + "epoch": 0.26129370528455936, + "grad_norm": 0.11665575951337814, + "learning_rate": 9.971420543287582e-06, + "loss": 0.0071, + "step": 40740 + }, + { + "epoch": 0.26135784217834546, + "grad_norm": 0.18692216277122498, + "learning_rate": 9.971360754502143e-06, + "loss": 0.0045, + "step": 40750 + }, + { + "epoch": 0.26142197907213155, + "grad_norm": 0.27884986996650696, + "learning_rate": 9.971300903422001e-06, + "loss": 0.0036, + "step": 40760 + }, + { + "epoch": 0.26148611596591764, + "grad_norm": 0.19081412255764008, + "learning_rate": 9.971240990047904e-06, + "loss": 0.0075, + "step": 40770 + }, + { + "epoch": 0.26155025285970374, + "grad_norm": 0.24674800038337708, + "learning_rate": 9.971181014380607e-06, + "loss": 0.006, + "step": 40780 + }, + { + "epoch": 0.26161438975348983, + "grad_norm": 0.10528982430696487, + "learning_rate": 9.971120976420857e-06, + "loss": 0.0049, + "step": 40790 + }, + { + "epoch": 0.2616785266472759, + "grad_norm": 0.2989997863769531, + "learning_rate": 9.97106087616941e-06, + "loss": 0.0077, + "step": 40800 + }, + { + "epoch": 0.26174266354106207, + "grad_norm": 0.10974735766649246, + "learning_rate": 9.971000713627016e-06, + "loss": 0.0102, + "step": 40810 + }, + { + "epoch": 0.26180680043484816, + "grad_norm": 0.2013663798570633, + "learning_rate": 9.970940488794432e-06, + "loss": 0.0062, + "step": 40820 + }, + { + "epoch": 0.26187093732863426, + "grad_norm": 0.04378150776028633, + "learning_rate": 9.970880201672409e-06, + "loss": 0.0044, + "step": 40830 + }, + { + "epoch": 0.26193507422242035, + "grad_norm": 0.2523619532585144, + "learning_rate": 9.970819852261707e-06, + "loss": 0.0055, + "step": 40840 + }, + { + "epoch": 0.26199921111620644, + "grad_norm": 0.36605334281921387, + "learning_rate": 9.970759440563075e-06, + "loss": 0.0042, + "step": 40850 + }, + { + "epoch": 0.26206334800999254, + "grad_norm": 0.3867529034614563, + "learning_rate": 9.97069896657728e-06, + "loss": 0.0064, + "step": 40860 + }, + { + "epoch": 0.26212748490377863, + "grad_norm": 0.3114843964576721, + "learning_rate": 9.970638430305071e-06, + "loss": 0.0054, + "step": 40870 + }, + { + "epoch": 0.2621916217975647, + "grad_norm": 0.12710262835025787, + "learning_rate": 9.97057783174721e-06, + "loss": 0.0045, + "step": 40880 + }, + { + "epoch": 0.2622557586913508, + "grad_norm": 0.21920526027679443, + "learning_rate": 9.970517170904459e-06, + "loss": 0.0049, + "step": 40890 + }, + { + "epoch": 0.2623198955851369, + "grad_norm": 0.08163820952177048, + "learning_rate": 9.970456447777574e-06, + "loss": 0.0074, + "step": 40900 + }, + { + "epoch": 0.262384032478923, + "grad_norm": 0.31037309765815735, + "learning_rate": 9.970395662367318e-06, + "loss": 0.0053, + "step": 40910 + }, + { + "epoch": 0.2624481693727091, + "grad_norm": 0.24946099519729614, + "learning_rate": 9.970334814674452e-06, + "loss": 0.0053, + "step": 40920 + }, + { + "epoch": 0.2625123062664952, + "grad_norm": 0.12234389036893845, + "learning_rate": 9.970273904699736e-06, + "loss": 0.005, + "step": 40930 + }, + { + "epoch": 0.2625764431602813, + "grad_norm": 0.7745259404182434, + "learning_rate": 9.970212932443939e-06, + "loss": 0.0053, + "step": 40940 + }, + { + "epoch": 0.2626405800540674, + "grad_norm": 0.1711163967847824, + "learning_rate": 9.97015189790782e-06, + "loss": 0.0053, + "step": 40950 + }, + { + "epoch": 0.2627047169478535, + "grad_norm": 0.10916420817375183, + "learning_rate": 9.970090801092146e-06, + "loss": 0.0039, + "step": 40960 + }, + { + "epoch": 0.2627688538416396, + "grad_norm": 0.19058328866958618, + "learning_rate": 9.970029641997683e-06, + "loss": 0.0065, + "step": 40970 + }, + { + "epoch": 0.2628329907354257, + "grad_norm": 0.19633597135543823, + "learning_rate": 9.969968420625196e-06, + "loss": 0.0048, + "step": 40980 + }, + { + "epoch": 0.2628971276292118, + "grad_norm": 0.056958526372909546, + "learning_rate": 9.969907136975451e-06, + "loss": 0.0066, + "step": 40990 + }, + { + "epoch": 0.2629612645229979, + "grad_norm": 0.13696123659610748, + "learning_rate": 9.96984579104922e-06, + "loss": 0.0044, + "step": 41000 + }, + { + "epoch": 0.263025401416784, + "grad_norm": 0.47774285078048706, + "learning_rate": 9.96978438284727e-06, + "loss": 0.0073, + "step": 41010 + }, + { + "epoch": 0.2630895383105701, + "grad_norm": 0.30265384912490845, + "learning_rate": 9.969722912370367e-06, + "loss": 0.0079, + "step": 41020 + }, + { + "epoch": 0.2631536752043562, + "grad_norm": 0.3104373514652252, + "learning_rate": 9.969661379619286e-06, + "loss": 0.006, + "step": 41030 + }, + { + "epoch": 0.26321781209814227, + "grad_norm": 0.5163923501968384, + "learning_rate": 9.969599784594795e-06, + "loss": 0.0064, + "step": 41040 + }, + { + "epoch": 0.26328194899192836, + "grad_norm": 0.21843431890010834, + "learning_rate": 9.969538127297668e-06, + "loss": 0.0105, + "step": 41050 + }, + { + "epoch": 0.26334608588571445, + "grad_norm": 0.26374852657318115, + "learning_rate": 9.969476407728675e-06, + "loss": 0.0072, + "step": 41060 + }, + { + "epoch": 0.26341022277950055, + "grad_norm": 0.1182999536395073, + "learning_rate": 9.969414625888594e-06, + "loss": 0.006, + "step": 41070 + }, + { + "epoch": 0.26347435967328664, + "grad_norm": 0.2888597846031189, + "learning_rate": 9.969352781778195e-06, + "loss": 0.0049, + "step": 41080 + }, + { + "epoch": 0.26353849656707273, + "grad_norm": 0.09545408189296722, + "learning_rate": 9.969290875398252e-06, + "loss": 0.0059, + "step": 41090 + }, + { + "epoch": 0.2636026334608589, + "grad_norm": 0.17045533657073975, + "learning_rate": 9.969228906749544e-06, + "loss": 0.0056, + "step": 41100 + }, + { + "epoch": 0.263666770354645, + "grad_norm": 0.24985957145690918, + "learning_rate": 9.969166875832848e-06, + "loss": 0.004, + "step": 41110 + }, + { + "epoch": 0.26373090724843107, + "grad_norm": 0.4133603870868683, + "learning_rate": 9.969104782648938e-06, + "loss": 0.0053, + "step": 41120 + }, + { + "epoch": 0.26379504414221716, + "grad_norm": 0.30793261528015137, + "learning_rate": 9.969042627198592e-06, + "loss": 0.0061, + "step": 41130 + }, + { + "epoch": 0.26385918103600325, + "grad_norm": 0.3550465703010559, + "learning_rate": 9.968980409482594e-06, + "loss": 0.0094, + "step": 41140 + }, + { + "epoch": 0.26392331792978935, + "grad_norm": 0.13868685066699982, + "learning_rate": 9.968918129501718e-06, + "loss": 0.0047, + "step": 41150 + }, + { + "epoch": 0.26398745482357544, + "grad_norm": 0.10272249579429626, + "learning_rate": 9.968855787256749e-06, + "loss": 0.0052, + "step": 41160 + }, + { + "epoch": 0.26405159171736153, + "grad_norm": 0.0827287957072258, + "learning_rate": 9.968793382748462e-06, + "loss": 0.0046, + "step": 41170 + }, + { + "epoch": 0.2641157286111476, + "grad_norm": 0.2657158374786377, + "learning_rate": 9.968730915977647e-06, + "loss": 0.0042, + "step": 41180 + }, + { + "epoch": 0.2641798655049337, + "grad_norm": 0.12274724245071411, + "learning_rate": 9.96866838694508e-06, + "loss": 0.0058, + "step": 41190 + }, + { + "epoch": 0.2642440023987198, + "grad_norm": 0.09837764501571655, + "learning_rate": 9.968605795651548e-06, + "loss": 0.0056, + "step": 41200 + }, + { + "epoch": 0.2643081392925059, + "grad_norm": 0.3094668686389923, + "learning_rate": 9.968543142097833e-06, + "loss": 0.0089, + "step": 41210 + }, + { + "epoch": 0.264372276186292, + "grad_norm": 0.30115365982055664, + "learning_rate": 9.968480426284723e-06, + "loss": 0.0054, + "step": 41220 + }, + { + "epoch": 0.2644364130800781, + "grad_norm": 0.6357099413871765, + "learning_rate": 9.968417648213e-06, + "loss": 0.005, + "step": 41230 + }, + { + "epoch": 0.26450054997386424, + "grad_norm": 0.40032604336738586, + "learning_rate": 9.968354807883456e-06, + "loss": 0.0051, + "step": 41240 + }, + { + "epoch": 0.26456468686765033, + "grad_norm": 0.4655933678150177, + "learning_rate": 9.968291905296874e-06, + "loss": 0.0031, + "step": 41250 + }, + { + "epoch": 0.2646288237614364, + "grad_norm": 0.3356774151325226, + "learning_rate": 9.968228940454043e-06, + "loss": 0.0057, + "step": 41260 + }, + { + "epoch": 0.2646929606552225, + "grad_norm": 0.04913124814629555, + "learning_rate": 9.968165913355754e-06, + "loss": 0.0044, + "step": 41270 + }, + { + "epoch": 0.2647570975490086, + "grad_norm": 0.1629490703344345, + "learning_rate": 9.968102824002795e-06, + "loss": 0.0044, + "step": 41280 + }, + { + "epoch": 0.2648212344427947, + "grad_norm": 0.17841202020645142, + "learning_rate": 9.968039672395957e-06, + "loss": 0.0057, + "step": 41290 + }, + { + "epoch": 0.2648853713365808, + "grad_norm": 0.11928600072860718, + "learning_rate": 9.96797645853603e-06, + "loss": 0.0074, + "step": 41300 + }, + { + "epoch": 0.2649495082303669, + "grad_norm": 0.39552077651023865, + "learning_rate": 9.96791318242381e-06, + "loss": 0.007, + "step": 41310 + }, + { + "epoch": 0.265013645124153, + "grad_norm": 0.14500652253627777, + "learning_rate": 9.967849844060084e-06, + "loss": 0.004, + "step": 41320 + }, + { + "epoch": 0.2650777820179391, + "grad_norm": 0.24915923178195953, + "learning_rate": 9.967786443445651e-06, + "loss": 0.0056, + "step": 41330 + }, + { + "epoch": 0.26514191891172517, + "grad_norm": 0.19904428720474243, + "learning_rate": 9.967722980581303e-06, + "loss": 0.0066, + "step": 41340 + }, + { + "epoch": 0.26520605580551126, + "grad_norm": 0.22887201607227325, + "learning_rate": 9.967659455467836e-06, + "loss": 0.0035, + "step": 41350 + }, + { + "epoch": 0.26527019269929736, + "grad_norm": 0.2342652678489685, + "learning_rate": 9.967595868106045e-06, + "loss": 0.0064, + "step": 41360 + }, + { + "epoch": 0.26533432959308345, + "grad_norm": 0.17694617807865143, + "learning_rate": 9.96753221849673e-06, + "loss": 0.0061, + "step": 41370 + }, + { + "epoch": 0.2653984664868696, + "grad_norm": 0.25880196690559387, + "learning_rate": 9.967468506640684e-06, + "loss": 0.0051, + "step": 41380 + }, + { + "epoch": 0.2654626033806557, + "grad_norm": 0.10445227473974228, + "learning_rate": 9.967404732538706e-06, + "loss": 0.0033, + "step": 41390 + }, + { + "epoch": 0.2655267402744418, + "grad_norm": 0.26488828659057617, + "learning_rate": 9.967340896191598e-06, + "loss": 0.0068, + "step": 41400 + }, + { + "epoch": 0.2655908771682279, + "grad_norm": 0.13136650621891022, + "learning_rate": 9.96727699760016e-06, + "loss": 0.0057, + "step": 41410 + }, + { + "epoch": 0.26565501406201397, + "grad_norm": 0.4326325058937073, + "learning_rate": 9.967213036765188e-06, + "loss": 0.0042, + "step": 41420 + }, + { + "epoch": 0.26571915095580007, + "grad_norm": 0.3093279302120209, + "learning_rate": 9.967149013687489e-06, + "loss": 0.0033, + "step": 41430 + }, + { + "epoch": 0.26578328784958616, + "grad_norm": 0.2260848581790924, + "learning_rate": 9.967084928367862e-06, + "loss": 0.0051, + "step": 41440 + }, + { + "epoch": 0.26584742474337225, + "grad_norm": 0.23080188035964966, + "learning_rate": 9.967020780807111e-06, + "loss": 0.0069, + "step": 41450 + }, + { + "epoch": 0.26591156163715834, + "grad_norm": 0.4775310754776001, + "learning_rate": 9.96695657100604e-06, + "loss": 0.0084, + "step": 41460 + }, + { + "epoch": 0.26597569853094444, + "grad_norm": 0.1611340492963791, + "learning_rate": 9.966892298965453e-06, + "loss": 0.0074, + "step": 41470 + }, + { + "epoch": 0.26603983542473053, + "grad_norm": 0.3432773947715759, + "learning_rate": 9.966827964686157e-06, + "loss": 0.0033, + "step": 41480 + }, + { + "epoch": 0.2661039723185166, + "grad_norm": 0.09466341137886047, + "learning_rate": 9.966763568168955e-06, + "loss": 0.0077, + "step": 41490 + }, + { + "epoch": 0.2661681092123027, + "grad_norm": 0.22389864921569824, + "learning_rate": 9.966699109414657e-06, + "loss": 0.0044, + "step": 41500 + }, + { + "epoch": 0.2662322461060888, + "grad_norm": 0.1356392204761505, + "learning_rate": 9.96663458842407e-06, + "loss": 0.0055, + "step": 41510 + }, + { + "epoch": 0.26629638299987496, + "grad_norm": 0.11065568774938583, + "learning_rate": 9.966570005197999e-06, + "loss": 0.0028, + "step": 41520 + }, + { + "epoch": 0.26636051989366105, + "grad_norm": 0.5627606511116028, + "learning_rate": 9.96650535973726e-06, + "loss": 0.0069, + "step": 41530 + }, + { + "epoch": 0.26642465678744714, + "grad_norm": 0.2600993514060974, + "learning_rate": 9.966440652042657e-06, + "loss": 0.0043, + "step": 41540 + }, + { + "epoch": 0.26648879368123324, + "grad_norm": 0.14146417379379272, + "learning_rate": 9.966375882115005e-06, + "loss": 0.0113, + "step": 41550 + }, + { + "epoch": 0.26655293057501933, + "grad_norm": 0.3202704191207886, + "learning_rate": 9.966311049955111e-06, + "loss": 0.0051, + "step": 41560 + }, + { + "epoch": 0.2666170674688054, + "grad_norm": 0.18367043137550354, + "learning_rate": 9.966246155563793e-06, + "loss": 0.0066, + "step": 41570 + }, + { + "epoch": 0.2666812043625915, + "grad_norm": 0.07598559558391571, + "learning_rate": 9.96618119894186e-06, + "loss": 0.0055, + "step": 41580 + }, + { + "epoch": 0.2667453412563776, + "grad_norm": 0.3397144675254822, + "learning_rate": 9.966116180090128e-06, + "loss": 0.0072, + "step": 41590 + }, + { + "epoch": 0.2668094781501637, + "grad_norm": 0.32975801825523376, + "learning_rate": 9.96605109900941e-06, + "loss": 0.0052, + "step": 41600 + }, + { + "epoch": 0.2668736150439498, + "grad_norm": 0.26922792196273804, + "learning_rate": 9.965985955700523e-06, + "loss": 0.0068, + "step": 41610 + }, + { + "epoch": 0.2669377519377359, + "grad_norm": 0.15959490835666656, + "learning_rate": 9.965920750164283e-06, + "loss": 0.0091, + "step": 41620 + }, + { + "epoch": 0.267001888831522, + "grad_norm": 0.3165683150291443, + "learning_rate": 9.965855482401507e-06, + "loss": 0.0059, + "step": 41630 + }, + { + "epoch": 0.2670660257253081, + "grad_norm": 0.1645023375749588, + "learning_rate": 9.965790152413013e-06, + "loss": 0.0039, + "step": 41640 + }, + { + "epoch": 0.26713016261909417, + "grad_norm": 0.3173259198665619, + "learning_rate": 9.965724760199618e-06, + "loss": 0.0053, + "step": 41650 + }, + { + "epoch": 0.2671942995128803, + "grad_norm": 0.2163485437631607, + "learning_rate": 9.965659305762144e-06, + "loss": 0.0071, + "step": 41660 + }, + { + "epoch": 0.2672584364066664, + "grad_norm": 0.09772640466690063, + "learning_rate": 9.96559378910141e-06, + "loss": 0.0067, + "step": 41670 + }, + { + "epoch": 0.2673225733004525, + "grad_norm": 0.16423320770263672, + "learning_rate": 9.965528210218236e-06, + "loss": 0.0082, + "step": 41680 + }, + { + "epoch": 0.2673867101942386, + "grad_norm": 0.23379464447498322, + "learning_rate": 9.965462569113447e-06, + "loss": 0.0058, + "step": 41690 + }, + { + "epoch": 0.2674508470880247, + "grad_norm": 0.619549036026001, + "learning_rate": 9.965396865787861e-06, + "loss": 0.0035, + "step": 41700 + }, + { + "epoch": 0.2675149839818108, + "grad_norm": 0.21900302171707153, + "learning_rate": 9.965331100242303e-06, + "loss": 0.0063, + "step": 41710 + }, + { + "epoch": 0.2675791208755969, + "grad_norm": 0.30182841420173645, + "learning_rate": 9.9652652724776e-06, + "loss": 0.004, + "step": 41720 + }, + { + "epoch": 0.26764325776938297, + "grad_norm": 0.334244966506958, + "learning_rate": 9.965199382494574e-06, + "loss": 0.0055, + "step": 41730 + }, + { + "epoch": 0.26770739466316906, + "grad_norm": 0.787428081035614, + "learning_rate": 9.96513343029405e-06, + "loss": 0.0069, + "step": 41740 + }, + { + "epoch": 0.26777153155695516, + "grad_norm": 0.2864348590373993, + "learning_rate": 9.965067415876857e-06, + "loss": 0.006, + "step": 41750 + }, + { + "epoch": 0.26783566845074125, + "grad_norm": 0.40290093421936035, + "learning_rate": 9.965001339243819e-06, + "loss": 0.0069, + "step": 41760 + }, + { + "epoch": 0.26789980534452734, + "grad_norm": 0.2561686336994171, + "learning_rate": 9.964935200395767e-06, + "loss": 0.006, + "step": 41770 + }, + { + "epoch": 0.26796394223831344, + "grad_norm": 0.19381536543369293, + "learning_rate": 9.964868999333528e-06, + "loss": 0.0055, + "step": 41780 + }, + { + "epoch": 0.26802807913209953, + "grad_norm": 0.19894808530807495, + "learning_rate": 9.964802736057933e-06, + "loss": 0.0059, + "step": 41790 + }, + { + "epoch": 0.2680922160258857, + "grad_norm": 0.16865375638008118, + "learning_rate": 9.96473641056981e-06, + "loss": 0.0023, + "step": 41800 + }, + { + "epoch": 0.26815635291967177, + "grad_norm": 0.337950199842453, + "learning_rate": 9.964670022869994e-06, + "loss": 0.0083, + "step": 41810 + }, + { + "epoch": 0.26822048981345786, + "grad_norm": 0.08005267381668091, + "learning_rate": 9.964603572959312e-06, + "loss": 0.0041, + "step": 41820 + }, + { + "epoch": 0.26828462670724396, + "grad_norm": 0.25674504041671753, + "learning_rate": 9.9645370608386e-06, + "loss": 0.0079, + "step": 41830 + }, + { + "epoch": 0.26834876360103005, + "grad_norm": 0.2807120680809021, + "learning_rate": 9.96447048650869e-06, + "loss": 0.0046, + "step": 41840 + }, + { + "epoch": 0.26841290049481614, + "grad_norm": 0.5037015080451965, + "learning_rate": 9.964403849970416e-06, + "loss": 0.0056, + "step": 41850 + }, + { + "epoch": 0.26847703738860224, + "grad_norm": 0.2273329496383667, + "learning_rate": 9.964337151224617e-06, + "loss": 0.0039, + "step": 41860 + }, + { + "epoch": 0.26854117428238833, + "grad_norm": 0.15029305219650269, + "learning_rate": 9.964270390272123e-06, + "loss": 0.0056, + "step": 41870 + }, + { + "epoch": 0.2686053111761744, + "grad_norm": 0.6880108714103699, + "learning_rate": 9.964203567113773e-06, + "loss": 0.0084, + "step": 41880 + }, + { + "epoch": 0.2686694480699605, + "grad_norm": 0.1664203554391861, + "learning_rate": 9.964136681750406e-06, + "loss": 0.005, + "step": 41890 + }, + { + "epoch": 0.2687335849637466, + "grad_norm": 0.445311963558197, + "learning_rate": 9.964069734182858e-06, + "loss": 0.007, + "step": 41900 + }, + { + "epoch": 0.2687977218575327, + "grad_norm": 0.14195266366004944, + "learning_rate": 9.964002724411967e-06, + "loss": 0.0031, + "step": 41910 + }, + { + "epoch": 0.2688618587513188, + "grad_norm": 0.0973130464553833, + "learning_rate": 9.963935652438575e-06, + "loss": 0.0052, + "step": 41920 + }, + { + "epoch": 0.2689259956451049, + "grad_norm": 0.21904417872428894, + "learning_rate": 9.963868518263521e-06, + "loss": 0.0048, + "step": 41930 + }, + { + "epoch": 0.26899013253889104, + "grad_norm": 0.39342308044433594, + "learning_rate": 9.963801321887648e-06, + "loss": 0.0055, + "step": 41940 + }, + { + "epoch": 0.26905426943267713, + "grad_norm": 0.236109659075737, + "learning_rate": 9.963734063311797e-06, + "loss": 0.0048, + "step": 41950 + }, + { + "epoch": 0.2691184063264632, + "grad_norm": 0.40211576223373413, + "learning_rate": 9.963666742536811e-06, + "loss": 0.0111, + "step": 41960 + }, + { + "epoch": 0.2691825432202493, + "grad_norm": 0.07679940015077591, + "learning_rate": 9.963599359563532e-06, + "loss": 0.0052, + "step": 41970 + }, + { + "epoch": 0.2692466801140354, + "grad_norm": 0.3705988824367523, + "learning_rate": 9.963531914392806e-06, + "loss": 0.005, + "step": 41980 + }, + { + "epoch": 0.2693108170078215, + "grad_norm": 0.36707741022109985, + "learning_rate": 9.963464407025478e-06, + "loss": 0.0047, + "step": 41990 + }, + { + "epoch": 0.2693749539016076, + "grad_norm": 0.31048306822776794, + "learning_rate": 9.963396837462392e-06, + "loss": 0.0063, + "step": 42000 + }, + { + "epoch": 0.2694390907953937, + "grad_norm": 0.24470870196819305, + "learning_rate": 9.963329205704397e-06, + "loss": 0.006, + "step": 42010 + }, + { + "epoch": 0.2695032276891798, + "grad_norm": 0.37310677766799927, + "learning_rate": 9.963261511752341e-06, + "loss": 0.0077, + "step": 42020 + }, + { + "epoch": 0.2695673645829659, + "grad_norm": 0.23146171867847443, + "learning_rate": 9.96319375560707e-06, + "loss": 0.0063, + "step": 42030 + }, + { + "epoch": 0.26963150147675197, + "grad_norm": 0.4126221835613251, + "learning_rate": 9.963125937269435e-06, + "loss": 0.0085, + "step": 42040 + }, + { + "epoch": 0.26969563837053806, + "grad_norm": 0.37276023626327515, + "learning_rate": 9.963058056740284e-06, + "loss": 0.0113, + "step": 42050 + }, + { + "epoch": 0.26975977526432415, + "grad_norm": 0.16797538101673126, + "learning_rate": 9.962990114020469e-06, + "loss": 0.0044, + "step": 42060 + }, + { + "epoch": 0.26982391215811025, + "grad_norm": 0.26113346219062805, + "learning_rate": 9.962922109110841e-06, + "loss": 0.006, + "step": 42070 + }, + { + "epoch": 0.2698880490518964, + "grad_norm": 0.3887992203235626, + "learning_rate": 9.962854042012253e-06, + "loss": 0.007, + "step": 42080 + }, + { + "epoch": 0.2699521859456825, + "grad_norm": 0.14516444504261017, + "learning_rate": 9.962785912725556e-06, + "loss": 0.003, + "step": 42090 + }, + { + "epoch": 0.2700163228394686, + "grad_norm": 0.12583817541599274, + "learning_rate": 9.962717721251604e-06, + "loss": 0.0049, + "step": 42100 + }, + { + "epoch": 0.2700804597332547, + "grad_norm": 0.18013928830623627, + "learning_rate": 9.962649467591253e-06, + "loss": 0.005, + "step": 42110 + }, + { + "epoch": 0.27014459662704077, + "grad_norm": 0.6400836706161499, + "learning_rate": 9.962581151745358e-06, + "loss": 0.0049, + "step": 42120 + }, + { + "epoch": 0.27020873352082686, + "grad_norm": 0.6360874772071838, + "learning_rate": 9.962512773714773e-06, + "loss": 0.0096, + "step": 42130 + }, + { + "epoch": 0.27027287041461295, + "grad_norm": 0.3784855604171753, + "learning_rate": 9.962444333500358e-06, + "loss": 0.0113, + "step": 42140 + }, + { + "epoch": 0.27033700730839905, + "grad_norm": 0.32857540249824524, + "learning_rate": 9.962375831102968e-06, + "loss": 0.0102, + "step": 42150 + }, + { + "epoch": 0.27040114420218514, + "grad_norm": 0.24528630077838898, + "learning_rate": 9.96230726652346e-06, + "loss": 0.0062, + "step": 42160 + }, + { + "epoch": 0.27046528109597123, + "grad_norm": 0.4703960120677948, + "learning_rate": 9.962238639762697e-06, + "loss": 0.0057, + "step": 42170 + }, + { + "epoch": 0.2705294179897573, + "grad_norm": 0.2784826159477234, + "learning_rate": 9.96216995082154e-06, + "loss": 0.0102, + "step": 42180 + }, + { + "epoch": 0.2705935548835434, + "grad_norm": 0.34280872344970703, + "learning_rate": 9.962101199700845e-06, + "loss": 0.0069, + "step": 42190 + }, + { + "epoch": 0.2706576917773295, + "grad_norm": 0.34265586733818054, + "learning_rate": 9.962032386401475e-06, + "loss": 0.0043, + "step": 42200 + }, + { + "epoch": 0.2707218286711156, + "grad_norm": 0.14476878941059113, + "learning_rate": 9.961963510924295e-06, + "loss": 0.0051, + "step": 42210 + }, + { + "epoch": 0.27078596556490175, + "grad_norm": 0.028610268607735634, + "learning_rate": 9.961894573270163e-06, + "loss": 0.0033, + "step": 42220 + }, + { + "epoch": 0.27085010245868785, + "grad_norm": 0.3419274687767029, + "learning_rate": 9.961825573439947e-06, + "loss": 0.0047, + "step": 42230 + }, + { + "epoch": 0.27091423935247394, + "grad_norm": 0.11877810209989548, + "learning_rate": 9.96175651143451e-06, + "loss": 0.0054, + "step": 42240 + }, + { + "epoch": 0.27097837624626003, + "grad_norm": 0.11344487965106964, + "learning_rate": 9.96168738725472e-06, + "loss": 0.0051, + "step": 42250 + }, + { + "epoch": 0.2710425131400461, + "grad_norm": 0.2172202169895172, + "learning_rate": 9.96161820090144e-06, + "loss": 0.0043, + "step": 42260 + }, + { + "epoch": 0.2711066500338322, + "grad_norm": 0.1751595288515091, + "learning_rate": 9.961548952375537e-06, + "loss": 0.0066, + "step": 42270 + }, + { + "epoch": 0.2711707869276183, + "grad_norm": 0.20869243144989014, + "learning_rate": 9.96147964167788e-06, + "loss": 0.0052, + "step": 42280 + }, + { + "epoch": 0.2712349238214044, + "grad_norm": 0.35894539952278137, + "learning_rate": 9.961410268809338e-06, + "loss": 0.0058, + "step": 42290 + }, + { + "epoch": 0.2712990607151905, + "grad_norm": 0.21298572421073914, + "learning_rate": 9.961340833770778e-06, + "loss": 0.0054, + "step": 42300 + }, + { + "epoch": 0.2713631976089766, + "grad_norm": 0.10361744463443756, + "learning_rate": 9.961271336563073e-06, + "loss": 0.0081, + "step": 42310 + }, + { + "epoch": 0.2714273345027627, + "grad_norm": 0.12398401647806168, + "learning_rate": 9.961201777187091e-06, + "loss": 0.0029, + "step": 42320 + }, + { + "epoch": 0.2714914713965488, + "grad_norm": 0.08253161609172821, + "learning_rate": 9.961132155643704e-06, + "loss": 0.006, + "step": 42330 + }, + { + "epoch": 0.27155560829033487, + "grad_norm": 0.12968796491622925, + "learning_rate": 9.961062471933788e-06, + "loss": 0.0036, + "step": 42340 + }, + { + "epoch": 0.27161974518412096, + "grad_norm": 0.03698797523975372, + "learning_rate": 9.960992726058212e-06, + "loss": 0.0043, + "step": 42350 + }, + { + "epoch": 0.27168388207790706, + "grad_norm": 0.3756403923034668, + "learning_rate": 9.960922918017852e-06, + "loss": 0.0144, + "step": 42360 + }, + { + "epoch": 0.2717480189716932, + "grad_norm": 0.776396632194519, + "learning_rate": 9.960853047813583e-06, + "loss": 0.0065, + "step": 42370 + }, + { + "epoch": 0.2718121558654793, + "grad_norm": 0.20592395961284637, + "learning_rate": 9.960783115446279e-06, + "loss": 0.0029, + "step": 42380 + }, + { + "epoch": 0.2718762927592654, + "grad_norm": 0.5793088674545288, + "learning_rate": 9.960713120916818e-06, + "loss": 0.0063, + "step": 42390 + }, + { + "epoch": 0.2719404296530515, + "grad_norm": 0.2557103633880615, + "learning_rate": 9.960643064226077e-06, + "loss": 0.0038, + "step": 42400 + }, + { + "epoch": 0.2720045665468376, + "grad_norm": 0.2395615428686142, + "learning_rate": 9.960572945374932e-06, + "loss": 0.0091, + "step": 42410 + }, + { + "epoch": 0.27206870344062367, + "grad_norm": 0.4039780795574188, + "learning_rate": 9.960502764364262e-06, + "loss": 0.0062, + "step": 42420 + }, + { + "epoch": 0.27213284033440976, + "grad_norm": 0.18832018971443176, + "learning_rate": 9.960432521194947e-06, + "loss": 0.0052, + "step": 42430 + }, + { + "epoch": 0.27219697722819586, + "grad_norm": 0.2135220468044281, + "learning_rate": 9.960362215867868e-06, + "loss": 0.0044, + "step": 42440 + }, + { + "epoch": 0.27226111412198195, + "grad_norm": 0.28974127769470215, + "learning_rate": 9.960291848383904e-06, + "loss": 0.0056, + "step": 42450 + }, + { + "epoch": 0.27232525101576804, + "grad_norm": 0.17635077238082886, + "learning_rate": 9.96022141874394e-06, + "loss": 0.0067, + "step": 42460 + }, + { + "epoch": 0.27238938790955414, + "grad_norm": 0.12280075252056122, + "learning_rate": 9.960150926948857e-06, + "loss": 0.0053, + "step": 42470 + }, + { + "epoch": 0.27245352480334023, + "grad_norm": 0.1510230302810669, + "learning_rate": 9.960080372999537e-06, + "loss": 0.0073, + "step": 42480 + }, + { + "epoch": 0.2725176616971263, + "grad_norm": 0.21415026485919952, + "learning_rate": 9.960009756896865e-06, + "loss": 0.0039, + "step": 42490 + }, + { + "epoch": 0.2725817985909124, + "grad_norm": 0.20693327486515045, + "learning_rate": 9.959939078641725e-06, + "loss": 0.0031, + "step": 42500 + }, + { + "epoch": 0.27264593548469856, + "grad_norm": 0.192563995718956, + "learning_rate": 9.959868338235004e-06, + "loss": 0.0049, + "step": 42510 + }, + { + "epoch": 0.27271007237848466, + "grad_norm": 0.1595400720834732, + "learning_rate": 9.959797535677589e-06, + "loss": 0.004, + "step": 42520 + }, + { + "epoch": 0.27277420927227075, + "grad_norm": 0.3150067925453186, + "learning_rate": 9.959726670970366e-06, + "loss": 0.0085, + "step": 42530 + }, + { + "epoch": 0.27283834616605684, + "grad_norm": 0.08167749643325806, + "learning_rate": 9.959655744114223e-06, + "loss": 0.0046, + "step": 42540 + }, + { + "epoch": 0.27290248305984294, + "grad_norm": 0.4045458734035492, + "learning_rate": 9.959584755110048e-06, + "loss": 0.0074, + "step": 42550 + }, + { + "epoch": 0.27296661995362903, + "grad_norm": 0.30020734667778015, + "learning_rate": 9.959513703958732e-06, + "loss": 0.004, + "step": 42560 + }, + { + "epoch": 0.2730307568474151, + "grad_norm": 0.28891295194625854, + "learning_rate": 9.959442590661165e-06, + "loss": 0.0036, + "step": 42570 + }, + { + "epoch": 0.2730948937412012, + "grad_norm": 0.10582451522350311, + "learning_rate": 9.959371415218238e-06, + "loss": 0.0051, + "step": 42580 + }, + { + "epoch": 0.2731590306349873, + "grad_norm": 0.3654150068759918, + "learning_rate": 9.959300177630842e-06, + "loss": 0.0053, + "step": 42590 + }, + { + "epoch": 0.2732231675287734, + "grad_norm": 0.43862292170524597, + "learning_rate": 9.95922887789987e-06, + "loss": 0.0043, + "step": 42600 + }, + { + "epoch": 0.2732873044225595, + "grad_norm": 0.5285431146621704, + "learning_rate": 9.959157516026217e-06, + "loss": 0.0057, + "step": 42610 + }, + { + "epoch": 0.2733514413163456, + "grad_norm": 0.17073945701122284, + "learning_rate": 9.959086092010776e-06, + "loss": 0.0077, + "step": 42620 + }, + { + "epoch": 0.2734155782101317, + "grad_norm": 0.1495482623577118, + "learning_rate": 9.959014605854443e-06, + "loss": 0.0035, + "step": 42630 + }, + { + "epoch": 0.2734797151039178, + "grad_norm": 0.40101027488708496, + "learning_rate": 9.958943057558111e-06, + "loss": 0.0076, + "step": 42640 + }, + { + "epoch": 0.2735438519977039, + "grad_norm": 0.2977857291698456, + "learning_rate": 9.958871447122678e-06, + "loss": 0.0071, + "step": 42650 + }, + { + "epoch": 0.27360798889149, + "grad_norm": 0.2916841506958008, + "learning_rate": 9.958799774549044e-06, + "loss": 0.0042, + "step": 42660 + }, + { + "epoch": 0.2736721257852761, + "grad_norm": 0.15789587795734406, + "learning_rate": 9.958728039838104e-06, + "loss": 0.0048, + "step": 42670 + }, + { + "epoch": 0.2737362626790622, + "grad_norm": 0.11915109306573868, + "learning_rate": 9.958656242990757e-06, + "loss": 0.0068, + "step": 42680 + }, + { + "epoch": 0.2738003995728483, + "grad_norm": 0.29966673254966736, + "learning_rate": 9.958584384007904e-06, + "loss": 0.0041, + "step": 42690 + }, + { + "epoch": 0.2738645364666344, + "grad_norm": 0.0362437441945076, + "learning_rate": 9.958512462890444e-06, + "loss": 0.0051, + "step": 42700 + }, + { + "epoch": 0.2739286733604205, + "grad_norm": 0.1600048989057541, + "learning_rate": 9.95844047963928e-06, + "loss": 0.0058, + "step": 42710 + }, + { + "epoch": 0.2739928102542066, + "grad_norm": 0.24165469408035278, + "learning_rate": 9.958368434255312e-06, + "loss": 0.0051, + "step": 42720 + }, + { + "epoch": 0.27405694714799267, + "grad_norm": 0.32953357696533203, + "learning_rate": 9.958296326739444e-06, + "loss": 0.0053, + "step": 42730 + }, + { + "epoch": 0.27412108404177876, + "grad_norm": 0.47327950596809387, + "learning_rate": 9.95822415709258e-06, + "loss": 0.0071, + "step": 42740 + }, + { + "epoch": 0.27418522093556486, + "grad_norm": 0.2136109620332718, + "learning_rate": 9.958151925315624e-06, + "loss": 0.0066, + "step": 42750 + }, + { + "epoch": 0.27424935782935095, + "grad_norm": 0.28177398443222046, + "learning_rate": 9.95807963140948e-06, + "loss": 0.0071, + "step": 42760 + }, + { + "epoch": 0.27431349472313704, + "grad_norm": 0.25047868490219116, + "learning_rate": 9.958007275375054e-06, + "loss": 0.0057, + "step": 42770 + }, + { + "epoch": 0.27437763161692313, + "grad_norm": 0.6044594049453735, + "learning_rate": 9.957934857213256e-06, + "loss": 0.0034, + "step": 42780 + }, + { + "epoch": 0.2744417685107093, + "grad_norm": 0.37561270594596863, + "learning_rate": 9.957862376924989e-06, + "loss": 0.0051, + "step": 42790 + }, + { + "epoch": 0.2745059054044954, + "grad_norm": 0.5243725180625916, + "learning_rate": 9.957789834511164e-06, + "loss": 0.0075, + "step": 42800 + }, + { + "epoch": 0.27457004229828147, + "grad_norm": 0.35506904125213623, + "learning_rate": 9.957717229972687e-06, + "loss": 0.0046, + "step": 42810 + }, + { + "epoch": 0.27463417919206756, + "grad_norm": 0.2856937646865845, + "learning_rate": 9.957644563310472e-06, + "loss": 0.0085, + "step": 42820 + }, + { + "epoch": 0.27469831608585366, + "grad_norm": 0.29843416810035706, + "learning_rate": 9.957571834525427e-06, + "loss": 0.0069, + "step": 42830 + }, + { + "epoch": 0.27476245297963975, + "grad_norm": 0.12337861210107803, + "learning_rate": 9.957499043618464e-06, + "loss": 0.0068, + "step": 42840 + }, + { + "epoch": 0.27482658987342584, + "grad_norm": 0.12973666191101074, + "learning_rate": 9.957426190590494e-06, + "loss": 0.0044, + "step": 42850 + }, + { + "epoch": 0.27489072676721193, + "grad_norm": 0.74559485912323, + "learning_rate": 9.957353275442431e-06, + "loss": 0.0092, + "step": 42860 + }, + { + "epoch": 0.27495486366099803, + "grad_norm": 0.12368279695510864, + "learning_rate": 9.95728029817519e-06, + "loss": 0.0036, + "step": 42870 + }, + { + "epoch": 0.2750190005547841, + "grad_norm": 0.20460005104541779, + "learning_rate": 9.957207258789683e-06, + "loss": 0.0044, + "step": 42880 + }, + { + "epoch": 0.2750831374485702, + "grad_norm": 0.3100970983505249, + "learning_rate": 9.957134157286825e-06, + "loss": 0.0061, + "step": 42890 + }, + { + "epoch": 0.2751472743423563, + "grad_norm": 0.10070610046386719, + "learning_rate": 9.957060993667534e-06, + "loss": 0.0051, + "step": 42900 + }, + { + "epoch": 0.2752114112361424, + "grad_norm": 0.2688789665699005, + "learning_rate": 9.956987767932727e-06, + "loss": 0.0042, + "step": 42910 + }, + { + "epoch": 0.2752755481299285, + "grad_norm": 0.2600405812263489, + "learning_rate": 9.956914480083319e-06, + "loss": 0.0079, + "step": 42920 + }, + { + "epoch": 0.27533968502371464, + "grad_norm": 0.07776429504156113, + "learning_rate": 9.956841130120232e-06, + "loss": 0.0048, + "step": 42930 + }, + { + "epoch": 0.27540382191750074, + "grad_norm": 0.42027747631073, + "learning_rate": 9.95676771804438e-06, + "loss": 0.0058, + "step": 42940 + }, + { + "epoch": 0.27546795881128683, + "grad_norm": 0.11998993158340454, + "learning_rate": 9.956694243856689e-06, + "loss": 0.0047, + "step": 42950 + }, + { + "epoch": 0.2755320957050729, + "grad_norm": 0.26443660259246826, + "learning_rate": 9.956620707558076e-06, + "loss": 0.0073, + "step": 42960 + }, + { + "epoch": 0.275596232598859, + "grad_norm": 0.408774197101593, + "learning_rate": 9.95654710914946e-06, + "loss": 0.0055, + "step": 42970 + }, + { + "epoch": 0.2756603694926451, + "grad_norm": 0.13061800599098206, + "learning_rate": 9.95647344863177e-06, + "loss": 0.0035, + "step": 42980 + }, + { + "epoch": 0.2757245063864312, + "grad_norm": 0.29762259125709534, + "learning_rate": 9.956399726005924e-06, + "loss": 0.0067, + "step": 42990 + }, + { + "epoch": 0.2757886432802173, + "grad_norm": 0.1276562511920929, + "learning_rate": 9.956325941272847e-06, + "loss": 0.0072, + "step": 43000 + }, + { + "epoch": 0.2758527801740034, + "grad_norm": 0.39894869923591614, + "learning_rate": 9.956252094433464e-06, + "loss": 0.0068, + "step": 43010 + }, + { + "epoch": 0.2759169170677895, + "grad_norm": 0.27307799458503723, + "learning_rate": 9.9561781854887e-06, + "loss": 0.0051, + "step": 43020 + }, + { + "epoch": 0.2759810539615756, + "grad_norm": 0.38455525040626526, + "learning_rate": 9.956104214439481e-06, + "loss": 0.0098, + "step": 43030 + }, + { + "epoch": 0.27604519085536167, + "grad_norm": 0.16045977175235748, + "learning_rate": 9.956030181286736e-06, + "loss": 0.0038, + "step": 43040 + }, + { + "epoch": 0.27610932774914776, + "grad_norm": 0.3692812919616699, + "learning_rate": 9.955956086031387e-06, + "loss": 0.0032, + "step": 43050 + }, + { + "epoch": 0.27617346464293385, + "grad_norm": 0.059148553758859634, + "learning_rate": 9.955881928674369e-06, + "loss": 0.0062, + "step": 43060 + }, + { + "epoch": 0.27623760153672, + "grad_norm": 0.2269824594259262, + "learning_rate": 9.955807709216609e-06, + "loss": 0.005, + "step": 43070 + }, + { + "epoch": 0.2763017384305061, + "grad_norm": 0.26325589418411255, + "learning_rate": 9.955733427659034e-06, + "loss": 0.0066, + "step": 43080 + }, + { + "epoch": 0.2763658753242922, + "grad_norm": 0.22607797384262085, + "learning_rate": 9.95565908400258e-06, + "loss": 0.0062, + "step": 43090 + }, + { + "epoch": 0.2764300122180783, + "grad_norm": 0.24247755110263824, + "learning_rate": 9.955584678248173e-06, + "loss": 0.0061, + "step": 43100 + }, + { + "epoch": 0.2764941491118644, + "grad_norm": 0.2428087443113327, + "learning_rate": 9.95551021039675e-06, + "loss": 0.0044, + "step": 43110 + }, + { + "epoch": 0.27655828600565047, + "grad_norm": 0.4491789937019348, + "learning_rate": 9.955435680449243e-06, + "loss": 0.0072, + "step": 43120 + }, + { + "epoch": 0.27662242289943656, + "grad_norm": 0.28530851006507874, + "learning_rate": 9.955361088406585e-06, + "loss": 0.0073, + "step": 43130 + }, + { + "epoch": 0.27668655979322265, + "grad_norm": 0.3485097885131836, + "learning_rate": 9.95528643426971e-06, + "loss": 0.0074, + "step": 43140 + }, + { + "epoch": 0.27675069668700875, + "grad_norm": 0.2821292281150818, + "learning_rate": 9.955211718039558e-06, + "loss": 0.0052, + "step": 43150 + }, + { + "epoch": 0.27681483358079484, + "grad_norm": 0.09028469026088715, + "learning_rate": 9.955136939717057e-06, + "loss": 0.006, + "step": 43160 + }, + { + "epoch": 0.27687897047458093, + "grad_norm": 0.24043463170528412, + "learning_rate": 9.955062099303151e-06, + "loss": 0.0036, + "step": 43170 + }, + { + "epoch": 0.276943107368367, + "grad_norm": 0.0799112319946289, + "learning_rate": 9.954987196798776e-06, + "loss": 0.0041, + "step": 43180 + }, + { + "epoch": 0.2770072442621531, + "grad_norm": 0.28518977761268616, + "learning_rate": 9.95491223220487e-06, + "loss": 0.0057, + "step": 43190 + }, + { + "epoch": 0.2770713811559392, + "grad_norm": 0.1286603808403015, + "learning_rate": 9.954837205522371e-06, + "loss": 0.005, + "step": 43200 + }, + { + "epoch": 0.27713551804972536, + "grad_norm": 0.3737489879131317, + "learning_rate": 9.95476211675222e-06, + "loss": 0.0036, + "step": 43210 + }, + { + "epoch": 0.27719965494351145, + "grad_norm": 0.401049941778183, + "learning_rate": 9.954686965895361e-06, + "loss": 0.0037, + "step": 43220 + }, + { + "epoch": 0.27726379183729755, + "grad_norm": 0.2654677927494049, + "learning_rate": 9.954611752952733e-06, + "loss": 0.0064, + "step": 43230 + }, + { + "epoch": 0.27732792873108364, + "grad_norm": 0.4903867244720459, + "learning_rate": 9.954536477925279e-06, + "loss": 0.0071, + "step": 43240 + }, + { + "epoch": 0.27739206562486973, + "grad_norm": 0.26085662841796875, + "learning_rate": 9.95446114081394e-06, + "loss": 0.0042, + "step": 43250 + }, + { + "epoch": 0.2774562025186558, + "grad_norm": 0.18222586810588837, + "learning_rate": 9.954385741619663e-06, + "loss": 0.0037, + "step": 43260 + }, + { + "epoch": 0.2775203394124419, + "grad_norm": 0.0768163800239563, + "learning_rate": 9.954310280343394e-06, + "loss": 0.007, + "step": 43270 + }, + { + "epoch": 0.277584476306228, + "grad_norm": 0.11794343590736389, + "learning_rate": 9.954234756986072e-06, + "loss": 0.0076, + "step": 43280 + }, + { + "epoch": 0.2776486132000141, + "grad_norm": 0.07045776396989822, + "learning_rate": 9.954159171548653e-06, + "loss": 0.0054, + "step": 43290 + }, + { + "epoch": 0.2777127500938002, + "grad_norm": 0.3573853373527527, + "learning_rate": 9.954083524032075e-06, + "loss": 0.0074, + "step": 43300 + }, + { + "epoch": 0.2777768869875863, + "grad_norm": 0.0045158397406339645, + "learning_rate": 9.95400781443729e-06, + "loss": 0.0089, + "step": 43310 + }, + { + "epoch": 0.2778410238813724, + "grad_norm": 0.1320253610610962, + "learning_rate": 9.953932042765247e-06, + "loss": 0.0047, + "step": 43320 + }, + { + "epoch": 0.2779051607751585, + "grad_norm": 0.2115936279296875, + "learning_rate": 9.953856209016895e-06, + "loss": 0.0045, + "step": 43330 + }, + { + "epoch": 0.27796929766894457, + "grad_norm": 0.13962361216545105, + "learning_rate": 9.953780313193185e-06, + "loss": 0.0082, + "step": 43340 + }, + { + "epoch": 0.2780334345627307, + "grad_norm": 0.206785187125206, + "learning_rate": 9.953704355295066e-06, + "loss": 0.0066, + "step": 43350 + }, + { + "epoch": 0.2780975714565168, + "grad_norm": 0.15403756499290466, + "learning_rate": 9.953628335323494e-06, + "loss": 0.0054, + "step": 43360 + }, + { + "epoch": 0.2781617083503029, + "grad_norm": 0.124571792781353, + "learning_rate": 9.953552253279415e-06, + "loss": 0.0035, + "step": 43370 + }, + { + "epoch": 0.278225845244089, + "grad_norm": 0.12376170605421066, + "learning_rate": 9.953476109163788e-06, + "loss": 0.0041, + "step": 43380 + }, + { + "epoch": 0.2782899821378751, + "grad_norm": 0.2316630780696869, + "learning_rate": 9.953399902977565e-06, + "loss": 0.0064, + "step": 43390 + }, + { + "epoch": 0.2783541190316612, + "grad_norm": 0.24657541513442993, + "learning_rate": 9.953323634721701e-06, + "loss": 0.004, + "step": 43400 + }, + { + "epoch": 0.2784182559254473, + "grad_norm": 0.21009762585163116, + "learning_rate": 9.953247304397151e-06, + "loss": 0.0032, + "step": 43410 + }, + { + "epoch": 0.27848239281923337, + "grad_norm": 0.17280659079551697, + "learning_rate": 9.953170912004873e-06, + "loss": 0.0057, + "step": 43420 + }, + { + "epoch": 0.27854652971301946, + "grad_norm": 0.20576423406600952, + "learning_rate": 9.953094457545824e-06, + "loss": 0.0038, + "step": 43430 + }, + { + "epoch": 0.27861066660680556, + "grad_norm": 0.1083400622010231, + "learning_rate": 9.953017941020959e-06, + "loss": 0.0038, + "step": 43440 + }, + { + "epoch": 0.27867480350059165, + "grad_norm": 0.16818398237228394, + "learning_rate": 9.952941362431242e-06, + "loss": 0.0047, + "step": 43450 + }, + { + "epoch": 0.27873894039437774, + "grad_norm": 0.1237788051366806, + "learning_rate": 9.952864721777629e-06, + "loss": 0.0033, + "step": 43460 + }, + { + "epoch": 0.27880307728816384, + "grad_norm": 0.42831873893737793, + "learning_rate": 9.952788019061082e-06, + "loss": 0.0061, + "step": 43470 + }, + { + "epoch": 0.27886721418194993, + "grad_norm": 0.20468097925186157, + "learning_rate": 9.95271125428256e-06, + "loss": 0.0037, + "step": 43480 + }, + { + "epoch": 0.2789313510757361, + "grad_norm": 0.3154074549674988, + "learning_rate": 9.952634427443027e-06, + "loss": 0.0094, + "step": 43490 + }, + { + "epoch": 0.27899548796952217, + "grad_norm": 0.3426404893398285, + "learning_rate": 9.952557538543445e-06, + "loss": 0.0046, + "step": 43500 + }, + { + "epoch": 0.27905962486330826, + "grad_norm": 0.11730002611875534, + "learning_rate": 9.952480587584779e-06, + "loss": 0.0043, + "step": 43510 + }, + { + "epoch": 0.27912376175709436, + "grad_norm": 0.22600385546684265, + "learning_rate": 9.952403574567991e-06, + "loss": 0.0055, + "step": 43520 + }, + { + "epoch": 0.27918789865088045, + "grad_norm": 0.2184022217988968, + "learning_rate": 9.952326499494046e-06, + "loss": 0.0046, + "step": 43530 + }, + { + "epoch": 0.27925203554466654, + "grad_norm": 0.15621612966060638, + "learning_rate": 9.952249362363909e-06, + "loss": 0.0053, + "step": 43540 + }, + { + "epoch": 0.27931617243845264, + "grad_norm": 0.2692352533340454, + "learning_rate": 9.95217216317855e-06, + "loss": 0.0034, + "step": 43550 + }, + { + "epoch": 0.27938030933223873, + "grad_norm": 0.2867933511734009, + "learning_rate": 9.952094901938935e-06, + "loss": 0.006, + "step": 43560 + }, + { + "epoch": 0.2794444462260248, + "grad_norm": 0.2318105250597, + "learning_rate": 9.952017578646032e-06, + "loss": 0.013, + "step": 43570 + }, + { + "epoch": 0.2795085831198109, + "grad_norm": 0.15158458054065704, + "learning_rate": 9.951940193300808e-06, + "loss": 0.0043, + "step": 43580 + }, + { + "epoch": 0.279572720013597, + "grad_norm": 0.19293011724948883, + "learning_rate": 9.951862745904235e-06, + "loss": 0.0074, + "step": 43590 + }, + { + "epoch": 0.2796368569073831, + "grad_norm": 0.23179151117801666, + "learning_rate": 9.951785236457283e-06, + "loss": 0.0051, + "step": 43600 + }, + { + "epoch": 0.2797009938011692, + "grad_norm": 0.25460487604141235, + "learning_rate": 9.951707664960922e-06, + "loss": 0.0068, + "step": 43610 + }, + { + "epoch": 0.2797651306949553, + "grad_norm": 0.16452178359031677, + "learning_rate": 9.951630031416127e-06, + "loss": 0.0045, + "step": 43620 + }, + { + "epoch": 0.27982926758874144, + "grad_norm": 0.1425665020942688, + "learning_rate": 9.951552335823866e-06, + "loss": 0.004, + "step": 43630 + }, + { + "epoch": 0.27989340448252753, + "grad_norm": 0.2122945785522461, + "learning_rate": 9.951474578185117e-06, + "loss": 0.0033, + "step": 43640 + }, + { + "epoch": 0.2799575413763136, + "grad_norm": 0.08789437264204025, + "learning_rate": 9.951396758500854e-06, + "loss": 0.0046, + "step": 43650 + }, + { + "epoch": 0.2800216782700997, + "grad_norm": 0.25604885816574097, + "learning_rate": 9.951318876772049e-06, + "loss": 0.0076, + "step": 43660 + }, + { + "epoch": 0.2800858151638858, + "grad_norm": 0.07172763347625732, + "learning_rate": 9.951240932999681e-06, + "loss": 0.0054, + "step": 43670 + }, + { + "epoch": 0.2801499520576719, + "grad_norm": 0.3497158885002136, + "learning_rate": 9.951162927184724e-06, + "loss": 0.0074, + "step": 43680 + }, + { + "epoch": 0.280214088951458, + "grad_norm": 0.21498006582260132, + "learning_rate": 9.951084859328159e-06, + "loss": 0.0031, + "step": 43690 + }, + { + "epoch": 0.2802782258452441, + "grad_norm": 0.35907694697380066, + "learning_rate": 9.95100672943096e-06, + "loss": 0.0035, + "step": 43700 + }, + { + "epoch": 0.2803423627390302, + "grad_norm": 0.2003171443939209, + "learning_rate": 9.95092853749411e-06, + "loss": 0.0043, + "step": 43710 + }, + { + "epoch": 0.2804064996328163, + "grad_norm": 0.33922964334487915, + "learning_rate": 9.950850283518587e-06, + "loss": 0.0043, + "step": 43720 + }, + { + "epoch": 0.28047063652660237, + "grad_norm": 0.5845425724983215, + "learning_rate": 9.95077196750537e-06, + "loss": 0.0121, + "step": 43730 + }, + { + "epoch": 0.28053477342038846, + "grad_norm": 0.16597582399845123, + "learning_rate": 9.950693589455444e-06, + "loss": 0.0094, + "step": 43740 + }, + { + "epoch": 0.28059891031417455, + "grad_norm": 0.2852199077606201, + "learning_rate": 9.950615149369788e-06, + "loss": 0.0093, + "step": 43750 + }, + { + "epoch": 0.28066304720796065, + "grad_norm": 0.21773932874202728, + "learning_rate": 9.950536647249387e-06, + "loss": 0.0042, + "step": 43760 + }, + { + "epoch": 0.28072718410174674, + "grad_norm": 0.09281706809997559, + "learning_rate": 9.950458083095222e-06, + "loss": 0.0062, + "step": 43770 + }, + { + "epoch": 0.2807913209955329, + "grad_norm": 0.07119549065828323, + "learning_rate": 9.95037945690828e-06, + "loss": 0.0046, + "step": 43780 + }, + { + "epoch": 0.280855457889319, + "grad_norm": 0.06815053522586823, + "learning_rate": 9.950300768689547e-06, + "loss": 0.0063, + "step": 43790 + }, + { + "epoch": 0.2809195947831051, + "grad_norm": 0.24725092947483063, + "learning_rate": 9.950222018440006e-06, + "loss": 0.0043, + "step": 43800 + }, + { + "epoch": 0.28098373167689117, + "grad_norm": 0.15198677778244019, + "learning_rate": 9.950143206160646e-06, + "loss": 0.0045, + "step": 43810 + }, + { + "epoch": 0.28104786857067726, + "grad_norm": 0.26507192850112915, + "learning_rate": 9.950064331852452e-06, + "loss": 0.0078, + "step": 43820 + }, + { + "epoch": 0.28111200546446335, + "grad_norm": 0.34943291544914246, + "learning_rate": 9.949985395516416e-06, + "loss": 0.0073, + "step": 43830 + }, + { + "epoch": 0.28117614235824945, + "grad_norm": 0.11543948948383331, + "learning_rate": 9.949906397153524e-06, + "loss": 0.0054, + "step": 43840 + }, + { + "epoch": 0.28124027925203554, + "grad_norm": 0.24528025090694427, + "learning_rate": 9.949827336764767e-06, + "loss": 0.0066, + "step": 43850 + }, + { + "epoch": 0.28130441614582163, + "grad_norm": 0.2551104724407196, + "learning_rate": 9.949748214351135e-06, + "loss": 0.0042, + "step": 43860 + }, + { + "epoch": 0.2813685530396077, + "grad_norm": 0.2274218201637268, + "learning_rate": 9.949669029913625e-06, + "loss": 0.0056, + "step": 43870 + }, + { + "epoch": 0.2814326899333938, + "grad_norm": 0.07005950063467026, + "learning_rate": 9.94958978345322e-06, + "loss": 0.0077, + "step": 43880 + }, + { + "epoch": 0.2814968268271799, + "grad_norm": 0.27010342478752136, + "learning_rate": 9.949510474970919e-06, + "loss": 0.0047, + "step": 43890 + }, + { + "epoch": 0.281560963720966, + "grad_norm": 0.3861885666847229, + "learning_rate": 9.949431104467716e-06, + "loss": 0.0066, + "step": 43900 + }, + { + "epoch": 0.2816251006147521, + "grad_norm": 0.07936596125364304, + "learning_rate": 9.9493516719446e-06, + "loss": 0.0035, + "step": 43910 + }, + { + "epoch": 0.28168923750853825, + "grad_norm": 0.03691532835364342, + "learning_rate": 9.949272177402574e-06, + "loss": 0.005, + "step": 43920 + }, + { + "epoch": 0.28175337440232434, + "grad_norm": 0.19261986017227173, + "learning_rate": 9.949192620842629e-06, + "loss": 0.005, + "step": 43930 + }, + { + "epoch": 0.28181751129611043, + "grad_norm": 0.3985759913921356, + "learning_rate": 9.949113002265764e-06, + "loss": 0.0046, + "step": 43940 + }, + { + "epoch": 0.2818816481898965, + "grad_norm": 0.38030776381492615, + "learning_rate": 9.949033321672977e-06, + "loss": 0.0055, + "step": 43950 + }, + { + "epoch": 0.2819457850836826, + "grad_norm": 0.31178340315818787, + "learning_rate": 9.948953579065262e-06, + "loss": 0.0057, + "step": 43960 + }, + { + "epoch": 0.2820099219774687, + "grad_norm": 0.1481586992740631, + "learning_rate": 9.948873774443623e-06, + "loss": 0.0042, + "step": 43970 + }, + { + "epoch": 0.2820740588712548, + "grad_norm": 0.4430583715438843, + "learning_rate": 9.94879390780906e-06, + "loss": 0.0067, + "step": 43980 + }, + { + "epoch": 0.2821381957650409, + "grad_norm": 0.06289871037006378, + "learning_rate": 9.948713979162571e-06, + "loss": 0.0072, + "step": 43990 + }, + { + "epoch": 0.282202332658827, + "grad_norm": 0.14244233071804047, + "learning_rate": 9.948633988505161e-06, + "loss": 0.0064, + "step": 44000 + }, + { + "epoch": 0.2822664695526131, + "grad_norm": 0.3439823389053345, + "learning_rate": 9.948553935837831e-06, + "loss": 0.0069, + "step": 44010 + }, + { + "epoch": 0.2823306064463992, + "grad_norm": 0.20238210260868073, + "learning_rate": 9.94847382116158e-06, + "loss": 0.0045, + "step": 44020 + }, + { + "epoch": 0.2823947433401853, + "grad_norm": 0.4001085162162781, + "learning_rate": 9.948393644477417e-06, + "loss": 0.0043, + "step": 44030 + }, + { + "epoch": 0.28245888023397137, + "grad_norm": 0.2548503875732422, + "learning_rate": 9.948313405786346e-06, + "loss": 0.0035, + "step": 44040 + }, + { + "epoch": 0.28252301712775746, + "grad_norm": 0.3001789152622223, + "learning_rate": 9.948233105089371e-06, + "loss": 0.0039, + "step": 44050 + }, + { + "epoch": 0.2825871540215436, + "grad_norm": 0.10993659496307373, + "learning_rate": 9.948152742387498e-06, + "loss": 0.004, + "step": 44060 + }, + { + "epoch": 0.2826512909153297, + "grad_norm": 0.2728452682495117, + "learning_rate": 9.948072317681737e-06, + "loss": 0.0038, + "step": 44070 + }, + { + "epoch": 0.2827154278091158, + "grad_norm": 0.1629365235567093, + "learning_rate": 9.94799183097309e-06, + "loss": 0.0037, + "step": 44080 + }, + { + "epoch": 0.2827795647029019, + "grad_norm": 0.4628024995326996, + "learning_rate": 9.947911282262571e-06, + "loss": 0.0064, + "step": 44090 + }, + { + "epoch": 0.282843701596688, + "grad_norm": 0.08697532117366791, + "learning_rate": 9.947830671551187e-06, + "loss": 0.0044, + "step": 44100 + }, + { + "epoch": 0.2829078384904741, + "grad_norm": 0.37861886620521545, + "learning_rate": 9.94774999883995e-06, + "loss": 0.004, + "step": 44110 + }, + { + "epoch": 0.28297197538426017, + "grad_norm": 0.37521013617515564, + "learning_rate": 9.947669264129867e-06, + "loss": 0.0042, + "step": 44120 + }, + { + "epoch": 0.28303611227804626, + "grad_norm": 0.12726260721683502, + "learning_rate": 9.947588467421954e-06, + "loss": 0.0036, + "step": 44130 + }, + { + "epoch": 0.28310024917183235, + "grad_norm": 0.09171196818351746, + "learning_rate": 9.94750760871722e-06, + "loss": 0.0073, + "step": 44140 + }, + { + "epoch": 0.28316438606561845, + "grad_norm": 0.21508584916591644, + "learning_rate": 9.94742668801668e-06, + "loss": 0.005, + "step": 44150 + }, + { + "epoch": 0.28322852295940454, + "grad_norm": 0.2806234657764435, + "learning_rate": 9.947345705321349e-06, + "loss": 0.0072, + "step": 44160 + }, + { + "epoch": 0.28329265985319063, + "grad_norm": 0.27383875846862793, + "learning_rate": 9.947264660632241e-06, + "loss": 0.0051, + "step": 44170 + }, + { + "epoch": 0.2833567967469767, + "grad_norm": 0.10557805001735687, + "learning_rate": 9.94718355395037e-06, + "loss": 0.0059, + "step": 44180 + }, + { + "epoch": 0.2834209336407628, + "grad_norm": 0.2006809264421463, + "learning_rate": 9.947102385276752e-06, + "loss": 0.006, + "step": 44190 + }, + { + "epoch": 0.28348507053454897, + "grad_norm": 0.3478815257549286, + "learning_rate": 9.947021154612407e-06, + "loss": 0.0078, + "step": 44200 + }, + { + "epoch": 0.28354920742833506, + "grad_norm": 0.4356499910354614, + "learning_rate": 9.946939861958352e-06, + "loss": 0.0057, + "step": 44210 + }, + { + "epoch": 0.28361334432212115, + "grad_norm": 0.1853245198726654, + "learning_rate": 9.946858507315603e-06, + "loss": 0.0034, + "step": 44220 + }, + { + "epoch": 0.28367748121590725, + "grad_norm": 0.06792756915092468, + "learning_rate": 9.946777090685182e-06, + "loss": 0.0051, + "step": 44230 + }, + { + "epoch": 0.28374161810969334, + "grad_norm": 0.16064020991325378, + "learning_rate": 9.94669561206811e-06, + "loss": 0.0047, + "step": 44240 + }, + { + "epoch": 0.28380575500347943, + "grad_norm": 0.11461981385946274, + "learning_rate": 9.946614071465405e-06, + "loss": 0.0051, + "step": 44250 + }, + { + "epoch": 0.2838698918972655, + "grad_norm": 0.4316380023956299, + "learning_rate": 9.946532468878091e-06, + "loss": 0.0077, + "step": 44260 + }, + { + "epoch": 0.2839340287910516, + "grad_norm": 0.1925811767578125, + "learning_rate": 9.946450804307191e-06, + "loss": 0.0062, + "step": 44270 + }, + { + "epoch": 0.2839981656848377, + "grad_norm": 0.40506237745285034, + "learning_rate": 9.946369077753725e-06, + "loss": 0.0064, + "step": 44280 + }, + { + "epoch": 0.2840623025786238, + "grad_norm": 0.12813763320446014, + "learning_rate": 9.946287289218722e-06, + "loss": 0.0086, + "step": 44290 + }, + { + "epoch": 0.2841264394724099, + "grad_norm": 0.22591009736061096, + "learning_rate": 9.946205438703202e-06, + "loss": 0.0043, + "step": 44300 + }, + { + "epoch": 0.284190576366196, + "grad_norm": 0.15613387525081635, + "learning_rate": 9.946123526208194e-06, + "loss": 0.0053, + "step": 44310 + }, + { + "epoch": 0.2842547132599821, + "grad_norm": 0.46575087308883667, + "learning_rate": 9.946041551734724e-06, + "loss": 0.005, + "step": 44320 + }, + { + "epoch": 0.2843188501537682, + "grad_norm": 0.24979087710380554, + "learning_rate": 9.945959515283817e-06, + "loss": 0.0044, + "step": 44330 + }, + { + "epoch": 0.2843829870475543, + "grad_norm": 0.27205783128738403, + "learning_rate": 9.945877416856504e-06, + "loss": 0.0065, + "step": 44340 + }, + { + "epoch": 0.2844471239413404, + "grad_norm": 0.16671212017536163, + "learning_rate": 9.94579525645381e-06, + "loss": 0.003, + "step": 44350 + }, + { + "epoch": 0.2845112608351265, + "grad_norm": 0.5268667936325073, + "learning_rate": 9.945713034076767e-06, + "loss": 0.007, + "step": 44360 + }, + { + "epoch": 0.2845753977289126, + "grad_norm": 0.2374076396226883, + "learning_rate": 9.945630749726408e-06, + "loss": 0.0042, + "step": 44370 + }, + { + "epoch": 0.2846395346226987, + "grad_norm": 0.2763907015323639, + "learning_rate": 9.945548403403757e-06, + "loss": 0.0102, + "step": 44380 + }, + { + "epoch": 0.2847036715164848, + "grad_norm": 0.1680525243282318, + "learning_rate": 9.945465995109854e-06, + "loss": 0.0052, + "step": 44390 + }, + { + "epoch": 0.2847678084102709, + "grad_norm": 0.21754974126815796, + "learning_rate": 9.945383524845724e-06, + "loss": 0.0058, + "step": 44400 + }, + { + "epoch": 0.284831945304057, + "grad_norm": 0.39914047718048096, + "learning_rate": 9.945300992612406e-06, + "loss": 0.0055, + "step": 44410 + }, + { + "epoch": 0.28489608219784307, + "grad_norm": 0.030866149812936783, + "learning_rate": 9.945218398410932e-06, + "loss": 0.004, + "step": 44420 + }, + { + "epoch": 0.28496021909162916, + "grad_norm": 0.10678695142269135, + "learning_rate": 9.945135742242337e-06, + "loss": 0.0056, + "step": 44430 + }, + { + "epoch": 0.28502435598541526, + "grad_norm": 0.22009611129760742, + "learning_rate": 9.945053024107656e-06, + "loss": 0.005, + "step": 44440 + }, + { + "epoch": 0.28508849287920135, + "grad_norm": 0.27552640438079834, + "learning_rate": 9.944970244007927e-06, + "loss": 0.0046, + "step": 44450 + }, + { + "epoch": 0.28515262977298744, + "grad_norm": 0.5489904284477234, + "learning_rate": 9.944887401944187e-06, + "loss": 0.0072, + "step": 44460 + }, + { + "epoch": 0.28521676666677354, + "grad_norm": 0.2531215250492096, + "learning_rate": 9.944804497917475e-06, + "loss": 0.0052, + "step": 44470 + }, + { + "epoch": 0.2852809035605597, + "grad_norm": 0.3310990631580353, + "learning_rate": 9.944721531928828e-06, + "loss": 0.0061, + "step": 44480 + }, + { + "epoch": 0.2853450404543458, + "grad_norm": 0.5441588163375854, + "learning_rate": 9.944638503979284e-06, + "loss": 0.0047, + "step": 44490 + }, + { + "epoch": 0.28540917734813187, + "grad_norm": 0.16787196695804596, + "learning_rate": 9.944555414069888e-06, + "loss": 0.0036, + "step": 44500 + }, + { + "epoch": 0.28547331424191796, + "grad_norm": 0.20527492463588715, + "learning_rate": 9.94447226220168e-06, + "loss": 0.0061, + "step": 44510 + }, + { + "epoch": 0.28553745113570406, + "grad_norm": 0.38600656390190125, + "learning_rate": 9.944389048375697e-06, + "loss": 0.0036, + "step": 44520 + }, + { + "epoch": 0.28560158802949015, + "grad_norm": 0.15580099821090698, + "learning_rate": 9.944305772592987e-06, + "loss": 0.0047, + "step": 44530 + }, + { + "epoch": 0.28566572492327624, + "grad_norm": 0.3504888117313385, + "learning_rate": 9.944222434854595e-06, + "loss": 0.0078, + "step": 44540 + }, + { + "epoch": 0.28572986181706234, + "grad_norm": 0.16987036168575287, + "learning_rate": 9.94413903516156e-06, + "loss": 0.0065, + "step": 44550 + }, + { + "epoch": 0.28579399871084843, + "grad_norm": 0.5898092985153198, + "learning_rate": 9.944055573514928e-06, + "loss": 0.0025, + "step": 44560 + }, + { + "epoch": 0.2858581356046345, + "grad_norm": 0.10460730642080307, + "learning_rate": 9.943972049915748e-06, + "loss": 0.0031, + "step": 44570 + }, + { + "epoch": 0.2859222724984206, + "grad_norm": 0.3808610439300537, + "learning_rate": 9.943888464365065e-06, + "loss": 0.0056, + "step": 44580 + }, + { + "epoch": 0.2859864093922067, + "grad_norm": 0.13328585028648376, + "learning_rate": 9.943804816863925e-06, + "loss": 0.0068, + "step": 44590 + }, + { + "epoch": 0.2860505462859928, + "grad_norm": 0.06399616599082947, + "learning_rate": 9.943721107413378e-06, + "loss": 0.0036, + "step": 44600 + }, + { + "epoch": 0.2861146831797789, + "grad_norm": 0.23969586193561554, + "learning_rate": 9.943637336014472e-06, + "loss": 0.0043, + "step": 44610 + }, + { + "epoch": 0.28617882007356504, + "grad_norm": 0.34828394651412964, + "learning_rate": 9.943553502668257e-06, + "loss": 0.0115, + "step": 44620 + }, + { + "epoch": 0.28624295696735114, + "grad_norm": 0.15816433727741241, + "learning_rate": 9.943469607375784e-06, + "loss": 0.0087, + "step": 44630 + }, + { + "epoch": 0.28630709386113723, + "grad_norm": 0.2197151482105255, + "learning_rate": 9.943385650138103e-06, + "loss": 0.0066, + "step": 44640 + }, + { + "epoch": 0.2863712307549233, + "grad_norm": 0.08302151411771774, + "learning_rate": 9.943301630956268e-06, + "loss": 0.0051, + "step": 44650 + }, + { + "epoch": 0.2864353676487094, + "grad_norm": 0.35876697301864624, + "learning_rate": 9.94321754983133e-06, + "loss": 0.0079, + "step": 44660 + }, + { + "epoch": 0.2864995045424955, + "grad_norm": 0.17010320723056793, + "learning_rate": 9.943133406764342e-06, + "loss": 0.0035, + "step": 44670 + }, + { + "epoch": 0.2865636414362816, + "grad_norm": 0.18060402572155, + "learning_rate": 9.94304920175636e-06, + "loss": 0.0054, + "step": 44680 + }, + { + "epoch": 0.2866277783300677, + "grad_norm": 0.03345949575304985, + "learning_rate": 9.942964934808442e-06, + "loss": 0.0057, + "step": 44690 + }, + { + "epoch": 0.2866919152238538, + "grad_norm": 0.23722924292087555, + "learning_rate": 9.942880605921637e-06, + "loss": 0.0067, + "step": 44700 + }, + { + "epoch": 0.2867560521176399, + "grad_norm": 0.37032759189605713, + "learning_rate": 9.942796215097007e-06, + "loss": 0.0051, + "step": 44710 + }, + { + "epoch": 0.286820189011426, + "grad_norm": 0.2218584567308426, + "learning_rate": 9.942711762335608e-06, + "loss": 0.0081, + "step": 44720 + }, + { + "epoch": 0.28688432590521207, + "grad_norm": 0.08566952496767044, + "learning_rate": 9.942627247638497e-06, + "loss": 0.0059, + "step": 44730 + }, + { + "epoch": 0.28694846279899816, + "grad_norm": 0.18911112844944, + "learning_rate": 9.942542671006734e-06, + "loss": 0.0052, + "step": 44740 + }, + { + "epoch": 0.28701259969278425, + "grad_norm": 0.3110847771167755, + "learning_rate": 9.94245803244138e-06, + "loss": 0.0053, + "step": 44750 + }, + { + "epoch": 0.2870767365865704, + "grad_norm": 0.2588106691837311, + "learning_rate": 9.942373331943494e-06, + "loss": 0.0042, + "step": 44760 + }, + { + "epoch": 0.2871408734803565, + "grad_norm": 0.1012803167104721, + "learning_rate": 9.942288569514139e-06, + "loss": 0.0048, + "step": 44770 + }, + { + "epoch": 0.2872050103741426, + "grad_norm": 0.24208807945251465, + "learning_rate": 9.942203745154375e-06, + "loss": 0.0065, + "step": 44780 + }, + { + "epoch": 0.2872691472679287, + "grad_norm": 0.1405334770679474, + "learning_rate": 9.942118858865266e-06, + "loss": 0.003, + "step": 44790 + }, + { + "epoch": 0.2873332841617148, + "grad_norm": 0.15158362686634064, + "learning_rate": 9.942033910647875e-06, + "loss": 0.0035, + "step": 44800 + }, + { + "epoch": 0.28739742105550087, + "grad_norm": 0.2788327932357788, + "learning_rate": 9.94194890050327e-06, + "loss": 0.0048, + "step": 44810 + }, + { + "epoch": 0.28746155794928696, + "grad_norm": 0.18016715347766876, + "learning_rate": 9.94186382843251e-06, + "loss": 0.0043, + "step": 44820 + }, + { + "epoch": 0.28752569484307305, + "grad_norm": 0.1494971513748169, + "learning_rate": 9.941778694436665e-06, + "loss": 0.0054, + "step": 44830 + }, + { + "epoch": 0.28758983173685915, + "grad_norm": 0.4434851109981537, + "learning_rate": 9.941693498516802e-06, + "loss": 0.0091, + "step": 44840 + }, + { + "epoch": 0.28765396863064524, + "grad_norm": 0.14780807495117188, + "learning_rate": 9.941608240673985e-06, + "loss": 0.0031, + "step": 44850 + }, + { + "epoch": 0.28771810552443133, + "grad_norm": 0.13415762782096863, + "learning_rate": 9.941522920909287e-06, + "loss": 0.0091, + "step": 44860 + }, + { + "epoch": 0.2877822424182174, + "grad_norm": 0.3207700252532959, + "learning_rate": 9.941437539223777e-06, + "loss": 0.0041, + "step": 44870 + }, + { + "epoch": 0.2878463793120035, + "grad_norm": 0.2459515631198883, + "learning_rate": 9.941352095618522e-06, + "loss": 0.0044, + "step": 44880 + }, + { + "epoch": 0.2879105162057896, + "grad_norm": 0.2570980191230774, + "learning_rate": 9.941266590094593e-06, + "loss": 0.0057, + "step": 44890 + }, + { + "epoch": 0.28797465309957576, + "grad_norm": 0.2937203347682953, + "learning_rate": 9.941181022653061e-06, + "loss": 0.0071, + "step": 44900 + }, + { + "epoch": 0.28803878999336185, + "grad_norm": 0.2644745409488678, + "learning_rate": 9.941095393295002e-06, + "loss": 0.0034, + "step": 44910 + }, + { + "epoch": 0.28810292688714795, + "grad_norm": 0.13715563714504242, + "learning_rate": 9.941009702021484e-06, + "loss": 0.0041, + "step": 44920 + }, + { + "epoch": 0.28816706378093404, + "grad_norm": 0.13606588542461395, + "learning_rate": 9.940923948833585e-06, + "loss": 0.0048, + "step": 44930 + }, + { + "epoch": 0.28823120067472013, + "grad_norm": 0.3833830952644348, + "learning_rate": 9.940838133732376e-06, + "loss": 0.0082, + "step": 44940 + }, + { + "epoch": 0.2882953375685062, + "grad_norm": 0.2200787365436554, + "learning_rate": 9.940752256718936e-06, + "loss": 0.005, + "step": 44950 + }, + { + "epoch": 0.2883594744622923, + "grad_norm": 0.19088926911354065, + "learning_rate": 9.940666317794337e-06, + "loss": 0.0073, + "step": 44960 + }, + { + "epoch": 0.2884236113560784, + "grad_norm": 0.3547663986682892, + "learning_rate": 9.94058031695966e-06, + "loss": 0.005, + "step": 44970 + }, + { + "epoch": 0.2884877482498645, + "grad_norm": 0.38969674706459045, + "learning_rate": 9.94049425421598e-06, + "loss": 0.0061, + "step": 44980 + }, + { + "epoch": 0.2885518851436506, + "grad_norm": 0.25331956148147583, + "learning_rate": 9.940408129564375e-06, + "loss": 0.0054, + "step": 44990 + }, + { + "epoch": 0.2886160220374367, + "grad_norm": 0.22191715240478516, + "learning_rate": 9.940321943005927e-06, + "loss": 0.0066, + "step": 45000 + }, + { + "epoch": 0.2886801589312228, + "grad_norm": 0.264688640832901, + "learning_rate": 9.940235694541712e-06, + "loss": 0.0068, + "step": 45010 + }, + { + "epoch": 0.2887442958250089, + "grad_norm": 0.3253937363624573, + "learning_rate": 9.940149384172815e-06, + "loss": 0.0056, + "step": 45020 + }, + { + "epoch": 0.28880843271879497, + "grad_norm": 0.6022482514381409, + "learning_rate": 9.940063011900314e-06, + "loss": 0.0065, + "step": 45030 + }, + { + "epoch": 0.2888725696125811, + "grad_norm": 0.21916916966438293, + "learning_rate": 9.939976577725294e-06, + "loss": 0.0043, + "step": 45040 + }, + { + "epoch": 0.2889367065063672, + "grad_norm": 0.13018429279327393, + "learning_rate": 9.939890081648837e-06, + "loss": 0.0047, + "step": 45050 + }, + { + "epoch": 0.2890008434001533, + "grad_norm": 0.2375756800174713, + "learning_rate": 9.939803523672027e-06, + "loss": 0.0067, + "step": 45060 + }, + { + "epoch": 0.2890649802939394, + "grad_norm": 0.049471210688352585, + "learning_rate": 9.939716903795947e-06, + "loss": 0.0047, + "step": 45070 + }, + { + "epoch": 0.2891291171877255, + "grad_norm": 0.09005885571241379, + "learning_rate": 9.939630222021685e-06, + "loss": 0.0045, + "step": 45080 + }, + { + "epoch": 0.2891932540815116, + "grad_norm": 0.23219124972820282, + "learning_rate": 9.939543478350327e-06, + "loss": 0.0037, + "step": 45090 + }, + { + "epoch": 0.2892573909752977, + "grad_norm": 0.2335500717163086, + "learning_rate": 9.939456672782957e-06, + "loss": 0.0038, + "step": 45100 + }, + { + "epoch": 0.28932152786908377, + "grad_norm": 0.20651297271251678, + "learning_rate": 9.939369805320664e-06, + "loss": 0.0043, + "step": 45110 + }, + { + "epoch": 0.28938566476286987, + "grad_norm": 0.30232393741607666, + "learning_rate": 9.93928287596454e-06, + "loss": 0.0057, + "step": 45120 + }, + { + "epoch": 0.28944980165665596, + "grad_norm": 0.32386380434036255, + "learning_rate": 9.939195884715669e-06, + "loss": 0.0055, + "step": 45130 + }, + { + "epoch": 0.28951393855044205, + "grad_norm": 0.1387457400560379, + "learning_rate": 9.939108831575144e-06, + "loss": 0.005, + "step": 45140 + }, + { + "epoch": 0.28957807544422814, + "grad_norm": 0.15047401189804077, + "learning_rate": 9.939021716544057e-06, + "loss": 0.0038, + "step": 45150 + }, + { + "epoch": 0.28964221233801424, + "grad_norm": 0.2504582107067108, + "learning_rate": 9.938934539623497e-06, + "loss": 0.006, + "step": 45160 + }, + { + "epoch": 0.28970634923180033, + "grad_norm": 0.0815175250172615, + "learning_rate": 9.938847300814558e-06, + "loss": 0.0078, + "step": 45170 + }, + { + "epoch": 0.2897704861255865, + "grad_norm": 0.11432450264692307, + "learning_rate": 9.938760000118333e-06, + "loss": 0.0037, + "step": 45180 + }, + { + "epoch": 0.2898346230193726, + "grad_norm": 0.4249918460845947, + "learning_rate": 9.938672637535913e-06, + "loss": 0.0045, + "step": 45190 + }, + { + "epoch": 0.28989875991315867, + "grad_norm": 0.2470531463623047, + "learning_rate": 9.938585213068398e-06, + "loss": 0.0034, + "step": 45200 + }, + { + "epoch": 0.28996289680694476, + "grad_norm": 0.15708892047405243, + "learning_rate": 9.938497726716879e-06, + "loss": 0.0035, + "step": 45210 + }, + { + "epoch": 0.29002703370073085, + "grad_norm": 0.2380772829055786, + "learning_rate": 9.938410178482455e-06, + "loss": 0.0047, + "step": 45220 + }, + { + "epoch": 0.29009117059451694, + "grad_norm": 0.21689799427986145, + "learning_rate": 9.93832256836622e-06, + "loss": 0.0069, + "step": 45230 + }, + { + "epoch": 0.29015530748830304, + "grad_norm": 0.15360818803310394, + "learning_rate": 9.938234896369276e-06, + "loss": 0.0048, + "step": 45240 + }, + { + "epoch": 0.29021944438208913, + "grad_norm": 0.1445130854845047, + "learning_rate": 9.93814716249272e-06, + "loss": 0.0046, + "step": 45250 + }, + { + "epoch": 0.2902835812758752, + "grad_norm": 0.21403834223747253, + "learning_rate": 9.93805936673765e-06, + "loss": 0.0041, + "step": 45260 + }, + { + "epoch": 0.2903477181696613, + "grad_norm": 0.29812654852867126, + "learning_rate": 9.937971509105166e-06, + "loss": 0.0068, + "step": 45270 + }, + { + "epoch": 0.2904118550634474, + "grad_norm": 0.08340287953615189, + "learning_rate": 9.93788358959637e-06, + "loss": 0.0063, + "step": 45280 + }, + { + "epoch": 0.2904759919572335, + "grad_norm": 0.21106629073619843, + "learning_rate": 9.937795608212367e-06, + "loss": 0.0092, + "step": 45290 + }, + { + "epoch": 0.2905401288510196, + "grad_norm": 0.06347090005874634, + "learning_rate": 9.937707564954251e-06, + "loss": 0.0082, + "step": 45300 + }, + { + "epoch": 0.2906042657448057, + "grad_norm": 0.2607266306877136, + "learning_rate": 9.937619459823133e-06, + "loss": 0.0048, + "step": 45310 + }, + { + "epoch": 0.2906684026385918, + "grad_norm": 0.13297231495380402, + "learning_rate": 9.937531292820114e-06, + "loss": 0.0063, + "step": 45320 + }, + { + "epoch": 0.29073253953237793, + "grad_norm": 0.3041931092739105, + "learning_rate": 9.9374430639463e-06, + "loss": 0.0048, + "step": 45330 + }, + { + "epoch": 0.290796676426164, + "grad_norm": 0.2948283553123474, + "learning_rate": 9.937354773202792e-06, + "loss": 0.005, + "step": 45340 + }, + { + "epoch": 0.2908608133199501, + "grad_norm": 0.4528610110282898, + "learning_rate": 9.937266420590702e-06, + "loss": 0.0056, + "step": 45350 + }, + { + "epoch": 0.2909249502137362, + "grad_norm": 0.2601458728313446, + "learning_rate": 9.937178006111138e-06, + "loss": 0.005, + "step": 45360 + }, + { + "epoch": 0.2909890871075223, + "grad_norm": 0.15300409495830536, + "learning_rate": 9.937089529765203e-06, + "loss": 0.0036, + "step": 45370 + }, + { + "epoch": 0.2910532240013084, + "grad_norm": 0.24543161690235138, + "learning_rate": 9.937000991554007e-06, + "loss": 0.0064, + "step": 45380 + }, + { + "epoch": 0.2911173608950945, + "grad_norm": 0.18194890022277832, + "learning_rate": 9.93691239147866e-06, + "loss": 0.0047, + "step": 45390 + }, + { + "epoch": 0.2911814977888806, + "grad_norm": 0.3864964246749878, + "learning_rate": 9.936823729540274e-06, + "loss": 0.008, + "step": 45400 + }, + { + "epoch": 0.2912456346826667, + "grad_norm": 0.014018191024661064, + "learning_rate": 9.936735005739958e-06, + "loss": 0.0046, + "step": 45410 + }, + { + "epoch": 0.29130977157645277, + "grad_norm": 0.29989856481552124, + "learning_rate": 9.936646220078823e-06, + "loss": 0.0059, + "step": 45420 + }, + { + "epoch": 0.29137390847023886, + "grad_norm": 0.4319022297859192, + "learning_rate": 9.936557372557982e-06, + "loss": 0.0051, + "step": 45430 + }, + { + "epoch": 0.29143804536402496, + "grad_norm": 0.25548070669174194, + "learning_rate": 9.93646846317855e-06, + "loss": 0.0041, + "step": 45440 + }, + { + "epoch": 0.29150218225781105, + "grad_norm": 0.1536119133234024, + "learning_rate": 9.936379491941641e-06, + "loss": 0.005, + "step": 45450 + }, + { + "epoch": 0.29156631915159714, + "grad_norm": 0.17052310705184937, + "learning_rate": 9.936290458848367e-06, + "loss": 0.005, + "step": 45460 + }, + { + "epoch": 0.2916304560453833, + "grad_norm": 0.7218708992004395, + "learning_rate": 9.93620136389985e-06, + "loss": 0.0033, + "step": 45470 + }, + { + "epoch": 0.2916945929391694, + "grad_norm": 0.2695336639881134, + "learning_rate": 9.936112207097197e-06, + "loss": 0.0031, + "step": 45480 + }, + { + "epoch": 0.2917587298329555, + "grad_norm": 0.26749128103256226, + "learning_rate": 9.936022988441533e-06, + "loss": 0.0056, + "step": 45490 + }, + { + "epoch": 0.29182286672674157, + "grad_norm": 0.3378068208694458, + "learning_rate": 9.935933707933972e-06, + "loss": 0.0059, + "step": 45500 + }, + { + "epoch": 0.29188700362052766, + "grad_norm": 0.2979477345943451, + "learning_rate": 9.935844365575635e-06, + "loss": 0.0055, + "step": 45510 + }, + { + "epoch": 0.29195114051431376, + "grad_norm": 0.22918939590454102, + "learning_rate": 9.93575496136764e-06, + "loss": 0.0087, + "step": 45520 + }, + { + "epoch": 0.29201527740809985, + "grad_norm": 0.6446087956428528, + "learning_rate": 9.935665495311108e-06, + "loss": 0.0098, + "step": 45530 + }, + { + "epoch": 0.29207941430188594, + "grad_norm": 0.2776643633842468, + "learning_rate": 9.93557596740716e-06, + "loss": 0.0055, + "step": 45540 + }, + { + "epoch": 0.29214355119567204, + "grad_norm": 0.18786029517650604, + "learning_rate": 9.935486377656917e-06, + "loss": 0.0047, + "step": 45550 + }, + { + "epoch": 0.29220768808945813, + "grad_norm": 0.45293381810188293, + "learning_rate": 9.935396726061503e-06, + "loss": 0.0076, + "step": 45560 + }, + { + "epoch": 0.2922718249832442, + "grad_norm": 0.17026039958000183, + "learning_rate": 9.93530701262204e-06, + "loss": 0.0037, + "step": 45570 + }, + { + "epoch": 0.2923359618770303, + "grad_norm": 0.19190262258052826, + "learning_rate": 9.935217237339654e-06, + "loss": 0.004, + "step": 45580 + }, + { + "epoch": 0.2924000987708164, + "grad_norm": 0.08137141168117523, + "learning_rate": 9.935127400215468e-06, + "loss": 0.0073, + "step": 45590 + }, + { + "epoch": 0.2924642356646025, + "grad_norm": 0.105504609644413, + "learning_rate": 9.935037501250608e-06, + "loss": 0.004, + "step": 45600 + }, + { + "epoch": 0.29252837255838865, + "grad_norm": 0.18592245876789093, + "learning_rate": 9.934947540446203e-06, + "loss": 0.0057, + "step": 45610 + }, + { + "epoch": 0.29259250945217474, + "grad_norm": 0.3530644476413727, + "learning_rate": 9.934857517803376e-06, + "loss": 0.0069, + "step": 45620 + }, + { + "epoch": 0.29265664634596084, + "grad_norm": 0.2561638355255127, + "learning_rate": 9.93476743332326e-06, + "loss": 0.0043, + "step": 45630 + }, + { + "epoch": 0.29272078323974693, + "grad_norm": 0.08145124465227127, + "learning_rate": 9.934677287006979e-06, + "loss": 0.0049, + "step": 45640 + }, + { + "epoch": 0.292784920133533, + "grad_norm": 0.26823779940605164, + "learning_rate": 9.934587078855666e-06, + "loss": 0.0052, + "step": 45650 + }, + { + "epoch": 0.2928490570273191, + "grad_norm": 0.21624739468097687, + "learning_rate": 9.93449680887045e-06, + "loss": 0.0056, + "step": 45660 + }, + { + "epoch": 0.2929131939211052, + "grad_norm": 0.1595057100057602, + "learning_rate": 9.934406477052463e-06, + "loss": 0.0056, + "step": 45670 + }, + { + "epoch": 0.2929773308148913, + "grad_norm": 0.09059811383485794, + "learning_rate": 9.934316083402834e-06, + "loss": 0.005, + "step": 45680 + }, + { + "epoch": 0.2930414677086774, + "grad_norm": 0.24553117156028748, + "learning_rate": 9.9342256279227e-06, + "loss": 0.0051, + "step": 45690 + }, + { + "epoch": 0.2931056046024635, + "grad_norm": 0.2377704530954361, + "learning_rate": 9.934135110613193e-06, + "loss": 0.0055, + "step": 45700 + }, + { + "epoch": 0.2931697414962496, + "grad_norm": 0.16822881996631622, + "learning_rate": 9.934044531475446e-06, + "loss": 0.0066, + "step": 45710 + }, + { + "epoch": 0.2932338783900357, + "grad_norm": 0.049672652035951614, + "learning_rate": 9.933953890510594e-06, + "loss": 0.0026, + "step": 45720 + }, + { + "epoch": 0.29329801528382177, + "grad_norm": 0.22833536565303802, + "learning_rate": 9.933863187719774e-06, + "loss": 0.0029, + "step": 45730 + }, + { + "epoch": 0.29336215217760786, + "grad_norm": 0.2805958688259125, + "learning_rate": 9.933772423104122e-06, + "loss": 0.0038, + "step": 45740 + }, + { + "epoch": 0.293426289071394, + "grad_norm": 0.14065445959568024, + "learning_rate": 9.933681596664778e-06, + "loss": 0.0047, + "step": 45750 + }, + { + "epoch": 0.2934904259651801, + "grad_norm": 0.3495698571205139, + "learning_rate": 9.933590708402873e-06, + "loss": 0.0055, + "step": 45760 + }, + { + "epoch": 0.2935545628589662, + "grad_norm": 0.21391554176807404, + "learning_rate": 9.933499758319554e-06, + "loss": 0.004, + "step": 45770 + }, + { + "epoch": 0.2936186997527523, + "grad_norm": 0.17593559622764587, + "learning_rate": 9.933408746415954e-06, + "loss": 0.0048, + "step": 45780 + }, + { + "epoch": 0.2936828366465384, + "grad_norm": 0.20354965329170227, + "learning_rate": 9.93331767269322e-06, + "loss": 0.0051, + "step": 45790 + }, + { + "epoch": 0.2937469735403245, + "grad_norm": 0.2857542335987091, + "learning_rate": 9.933226537152487e-06, + "loss": 0.0045, + "step": 45800 + }, + { + "epoch": 0.29381111043411057, + "grad_norm": 0.2183580994606018, + "learning_rate": 9.9331353397949e-06, + "loss": 0.0033, + "step": 45810 + }, + { + "epoch": 0.29387524732789666, + "grad_norm": 0.25809770822525024, + "learning_rate": 9.933044080621602e-06, + "loss": 0.0054, + "step": 45820 + }, + { + "epoch": 0.29393938422168275, + "grad_norm": 0.136774942278862, + "learning_rate": 9.932952759633736e-06, + "loss": 0.0035, + "step": 45830 + }, + { + "epoch": 0.29400352111546885, + "grad_norm": 0.2631630599498749, + "learning_rate": 9.932861376832449e-06, + "loss": 0.0041, + "step": 45840 + }, + { + "epoch": 0.29406765800925494, + "grad_norm": 0.39657431840896606, + "learning_rate": 9.932769932218879e-06, + "loss": 0.0068, + "step": 45850 + }, + { + "epoch": 0.29413179490304103, + "grad_norm": 0.23736067116260529, + "learning_rate": 9.93267842579418e-06, + "loss": 0.0037, + "step": 45860 + }, + { + "epoch": 0.2941959317968271, + "grad_norm": 0.1151050254702568, + "learning_rate": 9.932586857559492e-06, + "loss": 0.0035, + "step": 45870 + }, + { + "epoch": 0.2942600686906132, + "grad_norm": 0.1694098711013794, + "learning_rate": 9.932495227515968e-06, + "loss": 0.005, + "step": 45880 + }, + { + "epoch": 0.29432420558439937, + "grad_norm": 0.2936653792858124, + "learning_rate": 9.932403535664752e-06, + "loss": 0.0045, + "step": 45890 + }, + { + "epoch": 0.29438834247818546, + "grad_norm": 0.05786910280585289, + "learning_rate": 9.932311782006995e-06, + "loss": 0.0059, + "step": 45900 + }, + { + "epoch": 0.29445247937197155, + "grad_norm": 0.43607258796691895, + "learning_rate": 9.932219966543846e-06, + "loss": 0.0059, + "step": 45910 + }, + { + "epoch": 0.29451661626575765, + "grad_norm": 0.3279356360435486, + "learning_rate": 9.932128089276455e-06, + "loss": 0.0041, + "step": 45920 + }, + { + "epoch": 0.29458075315954374, + "grad_norm": 0.22652366757392883, + "learning_rate": 9.932036150205976e-06, + "loss": 0.0049, + "step": 45930 + }, + { + "epoch": 0.29464489005332983, + "grad_norm": 0.4583686888217926, + "learning_rate": 9.93194414933356e-06, + "loss": 0.0063, + "step": 45940 + }, + { + "epoch": 0.2947090269471159, + "grad_norm": 0.36138418316841125, + "learning_rate": 9.931852086660357e-06, + "loss": 0.005, + "step": 45950 + }, + { + "epoch": 0.294773163840902, + "grad_norm": 0.27291011810302734, + "learning_rate": 9.931759962187524e-06, + "loss": 0.0057, + "step": 45960 + }, + { + "epoch": 0.2948373007346881, + "grad_norm": 0.14691214263439178, + "learning_rate": 9.931667775916212e-06, + "loss": 0.0055, + "step": 45970 + }, + { + "epoch": 0.2949014376284742, + "grad_norm": 0.2556776702404022, + "learning_rate": 9.931575527847578e-06, + "loss": 0.0064, + "step": 45980 + }, + { + "epoch": 0.2949655745222603, + "grad_norm": 0.17870935797691345, + "learning_rate": 9.93148321798278e-06, + "loss": 0.0034, + "step": 45990 + }, + { + "epoch": 0.2950297114160464, + "grad_norm": 0.17593887448310852, + "learning_rate": 9.931390846322973e-06, + "loss": 0.0063, + "step": 46000 + }, + { + "epoch": 0.2950938483098325, + "grad_norm": 0.1608942449092865, + "learning_rate": 9.931298412869314e-06, + "loss": 0.0054, + "step": 46010 + }, + { + "epoch": 0.2951579852036186, + "grad_norm": 0.1781020164489746, + "learning_rate": 9.93120591762296e-06, + "loss": 0.0046, + "step": 46020 + }, + { + "epoch": 0.2952221220974047, + "grad_norm": 0.19401803612709045, + "learning_rate": 9.931113360585073e-06, + "loss": 0.0043, + "step": 46030 + }, + { + "epoch": 0.2952862589911908, + "grad_norm": 0.24899917840957642, + "learning_rate": 9.931020741756811e-06, + "loss": 0.0038, + "step": 46040 + }, + { + "epoch": 0.2953503958849769, + "grad_norm": 0.31500309705734253, + "learning_rate": 9.930928061139334e-06, + "loss": 0.0046, + "step": 46050 + }, + { + "epoch": 0.295414532778763, + "grad_norm": 0.12831856310367584, + "learning_rate": 9.930835318733806e-06, + "loss": 0.0032, + "step": 46060 + }, + { + "epoch": 0.2954786696725491, + "grad_norm": 0.32607731223106384, + "learning_rate": 9.930742514541387e-06, + "loss": 0.0077, + "step": 46070 + }, + { + "epoch": 0.2955428065663352, + "grad_norm": 0.42799004912376404, + "learning_rate": 9.93064964856324e-06, + "loss": 0.0067, + "step": 46080 + }, + { + "epoch": 0.2956069434601213, + "grad_norm": 0.20071062445640564, + "learning_rate": 9.930556720800527e-06, + "loss": 0.0049, + "step": 46090 + }, + { + "epoch": 0.2956710803539074, + "grad_norm": 0.1952655017375946, + "learning_rate": 9.930463731254419e-06, + "loss": 0.0063, + "step": 46100 + }, + { + "epoch": 0.29573521724769347, + "grad_norm": 0.1607760787010193, + "learning_rate": 9.930370679926073e-06, + "loss": 0.0036, + "step": 46110 + }, + { + "epoch": 0.29579935414147956, + "grad_norm": 0.3157738149166107, + "learning_rate": 9.93027756681666e-06, + "loss": 0.0051, + "step": 46120 + }, + { + "epoch": 0.29586349103526566, + "grad_norm": 0.06405570358037949, + "learning_rate": 9.930184391927344e-06, + "loss": 0.0025, + "step": 46130 + }, + { + "epoch": 0.29592762792905175, + "grad_norm": 0.2306731790304184, + "learning_rate": 9.930091155259296e-06, + "loss": 0.0053, + "step": 46140 + }, + { + "epoch": 0.29599176482283784, + "grad_norm": 0.410203754901886, + "learning_rate": 9.929997856813682e-06, + "loss": 0.0061, + "step": 46150 + }, + { + "epoch": 0.29605590171662394, + "grad_norm": 0.2868666350841522, + "learning_rate": 9.929904496591672e-06, + "loss": 0.0087, + "step": 46160 + }, + { + "epoch": 0.2961200386104101, + "grad_norm": 0.367072731256485, + "learning_rate": 9.929811074594434e-06, + "loss": 0.0046, + "step": 46170 + }, + { + "epoch": 0.2961841755041962, + "grad_norm": 0.19694176316261292, + "learning_rate": 9.92971759082314e-06, + "loss": 0.0042, + "step": 46180 + }, + { + "epoch": 0.29624831239798227, + "grad_norm": 0.17494481801986694, + "learning_rate": 9.929624045278962e-06, + "loss": 0.0043, + "step": 46190 + }, + { + "epoch": 0.29631244929176837, + "grad_norm": 0.3938837945461273, + "learning_rate": 9.92953043796307e-06, + "loss": 0.0029, + "step": 46200 + }, + { + "epoch": 0.29637658618555446, + "grad_norm": 0.39776164293289185, + "learning_rate": 9.929436768876642e-06, + "loss": 0.0039, + "step": 46210 + }, + { + "epoch": 0.29644072307934055, + "grad_norm": 0.2527408301830292, + "learning_rate": 9.929343038020845e-06, + "loss": 0.0038, + "step": 46220 + }, + { + "epoch": 0.29650485997312664, + "grad_norm": 0.25211331248283386, + "learning_rate": 9.929249245396858e-06, + "loss": 0.005, + "step": 46230 + }, + { + "epoch": 0.29656899686691274, + "grad_norm": 0.17463168501853943, + "learning_rate": 9.929155391005857e-06, + "loss": 0.0052, + "step": 46240 + }, + { + "epoch": 0.29663313376069883, + "grad_norm": 0.26096269488334656, + "learning_rate": 9.929061474849013e-06, + "loss": 0.0045, + "step": 46250 + }, + { + "epoch": 0.2966972706544849, + "grad_norm": 0.05219439044594765, + "learning_rate": 9.928967496927507e-06, + "loss": 0.0036, + "step": 46260 + }, + { + "epoch": 0.296761407548271, + "grad_norm": 0.17849355936050415, + "learning_rate": 9.928873457242515e-06, + "loss": 0.0069, + "step": 46270 + }, + { + "epoch": 0.2968255444420571, + "grad_norm": 0.2509777247905731, + "learning_rate": 9.928779355795217e-06, + "loss": 0.0052, + "step": 46280 + }, + { + "epoch": 0.2968896813358432, + "grad_norm": 0.7019671201705933, + "learning_rate": 9.92868519258679e-06, + "loss": 0.0044, + "step": 46290 + }, + { + "epoch": 0.2969538182296293, + "grad_norm": 0.2720184922218323, + "learning_rate": 9.928590967618417e-06, + "loss": 0.0103, + "step": 46300 + }, + { + "epoch": 0.29701795512341544, + "grad_norm": 0.17571432888507843, + "learning_rate": 9.928496680891276e-06, + "loss": 0.0037, + "step": 46310 + }, + { + "epoch": 0.29708209201720154, + "grad_norm": 0.28019189834594727, + "learning_rate": 9.928402332406549e-06, + "loss": 0.0108, + "step": 46320 + }, + { + "epoch": 0.29714622891098763, + "grad_norm": 0.24343018233776093, + "learning_rate": 9.928307922165417e-06, + "loss": 0.0067, + "step": 46330 + }, + { + "epoch": 0.2972103658047737, + "grad_norm": 0.40319544076919556, + "learning_rate": 9.928213450169066e-06, + "loss": 0.0045, + "step": 46340 + }, + { + "epoch": 0.2972745026985598, + "grad_norm": 0.23438304662704468, + "learning_rate": 9.92811891641868e-06, + "loss": 0.0085, + "step": 46350 + }, + { + "epoch": 0.2973386395923459, + "grad_norm": 0.23929394781589508, + "learning_rate": 9.928024320915438e-06, + "loss": 0.0087, + "step": 46360 + }, + { + "epoch": 0.297402776486132, + "grad_norm": 0.1507410705089569, + "learning_rate": 9.927929663660532e-06, + "loss": 0.003, + "step": 46370 + }, + { + "epoch": 0.2974669133799181, + "grad_norm": 0.38207119703292847, + "learning_rate": 9.927834944655144e-06, + "loss": 0.0036, + "step": 46380 + }, + { + "epoch": 0.2975310502737042, + "grad_norm": 0.24152018129825592, + "learning_rate": 9.927740163900463e-06, + "loss": 0.0072, + "step": 46390 + }, + { + "epoch": 0.2975951871674903, + "grad_norm": 0.43097245693206787, + "learning_rate": 9.927645321397676e-06, + "loss": 0.0073, + "step": 46400 + }, + { + "epoch": 0.2976593240612764, + "grad_norm": 0.25575006008148193, + "learning_rate": 9.927550417147971e-06, + "loss": 0.006, + "step": 46410 + }, + { + "epoch": 0.29772346095506247, + "grad_norm": 0.26496264338493347, + "learning_rate": 9.92745545115254e-06, + "loss": 0.0083, + "step": 46420 + }, + { + "epoch": 0.29778759784884856, + "grad_norm": 0.18561311066150665, + "learning_rate": 9.927360423412566e-06, + "loss": 0.0056, + "step": 46430 + }, + { + "epoch": 0.29785173474263466, + "grad_norm": 0.3656439781188965, + "learning_rate": 9.927265333929248e-06, + "loss": 0.0033, + "step": 46440 + }, + { + "epoch": 0.2979158716364208, + "grad_norm": 0.17973309755325317, + "learning_rate": 9.927170182703772e-06, + "loss": 0.0109, + "step": 46450 + }, + { + "epoch": 0.2979800085302069, + "grad_norm": 0.12009453773498535, + "learning_rate": 9.927074969737334e-06, + "loss": 0.0045, + "step": 46460 + }, + { + "epoch": 0.298044145423993, + "grad_norm": 0.886565089225769, + "learning_rate": 9.926979695031126e-06, + "loss": 0.0054, + "step": 46470 + }, + { + "epoch": 0.2981082823177791, + "grad_norm": 0.07676417380571365, + "learning_rate": 9.926884358586337e-06, + "loss": 0.0037, + "step": 46480 + }, + { + "epoch": 0.2981724192115652, + "grad_norm": 0.23020076751708984, + "learning_rate": 9.926788960404169e-06, + "loss": 0.0042, + "step": 46490 + }, + { + "epoch": 0.29823655610535127, + "grad_norm": 0.2501325309276581, + "learning_rate": 9.926693500485814e-06, + "loss": 0.0075, + "step": 46500 + }, + { + "epoch": 0.29830069299913736, + "grad_norm": 0.2389826774597168, + "learning_rate": 9.926597978832467e-06, + "loss": 0.0053, + "step": 46510 + }, + { + "epoch": 0.29836482989292346, + "grad_norm": 0.0756034404039383, + "learning_rate": 9.926502395445328e-06, + "loss": 0.0044, + "step": 46520 + }, + { + "epoch": 0.29842896678670955, + "grad_norm": 0.3056093752384186, + "learning_rate": 9.926406750325591e-06, + "loss": 0.004, + "step": 46530 + }, + { + "epoch": 0.29849310368049564, + "grad_norm": 0.15931715071201324, + "learning_rate": 9.92631104347446e-06, + "loss": 0.0086, + "step": 46540 + }, + { + "epoch": 0.29855724057428173, + "grad_norm": 0.2678433954715729, + "learning_rate": 9.926215274893128e-06, + "loss": 0.0063, + "step": 46550 + }, + { + "epoch": 0.29862137746806783, + "grad_norm": 0.0851227343082428, + "learning_rate": 9.926119444582798e-06, + "loss": 0.0084, + "step": 46560 + }, + { + "epoch": 0.2986855143618539, + "grad_norm": 0.17011059820652008, + "learning_rate": 9.92602355254467e-06, + "loss": 0.006, + "step": 46570 + }, + { + "epoch": 0.29874965125564, + "grad_norm": 0.3575952351093292, + "learning_rate": 9.925927598779948e-06, + "loss": 0.0053, + "step": 46580 + }, + { + "epoch": 0.29881378814942616, + "grad_norm": 0.1385606825351715, + "learning_rate": 9.925831583289834e-06, + "loss": 0.0056, + "step": 46590 + }, + { + "epoch": 0.29887792504321226, + "grad_norm": 0.2562883198261261, + "learning_rate": 9.925735506075526e-06, + "loss": 0.0045, + "step": 46600 + }, + { + "epoch": 0.29894206193699835, + "grad_norm": 0.1414983868598938, + "learning_rate": 9.925639367138235e-06, + "loss": 0.0042, + "step": 46610 + }, + { + "epoch": 0.29900619883078444, + "grad_norm": 0.5601732730865479, + "learning_rate": 9.925543166479162e-06, + "loss": 0.0057, + "step": 46620 + }, + { + "epoch": 0.29907033572457054, + "grad_norm": 0.2273009717464447, + "learning_rate": 9.925446904099511e-06, + "loss": 0.006, + "step": 46630 + }, + { + "epoch": 0.29913447261835663, + "grad_norm": 0.3177121579647064, + "learning_rate": 9.925350580000493e-06, + "loss": 0.0045, + "step": 46640 + }, + { + "epoch": 0.2991986095121427, + "grad_norm": 0.8014094233512878, + "learning_rate": 9.92525419418331e-06, + "loss": 0.0049, + "step": 46650 + }, + { + "epoch": 0.2992627464059288, + "grad_norm": 0.26341408491134644, + "learning_rate": 9.92515774664917e-06, + "loss": 0.0076, + "step": 46660 + }, + { + "epoch": 0.2993268832997149, + "grad_norm": 0.43867725133895874, + "learning_rate": 9.925061237399287e-06, + "loss": 0.0052, + "step": 46670 + }, + { + "epoch": 0.299391020193501, + "grad_norm": 0.1924295425415039, + "learning_rate": 9.924964666434866e-06, + "loss": 0.0054, + "step": 46680 + }, + { + "epoch": 0.2994551570872871, + "grad_norm": 0.5385870337486267, + "learning_rate": 9.924868033757119e-06, + "loss": 0.0079, + "step": 46690 + }, + { + "epoch": 0.2995192939810732, + "grad_norm": 0.28352195024490356, + "learning_rate": 9.924771339367253e-06, + "loss": 0.005, + "step": 46700 + }, + { + "epoch": 0.2995834308748593, + "grad_norm": 0.2408374845981598, + "learning_rate": 9.924674583266483e-06, + "loss": 0.0045, + "step": 46710 + }, + { + "epoch": 0.2996475677686454, + "grad_norm": 0.12736321985721588, + "learning_rate": 9.924577765456023e-06, + "loss": 0.0051, + "step": 46720 + }, + { + "epoch": 0.29971170466243147, + "grad_norm": 0.12144535034894943, + "learning_rate": 9.924480885937082e-06, + "loss": 0.0059, + "step": 46730 + }, + { + "epoch": 0.2997758415562176, + "grad_norm": 0.09012685716152191, + "learning_rate": 9.924383944710875e-06, + "loss": 0.0039, + "step": 46740 + }, + { + "epoch": 0.2998399784500037, + "grad_norm": 0.14406102895736694, + "learning_rate": 9.92428694177862e-06, + "loss": 0.0042, + "step": 46750 + }, + { + "epoch": 0.2999041153437898, + "grad_norm": 0.05325210839509964, + "learning_rate": 9.92418987714153e-06, + "loss": 0.0039, + "step": 46760 + }, + { + "epoch": 0.2999682522375759, + "grad_norm": 0.19741320610046387, + "learning_rate": 9.924092750800823e-06, + "loss": 0.0049, + "step": 46770 + }, + { + "epoch": 0.300032389131362, + "grad_norm": 0.15774212777614594, + "learning_rate": 9.92399556275771e-06, + "loss": 0.005, + "step": 46780 + }, + { + "epoch": 0.3000965260251481, + "grad_norm": 0.273052453994751, + "learning_rate": 9.923898313013419e-06, + "loss": 0.0058, + "step": 46790 + }, + { + "epoch": 0.3001606629189342, + "grad_norm": 0.15874101221561432, + "learning_rate": 9.92380100156916e-06, + "loss": 0.0044, + "step": 46800 + }, + { + "epoch": 0.30022479981272027, + "grad_norm": 0.11979794502258301, + "learning_rate": 9.923703628426155e-06, + "loss": 0.0041, + "step": 46810 + }, + { + "epoch": 0.30028893670650636, + "grad_norm": 0.1807662546634674, + "learning_rate": 9.923606193585627e-06, + "loss": 0.005, + "step": 46820 + }, + { + "epoch": 0.30035307360029245, + "grad_norm": 0.3019179701805115, + "learning_rate": 9.923508697048792e-06, + "loss": 0.0092, + "step": 46830 + }, + { + "epoch": 0.30041721049407855, + "grad_norm": 0.2532137930393219, + "learning_rate": 9.923411138816876e-06, + "loss": 0.0072, + "step": 46840 + }, + { + "epoch": 0.30048134738786464, + "grad_norm": 0.2682390511035919, + "learning_rate": 9.923313518891099e-06, + "loss": 0.0069, + "step": 46850 + }, + { + "epoch": 0.30054548428165073, + "grad_norm": 0.2450132519006729, + "learning_rate": 9.923215837272684e-06, + "loss": 0.0052, + "step": 46860 + }, + { + "epoch": 0.3006096211754368, + "grad_norm": 0.24705521762371063, + "learning_rate": 9.923118093962858e-06, + "loss": 0.0047, + "step": 46870 + }, + { + "epoch": 0.300673758069223, + "grad_norm": 0.08048294484615326, + "learning_rate": 9.923020288962843e-06, + "loss": 0.0068, + "step": 46880 + }, + { + "epoch": 0.30073789496300907, + "grad_norm": 0.13034740090370178, + "learning_rate": 9.922922422273866e-06, + "loss": 0.0052, + "step": 46890 + }, + { + "epoch": 0.30080203185679516, + "grad_norm": 0.20540949702262878, + "learning_rate": 9.922824493897153e-06, + "loss": 0.0056, + "step": 46900 + }, + { + "epoch": 0.30086616875058125, + "grad_norm": 0.28085941076278687, + "learning_rate": 9.922726503833928e-06, + "loss": 0.0048, + "step": 46910 + }, + { + "epoch": 0.30093030564436735, + "grad_norm": 0.25790196657180786, + "learning_rate": 9.922628452085423e-06, + "loss": 0.0042, + "step": 46920 + }, + { + "epoch": 0.30099444253815344, + "grad_norm": 0.21742090582847595, + "learning_rate": 9.922530338652867e-06, + "loss": 0.007, + "step": 46930 + }, + { + "epoch": 0.30105857943193953, + "grad_norm": 0.19471445679664612, + "learning_rate": 9.922432163537486e-06, + "loss": 0.0051, + "step": 46940 + }, + { + "epoch": 0.3011227163257256, + "grad_norm": 0.109901562333107, + "learning_rate": 9.922333926740513e-06, + "loss": 0.0036, + "step": 46950 + }, + { + "epoch": 0.3011868532195117, + "grad_norm": 0.24060367047786713, + "learning_rate": 9.922235628263177e-06, + "loss": 0.0051, + "step": 46960 + }, + { + "epoch": 0.3012509901132978, + "grad_norm": 0.09304339438676834, + "learning_rate": 9.922137268106711e-06, + "loss": 0.0045, + "step": 46970 + }, + { + "epoch": 0.3013151270070839, + "grad_norm": 0.3272383511066437, + "learning_rate": 9.922038846272347e-06, + "loss": 0.0069, + "step": 46980 + }, + { + "epoch": 0.30137926390087, + "grad_norm": 0.2325884848833084, + "learning_rate": 9.92194036276132e-06, + "loss": 0.0043, + "step": 46990 + }, + { + "epoch": 0.3014434007946561, + "grad_norm": 0.17553608119487762, + "learning_rate": 9.92184181757486e-06, + "loss": 0.0059, + "step": 47000 + }, + { + "epoch": 0.3015075376884422, + "grad_norm": 0.15212492644786835, + "learning_rate": 9.921743210714207e-06, + "loss": 0.0052, + "step": 47010 + }, + { + "epoch": 0.30157167458222833, + "grad_norm": 0.5152463316917419, + "learning_rate": 9.921644542180593e-06, + "loss": 0.0074, + "step": 47020 + }, + { + "epoch": 0.3016358114760144, + "grad_norm": 0.1545516699552536, + "learning_rate": 9.921545811975254e-06, + "loss": 0.0046, + "step": 47030 + }, + { + "epoch": 0.3016999483698005, + "grad_norm": 0.25146082043647766, + "learning_rate": 9.921447020099431e-06, + "loss": 0.0056, + "step": 47040 + }, + { + "epoch": 0.3017640852635866, + "grad_norm": 0.15383067727088928, + "learning_rate": 9.921348166554359e-06, + "loss": 0.0065, + "step": 47050 + }, + { + "epoch": 0.3018282221573727, + "grad_norm": 0.361632764339447, + "learning_rate": 9.921249251341276e-06, + "loss": 0.005, + "step": 47060 + }, + { + "epoch": 0.3018923590511588, + "grad_norm": 0.09196867793798447, + "learning_rate": 9.921150274461424e-06, + "loss": 0.0088, + "step": 47070 + }, + { + "epoch": 0.3019564959449449, + "grad_norm": 0.29238617420196533, + "learning_rate": 9.921051235916042e-06, + "loss": 0.0038, + "step": 47080 + }, + { + "epoch": 0.302020632838731, + "grad_norm": 0.2663821578025818, + "learning_rate": 9.92095213570637e-06, + "loss": 0.0089, + "step": 47090 + }, + { + "epoch": 0.3020847697325171, + "grad_norm": 0.46504130959510803, + "learning_rate": 9.920852973833651e-06, + "loss": 0.0062, + "step": 47100 + }, + { + "epoch": 0.30214890662630317, + "grad_norm": 0.2274145931005478, + "learning_rate": 9.920753750299128e-06, + "loss": 0.0058, + "step": 47110 + }, + { + "epoch": 0.30221304352008926, + "grad_norm": 0.10128103941679001, + "learning_rate": 9.920654465104042e-06, + "loss": 0.0057, + "step": 47120 + }, + { + "epoch": 0.30227718041387536, + "grad_norm": 0.1892748475074768, + "learning_rate": 9.920555118249641e-06, + "loss": 0.0031, + "step": 47130 + }, + { + "epoch": 0.30234131730766145, + "grad_norm": 0.30800819396972656, + "learning_rate": 9.920455709737166e-06, + "loss": 0.0045, + "step": 47140 + }, + { + "epoch": 0.30240545420144754, + "grad_norm": 0.3005007803440094, + "learning_rate": 9.920356239567867e-06, + "loss": 0.0048, + "step": 47150 + }, + { + "epoch": 0.3024695910952337, + "grad_norm": 0.30953630805015564, + "learning_rate": 9.920256707742985e-06, + "loss": 0.0048, + "step": 47160 + }, + { + "epoch": 0.3025337279890198, + "grad_norm": 0.26445797085762024, + "learning_rate": 9.920157114263772e-06, + "loss": 0.0074, + "step": 47170 + }, + { + "epoch": 0.3025978648828059, + "grad_norm": 0.2116151750087738, + "learning_rate": 9.920057459131472e-06, + "loss": 0.0055, + "step": 47180 + }, + { + "epoch": 0.30266200177659197, + "grad_norm": 0.14083783328533173, + "learning_rate": 9.919957742347336e-06, + "loss": 0.0051, + "step": 47190 + }, + { + "epoch": 0.30272613867037806, + "grad_norm": 0.456478089094162, + "learning_rate": 9.919857963912615e-06, + "loss": 0.0069, + "step": 47200 + }, + { + "epoch": 0.30279027556416416, + "grad_norm": 0.10006704926490784, + "learning_rate": 9.919758123828558e-06, + "loss": 0.0037, + "step": 47210 + }, + { + "epoch": 0.30285441245795025, + "grad_norm": 0.05251394584774971, + "learning_rate": 9.919658222096414e-06, + "loss": 0.0026, + "step": 47220 + }, + { + "epoch": 0.30291854935173634, + "grad_norm": 0.17398954927921295, + "learning_rate": 9.919558258717436e-06, + "loss": 0.0052, + "step": 47230 + }, + { + "epoch": 0.30298268624552244, + "grad_norm": 0.07287753373384476, + "learning_rate": 9.919458233692877e-06, + "loss": 0.0023, + "step": 47240 + }, + { + "epoch": 0.30304682313930853, + "grad_norm": 0.17693035304546356, + "learning_rate": 9.919358147023992e-06, + "loss": 0.0065, + "step": 47250 + }, + { + "epoch": 0.3031109600330946, + "grad_norm": 0.06258239597082138, + "learning_rate": 9.919257998712032e-06, + "loss": 0.0048, + "step": 47260 + }, + { + "epoch": 0.3031750969268807, + "grad_norm": 0.1813821941614151, + "learning_rate": 9.919157788758253e-06, + "loss": 0.0064, + "step": 47270 + }, + { + "epoch": 0.3032392338206668, + "grad_norm": 0.12876521050930023, + "learning_rate": 9.919057517163913e-06, + "loss": 0.0058, + "step": 47280 + }, + { + "epoch": 0.3033033707144529, + "grad_norm": 0.28710272908210754, + "learning_rate": 9.918957183930267e-06, + "loss": 0.0034, + "step": 47290 + }, + { + "epoch": 0.30336750760823905, + "grad_norm": 0.22267496585845947, + "learning_rate": 9.91885678905857e-06, + "loss": 0.0053, + "step": 47300 + }, + { + "epoch": 0.30343164450202514, + "grad_norm": 0.24553196132183075, + "learning_rate": 9.918756332550084e-06, + "loss": 0.0052, + "step": 47310 + }, + { + "epoch": 0.30349578139581124, + "grad_norm": 0.13842783868312836, + "learning_rate": 9.918655814406064e-06, + "loss": 0.0057, + "step": 47320 + }, + { + "epoch": 0.30355991828959733, + "grad_norm": 0.25684648752212524, + "learning_rate": 9.918555234627773e-06, + "loss": 0.0039, + "step": 47330 + }, + { + "epoch": 0.3036240551833834, + "grad_norm": 0.11633238941431046, + "learning_rate": 9.91845459321647e-06, + "loss": 0.0045, + "step": 47340 + }, + { + "epoch": 0.3036881920771695, + "grad_norm": 0.18775558471679688, + "learning_rate": 9.918353890173414e-06, + "loss": 0.0034, + "step": 47350 + }, + { + "epoch": 0.3037523289709556, + "grad_norm": 0.14034156501293182, + "learning_rate": 9.918253125499869e-06, + "loss": 0.0046, + "step": 47360 + }, + { + "epoch": 0.3038164658647417, + "grad_norm": 0.05788616091012955, + "learning_rate": 9.918152299197097e-06, + "loss": 0.0029, + "step": 47370 + }, + { + "epoch": 0.3038806027585278, + "grad_norm": 0.1960693895816803, + "learning_rate": 9.918051411266363e-06, + "loss": 0.004, + "step": 47380 + }, + { + "epoch": 0.3039447396523139, + "grad_norm": 0.30320316553115845, + "learning_rate": 9.917950461708929e-06, + "loss": 0.0038, + "step": 47390 + }, + { + "epoch": 0.3040088765461, + "grad_norm": 0.29490649700164795, + "learning_rate": 9.917849450526061e-06, + "loss": 0.0048, + "step": 47400 + }, + { + "epoch": 0.3040730134398861, + "grad_norm": 0.20866785943508148, + "learning_rate": 9.917748377719025e-06, + "loss": 0.0056, + "step": 47410 + }, + { + "epoch": 0.30413715033367217, + "grad_norm": 0.12553279101848602, + "learning_rate": 9.917647243289087e-06, + "loss": 0.0062, + "step": 47420 + }, + { + "epoch": 0.30420128722745826, + "grad_norm": 0.48011791706085205, + "learning_rate": 9.917546047237513e-06, + "loss": 0.0049, + "step": 47430 + }, + { + "epoch": 0.3042654241212444, + "grad_norm": 0.16010598838329315, + "learning_rate": 9.917444789565576e-06, + "loss": 0.0045, + "step": 47440 + }, + { + "epoch": 0.3043295610150305, + "grad_norm": 0.05319499596953392, + "learning_rate": 9.917343470274539e-06, + "loss": 0.005, + "step": 47450 + }, + { + "epoch": 0.3043936979088166, + "grad_norm": 0.3981022834777832, + "learning_rate": 9.917242089365674e-06, + "loss": 0.0045, + "step": 47460 + }, + { + "epoch": 0.3044578348026027, + "grad_norm": 0.4476795196533203, + "learning_rate": 9.917140646840252e-06, + "loss": 0.0075, + "step": 47470 + }, + { + "epoch": 0.3045219716963888, + "grad_norm": 0.2710070013999939, + "learning_rate": 9.917039142699542e-06, + "loss": 0.0056, + "step": 47480 + }, + { + "epoch": 0.3045861085901749, + "grad_norm": 0.14953048527240753, + "learning_rate": 9.91693757694482e-06, + "loss": 0.0052, + "step": 47490 + }, + { + "epoch": 0.30465024548396097, + "grad_norm": 0.221146821975708, + "learning_rate": 9.916835949577355e-06, + "loss": 0.0061, + "step": 47500 + }, + { + "epoch": 0.30471438237774706, + "grad_norm": 0.22094900906085968, + "learning_rate": 9.91673426059842e-06, + "loss": 0.0035, + "step": 47510 + }, + { + "epoch": 0.30477851927153315, + "grad_norm": 0.16071777045726776, + "learning_rate": 9.916632510009292e-06, + "loss": 0.0048, + "step": 47520 + }, + { + "epoch": 0.30484265616531925, + "grad_norm": 0.07116349786520004, + "learning_rate": 9.916530697811244e-06, + "loss": 0.0051, + "step": 47530 + }, + { + "epoch": 0.30490679305910534, + "grad_norm": 0.17552363872528076, + "learning_rate": 9.916428824005554e-06, + "loss": 0.0053, + "step": 47540 + }, + { + "epoch": 0.30497092995289143, + "grad_norm": 0.15659548342227936, + "learning_rate": 9.916326888593498e-06, + "loss": 0.0051, + "step": 47550 + }, + { + "epoch": 0.3050350668466775, + "grad_norm": 0.15439796447753906, + "learning_rate": 9.916224891576349e-06, + "loss": 0.0034, + "step": 47560 + }, + { + "epoch": 0.3050992037404636, + "grad_norm": 0.2635513246059418, + "learning_rate": 9.91612283295539e-06, + "loss": 0.0057, + "step": 47570 + }, + { + "epoch": 0.30516334063424977, + "grad_norm": 0.42883941531181335, + "learning_rate": 9.9160207127319e-06, + "loss": 0.0099, + "step": 47580 + }, + { + "epoch": 0.30522747752803586, + "grad_norm": 0.19080328941345215, + "learning_rate": 9.915918530907155e-06, + "loss": 0.0046, + "step": 47590 + }, + { + "epoch": 0.30529161442182196, + "grad_norm": 0.12923285365104675, + "learning_rate": 9.915816287482438e-06, + "loss": 0.0041, + "step": 47600 + }, + { + "epoch": 0.30535575131560805, + "grad_norm": 0.3636232018470764, + "learning_rate": 9.915713982459029e-06, + "loss": 0.004, + "step": 47610 + }, + { + "epoch": 0.30541988820939414, + "grad_norm": 0.09407957643270493, + "learning_rate": 9.915611615838212e-06, + "loss": 0.0054, + "step": 47620 + }, + { + "epoch": 0.30548402510318023, + "grad_norm": 0.25751787424087524, + "learning_rate": 9.915509187621269e-06, + "loss": 0.0038, + "step": 47630 + }, + { + "epoch": 0.3055481619969663, + "grad_norm": 0.17965631186962128, + "learning_rate": 9.91540669780948e-06, + "loss": 0.0037, + "step": 47640 + }, + { + "epoch": 0.3056122988907524, + "grad_norm": 0.24733319878578186, + "learning_rate": 9.915304146404135e-06, + "loss": 0.0068, + "step": 47650 + }, + { + "epoch": 0.3056764357845385, + "grad_norm": 0.030508002266287804, + "learning_rate": 9.915201533406514e-06, + "loss": 0.0046, + "step": 47660 + }, + { + "epoch": 0.3057405726783246, + "grad_norm": 0.1589413732290268, + "learning_rate": 9.915098858817907e-06, + "loss": 0.003, + "step": 47670 + }, + { + "epoch": 0.3058047095721107, + "grad_norm": 0.4408036768436432, + "learning_rate": 9.914996122639596e-06, + "loss": 0.0061, + "step": 47680 + }, + { + "epoch": 0.3058688464658968, + "grad_norm": 0.18685586750507355, + "learning_rate": 9.914893324872871e-06, + "loss": 0.0028, + "step": 47690 + }, + { + "epoch": 0.3059329833596829, + "grad_norm": 0.282780259847641, + "learning_rate": 9.914790465519021e-06, + "loss": 0.0043, + "step": 47700 + }, + { + "epoch": 0.305997120253469, + "grad_norm": 0.27768674492836, + "learning_rate": 9.914687544579335e-06, + "loss": 0.0053, + "step": 47710 + }, + { + "epoch": 0.30606125714725513, + "grad_norm": 0.40035441517829895, + "learning_rate": 9.9145845620551e-06, + "loss": 0.0049, + "step": 47720 + }, + { + "epoch": 0.3061253940410412, + "grad_norm": 0.014787815511226654, + "learning_rate": 9.914481517947609e-06, + "loss": 0.0048, + "step": 47730 + }, + { + "epoch": 0.3061895309348273, + "grad_norm": 0.5116310715675354, + "learning_rate": 9.914378412258151e-06, + "loss": 0.0061, + "step": 47740 + }, + { + "epoch": 0.3062536678286134, + "grad_norm": 0.18756724894046783, + "learning_rate": 9.914275244988021e-06, + "loss": 0.003, + "step": 47750 + }, + { + "epoch": 0.3063178047223995, + "grad_norm": 0.2650206387042999, + "learning_rate": 9.91417201613851e-06, + "loss": 0.0059, + "step": 47760 + }, + { + "epoch": 0.3063819416161856, + "grad_norm": 0.24275191128253937, + "learning_rate": 9.914068725710912e-06, + "loss": 0.0057, + "step": 47770 + }, + { + "epoch": 0.3064460785099717, + "grad_norm": 0.36543434858322144, + "learning_rate": 9.91396537370652e-06, + "loss": 0.0052, + "step": 47780 + }, + { + "epoch": 0.3065102154037578, + "grad_norm": 0.18792115151882172, + "learning_rate": 9.913861960126628e-06, + "loss": 0.0038, + "step": 47790 + }, + { + "epoch": 0.3065743522975439, + "grad_norm": 0.14406542479991913, + "learning_rate": 9.913758484972536e-06, + "loss": 0.0039, + "step": 47800 + }, + { + "epoch": 0.30663848919132997, + "grad_norm": 0.12238479405641556, + "learning_rate": 9.913654948245536e-06, + "loss": 0.0034, + "step": 47810 + }, + { + "epoch": 0.30670262608511606, + "grad_norm": 0.2577309012413025, + "learning_rate": 9.913551349946931e-06, + "loss": 0.0028, + "step": 47820 + }, + { + "epoch": 0.30676676297890215, + "grad_norm": 0.18023546040058136, + "learning_rate": 9.913447690078012e-06, + "loss": 0.0056, + "step": 47830 + }, + { + "epoch": 0.30683089987268825, + "grad_norm": 0.105728380382061, + "learning_rate": 9.913343968640085e-06, + "loss": 0.0029, + "step": 47840 + }, + { + "epoch": 0.30689503676647434, + "grad_norm": 0.0686994418501854, + "learning_rate": 9.913240185634448e-06, + "loss": 0.0042, + "step": 47850 + }, + { + "epoch": 0.3069591736602605, + "grad_norm": 0.20035584270954132, + "learning_rate": 9.913136341062397e-06, + "loss": 0.0045, + "step": 47860 + }, + { + "epoch": 0.3070233105540466, + "grad_norm": 0.23853257298469543, + "learning_rate": 9.913032434925236e-06, + "loss": 0.0051, + "step": 47870 + }, + { + "epoch": 0.3070874474478327, + "grad_norm": 0.18853001296520233, + "learning_rate": 9.91292846722427e-06, + "loss": 0.0103, + "step": 47880 + }, + { + "epoch": 0.30715158434161877, + "grad_norm": 0.3605303466320038, + "learning_rate": 9.912824437960796e-06, + "loss": 0.0051, + "step": 47890 + }, + { + "epoch": 0.30721572123540486, + "grad_norm": 0.06613648682832718, + "learning_rate": 9.912720347136122e-06, + "loss": 0.0043, + "step": 47900 + }, + { + "epoch": 0.30727985812919095, + "grad_norm": 0.1732809990644455, + "learning_rate": 9.912616194751553e-06, + "loss": 0.0051, + "step": 47910 + }, + { + "epoch": 0.30734399502297705, + "grad_norm": 0.13078118860721588, + "learning_rate": 9.912511980808388e-06, + "loss": 0.0061, + "step": 47920 + }, + { + "epoch": 0.30740813191676314, + "grad_norm": 0.25918957591056824, + "learning_rate": 9.912407705307941e-06, + "loss": 0.0038, + "step": 47930 + }, + { + "epoch": 0.30747226881054923, + "grad_norm": 0.36120375990867615, + "learning_rate": 9.91230336825151e-06, + "loss": 0.005, + "step": 47940 + }, + { + "epoch": 0.3075364057043353, + "grad_norm": 0.0602896548807621, + "learning_rate": 9.912198969640412e-06, + "loss": 0.0026, + "step": 47950 + }, + { + "epoch": 0.3076005425981214, + "grad_norm": 0.12653125822544098, + "learning_rate": 9.912094509475947e-06, + "loss": 0.0051, + "step": 47960 + }, + { + "epoch": 0.3076646794919075, + "grad_norm": 0.5669960975646973, + "learning_rate": 9.911989987759429e-06, + "loss": 0.0128, + "step": 47970 + }, + { + "epoch": 0.3077288163856936, + "grad_norm": 0.168401300907135, + "learning_rate": 9.911885404492164e-06, + "loss": 0.0037, + "step": 47980 + }, + { + "epoch": 0.3077929532794797, + "grad_norm": 0.2946607768535614, + "learning_rate": 9.911780759675465e-06, + "loss": 0.006, + "step": 47990 + }, + { + "epoch": 0.30785709017326585, + "grad_norm": 0.2680601179599762, + "learning_rate": 9.911676053310644e-06, + "loss": 0.0038, + "step": 48000 + }, + { + "epoch": 0.30792122706705194, + "grad_norm": 0.22053630650043488, + "learning_rate": 9.911571285399011e-06, + "loss": 0.005, + "step": 48010 + }, + { + "epoch": 0.30798536396083803, + "grad_norm": 0.3797345757484436, + "learning_rate": 9.911466455941878e-06, + "loss": 0.0051, + "step": 48020 + }, + { + "epoch": 0.3080495008546241, + "grad_norm": 0.09887450188398361, + "learning_rate": 9.911361564940562e-06, + "loss": 0.0037, + "step": 48030 + }, + { + "epoch": 0.3081136377484102, + "grad_norm": 0.054559383541345596, + "learning_rate": 9.911256612396377e-06, + "loss": 0.0046, + "step": 48040 + }, + { + "epoch": 0.3081777746421963, + "grad_norm": 0.21160346269607544, + "learning_rate": 9.911151598310633e-06, + "loss": 0.0087, + "step": 48050 + }, + { + "epoch": 0.3082419115359824, + "grad_norm": 0.23671574890613556, + "learning_rate": 9.911046522684651e-06, + "loss": 0.0035, + "step": 48060 + }, + { + "epoch": 0.3083060484297685, + "grad_norm": 0.3388223350048065, + "learning_rate": 9.910941385519747e-06, + "loss": 0.0051, + "step": 48070 + }, + { + "epoch": 0.3083701853235546, + "grad_norm": 0.1838870793581009, + "learning_rate": 9.910836186817237e-06, + "loss": 0.004, + "step": 48080 + }, + { + "epoch": 0.3084343222173407, + "grad_norm": 0.19456282258033752, + "learning_rate": 9.91073092657844e-06, + "loss": 0.0057, + "step": 48090 + }, + { + "epoch": 0.3084984591111268, + "grad_norm": 0.36534351110458374, + "learning_rate": 9.910625604804675e-06, + "loss": 0.0047, + "step": 48100 + }, + { + "epoch": 0.30856259600491287, + "grad_norm": 0.20905707776546478, + "learning_rate": 9.910520221497261e-06, + "loss": 0.0053, + "step": 48110 + }, + { + "epoch": 0.30862673289869896, + "grad_norm": 0.1807992160320282, + "learning_rate": 9.91041477665752e-06, + "loss": 0.0071, + "step": 48120 + }, + { + "epoch": 0.30869086979248506, + "grad_norm": 0.3418310880661011, + "learning_rate": 9.91030927028677e-06, + "loss": 0.0055, + "step": 48130 + }, + { + "epoch": 0.30875500668627115, + "grad_norm": 0.18091419339179993, + "learning_rate": 9.910203702386339e-06, + "loss": 0.0054, + "step": 48140 + }, + { + "epoch": 0.3088191435800573, + "grad_norm": 0.1711590737104416, + "learning_rate": 9.910098072957543e-06, + "loss": 0.0046, + "step": 48150 + }, + { + "epoch": 0.3088832804738434, + "grad_norm": 0.3131524324417114, + "learning_rate": 9.909992382001712e-06, + "loss": 0.005, + "step": 48160 + }, + { + "epoch": 0.3089474173676295, + "grad_norm": 0.3353725075721741, + "learning_rate": 9.909886629520165e-06, + "loss": 0.0044, + "step": 48170 + }, + { + "epoch": 0.3090115542614156, + "grad_norm": 0.09161921590566635, + "learning_rate": 9.909780815514231e-06, + "loss": 0.005, + "step": 48180 + }, + { + "epoch": 0.30907569115520167, + "grad_norm": 0.14977139234542847, + "learning_rate": 9.909674939985235e-06, + "loss": 0.005, + "step": 48190 + }, + { + "epoch": 0.30913982804898776, + "grad_norm": 0.49496692419052124, + "learning_rate": 9.909569002934501e-06, + "loss": 0.0043, + "step": 48200 + }, + { + "epoch": 0.30920396494277386, + "grad_norm": 0.08514609932899475, + "learning_rate": 9.909463004363359e-06, + "loss": 0.0083, + "step": 48210 + }, + { + "epoch": 0.30926810183655995, + "grad_norm": 0.17172439396381378, + "learning_rate": 9.90935694427314e-06, + "loss": 0.009, + "step": 48220 + }, + { + "epoch": 0.30933223873034604, + "grad_norm": 0.3003605604171753, + "learning_rate": 9.909250822665166e-06, + "loss": 0.0061, + "step": 48230 + }, + { + "epoch": 0.30939637562413214, + "grad_norm": 0.20939604938030243, + "learning_rate": 9.909144639540772e-06, + "loss": 0.0068, + "step": 48240 + }, + { + "epoch": 0.30946051251791823, + "grad_norm": 0.3731301426887512, + "learning_rate": 9.909038394901287e-06, + "loss": 0.0036, + "step": 48250 + }, + { + "epoch": 0.3095246494117043, + "grad_norm": 0.025112763047218323, + "learning_rate": 9.908932088748042e-06, + "loss": 0.0049, + "step": 48260 + }, + { + "epoch": 0.3095887863054904, + "grad_norm": 0.12015421688556671, + "learning_rate": 9.908825721082371e-06, + "loss": 0.0061, + "step": 48270 + }, + { + "epoch": 0.3096529231992765, + "grad_norm": 0.2233283519744873, + "learning_rate": 9.908719291905605e-06, + "loss": 0.0049, + "step": 48280 + }, + { + "epoch": 0.30971706009306266, + "grad_norm": 0.12509781122207642, + "learning_rate": 9.908612801219075e-06, + "loss": 0.0063, + "step": 48290 + }, + { + "epoch": 0.30978119698684875, + "grad_norm": 0.35053691267967224, + "learning_rate": 9.908506249024122e-06, + "loss": 0.0079, + "step": 48300 + }, + { + "epoch": 0.30984533388063484, + "grad_norm": 0.15429310500621796, + "learning_rate": 9.908399635322077e-06, + "loss": 0.0052, + "step": 48310 + }, + { + "epoch": 0.30990947077442094, + "grad_norm": 0.40210241079330444, + "learning_rate": 9.908292960114276e-06, + "loss": 0.008, + "step": 48320 + }, + { + "epoch": 0.30997360766820703, + "grad_norm": 0.07661717385053635, + "learning_rate": 9.908186223402056e-06, + "loss": 0.0055, + "step": 48330 + }, + { + "epoch": 0.3100377445619931, + "grad_norm": 0.2834452688694, + "learning_rate": 9.908079425186757e-06, + "loss": 0.0039, + "step": 48340 + }, + { + "epoch": 0.3101018814557792, + "grad_norm": 0.04436261206865311, + "learning_rate": 9.907972565469713e-06, + "loss": 0.0046, + "step": 48350 + }, + { + "epoch": 0.3101660183495653, + "grad_norm": 0.30914679169654846, + "learning_rate": 9.907865644252265e-06, + "loss": 0.0053, + "step": 48360 + }, + { + "epoch": 0.3102301552433514, + "grad_norm": 0.06785024702548981, + "learning_rate": 9.907758661535753e-06, + "loss": 0.0027, + "step": 48370 + }, + { + "epoch": 0.3102942921371375, + "grad_norm": 0.24976181983947754, + "learning_rate": 9.907651617321518e-06, + "loss": 0.0054, + "step": 48380 + }, + { + "epoch": 0.3103584290309236, + "grad_norm": 0.152425155043602, + "learning_rate": 9.9075445116109e-06, + "loss": 0.0031, + "step": 48390 + }, + { + "epoch": 0.3104225659247097, + "grad_norm": 0.14601565897464752, + "learning_rate": 9.907437344405244e-06, + "loss": 0.0064, + "step": 48400 + }, + { + "epoch": 0.3104867028184958, + "grad_norm": 0.18691451847553253, + "learning_rate": 9.90733011570589e-06, + "loss": 0.0054, + "step": 48410 + }, + { + "epoch": 0.31055083971228187, + "grad_norm": 0.03606029227375984, + "learning_rate": 9.90722282551418e-06, + "loss": 0.0046, + "step": 48420 + }, + { + "epoch": 0.310614976606068, + "grad_norm": 0.23096883296966553, + "learning_rate": 9.907115473831462e-06, + "loss": 0.0029, + "step": 48430 + }, + { + "epoch": 0.3106791134998541, + "grad_norm": 0.3457227647304535, + "learning_rate": 9.90700806065908e-06, + "loss": 0.0032, + "step": 48440 + }, + { + "epoch": 0.3107432503936402, + "grad_norm": 0.17288430035114288, + "learning_rate": 9.90690058599838e-06, + "loss": 0.0038, + "step": 48450 + }, + { + "epoch": 0.3108073872874263, + "grad_norm": 0.2500647008419037, + "learning_rate": 9.906793049850712e-06, + "loss": 0.0045, + "step": 48460 + }, + { + "epoch": 0.3108715241812124, + "grad_norm": 0.22884602844715118, + "learning_rate": 9.906685452217418e-06, + "loss": 0.0054, + "step": 48470 + }, + { + "epoch": 0.3109356610749985, + "grad_norm": 0.29955244064331055, + "learning_rate": 9.906577793099847e-06, + "loss": 0.004, + "step": 48480 + }, + { + "epoch": 0.3109997979687846, + "grad_norm": 0.3578546345233917, + "learning_rate": 9.90647007249935e-06, + "loss": 0.006, + "step": 48490 + }, + { + "epoch": 0.31106393486257067, + "grad_norm": 0.289204865694046, + "learning_rate": 9.906362290417277e-06, + "loss": 0.0042, + "step": 48500 + }, + { + "epoch": 0.31112807175635676, + "grad_norm": 0.2857840359210968, + "learning_rate": 9.906254446854979e-06, + "loss": 0.0037, + "step": 48510 + }, + { + "epoch": 0.31119220865014285, + "grad_norm": 0.18643750250339508, + "learning_rate": 9.906146541813804e-06, + "loss": 0.0104, + "step": 48520 + }, + { + "epoch": 0.31125634554392895, + "grad_norm": 0.1863306611776352, + "learning_rate": 9.90603857529511e-06, + "loss": 0.0064, + "step": 48530 + }, + { + "epoch": 0.31132048243771504, + "grad_norm": 0.1412154883146286, + "learning_rate": 9.905930547300244e-06, + "loss": 0.0048, + "step": 48540 + }, + { + "epoch": 0.31138461933150113, + "grad_norm": 0.27536478638648987, + "learning_rate": 9.90582245783056e-06, + "loss": 0.0063, + "step": 48550 + }, + { + "epoch": 0.3114487562252872, + "grad_norm": 0.476123183965683, + "learning_rate": 9.905714306887417e-06, + "loss": 0.0053, + "step": 48560 + }, + { + "epoch": 0.3115128931190734, + "grad_norm": 0.1640210747718811, + "learning_rate": 9.905606094472168e-06, + "loss": 0.0032, + "step": 48570 + }, + { + "epoch": 0.31157703001285947, + "grad_norm": 0.23330853879451752, + "learning_rate": 9.905497820586168e-06, + "loss": 0.0037, + "step": 48580 + }, + { + "epoch": 0.31164116690664556, + "grad_norm": 0.20885784924030304, + "learning_rate": 9.905389485230776e-06, + "loss": 0.0036, + "step": 48590 + }, + { + "epoch": 0.31170530380043165, + "grad_norm": 0.3812311887741089, + "learning_rate": 9.905281088407346e-06, + "loss": 0.0057, + "step": 48600 + }, + { + "epoch": 0.31176944069421775, + "grad_norm": 0.2555144727230072, + "learning_rate": 9.905172630117238e-06, + "loss": 0.0038, + "step": 48610 + }, + { + "epoch": 0.31183357758800384, + "grad_norm": 0.3070421516895294, + "learning_rate": 9.905064110361813e-06, + "loss": 0.0033, + "step": 48620 + }, + { + "epoch": 0.31189771448178993, + "grad_norm": 0.2051517218351364, + "learning_rate": 9.904955529142427e-06, + "loss": 0.0043, + "step": 48630 + }, + { + "epoch": 0.311961851375576, + "grad_norm": 0.39771977066993713, + "learning_rate": 9.904846886460443e-06, + "loss": 0.0054, + "step": 48640 + }, + { + "epoch": 0.3120259882693621, + "grad_norm": 0.39983877539634705, + "learning_rate": 9.904738182317224e-06, + "loss": 0.0035, + "step": 48650 + }, + { + "epoch": 0.3120901251631482, + "grad_norm": 0.19409961998462677, + "learning_rate": 9.90462941671413e-06, + "loss": 0.0031, + "step": 48660 + }, + { + "epoch": 0.3121542620569343, + "grad_norm": 0.42691096663475037, + "learning_rate": 9.904520589652523e-06, + "loss": 0.0056, + "step": 48670 + }, + { + "epoch": 0.3122183989507204, + "grad_norm": 0.1716923862695694, + "learning_rate": 9.904411701133766e-06, + "loss": 0.0034, + "step": 48680 + }, + { + "epoch": 0.3122825358445065, + "grad_norm": 0.1936800628900528, + "learning_rate": 9.904302751159227e-06, + "loss": 0.0049, + "step": 48690 + }, + { + "epoch": 0.3123466727382926, + "grad_norm": 0.25935399532318115, + "learning_rate": 9.90419373973027e-06, + "loss": 0.0042, + "step": 48700 + }, + { + "epoch": 0.31241080963207873, + "grad_norm": 0.10071436315774918, + "learning_rate": 9.90408466684826e-06, + "loss": 0.0032, + "step": 48710 + }, + { + "epoch": 0.3124749465258648, + "grad_norm": 0.23191823065280914, + "learning_rate": 9.903975532514564e-06, + "loss": 0.0046, + "step": 48720 + }, + { + "epoch": 0.3125390834196509, + "grad_norm": 0.2393505722284317, + "learning_rate": 9.90386633673055e-06, + "loss": 0.0044, + "step": 48730 + }, + { + "epoch": 0.312603220313437, + "grad_norm": 0.32246753573417664, + "learning_rate": 9.903757079497587e-06, + "loss": 0.0052, + "step": 48740 + }, + { + "epoch": 0.3126673572072231, + "grad_norm": 0.30864331126213074, + "learning_rate": 9.903647760817041e-06, + "loss": 0.0078, + "step": 48750 + }, + { + "epoch": 0.3127314941010092, + "grad_norm": 0.4452064037322998, + "learning_rate": 9.903538380690284e-06, + "loss": 0.0056, + "step": 48760 + }, + { + "epoch": 0.3127956309947953, + "grad_norm": 0.1841566562652588, + "learning_rate": 9.90342893911869e-06, + "loss": 0.0046, + "step": 48770 + }, + { + "epoch": 0.3128597678885814, + "grad_norm": 0.3984428942203522, + "learning_rate": 9.903319436103625e-06, + "loss": 0.0084, + "step": 48780 + }, + { + "epoch": 0.3129239047823675, + "grad_norm": 0.4029768109321594, + "learning_rate": 9.903209871646463e-06, + "loss": 0.0058, + "step": 48790 + }, + { + "epoch": 0.31298804167615357, + "grad_norm": 0.24533171951770782, + "learning_rate": 9.903100245748576e-06, + "loss": 0.0059, + "step": 48800 + }, + { + "epoch": 0.31305217856993967, + "grad_norm": 0.13123102486133575, + "learning_rate": 9.90299055841134e-06, + "loss": 0.0045, + "step": 48810 + }, + { + "epoch": 0.31311631546372576, + "grad_norm": 0.39378902316093445, + "learning_rate": 9.90288080963613e-06, + "loss": 0.0058, + "step": 48820 + }, + { + "epoch": 0.31318045235751185, + "grad_norm": 0.28770095109939575, + "learning_rate": 9.902770999424314e-06, + "loss": 0.0049, + "step": 48830 + }, + { + "epoch": 0.31324458925129794, + "grad_norm": 0.055193688720464706, + "learning_rate": 9.902661127777277e-06, + "loss": 0.0033, + "step": 48840 + }, + { + "epoch": 0.3133087261450841, + "grad_norm": 0.056696776300668716, + "learning_rate": 9.902551194696393e-06, + "loss": 0.0056, + "step": 48850 + }, + { + "epoch": 0.3133728630388702, + "grad_norm": 0.4966791868209839, + "learning_rate": 9.902441200183038e-06, + "loss": 0.0068, + "step": 48860 + }, + { + "epoch": 0.3134369999326563, + "grad_norm": 0.31629717350006104, + "learning_rate": 9.902331144238591e-06, + "loss": 0.0109, + "step": 48870 + }, + { + "epoch": 0.3135011368264424, + "grad_norm": 0.16882503032684326, + "learning_rate": 9.90222102686443e-06, + "loss": 0.0056, + "step": 48880 + }, + { + "epoch": 0.31356527372022847, + "grad_norm": 0.07945968955755234, + "learning_rate": 9.902110848061937e-06, + "loss": 0.0037, + "step": 48890 + }, + { + "epoch": 0.31362941061401456, + "grad_norm": 0.1186680793762207, + "learning_rate": 9.90200060783249e-06, + "loss": 0.0033, + "step": 48900 + }, + { + "epoch": 0.31369354750780065, + "grad_norm": 0.13710758090019226, + "learning_rate": 9.901890306177474e-06, + "loss": 0.0045, + "step": 48910 + }, + { + "epoch": 0.31375768440158674, + "grad_norm": 0.1552942842245102, + "learning_rate": 9.901779943098269e-06, + "loss": 0.005, + "step": 48920 + }, + { + "epoch": 0.31382182129537284, + "grad_norm": 0.25293827056884766, + "learning_rate": 9.901669518596258e-06, + "loss": 0.004, + "step": 48930 + }, + { + "epoch": 0.31388595818915893, + "grad_norm": 0.11367998272180557, + "learning_rate": 9.901559032672824e-06, + "loss": 0.0034, + "step": 48940 + }, + { + "epoch": 0.313950095082945, + "grad_norm": 0.06295833736658096, + "learning_rate": 9.901448485329353e-06, + "loss": 0.0083, + "step": 48950 + }, + { + "epoch": 0.3140142319767311, + "grad_norm": 0.2326330542564392, + "learning_rate": 9.901337876567227e-06, + "loss": 0.005, + "step": 48960 + }, + { + "epoch": 0.3140783688705172, + "grad_norm": 0.2274763286113739, + "learning_rate": 9.901227206387838e-06, + "loss": 0.0048, + "step": 48970 + }, + { + "epoch": 0.3141425057643033, + "grad_norm": 0.2659567594528198, + "learning_rate": 9.901116474792566e-06, + "loss": 0.0041, + "step": 48980 + }, + { + "epoch": 0.31420664265808945, + "grad_norm": 0.14676564931869507, + "learning_rate": 9.901005681782805e-06, + "loss": 0.0039, + "step": 48990 + }, + { + "epoch": 0.31427077955187555, + "grad_norm": 0.06894460320472717, + "learning_rate": 9.900894827359937e-06, + "loss": 0.0045, + "step": 49000 + }, + { + "epoch": 0.31433491644566164, + "grad_norm": 0.13913805782794952, + "learning_rate": 9.900783911525357e-06, + "loss": 0.0038, + "step": 49010 + }, + { + "epoch": 0.31439905333944773, + "grad_norm": 0.22315600514411926, + "learning_rate": 9.900672934280451e-06, + "loss": 0.0048, + "step": 49020 + }, + { + "epoch": 0.3144631902332338, + "grad_norm": 0.10821523517370224, + "learning_rate": 9.90056189562661e-06, + "loss": 0.0038, + "step": 49030 + }, + { + "epoch": 0.3145273271270199, + "grad_norm": 0.2065705507993698, + "learning_rate": 9.900450795565226e-06, + "loss": 0.004, + "step": 49040 + }, + { + "epoch": 0.314591464020806, + "grad_norm": 0.24347898364067078, + "learning_rate": 9.900339634097693e-06, + "loss": 0.0029, + "step": 49050 + }, + { + "epoch": 0.3146556009145921, + "grad_norm": 0.200932577252388, + "learning_rate": 9.900228411225401e-06, + "loss": 0.0033, + "step": 49060 + }, + { + "epoch": 0.3147197378083782, + "grad_norm": 0.21611665189266205, + "learning_rate": 9.900117126949744e-06, + "loss": 0.0037, + "step": 49070 + }, + { + "epoch": 0.3147838747021643, + "grad_norm": 0.31797540187835693, + "learning_rate": 9.900005781272118e-06, + "loss": 0.004, + "step": 49080 + }, + { + "epoch": 0.3148480115959504, + "grad_norm": 0.22289103269577026, + "learning_rate": 9.899894374193918e-06, + "loss": 0.0051, + "step": 49090 + }, + { + "epoch": 0.3149121484897365, + "grad_norm": 0.35321786999702454, + "learning_rate": 9.899782905716539e-06, + "loss": 0.0053, + "step": 49100 + }, + { + "epoch": 0.31497628538352257, + "grad_norm": 0.242935910820961, + "learning_rate": 9.89967137584138e-06, + "loss": 0.0042, + "step": 49110 + }, + { + "epoch": 0.31504042227730866, + "grad_norm": 0.0777844786643982, + "learning_rate": 9.899559784569834e-06, + "loss": 0.0027, + "step": 49120 + }, + { + "epoch": 0.3151045591710948, + "grad_norm": 0.07172335684299469, + "learning_rate": 9.899448131903305e-06, + "loss": 0.0044, + "step": 49130 + }, + { + "epoch": 0.3151686960648809, + "grad_norm": 0.1554524302482605, + "learning_rate": 9.899336417843188e-06, + "loss": 0.0086, + "step": 49140 + }, + { + "epoch": 0.315232832958667, + "grad_norm": 0.2136203497648239, + "learning_rate": 9.899224642390884e-06, + "loss": 0.0077, + "step": 49150 + }, + { + "epoch": 0.3152969698524531, + "grad_norm": 0.1316751092672348, + "learning_rate": 9.899112805547795e-06, + "loss": 0.0062, + "step": 49160 + }, + { + "epoch": 0.3153611067462392, + "grad_norm": 0.2350343018770218, + "learning_rate": 9.89900090731532e-06, + "loss": 0.0072, + "step": 49170 + }, + { + "epoch": 0.3154252436400253, + "grad_norm": 0.2078297734260559, + "learning_rate": 9.898888947694863e-06, + "loss": 0.0045, + "step": 49180 + }, + { + "epoch": 0.31548938053381137, + "grad_norm": 0.1405486911535263, + "learning_rate": 9.898776926687826e-06, + "loss": 0.0039, + "step": 49190 + }, + { + "epoch": 0.31555351742759746, + "grad_norm": 0.1346013844013214, + "learning_rate": 9.898664844295612e-06, + "loss": 0.0046, + "step": 49200 + }, + { + "epoch": 0.31561765432138356, + "grad_norm": 0.23130005598068237, + "learning_rate": 9.898552700519628e-06, + "loss": 0.0036, + "step": 49210 + }, + { + "epoch": 0.31568179121516965, + "grad_norm": 0.0856684073805809, + "learning_rate": 9.898440495361277e-06, + "loss": 0.0057, + "step": 49220 + }, + { + "epoch": 0.31574592810895574, + "grad_norm": 0.19981451332569122, + "learning_rate": 9.898328228821966e-06, + "loss": 0.0052, + "step": 49230 + }, + { + "epoch": 0.31581006500274184, + "grad_norm": 0.31087830662727356, + "learning_rate": 9.898215900903101e-06, + "loss": 0.0067, + "step": 49240 + }, + { + "epoch": 0.31587420189652793, + "grad_norm": 0.27627718448638916, + "learning_rate": 9.898103511606092e-06, + "loss": 0.0021, + "step": 49250 + }, + { + "epoch": 0.315938338790314, + "grad_norm": 0.30112534761428833, + "learning_rate": 9.897991060932345e-06, + "loss": 0.0074, + "step": 49260 + }, + { + "epoch": 0.31600247568410017, + "grad_norm": 0.20653463900089264, + "learning_rate": 9.897878548883266e-06, + "loss": 0.0037, + "step": 49270 + }, + { + "epoch": 0.31606661257788626, + "grad_norm": 0.4281090497970581, + "learning_rate": 9.897765975460272e-06, + "loss": 0.0046, + "step": 49280 + }, + { + "epoch": 0.31613074947167236, + "grad_norm": 0.18075962364673615, + "learning_rate": 9.897653340664767e-06, + "loss": 0.005, + "step": 49290 + }, + { + "epoch": 0.31619488636545845, + "grad_norm": 0.19884683191776276, + "learning_rate": 9.897540644498168e-06, + "loss": 0.0043, + "step": 49300 + }, + { + "epoch": 0.31625902325924454, + "grad_norm": 0.10651399940252304, + "learning_rate": 9.897427886961884e-06, + "loss": 0.0031, + "step": 49310 + }, + { + "epoch": 0.31632316015303064, + "grad_norm": 0.38676655292510986, + "learning_rate": 9.897315068057327e-06, + "loss": 0.0042, + "step": 49320 + }, + { + "epoch": 0.31638729704681673, + "grad_norm": 0.012912292033433914, + "learning_rate": 9.897202187785913e-06, + "loss": 0.0042, + "step": 49330 + }, + { + "epoch": 0.3164514339406028, + "grad_norm": 0.08300850540399551, + "learning_rate": 9.897089246149056e-06, + "loss": 0.0034, + "step": 49340 + }, + { + "epoch": 0.3165155708343889, + "grad_norm": 0.14932957291603088, + "learning_rate": 9.896976243148171e-06, + "loss": 0.0065, + "step": 49350 + }, + { + "epoch": 0.316579707728175, + "grad_norm": 0.17913511395454407, + "learning_rate": 9.896863178784673e-06, + "loss": 0.0054, + "step": 49360 + }, + { + "epoch": 0.3166438446219611, + "grad_norm": 0.32844895124435425, + "learning_rate": 9.89675005305998e-06, + "loss": 0.0046, + "step": 49370 + }, + { + "epoch": 0.3167079815157472, + "grad_norm": 0.3574899137020111, + "learning_rate": 9.896636865975509e-06, + "loss": 0.007, + "step": 49380 + }, + { + "epoch": 0.3167721184095333, + "grad_norm": 0.5497488975524902, + "learning_rate": 9.89652361753268e-06, + "loss": 0.0049, + "step": 49390 + }, + { + "epoch": 0.3168362553033194, + "grad_norm": 0.3612213730812073, + "learning_rate": 9.896410307732909e-06, + "loss": 0.0048, + "step": 49400 + }, + { + "epoch": 0.31690039219710553, + "grad_norm": 0.1442115157842636, + "learning_rate": 9.896296936577618e-06, + "loss": 0.0043, + "step": 49410 + }, + { + "epoch": 0.3169645290908916, + "grad_norm": 0.18238170444965363, + "learning_rate": 9.896183504068226e-06, + "loss": 0.0029, + "step": 49420 + }, + { + "epoch": 0.3170286659846777, + "grad_norm": 0.18228575587272644, + "learning_rate": 9.896070010206155e-06, + "loss": 0.0055, + "step": 49430 + }, + { + "epoch": 0.3170928028784638, + "grad_norm": 0.3545880913734436, + "learning_rate": 9.89595645499283e-06, + "loss": 0.0084, + "step": 49440 + }, + { + "epoch": 0.3171569397722499, + "grad_norm": 0.10810332745313644, + "learning_rate": 9.89584283842967e-06, + "loss": 0.0069, + "step": 49450 + }, + { + "epoch": 0.317221076666036, + "grad_norm": 0.2051643431186676, + "learning_rate": 9.895729160518102e-06, + "loss": 0.0051, + "step": 49460 + }, + { + "epoch": 0.3172852135598221, + "grad_norm": 0.22339393198490143, + "learning_rate": 9.895615421259548e-06, + "loss": 0.0054, + "step": 49470 + }, + { + "epoch": 0.3173493504536082, + "grad_norm": 0.026365969330072403, + "learning_rate": 9.895501620655431e-06, + "loss": 0.0039, + "step": 49480 + }, + { + "epoch": 0.3174134873473943, + "grad_norm": 0.2481347918510437, + "learning_rate": 9.895387758707182e-06, + "loss": 0.0043, + "step": 49490 + }, + { + "epoch": 0.31747762424118037, + "grad_norm": 0.3272586166858673, + "learning_rate": 9.895273835416227e-06, + "loss": 0.0044, + "step": 49500 + }, + { + "epoch": 0.31754176113496646, + "grad_norm": 0.13250267505645752, + "learning_rate": 9.895159850783991e-06, + "loss": 0.0033, + "step": 49510 + }, + { + "epoch": 0.31760589802875255, + "grad_norm": 0.266339510679245, + "learning_rate": 9.895045804811903e-06, + "loss": 0.0061, + "step": 49520 + }, + { + "epoch": 0.31767003492253865, + "grad_norm": 0.30501192808151245, + "learning_rate": 9.894931697501394e-06, + "loss": 0.003, + "step": 49530 + }, + { + "epoch": 0.31773417181632474, + "grad_norm": 0.22355207800865173, + "learning_rate": 9.894817528853891e-06, + "loss": 0.0039, + "step": 49540 + }, + { + "epoch": 0.3177983087101109, + "grad_norm": 0.23635293543338776, + "learning_rate": 9.894703298870827e-06, + "loss": 0.0046, + "step": 49550 + }, + { + "epoch": 0.317862445603897, + "grad_norm": 0.269288569688797, + "learning_rate": 9.894589007553631e-06, + "loss": 0.0066, + "step": 49560 + }, + { + "epoch": 0.3179265824976831, + "grad_norm": 0.24839213490486145, + "learning_rate": 9.894474654903739e-06, + "loss": 0.0028, + "step": 49570 + }, + { + "epoch": 0.31799071939146917, + "grad_norm": 0.07492738962173462, + "learning_rate": 9.894360240922581e-06, + "loss": 0.0038, + "step": 49580 + }, + { + "epoch": 0.31805485628525526, + "grad_norm": 0.1257239133119583, + "learning_rate": 9.89424576561159e-06, + "loss": 0.0055, + "step": 49590 + }, + { + "epoch": 0.31811899317904135, + "grad_norm": 0.12412641942501068, + "learning_rate": 9.894131228972203e-06, + "loss": 0.0052, + "step": 49600 + }, + { + "epoch": 0.31818313007282745, + "grad_norm": 0.19532410800457, + "learning_rate": 9.894016631005853e-06, + "loss": 0.0041, + "step": 49610 + }, + { + "epoch": 0.31824726696661354, + "grad_norm": 0.27602460980415344, + "learning_rate": 9.893901971713978e-06, + "loss": 0.0064, + "step": 49620 + }, + { + "epoch": 0.31831140386039963, + "grad_norm": 0.5332382917404175, + "learning_rate": 9.893787251098012e-06, + "loss": 0.0065, + "step": 49630 + }, + { + "epoch": 0.3183755407541857, + "grad_norm": 0.32323774695396423, + "learning_rate": 9.893672469159396e-06, + "loss": 0.0051, + "step": 49640 + }, + { + "epoch": 0.3184396776479718, + "grad_norm": 0.14860421419143677, + "learning_rate": 9.893557625899565e-06, + "loss": 0.0037, + "step": 49650 + }, + { + "epoch": 0.3185038145417579, + "grad_norm": 0.043334994465112686, + "learning_rate": 9.893442721319962e-06, + "loss": 0.0055, + "step": 49660 + }, + { + "epoch": 0.318567951435544, + "grad_norm": 0.4149910509586334, + "learning_rate": 9.893327755422022e-06, + "loss": 0.0069, + "step": 49670 + }, + { + "epoch": 0.3186320883293301, + "grad_norm": 0.20254351198673248, + "learning_rate": 9.89321272820719e-06, + "loss": 0.008, + "step": 49680 + }, + { + "epoch": 0.3186962252231162, + "grad_norm": 0.2773299813270569, + "learning_rate": 9.893097639676902e-06, + "loss": 0.0062, + "step": 49690 + }, + { + "epoch": 0.31876036211690234, + "grad_norm": 0.23761241137981415, + "learning_rate": 9.892982489832605e-06, + "loss": 0.0052, + "step": 49700 + }, + { + "epoch": 0.31882449901068843, + "grad_norm": 0.2280692160129547, + "learning_rate": 9.892867278675742e-06, + "loss": 0.006, + "step": 49710 + }, + { + "epoch": 0.3188886359044745, + "grad_norm": 0.39356282353401184, + "learning_rate": 9.892752006207754e-06, + "loss": 0.006, + "step": 49720 + }, + { + "epoch": 0.3189527727982606, + "grad_norm": 0.11281643807888031, + "learning_rate": 9.892636672430088e-06, + "loss": 0.0034, + "step": 49730 + }, + { + "epoch": 0.3190169096920467, + "grad_norm": 0.1185554713010788, + "learning_rate": 9.892521277344184e-06, + "loss": 0.0047, + "step": 49740 + }, + { + "epoch": 0.3190810465858328, + "grad_norm": 0.09055802971124649, + "learning_rate": 9.892405820951496e-06, + "loss": 0.0053, + "step": 49750 + }, + { + "epoch": 0.3191451834796189, + "grad_norm": 0.19612687826156616, + "learning_rate": 9.892290303253465e-06, + "loss": 0.0025, + "step": 49760 + }, + { + "epoch": 0.319209320373405, + "grad_norm": 0.3884764611721039, + "learning_rate": 9.89217472425154e-06, + "loss": 0.0046, + "step": 49770 + }, + { + "epoch": 0.3192734572671911, + "grad_norm": 0.2126941829919815, + "learning_rate": 9.892059083947168e-06, + "loss": 0.0038, + "step": 49780 + }, + { + "epoch": 0.3193375941609772, + "grad_norm": 0.31118959188461304, + "learning_rate": 9.891943382341799e-06, + "loss": 0.0067, + "step": 49790 + }, + { + "epoch": 0.31940173105476327, + "grad_norm": 0.13976700603961945, + "learning_rate": 9.891827619436886e-06, + "loss": 0.0047, + "step": 49800 + }, + { + "epoch": 0.31946586794854936, + "grad_norm": 0.15904361009597778, + "learning_rate": 9.891711795233876e-06, + "loss": 0.0044, + "step": 49810 + }, + { + "epoch": 0.31953000484233546, + "grad_norm": 0.3219791650772095, + "learning_rate": 9.891595909734219e-06, + "loss": 0.0063, + "step": 49820 + }, + { + "epoch": 0.31959414173612155, + "grad_norm": 0.2676171362400055, + "learning_rate": 9.891479962939371e-06, + "loss": 0.0032, + "step": 49830 + }, + { + "epoch": 0.3196582786299077, + "grad_norm": 0.16709725558757782, + "learning_rate": 9.891363954850784e-06, + "loss": 0.0054, + "step": 49840 + }, + { + "epoch": 0.3197224155236938, + "grad_norm": 0.21087214350700378, + "learning_rate": 9.89124788546991e-06, + "loss": 0.0049, + "step": 49850 + }, + { + "epoch": 0.3197865524174799, + "grad_norm": 0.13865779340267181, + "learning_rate": 9.891131754798204e-06, + "loss": 0.0042, + "step": 49860 + }, + { + "epoch": 0.319850689311266, + "grad_norm": 0.30375921726226807, + "learning_rate": 9.891015562837122e-06, + "loss": 0.0048, + "step": 49870 + }, + { + "epoch": 0.31991482620505207, + "grad_norm": 0.19928953051567078, + "learning_rate": 9.890899309588119e-06, + "loss": 0.0042, + "step": 49880 + }, + { + "epoch": 0.31997896309883817, + "grad_norm": 0.23441387712955475, + "learning_rate": 9.890782995052652e-06, + "loss": 0.002, + "step": 49890 + }, + { + "epoch": 0.32004309999262426, + "grad_norm": 0.18868665397167206, + "learning_rate": 9.89066661923218e-06, + "loss": 0.0041, + "step": 49900 + }, + { + "epoch": 0.32010723688641035, + "grad_norm": 0.3113063871860504, + "learning_rate": 9.890550182128158e-06, + "loss": 0.0036, + "step": 49910 + }, + { + "epoch": 0.32017137378019644, + "grad_norm": 0.1571812927722931, + "learning_rate": 9.890433683742047e-06, + "loss": 0.0029, + "step": 49920 + }, + { + "epoch": 0.32023551067398254, + "grad_norm": 0.3203980624675751, + "learning_rate": 9.890317124075308e-06, + "loss": 0.0061, + "step": 49930 + }, + { + "epoch": 0.32029964756776863, + "grad_norm": 0.21253187954425812, + "learning_rate": 9.8902005031294e-06, + "loss": 0.0091, + "step": 49940 + }, + { + "epoch": 0.3203637844615547, + "grad_norm": 0.09002522379159927, + "learning_rate": 9.890083820905785e-06, + "loss": 0.0084, + "step": 49950 + }, + { + "epoch": 0.3204279213553408, + "grad_norm": 0.17045079171657562, + "learning_rate": 9.889967077405925e-06, + "loss": 0.0075, + "step": 49960 + }, + { + "epoch": 0.3204920582491269, + "grad_norm": 0.10132025927305222, + "learning_rate": 9.889850272631283e-06, + "loss": 0.0038, + "step": 49970 + }, + { + "epoch": 0.32055619514291306, + "grad_norm": 0.11106637120246887, + "learning_rate": 9.889733406583321e-06, + "loss": 0.004, + "step": 49980 + }, + { + "epoch": 0.32062033203669915, + "grad_norm": 0.07718731462955475, + "learning_rate": 9.889616479263506e-06, + "loss": 0.007, + "step": 49990 + }, + { + "epoch": 0.32068446893048524, + "grad_norm": 0.34893959760665894, + "learning_rate": 9.889499490673301e-06, + "loss": 0.0037, + "step": 50000 + }, + { + "epoch": 0.32074860582427134, + "grad_norm": 0.13644975423812866, + "learning_rate": 9.889382440814172e-06, + "loss": 0.0068, + "step": 50010 + }, + { + "epoch": 0.32081274271805743, + "grad_norm": 0.2971213757991791, + "learning_rate": 9.88926532968759e-06, + "loss": 0.0044, + "step": 50020 + }, + { + "epoch": 0.3208768796118435, + "grad_norm": 0.07066557556390762, + "learning_rate": 9.889148157295017e-06, + "loss": 0.0031, + "step": 50030 + }, + { + "epoch": 0.3209410165056296, + "grad_norm": 0.9618569016456604, + "learning_rate": 9.889030923637923e-06, + "loss": 0.0105, + "step": 50040 + }, + { + "epoch": 0.3210051533994157, + "grad_norm": 0.5154543519020081, + "learning_rate": 9.888913628717778e-06, + "loss": 0.0042, + "step": 50050 + }, + { + "epoch": 0.3210692902932018, + "grad_norm": 0.22388550639152527, + "learning_rate": 9.88879627253605e-06, + "loss": 0.0051, + "step": 50060 + }, + { + "epoch": 0.3211334271869879, + "grad_norm": 0.17283904552459717, + "learning_rate": 9.888678855094213e-06, + "loss": 0.0048, + "step": 50070 + }, + { + "epoch": 0.321197564080774, + "grad_norm": 0.21307899057865143, + "learning_rate": 9.888561376393734e-06, + "loss": 0.0063, + "step": 50080 + }, + { + "epoch": 0.3212617009745601, + "grad_norm": 0.4096912741661072, + "learning_rate": 9.888443836436088e-06, + "loss": 0.0054, + "step": 50090 + }, + { + "epoch": 0.3213258378683462, + "grad_norm": 0.28107506036758423, + "learning_rate": 9.888326235222748e-06, + "loss": 0.0056, + "step": 50100 + }, + { + "epoch": 0.32138997476213227, + "grad_norm": 0.08909334242343903, + "learning_rate": 9.888208572755185e-06, + "loss": 0.0049, + "step": 50110 + }, + { + "epoch": 0.3214541116559184, + "grad_norm": 0.13259750604629517, + "learning_rate": 9.888090849034876e-06, + "loss": 0.0033, + "step": 50120 + }, + { + "epoch": 0.3215182485497045, + "grad_norm": 0.2727391719818115, + "learning_rate": 9.887973064063294e-06, + "loss": 0.0036, + "step": 50130 + }, + { + "epoch": 0.3215823854434906, + "grad_norm": 0.06979379802942276, + "learning_rate": 9.887855217841918e-06, + "loss": 0.0036, + "step": 50140 + }, + { + "epoch": 0.3216465223372767, + "grad_norm": 0.5669009685516357, + "learning_rate": 9.887737310372221e-06, + "loss": 0.0059, + "step": 50150 + }, + { + "epoch": 0.3217106592310628, + "grad_norm": 0.178040012717247, + "learning_rate": 9.887619341655683e-06, + "loss": 0.0037, + "step": 50160 + }, + { + "epoch": 0.3217747961248489, + "grad_norm": 0.4985739290714264, + "learning_rate": 9.887501311693782e-06, + "loss": 0.0058, + "step": 50170 + }, + { + "epoch": 0.321838933018635, + "grad_norm": 0.054809264838695526, + "learning_rate": 9.887383220487995e-06, + "loss": 0.004, + "step": 50180 + }, + { + "epoch": 0.32190306991242107, + "grad_norm": 0.29745447635650635, + "learning_rate": 9.887265068039806e-06, + "loss": 0.0056, + "step": 50190 + }, + { + "epoch": 0.32196720680620716, + "grad_norm": 0.2449481040239334, + "learning_rate": 9.887146854350692e-06, + "loss": 0.0046, + "step": 50200 + }, + { + "epoch": 0.32203134369999326, + "grad_norm": 0.07773438841104507, + "learning_rate": 9.887028579422132e-06, + "loss": 0.0038, + "step": 50210 + }, + { + "epoch": 0.32209548059377935, + "grad_norm": 0.08787532150745392, + "learning_rate": 9.886910243255614e-06, + "loss": 0.0034, + "step": 50220 + }, + { + "epoch": 0.32215961748756544, + "grad_norm": 0.10316630452871323, + "learning_rate": 9.886791845852619e-06, + "loss": 0.0037, + "step": 50230 + }, + { + "epoch": 0.32222375438135153, + "grad_norm": 0.24471049010753632, + "learning_rate": 9.886673387214628e-06, + "loss": 0.0084, + "step": 50240 + }, + { + "epoch": 0.32228789127513763, + "grad_norm": 0.13041454553604126, + "learning_rate": 9.886554867343127e-06, + "loss": 0.0025, + "step": 50250 + }, + { + "epoch": 0.3223520281689238, + "grad_norm": 0.4005851447582245, + "learning_rate": 9.886436286239602e-06, + "loss": 0.0044, + "step": 50260 + }, + { + "epoch": 0.32241616506270987, + "grad_norm": 0.19282877445220947, + "learning_rate": 9.886317643905536e-06, + "loss": 0.0026, + "step": 50270 + }, + { + "epoch": 0.32248030195649596, + "grad_norm": 0.2606230676174164, + "learning_rate": 9.88619894034242e-06, + "loss": 0.0058, + "step": 50280 + }, + { + "epoch": 0.32254443885028206, + "grad_norm": 0.13031746447086334, + "learning_rate": 9.886080175551738e-06, + "loss": 0.0046, + "step": 50290 + }, + { + "epoch": 0.32260857574406815, + "grad_norm": 0.2334151417016983, + "learning_rate": 9.885961349534981e-06, + "loss": 0.0039, + "step": 50300 + }, + { + "epoch": 0.32267271263785424, + "grad_norm": 0.17930831015110016, + "learning_rate": 9.885842462293633e-06, + "loss": 0.009, + "step": 50310 + }, + { + "epoch": 0.32273684953164034, + "grad_norm": 0.2496260404586792, + "learning_rate": 9.88572351382919e-06, + "loss": 0.0049, + "step": 50320 + }, + { + "epoch": 0.32280098642542643, + "grad_norm": 0.22151890397071838, + "learning_rate": 9.885604504143138e-06, + "loss": 0.009, + "step": 50330 + }, + { + "epoch": 0.3228651233192125, + "grad_norm": 0.09284746646881104, + "learning_rate": 9.88548543323697e-06, + "loss": 0.0061, + "step": 50340 + }, + { + "epoch": 0.3229292602129986, + "grad_norm": 0.19882896542549133, + "learning_rate": 9.88536630111218e-06, + "loss": 0.0084, + "step": 50350 + }, + { + "epoch": 0.3229933971067847, + "grad_norm": 0.11681967228651047, + "learning_rate": 9.885247107770257e-06, + "loss": 0.0052, + "step": 50360 + }, + { + "epoch": 0.3230575340005708, + "grad_norm": 0.10497705638408661, + "learning_rate": 9.885127853212696e-06, + "loss": 0.0045, + "step": 50370 + }, + { + "epoch": 0.3231216708943569, + "grad_norm": 0.17021536827087402, + "learning_rate": 9.885008537440992e-06, + "loss": 0.0056, + "step": 50380 + }, + { + "epoch": 0.323185807788143, + "grad_norm": 0.3915821313858032, + "learning_rate": 9.884889160456642e-06, + "loss": 0.0046, + "step": 50390 + }, + { + "epoch": 0.32324994468192914, + "grad_norm": 0.26966342329978943, + "learning_rate": 9.884769722261136e-06, + "loss": 0.0053, + "step": 50400 + }, + { + "epoch": 0.32331408157571523, + "grad_norm": 0.3279496133327484, + "learning_rate": 9.884650222855976e-06, + "loss": 0.0038, + "step": 50410 + }, + { + "epoch": 0.3233782184695013, + "grad_norm": 0.1679442822933197, + "learning_rate": 9.884530662242658e-06, + "loss": 0.0043, + "step": 50420 + }, + { + "epoch": 0.3234423553632874, + "grad_norm": 0.07241009920835495, + "learning_rate": 9.884411040422679e-06, + "loss": 0.0037, + "step": 50430 + }, + { + "epoch": 0.3235064922570735, + "grad_norm": 0.20607556402683258, + "learning_rate": 9.884291357397539e-06, + "loss": 0.0045, + "step": 50440 + }, + { + "epoch": 0.3235706291508596, + "grad_norm": 0.24917933344841003, + "learning_rate": 9.884171613168738e-06, + "loss": 0.0034, + "step": 50450 + }, + { + "epoch": 0.3236347660446457, + "grad_norm": 0.2530268728733063, + "learning_rate": 9.884051807737776e-06, + "loss": 0.0028, + "step": 50460 + }, + { + "epoch": 0.3236989029384318, + "grad_norm": 0.1264554262161255, + "learning_rate": 9.883931941106154e-06, + "loss": 0.0071, + "step": 50470 + }, + { + "epoch": 0.3237630398322179, + "grad_norm": 0.32021564245224, + "learning_rate": 9.883812013275374e-06, + "loss": 0.0052, + "step": 50480 + }, + { + "epoch": 0.323827176726004, + "grad_norm": 0.03336715325713158, + "learning_rate": 9.88369202424694e-06, + "loss": 0.0028, + "step": 50490 + }, + { + "epoch": 0.32389131361979007, + "grad_norm": 0.1920616328716278, + "learning_rate": 9.883571974022355e-06, + "loss": 0.008, + "step": 50500 + }, + { + "epoch": 0.32395545051357616, + "grad_norm": 0.1073715016245842, + "learning_rate": 9.883451862603121e-06, + "loss": 0.0047, + "step": 50510 + }, + { + "epoch": 0.32401958740736225, + "grad_norm": 0.17998799681663513, + "learning_rate": 9.883331689990746e-06, + "loss": 0.0053, + "step": 50520 + }, + { + "epoch": 0.32408372430114835, + "grad_norm": 0.4126385748386383, + "learning_rate": 9.883211456186735e-06, + "loss": 0.0074, + "step": 50530 + }, + { + "epoch": 0.3241478611949345, + "grad_norm": 0.2791917026042938, + "learning_rate": 9.883091161192595e-06, + "loss": 0.0054, + "step": 50540 + }, + { + "epoch": 0.3242119980887206, + "grad_norm": 0.1871456801891327, + "learning_rate": 9.882970805009831e-06, + "loss": 0.0048, + "step": 50550 + }, + { + "epoch": 0.3242761349825067, + "grad_norm": 0.1261935979127884, + "learning_rate": 9.882850387639956e-06, + "loss": 0.006, + "step": 50560 + }, + { + "epoch": 0.3243402718762928, + "grad_norm": 0.25968706607818604, + "learning_rate": 9.882729909084472e-06, + "loss": 0.0055, + "step": 50570 + }, + { + "epoch": 0.32440440877007887, + "grad_norm": 0.12232595682144165, + "learning_rate": 9.882609369344895e-06, + "loss": 0.0038, + "step": 50580 + }, + { + "epoch": 0.32446854566386496, + "grad_norm": 0.25618013739585876, + "learning_rate": 9.882488768422733e-06, + "loss": 0.0071, + "step": 50590 + }, + { + "epoch": 0.32453268255765105, + "grad_norm": 0.0873841866850853, + "learning_rate": 9.882368106319497e-06, + "loss": 0.005, + "step": 50600 + }, + { + "epoch": 0.32459681945143715, + "grad_norm": 0.28166040778160095, + "learning_rate": 9.8822473830367e-06, + "loss": 0.0034, + "step": 50610 + }, + { + "epoch": 0.32466095634522324, + "grad_norm": 0.19365577399730682, + "learning_rate": 9.882126598575854e-06, + "loss": 0.0044, + "step": 50620 + }, + { + "epoch": 0.32472509323900933, + "grad_norm": 0.14441215991973877, + "learning_rate": 9.882005752938471e-06, + "loss": 0.004, + "step": 50630 + }, + { + "epoch": 0.3247892301327954, + "grad_norm": 0.2671840786933899, + "learning_rate": 9.881884846126069e-06, + "loss": 0.0055, + "step": 50640 + }, + { + "epoch": 0.3248533670265815, + "grad_norm": 0.5790324211120605, + "learning_rate": 9.881763878140157e-06, + "loss": 0.0052, + "step": 50650 + }, + { + "epoch": 0.3249175039203676, + "grad_norm": 0.03954846411943436, + "learning_rate": 9.881642848982258e-06, + "loss": 0.0028, + "step": 50660 + }, + { + "epoch": 0.3249816408141537, + "grad_norm": 0.10793976485729218, + "learning_rate": 9.881521758653885e-06, + "loss": 0.0056, + "step": 50670 + }, + { + "epoch": 0.32504577770793985, + "grad_norm": 0.06970980763435364, + "learning_rate": 9.881400607156556e-06, + "loss": 0.0071, + "step": 50680 + }, + { + "epoch": 0.32510991460172595, + "grad_norm": 0.29071202874183655, + "learning_rate": 9.881279394491787e-06, + "loss": 0.0074, + "step": 50690 + }, + { + "epoch": 0.32517405149551204, + "grad_norm": 0.30976402759552, + "learning_rate": 9.881158120661098e-06, + "loss": 0.0059, + "step": 50700 + }, + { + "epoch": 0.32523818838929813, + "grad_norm": 0.27598875761032104, + "learning_rate": 9.88103678566601e-06, + "loss": 0.0036, + "step": 50710 + }, + { + "epoch": 0.3253023252830842, + "grad_norm": 0.4413817524909973, + "learning_rate": 9.880915389508045e-06, + "loss": 0.0045, + "step": 50720 + }, + { + "epoch": 0.3253664621768703, + "grad_norm": 0.11329472064971924, + "learning_rate": 9.88079393218872e-06, + "loss": 0.0054, + "step": 50730 + }, + { + "epoch": 0.3254305990706564, + "grad_norm": 0.22125688195228577, + "learning_rate": 9.880672413709559e-06, + "loss": 0.0067, + "step": 50740 + }, + { + "epoch": 0.3254947359644425, + "grad_norm": 0.4026035964488983, + "learning_rate": 9.880550834072084e-06, + "loss": 0.0061, + "step": 50750 + }, + { + "epoch": 0.3255588728582286, + "grad_norm": 0.11307325959205627, + "learning_rate": 9.88042919327782e-06, + "loss": 0.0045, + "step": 50760 + }, + { + "epoch": 0.3256230097520147, + "grad_norm": 0.19422641396522522, + "learning_rate": 9.88030749132829e-06, + "loss": 0.0052, + "step": 50770 + }, + { + "epoch": 0.3256871466458008, + "grad_norm": 0.34293127059936523, + "learning_rate": 9.880185728225017e-06, + "loss": 0.0056, + "step": 50780 + }, + { + "epoch": 0.3257512835395869, + "grad_norm": 0.20531544089317322, + "learning_rate": 9.88006390396953e-06, + "loss": 0.0053, + "step": 50790 + }, + { + "epoch": 0.32581542043337297, + "grad_norm": 0.2079891413450241, + "learning_rate": 9.879942018563357e-06, + "loss": 0.0032, + "step": 50800 + }, + { + "epoch": 0.32587955732715906, + "grad_norm": 0.5488170981407166, + "learning_rate": 9.87982007200802e-06, + "loss": 0.0056, + "step": 50810 + }, + { + "epoch": 0.3259436942209452, + "grad_norm": 0.0822836309671402, + "learning_rate": 9.879698064305052e-06, + "loss": 0.0027, + "step": 50820 + }, + { + "epoch": 0.3260078311147313, + "grad_norm": 0.12823008000850677, + "learning_rate": 9.879575995455979e-06, + "loss": 0.0036, + "step": 50830 + }, + { + "epoch": 0.3260719680085174, + "grad_norm": 0.16132020950317383, + "learning_rate": 9.87945386546233e-06, + "loss": 0.0046, + "step": 50840 + }, + { + "epoch": 0.3261361049023035, + "grad_norm": 0.14670330286026, + "learning_rate": 9.879331674325638e-06, + "loss": 0.0046, + "step": 50850 + }, + { + "epoch": 0.3262002417960896, + "grad_norm": 0.10751719772815704, + "learning_rate": 9.879209422047434e-06, + "loss": 0.0056, + "step": 50860 + }, + { + "epoch": 0.3262643786898757, + "grad_norm": 0.3112606108188629, + "learning_rate": 9.87908710862925e-06, + "loss": 0.0047, + "step": 50870 + }, + { + "epoch": 0.32632851558366177, + "grad_norm": 0.2563963234424591, + "learning_rate": 9.878964734072614e-06, + "loss": 0.0056, + "step": 50880 + }, + { + "epoch": 0.32639265247744786, + "grad_norm": 0.4979173243045807, + "learning_rate": 9.878842298379063e-06, + "loss": 0.0068, + "step": 50890 + }, + { + "epoch": 0.32645678937123396, + "grad_norm": 0.08930937200784683, + "learning_rate": 9.878719801550132e-06, + "loss": 0.0034, + "step": 50900 + }, + { + "epoch": 0.32652092626502005, + "grad_norm": 0.14936961233615875, + "learning_rate": 9.878597243587356e-06, + "loss": 0.0059, + "step": 50910 + }, + { + "epoch": 0.32658506315880614, + "grad_norm": 0.1063743308186531, + "learning_rate": 9.87847462449227e-06, + "loss": 0.0041, + "step": 50920 + }, + { + "epoch": 0.32664920005259224, + "grad_norm": 0.2172681838274002, + "learning_rate": 9.878351944266408e-06, + "loss": 0.0063, + "step": 50930 + }, + { + "epoch": 0.32671333694637833, + "grad_norm": 0.27539241313934326, + "learning_rate": 9.878229202911312e-06, + "loss": 0.0055, + "step": 50940 + }, + { + "epoch": 0.3267774738401644, + "grad_norm": 0.10550977289676666, + "learning_rate": 9.878106400428518e-06, + "loss": 0.0045, + "step": 50950 + }, + { + "epoch": 0.32684161073395057, + "grad_norm": 0.1692620813846588, + "learning_rate": 9.877983536819563e-06, + "loss": 0.0036, + "step": 50960 + }, + { + "epoch": 0.32690574762773666, + "grad_norm": 0.18884654343128204, + "learning_rate": 9.877860612085987e-06, + "loss": 0.0053, + "step": 50970 + }, + { + "epoch": 0.32696988452152276, + "grad_norm": 0.225121408700943, + "learning_rate": 9.877737626229334e-06, + "loss": 0.0042, + "step": 50980 + }, + { + "epoch": 0.32703402141530885, + "grad_norm": 0.0583873875439167, + "learning_rate": 9.877614579251142e-06, + "loss": 0.0029, + "step": 50990 + }, + { + "epoch": 0.32709815830909494, + "grad_norm": 0.26959750056266785, + "learning_rate": 9.877491471152953e-06, + "loss": 0.0034, + "step": 51000 + }, + { + "epoch": 0.32716229520288104, + "grad_norm": 0.31422290205955505, + "learning_rate": 9.877368301936309e-06, + "loss": 0.0054, + "step": 51010 + }, + { + "epoch": 0.32722643209666713, + "grad_norm": 0.4650602340698242, + "learning_rate": 9.877245071602755e-06, + "loss": 0.0073, + "step": 51020 + }, + { + "epoch": 0.3272905689904532, + "grad_norm": 0.1505156308412552, + "learning_rate": 9.877121780153834e-06, + "loss": 0.0056, + "step": 51030 + }, + { + "epoch": 0.3273547058842393, + "grad_norm": 0.0990140438079834, + "learning_rate": 9.876998427591092e-06, + "loss": 0.0048, + "step": 51040 + }, + { + "epoch": 0.3274188427780254, + "grad_norm": 0.14473867416381836, + "learning_rate": 9.876875013916076e-06, + "loss": 0.0054, + "step": 51050 + }, + { + "epoch": 0.3274829796718115, + "grad_norm": 0.042040273547172546, + "learning_rate": 9.876751539130328e-06, + "loss": 0.0073, + "step": 51060 + }, + { + "epoch": 0.3275471165655976, + "grad_norm": 0.3856167197227478, + "learning_rate": 9.876628003235398e-06, + "loss": 0.006, + "step": 51070 + }, + { + "epoch": 0.3276112534593837, + "grad_norm": 0.1594998985528946, + "learning_rate": 9.876504406232834e-06, + "loss": 0.0048, + "step": 51080 + }, + { + "epoch": 0.3276753903531698, + "grad_norm": 0.11787860840559006, + "learning_rate": 9.876380748124186e-06, + "loss": 0.0048, + "step": 51090 + }, + { + "epoch": 0.3277395272469559, + "grad_norm": 0.3186417520046234, + "learning_rate": 9.876257028911001e-06, + "loss": 0.0042, + "step": 51100 + }, + { + "epoch": 0.327803664140742, + "grad_norm": 0.15282252430915833, + "learning_rate": 9.876133248594831e-06, + "loss": 0.0071, + "step": 51110 + }, + { + "epoch": 0.3278678010345281, + "grad_norm": 0.7064988017082214, + "learning_rate": 9.876009407177226e-06, + "loss": 0.0033, + "step": 51120 + }, + { + "epoch": 0.3279319379283142, + "grad_norm": 0.4531448185443878, + "learning_rate": 9.875885504659738e-06, + "loss": 0.0055, + "step": 51130 + }, + { + "epoch": 0.3279960748221003, + "grad_norm": 0.2045564502477646, + "learning_rate": 9.875761541043921e-06, + "loss": 0.0033, + "step": 51140 + }, + { + "epoch": 0.3280602117158864, + "grad_norm": 0.43037521839141846, + "learning_rate": 9.875637516331327e-06, + "loss": 0.0064, + "step": 51150 + }, + { + "epoch": 0.3281243486096725, + "grad_norm": 0.3135991096496582, + "learning_rate": 9.875513430523509e-06, + "loss": 0.0047, + "step": 51160 + }, + { + "epoch": 0.3281884855034586, + "grad_norm": 0.19914768636226654, + "learning_rate": 9.875389283622025e-06, + "loss": 0.0097, + "step": 51170 + }, + { + "epoch": 0.3282526223972447, + "grad_norm": 0.23053988814353943, + "learning_rate": 9.875265075628429e-06, + "loss": 0.0036, + "step": 51180 + }, + { + "epoch": 0.32831675929103077, + "grad_norm": 0.15195836126804352, + "learning_rate": 9.875140806544275e-06, + "loss": 0.0036, + "step": 51190 + }, + { + "epoch": 0.32838089618481686, + "grad_norm": 0.059653397649526596, + "learning_rate": 9.875016476371124e-06, + "loss": 0.0054, + "step": 51200 + }, + { + "epoch": 0.32844503307860295, + "grad_norm": 0.062237270176410675, + "learning_rate": 9.874892085110532e-06, + "loss": 0.0056, + "step": 51210 + }, + { + "epoch": 0.32850916997238905, + "grad_norm": 0.549043595790863, + "learning_rate": 9.87476763276406e-06, + "loss": 0.0041, + "step": 51220 + }, + { + "epoch": 0.32857330686617514, + "grad_norm": 0.13534654676914215, + "learning_rate": 9.874643119333262e-06, + "loss": 0.0047, + "step": 51230 + }, + { + "epoch": 0.32863744375996123, + "grad_norm": 0.31206753849983215, + "learning_rate": 9.874518544819704e-06, + "loss": 0.0081, + "step": 51240 + }, + { + "epoch": 0.3287015806537474, + "grad_norm": 0.3598429560661316, + "learning_rate": 9.874393909224946e-06, + "loss": 0.0043, + "step": 51250 + }, + { + "epoch": 0.3287657175475335, + "grad_norm": 0.2721366286277771, + "learning_rate": 9.874269212550546e-06, + "loss": 0.0044, + "step": 51260 + }, + { + "epoch": 0.32882985444131957, + "grad_norm": 0.0643984004855156, + "learning_rate": 9.87414445479807e-06, + "loss": 0.0034, + "step": 51270 + }, + { + "epoch": 0.32889399133510566, + "grad_norm": 0.4401140511035919, + "learning_rate": 9.874019635969084e-06, + "loss": 0.0106, + "step": 51280 + }, + { + "epoch": 0.32895812822889176, + "grad_norm": 0.3248900771141052, + "learning_rate": 9.873894756065145e-06, + "loss": 0.005, + "step": 51290 + }, + { + "epoch": 0.32902226512267785, + "grad_norm": 0.19265004992485046, + "learning_rate": 9.873769815087822e-06, + "loss": 0.0027, + "step": 51300 + }, + { + "epoch": 0.32908640201646394, + "grad_norm": 0.14860068261623383, + "learning_rate": 9.87364481303868e-06, + "loss": 0.0039, + "step": 51310 + }, + { + "epoch": 0.32915053891025003, + "grad_norm": 0.46626561880111694, + "learning_rate": 9.873519749919285e-06, + "loss": 0.01, + "step": 51320 + }, + { + "epoch": 0.3292146758040361, + "grad_norm": 0.11976033449172974, + "learning_rate": 9.873394625731205e-06, + "loss": 0.0044, + "step": 51330 + }, + { + "epoch": 0.3292788126978222, + "grad_norm": 0.09584904462099075, + "learning_rate": 9.873269440476007e-06, + "loss": 0.0049, + "step": 51340 + }, + { + "epoch": 0.3293429495916083, + "grad_norm": 0.19121158123016357, + "learning_rate": 9.87314419415526e-06, + "loss": 0.0164, + "step": 51350 + }, + { + "epoch": 0.3294070864853944, + "grad_norm": 0.16358615458011627, + "learning_rate": 9.873018886770533e-06, + "loss": 0.0049, + "step": 51360 + }, + { + "epoch": 0.3294712233791805, + "grad_norm": 0.2830831706523895, + "learning_rate": 9.872893518323398e-06, + "loss": 0.0058, + "step": 51370 + }, + { + "epoch": 0.3295353602729666, + "grad_norm": 0.07893689721822739, + "learning_rate": 9.872768088815425e-06, + "loss": 0.0042, + "step": 51380 + }, + { + "epoch": 0.32959949716675274, + "grad_norm": 0.2660653591156006, + "learning_rate": 9.872642598248184e-06, + "loss": 0.0047, + "step": 51390 + }, + { + "epoch": 0.32966363406053883, + "grad_norm": 0.08735953271389008, + "learning_rate": 9.872517046623249e-06, + "loss": 0.0031, + "step": 51400 + }, + { + "epoch": 0.32972777095432493, + "grad_norm": 0.13710825145244598, + "learning_rate": 9.872391433942191e-06, + "loss": 0.0045, + "step": 51410 + }, + { + "epoch": 0.329791907848111, + "grad_norm": 0.42409124970436096, + "learning_rate": 9.87226576020659e-06, + "loss": 0.0042, + "step": 51420 + }, + { + "epoch": 0.3298560447418971, + "grad_norm": 0.18673092126846313, + "learning_rate": 9.872140025418015e-06, + "loss": 0.0041, + "step": 51430 + }, + { + "epoch": 0.3299201816356832, + "grad_norm": 0.2237669676542282, + "learning_rate": 9.872014229578044e-06, + "loss": 0.0043, + "step": 51440 + }, + { + "epoch": 0.3299843185294693, + "grad_norm": 0.2903503477573395, + "learning_rate": 9.871888372688252e-06, + "loss": 0.0029, + "step": 51450 + }, + { + "epoch": 0.3300484554232554, + "grad_norm": 0.35877344012260437, + "learning_rate": 9.871762454750218e-06, + "loss": 0.004, + "step": 51460 + }, + { + "epoch": 0.3301125923170415, + "grad_norm": 0.20692265033721924, + "learning_rate": 9.871636475765517e-06, + "loss": 0.0055, + "step": 51470 + }, + { + "epoch": 0.3301767292108276, + "grad_norm": 0.19338805973529816, + "learning_rate": 9.87151043573573e-06, + "loss": 0.006, + "step": 51480 + }, + { + "epoch": 0.3302408661046137, + "grad_norm": 0.2326938956975937, + "learning_rate": 9.871384334662436e-06, + "loss": 0.0042, + "step": 51490 + }, + { + "epoch": 0.33030500299839977, + "grad_norm": 0.23102594912052155, + "learning_rate": 9.871258172547215e-06, + "loss": 0.0038, + "step": 51500 + }, + { + "epoch": 0.33036913989218586, + "grad_norm": 0.2446339726448059, + "learning_rate": 9.871131949391645e-06, + "loss": 0.0041, + "step": 51510 + }, + { + "epoch": 0.33043327678597195, + "grad_norm": 0.2786913812160492, + "learning_rate": 9.871005665197313e-06, + "loss": 0.0044, + "step": 51520 + }, + { + "epoch": 0.3304974136797581, + "grad_norm": 0.12254165858030319, + "learning_rate": 9.870879319965798e-06, + "loss": 0.0046, + "step": 51530 + }, + { + "epoch": 0.3305615505735442, + "grad_norm": 0.25559279322624207, + "learning_rate": 9.870752913698685e-06, + "loss": 0.0079, + "step": 51540 + }, + { + "epoch": 0.3306256874673303, + "grad_norm": 0.3081286549568176, + "learning_rate": 9.870626446397555e-06, + "loss": 0.0055, + "step": 51550 + }, + { + "epoch": 0.3306898243611164, + "grad_norm": 0.17108890414237976, + "learning_rate": 9.870499918063996e-06, + "loss": 0.0057, + "step": 51560 + }, + { + "epoch": 0.3307539612549025, + "grad_norm": 0.45415928959846497, + "learning_rate": 9.870373328699591e-06, + "loss": 0.0044, + "step": 51570 + }, + { + "epoch": 0.33081809814868857, + "grad_norm": 0.49789750576019287, + "learning_rate": 9.87024667830593e-06, + "loss": 0.0039, + "step": 51580 + }, + { + "epoch": 0.33088223504247466, + "grad_norm": 0.2867346704006195, + "learning_rate": 9.870119966884595e-06, + "loss": 0.0053, + "step": 51590 + }, + { + "epoch": 0.33094637193626075, + "grad_norm": 0.22337113320827484, + "learning_rate": 9.869993194437177e-06, + "loss": 0.0051, + "step": 51600 + }, + { + "epoch": 0.33101050883004685, + "grad_norm": 0.18967677652835846, + "learning_rate": 9.869866360965263e-06, + "loss": 0.0032, + "step": 51610 + }, + { + "epoch": 0.33107464572383294, + "grad_norm": 0.30326980352401733, + "learning_rate": 9.869739466470444e-06, + "loss": 0.0037, + "step": 51620 + }, + { + "epoch": 0.33113878261761903, + "grad_norm": 0.13057635724544525, + "learning_rate": 9.86961251095431e-06, + "loss": 0.0045, + "step": 51630 + }, + { + "epoch": 0.3312029195114051, + "grad_norm": 0.23110616207122803, + "learning_rate": 9.869485494418449e-06, + "loss": 0.0062, + "step": 51640 + }, + { + "epoch": 0.3312670564051912, + "grad_norm": 0.22782118618488312, + "learning_rate": 9.869358416864454e-06, + "loss": 0.0052, + "step": 51650 + }, + { + "epoch": 0.3313311932989773, + "grad_norm": 0.21024642884731293, + "learning_rate": 9.86923127829392e-06, + "loss": 0.0049, + "step": 51660 + }, + { + "epoch": 0.33139533019276346, + "grad_norm": 0.13623197376728058, + "learning_rate": 9.869104078708439e-06, + "loss": 0.0055, + "step": 51670 + }, + { + "epoch": 0.33145946708654955, + "grad_norm": 0.16198650002479553, + "learning_rate": 9.868976818109602e-06, + "loss": 0.0074, + "step": 51680 + }, + { + "epoch": 0.33152360398033565, + "grad_norm": 0.24429954588413239, + "learning_rate": 9.868849496499004e-06, + "loss": 0.0048, + "step": 51690 + }, + { + "epoch": 0.33158774087412174, + "grad_norm": 0.24430863559246063, + "learning_rate": 9.868722113878245e-06, + "loss": 0.0048, + "step": 51700 + }, + { + "epoch": 0.33165187776790783, + "grad_norm": 0.1951116919517517, + "learning_rate": 9.868594670248917e-06, + "loss": 0.0048, + "step": 51710 + }, + { + "epoch": 0.3317160146616939, + "grad_norm": 0.03462325781583786, + "learning_rate": 9.868467165612619e-06, + "loss": 0.0044, + "step": 51720 + }, + { + "epoch": 0.33178015155548, + "grad_norm": 0.18571272492408752, + "learning_rate": 9.868339599970948e-06, + "loss": 0.0052, + "step": 51730 + }, + { + "epoch": 0.3318442884492661, + "grad_norm": 0.2798001766204834, + "learning_rate": 9.8682119733255e-06, + "loss": 0.0042, + "step": 51740 + }, + { + "epoch": 0.3319084253430522, + "grad_norm": 0.34794336557388306, + "learning_rate": 9.86808428567788e-06, + "loss": 0.0066, + "step": 51750 + }, + { + "epoch": 0.3319725622368383, + "grad_norm": 0.06256379932165146, + "learning_rate": 9.867956537029684e-06, + "loss": 0.0054, + "step": 51760 + }, + { + "epoch": 0.3320366991306244, + "grad_norm": 0.17388401925563812, + "learning_rate": 9.867828727382514e-06, + "loss": 0.0047, + "step": 51770 + }, + { + "epoch": 0.3321008360244105, + "grad_norm": 0.15907210111618042, + "learning_rate": 9.86770085673797e-06, + "loss": 0.0034, + "step": 51780 + }, + { + "epoch": 0.3321649729181966, + "grad_norm": 0.12382688373327255, + "learning_rate": 9.867572925097655e-06, + "loss": 0.0046, + "step": 51790 + }, + { + "epoch": 0.33222910981198267, + "grad_norm": 0.37066325545310974, + "learning_rate": 9.867444932463173e-06, + "loss": 0.0057, + "step": 51800 + }, + { + "epoch": 0.3322932467057688, + "grad_norm": 0.07459404319524765, + "learning_rate": 9.867316878836126e-06, + "loss": 0.0042, + "step": 51810 + }, + { + "epoch": 0.3323573835995549, + "grad_norm": 0.2496977597475052, + "learning_rate": 9.86718876421812e-06, + "loss": 0.0068, + "step": 51820 + }, + { + "epoch": 0.332421520493341, + "grad_norm": 0.1255044937133789, + "learning_rate": 9.867060588610762e-06, + "loss": 0.0058, + "step": 51830 + }, + { + "epoch": 0.3324856573871271, + "grad_norm": 0.16789741814136505, + "learning_rate": 9.866932352015657e-06, + "loss": 0.004, + "step": 51840 + }, + { + "epoch": 0.3325497942809132, + "grad_norm": 0.125722274184227, + "learning_rate": 9.866804054434409e-06, + "loss": 0.0037, + "step": 51850 + }, + { + "epoch": 0.3326139311746993, + "grad_norm": 0.11827981472015381, + "learning_rate": 9.86667569586863e-06, + "loss": 0.0063, + "step": 51860 + }, + { + "epoch": 0.3326780680684854, + "grad_norm": 0.29303354024887085, + "learning_rate": 9.866547276319925e-06, + "loss": 0.0072, + "step": 51870 + }, + { + "epoch": 0.33274220496227147, + "grad_norm": 0.35147103667259216, + "learning_rate": 9.866418795789905e-06, + "loss": 0.004, + "step": 51880 + }, + { + "epoch": 0.33280634185605756, + "grad_norm": 0.3894512355327606, + "learning_rate": 9.86629025428018e-06, + "loss": 0.0031, + "step": 51890 + }, + { + "epoch": 0.33287047874984366, + "grad_norm": 0.17828503251075745, + "learning_rate": 9.866161651792358e-06, + "loss": 0.0042, + "step": 51900 + }, + { + "epoch": 0.33293461564362975, + "grad_norm": 0.13376423716545105, + "learning_rate": 9.866032988328056e-06, + "loss": 0.004, + "step": 51910 + }, + { + "epoch": 0.33299875253741584, + "grad_norm": 0.29704684019088745, + "learning_rate": 9.865904263888882e-06, + "loss": 0.0059, + "step": 51920 + }, + { + "epoch": 0.33306288943120194, + "grad_norm": 0.16345566511154175, + "learning_rate": 9.865775478476447e-06, + "loss": 0.0054, + "step": 51930 + }, + { + "epoch": 0.33312702632498803, + "grad_norm": 0.17470292747020721, + "learning_rate": 9.86564663209237e-06, + "loss": 0.0056, + "step": 51940 + }, + { + "epoch": 0.3331911632187742, + "grad_norm": 0.14169193804264069, + "learning_rate": 9.865517724738263e-06, + "loss": 0.0044, + "step": 51950 + }, + { + "epoch": 0.33325530011256027, + "grad_norm": 0.1908574253320694, + "learning_rate": 9.865388756415744e-06, + "loss": 0.0037, + "step": 51960 + }, + { + "epoch": 0.33331943700634636, + "grad_norm": 0.13204747438430786, + "learning_rate": 9.865259727126424e-06, + "loss": 0.0023, + "step": 51970 + }, + { + "epoch": 0.33338357390013246, + "grad_norm": 0.13022635877132416, + "learning_rate": 9.865130636871922e-06, + "loss": 0.0046, + "step": 51980 + }, + { + "epoch": 0.33344771079391855, + "grad_norm": 0.30808025598526, + "learning_rate": 9.865001485653858e-06, + "loss": 0.0043, + "step": 51990 + }, + { + "epoch": 0.33351184768770464, + "grad_norm": 0.26063185930252075, + "learning_rate": 9.864872273473846e-06, + "loss": 0.0045, + "step": 52000 + }, + { + "epoch": 0.33357598458149074, + "grad_norm": 0.1141660287976265, + "learning_rate": 9.86474300033351e-06, + "loss": 0.0044, + "step": 52010 + }, + { + "epoch": 0.33364012147527683, + "grad_norm": 0.07287078350782394, + "learning_rate": 9.864613666234465e-06, + "loss": 0.0041, + "step": 52020 + }, + { + "epoch": 0.3337042583690629, + "grad_norm": 0.0985855832695961, + "learning_rate": 9.864484271178336e-06, + "loss": 0.0055, + "step": 52030 + }, + { + "epoch": 0.333768395262849, + "grad_norm": 0.09198424965143204, + "learning_rate": 9.864354815166744e-06, + "loss": 0.0048, + "step": 52040 + }, + { + "epoch": 0.3338325321566351, + "grad_norm": 0.12575598061084747, + "learning_rate": 9.864225298201306e-06, + "loss": 0.0041, + "step": 52050 + }, + { + "epoch": 0.3338966690504212, + "grad_norm": 0.4537132978439331, + "learning_rate": 9.864095720283651e-06, + "loss": 0.0071, + "step": 52060 + }, + { + "epoch": 0.3339608059442073, + "grad_norm": 0.21206165850162506, + "learning_rate": 9.863966081415399e-06, + "loss": 0.0023, + "step": 52070 + }, + { + "epoch": 0.3340249428379934, + "grad_norm": 0.19575679302215576, + "learning_rate": 9.863836381598176e-06, + "loss": 0.0029, + "step": 52080 + }, + { + "epoch": 0.33408907973177954, + "grad_norm": 0.06109379231929779, + "learning_rate": 9.863706620833608e-06, + "loss": 0.0037, + "step": 52090 + }, + { + "epoch": 0.33415321662556563, + "grad_norm": 0.18857845664024353, + "learning_rate": 9.86357679912332e-06, + "loss": 0.0039, + "step": 52100 + }, + { + "epoch": 0.3342173535193517, + "grad_norm": 0.2904978096485138, + "learning_rate": 9.863446916468938e-06, + "loss": 0.0064, + "step": 52110 + }, + { + "epoch": 0.3342814904131378, + "grad_norm": 0.3255593180656433, + "learning_rate": 9.86331697287209e-06, + "loss": 0.005, + "step": 52120 + }, + { + "epoch": 0.3343456273069239, + "grad_norm": 0.19768422842025757, + "learning_rate": 9.863186968334404e-06, + "loss": 0.0037, + "step": 52130 + }, + { + "epoch": 0.33440976420071, + "grad_norm": 0.06513660401105881, + "learning_rate": 9.863056902857511e-06, + "loss": 0.0038, + "step": 52140 + }, + { + "epoch": 0.3344739010944961, + "grad_norm": 0.408059298992157, + "learning_rate": 9.862926776443039e-06, + "loss": 0.0077, + "step": 52150 + }, + { + "epoch": 0.3345380379882822, + "grad_norm": 0.17330044507980347, + "learning_rate": 9.86279658909262e-06, + "loss": 0.0032, + "step": 52160 + }, + { + "epoch": 0.3346021748820683, + "grad_norm": 0.17973282933235168, + "learning_rate": 9.862666340807882e-06, + "loss": 0.0065, + "step": 52170 + }, + { + "epoch": 0.3346663117758544, + "grad_norm": 0.17612335085868835, + "learning_rate": 9.862536031590462e-06, + "loss": 0.0059, + "step": 52180 + }, + { + "epoch": 0.33473044866964047, + "grad_norm": 0.19796328246593475, + "learning_rate": 9.862405661441988e-06, + "loss": 0.0043, + "step": 52190 + }, + { + "epoch": 0.33479458556342656, + "grad_norm": 0.1836937516927719, + "learning_rate": 9.862275230364099e-06, + "loss": 0.0049, + "step": 52200 + }, + { + "epoch": 0.33485872245721265, + "grad_norm": 0.35114187002182007, + "learning_rate": 9.862144738358424e-06, + "loss": 0.0059, + "step": 52210 + }, + { + "epoch": 0.33492285935099875, + "grad_norm": 0.3235044777393341, + "learning_rate": 9.862014185426601e-06, + "loss": 0.004, + "step": 52220 + }, + { + "epoch": 0.3349869962447849, + "grad_norm": 0.25618669390678406, + "learning_rate": 9.861883571570265e-06, + "loss": 0.0039, + "step": 52230 + }, + { + "epoch": 0.335051133138571, + "grad_norm": 0.16893281042575836, + "learning_rate": 9.861752896791052e-06, + "loss": 0.0056, + "step": 52240 + }, + { + "epoch": 0.3351152700323571, + "grad_norm": 0.05286022275686264, + "learning_rate": 9.861622161090602e-06, + "loss": 0.007, + "step": 52250 + }, + { + "epoch": 0.3351794069261432, + "grad_norm": 0.3475116491317749, + "learning_rate": 9.861491364470553e-06, + "loss": 0.0046, + "step": 52260 + }, + { + "epoch": 0.33524354381992927, + "grad_norm": 0.021251840516924858, + "learning_rate": 9.861360506932542e-06, + "loss": 0.0031, + "step": 52270 + }, + { + "epoch": 0.33530768071371536, + "grad_norm": 0.05894012749195099, + "learning_rate": 9.861229588478208e-06, + "loss": 0.0027, + "step": 52280 + }, + { + "epoch": 0.33537181760750145, + "grad_norm": 0.32902398705482483, + "learning_rate": 9.861098609109194e-06, + "loss": 0.0033, + "step": 52290 + }, + { + "epoch": 0.33543595450128755, + "grad_norm": 0.14335854351520538, + "learning_rate": 9.86096756882714e-06, + "loss": 0.0036, + "step": 52300 + }, + { + "epoch": 0.33550009139507364, + "grad_norm": 0.13611845672130585, + "learning_rate": 9.86083646763369e-06, + "loss": 0.0036, + "step": 52310 + }, + { + "epoch": 0.33556422828885973, + "grad_norm": 0.13622123003005981, + "learning_rate": 9.860705305530482e-06, + "loss": 0.0039, + "step": 52320 + }, + { + "epoch": 0.3356283651826458, + "grad_norm": 0.13387630879878998, + "learning_rate": 9.860574082519165e-06, + "loss": 0.0043, + "step": 52330 + }, + { + "epoch": 0.3356925020764319, + "grad_norm": 0.7947002649307251, + "learning_rate": 9.86044279860138e-06, + "loss": 0.0046, + "step": 52340 + }, + { + "epoch": 0.335756638970218, + "grad_norm": 0.23654408752918243, + "learning_rate": 9.860311453778773e-06, + "loss": 0.0042, + "step": 52350 + }, + { + "epoch": 0.3358207758640041, + "grad_norm": 0.1772829294204712, + "learning_rate": 9.86018004805299e-06, + "loss": 0.0043, + "step": 52360 + }, + { + "epoch": 0.33588491275779025, + "grad_norm": 0.5392045378684998, + "learning_rate": 9.860048581425679e-06, + "loss": 0.0036, + "step": 52370 + }, + { + "epoch": 0.33594904965157635, + "grad_norm": 0.13509030640125275, + "learning_rate": 9.859917053898485e-06, + "loss": 0.0061, + "step": 52380 + }, + { + "epoch": 0.33601318654536244, + "grad_norm": 0.11849337071180344, + "learning_rate": 9.859785465473055e-06, + "loss": 0.0073, + "step": 52390 + }, + { + "epoch": 0.33607732343914853, + "grad_norm": 0.42279133200645447, + "learning_rate": 9.859653816151042e-06, + "loss": 0.0054, + "step": 52400 + }, + { + "epoch": 0.3361414603329346, + "grad_norm": 0.10719570517539978, + "learning_rate": 9.859522105934092e-06, + "loss": 0.004, + "step": 52410 + }, + { + "epoch": 0.3362055972267207, + "grad_norm": 0.20735955238342285, + "learning_rate": 9.859390334823859e-06, + "loss": 0.004, + "step": 52420 + }, + { + "epoch": 0.3362697341205068, + "grad_norm": 0.19455531239509583, + "learning_rate": 9.85925850282199e-06, + "loss": 0.0048, + "step": 52430 + }, + { + "epoch": 0.3363338710142929, + "grad_norm": 0.36882632970809937, + "learning_rate": 9.859126609930141e-06, + "loss": 0.0046, + "step": 52440 + }, + { + "epoch": 0.336398007908079, + "grad_norm": 0.0744689404964447, + "learning_rate": 9.858994656149961e-06, + "loss": 0.0059, + "step": 52450 + }, + { + "epoch": 0.3364621448018651, + "grad_norm": 0.18131551146507263, + "learning_rate": 9.858862641483107e-06, + "loss": 0.0043, + "step": 52460 + }, + { + "epoch": 0.3365262816956512, + "grad_norm": 0.12779557704925537, + "learning_rate": 9.85873056593123e-06, + "loss": 0.0019, + "step": 52470 + }, + { + "epoch": 0.3365904185894373, + "grad_norm": 0.21120983362197876, + "learning_rate": 9.858598429495986e-06, + "loss": 0.0047, + "step": 52480 + }, + { + "epoch": 0.33665455548322337, + "grad_norm": 0.36974161863327026, + "learning_rate": 9.858466232179033e-06, + "loss": 0.0028, + "step": 52490 + }, + { + "epoch": 0.33671869237700947, + "grad_norm": 0.10227707773447037, + "learning_rate": 9.858333973982026e-06, + "loss": 0.0032, + "step": 52500 + }, + { + "epoch": 0.33678282927079556, + "grad_norm": 0.10719114542007446, + "learning_rate": 9.858201654906621e-06, + "loss": 0.0063, + "step": 52510 + }, + { + "epoch": 0.3368469661645817, + "grad_norm": 0.22539356350898743, + "learning_rate": 9.858069274954479e-06, + "loss": 0.0049, + "step": 52520 + }, + { + "epoch": 0.3369111030583678, + "grad_norm": 0.28657108545303345, + "learning_rate": 9.857936834127255e-06, + "loss": 0.0043, + "step": 52530 + }, + { + "epoch": 0.3369752399521539, + "grad_norm": 0.14741523563861847, + "learning_rate": 9.85780433242661e-06, + "loss": 0.0054, + "step": 52540 + }, + { + "epoch": 0.33703937684594, + "grad_norm": 0.18382100760936737, + "learning_rate": 9.857671769854207e-06, + "loss": 0.0058, + "step": 52550 + }, + { + "epoch": 0.3371035137397261, + "grad_norm": 0.1844129115343094, + "learning_rate": 9.857539146411703e-06, + "loss": 0.004, + "step": 52560 + }, + { + "epoch": 0.3371676506335122, + "grad_norm": 0.3120049238204956, + "learning_rate": 9.857406462100764e-06, + "loss": 0.0053, + "step": 52570 + }, + { + "epoch": 0.33723178752729827, + "grad_norm": 0.13739828765392303, + "learning_rate": 9.85727371692305e-06, + "loss": 0.0063, + "step": 52580 + }, + { + "epoch": 0.33729592442108436, + "grad_norm": 0.13995586335659027, + "learning_rate": 9.857140910880223e-06, + "loss": 0.004, + "step": 52590 + }, + { + "epoch": 0.33736006131487045, + "grad_norm": 0.27522650361061096, + "learning_rate": 9.85700804397395e-06, + "loss": 0.0054, + "step": 52600 + }, + { + "epoch": 0.33742419820865655, + "grad_norm": 0.2531697750091553, + "learning_rate": 9.856875116205895e-06, + "loss": 0.0052, + "step": 52610 + }, + { + "epoch": 0.33748833510244264, + "grad_norm": 0.08868460357189178, + "learning_rate": 9.856742127577726e-06, + "loss": 0.0052, + "step": 52620 + }, + { + "epoch": 0.33755247199622873, + "grad_norm": 0.13962849974632263, + "learning_rate": 9.856609078091104e-06, + "loss": 0.0047, + "step": 52630 + }, + { + "epoch": 0.3376166088900148, + "grad_norm": 0.2872208058834076, + "learning_rate": 9.8564759677477e-06, + "loss": 0.0059, + "step": 52640 + }, + { + "epoch": 0.3376807457838009, + "grad_norm": 0.22213566303253174, + "learning_rate": 9.856342796549181e-06, + "loss": 0.0042, + "step": 52650 + }, + { + "epoch": 0.33774488267758707, + "grad_norm": 0.3229414224624634, + "learning_rate": 9.856209564497217e-06, + "loss": 0.0048, + "step": 52660 + }, + { + "epoch": 0.33780901957137316, + "grad_norm": 0.17573681473731995, + "learning_rate": 9.856076271593476e-06, + "loss": 0.0036, + "step": 52670 + }, + { + "epoch": 0.33787315646515925, + "grad_norm": 0.09097541868686676, + "learning_rate": 9.85594291783963e-06, + "loss": 0.0038, + "step": 52680 + }, + { + "epoch": 0.33793729335894535, + "grad_norm": 0.07667984813451767, + "learning_rate": 9.855809503237345e-06, + "loss": 0.0027, + "step": 52690 + }, + { + "epoch": 0.33800143025273144, + "grad_norm": 0.14468225836753845, + "learning_rate": 9.8556760277883e-06, + "loss": 0.0033, + "step": 52700 + }, + { + "epoch": 0.33806556714651753, + "grad_norm": 0.1518298238515854, + "learning_rate": 9.855542491494163e-06, + "loss": 0.0081, + "step": 52710 + }, + { + "epoch": 0.3381297040403036, + "grad_norm": 0.13446344435214996, + "learning_rate": 9.855408894356608e-06, + "loss": 0.0035, + "step": 52720 + }, + { + "epoch": 0.3381938409340897, + "grad_norm": 0.279994934797287, + "learning_rate": 9.85527523637731e-06, + "loss": 0.007, + "step": 52730 + }, + { + "epoch": 0.3382579778278758, + "grad_norm": 0.12073291093111038, + "learning_rate": 9.855141517557944e-06, + "loss": 0.0037, + "step": 52740 + }, + { + "epoch": 0.3383221147216619, + "grad_norm": 0.11250226199626923, + "learning_rate": 9.855007737900184e-06, + "loss": 0.0035, + "step": 52750 + }, + { + "epoch": 0.338386251615448, + "grad_norm": 0.38413527607917786, + "learning_rate": 9.854873897405708e-06, + "loss": 0.0067, + "step": 52760 + }, + { + "epoch": 0.3384503885092341, + "grad_norm": 0.14131279289722443, + "learning_rate": 9.85473999607619e-06, + "loss": 0.0046, + "step": 52770 + }, + { + "epoch": 0.3385145254030202, + "grad_norm": 0.23225539922714233, + "learning_rate": 9.854606033913312e-06, + "loss": 0.0036, + "step": 52780 + }, + { + "epoch": 0.3385786622968063, + "grad_norm": 0.15460790693759918, + "learning_rate": 9.854472010918751e-06, + "loss": 0.0131, + "step": 52790 + }, + { + "epoch": 0.3386427991905924, + "grad_norm": 0.1357378214597702, + "learning_rate": 9.854337927094186e-06, + "loss": 0.004, + "step": 52800 + }, + { + "epoch": 0.3387069360843785, + "grad_norm": 0.11922430992126465, + "learning_rate": 9.854203782441299e-06, + "loss": 0.0097, + "step": 52810 + }, + { + "epoch": 0.3387710729781646, + "grad_norm": 0.35543474555015564, + "learning_rate": 9.854069576961767e-06, + "loss": 0.0051, + "step": 52820 + }, + { + "epoch": 0.3388352098719507, + "grad_norm": 0.125699982047081, + "learning_rate": 9.853935310657275e-06, + "loss": 0.0048, + "step": 52830 + }, + { + "epoch": 0.3388993467657368, + "grad_norm": 0.0511971153318882, + "learning_rate": 9.853800983529503e-06, + "loss": 0.0032, + "step": 52840 + }, + { + "epoch": 0.3389634836595229, + "grad_norm": 0.20448780059814453, + "learning_rate": 9.853666595580138e-06, + "loss": 0.0057, + "step": 52850 + }, + { + "epoch": 0.339027620553309, + "grad_norm": 0.15460821986198425, + "learning_rate": 9.85353214681086e-06, + "loss": 0.0034, + "step": 52860 + }, + { + "epoch": 0.3390917574470951, + "grad_norm": 0.26127907633781433, + "learning_rate": 9.853397637223357e-06, + "loss": 0.0036, + "step": 52870 + }, + { + "epoch": 0.33915589434088117, + "grad_norm": 0.16217119991779327, + "learning_rate": 9.853263066819312e-06, + "loss": 0.0051, + "step": 52880 + }, + { + "epoch": 0.33922003123466726, + "grad_norm": 0.1911216527223587, + "learning_rate": 9.853128435600412e-06, + "loss": 0.0048, + "step": 52890 + }, + { + "epoch": 0.33928416812845336, + "grad_norm": 0.21904310584068298, + "learning_rate": 9.852993743568345e-06, + "loss": 0.0081, + "step": 52900 + }, + { + "epoch": 0.33934830502223945, + "grad_norm": 0.21412821114063263, + "learning_rate": 9.852858990724796e-06, + "loss": 0.0038, + "step": 52910 + }, + { + "epoch": 0.33941244191602554, + "grad_norm": 0.198672816157341, + "learning_rate": 9.852724177071457e-06, + "loss": 0.003, + "step": 52920 + }, + { + "epoch": 0.33947657880981164, + "grad_norm": 0.3219127953052521, + "learning_rate": 9.852589302610016e-06, + "loss": 0.0059, + "step": 52930 + }, + { + "epoch": 0.3395407157035978, + "grad_norm": 0.15994399785995483, + "learning_rate": 9.852454367342163e-06, + "loss": 0.0049, + "step": 52940 + }, + { + "epoch": 0.3396048525973839, + "grad_norm": 0.5959430932998657, + "learning_rate": 9.85231937126959e-06, + "loss": 0.0039, + "step": 52950 + }, + { + "epoch": 0.33966898949116997, + "grad_norm": 0.225297749042511, + "learning_rate": 9.852184314393985e-06, + "loss": 0.0041, + "step": 52960 + }, + { + "epoch": 0.33973312638495606, + "grad_norm": 0.23923683166503906, + "learning_rate": 9.852049196717045e-06, + "loss": 0.0057, + "step": 52970 + }, + { + "epoch": 0.33979726327874216, + "grad_norm": 0.41412973403930664, + "learning_rate": 9.851914018240458e-06, + "loss": 0.0058, + "step": 52980 + }, + { + "epoch": 0.33986140017252825, + "grad_norm": 0.1495286226272583, + "learning_rate": 9.851778778965923e-06, + "loss": 0.0044, + "step": 52990 + }, + { + "epoch": 0.33992553706631434, + "grad_norm": 0.24865113198757172, + "learning_rate": 9.851643478895132e-06, + "loss": 0.0073, + "step": 53000 + }, + { + "epoch": 0.33998967396010044, + "grad_norm": 0.06629683077335358, + "learning_rate": 9.85150811802978e-06, + "loss": 0.0025, + "step": 53010 + }, + { + "epoch": 0.34005381085388653, + "grad_norm": 0.16571450233459473, + "learning_rate": 9.851372696371563e-06, + "loss": 0.0055, + "step": 53020 + }, + { + "epoch": 0.3401179477476726, + "grad_norm": 0.11346513032913208, + "learning_rate": 9.851237213922182e-06, + "loss": 0.0038, + "step": 53030 + }, + { + "epoch": 0.3401820846414587, + "grad_norm": 0.17872054874897003, + "learning_rate": 9.85110167068333e-06, + "loss": 0.0059, + "step": 53040 + }, + { + "epoch": 0.3402462215352448, + "grad_norm": 0.17010082304477692, + "learning_rate": 9.850966066656707e-06, + "loss": 0.0029, + "step": 53050 + }, + { + "epoch": 0.3403103584290309, + "grad_norm": 0.249822735786438, + "learning_rate": 9.850830401844012e-06, + "loss": 0.0037, + "step": 53060 + }, + { + "epoch": 0.340374495322817, + "grad_norm": 0.5932525992393494, + "learning_rate": 9.850694676246945e-06, + "loss": 0.0063, + "step": 53070 + }, + { + "epoch": 0.34043863221660314, + "grad_norm": 0.24662402272224426, + "learning_rate": 9.850558889867207e-06, + "loss": 0.0033, + "step": 53080 + }, + { + "epoch": 0.34050276911038924, + "grad_norm": 0.2894830107688904, + "learning_rate": 9.850423042706501e-06, + "loss": 0.0052, + "step": 53090 + }, + { + "epoch": 0.34056690600417533, + "grad_norm": 0.11228039860725403, + "learning_rate": 9.850287134766527e-06, + "loss": 0.0061, + "step": 53100 + }, + { + "epoch": 0.3406310428979614, + "grad_norm": 0.23333176970481873, + "learning_rate": 9.850151166048988e-06, + "loss": 0.0039, + "step": 53110 + }, + { + "epoch": 0.3406951797917475, + "grad_norm": 0.12623150646686554, + "learning_rate": 9.850015136555589e-06, + "loss": 0.0047, + "step": 53120 + }, + { + "epoch": 0.3407593166855336, + "grad_norm": 0.31074294447898865, + "learning_rate": 9.849879046288033e-06, + "loss": 0.0039, + "step": 53130 + }, + { + "epoch": 0.3408234535793197, + "grad_norm": 0.1798170506954193, + "learning_rate": 9.84974289524803e-06, + "loss": 0.0029, + "step": 53140 + }, + { + "epoch": 0.3408875904731058, + "grad_norm": 0.08633461594581604, + "learning_rate": 9.849606683437278e-06, + "loss": 0.0047, + "step": 53150 + }, + { + "epoch": 0.3409517273668919, + "grad_norm": 0.28256675601005554, + "learning_rate": 9.84947041085749e-06, + "loss": 0.0031, + "step": 53160 + }, + { + "epoch": 0.341015864260678, + "grad_norm": 0.2820548713207245, + "learning_rate": 9.849334077510373e-06, + "loss": 0.0037, + "step": 53170 + }, + { + "epoch": 0.3410800011544641, + "grad_norm": 0.1605130136013031, + "learning_rate": 9.849197683397633e-06, + "loss": 0.004, + "step": 53180 + }, + { + "epoch": 0.34114413804825017, + "grad_norm": 0.1496482938528061, + "learning_rate": 9.84906122852098e-06, + "loss": 0.0058, + "step": 53190 + }, + { + "epoch": 0.34120827494203626, + "grad_norm": 0.14850284159183502, + "learning_rate": 9.848924712882126e-06, + "loss": 0.0047, + "step": 53200 + }, + { + "epoch": 0.34127241183582235, + "grad_norm": 0.3548075258731842, + "learning_rate": 9.848788136482778e-06, + "loss": 0.0032, + "step": 53210 + }, + { + "epoch": 0.3413365487296085, + "grad_norm": 0.456371545791626, + "learning_rate": 9.84865149932465e-06, + "loss": 0.0036, + "step": 53220 + }, + { + "epoch": 0.3414006856233946, + "grad_norm": 0.1298443228006363, + "learning_rate": 9.848514801409454e-06, + "loss": 0.0063, + "step": 53230 + }, + { + "epoch": 0.3414648225171807, + "grad_norm": 0.21353577077388763, + "learning_rate": 9.848378042738903e-06, + "loss": 0.0024, + "step": 53240 + }, + { + "epoch": 0.3415289594109668, + "grad_norm": 0.18850825726985931, + "learning_rate": 9.848241223314708e-06, + "loss": 0.0033, + "step": 53250 + }, + { + "epoch": 0.3415930963047529, + "grad_norm": 0.312389612197876, + "learning_rate": 9.848104343138586e-06, + "loss": 0.0051, + "step": 53260 + }, + { + "epoch": 0.34165723319853897, + "grad_norm": 0.19968365132808685, + "learning_rate": 9.847967402212253e-06, + "loss": 0.0073, + "step": 53270 + }, + { + "epoch": 0.34172137009232506, + "grad_norm": 0.4982510209083557, + "learning_rate": 9.847830400537421e-06, + "loss": 0.0045, + "step": 53280 + }, + { + "epoch": 0.34178550698611115, + "grad_norm": 0.3132604658603668, + "learning_rate": 9.847693338115811e-06, + "loss": 0.0036, + "step": 53290 + }, + { + "epoch": 0.34184964387989725, + "grad_norm": 0.07934847474098206, + "learning_rate": 9.84755621494914e-06, + "loss": 0.0048, + "step": 53300 + }, + { + "epoch": 0.34191378077368334, + "grad_norm": 0.03799527883529663, + "learning_rate": 9.847419031039125e-06, + "loss": 0.0044, + "step": 53310 + }, + { + "epoch": 0.34197791766746943, + "grad_norm": 0.14706788957118988, + "learning_rate": 9.847281786387483e-06, + "loss": 0.0051, + "step": 53320 + }, + { + "epoch": 0.3420420545612555, + "grad_norm": 0.2135099470615387, + "learning_rate": 9.847144480995938e-06, + "loss": 0.0038, + "step": 53330 + }, + { + "epoch": 0.3421061914550416, + "grad_norm": 0.17504918575286865, + "learning_rate": 9.847007114866207e-06, + "loss": 0.0045, + "step": 53340 + }, + { + "epoch": 0.3421703283488277, + "grad_norm": 0.16393157839775085, + "learning_rate": 9.846869688000013e-06, + "loss": 0.0036, + "step": 53350 + }, + { + "epoch": 0.34223446524261386, + "grad_norm": 0.24033233523368835, + "learning_rate": 9.846732200399079e-06, + "loss": 0.0071, + "step": 53360 + }, + { + "epoch": 0.34229860213639995, + "grad_norm": 0.32101425528526306, + "learning_rate": 9.846594652065126e-06, + "loss": 0.0051, + "step": 53370 + }, + { + "epoch": 0.34236273903018605, + "grad_norm": 0.1657356470823288, + "learning_rate": 9.846457042999877e-06, + "loss": 0.0066, + "step": 53380 + }, + { + "epoch": 0.34242687592397214, + "grad_norm": 0.2051573246717453, + "learning_rate": 9.846319373205059e-06, + "loss": 0.0037, + "step": 53390 + }, + { + "epoch": 0.34249101281775823, + "grad_norm": 0.38923710584640503, + "learning_rate": 9.846181642682395e-06, + "loss": 0.0056, + "step": 53400 + }, + { + "epoch": 0.3425551497115443, + "grad_norm": 0.20260879397392273, + "learning_rate": 9.846043851433612e-06, + "loss": 0.0049, + "step": 53410 + }, + { + "epoch": 0.3426192866053304, + "grad_norm": 0.13483817875385284, + "learning_rate": 9.845905999460436e-06, + "loss": 0.0046, + "step": 53420 + }, + { + "epoch": 0.3426834234991165, + "grad_norm": 0.49794188141822815, + "learning_rate": 9.845768086764594e-06, + "loss": 0.0038, + "step": 53430 + }, + { + "epoch": 0.3427475603929026, + "grad_norm": 0.5910177826881409, + "learning_rate": 9.845630113347814e-06, + "loss": 0.007, + "step": 53440 + }, + { + "epoch": 0.3428116972866887, + "grad_norm": 0.14928394556045532, + "learning_rate": 9.845492079211827e-06, + "loss": 0.0044, + "step": 53450 + }, + { + "epoch": 0.3428758341804748, + "grad_norm": 0.18685732781887054, + "learning_rate": 9.84535398435836e-06, + "loss": 0.0031, + "step": 53460 + }, + { + "epoch": 0.3429399710742609, + "grad_norm": 0.20124071836471558, + "learning_rate": 9.845215828789148e-06, + "loss": 0.004, + "step": 53470 + }, + { + "epoch": 0.343004107968047, + "grad_norm": 0.22984161972999573, + "learning_rate": 9.845077612505916e-06, + "loss": 0.0034, + "step": 53480 + }, + { + "epoch": 0.34306824486183307, + "grad_norm": 0.35153457522392273, + "learning_rate": 9.844939335510397e-06, + "loss": 0.0046, + "step": 53490 + }, + { + "epoch": 0.3431323817556192, + "grad_norm": 0.16015851497650146, + "learning_rate": 9.844800997804328e-06, + "loss": 0.0112, + "step": 53500 + }, + { + "epoch": 0.3431965186494053, + "grad_norm": 0.09807637333869934, + "learning_rate": 9.844662599389442e-06, + "loss": 0.0051, + "step": 53510 + }, + { + "epoch": 0.3432606555431914, + "grad_norm": 0.2192123383283615, + "learning_rate": 9.844524140267466e-06, + "loss": 0.0038, + "step": 53520 + }, + { + "epoch": 0.3433247924369775, + "grad_norm": 0.3703880310058594, + "learning_rate": 9.844385620440144e-06, + "loss": 0.0041, + "step": 53530 + }, + { + "epoch": 0.3433889293307636, + "grad_norm": 0.29409804940223694, + "learning_rate": 9.844247039909207e-06, + "loss": 0.0041, + "step": 53540 + }, + { + "epoch": 0.3434530662245497, + "grad_norm": 0.18779346346855164, + "learning_rate": 9.844108398676392e-06, + "loss": 0.0066, + "step": 53550 + }, + { + "epoch": 0.3435172031183358, + "grad_norm": 0.16231729090213776, + "learning_rate": 9.843969696743437e-06, + "loss": 0.008, + "step": 53560 + }, + { + "epoch": 0.34358134001212187, + "grad_norm": 0.16454057395458221, + "learning_rate": 9.843830934112079e-06, + "loss": 0.0076, + "step": 53570 + }, + { + "epoch": 0.34364547690590797, + "grad_norm": 0.24449777603149414, + "learning_rate": 9.84369211078406e-06, + "loss": 0.0051, + "step": 53580 + }, + { + "epoch": 0.34370961379969406, + "grad_norm": 0.14895787835121155, + "learning_rate": 9.843553226761115e-06, + "loss": 0.004, + "step": 53590 + }, + { + "epoch": 0.34377375069348015, + "grad_norm": 0.025796731933951378, + "learning_rate": 9.843414282044988e-06, + "loss": 0.0024, + "step": 53600 + }, + { + "epoch": 0.34383788758726624, + "grad_norm": 0.6771606206893921, + "learning_rate": 9.843275276637416e-06, + "loss": 0.0025, + "step": 53610 + }, + { + "epoch": 0.34390202448105234, + "grad_norm": 0.20201508700847626, + "learning_rate": 9.843136210540145e-06, + "loss": 0.0033, + "step": 53620 + }, + { + "epoch": 0.34396616137483843, + "grad_norm": 0.2704976499080658, + "learning_rate": 9.842997083754915e-06, + "loss": 0.0045, + "step": 53630 + }, + { + "epoch": 0.3440302982686246, + "grad_norm": 0.18581216037273407, + "learning_rate": 9.842857896283472e-06, + "loss": 0.0041, + "step": 53640 + }, + { + "epoch": 0.3440944351624107, + "grad_norm": 0.19817693531513214, + "learning_rate": 9.842718648127557e-06, + "loss": 0.0039, + "step": 53650 + }, + { + "epoch": 0.34415857205619677, + "grad_norm": 0.0884748250246048, + "learning_rate": 9.842579339288917e-06, + "loss": 0.0074, + "step": 53660 + }, + { + "epoch": 0.34422270894998286, + "grad_norm": 0.18690407276153564, + "learning_rate": 9.842439969769298e-06, + "loss": 0.0071, + "step": 53670 + }, + { + "epoch": 0.34428684584376895, + "grad_norm": 0.4473888874053955, + "learning_rate": 9.842300539570442e-06, + "loss": 0.005, + "step": 53680 + }, + { + "epoch": 0.34435098273755504, + "grad_norm": 0.14846082031726837, + "learning_rate": 9.842161048694102e-06, + "loss": 0.006, + "step": 53690 + }, + { + "epoch": 0.34441511963134114, + "grad_norm": 0.20833083987236023, + "learning_rate": 9.842021497142023e-06, + "loss": 0.0042, + "step": 53700 + }, + { + "epoch": 0.34447925652512723, + "grad_norm": 0.18778206408023834, + "learning_rate": 9.841881884915953e-06, + "loss": 0.0036, + "step": 53710 + }, + { + "epoch": 0.3445433934189133, + "grad_norm": 0.4774073660373688, + "learning_rate": 9.841742212017645e-06, + "loss": 0.0031, + "step": 53720 + }, + { + "epoch": 0.3446075303126994, + "grad_norm": 0.6549052596092224, + "learning_rate": 9.841602478448845e-06, + "loss": 0.0035, + "step": 53730 + }, + { + "epoch": 0.3446716672064855, + "grad_norm": 0.1550072878599167, + "learning_rate": 9.841462684211307e-06, + "loss": 0.0039, + "step": 53740 + }, + { + "epoch": 0.3447358041002716, + "grad_norm": 0.2029644101858139, + "learning_rate": 9.841322829306779e-06, + "loss": 0.0036, + "step": 53750 + }, + { + "epoch": 0.3447999409940577, + "grad_norm": 1.0198986530303955, + "learning_rate": 9.841182913737018e-06, + "loss": 0.0072, + "step": 53760 + }, + { + "epoch": 0.3448640778878438, + "grad_norm": 0.46527236700057983, + "learning_rate": 9.841042937503775e-06, + "loss": 0.0052, + "step": 53770 + }, + { + "epoch": 0.34492821478162994, + "grad_norm": 0.30712541937828064, + "learning_rate": 9.840902900608802e-06, + "loss": 0.0049, + "step": 53780 + }, + { + "epoch": 0.34499235167541603, + "grad_norm": 0.09056705236434937, + "learning_rate": 9.840762803053858e-06, + "loss": 0.0149, + "step": 53790 + }, + { + "epoch": 0.3450564885692021, + "grad_norm": 0.2204216718673706, + "learning_rate": 9.840622644840698e-06, + "loss": 0.0062, + "step": 53800 + }, + { + "epoch": 0.3451206254629882, + "grad_norm": 0.11173199862241745, + "learning_rate": 9.840482425971074e-06, + "loss": 0.0031, + "step": 53810 + }, + { + "epoch": 0.3451847623567743, + "grad_norm": 0.23205724358558655, + "learning_rate": 9.840342146446746e-06, + "loss": 0.0074, + "step": 53820 + }, + { + "epoch": 0.3452488992505604, + "grad_norm": 0.12278829514980316, + "learning_rate": 9.840201806269472e-06, + "loss": 0.0031, + "step": 53830 + }, + { + "epoch": 0.3453130361443465, + "grad_norm": 0.2744913697242737, + "learning_rate": 9.84006140544101e-06, + "loss": 0.0038, + "step": 53840 + }, + { + "epoch": 0.3453771730381326, + "grad_norm": 0.30894720554351807, + "learning_rate": 9.839920943963119e-06, + "loss": 0.0063, + "step": 53850 + }, + { + "epoch": 0.3454413099319187, + "grad_norm": 0.06933877617120743, + "learning_rate": 9.839780421837562e-06, + "loss": 0.0049, + "step": 53860 + }, + { + "epoch": 0.3455054468257048, + "grad_norm": 0.1928311139345169, + "learning_rate": 9.839639839066096e-06, + "loss": 0.0038, + "step": 53870 + }, + { + "epoch": 0.34556958371949087, + "grad_norm": 0.12387371063232422, + "learning_rate": 9.839499195650482e-06, + "loss": 0.0032, + "step": 53880 + }, + { + "epoch": 0.34563372061327696, + "grad_norm": 0.12215980142354965, + "learning_rate": 9.839358491592486e-06, + "loss": 0.0049, + "step": 53890 + }, + { + "epoch": 0.34569785750706306, + "grad_norm": 0.22633928060531616, + "learning_rate": 9.83921772689387e-06, + "loss": 0.0042, + "step": 53900 + }, + { + "epoch": 0.34576199440084915, + "grad_norm": 0.22962556779384613, + "learning_rate": 9.839076901556397e-06, + "loss": 0.0088, + "step": 53910 + }, + { + "epoch": 0.3458261312946353, + "grad_norm": 0.12060944736003876, + "learning_rate": 9.83893601558183e-06, + "loss": 0.0038, + "step": 53920 + }, + { + "epoch": 0.3458902681884214, + "grad_norm": 0.027461474761366844, + "learning_rate": 9.838795068971939e-06, + "loss": 0.0042, + "step": 53930 + }, + { + "epoch": 0.3459544050822075, + "grad_norm": 0.17180879414081573, + "learning_rate": 9.838654061728487e-06, + "loss": 0.006, + "step": 53940 + }, + { + "epoch": 0.3460185419759936, + "grad_norm": 0.04157579317688942, + "learning_rate": 9.83851299385324e-06, + "loss": 0.0041, + "step": 53950 + }, + { + "epoch": 0.34608267886977967, + "grad_norm": 0.15837259590625763, + "learning_rate": 9.83837186534797e-06, + "loss": 0.0032, + "step": 53960 + }, + { + "epoch": 0.34614681576356576, + "grad_norm": 0.1311531513929367, + "learning_rate": 9.838230676214439e-06, + "loss": 0.0035, + "step": 53970 + }, + { + "epoch": 0.34621095265735186, + "grad_norm": 0.24102309346199036, + "learning_rate": 9.838089426454422e-06, + "loss": 0.0044, + "step": 53980 + }, + { + "epoch": 0.34627508955113795, + "grad_norm": 0.12493369728326797, + "learning_rate": 9.837948116069686e-06, + "loss": 0.0057, + "step": 53990 + }, + { + "epoch": 0.34633922644492404, + "grad_norm": 0.04451517388224602, + "learning_rate": 9.837806745062004e-06, + "loss": 0.0047, + "step": 54000 + }, + { + "epoch": 0.34640336333871014, + "grad_norm": 0.13316604495048523, + "learning_rate": 9.837665313433144e-06, + "loss": 0.0055, + "step": 54010 + }, + { + "epoch": 0.34646750023249623, + "grad_norm": 0.15386483073234558, + "learning_rate": 9.837523821184883e-06, + "loss": 0.0046, + "step": 54020 + }, + { + "epoch": 0.3465316371262823, + "grad_norm": 0.0709504559636116, + "learning_rate": 9.837382268318988e-06, + "loss": 0.0039, + "step": 54030 + }, + { + "epoch": 0.3465957740200684, + "grad_norm": 0.4904066324234009, + "learning_rate": 9.837240654837237e-06, + "loss": 0.0044, + "step": 54040 + }, + { + "epoch": 0.3466599109138545, + "grad_norm": 0.1809903234243393, + "learning_rate": 9.837098980741405e-06, + "loss": 0.0033, + "step": 54050 + }, + { + "epoch": 0.3467240478076406, + "grad_norm": 0.12470248341560364, + "learning_rate": 9.836957246033262e-06, + "loss": 0.0031, + "step": 54060 + }, + { + "epoch": 0.34678818470142675, + "grad_norm": 0.32245323061943054, + "learning_rate": 9.836815450714591e-06, + "loss": 0.0047, + "step": 54070 + }, + { + "epoch": 0.34685232159521284, + "grad_norm": 0.11289974302053452, + "learning_rate": 9.836673594787165e-06, + "loss": 0.0036, + "step": 54080 + }, + { + "epoch": 0.34691645848899894, + "grad_norm": 0.02664255164563656, + "learning_rate": 9.836531678252762e-06, + "loss": 0.0067, + "step": 54090 + }, + { + "epoch": 0.34698059538278503, + "grad_norm": 0.26872023940086365, + "learning_rate": 9.83638970111316e-06, + "loss": 0.0035, + "step": 54100 + }, + { + "epoch": 0.3470447322765711, + "grad_norm": 0.12747934460639954, + "learning_rate": 9.83624766337014e-06, + "loss": 0.0041, + "step": 54110 + }, + { + "epoch": 0.3471088691703572, + "grad_norm": 0.07346111536026001, + "learning_rate": 9.836105565025478e-06, + "loss": 0.0041, + "step": 54120 + }, + { + "epoch": 0.3471730060641433, + "grad_norm": 0.09227632731199265, + "learning_rate": 9.83596340608096e-06, + "loss": 0.0049, + "step": 54130 + }, + { + "epoch": 0.3472371429579294, + "grad_norm": 0.07450766861438751, + "learning_rate": 9.835821186538362e-06, + "loss": 0.0043, + "step": 54140 + }, + { + "epoch": 0.3473012798517155, + "grad_norm": 0.35511043667793274, + "learning_rate": 9.835678906399468e-06, + "loss": 0.0088, + "step": 54150 + }, + { + "epoch": 0.3473654167455016, + "grad_norm": 0.24837952852249146, + "learning_rate": 9.835536565666062e-06, + "loss": 0.0036, + "step": 54160 + }, + { + "epoch": 0.3474295536392877, + "grad_norm": 0.0934607982635498, + "learning_rate": 9.835394164339927e-06, + "loss": 0.0039, + "step": 54170 + }, + { + "epoch": 0.3474936905330738, + "grad_norm": 0.2472117394208908, + "learning_rate": 9.835251702422847e-06, + "loss": 0.0053, + "step": 54180 + }, + { + "epoch": 0.34755782742685987, + "grad_norm": 0.28953588008880615, + "learning_rate": 9.83510917991661e-06, + "loss": 0.0042, + "step": 54190 + }, + { + "epoch": 0.34762196432064596, + "grad_norm": 0.36132022738456726, + "learning_rate": 9.834966596822996e-06, + "loss": 0.0067, + "step": 54200 + }, + { + "epoch": 0.3476861012144321, + "grad_norm": 0.1938018649816513, + "learning_rate": 9.834823953143798e-06, + "loss": 0.0025, + "step": 54210 + }, + { + "epoch": 0.3477502381082182, + "grad_norm": 0.23491692543029785, + "learning_rate": 9.834681248880798e-06, + "loss": 0.0079, + "step": 54220 + }, + { + "epoch": 0.3478143750020043, + "grad_norm": 0.24288392066955566, + "learning_rate": 9.834538484035788e-06, + "loss": 0.0056, + "step": 54230 + }, + { + "epoch": 0.3478785118957904, + "grad_norm": 0.14312800765037537, + "learning_rate": 9.834395658610554e-06, + "loss": 0.0046, + "step": 54240 + }, + { + "epoch": 0.3479426487895765, + "grad_norm": 0.3459871709346771, + "learning_rate": 9.834252772606888e-06, + "loss": 0.0044, + "step": 54250 + }, + { + "epoch": 0.3480067856833626, + "grad_norm": 0.466331422328949, + "learning_rate": 9.834109826026582e-06, + "loss": 0.0028, + "step": 54260 + }, + { + "epoch": 0.34807092257714867, + "grad_norm": 0.07888225466012955, + "learning_rate": 9.83396681887142e-06, + "loss": 0.0057, + "step": 54270 + }, + { + "epoch": 0.34813505947093476, + "grad_norm": 0.3045555055141449, + "learning_rate": 9.833823751143204e-06, + "loss": 0.0056, + "step": 54280 + }, + { + "epoch": 0.34819919636472085, + "grad_norm": 0.14433254301548004, + "learning_rate": 9.83368062284372e-06, + "loss": 0.0037, + "step": 54290 + }, + { + "epoch": 0.34826333325850695, + "grad_norm": 0.270381897687912, + "learning_rate": 9.833537433974762e-06, + "loss": 0.0065, + "step": 54300 + }, + { + "epoch": 0.34832747015229304, + "grad_norm": 0.028089692816138268, + "learning_rate": 9.833394184538128e-06, + "loss": 0.0052, + "step": 54310 + }, + { + "epoch": 0.34839160704607913, + "grad_norm": 0.04288167878985405, + "learning_rate": 9.833250874535608e-06, + "loss": 0.0029, + "step": 54320 + }, + { + "epoch": 0.3484557439398652, + "grad_norm": 0.18451502919197083, + "learning_rate": 9.833107503969001e-06, + "loss": 0.007, + "step": 54330 + }, + { + "epoch": 0.3485198808336513, + "grad_norm": 0.30367568135261536, + "learning_rate": 9.832964072840104e-06, + "loss": 0.0042, + "step": 54340 + }, + { + "epoch": 0.34858401772743747, + "grad_norm": 0.08944468945264816, + "learning_rate": 9.832820581150713e-06, + "loss": 0.0045, + "step": 54350 + }, + { + "epoch": 0.34864815462122356, + "grad_norm": 0.130207359790802, + "learning_rate": 9.832677028902625e-06, + "loss": 0.0065, + "step": 54360 + }, + { + "epoch": 0.34871229151500965, + "grad_norm": 0.08129074424505234, + "learning_rate": 9.83253341609764e-06, + "loss": 0.0049, + "step": 54370 + }, + { + "epoch": 0.34877642840879575, + "grad_norm": 0.059726521372795105, + "learning_rate": 9.832389742737558e-06, + "loss": 0.0044, + "step": 54380 + }, + { + "epoch": 0.34884056530258184, + "grad_norm": 0.287548303604126, + "learning_rate": 9.832246008824177e-06, + "loss": 0.0035, + "step": 54390 + }, + { + "epoch": 0.34890470219636793, + "grad_norm": 0.280642032623291, + "learning_rate": 9.832102214359303e-06, + "loss": 0.0076, + "step": 54400 + }, + { + "epoch": 0.348968839090154, + "grad_norm": 0.39101871848106384, + "learning_rate": 9.831958359344733e-06, + "loss": 0.0049, + "step": 54410 + }, + { + "epoch": 0.3490329759839401, + "grad_norm": 0.16636443138122559, + "learning_rate": 9.831814443782273e-06, + "loss": 0.003, + "step": 54420 + }, + { + "epoch": 0.3490971128777262, + "grad_norm": 0.026397155597805977, + "learning_rate": 9.831670467673723e-06, + "loss": 0.0045, + "step": 54430 + }, + { + "epoch": 0.3491612497715123, + "grad_norm": 0.11856728792190552, + "learning_rate": 9.831526431020891e-06, + "loss": 0.0045, + "step": 54440 + }, + { + "epoch": 0.3492253866652984, + "grad_norm": 0.2409236580133438, + "learning_rate": 9.831382333825576e-06, + "loss": 0.0036, + "step": 54450 + }, + { + "epoch": 0.3492895235590845, + "grad_norm": 0.06977161765098572, + "learning_rate": 9.83123817608959e-06, + "loss": 0.0042, + "step": 54460 + }, + { + "epoch": 0.3493536604528706, + "grad_norm": 0.3645520806312561, + "learning_rate": 9.831093957814737e-06, + "loss": 0.0037, + "step": 54470 + }, + { + "epoch": 0.3494177973466567, + "grad_norm": 0.38606497645378113, + "learning_rate": 9.830949679002824e-06, + "loss": 0.0047, + "step": 54480 + }, + { + "epoch": 0.3494819342404428, + "grad_norm": 0.11708054691553116, + "learning_rate": 9.830805339655658e-06, + "loss": 0.0053, + "step": 54490 + }, + { + "epoch": 0.3495460711342289, + "grad_norm": 0.11722180992364883, + "learning_rate": 9.83066093977505e-06, + "loss": 0.0057, + "step": 54500 + }, + { + "epoch": 0.349610208028015, + "grad_norm": 0.16923189163208008, + "learning_rate": 9.830516479362807e-06, + "loss": 0.0053, + "step": 54510 + }, + { + "epoch": 0.3496743449218011, + "grad_norm": 0.060270559042692184, + "learning_rate": 9.830371958420738e-06, + "loss": 0.0037, + "step": 54520 + }, + { + "epoch": 0.3497384818155872, + "grad_norm": 0.20828652381896973, + "learning_rate": 9.830227376950661e-06, + "loss": 0.0038, + "step": 54530 + }, + { + "epoch": 0.3498026187093733, + "grad_norm": 0.2548787295818329, + "learning_rate": 9.83008273495438e-06, + "loss": 0.01, + "step": 54540 + }, + { + "epoch": 0.3498667556031594, + "grad_norm": 0.11429157108068466, + "learning_rate": 9.829938032433711e-06, + "loss": 0.0049, + "step": 54550 + }, + { + "epoch": 0.3499308924969455, + "grad_norm": 0.20134198665618896, + "learning_rate": 9.829793269390465e-06, + "loss": 0.0036, + "step": 54560 + }, + { + "epoch": 0.34999502939073157, + "grad_norm": 0.22732007503509521, + "learning_rate": 9.829648445826459e-06, + "loss": 0.0033, + "step": 54570 + }, + { + "epoch": 0.35005916628451766, + "grad_norm": 0.66534024477005, + "learning_rate": 9.829503561743505e-06, + "loss": 0.0031, + "step": 54580 + }, + { + "epoch": 0.35012330317830376, + "grad_norm": 0.2801349461078644, + "learning_rate": 9.829358617143421e-06, + "loss": 0.0031, + "step": 54590 + }, + { + "epoch": 0.35018744007208985, + "grad_norm": 0.06797728687524796, + "learning_rate": 9.829213612028021e-06, + "loss": 0.0066, + "step": 54600 + }, + { + "epoch": 0.35025157696587594, + "grad_norm": 0.31216877698898315, + "learning_rate": 9.829068546399126e-06, + "loss": 0.0038, + "step": 54610 + }, + { + "epoch": 0.35031571385966204, + "grad_norm": 0.24418389797210693, + "learning_rate": 9.828923420258547e-06, + "loss": 0.004, + "step": 54620 + }, + { + "epoch": 0.3503798507534482, + "grad_norm": 0.15022139251232147, + "learning_rate": 9.828778233608109e-06, + "loss": 0.0036, + "step": 54630 + }, + { + "epoch": 0.3504439876472343, + "grad_norm": 0.10417608171701431, + "learning_rate": 9.828632986449627e-06, + "loss": 0.0061, + "step": 54640 + }, + { + "epoch": 0.35050812454102037, + "grad_norm": 0.2197953164577484, + "learning_rate": 9.828487678784923e-06, + "loss": 0.0043, + "step": 54650 + }, + { + "epoch": 0.35057226143480646, + "grad_norm": 0.5072107911109924, + "learning_rate": 9.828342310615818e-06, + "loss": 0.0056, + "step": 54660 + }, + { + "epoch": 0.35063639832859256, + "grad_norm": 0.07112255692481995, + "learning_rate": 9.828196881944133e-06, + "loss": 0.0039, + "step": 54670 + }, + { + "epoch": 0.35070053522237865, + "grad_norm": 0.5350751280784607, + "learning_rate": 9.828051392771691e-06, + "loss": 0.0048, + "step": 54680 + }, + { + "epoch": 0.35076467211616474, + "grad_norm": 0.2553097605705261, + "learning_rate": 9.827905843100314e-06, + "loss": 0.0033, + "step": 54690 + }, + { + "epoch": 0.35082880900995084, + "grad_norm": 0.26859140396118164, + "learning_rate": 9.827760232931826e-06, + "loss": 0.0041, + "step": 54700 + }, + { + "epoch": 0.35089294590373693, + "grad_norm": 0.5209015607833862, + "learning_rate": 9.827614562268052e-06, + "loss": 0.005, + "step": 54710 + }, + { + "epoch": 0.350957082797523, + "grad_norm": 0.1657070517539978, + "learning_rate": 9.827468831110818e-06, + "loss": 0.0052, + "step": 54720 + }, + { + "epoch": 0.3510212196913091, + "grad_norm": 0.24692420661449432, + "learning_rate": 9.827323039461948e-06, + "loss": 0.0044, + "step": 54730 + }, + { + "epoch": 0.3510853565850952, + "grad_norm": 0.28886139392852783, + "learning_rate": 9.827177187323271e-06, + "loss": 0.004, + "step": 54740 + }, + { + "epoch": 0.3511494934788813, + "grad_norm": 1.9326666593551636, + "learning_rate": 9.827031274696614e-06, + "loss": 0.0064, + "step": 54750 + }, + { + "epoch": 0.3512136303726674, + "grad_norm": 0.2602798640727997, + "learning_rate": 9.826885301583805e-06, + "loss": 0.0061, + "step": 54760 + }, + { + "epoch": 0.35127776726645354, + "grad_norm": 0.37088316679000854, + "learning_rate": 9.826739267986673e-06, + "loss": 0.0091, + "step": 54770 + }, + { + "epoch": 0.35134190416023964, + "grad_norm": 0.29487156867980957, + "learning_rate": 9.826593173907051e-06, + "loss": 0.0054, + "step": 54780 + }, + { + "epoch": 0.35140604105402573, + "grad_norm": 0.24256150424480438, + "learning_rate": 9.826447019346764e-06, + "loss": 0.0028, + "step": 54790 + }, + { + "epoch": 0.3514701779478118, + "grad_norm": 0.24807745218276978, + "learning_rate": 9.826300804307648e-06, + "loss": 0.0047, + "step": 54800 + }, + { + "epoch": 0.3515343148415979, + "grad_norm": 0.1723044365644455, + "learning_rate": 9.826154528791534e-06, + "loss": 0.004, + "step": 54810 + }, + { + "epoch": 0.351598451735384, + "grad_norm": 0.018190717324614525, + "learning_rate": 9.826008192800253e-06, + "loss": 0.0036, + "step": 54820 + }, + { + "epoch": 0.3516625886291701, + "grad_norm": 0.1684645712375641, + "learning_rate": 9.82586179633564e-06, + "loss": 0.0038, + "step": 54830 + }, + { + "epoch": 0.3517267255229562, + "grad_norm": 0.19667181372642517, + "learning_rate": 9.825715339399532e-06, + "loss": 0.0057, + "step": 54840 + }, + { + "epoch": 0.3517908624167423, + "grad_norm": 0.2179258018732071, + "learning_rate": 9.825568821993761e-06, + "loss": 0.0061, + "step": 54850 + }, + { + "epoch": 0.3518549993105284, + "grad_norm": 0.18799977004528046, + "learning_rate": 9.825422244120162e-06, + "loss": 0.0028, + "step": 54860 + }, + { + "epoch": 0.3519191362043145, + "grad_norm": 0.3359462320804596, + "learning_rate": 9.825275605780575e-06, + "loss": 0.0049, + "step": 54870 + }, + { + "epoch": 0.35198327309810057, + "grad_norm": 0.1551109403371811, + "learning_rate": 9.825128906976837e-06, + "loss": 0.0071, + "step": 54880 + }, + { + "epoch": 0.35204740999188666, + "grad_norm": 0.03857658803462982, + "learning_rate": 9.824982147710785e-06, + "loss": 0.0027, + "step": 54890 + }, + { + "epoch": 0.35211154688567275, + "grad_norm": 0.0902140811085701, + "learning_rate": 9.824835327984256e-06, + "loss": 0.004, + "step": 54900 + }, + { + "epoch": 0.3521756837794589, + "grad_norm": 0.28138551115989685, + "learning_rate": 9.824688447799095e-06, + "loss": 0.0039, + "step": 54910 + }, + { + "epoch": 0.352239820673245, + "grad_norm": 0.07199631631374359, + "learning_rate": 9.824541507157138e-06, + "loss": 0.0048, + "step": 54920 + }, + { + "epoch": 0.3523039575670311, + "grad_norm": 0.3760847747325897, + "learning_rate": 9.824394506060228e-06, + "loss": 0.0068, + "step": 54930 + }, + { + "epoch": 0.3523680944608172, + "grad_norm": 0.2786344885826111, + "learning_rate": 9.824247444510209e-06, + "loss": 0.0043, + "step": 54940 + }, + { + "epoch": 0.3524322313546033, + "grad_norm": 0.21483300626277924, + "learning_rate": 9.824100322508918e-06, + "loss": 0.0052, + "step": 54950 + }, + { + "epoch": 0.35249636824838937, + "grad_norm": 0.24060052633285522, + "learning_rate": 9.823953140058205e-06, + "loss": 0.008, + "step": 54960 + }, + { + "epoch": 0.35256050514217546, + "grad_norm": 0.288587749004364, + "learning_rate": 9.823805897159911e-06, + "loss": 0.006, + "step": 54970 + }, + { + "epoch": 0.35262464203596156, + "grad_norm": 0.22966255247592926, + "learning_rate": 9.82365859381588e-06, + "loss": 0.0051, + "step": 54980 + }, + { + "epoch": 0.35268877892974765, + "grad_norm": 0.16944147646427155, + "learning_rate": 9.823511230027962e-06, + "loss": 0.0178, + "step": 54990 + }, + { + "epoch": 0.35275291582353374, + "grad_norm": 0.1313783973455429, + "learning_rate": 9.823363805798e-06, + "loss": 0.0049, + "step": 55000 + }, + { + "epoch": 0.35281705271731983, + "grad_norm": 0.07974176853895187, + "learning_rate": 9.823216321127842e-06, + "loss": 0.0029, + "step": 55010 + }, + { + "epoch": 0.3528811896111059, + "grad_norm": 0.21326179802417755, + "learning_rate": 9.823068776019336e-06, + "loss": 0.0064, + "step": 55020 + }, + { + "epoch": 0.352945326504892, + "grad_norm": 0.21994377672672272, + "learning_rate": 9.822921170474332e-06, + "loss": 0.0033, + "step": 55030 + }, + { + "epoch": 0.3530094633986781, + "grad_norm": 0.0296862181276083, + "learning_rate": 9.822773504494678e-06, + "loss": 0.0031, + "step": 55040 + }, + { + "epoch": 0.35307360029246426, + "grad_norm": 0.21492598950862885, + "learning_rate": 9.822625778082226e-06, + "loss": 0.0042, + "step": 55050 + }, + { + "epoch": 0.35313773718625036, + "grad_norm": 0.36219337582588196, + "learning_rate": 9.822477991238826e-06, + "loss": 0.0055, + "step": 55060 + }, + { + "epoch": 0.35320187408003645, + "grad_norm": 0.40082961320877075, + "learning_rate": 9.82233014396633e-06, + "loss": 0.0033, + "step": 55070 + }, + { + "epoch": 0.35326601097382254, + "grad_norm": 0.36822453141212463, + "learning_rate": 9.822182236266591e-06, + "loss": 0.0047, + "step": 55080 + }, + { + "epoch": 0.35333014786760863, + "grad_norm": 0.22897125780582428, + "learning_rate": 9.822034268141462e-06, + "loss": 0.0035, + "step": 55090 + }, + { + "epoch": 0.35339428476139473, + "grad_norm": 0.09583339095115662, + "learning_rate": 9.821886239592797e-06, + "loss": 0.0041, + "step": 55100 + }, + { + "epoch": 0.3534584216551808, + "grad_norm": 0.057896021753549576, + "learning_rate": 9.821738150622453e-06, + "loss": 0.0043, + "step": 55110 + }, + { + "epoch": 0.3535225585489669, + "grad_norm": 0.26879703998565674, + "learning_rate": 9.821590001232282e-06, + "loss": 0.0067, + "step": 55120 + }, + { + "epoch": 0.353586695442753, + "grad_norm": 0.48501795530319214, + "learning_rate": 9.821441791424143e-06, + "loss": 0.0039, + "step": 55130 + }, + { + "epoch": 0.3536508323365391, + "grad_norm": 0.23591944575309753, + "learning_rate": 9.821293521199892e-06, + "loss": 0.004, + "step": 55140 + }, + { + "epoch": 0.3537149692303252, + "grad_norm": 0.2928210198879242, + "learning_rate": 9.82114519056139e-06, + "loss": 0.0051, + "step": 55150 + }, + { + "epoch": 0.3537791061241113, + "grad_norm": 0.0741635262966156, + "learning_rate": 9.820996799510491e-06, + "loss": 0.0044, + "step": 55160 + }, + { + "epoch": 0.3538432430178974, + "grad_norm": 0.16287928819656372, + "learning_rate": 9.820848348049057e-06, + "loss": 0.0077, + "step": 55170 + }, + { + "epoch": 0.3539073799116835, + "grad_norm": 0.1516534388065338, + "learning_rate": 9.820699836178946e-06, + "loss": 0.0052, + "step": 55180 + }, + { + "epoch": 0.3539715168054696, + "grad_norm": 0.315433144569397, + "learning_rate": 9.820551263902024e-06, + "loss": 0.0044, + "step": 55190 + }, + { + "epoch": 0.3540356536992557, + "grad_norm": 0.09990456700325012, + "learning_rate": 9.820402631220147e-06, + "loss": 0.0031, + "step": 55200 + }, + { + "epoch": 0.3540997905930418, + "grad_norm": 0.17717120051383972, + "learning_rate": 9.82025393813518e-06, + "loss": 0.0044, + "step": 55210 + }, + { + "epoch": 0.3541639274868279, + "grad_norm": 0.12074436247348785, + "learning_rate": 9.820105184648988e-06, + "loss": 0.004, + "step": 55220 + }, + { + "epoch": 0.354228064380614, + "grad_norm": 0.12373752892017365, + "learning_rate": 9.819956370763432e-06, + "loss": 0.0039, + "step": 55230 + }, + { + "epoch": 0.3542922012744001, + "grad_norm": 0.17480479180812836, + "learning_rate": 9.819807496480377e-06, + "loss": 0.0063, + "step": 55240 + }, + { + "epoch": 0.3543563381681862, + "grad_norm": 0.12161833792924881, + "learning_rate": 9.81965856180169e-06, + "loss": 0.0026, + "step": 55250 + }, + { + "epoch": 0.3544204750619723, + "grad_norm": 0.19953209161758423, + "learning_rate": 9.819509566729238e-06, + "loss": 0.0032, + "step": 55260 + }, + { + "epoch": 0.35448461195575837, + "grad_norm": 0.259747177362442, + "learning_rate": 9.819360511264886e-06, + "loss": 0.0035, + "step": 55270 + }, + { + "epoch": 0.35454874884954446, + "grad_norm": 0.11968179792165756, + "learning_rate": 9.819211395410502e-06, + "loss": 0.0069, + "step": 55280 + }, + { + "epoch": 0.35461288574333055, + "grad_norm": 0.175838440656662, + "learning_rate": 9.819062219167956e-06, + "loss": 0.0057, + "step": 55290 + }, + { + "epoch": 0.35467702263711665, + "grad_norm": 0.10187926888465881, + "learning_rate": 9.818912982539114e-06, + "loss": 0.0047, + "step": 55300 + }, + { + "epoch": 0.35474115953090274, + "grad_norm": 0.2792898416519165, + "learning_rate": 9.81876368552585e-06, + "loss": 0.0027, + "step": 55310 + }, + { + "epoch": 0.35480529642468883, + "grad_norm": 0.11766412854194641, + "learning_rate": 9.818614328130035e-06, + "loss": 0.0044, + "step": 55320 + }, + { + "epoch": 0.354869433318475, + "grad_norm": 0.2481955587863922, + "learning_rate": 9.818464910353536e-06, + "loss": 0.0048, + "step": 55330 + }, + { + "epoch": 0.3549335702122611, + "grad_norm": 0.1017189770936966, + "learning_rate": 9.818315432198226e-06, + "loss": 0.0039, + "step": 55340 + }, + { + "epoch": 0.35499770710604717, + "grad_norm": 0.18881593644618988, + "learning_rate": 9.818165893665985e-06, + "loss": 0.0029, + "step": 55350 + }, + { + "epoch": 0.35506184399983326, + "grad_norm": 0.30533653497695923, + "learning_rate": 9.818016294758679e-06, + "loss": 0.0051, + "step": 55360 + }, + { + "epoch": 0.35512598089361935, + "grad_norm": 0.14908923208713531, + "learning_rate": 9.817866635478185e-06, + "loss": 0.0047, + "step": 55370 + }, + { + "epoch": 0.35519011778740545, + "grad_norm": 0.29038989543914795, + "learning_rate": 9.81771691582638e-06, + "loss": 0.0044, + "step": 55380 + }, + { + "epoch": 0.35525425468119154, + "grad_norm": 0.10095025599002838, + "learning_rate": 9.817567135805138e-06, + "loss": 0.0023, + "step": 55390 + }, + { + "epoch": 0.35531839157497763, + "grad_norm": 0.11912602931261063, + "learning_rate": 9.817417295416337e-06, + "loss": 0.0035, + "step": 55400 + }, + { + "epoch": 0.3553825284687637, + "grad_norm": 0.46975573897361755, + "learning_rate": 9.817267394661854e-06, + "loss": 0.0031, + "step": 55410 + }, + { + "epoch": 0.3554466653625498, + "grad_norm": 0.4472891688346863, + "learning_rate": 9.81711743354357e-06, + "loss": 0.0054, + "step": 55420 + }, + { + "epoch": 0.3555108022563359, + "grad_norm": 0.15583384037017822, + "learning_rate": 9.816967412063359e-06, + "loss": 0.0064, + "step": 55430 + }, + { + "epoch": 0.355574939150122, + "grad_norm": 0.1715603917837143, + "learning_rate": 9.816817330223105e-06, + "loss": 0.0059, + "step": 55440 + }, + { + "epoch": 0.3556390760439081, + "grad_norm": 0.2636367976665497, + "learning_rate": 9.816667188024687e-06, + "loss": 0.0039, + "step": 55450 + }, + { + "epoch": 0.3557032129376942, + "grad_norm": 0.03399881720542908, + "learning_rate": 9.816516985469986e-06, + "loss": 0.0027, + "step": 55460 + }, + { + "epoch": 0.3557673498314803, + "grad_norm": 0.3632083833217621, + "learning_rate": 9.816366722560887e-06, + "loss": 0.0072, + "step": 55470 + }, + { + "epoch": 0.35583148672526643, + "grad_norm": 0.13824380934238434, + "learning_rate": 9.81621639929927e-06, + "loss": 0.0049, + "step": 55480 + }, + { + "epoch": 0.3558956236190525, + "grad_norm": 0.4382361173629761, + "learning_rate": 9.816066015687017e-06, + "loss": 0.0048, + "step": 55490 + }, + { + "epoch": 0.3559597605128386, + "grad_norm": 0.13956326246261597, + "learning_rate": 9.815915571726018e-06, + "loss": 0.0035, + "step": 55500 + }, + { + "epoch": 0.3560238974066247, + "grad_norm": 0.14240862429141998, + "learning_rate": 9.815765067418152e-06, + "loss": 0.0037, + "step": 55510 + }, + { + "epoch": 0.3560880343004108, + "grad_norm": 0.20180271565914154, + "learning_rate": 9.815614502765311e-06, + "loss": 0.011, + "step": 55520 + }, + { + "epoch": 0.3561521711941969, + "grad_norm": 0.13986170291900635, + "learning_rate": 9.815463877769377e-06, + "loss": 0.0083, + "step": 55530 + }, + { + "epoch": 0.356216308087983, + "grad_norm": 0.4541982114315033, + "learning_rate": 9.81531319243224e-06, + "loss": 0.0048, + "step": 55540 + }, + { + "epoch": 0.3562804449817691, + "grad_norm": 0.44397735595703125, + "learning_rate": 9.815162446755786e-06, + "loss": 0.0062, + "step": 55550 + }, + { + "epoch": 0.3563445818755552, + "grad_norm": 0.20005185902118683, + "learning_rate": 9.815011640741905e-06, + "loss": 0.0066, + "step": 55560 + }, + { + "epoch": 0.35640871876934127, + "grad_norm": 0.4921051561832428, + "learning_rate": 9.814860774392488e-06, + "loss": 0.005, + "step": 55570 + }, + { + "epoch": 0.35647285566312736, + "grad_norm": 0.10769011080265045, + "learning_rate": 9.814709847709424e-06, + "loss": 0.0049, + "step": 55580 + }, + { + "epoch": 0.35653699255691346, + "grad_norm": 0.4561218023300171, + "learning_rate": 9.814558860694604e-06, + "loss": 0.0072, + "step": 55590 + }, + { + "epoch": 0.35660112945069955, + "grad_norm": 0.0678478330373764, + "learning_rate": 9.814407813349921e-06, + "loss": 0.0048, + "step": 55600 + }, + { + "epoch": 0.35666526634448564, + "grad_norm": 0.19913871586322784, + "learning_rate": 9.814256705677268e-06, + "loss": 0.005, + "step": 55610 + }, + { + "epoch": 0.3567294032382718, + "grad_norm": 0.2567561864852905, + "learning_rate": 9.814105537678536e-06, + "loss": 0.0039, + "step": 55620 + }, + { + "epoch": 0.3567935401320579, + "grad_norm": 0.13420292735099792, + "learning_rate": 9.813954309355621e-06, + "loss": 0.0075, + "step": 55630 + }, + { + "epoch": 0.356857677025844, + "grad_norm": 0.19008968770503998, + "learning_rate": 9.813803020710422e-06, + "loss": 0.0055, + "step": 55640 + }, + { + "epoch": 0.35692181391963007, + "grad_norm": 0.12023936957120895, + "learning_rate": 9.813651671744827e-06, + "loss": 0.0038, + "step": 55650 + }, + { + "epoch": 0.35698595081341616, + "grad_norm": 0.3929827809333801, + "learning_rate": 9.813500262460738e-06, + "loss": 0.0038, + "step": 55660 + }, + { + "epoch": 0.35705008770720226, + "grad_norm": 0.14300161600112915, + "learning_rate": 9.81334879286005e-06, + "loss": 0.0038, + "step": 55670 + }, + { + "epoch": 0.35711422460098835, + "grad_norm": 0.11527574062347412, + "learning_rate": 9.813197262944661e-06, + "loss": 0.0035, + "step": 55680 + }, + { + "epoch": 0.35717836149477444, + "grad_norm": 0.22679197788238525, + "learning_rate": 9.81304567271647e-06, + "loss": 0.0037, + "step": 55690 + }, + { + "epoch": 0.35724249838856054, + "grad_norm": 0.12605354189872742, + "learning_rate": 9.812894022177378e-06, + "loss": 0.0031, + "step": 55700 + }, + { + "epoch": 0.35730663528234663, + "grad_norm": 0.18351490795612335, + "learning_rate": 9.812742311329284e-06, + "loss": 0.002, + "step": 55710 + }, + { + "epoch": 0.3573707721761327, + "grad_norm": 0.1666477471590042, + "learning_rate": 9.812590540174089e-06, + "loss": 0.0036, + "step": 55720 + }, + { + "epoch": 0.3574349090699188, + "grad_norm": 0.2192985564470291, + "learning_rate": 9.812438708713695e-06, + "loss": 0.0042, + "step": 55730 + }, + { + "epoch": 0.3574990459637049, + "grad_norm": 0.3767503798007965, + "learning_rate": 9.812286816950006e-06, + "loss": 0.0048, + "step": 55740 + }, + { + "epoch": 0.357563182857491, + "grad_norm": 0.09017223119735718, + "learning_rate": 9.81213486488492e-06, + "loss": 0.0048, + "step": 55750 + }, + { + "epoch": 0.35762731975127715, + "grad_norm": 0.336397647857666, + "learning_rate": 9.81198285252035e-06, + "loss": 0.0044, + "step": 55760 + }, + { + "epoch": 0.35769145664506324, + "grad_norm": 0.1848268359899521, + "learning_rate": 9.811830779858193e-06, + "loss": 0.0033, + "step": 55770 + }, + { + "epoch": 0.35775559353884934, + "grad_norm": 0.2532404661178589, + "learning_rate": 9.811678646900357e-06, + "loss": 0.0048, + "step": 55780 + }, + { + "epoch": 0.35781973043263543, + "grad_norm": 0.13859908282756805, + "learning_rate": 9.81152645364875e-06, + "loss": 0.0027, + "step": 55790 + }, + { + "epoch": 0.3578838673264215, + "grad_norm": 0.0673801600933075, + "learning_rate": 9.811374200105277e-06, + "loss": 0.0042, + "step": 55800 + }, + { + "epoch": 0.3579480042202076, + "grad_norm": 0.28130680322647095, + "learning_rate": 9.811221886271846e-06, + "loss": 0.0042, + "step": 55810 + }, + { + "epoch": 0.3580121411139937, + "grad_norm": 0.11280748248100281, + "learning_rate": 9.811069512150367e-06, + "loss": 0.0031, + "step": 55820 + }, + { + "epoch": 0.3580762780077798, + "grad_norm": 0.46585577726364136, + "learning_rate": 9.810917077742748e-06, + "loss": 0.0082, + "step": 55830 + }, + { + "epoch": 0.3581404149015659, + "grad_norm": 0.08036241680383682, + "learning_rate": 9.810764583050902e-06, + "loss": 0.0044, + "step": 55840 + }, + { + "epoch": 0.358204551795352, + "grad_norm": 0.08849810808897018, + "learning_rate": 9.810612028076737e-06, + "loss": 0.0037, + "step": 55850 + }, + { + "epoch": 0.3582686886891381, + "grad_norm": 0.1307210773229599, + "learning_rate": 9.810459412822162e-06, + "loss": 0.0031, + "step": 55860 + }, + { + "epoch": 0.3583328255829242, + "grad_norm": 0.2808101773262024, + "learning_rate": 9.810306737289095e-06, + "loss": 0.0048, + "step": 55870 + }, + { + "epoch": 0.35839696247671027, + "grad_norm": 0.09498907625675201, + "learning_rate": 9.810154001479447e-06, + "loss": 0.0027, + "step": 55880 + }, + { + "epoch": 0.35846109937049636, + "grad_norm": 0.13824990391731262, + "learning_rate": 9.810001205395129e-06, + "loss": 0.0028, + "step": 55890 + }, + { + "epoch": 0.3585252362642825, + "grad_norm": 0.20933681726455688, + "learning_rate": 9.809848349038063e-06, + "loss": 0.0046, + "step": 55900 + }, + { + "epoch": 0.3585893731580686, + "grad_norm": 0.06892430037260056, + "learning_rate": 9.809695432410155e-06, + "loss": 0.004, + "step": 55910 + }, + { + "epoch": 0.3586535100518547, + "grad_norm": 0.1586724817752838, + "learning_rate": 9.809542455513328e-06, + "loss": 0.0074, + "step": 55920 + }, + { + "epoch": 0.3587176469456408, + "grad_norm": 0.25260305404663086, + "learning_rate": 9.809389418349496e-06, + "loss": 0.0037, + "step": 55930 + }, + { + "epoch": 0.3587817838394269, + "grad_norm": 0.37511733174324036, + "learning_rate": 9.809236320920578e-06, + "loss": 0.0034, + "step": 55940 + }, + { + "epoch": 0.358845920733213, + "grad_norm": 0.18205967545509338, + "learning_rate": 9.809083163228493e-06, + "loss": 0.0075, + "step": 55950 + }, + { + "epoch": 0.35891005762699907, + "grad_norm": 0.12275759875774384, + "learning_rate": 9.808929945275157e-06, + "loss": 0.0024, + "step": 55960 + }, + { + "epoch": 0.35897419452078516, + "grad_norm": 0.27325794100761414, + "learning_rate": 9.808776667062493e-06, + "loss": 0.0023, + "step": 55970 + }, + { + "epoch": 0.35903833141457125, + "grad_norm": 0.09226607531309128, + "learning_rate": 9.80862332859242e-06, + "loss": 0.0026, + "step": 55980 + }, + { + "epoch": 0.35910246830835735, + "grad_norm": 0.19567179679870605, + "learning_rate": 9.808469929866858e-06, + "loss": 0.0073, + "step": 55990 + }, + { + "epoch": 0.35916660520214344, + "grad_norm": 0.19236637651920319, + "learning_rate": 9.808316470887732e-06, + "loss": 0.0066, + "step": 56000 + }, + { + "epoch": 0.35923074209592953, + "grad_norm": 0.28044867515563965, + "learning_rate": 9.808162951656968e-06, + "loss": 0.0041, + "step": 56010 + }, + { + "epoch": 0.3592948789897156, + "grad_norm": 0.13495536148548126, + "learning_rate": 9.808009372176483e-06, + "loss": 0.0091, + "step": 56020 + }, + { + "epoch": 0.3593590158835017, + "grad_norm": 0.14301171898841858, + "learning_rate": 9.807855732448204e-06, + "loss": 0.0041, + "step": 56030 + }, + { + "epoch": 0.35942315277728787, + "grad_norm": 0.17056189477443695, + "learning_rate": 9.807702032474057e-06, + "loss": 0.0071, + "step": 56040 + }, + { + "epoch": 0.35948728967107396, + "grad_norm": 0.19335652887821198, + "learning_rate": 9.807548272255968e-06, + "loss": 0.0056, + "step": 56050 + }, + { + "epoch": 0.35955142656486005, + "grad_norm": 0.19761638343334198, + "learning_rate": 9.807394451795863e-06, + "loss": 0.0045, + "step": 56060 + }, + { + "epoch": 0.35961556345864615, + "grad_norm": 0.22033339738845825, + "learning_rate": 9.807240571095669e-06, + "loss": 0.0029, + "step": 56070 + }, + { + "epoch": 0.35967970035243224, + "grad_norm": 0.3078736960887909, + "learning_rate": 9.807086630157317e-06, + "loss": 0.0068, + "step": 56080 + }, + { + "epoch": 0.35974383724621833, + "grad_norm": 0.2283318042755127, + "learning_rate": 9.806932628982731e-06, + "loss": 0.0034, + "step": 56090 + }, + { + "epoch": 0.3598079741400044, + "grad_norm": 0.25131726264953613, + "learning_rate": 9.806778567573846e-06, + "loss": 0.0036, + "step": 56100 + }, + { + "epoch": 0.3598721110337905, + "grad_norm": 0.11536833643913269, + "learning_rate": 9.806624445932588e-06, + "loss": 0.0037, + "step": 56110 + }, + { + "epoch": 0.3599362479275766, + "grad_norm": 0.23182439804077148, + "learning_rate": 9.806470264060893e-06, + "loss": 0.0048, + "step": 56120 + }, + { + "epoch": 0.3600003848213627, + "grad_norm": 0.2506871521472931, + "learning_rate": 9.80631602196069e-06, + "loss": 0.0042, + "step": 56130 + }, + { + "epoch": 0.3600645217151488, + "grad_norm": 0.19607239961624146, + "learning_rate": 9.806161719633911e-06, + "loss": 0.0045, + "step": 56140 + }, + { + "epoch": 0.3601286586089349, + "grad_norm": 0.22224532067775726, + "learning_rate": 9.80600735708249e-06, + "loss": 0.0088, + "step": 56150 + }, + { + "epoch": 0.360192795502721, + "grad_norm": 0.20805025100708008, + "learning_rate": 9.805852934308363e-06, + "loss": 0.0051, + "step": 56160 + }, + { + "epoch": 0.3602569323965071, + "grad_norm": 0.08118850737810135, + "learning_rate": 9.805698451313465e-06, + "loss": 0.0044, + "step": 56170 + }, + { + "epoch": 0.3603210692902932, + "grad_norm": 0.07110437750816345, + "learning_rate": 9.80554390809973e-06, + "loss": 0.005, + "step": 56180 + }, + { + "epoch": 0.3603852061840793, + "grad_norm": 0.173774853348732, + "learning_rate": 9.805389304669097e-06, + "loss": 0.0041, + "step": 56190 + }, + { + "epoch": 0.3604493430778654, + "grad_norm": 0.15812437236309052, + "learning_rate": 9.8052346410235e-06, + "loss": 0.0053, + "step": 56200 + }, + { + "epoch": 0.3605134799716515, + "grad_norm": 0.3079059422016144, + "learning_rate": 9.80507991716488e-06, + "loss": 0.0047, + "step": 56210 + }, + { + "epoch": 0.3605776168654376, + "grad_norm": 0.27946552634239197, + "learning_rate": 9.804925133095173e-06, + "loss": 0.005, + "step": 56220 + }, + { + "epoch": 0.3606417537592237, + "grad_norm": 0.19736504554748535, + "learning_rate": 9.804770288816318e-06, + "loss": 0.0062, + "step": 56230 + }, + { + "epoch": 0.3607058906530098, + "grad_norm": 0.19446900486946106, + "learning_rate": 9.804615384330262e-06, + "loss": 0.0053, + "step": 56240 + }, + { + "epoch": 0.3607700275467959, + "grad_norm": 0.07679780572652817, + "learning_rate": 9.80446041963894e-06, + "loss": 0.0057, + "step": 56250 + }, + { + "epoch": 0.360834164440582, + "grad_norm": 0.056387584656476974, + "learning_rate": 9.804305394744293e-06, + "loss": 0.0032, + "step": 56260 + }, + { + "epoch": 0.36089830133436807, + "grad_norm": 0.126143217086792, + "learning_rate": 9.804150309648267e-06, + "loss": 0.0029, + "step": 56270 + }, + { + "epoch": 0.36096243822815416, + "grad_norm": 0.16625581681728363, + "learning_rate": 9.803995164352804e-06, + "loss": 0.0044, + "step": 56280 + }, + { + "epoch": 0.36102657512194025, + "grad_norm": 0.3765086233615875, + "learning_rate": 9.803839958859849e-06, + "loss": 0.004, + "step": 56290 + }, + { + "epoch": 0.36109071201572635, + "grad_norm": 0.21009819209575653, + "learning_rate": 9.803684693171347e-06, + "loss": 0.0047, + "step": 56300 + }, + { + "epoch": 0.36115484890951244, + "grad_norm": 0.06138169765472412, + "learning_rate": 9.80352936728924e-06, + "loss": 0.0059, + "step": 56310 + }, + { + "epoch": 0.3612189858032986, + "grad_norm": 0.2305053174495697, + "learning_rate": 9.803373981215478e-06, + "loss": 0.0052, + "step": 56320 + }, + { + "epoch": 0.3612831226970847, + "grad_norm": 0.25863513350486755, + "learning_rate": 9.803218534952008e-06, + "loss": 0.004, + "step": 56330 + }, + { + "epoch": 0.3613472595908708, + "grad_norm": 0.41510552167892456, + "learning_rate": 9.803063028500776e-06, + "loss": 0.0034, + "step": 56340 + }, + { + "epoch": 0.36141139648465687, + "grad_norm": 0.18759652972221375, + "learning_rate": 9.802907461863734e-06, + "loss": 0.0032, + "step": 56350 + }, + { + "epoch": 0.36147553337844296, + "grad_norm": 0.3013518452644348, + "learning_rate": 9.802751835042825e-06, + "loss": 0.0031, + "step": 56360 + }, + { + "epoch": 0.36153967027222905, + "grad_norm": 0.26576077938079834, + "learning_rate": 9.802596148040006e-06, + "loss": 0.0103, + "step": 56370 + }, + { + "epoch": 0.36160380716601515, + "grad_norm": 0.2088194191455841, + "learning_rate": 9.802440400857223e-06, + "loss": 0.0025, + "step": 56380 + }, + { + "epoch": 0.36166794405980124, + "grad_norm": 0.06073303148150444, + "learning_rate": 9.80228459349643e-06, + "loss": 0.0039, + "step": 56390 + }, + { + "epoch": 0.36173208095358733, + "grad_norm": 0.04010922834277153, + "learning_rate": 9.80212872595958e-06, + "loss": 0.004, + "step": 56400 + }, + { + "epoch": 0.3617962178473734, + "grad_norm": 0.19746612012386322, + "learning_rate": 9.801972798248624e-06, + "loss": 0.0037, + "step": 56410 + }, + { + "epoch": 0.3618603547411595, + "grad_norm": 0.19448116421699524, + "learning_rate": 9.801816810365518e-06, + "loss": 0.0035, + "step": 56420 + }, + { + "epoch": 0.3619244916349456, + "grad_norm": 0.17412406206130981, + "learning_rate": 9.801660762312216e-06, + "loss": 0.0033, + "step": 56430 + }, + { + "epoch": 0.3619886285287317, + "grad_norm": 0.27388522028923035, + "learning_rate": 9.80150465409067e-06, + "loss": 0.0058, + "step": 56440 + }, + { + "epoch": 0.3620527654225178, + "grad_norm": 0.23078088462352753, + "learning_rate": 9.801348485702843e-06, + "loss": 0.004, + "step": 56450 + }, + { + "epoch": 0.36211690231630395, + "grad_norm": 0.19716985523700714, + "learning_rate": 9.801192257150685e-06, + "loss": 0.0033, + "step": 56460 + }, + { + "epoch": 0.36218103921009004, + "grad_norm": 0.08864498883485794, + "learning_rate": 9.801035968436157e-06, + "loss": 0.0038, + "step": 56470 + }, + { + "epoch": 0.36224517610387613, + "grad_norm": 0.19277538359165192, + "learning_rate": 9.800879619561216e-06, + "loss": 0.0042, + "step": 56480 + }, + { + "epoch": 0.3623093129976622, + "grad_norm": 0.36649471521377563, + "learning_rate": 9.800723210527825e-06, + "loss": 0.0045, + "step": 56490 + }, + { + "epoch": 0.3623734498914483, + "grad_norm": 0.1863778978586197, + "learning_rate": 9.800566741337941e-06, + "loss": 0.0148, + "step": 56500 + }, + { + "epoch": 0.3624375867852344, + "grad_norm": 0.09909823536872864, + "learning_rate": 9.800410211993523e-06, + "loss": 0.0045, + "step": 56510 + }, + { + "epoch": 0.3625017236790205, + "grad_norm": 0.14066021144390106, + "learning_rate": 9.800253622496534e-06, + "loss": 0.0032, + "step": 56520 + }, + { + "epoch": 0.3625658605728066, + "grad_norm": 0.2353345900774002, + "learning_rate": 9.800096972848938e-06, + "loss": 0.005, + "step": 56530 + }, + { + "epoch": 0.3626299974665927, + "grad_norm": 0.30205559730529785, + "learning_rate": 9.799940263052696e-06, + "loss": 0.0042, + "step": 56540 + }, + { + "epoch": 0.3626941343603788, + "grad_norm": 0.18081988394260406, + "learning_rate": 9.799783493109772e-06, + "loss": 0.0043, + "step": 56550 + }, + { + "epoch": 0.3627582712541649, + "grad_norm": 0.3089219331741333, + "learning_rate": 9.799626663022129e-06, + "loss": 0.005, + "step": 56560 + }, + { + "epoch": 0.36282240814795097, + "grad_norm": 0.21994343400001526, + "learning_rate": 9.799469772791736e-06, + "loss": 0.0051, + "step": 56570 + }, + { + "epoch": 0.36288654504173706, + "grad_norm": 0.19096291065216064, + "learning_rate": 9.799312822420554e-06, + "loss": 0.0029, + "step": 56580 + }, + { + "epoch": 0.36295068193552316, + "grad_norm": 0.11202064901590347, + "learning_rate": 9.799155811910555e-06, + "loss": 0.0026, + "step": 56590 + }, + { + "epoch": 0.3630148188293093, + "grad_norm": 0.21112869679927826, + "learning_rate": 9.798998741263703e-06, + "loss": 0.0034, + "step": 56600 + }, + { + "epoch": 0.3630789557230954, + "grad_norm": 0.17619720101356506, + "learning_rate": 9.798841610481967e-06, + "loss": 0.0047, + "step": 56610 + }, + { + "epoch": 0.3631430926168815, + "grad_norm": 0.18076972663402557, + "learning_rate": 9.798684419567314e-06, + "loss": 0.0042, + "step": 56620 + }, + { + "epoch": 0.3632072295106676, + "grad_norm": 0.19960550963878632, + "learning_rate": 9.79852716852172e-06, + "loss": 0.0036, + "step": 56630 + }, + { + "epoch": 0.3632713664044537, + "grad_norm": 0.2491637021303177, + "learning_rate": 9.798369857347147e-06, + "loss": 0.0053, + "step": 56640 + }, + { + "epoch": 0.36333550329823977, + "grad_norm": 0.12342242896556854, + "learning_rate": 9.79821248604557e-06, + "loss": 0.0059, + "step": 56650 + }, + { + "epoch": 0.36339964019202586, + "grad_norm": 0.0697750374674797, + "learning_rate": 9.798055054618965e-06, + "loss": 0.0032, + "step": 56660 + }, + { + "epoch": 0.36346377708581196, + "grad_norm": 0.19258704781532288, + "learning_rate": 9.797897563069299e-06, + "loss": 0.0044, + "step": 56670 + }, + { + "epoch": 0.36352791397959805, + "grad_norm": 0.14493019878864288, + "learning_rate": 9.797740011398547e-06, + "loss": 0.003, + "step": 56680 + }, + { + "epoch": 0.36359205087338414, + "grad_norm": 0.261229544878006, + "learning_rate": 9.797582399608686e-06, + "loss": 0.0036, + "step": 56690 + }, + { + "epoch": 0.36365618776717024, + "grad_norm": 0.08727063238620758, + "learning_rate": 9.797424727701685e-06, + "loss": 0.0038, + "step": 56700 + }, + { + "epoch": 0.36372032466095633, + "grad_norm": 0.17275263369083405, + "learning_rate": 9.797266995679527e-06, + "loss": 0.0052, + "step": 56710 + }, + { + "epoch": 0.3637844615547424, + "grad_norm": 0.6614558696746826, + "learning_rate": 9.797109203544183e-06, + "loss": 0.0043, + "step": 56720 + }, + { + "epoch": 0.3638485984485285, + "grad_norm": 0.20420671999454498, + "learning_rate": 9.796951351297632e-06, + "loss": 0.0069, + "step": 56730 + }, + { + "epoch": 0.36391273534231466, + "grad_norm": 0.14583484828472137, + "learning_rate": 9.796793438941853e-06, + "loss": 0.0039, + "step": 56740 + }, + { + "epoch": 0.36397687223610076, + "grad_norm": 0.3670613467693329, + "learning_rate": 9.796635466478824e-06, + "loss": 0.0042, + "step": 56750 + }, + { + "epoch": 0.36404100912988685, + "grad_norm": 0.2378796488046646, + "learning_rate": 9.796477433910526e-06, + "loss": 0.0049, + "step": 56760 + }, + { + "epoch": 0.36410514602367294, + "grad_norm": 0.24503864347934723, + "learning_rate": 9.796319341238936e-06, + "loss": 0.0028, + "step": 56770 + }, + { + "epoch": 0.36416928291745904, + "grad_norm": 0.1846599280834198, + "learning_rate": 9.796161188466036e-06, + "loss": 0.0026, + "step": 56780 + }, + { + "epoch": 0.36423341981124513, + "grad_norm": 0.28431859612464905, + "learning_rate": 9.79600297559381e-06, + "loss": 0.0045, + "step": 56790 + }, + { + "epoch": 0.3642975567050312, + "grad_norm": 0.14934979379177094, + "learning_rate": 9.795844702624239e-06, + "loss": 0.0049, + "step": 56800 + }, + { + "epoch": 0.3643616935988173, + "grad_norm": 0.03336900845170021, + "learning_rate": 9.795686369559304e-06, + "loss": 0.0045, + "step": 56810 + }, + { + "epoch": 0.3644258304926034, + "grad_norm": 0.054301392287015915, + "learning_rate": 9.795527976400996e-06, + "loss": 0.0026, + "step": 56820 + }, + { + "epoch": 0.3644899673863895, + "grad_norm": 0.08391859382390976, + "learning_rate": 9.795369523151292e-06, + "loss": 0.0047, + "step": 56830 + }, + { + "epoch": 0.3645541042801756, + "grad_norm": 0.16880223155021667, + "learning_rate": 9.795211009812181e-06, + "loss": 0.0046, + "step": 56840 + }, + { + "epoch": 0.3646182411739617, + "grad_norm": 0.16764570772647858, + "learning_rate": 9.79505243638565e-06, + "loss": 0.0053, + "step": 56850 + }, + { + "epoch": 0.3646823780677478, + "grad_norm": 0.17726831138134003, + "learning_rate": 9.794893802873683e-06, + "loss": 0.0038, + "step": 56860 + }, + { + "epoch": 0.3647465149615339, + "grad_norm": 0.09660816192626953, + "learning_rate": 9.794735109278272e-06, + "loss": 0.0048, + "step": 56870 + }, + { + "epoch": 0.36481065185531997, + "grad_norm": 0.14028716087341309, + "learning_rate": 9.794576355601401e-06, + "loss": 0.0028, + "step": 56880 + }, + { + "epoch": 0.3648747887491061, + "grad_norm": 0.21155264973640442, + "learning_rate": 9.794417541845064e-06, + "loss": 0.0054, + "step": 56890 + }, + { + "epoch": 0.3649389256428922, + "grad_norm": 0.1422126740217209, + "learning_rate": 9.794258668011247e-06, + "loss": 0.0045, + "step": 56900 + }, + { + "epoch": 0.3650030625366783, + "grad_norm": 0.11498111486434937, + "learning_rate": 9.794099734101943e-06, + "loss": 0.0036, + "step": 56910 + }, + { + "epoch": 0.3650671994304644, + "grad_norm": 0.32154351472854614, + "learning_rate": 9.793940740119143e-06, + "loss": 0.0067, + "step": 56920 + }, + { + "epoch": 0.3651313363242505, + "grad_norm": 0.16818641126155853, + "learning_rate": 9.793781686064841e-06, + "loss": 0.0036, + "step": 56930 + }, + { + "epoch": 0.3651954732180366, + "grad_norm": 0.1259140521287918, + "learning_rate": 9.793622571941026e-06, + "loss": 0.0053, + "step": 56940 + }, + { + "epoch": 0.3652596101118227, + "grad_norm": 0.20569679141044617, + "learning_rate": 9.793463397749695e-06, + "loss": 0.0057, + "step": 56950 + }, + { + "epoch": 0.36532374700560877, + "grad_norm": 0.09647531062364578, + "learning_rate": 9.793304163492843e-06, + "loss": 0.0035, + "step": 56960 + }, + { + "epoch": 0.36538788389939486, + "grad_norm": 0.2747953236103058, + "learning_rate": 9.793144869172462e-06, + "loss": 0.0041, + "step": 56970 + }, + { + "epoch": 0.36545202079318095, + "grad_norm": 0.26019948720932007, + "learning_rate": 9.792985514790552e-06, + "loss": 0.0041, + "step": 56980 + }, + { + "epoch": 0.36551615768696705, + "grad_norm": 0.08870185166597366, + "learning_rate": 9.792826100349106e-06, + "loss": 0.0019, + "step": 56990 + }, + { + "epoch": 0.36558029458075314, + "grad_norm": 0.16642318665981293, + "learning_rate": 9.792666625850126e-06, + "loss": 0.0043, + "step": 57000 + }, + { + "epoch": 0.36564443147453923, + "grad_norm": 0.36684736609458923, + "learning_rate": 9.792507091295607e-06, + "loss": 0.0028, + "step": 57010 + }, + { + "epoch": 0.3657085683683253, + "grad_norm": 0.21510466933250427, + "learning_rate": 9.792347496687548e-06, + "loss": 0.0071, + "step": 57020 + }, + { + "epoch": 0.3657727052621115, + "grad_norm": 0.274148166179657, + "learning_rate": 9.79218784202795e-06, + "loss": 0.005, + "step": 57030 + }, + { + "epoch": 0.36583684215589757, + "grad_norm": 0.10369301587343216, + "learning_rate": 9.792028127318815e-06, + "loss": 0.0035, + "step": 57040 + }, + { + "epoch": 0.36590097904968366, + "grad_norm": 0.2976493537425995, + "learning_rate": 9.79186835256214e-06, + "loss": 0.0029, + "step": 57050 + }, + { + "epoch": 0.36596511594346975, + "grad_norm": 0.05087069049477577, + "learning_rate": 9.791708517759933e-06, + "loss": 0.0044, + "step": 57060 + }, + { + "epoch": 0.36602925283725585, + "grad_norm": 0.12944692373275757, + "learning_rate": 9.791548622914191e-06, + "loss": 0.0067, + "step": 57070 + }, + { + "epoch": 0.36609338973104194, + "grad_norm": 0.13219457864761353, + "learning_rate": 9.791388668026923e-06, + "loss": 0.0027, + "step": 57080 + }, + { + "epoch": 0.36615752662482803, + "grad_norm": 0.15380585193634033, + "learning_rate": 9.79122865310013e-06, + "loss": 0.0056, + "step": 57090 + }, + { + "epoch": 0.3662216635186141, + "grad_norm": 0.10875172913074493, + "learning_rate": 9.791068578135817e-06, + "loss": 0.0029, + "step": 57100 + }, + { + "epoch": 0.3662858004124002, + "grad_norm": 0.28335443139076233, + "learning_rate": 9.79090844313599e-06, + "loss": 0.0049, + "step": 57110 + }, + { + "epoch": 0.3663499373061863, + "grad_norm": 0.4130868911743164, + "learning_rate": 9.790748248102654e-06, + "loss": 0.005, + "step": 57120 + }, + { + "epoch": 0.3664140741999724, + "grad_norm": 0.05378401651978493, + "learning_rate": 9.790587993037824e-06, + "loss": 0.0034, + "step": 57130 + }, + { + "epoch": 0.3664782110937585, + "grad_norm": 0.14871534705162048, + "learning_rate": 9.7904276779435e-06, + "loss": 0.0056, + "step": 57140 + }, + { + "epoch": 0.3665423479875446, + "grad_norm": 0.19948628544807434, + "learning_rate": 9.790267302821692e-06, + "loss": 0.0038, + "step": 57150 + }, + { + "epoch": 0.3666064848813307, + "grad_norm": 0.21524202823638916, + "learning_rate": 9.790106867674415e-06, + "loss": 0.0052, + "step": 57160 + }, + { + "epoch": 0.36667062177511683, + "grad_norm": 0.2842295467853546, + "learning_rate": 9.789946372503672e-06, + "loss": 0.0041, + "step": 57170 + }, + { + "epoch": 0.3667347586689029, + "grad_norm": 0.1311556100845337, + "learning_rate": 9.789785817311477e-06, + "loss": 0.0065, + "step": 57180 + }, + { + "epoch": 0.366798895562689, + "grad_norm": 0.5092558860778809, + "learning_rate": 9.789625202099846e-06, + "loss": 0.0046, + "step": 57190 + }, + { + "epoch": 0.3668630324564751, + "grad_norm": 0.13105471432209015, + "learning_rate": 9.789464526870785e-06, + "loss": 0.0053, + "step": 57200 + }, + { + "epoch": 0.3669271693502612, + "grad_norm": 0.17737802863121033, + "learning_rate": 9.789303791626313e-06, + "loss": 0.0037, + "step": 57210 + }, + { + "epoch": 0.3669913062440473, + "grad_norm": 0.2169865518808365, + "learning_rate": 9.78914299636844e-06, + "loss": 0.0039, + "step": 57220 + }, + { + "epoch": 0.3670554431378334, + "grad_norm": 0.5936599969863892, + "learning_rate": 9.788982141099182e-06, + "loss": 0.0076, + "step": 57230 + }, + { + "epoch": 0.3671195800316195, + "grad_norm": 0.2433180958032608, + "learning_rate": 9.788821225820557e-06, + "loss": 0.0031, + "step": 57240 + }, + { + "epoch": 0.3671837169254056, + "grad_norm": 0.11742263287305832, + "learning_rate": 9.788660250534579e-06, + "loss": 0.0044, + "step": 57250 + }, + { + "epoch": 0.36724785381919167, + "grad_norm": 0.11817143857479095, + "learning_rate": 9.788499215243264e-06, + "loss": 0.0061, + "step": 57260 + }, + { + "epoch": 0.36731199071297777, + "grad_norm": 0.08936230838298798, + "learning_rate": 9.788338119948634e-06, + "loss": 0.005, + "step": 57270 + }, + { + "epoch": 0.36737612760676386, + "grad_norm": 0.25392237305641174, + "learning_rate": 9.788176964652703e-06, + "loss": 0.009, + "step": 57280 + }, + { + "epoch": 0.36744026450054995, + "grad_norm": 0.14953184127807617, + "learning_rate": 9.788015749357493e-06, + "loss": 0.0032, + "step": 57290 + }, + { + "epoch": 0.36750440139433604, + "grad_norm": 1.2418384552001953, + "learning_rate": 9.787854474065026e-06, + "loss": 0.0023, + "step": 57300 + }, + { + "epoch": 0.3675685382881222, + "grad_norm": 0.039404936134815216, + "learning_rate": 9.78769313877732e-06, + "loss": 0.0069, + "step": 57310 + }, + { + "epoch": 0.3676326751819083, + "grad_norm": 0.16414855420589447, + "learning_rate": 9.787531743496394e-06, + "loss": 0.0085, + "step": 57320 + }, + { + "epoch": 0.3676968120756944, + "grad_norm": 0.20494332909584045, + "learning_rate": 9.787370288224277e-06, + "loss": 0.0107, + "step": 57330 + }, + { + "epoch": 0.3677609489694805, + "grad_norm": 0.07614117115736008, + "learning_rate": 9.787208772962988e-06, + "loss": 0.0047, + "step": 57340 + }, + { + "epoch": 0.36782508586326657, + "grad_norm": 0.03945811837911606, + "learning_rate": 9.787047197714549e-06, + "loss": 0.0083, + "step": 57350 + }, + { + "epoch": 0.36788922275705266, + "grad_norm": 0.04837341606616974, + "learning_rate": 9.78688556248099e-06, + "loss": 0.0029, + "step": 57360 + }, + { + "epoch": 0.36795335965083875, + "grad_norm": 0.19262497127056122, + "learning_rate": 9.786723867264335e-06, + "loss": 0.0054, + "step": 57370 + }, + { + "epoch": 0.36801749654462484, + "grad_norm": 0.7808809280395508, + "learning_rate": 9.786562112066606e-06, + "loss": 0.0063, + "step": 57380 + }, + { + "epoch": 0.36808163343841094, + "grad_norm": 0.21556198596954346, + "learning_rate": 9.786400296889835e-06, + "loss": 0.0043, + "step": 57390 + }, + { + "epoch": 0.36814577033219703, + "grad_norm": 0.12813107669353485, + "learning_rate": 9.786238421736048e-06, + "loss": 0.0044, + "step": 57400 + }, + { + "epoch": 0.3682099072259831, + "grad_norm": 0.14205877482891083, + "learning_rate": 9.78607648660727e-06, + "loss": 0.0051, + "step": 57410 + }, + { + "epoch": 0.3682740441197692, + "grad_norm": 0.12445349991321564, + "learning_rate": 9.785914491505535e-06, + "loss": 0.0047, + "step": 57420 + }, + { + "epoch": 0.3683381810135553, + "grad_norm": 0.22566372156143188, + "learning_rate": 9.785752436432871e-06, + "loss": 0.0045, + "step": 57430 + }, + { + "epoch": 0.3684023179073414, + "grad_norm": 0.1090468168258667, + "learning_rate": 9.785590321391308e-06, + "loss": 0.0045, + "step": 57440 + }, + { + "epoch": 0.36846645480112755, + "grad_norm": 0.22308801114559174, + "learning_rate": 9.785428146382878e-06, + "loss": 0.0058, + "step": 57450 + }, + { + "epoch": 0.36853059169491365, + "grad_norm": 0.2822139859199524, + "learning_rate": 9.785265911409613e-06, + "loss": 0.0028, + "step": 57460 + }, + { + "epoch": 0.36859472858869974, + "grad_norm": 0.11054620891809464, + "learning_rate": 9.785103616473547e-06, + "loss": 0.0039, + "step": 57470 + }, + { + "epoch": 0.36865886548248583, + "grad_norm": 0.17774257063865662, + "learning_rate": 9.784941261576714e-06, + "loss": 0.0057, + "step": 57480 + }, + { + "epoch": 0.3687230023762719, + "grad_norm": 0.09684696048498154, + "learning_rate": 9.784778846721146e-06, + "loss": 0.0036, + "step": 57490 + }, + { + "epoch": 0.368787139270058, + "grad_norm": 0.15395021438598633, + "learning_rate": 9.784616371908879e-06, + "loss": 0.0019, + "step": 57500 + }, + { + "epoch": 0.3688512761638441, + "grad_norm": 0.05425999313592911, + "learning_rate": 9.78445383714195e-06, + "loss": 0.0041, + "step": 57510 + }, + { + "epoch": 0.3689154130576302, + "grad_norm": 0.24702796339988708, + "learning_rate": 9.784291242422394e-06, + "loss": 0.0051, + "step": 57520 + }, + { + "epoch": 0.3689795499514163, + "grad_norm": 0.13330377638339996, + "learning_rate": 9.78412858775225e-06, + "loss": 0.0024, + "step": 57530 + }, + { + "epoch": 0.3690436868452024, + "grad_norm": 0.22147420048713684, + "learning_rate": 9.783965873133557e-06, + "loss": 0.006, + "step": 57540 + }, + { + "epoch": 0.3691078237389885, + "grad_norm": 0.25603482127189636, + "learning_rate": 9.78380309856835e-06, + "loss": 0.0021, + "step": 57550 + }, + { + "epoch": 0.3691719606327746, + "grad_norm": 0.14216257631778717, + "learning_rate": 9.783640264058674e-06, + "loss": 0.0043, + "step": 57560 + }, + { + "epoch": 0.36923609752656067, + "grad_norm": 0.16568593680858612, + "learning_rate": 9.783477369606565e-06, + "loss": 0.0032, + "step": 57570 + }, + { + "epoch": 0.36930023442034676, + "grad_norm": 0.09964045882225037, + "learning_rate": 9.783314415214065e-06, + "loss": 0.0049, + "step": 57580 + }, + { + "epoch": 0.3693643713141329, + "grad_norm": 0.2552603781223297, + "learning_rate": 9.783151400883217e-06, + "loss": 0.0062, + "step": 57590 + }, + { + "epoch": 0.369428508207919, + "grad_norm": 0.04480994865298271, + "learning_rate": 9.782988326616065e-06, + "loss": 0.0032, + "step": 57600 + }, + { + "epoch": 0.3694926451017051, + "grad_norm": 0.11393241584300995, + "learning_rate": 9.782825192414651e-06, + "loss": 0.0023, + "step": 57610 + }, + { + "epoch": 0.3695567819954912, + "grad_norm": 0.3635132610797882, + "learning_rate": 9.782661998281018e-06, + "loss": 0.0062, + "step": 57620 + }, + { + "epoch": 0.3696209188892773, + "grad_norm": 0.3223370909690857, + "learning_rate": 9.782498744217213e-06, + "loss": 0.0057, + "step": 57630 + }, + { + "epoch": 0.3696850557830634, + "grad_norm": 0.27565741539001465, + "learning_rate": 9.78233543022528e-06, + "loss": 0.0043, + "step": 57640 + }, + { + "epoch": 0.36974919267684947, + "grad_norm": 0.055533505976200104, + "learning_rate": 9.782172056307267e-06, + "loss": 0.0035, + "step": 57650 + }, + { + "epoch": 0.36981332957063556, + "grad_norm": 0.09045581519603729, + "learning_rate": 9.782008622465218e-06, + "loss": 0.0048, + "step": 57660 + }, + { + "epoch": 0.36987746646442166, + "grad_norm": 0.04754919931292534, + "learning_rate": 9.781845128701186e-06, + "loss": 0.0038, + "step": 57670 + }, + { + "epoch": 0.36994160335820775, + "grad_norm": 0.14638595283031464, + "learning_rate": 9.781681575017216e-06, + "loss": 0.0041, + "step": 57680 + }, + { + "epoch": 0.37000574025199384, + "grad_norm": 0.1129072830080986, + "learning_rate": 9.781517961415358e-06, + "loss": 0.0045, + "step": 57690 + }, + { + "epoch": 0.37006987714577994, + "grad_norm": 0.07457966357469559, + "learning_rate": 9.781354287897663e-06, + "loss": 0.005, + "step": 57700 + }, + { + "epoch": 0.37013401403956603, + "grad_norm": 0.2532108426094055, + "learning_rate": 9.781190554466183e-06, + "loss": 0.0053, + "step": 57710 + }, + { + "epoch": 0.3701981509333521, + "grad_norm": 0.07001078128814697, + "learning_rate": 9.781026761122965e-06, + "loss": 0.005, + "step": 57720 + }, + { + "epoch": 0.37026228782713827, + "grad_norm": 0.04986302927136421, + "learning_rate": 9.780862907870068e-06, + "loss": 0.0037, + "step": 57730 + }, + { + "epoch": 0.37032642472092436, + "grad_norm": 0.04155554622411728, + "learning_rate": 9.78069899470954e-06, + "loss": 0.0058, + "step": 57740 + }, + { + "epoch": 0.37039056161471046, + "grad_norm": 0.10352253913879395, + "learning_rate": 9.780535021643438e-06, + "loss": 0.0038, + "step": 57750 + }, + { + "epoch": 0.37045469850849655, + "grad_norm": 0.5587517023086548, + "learning_rate": 9.780370988673815e-06, + "loss": 0.0051, + "step": 57760 + }, + { + "epoch": 0.37051883540228264, + "grad_norm": 0.24137362837791443, + "learning_rate": 9.780206895802726e-06, + "loss": 0.006, + "step": 57770 + }, + { + "epoch": 0.37058297229606874, + "grad_norm": 0.16731369495391846, + "learning_rate": 9.78004274303223e-06, + "loss": 0.004, + "step": 57780 + }, + { + "epoch": 0.37064710918985483, + "grad_norm": 0.19096648693084717, + "learning_rate": 9.779878530364382e-06, + "loss": 0.0029, + "step": 57790 + }, + { + "epoch": 0.3707112460836409, + "grad_norm": 0.19307313859462738, + "learning_rate": 9.779714257801239e-06, + "loss": 0.0208, + "step": 57800 + }, + { + "epoch": 0.370775382977427, + "grad_norm": 0.19411031901836395, + "learning_rate": 9.779549925344861e-06, + "loss": 0.0047, + "step": 57810 + }, + { + "epoch": 0.3708395198712131, + "grad_norm": 0.3477558493614197, + "learning_rate": 9.779385532997305e-06, + "loss": 0.0031, + "step": 57820 + }, + { + "epoch": 0.3709036567649992, + "grad_norm": 0.18182215094566345, + "learning_rate": 9.779221080760634e-06, + "loss": 0.0047, + "step": 57830 + }, + { + "epoch": 0.3709677936587853, + "grad_norm": 0.25277140736579895, + "learning_rate": 9.779056568636908e-06, + "loss": 0.0103, + "step": 57840 + }, + { + "epoch": 0.3710319305525714, + "grad_norm": 0.09088609367609024, + "learning_rate": 9.778891996628186e-06, + "loss": 0.0056, + "step": 57850 + }, + { + "epoch": 0.3710960674463575, + "grad_norm": 0.2450553923845291, + "learning_rate": 9.778727364736533e-06, + "loss": 0.0051, + "step": 57860 + }, + { + "epoch": 0.37116020434014363, + "grad_norm": 0.23536743223667145, + "learning_rate": 9.77856267296401e-06, + "loss": 0.0034, + "step": 57870 + }, + { + "epoch": 0.3712243412339297, + "grad_norm": 0.12124660611152649, + "learning_rate": 9.778397921312683e-06, + "loss": 0.0055, + "step": 57880 + }, + { + "epoch": 0.3712884781277158, + "grad_norm": 0.08905904740095139, + "learning_rate": 9.778233109784614e-06, + "loss": 0.0028, + "step": 57890 + }, + { + "epoch": 0.3713526150215019, + "grad_norm": 0.1720379889011383, + "learning_rate": 9.778068238381868e-06, + "loss": 0.004, + "step": 57900 + }, + { + "epoch": 0.371416751915288, + "grad_norm": 0.295754611492157, + "learning_rate": 9.777903307106513e-06, + "loss": 0.0046, + "step": 57910 + }, + { + "epoch": 0.3714808888090741, + "grad_norm": 0.1361151933670044, + "learning_rate": 9.777738315960615e-06, + "loss": 0.0034, + "step": 57920 + }, + { + "epoch": 0.3715450257028602, + "grad_norm": 0.14360782504081726, + "learning_rate": 9.777573264946241e-06, + "loss": 0.0029, + "step": 57930 + }, + { + "epoch": 0.3716091625966463, + "grad_norm": 0.0977223590016365, + "learning_rate": 9.77740815406546e-06, + "loss": 0.0038, + "step": 57940 + }, + { + "epoch": 0.3716732994904324, + "grad_norm": 0.05808310583233833, + "learning_rate": 9.77724298332034e-06, + "loss": 0.0024, + "step": 57950 + }, + { + "epoch": 0.37173743638421847, + "grad_norm": 0.04208715260028839, + "learning_rate": 9.777077752712952e-06, + "loss": 0.0031, + "step": 57960 + }, + { + "epoch": 0.37180157327800456, + "grad_norm": 0.4183732867240906, + "learning_rate": 9.776912462245365e-06, + "loss": 0.0071, + "step": 57970 + }, + { + "epoch": 0.37186571017179065, + "grad_norm": 0.19289630651474, + "learning_rate": 9.776747111919652e-06, + "loss": 0.0062, + "step": 57980 + }, + { + "epoch": 0.37192984706557675, + "grad_norm": 0.11601032316684723, + "learning_rate": 9.776581701737881e-06, + "loss": 0.014, + "step": 57990 + }, + { + "epoch": 0.37199398395936284, + "grad_norm": 0.09035704284906387, + "learning_rate": 9.776416231702131e-06, + "loss": 0.0055, + "step": 58000 + }, + { + "epoch": 0.372058120853149, + "grad_norm": 0.12546135485172272, + "learning_rate": 9.77625070181447e-06, + "loss": 0.0062, + "step": 58010 + }, + { + "epoch": 0.3721222577469351, + "grad_norm": 0.2818664014339447, + "learning_rate": 9.776085112076974e-06, + "loss": 0.0038, + "step": 58020 + }, + { + "epoch": 0.3721863946407212, + "grad_norm": 0.11678871512413025, + "learning_rate": 9.775919462491718e-06, + "loss": 0.0074, + "step": 58030 + }, + { + "epoch": 0.37225053153450727, + "grad_norm": 0.2236698418855667, + "learning_rate": 9.77575375306078e-06, + "loss": 0.0058, + "step": 58040 + }, + { + "epoch": 0.37231466842829336, + "grad_norm": 0.20366476476192474, + "learning_rate": 9.775587983786231e-06, + "loss": 0.0033, + "step": 58050 + }, + { + "epoch": 0.37237880532207945, + "grad_norm": 0.15319477021694183, + "learning_rate": 9.775422154670154e-06, + "loss": 0.0043, + "step": 58060 + }, + { + "epoch": 0.37244294221586555, + "grad_norm": 0.09600375592708588, + "learning_rate": 9.775256265714623e-06, + "loss": 0.0035, + "step": 58070 + }, + { + "epoch": 0.37250707910965164, + "grad_norm": 0.17463268339633942, + "learning_rate": 9.775090316921717e-06, + "loss": 0.0041, + "step": 58080 + }, + { + "epoch": 0.37257121600343773, + "grad_norm": 0.12594033777713776, + "learning_rate": 9.77492430829352e-06, + "loss": 0.0053, + "step": 58090 + }, + { + "epoch": 0.3726353528972238, + "grad_norm": 0.18876934051513672, + "learning_rate": 9.774758239832107e-06, + "loss": 0.0042, + "step": 58100 + }, + { + "epoch": 0.3726994897910099, + "grad_norm": 0.31719061732292175, + "learning_rate": 9.774592111539561e-06, + "loss": 0.0042, + "step": 58110 + }, + { + "epoch": 0.372763626684796, + "grad_norm": 0.21381841599941254, + "learning_rate": 9.774425923417964e-06, + "loss": 0.0039, + "step": 58120 + }, + { + "epoch": 0.3728277635785821, + "grad_norm": 0.05288473144173622, + "learning_rate": 9.774259675469397e-06, + "loss": 0.0036, + "step": 58130 + }, + { + "epoch": 0.3728919004723682, + "grad_norm": 0.1426960974931717, + "learning_rate": 9.774093367695945e-06, + "loss": 0.0025, + "step": 58140 + }, + { + "epoch": 0.37295603736615435, + "grad_norm": 0.2808662950992584, + "learning_rate": 9.77392700009969e-06, + "loss": 0.0046, + "step": 58150 + }, + { + "epoch": 0.37302017425994044, + "grad_norm": 0.19471795856952667, + "learning_rate": 9.77376057268272e-06, + "loss": 0.0038, + "step": 58160 + }, + { + "epoch": 0.37308431115372653, + "grad_norm": 0.11051013320684433, + "learning_rate": 9.773594085447116e-06, + "loss": 0.005, + "step": 58170 + }, + { + "epoch": 0.3731484480475126, + "grad_norm": 0.08199208229780197, + "learning_rate": 9.773427538394967e-06, + "loss": 0.0045, + "step": 58180 + }, + { + "epoch": 0.3732125849412987, + "grad_norm": 0.0872143879532814, + "learning_rate": 9.77326093152836e-06, + "loss": 0.0043, + "step": 58190 + }, + { + "epoch": 0.3732767218350848, + "grad_norm": 0.2271701693534851, + "learning_rate": 9.773094264849381e-06, + "loss": 0.004, + "step": 58200 + }, + { + "epoch": 0.3733408587288709, + "grad_norm": 0.07518661767244339, + "learning_rate": 9.772927538360121e-06, + "loss": 0.0083, + "step": 58210 + }, + { + "epoch": 0.373404995622657, + "grad_norm": 0.3071039319038391, + "learning_rate": 9.772760752062666e-06, + "loss": 0.0035, + "step": 58220 + }, + { + "epoch": 0.3734691325164431, + "grad_norm": 0.2126374989748001, + "learning_rate": 9.77259390595911e-06, + "loss": 0.0029, + "step": 58230 + }, + { + "epoch": 0.3735332694102292, + "grad_norm": 0.3216870427131653, + "learning_rate": 9.77242700005154e-06, + "loss": 0.0044, + "step": 58240 + }, + { + "epoch": 0.3735974063040153, + "grad_norm": 0.2805640995502472, + "learning_rate": 9.77226003434205e-06, + "loss": 0.0042, + "step": 58250 + }, + { + "epoch": 0.37366154319780137, + "grad_norm": 0.18705041706562042, + "learning_rate": 9.77209300883273e-06, + "loss": 0.004, + "step": 58260 + }, + { + "epoch": 0.37372568009158746, + "grad_norm": 0.4507295489311218, + "learning_rate": 9.771925923525674e-06, + "loss": 0.0069, + "step": 58270 + }, + { + "epoch": 0.37378981698537356, + "grad_norm": 0.3732795715332031, + "learning_rate": 9.771758778422977e-06, + "loss": 0.0039, + "step": 58280 + }, + { + "epoch": 0.3738539538791597, + "grad_norm": 0.25247877836227417, + "learning_rate": 9.77159157352673e-06, + "loss": 0.0051, + "step": 58290 + }, + { + "epoch": 0.3739180907729458, + "grad_norm": 0.01693500392138958, + "learning_rate": 9.771424308839033e-06, + "loss": 0.003, + "step": 58300 + }, + { + "epoch": 0.3739822276667319, + "grad_norm": 0.028985632583498955, + "learning_rate": 9.771256984361978e-06, + "loss": 0.0041, + "step": 58310 + }, + { + "epoch": 0.374046364560518, + "grad_norm": 0.3382354974746704, + "learning_rate": 9.771089600097663e-06, + "loss": 0.0068, + "step": 58320 + }, + { + "epoch": 0.3741105014543041, + "grad_norm": 0.17548677325248718, + "learning_rate": 9.770922156048185e-06, + "loss": 0.0058, + "step": 58330 + }, + { + "epoch": 0.37417463834809017, + "grad_norm": 0.4082449972629547, + "learning_rate": 9.770754652215644e-06, + "loss": 0.0078, + "step": 58340 + }, + { + "epoch": 0.37423877524187626, + "grad_norm": 0.14011430740356445, + "learning_rate": 9.770587088602135e-06, + "loss": 0.0047, + "step": 58350 + }, + { + "epoch": 0.37430291213566236, + "grad_norm": 0.07062096893787384, + "learning_rate": 9.770419465209762e-06, + "loss": 0.0037, + "step": 58360 + }, + { + "epoch": 0.37436704902944845, + "grad_norm": 0.1349411904811859, + "learning_rate": 9.770251782040625e-06, + "loss": 0.004, + "step": 58370 + }, + { + "epoch": 0.37443118592323454, + "grad_norm": 0.16179393231868744, + "learning_rate": 9.770084039096823e-06, + "loss": 0.0093, + "step": 58380 + }, + { + "epoch": 0.37449532281702064, + "grad_norm": 0.3049332797527313, + "learning_rate": 9.769916236380458e-06, + "loss": 0.0033, + "step": 58390 + }, + { + "epoch": 0.37455945971080673, + "grad_norm": 0.22818659245967865, + "learning_rate": 9.769748373893633e-06, + "loss": 0.0035, + "step": 58400 + }, + { + "epoch": 0.3746235966045928, + "grad_norm": 0.09892981499433517, + "learning_rate": 9.769580451638454e-06, + "loss": 0.0037, + "step": 58410 + }, + { + "epoch": 0.3746877334983789, + "grad_norm": 0.15319883823394775, + "learning_rate": 9.769412469617022e-06, + "loss": 0.0033, + "step": 58420 + }, + { + "epoch": 0.374751870392165, + "grad_norm": 0.14722667634487152, + "learning_rate": 9.769244427831444e-06, + "loss": 0.0032, + "step": 58430 + }, + { + "epoch": 0.37481600728595116, + "grad_norm": 0.6439252495765686, + "learning_rate": 9.769076326283824e-06, + "loss": 0.0034, + "step": 58440 + }, + { + "epoch": 0.37488014417973725, + "grad_norm": 0.16144661605358124, + "learning_rate": 9.768908164976269e-06, + "loss": 0.0037, + "step": 58450 + }, + { + "epoch": 0.37494428107352334, + "grad_norm": 0.10690446943044662, + "learning_rate": 9.768739943910887e-06, + "loss": 0.0047, + "step": 58460 + }, + { + "epoch": 0.37500841796730944, + "grad_norm": 0.19510377943515778, + "learning_rate": 9.768571663089786e-06, + "loss": 0.0055, + "step": 58470 + }, + { + "epoch": 0.37507255486109553, + "grad_norm": 0.5048791170120239, + "learning_rate": 9.768403322515072e-06, + "loss": 0.005, + "step": 58480 + }, + { + "epoch": 0.3751366917548816, + "grad_norm": 0.20897120237350464, + "learning_rate": 9.768234922188858e-06, + "loss": 0.0061, + "step": 58490 + }, + { + "epoch": 0.3752008286486677, + "grad_norm": 0.14457884430885315, + "learning_rate": 9.768066462113252e-06, + "loss": 0.0051, + "step": 58500 + }, + { + "epoch": 0.3752649655424538, + "grad_norm": 0.15007531642913818, + "learning_rate": 9.767897942290366e-06, + "loss": 0.003, + "step": 58510 + }, + { + "epoch": 0.3753291024362399, + "grad_norm": 0.07479389756917953, + "learning_rate": 9.76772936272231e-06, + "loss": 0.0024, + "step": 58520 + }, + { + "epoch": 0.375393239330026, + "grad_norm": 0.20837247371673584, + "learning_rate": 9.767560723411199e-06, + "loss": 0.0067, + "step": 58530 + }, + { + "epoch": 0.3754573762238121, + "grad_norm": 0.22486932575702667, + "learning_rate": 9.767392024359145e-06, + "loss": 0.0036, + "step": 58540 + }, + { + "epoch": 0.3755215131175982, + "grad_norm": 0.03745689615607262, + "learning_rate": 9.767223265568261e-06, + "loss": 0.0024, + "step": 58550 + }, + { + "epoch": 0.3755856500113843, + "grad_norm": 0.308506041765213, + "learning_rate": 9.767054447040663e-06, + "loss": 0.0042, + "step": 58560 + }, + { + "epoch": 0.37564978690517037, + "grad_norm": 0.06810969114303589, + "learning_rate": 9.766885568778465e-06, + "loss": 0.0048, + "step": 58570 + }, + { + "epoch": 0.3757139237989565, + "grad_norm": 0.13696783781051636, + "learning_rate": 9.766716630783784e-06, + "loss": 0.0053, + "step": 58580 + }, + { + "epoch": 0.3757780606927426, + "grad_norm": 0.11493898183107376, + "learning_rate": 9.766547633058737e-06, + "loss": 0.0062, + "step": 58590 + }, + { + "epoch": 0.3758421975865287, + "grad_norm": 0.16757753491401672, + "learning_rate": 9.766378575605442e-06, + "loss": 0.0054, + "step": 58600 + }, + { + "epoch": 0.3759063344803148, + "grad_norm": 0.4030192494392395, + "learning_rate": 9.766209458426018e-06, + "loss": 0.0033, + "step": 58610 + }, + { + "epoch": 0.3759704713741009, + "grad_norm": 0.20232348144054413, + "learning_rate": 9.766040281522581e-06, + "loss": 0.0038, + "step": 58620 + }, + { + "epoch": 0.376034608267887, + "grad_norm": 0.2894548773765564, + "learning_rate": 9.765871044897254e-06, + "loss": 0.0032, + "step": 58630 + }, + { + "epoch": 0.3760987451616731, + "grad_norm": 0.29514172673225403, + "learning_rate": 9.765701748552157e-06, + "loss": 0.0044, + "step": 58640 + }, + { + "epoch": 0.37616288205545917, + "grad_norm": 0.11390168219804764, + "learning_rate": 9.76553239248941e-06, + "loss": 0.0042, + "step": 58650 + }, + { + "epoch": 0.37622701894924526, + "grad_norm": 0.17661869525909424, + "learning_rate": 9.765362976711138e-06, + "loss": 0.0034, + "step": 58660 + }, + { + "epoch": 0.37629115584303136, + "grad_norm": 0.5084266066551208, + "learning_rate": 9.76519350121946e-06, + "loss": 0.0034, + "step": 58670 + }, + { + "epoch": 0.37635529273681745, + "grad_norm": 0.28248316049575806, + "learning_rate": 9.765023966016502e-06, + "loss": 0.0031, + "step": 58680 + }, + { + "epoch": 0.37641942963060354, + "grad_norm": 0.3346003592014313, + "learning_rate": 9.76485437110439e-06, + "loss": 0.0038, + "step": 58690 + }, + { + "epoch": 0.37648356652438963, + "grad_norm": 0.137682244181633, + "learning_rate": 9.764684716485247e-06, + "loss": 0.0058, + "step": 58700 + }, + { + "epoch": 0.3765477034181757, + "grad_norm": 0.15332180261611938, + "learning_rate": 9.764515002161198e-06, + "loss": 0.0036, + "step": 58710 + }, + { + "epoch": 0.3766118403119619, + "grad_norm": 0.1447349637746811, + "learning_rate": 9.76434522813437e-06, + "loss": 0.0043, + "step": 58720 + }, + { + "epoch": 0.37667597720574797, + "grad_norm": 0.15648894011974335, + "learning_rate": 9.764175394406894e-06, + "loss": 0.0042, + "step": 58730 + }, + { + "epoch": 0.37674011409953406, + "grad_norm": 0.08089536428451538, + "learning_rate": 9.764005500980895e-06, + "loss": 0.0078, + "step": 58740 + }, + { + "epoch": 0.37680425099332016, + "grad_norm": 0.17732544243335724, + "learning_rate": 9.763835547858502e-06, + "loss": 0.0054, + "step": 58750 + }, + { + "epoch": 0.37686838788710625, + "grad_norm": 0.24036003649234772, + "learning_rate": 9.763665535041845e-06, + "loss": 0.0048, + "step": 58760 + }, + { + "epoch": 0.37693252478089234, + "grad_norm": 0.01988343335688114, + "learning_rate": 9.763495462533053e-06, + "loss": 0.0048, + "step": 58770 + }, + { + "epoch": 0.37699666167467843, + "grad_norm": 0.42720386385917664, + "learning_rate": 9.763325330334259e-06, + "loss": 0.0052, + "step": 58780 + }, + { + "epoch": 0.37706079856846453, + "grad_norm": 0.24175818264484406, + "learning_rate": 9.763155138447593e-06, + "loss": 0.0032, + "step": 58790 + }, + { + "epoch": 0.3771249354622506, + "grad_norm": 0.16139830648899078, + "learning_rate": 9.76298488687519e-06, + "loss": 0.0072, + "step": 58800 + }, + { + "epoch": 0.3771890723560367, + "grad_norm": 0.11897553503513336, + "learning_rate": 9.762814575619184e-06, + "loss": 0.0036, + "step": 58810 + }, + { + "epoch": 0.3772532092498228, + "grad_norm": 0.06003080680966377, + "learning_rate": 9.762644204681706e-06, + "loss": 0.0053, + "step": 58820 + }, + { + "epoch": 0.3773173461436089, + "grad_norm": 0.21815739572048187, + "learning_rate": 9.762473774064894e-06, + "loss": 0.0045, + "step": 58830 + }, + { + "epoch": 0.377381483037395, + "grad_norm": 0.28276723623275757, + "learning_rate": 9.76230328377088e-06, + "loss": 0.0056, + "step": 58840 + }, + { + "epoch": 0.3774456199311811, + "grad_norm": 0.0559268556535244, + "learning_rate": 9.762132733801802e-06, + "loss": 0.0028, + "step": 58850 + }, + { + "epoch": 0.37750975682496724, + "grad_norm": 0.07177907228469849, + "learning_rate": 9.761962124159799e-06, + "loss": 0.0044, + "step": 58860 + }, + { + "epoch": 0.37757389371875333, + "grad_norm": 0.4044466018676758, + "learning_rate": 9.761791454847005e-06, + "loss": 0.0064, + "step": 58870 + }, + { + "epoch": 0.3776380306125394, + "grad_norm": 0.15512405335903168, + "learning_rate": 9.761620725865563e-06, + "loss": 0.0041, + "step": 58880 + }, + { + "epoch": 0.3777021675063255, + "grad_norm": 0.10939635336399078, + "learning_rate": 9.761449937217609e-06, + "loss": 0.0042, + "step": 58890 + }, + { + "epoch": 0.3777663044001116, + "grad_norm": 0.08103182166814804, + "learning_rate": 9.761279088905285e-06, + "loss": 0.004, + "step": 58900 + }, + { + "epoch": 0.3778304412938977, + "grad_norm": 0.2021099030971527, + "learning_rate": 9.761108180930731e-06, + "loss": 0.0048, + "step": 58910 + }, + { + "epoch": 0.3778945781876838, + "grad_norm": 0.15852302312850952, + "learning_rate": 9.760937213296089e-06, + "loss": 0.0021, + "step": 58920 + }, + { + "epoch": 0.3779587150814699, + "grad_norm": 0.2298976182937622, + "learning_rate": 9.7607661860035e-06, + "loss": 0.003, + "step": 58930 + }, + { + "epoch": 0.378022851975256, + "grad_norm": 0.16082763671875, + "learning_rate": 9.760595099055108e-06, + "loss": 0.0039, + "step": 58940 + }, + { + "epoch": 0.3780869888690421, + "grad_norm": 0.28142333030700684, + "learning_rate": 9.76042395245306e-06, + "loss": 0.0045, + "step": 58950 + }, + { + "epoch": 0.37815112576282817, + "grad_norm": 0.27675023674964905, + "learning_rate": 9.760252746199495e-06, + "loss": 0.0044, + "step": 58960 + }, + { + "epoch": 0.37821526265661426, + "grad_norm": 0.2622521221637726, + "learning_rate": 9.760081480296561e-06, + "loss": 0.0028, + "step": 58970 + }, + { + "epoch": 0.37827939955040035, + "grad_norm": 0.2026662826538086, + "learning_rate": 9.759910154746406e-06, + "loss": 0.0058, + "step": 58980 + }, + { + "epoch": 0.37834353644418645, + "grad_norm": 0.14158673584461212, + "learning_rate": 9.759738769551174e-06, + "loss": 0.0047, + "step": 58990 + }, + { + "epoch": 0.3784076733379726, + "grad_norm": 0.21854153275489807, + "learning_rate": 9.759567324713013e-06, + "loss": 0.0097, + "step": 59000 + }, + { + "epoch": 0.3784718102317587, + "grad_norm": 0.20481084287166595, + "learning_rate": 9.759395820234071e-06, + "loss": 0.0051, + "step": 59010 + }, + { + "epoch": 0.3785359471255448, + "grad_norm": 0.12422089278697968, + "learning_rate": 9.759224256116499e-06, + "loss": 0.0049, + "step": 59020 + }, + { + "epoch": 0.3786000840193309, + "grad_norm": 0.07579615712165833, + "learning_rate": 9.759052632362447e-06, + "loss": 0.0055, + "step": 59030 + }, + { + "epoch": 0.37866422091311697, + "grad_norm": 0.2826728820800781, + "learning_rate": 9.758880948974061e-06, + "loss": 0.0068, + "step": 59040 + }, + { + "epoch": 0.37872835780690306, + "grad_norm": 0.23323026299476624, + "learning_rate": 9.758709205953498e-06, + "loss": 0.0047, + "step": 59050 + }, + { + "epoch": 0.37879249470068915, + "grad_norm": 0.12119197100400925, + "learning_rate": 9.758537403302908e-06, + "loss": 0.0046, + "step": 59060 + }, + { + "epoch": 0.37885663159447525, + "grad_norm": 0.26631271839141846, + "learning_rate": 9.758365541024442e-06, + "loss": 0.0027, + "step": 59070 + }, + { + "epoch": 0.37892076848826134, + "grad_norm": 0.09156379848718643, + "learning_rate": 9.758193619120256e-06, + "loss": 0.0056, + "step": 59080 + }, + { + "epoch": 0.37898490538204743, + "grad_norm": 0.2119128257036209, + "learning_rate": 9.758021637592504e-06, + "loss": 0.0044, + "step": 59090 + }, + { + "epoch": 0.3790490422758335, + "grad_norm": 0.09771595895290375, + "learning_rate": 9.757849596443338e-06, + "loss": 0.0047, + "step": 59100 + }, + { + "epoch": 0.3791131791696196, + "grad_norm": 0.1576254814863205, + "learning_rate": 9.757677495674918e-06, + "loss": 0.0041, + "step": 59110 + }, + { + "epoch": 0.3791773160634057, + "grad_norm": 0.1455041766166687, + "learning_rate": 9.7575053352894e-06, + "loss": 0.0048, + "step": 59120 + }, + { + "epoch": 0.3792414529571918, + "grad_norm": 0.19070494174957275, + "learning_rate": 9.757333115288935e-06, + "loss": 0.0035, + "step": 59130 + }, + { + "epoch": 0.37930558985097795, + "grad_norm": 0.28895506262779236, + "learning_rate": 9.757160835675692e-06, + "loss": 0.0033, + "step": 59140 + }, + { + "epoch": 0.37936972674476405, + "grad_norm": 0.15067146718502045, + "learning_rate": 9.75698849645182e-06, + "loss": 0.0021, + "step": 59150 + }, + { + "epoch": 0.37943386363855014, + "grad_norm": 0.15311887860298157, + "learning_rate": 9.756816097619483e-06, + "loss": 0.0067, + "step": 59160 + }, + { + "epoch": 0.37949800053233623, + "grad_norm": 0.17993053793907166, + "learning_rate": 9.756643639180842e-06, + "loss": 0.0045, + "step": 59170 + }, + { + "epoch": 0.3795621374261223, + "grad_norm": 0.09240105003118515, + "learning_rate": 9.756471121138056e-06, + "loss": 0.0048, + "step": 59180 + }, + { + "epoch": 0.3796262743199084, + "grad_norm": 0.46541252732276917, + "learning_rate": 9.756298543493286e-06, + "loss": 0.003, + "step": 59190 + }, + { + "epoch": 0.3796904112136945, + "grad_norm": 0.1715819388628006, + "learning_rate": 9.756125906248698e-06, + "loss": 0.0034, + "step": 59200 + }, + { + "epoch": 0.3797545481074806, + "grad_norm": 0.4843077063560486, + "learning_rate": 9.755953209406452e-06, + "loss": 0.0063, + "step": 59210 + }, + { + "epoch": 0.3798186850012667, + "grad_norm": 0.06416106224060059, + "learning_rate": 9.755780452968715e-06, + "loss": 0.0028, + "step": 59220 + }, + { + "epoch": 0.3798828218950528, + "grad_norm": 0.12089081108570099, + "learning_rate": 9.755607636937647e-06, + "loss": 0.0037, + "step": 59230 + }, + { + "epoch": 0.3799469587888389, + "grad_norm": 0.2047852724790573, + "learning_rate": 9.755434761315418e-06, + "loss": 0.0043, + "step": 59240 + }, + { + "epoch": 0.380011095682625, + "grad_norm": 0.04684087261557579, + "learning_rate": 9.755261826104194e-06, + "loss": 0.006, + "step": 59250 + }, + { + "epoch": 0.38007523257641107, + "grad_norm": 0.1112871989607811, + "learning_rate": 9.75508883130614e-06, + "loss": 0.0046, + "step": 59260 + }, + { + "epoch": 0.38013936947019716, + "grad_norm": 0.12849275767803192, + "learning_rate": 9.754915776923425e-06, + "loss": 0.0032, + "step": 59270 + }, + { + "epoch": 0.3802035063639833, + "grad_norm": 0.05961214005947113, + "learning_rate": 9.754742662958217e-06, + "loss": 0.0065, + "step": 59280 + }, + { + "epoch": 0.3802676432577694, + "grad_norm": 0.26884549856185913, + "learning_rate": 9.754569489412684e-06, + "loss": 0.0047, + "step": 59290 + }, + { + "epoch": 0.3803317801515555, + "grad_norm": 0.10179663449525833, + "learning_rate": 9.754396256288998e-06, + "loss": 0.0043, + "step": 59300 + }, + { + "epoch": 0.3803959170453416, + "grad_norm": 0.13700559735298157, + "learning_rate": 9.754222963589328e-06, + "loss": 0.0042, + "step": 59310 + }, + { + "epoch": 0.3804600539391277, + "grad_norm": 0.11233152449131012, + "learning_rate": 9.754049611315847e-06, + "loss": 0.003, + "step": 59320 + }, + { + "epoch": 0.3805241908329138, + "grad_norm": 0.3106655776500702, + "learning_rate": 9.753876199470729e-06, + "loss": 0.0067, + "step": 59330 + }, + { + "epoch": 0.38058832772669987, + "grad_norm": 0.05697939917445183, + "learning_rate": 9.753702728056143e-06, + "loss": 0.0059, + "step": 59340 + }, + { + "epoch": 0.38065246462048596, + "grad_norm": 0.1379857361316681, + "learning_rate": 9.753529197074263e-06, + "loss": 0.0051, + "step": 59350 + }, + { + "epoch": 0.38071660151427206, + "grad_norm": 0.30130454897880554, + "learning_rate": 9.753355606527267e-06, + "loss": 0.0045, + "step": 59360 + }, + { + "epoch": 0.38078073840805815, + "grad_norm": 0.10157103836536407, + "learning_rate": 9.753181956417327e-06, + "loss": 0.0026, + "step": 59370 + }, + { + "epoch": 0.38084487530184424, + "grad_norm": 0.16066139936447144, + "learning_rate": 9.75300824674662e-06, + "loss": 0.0034, + "step": 59380 + }, + { + "epoch": 0.38090901219563034, + "grad_norm": 0.3818877935409546, + "learning_rate": 9.752834477517322e-06, + "loss": 0.0065, + "step": 59390 + }, + { + "epoch": 0.38097314908941643, + "grad_norm": 0.11675024032592773, + "learning_rate": 9.752660648731614e-06, + "loss": 0.0065, + "step": 59400 + }, + { + "epoch": 0.3810372859832025, + "grad_norm": 0.20888476073741913, + "learning_rate": 9.752486760391669e-06, + "loss": 0.0029, + "step": 59410 + }, + { + "epoch": 0.38110142287698867, + "grad_norm": 0.20465844869613647, + "learning_rate": 9.752312812499667e-06, + "loss": 0.0049, + "step": 59420 + }, + { + "epoch": 0.38116555977077476, + "grad_norm": 0.16326738893985748, + "learning_rate": 9.75213880505779e-06, + "loss": 0.0064, + "step": 59430 + }, + { + "epoch": 0.38122969666456086, + "grad_norm": 0.1689678281545639, + "learning_rate": 9.75196473806822e-06, + "loss": 0.0018, + "step": 59440 + }, + { + "epoch": 0.38129383355834695, + "grad_norm": 0.32500091195106506, + "learning_rate": 9.751790611533134e-06, + "loss": 0.0028, + "step": 59450 + }, + { + "epoch": 0.38135797045213304, + "grad_norm": 0.28971317410469055, + "learning_rate": 9.751616425454716e-06, + "loss": 0.0031, + "step": 59460 + }, + { + "epoch": 0.38142210734591914, + "grad_norm": 0.1515723168849945, + "learning_rate": 9.751442179835148e-06, + "loss": 0.0031, + "step": 59470 + }, + { + "epoch": 0.38148624423970523, + "grad_norm": 0.16772037744522095, + "learning_rate": 9.751267874676614e-06, + "loss": 0.0056, + "step": 59480 + }, + { + "epoch": 0.3815503811334913, + "grad_norm": 0.21892644464969635, + "learning_rate": 9.751093509981297e-06, + "loss": 0.0037, + "step": 59490 + }, + { + "epoch": 0.3816145180272774, + "grad_norm": 0.033471908420324326, + "learning_rate": 9.750919085751383e-06, + "loss": 0.0039, + "step": 59500 + }, + { + "epoch": 0.3816786549210635, + "grad_norm": 0.8038234114646912, + "learning_rate": 9.750744601989057e-06, + "loss": 0.0056, + "step": 59510 + }, + { + "epoch": 0.3817427918148496, + "grad_norm": 0.09216465801000595, + "learning_rate": 9.750570058696507e-06, + "loss": 0.0093, + "step": 59520 + }, + { + "epoch": 0.3818069287086357, + "grad_norm": 0.18910400569438934, + "learning_rate": 9.750395455875918e-06, + "loss": 0.0047, + "step": 59530 + }, + { + "epoch": 0.3818710656024218, + "grad_norm": 0.5617573857307434, + "learning_rate": 9.75022079352948e-06, + "loss": 0.0056, + "step": 59540 + }, + { + "epoch": 0.3819352024962079, + "grad_norm": 0.3218870759010315, + "learning_rate": 9.75004607165938e-06, + "loss": 0.0044, + "step": 59550 + }, + { + "epoch": 0.38199933938999403, + "grad_norm": 0.21268276870250702, + "learning_rate": 9.749871290267807e-06, + "loss": 0.0052, + "step": 59560 + }, + { + "epoch": 0.3820634762837801, + "grad_norm": 0.17883072793483734, + "learning_rate": 9.749696449356953e-06, + "loss": 0.0036, + "step": 59570 + }, + { + "epoch": 0.3821276131775662, + "grad_norm": 0.24500377476215363, + "learning_rate": 9.749521548929008e-06, + "loss": 0.0054, + "step": 59580 + }, + { + "epoch": 0.3821917500713523, + "grad_norm": 0.34320369362831116, + "learning_rate": 9.749346588986163e-06, + "loss": 0.005, + "step": 59590 + }, + { + "epoch": 0.3822558869651384, + "grad_norm": 0.2913901209831238, + "learning_rate": 9.74917156953061e-06, + "loss": 0.0019, + "step": 59600 + }, + { + "epoch": 0.3823200238589245, + "grad_norm": 0.2491830289363861, + "learning_rate": 9.748996490564546e-06, + "loss": 0.0044, + "step": 59610 + }, + { + "epoch": 0.3823841607527106, + "grad_norm": 0.0809091329574585, + "learning_rate": 9.748821352090159e-06, + "loss": 0.0031, + "step": 59620 + }, + { + "epoch": 0.3824482976464967, + "grad_norm": 0.16306327283382416, + "learning_rate": 9.748646154109648e-06, + "loss": 0.0051, + "step": 59630 + }, + { + "epoch": 0.3825124345402828, + "grad_norm": 0.09098932892084122, + "learning_rate": 9.748470896625206e-06, + "loss": 0.0025, + "step": 59640 + }, + { + "epoch": 0.38257657143406887, + "grad_norm": 0.16054843366146088, + "learning_rate": 9.748295579639031e-06, + "loss": 0.0053, + "step": 59650 + }, + { + "epoch": 0.38264070832785496, + "grad_norm": 0.3167816698551178, + "learning_rate": 9.748120203153318e-06, + "loss": 0.0046, + "step": 59660 + }, + { + "epoch": 0.38270484522164105, + "grad_norm": 0.08863595128059387, + "learning_rate": 9.747944767170267e-06, + "loss": 0.0047, + "step": 59670 + }, + { + "epoch": 0.38276898211542715, + "grad_norm": 0.12066241353750229, + "learning_rate": 9.747769271692074e-06, + "loss": 0.0043, + "step": 59680 + }, + { + "epoch": 0.38283311900921324, + "grad_norm": 0.16344675421714783, + "learning_rate": 9.747593716720937e-06, + "loss": 0.0042, + "step": 59690 + }, + { + "epoch": 0.3828972559029994, + "grad_norm": 0.1318945288658142, + "learning_rate": 9.747418102259059e-06, + "loss": 0.0064, + "step": 59700 + }, + { + "epoch": 0.3829613927967855, + "grad_norm": 0.2592341899871826, + "learning_rate": 9.747242428308639e-06, + "loss": 0.0037, + "step": 59710 + }, + { + "epoch": 0.3830255296905716, + "grad_norm": 0.7534883618354797, + "learning_rate": 9.747066694871881e-06, + "loss": 0.0079, + "step": 59720 + }, + { + "epoch": 0.38308966658435767, + "grad_norm": 0.18560761213302612, + "learning_rate": 9.74689090195098e-06, + "loss": 0.0028, + "step": 59730 + }, + { + "epoch": 0.38315380347814376, + "grad_norm": 0.207354336977005, + "learning_rate": 9.746715049548148e-06, + "loss": 0.0034, + "step": 59740 + }, + { + "epoch": 0.38321794037192985, + "grad_norm": 0.1272207796573639, + "learning_rate": 9.746539137665582e-06, + "loss": 0.0056, + "step": 59750 + }, + { + "epoch": 0.38328207726571595, + "grad_norm": 0.23403020203113556, + "learning_rate": 9.746363166305487e-06, + "loss": 0.0123, + "step": 59760 + }, + { + "epoch": 0.38334621415950204, + "grad_norm": 0.26903918385505676, + "learning_rate": 9.746187135470072e-06, + "loss": 0.0041, + "step": 59770 + }, + { + "epoch": 0.38341035105328813, + "grad_norm": 3.419872999191284, + "learning_rate": 9.746011045161538e-06, + "loss": 0.0069, + "step": 59780 + }, + { + "epoch": 0.3834744879470742, + "grad_norm": 0.24554139375686646, + "learning_rate": 9.745834895382095e-06, + "loss": 0.0038, + "step": 59790 + }, + { + "epoch": 0.3835386248408603, + "grad_norm": 0.05302513390779495, + "learning_rate": 9.745658686133947e-06, + "loss": 0.0035, + "step": 59800 + }, + { + "epoch": 0.3836027617346464, + "grad_norm": 0.19432343542575836, + "learning_rate": 9.745482417419306e-06, + "loss": 0.0035, + "step": 59810 + }, + { + "epoch": 0.3836668986284325, + "grad_norm": 0.17563576996326447, + "learning_rate": 9.745306089240379e-06, + "loss": 0.01, + "step": 59820 + }, + { + "epoch": 0.3837310355222186, + "grad_norm": 0.16629278659820557, + "learning_rate": 9.745129701599374e-06, + "loss": 0.0052, + "step": 59830 + }, + { + "epoch": 0.3837951724160047, + "grad_norm": 0.239976704120636, + "learning_rate": 9.744953254498503e-06, + "loss": 0.006, + "step": 59840 + }, + { + "epoch": 0.38385930930979084, + "grad_norm": 0.19879131019115448, + "learning_rate": 9.744776747939977e-06, + "loss": 0.0042, + "step": 59850 + }, + { + "epoch": 0.38392344620357693, + "grad_norm": 0.0788252130150795, + "learning_rate": 9.744600181926007e-06, + "loss": 0.0034, + "step": 59860 + }, + { + "epoch": 0.383987583097363, + "grad_norm": 0.3260972201824188, + "learning_rate": 9.744423556458805e-06, + "loss": 0.0039, + "step": 59870 + }, + { + "epoch": 0.3840517199911491, + "grad_norm": 0.16560789942741394, + "learning_rate": 9.744246871540585e-06, + "loss": 0.0029, + "step": 59880 + }, + { + "epoch": 0.3841158568849352, + "grad_norm": 0.10938803106546402, + "learning_rate": 9.744070127173562e-06, + "loss": 0.0047, + "step": 59890 + }, + { + "epoch": 0.3841799937787213, + "grad_norm": 0.28586849570274353, + "learning_rate": 9.74389332335995e-06, + "loss": 0.0038, + "step": 59900 + }, + { + "epoch": 0.3842441306725074, + "grad_norm": 0.19201096892356873, + "learning_rate": 9.743716460101965e-06, + "loss": 0.0058, + "step": 59910 + }, + { + "epoch": 0.3843082675662935, + "grad_norm": 0.1080024465918541, + "learning_rate": 9.743539537401821e-06, + "loss": 0.0037, + "step": 59920 + }, + { + "epoch": 0.3843724044600796, + "grad_norm": 0.3251629173755646, + "learning_rate": 9.743362555261737e-06, + "loss": 0.0059, + "step": 59930 + }, + { + "epoch": 0.3844365413538657, + "grad_norm": 0.33186784386634827, + "learning_rate": 9.74318551368393e-06, + "loss": 0.006, + "step": 59940 + }, + { + "epoch": 0.3845006782476518, + "grad_norm": 0.20968593657016754, + "learning_rate": 9.743008412670618e-06, + "loss": 0.0035, + "step": 59950 + }, + { + "epoch": 0.38456481514143787, + "grad_norm": 0.1586632877588272, + "learning_rate": 9.74283125222402e-06, + "loss": 0.0036, + "step": 59960 + }, + { + "epoch": 0.38462895203522396, + "grad_norm": 0.012320125475525856, + "learning_rate": 9.742654032346359e-06, + "loss": 0.0031, + "step": 59970 + }, + { + "epoch": 0.38469308892901005, + "grad_norm": 0.21408711373806, + "learning_rate": 9.742476753039852e-06, + "loss": 0.0035, + "step": 59980 + }, + { + "epoch": 0.3847572258227962, + "grad_norm": 0.16455549001693726, + "learning_rate": 9.742299414306722e-06, + "loss": 0.0037, + "step": 59990 + }, + { + "epoch": 0.3848213627165823, + "grad_norm": 0.20752200484275818, + "learning_rate": 9.742122016149193e-06, + "loss": 0.0041, + "step": 60000 + }, + { + "epoch": 0.3848854996103684, + "grad_norm": 0.27880942821502686, + "learning_rate": 9.741944558569483e-06, + "loss": 0.0043, + "step": 60010 + }, + { + "epoch": 0.3849496365041545, + "grad_norm": 0.9511381983757019, + "learning_rate": 9.741767041569821e-06, + "loss": 0.0052, + "step": 60020 + }, + { + "epoch": 0.3850137733979406, + "grad_norm": 0.42910051345825195, + "learning_rate": 9.741589465152427e-06, + "loss": 0.0051, + "step": 60030 + }, + { + "epoch": 0.38507791029172667, + "grad_norm": 0.17123812437057495, + "learning_rate": 9.741411829319531e-06, + "loss": 0.0052, + "step": 60040 + }, + { + "epoch": 0.38514204718551276, + "grad_norm": 0.10715224593877792, + "learning_rate": 9.741234134073354e-06, + "loss": 0.0018, + "step": 60050 + }, + { + "epoch": 0.38520618407929885, + "grad_norm": 0.21001029014587402, + "learning_rate": 9.741056379416125e-06, + "loss": 0.0037, + "step": 60060 + }, + { + "epoch": 0.38527032097308495, + "grad_norm": 0.18050143122673035, + "learning_rate": 9.740878565350072e-06, + "loss": 0.0036, + "step": 60070 + }, + { + "epoch": 0.38533445786687104, + "grad_norm": 0.012618201784789562, + "learning_rate": 9.740700691877423e-06, + "loss": 0.003, + "step": 60080 + }, + { + "epoch": 0.38539859476065713, + "grad_norm": 0.25209400057792664, + "learning_rate": 9.740522759000404e-06, + "loss": 0.0035, + "step": 60090 + }, + { + "epoch": 0.3854627316544432, + "grad_norm": 0.49384912848472595, + "learning_rate": 9.740344766721248e-06, + "loss": 0.0071, + "step": 60100 + }, + { + "epoch": 0.3855268685482293, + "grad_norm": 0.14024218916893005, + "learning_rate": 9.740166715042184e-06, + "loss": 0.0056, + "step": 60110 + }, + { + "epoch": 0.3855910054420154, + "grad_norm": 0.026863152161240578, + "learning_rate": 9.739988603965444e-06, + "loss": 0.0039, + "step": 60120 + }, + { + "epoch": 0.38565514233580156, + "grad_norm": 0.3183062970638275, + "learning_rate": 9.739810433493258e-06, + "loss": 0.0041, + "step": 60130 + }, + { + "epoch": 0.38571927922958765, + "grad_norm": 0.20068994164466858, + "learning_rate": 9.73963220362786e-06, + "loss": 0.0041, + "step": 60140 + }, + { + "epoch": 0.38578341612337375, + "grad_norm": 0.09155667573213577, + "learning_rate": 9.739453914371484e-06, + "loss": 0.0043, + "step": 60150 + }, + { + "epoch": 0.38584755301715984, + "grad_norm": 0.055506039410829544, + "learning_rate": 9.739275565726363e-06, + "loss": 0.0037, + "step": 60160 + }, + { + "epoch": 0.38591168991094593, + "grad_norm": 0.11468420922756195, + "learning_rate": 9.73909715769473e-06, + "loss": 0.0033, + "step": 60170 + }, + { + "epoch": 0.385975826804732, + "grad_norm": 0.2966194748878479, + "learning_rate": 9.738918690278826e-06, + "loss": 0.0048, + "step": 60180 + }, + { + "epoch": 0.3860399636985181, + "grad_norm": 0.022596042603254318, + "learning_rate": 9.738740163480882e-06, + "loss": 0.0027, + "step": 60190 + }, + { + "epoch": 0.3861041005923042, + "grad_norm": 0.3111661374568939, + "learning_rate": 9.738561577303139e-06, + "loss": 0.0039, + "step": 60200 + }, + { + "epoch": 0.3861682374860903, + "grad_norm": 0.15545833110809326, + "learning_rate": 9.73838293174783e-06, + "loss": 0.0034, + "step": 60210 + }, + { + "epoch": 0.3862323743798764, + "grad_norm": 0.1548236757516861, + "learning_rate": 9.738204226817197e-06, + "loss": 0.0033, + "step": 60220 + }, + { + "epoch": 0.3862965112736625, + "grad_norm": 0.16571511328220367, + "learning_rate": 9.73802546251348e-06, + "loss": 0.0044, + "step": 60230 + }, + { + "epoch": 0.3863606481674486, + "grad_norm": 0.302288293838501, + "learning_rate": 9.737846638838918e-06, + "loss": 0.0095, + "step": 60240 + }, + { + "epoch": 0.3864247850612347, + "grad_norm": 0.2831825613975525, + "learning_rate": 9.73766775579575e-06, + "loss": 0.0039, + "step": 60250 + }, + { + "epoch": 0.38648892195502077, + "grad_norm": 0.04732539504766464, + "learning_rate": 9.737488813386219e-06, + "loss": 0.0026, + "step": 60260 + }, + { + "epoch": 0.3865530588488069, + "grad_norm": 0.1678842306137085, + "learning_rate": 9.737309811612567e-06, + "loss": 0.022, + "step": 60270 + }, + { + "epoch": 0.386617195742593, + "grad_norm": 0.4333482086658478, + "learning_rate": 9.73713075047704e-06, + "loss": 0.0056, + "step": 60280 + }, + { + "epoch": 0.3866813326363791, + "grad_norm": 0.1695176213979721, + "learning_rate": 9.736951629981877e-06, + "loss": 0.0036, + "step": 60290 + }, + { + "epoch": 0.3867454695301652, + "grad_norm": 0.2635030746459961, + "learning_rate": 9.736772450129325e-06, + "loss": 0.0065, + "step": 60300 + }, + { + "epoch": 0.3868096064239513, + "grad_norm": 0.06427385658025742, + "learning_rate": 9.73659321092163e-06, + "loss": 0.0034, + "step": 60310 + }, + { + "epoch": 0.3868737433177374, + "grad_norm": 0.25503623485565186, + "learning_rate": 9.736413912361035e-06, + "loss": 0.0055, + "step": 60320 + }, + { + "epoch": 0.3869378802115235, + "grad_norm": 0.13860364258289337, + "learning_rate": 9.736234554449788e-06, + "loss": 0.0039, + "step": 60330 + }, + { + "epoch": 0.38700201710530957, + "grad_norm": 0.13788312673568726, + "learning_rate": 9.736055137190139e-06, + "loss": 0.0046, + "step": 60340 + }, + { + "epoch": 0.38706615399909566, + "grad_norm": 0.15529346466064453, + "learning_rate": 9.735875660584335e-06, + "loss": 0.0043, + "step": 60350 + }, + { + "epoch": 0.38713029089288176, + "grad_norm": 0.15514250099658966, + "learning_rate": 9.735696124634623e-06, + "loss": 0.0046, + "step": 60360 + }, + { + "epoch": 0.38719442778666785, + "grad_norm": 0.1338747888803482, + "learning_rate": 9.735516529343255e-06, + "loss": 0.0031, + "step": 60370 + }, + { + "epoch": 0.38725856468045394, + "grad_norm": 0.17875945568084717, + "learning_rate": 9.735336874712478e-06, + "loss": 0.0043, + "step": 60380 + }, + { + "epoch": 0.38732270157424004, + "grad_norm": 0.06392395496368408, + "learning_rate": 9.735157160744548e-06, + "loss": 0.0062, + "step": 60390 + }, + { + "epoch": 0.38738683846802613, + "grad_norm": 0.11207230389118195, + "learning_rate": 9.734977387441713e-06, + "loss": 0.0056, + "step": 60400 + }, + { + "epoch": 0.3874509753618123, + "grad_norm": 0.15044747292995453, + "learning_rate": 9.734797554806229e-06, + "loss": 0.0031, + "step": 60410 + }, + { + "epoch": 0.38751511225559837, + "grad_norm": 0.24024957418441772, + "learning_rate": 9.734617662840347e-06, + "loss": 0.0047, + "step": 60420 + }, + { + "epoch": 0.38757924914938446, + "grad_norm": 0.13344290852546692, + "learning_rate": 9.734437711546321e-06, + "loss": 0.0046, + "step": 60430 + }, + { + "epoch": 0.38764338604317056, + "grad_norm": 0.2346503883600235, + "learning_rate": 9.734257700926408e-06, + "loss": 0.0044, + "step": 60440 + }, + { + "epoch": 0.38770752293695665, + "grad_norm": 0.14192451536655426, + "learning_rate": 9.734077630982863e-06, + "loss": 0.0041, + "step": 60450 + }, + { + "epoch": 0.38777165983074274, + "grad_norm": 0.23871690034866333, + "learning_rate": 9.73389750171794e-06, + "loss": 0.007, + "step": 60460 + }, + { + "epoch": 0.38783579672452884, + "grad_norm": 0.16708698868751526, + "learning_rate": 9.733717313133899e-06, + "loss": 0.003, + "step": 60470 + }, + { + "epoch": 0.38789993361831493, + "grad_norm": 0.09737008064985275, + "learning_rate": 9.733537065232998e-06, + "loss": 0.0025, + "step": 60480 + }, + { + "epoch": 0.387964070512101, + "grad_norm": 0.13287779688835144, + "learning_rate": 9.733356758017495e-06, + "loss": 0.0042, + "step": 60490 + }, + { + "epoch": 0.3880282074058871, + "grad_norm": 0.06711868941783905, + "learning_rate": 9.733176391489644e-06, + "loss": 0.0047, + "step": 60500 + }, + { + "epoch": 0.3880923442996732, + "grad_norm": 0.08290399610996246, + "learning_rate": 9.732995965651715e-06, + "loss": 0.0058, + "step": 60510 + }, + { + "epoch": 0.3881564811934593, + "grad_norm": 0.2034006267786026, + "learning_rate": 9.732815480505962e-06, + "loss": 0.0036, + "step": 60520 + }, + { + "epoch": 0.3882206180872454, + "grad_norm": 0.3656480014324188, + "learning_rate": 9.73263493605465e-06, + "loss": 0.0037, + "step": 60530 + }, + { + "epoch": 0.3882847549810315, + "grad_norm": 0.11950068920850754, + "learning_rate": 9.732454332300039e-06, + "loss": 0.0038, + "step": 60540 + }, + { + "epoch": 0.38834889187481764, + "grad_norm": 0.5113601684570312, + "learning_rate": 9.732273669244392e-06, + "loss": 0.0047, + "step": 60550 + }, + { + "epoch": 0.38841302876860373, + "grad_norm": 0.2951570153236389, + "learning_rate": 9.732092946889974e-06, + "loss": 0.0034, + "step": 60560 + }, + { + "epoch": 0.3884771656623898, + "grad_norm": 0.12037892639636993, + "learning_rate": 9.731912165239052e-06, + "loss": 0.0036, + "step": 60570 + }, + { + "epoch": 0.3885413025561759, + "grad_norm": 0.14643444120883942, + "learning_rate": 9.731731324293887e-06, + "loss": 0.0023, + "step": 60580 + }, + { + "epoch": 0.388605439449962, + "grad_norm": 0.2314303070306778, + "learning_rate": 9.731550424056745e-06, + "loss": 0.004, + "step": 60590 + }, + { + "epoch": 0.3886695763437481, + "grad_norm": 0.13713759183883667, + "learning_rate": 9.731369464529897e-06, + "loss": 0.0034, + "step": 60600 + }, + { + "epoch": 0.3887337132375342, + "grad_norm": 0.22650521993637085, + "learning_rate": 9.731188445715608e-06, + "loss": 0.0052, + "step": 60610 + }, + { + "epoch": 0.3887978501313203, + "grad_norm": 0.15206363797187805, + "learning_rate": 9.731007367616146e-06, + "loss": 0.0045, + "step": 60620 + }, + { + "epoch": 0.3888619870251064, + "grad_norm": 0.1030779778957367, + "learning_rate": 9.73082623023378e-06, + "loss": 0.0033, + "step": 60630 + }, + { + "epoch": 0.3889261239188925, + "grad_norm": 0.2049216777086258, + "learning_rate": 9.73064503357078e-06, + "loss": 0.0052, + "step": 60640 + }, + { + "epoch": 0.38899026081267857, + "grad_norm": 0.11878073960542679, + "learning_rate": 9.730463777629417e-06, + "loss": 0.0028, + "step": 60650 + }, + { + "epoch": 0.38905439770646466, + "grad_norm": 0.06839001923799515, + "learning_rate": 9.730282462411964e-06, + "loss": 0.0026, + "step": 60660 + }, + { + "epoch": 0.38911853460025075, + "grad_norm": 0.023172229528427124, + "learning_rate": 9.73010108792069e-06, + "loss": 0.0043, + "step": 60670 + }, + { + "epoch": 0.38918267149403685, + "grad_norm": 0.0986587256193161, + "learning_rate": 9.729919654157867e-06, + "loss": 0.0041, + "step": 60680 + }, + { + "epoch": 0.389246808387823, + "grad_norm": 0.1795051544904709, + "learning_rate": 9.729738161125772e-06, + "loss": 0.0041, + "step": 60690 + }, + { + "epoch": 0.3893109452816091, + "grad_norm": 0.22513192892074585, + "learning_rate": 9.729556608826676e-06, + "loss": 0.0048, + "step": 60700 + }, + { + "epoch": 0.3893750821753952, + "grad_norm": 0.496263712644577, + "learning_rate": 9.729374997262857e-06, + "loss": 0.0047, + "step": 60710 + }, + { + "epoch": 0.3894392190691813, + "grad_norm": 0.22901271283626556, + "learning_rate": 9.72919332643659e-06, + "loss": 0.0037, + "step": 60720 + }, + { + "epoch": 0.38950335596296737, + "grad_norm": 0.15260516107082367, + "learning_rate": 9.729011596350148e-06, + "loss": 0.0058, + "step": 60730 + }, + { + "epoch": 0.38956749285675346, + "grad_norm": 0.3243672251701355, + "learning_rate": 9.728829807005812e-06, + "loss": 0.0045, + "step": 60740 + }, + { + "epoch": 0.38963162975053955, + "grad_norm": 0.3474341034889221, + "learning_rate": 9.728647958405861e-06, + "loss": 0.0079, + "step": 60750 + }, + { + "epoch": 0.38969576664432565, + "grad_norm": 0.25854116678237915, + "learning_rate": 9.72846605055257e-06, + "loss": 0.0074, + "step": 60760 + }, + { + "epoch": 0.38975990353811174, + "grad_norm": 0.1458887904882431, + "learning_rate": 9.72828408344822e-06, + "loss": 0.0033, + "step": 60770 + }, + { + "epoch": 0.38982404043189783, + "grad_norm": 0.12434220314025879, + "learning_rate": 9.72810205709509e-06, + "loss": 0.0049, + "step": 60780 + }, + { + "epoch": 0.3898881773256839, + "grad_norm": 0.04019024968147278, + "learning_rate": 9.727919971495464e-06, + "loss": 0.0038, + "step": 60790 + }, + { + "epoch": 0.38995231421947, + "grad_norm": 0.1241234689950943, + "learning_rate": 9.727737826651621e-06, + "loss": 0.0071, + "step": 60800 + }, + { + "epoch": 0.3900164511132561, + "grad_norm": 0.2500709295272827, + "learning_rate": 9.727555622565845e-06, + "loss": 0.0042, + "step": 60810 + }, + { + "epoch": 0.3900805880070422, + "grad_norm": 0.17337116599082947, + "learning_rate": 9.727373359240417e-06, + "loss": 0.0052, + "step": 60820 + }, + { + "epoch": 0.39014472490082835, + "grad_norm": 0.10613390803337097, + "learning_rate": 9.727191036677624e-06, + "loss": 0.002, + "step": 60830 + }, + { + "epoch": 0.39020886179461445, + "grad_norm": 0.13982334733009338, + "learning_rate": 9.727008654879748e-06, + "loss": 0.0042, + "step": 60840 + }, + { + "epoch": 0.39027299868840054, + "grad_norm": 0.17445537447929382, + "learning_rate": 9.726826213849074e-06, + "loss": 0.0045, + "step": 60850 + }, + { + "epoch": 0.39033713558218663, + "grad_norm": 0.3702257573604584, + "learning_rate": 9.72664371358789e-06, + "loss": 0.0077, + "step": 60860 + }, + { + "epoch": 0.3904012724759727, + "grad_norm": 0.11560417711734772, + "learning_rate": 9.726461154098482e-06, + "loss": 0.004, + "step": 60870 + }, + { + "epoch": 0.3904654093697588, + "grad_norm": 0.11796073615550995, + "learning_rate": 9.726278535383138e-06, + "loss": 0.0036, + "step": 60880 + }, + { + "epoch": 0.3905295462635449, + "grad_norm": 0.1939150094985962, + "learning_rate": 9.726095857444147e-06, + "loss": 0.0063, + "step": 60890 + }, + { + "epoch": 0.390593683157331, + "grad_norm": 0.23682385683059692, + "learning_rate": 9.725913120283796e-06, + "loss": 0.0021, + "step": 60900 + }, + { + "epoch": 0.3906578200511171, + "grad_norm": 0.12155026942491531, + "learning_rate": 9.725730323904377e-06, + "loss": 0.0025, + "step": 60910 + }, + { + "epoch": 0.3907219569449032, + "grad_norm": 0.2544938921928406, + "learning_rate": 9.725547468308178e-06, + "loss": 0.0059, + "step": 60920 + }, + { + "epoch": 0.3907860938386893, + "grad_norm": 0.08578786253929138, + "learning_rate": 9.725364553497494e-06, + "loss": 0.0038, + "step": 60930 + }, + { + "epoch": 0.3908502307324754, + "grad_norm": 0.15020376443862915, + "learning_rate": 9.725181579474614e-06, + "loss": 0.0048, + "step": 60940 + }, + { + "epoch": 0.39091436762626147, + "grad_norm": 0.15422581136226654, + "learning_rate": 9.724998546241831e-06, + "loss": 0.0046, + "step": 60950 + }, + { + "epoch": 0.39097850452004757, + "grad_norm": 0.24887314438819885, + "learning_rate": 9.72481545380144e-06, + "loss": 0.0048, + "step": 60960 + }, + { + "epoch": 0.3910426414138337, + "grad_norm": 0.1745447963476181, + "learning_rate": 9.724632302155735e-06, + "loss": 0.0033, + "step": 60970 + }, + { + "epoch": 0.3911067783076198, + "grad_norm": 0.21266213059425354, + "learning_rate": 9.724449091307011e-06, + "loss": 0.0057, + "step": 60980 + }, + { + "epoch": 0.3911709152014059, + "grad_norm": 0.16322961449623108, + "learning_rate": 9.724265821257562e-06, + "loss": 0.0038, + "step": 60990 + }, + { + "epoch": 0.391235052095192, + "grad_norm": 0.2518809735774994, + "learning_rate": 9.724082492009687e-06, + "loss": 0.0064, + "step": 61000 + }, + { + "epoch": 0.391235052095192, + "eval_loss": 0.004069021437317133, + "eval_runtime": 3.3277, + "eval_samples_per_second": 60.101, + "eval_steps_per_second": 15.025, + "step": 61000 + }, + { + "epoch": 0.3912991889889781, + "grad_norm": 0.11623580008745193, + "learning_rate": 9.72389910356568e-06, + "loss": 0.006, + "step": 61010 + }, + { + "epoch": 0.3913633258827642, + "grad_norm": 0.0640629306435585, + "learning_rate": 9.723715655927844e-06, + "loss": 0.0036, + "step": 61020 + }, + { + "epoch": 0.3914274627765503, + "grad_norm": 0.11713147163391113, + "learning_rate": 9.723532149098473e-06, + "loss": 0.0031, + "step": 61030 + }, + { + "epoch": 0.39149159967033637, + "grad_norm": 0.1606680452823639, + "learning_rate": 9.72334858307987e-06, + "loss": 0.0039, + "step": 61040 + }, + { + "epoch": 0.39155573656412246, + "grad_norm": 0.2158287763595581, + "learning_rate": 9.723164957874332e-06, + "loss": 0.007, + "step": 61050 + }, + { + "epoch": 0.39161987345790855, + "grad_norm": 0.2687753438949585, + "learning_rate": 9.722981273484162e-06, + "loss": 0.0058, + "step": 61060 + }, + { + "epoch": 0.39168401035169464, + "grad_norm": 0.048382628709077835, + "learning_rate": 9.722797529911662e-06, + "loss": 0.0036, + "step": 61070 + }, + { + "epoch": 0.39174814724548074, + "grad_norm": 0.3799852430820465, + "learning_rate": 9.722613727159133e-06, + "loss": 0.0034, + "step": 61080 + }, + { + "epoch": 0.39181228413926683, + "grad_norm": 0.1671069860458374, + "learning_rate": 9.722429865228878e-06, + "loss": 0.0045, + "step": 61090 + }, + { + "epoch": 0.3918764210330529, + "grad_norm": 0.2643856704235077, + "learning_rate": 9.722245944123202e-06, + "loss": 0.0048, + "step": 61100 + }, + { + "epoch": 0.3919405579268391, + "grad_norm": 0.1426529586315155, + "learning_rate": 9.72206196384441e-06, + "loss": 0.0045, + "step": 61110 + }, + { + "epoch": 0.39200469482062517, + "grad_norm": 0.07896137982606888, + "learning_rate": 9.721877924394807e-06, + "loss": 0.0046, + "step": 61120 + }, + { + "epoch": 0.39206883171441126, + "grad_norm": 0.22783268988132477, + "learning_rate": 9.721693825776697e-06, + "loss": 0.0039, + "step": 61130 + }, + { + "epoch": 0.39213296860819735, + "grad_norm": 0.11826110631227493, + "learning_rate": 9.721509667992391e-06, + "loss": 0.0048, + "step": 61140 + }, + { + "epoch": 0.39219710550198345, + "grad_norm": 0.36788371205329895, + "learning_rate": 9.721325451044194e-06, + "loss": 0.0168, + "step": 61150 + }, + { + "epoch": 0.39226124239576954, + "grad_norm": 0.2228899598121643, + "learning_rate": 9.721141174934414e-06, + "loss": 0.0049, + "step": 61160 + }, + { + "epoch": 0.39232537928955563, + "grad_norm": 0.11477508395910263, + "learning_rate": 9.72095683966536e-06, + "loss": 0.0054, + "step": 61170 + }, + { + "epoch": 0.3923895161833417, + "grad_norm": 0.12067042291164398, + "learning_rate": 9.720772445239345e-06, + "loss": 0.0029, + "step": 61180 + }, + { + "epoch": 0.3924536530771278, + "grad_norm": 0.22580377757549286, + "learning_rate": 9.720587991658677e-06, + "loss": 0.0042, + "step": 61190 + }, + { + "epoch": 0.3925177899709139, + "grad_norm": 0.09509395062923431, + "learning_rate": 9.720403478925667e-06, + "loss": 0.0033, + "step": 61200 + }, + { + "epoch": 0.3925819268647, + "grad_norm": 0.14699745178222656, + "learning_rate": 9.72021890704263e-06, + "loss": 0.0034, + "step": 61210 + }, + { + "epoch": 0.3926460637584861, + "grad_norm": 0.1703781932592392, + "learning_rate": 9.720034276011874e-06, + "loss": 0.0038, + "step": 61220 + }, + { + "epoch": 0.3927102006522722, + "grad_norm": 0.2723254859447479, + "learning_rate": 9.719849585835715e-06, + "loss": 0.0044, + "step": 61230 + }, + { + "epoch": 0.3927743375460583, + "grad_norm": 0.09000599384307861, + "learning_rate": 9.719664836516468e-06, + "loss": 0.0029, + "step": 61240 + }, + { + "epoch": 0.3928384744398444, + "grad_norm": 0.16326692700386047, + "learning_rate": 9.719480028056448e-06, + "loss": 0.0036, + "step": 61250 + }, + { + "epoch": 0.3929026113336305, + "grad_norm": 0.29439839720726013, + "learning_rate": 9.71929516045797e-06, + "loss": 0.0041, + "step": 61260 + }, + { + "epoch": 0.3929667482274166, + "grad_norm": 0.1782740354537964, + "learning_rate": 9.719110233723352e-06, + "loss": 0.0033, + "step": 61270 + }, + { + "epoch": 0.3930308851212027, + "grad_norm": 0.19152049720287323, + "learning_rate": 9.718925247854908e-06, + "loss": 0.0052, + "step": 61280 + }, + { + "epoch": 0.3930950220149888, + "grad_norm": 0.25347113609313965, + "learning_rate": 9.71874020285496e-06, + "loss": 0.0039, + "step": 61290 + }, + { + "epoch": 0.3931591589087749, + "grad_norm": 0.10487768054008484, + "learning_rate": 9.718555098725824e-06, + "loss": 0.0016, + "step": 61300 + }, + { + "epoch": 0.393223295802561, + "grad_norm": 0.1674564778804779, + "learning_rate": 9.71836993546982e-06, + "loss": 0.005, + "step": 61310 + }, + { + "epoch": 0.3932874326963471, + "grad_norm": 0.1391586810350418, + "learning_rate": 9.718184713089268e-06, + "loss": 0.0037, + "step": 61320 + }, + { + "epoch": 0.3933515695901332, + "grad_norm": 0.14953207969665527, + "learning_rate": 9.71799943158649e-06, + "loss": 0.003, + "step": 61330 + }, + { + "epoch": 0.39341570648391927, + "grad_norm": 0.32289430499076843, + "learning_rate": 9.717814090963807e-06, + "loss": 0.0074, + "step": 61340 + }, + { + "epoch": 0.39347984337770536, + "grad_norm": 0.14276909828186035, + "learning_rate": 9.717628691223542e-06, + "loss": 0.0036, + "step": 61350 + }, + { + "epoch": 0.39354398027149146, + "grad_norm": 0.11924324929714203, + "learning_rate": 9.717443232368018e-06, + "loss": 0.0052, + "step": 61360 + }, + { + "epoch": 0.39360811716527755, + "grad_norm": 0.20032940804958344, + "learning_rate": 9.71725771439956e-06, + "loss": 0.0036, + "step": 61370 + }, + { + "epoch": 0.39367225405906364, + "grad_norm": 0.16514188051223755, + "learning_rate": 9.717072137320489e-06, + "loss": 0.0031, + "step": 61380 + }, + { + "epoch": 0.39373639095284974, + "grad_norm": 0.2525750398635864, + "learning_rate": 9.716886501133135e-06, + "loss": 0.0044, + "step": 61390 + }, + { + "epoch": 0.3938005278466359, + "grad_norm": 0.08110995590686798, + "learning_rate": 9.71670080583982e-06, + "loss": 0.0036, + "step": 61400 + }, + { + "epoch": 0.393864664740422, + "grad_norm": 0.13291466236114502, + "learning_rate": 9.716515051442876e-06, + "loss": 0.0039, + "step": 61410 + }, + { + "epoch": 0.39392880163420807, + "grad_norm": 0.4126153588294983, + "learning_rate": 9.716329237944624e-06, + "loss": 0.0044, + "step": 61420 + }, + { + "epoch": 0.39399293852799416, + "grad_norm": 0.19799847900867462, + "learning_rate": 9.716143365347398e-06, + "loss": 0.0023, + "step": 61430 + }, + { + "epoch": 0.39405707542178026, + "grad_norm": 0.20207390189170837, + "learning_rate": 9.715957433653524e-06, + "loss": 0.0049, + "step": 61440 + }, + { + "epoch": 0.39412121231556635, + "grad_norm": 0.10306062549352646, + "learning_rate": 9.715771442865334e-06, + "loss": 0.0051, + "step": 61450 + }, + { + "epoch": 0.39418534920935244, + "grad_norm": 0.47320789098739624, + "learning_rate": 9.715585392985157e-06, + "loss": 0.0054, + "step": 61460 + }, + { + "epoch": 0.39424948610313854, + "grad_norm": 0.2486879825592041, + "learning_rate": 9.715399284015325e-06, + "loss": 0.0047, + "step": 61470 + }, + { + "epoch": 0.39431362299692463, + "grad_norm": 0.17979192733764648, + "learning_rate": 9.71521311595817e-06, + "loss": 0.0063, + "step": 61480 + }, + { + "epoch": 0.3943777598907107, + "grad_norm": 0.23293660581111908, + "learning_rate": 9.715026888816024e-06, + "loss": 0.0079, + "step": 61490 + }, + { + "epoch": 0.3944418967844968, + "grad_norm": 0.35949429869651794, + "learning_rate": 9.714840602591222e-06, + "loss": 0.0052, + "step": 61500 + }, + { + "epoch": 0.3945060336782829, + "grad_norm": 0.26240459084510803, + "learning_rate": 9.714654257286098e-06, + "loss": 0.0029, + "step": 61510 + }, + { + "epoch": 0.394570170572069, + "grad_norm": 0.0906815305352211, + "learning_rate": 9.714467852902987e-06, + "loss": 0.006, + "step": 61520 + }, + { + "epoch": 0.3946343074658551, + "grad_norm": 0.1907978355884552, + "learning_rate": 9.714281389444222e-06, + "loss": 0.0055, + "step": 61530 + }, + { + "epoch": 0.39469844435964124, + "grad_norm": 0.2204655110836029, + "learning_rate": 9.714094866912144e-06, + "loss": 0.0036, + "step": 61540 + }, + { + "epoch": 0.39476258125342734, + "grad_norm": 0.05758964642882347, + "learning_rate": 9.713908285309088e-06, + "loss": 0.0033, + "step": 61550 + }, + { + "epoch": 0.39482671814721343, + "grad_norm": 0.1178441122174263, + "learning_rate": 9.713721644637391e-06, + "loss": 0.0053, + "step": 61560 + }, + { + "epoch": 0.3948908550409995, + "grad_norm": 0.1403207629919052, + "learning_rate": 9.713534944899394e-06, + "loss": 0.0039, + "step": 61570 + }, + { + "epoch": 0.3949549919347856, + "grad_norm": 0.15063434839248657, + "learning_rate": 9.713348186097435e-06, + "loss": 0.0051, + "step": 61580 + }, + { + "epoch": 0.3950191288285717, + "grad_norm": 0.04773133993148804, + "learning_rate": 9.713161368233857e-06, + "loss": 0.0055, + "step": 61590 + }, + { + "epoch": 0.3950832657223578, + "grad_norm": 0.35290268063545227, + "learning_rate": 9.712974491310996e-06, + "loss": 0.0042, + "step": 61600 + }, + { + "epoch": 0.3951474026161439, + "grad_norm": 0.15310856699943542, + "learning_rate": 9.712787555331196e-06, + "loss": 0.0032, + "step": 61610 + }, + { + "epoch": 0.39521153950993, + "grad_norm": 0.09890574961900711, + "learning_rate": 9.712600560296802e-06, + "loss": 0.0054, + "step": 61620 + }, + { + "epoch": 0.3952756764037161, + "grad_norm": 0.17375418543815613, + "learning_rate": 9.712413506210151e-06, + "loss": 0.0029, + "step": 61630 + }, + { + "epoch": 0.3953398132975022, + "grad_norm": 0.19882069528102875, + "learning_rate": 9.712226393073596e-06, + "loss": 0.0044, + "step": 61640 + }, + { + "epoch": 0.39540395019128827, + "grad_norm": 0.07727350294589996, + "learning_rate": 9.712039220889473e-06, + "loss": 0.0047, + "step": 61650 + }, + { + "epoch": 0.39546808708507436, + "grad_norm": 0.336527556180954, + "learning_rate": 9.711851989660131e-06, + "loss": 0.0039, + "step": 61660 + }, + { + "epoch": 0.39553222397886045, + "grad_norm": 0.4891013205051422, + "learning_rate": 9.711664699387917e-06, + "loss": 0.0072, + "step": 61670 + }, + { + "epoch": 0.3955963608726466, + "grad_norm": 0.16443385183811188, + "learning_rate": 9.711477350075179e-06, + "loss": 0.0041, + "step": 61680 + }, + { + "epoch": 0.3956604977664327, + "grad_norm": 0.1870088130235672, + "learning_rate": 9.71128994172426e-06, + "loss": 0.0033, + "step": 61690 + }, + { + "epoch": 0.3957246346602188, + "grad_norm": 0.4158104360103607, + "learning_rate": 9.71110247433751e-06, + "loss": 0.0037, + "step": 61700 + }, + { + "epoch": 0.3957887715540049, + "grad_norm": 0.30746588110923767, + "learning_rate": 9.71091494791728e-06, + "loss": 0.0048, + "step": 61710 + }, + { + "epoch": 0.395852908447791, + "grad_norm": 0.22402305901050568, + "learning_rate": 9.710727362465919e-06, + "loss": 0.0045, + "step": 61720 + }, + { + "epoch": 0.39591704534157707, + "grad_norm": 0.26094120740890503, + "learning_rate": 9.710539717985778e-06, + "loss": 0.004, + "step": 61730 + }, + { + "epoch": 0.39598118223536316, + "grad_norm": 0.0821923241019249, + "learning_rate": 9.710352014479208e-06, + "loss": 0.0051, + "step": 61740 + }, + { + "epoch": 0.39604531912914925, + "grad_norm": 0.08290160447359085, + "learning_rate": 9.71016425194856e-06, + "loss": 0.0057, + "step": 61750 + }, + { + "epoch": 0.39610945602293535, + "grad_norm": 0.19509842991828918, + "learning_rate": 9.709976430396188e-06, + "loss": 0.0047, + "step": 61760 + }, + { + "epoch": 0.39617359291672144, + "grad_norm": 0.13025161623954773, + "learning_rate": 9.709788549824445e-06, + "loss": 0.003, + "step": 61770 + }, + { + "epoch": 0.39623772981050753, + "grad_norm": 0.36803415417671204, + "learning_rate": 9.709600610235686e-06, + "loss": 0.0034, + "step": 61780 + }, + { + "epoch": 0.3963018667042936, + "grad_norm": 0.11032091081142426, + "learning_rate": 9.709412611632264e-06, + "loss": 0.005, + "step": 61790 + }, + { + "epoch": 0.3963660035980797, + "grad_norm": 0.10881927609443665, + "learning_rate": 9.709224554016537e-06, + "loss": 0.0038, + "step": 61800 + }, + { + "epoch": 0.3964301404918658, + "grad_norm": 0.05322081968188286, + "learning_rate": 9.709036437390861e-06, + "loss": 0.005, + "step": 61810 + }, + { + "epoch": 0.39649427738565196, + "grad_norm": 0.06323673576116562, + "learning_rate": 9.708848261757593e-06, + "loss": 0.0019, + "step": 61820 + }, + { + "epoch": 0.39655841427943805, + "grad_norm": 0.47415101528167725, + "learning_rate": 9.70866002711909e-06, + "loss": 0.0079, + "step": 61830 + }, + { + "epoch": 0.39662255117322415, + "grad_norm": 0.23023667931556702, + "learning_rate": 9.708471733477714e-06, + "loss": 0.0048, + "step": 61840 + }, + { + "epoch": 0.39668668806701024, + "grad_norm": 0.21648605167865753, + "learning_rate": 9.708283380835818e-06, + "loss": 0.0086, + "step": 61850 + }, + { + "epoch": 0.39675082496079633, + "grad_norm": 0.2983563244342804, + "learning_rate": 9.708094969195769e-06, + "loss": 0.0038, + "step": 61860 + }, + { + "epoch": 0.3968149618545824, + "grad_norm": 0.15146301686763763, + "learning_rate": 9.707906498559926e-06, + "loss": 0.0043, + "step": 61870 + }, + { + "epoch": 0.3968790987483685, + "grad_norm": 0.153154194355011, + "learning_rate": 9.707717968930648e-06, + "loss": 0.0026, + "step": 61880 + }, + { + "epoch": 0.3969432356421546, + "grad_norm": 0.13013337552547455, + "learning_rate": 9.707529380310301e-06, + "loss": 0.006, + "step": 61890 + }, + { + "epoch": 0.3970073725359407, + "grad_norm": 0.17496158182621002, + "learning_rate": 9.707340732701245e-06, + "loss": 0.0045, + "step": 61900 + }, + { + "epoch": 0.3970715094297268, + "grad_norm": 0.4708886742591858, + "learning_rate": 9.707152026105846e-06, + "loss": 0.0065, + "step": 61910 + }, + { + "epoch": 0.3971356463235129, + "grad_norm": 0.11030770093202591, + "learning_rate": 9.706963260526468e-06, + "loss": 0.0053, + "step": 61920 + }, + { + "epoch": 0.397199783217299, + "grad_norm": 0.16902785003185272, + "learning_rate": 9.706774435965477e-06, + "loss": 0.0046, + "step": 61930 + }, + { + "epoch": 0.3972639201110851, + "grad_norm": 0.2294456958770752, + "learning_rate": 9.706585552425236e-06, + "loss": 0.0056, + "step": 61940 + }, + { + "epoch": 0.39732805700487117, + "grad_norm": 0.2020728588104248, + "learning_rate": 9.706396609908115e-06, + "loss": 0.0045, + "step": 61950 + }, + { + "epoch": 0.3973921938986573, + "grad_norm": 0.061888452619314194, + "learning_rate": 9.706207608416483e-06, + "loss": 0.0049, + "step": 61960 + }, + { + "epoch": 0.3974563307924434, + "grad_norm": 0.08395054936408997, + "learning_rate": 9.706018547952704e-06, + "loss": 0.0028, + "step": 61970 + }, + { + "epoch": 0.3975204676862295, + "grad_norm": 0.1081097349524498, + "learning_rate": 9.70582942851915e-06, + "loss": 0.0042, + "step": 61980 + }, + { + "epoch": 0.3975846045800156, + "grad_norm": 0.16980428993701935, + "learning_rate": 9.70564025011819e-06, + "loss": 0.004, + "step": 61990 + }, + { + "epoch": 0.3976487414738017, + "grad_norm": 0.22608888149261475, + "learning_rate": 9.705451012752196e-06, + "loss": 0.0042, + "step": 62000 + }, + { + "epoch": 0.3977128783675878, + "grad_norm": 0.2874959409236908, + "learning_rate": 9.705261716423536e-06, + "loss": 0.0037, + "step": 62010 + }, + { + "epoch": 0.3977770152613739, + "grad_norm": 0.1783289909362793, + "learning_rate": 9.705072361134584e-06, + "loss": 0.0041, + "step": 62020 + }, + { + "epoch": 0.39784115215515997, + "grad_norm": 0.11932146549224854, + "learning_rate": 9.704882946887715e-06, + "loss": 0.0174, + "step": 62030 + }, + { + "epoch": 0.39790528904894606, + "grad_norm": 0.22151726484298706, + "learning_rate": 9.704693473685298e-06, + "loss": 0.0027, + "step": 62040 + }, + { + "epoch": 0.39796942594273216, + "grad_norm": 0.17277520895004272, + "learning_rate": 9.70450394152971e-06, + "loss": 0.003, + "step": 62050 + }, + { + "epoch": 0.39803356283651825, + "grad_norm": 0.2572478950023651, + "learning_rate": 9.704314350423325e-06, + "loss": 0.0051, + "step": 62060 + }, + { + "epoch": 0.39809769973030434, + "grad_norm": 0.21472808718681335, + "learning_rate": 9.704124700368518e-06, + "loss": 0.0041, + "step": 62070 + }, + { + "epoch": 0.39816183662409044, + "grad_norm": 0.053281769156455994, + "learning_rate": 9.703934991367668e-06, + "loss": 0.0063, + "step": 62080 + }, + { + "epoch": 0.39822597351787653, + "grad_norm": 0.09664373099803925, + "learning_rate": 9.70374522342315e-06, + "loss": 0.0029, + "step": 62090 + }, + { + "epoch": 0.3982901104116627, + "grad_norm": 0.10475917160511017, + "learning_rate": 9.703555396537343e-06, + "loss": 0.0042, + "step": 62100 + }, + { + "epoch": 0.39835424730544877, + "grad_norm": 0.16188351809978485, + "learning_rate": 9.703365510712626e-06, + "loss": 0.0038, + "step": 62110 + }, + { + "epoch": 0.39841838419923487, + "grad_norm": 0.2163073718547821, + "learning_rate": 9.703175565951376e-06, + "loss": 0.0023, + "step": 62120 + }, + { + "epoch": 0.39848252109302096, + "grad_norm": 0.1845230907201767, + "learning_rate": 9.702985562255978e-06, + "loss": 0.0045, + "step": 62130 + }, + { + "epoch": 0.39854665798680705, + "grad_norm": 0.22202342748641968, + "learning_rate": 9.702795499628807e-06, + "loss": 0.0037, + "step": 62140 + }, + { + "epoch": 0.39861079488059314, + "grad_norm": 0.21595120429992676, + "learning_rate": 9.702605378072248e-06, + "loss": 0.0053, + "step": 62150 + }, + { + "epoch": 0.39867493177437924, + "grad_norm": 0.45596906542778015, + "learning_rate": 9.702415197588682e-06, + "loss": 0.0072, + "step": 62160 + }, + { + "epoch": 0.39873906866816533, + "grad_norm": 0.5580806732177734, + "learning_rate": 9.702224958180494e-06, + "loss": 0.0051, + "step": 62170 + }, + { + "epoch": 0.3988032055619514, + "grad_norm": 0.2247030884027481, + "learning_rate": 9.702034659850066e-06, + "loss": 0.004, + "step": 62180 + }, + { + "epoch": 0.3988673424557375, + "grad_norm": 0.18748068809509277, + "learning_rate": 9.701844302599784e-06, + "loss": 0.0042, + "step": 62190 + }, + { + "epoch": 0.3989314793495236, + "grad_norm": 0.42575669288635254, + "learning_rate": 9.701653886432031e-06, + "loss": 0.0048, + "step": 62200 + }, + { + "epoch": 0.3989956162433097, + "grad_norm": 0.1619352400302887, + "learning_rate": 9.701463411349195e-06, + "loss": 0.0036, + "step": 62210 + }, + { + "epoch": 0.3990597531370958, + "grad_norm": 0.12561991810798645, + "learning_rate": 9.701272877353664e-06, + "loss": 0.0049, + "step": 62220 + }, + { + "epoch": 0.3991238900308819, + "grad_norm": 0.23943594098091125, + "learning_rate": 9.701082284447823e-06, + "loss": 0.0042, + "step": 62230 + }, + { + "epoch": 0.39918802692466804, + "grad_norm": 0.1591866910457611, + "learning_rate": 9.700891632634059e-06, + "loss": 0.0097, + "step": 62240 + }, + { + "epoch": 0.39925216381845413, + "grad_norm": 0.09385634958744049, + "learning_rate": 9.700700921914765e-06, + "loss": 0.0063, + "step": 62250 + }, + { + "epoch": 0.3993163007122402, + "grad_norm": 0.12799791991710663, + "learning_rate": 9.70051015229233e-06, + "loss": 0.0026, + "step": 62260 + }, + { + "epoch": 0.3993804376060263, + "grad_norm": 0.06453348696231842, + "learning_rate": 9.700319323769143e-06, + "loss": 0.0032, + "step": 62270 + }, + { + "epoch": 0.3994445744998124, + "grad_norm": 0.2493322491645813, + "learning_rate": 9.700128436347595e-06, + "loss": 0.0041, + "step": 62280 + }, + { + "epoch": 0.3995087113935985, + "grad_norm": 0.13772499561309814, + "learning_rate": 9.699937490030078e-06, + "loss": 0.003, + "step": 62290 + }, + { + "epoch": 0.3995728482873846, + "grad_norm": 0.3720828592777252, + "learning_rate": 9.699746484818986e-06, + "loss": 0.0028, + "step": 62300 + }, + { + "epoch": 0.3996369851811707, + "grad_norm": 0.19337330758571625, + "learning_rate": 9.699555420716711e-06, + "loss": 0.0032, + "step": 62310 + }, + { + "epoch": 0.3997011220749568, + "grad_norm": 0.1157507449388504, + "learning_rate": 9.699364297725649e-06, + "loss": 0.0043, + "step": 62320 + }, + { + "epoch": 0.3997652589687429, + "grad_norm": 0.14450615644454956, + "learning_rate": 9.699173115848193e-06, + "loss": 0.0051, + "step": 62330 + }, + { + "epoch": 0.39982939586252897, + "grad_norm": 0.24375490844249725, + "learning_rate": 9.698981875086739e-06, + "loss": 0.0034, + "step": 62340 + }, + { + "epoch": 0.39989353275631506, + "grad_norm": 0.3356948792934418, + "learning_rate": 9.698790575443685e-06, + "loss": 0.0043, + "step": 62350 + }, + { + "epoch": 0.39995766965010116, + "grad_norm": 0.11101289093494415, + "learning_rate": 9.698599216921426e-06, + "loss": 0.0038, + "step": 62360 + }, + { + "epoch": 0.40002180654388725, + "grad_norm": 0.1932825893163681, + "learning_rate": 9.69840779952236e-06, + "loss": 0.0053, + "step": 62370 + }, + { + "epoch": 0.4000859434376734, + "grad_norm": 0.2052309662103653, + "learning_rate": 9.698216323248888e-06, + "loss": 0.0027, + "step": 62380 + }, + { + "epoch": 0.4001500803314595, + "grad_norm": 0.16054415702819824, + "learning_rate": 9.698024788103406e-06, + "loss": 0.0054, + "step": 62390 + }, + { + "epoch": 0.4002142172252456, + "grad_norm": 0.22121907770633698, + "learning_rate": 9.697833194088317e-06, + "loss": 0.004, + "step": 62400 + }, + { + "epoch": 0.4002783541190317, + "grad_norm": 0.35723811388015747, + "learning_rate": 9.69764154120602e-06, + "loss": 0.0039, + "step": 62410 + }, + { + "epoch": 0.40034249101281777, + "grad_norm": 0.17922592163085938, + "learning_rate": 9.697449829458918e-06, + "loss": 0.0048, + "step": 62420 + }, + { + "epoch": 0.40040662790660386, + "grad_norm": 0.3429785966873169, + "learning_rate": 9.697258058849413e-06, + "loss": 0.0036, + "step": 62430 + }, + { + "epoch": 0.40047076480038996, + "grad_norm": 0.23051443696022034, + "learning_rate": 9.697066229379906e-06, + "loss": 0.0068, + "step": 62440 + }, + { + "epoch": 0.40053490169417605, + "grad_norm": 0.12451818585395813, + "learning_rate": 9.696874341052803e-06, + "loss": 0.0049, + "step": 62450 + }, + { + "epoch": 0.40059903858796214, + "grad_norm": 0.11693712323904037, + "learning_rate": 9.696682393870507e-06, + "loss": 0.004, + "step": 62460 + }, + { + "epoch": 0.40066317548174823, + "grad_norm": 0.4547955095767975, + "learning_rate": 9.696490387835425e-06, + "loss": 0.0033, + "step": 62470 + }, + { + "epoch": 0.40072731237553433, + "grad_norm": 0.0886857807636261, + "learning_rate": 9.69629832294996e-06, + "loss": 0.0024, + "step": 62480 + }, + { + "epoch": 0.4007914492693204, + "grad_norm": 0.10484019666910172, + "learning_rate": 9.696106199216521e-06, + "loss": 0.0036, + "step": 62490 + }, + { + "epoch": 0.4008555861631065, + "grad_norm": 0.3736126720905304, + "learning_rate": 9.695914016637517e-06, + "loss": 0.0078, + "step": 62500 + }, + { + "epoch": 0.4009197230568926, + "grad_norm": 0.21714961528778076, + "learning_rate": 9.695721775215353e-06, + "loss": 0.0029, + "step": 62510 + }, + { + "epoch": 0.40098385995067876, + "grad_norm": 0.1946527063846588, + "learning_rate": 9.695529474952439e-06, + "loss": 0.0044, + "step": 62520 + }, + { + "epoch": 0.40104799684446485, + "grad_norm": 0.14760996401309967, + "learning_rate": 9.695337115851186e-06, + "loss": 0.0032, + "step": 62530 + }, + { + "epoch": 0.40111213373825094, + "grad_norm": 0.5645552277565002, + "learning_rate": 9.695144697914001e-06, + "loss": 0.0111, + "step": 62540 + }, + { + "epoch": 0.40117627063203704, + "grad_norm": 0.32809337973594666, + "learning_rate": 9.694952221143299e-06, + "loss": 0.0035, + "step": 62550 + }, + { + "epoch": 0.40124040752582313, + "grad_norm": 0.1572769433259964, + "learning_rate": 9.694759685541491e-06, + "loss": 0.0061, + "step": 62560 + }, + { + "epoch": 0.4013045444196092, + "grad_norm": 0.07307353615760803, + "learning_rate": 9.694567091110987e-06, + "loss": 0.0067, + "step": 62570 + }, + { + "epoch": 0.4013686813133953, + "grad_norm": 0.2429163157939911, + "learning_rate": 9.694374437854204e-06, + "loss": 0.0039, + "step": 62580 + }, + { + "epoch": 0.4014328182071814, + "grad_norm": 0.17218413949012756, + "learning_rate": 9.694181725773553e-06, + "loss": 0.0059, + "step": 62590 + }, + { + "epoch": 0.4014969551009675, + "grad_norm": 0.11023490130901337, + "learning_rate": 9.693988954871449e-06, + "loss": 0.0057, + "step": 62600 + }, + { + "epoch": 0.4015610919947536, + "grad_norm": 0.008556004613637924, + "learning_rate": 9.693796125150312e-06, + "loss": 0.0044, + "step": 62610 + }, + { + "epoch": 0.4016252288885397, + "grad_norm": 0.35116973519325256, + "learning_rate": 9.693603236612551e-06, + "loss": 0.0041, + "step": 62620 + }, + { + "epoch": 0.4016893657823258, + "grad_norm": 0.2881322503089905, + "learning_rate": 9.69341028926059e-06, + "loss": 0.0047, + "step": 62630 + }, + { + "epoch": 0.4017535026761119, + "grad_norm": 0.1554815024137497, + "learning_rate": 9.693217283096843e-06, + "loss": 0.0055, + "step": 62640 + }, + { + "epoch": 0.40181763956989797, + "grad_norm": 0.2013331651687622, + "learning_rate": 9.693024218123728e-06, + "loss": 0.0047, + "step": 62650 + }, + { + "epoch": 0.4018817764636841, + "grad_norm": 0.19363749027252197, + "learning_rate": 9.692831094343666e-06, + "loss": 0.0052, + "step": 62660 + }, + { + "epoch": 0.4019459133574702, + "grad_norm": 0.10971488803625107, + "learning_rate": 9.692637911759077e-06, + "loss": 0.0037, + "step": 62670 + }, + { + "epoch": 0.4020100502512563, + "grad_norm": 0.24738819897174835, + "learning_rate": 9.692444670372382e-06, + "loss": 0.0044, + "step": 62680 + }, + { + "epoch": 0.4020741871450424, + "grad_norm": 0.05652572959661484, + "learning_rate": 9.692251370186002e-06, + "loss": 0.0029, + "step": 62690 + }, + { + "epoch": 0.4021383240388285, + "grad_norm": 0.2002425640821457, + "learning_rate": 9.692058011202356e-06, + "loss": 0.004, + "step": 62700 + }, + { + "epoch": 0.4022024609326146, + "grad_norm": 0.10862979292869568, + "learning_rate": 9.691864593423872e-06, + "loss": 0.0037, + "step": 62710 + }, + { + "epoch": 0.4022665978264007, + "grad_norm": 0.15046946704387665, + "learning_rate": 9.69167111685297e-06, + "loss": 0.0024, + "step": 62720 + }, + { + "epoch": 0.40233073472018677, + "grad_norm": 0.046050697565078735, + "learning_rate": 9.691477581492076e-06, + "loss": 0.005, + "step": 62730 + }, + { + "epoch": 0.40239487161397286, + "grad_norm": 0.20435963571071625, + "learning_rate": 9.691283987343616e-06, + "loss": 0.0045, + "step": 62740 + }, + { + "epoch": 0.40245900850775895, + "grad_norm": 0.32580456137657166, + "learning_rate": 9.691090334410014e-06, + "loss": 0.0031, + "step": 62750 + }, + { + "epoch": 0.40252314540154505, + "grad_norm": 0.1473287045955658, + "learning_rate": 9.6908966226937e-06, + "loss": 0.004, + "step": 62760 + }, + { + "epoch": 0.40258728229533114, + "grad_norm": 0.1179991215467453, + "learning_rate": 9.690702852197094e-06, + "loss": 0.0044, + "step": 62770 + }, + { + "epoch": 0.40265141918911723, + "grad_norm": 0.33118948340415955, + "learning_rate": 9.690509022922632e-06, + "loss": 0.0088, + "step": 62780 + }, + { + "epoch": 0.4027155560829033, + "grad_norm": 0.36124172806739807, + "learning_rate": 9.690315134872738e-06, + "loss": 0.0043, + "step": 62790 + }, + { + "epoch": 0.4027796929766894, + "grad_norm": 0.2574802339076996, + "learning_rate": 9.690121188049845e-06, + "loss": 0.0036, + "step": 62800 + }, + { + "epoch": 0.40284382987047557, + "grad_norm": 0.2140289694070816, + "learning_rate": 9.689927182456383e-06, + "loss": 0.0038, + "step": 62810 + }, + { + "epoch": 0.40290796676426166, + "grad_norm": 0.041685204952955246, + "learning_rate": 9.689733118094777e-06, + "loss": 0.0042, + "step": 62820 + }, + { + "epoch": 0.40297210365804775, + "grad_norm": 0.2141823172569275, + "learning_rate": 9.689538994967467e-06, + "loss": 0.0032, + "step": 62830 + }, + { + "epoch": 0.40303624055183385, + "grad_norm": 0.17390379309654236, + "learning_rate": 9.68934481307688e-06, + "loss": 0.008, + "step": 62840 + }, + { + "epoch": 0.40310037744561994, + "grad_norm": 0.23098264634609222, + "learning_rate": 9.689150572425452e-06, + "loss": 0.0052, + "step": 62850 + }, + { + "epoch": 0.40316451433940603, + "grad_norm": 0.18938297033309937, + "learning_rate": 9.688956273015617e-06, + "loss": 0.005, + "step": 62860 + }, + { + "epoch": 0.4032286512331921, + "grad_norm": 0.14759260416030884, + "learning_rate": 9.688761914849806e-06, + "loss": 0.0046, + "step": 62870 + }, + { + "epoch": 0.4032927881269782, + "grad_norm": 0.11562065780162811, + "learning_rate": 9.688567497930458e-06, + "loss": 0.0041, + "step": 62880 + }, + { + "epoch": 0.4033569250207643, + "grad_norm": 0.20137348771095276, + "learning_rate": 9.68837302226001e-06, + "loss": 0.0053, + "step": 62890 + }, + { + "epoch": 0.4034210619145504, + "grad_norm": 0.08316779136657715, + "learning_rate": 9.688178487840896e-06, + "loss": 0.0032, + "step": 62900 + }, + { + "epoch": 0.4034851988083365, + "grad_norm": 0.1608303189277649, + "learning_rate": 9.687983894675555e-06, + "loss": 0.0044, + "step": 62910 + }, + { + "epoch": 0.4035493357021226, + "grad_norm": 0.19574500620365143, + "learning_rate": 9.687789242766426e-06, + "loss": 0.0026, + "step": 62920 + }, + { + "epoch": 0.4036134725959087, + "grad_norm": 0.4132160246372223, + "learning_rate": 9.687594532115947e-06, + "loss": 0.0044, + "step": 62930 + }, + { + "epoch": 0.4036776094896948, + "grad_norm": 0.2931022346019745, + "learning_rate": 9.687399762726559e-06, + "loss": 0.0069, + "step": 62940 + }, + { + "epoch": 0.4037417463834809, + "grad_norm": 0.15385998785495758, + "learning_rate": 9.687204934600701e-06, + "loss": 0.0033, + "step": 62950 + }, + { + "epoch": 0.403805883277267, + "grad_norm": 0.1795760542154312, + "learning_rate": 9.687010047740816e-06, + "loss": 0.0043, + "step": 62960 + }, + { + "epoch": 0.4038700201710531, + "grad_norm": 0.33173665404319763, + "learning_rate": 9.686815102149346e-06, + "loss": 0.0044, + "step": 62970 + }, + { + "epoch": 0.4039341570648392, + "grad_norm": 0.3033214211463928, + "learning_rate": 9.686620097828732e-06, + "loss": 0.0029, + "step": 62980 + }, + { + "epoch": 0.4039982939586253, + "grad_norm": 0.1678515523672104, + "learning_rate": 9.686425034781418e-06, + "loss": 0.0033, + "step": 62990 + }, + { + "epoch": 0.4040624308524114, + "grad_norm": 0.07023715227842331, + "learning_rate": 9.686229913009851e-06, + "loss": 0.0058, + "step": 63000 + }, + { + "epoch": 0.4041265677461975, + "grad_norm": 0.06690052151679993, + "learning_rate": 9.686034732516471e-06, + "loss": 0.0031, + "step": 63010 + }, + { + "epoch": 0.4041907046399836, + "grad_norm": 0.1076996698975563, + "learning_rate": 9.68583949330373e-06, + "loss": 0.0052, + "step": 63020 + }, + { + "epoch": 0.40425484153376967, + "grad_norm": 0.13543549180030823, + "learning_rate": 9.685644195374068e-06, + "loss": 0.0039, + "step": 63030 + }, + { + "epoch": 0.40431897842755576, + "grad_norm": 0.07360082864761353, + "learning_rate": 9.685448838729939e-06, + "loss": 0.0038, + "step": 63040 + }, + { + "epoch": 0.40438311532134186, + "grad_norm": 0.19905249774456024, + "learning_rate": 9.685253423373786e-06, + "loss": 0.0034, + "step": 63050 + }, + { + "epoch": 0.40444725221512795, + "grad_norm": 0.08411014080047607, + "learning_rate": 9.68505794930806e-06, + "loss": 0.0029, + "step": 63060 + }, + { + "epoch": 0.40451138910891404, + "grad_norm": 0.103714220225811, + "learning_rate": 9.684862416535207e-06, + "loss": 0.0034, + "step": 63070 + }, + { + "epoch": 0.40457552600270014, + "grad_norm": 0.2893984317779541, + "learning_rate": 9.68466682505768e-06, + "loss": 0.0043, + "step": 63080 + }, + { + "epoch": 0.4046396628964863, + "grad_norm": 0.11062141507863998, + "learning_rate": 9.684471174877932e-06, + "loss": 0.004, + "step": 63090 + }, + { + "epoch": 0.4047037997902724, + "grad_norm": 0.20066706836223602, + "learning_rate": 9.68427546599841e-06, + "loss": 0.003, + "step": 63100 + }, + { + "epoch": 0.40476793668405847, + "grad_norm": 0.29416951537132263, + "learning_rate": 9.684079698421569e-06, + "loss": 0.0063, + "step": 63110 + }, + { + "epoch": 0.40483207357784456, + "grad_norm": 0.06612326204776764, + "learning_rate": 9.683883872149864e-06, + "loss": 0.0031, + "step": 63120 + }, + { + "epoch": 0.40489621047163066, + "grad_norm": 0.03334604203701019, + "learning_rate": 9.683687987185744e-06, + "loss": 0.0036, + "step": 63130 + }, + { + "epoch": 0.40496034736541675, + "grad_norm": 0.6492279767990112, + "learning_rate": 9.683492043531665e-06, + "loss": 0.0058, + "step": 63140 + }, + { + "epoch": 0.40502448425920284, + "grad_norm": 0.2623600661754608, + "learning_rate": 9.683296041190087e-06, + "loss": 0.0039, + "step": 63150 + }, + { + "epoch": 0.40508862115298894, + "grad_norm": 0.13972336053848267, + "learning_rate": 9.683099980163461e-06, + "loss": 0.0037, + "step": 63160 + }, + { + "epoch": 0.40515275804677503, + "grad_norm": 0.35072267055511475, + "learning_rate": 9.682903860454245e-06, + "loss": 0.0031, + "step": 63170 + }, + { + "epoch": 0.4052168949405611, + "grad_norm": 0.1438094824552536, + "learning_rate": 9.682707682064897e-06, + "loss": 0.0034, + "step": 63180 + }, + { + "epoch": 0.4052810318343472, + "grad_norm": 0.4629147946834564, + "learning_rate": 9.682511444997876e-06, + "loss": 0.0057, + "step": 63190 + }, + { + "epoch": 0.4053451687281333, + "grad_norm": 0.135417640209198, + "learning_rate": 9.682315149255638e-06, + "loss": 0.0035, + "step": 63200 + }, + { + "epoch": 0.4054093056219194, + "grad_norm": 0.30805763602256775, + "learning_rate": 9.682118794840647e-06, + "loss": 0.0044, + "step": 63210 + }, + { + "epoch": 0.4054734425157055, + "grad_norm": 0.12094740569591522, + "learning_rate": 9.681922381755359e-06, + "loss": 0.0032, + "step": 63220 + }, + { + "epoch": 0.40553757940949164, + "grad_norm": 0.1189800575375557, + "learning_rate": 9.681725910002238e-06, + "loss": 0.0054, + "step": 63230 + }, + { + "epoch": 0.40560171630327774, + "grad_norm": 0.2504672706127167, + "learning_rate": 9.681529379583746e-06, + "loss": 0.0031, + "step": 63240 + }, + { + "epoch": 0.40566585319706383, + "grad_norm": 0.12307994812726974, + "learning_rate": 9.681332790502345e-06, + "loss": 0.0034, + "step": 63250 + }, + { + "epoch": 0.4057299900908499, + "grad_norm": 0.26728805899620056, + "learning_rate": 9.6811361427605e-06, + "loss": 0.0035, + "step": 63260 + }, + { + "epoch": 0.405794126984636, + "grad_norm": 0.23914340138435364, + "learning_rate": 9.68093943636067e-06, + "loss": 0.0033, + "step": 63270 + }, + { + "epoch": 0.4058582638784221, + "grad_norm": 0.058856040239334106, + "learning_rate": 9.680742671305326e-06, + "loss": 0.0041, + "step": 63280 + }, + { + "epoch": 0.4059224007722082, + "grad_norm": 0.05040334537625313, + "learning_rate": 9.680545847596929e-06, + "loss": 0.0039, + "step": 63290 + }, + { + "epoch": 0.4059865376659943, + "grad_norm": 0.11939733475446701, + "learning_rate": 9.68034896523795e-06, + "loss": 0.0039, + "step": 63300 + }, + { + "epoch": 0.4060506745597804, + "grad_norm": 0.24056608974933624, + "learning_rate": 9.680152024230853e-06, + "loss": 0.0041, + "step": 63310 + }, + { + "epoch": 0.4061148114535665, + "grad_norm": 0.06175704672932625, + "learning_rate": 9.679955024578105e-06, + "loss": 0.0022, + "step": 63320 + }, + { + "epoch": 0.4061789483473526, + "grad_norm": 0.10102628171443939, + "learning_rate": 9.679757966282177e-06, + "loss": 0.0043, + "step": 63330 + }, + { + "epoch": 0.40624308524113867, + "grad_norm": 0.050519414246082306, + "learning_rate": 9.679560849345535e-06, + "loss": 0.0035, + "step": 63340 + }, + { + "epoch": 0.40630722213492476, + "grad_norm": 0.37368524074554443, + "learning_rate": 9.679363673770655e-06, + "loss": 0.0046, + "step": 63350 + }, + { + "epoch": 0.40637135902871085, + "grad_norm": 0.10394777357578278, + "learning_rate": 9.67916643956e-06, + "loss": 0.0014, + "step": 63360 + }, + { + "epoch": 0.406435495922497, + "grad_norm": 0.1660034954547882, + "learning_rate": 9.678969146716046e-06, + "loss": 0.0037, + "step": 63370 + }, + { + "epoch": 0.4064996328162831, + "grad_norm": 0.22908726334571838, + "learning_rate": 9.678771795241267e-06, + "loss": 0.0044, + "step": 63380 + }, + { + "epoch": 0.4065637697100692, + "grad_norm": 0.11898116022348404, + "learning_rate": 9.67857438513813e-06, + "loss": 0.0058, + "step": 63390 + }, + { + "epoch": 0.4066279066038553, + "grad_norm": 0.267651230096817, + "learning_rate": 9.678376916409116e-06, + "loss": 0.0047, + "step": 63400 + }, + { + "epoch": 0.4066920434976414, + "grad_norm": 0.3415893614292145, + "learning_rate": 9.678179389056694e-06, + "loss": 0.0033, + "step": 63410 + }, + { + "epoch": 0.40675618039142747, + "grad_norm": 0.3092597723007202, + "learning_rate": 9.677981803083341e-06, + "loss": 0.0051, + "step": 63420 + }, + { + "epoch": 0.40682031728521356, + "grad_norm": 0.2247481346130371, + "learning_rate": 9.677784158491532e-06, + "loss": 0.0143, + "step": 63430 + }, + { + "epoch": 0.40688445417899965, + "grad_norm": 0.3895609378814697, + "learning_rate": 9.677586455283745e-06, + "loss": 0.0065, + "step": 63440 + }, + { + "epoch": 0.40694859107278575, + "grad_norm": 0.22449402511119843, + "learning_rate": 9.677388693462456e-06, + "loss": 0.004, + "step": 63450 + }, + { + "epoch": 0.40701272796657184, + "grad_norm": 0.2221873253583908, + "learning_rate": 9.677190873030144e-06, + "loss": 0.0045, + "step": 63460 + }, + { + "epoch": 0.40707686486035793, + "grad_norm": 0.19022652506828308, + "learning_rate": 9.676992993989286e-06, + "loss": 0.0037, + "step": 63470 + }, + { + "epoch": 0.407141001754144, + "grad_norm": 0.18011286854743958, + "learning_rate": 9.676795056342367e-06, + "loss": 0.003, + "step": 63480 + }, + { + "epoch": 0.4072051386479301, + "grad_norm": 0.35013285279273987, + "learning_rate": 9.676597060091861e-06, + "loss": 0.0036, + "step": 63490 + }, + { + "epoch": 0.4072692755417162, + "grad_norm": 0.1795942634344101, + "learning_rate": 9.676399005240252e-06, + "loss": 0.0031, + "step": 63500 + }, + { + "epoch": 0.40733341243550236, + "grad_norm": 0.27482670545578003, + "learning_rate": 9.676200891790021e-06, + "loss": 0.004, + "step": 63510 + }, + { + "epoch": 0.40739754932928846, + "grad_norm": 0.21835391223430634, + "learning_rate": 9.67600271974365e-06, + "loss": 0.0045, + "step": 63520 + }, + { + "epoch": 0.40746168622307455, + "grad_norm": 0.20426027476787567, + "learning_rate": 9.675804489103626e-06, + "loss": 0.0048, + "step": 63530 + }, + { + "epoch": 0.40752582311686064, + "grad_norm": 0.3146619200706482, + "learning_rate": 9.675606199872429e-06, + "loss": 0.0058, + "step": 63540 + }, + { + "epoch": 0.40758996001064673, + "grad_norm": 0.18899372220039368, + "learning_rate": 9.675407852052542e-06, + "loss": 0.0051, + "step": 63550 + }, + { + "epoch": 0.4076540969044328, + "grad_norm": 0.24110999703407288, + "learning_rate": 9.675209445646456e-06, + "loss": 0.0039, + "step": 63560 + }, + { + "epoch": 0.4077182337982189, + "grad_norm": 0.16395263373851776, + "learning_rate": 9.675010980656654e-06, + "loss": 0.0035, + "step": 63570 + }, + { + "epoch": 0.407782370692005, + "grad_norm": 0.10138542205095291, + "learning_rate": 9.674812457085623e-06, + "loss": 0.0018, + "step": 63580 + }, + { + "epoch": 0.4078465075857911, + "grad_norm": 0.10391603410243988, + "learning_rate": 9.67461387493585e-06, + "loss": 0.0037, + "step": 63590 + }, + { + "epoch": 0.4079106444795772, + "grad_norm": 0.10009389370679855, + "learning_rate": 9.674415234209824e-06, + "loss": 0.0057, + "step": 63600 + }, + { + "epoch": 0.4079747813733633, + "grad_norm": 0.16913220286369324, + "learning_rate": 9.674216534910034e-06, + "loss": 0.0023, + "step": 63610 + }, + { + "epoch": 0.4080389182671494, + "grad_norm": 0.22187593579292297, + "learning_rate": 9.674017777038973e-06, + "loss": 0.004, + "step": 63620 + }, + { + "epoch": 0.4081030551609355, + "grad_norm": 0.21074946224689484, + "learning_rate": 9.673818960599127e-06, + "loss": 0.0023, + "step": 63630 + }, + { + "epoch": 0.4081671920547216, + "grad_norm": 0.3142641484737396, + "learning_rate": 9.673620085592989e-06, + "loss": 0.003, + "step": 63640 + }, + { + "epoch": 0.4082313289485077, + "grad_norm": 0.1447477489709854, + "learning_rate": 9.67342115202305e-06, + "loss": 0.0043, + "step": 63650 + }, + { + "epoch": 0.4082954658422938, + "grad_norm": 0.24877741932868958, + "learning_rate": 9.673222159891807e-06, + "loss": 0.0038, + "step": 63660 + }, + { + "epoch": 0.4083596027360799, + "grad_norm": 0.27425238490104675, + "learning_rate": 9.673023109201747e-06, + "loss": 0.0027, + "step": 63670 + }, + { + "epoch": 0.408423739629866, + "grad_norm": 0.16497421264648438, + "learning_rate": 9.67282399995537e-06, + "loss": 0.0051, + "step": 63680 + }, + { + "epoch": 0.4084878765236521, + "grad_norm": 0.33673834800720215, + "learning_rate": 9.672624832155168e-06, + "loss": 0.0038, + "step": 63690 + }, + { + "epoch": 0.4085520134174382, + "grad_norm": 0.2985970377922058, + "learning_rate": 9.672425605803636e-06, + "loss": 0.0049, + "step": 63700 + }, + { + "epoch": 0.4086161503112243, + "grad_norm": 0.15841737389564514, + "learning_rate": 9.672226320903275e-06, + "loss": 0.004, + "step": 63710 + }, + { + "epoch": 0.4086802872050104, + "grad_norm": 0.18191616237163544, + "learning_rate": 9.672026977456576e-06, + "loss": 0.005, + "step": 63720 + }, + { + "epoch": 0.40874442409879647, + "grad_norm": 0.5124845504760742, + "learning_rate": 9.671827575466041e-06, + "loss": 0.0037, + "step": 63730 + }, + { + "epoch": 0.40880856099258256, + "grad_norm": 0.06640222668647766, + "learning_rate": 9.671628114934169e-06, + "loss": 0.0058, + "step": 63740 + }, + { + "epoch": 0.40887269788636865, + "grad_norm": 0.44437193870544434, + "learning_rate": 9.671428595863457e-06, + "loss": 0.0053, + "step": 63750 + }, + { + "epoch": 0.40893683478015475, + "grad_norm": 0.20514225959777832, + "learning_rate": 9.671229018256405e-06, + "loss": 0.005, + "step": 63760 + }, + { + "epoch": 0.40900097167394084, + "grad_norm": 0.1101483628153801, + "learning_rate": 9.671029382115516e-06, + "loss": 0.0055, + "step": 63770 + }, + { + "epoch": 0.40906510856772693, + "grad_norm": 0.30075669288635254, + "learning_rate": 9.67082968744329e-06, + "loss": 0.0041, + "step": 63780 + }, + { + "epoch": 0.4091292454615131, + "grad_norm": 0.38170281052589417, + "learning_rate": 9.67062993424223e-06, + "loss": 0.0044, + "step": 63790 + }, + { + "epoch": 0.4091933823552992, + "grad_norm": 0.13880175352096558, + "learning_rate": 9.670430122514839e-06, + "loss": 0.004, + "step": 63800 + }, + { + "epoch": 0.40925751924908527, + "grad_norm": 0.3685900568962097, + "learning_rate": 9.670230252263621e-06, + "loss": 0.0075, + "step": 63810 + }, + { + "epoch": 0.40932165614287136, + "grad_norm": 0.16778992116451263, + "learning_rate": 9.670030323491079e-06, + "loss": 0.004, + "step": 63820 + }, + { + "epoch": 0.40938579303665745, + "grad_norm": 0.09747684001922607, + "learning_rate": 9.66983033619972e-06, + "loss": 0.0074, + "step": 63830 + }, + { + "epoch": 0.40944992993044355, + "grad_norm": 0.1732402741909027, + "learning_rate": 9.669630290392051e-06, + "loss": 0.0055, + "step": 63840 + }, + { + "epoch": 0.40951406682422964, + "grad_norm": 0.030933083966374397, + "learning_rate": 9.669430186070575e-06, + "loss": 0.0046, + "step": 63850 + }, + { + "epoch": 0.40957820371801573, + "grad_norm": 0.010907845571637154, + "learning_rate": 9.669230023237803e-06, + "loss": 0.0027, + "step": 63860 + }, + { + "epoch": 0.4096423406118018, + "grad_norm": 0.20749413967132568, + "learning_rate": 9.66902980189624e-06, + "loss": 0.0034, + "step": 63870 + }, + { + "epoch": 0.4097064775055879, + "grad_norm": 0.36841881275177, + "learning_rate": 9.668829522048397e-06, + "loss": 0.0044, + "step": 63880 + }, + { + "epoch": 0.409770614399374, + "grad_norm": 0.26955240964889526, + "learning_rate": 9.668629183696784e-06, + "loss": 0.005, + "step": 63890 + }, + { + "epoch": 0.4098347512931601, + "grad_norm": 0.060388293117284775, + "learning_rate": 9.668428786843911e-06, + "loss": 0.0045, + "step": 63900 + }, + { + "epoch": 0.4098988881869462, + "grad_norm": 0.23592258989810944, + "learning_rate": 9.668228331492288e-06, + "loss": 0.0025, + "step": 63910 + }, + { + "epoch": 0.4099630250807323, + "grad_norm": 0.3113408386707306, + "learning_rate": 9.66802781764443e-06, + "loss": 0.0046, + "step": 63920 + }, + { + "epoch": 0.41002716197451844, + "grad_norm": 0.04270043224096298, + "learning_rate": 9.667827245302845e-06, + "loss": 0.0027, + "step": 63930 + }, + { + "epoch": 0.41009129886830453, + "grad_norm": 0.0638560876250267, + "learning_rate": 9.66762661447005e-06, + "loss": 0.0041, + "step": 63940 + }, + { + "epoch": 0.4101554357620906, + "grad_norm": 0.11248234659433365, + "learning_rate": 9.667425925148555e-06, + "loss": 0.0033, + "step": 63950 + }, + { + "epoch": 0.4102195726558767, + "grad_norm": 0.03995837643742561, + "learning_rate": 9.66722517734088e-06, + "loss": 0.004, + "step": 63960 + }, + { + "epoch": 0.4102837095496628, + "grad_norm": 0.24541008472442627, + "learning_rate": 9.667024371049537e-06, + "loss": 0.005, + "step": 63970 + }, + { + "epoch": 0.4103478464434489, + "grad_norm": 0.1571703851222992, + "learning_rate": 9.666823506277044e-06, + "loss": 0.0046, + "step": 63980 + }, + { + "epoch": 0.410411983337235, + "grad_norm": 0.18189582228660583, + "learning_rate": 9.666622583025915e-06, + "loss": 0.0047, + "step": 63990 + }, + { + "epoch": 0.4104761202310211, + "grad_norm": 0.274532288312912, + "learning_rate": 9.66642160129867e-06, + "loss": 0.0039, + "step": 64000 + }, + { + "epoch": 0.4105402571248072, + "grad_norm": 0.1679452359676361, + "learning_rate": 9.66622056109783e-06, + "loss": 0.005, + "step": 64010 + }, + { + "epoch": 0.4106043940185933, + "grad_norm": 0.1124291941523552, + "learning_rate": 9.66601946242591e-06, + "loss": 0.003, + "step": 64020 + }, + { + "epoch": 0.41066853091237937, + "grad_norm": 0.0889439508318901, + "learning_rate": 9.66581830528543e-06, + "loss": 0.0034, + "step": 64030 + }, + { + "epoch": 0.41073266780616546, + "grad_norm": 0.12128795683383942, + "learning_rate": 9.665617089678913e-06, + "loss": 0.0025, + "step": 64040 + }, + { + "epoch": 0.41079680469995156, + "grad_norm": 0.18444238603115082, + "learning_rate": 9.66541581560888e-06, + "loss": 0.004, + "step": 64050 + }, + { + "epoch": 0.41086094159373765, + "grad_norm": 0.08144691586494446, + "learning_rate": 9.665214483077852e-06, + "loss": 0.004, + "step": 64060 + }, + { + "epoch": 0.4109250784875238, + "grad_norm": 0.25433894991874695, + "learning_rate": 9.665013092088353e-06, + "loss": 0.0038, + "step": 64070 + }, + { + "epoch": 0.4109892153813099, + "grad_norm": 0.08502595871686935, + "learning_rate": 9.664811642642905e-06, + "loss": 0.0036, + "step": 64080 + }, + { + "epoch": 0.411053352275096, + "grad_norm": 0.08852414786815643, + "learning_rate": 9.664610134744034e-06, + "loss": 0.0043, + "step": 64090 + }, + { + "epoch": 0.4111174891688821, + "grad_norm": 0.11151493340730667, + "learning_rate": 9.664408568394265e-06, + "loss": 0.0033, + "step": 64100 + }, + { + "epoch": 0.41118162606266817, + "grad_norm": 0.18578369915485382, + "learning_rate": 9.664206943596122e-06, + "loss": 0.0033, + "step": 64110 + }, + { + "epoch": 0.41124576295645426, + "grad_norm": 0.09410499036312103, + "learning_rate": 9.664005260352132e-06, + "loss": 0.004, + "step": 64120 + }, + { + "epoch": 0.41130989985024036, + "grad_norm": 0.24096010625362396, + "learning_rate": 9.663803518664823e-06, + "loss": 0.0042, + "step": 64130 + }, + { + "epoch": 0.41137403674402645, + "grad_norm": 0.24409767985343933, + "learning_rate": 9.663601718536724e-06, + "loss": 0.0054, + "step": 64140 + }, + { + "epoch": 0.41143817363781254, + "grad_norm": 0.33792802691459656, + "learning_rate": 9.663399859970361e-06, + "loss": 0.004, + "step": 64150 + }, + { + "epoch": 0.41150231053159864, + "grad_norm": 0.18288464844226837, + "learning_rate": 9.663197942968266e-06, + "loss": 0.0044, + "step": 64160 + }, + { + "epoch": 0.41156644742538473, + "grad_norm": 0.24157993495464325, + "learning_rate": 9.662995967532966e-06, + "loss": 0.0067, + "step": 64170 + }, + { + "epoch": 0.4116305843191708, + "grad_norm": 0.24919168651103973, + "learning_rate": 9.662793933666997e-06, + "loss": 0.0028, + "step": 64180 + }, + { + "epoch": 0.4116947212129569, + "grad_norm": 0.12063928693532944, + "learning_rate": 9.662591841372885e-06, + "loss": 0.0042, + "step": 64190 + }, + { + "epoch": 0.411758858106743, + "grad_norm": 0.05848376452922821, + "learning_rate": 9.662389690653166e-06, + "loss": 0.0039, + "step": 64200 + }, + { + "epoch": 0.4118229950005291, + "grad_norm": 0.09494820982217789, + "learning_rate": 9.662187481510371e-06, + "loss": 0.0035, + "step": 64210 + }, + { + "epoch": 0.41188713189431525, + "grad_norm": 0.09575354307889938, + "learning_rate": 9.661985213947037e-06, + "loss": 0.0032, + "step": 64220 + }, + { + "epoch": 0.41195126878810134, + "grad_norm": 0.36199530959129333, + "learning_rate": 9.661782887965694e-06, + "loss": 0.0095, + "step": 64230 + }, + { + "epoch": 0.41201540568188744, + "grad_norm": 0.007769663352519274, + "learning_rate": 9.661580503568881e-06, + "loss": 0.0042, + "step": 64240 + }, + { + "epoch": 0.41207954257567353, + "grad_norm": 0.2087806910276413, + "learning_rate": 9.661378060759132e-06, + "loss": 0.0036, + "step": 64250 + }, + { + "epoch": 0.4121436794694596, + "grad_norm": 0.5809451937675476, + "learning_rate": 9.661175559538987e-06, + "loss": 0.0059, + "step": 64260 + }, + { + "epoch": 0.4122078163632457, + "grad_norm": 0.29702523350715637, + "learning_rate": 9.660972999910977e-06, + "loss": 0.0032, + "step": 64270 + }, + { + "epoch": 0.4122719532570318, + "grad_norm": 0.12000985443592072, + "learning_rate": 9.660770381877647e-06, + "loss": 0.0032, + "step": 64280 + }, + { + "epoch": 0.4123360901508179, + "grad_norm": 0.26523804664611816, + "learning_rate": 9.660567705441532e-06, + "loss": 0.0035, + "step": 64290 + }, + { + "epoch": 0.412400227044604, + "grad_norm": 0.20523472130298615, + "learning_rate": 9.660364970605174e-06, + "loss": 0.0037, + "step": 64300 + }, + { + "epoch": 0.4124643639383901, + "grad_norm": 1.1858102083206177, + "learning_rate": 9.66016217737111e-06, + "loss": 0.0059, + "step": 64310 + }, + { + "epoch": 0.4125285008321762, + "grad_norm": 0.09031245857477188, + "learning_rate": 9.659959325741885e-06, + "loss": 0.0035, + "step": 64320 + }, + { + "epoch": 0.4125926377259623, + "grad_norm": 0.13391923904418945, + "learning_rate": 9.659756415720038e-06, + "loss": 0.0049, + "step": 64330 + }, + { + "epoch": 0.41265677461974837, + "grad_norm": 0.21507328748703003, + "learning_rate": 9.659553447308114e-06, + "loss": 0.0041, + "step": 64340 + }, + { + "epoch": 0.41272091151353446, + "grad_norm": 0.22505846619606018, + "learning_rate": 9.659350420508656e-06, + "loss": 0.005, + "step": 64350 + }, + { + "epoch": 0.4127850484073206, + "grad_norm": 0.22557596862316132, + "learning_rate": 9.659147335324205e-06, + "loss": 0.0043, + "step": 64360 + }, + { + "epoch": 0.4128491853011067, + "grad_norm": 0.16148000955581665, + "learning_rate": 9.658944191757309e-06, + "loss": 0.0044, + "step": 64370 + }, + { + "epoch": 0.4129133221948928, + "grad_norm": 0.14673484861850739, + "learning_rate": 9.658740989810512e-06, + "loss": 0.0055, + "step": 64380 + }, + { + "epoch": 0.4129774590886789, + "grad_norm": 0.06320397555828094, + "learning_rate": 9.658537729486362e-06, + "loss": 0.0029, + "step": 64390 + }, + { + "epoch": 0.413041595982465, + "grad_norm": 0.15201781690120697, + "learning_rate": 9.658334410787405e-06, + "loss": 0.0035, + "step": 64400 + }, + { + "epoch": 0.4131057328762511, + "grad_norm": 0.3072662353515625, + "learning_rate": 9.658131033716187e-06, + "loss": 0.0047, + "step": 64410 + }, + { + "epoch": 0.41316986977003717, + "grad_norm": 0.10264178365468979, + "learning_rate": 9.65792759827526e-06, + "loss": 0.0031, + "step": 64420 + }, + { + "epoch": 0.41323400666382326, + "grad_norm": 0.17404109239578247, + "learning_rate": 9.65772410446717e-06, + "loss": 0.0097, + "step": 64430 + }, + { + "epoch": 0.41329814355760935, + "grad_norm": 0.35060179233551025, + "learning_rate": 9.657520552294469e-06, + "loss": 0.0041, + "step": 64440 + }, + { + "epoch": 0.41336228045139545, + "grad_norm": 0.2839616537094116, + "learning_rate": 9.657316941759705e-06, + "loss": 0.0062, + "step": 64450 + }, + { + "epoch": 0.41342641734518154, + "grad_norm": 0.21618971228599548, + "learning_rate": 9.657113272865433e-06, + "loss": 0.0026, + "step": 64460 + }, + { + "epoch": 0.41349055423896763, + "grad_norm": 0.37534889578819275, + "learning_rate": 9.656909545614201e-06, + "loss": 0.0081, + "step": 64470 + }, + { + "epoch": 0.4135546911327537, + "grad_norm": 0.176219180226326, + "learning_rate": 9.656705760008566e-06, + "loss": 0.0045, + "step": 64480 + }, + { + "epoch": 0.4136188280265398, + "grad_norm": 0.3384178876876831, + "learning_rate": 9.65650191605108e-06, + "loss": 0.0034, + "step": 64490 + }, + { + "epoch": 0.41368296492032597, + "grad_norm": 0.06688294559717178, + "learning_rate": 9.656298013744296e-06, + "loss": 0.0021, + "step": 64500 + }, + { + "epoch": 0.41374710181411206, + "grad_norm": 0.17957651615142822, + "learning_rate": 9.65609405309077e-06, + "loss": 0.0032, + "step": 64510 + }, + { + "epoch": 0.41381123870789815, + "grad_norm": 0.42804670333862305, + "learning_rate": 9.655890034093059e-06, + "loss": 0.0043, + "step": 64520 + }, + { + "epoch": 0.41387537560168425, + "grad_norm": 0.29949918389320374, + "learning_rate": 9.655685956753718e-06, + "loss": 0.0039, + "step": 64530 + }, + { + "epoch": 0.41393951249547034, + "grad_norm": 0.22192387282848358, + "learning_rate": 9.655481821075305e-06, + "loss": 0.0028, + "step": 64540 + }, + { + "epoch": 0.41400364938925643, + "grad_norm": 0.17723120748996735, + "learning_rate": 9.655277627060376e-06, + "loss": 0.0071, + "step": 64550 + }, + { + "epoch": 0.4140677862830425, + "grad_norm": 0.15555155277252197, + "learning_rate": 9.655073374711492e-06, + "loss": 0.0026, + "step": 64560 + }, + { + "epoch": 0.4141319231768286, + "grad_norm": 0.2917649745941162, + "learning_rate": 9.654869064031212e-06, + "loss": 0.0042, + "step": 64570 + }, + { + "epoch": 0.4141960600706147, + "grad_norm": 0.21928732097148895, + "learning_rate": 9.654664695022096e-06, + "loss": 0.0032, + "step": 64580 + }, + { + "epoch": 0.4142601969644008, + "grad_norm": 0.3652102053165436, + "learning_rate": 9.654460267686705e-06, + "loss": 0.0046, + "step": 64590 + }, + { + "epoch": 0.4143243338581869, + "grad_norm": 0.1732681393623352, + "learning_rate": 9.654255782027599e-06, + "loss": 0.0044, + "step": 64600 + }, + { + "epoch": 0.414388470751973, + "grad_norm": 0.0742977187037468, + "learning_rate": 9.654051238047343e-06, + "loss": 0.0026, + "step": 64610 + }, + { + "epoch": 0.4144526076457591, + "grad_norm": 0.2823348045349121, + "learning_rate": 9.653846635748498e-06, + "loss": 0.0085, + "step": 64620 + }, + { + "epoch": 0.4145167445395452, + "grad_norm": 0.4840265214443207, + "learning_rate": 9.65364197513363e-06, + "loss": 0.0055, + "step": 64630 + }, + { + "epoch": 0.4145808814333313, + "grad_norm": 0.2717534303665161, + "learning_rate": 9.6534372562053e-06, + "loss": 0.0057, + "step": 64640 + }, + { + "epoch": 0.4146450183271174, + "grad_norm": 0.3104613423347473, + "learning_rate": 9.653232478966076e-06, + "loss": 0.0063, + "step": 64650 + }, + { + "epoch": 0.4147091552209035, + "grad_norm": 0.25344496965408325, + "learning_rate": 9.653027643418525e-06, + "loss": 0.0042, + "step": 64660 + }, + { + "epoch": 0.4147732921146896, + "grad_norm": 0.059659067541360855, + "learning_rate": 9.65282274956521e-06, + "loss": 0.005, + "step": 64670 + }, + { + "epoch": 0.4148374290084757, + "grad_norm": 0.10441508889198303, + "learning_rate": 9.652617797408702e-06, + "loss": 0.0034, + "step": 64680 + }, + { + "epoch": 0.4149015659022618, + "grad_norm": 0.054133035242557526, + "learning_rate": 9.652412786951568e-06, + "loss": 0.0031, + "step": 64690 + }, + { + "epoch": 0.4149657027960479, + "grad_norm": 1.1095691919326782, + "learning_rate": 9.652207718196376e-06, + "loss": 0.0043, + "step": 64700 + }, + { + "epoch": 0.415029839689834, + "grad_norm": 0.10577990114688873, + "learning_rate": 9.652002591145697e-06, + "loss": 0.0048, + "step": 64710 + }, + { + "epoch": 0.4150939765836201, + "grad_norm": 0.19246532022953033, + "learning_rate": 9.651797405802099e-06, + "loss": 0.0029, + "step": 64720 + }, + { + "epoch": 0.41515811347740617, + "grad_norm": 0.1980627179145813, + "learning_rate": 9.651592162168157e-06, + "loss": 0.0046, + "step": 64730 + }, + { + "epoch": 0.41522225037119226, + "grad_norm": 0.11953790485858917, + "learning_rate": 9.651386860246442e-06, + "loss": 0.0042, + "step": 64740 + }, + { + "epoch": 0.41528638726497835, + "grad_norm": 0.12883557379245758, + "learning_rate": 9.651181500039522e-06, + "loss": 0.0054, + "step": 64750 + }, + { + "epoch": 0.41535052415876444, + "grad_norm": 0.23231658339500427, + "learning_rate": 9.650976081549975e-06, + "loss": 0.0027, + "step": 64760 + }, + { + "epoch": 0.41541466105255054, + "grad_norm": 0.11305467039346695, + "learning_rate": 9.650770604780375e-06, + "loss": 0.0051, + "step": 64770 + }, + { + "epoch": 0.4154787979463367, + "grad_norm": 0.10067601501941681, + "learning_rate": 9.650565069733293e-06, + "loss": 0.0049, + "step": 64780 + }, + { + "epoch": 0.4155429348401228, + "grad_norm": 0.23430085182189941, + "learning_rate": 9.650359476411309e-06, + "loss": 0.0059, + "step": 64790 + }, + { + "epoch": 0.4156070717339089, + "grad_norm": 0.1708226501941681, + "learning_rate": 9.650153824816995e-06, + "loss": 0.0033, + "step": 64800 + }, + { + "epoch": 0.41567120862769497, + "grad_norm": 0.1529286801815033, + "learning_rate": 9.649948114952932e-06, + "loss": 0.0055, + "step": 64810 + }, + { + "epoch": 0.41573534552148106, + "grad_norm": 0.01400473341345787, + "learning_rate": 9.649742346821696e-06, + "loss": 0.0049, + "step": 64820 + }, + { + "epoch": 0.41579948241526715, + "grad_norm": 0.08111272007226944, + "learning_rate": 9.649536520425864e-06, + "loss": 0.0041, + "step": 64830 + }, + { + "epoch": 0.41586361930905325, + "grad_norm": 0.1852743923664093, + "learning_rate": 9.649330635768017e-06, + "loss": 0.0034, + "step": 64840 + }, + { + "epoch": 0.41592775620283934, + "grad_norm": 0.20524005591869354, + "learning_rate": 9.649124692850736e-06, + "loss": 0.0038, + "step": 64850 + }, + { + "epoch": 0.41599189309662543, + "grad_norm": 0.18945936858654022, + "learning_rate": 9.648918691676597e-06, + "loss": 0.006, + "step": 64860 + }, + { + "epoch": 0.4160560299904115, + "grad_norm": 0.5125710964202881, + "learning_rate": 9.648712632248188e-06, + "loss": 0.0038, + "step": 64870 + }, + { + "epoch": 0.4161201668841976, + "grad_norm": 0.06856992095708847, + "learning_rate": 9.648506514568083e-06, + "loss": 0.0048, + "step": 64880 + }, + { + "epoch": 0.4161843037779837, + "grad_norm": 0.31569772958755493, + "learning_rate": 9.648300338638872e-06, + "loss": 0.0048, + "step": 64890 + }, + { + "epoch": 0.4162484406717698, + "grad_norm": 0.07696610689163208, + "learning_rate": 9.648094104463135e-06, + "loss": 0.0042, + "step": 64900 + }, + { + "epoch": 0.4163125775655559, + "grad_norm": 0.15830492973327637, + "learning_rate": 9.647887812043457e-06, + "loss": 0.0025, + "step": 64910 + }, + { + "epoch": 0.41637671445934205, + "grad_norm": 0.22576509416103363, + "learning_rate": 9.647681461382421e-06, + "loss": 0.0074, + "step": 64920 + }, + { + "epoch": 0.41644085135312814, + "grad_norm": 0.21804648637771606, + "learning_rate": 9.647475052482617e-06, + "loss": 0.0055, + "step": 64930 + }, + { + "epoch": 0.41650498824691423, + "grad_norm": 0.18545271456241608, + "learning_rate": 9.647268585346627e-06, + "loss": 0.0037, + "step": 64940 + }, + { + "epoch": 0.4165691251407003, + "grad_norm": 0.16034992039203644, + "learning_rate": 9.647062059977043e-06, + "loss": 0.0027, + "step": 64950 + }, + { + "epoch": 0.4166332620344864, + "grad_norm": 0.18599581718444824, + "learning_rate": 9.646855476376448e-06, + "loss": 0.0042, + "step": 64960 + }, + { + "epoch": 0.4166973989282725, + "grad_norm": 0.21841683983802795, + "learning_rate": 9.646648834547434e-06, + "loss": 0.004, + "step": 64970 + }, + { + "epoch": 0.4167615358220586, + "grad_norm": 0.06854478269815445, + "learning_rate": 9.646442134492589e-06, + "loss": 0.0045, + "step": 64980 + }, + { + "epoch": 0.4168256727158447, + "grad_norm": 0.100202277302742, + "learning_rate": 9.646235376214503e-06, + "loss": 0.0036, + "step": 64990 + }, + { + "epoch": 0.4168898096096308, + "grad_norm": 0.5943800210952759, + "learning_rate": 9.646028559715767e-06, + "loss": 0.0036, + "step": 65000 + }, + { + "epoch": 0.4169539465034169, + "grad_norm": 0.21876904368400574, + "learning_rate": 9.645821684998972e-06, + "loss": 0.0032, + "step": 65010 + }, + { + "epoch": 0.417018083397203, + "grad_norm": 0.1890275627374649, + "learning_rate": 9.645614752066715e-06, + "loss": 0.0036, + "step": 65020 + }, + { + "epoch": 0.41708222029098907, + "grad_norm": 0.3648800849914551, + "learning_rate": 9.64540776092158e-06, + "loss": 0.006, + "step": 65030 + }, + { + "epoch": 0.41714635718477516, + "grad_norm": 0.2900066077709198, + "learning_rate": 9.645200711566169e-06, + "loss": 0.0037, + "step": 65040 + }, + { + "epoch": 0.41721049407856126, + "grad_norm": 0.20067039132118225, + "learning_rate": 9.644993604003071e-06, + "loss": 0.0045, + "step": 65050 + }, + { + "epoch": 0.4172746309723474, + "grad_norm": 0.19734036922454834, + "learning_rate": 9.644786438234884e-06, + "loss": 0.0041, + "step": 65060 + }, + { + "epoch": 0.4173387678661335, + "grad_norm": 0.1580866575241089, + "learning_rate": 9.644579214264204e-06, + "loss": 0.0029, + "step": 65070 + }, + { + "epoch": 0.4174029047599196, + "grad_norm": 0.17017881572246552, + "learning_rate": 9.644371932093627e-06, + "loss": 0.0036, + "step": 65080 + }, + { + "epoch": 0.4174670416537057, + "grad_norm": 0.13641498982906342, + "learning_rate": 9.644164591725748e-06, + "loss": 0.0056, + "step": 65090 + }, + { + "epoch": 0.4175311785474918, + "grad_norm": 0.3891378939151764, + "learning_rate": 9.643957193163172e-06, + "loss": 0.0053, + "step": 65100 + }, + { + "epoch": 0.41759531544127787, + "grad_norm": 0.3308427035808563, + "learning_rate": 9.643749736408489e-06, + "loss": 0.0034, + "step": 65110 + }, + { + "epoch": 0.41765945233506396, + "grad_norm": 0.2427821010351181, + "learning_rate": 9.643542221464306e-06, + "loss": 0.0035, + "step": 65120 + }, + { + "epoch": 0.41772358922885006, + "grad_norm": 0.04089587926864624, + "learning_rate": 9.643334648333218e-06, + "loss": 0.0044, + "step": 65130 + }, + { + "epoch": 0.41778772612263615, + "grad_norm": 0.2266939878463745, + "learning_rate": 9.64312701701783e-06, + "loss": 0.0033, + "step": 65140 + }, + { + "epoch": 0.41785186301642224, + "grad_norm": 0.17413191497325897, + "learning_rate": 9.642919327520741e-06, + "loss": 0.0044, + "step": 65150 + }, + { + "epoch": 0.41791599991020834, + "grad_norm": 0.15931077301502228, + "learning_rate": 9.642711579844558e-06, + "loss": 0.0035, + "step": 65160 + }, + { + "epoch": 0.41798013680399443, + "grad_norm": 0.1775781512260437, + "learning_rate": 9.642503773991876e-06, + "loss": 0.0028, + "step": 65170 + }, + { + "epoch": 0.4180442736977805, + "grad_norm": 0.17191636562347412, + "learning_rate": 9.642295909965305e-06, + "loss": 0.0048, + "step": 65180 + }, + { + "epoch": 0.4181084105915666, + "grad_norm": 0.29047107696533203, + "learning_rate": 9.64208798776745e-06, + "loss": 0.0045, + "step": 65190 + }, + { + "epoch": 0.41817254748535276, + "grad_norm": 0.18347904086112976, + "learning_rate": 9.641880007400915e-06, + "loss": 0.0047, + "step": 65200 + }, + { + "epoch": 0.41823668437913886, + "grad_norm": 0.11868282407522202, + "learning_rate": 9.641671968868305e-06, + "loss": 0.0048, + "step": 65210 + }, + { + "epoch": 0.41830082127292495, + "grad_norm": 0.11434541642665863, + "learning_rate": 9.641463872172227e-06, + "loss": 0.003, + "step": 65220 + }, + { + "epoch": 0.41836495816671104, + "grad_norm": 0.30812957882881165, + "learning_rate": 9.64125571731529e-06, + "loss": 0.0066, + "step": 65230 + }, + { + "epoch": 0.41842909506049714, + "grad_norm": 0.11301054805517197, + "learning_rate": 9.641047504300103e-06, + "loss": 0.0064, + "step": 65240 + }, + { + "epoch": 0.41849323195428323, + "grad_norm": 0.24813212454319, + "learning_rate": 9.640839233129274e-06, + "loss": 0.0043, + "step": 65250 + }, + { + "epoch": 0.4185573688480693, + "grad_norm": 0.18483519554138184, + "learning_rate": 9.640630903805411e-06, + "loss": 0.0033, + "step": 65260 + }, + { + "epoch": 0.4186215057418554, + "grad_norm": 0.11620765924453735, + "learning_rate": 9.640422516331127e-06, + "loss": 0.0031, + "step": 65270 + }, + { + "epoch": 0.4186856426356415, + "grad_norm": 0.21796372532844543, + "learning_rate": 9.640214070709033e-06, + "loss": 0.0035, + "step": 65280 + }, + { + "epoch": 0.4187497795294276, + "grad_norm": 0.18819208443164825, + "learning_rate": 9.64000556694174e-06, + "loss": 0.0035, + "step": 65290 + }, + { + "epoch": 0.4188139164232137, + "grad_norm": 0.18838195502758026, + "learning_rate": 9.639797005031859e-06, + "loss": 0.0053, + "step": 65300 + }, + { + "epoch": 0.4188780533169998, + "grad_norm": 0.22085854411125183, + "learning_rate": 9.639588384982008e-06, + "loss": 0.0109, + "step": 65310 + }, + { + "epoch": 0.4189421902107859, + "grad_norm": 0.14810186624526978, + "learning_rate": 9.639379706794798e-06, + "loss": 0.0028, + "step": 65320 + }, + { + "epoch": 0.419006327104572, + "grad_norm": 0.24572016298770905, + "learning_rate": 9.639170970472845e-06, + "loss": 0.0037, + "step": 65330 + }, + { + "epoch": 0.4190704639983581, + "grad_norm": 0.28217583894729614, + "learning_rate": 9.638962176018762e-06, + "loss": 0.003, + "step": 65340 + }, + { + "epoch": 0.4191346008921442, + "grad_norm": 0.06116315349936485, + "learning_rate": 9.63875332343517e-06, + "loss": 0.0045, + "step": 65350 + }, + { + "epoch": 0.4191987377859303, + "grad_norm": 0.033075958490371704, + "learning_rate": 9.638544412724682e-06, + "loss": 0.0037, + "step": 65360 + }, + { + "epoch": 0.4192628746797164, + "grad_norm": 0.10180090367794037, + "learning_rate": 9.63833544388992e-06, + "loss": 0.0043, + "step": 65370 + }, + { + "epoch": 0.4193270115735025, + "grad_norm": 0.22493982315063477, + "learning_rate": 9.638126416933497e-06, + "loss": 0.003, + "step": 65380 + }, + { + "epoch": 0.4193911484672886, + "grad_norm": 0.1564272940158844, + "learning_rate": 9.637917331858037e-06, + "loss": 0.0034, + "step": 65390 + }, + { + "epoch": 0.4194552853610747, + "grad_norm": 0.1368493139743805, + "learning_rate": 9.637708188666156e-06, + "loss": 0.0039, + "step": 65400 + }, + { + "epoch": 0.4195194222548608, + "grad_norm": 0.16612175107002258, + "learning_rate": 9.637498987360479e-06, + "loss": 0.0046, + "step": 65410 + }, + { + "epoch": 0.41958355914864687, + "grad_norm": 0.17109805345535278, + "learning_rate": 9.637289727943625e-06, + "loss": 0.0036, + "step": 65420 + }, + { + "epoch": 0.41964769604243296, + "grad_norm": 0.08433777093887329, + "learning_rate": 9.637080410418215e-06, + "loss": 0.0061, + "step": 65430 + }, + { + "epoch": 0.41971183293621905, + "grad_norm": 0.30764898657798767, + "learning_rate": 9.636871034786876e-06, + "loss": 0.0061, + "step": 65440 + }, + { + "epoch": 0.41977596983000515, + "grad_norm": 0.08212987333536148, + "learning_rate": 9.636661601052227e-06, + "loss": 0.005, + "step": 65450 + }, + { + "epoch": 0.41984010672379124, + "grad_norm": 0.32802894711494446, + "learning_rate": 9.636452109216894e-06, + "loss": 0.0032, + "step": 65460 + }, + { + "epoch": 0.41990424361757733, + "grad_norm": 0.1307886391878128, + "learning_rate": 9.636242559283505e-06, + "loss": 0.0055, + "step": 65470 + }, + { + "epoch": 0.4199683805113635, + "grad_norm": 0.2157278209924698, + "learning_rate": 9.636032951254681e-06, + "loss": 0.0039, + "step": 65480 + }, + { + "epoch": 0.4200325174051496, + "grad_norm": 0.2503538429737091, + "learning_rate": 9.635823285133052e-06, + "loss": 0.0049, + "step": 65490 + }, + { + "epoch": 0.42009665429893567, + "grad_norm": 0.34493184089660645, + "learning_rate": 9.635613560921242e-06, + "loss": 0.0041, + "step": 65500 + }, + { + "epoch": 0.42016079119272176, + "grad_norm": 0.2957816421985626, + "learning_rate": 9.635403778621883e-06, + "loss": 0.0061, + "step": 65510 + }, + { + "epoch": 0.42022492808650785, + "grad_norm": 0.22828510403633118, + "learning_rate": 9.6351939382376e-06, + "loss": 0.0033, + "step": 65520 + }, + { + "epoch": 0.42028906498029395, + "grad_norm": 0.31681329011917114, + "learning_rate": 9.634984039771027e-06, + "loss": 0.0045, + "step": 65530 + }, + { + "epoch": 0.42035320187408004, + "grad_norm": 0.1677008718252182, + "learning_rate": 9.634774083224789e-06, + "loss": 0.0042, + "step": 65540 + }, + { + "epoch": 0.42041733876786613, + "grad_norm": 0.0993567481637001, + "learning_rate": 9.634564068601521e-06, + "loss": 0.0022, + "step": 65550 + }, + { + "epoch": 0.4204814756616522, + "grad_norm": 0.1264619082212448, + "learning_rate": 9.634353995903852e-06, + "loss": 0.0065, + "step": 65560 + }, + { + "epoch": 0.4205456125554383, + "grad_norm": 0.04126043617725372, + "learning_rate": 9.634143865134415e-06, + "loss": 0.0027, + "step": 65570 + }, + { + "epoch": 0.4206097494492244, + "grad_norm": 0.20881688594818115, + "learning_rate": 9.633933676295847e-06, + "loss": 0.003, + "step": 65580 + }, + { + "epoch": 0.4206738863430105, + "grad_norm": 0.20779581367969513, + "learning_rate": 9.633723429390773e-06, + "loss": 0.0045, + "step": 65590 + }, + { + "epoch": 0.4207380232367966, + "grad_norm": 0.18272095918655396, + "learning_rate": 9.633513124421834e-06, + "loss": 0.0043, + "step": 65600 + }, + { + "epoch": 0.4208021601305827, + "grad_norm": 0.1345992386341095, + "learning_rate": 9.633302761391665e-06, + "loss": 0.0039, + "step": 65610 + }, + { + "epoch": 0.4208662970243688, + "grad_norm": 0.275342732667923, + "learning_rate": 9.633092340302902e-06, + "loss": 0.003, + "step": 65620 + }, + { + "epoch": 0.42093043391815493, + "grad_norm": 0.27025195956230164, + "learning_rate": 9.632881861158179e-06, + "loss": 0.0051, + "step": 65630 + }, + { + "epoch": 0.420994570811941, + "grad_norm": 0.06833307445049286, + "learning_rate": 9.632671323960137e-06, + "loss": 0.0034, + "step": 65640 + }, + { + "epoch": 0.4210587077057271, + "grad_norm": 0.20985299348831177, + "learning_rate": 9.63246072871141e-06, + "loss": 0.0033, + "step": 65650 + }, + { + "epoch": 0.4211228445995132, + "grad_norm": 0.09783653914928436, + "learning_rate": 9.632250075414642e-06, + "loss": 0.0026, + "step": 65660 + }, + { + "epoch": 0.4211869814932993, + "grad_norm": 0.27587318420410156, + "learning_rate": 9.632039364072467e-06, + "loss": 0.0043, + "step": 65670 + }, + { + "epoch": 0.4212511183870854, + "grad_norm": 0.14510606229305267, + "learning_rate": 9.63182859468753e-06, + "loss": 0.0033, + "step": 65680 + }, + { + "epoch": 0.4213152552808715, + "grad_norm": 0.2852429449558258, + "learning_rate": 9.631617767262472e-06, + "loss": 0.0032, + "step": 65690 + }, + { + "epoch": 0.4213793921746576, + "grad_norm": 0.23456509411334991, + "learning_rate": 9.63140688179993e-06, + "loss": 0.0071, + "step": 65700 + }, + { + "epoch": 0.4214435290684437, + "grad_norm": 0.6252493262290955, + "learning_rate": 9.631195938302551e-06, + "loss": 0.0053, + "step": 65710 + }, + { + "epoch": 0.42150766596222977, + "grad_norm": 0.3259626030921936, + "learning_rate": 9.630984936772979e-06, + "loss": 0.0049, + "step": 65720 + }, + { + "epoch": 0.42157180285601586, + "grad_norm": 0.26559481024742126, + "learning_rate": 9.630773877213853e-06, + "loss": 0.0042, + "step": 65730 + }, + { + "epoch": 0.42163593974980196, + "grad_norm": 0.1573127657175064, + "learning_rate": 9.630562759627822e-06, + "loss": 0.0043, + "step": 65740 + }, + { + "epoch": 0.42170007664358805, + "grad_norm": 0.18279732763767242, + "learning_rate": 9.63035158401753e-06, + "loss": 0.0024, + "step": 65750 + }, + { + "epoch": 0.42176421353737414, + "grad_norm": 0.14254005253314972, + "learning_rate": 9.630140350385623e-06, + "loss": 0.0045, + "step": 65760 + }, + { + "epoch": 0.4218283504311603, + "grad_norm": 0.16114702820777893, + "learning_rate": 9.62992905873475e-06, + "loss": 0.003, + "step": 65770 + }, + { + "epoch": 0.4218924873249464, + "grad_norm": 0.4747255742549896, + "learning_rate": 9.629717709067557e-06, + "loss": 0.0072, + "step": 65780 + }, + { + "epoch": 0.4219566242187325, + "grad_norm": 0.1535458266735077, + "learning_rate": 9.629506301386689e-06, + "loss": 0.0047, + "step": 65790 + }, + { + "epoch": 0.42202076111251857, + "grad_norm": 0.15451142191886902, + "learning_rate": 9.629294835694801e-06, + "loss": 0.0057, + "step": 65800 + }, + { + "epoch": 0.42208489800630467, + "grad_norm": 0.1924429088830948, + "learning_rate": 9.629083311994541e-06, + "loss": 0.0047, + "step": 65810 + }, + { + "epoch": 0.42214903490009076, + "grad_norm": 0.05497288703918457, + "learning_rate": 9.628871730288555e-06, + "loss": 0.0042, + "step": 65820 + }, + { + "epoch": 0.42221317179387685, + "grad_norm": 0.2067718654870987, + "learning_rate": 9.628660090579503e-06, + "loss": 0.005, + "step": 65830 + }, + { + "epoch": 0.42227730868766294, + "grad_norm": 0.16331195831298828, + "learning_rate": 9.62844839287003e-06, + "loss": 0.0044, + "step": 65840 + }, + { + "epoch": 0.42234144558144904, + "grad_norm": 0.14620499312877655, + "learning_rate": 9.628236637162788e-06, + "loss": 0.0032, + "step": 65850 + }, + { + "epoch": 0.42240558247523513, + "grad_norm": 0.0732194036245346, + "learning_rate": 9.628024823460436e-06, + "loss": 0.0023, + "step": 65860 + }, + { + "epoch": 0.4224697193690212, + "grad_norm": 0.5353759527206421, + "learning_rate": 9.627812951765624e-06, + "loss": 0.0053, + "step": 65870 + }, + { + "epoch": 0.4225338562628073, + "grad_norm": 0.18316306173801422, + "learning_rate": 9.62760102208101e-06, + "loss": 0.0036, + "step": 65880 + }, + { + "epoch": 0.4225979931565934, + "grad_norm": 0.1757930964231491, + "learning_rate": 9.627389034409247e-06, + "loss": 0.0036, + "step": 65890 + }, + { + "epoch": 0.4226621300503795, + "grad_norm": 0.1982915848493576, + "learning_rate": 9.627176988752992e-06, + "loss": 0.0043, + "step": 65900 + }, + { + "epoch": 0.42272626694416565, + "grad_norm": 0.1785518229007721, + "learning_rate": 9.626964885114902e-06, + "loss": 0.0061, + "step": 65910 + }, + { + "epoch": 0.42279040383795174, + "grad_norm": 0.19323821365833282, + "learning_rate": 9.626752723497637e-06, + "loss": 0.0064, + "step": 65920 + }, + { + "epoch": 0.42285454073173784, + "grad_norm": 0.15979072451591492, + "learning_rate": 9.626540503903852e-06, + "loss": 0.004, + "step": 65930 + }, + { + "epoch": 0.42291867762552393, + "grad_norm": 0.10884411633014679, + "learning_rate": 9.626328226336209e-06, + "loss": 0.0066, + "step": 65940 + }, + { + "epoch": 0.42298281451931, + "grad_norm": 0.21978871524333954, + "learning_rate": 9.626115890797365e-06, + "loss": 0.004, + "step": 65950 + }, + { + "epoch": 0.4230469514130961, + "grad_norm": 0.10031398385763168, + "learning_rate": 9.625903497289984e-06, + "loss": 0.0029, + "step": 65960 + }, + { + "epoch": 0.4231110883068822, + "grad_norm": 0.10546796768903732, + "learning_rate": 9.625691045816726e-06, + "loss": 0.0042, + "step": 65970 + }, + { + "epoch": 0.4231752252006683, + "grad_norm": 0.11720538139343262, + "learning_rate": 9.625478536380252e-06, + "loss": 0.0058, + "step": 65980 + }, + { + "epoch": 0.4232393620944544, + "grad_norm": 0.044053900986909866, + "learning_rate": 9.625265968983228e-06, + "loss": 0.0031, + "step": 65990 + }, + { + "epoch": 0.4233034989882405, + "grad_norm": 0.29401299357414246, + "learning_rate": 9.625053343628316e-06, + "loss": 0.0053, + "step": 66000 + }, + { + "epoch": 0.4233676358820266, + "grad_norm": 0.11990197002887726, + "learning_rate": 9.62484066031818e-06, + "loss": 0.0048, + "step": 66010 + }, + { + "epoch": 0.4234317727758127, + "grad_norm": 0.23408041894435883, + "learning_rate": 9.624627919055484e-06, + "loss": 0.0041, + "step": 66020 + }, + { + "epoch": 0.42349590966959877, + "grad_norm": 0.16822808980941772, + "learning_rate": 9.624415119842895e-06, + "loss": 0.0032, + "step": 66030 + }, + { + "epoch": 0.42356004656338486, + "grad_norm": 0.3313102722167969, + "learning_rate": 9.62420226268308e-06, + "loss": 0.005, + "step": 66040 + }, + { + "epoch": 0.423624183457171, + "grad_norm": 0.21190787851810455, + "learning_rate": 9.623989347578706e-06, + "loss": 0.0029, + "step": 66050 + }, + { + "epoch": 0.4236883203509571, + "grad_norm": 0.1187615618109703, + "learning_rate": 9.623776374532442e-06, + "loss": 0.0078, + "step": 66060 + }, + { + "epoch": 0.4237524572447432, + "grad_norm": 0.20854294300079346, + "learning_rate": 9.623563343546954e-06, + "loss": 0.0039, + "step": 66070 + }, + { + "epoch": 0.4238165941385293, + "grad_norm": 0.23553693294525146, + "learning_rate": 9.62335025462491e-06, + "loss": 0.0047, + "step": 66080 + }, + { + "epoch": 0.4238807310323154, + "grad_norm": 0.042550235986709595, + "learning_rate": 9.623137107768987e-06, + "loss": 0.0044, + "step": 66090 + }, + { + "epoch": 0.4239448679261015, + "grad_norm": 0.12277571856975555, + "learning_rate": 9.62292390298185e-06, + "loss": 0.0039, + "step": 66100 + }, + { + "epoch": 0.42400900481988757, + "grad_norm": 0.24997268617153168, + "learning_rate": 9.622710640266175e-06, + "loss": 0.0053, + "step": 66110 + }, + { + "epoch": 0.42407314171367366, + "grad_norm": 0.11305372416973114, + "learning_rate": 9.62249731962463e-06, + "loss": 0.0045, + "step": 66120 + }, + { + "epoch": 0.42413727860745976, + "grad_norm": 0.3188675045967102, + "learning_rate": 9.62228394105989e-06, + "loss": 0.0035, + "step": 66130 + }, + { + "epoch": 0.42420141550124585, + "grad_norm": 0.15568873286247253, + "learning_rate": 9.62207050457463e-06, + "loss": 0.0042, + "step": 66140 + }, + { + "epoch": 0.42426555239503194, + "grad_norm": 0.3959549069404602, + "learning_rate": 9.621857010171523e-06, + "loss": 0.0045, + "step": 66150 + }, + { + "epoch": 0.42432968928881803, + "grad_norm": 0.11153994500637054, + "learning_rate": 9.621643457853243e-06, + "loss": 0.0048, + "step": 66160 + }, + { + "epoch": 0.42439382618260413, + "grad_norm": 0.08319900184869766, + "learning_rate": 9.621429847622467e-06, + "loss": 0.0032, + "step": 66170 + }, + { + "epoch": 0.4244579630763902, + "grad_norm": 0.2318873405456543, + "learning_rate": 9.621216179481874e-06, + "loss": 0.0062, + "step": 66180 + }, + { + "epoch": 0.42452209997017637, + "grad_norm": 0.16006936132907867, + "learning_rate": 9.621002453434138e-06, + "loss": 0.0054, + "step": 66190 + }, + { + "epoch": 0.42458623686396246, + "grad_norm": 0.24659541249275208, + "learning_rate": 9.62078866948194e-06, + "loss": 0.0031, + "step": 66200 + }, + { + "epoch": 0.42465037375774856, + "grad_norm": 0.0987880527973175, + "learning_rate": 9.620574827627957e-06, + "loss": 0.0038, + "step": 66210 + }, + { + "epoch": 0.42471451065153465, + "grad_norm": 0.18199047446250916, + "learning_rate": 9.62036092787487e-06, + "loss": 0.0044, + "step": 66220 + }, + { + "epoch": 0.42477864754532074, + "grad_norm": 0.16193823516368866, + "learning_rate": 9.620146970225357e-06, + "loss": 0.0042, + "step": 66230 + }, + { + "epoch": 0.42484278443910684, + "grad_norm": 0.32942768931388855, + "learning_rate": 9.6199329546821e-06, + "loss": 0.0044, + "step": 66240 + }, + { + "epoch": 0.42490692133289293, + "grad_norm": 0.30845940113067627, + "learning_rate": 9.619718881247784e-06, + "loss": 0.006, + "step": 66250 + }, + { + "epoch": 0.424971058226679, + "grad_norm": 0.1413591355085373, + "learning_rate": 9.619504749925086e-06, + "loss": 0.0048, + "step": 66260 + }, + { + "epoch": 0.4250351951204651, + "grad_norm": 0.019584035500884056, + "learning_rate": 9.619290560716694e-06, + "loss": 0.0022, + "step": 66270 + }, + { + "epoch": 0.4250993320142512, + "grad_norm": 0.02616703324019909, + "learning_rate": 9.619076313625289e-06, + "loss": 0.004, + "step": 66280 + }, + { + "epoch": 0.4251634689080373, + "grad_norm": 0.3452214300632477, + "learning_rate": 9.618862008653556e-06, + "loss": 0.0038, + "step": 66290 + }, + { + "epoch": 0.4252276058018234, + "grad_norm": 0.06459818035364151, + "learning_rate": 9.618647645804182e-06, + "loss": 0.0047, + "step": 66300 + }, + { + "epoch": 0.4252917426956095, + "grad_norm": 0.15330830216407776, + "learning_rate": 9.618433225079852e-06, + "loss": 0.0035, + "step": 66310 + }, + { + "epoch": 0.4253558795893956, + "grad_norm": 0.17286977171897888, + "learning_rate": 9.618218746483251e-06, + "loss": 0.0037, + "step": 66320 + }, + { + "epoch": 0.42542001648318173, + "grad_norm": 0.498504102230072, + "learning_rate": 9.618004210017072e-06, + "loss": 0.0046, + "step": 66330 + }, + { + "epoch": 0.4254841533769678, + "grad_norm": 0.043882135301828384, + "learning_rate": 9.617789615683998e-06, + "loss": 0.0043, + "step": 66340 + }, + { + "epoch": 0.4255482902707539, + "grad_norm": 0.15280118584632874, + "learning_rate": 9.61757496348672e-06, + "loss": 0.0039, + "step": 66350 + }, + { + "epoch": 0.42561242716454, + "grad_norm": 0.1655089557170868, + "learning_rate": 9.617360253427927e-06, + "loss": 0.0027, + "step": 66360 + }, + { + "epoch": 0.4256765640583261, + "grad_norm": 0.11018262803554535, + "learning_rate": 9.617145485510311e-06, + "loss": 0.0031, + "step": 66370 + }, + { + "epoch": 0.4257407009521122, + "grad_norm": 0.16910359263420105, + "learning_rate": 9.616930659736561e-06, + "loss": 0.0051, + "step": 66380 + }, + { + "epoch": 0.4258048378458983, + "grad_norm": 0.38406896591186523, + "learning_rate": 9.61671577610937e-06, + "loss": 0.0055, + "step": 66390 + }, + { + "epoch": 0.4258689747396844, + "grad_norm": 0.2939324378967285, + "learning_rate": 9.616500834631433e-06, + "loss": 0.0027, + "step": 66400 + }, + { + "epoch": 0.4259331116334705, + "grad_norm": 0.24132554233074188, + "learning_rate": 9.61628583530544e-06, + "loss": 0.003, + "step": 66410 + }, + { + "epoch": 0.42599724852725657, + "grad_norm": 0.3862217664718628, + "learning_rate": 9.616070778134085e-06, + "loss": 0.0046, + "step": 66420 + }, + { + "epoch": 0.42606138542104266, + "grad_norm": 0.2583540081977844, + "learning_rate": 9.615855663120066e-06, + "loss": 0.0038, + "step": 66430 + }, + { + "epoch": 0.42612552231482875, + "grad_norm": 0.07580406218767166, + "learning_rate": 9.615640490266074e-06, + "loss": 0.0042, + "step": 66440 + }, + { + "epoch": 0.42618965920861485, + "grad_norm": 0.22733516991138458, + "learning_rate": 9.615425259574812e-06, + "loss": 0.0039, + "step": 66450 + }, + { + "epoch": 0.42625379610240094, + "grad_norm": 0.25381848216056824, + "learning_rate": 9.61520997104897e-06, + "loss": 0.0045, + "step": 66460 + }, + { + "epoch": 0.4263179329961871, + "grad_norm": 0.107947438955307, + "learning_rate": 9.614994624691248e-06, + "loss": 0.0029, + "step": 66470 + }, + { + "epoch": 0.4263820698899732, + "grad_norm": 0.11911238729953766, + "learning_rate": 9.614779220504347e-06, + "loss": 0.0034, + "step": 66480 + }, + { + "epoch": 0.4264462067837593, + "grad_norm": 0.26806512475013733, + "learning_rate": 9.614563758490963e-06, + "loss": 0.0039, + "step": 66490 + }, + { + "epoch": 0.42651034367754537, + "grad_norm": 0.25328999757766724, + "learning_rate": 9.6143482386538e-06, + "loss": 0.0038, + "step": 66500 + }, + { + "epoch": 0.42657448057133146, + "grad_norm": 0.1805124580860138, + "learning_rate": 9.614132660995553e-06, + "loss": 0.0036, + "step": 66510 + }, + { + "epoch": 0.42663861746511755, + "grad_norm": 0.32967978715896606, + "learning_rate": 9.613917025518925e-06, + "loss": 0.0045, + "step": 66520 + }, + { + "epoch": 0.42670275435890365, + "grad_norm": 0.12399055808782578, + "learning_rate": 9.613701332226622e-06, + "loss": 0.0048, + "step": 66530 + }, + { + "epoch": 0.42676689125268974, + "grad_norm": 0.17118918895721436, + "learning_rate": 9.61348558112134e-06, + "loss": 0.0038, + "step": 66540 + }, + { + "epoch": 0.42683102814647583, + "grad_norm": 0.15596435964107513, + "learning_rate": 9.613269772205791e-06, + "loss": 0.0083, + "step": 66550 + }, + { + "epoch": 0.4268951650402619, + "grad_norm": 0.11981067806482315, + "learning_rate": 9.613053905482672e-06, + "loss": 0.0028, + "step": 66560 + }, + { + "epoch": 0.426959301934048, + "grad_norm": 0.2925792932510376, + "learning_rate": 9.612837980954692e-06, + "loss": 0.0056, + "step": 66570 + }, + { + "epoch": 0.4270234388278341, + "grad_norm": 0.07776892930269241, + "learning_rate": 9.612621998624554e-06, + "loss": 0.005, + "step": 66580 + }, + { + "epoch": 0.4270875757216202, + "grad_norm": 0.20200687646865845, + "learning_rate": 9.612405958494967e-06, + "loss": 0.0029, + "step": 66590 + }, + { + "epoch": 0.4271517126154063, + "grad_norm": 0.1193302720785141, + "learning_rate": 9.612189860568636e-06, + "loss": 0.0036, + "step": 66600 + }, + { + "epoch": 0.42721584950919245, + "grad_norm": 0.19039000570774078, + "learning_rate": 9.61197370484827e-06, + "loss": 0.0042, + "step": 66610 + }, + { + "epoch": 0.42727998640297854, + "grad_norm": 0.21106891334056854, + "learning_rate": 9.611757491336578e-06, + "loss": 0.0055, + "step": 66620 + }, + { + "epoch": 0.42734412329676463, + "grad_norm": 0.11191011965274811, + "learning_rate": 9.61154122003627e-06, + "loss": 0.0037, + "step": 66630 + }, + { + "epoch": 0.4274082601905507, + "grad_norm": 0.15065521001815796, + "learning_rate": 9.611324890950052e-06, + "loss": 0.0037, + "step": 66640 + }, + { + "epoch": 0.4274723970843368, + "grad_norm": 0.21732980012893677, + "learning_rate": 9.611108504080637e-06, + "loss": 0.0054, + "step": 66650 + }, + { + "epoch": 0.4275365339781229, + "grad_norm": 0.15280568599700928, + "learning_rate": 9.610892059430738e-06, + "loss": 0.0032, + "step": 66660 + }, + { + "epoch": 0.427600670871909, + "grad_norm": 0.14303740859031677, + "learning_rate": 9.610675557003067e-06, + "loss": 0.0106, + "step": 66670 + }, + { + "epoch": 0.4276648077656951, + "grad_norm": 0.30987024307250977, + "learning_rate": 9.610458996800336e-06, + "loss": 0.0051, + "step": 66680 + }, + { + "epoch": 0.4277289446594812, + "grad_norm": 0.16005373001098633, + "learning_rate": 9.610242378825258e-06, + "loss": 0.0045, + "step": 66690 + }, + { + "epoch": 0.4277930815532673, + "grad_norm": 0.23138290643692017, + "learning_rate": 9.61002570308055e-06, + "loss": 0.0036, + "step": 66700 + }, + { + "epoch": 0.4278572184470534, + "grad_norm": 0.29233992099761963, + "learning_rate": 9.609808969568922e-06, + "loss": 0.0039, + "step": 66710 + }, + { + "epoch": 0.42792135534083947, + "grad_norm": 0.256077378988266, + "learning_rate": 9.609592178293095e-06, + "loss": 0.0046, + "step": 66720 + }, + { + "epoch": 0.42798549223462556, + "grad_norm": 0.24460352957248688, + "learning_rate": 9.609375329255784e-06, + "loss": 0.0058, + "step": 66730 + }, + { + "epoch": 0.42804962912841166, + "grad_norm": 0.5737667083740234, + "learning_rate": 9.609158422459703e-06, + "loss": 0.0045, + "step": 66740 + }, + { + "epoch": 0.4281137660221978, + "grad_norm": 0.09366708993911743, + "learning_rate": 9.608941457907576e-06, + "loss": 0.0023, + "step": 66750 + }, + { + "epoch": 0.4281779029159839, + "grad_norm": 0.12595345079898834, + "learning_rate": 9.608724435602117e-06, + "loss": 0.0044, + "step": 66760 + }, + { + "epoch": 0.42824203980977, + "grad_norm": 0.27345553040504456, + "learning_rate": 9.608507355546048e-06, + "loss": 0.0031, + "step": 66770 + }, + { + "epoch": 0.4283061767035561, + "grad_norm": 0.261642187833786, + "learning_rate": 9.608290217742086e-06, + "loss": 0.0047, + "step": 66780 + }, + { + "epoch": 0.4283703135973422, + "grad_norm": 0.16929571330547333, + "learning_rate": 9.608073022192956e-06, + "loss": 0.0035, + "step": 66790 + }, + { + "epoch": 0.42843445049112827, + "grad_norm": 0.07420452684164047, + "learning_rate": 9.607855768901378e-06, + "loss": 0.0048, + "step": 66800 + }, + { + "epoch": 0.42849858738491436, + "grad_norm": 0.04938085377216339, + "learning_rate": 9.607638457870075e-06, + "loss": 0.0025, + "step": 66810 + }, + { + "epoch": 0.42856272427870046, + "grad_norm": 0.0963282585144043, + "learning_rate": 9.607421089101767e-06, + "loss": 0.0033, + "step": 66820 + }, + { + "epoch": 0.42862686117248655, + "grad_norm": 0.1891966015100479, + "learning_rate": 9.60720366259918e-06, + "loss": 0.0021, + "step": 66830 + }, + { + "epoch": 0.42869099806627264, + "grad_norm": 0.15832215547561646, + "learning_rate": 9.606986178365038e-06, + "loss": 0.0037, + "step": 66840 + }, + { + "epoch": 0.42875513496005874, + "grad_norm": 0.39101964235305786, + "learning_rate": 9.606768636402069e-06, + "loss": 0.0054, + "step": 66850 + }, + { + "epoch": 0.42881927185384483, + "grad_norm": 0.17650574445724487, + "learning_rate": 9.606551036712995e-06, + "loss": 0.0077, + "step": 66860 + }, + { + "epoch": 0.4288834087476309, + "grad_norm": 0.15891918540000916, + "learning_rate": 9.606333379300542e-06, + "loss": 0.0047, + "step": 66870 + }, + { + "epoch": 0.428947545641417, + "grad_norm": 0.18898074328899384, + "learning_rate": 9.606115664167443e-06, + "loss": 0.0026, + "step": 66880 + }, + { + "epoch": 0.42901168253520316, + "grad_norm": 0.05703195556998253, + "learning_rate": 9.605897891316422e-06, + "loss": 0.0038, + "step": 66890 + }, + { + "epoch": 0.42907581942898926, + "grad_norm": 0.24253630638122559, + "learning_rate": 9.605680060750208e-06, + "loss": 0.004, + "step": 66900 + }, + { + "epoch": 0.42913995632277535, + "grad_norm": 0.3023951053619385, + "learning_rate": 9.605462172471532e-06, + "loss": 0.0038, + "step": 66910 + }, + { + "epoch": 0.42920409321656144, + "grad_norm": 0.13640546798706055, + "learning_rate": 9.605244226483123e-06, + "loss": 0.005, + "step": 66920 + }, + { + "epoch": 0.42926823011034754, + "grad_norm": 0.2519367039203644, + "learning_rate": 9.605026222787712e-06, + "loss": 0.0038, + "step": 66930 + }, + { + "epoch": 0.42933236700413363, + "grad_norm": 0.26465660333633423, + "learning_rate": 9.604808161388034e-06, + "loss": 0.0032, + "step": 66940 + }, + { + "epoch": 0.4293965038979197, + "grad_norm": 0.16541948914527893, + "learning_rate": 9.604590042286814e-06, + "loss": 0.0039, + "step": 66950 + }, + { + "epoch": 0.4294606407917058, + "grad_norm": 0.20352678000926971, + "learning_rate": 9.604371865486793e-06, + "loss": 0.0037, + "step": 66960 + }, + { + "epoch": 0.4295247776854919, + "grad_norm": 0.323935866355896, + "learning_rate": 9.6041536309907e-06, + "loss": 0.0042, + "step": 66970 + }, + { + "epoch": 0.429588914579278, + "grad_norm": 0.22220510244369507, + "learning_rate": 9.603935338801275e-06, + "loss": 0.0039, + "step": 66980 + }, + { + "epoch": 0.4296530514730641, + "grad_norm": 0.20077809691429138, + "learning_rate": 9.603716988921249e-06, + "loss": 0.0046, + "step": 66990 + }, + { + "epoch": 0.4297171883668502, + "grad_norm": 0.05188808962702751, + "learning_rate": 9.603498581353355e-06, + "loss": 0.0047, + "step": 67000 + }, + { + "epoch": 0.4297813252606363, + "grad_norm": 0.1651322990655899, + "learning_rate": 9.603280116100336e-06, + "loss": 0.007, + "step": 67010 + }, + { + "epoch": 0.4298454621544224, + "grad_norm": 0.19030052423477173, + "learning_rate": 9.603061593164928e-06, + "loss": 0.0094, + "step": 67020 + }, + { + "epoch": 0.4299095990482085, + "grad_norm": 0.316009521484375, + "learning_rate": 9.602843012549867e-06, + "loss": 0.0039, + "step": 67030 + }, + { + "epoch": 0.4299737359419946, + "grad_norm": 0.05603104084730148, + "learning_rate": 9.602624374257897e-06, + "loss": 0.0031, + "step": 67040 + }, + { + "epoch": 0.4300378728357807, + "grad_norm": 0.1649898737668991, + "learning_rate": 9.602405678291751e-06, + "loss": 0.003, + "step": 67050 + }, + { + "epoch": 0.4301020097295668, + "grad_norm": 0.1882244497537613, + "learning_rate": 9.602186924654172e-06, + "loss": 0.0057, + "step": 67060 + }, + { + "epoch": 0.4301661466233529, + "grad_norm": 0.2369825690984726, + "learning_rate": 9.601968113347904e-06, + "loss": 0.0065, + "step": 67070 + }, + { + "epoch": 0.430230283517139, + "grad_norm": 0.24803709983825684, + "learning_rate": 9.601749244375684e-06, + "loss": 0.0038, + "step": 67080 + }, + { + "epoch": 0.4302944204109251, + "grad_norm": 0.2131635546684265, + "learning_rate": 9.601530317740258e-06, + "loss": 0.0044, + "step": 67090 + }, + { + "epoch": 0.4303585573047112, + "grad_norm": 0.05162657052278519, + "learning_rate": 9.60131133344437e-06, + "loss": 0.0046, + "step": 67100 + }, + { + "epoch": 0.43042269419849727, + "grad_norm": 0.11941119283437729, + "learning_rate": 9.601092291490761e-06, + "loss": 0.0047, + "step": 67110 + }, + { + "epoch": 0.43048683109228336, + "grad_norm": 0.3676694929599762, + "learning_rate": 9.600873191882178e-06, + "loss": 0.0037, + "step": 67120 + }, + { + "epoch": 0.43055096798606945, + "grad_norm": 0.1545587182044983, + "learning_rate": 9.600654034621366e-06, + "loss": 0.0046, + "step": 67130 + }, + { + "epoch": 0.43061510487985555, + "grad_norm": 0.21092180907726288, + "learning_rate": 9.600434819711068e-06, + "loss": 0.004, + "step": 67140 + }, + { + "epoch": 0.43067924177364164, + "grad_norm": 0.24385568499565125, + "learning_rate": 9.600215547154037e-06, + "loss": 0.0041, + "step": 67150 + }, + { + "epoch": 0.43074337866742773, + "grad_norm": 0.06852970272302628, + "learning_rate": 9.599996216953017e-06, + "loss": 0.0056, + "step": 67160 + }, + { + "epoch": 0.4308075155612138, + "grad_norm": 0.14821739494800568, + "learning_rate": 9.599776829110757e-06, + "loss": 0.0034, + "step": 67170 + }, + { + "epoch": 0.430871652455, + "grad_norm": 0.11472298949956894, + "learning_rate": 9.599557383630005e-06, + "loss": 0.0078, + "step": 67180 + }, + { + "epoch": 0.43093578934878607, + "grad_norm": 0.08596605807542801, + "learning_rate": 9.599337880513511e-06, + "loss": 0.0043, + "step": 67190 + }, + { + "epoch": 0.43099992624257216, + "grad_norm": 0.23955640196800232, + "learning_rate": 9.599118319764028e-06, + "loss": 0.0047, + "step": 67200 + }, + { + "epoch": 0.43106406313635826, + "grad_norm": 0.1155315563082695, + "learning_rate": 9.598898701384306e-06, + "loss": 0.0032, + "step": 67210 + }, + { + "epoch": 0.43112820003014435, + "grad_norm": 0.0761498361825943, + "learning_rate": 9.598679025377095e-06, + "loss": 0.0038, + "step": 67220 + }, + { + "epoch": 0.43119233692393044, + "grad_norm": 0.41123166680336, + "learning_rate": 9.59845929174515e-06, + "loss": 0.0028, + "step": 67230 + }, + { + "epoch": 0.43125647381771653, + "grad_norm": 0.05414069443941116, + "learning_rate": 9.598239500491222e-06, + "loss": 0.0039, + "step": 67240 + }, + { + "epoch": 0.43132061071150263, + "grad_norm": 0.10585710406303406, + "learning_rate": 9.598019651618068e-06, + "loss": 0.0042, + "step": 67250 + }, + { + "epoch": 0.4313847476052887, + "grad_norm": 0.09694528579711914, + "learning_rate": 9.597799745128442e-06, + "loss": 0.0043, + "step": 67260 + }, + { + "epoch": 0.4314488844990748, + "grad_norm": 0.09088045358657837, + "learning_rate": 9.597579781025098e-06, + "loss": 0.003, + "step": 67270 + }, + { + "epoch": 0.4315130213928609, + "grad_norm": 0.07393519580364227, + "learning_rate": 9.597359759310793e-06, + "loss": 0.0053, + "step": 67280 + }, + { + "epoch": 0.431577158286647, + "grad_norm": 0.23588484525680542, + "learning_rate": 9.597139679988287e-06, + "loss": 0.0023, + "step": 67290 + }, + { + "epoch": 0.4316412951804331, + "grad_norm": 0.13709813356399536, + "learning_rate": 9.596919543060334e-06, + "loss": 0.004, + "step": 67300 + }, + { + "epoch": 0.4317054320742192, + "grad_norm": 0.27046436071395874, + "learning_rate": 9.596699348529695e-06, + "loss": 0.0054, + "step": 67310 + }, + { + "epoch": 0.43176956896800534, + "grad_norm": 0.1580420732498169, + "learning_rate": 9.596479096399125e-06, + "loss": 0.0063, + "step": 67320 + }, + { + "epoch": 0.43183370586179143, + "grad_norm": 0.4832228124141693, + "learning_rate": 9.596258786671389e-06, + "loss": 0.0039, + "step": 67330 + }, + { + "epoch": 0.4318978427555775, + "grad_norm": 0.11211079359054565, + "learning_rate": 9.596038419349246e-06, + "loss": 0.0031, + "step": 67340 + }, + { + "epoch": 0.4319619796493636, + "grad_norm": 0.22395701706409454, + "learning_rate": 9.595817994435454e-06, + "loss": 0.0065, + "step": 67350 + }, + { + "epoch": 0.4320261165431497, + "grad_norm": 0.1439417004585266, + "learning_rate": 9.595597511932782e-06, + "loss": 0.003, + "step": 67360 + }, + { + "epoch": 0.4320902534369358, + "grad_norm": 0.06668273359537125, + "learning_rate": 9.595376971843986e-06, + "loss": 0.0043, + "step": 67370 + }, + { + "epoch": 0.4321543903307219, + "grad_norm": 0.11449944972991943, + "learning_rate": 9.595156374171833e-06, + "loss": 0.0031, + "step": 67380 + }, + { + "epoch": 0.432218527224508, + "grad_norm": 0.13027827441692352, + "learning_rate": 9.594935718919086e-06, + "loss": 0.0026, + "step": 67390 + }, + { + "epoch": 0.4322826641182941, + "grad_norm": 0.3017429709434509, + "learning_rate": 9.594715006088511e-06, + "loss": 0.0033, + "step": 67400 + }, + { + "epoch": 0.4323468010120802, + "grad_norm": 0.15744541585445404, + "learning_rate": 9.594494235682873e-06, + "loss": 0.004, + "step": 67410 + }, + { + "epoch": 0.43241093790586627, + "grad_norm": 0.162835031747818, + "learning_rate": 9.594273407704938e-06, + "loss": 0.0049, + "step": 67420 + }, + { + "epoch": 0.43247507479965236, + "grad_norm": 0.1257157027721405, + "learning_rate": 9.594052522157476e-06, + "loss": 0.0026, + "step": 67430 + }, + { + "epoch": 0.43253921169343845, + "grad_norm": 0.14831848442554474, + "learning_rate": 9.59383157904325e-06, + "loss": 0.0048, + "step": 67440 + }, + { + "epoch": 0.43260334858722455, + "grad_norm": 0.1549537628889084, + "learning_rate": 9.593610578365033e-06, + "loss": 0.0039, + "step": 67450 + }, + { + "epoch": 0.4326674854810107, + "grad_norm": 0.07446286827325821, + "learning_rate": 9.593389520125591e-06, + "loss": 0.0077, + "step": 67460 + }, + { + "epoch": 0.4327316223747968, + "grad_norm": 0.33825135231018066, + "learning_rate": 9.593168404327697e-06, + "loss": 0.0031, + "step": 67470 + }, + { + "epoch": 0.4327957592685829, + "grad_norm": 0.18623273074626923, + "learning_rate": 9.592947230974119e-06, + "loss": 0.0042, + "step": 67480 + }, + { + "epoch": 0.432859896162369, + "grad_norm": 0.0629829540848732, + "learning_rate": 9.592726000067629e-06, + "loss": 0.0027, + "step": 67490 + }, + { + "epoch": 0.43292403305615507, + "grad_norm": 0.07424663752317429, + "learning_rate": 9.592504711611001e-06, + "loss": 0.0043, + "step": 67500 + }, + { + "epoch": 0.43298816994994116, + "grad_norm": 0.1887056678533554, + "learning_rate": 9.592283365607008e-06, + "loss": 0.0039, + "step": 67510 + }, + { + "epoch": 0.43305230684372725, + "grad_norm": 0.11508423835039139, + "learning_rate": 9.592061962058418e-06, + "loss": 0.0032, + "step": 67520 + }, + { + "epoch": 0.43311644373751335, + "grad_norm": 0.24281243979930878, + "learning_rate": 9.591840500968014e-06, + "loss": 0.0035, + "step": 67530 + }, + { + "epoch": 0.43318058063129944, + "grad_norm": 0.11407119780778885, + "learning_rate": 9.591618982338565e-06, + "loss": 0.0015, + "step": 67540 + }, + { + "epoch": 0.43324471752508553, + "grad_norm": 0.16879330575466156, + "learning_rate": 9.591397406172848e-06, + "loss": 0.0033, + "step": 67550 + }, + { + "epoch": 0.4333088544188716, + "grad_norm": 0.23465494811534882, + "learning_rate": 9.591175772473642e-06, + "loss": 0.0034, + "step": 67560 + }, + { + "epoch": 0.4333729913126577, + "grad_norm": 0.13142143189907074, + "learning_rate": 9.590954081243722e-06, + "loss": 0.0044, + "step": 67570 + }, + { + "epoch": 0.4334371282064438, + "grad_norm": 0.15959471464157104, + "learning_rate": 9.590732332485865e-06, + "loss": 0.0036, + "step": 67580 + }, + { + "epoch": 0.4335012651002299, + "grad_norm": 0.26229506731033325, + "learning_rate": 9.590510526202852e-06, + "loss": 0.0048, + "step": 67590 + }, + { + "epoch": 0.43356540199401605, + "grad_norm": 0.39532706141471863, + "learning_rate": 9.590288662397462e-06, + "loss": 0.0033, + "step": 67600 + }, + { + "epoch": 0.43362953888780215, + "grad_norm": 0.1433018445968628, + "learning_rate": 9.590066741072472e-06, + "loss": 0.0032, + "step": 67610 + }, + { + "epoch": 0.43369367578158824, + "grad_norm": 0.771902859210968, + "learning_rate": 9.589844762230666e-06, + "loss": 0.0045, + "step": 67620 + }, + { + "epoch": 0.43375781267537433, + "grad_norm": 0.3603050112724304, + "learning_rate": 9.589622725874827e-06, + "loss": 0.0036, + "step": 67630 + }, + { + "epoch": 0.4338219495691604, + "grad_norm": 0.04795801639556885, + "learning_rate": 9.589400632007733e-06, + "loss": 0.0023, + "step": 67640 + }, + { + "epoch": 0.4338860864629465, + "grad_norm": 0.2554285228252411, + "learning_rate": 9.589178480632169e-06, + "loss": 0.0032, + "step": 67650 + }, + { + "epoch": 0.4339502233567326, + "grad_norm": 0.08371347934007645, + "learning_rate": 9.58895627175092e-06, + "loss": 0.0014, + "step": 67660 + }, + { + "epoch": 0.4340143602505187, + "grad_norm": 0.3796055018901825, + "learning_rate": 9.588734005366768e-06, + "loss": 0.0032, + "step": 67670 + }, + { + "epoch": 0.4340784971443048, + "grad_norm": 0.20644047856330872, + "learning_rate": 9.588511681482499e-06, + "loss": 0.0047, + "step": 67680 + }, + { + "epoch": 0.4341426340380909, + "grad_norm": 0.23971392214298248, + "learning_rate": 9.588289300100901e-06, + "loss": 0.0063, + "step": 67690 + }, + { + "epoch": 0.434206770931877, + "grad_norm": 0.15959949791431427, + "learning_rate": 9.588066861224758e-06, + "loss": 0.003, + "step": 67700 + }, + { + "epoch": 0.4342709078256631, + "grad_norm": 0.3129645884037018, + "learning_rate": 9.58784436485686e-06, + "loss": 0.0075, + "step": 67710 + }, + { + "epoch": 0.43433504471944917, + "grad_norm": 0.18099181354045868, + "learning_rate": 9.58762181099999e-06, + "loss": 0.0032, + "step": 67720 + }, + { + "epoch": 0.43439918161323526, + "grad_norm": 0.3097049295902252, + "learning_rate": 9.587399199656941e-06, + "loss": 0.0118, + "step": 67730 + }, + { + "epoch": 0.4344633185070214, + "grad_norm": 0.2578749656677246, + "learning_rate": 9.587176530830503e-06, + "loss": 0.0046, + "step": 67740 + }, + { + "epoch": 0.4345274554008075, + "grad_norm": 0.19821570813655853, + "learning_rate": 9.586953804523465e-06, + "loss": 0.0029, + "step": 67750 + }, + { + "epoch": 0.4345915922945936, + "grad_norm": 0.2031194269657135, + "learning_rate": 9.586731020738615e-06, + "loss": 0.0039, + "step": 67760 + }, + { + "epoch": 0.4346557291883797, + "grad_norm": 0.16306443512439728, + "learning_rate": 9.586508179478749e-06, + "loss": 0.0029, + "step": 67770 + }, + { + "epoch": 0.4347198660821658, + "grad_norm": 0.12638349831104279, + "learning_rate": 9.586285280746657e-06, + "loss": 0.003, + "step": 67780 + }, + { + "epoch": 0.4347840029759519, + "grad_norm": 0.23570360243320465, + "learning_rate": 9.586062324545131e-06, + "loss": 0.0038, + "step": 67790 + }, + { + "epoch": 0.43484813986973797, + "grad_norm": 0.17586968839168549, + "learning_rate": 9.585839310876969e-06, + "loss": 0.0027, + "step": 67800 + }, + { + "epoch": 0.43491227676352406, + "grad_norm": 0.1429784595966339, + "learning_rate": 9.585616239744963e-06, + "loss": 0.0035, + "step": 67810 + }, + { + "epoch": 0.43497641365731016, + "grad_norm": 0.17840762436389923, + "learning_rate": 9.585393111151908e-06, + "loss": 0.0034, + "step": 67820 + }, + { + "epoch": 0.43504055055109625, + "grad_norm": 0.04029030352830887, + "learning_rate": 9.585169925100599e-06, + "loss": 0.0044, + "step": 67830 + }, + { + "epoch": 0.43510468744488234, + "grad_norm": 0.2338280975818634, + "learning_rate": 9.584946681593834e-06, + "loss": 0.0038, + "step": 67840 + }, + { + "epoch": 0.43516882433866844, + "grad_norm": 0.36239415407180786, + "learning_rate": 9.58472338063441e-06, + "loss": 0.0021, + "step": 67850 + }, + { + "epoch": 0.43523296123245453, + "grad_norm": 0.2979362905025482, + "learning_rate": 9.584500022225128e-06, + "loss": 0.0057, + "step": 67860 + }, + { + "epoch": 0.4352970981262406, + "grad_norm": 0.2265724241733551, + "learning_rate": 9.584276606368781e-06, + "loss": 0.0047, + "step": 67870 + }, + { + "epoch": 0.43536123502002677, + "grad_norm": 0.09479842334985733, + "learning_rate": 9.584053133068173e-06, + "loss": 0.0028, + "step": 67880 + }, + { + "epoch": 0.43542537191381286, + "grad_norm": 0.08840498328208923, + "learning_rate": 9.583829602326104e-06, + "loss": 0.0027, + "step": 67890 + }, + { + "epoch": 0.43548950880759896, + "grad_norm": 0.10118495672941208, + "learning_rate": 9.583606014145373e-06, + "loss": 0.0054, + "step": 67900 + }, + { + "epoch": 0.43555364570138505, + "grad_norm": 0.3275286555290222, + "learning_rate": 9.583382368528781e-06, + "loss": 0.0027, + "step": 67910 + }, + { + "epoch": 0.43561778259517114, + "grad_norm": 0.10471635311841965, + "learning_rate": 9.583158665479135e-06, + "loss": 0.0033, + "step": 67920 + }, + { + "epoch": 0.43568191948895724, + "grad_norm": 0.17559470236301422, + "learning_rate": 9.582934904999234e-06, + "loss": 0.0038, + "step": 67930 + }, + { + "epoch": 0.43574605638274333, + "grad_norm": 0.09734024107456207, + "learning_rate": 9.582711087091884e-06, + "loss": 0.0036, + "step": 67940 + }, + { + "epoch": 0.4358101932765294, + "grad_norm": 0.12025992572307587, + "learning_rate": 9.58248721175989e-06, + "loss": 0.0043, + "step": 67950 + }, + { + "epoch": 0.4358743301703155, + "grad_norm": 0.38899630308151245, + "learning_rate": 9.582263279006055e-06, + "loss": 0.0033, + "step": 67960 + }, + { + "epoch": 0.4359384670641016, + "grad_norm": 0.24911749362945557, + "learning_rate": 9.582039288833187e-06, + "loss": 0.0056, + "step": 67970 + }, + { + "epoch": 0.4360026039578877, + "grad_norm": 0.14373457431793213, + "learning_rate": 9.58181524124409e-06, + "loss": 0.0041, + "step": 67980 + }, + { + "epoch": 0.4360667408516738, + "grad_norm": 0.49666643142700195, + "learning_rate": 9.581591136241575e-06, + "loss": 0.0036, + "step": 67990 + }, + { + "epoch": 0.4361308777454599, + "grad_norm": 0.2246396541595459, + "learning_rate": 9.581366973828448e-06, + "loss": 0.0023, + "step": 68000 + }, + { + "epoch": 0.436195014639246, + "grad_norm": 0.23568378388881683, + "learning_rate": 9.58114275400752e-06, + "loss": 0.0033, + "step": 68010 + }, + { + "epoch": 0.43625915153303213, + "grad_norm": 0.10890336334705353, + "learning_rate": 9.580918476781598e-06, + "loss": 0.0035, + "step": 68020 + }, + { + "epoch": 0.4363232884268182, + "grad_norm": 0.045626670122146606, + "learning_rate": 9.580694142153492e-06, + "loss": 0.0036, + "step": 68030 + }, + { + "epoch": 0.4363874253206043, + "grad_norm": 0.18727976083755493, + "learning_rate": 9.580469750126018e-06, + "loss": 0.0027, + "step": 68040 + }, + { + "epoch": 0.4364515622143904, + "grad_norm": 0.23157556354999542, + "learning_rate": 9.580245300701982e-06, + "loss": 0.0058, + "step": 68050 + }, + { + "epoch": 0.4365156991081765, + "grad_norm": 0.13583151996135712, + "learning_rate": 9.5800207938842e-06, + "loss": 0.0045, + "step": 68060 + }, + { + "epoch": 0.4365798360019626, + "grad_norm": 0.14473675191402435, + "learning_rate": 9.579796229675482e-06, + "loss": 0.0028, + "step": 68070 + }, + { + "epoch": 0.4366439728957487, + "grad_norm": 0.295076459646225, + "learning_rate": 9.579571608078647e-06, + "loss": 0.0045, + "step": 68080 + }, + { + "epoch": 0.4367081097895348, + "grad_norm": 0.04209938272833824, + "learning_rate": 9.579346929096505e-06, + "loss": 0.0038, + "step": 68090 + }, + { + "epoch": 0.4367722466833209, + "grad_norm": 0.22433242201805115, + "learning_rate": 9.579122192731874e-06, + "loss": 0.004, + "step": 68100 + }, + { + "epoch": 0.43683638357710697, + "grad_norm": 0.08067035675048828, + "learning_rate": 9.57889739898757e-06, + "loss": 0.0042, + "step": 68110 + }, + { + "epoch": 0.43690052047089306, + "grad_norm": 0.10899264365434647, + "learning_rate": 9.578672547866408e-06, + "loss": 0.0103, + "step": 68120 + }, + { + "epoch": 0.43696465736467915, + "grad_norm": 0.4292784631252289, + "learning_rate": 9.578447639371207e-06, + "loss": 0.0044, + "step": 68130 + }, + { + "epoch": 0.43702879425846525, + "grad_norm": 0.5548121333122253, + "learning_rate": 9.578222673504785e-06, + "loss": 0.0048, + "step": 68140 + }, + { + "epoch": 0.43709293115225134, + "grad_norm": 0.23005898296833038, + "learning_rate": 9.577997650269959e-06, + "loss": 0.0054, + "step": 68150 + }, + { + "epoch": 0.4371570680460375, + "grad_norm": 0.11167255789041519, + "learning_rate": 9.577772569669552e-06, + "loss": 0.0052, + "step": 68160 + }, + { + "epoch": 0.4372212049398236, + "grad_norm": 0.056567851454019547, + "learning_rate": 9.577547431706384e-06, + "loss": 0.0053, + "step": 68170 + }, + { + "epoch": 0.4372853418336097, + "grad_norm": 0.29500824213027954, + "learning_rate": 9.577322236383276e-06, + "loss": 0.0045, + "step": 68180 + }, + { + "epoch": 0.43734947872739577, + "grad_norm": 0.14376701414585114, + "learning_rate": 9.577096983703046e-06, + "loss": 0.0047, + "step": 68190 + }, + { + "epoch": 0.43741361562118186, + "grad_norm": 0.15609560906887054, + "learning_rate": 9.57687167366852e-06, + "loss": 0.002, + "step": 68200 + }, + { + "epoch": 0.43747775251496795, + "grad_norm": 0.09901092201471329, + "learning_rate": 9.576646306282523e-06, + "loss": 0.0047, + "step": 68210 + }, + { + "epoch": 0.43754188940875405, + "grad_norm": 0.24435746669769287, + "learning_rate": 9.576420881547875e-06, + "loss": 0.0045, + "step": 68220 + }, + { + "epoch": 0.43760602630254014, + "grad_norm": 0.15269044041633606, + "learning_rate": 9.576195399467404e-06, + "loss": 0.0032, + "step": 68230 + }, + { + "epoch": 0.43767016319632623, + "grad_norm": 0.15219302475452423, + "learning_rate": 9.575969860043934e-06, + "loss": 0.0058, + "step": 68240 + }, + { + "epoch": 0.4377343000901123, + "grad_norm": 0.2758413255214691, + "learning_rate": 9.57574426328029e-06, + "loss": 0.0027, + "step": 68250 + }, + { + "epoch": 0.4377984369838984, + "grad_norm": 0.35995030403137207, + "learning_rate": 9.575518609179302e-06, + "loss": 0.003, + "step": 68260 + }, + { + "epoch": 0.4378625738776845, + "grad_norm": 0.13239847123622894, + "learning_rate": 9.575292897743793e-06, + "loss": 0.0035, + "step": 68270 + }, + { + "epoch": 0.4379267107714706, + "grad_norm": 0.07212992012500763, + "learning_rate": 9.575067128976596e-06, + "loss": 0.0038, + "step": 68280 + }, + { + "epoch": 0.4379908476652567, + "grad_norm": 0.2119167298078537, + "learning_rate": 9.574841302880538e-06, + "loss": 0.0032, + "step": 68290 + }, + { + "epoch": 0.43805498455904285, + "grad_norm": 0.24278458952903748, + "learning_rate": 9.574615419458448e-06, + "loss": 0.0035, + "step": 68300 + }, + { + "epoch": 0.43811912145282894, + "grad_norm": 0.17022500932216644, + "learning_rate": 9.57438947871316e-06, + "loss": 0.0048, + "step": 68310 + }, + { + "epoch": 0.43818325834661503, + "grad_norm": 0.1462307572364807, + "learning_rate": 9.5741634806475e-06, + "loss": 0.0054, + "step": 68320 + }, + { + "epoch": 0.4382473952404011, + "grad_norm": 0.16263353824615479, + "learning_rate": 9.573937425264304e-06, + "loss": 0.0029, + "step": 68330 + }, + { + "epoch": 0.4383115321341872, + "grad_norm": 0.1209472119808197, + "learning_rate": 9.5737113125664e-06, + "loss": 0.0031, + "step": 68340 + }, + { + "epoch": 0.4383756690279733, + "grad_norm": 0.14892970025539398, + "learning_rate": 9.573485142556629e-06, + "loss": 0.0025, + "step": 68350 + }, + { + "epoch": 0.4384398059217594, + "grad_norm": 0.06195222958922386, + "learning_rate": 9.573258915237818e-06, + "loss": 0.0039, + "step": 68360 + }, + { + "epoch": 0.4385039428155455, + "grad_norm": 0.35005781054496765, + "learning_rate": 9.573032630612804e-06, + "loss": 0.0041, + "step": 68370 + }, + { + "epoch": 0.4385680797093316, + "grad_norm": 0.15759289264678955, + "learning_rate": 9.572806288684425e-06, + "loss": 0.0045, + "step": 68380 + }, + { + "epoch": 0.4386322166031177, + "grad_norm": 0.07934365421533585, + "learning_rate": 9.572579889455513e-06, + "loss": 0.0047, + "step": 68390 + }, + { + "epoch": 0.4386963534969038, + "grad_norm": 0.2578205168247223, + "learning_rate": 9.572353432928907e-06, + "loss": 0.0044, + "step": 68400 + }, + { + "epoch": 0.4387604903906899, + "grad_norm": 0.1401975154876709, + "learning_rate": 9.572126919107445e-06, + "loss": 0.0056, + "step": 68410 + }, + { + "epoch": 0.43882462728447597, + "grad_norm": 0.29037439823150635, + "learning_rate": 9.571900347993965e-06, + "loss": 0.0031, + "step": 68420 + }, + { + "epoch": 0.43888876417826206, + "grad_norm": 0.22732864320278168, + "learning_rate": 9.571673719591307e-06, + "loss": 0.0032, + "step": 68430 + }, + { + "epoch": 0.4389529010720482, + "grad_norm": 0.1276884377002716, + "learning_rate": 9.571447033902309e-06, + "loss": 0.0029, + "step": 68440 + }, + { + "epoch": 0.4390170379658343, + "grad_norm": 0.21823787689208984, + "learning_rate": 9.571220290929812e-06, + "loss": 0.0037, + "step": 68450 + }, + { + "epoch": 0.4390811748596204, + "grad_norm": 0.10427073389291763, + "learning_rate": 9.570993490676658e-06, + "loss": 0.0027, + "step": 68460 + }, + { + "epoch": 0.4391453117534065, + "grad_norm": 0.4085180163383484, + "learning_rate": 9.57076663314569e-06, + "loss": 0.0047, + "step": 68470 + }, + { + "epoch": 0.4392094486471926, + "grad_norm": 0.12851525843143463, + "learning_rate": 9.57053971833975e-06, + "loss": 0.0096, + "step": 68480 + }, + { + "epoch": 0.4392735855409787, + "grad_norm": 0.15416646003723145, + "learning_rate": 9.570312746261678e-06, + "loss": 0.0046, + "step": 68490 + }, + { + "epoch": 0.43933772243476477, + "grad_norm": 0.2808114290237427, + "learning_rate": 9.570085716914323e-06, + "loss": 0.0034, + "step": 68500 + }, + { + "epoch": 0.43940185932855086, + "grad_norm": 0.13371069729328156, + "learning_rate": 9.569858630300528e-06, + "loss": 0.0041, + "step": 68510 + }, + { + "epoch": 0.43946599622233695, + "grad_norm": 0.05387774109840393, + "learning_rate": 9.569631486423138e-06, + "loss": 0.0035, + "step": 68520 + }, + { + "epoch": 0.43953013311612305, + "grad_norm": 0.24315620958805084, + "learning_rate": 9.569404285284999e-06, + "loss": 0.0068, + "step": 68530 + }, + { + "epoch": 0.43959427000990914, + "grad_norm": 0.044655054807662964, + "learning_rate": 9.569177026888958e-06, + "loss": 0.0033, + "step": 68540 + }, + { + "epoch": 0.43965840690369523, + "grad_norm": 0.04376442730426788, + "learning_rate": 9.568949711237865e-06, + "loss": 0.0031, + "step": 68550 + }, + { + "epoch": 0.4397225437974813, + "grad_norm": 0.24665457010269165, + "learning_rate": 9.568722338334567e-06, + "loss": 0.0053, + "step": 68560 + }, + { + "epoch": 0.4397866806912674, + "grad_norm": 0.08923192322254181, + "learning_rate": 9.568494908181911e-06, + "loss": 0.0065, + "step": 68570 + }, + { + "epoch": 0.4398508175850535, + "grad_norm": 0.20345203578472137, + "learning_rate": 9.568267420782749e-06, + "loss": 0.0035, + "step": 68580 + }, + { + "epoch": 0.43991495447883966, + "grad_norm": 0.22325508296489716, + "learning_rate": 9.568039876139932e-06, + "loss": 0.0041, + "step": 68590 + }, + { + "epoch": 0.43997909137262575, + "grad_norm": 0.3755197823047638, + "learning_rate": 9.56781227425631e-06, + "loss": 0.0032, + "step": 68600 + }, + { + "epoch": 0.44004322826641185, + "grad_norm": 0.23552502691745758, + "learning_rate": 9.567584615134738e-06, + "loss": 0.0051, + "step": 68610 + }, + { + "epoch": 0.44010736516019794, + "grad_norm": 0.30500590801239014, + "learning_rate": 9.567356898778064e-06, + "loss": 0.0061, + "step": 68620 + }, + { + "epoch": 0.44017150205398403, + "grad_norm": 0.28967979550361633, + "learning_rate": 9.567129125189143e-06, + "loss": 0.0048, + "step": 68630 + }, + { + "epoch": 0.4402356389477701, + "grad_norm": 0.13767173886299133, + "learning_rate": 9.566901294370832e-06, + "loss": 0.004, + "step": 68640 + }, + { + "epoch": 0.4402997758415562, + "grad_norm": 0.1392887532711029, + "learning_rate": 9.566673406325983e-06, + "loss": 0.002, + "step": 68650 + }, + { + "epoch": 0.4403639127353423, + "grad_norm": 0.12102367728948593, + "learning_rate": 9.566445461057452e-06, + "loss": 0.004, + "step": 68660 + }, + { + "epoch": 0.4404280496291284, + "grad_norm": 0.17383573949337006, + "learning_rate": 9.566217458568096e-06, + "loss": 0.0036, + "step": 68670 + }, + { + "epoch": 0.4404921865229145, + "grad_norm": 0.103236123919487, + "learning_rate": 9.565989398860774e-06, + "loss": 0.0042, + "step": 68680 + }, + { + "epoch": 0.4405563234167006, + "grad_norm": 0.06880658864974976, + "learning_rate": 9.56576128193834e-06, + "loss": 0.0035, + "step": 68690 + }, + { + "epoch": 0.4406204603104867, + "grad_norm": 0.11575897783041, + "learning_rate": 9.565533107803652e-06, + "loss": 0.0029, + "step": 68700 + }, + { + "epoch": 0.4406845972042728, + "grad_norm": 0.06390535086393356, + "learning_rate": 9.565304876459574e-06, + "loss": 0.0057, + "step": 68710 + }, + { + "epoch": 0.44074873409805887, + "grad_norm": 0.22714078426361084, + "learning_rate": 9.565076587908962e-06, + "loss": 0.0037, + "step": 68720 + }, + { + "epoch": 0.440812870991845, + "grad_norm": 0.09389592707157135, + "learning_rate": 9.564848242154678e-06, + "loss": 0.0097, + "step": 68730 + }, + { + "epoch": 0.4408770078856311, + "grad_norm": 0.12381519377231598, + "learning_rate": 9.564619839199583e-06, + "loss": 0.007, + "step": 68740 + }, + { + "epoch": 0.4409411447794172, + "grad_norm": 0.14362052083015442, + "learning_rate": 9.564391379046539e-06, + "loss": 0.0037, + "step": 68750 + }, + { + "epoch": 0.4410052816732033, + "grad_norm": 0.09533475339412689, + "learning_rate": 9.56416286169841e-06, + "loss": 0.0044, + "step": 68760 + }, + { + "epoch": 0.4410694185669894, + "grad_norm": 0.06411031633615494, + "learning_rate": 9.563934287158057e-06, + "loss": 0.0021, + "step": 68770 + }, + { + "epoch": 0.4411335554607755, + "grad_norm": 0.5412489175796509, + "learning_rate": 9.563705655428347e-06, + "loss": 0.0049, + "step": 68780 + }, + { + "epoch": 0.4411976923545616, + "grad_norm": 0.2374279946088791, + "learning_rate": 9.56347696651214e-06, + "loss": 0.0025, + "step": 68790 + }, + { + "epoch": 0.44126182924834767, + "grad_norm": 0.31077682971954346, + "learning_rate": 9.56324822041231e-06, + "loss": 0.0067, + "step": 68800 + }, + { + "epoch": 0.44132596614213376, + "grad_norm": 0.07084587961435318, + "learning_rate": 9.563019417131716e-06, + "loss": 0.0056, + "step": 68810 + }, + { + "epoch": 0.44139010303591986, + "grad_norm": 0.097480908036232, + "learning_rate": 9.562790556673228e-06, + "loss": 0.0034, + "step": 68820 + }, + { + "epoch": 0.44145423992970595, + "grad_norm": 0.02128240466117859, + "learning_rate": 9.562561639039711e-06, + "loss": 0.0018, + "step": 68830 + }, + { + "epoch": 0.44151837682349204, + "grad_norm": 0.14919476211071014, + "learning_rate": 9.56233266423404e-06, + "loss": 0.0021, + "step": 68840 + }, + { + "epoch": 0.44158251371727814, + "grad_norm": 0.2957935631275177, + "learning_rate": 9.562103632259076e-06, + "loss": 0.004, + "step": 68850 + }, + { + "epoch": 0.44164665061106423, + "grad_norm": 0.14767687022686005, + "learning_rate": 9.561874543117695e-06, + "loss": 0.0036, + "step": 68860 + }, + { + "epoch": 0.4417107875048504, + "grad_norm": 0.007925967685878277, + "learning_rate": 9.561645396812767e-06, + "loss": 0.0073, + "step": 68870 + }, + { + "epoch": 0.44177492439863647, + "grad_norm": 0.2985318899154663, + "learning_rate": 9.56141619334716e-06, + "loss": 0.0029, + "step": 68880 + }, + { + "epoch": 0.44183906129242256, + "grad_norm": 0.247298926115036, + "learning_rate": 9.561186932723748e-06, + "loss": 0.0031, + "step": 68890 + }, + { + "epoch": 0.44190319818620866, + "grad_norm": 0.40475961565971375, + "learning_rate": 9.560957614945406e-06, + "loss": 0.0047, + "step": 68900 + }, + { + "epoch": 0.44196733507999475, + "grad_norm": 0.100443035364151, + "learning_rate": 9.560728240015003e-06, + "loss": 0.004, + "step": 68910 + }, + { + "epoch": 0.44203147197378084, + "grad_norm": 0.18375732004642487, + "learning_rate": 9.560498807935416e-06, + "loss": 0.0049, + "step": 68920 + }, + { + "epoch": 0.44209560886756694, + "grad_norm": 0.06509524583816528, + "learning_rate": 9.56026931870952e-06, + "loss": 0.0035, + "step": 68930 + }, + { + "epoch": 0.44215974576135303, + "grad_norm": 0.10219797492027283, + "learning_rate": 9.560039772340191e-06, + "loss": 0.0039, + "step": 68940 + }, + { + "epoch": 0.4422238826551391, + "grad_norm": 0.2416619211435318, + "learning_rate": 9.559810168830304e-06, + "loss": 0.0036, + "step": 68950 + }, + { + "epoch": 0.4422880195489252, + "grad_norm": 0.14863581955432892, + "learning_rate": 9.559580508182737e-06, + "loss": 0.005, + "step": 68960 + }, + { + "epoch": 0.4423521564427113, + "grad_norm": 0.23987102508544922, + "learning_rate": 9.559350790400369e-06, + "loss": 0.0039, + "step": 68970 + }, + { + "epoch": 0.4424162933364974, + "grad_norm": 0.042374927550554276, + "learning_rate": 9.559121015486075e-06, + "loss": 0.0036, + "step": 68980 + }, + { + "epoch": 0.4424804302302835, + "grad_norm": 0.11949405819177628, + "learning_rate": 9.558891183442736e-06, + "loss": 0.0044, + "step": 68990 + }, + { + "epoch": 0.4425445671240696, + "grad_norm": 0.2754271328449249, + "learning_rate": 9.558661294273234e-06, + "loss": 0.0047, + "step": 69000 + }, + { + "epoch": 0.44260870401785574, + "grad_norm": 0.19826200604438782, + "learning_rate": 9.558431347980447e-06, + "loss": 0.0044, + "step": 69010 + }, + { + "epoch": 0.44267284091164183, + "grad_norm": 0.5750806927680969, + "learning_rate": 9.558201344567257e-06, + "loss": 0.0052, + "step": 69020 + }, + { + "epoch": 0.4427369778054279, + "grad_norm": 0.1713956892490387, + "learning_rate": 9.557971284036547e-06, + "loss": 0.0022, + "step": 69030 + }, + { + "epoch": 0.442801114699214, + "grad_norm": 0.22112691402435303, + "learning_rate": 9.557741166391198e-06, + "loss": 0.0045, + "step": 69040 + }, + { + "epoch": 0.4428652515930001, + "grad_norm": 0.2011345624923706, + "learning_rate": 9.557510991634097e-06, + "loss": 0.0046, + "step": 69050 + }, + { + "epoch": 0.4429293884867862, + "grad_norm": 0.1980188637971878, + "learning_rate": 9.557280759768126e-06, + "loss": 0.0064, + "step": 69060 + }, + { + "epoch": 0.4429935253805723, + "grad_norm": 0.1128661185503006, + "learning_rate": 9.557050470796169e-06, + "loss": 0.0054, + "step": 69070 + }, + { + "epoch": 0.4430576622743584, + "grad_norm": 0.134240061044693, + "learning_rate": 9.556820124721113e-06, + "loss": 0.0051, + "step": 69080 + }, + { + "epoch": 0.4431217991681445, + "grad_norm": 0.047759849578142166, + "learning_rate": 9.556589721545844e-06, + "loss": 0.0046, + "step": 69090 + }, + { + "epoch": 0.4431859360619306, + "grad_norm": 0.15535880625247955, + "learning_rate": 9.556359261273249e-06, + "loss": 0.0029, + "step": 69100 + }, + { + "epoch": 0.44325007295571667, + "grad_norm": 0.15249252319335938, + "learning_rate": 9.556128743906216e-06, + "loss": 0.0022, + "step": 69110 + }, + { + "epoch": 0.44331420984950276, + "grad_norm": 0.3138236999511719, + "learning_rate": 9.555898169447636e-06, + "loss": 0.0095, + "step": 69120 + }, + { + "epoch": 0.44337834674328885, + "grad_norm": 0.0569743849337101, + "learning_rate": 9.555667537900393e-06, + "loss": 0.0034, + "step": 69130 + }, + { + "epoch": 0.44344248363707495, + "grad_norm": 0.03304928541183472, + "learning_rate": 9.55543684926738e-06, + "loss": 0.0022, + "step": 69140 + }, + { + "epoch": 0.4435066205308611, + "grad_norm": 0.16622063517570496, + "learning_rate": 9.555206103551488e-06, + "loss": 0.0052, + "step": 69150 + }, + { + "epoch": 0.4435707574246472, + "grad_norm": 0.1225697472691536, + "learning_rate": 9.554975300755608e-06, + "loss": 0.0037, + "step": 69160 + }, + { + "epoch": 0.4436348943184333, + "grad_norm": 0.1855577975511551, + "learning_rate": 9.554744440882633e-06, + "loss": 0.0063, + "step": 69170 + }, + { + "epoch": 0.4436990312122194, + "grad_norm": 0.46637067198753357, + "learning_rate": 9.554513523935454e-06, + "loss": 0.0054, + "step": 69180 + }, + { + "epoch": 0.44376316810600547, + "grad_norm": 0.11185431480407715, + "learning_rate": 9.554282549916966e-06, + "loss": 0.0032, + "step": 69190 + }, + { + "epoch": 0.44382730499979156, + "grad_norm": 0.14200040698051453, + "learning_rate": 9.554051518830062e-06, + "loss": 0.0032, + "step": 69200 + }, + { + "epoch": 0.44389144189357765, + "grad_norm": 0.2912452518939972, + "learning_rate": 9.553820430677639e-06, + "loss": 0.0038, + "step": 69210 + }, + { + "epoch": 0.44395557878736375, + "grad_norm": 0.24687382578849792, + "learning_rate": 9.55358928546259e-06, + "loss": 0.0026, + "step": 69220 + }, + { + "epoch": 0.44401971568114984, + "grad_norm": 0.13388454914093018, + "learning_rate": 9.553358083187813e-06, + "loss": 0.003, + "step": 69230 + }, + { + "epoch": 0.44408385257493593, + "grad_norm": 0.12316068261861801, + "learning_rate": 9.553126823856204e-06, + "loss": 0.0028, + "step": 69240 + }, + { + "epoch": 0.444147989468722, + "grad_norm": 0.15901079773902893, + "learning_rate": 9.552895507470665e-06, + "loss": 0.0033, + "step": 69250 + }, + { + "epoch": 0.4442121263625081, + "grad_norm": 0.32043325901031494, + "learning_rate": 9.55266413403409e-06, + "loss": 0.0033, + "step": 69260 + }, + { + "epoch": 0.4442762632562942, + "grad_norm": 0.4112547039985657, + "learning_rate": 9.552432703549379e-06, + "loss": 0.0033, + "step": 69270 + }, + { + "epoch": 0.4443404001500803, + "grad_norm": 0.12032436579465866, + "learning_rate": 9.552201216019432e-06, + "loss": 0.0046, + "step": 69280 + }, + { + "epoch": 0.44440453704386645, + "grad_norm": 0.03089107573032379, + "learning_rate": 9.551969671447152e-06, + "loss": 0.0015, + "step": 69290 + }, + { + "epoch": 0.44446867393765255, + "grad_norm": 0.23203957080841064, + "learning_rate": 9.551738069835438e-06, + "loss": 0.004, + "step": 69300 + }, + { + "epoch": 0.44453281083143864, + "grad_norm": 0.15184760093688965, + "learning_rate": 9.551506411187194e-06, + "loss": 0.0041, + "step": 69310 + }, + { + "epoch": 0.44459694772522473, + "grad_norm": 0.07851779460906982, + "learning_rate": 9.551274695505321e-06, + "loss": 0.0022, + "step": 69320 + }, + { + "epoch": 0.4446610846190108, + "grad_norm": 0.2914571762084961, + "learning_rate": 9.551042922792721e-06, + "loss": 0.0041, + "step": 69330 + }, + { + "epoch": 0.4447252215127969, + "grad_norm": 0.10128697007894516, + "learning_rate": 9.550811093052304e-06, + "loss": 0.0056, + "step": 69340 + }, + { + "epoch": 0.444789358406583, + "grad_norm": 0.1086357980966568, + "learning_rate": 9.55057920628697e-06, + "loss": 0.0026, + "step": 69350 + }, + { + "epoch": 0.4448534953003691, + "grad_norm": 0.20596520602703094, + "learning_rate": 9.550347262499626e-06, + "loss": 0.0042, + "step": 69360 + }, + { + "epoch": 0.4449176321941552, + "grad_norm": 0.15991929173469543, + "learning_rate": 9.55011526169318e-06, + "loss": 0.0044, + "step": 69370 + }, + { + "epoch": 0.4449817690879413, + "grad_norm": 0.463979035615921, + "learning_rate": 9.549883203870537e-06, + "loss": 0.0044, + "step": 69380 + }, + { + "epoch": 0.4450459059817274, + "grad_norm": 0.056873906403779984, + "learning_rate": 9.549651089034607e-06, + "loss": 0.0042, + "step": 69390 + }, + { + "epoch": 0.4451100428755135, + "grad_norm": 0.07245271652936935, + "learning_rate": 9.549418917188295e-06, + "loss": 0.0094, + "step": 69400 + }, + { + "epoch": 0.44517417976929957, + "grad_norm": 0.1018892154097557, + "learning_rate": 9.549186688334514e-06, + "loss": 0.0028, + "step": 69410 + }, + { + "epoch": 0.44523831666308566, + "grad_norm": 0.14666366577148438, + "learning_rate": 9.548954402476172e-06, + "loss": 0.0039, + "step": 69420 + }, + { + "epoch": 0.4453024535568718, + "grad_norm": 0.17333391308784485, + "learning_rate": 9.54872205961618e-06, + "loss": 0.004, + "step": 69430 + }, + { + "epoch": 0.4453665904506579, + "grad_norm": 0.2100229263305664, + "learning_rate": 9.54848965975745e-06, + "loss": 0.0035, + "step": 69440 + }, + { + "epoch": 0.445430727344444, + "grad_norm": 0.34473252296447754, + "learning_rate": 9.548257202902896e-06, + "loss": 0.0056, + "step": 69450 + }, + { + "epoch": 0.4454948642382301, + "grad_norm": 0.15477558970451355, + "learning_rate": 9.548024689055426e-06, + "loss": 0.0046, + "step": 69460 + }, + { + "epoch": 0.4455590011320162, + "grad_norm": 0.17489725351333618, + "learning_rate": 9.547792118217956e-06, + "loss": 0.003, + "step": 69470 + }, + { + "epoch": 0.4456231380258023, + "grad_norm": 0.1335802674293518, + "learning_rate": 9.5475594903934e-06, + "loss": 0.003, + "step": 69480 + }, + { + "epoch": 0.44568727491958837, + "grad_norm": 0.22918473184108734, + "learning_rate": 9.547326805584676e-06, + "loss": 0.0032, + "step": 69490 + }, + { + "epoch": 0.44575141181337447, + "grad_norm": 0.14939549565315247, + "learning_rate": 9.547094063794697e-06, + "loss": 0.0052, + "step": 69500 + }, + { + "epoch": 0.44581554870716056, + "grad_norm": 0.051198314875364304, + "learning_rate": 9.546861265026379e-06, + "loss": 0.003, + "step": 69510 + }, + { + "epoch": 0.44587968560094665, + "grad_norm": 0.11114027351140976, + "learning_rate": 9.546628409282638e-06, + "loss": 0.0031, + "step": 69520 + }, + { + "epoch": 0.44594382249473274, + "grad_norm": 0.4134508967399597, + "learning_rate": 9.546395496566394e-06, + "loss": 0.0032, + "step": 69530 + }, + { + "epoch": 0.44600795938851884, + "grad_norm": 0.34015557169914246, + "learning_rate": 9.546162526880566e-06, + "loss": 0.003, + "step": 69540 + }, + { + "epoch": 0.44607209628230493, + "grad_norm": 0.1437218338251114, + "learning_rate": 9.545929500228074e-06, + "loss": 0.0045, + "step": 69550 + }, + { + "epoch": 0.446136233176091, + "grad_norm": 0.26935333013534546, + "learning_rate": 9.545696416611835e-06, + "loss": 0.0043, + "step": 69560 + }, + { + "epoch": 0.4462003700698772, + "grad_norm": 0.2559076249599457, + "learning_rate": 9.545463276034772e-06, + "loss": 0.0037, + "step": 69570 + }, + { + "epoch": 0.44626450696366327, + "grad_norm": 0.2649496793746948, + "learning_rate": 9.545230078499803e-06, + "loss": 0.0034, + "step": 69580 + }, + { + "epoch": 0.44632864385744936, + "grad_norm": 0.16384756565093994, + "learning_rate": 9.544996824009855e-06, + "loss": 0.0033, + "step": 69590 + }, + { + "epoch": 0.44639278075123545, + "grad_norm": 0.23684534430503845, + "learning_rate": 9.544763512567849e-06, + "loss": 0.0048, + "step": 69600 + }, + { + "epoch": 0.44645691764502154, + "grad_norm": 0.07358350604772568, + "learning_rate": 9.544530144176707e-06, + "loss": 0.0026, + "step": 69610 + }, + { + "epoch": 0.44652105453880764, + "grad_norm": 0.2703312635421753, + "learning_rate": 9.544296718839354e-06, + "loss": 0.0047, + "step": 69620 + }, + { + "epoch": 0.44658519143259373, + "grad_norm": 0.1530401110649109, + "learning_rate": 9.544063236558715e-06, + "loss": 0.0042, + "step": 69630 + }, + { + "epoch": 0.4466493283263798, + "grad_norm": 0.238456130027771, + "learning_rate": 9.543829697337717e-06, + "loss": 0.0028, + "step": 69640 + }, + { + "epoch": 0.4467134652201659, + "grad_norm": 0.343877911567688, + "learning_rate": 9.543596101179285e-06, + "loss": 0.005, + "step": 69650 + }, + { + "epoch": 0.446777602113952, + "grad_norm": 0.08465104550123215, + "learning_rate": 9.543362448086347e-06, + "loss": 0.0043, + "step": 69660 + }, + { + "epoch": 0.4468417390077381, + "grad_norm": 0.08829823136329651, + "learning_rate": 9.543128738061832e-06, + "loss": 0.0019, + "step": 69670 + }, + { + "epoch": 0.4469058759015242, + "grad_norm": 0.04483136162161827, + "learning_rate": 9.542894971108664e-06, + "loss": 0.0044, + "step": 69680 + }, + { + "epoch": 0.4469700127953103, + "grad_norm": 0.4056674838066101, + "learning_rate": 9.542661147229777e-06, + "loss": 0.0048, + "step": 69690 + }, + { + "epoch": 0.4470341496890964, + "grad_norm": 0.1599629819393158, + "learning_rate": 9.542427266428099e-06, + "loss": 0.0032, + "step": 69700 + }, + { + "epoch": 0.44709828658288253, + "grad_norm": 0.10350514948368073, + "learning_rate": 9.54219332870656e-06, + "loss": 0.0048, + "step": 69710 + }, + { + "epoch": 0.4471624234766686, + "grad_norm": 0.15455134212970734, + "learning_rate": 9.541959334068094e-06, + "loss": 0.0038, + "step": 69720 + }, + { + "epoch": 0.4472265603704547, + "grad_norm": 0.26008278131484985, + "learning_rate": 9.541725282515631e-06, + "loss": 0.0041, + "step": 69730 + }, + { + "epoch": 0.4472906972642408, + "grad_norm": 0.24542184174060822, + "learning_rate": 9.541491174052101e-06, + "loss": 0.0043, + "step": 69740 + }, + { + "epoch": 0.4473548341580269, + "grad_norm": 0.07911483943462372, + "learning_rate": 9.541257008680445e-06, + "loss": 0.002, + "step": 69750 + }, + { + "epoch": 0.447418971051813, + "grad_norm": 0.2870321571826935, + "learning_rate": 9.541022786403592e-06, + "loss": 0.0036, + "step": 69760 + }, + { + "epoch": 0.4474831079455991, + "grad_norm": 0.3157743811607361, + "learning_rate": 9.540788507224478e-06, + "loss": 0.0051, + "step": 69770 + }, + { + "epoch": 0.4475472448393852, + "grad_norm": 0.0588856041431427, + "learning_rate": 9.54055417114604e-06, + "loss": 0.0037, + "step": 69780 + }, + { + "epoch": 0.4476113817331713, + "grad_norm": 0.29160788655281067, + "learning_rate": 9.54031977817121e-06, + "loss": 0.0018, + "step": 69790 + }, + { + "epoch": 0.44767551862695737, + "grad_norm": 0.5045191049575806, + "learning_rate": 9.54008532830293e-06, + "loss": 0.0055, + "step": 69800 + }, + { + "epoch": 0.44773965552074346, + "grad_norm": 0.11215846240520477, + "learning_rate": 9.539850821544137e-06, + "loss": 0.0051, + "step": 69810 + }, + { + "epoch": 0.44780379241452956, + "grad_norm": 0.24262496829032898, + "learning_rate": 9.539616257897766e-06, + "loss": 0.0059, + "step": 69820 + }, + { + "epoch": 0.44786792930831565, + "grad_norm": 0.08639699220657349, + "learning_rate": 9.539381637366762e-06, + "loss": 0.0205, + "step": 69830 + }, + { + "epoch": 0.44793206620210174, + "grad_norm": 0.3160640299320221, + "learning_rate": 9.53914695995406e-06, + "loss": 0.004, + "step": 69840 + }, + { + "epoch": 0.4479962030958879, + "grad_norm": 0.1780281960964203, + "learning_rate": 9.538912225662602e-06, + "loss": 0.0038, + "step": 69850 + }, + { + "epoch": 0.448060339989674, + "grad_norm": 0.07597067207098007, + "learning_rate": 9.538677434495331e-06, + "loss": 0.0065, + "step": 69860 + }, + { + "epoch": 0.4481244768834601, + "grad_norm": 0.24969497323036194, + "learning_rate": 9.538442586455187e-06, + "loss": 0.0048, + "step": 69870 + }, + { + "epoch": 0.44818861377724617, + "grad_norm": 0.5276156663894653, + "learning_rate": 9.538207681545115e-06, + "loss": 0.0044, + "step": 69880 + }, + { + "epoch": 0.44825275067103226, + "grad_norm": 0.05741346254944801, + "learning_rate": 9.537972719768059e-06, + "loss": 0.0047, + "step": 69890 + }, + { + "epoch": 0.44831688756481836, + "grad_norm": 0.1554454267024994, + "learning_rate": 9.53773770112696e-06, + "loss": 0.0021, + "step": 69900 + }, + { + "epoch": 0.44838102445860445, + "grad_norm": 0.27648258209228516, + "learning_rate": 9.53750262562476e-06, + "loss": 0.0044, + "step": 69910 + }, + { + "epoch": 0.44844516135239054, + "grad_norm": 0.08419109880924225, + "learning_rate": 9.537267493264415e-06, + "loss": 0.0057, + "step": 69920 + }, + { + "epoch": 0.44850929824617664, + "grad_norm": 0.13251163065433502, + "learning_rate": 9.537032304048864e-06, + "loss": 0.0043, + "step": 69930 + }, + { + "epoch": 0.44857343513996273, + "grad_norm": 0.12250209599733353, + "learning_rate": 9.536797057981055e-06, + "loss": 0.0025, + "step": 69940 + }, + { + "epoch": 0.4486375720337488, + "grad_norm": 0.37543269991874695, + "learning_rate": 9.536561755063937e-06, + "loss": 0.0029, + "step": 69950 + }, + { + "epoch": 0.4487017089275349, + "grad_norm": 0.14071187376976013, + "learning_rate": 9.53632639530046e-06, + "loss": 0.0033, + "step": 69960 + }, + { + "epoch": 0.448765845821321, + "grad_norm": 0.1607028692960739, + "learning_rate": 9.536090978693568e-06, + "loss": 0.0021, + "step": 69970 + }, + { + "epoch": 0.4488299827151071, + "grad_norm": 0.19182954728603363, + "learning_rate": 9.535855505246215e-06, + "loss": 0.0054, + "step": 69980 + }, + { + "epoch": 0.4488941196088932, + "grad_norm": 0.11695774644613266, + "learning_rate": 9.535619974961352e-06, + "loss": 0.0065, + "step": 69990 + }, + { + "epoch": 0.44895825650267934, + "grad_norm": 0.1232537105679512, + "learning_rate": 9.535384387841927e-06, + "loss": 0.0036, + "step": 70000 + }, + { + "epoch": 0.44902239339646544, + "grad_norm": 0.08189796656370163, + "learning_rate": 9.535148743890896e-06, + "loss": 0.0032, + "step": 70010 + }, + { + "epoch": 0.44908653029025153, + "grad_norm": 0.20595207810401917, + "learning_rate": 9.534913043111209e-06, + "loss": 0.0029, + "step": 70020 + }, + { + "epoch": 0.4491506671840376, + "grad_norm": 0.31134435534477234, + "learning_rate": 9.534677285505822e-06, + "loss": 0.0034, + "step": 70030 + }, + { + "epoch": 0.4492148040778237, + "grad_norm": 0.26455387473106384, + "learning_rate": 9.534441471077687e-06, + "loss": 0.0027, + "step": 70040 + }, + { + "epoch": 0.4492789409716098, + "grad_norm": 0.34831151366233826, + "learning_rate": 9.534205599829758e-06, + "loss": 0.0037, + "step": 70050 + }, + { + "epoch": 0.4493430778653959, + "grad_norm": 0.11280174553394318, + "learning_rate": 9.533969671764994e-06, + "loss": 0.0022, + "step": 70060 + }, + { + "epoch": 0.449407214759182, + "grad_norm": 0.4784580171108246, + "learning_rate": 9.533733686886349e-06, + "loss": 0.0055, + "step": 70070 + }, + { + "epoch": 0.4494713516529681, + "grad_norm": 0.33552631735801697, + "learning_rate": 9.533497645196781e-06, + "loss": 0.0047, + "step": 70080 + }, + { + "epoch": 0.4495354885467542, + "grad_norm": 0.03899163007736206, + "learning_rate": 9.533261546699248e-06, + "loss": 0.0037, + "step": 70090 + }, + { + "epoch": 0.4495996254405403, + "grad_norm": 0.1701935976743698, + "learning_rate": 9.533025391396708e-06, + "loss": 0.0048, + "step": 70100 + }, + { + "epoch": 0.44966376233432637, + "grad_norm": 0.4440838694572449, + "learning_rate": 9.532789179292119e-06, + "loss": 0.0028, + "step": 70110 + }, + { + "epoch": 0.44972789922811246, + "grad_norm": 0.34770143032073975, + "learning_rate": 9.532552910388441e-06, + "loss": 0.0058, + "step": 70120 + }, + { + "epoch": 0.44979203612189855, + "grad_norm": 0.289668470621109, + "learning_rate": 9.53231658468864e-06, + "loss": 0.005, + "step": 70130 + }, + { + "epoch": 0.4498561730156847, + "grad_norm": 0.18488343060016632, + "learning_rate": 9.532080202195669e-06, + "loss": 0.0031, + "step": 70140 + }, + { + "epoch": 0.4499203099094708, + "grad_norm": 0.06246650218963623, + "learning_rate": 9.531843762912496e-06, + "loss": 0.0045, + "step": 70150 + }, + { + "epoch": 0.4499844468032569, + "grad_norm": 0.1957128643989563, + "learning_rate": 9.53160726684208e-06, + "loss": 0.0038, + "step": 70160 + }, + { + "epoch": 0.450048583697043, + "grad_norm": 0.10655216872692108, + "learning_rate": 9.531370713987388e-06, + "loss": 0.0041, + "step": 70170 + }, + { + "epoch": 0.4501127205908291, + "grad_norm": 0.23367515206336975, + "learning_rate": 9.531134104351382e-06, + "loss": 0.0061, + "step": 70180 + }, + { + "epoch": 0.45017685748461517, + "grad_norm": 0.08084133267402649, + "learning_rate": 9.530897437937027e-06, + "loss": 0.0034, + "step": 70190 + }, + { + "epoch": 0.45024099437840126, + "grad_norm": 0.17834603786468506, + "learning_rate": 9.530660714747288e-06, + "loss": 0.0042, + "step": 70200 + }, + { + "epoch": 0.45030513127218735, + "grad_norm": 0.17205336689949036, + "learning_rate": 9.530423934785133e-06, + "loss": 0.0035, + "step": 70210 + }, + { + "epoch": 0.45036926816597345, + "grad_norm": 0.1258828490972519, + "learning_rate": 9.53018709805353e-06, + "loss": 0.0026, + "step": 70220 + }, + { + "epoch": 0.45043340505975954, + "grad_norm": 0.18292279541492462, + "learning_rate": 9.529950204555443e-06, + "loss": 0.0066, + "step": 70230 + }, + { + "epoch": 0.45049754195354563, + "grad_norm": 0.1315743774175644, + "learning_rate": 9.529713254293841e-06, + "loss": 0.004, + "step": 70240 + }, + { + "epoch": 0.4505616788473317, + "grad_norm": 0.11516522616147995, + "learning_rate": 9.529476247271698e-06, + "loss": 0.0047, + "step": 70250 + }, + { + "epoch": 0.4506258157411178, + "grad_norm": 0.2952657639980316, + "learning_rate": 9.529239183491978e-06, + "loss": 0.0054, + "step": 70260 + }, + { + "epoch": 0.4506899526349039, + "grad_norm": 0.12432711571455002, + "learning_rate": 9.529002062957657e-06, + "loss": 0.0042, + "step": 70270 + }, + { + "epoch": 0.45075408952869006, + "grad_norm": 0.7580963969230652, + "learning_rate": 9.5287648856717e-06, + "loss": 0.002, + "step": 70280 + }, + { + "epoch": 0.45081822642247615, + "grad_norm": 0.13588938117027283, + "learning_rate": 9.528527651637084e-06, + "loss": 0.0052, + "step": 70290 + }, + { + "epoch": 0.45088236331626225, + "grad_norm": 0.0825934186577797, + "learning_rate": 9.52829036085678e-06, + "loss": 0.0044, + "step": 70300 + }, + { + "epoch": 0.45094650021004834, + "grad_norm": 0.14910919964313507, + "learning_rate": 9.52805301333376e-06, + "loss": 0.0068, + "step": 70310 + }, + { + "epoch": 0.45101063710383443, + "grad_norm": 0.06526334583759308, + "learning_rate": 9.527815609071002e-06, + "loss": 0.0046, + "step": 70320 + }, + { + "epoch": 0.4510747739976205, + "grad_norm": 0.21731288731098175, + "learning_rate": 9.527578148071476e-06, + "loss": 0.0043, + "step": 70330 + }, + { + "epoch": 0.4511389108914066, + "grad_norm": 0.16899237036705017, + "learning_rate": 9.527340630338162e-06, + "loss": 0.0045, + "step": 70340 + }, + { + "epoch": 0.4512030477851927, + "grad_norm": 0.11214538663625717, + "learning_rate": 9.527103055874034e-06, + "loss": 0.0041, + "step": 70350 + }, + { + "epoch": 0.4512671846789788, + "grad_norm": 0.19497740268707275, + "learning_rate": 9.526865424682068e-06, + "loss": 0.0034, + "step": 70360 + }, + { + "epoch": 0.4513313215727649, + "grad_norm": 0.40791797637939453, + "learning_rate": 9.526627736765245e-06, + "loss": 0.0029, + "step": 70370 + }, + { + "epoch": 0.451395458466551, + "grad_norm": 0.052078232169151306, + "learning_rate": 9.52638999212654e-06, + "loss": 0.0073, + "step": 70380 + }, + { + "epoch": 0.4514595953603371, + "grad_norm": 0.18052902817726135, + "learning_rate": 9.526152190768932e-06, + "loss": 0.0032, + "step": 70390 + }, + { + "epoch": 0.4515237322541232, + "grad_norm": 0.23692411184310913, + "learning_rate": 9.525914332695403e-06, + "loss": 0.0041, + "step": 70400 + }, + { + "epoch": 0.45158786914790927, + "grad_norm": 0.1767560839653015, + "learning_rate": 9.525676417908933e-06, + "loss": 0.0056, + "step": 70410 + }, + { + "epoch": 0.4516520060416954, + "grad_norm": 0.1576530635356903, + "learning_rate": 9.525438446412504e-06, + "loss": 0.0045, + "step": 70420 + }, + { + "epoch": 0.4517161429354815, + "grad_norm": 0.18247544765472412, + "learning_rate": 9.525200418209095e-06, + "loss": 0.0065, + "step": 70430 + }, + { + "epoch": 0.4517802798292676, + "grad_norm": 0.05818657577037811, + "learning_rate": 9.524962333301694e-06, + "loss": 0.0049, + "step": 70440 + }, + { + "epoch": 0.4518444167230537, + "grad_norm": 0.2344546765089035, + "learning_rate": 9.52472419169328e-06, + "loss": 0.0045, + "step": 70450 + }, + { + "epoch": 0.4519085536168398, + "grad_norm": 0.19400371611118317, + "learning_rate": 9.524485993386836e-06, + "loss": 0.002, + "step": 70460 + }, + { + "epoch": 0.4519726905106259, + "grad_norm": 0.07352828234434128, + "learning_rate": 9.52424773838535e-06, + "loss": 0.003, + "step": 70470 + }, + { + "epoch": 0.452036827404412, + "grad_norm": 0.030290966853499413, + "learning_rate": 9.524009426691806e-06, + "loss": 0.0021, + "step": 70480 + }, + { + "epoch": 0.45210096429819807, + "grad_norm": 0.17533548176288605, + "learning_rate": 9.523771058309192e-06, + "loss": 0.0037, + "step": 70490 + }, + { + "epoch": 0.45216510119198416, + "grad_norm": 0.16610278189182281, + "learning_rate": 9.523532633240492e-06, + "loss": 0.0036, + "step": 70500 + }, + { + "epoch": 0.45222923808577026, + "grad_norm": 0.4312562346458435, + "learning_rate": 9.523294151488696e-06, + "loss": 0.0047, + "step": 70510 + }, + { + "epoch": 0.45229337497955635, + "grad_norm": 0.15876057744026184, + "learning_rate": 9.523055613056791e-06, + "loss": 0.004, + "step": 70520 + }, + { + "epoch": 0.45235751187334244, + "grad_norm": 0.06935004144906998, + "learning_rate": 9.522817017947767e-06, + "loss": 0.0037, + "step": 70530 + }, + { + "epoch": 0.45242164876712854, + "grad_norm": 0.07293408364057541, + "learning_rate": 9.522578366164614e-06, + "loss": 0.0069, + "step": 70540 + }, + { + "epoch": 0.45248578566091463, + "grad_norm": 0.06884395331144333, + "learning_rate": 9.52233965771032e-06, + "loss": 0.0028, + "step": 70550 + }, + { + "epoch": 0.4525499225547008, + "grad_norm": 0.13001060485839844, + "learning_rate": 9.52210089258788e-06, + "loss": 0.0026, + "step": 70560 + }, + { + "epoch": 0.45261405944848687, + "grad_norm": 0.38709232211112976, + "learning_rate": 9.521862070800283e-06, + "loss": 0.0046, + "step": 70570 + }, + { + "epoch": 0.45267819634227296, + "grad_norm": 0.26731571555137634, + "learning_rate": 9.521623192350522e-06, + "loss": 0.004, + "step": 70580 + }, + { + "epoch": 0.45274233323605906, + "grad_norm": 0.20551343262195587, + "learning_rate": 9.521384257241592e-06, + "loss": 0.0043, + "step": 70590 + }, + { + "epoch": 0.45280647012984515, + "grad_norm": 1.012932300567627, + "learning_rate": 9.521145265476486e-06, + "loss": 0.0132, + "step": 70600 + }, + { + "epoch": 0.45287060702363124, + "grad_norm": 0.15066519379615784, + "learning_rate": 9.520906217058197e-06, + "loss": 0.0027, + "step": 70610 + }, + { + "epoch": 0.45293474391741734, + "grad_norm": 0.11236685514450073, + "learning_rate": 9.520667111989722e-06, + "loss": 0.0036, + "step": 70620 + }, + { + "epoch": 0.45299888081120343, + "grad_norm": 0.3001996576786041, + "learning_rate": 9.52042795027406e-06, + "loss": 0.0051, + "step": 70630 + }, + { + "epoch": 0.4530630177049895, + "grad_norm": 0.05467584356665611, + "learning_rate": 9.520188731914203e-06, + "loss": 0.0071, + "step": 70640 + }, + { + "epoch": 0.4531271545987756, + "grad_norm": 0.06468317657709122, + "learning_rate": 9.51994945691315e-06, + "loss": 0.004, + "step": 70650 + }, + { + "epoch": 0.4531912914925617, + "grad_norm": 0.1886305809020996, + "learning_rate": 9.519710125273902e-06, + "loss": 0.0184, + "step": 70660 + }, + { + "epoch": 0.4532554283863478, + "grad_norm": 0.0639183297753334, + "learning_rate": 9.519470736999455e-06, + "loss": 0.0037, + "step": 70670 + }, + { + "epoch": 0.4533195652801339, + "grad_norm": 0.0735633373260498, + "learning_rate": 9.519231292092808e-06, + "loss": 0.0045, + "step": 70680 + }, + { + "epoch": 0.45338370217392, + "grad_norm": 0.34701141715049744, + "learning_rate": 9.518991790556965e-06, + "loss": 0.0043, + "step": 70690 + }, + { + "epoch": 0.45344783906770614, + "grad_norm": 0.18114013969898224, + "learning_rate": 9.518752232394925e-06, + "loss": 0.0036, + "step": 70700 + }, + { + "epoch": 0.45351197596149223, + "grad_norm": 0.32887938618659973, + "learning_rate": 9.51851261760969e-06, + "loss": 0.0059, + "step": 70710 + }, + { + "epoch": 0.4535761128552783, + "grad_norm": 0.2900322675704956, + "learning_rate": 9.518272946204263e-06, + "loss": 0.0052, + "step": 70720 + }, + { + "epoch": 0.4536402497490644, + "grad_norm": 0.28895503282546997, + "learning_rate": 9.518033218181646e-06, + "loss": 0.0061, + "step": 70730 + }, + { + "epoch": 0.4537043866428505, + "grad_norm": 0.11479459702968597, + "learning_rate": 9.517793433544844e-06, + "loss": 0.0027, + "step": 70740 + }, + { + "epoch": 0.4537685235366366, + "grad_norm": 0.07610035687685013, + "learning_rate": 9.51755359229686e-06, + "loss": 0.0036, + "step": 70750 + }, + { + "epoch": 0.4538326604304227, + "grad_norm": 0.10561048239469528, + "learning_rate": 9.517313694440702e-06, + "loss": 0.0024, + "step": 70760 + }, + { + "epoch": 0.4538967973242088, + "grad_norm": 0.2110764980316162, + "learning_rate": 9.517073739979377e-06, + "loss": 0.0033, + "step": 70770 + }, + { + "epoch": 0.4539609342179949, + "grad_norm": 0.19672437012195587, + "learning_rate": 9.516833728915887e-06, + "loss": 0.0042, + "step": 70780 + }, + { + "epoch": 0.454025071111781, + "grad_norm": 0.1142696738243103, + "learning_rate": 9.516593661253244e-06, + "loss": 0.0031, + "step": 70790 + }, + { + "epoch": 0.45408920800556707, + "grad_norm": 0.17315009236335754, + "learning_rate": 9.516353536994452e-06, + "loss": 0.0107, + "step": 70800 + }, + { + "epoch": 0.45415334489935316, + "grad_norm": 0.10731589049100876, + "learning_rate": 9.516113356142525e-06, + "loss": 0.0054, + "step": 70810 + }, + { + "epoch": 0.45421748179313925, + "grad_norm": 0.10094081610441208, + "learning_rate": 9.515873118700469e-06, + "loss": 0.0021, + "step": 70820 + }, + { + "epoch": 0.45428161868692535, + "grad_norm": 0.12132128328084946, + "learning_rate": 9.515632824671294e-06, + "loss": 0.0016, + "step": 70830 + }, + { + "epoch": 0.4543457555807115, + "grad_norm": 0.21277032792568207, + "learning_rate": 9.515392474058015e-06, + "loss": 0.0031, + "step": 70840 + }, + { + "epoch": 0.4544098924744976, + "grad_norm": 0.04919726029038429, + "learning_rate": 9.51515206686364e-06, + "loss": 0.0037, + "step": 70850 + }, + { + "epoch": 0.4544740293682837, + "grad_norm": 0.031588517129421234, + "learning_rate": 9.514911603091183e-06, + "loss": 0.008, + "step": 70860 + }, + { + "epoch": 0.4545381662620698, + "grad_norm": 0.2616173028945923, + "learning_rate": 9.514671082743656e-06, + "loss": 0.0028, + "step": 70870 + }, + { + "epoch": 0.45460230315585587, + "grad_norm": 0.19474613666534424, + "learning_rate": 9.514430505824075e-06, + "loss": 0.0043, + "step": 70880 + }, + { + "epoch": 0.45466644004964196, + "grad_norm": 0.09102250635623932, + "learning_rate": 9.514189872335454e-06, + "loss": 0.0033, + "step": 70890 + }, + { + "epoch": 0.45473057694342806, + "grad_norm": 0.1772601306438446, + "learning_rate": 9.513949182280804e-06, + "loss": 0.0047, + "step": 70900 + }, + { + "epoch": 0.45479471383721415, + "grad_norm": 0.254000186920166, + "learning_rate": 9.513708435663147e-06, + "loss": 0.0038, + "step": 70910 + }, + { + "epoch": 0.45485885073100024, + "grad_norm": 0.15698865056037903, + "learning_rate": 9.513467632485498e-06, + "loss": 0.0044, + "step": 70920 + }, + { + "epoch": 0.45492298762478633, + "grad_norm": 0.43258821964263916, + "learning_rate": 9.513226772750873e-06, + "loss": 0.0049, + "step": 70930 + }, + { + "epoch": 0.45498712451857243, + "grad_norm": 0.02216915786266327, + "learning_rate": 9.512985856462292e-06, + "loss": 0.0036, + "step": 70940 + }, + { + "epoch": 0.4550512614123585, + "grad_norm": 0.19537481665611267, + "learning_rate": 9.512744883622772e-06, + "loss": 0.0045, + "step": 70950 + }, + { + "epoch": 0.4551153983061446, + "grad_norm": 0.1020500659942627, + "learning_rate": 9.512503854235333e-06, + "loss": 0.0038, + "step": 70960 + }, + { + "epoch": 0.4551795351999307, + "grad_norm": 0.23862917721271515, + "learning_rate": 9.512262768302996e-06, + "loss": 0.0043, + "step": 70970 + }, + { + "epoch": 0.45524367209371686, + "grad_norm": 0.1556473970413208, + "learning_rate": 9.512021625828782e-06, + "loss": 0.0059, + "step": 70980 + }, + { + "epoch": 0.45530780898750295, + "grad_norm": 0.09845450520515442, + "learning_rate": 9.511780426815712e-06, + "loss": 0.0039, + "step": 70990 + }, + { + "epoch": 0.45537194588128904, + "grad_norm": 0.22103340923786163, + "learning_rate": 9.511539171266808e-06, + "loss": 0.0031, + "step": 71000 + }, + { + "epoch": 0.45543608277507514, + "grad_norm": 0.20481856167316437, + "learning_rate": 9.511297859185095e-06, + "loss": 0.003, + "step": 71010 + }, + { + "epoch": 0.45550021966886123, + "grad_norm": 0.12524117529392242, + "learning_rate": 9.511056490573596e-06, + "loss": 0.006, + "step": 71020 + }, + { + "epoch": 0.4555643565626473, + "grad_norm": 0.2552565634250641, + "learning_rate": 9.510815065435335e-06, + "loss": 0.0031, + "step": 71030 + }, + { + "epoch": 0.4556284934564334, + "grad_norm": 0.10889584571123123, + "learning_rate": 9.510573583773336e-06, + "loss": 0.004, + "step": 71040 + }, + { + "epoch": 0.4556926303502195, + "grad_norm": 0.06692889332771301, + "learning_rate": 9.510332045590627e-06, + "loss": 0.0048, + "step": 71050 + }, + { + "epoch": 0.4557567672440056, + "grad_norm": 0.3315446078777313, + "learning_rate": 9.510090450890236e-06, + "loss": 0.0045, + "step": 71060 + }, + { + "epoch": 0.4558209041377917, + "grad_norm": 0.20021110773086548, + "learning_rate": 9.509848799675186e-06, + "loss": 0.0069, + "step": 71070 + }, + { + "epoch": 0.4558850410315778, + "grad_norm": 0.33070963621139526, + "learning_rate": 9.509607091948507e-06, + "loss": 0.0038, + "step": 71080 + }, + { + "epoch": 0.4559491779253639, + "grad_norm": 0.05399494618177414, + "learning_rate": 9.509365327713229e-06, + "loss": 0.002, + "step": 71090 + }, + { + "epoch": 0.45601331481915, + "grad_norm": 0.0842948704957962, + "learning_rate": 9.509123506972382e-06, + "loss": 0.0051, + "step": 71100 + }, + { + "epoch": 0.45607745171293607, + "grad_norm": 0.0796792134642601, + "learning_rate": 9.508881629728992e-06, + "loss": 0.0037, + "step": 71110 + }, + { + "epoch": 0.4561415886067222, + "grad_norm": 0.32939353585243225, + "learning_rate": 9.508639695986094e-06, + "loss": 0.0038, + "step": 71120 + }, + { + "epoch": 0.4562057255005083, + "grad_norm": 0.24936258792877197, + "learning_rate": 9.508397705746719e-06, + "loss": 0.003, + "step": 71130 + }, + { + "epoch": 0.4562698623942944, + "grad_norm": 0.23419713973999023, + "learning_rate": 9.5081556590139e-06, + "loss": 0.0035, + "step": 71140 + }, + { + "epoch": 0.4563339992880805, + "grad_norm": 0.5287534594535828, + "learning_rate": 9.507913555790666e-06, + "loss": 0.0066, + "step": 71150 + }, + { + "epoch": 0.4563981361818666, + "grad_norm": 0.2167656570672989, + "learning_rate": 9.507671396080054e-06, + "loss": 0.0051, + "step": 71160 + }, + { + "epoch": 0.4564622730756527, + "grad_norm": 0.034404855221509933, + "learning_rate": 9.5074291798851e-06, + "loss": 0.0056, + "step": 71170 + }, + { + "epoch": 0.4565264099694388, + "grad_norm": 0.46575120091438293, + "learning_rate": 9.507186907208834e-06, + "loss": 0.0059, + "step": 71180 + }, + { + "epoch": 0.45659054686322487, + "grad_norm": 0.11899673938751221, + "learning_rate": 9.506944578054295e-06, + "loss": 0.0038, + "step": 71190 + }, + { + "epoch": 0.45665468375701096, + "grad_norm": 0.48142024874687195, + "learning_rate": 9.506702192424522e-06, + "loss": 0.0041, + "step": 71200 + }, + { + "epoch": 0.45671882065079705, + "grad_norm": 0.15543551743030548, + "learning_rate": 9.506459750322548e-06, + "loss": 0.0052, + "step": 71210 + }, + { + "epoch": 0.45678295754458315, + "grad_norm": 0.1856965869665146, + "learning_rate": 9.506217251751411e-06, + "loss": 0.0046, + "step": 71220 + }, + { + "epoch": 0.45684709443836924, + "grad_norm": 0.23232631385326385, + "learning_rate": 9.505974696714153e-06, + "loss": 0.0043, + "step": 71230 + }, + { + "epoch": 0.45691123133215533, + "grad_norm": 0.23498240113258362, + "learning_rate": 9.505732085213812e-06, + "loss": 0.0029, + "step": 71240 + }, + { + "epoch": 0.4569753682259414, + "grad_norm": 0.10573287308216095, + "learning_rate": 9.505489417253428e-06, + "loss": 0.0042, + "step": 71250 + }, + { + "epoch": 0.4570395051197276, + "grad_norm": 0.13049164414405823, + "learning_rate": 9.505246692836041e-06, + "loss": 0.0021, + "step": 71260 + }, + { + "epoch": 0.45710364201351367, + "grad_norm": 0.24431076645851135, + "learning_rate": 9.505003911964692e-06, + "loss": 0.0035, + "step": 71270 + }, + { + "epoch": 0.45716777890729976, + "grad_norm": 0.14189817011356354, + "learning_rate": 9.504761074642426e-06, + "loss": 0.0035, + "step": 71280 + }, + { + "epoch": 0.45723191580108585, + "grad_norm": 0.08879505097866058, + "learning_rate": 9.504518180872283e-06, + "loss": 0.0041, + "step": 71290 + }, + { + "epoch": 0.45729605269487195, + "grad_norm": 0.177837073802948, + "learning_rate": 9.504275230657309e-06, + "loss": 0.0033, + "step": 71300 + }, + { + "epoch": 0.45736018958865804, + "grad_norm": 0.182501420378685, + "learning_rate": 9.504032224000546e-06, + "loss": 0.0035, + "step": 71310 + }, + { + "epoch": 0.45742432648244413, + "grad_norm": 0.1433417797088623, + "learning_rate": 9.503789160905042e-06, + "loss": 0.0064, + "step": 71320 + }, + { + "epoch": 0.4574884633762302, + "grad_norm": 0.009760375134646893, + "learning_rate": 9.503546041373838e-06, + "loss": 0.0046, + "step": 71330 + }, + { + "epoch": 0.4575526002700163, + "grad_norm": 0.040973514318466187, + "learning_rate": 9.503302865409987e-06, + "loss": 0.0029, + "step": 71340 + }, + { + "epoch": 0.4576167371638024, + "grad_norm": 0.057639192789793015, + "learning_rate": 9.503059633016529e-06, + "loss": 0.0026, + "step": 71350 + }, + { + "epoch": 0.4576808740575885, + "grad_norm": 0.10568902641534805, + "learning_rate": 9.502816344196517e-06, + "loss": 0.0032, + "step": 71360 + }, + { + "epoch": 0.4577450109513746, + "grad_norm": 0.2689015865325928, + "learning_rate": 9.502572998953e-06, + "loss": 0.0022, + "step": 71370 + }, + { + "epoch": 0.4578091478451607, + "grad_norm": 0.049804773181676865, + "learning_rate": 9.502329597289025e-06, + "loss": 0.0024, + "step": 71380 + }, + { + "epoch": 0.4578732847389468, + "grad_norm": 0.1805211454629898, + "learning_rate": 9.50208613920764e-06, + "loss": 0.004, + "step": 71390 + }, + { + "epoch": 0.45793742163273293, + "grad_norm": 0.1536896973848343, + "learning_rate": 9.501842624711899e-06, + "loss": 0.0053, + "step": 71400 + }, + { + "epoch": 0.458001558526519, + "grad_norm": 0.14408615231513977, + "learning_rate": 9.501599053804854e-06, + "loss": 0.0049, + "step": 71410 + }, + { + "epoch": 0.4580656954203051, + "grad_norm": 0.568912923336029, + "learning_rate": 9.501355426489553e-06, + "loss": 0.0025, + "step": 71420 + }, + { + "epoch": 0.4581298323140912, + "grad_norm": 0.1006404310464859, + "learning_rate": 9.501111742769054e-06, + "loss": 0.0027, + "step": 71430 + }, + { + "epoch": 0.4581939692078773, + "grad_norm": 0.010831396095454693, + "learning_rate": 9.500868002646407e-06, + "loss": 0.0052, + "step": 71440 + }, + { + "epoch": 0.4582581061016634, + "grad_norm": 0.3710175156593323, + "learning_rate": 9.500624206124667e-06, + "loss": 0.0062, + "step": 71450 + }, + { + "epoch": 0.4583222429954495, + "grad_norm": 0.15602272748947144, + "learning_rate": 9.50038035320689e-06, + "loss": 0.0079, + "step": 71460 + }, + { + "epoch": 0.4583863798892356, + "grad_norm": 0.11349541693925858, + "learning_rate": 9.50013644389613e-06, + "loss": 0.0045, + "step": 71470 + }, + { + "epoch": 0.4584505167830217, + "grad_norm": 0.13774773478507996, + "learning_rate": 9.499892478195444e-06, + "loss": 0.0029, + "step": 71480 + }, + { + "epoch": 0.45851465367680777, + "grad_norm": 0.10175567120313644, + "learning_rate": 9.49964845610789e-06, + "loss": 0.0029, + "step": 71490 + }, + { + "epoch": 0.45857879057059386, + "grad_norm": 0.14116734266281128, + "learning_rate": 9.499404377636523e-06, + "loss": 0.0051, + "step": 71500 + }, + { + "epoch": 0.45864292746437996, + "grad_norm": 0.13040600717067719, + "learning_rate": 9.499160242784406e-06, + "loss": 0.0026, + "step": 71510 + }, + { + "epoch": 0.45870706435816605, + "grad_norm": 0.21801449358463287, + "learning_rate": 9.498916051554595e-06, + "loss": 0.0035, + "step": 71520 + }, + { + "epoch": 0.45877120125195214, + "grad_norm": 0.34369441866874695, + "learning_rate": 9.49867180395015e-06, + "loss": 0.0026, + "step": 71530 + }, + { + "epoch": 0.45883533814573824, + "grad_norm": 0.18488413095474243, + "learning_rate": 9.49842749997413e-06, + "loss": 0.0035, + "step": 71540 + }, + { + "epoch": 0.4588994750395244, + "grad_norm": 0.14039179682731628, + "learning_rate": 9.498183139629602e-06, + "loss": 0.0031, + "step": 71550 + }, + { + "epoch": 0.4589636119333105, + "grad_norm": 0.18675455451011658, + "learning_rate": 9.497938722919623e-06, + "loss": 0.0049, + "step": 71560 + }, + { + "epoch": 0.45902774882709657, + "grad_norm": 0.1299862116575241, + "learning_rate": 9.497694249847258e-06, + "loss": 0.0025, + "step": 71570 + }, + { + "epoch": 0.45909188572088266, + "grad_norm": 0.16409076750278473, + "learning_rate": 9.497449720415568e-06, + "loss": 0.0033, + "step": 71580 + }, + { + "epoch": 0.45915602261466876, + "grad_norm": 0.19340041279792786, + "learning_rate": 9.497205134627621e-06, + "loss": 0.0023, + "step": 71590 + }, + { + "epoch": 0.45922015950845485, + "grad_norm": 0.11618074029684067, + "learning_rate": 9.496960492486478e-06, + "loss": 0.0033, + "step": 71600 + }, + { + "epoch": 0.45928429640224094, + "grad_norm": 0.3043084740638733, + "learning_rate": 9.496715793995206e-06, + "loss": 0.0064, + "step": 71610 + }, + { + "epoch": 0.45934843329602704, + "grad_norm": 0.10752145200967789, + "learning_rate": 9.496471039156871e-06, + "loss": 0.0021, + "step": 71620 + }, + { + "epoch": 0.45941257018981313, + "grad_norm": 0.08993718028068542, + "learning_rate": 9.49622622797454e-06, + "loss": 0.003, + "step": 71630 + }, + { + "epoch": 0.4594767070835992, + "grad_norm": 0.14545069634914398, + "learning_rate": 9.495981360451283e-06, + "loss": 0.0052, + "step": 71640 + }, + { + "epoch": 0.4595408439773853, + "grad_norm": 0.06524930894374847, + "learning_rate": 9.495736436590166e-06, + "loss": 0.0029, + "step": 71650 + }, + { + "epoch": 0.4596049808711714, + "grad_norm": 0.09713975340127945, + "learning_rate": 9.495491456394257e-06, + "loss": 0.0032, + "step": 71660 + }, + { + "epoch": 0.4596691177649575, + "grad_norm": 0.09874634444713593, + "learning_rate": 9.495246419866628e-06, + "loss": 0.0029, + "step": 71670 + }, + { + "epoch": 0.4597332546587436, + "grad_norm": 0.04133143648505211, + "learning_rate": 9.49500132701035e-06, + "loss": 0.0032, + "step": 71680 + }, + { + "epoch": 0.45979739155252974, + "grad_norm": 0.48549845814704895, + "learning_rate": 9.49475617782849e-06, + "loss": 0.0041, + "step": 71690 + }, + { + "epoch": 0.45986152844631584, + "grad_norm": 0.08148328214883804, + "learning_rate": 9.494510972324124e-06, + "loss": 0.004, + "step": 71700 + }, + { + "epoch": 0.45992566534010193, + "grad_norm": 0.2185431867837906, + "learning_rate": 9.494265710500324e-06, + "loss": 0.0033, + "step": 71710 + }, + { + "epoch": 0.459989802233888, + "grad_norm": 0.13162735104560852, + "learning_rate": 9.494020392360161e-06, + "loss": 0.0021, + "step": 71720 + }, + { + "epoch": 0.4600539391276741, + "grad_norm": 0.17970408499240875, + "learning_rate": 9.493775017906712e-06, + "loss": 0.0034, + "step": 71730 + }, + { + "epoch": 0.4601180760214602, + "grad_norm": 0.19356241822242737, + "learning_rate": 9.49352958714305e-06, + "loss": 0.0089, + "step": 71740 + }, + { + "epoch": 0.4601822129152463, + "grad_norm": 0.1996283084154129, + "learning_rate": 9.49328410007225e-06, + "loss": 0.0024, + "step": 71750 + }, + { + "epoch": 0.4602463498090324, + "grad_norm": 0.28187283873558044, + "learning_rate": 9.493038556697391e-06, + "loss": 0.0053, + "step": 71760 + }, + { + "epoch": 0.4603104867028185, + "grad_norm": 0.06501268595457077, + "learning_rate": 9.492792957021546e-06, + "loss": 0.0051, + "step": 71770 + }, + { + "epoch": 0.4603746235966046, + "grad_norm": 0.3292664885520935, + "learning_rate": 9.492547301047794e-06, + "loss": 0.0038, + "step": 71780 + }, + { + "epoch": 0.4604387604903907, + "grad_norm": 0.15045075118541718, + "learning_rate": 9.492301588779215e-06, + "loss": 0.003, + "step": 71790 + }, + { + "epoch": 0.46050289738417677, + "grad_norm": 0.07394632697105408, + "learning_rate": 9.492055820218886e-06, + "loss": 0.0035, + "step": 71800 + }, + { + "epoch": 0.46056703427796286, + "grad_norm": 0.09739391505718231, + "learning_rate": 9.491809995369888e-06, + "loss": 0.0034, + "step": 71810 + }, + { + "epoch": 0.46063117117174895, + "grad_norm": 0.11213880777359009, + "learning_rate": 9.491564114235299e-06, + "loss": 0.0037, + "step": 71820 + }, + { + "epoch": 0.4606953080655351, + "grad_norm": 0.10225711017847061, + "learning_rate": 9.491318176818203e-06, + "loss": 0.005, + "step": 71830 + }, + { + "epoch": 0.4607594449593212, + "grad_norm": 0.1804584562778473, + "learning_rate": 9.491072183121679e-06, + "loss": 0.0029, + "step": 71840 + }, + { + "epoch": 0.4608235818531073, + "grad_norm": 0.21691973507404327, + "learning_rate": 9.490826133148812e-06, + "loss": 0.007, + "step": 71850 + }, + { + "epoch": 0.4608877187468934, + "grad_norm": 0.40952345728874207, + "learning_rate": 9.490580026902683e-06, + "loss": 0.003, + "step": 71860 + }, + { + "epoch": 0.4609518556406795, + "grad_norm": 0.07483315467834473, + "learning_rate": 9.490333864386377e-06, + "loss": 0.0048, + "step": 71870 + }, + { + "epoch": 0.46101599253446557, + "grad_norm": 0.06613636016845703, + "learning_rate": 9.490087645602978e-06, + "loss": 0.0039, + "step": 71880 + }, + { + "epoch": 0.46108012942825166, + "grad_norm": 0.20446856319904327, + "learning_rate": 9.489841370555573e-06, + "loss": 0.0033, + "step": 71890 + }, + { + "epoch": 0.46114426632203775, + "grad_norm": 0.23011180758476257, + "learning_rate": 9.489595039247246e-06, + "loss": 0.0054, + "step": 71900 + }, + { + "epoch": 0.46120840321582385, + "grad_norm": 0.16415435075759888, + "learning_rate": 9.489348651681085e-06, + "loss": 0.0053, + "step": 71910 + }, + { + "epoch": 0.46127254010960994, + "grad_norm": 0.2752489149570465, + "learning_rate": 9.489102207860175e-06, + "loss": 0.0056, + "step": 71920 + }, + { + "epoch": 0.46133667700339603, + "grad_norm": 0.19223229587078094, + "learning_rate": 9.488855707787609e-06, + "loss": 0.0034, + "step": 71930 + }, + { + "epoch": 0.4614008138971821, + "grad_norm": 0.05069417878985405, + "learning_rate": 9.488609151466471e-06, + "loss": 0.0032, + "step": 71940 + }, + { + "epoch": 0.4614649507909682, + "grad_norm": 0.11428840458393097, + "learning_rate": 9.488362538899854e-06, + "loss": 0.0044, + "step": 71950 + }, + { + "epoch": 0.4615290876847543, + "grad_norm": 0.1510365903377533, + "learning_rate": 9.488115870090843e-06, + "loss": 0.0041, + "step": 71960 + }, + { + "epoch": 0.46159322457854046, + "grad_norm": 0.2194390743970871, + "learning_rate": 9.487869145042537e-06, + "loss": 0.0043, + "step": 71970 + }, + { + "epoch": 0.46165736147232656, + "grad_norm": 0.22613966464996338, + "learning_rate": 9.48762236375802e-06, + "loss": 0.0048, + "step": 71980 + }, + { + "epoch": 0.46172149836611265, + "grad_norm": 0.16205669939517975, + "learning_rate": 9.48737552624039e-06, + "loss": 0.0054, + "step": 71990 + }, + { + "epoch": 0.46178563525989874, + "grad_norm": 0.24870528280735016, + "learning_rate": 9.487128632492735e-06, + "loss": 0.0039, + "step": 72000 + }, + { + "epoch": 0.46184977215368483, + "grad_norm": 0.16427086293697357, + "learning_rate": 9.486881682518155e-06, + "loss": 0.0024, + "step": 72010 + }, + { + "epoch": 0.4619139090474709, + "grad_norm": 0.14098943769931793, + "learning_rate": 9.486634676319736e-06, + "loss": 0.0026, + "step": 72020 + }, + { + "epoch": 0.461978045941257, + "grad_norm": 0.13580282032489777, + "learning_rate": 9.48638761390058e-06, + "loss": 0.0052, + "step": 72030 + }, + { + "epoch": 0.4620421828350431, + "grad_norm": 0.14998015761375427, + "learning_rate": 9.486140495263783e-06, + "loss": 0.0026, + "step": 72040 + }, + { + "epoch": 0.4621063197288292, + "grad_norm": 0.11328137665987015, + "learning_rate": 9.485893320412438e-06, + "loss": 0.0027, + "step": 72050 + }, + { + "epoch": 0.4621704566226153, + "grad_norm": 0.15475858747959137, + "learning_rate": 9.485646089349643e-06, + "loss": 0.0054, + "step": 72060 + }, + { + "epoch": 0.4622345935164014, + "grad_norm": 0.1065763458609581, + "learning_rate": 9.485398802078497e-06, + "loss": 0.006, + "step": 72070 + }, + { + "epoch": 0.4622987304101875, + "grad_norm": 0.19568683207035065, + "learning_rate": 9.485151458602097e-06, + "loss": 0.0043, + "step": 72080 + }, + { + "epoch": 0.4623628673039736, + "grad_norm": 0.2813143730163574, + "learning_rate": 9.484904058923546e-06, + "loss": 0.0067, + "step": 72090 + }, + { + "epoch": 0.4624270041977597, + "grad_norm": 0.22191986441612244, + "learning_rate": 9.48465660304594e-06, + "loss": 0.0046, + "step": 72100 + }, + { + "epoch": 0.4624911410915458, + "grad_norm": 0.17017143964767456, + "learning_rate": 9.484409090972384e-06, + "loss": 0.0028, + "step": 72110 + }, + { + "epoch": 0.4625552779853319, + "grad_norm": 0.11975177377462387, + "learning_rate": 9.484161522705975e-06, + "loss": 0.0036, + "step": 72120 + }, + { + "epoch": 0.462619414879118, + "grad_norm": 0.04267343133687973, + "learning_rate": 9.483913898249817e-06, + "loss": 0.003, + "step": 72130 + }, + { + "epoch": 0.4626835517729041, + "grad_norm": 0.035006728023290634, + "learning_rate": 9.483666217607015e-06, + "loss": 0.0039, + "step": 72140 + }, + { + "epoch": 0.4627476886666902, + "grad_norm": 0.09996423125267029, + "learning_rate": 9.48341848078067e-06, + "loss": 0.0024, + "step": 72150 + }, + { + "epoch": 0.4628118255604763, + "grad_norm": 0.44871264696121216, + "learning_rate": 9.483170687773888e-06, + "loss": 0.0049, + "step": 72160 + }, + { + "epoch": 0.4628759624542624, + "grad_norm": 0.10117613524198532, + "learning_rate": 9.482922838589772e-06, + "loss": 0.0034, + "step": 72170 + }, + { + "epoch": 0.4629400993480485, + "grad_norm": 0.1156907007098198, + "learning_rate": 9.482674933231428e-06, + "loss": 0.0025, + "step": 72180 + }, + { + "epoch": 0.46300423624183457, + "grad_norm": 0.05373195931315422, + "learning_rate": 9.482426971701966e-06, + "loss": 0.0032, + "step": 72190 + }, + { + "epoch": 0.46306837313562066, + "grad_norm": 0.24135103821754456, + "learning_rate": 9.482178954004488e-06, + "loss": 0.0033, + "step": 72200 + }, + { + "epoch": 0.46313251002940675, + "grad_norm": 0.2547978162765503, + "learning_rate": 9.481930880142107e-06, + "loss": 0.0041, + "step": 72210 + }, + { + "epoch": 0.46319664692319285, + "grad_norm": 0.27852529287338257, + "learning_rate": 9.481682750117926e-06, + "loss": 0.0038, + "step": 72220 + }, + { + "epoch": 0.46326078381697894, + "grad_norm": 0.12560653686523438, + "learning_rate": 9.48143456393506e-06, + "loss": 0.006, + "step": 72230 + }, + { + "epoch": 0.46332492071076503, + "grad_norm": 0.15211427211761475, + "learning_rate": 9.481186321596614e-06, + "loss": 0.005, + "step": 72240 + }, + { + "epoch": 0.4633890576045512, + "grad_norm": 0.16033463180065155, + "learning_rate": 9.480938023105702e-06, + "loss": 0.0031, + "step": 72250 + }, + { + "epoch": 0.4634531944983373, + "grad_norm": 0.1595131754875183, + "learning_rate": 9.480689668465433e-06, + "loss": 0.0039, + "step": 72260 + }, + { + "epoch": 0.46351733139212337, + "grad_norm": 0.27040454745292664, + "learning_rate": 9.48044125767892e-06, + "loss": 0.0038, + "step": 72270 + }, + { + "epoch": 0.46358146828590946, + "grad_norm": 0.37454044818878174, + "learning_rate": 9.480192790749277e-06, + "loss": 0.0038, + "step": 72280 + }, + { + "epoch": 0.46364560517969555, + "grad_norm": 0.16823004186153412, + "learning_rate": 9.479944267679617e-06, + "loss": 0.0041, + "step": 72290 + }, + { + "epoch": 0.46370974207348165, + "grad_norm": 0.07666993141174316, + "learning_rate": 9.479695688473051e-06, + "loss": 0.0029, + "step": 72300 + }, + { + "epoch": 0.46377387896726774, + "grad_norm": 0.2639802098274231, + "learning_rate": 9.4794470531327e-06, + "loss": 0.0052, + "step": 72310 + }, + { + "epoch": 0.46383801586105383, + "grad_norm": 0.05207554250955582, + "learning_rate": 9.479198361661673e-06, + "loss": 0.0041, + "step": 72320 + }, + { + "epoch": 0.4639021527548399, + "grad_norm": 0.11141420155763626, + "learning_rate": 9.47894961406309e-06, + "loss": 0.003, + "step": 72330 + }, + { + "epoch": 0.463966289648626, + "grad_norm": 0.16782517731189728, + "learning_rate": 9.478700810340067e-06, + "loss": 0.0045, + "step": 72340 + }, + { + "epoch": 0.4640304265424121, + "grad_norm": 0.10479758679866791, + "learning_rate": 9.478451950495725e-06, + "loss": 0.0034, + "step": 72350 + }, + { + "epoch": 0.4640945634361982, + "grad_norm": 0.1993977129459381, + "learning_rate": 9.478203034533176e-06, + "loss": 0.0031, + "step": 72360 + }, + { + "epoch": 0.4641587003299843, + "grad_norm": 0.12023179978132248, + "learning_rate": 9.477954062455543e-06, + "loss": 0.0032, + "step": 72370 + }, + { + "epoch": 0.4642228372237704, + "grad_norm": 0.1690799742937088, + "learning_rate": 9.477705034265945e-06, + "loss": 0.0033, + "step": 72380 + }, + { + "epoch": 0.46428697411755654, + "grad_norm": 0.15061965584754944, + "learning_rate": 9.477455949967504e-06, + "loss": 0.0025, + "step": 72390 + }, + { + "epoch": 0.46435111101134263, + "grad_norm": 0.1160641685128212, + "learning_rate": 9.47720680956334e-06, + "loss": 0.0028, + "step": 72400 + }, + { + "epoch": 0.4644152479051287, + "grad_norm": 0.4018762409687042, + "learning_rate": 9.476957613056574e-06, + "loss": 0.0042, + "step": 72410 + }, + { + "epoch": 0.4644793847989148, + "grad_norm": 0.22380676865577698, + "learning_rate": 9.476708360450328e-06, + "loss": 0.0041, + "step": 72420 + }, + { + "epoch": 0.4645435216927009, + "grad_norm": 0.19054381549358368, + "learning_rate": 9.476459051747729e-06, + "loss": 0.004, + "step": 72430 + }, + { + "epoch": 0.464607658586487, + "grad_norm": 0.1574215590953827, + "learning_rate": 9.476209686951898e-06, + "loss": 0.0042, + "step": 72440 + }, + { + "epoch": 0.4646717954802731, + "grad_norm": 0.20319758355617523, + "learning_rate": 9.475960266065962e-06, + "loss": 0.0053, + "step": 72450 + }, + { + "epoch": 0.4647359323740592, + "grad_norm": 0.25375211238861084, + "learning_rate": 9.475710789093043e-06, + "loss": 0.0024, + "step": 72460 + }, + { + "epoch": 0.4648000692678453, + "grad_norm": 0.2655993402004242, + "learning_rate": 9.475461256036268e-06, + "loss": 0.006, + "step": 72470 + }, + { + "epoch": 0.4648642061616314, + "grad_norm": 0.05914941430091858, + "learning_rate": 9.475211666898769e-06, + "loss": 0.0029, + "step": 72480 + }, + { + "epoch": 0.46492834305541747, + "grad_norm": 0.18190039694309235, + "learning_rate": 9.474962021683667e-06, + "loss": 0.0034, + "step": 72490 + }, + { + "epoch": 0.46499247994920356, + "grad_norm": 0.1939253956079483, + "learning_rate": 9.474712320394092e-06, + "loss": 0.004, + "step": 72500 + }, + { + "epoch": 0.46505661684298966, + "grad_norm": 0.0900583490729332, + "learning_rate": 9.474462563033174e-06, + "loss": 0.0034, + "step": 72510 + }, + { + "epoch": 0.46512075373677575, + "grad_norm": 0.15111422538757324, + "learning_rate": 9.474212749604044e-06, + "loss": 0.004, + "step": 72520 + }, + { + "epoch": 0.4651848906305619, + "grad_norm": 0.05144800618290901, + "learning_rate": 9.473962880109828e-06, + "loss": 0.0026, + "step": 72530 + }, + { + "epoch": 0.465249027524348, + "grad_norm": 0.0993695855140686, + "learning_rate": 9.473712954553661e-06, + "loss": 0.0028, + "step": 72540 + }, + { + "epoch": 0.4653131644181341, + "grad_norm": 0.2637014389038086, + "learning_rate": 9.473462972938673e-06, + "loss": 0.0049, + "step": 72550 + }, + { + "epoch": 0.4653773013119202, + "grad_norm": 0.031208178028464317, + "learning_rate": 9.473212935267997e-06, + "loss": 0.003, + "step": 72560 + }, + { + "epoch": 0.46544143820570627, + "grad_norm": 0.18791769444942474, + "learning_rate": 9.472962841544767e-06, + "loss": 0.0028, + "step": 72570 + }, + { + "epoch": 0.46550557509949236, + "grad_norm": 0.25973379611968994, + "learning_rate": 9.472712691772114e-06, + "loss": 0.0031, + "step": 72580 + }, + { + "epoch": 0.46556971199327846, + "grad_norm": 0.4147563874721527, + "learning_rate": 9.472462485953175e-06, + "loss": 0.0032, + "step": 72590 + }, + { + "epoch": 0.46563384888706455, + "grad_norm": 0.1889244019985199, + "learning_rate": 9.472212224091084e-06, + "loss": 0.0052, + "step": 72600 + }, + { + "epoch": 0.46569798578085064, + "grad_norm": 0.14517726004123688, + "learning_rate": 9.47196190618898e-06, + "loss": 0.0022, + "step": 72610 + }, + { + "epoch": 0.46576212267463674, + "grad_norm": 0.18792518973350525, + "learning_rate": 9.471711532249994e-06, + "loss": 0.0037, + "step": 72620 + }, + { + "epoch": 0.46582625956842283, + "grad_norm": 0.10088556259870529, + "learning_rate": 9.471461102277269e-06, + "loss": 0.0047, + "step": 72630 + }, + { + "epoch": 0.4658903964622089, + "grad_norm": 0.055122584104537964, + "learning_rate": 9.471210616273941e-06, + "loss": 0.0042, + "step": 72640 + }, + { + "epoch": 0.465954533355995, + "grad_norm": 0.24400022625923157, + "learning_rate": 9.470960074243146e-06, + "loss": 0.0039, + "step": 72650 + }, + { + "epoch": 0.4660186702497811, + "grad_norm": 0.22660301625728607, + "learning_rate": 9.470709476188027e-06, + "loss": 0.0022, + "step": 72660 + }, + { + "epoch": 0.46608280714356726, + "grad_norm": 0.2594574987888336, + "learning_rate": 9.470458822111724e-06, + "loss": 0.005, + "step": 72670 + }, + { + "epoch": 0.46614694403735335, + "grad_norm": 0.21476195752620697, + "learning_rate": 9.470208112017376e-06, + "loss": 0.0092, + "step": 72680 + }, + { + "epoch": 0.46621108093113944, + "grad_norm": 0.1171347126364708, + "learning_rate": 9.469957345908125e-06, + "loss": 0.0039, + "step": 72690 + }, + { + "epoch": 0.46627521782492554, + "grad_norm": 0.2394641488790512, + "learning_rate": 9.469706523787116e-06, + "loss": 0.0039, + "step": 72700 + }, + { + "epoch": 0.46633935471871163, + "grad_norm": 0.1809658259153366, + "learning_rate": 9.469455645657488e-06, + "loss": 0.0035, + "step": 72710 + }, + { + "epoch": 0.4664034916124977, + "grad_norm": 0.0744139775633812, + "learning_rate": 9.469204711522387e-06, + "loss": 0.0036, + "step": 72720 + }, + { + "epoch": 0.4664676285062838, + "grad_norm": 0.14177826046943665, + "learning_rate": 9.468953721384957e-06, + "loss": 0.0029, + "step": 72730 + }, + { + "epoch": 0.4665317654000699, + "grad_norm": 0.1294241100549698, + "learning_rate": 9.468702675248342e-06, + "loss": 0.0029, + "step": 72740 + }, + { + "epoch": 0.466595902293856, + "grad_norm": 0.07392989099025726, + "learning_rate": 9.46845157311569e-06, + "loss": 0.0034, + "step": 72750 + }, + { + "epoch": 0.4666600391876421, + "grad_norm": 0.1666719615459442, + "learning_rate": 9.468200414990147e-06, + "loss": 0.0026, + "step": 72760 + }, + { + "epoch": 0.4667241760814282, + "grad_norm": 0.22215905785560608, + "learning_rate": 9.467949200874858e-06, + "loss": 0.0053, + "step": 72770 + }, + { + "epoch": 0.4667883129752143, + "grad_norm": 0.11765000224113464, + "learning_rate": 9.467697930772972e-06, + "loss": 0.0035, + "step": 72780 + }, + { + "epoch": 0.4668524498690004, + "grad_norm": 0.5070384740829468, + "learning_rate": 9.467446604687639e-06, + "loss": 0.0048, + "step": 72790 + }, + { + "epoch": 0.46691658676278647, + "grad_norm": 0.20242191851139069, + "learning_rate": 9.467195222622005e-06, + "loss": 0.0021, + "step": 72800 + }, + { + "epoch": 0.4669807236565726, + "grad_norm": 0.09090352803468704, + "learning_rate": 9.466943784579226e-06, + "loss": 0.0032, + "step": 72810 + }, + { + "epoch": 0.4670448605503587, + "grad_norm": 0.02958475984632969, + "learning_rate": 9.466692290562445e-06, + "loss": 0.003, + "step": 72820 + }, + { + "epoch": 0.4671089974441448, + "grad_norm": 0.20340225100517273, + "learning_rate": 9.46644074057482e-06, + "loss": 0.0037, + "step": 72830 + }, + { + "epoch": 0.4671731343379309, + "grad_norm": 0.22365257143974304, + "learning_rate": 9.4661891346195e-06, + "loss": 0.0046, + "step": 72840 + }, + { + "epoch": 0.467237271231717, + "grad_norm": 0.18572106957435608, + "learning_rate": 9.465937472699638e-06, + "loss": 0.0042, + "step": 72850 + }, + { + "epoch": 0.4673014081255031, + "grad_norm": 0.2335018366575241, + "learning_rate": 9.465685754818387e-06, + "loss": 0.0038, + "step": 72860 + }, + { + "epoch": 0.4673655450192892, + "grad_norm": 0.15035131573677063, + "learning_rate": 9.465433980978902e-06, + "loss": 0.0032, + "step": 72870 + }, + { + "epoch": 0.46742968191307527, + "grad_norm": 0.43644216656684875, + "learning_rate": 9.465182151184337e-06, + "loss": 0.0041, + "step": 72880 + }, + { + "epoch": 0.46749381880686136, + "grad_norm": 0.15509265661239624, + "learning_rate": 9.46493026543785e-06, + "loss": 0.0032, + "step": 72890 + }, + { + "epoch": 0.46755795570064745, + "grad_norm": 0.16943609714508057, + "learning_rate": 9.464678323742595e-06, + "loss": 0.0026, + "step": 72900 + }, + { + "epoch": 0.46762209259443355, + "grad_norm": 0.05730758234858513, + "learning_rate": 9.46442632610173e-06, + "loss": 0.0045, + "step": 72910 + }, + { + "epoch": 0.46768622948821964, + "grad_norm": 0.1519549936056137, + "learning_rate": 9.464174272518414e-06, + "loss": 0.0036, + "step": 72920 + }, + { + "epoch": 0.46775036638200573, + "grad_norm": 0.2133433073759079, + "learning_rate": 9.463922162995801e-06, + "loss": 0.0047, + "step": 72930 + }, + { + "epoch": 0.4678145032757918, + "grad_norm": 0.11244549602270126, + "learning_rate": 9.463669997537055e-06, + "loss": 0.004, + "step": 72940 + }, + { + "epoch": 0.4678786401695779, + "grad_norm": 0.2576900124549866, + "learning_rate": 9.463417776145334e-06, + "loss": 0.0038, + "step": 72950 + }, + { + "epoch": 0.46794277706336407, + "grad_norm": 0.079642154276371, + "learning_rate": 9.463165498823797e-06, + "loss": 0.0071, + "step": 72960 + }, + { + "epoch": 0.46800691395715016, + "grad_norm": 0.24542100727558136, + "learning_rate": 9.462913165575606e-06, + "loss": 0.0048, + "step": 72970 + }, + { + "epoch": 0.46807105085093625, + "grad_norm": 0.14651556313037872, + "learning_rate": 9.462660776403924e-06, + "loss": 0.0042, + "step": 72980 + }, + { + "epoch": 0.46813518774472235, + "grad_norm": 0.00939899031072855, + "learning_rate": 9.462408331311914e-06, + "loss": 0.004, + "step": 72990 + }, + { + "epoch": 0.46819932463850844, + "grad_norm": 0.31788161396980286, + "learning_rate": 9.462155830302738e-06, + "loss": 0.003, + "step": 73000 + }, + { + "epoch": 0.46826346153229453, + "grad_norm": 0.15321850776672363, + "learning_rate": 9.46190327337956e-06, + "loss": 0.0031, + "step": 73010 + }, + { + "epoch": 0.4683275984260806, + "grad_norm": 0.4166426658630371, + "learning_rate": 9.461650660545547e-06, + "loss": 0.0029, + "step": 73020 + }, + { + "epoch": 0.4683917353198667, + "grad_norm": 0.06505458801984787, + "learning_rate": 9.46139799180386e-06, + "loss": 0.003, + "step": 73030 + }, + { + "epoch": 0.4684558722136528, + "grad_norm": 0.46712160110473633, + "learning_rate": 9.46114526715767e-06, + "loss": 0.0048, + "step": 73040 + }, + { + "epoch": 0.4685200091074389, + "grad_norm": 0.09600020200014114, + "learning_rate": 9.460892486610138e-06, + "loss": 0.0046, + "step": 73050 + }, + { + "epoch": 0.468584146001225, + "grad_norm": 0.1537150740623474, + "learning_rate": 9.460639650164439e-06, + "loss": 0.0032, + "step": 73060 + }, + { + "epoch": 0.4686482828950111, + "grad_norm": 0.3263317346572876, + "learning_rate": 9.460386757823734e-06, + "loss": 0.0053, + "step": 73070 + }, + { + "epoch": 0.4687124197887972, + "grad_norm": 0.17880694568157196, + "learning_rate": 9.460133809591197e-06, + "loss": 0.0035, + "step": 73080 + }, + { + "epoch": 0.4687765566825833, + "grad_norm": 0.0824461504817009, + "learning_rate": 9.459880805469994e-06, + "loss": 0.0036, + "step": 73090 + }, + { + "epoch": 0.4688406935763694, + "grad_norm": 0.17056573927402496, + "learning_rate": 9.459627745463298e-06, + "loss": 0.0053, + "step": 73100 + }, + { + "epoch": 0.4689048304701555, + "grad_norm": 0.6064552664756775, + "learning_rate": 9.459374629574279e-06, + "loss": 0.0034, + "step": 73110 + }, + { + "epoch": 0.4689689673639416, + "grad_norm": 0.06469376385211945, + "learning_rate": 9.45912145780611e-06, + "loss": 0.0037, + "step": 73120 + }, + { + "epoch": 0.4690331042577277, + "grad_norm": 0.09984765201807022, + "learning_rate": 9.458868230161962e-06, + "loss": 0.0059, + "step": 73130 + }, + { + "epoch": 0.4690972411515138, + "grad_norm": 0.1453811526298523, + "learning_rate": 9.458614946645006e-06, + "loss": 0.0041, + "step": 73140 + }, + { + "epoch": 0.4691613780452999, + "grad_norm": 0.07419601827859879, + "learning_rate": 9.45836160725842e-06, + "loss": 0.0043, + "step": 73150 + }, + { + "epoch": 0.469225514939086, + "grad_norm": 0.2962948679924011, + "learning_rate": 9.458108212005378e-06, + "loss": 0.0033, + "step": 73160 + }, + { + "epoch": 0.4692896518328721, + "grad_norm": 0.3422074615955353, + "learning_rate": 9.457854760889052e-06, + "loss": 0.0047, + "step": 73170 + }, + { + "epoch": 0.46935378872665817, + "grad_norm": 0.13518819212913513, + "learning_rate": 9.45760125391262e-06, + "loss": 0.004, + "step": 73180 + }, + { + "epoch": 0.46941792562044427, + "grad_norm": 0.053319867700338364, + "learning_rate": 9.457347691079259e-06, + "loss": 0.002, + "step": 73190 + }, + { + "epoch": 0.46948206251423036, + "grad_norm": 0.11041786521673203, + "learning_rate": 9.457094072392145e-06, + "loss": 0.0039, + "step": 73200 + }, + { + "epoch": 0.46954619940801645, + "grad_norm": 0.16768397390842438, + "learning_rate": 9.456840397854459e-06, + "loss": 0.0059, + "step": 73210 + }, + { + "epoch": 0.46961033630180254, + "grad_norm": 0.15237963199615479, + "learning_rate": 9.456586667469376e-06, + "loss": 0.0045, + "step": 73220 + }, + { + "epoch": 0.46967447319558864, + "grad_norm": 0.12915267050266266, + "learning_rate": 9.456332881240077e-06, + "loss": 0.0047, + "step": 73230 + }, + { + "epoch": 0.4697386100893748, + "grad_norm": 0.1947353184223175, + "learning_rate": 9.456079039169743e-06, + "loss": 0.0083, + "step": 73240 + }, + { + "epoch": 0.4698027469831609, + "grad_norm": 0.1648906022310257, + "learning_rate": 9.455825141261552e-06, + "loss": 0.004, + "step": 73250 + }, + { + "epoch": 0.469866883876947, + "grad_norm": 0.15711960196495056, + "learning_rate": 9.455571187518689e-06, + "loss": 0.0019, + "step": 73260 + }, + { + "epoch": 0.46993102077073307, + "grad_norm": 0.12882910668849945, + "learning_rate": 9.455317177944335e-06, + "loss": 0.0039, + "step": 73270 + }, + { + "epoch": 0.46999515766451916, + "grad_norm": 0.21856532990932465, + "learning_rate": 9.455063112541672e-06, + "loss": 0.0052, + "step": 73280 + }, + { + "epoch": 0.47005929455830525, + "grad_norm": 0.1816015988588333, + "learning_rate": 9.454808991313883e-06, + "loss": 0.0029, + "step": 73290 + }, + { + "epoch": 0.47012343145209134, + "grad_norm": 0.05643494427204132, + "learning_rate": 9.454554814264155e-06, + "loss": 0.0041, + "step": 73300 + }, + { + "epoch": 0.47018756834587744, + "grad_norm": 0.05270713195204735, + "learning_rate": 9.45430058139567e-06, + "loss": 0.0022, + "step": 73310 + }, + { + "epoch": 0.47025170523966353, + "grad_norm": 0.11443763971328735, + "learning_rate": 9.454046292711617e-06, + "loss": 0.0027, + "step": 73320 + }, + { + "epoch": 0.4703158421334496, + "grad_norm": 0.40134239196777344, + "learning_rate": 9.453791948215181e-06, + "loss": 0.0033, + "step": 73330 + }, + { + "epoch": 0.4703799790272357, + "grad_norm": 0.05452294647693634, + "learning_rate": 9.453537547909547e-06, + "loss": 0.0037, + "step": 73340 + }, + { + "epoch": 0.4704441159210218, + "grad_norm": 0.22592099010944366, + "learning_rate": 9.453283091797905e-06, + "loss": 0.0029, + "step": 73350 + }, + { + "epoch": 0.4705082528148079, + "grad_norm": 0.12825137376785278, + "learning_rate": 9.453028579883446e-06, + "loss": 0.0042, + "step": 73360 + }, + { + "epoch": 0.470572389708594, + "grad_norm": 0.323760062456131, + "learning_rate": 9.452774012169352e-06, + "loss": 0.0038, + "step": 73370 + }, + { + "epoch": 0.47063652660238015, + "grad_norm": 0.07360520958900452, + "learning_rate": 9.45251938865882e-06, + "loss": 0.0024, + "step": 73380 + }, + { + "epoch": 0.47070066349616624, + "grad_norm": 0.19476427137851715, + "learning_rate": 9.452264709355037e-06, + "loss": 0.0031, + "step": 73390 + }, + { + "epoch": 0.47076480038995233, + "grad_norm": 0.11744065582752228, + "learning_rate": 9.452009974261196e-06, + "loss": 0.0023, + "step": 73400 + }, + { + "epoch": 0.4708289372837384, + "grad_norm": 0.5189984440803528, + "learning_rate": 9.451755183380487e-06, + "loss": 0.0033, + "step": 73410 + }, + { + "epoch": 0.4708930741775245, + "grad_norm": 0.20697681605815887, + "learning_rate": 9.451500336716106e-06, + "loss": 0.0029, + "step": 73420 + }, + { + "epoch": 0.4709572110713106, + "grad_norm": 0.1401924192905426, + "learning_rate": 9.45124543427124e-06, + "loss": 0.0034, + "step": 73430 + }, + { + "epoch": 0.4710213479650967, + "grad_norm": 0.16114145517349243, + "learning_rate": 9.450990476049092e-06, + "loss": 0.0029, + "step": 73440 + }, + { + "epoch": 0.4710854848588828, + "grad_norm": 0.43921709060668945, + "learning_rate": 9.45073546205285e-06, + "loss": 0.0028, + "step": 73450 + }, + { + "epoch": 0.4711496217526689, + "grad_norm": 0.09842345863580704, + "learning_rate": 9.450480392285714e-06, + "loss": 0.0026, + "step": 73460 + }, + { + "epoch": 0.471213758646455, + "grad_norm": 0.0862642303109169, + "learning_rate": 9.450225266750877e-06, + "loss": 0.0023, + "step": 73470 + }, + { + "epoch": 0.4712778955402411, + "grad_norm": 0.09110996127128601, + "learning_rate": 9.449970085451535e-06, + "loss": 0.0126, + "step": 73480 + }, + { + "epoch": 0.47134203243402717, + "grad_norm": 0.08711467683315277, + "learning_rate": 9.449714848390889e-06, + "loss": 0.0029, + "step": 73490 + }, + { + "epoch": 0.47140616932781326, + "grad_norm": 0.14931470155715942, + "learning_rate": 9.449459555572135e-06, + "loss": 0.0018, + "step": 73500 + }, + { + "epoch": 0.47147030622159936, + "grad_norm": 0.3300265967845917, + "learning_rate": 9.449204206998474e-06, + "loss": 0.0043, + "step": 73510 + }, + { + "epoch": 0.4715344431153855, + "grad_norm": 0.19805078208446503, + "learning_rate": 9.448948802673103e-06, + "loss": 0.0037, + "step": 73520 + }, + { + "epoch": 0.4715985800091716, + "grad_norm": 0.07131168246269226, + "learning_rate": 9.448693342599225e-06, + "loss": 0.0028, + "step": 73530 + }, + { + "epoch": 0.4716627169029577, + "grad_norm": 0.14850689470767975, + "learning_rate": 9.448437826780041e-06, + "loss": 0.0022, + "step": 73540 + }, + { + "epoch": 0.4717268537967438, + "grad_norm": 0.1711905300617218, + "learning_rate": 9.44818225521875e-06, + "loss": 0.0043, + "step": 73550 + }, + { + "epoch": 0.4717909906905299, + "grad_norm": 1.425281286239624, + "learning_rate": 9.447926627918557e-06, + "loss": 0.0063, + "step": 73560 + }, + { + "epoch": 0.47185512758431597, + "grad_norm": 0.1310764104127884, + "learning_rate": 9.447670944882663e-06, + "loss": 0.0033, + "step": 73570 + }, + { + "epoch": 0.47191926447810206, + "grad_norm": 0.24520087242126465, + "learning_rate": 9.447415206114275e-06, + "loss": 0.0046, + "step": 73580 + }, + { + "epoch": 0.47198340137188816, + "grad_norm": 0.2960132956504822, + "learning_rate": 9.447159411616595e-06, + "loss": 0.0069, + "step": 73590 + }, + { + "epoch": 0.47204753826567425, + "grad_norm": 0.12461918592453003, + "learning_rate": 9.44690356139283e-06, + "loss": 0.0038, + "step": 73600 + }, + { + "epoch": 0.47211167515946034, + "grad_norm": 0.08798740059137344, + "learning_rate": 9.446647655446186e-06, + "loss": 0.0036, + "step": 73610 + }, + { + "epoch": 0.47217581205324644, + "grad_norm": 0.05591832101345062, + "learning_rate": 9.446391693779868e-06, + "loss": 0.0087, + "step": 73620 + }, + { + "epoch": 0.47223994894703253, + "grad_norm": 0.13037869334220886, + "learning_rate": 9.446135676397084e-06, + "loss": 0.0032, + "step": 73630 + }, + { + "epoch": 0.4723040858408186, + "grad_norm": 0.20414581894874573, + "learning_rate": 9.445879603301043e-06, + "loss": 0.0044, + "step": 73640 + }, + { + "epoch": 0.4723682227346047, + "grad_norm": 0.2094263732433319, + "learning_rate": 9.445623474494951e-06, + "loss": 0.0052, + "step": 73650 + }, + { + "epoch": 0.47243235962839086, + "grad_norm": 0.08255743235349655, + "learning_rate": 9.445367289982022e-06, + "loss": 0.0033, + "step": 73660 + }, + { + "epoch": 0.47249649652217696, + "grad_norm": 0.024607084691524506, + "learning_rate": 9.445111049765463e-06, + "loss": 0.0033, + "step": 73670 + }, + { + "epoch": 0.47256063341596305, + "grad_norm": 0.029220154508948326, + "learning_rate": 9.444854753848485e-06, + "loss": 0.0034, + "step": 73680 + }, + { + "epoch": 0.47262477030974914, + "grad_norm": 0.13529035449028015, + "learning_rate": 9.444598402234302e-06, + "loss": 0.0028, + "step": 73690 + }, + { + "epoch": 0.47268890720353524, + "grad_norm": 0.03135405853390694, + "learning_rate": 9.444341994926122e-06, + "loss": 0.0059, + "step": 73700 + }, + { + "epoch": 0.47275304409732133, + "grad_norm": 0.2813445031642914, + "learning_rate": 9.444085531927162e-06, + "loss": 0.0031, + "step": 73710 + }, + { + "epoch": 0.4728171809911074, + "grad_norm": 0.2533155679702759, + "learning_rate": 9.443829013240635e-06, + "loss": 0.0041, + "step": 73720 + }, + { + "epoch": 0.4728813178848935, + "grad_norm": 0.5630262494087219, + "learning_rate": 9.443572438869754e-06, + "loss": 0.0036, + "step": 73730 + }, + { + "epoch": 0.4729454547786796, + "grad_norm": 0.27068030834198, + "learning_rate": 9.443315808817735e-06, + "loss": 0.0047, + "step": 73740 + }, + { + "epoch": 0.4730095916724657, + "grad_norm": 0.2189641296863556, + "learning_rate": 9.443059123087793e-06, + "loss": 0.0023, + "step": 73750 + }, + { + "epoch": 0.4730737285662518, + "grad_norm": 0.09532841295003891, + "learning_rate": 9.442802381683144e-06, + "loss": 0.0029, + "step": 73760 + }, + { + "epoch": 0.4731378654600379, + "grad_norm": 0.10535168647766113, + "learning_rate": 9.442545584607005e-06, + "loss": 0.0033, + "step": 73770 + }, + { + "epoch": 0.473202002353824, + "grad_norm": 0.11029849946498871, + "learning_rate": 9.442288731862597e-06, + "loss": 0.0037, + "step": 73780 + }, + { + "epoch": 0.4732661392476101, + "grad_norm": 0.0792105495929718, + "learning_rate": 9.442031823453134e-06, + "loss": 0.0025, + "step": 73790 + }, + { + "epoch": 0.4733302761413962, + "grad_norm": 0.10897326469421387, + "learning_rate": 9.441774859381841e-06, + "loss": 0.003, + "step": 73800 + }, + { + "epoch": 0.4733944130351823, + "grad_norm": 0.05002043768763542, + "learning_rate": 9.441517839651932e-06, + "loss": 0.0034, + "step": 73810 + }, + { + "epoch": 0.4734585499289684, + "grad_norm": 0.039975326508283615, + "learning_rate": 9.441260764266632e-06, + "loss": 0.0036, + "step": 73820 + }, + { + "epoch": 0.4735226868227545, + "grad_norm": 0.14692874252796173, + "learning_rate": 9.441003633229158e-06, + "loss": 0.0061, + "step": 73830 + }, + { + "epoch": 0.4735868237165406, + "grad_norm": 0.31991007924079895, + "learning_rate": 9.440746446542736e-06, + "loss": 0.0031, + "step": 73840 + }, + { + "epoch": 0.4736509606103267, + "grad_norm": 0.1329358071088791, + "learning_rate": 9.440489204210588e-06, + "loss": 0.0038, + "step": 73850 + }, + { + "epoch": 0.4737150975041128, + "grad_norm": 0.16461928188800812, + "learning_rate": 9.440231906235935e-06, + "loss": 0.0035, + "step": 73860 + }, + { + "epoch": 0.4737792343978989, + "grad_norm": 0.13529208302497864, + "learning_rate": 9.439974552622003e-06, + "loss": 0.0042, + "step": 73870 + }, + { + "epoch": 0.47384337129168497, + "grad_norm": 0.4698379337787628, + "learning_rate": 9.439717143372017e-06, + "loss": 0.0074, + "step": 73880 + }, + { + "epoch": 0.47390750818547106, + "grad_norm": 0.2502039968967438, + "learning_rate": 9.439459678489203e-06, + "loss": 0.0037, + "step": 73890 + }, + { + "epoch": 0.47397164507925715, + "grad_norm": 0.06690411269664764, + "learning_rate": 9.439202157976786e-06, + "loss": 0.0031, + "step": 73900 + }, + { + "epoch": 0.47403578197304325, + "grad_norm": 0.27009642124176025, + "learning_rate": 9.438944581837993e-06, + "loss": 0.0027, + "step": 73910 + }, + { + "epoch": 0.47409991886682934, + "grad_norm": 0.12579452991485596, + "learning_rate": 9.438686950076052e-06, + "loss": 0.0061, + "step": 73920 + }, + { + "epoch": 0.47416405576061543, + "grad_norm": 0.2248860001564026, + "learning_rate": 9.43842926269419e-06, + "loss": 0.0099, + "step": 73930 + }, + { + "epoch": 0.4742281926544016, + "grad_norm": 0.23569662868976593, + "learning_rate": 9.43817151969564e-06, + "loss": 0.0067, + "step": 73940 + }, + { + "epoch": 0.4742923295481877, + "grad_norm": 0.055896393954753876, + "learning_rate": 9.437913721083628e-06, + "loss": 0.0024, + "step": 73950 + }, + { + "epoch": 0.47435646644197377, + "grad_norm": 0.043094929307699203, + "learning_rate": 9.437655866861383e-06, + "loss": 0.0037, + "step": 73960 + }, + { + "epoch": 0.47442060333575986, + "grad_norm": 0.07713795453310013, + "learning_rate": 9.437397957032141e-06, + "loss": 0.0027, + "step": 73970 + }, + { + "epoch": 0.47448474022954595, + "grad_norm": 0.1753903180360794, + "learning_rate": 9.43713999159913e-06, + "loss": 0.0033, + "step": 73980 + }, + { + "epoch": 0.47454887712333205, + "grad_norm": 0.0889492928981781, + "learning_rate": 9.436881970565583e-06, + "loss": 0.0033, + "step": 73990 + }, + { + "epoch": 0.47461301401711814, + "grad_norm": 0.14199770987033844, + "learning_rate": 9.436623893934735e-06, + "loss": 0.0031, + "step": 74000 + }, + { + "epoch": 0.47467715091090423, + "grad_norm": 0.39867103099823, + "learning_rate": 9.43636576170982e-06, + "loss": 0.0052, + "step": 74010 + }, + { + "epoch": 0.4747412878046903, + "grad_norm": 0.11389205604791641, + "learning_rate": 9.43610757389407e-06, + "loss": 0.002, + "step": 74020 + }, + { + "epoch": 0.4748054246984764, + "grad_norm": 0.2963626980781555, + "learning_rate": 9.435849330490722e-06, + "loss": 0.0049, + "step": 74030 + }, + { + "epoch": 0.4748695615922625, + "grad_norm": 0.2825103998184204, + "learning_rate": 9.43559103150301e-06, + "loss": 0.0045, + "step": 74040 + }, + { + "epoch": 0.4749336984860486, + "grad_norm": 0.17604900896549225, + "learning_rate": 9.435332676934174e-06, + "loss": 0.003, + "step": 74050 + }, + { + "epoch": 0.4749978353798347, + "grad_norm": 0.16739438474178314, + "learning_rate": 9.435074266787451e-06, + "loss": 0.0032, + "step": 74060 + }, + { + "epoch": 0.4750619722736208, + "grad_norm": 0.27554914355278015, + "learning_rate": 9.434815801066076e-06, + "loss": 0.0031, + "step": 74070 + }, + { + "epoch": 0.47512610916740694, + "grad_norm": 0.09619660675525665, + "learning_rate": 9.43455727977329e-06, + "loss": 0.0045, + "step": 74080 + }, + { + "epoch": 0.47519024606119303, + "grad_norm": 0.29939550161361694, + "learning_rate": 9.434298702912333e-06, + "loss": 0.0042, + "step": 74090 + }, + { + "epoch": 0.4752543829549791, + "grad_norm": 0.09388145804405212, + "learning_rate": 9.43404007048644e-06, + "loss": 0.0039, + "step": 74100 + }, + { + "epoch": 0.4753185198487652, + "grad_norm": 0.16488589346408844, + "learning_rate": 9.433781382498862e-06, + "loss": 0.0045, + "step": 74110 + }, + { + "epoch": 0.4753826567425513, + "grad_norm": 0.0874987542629242, + "learning_rate": 9.43352263895283e-06, + "loss": 0.0036, + "step": 74120 + }, + { + "epoch": 0.4754467936363374, + "grad_norm": 0.35496601462364197, + "learning_rate": 9.433263839851592e-06, + "loss": 0.0032, + "step": 74130 + }, + { + "epoch": 0.4755109305301235, + "grad_norm": 0.25578826665878296, + "learning_rate": 9.43300498519839e-06, + "loss": 0.0023, + "step": 74140 + }, + { + "epoch": 0.4755750674239096, + "grad_norm": 0.13040082156658173, + "learning_rate": 9.432746074996466e-06, + "loss": 0.003, + "step": 74150 + }, + { + "epoch": 0.4756392043176957, + "grad_norm": 0.10859714448451996, + "learning_rate": 9.432487109249067e-06, + "loss": 0.0028, + "step": 74160 + }, + { + "epoch": 0.4757033412114818, + "grad_norm": 0.1338357925415039, + "learning_rate": 9.432228087959436e-06, + "loss": 0.0026, + "step": 74170 + }, + { + "epoch": 0.47576747810526787, + "grad_norm": 0.2157885879278183, + "learning_rate": 9.43196901113082e-06, + "loss": 0.0032, + "step": 74180 + }, + { + "epoch": 0.47583161499905396, + "grad_norm": 1.2301640510559082, + "learning_rate": 9.431709878766465e-06, + "loss": 0.0056, + "step": 74190 + }, + { + "epoch": 0.47589575189284006, + "grad_norm": 0.0766885057091713, + "learning_rate": 9.431450690869617e-06, + "loss": 0.0068, + "step": 74200 + }, + { + "epoch": 0.47595988878662615, + "grad_norm": 0.26054835319519043, + "learning_rate": 9.431191447443526e-06, + "loss": 0.0028, + "step": 74210 + }, + { + "epoch": 0.4760240256804123, + "grad_norm": 0.10278470069169998, + "learning_rate": 9.430932148491439e-06, + "loss": 0.0044, + "step": 74220 + }, + { + "epoch": 0.4760881625741984, + "grad_norm": 0.16061940789222717, + "learning_rate": 9.430672794016604e-06, + "loss": 0.0051, + "step": 74230 + }, + { + "epoch": 0.4761522994679845, + "grad_norm": 0.16396574676036835, + "learning_rate": 9.430413384022273e-06, + "loss": 0.0049, + "step": 74240 + }, + { + "epoch": 0.4762164363617706, + "grad_norm": 0.07798841595649719, + "learning_rate": 9.430153918511696e-06, + "loss": 0.0044, + "step": 74250 + }, + { + "epoch": 0.47628057325555667, + "grad_norm": 0.07246286422014236, + "learning_rate": 9.429894397488125e-06, + "loss": 0.0029, + "step": 74260 + }, + { + "epoch": 0.47634471014934276, + "grad_norm": 0.3123680055141449, + "learning_rate": 9.42963482095481e-06, + "loss": 0.0024, + "step": 74270 + }, + { + "epoch": 0.47640884704312886, + "grad_norm": 0.27817103266716003, + "learning_rate": 9.429375188915007e-06, + "loss": 0.0035, + "step": 74280 + }, + { + "epoch": 0.47647298393691495, + "grad_norm": 0.179836243391037, + "learning_rate": 9.429115501371963e-06, + "loss": 0.0029, + "step": 74290 + }, + { + "epoch": 0.47653712083070104, + "grad_norm": 0.04475254565477371, + "learning_rate": 9.42885575832894e-06, + "loss": 0.0033, + "step": 74300 + }, + { + "epoch": 0.47660125772448714, + "grad_norm": 0.1160324290394783, + "learning_rate": 9.428595959789188e-06, + "loss": 0.0029, + "step": 74310 + }, + { + "epoch": 0.47666539461827323, + "grad_norm": 0.11190337687730789, + "learning_rate": 9.428336105755964e-06, + "loss": 0.0053, + "step": 74320 + }, + { + "epoch": 0.4767295315120593, + "grad_norm": 0.16089123487472534, + "learning_rate": 9.428076196232522e-06, + "loss": 0.0029, + "step": 74330 + }, + { + "epoch": 0.4767936684058454, + "grad_norm": 0.25040191411972046, + "learning_rate": 9.427816231222124e-06, + "loss": 0.0042, + "step": 74340 + }, + { + "epoch": 0.4768578052996315, + "grad_norm": 0.07773885130882263, + "learning_rate": 9.427556210728022e-06, + "loss": 0.0028, + "step": 74350 + }, + { + "epoch": 0.4769219421934176, + "grad_norm": 0.13476145267486572, + "learning_rate": 9.427296134753477e-06, + "loss": 0.0021, + "step": 74360 + }, + { + "epoch": 0.47698607908720375, + "grad_norm": 0.008924542926251888, + "learning_rate": 9.427036003301746e-06, + "loss": 0.004, + "step": 74370 + }, + { + "epoch": 0.47705021598098984, + "grad_norm": 0.09451120346784592, + "learning_rate": 9.42677581637609e-06, + "loss": 0.0037, + "step": 74380 + }, + { + "epoch": 0.47711435287477594, + "grad_norm": 0.14821767807006836, + "learning_rate": 9.42651557397977e-06, + "loss": 0.003, + "step": 74390 + }, + { + "epoch": 0.47717848976856203, + "grad_norm": 0.1648290902376175, + "learning_rate": 9.426255276116046e-06, + "loss": 0.0042, + "step": 74400 + }, + { + "epoch": 0.4772426266623481, + "grad_norm": 0.16127754747867584, + "learning_rate": 9.42599492278818e-06, + "loss": 0.0045, + "step": 74410 + }, + { + "epoch": 0.4773067635561342, + "grad_norm": 0.03292185813188553, + "learning_rate": 9.425734513999435e-06, + "loss": 0.0033, + "step": 74420 + }, + { + "epoch": 0.4773709004499203, + "grad_norm": 0.13805095851421356, + "learning_rate": 9.425474049753074e-06, + "loss": 0.0029, + "step": 74430 + }, + { + "epoch": 0.4774350373437064, + "grad_norm": 0.06245690584182739, + "learning_rate": 9.425213530052359e-06, + "loss": 0.0032, + "step": 74440 + }, + { + "epoch": 0.4774991742374925, + "grad_norm": 0.08769892156124115, + "learning_rate": 9.424952954900556e-06, + "loss": 0.0025, + "step": 74450 + }, + { + "epoch": 0.4775633111312786, + "grad_norm": 0.08083859831094742, + "learning_rate": 9.42469232430093e-06, + "loss": 0.0042, + "step": 74460 + }, + { + "epoch": 0.4776274480250647, + "grad_norm": 0.16766510903835297, + "learning_rate": 9.424431638256746e-06, + "loss": 0.0025, + "step": 74470 + }, + { + "epoch": 0.4776915849188508, + "grad_norm": 0.27961698174476624, + "learning_rate": 9.424170896771274e-06, + "loss": 0.0027, + "step": 74480 + }, + { + "epoch": 0.47775572181263687, + "grad_norm": 0.06486645340919495, + "learning_rate": 9.423910099847777e-06, + "loss": 0.0031, + "step": 74490 + }, + { + "epoch": 0.47781985870642296, + "grad_norm": 0.20511706173419952, + "learning_rate": 9.423649247489525e-06, + "loss": 0.0045, + "step": 74500 + }, + { + "epoch": 0.4778839956002091, + "grad_norm": 0.21036206185817719, + "learning_rate": 9.423388339699786e-06, + "loss": 0.0029, + "step": 74510 + }, + { + "epoch": 0.4779481324939952, + "grad_norm": 0.41878727078437805, + "learning_rate": 9.42312737648183e-06, + "loss": 0.0033, + "step": 74520 + }, + { + "epoch": 0.4780122693877813, + "grad_norm": 0.17183734476566315, + "learning_rate": 9.422866357838928e-06, + "loss": 0.0027, + "step": 74530 + }, + { + "epoch": 0.4780764062815674, + "grad_norm": 0.0930318757891655, + "learning_rate": 9.422605283774349e-06, + "loss": 0.0028, + "step": 74540 + }, + { + "epoch": 0.4781405431753535, + "grad_norm": 0.23828619718551636, + "learning_rate": 9.422344154291364e-06, + "loss": 0.0047, + "step": 74550 + }, + { + "epoch": 0.4782046800691396, + "grad_norm": 0.1259741336107254, + "learning_rate": 9.422082969393246e-06, + "loss": 0.0027, + "step": 74560 + }, + { + "epoch": 0.47826881696292567, + "grad_norm": 0.07219063490629196, + "learning_rate": 9.42182172908327e-06, + "loss": 0.0028, + "step": 74570 + }, + { + "epoch": 0.47833295385671176, + "grad_norm": 0.33875951170921326, + "learning_rate": 9.421560433364706e-06, + "loss": 0.0039, + "step": 74580 + }, + { + "epoch": 0.47839709075049786, + "grad_norm": 0.03250084072351456, + "learning_rate": 9.42129908224083e-06, + "loss": 0.0018, + "step": 74590 + }, + { + "epoch": 0.47846122764428395, + "grad_norm": 0.10010303556919098, + "learning_rate": 9.421037675714917e-06, + "loss": 0.003, + "step": 74600 + }, + { + "epoch": 0.47852536453807004, + "grad_norm": 0.2187255173921585, + "learning_rate": 9.42077621379024e-06, + "loss": 0.0033, + "step": 74610 + }, + { + "epoch": 0.47858950143185613, + "grad_norm": 0.007869267836213112, + "learning_rate": 9.42051469647008e-06, + "loss": 0.0048, + "step": 74620 + }, + { + "epoch": 0.47865363832564223, + "grad_norm": 0.25898319482803345, + "learning_rate": 9.420253123757712e-06, + "loss": 0.0024, + "step": 74630 + }, + { + "epoch": 0.4787177752194283, + "grad_norm": 0.1980488896369934, + "learning_rate": 9.419991495656411e-06, + "loss": 0.0048, + "step": 74640 + }, + { + "epoch": 0.47878191211321447, + "grad_norm": 0.15262740850448608, + "learning_rate": 9.41972981216946e-06, + "loss": 0.0039, + "step": 74650 + }, + { + "epoch": 0.47884604900700056, + "grad_norm": 0.1269340068101883, + "learning_rate": 9.419468073300135e-06, + "loss": 0.0037, + "step": 74660 + }, + { + "epoch": 0.47891018590078666, + "grad_norm": 0.0766548290848732, + "learning_rate": 9.419206279051716e-06, + "loss": 0.0038, + "step": 74670 + }, + { + "epoch": 0.47897432279457275, + "grad_norm": 0.11249018460512161, + "learning_rate": 9.418944429427484e-06, + "loss": 0.0031, + "step": 74680 + }, + { + "epoch": 0.47903845968835884, + "grad_norm": 0.05062698945403099, + "learning_rate": 9.41868252443072e-06, + "loss": 0.0034, + "step": 74690 + }, + { + "epoch": 0.47910259658214494, + "grad_norm": 0.15498970448970795, + "learning_rate": 9.418420564064706e-06, + "loss": 0.0026, + "step": 74700 + }, + { + "epoch": 0.47916673347593103, + "grad_norm": 0.5996679067611694, + "learning_rate": 9.418158548332725e-06, + "loss": 0.0061, + "step": 74710 + }, + { + "epoch": 0.4792308703697171, + "grad_norm": 0.2909507155418396, + "learning_rate": 9.41789647723806e-06, + "loss": 0.0026, + "step": 74720 + }, + { + "epoch": 0.4792950072635032, + "grad_norm": 0.05826156586408615, + "learning_rate": 9.417634350783993e-06, + "loss": 0.0029, + "step": 74730 + }, + { + "epoch": 0.4793591441572893, + "grad_norm": 0.20538924634456635, + "learning_rate": 9.417372168973812e-06, + "loss": 0.0032, + "step": 74740 + }, + { + "epoch": 0.4794232810510754, + "grad_norm": 0.3443412482738495, + "learning_rate": 9.417109931810799e-06, + "loss": 0.0035, + "step": 74750 + }, + { + "epoch": 0.4794874179448615, + "grad_norm": 0.08114752173423767, + "learning_rate": 9.416847639298244e-06, + "loss": 0.007, + "step": 74760 + }, + { + "epoch": 0.4795515548386476, + "grad_norm": 0.031641677021980286, + "learning_rate": 9.41658529143943e-06, + "loss": 0.0033, + "step": 74770 + }, + { + "epoch": 0.4796156917324337, + "grad_norm": 0.19229751825332642, + "learning_rate": 9.416322888237646e-06, + "loss": 0.0041, + "step": 74780 + }, + { + "epoch": 0.47967982862621983, + "grad_norm": 0.18572895228862762, + "learning_rate": 9.41606042969618e-06, + "loss": 0.005, + "step": 74790 + }, + { + "epoch": 0.4797439655200059, + "grad_norm": 0.07986016571521759, + "learning_rate": 9.415797915818322e-06, + "loss": 0.0077, + "step": 74800 + }, + { + "epoch": 0.479808102413792, + "grad_norm": 0.07696294039487839, + "learning_rate": 9.415535346607358e-06, + "loss": 0.006, + "step": 74810 + }, + { + "epoch": 0.4798722393075781, + "grad_norm": 0.15090428292751312, + "learning_rate": 9.415272722066581e-06, + "loss": 0.0034, + "step": 74820 + }, + { + "epoch": 0.4799363762013642, + "grad_norm": 0.12085235863924026, + "learning_rate": 9.415010042199283e-06, + "loss": 0.0036, + "step": 74830 + }, + { + "epoch": 0.4800005130951503, + "grad_norm": 0.5605502724647522, + "learning_rate": 9.414747307008752e-06, + "loss": 0.0059, + "step": 74840 + }, + { + "epoch": 0.4800646499889364, + "grad_norm": 0.16201697289943695, + "learning_rate": 9.414484516498281e-06, + "loss": 0.0053, + "step": 74850 + }, + { + "epoch": 0.4801287868827225, + "grad_norm": 0.14369170367717743, + "learning_rate": 9.414221670671167e-06, + "loss": 0.0021, + "step": 74860 + }, + { + "epoch": 0.4801929237765086, + "grad_norm": 0.16418346762657166, + "learning_rate": 9.413958769530698e-06, + "loss": 0.0034, + "step": 74870 + }, + { + "epoch": 0.48025706067029467, + "grad_norm": 0.46877381205558777, + "learning_rate": 9.413695813080173e-06, + "loss": 0.0034, + "step": 74880 + }, + { + "epoch": 0.48032119756408076, + "grad_norm": 0.16352395713329315, + "learning_rate": 9.413432801322883e-06, + "loss": 0.0035, + "step": 74890 + }, + { + "epoch": 0.48038533445786685, + "grad_norm": 0.25804394483566284, + "learning_rate": 9.413169734262128e-06, + "loss": 0.0029, + "step": 74900 + }, + { + "epoch": 0.48044947135165295, + "grad_norm": 0.3428088426589966, + "learning_rate": 9.4129066119012e-06, + "loss": 0.0034, + "step": 74910 + }, + { + "epoch": 0.48051360824543904, + "grad_norm": 0.2453552633523941, + "learning_rate": 9.4126434342434e-06, + "loss": 0.0027, + "step": 74920 + }, + { + "epoch": 0.4805777451392252, + "grad_norm": 0.5071871876716614, + "learning_rate": 9.412380201292023e-06, + "loss": 0.0028, + "step": 74930 + }, + { + "epoch": 0.4806418820330113, + "grad_norm": 0.40177619457244873, + "learning_rate": 9.412116913050371e-06, + "loss": 0.0055, + "step": 74940 + }, + { + "epoch": 0.4807060189267974, + "grad_norm": 0.3960021734237671, + "learning_rate": 9.411853569521738e-06, + "loss": 0.0039, + "step": 74950 + }, + { + "epoch": 0.48077015582058347, + "grad_norm": 0.22092705965042114, + "learning_rate": 9.411590170709429e-06, + "loss": 0.0065, + "step": 74960 + }, + { + "epoch": 0.48083429271436956, + "grad_norm": 0.0896230936050415, + "learning_rate": 9.41132671661674e-06, + "loss": 0.0038, + "step": 74970 + }, + { + "epoch": 0.48089842960815565, + "grad_norm": 0.21776001155376434, + "learning_rate": 9.411063207246976e-06, + "loss": 0.0044, + "step": 74980 + }, + { + "epoch": 0.48096256650194175, + "grad_norm": 0.06775982677936554, + "learning_rate": 9.410799642603435e-06, + "loss": 0.0034, + "step": 74990 + }, + { + "epoch": 0.48102670339572784, + "grad_norm": 0.13864344358444214, + "learning_rate": 9.410536022689425e-06, + "loss": 0.0019, + "step": 75000 + }, + { + "epoch": 0.48109084028951393, + "grad_norm": 0.1059882789850235, + "learning_rate": 9.410272347508245e-06, + "loss": 0.0035, + "step": 75010 + }, + { + "epoch": 0.4811549771833, + "grad_norm": 0.11833040416240692, + "learning_rate": 9.4100086170632e-06, + "loss": 0.0044, + "step": 75020 + }, + { + "epoch": 0.4812191140770861, + "grad_norm": 0.13698428869247437, + "learning_rate": 9.409744831357597e-06, + "loss": 0.0041, + "step": 75030 + }, + { + "epoch": 0.4812832509708722, + "grad_norm": 0.05028630048036575, + "learning_rate": 9.40948099039474e-06, + "loss": 0.0035, + "step": 75040 + }, + { + "epoch": 0.4813473878646583, + "grad_norm": 0.10551158338785172, + "learning_rate": 9.409217094177932e-06, + "loss": 0.0038, + "step": 75050 + }, + { + "epoch": 0.4814115247584444, + "grad_norm": 0.08216790854930878, + "learning_rate": 9.408953142710483e-06, + "loss": 0.0029, + "step": 75060 + }, + { + "epoch": 0.48147566165223055, + "grad_norm": 0.2770686447620392, + "learning_rate": 9.408689135995704e-06, + "loss": 0.0057, + "step": 75070 + }, + { + "epoch": 0.48153979854601664, + "grad_norm": 0.07415860891342163, + "learning_rate": 9.408425074036895e-06, + "loss": 0.0029, + "step": 75080 + }, + { + "epoch": 0.48160393543980273, + "grad_norm": 0.24299880862236023, + "learning_rate": 9.408160956837372e-06, + "loss": 0.0026, + "step": 75090 + }, + { + "epoch": 0.4816680723335888, + "grad_norm": 0.17356914281845093, + "learning_rate": 9.40789678440044e-06, + "loss": 0.0026, + "step": 75100 + }, + { + "epoch": 0.4817322092273749, + "grad_norm": 0.23093180358409882, + "learning_rate": 9.407632556729412e-06, + "loss": 0.0066, + "step": 75110 + }, + { + "epoch": 0.481796346121161, + "grad_norm": 0.07763124257326126, + "learning_rate": 9.407368273827598e-06, + "loss": 0.0042, + "step": 75120 + }, + { + "epoch": 0.4818604830149471, + "grad_norm": 0.12229952216148376, + "learning_rate": 9.407103935698308e-06, + "loss": 0.0051, + "step": 75130 + }, + { + "epoch": 0.4819246199087332, + "grad_norm": 0.033002957701683044, + "learning_rate": 9.406839542344857e-06, + "loss": 0.0037, + "step": 75140 + }, + { + "epoch": 0.4819887568025193, + "grad_norm": 0.0705639198422432, + "learning_rate": 9.406575093770558e-06, + "loss": 0.0025, + "step": 75150 + }, + { + "epoch": 0.4820528936963054, + "grad_norm": 0.06316683441400528, + "learning_rate": 9.406310589978721e-06, + "loss": 0.0027, + "step": 75160 + }, + { + "epoch": 0.4821170305900915, + "grad_norm": 0.1080327183008194, + "learning_rate": 9.406046030972666e-06, + "loss": 0.0038, + "step": 75170 + }, + { + "epoch": 0.48218116748387757, + "grad_norm": 0.18605861067771912, + "learning_rate": 9.405781416755704e-06, + "loss": 0.0031, + "step": 75180 + }, + { + "epoch": 0.48224530437766366, + "grad_norm": 0.14519374072551727, + "learning_rate": 9.405516747331152e-06, + "loss": 0.0022, + "step": 75190 + }, + { + "epoch": 0.48230944127144976, + "grad_norm": 0.22531086206436157, + "learning_rate": 9.405252022702328e-06, + "loss": 0.0036, + "step": 75200 + }, + { + "epoch": 0.4823735781652359, + "grad_norm": 0.13491190969944, + "learning_rate": 9.404987242872547e-06, + "loss": 0.0045, + "step": 75210 + }, + { + "epoch": 0.482437715059022, + "grad_norm": 0.2310151308774948, + "learning_rate": 9.404722407845128e-06, + "loss": 0.0052, + "step": 75220 + }, + { + "epoch": 0.4825018519528081, + "grad_norm": 0.0630815178155899, + "learning_rate": 9.404457517623388e-06, + "loss": 0.0038, + "step": 75230 + }, + { + "epoch": 0.4825659888465942, + "grad_norm": 0.033768199384212494, + "learning_rate": 9.404192572210648e-06, + "loss": 0.0041, + "step": 75240 + }, + { + "epoch": 0.4826301257403803, + "grad_norm": 0.07099565863609314, + "learning_rate": 9.403927571610227e-06, + "loss": 0.0037, + "step": 75250 + }, + { + "epoch": 0.48269426263416637, + "grad_norm": 0.17977134883403778, + "learning_rate": 9.403662515825447e-06, + "loss": 0.003, + "step": 75260 + }, + { + "epoch": 0.48275839952795246, + "grad_norm": 0.05882582440972328, + "learning_rate": 9.403397404859628e-06, + "loss": 0.0053, + "step": 75270 + }, + { + "epoch": 0.48282253642173856, + "grad_norm": 0.29024794697761536, + "learning_rate": 9.403132238716093e-06, + "loss": 0.0035, + "step": 75280 + }, + { + "epoch": 0.48288667331552465, + "grad_norm": 0.23191240429878235, + "learning_rate": 9.402867017398163e-06, + "loss": 0.0035, + "step": 75290 + }, + { + "epoch": 0.48295081020931074, + "grad_norm": 0.24628372490406036, + "learning_rate": 9.402601740909163e-06, + "loss": 0.0036, + "step": 75300 + }, + { + "epoch": 0.48301494710309684, + "grad_norm": 0.3608076572418213, + "learning_rate": 9.402336409252417e-06, + "loss": 0.0066, + "step": 75310 + }, + { + "epoch": 0.48307908399688293, + "grad_norm": 0.0643908828496933, + "learning_rate": 9.40207102243125e-06, + "loss": 0.0038, + "step": 75320 + }, + { + "epoch": 0.483143220890669, + "grad_norm": 0.21337231993675232, + "learning_rate": 9.401805580448986e-06, + "loss": 0.0031, + "step": 75330 + }, + { + "epoch": 0.4832073577844551, + "grad_norm": 0.15173020958900452, + "learning_rate": 9.401540083308954e-06, + "loss": 0.0024, + "step": 75340 + }, + { + "epoch": 0.48327149467824126, + "grad_norm": 0.10597799718379974, + "learning_rate": 9.401274531014477e-06, + "loss": 0.0035, + "step": 75350 + }, + { + "epoch": 0.48333563157202736, + "grad_norm": 0.09346621483564377, + "learning_rate": 9.401008923568883e-06, + "loss": 0.0026, + "step": 75360 + }, + { + "epoch": 0.48339976846581345, + "grad_norm": 0.1949937343597412, + "learning_rate": 9.400743260975505e-06, + "loss": 0.0031, + "step": 75370 + }, + { + "epoch": 0.48346390535959954, + "grad_norm": 0.2209199219942093, + "learning_rate": 9.400477543237669e-06, + "loss": 0.0038, + "step": 75380 + }, + { + "epoch": 0.48352804225338564, + "grad_norm": 0.0370480976998806, + "learning_rate": 9.400211770358702e-06, + "loss": 0.0069, + "step": 75390 + }, + { + "epoch": 0.48359217914717173, + "grad_norm": 0.1484469175338745, + "learning_rate": 9.399945942341939e-06, + "loss": 0.0033, + "step": 75400 + }, + { + "epoch": 0.4836563160409578, + "grad_norm": 0.20110897719860077, + "learning_rate": 9.399680059190708e-06, + "loss": 0.0028, + "step": 75410 + }, + { + "epoch": 0.4837204529347439, + "grad_norm": 0.2016342133283615, + "learning_rate": 9.39941412090834e-06, + "loss": 0.0046, + "step": 75420 + }, + { + "epoch": 0.48378458982853, + "grad_norm": 0.12587103247642517, + "learning_rate": 9.39914812749817e-06, + "loss": 0.0118, + "step": 75430 + }, + { + "epoch": 0.4838487267223161, + "grad_norm": 0.04521534591913223, + "learning_rate": 9.39888207896353e-06, + "loss": 0.0041, + "step": 75440 + }, + { + "epoch": 0.4839128636161022, + "grad_norm": 0.128385528922081, + "learning_rate": 9.398615975307755e-06, + "loss": 0.0041, + "step": 75450 + }, + { + "epoch": 0.4839770005098883, + "grad_norm": 0.1759091317653656, + "learning_rate": 9.398349816534176e-06, + "loss": 0.0028, + "step": 75460 + }, + { + "epoch": 0.4840411374036744, + "grad_norm": 0.16885876655578613, + "learning_rate": 9.398083602646131e-06, + "loss": 0.009, + "step": 75470 + }, + { + "epoch": 0.4841052742974605, + "grad_norm": 0.20741593837738037, + "learning_rate": 9.397817333646955e-06, + "loss": 0.0045, + "step": 75480 + }, + { + "epoch": 0.4841694111912466, + "grad_norm": 0.18445414304733276, + "learning_rate": 9.397551009539985e-06, + "loss": 0.0025, + "step": 75490 + }, + { + "epoch": 0.4842335480850327, + "grad_norm": 0.23914507031440735, + "learning_rate": 9.397284630328558e-06, + "loss": 0.0029, + "step": 75500 + }, + { + "epoch": 0.4842976849788188, + "grad_norm": 0.11996269971132278, + "learning_rate": 9.397018196016012e-06, + "loss": 0.0025, + "step": 75510 + }, + { + "epoch": 0.4843618218726049, + "grad_norm": 0.169418066740036, + "learning_rate": 9.396751706605686e-06, + "loss": 0.0041, + "step": 75520 + }, + { + "epoch": 0.484425958766391, + "grad_norm": 0.08760794252157211, + "learning_rate": 9.39648516210092e-06, + "loss": 0.0024, + "step": 75530 + }, + { + "epoch": 0.4844900956601771, + "grad_norm": 0.12806841731071472, + "learning_rate": 9.39621856250505e-06, + "loss": 0.003, + "step": 75540 + }, + { + "epoch": 0.4845542325539632, + "grad_norm": 0.2134033590555191, + "learning_rate": 9.39595190782142e-06, + "loss": 0.0025, + "step": 75550 + }, + { + "epoch": 0.4846183694477493, + "grad_norm": 0.152133971452713, + "learning_rate": 9.395685198053372e-06, + "loss": 0.0039, + "step": 75560 + }, + { + "epoch": 0.48468250634153537, + "grad_norm": 0.07009343057870865, + "learning_rate": 9.395418433204246e-06, + "loss": 0.0055, + "step": 75570 + }, + { + "epoch": 0.48474664323532146, + "grad_norm": 0.19351892173290253, + "learning_rate": 9.395151613277385e-06, + "loss": 0.0027, + "step": 75580 + }, + { + "epoch": 0.48481078012910755, + "grad_norm": 0.09136834740638733, + "learning_rate": 9.394884738276136e-06, + "loss": 0.0026, + "step": 75590 + }, + { + "epoch": 0.48487491702289365, + "grad_norm": 0.15920791029930115, + "learning_rate": 9.394617808203837e-06, + "loss": 0.0025, + "step": 75600 + }, + { + "epoch": 0.48493905391667974, + "grad_norm": 0.12583747506141663, + "learning_rate": 9.394350823063839e-06, + "loss": 0.0051, + "step": 75610 + }, + { + "epoch": 0.48500319081046583, + "grad_norm": 0.13267534971237183, + "learning_rate": 9.394083782859482e-06, + "loss": 0.0042, + "step": 75620 + }, + { + "epoch": 0.485067327704252, + "grad_norm": 0.4726957380771637, + "learning_rate": 9.393816687594115e-06, + "loss": 0.0044, + "step": 75630 + }, + { + "epoch": 0.4851314645980381, + "grad_norm": 0.16043677926063538, + "learning_rate": 9.393549537271086e-06, + "loss": 0.0045, + "step": 75640 + }, + { + "epoch": 0.48519560149182417, + "grad_norm": 0.06730765104293823, + "learning_rate": 9.39328233189374e-06, + "loss": 0.0031, + "step": 75650 + }, + { + "epoch": 0.48525973838561026, + "grad_norm": 0.23805025219917297, + "learning_rate": 9.393015071465427e-06, + "loss": 0.0044, + "step": 75660 + }, + { + "epoch": 0.48532387527939636, + "grad_norm": 0.12560462951660156, + "learning_rate": 9.392747755989497e-06, + "loss": 0.0041, + "step": 75670 + }, + { + "epoch": 0.48538801217318245, + "grad_norm": 0.2656175196170807, + "learning_rate": 9.392480385469298e-06, + "loss": 0.0028, + "step": 75680 + }, + { + "epoch": 0.48545214906696854, + "grad_norm": 0.09987766295671463, + "learning_rate": 9.392212959908179e-06, + "loss": 0.0032, + "step": 75690 + }, + { + "epoch": 0.48551628596075463, + "grad_norm": 0.07306644320487976, + "learning_rate": 9.391945479309494e-06, + "loss": 0.0076, + "step": 75700 + }, + { + "epoch": 0.4855804228545407, + "grad_norm": 0.05576948821544647, + "learning_rate": 9.391677943676592e-06, + "loss": 0.0028, + "step": 75710 + }, + { + "epoch": 0.4856445597483268, + "grad_norm": 0.22854074835777283, + "learning_rate": 9.391410353012826e-06, + "loss": 0.0032, + "step": 75720 + }, + { + "epoch": 0.4857086966421129, + "grad_norm": 0.24014638364315033, + "learning_rate": 9.391142707321553e-06, + "loss": 0.0035, + "step": 75730 + }, + { + "epoch": 0.485772833535899, + "grad_norm": 0.25563791394233704, + "learning_rate": 9.39087500660612e-06, + "loss": 0.0033, + "step": 75740 + }, + { + "epoch": 0.4858369704296851, + "grad_norm": 0.2181861698627472, + "learning_rate": 9.390607250869885e-06, + "loss": 0.0022, + "step": 75750 + }, + { + "epoch": 0.4859011073234712, + "grad_norm": 0.7732840776443481, + "learning_rate": 9.390339440116206e-06, + "loss": 0.0144, + "step": 75760 + }, + { + "epoch": 0.48596524421725734, + "grad_norm": 0.04870379716157913, + "learning_rate": 9.390071574348434e-06, + "loss": 0.004, + "step": 75770 + }, + { + "epoch": 0.48602938111104343, + "grad_norm": 0.06384976208209991, + "learning_rate": 9.389803653569929e-06, + "loss": 0.0036, + "step": 75780 + }, + { + "epoch": 0.48609351800482953, + "grad_norm": 0.1405668556690216, + "learning_rate": 9.389535677784046e-06, + "loss": 0.0032, + "step": 75790 + }, + { + "epoch": 0.4861576548986156, + "grad_norm": 0.29477834701538086, + "learning_rate": 9.389267646994145e-06, + "loss": 0.0027, + "step": 75800 + }, + { + "epoch": 0.4862217917924017, + "grad_norm": 0.25403454899787903, + "learning_rate": 9.388999561203582e-06, + "loss": 0.0068, + "step": 75810 + }, + { + "epoch": 0.4862859286861878, + "grad_norm": 0.08830229938030243, + "learning_rate": 9.388731420415718e-06, + "loss": 0.005, + "step": 75820 + }, + { + "epoch": 0.4863500655799739, + "grad_norm": 0.1264958530664444, + "learning_rate": 9.388463224633912e-06, + "loss": 0.003, + "step": 75830 + }, + { + "epoch": 0.48641420247376, + "grad_norm": 0.49424171447753906, + "learning_rate": 9.388194973861528e-06, + "loss": 0.0041, + "step": 75840 + }, + { + "epoch": 0.4864783393675461, + "grad_norm": 0.04571045562624931, + "learning_rate": 9.387926668101923e-06, + "loss": 0.0035, + "step": 75850 + }, + { + "epoch": 0.4865424762613322, + "grad_norm": 0.07948622107505798, + "learning_rate": 9.38765830735846e-06, + "loss": 0.0023, + "step": 75860 + }, + { + "epoch": 0.4866066131551183, + "grad_norm": 0.07302649319171906, + "learning_rate": 9.387389891634505e-06, + "loss": 0.0041, + "step": 75870 + }, + { + "epoch": 0.48667075004890437, + "grad_norm": 0.15792128443717957, + "learning_rate": 9.387121420933417e-06, + "loss": 0.0046, + "step": 75880 + }, + { + "epoch": 0.48673488694269046, + "grad_norm": 0.15861865878105164, + "learning_rate": 9.386852895258562e-06, + "loss": 0.0055, + "step": 75890 + }, + { + "epoch": 0.48679902383647655, + "grad_norm": 0.1404324769973755, + "learning_rate": 9.386584314613307e-06, + "loss": 0.0028, + "step": 75900 + }, + { + "epoch": 0.48686316073026265, + "grad_norm": 0.06636282056570053, + "learning_rate": 9.386315679001014e-06, + "loss": 0.0025, + "step": 75910 + }, + { + "epoch": 0.4869272976240488, + "grad_norm": 0.26765817403793335, + "learning_rate": 9.38604698842505e-06, + "loss": 0.0037, + "step": 75920 + }, + { + "epoch": 0.4869914345178349, + "grad_norm": 0.17180879414081573, + "learning_rate": 9.385778242888784e-06, + "loss": 0.0032, + "step": 75930 + }, + { + "epoch": 0.487055571411621, + "grad_norm": 0.06586155295372009, + "learning_rate": 9.385509442395583e-06, + "loss": 0.0032, + "step": 75940 + }, + { + "epoch": 0.4871197083054071, + "grad_norm": 0.14726197719573975, + "learning_rate": 9.385240586948812e-06, + "loss": 0.0043, + "step": 75950 + }, + { + "epoch": 0.48718384519919317, + "grad_norm": 0.2654615342617035, + "learning_rate": 9.384971676551844e-06, + "loss": 0.0043, + "step": 75960 + }, + { + "epoch": 0.48724798209297926, + "grad_norm": 0.2780192494392395, + "learning_rate": 9.384702711208047e-06, + "loss": 0.0037, + "step": 75970 + }, + { + "epoch": 0.48731211898676535, + "grad_norm": 0.1765231043100357, + "learning_rate": 9.38443369092079e-06, + "loss": 0.0028, + "step": 75980 + }, + { + "epoch": 0.48737625588055145, + "grad_norm": 0.17971616983413696, + "learning_rate": 9.384164615693445e-06, + "loss": 0.0023, + "step": 75990 + }, + { + "epoch": 0.48744039277433754, + "grad_norm": 0.17881891131401062, + "learning_rate": 9.383895485529387e-06, + "loss": 0.0025, + "step": 76000 + }, + { + "epoch": 0.48750452966812363, + "grad_norm": 0.03880459442734718, + "learning_rate": 9.383626300431982e-06, + "loss": 0.0084, + "step": 76010 + }, + { + "epoch": 0.4875686665619097, + "grad_norm": 0.20588567852973938, + "learning_rate": 9.38335706040461e-06, + "loss": 0.003, + "step": 76020 + }, + { + "epoch": 0.4876328034556958, + "grad_norm": 0.14494794607162476, + "learning_rate": 9.383087765450638e-06, + "loss": 0.0037, + "step": 76030 + }, + { + "epoch": 0.4876969403494819, + "grad_norm": 0.2012653797864914, + "learning_rate": 9.382818415573446e-06, + "loss": 0.0059, + "step": 76040 + }, + { + "epoch": 0.487761077243268, + "grad_norm": 0.06287068873643875, + "learning_rate": 9.382549010776407e-06, + "loss": 0.0046, + "step": 76050 + }, + { + "epoch": 0.48782521413705415, + "grad_norm": 0.13338372111320496, + "learning_rate": 9.382279551062894e-06, + "loss": 0.0049, + "step": 76060 + }, + { + "epoch": 0.48788935103084025, + "grad_norm": 0.4038761258125305, + "learning_rate": 9.38201003643629e-06, + "loss": 0.0032, + "step": 76070 + }, + { + "epoch": 0.48795348792462634, + "grad_norm": 0.16861388087272644, + "learning_rate": 9.381740466899966e-06, + "loss": 0.0048, + "step": 76080 + }, + { + "epoch": 0.48801762481841243, + "grad_norm": 0.11070922762155533, + "learning_rate": 9.381470842457304e-06, + "loss": 0.0038, + "step": 76090 + }, + { + "epoch": 0.4880817617121985, + "grad_norm": 0.11704026162624359, + "learning_rate": 9.381201163111678e-06, + "loss": 0.0063, + "step": 76100 + }, + { + "epoch": 0.4881458986059846, + "grad_norm": 0.23959235846996307, + "learning_rate": 9.380931428866473e-06, + "loss": 0.0039, + "step": 76110 + }, + { + "epoch": 0.4882100354997707, + "grad_norm": 0.1530313491821289, + "learning_rate": 9.380661639725065e-06, + "loss": 0.0038, + "step": 76120 + }, + { + "epoch": 0.4882741723935568, + "grad_norm": 0.24976380169391632, + "learning_rate": 9.380391795690836e-06, + "loss": 0.0047, + "step": 76130 + }, + { + "epoch": 0.4883383092873429, + "grad_norm": 0.28350764513015747, + "learning_rate": 9.380121896767167e-06, + "loss": 0.0037, + "step": 76140 + }, + { + "epoch": 0.488402446181129, + "grad_norm": 0.06945674866437912, + "learning_rate": 9.37985194295744e-06, + "loss": 0.002, + "step": 76150 + }, + { + "epoch": 0.4884665830749151, + "grad_norm": 0.4729636311531067, + "learning_rate": 9.379581934265039e-06, + "loss": 0.0039, + "step": 76160 + }, + { + "epoch": 0.4885307199687012, + "grad_norm": 0.04570367559790611, + "learning_rate": 9.379311870693346e-06, + "loss": 0.0027, + "step": 76170 + }, + { + "epoch": 0.48859485686248727, + "grad_norm": 0.112314872443676, + "learning_rate": 9.379041752245746e-06, + "loss": 0.0027, + "step": 76180 + }, + { + "epoch": 0.48865899375627336, + "grad_norm": 0.11408799141645432, + "learning_rate": 9.378771578925621e-06, + "loss": 0.0032, + "step": 76190 + }, + { + "epoch": 0.4887231306500595, + "grad_norm": 0.2096250206232071, + "learning_rate": 9.37850135073636e-06, + "loss": 0.0045, + "step": 76200 + }, + { + "epoch": 0.4887872675438456, + "grad_norm": 0.06932587921619415, + "learning_rate": 9.378231067681349e-06, + "loss": 0.0036, + "step": 76210 + }, + { + "epoch": 0.4888514044376317, + "grad_norm": 0.04841315373778343, + "learning_rate": 9.377960729763971e-06, + "loss": 0.0059, + "step": 76220 + }, + { + "epoch": 0.4889155413314178, + "grad_norm": 0.14756424725055695, + "learning_rate": 9.377690336987619e-06, + "loss": 0.0038, + "step": 76230 + }, + { + "epoch": 0.4889796782252039, + "grad_norm": 0.1798335611820221, + "learning_rate": 9.377419889355677e-06, + "loss": 0.0045, + "step": 76240 + }, + { + "epoch": 0.48904381511899, + "grad_norm": 0.10555068403482437, + "learning_rate": 9.377149386871536e-06, + "loss": 0.0027, + "step": 76250 + }, + { + "epoch": 0.48910795201277607, + "grad_norm": 0.0862729549407959, + "learning_rate": 9.376878829538583e-06, + "loss": 0.0022, + "step": 76260 + }, + { + "epoch": 0.48917208890656216, + "grad_norm": 0.1376761496067047, + "learning_rate": 9.376608217360212e-06, + "loss": 0.0039, + "step": 76270 + }, + { + "epoch": 0.48923622580034826, + "grad_norm": 0.2567867934703827, + "learning_rate": 9.376337550339811e-06, + "loss": 0.0056, + "step": 76280 + }, + { + "epoch": 0.48930036269413435, + "grad_norm": 0.05342590808868408, + "learning_rate": 9.376066828480774e-06, + "loss": 0.0018, + "step": 76290 + }, + { + "epoch": 0.48936449958792044, + "grad_norm": 0.22637926042079926, + "learning_rate": 9.375796051786492e-06, + "loss": 0.0027, + "step": 76300 + }, + { + "epoch": 0.48942863648170654, + "grad_norm": 0.24293087422847748, + "learning_rate": 9.375525220260356e-06, + "loss": 0.004, + "step": 76310 + }, + { + "epoch": 0.48949277337549263, + "grad_norm": 0.2584352195262909, + "learning_rate": 9.375254333905764e-06, + "loss": 0.0047, + "step": 76320 + }, + { + "epoch": 0.4895569102692787, + "grad_norm": 0.045442432165145874, + "learning_rate": 9.374983392726107e-06, + "loss": 0.0052, + "step": 76330 + }, + { + "epoch": 0.48962104716306487, + "grad_norm": 0.1458451747894287, + "learning_rate": 9.374712396724782e-06, + "loss": 0.0028, + "step": 76340 + }, + { + "epoch": 0.48968518405685096, + "grad_norm": 0.07689321041107178, + "learning_rate": 9.374441345905184e-06, + "loss": 0.0032, + "step": 76350 + }, + { + "epoch": 0.48974932095063706, + "grad_norm": 0.4120532274246216, + "learning_rate": 9.37417024027071e-06, + "loss": 0.007, + "step": 76360 + }, + { + "epoch": 0.48981345784442315, + "grad_norm": 0.03383118659257889, + "learning_rate": 9.373899079824757e-06, + "loss": 0.0024, + "step": 76370 + }, + { + "epoch": 0.48987759473820924, + "grad_norm": 0.2416459619998932, + "learning_rate": 9.373627864570722e-06, + "loss": 0.0041, + "step": 76380 + }, + { + "epoch": 0.48994173163199534, + "grad_norm": 0.12423533946275711, + "learning_rate": 9.373356594512004e-06, + "loss": 0.0024, + "step": 76390 + }, + { + "epoch": 0.49000586852578143, + "grad_norm": 0.14251717925071716, + "learning_rate": 9.373085269652003e-06, + "loss": 0.0042, + "step": 76400 + }, + { + "epoch": 0.4900700054195675, + "grad_norm": 0.23361019790172577, + "learning_rate": 9.372813889994116e-06, + "loss": 0.003, + "step": 76410 + }, + { + "epoch": 0.4901341423133536, + "grad_norm": 0.15831750631332397, + "learning_rate": 9.372542455541748e-06, + "loss": 0.0039, + "step": 76420 + }, + { + "epoch": 0.4901982792071397, + "grad_norm": 0.10762511938810349, + "learning_rate": 9.372270966298296e-06, + "loss": 0.0037, + "step": 76430 + }, + { + "epoch": 0.4902624161009258, + "grad_norm": 0.0905207172036171, + "learning_rate": 9.371999422267166e-06, + "loss": 0.0054, + "step": 76440 + }, + { + "epoch": 0.4903265529947119, + "grad_norm": 0.025468701496720314, + "learning_rate": 9.371727823451758e-06, + "loss": 0.0036, + "step": 76450 + }, + { + "epoch": 0.490390689888498, + "grad_norm": 0.08132292330265045, + "learning_rate": 9.371456169855476e-06, + "loss": 0.0037, + "step": 76460 + }, + { + "epoch": 0.4904548267822841, + "grad_norm": 0.2487182915210724, + "learning_rate": 9.371184461481724e-06, + "loss": 0.0033, + "step": 76470 + }, + { + "epoch": 0.49051896367607023, + "grad_norm": 0.14117854833602905, + "learning_rate": 9.370912698333906e-06, + "loss": 0.0039, + "step": 76480 + }, + { + "epoch": 0.4905831005698563, + "grad_norm": 0.2164766639471054, + "learning_rate": 9.370640880415428e-06, + "loss": 0.0042, + "step": 76490 + }, + { + "epoch": 0.4906472374636424, + "grad_norm": 0.1885480433702469, + "learning_rate": 9.370369007729697e-06, + "loss": 0.0025, + "step": 76500 + }, + { + "epoch": 0.4907113743574285, + "grad_norm": 0.016325635835528374, + "learning_rate": 9.370097080280118e-06, + "loss": 0.0045, + "step": 76510 + }, + { + "epoch": 0.4907755112512146, + "grad_norm": 0.21638783812522888, + "learning_rate": 9.3698250980701e-06, + "loss": 0.0038, + "step": 76520 + }, + { + "epoch": 0.4908396481450007, + "grad_norm": 0.13634824752807617, + "learning_rate": 9.36955306110305e-06, + "loss": 0.0066, + "step": 76530 + }, + { + "epoch": 0.4909037850387868, + "grad_norm": 0.012613404542207718, + "learning_rate": 9.369280969382378e-06, + "loss": 0.006, + "step": 76540 + }, + { + "epoch": 0.4909679219325729, + "grad_norm": 0.09728101640939713, + "learning_rate": 9.369008822911492e-06, + "loss": 0.0037, + "step": 76550 + }, + { + "epoch": 0.491032058826359, + "grad_norm": 0.06714236736297607, + "learning_rate": 9.368736621693803e-06, + "loss": 0.004, + "step": 76560 + }, + { + "epoch": 0.49109619572014507, + "grad_norm": 0.08015460520982742, + "learning_rate": 9.368464365732721e-06, + "loss": 0.0041, + "step": 76570 + }, + { + "epoch": 0.49116033261393116, + "grad_norm": 0.3189891278743744, + "learning_rate": 9.36819205503166e-06, + "loss": 0.0028, + "step": 76580 + }, + { + "epoch": 0.49122446950771725, + "grad_norm": 0.06811632215976715, + "learning_rate": 9.367919689594031e-06, + "loss": 0.0028, + "step": 76590 + }, + { + "epoch": 0.49128860640150335, + "grad_norm": 0.36249682307243347, + "learning_rate": 9.367647269423246e-06, + "loss": 0.003, + "step": 76600 + }, + { + "epoch": 0.49135274329528944, + "grad_norm": 0.2812708616256714, + "learning_rate": 9.367374794522719e-06, + "loss": 0.004, + "step": 76610 + }, + { + "epoch": 0.4914168801890756, + "grad_norm": 0.19123730063438416, + "learning_rate": 9.367102264895864e-06, + "loss": 0.0042, + "step": 76620 + }, + { + "epoch": 0.4914810170828617, + "grad_norm": 0.10681630671024323, + "learning_rate": 9.366829680546096e-06, + "loss": 0.0028, + "step": 76630 + }, + { + "epoch": 0.4915451539766478, + "grad_norm": 0.11751936376094818, + "learning_rate": 9.366557041476832e-06, + "loss": 0.0023, + "step": 76640 + }, + { + "epoch": 0.49160929087043387, + "grad_norm": 0.2586619555950165, + "learning_rate": 9.366284347691489e-06, + "loss": 0.0028, + "step": 76650 + }, + { + "epoch": 0.49167342776421996, + "grad_norm": 0.09923292696475983, + "learning_rate": 9.36601159919348e-06, + "loss": 0.005, + "step": 76660 + }, + { + "epoch": 0.49173756465800605, + "grad_norm": 0.07947537302970886, + "learning_rate": 9.365738795986227e-06, + "loss": 0.0036, + "step": 76670 + }, + { + "epoch": 0.49180170155179215, + "grad_norm": 0.2412528395652771, + "learning_rate": 9.365465938073146e-06, + "loss": 0.0043, + "step": 76680 + }, + { + "epoch": 0.49186583844557824, + "grad_norm": 0.03592299297451973, + "learning_rate": 9.365193025457657e-06, + "loss": 0.0037, + "step": 76690 + }, + { + "epoch": 0.49192997533936433, + "grad_norm": 0.3043968677520752, + "learning_rate": 9.364920058143181e-06, + "loss": 0.0064, + "step": 76700 + }, + { + "epoch": 0.4919941122331504, + "grad_norm": 0.10949330776929855, + "learning_rate": 9.364647036133135e-06, + "loss": 0.0043, + "step": 76710 + }, + { + "epoch": 0.4920582491269365, + "grad_norm": 0.07797112315893173, + "learning_rate": 9.364373959430944e-06, + "loss": 0.0053, + "step": 76720 + }, + { + "epoch": 0.4921223860207226, + "grad_norm": 0.28760960698127747, + "learning_rate": 9.364100828040026e-06, + "loss": 0.0062, + "step": 76730 + }, + { + "epoch": 0.4921865229145087, + "grad_norm": 0.20983117818832397, + "learning_rate": 9.363827641963808e-06, + "loss": 0.0032, + "step": 76740 + }, + { + "epoch": 0.4922506598082948, + "grad_norm": 0.06841584295034409, + "learning_rate": 9.36355440120571e-06, + "loss": 0.0048, + "step": 76750 + }, + { + "epoch": 0.49231479670208095, + "grad_norm": 0.1825391948223114, + "learning_rate": 9.363281105769155e-06, + "loss": 0.003, + "step": 76760 + }, + { + "epoch": 0.49237893359586704, + "grad_norm": 0.19833020865917206, + "learning_rate": 9.363007755657571e-06, + "loss": 0.0048, + "step": 76770 + }, + { + "epoch": 0.49244307048965313, + "grad_norm": 0.1410582810640335, + "learning_rate": 9.362734350874382e-06, + "loss": 0.0018, + "step": 76780 + }, + { + "epoch": 0.4925072073834392, + "grad_norm": 0.1286795288324356, + "learning_rate": 9.362460891423013e-06, + "loss": 0.0031, + "step": 76790 + }, + { + "epoch": 0.4925713442772253, + "grad_norm": 0.17199252545833588, + "learning_rate": 9.362187377306892e-06, + "loss": 0.0035, + "step": 76800 + }, + { + "epoch": 0.4926354811710114, + "grad_norm": 0.2555850148200989, + "learning_rate": 9.361913808529443e-06, + "loss": 0.0046, + "step": 76810 + }, + { + "epoch": 0.4926996180647975, + "grad_norm": 0.1507069617509842, + "learning_rate": 9.3616401850941e-06, + "loss": 0.0037, + "step": 76820 + }, + { + "epoch": 0.4927637549585836, + "grad_norm": 0.20394714176654816, + "learning_rate": 9.361366507004286e-06, + "loss": 0.0026, + "step": 76830 + }, + { + "epoch": 0.4928278918523697, + "grad_norm": 0.26713767647743225, + "learning_rate": 9.361092774263434e-06, + "loss": 0.0076, + "step": 76840 + }, + { + "epoch": 0.4928920287461558, + "grad_norm": 0.07934827357530594, + "learning_rate": 9.360818986874971e-06, + "loss": 0.0044, + "step": 76850 + }, + { + "epoch": 0.4929561656399419, + "grad_norm": 0.13926687836647034, + "learning_rate": 9.360545144842332e-06, + "loss": 0.0036, + "step": 76860 + }, + { + "epoch": 0.49302030253372797, + "grad_norm": 0.09511993080377579, + "learning_rate": 9.360271248168944e-06, + "loss": 0.0032, + "step": 76870 + }, + { + "epoch": 0.49308443942751407, + "grad_norm": 0.28910213708877563, + "learning_rate": 9.359997296858241e-06, + "loss": 0.0044, + "step": 76880 + }, + { + "epoch": 0.49314857632130016, + "grad_norm": 0.10169561207294464, + "learning_rate": 9.359723290913656e-06, + "loss": 0.0045, + "step": 76890 + }, + { + "epoch": 0.4932127132150863, + "grad_norm": 0.1778484284877777, + "learning_rate": 9.359449230338622e-06, + "loss": 0.0031, + "step": 76900 + }, + { + "epoch": 0.4932768501088724, + "grad_norm": 0.44378116726875305, + "learning_rate": 9.359175115136575e-06, + "loss": 0.0032, + "step": 76910 + }, + { + "epoch": 0.4933409870026585, + "grad_norm": 0.31982800364494324, + "learning_rate": 9.358900945310947e-06, + "loss": 0.0047, + "step": 76920 + }, + { + "epoch": 0.4934051238964446, + "grad_norm": 0.11058761179447174, + "learning_rate": 9.358626720865176e-06, + "loss": 0.0049, + "step": 76930 + }, + { + "epoch": 0.4934692607902307, + "grad_norm": 0.28282925486564636, + "learning_rate": 9.358352441802696e-06, + "loss": 0.0087, + "step": 76940 + }, + { + "epoch": 0.4935333976840168, + "grad_norm": 0.07016579061746597, + "learning_rate": 9.358078108126947e-06, + "loss": 0.0032, + "step": 76950 + }, + { + "epoch": 0.49359753457780287, + "grad_norm": 0.05021855607628822, + "learning_rate": 9.357803719841362e-06, + "loss": 0.002, + "step": 76960 + }, + { + "epoch": 0.49366167147158896, + "grad_norm": 0.24600309133529663, + "learning_rate": 9.357529276949383e-06, + "loss": 0.0036, + "step": 76970 + }, + { + "epoch": 0.49372580836537505, + "grad_norm": 0.21833530068397522, + "learning_rate": 9.357254779454448e-06, + "loss": 0.005, + "step": 76980 + }, + { + "epoch": 0.49378994525916114, + "grad_norm": 0.19952186942100525, + "learning_rate": 9.356980227359998e-06, + "loss": 0.007, + "step": 76990 + }, + { + "epoch": 0.49385408215294724, + "grad_norm": 0.11434321850538254, + "learning_rate": 9.356705620669469e-06, + "loss": 0.0049, + "step": 77000 + }, + { + "epoch": 0.49391821904673333, + "grad_norm": 0.1786748468875885, + "learning_rate": 9.356430959386307e-06, + "loss": 0.0046, + "step": 77010 + }, + { + "epoch": 0.4939823559405194, + "grad_norm": 0.364108145236969, + "learning_rate": 9.35615624351395e-06, + "loss": 0.0062, + "step": 77020 + }, + { + "epoch": 0.4940464928343055, + "grad_norm": 0.3673844337463379, + "learning_rate": 9.355881473055844e-06, + "loss": 0.005, + "step": 77030 + }, + { + "epoch": 0.49411062972809167, + "grad_norm": 0.11770845949649811, + "learning_rate": 9.355606648015428e-06, + "loss": 0.0039, + "step": 77040 + }, + { + "epoch": 0.49417476662187776, + "grad_norm": 0.022180555388331413, + "learning_rate": 9.355331768396148e-06, + "loss": 0.0039, + "step": 77050 + }, + { + "epoch": 0.49423890351566385, + "grad_norm": 0.1450425386428833, + "learning_rate": 9.35505683420145e-06, + "loss": 0.0024, + "step": 77060 + }, + { + "epoch": 0.49430304040944995, + "grad_norm": 0.0696171298623085, + "learning_rate": 9.354781845434774e-06, + "loss": 0.0028, + "step": 77070 + }, + { + "epoch": 0.49436717730323604, + "grad_norm": 0.12352907657623291, + "learning_rate": 9.354506802099572e-06, + "loss": 0.003, + "step": 77080 + }, + { + "epoch": 0.49443131419702213, + "grad_norm": 0.050051745027303696, + "learning_rate": 9.354231704199288e-06, + "loss": 0.0025, + "step": 77090 + }, + { + "epoch": 0.4944954510908082, + "grad_norm": 0.22080251574516296, + "learning_rate": 9.353956551737367e-06, + "loss": 0.0028, + "step": 77100 + }, + { + "epoch": 0.4945595879845943, + "grad_norm": 0.07721296697854996, + "learning_rate": 9.35368134471726e-06, + "loss": 0.0028, + "step": 77110 + }, + { + "epoch": 0.4946237248783804, + "grad_norm": 0.009079434908926487, + "learning_rate": 9.353406083142414e-06, + "loss": 0.0038, + "step": 77120 + }, + { + "epoch": 0.4946878617721665, + "grad_norm": 0.23502038419246674, + "learning_rate": 9.353130767016278e-06, + "loss": 0.0037, + "step": 77130 + }, + { + "epoch": 0.4947519986659526, + "grad_norm": 0.0968853309750557, + "learning_rate": 9.352855396342302e-06, + "loss": 0.004, + "step": 77140 + }, + { + "epoch": 0.4948161355597387, + "grad_norm": 0.16828656196594238, + "learning_rate": 9.352579971123938e-06, + "loss": 0.0026, + "step": 77150 + }, + { + "epoch": 0.4948802724535248, + "grad_norm": 0.1029152199625969, + "learning_rate": 9.352304491364636e-06, + "loss": 0.0062, + "step": 77160 + }, + { + "epoch": 0.4949444093473109, + "grad_norm": 0.12365434318780899, + "learning_rate": 9.352028957067848e-06, + "loss": 0.0036, + "step": 77170 + }, + { + "epoch": 0.495008546241097, + "grad_norm": 0.1805618852376938, + "learning_rate": 9.351753368237027e-06, + "loss": 0.0028, + "step": 77180 + }, + { + "epoch": 0.4950726831348831, + "grad_norm": 0.16249337792396545, + "learning_rate": 9.351477724875623e-06, + "loss": 0.0026, + "step": 77190 + }, + { + "epoch": 0.4951368200286692, + "grad_norm": 0.07809522747993469, + "learning_rate": 9.351202026987098e-06, + "loss": 0.0026, + "step": 77200 + }, + { + "epoch": 0.4952009569224553, + "grad_norm": 0.06574942171573639, + "learning_rate": 9.3509262745749e-06, + "loss": 0.0046, + "step": 77210 + }, + { + "epoch": 0.4952650938162414, + "grad_norm": 0.05719909444451332, + "learning_rate": 9.350650467642486e-06, + "loss": 0.0032, + "step": 77220 + }, + { + "epoch": 0.4953292307100275, + "grad_norm": 0.016466276720166206, + "learning_rate": 9.350374606193311e-06, + "loss": 0.0032, + "step": 77230 + }, + { + "epoch": 0.4953933676038136, + "grad_norm": 0.1982014775276184, + "learning_rate": 9.350098690230835e-06, + "loss": 0.0014, + "step": 77240 + }, + { + "epoch": 0.4954575044975997, + "grad_norm": 0.3372277021408081, + "learning_rate": 9.349822719758514e-06, + "loss": 0.0041, + "step": 77250 + }, + { + "epoch": 0.49552164139138577, + "grad_norm": 0.07121748477220535, + "learning_rate": 9.349546694779803e-06, + "loss": 0.0026, + "step": 77260 + }, + { + "epoch": 0.49558577828517186, + "grad_norm": 0.18666061758995056, + "learning_rate": 9.349270615298165e-06, + "loss": 0.0032, + "step": 77270 + }, + { + "epoch": 0.49564991517895796, + "grad_norm": 0.4052756428718567, + "learning_rate": 9.348994481317057e-06, + "loss": 0.0041, + "step": 77280 + }, + { + "epoch": 0.49571405207274405, + "grad_norm": 0.32904452085494995, + "learning_rate": 9.34871829283994e-06, + "loss": 0.0069, + "step": 77290 + }, + { + "epoch": 0.49577818896653014, + "grad_norm": 0.052296873182058334, + "learning_rate": 9.348442049870276e-06, + "loss": 0.0026, + "step": 77300 + }, + { + "epoch": 0.49584232586031624, + "grad_norm": 0.27496102452278137, + "learning_rate": 9.348165752411524e-06, + "loss": 0.0044, + "step": 77310 + }, + { + "epoch": 0.49590646275410233, + "grad_norm": 0.24938185513019562, + "learning_rate": 9.34788940046715e-06, + "loss": 0.0024, + "step": 77320 + }, + { + "epoch": 0.4959705996478885, + "grad_norm": 0.1310003399848938, + "learning_rate": 9.34761299404061e-06, + "loss": 0.0045, + "step": 77330 + }, + { + "epoch": 0.49603473654167457, + "grad_norm": 0.18255215883255005, + "learning_rate": 9.347336533135376e-06, + "loss": 0.0033, + "step": 77340 + }, + { + "epoch": 0.49609887343546066, + "grad_norm": 0.12947233021259308, + "learning_rate": 9.347060017754908e-06, + "loss": 0.0058, + "step": 77350 + }, + { + "epoch": 0.49616301032924676, + "grad_norm": 0.0997142493724823, + "learning_rate": 9.34678344790267e-06, + "loss": 0.0017, + "step": 77360 + }, + { + "epoch": 0.49622714722303285, + "grad_norm": 0.0739133283495903, + "learning_rate": 9.346506823582128e-06, + "loss": 0.0025, + "step": 77370 + }, + { + "epoch": 0.49629128411681894, + "grad_norm": 0.14539377391338348, + "learning_rate": 9.34623014479675e-06, + "loss": 0.0032, + "step": 77380 + }, + { + "epoch": 0.49635542101060504, + "grad_norm": 0.1064482033252716, + "learning_rate": 9.345953411550002e-06, + "loss": 0.004, + "step": 77390 + }, + { + "epoch": 0.49641955790439113, + "grad_norm": 0.19258669018745422, + "learning_rate": 9.345676623845351e-06, + "loss": 0.0018, + "step": 77400 + }, + { + "epoch": 0.4964836947981772, + "grad_norm": 0.1373535841703415, + "learning_rate": 9.345399781686267e-06, + "loss": 0.0043, + "step": 77410 + }, + { + "epoch": 0.4965478316919633, + "grad_norm": 0.2516781687736511, + "learning_rate": 9.345122885076219e-06, + "loss": 0.0044, + "step": 77420 + }, + { + "epoch": 0.4966119685857494, + "grad_norm": 0.1292618066072464, + "learning_rate": 9.344845934018674e-06, + "loss": 0.0023, + "step": 77430 + }, + { + "epoch": 0.4966761054795355, + "grad_norm": 0.2453163117170334, + "learning_rate": 9.344568928517105e-06, + "loss": 0.0071, + "step": 77440 + }, + { + "epoch": 0.4967402423733216, + "grad_norm": 0.07375278323888779, + "learning_rate": 9.344291868574982e-06, + "loss": 0.0041, + "step": 77450 + }, + { + "epoch": 0.4968043792671077, + "grad_norm": 0.1308964341878891, + "learning_rate": 9.344014754195779e-06, + "loss": 0.0023, + "step": 77460 + }, + { + "epoch": 0.49686851616089384, + "grad_norm": 0.10841777175664902, + "learning_rate": 9.343737585382963e-06, + "loss": 0.0035, + "step": 77470 + }, + { + "epoch": 0.49693265305467993, + "grad_norm": 0.16187961399555206, + "learning_rate": 9.343460362140014e-06, + "loss": 0.0029, + "step": 77480 + }, + { + "epoch": 0.496996789948466, + "grad_norm": 0.14524035155773163, + "learning_rate": 9.3431830844704e-06, + "loss": 0.0065, + "step": 77490 + }, + { + "epoch": 0.4970609268422521, + "grad_norm": 0.23628103733062744, + "learning_rate": 9.342905752377598e-06, + "loss": 0.0075, + "step": 77500 + }, + { + "epoch": 0.4971250637360382, + "grad_norm": 0.2749999761581421, + "learning_rate": 9.342628365865084e-06, + "loss": 0.0027, + "step": 77510 + }, + { + "epoch": 0.4971892006298243, + "grad_norm": 0.2397809773683548, + "learning_rate": 9.342350924936335e-06, + "loss": 0.0052, + "step": 77520 + }, + { + "epoch": 0.4972533375236104, + "grad_norm": 0.2140444964170456, + "learning_rate": 9.342073429594822e-06, + "loss": 0.0051, + "step": 77530 + }, + { + "epoch": 0.4973174744173965, + "grad_norm": 0.19114813208580017, + "learning_rate": 9.341795879844026e-06, + "loss": 0.0068, + "step": 77540 + }, + { + "epoch": 0.4973816113111826, + "grad_norm": 0.09587656706571579, + "learning_rate": 9.341518275687426e-06, + "loss": 0.0027, + "step": 77550 + }, + { + "epoch": 0.4974457482049687, + "grad_norm": 0.10873784869909286, + "learning_rate": 9.341240617128499e-06, + "loss": 0.003, + "step": 77560 + }, + { + "epoch": 0.49750988509875477, + "grad_norm": 0.290728360414505, + "learning_rate": 9.340962904170726e-06, + "loss": 0.0034, + "step": 77570 + }, + { + "epoch": 0.49757402199254086, + "grad_norm": 0.0844135656952858, + "learning_rate": 9.340685136817582e-06, + "loss": 0.003, + "step": 77580 + }, + { + "epoch": 0.49763815888632695, + "grad_norm": 0.3078419268131256, + "learning_rate": 9.340407315072553e-06, + "loss": 0.0036, + "step": 77590 + }, + { + "epoch": 0.49770229578011305, + "grad_norm": 0.08919071406126022, + "learning_rate": 9.340129438939119e-06, + "loss": 0.003, + "step": 77600 + }, + { + "epoch": 0.4977664326738992, + "grad_norm": 0.18456967175006866, + "learning_rate": 9.33985150842076e-06, + "loss": 0.0025, + "step": 77610 + }, + { + "epoch": 0.4978305695676853, + "grad_norm": 0.24292078614234924, + "learning_rate": 9.33957352352096e-06, + "loss": 0.0036, + "step": 77620 + }, + { + "epoch": 0.4978947064614714, + "grad_norm": 0.09577593952417374, + "learning_rate": 9.339295484243203e-06, + "loss": 0.0022, + "step": 77630 + }, + { + "epoch": 0.4979588433552575, + "grad_norm": 0.08392419666051865, + "learning_rate": 9.339017390590971e-06, + "loss": 0.0024, + "step": 77640 + }, + { + "epoch": 0.49802298024904357, + "grad_norm": 0.20557403564453125, + "learning_rate": 9.338739242567752e-06, + "loss": 0.0048, + "step": 77650 + }, + { + "epoch": 0.49808711714282966, + "grad_norm": 0.06604457646608353, + "learning_rate": 9.338461040177026e-06, + "loss": 0.0032, + "step": 77660 + }, + { + "epoch": 0.49815125403661575, + "grad_norm": 0.055695466697216034, + "learning_rate": 9.338182783422286e-06, + "loss": 0.0026, + "step": 77670 + }, + { + "epoch": 0.49821539093040185, + "grad_norm": 0.2561768591403961, + "learning_rate": 9.337904472307013e-06, + "loss": 0.0034, + "step": 77680 + }, + { + "epoch": 0.49827952782418794, + "grad_norm": 0.07979772239923477, + "learning_rate": 9.337626106834698e-06, + "loss": 0.0041, + "step": 77690 + }, + { + "epoch": 0.49834366471797403, + "grad_norm": 0.2786787748336792, + "learning_rate": 9.337347687008828e-06, + "loss": 0.0046, + "step": 77700 + }, + { + "epoch": 0.4984078016117601, + "grad_norm": 0.09194999188184738, + "learning_rate": 9.337069212832892e-06, + "loss": 0.004, + "step": 77710 + }, + { + "epoch": 0.4984719385055462, + "grad_norm": 0.1742803454399109, + "learning_rate": 9.336790684310377e-06, + "loss": 0.0035, + "step": 77720 + }, + { + "epoch": 0.4985360753993323, + "grad_norm": 0.11242389678955078, + "learning_rate": 9.336512101444776e-06, + "loss": 0.0046, + "step": 77730 + }, + { + "epoch": 0.4986002122931184, + "grad_norm": 0.1943175494670868, + "learning_rate": 9.33623346423958e-06, + "loss": 0.0036, + "step": 77740 + }, + { + "epoch": 0.49866434918690455, + "grad_norm": 0.07592809945344925, + "learning_rate": 9.335954772698282e-06, + "loss": 0.002, + "step": 77750 + }, + { + "epoch": 0.49872848608069065, + "grad_norm": 0.09480135142803192, + "learning_rate": 9.335676026824367e-06, + "loss": 0.0034, + "step": 77760 + }, + { + "epoch": 0.49879262297447674, + "grad_norm": 0.041317373514175415, + "learning_rate": 9.335397226621336e-06, + "loss": 0.0033, + "step": 77770 + }, + { + "epoch": 0.49885675986826283, + "grad_norm": 0.09598638862371445, + "learning_rate": 9.335118372092679e-06, + "loss": 0.0027, + "step": 77780 + }, + { + "epoch": 0.4989208967620489, + "grad_norm": 0.18568679690361023, + "learning_rate": 9.33483946324189e-06, + "loss": 0.0036, + "step": 77790 + }, + { + "epoch": 0.498985033655835, + "grad_norm": 0.04725318402051926, + "learning_rate": 9.334560500072463e-06, + "loss": 0.0032, + "step": 77800 + }, + { + "epoch": 0.4990491705496211, + "grad_norm": 0.08631976693868637, + "learning_rate": 9.334281482587897e-06, + "loss": 0.0095, + "step": 77810 + }, + { + "epoch": 0.4991133074434072, + "grad_norm": 0.13214997947216034, + "learning_rate": 9.334002410791685e-06, + "loss": 0.0038, + "step": 77820 + }, + { + "epoch": 0.4991774443371933, + "grad_norm": 0.10650306940078735, + "learning_rate": 9.333723284687326e-06, + "loss": 0.0052, + "step": 77830 + }, + { + "epoch": 0.4992415812309794, + "grad_norm": 0.2304917722940445, + "learning_rate": 9.333444104278317e-06, + "loss": 0.0055, + "step": 77840 + }, + { + "epoch": 0.4993057181247655, + "grad_norm": 0.3047102987766266, + "learning_rate": 9.333164869568156e-06, + "loss": 0.0041, + "step": 77850 + }, + { + "epoch": 0.4993698550185516, + "grad_norm": 0.2160777449607849, + "learning_rate": 9.332885580560342e-06, + "loss": 0.0046, + "step": 77860 + }, + { + "epoch": 0.49943399191233767, + "grad_norm": 0.17151705920696259, + "learning_rate": 9.332606237258376e-06, + "loss": 0.0071, + "step": 77870 + }, + { + "epoch": 0.49949812880612376, + "grad_norm": 0.12222845107316971, + "learning_rate": 9.332326839665758e-06, + "loss": 0.0034, + "step": 77880 + }, + { + "epoch": 0.4995622656999099, + "grad_norm": 0.07964403182268143, + "learning_rate": 9.332047387785988e-06, + "loss": 0.004, + "step": 77890 + }, + { + "epoch": 0.499626402593696, + "grad_norm": 0.10945140570402145, + "learning_rate": 9.331767881622567e-06, + "loss": 0.0039, + "step": 77900 + }, + { + "epoch": 0.4996905394874821, + "grad_norm": 0.023508165031671524, + "learning_rate": 9.331488321178999e-06, + "loss": 0.0034, + "step": 77910 + }, + { + "epoch": 0.4997546763812682, + "grad_norm": 0.11984086781740189, + "learning_rate": 9.331208706458787e-06, + "loss": 0.005, + "step": 77920 + }, + { + "epoch": 0.4998188132750543, + "grad_norm": 0.11363513022661209, + "learning_rate": 9.330929037465435e-06, + "loss": 0.0029, + "step": 77930 + }, + { + "epoch": 0.4998829501688404, + "grad_norm": 0.060195907950401306, + "learning_rate": 9.330649314202444e-06, + "loss": 0.003, + "step": 77940 + }, + { + "epoch": 0.49994708706262647, + "grad_norm": 0.05659855902194977, + "learning_rate": 9.330369536673324e-06, + "loss": 0.003, + "step": 77950 + }, + { + "epoch": 0.5000112239564126, + "grad_norm": 0.48760223388671875, + "learning_rate": 9.33008970488158e-06, + "loss": 0.0055, + "step": 77960 + }, + { + "epoch": 0.5000753608501987, + "grad_norm": 0.12812398374080658, + "learning_rate": 9.329809818830717e-06, + "loss": 0.0027, + "step": 77970 + }, + { + "epoch": 0.5001394977439848, + "grad_norm": 1.885194182395935, + "learning_rate": 9.329529878524242e-06, + "loss": 0.0035, + "step": 77980 + }, + { + "epoch": 0.5002036346377708, + "grad_norm": 0.2891188859939575, + "learning_rate": 9.329249883965663e-06, + "loss": 0.0052, + "step": 77990 + }, + { + "epoch": 0.500267771531557, + "grad_norm": 0.15436619520187378, + "learning_rate": 9.328969835158489e-06, + "loss": 0.004, + "step": 78000 + }, + { + "epoch": 0.500331908425343, + "grad_norm": 0.32490304112434387, + "learning_rate": 9.328689732106229e-06, + "loss": 0.0038, + "step": 78010 + }, + { + "epoch": 0.5003960453191292, + "grad_norm": 0.1172533929347992, + "learning_rate": 9.328409574812394e-06, + "loss": 0.0028, + "step": 78020 + }, + { + "epoch": 0.5004601822129152, + "grad_norm": 0.1780451238155365, + "learning_rate": 9.328129363280492e-06, + "loss": 0.0029, + "step": 78030 + }, + { + "epoch": 0.5005243191067014, + "grad_norm": 0.13680961728096008, + "learning_rate": 9.327849097514038e-06, + "loss": 0.0041, + "step": 78040 + }, + { + "epoch": 0.5005884560004874, + "grad_norm": 0.04050092399120331, + "learning_rate": 9.327568777516538e-06, + "loss": 0.0049, + "step": 78050 + }, + { + "epoch": 0.5006525928942736, + "grad_norm": 0.14011479914188385, + "learning_rate": 9.32728840329151e-06, + "loss": 0.004, + "step": 78060 + }, + { + "epoch": 0.5007167297880596, + "grad_norm": 0.06274769455194473, + "learning_rate": 9.327007974842468e-06, + "loss": 0.0023, + "step": 78070 + }, + { + "epoch": 0.5007808666818457, + "grad_norm": 0.1820869743824005, + "learning_rate": 9.326727492172921e-06, + "loss": 0.003, + "step": 78080 + }, + { + "epoch": 0.5008450035756318, + "grad_norm": 0.08584950864315033, + "learning_rate": 9.326446955286387e-06, + "loss": 0.0045, + "step": 78090 + }, + { + "epoch": 0.5009091404694179, + "grad_norm": 0.1383316069841385, + "learning_rate": 9.32616636418638e-06, + "loss": 0.0029, + "step": 78100 + }, + { + "epoch": 0.5009732773632041, + "grad_norm": 0.23082928359508514, + "learning_rate": 9.325885718876419e-06, + "loss": 0.0041, + "step": 78110 + }, + { + "epoch": 0.5010374142569901, + "grad_norm": 0.16334521770477295, + "learning_rate": 9.325605019360015e-06, + "loss": 0.0033, + "step": 78120 + }, + { + "epoch": 0.5011015511507763, + "grad_norm": 0.08774948865175247, + "learning_rate": 9.325324265640692e-06, + "loss": 0.0035, + "step": 78130 + }, + { + "epoch": 0.5011656880445623, + "grad_norm": 0.1896505355834961, + "learning_rate": 9.325043457721964e-06, + "loss": 0.0027, + "step": 78140 + }, + { + "epoch": 0.5012298249383484, + "grad_norm": 0.07469774037599564, + "learning_rate": 9.324762595607348e-06, + "loss": 0.0026, + "step": 78150 + }, + { + "epoch": 0.5012939618321345, + "grad_norm": 0.13461455702781677, + "learning_rate": 9.324481679300366e-06, + "loss": 0.0036, + "step": 78160 + }, + { + "epoch": 0.5013580987259206, + "grad_norm": 0.12720540165901184, + "learning_rate": 9.32420070880454e-06, + "loss": 0.0058, + "step": 78170 + }, + { + "epoch": 0.5014222356197067, + "grad_norm": 0.128628209233284, + "learning_rate": 9.323919684123388e-06, + "loss": 0.004, + "step": 78180 + }, + { + "epoch": 0.5014863725134928, + "grad_norm": 0.22741807997226715, + "learning_rate": 9.323638605260432e-06, + "loss": 0.0019, + "step": 78190 + }, + { + "epoch": 0.5015505094072789, + "grad_norm": 0.16844815015792847, + "learning_rate": 9.323357472219195e-06, + "loss": 0.0042, + "step": 78200 + }, + { + "epoch": 0.501614646301065, + "grad_norm": 0.11421308666467667, + "learning_rate": 9.323076285003197e-06, + "loss": 0.0021, + "step": 78210 + }, + { + "epoch": 0.501678783194851, + "grad_norm": 0.12311401963233948, + "learning_rate": 9.322795043615964e-06, + "loss": 0.0026, + "step": 78220 + }, + { + "epoch": 0.5017429200886372, + "grad_norm": 0.18353450298309326, + "learning_rate": 9.32251374806102e-06, + "loss": 0.0024, + "step": 78230 + }, + { + "epoch": 0.5018070569824233, + "grad_norm": 0.24150238931179047, + "learning_rate": 9.32223239834189e-06, + "loss": 0.0039, + "step": 78240 + }, + { + "epoch": 0.5018711938762094, + "grad_norm": 0.1451845020055771, + "learning_rate": 9.3219509944621e-06, + "loss": 0.0032, + "step": 78250 + }, + { + "epoch": 0.5019353307699955, + "grad_norm": 0.1299157738685608, + "learning_rate": 9.321669536425172e-06, + "loss": 0.0024, + "step": 78260 + }, + { + "epoch": 0.5019994676637816, + "grad_norm": 0.10516326129436493, + "learning_rate": 9.321388024234638e-06, + "loss": 0.0033, + "step": 78270 + }, + { + "epoch": 0.5020636045575677, + "grad_norm": 0.26224756240844727, + "learning_rate": 9.321106457894023e-06, + "loss": 0.0031, + "step": 78280 + }, + { + "epoch": 0.5021277414513537, + "grad_norm": 0.16212397813796997, + "learning_rate": 9.320824837406856e-06, + "loss": 0.0017, + "step": 78290 + }, + { + "epoch": 0.5021918783451399, + "grad_norm": 0.1332699954509735, + "learning_rate": 9.320543162776667e-06, + "loss": 0.0041, + "step": 78300 + }, + { + "epoch": 0.5022560152389259, + "grad_norm": 0.06167587637901306, + "learning_rate": 9.320261434006983e-06, + "loss": 0.0032, + "step": 78310 + }, + { + "epoch": 0.5023201521327121, + "grad_norm": 0.03406332805752754, + "learning_rate": 9.319979651101336e-06, + "loss": 0.0027, + "step": 78320 + }, + { + "epoch": 0.5023842890264981, + "grad_norm": 0.24660973250865936, + "learning_rate": 9.319697814063257e-06, + "loss": 0.0038, + "step": 78330 + }, + { + "epoch": 0.5024484259202843, + "grad_norm": 0.0480784997344017, + "learning_rate": 9.319415922896278e-06, + "loss": 0.0028, + "step": 78340 + }, + { + "epoch": 0.5025125628140703, + "grad_norm": 0.07655072212219238, + "learning_rate": 9.319133977603929e-06, + "loss": 0.0044, + "step": 78350 + }, + { + "epoch": 0.5025766997078565, + "grad_norm": 0.15556862950325012, + "learning_rate": 9.318851978189744e-06, + "loss": 0.0043, + "step": 78360 + }, + { + "epoch": 0.5026408366016425, + "grad_norm": 0.19853724539279938, + "learning_rate": 9.318569924657257e-06, + "loss": 0.004, + "step": 78370 + }, + { + "epoch": 0.5027049734954286, + "grad_norm": 0.12763874232769012, + "learning_rate": 9.318287817010003e-06, + "loss": 0.003, + "step": 78380 + }, + { + "epoch": 0.5027691103892148, + "grad_norm": 0.16955415904521942, + "learning_rate": 9.318005655251517e-06, + "loss": 0.0072, + "step": 78390 + }, + { + "epoch": 0.5028332472830008, + "grad_norm": 0.20948928594589233, + "learning_rate": 9.317723439385333e-06, + "loss": 0.0033, + "step": 78400 + }, + { + "epoch": 0.502897384176787, + "grad_norm": 0.205460324883461, + "learning_rate": 9.317441169414989e-06, + "loss": 0.004, + "step": 78410 + }, + { + "epoch": 0.502961521070573, + "grad_norm": 0.10899683088064194, + "learning_rate": 9.317158845344022e-06, + "loss": 0.004, + "step": 78420 + }, + { + "epoch": 0.5030256579643592, + "grad_norm": 0.1819201558828354, + "learning_rate": 9.316876467175969e-06, + "loss": 0.009, + "step": 78430 + }, + { + "epoch": 0.5030897948581452, + "grad_norm": 0.1324969232082367, + "learning_rate": 9.316594034914368e-06, + "loss": 0.003, + "step": 78440 + }, + { + "epoch": 0.5031539317519313, + "grad_norm": 0.11342664808034897, + "learning_rate": 9.31631154856276e-06, + "loss": 0.006, + "step": 78450 + }, + { + "epoch": 0.5032180686457174, + "grad_norm": 0.09595182538032532, + "learning_rate": 9.316029008124682e-06, + "loss": 0.0038, + "step": 78460 + }, + { + "epoch": 0.5032822055395035, + "grad_norm": 0.26570847630500793, + "learning_rate": 9.315746413603676e-06, + "loss": 0.0034, + "step": 78470 + }, + { + "epoch": 0.5033463424332896, + "grad_norm": 0.14782577753067017, + "learning_rate": 9.315463765003283e-06, + "loss": 0.003, + "step": 78480 + }, + { + "epoch": 0.5034104793270757, + "grad_norm": 0.20060312747955322, + "learning_rate": 9.315181062327046e-06, + "loss": 0.003, + "step": 78490 + }, + { + "epoch": 0.5034746162208618, + "grad_norm": 0.15654371678829193, + "learning_rate": 9.314898305578505e-06, + "loss": 0.0037, + "step": 78500 + }, + { + "epoch": 0.5035387531146479, + "grad_norm": 0.05840962380170822, + "learning_rate": 9.314615494761206e-06, + "loss": 0.0029, + "step": 78510 + }, + { + "epoch": 0.5036028900084341, + "grad_norm": 0.1299424022436142, + "learning_rate": 9.314332629878691e-06, + "loss": 0.0042, + "step": 78520 + }, + { + "epoch": 0.5036670269022201, + "grad_norm": 0.2413955181837082, + "learning_rate": 9.314049710934504e-06, + "loss": 0.0053, + "step": 78530 + }, + { + "epoch": 0.5037311637960062, + "grad_norm": 0.05778886750340462, + "learning_rate": 9.31376673793219e-06, + "loss": 0.0028, + "step": 78540 + }, + { + "epoch": 0.5037953006897923, + "grad_norm": 0.015478234738111496, + "learning_rate": 9.313483710875298e-06, + "loss": 0.0019, + "step": 78550 + }, + { + "epoch": 0.5038594375835784, + "grad_norm": 0.07730729877948761, + "learning_rate": 9.313200629767371e-06, + "loss": 0.0046, + "step": 78560 + }, + { + "epoch": 0.5039235744773645, + "grad_norm": 0.0688692033290863, + "learning_rate": 9.312917494611957e-06, + "loss": 0.0039, + "step": 78570 + }, + { + "epoch": 0.5039877113711506, + "grad_norm": 0.29817402362823486, + "learning_rate": 9.312634305412604e-06, + "loss": 0.0118, + "step": 78580 + }, + { + "epoch": 0.5040518482649367, + "grad_norm": 0.09822961688041687, + "learning_rate": 9.312351062172862e-06, + "loss": 0.0036, + "step": 78590 + }, + { + "epoch": 0.5041159851587228, + "grad_norm": 0.2513461410999298, + "learning_rate": 9.312067764896279e-06, + "loss": 0.0049, + "step": 78600 + }, + { + "epoch": 0.5041801220525088, + "grad_norm": 0.13050639629364014, + "learning_rate": 9.311784413586407e-06, + "loss": 0.0034, + "step": 78610 + }, + { + "epoch": 0.504244258946295, + "grad_norm": 0.44473275542259216, + "learning_rate": 9.311501008246792e-06, + "loss": 0.0056, + "step": 78620 + }, + { + "epoch": 0.504308395840081, + "grad_norm": 0.07539486885070801, + "learning_rate": 9.311217548880988e-06, + "loss": 0.0031, + "step": 78630 + }, + { + "epoch": 0.5043725327338672, + "grad_norm": 0.0902412086725235, + "learning_rate": 9.31093403549255e-06, + "loss": 0.0031, + "step": 78640 + }, + { + "epoch": 0.5044366696276532, + "grad_norm": 0.15105240046977997, + "learning_rate": 9.310650468085023e-06, + "loss": 0.0075, + "step": 78650 + }, + { + "epoch": 0.5045008065214394, + "grad_norm": 0.15025536715984344, + "learning_rate": 9.310366846661969e-06, + "loss": 0.0035, + "step": 78660 + }, + { + "epoch": 0.5045649434152255, + "grad_norm": 0.18562453985214233, + "learning_rate": 9.310083171226935e-06, + "loss": 0.0054, + "step": 78670 + }, + { + "epoch": 0.5046290803090115, + "grad_norm": 0.2005065530538559, + "learning_rate": 9.30979944178348e-06, + "loss": 0.0048, + "step": 78680 + }, + { + "epoch": 0.5046932172027977, + "grad_norm": 0.08502445369958878, + "learning_rate": 9.309515658335158e-06, + "loss": 0.0038, + "step": 78690 + }, + { + "epoch": 0.5047573540965837, + "grad_norm": 0.06351223587989807, + "learning_rate": 9.309231820885523e-06, + "loss": 0.0024, + "step": 78700 + }, + { + "epoch": 0.5048214909903699, + "grad_norm": 0.13348788022994995, + "learning_rate": 9.308947929438135e-06, + "loss": 0.003, + "step": 78710 + }, + { + "epoch": 0.5048856278841559, + "grad_norm": 0.08967173844575882, + "learning_rate": 9.30866398399655e-06, + "loss": 0.0032, + "step": 78720 + }, + { + "epoch": 0.5049497647779421, + "grad_norm": 0.20979301631450653, + "learning_rate": 9.308379984564326e-06, + "loss": 0.0043, + "step": 78730 + }, + { + "epoch": 0.5050139016717281, + "grad_norm": 0.18976452946662903, + "learning_rate": 9.308095931145021e-06, + "loss": 0.0068, + "step": 78740 + }, + { + "epoch": 0.5050780385655143, + "grad_norm": 0.048686880618333817, + "learning_rate": 9.307811823742195e-06, + "loss": 0.0039, + "step": 78750 + }, + { + "epoch": 0.5051421754593003, + "grad_norm": 0.2524215579032898, + "learning_rate": 9.307527662359408e-06, + "loss": 0.0054, + "step": 78760 + }, + { + "epoch": 0.5052063123530864, + "grad_norm": 0.2951013445854187, + "learning_rate": 9.307243447000222e-06, + "loss": 0.0044, + "step": 78770 + }, + { + "epoch": 0.5052704492468725, + "grad_norm": 0.28178176283836365, + "learning_rate": 9.306959177668195e-06, + "loss": 0.0052, + "step": 78780 + }, + { + "epoch": 0.5053345861406586, + "grad_norm": 0.1739809662103653, + "learning_rate": 9.306674854366895e-06, + "loss": 0.0027, + "step": 78790 + }, + { + "epoch": 0.5053987230344447, + "grad_norm": 0.18351706862449646, + "learning_rate": 9.306390477099878e-06, + "loss": 0.0059, + "step": 78800 + }, + { + "epoch": 0.5054628599282308, + "grad_norm": 0.15412580966949463, + "learning_rate": 9.30610604587071e-06, + "loss": 0.004, + "step": 78810 + }, + { + "epoch": 0.505526996822017, + "grad_norm": 0.08657550066709518, + "learning_rate": 9.305821560682959e-06, + "loss": 0.006, + "step": 78820 + }, + { + "epoch": 0.505591133715803, + "grad_norm": 4.012050151824951, + "learning_rate": 9.305537021540186e-06, + "loss": 0.0118, + "step": 78830 + }, + { + "epoch": 0.5056552706095891, + "grad_norm": 0.16037659347057343, + "learning_rate": 9.305252428445954e-06, + "loss": 0.0055, + "step": 78840 + }, + { + "epoch": 0.5057194075033752, + "grad_norm": 0.0963616594672203, + "learning_rate": 9.304967781403835e-06, + "loss": 0.0057, + "step": 78850 + }, + { + "epoch": 0.5057835443971613, + "grad_norm": 0.11402077227830887, + "learning_rate": 9.304683080417392e-06, + "loss": 0.0045, + "step": 78860 + }, + { + "epoch": 0.5058476812909474, + "grad_norm": 0.13955287635326385, + "learning_rate": 9.304398325490194e-06, + "loss": 0.0032, + "step": 78870 + }, + { + "epoch": 0.5059118181847335, + "grad_norm": 0.3319750428199768, + "learning_rate": 9.304113516625808e-06, + "loss": 0.0073, + "step": 78880 + }, + { + "epoch": 0.5059759550785196, + "grad_norm": 0.09229553490877151, + "learning_rate": 9.303828653827802e-06, + "loss": 0.0074, + "step": 78890 + }, + { + "epoch": 0.5060400919723057, + "grad_norm": 0.21243679523468018, + "learning_rate": 9.303543737099749e-06, + "loss": 0.004, + "step": 78900 + }, + { + "epoch": 0.5061042288660917, + "grad_norm": 0.20914272964000702, + "learning_rate": 9.303258766445218e-06, + "loss": 0.0051, + "step": 78910 + }, + { + "epoch": 0.5061683657598779, + "grad_norm": 0.10656081140041351, + "learning_rate": 9.302973741867778e-06, + "loss": 0.0029, + "step": 78920 + }, + { + "epoch": 0.5062325026536639, + "grad_norm": 0.08997952193021774, + "learning_rate": 9.302688663371002e-06, + "loss": 0.0058, + "step": 78930 + }, + { + "epoch": 0.5062966395474501, + "grad_norm": 0.1475543975830078, + "learning_rate": 9.302403530958462e-06, + "loss": 0.0037, + "step": 78940 + }, + { + "epoch": 0.5063607764412362, + "grad_norm": 0.1437944918870926, + "learning_rate": 9.30211834463373e-06, + "loss": 0.0034, + "step": 78950 + }, + { + "epoch": 0.5064249133350223, + "grad_norm": 0.17114025354385376, + "learning_rate": 9.301833104400382e-06, + "loss": 0.0041, + "step": 78960 + }, + { + "epoch": 0.5064890502288084, + "grad_norm": 0.11799921095371246, + "learning_rate": 9.30154781026199e-06, + "loss": 0.0031, + "step": 78970 + }, + { + "epoch": 0.5065531871225945, + "grad_norm": 0.28136688470840454, + "learning_rate": 9.30126246222213e-06, + "loss": 0.0058, + "step": 78980 + }, + { + "epoch": 0.5066173240163806, + "grad_norm": 0.10884694010019302, + "learning_rate": 9.300977060284377e-06, + "loss": 0.0027, + "step": 78990 + }, + { + "epoch": 0.5066814609101666, + "grad_norm": 0.3540794849395752, + "learning_rate": 9.300691604452306e-06, + "loss": 0.0052, + "step": 79000 + }, + { + "epoch": 0.5067455978039528, + "grad_norm": 0.24561040103435516, + "learning_rate": 9.300406094729496e-06, + "loss": 0.0041, + "step": 79010 + }, + { + "epoch": 0.5068097346977388, + "grad_norm": 0.09596218168735504, + "learning_rate": 9.300120531119527e-06, + "loss": 0.0046, + "step": 79020 + }, + { + "epoch": 0.506873871591525, + "grad_norm": 0.09170177578926086, + "learning_rate": 9.299834913625971e-06, + "loss": 0.0043, + "step": 79030 + }, + { + "epoch": 0.506938008485311, + "grad_norm": 0.06456504762172699, + "learning_rate": 9.299549242252414e-06, + "loss": 0.0046, + "step": 79040 + }, + { + "epoch": 0.5070021453790972, + "grad_norm": 0.24337686598300934, + "learning_rate": 9.29926351700243e-06, + "loss": 0.0029, + "step": 79050 + }, + { + "epoch": 0.5070662822728832, + "grad_norm": 0.2393103688955307, + "learning_rate": 9.298977737879602e-06, + "loss": 0.0053, + "step": 79060 + }, + { + "epoch": 0.5071304191666693, + "grad_norm": 0.2494836449623108, + "learning_rate": 9.298691904887508e-06, + "loss": 0.0025, + "step": 79070 + }, + { + "epoch": 0.5071945560604554, + "grad_norm": 0.42681920528411865, + "learning_rate": 9.298406018029737e-06, + "loss": 0.0084, + "step": 79080 + }, + { + "epoch": 0.5072586929542415, + "grad_norm": 0.13219456374645233, + "learning_rate": 9.298120077309864e-06, + "loss": 0.0051, + "step": 79090 + }, + { + "epoch": 0.5073228298480277, + "grad_norm": 0.13975024223327637, + "learning_rate": 9.297834082731474e-06, + "loss": 0.0024, + "step": 79100 + }, + { + "epoch": 0.5073869667418137, + "grad_norm": 0.2604387402534485, + "learning_rate": 9.297548034298151e-06, + "loss": 0.0063, + "step": 79110 + }, + { + "epoch": 0.5074511036355999, + "grad_norm": 0.1672421097755432, + "learning_rate": 9.29726193201348e-06, + "loss": 0.0029, + "step": 79120 + }, + { + "epoch": 0.5075152405293859, + "grad_norm": 0.11025706678628922, + "learning_rate": 9.296975775881049e-06, + "loss": 0.0029, + "step": 79130 + }, + { + "epoch": 0.507579377423172, + "grad_norm": 0.19144493341445923, + "learning_rate": 9.296689565904437e-06, + "loss": 0.0033, + "step": 79140 + }, + { + "epoch": 0.5076435143169581, + "grad_norm": 0.06302493065595627, + "learning_rate": 9.296403302087236e-06, + "loss": 0.0029, + "step": 79150 + }, + { + "epoch": 0.5077076512107442, + "grad_norm": 0.12776464223861694, + "learning_rate": 9.29611698443303e-06, + "loss": 0.0036, + "step": 79160 + }, + { + "epoch": 0.5077717881045303, + "grad_norm": 0.10405514389276505, + "learning_rate": 9.295830612945406e-06, + "loss": 0.0022, + "step": 79170 + }, + { + "epoch": 0.5078359249983164, + "grad_norm": 0.12541966140270233, + "learning_rate": 9.295544187627957e-06, + "loss": 0.0044, + "step": 79180 + }, + { + "epoch": 0.5079000618921025, + "grad_norm": 0.2913911044597626, + "learning_rate": 9.295257708484269e-06, + "loss": 0.0058, + "step": 79190 + }, + { + "epoch": 0.5079641987858886, + "grad_norm": 0.11696656048297882, + "learning_rate": 9.294971175517931e-06, + "loss": 0.0033, + "step": 79200 + }, + { + "epoch": 0.5080283356796746, + "grad_norm": 0.18840357661247253, + "learning_rate": 9.294684588732536e-06, + "loss": 0.0021, + "step": 79210 + }, + { + "epoch": 0.5080924725734608, + "grad_norm": 0.17457997798919678, + "learning_rate": 9.294397948131673e-06, + "loss": 0.0061, + "step": 79220 + }, + { + "epoch": 0.508156609467247, + "grad_norm": 0.14138942956924438, + "learning_rate": 9.294111253718934e-06, + "loss": 0.0028, + "step": 79230 + }, + { + "epoch": 0.508220746361033, + "grad_norm": 0.10878881067037582, + "learning_rate": 9.293824505497912e-06, + "loss": 0.0028, + "step": 79240 + }, + { + "epoch": 0.5082848832548191, + "grad_norm": 0.14829964935779572, + "learning_rate": 9.2935377034722e-06, + "loss": 0.0052, + "step": 79250 + }, + { + "epoch": 0.5083490201486052, + "grad_norm": 0.11220408231019974, + "learning_rate": 9.293250847645394e-06, + "loss": 0.0057, + "step": 79260 + }, + { + "epoch": 0.5084131570423913, + "grad_norm": 0.11413662135601044, + "learning_rate": 9.292963938021085e-06, + "loss": 0.0035, + "step": 79270 + }, + { + "epoch": 0.5084772939361774, + "grad_norm": 0.20025412738323212, + "learning_rate": 9.29267697460287e-06, + "loss": 0.0027, + "step": 79280 + }, + { + "epoch": 0.5085414308299635, + "grad_norm": 0.1414087861776352, + "learning_rate": 9.292389957394345e-06, + "loss": 0.0083, + "step": 79290 + }, + { + "epoch": 0.5086055677237495, + "grad_norm": 0.5542187094688416, + "learning_rate": 9.292102886399104e-06, + "loss": 0.0052, + "step": 79300 + }, + { + "epoch": 0.5086697046175357, + "grad_norm": 0.1466957926750183, + "learning_rate": 9.291815761620748e-06, + "loss": 0.0033, + "step": 79310 + }, + { + "epoch": 0.5087338415113217, + "grad_norm": 0.051578179001808167, + "learning_rate": 9.291528583062873e-06, + "loss": 0.0026, + "step": 79320 + }, + { + "epoch": 0.5087979784051079, + "grad_norm": 0.07024681568145752, + "learning_rate": 9.291241350729078e-06, + "loss": 0.0044, + "step": 79330 + }, + { + "epoch": 0.5088621152988939, + "grad_norm": 0.083675317466259, + "learning_rate": 9.290954064622963e-06, + "loss": 0.0033, + "step": 79340 + }, + { + "epoch": 0.5089262521926801, + "grad_norm": 0.06051542982459068, + "learning_rate": 9.290666724748125e-06, + "loss": 0.0018, + "step": 79350 + }, + { + "epoch": 0.5089903890864661, + "grad_norm": 0.11042298376560211, + "learning_rate": 9.290379331108168e-06, + "loss": 0.0029, + "step": 79360 + }, + { + "epoch": 0.5090545259802522, + "grad_norm": 0.3282707631587982, + "learning_rate": 9.290091883706692e-06, + "loss": 0.0031, + "step": 79370 + }, + { + "epoch": 0.5091186628740384, + "grad_norm": 0.25048360228538513, + "learning_rate": 9.289804382547296e-06, + "loss": 0.0031, + "step": 79380 + }, + { + "epoch": 0.5091827997678244, + "grad_norm": 0.12474612891674042, + "learning_rate": 9.289516827633587e-06, + "loss": 0.0027, + "step": 79390 + }, + { + "epoch": 0.5092469366616106, + "grad_norm": 0.0450366735458374, + "learning_rate": 9.289229218969166e-06, + "loss": 0.002, + "step": 79400 + }, + { + "epoch": 0.5093110735553966, + "grad_norm": 0.20078837871551514, + "learning_rate": 9.288941556557639e-06, + "loss": 0.0036, + "step": 79410 + }, + { + "epoch": 0.5093752104491828, + "grad_norm": 0.04043214023113251, + "learning_rate": 9.288653840402607e-06, + "loss": 0.0022, + "step": 79420 + }, + { + "epoch": 0.5094393473429688, + "grad_norm": 0.16537418961524963, + "learning_rate": 9.288366070507677e-06, + "loss": 0.0038, + "step": 79430 + }, + { + "epoch": 0.509503484236755, + "grad_norm": 0.08483941853046417, + "learning_rate": 9.288078246876456e-06, + "loss": 0.0118, + "step": 79440 + }, + { + "epoch": 0.509567621130541, + "grad_norm": 0.10477130115032196, + "learning_rate": 9.28779036951255e-06, + "loss": 0.0042, + "step": 79450 + }, + { + "epoch": 0.5096317580243271, + "grad_norm": 0.10791230201721191, + "learning_rate": 9.287502438419567e-06, + "loss": 0.0041, + "step": 79460 + }, + { + "epoch": 0.5096958949181132, + "grad_norm": 0.17048099637031555, + "learning_rate": 9.287214453601115e-06, + "loss": 0.0027, + "step": 79470 + }, + { + "epoch": 0.5097600318118993, + "grad_norm": 0.20219752192497253, + "learning_rate": 9.2869264150608e-06, + "loss": 0.005, + "step": 79480 + }, + { + "epoch": 0.5098241687056854, + "grad_norm": 0.08709075301885605, + "learning_rate": 9.286638322802233e-06, + "loss": 0.003, + "step": 79490 + }, + { + "epoch": 0.5098883055994715, + "grad_norm": 0.23793639242649078, + "learning_rate": 9.286350176829024e-06, + "loss": 0.0026, + "step": 79500 + }, + { + "epoch": 0.5099524424932577, + "grad_norm": 0.122853584587574, + "learning_rate": 9.286061977144786e-06, + "loss": 0.0042, + "step": 79510 + }, + { + "epoch": 0.5100165793870437, + "grad_norm": 0.21153560280799866, + "learning_rate": 9.285773723753127e-06, + "loss": 0.0054, + "step": 79520 + }, + { + "epoch": 0.5100807162808298, + "grad_norm": 0.1601635068655014, + "learning_rate": 9.28548541665766e-06, + "loss": 0.0036, + "step": 79530 + }, + { + "epoch": 0.5101448531746159, + "grad_norm": 0.10221725702285767, + "learning_rate": 9.285197055861998e-06, + "loss": 0.0037, + "step": 79540 + }, + { + "epoch": 0.510208990068402, + "grad_norm": 0.10448180884122849, + "learning_rate": 9.284908641369755e-06, + "loss": 0.0049, + "step": 79550 + }, + { + "epoch": 0.5102731269621881, + "grad_norm": 0.31749647855758667, + "learning_rate": 9.284620173184545e-06, + "loss": 0.0026, + "step": 79560 + }, + { + "epoch": 0.5103372638559742, + "grad_norm": 0.21306177973747253, + "learning_rate": 9.28433165130998e-06, + "loss": 0.0039, + "step": 79570 + }, + { + "epoch": 0.5104014007497603, + "grad_norm": 0.03203563764691353, + "learning_rate": 9.284043075749678e-06, + "loss": 0.004, + "step": 79580 + }, + { + "epoch": 0.5104655376435464, + "grad_norm": 0.05935882776975632, + "learning_rate": 9.283754446507253e-06, + "loss": 0.0032, + "step": 79590 + }, + { + "epoch": 0.5105296745373324, + "grad_norm": 0.05813802778720856, + "learning_rate": 9.283465763586325e-06, + "loss": 0.0025, + "step": 79600 + }, + { + "epoch": 0.5105938114311186, + "grad_norm": 0.08461745083332062, + "learning_rate": 9.283177026990512e-06, + "loss": 0.0031, + "step": 79610 + }, + { + "epoch": 0.5106579483249046, + "grad_norm": 0.07877620309591293, + "learning_rate": 9.282888236723426e-06, + "loss": 0.0053, + "step": 79620 + }, + { + "epoch": 0.5107220852186908, + "grad_norm": 0.15824204683303833, + "learning_rate": 9.282599392788692e-06, + "loss": 0.003, + "step": 79630 + }, + { + "epoch": 0.5107862221124768, + "grad_norm": 0.05316735431551933, + "learning_rate": 9.282310495189926e-06, + "loss": 0.0025, + "step": 79640 + }, + { + "epoch": 0.510850359006263, + "grad_norm": 0.18523530662059784, + "learning_rate": 9.282021543930748e-06, + "loss": 0.0023, + "step": 79650 + }, + { + "epoch": 0.5109144959000491, + "grad_norm": 0.37745577096939087, + "learning_rate": 9.28173253901478e-06, + "loss": 0.0052, + "step": 79660 + }, + { + "epoch": 0.5109786327938352, + "grad_norm": 0.1288391649723053, + "learning_rate": 9.281443480445644e-06, + "loss": 0.0024, + "step": 79670 + }, + { + "epoch": 0.5110427696876213, + "grad_norm": 0.20558273792266846, + "learning_rate": 9.281154368226961e-06, + "loss": 0.0057, + "step": 79680 + }, + { + "epoch": 0.5111069065814073, + "grad_norm": 0.0038497569039463997, + "learning_rate": 9.280865202362355e-06, + "loss": 0.003, + "step": 79690 + }, + { + "epoch": 0.5111710434751935, + "grad_norm": 0.11807820200920105, + "learning_rate": 9.280575982855447e-06, + "loss": 0.0036, + "step": 79700 + }, + { + "epoch": 0.5112351803689795, + "grad_norm": 0.057972654700279236, + "learning_rate": 9.280286709709862e-06, + "loss": 0.0058, + "step": 79710 + }, + { + "epoch": 0.5112993172627657, + "grad_norm": 0.3088960647583008, + "learning_rate": 9.279997382929227e-06, + "loss": 0.0075, + "step": 79720 + }, + { + "epoch": 0.5113634541565517, + "grad_norm": 0.10937351733446121, + "learning_rate": 9.279708002517166e-06, + "loss": 0.004, + "step": 79730 + }, + { + "epoch": 0.5114275910503379, + "grad_norm": 0.09601975977420807, + "learning_rate": 9.279418568477305e-06, + "loss": 0.0036, + "step": 79740 + }, + { + "epoch": 0.5114917279441239, + "grad_norm": 0.11423894762992859, + "learning_rate": 9.279129080813269e-06, + "loss": 0.0025, + "step": 79750 + }, + { + "epoch": 0.51155586483791, + "grad_norm": 0.16922509670257568, + "learning_rate": 9.27883953952869e-06, + "loss": 0.0029, + "step": 79760 + }, + { + "epoch": 0.5116200017316961, + "grad_norm": 0.1398465782403946, + "learning_rate": 9.278549944627192e-06, + "loss": 0.0022, + "step": 79770 + }, + { + "epoch": 0.5116841386254822, + "grad_norm": 0.2333763986825943, + "learning_rate": 9.278260296112406e-06, + "loss": 0.0045, + "step": 79780 + }, + { + "epoch": 0.5117482755192684, + "grad_norm": 0.07165589928627014, + "learning_rate": 9.277970593987961e-06, + "loss": 0.0045, + "step": 79790 + }, + { + "epoch": 0.5118124124130544, + "grad_norm": 0.1786176860332489, + "learning_rate": 9.277680838257486e-06, + "loss": 0.0036, + "step": 79800 + }, + { + "epoch": 0.5118765493068406, + "grad_norm": 0.1743316501379013, + "learning_rate": 9.277391028924614e-06, + "loss": 0.0049, + "step": 79810 + }, + { + "epoch": 0.5119406862006266, + "grad_norm": 0.1622972935438156, + "learning_rate": 9.277101165992975e-06, + "loss": 0.0024, + "step": 79820 + }, + { + "epoch": 0.5120048230944128, + "grad_norm": 0.02936510369181633, + "learning_rate": 9.276811249466201e-06, + "loss": 0.0037, + "step": 79830 + }, + { + "epoch": 0.5120689599881988, + "grad_norm": 0.13769282400608063, + "learning_rate": 9.276521279347926e-06, + "loss": 0.0026, + "step": 79840 + }, + { + "epoch": 0.5121330968819849, + "grad_norm": 0.03349493816494942, + "learning_rate": 9.276231255641783e-06, + "loss": 0.0061, + "step": 79850 + }, + { + "epoch": 0.512197233775771, + "grad_norm": 0.23810327053070068, + "learning_rate": 9.275941178351406e-06, + "loss": 0.003, + "step": 79860 + }, + { + "epoch": 0.5122613706695571, + "grad_norm": 0.06807134300470352, + "learning_rate": 9.275651047480431e-06, + "loss": 0.0028, + "step": 79870 + }, + { + "epoch": 0.5123255075633432, + "grad_norm": 0.11463180184364319, + "learning_rate": 9.275360863032492e-06, + "loss": 0.003, + "step": 79880 + }, + { + "epoch": 0.5123896444571293, + "grad_norm": 0.2272919863462448, + "learning_rate": 9.275070625011226e-06, + "loss": 0.0045, + "step": 79890 + }, + { + "epoch": 0.5124537813509153, + "grad_norm": 0.060492340475320816, + "learning_rate": 9.27478033342027e-06, + "loss": 0.002, + "step": 79900 + }, + { + "epoch": 0.5125179182447015, + "grad_norm": 0.1360568255186081, + "learning_rate": 9.27448998826326e-06, + "loss": 0.0068, + "step": 79910 + }, + { + "epoch": 0.5125820551384875, + "grad_norm": 0.5712762475013733, + "learning_rate": 9.274199589543836e-06, + "loss": 0.0082, + "step": 79920 + }, + { + "epoch": 0.5126461920322737, + "grad_norm": 0.13371999561786652, + "learning_rate": 9.273909137265637e-06, + "loss": 0.004, + "step": 79930 + }, + { + "epoch": 0.5127103289260598, + "grad_norm": 0.14738905429840088, + "learning_rate": 9.273618631432301e-06, + "loss": 0.003, + "step": 79940 + }, + { + "epoch": 0.5127744658198459, + "grad_norm": 0.08841729909181595, + "learning_rate": 9.27332807204747e-06, + "loss": 0.0035, + "step": 79950 + }, + { + "epoch": 0.512838602713632, + "grad_norm": 0.19606488943099976, + "learning_rate": 9.273037459114784e-06, + "loss": 0.0041, + "step": 79960 + }, + { + "epoch": 0.5129027396074181, + "grad_norm": 0.05790529400110245, + "learning_rate": 9.272746792637885e-06, + "loss": 0.0047, + "step": 79970 + }, + { + "epoch": 0.5129668765012042, + "grad_norm": 0.22945904731750488, + "learning_rate": 9.272456072620413e-06, + "loss": 0.0049, + "step": 79980 + }, + { + "epoch": 0.5130310133949902, + "grad_norm": 0.05827128514647484, + "learning_rate": 9.272165299066016e-06, + "loss": 0.0038, + "step": 79990 + }, + { + "epoch": 0.5130951502887764, + "grad_norm": 0.10648482292890549, + "learning_rate": 9.271874471978333e-06, + "loss": 0.0018, + "step": 80000 + }, + { + "epoch": 0.5131592871825624, + "grad_norm": 0.10429185628890991, + "learning_rate": 9.27158359136101e-06, + "loss": 0.0032, + "step": 80010 + }, + { + "epoch": 0.5132234240763486, + "grad_norm": 0.12299264967441559, + "learning_rate": 9.271292657217692e-06, + "loss": 0.0034, + "step": 80020 + }, + { + "epoch": 0.5132875609701346, + "grad_norm": 0.16474628448486328, + "learning_rate": 9.271001669552024e-06, + "loss": 0.003, + "step": 80030 + }, + { + "epoch": 0.5133516978639208, + "grad_norm": 0.08531386405229568, + "learning_rate": 9.270710628367653e-06, + "loss": 0.0034, + "step": 80040 + }, + { + "epoch": 0.5134158347577068, + "grad_norm": 0.18470297753810883, + "learning_rate": 9.270419533668225e-06, + "loss": 0.0025, + "step": 80050 + }, + { + "epoch": 0.513479971651493, + "grad_norm": 0.09929078817367554, + "learning_rate": 9.270128385457389e-06, + "loss": 0.0053, + "step": 80060 + }, + { + "epoch": 0.5135441085452791, + "grad_norm": 0.1945580393075943, + "learning_rate": 9.269837183738792e-06, + "loss": 0.0038, + "step": 80070 + }, + { + "epoch": 0.5136082454390651, + "grad_norm": 0.07254920899868011, + "learning_rate": 9.269545928516083e-06, + "loss": 0.0035, + "step": 80080 + }, + { + "epoch": 0.5136723823328513, + "grad_norm": 0.1123448982834816, + "learning_rate": 9.269254619792914e-06, + "loss": 0.0019, + "step": 80090 + }, + { + "epoch": 0.5137365192266373, + "grad_norm": 0.5760213136672974, + "learning_rate": 9.268963257572932e-06, + "loss": 0.0043, + "step": 80100 + }, + { + "epoch": 0.5138006561204235, + "grad_norm": 0.11048474907875061, + "learning_rate": 9.268671841859789e-06, + "loss": 0.0022, + "step": 80110 + }, + { + "epoch": 0.5138647930142095, + "grad_norm": 0.34163668751716614, + "learning_rate": 9.268380372657137e-06, + "loss": 0.0031, + "step": 80120 + }, + { + "epoch": 0.5139289299079957, + "grad_norm": 0.13767167925834656, + "learning_rate": 9.268088849968629e-06, + "loss": 0.0035, + "step": 80130 + }, + { + "epoch": 0.5139930668017817, + "grad_norm": 0.15491144359111786, + "learning_rate": 9.267797273797918e-06, + "loss": 0.003, + "step": 80140 + }, + { + "epoch": 0.5140572036955678, + "grad_norm": 0.0027098332066088915, + "learning_rate": 9.267505644148655e-06, + "loss": 0.0028, + "step": 80150 + }, + { + "epoch": 0.5141213405893539, + "grad_norm": 0.391956627368927, + "learning_rate": 9.267213961024499e-06, + "loss": 0.0038, + "step": 80160 + }, + { + "epoch": 0.51418547748314, + "grad_norm": 0.3579157292842865, + "learning_rate": 9.2669222244291e-06, + "loss": 0.0031, + "step": 80170 + }, + { + "epoch": 0.5142496143769261, + "grad_norm": 0.3213461935520172, + "learning_rate": 9.266630434366118e-06, + "loss": 0.0036, + "step": 80180 + }, + { + "epoch": 0.5143137512707122, + "grad_norm": 0.12931661307811737, + "learning_rate": 9.266338590839205e-06, + "loss": 0.0039, + "step": 80190 + }, + { + "epoch": 0.5143778881644983, + "grad_norm": 0.08285976946353912, + "learning_rate": 9.266046693852023e-06, + "loss": 0.0022, + "step": 80200 + }, + { + "epoch": 0.5144420250582844, + "grad_norm": 0.24085961282253265, + "learning_rate": 9.265754743408225e-06, + "loss": 0.004, + "step": 80210 + }, + { + "epoch": 0.5145061619520706, + "grad_norm": 0.25267624855041504, + "learning_rate": 9.265462739511473e-06, + "loss": 0.0053, + "step": 80220 + }, + { + "epoch": 0.5145702988458566, + "grad_norm": 0.12650226056575775, + "learning_rate": 9.265170682165423e-06, + "loss": 0.0023, + "step": 80230 + }, + { + "epoch": 0.5146344357396427, + "grad_norm": 0.09412693977355957, + "learning_rate": 9.264878571373737e-06, + "loss": 0.0038, + "step": 80240 + }, + { + "epoch": 0.5146985726334288, + "grad_norm": 0.1114264577627182, + "learning_rate": 9.264586407140074e-06, + "loss": 0.0034, + "step": 80250 + }, + { + "epoch": 0.5147627095272149, + "grad_norm": 0.22017377614974976, + "learning_rate": 9.264294189468095e-06, + "loss": 0.0026, + "step": 80260 + }, + { + "epoch": 0.514826846421001, + "grad_norm": 0.11743912845849991, + "learning_rate": 9.264001918361462e-06, + "loss": 0.0029, + "step": 80270 + }, + { + "epoch": 0.5148909833147871, + "grad_norm": 0.2741183638572693, + "learning_rate": 9.263709593823839e-06, + "loss": 0.0059, + "step": 80280 + }, + { + "epoch": 0.5149551202085731, + "grad_norm": 0.12666159868240356, + "learning_rate": 9.263417215858886e-06, + "loss": 0.0028, + "step": 80290 + }, + { + "epoch": 0.5150192571023593, + "grad_norm": 1.0686538219451904, + "learning_rate": 9.26312478447027e-06, + "loss": 0.0044, + "step": 80300 + }, + { + "epoch": 0.5150833939961453, + "grad_norm": 0.42256465554237366, + "learning_rate": 9.262832299661652e-06, + "loss": 0.0057, + "step": 80310 + }, + { + "epoch": 0.5151475308899315, + "grad_norm": 0.09631174802780151, + "learning_rate": 9.2625397614367e-06, + "loss": 0.0033, + "step": 80320 + }, + { + "epoch": 0.5152116677837175, + "grad_norm": 0.17305755615234375, + "learning_rate": 9.262247169799077e-06, + "loss": 0.0045, + "step": 80330 + }, + { + "epoch": 0.5152758046775037, + "grad_norm": 0.2103009968996048, + "learning_rate": 9.261954524752452e-06, + "loss": 0.0034, + "step": 80340 + }, + { + "epoch": 0.5153399415712897, + "grad_norm": 0.19020456075668335, + "learning_rate": 9.26166182630049e-06, + "loss": 0.0036, + "step": 80350 + }, + { + "epoch": 0.5154040784650759, + "grad_norm": 0.24571269750595093, + "learning_rate": 9.26136907444686e-06, + "loss": 0.0048, + "step": 80360 + }, + { + "epoch": 0.515468215358862, + "grad_norm": 0.5711245536804199, + "learning_rate": 9.261076269195229e-06, + "loss": 0.0046, + "step": 80370 + }, + { + "epoch": 0.515532352252648, + "grad_norm": 0.1847742795944214, + "learning_rate": 9.260783410549268e-06, + "loss": 0.0047, + "step": 80380 + }, + { + "epoch": 0.5155964891464342, + "grad_norm": 0.2800198197364807, + "learning_rate": 9.260490498512647e-06, + "loss": 0.0054, + "step": 80390 + }, + { + "epoch": 0.5156606260402202, + "grad_norm": 0.18284857273101807, + "learning_rate": 9.260197533089032e-06, + "loss": 0.0036, + "step": 80400 + }, + { + "epoch": 0.5157247629340064, + "grad_norm": 0.06317662447690964, + "learning_rate": 9.259904514282099e-06, + "loss": 0.0052, + "step": 80410 + }, + { + "epoch": 0.5157888998277924, + "grad_norm": 0.08233999460935593, + "learning_rate": 9.259611442095518e-06, + "loss": 0.0024, + "step": 80420 + }, + { + "epoch": 0.5158530367215786, + "grad_norm": 0.18084761500358582, + "learning_rate": 9.259318316532962e-06, + "loss": 0.0049, + "step": 80430 + }, + { + "epoch": 0.5159171736153646, + "grad_norm": 0.3981788456439972, + "learning_rate": 9.2590251375981e-06, + "loss": 0.0039, + "step": 80440 + }, + { + "epoch": 0.5159813105091507, + "grad_norm": 0.3020228147506714, + "learning_rate": 9.258731905294614e-06, + "loss": 0.0049, + "step": 80450 + }, + { + "epoch": 0.5160454474029368, + "grad_norm": 0.23093923926353455, + "learning_rate": 9.258438619626171e-06, + "loss": 0.0022, + "step": 80460 + }, + { + "epoch": 0.5161095842967229, + "grad_norm": 0.25309401750564575, + "learning_rate": 9.258145280596449e-06, + "loss": 0.0044, + "step": 80470 + }, + { + "epoch": 0.516173721190509, + "grad_norm": 0.13883396983146667, + "learning_rate": 9.257851888209122e-06, + "loss": 0.0046, + "step": 80480 + }, + { + "epoch": 0.5162378580842951, + "grad_norm": 0.15709243714809418, + "learning_rate": 9.25755844246787e-06, + "loss": 0.0041, + "step": 80490 + }, + { + "epoch": 0.5163019949780813, + "grad_norm": 0.2982932925224304, + "learning_rate": 9.257264943376367e-06, + "loss": 0.003, + "step": 80500 + }, + { + "epoch": 0.5163661318718673, + "grad_norm": 0.06413408368825912, + "learning_rate": 9.256971390938293e-06, + "loss": 0.0036, + "step": 80510 + }, + { + "epoch": 0.5164302687656535, + "grad_norm": 0.17831914126873016, + "learning_rate": 9.256677785157322e-06, + "loss": 0.0034, + "step": 80520 + }, + { + "epoch": 0.5164944056594395, + "grad_norm": 0.08256985247135162, + "learning_rate": 9.256384126037138e-06, + "loss": 0.0028, + "step": 80530 + }, + { + "epoch": 0.5165585425532256, + "grad_norm": 0.2097683995962143, + "learning_rate": 9.256090413581418e-06, + "loss": 0.0024, + "step": 80540 + }, + { + "epoch": 0.5166226794470117, + "grad_norm": 0.1839880794286728, + "learning_rate": 9.255796647793845e-06, + "loss": 0.0041, + "step": 80550 + }, + { + "epoch": 0.5166868163407978, + "grad_norm": 0.16421499848365784, + "learning_rate": 9.255502828678098e-06, + "loss": 0.0024, + "step": 80560 + }, + { + "epoch": 0.5167509532345839, + "grad_norm": 0.06348922848701477, + "learning_rate": 9.255208956237859e-06, + "loss": 0.0027, + "step": 80570 + }, + { + "epoch": 0.51681509012837, + "grad_norm": 0.04008301720023155, + "learning_rate": 9.254915030476809e-06, + "loss": 0.0021, + "step": 80580 + }, + { + "epoch": 0.516879227022156, + "grad_norm": 0.163941890001297, + "learning_rate": 9.254621051398634e-06, + "loss": 0.0035, + "step": 80590 + }, + { + "epoch": 0.5169433639159422, + "grad_norm": 0.21079358458518982, + "learning_rate": 9.254327019007017e-06, + "loss": 0.003, + "step": 80600 + }, + { + "epoch": 0.5170075008097282, + "grad_norm": 0.11273708194494247, + "learning_rate": 9.25403293330564e-06, + "loss": 0.0032, + "step": 80610 + }, + { + "epoch": 0.5170716377035144, + "grad_norm": 0.23483584821224213, + "learning_rate": 9.253738794298192e-06, + "loss": 0.0048, + "step": 80620 + }, + { + "epoch": 0.5171357745973004, + "grad_norm": 0.16384592652320862, + "learning_rate": 9.253444601988358e-06, + "loss": 0.0053, + "step": 80630 + }, + { + "epoch": 0.5171999114910866, + "grad_norm": 0.14626704156398773, + "learning_rate": 9.25315035637982e-06, + "loss": 0.0042, + "step": 80640 + }, + { + "epoch": 0.5172640483848727, + "grad_norm": 0.2711910009384155, + "learning_rate": 9.25285605747627e-06, + "loss": 0.0033, + "step": 80650 + }, + { + "epoch": 0.5173281852786588, + "grad_norm": 0.23008853197097778, + "learning_rate": 9.252561705281392e-06, + "loss": 0.0032, + "step": 80660 + }, + { + "epoch": 0.5173923221724449, + "grad_norm": 0.020175419747829437, + "learning_rate": 9.252267299798881e-06, + "loss": 0.0015, + "step": 80670 + }, + { + "epoch": 0.517456459066231, + "grad_norm": 0.2377467155456543, + "learning_rate": 9.251972841032419e-06, + "loss": 0.004, + "step": 80680 + }, + { + "epoch": 0.5175205959600171, + "grad_norm": 0.16429409384727478, + "learning_rate": 9.251678328985697e-06, + "loss": 0.0036, + "step": 80690 + }, + { + "epoch": 0.5175847328538031, + "grad_norm": 0.08902248740196228, + "learning_rate": 9.25138376366241e-06, + "loss": 0.0029, + "step": 80700 + }, + { + "epoch": 0.5176488697475893, + "grad_norm": 0.597663938999176, + "learning_rate": 9.251089145066246e-06, + "loss": 0.0032, + "step": 80710 + }, + { + "epoch": 0.5177130066413753, + "grad_norm": 0.5348776578903198, + "learning_rate": 9.250794473200895e-06, + "loss": 0.0035, + "step": 80720 + }, + { + "epoch": 0.5177771435351615, + "grad_norm": 0.09424172341823578, + "learning_rate": 9.25049974807005e-06, + "loss": 0.0033, + "step": 80730 + }, + { + "epoch": 0.5178412804289475, + "grad_norm": 0.19990532100200653, + "learning_rate": 9.250204969677408e-06, + "loss": 0.0031, + "step": 80740 + }, + { + "epoch": 0.5179054173227337, + "grad_norm": 0.06266714632511139, + "learning_rate": 9.249910138026658e-06, + "loss": 0.0029, + "step": 80750 + }, + { + "epoch": 0.5179695542165197, + "grad_norm": 0.42398256063461304, + "learning_rate": 9.249615253121498e-06, + "loss": 0.004, + "step": 80760 + }, + { + "epoch": 0.5180336911103058, + "grad_norm": 0.12699414789676666, + "learning_rate": 9.249320314965621e-06, + "loss": 0.0044, + "step": 80770 + }, + { + "epoch": 0.518097828004092, + "grad_norm": 0.04501233622431755, + "learning_rate": 9.249025323562726e-06, + "loss": 0.0048, + "step": 80780 + }, + { + "epoch": 0.518161964897878, + "grad_norm": 0.434689998626709, + "learning_rate": 9.248730278916505e-06, + "loss": 0.0036, + "step": 80790 + }, + { + "epoch": 0.5182261017916642, + "grad_norm": 0.14772498607635498, + "learning_rate": 9.248435181030657e-06, + "loss": 0.0029, + "step": 80800 + }, + { + "epoch": 0.5182902386854502, + "grad_norm": 0.05148780718445778, + "learning_rate": 9.248140029908879e-06, + "loss": 0.0047, + "step": 80810 + }, + { + "epoch": 0.5183543755792364, + "grad_norm": 0.42253732681274414, + "learning_rate": 9.247844825554872e-06, + "loss": 0.0039, + "step": 80820 + }, + { + "epoch": 0.5184185124730224, + "grad_norm": 0.2593359351158142, + "learning_rate": 9.247549567972332e-06, + "loss": 0.0035, + "step": 80830 + }, + { + "epoch": 0.5184826493668085, + "grad_norm": 0.3482550382614136, + "learning_rate": 9.247254257164962e-06, + "loss": 0.0038, + "step": 80840 + }, + { + "epoch": 0.5185467862605946, + "grad_norm": 0.29686230421066284, + "learning_rate": 9.246958893136459e-06, + "loss": 0.0056, + "step": 80850 + }, + { + "epoch": 0.5186109231543807, + "grad_norm": 0.04358689486980438, + "learning_rate": 9.246663475890528e-06, + "loss": 0.0043, + "step": 80860 + }, + { + "epoch": 0.5186750600481668, + "grad_norm": 0.18454205989837646, + "learning_rate": 9.246368005430866e-06, + "loss": 0.0026, + "step": 80870 + }, + { + "epoch": 0.5187391969419529, + "grad_norm": 0.17590831220149994, + "learning_rate": 9.24607248176118e-06, + "loss": 0.0031, + "step": 80880 + }, + { + "epoch": 0.518803333835739, + "grad_norm": 0.0882411077618599, + "learning_rate": 9.24577690488517e-06, + "loss": 0.0044, + "step": 80890 + }, + { + "epoch": 0.5188674707295251, + "grad_norm": 0.15208327770233154, + "learning_rate": 9.24548127480654e-06, + "loss": 0.0038, + "step": 80900 + }, + { + "epoch": 0.5189316076233111, + "grad_norm": 0.1888751983642578, + "learning_rate": 9.245185591528997e-06, + "loss": 0.0039, + "step": 80910 + }, + { + "epoch": 0.5189957445170973, + "grad_norm": 0.05959264189004898, + "learning_rate": 9.244889855056245e-06, + "loss": 0.0024, + "step": 80920 + }, + { + "epoch": 0.5190598814108834, + "grad_norm": 0.18062947690486908, + "learning_rate": 9.244594065391989e-06, + "loss": 0.0025, + "step": 80930 + }, + { + "epoch": 0.5191240183046695, + "grad_norm": 0.07806473970413208, + "learning_rate": 9.244298222539936e-06, + "loss": 0.0024, + "step": 80940 + }, + { + "epoch": 0.5191881551984556, + "grad_norm": 0.1433127522468567, + "learning_rate": 9.244002326503792e-06, + "loss": 0.0046, + "step": 80950 + }, + { + "epoch": 0.5192522920922417, + "grad_norm": 0.6837491989135742, + "learning_rate": 9.243706377287264e-06, + "loss": 0.0031, + "step": 80960 + }, + { + "epoch": 0.5193164289860278, + "grad_norm": 0.2809620201587677, + "learning_rate": 9.243410374894066e-06, + "loss": 0.0029, + "step": 80970 + }, + { + "epoch": 0.5193805658798138, + "grad_norm": 0.15400190651416779, + "learning_rate": 9.243114319327902e-06, + "loss": 0.0054, + "step": 80980 + }, + { + "epoch": 0.5194447027736, + "grad_norm": 0.12674783170223236, + "learning_rate": 9.24281821059248e-06, + "loss": 0.0035, + "step": 80990 + }, + { + "epoch": 0.519508839667386, + "grad_norm": 0.3740582764148712, + "learning_rate": 9.242522048691518e-06, + "loss": 0.0042, + "step": 81000 + }, + { + "epoch": 0.5195729765611722, + "grad_norm": 0.1282520294189453, + "learning_rate": 9.242225833628721e-06, + "loss": 0.0031, + "step": 81010 + }, + { + "epoch": 0.5196371134549582, + "grad_norm": 0.48991167545318604, + "learning_rate": 9.2419295654078e-06, + "loss": 0.0022, + "step": 81020 + }, + { + "epoch": 0.5197012503487444, + "grad_norm": 0.15189383924007416, + "learning_rate": 9.241633244032474e-06, + "loss": 0.0032, + "step": 81030 + }, + { + "epoch": 0.5197653872425304, + "grad_norm": 0.17220990359783173, + "learning_rate": 9.24133686950645e-06, + "loss": 0.0084, + "step": 81040 + }, + { + "epoch": 0.5198295241363166, + "grad_norm": 0.2771851122379303, + "learning_rate": 9.241040441833444e-06, + "loss": 0.0044, + "step": 81050 + }, + { + "epoch": 0.5198936610301027, + "grad_norm": 0.11452841013669968, + "learning_rate": 9.24074396101717e-06, + "loss": 0.0038, + "step": 81060 + }, + { + "epoch": 0.5199577979238887, + "grad_norm": 0.12710310518741608, + "learning_rate": 9.240447427061343e-06, + "loss": 0.003, + "step": 81070 + }, + { + "epoch": 0.5200219348176749, + "grad_norm": 0.08416347950696945, + "learning_rate": 9.24015083996968e-06, + "loss": 0.0025, + "step": 81080 + }, + { + "epoch": 0.5200860717114609, + "grad_norm": 0.2778503894805908, + "learning_rate": 9.239854199745897e-06, + "loss": 0.0024, + "step": 81090 + }, + { + "epoch": 0.5201502086052471, + "grad_norm": 0.25798657536506653, + "learning_rate": 9.239557506393709e-06, + "loss": 0.0035, + "step": 81100 + }, + { + "epoch": 0.5202143454990331, + "grad_norm": 0.11615985631942749, + "learning_rate": 9.239260759916836e-06, + "loss": 0.0039, + "step": 81110 + }, + { + "epoch": 0.5202784823928193, + "grad_norm": 0.11622320860624313, + "learning_rate": 9.238963960318996e-06, + "loss": 0.0042, + "step": 81120 + }, + { + "epoch": 0.5203426192866053, + "grad_norm": 0.43766507506370544, + "learning_rate": 9.23866710760391e-06, + "loss": 0.0038, + "step": 81130 + }, + { + "epoch": 0.5204067561803914, + "grad_norm": 0.21479366719722748, + "learning_rate": 9.238370201775294e-06, + "loss": 0.0037, + "step": 81140 + }, + { + "epoch": 0.5204708930741775, + "grad_norm": 0.37137898802757263, + "learning_rate": 9.238073242836868e-06, + "loss": 0.0022, + "step": 81150 + }, + { + "epoch": 0.5205350299679636, + "grad_norm": 0.20445744693279266, + "learning_rate": 9.237776230792359e-06, + "loss": 0.0034, + "step": 81160 + }, + { + "epoch": 0.5205991668617497, + "grad_norm": 0.23320919275283813, + "learning_rate": 9.237479165645484e-06, + "loss": 0.0031, + "step": 81170 + }, + { + "epoch": 0.5206633037555358, + "grad_norm": 0.13351120054721832, + "learning_rate": 9.237182047399966e-06, + "loss": 0.0035, + "step": 81180 + }, + { + "epoch": 0.5207274406493219, + "grad_norm": 0.1444133073091507, + "learning_rate": 9.236884876059529e-06, + "loss": 0.0037, + "step": 81190 + }, + { + "epoch": 0.520791577543108, + "grad_norm": 0.47807008028030396, + "learning_rate": 9.236587651627898e-06, + "loss": 0.0037, + "step": 81200 + }, + { + "epoch": 0.5208557144368942, + "grad_norm": 0.17233484983444214, + "learning_rate": 9.236290374108794e-06, + "loss": 0.004, + "step": 81210 + }, + { + "epoch": 0.5209198513306802, + "grad_norm": 0.23898974061012268, + "learning_rate": 9.235993043505943e-06, + "loss": 0.0037, + "step": 81220 + }, + { + "epoch": 0.5209839882244663, + "grad_norm": 0.063786581158638, + "learning_rate": 9.235695659823074e-06, + "loss": 0.0026, + "step": 81230 + }, + { + "epoch": 0.5210481251182524, + "grad_norm": 0.17210139334201813, + "learning_rate": 9.23539822306391e-06, + "loss": 0.0025, + "step": 81240 + }, + { + "epoch": 0.5211122620120385, + "grad_norm": 0.27867937088012695, + "learning_rate": 9.235100733232181e-06, + "loss": 0.0049, + "step": 81250 + }, + { + "epoch": 0.5211763989058246, + "grad_norm": 0.0784267783164978, + "learning_rate": 9.23480319033161e-06, + "loss": 0.004, + "step": 81260 + }, + { + "epoch": 0.5212405357996107, + "grad_norm": 0.10288307815790176, + "learning_rate": 9.23450559436593e-06, + "loss": 0.0034, + "step": 81270 + }, + { + "epoch": 0.5213046726933968, + "grad_norm": 0.03400523215532303, + "learning_rate": 9.234207945338869e-06, + "loss": 0.0027, + "step": 81280 + }, + { + "epoch": 0.5213688095871829, + "grad_norm": 0.2448701560497284, + "learning_rate": 9.233910243254156e-06, + "loss": 0.0032, + "step": 81290 + }, + { + "epoch": 0.5214329464809689, + "grad_norm": 0.33407291769981384, + "learning_rate": 9.233612488115521e-06, + "loss": 0.0064, + "step": 81300 + }, + { + "epoch": 0.5214970833747551, + "grad_norm": 0.2003421038389206, + "learning_rate": 9.233314679926698e-06, + "loss": 0.0032, + "step": 81310 + }, + { + "epoch": 0.5215612202685411, + "grad_norm": 0.12202927470207214, + "learning_rate": 9.233016818691415e-06, + "loss": 0.0038, + "step": 81320 + }, + { + "epoch": 0.5216253571623273, + "grad_norm": 0.2097141444683075, + "learning_rate": 9.232718904413405e-06, + "loss": 0.0021, + "step": 81330 + }, + { + "epoch": 0.5216894940561134, + "grad_norm": 0.17498457431793213, + "learning_rate": 9.232420937096403e-06, + "loss": 0.0052, + "step": 81340 + }, + { + "epoch": 0.5217536309498995, + "grad_norm": 0.21181584894657135, + "learning_rate": 9.232122916744141e-06, + "loss": 0.002, + "step": 81350 + }, + { + "epoch": 0.5218177678436856, + "grad_norm": 0.22371451556682587, + "learning_rate": 9.231824843360356e-06, + "loss": 0.0022, + "step": 81360 + }, + { + "epoch": 0.5218819047374716, + "grad_norm": 0.11706782877445221, + "learning_rate": 9.23152671694878e-06, + "loss": 0.0031, + "step": 81370 + }, + { + "epoch": 0.5219460416312578, + "grad_norm": 0.19505396485328674, + "learning_rate": 9.23122853751315e-06, + "loss": 0.0041, + "step": 81380 + }, + { + "epoch": 0.5220101785250438, + "grad_norm": 0.2122873216867447, + "learning_rate": 9.230930305057204e-06, + "loss": 0.0028, + "step": 81390 + }, + { + "epoch": 0.52207431541883, + "grad_norm": 0.17082518339157104, + "learning_rate": 9.230632019584676e-06, + "loss": 0.0023, + "step": 81400 + }, + { + "epoch": 0.522138452312616, + "grad_norm": 0.1508171409368515, + "learning_rate": 9.230333681099305e-06, + "loss": 0.0028, + "step": 81410 + }, + { + "epoch": 0.5222025892064022, + "grad_norm": 0.07956456393003464, + "learning_rate": 9.23003528960483e-06, + "loss": 0.0023, + "step": 81420 + }, + { + "epoch": 0.5222667261001882, + "grad_norm": 0.05028301477432251, + "learning_rate": 9.229736845104991e-06, + "loss": 0.0037, + "step": 81430 + }, + { + "epoch": 0.5223308629939744, + "grad_norm": 0.09844990819692612, + "learning_rate": 9.229438347603525e-06, + "loss": 0.0043, + "step": 81440 + }, + { + "epoch": 0.5223949998877604, + "grad_norm": 0.11157780140638351, + "learning_rate": 9.229139797104173e-06, + "loss": 0.002, + "step": 81450 + }, + { + "epoch": 0.5224591367815465, + "grad_norm": 0.22767135500907898, + "learning_rate": 9.228841193610679e-06, + "loss": 0.0035, + "step": 81460 + }, + { + "epoch": 0.5225232736753326, + "grad_norm": 0.09955685585737228, + "learning_rate": 9.22854253712678e-06, + "loss": 0.0034, + "step": 81470 + }, + { + "epoch": 0.5225874105691187, + "grad_norm": 0.19802632927894592, + "learning_rate": 9.228243827656222e-06, + "loss": 0.0036, + "step": 81480 + }, + { + "epoch": 0.5226515474629049, + "grad_norm": 0.0587104968726635, + "learning_rate": 9.227945065202746e-06, + "loss": 0.0032, + "step": 81490 + }, + { + "epoch": 0.5227156843566909, + "grad_norm": 0.29555052518844604, + "learning_rate": 9.2276462497701e-06, + "loss": 0.0059, + "step": 81500 + }, + { + "epoch": 0.5227798212504771, + "grad_norm": 0.14725208282470703, + "learning_rate": 9.227347381362021e-06, + "loss": 0.0035, + "step": 81510 + }, + { + "epoch": 0.5228439581442631, + "grad_norm": 0.15954144299030304, + "learning_rate": 9.227048459982261e-06, + "loss": 0.0024, + "step": 81520 + }, + { + "epoch": 0.5229080950380492, + "grad_norm": 0.1816718876361847, + "learning_rate": 9.226749485634561e-06, + "loss": 0.0044, + "step": 81530 + }, + { + "epoch": 0.5229722319318353, + "grad_norm": 0.017237460240721703, + "learning_rate": 9.22645045832267e-06, + "loss": 0.0036, + "step": 81540 + }, + { + "epoch": 0.5230363688256214, + "grad_norm": 0.1542568802833557, + "learning_rate": 9.226151378050334e-06, + "loss": 0.0015, + "step": 81550 + }, + { + "epoch": 0.5231005057194075, + "grad_norm": 0.25452756881713867, + "learning_rate": 9.2258522448213e-06, + "loss": 0.0039, + "step": 81560 + }, + { + "epoch": 0.5231646426131936, + "grad_norm": 0.20770305395126343, + "learning_rate": 9.225553058639318e-06, + "loss": 0.002, + "step": 81570 + }, + { + "epoch": 0.5232287795069797, + "grad_norm": 0.25020110607147217, + "learning_rate": 9.225253819508137e-06, + "loss": 0.0045, + "step": 81580 + }, + { + "epoch": 0.5232929164007658, + "grad_norm": 0.06710415333509445, + "learning_rate": 9.224954527431504e-06, + "loss": 0.0023, + "step": 81590 + }, + { + "epoch": 0.5233570532945518, + "grad_norm": 0.1380765736103058, + "learning_rate": 9.224655182413174e-06, + "loss": 0.0041, + "step": 81600 + }, + { + "epoch": 0.523421190188338, + "grad_norm": 0.06237601116299629, + "learning_rate": 9.224355784456894e-06, + "loss": 0.0022, + "step": 81610 + }, + { + "epoch": 0.5234853270821241, + "grad_norm": 0.15989889204502106, + "learning_rate": 9.224056333566416e-06, + "loss": 0.0035, + "step": 81620 + }, + { + "epoch": 0.5235494639759102, + "grad_norm": 0.06872167438268661, + "learning_rate": 9.223756829745494e-06, + "loss": 0.0028, + "step": 81630 + }, + { + "epoch": 0.5236136008696963, + "grad_norm": 0.06739316880702972, + "learning_rate": 9.223457272997878e-06, + "loss": 0.006, + "step": 81640 + }, + { + "epoch": 0.5236777377634824, + "grad_norm": 0.10684234648942947, + "learning_rate": 9.223157663327326e-06, + "loss": 0.0065, + "step": 81650 + }, + { + "epoch": 0.5237418746572685, + "grad_norm": 0.2152058184146881, + "learning_rate": 9.22285800073759e-06, + "loss": 0.0036, + "step": 81660 + }, + { + "epoch": 0.5238060115510546, + "grad_norm": 0.100911445915699, + "learning_rate": 9.222558285232426e-06, + "loss": 0.0027, + "step": 81670 + }, + { + "epoch": 0.5238701484448407, + "grad_norm": 0.1213904395699501, + "learning_rate": 9.222258516815589e-06, + "loss": 0.0025, + "step": 81680 + }, + { + "epoch": 0.5239342853386267, + "grad_norm": 0.25971806049346924, + "learning_rate": 9.221958695490834e-06, + "loss": 0.004, + "step": 81690 + }, + { + "epoch": 0.5239984222324129, + "grad_norm": 0.17598684132099152, + "learning_rate": 9.22165882126192e-06, + "loss": 0.0034, + "step": 81700 + }, + { + "epoch": 0.5240625591261989, + "grad_norm": 0.2927113473415375, + "learning_rate": 9.221358894132604e-06, + "loss": 0.0023, + "step": 81710 + }, + { + "epoch": 0.5241266960199851, + "grad_norm": 0.1922122985124588, + "learning_rate": 9.221058914106643e-06, + "loss": 0.0039, + "step": 81720 + }, + { + "epoch": 0.5241908329137711, + "grad_norm": 0.060337141156196594, + "learning_rate": 9.220758881187797e-06, + "loss": 0.0058, + "step": 81730 + }, + { + "epoch": 0.5242549698075573, + "grad_norm": 0.4425851106643677, + "learning_rate": 9.220458795379825e-06, + "loss": 0.0034, + "step": 81740 + }, + { + "epoch": 0.5243191067013433, + "grad_norm": 0.15111497044563293, + "learning_rate": 9.22015865668649e-06, + "loss": 0.0018, + "step": 81750 + }, + { + "epoch": 0.5243832435951294, + "grad_norm": 0.30985045433044434, + "learning_rate": 9.219858465111551e-06, + "loss": 0.0031, + "step": 81760 + }, + { + "epoch": 0.5244473804889156, + "grad_norm": 0.17037814855575562, + "learning_rate": 9.219558220658768e-06, + "loss": 0.0054, + "step": 81770 + }, + { + "epoch": 0.5245115173827016, + "grad_norm": 0.19051095843315125, + "learning_rate": 9.219257923331906e-06, + "loss": 0.0033, + "step": 81780 + }, + { + "epoch": 0.5245756542764878, + "grad_norm": 0.13113057613372803, + "learning_rate": 9.218957573134725e-06, + "loss": 0.004, + "step": 81790 + }, + { + "epoch": 0.5246397911702738, + "grad_norm": 0.14635784924030304, + "learning_rate": 9.218657170070993e-06, + "loss": 0.0031, + "step": 81800 + }, + { + "epoch": 0.52470392806406, + "grad_norm": 0.07720329612493515, + "learning_rate": 9.21835671414447e-06, + "loss": 0.0021, + "step": 81810 + }, + { + "epoch": 0.524768064957846, + "grad_norm": 0.046794239431619644, + "learning_rate": 9.218056205358924e-06, + "loss": 0.0039, + "step": 81820 + }, + { + "epoch": 0.5248322018516322, + "grad_norm": 0.0852995216846466, + "learning_rate": 9.217755643718116e-06, + "loss": 0.0045, + "step": 81830 + }, + { + "epoch": 0.5248963387454182, + "grad_norm": 0.16625656187534332, + "learning_rate": 9.217455029225818e-06, + "loss": 0.0027, + "step": 81840 + }, + { + "epoch": 0.5249604756392043, + "grad_norm": 0.01320594735443592, + "learning_rate": 9.217154361885794e-06, + "loss": 0.0031, + "step": 81850 + }, + { + "epoch": 0.5250246125329904, + "grad_norm": 0.4065563976764679, + "learning_rate": 9.216853641701811e-06, + "loss": 0.0026, + "step": 81860 + }, + { + "epoch": 0.5250887494267765, + "grad_norm": 0.07078109681606293, + "learning_rate": 9.216552868677639e-06, + "loss": 0.003, + "step": 81870 + }, + { + "epoch": 0.5251528863205626, + "grad_norm": 0.2689833641052246, + "learning_rate": 9.216252042817045e-06, + "loss": 0.0032, + "step": 81880 + }, + { + "epoch": 0.5252170232143487, + "grad_norm": 0.4440864026546478, + "learning_rate": 9.2159511641238e-06, + "loss": 0.0038, + "step": 81890 + }, + { + "epoch": 0.5252811601081347, + "grad_norm": 0.09967883676290512, + "learning_rate": 9.215650232601673e-06, + "loss": 0.0043, + "step": 81900 + }, + { + "epoch": 0.5253452970019209, + "grad_norm": 0.35125958919525146, + "learning_rate": 9.215349248254439e-06, + "loss": 0.0046, + "step": 81910 + }, + { + "epoch": 0.525409433895707, + "grad_norm": 0.034344110637903214, + "learning_rate": 9.215048211085864e-06, + "loss": 0.0017, + "step": 81920 + }, + { + "epoch": 0.5254735707894931, + "grad_norm": 0.20028561353683472, + "learning_rate": 9.214747121099721e-06, + "loss": 0.0032, + "step": 81930 + }, + { + "epoch": 0.5255377076832792, + "grad_norm": 0.16437016427516937, + "learning_rate": 9.214445978299787e-06, + "loss": 0.004, + "step": 81940 + }, + { + "epoch": 0.5256018445770653, + "grad_norm": 0.20637601613998413, + "learning_rate": 9.214144782689832e-06, + "loss": 0.0025, + "step": 81950 + }, + { + "epoch": 0.5256659814708514, + "grad_norm": 0.09163656085729599, + "learning_rate": 9.21384353427363e-06, + "loss": 0.0023, + "step": 81960 + }, + { + "epoch": 0.5257301183646375, + "grad_norm": 0.22363221645355225, + "learning_rate": 9.213542233054958e-06, + "loss": 0.0039, + "step": 81970 + }, + { + "epoch": 0.5257942552584236, + "grad_norm": 0.174314945936203, + "learning_rate": 9.21324087903759e-06, + "loss": 0.0036, + "step": 81980 + }, + { + "epoch": 0.5258583921522096, + "grad_norm": 0.30793848633766174, + "learning_rate": 9.212939472225304e-06, + "loss": 0.0028, + "step": 81990 + }, + { + "epoch": 0.5259225290459958, + "grad_norm": 0.38619428873062134, + "learning_rate": 9.212638012621875e-06, + "loss": 0.003, + "step": 82000 + }, + { + "epoch": 0.5259866659397818, + "grad_norm": 0.43908780813217163, + "learning_rate": 9.212336500231079e-06, + "loss": 0.0065, + "step": 82010 + }, + { + "epoch": 0.526050802833568, + "grad_norm": 0.08649495989084244, + "learning_rate": 9.212034935056698e-06, + "loss": 0.0017, + "step": 82020 + }, + { + "epoch": 0.526114939727354, + "grad_norm": 0.10458250343799591, + "learning_rate": 9.211733317102509e-06, + "loss": 0.0045, + "step": 82030 + }, + { + "epoch": 0.5261790766211402, + "grad_norm": 0.14031673967838287, + "learning_rate": 9.21143164637229e-06, + "loss": 0.0029, + "step": 82040 + }, + { + "epoch": 0.5262432135149263, + "grad_norm": 0.2695571780204773, + "learning_rate": 9.211129922869823e-06, + "loss": 0.0033, + "step": 82050 + }, + { + "epoch": 0.5263073504087123, + "grad_norm": 0.05847835913300514, + "learning_rate": 9.210828146598889e-06, + "loss": 0.0028, + "step": 82060 + }, + { + "epoch": 0.5263714873024985, + "grad_norm": 0.12969672679901123, + "learning_rate": 9.210526317563269e-06, + "loss": 0.0033, + "step": 82070 + }, + { + "epoch": 0.5264356241962845, + "grad_norm": 0.24454118311405182, + "learning_rate": 9.210224435766743e-06, + "loss": 0.0045, + "step": 82080 + }, + { + "epoch": 0.5264997610900707, + "grad_norm": 0.1197141483426094, + "learning_rate": 9.209922501213098e-06, + "loss": 0.0041, + "step": 82090 + }, + { + "epoch": 0.5265638979838567, + "grad_norm": 0.3682247996330261, + "learning_rate": 9.209620513906116e-06, + "loss": 0.0052, + "step": 82100 + }, + { + "epoch": 0.5266280348776429, + "grad_norm": 0.2559485137462616, + "learning_rate": 9.20931847384958e-06, + "loss": 0.0022, + "step": 82110 + }, + { + "epoch": 0.5266921717714289, + "grad_norm": 0.07267298549413681, + "learning_rate": 9.209016381047273e-06, + "loss": 0.0033, + "step": 82120 + }, + { + "epoch": 0.5267563086652151, + "grad_norm": 0.155486598610878, + "learning_rate": 9.208714235502985e-06, + "loss": 0.0052, + "step": 82130 + }, + { + "epoch": 0.5268204455590011, + "grad_norm": 0.14211998879909515, + "learning_rate": 9.208412037220497e-06, + "loss": 0.0046, + "step": 82140 + }, + { + "epoch": 0.5268845824527872, + "grad_norm": 0.16526386141777039, + "learning_rate": 9.208109786203602e-06, + "loss": 0.0023, + "step": 82150 + }, + { + "epoch": 0.5269487193465733, + "grad_norm": 0.40999835729599, + "learning_rate": 9.207807482456081e-06, + "loss": 0.0061, + "step": 82160 + }, + { + "epoch": 0.5270128562403594, + "grad_norm": 0.3757804036140442, + "learning_rate": 9.207505125981726e-06, + "loss": 0.0073, + "step": 82170 + }, + { + "epoch": 0.5270769931341455, + "grad_norm": 0.1633116602897644, + "learning_rate": 9.207202716784326e-06, + "loss": 0.0051, + "step": 82180 + }, + { + "epoch": 0.5271411300279316, + "grad_norm": 0.3592239320278168, + "learning_rate": 9.206900254867669e-06, + "loss": 0.007, + "step": 82190 + }, + { + "epoch": 0.5272052669217178, + "grad_norm": 0.27414438128471375, + "learning_rate": 9.206597740235543e-06, + "loss": 0.0071, + "step": 82200 + }, + { + "epoch": 0.5272694038155038, + "grad_norm": 0.07317782193422318, + "learning_rate": 9.206295172891742e-06, + "loss": 0.0036, + "step": 82210 + }, + { + "epoch": 0.52733354070929, + "grad_norm": 0.11561580747365952, + "learning_rate": 9.205992552840056e-06, + "loss": 0.0043, + "step": 82220 + }, + { + "epoch": 0.527397677603076, + "grad_norm": 0.14711986482143402, + "learning_rate": 9.205689880084277e-06, + "loss": 0.0034, + "step": 82230 + }, + { + "epoch": 0.5274618144968621, + "grad_norm": 0.09318558871746063, + "learning_rate": 9.205387154628198e-06, + "loss": 0.0058, + "step": 82240 + }, + { + "epoch": 0.5275259513906482, + "grad_norm": 0.14137667417526245, + "learning_rate": 9.205084376475615e-06, + "loss": 0.0031, + "step": 82250 + }, + { + "epoch": 0.5275900882844343, + "grad_norm": 0.22283531725406647, + "learning_rate": 9.204781545630317e-06, + "loss": 0.0044, + "step": 82260 + }, + { + "epoch": 0.5276542251782204, + "grad_norm": 0.15352214872837067, + "learning_rate": 9.204478662096101e-06, + "loss": 0.0036, + "step": 82270 + }, + { + "epoch": 0.5277183620720065, + "grad_norm": 0.18237556517124176, + "learning_rate": 9.204175725876762e-06, + "loss": 0.0018, + "step": 82280 + }, + { + "epoch": 0.5277824989657925, + "grad_norm": 0.07068685442209244, + "learning_rate": 9.203872736976098e-06, + "loss": 0.0046, + "step": 82290 + }, + { + "epoch": 0.5278466358595787, + "grad_norm": 0.1488722711801529, + "learning_rate": 9.203569695397905e-06, + "loss": 0.0034, + "step": 82300 + }, + { + "epoch": 0.5279107727533647, + "grad_norm": 0.10469914972782135, + "learning_rate": 9.203266601145977e-06, + "loss": 0.0037, + "step": 82310 + }, + { + "epoch": 0.5279749096471509, + "grad_norm": 0.17077229917049408, + "learning_rate": 9.202963454224117e-06, + "loss": 0.0028, + "step": 82320 + }, + { + "epoch": 0.528039046540937, + "grad_norm": 0.34882014989852905, + "learning_rate": 9.202660254636118e-06, + "loss": 0.0058, + "step": 82330 + }, + { + "epoch": 0.5281031834347231, + "grad_norm": 0.2677260935306549, + "learning_rate": 9.202357002385784e-06, + "loss": 0.0047, + "step": 82340 + }, + { + "epoch": 0.5281673203285092, + "grad_norm": 0.19920547306537628, + "learning_rate": 9.202053697476915e-06, + "loss": 0.0042, + "step": 82350 + }, + { + "epoch": 0.5282314572222953, + "grad_norm": 0.03182956576347351, + "learning_rate": 9.201750339913309e-06, + "loss": 0.0025, + "step": 82360 + }, + { + "epoch": 0.5282955941160814, + "grad_norm": 0.08129862695932388, + "learning_rate": 9.201446929698767e-06, + "loss": 0.0028, + "step": 82370 + }, + { + "epoch": 0.5283597310098674, + "grad_norm": 0.20995286107063293, + "learning_rate": 9.201143466837093e-06, + "loss": 0.0017, + "step": 82380 + }, + { + "epoch": 0.5284238679036536, + "grad_norm": 0.0474662110209465, + "learning_rate": 9.200839951332088e-06, + "loss": 0.0048, + "step": 82390 + }, + { + "epoch": 0.5284880047974396, + "grad_norm": 0.14652827382087708, + "learning_rate": 9.200536383187557e-06, + "loss": 0.0024, + "step": 82400 + }, + { + "epoch": 0.5285521416912258, + "grad_norm": 0.07190393656492233, + "learning_rate": 9.200232762407305e-06, + "loss": 0.0025, + "step": 82410 + }, + { + "epoch": 0.5286162785850118, + "grad_norm": 0.043861404061317444, + "learning_rate": 9.199929088995132e-06, + "loss": 0.0041, + "step": 82420 + }, + { + "epoch": 0.528680415478798, + "grad_norm": 0.08191066980361938, + "learning_rate": 9.199625362954847e-06, + "loss": 0.0034, + "step": 82430 + }, + { + "epoch": 0.528744552372584, + "grad_norm": 0.14318427443504333, + "learning_rate": 9.199321584290253e-06, + "loss": 0.0044, + "step": 82440 + }, + { + "epoch": 0.5288086892663701, + "grad_norm": 0.10137626528739929, + "learning_rate": 9.199017753005159e-06, + "loss": 0.0047, + "step": 82450 + }, + { + "epoch": 0.5288728261601562, + "grad_norm": 0.29027968645095825, + "learning_rate": 9.198713869103373e-06, + "loss": 0.0034, + "step": 82460 + }, + { + "epoch": 0.5289369630539423, + "grad_norm": 0.17395056784152985, + "learning_rate": 9.1984099325887e-06, + "loss": 0.0049, + "step": 82470 + }, + { + "epoch": 0.5290010999477285, + "grad_norm": 0.1001816838979721, + "learning_rate": 9.198105943464953e-06, + "loss": 0.0054, + "step": 82480 + }, + { + "epoch": 0.5290652368415145, + "grad_norm": 0.08453565835952759, + "learning_rate": 9.197801901735934e-06, + "loss": 0.003, + "step": 82490 + }, + { + "epoch": 0.5291293737353007, + "grad_norm": 0.17507454752922058, + "learning_rate": 9.19749780740546e-06, + "loss": 0.0024, + "step": 82500 + }, + { + "epoch": 0.5291935106290867, + "grad_norm": 0.3217713534832001, + "learning_rate": 9.197193660477337e-06, + "loss": 0.0023, + "step": 82510 + }, + { + "epoch": 0.5292576475228729, + "grad_norm": 0.45715153217315674, + "learning_rate": 9.196889460955379e-06, + "loss": 0.0044, + "step": 82520 + }, + { + "epoch": 0.5293217844166589, + "grad_norm": 0.07727304846048355, + "learning_rate": 9.196585208843397e-06, + "loss": 0.003, + "step": 82530 + }, + { + "epoch": 0.529385921310445, + "grad_norm": 0.0397333949804306, + "learning_rate": 9.1962809041452e-06, + "loss": 0.0036, + "step": 82540 + }, + { + "epoch": 0.5294500582042311, + "grad_norm": 0.03212263435125351, + "learning_rate": 9.195976546864607e-06, + "loss": 0.0041, + "step": 82550 + }, + { + "epoch": 0.5295141950980172, + "grad_norm": 0.46381548047065735, + "learning_rate": 9.195672137005427e-06, + "loss": 0.0023, + "step": 82560 + }, + { + "epoch": 0.5295783319918033, + "grad_norm": 0.31793341040611267, + "learning_rate": 9.195367674571477e-06, + "loss": 0.0031, + "step": 82570 + }, + { + "epoch": 0.5296424688855894, + "grad_norm": 0.05849529430270195, + "learning_rate": 9.195063159566574e-06, + "loss": 0.0024, + "step": 82580 + }, + { + "epoch": 0.5297066057793755, + "grad_norm": 0.2585103511810303, + "learning_rate": 9.19475859199453e-06, + "loss": 0.0024, + "step": 82590 + }, + { + "epoch": 0.5297707426731616, + "grad_norm": 0.1477556675672531, + "learning_rate": 9.194453971859162e-06, + "loss": 0.0027, + "step": 82600 + }, + { + "epoch": 0.5298348795669477, + "grad_norm": 0.12497889250516891, + "learning_rate": 9.19414929916429e-06, + "loss": 0.0038, + "step": 82610 + }, + { + "epoch": 0.5298990164607338, + "grad_norm": 0.2745285630226135, + "learning_rate": 9.193844573913726e-06, + "loss": 0.0022, + "step": 82620 + }, + { + "epoch": 0.5299631533545199, + "grad_norm": 0.029844246804714203, + "learning_rate": 9.193539796111296e-06, + "loss": 0.0039, + "step": 82630 + }, + { + "epoch": 0.530027290248306, + "grad_norm": 0.24429070949554443, + "learning_rate": 9.193234965760813e-06, + "loss": 0.0046, + "step": 82640 + }, + { + "epoch": 0.5300914271420921, + "grad_norm": 0.14826074242591858, + "learning_rate": 9.1929300828661e-06, + "loss": 0.0026, + "step": 82650 + }, + { + "epoch": 0.5301555640358782, + "grad_norm": 0.15577656030654907, + "learning_rate": 9.192625147430978e-06, + "loss": 0.003, + "step": 82660 + }, + { + "epoch": 0.5302197009296643, + "grad_norm": 0.0887686088681221, + "learning_rate": 9.192320159459263e-06, + "loss": 0.0035, + "step": 82670 + }, + { + "epoch": 0.5302838378234503, + "grad_norm": 0.058395545929670334, + "learning_rate": 9.192015118954782e-06, + "loss": 0.0025, + "step": 82680 + }, + { + "epoch": 0.5303479747172365, + "grad_norm": 0.023756619542837143, + "learning_rate": 9.191710025921356e-06, + "loss": 0.0037, + "step": 82690 + }, + { + "epoch": 0.5304121116110225, + "grad_norm": 0.3501819670200348, + "learning_rate": 9.191404880362807e-06, + "loss": 0.0038, + "step": 82700 + }, + { + "epoch": 0.5304762485048087, + "grad_norm": 0.2135632187128067, + "learning_rate": 9.19109968228296e-06, + "loss": 0.0019, + "step": 82710 + }, + { + "epoch": 0.5305403853985947, + "grad_norm": 0.11029896140098572, + "learning_rate": 9.19079443168564e-06, + "loss": 0.0025, + "step": 82720 + }, + { + "epoch": 0.5306045222923809, + "grad_norm": 0.23255330324172974, + "learning_rate": 9.190489128574666e-06, + "loss": 0.0047, + "step": 82730 + }, + { + "epoch": 0.5306686591861669, + "grad_norm": 0.08605362474918365, + "learning_rate": 9.190183772953872e-06, + "loss": 0.0039, + "step": 82740 + }, + { + "epoch": 0.530732796079953, + "grad_norm": 0.1807021051645279, + "learning_rate": 9.18987836482708e-06, + "loss": 0.003, + "step": 82750 + }, + { + "epoch": 0.5307969329737392, + "grad_norm": 0.12389001250267029, + "learning_rate": 9.189572904198118e-06, + "loss": 0.0029, + "step": 82760 + }, + { + "epoch": 0.5308610698675252, + "grad_norm": 0.045452624559402466, + "learning_rate": 9.189267391070812e-06, + "loss": 0.004, + "step": 82770 + }, + { + "epoch": 0.5309252067613114, + "grad_norm": 0.05268412083387375, + "learning_rate": 9.188961825448992e-06, + "loss": 0.003, + "step": 82780 + }, + { + "epoch": 0.5309893436550974, + "grad_norm": 0.2025286704301834, + "learning_rate": 9.188656207336488e-06, + "loss": 0.0031, + "step": 82790 + }, + { + "epoch": 0.5310534805488836, + "grad_norm": 0.0918094664812088, + "learning_rate": 9.188350536737126e-06, + "loss": 0.005, + "step": 82800 + }, + { + "epoch": 0.5311176174426696, + "grad_norm": 0.11574849486351013, + "learning_rate": 9.18804481365474e-06, + "loss": 0.0038, + "step": 82810 + }, + { + "epoch": 0.5311817543364558, + "grad_norm": 0.2847580015659332, + "learning_rate": 9.187739038093157e-06, + "loss": 0.0061, + "step": 82820 + }, + { + "epoch": 0.5312458912302418, + "grad_norm": 0.3507842719554901, + "learning_rate": 9.187433210056214e-06, + "loss": 0.0027, + "step": 82830 + }, + { + "epoch": 0.5313100281240279, + "grad_norm": 0.196197047829628, + "learning_rate": 9.187127329547739e-06, + "loss": 0.0017, + "step": 82840 + }, + { + "epoch": 0.531374165017814, + "grad_norm": 0.1247221902012825, + "learning_rate": 9.186821396571565e-06, + "loss": 0.0054, + "step": 82850 + }, + { + "epoch": 0.5314383019116001, + "grad_norm": 0.16086354851722717, + "learning_rate": 9.186515411131527e-06, + "loss": 0.0019, + "step": 82860 + }, + { + "epoch": 0.5315024388053862, + "grad_norm": 0.11792601644992828, + "learning_rate": 9.186209373231461e-06, + "loss": 0.0042, + "step": 82870 + }, + { + "epoch": 0.5315665756991723, + "grad_norm": 0.23305004835128784, + "learning_rate": 9.185903282875197e-06, + "loss": 0.0029, + "step": 82880 + }, + { + "epoch": 0.5316307125929585, + "grad_norm": 0.23246848583221436, + "learning_rate": 9.185597140066573e-06, + "loss": 0.0035, + "step": 82890 + }, + { + "epoch": 0.5316948494867445, + "grad_norm": 0.2900582253932953, + "learning_rate": 9.185290944809429e-06, + "loss": 0.0037, + "step": 82900 + }, + { + "epoch": 0.5317589863805307, + "grad_norm": 0.11810173094272614, + "learning_rate": 9.184984697107594e-06, + "loss": 0.0026, + "step": 82910 + }, + { + "epoch": 0.5318231232743167, + "grad_norm": 0.18069452047348022, + "learning_rate": 9.184678396964911e-06, + "loss": 0.0032, + "step": 82920 + }, + { + "epoch": 0.5318872601681028, + "grad_norm": 0.06355767697095871, + "learning_rate": 9.184372044385219e-06, + "loss": 0.0033, + "step": 82930 + }, + { + "epoch": 0.5319513970618889, + "grad_norm": 0.018011651933193207, + "learning_rate": 9.184065639372353e-06, + "loss": 0.0027, + "step": 82940 + }, + { + "epoch": 0.532015533955675, + "grad_norm": 0.11940117180347443, + "learning_rate": 9.183759181930154e-06, + "loss": 0.0019, + "step": 82950 + }, + { + "epoch": 0.5320796708494611, + "grad_norm": 0.14173869788646698, + "learning_rate": 9.183452672062464e-06, + "loss": 0.0027, + "step": 82960 + }, + { + "epoch": 0.5321438077432472, + "grad_norm": 0.014904815703630447, + "learning_rate": 9.183146109773121e-06, + "loss": 0.0031, + "step": 82970 + }, + { + "epoch": 0.5322079446370332, + "grad_norm": 0.132496178150177, + "learning_rate": 9.182839495065968e-06, + "loss": 0.0025, + "step": 82980 + }, + { + "epoch": 0.5322720815308194, + "grad_norm": 0.12323196232318878, + "learning_rate": 9.182532827944846e-06, + "loss": 0.004, + "step": 82990 + }, + { + "epoch": 0.5323362184246054, + "grad_norm": 0.2556149661540985, + "learning_rate": 9.1822261084136e-06, + "loss": 0.008, + "step": 83000 + }, + { + "epoch": 0.5324003553183916, + "grad_norm": 0.11383537948131561, + "learning_rate": 9.18191933647607e-06, + "loss": 0.0032, + "step": 83010 + }, + { + "epoch": 0.5324644922121776, + "grad_norm": 0.24259330332279205, + "learning_rate": 9.181612512136103e-06, + "loss": 0.004, + "step": 83020 + }, + { + "epoch": 0.5325286291059638, + "grad_norm": 0.296653151512146, + "learning_rate": 9.181305635397542e-06, + "loss": 0.0027, + "step": 83030 + }, + { + "epoch": 0.5325927659997499, + "grad_norm": 0.07351360470056534, + "learning_rate": 9.180998706264234e-06, + "loss": 0.0024, + "step": 83040 + }, + { + "epoch": 0.532656902893536, + "grad_norm": 0.11533034592866898, + "learning_rate": 9.180691724740023e-06, + "loss": 0.0031, + "step": 83050 + }, + { + "epoch": 0.5327210397873221, + "grad_norm": 0.07853560894727707, + "learning_rate": 9.180384690828758e-06, + "loss": 0.0027, + "step": 83060 + }, + { + "epoch": 0.5327851766811081, + "grad_norm": 0.08176155388355255, + "learning_rate": 9.180077604534283e-06, + "loss": 0.0027, + "step": 83070 + }, + { + "epoch": 0.5328493135748943, + "grad_norm": 0.3176024854183197, + "learning_rate": 9.17977046586045e-06, + "loss": 0.0028, + "step": 83080 + }, + { + "epoch": 0.5329134504686803, + "grad_norm": 0.28645059466362, + "learning_rate": 9.179463274811106e-06, + "loss": 0.0028, + "step": 83090 + }, + { + "epoch": 0.5329775873624665, + "grad_norm": 0.16513247787952423, + "learning_rate": 9.179156031390099e-06, + "loss": 0.0051, + "step": 83100 + }, + { + "epoch": 0.5330417242562525, + "grad_norm": 0.1360175758600235, + "learning_rate": 9.178848735601281e-06, + "loss": 0.0043, + "step": 83110 + }, + { + "epoch": 0.5331058611500387, + "grad_norm": 0.07889141887426376, + "learning_rate": 9.178541387448502e-06, + "loss": 0.0043, + "step": 83120 + }, + { + "epoch": 0.5331699980438247, + "grad_norm": 0.11900272965431213, + "learning_rate": 9.178233986935612e-06, + "loss": 0.0043, + "step": 83130 + }, + { + "epoch": 0.5332341349376108, + "grad_norm": 0.09275635331869125, + "learning_rate": 9.177926534066466e-06, + "loss": 0.0023, + "step": 83140 + }, + { + "epoch": 0.5332982718313969, + "grad_norm": 0.014314381405711174, + "learning_rate": 9.177619028844911e-06, + "loss": 0.0029, + "step": 83150 + }, + { + "epoch": 0.533362408725183, + "grad_norm": 0.10282004624605179, + "learning_rate": 9.177311471274808e-06, + "loss": 0.0044, + "step": 83160 + }, + { + "epoch": 0.5334265456189691, + "grad_norm": 0.1841009259223938, + "learning_rate": 9.177003861360003e-06, + "loss": 0.0063, + "step": 83170 + }, + { + "epoch": 0.5334906825127552, + "grad_norm": 0.1191323846578598, + "learning_rate": 9.176696199104358e-06, + "loss": 0.0032, + "step": 83180 + }, + { + "epoch": 0.5335548194065414, + "grad_norm": 0.047740787267684937, + "learning_rate": 9.176388484511722e-06, + "loss": 0.003, + "step": 83190 + }, + { + "epoch": 0.5336189563003274, + "grad_norm": 0.27387723326683044, + "learning_rate": 9.176080717585954e-06, + "loss": 0.0043, + "step": 83200 + }, + { + "epoch": 0.5336830931941136, + "grad_norm": 0.2824685871601105, + "learning_rate": 9.17577289833091e-06, + "loss": 0.0038, + "step": 83210 + }, + { + "epoch": 0.5337472300878996, + "grad_norm": 0.16182850301265717, + "learning_rate": 9.175465026750447e-06, + "loss": 0.0049, + "step": 83220 + }, + { + "epoch": 0.5338113669816857, + "grad_norm": 0.07123143970966339, + "learning_rate": 9.175157102848425e-06, + "loss": 0.0043, + "step": 83230 + }, + { + "epoch": 0.5338755038754718, + "grad_norm": 0.22581571340560913, + "learning_rate": 9.174849126628698e-06, + "loss": 0.0025, + "step": 83240 + }, + { + "epoch": 0.5339396407692579, + "grad_norm": 0.11887729167938232, + "learning_rate": 9.174541098095128e-06, + "loss": 0.0042, + "step": 83250 + }, + { + "epoch": 0.534003777663044, + "grad_norm": 0.09506013989448547, + "learning_rate": 9.174233017251577e-06, + "loss": 0.0025, + "step": 83260 + }, + { + "epoch": 0.5340679145568301, + "grad_norm": 0.3059485852718353, + "learning_rate": 9.173924884101902e-06, + "loss": 0.0035, + "step": 83270 + }, + { + "epoch": 0.5341320514506162, + "grad_norm": 0.2697980999946594, + "learning_rate": 9.173616698649963e-06, + "loss": 0.0034, + "step": 83280 + }, + { + "epoch": 0.5341961883444023, + "grad_norm": 0.06674520671367645, + "learning_rate": 9.173308460899627e-06, + "loss": 0.0032, + "step": 83290 + }, + { + "epoch": 0.5342603252381883, + "grad_norm": 0.33980584144592285, + "learning_rate": 9.173000170854752e-06, + "loss": 0.003, + "step": 83300 + }, + { + "epoch": 0.5343244621319745, + "grad_norm": 0.1164994016289711, + "learning_rate": 9.1726918285192e-06, + "loss": 0.0047, + "step": 83310 + }, + { + "epoch": 0.5343885990257606, + "grad_norm": 0.21461822092533112, + "learning_rate": 9.172383433896841e-06, + "loss": 0.0042, + "step": 83320 + }, + { + "epoch": 0.5344527359195467, + "grad_norm": 0.1043817549943924, + "learning_rate": 9.172074986991532e-06, + "loss": 0.0029, + "step": 83330 + }, + { + "epoch": 0.5345168728133328, + "grad_norm": 0.22230912744998932, + "learning_rate": 9.171766487807146e-06, + "loss": 0.0032, + "step": 83340 + }, + { + "epoch": 0.5345810097071189, + "grad_norm": 0.1703413873910904, + "learning_rate": 9.171457936347541e-06, + "loss": 0.0035, + "step": 83350 + }, + { + "epoch": 0.534645146600905, + "grad_norm": 0.20179855823516846, + "learning_rate": 9.171149332616589e-06, + "loss": 0.0054, + "step": 83360 + }, + { + "epoch": 0.534709283494691, + "grad_norm": 0.12678289413452148, + "learning_rate": 9.170840676618153e-06, + "loss": 0.0053, + "step": 83370 + }, + { + "epoch": 0.5347734203884772, + "grad_norm": 0.1558840423822403, + "learning_rate": 9.170531968356103e-06, + "loss": 0.0033, + "step": 83380 + }, + { + "epoch": 0.5348375572822632, + "grad_norm": 0.06316942721605301, + "learning_rate": 9.170223207834308e-06, + "loss": 0.0022, + "step": 83390 + }, + { + "epoch": 0.5349016941760494, + "grad_norm": 0.16530416905879974, + "learning_rate": 9.169914395056634e-06, + "loss": 0.0029, + "step": 83400 + }, + { + "epoch": 0.5349658310698354, + "grad_norm": 0.11882420629262924, + "learning_rate": 9.169605530026953e-06, + "loss": 0.0014, + "step": 83410 + }, + { + "epoch": 0.5350299679636216, + "grad_norm": 0.1492297351360321, + "learning_rate": 9.169296612749134e-06, + "loss": 0.0031, + "step": 83420 + }, + { + "epoch": 0.5350941048574076, + "grad_norm": 0.05903792381286621, + "learning_rate": 9.16898764322705e-06, + "loss": 0.0027, + "step": 83430 + }, + { + "epoch": 0.5351582417511938, + "grad_norm": 0.14036016166210175, + "learning_rate": 9.16867862146457e-06, + "loss": 0.0029, + "step": 83440 + }, + { + "epoch": 0.5352223786449798, + "grad_norm": 0.14284096658229828, + "learning_rate": 9.16836954746557e-06, + "loss": 0.0033, + "step": 83450 + }, + { + "epoch": 0.5352865155387659, + "grad_norm": 0.17533113062381744, + "learning_rate": 9.168060421233918e-06, + "loss": 0.0017, + "step": 83460 + }, + { + "epoch": 0.5353506524325521, + "grad_norm": 0.11281425505876541, + "learning_rate": 9.167751242773492e-06, + "loss": 0.0032, + "step": 83470 + }, + { + "epoch": 0.5354147893263381, + "grad_norm": 0.09841414541006088, + "learning_rate": 9.167442012088164e-06, + "loss": 0.0023, + "step": 83480 + }, + { + "epoch": 0.5354789262201243, + "grad_norm": 0.14227591454982758, + "learning_rate": 9.167132729181807e-06, + "loss": 0.0035, + "step": 83490 + }, + { + "epoch": 0.5355430631139103, + "grad_norm": 0.1604137420654297, + "learning_rate": 9.1668233940583e-06, + "loss": 0.0047, + "step": 83500 + }, + { + "epoch": 0.5356072000076965, + "grad_norm": 0.3364444375038147, + "learning_rate": 9.16651400672152e-06, + "loss": 0.0053, + "step": 83510 + }, + { + "epoch": 0.5356713369014825, + "grad_norm": 0.22063446044921875, + "learning_rate": 9.166204567175338e-06, + "loss": 0.0035, + "step": 83520 + }, + { + "epoch": 0.5357354737952686, + "grad_norm": 0.08615615218877792, + "learning_rate": 9.165895075423638e-06, + "loss": 0.0027, + "step": 83530 + }, + { + "epoch": 0.5357996106890547, + "grad_norm": 0.14934763312339783, + "learning_rate": 9.165585531470294e-06, + "loss": 0.0041, + "step": 83540 + }, + { + "epoch": 0.5358637475828408, + "grad_norm": 0.12722347676753998, + "learning_rate": 9.165275935319186e-06, + "loss": 0.0019, + "step": 83550 + }, + { + "epoch": 0.5359278844766269, + "grad_norm": 0.15359367430210114, + "learning_rate": 9.164966286974195e-06, + "loss": 0.0037, + "step": 83560 + }, + { + "epoch": 0.535992021370413, + "grad_norm": 0.055601347237825394, + "learning_rate": 9.164656586439199e-06, + "loss": 0.0025, + "step": 83570 + }, + { + "epoch": 0.5360561582641991, + "grad_norm": 0.1848033219575882, + "learning_rate": 9.16434683371808e-06, + "loss": 0.0028, + "step": 83580 + }, + { + "epoch": 0.5361202951579852, + "grad_norm": 0.16610196232795715, + "learning_rate": 9.164037028814718e-06, + "loss": 0.0039, + "step": 83590 + }, + { + "epoch": 0.5361844320517714, + "grad_norm": 0.012549067847430706, + "learning_rate": 9.163727171732997e-06, + "loss": 0.0021, + "step": 83600 + }, + { + "epoch": 0.5362485689455574, + "grad_norm": 0.14107391238212585, + "learning_rate": 9.163417262476797e-06, + "loss": 0.003, + "step": 83610 + }, + { + "epoch": 0.5363127058393435, + "grad_norm": 0.21862909197807312, + "learning_rate": 9.163107301050005e-06, + "loss": 0.0041, + "step": 83620 + }, + { + "epoch": 0.5363768427331296, + "grad_norm": 0.1434558480978012, + "learning_rate": 9.162797287456502e-06, + "loss": 0.0025, + "step": 83630 + }, + { + "epoch": 0.5364409796269157, + "grad_norm": 0.07252000272274017, + "learning_rate": 9.162487221700176e-06, + "loss": 0.0029, + "step": 83640 + }, + { + "epoch": 0.5365051165207018, + "grad_norm": 0.2747831642627716, + "learning_rate": 9.162177103784908e-06, + "loss": 0.0028, + "step": 83650 + }, + { + "epoch": 0.5365692534144879, + "grad_norm": 0.07841059565544128, + "learning_rate": 9.161866933714587e-06, + "loss": 0.0035, + "step": 83660 + }, + { + "epoch": 0.536633390308274, + "grad_norm": 0.12187693268060684, + "learning_rate": 9.161556711493098e-06, + "loss": 0.0021, + "step": 83670 + }, + { + "epoch": 0.5366975272020601, + "grad_norm": 0.27229297161102295, + "learning_rate": 9.161246437124331e-06, + "loss": 0.004, + "step": 83680 + }, + { + "epoch": 0.5367616640958461, + "grad_norm": 0.06621730327606201, + "learning_rate": 9.160936110612172e-06, + "loss": 0.0045, + "step": 83690 + }, + { + "epoch": 0.5368258009896323, + "grad_norm": 0.09571243077516556, + "learning_rate": 9.160625731960508e-06, + "loss": 0.0036, + "step": 83700 + }, + { + "epoch": 0.5368899378834183, + "grad_norm": 0.16894841194152832, + "learning_rate": 9.16031530117323e-06, + "loss": 0.003, + "step": 83710 + }, + { + "epoch": 0.5369540747772045, + "grad_norm": 0.08017881959676743, + "learning_rate": 9.16000481825423e-06, + "loss": 0.0035, + "step": 83720 + }, + { + "epoch": 0.5370182116709905, + "grad_norm": 0.1632446050643921, + "learning_rate": 9.159694283207395e-06, + "loss": 0.0031, + "step": 83730 + }, + { + "epoch": 0.5370823485647767, + "grad_norm": 0.005749837029725313, + "learning_rate": 9.159383696036618e-06, + "loss": 0.0026, + "step": 83740 + }, + { + "epoch": 0.5371464854585628, + "grad_norm": 0.05540066584944725, + "learning_rate": 9.159073056745791e-06, + "loss": 0.0026, + "step": 83750 + }, + { + "epoch": 0.5372106223523488, + "grad_norm": 0.09091489017009735, + "learning_rate": 9.158762365338807e-06, + "loss": 0.0029, + "step": 83760 + }, + { + "epoch": 0.537274759246135, + "grad_norm": 0.11926014721393585, + "learning_rate": 9.158451621819558e-06, + "loss": 0.0039, + "step": 83770 + }, + { + "epoch": 0.537338896139921, + "grad_norm": 0.2170051485300064, + "learning_rate": 9.158140826191936e-06, + "loss": 0.0037, + "step": 83780 + }, + { + "epoch": 0.5374030330337072, + "grad_norm": 0.04482047259807587, + "learning_rate": 9.15782997845984e-06, + "loss": 0.0027, + "step": 83790 + }, + { + "epoch": 0.5374671699274932, + "grad_norm": 0.07231690734624863, + "learning_rate": 9.157519078627162e-06, + "loss": 0.0027, + "step": 83800 + }, + { + "epoch": 0.5375313068212794, + "grad_norm": 0.15165331959724426, + "learning_rate": 9.157208126697797e-06, + "loss": 0.0047, + "step": 83810 + }, + { + "epoch": 0.5375954437150654, + "grad_norm": 0.21467633545398712, + "learning_rate": 9.156897122675645e-06, + "loss": 0.0051, + "step": 83820 + }, + { + "epoch": 0.5376595806088516, + "grad_norm": 0.30737295746803284, + "learning_rate": 9.1565860665646e-06, + "loss": 0.0049, + "step": 83830 + }, + { + "epoch": 0.5377237175026376, + "grad_norm": 0.2601054310798645, + "learning_rate": 9.156274958368563e-06, + "loss": 0.0034, + "step": 83840 + }, + { + "epoch": 0.5377878543964237, + "grad_norm": 0.08841709047555923, + "learning_rate": 9.15596379809143e-06, + "loss": 0.0034, + "step": 83850 + }, + { + "epoch": 0.5378519912902098, + "grad_norm": 0.1914767175912857, + "learning_rate": 9.155652585737098e-06, + "loss": 0.0028, + "step": 83860 + }, + { + "epoch": 0.5379161281839959, + "grad_norm": 0.6179146766662598, + "learning_rate": 9.15534132130947e-06, + "loss": 0.0037, + "step": 83870 + }, + { + "epoch": 0.5379802650777821, + "grad_norm": 0.2201271653175354, + "learning_rate": 9.155030004812446e-06, + "loss": 0.004, + "step": 83880 + }, + { + "epoch": 0.5380444019715681, + "grad_norm": 0.3279046416282654, + "learning_rate": 9.154718636249928e-06, + "loss": 0.0043, + "step": 83890 + }, + { + "epoch": 0.5381085388653543, + "grad_norm": 0.10568535327911377, + "learning_rate": 9.154407215625814e-06, + "loss": 0.0022, + "step": 83900 + }, + { + "epoch": 0.5381726757591403, + "grad_norm": 0.12581396102905273, + "learning_rate": 9.15409574294401e-06, + "loss": 0.0029, + "step": 83910 + }, + { + "epoch": 0.5382368126529264, + "grad_norm": 0.2874618470668793, + "learning_rate": 9.153784218208416e-06, + "loss": 0.0021, + "step": 83920 + }, + { + "epoch": 0.5383009495467125, + "grad_norm": 0.16133472323417664, + "learning_rate": 9.153472641422938e-06, + "loss": 0.0048, + "step": 83930 + }, + { + "epoch": 0.5383650864404986, + "grad_norm": 0.17416003346443176, + "learning_rate": 9.153161012591478e-06, + "loss": 0.0029, + "step": 83940 + }, + { + "epoch": 0.5384292233342847, + "grad_norm": 0.15187788009643555, + "learning_rate": 9.152849331717944e-06, + "loss": 0.0023, + "step": 83950 + }, + { + "epoch": 0.5384933602280708, + "grad_norm": 0.2774655222892761, + "learning_rate": 9.15253759880624e-06, + "loss": 0.0038, + "step": 83960 + }, + { + "epoch": 0.5385574971218569, + "grad_norm": 0.2558445632457733, + "learning_rate": 9.15222581386027e-06, + "loss": 0.0029, + "step": 83970 + }, + { + "epoch": 0.538621634015643, + "grad_norm": 0.2403038740158081, + "learning_rate": 9.151913976883944e-06, + "loss": 0.0031, + "step": 83980 + }, + { + "epoch": 0.538685770909429, + "grad_norm": 0.18249399960041046, + "learning_rate": 9.151602087881169e-06, + "loss": 0.0043, + "step": 83990 + }, + { + "epoch": 0.5387499078032152, + "grad_norm": 0.2081063836812973, + "learning_rate": 9.151290146855853e-06, + "loss": 0.0054, + "step": 84000 + }, + { + "epoch": 0.5388140446970012, + "grad_norm": 0.17994354665279388, + "learning_rate": 9.150978153811904e-06, + "loss": 0.0038, + "step": 84010 + }, + { + "epoch": 0.5388781815907874, + "grad_norm": 0.07765493541955948, + "learning_rate": 9.150666108753232e-06, + "loss": 0.0038, + "step": 84020 + }, + { + "epoch": 0.5389423184845735, + "grad_norm": 0.140521839261055, + "learning_rate": 9.150354011683748e-06, + "loss": 0.0049, + "step": 84030 + }, + { + "epoch": 0.5390064553783596, + "grad_norm": 0.10827630013227463, + "learning_rate": 9.150041862607362e-06, + "loss": 0.0023, + "step": 84040 + }, + { + "epoch": 0.5390705922721457, + "grad_norm": 0.14978931844234467, + "learning_rate": 9.149729661527984e-06, + "loss": 0.0035, + "step": 84050 + }, + { + "epoch": 0.5391347291659317, + "grad_norm": 0.12695223093032837, + "learning_rate": 9.149417408449528e-06, + "loss": 0.0029, + "step": 84060 + }, + { + "epoch": 0.5391988660597179, + "grad_norm": 0.20081543922424316, + "learning_rate": 9.149105103375908e-06, + "loss": 0.0034, + "step": 84070 + }, + { + "epoch": 0.5392630029535039, + "grad_norm": 0.3733041286468506, + "learning_rate": 9.148792746311034e-06, + "loss": 0.0044, + "step": 84080 + }, + { + "epoch": 0.5393271398472901, + "grad_norm": 0.10281600058078766, + "learning_rate": 9.148480337258824e-06, + "loss": 0.0024, + "step": 84090 + }, + { + "epoch": 0.5393912767410761, + "grad_norm": 0.12156189978122711, + "learning_rate": 9.148167876223188e-06, + "loss": 0.0035, + "step": 84100 + }, + { + "epoch": 0.5394554136348623, + "grad_norm": 0.06447888165712357, + "learning_rate": 9.147855363208044e-06, + "loss": 0.0031, + "step": 84110 + }, + { + "epoch": 0.5395195505286483, + "grad_norm": 0.14546574652194977, + "learning_rate": 9.147542798217309e-06, + "loss": 0.0029, + "step": 84120 + }, + { + "epoch": 0.5395836874224345, + "grad_norm": 0.4374399781227112, + "learning_rate": 9.147230181254898e-06, + "loss": 0.0051, + "step": 84130 + }, + { + "epoch": 0.5396478243162205, + "grad_norm": 0.08889298886060715, + "learning_rate": 9.14691751232473e-06, + "loss": 0.0038, + "step": 84140 + }, + { + "epoch": 0.5397119612100066, + "grad_norm": 0.013681549578905106, + "learning_rate": 9.14660479143072e-06, + "loss": 0.003, + "step": 84150 + }, + { + "epoch": 0.5397760981037928, + "grad_norm": 0.1032249853014946, + "learning_rate": 9.146292018576788e-06, + "loss": 0.0032, + "step": 84160 + }, + { + "epoch": 0.5398402349975788, + "grad_norm": 0.16689357161521912, + "learning_rate": 9.145979193766855e-06, + "loss": 0.0023, + "step": 84170 + }, + { + "epoch": 0.539904371891365, + "grad_norm": 0.38058799505233765, + "learning_rate": 9.14566631700484e-06, + "loss": 0.0043, + "step": 84180 + }, + { + "epoch": 0.539968508785151, + "grad_norm": 0.2105763554573059, + "learning_rate": 9.145353388294662e-06, + "loss": 0.0037, + "step": 84190 + }, + { + "epoch": 0.5400326456789372, + "grad_norm": 0.09349032491445541, + "learning_rate": 9.145040407640245e-06, + "loss": 0.0044, + "step": 84200 + }, + { + "epoch": 0.5400967825727232, + "grad_norm": 0.13553950190544128, + "learning_rate": 9.144727375045507e-06, + "loss": 0.0043, + "step": 84210 + }, + { + "epoch": 0.5401609194665093, + "grad_norm": 0.07749900966882706, + "learning_rate": 9.144414290514374e-06, + "loss": 0.0017, + "step": 84220 + }, + { + "epoch": 0.5402250563602954, + "grad_norm": 0.2512030005455017, + "learning_rate": 9.144101154050769e-06, + "loss": 0.0048, + "step": 84230 + }, + { + "epoch": 0.5402891932540815, + "grad_norm": 0.031024569645524025, + "learning_rate": 9.143787965658615e-06, + "loss": 0.005, + "step": 84240 + }, + { + "epoch": 0.5403533301478676, + "grad_norm": 0.1312863677740097, + "learning_rate": 9.143474725341835e-06, + "loss": 0.0019, + "step": 84250 + }, + { + "epoch": 0.5404174670416537, + "grad_norm": 0.5630732178688049, + "learning_rate": 9.143161433104355e-06, + "loss": 0.0033, + "step": 84260 + }, + { + "epoch": 0.5404816039354398, + "grad_norm": 0.17601896822452545, + "learning_rate": 9.142848088950102e-06, + "loss": 0.0045, + "step": 84270 + }, + { + "epoch": 0.5405457408292259, + "grad_norm": 0.2565477192401886, + "learning_rate": 9.142534692883002e-06, + "loss": 0.0025, + "step": 84280 + }, + { + "epoch": 0.540609877723012, + "grad_norm": 0.21444521844387054, + "learning_rate": 9.14222124490698e-06, + "loss": 0.0034, + "step": 84290 + }, + { + "epoch": 0.5406740146167981, + "grad_norm": 0.07533738762140274, + "learning_rate": 9.141907745025966e-06, + "loss": 0.002, + "step": 84300 + }, + { + "epoch": 0.5407381515105842, + "grad_norm": 0.0890752300620079, + "learning_rate": 9.141594193243888e-06, + "loss": 0.004, + "step": 84310 + }, + { + "epoch": 0.5408022884043703, + "grad_norm": 0.2074747085571289, + "learning_rate": 9.141280589564676e-06, + "loss": 0.004, + "step": 84320 + }, + { + "epoch": 0.5408664252981564, + "grad_norm": 0.16621294617652893, + "learning_rate": 9.140966933992256e-06, + "loss": 0.0041, + "step": 84330 + }, + { + "epoch": 0.5409305621919425, + "grad_norm": 0.04957873001694679, + "learning_rate": 9.140653226530564e-06, + "loss": 0.0026, + "step": 84340 + }, + { + "epoch": 0.5409946990857286, + "grad_norm": 0.3442468047142029, + "learning_rate": 9.140339467183525e-06, + "loss": 0.0041, + "step": 84350 + }, + { + "epoch": 0.5410588359795147, + "grad_norm": 0.07848238199949265, + "learning_rate": 9.140025655955076e-06, + "loss": 0.004, + "step": 84360 + }, + { + "epoch": 0.5411229728733008, + "grad_norm": 0.044110093265771866, + "learning_rate": 9.139711792849144e-06, + "loss": 0.0032, + "step": 84370 + }, + { + "epoch": 0.5411871097670868, + "grad_norm": 0.2451447993516922, + "learning_rate": 9.139397877869667e-06, + "loss": 0.0033, + "step": 84380 + }, + { + "epoch": 0.541251246660873, + "grad_norm": 0.0834902822971344, + "learning_rate": 9.139083911020574e-06, + "loss": 0.0054, + "step": 84390 + }, + { + "epoch": 0.541315383554659, + "grad_norm": 0.12984836101531982, + "learning_rate": 9.138769892305803e-06, + "loss": 0.0022, + "step": 84400 + }, + { + "epoch": 0.5413795204484452, + "grad_norm": 0.14569932222366333, + "learning_rate": 9.138455821729287e-06, + "loss": 0.0043, + "step": 84410 + }, + { + "epoch": 0.5414436573422312, + "grad_norm": 0.30914196372032166, + "learning_rate": 9.138141699294962e-06, + "loss": 0.0087, + "step": 84420 + }, + { + "epoch": 0.5415077942360174, + "grad_norm": 0.27151378989219666, + "learning_rate": 9.137827525006763e-06, + "loss": 0.0033, + "step": 84430 + }, + { + "epoch": 0.5415719311298035, + "grad_norm": 0.14409923553466797, + "learning_rate": 9.13751329886863e-06, + "loss": 0.0029, + "step": 84440 + }, + { + "epoch": 0.5416360680235895, + "grad_norm": 0.1737694889307022, + "learning_rate": 9.137199020884497e-06, + "loss": 0.0025, + "step": 84450 + }, + { + "epoch": 0.5417002049173757, + "grad_norm": 0.019738130271434784, + "learning_rate": 9.136884691058303e-06, + "loss": 0.0026, + "step": 84460 + }, + { + "epoch": 0.5417643418111617, + "grad_norm": 0.0797569677233696, + "learning_rate": 9.136570309393988e-06, + "loss": 0.0034, + "step": 84470 + }, + { + "epoch": 0.5418284787049479, + "grad_norm": 0.21312910318374634, + "learning_rate": 9.13625587589549e-06, + "loss": 0.0034, + "step": 84480 + }, + { + "epoch": 0.5418926155987339, + "grad_norm": 0.151839017868042, + "learning_rate": 9.135941390566749e-06, + "loss": 0.0021, + "step": 84490 + }, + { + "epoch": 0.5419567524925201, + "grad_norm": 0.28048601746559143, + "learning_rate": 9.135626853411707e-06, + "loss": 0.0054, + "step": 84500 + }, + { + "epoch": 0.5420208893863061, + "grad_norm": 0.11831089854240417, + "learning_rate": 9.135312264434306e-06, + "loss": 0.0027, + "step": 84510 + }, + { + "epoch": 0.5420850262800923, + "grad_norm": 0.0896664634346962, + "learning_rate": 9.134997623638487e-06, + "loss": 0.0038, + "step": 84520 + }, + { + "epoch": 0.5421491631738783, + "grad_norm": 0.09071475267410278, + "learning_rate": 9.134682931028192e-06, + "loss": 0.0044, + "step": 84530 + }, + { + "epoch": 0.5422133000676644, + "grad_norm": 0.08034510165452957, + "learning_rate": 9.134368186607363e-06, + "loss": 0.0049, + "step": 84540 + }, + { + "epoch": 0.5422774369614505, + "grad_norm": 0.30859583616256714, + "learning_rate": 9.134053390379948e-06, + "loss": 0.0042, + "step": 84550 + }, + { + "epoch": 0.5423415738552366, + "grad_norm": 0.24405072629451752, + "learning_rate": 9.133738542349888e-06, + "loss": 0.0033, + "step": 84560 + }, + { + "epoch": 0.5424057107490227, + "grad_norm": 0.14305658638477325, + "learning_rate": 9.13342364252113e-06, + "loss": 0.0044, + "step": 84570 + }, + { + "epoch": 0.5424698476428088, + "grad_norm": 0.2623572051525116, + "learning_rate": 9.13310869089762e-06, + "loss": 0.0069, + "step": 84580 + }, + { + "epoch": 0.542533984536595, + "grad_norm": 0.35897016525268555, + "learning_rate": 9.132793687483305e-06, + "loss": 0.0034, + "step": 84590 + }, + { + "epoch": 0.542598121430381, + "grad_norm": 0.1529109925031662, + "learning_rate": 9.132478632282132e-06, + "loss": 0.0039, + "step": 84600 + }, + { + "epoch": 0.5426622583241671, + "grad_norm": 0.20279178023338318, + "learning_rate": 9.132163525298047e-06, + "loss": 0.0051, + "step": 84610 + }, + { + "epoch": 0.5427263952179532, + "grad_norm": 0.09714586287736893, + "learning_rate": 9.131848366535e-06, + "loss": 0.0052, + "step": 84620 + }, + { + "epoch": 0.5427905321117393, + "grad_norm": 0.21548357605934143, + "learning_rate": 9.131533155996939e-06, + "loss": 0.0028, + "step": 84630 + }, + { + "epoch": 0.5428546690055254, + "grad_norm": 0.12653973698616028, + "learning_rate": 9.131217893687817e-06, + "loss": 0.0044, + "step": 84640 + }, + { + "epoch": 0.5429188058993115, + "grad_norm": 0.0521419532597065, + "learning_rate": 9.130902579611581e-06, + "loss": 0.0027, + "step": 84650 + }, + { + "epoch": 0.5429829427930976, + "grad_norm": 0.2173508256673813, + "learning_rate": 9.130587213772181e-06, + "loss": 0.0028, + "step": 84660 + }, + { + "epoch": 0.5430470796868837, + "grad_norm": 0.3113865256309509, + "learning_rate": 9.130271796173576e-06, + "loss": 0.0036, + "step": 84670 + }, + { + "epoch": 0.5431112165806697, + "grad_norm": 0.24822306632995605, + "learning_rate": 9.12995632681971e-06, + "loss": 0.006, + "step": 84680 + }, + { + "epoch": 0.5431753534744559, + "grad_norm": 0.22321628034114838, + "learning_rate": 9.129640805714542e-06, + "loss": 0.0031, + "step": 84690 + }, + { + "epoch": 0.5432394903682419, + "grad_norm": 0.1794038861989975, + "learning_rate": 9.129325232862021e-06, + "loss": 0.0027, + "step": 84700 + }, + { + "epoch": 0.5433036272620281, + "grad_norm": 0.21157395839691162, + "learning_rate": 9.129009608266104e-06, + "loss": 0.0026, + "step": 84710 + }, + { + "epoch": 0.5433677641558141, + "grad_norm": 0.19182851910591125, + "learning_rate": 9.128693931930747e-06, + "loss": 0.0033, + "step": 84720 + }, + { + "epoch": 0.5434319010496003, + "grad_norm": 0.17911753058433533, + "learning_rate": 9.128378203859902e-06, + "loss": 0.0039, + "step": 84730 + }, + { + "epoch": 0.5434960379433864, + "grad_norm": 0.22276780009269714, + "learning_rate": 9.12806242405753e-06, + "loss": 0.0025, + "step": 84740 + }, + { + "epoch": 0.5435601748371724, + "grad_norm": 0.08404522389173508, + "learning_rate": 9.127746592527585e-06, + "loss": 0.0039, + "step": 84750 + }, + { + "epoch": 0.5436243117309586, + "grad_norm": 0.1111041009426117, + "learning_rate": 9.127430709274025e-06, + "loss": 0.0024, + "step": 84760 + }, + { + "epoch": 0.5436884486247446, + "grad_norm": 0.4655708372592926, + "learning_rate": 9.127114774300807e-06, + "loss": 0.0058, + "step": 84770 + }, + { + "epoch": 0.5437525855185308, + "grad_norm": 0.11205190420150757, + "learning_rate": 9.126798787611892e-06, + "loss": 0.0058, + "step": 84780 + }, + { + "epoch": 0.5438167224123168, + "grad_norm": 0.04771514981985092, + "learning_rate": 9.12648274921124e-06, + "loss": 0.0021, + "step": 84790 + }, + { + "epoch": 0.543880859306103, + "grad_norm": 0.27232682704925537, + "learning_rate": 9.12616665910281e-06, + "loss": 0.0034, + "step": 84800 + }, + { + "epoch": 0.543944996199889, + "grad_norm": 0.17340819537639618, + "learning_rate": 9.125850517290561e-06, + "loss": 0.0035, + "step": 84810 + }, + { + "epoch": 0.5440091330936752, + "grad_norm": 0.1362583041191101, + "learning_rate": 9.125534323778458e-06, + "loss": 0.0037, + "step": 84820 + }, + { + "epoch": 0.5440732699874612, + "grad_norm": 0.15964961051940918, + "learning_rate": 9.12521807857046e-06, + "loss": 0.0039, + "step": 84830 + }, + { + "epoch": 0.5441374068812473, + "grad_norm": 0.29718807339668274, + "learning_rate": 9.124901781670533e-06, + "loss": 0.0033, + "step": 84840 + }, + { + "epoch": 0.5442015437750334, + "grad_norm": 0.1734071522951126, + "learning_rate": 9.124585433082638e-06, + "loss": 0.0024, + "step": 84850 + }, + { + "epoch": 0.5442656806688195, + "grad_norm": 0.11542264372110367, + "learning_rate": 9.12426903281074e-06, + "loss": 0.0031, + "step": 84860 + }, + { + "epoch": 0.5443298175626057, + "grad_norm": 0.06102282553911209, + "learning_rate": 9.123952580858801e-06, + "loss": 0.0038, + "step": 84870 + }, + { + "epoch": 0.5443939544563917, + "grad_norm": 0.1172720268368721, + "learning_rate": 9.123636077230792e-06, + "loss": 0.002, + "step": 84880 + }, + { + "epoch": 0.5444580913501779, + "grad_norm": 0.29675355553627014, + "learning_rate": 9.123319521930674e-06, + "loss": 0.0022, + "step": 84890 + }, + { + "epoch": 0.5445222282439639, + "grad_norm": 0.037560850381851196, + "learning_rate": 9.123002914962417e-06, + "loss": 0.0032, + "step": 84900 + }, + { + "epoch": 0.54458636513775, + "grad_norm": 0.15551374852657318, + "learning_rate": 9.122686256329985e-06, + "loss": 0.0039, + "step": 84910 + }, + { + "epoch": 0.5446505020315361, + "grad_norm": 0.23275664448738098, + "learning_rate": 9.122369546037348e-06, + "loss": 0.0046, + "step": 84920 + }, + { + "epoch": 0.5447146389253222, + "grad_norm": 0.30361834168434143, + "learning_rate": 9.122052784088476e-06, + "loss": 0.0046, + "step": 84930 + }, + { + "epoch": 0.5447787758191083, + "grad_norm": 0.216138556599617, + "learning_rate": 9.121735970487335e-06, + "loss": 0.0048, + "step": 84940 + }, + { + "epoch": 0.5448429127128944, + "grad_norm": 0.08227672427892685, + "learning_rate": 9.121419105237898e-06, + "loss": 0.0029, + "step": 84950 + }, + { + "epoch": 0.5449070496066805, + "grad_norm": 0.4803912937641144, + "learning_rate": 9.121102188344132e-06, + "loss": 0.0021, + "step": 84960 + }, + { + "epoch": 0.5449711865004666, + "grad_norm": 0.0508844256401062, + "learning_rate": 9.120785219810012e-06, + "loss": 0.0038, + "step": 84970 + }, + { + "epoch": 0.5450353233942526, + "grad_norm": 0.1788831651210785, + "learning_rate": 9.120468199639507e-06, + "loss": 0.0042, + "step": 84980 + }, + { + "epoch": 0.5450994602880388, + "grad_norm": 0.38108617067337036, + "learning_rate": 9.120151127836591e-06, + "loss": 0.0053, + "step": 84990 + }, + { + "epoch": 0.5451635971818248, + "grad_norm": 0.0781199187040329, + "learning_rate": 9.119834004405239e-06, + "loss": 0.0062, + "step": 85000 + }, + { + "epoch": 0.545227734075611, + "grad_norm": 0.08282937109470367, + "learning_rate": 9.11951682934942e-06, + "loss": 0.0022, + "step": 85010 + }, + { + "epoch": 0.5452918709693971, + "grad_norm": 0.11112157255411148, + "learning_rate": 9.119199602673112e-06, + "loss": 0.0021, + "step": 85020 + }, + { + "epoch": 0.5453560078631832, + "grad_norm": 0.09030723571777344, + "learning_rate": 9.118882324380287e-06, + "loss": 0.0054, + "step": 85030 + }, + { + "epoch": 0.5454201447569693, + "grad_norm": 0.07033137232065201, + "learning_rate": 9.118564994474925e-06, + "loss": 0.004, + "step": 85040 + }, + { + "epoch": 0.5454842816507554, + "grad_norm": 0.18817007541656494, + "learning_rate": 9.118247612961e-06, + "loss": 0.0041, + "step": 85050 + }, + { + "epoch": 0.5455484185445415, + "grad_norm": 0.26980462670326233, + "learning_rate": 9.117930179842488e-06, + "loss": 0.0027, + "step": 85060 + }, + { + "epoch": 0.5456125554383275, + "grad_norm": 0.10838302224874496, + "learning_rate": 9.117612695123368e-06, + "loss": 0.0047, + "step": 85070 + }, + { + "epoch": 0.5456766923321137, + "grad_norm": 0.16677549481391907, + "learning_rate": 9.117295158807618e-06, + "loss": 0.0021, + "step": 85080 + }, + { + "epoch": 0.5457408292258997, + "grad_norm": 0.1712741255760193, + "learning_rate": 9.116977570899219e-06, + "loss": 0.0061, + "step": 85090 + }, + { + "epoch": 0.5458049661196859, + "grad_norm": 0.05557706952095032, + "learning_rate": 9.116659931402147e-06, + "loss": 0.0065, + "step": 85100 + }, + { + "epoch": 0.5458691030134719, + "grad_norm": 0.009485971182584763, + "learning_rate": 9.116342240320384e-06, + "loss": 0.0028, + "step": 85110 + }, + { + "epoch": 0.5459332399072581, + "grad_norm": 0.20825040340423584, + "learning_rate": 9.11602449765791e-06, + "loss": 0.005, + "step": 85120 + }, + { + "epoch": 0.5459973768010441, + "grad_norm": 0.12041359394788742, + "learning_rate": 9.115706703418709e-06, + "loss": 0.0025, + "step": 85130 + }, + { + "epoch": 0.5460615136948302, + "grad_norm": 0.2229558229446411, + "learning_rate": 9.11538885760676e-06, + "loss": 0.0039, + "step": 85140 + }, + { + "epoch": 0.5461256505886164, + "grad_norm": 0.23140066862106323, + "learning_rate": 9.11507096022605e-06, + "loss": 0.0035, + "step": 85150 + }, + { + "epoch": 0.5461897874824024, + "grad_norm": 0.25123244524002075, + "learning_rate": 9.114753011280557e-06, + "loss": 0.0039, + "step": 85160 + }, + { + "epoch": 0.5462539243761886, + "grad_norm": 0.1634216457605362, + "learning_rate": 9.11443501077427e-06, + "loss": 0.0028, + "step": 85170 + }, + { + "epoch": 0.5463180612699746, + "grad_norm": 0.2737909257411957, + "learning_rate": 9.11411695871117e-06, + "loss": 0.0037, + "step": 85180 + }, + { + "epoch": 0.5463821981637608, + "grad_norm": 0.08838663250207901, + "learning_rate": 9.113798855095244e-06, + "loss": 0.0022, + "step": 85190 + }, + { + "epoch": 0.5464463350575468, + "grad_norm": 0.050015341490507126, + "learning_rate": 9.113480699930479e-06, + "loss": 0.0018, + "step": 85200 + }, + { + "epoch": 0.546510471951333, + "grad_norm": 0.2101522982120514, + "learning_rate": 9.113162493220862e-06, + "loss": 0.0049, + "step": 85210 + }, + { + "epoch": 0.546574608845119, + "grad_norm": 0.10002795606851578, + "learning_rate": 9.112844234970379e-06, + "loss": 0.0047, + "step": 85220 + }, + { + "epoch": 0.5466387457389051, + "grad_norm": 0.17288456857204437, + "learning_rate": 9.112525925183017e-06, + "loss": 0.0036, + "step": 85230 + }, + { + "epoch": 0.5467028826326912, + "grad_norm": 0.34862926602363586, + "learning_rate": 9.112207563862767e-06, + "loss": 0.0035, + "step": 85240 + }, + { + "epoch": 0.5467670195264773, + "grad_norm": 0.08521570265293121, + "learning_rate": 9.111889151013618e-06, + "loss": 0.0038, + "step": 85250 + }, + { + "epoch": 0.5468311564202634, + "grad_norm": 0.14284466207027435, + "learning_rate": 9.11157068663956e-06, + "loss": 0.0033, + "step": 85260 + }, + { + "epoch": 0.5468952933140495, + "grad_norm": 0.24869143962860107, + "learning_rate": 9.11125217074458e-06, + "loss": 0.0052, + "step": 85270 + }, + { + "epoch": 0.5469594302078356, + "grad_norm": 0.07540538907051086, + "learning_rate": 9.110933603332674e-06, + "loss": 0.0039, + "step": 85280 + }, + { + "epoch": 0.5470235671016217, + "grad_norm": 0.18324674665927887, + "learning_rate": 9.110614984407831e-06, + "loss": 0.0037, + "step": 85290 + }, + { + "epoch": 0.5470877039954078, + "grad_norm": 0.009447806514799595, + "learning_rate": 9.110296313974043e-06, + "loss": 0.0037, + "step": 85300 + }, + { + "epoch": 0.5471518408891939, + "grad_norm": 0.4537900388240814, + "learning_rate": 9.109977592035308e-06, + "loss": 0.0034, + "step": 85310 + }, + { + "epoch": 0.54721597778298, + "grad_norm": 0.16369648277759552, + "learning_rate": 9.109658818595614e-06, + "loss": 0.0039, + "step": 85320 + }, + { + "epoch": 0.5472801146767661, + "grad_norm": 0.13680054247379303, + "learning_rate": 9.10933999365896e-06, + "loss": 0.003, + "step": 85330 + }, + { + "epoch": 0.5473442515705522, + "grad_norm": 0.104710653424263, + "learning_rate": 9.109021117229336e-06, + "loss": 0.0023, + "step": 85340 + }, + { + "epoch": 0.5474083884643383, + "grad_norm": 0.1867726445198059, + "learning_rate": 9.108702189310742e-06, + "loss": 0.0033, + "step": 85350 + }, + { + "epoch": 0.5474725253581244, + "grad_norm": 0.07051552832126617, + "learning_rate": 9.108383209907173e-06, + "loss": 0.009, + "step": 85360 + }, + { + "epoch": 0.5475366622519104, + "grad_norm": 0.06497001647949219, + "learning_rate": 9.108064179022626e-06, + "loss": 0.0028, + "step": 85370 + }, + { + "epoch": 0.5476007991456966, + "grad_norm": 0.08399824798107147, + "learning_rate": 9.1077450966611e-06, + "loss": 0.0023, + "step": 85380 + }, + { + "epoch": 0.5476649360394826, + "grad_norm": 0.19393685460090637, + "learning_rate": 9.107425962826592e-06, + "loss": 0.004, + "step": 85390 + }, + { + "epoch": 0.5477290729332688, + "grad_norm": 0.2073168158531189, + "learning_rate": 9.1071067775231e-06, + "loss": 0.0052, + "step": 85400 + }, + { + "epoch": 0.5477932098270548, + "grad_norm": 0.06114586070179939, + "learning_rate": 9.106787540754626e-06, + "loss": 0.0028, + "step": 85410 + }, + { + "epoch": 0.547857346720841, + "grad_norm": 0.07383716851472855, + "learning_rate": 9.106468252525168e-06, + "loss": 0.0039, + "step": 85420 + }, + { + "epoch": 0.5479214836146271, + "grad_norm": 0.14534394443035126, + "learning_rate": 9.106148912838729e-06, + "loss": 0.0037, + "step": 85430 + }, + { + "epoch": 0.5479856205084132, + "grad_norm": 0.2565113604068756, + "learning_rate": 9.10582952169931e-06, + "loss": 0.0081, + "step": 85440 + }, + { + "epoch": 0.5480497574021993, + "grad_norm": 0.2744908630847931, + "learning_rate": 9.10551007911091e-06, + "loss": 0.0044, + "step": 85450 + }, + { + "epoch": 0.5481138942959853, + "grad_norm": 0.1304347813129425, + "learning_rate": 9.105190585077537e-06, + "loss": 0.0026, + "step": 85460 + }, + { + "epoch": 0.5481780311897715, + "grad_norm": 0.18746119737625122, + "learning_rate": 9.104871039603192e-06, + "loss": 0.0041, + "step": 85470 + }, + { + "epoch": 0.5482421680835575, + "grad_norm": 0.06296703964471817, + "learning_rate": 9.104551442691878e-06, + "loss": 0.005, + "step": 85480 + }, + { + "epoch": 0.5483063049773437, + "grad_norm": 0.22583769261837006, + "learning_rate": 9.1042317943476e-06, + "loss": 0.0029, + "step": 85490 + }, + { + "epoch": 0.5483704418711297, + "grad_norm": 0.21536467969417572, + "learning_rate": 9.103912094574365e-06, + "loss": 0.0043, + "step": 85500 + }, + { + "epoch": 0.5484345787649159, + "grad_norm": 0.03523802012205124, + "learning_rate": 9.10359234337618e-06, + "loss": 0.0038, + "step": 85510 + }, + { + "epoch": 0.5484987156587019, + "grad_norm": 0.08168116956949234, + "learning_rate": 9.103272540757048e-06, + "loss": 0.0022, + "step": 85520 + }, + { + "epoch": 0.548562852552488, + "grad_norm": 0.19365718960762024, + "learning_rate": 9.10295268672098e-06, + "loss": 0.0052, + "step": 85530 + }, + { + "epoch": 0.5486269894462741, + "grad_norm": 0.0632062554359436, + "learning_rate": 9.102632781271983e-06, + "loss": 0.0029, + "step": 85540 + }, + { + "epoch": 0.5486911263400602, + "grad_norm": 0.0994727835059166, + "learning_rate": 9.102312824414064e-06, + "loss": 0.0036, + "step": 85550 + }, + { + "epoch": 0.5487552632338463, + "grad_norm": 0.11295632272958755, + "learning_rate": 9.101992816151236e-06, + "loss": 0.0033, + "step": 85560 + }, + { + "epoch": 0.5488194001276324, + "grad_norm": 0.15867988765239716, + "learning_rate": 9.101672756487503e-06, + "loss": 0.0072, + "step": 85570 + }, + { + "epoch": 0.5488835370214186, + "grad_norm": 0.161295548081398, + "learning_rate": 9.101352645426879e-06, + "loss": 0.0028, + "step": 85580 + }, + { + "epoch": 0.5489476739152046, + "grad_norm": 0.07710425555706024, + "learning_rate": 9.101032482973378e-06, + "loss": 0.0029, + "step": 85590 + }, + { + "epoch": 0.5490118108089908, + "grad_norm": 0.07768481969833374, + "learning_rate": 9.100712269131008e-06, + "loss": 0.0036, + "step": 85600 + }, + { + "epoch": 0.5490759477027768, + "grad_norm": 0.22926555573940277, + "learning_rate": 9.10039200390378e-06, + "loss": 0.0043, + "step": 85610 + }, + { + "epoch": 0.5491400845965629, + "grad_norm": 0.10633791983127594, + "learning_rate": 9.100071687295712e-06, + "loss": 0.0026, + "step": 85620 + }, + { + "epoch": 0.549204221490349, + "grad_norm": 0.16838468611240387, + "learning_rate": 9.099751319310814e-06, + "loss": 0.0033, + "step": 85630 + }, + { + "epoch": 0.5492683583841351, + "grad_norm": 0.16128893196582794, + "learning_rate": 9.099430899953103e-06, + "loss": 0.0034, + "step": 85640 + }, + { + "epoch": 0.5493324952779212, + "grad_norm": 0.18620869517326355, + "learning_rate": 9.099110429226593e-06, + "loss": 0.0055, + "step": 85650 + }, + { + "epoch": 0.5493966321717073, + "grad_norm": 0.11803747713565826, + "learning_rate": 9.098789907135302e-06, + "loss": 0.0024, + "step": 85660 + }, + { + "epoch": 0.5494607690654933, + "grad_norm": 0.19250069558620453, + "learning_rate": 9.09846933368324e-06, + "loss": 0.0032, + "step": 85670 + }, + { + "epoch": 0.5495249059592795, + "grad_norm": 0.15483903884887695, + "learning_rate": 9.09814870887443e-06, + "loss": 0.004, + "step": 85680 + }, + { + "epoch": 0.5495890428530655, + "grad_norm": 0.030415749177336693, + "learning_rate": 9.097828032712888e-06, + "loss": 0.0053, + "step": 85690 + }, + { + "epoch": 0.5496531797468517, + "grad_norm": 0.1431485414505005, + "learning_rate": 9.097507305202632e-06, + "loss": 0.0025, + "step": 85700 + }, + { + "epoch": 0.5497173166406378, + "grad_norm": 0.3654477000236511, + "learning_rate": 9.09718652634768e-06, + "loss": 0.005, + "step": 85710 + }, + { + "epoch": 0.5497814535344239, + "grad_norm": 0.06691776216030121, + "learning_rate": 9.096865696152053e-06, + "loss": 0.0026, + "step": 85720 + }, + { + "epoch": 0.54984559042821, + "grad_norm": 0.043519288301467896, + "learning_rate": 9.096544814619772e-06, + "loss": 0.0029, + "step": 85730 + }, + { + "epoch": 0.5499097273219961, + "grad_norm": 0.2356233447790146, + "learning_rate": 9.096223881754855e-06, + "loss": 0.0039, + "step": 85740 + }, + { + "epoch": 0.5499738642157822, + "grad_norm": 1.9528368711471558, + "learning_rate": 9.095902897561326e-06, + "loss": 0.0072, + "step": 85750 + }, + { + "epoch": 0.5500380011095682, + "grad_norm": 0.13375617563724518, + "learning_rate": 9.095581862043207e-06, + "loss": 0.0044, + "step": 85760 + }, + { + "epoch": 0.5501021380033544, + "grad_norm": 0.13341820240020752, + "learning_rate": 9.09526077520452e-06, + "loss": 0.0025, + "step": 85770 + }, + { + "epoch": 0.5501662748971404, + "grad_norm": 0.2529837489128113, + "learning_rate": 9.094939637049288e-06, + "loss": 0.0028, + "step": 85780 + }, + { + "epoch": 0.5502304117909266, + "grad_norm": 0.14536824822425842, + "learning_rate": 9.094618447581537e-06, + "loss": 0.0032, + "step": 85790 + }, + { + "epoch": 0.5502945486847126, + "grad_norm": 0.13131175935268402, + "learning_rate": 9.09429720680529e-06, + "loss": 0.0058, + "step": 85800 + }, + { + "epoch": 0.5503586855784988, + "grad_norm": 0.0458420030772686, + "learning_rate": 9.093975914724572e-06, + "loss": 0.0023, + "step": 85810 + }, + { + "epoch": 0.5504228224722848, + "grad_norm": 0.137325718998909, + "learning_rate": 9.093654571343411e-06, + "loss": 0.0037, + "step": 85820 + }, + { + "epoch": 0.550486959366071, + "grad_norm": 0.07254275679588318, + "learning_rate": 9.093333176665831e-06, + "loss": 0.0084, + "step": 85830 + }, + { + "epoch": 0.550551096259857, + "grad_norm": 0.21434733271598816, + "learning_rate": 9.093011730695861e-06, + "loss": 0.0032, + "step": 85840 + }, + { + "epoch": 0.5506152331536431, + "grad_norm": 0.11275189369916916, + "learning_rate": 9.092690233437531e-06, + "loss": 0.0024, + "step": 85850 + }, + { + "epoch": 0.5506793700474293, + "grad_norm": 0.25155019760131836, + "learning_rate": 9.092368684894865e-06, + "loss": 0.003, + "step": 85860 + }, + { + "epoch": 0.5507435069412153, + "grad_norm": 0.14386850595474243, + "learning_rate": 9.092047085071895e-06, + "loss": 0.0035, + "step": 85870 + }, + { + "epoch": 0.5508076438350015, + "grad_norm": 0.08390353620052338, + "learning_rate": 9.091725433972651e-06, + "loss": 0.0041, + "step": 85880 + }, + { + "epoch": 0.5508717807287875, + "grad_norm": 0.09561295807361603, + "learning_rate": 9.091403731601165e-06, + "loss": 0.0017, + "step": 85890 + }, + { + "epoch": 0.5509359176225737, + "grad_norm": 0.14668424427509308, + "learning_rate": 9.091081977961462e-06, + "loss": 0.0064, + "step": 85900 + }, + { + "epoch": 0.5510000545163597, + "grad_norm": 0.26864367723464966, + "learning_rate": 9.090760173057581e-06, + "loss": 0.0021, + "step": 85910 + }, + { + "epoch": 0.5510641914101458, + "grad_norm": 0.2498502880334854, + "learning_rate": 9.090438316893551e-06, + "loss": 0.002, + "step": 85920 + }, + { + "epoch": 0.5511283283039319, + "grad_norm": 0.2963554859161377, + "learning_rate": 9.090116409473404e-06, + "loss": 0.0048, + "step": 85930 + }, + { + "epoch": 0.551192465197718, + "grad_norm": 0.18236757814884186, + "learning_rate": 9.089794450801178e-06, + "loss": 0.0043, + "step": 85940 + }, + { + "epoch": 0.5512566020915041, + "grad_norm": 0.09088752418756485, + "learning_rate": 9.089472440880901e-06, + "loss": 0.0035, + "step": 85950 + }, + { + "epoch": 0.5513207389852902, + "grad_norm": 0.14687426388263702, + "learning_rate": 9.089150379716614e-06, + "loss": 0.0028, + "step": 85960 + }, + { + "epoch": 0.5513848758790763, + "grad_norm": 0.07180049270391464, + "learning_rate": 9.08882826731235e-06, + "loss": 0.0057, + "step": 85970 + }, + { + "epoch": 0.5514490127728624, + "grad_norm": 0.06035429984331131, + "learning_rate": 9.088506103672146e-06, + "loss": 0.0025, + "step": 85980 + }, + { + "epoch": 0.5515131496666485, + "grad_norm": 0.2776930034160614, + "learning_rate": 9.088183888800038e-06, + "loss": 0.0034, + "step": 85990 + }, + { + "epoch": 0.5515772865604346, + "grad_norm": 0.11867208778858185, + "learning_rate": 9.087861622700062e-06, + "loss": 0.0031, + "step": 86000 + }, + { + "epoch": 0.5516414234542207, + "grad_norm": 0.3291597068309784, + "learning_rate": 9.087539305376261e-06, + "loss": 0.0045, + "step": 86010 + }, + { + "epoch": 0.5517055603480068, + "grad_norm": 0.19839000701904297, + "learning_rate": 9.08721693683267e-06, + "loss": 0.0026, + "step": 86020 + }, + { + "epoch": 0.5517696972417929, + "grad_norm": 0.05654887109994888, + "learning_rate": 9.08689451707333e-06, + "loss": 0.0033, + "step": 86030 + }, + { + "epoch": 0.551833834135579, + "grad_norm": 0.0997452586889267, + "learning_rate": 9.086572046102281e-06, + "loss": 0.0045, + "step": 86040 + }, + { + "epoch": 0.5518979710293651, + "grad_norm": 0.28388577699661255, + "learning_rate": 9.086249523923563e-06, + "loss": 0.003, + "step": 86050 + }, + { + "epoch": 0.5519621079231511, + "grad_norm": 0.34730029106140137, + "learning_rate": 9.085926950541219e-06, + "loss": 0.0045, + "step": 86060 + }, + { + "epoch": 0.5520262448169373, + "grad_norm": 0.10112051665782928, + "learning_rate": 9.08560432595929e-06, + "loss": 0.0038, + "step": 86070 + }, + { + "epoch": 0.5520903817107233, + "grad_norm": 0.2704380452632904, + "learning_rate": 9.085281650181818e-06, + "loss": 0.0039, + "step": 86080 + }, + { + "epoch": 0.5521545186045095, + "grad_norm": 0.023337703198194504, + "learning_rate": 9.084958923212848e-06, + "loss": 0.0033, + "step": 86090 + }, + { + "epoch": 0.5522186554982955, + "grad_norm": 0.12786415219306946, + "learning_rate": 9.084636145056423e-06, + "loss": 0.0031, + "step": 86100 + }, + { + "epoch": 0.5522827923920817, + "grad_norm": 0.29230159521102905, + "learning_rate": 9.084313315716589e-06, + "loss": 0.0029, + "step": 86110 + }, + { + "epoch": 0.5523469292858677, + "grad_norm": 0.160085529088974, + "learning_rate": 9.083990435197389e-06, + "loss": 0.0036, + "step": 86120 + }, + { + "epoch": 0.5524110661796539, + "grad_norm": 0.14612051844596863, + "learning_rate": 9.08366750350287e-06, + "loss": 0.005, + "step": 86130 + }, + { + "epoch": 0.55247520307344, + "grad_norm": 0.17617826163768768, + "learning_rate": 9.083344520637079e-06, + "loss": 0.0034, + "step": 86140 + }, + { + "epoch": 0.552539339967226, + "grad_norm": 0.055370114743709564, + "learning_rate": 9.083021486604062e-06, + "loss": 0.0048, + "step": 86150 + }, + { + "epoch": 0.5526034768610122, + "grad_norm": 0.05733392387628555, + "learning_rate": 9.082698401407868e-06, + "loss": 0.0032, + "step": 86160 + }, + { + "epoch": 0.5526676137547982, + "grad_norm": 0.04752342775464058, + "learning_rate": 9.082375265052545e-06, + "loss": 0.0046, + "step": 86170 + }, + { + "epoch": 0.5527317506485844, + "grad_norm": 0.13037846982479095, + "learning_rate": 9.082052077542142e-06, + "loss": 0.0029, + "step": 86180 + }, + { + "epoch": 0.5527958875423704, + "grad_norm": 0.06878279894590378, + "learning_rate": 9.08172883888071e-06, + "loss": 0.0017, + "step": 86190 + }, + { + "epoch": 0.5528600244361566, + "grad_norm": 0.10957800596952438, + "learning_rate": 9.081405549072299e-06, + "loss": 0.0042, + "step": 86200 + }, + { + "epoch": 0.5529241613299426, + "grad_norm": 0.05714105814695358, + "learning_rate": 9.08108220812096e-06, + "loss": 0.0067, + "step": 86210 + }, + { + "epoch": 0.5529882982237287, + "grad_norm": 0.16612716019153595, + "learning_rate": 9.080758816030741e-06, + "loss": 0.0025, + "step": 86220 + }, + { + "epoch": 0.5530524351175148, + "grad_norm": 0.12985143065452576, + "learning_rate": 9.0804353728057e-06, + "loss": 0.0043, + "step": 86230 + }, + { + "epoch": 0.5531165720113009, + "grad_norm": 0.4922974109649658, + "learning_rate": 9.080111878449887e-06, + "loss": 0.0034, + "step": 86240 + }, + { + "epoch": 0.553180708905087, + "grad_norm": 0.17751812934875488, + "learning_rate": 9.079788332967357e-06, + "loss": 0.0058, + "step": 86250 + }, + { + "epoch": 0.5532448457988731, + "grad_norm": 0.1458687037229538, + "learning_rate": 9.079464736362162e-06, + "loss": 0.0031, + "step": 86260 + }, + { + "epoch": 0.5533089826926592, + "grad_norm": 0.13435406982898712, + "learning_rate": 9.07914108863836e-06, + "loss": 0.0021, + "step": 86270 + }, + { + "epoch": 0.5533731195864453, + "grad_norm": 0.18028303980827332, + "learning_rate": 9.078817389800004e-06, + "loss": 0.005, + "step": 86280 + }, + { + "epoch": 0.5534372564802315, + "grad_norm": 0.059606894850730896, + "learning_rate": 9.07849363985115e-06, + "loss": 0.0057, + "step": 86290 + }, + { + "epoch": 0.5535013933740175, + "grad_norm": 0.408214271068573, + "learning_rate": 9.078169838795858e-06, + "loss": 0.0023, + "step": 86300 + }, + { + "epoch": 0.5535655302678036, + "grad_norm": 0.15087488293647766, + "learning_rate": 9.077845986638181e-06, + "loss": 0.0025, + "step": 86310 + }, + { + "epoch": 0.5536296671615897, + "grad_norm": 0.10014703124761581, + "learning_rate": 9.07752208338218e-06, + "loss": 0.0018, + "step": 86320 + }, + { + "epoch": 0.5536938040553758, + "grad_norm": 0.14538487792015076, + "learning_rate": 9.077198129031915e-06, + "loss": 0.0036, + "step": 86330 + }, + { + "epoch": 0.5537579409491619, + "grad_norm": 0.02291569486260414, + "learning_rate": 9.076874123591441e-06, + "loss": 0.0031, + "step": 86340 + }, + { + "epoch": 0.553822077842948, + "grad_norm": 0.34157443046569824, + "learning_rate": 9.076550067064822e-06, + "loss": 0.0035, + "step": 86350 + }, + { + "epoch": 0.553886214736734, + "grad_norm": 0.059290993958711624, + "learning_rate": 9.076225959456117e-06, + "loss": 0.0043, + "step": 86360 + }, + { + "epoch": 0.5539503516305202, + "grad_norm": 0.1451360583305359, + "learning_rate": 9.075901800769387e-06, + "loss": 0.0014, + "step": 86370 + }, + { + "epoch": 0.5540144885243062, + "grad_norm": 0.08115583658218384, + "learning_rate": 9.075577591008694e-06, + "loss": 0.0012, + "step": 86380 + }, + { + "epoch": 0.5540786254180924, + "grad_norm": 0.11323795467615128, + "learning_rate": 9.075253330178102e-06, + "loss": 0.004, + "step": 86390 + }, + { + "epoch": 0.5541427623118784, + "grad_norm": 0.1205209419131279, + "learning_rate": 9.074929018281672e-06, + "loss": 0.002, + "step": 86400 + }, + { + "epoch": 0.5542068992056646, + "grad_norm": 0.12457386404275894, + "learning_rate": 9.07460465532347e-06, + "loss": 0.0026, + "step": 86410 + }, + { + "epoch": 0.5542710360994507, + "grad_norm": 0.20637083053588867, + "learning_rate": 9.07428024130756e-06, + "loss": 0.0055, + "step": 86420 + }, + { + "epoch": 0.5543351729932368, + "grad_norm": 0.32814687490463257, + "learning_rate": 9.073955776238006e-06, + "loss": 0.0039, + "step": 86430 + }, + { + "epoch": 0.5543993098870229, + "grad_norm": 0.24031226336956024, + "learning_rate": 9.073631260118875e-06, + "loss": 0.0033, + "step": 86440 + }, + { + "epoch": 0.5544634467808089, + "grad_norm": 0.1292991042137146, + "learning_rate": 9.073306692954234e-06, + "loss": 0.0029, + "step": 86450 + }, + { + "epoch": 0.5545275836745951, + "grad_norm": 0.044169165194034576, + "learning_rate": 9.072982074748147e-06, + "loss": 0.0032, + "step": 86460 + }, + { + "epoch": 0.5545917205683811, + "grad_norm": 0.06301355361938477, + "learning_rate": 9.072657405504685e-06, + "loss": 0.0031, + "step": 86470 + }, + { + "epoch": 0.5546558574621673, + "grad_norm": 0.04632085561752319, + "learning_rate": 9.072332685227913e-06, + "loss": 0.0035, + "step": 86480 + }, + { + "epoch": 0.5547199943559533, + "grad_norm": 0.13859742879867554, + "learning_rate": 9.072007913921906e-06, + "loss": 0.0028, + "step": 86490 + }, + { + "epoch": 0.5547841312497395, + "grad_norm": 0.11913814395666122, + "learning_rate": 9.071683091590726e-06, + "loss": 0.0037, + "step": 86500 + }, + { + "epoch": 0.5548482681435255, + "grad_norm": 0.26309624314308167, + "learning_rate": 9.071358218238448e-06, + "loss": 0.0049, + "step": 86510 + }, + { + "epoch": 0.5549124050373117, + "grad_norm": 0.0424310527741909, + "learning_rate": 9.071033293869142e-06, + "loss": 0.0034, + "step": 86520 + }, + { + "epoch": 0.5549765419310977, + "grad_norm": 0.1482691913843155, + "learning_rate": 9.070708318486878e-06, + "loss": 0.0027, + "step": 86530 + }, + { + "epoch": 0.5550406788248838, + "grad_norm": 0.15104617178440094, + "learning_rate": 9.070383292095731e-06, + "loss": 0.0024, + "step": 86540 + }, + { + "epoch": 0.5551048157186699, + "grad_norm": 0.17228814959526062, + "learning_rate": 9.070058214699772e-06, + "loss": 0.0031, + "step": 86550 + }, + { + "epoch": 0.555168952612456, + "grad_norm": 0.11480726301670074, + "learning_rate": 9.069733086303075e-06, + "loss": 0.002, + "step": 86560 + }, + { + "epoch": 0.5552330895062422, + "grad_norm": 0.0560038685798645, + "learning_rate": 9.069407906909712e-06, + "loss": 0.0035, + "step": 86570 + }, + { + "epoch": 0.5552972264000282, + "grad_norm": 0.14466722309589386, + "learning_rate": 9.069082676523762e-06, + "loss": 0.004, + "step": 86580 + }, + { + "epoch": 0.5553613632938144, + "grad_norm": 0.028515275567770004, + "learning_rate": 9.068757395149297e-06, + "loss": 0.0049, + "step": 86590 + }, + { + "epoch": 0.5554255001876004, + "grad_norm": 0.07086604088544846, + "learning_rate": 9.068432062790393e-06, + "loss": 0.0025, + "step": 86600 + }, + { + "epoch": 0.5554896370813865, + "grad_norm": 0.09528445452451706, + "learning_rate": 9.06810667945113e-06, + "loss": 0.0036, + "step": 86610 + }, + { + "epoch": 0.5555537739751726, + "grad_norm": 0.13632024824619293, + "learning_rate": 9.067781245135581e-06, + "loss": 0.0036, + "step": 86620 + }, + { + "epoch": 0.5556179108689587, + "grad_norm": 0.16784769296646118, + "learning_rate": 9.067455759847825e-06, + "loss": 0.004, + "step": 86630 + }, + { + "epoch": 0.5556820477627448, + "grad_norm": 0.0556255541741848, + "learning_rate": 9.067130223591943e-06, + "loss": 0.0026, + "step": 86640 + }, + { + "epoch": 0.5557461846565309, + "grad_norm": 0.2098006159067154, + "learning_rate": 9.066804636372011e-06, + "loss": 0.0031, + "step": 86650 + }, + { + "epoch": 0.555810321550317, + "grad_norm": 0.21030020713806152, + "learning_rate": 9.066478998192113e-06, + "loss": 0.003, + "step": 86660 + }, + { + "epoch": 0.5558744584441031, + "grad_norm": 0.2861911356449127, + "learning_rate": 9.066153309056325e-06, + "loss": 0.0048, + "step": 86670 + }, + { + "epoch": 0.5559385953378891, + "grad_norm": 0.0764365941286087, + "learning_rate": 9.06582756896873e-06, + "loss": 0.0029, + "step": 86680 + }, + { + "epoch": 0.5560027322316753, + "grad_norm": 0.10217791795730591, + "learning_rate": 9.06550177793341e-06, + "loss": 0.0026, + "step": 86690 + }, + { + "epoch": 0.5560668691254614, + "grad_norm": 0.2130669355392456, + "learning_rate": 9.065175935954447e-06, + "loss": 0.0037, + "step": 86700 + }, + { + "epoch": 0.5561310060192475, + "grad_norm": 0.12732025980949402, + "learning_rate": 9.064850043035925e-06, + "loss": 0.0029, + "step": 86710 + }, + { + "epoch": 0.5561951429130336, + "grad_norm": 0.2850833237171173, + "learning_rate": 9.064524099181925e-06, + "loss": 0.0046, + "step": 86720 + }, + { + "epoch": 0.5562592798068197, + "grad_norm": 0.1203378364443779, + "learning_rate": 9.064198104396536e-06, + "loss": 0.0037, + "step": 86730 + }, + { + "epoch": 0.5563234167006058, + "grad_norm": 0.3390064835548401, + "learning_rate": 9.063872058683838e-06, + "loss": 0.0033, + "step": 86740 + }, + { + "epoch": 0.5563875535943918, + "grad_norm": 0.09684395045042038, + "learning_rate": 9.06354596204792e-06, + "loss": 0.0017, + "step": 86750 + }, + { + "epoch": 0.556451690488178, + "grad_norm": 0.12574532628059387, + "learning_rate": 9.063219814492866e-06, + "loss": 0.0025, + "step": 86760 + }, + { + "epoch": 0.556515827381964, + "grad_norm": 0.12113095074892044, + "learning_rate": 9.062893616022763e-06, + "loss": 0.0034, + "step": 86770 + }, + { + "epoch": 0.5565799642757502, + "grad_norm": 0.04478220269083977, + "learning_rate": 9.062567366641701e-06, + "loss": 0.004, + "step": 86780 + }, + { + "epoch": 0.5566441011695362, + "grad_norm": 0.2971895635128021, + "learning_rate": 9.062241066353766e-06, + "loss": 0.0033, + "step": 86790 + }, + { + "epoch": 0.5567082380633224, + "grad_norm": 0.2617679834365845, + "learning_rate": 9.061914715163047e-06, + "loss": 0.0036, + "step": 86800 + }, + { + "epoch": 0.5567723749571084, + "grad_norm": 0.09581983834505081, + "learning_rate": 9.061588313073633e-06, + "loss": 0.0035, + "step": 86810 + }, + { + "epoch": 0.5568365118508946, + "grad_norm": 0.2524416148662567, + "learning_rate": 9.061261860089614e-06, + "loss": 0.0025, + "step": 86820 + }, + { + "epoch": 0.5569006487446806, + "grad_norm": 0.29080846905708313, + "learning_rate": 9.060935356215084e-06, + "loss": 0.0054, + "step": 86830 + }, + { + "epoch": 0.5569647856384667, + "grad_norm": 0.3046024441719055, + "learning_rate": 9.06060880145413e-06, + "loss": 0.0042, + "step": 86840 + }, + { + "epoch": 0.5570289225322529, + "grad_norm": 0.14829081296920776, + "learning_rate": 9.060282195810845e-06, + "loss": 0.0026, + "step": 86850 + }, + { + "epoch": 0.5570930594260389, + "grad_norm": 0.18649496138095856, + "learning_rate": 9.059955539289323e-06, + "loss": 0.0015, + "step": 86860 + }, + { + "epoch": 0.5571571963198251, + "grad_norm": 0.23416541516780853, + "learning_rate": 9.059628831893657e-06, + "loss": 0.0039, + "step": 86870 + }, + { + "epoch": 0.5572213332136111, + "grad_norm": 0.24305544793605804, + "learning_rate": 9.05930207362794e-06, + "loss": 0.0051, + "step": 86880 + }, + { + "epoch": 0.5572854701073973, + "grad_norm": 0.3699105381965637, + "learning_rate": 9.058975264496266e-06, + "loss": 0.006, + "step": 86890 + }, + { + "epoch": 0.5573496070011833, + "grad_norm": 0.07164635509252548, + "learning_rate": 9.058648404502732e-06, + "loss": 0.0039, + "step": 86900 + }, + { + "epoch": 0.5574137438949694, + "grad_norm": 0.1825900375843048, + "learning_rate": 9.058321493651432e-06, + "loss": 0.0018, + "step": 86910 + }, + { + "epoch": 0.5574778807887555, + "grad_norm": 0.292298823595047, + "learning_rate": 9.057994531946463e-06, + "loss": 0.004, + "step": 86920 + }, + { + "epoch": 0.5575420176825416, + "grad_norm": 0.14147339761257172, + "learning_rate": 9.057667519391924e-06, + "loss": 0.0025, + "step": 86930 + }, + { + "epoch": 0.5576061545763277, + "grad_norm": 0.3002302646636963, + "learning_rate": 9.057340455991912e-06, + "loss": 0.0028, + "step": 86940 + }, + { + "epoch": 0.5576702914701138, + "grad_norm": 0.23513545095920563, + "learning_rate": 9.057013341750522e-06, + "loss": 0.0045, + "step": 86950 + }, + { + "epoch": 0.5577344283638999, + "grad_norm": 0.10269004851579666, + "learning_rate": 9.056686176671855e-06, + "loss": 0.0054, + "step": 86960 + }, + { + "epoch": 0.557798565257686, + "grad_norm": 0.13707150518894196, + "learning_rate": 9.056358960760013e-06, + "loss": 0.0047, + "step": 86970 + }, + { + "epoch": 0.5578627021514722, + "grad_norm": 0.18656758964061737, + "learning_rate": 9.056031694019092e-06, + "loss": 0.0151, + "step": 86980 + }, + { + "epoch": 0.5579268390452582, + "grad_norm": 0.29598280787467957, + "learning_rate": 9.055704376453197e-06, + "loss": 0.0039, + "step": 86990 + }, + { + "epoch": 0.5579909759390443, + "grad_norm": 0.07861374318599701, + "learning_rate": 9.055377008066425e-06, + "loss": 0.0036, + "step": 87000 + }, + { + "epoch": 0.5580551128328304, + "grad_norm": 0.24993683397769928, + "learning_rate": 9.055049588862883e-06, + "loss": 0.004, + "step": 87010 + }, + { + "epoch": 0.5581192497266165, + "grad_norm": 0.06908705830574036, + "learning_rate": 9.054722118846671e-06, + "loss": 0.0022, + "step": 87020 + }, + { + "epoch": 0.5581833866204026, + "grad_norm": 0.062278080731630325, + "learning_rate": 9.054394598021894e-06, + "loss": 0.003, + "step": 87030 + }, + { + "epoch": 0.5582475235141887, + "grad_norm": 0.13321839272975922, + "learning_rate": 9.054067026392654e-06, + "loss": 0.0026, + "step": 87040 + }, + { + "epoch": 0.5583116604079748, + "grad_norm": 0.12728707492351532, + "learning_rate": 9.053739403963056e-06, + "loss": 0.0029, + "step": 87050 + }, + { + "epoch": 0.5583757973017609, + "grad_norm": 0.12646393477916718, + "learning_rate": 9.053411730737208e-06, + "loss": 0.0027, + "step": 87060 + }, + { + "epoch": 0.5584399341955469, + "grad_norm": 0.0910581424832344, + "learning_rate": 9.053084006719212e-06, + "loss": 0.0054, + "step": 87070 + }, + { + "epoch": 0.5585040710893331, + "grad_norm": 0.0338672511279583, + "learning_rate": 9.052756231913178e-06, + "loss": 0.002, + "step": 87080 + }, + { + "epoch": 0.5585682079831191, + "grad_norm": 0.013740995898842812, + "learning_rate": 9.05242840632321e-06, + "loss": 0.0026, + "step": 87090 + }, + { + "epoch": 0.5586323448769053, + "grad_norm": 0.05321336165070534, + "learning_rate": 9.05210052995342e-06, + "loss": 0.0045, + "step": 87100 + }, + { + "epoch": 0.5586964817706913, + "grad_norm": 0.16705931723117828, + "learning_rate": 9.051772602807913e-06, + "loss": 0.0028, + "step": 87110 + }, + { + "epoch": 0.5587606186644775, + "grad_norm": 0.1733585000038147, + "learning_rate": 9.051444624890801e-06, + "loss": 0.003, + "step": 87120 + }, + { + "epoch": 0.5588247555582636, + "grad_norm": 0.20235127210617065, + "learning_rate": 9.051116596206192e-06, + "loss": 0.0026, + "step": 87130 + }, + { + "epoch": 0.5588888924520496, + "grad_norm": 0.07634595036506653, + "learning_rate": 9.050788516758196e-06, + "loss": 0.002, + "step": 87140 + }, + { + "epoch": 0.5589530293458358, + "grad_norm": 0.20534726977348328, + "learning_rate": 9.050460386550926e-06, + "loss": 0.0034, + "step": 87150 + }, + { + "epoch": 0.5590171662396218, + "grad_norm": 0.14344623684883118, + "learning_rate": 9.050132205588492e-06, + "loss": 0.0027, + "step": 87160 + }, + { + "epoch": 0.559081303133408, + "grad_norm": 0.11710309237241745, + "learning_rate": 9.049803973875007e-06, + "loss": 0.0034, + "step": 87170 + }, + { + "epoch": 0.559145440027194, + "grad_norm": 0.1879531890153885, + "learning_rate": 9.049475691414582e-06, + "loss": 0.0035, + "step": 87180 + }, + { + "epoch": 0.5592095769209802, + "grad_norm": 0.1346030980348587, + "learning_rate": 9.049147358211336e-06, + "loss": 0.0041, + "step": 87190 + }, + { + "epoch": 0.5592737138147662, + "grad_norm": 0.1864037811756134, + "learning_rate": 9.048818974269378e-06, + "loss": 0.0035, + "step": 87200 + }, + { + "epoch": 0.5593378507085524, + "grad_norm": 0.08595872670412064, + "learning_rate": 9.048490539592824e-06, + "loss": 0.004, + "step": 87210 + }, + { + "epoch": 0.5594019876023384, + "grad_norm": 0.3378288447856903, + "learning_rate": 9.048162054185789e-06, + "loss": 0.0057, + "step": 87220 + }, + { + "epoch": 0.5594661244961245, + "grad_norm": 0.017530549317598343, + "learning_rate": 9.047833518052393e-06, + "loss": 0.0031, + "step": 87230 + }, + { + "epoch": 0.5595302613899106, + "grad_norm": 0.11569032818078995, + "learning_rate": 9.047504931196748e-06, + "loss": 0.0034, + "step": 87240 + }, + { + "epoch": 0.5595943982836967, + "grad_norm": 0.34084203839302063, + "learning_rate": 9.047176293622975e-06, + "loss": 0.0052, + "step": 87250 + }, + { + "epoch": 0.5596585351774829, + "grad_norm": 0.12566979229450226, + "learning_rate": 9.046847605335189e-06, + "loss": 0.0034, + "step": 87260 + }, + { + "epoch": 0.5597226720712689, + "grad_norm": 0.21158519387245178, + "learning_rate": 9.04651886633751e-06, + "loss": 0.0038, + "step": 87270 + }, + { + "epoch": 0.5597868089650551, + "grad_norm": 0.009799067862331867, + "learning_rate": 9.04619007663406e-06, + "loss": 0.0045, + "step": 87280 + }, + { + "epoch": 0.5598509458588411, + "grad_norm": 0.20503848791122437, + "learning_rate": 9.045861236228955e-06, + "loss": 0.0027, + "step": 87290 + }, + { + "epoch": 0.5599150827526272, + "grad_norm": 0.05585271492600441, + "learning_rate": 9.045532345126318e-06, + "loss": 0.0026, + "step": 87300 + }, + { + "epoch": 0.5599792196464133, + "grad_norm": 0.16156123578548431, + "learning_rate": 9.04520340333027e-06, + "loss": 0.0029, + "step": 87310 + }, + { + "epoch": 0.5600433565401994, + "grad_norm": 0.07933583110570908, + "learning_rate": 9.044874410844931e-06, + "loss": 0.0027, + "step": 87320 + }, + { + "epoch": 0.5601074934339855, + "grad_norm": 0.2687130570411682, + "learning_rate": 9.044545367674425e-06, + "loss": 0.0046, + "step": 87330 + }, + { + "epoch": 0.5601716303277716, + "grad_norm": 0.24467165768146515, + "learning_rate": 9.044216273822875e-06, + "loss": 0.0032, + "step": 87340 + }, + { + "epoch": 0.5602357672215577, + "grad_norm": 0.1341111958026886, + "learning_rate": 9.043887129294403e-06, + "loss": 0.0037, + "step": 87350 + }, + { + "epoch": 0.5602999041153438, + "grad_norm": 0.1187986359000206, + "learning_rate": 9.043557934093138e-06, + "loss": 0.0024, + "step": 87360 + }, + { + "epoch": 0.5603640410091298, + "grad_norm": 0.14248231053352356, + "learning_rate": 9.0432286882232e-06, + "loss": 0.0029, + "step": 87370 + }, + { + "epoch": 0.560428177902916, + "grad_norm": 0.2501228153705597, + "learning_rate": 9.04289939168872e-06, + "loss": 0.0026, + "step": 87380 + }, + { + "epoch": 0.560492314796702, + "grad_norm": 0.20401206612586975, + "learning_rate": 9.042570044493817e-06, + "loss": 0.005, + "step": 87390 + }, + { + "epoch": 0.5605564516904882, + "grad_norm": 0.19732318818569183, + "learning_rate": 9.042240646642627e-06, + "loss": 0.0035, + "step": 87400 + }, + { + "epoch": 0.5606205885842743, + "grad_norm": 0.17643578350543976, + "learning_rate": 9.041911198139267e-06, + "loss": 0.0034, + "step": 87410 + }, + { + "epoch": 0.5606847254780604, + "grad_norm": 0.12217200547456741, + "learning_rate": 9.041581698987876e-06, + "loss": 0.0023, + "step": 87420 + }, + { + "epoch": 0.5607488623718465, + "grad_norm": 0.16141095757484436, + "learning_rate": 9.041252149192575e-06, + "loss": 0.0039, + "step": 87430 + }, + { + "epoch": 0.5608129992656326, + "grad_norm": 0.2334379255771637, + "learning_rate": 9.040922548757497e-06, + "loss": 0.0057, + "step": 87440 + }, + { + "epoch": 0.5608771361594187, + "grad_norm": 0.20374825596809387, + "learning_rate": 9.040592897686772e-06, + "loss": 0.0031, + "step": 87450 + }, + { + "epoch": 0.5609412730532047, + "grad_norm": 0.22686593234539032, + "learning_rate": 9.04026319598453e-06, + "loss": 0.0053, + "step": 87460 + }, + { + "epoch": 0.5610054099469909, + "grad_norm": 0.02577713504433632, + "learning_rate": 9.039933443654902e-06, + "loss": 0.0033, + "step": 87470 + }, + { + "epoch": 0.5610695468407769, + "grad_norm": 0.0653962716460228, + "learning_rate": 9.039603640702022e-06, + "loss": 0.0039, + "step": 87480 + }, + { + "epoch": 0.5611336837345631, + "grad_norm": 0.09157300740480423, + "learning_rate": 9.039273787130021e-06, + "loss": 0.0055, + "step": 87490 + }, + { + "epoch": 0.5611978206283491, + "grad_norm": 0.23918955028057098, + "learning_rate": 9.038943882943031e-06, + "loss": 0.0048, + "step": 87500 + }, + { + "epoch": 0.5612619575221353, + "grad_norm": 0.134276881814003, + "learning_rate": 9.038613928145187e-06, + "loss": 0.0033, + "step": 87510 + }, + { + "epoch": 0.5613260944159213, + "grad_norm": 0.044359344989061356, + "learning_rate": 9.038283922740626e-06, + "loss": 0.0042, + "step": 87520 + }, + { + "epoch": 0.5613902313097074, + "grad_norm": 0.045472290366888046, + "learning_rate": 9.037953866733482e-06, + "loss": 0.0031, + "step": 87530 + }, + { + "epoch": 0.5614543682034935, + "grad_norm": 0.1305588036775589, + "learning_rate": 9.03762376012789e-06, + "loss": 0.0053, + "step": 87540 + }, + { + "epoch": 0.5615185050972796, + "grad_norm": 0.142921581864357, + "learning_rate": 9.037293602927986e-06, + "loss": 0.0038, + "step": 87550 + }, + { + "epoch": 0.5615826419910658, + "grad_norm": 0.25706934928894043, + "learning_rate": 9.036963395137907e-06, + "loss": 0.004, + "step": 87560 + }, + { + "epoch": 0.5616467788848518, + "grad_norm": 0.13787363469600677, + "learning_rate": 9.036633136761792e-06, + "loss": 0.0029, + "step": 87570 + }, + { + "epoch": 0.561710915778638, + "grad_norm": 0.21606731414794922, + "learning_rate": 9.03630282780378e-06, + "loss": 0.0025, + "step": 87580 + }, + { + "epoch": 0.561775052672424, + "grad_norm": 0.26017582416534424, + "learning_rate": 9.035972468268007e-06, + "loss": 0.0051, + "step": 87590 + }, + { + "epoch": 0.5618391895662102, + "grad_norm": 0.32015421986579895, + "learning_rate": 9.035642058158616e-06, + "loss": 0.0042, + "step": 87600 + }, + { + "epoch": 0.5619033264599962, + "grad_norm": 0.10264544934034348, + "learning_rate": 9.035311597479746e-06, + "loss": 0.0026, + "step": 87610 + }, + { + "epoch": 0.5619674633537823, + "grad_norm": 0.12089379131793976, + "learning_rate": 9.034981086235535e-06, + "loss": 0.0026, + "step": 87620 + }, + { + "epoch": 0.5620316002475684, + "grad_norm": 0.06433777511119843, + "learning_rate": 9.034650524430129e-06, + "loss": 0.0035, + "step": 87630 + }, + { + "epoch": 0.5620957371413545, + "grad_norm": 0.13378623127937317, + "learning_rate": 9.034319912067669e-06, + "loss": 0.0037, + "step": 87640 + }, + { + "epoch": 0.5621598740351406, + "grad_norm": 0.13473980128765106, + "learning_rate": 9.033989249152297e-06, + "loss": 0.0022, + "step": 87650 + }, + { + "epoch": 0.5622240109289267, + "grad_norm": 0.13181231915950775, + "learning_rate": 9.033658535688157e-06, + "loss": 0.0028, + "step": 87660 + }, + { + "epoch": 0.5622881478227127, + "grad_norm": 0.05946122109889984, + "learning_rate": 9.033327771679393e-06, + "loss": 0.0028, + "step": 87670 + }, + { + "epoch": 0.5623522847164989, + "grad_norm": 0.25498464703559875, + "learning_rate": 9.032996957130146e-06, + "loss": 0.0062, + "step": 87680 + }, + { + "epoch": 0.562416421610285, + "grad_norm": 0.1765357255935669, + "learning_rate": 9.03266609204457e-06, + "loss": 0.0033, + "step": 87690 + }, + { + "epoch": 0.5624805585040711, + "grad_norm": 0.11471286416053772, + "learning_rate": 9.032335176426801e-06, + "loss": 0.0026, + "step": 87700 + }, + { + "epoch": 0.5625446953978572, + "grad_norm": 0.436392605304718, + "learning_rate": 9.032004210280993e-06, + "loss": 0.0043, + "step": 87710 + }, + { + "epoch": 0.5626088322916433, + "grad_norm": 0.09560250490903854, + "learning_rate": 9.03167319361129e-06, + "loss": 0.0018, + "step": 87720 + }, + { + "epoch": 0.5626729691854294, + "grad_norm": 0.1273191273212433, + "learning_rate": 9.03134212642184e-06, + "loss": 0.0071, + "step": 87730 + }, + { + "epoch": 0.5627371060792155, + "grad_norm": 0.09865515679121017, + "learning_rate": 9.031011008716793e-06, + "loss": 0.0038, + "step": 87740 + }, + { + "epoch": 0.5628012429730016, + "grad_norm": 0.2741306722164154, + "learning_rate": 9.030679840500298e-06, + "loss": 0.0044, + "step": 87750 + }, + { + "epoch": 0.5628653798667876, + "grad_norm": 0.10659819841384888, + "learning_rate": 9.030348621776502e-06, + "loss": 0.0019, + "step": 87760 + }, + { + "epoch": 0.5629295167605738, + "grad_norm": 0.280533105134964, + "learning_rate": 9.030017352549558e-06, + "loss": 0.0046, + "step": 87770 + }, + { + "epoch": 0.5629936536543598, + "grad_norm": 0.06319417804479599, + "learning_rate": 9.029686032823615e-06, + "loss": 0.0083, + "step": 87780 + }, + { + "epoch": 0.563057790548146, + "grad_norm": 0.13399365544319153, + "learning_rate": 9.029354662602829e-06, + "loss": 0.0029, + "step": 87790 + }, + { + "epoch": 0.563121927441932, + "grad_norm": 0.2020081877708435, + "learning_rate": 9.029023241891347e-06, + "loss": 0.0028, + "step": 87800 + }, + { + "epoch": 0.5631860643357182, + "grad_norm": 0.4207175672054291, + "learning_rate": 9.028691770693324e-06, + "loss": 0.0039, + "step": 87810 + }, + { + "epoch": 0.5632502012295042, + "grad_norm": 0.39699438214302063, + "learning_rate": 9.028360249012915e-06, + "loss": 0.0041, + "step": 87820 + }, + { + "epoch": 0.5633143381232903, + "grad_norm": 0.02185790054500103, + "learning_rate": 9.028028676854274e-06, + "loss": 0.0044, + "step": 87830 + }, + { + "epoch": 0.5633784750170765, + "grad_norm": 0.0680369883775711, + "learning_rate": 9.027697054221554e-06, + "loss": 0.0038, + "step": 87840 + }, + { + "epoch": 0.5634426119108625, + "grad_norm": 0.1581745594739914, + "learning_rate": 9.02736538111891e-06, + "loss": 0.0036, + "step": 87850 + }, + { + "epoch": 0.5635067488046487, + "grad_norm": 0.4550938606262207, + "learning_rate": 9.027033657550502e-06, + "loss": 0.0087, + "step": 87860 + }, + { + "epoch": 0.5635708856984347, + "grad_norm": 0.055353473871946335, + "learning_rate": 9.026701883520481e-06, + "loss": 0.0031, + "step": 87870 + }, + { + "epoch": 0.5636350225922209, + "grad_norm": 0.16755340993404388, + "learning_rate": 9.02637005903301e-06, + "loss": 0.0035, + "step": 87880 + }, + { + "epoch": 0.5636991594860069, + "grad_norm": 0.16791850328445435, + "learning_rate": 9.026038184092244e-06, + "loss": 0.0024, + "step": 87890 + }, + { + "epoch": 0.563763296379793, + "grad_norm": 0.13935136795043945, + "learning_rate": 9.025706258702343e-06, + "loss": 0.0029, + "step": 87900 + }, + { + "epoch": 0.5638274332735791, + "grad_norm": 0.22306275367736816, + "learning_rate": 9.025374282867465e-06, + "loss": 0.0035, + "step": 87910 + }, + { + "epoch": 0.5638915701673652, + "grad_norm": 0.09611920267343521, + "learning_rate": 9.02504225659177e-06, + "loss": 0.0017, + "step": 87920 + }, + { + "epoch": 0.5639557070611513, + "grad_norm": 0.2897023856639862, + "learning_rate": 9.024710179879417e-06, + "loss": 0.0036, + "step": 87930 + }, + { + "epoch": 0.5640198439549374, + "grad_norm": 0.21228285133838654, + "learning_rate": 9.02437805273457e-06, + "loss": 0.0033, + "step": 87940 + }, + { + "epoch": 0.5640839808487235, + "grad_norm": 0.01964620314538479, + "learning_rate": 9.024045875161392e-06, + "loss": 0.0023, + "step": 87950 + }, + { + "epoch": 0.5641481177425096, + "grad_norm": 0.8125170469284058, + "learning_rate": 9.023713647164041e-06, + "loss": 0.0038, + "step": 87960 + }, + { + "epoch": 0.5642122546362958, + "grad_norm": 0.09887786954641342, + "learning_rate": 9.023381368746685e-06, + "loss": 0.0053, + "step": 87970 + }, + { + "epoch": 0.5642763915300818, + "grad_norm": 0.05003981292247772, + "learning_rate": 9.023049039913482e-06, + "loss": 0.0014, + "step": 87980 + }, + { + "epoch": 0.564340528423868, + "grad_norm": 0.2274905890226364, + "learning_rate": 9.0227166606686e-06, + "loss": 0.0042, + "step": 87990 + }, + { + "epoch": 0.564404665317654, + "grad_norm": 0.1140158474445343, + "learning_rate": 9.022384231016203e-06, + "loss": 0.0033, + "step": 88000 + }, + { + "epoch": 0.5644688022114401, + "grad_norm": 0.16795748472213745, + "learning_rate": 9.022051750960458e-06, + "loss": 0.0029, + "step": 88010 + }, + { + "epoch": 0.5645329391052262, + "grad_norm": 0.18950389325618744, + "learning_rate": 9.02171922050553e-06, + "loss": 0.0043, + "step": 88020 + }, + { + "epoch": 0.5645970759990123, + "grad_norm": 0.14368632435798645, + "learning_rate": 9.021386639655585e-06, + "loss": 0.0053, + "step": 88030 + }, + { + "epoch": 0.5646612128927984, + "grad_norm": 0.18149404227733612, + "learning_rate": 9.021054008414791e-06, + "loss": 0.0037, + "step": 88040 + }, + { + "epoch": 0.5647253497865845, + "grad_norm": 0.0776793360710144, + "learning_rate": 9.02072132678732e-06, + "loss": 0.0035, + "step": 88050 + }, + { + "epoch": 0.5647894866803705, + "grad_norm": 0.08136197924613953, + "learning_rate": 9.020388594777333e-06, + "loss": 0.0036, + "step": 88060 + }, + { + "epoch": 0.5648536235741567, + "grad_norm": 0.17837202548980713, + "learning_rate": 9.020055812389005e-06, + "loss": 0.0066, + "step": 88070 + }, + { + "epoch": 0.5649177604679427, + "grad_norm": 0.13673412799835205, + "learning_rate": 9.019722979626504e-06, + "loss": 0.0026, + "step": 88080 + }, + { + "epoch": 0.5649818973617289, + "grad_norm": 0.13098108768463135, + "learning_rate": 9.019390096494003e-06, + "loss": 0.003, + "step": 88090 + }, + { + "epoch": 0.5650460342555149, + "grad_norm": 0.1096213161945343, + "learning_rate": 9.01905716299567e-06, + "loss": 0.0026, + "step": 88100 + }, + { + "epoch": 0.5651101711493011, + "grad_norm": 0.06392141431570053, + "learning_rate": 9.018724179135679e-06, + "loss": 0.0038, + "step": 88110 + }, + { + "epoch": 0.5651743080430872, + "grad_norm": 0.20007091760635376, + "learning_rate": 9.018391144918201e-06, + "loss": 0.005, + "step": 88120 + }, + { + "epoch": 0.5652384449368733, + "grad_norm": 0.10015270113945007, + "learning_rate": 9.01805806034741e-06, + "loss": 0.0023, + "step": 88130 + }, + { + "epoch": 0.5653025818306594, + "grad_norm": 0.12199140340089798, + "learning_rate": 9.017724925427482e-06, + "loss": 0.0021, + "step": 88140 + }, + { + "epoch": 0.5653667187244454, + "grad_norm": 0.12377575039863586, + "learning_rate": 9.017391740162587e-06, + "loss": 0.0018, + "step": 88150 + }, + { + "epoch": 0.5654308556182316, + "grad_norm": 0.35765019059181213, + "learning_rate": 9.017058504556904e-06, + "loss": 0.0026, + "step": 88160 + }, + { + "epoch": 0.5654949925120176, + "grad_norm": 0.12522195279598236, + "learning_rate": 9.016725218614606e-06, + "loss": 0.0028, + "step": 88170 + }, + { + "epoch": 0.5655591294058038, + "grad_norm": 0.12536902725696564, + "learning_rate": 9.01639188233987e-06, + "loss": 0.0065, + "step": 88180 + }, + { + "epoch": 0.5656232662995898, + "grad_norm": 0.187701016664505, + "learning_rate": 9.016058495736873e-06, + "loss": 0.0038, + "step": 88190 + }, + { + "epoch": 0.565687403193376, + "grad_norm": 0.09773750603199005, + "learning_rate": 9.015725058809793e-06, + "loss": 0.0019, + "step": 88200 + }, + { + "epoch": 0.565751540087162, + "grad_norm": 0.011167613789439201, + "learning_rate": 9.015391571562809e-06, + "loss": 0.0023, + "step": 88210 + }, + { + "epoch": 0.5658156769809481, + "grad_norm": 0.16488158702850342, + "learning_rate": 9.015058034000097e-06, + "loss": 0.0024, + "step": 88220 + }, + { + "epoch": 0.5658798138747342, + "grad_norm": 0.19378674030303955, + "learning_rate": 9.01472444612584e-06, + "loss": 0.003, + "step": 88230 + }, + { + "epoch": 0.5659439507685203, + "grad_norm": 0.11837329715490341, + "learning_rate": 9.014390807944217e-06, + "loss": 0.0038, + "step": 88240 + }, + { + "epoch": 0.5660080876623065, + "grad_norm": 0.05158904567360878, + "learning_rate": 9.014057119459408e-06, + "loss": 0.0036, + "step": 88250 + }, + { + "epoch": 0.5660722245560925, + "grad_norm": 0.08860959112644196, + "learning_rate": 9.013723380675591e-06, + "loss": 0.0021, + "step": 88260 + }, + { + "epoch": 0.5661363614498787, + "grad_norm": 0.1362515538930893, + "learning_rate": 9.013389591596954e-06, + "loss": 0.0034, + "step": 88270 + }, + { + "epoch": 0.5662004983436647, + "grad_norm": 0.21222984790802002, + "learning_rate": 9.013055752227679e-06, + "loss": 0.0039, + "step": 88280 + }, + { + "epoch": 0.5662646352374509, + "grad_norm": 0.10679440200328827, + "learning_rate": 9.012721862571945e-06, + "loss": 0.0024, + "step": 88290 + }, + { + "epoch": 0.5663287721312369, + "grad_norm": 0.2545750141143799, + "learning_rate": 9.012387922633938e-06, + "loss": 0.0038, + "step": 88300 + }, + { + "epoch": 0.566392909025023, + "grad_norm": 0.2821141183376312, + "learning_rate": 9.012053932417844e-06, + "loss": 0.0029, + "step": 88310 + }, + { + "epoch": 0.5664570459188091, + "grad_norm": 0.16089150309562683, + "learning_rate": 9.011719891927846e-06, + "loss": 0.0035, + "step": 88320 + }, + { + "epoch": 0.5665211828125952, + "grad_norm": 0.10448617488145828, + "learning_rate": 9.011385801168131e-06, + "loss": 0.0026, + "step": 88330 + }, + { + "epoch": 0.5665853197063813, + "grad_norm": 0.13759052753448486, + "learning_rate": 9.011051660142885e-06, + "loss": 0.0022, + "step": 88340 + }, + { + "epoch": 0.5666494566001674, + "grad_norm": 0.3571251928806305, + "learning_rate": 9.010717468856295e-06, + "loss": 0.0029, + "step": 88350 + }, + { + "epoch": 0.5667135934939534, + "grad_norm": 0.08513778448104858, + "learning_rate": 9.01038322731255e-06, + "loss": 0.0032, + "step": 88360 + }, + { + "epoch": 0.5667777303877396, + "grad_norm": 0.06159251555800438, + "learning_rate": 9.010048935515835e-06, + "loss": 0.0036, + "step": 88370 + }, + { + "epoch": 0.5668418672815256, + "grad_norm": 0.06133711710572243, + "learning_rate": 9.00971459347034e-06, + "loss": 0.0026, + "step": 88380 + }, + { + "epoch": 0.5669060041753118, + "grad_norm": 0.3495323061943054, + "learning_rate": 9.009380201180258e-06, + "loss": 0.0045, + "step": 88390 + }, + { + "epoch": 0.5669701410690979, + "grad_norm": 0.13650336861610413, + "learning_rate": 9.009045758649777e-06, + "loss": 0.0037, + "step": 88400 + }, + { + "epoch": 0.567034277962884, + "grad_norm": 0.2663722038269043, + "learning_rate": 9.008711265883086e-06, + "loss": 0.0042, + "step": 88410 + }, + { + "epoch": 0.5670984148566701, + "grad_norm": 0.30290576815605164, + "learning_rate": 9.008376722884376e-06, + "loss": 0.0052, + "step": 88420 + }, + { + "epoch": 0.5671625517504562, + "grad_norm": 0.07226832211017609, + "learning_rate": 9.008042129657843e-06, + "loss": 0.0026, + "step": 88430 + }, + { + "epoch": 0.5672266886442423, + "grad_norm": 0.3313843905925751, + "learning_rate": 9.007707486207677e-06, + "loss": 0.0032, + "step": 88440 + }, + { + "epoch": 0.5672908255380283, + "grad_norm": 0.1292109340429306, + "learning_rate": 9.007372792538074e-06, + "loss": 0.0033, + "step": 88450 + }, + { + "epoch": 0.5673549624318145, + "grad_norm": 0.08601272851228714, + "learning_rate": 9.007038048653224e-06, + "loss": 0.0029, + "step": 88460 + }, + { + "epoch": 0.5674190993256005, + "grad_norm": 0.09319724887609482, + "learning_rate": 9.006703254557324e-06, + "loss": 0.0014, + "step": 88470 + }, + { + "epoch": 0.5674832362193867, + "grad_norm": 0.04385272040963173, + "learning_rate": 9.006368410254568e-06, + "loss": 0.0031, + "step": 88480 + }, + { + "epoch": 0.5675473731131727, + "grad_norm": 0.16889996826648712, + "learning_rate": 9.006033515749153e-06, + "loss": 0.0026, + "step": 88490 + }, + { + "epoch": 0.5676115100069589, + "grad_norm": 0.02448371797800064, + "learning_rate": 9.005698571045273e-06, + "loss": 0.0031, + "step": 88500 + }, + { + "epoch": 0.5676756469007449, + "grad_norm": 0.15226224064826965, + "learning_rate": 9.00536357614713e-06, + "loss": 0.0036, + "step": 88510 + }, + { + "epoch": 0.567739783794531, + "grad_norm": 0.14777974784374237, + "learning_rate": 9.005028531058918e-06, + "loss": 0.0039, + "step": 88520 + }, + { + "epoch": 0.5678039206883172, + "grad_norm": 0.209447979927063, + "learning_rate": 9.004693435784837e-06, + "loss": 0.0034, + "step": 88530 + }, + { + "epoch": 0.5678680575821032, + "grad_norm": 0.14773207902908325, + "learning_rate": 9.004358290329082e-06, + "loss": 0.0036, + "step": 88540 + }, + { + "epoch": 0.5679321944758894, + "grad_norm": 0.0850907489657402, + "learning_rate": 9.00402309469586e-06, + "loss": 0.004, + "step": 88550 + }, + { + "epoch": 0.5679963313696754, + "grad_norm": 0.10000663995742798, + "learning_rate": 9.003687848889365e-06, + "loss": 0.0033, + "step": 88560 + }, + { + "epoch": 0.5680604682634616, + "grad_norm": 0.131527841091156, + "learning_rate": 9.003352552913799e-06, + "loss": 0.0026, + "step": 88570 + }, + { + "epoch": 0.5681246051572476, + "grad_norm": 0.27508410811424255, + "learning_rate": 9.003017206773365e-06, + "loss": 0.004, + "step": 88580 + }, + { + "epoch": 0.5681887420510338, + "grad_norm": 0.09187949448823929, + "learning_rate": 9.002681810472265e-06, + "loss": 0.003, + "step": 88590 + }, + { + "epoch": 0.5682528789448198, + "grad_norm": 0.2346142679452896, + "learning_rate": 9.0023463640147e-06, + "loss": 0.0067, + "step": 88600 + }, + { + "epoch": 0.5683170158386059, + "grad_norm": 0.09984955191612244, + "learning_rate": 9.002010867404876e-06, + "loss": 0.0019, + "step": 88610 + }, + { + "epoch": 0.568381152732392, + "grad_norm": 0.10223357379436493, + "learning_rate": 9.001675320646994e-06, + "loss": 0.002, + "step": 88620 + }, + { + "epoch": 0.5684452896261781, + "grad_norm": 0.14836329221725464, + "learning_rate": 9.001339723745262e-06, + "loss": 0.0029, + "step": 88630 + }, + { + "epoch": 0.5685094265199642, + "grad_norm": 0.11841240525245667, + "learning_rate": 9.001004076703883e-06, + "loss": 0.0033, + "step": 88640 + }, + { + "epoch": 0.5685735634137503, + "grad_norm": 0.24895945191383362, + "learning_rate": 9.000668379527062e-06, + "loss": 0.0026, + "step": 88650 + }, + { + "epoch": 0.5686377003075364, + "grad_norm": 0.05293124541640282, + "learning_rate": 9.000332632219009e-06, + "loss": 0.0022, + "step": 88660 + }, + { + "epoch": 0.5687018372013225, + "grad_norm": 0.16243022680282593, + "learning_rate": 8.999996834783929e-06, + "loss": 0.0037, + "step": 88670 + }, + { + "epoch": 0.5687659740951087, + "grad_norm": 0.06476463377475739, + "learning_rate": 8.99966098722603e-06, + "loss": 0.0035, + "step": 88680 + }, + { + "epoch": 0.5688301109888947, + "grad_norm": 0.12459227442741394, + "learning_rate": 8.999325089549518e-06, + "loss": 0.0036, + "step": 88690 + }, + { + "epoch": 0.5688942478826808, + "grad_norm": 0.1006166934967041, + "learning_rate": 8.998989141758607e-06, + "loss": 0.0079, + "step": 88700 + }, + { + "epoch": 0.5689583847764669, + "grad_norm": 0.07273389399051666, + "learning_rate": 8.998653143857501e-06, + "loss": 0.0029, + "step": 88710 + }, + { + "epoch": 0.569022521670253, + "grad_norm": 0.0876203402876854, + "learning_rate": 8.998317095850416e-06, + "loss": 0.0027, + "step": 88720 + }, + { + "epoch": 0.5690866585640391, + "grad_norm": 0.06741435080766678, + "learning_rate": 8.99798099774156e-06, + "loss": 0.0041, + "step": 88730 + }, + { + "epoch": 0.5691507954578252, + "grad_norm": 0.12473282963037491, + "learning_rate": 8.997644849535144e-06, + "loss": 0.0022, + "step": 88740 + }, + { + "epoch": 0.5692149323516112, + "grad_norm": 0.13442359864711761, + "learning_rate": 8.99730865123538e-06, + "loss": 0.0058, + "step": 88750 + }, + { + "epoch": 0.5692790692453974, + "grad_norm": 0.1543642282485962, + "learning_rate": 8.996972402846485e-06, + "loss": 0.0032, + "step": 88760 + }, + { + "epoch": 0.5693432061391834, + "grad_norm": 0.07570601254701614, + "learning_rate": 8.996636104372667e-06, + "loss": 0.0027, + "step": 88770 + }, + { + "epoch": 0.5694073430329696, + "grad_norm": 0.20600873231887817, + "learning_rate": 8.996299755818142e-06, + "loss": 0.0036, + "step": 88780 + }, + { + "epoch": 0.5694714799267556, + "grad_norm": 0.09505226463079453, + "learning_rate": 8.995963357187125e-06, + "loss": 0.0028, + "step": 88790 + }, + { + "epoch": 0.5695356168205418, + "grad_norm": 0.10055418312549591, + "learning_rate": 8.995626908483833e-06, + "loss": 0.003, + "step": 88800 + }, + { + "epoch": 0.5695997537143279, + "grad_norm": 0.08040245622396469, + "learning_rate": 8.995290409712479e-06, + "loss": 0.006, + "step": 88810 + }, + { + "epoch": 0.569663890608114, + "grad_norm": 0.3970910608768463, + "learning_rate": 8.99495386087728e-06, + "loss": 0.0023, + "step": 88820 + }, + { + "epoch": 0.5697280275019001, + "grad_norm": 0.0972469300031662, + "learning_rate": 8.994617261982454e-06, + "loss": 0.0026, + "step": 88830 + }, + { + "epoch": 0.5697921643956861, + "grad_norm": 0.17260612547397614, + "learning_rate": 8.99428061303222e-06, + "loss": 0.0042, + "step": 88840 + }, + { + "epoch": 0.5698563012894723, + "grad_norm": 0.0984884649515152, + "learning_rate": 8.993943914030793e-06, + "loss": 0.0034, + "step": 88850 + }, + { + "epoch": 0.5699204381832583, + "grad_norm": 0.2577366232872009, + "learning_rate": 8.993607164982398e-06, + "loss": 0.0031, + "step": 88860 + }, + { + "epoch": 0.5699845750770445, + "grad_norm": 0.0792558565735817, + "learning_rate": 8.993270365891249e-06, + "loss": 0.002, + "step": 88870 + }, + { + "epoch": 0.5700487119708305, + "grad_norm": 0.05996266379952431, + "learning_rate": 8.992933516761569e-06, + "loss": 0.004, + "step": 88880 + }, + { + "epoch": 0.5701128488646167, + "grad_norm": 0.21832101047039032, + "learning_rate": 8.992596617597577e-06, + "loss": 0.0038, + "step": 88890 + }, + { + "epoch": 0.5701769857584027, + "grad_norm": 0.18120752274990082, + "learning_rate": 8.992259668403495e-06, + "loss": 0.0027, + "step": 88900 + }, + { + "epoch": 0.5702411226521888, + "grad_norm": 0.01756487786769867, + "learning_rate": 8.99192266918355e-06, + "loss": 0.0032, + "step": 88910 + }, + { + "epoch": 0.5703052595459749, + "grad_norm": 0.2363017350435257, + "learning_rate": 8.991585619941958e-06, + "loss": 0.0067, + "step": 88920 + }, + { + "epoch": 0.570369396439761, + "grad_norm": 0.27013689279556274, + "learning_rate": 8.991248520682948e-06, + "loss": 0.0029, + "step": 88930 + }, + { + "epoch": 0.5704335333335471, + "grad_norm": 0.031348928809165955, + "learning_rate": 8.990911371410738e-06, + "loss": 0.003, + "step": 88940 + }, + { + "epoch": 0.5704976702273332, + "grad_norm": 0.07888729870319366, + "learning_rate": 8.990574172129559e-06, + "loss": 0.0029, + "step": 88950 + }, + { + "epoch": 0.5705618071211194, + "grad_norm": 0.16150203347206116, + "learning_rate": 8.990236922843632e-06, + "loss": 0.0031, + "step": 88960 + }, + { + "epoch": 0.5706259440149054, + "grad_norm": 0.26639118790626526, + "learning_rate": 8.989899623557184e-06, + "loss": 0.0025, + "step": 88970 + }, + { + "epoch": 0.5706900809086916, + "grad_norm": 0.13787950575351715, + "learning_rate": 8.989562274274444e-06, + "loss": 0.0039, + "step": 88980 + }, + { + "epoch": 0.5707542178024776, + "grad_norm": 0.11798261106014252, + "learning_rate": 8.989224874999637e-06, + "loss": 0.0056, + "step": 88990 + }, + { + "epoch": 0.5708183546962637, + "grad_norm": 0.20718160271644592, + "learning_rate": 8.98888742573699e-06, + "loss": 0.005, + "step": 89000 + }, + { + "epoch": 0.5708824915900498, + "grad_norm": 0.23089097440242767, + "learning_rate": 8.988549926490736e-06, + "loss": 0.0046, + "step": 89010 + }, + { + "epoch": 0.5709466284838359, + "grad_norm": 0.02872941456735134, + "learning_rate": 8.988212377265098e-06, + "loss": 0.0023, + "step": 89020 + }, + { + "epoch": 0.571010765377622, + "grad_norm": 0.10600224137306213, + "learning_rate": 8.987874778064309e-06, + "loss": 0.0041, + "step": 89030 + }, + { + "epoch": 0.5710749022714081, + "grad_norm": 0.32066240906715393, + "learning_rate": 8.987537128892598e-06, + "loss": 0.0051, + "step": 89040 + }, + { + "epoch": 0.5711390391651942, + "grad_norm": 0.05352598428726196, + "learning_rate": 8.987199429754199e-06, + "loss": 0.0053, + "step": 89050 + }, + { + "epoch": 0.5712031760589803, + "grad_norm": 0.05201287567615509, + "learning_rate": 8.98686168065334e-06, + "loss": 0.0047, + "step": 89060 + }, + { + "epoch": 0.5712673129527663, + "grad_norm": 0.18884700536727905, + "learning_rate": 8.986523881594255e-06, + "loss": 0.0023, + "step": 89070 + }, + { + "epoch": 0.5713314498465525, + "grad_norm": 0.2029830664396286, + "learning_rate": 8.986186032581177e-06, + "loss": 0.0045, + "step": 89080 + }, + { + "epoch": 0.5713955867403385, + "grad_norm": 0.043966054916381836, + "learning_rate": 8.98584813361834e-06, + "loss": 0.0051, + "step": 89090 + }, + { + "epoch": 0.5714597236341247, + "grad_norm": 0.053840771317481995, + "learning_rate": 8.985510184709976e-06, + "loss": 0.003, + "step": 89100 + }, + { + "epoch": 0.5715238605279108, + "grad_norm": 0.08549485355615616, + "learning_rate": 8.985172185860321e-06, + "loss": 0.0051, + "step": 89110 + }, + { + "epoch": 0.5715879974216969, + "grad_norm": 0.15409742295742035, + "learning_rate": 8.984834137073611e-06, + "loss": 0.003, + "step": 89120 + }, + { + "epoch": 0.571652134315483, + "grad_norm": 0.09277409315109253, + "learning_rate": 8.984496038354081e-06, + "loss": 0.003, + "step": 89130 + }, + { + "epoch": 0.571716271209269, + "grad_norm": 0.16708652675151825, + "learning_rate": 8.984157889705968e-06, + "loss": 0.003, + "step": 89140 + }, + { + "epoch": 0.5717804081030552, + "grad_norm": 0.09447616338729858, + "learning_rate": 8.983819691133508e-06, + "loss": 0.003, + "step": 89150 + }, + { + "epoch": 0.5718445449968412, + "grad_norm": 0.03978119418025017, + "learning_rate": 8.983481442640942e-06, + "loss": 0.0033, + "step": 89160 + }, + { + "epoch": 0.5719086818906274, + "grad_norm": 0.03759925439953804, + "learning_rate": 8.983143144232507e-06, + "loss": 0.0029, + "step": 89170 + }, + { + "epoch": 0.5719728187844134, + "grad_norm": 0.06725975126028061, + "learning_rate": 8.98280479591244e-06, + "loss": 0.0037, + "step": 89180 + }, + { + "epoch": 0.5720369556781996, + "grad_norm": 0.15373003482818604, + "learning_rate": 8.982466397684984e-06, + "loss": 0.0021, + "step": 89190 + }, + { + "epoch": 0.5721010925719856, + "grad_norm": 0.18197908997535706, + "learning_rate": 8.982127949554375e-06, + "loss": 0.0021, + "step": 89200 + }, + { + "epoch": 0.5721652294657718, + "grad_norm": 0.004371248185634613, + "learning_rate": 8.981789451524859e-06, + "loss": 0.0022, + "step": 89210 + }, + { + "epoch": 0.5722293663595578, + "grad_norm": 0.10552152991294861, + "learning_rate": 8.981450903600674e-06, + "loss": 0.0023, + "step": 89220 + }, + { + "epoch": 0.5722935032533439, + "grad_norm": 0.23676060140132904, + "learning_rate": 8.981112305786066e-06, + "loss": 0.0044, + "step": 89230 + }, + { + "epoch": 0.5723576401471301, + "grad_norm": 0.21808232367038727, + "learning_rate": 8.980773658085274e-06, + "loss": 0.0042, + "step": 89240 + }, + { + "epoch": 0.5724217770409161, + "grad_norm": 0.11256436258554459, + "learning_rate": 8.980434960502544e-06, + "loss": 0.0023, + "step": 89250 + }, + { + "epoch": 0.5724859139347023, + "grad_norm": 0.18191315233707428, + "learning_rate": 8.980096213042118e-06, + "loss": 0.0027, + "step": 89260 + }, + { + "epoch": 0.5725500508284883, + "grad_norm": 0.24376623332500458, + "learning_rate": 8.979757415708243e-06, + "loss": 0.0036, + "step": 89270 + }, + { + "epoch": 0.5726141877222745, + "grad_norm": 0.17087090015411377, + "learning_rate": 8.979418568505163e-06, + "loss": 0.0028, + "step": 89280 + }, + { + "epoch": 0.5726783246160605, + "grad_norm": 0.6307005286216736, + "learning_rate": 8.979079671437123e-06, + "loss": 0.0163, + "step": 89290 + }, + { + "epoch": 0.5727424615098466, + "grad_norm": 0.005229447968304157, + "learning_rate": 8.978740724508373e-06, + "loss": 0.0024, + "step": 89300 + }, + { + "epoch": 0.5728065984036327, + "grad_norm": 0.17592014372348785, + "learning_rate": 8.978401727723156e-06, + "loss": 0.0036, + "step": 89310 + }, + { + "epoch": 0.5728707352974188, + "grad_norm": 0.24231776595115662, + "learning_rate": 8.978062681085724e-06, + "loss": 0.003, + "step": 89320 + }, + { + "epoch": 0.5729348721912049, + "grad_norm": 0.18802590668201447, + "learning_rate": 8.977723584600322e-06, + "loss": 0.0038, + "step": 89330 + }, + { + "epoch": 0.572999009084991, + "grad_norm": 0.12922777235507965, + "learning_rate": 8.977384438271202e-06, + "loss": 0.0034, + "step": 89340 + }, + { + "epoch": 0.573063145978777, + "grad_norm": 0.07981414347887039, + "learning_rate": 8.977045242102611e-06, + "loss": 0.0025, + "step": 89350 + }, + { + "epoch": 0.5731272828725632, + "grad_norm": 0.04034131392836571, + "learning_rate": 8.976705996098801e-06, + "loss": 0.0025, + "step": 89360 + }, + { + "epoch": 0.5731914197663492, + "grad_norm": 0.07132384926080704, + "learning_rate": 8.976366700264023e-06, + "loss": 0.0025, + "step": 89370 + }, + { + "epoch": 0.5732555566601354, + "grad_norm": 0.04554812237620354, + "learning_rate": 8.97602735460253e-06, + "loss": 0.002, + "step": 89380 + }, + { + "epoch": 0.5733196935539215, + "grad_norm": 0.15574468672275543, + "learning_rate": 8.975687959118571e-06, + "loss": 0.0033, + "step": 89390 + }, + { + "epoch": 0.5733838304477076, + "grad_norm": 0.05257434397935867, + "learning_rate": 8.9753485138164e-06, + "loss": 0.0057, + "step": 89400 + }, + { + "epoch": 0.5734479673414937, + "grad_norm": 0.356705904006958, + "learning_rate": 8.975009018700271e-06, + "loss": 0.004, + "step": 89410 + }, + { + "epoch": 0.5735121042352798, + "grad_norm": 0.1239226907491684, + "learning_rate": 8.97466947377444e-06, + "loss": 0.0029, + "step": 89420 + }, + { + "epoch": 0.5735762411290659, + "grad_norm": 0.08364835381507874, + "learning_rate": 8.974329879043158e-06, + "loss": 0.0031, + "step": 89430 + }, + { + "epoch": 0.573640378022852, + "grad_norm": 0.24030348658561707, + "learning_rate": 8.973990234510684e-06, + "loss": 0.0031, + "step": 89440 + }, + { + "epoch": 0.5737045149166381, + "grad_norm": 0.1440877467393875, + "learning_rate": 8.973650540181271e-06, + "loss": 0.0027, + "step": 89450 + }, + { + "epoch": 0.5737686518104241, + "grad_norm": 0.07367430627346039, + "learning_rate": 8.973310796059175e-06, + "loss": 0.0047, + "step": 89460 + }, + { + "epoch": 0.5738327887042103, + "grad_norm": 0.1588653326034546, + "learning_rate": 8.972971002148658e-06, + "loss": 0.0034, + "step": 89470 + }, + { + "epoch": 0.5738969255979963, + "grad_norm": 0.2074684500694275, + "learning_rate": 8.972631158453973e-06, + "loss": 0.0032, + "step": 89480 + }, + { + "epoch": 0.5739610624917825, + "grad_norm": 0.2064570039510727, + "learning_rate": 8.972291264979383e-06, + "loss": 0.0029, + "step": 89490 + }, + { + "epoch": 0.5740251993855685, + "grad_norm": 0.13895684480667114, + "learning_rate": 8.971951321729142e-06, + "loss": 0.0035, + "step": 89500 + }, + { + "epoch": 0.5740893362793547, + "grad_norm": 0.18333616852760315, + "learning_rate": 8.971611328707512e-06, + "loss": 0.0029, + "step": 89510 + }, + { + "epoch": 0.5741534731731408, + "grad_norm": 0.08863773941993713, + "learning_rate": 8.971271285918755e-06, + "loss": 0.0037, + "step": 89520 + }, + { + "epoch": 0.5742176100669268, + "grad_norm": 0.1741122305393219, + "learning_rate": 8.97093119336713e-06, + "loss": 0.005, + "step": 89530 + }, + { + "epoch": 0.574281746960713, + "grad_norm": 0.28641340136528015, + "learning_rate": 8.970591051056899e-06, + "loss": 0.0052, + "step": 89540 + }, + { + "epoch": 0.574345883854499, + "grad_norm": 0.1354037970304489, + "learning_rate": 8.970250858992325e-06, + "loss": 0.0023, + "step": 89550 + }, + { + "epoch": 0.5744100207482852, + "grad_norm": 0.10131777077913284, + "learning_rate": 8.969910617177668e-06, + "loss": 0.0033, + "step": 89560 + }, + { + "epoch": 0.5744741576420712, + "grad_norm": 0.19914120435714722, + "learning_rate": 8.969570325617196e-06, + "loss": 0.0034, + "step": 89570 + }, + { + "epoch": 0.5745382945358574, + "grad_norm": 0.18307963013648987, + "learning_rate": 8.969229984315172e-06, + "loss": 0.003, + "step": 89580 + }, + { + "epoch": 0.5746024314296434, + "grad_norm": 0.054778408259153366, + "learning_rate": 8.968889593275857e-06, + "loss": 0.0039, + "step": 89590 + }, + { + "epoch": 0.5746665683234295, + "grad_norm": 0.09347351640462875, + "learning_rate": 8.96854915250352e-06, + "loss": 0.0031, + "step": 89600 + }, + { + "epoch": 0.5747307052172156, + "grad_norm": 0.22702820599079132, + "learning_rate": 8.968208662002425e-06, + "loss": 0.0041, + "step": 89610 + }, + { + "epoch": 0.5747948421110017, + "grad_norm": 0.10594911873340607, + "learning_rate": 8.967868121776841e-06, + "loss": 0.0032, + "step": 89620 + }, + { + "epoch": 0.5748589790047878, + "grad_norm": 0.10526852309703827, + "learning_rate": 8.967527531831033e-06, + "loss": 0.0036, + "step": 89630 + }, + { + "epoch": 0.5749231158985739, + "grad_norm": 0.14717718958854675, + "learning_rate": 8.967186892169269e-06, + "loss": 0.002, + "step": 89640 + }, + { + "epoch": 0.57498725279236, + "grad_norm": 0.14983873069286346, + "learning_rate": 8.966846202795818e-06, + "loss": 0.0046, + "step": 89650 + }, + { + "epoch": 0.5750513896861461, + "grad_norm": 0.19545316696166992, + "learning_rate": 8.966505463714948e-06, + "loss": 0.0057, + "step": 89660 + }, + { + "epoch": 0.5751155265799323, + "grad_norm": 0.25773897767066956, + "learning_rate": 8.96616467493093e-06, + "loss": 0.0026, + "step": 89670 + }, + { + "epoch": 0.5751796634737183, + "grad_norm": 0.2134181708097458, + "learning_rate": 8.965823836448035e-06, + "loss": 0.0037, + "step": 89680 + }, + { + "epoch": 0.5752438003675044, + "grad_norm": 0.024075880646705627, + "learning_rate": 8.965482948270533e-06, + "loss": 0.0099, + "step": 89690 + }, + { + "epoch": 0.5753079372612905, + "grad_norm": 0.18777407705783844, + "learning_rate": 8.965142010402696e-06, + "loss": 0.0037, + "step": 89700 + }, + { + "epoch": 0.5753720741550766, + "grad_norm": 0.2322371006011963, + "learning_rate": 8.964801022848795e-06, + "loss": 0.0054, + "step": 89710 + }, + { + "epoch": 0.5754362110488627, + "grad_norm": 0.1455300748348236, + "learning_rate": 8.964459985613104e-06, + "loss": 0.0022, + "step": 89720 + }, + { + "epoch": 0.5755003479426488, + "grad_norm": 0.1370176523923874, + "learning_rate": 8.964118898699896e-06, + "loss": 0.0028, + "step": 89730 + }, + { + "epoch": 0.5755644848364349, + "grad_norm": 0.3160856068134308, + "learning_rate": 8.963777762113445e-06, + "loss": 0.0053, + "step": 89740 + }, + { + "epoch": 0.575628621730221, + "grad_norm": 0.12635572254657745, + "learning_rate": 8.963436575858026e-06, + "loss": 0.0062, + "step": 89750 + }, + { + "epoch": 0.575692758624007, + "grad_norm": 0.29390159249305725, + "learning_rate": 8.963095339937914e-06, + "loss": 0.0038, + "step": 89760 + }, + { + "epoch": 0.5757568955177932, + "grad_norm": 0.09332332015037537, + "learning_rate": 8.962754054357385e-06, + "loss": 0.0036, + "step": 89770 + }, + { + "epoch": 0.5758210324115792, + "grad_norm": 0.4807676672935486, + "learning_rate": 8.962412719120715e-06, + "loss": 0.0044, + "step": 89780 + }, + { + "epoch": 0.5758851693053654, + "grad_norm": 0.14331871271133423, + "learning_rate": 8.962071334232182e-06, + "loss": 0.0029, + "step": 89790 + }, + { + "epoch": 0.5759493061991515, + "grad_norm": 0.3488805294036865, + "learning_rate": 8.961729899696064e-06, + "loss": 0.0067, + "step": 89800 + }, + { + "epoch": 0.5760134430929376, + "grad_norm": 1.0194358825683594, + "learning_rate": 8.961388415516638e-06, + "loss": 0.0025, + "step": 89810 + }, + { + "epoch": 0.5760775799867237, + "grad_norm": 0.2737617790699005, + "learning_rate": 8.961046881698184e-06, + "loss": 0.0038, + "step": 89820 + }, + { + "epoch": 0.5761417168805097, + "grad_norm": 0.15384909510612488, + "learning_rate": 8.960705298244982e-06, + "loss": 0.0024, + "step": 89830 + }, + { + "epoch": 0.5762058537742959, + "grad_norm": 0.11745814234018326, + "learning_rate": 8.960363665161313e-06, + "loss": 0.0054, + "step": 89840 + }, + { + "epoch": 0.5762699906680819, + "grad_norm": 0.1430215686559677, + "learning_rate": 8.960021982451455e-06, + "loss": 0.0023, + "step": 89850 + }, + { + "epoch": 0.5763341275618681, + "grad_norm": 0.13121038675308228, + "learning_rate": 8.959680250119693e-06, + "loss": 0.0043, + "step": 89860 + }, + { + "epoch": 0.5763982644556541, + "grad_norm": 0.24054284393787384, + "learning_rate": 8.959338468170307e-06, + "loss": 0.003, + "step": 89870 + }, + { + "epoch": 0.5764624013494403, + "grad_norm": 0.12309806048870087, + "learning_rate": 8.95899663660758e-06, + "loss": 0.0036, + "step": 89880 + }, + { + "epoch": 0.5765265382432263, + "grad_norm": 0.24552305042743683, + "learning_rate": 8.958654755435796e-06, + "loss": 0.0038, + "step": 89890 + }, + { + "epoch": 0.5765906751370125, + "grad_norm": 0.17345677316188812, + "learning_rate": 8.95831282465924e-06, + "loss": 0.0064, + "step": 89900 + }, + { + "epoch": 0.5766548120307985, + "grad_norm": 0.16772368550300598, + "learning_rate": 8.957970844282192e-06, + "loss": 0.0036, + "step": 89910 + }, + { + "epoch": 0.5767189489245846, + "grad_norm": 0.14221370220184326, + "learning_rate": 8.957628814308943e-06, + "loss": 0.0022, + "step": 89920 + }, + { + "epoch": 0.5767830858183707, + "grad_norm": 0.12244567275047302, + "learning_rate": 8.957286734743775e-06, + "loss": 0.0046, + "step": 89930 + }, + { + "epoch": 0.5768472227121568, + "grad_norm": 0.4485814869403839, + "learning_rate": 8.956944605590979e-06, + "loss": 0.0043, + "step": 89940 + }, + { + "epoch": 0.576911359605943, + "grad_norm": 0.04381153732538223, + "learning_rate": 8.956602426854836e-06, + "loss": 0.0025, + "step": 89950 + }, + { + "epoch": 0.576975496499729, + "grad_norm": 0.12222953140735626, + "learning_rate": 8.956260198539637e-06, + "loss": 0.005, + "step": 89960 + }, + { + "epoch": 0.5770396333935152, + "grad_norm": 0.011206181719899178, + "learning_rate": 8.955917920649672e-06, + "loss": 0.0019, + "step": 89970 + }, + { + "epoch": 0.5771037702873012, + "grad_norm": 0.22010645270347595, + "learning_rate": 8.955575593189227e-06, + "loss": 0.0032, + "step": 89980 + }, + { + "epoch": 0.5771679071810873, + "grad_norm": 0.1770000010728836, + "learning_rate": 8.955233216162594e-06, + "loss": 0.0028, + "step": 89990 + }, + { + "epoch": 0.5772320440748734, + "grad_norm": 0.18463435769081116, + "learning_rate": 8.95489078957406e-06, + "loss": 0.0037, + "step": 90000 + }, + { + "epoch": 0.5772961809686595, + "grad_norm": 0.11717001348733902, + "learning_rate": 8.95454831342792e-06, + "loss": 0.0019, + "step": 90010 + }, + { + "epoch": 0.5773603178624456, + "grad_norm": 0.06984009593725204, + "learning_rate": 8.954205787728462e-06, + "loss": 0.0019, + "step": 90020 + }, + { + "epoch": 0.5774244547562317, + "grad_norm": 0.38813281059265137, + "learning_rate": 8.95386321247998e-06, + "loss": 0.0047, + "step": 90030 + }, + { + "epoch": 0.5774885916500178, + "grad_norm": 0.15131741762161255, + "learning_rate": 8.953520587686766e-06, + "loss": 0.0045, + "step": 90040 + }, + { + "epoch": 0.5775527285438039, + "grad_norm": 0.05685955658555031, + "learning_rate": 8.953177913353113e-06, + "loss": 0.0025, + "step": 90050 + }, + { + "epoch": 0.5776168654375899, + "grad_norm": 0.3332849144935608, + "learning_rate": 8.952835189483316e-06, + "loss": 0.0033, + "step": 90060 + }, + { + "epoch": 0.5776810023313761, + "grad_norm": 0.15496273338794708, + "learning_rate": 8.95249241608167e-06, + "loss": 0.0024, + "step": 90070 + }, + { + "epoch": 0.5777451392251622, + "grad_norm": 0.20683197677135468, + "learning_rate": 8.952149593152468e-06, + "loss": 0.0025, + "step": 90080 + }, + { + "epoch": 0.5778092761189483, + "grad_norm": 0.1645357310771942, + "learning_rate": 8.951806720700007e-06, + "loss": 0.004, + "step": 90090 + }, + { + "epoch": 0.5778734130127344, + "grad_norm": 0.2908009886741638, + "learning_rate": 8.951463798728584e-06, + "loss": 0.0045, + "step": 90100 + }, + { + "epoch": 0.5779375499065205, + "grad_norm": 0.13992683589458466, + "learning_rate": 8.951120827242495e-06, + "loss": 0.0031, + "step": 90110 + }, + { + "epoch": 0.5780016868003066, + "grad_norm": 0.06215394660830498, + "learning_rate": 8.950777806246039e-06, + "loss": 0.0014, + "step": 90120 + }, + { + "epoch": 0.5780658236940927, + "grad_norm": 0.04137173667550087, + "learning_rate": 8.950434735743512e-06, + "loss": 0.0029, + "step": 90130 + }, + { + "epoch": 0.5781299605878788, + "grad_norm": 0.7353517413139343, + "learning_rate": 8.950091615739217e-06, + "loss": 0.0036, + "step": 90140 + }, + { + "epoch": 0.5781940974816648, + "grad_norm": 0.17141081392765045, + "learning_rate": 8.949748446237448e-06, + "loss": 0.0041, + "step": 90150 + }, + { + "epoch": 0.578258234375451, + "grad_norm": 0.2574852406978607, + "learning_rate": 8.949405227242509e-06, + "loss": 0.0045, + "step": 90160 + }, + { + "epoch": 0.578322371269237, + "grad_norm": 0.10717305541038513, + "learning_rate": 8.9490619587587e-06, + "loss": 0.003, + "step": 90170 + }, + { + "epoch": 0.5783865081630232, + "grad_norm": 0.14781375229358673, + "learning_rate": 8.948718640790323e-06, + "loss": 0.0037, + "step": 90180 + }, + { + "epoch": 0.5784506450568092, + "grad_norm": 0.21729065477848053, + "learning_rate": 8.948375273341681e-06, + "loss": 0.0032, + "step": 90190 + }, + { + "epoch": 0.5785147819505954, + "grad_norm": 0.3163394629955292, + "learning_rate": 8.948031856417072e-06, + "loss": 0.0025, + "step": 90200 + }, + { + "epoch": 0.5785789188443814, + "grad_norm": 0.3094060719013214, + "learning_rate": 8.947688390020803e-06, + "loss": 0.0148, + "step": 90210 + }, + { + "epoch": 0.5786430557381675, + "grad_norm": 0.12238597124814987, + "learning_rate": 8.947344874157179e-06, + "loss": 0.0059, + "step": 90220 + }, + { + "epoch": 0.5787071926319537, + "grad_norm": 0.12434793263673782, + "learning_rate": 8.9470013088305e-06, + "loss": 0.0033, + "step": 90230 + }, + { + "epoch": 0.5787713295257397, + "grad_norm": 0.07099161297082901, + "learning_rate": 8.946657694045074e-06, + "loss": 0.003, + "step": 90240 + }, + { + "epoch": 0.5788354664195259, + "grad_norm": 0.336298406124115, + "learning_rate": 8.946314029805208e-06, + "loss": 0.0028, + "step": 90250 + }, + { + "epoch": 0.5788996033133119, + "grad_norm": 0.6149856448173523, + "learning_rate": 8.945970316115205e-06, + "loss": 0.004, + "step": 90260 + }, + { + "epoch": 0.5789637402070981, + "grad_norm": 0.02784036658704281, + "learning_rate": 8.945626552979377e-06, + "loss": 0.0032, + "step": 90270 + }, + { + "epoch": 0.5790278771008841, + "grad_norm": 0.12195788323879242, + "learning_rate": 8.945282740402024e-06, + "loss": 0.0042, + "step": 90280 + }, + { + "epoch": 0.5790920139946703, + "grad_norm": 0.16925176978111267, + "learning_rate": 8.944938878387461e-06, + "loss": 0.0042, + "step": 90290 + }, + { + "epoch": 0.5791561508884563, + "grad_norm": 0.16154712438583374, + "learning_rate": 8.944594966939994e-06, + "loss": 0.0053, + "step": 90300 + }, + { + "epoch": 0.5792202877822424, + "grad_norm": 0.07512946426868439, + "learning_rate": 8.944251006063934e-06, + "loss": 0.0022, + "step": 90310 + }, + { + "epoch": 0.5792844246760285, + "grad_norm": 0.18358130753040314, + "learning_rate": 8.94390699576359e-06, + "loss": 0.0047, + "step": 90320 + }, + { + "epoch": 0.5793485615698146, + "grad_norm": 0.010401815176010132, + "learning_rate": 8.94356293604327e-06, + "loss": 0.0022, + "step": 90330 + }, + { + "epoch": 0.5794126984636007, + "grad_norm": 0.1601855605840683, + "learning_rate": 8.94321882690729e-06, + "loss": 0.0028, + "step": 90340 + }, + { + "epoch": 0.5794768353573868, + "grad_norm": 0.12499598413705826, + "learning_rate": 8.94287466835996e-06, + "loss": 0.0026, + "step": 90350 + }, + { + "epoch": 0.579540972251173, + "grad_norm": 0.0920039489865303, + "learning_rate": 8.942530460405592e-06, + "loss": 0.0033, + "step": 90360 + }, + { + "epoch": 0.579605109144959, + "grad_norm": 0.0942494347691536, + "learning_rate": 8.942186203048499e-06, + "loss": 0.0033, + "step": 90370 + }, + { + "epoch": 0.5796692460387451, + "grad_norm": 0.19644950330257416, + "learning_rate": 8.941841896292997e-06, + "loss": 0.0051, + "step": 90380 + }, + { + "epoch": 0.5797333829325312, + "grad_norm": 0.15651078522205353, + "learning_rate": 8.941497540143397e-06, + "loss": 0.0031, + "step": 90390 + }, + { + "epoch": 0.5797975198263173, + "grad_norm": 0.2172597348690033, + "learning_rate": 8.941153134604018e-06, + "loss": 0.0025, + "step": 90400 + }, + { + "epoch": 0.5798616567201034, + "grad_norm": 0.17114651203155518, + "learning_rate": 8.940808679679172e-06, + "loss": 0.0039, + "step": 90410 + }, + { + "epoch": 0.5799257936138895, + "grad_norm": 0.15437696874141693, + "learning_rate": 8.940464175373178e-06, + "loss": 0.0041, + "step": 90420 + }, + { + "epoch": 0.5799899305076756, + "grad_norm": 0.39087653160095215, + "learning_rate": 8.940119621690351e-06, + "loss": 0.0044, + "step": 90430 + }, + { + "epoch": 0.5800540674014617, + "grad_norm": 0.1864572912454605, + "learning_rate": 8.939775018635008e-06, + "loss": 0.0024, + "step": 90440 + }, + { + "epoch": 0.5801182042952477, + "grad_norm": 0.10369669646024704, + "learning_rate": 8.93943036621147e-06, + "loss": 0.005, + "step": 90450 + }, + { + "epoch": 0.5801823411890339, + "grad_norm": 0.09310199320316315, + "learning_rate": 8.939085664424055e-06, + "loss": 0.0024, + "step": 90460 + }, + { + "epoch": 0.5802464780828199, + "grad_norm": 0.046526405960321426, + "learning_rate": 8.938740913277079e-06, + "loss": 0.0027, + "step": 90470 + }, + { + "epoch": 0.5803106149766061, + "grad_norm": 0.14390631020069122, + "learning_rate": 8.938396112774866e-06, + "loss": 0.0028, + "step": 90480 + }, + { + "epoch": 0.5803747518703921, + "grad_norm": 0.13936559855937958, + "learning_rate": 8.938051262921735e-06, + "loss": 0.0033, + "step": 90490 + }, + { + "epoch": 0.5804388887641783, + "grad_norm": 0.05774078145623207, + "learning_rate": 8.937706363722004e-06, + "loss": 0.0033, + "step": 90500 + }, + { + "epoch": 0.5805030256579644, + "grad_norm": 0.05919841304421425, + "learning_rate": 8.937361415180001e-06, + "loss": 0.0047, + "step": 90510 + }, + { + "epoch": 0.5805671625517504, + "grad_norm": 0.3022819757461548, + "learning_rate": 8.937016417300046e-06, + "loss": 0.0021, + "step": 90520 + }, + { + "epoch": 0.5806312994455366, + "grad_norm": 0.2363024353981018, + "learning_rate": 8.93667137008646e-06, + "loss": 0.0028, + "step": 90530 + }, + { + "epoch": 0.5806954363393226, + "grad_norm": 0.3508833348751068, + "learning_rate": 8.93632627354357e-06, + "loss": 0.0029, + "step": 90540 + }, + { + "epoch": 0.5807595732331088, + "grad_norm": 0.1320483237504959, + "learning_rate": 8.935981127675695e-06, + "loss": 0.0022, + "step": 90550 + }, + { + "epoch": 0.5808237101268948, + "grad_norm": 0.2474808692932129, + "learning_rate": 8.935635932487166e-06, + "loss": 0.0037, + "step": 90560 + }, + { + "epoch": 0.580887847020681, + "grad_norm": 0.08279784768819809, + "learning_rate": 8.935290687982306e-06, + "loss": 0.0039, + "step": 90570 + }, + { + "epoch": 0.580951983914467, + "grad_norm": 0.11578167974948883, + "learning_rate": 8.934945394165442e-06, + "loss": 0.0028, + "step": 90580 + }, + { + "epoch": 0.5810161208082532, + "grad_norm": 0.5153977870941162, + "learning_rate": 8.934600051040898e-06, + "loss": 0.0047, + "step": 90590 + }, + { + "epoch": 0.5810802577020392, + "grad_norm": 0.15942715108394623, + "learning_rate": 8.934254658613003e-06, + "loss": 0.0066, + "step": 90600 + }, + { + "epoch": 0.5811443945958253, + "grad_norm": 0.10940330475568771, + "learning_rate": 8.933909216886087e-06, + "loss": 0.0028, + "step": 90610 + }, + { + "epoch": 0.5812085314896114, + "grad_norm": 0.3347488045692444, + "learning_rate": 8.933563725864478e-06, + "loss": 0.004, + "step": 90620 + }, + { + "epoch": 0.5812726683833975, + "grad_norm": 0.15730610489845276, + "learning_rate": 8.933218185552503e-06, + "loss": 0.003, + "step": 90630 + }, + { + "epoch": 0.5813368052771836, + "grad_norm": 0.18061256408691406, + "learning_rate": 8.932872595954493e-06, + "loss": 0.0024, + "step": 90640 + }, + { + "epoch": 0.5814009421709697, + "grad_norm": 0.09252581745386124, + "learning_rate": 8.93252695707478e-06, + "loss": 0.0025, + "step": 90650 + }, + { + "epoch": 0.5814650790647559, + "grad_norm": 0.24369707703590393, + "learning_rate": 8.93218126891769e-06, + "loss": 0.0031, + "step": 90660 + }, + { + "epoch": 0.5815292159585419, + "grad_norm": 0.2268710881471634, + "learning_rate": 8.93183553148756e-06, + "loss": 0.0036, + "step": 90670 + }, + { + "epoch": 0.581593352852328, + "grad_norm": 0.1491227000951767, + "learning_rate": 8.931489744788722e-06, + "loss": 0.0018, + "step": 90680 + }, + { + "epoch": 0.5816574897461141, + "grad_norm": 0.18808454275131226, + "learning_rate": 8.931143908825508e-06, + "loss": 0.0043, + "step": 90690 + }, + { + "epoch": 0.5817216266399002, + "grad_norm": 0.10794474184513092, + "learning_rate": 8.93079802360225e-06, + "loss": 0.004, + "step": 90700 + }, + { + "epoch": 0.5817857635336863, + "grad_norm": 0.14503294229507446, + "learning_rate": 8.930452089123283e-06, + "loss": 0.0026, + "step": 90710 + }, + { + "epoch": 0.5818499004274724, + "grad_norm": 0.10027197748422623, + "learning_rate": 8.930106105392944e-06, + "loss": 0.0011, + "step": 90720 + }, + { + "epoch": 0.5819140373212585, + "grad_norm": 0.057823918759822845, + "learning_rate": 8.929760072415565e-06, + "loss": 0.0047, + "step": 90730 + }, + { + "epoch": 0.5819781742150446, + "grad_norm": 0.19504736363887787, + "learning_rate": 8.929413990195485e-06, + "loss": 0.0031, + "step": 90740 + }, + { + "epoch": 0.5820423111088306, + "grad_norm": 0.17649537324905396, + "learning_rate": 8.929067858737039e-06, + "loss": 0.0029, + "step": 90750 + }, + { + "epoch": 0.5821064480026168, + "grad_norm": 0.05666594207286835, + "learning_rate": 8.928721678044564e-06, + "loss": 0.0032, + "step": 90760 + }, + { + "epoch": 0.5821705848964028, + "grad_norm": 0.14649075269699097, + "learning_rate": 8.928375448122399e-06, + "loss": 0.0024, + "step": 90770 + }, + { + "epoch": 0.582234721790189, + "grad_norm": 0.26468437910079956, + "learning_rate": 8.928029168974881e-06, + "loss": 0.0029, + "step": 90780 + }, + { + "epoch": 0.5822988586839751, + "grad_norm": 0.07763198018074036, + "learning_rate": 8.927682840606352e-06, + "loss": 0.0028, + "step": 90790 + }, + { + "epoch": 0.5823629955777612, + "grad_norm": 0.014735487289726734, + "learning_rate": 8.92733646302115e-06, + "loss": 0.0032, + "step": 90800 + }, + { + "epoch": 0.5824271324715473, + "grad_norm": 0.26133033633232117, + "learning_rate": 8.926990036223615e-06, + "loss": 0.0033, + "step": 90810 + }, + { + "epoch": 0.5824912693653334, + "grad_norm": 0.15196596086025238, + "learning_rate": 8.926643560218087e-06, + "loss": 0.0029, + "step": 90820 + }, + { + "epoch": 0.5825554062591195, + "grad_norm": 0.14441141486167908, + "learning_rate": 8.92629703500891e-06, + "loss": 0.002, + "step": 90830 + }, + { + "epoch": 0.5826195431529055, + "grad_norm": 0.2370399534702301, + "learning_rate": 8.925950460600425e-06, + "loss": 0.0029, + "step": 90840 + }, + { + "epoch": 0.5826836800466917, + "grad_norm": 0.5152098536491394, + "learning_rate": 8.925603836996975e-06, + "loss": 0.0022, + "step": 90850 + }, + { + "epoch": 0.5827478169404777, + "grad_norm": 0.29338768124580383, + "learning_rate": 8.925257164202903e-06, + "loss": 0.002, + "step": 90860 + }, + { + "epoch": 0.5828119538342639, + "grad_norm": 0.05307412147521973, + "learning_rate": 8.924910442222555e-06, + "loss": 0.0031, + "step": 90870 + }, + { + "epoch": 0.5828760907280499, + "grad_norm": 0.16998408734798431, + "learning_rate": 8.924563671060272e-06, + "loss": 0.0034, + "step": 90880 + }, + { + "epoch": 0.5829402276218361, + "grad_norm": 0.1047939583659172, + "learning_rate": 8.9242168507204e-06, + "loss": 0.0013, + "step": 90890 + }, + { + "epoch": 0.5830043645156221, + "grad_norm": 0.09160842001438141, + "learning_rate": 8.923869981207289e-06, + "loss": 0.0037, + "step": 90900 + }, + { + "epoch": 0.5830685014094082, + "grad_norm": 0.10264736413955688, + "learning_rate": 8.923523062525282e-06, + "loss": 0.0039, + "step": 90910 + }, + { + "epoch": 0.5831326383031943, + "grad_norm": 0.05445673316717148, + "learning_rate": 8.923176094678726e-06, + "loss": 0.0043, + "step": 90920 + }, + { + "epoch": 0.5831967751969804, + "grad_norm": 0.06704435497522354, + "learning_rate": 8.92282907767197e-06, + "loss": 0.0055, + "step": 90930 + }, + { + "epoch": 0.5832609120907666, + "grad_norm": 0.03559799864888191, + "learning_rate": 8.922482011509364e-06, + "loss": 0.006, + "step": 90940 + }, + { + "epoch": 0.5833250489845526, + "grad_norm": 0.11612100899219513, + "learning_rate": 8.922134896195253e-06, + "loss": 0.0044, + "step": 90950 + }, + { + "epoch": 0.5833891858783388, + "grad_norm": 0.013056891039013863, + "learning_rate": 8.92178773173399e-06, + "loss": 0.0027, + "step": 90960 + }, + { + "epoch": 0.5834533227721248, + "grad_norm": 0.06981601566076279, + "learning_rate": 8.921440518129922e-06, + "loss": 0.0033, + "step": 90970 + }, + { + "epoch": 0.583517459665911, + "grad_norm": 0.11842454969882965, + "learning_rate": 8.921093255387402e-06, + "loss": 0.0028, + "step": 90980 + }, + { + "epoch": 0.583581596559697, + "grad_norm": 0.35216161608695984, + "learning_rate": 8.920745943510783e-06, + "loss": 0.0026, + "step": 90990 + }, + { + "epoch": 0.5836457334534831, + "grad_norm": 0.05675550177693367, + "learning_rate": 8.920398582504415e-06, + "loss": 0.0057, + "step": 91000 + }, + { + "epoch": 0.5837098703472692, + "grad_norm": 0.09108414500951767, + "learning_rate": 8.92005117237265e-06, + "loss": 0.0025, + "step": 91010 + }, + { + "epoch": 0.5837740072410553, + "grad_norm": 0.15295842289924622, + "learning_rate": 8.919703713119842e-06, + "loss": 0.0018, + "step": 91020 + }, + { + "epoch": 0.5838381441348414, + "grad_norm": 0.16002964973449707, + "learning_rate": 8.919356204750346e-06, + "loss": 0.0039, + "step": 91030 + }, + { + "epoch": 0.5839022810286275, + "grad_norm": 0.3844493329524994, + "learning_rate": 8.919008647268515e-06, + "loss": 0.0032, + "step": 91040 + }, + { + "epoch": 0.5839664179224135, + "grad_norm": 0.11783535778522491, + "learning_rate": 8.918661040678705e-06, + "loss": 0.002, + "step": 91050 + }, + { + "epoch": 0.5840305548161997, + "grad_norm": 0.3455294072628021, + "learning_rate": 8.918313384985271e-06, + "loss": 0.0041, + "step": 91060 + }, + { + "epoch": 0.5840946917099858, + "grad_norm": 0.05667266622185707, + "learning_rate": 8.91796568019257e-06, + "loss": 0.0031, + "step": 91070 + }, + { + "epoch": 0.5841588286037719, + "grad_norm": 0.05197868496179581, + "learning_rate": 8.917617926304957e-06, + "loss": 0.0061, + "step": 91080 + }, + { + "epoch": 0.584222965497558, + "grad_norm": 0.16658474504947662, + "learning_rate": 8.917270123326796e-06, + "loss": 0.0029, + "step": 91090 + }, + { + "epoch": 0.5842871023913441, + "grad_norm": 0.4391426146030426, + "learning_rate": 8.916922271262438e-06, + "loss": 0.0029, + "step": 91100 + }, + { + "epoch": 0.5843512392851302, + "grad_norm": 0.16311413049697876, + "learning_rate": 8.916574370116245e-06, + "loss": 0.0031, + "step": 91110 + }, + { + "epoch": 0.5844153761789163, + "grad_norm": 0.2503029704093933, + "learning_rate": 8.916226419892576e-06, + "loss": 0.0023, + "step": 91120 + }, + { + "epoch": 0.5844795130727024, + "grad_norm": 0.040789201855659485, + "learning_rate": 8.91587842059579e-06, + "loss": 0.004, + "step": 91130 + }, + { + "epoch": 0.5845436499664884, + "grad_norm": 0.5060019493103027, + "learning_rate": 8.91553037223025e-06, + "loss": 0.0086, + "step": 91140 + }, + { + "epoch": 0.5846077868602746, + "grad_norm": 0.13935501873493195, + "learning_rate": 8.915182274800315e-06, + "loss": 0.0026, + "step": 91150 + }, + { + "epoch": 0.5846719237540606, + "grad_norm": 0.2682758867740631, + "learning_rate": 8.91483412831035e-06, + "loss": 0.0029, + "step": 91160 + }, + { + "epoch": 0.5847360606478468, + "grad_norm": 0.2726670205593109, + "learning_rate": 8.914485932764714e-06, + "loss": 0.0034, + "step": 91170 + }, + { + "epoch": 0.5848001975416328, + "grad_norm": 0.1472742110490799, + "learning_rate": 8.914137688167772e-06, + "loss": 0.0041, + "step": 91180 + }, + { + "epoch": 0.584864334435419, + "grad_norm": 0.21696236729621887, + "learning_rate": 8.913789394523887e-06, + "loss": 0.0037, + "step": 91190 + }, + { + "epoch": 0.584928471329205, + "grad_norm": 0.16352948546409607, + "learning_rate": 8.913441051837424e-06, + "loss": 0.0038, + "step": 91200 + }, + { + "epoch": 0.5849926082229912, + "grad_norm": 0.1005765050649643, + "learning_rate": 8.913092660112748e-06, + "loss": 0.0048, + "step": 91210 + }, + { + "epoch": 0.5850567451167773, + "grad_norm": 0.1923978626728058, + "learning_rate": 8.912744219354224e-06, + "loss": 0.004, + "step": 91220 + }, + { + "epoch": 0.5851208820105633, + "grad_norm": 0.054473694413900375, + "learning_rate": 8.912395729566219e-06, + "loss": 0.0029, + "step": 91230 + }, + { + "epoch": 0.5851850189043495, + "grad_norm": 0.10405640304088593, + "learning_rate": 8.912047190753098e-06, + "loss": 0.0032, + "step": 91240 + }, + { + "epoch": 0.5852491557981355, + "grad_norm": 0.1484929919242859, + "learning_rate": 8.91169860291923e-06, + "loss": 0.0041, + "step": 91250 + }, + { + "epoch": 0.5853132926919217, + "grad_norm": 0.22620069980621338, + "learning_rate": 8.911349966068986e-06, + "loss": 0.0049, + "step": 91260 + }, + { + "epoch": 0.5853774295857077, + "grad_norm": 0.18138428032398224, + "learning_rate": 8.911001280206728e-06, + "loss": 0.0035, + "step": 91270 + }, + { + "epoch": 0.5854415664794939, + "grad_norm": 0.09323873370885849, + "learning_rate": 8.910652545336828e-06, + "loss": 0.0024, + "step": 91280 + }, + { + "epoch": 0.5855057033732799, + "grad_norm": 0.21446284651756287, + "learning_rate": 8.91030376146366e-06, + "loss": 0.0041, + "step": 91290 + }, + { + "epoch": 0.585569840267066, + "grad_norm": 0.07625725865364075, + "learning_rate": 8.90995492859159e-06, + "loss": 0.0023, + "step": 91300 + }, + { + "epoch": 0.5856339771608521, + "grad_norm": 0.15408417582511902, + "learning_rate": 8.90960604672499e-06, + "loss": 0.0026, + "step": 91310 + }, + { + "epoch": 0.5856981140546382, + "grad_norm": 0.14646489918231964, + "learning_rate": 8.909257115868232e-06, + "loss": 0.0021, + "step": 91320 + }, + { + "epoch": 0.5857622509484243, + "grad_norm": 0.08991856873035431, + "learning_rate": 8.908908136025689e-06, + "loss": 0.0018, + "step": 91330 + }, + { + "epoch": 0.5858263878422104, + "grad_norm": 0.39072686433792114, + "learning_rate": 8.908559107201732e-06, + "loss": 0.0033, + "step": 91340 + }, + { + "epoch": 0.5858905247359966, + "grad_norm": 0.08406838774681091, + "learning_rate": 8.908210029400738e-06, + "loss": 0.0033, + "step": 91350 + }, + { + "epoch": 0.5859546616297826, + "grad_norm": 0.07078109681606293, + "learning_rate": 8.90786090262708e-06, + "loss": 0.0032, + "step": 91360 + }, + { + "epoch": 0.5860187985235688, + "grad_norm": 0.20980200171470642, + "learning_rate": 8.90751172688513e-06, + "loss": 0.0044, + "step": 91370 + }, + { + "epoch": 0.5860829354173548, + "grad_norm": 0.13432657718658447, + "learning_rate": 8.907162502179266e-06, + "loss": 0.0028, + "step": 91380 + }, + { + "epoch": 0.5861470723111409, + "grad_norm": 0.1478530764579773, + "learning_rate": 8.906813228513863e-06, + "loss": 0.0033, + "step": 91390 + }, + { + "epoch": 0.586211209204927, + "grad_norm": 0.3381941020488739, + "learning_rate": 8.906463905893296e-06, + "loss": 0.0037, + "step": 91400 + }, + { + "epoch": 0.5862753460987131, + "grad_norm": 0.22154425084590912, + "learning_rate": 8.906114534321948e-06, + "loss": 0.0034, + "step": 91410 + }, + { + "epoch": 0.5863394829924992, + "grad_norm": 0.04053284227848053, + "learning_rate": 8.90576511380419e-06, + "loss": 0.0048, + "step": 91420 + }, + { + "epoch": 0.5864036198862853, + "grad_norm": 0.11504218727350235, + "learning_rate": 8.905415644344406e-06, + "loss": 0.0025, + "step": 91430 + }, + { + "epoch": 0.5864677567800713, + "grad_norm": 0.2638290226459503, + "learning_rate": 8.905066125946973e-06, + "loss": 0.0033, + "step": 91440 + }, + { + "epoch": 0.5865318936738575, + "grad_norm": 0.12543456256389618, + "learning_rate": 8.904716558616269e-06, + "loss": 0.0031, + "step": 91450 + }, + { + "epoch": 0.5865960305676435, + "grad_norm": 0.1517900973558426, + "learning_rate": 8.904366942356677e-06, + "loss": 0.0029, + "step": 91460 + }, + { + "epoch": 0.5866601674614297, + "grad_norm": 0.2582097351551056, + "learning_rate": 8.904017277172577e-06, + "loss": 0.0036, + "step": 91470 + }, + { + "epoch": 0.5867243043552157, + "grad_norm": 0.12383686751127243, + "learning_rate": 8.90366756306835e-06, + "loss": 0.0031, + "step": 91480 + }, + { + "epoch": 0.5867884412490019, + "grad_norm": 0.08443358540534973, + "learning_rate": 8.903317800048378e-06, + "loss": 0.0058, + "step": 91490 + }, + { + "epoch": 0.586852578142788, + "grad_norm": 0.23556144535541534, + "learning_rate": 8.902967988117044e-06, + "loss": 0.003, + "step": 91500 + }, + { + "epoch": 0.586916715036574, + "grad_norm": 0.09631785750389099, + "learning_rate": 8.902618127278733e-06, + "loss": 0.0031, + "step": 91510 + }, + { + "epoch": 0.5869808519303602, + "grad_norm": 0.08299261331558228, + "learning_rate": 8.902268217537827e-06, + "loss": 0.0019, + "step": 91520 + }, + { + "epoch": 0.5870449888241462, + "grad_norm": 0.0696060061454773, + "learning_rate": 8.901918258898711e-06, + "loss": 0.0038, + "step": 91530 + }, + { + "epoch": 0.5871091257179324, + "grad_norm": 0.22105728089809418, + "learning_rate": 8.90156825136577e-06, + "loss": 0.0049, + "step": 91540 + }, + { + "epoch": 0.5871732626117184, + "grad_norm": 0.1963120400905609, + "learning_rate": 8.901218194943392e-06, + "loss": 0.002, + "step": 91550 + }, + { + "epoch": 0.5872373995055046, + "grad_norm": 0.1863541156053543, + "learning_rate": 8.900868089635963e-06, + "loss": 0.0033, + "step": 91560 + }, + { + "epoch": 0.5873015363992906, + "grad_norm": 0.16078346967697144, + "learning_rate": 8.900517935447866e-06, + "loss": 0.0028, + "step": 91570 + }, + { + "epoch": 0.5873656732930768, + "grad_norm": 0.13000799715518951, + "learning_rate": 8.900167732383494e-06, + "loss": 0.0032, + "step": 91580 + }, + { + "epoch": 0.5874298101868628, + "grad_norm": 0.3557916581630707, + "learning_rate": 8.89981748044723e-06, + "loss": 0.005, + "step": 91590 + }, + { + "epoch": 0.587493947080649, + "grad_norm": 0.1393110156059265, + "learning_rate": 8.899467179643469e-06, + "loss": 0.0046, + "step": 91600 + }, + { + "epoch": 0.587558083974435, + "grad_norm": 0.15164828300476074, + "learning_rate": 8.899116829976595e-06, + "loss": 0.0045, + "step": 91610 + }, + { + "epoch": 0.5876222208682211, + "grad_norm": 0.14568741619586945, + "learning_rate": 8.898766431451001e-06, + "loss": 0.0043, + "step": 91620 + }, + { + "epoch": 0.5876863577620073, + "grad_norm": 0.058165088295936584, + "learning_rate": 8.898415984071078e-06, + "loss": 0.0026, + "step": 91630 + }, + { + "epoch": 0.5877504946557933, + "grad_norm": 0.08098440617322922, + "learning_rate": 8.898065487841216e-06, + "loss": 0.002, + "step": 91640 + }, + { + "epoch": 0.5878146315495795, + "grad_norm": 0.021908633410930634, + "learning_rate": 8.897714942765806e-06, + "loss": 0.0019, + "step": 91650 + }, + { + "epoch": 0.5878787684433655, + "grad_norm": 0.07424472272396088, + "learning_rate": 8.897364348849244e-06, + "loss": 0.0057, + "step": 91660 + }, + { + "epoch": 0.5879429053371517, + "grad_norm": 0.39349886775016785, + "learning_rate": 8.897013706095921e-06, + "loss": 0.004, + "step": 91670 + }, + { + "epoch": 0.5880070422309377, + "grad_norm": 0.14584460854530334, + "learning_rate": 8.896663014510231e-06, + "loss": 0.0022, + "step": 91680 + }, + { + "epoch": 0.5880711791247238, + "grad_norm": 0.18785099685192108, + "learning_rate": 8.89631227409657e-06, + "loss": 0.004, + "step": 91690 + }, + { + "epoch": 0.5881353160185099, + "grad_norm": 0.1897226721048355, + "learning_rate": 8.89596148485933e-06, + "loss": 0.004, + "step": 91700 + }, + { + "epoch": 0.588199452912296, + "grad_norm": 0.0959388017654419, + "learning_rate": 8.895610646802907e-06, + "loss": 0.0049, + "step": 91710 + }, + { + "epoch": 0.5882635898060821, + "grad_norm": 0.04107668995857239, + "learning_rate": 8.895259759931701e-06, + "loss": 0.0038, + "step": 91720 + }, + { + "epoch": 0.5883277266998682, + "grad_norm": 0.30913951992988586, + "learning_rate": 8.894908824250106e-06, + "loss": 0.0046, + "step": 91730 + }, + { + "epoch": 0.5883918635936543, + "grad_norm": 0.13641460239887238, + "learning_rate": 8.894557839762518e-06, + "loss": 0.0043, + "step": 91740 + }, + { + "epoch": 0.5884560004874404, + "grad_norm": 0.291328489780426, + "learning_rate": 8.894206806473337e-06, + "loss": 0.0027, + "step": 91750 + }, + { + "epoch": 0.5885201373812264, + "grad_norm": 0.14278985559940338, + "learning_rate": 8.893855724386964e-06, + "loss": 0.0038, + "step": 91760 + }, + { + "epoch": 0.5885842742750126, + "grad_norm": 0.17391382157802582, + "learning_rate": 8.893504593507793e-06, + "loss": 0.0025, + "step": 91770 + }, + { + "epoch": 0.5886484111687987, + "grad_norm": 0.10660814493894577, + "learning_rate": 8.893153413840228e-06, + "loss": 0.003, + "step": 91780 + }, + { + "epoch": 0.5887125480625848, + "grad_norm": 0.15488052368164062, + "learning_rate": 8.892802185388669e-06, + "loss": 0.0062, + "step": 91790 + }, + { + "epoch": 0.5887766849563709, + "grad_norm": 0.02289946936070919, + "learning_rate": 8.892450908157514e-06, + "loss": 0.0062, + "step": 91800 + }, + { + "epoch": 0.588840821850157, + "grad_norm": 0.03142036497592926, + "learning_rate": 8.89209958215117e-06, + "loss": 0.0041, + "step": 91810 + }, + { + "epoch": 0.5889049587439431, + "grad_norm": 0.042830560356378555, + "learning_rate": 8.891748207374036e-06, + "loss": 0.0032, + "step": 91820 + }, + { + "epoch": 0.5889690956377291, + "grad_norm": 0.1880921572446823, + "learning_rate": 8.891396783830515e-06, + "loss": 0.0053, + "step": 91830 + }, + { + "epoch": 0.5890332325315153, + "grad_norm": 0.33972400426864624, + "learning_rate": 8.891045311525011e-06, + "loss": 0.0017, + "step": 91840 + }, + { + "epoch": 0.5890973694253013, + "grad_norm": 0.2004566192626953, + "learning_rate": 8.89069379046193e-06, + "loss": 0.0024, + "step": 91850 + }, + { + "epoch": 0.5891615063190875, + "grad_norm": 0.12435439974069595, + "learning_rate": 8.890342220645674e-06, + "loss": 0.0032, + "step": 91860 + }, + { + "epoch": 0.5892256432128735, + "grad_norm": 0.22347719967365265, + "learning_rate": 8.889990602080649e-06, + "loss": 0.0048, + "step": 91870 + }, + { + "epoch": 0.5892897801066597, + "grad_norm": 0.15794594585895538, + "learning_rate": 8.889638934771262e-06, + "loss": 0.0028, + "step": 91880 + }, + { + "epoch": 0.5893539170004457, + "grad_norm": 0.16316534578800201, + "learning_rate": 8.889287218721921e-06, + "loss": 0.0037, + "step": 91890 + }, + { + "epoch": 0.5894180538942319, + "grad_norm": 0.23359239101409912, + "learning_rate": 8.888935453937028e-06, + "loss": 0.0043, + "step": 91900 + }, + { + "epoch": 0.5894821907880179, + "grad_norm": 0.25535106658935547, + "learning_rate": 8.888583640420996e-06, + "loss": 0.0039, + "step": 91910 + }, + { + "epoch": 0.589546327681804, + "grad_norm": 0.3038575351238251, + "learning_rate": 8.888231778178234e-06, + "loss": 0.0034, + "step": 91920 + }, + { + "epoch": 0.5896104645755902, + "grad_norm": 0.21951676905155182, + "learning_rate": 8.887879867213146e-06, + "loss": 0.0034, + "step": 91930 + }, + { + "epoch": 0.5896746014693762, + "grad_norm": 0.1366313397884369, + "learning_rate": 8.887527907530146e-06, + "loss": 0.0056, + "step": 91940 + }, + { + "epoch": 0.5897387383631624, + "grad_norm": 0.12945735454559326, + "learning_rate": 8.887175899133642e-06, + "loss": 0.0023, + "step": 91950 + }, + { + "epoch": 0.5898028752569484, + "grad_norm": 0.21439526975154877, + "learning_rate": 8.886823842028047e-06, + "loss": 0.0059, + "step": 91960 + }, + { + "epoch": 0.5898670121507346, + "grad_norm": 0.2983899712562561, + "learning_rate": 8.88647173621777e-06, + "loss": 0.0026, + "step": 91970 + }, + { + "epoch": 0.5899311490445206, + "grad_norm": 0.05508747324347496, + "learning_rate": 8.886119581707227e-06, + "loss": 0.0022, + "step": 91980 + }, + { + "epoch": 0.5899952859383067, + "grad_norm": 0.45103803277015686, + "learning_rate": 8.885767378500827e-06, + "loss": 0.0069, + "step": 91990 + }, + { + "epoch": 0.5900594228320928, + "grad_norm": 0.07513292878866196, + "learning_rate": 8.885415126602983e-06, + "loss": 0.0045, + "step": 92000 + }, + { + "epoch": 0.5901235597258789, + "grad_norm": 0.2919937074184418, + "learning_rate": 8.88506282601811e-06, + "loss": 0.0027, + "step": 92010 + }, + { + "epoch": 0.590187696619665, + "grad_norm": 0.13594791293144226, + "learning_rate": 8.884710476750628e-06, + "loss": 0.0015, + "step": 92020 + }, + { + "epoch": 0.5902518335134511, + "grad_norm": 0.10354795306921005, + "learning_rate": 8.884358078804944e-06, + "loss": 0.0038, + "step": 92030 + }, + { + "epoch": 0.5903159704072372, + "grad_norm": 0.20591247081756592, + "learning_rate": 8.884005632185477e-06, + "loss": 0.0019, + "step": 92040 + }, + { + "epoch": 0.5903801073010233, + "grad_norm": 0.19913780689239502, + "learning_rate": 8.883653136896644e-06, + "loss": 0.0037, + "step": 92050 + }, + { + "epoch": 0.5904442441948095, + "grad_norm": 0.15183718502521515, + "learning_rate": 8.883300592942863e-06, + "loss": 0.0027, + "step": 92060 + }, + { + "epoch": 0.5905083810885955, + "grad_norm": 0.1707528531551361, + "learning_rate": 8.882948000328548e-06, + "loss": 0.0029, + "step": 92070 + }, + { + "epoch": 0.5905725179823816, + "grad_norm": 0.21265354752540588, + "learning_rate": 8.882595359058122e-06, + "loss": 0.0044, + "step": 92080 + }, + { + "epoch": 0.5906366548761677, + "grad_norm": 0.11014064401388168, + "learning_rate": 8.882242669135999e-06, + "loss": 0.0038, + "step": 92090 + }, + { + "epoch": 0.5907007917699538, + "grad_norm": 0.31167688965797424, + "learning_rate": 8.8818899305666e-06, + "loss": 0.0025, + "step": 92100 + }, + { + "epoch": 0.5907649286637399, + "grad_norm": 0.2547362148761749, + "learning_rate": 8.881537143354349e-06, + "loss": 0.004, + "step": 92110 + }, + { + "epoch": 0.590829065557526, + "grad_norm": 0.09043922275304794, + "learning_rate": 8.881184307503662e-06, + "loss": 0.0029, + "step": 92120 + }, + { + "epoch": 0.590893202451312, + "grad_norm": 0.14289937913417816, + "learning_rate": 8.88083142301896e-06, + "loss": 0.0022, + "step": 92130 + }, + { + "epoch": 0.5909573393450982, + "grad_norm": 0.1801345944404602, + "learning_rate": 8.880478489904669e-06, + "loss": 0.0058, + "step": 92140 + }, + { + "epoch": 0.5910214762388842, + "grad_norm": 0.07819484919309616, + "learning_rate": 8.88012550816521e-06, + "loss": 0.0028, + "step": 92150 + }, + { + "epoch": 0.5910856131326704, + "grad_norm": 0.12236767262220383, + "learning_rate": 8.879772477805003e-06, + "loss": 0.0034, + "step": 92160 + }, + { + "epoch": 0.5911497500264564, + "grad_norm": 0.14268234372138977, + "learning_rate": 8.879419398828476e-06, + "loss": 0.0042, + "step": 92170 + }, + { + "epoch": 0.5912138869202426, + "grad_norm": 0.15880000591278076, + "learning_rate": 8.87906627124005e-06, + "loss": 0.0018, + "step": 92180 + }, + { + "epoch": 0.5912780238140286, + "grad_norm": 0.1848147213459015, + "learning_rate": 8.878713095044152e-06, + "loss": 0.0032, + "step": 92190 + }, + { + "epoch": 0.5913421607078148, + "grad_norm": 0.3286655843257904, + "learning_rate": 8.878359870245205e-06, + "loss": 0.0043, + "step": 92200 + }, + { + "epoch": 0.5914062976016009, + "grad_norm": 0.3613602817058563, + "learning_rate": 8.878006596847638e-06, + "loss": 0.0039, + "step": 92210 + }, + { + "epoch": 0.5914704344953869, + "grad_norm": 0.1943521499633789, + "learning_rate": 8.877653274855877e-06, + "loss": 0.0033, + "step": 92220 + }, + { + "epoch": 0.5915345713891731, + "grad_norm": 0.28001415729522705, + "learning_rate": 8.87729990427435e-06, + "loss": 0.0032, + "step": 92230 + }, + { + "epoch": 0.5915987082829591, + "grad_norm": 0.03330661356449127, + "learning_rate": 8.876946485107482e-06, + "loss": 0.0025, + "step": 92240 + }, + { + "epoch": 0.5916628451767453, + "grad_norm": 0.16898031532764435, + "learning_rate": 8.876593017359706e-06, + "loss": 0.0088, + "step": 92250 + }, + { + "epoch": 0.5917269820705313, + "grad_norm": 0.18300198018550873, + "learning_rate": 8.876239501035448e-06, + "loss": 0.0049, + "step": 92260 + }, + { + "epoch": 0.5917911189643175, + "grad_norm": 0.15687091648578644, + "learning_rate": 8.87588593613914e-06, + "loss": 0.0027, + "step": 92270 + }, + { + "epoch": 0.5918552558581035, + "grad_norm": 0.500644326210022, + "learning_rate": 8.875532322675208e-06, + "loss": 0.0064, + "step": 92280 + }, + { + "epoch": 0.5919193927518897, + "grad_norm": 0.06440155953168869, + "learning_rate": 8.87517866064809e-06, + "loss": 0.0052, + "step": 92290 + }, + { + "epoch": 0.5919835296456757, + "grad_norm": 0.05679481849074364, + "learning_rate": 8.87482495006221e-06, + "loss": 0.0037, + "step": 92300 + }, + { + "epoch": 0.5920476665394618, + "grad_norm": 0.13847221434116364, + "learning_rate": 8.874471190922007e-06, + "loss": 0.0031, + "step": 92310 + }, + { + "epoch": 0.5921118034332479, + "grad_norm": 0.07948891073465347, + "learning_rate": 8.87411738323191e-06, + "loss": 0.0023, + "step": 92320 + }, + { + "epoch": 0.592175940327034, + "grad_norm": 0.21524807810783386, + "learning_rate": 8.873763526996353e-06, + "loss": 0.0025, + "step": 92330 + }, + { + "epoch": 0.5922400772208202, + "grad_norm": 0.050780776888132095, + "learning_rate": 8.873409622219771e-06, + "loss": 0.0031, + "step": 92340 + }, + { + "epoch": 0.5923042141146062, + "grad_norm": 0.1852717250585556, + "learning_rate": 8.873055668906597e-06, + "loss": 0.0028, + "step": 92350 + }, + { + "epoch": 0.5923683510083924, + "grad_norm": 0.15096399188041687, + "learning_rate": 8.87270166706127e-06, + "loss": 0.0031, + "step": 92360 + }, + { + "epoch": 0.5924324879021784, + "grad_norm": 0.16869080066680908, + "learning_rate": 8.872347616688222e-06, + "loss": 0.0032, + "step": 92370 + }, + { + "epoch": 0.5924966247959645, + "grad_norm": 0.31817638874053955, + "learning_rate": 8.871993517791891e-06, + "loss": 0.0071, + "step": 92380 + }, + { + "epoch": 0.5925607616897506, + "grad_norm": 0.11699803173542023, + "learning_rate": 8.871639370376713e-06, + "loss": 0.0033, + "step": 92390 + }, + { + "epoch": 0.5926248985835367, + "grad_norm": 0.1312762051820755, + "learning_rate": 8.871285174447127e-06, + "loss": 0.0038, + "step": 92400 + }, + { + "epoch": 0.5926890354773228, + "grad_norm": 0.12681037187576294, + "learning_rate": 8.87093093000757e-06, + "loss": 0.0034, + "step": 92410 + }, + { + "epoch": 0.5927531723711089, + "grad_norm": 0.2736920416355133, + "learning_rate": 8.870576637062484e-06, + "loss": 0.0045, + "step": 92420 + }, + { + "epoch": 0.592817309264895, + "grad_norm": 0.15666793286800385, + "learning_rate": 8.870222295616307e-06, + "loss": 0.0052, + "step": 92430 + }, + { + "epoch": 0.5928814461586811, + "grad_norm": 0.06761337071657181, + "learning_rate": 8.869867905673478e-06, + "loss": 0.0031, + "step": 92440 + }, + { + "epoch": 0.5929455830524671, + "grad_norm": 0.09654685854911804, + "learning_rate": 8.869513467238437e-06, + "loss": 0.0037, + "step": 92450 + }, + { + "epoch": 0.5930097199462533, + "grad_norm": 0.14291098713874817, + "learning_rate": 8.869158980315626e-06, + "loss": 0.0023, + "step": 92460 + }, + { + "epoch": 0.5930738568400393, + "grad_norm": 0.14262396097183228, + "learning_rate": 8.868804444909488e-06, + "loss": 0.0029, + "step": 92470 + }, + { + "epoch": 0.5931379937338255, + "grad_norm": 0.36440786719322205, + "learning_rate": 8.868449861024468e-06, + "loss": 0.0028, + "step": 92480 + }, + { + "epoch": 0.5932021306276116, + "grad_norm": 0.10059228539466858, + "learning_rate": 8.868095228665006e-06, + "loss": 0.0025, + "step": 92490 + }, + { + "epoch": 0.5932662675213977, + "grad_norm": 0.25650525093078613, + "learning_rate": 8.867740547835544e-06, + "loss": 0.0039, + "step": 92500 + }, + { + "epoch": 0.5933304044151838, + "grad_norm": 0.08289292454719543, + "learning_rate": 8.86738581854053e-06, + "loss": 0.0026, + "step": 92510 + }, + { + "epoch": 0.5933945413089698, + "grad_norm": 0.13036350905895233, + "learning_rate": 8.86703104078441e-06, + "loss": 0.0031, + "step": 92520 + }, + { + "epoch": 0.593458678202756, + "grad_norm": 0.038229234516620636, + "learning_rate": 8.866676214571623e-06, + "loss": 0.0049, + "step": 92530 + }, + { + "epoch": 0.593522815096542, + "grad_norm": 0.2246652990579605, + "learning_rate": 8.866321339906623e-06, + "loss": 0.0028, + "step": 92540 + }, + { + "epoch": 0.5935869519903282, + "grad_norm": 0.2030942291021347, + "learning_rate": 8.86596641679385e-06, + "loss": 0.003, + "step": 92550 + }, + { + "epoch": 0.5936510888841142, + "grad_norm": 0.10550177097320557, + "learning_rate": 8.865611445237758e-06, + "loss": 0.0046, + "step": 92560 + }, + { + "epoch": 0.5937152257779004, + "grad_norm": 0.18679168820381165, + "learning_rate": 8.86525642524279e-06, + "loss": 0.003, + "step": 92570 + }, + { + "epoch": 0.5937793626716864, + "grad_norm": 0.11270534247159958, + "learning_rate": 8.864901356813398e-06, + "loss": 0.0055, + "step": 92580 + }, + { + "epoch": 0.5938434995654726, + "grad_norm": 0.02422017604112625, + "learning_rate": 8.864546239954028e-06, + "loss": 0.0026, + "step": 92590 + }, + { + "epoch": 0.5939076364592586, + "grad_norm": 0.16464656591415405, + "learning_rate": 8.864191074669133e-06, + "loss": 0.0025, + "step": 92600 + }, + { + "epoch": 0.5939717733530447, + "grad_norm": 0.13741368055343628, + "learning_rate": 8.863835860963162e-06, + "loss": 0.0079, + "step": 92610 + }, + { + "epoch": 0.5940359102468309, + "grad_norm": 0.02941173128783703, + "learning_rate": 8.863480598840565e-06, + "loss": 0.0016, + "step": 92620 + }, + { + "epoch": 0.5941000471406169, + "grad_norm": 0.048090964555740356, + "learning_rate": 8.863125288305797e-06, + "loss": 0.0033, + "step": 92630 + }, + { + "epoch": 0.5941641840344031, + "grad_norm": 0.07795095443725586, + "learning_rate": 8.862769929363307e-06, + "loss": 0.0029, + "step": 92640 + }, + { + "epoch": 0.5942283209281891, + "grad_norm": 0.13922512531280518, + "learning_rate": 8.862414522017549e-06, + "loss": 0.0019, + "step": 92650 + }, + { + "epoch": 0.5942924578219753, + "grad_norm": 0.23520700633525848, + "learning_rate": 8.862059066272978e-06, + "loss": 0.0042, + "step": 92660 + }, + { + "epoch": 0.5943565947157613, + "grad_norm": 0.22586287558078766, + "learning_rate": 8.861703562134046e-06, + "loss": 0.0031, + "step": 92670 + }, + { + "epoch": 0.5944207316095474, + "grad_norm": 0.28761547803878784, + "learning_rate": 8.861348009605207e-06, + "loss": 0.0025, + "step": 92680 + }, + { + "epoch": 0.5944848685033335, + "grad_norm": 0.05647381395101547, + "learning_rate": 8.860992408690919e-06, + "loss": 0.0033, + "step": 92690 + }, + { + "epoch": 0.5945490053971196, + "grad_norm": 0.22699567675590515, + "learning_rate": 8.860636759395637e-06, + "loss": 0.0044, + "step": 92700 + }, + { + "epoch": 0.5946131422909057, + "grad_norm": 0.03414135426282883, + "learning_rate": 8.860281061723816e-06, + "loss": 0.0037, + "step": 92710 + }, + { + "epoch": 0.5946772791846918, + "grad_norm": 0.08566999435424805, + "learning_rate": 8.859925315679916e-06, + "loss": 0.0038, + "step": 92720 + }, + { + "epoch": 0.5947414160784779, + "grad_norm": 0.0905214250087738, + "learning_rate": 8.859569521268391e-06, + "loss": 0.0017, + "step": 92730 + }, + { + "epoch": 0.594805552972264, + "grad_norm": 0.142642080783844, + "learning_rate": 8.859213678493703e-06, + "loss": 0.0037, + "step": 92740 + }, + { + "epoch": 0.59486968986605, + "grad_norm": 0.05979500338435173, + "learning_rate": 8.858857787360311e-06, + "loss": 0.002, + "step": 92750 + }, + { + "epoch": 0.5949338267598362, + "grad_norm": 0.10995723307132721, + "learning_rate": 8.858501847872671e-06, + "loss": 0.0032, + "step": 92760 + }, + { + "epoch": 0.5949979636536223, + "grad_norm": 0.18088792264461517, + "learning_rate": 8.858145860035246e-06, + "loss": 0.0035, + "step": 92770 + }, + { + "epoch": 0.5950621005474084, + "grad_norm": 0.16171729564666748, + "learning_rate": 8.857789823852495e-06, + "loss": 0.0044, + "step": 92780 + }, + { + "epoch": 0.5951262374411945, + "grad_norm": 0.20499688386917114, + "learning_rate": 8.85743373932888e-06, + "loss": 0.0031, + "step": 92790 + }, + { + "epoch": 0.5951903743349806, + "grad_norm": 0.3269047141075134, + "learning_rate": 8.857077606468864e-06, + "loss": 0.0042, + "step": 92800 + }, + { + "epoch": 0.5952545112287667, + "grad_norm": 0.10731284320354462, + "learning_rate": 8.856721425276912e-06, + "loss": 0.0025, + "step": 92810 + }, + { + "epoch": 0.5953186481225528, + "grad_norm": 0.13672778010368347, + "learning_rate": 8.85636519575748e-06, + "loss": 0.0024, + "step": 92820 + }, + { + "epoch": 0.5953827850163389, + "grad_norm": 0.07215884327888489, + "learning_rate": 8.856008917915037e-06, + "loss": 0.003, + "step": 92830 + }, + { + "epoch": 0.5954469219101249, + "grad_norm": 0.07725408673286438, + "learning_rate": 8.855652591754047e-06, + "loss": 0.0037, + "step": 92840 + }, + { + "epoch": 0.5955110588039111, + "grad_norm": 0.10322929918766022, + "learning_rate": 8.855296217278974e-06, + "loss": 0.0029, + "step": 92850 + }, + { + "epoch": 0.5955751956976971, + "grad_norm": 0.04164673015475273, + "learning_rate": 8.854939794494284e-06, + "loss": 0.0022, + "step": 92860 + }, + { + "epoch": 0.5956393325914833, + "grad_norm": 0.13567589223384857, + "learning_rate": 8.854583323404443e-06, + "loss": 0.003, + "step": 92870 + }, + { + "epoch": 0.5957034694852693, + "grad_norm": 0.07293649017810822, + "learning_rate": 8.854226804013921e-06, + "loss": 0.0017, + "step": 92880 + }, + { + "epoch": 0.5957676063790555, + "grad_norm": 0.11784832179546356, + "learning_rate": 8.85387023632718e-06, + "loss": 0.0052, + "step": 92890 + }, + { + "epoch": 0.5958317432728416, + "grad_norm": 0.3238033056259155, + "learning_rate": 8.85351362034869e-06, + "loss": 0.0038, + "step": 92900 + }, + { + "epoch": 0.5958958801666276, + "grad_norm": 0.038372065871953964, + "learning_rate": 8.853156956082921e-06, + "loss": 0.0065, + "step": 92910 + }, + { + "epoch": 0.5959600170604138, + "grad_norm": 0.09675122052431107, + "learning_rate": 8.852800243534343e-06, + "loss": 0.004, + "step": 92920 + }, + { + "epoch": 0.5960241539541998, + "grad_norm": 0.15376578271389008, + "learning_rate": 8.852443482707423e-06, + "loss": 0.0031, + "step": 92930 + }, + { + "epoch": 0.596088290847986, + "grad_norm": 0.1450980007648468, + "learning_rate": 8.852086673606634e-06, + "loss": 0.0034, + "step": 92940 + }, + { + "epoch": 0.596152427741772, + "grad_norm": 0.1521768420934677, + "learning_rate": 8.851729816236445e-06, + "loss": 0.0038, + "step": 92950 + }, + { + "epoch": 0.5962165646355582, + "grad_norm": 0.1504070907831192, + "learning_rate": 8.851372910601328e-06, + "loss": 0.0043, + "step": 92960 + }, + { + "epoch": 0.5962807015293442, + "grad_norm": 0.09424560517072678, + "learning_rate": 8.851015956705757e-06, + "loss": 0.0036, + "step": 92970 + }, + { + "epoch": 0.5963448384231304, + "grad_norm": 0.09183598309755325, + "learning_rate": 8.850658954554203e-06, + "loss": 0.0024, + "step": 92980 + }, + { + "epoch": 0.5964089753169164, + "grad_norm": 0.08142650872468948, + "learning_rate": 8.85030190415114e-06, + "loss": 0.0041, + "step": 92990 + }, + { + "epoch": 0.5964731122107025, + "grad_norm": 0.05937165766954422, + "learning_rate": 8.849944805501043e-06, + "loss": 0.0021, + "step": 93000 + }, + { + "epoch": 0.5965372491044886, + "grad_norm": 0.4259531497955322, + "learning_rate": 8.849587658608386e-06, + "loss": 0.003, + "step": 93010 + }, + { + "epoch": 0.5966013859982747, + "grad_norm": 0.11123054474592209, + "learning_rate": 8.849230463477645e-06, + "loss": 0.0033, + "step": 93020 + }, + { + "epoch": 0.5966655228920608, + "grad_norm": 0.21839596331119537, + "learning_rate": 8.848873220113294e-06, + "loss": 0.0018, + "step": 93030 + }, + { + "epoch": 0.5967296597858469, + "grad_norm": 0.053809747099876404, + "learning_rate": 8.848515928519812e-06, + "loss": 0.0019, + "step": 93040 + }, + { + "epoch": 0.5967937966796331, + "grad_norm": 0.11655854433774948, + "learning_rate": 8.848158588701674e-06, + "loss": 0.0037, + "step": 93050 + }, + { + "epoch": 0.5968579335734191, + "grad_norm": 0.22731737792491913, + "learning_rate": 8.84780120066336e-06, + "loss": 0.0033, + "step": 93060 + }, + { + "epoch": 0.5969220704672052, + "grad_norm": 0.07772057503461838, + "learning_rate": 8.847443764409344e-06, + "loss": 0.0052, + "step": 93070 + }, + { + "epoch": 0.5969862073609913, + "grad_norm": 0.11905039846897125, + "learning_rate": 8.847086279944112e-06, + "loss": 0.0026, + "step": 93080 + }, + { + "epoch": 0.5970503442547774, + "grad_norm": 0.23587697744369507, + "learning_rate": 8.846728747272137e-06, + "loss": 0.0024, + "step": 93090 + }, + { + "epoch": 0.5971144811485635, + "grad_norm": 0.02773495949804783, + "learning_rate": 8.846371166397903e-06, + "loss": 0.0044, + "step": 93100 + }, + { + "epoch": 0.5971786180423496, + "grad_norm": 0.4221494197845459, + "learning_rate": 8.846013537325887e-06, + "loss": 0.0048, + "step": 93110 + }, + { + "epoch": 0.5972427549361357, + "grad_norm": 0.11520206928253174, + "learning_rate": 8.845655860060574e-06, + "loss": 0.0031, + "step": 93120 + }, + { + "epoch": 0.5973068918299218, + "grad_norm": 0.22873573005199432, + "learning_rate": 8.845298134606445e-06, + "loss": 0.0016, + "step": 93130 + }, + { + "epoch": 0.5973710287237078, + "grad_norm": 0.3438142240047455, + "learning_rate": 8.844940360967981e-06, + "loss": 0.0033, + "step": 93140 + }, + { + "epoch": 0.597435165617494, + "grad_norm": 0.10318133234977722, + "learning_rate": 8.844582539149667e-06, + "loss": 0.0025, + "step": 93150 + }, + { + "epoch": 0.59749930251128, + "grad_norm": 0.15780027210712433, + "learning_rate": 8.844224669155986e-06, + "loss": 0.004, + "step": 93160 + }, + { + "epoch": 0.5975634394050662, + "grad_norm": 0.09695665538311005, + "learning_rate": 8.843866750991424e-06, + "loss": 0.0043, + "step": 93170 + }, + { + "epoch": 0.5976275762988523, + "grad_norm": 0.03867492824792862, + "learning_rate": 8.843508784660461e-06, + "loss": 0.0021, + "step": 93180 + }, + { + "epoch": 0.5976917131926384, + "grad_norm": 0.07050751894712448, + "learning_rate": 8.843150770167589e-06, + "loss": 0.0034, + "step": 93190 + }, + { + "epoch": 0.5977558500864245, + "grad_norm": 0.09821964800357819, + "learning_rate": 8.84279270751729e-06, + "loss": 0.0024, + "step": 93200 + }, + { + "epoch": 0.5978199869802105, + "grad_norm": 0.05456443503499031, + "learning_rate": 8.842434596714054e-06, + "loss": 0.0023, + "step": 93210 + }, + { + "epoch": 0.5978841238739967, + "grad_norm": 0.3301343619823456, + "learning_rate": 8.842076437762364e-06, + "loss": 0.0044, + "step": 93220 + }, + { + "epoch": 0.5979482607677827, + "grad_norm": 0.11671298742294312, + "learning_rate": 8.84171823066671e-06, + "loss": 0.0039, + "step": 93230 + }, + { + "epoch": 0.5980123976615689, + "grad_norm": 0.05689849331974983, + "learning_rate": 8.841359975431583e-06, + "loss": 0.0028, + "step": 93240 + }, + { + "epoch": 0.5980765345553549, + "grad_norm": 0.02990148402750492, + "learning_rate": 8.841001672061468e-06, + "loss": 0.0031, + "step": 93250 + }, + { + "epoch": 0.5981406714491411, + "grad_norm": 0.05530443415045738, + "learning_rate": 8.84064332056086e-06, + "loss": 0.0029, + "step": 93260 + }, + { + "epoch": 0.5982048083429271, + "grad_norm": 0.10282295942306519, + "learning_rate": 8.840284920934243e-06, + "loss": 0.0052, + "step": 93270 + }, + { + "epoch": 0.5982689452367133, + "grad_norm": 0.07693000882863998, + "learning_rate": 8.839926473186114e-06, + "loss": 0.0041, + "step": 93280 + }, + { + "epoch": 0.5983330821304993, + "grad_norm": 0.11836773157119751, + "learning_rate": 8.83956797732096e-06, + "loss": 0.0033, + "step": 93290 + }, + { + "epoch": 0.5983972190242854, + "grad_norm": 0.11034104973077774, + "learning_rate": 8.839209433343273e-06, + "loss": 0.0036, + "step": 93300 + }, + { + "epoch": 0.5984613559180715, + "grad_norm": 0.11941079050302505, + "learning_rate": 8.838850841257552e-06, + "loss": 0.0018, + "step": 93310 + }, + { + "epoch": 0.5985254928118576, + "grad_norm": 0.17596879601478577, + "learning_rate": 8.838492201068285e-06, + "loss": 0.0038, + "step": 93320 + }, + { + "epoch": 0.5985896297056438, + "grad_norm": 0.004393393639475107, + "learning_rate": 8.838133512779968e-06, + "loss": 0.0042, + "step": 93330 + }, + { + "epoch": 0.5986537665994298, + "grad_norm": 0.3173533082008362, + "learning_rate": 8.837774776397095e-06, + "loss": 0.0031, + "step": 93340 + }, + { + "epoch": 0.598717903493216, + "grad_norm": 0.11862670630216599, + "learning_rate": 8.83741599192416e-06, + "loss": 0.0034, + "step": 93350 + }, + { + "epoch": 0.598782040387002, + "grad_norm": 0.10155455023050308, + "learning_rate": 8.83705715936566e-06, + "loss": 0.0021, + "step": 93360 + }, + { + "epoch": 0.5988461772807882, + "grad_norm": 0.15258212387561798, + "learning_rate": 8.836698278726092e-06, + "loss": 0.0029, + "step": 93370 + }, + { + "epoch": 0.5989103141745742, + "grad_norm": 0.11390714347362518, + "learning_rate": 8.836339350009954e-06, + "loss": 0.0024, + "step": 93380 + }, + { + "epoch": 0.5989744510683603, + "grad_norm": 0.0799383893609047, + "learning_rate": 8.835980373221741e-06, + "loss": 0.002, + "step": 93390 + }, + { + "epoch": 0.5990385879621464, + "grad_norm": 0.10548446327447891, + "learning_rate": 8.835621348365952e-06, + "loss": 0.0022, + "step": 93400 + }, + { + "epoch": 0.5991027248559325, + "grad_norm": 0.26774832606315613, + "learning_rate": 8.835262275447087e-06, + "loss": 0.0049, + "step": 93410 + }, + { + "epoch": 0.5991668617497186, + "grad_norm": 0.18672409653663635, + "learning_rate": 8.834903154469643e-06, + "loss": 0.0019, + "step": 93420 + }, + { + "epoch": 0.5992309986435047, + "grad_norm": 0.16137036681175232, + "learning_rate": 8.834543985438125e-06, + "loss": 0.0043, + "step": 93430 + }, + { + "epoch": 0.5992951355372907, + "grad_norm": 0.08167710155248642, + "learning_rate": 8.834184768357028e-06, + "loss": 0.0033, + "step": 93440 + }, + { + "epoch": 0.5993592724310769, + "grad_norm": 0.14367537200450897, + "learning_rate": 8.833825503230854e-06, + "loss": 0.0043, + "step": 93450 + }, + { + "epoch": 0.5994234093248629, + "grad_norm": 0.0750400498509407, + "learning_rate": 8.83346619006411e-06, + "loss": 0.0022, + "step": 93460 + }, + { + "epoch": 0.5994875462186491, + "grad_norm": 0.6188009977340698, + "learning_rate": 8.833106828861294e-06, + "loss": 0.0034, + "step": 93470 + }, + { + "epoch": 0.5995516831124352, + "grad_norm": 0.3307935893535614, + "learning_rate": 8.832747419626908e-06, + "loss": 0.0049, + "step": 93480 + }, + { + "epoch": 0.5996158200062213, + "grad_norm": 0.10426060855388641, + "learning_rate": 8.832387962365458e-06, + "loss": 0.0031, + "step": 93490 + }, + { + "epoch": 0.5996799569000074, + "grad_norm": 0.298533171415329, + "learning_rate": 8.83202845708145e-06, + "loss": 0.0052, + "step": 93500 + }, + { + "epoch": 0.5997440937937935, + "grad_norm": 0.13313566148281097, + "learning_rate": 8.831668903779384e-06, + "loss": 0.0025, + "step": 93510 + }, + { + "epoch": 0.5998082306875796, + "grad_norm": 0.08680471777915955, + "learning_rate": 8.83130930246377e-06, + "loss": 0.0048, + "step": 93520 + }, + { + "epoch": 0.5998723675813656, + "grad_norm": 0.07953489571809769, + "learning_rate": 8.83094965313911e-06, + "loss": 0.0033, + "step": 93530 + }, + { + "epoch": 0.5999365044751518, + "grad_norm": 0.1352090984582901, + "learning_rate": 8.830589955809916e-06, + "loss": 0.0029, + "step": 93540 + }, + { + "epoch": 0.6000006413689378, + "grad_norm": 0.1552029699087143, + "learning_rate": 8.830230210480692e-06, + "loss": 0.0024, + "step": 93550 + }, + { + "epoch": 0.600064778262724, + "grad_norm": 0.2718900442123413, + "learning_rate": 8.829870417155944e-06, + "loss": 0.0027, + "step": 93560 + }, + { + "epoch": 0.60012891515651, + "grad_norm": 0.14161957800388336, + "learning_rate": 8.829510575840184e-06, + "loss": 0.0027, + "step": 93570 + }, + { + "epoch": 0.6001930520502962, + "grad_norm": 0.22129161655902863, + "learning_rate": 8.829150686537919e-06, + "loss": 0.0022, + "step": 93580 + }, + { + "epoch": 0.6002571889440822, + "grad_norm": 0.1630723923444748, + "learning_rate": 8.82879074925366e-06, + "loss": 0.0035, + "step": 93590 + }, + { + "epoch": 0.6003213258378683, + "grad_norm": 0.06927991658449173, + "learning_rate": 8.828430763991916e-06, + "loss": 0.0018, + "step": 93600 + }, + { + "epoch": 0.6003854627316545, + "grad_norm": 0.14366739988327026, + "learning_rate": 8.828070730757196e-06, + "loss": 0.0018, + "step": 93610 + }, + { + "epoch": 0.6004495996254405, + "grad_norm": 0.12288960069417953, + "learning_rate": 8.827710649554018e-06, + "loss": 0.0022, + "step": 93620 + }, + { + "epoch": 0.6005137365192267, + "grad_norm": 0.13222576677799225, + "learning_rate": 8.827350520386886e-06, + "loss": 0.0037, + "step": 93630 + }, + { + "epoch": 0.6005778734130127, + "grad_norm": 0.0924672782421112, + "learning_rate": 8.82699034326032e-06, + "loss": 0.0043, + "step": 93640 + }, + { + "epoch": 0.6006420103067989, + "grad_norm": 0.11825573444366455, + "learning_rate": 8.826630118178828e-06, + "loss": 0.003, + "step": 93650 + }, + { + "epoch": 0.6007061472005849, + "grad_norm": 0.11818478256464005, + "learning_rate": 8.826269845146926e-06, + "loss": 0.0018, + "step": 93660 + }, + { + "epoch": 0.600770284094371, + "grad_norm": 0.12195321172475815, + "learning_rate": 8.825909524169129e-06, + "loss": 0.003, + "step": 93670 + }, + { + "epoch": 0.6008344209881571, + "grad_norm": 0.081720270216465, + "learning_rate": 8.825549155249951e-06, + "loss": 0.0026, + "step": 93680 + }, + { + "epoch": 0.6008985578819432, + "grad_norm": 0.08222363889217377, + "learning_rate": 8.825188738393908e-06, + "loss": 0.0022, + "step": 93690 + }, + { + "epoch": 0.6009626947757293, + "grad_norm": 0.1412651687860489, + "learning_rate": 8.824828273605515e-06, + "loss": 0.0057, + "step": 93700 + }, + { + "epoch": 0.6010268316695154, + "grad_norm": 0.09899317473173141, + "learning_rate": 8.824467760889291e-06, + "loss": 0.0043, + "step": 93710 + }, + { + "epoch": 0.6010909685633015, + "grad_norm": 0.23038393259048462, + "learning_rate": 8.824107200249754e-06, + "loss": 0.002, + "step": 93720 + }, + { + "epoch": 0.6011551054570876, + "grad_norm": 0.26497548818588257, + "learning_rate": 8.82374659169142e-06, + "loss": 0.0046, + "step": 93730 + }, + { + "epoch": 0.6012192423508737, + "grad_norm": 0.36057302355766296, + "learning_rate": 8.823385935218806e-06, + "loss": 0.0032, + "step": 93740 + }, + { + "epoch": 0.6012833792446598, + "grad_norm": 0.16276083886623383, + "learning_rate": 8.823025230836438e-06, + "loss": 0.0034, + "step": 93750 + }, + { + "epoch": 0.601347516138446, + "grad_norm": 0.12810267508029938, + "learning_rate": 8.822664478548829e-06, + "loss": 0.0044, + "step": 93760 + }, + { + "epoch": 0.601411653032232, + "grad_norm": 0.08066945523023605, + "learning_rate": 8.822303678360502e-06, + "loss": 0.0029, + "step": 93770 + }, + { + "epoch": 0.6014757899260181, + "grad_norm": 0.3909737169742584, + "learning_rate": 8.821942830275978e-06, + "loss": 0.0021, + "step": 93780 + }, + { + "epoch": 0.6015399268198042, + "grad_norm": 0.33293431997299194, + "learning_rate": 8.82158193429978e-06, + "loss": 0.0043, + "step": 93790 + }, + { + "epoch": 0.6016040637135903, + "grad_norm": 0.16712966561317444, + "learning_rate": 8.821220990436427e-06, + "loss": 0.0035, + "step": 93800 + }, + { + "epoch": 0.6016682006073764, + "grad_norm": 0.20190496742725372, + "learning_rate": 8.820859998690448e-06, + "loss": 0.0027, + "step": 93810 + }, + { + "epoch": 0.6017323375011625, + "grad_norm": 0.25855904817581177, + "learning_rate": 8.820498959066359e-06, + "loss": 0.0118, + "step": 93820 + }, + { + "epoch": 0.6017964743949485, + "grad_norm": 0.06966379284858704, + "learning_rate": 8.820137871568688e-06, + "loss": 0.005, + "step": 93830 + }, + { + "epoch": 0.6018606112887347, + "grad_norm": 0.34347179532051086, + "learning_rate": 8.81977673620196e-06, + "loss": 0.0019, + "step": 93840 + }, + { + "epoch": 0.6019247481825207, + "grad_norm": 0.5182726979255676, + "learning_rate": 8.8194155529707e-06, + "loss": 0.0027, + "step": 93850 + }, + { + "epoch": 0.6019888850763069, + "grad_norm": 0.09111955761909485, + "learning_rate": 8.819054321879433e-06, + "loss": 0.0046, + "step": 93860 + }, + { + "epoch": 0.6020530219700929, + "grad_norm": 0.16851943731307983, + "learning_rate": 8.818693042932685e-06, + "loss": 0.0019, + "step": 93870 + }, + { + "epoch": 0.6021171588638791, + "grad_norm": 0.07118560373783112, + "learning_rate": 8.818331716134984e-06, + "loss": 0.0021, + "step": 93880 + }, + { + "epoch": 0.6021812957576652, + "grad_norm": 0.10934966057538986, + "learning_rate": 8.817970341490859e-06, + "loss": 0.0029, + "step": 93890 + }, + { + "epoch": 0.6022454326514513, + "grad_norm": 0.05146744102239609, + "learning_rate": 8.817608919004836e-06, + "loss": 0.0028, + "step": 93900 + }, + { + "epoch": 0.6023095695452374, + "grad_norm": 0.05124472454190254, + "learning_rate": 8.817247448681446e-06, + "loss": 0.0025, + "step": 93910 + }, + { + "epoch": 0.6023737064390234, + "grad_norm": 0.16353295743465424, + "learning_rate": 8.816885930525216e-06, + "loss": 0.0054, + "step": 93920 + }, + { + "epoch": 0.6024378433328096, + "grad_norm": 0.1566086709499359, + "learning_rate": 8.816524364540678e-06, + "loss": 0.0065, + "step": 93930 + }, + { + "epoch": 0.6025019802265956, + "grad_norm": 0.21201664209365845, + "learning_rate": 8.816162750732362e-06, + "loss": 0.0037, + "step": 93940 + }, + { + "epoch": 0.6025661171203818, + "grad_norm": 0.3268226683139801, + "learning_rate": 8.815801089104799e-06, + "loss": 0.0013, + "step": 93950 + }, + { + "epoch": 0.6026302540141678, + "grad_norm": 0.042904358357191086, + "learning_rate": 8.815439379662522e-06, + "loss": 0.0049, + "step": 93960 + }, + { + "epoch": 0.602694390907954, + "grad_norm": 0.08511478453874588, + "learning_rate": 8.815077622410062e-06, + "loss": 0.0024, + "step": 93970 + }, + { + "epoch": 0.60275852780174, + "grad_norm": 0.07031531631946564, + "learning_rate": 8.814715817351954e-06, + "loss": 0.005, + "step": 93980 + }, + { + "epoch": 0.6028226646955261, + "grad_norm": 0.22578291594982147, + "learning_rate": 8.814353964492729e-06, + "loss": 0.0028, + "step": 93990 + }, + { + "epoch": 0.6028868015893122, + "grad_norm": 0.07537861168384552, + "learning_rate": 8.813992063836923e-06, + "loss": 0.0037, + "step": 94000 + }, + { + "epoch": 0.6029509384830983, + "grad_norm": 0.3766474723815918, + "learning_rate": 8.813630115389071e-06, + "loss": 0.0022, + "step": 94010 + }, + { + "epoch": 0.6030150753768844, + "grad_norm": 0.09774509072303772, + "learning_rate": 8.813268119153709e-06, + "loss": 0.0027, + "step": 94020 + }, + { + "epoch": 0.6030792122706705, + "grad_norm": 0.1362939476966858, + "learning_rate": 8.81290607513537e-06, + "loss": 0.0029, + "step": 94030 + }, + { + "epoch": 0.6031433491644567, + "grad_norm": 0.14567211270332336, + "learning_rate": 8.812543983338595e-06, + "loss": 0.0033, + "step": 94040 + }, + { + "epoch": 0.6032074860582427, + "grad_norm": 0.3329886496067047, + "learning_rate": 8.812181843767918e-06, + "loss": 0.0042, + "step": 94050 + }, + { + "epoch": 0.6032716229520289, + "grad_norm": 0.1347803920507431, + "learning_rate": 8.811819656427877e-06, + "loss": 0.0019, + "step": 94060 + }, + { + "epoch": 0.6033357598458149, + "grad_norm": 0.058834757655858994, + "learning_rate": 8.811457421323013e-06, + "loss": 0.003, + "step": 94070 + }, + { + "epoch": 0.603399896739601, + "grad_norm": 0.1033516675233841, + "learning_rate": 8.811095138457863e-06, + "loss": 0.0032, + "step": 94080 + }, + { + "epoch": 0.6034640336333871, + "grad_norm": 0.1391102820634842, + "learning_rate": 8.810732807836968e-06, + "loss": 0.0043, + "step": 94090 + }, + { + "epoch": 0.6035281705271732, + "grad_norm": 0.14105257391929626, + "learning_rate": 8.810370429464867e-06, + "loss": 0.0034, + "step": 94100 + }, + { + "epoch": 0.6035923074209593, + "grad_norm": 0.1752769649028778, + "learning_rate": 8.8100080033461e-06, + "loss": 0.0022, + "step": 94110 + }, + { + "epoch": 0.6036564443147454, + "grad_norm": 0.07758922874927521, + "learning_rate": 8.80964552948521e-06, + "loss": 0.003, + "step": 94120 + }, + { + "epoch": 0.6037205812085314, + "grad_norm": 0.17690975964069366, + "learning_rate": 8.80928300788674e-06, + "loss": 0.0024, + "step": 94130 + }, + { + "epoch": 0.6037847181023176, + "grad_norm": 0.444504976272583, + "learning_rate": 8.808920438555231e-06, + "loss": 0.0025, + "step": 94140 + }, + { + "epoch": 0.6038488549961036, + "grad_norm": 0.20921805500984192, + "learning_rate": 8.808557821495227e-06, + "loss": 0.0041, + "step": 94150 + }, + { + "epoch": 0.6039129918898898, + "grad_norm": 0.03435481712222099, + "learning_rate": 8.808195156711273e-06, + "loss": 0.002, + "step": 94160 + }, + { + "epoch": 0.6039771287836759, + "grad_norm": 0.24317345023155212, + "learning_rate": 8.80783244420791e-06, + "loss": 0.0031, + "step": 94170 + }, + { + "epoch": 0.604041265677462, + "grad_norm": 0.07621653378009796, + "learning_rate": 8.807469683989685e-06, + "loss": 0.0053, + "step": 94180 + }, + { + "epoch": 0.6041054025712481, + "grad_norm": 0.04928126186132431, + "learning_rate": 8.807106876061143e-06, + "loss": 0.0032, + "step": 94190 + }, + { + "epoch": 0.6041695394650342, + "grad_norm": 0.0837152749300003, + "learning_rate": 8.806744020426832e-06, + "loss": 0.0026, + "step": 94200 + }, + { + "epoch": 0.6042336763588203, + "grad_norm": 0.04882385581731796, + "learning_rate": 8.806381117091298e-06, + "loss": 0.0034, + "step": 94210 + }, + { + "epoch": 0.6042978132526063, + "grad_norm": 0.1601816862821579, + "learning_rate": 8.806018166059087e-06, + "loss": 0.0052, + "step": 94220 + }, + { + "epoch": 0.6043619501463925, + "grad_norm": 0.655596911907196, + "learning_rate": 8.80565516733475e-06, + "loss": 0.0037, + "step": 94230 + }, + { + "epoch": 0.6044260870401785, + "grad_norm": 0.25453537702560425, + "learning_rate": 8.805292120922832e-06, + "loss": 0.0034, + "step": 94240 + }, + { + "epoch": 0.6044902239339647, + "grad_norm": 0.19453126192092896, + "learning_rate": 8.804929026827887e-06, + "loss": 0.0021, + "step": 94250 + }, + { + "epoch": 0.6045543608277507, + "grad_norm": 0.1085091158747673, + "learning_rate": 8.804565885054458e-06, + "loss": 0.0021, + "step": 94260 + }, + { + "epoch": 0.6046184977215369, + "grad_norm": 0.22164227068424225, + "learning_rate": 8.804202695607102e-06, + "loss": 0.0035, + "step": 94270 + }, + { + "epoch": 0.6046826346153229, + "grad_norm": 0.13246764242649078, + "learning_rate": 8.803839458490368e-06, + "loss": 0.005, + "step": 94280 + }, + { + "epoch": 0.604746771509109, + "grad_norm": 0.23474083840847015, + "learning_rate": 8.803476173708806e-06, + "loss": 0.0026, + "step": 94290 + }, + { + "epoch": 0.6048109084028951, + "grad_norm": 0.01400019135326147, + "learning_rate": 8.80311284126697e-06, + "loss": 0.0025, + "step": 94300 + }, + { + "epoch": 0.6048750452966812, + "grad_norm": 0.11559020727872849, + "learning_rate": 8.802749461169411e-06, + "loss": 0.0021, + "step": 94310 + }, + { + "epoch": 0.6049391821904674, + "grad_norm": 0.08287182450294495, + "learning_rate": 8.802386033420682e-06, + "loss": 0.0028, + "step": 94320 + }, + { + "epoch": 0.6050033190842534, + "grad_norm": 0.10514973849058151, + "learning_rate": 8.802022558025341e-06, + "loss": 0.0022, + "step": 94330 + }, + { + "epoch": 0.6050674559780396, + "grad_norm": 0.3619040846824646, + "learning_rate": 8.80165903498794e-06, + "loss": 0.0036, + "step": 94340 + }, + { + "epoch": 0.6051315928718256, + "grad_norm": 0.09895238280296326, + "learning_rate": 8.801295464313032e-06, + "loss": 0.0032, + "step": 94350 + }, + { + "epoch": 0.6051957297656118, + "grad_norm": 0.035954151302576065, + "learning_rate": 8.800931846005178e-06, + "loss": 0.0038, + "step": 94360 + }, + { + "epoch": 0.6052598666593978, + "grad_norm": 0.08849111944437027, + "learning_rate": 8.80056818006893e-06, + "loss": 0.0021, + "step": 94370 + }, + { + "epoch": 0.6053240035531839, + "grad_norm": 0.05876295641064644, + "learning_rate": 8.800204466508846e-06, + "loss": 0.0021, + "step": 94380 + }, + { + "epoch": 0.60538814044697, + "grad_norm": 0.07266619056463242, + "learning_rate": 8.799840705329486e-06, + "loss": 0.0042, + "step": 94390 + }, + { + "epoch": 0.6054522773407561, + "grad_norm": 0.1223178282380104, + "learning_rate": 8.799476896535403e-06, + "loss": 0.0021, + "step": 94400 + }, + { + "epoch": 0.6055164142345422, + "grad_norm": 0.10297498852014542, + "learning_rate": 8.799113040131161e-06, + "loss": 0.004, + "step": 94410 + }, + { + "epoch": 0.6055805511283283, + "grad_norm": 0.1572066694498062, + "learning_rate": 8.798749136121318e-06, + "loss": 0.0046, + "step": 94420 + }, + { + "epoch": 0.6056446880221144, + "grad_norm": 0.10034254193305969, + "learning_rate": 8.798385184510435e-06, + "loss": 0.0038, + "step": 94430 + }, + { + "epoch": 0.6057088249159005, + "grad_norm": 0.3818894624710083, + "learning_rate": 8.798021185303067e-06, + "loss": 0.0043, + "step": 94440 + }, + { + "epoch": 0.6057729618096866, + "grad_norm": 0.12883812189102173, + "learning_rate": 8.797657138503781e-06, + "loss": 0.0015, + "step": 94450 + }, + { + "epoch": 0.6058370987034727, + "grad_norm": 0.18104223906993866, + "learning_rate": 8.797293044117137e-06, + "loss": 0.0026, + "step": 94460 + }, + { + "epoch": 0.6059012355972588, + "grad_norm": 0.05865728482604027, + "learning_rate": 8.796928902147698e-06, + "loss": 0.0022, + "step": 94470 + }, + { + "epoch": 0.6059653724910449, + "grad_norm": 0.146976038813591, + "learning_rate": 8.796564712600024e-06, + "loss": 0.0052, + "step": 94480 + }, + { + "epoch": 0.606029509384831, + "grad_norm": 0.1646391898393631, + "learning_rate": 8.796200475478683e-06, + "loss": 0.0033, + "step": 94490 + }, + { + "epoch": 0.6060936462786171, + "grad_norm": 0.1387776881456375, + "learning_rate": 8.795836190788237e-06, + "loss": 0.0026, + "step": 94500 + }, + { + "epoch": 0.6061577831724032, + "grad_norm": 0.18178357183933258, + "learning_rate": 8.795471858533249e-06, + "loss": 0.0032, + "step": 94510 + }, + { + "epoch": 0.6062219200661892, + "grad_norm": 0.27202317118644714, + "learning_rate": 8.795107478718287e-06, + "loss": 0.0034, + "step": 94520 + }, + { + "epoch": 0.6062860569599754, + "grad_norm": 0.19006876647472382, + "learning_rate": 8.794743051347916e-06, + "loss": 0.0031, + "step": 94530 + }, + { + "epoch": 0.6063501938537614, + "grad_norm": 0.07087966054677963, + "learning_rate": 8.794378576426702e-06, + "loss": 0.003, + "step": 94540 + }, + { + "epoch": 0.6064143307475476, + "grad_norm": 0.008801287040114403, + "learning_rate": 8.794014053959211e-06, + "loss": 0.0024, + "step": 94550 + }, + { + "epoch": 0.6064784676413336, + "grad_norm": 0.11481478065252304, + "learning_rate": 8.793649483950015e-06, + "loss": 0.0055, + "step": 94560 + }, + { + "epoch": 0.6065426045351198, + "grad_norm": 0.1139615997672081, + "learning_rate": 8.79328486640368e-06, + "loss": 0.0018, + "step": 94570 + }, + { + "epoch": 0.6066067414289058, + "grad_norm": 0.09423598647117615, + "learning_rate": 8.792920201324773e-06, + "loss": 0.0047, + "step": 94580 + }, + { + "epoch": 0.606670878322692, + "grad_norm": 0.12620431184768677, + "learning_rate": 8.792555488717866e-06, + "loss": 0.0114, + "step": 94590 + }, + { + "epoch": 0.6067350152164781, + "grad_norm": 0.12396939843893051, + "learning_rate": 8.792190728587529e-06, + "loss": 0.0026, + "step": 94600 + }, + { + "epoch": 0.6067991521102641, + "grad_norm": 0.4281218349933624, + "learning_rate": 8.79182592093833e-06, + "loss": 0.0061, + "step": 94610 + }, + { + "epoch": 0.6068632890040503, + "grad_norm": 0.07776124775409698, + "learning_rate": 8.791461065774843e-06, + "loss": 0.0019, + "step": 94620 + }, + { + "epoch": 0.6069274258978363, + "grad_norm": 0.07456205040216446, + "learning_rate": 8.791096163101639e-06, + "loss": 0.004, + "step": 94630 + }, + { + "epoch": 0.6069915627916225, + "grad_norm": 0.12895146012306213, + "learning_rate": 8.790731212923292e-06, + "loss": 0.0021, + "step": 94640 + }, + { + "epoch": 0.6070556996854085, + "grad_norm": 0.13422219455242157, + "learning_rate": 8.790366215244372e-06, + "loss": 0.0028, + "step": 94650 + }, + { + "epoch": 0.6071198365791947, + "grad_norm": 0.15804046392440796, + "learning_rate": 8.790001170069454e-06, + "loss": 0.0033, + "step": 94660 + }, + { + "epoch": 0.6071839734729807, + "grad_norm": 0.04166124016046524, + "learning_rate": 8.789636077403114e-06, + "loss": 0.0042, + "step": 94670 + }, + { + "epoch": 0.6072481103667668, + "grad_norm": 0.06918887048959732, + "learning_rate": 8.789270937249925e-06, + "loss": 0.0025, + "step": 94680 + }, + { + "epoch": 0.6073122472605529, + "grad_norm": 0.0580136813223362, + "learning_rate": 8.788905749614463e-06, + "loss": 0.0026, + "step": 94690 + }, + { + "epoch": 0.607376384154339, + "grad_norm": 0.1450560837984085, + "learning_rate": 8.788540514501305e-06, + "loss": 0.0037, + "step": 94700 + }, + { + "epoch": 0.6074405210481251, + "grad_norm": 0.1259308159351349, + "learning_rate": 8.788175231915026e-06, + "loss": 0.0035, + "step": 94710 + }, + { + "epoch": 0.6075046579419112, + "grad_norm": 0.10257034003734589, + "learning_rate": 8.787809901860203e-06, + "loss": 0.0022, + "step": 94720 + }, + { + "epoch": 0.6075687948356974, + "grad_norm": 0.3289741277694702, + "learning_rate": 8.787444524341414e-06, + "loss": 0.0034, + "step": 94730 + }, + { + "epoch": 0.6076329317294834, + "grad_norm": 0.13260440528392792, + "learning_rate": 8.787079099363241e-06, + "loss": 0.0027, + "step": 94740 + }, + { + "epoch": 0.6076970686232696, + "grad_norm": 0.176558718085289, + "learning_rate": 8.78671362693026e-06, + "loss": 0.0046, + "step": 94750 + }, + { + "epoch": 0.6077612055170556, + "grad_norm": 0.023784136399626732, + "learning_rate": 8.78634810704705e-06, + "loss": 0.0038, + "step": 94760 + }, + { + "epoch": 0.6078253424108417, + "grad_norm": 0.09431969374418259, + "learning_rate": 8.785982539718194e-06, + "loss": 0.0045, + "step": 94770 + }, + { + "epoch": 0.6078894793046278, + "grad_norm": 0.10205917805433273, + "learning_rate": 8.785616924948269e-06, + "loss": 0.0038, + "step": 94780 + }, + { + "epoch": 0.6079536161984139, + "grad_norm": 0.2259370982646942, + "learning_rate": 8.785251262741858e-06, + "loss": 0.0039, + "step": 94790 + }, + { + "epoch": 0.6080177530922, + "grad_norm": 0.021746670827269554, + "learning_rate": 8.784885553103543e-06, + "loss": 0.0021, + "step": 94800 + }, + { + "epoch": 0.6080818899859861, + "grad_norm": 0.06631411612033844, + "learning_rate": 8.784519796037909e-06, + "loss": 0.0044, + "step": 94810 + }, + { + "epoch": 0.6081460268797722, + "grad_norm": 1.4281866550445557, + "learning_rate": 8.784153991549537e-06, + "loss": 0.0032, + "step": 94820 + }, + { + "epoch": 0.6082101637735583, + "grad_norm": 0.13056783378124237, + "learning_rate": 8.78378813964301e-06, + "loss": 0.0053, + "step": 94830 + }, + { + "epoch": 0.6082743006673443, + "grad_norm": 0.13347940146923065, + "learning_rate": 8.783422240322913e-06, + "loss": 0.0028, + "step": 94840 + }, + { + "epoch": 0.6083384375611305, + "grad_norm": 0.16992448270320892, + "learning_rate": 8.783056293593832e-06, + "loss": 0.0027, + "step": 94850 + }, + { + "epoch": 0.6084025744549165, + "grad_norm": 0.1951434165239334, + "learning_rate": 8.782690299460353e-06, + "loss": 0.0028, + "step": 94860 + }, + { + "epoch": 0.6084667113487027, + "grad_norm": 0.05179301276803017, + "learning_rate": 8.78232425792706e-06, + "loss": 0.0032, + "step": 94870 + }, + { + "epoch": 0.6085308482424888, + "grad_norm": 0.10104519128799438, + "learning_rate": 8.781958168998542e-06, + "loss": 0.0032, + "step": 94880 + }, + { + "epoch": 0.6085949851362749, + "grad_norm": 0.23673437535762787, + "learning_rate": 8.781592032679382e-06, + "loss": 0.0046, + "step": 94890 + }, + { + "epoch": 0.608659122030061, + "grad_norm": 0.3082229197025299, + "learning_rate": 8.781225848974174e-06, + "loss": 0.0066, + "step": 94900 + }, + { + "epoch": 0.608723258923847, + "grad_norm": 0.2882019281387329, + "learning_rate": 8.780859617887503e-06, + "loss": 0.0031, + "step": 94910 + }, + { + "epoch": 0.6087873958176332, + "grad_norm": 0.09817387163639069, + "learning_rate": 8.780493339423958e-06, + "loss": 0.0036, + "step": 94920 + }, + { + "epoch": 0.6088515327114192, + "grad_norm": 0.12644025683403015, + "learning_rate": 8.780127013588131e-06, + "loss": 0.0018, + "step": 94930 + }, + { + "epoch": 0.6089156696052054, + "grad_norm": 0.04789069667458534, + "learning_rate": 8.779760640384612e-06, + "loss": 0.003, + "step": 94940 + }, + { + "epoch": 0.6089798064989914, + "grad_norm": 0.06322155147790909, + "learning_rate": 8.779394219817989e-06, + "loss": 0.0022, + "step": 94950 + }, + { + "epoch": 0.6090439433927776, + "grad_norm": 0.15940053761005402, + "learning_rate": 8.779027751892855e-06, + "loss": 0.0024, + "step": 94960 + }, + { + "epoch": 0.6091080802865636, + "grad_norm": 0.18789884448051453, + "learning_rate": 8.778661236613803e-06, + "loss": 0.0027, + "step": 94970 + }, + { + "epoch": 0.6091722171803498, + "grad_norm": 0.06264634430408478, + "learning_rate": 8.778294673985426e-06, + "loss": 0.0043, + "step": 94980 + }, + { + "epoch": 0.6092363540741358, + "grad_norm": 0.05772439390420914, + "learning_rate": 8.777928064012315e-06, + "loss": 0.0023, + "step": 94990 + }, + { + "epoch": 0.6093004909679219, + "grad_norm": 0.17368662357330322, + "learning_rate": 8.777561406699065e-06, + "loss": 0.0031, + "step": 95000 + }, + { + "epoch": 0.609364627861708, + "grad_norm": 0.062287699431180954, + "learning_rate": 8.777194702050273e-06, + "loss": 0.0032, + "step": 95010 + }, + { + "epoch": 0.6094287647554941, + "grad_norm": 0.04523130878806114, + "learning_rate": 8.77682795007053e-06, + "loss": 0.0027, + "step": 95020 + }, + { + "epoch": 0.6094929016492803, + "grad_norm": 0.3858431875705719, + "learning_rate": 8.776461150764434e-06, + "loss": 0.0038, + "step": 95030 + }, + { + "epoch": 0.6095570385430663, + "grad_norm": 0.11962731927633286, + "learning_rate": 8.776094304136581e-06, + "loss": 0.0033, + "step": 95040 + }, + { + "epoch": 0.6096211754368525, + "grad_norm": 0.3771175444126129, + "learning_rate": 8.775727410191566e-06, + "loss": 0.0028, + "step": 95050 + }, + { + "epoch": 0.6096853123306385, + "grad_norm": 0.3290991485118866, + "learning_rate": 8.77536046893399e-06, + "loss": 0.0044, + "step": 95060 + }, + { + "epoch": 0.6097494492244246, + "grad_norm": 0.05832473561167717, + "learning_rate": 8.77499348036845e-06, + "loss": 0.0029, + "step": 95070 + }, + { + "epoch": 0.6098135861182107, + "grad_norm": 0.11864414066076279, + "learning_rate": 8.774626444499542e-06, + "loss": 0.0044, + "step": 95080 + }, + { + "epoch": 0.6098777230119968, + "grad_norm": 0.17416812479496002, + "learning_rate": 8.774259361331868e-06, + "loss": 0.0031, + "step": 95090 + }, + { + "epoch": 0.6099418599057829, + "grad_norm": 0.06899726390838623, + "learning_rate": 8.773892230870025e-06, + "loss": 0.0026, + "step": 95100 + }, + { + "epoch": 0.610005996799569, + "grad_norm": 0.2982441782951355, + "learning_rate": 8.773525053118617e-06, + "loss": 0.004, + "step": 95110 + }, + { + "epoch": 0.610070133693355, + "grad_norm": 0.07267230749130249, + "learning_rate": 8.773157828082242e-06, + "loss": 0.0061, + "step": 95120 + }, + { + "epoch": 0.6101342705871412, + "grad_norm": 0.20534218847751617, + "learning_rate": 8.772790555765504e-06, + "loss": 0.0026, + "step": 95130 + }, + { + "epoch": 0.6101984074809272, + "grad_norm": 0.0956711694598198, + "learning_rate": 8.772423236173001e-06, + "loss": 0.0028, + "step": 95140 + }, + { + "epoch": 0.6102625443747134, + "grad_norm": 0.3632100224494934, + "learning_rate": 8.772055869309343e-06, + "loss": 0.003, + "step": 95150 + }, + { + "epoch": 0.6103266812684995, + "grad_norm": 0.3185942769050598, + "learning_rate": 8.771688455179128e-06, + "loss": 0.0043, + "step": 95160 + }, + { + "epoch": 0.6103908181622856, + "grad_norm": 0.21670421957969666, + "learning_rate": 8.771320993786958e-06, + "loss": 0.0031, + "step": 95170 + }, + { + "epoch": 0.6104549550560717, + "grad_norm": 0.2991900146007538, + "learning_rate": 8.770953485137444e-06, + "loss": 0.0034, + "step": 95180 + }, + { + "epoch": 0.6105190919498578, + "grad_norm": 0.12183010578155518, + "learning_rate": 8.770585929235187e-06, + "loss": 0.0031, + "step": 95190 + }, + { + "epoch": 0.6105832288436439, + "grad_norm": 0.23525065183639526, + "learning_rate": 8.770218326084793e-06, + "loss": 0.003, + "step": 95200 + }, + { + "epoch": 0.61064736573743, + "grad_norm": 0.23921328783035278, + "learning_rate": 8.76985067569087e-06, + "loss": 0.005, + "step": 95210 + }, + { + "epoch": 0.6107115026312161, + "grad_norm": 0.22330203652381897, + "learning_rate": 8.769482978058021e-06, + "loss": 0.0025, + "step": 95220 + }, + { + "epoch": 0.6107756395250021, + "grad_norm": 0.27113255858421326, + "learning_rate": 8.769115233190859e-06, + "loss": 0.0034, + "step": 95230 + }, + { + "epoch": 0.6108397764187883, + "grad_norm": 0.07810204476118088, + "learning_rate": 8.768747441093988e-06, + "loss": 0.0033, + "step": 95240 + }, + { + "epoch": 0.6109039133125743, + "grad_norm": 0.15932008624076843, + "learning_rate": 8.768379601772018e-06, + "loss": 0.0042, + "step": 95250 + }, + { + "epoch": 0.6109680502063605, + "grad_norm": 0.07703088223934174, + "learning_rate": 8.768011715229559e-06, + "loss": 0.0047, + "step": 95260 + }, + { + "epoch": 0.6110321871001465, + "grad_norm": 0.4311964511871338, + "learning_rate": 8.76764378147122e-06, + "loss": 0.0048, + "step": 95270 + }, + { + "epoch": 0.6110963239939327, + "grad_norm": 0.07001207023859024, + "learning_rate": 8.76727580050161e-06, + "loss": 0.0025, + "step": 95280 + }, + { + "epoch": 0.6111604608877187, + "grad_norm": 0.12717720866203308, + "learning_rate": 8.766907772325344e-06, + "loss": 0.0023, + "step": 95290 + }, + { + "epoch": 0.6112245977815048, + "grad_norm": 0.10921081900596619, + "learning_rate": 8.766539696947032e-06, + "loss": 0.003, + "step": 95300 + }, + { + "epoch": 0.611288734675291, + "grad_norm": 0.06188970059156418, + "learning_rate": 8.766171574371285e-06, + "loss": 0.0035, + "step": 95310 + }, + { + "epoch": 0.611352871569077, + "grad_norm": 0.08212490379810333, + "learning_rate": 8.765803404602716e-06, + "loss": 0.0049, + "step": 95320 + }, + { + "epoch": 0.6114170084628632, + "grad_norm": 0.11185979098081589, + "learning_rate": 8.76543518764594e-06, + "loss": 0.0028, + "step": 95330 + }, + { + "epoch": 0.6114811453566492, + "grad_norm": 0.2138770967721939, + "learning_rate": 8.765066923505569e-06, + "loss": 0.0025, + "step": 95340 + }, + { + "epoch": 0.6115452822504354, + "grad_norm": 0.12622995674610138, + "learning_rate": 8.764698612186217e-06, + "loss": 0.0025, + "step": 95350 + }, + { + "epoch": 0.6116094191442214, + "grad_norm": 0.11631227284669876, + "learning_rate": 8.764330253692504e-06, + "loss": 0.0063, + "step": 95360 + }, + { + "epoch": 0.6116735560380075, + "grad_norm": 0.10350869596004486, + "learning_rate": 8.76396184802904e-06, + "loss": 0.0026, + "step": 95370 + }, + { + "epoch": 0.6117376929317936, + "grad_norm": 0.18187642097473145, + "learning_rate": 8.763593395200446e-06, + "loss": 0.0034, + "step": 95380 + }, + { + "epoch": 0.6118018298255797, + "grad_norm": 0.17044878005981445, + "learning_rate": 8.763224895211336e-06, + "loss": 0.0036, + "step": 95390 + }, + { + "epoch": 0.6118659667193658, + "grad_norm": 0.08452159911394119, + "learning_rate": 8.76285634806633e-06, + "loss": 0.0037, + "step": 95400 + }, + { + "epoch": 0.6119301036131519, + "grad_norm": 0.12100055813789368, + "learning_rate": 8.762487753770041e-06, + "loss": 0.0047, + "step": 95410 + }, + { + "epoch": 0.611994240506938, + "grad_norm": 0.12239952385425568, + "learning_rate": 8.762119112327095e-06, + "loss": 0.0045, + "step": 95420 + }, + { + "epoch": 0.6120583774007241, + "grad_norm": 0.9005413055419922, + "learning_rate": 8.761750423742106e-06, + "loss": 0.0035, + "step": 95430 + }, + { + "epoch": 0.6121225142945103, + "grad_norm": 0.48149168491363525, + "learning_rate": 8.761381688019696e-06, + "loss": 0.0037, + "step": 95440 + }, + { + "epoch": 0.6121866511882963, + "grad_norm": 0.435469388961792, + "learning_rate": 8.761012905164487e-06, + "loss": 0.0035, + "step": 95450 + }, + { + "epoch": 0.6122507880820824, + "grad_norm": 0.24214886128902435, + "learning_rate": 8.760644075181097e-06, + "loss": 0.0033, + "step": 95460 + }, + { + "epoch": 0.6123149249758685, + "grad_norm": 0.09286938607692719, + "learning_rate": 8.760275198074148e-06, + "loss": 0.0038, + "step": 95470 + }, + { + "epoch": 0.6123790618696546, + "grad_norm": 0.06739024817943573, + "learning_rate": 8.759906273848265e-06, + "loss": 0.0038, + "step": 95480 + }, + { + "epoch": 0.6124431987634407, + "grad_norm": 0.24788588285446167, + "learning_rate": 8.759537302508067e-06, + "loss": 0.0044, + "step": 95490 + }, + { + "epoch": 0.6125073356572268, + "grad_norm": 0.09615003317594528, + "learning_rate": 8.759168284058182e-06, + "loss": 0.0034, + "step": 95500 + }, + { + "epoch": 0.6125714725510129, + "grad_norm": 0.22313320636749268, + "learning_rate": 8.758799218503232e-06, + "loss": 0.0032, + "step": 95510 + }, + { + "epoch": 0.612635609444799, + "grad_norm": 0.20353081822395325, + "learning_rate": 8.758430105847839e-06, + "loss": 0.0043, + "step": 95520 + }, + { + "epoch": 0.612699746338585, + "grad_norm": 0.38399964570999146, + "learning_rate": 8.75806094609663e-06, + "loss": 0.0085, + "step": 95530 + }, + { + "epoch": 0.6127638832323712, + "grad_norm": 0.1326301544904709, + "learning_rate": 8.757691739254235e-06, + "loss": 0.0092, + "step": 95540 + }, + { + "epoch": 0.6128280201261572, + "grad_norm": 0.112067811191082, + "learning_rate": 8.757322485325276e-06, + "loss": 0.0019, + "step": 95550 + }, + { + "epoch": 0.6128921570199434, + "grad_norm": 0.4138074517250061, + "learning_rate": 8.75695318431438e-06, + "loss": 0.0051, + "step": 95560 + }, + { + "epoch": 0.6129562939137294, + "grad_norm": 0.24018257856369019, + "learning_rate": 8.756583836226176e-06, + "loss": 0.0028, + "step": 95570 + }, + { + "epoch": 0.6130204308075156, + "grad_norm": 0.20457017421722412, + "learning_rate": 8.756214441065291e-06, + "loss": 0.0035, + "step": 95580 + }, + { + "epoch": 0.6130845677013017, + "grad_norm": 0.11519509553909302, + "learning_rate": 8.755844998836356e-06, + "loss": 0.0043, + "step": 95590 + }, + { + "epoch": 0.6131487045950877, + "grad_norm": 0.21396492421627045, + "learning_rate": 8.755475509543999e-06, + "loss": 0.0053, + "step": 95600 + }, + { + "epoch": 0.6132128414888739, + "grad_norm": 0.09090571105480194, + "learning_rate": 8.755105973192847e-06, + "loss": 0.0063, + "step": 95610 + }, + { + "epoch": 0.6132769783826599, + "grad_norm": 0.21443317830562592, + "learning_rate": 8.754736389787536e-06, + "loss": 0.0032, + "step": 95620 + }, + { + "epoch": 0.6133411152764461, + "grad_norm": 0.08013524115085602, + "learning_rate": 8.754366759332695e-06, + "loss": 0.0028, + "step": 95630 + }, + { + "epoch": 0.6134052521702321, + "grad_norm": 0.31743213534355164, + "learning_rate": 8.753997081832954e-06, + "loss": 0.0051, + "step": 95640 + }, + { + "epoch": 0.6134693890640183, + "grad_norm": 0.14229175448417664, + "learning_rate": 8.753627357292947e-06, + "loss": 0.0036, + "step": 95650 + }, + { + "epoch": 0.6135335259578043, + "grad_norm": 0.07585162669420242, + "learning_rate": 8.753257585717305e-06, + "loss": 0.0023, + "step": 95660 + }, + { + "epoch": 0.6135976628515905, + "grad_norm": 0.17819391191005707, + "learning_rate": 8.752887767110666e-06, + "loss": 0.0051, + "step": 95670 + }, + { + "epoch": 0.6136617997453765, + "grad_norm": 0.0805431678891182, + "learning_rate": 8.752517901477658e-06, + "loss": 0.0014, + "step": 95680 + }, + { + "epoch": 0.6137259366391626, + "grad_norm": 0.18486371636390686, + "learning_rate": 8.752147988822921e-06, + "loss": 0.0023, + "step": 95690 + }, + { + "epoch": 0.6137900735329487, + "grad_norm": 0.2803860306739807, + "learning_rate": 8.751778029151087e-06, + "loss": 0.0039, + "step": 95700 + }, + { + "epoch": 0.6138542104267348, + "grad_norm": 0.1302492767572403, + "learning_rate": 8.751408022466794e-06, + "loss": 0.0033, + "step": 95710 + }, + { + "epoch": 0.613918347320521, + "grad_norm": 0.11332884430885315, + "learning_rate": 8.751037968774676e-06, + "loss": 0.002, + "step": 95720 + }, + { + "epoch": 0.613982484214307, + "grad_norm": 0.08457744121551514, + "learning_rate": 8.750667868079372e-06, + "loss": 0.0028, + "step": 95730 + }, + { + "epoch": 0.6140466211080932, + "grad_norm": 0.08075898140668869, + "learning_rate": 8.75029772038552e-06, + "loss": 0.0065, + "step": 95740 + }, + { + "epoch": 0.6141107580018792, + "grad_norm": 0.06535733491182327, + "learning_rate": 8.749927525697757e-06, + "loss": 0.0035, + "step": 95750 + }, + { + "epoch": 0.6141748948956653, + "grad_norm": 0.1477227509021759, + "learning_rate": 8.74955728402072e-06, + "loss": 0.0056, + "step": 95760 + }, + { + "epoch": 0.6142390317894514, + "grad_norm": 0.10869273543357849, + "learning_rate": 8.749186995359054e-06, + "loss": 0.0024, + "step": 95770 + }, + { + "epoch": 0.6143031686832375, + "grad_norm": 0.2968710958957672, + "learning_rate": 8.748816659717392e-06, + "loss": 0.0057, + "step": 95780 + }, + { + "epoch": 0.6143673055770236, + "grad_norm": 0.07847687602043152, + "learning_rate": 8.74844627710038e-06, + "loss": 0.0032, + "step": 95790 + }, + { + "epoch": 0.6144314424708097, + "grad_norm": 0.14137952029705048, + "learning_rate": 8.748075847512656e-06, + "loss": 0.0035, + "step": 95800 + }, + { + "epoch": 0.6144955793645958, + "grad_norm": 0.22979572415351868, + "learning_rate": 8.747705370958865e-06, + "loss": 0.0031, + "step": 95810 + }, + { + "epoch": 0.6145597162583819, + "grad_norm": 0.05405260995030403, + "learning_rate": 8.747334847443644e-06, + "loss": 0.0011, + "step": 95820 + }, + { + "epoch": 0.6146238531521679, + "grad_norm": 0.2479952871799469, + "learning_rate": 8.746964276971642e-06, + "loss": 0.004, + "step": 95830 + }, + { + "epoch": 0.6146879900459541, + "grad_norm": 0.08774492889642715, + "learning_rate": 8.7465936595475e-06, + "loss": 0.0053, + "step": 95840 + }, + { + "epoch": 0.6147521269397401, + "grad_norm": 0.16067706048488617, + "learning_rate": 8.74622299517586e-06, + "loss": 0.004, + "step": 95850 + }, + { + "epoch": 0.6148162638335263, + "grad_norm": 0.22178137302398682, + "learning_rate": 8.745852283861368e-06, + "loss": 0.0034, + "step": 95860 + }, + { + "epoch": 0.6148804007273124, + "grad_norm": 0.1768014281988144, + "learning_rate": 8.745481525608671e-06, + "loss": 0.0016, + "step": 95870 + }, + { + "epoch": 0.6149445376210985, + "grad_norm": 0.04831257462501526, + "learning_rate": 8.745110720422414e-06, + "loss": 0.0027, + "step": 95880 + }, + { + "epoch": 0.6150086745148846, + "grad_norm": 0.3030824363231659, + "learning_rate": 8.744739868307241e-06, + "loss": 0.0041, + "step": 95890 + }, + { + "epoch": 0.6150728114086706, + "grad_norm": 0.15502512454986572, + "learning_rate": 8.744368969267804e-06, + "loss": 0.0044, + "step": 95900 + }, + { + "epoch": 0.6151369483024568, + "grad_norm": 0.052240338176488876, + "learning_rate": 8.743998023308747e-06, + "loss": 0.0017, + "step": 95910 + }, + { + "epoch": 0.6152010851962428, + "grad_norm": 0.2639593482017517, + "learning_rate": 8.743627030434718e-06, + "loss": 0.0031, + "step": 95920 + }, + { + "epoch": 0.615265222090029, + "grad_norm": 0.21429011225700378, + "learning_rate": 8.743255990650365e-06, + "loss": 0.0029, + "step": 95930 + }, + { + "epoch": 0.615329358983815, + "grad_norm": 0.28203192353248596, + "learning_rate": 8.742884903960343e-06, + "loss": 0.0023, + "step": 95940 + }, + { + "epoch": 0.6153934958776012, + "grad_norm": 0.0851309522986412, + "learning_rate": 8.742513770369297e-06, + "loss": 0.0018, + "step": 95950 + }, + { + "epoch": 0.6154576327713872, + "grad_norm": 0.09846670180559158, + "learning_rate": 8.742142589881876e-06, + "loss": 0.0022, + "step": 95960 + }, + { + "epoch": 0.6155217696651734, + "grad_norm": 0.059684813022613525, + "learning_rate": 8.741771362502738e-06, + "loss": 0.0039, + "step": 95970 + }, + { + "epoch": 0.6155859065589594, + "grad_norm": 0.04888633266091347, + "learning_rate": 8.741400088236527e-06, + "loss": 0.0025, + "step": 95980 + }, + { + "epoch": 0.6156500434527455, + "grad_norm": 0.22315432131290436, + "learning_rate": 8.741028767087902e-06, + "loss": 0.0033, + "step": 95990 + }, + { + "epoch": 0.6157141803465317, + "grad_norm": 0.08538077026605606, + "learning_rate": 8.740657399061513e-06, + "loss": 0.0025, + "step": 96000 + }, + { + "epoch": 0.6157783172403177, + "grad_norm": 0.320173442363739, + "learning_rate": 8.74028598416201e-06, + "loss": 0.0038, + "step": 96010 + }, + { + "epoch": 0.6158424541341039, + "grad_norm": 0.15246106684207916, + "learning_rate": 8.739914522394052e-06, + "loss": 0.0046, + "step": 96020 + }, + { + "epoch": 0.6159065910278899, + "grad_norm": 0.12204425036907196, + "learning_rate": 8.739543013762292e-06, + "loss": 0.005, + "step": 96030 + }, + { + "epoch": 0.6159707279216761, + "grad_norm": 0.16110706329345703, + "learning_rate": 8.739171458271387e-06, + "loss": 0.0031, + "step": 96040 + }, + { + "epoch": 0.6160348648154621, + "grad_norm": 0.8272030353546143, + "learning_rate": 8.738799855925991e-06, + "loss": 0.0026, + "step": 96050 + }, + { + "epoch": 0.6160990017092483, + "grad_norm": 0.2025410681962967, + "learning_rate": 8.738428206730758e-06, + "loss": 0.0049, + "step": 96060 + }, + { + "epoch": 0.6161631386030343, + "grad_norm": 0.1599372774362564, + "learning_rate": 8.73805651069035e-06, + "loss": 0.0036, + "step": 96070 + }, + { + "epoch": 0.6162272754968204, + "grad_norm": 0.10138299316167831, + "learning_rate": 8.737684767809423e-06, + "loss": 0.0027, + "step": 96080 + }, + { + "epoch": 0.6162914123906065, + "grad_norm": 0.16237998008728027, + "learning_rate": 8.737312978092634e-06, + "loss": 0.0029, + "step": 96090 + }, + { + "epoch": 0.6163555492843926, + "grad_norm": 0.035956889390945435, + "learning_rate": 8.736941141544642e-06, + "loss": 0.0037, + "step": 96100 + }, + { + "epoch": 0.6164196861781787, + "grad_norm": 0.09827184677124023, + "learning_rate": 8.736569258170107e-06, + "loss": 0.004, + "step": 96110 + }, + { + "epoch": 0.6164838230719648, + "grad_norm": 0.25321224331855774, + "learning_rate": 8.736197327973688e-06, + "loss": 0.0025, + "step": 96120 + }, + { + "epoch": 0.6165479599657508, + "grad_norm": 0.08422735333442688, + "learning_rate": 8.735825350960045e-06, + "loss": 0.0041, + "step": 96130 + }, + { + "epoch": 0.616612096859537, + "grad_norm": 0.12227088958024979, + "learning_rate": 8.735453327133842e-06, + "loss": 0.0024, + "step": 96140 + }, + { + "epoch": 0.6166762337533231, + "grad_norm": 0.06718363612890244, + "learning_rate": 8.73508125649974e-06, + "loss": 0.0029, + "step": 96150 + }, + { + "epoch": 0.6167403706471092, + "grad_norm": 0.038292769342660904, + "learning_rate": 8.7347091390624e-06, + "loss": 0.0016, + "step": 96160 + }, + { + "epoch": 0.6168045075408953, + "grad_norm": 0.08811763674020767, + "learning_rate": 8.734336974826486e-06, + "loss": 0.0028, + "step": 96170 + }, + { + "epoch": 0.6168686444346814, + "grad_norm": 0.1309226006269455, + "learning_rate": 8.733964763796659e-06, + "loss": 0.0032, + "step": 96180 + }, + { + "epoch": 0.6169327813284675, + "grad_norm": 0.13165056705474854, + "learning_rate": 8.733592505977586e-06, + "loss": 0.0039, + "step": 96190 + }, + { + "epoch": 0.6169969182222536, + "grad_norm": 0.06533897668123245, + "learning_rate": 8.73322020137393e-06, + "loss": 0.0032, + "step": 96200 + }, + { + "epoch": 0.6170610551160397, + "grad_norm": 0.20202882587909698, + "learning_rate": 8.732847849990358e-06, + "loss": 0.0028, + "step": 96210 + }, + { + "epoch": 0.6171251920098257, + "grad_norm": 0.1397777944803238, + "learning_rate": 8.732475451831533e-06, + "loss": 0.0039, + "step": 96220 + }, + { + "epoch": 0.6171893289036119, + "grad_norm": 0.1611679047346115, + "learning_rate": 8.732103006902125e-06, + "loss": 0.0037, + "step": 96230 + }, + { + "epoch": 0.6172534657973979, + "grad_norm": 0.18416641652584076, + "learning_rate": 8.731730515206796e-06, + "loss": 0.0033, + "step": 96240 + }, + { + "epoch": 0.6173176026911841, + "grad_norm": 0.04296709969639778, + "learning_rate": 8.731357976750219e-06, + "loss": 0.0032, + "step": 96250 + }, + { + "epoch": 0.6173817395849701, + "grad_norm": 0.11918571591377258, + "learning_rate": 8.730985391537059e-06, + "loss": 0.0022, + "step": 96260 + }, + { + "epoch": 0.6174458764787563, + "grad_norm": 0.4370180666446686, + "learning_rate": 8.730612759571986e-06, + "loss": 0.0044, + "step": 96270 + }, + { + "epoch": 0.6175100133725423, + "grad_norm": 0.05504896864295006, + "learning_rate": 8.730240080859667e-06, + "loss": 0.0018, + "step": 96280 + }, + { + "epoch": 0.6175741502663284, + "grad_norm": 0.2740112245082855, + "learning_rate": 8.729867355404776e-06, + "loss": 0.0033, + "step": 96290 + }, + { + "epoch": 0.6176382871601146, + "grad_norm": 0.1244712620973587, + "learning_rate": 8.729494583211981e-06, + "loss": 0.0025, + "step": 96300 + }, + { + "epoch": 0.6177024240539006, + "grad_norm": 0.04363995045423508, + "learning_rate": 8.729121764285953e-06, + "loss": 0.0025, + "step": 96310 + }, + { + "epoch": 0.6177665609476868, + "grad_norm": 0.1507520228624344, + "learning_rate": 8.728748898631365e-06, + "loss": 0.0031, + "step": 96320 + }, + { + "epoch": 0.6178306978414728, + "grad_norm": 0.08464416861534119, + "learning_rate": 8.728375986252888e-06, + "loss": 0.0046, + "step": 96330 + }, + { + "epoch": 0.617894834735259, + "grad_norm": 0.1334334909915924, + "learning_rate": 8.728003027155194e-06, + "loss": 0.0038, + "step": 96340 + }, + { + "epoch": 0.617958971629045, + "grad_norm": 0.2237366884946823, + "learning_rate": 8.727630021342958e-06, + "loss": 0.0046, + "step": 96350 + }, + { + "epoch": 0.6180231085228312, + "grad_norm": 0.1561523973941803, + "learning_rate": 8.727256968820855e-06, + "loss": 0.0032, + "step": 96360 + }, + { + "epoch": 0.6180872454166172, + "grad_norm": 0.13996556401252747, + "learning_rate": 8.72688386959356e-06, + "loss": 0.0071, + "step": 96370 + }, + { + "epoch": 0.6181513823104033, + "grad_norm": 0.06766559183597565, + "learning_rate": 8.726510723665742e-06, + "loss": 0.0022, + "step": 96380 + }, + { + "epoch": 0.6182155192041894, + "grad_norm": 0.3309437334537506, + "learning_rate": 8.726137531042084e-06, + "loss": 0.004, + "step": 96390 + }, + { + "epoch": 0.6182796560979755, + "grad_norm": 0.08681367337703705, + "learning_rate": 8.72576429172726e-06, + "loss": 0.003, + "step": 96400 + }, + { + "epoch": 0.6183437929917616, + "grad_norm": 0.13890603184700012, + "learning_rate": 8.725391005725944e-06, + "loss": 0.0049, + "step": 96410 + }, + { + "epoch": 0.6184079298855477, + "grad_norm": 0.0536317341029644, + "learning_rate": 8.725017673042819e-06, + "loss": 0.0026, + "step": 96420 + }, + { + "epoch": 0.6184720667793339, + "grad_norm": 0.1660311073064804, + "learning_rate": 8.72464429368256e-06, + "loss": 0.0033, + "step": 96430 + }, + { + "epoch": 0.6185362036731199, + "grad_norm": 0.07175929844379425, + "learning_rate": 8.724270867649846e-06, + "loss": 0.0018, + "step": 96440 + }, + { + "epoch": 0.618600340566906, + "grad_norm": 0.08966167271137238, + "learning_rate": 8.723897394949357e-06, + "loss": 0.0045, + "step": 96450 + }, + { + "epoch": 0.6186644774606921, + "grad_norm": 0.1288372278213501, + "learning_rate": 8.72352387558577e-06, + "loss": 0.0019, + "step": 96460 + }, + { + "epoch": 0.6187286143544782, + "grad_norm": 0.10833292454481125, + "learning_rate": 8.723150309563769e-06, + "loss": 0.0029, + "step": 96470 + }, + { + "epoch": 0.6187927512482643, + "grad_norm": 0.011052173562347889, + "learning_rate": 8.722776696888033e-06, + "loss": 0.0031, + "step": 96480 + }, + { + "epoch": 0.6188568881420504, + "grad_norm": 0.11594557017087936, + "learning_rate": 8.722403037563244e-06, + "loss": 0.0024, + "step": 96490 + }, + { + "epoch": 0.6189210250358365, + "grad_norm": 0.1987140029668808, + "learning_rate": 8.722029331594086e-06, + "loss": 0.0033, + "step": 96500 + }, + { + "epoch": 0.6189851619296226, + "grad_norm": 0.05850119888782501, + "learning_rate": 8.72165557898524e-06, + "loss": 0.0028, + "step": 96510 + }, + { + "epoch": 0.6190492988234086, + "grad_norm": 0.10307785868644714, + "learning_rate": 8.721281779741391e-06, + "loss": 0.003, + "step": 96520 + }, + { + "epoch": 0.6191134357171948, + "grad_norm": 0.046492837369441986, + "learning_rate": 8.72090793386722e-06, + "loss": 0.0067, + "step": 96530 + }, + { + "epoch": 0.6191775726109808, + "grad_norm": 0.3101746439933777, + "learning_rate": 8.720534041367414e-06, + "loss": 0.0026, + "step": 96540 + }, + { + "epoch": 0.619241709504767, + "grad_norm": 0.11407823860645294, + "learning_rate": 8.720160102246657e-06, + "loss": 0.003, + "step": 96550 + }, + { + "epoch": 0.619305846398553, + "grad_norm": 0.2032564878463745, + "learning_rate": 8.719786116509633e-06, + "loss": 0.0052, + "step": 96560 + }, + { + "epoch": 0.6193699832923392, + "grad_norm": 0.2516917586326599, + "learning_rate": 8.719412084161034e-06, + "loss": 0.0031, + "step": 96570 + }, + { + "epoch": 0.6194341201861253, + "grad_norm": 0.2087072730064392, + "learning_rate": 8.719038005205542e-06, + "loss": 0.0028, + "step": 96580 + }, + { + "epoch": 0.6194982570799114, + "grad_norm": 0.06007470190525055, + "learning_rate": 8.718663879647846e-06, + "loss": 0.006, + "step": 96590 + }, + { + "epoch": 0.6195623939736975, + "grad_norm": 0.05991493910551071, + "learning_rate": 8.718289707492634e-06, + "loss": 0.0053, + "step": 96600 + }, + { + "epoch": 0.6196265308674835, + "grad_norm": 0.2905253767967224, + "learning_rate": 8.717915488744595e-06, + "loss": 0.0043, + "step": 96610 + }, + { + "epoch": 0.6196906677612697, + "grad_norm": 0.16421377658843994, + "learning_rate": 8.717541223408417e-06, + "loss": 0.0042, + "step": 96620 + }, + { + "epoch": 0.6197548046550557, + "grad_norm": 0.14254648983478546, + "learning_rate": 8.71716691148879e-06, + "loss": 0.0034, + "step": 96630 + }, + { + "epoch": 0.6198189415488419, + "grad_norm": 0.1617589294910431, + "learning_rate": 8.716792552990405e-06, + "loss": 0.004, + "step": 96640 + }, + { + "epoch": 0.6198830784426279, + "grad_norm": 0.2060152143239975, + "learning_rate": 8.716418147917954e-06, + "loss": 0.0028, + "step": 96650 + }, + { + "epoch": 0.6199472153364141, + "grad_norm": 0.14975200593471527, + "learning_rate": 8.716043696276128e-06, + "loss": 0.0029, + "step": 96660 + }, + { + "epoch": 0.6200113522302001, + "grad_norm": 0.06567097455263138, + "learning_rate": 8.715669198069617e-06, + "loss": 0.0035, + "step": 96670 + }, + { + "epoch": 0.6200754891239862, + "grad_norm": 0.11589567363262177, + "learning_rate": 8.715294653303117e-06, + "loss": 0.0022, + "step": 96680 + }, + { + "epoch": 0.6201396260177723, + "grad_norm": 0.22879017889499664, + "learning_rate": 8.714920061981317e-06, + "loss": 0.0026, + "step": 96690 + }, + { + "epoch": 0.6202037629115584, + "grad_norm": 0.2359607070684433, + "learning_rate": 8.714545424108918e-06, + "loss": 0.0032, + "step": 96700 + }, + { + "epoch": 0.6202678998053446, + "grad_norm": 0.10882429778575897, + "learning_rate": 8.714170739690606e-06, + "loss": 0.002, + "step": 96710 + }, + { + "epoch": 0.6203320366991306, + "grad_norm": 0.48844268918037415, + "learning_rate": 8.713796008731084e-06, + "loss": 0.0058, + "step": 96720 + }, + { + "epoch": 0.6203961735929168, + "grad_norm": 0.2939786911010742, + "learning_rate": 8.713421231235039e-06, + "loss": 0.0023, + "step": 96730 + }, + { + "epoch": 0.6204603104867028, + "grad_norm": 0.03793249651789665, + "learning_rate": 8.713046407207176e-06, + "loss": 0.0029, + "step": 96740 + }, + { + "epoch": 0.620524447380489, + "grad_norm": 0.0540674589574337, + "learning_rate": 8.712671536652187e-06, + "loss": 0.0061, + "step": 96750 + }, + { + "epoch": 0.620588584274275, + "grad_norm": 0.11687786132097244, + "learning_rate": 8.712296619574769e-06, + "loss": 0.0026, + "step": 96760 + }, + { + "epoch": 0.6206527211680611, + "grad_norm": 0.14689037203788757, + "learning_rate": 8.711921655979622e-06, + "loss": 0.0026, + "step": 96770 + }, + { + "epoch": 0.6207168580618472, + "grad_norm": 0.15522897243499756, + "learning_rate": 8.711546645871444e-06, + "loss": 0.0032, + "step": 96780 + }, + { + "epoch": 0.6207809949556333, + "grad_norm": 0.2766576111316681, + "learning_rate": 8.711171589254934e-06, + "loss": 0.0025, + "step": 96790 + }, + { + "epoch": 0.6208451318494194, + "grad_norm": 0.15739668905735016, + "learning_rate": 8.71079648613479e-06, + "loss": 0.0047, + "step": 96800 + }, + { + "epoch": 0.6209092687432055, + "grad_norm": 0.14163656532764435, + "learning_rate": 8.710421336515715e-06, + "loss": 0.0037, + "step": 96810 + }, + { + "epoch": 0.6209734056369915, + "grad_norm": 0.30086272954940796, + "learning_rate": 8.71004614040241e-06, + "loss": 0.0042, + "step": 96820 + }, + { + "epoch": 0.6210375425307777, + "grad_norm": 0.029013173654675484, + "learning_rate": 8.709670897799574e-06, + "loss": 0.0018, + "step": 96830 + }, + { + "epoch": 0.6211016794245637, + "grad_norm": 0.11647813767194748, + "learning_rate": 8.709295608711912e-06, + "loss": 0.0055, + "step": 96840 + }, + { + "epoch": 0.6211658163183499, + "grad_norm": 0.14268749952316284, + "learning_rate": 8.708920273144124e-06, + "loss": 0.0021, + "step": 96850 + }, + { + "epoch": 0.621229953212136, + "grad_norm": 0.37514549493789673, + "learning_rate": 8.708544891100914e-06, + "loss": 0.0041, + "step": 96860 + }, + { + "epoch": 0.6212940901059221, + "grad_norm": 0.2563324570655823, + "learning_rate": 8.708169462586986e-06, + "loss": 0.0035, + "step": 96870 + }, + { + "epoch": 0.6213582269997082, + "grad_norm": 0.5971559882164001, + "learning_rate": 8.707793987607044e-06, + "loss": 0.0059, + "step": 96880 + }, + { + "epoch": 0.6214223638934943, + "grad_norm": 0.17791274189949036, + "learning_rate": 8.707418466165795e-06, + "loss": 0.0029, + "step": 96890 + }, + { + "epoch": 0.6214865007872804, + "grad_norm": 0.08535439521074295, + "learning_rate": 8.70704289826794e-06, + "loss": 0.0031, + "step": 96900 + }, + { + "epoch": 0.6215506376810664, + "grad_norm": 0.28252682089805603, + "learning_rate": 8.70666728391819e-06, + "loss": 0.0031, + "step": 96910 + }, + { + "epoch": 0.6216147745748526, + "grad_norm": 0.07908795028924942, + "learning_rate": 8.706291623121252e-06, + "loss": 0.0027, + "step": 96920 + }, + { + "epoch": 0.6216789114686386, + "grad_norm": 0.06172311678528786, + "learning_rate": 8.705915915881828e-06, + "loss": 0.0027, + "step": 96930 + }, + { + "epoch": 0.6217430483624248, + "grad_norm": 0.0566222220659256, + "learning_rate": 8.70554016220463e-06, + "loss": 0.0024, + "step": 96940 + }, + { + "epoch": 0.6218071852562108, + "grad_norm": 0.3074969947338104, + "learning_rate": 8.705164362094366e-06, + "loss": 0.0025, + "step": 96950 + }, + { + "epoch": 0.621871322149997, + "grad_norm": 0.007374696433544159, + "learning_rate": 8.704788515555745e-06, + "loss": 0.0025, + "step": 96960 + }, + { + "epoch": 0.621935459043783, + "grad_norm": 0.13997507095336914, + "learning_rate": 8.704412622593474e-06, + "loss": 0.0022, + "step": 96970 + }, + { + "epoch": 0.6219995959375691, + "grad_norm": 0.16851702332496643, + "learning_rate": 8.704036683212268e-06, + "loss": 0.0033, + "step": 96980 + }, + { + "epoch": 0.6220637328313553, + "grad_norm": 0.3087330460548401, + "learning_rate": 8.703660697416832e-06, + "loss": 0.0028, + "step": 96990 + }, + { + "epoch": 0.6221278697251413, + "grad_norm": 0.6273163557052612, + "learning_rate": 8.703284665211882e-06, + "loss": 0.0024, + "step": 97000 + }, + { + "epoch": 0.6221920066189275, + "grad_norm": 0.3395378887653351, + "learning_rate": 8.702908586602128e-06, + "loss": 0.0053, + "step": 97010 + }, + { + "epoch": 0.6222561435127135, + "grad_norm": 0.026514574885368347, + "learning_rate": 8.702532461592283e-06, + "loss": 0.0027, + "step": 97020 + }, + { + "epoch": 0.6223202804064997, + "grad_norm": 0.051178060472011566, + "learning_rate": 8.702156290187061e-06, + "loss": 0.0027, + "step": 97030 + }, + { + "epoch": 0.6223844173002857, + "grad_norm": 0.24971944093704224, + "learning_rate": 8.701780072391175e-06, + "loss": 0.0025, + "step": 97040 + }, + { + "epoch": 0.6224485541940719, + "grad_norm": 0.11027555167675018, + "learning_rate": 8.701403808209339e-06, + "loss": 0.0021, + "step": 97050 + }, + { + "epoch": 0.6225126910878579, + "grad_norm": 0.15564948320388794, + "learning_rate": 8.701027497646267e-06, + "loss": 0.0018, + "step": 97060 + }, + { + "epoch": 0.622576827981644, + "grad_norm": 0.21082234382629395, + "learning_rate": 8.700651140706676e-06, + "loss": 0.0019, + "step": 97070 + }, + { + "epoch": 0.6226409648754301, + "grad_norm": 0.09402766078710556, + "learning_rate": 8.700274737395282e-06, + "loss": 0.0035, + "step": 97080 + }, + { + "epoch": 0.6227051017692162, + "grad_norm": 0.09840180724859238, + "learning_rate": 8.6998982877168e-06, + "loss": 0.0018, + "step": 97090 + }, + { + "epoch": 0.6227692386630023, + "grad_norm": 0.06667862832546234, + "learning_rate": 8.699521791675947e-06, + "loss": 0.0019, + "step": 97100 + }, + { + "epoch": 0.6228333755567884, + "grad_norm": 0.11117648333311081, + "learning_rate": 8.699145249277444e-06, + "loss": 0.004, + "step": 97110 + }, + { + "epoch": 0.6228975124505745, + "grad_norm": 0.2256828397512436, + "learning_rate": 8.698768660526007e-06, + "loss": 0.0045, + "step": 97120 + }, + { + "epoch": 0.6229616493443606, + "grad_norm": 0.07285215705633163, + "learning_rate": 8.698392025426355e-06, + "loss": 0.0032, + "step": 97130 + }, + { + "epoch": 0.6230257862381468, + "grad_norm": 0.3315751254558563, + "learning_rate": 8.698015343983205e-06, + "loss": 0.0026, + "step": 97140 + }, + { + "epoch": 0.6230899231319328, + "grad_norm": 0.15173321962356567, + "learning_rate": 8.697638616201284e-06, + "loss": 0.0013, + "step": 97150 + }, + { + "epoch": 0.6231540600257189, + "grad_norm": 0.1298132687807083, + "learning_rate": 8.697261842085306e-06, + "loss": 0.0039, + "step": 97160 + }, + { + "epoch": 0.623218196919505, + "grad_norm": 0.19186744093894958, + "learning_rate": 8.696885021639996e-06, + "loss": 0.003, + "step": 97170 + }, + { + "epoch": 0.6232823338132911, + "grad_norm": 0.3175899386405945, + "learning_rate": 8.696508154870073e-06, + "loss": 0.0036, + "step": 97180 + }, + { + "epoch": 0.6233464707070772, + "grad_norm": 0.2632112205028534, + "learning_rate": 8.696131241780264e-06, + "loss": 0.0036, + "step": 97190 + }, + { + "epoch": 0.6234106076008633, + "grad_norm": 0.21239538490772247, + "learning_rate": 8.695754282375285e-06, + "loss": 0.0036, + "step": 97200 + }, + { + "epoch": 0.6234747444946493, + "grad_norm": 0.23316553235054016, + "learning_rate": 8.695377276659867e-06, + "loss": 0.0037, + "step": 97210 + }, + { + "epoch": 0.6235388813884355, + "grad_norm": 0.0754329115152359, + "learning_rate": 8.695000224638729e-06, + "loss": 0.0048, + "step": 97220 + }, + { + "epoch": 0.6236030182822215, + "grad_norm": 0.0366726852953434, + "learning_rate": 8.694623126316596e-06, + "loss": 0.0026, + "step": 97230 + }, + { + "epoch": 0.6236671551760077, + "grad_norm": 0.2261466383934021, + "learning_rate": 8.694245981698198e-06, + "loss": 0.0031, + "step": 97240 + }, + { + "epoch": 0.6237312920697937, + "grad_norm": 0.14822643995285034, + "learning_rate": 8.693868790788256e-06, + "loss": 0.002, + "step": 97250 + }, + { + "epoch": 0.6237954289635799, + "grad_norm": 0.035959940403699875, + "learning_rate": 8.693491553591498e-06, + "loss": 0.0046, + "step": 97260 + }, + { + "epoch": 0.623859565857366, + "grad_norm": 0.12748566269874573, + "learning_rate": 8.69311427011265e-06, + "loss": 0.0024, + "step": 97270 + }, + { + "epoch": 0.623923702751152, + "grad_norm": 0.08464737236499786, + "learning_rate": 8.692736940356443e-06, + "loss": 0.0021, + "step": 97280 + }, + { + "epoch": 0.6239878396449382, + "grad_norm": 0.028743699193000793, + "learning_rate": 8.692359564327601e-06, + "loss": 0.0044, + "step": 97290 + }, + { + "epoch": 0.6240519765387242, + "grad_norm": 0.07888203114271164, + "learning_rate": 8.691982142030857e-06, + "loss": 0.0031, + "step": 97300 + }, + { + "epoch": 0.6241161134325104, + "grad_norm": 0.08890816569328308, + "learning_rate": 8.691604673470935e-06, + "loss": 0.0025, + "step": 97310 + }, + { + "epoch": 0.6241802503262964, + "grad_norm": 0.28032347559928894, + "learning_rate": 8.69122715865257e-06, + "loss": 0.0018, + "step": 97320 + }, + { + "epoch": 0.6242443872200826, + "grad_norm": 0.3093320429325104, + "learning_rate": 8.690849597580491e-06, + "loss": 0.0029, + "step": 97330 + }, + { + "epoch": 0.6243085241138686, + "grad_norm": 0.48800718784332275, + "learning_rate": 8.690471990259429e-06, + "loss": 0.0025, + "step": 97340 + }, + { + "epoch": 0.6243726610076548, + "grad_norm": 0.15695936977863312, + "learning_rate": 8.690094336694115e-06, + "loss": 0.0038, + "step": 97350 + }, + { + "epoch": 0.6244367979014408, + "grad_norm": 0.09537257999181747, + "learning_rate": 8.68971663688928e-06, + "loss": 0.0018, + "step": 97360 + }, + { + "epoch": 0.624500934795227, + "grad_norm": 0.007126423995941877, + "learning_rate": 8.68933889084966e-06, + "loss": 0.0036, + "step": 97370 + }, + { + "epoch": 0.624565071689013, + "grad_norm": 0.20154450833797455, + "learning_rate": 8.688961098579987e-06, + "loss": 0.0055, + "step": 97380 + }, + { + "epoch": 0.6246292085827991, + "grad_norm": 0.012513830326497555, + "learning_rate": 8.688583260084995e-06, + "loss": 0.0069, + "step": 97390 + }, + { + "epoch": 0.6246933454765852, + "grad_norm": 0.17888513207435608, + "learning_rate": 8.688205375369418e-06, + "loss": 0.0017, + "step": 97400 + }, + { + "epoch": 0.6247574823703713, + "grad_norm": 0.1948905736207962, + "learning_rate": 8.687827444437993e-06, + "loss": 0.0016, + "step": 97410 + }, + { + "epoch": 0.6248216192641575, + "grad_norm": 0.12320704758167267, + "learning_rate": 8.687449467295453e-06, + "loss": 0.0023, + "step": 97420 + }, + { + "epoch": 0.6248857561579435, + "grad_norm": 0.06823863089084625, + "learning_rate": 8.687071443946538e-06, + "loss": 0.0021, + "step": 97430 + }, + { + "epoch": 0.6249498930517297, + "grad_norm": 0.14087337255477905, + "learning_rate": 8.68669337439598e-06, + "loss": 0.003, + "step": 97440 + }, + { + "epoch": 0.6250140299455157, + "grad_norm": 0.08630002290010452, + "learning_rate": 8.68631525864852e-06, + "loss": 0.0026, + "step": 97450 + }, + { + "epoch": 0.6250781668393018, + "grad_norm": 0.05025516450405121, + "learning_rate": 8.685937096708896e-06, + "loss": 0.0038, + "step": 97460 + }, + { + "epoch": 0.6251423037330879, + "grad_norm": 0.09566188603639603, + "learning_rate": 8.685558888581845e-06, + "loss": 0.0028, + "step": 97470 + }, + { + "epoch": 0.625206440626874, + "grad_norm": 0.11923102289438248, + "learning_rate": 8.685180634272108e-06, + "loss": 0.0023, + "step": 97480 + }, + { + "epoch": 0.6252705775206601, + "grad_norm": 0.09973512589931488, + "learning_rate": 8.684802333784423e-06, + "loss": 0.0033, + "step": 97490 + }, + { + "epoch": 0.6253347144144462, + "grad_norm": 0.29155007004737854, + "learning_rate": 8.684423987123532e-06, + "loss": 0.0042, + "step": 97500 + }, + { + "epoch": 0.6253988513082323, + "grad_norm": 0.1789742112159729, + "learning_rate": 8.684045594294176e-06, + "loss": 0.0039, + "step": 97510 + }, + { + "epoch": 0.6254629882020184, + "grad_norm": 0.15239335596561432, + "learning_rate": 8.683667155301093e-06, + "loss": 0.0044, + "step": 97520 + }, + { + "epoch": 0.6255271250958044, + "grad_norm": 0.15978781878948212, + "learning_rate": 8.68328867014903e-06, + "loss": 0.0024, + "step": 97530 + }, + { + "epoch": 0.6255912619895906, + "grad_norm": 0.07719306647777557, + "learning_rate": 8.682910138842725e-06, + "loss": 0.0027, + "step": 97540 + }, + { + "epoch": 0.6256553988833767, + "grad_norm": 0.0992724671959877, + "learning_rate": 8.682531561386927e-06, + "loss": 0.0038, + "step": 97550 + }, + { + "epoch": 0.6257195357771628, + "grad_norm": 0.026066681370139122, + "learning_rate": 8.682152937786376e-06, + "loss": 0.0042, + "step": 97560 + }, + { + "epoch": 0.6257836726709489, + "grad_norm": 0.04518354311585426, + "learning_rate": 8.681774268045817e-06, + "loss": 0.0032, + "step": 97570 + }, + { + "epoch": 0.625847809564735, + "grad_norm": 0.06145719811320305, + "learning_rate": 8.681395552169993e-06, + "loss": 0.002, + "step": 97580 + }, + { + "epoch": 0.6259119464585211, + "grad_norm": 0.2337571531534195, + "learning_rate": 8.681016790163654e-06, + "loss": 0.0027, + "step": 97590 + }, + { + "epoch": 0.6259760833523071, + "grad_norm": 0.10886605829000473, + "learning_rate": 8.680637982031543e-06, + "loss": 0.007, + "step": 97600 + }, + { + "epoch": 0.6260402202460933, + "grad_norm": 0.13759419322013855, + "learning_rate": 8.680259127778408e-06, + "loss": 0.0037, + "step": 97610 + }, + { + "epoch": 0.6261043571398793, + "grad_norm": 0.11640704423189163, + "learning_rate": 8.679880227408997e-06, + "loss": 0.0075, + "step": 97620 + }, + { + "epoch": 0.6261684940336655, + "grad_norm": 0.08335297554731369, + "learning_rate": 8.679501280928055e-06, + "loss": 0.0031, + "step": 97630 + }, + { + "epoch": 0.6262326309274515, + "grad_norm": 0.7324811816215515, + "learning_rate": 8.679122288340332e-06, + "loss": 0.0041, + "step": 97640 + }, + { + "epoch": 0.6262967678212377, + "grad_norm": 0.2840883433818817, + "learning_rate": 8.678743249650579e-06, + "loss": 0.0025, + "step": 97650 + }, + { + "epoch": 0.6263609047150237, + "grad_norm": 0.08255748450756073, + "learning_rate": 8.678364164863541e-06, + "loss": 0.0021, + "step": 97660 + }, + { + "epoch": 0.6264250416088099, + "grad_norm": 0.13584935665130615, + "learning_rate": 8.677985033983974e-06, + "loss": 0.0023, + "step": 97670 + }, + { + "epoch": 0.6264891785025959, + "grad_norm": 0.2118493914604187, + "learning_rate": 8.677605857016625e-06, + "loss": 0.0028, + "step": 97680 + }, + { + "epoch": 0.626553315396382, + "grad_norm": 0.16938568651676178, + "learning_rate": 8.677226633966248e-06, + "loss": 0.0031, + "step": 97690 + }, + { + "epoch": 0.6266174522901682, + "grad_norm": 0.2965881824493408, + "learning_rate": 8.67684736483759e-06, + "loss": 0.0034, + "step": 97700 + }, + { + "epoch": 0.6266815891839542, + "grad_norm": 0.1577543169260025, + "learning_rate": 8.676468049635409e-06, + "loss": 0.0034, + "step": 97710 + }, + { + "epoch": 0.6267457260777404, + "grad_norm": 0.15394441783428192, + "learning_rate": 8.676088688364454e-06, + "loss": 0.0032, + "step": 97720 + }, + { + "epoch": 0.6268098629715264, + "grad_norm": 0.10031639784574509, + "learning_rate": 8.675709281029483e-06, + "loss": 0.0021, + "step": 97730 + }, + { + "epoch": 0.6268739998653126, + "grad_norm": 0.29562628269195557, + "learning_rate": 8.675329827635246e-06, + "loss": 0.0023, + "step": 97740 + }, + { + "epoch": 0.6269381367590986, + "grad_norm": 0.06728474795818329, + "learning_rate": 8.674950328186499e-06, + "loss": 0.0028, + "step": 97750 + }, + { + "epoch": 0.6270022736528847, + "grad_norm": 0.10657131671905518, + "learning_rate": 8.674570782687999e-06, + "loss": 0.003, + "step": 97760 + }, + { + "epoch": 0.6270664105466708, + "grad_norm": 0.026148566976189613, + "learning_rate": 8.6741911911445e-06, + "loss": 0.0029, + "step": 97770 + }, + { + "epoch": 0.6271305474404569, + "grad_norm": 0.36615893244743347, + "learning_rate": 8.673811553560761e-06, + "loss": 0.0033, + "step": 97780 + }, + { + "epoch": 0.627194684334243, + "grad_norm": 0.20040257275104523, + "learning_rate": 8.673431869941535e-06, + "loss": 0.0028, + "step": 97790 + }, + { + "epoch": 0.6272588212280291, + "grad_norm": 0.034277066588401794, + "learning_rate": 8.673052140291584e-06, + "loss": 0.0033, + "step": 97800 + }, + { + "epoch": 0.6273229581218152, + "grad_norm": 0.06177086755633354, + "learning_rate": 8.672672364615665e-06, + "loss": 0.0018, + "step": 97810 + }, + { + "epoch": 0.6273870950156013, + "grad_norm": 0.16230791807174683, + "learning_rate": 8.672292542918537e-06, + "loss": 0.0032, + "step": 97820 + }, + { + "epoch": 0.6274512319093873, + "grad_norm": 0.16837529838085175, + "learning_rate": 8.671912675204957e-06, + "loss": 0.0036, + "step": 97830 + }, + { + "epoch": 0.6275153688031735, + "grad_norm": 0.06041654571890831, + "learning_rate": 8.671532761479688e-06, + "loss": 0.0044, + "step": 97840 + }, + { + "epoch": 0.6275795056969596, + "grad_norm": 0.10664664208889008, + "learning_rate": 8.671152801747489e-06, + "loss": 0.0031, + "step": 97850 + }, + { + "epoch": 0.6276436425907457, + "grad_norm": 0.08125737309455872, + "learning_rate": 8.670772796013122e-06, + "loss": 0.0024, + "step": 97860 + }, + { + "epoch": 0.6277077794845318, + "grad_norm": 0.22814540565013885, + "learning_rate": 8.670392744281348e-06, + "loss": 0.0057, + "step": 97870 + }, + { + "epoch": 0.6277719163783179, + "grad_norm": 0.14703914523124695, + "learning_rate": 8.670012646556931e-06, + "loss": 0.0026, + "step": 97880 + }, + { + "epoch": 0.627836053272104, + "grad_norm": 0.0672401413321495, + "learning_rate": 8.669632502844631e-06, + "loss": 0.0017, + "step": 97890 + }, + { + "epoch": 0.62790019016589, + "grad_norm": 0.09100368618965149, + "learning_rate": 8.669252313149214e-06, + "loss": 0.0039, + "step": 97900 + }, + { + "epoch": 0.6279643270596762, + "grad_norm": 0.05269211530685425, + "learning_rate": 8.668872077475443e-06, + "loss": 0.0021, + "step": 97910 + }, + { + "epoch": 0.6280284639534622, + "grad_norm": 0.12907226383686066, + "learning_rate": 8.668491795828082e-06, + "loss": 0.0037, + "step": 97920 + }, + { + "epoch": 0.6280926008472484, + "grad_norm": 0.41889700293540955, + "learning_rate": 8.668111468211898e-06, + "loss": 0.0015, + "step": 97930 + }, + { + "epoch": 0.6281567377410344, + "grad_norm": 0.09742482751607895, + "learning_rate": 8.667731094631656e-06, + "loss": 0.0027, + "step": 97940 + }, + { + "epoch": 0.6282208746348206, + "grad_norm": 0.15996062755584717, + "learning_rate": 8.667350675092121e-06, + "loss": 0.0041, + "step": 97950 + }, + { + "epoch": 0.6282850115286066, + "grad_norm": 0.06355822086334229, + "learning_rate": 8.666970209598062e-06, + "loss": 0.0018, + "step": 97960 + }, + { + "epoch": 0.6283491484223928, + "grad_norm": 0.11760903149843216, + "learning_rate": 8.666589698154245e-06, + "loss": 0.0033, + "step": 97970 + }, + { + "epoch": 0.6284132853161789, + "grad_norm": 0.05329267680644989, + "learning_rate": 8.666209140765437e-06, + "loss": 0.0022, + "step": 97980 + }, + { + "epoch": 0.6284774222099649, + "grad_norm": 0.2392531931400299, + "learning_rate": 8.665828537436412e-06, + "loss": 0.0041, + "step": 97990 + }, + { + "epoch": 0.6285415591037511, + "grad_norm": 0.09396185725927353, + "learning_rate": 8.665447888171933e-06, + "loss": 0.0059, + "step": 98000 + }, + { + "epoch": 0.6286056959975371, + "grad_norm": 0.24304385483264923, + "learning_rate": 8.665067192976773e-06, + "loss": 0.0023, + "step": 98010 + }, + { + "epoch": 0.6286698328913233, + "grad_norm": 0.15212446451187134, + "learning_rate": 8.664686451855701e-06, + "loss": 0.0027, + "step": 98020 + }, + { + "epoch": 0.6287339697851093, + "grad_norm": 0.20396752655506134, + "learning_rate": 8.664305664813488e-06, + "loss": 0.0037, + "step": 98030 + }, + { + "epoch": 0.6287981066788955, + "grad_norm": 0.12427476048469543, + "learning_rate": 8.663924831854909e-06, + "loss": 0.003, + "step": 98040 + }, + { + "epoch": 0.6288622435726815, + "grad_norm": 0.3079765737056732, + "learning_rate": 8.66354395298473e-06, + "loss": 0.003, + "step": 98050 + }, + { + "epoch": 0.6289263804664676, + "grad_norm": 0.15311117470264435, + "learning_rate": 8.663163028207728e-06, + "loss": 0.0045, + "step": 98060 + }, + { + "epoch": 0.6289905173602537, + "grad_norm": 0.12783585488796234, + "learning_rate": 8.662782057528677e-06, + "loss": 0.0037, + "step": 98070 + }, + { + "epoch": 0.6290546542540398, + "grad_norm": 0.19281822443008423, + "learning_rate": 8.662401040952349e-06, + "loss": 0.0023, + "step": 98080 + }, + { + "epoch": 0.6291187911478259, + "grad_norm": 0.09296832233667374, + "learning_rate": 8.662019978483516e-06, + "loss": 0.0023, + "step": 98090 + }, + { + "epoch": 0.629182928041612, + "grad_norm": 0.29387709498405457, + "learning_rate": 8.661638870126956e-06, + "loss": 0.0059, + "step": 98100 + }, + { + "epoch": 0.6292470649353981, + "grad_norm": 0.09903071820735931, + "learning_rate": 8.661257715887446e-06, + "loss": 0.0023, + "step": 98110 + }, + { + "epoch": 0.6293112018291842, + "grad_norm": 0.11103134602308273, + "learning_rate": 8.660876515769759e-06, + "loss": 0.0018, + "step": 98120 + }, + { + "epoch": 0.6293753387229704, + "grad_norm": 0.08121154457330704, + "learning_rate": 8.660495269778673e-06, + "loss": 0.0027, + "step": 98130 + }, + { + "epoch": 0.6294394756167564, + "grad_norm": 0.12221727520227432, + "learning_rate": 8.660113977918964e-06, + "loss": 0.0026, + "step": 98140 + }, + { + "epoch": 0.6295036125105425, + "grad_norm": 0.36664316058158875, + "learning_rate": 8.659732640195411e-06, + "loss": 0.0041, + "step": 98150 + }, + { + "epoch": 0.6295677494043286, + "grad_norm": 0.16236406564712524, + "learning_rate": 8.659351256612795e-06, + "loss": 0.0039, + "step": 98160 + }, + { + "epoch": 0.6296318862981147, + "grad_norm": 0.20031431317329407, + "learning_rate": 8.658969827175891e-06, + "loss": 0.004, + "step": 98170 + }, + { + "epoch": 0.6296960231919008, + "grad_norm": 0.181797593832016, + "learning_rate": 8.658588351889478e-06, + "loss": 0.003, + "step": 98180 + }, + { + "epoch": 0.6297601600856869, + "grad_norm": 0.07122781127691269, + "learning_rate": 8.658206830758342e-06, + "loss": 0.0033, + "step": 98190 + }, + { + "epoch": 0.629824296979473, + "grad_norm": 0.15625609457492828, + "learning_rate": 8.657825263787258e-06, + "loss": 0.002, + "step": 98200 + }, + { + "epoch": 0.6298884338732591, + "grad_norm": 0.15279321372509003, + "learning_rate": 8.657443650981007e-06, + "loss": 0.0037, + "step": 98210 + }, + { + "epoch": 0.6299525707670451, + "grad_norm": 0.11406347155570984, + "learning_rate": 8.657061992344377e-06, + "loss": 0.0025, + "step": 98220 + }, + { + "epoch": 0.6300167076608313, + "grad_norm": 0.13814681768417358, + "learning_rate": 8.656680287882145e-06, + "loss": 0.003, + "step": 98230 + }, + { + "epoch": 0.6300808445546173, + "grad_norm": 0.4896417558193207, + "learning_rate": 8.656298537599094e-06, + "loss": 0.0031, + "step": 98240 + }, + { + "epoch": 0.6301449814484035, + "grad_norm": 0.2060568630695343, + "learning_rate": 8.655916741500013e-06, + "loss": 0.0039, + "step": 98250 + }, + { + "epoch": 0.6302091183421896, + "grad_norm": 0.11104012280702591, + "learning_rate": 8.65553489958968e-06, + "loss": 0.0026, + "step": 98260 + }, + { + "epoch": 0.6302732552359757, + "grad_norm": 0.07269799709320068, + "learning_rate": 8.65515301187288e-06, + "loss": 0.0041, + "step": 98270 + }, + { + "epoch": 0.6303373921297618, + "grad_norm": 0.17827273905277252, + "learning_rate": 8.654771078354405e-06, + "loss": 0.0028, + "step": 98280 + }, + { + "epoch": 0.6304015290235478, + "grad_norm": 0.049873966723680496, + "learning_rate": 8.654389099039034e-06, + "loss": 0.0017, + "step": 98290 + }, + { + "epoch": 0.630465665917334, + "grad_norm": 0.3912801146507263, + "learning_rate": 8.654007073931556e-06, + "loss": 0.0033, + "step": 98300 + }, + { + "epoch": 0.63052980281112, + "grad_norm": 0.13663004338741302, + "learning_rate": 8.653625003036757e-06, + "loss": 0.0033, + "step": 98310 + }, + { + "epoch": 0.6305939397049062, + "grad_norm": 0.07535296678543091, + "learning_rate": 8.653242886359427e-06, + "loss": 0.0021, + "step": 98320 + }, + { + "epoch": 0.6306580765986922, + "grad_norm": 0.20357947051525116, + "learning_rate": 8.652860723904352e-06, + "loss": 0.0021, + "step": 98330 + }, + { + "epoch": 0.6307222134924784, + "grad_norm": 0.1544470340013504, + "learning_rate": 8.652478515676322e-06, + "loss": 0.0024, + "step": 98340 + }, + { + "epoch": 0.6307863503862644, + "grad_norm": 0.3345314562320709, + "learning_rate": 8.652096261680125e-06, + "loss": 0.0042, + "step": 98350 + }, + { + "epoch": 0.6308504872800506, + "grad_norm": 0.18697094917297363, + "learning_rate": 8.651713961920552e-06, + "loss": 0.0044, + "step": 98360 + }, + { + "epoch": 0.6309146241738366, + "grad_norm": 0.03998371213674545, + "learning_rate": 8.651331616402392e-06, + "loss": 0.0024, + "step": 98370 + }, + { + "epoch": 0.6309787610676227, + "grad_norm": 0.18124331533908844, + "learning_rate": 8.650949225130439e-06, + "loss": 0.0036, + "step": 98380 + }, + { + "epoch": 0.6310428979614088, + "grad_norm": 0.12990468740463257, + "learning_rate": 8.650566788109482e-06, + "loss": 0.0019, + "step": 98390 + }, + { + "epoch": 0.6311070348551949, + "grad_norm": 0.0784650593996048, + "learning_rate": 8.650184305344315e-06, + "loss": 0.0041, + "step": 98400 + }, + { + "epoch": 0.6311711717489811, + "grad_norm": 0.11469721794128418, + "learning_rate": 8.649801776839731e-06, + "loss": 0.0029, + "step": 98410 + }, + { + "epoch": 0.6312353086427671, + "grad_norm": 0.14325492084026337, + "learning_rate": 8.649419202600519e-06, + "loss": 0.0029, + "step": 98420 + }, + { + "epoch": 0.6312994455365533, + "grad_norm": 0.07019853591918945, + "learning_rate": 8.649036582631479e-06, + "loss": 0.0042, + "step": 98430 + }, + { + "epoch": 0.6313635824303393, + "grad_norm": 0.1867142617702484, + "learning_rate": 8.648653916937401e-06, + "loss": 0.003, + "step": 98440 + }, + { + "epoch": 0.6314277193241254, + "grad_norm": 0.24688571691513062, + "learning_rate": 8.648271205523082e-06, + "loss": 0.0041, + "step": 98450 + }, + { + "epoch": 0.6314918562179115, + "grad_norm": 0.008389686234295368, + "learning_rate": 8.647888448393317e-06, + "loss": 0.0018, + "step": 98460 + }, + { + "epoch": 0.6315559931116976, + "grad_norm": 0.24063777923583984, + "learning_rate": 8.647505645552905e-06, + "loss": 0.0042, + "step": 98470 + }, + { + "epoch": 0.6316201300054837, + "grad_norm": 0.04026223346590996, + "learning_rate": 8.64712279700664e-06, + "loss": 0.0027, + "step": 98480 + }, + { + "epoch": 0.6316842668992698, + "grad_norm": 0.49291589856147766, + "learning_rate": 8.646739902759317e-06, + "loss": 0.0048, + "step": 98490 + }, + { + "epoch": 0.6317484037930559, + "grad_norm": 0.2567426562309265, + "learning_rate": 8.646356962815738e-06, + "loss": 0.003, + "step": 98500 + }, + { + "epoch": 0.631812540686842, + "grad_norm": 0.1662866771221161, + "learning_rate": 8.645973977180702e-06, + "loss": 0.0019, + "step": 98510 + }, + { + "epoch": 0.631876677580628, + "grad_norm": 0.0940394327044487, + "learning_rate": 8.645590945859005e-06, + "loss": 0.0037, + "step": 98520 + }, + { + "epoch": 0.6319408144744142, + "grad_norm": 0.052625905722379684, + "learning_rate": 8.645207868855447e-06, + "loss": 0.0021, + "step": 98530 + }, + { + "epoch": 0.6320049513682003, + "grad_norm": 0.09014574438333511, + "learning_rate": 8.64482474617483e-06, + "loss": 0.0015, + "step": 98540 + }, + { + "epoch": 0.6320690882619864, + "grad_norm": 0.18140502274036407, + "learning_rate": 8.644441577821955e-06, + "loss": 0.0024, + "step": 98550 + }, + { + "epoch": 0.6321332251557725, + "grad_norm": 0.047012731432914734, + "learning_rate": 8.644058363801622e-06, + "loss": 0.0065, + "step": 98560 + }, + { + "epoch": 0.6321973620495586, + "grad_norm": 0.24054725468158722, + "learning_rate": 8.643675104118631e-06, + "loss": 0.0033, + "step": 98570 + }, + { + "epoch": 0.6322614989433447, + "grad_norm": 0.12858544290065765, + "learning_rate": 8.64329179877779e-06, + "loss": 0.0027, + "step": 98580 + }, + { + "epoch": 0.6323256358371308, + "grad_norm": 0.06197696551680565, + "learning_rate": 8.642908447783898e-06, + "loss": 0.0033, + "step": 98590 + }, + { + "epoch": 0.6323897727309169, + "grad_norm": 0.08043265342712402, + "learning_rate": 8.64252505114176e-06, + "loss": 0.0023, + "step": 98600 + }, + { + "epoch": 0.6324539096247029, + "grad_norm": 0.14281512796878815, + "learning_rate": 8.642141608856178e-06, + "loss": 0.003, + "step": 98610 + }, + { + "epoch": 0.6325180465184891, + "grad_norm": 0.11095147579908371, + "learning_rate": 8.64175812093196e-06, + "loss": 0.0019, + "step": 98620 + }, + { + "epoch": 0.6325821834122751, + "grad_norm": 0.14786294102668762, + "learning_rate": 8.64137458737391e-06, + "loss": 0.0038, + "step": 98630 + }, + { + "epoch": 0.6326463203060613, + "grad_norm": 0.12939073145389557, + "learning_rate": 8.640991008186834e-06, + "loss": 0.0021, + "step": 98640 + }, + { + "epoch": 0.6327104571998473, + "grad_norm": 0.12580779194831848, + "learning_rate": 8.640607383375539e-06, + "loss": 0.0012, + "step": 98650 + }, + { + "epoch": 0.6327745940936335, + "grad_norm": 0.13374793529510498, + "learning_rate": 8.64022371294483e-06, + "loss": 0.0043, + "step": 98660 + }, + { + "epoch": 0.6328387309874195, + "grad_norm": 0.14486871659755707, + "learning_rate": 8.639839996899516e-06, + "loss": 0.0029, + "step": 98670 + }, + { + "epoch": 0.6329028678812056, + "grad_norm": 0.153135284781456, + "learning_rate": 8.639456235244408e-06, + "loss": 0.002, + "step": 98680 + }, + { + "epoch": 0.6329670047749918, + "grad_norm": 0.171351820230484, + "learning_rate": 8.63907242798431e-06, + "loss": 0.0026, + "step": 98690 + }, + { + "epoch": 0.6330311416687778, + "grad_norm": 0.17365366220474243, + "learning_rate": 8.638688575124034e-06, + "loss": 0.0034, + "step": 98700 + }, + { + "epoch": 0.633095278562564, + "grad_norm": 0.28094935417175293, + "learning_rate": 8.638304676668392e-06, + "loss": 0.0035, + "step": 98710 + }, + { + "epoch": 0.63315941545635, + "grad_norm": 0.23114296793937683, + "learning_rate": 8.63792073262219e-06, + "loss": 0.0027, + "step": 98720 + }, + { + "epoch": 0.6332235523501362, + "grad_norm": 0.09477975219488144, + "learning_rate": 8.63753674299024e-06, + "loss": 0.0039, + "step": 98730 + }, + { + "epoch": 0.6332876892439222, + "grad_norm": 0.26978856325149536, + "learning_rate": 8.637152707777356e-06, + "loss": 0.0023, + "step": 98740 + }, + { + "epoch": 0.6333518261377084, + "grad_norm": 0.09891938418149948, + "learning_rate": 8.63676862698835e-06, + "loss": 0.0025, + "step": 98750 + }, + { + "epoch": 0.6334159630314944, + "grad_norm": 0.012529808096587658, + "learning_rate": 8.636384500628034e-06, + "loss": 0.0027, + "step": 98760 + }, + { + "epoch": 0.6334800999252805, + "grad_norm": 0.07490794360637665, + "learning_rate": 8.636000328701222e-06, + "loss": 0.0017, + "step": 98770 + }, + { + "epoch": 0.6335442368190666, + "grad_norm": 0.11942527443170547, + "learning_rate": 8.635616111212725e-06, + "loss": 0.0043, + "step": 98780 + }, + { + "epoch": 0.6336083737128527, + "grad_norm": 0.18825405836105347, + "learning_rate": 8.635231848167361e-06, + "loss": 0.0034, + "step": 98790 + }, + { + "epoch": 0.6336725106066388, + "grad_norm": 0.08807805180549622, + "learning_rate": 8.634847539569944e-06, + "loss": 0.0047, + "step": 98800 + }, + { + "epoch": 0.6337366475004249, + "grad_norm": 0.12823423743247986, + "learning_rate": 8.63446318542529e-06, + "loss": 0.0026, + "step": 98810 + }, + { + "epoch": 0.6338007843942111, + "grad_norm": 0.08802173286676407, + "learning_rate": 8.634078785738213e-06, + "loss": 0.0017, + "step": 98820 + }, + { + "epoch": 0.6338649212879971, + "grad_norm": 0.024244409054517746, + "learning_rate": 8.633694340513533e-06, + "loss": 0.0031, + "step": 98830 + }, + { + "epoch": 0.6339290581817832, + "grad_norm": 0.023430783301591873, + "learning_rate": 8.633309849756067e-06, + "loss": 0.0036, + "step": 98840 + }, + { + "epoch": 0.6339931950755693, + "grad_norm": 0.0960763692855835, + "learning_rate": 8.63292531347063e-06, + "loss": 0.0032, + "step": 98850 + }, + { + "epoch": 0.6340573319693554, + "grad_norm": 0.20833489298820496, + "learning_rate": 8.632540731662042e-06, + "loss": 0.004, + "step": 98860 + }, + { + "epoch": 0.6341214688631415, + "grad_norm": 0.044883791357278824, + "learning_rate": 8.632156104335123e-06, + "loss": 0.0027, + "step": 98870 + }, + { + "epoch": 0.6341856057569276, + "grad_norm": 0.09762940555810928, + "learning_rate": 8.631771431494694e-06, + "loss": 0.0024, + "step": 98880 + }, + { + "epoch": 0.6342497426507137, + "grad_norm": 0.18341349065303802, + "learning_rate": 8.631386713145572e-06, + "loss": 0.0027, + "step": 98890 + }, + { + "epoch": 0.6343138795444998, + "grad_norm": 0.0660662055015564, + "learning_rate": 8.631001949292579e-06, + "loss": 0.003, + "step": 98900 + }, + { + "epoch": 0.6343780164382858, + "grad_norm": 0.1865323781967163, + "learning_rate": 8.630617139940536e-06, + "loss": 0.0034, + "step": 98910 + }, + { + "epoch": 0.634442153332072, + "grad_norm": 0.04117047041654587, + "learning_rate": 8.630232285094266e-06, + "loss": 0.0025, + "step": 98920 + }, + { + "epoch": 0.634506290225858, + "grad_norm": 0.19933748245239258, + "learning_rate": 8.629847384758592e-06, + "loss": 0.0033, + "step": 98930 + }, + { + "epoch": 0.6345704271196442, + "grad_norm": 0.2053903043270111, + "learning_rate": 8.629462438938333e-06, + "loss": 0.0018, + "step": 98940 + }, + { + "epoch": 0.6346345640134302, + "grad_norm": 0.20126311480998993, + "learning_rate": 8.629077447638319e-06, + "loss": 0.0038, + "step": 98950 + }, + { + "epoch": 0.6346987009072164, + "grad_norm": 0.1050531417131424, + "learning_rate": 8.62869241086337e-06, + "loss": 0.0028, + "step": 98960 + }, + { + "epoch": 0.6347628378010025, + "grad_norm": 0.35862889885902405, + "learning_rate": 8.628307328618312e-06, + "loss": 0.0049, + "step": 98970 + }, + { + "epoch": 0.6348269746947885, + "grad_norm": 0.14179842174053192, + "learning_rate": 8.62792220090797e-06, + "loss": 0.003, + "step": 98980 + }, + { + "epoch": 0.6348911115885747, + "grad_norm": 0.13463851809501648, + "learning_rate": 8.627537027737169e-06, + "loss": 0.003, + "step": 98990 + }, + { + "epoch": 0.6349552484823607, + "grad_norm": 0.08901742845773697, + "learning_rate": 8.627151809110737e-06, + "loss": 0.0027, + "step": 99000 + }, + { + "epoch": 0.6350193853761469, + "grad_norm": 0.1018950343132019, + "learning_rate": 8.6267665450335e-06, + "loss": 0.003, + "step": 99010 + }, + { + "epoch": 0.6350835222699329, + "grad_norm": 0.13207198679447174, + "learning_rate": 8.626381235510286e-06, + "loss": 0.0016, + "step": 99020 + }, + { + "epoch": 0.6351476591637191, + "grad_norm": 0.15876226127147675, + "learning_rate": 8.625995880545926e-06, + "loss": 0.0027, + "step": 99030 + }, + { + "epoch": 0.6352117960575051, + "grad_norm": 0.05085441470146179, + "learning_rate": 8.625610480145244e-06, + "loss": 0.002, + "step": 99040 + }, + { + "epoch": 0.6352759329512913, + "grad_norm": 0.04116792231798172, + "learning_rate": 8.625225034313071e-06, + "loss": 0.0037, + "step": 99050 + }, + { + "epoch": 0.6353400698450773, + "grad_norm": 0.18012458086013794, + "learning_rate": 8.624839543054238e-06, + "loss": 0.003, + "step": 99060 + }, + { + "epoch": 0.6354042067388634, + "grad_norm": 0.16937007009983063, + "learning_rate": 8.624454006373577e-06, + "loss": 0.0022, + "step": 99070 + }, + { + "epoch": 0.6354683436326495, + "grad_norm": 0.33522242307662964, + "learning_rate": 8.624068424275913e-06, + "loss": 0.004, + "step": 99080 + }, + { + "epoch": 0.6355324805264356, + "grad_norm": 0.07128586620092392, + "learning_rate": 8.623682796766085e-06, + "loss": 0.0025, + "step": 99090 + }, + { + "epoch": 0.6355966174202218, + "grad_norm": 0.1760418862104416, + "learning_rate": 8.62329712384892e-06, + "loss": 0.0024, + "step": 99100 + }, + { + "epoch": 0.6356607543140078, + "grad_norm": 0.19041508436203003, + "learning_rate": 8.622911405529253e-06, + "loss": 0.0048, + "step": 99110 + }, + { + "epoch": 0.635724891207794, + "grad_norm": 0.16518241167068481, + "learning_rate": 8.622525641811917e-06, + "loss": 0.0034, + "step": 99120 + }, + { + "epoch": 0.63578902810158, + "grad_norm": 0.034229397773742676, + "learning_rate": 8.622139832701744e-06, + "loss": 0.0029, + "step": 99130 + }, + { + "epoch": 0.6358531649953661, + "grad_norm": 0.19417065382003784, + "learning_rate": 8.621753978203572e-06, + "loss": 0.0035, + "step": 99140 + }, + { + "epoch": 0.6359173018891522, + "grad_norm": 0.0759410634636879, + "learning_rate": 8.621368078322234e-06, + "loss": 0.0025, + "step": 99150 + }, + { + "epoch": 0.6359814387829383, + "grad_norm": 0.07072167843580246, + "learning_rate": 8.620982133062566e-06, + "loss": 0.0026, + "step": 99160 + }, + { + "epoch": 0.6360455756767244, + "grad_norm": 0.16522380709648132, + "learning_rate": 8.620596142429402e-06, + "loss": 0.0047, + "step": 99170 + }, + { + "epoch": 0.6361097125705105, + "grad_norm": 0.22960606217384338, + "learning_rate": 8.620210106427584e-06, + "loss": 0.0031, + "step": 99180 + }, + { + "epoch": 0.6361738494642966, + "grad_norm": 0.0563676580786705, + "learning_rate": 8.619824025061945e-06, + "loss": 0.0026, + "step": 99190 + }, + { + "epoch": 0.6362379863580827, + "grad_norm": 0.1583438366651535, + "learning_rate": 8.619437898337323e-06, + "loss": 0.0063, + "step": 99200 + }, + { + "epoch": 0.6363021232518687, + "grad_norm": 0.5747680068016052, + "learning_rate": 8.619051726258557e-06, + "loss": 0.0021, + "step": 99210 + }, + { + "epoch": 0.6363662601456549, + "grad_norm": 0.1903034895658493, + "learning_rate": 8.618665508830488e-06, + "loss": 0.0053, + "step": 99220 + }, + { + "epoch": 0.6364303970394409, + "grad_norm": 0.15191338956356049, + "learning_rate": 8.618279246057953e-06, + "loss": 0.003, + "step": 99230 + }, + { + "epoch": 0.6364945339332271, + "grad_norm": 0.18410761654376984, + "learning_rate": 8.617892937945794e-06, + "loss": 0.004, + "step": 99240 + }, + { + "epoch": 0.6365586708270132, + "grad_norm": 0.16988497972488403, + "learning_rate": 8.61750658449885e-06, + "loss": 0.0036, + "step": 99250 + }, + { + "epoch": 0.6366228077207993, + "grad_norm": 0.07580164074897766, + "learning_rate": 8.61712018572196e-06, + "loss": 0.0022, + "step": 99260 + }, + { + "epoch": 0.6366869446145854, + "grad_norm": 0.11030968278646469, + "learning_rate": 8.616733741619973e-06, + "loss": 0.0048, + "step": 99270 + }, + { + "epoch": 0.6367510815083715, + "grad_norm": 0.07485426962375641, + "learning_rate": 8.616347252197727e-06, + "loss": 0.0025, + "step": 99280 + }, + { + "epoch": 0.6368152184021576, + "grad_norm": 0.16262781620025635, + "learning_rate": 8.615960717460065e-06, + "loss": 0.0024, + "step": 99290 + }, + { + "epoch": 0.6368793552959436, + "grad_norm": 0.12966498732566833, + "learning_rate": 8.61557413741183e-06, + "loss": 0.0025, + "step": 99300 + }, + { + "epoch": 0.6369434921897298, + "grad_norm": 0.41973453760147095, + "learning_rate": 8.615187512057867e-06, + "loss": 0.0021, + "step": 99310 + }, + { + "epoch": 0.6370076290835158, + "grad_norm": 0.1630977839231491, + "learning_rate": 8.614800841403023e-06, + "loss": 0.0026, + "step": 99320 + }, + { + "epoch": 0.637071765977302, + "grad_norm": 0.1655716449022293, + "learning_rate": 8.614414125452139e-06, + "loss": 0.0019, + "step": 99330 + }, + { + "epoch": 0.637135902871088, + "grad_norm": 0.06753179430961609, + "learning_rate": 8.614027364210064e-06, + "loss": 0.0031, + "step": 99340 + }, + { + "epoch": 0.6372000397648742, + "grad_norm": 0.08307766169309616, + "learning_rate": 8.613640557681642e-06, + "loss": 0.0067, + "step": 99350 + }, + { + "epoch": 0.6372641766586602, + "grad_norm": 0.10214082151651382, + "learning_rate": 8.613253705871723e-06, + "loss": 0.0027, + "step": 99360 + }, + { + "epoch": 0.6373283135524463, + "grad_norm": 0.021376170217990875, + "learning_rate": 8.61286680878515e-06, + "loss": 0.0032, + "step": 99370 + }, + { + "epoch": 0.6373924504462324, + "grad_norm": 0.16414782404899597, + "learning_rate": 8.612479866426775e-06, + "loss": 0.0042, + "step": 99380 + }, + { + "epoch": 0.6374565873400185, + "grad_norm": 0.21608483791351318, + "learning_rate": 8.612092878801446e-06, + "loss": 0.0029, + "step": 99390 + }, + { + "epoch": 0.6375207242338047, + "grad_norm": 0.5427488684654236, + "learning_rate": 8.611705845914011e-06, + "loss": 0.0079, + "step": 99400 + }, + { + "epoch": 0.6375848611275907, + "grad_norm": 0.10978326946496964, + "learning_rate": 8.611318767769321e-06, + "loss": 0.0036, + "step": 99410 + }, + { + "epoch": 0.6376489980213769, + "grad_norm": 0.23859313130378723, + "learning_rate": 8.610931644372226e-06, + "loss": 0.0026, + "step": 99420 + }, + { + "epoch": 0.6377131349151629, + "grad_norm": 0.1963741034269333, + "learning_rate": 8.610544475727576e-06, + "loss": 0.0019, + "step": 99430 + }, + { + "epoch": 0.637777271808949, + "grad_norm": 0.31492742896080017, + "learning_rate": 8.610157261840224e-06, + "loss": 0.004, + "step": 99440 + }, + { + "epoch": 0.6378414087027351, + "grad_norm": 0.15649573504924774, + "learning_rate": 8.609770002715022e-06, + "loss": 0.003, + "step": 99450 + }, + { + "epoch": 0.6379055455965212, + "grad_norm": 0.10112041980028152, + "learning_rate": 8.60938269835682e-06, + "loss": 0.0032, + "step": 99460 + }, + { + "epoch": 0.6379696824903073, + "grad_norm": 0.18804387748241425, + "learning_rate": 8.608995348770474e-06, + "loss": 0.0037, + "step": 99470 + }, + { + "epoch": 0.6380338193840934, + "grad_norm": 0.07315003871917725, + "learning_rate": 8.60860795396084e-06, + "loss": 0.0056, + "step": 99480 + }, + { + "epoch": 0.6380979562778795, + "grad_norm": 0.08535873889923096, + "learning_rate": 8.608220513932766e-06, + "loss": 0.003, + "step": 99490 + }, + { + "epoch": 0.6381620931716656, + "grad_norm": 0.28759923577308655, + "learning_rate": 8.607833028691111e-06, + "loss": 0.0026, + "step": 99500 + }, + { + "epoch": 0.6382262300654516, + "grad_norm": 0.2628440856933594, + "learning_rate": 8.607445498240729e-06, + "loss": 0.004, + "step": 99510 + }, + { + "epoch": 0.6382903669592378, + "grad_norm": 0.18443447351455688, + "learning_rate": 8.607057922586477e-06, + "loss": 0.0033, + "step": 99520 + }, + { + "epoch": 0.638354503853024, + "grad_norm": 0.08273148536682129, + "learning_rate": 8.606670301733212e-06, + "loss": 0.003, + "step": 99530 + }, + { + "epoch": 0.63841864074681, + "grad_norm": 0.25067704916000366, + "learning_rate": 8.606282635685789e-06, + "loss": 0.0048, + "step": 99540 + }, + { + "epoch": 0.6384827776405961, + "grad_norm": 0.17173099517822266, + "learning_rate": 8.60589492444907e-06, + "loss": 0.006, + "step": 99550 + }, + { + "epoch": 0.6385469145343822, + "grad_norm": 0.1476057916879654, + "learning_rate": 8.60550716802791e-06, + "loss": 0.0019, + "step": 99560 + }, + { + "epoch": 0.6386110514281683, + "grad_norm": 0.04123391956090927, + "learning_rate": 8.605119366427165e-06, + "loss": 0.002, + "step": 99570 + }, + { + "epoch": 0.6386751883219544, + "grad_norm": 0.1771586537361145, + "learning_rate": 8.6047315196517e-06, + "loss": 0.005, + "step": 99580 + }, + { + "epoch": 0.6387393252157405, + "grad_norm": 0.21558478474617004, + "learning_rate": 8.604343627706374e-06, + "loss": 0.0038, + "step": 99590 + }, + { + "epoch": 0.6388034621095265, + "grad_norm": 0.1837262660264969, + "learning_rate": 8.603955690596044e-06, + "loss": 0.0047, + "step": 99600 + }, + { + "epoch": 0.6388675990033127, + "grad_norm": 0.07837624102830887, + "learning_rate": 8.603567708325576e-06, + "loss": 0.0025, + "step": 99610 + }, + { + "epoch": 0.6389317358970987, + "grad_norm": 0.008626329712569714, + "learning_rate": 8.603179680899827e-06, + "loss": 0.0034, + "step": 99620 + }, + { + "epoch": 0.6389958727908849, + "grad_norm": 0.05696270242333412, + "learning_rate": 8.602791608323662e-06, + "loss": 0.005, + "step": 99630 + }, + { + "epoch": 0.6390600096846709, + "grad_norm": 0.07516892999410629, + "learning_rate": 8.602403490601943e-06, + "loss": 0.0035, + "step": 99640 + }, + { + "epoch": 0.6391241465784571, + "grad_norm": 0.15452761948108673, + "learning_rate": 8.602015327739535e-06, + "loss": 0.0038, + "step": 99650 + }, + { + "epoch": 0.6391882834722431, + "grad_norm": 0.09730653464794159, + "learning_rate": 8.6016271197413e-06, + "loss": 0.0039, + "step": 99660 + }, + { + "epoch": 0.6392524203660293, + "grad_norm": 0.23667676746845245, + "learning_rate": 8.601238866612103e-06, + "loss": 0.0031, + "step": 99670 + }, + { + "epoch": 0.6393165572598154, + "grad_norm": 0.17529785633087158, + "learning_rate": 8.60085056835681e-06, + "loss": 0.0025, + "step": 99680 + }, + { + "epoch": 0.6393806941536014, + "grad_norm": 0.08143072575330734, + "learning_rate": 8.600462224980283e-06, + "loss": 0.0044, + "step": 99690 + }, + { + "epoch": 0.6394448310473876, + "grad_norm": 0.07394933700561523, + "learning_rate": 8.600073836487392e-06, + "loss": 0.005, + "step": 99700 + }, + { + "epoch": 0.6395089679411736, + "grad_norm": 0.058153364807367325, + "learning_rate": 8.599685402883004e-06, + "loss": 0.001, + "step": 99710 + }, + { + "epoch": 0.6395731048349598, + "grad_norm": 0.4131733775138855, + "learning_rate": 8.599296924171987e-06, + "loss": 0.0034, + "step": 99720 + }, + { + "epoch": 0.6396372417287458, + "grad_norm": 0.31498780846595764, + "learning_rate": 8.598908400359205e-06, + "loss": 0.0031, + "step": 99730 + }, + { + "epoch": 0.639701378622532, + "grad_norm": 0.24626566469669342, + "learning_rate": 8.59851983144953e-06, + "loss": 0.003, + "step": 99740 + }, + { + "epoch": 0.639765515516318, + "grad_norm": 0.06797293573617935, + "learning_rate": 8.598131217447826e-06, + "loss": 0.0026, + "step": 99750 + }, + { + "epoch": 0.6398296524101041, + "grad_norm": 0.090883269906044, + "learning_rate": 8.59774255835897e-06, + "loss": 0.0051, + "step": 99760 + }, + { + "epoch": 0.6398937893038902, + "grad_norm": 0.08502686023712158, + "learning_rate": 8.59735385418783e-06, + "loss": 0.0038, + "step": 99770 + }, + { + "epoch": 0.6399579261976763, + "grad_norm": 0.26698365807533264, + "learning_rate": 8.596965104939271e-06, + "loss": 0.0039, + "step": 99780 + }, + { + "epoch": 0.6400220630914624, + "grad_norm": 0.15803366899490356, + "learning_rate": 8.59657631061817e-06, + "loss": 0.0013, + "step": 99790 + }, + { + "epoch": 0.6400861999852485, + "grad_norm": 0.0447712242603302, + "learning_rate": 8.596187471229398e-06, + "loss": 0.0037, + "step": 99800 + }, + { + "epoch": 0.6401503368790347, + "grad_norm": 0.14180706441402435, + "learning_rate": 8.595798586777827e-06, + "loss": 0.0035, + "step": 99810 + }, + { + "epoch": 0.6402144737728207, + "grad_norm": 0.556554913520813, + "learning_rate": 8.59540965726833e-06, + "loss": 0.0069, + "step": 99820 + }, + { + "epoch": 0.6402786106666069, + "grad_norm": 0.0933576375246048, + "learning_rate": 8.595020682705778e-06, + "loss": 0.0032, + "step": 99830 + }, + { + "epoch": 0.6403427475603929, + "grad_norm": 0.09699434041976929, + "learning_rate": 8.59463166309505e-06, + "loss": 0.0049, + "step": 99840 + }, + { + "epoch": 0.640406884454179, + "grad_norm": 0.21019862592220306, + "learning_rate": 8.594242598441018e-06, + "loss": 0.0048, + "step": 99850 + }, + { + "epoch": 0.6404710213479651, + "grad_norm": 0.08167964965105057, + "learning_rate": 8.593853488748557e-06, + "loss": 0.0025, + "step": 99860 + }, + { + "epoch": 0.6405351582417512, + "grad_norm": 0.06507817655801773, + "learning_rate": 8.593464334022543e-06, + "loss": 0.0019, + "step": 99870 + }, + { + "epoch": 0.6405992951355373, + "grad_norm": 0.09788227826356888, + "learning_rate": 8.593075134267852e-06, + "loss": 0.0015, + "step": 99880 + }, + { + "epoch": 0.6406634320293234, + "grad_norm": 0.12715961039066315, + "learning_rate": 8.592685889489363e-06, + "loss": 0.0028, + "step": 99890 + }, + { + "epoch": 0.6407275689231094, + "grad_norm": 0.12133368849754333, + "learning_rate": 8.592296599691952e-06, + "loss": 0.0042, + "step": 99900 + }, + { + "epoch": 0.6407917058168956, + "grad_norm": 0.1371624916791916, + "learning_rate": 8.591907264880497e-06, + "loss": 0.0028, + "step": 99910 + }, + { + "epoch": 0.6408558427106816, + "grad_norm": 0.14875027537345886, + "learning_rate": 8.591517885059877e-06, + "loss": 0.0069, + "step": 99920 + }, + { + "epoch": 0.6409199796044678, + "grad_norm": 0.1025613322854042, + "learning_rate": 8.59112846023497e-06, + "loss": 0.0025, + "step": 99930 + }, + { + "epoch": 0.6409841164982538, + "grad_norm": 0.28941577672958374, + "learning_rate": 8.59073899041066e-06, + "loss": 0.003, + "step": 99940 + }, + { + "epoch": 0.64104825339204, + "grad_norm": 0.08539040386676788, + "learning_rate": 8.59034947559182e-06, + "loss": 0.0025, + "step": 99950 + }, + { + "epoch": 0.6411123902858261, + "grad_norm": 0.1081579178571701, + "learning_rate": 8.589959915783337e-06, + "loss": 0.0019, + "step": 99960 + }, + { + "epoch": 0.6411765271796122, + "grad_norm": 0.13859598338603973, + "learning_rate": 8.589570310990093e-06, + "loss": 0.0031, + "step": 99970 + }, + { + "epoch": 0.6412406640733983, + "grad_norm": 0.08517938107252121, + "learning_rate": 8.589180661216962e-06, + "loss": 0.0027, + "step": 99980 + }, + { + "epoch": 0.6413048009671843, + "grad_norm": 0.23588401079177856, + "learning_rate": 8.588790966468836e-06, + "loss": 0.0035, + "step": 99990 + }, + { + "epoch": 0.6413689378609705, + "grad_norm": 0.34334316849708557, + "learning_rate": 8.588401226750595e-06, + "loss": 0.0031, + "step": 100000 + }, + { + "epoch": 0.6414330747547565, + "grad_norm": 0.2593560814857483, + "learning_rate": 8.58801144206712e-06, + "loss": 0.0038, + "step": 100010 + }, + { + "epoch": 0.6414972116485427, + "grad_norm": 0.1017594113945961, + "learning_rate": 8.587621612423298e-06, + "loss": 0.0023, + "step": 100020 + }, + { + "epoch": 0.6415613485423287, + "grad_norm": 0.09830541163682938, + "learning_rate": 8.587231737824013e-06, + "loss": 0.003, + "step": 100030 + }, + { + "epoch": 0.6416254854361149, + "grad_norm": 0.07964377105236053, + "learning_rate": 8.586841818274152e-06, + "loss": 0.0013, + "step": 100040 + }, + { + "epoch": 0.6416896223299009, + "grad_norm": 0.049259621649980545, + "learning_rate": 8.586451853778597e-06, + "loss": 0.0025, + "step": 100050 + }, + { + "epoch": 0.641753759223687, + "grad_norm": 0.10759458690881729, + "learning_rate": 8.58606184434224e-06, + "loss": 0.0029, + "step": 100060 + }, + { + "epoch": 0.6418178961174731, + "grad_norm": 0.06379517912864685, + "learning_rate": 8.585671789969963e-06, + "loss": 0.0042, + "step": 100070 + }, + { + "epoch": 0.6418820330112592, + "grad_norm": 0.20491823554039001, + "learning_rate": 8.585281690666656e-06, + "loss": 0.0028, + "step": 100080 + }, + { + "epoch": 0.6419461699050454, + "grad_norm": 0.4074902832508087, + "learning_rate": 8.584891546437206e-06, + "loss": 0.0049, + "step": 100090 + }, + { + "epoch": 0.6420103067988314, + "grad_norm": 0.18549945950508118, + "learning_rate": 8.584501357286505e-06, + "loss": 0.0023, + "step": 100100 + }, + { + "epoch": 0.6420744436926176, + "grad_norm": 0.1402589976787567, + "learning_rate": 8.584111123219438e-06, + "loss": 0.0025, + "step": 100110 + }, + { + "epoch": 0.6421385805864036, + "grad_norm": 0.10942061245441437, + "learning_rate": 8.5837208442409e-06, + "loss": 0.0028, + "step": 100120 + }, + { + "epoch": 0.6422027174801898, + "grad_norm": 0.16121609508991241, + "learning_rate": 8.583330520355777e-06, + "loss": 0.0024, + "step": 100130 + }, + { + "epoch": 0.6422668543739758, + "grad_norm": 0.1497829556465149, + "learning_rate": 8.582940151568961e-06, + "loss": 0.0023, + "step": 100140 + }, + { + "epoch": 0.6423309912677619, + "grad_norm": 0.10273135453462601, + "learning_rate": 8.582549737885346e-06, + "loss": 0.0048, + "step": 100150 + }, + { + "epoch": 0.642395128161548, + "grad_norm": 0.172627791762352, + "learning_rate": 8.582159279309821e-06, + "loss": 0.0028, + "step": 100160 + }, + { + "epoch": 0.6424592650553341, + "grad_norm": 0.18407230079174042, + "learning_rate": 8.58176877584728e-06, + "loss": 0.0051, + "step": 100170 + }, + { + "epoch": 0.6425234019491202, + "grad_norm": 0.14890094101428986, + "learning_rate": 8.581378227502616e-06, + "loss": 0.0064, + "step": 100180 + }, + { + "epoch": 0.6425875388429063, + "grad_norm": 0.11829569190740585, + "learning_rate": 8.580987634280727e-06, + "loss": 0.0028, + "step": 100190 + }, + { + "epoch": 0.6426516757366924, + "grad_norm": 0.05861892178654671, + "learning_rate": 8.580596996186499e-06, + "loss": 0.0015, + "step": 100200 + }, + { + "epoch": 0.6427158126304785, + "grad_norm": 0.09609236568212509, + "learning_rate": 8.580206313224833e-06, + "loss": 0.0053, + "step": 100210 + }, + { + "epoch": 0.6427799495242645, + "grad_norm": 0.2850412130355835, + "learning_rate": 8.579815585400624e-06, + "loss": 0.0021, + "step": 100220 + }, + { + "epoch": 0.6428440864180507, + "grad_norm": 0.030681833624839783, + "learning_rate": 8.579424812718767e-06, + "loss": 0.0019, + "step": 100230 + }, + { + "epoch": 0.6429082233118368, + "grad_norm": 0.07587326318025589, + "learning_rate": 8.579033995184159e-06, + "loss": 0.0039, + "step": 100240 + }, + { + "epoch": 0.6429723602056229, + "grad_norm": 0.03634805977344513, + "learning_rate": 8.578643132801697e-06, + "loss": 0.0016, + "step": 100250 + }, + { + "epoch": 0.643036497099409, + "grad_norm": 0.272158145904541, + "learning_rate": 8.578252225576278e-06, + "loss": 0.0047, + "step": 100260 + }, + { + "epoch": 0.6431006339931951, + "grad_norm": 0.20765464007854462, + "learning_rate": 8.577861273512801e-06, + "loss": 0.0028, + "step": 100270 + }, + { + "epoch": 0.6431647708869812, + "grad_norm": 0.1623903512954712, + "learning_rate": 8.577470276616166e-06, + "loss": 0.0027, + "step": 100280 + }, + { + "epoch": 0.6432289077807672, + "grad_norm": 0.24394509196281433, + "learning_rate": 8.577079234891273e-06, + "loss": 0.0029, + "step": 100290 + }, + { + "epoch": 0.6432930446745534, + "grad_norm": 0.09946839511394501, + "learning_rate": 8.57668814834302e-06, + "loss": 0.0015, + "step": 100300 + }, + { + "epoch": 0.6433571815683394, + "grad_norm": 0.01027774065732956, + "learning_rate": 8.576297016976307e-06, + "loss": 0.005, + "step": 100310 + }, + { + "epoch": 0.6434213184621256, + "grad_norm": 0.140450119972229, + "learning_rate": 8.575905840796038e-06, + "loss": 0.0045, + "step": 100320 + }, + { + "epoch": 0.6434854553559116, + "grad_norm": 0.08717735856771469, + "learning_rate": 8.575514619807112e-06, + "loss": 0.0061, + "step": 100330 + }, + { + "epoch": 0.6435495922496978, + "grad_norm": 0.21483832597732544, + "learning_rate": 8.575123354014432e-06, + "loss": 0.0033, + "step": 100340 + }, + { + "epoch": 0.6436137291434838, + "grad_norm": 0.1500617414712906, + "learning_rate": 8.574732043422902e-06, + "loss": 0.0024, + "step": 100350 + }, + { + "epoch": 0.64367786603727, + "grad_norm": 0.11223375052213669, + "learning_rate": 8.574340688037426e-06, + "loss": 0.0025, + "step": 100360 + }, + { + "epoch": 0.6437420029310561, + "grad_norm": 0.02819480001926422, + "learning_rate": 8.573949287862905e-06, + "loss": 0.0016, + "step": 100370 + }, + { + "epoch": 0.6438061398248421, + "grad_norm": 0.0947762280702591, + "learning_rate": 8.573557842904245e-06, + "loss": 0.0016, + "step": 100380 + }, + { + "epoch": 0.6438702767186283, + "grad_norm": 0.2611057758331299, + "learning_rate": 8.573166353166352e-06, + "loss": 0.0034, + "step": 100390 + }, + { + "epoch": 0.6439344136124143, + "grad_norm": 0.1276160478591919, + "learning_rate": 8.57277481865413e-06, + "loss": 0.003, + "step": 100400 + }, + { + "epoch": 0.6439985505062005, + "grad_norm": 0.10863440483808517, + "learning_rate": 8.572383239372488e-06, + "loss": 0.0024, + "step": 100410 + }, + { + "epoch": 0.6440626873999865, + "grad_norm": 0.10743433982133865, + "learning_rate": 8.57199161532633e-06, + "loss": 0.0026, + "step": 100420 + }, + { + "epoch": 0.6441268242937727, + "grad_norm": 0.2762799561023712, + "learning_rate": 8.571599946520563e-06, + "loss": 0.004, + "step": 100430 + }, + { + "epoch": 0.6441909611875587, + "grad_norm": 0.050531577318906784, + "learning_rate": 8.571208232960097e-06, + "loss": 0.0044, + "step": 100440 + }, + { + "epoch": 0.6442550980813448, + "grad_norm": 0.33933791518211365, + "learning_rate": 8.570816474649838e-06, + "loss": 0.007, + "step": 100450 + }, + { + "epoch": 0.6443192349751309, + "grad_norm": 0.13272233307361603, + "learning_rate": 8.5704246715947e-06, + "loss": 0.0034, + "step": 100460 + }, + { + "epoch": 0.644383371868917, + "grad_norm": 0.05244365707039833, + "learning_rate": 8.570032823799588e-06, + "loss": 0.0015, + "step": 100470 + }, + { + "epoch": 0.6444475087627031, + "grad_norm": 0.08542118966579437, + "learning_rate": 8.569640931269411e-06, + "loss": 0.0031, + "step": 100480 + }, + { + "epoch": 0.6445116456564892, + "grad_norm": 0.4075598418712616, + "learning_rate": 8.569248994009083e-06, + "loss": 0.0049, + "step": 100490 + }, + { + "epoch": 0.6445757825502753, + "grad_norm": 0.055834926664829254, + "learning_rate": 8.568857012023515e-06, + "loss": 0.002, + "step": 100500 + }, + { + "epoch": 0.6446399194440614, + "grad_norm": 0.11826679110527039, + "learning_rate": 8.568464985317618e-06, + "loss": 0.0028, + "step": 100510 + }, + { + "epoch": 0.6447040563378476, + "grad_norm": 0.5990362763404846, + "learning_rate": 8.568072913896304e-06, + "loss": 0.0029, + "step": 100520 + }, + { + "epoch": 0.6447681932316336, + "grad_norm": 0.03626729175448418, + "learning_rate": 8.567680797764486e-06, + "loss": 0.0026, + "step": 100530 + }, + { + "epoch": 0.6448323301254197, + "grad_norm": 0.0920369103550911, + "learning_rate": 8.567288636927078e-06, + "loss": 0.0018, + "step": 100540 + }, + { + "epoch": 0.6448964670192058, + "grad_norm": 0.3881969749927521, + "learning_rate": 8.566896431388994e-06, + "loss": 0.0051, + "step": 100550 + }, + { + "epoch": 0.6449606039129919, + "grad_norm": 0.1653403490781784, + "learning_rate": 8.56650418115515e-06, + "loss": 0.0023, + "step": 100560 + }, + { + "epoch": 0.645024740806778, + "grad_norm": 0.271144300699234, + "learning_rate": 8.566111886230457e-06, + "loss": 0.0028, + "step": 100570 + }, + { + "epoch": 0.6450888777005641, + "grad_norm": 0.15923184156417847, + "learning_rate": 8.565719546619835e-06, + "loss": 0.002, + "step": 100580 + }, + { + "epoch": 0.6451530145943501, + "grad_norm": 0.09394088387489319, + "learning_rate": 8.565327162328199e-06, + "loss": 0.0027, + "step": 100590 + }, + { + "epoch": 0.6452171514881363, + "grad_norm": 0.23985207080841064, + "learning_rate": 8.564934733360463e-06, + "loss": 0.0031, + "step": 100600 + }, + { + "epoch": 0.6452812883819223, + "grad_norm": 0.2356472611427307, + "learning_rate": 8.56454225972155e-06, + "loss": 0.0023, + "step": 100610 + }, + { + "epoch": 0.6453454252757085, + "grad_norm": 0.25987300276756287, + "learning_rate": 8.564149741416372e-06, + "loss": 0.0032, + "step": 100620 + }, + { + "epoch": 0.6454095621694945, + "grad_norm": 0.18951743841171265, + "learning_rate": 8.563757178449854e-06, + "loss": 0.0038, + "step": 100630 + }, + { + "epoch": 0.6454736990632807, + "grad_norm": 0.14653554558753967, + "learning_rate": 8.56336457082691e-06, + "loss": 0.0023, + "step": 100640 + }, + { + "epoch": 0.6455378359570667, + "grad_norm": 0.10666285455226898, + "learning_rate": 8.56297191855246e-06, + "loss": 0.0024, + "step": 100650 + }, + { + "epoch": 0.6456019728508529, + "grad_norm": 0.4330354332923889, + "learning_rate": 8.562579221631427e-06, + "loss": 0.0062, + "step": 100660 + }, + { + "epoch": 0.645666109744639, + "grad_norm": 0.011996464803814888, + "learning_rate": 8.562186480068727e-06, + "loss": 0.002, + "step": 100670 + }, + { + "epoch": 0.645730246638425, + "grad_norm": 0.10304580628871918, + "learning_rate": 8.56179369386929e-06, + "loss": 0.0034, + "step": 100680 + }, + { + "epoch": 0.6457943835322112, + "grad_norm": 0.22657014429569244, + "learning_rate": 8.561400863038029e-06, + "loss": 0.0027, + "step": 100690 + }, + { + "epoch": 0.6458585204259972, + "grad_norm": 0.057219889014959335, + "learning_rate": 8.56100798757987e-06, + "loss": 0.0015, + "step": 100700 + }, + { + "epoch": 0.6459226573197834, + "grad_norm": 0.11843618005514145, + "learning_rate": 8.560615067499737e-06, + "loss": 0.0037, + "step": 100710 + }, + { + "epoch": 0.6459867942135694, + "grad_norm": 0.10030394047498703, + "learning_rate": 8.56022210280255e-06, + "loss": 0.0036, + "step": 100720 + }, + { + "epoch": 0.6460509311073556, + "grad_norm": 0.13362765312194824, + "learning_rate": 8.55982909349324e-06, + "loss": 0.0033, + "step": 100730 + }, + { + "epoch": 0.6461150680011416, + "grad_norm": 0.029844725504517555, + "learning_rate": 8.559436039576723e-06, + "loss": 0.0035, + "step": 100740 + }, + { + "epoch": 0.6461792048949278, + "grad_norm": 0.1949865221977234, + "learning_rate": 8.559042941057931e-06, + "loss": 0.003, + "step": 100750 + }, + { + "epoch": 0.6462433417887138, + "grad_norm": 0.0806402787566185, + "learning_rate": 8.558649797941788e-06, + "loss": 0.0032, + "step": 100760 + }, + { + "epoch": 0.6463074786824999, + "grad_norm": 0.13549336791038513, + "learning_rate": 8.558256610233218e-06, + "loss": 0.0019, + "step": 100770 + }, + { + "epoch": 0.646371615576286, + "grad_norm": 0.287882924079895, + "learning_rate": 8.55786337793715e-06, + "loss": 0.003, + "step": 100780 + }, + { + "epoch": 0.6464357524700721, + "grad_norm": 0.08676916360855103, + "learning_rate": 8.55747010105851e-06, + "loss": 0.0025, + "step": 100790 + }, + { + "epoch": 0.6464998893638583, + "grad_norm": 0.06810786575078964, + "learning_rate": 8.557076779602229e-06, + "loss": 0.0044, + "step": 100800 + }, + { + "epoch": 0.6465640262576443, + "grad_norm": 0.14305730164051056, + "learning_rate": 8.556683413573233e-06, + "loss": 0.0044, + "step": 100810 + }, + { + "epoch": 0.6466281631514305, + "grad_norm": 0.1948164701461792, + "learning_rate": 8.556290002976452e-06, + "loss": 0.0023, + "step": 100820 + }, + { + "epoch": 0.6466923000452165, + "grad_norm": 0.16761861741542816, + "learning_rate": 8.555896547816815e-06, + "loss": 0.0023, + "step": 100830 + }, + { + "epoch": 0.6467564369390026, + "grad_norm": 0.10438597947359085, + "learning_rate": 8.555503048099253e-06, + "loss": 0.0038, + "step": 100840 + }, + { + "epoch": 0.6468205738327887, + "grad_norm": 0.07459600269794464, + "learning_rate": 8.555109503828699e-06, + "loss": 0.004, + "step": 100850 + }, + { + "epoch": 0.6468847107265748, + "grad_norm": 0.05684248358011246, + "learning_rate": 8.55471591501008e-06, + "loss": 0.0015, + "step": 100860 + }, + { + "epoch": 0.6469488476203609, + "grad_norm": 0.14277034997940063, + "learning_rate": 8.55432228164833e-06, + "loss": 0.0036, + "step": 100870 + }, + { + "epoch": 0.647012984514147, + "grad_norm": 0.28751543164253235, + "learning_rate": 8.55392860374838e-06, + "loss": 0.0035, + "step": 100880 + }, + { + "epoch": 0.647077121407933, + "grad_norm": 0.1565263569355011, + "learning_rate": 8.55353488131517e-06, + "loss": 0.0033, + "step": 100890 + }, + { + "epoch": 0.6471412583017192, + "grad_norm": 0.2728831171989441, + "learning_rate": 8.553141114353622e-06, + "loss": 0.0038, + "step": 100900 + }, + { + "epoch": 0.6472053951955052, + "grad_norm": 0.11188970506191254, + "learning_rate": 8.55274730286868e-06, + "loss": 0.0028, + "step": 100910 + }, + { + "epoch": 0.6472695320892914, + "grad_norm": 0.13675358891487122, + "learning_rate": 8.552353446865275e-06, + "loss": 0.0044, + "step": 100920 + }, + { + "epoch": 0.6473336689830774, + "grad_norm": 0.16897651553153992, + "learning_rate": 8.551959546348341e-06, + "loss": 0.0019, + "step": 100930 + }, + { + "epoch": 0.6473978058768636, + "grad_norm": 0.04027769714593887, + "learning_rate": 8.551565601322818e-06, + "loss": 0.0023, + "step": 100940 + }, + { + "epoch": 0.6474619427706497, + "grad_norm": 0.1581668108701706, + "learning_rate": 8.551171611793637e-06, + "loss": 0.0017, + "step": 100950 + }, + { + "epoch": 0.6475260796644358, + "grad_norm": 0.12060859799385071, + "learning_rate": 8.550777577765739e-06, + "loss": 0.0024, + "step": 100960 + }, + { + "epoch": 0.6475902165582219, + "grad_norm": 0.06283990293741226, + "learning_rate": 8.550383499244059e-06, + "loss": 0.003, + "step": 100970 + }, + { + "epoch": 0.647654353452008, + "grad_norm": 0.14902283251285553, + "learning_rate": 8.549989376233539e-06, + "loss": 0.0027, + "step": 100980 + }, + { + "epoch": 0.6477184903457941, + "grad_norm": 0.07595626264810562, + "learning_rate": 8.549595208739111e-06, + "loss": 0.0025, + "step": 100990 + }, + { + "epoch": 0.6477826272395801, + "grad_norm": 0.11632867902517319, + "learning_rate": 8.549200996765722e-06, + "loss": 0.0026, + "step": 101000 + }, + { + "epoch": 0.6478467641333663, + "grad_norm": 0.03205646947026253, + "learning_rate": 8.548806740318305e-06, + "loss": 0.0019, + "step": 101010 + }, + { + "epoch": 0.6479109010271523, + "grad_norm": 0.11372930556535721, + "learning_rate": 8.548412439401805e-06, + "loss": 0.0013, + "step": 101020 + }, + { + "epoch": 0.6479750379209385, + "grad_norm": 0.2693907916545868, + "learning_rate": 8.548018094021161e-06, + "loss": 0.0031, + "step": 101030 + }, + { + "epoch": 0.6480391748147245, + "grad_norm": 0.021016070619225502, + "learning_rate": 8.547623704181316e-06, + "loss": 0.0027, + "step": 101040 + }, + { + "epoch": 0.6481033117085107, + "grad_norm": 0.04376423358917236, + "learning_rate": 8.547229269887207e-06, + "loss": 0.002, + "step": 101050 + }, + { + "epoch": 0.6481674486022967, + "grad_norm": 0.06742499768733978, + "learning_rate": 8.546834791143783e-06, + "loss": 0.0023, + "step": 101060 + }, + { + "epoch": 0.6482315854960828, + "grad_norm": 0.19039209187030792, + "learning_rate": 8.546440267955982e-06, + "loss": 0.0034, + "step": 101070 + }, + { + "epoch": 0.648295722389869, + "grad_norm": 0.17136657238006592, + "learning_rate": 8.54604570032875e-06, + "loss": 0.0028, + "step": 101080 + }, + { + "epoch": 0.648359859283655, + "grad_norm": 0.10204244405031204, + "learning_rate": 8.545651088267035e-06, + "loss": 0.004, + "step": 101090 + }, + { + "epoch": 0.6484239961774412, + "grad_norm": 0.04198237136006355, + "learning_rate": 8.545256431775774e-06, + "loss": 0.0035, + "step": 101100 + }, + { + "epoch": 0.6484881330712272, + "grad_norm": 0.15968136489391327, + "learning_rate": 8.544861730859917e-06, + "loss": 0.0038, + "step": 101110 + }, + { + "epoch": 0.6485522699650134, + "grad_norm": 0.16758500039577484, + "learning_rate": 8.54446698552441e-06, + "loss": 0.0052, + "step": 101120 + }, + { + "epoch": 0.6486164068587994, + "grad_norm": 0.0753367617726326, + "learning_rate": 8.544072195774195e-06, + "loss": 0.0038, + "step": 101130 + }, + { + "epoch": 0.6486805437525855, + "grad_norm": 0.06411454826593399, + "learning_rate": 8.543677361614226e-06, + "loss": 0.002, + "step": 101140 + }, + { + "epoch": 0.6487446806463716, + "grad_norm": 0.5838292241096497, + "learning_rate": 8.543282483049446e-06, + "loss": 0.0029, + "step": 101150 + }, + { + "epoch": 0.6488088175401577, + "grad_norm": 0.039258599281311035, + "learning_rate": 8.542887560084806e-06, + "loss": 0.0036, + "step": 101160 + }, + { + "epoch": 0.6488729544339438, + "grad_norm": 0.11288158595561981, + "learning_rate": 8.542492592725248e-06, + "loss": 0.0046, + "step": 101170 + }, + { + "epoch": 0.6489370913277299, + "grad_norm": 0.14804130792617798, + "learning_rate": 8.54209758097573e-06, + "loss": 0.0049, + "step": 101180 + }, + { + "epoch": 0.649001228221516, + "grad_norm": 0.16995002329349518, + "learning_rate": 8.541702524841198e-06, + "loss": 0.0029, + "step": 101190 + }, + { + "epoch": 0.6490653651153021, + "grad_norm": 0.1034122109413147, + "learning_rate": 8.5413074243266e-06, + "loss": 0.0042, + "step": 101200 + }, + { + "epoch": 0.6491295020090881, + "grad_norm": 0.25434547662734985, + "learning_rate": 8.54091227943689e-06, + "loss": 0.0019, + "step": 101210 + }, + { + "epoch": 0.6491936389028743, + "grad_norm": 0.13532720506191254, + "learning_rate": 8.540517090177019e-06, + "loss": 0.003, + "step": 101220 + }, + { + "epoch": 0.6492577757966604, + "grad_norm": 0.03820735588669777, + "learning_rate": 8.540121856551938e-06, + "loss": 0.0036, + "step": 101230 + }, + { + "epoch": 0.6493219126904465, + "grad_norm": 0.09814277291297913, + "learning_rate": 8.5397265785666e-06, + "loss": 0.0033, + "step": 101240 + }, + { + "epoch": 0.6493860495842326, + "grad_norm": 0.32696372270584106, + "learning_rate": 8.539331256225958e-06, + "loss": 0.0046, + "step": 101250 + }, + { + "epoch": 0.6494501864780187, + "grad_norm": 0.23048560321331024, + "learning_rate": 8.538935889534966e-06, + "loss": 0.0056, + "step": 101260 + }, + { + "epoch": 0.6495143233718048, + "grad_norm": 0.17196024954319, + "learning_rate": 8.538540478498577e-06, + "loss": 0.0055, + "step": 101270 + }, + { + "epoch": 0.6495784602655909, + "grad_norm": 0.09881290793418884, + "learning_rate": 8.538145023121748e-06, + "loss": 0.0032, + "step": 101280 + }, + { + "epoch": 0.649642597159377, + "grad_norm": 0.10250058025121689, + "learning_rate": 8.537749523409434e-06, + "loss": 0.0022, + "step": 101290 + }, + { + "epoch": 0.649706734053163, + "grad_norm": 0.07143467664718628, + "learning_rate": 8.537353979366589e-06, + "loss": 0.0044, + "step": 101300 + }, + { + "epoch": 0.6497708709469492, + "grad_norm": 0.19280396401882172, + "learning_rate": 8.53695839099817e-06, + "loss": 0.0031, + "step": 101310 + }, + { + "epoch": 0.6498350078407352, + "grad_norm": 0.14994238317012787, + "learning_rate": 8.536562758309138e-06, + "loss": 0.003, + "step": 101320 + }, + { + "epoch": 0.6498991447345214, + "grad_norm": 0.16695557534694672, + "learning_rate": 8.536167081304442e-06, + "loss": 0.0038, + "step": 101330 + }, + { + "epoch": 0.6499632816283074, + "grad_norm": 0.08642906695604324, + "learning_rate": 8.53577135998905e-06, + "loss": 0.0031, + "step": 101340 + }, + { + "epoch": 0.6500274185220936, + "grad_norm": 0.21799863874912262, + "learning_rate": 8.535375594367911e-06, + "loss": 0.0037, + "step": 101350 + }, + { + "epoch": 0.6500915554158797, + "grad_norm": 0.09290055185556412, + "learning_rate": 8.534979784445992e-06, + "loss": 0.0039, + "step": 101360 + }, + { + "epoch": 0.6501556923096657, + "grad_norm": 0.11244393140077591, + "learning_rate": 8.53458393022825e-06, + "loss": 0.0027, + "step": 101370 + }, + { + "epoch": 0.6502198292034519, + "grad_norm": 0.4753751754760742, + "learning_rate": 8.534188031719645e-06, + "loss": 0.0031, + "step": 101380 + }, + { + "epoch": 0.6502839660972379, + "grad_norm": 0.04352414980530739, + "learning_rate": 8.533792088925137e-06, + "loss": 0.0021, + "step": 101390 + }, + { + "epoch": 0.6503481029910241, + "grad_norm": 0.2539691925048828, + "learning_rate": 8.533396101849689e-06, + "loss": 0.0021, + "step": 101400 + }, + { + "epoch": 0.6504122398848101, + "grad_norm": 0.08388808369636536, + "learning_rate": 8.533000070498264e-06, + "loss": 0.0022, + "step": 101410 + }, + { + "epoch": 0.6504763767785963, + "grad_norm": 0.037809524685144424, + "learning_rate": 8.532603994875823e-06, + "loss": 0.0011, + "step": 101420 + }, + { + "epoch": 0.6505405136723823, + "grad_norm": 0.31021374464035034, + "learning_rate": 8.532207874987327e-06, + "loss": 0.0052, + "step": 101430 + }, + { + "epoch": 0.6506046505661685, + "grad_norm": 0.10550630837678909, + "learning_rate": 8.531811710837742e-06, + "loss": 0.0037, + "step": 101440 + }, + { + "epoch": 0.6506687874599545, + "grad_norm": 0.1269540637731552, + "learning_rate": 8.531415502432035e-06, + "loss": 0.0033, + "step": 101450 + }, + { + "epoch": 0.6507329243537406, + "grad_norm": 0.19207847118377686, + "learning_rate": 8.531019249775166e-06, + "loss": 0.003, + "step": 101460 + }, + { + "epoch": 0.6507970612475267, + "grad_norm": 0.017273563891649246, + "learning_rate": 8.530622952872104e-06, + "loss": 0.0017, + "step": 101470 + }, + { + "epoch": 0.6508611981413128, + "grad_norm": 0.14974822103977203, + "learning_rate": 8.53022661172781e-06, + "loss": 0.0025, + "step": 101480 + }, + { + "epoch": 0.6509253350350989, + "grad_norm": 0.06803373247385025, + "learning_rate": 8.529830226347256e-06, + "loss": 0.0023, + "step": 101490 + }, + { + "epoch": 0.650989471928885, + "grad_norm": 0.11219818890094757, + "learning_rate": 8.529433796735404e-06, + "loss": 0.0025, + "step": 101500 + }, + { + "epoch": 0.6510536088226712, + "grad_norm": 0.24247153103351593, + "learning_rate": 8.529037322897227e-06, + "loss": 0.0048, + "step": 101510 + }, + { + "epoch": 0.6511177457164572, + "grad_norm": 0.14471827447414398, + "learning_rate": 8.528640804837689e-06, + "loss": 0.004, + "step": 101520 + }, + { + "epoch": 0.6511818826102433, + "grad_norm": 0.17130020260810852, + "learning_rate": 8.528244242561758e-06, + "loss": 0.004, + "step": 101530 + }, + { + "epoch": 0.6512460195040294, + "grad_norm": 0.1778392642736435, + "learning_rate": 8.527847636074408e-06, + "loss": 0.0022, + "step": 101540 + }, + { + "epoch": 0.6513101563978155, + "grad_norm": 0.2364269196987152, + "learning_rate": 8.527450985380605e-06, + "loss": 0.0024, + "step": 101550 + }, + { + "epoch": 0.6513742932916016, + "grad_norm": 0.16268400847911835, + "learning_rate": 8.527054290485319e-06, + "loss": 0.0023, + "step": 101560 + }, + { + "epoch": 0.6514384301853877, + "grad_norm": 0.35830166935920715, + "learning_rate": 8.526657551393522e-06, + "loss": 0.0042, + "step": 101570 + }, + { + "epoch": 0.6515025670791738, + "grad_norm": 0.12425675988197327, + "learning_rate": 8.526260768110185e-06, + "loss": 0.0042, + "step": 101580 + }, + { + "epoch": 0.6515667039729599, + "grad_norm": 0.16214703023433685, + "learning_rate": 8.525863940640282e-06, + "loss": 0.0055, + "step": 101590 + }, + { + "epoch": 0.6516308408667459, + "grad_norm": 0.41682323813438416, + "learning_rate": 8.525467068988783e-06, + "loss": 0.0024, + "step": 101600 + }, + { + "epoch": 0.6516949777605321, + "grad_norm": 0.37435153126716614, + "learning_rate": 8.525070153160664e-06, + "loss": 0.0032, + "step": 101610 + }, + { + "epoch": 0.6517591146543181, + "grad_norm": 0.40198203921318054, + "learning_rate": 8.524673193160894e-06, + "loss": 0.0033, + "step": 101620 + }, + { + "epoch": 0.6518232515481043, + "grad_norm": 0.007691803388297558, + "learning_rate": 8.524276188994452e-06, + "loss": 0.0039, + "step": 101630 + }, + { + "epoch": 0.6518873884418904, + "grad_norm": 0.19811321794986725, + "learning_rate": 8.523879140666308e-06, + "loss": 0.0028, + "step": 101640 + }, + { + "epoch": 0.6519515253356765, + "grad_norm": 0.4874114990234375, + "learning_rate": 8.523482048181443e-06, + "loss": 0.0042, + "step": 101650 + }, + { + "epoch": 0.6520156622294626, + "grad_norm": 0.27024710178375244, + "learning_rate": 8.523084911544827e-06, + "loss": 0.0043, + "step": 101660 + }, + { + "epoch": 0.6520797991232486, + "grad_norm": 0.12862282991409302, + "learning_rate": 8.522687730761441e-06, + "loss": 0.0025, + "step": 101670 + }, + { + "epoch": 0.6521439360170348, + "grad_norm": 0.04814813286066055, + "learning_rate": 8.522290505836259e-06, + "loss": 0.0029, + "step": 101680 + }, + { + "epoch": 0.6522080729108208, + "grad_norm": 0.1256941705942154, + "learning_rate": 8.521893236774262e-06, + "loss": 0.0029, + "step": 101690 + }, + { + "epoch": 0.652272209804607, + "grad_norm": 0.021735558286309242, + "learning_rate": 8.52149592358042e-06, + "loss": 0.0037, + "step": 101700 + }, + { + "epoch": 0.652336346698393, + "grad_norm": 0.21016880869865417, + "learning_rate": 8.521098566259723e-06, + "loss": 0.0035, + "step": 101710 + }, + { + "epoch": 0.6524004835921792, + "grad_norm": 0.11760730296373367, + "learning_rate": 8.520701164817142e-06, + "loss": 0.0026, + "step": 101720 + }, + { + "epoch": 0.6524646204859652, + "grad_norm": 0.2804056406021118, + "learning_rate": 8.52030371925766e-06, + "loss": 0.004, + "step": 101730 + }, + { + "epoch": 0.6525287573797514, + "grad_norm": 0.16390132904052734, + "learning_rate": 8.519906229586255e-06, + "loss": 0.0017, + "step": 101740 + }, + { + "epoch": 0.6525928942735374, + "grad_norm": 0.10632698237895966, + "learning_rate": 8.519508695807912e-06, + "loss": 0.0024, + "step": 101750 + }, + { + "epoch": 0.6526570311673235, + "grad_norm": 0.05945868045091629, + "learning_rate": 8.519111117927608e-06, + "loss": 0.0018, + "step": 101760 + }, + { + "epoch": 0.6527211680611096, + "grad_norm": 0.14257098734378815, + "learning_rate": 8.518713495950326e-06, + "loss": 0.0024, + "step": 101770 + }, + { + "epoch": 0.6527853049548957, + "grad_norm": 0.1678561270236969, + "learning_rate": 8.51831582988105e-06, + "loss": 0.0033, + "step": 101780 + }, + { + "epoch": 0.6528494418486819, + "grad_norm": 0.14132031798362732, + "learning_rate": 8.517918119724762e-06, + "loss": 0.0059, + "step": 101790 + }, + { + "epoch": 0.6529135787424679, + "grad_norm": 0.16761355102062225, + "learning_rate": 8.517520365486445e-06, + "loss": 0.0027, + "step": 101800 + }, + { + "epoch": 0.6529777156362541, + "grad_norm": 0.06852803379297256, + "learning_rate": 8.517122567171085e-06, + "loss": 0.0021, + "step": 101810 + }, + { + "epoch": 0.6530418525300401, + "grad_norm": 0.1299738734960556, + "learning_rate": 8.516724724783665e-06, + "loss": 0.0035, + "step": 101820 + }, + { + "epoch": 0.6531059894238262, + "grad_norm": 0.0322495736181736, + "learning_rate": 8.516326838329172e-06, + "loss": 0.0033, + "step": 101830 + }, + { + "epoch": 0.6531701263176123, + "grad_norm": 0.11352141946554184, + "learning_rate": 8.51592890781259e-06, + "loss": 0.0021, + "step": 101840 + }, + { + "epoch": 0.6532342632113984, + "grad_norm": 0.14519372582435608, + "learning_rate": 8.515530933238904e-06, + "loss": 0.0023, + "step": 101850 + }, + { + "epoch": 0.6532984001051845, + "grad_norm": 0.13392499089241028, + "learning_rate": 8.515132914613105e-06, + "loss": 0.0021, + "step": 101860 + }, + { + "epoch": 0.6533625369989706, + "grad_norm": 0.039920829236507416, + "learning_rate": 8.514734851940178e-06, + "loss": 0.0024, + "step": 101870 + }, + { + "epoch": 0.6534266738927567, + "grad_norm": 0.07632692903280258, + "learning_rate": 8.51433674522511e-06, + "loss": 0.0143, + "step": 101880 + }, + { + "epoch": 0.6534908107865428, + "grad_norm": 0.10139046609401703, + "learning_rate": 8.513938594472895e-06, + "loss": 0.0027, + "step": 101890 + }, + { + "epoch": 0.6535549476803288, + "grad_norm": 0.06001872569322586, + "learning_rate": 8.513540399688514e-06, + "loss": 0.0023, + "step": 101900 + }, + { + "epoch": 0.653619084574115, + "grad_norm": 0.15371845662593842, + "learning_rate": 8.513142160876962e-06, + "loss": 0.0018, + "step": 101910 + }, + { + "epoch": 0.6536832214679011, + "grad_norm": 0.08152685314416885, + "learning_rate": 8.512743878043228e-06, + "loss": 0.0043, + "step": 101920 + }, + { + "epoch": 0.6537473583616872, + "grad_norm": 0.17926916480064392, + "learning_rate": 8.512345551192302e-06, + "loss": 0.0035, + "step": 101930 + }, + { + "epoch": 0.6538114952554733, + "grad_norm": 0.29780837893486023, + "learning_rate": 8.511947180329177e-06, + "loss": 0.0029, + "step": 101940 + }, + { + "epoch": 0.6538756321492594, + "grad_norm": 0.07611296325922012, + "learning_rate": 8.511548765458844e-06, + "loss": 0.0052, + "step": 101950 + }, + { + "epoch": 0.6539397690430455, + "grad_norm": 0.15462587773799896, + "learning_rate": 8.511150306586295e-06, + "loss": 0.0041, + "step": 101960 + }, + { + "epoch": 0.6540039059368316, + "grad_norm": 0.17087388038635254, + "learning_rate": 8.510751803716523e-06, + "loss": 0.0026, + "step": 101970 + }, + { + "epoch": 0.6540680428306177, + "grad_norm": 0.011938631534576416, + "learning_rate": 8.51035325685452e-06, + "loss": 0.0014, + "step": 101980 + }, + { + "epoch": 0.6541321797244037, + "grad_norm": 0.03938727453351021, + "learning_rate": 8.509954666005285e-06, + "loss": 0.0027, + "step": 101990 + }, + { + "epoch": 0.6541963166181899, + "grad_norm": 0.1165597215294838, + "learning_rate": 8.509556031173808e-06, + "loss": 0.0023, + "step": 102000 + }, + { + "epoch": 0.6542604535119759, + "grad_norm": 0.41937723755836487, + "learning_rate": 8.509157352365087e-06, + "loss": 0.0075, + "step": 102010 + }, + { + "epoch": 0.6543245904057621, + "grad_norm": 0.07766172289848328, + "learning_rate": 8.508758629584113e-06, + "loss": 0.0024, + "step": 102020 + }, + { + "epoch": 0.6543887272995481, + "grad_norm": 0.1976241171360016, + "learning_rate": 8.50835986283589e-06, + "loss": 0.0035, + "step": 102030 + }, + { + "epoch": 0.6544528641933343, + "grad_norm": 0.05941390618681908, + "learning_rate": 8.507961052125409e-06, + "loss": 0.0027, + "step": 102040 + }, + { + "epoch": 0.6545170010871203, + "grad_norm": 0.00836424995213747, + "learning_rate": 8.507562197457667e-06, + "loss": 0.003, + "step": 102050 + }, + { + "epoch": 0.6545811379809064, + "grad_norm": 0.0073838010430336, + "learning_rate": 8.507163298837667e-06, + "loss": 0.0045, + "step": 102060 + }, + { + "epoch": 0.6546452748746926, + "grad_norm": 0.08924354612827301, + "learning_rate": 8.5067643562704e-06, + "loss": 0.0048, + "step": 102070 + }, + { + "epoch": 0.6547094117684786, + "grad_norm": 0.16871915757656097, + "learning_rate": 8.506365369760872e-06, + "loss": 0.0038, + "step": 102080 + }, + { + "epoch": 0.6547735486622648, + "grad_norm": 0.7401137351989746, + "learning_rate": 8.50596633931408e-06, + "loss": 0.0029, + "step": 102090 + }, + { + "epoch": 0.6548376855560508, + "grad_norm": 0.11338161677122116, + "learning_rate": 8.505567264935025e-06, + "loss": 0.0021, + "step": 102100 + }, + { + "epoch": 0.654901822449837, + "grad_norm": 0.3115667700767517, + "learning_rate": 8.505168146628707e-06, + "loss": 0.0035, + "step": 102110 + }, + { + "epoch": 0.654965959343623, + "grad_norm": 0.14822430908679962, + "learning_rate": 8.504768984400125e-06, + "loss": 0.0029, + "step": 102120 + }, + { + "epoch": 0.6550300962374092, + "grad_norm": 0.25849664211273193, + "learning_rate": 8.504369778254283e-06, + "loss": 0.0069, + "step": 102130 + }, + { + "epoch": 0.6550942331311952, + "grad_norm": 0.10550545156002045, + "learning_rate": 8.503970528196183e-06, + "loss": 0.0033, + "step": 102140 + }, + { + "epoch": 0.6551583700249813, + "grad_norm": 0.22024445235729218, + "learning_rate": 8.503571234230829e-06, + "loss": 0.0035, + "step": 102150 + }, + { + "epoch": 0.6552225069187674, + "grad_norm": 0.11373266577720642, + "learning_rate": 8.503171896363225e-06, + "loss": 0.0027, + "step": 102160 + }, + { + "epoch": 0.6552866438125535, + "grad_norm": 0.08975688368082047, + "learning_rate": 8.502772514598371e-06, + "loss": 0.0037, + "step": 102170 + }, + { + "epoch": 0.6553507807063396, + "grad_norm": 0.10338490456342697, + "learning_rate": 8.502373088941273e-06, + "loss": 0.0028, + "step": 102180 + }, + { + "epoch": 0.6554149176001257, + "grad_norm": 0.0868222787976265, + "learning_rate": 8.501973619396941e-06, + "loss": 0.0056, + "step": 102190 + }, + { + "epoch": 0.6554790544939118, + "grad_norm": 0.0775335505604744, + "learning_rate": 8.501574105970373e-06, + "loss": 0.0028, + "step": 102200 + }, + { + "epoch": 0.6555431913876979, + "grad_norm": 0.2917906641960144, + "learning_rate": 8.501174548666582e-06, + "loss": 0.0041, + "step": 102210 + }, + { + "epoch": 0.655607328281484, + "grad_norm": 0.10409858822822571, + "learning_rate": 8.500774947490569e-06, + "loss": 0.0024, + "step": 102220 + }, + { + "epoch": 0.6556714651752701, + "grad_norm": 0.08417538553476334, + "learning_rate": 8.500375302447346e-06, + "loss": 0.0026, + "step": 102230 + }, + { + "epoch": 0.6557356020690562, + "grad_norm": 0.03060046024620533, + "learning_rate": 8.499975613541917e-06, + "loss": 0.0064, + "step": 102240 + }, + { + "epoch": 0.6557997389628423, + "grad_norm": 0.21709373593330383, + "learning_rate": 8.499575880779295e-06, + "loss": 0.0047, + "step": 102250 + }, + { + "epoch": 0.6558638758566284, + "grad_norm": 0.13628128170967102, + "learning_rate": 8.499176104164482e-06, + "loss": 0.0029, + "step": 102260 + }, + { + "epoch": 0.6559280127504145, + "grad_norm": 0.09415052086114883, + "learning_rate": 8.498776283702495e-06, + "loss": 0.0034, + "step": 102270 + }, + { + "epoch": 0.6559921496442006, + "grad_norm": 0.14981704950332642, + "learning_rate": 8.49837641939834e-06, + "loss": 0.0041, + "step": 102280 + }, + { + "epoch": 0.6560562865379866, + "grad_norm": 0.09459282457828522, + "learning_rate": 8.497976511257027e-06, + "loss": 0.0026, + "step": 102290 + }, + { + "epoch": 0.6561204234317728, + "grad_norm": 0.23267565667629242, + "learning_rate": 8.497576559283569e-06, + "loss": 0.006, + "step": 102300 + }, + { + "epoch": 0.6561845603255588, + "grad_norm": 0.2411142885684967, + "learning_rate": 8.497176563482976e-06, + "loss": 0.004, + "step": 102310 + }, + { + "epoch": 0.656248697219345, + "grad_norm": 0.05634717270731926, + "learning_rate": 8.496776523860263e-06, + "loss": 0.0026, + "step": 102320 + }, + { + "epoch": 0.656312834113131, + "grad_norm": 0.08043432235717773, + "learning_rate": 8.496376440420439e-06, + "loss": 0.0026, + "step": 102330 + }, + { + "epoch": 0.6563769710069172, + "grad_norm": 0.38449281454086304, + "learning_rate": 8.495976313168522e-06, + "loss": 0.0029, + "step": 102340 + }, + { + "epoch": 0.6564411079007033, + "grad_norm": 0.06370898336172104, + "learning_rate": 8.495576142109521e-06, + "loss": 0.003, + "step": 102350 + }, + { + "epoch": 0.6565052447944894, + "grad_norm": 0.039983682334423065, + "learning_rate": 8.495175927248454e-06, + "loss": 0.0033, + "step": 102360 + }, + { + "epoch": 0.6565693816882755, + "grad_norm": 0.16125039756298065, + "learning_rate": 8.494775668590334e-06, + "loss": 0.0036, + "step": 102370 + }, + { + "epoch": 0.6566335185820615, + "grad_norm": 0.11923573166131973, + "learning_rate": 8.494375366140177e-06, + "loss": 0.0046, + "step": 102380 + }, + { + "epoch": 0.6566976554758477, + "grad_norm": 0.037645936012268066, + "learning_rate": 8.493975019903e-06, + "loss": 0.0028, + "step": 102390 + }, + { + "epoch": 0.6567617923696337, + "grad_norm": 0.16430379450321198, + "learning_rate": 8.493574629883818e-06, + "loss": 0.0035, + "step": 102400 + }, + { + "epoch": 0.6568259292634199, + "grad_norm": 0.056253910064697266, + "learning_rate": 8.49317419608765e-06, + "loss": 0.0027, + "step": 102410 + }, + { + "epoch": 0.6568900661572059, + "grad_norm": 0.1980583667755127, + "learning_rate": 8.492773718519513e-06, + "loss": 0.0031, + "step": 102420 + }, + { + "epoch": 0.6569542030509921, + "grad_norm": 0.15534088015556335, + "learning_rate": 8.492373197184425e-06, + "loss": 0.002, + "step": 102430 + }, + { + "epoch": 0.6570183399447781, + "grad_norm": 0.056002579629421234, + "learning_rate": 8.491972632087404e-06, + "loss": 0.002, + "step": 102440 + }, + { + "epoch": 0.6570824768385642, + "grad_norm": 0.18479229509830475, + "learning_rate": 8.49157202323347e-06, + "loss": 0.0039, + "step": 102450 + }, + { + "epoch": 0.6571466137323503, + "grad_norm": 0.15330883860588074, + "learning_rate": 8.491171370627645e-06, + "loss": 0.0021, + "step": 102460 + }, + { + "epoch": 0.6572107506261364, + "grad_norm": 0.036105621606111526, + "learning_rate": 8.490770674274945e-06, + "loss": 0.0036, + "step": 102470 + }, + { + "epoch": 0.6572748875199225, + "grad_norm": 0.33400094509124756, + "learning_rate": 8.490369934180396e-06, + "loss": 0.0031, + "step": 102480 + }, + { + "epoch": 0.6573390244137086, + "grad_norm": 0.04812569171190262, + "learning_rate": 8.489969150349016e-06, + "loss": 0.003, + "step": 102490 + }, + { + "epoch": 0.6574031613074948, + "grad_norm": 0.1350105255842209, + "learning_rate": 8.489568322785828e-06, + "loss": 0.0036, + "step": 102500 + }, + { + "epoch": 0.6574672982012808, + "grad_norm": 0.11404775083065033, + "learning_rate": 8.489167451495857e-06, + "loss": 0.0024, + "step": 102510 + }, + { + "epoch": 0.657531435095067, + "grad_norm": 0.0941624641418457, + "learning_rate": 8.48876653648412e-06, + "loss": 0.0024, + "step": 102520 + }, + { + "epoch": 0.657595571988853, + "grad_norm": 0.1571592390537262, + "learning_rate": 8.48836557775565e-06, + "loss": 0.002, + "step": 102530 + }, + { + "epoch": 0.6576597088826391, + "grad_norm": 0.05018900707364082, + "learning_rate": 8.487964575315463e-06, + "loss": 0.0021, + "step": 102540 + }, + { + "epoch": 0.6577238457764252, + "grad_norm": 0.2466675490140915, + "learning_rate": 8.487563529168588e-06, + "loss": 0.0036, + "step": 102550 + }, + { + "epoch": 0.6577879826702113, + "grad_norm": 0.13947294652462006, + "learning_rate": 8.487162439320048e-06, + "loss": 0.0022, + "step": 102560 + }, + { + "epoch": 0.6578521195639974, + "grad_norm": 0.15182632207870483, + "learning_rate": 8.486761305774874e-06, + "loss": 0.0025, + "step": 102570 + }, + { + "epoch": 0.6579162564577835, + "grad_norm": 0.09285365790128708, + "learning_rate": 8.486360128538084e-06, + "loss": 0.0048, + "step": 102580 + }, + { + "epoch": 0.6579803933515695, + "grad_norm": 0.06940281391143799, + "learning_rate": 8.485958907614714e-06, + "loss": 0.0022, + "step": 102590 + }, + { + "epoch": 0.6580445302453557, + "grad_norm": 0.09990634024143219, + "learning_rate": 8.485557643009786e-06, + "loss": 0.0021, + "step": 102600 + }, + { + "epoch": 0.6581086671391417, + "grad_norm": 0.15104538202285767, + "learning_rate": 8.485156334728331e-06, + "loss": 0.0085, + "step": 102610 + }, + { + "epoch": 0.6581728040329279, + "grad_norm": 0.1043042317032814, + "learning_rate": 8.484754982775374e-06, + "loss": 0.0029, + "step": 102620 + }, + { + "epoch": 0.658236940926714, + "grad_norm": 0.4126298129558563, + "learning_rate": 8.484353587155947e-06, + "loss": 0.0016, + "step": 102630 + }, + { + "epoch": 0.6583010778205001, + "grad_norm": 0.06483572721481323, + "learning_rate": 8.48395214787508e-06, + "loss": 0.0023, + "step": 102640 + }, + { + "epoch": 0.6583652147142862, + "grad_norm": 0.16036419570446014, + "learning_rate": 8.483550664937804e-06, + "loss": 0.0019, + "step": 102650 + }, + { + "epoch": 0.6584293516080723, + "grad_norm": 0.10775353759527206, + "learning_rate": 8.483149138349146e-06, + "loss": 0.0023, + "step": 102660 + }, + { + "epoch": 0.6584934885018584, + "grad_norm": 0.31090420484542847, + "learning_rate": 8.48274756811414e-06, + "loss": 0.0038, + "step": 102670 + }, + { + "epoch": 0.6585576253956444, + "grad_norm": 0.17591483891010284, + "learning_rate": 8.48234595423782e-06, + "loss": 0.0014, + "step": 102680 + }, + { + "epoch": 0.6586217622894306, + "grad_norm": 0.2681688666343689, + "learning_rate": 8.481944296725216e-06, + "loss": 0.0051, + "step": 102690 + }, + { + "epoch": 0.6586858991832166, + "grad_norm": 0.3551349639892578, + "learning_rate": 8.48154259558136e-06, + "loss": 0.0053, + "step": 102700 + }, + { + "epoch": 0.6587500360770028, + "grad_norm": 0.04370328038930893, + "learning_rate": 8.481140850811287e-06, + "loss": 0.0023, + "step": 102710 + }, + { + "epoch": 0.6588141729707888, + "grad_norm": 0.06229441985487938, + "learning_rate": 8.480739062420033e-06, + "loss": 0.0035, + "step": 102720 + }, + { + "epoch": 0.658878309864575, + "grad_norm": 0.018355660140514374, + "learning_rate": 8.48033723041263e-06, + "loss": 0.0023, + "step": 102730 + }, + { + "epoch": 0.658942446758361, + "grad_norm": 0.15766875445842743, + "learning_rate": 8.479935354794113e-06, + "loss": 0.0031, + "step": 102740 + }, + { + "epoch": 0.6590065836521471, + "grad_norm": 0.21535296738147736, + "learning_rate": 8.47953343556952e-06, + "loss": 0.0021, + "step": 102750 + }, + { + "epoch": 0.6590707205459332, + "grad_norm": 0.058433424681425095, + "learning_rate": 8.479131472743887e-06, + "loss": 0.0022, + "step": 102760 + }, + { + "epoch": 0.6591348574397193, + "grad_norm": 0.0867202877998352, + "learning_rate": 8.47872946632225e-06, + "loss": 0.0047, + "step": 102770 + }, + { + "epoch": 0.6591989943335055, + "grad_norm": 0.15360093116760254, + "learning_rate": 8.478327416309645e-06, + "loss": 0.003, + "step": 102780 + }, + { + "epoch": 0.6592631312272915, + "grad_norm": 0.1224280372262001, + "learning_rate": 8.477925322711112e-06, + "loss": 0.0036, + "step": 102790 + }, + { + "epoch": 0.6593272681210777, + "grad_norm": 0.13371972739696503, + "learning_rate": 8.47752318553169e-06, + "loss": 0.0015, + "step": 102800 + }, + { + "epoch": 0.6593914050148637, + "grad_norm": 0.0641275942325592, + "learning_rate": 8.477121004776417e-06, + "loss": 0.0018, + "step": 102810 + }, + { + "epoch": 0.6594555419086499, + "grad_norm": 0.14218440651893616, + "learning_rate": 8.476718780450332e-06, + "loss": 0.0026, + "step": 102820 + }, + { + "epoch": 0.6595196788024359, + "grad_norm": 0.17348818480968475, + "learning_rate": 8.476316512558475e-06, + "loss": 0.0018, + "step": 102830 + }, + { + "epoch": 0.659583815696222, + "grad_norm": 0.10095454752445221, + "learning_rate": 8.47591420110589e-06, + "loss": 0.0016, + "step": 102840 + }, + { + "epoch": 0.6596479525900081, + "grad_norm": 0.048792753368616104, + "learning_rate": 8.475511846097615e-06, + "loss": 0.0044, + "step": 102850 + }, + { + "epoch": 0.6597120894837942, + "grad_norm": 0.17745624482631683, + "learning_rate": 8.475109447538691e-06, + "loss": 0.0031, + "step": 102860 + }, + { + "epoch": 0.6597762263775803, + "grad_norm": 0.12286870181560516, + "learning_rate": 8.474707005434165e-06, + "loss": 0.0028, + "step": 102870 + }, + { + "epoch": 0.6598403632713664, + "grad_norm": 0.22015081346035004, + "learning_rate": 8.474304519789077e-06, + "loss": 0.0028, + "step": 102880 + }, + { + "epoch": 0.6599045001651525, + "grad_norm": 0.20826350152492523, + "learning_rate": 8.473901990608467e-06, + "loss": 0.0021, + "step": 102890 + }, + { + "epoch": 0.6599686370589386, + "grad_norm": 0.19223351776599884, + "learning_rate": 8.473499417897384e-06, + "loss": 0.002, + "step": 102900 + }, + { + "epoch": 0.6600327739527247, + "grad_norm": 0.07205517590045929, + "learning_rate": 8.473096801660871e-06, + "loss": 0.0029, + "step": 102910 + }, + { + "epoch": 0.6600969108465108, + "grad_norm": 0.1505216509103775, + "learning_rate": 8.472694141903971e-06, + "loss": 0.0028, + "step": 102920 + }, + { + "epoch": 0.6601610477402969, + "grad_norm": 0.1652790606021881, + "learning_rate": 8.472291438631735e-06, + "loss": 0.0036, + "step": 102930 + }, + { + "epoch": 0.660225184634083, + "grad_norm": 0.10180681943893433, + "learning_rate": 8.471888691849205e-06, + "loss": 0.0018, + "step": 102940 + }, + { + "epoch": 0.6602893215278691, + "grad_norm": 0.06222664564847946, + "learning_rate": 8.471485901561426e-06, + "loss": 0.0029, + "step": 102950 + }, + { + "epoch": 0.6603534584216552, + "grad_norm": 0.1303701251745224, + "learning_rate": 8.471083067773448e-06, + "loss": 0.0029, + "step": 102960 + }, + { + "epoch": 0.6604175953154413, + "grad_norm": 0.23145528137683868, + "learning_rate": 8.470680190490319e-06, + "loss": 0.0036, + "step": 102970 + }, + { + "epoch": 0.6604817322092273, + "grad_norm": 0.13066375255584717, + "learning_rate": 8.47027726971709e-06, + "loss": 0.0024, + "step": 102980 + }, + { + "epoch": 0.6605458691030135, + "grad_norm": 0.18564635515213013, + "learning_rate": 8.469874305458802e-06, + "loss": 0.0038, + "step": 102990 + }, + { + "epoch": 0.6606100059967995, + "grad_norm": 0.1200670599937439, + "learning_rate": 8.46947129772051e-06, + "loss": 0.0023, + "step": 103000 + }, + { + "epoch": 0.6606741428905857, + "grad_norm": 0.05317830666899681, + "learning_rate": 8.469068246507264e-06, + "loss": 0.002, + "step": 103010 + }, + { + "epoch": 0.6607382797843717, + "grad_norm": 0.33798936009407043, + "learning_rate": 8.468665151824113e-06, + "loss": 0.0032, + "step": 103020 + }, + { + "epoch": 0.6608024166781579, + "grad_norm": 0.17617559432983398, + "learning_rate": 8.46826201367611e-06, + "loss": 0.0023, + "step": 103030 + }, + { + "epoch": 0.6608665535719439, + "grad_norm": 0.2390466332435608, + "learning_rate": 8.467858832068303e-06, + "loss": 0.0029, + "step": 103040 + }, + { + "epoch": 0.66093069046573, + "grad_norm": 0.11099249869585037, + "learning_rate": 8.467455607005748e-06, + "loss": 0.0021, + "step": 103050 + }, + { + "epoch": 0.6609948273595162, + "grad_norm": 0.06324342638254166, + "learning_rate": 8.467052338493496e-06, + "loss": 0.0019, + "step": 103060 + }, + { + "epoch": 0.6610589642533022, + "grad_norm": 0.11985218524932861, + "learning_rate": 8.466649026536599e-06, + "loss": 0.0034, + "step": 103070 + }, + { + "epoch": 0.6611231011470884, + "grad_norm": 0.22718679904937744, + "learning_rate": 8.466245671140115e-06, + "loss": 0.0025, + "step": 103080 + }, + { + "epoch": 0.6611872380408744, + "grad_norm": 0.07870079576969147, + "learning_rate": 8.465842272309093e-06, + "loss": 0.0021, + "step": 103090 + }, + { + "epoch": 0.6612513749346606, + "grad_norm": 0.3526822030544281, + "learning_rate": 8.46543883004859e-06, + "loss": 0.0031, + "step": 103100 + }, + { + "epoch": 0.6613155118284466, + "grad_norm": 0.08089350908994675, + "learning_rate": 8.465035344363664e-06, + "loss": 0.0069, + "step": 103110 + }, + { + "epoch": 0.6613796487222328, + "grad_norm": 0.676216185092926, + "learning_rate": 8.464631815259368e-06, + "loss": 0.0056, + "step": 103120 + }, + { + "epoch": 0.6614437856160188, + "grad_norm": 0.07288841158151627, + "learning_rate": 8.464228242740758e-06, + "loss": 0.003, + "step": 103130 + }, + { + "epoch": 0.661507922509805, + "grad_norm": 0.17153863608837128, + "learning_rate": 8.463824626812893e-06, + "loss": 0.0018, + "step": 103140 + }, + { + "epoch": 0.661572059403591, + "grad_norm": 0.06785765290260315, + "learning_rate": 8.463420967480828e-06, + "loss": 0.0031, + "step": 103150 + }, + { + "epoch": 0.6616361962973771, + "grad_norm": 0.26055482029914856, + "learning_rate": 8.463017264749626e-06, + "loss": 0.003, + "step": 103160 + }, + { + "epoch": 0.6617003331911632, + "grad_norm": 0.12863045930862427, + "learning_rate": 8.462613518624341e-06, + "loss": 0.0033, + "step": 103170 + }, + { + "epoch": 0.6617644700849493, + "grad_norm": 0.1738891750574112, + "learning_rate": 8.462209729110035e-06, + "loss": 0.0024, + "step": 103180 + }, + { + "epoch": 0.6618286069787355, + "grad_norm": 0.06312116235494614, + "learning_rate": 8.461805896211766e-06, + "loss": 0.0025, + "step": 103190 + }, + { + "epoch": 0.6618927438725215, + "grad_norm": 0.008281880989670753, + "learning_rate": 8.461402019934594e-06, + "loss": 0.0027, + "step": 103200 + }, + { + "epoch": 0.6619568807663077, + "grad_norm": 0.1504332423210144, + "learning_rate": 8.460998100283584e-06, + "loss": 0.0025, + "step": 103210 + }, + { + "epoch": 0.6620210176600937, + "grad_norm": 0.10833275318145752, + "learning_rate": 8.460594137263792e-06, + "loss": 0.0033, + "step": 103220 + }, + { + "epoch": 0.6620851545538798, + "grad_norm": 0.15487238764762878, + "learning_rate": 8.460190130880282e-06, + "loss": 0.0046, + "step": 103230 + }, + { + "epoch": 0.6621492914476659, + "grad_norm": 0.08042314648628235, + "learning_rate": 8.459786081138116e-06, + "loss": 0.0018, + "step": 103240 + }, + { + "epoch": 0.662213428341452, + "grad_norm": 0.09010224789381027, + "learning_rate": 8.45938198804236e-06, + "loss": 0.0034, + "step": 103250 + }, + { + "epoch": 0.6622775652352381, + "grad_norm": 0.16834987699985504, + "learning_rate": 8.458977851598074e-06, + "loss": 0.0031, + "step": 103260 + }, + { + "epoch": 0.6623417021290242, + "grad_norm": 0.11550785601139069, + "learning_rate": 8.458573671810323e-06, + "loss": 0.0027, + "step": 103270 + }, + { + "epoch": 0.6624058390228102, + "grad_norm": 0.14988340437412262, + "learning_rate": 8.458169448684172e-06, + "loss": 0.0045, + "step": 103280 + }, + { + "epoch": 0.6624699759165964, + "grad_norm": 0.03008304163813591, + "learning_rate": 8.457765182224688e-06, + "loss": 0.0028, + "step": 103290 + }, + { + "epoch": 0.6625341128103824, + "grad_norm": 0.20641933381557465, + "learning_rate": 8.457360872436933e-06, + "loss": 0.0019, + "step": 103300 + }, + { + "epoch": 0.6625982497041686, + "grad_norm": 0.09332001209259033, + "learning_rate": 8.456956519325976e-06, + "loss": 0.0045, + "step": 103310 + }, + { + "epoch": 0.6626623865979546, + "grad_norm": 0.16936729848384857, + "learning_rate": 8.456552122896884e-06, + "loss": 0.0037, + "step": 103320 + }, + { + "epoch": 0.6627265234917408, + "grad_norm": 0.03418375924229622, + "learning_rate": 8.456147683154722e-06, + "loss": 0.0039, + "step": 103330 + }, + { + "epoch": 0.6627906603855269, + "grad_norm": 0.10110632330179214, + "learning_rate": 8.455743200104563e-06, + "loss": 0.0022, + "step": 103340 + }, + { + "epoch": 0.662854797279313, + "grad_norm": 0.07637202739715576, + "learning_rate": 8.455338673751467e-06, + "loss": 0.0045, + "step": 103350 + }, + { + "epoch": 0.6629189341730991, + "grad_norm": 0.02998146414756775, + "learning_rate": 8.454934104100513e-06, + "loss": 0.0041, + "step": 103360 + }, + { + "epoch": 0.6629830710668851, + "grad_norm": 0.14679434895515442, + "learning_rate": 8.454529491156762e-06, + "loss": 0.0031, + "step": 103370 + }, + { + "epoch": 0.6630472079606713, + "grad_norm": 0.1804307997226715, + "learning_rate": 8.454124834925289e-06, + "loss": 0.005, + "step": 103380 + }, + { + "epoch": 0.6631113448544573, + "grad_norm": 0.044211965054273605, + "learning_rate": 8.45372013541116e-06, + "loss": 0.0091, + "step": 103390 + }, + { + "epoch": 0.6631754817482435, + "grad_norm": 0.2491360604763031, + "learning_rate": 8.453315392619453e-06, + "loss": 0.0031, + "step": 103400 + }, + { + "epoch": 0.6632396186420295, + "grad_norm": 0.08462058752775192, + "learning_rate": 8.452910606555236e-06, + "loss": 0.0022, + "step": 103410 + }, + { + "epoch": 0.6633037555358157, + "grad_norm": 0.22145047783851624, + "learning_rate": 8.45250577722358e-06, + "loss": 0.0062, + "step": 103420 + }, + { + "epoch": 0.6633678924296017, + "grad_norm": 0.0859028697013855, + "learning_rate": 8.45210090462956e-06, + "loss": 0.0022, + "step": 103430 + }, + { + "epoch": 0.6634320293233879, + "grad_norm": 0.08140894770622253, + "learning_rate": 8.451695988778246e-06, + "loss": 0.003, + "step": 103440 + }, + { + "epoch": 0.6634961662171739, + "grad_norm": 0.16863515973091125, + "learning_rate": 8.451291029674717e-06, + "loss": 0.0059, + "step": 103450 + }, + { + "epoch": 0.66356030311096, + "grad_norm": 0.08412231504917145, + "learning_rate": 8.450886027324045e-06, + "loss": 0.0045, + "step": 103460 + }, + { + "epoch": 0.6636244400047462, + "grad_norm": 0.2956025004386902, + "learning_rate": 8.450480981731303e-06, + "loss": 0.0036, + "step": 103470 + }, + { + "epoch": 0.6636885768985322, + "grad_norm": 0.18763288855552673, + "learning_rate": 8.450075892901571e-06, + "loss": 0.0029, + "step": 103480 + }, + { + "epoch": 0.6637527137923184, + "grad_norm": 0.35756680369377136, + "learning_rate": 8.449670760839919e-06, + "loss": 0.0031, + "step": 103490 + }, + { + "epoch": 0.6638168506861044, + "grad_norm": 0.12096337974071503, + "learning_rate": 8.44926558555143e-06, + "loss": 0.0033, + "step": 103500 + }, + { + "epoch": 0.6638809875798906, + "grad_norm": 0.2119651436805725, + "learning_rate": 8.448860367041176e-06, + "loss": 0.0033, + "step": 103510 + }, + { + "epoch": 0.6639451244736766, + "grad_norm": 0.37487390637397766, + "learning_rate": 8.448455105314238e-06, + "loss": 0.0027, + "step": 103520 + }, + { + "epoch": 0.6640092613674627, + "grad_norm": 0.049838390201330185, + "learning_rate": 8.448049800375691e-06, + "loss": 0.003, + "step": 103530 + }, + { + "epoch": 0.6640733982612488, + "grad_norm": 0.08873938024044037, + "learning_rate": 8.447644452230617e-06, + "loss": 0.0024, + "step": 103540 + }, + { + "epoch": 0.6641375351550349, + "grad_norm": 0.529564380645752, + "learning_rate": 8.447239060884094e-06, + "loss": 0.0024, + "step": 103550 + }, + { + "epoch": 0.664201672048821, + "grad_norm": 0.11816108971834183, + "learning_rate": 8.446833626341202e-06, + "loss": 0.0031, + "step": 103560 + }, + { + "epoch": 0.6642658089426071, + "grad_norm": 0.08333977311849594, + "learning_rate": 8.44642814860702e-06, + "loss": 0.0023, + "step": 103570 + }, + { + "epoch": 0.6643299458363932, + "grad_norm": 0.11790352314710617, + "learning_rate": 8.446022627686632e-06, + "loss": 0.0035, + "step": 103580 + }, + { + "epoch": 0.6643940827301793, + "grad_norm": 0.10611840337514877, + "learning_rate": 8.445617063585116e-06, + "loss": 0.003, + "step": 103590 + }, + { + "epoch": 0.6644582196239653, + "grad_norm": 0.19753526151180267, + "learning_rate": 8.445211456307557e-06, + "loss": 0.0038, + "step": 103600 + }, + { + "epoch": 0.6645223565177515, + "grad_norm": 0.04932020232081413, + "learning_rate": 8.444805805859036e-06, + "loss": 0.0058, + "step": 103610 + }, + { + "epoch": 0.6645864934115376, + "grad_norm": 0.17282375693321228, + "learning_rate": 8.444400112244635e-06, + "loss": 0.0031, + "step": 103620 + }, + { + "epoch": 0.6646506303053237, + "grad_norm": 0.10250164568424225, + "learning_rate": 8.44399437546944e-06, + "loss": 0.003, + "step": 103630 + }, + { + "epoch": 0.6647147671991098, + "grad_norm": 0.29081571102142334, + "learning_rate": 8.443588595538534e-06, + "loss": 0.0053, + "step": 103640 + }, + { + "epoch": 0.6647789040928959, + "grad_norm": 0.28330540657043457, + "learning_rate": 8.443182772457002e-06, + "loss": 0.0023, + "step": 103650 + }, + { + "epoch": 0.664843040986682, + "grad_norm": 0.24046741425991058, + "learning_rate": 8.442776906229928e-06, + "loss": 0.0053, + "step": 103660 + }, + { + "epoch": 0.664907177880468, + "grad_norm": 0.0589718371629715, + "learning_rate": 8.442370996862402e-06, + "loss": 0.0012, + "step": 103670 + }, + { + "epoch": 0.6649713147742542, + "grad_norm": 0.14298389852046967, + "learning_rate": 8.441965044359504e-06, + "loss": 0.0016, + "step": 103680 + }, + { + "epoch": 0.6650354516680402, + "grad_norm": 0.1267651468515396, + "learning_rate": 8.441559048726324e-06, + "loss": 0.0025, + "step": 103690 + }, + { + "epoch": 0.6650995885618264, + "grad_norm": 0.014233440160751343, + "learning_rate": 8.441153009967951e-06, + "loss": 0.0016, + "step": 103700 + }, + { + "epoch": 0.6651637254556124, + "grad_norm": 0.18992894887924194, + "learning_rate": 8.44074692808947e-06, + "loss": 0.0032, + "step": 103710 + }, + { + "epoch": 0.6652278623493986, + "grad_norm": 0.06404879689216614, + "learning_rate": 8.440340803095972e-06, + "loss": 0.0023, + "step": 103720 + }, + { + "epoch": 0.6652919992431846, + "grad_norm": 0.03764347359538078, + "learning_rate": 8.439934634992547e-06, + "loss": 0.002, + "step": 103730 + }, + { + "epoch": 0.6653561361369708, + "grad_norm": 0.42796599864959717, + "learning_rate": 8.439528423784278e-06, + "loss": 0.0044, + "step": 103740 + }, + { + "epoch": 0.6654202730307568, + "grad_norm": 0.07582218945026398, + "learning_rate": 8.439122169476262e-06, + "loss": 0.0028, + "step": 103750 + }, + { + "epoch": 0.6654844099245429, + "grad_norm": 0.30783307552337646, + "learning_rate": 8.438715872073588e-06, + "loss": 0.0028, + "step": 103760 + }, + { + "epoch": 0.6655485468183291, + "grad_norm": 0.10885994136333466, + "learning_rate": 8.438309531581345e-06, + "loss": 0.0026, + "step": 103770 + }, + { + "epoch": 0.6656126837121151, + "grad_norm": 0.1359860599040985, + "learning_rate": 8.437903148004627e-06, + "loss": 0.0048, + "step": 103780 + }, + { + "epoch": 0.6656768206059013, + "grad_norm": 0.107600137591362, + "learning_rate": 8.437496721348526e-06, + "loss": 0.0038, + "step": 103790 + }, + { + "epoch": 0.6657409574996873, + "grad_norm": 0.10635979473590851, + "learning_rate": 8.437090251618134e-06, + "loss": 0.0028, + "step": 103800 + }, + { + "epoch": 0.6658050943934735, + "grad_norm": 0.11203435063362122, + "learning_rate": 8.436683738818546e-06, + "loss": 0.005, + "step": 103810 + }, + { + "epoch": 0.6658692312872595, + "grad_norm": 0.07321670651435852, + "learning_rate": 8.436277182954852e-06, + "loss": 0.0028, + "step": 103820 + }, + { + "epoch": 0.6659333681810456, + "grad_norm": 0.1538209766149521, + "learning_rate": 8.435870584032149e-06, + "loss": 0.0024, + "step": 103830 + }, + { + "epoch": 0.6659975050748317, + "grad_norm": 0.15127509832382202, + "learning_rate": 8.435463942055534e-06, + "loss": 0.0035, + "step": 103840 + }, + { + "epoch": 0.6660616419686178, + "grad_norm": 0.07844885438680649, + "learning_rate": 8.435057257030099e-06, + "loss": 0.0027, + "step": 103850 + }, + { + "epoch": 0.6661257788624039, + "grad_norm": 0.13272012770175934, + "learning_rate": 8.434650528960944e-06, + "loss": 0.0026, + "step": 103860 + }, + { + "epoch": 0.66618991575619, + "grad_norm": 0.07353035360574722, + "learning_rate": 8.43424375785316e-06, + "loss": 0.0027, + "step": 103870 + }, + { + "epoch": 0.6662540526499761, + "grad_norm": 0.0674884021282196, + "learning_rate": 8.433836943711849e-06, + "loss": 0.0033, + "step": 103880 + }, + { + "epoch": 0.6663181895437622, + "grad_norm": 0.054299406707286835, + "learning_rate": 8.433430086542107e-06, + "loss": 0.0026, + "step": 103890 + }, + { + "epoch": 0.6663823264375484, + "grad_norm": 0.24317777156829834, + "learning_rate": 8.433023186349032e-06, + "loss": 0.0022, + "step": 103900 + }, + { + "epoch": 0.6664464633313344, + "grad_norm": 0.1725510209798813, + "learning_rate": 8.432616243137723e-06, + "loss": 0.0078, + "step": 103910 + }, + { + "epoch": 0.6665106002251205, + "grad_norm": 0.18429318070411682, + "learning_rate": 8.432209256913279e-06, + "loss": 0.0035, + "step": 103920 + }, + { + "epoch": 0.6665747371189066, + "grad_norm": 0.11312185972929001, + "learning_rate": 8.4318022276808e-06, + "loss": 0.0034, + "step": 103930 + }, + { + "epoch": 0.6666388740126927, + "grad_norm": 0.07917297631502151, + "learning_rate": 8.431395155445386e-06, + "loss": 0.0036, + "step": 103940 + }, + { + "epoch": 0.6667030109064788, + "grad_norm": 0.1329788863658905, + "learning_rate": 8.430988040212139e-06, + "loss": 0.0037, + "step": 103950 + }, + { + "epoch": 0.6667671478002649, + "grad_norm": 0.08584097772836685, + "learning_rate": 8.430580881986159e-06, + "loss": 0.0036, + "step": 103960 + }, + { + "epoch": 0.666831284694051, + "grad_norm": 0.13817085325717926, + "learning_rate": 8.43017368077255e-06, + "loss": 0.0037, + "step": 103970 + }, + { + "epoch": 0.6668954215878371, + "grad_norm": 0.20671610534191132, + "learning_rate": 8.429766436576413e-06, + "loss": 0.0025, + "step": 103980 + }, + { + "epoch": 0.6669595584816231, + "grad_norm": 0.2456267923116684, + "learning_rate": 8.42935914940285e-06, + "loss": 0.0022, + "step": 103990 + }, + { + "epoch": 0.6670236953754093, + "grad_norm": 0.07276738435029984, + "learning_rate": 8.428951819256968e-06, + "loss": 0.0034, + "step": 104000 + }, + { + "epoch": 0.6670878322691953, + "grad_norm": 0.04010990262031555, + "learning_rate": 8.428544446143867e-06, + "loss": 0.0035, + "step": 104010 + }, + { + "epoch": 0.6671519691629815, + "grad_norm": 0.10394640266895294, + "learning_rate": 8.428137030068654e-06, + "loss": 0.0028, + "step": 104020 + }, + { + "epoch": 0.6672161060567675, + "grad_norm": 0.06314918398857117, + "learning_rate": 8.427729571036435e-06, + "loss": 0.0012, + "step": 104030 + }, + { + "epoch": 0.6672802429505537, + "grad_norm": 0.00908320490270853, + "learning_rate": 8.427322069052315e-06, + "loss": 0.0033, + "step": 104040 + }, + { + "epoch": 0.6673443798443398, + "grad_norm": 0.1839200258255005, + "learning_rate": 8.4269145241214e-06, + "loss": 0.0026, + "step": 104050 + }, + { + "epoch": 0.6674085167381258, + "grad_norm": 0.11998382210731506, + "learning_rate": 8.426506936248794e-06, + "loss": 0.002, + "step": 104060 + }, + { + "epoch": 0.667472653631912, + "grad_norm": 0.30450543761253357, + "learning_rate": 8.42609930543961e-06, + "loss": 0.0024, + "step": 104070 + }, + { + "epoch": 0.667536790525698, + "grad_norm": 0.12889540195465088, + "learning_rate": 8.425691631698954e-06, + "loss": 0.0017, + "step": 104080 + }, + { + "epoch": 0.6676009274194842, + "grad_norm": 0.0068665496073663235, + "learning_rate": 8.425283915031931e-06, + "loss": 0.0021, + "step": 104090 + }, + { + "epoch": 0.6676650643132702, + "grad_norm": 0.08512814342975616, + "learning_rate": 8.424876155443653e-06, + "loss": 0.0038, + "step": 104100 + }, + { + "epoch": 0.6677292012070564, + "grad_norm": 0.026818742975592613, + "learning_rate": 8.42446835293923e-06, + "loss": 0.002, + "step": 104110 + }, + { + "epoch": 0.6677933381008424, + "grad_norm": 0.08405889570713043, + "learning_rate": 8.42406050752377e-06, + "loss": 0.0051, + "step": 104120 + }, + { + "epoch": 0.6678574749946286, + "grad_norm": 0.25920823216438293, + "learning_rate": 8.423652619202386e-06, + "loss": 0.0036, + "step": 104130 + }, + { + "epoch": 0.6679216118884146, + "grad_norm": 0.1724155843257904, + "learning_rate": 8.423244687980188e-06, + "loss": 0.005, + "step": 104140 + }, + { + "epoch": 0.6679857487822007, + "grad_norm": 0.06032963842153549, + "learning_rate": 8.422836713862285e-06, + "loss": 0.0026, + "step": 104150 + }, + { + "epoch": 0.6680498856759868, + "grad_norm": 0.03222297877073288, + "learning_rate": 8.422428696853795e-06, + "loss": 0.0043, + "step": 104160 + }, + { + "epoch": 0.6681140225697729, + "grad_norm": 0.03689512237906456, + "learning_rate": 8.422020636959826e-06, + "loss": 0.0019, + "step": 104170 + }, + { + "epoch": 0.6681781594635591, + "grad_norm": 0.12515857815742493, + "learning_rate": 8.421612534185493e-06, + "loss": 0.0025, + "step": 104180 + }, + { + "epoch": 0.6682422963573451, + "grad_norm": 0.09245526045560837, + "learning_rate": 8.421204388535908e-06, + "loss": 0.0034, + "step": 104190 + }, + { + "epoch": 0.6683064332511313, + "grad_norm": 0.1860002726316452, + "learning_rate": 8.42079620001619e-06, + "loss": 0.0028, + "step": 104200 + }, + { + "epoch": 0.6683705701449173, + "grad_norm": 0.08889582753181458, + "learning_rate": 8.420387968631448e-06, + "loss": 0.003, + "step": 104210 + }, + { + "epoch": 0.6684347070387034, + "grad_norm": 0.20289276540279388, + "learning_rate": 8.419979694386802e-06, + "loss": 0.0046, + "step": 104220 + }, + { + "epoch": 0.6684988439324895, + "grad_norm": 0.16857382655143738, + "learning_rate": 8.419571377287366e-06, + "loss": 0.0028, + "step": 104230 + }, + { + "epoch": 0.6685629808262756, + "grad_norm": 0.012377630919218063, + "learning_rate": 8.419163017338254e-06, + "loss": 0.0034, + "step": 104240 + }, + { + "epoch": 0.6686271177200617, + "grad_norm": 0.15620973706245422, + "learning_rate": 8.41875461454459e-06, + "loss": 0.0028, + "step": 104250 + }, + { + "epoch": 0.6686912546138478, + "grad_norm": 0.08912920951843262, + "learning_rate": 8.418346168911485e-06, + "loss": 0.0016, + "step": 104260 + }, + { + "epoch": 0.6687553915076339, + "grad_norm": 0.25533559918403625, + "learning_rate": 8.41793768044406e-06, + "loss": 0.0031, + "step": 104270 + }, + { + "epoch": 0.66881952840142, + "grad_norm": 0.0831306129693985, + "learning_rate": 8.417529149147431e-06, + "loss": 0.0021, + "step": 104280 + }, + { + "epoch": 0.668883665295206, + "grad_norm": 0.14187638461589813, + "learning_rate": 8.417120575026721e-06, + "loss": 0.0024, + "step": 104290 + }, + { + "epoch": 0.6689478021889922, + "grad_norm": 0.14810237288475037, + "learning_rate": 8.41671195808705e-06, + "loss": 0.0027, + "step": 104300 + }, + { + "epoch": 0.6690119390827782, + "grad_norm": 0.22693736851215363, + "learning_rate": 8.416303298333533e-06, + "loss": 0.0038, + "step": 104310 + }, + { + "epoch": 0.6690760759765644, + "grad_norm": 0.07919272780418396, + "learning_rate": 8.415894595771295e-06, + "loss": 0.0025, + "step": 104320 + }, + { + "epoch": 0.6691402128703505, + "grad_norm": 0.09138061106204987, + "learning_rate": 8.415485850405456e-06, + "loss": 0.0027, + "step": 104330 + }, + { + "epoch": 0.6692043497641366, + "grad_norm": 0.21584628522396088, + "learning_rate": 8.41507706224114e-06, + "loss": 0.0021, + "step": 104340 + }, + { + "epoch": 0.6692684866579227, + "grad_norm": 0.04365155100822449, + "learning_rate": 8.414668231283468e-06, + "loss": 0.0022, + "step": 104350 + }, + { + "epoch": 0.6693326235517087, + "grad_norm": 0.2859312891960144, + "learning_rate": 8.41425935753756e-06, + "loss": 0.0035, + "step": 104360 + }, + { + "epoch": 0.6693967604454949, + "grad_norm": 0.09819292277097702, + "learning_rate": 8.413850441008545e-06, + "loss": 0.003, + "step": 104370 + }, + { + "epoch": 0.6694608973392809, + "grad_norm": 0.02959253638982773, + "learning_rate": 8.41344148170154e-06, + "loss": 0.0017, + "step": 104380 + }, + { + "epoch": 0.6695250342330671, + "grad_norm": 0.26345840096473694, + "learning_rate": 8.413032479621678e-06, + "loss": 0.0044, + "step": 104390 + }, + { + "epoch": 0.6695891711268531, + "grad_norm": 0.15691974759101868, + "learning_rate": 8.412623434774078e-06, + "loss": 0.0033, + "step": 104400 + }, + { + "epoch": 0.6696533080206393, + "grad_norm": 0.156972736120224, + "learning_rate": 8.412214347163867e-06, + "loss": 0.003, + "step": 104410 + }, + { + "epoch": 0.6697174449144253, + "grad_norm": 0.084920734167099, + "learning_rate": 8.411805216796172e-06, + "loss": 0.0027, + "step": 104420 + }, + { + "epoch": 0.6697815818082115, + "grad_norm": 0.12769630551338196, + "learning_rate": 8.41139604367612e-06, + "loss": 0.0041, + "step": 104430 + }, + { + "epoch": 0.6698457187019975, + "grad_norm": 0.3678871989250183, + "learning_rate": 8.410986827808836e-06, + "loss": 0.0047, + "step": 104440 + }, + { + "epoch": 0.6699098555957836, + "grad_norm": 0.06114371865987778, + "learning_rate": 8.41057756919945e-06, + "loss": 0.0022, + "step": 104450 + }, + { + "epoch": 0.6699739924895698, + "grad_norm": 0.08031799644231796, + "learning_rate": 8.41016826785309e-06, + "loss": 0.0056, + "step": 104460 + }, + { + "epoch": 0.6700381293833558, + "grad_norm": 0.2285887897014618, + "learning_rate": 8.409758923774885e-06, + "loss": 0.0015, + "step": 104470 + }, + { + "epoch": 0.670102266277142, + "grad_norm": 0.009799333289265633, + "learning_rate": 8.409349536969962e-06, + "loss": 0.0025, + "step": 104480 + }, + { + "epoch": 0.670166403170928, + "grad_norm": 0.16017772257328033, + "learning_rate": 8.408940107443452e-06, + "loss": 0.0043, + "step": 104490 + }, + { + "epoch": 0.6702305400647142, + "grad_norm": 0.043988995254039764, + "learning_rate": 8.40853063520049e-06, + "loss": 0.0021, + "step": 104500 + }, + { + "epoch": 0.6702946769585002, + "grad_norm": 0.1321130394935608, + "learning_rate": 8.4081211202462e-06, + "loss": 0.0031, + "step": 104510 + }, + { + "epoch": 0.6703588138522864, + "grad_norm": 0.23636986315250397, + "learning_rate": 8.407711562585717e-06, + "loss": 0.0023, + "step": 104520 + }, + { + "epoch": 0.6704229507460724, + "grad_norm": 0.06349032372236252, + "learning_rate": 8.407301962224174e-06, + "loss": 0.0017, + "step": 104530 + }, + { + "epoch": 0.6704870876398585, + "grad_norm": 0.10937687754631042, + "learning_rate": 8.406892319166701e-06, + "loss": 0.0031, + "step": 104540 + }, + { + "epoch": 0.6705512245336446, + "grad_norm": 0.3712023198604584, + "learning_rate": 8.406482633418432e-06, + "loss": 0.0052, + "step": 104550 + }, + { + "epoch": 0.6706153614274307, + "grad_norm": 0.09075483679771423, + "learning_rate": 8.406072904984501e-06, + "loss": 0.0032, + "step": 104560 + }, + { + "epoch": 0.6706794983212168, + "grad_norm": 0.13852781057357788, + "learning_rate": 8.405663133870044e-06, + "loss": 0.0035, + "step": 104570 + }, + { + "epoch": 0.6707436352150029, + "grad_norm": 0.3938485085964203, + "learning_rate": 8.405253320080194e-06, + "loss": 0.0041, + "step": 104580 + }, + { + "epoch": 0.670807772108789, + "grad_norm": 0.20136603713035583, + "learning_rate": 8.404843463620083e-06, + "loss": 0.0049, + "step": 104590 + }, + { + "epoch": 0.6708719090025751, + "grad_norm": 0.13110202550888062, + "learning_rate": 8.404433564494852e-06, + "loss": 0.003, + "step": 104600 + }, + { + "epoch": 0.6709360458963612, + "grad_norm": 0.26542943716049194, + "learning_rate": 8.404023622709636e-06, + "loss": 0.0027, + "step": 104610 + }, + { + "epoch": 0.6710001827901473, + "grad_norm": 0.1232231929898262, + "learning_rate": 8.403613638269569e-06, + "loss": 0.0032, + "step": 104620 + }, + { + "epoch": 0.6710643196839334, + "grad_norm": 0.15485745668411255, + "learning_rate": 8.403203611179794e-06, + "loss": 0.0056, + "step": 104630 + }, + { + "epoch": 0.6711284565777195, + "grad_norm": 0.06909587234258652, + "learning_rate": 8.402793541445443e-06, + "loss": 0.004, + "step": 104640 + }, + { + "epoch": 0.6711925934715056, + "grad_norm": 0.011975661851465702, + "learning_rate": 8.402383429071657e-06, + "loss": 0.0046, + "step": 104650 + }, + { + "epoch": 0.6712567303652917, + "grad_norm": 0.16399651765823364, + "learning_rate": 8.401973274063576e-06, + "loss": 0.0029, + "step": 104660 + }, + { + "epoch": 0.6713208672590778, + "grad_norm": 0.04149583727121353, + "learning_rate": 8.401563076426338e-06, + "loss": 0.0042, + "step": 104670 + }, + { + "epoch": 0.6713850041528638, + "grad_norm": 0.15252408385276794, + "learning_rate": 8.401152836165085e-06, + "loss": 0.0054, + "step": 104680 + }, + { + "epoch": 0.67144914104665, + "grad_norm": 0.051730990409851074, + "learning_rate": 8.400742553284954e-06, + "loss": 0.0033, + "step": 104690 + }, + { + "epoch": 0.671513277940436, + "grad_norm": 0.3143163323402405, + "learning_rate": 8.400332227791089e-06, + "loss": 0.0034, + "step": 104700 + }, + { + "epoch": 0.6715774148342222, + "grad_norm": 0.11804922670125961, + "learning_rate": 8.39992185968863e-06, + "loss": 0.0017, + "step": 104710 + }, + { + "epoch": 0.6716415517280082, + "grad_norm": 0.12357106059789658, + "learning_rate": 8.399511448982724e-06, + "loss": 0.0053, + "step": 104720 + }, + { + "epoch": 0.6717056886217944, + "grad_norm": 0.0229622982442379, + "learning_rate": 8.399100995678506e-06, + "loss": 0.0027, + "step": 104730 + }, + { + "epoch": 0.6717698255155805, + "grad_norm": 0.2615741789340973, + "learning_rate": 8.398690499781125e-06, + "loss": 0.0016, + "step": 104740 + }, + { + "epoch": 0.6718339624093665, + "grad_norm": 0.012564479373395443, + "learning_rate": 8.398279961295723e-06, + "loss": 0.0023, + "step": 104750 + }, + { + "epoch": 0.6718980993031527, + "grad_norm": 0.09483960270881653, + "learning_rate": 8.397869380227444e-06, + "loss": 0.003, + "step": 104760 + }, + { + "epoch": 0.6719622361969387, + "grad_norm": 0.15407006442546844, + "learning_rate": 8.397458756581432e-06, + "loss": 0.0036, + "step": 104770 + }, + { + "epoch": 0.6720263730907249, + "grad_norm": 0.0985838919878006, + "learning_rate": 8.397048090362836e-06, + "loss": 0.003, + "step": 104780 + }, + { + "epoch": 0.6720905099845109, + "grad_norm": 0.06903088092803955, + "learning_rate": 8.396637381576797e-06, + "loss": 0.0015, + "step": 104790 + }, + { + "epoch": 0.6721546468782971, + "grad_norm": 0.15456879138946533, + "learning_rate": 8.396226630228466e-06, + "loss": 0.0033, + "step": 104800 + }, + { + "epoch": 0.6722187837720831, + "grad_norm": 0.14249612390995026, + "learning_rate": 8.395815836322988e-06, + "loss": 0.004, + "step": 104810 + }, + { + "epoch": 0.6722829206658693, + "grad_norm": 0.08765123039484024, + "learning_rate": 8.395404999865511e-06, + "loss": 0.0019, + "step": 104820 + }, + { + "epoch": 0.6723470575596553, + "grad_norm": 0.09199008345603943, + "learning_rate": 8.394994120861182e-06, + "loss": 0.0013, + "step": 104830 + }, + { + "epoch": 0.6724111944534414, + "grad_norm": 0.09696024656295776, + "learning_rate": 8.39458319931515e-06, + "loss": 0.0023, + "step": 104840 + }, + { + "epoch": 0.6724753313472275, + "grad_norm": 0.26852133870124817, + "learning_rate": 8.394172235232564e-06, + "loss": 0.0034, + "step": 104850 + }, + { + "epoch": 0.6725394682410136, + "grad_norm": 0.21888944506645203, + "learning_rate": 8.393761228618576e-06, + "loss": 0.0027, + "step": 104860 + }, + { + "epoch": 0.6726036051347997, + "grad_norm": 0.14674261212348938, + "learning_rate": 8.393350179478333e-06, + "loss": 0.0029, + "step": 104870 + }, + { + "epoch": 0.6726677420285858, + "grad_norm": 0.0999063029885292, + "learning_rate": 8.392939087816987e-06, + "loss": 0.0038, + "step": 104880 + }, + { + "epoch": 0.672731878922372, + "grad_norm": 0.18848982453346252, + "learning_rate": 8.39252795363969e-06, + "loss": 0.0019, + "step": 104890 + }, + { + "epoch": 0.672796015816158, + "grad_norm": 0.11278213560581207, + "learning_rate": 8.392116776951592e-06, + "loss": 0.003, + "step": 104900 + }, + { + "epoch": 0.6728601527099441, + "grad_norm": 0.043803002685308456, + "learning_rate": 8.391705557757848e-06, + "loss": 0.0025, + "step": 104910 + }, + { + "epoch": 0.6729242896037302, + "grad_norm": 0.09668834507465363, + "learning_rate": 8.391294296063608e-06, + "loss": 0.004, + "step": 104920 + }, + { + "epoch": 0.6729884264975163, + "grad_norm": 0.0840938612818718, + "learning_rate": 8.390882991874028e-06, + "loss": 0.0025, + "step": 104930 + }, + { + "epoch": 0.6730525633913024, + "grad_norm": 0.22783967852592468, + "learning_rate": 8.390471645194258e-06, + "loss": 0.0025, + "step": 104940 + }, + { + "epoch": 0.6731167002850885, + "grad_norm": 0.1316479593515396, + "learning_rate": 8.390060256029457e-06, + "loss": 0.0033, + "step": 104950 + }, + { + "epoch": 0.6731808371788746, + "grad_norm": 0.04257906228303909, + "learning_rate": 8.38964882438478e-06, + "loss": 0.004, + "step": 104960 + }, + { + "epoch": 0.6732449740726607, + "grad_norm": 0.16650620102882385, + "learning_rate": 8.389237350265378e-06, + "loss": 0.0049, + "step": 104970 + }, + { + "epoch": 0.6733091109664467, + "grad_norm": 0.04592067003250122, + "learning_rate": 8.38882583367641e-06, + "loss": 0.003, + "step": 104980 + }, + { + "epoch": 0.6733732478602329, + "grad_norm": 0.040006231516599655, + "learning_rate": 8.388414274623034e-06, + "loss": 0.0022, + "step": 104990 + }, + { + "epoch": 0.6734373847540189, + "grad_norm": 0.07911369949579239, + "learning_rate": 8.388002673110406e-06, + "loss": 0.0026, + "step": 105000 + }, + { + "epoch": 0.6735015216478051, + "grad_norm": 0.15251368284225464, + "learning_rate": 8.387591029143681e-06, + "loss": 0.0027, + "step": 105010 + }, + { + "epoch": 0.6735656585415911, + "grad_norm": 0.05519964545965195, + "learning_rate": 8.387179342728019e-06, + "loss": 0.0025, + "step": 105020 + }, + { + "epoch": 0.6736297954353773, + "grad_norm": 0.16007065773010254, + "learning_rate": 8.38676761386858e-06, + "loss": 0.0026, + "step": 105030 + }, + { + "epoch": 0.6736939323291634, + "grad_norm": 0.09965284168720245, + "learning_rate": 8.386355842570522e-06, + "loss": 0.0024, + "step": 105040 + }, + { + "epoch": 0.6737580692229495, + "grad_norm": 0.09603425860404968, + "learning_rate": 8.385944028839006e-06, + "loss": 0.0026, + "step": 105050 + }, + { + "epoch": 0.6738222061167356, + "grad_norm": 0.06844042986631393, + "learning_rate": 8.38553217267919e-06, + "loss": 0.0026, + "step": 105060 + }, + { + "epoch": 0.6738863430105216, + "grad_norm": 0.05518994480371475, + "learning_rate": 8.385120274096238e-06, + "loss": 0.0014, + "step": 105070 + }, + { + "epoch": 0.6739504799043078, + "grad_norm": 0.2585274875164032, + "learning_rate": 8.384708333095308e-06, + "loss": 0.003, + "step": 105080 + }, + { + "epoch": 0.6740146167980938, + "grad_norm": 0.08640757948160172, + "learning_rate": 8.384296349681565e-06, + "loss": 0.003, + "step": 105090 + }, + { + "epoch": 0.67407875369188, + "grad_norm": 0.2759339213371277, + "learning_rate": 8.383884323860168e-06, + "loss": 0.0045, + "step": 105100 + }, + { + "epoch": 0.674142890585666, + "grad_norm": 0.10095436871051788, + "learning_rate": 8.383472255636285e-06, + "loss": 0.0022, + "step": 105110 + }, + { + "epoch": 0.6742070274794522, + "grad_norm": 0.2735072374343872, + "learning_rate": 8.383060145015075e-06, + "loss": 0.0028, + "step": 105120 + }, + { + "epoch": 0.6742711643732382, + "grad_norm": 0.03803719952702522, + "learning_rate": 8.382647992001703e-06, + "loss": 0.0022, + "step": 105130 + }, + { + "epoch": 0.6743353012670243, + "grad_norm": 0.14733968675136566, + "learning_rate": 8.382235796601334e-06, + "loss": 0.0028, + "step": 105140 + }, + { + "epoch": 0.6743994381608104, + "grad_norm": 0.20038215816020966, + "learning_rate": 8.381823558819133e-06, + "loss": 0.0029, + "step": 105150 + }, + { + "epoch": 0.6744635750545965, + "grad_norm": 0.059229981154203415, + "learning_rate": 8.381411278660268e-06, + "loss": 0.0037, + "step": 105160 + }, + { + "epoch": 0.6745277119483827, + "grad_norm": 0.1375369429588318, + "learning_rate": 8.3809989561299e-06, + "loss": 0.0025, + "step": 105170 + }, + { + "epoch": 0.6745918488421687, + "grad_norm": 0.055774934589862823, + "learning_rate": 8.380586591233201e-06, + "loss": 0.0024, + "step": 105180 + }, + { + "epoch": 0.6746559857359549, + "grad_norm": 0.11341521143913269, + "learning_rate": 8.380174183975336e-06, + "loss": 0.0026, + "step": 105190 + }, + { + "epoch": 0.6747201226297409, + "grad_norm": 0.20973770320415497, + "learning_rate": 8.37976173436147e-06, + "loss": 0.0048, + "step": 105200 + }, + { + "epoch": 0.674784259523527, + "grad_norm": 0.15058109164237976, + "learning_rate": 8.379349242396777e-06, + "loss": 0.0063, + "step": 105210 + }, + { + "epoch": 0.6748483964173131, + "grad_norm": 0.09401237219572067, + "learning_rate": 8.378936708086422e-06, + "loss": 0.0039, + "step": 105220 + }, + { + "epoch": 0.6749125333110992, + "grad_norm": 0.16512343287467957, + "learning_rate": 8.378524131435575e-06, + "loss": 0.0021, + "step": 105230 + }, + { + "epoch": 0.6749766702048853, + "grad_norm": 0.08649599552154541, + "learning_rate": 8.378111512449406e-06, + "loss": 0.0025, + "step": 105240 + }, + { + "epoch": 0.6750408070986714, + "grad_norm": 0.05820036306977272, + "learning_rate": 8.377698851133085e-06, + "loss": 0.0022, + "step": 105250 + }, + { + "epoch": 0.6751049439924575, + "grad_norm": 0.12218081206083298, + "learning_rate": 8.377286147491784e-06, + "loss": 0.003, + "step": 105260 + }, + { + "epoch": 0.6751690808862436, + "grad_norm": 0.5234624147415161, + "learning_rate": 8.376873401530674e-06, + "loss": 0.0043, + "step": 105270 + }, + { + "epoch": 0.6752332177800296, + "grad_norm": 0.20814195275306702, + "learning_rate": 8.376460613254926e-06, + "loss": 0.0019, + "step": 105280 + }, + { + "epoch": 0.6752973546738158, + "grad_norm": 0.10753771662712097, + "learning_rate": 8.376047782669713e-06, + "loss": 0.0042, + "step": 105290 + }, + { + "epoch": 0.6753614915676018, + "grad_norm": 0.12748068571090698, + "learning_rate": 8.37563490978021e-06, + "loss": 0.0031, + "step": 105300 + }, + { + "epoch": 0.675425628461388, + "grad_norm": 0.21075694262981415, + "learning_rate": 8.375221994591589e-06, + "loss": 0.0032, + "step": 105310 + }, + { + "epoch": 0.6754897653551741, + "grad_norm": 0.0806947872042656, + "learning_rate": 8.374809037109024e-06, + "loss": 0.0034, + "step": 105320 + }, + { + "epoch": 0.6755539022489602, + "grad_norm": 0.21661756932735443, + "learning_rate": 8.374396037337688e-06, + "loss": 0.0024, + "step": 105330 + }, + { + "epoch": 0.6756180391427463, + "grad_norm": 0.07489059120416641, + "learning_rate": 8.373982995282762e-06, + "loss": 0.0021, + "step": 105340 + }, + { + "epoch": 0.6756821760365324, + "grad_norm": 0.11895561218261719, + "learning_rate": 8.373569910949414e-06, + "loss": 0.0021, + "step": 105350 + }, + { + "epoch": 0.6757463129303185, + "grad_norm": 0.04192807152867317, + "learning_rate": 8.373156784342825e-06, + "loss": 0.0026, + "step": 105360 + }, + { + "epoch": 0.6758104498241045, + "grad_norm": 0.03970417380332947, + "learning_rate": 8.372743615468171e-06, + "loss": 0.0021, + "step": 105370 + }, + { + "epoch": 0.6758745867178907, + "grad_norm": 0.11419545859098434, + "learning_rate": 8.372330404330628e-06, + "loss": 0.0056, + "step": 105380 + }, + { + "epoch": 0.6759387236116767, + "grad_norm": 0.31705695390701294, + "learning_rate": 8.371917150935378e-06, + "loss": 0.0032, + "step": 105390 + }, + { + "epoch": 0.6760028605054629, + "grad_norm": 0.03702298924326897, + "learning_rate": 8.371503855287593e-06, + "loss": 0.0019, + "step": 105400 + }, + { + "epoch": 0.6760669973992489, + "grad_norm": 0.12052658200263977, + "learning_rate": 8.371090517392455e-06, + "loss": 0.0049, + "step": 105410 + }, + { + "epoch": 0.6761311342930351, + "grad_norm": 0.012704034335911274, + "learning_rate": 8.370677137255145e-06, + "loss": 0.0028, + "step": 105420 + }, + { + "epoch": 0.6761952711868211, + "grad_norm": 0.11155135929584503, + "learning_rate": 8.370263714880843e-06, + "loss": 0.0034, + "step": 105430 + }, + { + "epoch": 0.6762594080806072, + "grad_norm": 0.14455078542232513, + "learning_rate": 8.369850250274725e-06, + "loss": 0.004, + "step": 105440 + }, + { + "epoch": 0.6763235449743934, + "grad_norm": 0.15305106341838837, + "learning_rate": 8.369436743441977e-06, + "loss": 0.0026, + "step": 105450 + }, + { + "epoch": 0.6763876818681794, + "grad_norm": 0.2909550070762634, + "learning_rate": 8.369023194387777e-06, + "loss": 0.0043, + "step": 105460 + }, + { + "epoch": 0.6764518187619656, + "grad_norm": 0.2682478427886963, + "learning_rate": 8.368609603117307e-06, + "loss": 0.0028, + "step": 105470 + }, + { + "epoch": 0.6765159556557516, + "grad_norm": 0.13300544023513794, + "learning_rate": 8.368195969635756e-06, + "loss": 0.0023, + "step": 105480 + }, + { + "epoch": 0.6765800925495378, + "grad_norm": 0.1423490047454834, + "learning_rate": 8.367782293948299e-06, + "loss": 0.0029, + "step": 105490 + }, + { + "epoch": 0.6766442294433238, + "grad_norm": 0.10811354219913483, + "learning_rate": 8.367368576060122e-06, + "loss": 0.0032, + "step": 105500 + }, + { + "epoch": 0.67670836633711, + "grad_norm": 0.15842215716838837, + "learning_rate": 8.366954815976412e-06, + "loss": 0.0031, + "step": 105510 + }, + { + "epoch": 0.676772503230896, + "grad_norm": 0.03228527307510376, + "learning_rate": 8.366541013702351e-06, + "loss": 0.0021, + "step": 105520 + }, + { + "epoch": 0.6768366401246821, + "grad_norm": 0.23487135767936707, + "learning_rate": 8.366127169243126e-06, + "loss": 0.0022, + "step": 105530 + }, + { + "epoch": 0.6769007770184682, + "grad_norm": 0.10690513998270035, + "learning_rate": 8.365713282603923e-06, + "loss": 0.0018, + "step": 105540 + }, + { + "epoch": 0.6769649139122543, + "grad_norm": 0.23628225922584534, + "learning_rate": 8.365299353789924e-06, + "loss": 0.003, + "step": 105550 + }, + { + "epoch": 0.6770290508060404, + "grad_norm": 0.14227193593978882, + "learning_rate": 8.364885382806321e-06, + "loss": 0.01, + "step": 105560 + }, + { + "epoch": 0.6770931876998265, + "grad_norm": 0.09515100717544556, + "learning_rate": 8.364471369658299e-06, + "loss": 0.0022, + "step": 105570 + }, + { + "epoch": 0.6771573245936126, + "grad_norm": 0.3159456253051758, + "learning_rate": 8.364057314351044e-06, + "loss": 0.0026, + "step": 105580 + }, + { + "epoch": 0.6772214614873987, + "grad_norm": 0.12352485954761505, + "learning_rate": 8.36364321688975e-06, + "loss": 0.0028, + "step": 105590 + }, + { + "epoch": 0.6772855983811849, + "grad_norm": 0.06309013813734055, + "learning_rate": 8.3632290772796e-06, + "loss": 0.002, + "step": 105600 + }, + { + "epoch": 0.6773497352749709, + "grad_norm": 0.08990509808063507, + "learning_rate": 8.362814895525787e-06, + "loss": 0.0024, + "step": 105610 + }, + { + "epoch": 0.677413872168757, + "grad_norm": 0.2627856433391571, + "learning_rate": 8.362400671633501e-06, + "loss": 0.0028, + "step": 105620 + }, + { + "epoch": 0.6774780090625431, + "grad_norm": 0.16149525344371796, + "learning_rate": 8.361986405607931e-06, + "loss": 0.0022, + "step": 105630 + }, + { + "epoch": 0.6775421459563292, + "grad_norm": 0.20449747145175934, + "learning_rate": 8.361572097454268e-06, + "loss": 0.0037, + "step": 105640 + }, + { + "epoch": 0.6776062828501153, + "grad_norm": 0.48451846837997437, + "learning_rate": 8.361157747177703e-06, + "loss": 0.0081, + "step": 105650 + }, + { + "epoch": 0.6776704197439014, + "grad_norm": 0.09318164736032486, + "learning_rate": 8.360743354783432e-06, + "loss": 0.0016, + "step": 105660 + }, + { + "epoch": 0.6777345566376874, + "grad_norm": 0.1288747489452362, + "learning_rate": 8.360328920276644e-06, + "loss": 0.0029, + "step": 105670 + }, + { + "epoch": 0.6777986935314736, + "grad_norm": 0.05139090493321419, + "learning_rate": 8.359914443662532e-06, + "loss": 0.0033, + "step": 105680 + }, + { + "epoch": 0.6778628304252596, + "grad_norm": 0.11765347421169281, + "learning_rate": 8.359499924946291e-06, + "loss": 0.0027, + "step": 105690 + }, + { + "epoch": 0.6779269673190458, + "grad_norm": 0.22567494213581085, + "learning_rate": 8.359085364133115e-06, + "loss": 0.0028, + "step": 105700 + }, + { + "epoch": 0.6779911042128318, + "grad_norm": 0.1894441545009613, + "learning_rate": 8.358670761228198e-06, + "loss": 0.0045, + "step": 105710 + }, + { + "epoch": 0.678055241106618, + "grad_norm": 0.09808771312236786, + "learning_rate": 8.358256116236738e-06, + "loss": 0.0025, + "step": 105720 + }, + { + "epoch": 0.6781193780004041, + "grad_norm": 0.1675807237625122, + "learning_rate": 8.357841429163927e-06, + "loss": 0.0018, + "step": 105730 + }, + { + "epoch": 0.6781835148941902, + "grad_norm": 0.2744348645210266, + "learning_rate": 8.357426700014963e-06, + "loss": 0.0015, + "step": 105740 + }, + { + "epoch": 0.6782476517879763, + "grad_norm": 0.16625988483428955, + "learning_rate": 8.357011928795045e-06, + "loss": 0.0032, + "step": 105750 + }, + { + "epoch": 0.6783117886817623, + "grad_norm": 0.04192342236638069, + "learning_rate": 8.356597115509365e-06, + "loss": 0.0041, + "step": 105760 + }, + { + "epoch": 0.6783759255755485, + "grad_norm": 0.02599765732884407, + "learning_rate": 8.356182260163128e-06, + "loss": 0.0026, + "step": 105770 + }, + { + "epoch": 0.6784400624693345, + "grad_norm": 0.18142566084861755, + "learning_rate": 8.355767362761526e-06, + "loss": 0.002, + "step": 105780 + }, + { + "epoch": 0.6785041993631207, + "grad_norm": 0.12043923884630203, + "learning_rate": 8.355352423309762e-06, + "loss": 0.0055, + "step": 105790 + }, + { + "epoch": 0.6785683362569067, + "grad_norm": 0.05678357183933258, + "learning_rate": 8.354937441813032e-06, + "loss": 0.0047, + "step": 105800 + }, + { + "epoch": 0.6786324731506929, + "grad_norm": 0.04629009962081909, + "learning_rate": 8.354522418276541e-06, + "loss": 0.0045, + "step": 105810 + }, + { + "epoch": 0.6786966100444789, + "grad_norm": 0.19256654381752014, + "learning_rate": 8.354107352705484e-06, + "loss": 0.0023, + "step": 105820 + }, + { + "epoch": 0.678760746938265, + "grad_norm": 0.015678269788622856, + "learning_rate": 8.353692245105066e-06, + "loss": 0.0044, + "step": 105830 + }, + { + "epoch": 0.6788248838320511, + "grad_norm": 0.11686255782842636, + "learning_rate": 8.353277095480487e-06, + "loss": 0.0101, + "step": 105840 + }, + { + "epoch": 0.6788890207258372, + "grad_norm": 0.20758382976055145, + "learning_rate": 8.352861903836951e-06, + "loss": 0.0034, + "step": 105850 + }, + { + "epoch": 0.6789531576196233, + "grad_norm": 0.11493109166622162, + "learning_rate": 8.352446670179656e-06, + "loss": 0.0027, + "step": 105860 + }, + { + "epoch": 0.6790172945134094, + "grad_norm": 0.17362689971923828, + "learning_rate": 8.35203139451381e-06, + "loss": 0.0031, + "step": 105870 + }, + { + "epoch": 0.6790814314071956, + "grad_norm": 0.09448014944791794, + "learning_rate": 8.351616076844615e-06, + "loss": 0.0032, + "step": 105880 + }, + { + "epoch": 0.6791455683009816, + "grad_norm": 0.07692663371562958, + "learning_rate": 8.351200717177276e-06, + "loss": 0.0033, + "step": 105890 + }, + { + "epoch": 0.6792097051947678, + "grad_norm": 0.2039974331855774, + "learning_rate": 8.350785315516997e-06, + "loss": 0.0036, + "step": 105900 + }, + { + "epoch": 0.6792738420885538, + "grad_norm": 0.4582759737968445, + "learning_rate": 8.350369871868982e-06, + "loss": 0.0033, + "step": 105910 + }, + { + "epoch": 0.6793379789823399, + "grad_norm": 0.28730398416519165, + "learning_rate": 8.349954386238437e-06, + "loss": 0.0025, + "step": 105920 + }, + { + "epoch": 0.679402115876126, + "grad_norm": 0.11131205409765244, + "learning_rate": 8.34953885863057e-06, + "loss": 0.0011, + "step": 105930 + }, + { + "epoch": 0.6794662527699121, + "grad_norm": 0.3950798213481903, + "learning_rate": 8.349123289050589e-06, + "loss": 0.0023, + "step": 105940 + }, + { + "epoch": 0.6795303896636982, + "grad_norm": 0.13830557465553284, + "learning_rate": 8.348707677503698e-06, + "loss": 0.0033, + "step": 105950 + }, + { + "epoch": 0.6795945265574843, + "grad_norm": 0.20884209871292114, + "learning_rate": 8.348292023995108e-06, + "loss": 0.0031, + "step": 105960 + }, + { + "epoch": 0.6796586634512704, + "grad_norm": 0.22090451419353485, + "learning_rate": 8.347876328530024e-06, + "loss": 0.0024, + "step": 105970 + }, + { + "epoch": 0.6797228003450565, + "grad_norm": 0.29745471477508545, + "learning_rate": 8.347460591113658e-06, + "loss": 0.005, + "step": 105980 + }, + { + "epoch": 0.6797869372388425, + "grad_norm": 0.09324681013822556, + "learning_rate": 8.34704481175122e-06, + "loss": 0.0036, + "step": 105990 + }, + { + "epoch": 0.6798510741326287, + "grad_norm": 0.28559282422065735, + "learning_rate": 8.346628990447916e-06, + "loss": 0.003, + "step": 106000 + }, + { + "epoch": 0.6799152110264148, + "grad_norm": 0.1490248143672943, + "learning_rate": 8.34621312720896e-06, + "loss": 0.0045, + "step": 106010 + }, + { + "epoch": 0.6799793479202009, + "grad_norm": 0.04657105728983879, + "learning_rate": 8.345797222039562e-06, + "loss": 0.0033, + "step": 106020 + }, + { + "epoch": 0.680043484813987, + "grad_norm": 0.09144826978445053, + "learning_rate": 8.345381274944932e-06, + "loss": 0.007, + "step": 106030 + }, + { + "epoch": 0.6801076217077731, + "grad_norm": 0.11840663850307465, + "learning_rate": 8.344965285930286e-06, + "loss": 0.0028, + "step": 106040 + }, + { + "epoch": 0.6801717586015592, + "grad_norm": 0.18734104931354523, + "learning_rate": 8.344549255000833e-06, + "loss": 0.0026, + "step": 106050 + }, + { + "epoch": 0.6802358954953452, + "grad_norm": 0.1605960726737976, + "learning_rate": 8.344133182161788e-06, + "loss": 0.0021, + "step": 106060 + }, + { + "epoch": 0.6803000323891314, + "grad_norm": 0.18541651964187622, + "learning_rate": 8.343717067418364e-06, + "loss": 0.0032, + "step": 106070 + }, + { + "epoch": 0.6803641692829174, + "grad_norm": 0.14585164189338684, + "learning_rate": 8.343300910775777e-06, + "loss": 0.0025, + "step": 106080 + }, + { + "epoch": 0.6804283061767036, + "grad_norm": 0.27248677611351013, + "learning_rate": 8.342884712239238e-06, + "loss": 0.0021, + "step": 106090 + }, + { + "epoch": 0.6804924430704896, + "grad_norm": 0.3925865590572357, + "learning_rate": 8.342468471813965e-06, + "loss": 0.0065, + "step": 106100 + }, + { + "epoch": 0.6805565799642758, + "grad_norm": 0.08288202434778214, + "learning_rate": 8.342052189505175e-06, + "loss": 0.0036, + "step": 106110 + }, + { + "epoch": 0.6806207168580618, + "grad_norm": 0.20843671262264252, + "learning_rate": 8.34163586531808e-06, + "loss": 0.0015, + "step": 106120 + }, + { + "epoch": 0.680684853751848, + "grad_norm": 0.10642247647047043, + "learning_rate": 8.3412194992579e-06, + "loss": 0.0024, + "step": 106130 + }, + { + "epoch": 0.680748990645634, + "grad_norm": 0.07317498326301575, + "learning_rate": 8.340803091329852e-06, + "loss": 0.0031, + "step": 106140 + }, + { + "epoch": 0.6808131275394201, + "grad_norm": 0.09721281379461288, + "learning_rate": 8.340386641539153e-06, + "loss": 0.0045, + "step": 106150 + }, + { + "epoch": 0.6808772644332063, + "grad_norm": 0.08040506392717361, + "learning_rate": 8.339970149891024e-06, + "loss": 0.003, + "step": 106160 + }, + { + "epoch": 0.6809414013269923, + "grad_norm": 0.2040429264307022, + "learning_rate": 8.33955361639068e-06, + "loss": 0.0035, + "step": 106170 + }, + { + "epoch": 0.6810055382207785, + "grad_norm": 0.2061181366443634, + "learning_rate": 8.339137041043343e-06, + "loss": 0.0025, + "step": 106180 + }, + { + "epoch": 0.6810696751145645, + "grad_norm": 0.135468527674675, + "learning_rate": 8.338720423854233e-06, + "loss": 0.0032, + "step": 106190 + }, + { + "epoch": 0.6811338120083507, + "grad_norm": 0.09755928814411163, + "learning_rate": 8.33830376482857e-06, + "loss": 0.009, + "step": 106200 + }, + { + "epoch": 0.6811979489021367, + "grad_norm": 0.3986220955848694, + "learning_rate": 8.337887063971575e-06, + "loss": 0.0035, + "step": 106210 + }, + { + "epoch": 0.6812620857959228, + "grad_norm": 0.22247038781642914, + "learning_rate": 8.337470321288468e-06, + "loss": 0.0035, + "step": 106220 + }, + { + "epoch": 0.6813262226897089, + "grad_norm": 0.16053420305252075, + "learning_rate": 8.337053536784474e-06, + "loss": 0.0051, + "step": 106230 + }, + { + "epoch": 0.681390359583495, + "grad_norm": 0.08029992133378983, + "learning_rate": 8.336636710464813e-06, + "loss": 0.0033, + "step": 106240 + }, + { + "epoch": 0.6814544964772811, + "grad_norm": 0.19211438298225403, + "learning_rate": 8.336219842334712e-06, + "loss": 0.0026, + "step": 106250 + }, + { + "epoch": 0.6815186333710672, + "grad_norm": 0.21166279911994934, + "learning_rate": 8.335802932399389e-06, + "loss": 0.0022, + "step": 106260 + }, + { + "epoch": 0.6815827702648533, + "grad_norm": 0.11894948780536652, + "learning_rate": 8.335385980664072e-06, + "loss": 0.0019, + "step": 106270 + }, + { + "epoch": 0.6816469071586394, + "grad_norm": 0.11670980602502823, + "learning_rate": 8.334968987133987e-06, + "loss": 0.0025, + "step": 106280 + }, + { + "epoch": 0.6817110440524256, + "grad_norm": 0.23702040314674377, + "learning_rate": 8.334551951814354e-06, + "loss": 0.0016, + "step": 106290 + }, + { + "epoch": 0.6817751809462116, + "grad_norm": 0.13341011106967926, + "learning_rate": 8.334134874710403e-06, + "loss": 0.0033, + "step": 106300 + }, + { + "epoch": 0.6818393178399977, + "grad_norm": 0.0981462150812149, + "learning_rate": 8.33371775582736e-06, + "loss": 0.0045, + "step": 106310 + }, + { + "epoch": 0.6819034547337838, + "grad_norm": 0.0713023766875267, + "learning_rate": 8.33330059517045e-06, + "loss": 0.0016, + "step": 106320 + }, + { + "epoch": 0.6819675916275699, + "grad_norm": 0.0074376314878463745, + "learning_rate": 8.332883392744903e-06, + "loss": 0.0026, + "step": 106330 + }, + { + "epoch": 0.682031728521356, + "grad_norm": 0.11056984215974808, + "learning_rate": 8.332466148555944e-06, + "loss": 0.0023, + "step": 106340 + }, + { + "epoch": 0.6820958654151421, + "grad_norm": 0.2786552608013153, + "learning_rate": 8.332048862608801e-06, + "loss": 0.0031, + "step": 106350 + }, + { + "epoch": 0.6821600023089281, + "grad_norm": 0.10690515488386154, + "learning_rate": 8.331631534908706e-06, + "loss": 0.0014, + "step": 106360 + }, + { + "epoch": 0.6822241392027143, + "grad_norm": 0.1778476983308792, + "learning_rate": 8.331214165460885e-06, + "loss": 0.005, + "step": 106370 + }, + { + "epoch": 0.6822882760965003, + "grad_norm": 0.3530474603176117, + "learning_rate": 8.330796754270572e-06, + "loss": 0.005, + "step": 106380 + }, + { + "epoch": 0.6823524129902865, + "grad_norm": 0.15935179591178894, + "learning_rate": 8.330379301342994e-06, + "loss": 0.0027, + "step": 106390 + }, + { + "epoch": 0.6824165498840725, + "grad_norm": 0.21754521131515503, + "learning_rate": 8.329961806683384e-06, + "loss": 0.0016, + "step": 106400 + }, + { + "epoch": 0.6824806867778587, + "grad_norm": 0.5963869690895081, + "learning_rate": 8.329544270296973e-06, + "loss": 0.0104, + "step": 106410 + }, + { + "epoch": 0.6825448236716447, + "grad_norm": 0.14186711609363556, + "learning_rate": 8.329126692188991e-06, + "loss": 0.0041, + "step": 106420 + }, + { + "epoch": 0.6826089605654309, + "grad_norm": 0.10958068817853928, + "learning_rate": 8.328709072364674e-06, + "loss": 0.0019, + "step": 106430 + }, + { + "epoch": 0.682673097459217, + "grad_norm": 0.1777481883764267, + "learning_rate": 8.328291410829252e-06, + "loss": 0.0051, + "step": 106440 + }, + { + "epoch": 0.682737234353003, + "grad_norm": 0.4342871904373169, + "learning_rate": 8.327873707587961e-06, + "loss": 0.0014, + "step": 106450 + }, + { + "epoch": 0.6828013712467892, + "grad_norm": 0.20745062828063965, + "learning_rate": 8.327455962646034e-06, + "loss": 0.0018, + "step": 106460 + }, + { + "epoch": 0.6828655081405752, + "grad_norm": 0.18711547553539276, + "learning_rate": 8.327038176008705e-06, + "loss": 0.0032, + "step": 106470 + }, + { + "epoch": 0.6829296450343614, + "grad_norm": 0.1682271659374237, + "learning_rate": 8.32662034768121e-06, + "loss": 0.001, + "step": 106480 + }, + { + "epoch": 0.6829937819281474, + "grad_norm": 0.06280054152011871, + "learning_rate": 8.326202477668786e-06, + "loss": 0.0031, + "step": 106490 + }, + { + "epoch": 0.6830579188219336, + "grad_norm": 0.4445003867149353, + "learning_rate": 8.325784565976665e-06, + "loss": 0.0032, + "step": 106500 + }, + { + "epoch": 0.6831220557157196, + "grad_norm": 0.27170541882514954, + "learning_rate": 8.325366612610088e-06, + "loss": 0.0043, + "step": 106510 + }, + { + "epoch": 0.6831861926095057, + "grad_norm": 0.034090470522642136, + "learning_rate": 8.324948617574292e-06, + "loss": 0.0022, + "step": 106520 + }, + { + "epoch": 0.6832503295032918, + "grad_norm": 0.08589209616184235, + "learning_rate": 8.32453058087451e-06, + "loss": 0.0023, + "step": 106530 + }, + { + "epoch": 0.6833144663970779, + "grad_norm": 0.030689751729369164, + "learning_rate": 8.324112502515986e-06, + "loss": 0.0017, + "step": 106540 + }, + { + "epoch": 0.683378603290864, + "grad_norm": 0.23931913077831268, + "learning_rate": 8.323694382503958e-06, + "loss": 0.0032, + "step": 106550 + }, + { + "epoch": 0.6834427401846501, + "grad_norm": 0.11819394677877426, + "learning_rate": 8.323276220843661e-06, + "loss": 0.0017, + "step": 106560 + }, + { + "epoch": 0.6835068770784362, + "grad_norm": 0.15443769097328186, + "learning_rate": 8.322858017540341e-06, + "loss": 0.0036, + "step": 106570 + }, + { + "epoch": 0.6835710139722223, + "grad_norm": 0.02909116819500923, + "learning_rate": 8.322439772599233e-06, + "loss": 0.003, + "step": 106580 + }, + { + "epoch": 0.6836351508660085, + "grad_norm": 0.1778852790594101, + "learning_rate": 8.32202148602558e-06, + "loss": 0.0023, + "step": 106590 + }, + { + "epoch": 0.6836992877597945, + "grad_norm": 0.06952908635139465, + "learning_rate": 8.321603157824625e-06, + "loss": 0.0035, + "step": 106600 + }, + { + "epoch": 0.6837634246535806, + "grad_norm": 0.069687619805336, + "learning_rate": 8.321184788001609e-06, + "loss": 0.0059, + "step": 106610 + }, + { + "epoch": 0.6838275615473667, + "grad_norm": 0.07610204070806503, + "learning_rate": 8.32076637656177e-06, + "loss": 0.002, + "step": 106620 + }, + { + "epoch": 0.6838916984411528, + "grad_norm": 0.1031593456864357, + "learning_rate": 8.320347923510359e-06, + "loss": 0.0035, + "step": 106630 + }, + { + "epoch": 0.6839558353349389, + "grad_norm": 0.14455530047416687, + "learning_rate": 8.319929428852613e-06, + "loss": 0.0034, + "step": 106640 + }, + { + "epoch": 0.684019972228725, + "grad_norm": 0.11688445508480072, + "learning_rate": 8.31951089259378e-06, + "loss": 0.0082, + "step": 106650 + }, + { + "epoch": 0.684084109122511, + "grad_norm": 0.26938173174858093, + "learning_rate": 8.319092314739103e-06, + "loss": 0.0043, + "step": 106660 + }, + { + "epoch": 0.6841482460162972, + "grad_norm": 0.08609388768672943, + "learning_rate": 8.318673695293829e-06, + "loss": 0.0047, + "step": 106670 + }, + { + "epoch": 0.6842123829100832, + "grad_norm": 0.22282618284225464, + "learning_rate": 8.3182550342632e-06, + "loss": 0.0035, + "step": 106680 + }, + { + "epoch": 0.6842765198038694, + "grad_norm": 0.14885510504245758, + "learning_rate": 8.317836331652463e-06, + "loss": 0.0018, + "step": 106690 + }, + { + "epoch": 0.6843406566976554, + "grad_norm": 0.12032639980316162, + "learning_rate": 8.317417587466866e-06, + "loss": 0.0015, + "step": 106700 + }, + { + "epoch": 0.6844047935914416, + "grad_norm": 0.1598871946334839, + "learning_rate": 8.316998801711655e-06, + "loss": 0.0024, + "step": 106710 + }, + { + "epoch": 0.6844689304852277, + "grad_norm": 0.10817835479974747, + "learning_rate": 8.316579974392081e-06, + "loss": 0.0048, + "step": 106720 + }, + { + "epoch": 0.6845330673790138, + "grad_norm": 0.16026908159255981, + "learning_rate": 8.316161105513387e-06, + "loss": 0.0026, + "step": 106730 + }, + { + "epoch": 0.6845972042727999, + "grad_norm": 0.17550593614578247, + "learning_rate": 8.315742195080824e-06, + "loss": 0.0031, + "step": 106740 + }, + { + "epoch": 0.684661341166586, + "grad_norm": 0.08394819498062134, + "learning_rate": 8.315323243099644e-06, + "loss": 0.0053, + "step": 106750 + }, + { + "epoch": 0.6847254780603721, + "grad_norm": 0.17244599759578705, + "learning_rate": 8.314904249575093e-06, + "loss": 0.0032, + "step": 106760 + }, + { + "epoch": 0.6847896149541581, + "grad_norm": 0.06605623662471771, + "learning_rate": 8.314485214512424e-06, + "loss": 0.0024, + "step": 106770 + }, + { + "epoch": 0.6848537518479443, + "grad_norm": 0.2065979242324829, + "learning_rate": 8.314066137916885e-06, + "loss": 0.0034, + "step": 106780 + }, + { + "epoch": 0.6849178887417303, + "grad_norm": 0.22780033946037292, + "learning_rate": 8.31364701979373e-06, + "loss": 0.0026, + "step": 106790 + }, + { + "epoch": 0.6849820256355165, + "grad_norm": 0.2013179212808609, + "learning_rate": 8.313227860148208e-06, + "loss": 0.0021, + "step": 106800 + }, + { + "epoch": 0.6850461625293025, + "grad_norm": 0.08906995505094528, + "learning_rate": 8.312808658985575e-06, + "loss": 0.0025, + "step": 106810 + }, + { + "epoch": 0.6851102994230887, + "grad_norm": 0.049765344709157944, + "learning_rate": 8.31238941631108e-06, + "loss": 0.0028, + "step": 106820 + }, + { + "epoch": 0.6851744363168747, + "grad_norm": 0.017044998705387115, + "learning_rate": 8.311970132129978e-06, + "loss": 0.0031, + "step": 106830 + }, + { + "epoch": 0.6852385732106608, + "grad_norm": 0.23861342668533325, + "learning_rate": 8.311550806447523e-06, + "loss": 0.0019, + "step": 106840 + }, + { + "epoch": 0.6853027101044469, + "grad_norm": 0.14938272535800934, + "learning_rate": 8.311131439268971e-06, + "loss": 0.0028, + "step": 106850 + }, + { + "epoch": 0.685366846998233, + "grad_norm": 0.11288265883922577, + "learning_rate": 8.310712030599576e-06, + "loss": 0.0028, + "step": 106860 + }, + { + "epoch": 0.6854309838920192, + "grad_norm": 0.02465154230594635, + "learning_rate": 8.310292580444593e-06, + "loss": 0.002, + "step": 106870 + }, + { + "epoch": 0.6854951207858052, + "grad_norm": 0.05949242785573006, + "learning_rate": 8.309873088809278e-06, + "loss": 0.0039, + "step": 106880 + }, + { + "epoch": 0.6855592576795914, + "grad_norm": 0.20034784078598022, + "learning_rate": 8.309453555698886e-06, + "loss": 0.0029, + "step": 106890 + }, + { + "epoch": 0.6856233945733774, + "grad_norm": 0.19417355954647064, + "learning_rate": 8.309033981118678e-06, + "loss": 0.0026, + "step": 106900 + }, + { + "epoch": 0.6856875314671635, + "grad_norm": 0.022811679169535637, + "learning_rate": 8.308614365073908e-06, + "loss": 0.0017, + "step": 106910 + }, + { + "epoch": 0.6857516683609496, + "grad_norm": 0.10058672726154327, + "learning_rate": 8.308194707569836e-06, + "loss": 0.0034, + "step": 106920 + }, + { + "epoch": 0.6858158052547357, + "grad_norm": 0.1317497044801712, + "learning_rate": 8.30777500861172e-06, + "loss": 0.0016, + "step": 106930 + }, + { + "epoch": 0.6858799421485218, + "grad_norm": 0.1980799287557602, + "learning_rate": 8.307355268204817e-06, + "loss": 0.0018, + "step": 106940 + }, + { + "epoch": 0.6859440790423079, + "grad_norm": 0.1505780667066574, + "learning_rate": 8.306935486354391e-06, + "loss": 0.0036, + "step": 106950 + }, + { + "epoch": 0.686008215936094, + "grad_norm": 0.17634980380535126, + "learning_rate": 8.306515663065699e-06, + "loss": 0.0032, + "step": 106960 + }, + { + "epoch": 0.6860723528298801, + "grad_norm": 0.08681733906269073, + "learning_rate": 8.306095798344004e-06, + "loss": 0.0036, + "step": 106970 + }, + { + "epoch": 0.6861364897236661, + "grad_norm": 0.06697957962751389, + "learning_rate": 8.305675892194564e-06, + "loss": 0.0028, + "step": 106980 + }, + { + "epoch": 0.6862006266174523, + "grad_norm": 0.038370467722415924, + "learning_rate": 8.305255944622644e-06, + "loss": 0.0031, + "step": 106990 + }, + { + "epoch": 0.6862647635112384, + "grad_norm": 0.12728312611579895, + "learning_rate": 8.304835955633504e-06, + "loss": 0.0024, + "step": 107000 + }, + { + "epoch": 0.6863289004050245, + "grad_norm": 0.25424623489379883, + "learning_rate": 8.304415925232407e-06, + "loss": 0.0026, + "step": 107010 + }, + { + "epoch": 0.6863930372988106, + "grad_norm": 0.37205880880355835, + "learning_rate": 8.303995853424617e-06, + "loss": 0.0023, + "step": 107020 + }, + { + "epoch": 0.6864571741925967, + "grad_norm": 0.07315251231193542, + "learning_rate": 8.303575740215399e-06, + "loss": 0.0018, + "step": 107030 + }, + { + "epoch": 0.6865213110863828, + "grad_norm": 0.08466996252536774, + "learning_rate": 8.303155585610017e-06, + "loss": 0.0035, + "step": 107040 + }, + { + "epoch": 0.6865854479801689, + "grad_norm": 0.08715147525072098, + "learning_rate": 8.302735389613733e-06, + "loss": 0.004, + "step": 107050 + }, + { + "epoch": 0.686649584873955, + "grad_norm": 0.21168333292007446, + "learning_rate": 8.302315152231812e-06, + "loss": 0.0026, + "step": 107060 + }, + { + "epoch": 0.686713721767741, + "grad_norm": 0.3521987199783325, + "learning_rate": 8.301894873469524e-06, + "loss": 0.0043, + "step": 107070 + }, + { + "epoch": 0.6867778586615272, + "grad_norm": 0.06002356857061386, + "learning_rate": 8.301474553332134e-06, + "loss": 0.0036, + "step": 107080 + }, + { + "epoch": 0.6868419955553132, + "grad_norm": 0.22865359485149384, + "learning_rate": 8.301054191824907e-06, + "loss": 0.0031, + "step": 107090 + }, + { + "epoch": 0.6869061324490994, + "grad_norm": 0.08421870321035385, + "learning_rate": 8.300633788953113e-06, + "loss": 0.0034, + "step": 107100 + }, + { + "epoch": 0.6869702693428854, + "grad_norm": 0.27708613872528076, + "learning_rate": 8.30021334472202e-06, + "loss": 0.0052, + "step": 107110 + }, + { + "epoch": 0.6870344062366716, + "grad_norm": 0.06739004701375961, + "learning_rate": 8.299792859136892e-06, + "loss": 0.0032, + "step": 107120 + }, + { + "epoch": 0.6870985431304576, + "grad_norm": 0.03162672743201256, + "learning_rate": 8.299372332203002e-06, + "loss": 0.0026, + "step": 107130 + }, + { + "epoch": 0.6871626800242437, + "grad_norm": 0.11178423464298248, + "learning_rate": 8.29895176392562e-06, + "loss": 0.0044, + "step": 107140 + }, + { + "epoch": 0.6872268169180299, + "grad_norm": 0.04783308506011963, + "learning_rate": 8.298531154310013e-06, + "loss": 0.002, + "step": 107150 + }, + { + "epoch": 0.6872909538118159, + "grad_norm": 0.08391523361206055, + "learning_rate": 8.298110503361453e-06, + "loss": 0.0035, + "step": 107160 + }, + { + "epoch": 0.6873550907056021, + "grad_norm": 0.066973976790905, + "learning_rate": 8.297689811085215e-06, + "loss": 0.0023, + "step": 107170 + }, + { + "epoch": 0.6874192275993881, + "grad_norm": 0.47545182704925537, + "learning_rate": 8.297269077486563e-06, + "loss": 0.0068, + "step": 107180 + }, + { + "epoch": 0.6874833644931743, + "grad_norm": 0.22450487315654755, + "learning_rate": 8.296848302570775e-06, + "loss": 0.0024, + "step": 107190 + }, + { + "epoch": 0.6875475013869603, + "grad_norm": 0.25167715549468994, + "learning_rate": 8.29642748634312e-06, + "loss": 0.0035, + "step": 107200 + }, + { + "epoch": 0.6876116382807465, + "grad_norm": 0.32105982303619385, + "learning_rate": 8.296006628808872e-06, + "loss": 0.0038, + "step": 107210 + }, + { + "epoch": 0.6876757751745325, + "grad_norm": 0.2346927523612976, + "learning_rate": 8.295585729973307e-06, + "loss": 0.0024, + "step": 107220 + }, + { + "epoch": 0.6877399120683186, + "grad_norm": 0.06694585829973221, + "learning_rate": 8.295164789841699e-06, + "loss": 0.0037, + "step": 107230 + }, + { + "epoch": 0.6878040489621047, + "grad_norm": 0.4164973199367523, + "learning_rate": 8.294743808419319e-06, + "loss": 0.0042, + "step": 107240 + }, + { + "epoch": 0.6878681858558908, + "grad_norm": 0.14734579622745514, + "learning_rate": 8.294322785711444e-06, + "loss": 0.003, + "step": 107250 + }, + { + "epoch": 0.6879323227496769, + "grad_norm": 0.33766835927963257, + "learning_rate": 8.29390172172335e-06, + "loss": 0.0033, + "step": 107260 + }, + { + "epoch": 0.687996459643463, + "grad_norm": 0.16679903864860535, + "learning_rate": 8.293480616460314e-06, + "loss": 0.0027, + "step": 107270 + }, + { + "epoch": 0.6880605965372492, + "grad_norm": 0.10429362207651138, + "learning_rate": 8.293059469927614e-06, + "loss": 0.0043, + "step": 107280 + }, + { + "epoch": 0.6881247334310352, + "grad_norm": 0.1312631070613861, + "learning_rate": 8.292638282130523e-06, + "loss": 0.0021, + "step": 107290 + }, + { + "epoch": 0.6881888703248213, + "grad_norm": 0.198140487074852, + "learning_rate": 8.29221705307432e-06, + "loss": 0.0028, + "step": 107300 + }, + { + "epoch": 0.6882530072186074, + "grad_norm": 0.1893017292022705, + "learning_rate": 8.291795782764287e-06, + "loss": 0.0018, + "step": 107310 + }, + { + "epoch": 0.6883171441123935, + "grad_norm": 0.17619863152503967, + "learning_rate": 8.2913744712057e-06, + "loss": 0.002, + "step": 107320 + }, + { + "epoch": 0.6883812810061796, + "grad_norm": 0.15994544327259064, + "learning_rate": 8.290953118403838e-06, + "loss": 0.0014, + "step": 107330 + }, + { + "epoch": 0.6884454178999657, + "grad_norm": 0.13528279960155487, + "learning_rate": 8.29053172436398e-06, + "loss": 0.0014, + "step": 107340 + }, + { + "epoch": 0.6885095547937518, + "grad_norm": 0.15273387730121613, + "learning_rate": 8.29011028909141e-06, + "loss": 0.0033, + "step": 107350 + }, + { + "epoch": 0.6885736916875379, + "grad_norm": 0.11530592292547226, + "learning_rate": 8.289688812591405e-06, + "loss": 0.0024, + "step": 107360 + }, + { + "epoch": 0.6886378285813239, + "grad_norm": 0.02847360447049141, + "learning_rate": 8.289267294869249e-06, + "loss": 0.0023, + "step": 107370 + }, + { + "epoch": 0.6887019654751101, + "grad_norm": 0.0729435458779335, + "learning_rate": 8.288845735930221e-06, + "loss": 0.0026, + "step": 107380 + }, + { + "epoch": 0.6887661023688961, + "grad_norm": 0.10790400207042694, + "learning_rate": 8.28842413577961e-06, + "loss": 0.003, + "step": 107390 + }, + { + "epoch": 0.6888302392626823, + "grad_norm": 0.07635890692472458, + "learning_rate": 8.28800249442269e-06, + "loss": 0.0043, + "step": 107400 + }, + { + "epoch": 0.6888943761564683, + "grad_norm": 0.12413554638624191, + "learning_rate": 8.287580811864751e-06, + "loss": 0.0019, + "step": 107410 + }, + { + "epoch": 0.6889585130502545, + "grad_norm": 0.2052922397851944, + "learning_rate": 8.287159088111075e-06, + "loss": 0.0026, + "step": 107420 + }, + { + "epoch": 0.6890226499440406, + "grad_norm": 0.23809462785720825, + "learning_rate": 8.286737323166945e-06, + "loss": 0.0024, + "step": 107430 + }, + { + "epoch": 0.6890867868378266, + "grad_norm": 0.11305812001228333, + "learning_rate": 8.286315517037646e-06, + "loss": 0.0051, + "step": 107440 + }, + { + "epoch": 0.6891509237316128, + "grad_norm": 0.03867397457361221, + "learning_rate": 8.285893669728467e-06, + "loss": 0.0038, + "step": 107450 + }, + { + "epoch": 0.6892150606253988, + "grad_norm": 0.17843380570411682, + "learning_rate": 8.285471781244692e-06, + "loss": 0.0042, + "step": 107460 + }, + { + "epoch": 0.689279197519185, + "grad_norm": 0.31200113892555237, + "learning_rate": 8.285049851591606e-06, + "loss": 0.0057, + "step": 107470 + }, + { + "epoch": 0.689343334412971, + "grad_norm": 0.15629765391349792, + "learning_rate": 8.2846278807745e-06, + "loss": 0.0023, + "step": 107480 + }, + { + "epoch": 0.6894074713067572, + "grad_norm": 0.12026771157979965, + "learning_rate": 8.284205868798658e-06, + "loss": 0.0025, + "step": 107490 + }, + { + "epoch": 0.6894716082005432, + "grad_norm": 0.0701836422085762, + "learning_rate": 8.283783815669369e-06, + "loss": 0.0024, + "step": 107500 + }, + { + "epoch": 0.6895357450943294, + "grad_norm": 0.3383144438266754, + "learning_rate": 8.283361721391923e-06, + "loss": 0.004, + "step": 107510 + }, + { + "epoch": 0.6895998819881154, + "grad_norm": 0.15997891128063202, + "learning_rate": 8.282939585971606e-06, + "loss": 0.0028, + "step": 107520 + }, + { + "epoch": 0.6896640188819015, + "grad_norm": 0.1713678538799286, + "learning_rate": 8.282517409413711e-06, + "loss": 0.0028, + "step": 107530 + }, + { + "epoch": 0.6897281557756876, + "grad_norm": 0.21673579514026642, + "learning_rate": 8.282095191723527e-06, + "loss": 0.002, + "step": 107540 + }, + { + "epoch": 0.6897922926694737, + "grad_norm": 0.17311428487300873, + "learning_rate": 8.281672932906344e-06, + "loss": 0.0037, + "step": 107550 + }, + { + "epoch": 0.6898564295632599, + "grad_norm": 0.24376513063907623, + "learning_rate": 8.281250632967455e-06, + "loss": 0.0034, + "step": 107560 + }, + { + "epoch": 0.6899205664570459, + "grad_norm": 0.0667029544711113, + "learning_rate": 8.280828291912148e-06, + "loss": 0.002, + "step": 107570 + }, + { + "epoch": 0.6899847033508321, + "grad_norm": 0.1129802018404007, + "learning_rate": 8.28040590974572e-06, + "loss": 0.0042, + "step": 107580 + }, + { + "epoch": 0.6900488402446181, + "grad_norm": 0.15882444381713867, + "learning_rate": 8.27998348647346e-06, + "loss": 0.0031, + "step": 107590 + }, + { + "epoch": 0.6901129771384042, + "grad_norm": 0.03456937521696091, + "learning_rate": 8.279561022100665e-06, + "loss": 0.0013, + "step": 107600 + }, + { + "epoch": 0.6901771140321903, + "grad_norm": 0.1546746790409088, + "learning_rate": 8.279138516632624e-06, + "loss": 0.007, + "step": 107610 + }, + { + "epoch": 0.6902412509259764, + "grad_norm": 0.04763857275247574, + "learning_rate": 8.278715970074636e-06, + "loss": 0.0027, + "step": 107620 + }, + { + "epoch": 0.6903053878197625, + "grad_norm": 0.13871750235557556, + "learning_rate": 8.278293382431992e-06, + "loss": 0.003, + "step": 107630 + }, + { + "epoch": 0.6903695247135486, + "grad_norm": 0.07775358855724335, + "learning_rate": 8.277870753709988e-06, + "loss": 0.002, + "step": 107640 + }, + { + "epoch": 0.6904336616073347, + "grad_norm": 0.13904011249542236, + "learning_rate": 8.277448083913924e-06, + "loss": 0.0038, + "step": 107650 + }, + { + "epoch": 0.6904977985011208, + "grad_norm": 0.06254595518112183, + "learning_rate": 8.27702537304909e-06, + "loss": 0.0023, + "step": 107660 + }, + { + "epoch": 0.6905619353949068, + "grad_norm": 0.1485593467950821, + "learning_rate": 8.276602621120788e-06, + "loss": 0.0052, + "step": 107670 + }, + { + "epoch": 0.690626072288693, + "grad_norm": 0.1240374967455864, + "learning_rate": 8.276179828134314e-06, + "loss": 0.0042, + "step": 107680 + }, + { + "epoch": 0.690690209182479, + "grad_norm": 0.1361633539199829, + "learning_rate": 8.275756994094963e-06, + "loss": 0.0025, + "step": 107690 + }, + { + "epoch": 0.6907543460762652, + "grad_norm": 0.0703364908695221, + "learning_rate": 8.275334119008037e-06, + "loss": 0.0037, + "step": 107700 + }, + { + "epoch": 0.6908184829700513, + "grad_norm": 0.09072870761156082, + "learning_rate": 8.274911202878834e-06, + "loss": 0.0011, + "step": 107710 + }, + { + "epoch": 0.6908826198638374, + "grad_norm": 0.043299779295921326, + "learning_rate": 8.274488245712653e-06, + "loss": 0.0021, + "step": 107720 + }, + { + "epoch": 0.6909467567576235, + "grad_norm": 0.08427557349205017, + "learning_rate": 8.274065247514793e-06, + "loss": 0.0062, + "step": 107730 + }, + { + "epoch": 0.6910108936514096, + "grad_norm": 0.13290773332118988, + "learning_rate": 8.273642208290555e-06, + "loss": 0.0023, + "step": 107740 + }, + { + "epoch": 0.6910750305451957, + "grad_norm": 0.05695051699876785, + "learning_rate": 8.273219128045241e-06, + "loss": 0.0045, + "step": 107750 + }, + { + "epoch": 0.6911391674389817, + "grad_norm": 0.068753182888031, + "learning_rate": 8.272796006784153e-06, + "loss": 0.0023, + "step": 107760 + }, + { + "epoch": 0.6912033043327679, + "grad_norm": 0.05978182330727577, + "learning_rate": 8.272372844512593e-06, + "loss": 0.0052, + "step": 107770 + }, + { + "epoch": 0.6912674412265539, + "grad_norm": 0.17988887429237366, + "learning_rate": 8.271949641235861e-06, + "loss": 0.0034, + "step": 107780 + }, + { + "epoch": 0.6913315781203401, + "grad_norm": 0.14107699692249298, + "learning_rate": 8.271526396959261e-06, + "loss": 0.0026, + "step": 107790 + }, + { + "epoch": 0.6913957150141261, + "grad_norm": 0.3287390470504761, + "learning_rate": 8.271103111688098e-06, + "loss": 0.0021, + "step": 107800 + }, + { + "epoch": 0.6914598519079123, + "grad_norm": 0.5077758431434631, + "learning_rate": 8.270679785427675e-06, + "loss": 0.0041, + "step": 107810 + }, + { + "epoch": 0.6915239888016983, + "grad_norm": 0.3003099858760834, + "learning_rate": 8.270256418183297e-06, + "loss": 0.004, + "step": 107820 + }, + { + "epoch": 0.6915881256954844, + "grad_norm": 0.10210637748241425, + "learning_rate": 8.26983300996027e-06, + "loss": 0.0068, + "step": 107830 + }, + { + "epoch": 0.6916522625892706, + "grad_norm": 0.11000536382198334, + "learning_rate": 8.269409560763896e-06, + "loss": 0.0032, + "step": 107840 + }, + { + "epoch": 0.6917163994830566, + "grad_norm": 0.12499922513961792, + "learning_rate": 8.268986070599485e-06, + "loss": 0.0023, + "step": 107850 + }, + { + "epoch": 0.6917805363768428, + "grad_norm": 0.19037102162837982, + "learning_rate": 8.268562539472342e-06, + "loss": 0.0043, + "step": 107860 + }, + { + "epoch": 0.6918446732706288, + "grad_norm": 0.06971777230501175, + "learning_rate": 8.268138967387773e-06, + "loss": 0.0024, + "step": 107870 + }, + { + "epoch": 0.691908810164415, + "grad_norm": 0.07947616279125214, + "learning_rate": 8.267715354351088e-06, + "loss": 0.0041, + "step": 107880 + }, + { + "epoch": 0.691972947058201, + "grad_norm": 0.18607938289642334, + "learning_rate": 8.267291700367595e-06, + "loss": 0.0031, + "step": 107890 + }, + { + "epoch": 0.6920370839519872, + "grad_norm": 0.12689931690692902, + "learning_rate": 8.266868005442603e-06, + "loss": 0.0025, + "step": 107900 + }, + { + "epoch": 0.6921012208457732, + "grad_norm": 0.09177198261022568, + "learning_rate": 8.266444269581417e-06, + "loss": 0.0024, + "step": 107910 + }, + { + "epoch": 0.6921653577395593, + "grad_norm": 0.13121455907821655, + "learning_rate": 8.266020492789352e-06, + "loss": 0.0048, + "step": 107920 + }, + { + "epoch": 0.6922294946333454, + "grad_norm": 0.35130009055137634, + "learning_rate": 8.265596675071715e-06, + "loss": 0.0028, + "step": 107930 + }, + { + "epoch": 0.6922936315271315, + "grad_norm": 0.18090005218982697, + "learning_rate": 8.265172816433818e-06, + "loss": 0.0042, + "step": 107940 + }, + { + "epoch": 0.6923577684209176, + "grad_norm": 0.14326050877571106, + "learning_rate": 8.264748916880973e-06, + "loss": 0.0028, + "step": 107950 + }, + { + "epoch": 0.6924219053147037, + "grad_norm": 0.09501796215772629, + "learning_rate": 8.264324976418489e-06, + "loss": 0.0027, + "step": 107960 + }, + { + "epoch": 0.6924860422084897, + "grad_norm": 0.1570703536272049, + "learning_rate": 8.263900995051681e-06, + "loss": 0.0029, + "step": 107970 + }, + { + "epoch": 0.6925501791022759, + "grad_norm": 0.03497795760631561, + "learning_rate": 8.263476972785862e-06, + "loss": 0.0025, + "step": 107980 + }, + { + "epoch": 0.692614315996062, + "grad_norm": 0.040670089423656464, + "learning_rate": 8.263052909626343e-06, + "loss": 0.0023, + "step": 107990 + }, + { + "epoch": 0.6926784528898481, + "grad_norm": 0.14497928321361542, + "learning_rate": 8.26262880557844e-06, + "loss": 0.0028, + "step": 108000 + }, + { + "epoch": 0.6927425897836342, + "grad_norm": 0.17702153325080872, + "learning_rate": 8.262204660647463e-06, + "loss": 0.0022, + "step": 108010 + }, + { + "epoch": 0.6928067266774203, + "grad_norm": 0.07231775671243668, + "learning_rate": 8.261780474838735e-06, + "loss": 0.0027, + "step": 108020 + }, + { + "epoch": 0.6928708635712064, + "grad_norm": 0.12166418880224228, + "learning_rate": 8.261356248157563e-06, + "loss": 0.0025, + "step": 108030 + }, + { + "epoch": 0.6929350004649925, + "grad_norm": 0.017073825001716614, + "learning_rate": 8.260931980609268e-06, + "loss": 0.0024, + "step": 108040 + }, + { + "epoch": 0.6929991373587786, + "grad_norm": 0.023968270048499107, + "learning_rate": 8.260507672199163e-06, + "loss": 0.0025, + "step": 108050 + }, + { + "epoch": 0.6930632742525646, + "grad_norm": 0.17493650317192078, + "learning_rate": 8.260083322932569e-06, + "loss": 0.004, + "step": 108060 + }, + { + "epoch": 0.6931274111463508, + "grad_norm": 0.3527976870536804, + "learning_rate": 8.259658932814798e-06, + "loss": 0.0029, + "step": 108070 + }, + { + "epoch": 0.6931915480401368, + "grad_norm": 0.28017330169677734, + "learning_rate": 8.259234501851173e-06, + "loss": 0.0025, + "step": 108080 + }, + { + "epoch": 0.693255684933923, + "grad_norm": 0.14413754642009735, + "learning_rate": 8.25881003004701e-06, + "loss": 0.0022, + "step": 108090 + }, + { + "epoch": 0.693319821827709, + "grad_norm": 0.21677450835704803, + "learning_rate": 8.258385517407627e-06, + "loss": 0.0022, + "step": 108100 + }, + { + "epoch": 0.6933839587214952, + "grad_norm": 0.11794870346784592, + "learning_rate": 8.257960963938348e-06, + "loss": 0.0042, + "step": 108110 + }, + { + "epoch": 0.6934480956152812, + "grad_norm": 0.12071501463651657, + "learning_rate": 8.257536369644487e-06, + "loss": 0.0026, + "step": 108120 + }, + { + "epoch": 0.6935122325090674, + "grad_norm": 0.12205830961465836, + "learning_rate": 8.257111734531367e-06, + "loss": 0.0055, + "step": 108130 + }, + { + "epoch": 0.6935763694028535, + "grad_norm": 0.08712588995695114, + "learning_rate": 8.256687058604312e-06, + "loss": 0.0037, + "step": 108140 + }, + { + "epoch": 0.6936405062966395, + "grad_norm": 0.060235004872083664, + "learning_rate": 8.256262341868637e-06, + "loss": 0.0034, + "step": 108150 + }, + { + "epoch": 0.6937046431904257, + "grad_norm": 0.12908978760242462, + "learning_rate": 8.255837584329671e-06, + "loss": 0.0039, + "step": 108160 + }, + { + "epoch": 0.6937687800842117, + "grad_norm": 0.06922983378171921, + "learning_rate": 8.255412785992731e-06, + "loss": 0.0022, + "step": 108170 + }, + { + "epoch": 0.6938329169779979, + "grad_norm": 0.07774290442466736, + "learning_rate": 8.254987946863142e-06, + "loss": 0.0042, + "step": 108180 + }, + { + "epoch": 0.6938970538717839, + "grad_norm": 0.06928818672895432, + "learning_rate": 8.25456306694623e-06, + "loss": 0.0036, + "step": 108190 + }, + { + "epoch": 0.6939611907655701, + "grad_norm": 0.31480279564857483, + "learning_rate": 8.254138146247313e-06, + "loss": 0.0025, + "step": 108200 + }, + { + "epoch": 0.6940253276593561, + "grad_norm": 0.22580046951770782, + "learning_rate": 8.25371318477172e-06, + "loss": 0.0026, + "step": 108210 + }, + { + "epoch": 0.6940894645531422, + "grad_norm": 0.25423887372016907, + "learning_rate": 8.253288182524778e-06, + "loss": 0.003, + "step": 108220 + }, + { + "epoch": 0.6941536014469283, + "grad_norm": 0.1763753741979599, + "learning_rate": 8.252863139511809e-06, + "loss": 0.0027, + "step": 108230 + }, + { + "epoch": 0.6942177383407144, + "grad_norm": 0.09514214098453522, + "learning_rate": 8.25243805573814e-06, + "loss": 0.0032, + "step": 108240 + }, + { + "epoch": 0.6942818752345005, + "grad_norm": 0.06112831085920334, + "learning_rate": 8.252012931209097e-06, + "loss": 0.0064, + "step": 108250 + }, + { + "epoch": 0.6943460121282866, + "grad_norm": 0.02216607704758644, + "learning_rate": 8.251587765930009e-06, + "loss": 0.0022, + "step": 108260 + }, + { + "epoch": 0.6944101490220728, + "grad_norm": 0.09686776995658875, + "learning_rate": 8.251162559906201e-06, + "loss": 0.0028, + "step": 108270 + }, + { + "epoch": 0.6944742859158588, + "grad_norm": 0.0727643296122551, + "learning_rate": 8.250737313143003e-06, + "loss": 0.0024, + "step": 108280 + }, + { + "epoch": 0.694538422809645, + "grad_norm": 0.17950348556041718, + "learning_rate": 8.250312025645744e-06, + "loss": 0.0032, + "step": 108290 + }, + { + "epoch": 0.694602559703431, + "grad_norm": 0.11519443243741989, + "learning_rate": 8.249886697419753e-06, + "loss": 0.0026, + "step": 108300 + }, + { + "epoch": 0.6946666965972171, + "grad_norm": 0.1178741529583931, + "learning_rate": 8.249461328470357e-06, + "loss": 0.0072, + "step": 108310 + }, + { + "epoch": 0.6947308334910032, + "grad_norm": 0.21187977492809296, + "learning_rate": 8.24903591880289e-06, + "loss": 0.0031, + "step": 108320 + }, + { + "epoch": 0.6947949703847893, + "grad_norm": 0.1189272403717041, + "learning_rate": 8.248610468422679e-06, + "loss": 0.0026, + "step": 108330 + }, + { + "epoch": 0.6948591072785754, + "grad_norm": 0.2139284908771515, + "learning_rate": 8.24818497733506e-06, + "loss": 0.0041, + "step": 108340 + }, + { + "epoch": 0.6949232441723615, + "grad_norm": 0.2531370520591736, + "learning_rate": 8.247759445545358e-06, + "loss": 0.002, + "step": 108350 + }, + { + "epoch": 0.6949873810661475, + "grad_norm": 0.11315297335386276, + "learning_rate": 8.247333873058912e-06, + "loss": 0.003, + "step": 108360 + }, + { + "epoch": 0.6950515179599337, + "grad_norm": 0.05931203439831734, + "learning_rate": 8.24690825988105e-06, + "loss": 0.0025, + "step": 108370 + }, + { + "epoch": 0.6951156548537197, + "grad_norm": 0.08369827270507812, + "learning_rate": 8.246482606017107e-06, + "loss": 0.0026, + "step": 108380 + }, + { + "epoch": 0.6951797917475059, + "grad_norm": 0.0766642689704895, + "learning_rate": 8.246056911472417e-06, + "loss": 0.0022, + "step": 108390 + }, + { + "epoch": 0.6952439286412919, + "grad_norm": 0.16475103795528412, + "learning_rate": 8.245631176252316e-06, + "loss": 0.0022, + "step": 108400 + }, + { + "epoch": 0.6953080655350781, + "grad_norm": 0.14208433032035828, + "learning_rate": 8.245205400362134e-06, + "loss": 0.0025, + "step": 108410 + }, + { + "epoch": 0.6953722024288642, + "grad_norm": 0.2255280315876007, + "learning_rate": 8.24477958380721e-06, + "loss": 0.0079, + "step": 108420 + }, + { + "epoch": 0.6954363393226503, + "grad_norm": 0.11754395067691803, + "learning_rate": 8.24435372659288e-06, + "loss": 0.0023, + "step": 108430 + }, + { + "epoch": 0.6955004762164364, + "grad_norm": 0.12908974289894104, + "learning_rate": 8.243927828724475e-06, + "loss": 0.0043, + "step": 108440 + }, + { + "epoch": 0.6955646131102224, + "grad_norm": 0.08824348449707031, + "learning_rate": 8.24350189020734e-06, + "loss": 0.0027, + "step": 108450 + }, + { + "epoch": 0.6956287500040086, + "grad_norm": 0.22994863986968994, + "learning_rate": 8.243075911046807e-06, + "loss": 0.0025, + "step": 108460 + }, + { + "epoch": 0.6956928868977946, + "grad_norm": 0.17440223693847656, + "learning_rate": 8.242649891248215e-06, + "loss": 0.0026, + "step": 108470 + }, + { + "epoch": 0.6957570237915808, + "grad_norm": 0.17761161923408508, + "learning_rate": 8.242223830816905e-06, + "loss": 0.0029, + "step": 108480 + }, + { + "epoch": 0.6958211606853668, + "grad_norm": 0.21196191012859344, + "learning_rate": 8.24179772975821e-06, + "loss": 0.0023, + "step": 108490 + }, + { + "epoch": 0.695885297579153, + "grad_norm": 0.15021458268165588, + "learning_rate": 8.241371588077473e-06, + "loss": 0.002, + "step": 108500 + }, + { + "epoch": 0.695949434472939, + "grad_norm": 0.058959461748600006, + "learning_rate": 8.240945405780035e-06, + "loss": 0.003, + "step": 108510 + }, + { + "epoch": 0.6960135713667251, + "grad_norm": 0.22570465505123138, + "learning_rate": 8.240519182871232e-06, + "loss": 0.0042, + "step": 108520 + }, + { + "epoch": 0.6960777082605112, + "grad_norm": 0.22025471925735474, + "learning_rate": 8.240092919356409e-06, + "loss": 0.0043, + "step": 108530 + }, + { + "epoch": 0.6961418451542973, + "grad_norm": 0.19205790758132935, + "learning_rate": 8.239666615240906e-06, + "loss": 0.0023, + "step": 108540 + }, + { + "epoch": 0.6962059820480835, + "grad_norm": 0.16703635454177856, + "learning_rate": 8.239240270530065e-06, + "loss": 0.0025, + "step": 108550 + }, + { + "epoch": 0.6962701189418695, + "grad_norm": 0.1193094551563263, + "learning_rate": 8.23881388522923e-06, + "loss": 0.0019, + "step": 108560 + }, + { + "epoch": 0.6963342558356557, + "grad_norm": 0.052605997771024704, + "learning_rate": 8.23838745934374e-06, + "loss": 0.0033, + "step": 108570 + }, + { + "epoch": 0.6963983927294417, + "grad_norm": 0.046040553599596024, + "learning_rate": 8.237960992878941e-06, + "loss": 0.002, + "step": 108580 + }, + { + "epoch": 0.6964625296232279, + "grad_norm": 0.06242847442626953, + "learning_rate": 8.237534485840177e-06, + "loss": 0.0022, + "step": 108590 + }, + { + "epoch": 0.6965266665170139, + "grad_norm": 0.15265408158302307, + "learning_rate": 8.237107938232792e-06, + "loss": 0.005, + "step": 108600 + }, + { + "epoch": 0.6965908034108, + "grad_norm": 0.2434060275554657, + "learning_rate": 8.23668135006213e-06, + "loss": 0.0047, + "step": 108610 + }, + { + "epoch": 0.6966549403045861, + "grad_norm": 0.1061597615480423, + "learning_rate": 8.236254721333541e-06, + "loss": 0.0028, + "step": 108620 + }, + { + "epoch": 0.6967190771983722, + "grad_norm": 0.2710915803909302, + "learning_rate": 8.235828052052363e-06, + "loss": 0.0022, + "step": 108630 + }, + { + "epoch": 0.6967832140921583, + "grad_norm": 0.22070899605751038, + "learning_rate": 8.23540134222395e-06, + "loss": 0.002, + "step": 108640 + }, + { + "epoch": 0.6968473509859444, + "grad_norm": 0.022739490494132042, + "learning_rate": 8.234974591853646e-06, + "loss": 0.0026, + "step": 108650 + }, + { + "epoch": 0.6969114878797305, + "grad_norm": 0.10844630002975464, + "learning_rate": 8.234547800946797e-06, + "loss": 0.004, + "step": 108660 + }, + { + "epoch": 0.6969756247735166, + "grad_norm": 0.29461532831192017, + "learning_rate": 8.234120969508752e-06, + "loss": 0.0029, + "step": 108670 + }, + { + "epoch": 0.6970397616673026, + "grad_norm": 0.5747034549713135, + "learning_rate": 8.23369409754486e-06, + "loss": 0.0033, + "step": 108680 + }, + { + "epoch": 0.6971038985610888, + "grad_norm": 0.19798482954502106, + "learning_rate": 8.233267185060473e-06, + "loss": 0.0022, + "step": 108690 + }, + { + "epoch": 0.6971680354548749, + "grad_norm": 0.1314244568347931, + "learning_rate": 8.232840232060932e-06, + "loss": 0.0014, + "step": 108700 + }, + { + "epoch": 0.697232172348661, + "grad_norm": 0.11207201331853867, + "learning_rate": 8.232413238551595e-06, + "loss": 0.0034, + "step": 108710 + }, + { + "epoch": 0.6972963092424471, + "grad_norm": 0.10562792420387268, + "learning_rate": 8.231986204537811e-06, + "loss": 0.0015, + "step": 108720 + }, + { + "epoch": 0.6973604461362332, + "grad_norm": 0.18661898374557495, + "learning_rate": 8.231559130024928e-06, + "loss": 0.0031, + "step": 108730 + }, + { + "epoch": 0.6974245830300193, + "grad_norm": 0.3229295313358307, + "learning_rate": 8.231132015018301e-06, + "loss": 0.0038, + "step": 108740 + }, + { + "epoch": 0.6974887199238053, + "grad_norm": 0.05444495379924774, + "learning_rate": 8.230704859523281e-06, + "loss": 0.0018, + "step": 108750 + }, + { + "epoch": 0.6975528568175915, + "grad_norm": 0.22647680342197418, + "learning_rate": 8.230277663545218e-06, + "loss": 0.005, + "step": 108760 + }, + { + "epoch": 0.6976169937113775, + "grad_norm": 0.09481486678123474, + "learning_rate": 8.22985042708947e-06, + "loss": 0.0023, + "step": 108770 + }, + { + "epoch": 0.6976811306051637, + "grad_norm": 0.06593775004148483, + "learning_rate": 8.229423150161387e-06, + "loss": 0.0045, + "step": 108780 + }, + { + "epoch": 0.6977452674989497, + "grad_norm": 0.19967499375343323, + "learning_rate": 8.228995832766322e-06, + "loss": 0.0018, + "step": 108790 + }, + { + "epoch": 0.6978094043927359, + "grad_norm": 0.11123993247747421, + "learning_rate": 8.228568474909632e-06, + "loss": 0.0029, + "step": 108800 + }, + { + "epoch": 0.6978735412865219, + "grad_norm": 0.039687998592853546, + "learning_rate": 8.228141076596673e-06, + "loss": 0.0018, + "step": 108810 + }, + { + "epoch": 0.697937678180308, + "grad_norm": 0.08039312809705734, + "learning_rate": 8.227713637832799e-06, + "loss": 0.0025, + "step": 108820 + }, + { + "epoch": 0.6980018150740942, + "grad_norm": 0.1507989466190338, + "learning_rate": 8.227286158623367e-06, + "loss": 0.0024, + "step": 108830 + }, + { + "epoch": 0.6980659519678802, + "grad_norm": 0.18190570175647736, + "learning_rate": 8.226858638973731e-06, + "loss": 0.0015, + "step": 108840 + }, + { + "epoch": 0.6981300888616664, + "grad_norm": 0.08606167882680893, + "learning_rate": 8.226431078889252e-06, + "loss": 0.0024, + "step": 108850 + }, + { + "epoch": 0.6981942257554524, + "grad_norm": 0.11085907369852066, + "learning_rate": 8.226003478375285e-06, + "loss": 0.0029, + "step": 108860 + }, + { + "epoch": 0.6982583626492386, + "grad_norm": 0.02125435322523117, + "learning_rate": 8.225575837437187e-06, + "loss": 0.003, + "step": 108870 + }, + { + "epoch": 0.6983224995430246, + "grad_norm": 0.1312132328748703, + "learning_rate": 8.22514815608032e-06, + "loss": 0.0034, + "step": 108880 + }, + { + "epoch": 0.6983866364368108, + "grad_norm": 0.18251964449882507, + "learning_rate": 8.224720434310042e-06, + "loss": 0.0018, + "step": 108890 + }, + { + "epoch": 0.6984507733305968, + "grad_norm": 0.09210048615932465, + "learning_rate": 8.224292672131711e-06, + "loss": 0.0034, + "step": 108900 + }, + { + "epoch": 0.698514910224383, + "grad_norm": 0.11854143440723419, + "learning_rate": 8.223864869550689e-06, + "loss": 0.0021, + "step": 108910 + }, + { + "epoch": 0.698579047118169, + "grad_norm": 0.1463458389043808, + "learning_rate": 8.223437026572336e-06, + "loss": 0.004, + "step": 108920 + }, + { + "epoch": 0.6986431840119551, + "grad_norm": 0.28385433554649353, + "learning_rate": 8.223009143202015e-06, + "loss": 0.0028, + "step": 108930 + }, + { + "epoch": 0.6987073209057412, + "grad_norm": 0.1049434095621109, + "learning_rate": 8.222581219445084e-06, + "loss": 0.0028, + "step": 108940 + }, + { + "epoch": 0.6987714577995273, + "grad_norm": 0.3270774185657501, + "learning_rate": 8.222153255306908e-06, + "loss": 0.0051, + "step": 108950 + }, + { + "epoch": 0.6988355946933134, + "grad_norm": 0.048887260258197784, + "learning_rate": 8.221725250792849e-06, + "loss": 0.0019, + "step": 108960 + }, + { + "epoch": 0.6988997315870995, + "grad_norm": 0.1261194795370102, + "learning_rate": 8.221297205908269e-06, + "loss": 0.0057, + "step": 108970 + }, + { + "epoch": 0.6989638684808857, + "grad_norm": 0.05719394236803055, + "learning_rate": 8.220869120658534e-06, + "loss": 0.0018, + "step": 108980 + }, + { + "epoch": 0.6990280053746717, + "grad_norm": 0.09047127515077591, + "learning_rate": 8.220440995049007e-06, + "loss": 0.0035, + "step": 108990 + }, + { + "epoch": 0.6990921422684578, + "grad_norm": 0.058118436485528946, + "learning_rate": 8.220012829085051e-06, + "loss": 0.0022, + "step": 109000 + }, + { + "epoch": 0.6991562791622439, + "grad_norm": 0.09301625192165375, + "learning_rate": 8.219584622772034e-06, + "loss": 0.003, + "step": 109010 + }, + { + "epoch": 0.69922041605603, + "grad_norm": 0.03872682526707649, + "learning_rate": 8.219156376115321e-06, + "loss": 0.0035, + "step": 109020 + }, + { + "epoch": 0.6992845529498161, + "grad_norm": 0.12496228516101837, + "learning_rate": 8.218728089120278e-06, + "loss": 0.0025, + "step": 109030 + }, + { + "epoch": 0.6993486898436022, + "grad_norm": 0.2176547795534134, + "learning_rate": 8.218299761792272e-06, + "loss": 0.004, + "step": 109040 + }, + { + "epoch": 0.6994128267373882, + "grad_norm": 0.06557410210371017, + "learning_rate": 8.217871394136669e-06, + "loss": 0.0033, + "step": 109050 + }, + { + "epoch": 0.6994769636311744, + "grad_norm": 0.23859727382659912, + "learning_rate": 8.217442986158837e-06, + "loss": 0.0024, + "step": 109060 + }, + { + "epoch": 0.6995411005249604, + "grad_norm": 0.07227912545204163, + "learning_rate": 8.217014537864147e-06, + "loss": 0.0033, + "step": 109070 + }, + { + "epoch": 0.6996052374187466, + "grad_norm": 0.11460322886705399, + "learning_rate": 8.216586049257965e-06, + "loss": 0.0027, + "step": 109080 + }, + { + "epoch": 0.6996693743125326, + "grad_norm": 0.03642609342932701, + "learning_rate": 8.21615752034566e-06, + "loss": 0.0022, + "step": 109090 + }, + { + "epoch": 0.6997335112063188, + "grad_norm": 0.1255207508802414, + "learning_rate": 8.215728951132603e-06, + "loss": 0.0038, + "step": 109100 + }, + { + "epoch": 0.6997976481001049, + "grad_norm": 0.15166237950325012, + "learning_rate": 8.215300341624164e-06, + "loss": 0.0042, + "step": 109110 + }, + { + "epoch": 0.699861784993891, + "grad_norm": 0.15849529206752777, + "learning_rate": 8.214871691825714e-06, + "loss": 0.0027, + "step": 109120 + }, + { + "epoch": 0.6999259218876771, + "grad_norm": 0.17884567379951477, + "learning_rate": 8.214443001742624e-06, + "loss": 0.0028, + "step": 109130 + }, + { + "epoch": 0.6999900587814631, + "grad_norm": 0.10719947516918182, + "learning_rate": 8.214014271380266e-06, + "loss": 0.0029, + "step": 109140 + }, + { + "epoch": 0.7000541956752493, + "grad_norm": 0.12732060253620148, + "learning_rate": 8.213585500744012e-06, + "loss": 0.0049, + "step": 109150 + }, + { + "epoch": 0.7001183325690353, + "grad_norm": 0.03269217908382416, + "learning_rate": 8.213156689839235e-06, + "loss": 0.004, + "step": 109160 + }, + { + "epoch": 0.7001824694628215, + "grad_norm": 0.21076838672161102, + "learning_rate": 8.212727838671308e-06, + "loss": 0.0049, + "step": 109170 + }, + { + "epoch": 0.7002466063566075, + "grad_norm": 0.19034221768379211, + "learning_rate": 8.212298947245605e-06, + "loss": 0.0018, + "step": 109180 + }, + { + "epoch": 0.7003107432503937, + "grad_norm": 0.17826853692531586, + "learning_rate": 8.211870015567503e-06, + "loss": 0.0034, + "step": 109190 + }, + { + "epoch": 0.7003748801441797, + "grad_norm": 0.08181103318929672, + "learning_rate": 8.211441043642371e-06, + "loss": 0.0044, + "step": 109200 + }, + { + "epoch": 0.7004390170379658, + "grad_norm": 0.17758990824222565, + "learning_rate": 8.21101203147559e-06, + "loss": 0.0026, + "step": 109210 + }, + { + "epoch": 0.7005031539317519, + "grad_norm": 0.14397963881492615, + "learning_rate": 8.21058297907253e-06, + "loss": 0.0022, + "step": 109220 + }, + { + "epoch": 0.700567290825538, + "grad_norm": 0.09560272097587585, + "learning_rate": 8.210153886438573e-06, + "loss": 0.0024, + "step": 109230 + }, + { + "epoch": 0.7006314277193241, + "grad_norm": 0.18891851603984833, + "learning_rate": 8.209724753579093e-06, + "loss": 0.0042, + "step": 109240 + }, + { + "epoch": 0.7006955646131102, + "grad_norm": 0.11252248287200928, + "learning_rate": 8.209295580499467e-06, + "loss": 0.0046, + "step": 109250 + }, + { + "epoch": 0.7007597015068964, + "grad_norm": 0.03766665980219841, + "learning_rate": 8.208866367205077e-06, + "loss": 0.0029, + "step": 109260 + }, + { + "epoch": 0.7008238384006824, + "grad_norm": 0.2850850224494934, + "learning_rate": 8.208437113701295e-06, + "loss": 0.0034, + "step": 109270 + }, + { + "epoch": 0.7008879752944686, + "grad_norm": 0.043535564094781876, + "learning_rate": 8.208007819993505e-06, + "loss": 0.0024, + "step": 109280 + }, + { + "epoch": 0.7009521121882546, + "grad_norm": 0.4656340479850769, + "learning_rate": 8.207578486087083e-06, + "loss": 0.0022, + "step": 109290 + }, + { + "epoch": 0.7010162490820407, + "grad_norm": 0.02832138165831566, + "learning_rate": 8.207149111987409e-06, + "loss": 0.0024, + "step": 109300 + }, + { + "epoch": 0.7010803859758268, + "grad_norm": 0.3228578567504883, + "learning_rate": 8.206719697699866e-06, + "loss": 0.0113, + "step": 109310 + }, + { + "epoch": 0.7011445228696129, + "grad_norm": 0.3972824215888977, + "learning_rate": 8.206290243229833e-06, + "loss": 0.0046, + "step": 109320 + }, + { + "epoch": 0.701208659763399, + "grad_norm": 0.13330219686031342, + "learning_rate": 8.205860748582692e-06, + "loss": 0.0051, + "step": 109330 + }, + { + "epoch": 0.7012727966571851, + "grad_norm": 0.0320163257420063, + "learning_rate": 8.205431213763824e-06, + "loss": 0.0039, + "step": 109340 + }, + { + "epoch": 0.7013369335509712, + "grad_norm": 0.08879612386226654, + "learning_rate": 8.205001638778614e-06, + "loss": 0.003, + "step": 109350 + }, + { + "epoch": 0.7014010704447573, + "grad_norm": 0.1196327656507492, + "learning_rate": 8.20457202363244e-06, + "loss": 0.0031, + "step": 109360 + }, + { + "epoch": 0.7014652073385433, + "grad_norm": 0.1541898101568222, + "learning_rate": 8.20414236833069e-06, + "loss": 0.0034, + "step": 109370 + }, + { + "epoch": 0.7015293442323295, + "grad_norm": 0.18361224234104156, + "learning_rate": 8.203712672878745e-06, + "loss": 0.0022, + "step": 109380 + }, + { + "epoch": 0.7015934811261155, + "grad_norm": 0.0930710956454277, + "learning_rate": 8.203282937281991e-06, + "loss": 0.0022, + "step": 109390 + }, + { + "epoch": 0.7016576180199017, + "grad_norm": 0.0693768560886383, + "learning_rate": 8.202853161545814e-06, + "loss": 0.0027, + "step": 109400 + }, + { + "epoch": 0.7017217549136878, + "grad_norm": 0.11731088906526566, + "learning_rate": 8.202423345675597e-06, + "loss": 0.0022, + "step": 109410 + }, + { + "epoch": 0.7017858918074739, + "grad_norm": 0.12514188885688782, + "learning_rate": 8.201993489676724e-06, + "loss": 0.0025, + "step": 109420 + }, + { + "epoch": 0.70185002870126, + "grad_norm": 0.1392947882413864, + "learning_rate": 8.201563593554587e-06, + "loss": 0.0025, + "step": 109430 + }, + { + "epoch": 0.701914165595046, + "grad_norm": 0.1546277105808258, + "learning_rate": 8.20113365731457e-06, + "loss": 0.0041, + "step": 109440 + }, + { + "epoch": 0.7019783024888322, + "grad_norm": 0.09571681916713715, + "learning_rate": 8.20070368096206e-06, + "loss": 0.0035, + "step": 109450 + }, + { + "epoch": 0.7020424393826182, + "grad_norm": 0.20175130665302277, + "learning_rate": 8.200273664502446e-06, + "loss": 0.0025, + "step": 109460 + }, + { + "epoch": 0.7021065762764044, + "grad_norm": 0.16607627272605896, + "learning_rate": 8.199843607941115e-06, + "loss": 0.0037, + "step": 109470 + }, + { + "epoch": 0.7021707131701904, + "grad_norm": 0.21978610754013062, + "learning_rate": 8.199413511283456e-06, + "loss": 0.003, + "step": 109480 + }, + { + "epoch": 0.7022348500639766, + "grad_norm": 0.1604081243276596, + "learning_rate": 8.198983374534861e-06, + "loss": 0.0029, + "step": 109490 + }, + { + "epoch": 0.7022989869577626, + "grad_norm": 0.12393929809331894, + "learning_rate": 8.198553197700717e-06, + "loss": 0.002, + "step": 109500 + }, + { + "epoch": 0.7023631238515488, + "grad_norm": 0.21514670550823212, + "learning_rate": 8.198122980786416e-06, + "loss": 0.003, + "step": 109510 + }, + { + "epoch": 0.7024272607453348, + "grad_norm": 0.03178085386753082, + "learning_rate": 8.197692723797349e-06, + "loss": 0.0055, + "step": 109520 + }, + { + "epoch": 0.7024913976391209, + "grad_norm": 0.2929263114929199, + "learning_rate": 8.197262426738903e-06, + "loss": 0.0018, + "step": 109530 + }, + { + "epoch": 0.7025555345329071, + "grad_norm": 0.09754671901464462, + "learning_rate": 8.196832089616477e-06, + "loss": 0.0033, + "step": 109540 + }, + { + "epoch": 0.7026196714266931, + "grad_norm": 0.13949570059776306, + "learning_rate": 8.19640171243546e-06, + "loss": 0.0023, + "step": 109550 + }, + { + "epoch": 0.7026838083204793, + "grad_norm": 0.2180301547050476, + "learning_rate": 8.195971295201245e-06, + "loss": 0.0038, + "step": 109560 + }, + { + "epoch": 0.7027479452142653, + "grad_norm": 0.059337735176086426, + "learning_rate": 8.195540837919224e-06, + "loss": 0.0025, + "step": 109570 + }, + { + "epoch": 0.7028120821080515, + "grad_norm": 0.07762123644351959, + "learning_rate": 8.195110340594795e-06, + "loss": 0.004, + "step": 109580 + }, + { + "epoch": 0.7028762190018375, + "grad_norm": 0.5540107488632202, + "learning_rate": 8.19467980323335e-06, + "loss": 0.0018, + "step": 109590 + }, + { + "epoch": 0.7029403558956236, + "grad_norm": 0.24830619990825653, + "learning_rate": 8.194249225840283e-06, + "loss": 0.0039, + "step": 109600 + }, + { + "epoch": 0.7030044927894097, + "grad_norm": 0.16720372438430786, + "learning_rate": 8.193818608420988e-06, + "loss": 0.0026, + "step": 109610 + }, + { + "epoch": 0.7030686296831958, + "grad_norm": 0.12153538316488266, + "learning_rate": 8.193387950980864e-06, + "loss": 0.0018, + "step": 109620 + }, + { + "epoch": 0.7031327665769819, + "grad_norm": 0.1623486578464508, + "learning_rate": 8.19295725352531e-06, + "loss": 0.0022, + "step": 109630 + }, + { + "epoch": 0.703196903470768, + "grad_norm": 0.1736610233783722, + "learning_rate": 8.192526516059715e-06, + "loss": 0.0018, + "step": 109640 + }, + { + "epoch": 0.7032610403645541, + "grad_norm": 0.31248173117637634, + "learning_rate": 8.192095738589484e-06, + "loss": 0.0017, + "step": 109650 + }, + { + "epoch": 0.7033251772583402, + "grad_norm": 0.13344676792621613, + "learning_rate": 8.191664921120014e-06, + "loss": 0.0026, + "step": 109660 + }, + { + "epoch": 0.7033893141521262, + "grad_norm": 0.13243715465068817, + "learning_rate": 8.191234063656698e-06, + "loss": 0.002, + "step": 109670 + }, + { + "epoch": 0.7034534510459124, + "grad_norm": 0.17147253453731537, + "learning_rate": 8.19080316620494e-06, + "loss": 0.0027, + "step": 109680 + }, + { + "epoch": 0.7035175879396985, + "grad_norm": 0.010082487016916275, + "learning_rate": 8.190372228770137e-06, + "loss": 0.0027, + "step": 109690 + }, + { + "epoch": 0.7035817248334846, + "grad_norm": 0.3268413543701172, + "learning_rate": 8.18994125135769e-06, + "loss": 0.0028, + "step": 109700 + }, + { + "epoch": 0.7036458617272707, + "grad_norm": 0.06293269246816635, + "learning_rate": 8.189510233973e-06, + "loss": 0.004, + "step": 109710 + }, + { + "epoch": 0.7037099986210568, + "grad_norm": 0.16118904948234558, + "learning_rate": 8.189079176621465e-06, + "loss": 0.0035, + "step": 109720 + }, + { + "epoch": 0.7037741355148429, + "grad_norm": 0.13254894316196442, + "learning_rate": 8.188648079308492e-06, + "loss": 0.0024, + "step": 109730 + }, + { + "epoch": 0.703838272408629, + "grad_norm": 0.1677081286907196, + "learning_rate": 8.188216942039477e-06, + "loss": 0.0034, + "step": 109740 + }, + { + "epoch": 0.7039024093024151, + "grad_norm": 0.2145616114139557, + "learning_rate": 8.187785764819826e-06, + "loss": 0.0023, + "step": 109750 + }, + { + "epoch": 0.7039665461962011, + "grad_norm": 0.12412948906421661, + "learning_rate": 8.187354547654942e-06, + "loss": 0.0035, + "step": 109760 + }, + { + "epoch": 0.7040306830899873, + "grad_norm": 0.43917155265808105, + "learning_rate": 8.186923290550227e-06, + "loss": 0.0041, + "step": 109770 + }, + { + "epoch": 0.7040948199837733, + "grad_norm": 0.14502884447574615, + "learning_rate": 8.186491993511086e-06, + "loss": 0.0029, + "step": 109780 + }, + { + "epoch": 0.7041589568775595, + "grad_norm": 0.052646420896053314, + "learning_rate": 8.186060656542922e-06, + "loss": 0.0022, + "step": 109790 + }, + { + "epoch": 0.7042230937713455, + "grad_norm": 0.12846732139587402, + "learning_rate": 8.185629279651142e-06, + "loss": 0.002, + "step": 109800 + }, + { + "epoch": 0.7042872306651317, + "grad_norm": 0.3380899429321289, + "learning_rate": 8.18519786284115e-06, + "loss": 0.0025, + "step": 109810 + }, + { + "epoch": 0.7043513675589178, + "grad_norm": 0.09375136345624924, + "learning_rate": 8.184766406118351e-06, + "loss": 0.0032, + "step": 109820 + }, + { + "epoch": 0.7044155044527038, + "grad_norm": 0.03884616494178772, + "learning_rate": 8.184334909488154e-06, + "loss": 0.0028, + "step": 109830 + }, + { + "epoch": 0.70447964134649, + "grad_norm": 0.10116954147815704, + "learning_rate": 8.183903372955965e-06, + "loss": 0.0042, + "step": 109840 + }, + { + "epoch": 0.704543778240276, + "grad_norm": 0.23590809106826782, + "learning_rate": 8.183471796527191e-06, + "loss": 0.0023, + "step": 109850 + }, + { + "epoch": 0.7046079151340622, + "grad_norm": 0.2649398148059845, + "learning_rate": 8.183040180207241e-06, + "loss": 0.0026, + "step": 109860 + }, + { + "epoch": 0.7046720520278482, + "grad_norm": 0.0337352491915226, + "learning_rate": 8.18260852400152e-06, + "loss": 0.003, + "step": 109870 + }, + { + "epoch": 0.7047361889216344, + "grad_norm": 0.22226230800151825, + "learning_rate": 8.182176827915443e-06, + "loss": 0.0042, + "step": 109880 + }, + { + "epoch": 0.7048003258154204, + "grad_norm": 0.0936257615685463, + "learning_rate": 8.181745091954413e-06, + "loss": 0.0049, + "step": 109890 + }, + { + "epoch": 0.7048644627092066, + "grad_norm": 0.1624756008386612, + "learning_rate": 8.181313316123844e-06, + "loss": 0.0025, + "step": 109900 + }, + { + "epoch": 0.7049285996029926, + "grad_norm": 0.022759467363357544, + "learning_rate": 8.180881500429147e-06, + "loss": 0.0051, + "step": 109910 + }, + { + "epoch": 0.7049927364967787, + "grad_norm": 0.07997841387987137, + "learning_rate": 8.18044964487573e-06, + "loss": 0.0024, + "step": 109920 + }, + { + "epoch": 0.7050568733905648, + "grad_norm": 0.10638929158449173, + "learning_rate": 8.180017749469007e-06, + "loss": 0.004, + "step": 109930 + }, + { + "epoch": 0.7051210102843509, + "grad_norm": 0.11960377544164658, + "learning_rate": 8.179585814214387e-06, + "loss": 0.0011, + "step": 109940 + }, + { + "epoch": 0.705185147178137, + "grad_norm": 0.27875372767448425, + "learning_rate": 8.179153839117286e-06, + "loss": 0.0029, + "step": 109950 + }, + { + "epoch": 0.7052492840719231, + "grad_norm": 0.1304447054862976, + "learning_rate": 8.178721824183116e-06, + "loss": 0.0037, + "step": 109960 + }, + { + "epoch": 0.7053134209657093, + "grad_norm": 0.14808189868927002, + "learning_rate": 8.17828976941729e-06, + "loss": 0.0013, + "step": 109970 + }, + { + "epoch": 0.7053775578594953, + "grad_norm": 0.1582537591457367, + "learning_rate": 8.177857674825217e-06, + "loss": 0.0031, + "step": 109980 + }, + { + "epoch": 0.7054416947532814, + "grad_norm": 0.1076810210943222, + "learning_rate": 8.177425540412322e-06, + "loss": 0.0046, + "step": 109990 + }, + { + "epoch": 0.7055058316470675, + "grad_norm": 0.04820389673113823, + "learning_rate": 8.176993366184012e-06, + "loss": 0.0037, + "step": 110000 + }, + { + "epoch": 0.7055699685408536, + "grad_norm": 0.1892995983362198, + "learning_rate": 8.176561152145704e-06, + "loss": 0.0018, + "step": 110010 + }, + { + "epoch": 0.7056341054346397, + "grad_norm": 0.1477370262145996, + "learning_rate": 8.176128898302813e-06, + "loss": 0.0028, + "step": 110020 + }, + { + "epoch": 0.7056982423284258, + "grad_norm": 0.06276816874742508, + "learning_rate": 8.175696604660759e-06, + "loss": 0.0038, + "step": 110030 + }, + { + "epoch": 0.7057623792222119, + "grad_norm": 0.15604333579540253, + "learning_rate": 8.175264271224957e-06, + "loss": 0.0047, + "step": 110040 + }, + { + "epoch": 0.705826516115998, + "grad_norm": 0.1175050437450409, + "learning_rate": 8.174831898000824e-06, + "loss": 0.003, + "step": 110050 + }, + { + "epoch": 0.705890653009784, + "grad_norm": 0.21799638867378235, + "learning_rate": 8.174399484993777e-06, + "loss": 0.0039, + "step": 110060 + }, + { + "epoch": 0.7059547899035702, + "grad_norm": 0.06989800930023193, + "learning_rate": 8.173967032209237e-06, + "loss": 0.0049, + "step": 110070 + }, + { + "epoch": 0.7060189267973562, + "grad_norm": 0.055953364819288254, + "learning_rate": 8.173534539652623e-06, + "loss": 0.0018, + "step": 110080 + }, + { + "epoch": 0.7060830636911424, + "grad_norm": 0.14363153278827667, + "learning_rate": 8.173102007329353e-06, + "loss": 0.0032, + "step": 110090 + }, + { + "epoch": 0.7061472005849285, + "grad_norm": 0.05500480532646179, + "learning_rate": 8.172669435244845e-06, + "loss": 0.0031, + "step": 110100 + }, + { + "epoch": 0.7062113374787146, + "grad_norm": 0.06739882379770279, + "learning_rate": 8.17223682340452e-06, + "loss": 0.0026, + "step": 110110 + }, + { + "epoch": 0.7062754743725007, + "grad_norm": 0.05695938691496849, + "learning_rate": 8.171804171813804e-06, + "loss": 0.0018, + "step": 110120 + }, + { + "epoch": 0.7063396112662867, + "grad_norm": 0.07226129621267319, + "learning_rate": 8.171371480478115e-06, + "loss": 0.0021, + "step": 110130 + }, + { + "epoch": 0.7064037481600729, + "grad_norm": 0.20298391580581665, + "learning_rate": 8.170938749402873e-06, + "loss": 0.0027, + "step": 110140 + }, + { + "epoch": 0.7064678850538589, + "grad_norm": 0.2183033972978592, + "learning_rate": 8.170505978593501e-06, + "loss": 0.0029, + "step": 110150 + }, + { + "epoch": 0.7065320219476451, + "grad_norm": 0.11818025261163712, + "learning_rate": 8.170073168055426e-06, + "loss": 0.002, + "step": 110160 + }, + { + "epoch": 0.7065961588414311, + "grad_norm": 0.06451547145843506, + "learning_rate": 8.169640317794066e-06, + "loss": 0.0021, + "step": 110170 + }, + { + "epoch": 0.7066602957352173, + "grad_norm": 0.09229018539190292, + "learning_rate": 8.16920742781485e-06, + "loss": 0.0058, + "step": 110180 + }, + { + "epoch": 0.7067244326290033, + "grad_norm": 0.10356393456459045, + "learning_rate": 8.1687744981232e-06, + "loss": 0.0037, + "step": 110190 + }, + { + "epoch": 0.7067885695227895, + "grad_norm": 0.2616499960422516, + "learning_rate": 8.168341528724539e-06, + "loss": 0.0025, + "step": 110200 + }, + { + "epoch": 0.7068527064165755, + "grad_norm": 0.07241521030664444, + "learning_rate": 8.167908519624295e-06, + "loss": 0.0034, + "step": 110210 + }, + { + "epoch": 0.7069168433103616, + "grad_norm": 0.2841012179851532, + "learning_rate": 8.167475470827893e-06, + "loss": 0.0028, + "step": 110220 + }, + { + "epoch": 0.7069809802041477, + "grad_norm": 0.14491187036037445, + "learning_rate": 8.16704238234076e-06, + "loss": 0.0025, + "step": 110230 + }, + { + "epoch": 0.7070451170979338, + "grad_norm": 0.1743507832288742, + "learning_rate": 8.166609254168321e-06, + "loss": 0.0019, + "step": 110240 + }, + { + "epoch": 0.70710925399172, + "grad_norm": 0.07084076851606369, + "learning_rate": 8.166176086316007e-06, + "loss": 0.003, + "step": 110250 + }, + { + "epoch": 0.707173390885506, + "grad_norm": 0.12673059105873108, + "learning_rate": 8.165742878789243e-06, + "loss": 0.004, + "step": 110260 + }, + { + "epoch": 0.7072375277792922, + "grad_norm": 0.23712843656539917, + "learning_rate": 8.165309631593457e-06, + "loss": 0.0026, + "step": 110270 + }, + { + "epoch": 0.7073016646730782, + "grad_norm": 0.0420183502137661, + "learning_rate": 8.164876344734081e-06, + "loss": 0.0037, + "step": 110280 + }, + { + "epoch": 0.7073658015668643, + "grad_norm": 0.2065224051475525, + "learning_rate": 8.164443018216542e-06, + "loss": 0.0019, + "step": 110290 + }, + { + "epoch": 0.7074299384606504, + "grad_norm": 0.10177796334028244, + "learning_rate": 8.164009652046269e-06, + "loss": 0.0038, + "step": 110300 + }, + { + "epoch": 0.7074940753544365, + "grad_norm": 0.06011974439024925, + "learning_rate": 8.163576246228697e-06, + "loss": 0.0021, + "step": 110310 + }, + { + "epoch": 0.7075582122482226, + "grad_norm": 0.15856629610061646, + "learning_rate": 8.16314280076925e-06, + "loss": 0.0032, + "step": 110320 + }, + { + "epoch": 0.7076223491420087, + "grad_norm": 0.2633246183395386, + "learning_rate": 8.162709315673366e-06, + "loss": 0.0034, + "step": 110330 + }, + { + "epoch": 0.7076864860357948, + "grad_norm": 0.11729785054922104, + "learning_rate": 8.162275790946472e-06, + "loss": 0.0031, + "step": 110340 + }, + { + "epoch": 0.7077506229295809, + "grad_norm": 0.10076826810836792, + "learning_rate": 8.161842226594002e-06, + "loss": 0.0042, + "step": 110350 + }, + { + "epoch": 0.707814759823367, + "grad_norm": 0.12335386872291565, + "learning_rate": 8.161408622621391e-06, + "loss": 0.0023, + "step": 110360 + }, + { + "epoch": 0.7078788967171531, + "grad_norm": 0.05566006898880005, + "learning_rate": 8.160974979034068e-06, + "loss": 0.0022, + "step": 110370 + }, + { + "epoch": 0.7079430336109392, + "grad_norm": 0.13265535235404968, + "learning_rate": 8.160541295837471e-06, + "loss": 0.0034, + "step": 110380 + }, + { + "epoch": 0.7080071705047253, + "grad_norm": 0.23299631476402283, + "learning_rate": 8.160107573037034e-06, + "loss": 0.0035, + "step": 110390 + }, + { + "epoch": 0.7080713073985114, + "grad_norm": 0.040941670536994934, + "learning_rate": 8.159673810638188e-06, + "loss": 0.0032, + "step": 110400 + }, + { + "epoch": 0.7081354442922975, + "grad_norm": 0.2033630758523941, + "learning_rate": 8.159240008646374e-06, + "loss": 0.003, + "step": 110410 + }, + { + "epoch": 0.7081995811860836, + "grad_norm": 0.2089751809835434, + "learning_rate": 8.158806167067022e-06, + "loss": 0.0013, + "step": 110420 + }, + { + "epoch": 0.7082637180798697, + "grad_norm": 0.07643994688987732, + "learning_rate": 8.158372285905573e-06, + "loss": 0.0015, + "step": 110430 + }, + { + "epoch": 0.7083278549736558, + "grad_norm": 0.08906198292970657, + "learning_rate": 8.157938365167461e-06, + "loss": 0.0036, + "step": 110440 + }, + { + "epoch": 0.7083919918674418, + "grad_norm": 0.32955411076545715, + "learning_rate": 8.157504404858125e-06, + "loss": 0.0024, + "step": 110450 + }, + { + "epoch": 0.708456128761228, + "grad_norm": 0.18114842474460602, + "learning_rate": 8.157070404983001e-06, + "loss": 0.0053, + "step": 110460 + }, + { + "epoch": 0.708520265655014, + "grad_norm": 0.20772533118724823, + "learning_rate": 8.15663636554753e-06, + "loss": 0.0031, + "step": 110470 + }, + { + "epoch": 0.7085844025488002, + "grad_norm": 0.12291453778743744, + "learning_rate": 8.156202286557149e-06, + "loss": 0.003, + "step": 110480 + }, + { + "epoch": 0.7086485394425862, + "grad_norm": 0.23288308084011078, + "learning_rate": 8.155768168017298e-06, + "loss": 0.0057, + "step": 110490 + }, + { + "epoch": 0.7087126763363724, + "grad_norm": 0.12418215721845627, + "learning_rate": 8.155334009933414e-06, + "loss": 0.0045, + "step": 110500 + }, + { + "epoch": 0.7087768132301584, + "grad_norm": 0.2861568033695221, + "learning_rate": 8.154899812310942e-06, + "loss": 0.0029, + "step": 110510 + }, + { + "epoch": 0.7088409501239445, + "grad_norm": 0.11753934621810913, + "learning_rate": 8.15446557515532e-06, + "loss": 0.0025, + "step": 110520 + }, + { + "epoch": 0.7089050870177307, + "grad_norm": 0.18481867015361786, + "learning_rate": 8.15403129847199e-06, + "loss": 0.0041, + "step": 110530 + }, + { + "epoch": 0.7089692239115167, + "grad_norm": 0.47326895594596863, + "learning_rate": 8.153596982266392e-06, + "loss": 0.005, + "step": 110540 + }, + { + "epoch": 0.7090333608053029, + "grad_norm": 0.16735628247261047, + "learning_rate": 8.153162626543972e-06, + "loss": 0.0048, + "step": 110550 + }, + { + "epoch": 0.7090974976990889, + "grad_norm": 0.1563166379928589, + "learning_rate": 8.15272823131017e-06, + "loss": 0.0025, + "step": 110560 + }, + { + "epoch": 0.7091616345928751, + "grad_norm": 0.04397542029619217, + "learning_rate": 8.152293796570432e-06, + "loss": 0.0031, + "step": 110570 + }, + { + "epoch": 0.7092257714866611, + "grad_norm": 0.5582118034362793, + "learning_rate": 8.151859322330197e-06, + "loss": 0.0049, + "step": 110580 + }, + { + "epoch": 0.7092899083804473, + "grad_norm": 0.1282324641942978, + "learning_rate": 8.151424808594914e-06, + "loss": 0.0037, + "step": 110590 + }, + { + "epoch": 0.7093540452742333, + "grad_norm": 0.27893584966659546, + "learning_rate": 8.150990255370025e-06, + "loss": 0.0031, + "step": 110600 + }, + { + "epoch": 0.7094181821680194, + "grad_norm": 0.05734465271234512, + "learning_rate": 8.150555662660976e-06, + "loss": 0.0029, + "step": 110610 + }, + { + "epoch": 0.7094823190618055, + "grad_norm": 0.06638863682746887, + "learning_rate": 8.150121030473214e-06, + "loss": 0.002, + "step": 110620 + }, + { + "epoch": 0.7095464559555916, + "grad_norm": 0.13825614750385284, + "learning_rate": 8.149686358812183e-06, + "loss": 0.0041, + "step": 110630 + }, + { + "epoch": 0.7096105928493777, + "grad_norm": 0.10324165970087051, + "learning_rate": 8.149251647683332e-06, + "loss": 0.0067, + "step": 110640 + }, + { + "epoch": 0.7096747297431638, + "grad_norm": 0.04771149903535843, + "learning_rate": 8.148816897092106e-06, + "loss": 0.0024, + "step": 110650 + }, + { + "epoch": 0.70973886663695, + "grad_norm": 0.05605524405837059, + "learning_rate": 8.148382107043954e-06, + "loss": 0.005, + "step": 110660 + }, + { + "epoch": 0.709803003530736, + "grad_norm": 0.0790896937251091, + "learning_rate": 8.147947277544324e-06, + "loss": 0.0026, + "step": 110670 + }, + { + "epoch": 0.7098671404245221, + "grad_norm": 0.18265533447265625, + "learning_rate": 8.147512408598664e-06, + "loss": 0.0028, + "step": 110680 + }, + { + "epoch": 0.7099312773183082, + "grad_norm": 0.06397712975740433, + "learning_rate": 8.147077500212426e-06, + "loss": 0.0025, + "step": 110690 + }, + { + "epoch": 0.7099954142120943, + "grad_norm": 0.20617574453353882, + "learning_rate": 8.146642552391057e-06, + "loss": 0.0031, + "step": 110700 + }, + { + "epoch": 0.7100595511058804, + "grad_norm": 0.09721191972494125, + "learning_rate": 8.146207565140007e-06, + "loss": 0.0022, + "step": 110710 + }, + { + "epoch": 0.7101236879996665, + "grad_norm": 0.05267834663391113, + "learning_rate": 8.145772538464729e-06, + "loss": 0.0024, + "step": 110720 + }, + { + "epoch": 0.7101878248934526, + "grad_norm": 0.12130790948867798, + "learning_rate": 8.145337472370672e-06, + "loss": 0.002, + "step": 110730 + }, + { + "epoch": 0.7102519617872387, + "grad_norm": 0.23529313504695892, + "learning_rate": 8.14490236686329e-06, + "loss": 0.0026, + "step": 110740 + }, + { + "epoch": 0.7103160986810247, + "grad_norm": 0.06817247718572617, + "learning_rate": 8.14446722194803e-06, + "loss": 0.0018, + "step": 110750 + }, + { + "epoch": 0.7103802355748109, + "grad_norm": 0.15603575110435486, + "learning_rate": 8.144032037630351e-06, + "loss": 0.0025, + "step": 110760 + }, + { + "epoch": 0.7104443724685969, + "grad_norm": 0.061270855367183685, + "learning_rate": 8.143596813915704e-06, + "loss": 0.0027, + "step": 110770 + }, + { + "epoch": 0.7105085093623831, + "grad_norm": 0.1100795716047287, + "learning_rate": 8.143161550809542e-06, + "loss": 0.0033, + "step": 110780 + }, + { + "epoch": 0.7105726462561691, + "grad_norm": 0.6656703352928162, + "learning_rate": 8.14272624831732e-06, + "loss": 0.0098, + "step": 110790 + }, + { + "epoch": 0.7106367831499553, + "grad_norm": 0.1621638387441635, + "learning_rate": 8.14229090644449e-06, + "loss": 0.0032, + "step": 110800 + }, + { + "epoch": 0.7107009200437414, + "grad_norm": 0.03211973235011101, + "learning_rate": 8.14185552519651e-06, + "loss": 0.0024, + "step": 110810 + }, + { + "epoch": 0.7107650569375275, + "grad_norm": 0.12337400019168854, + "learning_rate": 8.141420104578836e-06, + "loss": 0.002, + "step": 110820 + }, + { + "epoch": 0.7108291938313136, + "grad_norm": 0.027781283482909203, + "learning_rate": 8.140984644596921e-06, + "loss": 0.001, + "step": 110830 + }, + { + "epoch": 0.7108933307250996, + "grad_norm": 0.19105949997901917, + "learning_rate": 8.140549145256225e-06, + "loss": 0.0032, + "step": 110840 + }, + { + "epoch": 0.7109574676188858, + "grad_norm": 0.15865445137023926, + "learning_rate": 8.140113606562204e-06, + "loss": 0.0025, + "step": 110850 + }, + { + "epoch": 0.7110216045126718, + "grad_norm": 0.16358639299869537, + "learning_rate": 8.139678028520315e-06, + "loss": 0.0026, + "step": 110860 + }, + { + "epoch": 0.711085741406458, + "grad_norm": 0.3070227801799774, + "learning_rate": 8.139242411136015e-06, + "loss": 0.0028, + "step": 110870 + }, + { + "epoch": 0.711149878300244, + "grad_norm": 0.20219583809375763, + "learning_rate": 8.138806754414765e-06, + "loss": 0.0041, + "step": 110880 + }, + { + "epoch": 0.7112140151940302, + "grad_norm": 0.34467488527297974, + "learning_rate": 8.138371058362022e-06, + "loss": 0.0024, + "step": 110890 + }, + { + "epoch": 0.7112781520878162, + "grad_norm": 0.10602174699306488, + "learning_rate": 8.137935322983247e-06, + "loss": 0.0061, + "step": 110900 + }, + { + "epoch": 0.7113422889816023, + "grad_norm": 0.0546005479991436, + "learning_rate": 8.137499548283902e-06, + "loss": 0.003, + "step": 110910 + }, + { + "epoch": 0.7114064258753884, + "grad_norm": 0.11193834245204926, + "learning_rate": 8.137063734269444e-06, + "loss": 0.0043, + "step": 110920 + }, + { + "epoch": 0.7114705627691745, + "grad_norm": 0.24336637556552887, + "learning_rate": 8.136627880945336e-06, + "loss": 0.0021, + "step": 110930 + }, + { + "epoch": 0.7115346996629606, + "grad_norm": 0.14344747364521027, + "learning_rate": 8.136191988317037e-06, + "loss": 0.0035, + "step": 110940 + }, + { + "epoch": 0.7115988365567467, + "grad_norm": 0.04977329447865486, + "learning_rate": 8.135756056390013e-06, + "loss": 0.0038, + "step": 110950 + }, + { + "epoch": 0.7116629734505329, + "grad_norm": 0.18715287744998932, + "learning_rate": 8.135320085169723e-06, + "loss": 0.0031, + "step": 110960 + }, + { + "epoch": 0.7117271103443189, + "grad_norm": 0.010879084467887878, + "learning_rate": 8.134884074661633e-06, + "loss": 0.0021, + "step": 110970 + }, + { + "epoch": 0.711791247238105, + "grad_norm": 0.15092532336711884, + "learning_rate": 8.134448024871204e-06, + "loss": 0.0042, + "step": 110980 + }, + { + "epoch": 0.7118553841318911, + "grad_norm": 0.11990223824977875, + "learning_rate": 8.134011935803901e-06, + "loss": 0.0026, + "step": 110990 + }, + { + "epoch": 0.7119195210256772, + "grad_norm": 0.0894118919968605, + "learning_rate": 8.13357580746519e-06, + "loss": 0.003, + "step": 111000 + }, + { + "epoch": 0.7119836579194633, + "grad_norm": 0.1143919974565506, + "learning_rate": 8.133139639860533e-06, + "loss": 0.0021, + "step": 111010 + }, + { + "epoch": 0.7120477948132494, + "grad_norm": 0.16754014790058136, + "learning_rate": 8.132703432995398e-06, + "loss": 0.0042, + "step": 111020 + }, + { + "epoch": 0.7121119317070355, + "grad_norm": 0.11708588898181915, + "learning_rate": 8.13226718687525e-06, + "loss": 0.002, + "step": 111030 + }, + { + "epoch": 0.7121760686008216, + "grad_norm": 0.13375858962535858, + "learning_rate": 8.131830901505556e-06, + "loss": 0.0032, + "step": 111040 + }, + { + "epoch": 0.7122402054946076, + "grad_norm": 0.054817404597997665, + "learning_rate": 8.13139457689178e-06, + "loss": 0.0027, + "step": 111050 + }, + { + "epoch": 0.7123043423883938, + "grad_norm": 0.2613508701324463, + "learning_rate": 8.130958213039395e-06, + "loss": 0.0023, + "step": 111060 + }, + { + "epoch": 0.7123684792821798, + "grad_norm": 0.04319367557764053, + "learning_rate": 8.130521809953863e-06, + "loss": 0.0027, + "step": 111070 + }, + { + "epoch": 0.712432616175966, + "grad_norm": 0.15703964233398438, + "learning_rate": 8.130085367640659e-06, + "loss": 0.0021, + "step": 111080 + }, + { + "epoch": 0.7124967530697521, + "grad_norm": 0.21569177508354187, + "learning_rate": 8.129648886105246e-06, + "loss": 0.0017, + "step": 111090 + }, + { + "epoch": 0.7125608899635382, + "grad_norm": 0.08998652547597885, + "learning_rate": 8.129212365353096e-06, + "loss": 0.0032, + "step": 111100 + }, + { + "epoch": 0.7126250268573243, + "grad_norm": 0.07066009193658829, + "learning_rate": 8.12877580538968e-06, + "loss": 0.0046, + "step": 111110 + }, + { + "epoch": 0.7126891637511104, + "grad_norm": 0.13526740670204163, + "learning_rate": 8.128339206220466e-06, + "loss": 0.0021, + "step": 111120 + }, + { + "epoch": 0.7127533006448965, + "grad_norm": 0.2631956934928894, + "learning_rate": 8.127902567850924e-06, + "loss": 0.0038, + "step": 111130 + }, + { + "epoch": 0.7128174375386825, + "grad_norm": 0.14094075560569763, + "learning_rate": 8.12746589028653e-06, + "loss": 0.0024, + "step": 111140 + }, + { + "epoch": 0.7128815744324687, + "grad_norm": 0.08425044268369675, + "learning_rate": 8.127029173532753e-06, + "loss": 0.0027, + "step": 111150 + }, + { + "epoch": 0.7129457113262547, + "grad_norm": 0.14520998299121857, + "learning_rate": 8.126592417595065e-06, + "loss": 0.0039, + "step": 111160 + }, + { + "epoch": 0.7130098482200409, + "grad_norm": 0.07398927211761475, + "learning_rate": 8.12615562247894e-06, + "loss": 0.0015, + "step": 111170 + }, + { + "epoch": 0.7130739851138269, + "grad_norm": 0.036775704473257065, + "learning_rate": 8.12571878818985e-06, + "loss": 0.0029, + "step": 111180 + }, + { + "epoch": 0.7131381220076131, + "grad_norm": 0.34897202253341675, + "learning_rate": 8.12528191473327e-06, + "loss": 0.0046, + "step": 111190 + }, + { + "epoch": 0.7132022589013991, + "grad_norm": 0.2502409517765045, + "learning_rate": 8.124845002114674e-06, + "loss": 0.0036, + "step": 111200 + }, + { + "epoch": 0.7132663957951852, + "grad_norm": 0.07636435329914093, + "learning_rate": 8.124408050339536e-06, + "loss": 0.0124, + "step": 111210 + }, + { + "epoch": 0.7133305326889713, + "grad_norm": 0.1467924267053604, + "learning_rate": 8.123971059413333e-06, + "loss": 0.0032, + "step": 111220 + }, + { + "epoch": 0.7133946695827574, + "grad_norm": 0.12310754507780075, + "learning_rate": 8.12353402934154e-06, + "loss": 0.0033, + "step": 111230 + }, + { + "epoch": 0.7134588064765436, + "grad_norm": 0.2299281358718872, + "learning_rate": 8.123096960129633e-06, + "loss": 0.0037, + "step": 111240 + }, + { + "epoch": 0.7135229433703296, + "grad_norm": 0.1423099786043167, + "learning_rate": 8.12265985178309e-06, + "loss": 0.0048, + "step": 111250 + }, + { + "epoch": 0.7135870802641158, + "grad_norm": 0.12548957765102386, + "learning_rate": 8.122222704307386e-06, + "loss": 0.0019, + "step": 111260 + }, + { + "epoch": 0.7136512171579018, + "grad_norm": 0.10565450042486191, + "learning_rate": 8.121785517708e-06, + "loss": 0.0034, + "step": 111270 + }, + { + "epoch": 0.713715354051688, + "grad_norm": 0.11274101585149765, + "learning_rate": 8.121348291990411e-06, + "loss": 0.0025, + "step": 111280 + }, + { + "epoch": 0.713779490945474, + "grad_norm": 0.08379390090703964, + "learning_rate": 8.120911027160097e-06, + "loss": 0.0027, + "step": 111290 + }, + { + "epoch": 0.7138436278392601, + "grad_norm": 0.20944179594516754, + "learning_rate": 8.120473723222537e-06, + "loss": 0.0042, + "step": 111300 + }, + { + "epoch": 0.7139077647330462, + "grad_norm": 0.3111606240272522, + "learning_rate": 8.120036380183212e-06, + "loss": 0.0039, + "step": 111310 + }, + { + "epoch": 0.7139719016268323, + "grad_norm": 0.22834256291389465, + "learning_rate": 8.1195989980476e-06, + "loss": 0.0025, + "step": 111320 + }, + { + "epoch": 0.7140360385206184, + "grad_norm": 0.186443492770195, + "learning_rate": 8.119161576821185e-06, + "loss": 0.002, + "step": 111330 + }, + { + "epoch": 0.7141001754144045, + "grad_norm": 0.056203048676252365, + "learning_rate": 8.118724116509444e-06, + "loss": 0.0031, + "step": 111340 + }, + { + "epoch": 0.7141643123081906, + "grad_norm": 0.17152699828147888, + "learning_rate": 8.118286617117863e-06, + "loss": 0.0034, + "step": 111350 + }, + { + "epoch": 0.7142284492019767, + "grad_norm": 0.09365913271903992, + "learning_rate": 8.11784907865192e-06, + "loss": 0.0034, + "step": 111360 + }, + { + "epoch": 0.7142925860957628, + "grad_norm": 0.23707567155361176, + "learning_rate": 8.1174115011171e-06, + "loss": 0.0052, + "step": 111370 + }, + { + "epoch": 0.7143567229895489, + "grad_norm": 0.0613962784409523, + "learning_rate": 8.116973884518888e-06, + "loss": 0.0055, + "step": 111380 + }, + { + "epoch": 0.714420859883335, + "grad_norm": 0.1931670755147934, + "learning_rate": 8.116536228862764e-06, + "loss": 0.0033, + "step": 111390 + }, + { + "epoch": 0.7144849967771211, + "grad_norm": 0.39843279123306274, + "learning_rate": 8.116098534154214e-06, + "loss": 0.0043, + "step": 111400 + }, + { + "epoch": 0.7145491336709072, + "grad_norm": 0.10760731995105743, + "learning_rate": 8.115660800398723e-06, + "loss": 0.003, + "step": 111410 + }, + { + "epoch": 0.7146132705646933, + "grad_norm": 0.23782630264759064, + "learning_rate": 8.115223027601776e-06, + "loss": 0.0037, + "step": 111420 + }, + { + "epoch": 0.7146774074584794, + "grad_norm": 0.1823507845401764, + "learning_rate": 8.114785215768854e-06, + "loss": 0.0015, + "step": 111430 + }, + { + "epoch": 0.7147415443522654, + "grad_norm": 0.1340901404619217, + "learning_rate": 8.114347364905451e-06, + "loss": 0.0035, + "step": 111440 + }, + { + "epoch": 0.7148056812460516, + "grad_norm": 0.12971192598342896, + "learning_rate": 8.11390947501705e-06, + "loss": 0.0033, + "step": 111450 + }, + { + "epoch": 0.7148698181398376, + "grad_norm": 0.20929332077503204, + "learning_rate": 8.113471546109135e-06, + "loss": 0.0021, + "step": 111460 + }, + { + "epoch": 0.7149339550336238, + "grad_norm": 0.13546518981456757, + "learning_rate": 8.113033578187199e-06, + "loss": 0.0028, + "step": 111470 + }, + { + "epoch": 0.7149980919274098, + "grad_norm": 0.15590383112430573, + "learning_rate": 8.112595571256725e-06, + "loss": 0.0038, + "step": 111480 + }, + { + "epoch": 0.715062228821196, + "grad_norm": 0.08301430940628052, + "learning_rate": 8.112157525323206e-06, + "loss": 0.0022, + "step": 111490 + }, + { + "epoch": 0.715126365714982, + "grad_norm": 0.04288941249251366, + "learning_rate": 8.111719440392127e-06, + "loss": 0.0015, + "step": 111500 + }, + { + "epoch": 0.7151905026087682, + "grad_norm": 0.21083122491836548, + "learning_rate": 8.111281316468981e-06, + "loss": 0.002, + "step": 111510 + }, + { + "epoch": 0.7152546395025543, + "grad_norm": 0.06173493340611458, + "learning_rate": 8.110843153559257e-06, + "loss": 0.0048, + "step": 111520 + }, + { + "epoch": 0.7153187763963403, + "grad_norm": 0.17022399604320526, + "learning_rate": 8.110404951668444e-06, + "loss": 0.0168, + "step": 111530 + }, + { + "epoch": 0.7153829132901265, + "grad_norm": 0.03184816986322403, + "learning_rate": 8.109966710802033e-06, + "loss": 0.0021, + "step": 111540 + }, + { + "epoch": 0.7154470501839125, + "grad_norm": 0.1753406822681427, + "learning_rate": 8.10952843096552e-06, + "loss": 0.0031, + "step": 111550 + }, + { + "epoch": 0.7155111870776987, + "grad_norm": 0.12510491907596588, + "learning_rate": 8.10909011216439e-06, + "loss": 0.0036, + "step": 111560 + }, + { + "epoch": 0.7155753239714847, + "grad_norm": 0.07739254832267761, + "learning_rate": 8.10865175440414e-06, + "loss": 0.0027, + "step": 111570 + }, + { + "epoch": 0.7156394608652709, + "grad_norm": 0.034791167825460434, + "learning_rate": 8.10821335769026e-06, + "loss": 0.0021, + "step": 111580 + }, + { + "epoch": 0.7157035977590569, + "grad_norm": 0.19611859321594238, + "learning_rate": 8.107774922028248e-06, + "loss": 0.0028, + "step": 111590 + }, + { + "epoch": 0.715767734652843, + "grad_norm": 0.410812646150589, + "learning_rate": 8.107336447423594e-06, + "loss": 0.0026, + "step": 111600 + }, + { + "epoch": 0.7158318715466291, + "grad_norm": 0.1915685534477234, + "learning_rate": 8.106897933881794e-06, + "loss": 0.0038, + "step": 111610 + }, + { + "epoch": 0.7158960084404152, + "grad_norm": 0.31799638271331787, + "learning_rate": 8.106459381408342e-06, + "loss": 0.0029, + "step": 111620 + }, + { + "epoch": 0.7159601453342013, + "grad_norm": 0.061092738062143326, + "learning_rate": 8.106020790008732e-06, + "loss": 0.0037, + "step": 111630 + }, + { + "epoch": 0.7160242822279874, + "grad_norm": 0.1637531816959381, + "learning_rate": 8.105582159688465e-06, + "loss": 0.0091, + "step": 111640 + }, + { + "epoch": 0.7160884191217736, + "grad_norm": 0.1069796234369278, + "learning_rate": 8.105143490453031e-06, + "loss": 0.0021, + "step": 111650 + }, + { + "epoch": 0.7161525560155596, + "grad_norm": 0.009329847991466522, + "learning_rate": 8.104704782307932e-06, + "loss": 0.0038, + "step": 111660 + }, + { + "epoch": 0.7162166929093458, + "grad_norm": 0.04874225705862045, + "learning_rate": 8.104266035258663e-06, + "loss": 0.0027, + "step": 111670 + }, + { + "epoch": 0.7162808298031318, + "grad_norm": 0.04451783001422882, + "learning_rate": 8.10382724931072e-06, + "loss": 0.0025, + "step": 111680 + }, + { + "epoch": 0.7163449666969179, + "grad_norm": 0.0949191078543663, + "learning_rate": 8.103388424469604e-06, + "loss": 0.0015, + "step": 111690 + }, + { + "epoch": 0.716409103590704, + "grad_norm": 0.16580696403980255, + "learning_rate": 8.102949560740816e-06, + "loss": 0.002, + "step": 111700 + }, + { + "epoch": 0.7164732404844901, + "grad_norm": 0.08396851271390915, + "learning_rate": 8.10251065812985e-06, + "loss": 0.002, + "step": 111710 + }, + { + "epoch": 0.7165373773782762, + "grad_norm": 0.14391309022903442, + "learning_rate": 8.102071716642209e-06, + "loss": 0.0032, + "step": 111720 + }, + { + "epoch": 0.7166015142720623, + "grad_norm": 0.1950514167547226, + "learning_rate": 8.10163273628339e-06, + "loss": 0.004, + "step": 111730 + }, + { + "epoch": 0.7166656511658483, + "grad_norm": 0.09286870807409286, + "learning_rate": 8.101193717058898e-06, + "loss": 0.0039, + "step": 111740 + }, + { + "epoch": 0.7167297880596345, + "grad_norm": 0.2724185585975647, + "learning_rate": 8.100754658974233e-06, + "loss": 0.0032, + "step": 111750 + }, + { + "epoch": 0.7167939249534205, + "grad_norm": 0.24928061664104462, + "learning_rate": 8.100315562034896e-06, + "loss": 0.004, + "step": 111760 + }, + { + "epoch": 0.7168580618472067, + "grad_norm": 0.0693969577550888, + "learning_rate": 8.099876426246387e-06, + "loss": 0.0032, + "step": 111770 + }, + { + "epoch": 0.7169221987409927, + "grad_norm": 0.06157182157039642, + "learning_rate": 8.099437251614212e-06, + "loss": 0.002, + "step": 111780 + }, + { + "epoch": 0.7169863356347789, + "grad_norm": 0.0845608115196228, + "learning_rate": 8.098998038143873e-06, + "loss": 0.0017, + "step": 111790 + }, + { + "epoch": 0.717050472528565, + "grad_norm": 0.23274195194244385, + "learning_rate": 8.098558785840876e-06, + "loss": 0.0021, + "step": 111800 + }, + { + "epoch": 0.7171146094223511, + "grad_norm": 0.16222944855690002, + "learning_rate": 8.098119494710721e-06, + "loss": 0.0035, + "step": 111810 + }, + { + "epoch": 0.7171787463161372, + "grad_norm": 0.20074084401130676, + "learning_rate": 8.097680164758914e-06, + "loss": 0.0028, + "step": 111820 + }, + { + "epoch": 0.7172428832099232, + "grad_norm": 0.07163853198289871, + "learning_rate": 8.09724079599096e-06, + "loss": 0.0022, + "step": 111830 + }, + { + "epoch": 0.7173070201037094, + "grad_norm": 0.15048721432685852, + "learning_rate": 8.096801388412368e-06, + "loss": 0.0042, + "step": 111840 + }, + { + "epoch": 0.7173711569974954, + "grad_norm": 0.054579176008701324, + "learning_rate": 8.09636194202864e-06, + "loss": 0.0018, + "step": 111850 + }, + { + "epoch": 0.7174352938912816, + "grad_norm": 0.2996588945388794, + "learning_rate": 8.095922456845284e-06, + "loss": 0.0032, + "step": 111860 + }, + { + "epoch": 0.7174994307850676, + "grad_norm": 0.11844748258590698, + "learning_rate": 8.095482932867807e-06, + "loss": 0.0027, + "step": 111870 + }, + { + "epoch": 0.7175635676788538, + "grad_norm": 0.03289315477013588, + "learning_rate": 8.095043370101716e-06, + "loss": 0.0019, + "step": 111880 + }, + { + "epoch": 0.7176277045726398, + "grad_norm": 0.1594281643629074, + "learning_rate": 8.094603768552521e-06, + "loss": 0.0033, + "step": 111890 + }, + { + "epoch": 0.717691841466426, + "grad_norm": 0.054819926619529724, + "learning_rate": 8.094164128225727e-06, + "loss": 0.0039, + "step": 111900 + }, + { + "epoch": 0.717755978360212, + "grad_norm": 0.058008257299661636, + "learning_rate": 8.093724449126846e-06, + "loss": 0.005, + "step": 111910 + }, + { + "epoch": 0.7178201152539981, + "grad_norm": 0.041709840297698975, + "learning_rate": 8.093284731261387e-06, + "loss": 0.0018, + "step": 111920 + }, + { + "epoch": 0.7178842521477843, + "grad_norm": 0.07255569100379944, + "learning_rate": 8.09284497463486e-06, + "loss": 0.0026, + "step": 111930 + }, + { + "epoch": 0.7179483890415703, + "grad_norm": 0.19830240309238434, + "learning_rate": 8.092405179252774e-06, + "loss": 0.0047, + "step": 111940 + }, + { + "epoch": 0.7180125259353565, + "grad_norm": 0.12875518202781677, + "learning_rate": 8.091965345120641e-06, + "loss": 0.0029, + "step": 111950 + }, + { + "epoch": 0.7180766628291425, + "grad_norm": 0.27468159794807434, + "learning_rate": 8.091525472243972e-06, + "loss": 0.0023, + "step": 111960 + }, + { + "epoch": 0.7181407997229287, + "grad_norm": 0.1361653357744217, + "learning_rate": 8.091085560628282e-06, + "loss": 0.0026, + "step": 111970 + }, + { + "epoch": 0.7182049366167147, + "grad_norm": 0.17453205585479736, + "learning_rate": 8.090645610279078e-06, + "loss": 0.0039, + "step": 111980 + }, + { + "epoch": 0.7182690735105008, + "grad_norm": 0.053696081042289734, + "learning_rate": 8.090205621201878e-06, + "loss": 0.0025, + "step": 111990 + }, + { + "epoch": 0.7183332104042869, + "grad_norm": 0.30712389945983887, + "learning_rate": 8.089765593402192e-06, + "loss": 0.0024, + "step": 112000 + }, + { + "epoch": 0.718397347298073, + "grad_norm": 0.3034997284412384, + "learning_rate": 8.089325526885534e-06, + "loss": 0.006, + "step": 112010 + }, + { + "epoch": 0.7184614841918591, + "grad_norm": 0.2254328578710556, + "learning_rate": 8.08888542165742e-06, + "loss": 0.0028, + "step": 112020 + }, + { + "epoch": 0.7185256210856452, + "grad_norm": 0.16973623633384705, + "learning_rate": 8.088445277723362e-06, + "loss": 0.0026, + "step": 112030 + }, + { + "epoch": 0.7185897579794313, + "grad_norm": 0.13380980491638184, + "learning_rate": 8.08800509508888e-06, + "loss": 0.0021, + "step": 112040 + }, + { + "epoch": 0.7186538948732174, + "grad_norm": 0.139179527759552, + "learning_rate": 8.087564873759486e-06, + "loss": 0.0025, + "step": 112050 + }, + { + "epoch": 0.7187180317670034, + "grad_norm": 0.06634630262851715, + "learning_rate": 8.087124613740698e-06, + "loss": 0.0041, + "step": 112060 + }, + { + "epoch": 0.7187821686607896, + "grad_norm": 0.30491846799850464, + "learning_rate": 8.086684315038033e-06, + "loss": 0.0032, + "step": 112070 + }, + { + "epoch": 0.7188463055545757, + "grad_norm": 0.026031427085399628, + "learning_rate": 8.086243977657005e-06, + "loss": 0.0028, + "step": 112080 + }, + { + "epoch": 0.7189104424483618, + "grad_norm": 0.19404006004333496, + "learning_rate": 8.085803601603138e-06, + "loss": 0.0027, + "step": 112090 + }, + { + "epoch": 0.7189745793421479, + "grad_norm": 0.06454899907112122, + "learning_rate": 8.085363186881942e-06, + "loss": 0.002, + "step": 112100 + }, + { + "epoch": 0.719038716235934, + "grad_norm": 0.15043289959430695, + "learning_rate": 8.084922733498943e-06, + "loss": 0.0029, + "step": 112110 + }, + { + "epoch": 0.7191028531297201, + "grad_norm": 0.09538527578115463, + "learning_rate": 8.084482241459658e-06, + "loss": 0.0027, + "step": 112120 + }, + { + "epoch": 0.7191669900235061, + "grad_norm": 0.3106303811073303, + "learning_rate": 8.084041710769606e-06, + "loss": 0.0043, + "step": 112130 + }, + { + "epoch": 0.7192311269172923, + "grad_norm": 0.08692137897014618, + "learning_rate": 8.083601141434305e-06, + "loss": 0.0021, + "step": 112140 + }, + { + "epoch": 0.7192952638110783, + "grad_norm": 0.12955030798912048, + "learning_rate": 8.08316053345928e-06, + "loss": 0.0018, + "step": 112150 + }, + { + "epoch": 0.7193594007048645, + "grad_norm": 0.24629080295562744, + "learning_rate": 8.082719886850048e-06, + "loss": 0.0037, + "step": 112160 + }, + { + "epoch": 0.7194235375986505, + "grad_norm": 0.035296481102705, + "learning_rate": 8.082279201612135e-06, + "loss": 0.0033, + "step": 112170 + }, + { + "epoch": 0.7194876744924367, + "grad_norm": 0.13728514313697815, + "learning_rate": 8.08183847775106e-06, + "loss": 0.0037, + "step": 112180 + }, + { + "epoch": 0.7195518113862227, + "grad_norm": 0.2915211021900177, + "learning_rate": 8.081397715272346e-06, + "loss": 0.0028, + "step": 112190 + }, + { + "epoch": 0.7196159482800089, + "grad_norm": 0.13166002929210663, + "learning_rate": 8.080956914181515e-06, + "loss": 0.0044, + "step": 112200 + }, + { + "epoch": 0.719680085173795, + "grad_norm": 0.24949447810649872, + "learning_rate": 8.080516074484093e-06, + "loss": 0.0035, + "step": 112210 + }, + { + "epoch": 0.719744222067581, + "grad_norm": 0.2722225487232208, + "learning_rate": 8.080075196185605e-06, + "loss": 0.003, + "step": 112220 + }, + { + "epoch": 0.7198083589613672, + "grad_norm": 0.03342172130942345, + "learning_rate": 8.07963427929157e-06, + "loss": 0.0033, + "step": 112230 + }, + { + "epoch": 0.7198724958551532, + "grad_norm": 0.0899524912238121, + "learning_rate": 8.079193323807519e-06, + "loss": 0.0039, + "step": 112240 + }, + { + "epoch": 0.7199366327489394, + "grad_norm": 0.016616996377706528, + "learning_rate": 8.078752329738974e-06, + "loss": 0.0016, + "step": 112250 + }, + { + "epoch": 0.7200007696427254, + "grad_norm": 0.10823524743318558, + "learning_rate": 8.078311297091462e-06, + "loss": 0.0034, + "step": 112260 + }, + { + "epoch": 0.7200649065365116, + "grad_norm": 0.28435730934143066, + "learning_rate": 8.07787022587051e-06, + "loss": 0.0041, + "step": 112270 + }, + { + "epoch": 0.7201290434302976, + "grad_norm": 0.09790398925542831, + "learning_rate": 8.077429116081643e-06, + "loss": 0.0059, + "step": 112280 + }, + { + "epoch": 0.7201931803240837, + "grad_norm": 0.20814689993858337, + "learning_rate": 8.07698796773039e-06, + "loss": 0.0034, + "step": 112290 + }, + { + "epoch": 0.7202573172178698, + "grad_norm": 0.2655559182167053, + "learning_rate": 8.076546780822281e-06, + "loss": 0.0024, + "step": 112300 + }, + { + "epoch": 0.7203214541116559, + "grad_norm": 0.25793513655662537, + "learning_rate": 8.07610555536284e-06, + "loss": 0.0045, + "step": 112310 + }, + { + "epoch": 0.720385591005442, + "grad_norm": 0.1332065910100937, + "learning_rate": 8.075664291357598e-06, + "loss": 0.004, + "step": 112320 + }, + { + "epoch": 0.7204497278992281, + "grad_norm": 0.11241687834262848, + "learning_rate": 8.075222988812085e-06, + "loss": 0.0024, + "step": 112330 + }, + { + "epoch": 0.7205138647930142, + "grad_norm": 0.1313232034444809, + "learning_rate": 8.07478164773183e-06, + "loss": 0.0023, + "step": 112340 + }, + { + "epoch": 0.7205780016868003, + "grad_norm": 0.046554699540138245, + "learning_rate": 8.074340268122363e-06, + "loss": 0.002, + "step": 112350 + }, + { + "epoch": 0.7206421385805865, + "grad_norm": 0.10994312912225723, + "learning_rate": 8.073898849989214e-06, + "loss": 0.0019, + "step": 112360 + }, + { + "epoch": 0.7207062754743725, + "grad_norm": 0.09446525573730469, + "learning_rate": 8.073457393337918e-06, + "loss": 0.0024, + "step": 112370 + }, + { + "epoch": 0.7207704123681586, + "grad_norm": 0.5231614708900452, + "learning_rate": 8.073015898174003e-06, + "loss": 0.0024, + "step": 112380 + }, + { + "epoch": 0.7208345492619447, + "grad_norm": 0.1255865842103958, + "learning_rate": 8.072574364503002e-06, + "loss": 0.0023, + "step": 112390 + }, + { + "epoch": 0.7208986861557308, + "grad_norm": 0.1481911838054657, + "learning_rate": 8.07213279233045e-06, + "loss": 0.002, + "step": 112400 + }, + { + "epoch": 0.7209628230495169, + "grad_norm": 0.25029078125953674, + "learning_rate": 8.071691181661879e-06, + "loss": 0.0025, + "step": 112410 + }, + { + "epoch": 0.721026959943303, + "grad_norm": 0.18313568830490112, + "learning_rate": 8.071249532502818e-06, + "loss": 0.0017, + "step": 112420 + }, + { + "epoch": 0.721091096837089, + "grad_norm": 0.3427973687648773, + "learning_rate": 8.070807844858808e-06, + "loss": 0.0046, + "step": 112430 + }, + { + "epoch": 0.7211552337308752, + "grad_norm": 0.10546506941318512, + "learning_rate": 8.070366118735381e-06, + "loss": 0.0018, + "step": 112440 + }, + { + "epoch": 0.7212193706246612, + "grad_norm": 0.12715408205986023, + "learning_rate": 8.069924354138073e-06, + "loss": 0.0026, + "step": 112450 + }, + { + "epoch": 0.7212835075184474, + "grad_norm": 0.1146603673696518, + "learning_rate": 8.069482551072419e-06, + "loss": 0.0025, + "step": 112460 + }, + { + "epoch": 0.7213476444122334, + "grad_norm": 0.07918599247932434, + "learning_rate": 8.069040709543953e-06, + "loss": 0.0016, + "step": 112470 + }, + { + "epoch": 0.7214117813060196, + "grad_norm": 0.24079379439353943, + "learning_rate": 8.068598829558216e-06, + "loss": 0.0017, + "step": 112480 + }, + { + "epoch": 0.7214759181998056, + "grad_norm": 0.041503965854644775, + "learning_rate": 8.06815691112074e-06, + "loss": 0.0017, + "step": 112490 + }, + { + "epoch": 0.7215400550935918, + "grad_norm": 0.04950868710875511, + "learning_rate": 8.067714954237066e-06, + "loss": 0.0029, + "step": 112500 + }, + { + "epoch": 0.7216041919873779, + "grad_norm": 0.2447650134563446, + "learning_rate": 8.067272958912732e-06, + "loss": 0.0034, + "step": 112510 + }, + { + "epoch": 0.721668328881164, + "grad_norm": 0.07529481500387192, + "learning_rate": 8.066830925153276e-06, + "loss": 0.0026, + "step": 112520 + }, + { + "epoch": 0.7217324657749501, + "grad_norm": 0.17679598927497864, + "learning_rate": 8.066388852964235e-06, + "loss": 0.003, + "step": 112530 + }, + { + "epoch": 0.7217966026687361, + "grad_norm": 0.05174412950873375, + "learning_rate": 8.06594674235115e-06, + "loss": 0.002, + "step": 112540 + }, + { + "epoch": 0.7218607395625223, + "grad_norm": 0.09445449709892273, + "learning_rate": 8.065504593319561e-06, + "loss": 0.0034, + "step": 112550 + }, + { + "epoch": 0.7219248764563083, + "grad_norm": 0.1533842384815216, + "learning_rate": 8.065062405875011e-06, + "loss": 0.0056, + "step": 112560 + }, + { + "epoch": 0.7219890133500945, + "grad_norm": 0.07002757489681244, + "learning_rate": 8.064620180023037e-06, + "loss": 0.002, + "step": 112570 + }, + { + "epoch": 0.7220531502438805, + "grad_norm": 0.05978749319911003, + "learning_rate": 8.064177915769182e-06, + "loss": 0.0019, + "step": 112580 + }, + { + "epoch": 0.7221172871376667, + "grad_norm": 0.14716807007789612, + "learning_rate": 8.063735613118988e-06, + "loss": 0.0026, + "step": 112590 + }, + { + "epoch": 0.7221814240314527, + "grad_norm": 0.015161024406552315, + "learning_rate": 8.063293272077996e-06, + "loss": 0.003, + "step": 112600 + }, + { + "epoch": 0.7222455609252388, + "grad_norm": 0.23610588908195496, + "learning_rate": 8.062850892651752e-06, + "loss": 0.0028, + "step": 112610 + }, + { + "epoch": 0.7223096978190249, + "grad_norm": 0.1815919280052185, + "learning_rate": 8.062408474845796e-06, + "loss": 0.0025, + "step": 112620 + }, + { + "epoch": 0.722373834712811, + "grad_norm": 0.19692429900169373, + "learning_rate": 8.061966018665672e-06, + "loss": 0.0037, + "step": 112630 + }, + { + "epoch": 0.7224379716065972, + "grad_norm": 0.16444410383701324, + "learning_rate": 8.061523524116927e-06, + "loss": 0.005, + "step": 112640 + }, + { + "epoch": 0.7225021085003832, + "grad_norm": 0.11123505234718323, + "learning_rate": 8.061080991205104e-06, + "loss": 0.0019, + "step": 112650 + }, + { + "epoch": 0.7225662453941694, + "grad_norm": 0.11508255451917648, + "learning_rate": 8.06063841993575e-06, + "loss": 0.0027, + "step": 112660 + }, + { + "epoch": 0.7226303822879554, + "grad_norm": 0.06001582741737366, + "learning_rate": 8.060195810314407e-06, + "loss": 0.0047, + "step": 112670 + }, + { + "epoch": 0.7226945191817415, + "grad_norm": 0.763580322265625, + "learning_rate": 8.059753162346624e-06, + "loss": 0.0057, + "step": 112680 + }, + { + "epoch": 0.7227586560755276, + "grad_norm": 0.34137997031211853, + "learning_rate": 8.059310476037947e-06, + "loss": 0.0038, + "step": 112690 + }, + { + "epoch": 0.7228227929693137, + "grad_norm": 0.21813026070594788, + "learning_rate": 8.058867751393923e-06, + "loss": 0.0043, + "step": 112700 + }, + { + "epoch": 0.7228869298630998, + "grad_norm": 0.23461997509002686, + "learning_rate": 8.058424988420101e-06, + "loss": 0.0034, + "step": 112710 + }, + { + "epoch": 0.7229510667568859, + "grad_norm": 0.16151976585388184, + "learning_rate": 8.057982187122027e-06, + "loss": 0.004, + "step": 112720 + }, + { + "epoch": 0.723015203650672, + "grad_norm": 0.09632989764213562, + "learning_rate": 8.057539347505252e-06, + "loss": 0.0033, + "step": 112730 + }, + { + "epoch": 0.7230793405444581, + "grad_norm": 0.2594684958457947, + "learning_rate": 8.057096469575324e-06, + "loss": 0.0023, + "step": 112740 + }, + { + "epoch": 0.7231434774382441, + "grad_norm": 0.1471938192844391, + "learning_rate": 8.05665355333779e-06, + "loss": 0.0052, + "step": 112750 + }, + { + "epoch": 0.7232076143320303, + "grad_norm": 0.1528429388999939, + "learning_rate": 8.056210598798204e-06, + "loss": 0.0022, + "step": 112760 + }, + { + "epoch": 0.7232717512258163, + "grad_norm": 0.14047491550445557, + "learning_rate": 8.055767605962115e-06, + "loss": 0.003, + "step": 112770 + }, + { + "epoch": 0.7233358881196025, + "grad_norm": 0.10037308186292648, + "learning_rate": 8.055324574835072e-06, + "loss": 0.0025, + "step": 112780 + }, + { + "epoch": 0.7234000250133886, + "grad_norm": 0.05584083870053291, + "learning_rate": 8.054881505422631e-06, + "loss": 0.0032, + "step": 112790 + }, + { + "epoch": 0.7234641619071747, + "grad_norm": 0.18206900358200073, + "learning_rate": 8.05443839773034e-06, + "loss": 0.0024, + "step": 112800 + }, + { + "epoch": 0.7235282988009608, + "grad_norm": 0.01469647977501154, + "learning_rate": 8.053995251763753e-06, + "loss": 0.0038, + "step": 112810 + }, + { + "epoch": 0.7235924356947468, + "grad_norm": 0.021825265139341354, + "learning_rate": 8.053552067528423e-06, + "loss": 0.0036, + "step": 112820 + }, + { + "epoch": 0.723656572588533, + "grad_norm": 0.15762962400913239, + "learning_rate": 8.053108845029905e-06, + "loss": 0.0019, + "step": 112830 + }, + { + "epoch": 0.723720709482319, + "grad_norm": 0.08991697430610657, + "learning_rate": 8.05266558427375e-06, + "loss": 0.0032, + "step": 112840 + }, + { + "epoch": 0.7237848463761052, + "grad_norm": 0.09897933900356293, + "learning_rate": 8.052222285265512e-06, + "loss": 0.0024, + "step": 112850 + }, + { + "epoch": 0.7238489832698912, + "grad_norm": 0.03703915700316429, + "learning_rate": 8.05177894801075e-06, + "loss": 0.0047, + "step": 112860 + }, + { + "epoch": 0.7239131201636774, + "grad_norm": 0.20504747331142426, + "learning_rate": 8.051335572515013e-06, + "loss": 0.0022, + "step": 112870 + }, + { + "epoch": 0.7239772570574634, + "grad_norm": 0.22929313778877258, + "learning_rate": 8.050892158783862e-06, + "loss": 0.0031, + "step": 112880 + }, + { + "epoch": 0.7240413939512496, + "grad_norm": 0.21101884543895721, + "learning_rate": 8.050448706822855e-06, + "loss": 0.0045, + "step": 112890 + }, + { + "epoch": 0.7241055308450356, + "grad_norm": 0.062430642545223236, + "learning_rate": 8.050005216637543e-06, + "loss": 0.0027, + "step": 112900 + }, + { + "epoch": 0.7241696677388217, + "grad_norm": 0.035764604806900024, + "learning_rate": 8.049561688233485e-06, + "loss": 0.0026, + "step": 112910 + }, + { + "epoch": 0.7242338046326079, + "grad_norm": 0.052385181188583374, + "learning_rate": 8.049118121616242e-06, + "loss": 0.0023, + "step": 112920 + }, + { + "epoch": 0.7242979415263939, + "grad_norm": 0.02150088921189308, + "learning_rate": 8.048674516791368e-06, + "loss": 0.0021, + "step": 112930 + }, + { + "epoch": 0.7243620784201801, + "grad_norm": 0.026390841230750084, + "learning_rate": 8.048230873764422e-06, + "loss": 0.0018, + "step": 112940 + }, + { + "epoch": 0.7244262153139661, + "grad_norm": 0.19471149146556854, + "learning_rate": 8.047787192540967e-06, + "loss": 0.0041, + "step": 112950 + }, + { + "epoch": 0.7244903522077523, + "grad_norm": 0.12749211490154266, + "learning_rate": 8.047343473126559e-06, + "loss": 0.0037, + "step": 112960 + }, + { + "epoch": 0.7245544891015383, + "grad_norm": 0.07260242104530334, + "learning_rate": 8.046899715526762e-06, + "loss": 0.0027, + "step": 112970 + }, + { + "epoch": 0.7246186259953245, + "grad_norm": 0.13378237187862396, + "learning_rate": 8.046455919747131e-06, + "loss": 0.0017, + "step": 112980 + }, + { + "epoch": 0.7246827628891105, + "grad_norm": 0.15227557718753815, + "learning_rate": 8.046012085793231e-06, + "loss": 0.0022, + "step": 112990 + }, + { + "epoch": 0.7247468997828966, + "grad_norm": 0.11496797949075699, + "learning_rate": 8.045568213670623e-06, + "loss": 0.0024, + "step": 113000 + }, + { + "epoch": 0.7248110366766827, + "grad_norm": 0.05050666630268097, + "learning_rate": 8.045124303384868e-06, + "loss": 0.0017, + "step": 113010 + }, + { + "epoch": 0.7248751735704688, + "grad_norm": 0.042164478451013565, + "learning_rate": 8.04468035494153e-06, + "loss": 0.0018, + "step": 113020 + }, + { + "epoch": 0.7249393104642549, + "grad_norm": 0.09713324904441833, + "learning_rate": 8.04423636834617e-06, + "loss": 0.0047, + "step": 113030 + }, + { + "epoch": 0.725003447358041, + "grad_norm": 0.05479344353079796, + "learning_rate": 8.043792343604354e-06, + "loss": 0.0027, + "step": 113040 + }, + { + "epoch": 0.725067584251827, + "grad_norm": 0.029082629829645157, + "learning_rate": 8.043348280721643e-06, + "loss": 0.0036, + "step": 113050 + }, + { + "epoch": 0.7251317211456132, + "grad_norm": 0.09260375797748566, + "learning_rate": 8.042904179703605e-06, + "loss": 0.002, + "step": 113060 + }, + { + "epoch": 0.7251958580393993, + "grad_norm": 0.1256377398967743, + "learning_rate": 8.042460040555802e-06, + "loss": 0.0034, + "step": 113070 + }, + { + "epoch": 0.7252599949331854, + "grad_norm": 0.07848475128412247, + "learning_rate": 8.042015863283799e-06, + "loss": 0.0025, + "step": 113080 + }, + { + "epoch": 0.7253241318269715, + "grad_norm": 0.0060674287378787994, + "learning_rate": 8.041571647893165e-06, + "loss": 0.002, + "step": 113090 + }, + { + "epoch": 0.7253882687207576, + "grad_norm": 0.12943817675113678, + "learning_rate": 8.041127394389462e-06, + "loss": 0.0026, + "step": 113100 + }, + { + "epoch": 0.7254524056145437, + "grad_norm": 0.019311005249619484, + "learning_rate": 8.040683102778262e-06, + "loss": 0.0019, + "step": 113110 + }, + { + "epoch": 0.7255165425083298, + "grad_norm": 0.23863111436367035, + "learning_rate": 8.040238773065128e-06, + "loss": 0.0032, + "step": 113120 + }, + { + "epoch": 0.7255806794021159, + "grad_norm": 0.17583878338336945, + "learning_rate": 8.03979440525563e-06, + "loss": 0.0029, + "step": 113130 + }, + { + "epoch": 0.7256448162959019, + "grad_norm": 0.2913358211517334, + "learning_rate": 8.039349999355336e-06, + "loss": 0.0042, + "step": 113140 + }, + { + "epoch": 0.7257089531896881, + "grad_norm": 0.022446373477578163, + "learning_rate": 8.038905555369814e-06, + "loss": 0.0024, + "step": 113150 + }, + { + "epoch": 0.7257730900834741, + "grad_norm": 0.1001579761505127, + "learning_rate": 8.038461073304633e-06, + "loss": 0.0024, + "step": 113160 + }, + { + "epoch": 0.7258372269772603, + "grad_norm": 0.09300924092531204, + "learning_rate": 8.038016553165363e-06, + "loss": 0.0048, + "step": 113170 + }, + { + "epoch": 0.7259013638710463, + "grad_norm": 0.09971919655799866, + "learning_rate": 8.037571994957576e-06, + "loss": 0.0013, + "step": 113180 + }, + { + "epoch": 0.7259655007648325, + "grad_norm": 0.1383122205734253, + "learning_rate": 8.037127398686838e-06, + "loss": 0.0025, + "step": 113190 + }, + { + "epoch": 0.7260296376586186, + "grad_norm": 0.179524227976799, + "learning_rate": 8.036682764358725e-06, + "loss": 0.002, + "step": 113200 + }, + { + "epoch": 0.7260937745524046, + "grad_norm": 0.11412809044122696, + "learning_rate": 8.036238091978808e-06, + "loss": 0.0043, + "step": 113210 + }, + { + "epoch": 0.7261579114461908, + "grad_norm": 0.08796469867229462, + "learning_rate": 8.035793381552655e-06, + "loss": 0.0017, + "step": 113220 + }, + { + "epoch": 0.7262220483399768, + "grad_norm": 0.05354146286845207, + "learning_rate": 8.035348633085842e-06, + "loss": 0.002, + "step": 113230 + }, + { + "epoch": 0.726286185233763, + "grad_norm": 0.13761520385742188, + "learning_rate": 8.034903846583942e-06, + "loss": 0.0146, + "step": 113240 + }, + { + "epoch": 0.726350322127549, + "grad_norm": 3.3249149322509766, + "learning_rate": 8.034459022052527e-06, + "loss": 0.0147, + "step": 113250 + }, + { + "epoch": 0.7264144590213352, + "grad_norm": 0.0731549933552742, + "learning_rate": 8.034014159497174e-06, + "loss": 0.0027, + "step": 113260 + }, + { + "epoch": 0.7264785959151212, + "grad_norm": 0.32172027230262756, + "learning_rate": 8.033569258923453e-06, + "loss": 0.0031, + "step": 113270 + }, + { + "epoch": 0.7265427328089074, + "grad_norm": 0.18017232418060303, + "learning_rate": 8.033124320336942e-06, + "loss": 0.0066, + "step": 113280 + }, + { + "epoch": 0.7266068697026934, + "grad_norm": 0.08678385615348816, + "learning_rate": 8.032679343743215e-06, + "loss": 0.0018, + "step": 113290 + }, + { + "epoch": 0.7266710065964795, + "grad_norm": 0.17082479596138, + "learning_rate": 8.032234329147849e-06, + "loss": 0.0031, + "step": 113300 + }, + { + "epoch": 0.7267351434902656, + "grad_norm": 0.017062757164239883, + "learning_rate": 8.031789276556418e-06, + "loss": 0.0027, + "step": 113310 + }, + { + "epoch": 0.7267992803840517, + "grad_norm": 0.38885724544525146, + "learning_rate": 8.031344185974504e-06, + "loss": 0.0017, + "step": 113320 + }, + { + "epoch": 0.7268634172778378, + "grad_norm": 0.08904733508825302, + "learning_rate": 8.030899057407679e-06, + "loss": 0.003, + "step": 113330 + }, + { + "epoch": 0.7269275541716239, + "grad_norm": 0.07714398205280304, + "learning_rate": 8.030453890861522e-06, + "loss": 0.0022, + "step": 113340 + }, + { + "epoch": 0.7269916910654101, + "grad_norm": 0.206559419631958, + "learning_rate": 8.030008686341613e-06, + "loss": 0.0018, + "step": 113350 + }, + { + "epoch": 0.7270558279591961, + "grad_norm": 0.21958573162555695, + "learning_rate": 8.029563443853529e-06, + "loss": 0.0032, + "step": 113360 + }, + { + "epoch": 0.7271199648529822, + "grad_norm": 0.12396115809679031, + "learning_rate": 8.029118163402849e-06, + "loss": 0.0028, + "step": 113370 + }, + { + "epoch": 0.7271841017467683, + "grad_norm": 0.1274358332157135, + "learning_rate": 8.028672844995155e-06, + "loss": 0.003, + "step": 113380 + }, + { + "epoch": 0.7272482386405544, + "grad_norm": 0.13518588244915009, + "learning_rate": 8.028227488636024e-06, + "loss": 0.0015, + "step": 113390 + }, + { + "epoch": 0.7273123755343405, + "grad_norm": 0.11445048451423645, + "learning_rate": 8.027782094331041e-06, + "loss": 0.0042, + "step": 113400 + }, + { + "epoch": 0.7273765124281266, + "grad_norm": 0.10959281027317047, + "learning_rate": 8.027336662085784e-06, + "loss": 0.0059, + "step": 113410 + }, + { + "epoch": 0.7274406493219127, + "grad_norm": 0.06854724884033203, + "learning_rate": 8.026891191905832e-06, + "loss": 0.003, + "step": 113420 + }, + { + "epoch": 0.7275047862156988, + "grad_norm": 0.15871262550354004, + "learning_rate": 8.026445683796772e-06, + "loss": 0.0026, + "step": 113430 + }, + { + "epoch": 0.7275689231094848, + "grad_norm": 0.058151207864284515, + "learning_rate": 8.026000137764185e-06, + "loss": 0.0024, + "step": 113440 + }, + { + "epoch": 0.727633060003271, + "grad_norm": 0.19693368673324585, + "learning_rate": 8.025554553813654e-06, + "loss": 0.0028, + "step": 113450 + }, + { + "epoch": 0.727697196897057, + "grad_norm": 0.15146328508853912, + "learning_rate": 8.02510893195076e-06, + "loss": 0.0017, + "step": 113460 + }, + { + "epoch": 0.7277613337908432, + "grad_norm": 0.023606721311807632, + "learning_rate": 8.024663272181092e-06, + "loss": 0.0025, + "step": 113470 + }, + { + "epoch": 0.7278254706846293, + "grad_norm": 0.15048794448375702, + "learning_rate": 8.024217574510229e-06, + "loss": 0.003, + "step": 113480 + }, + { + "epoch": 0.7278896075784154, + "grad_norm": 0.10539175570011139, + "learning_rate": 8.02377183894376e-06, + "loss": 0.003, + "step": 113490 + }, + { + "epoch": 0.7279537444722015, + "grad_norm": 0.2851220667362213, + "learning_rate": 8.023326065487267e-06, + "loss": 0.0032, + "step": 113500 + }, + { + "epoch": 0.7280178813659876, + "grad_norm": 0.11118152737617493, + "learning_rate": 8.02288025414634e-06, + "loss": 0.0023, + "step": 113510 + }, + { + "epoch": 0.7280820182597737, + "grad_norm": 0.08770406246185303, + "learning_rate": 8.022434404926563e-06, + "loss": 0.0021, + "step": 113520 + }, + { + "epoch": 0.7281461551535597, + "grad_norm": 0.1460132896900177, + "learning_rate": 8.02198851783352e-06, + "loss": 0.0024, + "step": 113530 + }, + { + "epoch": 0.7282102920473459, + "grad_norm": 0.1536615639925003, + "learning_rate": 8.021542592872804e-06, + "loss": 0.0023, + "step": 113540 + }, + { + "epoch": 0.7282744289411319, + "grad_norm": 0.07052811980247498, + "learning_rate": 8.021096630049999e-06, + "loss": 0.0021, + "step": 113550 + }, + { + "epoch": 0.7283385658349181, + "grad_norm": 0.07707761973142624, + "learning_rate": 8.020650629370692e-06, + "loss": 0.0033, + "step": 113560 + }, + { + "epoch": 0.7284027027287041, + "grad_norm": 0.19061174988746643, + "learning_rate": 8.020204590840474e-06, + "loss": 0.0025, + "step": 113570 + }, + { + "epoch": 0.7284668396224903, + "grad_norm": 0.023339703679084778, + "learning_rate": 8.019758514464936e-06, + "loss": 0.0043, + "step": 113580 + }, + { + "epoch": 0.7285309765162763, + "grad_norm": 0.1458740234375, + "learning_rate": 8.019312400249665e-06, + "loss": 0.0026, + "step": 113590 + }, + { + "epoch": 0.7285951134100624, + "grad_norm": 0.1815797984600067, + "learning_rate": 8.018866248200253e-06, + "loss": 0.0042, + "step": 113600 + }, + { + "epoch": 0.7286592503038485, + "grad_norm": 0.09067380428314209, + "learning_rate": 8.018420058322288e-06, + "loss": 0.0026, + "step": 113610 + }, + { + "epoch": 0.7287233871976346, + "grad_norm": 0.03138352185487747, + "learning_rate": 8.017973830621363e-06, + "loss": 0.0031, + "step": 113620 + }, + { + "epoch": 0.7287875240914208, + "grad_norm": 0.11717572808265686, + "learning_rate": 8.017527565103068e-06, + "loss": 0.0025, + "step": 113630 + }, + { + "epoch": 0.7288516609852068, + "grad_norm": 0.12940505146980286, + "learning_rate": 8.017081261772996e-06, + "loss": 0.0051, + "step": 113640 + }, + { + "epoch": 0.728915797878993, + "grad_norm": 0.1723572462797165, + "learning_rate": 8.016634920636741e-06, + "loss": 0.0037, + "step": 113650 + }, + { + "epoch": 0.728979934772779, + "grad_norm": 0.07741840928792953, + "learning_rate": 8.016188541699894e-06, + "loss": 0.0044, + "step": 113660 + }, + { + "epoch": 0.7290440716665652, + "grad_norm": 0.018096577376127243, + "learning_rate": 8.015742124968048e-06, + "loss": 0.0023, + "step": 113670 + }, + { + "epoch": 0.7291082085603512, + "grad_norm": 0.10388552397489548, + "learning_rate": 8.015295670446799e-06, + "loss": 0.0058, + "step": 113680 + }, + { + "epoch": 0.7291723454541373, + "grad_norm": 0.1819618195295334, + "learning_rate": 8.01484917814174e-06, + "loss": 0.0025, + "step": 113690 + }, + { + "epoch": 0.7292364823479234, + "grad_norm": 0.003572376910597086, + "learning_rate": 8.014402648058465e-06, + "loss": 0.0016, + "step": 113700 + }, + { + "epoch": 0.7293006192417095, + "grad_norm": 0.054669469594955444, + "learning_rate": 8.013956080202571e-06, + "loss": 0.0023, + "step": 113710 + }, + { + "epoch": 0.7293647561354956, + "grad_norm": 0.09258153289556503, + "learning_rate": 8.013509474579653e-06, + "loss": 0.0068, + "step": 113720 + }, + { + "epoch": 0.7294288930292817, + "grad_norm": 0.17361032962799072, + "learning_rate": 8.013062831195309e-06, + "loss": 0.0027, + "step": 113730 + }, + { + "epoch": 0.7294930299230677, + "grad_norm": 0.165913388133049, + "learning_rate": 8.012616150055133e-06, + "loss": 0.0028, + "step": 113740 + }, + { + "epoch": 0.7295571668168539, + "grad_norm": 0.33771705627441406, + "learning_rate": 8.012169431164725e-06, + "loss": 0.006, + "step": 113750 + }, + { + "epoch": 0.7296213037106399, + "grad_norm": 0.06362960487604141, + "learning_rate": 8.01172267452968e-06, + "loss": 0.0036, + "step": 113760 + }, + { + "epoch": 0.7296854406044261, + "grad_norm": 0.06443426012992859, + "learning_rate": 8.011275880155599e-06, + "loss": 0.002, + "step": 113770 + }, + { + "epoch": 0.7297495774982122, + "grad_norm": 0.14011427760124207, + "learning_rate": 8.010829048048075e-06, + "loss": 0.0024, + "step": 113780 + }, + { + "epoch": 0.7298137143919983, + "grad_norm": 0.165731742978096, + "learning_rate": 8.010382178212714e-06, + "loss": 0.0013, + "step": 113790 + }, + { + "epoch": 0.7298778512857844, + "grad_norm": 0.04552525281906128, + "learning_rate": 8.009935270655114e-06, + "loss": 0.002, + "step": 113800 + }, + { + "epoch": 0.7299419881795705, + "grad_norm": 0.1588314175605774, + "learning_rate": 8.009488325380872e-06, + "loss": 0.0055, + "step": 113810 + }, + { + "epoch": 0.7300061250733566, + "grad_norm": 0.045454684644937515, + "learning_rate": 8.00904134239559e-06, + "loss": 0.0016, + "step": 113820 + }, + { + "epoch": 0.7300702619671426, + "grad_norm": 0.25208306312561035, + "learning_rate": 8.008594321704872e-06, + "loss": 0.0024, + "step": 113830 + }, + { + "epoch": 0.7301343988609288, + "grad_norm": 0.08057577162981033, + "learning_rate": 8.008147263314316e-06, + "loss": 0.002, + "step": 113840 + }, + { + "epoch": 0.7301985357547148, + "grad_norm": 0.11513067781925201, + "learning_rate": 8.007700167229525e-06, + "loss": 0.0016, + "step": 113850 + }, + { + "epoch": 0.730262672648501, + "grad_norm": 0.16625617444515228, + "learning_rate": 8.007253033456099e-06, + "loss": 0.0022, + "step": 113860 + }, + { + "epoch": 0.730326809542287, + "grad_norm": 0.11144071072340012, + "learning_rate": 8.006805861999645e-06, + "loss": 0.0025, + "step": 113870 + }, + { + "epoch": 0.7303909464360732, + "grad_norm": 0.12607555091381073, + "learning_rate": 8.006358652865764e-06, + "loss": 0.0046, + "step": 113880 + }, + { + "epoch": 0.7304550833298592, + "grad_norm": 0.08143970370292664, + "learning_rate": 8.005911406060062e-06, + "loss": 0.002, + "step": 113890 + }, + { + "epoch": 0.7305192202236453, + "grad_norm": 0.18797878921031952, + "learning_rate": 8.005464121588142e-06, + "loss": 0.0022, + "step": 113900 + }, + { + "epoch": 0.7305833571174315, + "grad_norm": 0.13115228712558746, + "learning_rate": 8.005016799455607e-06, + "loss": 0.0029, + "step": 113910 + }, + { + "epoch": 0.7306474940112175, + "grad_norm": 0.09957197308540344, + "learning_rate": 8.004569439668064e-06, + "loss": 0.0022, + "step": 113920 + }, + { + "epoch": 0.7307116309050037, + "grad_norm": 0.055061738938093185, + "learning_rate": 8.004122042231118e-06, + "loss": 0.0036, + "step": 113930 + }, + { + "epoch": 0.7307757677987897, + "grad_norm": 0.2100890427827835, + "learning_rate": 8.003674607150377e-06, + "loss": 0.0025, + "step": 113940 + }, + { + "epoch": 0.7308399046925759, + "grad_norm": 0.1487421691417694, + "learning_rate": 8.003227134431446e-06, + "loss": 0.0041, + "step": 113950 + }, + { + "epoch": 0.7309040415863619, + "grad_norm": 0.09251925349235535, + "learning_rate": 8.002779624079933e-06, + "loss": 0.0022, + "step": 113960 + }, + { + "epoch": 0.7309681784801481, + "grad_norm": 0.08976790308952332, + "learning_rate": 8.002332076101445e-06, + "loss": 0.0017, + "step": 113970 + }, + { + "epoch": 0.7310323153739341, + "grad_norm": 0.11529594659805298, + "learning_rate": 8.00188449050159e-06, + "loss": 0.0028, + "step": 113980 + }, + { + "epoch": 0.7310964522677202, + "grad_norm": 0.09520257264375687, + "learning_rate": 8.001436867285977e-06, + "loss": 0.0019, + "step": 113990 + }, + { + "epoch": 0.7311605891615063, + "grad_norm": 0.02042161114513874, + "learning_rate": 8.000989206460215e-06, + "loss": 0.003, + "step": 114000 + }, + { + "epoch": 0.7312247260552924, + "grad_norm": 0.06751138716936111, + "learning_rate": 8.000541508029914e-06, + "loss": 0.0016, + "step": 114010 + }, + { + "epoch": 0.7312888629490785, + "grad_norm": 0.17682015895843506, + "learning_rate": 8.000093772000682e-06, + "loss": 0.003, + "step": 114020 + }, + { + "epoch": 0.7313529998428646, + "grad_norm": 0.12699271738529205, + "learning_rate": 7.999645998378133e-06, + "loss": 0.0037, + "step": 114030 + }, + { + "epoch": 0.7314171367366507, + "grad_norm": 0.10249500721693039, + "learning_rate": 7.999198187167875e-06, + "loss": 0.0094, + "step": 114040 + }, + { + "epoch": 0.7314812736304368, + "grad_norm": 0.019974878057837486, + "learning_rate": 7.99875033837552e-06, + "loss": 0.0018, + "step": 114050 + }, + { + "epoch": 0.731545410524223, + "grad_norm": 0.12484804540872574, + "learning_rate": 7.99830245200668e-06, + "loss": 0.0034, + "step": 114060 + }, + { + "epoch": 0.731609547418009, + "grad_norm": 0.27671071887016296, + "learning_rate": 7.997854528066968e-06, + "loss": 0.0058, + "step": 114070 + }, + { + "epoch": 0.7316736843117951, + "grad_norm": 0.08566673845052719, + "learning_rate": 7.997406566561996e-06, + "loss": 0.0032, + "step": 114080 + }, + { + "epoch": 0.7317378212055812, + "grad_norm": 0.03367238491773605, + "learning_rate": 7.996958567497377e-06, + "loss": 0.0013, + "step": 114090 + }, + { + "epoch": 0.7318019580993673, + "grad_norm": 0.11499220132827759, + "learning_rate": 7.996510530878726e-06, + "loss": 0.0014, + "step": 114100 + }, + { + "epoch": 0.7318660949931534, + "grad_norm": 0.10906586796045303, + "learning_rate": 7.996062456711656e-06, + "loss": 0.0025, + "step": 114110 + }, + { + "epoch": 0.7319302318869395, + "grad_norm": 0.10528113692998886, + "learning_rate": 7.995614345001783e-06, + "loss": 0.0021, + "step": 114120 + }, + { + "epoch": 0.7319943687807255, + "grad_norm": 0.014887388795614243, + "learning_rate": 7.99516619575472e-06, + "loss": 0.0018, + "step": 114130 + }, + { + "epoch": 0.7320585056745117, + "grad_norm": 0.171325221657753, + "learning_rate": 7.994718008976085e-06, + "loss": 0.003, + "step": 114140 + }, + { + "epoch": 0.7321226425682977, + "grad_norm": 0.05207236483693123, + "learning_rate": 7.99426978467149e-06, + "loss": 0.0017, + "step": 114150 + }, + { + "epoch": 0.7321867794620839, + "grad_norm": 0.4033132791519165, + "learning_rate": 7.993821522846559e-06, + "loss": 0.0026, + "step": 114160 + }, + { + "epoch": 0.7322509163558699, + "grad_norm": 0.43490123748779297, + "learning_rate": 7.993373223506904e-06, + "loss": 0.003, + "step": 114170 + }, + { + "epoch": 0.7323150532496561, + "grad_norm": 0.24397335946559906, + "learning_rate": 7.992924886658141e-06, + "loss": 0.002, + "step": 114180 + }, + { + "epoch": 0.7323791901434422, + "grad_norm": 0.04103788733482361, + "learning_rate": 7.99247651230589e-06, + "loss": 0.001, + "step": 114190 + }, + { + "epoch": 0.7324433270372283, + "grad_norm": 0.1666671484708786, + "learning_rate": 7.992028100455774e-06, + "loss": 0.0035, + "step": 114200 + }, + { + "epoch": 0.7325074639310144, + "grad_norm": 0.09332185238599777, + "learning_rate": 7.991579651113404e-06, + "loss": 0.0036, + "step": 114210 + }, + { + "epoch": 0.7325716008248004, + "grad_norm": 0.04582349583506584, + "learning_rate": 7.991131164284402e-06, + "loss": 0.0015, + "step": 114220 + }, + { + "epoch": 0.7326357377185866, + "grad_norm": 0.1251188963651657, + "learning_rate": 7.99068263997439e-06, + "loss": 0.0022, + "step": 114230 + }, + { + "epoch": 0.7326998746123726, + "grad_norm": 0.07623518258333206, + "learning_rate": 7.990234078188988e-06, + "loss": 0.003, + "step": 114240 + }, + { + "epoch": 0.7327640115061588, + "grad_norm": 0.019086243584752083, + "learning_rate": 7.989785478933814e-06, + "loss": 0.0018, + "step": 114250 + }, + { + "epoch": 0.7328281483999448, + "grad_norm": 0.11019234359264374, + "learning_rate": 7.989336842214492e-06, + "loss": 0.0025, + "step": 114260 + }, + { + "epoch": 0.732892285293731, + "grad_norm": 0.1383860558271408, + "learning_rate": 7.988888168036644e-06, + "loss": 0.002, + "step": 114270 + }, + { + "epoch": 0.732956422187517, + "grad_norm": 0.2850213646888733, + "learning_rate": 7.98843945640589e-06, + "loss": 0.0018, + "step": 114280 + }, + { + "epoch": 0.7330205590813031, + "grad_norm": 0.06595045328140259, + "learning_rate": 7.987990707327854e-06, + "loss": 0.0028, + "step": 114290 + }, + { + "epoch": 0.7330846959750892, + "grad_norm": 0.15467871725559235, + "learning_rate": 7.98754192080816e-06, + "loss": 0.002, + "step": 114300 + }, + { + "epoch": 0.7331488328688753, + "grad_norm": 0.05410248786211014, + "learning_rate": 7.98709309685243e-06, + "loss": 0.0027, + "step": 114310 + }, + { + "epoch": 0.7332129697626614, + "grad_norm": 0.002955447882413864, + "learning_rate": 7.986644235466287e-06, + "loss": 0.0016, + "step": 114320 + }, + { + "epoch": 0.7332771066564475, + "grad_norm": 0.16507507860660553, + "learning_rate": 7.986195336655359e-06, + "loss": 0.0019, + "step": 114330 + }, + { + "epoch": 0.7333412435502337, + "grad_norm": 0.14873072504997253, + "learning_rate": 7.985746400425268e-06, + "loss": 0.0029, + "step": 114340 + }, + { + "epoch": 0.7334053804440197, + "grad_norm": 0.046916212886571884, + "learning_rate": 7.985297426781641e-06, + "loss": 0.0055, + "step": 114350 + }, + { + "epoch": 0.7334695173378059, + "grad_norm": 0.09673117846250534, + "learning_rate": 7.984848415730104e-06, + "loss": 0.0071, + "step": 114360 + }, + { + "epoch": 0.7335336542315919, + "grad_norm": 0.15769807994365692, + "learning_rate": 7.984399367276283e-06, + "loss": 0.0041, + "step": 114370 + }, + { + "epoch": 0.733597791125378, + "grad_norm": 0.023746896535158157, + "learning_rate": 7.983950281425806e-06, + "loss": 0.0033, + "step": 114380 + }, + { + "epoch": 0.7336619280191641, + "grad_norm": 0.09823162108659744, + "learning_rate": 7.9835011581843e-06, + "loss": 0.0023, + "step": 114390 + }, + { + "epoch": 0.7337260649129502, + "grad_norm": 0.1715027391910553, + "learning_rate": 7.98305199755739e-06, + "loss": 0.0041, + "step": 114400 + }, + { + "epoch": 0.7337902018067363, + "grad_norm": 0.11333558708429337, + "learning_rate": 7.982602799550707e-06, + "loss": 0.0021, + "step": 114410 + }, + { + "epoch": 0.7338543387005224, + "grad_norm": 0.10286201536655426, + "learning_rate": 7.98215356416988e-06, + "loss": 0.0027, + "step": 114420 + }, + { + "epoch": 0.7339184755943085, + "grad_norm": 0.11100887507200241, + "learning_rate": 7.981704291420536e-06, + "loss": 0.0032, + "step": 114430 + }, + { + "epoch": 0.7339826124880946, + "grad_norm": 0.12641538679599762, + "learning_rate": 7.981254981308308e-06, + "loss": 0.0058, + "step": 114440 + }, + { + "epoch": 0.7340467493818806, + "grad_norm": 0.09120380133390427, + "learning_rate": 7.980805633838824e-06, + "loss": 0.0028, + "step": 114450 + }, + { + "epoch": 0.7341108862756668, + "grad_norm": 0.06803891807794571, + "learning_rate": 7.980356249017716e-06, + "loss": 0.0019, + "step": 114460 + }, + { + "epoch": 0.7341750231694529, + "grad_norm": 0.06600677222013474, + "learning_rate": 7.979906826850611e-06, + "loss": 0.0181, + "step": 114470 + }, + { + "epoch": 0.734239160063239, + "grad_norm": 0.29142773151397705, + "learning_rate": 7.979457367343147e-06, + "loss": 0.0025, + "step": 114480 + }, + { + "epoch": 0.7343032969570251, + "grad_norm": 0.13607315719127655, + "learning_rate": 7.979007870500952e-06, + "loss": 0.0036, + "step": 114490 + }, + { + "epoch": 0.7343674338508112, + "grad_norm": 0.09334375709295273, + "learning_rate": 7.978558336329658e-06, + "loss": 0.0023, + "step": 114500 + }, + { + "epoch": 0.7344315707445973, + "grad_norm": 0.011306799948215485, + "learning_rate": 7.978108764834899e-06, + "loss": 0.0019, + "step": 114510 + }, + { + "epoch": 0.7344957076383833, + "grad_norm": 0.03890885412693024, + "learning_rate": 7.977659156022311e-06, + "loss": 0.0024, + "step": 114520 + }, + { + "epoch": 0.7345598445321695, + "grad_norm": 0.022687526419758797, + "learning_rate": 7.977209509897526e-06, + "loss": 0.003, + "step": 114530 + }, + { + "epoch": 0.7346239814259555, + "grad_norm": 0.23136533796787262, + "learning_rate": 7.976759826466177e-06, + "loss": 0.0025, + "step": 114540 + }, + { + "epoch": 0.7346881183197417, + "grad_norm": 0.09535142779350281, + "learning_rate": 7.976310105733899e-06, + "loss": 0.0029, + "step": 114550 + }, + { + "epoch": 0.7347522552135277, + "grad_norm": 0.29435694217681885, + "learning_rate": 7.975860347706328e-06, + "loss": 0.005, + "step": 114560 + }, + { + "epoch": 0.7348163921073139, + "grad_norm": 0.0760713517665863, + "learning_rate": 7.975410552389102e-06, + "loss": 0.002, + "step": 114570 + }, + { + "epoch": 0.7348805290010999, + "grad_norm": 0.14145128428936005, + "learning_rate": 7.974960719787854e-06, + "loss": 0.0037, + "step": 114580 + }, + { + "epoch": 0.734944665894886, + "grad_norm": 0.17666327953338623, + "learning_rate": 7.974510849908221e-06, + "loss": 0.0073, + "step": 114590 + }, + { + "epoch": 0.7350088027886721, + "grad_norm": 0.10045598447322845, + "learning_rate": 7.974060942755844e-06, + "loss": 0.0028, + "step": 114600 + }, + { + "epoch": 0.7350729396824582, + "grad_norm": 0.1442408561706543, + "learning_rate": 7.973610998336356e-06, + "loss": 0.0043, + "step": 114610 + }, + { + "epoch": 0.7351370765762444, + "grad_norm": 0.18976947665214539, + "learning_rate": 7.973161016655397e-06, + "loss": 0.0019, + "step": 114620 + }, + { + "epoch": 0.7352012134700304, + "grad_norm": 0.09054972976446152, + "learning_rate": 7.972710997718606e-06, + "loss": 0.0025, + "step": 114630 + }, + { + "epoch": 0.7352653503638166, + "grad_norm": 0.27474135160446167, + "learning_rate": 7.972260941531621e-06, + "loss": 0.0031, + "step": 114640 + }, + { + "epoch": 0.7353294872576026, + "grad_norm": 0.19614854454994202, + "learning_rate": 7.971810848100083e-06, + "loss": 0.0031, + "step": 114650 + }, + { + "epoch": 0.7353936241513888, + "grad_norm": 0.06520947068929672, + "learning_rate": 7.97136071742963e-06, + "loss": 0.0016, + "step": 114660 + }, + { + "epoch": 0.7354577610451748, + "grad_norm": 0.142005056142807, + "learning_rate": 7.970910549525905e-06, + "loss": 0.0048, + "step": 114670 + }, + { + "epoch": 0.735521897938961, + "grad_norm": 0.10520284622907639, + "learning_rate": 7.970460344394547e-06, + "loss": 0.0051, + "step": 114680 + }, + { + "epoch": 0.735586034832747, + "grad_norm": 0.12937726080417633, + "learning_rate": 7.970010102041198e-06, + "loss": 0.0024, + "step": 114690 + }, + { + "epoch": 0.7356501717265331, + "grad_norm": 0.06415631622076035, + "learning_rate": 7.9695598224715e-06, + "loss": 0.0015, + "step": 114700 + }, + { + "epoch": 0.7357143086203192, + "grad_norm": 0.23039346933364868, + "learning_rate": 7.969109505691095e-06, + "loss": 0.0055, + "step": 114710 + }, + { + "epoch": 0.7357784455141053, + "grad_norm": 0.20240607857704163, + "learning_rate": 7.968659151705625e-06, + "loss": 0.0018, + "step": 114720 + }, + { + "epoch": 0.7358425824078914, + "grad_norm": 0.18377630412578583, + "learning_rate": 7.968208760520734e-06, + "loss": 0.0014, + "step": 114730 + }, + { + "epoch": 0.7359067193016775, + "grad_norm": 0.05094422027468681, + "learning_rate": 7.967758332142066e-06, + "loss": 0.0026, + "step": 114740 + }, + { + "epoch": 0.7359708561954637, + "grad_norm": 0.3360855281352997, + "learning_rate": 7.967307866575266e-06, + "loss": 0.0039, + "step": 114750 + }, + { + "epoch": 0.7360349930892497, + "grad_norm": 0.15867236256599426, + "learning_rate": 7.966857363825978e-06, + "loss": 0.0016, + "step": 114760 + }, + { + "epoch": 0.7360991299830358, + "grad_norm": 0.2619006335735321, + "learning_rate": 7.966406823899846e-06, + "loss": 0.0035, + "step": 114770 + }, + { + "epoch": 0.7361632668768219, + "grad_norm": 0.15900486707687378, + "learning_rate": 7.965956246802518e-06, + "loss": 0.0025, + "step": 114780 + }, + { + "epoch": 0.736227403770608, + "grad_norm": 0.012450681068003178, + "learning_rate": 7.965505632539637e-06, + "loss": 0.002, + "step": 114790 + }, + { + "epoch": 0.7362915406643941, + "grad_norm": 0.11295517534017563, + "learning_rate": 7.96505498111685e-06, + "loss": 0.0025, + "step": 114800 + }, + { + "epoch": 0.7363556775581802, + "grad_norm": 0.121597059071064, + "learning_rate": 7.964604292539807e-06, + "loss": 0.004, + "step": 114810 + }, + { + "epoch": 0.7364198144519662, + "grad_norm": 0.18834513425827026, + "learning_rate": 7.964153566814152e-06, + "loss": 0.0025, + "step": 114820 + }, + { + "epoch": 0.7364839513457524, + "grad_norm": 0.066258504986763, + "learning_rate": 7.963702803945537e-06, + "loss": 0.0019, + "step": 114830 + }, + { + "epoch": 0.7365480882395384, + "grad_norm": 0.12791961431503296, + "learning_rate": 7.963252003939604e-06, + "loss": 0.0017, + "step": 114840 + }, + { + "epoch": 0.7366122251333246, + "grad_norm": 0.13025297224521637, + "learning_rate": 7.962801166802008e-06, + "loss": 0.0037, + "step": 114850 + }, + { + "epoch": 0.7366763620271106, + "grad_norm": 0.07951223105192184, + "learning_rate": 7.962350292538395e-06, + "loss": 0.0019, + "step": 114860 + }, + { + "epoch": 0.7367404989208968, + "grad_norm": 0.09436211735010147, + "learning_rate": 7.961899381154416e-06, + "loss": 0.0062, + "step": 114870 + }, + { + "epoch": 0.7368046358146828, + "grad_norm": 0.017899997532367706, + "learning_rate": 7.961448432655721e-06, + "loss": 0.0036, + "step": 114880 + }, + { + "epoch": 0.736868772708469, + "grad_norm": 0.07869095355272293, + "learning_rate": 7.96099744704796e-06, + "loss": 0.0038, + "step": 114890 + }, + { + "epoch": 0.7369329096022551, + "grad_norm": 0.034353915601968765, + "learning_rate": 7.960546424336786e-06, + "loss": 0.0015, + "step": 114900 + }, + { + "epoch": 0.7369970464960411, + "grad_norm": 0.022068442776799202, + "learning_rate": 7.960095364527849e-06, + "loss": 0.002, + "step": 114910 + }, + { + "epoch": 0.7370611833898273, + "grad_norm": 0.05181076377630234, + "learning_rate": 7.9596442676268e-06, + "loss": 0.0035, + "step": 114920 + }, + { + "epoch": 0.7371253202836133, + "grad_norm": 0.014081995002925396, + "learning_rate": 7.959193133639296e-06, + "loss": 0.0023, + "step": 114930 + }, + { + "epoch": 0.7371894571773995, + "grad_norm": 0.03727160021662712, + "learning_rate": 7.958741962570985e-06, + "loss": 0.0032, + "step": 114940 + }, + { + "epoch": 0.7372535940711855, + "grad_norm": 0.1262127161026001, + "learning_rate": 7.958290754427524e-06, + "loss": 0.0019, + "step": 114950 + }, + { + "epoch": 0.7373177309649717, + "grad_norm": 0.07796236872673035, + "learning_rate": 7.957839509214565e-06, + "loss": 0.0034, + "step": 114960 + }, + { + "epoch": 0.7373818678587577, + "grad_norm": 0.1312754899263382, + "learning_rate": 7.957388226937762e-06, + "loss": 0.0021, + "step": 114970 + }, + { + "epoch": 0.7374460047525438, + "grad_norm": 0.22057336568832397, + "learning_rate": 7.95693690760277e-06, + "loss": 0.0071, + "step": 114980 + }, + { + "epoch": 0.7375101416463299, + "grad_norm": 0.1529206484556198, + "learning_rate": 7.956485551215247e-06, + "loss": 0.0028, + "step": 114990 + }, + { + "epoch": 0.737574278540116, + "grad_norm": 0.17639851570129395, + "learning_rate": 7.956034157780848e-06, + "loss": 0.0029, + "step": 115000 + }, + { + "epoch": 0.7376384154339021, + "grad_norm": 0.18121221661567688, + "learning_rate": 7.955582727305226e-06, + "loss": 0.0026, + "step": 115010 + }, + { + "epoch": 0.7377025523276882, + "grad_norm": 0.15257194638252258, + "learning_rate": 7.955131259794042e-06, + "loss": 0.0034, + "step": 115020 + }, + { + "epoch": 0.7377666892214744, + "grad_norm": 0.2247915416955948, + "learning_rate": 7.954679755252953e-06, + "loss": 0.0033, + "step": 115030 + }, + { + "epoch": 0.7378308261152604, + "grad_norm": 0.1564386785030365, + "learning_rate": 7.954228213687611e-06, + "loss": 0.0024, + "step": 115040 + }, + { + "epoch": 0.7378949630090466, + "grad_norm": 0.04837026074528694, + "learning_rate": 7.95377663510368e-06, + "loss": 0.0029, + "step": 115050 + }, + { + "epoch": 0.7379590999028326, + "grad_norm": 0.26205313205718994, + "learning_rate": 7.953325019506817e-06, + "loss": 0.0041, + "step": 115060 + }, + { + "epoch": 0.7380232367966187, + "grad_norm": 0.15560157597064972, + "learning_rate": 7.95287336690268e-06, + "loss": 0.0019, + "step": 115070 + }, + { + "epoch": 0.7380873736904048, + "grad_norm": 0.06288137286901474, + "learning_rate": 7.952421677296929e-06, + "loss": 0.0041, + "step": 115080 + }, + { + "epoch": 0.7381515105841909, + "grad_norm": 0.05975082889199257, + "learning_rate": 7.951969950695226e-06, + "loss": 0.0022, + "step": 115090 + }, + { + "epoch": 0.738215647477977, + "grad_norm": 0.06963266432285309, + "learning_rate": 7.951518187103228e-06, + "loss": 0.0028, + "step": 115100 + }, + { + "epoch": 0.7382797843717631, + "grad_norm": 0.04305969551205635, + "learning_rate": 7.951066386526599e-06, + "loss": 0.0029, + "step": 115110 + }, + { + "epoch": 0.7383439212655492, + "grad_norm": 0.1279025375843048, + "learning_rate": 7.950614548971e-06, + "loss": 0.0019, + "step": 115120 + }, + { + "epoch": 0.7384080581593353, + "grad_norm": 0.13910166919231415, + "learning_rate": 7.95016267444209e-06, + "loss": 0.0034, + "step": 115130 + }, + { + "epoch": 0.7384721950531213, + "grad_norm": 0.13004015386104584, + "learning_rate": 7.949710762945532e-06, + "loss": 0.0024, + "step": 115140 + }, + { + "epoch": 0.7385363319469075, + "grad_norm": 0.11320220679044724, + "learning_rate": 7.949258814486992e-06, + "loss": 0.0019, + "step": 115150 + }, + { + "epoch": 0.7386004688406935, + "grad_norm": 0.08462710678577423, + "learning_rate": 7.948806829072131e-06, + "loss": 0.0019, + "step": 115160 + }, + { + "epoch": 0.7386646057344797, + "grad_norm": 0.012864558957517147, + "learning_rate": 7.948354806706612e-06, + "loss": 0.0016, + "step": 115170 + }, + { + "epoch": 0.7387287426282658, + "grad_norm": 0.14978507161140442, + "learning_rate": 7.947902747396104e-06, + "loss": 0.0023, + "step": 115180 + }, + { + "epoch": 0.7387928795220519, + "grad_norm": 0.15812519192695618, + "learning_rate": 7.947450651146263e-06, + "loss": 0.0025, + "step": 115190 + }, + { + "epoch": 0.738857016415838, + "grad_norm": 0.08912328630685806, + "learning_rate": 7.946998517962761e-06, + "loss": 0.0027, + "step": 115200 + }, + { + "epoch": 0.738921153309624, + "grad_norm": 0.1578788161277771, + "learning_rate": 7.946546347851261e-06, + "loss": 0.0025, + "step": 115210 + }, + { + "epoch": 0.7389852902034102, + "grad_norm": 0.021888367831707, + "learning_rate": 7.946094140817429e-06, + "loss": 0.0047, + "step": 115220 + }, + { + "epoch": 0.7390494270971962, + "grad_norm": 0.10366988927125931, + "learning_rate": 7.945641896866932e-06, + "loss": 0.0033, + "step": 115230 + }, + { + "epoch": 0.7391135639909824, + "grad_norm": 0.17833055555820465, + "learning_rate": 7.945189616005437e-06, + "loss": 0.0029, + "step": 115240 + }, + { + "epoch": 0.7391777008847684, + "grad_norm": 0.167525514960289, + "learning_rate": 7.944737298238612e-06, + "loss": 0.002, + "step": 115250 + }, + { + "epoch": 0.7392418377785546, + "grad_norm": 0.1168316900730133, + "learning_rate": 7.944284943572124e-06, + "loss": 0.0026, + "step": 115260 + }, + { + "epoch": 0.7393059746723406, + "grad_norm": 0.08253283053636551, + "learning_rate": 7.94383255201164e-06, + "loss": 0.0022, + "step": 115270 + }, + { + "epoch": 0.7393701115661268, + "grad_norm": 0.20252768695354462, + "learning_rate": 7.943380123562831e-06, + "loss": 0.0035, + "step": 115280 + }, + { + "epoch": 0.7394342484599128, + "grad_norm": 0.07709493488073349, + "learning_rate": 7.942927658231367e-06, + "loss": 0.0033, + "step": 115290 + }, + { + "epoch": 0.7394983853536989, + "grad_norm": 0.06704801321029663, + "learning_rate": 7.942475156022914e-06, + "loss": 0.0015, + "step": 115300 + }, + { + "epoch": 0.739562522247485, + "grad_norm": 0.04719102755188942, + "learning_rate": 7.942022616943145e-06, + "loss": 0.0014, + "step": 115310 + }, + { + "epoch": 0.7396266591412711, + "grad_norm": 0.009879032149910927, + "learning_rate": 7.94157004099773e-06, + "loss": 0.004, + "step": 115320 + }, + { + "epoch": 0.7396907960350573, + "grad_norm": 0.17882804572582245, + "learning_rate": 7.94111742819234e-06, + "loss": 0.0025, + "step": 115330 + }, + { + "epoch": 0.7397549329288433, + "grad_norm": 0.2576644718647003, + "learning_rate": 7.940664778532646e-06, + "loss": 0.0022, + "step": 115340 + }, + { + "epoch": 0.7398190698226295, + "grad_norm": 0.07398247718811035, + "learning_rate": 7.940212092024323e-06, + "loss": 0.0033, + "step": 115350 + }, + { + "epoch": 0.7398832067164155, + "grad_norm": 0.055791664868593216, + "learning_rate": 7.93975936867304e-06, + "loss": 0.0036, + "step": 115360 + }, + { + "epoch": 0.7399473436102016, + "grad_norm": 0.16191360354423523, + "learning_rate": 7.93930660848447e-06, + "loss": 0.0027, + "step": 115370 + }, + { + "epoch": 0.7400114805039877, + "grad_norm": 0.06203453615307808, + "learning_rate": 7.938853811464286e-06, + "loss": 0.0024, + "step": 115380 + }, + { + "epoch": 0.7400756173977738, + "grad_norm": 0.03755979612469673, + "learning_rate": 7.938400977618165e-06, + "loss": 0.0012, + "step": 115390 + }, + { + "epoch": 0.7401397542915599, + "grad_norm": 0.2009858340024948, + "learning_rate": 7.937948106951781e-06, + "loss": 0.0031, + "step": 115400 + }, + { + "epoch": 0.740203891185346, + "grad_norm": 0.004805264063179493, + "learning_rate": 7.937495199470807e-06, + "loss": 0.0022, + "step": 115410 + }, + { + "epoch": 0.7402680280791321, + "grad_norm": 0.06541886925697327, + "learning_rate": 7.937042255180919e-06, + "loss": 0.0032, + "step": 115420 + }, + { + "epoch": 0.7403321649729182, + "grad_norm": 0.11845489591360092, + "learning_rate": 7.936589274087791e-06, + "loss": 0.0036, + "step": 115430 + }, + { + "epoch": 0.7403963018667042, + "grad_norm": 0.07816637307405472, + "learning_rate": 7.936136256197102e-06, + "loss": 0.0055, + "step": 115440 + }, + { + "epoch": 0.7404604387604904, + "grad_norm": 0.03445274010300636, + "learning_rate": 7.935683201514528e-06, + "loss": 0.0029, + "step": 115450 + }, + { + "epoch": 0.7405245756542765, + "grad_norm": 0.05244622379541397, + "learning_rate": 7.935230110045745e-06, + "loss": 0.0016, + "step": 115460 + }, + { + "epoch": 0.7405887125480626, + "grad_norm": 0.0628136396408081, + "learning_rate": 7.934776981796428e-06, + "loss": 0.003, + "step": 115470 + }, + { + "epoch": 0.7406528494418487, + "grad_norm": 0.11316493898630142, + "learning_rate": 7.93432381677226e-06, + "loss": 0.0021, + "step": 115480 + }, + { + "epoch": 0.7407169863356348, + "grad_norm": 0.10986774414777756, + "learning_rate": 7.933870614978918e-06, + "loss": 0.0032, + "step": 115490 + }, + { + "epoch": 0.7407811232294209, + "grad_norm": 0.1046801283955574, + "learning_rate": 7.93341737642208e-06, + "loss": 0.0024, + "step": 115500 + }, + { + "epoch": 0.740845260123207, + "grad_norm": 0.08572613447904587, + "learning_rate": 7.932964101107426e-06, + "loss": 0.0018, + "step": 115510 + }, + { + "epoch": 0.7409093970169931, + "grad_norm": 0.10648373514413834, + "learning_rate": 7.932510789040635e-06, + "loss": 0.0033, + "step": 115520 + }, + { + "epoch": 0.7409735339107791, + "grad_norm": 0.11053439974784851, + "learning_rate": 7.932057440227387e-06, + "loss": 0.0023, + "step": 115530 + }, + { + "epoch": 0.7410376708045653, + "grad_norm": 0.045133188366889954, + "learning_rate": 7.931604054673366e-06, + "loss": 0.0049, + "step": 115540 + }, + { + "epoch": 0.7411018076983513, + "grad_norm": 0.15804682672023773, + "learning_rate": 7.93115063238425e-06, + "loss": 0.0023, + "step": 115550 + }, + { + "epoch": 0.7411659445921375, + "grad_norm": 0.09051597863435745, + "learning_rate": 7.930697173365722e-06, + "loss": 0.0024, + "step": 115560 + }, + { + "epoch": 0.7412300814859235, + "grad_norm": 0.11107442528009415, + "learning_rate": 7.930243677623464e-06, + "loss": 0.0023, + "step": 115570 + }, + { + "epoch": 0.7412942183797097, + "grad_norm": 0.14368917047977448, + "learning_rate": 7.929790145163159e-06, + "loss": 0.0028, + "step": 115580 + }, + { + "epoch": 0.7413583552734957, + "grad_norm": 0.29199692606925964, + "learning_rate": 7.929336575990489e-06, + "loss": 0.0029, + "step": 115590 + }, + { + "epoch": 0.7414224921672818, + "grad_norm": 0.2326393723487854, + "learning_rate": 7.928882970111138e-06, + "loss": 0.0045, + "step": 115600 + }, + { + "epoch": 0.741486629061068, + "grad_norm": 0.09193108975887299, + "learning_rate": 7.92842932753079e-06, + "loss": 0.0024, + "step": 115610 + }, + { + "epoch": 0.741550765954854, + "grad_norm": 0.06306244432926178, + "learning_rate": 7.927975648255129e-06, + "loss": 0.0046, + "step": 115620 + }, + { + "epoch": 0.7416149028486402, + "grad_norm": 0.09674990922212601, + "learning_rate": 7.927521932289841e-06, + "loss": 0.0024, + "step": 115630 + }, + { + "epoch": 0.7416790397424262, + "grad_norm": 0.15148349106311798, + "learning_rate": 7.92706817964061e-06, + "loss": 0.0025, + "step": 115640 + }, + { + "epoch": 0.7417431766362124, + "grad_norm": 0.10181494802236557, + "learning_rate": 7.926614390313126e-06, + "loss": 0.0023, + "step": 115650 + }, + { + "epoch": 0.7418073135299984, + "grad_norm": 0.239225372672081, + "learning_rate": 7.92616056431307e-06, + "loss": 0.002, + "step": 115660 + }, + { + "epoch": 0.7418714504237846, + "grad_norm": 0.1631385236978531, + "learning_rate": 7.925706701646131e-06, + "loss": 0.002, + "step": 115670 + }, + { + "epoch": 0.7419355873175706, + "grad_norm": 0.11287748068571091, + "learning_rate": 7.925252802317995e-06, + "loss": 0.0059, + "step": 115680 + }, + { + "epoch": 0.7419997242113567, + "grad_norm": 0.17991571128368378, + "learning_rate": 7.924798866334352e-06, + "loss": 0.0028, + "step": 115690 + }, + { + "epoch": 0.7420638611051428, + "grad_norm": 0.13380445539951324, + "learning_rate": 7.924344893700888e-06, + "loss": 0.0029, + "step": 115700 + }, + { + "epoch": 0.7421279979989289, + "grad_norm": 0.15858301520347595, + "learning_rate": 7.923890884423294e-06, + "loss": 0.0029, + "step": 115710 + }, + { + "epoch": 0.742192134892715, + "grad_norm": 0.16018827259540558, + "learning_rate": 7.923436838507257e-06, + "loss": 0.0022, + "step": 115720 + }, + { + "epoch": 0.7422562717865011, + "grad_norm": 0.09589401632547379, + "learning_rate": 7.922982755958466e-06, + "loss": 0.0017, + "step": 115730 + }, + { + "epoch": 0.7423204086802873, + "grad_norm": 0.12657934427261353, + "learning_rate": 7.922528636782613e-06, + "loss": 0.0022, + "step": 115740 + }, + { + "epoch": 0.7423845455740733, + "grad_norm": 0.1356390118598938, + "learning_rate": 7.922074480985386e-06, + "loss": 0.0042, + "step": 115750 + }, + { + "epoch": 0.7424486824678594, + "grad_norm": 0.08364280313253403, + "learning_rate": 7.921620288572479e-06, + "loss": 0.003, + "step": 115760 + }, + { + "epoch": 0.7425128193616455, + "grad_norm": 0.05459647253155708, + "learning_rate": 7.92116605954958e-06, + "loss": 0.0024, + "step": 115770 + }, + { + "epoch": 0.7425769562554316, + "grad_norm": 0.01840919628739357, + "learning_rate": 7.920711793922386e-06, + "loss": 0.0039, + "step": 115780 + }, + { + "epoch": 0.7426410931492177, + "grad_norm": 0.15585069358348846, + "learning_rate": 7.920257491696583e-06, + "loss": 0.0032, + "step": 115790 + }, + { + "epoch": 0.7427052300430038, + "grad_norm": 0.22318945825099945, + "learning_rate": 7.919803152877868e-06, + "loss": 0.0021, + "step": 115800 + }, + { + "epoch": 0.7427693669367899, + "grad_norm": 0.2111048698425293, + "learning_rate": 7.919348777471932e-06, + "loss": 0.0024, + "step": 115810 + }, + { + "epoch": 0.742833503830576, + "grad_norm": 0.04281293600797653, + "learning_rate": 7.91889436548447e-06, + "loss": 0.0019, + "step": 115820 + }, + { + "epoch": 0.742897640724362, + "grad_norm": 0.09011639654636383, + "learning_rate": 7.918439916921174e-06, + "loss": 0.0021, + "step": 115830 + }, + { + "epoch": 0.7429617776181482, + "grad_norm": 0.6694950461387634, + "learning_rate": 7.91798543178774e-06, + "loss": 0.0028, + "step": 115840 + }, + { + "epoch": 0.7430259145119342, + "grad_norm": 0.08501161634922028, + "learning_rate": 7.917530910089863e-06, + "loss": 0.0021, + "step": 115850 + }, + { + "epoch": 0.7430900514057204, + "grad_norm": 0.11211014539003372, + "learning_rate": 7.917076351833241e-06, + "loss": 0.0029, + "step": 115860 + }, + { + "epoch": 0.7431541882995064, + "grad_norm": 0.3071892559528351, + "learning_rate": 7.916621757023566e-06, + "loss": 0.0041, + "step": 115870 + }, + { + "epoch": 0.7432183251932926, + "grad_norm": 0.08402853459119797, + "learning_rate": 7.916167125666535e-06, + "loss": 0.0031, + "step": 115880 + }, + { + "epoch": 0.7432824620870787, + "grad_norm": 0.1922350823879242, + "learning_rate": 7.915712457767847e-06, + "loss": 0.0049, + "step": 115890 + }, + { + "epoch": 0.7433465989808647, + "grad_norm": 0.08401606231927872, + "learning_rate": 7.915257753333198e-06, + "loss": 0.0018, + "step": 115900 + }, + { + "epoch": 0.7434107358746509, + "grad_norm": 0.06123369559645653, + "learning_rate": 7.914803012368284e-06, + "loss": 0.0015, + "step": 115910 + }, + { + "epoch": 0.7434748727684369, + "grad_norm": 0.1281592696905136, + "learning_rate": 7.914348234878809e-06, + "loss": 0.0022, + "step": 115920 + }, + { + "epoch": 0.7435390096622231, + "grad_norm": 0.47530466318130493, + "learning_rate": 7.913893420870464e-06, + "loss": 0.0022, + "step": 115930 + }, + { + "epoch": 0.7436031465560091, + "grad_norm": 0.10956531018018723, + "learning_rate": 7.913438570348954e-06, + "loss": 0.003, + "step": 115940 + }, + { + "epoch": 0.7436672834497953, + "grad_norm": 0.11079762876033783, + "learning_rate": 7.912983683319977e-06, + "loss": 0.0037, + "step": 115950 + }, + { + "epoch": 0.7437314203435813, + "grad_norm": 0.12005963921546936, + "learning_rate": 7.91252875978923e-06, + "loss": 0.0047, + "step": 115960 + }, + { + "epoch": 0.7437955572373675, + "grad_norm": 0.14262211322784424, + "learning_rate": 7.912073799762418e-06, + "loss": 0.0043, + "step": 115970 + }, + { + "epoch": 0.7438596941311535, + "grad_norm": 0.19480326771736145, + "learning_rate": 7.91161880324524e-06, + "loss": 0.0014, + "step": 115980 + }, + { + "epoch": 0.7439238310249396, + "grad_norm": 0.24568434059619904, + "learning_rate": 7.911163770243397e-06, + "loss": 0.0053, + "step": 115990 + }, + { + "epoch": 0.7439879679187257, + "grad_norm": 0.0683426558971405, + "learning_rate": 7.910708700762592e-06, + "loss": 0.0016, + "step": 116000 + }, + { + "epoch": 0.7440521048125118, + "grad_norm": 0.20744992792606354, + "learning_rate": 7.910253594808525e-06, + "loss": 0.0029, + "step": 116010 + }, + { + "epoch": 0.744116241706298, + "grad_norm": 0.17102357745170593, + "learning_rate": 7.909798452386903e-06, + "loss": 0.0048, + "step": 116020 + }, + { + "epoch": 0.744180378600084, + "grad_norm": 0.1635199785232544, + "learning_rate": 7.909343273503425e-06, + "loss": 0.0028, + "step": 116030 + }, + { + "epoch": 0.7442445154938702, + "grad_norm": 0.05233636498451233, + "learning_rate": 7.908888058163798e-06, + "loss": 0.0025, + "step": 116040 + }, + { + "epoch": 0.7443086523876562, + "grad_norm": 0.09148632735013962, + "learning_rate": 7.908432806373722e-06, + "loss": 0.0017, + "step": 116050 + }, + { + "epoch": 0.7443727892814423, + "grad_norm": 0.10150976479053497, + "learning_rate": 7.907977518138907e-06, + "loss": 0.0046, + "step": 116060 + }, + { + "epoch": 0.7444369261752284, + "grad_norm": 0.14146283268928528, + "learning_rate": 7.907522193465053e-06, + "loss": 0.0027, + "step": 116070 + }, + { + "epoch": 0.7445010630690145, + "grad_norm": 0.033894527703523636, + "learning_rate": 7.90706683235787e-06, + "loss": 0.003, + "step": 116080 + }, + { + "epoch": 0.7445651999628006, + "grad_norm": 0.11578021943569183, + "learning_rate": 7.906611434823062e-06, + "loss": 0.002, + "step": 116090 + }, + { + "epoch": 0.7446293368565867, + "grad_norm": 0.13569103181362152, + "learning_rate": 7.906156000866334e-06, + "loss": 0.0047, + "step": 116100 + }, + { + "epoch": 0.7446934737503728, + "grad_norm": 0.10451752692461014, + "learning_rate": 7.905700530493395e-06, + "loss": 0.0034, + "step": 116110 + }, + { + "epoch": 0.7447576106441589, + "grad_norm": 0.13221339881420135, + "learning_rate": 7.905245023709953e-06, + "loss": 0.0032, + "step": 116120 + }, + { + "epoch": 0.744821747537945, + "grad_norm": 0.17002162337303162, + "learning_rate": 7.904789480521712e-06, + "loss": 0.0021, + "step": 116130 + }, + { + "epoch": 0.7448858844317311, + "grad_norm": 0.09643540531396866, + "learning_rate": 7.904333900934384e-06, + "loss": 0.0019, + "step": 116140 + }, + { + "epoch": 0.7449500213255171, + "grad_norm": 0.18107008934020996, + "learning_rate": 7.903878284953676e-06, + "loss": 0.003, + "step": 116150 + }, + { + "epoch": 0.7450141582193033, + "grad_norm": 0.10610051453113556, + "learning_rate": 7.903422632585301e-06, + "loss": 0.0023, + "step": 116160 + }, + { + "epoch": 0.7450782951130894, + "grad_norm": 0.08352890610694885, + "learning_rate": 7.902966943834961e-06, + "loss": 0.0025, + "step": 116170 + }, + { + "epoch": 0.7451424320068755, + "grad_norm": 0.13513338565826416, + "learning_rate": 7.902511218708374e-06, + "loss": 0.0027, + "step": 116180 + }, + { + "epoch": 0.7452065689006616, + "grad_norm": 0.13289561867713928, + "learning_rate": 7.902055457211243e-06, + "loss": 0.0039, + "step": 116190 + }, + { + "epoch": 0.7452707057944477, + "grad_norm": 0.007371044717729092, + "learning_rate": 7.901599659349285e-06, + "loss": 0.0028, + "step": 116200 + }, + { + "epoch": 0.7453348426882338, + "grad_norm": 0.10732848942279816, + "learning_rate": 7.90114382512821e-06, + "loss": 0.0021, + "step": 116210 + }, + { + "epoch": 0.7453989795820198, + "grad_norm": 0.03691869601607323, + "learning_rate": 7.900687954553729e-06, + "loss": 0.0048, + "step": 116220 + }, + { + "epoch": 0.745463116475806, + "grad_norm": 0.10616063326597214, + "learning_rate": 7.900232047631555e-06, + "loss": 0.0013, + "step": 116230 + }, + { + "epoch": 0.745527253369592, + "grad_norm": 0.14096885919570923, + "learning_rate": 7.8997761043674e-06, + "loss": 0.002, + "step": 116240 + }, + { + "epoch": 0.7455913902633782, + "grad_norm": 0.10583070665597916, + "learning_rate": 7.899320124766978e-06, + "loss": 0.0025, + "step": 116250 + }, + { + "epoch": 0.7456555271571642, + "grad_norm": 0.07014064490795135, + "learning_rate": 7.898864108836003e-06, + "loss": 0.0025, + "step": 116260 + }, + { + "epoch": 0.7457196640509504, + "grad_norm": 0.054637208580970764, + "learning_rate": 7.89840805658019e-06, + "loss": 0.001, + "step": 116270 + }, + { + "epoch": 0.7457838009447364, + "grad_norm": 0.00924642663449049, + "learning_rate": 7.897951968005253e-06, + "loss": 0.0016, + "step": 116280 + }, + { + "epoch": 0.7458479378385225, + "grad_norm": 0.19948308169841766, + "learning_rate": 7.897495843116905e-06, + "loss": 0.0035, + "step": 116290 + }, + { + "epoch": 0.7459120747323087, + "grad_norm": 0.09530455619096756, + "learning_rate": 7.897039681920863e-06, + "loss": 0.0016, + "step": 116300 + }, + { + "epoch": 0.7459762116260947, + "grad_norm": 0.09297530353069305, + "learning_rate": 7.896583484422845e-06, + "loss": 0.0019, + "step": 116310 + }, + { + "epoch": 0.7460403485198809, + "grad_norm": 0.22910071909427643, + "learning_rate": 7.896127250628565e-06, + "loss": 0.0034, + "step": 116320 + }, + { + "epoch": 0.7461044854136669, + "grad_norm": 0.14804109930992126, + "learning_rate": 7.895670980543742e-06, + "loss": 0.0018, + "step": 116330 + }, + { + "epoch": 0.7461686223074531, + "grad_norm": 0.07542126625776291, + "learning_rate": 7.89521467417409e-06, + "loss": 0.0026, + "step": 116340 + }, + { + "epoch": 0.7462327592012391, + "grad_norm": 0.22744214534759521, + "learning_rate": 7.894758331525329e-06, + "loss": 0.004, + "step": 116350 + }, + { + "epoch": 0.7462968960950253, + "grad_norm": 0.11813952028751373, + "learning_rate": 7.894301952603178e-06, + "loss": 0.0021, + "step": 116360 + }, + { + "epoch": 0.7463610329888113, + "grad_norm": 0.0634949803352356, + "learning_rate": 7.893845537413356e-06, + "loss": 0.0025, + "step": 116370 + }, + { + "epoch": 0.7464251698825974, + "grad_norm": 0.08142178505659103, + "learning_rate": 7.89338908596158e-06, + "loss": 0.003, + "step": 116380 + }, + { + "epoch": 0.7464893067763835, + "grad_norm": 0.08885622769594193, + "learning_rate": 7.892932598253571e-06, + "loss": 0.0029, + "step": 116390 + }, + { + "epoch": 0.7465534436701696, + "grad_norm": 0.04796062782406807, + "learning_rate": 7.89247607429505e-06, + "loss": 0.0099, + "step": 116400 + }, + { + "epoch": 0.7466175805639557, + "grad_norm": 0.1358189582824707, + "learning_rate": 7.892019514091735e-06, + "loss": 0.0029, + "step": 116410 + }, + { + "epoch": 0.7466817174577418, + "grad_norm": 0.16324840486049652, + "learning_rate": 7.891562917649349e-06, + "loss": 0.0032, + "step": 116420 + }, + { + "epoch": 0.7467458543515278, + "grad_norm": 0.3833000659942627, + "learning_rate": 7.891106284973613e-06, + "loss": 0.0039, + "step": 116430 + }, + { + "epoch": 0.746809991245314, + "grad_norm": 0.10605830699205399, + "learning_rate": 7.89064961607025e-06, + "loss": 0.0042, + "step": 116440 + }, + { + "epoch": 0.7468741281391001, + "grad_norm": 0.07625432312488556, + "learning_rate": 7.89019291094498e-06, + "loss": 0.0022, + "step": 116450 + }, + { + "epoch": 0.7469382650328862, + "grad_norm": 0.10014986991882324, + "learning_rate": 7.889736169603528e-06, + "loss": 0.0024, + "step": 116460 + }, + { + "epoch": 0.7470024019266723, + "grad_norm": 0.09487932175397873, + "learning_rate": 7.889279392051617e-06, + "loss": 0.0034, + "step": 116470 + }, + { + "epoch": 0.7470665388204584, + "grad_norm": 0.18687134981155396, + "learning_rate": 7.88882257829497e-06, + "loss": 0.0035, + "step": 116480 + }, + { + "epoch": 0.7471306757142445, + "grad_norm": 0.2099267542362213, + "learning_rate": 7.888365728339311e-06, + "loss": 0.0052, + "step": 116490 + }, + { + "epoch": 0.7471948126080306, + "grad_norm": 0.024278149008750916, + "learning_rate": 7.887908842190366e-06, + "loss": 0.0035, + "step": 116500 + }, + { + "epoch": 0.7472589495018167, + "grad_norm": 0.07662907987833023, + "learning_rate": 7.887451919853858e-06, + "loss": 0.0034, + "step": 116510 + }, + { + "epoch": 0.7473230863956027, + "grad_norm": 0.009092072956264019, + "learning_rate": 7.886994961335515e-06, + "loss": 0.0022, + "step": 116520 + }, + { + "epoch": 0.7473872232893889, + "grad_norm": 0.050177864730358124, + "learning_rate": 7.886537966641061e-06, + "loss": 0.0034, + "step": 116530 + }, + { + "epoch": 0.7474513601831749, + "grad_norm": 0.025178076699376106, + "learning_rate": 7.886080935776224e-06, + "loss": 0.0031, + "step": 116540 + }, + { + "epoch": 0.7475154970769611, + "grad_norm": 0.09660907834768295, + "learning_rate": 7.885623868746729e-06, + "loss": 0.0038, + "step": 116550 + }, + { + "epoch": 0.7475796339707471, + "grad_norm": 0.008361510001122952, + "learning_rate": 7.885166765558306e-06, + "loss": 0.0019, + "step": 116560 + }, + { + "epoch": 0.7476437708645333, + "grad_norm": 0.1537180244922638, + "learning_rate": 7.88470962621668e-06, + "loss": 0.0027, + "step": 116570 + }, + { + "epoch": 0.7477079077583194, + "grad_norm": 0.6607270836830139, + "learning_rate": 7.884252450727582e-06, + "loss": 0.0027, + "step": 116580 + }, + { + "epoch": 0.7477720446521054, + "grad_norm": 0.17144745588302612, + "learning_rate": 7.883795239096739e-06, + "loss": 0.003, + "step": 116590 + }, + { + "epoch": 0.7478361815458916, + "grad_norm": 0.3937118351459503, + "learning_rate": 7.883337991329881e-06, + "loss": 0.0032, + "step": 116600 + }, + { + "epoch": 0.7479003184396776, + "grad_norm": 0.15119680762290955, + "learning_rate": 7.882880707432736e-06, + "loss": 0.0037, + "step": 116610 + }, + { + "epoch": 0.7479644553334638, + "grad_norm": 0.35869231820106506, + "learning_rate": 7.882423387411037e-06, + "loss": 0.0034, + "step": 116620 + }, + { + "epoch": 0.7480285922272498, + "grad_norm": 0.055572181940078735, + "learning_rate": 7.881966031270512e-06, + "loss": 0.0015, + "step": 116630 + }, + { + "epoch": 0.748092729121036, + "grad_norm": 0.1298820823431015, + "learning_rate": 7.881508639016893e-06, + "loss": 0.003, + "step": 116640 + }, + { + "epoch": 0.748156866014822, + "grad_norm": 0.13784027099609375, + "learning_rate": 7.88105121065591e-06, + "loss": 0.0015, + "step": 116650 + }, + { + "epoch": 0.7482210029086082, + "grad_norm": 0.25377780199050903, + "learning_rate": 7.880593746193298e-06, + "loss": 0.01, + "step": 116660 + }, + { + "epoch": 0.7482851398023942, + "grad_norm": 0.06298059225082397, + "learning_rate": 7.880136245634789e-06, + "loss": 0.0027, + "step": 116670 + }, + { + "epoch": 0.7483492766961803, + "grad_norm": 0.05065106227993965, + "learning_rate": 7.879678708986113e-06, + "loss": 0.0025, + "step": 116680 + }, + { + "epoch": 0.7484134135899664, + "grad_norm": 0.02653634175658226, + "learning_rate": 7.879221136253003e-06, + "loss": 0.0023, + "step": 116690 + }, + { + "epoch": 0.7484775504837525, + "grad_norm": 0.08247742801904678, + "learning_rate": 7.878763527441198e-06, + "loss": 0.0022, + "step": 116700 + }, + { + "epoch": 0.7485416873775386, + "grad_norm": 0.10867869108915329, + "learning_rate": 7.878305882556426e-06, + "loss": 0.0041, + "step": 116710 + }, + { + "epoch": 0.7486058242713247, + "grad_norm": 0.1672622412443161, + "learning_rate": 7.877848201604425e-06, + "loss": 0.0027, + "step": 116720 + }, + { + "epoch": 0.7486699611651109, + "grad_norm": 0.25147178769111633, + "learning_rate": 7.877390484590928e-06, + "loss": 0.0025, + "step": 116730 + }, + { + "epoch": 0.7487340980588969, + "grad_norm": 0.04914018139243126, + "learning_rate": 7.876932731521673e-06, + "loss": 0.0016, + "step": 116740 + }, + { + "epoch": 0.748798234952683, + "grad_norm": 0.07496075332164764, + "learning_rate": 7.876474942402395e-06, + "loss": 0.0018, + "step": 116750 + }, + { + "epoch": 0.7488623718464691, + "grad_norm": 0.19975468516349792, + "learning_rate": 7.87601711723883e-06, + "loss": 0.0038, + "step": 116760 + }, + { + "epoch": 0.7489265087402552, + "grad_norm": 0.05046171694993973, + "learning_rate": 7.875559256036714e-06, + "loss": 0.0034, + "step": 116770 + }, + { + "epoch": 0.7489906456340413, + "grad_norm": 0.07045719027519226, + "learning_rate": 7.875101358801787e-06, + "loss": 0.0051, + "step": 116780 + }, + { + "epoch": 0.7490547825278274, + "grad_norm": 0.15396460890769958, + "learning_rate": 7.874643425539785e-06, + "loss": 0.0018, + "step": 116790 + }, + { + "epoch": 0.7491189194216135, + "grad_norm": 0.23495283722877502, + "learning_rate": 7.874185456256444e-06, + "loss": 0.0038, + "step": 116800 + }, + { + "epoch": 0.7491830563153996, + "grad_norm": 0.1421741098165512, + "learning_rate": 7.873727450957506e-06, + "loss": 0.0031, + "step": 116810 + }, + { + "epoch": 0.7492471932091856, + "grad_norm": 0.06015840545296669, + "learning_rate": 7.873269409648711e-06, + "loss": 0.0035, + "step": 116820 + }, + { + "epoch": 0.7493113301029718, + "grad_norm": 0.20923273265361786, + "learning_rate": 7.872811332335796e-06, + "loss": 0.0027, + "step": 116830 + }, + { + "epoch": 0.7493754669967578, + "grad_norm": 0.06418105959892273, + "learning_rate": 7.872353219024503e-06, + "loss": 0.0024, + "step": 116840 + }, + { + "epoch": 0.749439603890544, + "grad_norm": 0.06075701117515564, + "learning_rate": 7.871895069720569e-06, + "loss": 0.0018, + "step": 116850 + }, + { + "epoch": 0.74950374078433, + "grad_norm": 0.060914766043424606, + "learning_rate": 7.871436884429739e-06, + "loss": 0.002, + "step": 116860 + }, + { + "epoch": 0.7495678776781162, + "grad_norm": 0.003072801511734724, + "learning_rate": 7.87097866315775e-06, + "loss": 0.0042, + "step": 116870 + }, + { + "epoch": 0.7496320145719023, + "grad_norm": 0.11973962932825089, + "learning_rate": 7.87052040591035e-06, + "loss": 0.0013, + "step": 116880 + }, + { + "epoch": 0.7496961514656884, + "grad_norm": 0.0684128925204277, + "learning_rate": 7.870062112693277e-06, + "loss": 0.0042, + "step": 116890 + }, + { + "epoch": 0.7497602883594745, + "grad_norm": 0.25988301634788513, + "learning_rate": 7.869603783512273e-06, + "loss": 0.0029, + "step": 116900 + }, + { + "epoch": 0.7498244252532605, + "grad_norm": 0.14829394221305847, + "learning_rate": 7.869145418373083e-06, + "loss": 0.0022, + "step": 116910 + }, + { + "epoch": 0.7498885621470467, + "grad_norm": 0.09994681924581528, + "learning_rate": 7.868687017281452e-06, + "loss": 0.0031, + "step": 116920 + }, + { + "epoch": 0.7499526990408327, + "grad_norm": 0.09394358843564987, + "learning_rate": 7.86822858024312e-06, + "loss": 0.0042, + "step": 116930 + }, + { + "epoch": 0.7500168359346189, + "grad_norm": 0.11060336232185364, + "learning_rate": 7.867770107263837e-06, + "loss": 0.0042, + "step": 116940 + }, + { + "epoch": 0.7500809728284049, + "grad_norm": 0.25255483388900757, + "learning_rate": 7.867311598349343e-06, + "loss": 0.0029, + "step": 116950 + }, + { + "epoch": 0.7501451097221911, + "grad_norm": 0.13214614987373352, + "learning_rate": 7.866853053505386e-06, + "loss": 0.0021, + "step": 116960 + }, + { + "epoch": 0.7502092466159771, + "grad_norm": 0.1233336552977562, + "learning_rate": 7.86639447273771e-06, + "loss": 0.0019, + "step": 116970 + }, + { + "epoch": 0.7502733835097632, + "grad_norm": 0.334183931350708, + "learning_rate": 7.865935856052064e-06, + "loss": 0.0038, + "step": 116980 + }, + { + "epoch": 0.7503375204035493, + "grad_norm": 0.0827973261475563, + "learning_rate": 7.865477203454193e-06, + "loss": 0.0025, + "step": 116990 + }, + { + "epoch": 0.7504016572973354, + "grad_norm": 0.004071071743965149, + "learning_rate": 7.865018514949844e-06, + "loss": 0.0028, + "step": 117000 + }, + { + "epoch": 0.7504657941911216, + "grad_norm": 0.11148324608802795, + "learning_rate": 7.864559790544768e-06, + "loss": 0.002, + "step": 117010 + }, + { + "epoch": 0.7505299310849076, + "grad_norm": 0.16427700221538544, + "learning_rate": 7.864101030244708e-06, + "loss": 0.0028, + "step": 117020 + }, + { + "epoch": 0.7505940679786938, + "grad_norm": 0.04392922297120094, + "learning_rate": 7.863642234055416e-06, + "loss": 0.0009, + "step": 117030 + }, + { + "epoch": 0.7506582048724798, + "grad_norm": 0.08696134388446808, + "learning_rate": 7.86318340198264e-06, + "loss": 0.0027, + "step": 117040 + }, + { + "epoch": 0.750722341766266, + "grad_norm": 0.07006500661373138, + "learning_rate": 7.862724534032131e-06, + "loss": 0.0024, + "step": 117050 + }, + { + "epoch": 0.750786478660052, + "grad_norm": 0.10490524768829346, + "learning_rate": 7.862265630209635e-06, + "loss": 0.0044, + "step": 117060 + }, + { + "epoch": 0.7508506155538381, + "grad_norm": 0.0829753503203392, + "learning_rate": 7.861806690520908e-06, + "loss": 0.0021, + "step": 117070 + }, + { + "epoch": 0.7509147524476242, + "grad_norm": 0.2763497829437256, + "learning_rate": 7.861347714971696e-06, + "loss": 0.002, + "step": 117080 + }, + { + "epoch": 0.7509788893414103, + "grad_norm": 0.1821936070919037, + "learning_rate": 7.860888703567753e-06, + "loss": 0.0023, + "step": 117090 + }, + { + "epoch": 0.7510430262351964, + "grad_norm": 0.1808260977268219, + "learning_rate": 7.860429656314827e-06, + "loss": 0.004, + "step": 117100 + }, + { + "epoch": 0.7511071631289825, + "grad_norm": 0.08387022465467453, + "learning_rate": 7.859970573218675e-06, + "loss": 0.0029, + "step": 117110 + }, + { + "epoch": 0.7511713000227686, + "grad_norm": 0.10462870448827744, + "learning_rate": 7.85951145428505e-06, + "loss": 0.0034, + "step": 117120 + }, + { + "epoch": 0.7512354369165547, + "grad_norm": 0.06143558397889137, + "learning_rate": 7.859052299519701e-06, + "loss": 0.0022, + "step": 117130 + }, + { + "epoch": 0.7512995738103407, + "grad_norm": 0.09308692067861557, + "learning_rate": 7.858593108928383e-06, + "loss": 0.0024, + "step": 117140 + }, + { + "epoch": 0.7513637107041269, + "grad_norm": 0.038160715252161026, + "learning_rate": 7.858133882516852e-06, + "loss": 0.0021, + "step": 117150 + }, + { + "epoch": 0.751427847597913, + "grad_norm": 0.13036786019802094, + "learning_rate": 7.85767462029086e-06, + "loss": 0.0016, + "step": 117160 + }, + { + "epoch": 0.7514919844916991, + "grad_norm": 0.2630171775817871, + "learning_rate": 7.857215322256162e-06, + "loss": 0.0038, + "step": 117170 + }, + { + "epoch": 0.7515561213854852, + "grad_norm": 0.04008711874485016, + "learning_rate": 7.856755988418514e-06, + "loss": 0.0049, + "step": 117180 + }, + { + "epoch": 0.7516202582792713, + "grad_norm": 0.13282530009746552, + "learning_rate": 7.856296618783672e-06, + "loss": 0.0036, + "step": 117190 + }, + { + "epoch": 0.7516843951730574, + "grad_norm": 0.13415652513504028, + "learning_rate": 7.85583721335739e-06, + "loss": 0.003, + "step": 117200 + }, + { + "epoch": 0.7517485320668434, + "grad_norm": 0.04852619394659996, + "learning_rate": 7.855377772145431e-06, + "loss": 0.0023, + "step": 117210 + }, + { + "epoch": 0.7518126689606296, + "grad_norm": 0.10932836681604385, + "learning_rate": 7.854918295153546e-06, + "loss": 0.0051, + "step": 117220 + }, + { + "epoch": 0.7518768058544156, + "grad_norm": 0.0693749263882637, + "learning_rate": 7.854458782387494e-06, + "loss": 0.0028, + "step": 117230 + }, + { + "epoch": 0.7519409427482018, + "grad_norm": 0.15589742362499237, + "learning_rate": 7.853999233853032e-06, + "loss": 0.0016, + "step": 117240 + }, + { + "epoch": 0.7520050796419878, + "grad_norm": 0.2942682206630707, + "learning_rate": 7.853539649555922e-06, + "loss": 0.0041, + "step": 117250 + }, + { + "epoch": 0.752069216535774, + "grad_norm": 0.10200204700231552, + "learning_rate": 7.853080029501918e-06, + "loss": 0.0042, + "step": 117260 + }, + { + "epoch": 0.75213335342956, + "grad_norm": 0.12070576101541519, + "learning_rate": 7.852620373696786e-06, + "loss": 0.0034, + "step": 117270 + }, + { + "epoch": 0.7521974903233462, + "grad_norm": 0.10774677991867065, + "learning_rate": 7.85216068214628e-06, + "loss": 0.0023, + "step": 117280 + }, + { + "epoch": 0.7522616272171323, + "grad_norm": 0.15308523178100586, + "learning_rate": 7.851700954856162e-06, + "loss": 0.0033, + "step": 117290 + }, + { + "epoch": 0.7523257641109183, + "grad_norm": 0.2831006646156311, + "learning_rate": 7.851241191832192e-06, + "loss": 0.0035, + "step": 117300 + }, + { + "epoch": 0.7523899010047045, + "grad_norm": 0.06799483299255371, + "learning_rate": 7.850781393080134e-06, + "loss": 0.0011, + "step": 117310 + }, + { + "epoch": 0.7524540378984905, + "grad_norm": 0.16940274834632874, + "learning_rate": 7.850321558605748e-06, + "loss": 0.0024, + "step": 117320 + }, + { + "epoch": 0.7525181747922767, + "grad_norm": 0.0938853770494461, + "learning_rate": 7.849861688414795e-06, + "loss": 0.0027, + "step": 117330 + }, + { + "epoch": 0.7525823116860627, + "grad_norm": 0.18646354973316193, + "learning_rate": 7.849401782513037e-06, + "loss": 0.0056, + "step": 117340 + }, + { + "epoch": 0.7526464485798489, + "grad_norm": 0.16712817549705505, + "learning_rate": 7.84894184090624e-06, + "loss": 0.0034, + "step": 117350 + }, + { + "epoch": 0.7527105854736349, + "grad_norm": 0.2718506157398224, + "learning_rate": 7.848481863600165e-06, + "loss": 0.0035, + "step": 117360 + }, + { + "epoch": 0.752774722367421, + "grad_norm": 0.16131126880645752, + "learning_rate": 7.848021850600578e-06, + "loss": 0.0028, + "step": 117370 + }, + { + "epoch": 0.7528388592612071, + "grad_norm": 0.10022114962339401, + "learning_rate": 7.84756180191324e-06, + "loss": 0.002, + "step": 117380 + }, + { + "epoch": 0.7529029961549932, + "grad_norm": 0.18879581987857819, + "learning_rate": 7.847101717543916e-06, + "loss": 0.0048, + "step": 117390 + }, + { + "epoch": 0.7529671330487793, + "grad_norm": 0.02280520647764206, + "learning_rate": 7.846641597498375e-06, + "loss": 0.0017, + "step": 117400 + }, + { + "epoch": 0.7530312699425654, + "grad_norm": 0.22137390077114105, + "learning_rate": 7.84618144178238e-06, + "loss": 0.0031, + "step": 117410 + }, + { + "epoch": 0.7530954068363515, + "grad_norm": 0.01127664279192686, + "learning_rate": 7.845721250401697e-06, + "loss": 0.0019, + "step": 117420 + }, + { + "epoch": 0.7531595437301376, + "grad_norm": 0.15001355111598969, + "learning_rate": 7.845261023362093e-06, + "loss": 0.0022, + "step": 117430 + }, + { + "epoch": 0.7532236806239238, + "grad_norm": 0.3733644485473633, + "learning_rate": 7.844800760669336e-06, + "loss": 0.0028, + "step": 117440 + }, + { + "epoch": 0.7532878175177098, + "grad_norm": 0.06113684922456741, + "learning_rate": 7.84434046232919e-06, + "loss": 0.0021, + "step": 117450 + }, + { + "epoch": 0.7533519544114959, + "grad_norm": 0.17942652106285095, + "learning_rate": 7.843880128347426e-06, + "loss": 0.0025, + "step": 117460 + }, + { + "epoch": 0.753416091305282, + "grad_norm": 0.14272238314151764, + "learning_rate": 7.843419758729814e-06, + "loss": 0.0022, + "step": 117470 + }, + { + "epoch": 0.7534802281990681, + "grad_norm": 0.05989169329404831, + "learning_rate": 7.842959353482116e-06, + "loss": 0.0014, + "step": 117480 + }, + { + "epoch": 0.7535443650928542, + "grad_norm": 0.029922956600785255, + "learning_rate": 7.842498912610109e-06, + "loss": 0.0031, + "step": 117490 + }, + { + "epoch": 0.7536085019866403, + "grad_norm": 0.16737212240695953, + "learning_rate": 7.842038436119558e-06, + "loss": 0.0034, + "step": 117500 + }, + { + "epoch": 0.7536726388804263, + "grad_norm": 0.28119874000549316, + "learning_rate": 7.841577924016233e-06, + "loss": 0.0034, + "step": 117510 + }, + { + "epoch": 0.7537367757742125, + "grad_norm": 0.2850058972835541, + "learning_rate": 7.841117376305906e-06, + "loss": 0.0018, + "step": 117520 + }, + { + "epoch": 0.7538009126679985, + "grad_norm": 0.08792038261890411, + "learning_rate": 7.84065679299435e-06, + "loss": 0.0036, + "step": 117530 + }, + { + "epoch": 0.7538650495617847, + "grad_norm": 0.032527461647987366, + "learning_rate": 7.840196174087333e-06, + "loss": 0.0038, + "step": 117540 + }, + { + "epoch": 0.7539291864555707, + "grad_norm": 0.08409000188112259, + "learning_rate": 7.839735519590628e-06, + "loss": 0.0036, + "step": 117550 + }, + { + "epoch": 0.7539933233493569, + "grad_norm": 0.0372193269431591, + "learning_rate": 7.839274829510008e-06, + "loss": 0.0024, + "step": 117560 + }, + { + "epoch": 0.754057460243143, + "grad_norm": 0.17293556034564972, + "learning_rate": 7.838814103851244e-06, + "loss": 0.003, + "step": 117570 + }, + { + "epoch": 0.7541215971369291, + "grad_norm": 0.09905791282653809, + "learning_rate": 7.83835334262011e-06, + "loss": 0.0022, + "step": 117580 + }, + { + "epoch": 0.7541857340307152, + "grad_norm": 0.2138233780860901, + "learning_rate": 7.837892545822381e-06, + "loss": 0.0024, + "step": 117590 + }, + { + "epoch": 0.7542498709245012, + "grad_norm": 0.35840851068496704, + "learning_rate": 7.837431713463831e-06, + "loss": 0.0023, + "step": 117600 + }, + { + "epoch": 0.7543140078182874, + "grad_norm": 0.18870952725410461, + "learning_rate": 7.836970845550232e-06, + "loss": 0.0028, + "step": 117610 + }, + { + "epoch": 0.7543781447120734, + "grad_norm": 0.010556691326200962, + "learning_rate": 7.836509942087362e-06, + "loss": 0.0041, + "step": 117620 + }, + { + "epoch": 0.7544422816058596, + "grad_norm": 0.029404442757368088, + "learning_rate": 7.836049003080994e-06, + "loss": 0.002, + "step": 117630 + }, + { + "epoch": 0.7545064184996456, + "grad_norm": 0.05506880208849907, + "learning_rate": 7.835588028536906e-06, + "loss": 0.0021, + "step": 117640 + }, + { + "epoch": 0.7545705553934318, + "grad_norm": 0.20457029342651367, + "learning_rate": 7.835127018460876e-06, + "loss": 0.0039, + "step": 117650 + }, + { + "epoch": 0.7546346922872178, + "grad_norm": 0.1909932941198349, + "learning_rate": 7.834665972858674e-06, + "loss": 0.0045, + "step": 117660 + }, + { + "epoch": 0.754698829181004, + "grad_norm": 0.045761752873659134, + "learning_rate": 7.834204891736083e-06, + "loss": 0.0023, + "step": 117670 + }, + { + "epoch": 0.75476296607479, + "grad_norm": 0.23617449402809143, + "learning_rate": 7.83374377509888e-06, + "loss": 0.0034, + "step": 117680 + }, + { + "epoch": 0.7548271029685761, + "grad_norm": 0.018489936366677284, + "learning_rate": 7.833282622952842e-06, + "loss": 0.0017, + "step": 117690 + }, + { + "epoch": 0.7548912398623622, + "grad_norm": 0.16313032805919647, + "learning_rate": 7.832821435303745e-06, + "loss": 0.0028, + "step": 117700 + }, + { + "epoch": 0.7549553767561483, + "grad_norm": 0.1480388343334198, + "learning_rate": 7.832360212157374e-06, + "loss": 0.0028, + "step": 117710 + }, + { + "epoch": 0.7550195136499345, + "grad_norm": 0.00466338824480772, + "learning_rate": 7.831898953519505e-06, + "loss": 0.0023, + "step": 117720 + }, + { + "epoch": 0.7550836505437205, + "grad_norm": 0.14430424571037292, + "learning_rate": 7.831437659395917e-06, + "loss": 0.0021, + "step": 117730 + }, + { + "epoch": 0.7551477874375067, + "grad_norm": 0.24340131878852844, + "learning_rate": 7.830976329792393e-06, + "loss": 0.002, + "step": 117740 + }, + { + "epoch": 0.7552119243312927, + "grad_norm": 0.025799771770834923, + "learning_rate": 7.83051496471471e-06, + "loss": 0.0023, + "step": 117750 + }, + { + "epoch": 0.7552760612250788, + "grad_norm": 0.12621502578258514, + "learning_rate": 7.830053564168654e-06, + "loss": 0.0026, + "step": 117760 + }, + { + "epoch": 0.7553401981188649, + "grad_norm": 0.20064160227775574, + "learning_rate": 7.829592128160003e-06, + "loss": 0.0029, + "step": 117770 + }, + { + "epoch": 0.755404335012651, + "grad_norm": 0.1149071529507637, + "learning_rate": 7.82913065669454e-06, + "loss": 0.0014, + "step": 117780 + }, + { + "epoch": 0.7554684719064371, + "grad_norm": 0.23584522306919098, + "learning_rate": 7.828669149778048e-06, + "loss": 0.0032, + "step": 117790 + }, + { + "epoch": 0.7555326088002232, + "grad_norm": 0.020709898322820663, + "learning_rate": 7.828207607416312e-06, + "loss": 0.0029, + "step": 117800 + }, + { + "epoch": 0.7555967456940093, + "grad_norm": 0.08159718662500381, + "learning_rate": 7.827746029615112e-06, + "loss": 0.0033, + "step": 117810 + }, + { + "epoch": 0.7556608825877954, + "grad_norm": 0.05778425559401512, + "learning_rate": 7.827284416380231e-06, + "loss": 0.0017, + "step": 117820 + }, + { + "epoch": 0.7557250194815814, + "grad_norm": 0.08645426481962204, + "learning_rate": 7.82682276771746e-06, + "loss": 0.0018, + "step": 117830 + }, + { + "epoch": 0.7557891563753676, + "grad_norm": 0.1733463555574417, + "learning_rate": 7.826361083632576e-06, + "loss": 0.0026, + "step": 117840 + }, + { + "epoch": 0.7558532932691537, + "grad_norm": 0.12011563777923584, + "learning_rate": 7.825899364131368e-06, + "loss": 0.003, + "step": 117850 + }, + { + "epoch": 0.7559174301629398, + "grad_norm": 0.07007107138633728, + "learning_rate": 7.825437609219622e-06, + "loss": 0.0066, + "step": 117860 + }, + { + "epoch": 0.7559815670567259, + "grad_norm": 0.06176275387406349, + "learning_rate": 7.824975818903124e-06, + "loss": 0.0028, + "step": 117870 + }, + { + "epoch": 0.756045703950512, + "grad_norm": 0.07692543417215347, + "learning_rate": 7.82451399318766e-06, + "loss": 0.0015, + "step": 117880 + }, + { + "epoch": 0.7561098408442981, + "grad_norm": 0.33473703265190125, + "learning_rate": 7.824052132079017e-06, + "loss": 0.0037, + "step": 117890 + }, + { + "epoch": 0.7561739777380841, + "grad_norm": 0.10095279663801193, + "learning_rate": 7.823590235582982e-06, + "loss": 0.0028, + "step": 117900 + }, + { + "epoch": 0.7562381146318703, + "grad_norm": 0.10655076056718826, + "learning_rate": 7.823128303705343e-06, + "loss": 0.002, + "step": 117910 + }, + { + "epoch": 0.7563022515256563, + "grad_norm": 0.10879048705101013, + "learning_rate": 7.822666336451889e-06, + "loss": 0.0047, + "step": 117920 + }, + { + "epoch": 0.7563663884194425, + "grad_norm": 0.16622315347194672, + "learning_rate": 7.822204333828409e-06, + "loss": 0.0025, + "step": 117930 + }, + { + "epoch": 0.7564305253132285, + "grad_norm": 0.08693701028823853, + "learning_rate": 7.821742295840692e-06, + "loss": 0.0021, + "step": 117940 + }, + { + "epoch": 0.7564946622070147, + "grad_norm": 0.18205344676971436, + "learning_rate": 7.821280222494526e-06, + "loss": 0.0016, + "step": 117950 + }, + { + "epoch": 0.7565587991008007, + "grad_norm": 0.16920578479766846, + "learning_rate": 7.820818113795702e-06, + "loss": 0.0057, + "step": 117960 + }, + { + "epoch": 0.7566229359945869, + "grad_norm": 0.05380775406956673, + "learning_rate": 7.820355969750012e-06, + "loss": 0.0021, + "step": 117970 + }, + { + "epoch": 0.7566870728883729, + "grad_norm": 0.13284482061862946, + "learning_rate": 7.819893790363248e-06, + "loss": 0.0019, + "step": 117980 + }, + { + "epoch": 0.756751209782159, + "grad_norm": 0.3372058570384979, + "learning_rate": 7.819431575641197e-06, + "loss": 0.0044, + "step": 117990 + }, + { + "epoch": 0.7568153466759452, + "grad_norm": 0.08662164211273193, + "learning_rate": 7.818969325589654e-06, + "loss": 0.0028, + "step": 118000 + }, + { + "epoch": 0.7568794835697312, + "grad_norm": 0.026164045557379723, + "learning_rate": 7.818507040214411e-06, + "loss": 0.004, + "step": 118010 + }, + { + "epoch": 0.7569436204635174, + "grad_norm": 0.14952978491783142, + "learning_rate": 7.81804471952126e-06, + "loss": 0.0022, + "step": 118020 + }, + { + "epoch": 0.7570077573573034, + "grad_norm": 0.126239612698555, + "learning_rate": 7.817582363515994e-06, + "loss": 0.0045, + "step": 118030 + }, + { + "epoch": 0.7570718942510896, + "grad_norm": 0.13074319064617157, + "learning_rate": 7.817119972204409e-06, + "loss": 0.0032, + "step": 118040 + }, + { + "epoch": 0.7571360311448756, + "grad_norm": 0.09498288482427597, + "learning_rate": 7.816657545592297e-06, + "loss": 0.0024, + "step": 118050 + }, + { + "epoch": 0.7572001680386617, + "grad_norm": 0.09087800979614258, + "learning_rate": 7.816195083685452e-06, + "loss": 0.0034, + "step": 118060 + }, + { + "epoch": 0.7572643049324478, + "grad_norm": 0.04143861308693886, + "learning_rate": 7.815732586489671e-06, + "loss": 0.0051, + "step": 118070 + }, + { + "epoch": 0.7573284418262339, + "grad_norm": 0.10549326241016388, + "learning_rate": 7.815270054010747e-06, + "loss": 0.0047, + "step": 118080 + }, + { + "epoch": 0.75739257872002, + "grad_norm": 0.07025926560163498, + "learning_rate": 7.814807486254477e-06, + "loss": 0.0029, + "step": 118090 + }, + { + "epoch": 0.7574567156138061, + "grad_norm": 0.07964323461055756, + "learning_rate": 7.81434488322666e-06, + "loss": 0.0025, + "step": 118100 + }, + { + "epoch": 0.7575208525075922, + "grad_norm": 0.06105535849928856, + "learning_rate": 7.813882244933086e-06, + "loss": 0.0023, + "step": 118110 + }, + { + "epoch": 0.7575849894013783, + "grad_norm": 0.0810265839099884, + "learning_rate": 7.813419571379558e-06, + "loss": 0.0015, + "step": 118120 + }, + { + "epoch": 0.7576491262951643, + "grad_norm": 0.04925093799829483, + "learning_rate": 7.812956862571874e-06, + "loss": 0.0021, + "step": 118130 + }, + { + "epoch": 0.7577132631889505, + "grad_norm": 0.02250676043331623, + "learning_rate": 7.81249411851583e-06, + "loss": 0.0015, + "step": 118140 + }, + { + "epoch": 0.7577774000827366, + "grad_norm": 0.08988325297832489, + "learning_rate": 7.812031339217223e-06, + "loss": 0.0014, + "step": 118150 + }, + { + "epoch": 0.7578415369765227, + "grad_norm": 0.02714879997074604, + "learning_rate": 7.811568524681854e-06, + "loss": 0.0018, + "step": 118160 + }, + { + "epoch": 0.7579056738703088, + "grad_norm": 0.20410077273845673, + "learning_rate": 7.811105674915523e-06, + "loss": 0.0056, + "step": 118170 + }, + { + "epoch": 0.7579698107640949, + "grad_norm": 0.07401904463768005, + "learning_rate": 7.810642789924027e-06, + "loss": 0.0018, + "step": 118180 + }, + { + "epoch": 0.758033947657881, + "grad_norm": 0.1179809644818306, + "learning_rate": 7.810179869713169e-06, + "loss": 0.0026, + "step": 118190 + }, + { + "epoch": 0.758098084551667, + "grad_norm": 0.2100778967142105, + "learning_rate": 7.80971691428875e-06, + "loss": 0.0032, + "step": 118200 + }, + { + "epoch": 0.7581622214454532, + "grad_norm": 0.3232751190662384, + "learning_rate": 7.809253923656567e-06, + "loss": 0.0024, + "step": 118210 + }, + { + "epoch": 0.7582263583392392, + "grad_norm": 0.09631489962339401, + "learning_rate": 7.808790897822427e-06, + "loss": 0.0045, + "step": 118220 + }, + { + "epoch": 0.7582904952330254, + "grad_norm": 0.10500827431678772, + "learning_rate": 7.80832783679213e-06, + "loss": 0.0039, + "step": 118230 + }, + { + "epoch": 0.7583546321268114, + "grad_norm": 0.09026511758565903, + "learning_rate": 7.807864740571479e-06, + "loss": 0.0019, + "step": 118240 + }, + { + "epoch": 0.7584187690205976, + "grad_norm": 0.2540445625782013, + "learning_rate": 7.807401609166274e-06, + "loss": 0.0039, + "step": 118250 + }, + { + "epoch": 0.7584829059143836, + "grad_norm": 0.21617233753204346, + "learning_rate": 7.80693844258232e-06, + "loss": 0.0038, + "step": 118260 + }, + { + "epoch": 0.7585470428081698, + "grad_norm": 0.030382253229618073, + "learning_rate": 7.806475240825421e-06, + "loss": 0.0028, + "step": 118270 + }, + { + "epoch": 0.7586111797019559, + "grad_norm": 0.09669791907072067, + "learning_rate": 7.806012003901384e-06, + "loss": 0.0016, + "step": 118280 + }, + { + "epoch": 0.7586753165957419, + "grad_norm": 0.07175273448228836, + "learning_rate": 7.805548731816009e-06, + "loss": 0.0039, + "step": 118290 + }, + { + "epoch": 0.7587394534895281, + "grad_norm": 0.1716214120388031, + "learning_rate": 7.805085424575104e-06, + "loss": 0.0027, + "step": 118300 + }, + { + "epoch": 0.7588035903833141, + "grad_norm": 0.06944598257541656, + "learning_rate": 7.804622082184473e-06, + "loss": 0.0019, + "step": 118310 + }, + { + "epoch": 0.7588677272771003, + "grad_norm": 0.1075916513800621, + "learning_rate": 7.804158704649925e-06, + "loss": 0.0028, + "step": 118320 + }, + { + "epoch": 0.7589318641708863, + "grad_norm": 0.1181074008345604, + "learning_rate": 7.803695291977262e-06, + "loss": 0.0033, + "step": 118330 + }, + { + "epoch": 0.7589960010646725, + "grad_norm": 0.13426217436790466, + "learning_rate": 7.803231844172295e-06, + "loss": 0.0044, + "step": 118340 + }, + { + "epoch": 0.7590601379584585, + "grad_norm": 0.30457431077957153, + "learning_rate": 7.80276836124083e-06, + "loss": 0.004, + "step": 118350 + }, + { + "epoch": 0.7591242748522447, + "grad_norm": 0.3851284980773926, + "learning_rate": 7.802304843188672e-06, + "loss": 0.0013, + "step": 118360 + }, + { + "epoch": 0.7591884117460307, + "grad_norm": 0.15478985011577606, + "learning_rate": 7.801841290021632e-06, + "loss": 0.0041, + "step": 118370 + }, + { + "epoch": 0.7592525486398168, + "grad_norm": 0.1672656387090683, + "learning_rate": 7.801377701745518e-06, + "loss": 0.003, + "step": 118380 + }, + { + "epoch": 0.7593166855336029, + "grad_norm": 0.10904452949762344, + "learning_rate": 7.800914078366142e-06, + "loss": 0.0039, + "step": 118390 + }, + { + "epoch": 0.759380822427389, + "grad_norm": 0.12326830625534058, + "learning_rate": 7.800450419889308e-06, + "loss": 0.0031, + "step": 118400 + }, + { + "epoch": 0.7594449593211751, + "grad_norm": 0.04942172020673752, + "learning_rate": 7.79998672632083e-06, + "loss": 0.0023, + "step": 118410 + }, + { + "epoch": 0.7595090962149612, + "grad_norm": 0.17837822437286377, + "learning_rate": 7.799522997666517e-06, + "loss": 0.004, + "step": 118420 + }, + { + "epoch": 0.7595732331087474, + "grad_norm": 0.37312379479408264, + "learning_rate": 7.79905923393218e-06, + "loss": 0.0016, + "step": 118430 + }, + { + "epoch": 0.7596373700025334, + "grad_norm": 0.0728418305516243, + "learning_rate": 7.79859543512363e-06, + "loss": 0.0013, + "step": 118440 + }, + { + "epoch": 0.7597015068963195, + "grad_norm": 0.6216109991073608, + "learning_rate": 7.798131601246679e-06, + "loss": 0.0037, + "step": 118450 + }, + { + "epoch": 0.7597656437901056, + "grad_norm": 0.283000111579895, + "learning_rate": 7.79766773230714e-06, + "loss": 0.0054, + "step": 118460 + }, + { + "epoch": 0.7598297806838917, + "grad_norm": 0.10428867489099503, + "learning_rate": 7.797203828310824e-06, + "loss": 0.004, + "step": 118470 + }, + { + "epoch": 0.7598939175776778, + "grad_norm": 0.10073235630989075, + "learning_rate": 7.796739889263546e-06, + "loss": 0.002, + "step": 118480 + }, + { + "epoch": 0.7599580544714639, + "grad_norm": 0.31705033779144287, + "learning_rate": 7.796275915171119e-06, + "loss": 0.0051, + "step": 118490 + }, + { + "epoch": 0.76002219136525, + "grad_norm": 0.13120688498020172, + "learning_rate": 7.795811906039354e-06, + "loss": 0.0046, + "step": 118500 + }, + { + "epoch": 0.7600863282590361, + "grad_norm": 0.07548979669809341, + "learning_rate": 7.795347861874069e-06, + "loss": 0.0027, + "step": 118510 + }, + { + "epoch": 0.7601504651528221, + "grad_norm": 0.1204274445772171, + "learning_rate": 7.794883782681077e-06, + "loss": 0.0051, + "step": 118520 + }, + { + "epoch": 0.7602146020466083, + "grad_norm": 0.1520080715417862, + "learning_rate": 7.794419668466194e-06, + "loss": 0.0033, + "step": 118530 + }, + { + "epoch": 0.7602787389403943, + "grad_norm": 0.24985525012016296, + "learning_rate": 7.793955519235236e-06, + "loss": 0.0028, + "step": 118540 + }, + { + "epoch": 0.7603428758341805, + "grad_norm": 0.06198232248425484, + "learning_rate": 7.793491334994017e-06, + "loss": 0.0023, + "step": 118550 + }, + { + "epoch": 0.7604070127279666, + "grad_norm": 0.11053550988435745, + "learning_rate": 7.793027115748357e-06, + "loss": 0.0036, + "step": 118560 + }, + { + "epoch": 0.7604711496217527, + "grad_norm": 0.07890421152114868, + "learning_rate": 7.79256286150407e-06, + "loss": 0.0018, + "step": 118570 + }, + { + "epoch": 0.7605352865155388, + "grad_norm": 0.3829288184642792, + "learning_rate": 7.792098572266974e-06, + "loss": 0.0068, + "step": 118580 + }, + { + "epoch": 0.7605994234093248, + "grad_norm": 0.10420060157775879, + "learning_rate": 7.791634248042887e-06, + "loss": 0.0015, + "step": 118590 + }, + { + "epoch": 0.760663560303111, + "grad_norm": 0.036105263978242874, + "learning_rate": 7.79116988883763e-06, + "loss": 0.0023, + "step": 118600 + }, + { + "epoch": 0.760727697196897, + "grad_norm": 0.1294431984424591, + "learning_rate": 7.790705494657018e-06, + "loss": 0.0017, + "step": 118610 + }, + { + "epoch": 0.7607918340906832, + "grad_norm": 0.18665571510791779, + "learning_rate": 7.790241065506871e-06, + "loss": 0.0018, + "step": 118620 + }, + { + "epoch": 0.7608559709844692, + "grad_norm": 0.0662047490477562, + "learning_rate": 7.78977660139301e-06, + "loss": 0.0013, + "step": 118630 + }, + { + "epoch": 0.7609201078782554, + "grad_norm": 0.1589532196521759, + "learning_rate": 7.789312102321256e-06, + "loss": 0.0016, + "step": 118640 + }, + { + "epoch": 0.7609842447720414, + "grad_norm": 0.11786510795354843, + "learning_rate": 7.788847568297426e-06, + "loss": 0.0027, + "step": 118650 + }, + { + "epoch": 0.7610483816658276, + "grad_norm": 0.16013629734516144, + "learning_rate": 7.788382999327342e-06, + "loss": 0.0016, + "step": 118660 + }, + { + "epoch": 0.7611125185596136, + "grad_norm": 0.08604548871517181, + "learning_rate": 7.78791839541683e-06, + "loss": 0.0016, + "step": 118670 + }, + { + "epoch": 0.7611766554533997, + "grad_norm": 0.09176530689001083, + "learning_rate": 7.787453756571703e-06, + "loss": 0.0026, + "step": 118680 + }, + { + "epoch": 0.7612407923471858, + "grad_norm": 0.04124186933040619, + "learning_rate": 7.78698908279779e-06, + "loss": 0.0032, + "step": 118690 + }, + { + "epoch": 0.7613049292409719, + "grad_norm": 0.08384433388710022, + "learning_rate": 7.786524374100915e-06, + "loss": 0.0047, + "step": 118700 + }, + { + "epoch": 0.7613690661347581, + "grad_norm": 0.5120087265968323, + "learning_rate": 7.786059630486895e-06, + "loss": 0.0032, + "step": 118710 + }, + { + "epoch": 0.7614332030285441, + "grad_norm": 0.364688515663147, + "learning_rate": 7.785594851961558e-06, + "loss": 0.005, + "step": 118720 + }, + { + "epoch": 0.7614973399223303, + "grad_norm": 0.20993518829345703, + "learning_rate": 7.785130038530726e-06, + "loss": 0.0041, + "step": 118730 + }, + { + "epoch": 0.7615614768161163, + "grad_norm": 0.1628425121307373, + "learning_rate": 7.784665190200225e-06, + "loss": 0.0026, + "step": 118740 + }, + { + "epoch": 0.7616256137099024, + "grad_norm": 0.18203139305114746, + "learning_rate": 7.784200306975878e-06, + "loss": 0.0023, + "step": 118750 + }, + { + "epoch": 0.7616897506036885, + "grad_norm": 0.10203225165605545, + "learning_rate": 7.783735388863511e-06, + "loss": 0.0013, + "step": 118760 + }, + { + "epoch": 0.7617538874974746, + "grad_norm": 0.06325043737888336, + "learning_rate": 7.78327043586895e-06, + "loss": 0.0027, + "step": 118770 + }, + { + "epoch": 0.7618180243912607, + "grad_norm": 0.05929301679134369, + "learning_rate": 7.782805447998023e-06, + "loss": 0.002, + "step": 118780 + }, + { + "epoch": 0.7618821612850468, + "grad_norm": 0.15617991983890533, + "learning_rate": 7.782340425256553e-06, + "loss": 0.0042, + "step": 118790 + }, + { + "epoch": 0.7619462981788329, + "grad_norm": 0.054896991699934006, + "learning_rate": 7.78187536765037e-06, + "loss": 0.0028, + "step": 118800 + }, + { + "epoch": 0.762010435072619, + "grad_norm": 0.0946490690112114, + "learning_rate": 7.781410275185301e-06, + "loss": 0.0045, + "step": 118810 + }, + { + "epoch": 0.762074571966405, + "grad_norm": 0.12389269471168518, + "learning_rate": 7.780945147867172e-06, + "loss": 0.003, + "step": 118820 + }, + { + "epoch": 0.7621387088601912, + "grad_norm": 0.13792073726654053, + "learning_rate": 7.780479985701813e-06, + "loss": 0.0032, + "step": 118830 + }, + { + "epoch": 0.7622028457539773, + "grad_norm": 0.17356400191783905, + "learning_rate": 7.780014788695054e-06, + "loss": 0.0022, + "step": 118840 + }, + { + "epoch": 0.7622669826477634, + "grad_norm": 0.25488826632499695, + "learning_rate": 7.779549556852722e-06, + "loss": 0.0034, + "step": 118850 + }, + { + "epoch": 0.7623311195415495, + "grad_norm": 0.14415204524993896, + "learning_rate": 7.779084290180648e-06, + "loss": 0.0031, + "step": 118860 + }, + { + "epoch": 0.7623952564353356, + "grad_norm": 0.18693728744983673, + "learning_rate": 7.77861898868466e-06, + "loss": 0.0023, + "step": 118870 + }, + { + "epoch": 0.7624593933291217, + "grad_norm": 0.32552337646484375, + "learning_rate": 7.77815365237059e-06, + "loss": 0.003, + "step": 118880 + }, + { + "epoch": 0.7625235302229078, + "grad_norm": 0.21315798163414001, + "learning_rate": 7.777688281244272e-06, + "loss": 0.0029, + "step": 118890 + }, + { + "epoch": 0.7625876671166939, + "grad_norm": 0.3089481294155121, + "learning_rate": 7.777222875311533e-06, + "loss": 0.0021, + "step": 118900 + }, + { + "epoch": 0.7626518040104799, + "grad_norm": 0.07197723537683487, + "learning_rate": 7.776757434578206e-06, + "loss": 0.0023, + "step": 118910 + }, + { + "epoch": 0.7627159409042661, + "grad_norm": 0.10376438498497009, + "learning_rate": 7.776291959050125e-06, + "loss": 0.0018, + "step": 118920 + }, + { + "epoch": 0.7627800777980521, + "grad_norm": 0.16951358318328857, + "learning_rate": 7.775826448733121e-06, + "loss": 0.0024, + "step": 118930 + }, + { + "epoch": 0.7628442146918383, + "grad_norm": 0.20929601788520813, + "learning_rate": 7.775360903633026e-06, + "loss": 0.0049, + "step": 118940 + }, + { + "epoch": 0.7629083515856243, + "grad_norm": 0.09313658624887466, + "learning_rate": 7.774895323755678e-06, + "loss": 0.0032, + "step": 118950 + }, + { + "epoch": 0.7629724884794105, + "grad_norm": 0.2288755476474762, + "learning_rate": 7.774429709106907e-06, + "loss": 0.0049, + "step": 118960 + }, + { + "epoch": 0.7630366253731965, + "grad_norm": 0.13903319835662842, + "learning_rate": 7.77396405969255e-06, + "loss": 0.0028, + "step": 118970 + }, + { + "epoch": 0.7631007622669826, + "grad_norm": 0.056591082364320755, + "learning_rate": 7.77349837551844e-06, + "loss": 0.002, + "step": 118980 + }, + { + "epoch": 0.7631648991607688, + "grad_norm": 0.10128097981214523, + "learning_rate": 7.773032656590414e-06, + "loss": 0.0037, + "step": 118990 + }, + { + "epoch": 0.7632290360545548, + "grad_norm": 0.2593241333961487, + "learning_rate": 7.772566902914307e-06, + "loss": 0.0035, + "step": 119000 + }, + { + "epoch": 0.763293172948341, + "grad_norm": 0.40354588627815247, + "learning_rate": 7.772101114495953e-06, + "loss": 0.0041, + "step": 119010 + }, + { + "epoch": 0.763357309842127, + "grad_norm": 0.10311109572649002, + "learning_rate": 7.771635291341193e-06, + "loss": 0.003, + "step": 119020 + }, + { + "epoch": 0.7634214467359132, + "grad_norm": 0.09408441185951233, + "learning_rate": 7.771169433455861e-06, + "loss": 0.0018, + "step": 119030 + }, + { + "epoch": 0.7634855836296992, + "grad_norm": 0.11243224889039993, + "learning_rate": 7.770703540845797e-06, + "loss": 0.0024, + "step": 119040 + }, + { + "epoch": 0.7635497205234854, + "grad_norm": 0.07582049071788788, + "learning_rate": 7.770237613516836e-06, + "loss": 0.0019, + "step": 119050 + }, + { + "epoch": 0.7636138574172714, + "grad_norm": 0.17291785776615143, + "learning_rate": 7.769771651474817e-06, + "loss": 0.0021, + "step": 119060 + }, + { + "epoch": 0.7636779943110575, + "grad_norm": 0.04297932982444763, + "learning_rate": 7.769305654725583e-06, + "loss": 0.0018, + "step": 119070 + }, + { + "epoch": 0.7637421312048436, + "grad_norm": 0.18742135167121887, + "learning_rate": 7.768839623274967e-06, + "loss": 0.002, + "step": 119080 + }, + { + "epoch": 0.7638062680986297, + "grad_norm": 0.05562547594308853, + "learning_rate": 7.768373557128812e-06, + "loss": 0.0032, + "step": 119090 + }, + { + "epoch": 0.7638704049924158, + "grad_norm": 0.44265061616897583, + "learning_rate": 7.767907456292959e-06, + "loss": 0.0018, + "step": 119100 + }, + { + "epoch": 0.7639345418862019, + "grad_norm": 0.09151113778352737, + "learning_rate": 7.767441320773246e-06, + "loss": 0.0025, + "step": 119110 + }, + { + "epoch": 0.7639986787799881, + "grad_norm": 0.13599392771720886, + "learning_rate": 7.766975150575516e-06, + "loss": 0.0037, + "step": 119120 + }, + { + "epoch": 0.7640628156737741, + "grad_norm": 0.1208871528506279, + "learning_rate": 7.76650894570561e-06, + "loss": 0.0023, + "step": 119130 + }, + { + "epoch": 0.7641269525675602, + "grad_norm": 0.1041627898812294, + "learning_rate": 7.76604270616937e-06, + "loss": 0.0058, + "step": 119140 + }, + { + "epoch": 0.7641910894613463, + "grad_norm": 0.19829201698303223, + "learning_rate": 7.765576431972637e-06, + "loss": 0.0021, + "step": 119150 + }, + { + "epoch": 0.7642552263551324, + "grad_norm": 0.19033165276050568, + "learning_rate": 7.765110123121255e-06, + "loss": 0.0042, + "step": 119160 + }, + { + "epoch": 0.7643193632489185, + "grad_norm": 0.0724247470498085, + "learning_rate": 7.764643779621069e-06, + "loss": 0.0021, + "step": 119170 + }, + { + "epoch": 0.7643835001427046, + "grad_norm": 0.1512024700641632, + "learning_rate": 7.764177401477918e-06, + "loss": 0.0017, + "step": 119180 + }, + { + "epoch": 0.7644476370364907, + "grad_norm": 0.038425736129283905, + "learning_rate": 7.76371098869765e-06, + "loss": 0.0029, + "step": 119190 + }, + { + "epoch": 0.7645117739302768, + "grad_norm": 0.06435366719961166, + "learning_rate": 7.763244541286108e-06, + "loss": 0.0031, + "step": 119200 + }, + { + "epoch": 0.7645759108240628, + "grad_norm": 0.07712484896183014, + "learning_rate": 7.762778059249136e-06, + "loss": 0.0039, + "step": 119210 + }, + { + "epoch": 0.764640047717849, + "grad_norm": 0.0393238440155983, + "learning_rate": 7.762311542592579e-06, + "loss": 0.0014, + "step": 119220 + }, + { + "epoch": 0.764704184611635, + "grad_norm": 0.04551048204302788, + "learning_rate": 7.761844991322287e-06, + "loss": 0.0017, + "step": 119230 + }, + { + "epoch": 0.7647683215054212, + "grad_norm": 0.01426377147436142, + "learning_rate": 7.761378405444104e-06, + "loss": 0.0026, + "step": 119240 + }, + { + "epoch": 0.7648324583992072, + "grad_norm": 0.04326840117573738, + "learning_rate": 7.760911784963874e-06, + "loss": 0.0017, + "step": 119250 + }, + { + "epoch": 0.7648965952929934, + "grad_norm": 0.16590778529644012, + "learning_rate": 7.760445129887447e-06, + "loss": 0.0045, + "step": 119260 + }, + { + "epoch": 0.7649607321867795, + "grad_norm": 0.0777582973241806, + "learning_rate": 7.75997844022067e-06, + "loss": 0.0018, + "step": 119270 + }, + { + "epoch": 0.7650248690805656, + "grad_norm": 0.08656816184520721, + "learning_rate": 7.75951171596939e-06, + "loss": 0.0024, + "step": 119280 + }, + { + "epoch": 0.7650890059743517, + "grad_norm": 0.13881435990333557, + "learning_rate": 7.759044957139456e-06, + "loss": 0.0036, + "step": 119290 + }, + { + "epoch": 0.7651531428681377, + "grad_norm": 0.19447311758995056, + "learning_rate": 7.758578163736716e-06, + "loss": 0.0016, + "step": 119300 + }, + { + "epoch": 0.7652172797619239, + "grad_norm": 0.021220847964286804, + "learning_rate": 7.758111335767021e-06, + "loss": 0.0021, + "step": 119310 + }, + { + "epoch": 0.7652814166557099, + "grad_norm": 0.04552415758371353, + "learning_rate": 7.75764447323622e-06, + "loss": 0.0019, + "step": 119320 + }, + { + "epoch": 0.7653455535494961, + "grad_norm": 0.07058366388082504, + "learning_rate": 7.757177576150164e-06, + "loss": 0.0025, + "step": 119330 + }, + { + "epoch": 0.7654096904432821, + "grad_norm": 0.04425279423594475, + "learning_rate": 7.7567106445147e-06, + "loss": 0.0021, + "step": 119340 + }, + { + "epoch": 0.7654738273370683, + "grad_norm": 0.04987210035324097, + "learning_rate": 7.756243678335681e-06, + "loss": 0.0017, + "step": 119350 + }, + { + "epoch": 0.7655379642308543, + "grad_norm": 0.07812074571847916, + "learning_rate": 7.755776677618962e-06, + "loss": 0.0032, + "step": 119360 + }, + { + "epoch": 0.7656021011246404, + "grad_norm": 0.062241747975349426, + "learning_rate": 7.75530964237039e-06, + "loss": 0.0031, + "step": 119370 + }, + { + "epoch": 0.7656662380184265, + "grad_norm": 0.5053013563156128, + "learning_rate": 7.754842572595818e-06, + "loss": 0.0034, + "step": 119380 + }, + { + "epoch": 0.7657303749122126, + "grad_norm": 0.19220857322216034, + "learning_rate": 7.754375468301103e-06, + "loss": 0.0024, + "step": 119390 + }, + { + "epoch": 0.7657945118059988, + "grad_norm": 0.1082582101225853, + "learning_rate": 7.753908329492092e-06, + "loss": 0.0031, + "step": 119400 + }, + { + "epoch": 0.7658586486997848, + "grad_norm": 0.13472908735275269, + "learning_rate": 7.753441156174642e-06, + "loss": 0.002, + "step": 119410 + }, + { + "epoch": 0.765922785593571, + "grad_norm": 0.08649571239948273, + "learning_rate": 7.752973948354606e-06, + "loss": 0.0025, + "step": 119420 + }, + { + "epoch": 0.765986922487357, + "grad_norm": 0.1184421181678772, + "learning_rate": 7.752506706037839e-06, + "loss": 0.0019, + "step": 119430 + }, + { + "epoch": 0.7660510593811432, + "grad_norm": 0.15916390717029572, + "learning_rate": 7.752039429230197e-06, + "loss": 0.0044, + "step": 119440 + }, + { + "epoch": 0.7661151962749292, + "grad_norm": 0.051754746586084366, + "learning_rate": 7.751572117937534e-06, + "loss": 0.0031, + "step": 119450 + }, + { + "epoch": 0.7661793331687153, + "grad_norm": 0.04972861707210541, + "learning_rate": 7.751104772165704e-06, + "loss": 0.0017, + "step": 119460 + }, + { + "epoch": 0.7662434700625014, + "grad_norm": 0.08772239834070206, + "learning_rate": 7.750637391920567e-06, + "loss": 0.0015, + "step": 119470 + }, + { + "epoch": 0.7663076069562875, + "grad_norm": 0.18049634993076324, + "learning_rate": 7.750169977207977e-06, + "loss": 0.0022, + "step": 119480 + }, + { + "epoch": 0.7663717438500736, + "grad_norm": 0.46327391266822815, + "learning_rate": 7.749702528033792e-06, + "loss": 0.0018, + "step": 119490 + }, + { + "epoch": 0.7664358807438597, + "grad_norm": 0.031109677627682686, + "learning_rate": 7.749235044403869e-06, + "loss": 0.0032, + "step": 119500 + }, + { + "epoch": 0.7665000176376457, + "grad_norm": 0.052154701203107834, + "learning_rate": 7.748767526324065e-06, + "loss": 0.0027, + "step": 119510 + }, + { + "epoch": 0.7665641545314319, + "grad_norm": 0.06638770550489426, + "learning_rate": 7.74829997380024e-06, + "loss": 0.0032, + "step": 119520 + }, + { + "epoch": 0.7666282914252179, + "grad_norm": 0.3107570707798004, + "learning_rate": 7.747832386838253e-06, + "loss": 0.0023, + "step": 119530 + }, + { + "epoch": 0.7666924283190041, + "grad_norm": 0.10106756538152695, + "learning_rate": 7.74736476544396e-06, + "loss": 0.0034, + "step": 119540 + }, + { + "epoch": 0.7667565652127902, + "grad_norm": 0.10745019465684891, + "learning_rate": 7.746897109623226e-06, + "loss": 0.0031, + "step": 119550 + }, + { + "epoch": 0.7668207021065763, + "grad_norm": 0.21665680408477783, + "learning_rate": 7.746429419381906e-06, + "loss": 0.0032, + "step": 119560 + }, + { + "epoch": 0.7668848390003624, + "grad_norm": 0.3388836681842804, + "learning_rate": 7.745961694725863e-06, + "loss": 0.0025, + "step": 119570 + }, + { + "epoch": 0.7669489758941485, + "grad_norm": 0.1119726300239563, + "learning_rate": 7.745493935660957e-06, + "loss": 0.002, + "step": 119580 + }, + { + "epoch": 0.7670131127879346, + "grad_norm": 0.17020221054553986, + "learning_rate": 7.745026142193051e-06, + "loss": 0.0038, + "step": 119590 + }, + { + "epoch": 0.7670772496817206, + "grad_norm": 0.07487424463033676, + "learning_rate": 7.744558314328006e-06, + "loss": 0.0013, + "step": 119600 + }, + { + "epoch": 0.7671413865755068, + "grad_norm": 0.044883064925670624, + "learning_rate": 7.744090452071682e-06, + "loss": 0.0016, + "step": 119610 + }, + { + "epoch": 0.7672055234692928, + "grad_norm": 0.2747569680213928, + "learning_rate": 7.743622555429944e-06, + "loss": 0.0033, + "step": 119620 + }, + { + "epoch": 0.767269660363079, + "grad_norm": 0.22496001422405243, + "learning_rate": 7.743154624408655e-06, + "loss": 0.0055, + "step": 119630 + }, + { + "epoch": 0.767333797256865, + "grad_norm": 0.08470924943685532, + "learning_rate": 7.742686659013679e-06, + "loss": 0.0013, + "step": 119640 + }, + { + "epoch": 0.7673979341506512, + "grad_norm": 0.3732026517391205, + "learning_rate": 7.742218659250877e-06, + "loss": 0.0043, + "step": 119650 + }, + { + "epoch": 0.7674620710444372, + "grad_norm": 0.08522894233465195, + "learning_rate": 7.741750625126117e-06, + "loss": 0.003, + "step": 119660 + }, + { + "epoch": 0.7675262079382233, + "grad_norm": 0.038193341344594955, + "learning_rate": 7.74128255664526e-06, + "loss": 0.0017, + "step": 119670 + }, + { + "epoch": 0.7675903448320094, + "grad_norm": 0.09714947640895844, + "learning_rate": 7.740814453814177e-06, + "loss": 0.0033, + "step": 119680 + }, + { + "epoch": 0.7676544817257955, + "grad_norm": 0.13144101202487946, + "learning_rate": 7.740346316638727e-06, + "loss": 0.0034, + "step": 119690 + }, + { + "epoch": 0.7677186186195817, + "grad_norm": 0.06684640794992447, + "learning_rate": 7.739878145124779e-06, + "loss": 0.0024, + "step": 119700 + }, + { + "epoch": 0.7677827555133677, + "grad_norm": 0.0352996401488781, + "learning_rate": 7.7394099392782e-06, + "loss": 0.0027, + "step": 119710 + }, + { + "epoch": 0.7678468924071539, + "grad_norm": 0.1148243248462677, + "learning_rate": 7.738941699104859e-06, + "loss": 0.0018, + "step": 119720 + }, + { + "epoch": 0.7679110293009399, + "grad_norm": 0.10295584797859192, + "learning_rate": 7.738473424610618e-06, + "loss": 0.0033, + "step": 119730 + }, + { + "epoch": 0.767975166194726, + "grad_norm": 0.26023539900779724, + "learning_rate": 7.738005115801349e-06, + "loss": 0.0022, + "step": 119740 + }, + { + "epoch": 0.7680393030885121, + "grad_norm": 0.08188511431217194, + "learning_rate": 7.73753677268292e-06, + "loss": 0.0038, + "step": 119750 + }, + { + "epoch": 0.7681034399822982, + "grad_norm": 0.10123003274202347, + "learning_rate": 7.737068395261198e-06, + "loss": 0.004, + "step": 119760 + }, + { + "epoch": 0.7681675768760843, + "grad_norm": 0.14842653274536133, + "learning_rate": 7.736599983542053e-06, + "loss": 0.0032, + "step": 119770 + }, + { + "epoch": 0.7682317137698704, + "grad_norm": 0.03348490595817566, + "learning_rate": 7.736131537531354e-06, + "loss": 0.0029, + "step": 119780 + }, + { + "epoch": 0.7682958506636565, + "grad_norm": 0.22534605860710144, + "learning_rate": 7.735663057234972e-06, + "loss": 0.0027, + "step": 119790 + }, + { + "epoch": 0.7683599875574426, + "grad_norm": 0.1607537865638733, + "learning_rate": 7.735194542658776e-06, + "loss": 0.0019, + "step": 119800 + }, + { + "epoch": 0.7684241244512287, + "grad_norm": 0.1520402729511261, + "learning_rate": 7.734725993808638e-06, + "loss": 0.0053, + "step": 119810 + }, + { + "epoch": 0.7684882613450148, + "grad_norm": 0.18805626034736633, + "learning_rate": 7.734257410690429e-06, + "loss": 0.003, + "step": 119820 + }, + { + "epoch": 0.768552398238801, + "grad_norm": 0.2867084741592407, + "learning_rate": 7.733788793310019e-06, + "loss": 0.0022, + "step": 119830 + }, + { + "epoch": 0.768616535132587, + "grad_norm": 0.07489742338657379, + "learning_rate": 7.733320141673285e-06, + "loss": 0.0034, + "step": 119840 + }, + { + "epoch": 0.7686806720263731, + "grad_norm": 0.19532790780067444, + "learning_rate": 7.732851455786092e-06, + "loss": 0.003, + "step": 119850 + }, + { + "epoch": 0.7687448089201592, + "grad_norm": 0.06110703945159912, + "learning_rate": 7.73238273565432e-06, + "loss": 0.0047, + "step": 119860 + }, + { + "epoch": 0.7688089458139453, + "grad_norm": 0.057539068162441254, + "learning_rate": 7.731913981283838e-06, + "loss": 0.0035, + "step": 119870 + }, + { + "epoch": 0.7688730827077314, + "grad_norm": 0.12065283209085464, + "learning_rate": 7.731445192680522e-06, + "loss": 0.0025, + "step": 119880 + }, + { + "epoch": 0.7689372196015175, + "grad_norm": 0.20543505251407623, + "learning_rate": 7.730976369850245e-06, + "loss": 0.002, + "step": 119890 + }, + { + "epoch": 0.7690013564953035, + "grad_norm": 0.03451468423008919, + "learning_rate": 7.730507512798883e-06, + "loss": 0.0024, + "step": 119900 + }, + { + "epoch": 0.7690654933890897, + "grad_norm": 0.1505160927772522, + "learning_rate": 7.730038621532312e-06, + "loss": 0.0026, + "step": 119910 + }, + { + "epoch": 0.7691296302828757, + "grad_norm": 0.020788371562957764, + "learning_rate": 7.729569696056404e-06, + "loss": 0.0023, + "step": 119920 + }, + { + "epoch": 0.7691937671766619, + "grad_norm": 0.05163104459643364, + "learning_rate": 7.729100736377036e-06, + "loss": 0.0026, + "step": 119930 + }, + { + "epoch": 0.7692579040704479, + "grad_norm": 0.15215818583965302, + "learning_rate": 7.728631742500088e-06, + "loss": 0.0027, + "step": 119940 + }, + { + "epoch": 0.7693220409642341, + "grad_norm": 0.4038577377796173, + "learning_rate": 7.728162714431431e-06, + "loss": 0.0017, + "step": 119950 + }, + { + "epoch": 0.7693861778580201, + "grad_norm": 0.11586163938045502, + "learning_rate": 7.727693652176948e-06, + "loss": 0.0037, + "step": 119960 + }, + { + "epoch": 0.7694503147518063, + "grad_norm": 0.13993075489997864, + "learning_rate": 7.727224555742513e-06, + "loss": 0.0026, + "step": 119970 + }, + { + "epoch": 0.7695144516455924, + "grad_norm": 0.08906079828739166, + "learning_rate": 7.726755425134006e-06, + "loss": 0.0027, + "step": 119980 + }, + { + "epoch": 0.7695785885393784, + "grad_norm": 0.09948492795228958, + "learning_rate": 7.726286260357304e-06, + "loss": 0.0027, + "step": 119990 + }, + { + "epoch": 0.7696427254331646, + "grad_norm": 0.05504319444298744, + "learning_rate": 7.725817061418286e-06, + "loss": 0.0047, + "step": 120000 + }, + { + "epoch": 0.7697068623269506, + "grad_norm": 0.15614096820354462, + "learning_rate": 7.725347828322834e-06, + "loss": 0.0031, + "step": 120010 + }, + { + "epoch": 0.7697709992207368, + "grad_norm": 0.04980633407831192, + "learning_rate": 7.724878561076822e-06, + "loss": 0.0015, + "step": 120020 + }, + { + "epoch": 0.7698351361145228, + "grad_norm": 0.2578064799308777, + "learning_rate": 7.724409259686139e-06, + "loss": 0.0027, + "step": 120030 + }, + { + "epoch": 0.769899273008309, + "grad_norm": 0.08008996397256851, + "learning_rate": 7.723939924156658e-06, + "loss": 0.002, + "step": 120040 + }, + { + "epoch": 0.769963409902095, + "grad_norm": 0.2172783762216568, + "learning_rate": 7.723470554494264e-06, + "loss": 0.0043, + "step": 120050 + }, + { + "epoch": 0.7700275467958811, + "grad_norm": 0.09386609494686127, + "learning_rate": 7.723001150704837e-06, + "loss": 0.0046, + "step": 120060 + }, + { + "epoch": 0.7700916836896672, + "grad_norm": 0.1730341613292694, + "learning_rate": 7.722531712794262e-06, + "loss": 0.002, + "step": 120070 + }, + { + "epoch": 0.7701558205834533, + "grad_norm": 0.055997759103775024, + "learning_rate": 7.722062240768413e-06, + "loss": 0.0058, + "step": 120080 + }, + { + "epoch": 0.7702199574772394, + "grad_norm": 0.0635523796081543, + "learning_rate": 7.721592734633183e-06, + "loss": 0.0021, + "step": 120090 + }, + { + "epoch": 0.7702840943710255, + "grad_norm": 0.1634179949760437, + "learning_rate": 7.72112319439445e-06, + "loss": 0.0024, + "step": 120100 + }, + { + "epoch": 0.7703482312648117, + "grad_norm": 0.08533147722482681, + "learning_rate": 7.720653620058101e-06, + "loss": 0.002, + "step": 120110 + }, + { + "epoch": 0.7704123681585977, + "grad_norm": 0.2712162137031555, + "learning_rate": 7.720184011630014e-06, + "loss": 0.0045, + "step": 120120 + }, + { + "epoch": 0.7704765050523839, + "grad_norm": 0.08651968836784363, + "learning_rate": 7.71971436911608e-06, + "loss": 0.0033, + "step": 120130 + }, + { + "epoch": 0.7705406419461699, + "grad_norm": 0.07410996407270432, + "learning_rate": 7.71924469252218e-06, + "loss": 0.0038, + "step": 120140 + }, + { + "epoch": 0.770604778839956, + "grad_norm": 0.07435926795005798, + "learning_rate": 7.7187749818542e-06, + "loss": 0.0032, + "step": 120150 + }, + { + "epoch": 0.7706689157337421, + "grad_norm": 0.33352407813072205, + "learning_rate": 7.718305237118028e-06, + "loss": 0.0034, + "step": 120160 + }, + { + "epoch": 0.7707330526275282, + "grad_norm": 0.1434878557920456, + "learning_rate": 7.717835458319546e-06, + "loss": 0.0031, + "step": 120170 + }, + { + "epoch": 0.7707971895213143, + "grad_norm": 0.19026531279087067, + "learning_rate": 7.717365645464646e-06, + "loss": 0.0021, + "step": 120180 + }, + { + "epoch": 0.7708613264151004, + "grad_norm": 0.1479296088218689, + "learning_rate": 7.716895798559212e-06, + "loss": 0.0019, + "step": 120190 + }, + { + "epoch": 0.7709254633088864, + "grad_norm": 0.1592334359884262, + "learning_rate": 7.716425917609131e-06, + "loss": 0.0021, + "step": 120200 + }, + { + "epoch": 0.7709896002026726, + "grad_norm": 0.24233010411262512, + "learning_rate": 7.715956002620293e-06, + "loss": 0.0028, + "step": 120210 + }, + { + "epoch": 0.7710537370964586, + "grad_norm": 0.11257760226726532, + "learning_rate": 7.715486053598584e-06, + "loss": 0.0022, + "step": 120220 + }, + { + "epoch": 0.7711178739902448, + "grad_norm": 0.11522355675697327, + "learning_rate": 7.715016070549895e-06, + "loss": 0.0032, + "step": 120230 + }, + { + "epoch": 0.7711820108840308, + "grad_norm": 0.1420866847038269, + "learning_rate": 7.714546053480111e-06, + "loss": 0.0022, + "step": 120240 + }, + { + "epoch": 0.771246147777817, + "grad_norm": 0.1342962086200714, + "learning_rate": 7.71407600239513e-06, + "loss": 0.0015, + "step": 120250 + }, + { + "epoch": 0.7713102846716031, + "grad_norm": 0.06081683188676834, + "learning_rate": 7.713605917300834e-06, + "loss": 0.0026, + "step": 120260 + }, + { + "epoch": 0.7713744215653892, + "grad_norm": 0.1337926685810089, + "learning_rate": 7.713135798203117e-06, + "loss": 0.0065, + "step": 120270 + }, + { + "epoch": 0.7714385584591753, + "grad_norm": 0.23072844743728638, + "learning_rate": 7.71266564510787e-06, + "loss": 0.003, + "step": 120280 + }, + { + "epoch": 0.7715026953529613, + "grad_norm": 0.12262122333049774, + "learning_rate": 7.712195458020983e-06, + "loss": 0.0022, + "step": 120290 + }, + { + "epoch": 0.7715668322467475, + "grad_norm": 0.1738017052412033, + "learning_rate": 7.711725236948349e-06, + "loss": 0.002, + "step": 120300 + }, + { + "epoch": 0.7716309691405335, + "grad_norm": 0.31155627965927124, + "learning_rate": 7.711254981895859e-06, + "loss": 0.0018, + "step": 120310 + }, + { + "epoch": 0.7716951060343197, + "grad_norm": 0.05566750839352608, + "learning_rate": 7.710784692869406e-06, + "loss": 0.003, + "step": 120320 + }, + { + "epoch": 0.7717592429281057, + "grad_norm": 0.20950689911842346, + "learning_rate": 7.710314369874885e-06, + "loss": 0.0028, + "step": 120330 + }, + { + "epoch": 0.7718233798218919, + "grad_norm": 0.15361624956130981, + "learning_rate": 7.709844012918187e-06, + "loss": 0.0014, + "step": 120340 + }, + { + "epoch": 0.7718875167156779, + "grad_norm": 0.05247703939676285, + "learning_rate": 7.709373622005208e-06, + "loss": 0.0021, + "step": 120350 + }, + { + "epoch": 0.771951653609464, + "grad_norm": 0.09840486198663712, + "learning_rate": 7.70890319714184e-06, + "loss": 0.0023, + "step": 120360 + }, + { + "epoch": 0.7720157905032501, + "grad_norm": 0.10478629171848297, + "learning_rate": 7.708432738333978e-06, + "loss": 0.0023, + "step": 120370 + }, + { + "epoch": 0.7720799273970362, + "grad_norm": 0.14441901445388794, + "learning_rate": 7.707962245587519e-06, + "loss": 0.0024, + "step": 120380 + }, + { + "epoch": 0.7721440642908224, + "grad_norm": 0.0552758127450943, + "learning_rate": 7.707491718908357e-06, + "loss": 0.0024, + "step": 120390 + }, + { + "epoch": 0.7722082011846084, + "grad_norm": 0.11776135116815567, + "learning_rate": 7.70702115830239e-06, + "loss": 0.0032, + "step": 120400 + }, + { + "epoch": 0.7722723380783946, + "grad_norm": 0.08400656282901764, + "learning_rate": 7.706550563775514e-06, + "loss": 0.0023, + "step": 120410 + }, + { + "epoch": 0.7723364749721806, + "grad_norm": 0.23327843844890594, + "learning_rate": 7.706079935333623e-06, + "loss": 0.0035, + "step": 120420 + }, + { + "epoch": 0.7724006118659668, + "grad_norm": 0.13265545666217804, + "learning_rate": 7.705609272982618e-06, + "loss": 0.0024, + "step": 120430 + }, + { + "epoch": 0.7724647487597528, + "grad_norm": 0.2691594362258911, + "learning_rate": 7.705138576728395e-06, + "loss": 0.0029, + "step": 120440 + }, + { + "epoch": 0.7725288856535389, + "grad_norm": 0.12558777630329132, + "learning_rate": 7.704667846576851e-06, + "loss": 0.0043, + "step": 120450 + }, + { + "epoch": 0.772593022547325, + "grad_norm": 0.1107628121972084, + "learning_rate": 7.704197082533886e-06, + "loss": 0.0028, + "step": 120460 + }, + { + "epoch": 0.7726571594411111, + "grad_norm": 0.125325545668602, + "learning_rate": 7.7037262846054e-06, + "loss": 0.0029, + "step": 120470 + }, + { + "epoch": 0.7727212963348972, + "grad_norm": 0.3176846504211426, + "learning_rate": 7.70325545279729e-06, + "loss": 0.004, + "step": 120480 + }, + { + "epoch": 0.7727854332286833, + "grad_norm": 0.02290859818458557, + "learning_rate": 7.702784587115458e-06, + "loss": 0.0029, + "step": 120490 + }, + { + "epoch": 0.7728495701224694, + "grad_norm": 0.14008145034313202, + "learning_rate": 7.702313687565803e-06, + "loss": 0.0023, + "step": 120500 + }, + { + "epoch": 0.7729137070162555, + "grad_norm": 0.04517088830471039, + "learning_rate": 7.701842754154227e-06, + "loss": 0.0017, + "step": 120510 + }, + { + "epoch": 0.7729778439100415, + "grad_norm": 0.004795001354068518, + "learning_rate": 7.701371786886631e-06, + "loss": 0.0025, + "step": 120520 + }, + { + "epoch": 0.7730419808038277, + "grad_norm": 0.11474525183439255, + "learning_rate": 7.700900785768914e-06, + "loss": 0.0018, + "step": 120530 + }, + { + "epoch": 0.7731061176976138, + "grad_norm": 0.18473678827285767, + "learning_rate": 7.700429750806982e-06, + "loss": 0.0026, + "step": 120540 + }, + { + "epoch": 0.7731702545913999, + "grad_norm": 0.1098489761352539, + "learning_rate": 7.699958682006734e-06, + "loss": 0.0016, + "step": 120550 + }, + { + "epoch": 0.773234391485186, + "grad_norm": 0.12431825697422028, + "learning_rate": 7.699487579374074e-06, + "loss": 0.0031, + "step": 120560 + }, + { + "epoch": 0.7732985283789721, + "grad_norm": 0.14041979610919952, + "learning_rate": 7.699016442914904e-06, + "loss": 0.0017, + "step": 120570 + }, + { + "epoch": 0.7733626652727582, + "grad_norm": 0.12669505178928375, + "learning_rate": 7.698545272635132e-06, + "loss": 0.0035, + "step": 120580 + }, + { + "epoch": 0.7734268021665442, + "grad_norm": 0.0775560587644577, + "learning_rate": 7.698074068540656e-06, + "loss": 0.0013, + "step": 120590 + }, + { + "epoch": 0.7734909390603304, + "grad_norm": 0.0368109866976738, + "learning_rate": 7.697602830637386e-06, + "loss": 0.0015, + "step": 120600 + }, + { + "epoch": 0.7735550759541164, + "grad_norm": 0.15224739909172058, + "learning_rate": 7.697131558931224e-06, + "loss": 0.0033, + "step": 120610 + }, + { + "epoch": 0.7736192128479026, + "grad_norm": 0.0786733403801918, + "learning_rate": 7.696660253428076e-06, + "loss": 0.0028, + "step": 120620 + }, + { + "epoch": 0.7736833497416886, + "grad_norm": 0.16595987975597382, + "learning_rate": 7.696188914133847e-06, + "loss": 0.0028, + "step": 120630 + }, + { + "epoch": 0.7737474866354748, + "grad_norm": 0.04862990230321884, + "learning_rate": 7.695717541054445e-06, + "loss": 0.0029, + "step": 120640 + }, + { + "epoch": 0.7738116235292608, + "grad_norm": 0.09698370099067688, + "learning_rate": 7.695246134195773e-06, + "loss": 0.0017, + "step": 120650 + }, + { + "epoch": 0.773875760423047, + "grad_norm": 0.10276313871145248, + "learning_rate": 7.694774693563744e-06, + "loss": 0.0022, + "step": 120660 + }, + { + "epoch": 0.7739398973168331, + "grad_norm": 0.256753534078598, + "learning_rate": 7.694303219164261e-06, + "loss": 0.0034, + "step": 120670 + }, + { + "epoch": 0.7740040342106191, + "grad_norm": 0.09404375404119492, + "learning_rate": 7.693831711003233e-06, + "loss": 0.0028, + "step": 120680 + }, + { + "epoch": 0.7740681711044053, + "grad_norm": 0.3127039074897766, + "learning_rate": 7.693360169086567e-06, + "loss": 0.0037, + "step": 120690 + }, + { + "epoch": 0.7741323079981913, + "grad_norm": 0.2723565101623535, + "learning_rate": 7.692888593420176e-06, + "loss": 0.004, + "step": 120700 + }, + { + "epoch": 0.7741964448919775, + "grad_norm": 0.1416441649198532, + "learning_rate": 7.692416984009965e-06, + "loss": 0.0027, + "step": 120710 + }, + { + "epoch": 0.7742605817857635, + "grad_norm": 0.048688746988773346, + "learning_rate": 7.691945340861843e-06, + "loss": 0.0037, + "step": 120720 + }, + { + "epoch": 0.7743247186795497, + "grad_norm": 0.24894799292087555, + "learning_rate": 7.691473663981726e-06, + "loss": 0.0019, + "step": 120730 + }, + { + "epoch": 0.7743888555733357, + "grad_norm": 0.07757014036178589, + "learning_rate": 7.691001953375517e-06, + "loss": 0.0033, + "step": 120740 + }, + { + "epoch": 0.7744529924671218, + "grad_norm": 0.22163419425487518, + "learning_rate": 7.690530209049131e-06, + "loss": 0.003, + "step": 120750 + }, + { + "epoch": 0.7745171293609079, + "grad_norm": 0.09050728380680084, + "learning_rate": 7.69005843100848e-06, + "loss": 0.0068, + "step": 120760 + }, + { + "epoch": 0.774581266254694, + "grad_norm": 0.2518390417098999, + "learning_rate": 7.689586619259474e-06, + "loss": 0.0031, + "step": 120770 + }, + { + "epoch": 0.7746454031484801, + "grad_norm": 0.0962381511926651, + "learning_rate": 7.689114773808024e-06, + "loss": 0.0023, + "step": 120780 + }, + { + "epoch": 0.7747095400422662, + "grad_norm": 0.08857965469360352, + "learning_rate": 7.688642894660044e-06, + "loss": 0.0046, + "step": 120790 + }, + { + "epoch": 0.7747736769360523, + "grad_norm": 0.1404399424791336, + "learning_rate": 7.68817098182145e-06, + "loss": 0.0019, + "step": 120800 + }, + { + "epoch": 0.7748378138298384, + "grad_norm": 0.13727347552776337, + "learning_rate": 7.687699035298148e-06, + "loss": 0.0021, + "step": 120810 + }, + { + "epoch": 0.7749019507236246, + "grad_norm": 0.036177147179841995, + "learning_rate": 7.68722705509606e-06, + "loss": 0.0042, + "step": 120820 + }, + { + "epoch": 0.7749660876174106, + "grad_norm": 0.06073131412267685, + "learning_rate": 7.686755041221095e-06, + "loss": 0.0026, + "step": 120830 + }, + { + "epoch": 0.7750302245111967, + "grad_norm": 0.07920609414577484, + "learning_rate": 7.686282993679169e-06, + "loss": 0.0019, + "step": 120840 + }, + { + "epoch": 0.7750943614049828, + "grad_norm": 0.18040022253990173, + "learning_rate": 7.685810912476194e-06, + "loss": 0.0026, + "step": 120850 + }, + { + "epoch": 0.7751584982987689, + "grad_norm": 0.19039228558540344, + "learning_rate": 7.685338797618093e-06, + "loss": 0.0018, + "step": 120860 + }, + { + "epoch": 0.775222635192555, + "grad_norm": 0.04722949117422104, + "learning_rate": 7.684866649110779e-06, + "loss": 0.0019, + "step": 120870 + }, + { + "epoch": 0.7752867720863411, + "grad_norm": 0.13327927887439728, + "learning_rate": 7.684394466960164e-06, + "loss": 0.0032, + "step": 120880 + }, + { + "epoch": 0.7753509089801272, + "grad_norm": 0.10673610866069794, + "learning_rate": 7.683922251172169e-06, + "loss": 0.0024, + "step": 120890 + }, + { + "epoch": 0.7754150458739133, + "grad_norm": 0.18592707812786102, + "learning_rate": 7.683450001752708e-06, + "loss": 0.0033, + "step": 120900 + }, + { + "epoch": 0.7754791827676993, + "grad_norm": 0.1468019187450409, + "learning_rate": 7.682977718707703e-06, + "loss": 0.0018, + "step": 120910 + }, + { + "epoch": 0.7755433196614855, + "grad_norm": 0.11070624738931656, + "learning_rate": 7.682505402043069e-06, + "loss": 0.0026, + "step": 120920 + }, + { + "epoch": 0.7756074565552715, + "grad_norm": 0.014107703231275082, + "learning_rate": 7.682033051764725e-06, + "loss": 0.0017, + "step": 120930 + }, + { + "epoch": 0.7756715934490577, + "grad_norm": 0.24959653615951538, + "learning_rate": 7.681560667878591e-06, + "loss": 0.0025, + "step": 120940 + }, + { + "epoch": 0.7757357303428438, + "grad_norm": 0.061088040471076965, + "learning_rate": 7.681088250390583e-06, + "loss": 0.0016, + "step": 120950 + }, + { + "epoch": 0.7757998672366299, + "grad_norm": 0.2523617148399353, + "learning_rate": 7.680615799306625e-06, + "loss": 0.0034, + "step": 120960 + }, + { + "epoch": 0.775864004130416, + "grad_norm": 0.10077609121799469, + "learning_rate": 7.680143314632635e-06, + "loss": 0.0027, + "step": 120970 + }, + { + "epoch": 0.775928141024202, + "grad_norm": 0.23623411357402802, + "learning_rate": 7.679670796374534e-06, + "loss": 0.002, + "step": 120980 + }, + { + "epoch": 0.7759922779179882, + "grad_norm": 0.20702892541885376, + "learning_rate": 7.679198244538241e-06, + "loss": 0.0021, + "step": 120990 + }, + { + "epoch": 0.7760564148117742, + "grad_norm": 0.1016428992152214, + "learning_rate": 7.67872565912968e-06, + "loss": 0.003, + "step": 121000 + }, + { + "epoch": 0.7761205517055604, + "grad_norm": 0.23183010518550873, + "learning_rate": 7.678253040154775e-06, + "loss": 0.0042, + "step": 121010 + }, + { + "epoch": 0.7761846885993464, + "grad_norm": 0.11860893666744232, + "learning_rate": 7.677780387619443e-06, + "loss": 0.0026, + "step": 121020 + }, + { + "epoch": 0.7762488254931326, + "grad_norm": 0.10921378433704376, + "learning_rate": 7.677307701529608e-06, + "loss": 0.0023, + "step": 121030 + }, + { + "epoch": 0.7763129623869186, + "grad_norm": 0.018968788906931877, + "learning_rate": 7.676834981891194e-06, + "loss": 0.001, + "step": 121040 + }, + { + "epoch": 0.7763770992807048, + "grad_norm": 0.14218148589134216, + "learning_rate": 7.676362228710125e-06, + "loss": 0.0027, + "step": 121050 + }, + { + "epoch": 0.7764412361744908, + "grad_norm": 0.11908269673585892, + "learning_rate": 7.675889441992326e-06, + "loss": 0.0032, + "step": 121060 + }, + { + "epoch": 0.7765053730682769, + "grad_norm": 0.18243275582790375, + "learning_rate": 7.675416621743718e-06, + "loss": 0.0056, + "step": 121070 + }, + { + "epoch": 0.776569509962063, + "grad_norm": 0.10393267124891281, + "learning_rate": 7.674943767970229e-06, + "loss": 0.0018, + "step": 121080 + }, + { + "epoch": 0.7766336468558491, + "grad_norm": 0.2885073125362396, + "learning_rate": 7.674470880677784e-06, + "loss": 0.0058, + "step": 121090 + }, + { + "epoch": 0.7766977837496353, + "grad_norm": 0.21264208853244781, + "learning_rate": 7.673997959872305e-06, + "loss": 0.0016, + "step": 121100 + }, + { + "epoch": 0.7767619206434213, + "grad_norm": 0.1224047988653183, + "learning_rate": 7.673525005559721e-06, + "loss": 0.0028, + "step": 121110 + }, + { + "epoch": 0.7768260575372075, + "grad_norm": 0.19190648198127747, + "learning_rate": 7.673052017745958e-06, + "loss": 0.0073, + "step": 121120 + }, + { + "epoch": 0.7768901944309935, + "grad_norm": 0.08077745884656906, + "learning_rate": 7.672578996436943e-06, + "loss": 0.0035, + "step": 121130 + }, + { + "epoch": 0.7769543313247796, + "grad_norm": 0.2599153518676758, + "learning_rate": 7.672105941638604e-06, + "loss": 0.0052, + "step": 121140 + }, + { + "epoch": 0.7770184682185657, + "grad_norm": 0.10700134187936783, + "learning_rate": 7.671632853356865e-06, + "loss": 0.0023, + "step": 121150 + }, + { + "epoch": 0.7770826051123518, + "grad_norm": 0.04627247527241707, + "learning_rate": 7.67115973159766e-06, + "loss": 0.004, + "step": 121160 + }, + { + "epoch": 0.7771467420061379, + "grad_norm": 0.11000658571720123, + "learning_rate": 7.670686576366912e-06, + "loss": 0.002, + "step": 121170 + }, + { + "epoch": 0.777210878899924, + "grad_norm": 0.2015426605939865, + "learning_rate": 7.670213387670555e-06, + "loss": 0.0028, + "step": 121180 + }, + { + "epoch": 0.7772750157937101, + "grad_norm": 0.07996021211147308, + "learning_rate": 7.669740165514514e-06, + "loss": 0.0015, + "step": 121190 + }, + { + "epoch": 0.7773391526874962, + "grad_norm": 0.15272463858127594, + "learning_rate": 7.669266909904722e-06, + "loss": 0.0016, + "step": 121200 + }, + { + "epoch": 0.7774032895812822, + "grad_norm": 0.19971288740634918, + "learning_rate": 7.668793620847108e-06, + "loss": 0.0018, + "step": 121210 + }, + { + "epoch": 0.7774674264750684, + "grad_norm": 0.28225573897361755, + "learning_rate": 7.668320298347602e-06, + "loss": 0.0016, + "step": 121220 + }, + { + "epoch": 0.7775315633688544, + "grad_norm": 0.03997465595602989, + "learning_rate": 7.667846942412136e-06, + "loss": 0.0036, + "step": 121230 + }, + { + "epoch": 0.7775957002626406, + "grad_norm": 0.1938021183013916, + "learning_rate": 7.667373553046639e-06, + "loss": 0.0039, + "step": 121240 + }, + { + "epoch": 0.7776598371564267, + "grad_norm": 0.18448276817798615, + "learning_rate": 7.666900130257046e-06, + "loss": 0.0036, + "step": 121250 + }, + { + "epoch": 0.7777239740502128, + "grad_norm": 0.02420881949365139, + "learning_rate": 7.666426674049291e-06, + "loss": 0.003, + "step": 121260 + }, + { + "epoch": 0.7777881109439989, + "grad_norm": 0.04524382948875427, + "learning_rate": 7.665953184429302e-06, + "loss": 0.0016, + "step": 121270 + }, + { + "epoch": 0.777852247837785, + "grad_norm": 0.08701635897159576, + "learning_rate": 7.665479661403014e-06, + "loss": 0.0026, + "step": 121280 + }, + { + "epoch": 0.7779163847315711, + "grad_norm": 0.14590567350387573, + "learning_rate": 7.665006104976363e-06, + "loss": 0.0034, + "step": 121290 + }, + { + "epoch": 0.7779805216253571, + "grad_norm": 0.21900488436222076, + "learning_rate": 7.66453251515528e-06, + "loss": 0.0024, + "step": 121300 + }, + { + "epoch": 0.7780446585191433, + "grad_norm": 0.07465245574712753, + "learning_rate": 7.664058891945699e-06, + "loss": 0.0039, + "step": 121310 + }, + { + "epoch": 0.7781087954129293, + "grad_norm": 0.20187869668006897, + "learning_rate": 7.663585235353555e-06, + "loss": 0.0025, + "step": 121320 + }, + { + "epoch": 0.7781729323067155, + "grad_norm": 0.06113561615347862, + "learning_rate": 7.663111545384787e-06, + "loss": 0.0035, + "step": 121330 + }, + { + "epoch": 0.7782370692005015, + "grad_norm": 0.2926420271396637, + "learning_rate": 7.662637822045326e-06, + "loss": 0.0032, + "step": 121340 + }, + { + "epoch": 0.7783012060942877, + "grad_norm": 0.10521459579467773, + "learning_rate": 7.662164065341112e-06, + "loss": 0.003, + "step": 121350 + }, + { + "epoch": 0.7783653429880737, + "grad_norm": 0.037617068737745285, + "learning_rate": 7.661690275278077e-06, + "loss": 0.0023, + "step": 121360 + }, + { + "epoch": 0.7784294798818598, + "grad_norm": 0.10023058205842972, + "learning_rate": 7.661216451862163e-06, + "loss": 0.0018, + "step": 121370 + }, + { + "epoch": 0.778493616775646, + "grad_norm": 0.1474991738796234, + "learning_rate": 7.660742595099303e-06, + "loss": 0.0016, + "step": 121380 + }, + { + "epoch": 0.778557753669432, + "grad_norm": 0.0758986547589302, + "learning_rate": 7.66026870499544e-06, + "loss": 0.0048, + "step": 121390 + }, + { + "epoch": 0.7786218905632182, + "grad_norm": 0.199615478515625, + "learning_rate": 7.659794781556507e-06, + "loss": 0.0022, + "step": 121400 + }, + { + "epoch": 0.7786860274570042, + "grad_norm": 0.19785688817501068, + "learning_rate": 7.659320824788443e-06, + "loss": 0.0033, + "step": 121410 + }, + { + "epoch": 0.7787501643507904, + "grad_norm": 0.17966535687446594, + "learning_rate": 7.65884683469719e-06, + "loss": 0.0034, + "step": 121420 + }, + { + "epoch": 0.7788143012445764, + "grad_norm": 0.1133250966668129, + "learning_rate": 7.658372811288687e-06, + "loss": 0.0027, + "step": 121430 + }, + { + "epoch": 0.7788784381383625, + "grad_norm": 0.13527756929397583, + "learning_rate": 7.65789875456887e-06, + "loss": 0.0019, + "step": 121440 + }, + { + "epoch": 0.7789425750321486, + "grad_norm": 0.033486563712358475, + "learning_rate": 7.657424664543684e-06, + "loss": 0.0026, + "step": 121450 + }, + { + "epoch": 0.7790067119259347, + "grad_norm": 0.3420703113079071, + "learning_rate": 7.656950541219069e-06, + "loss": 0.0023, + "step": 121460 + }, + { + "epoch": 0.7790708488197208, + "grad_norm": 0.16449497640132904, + "learning_rate": 7.65647638460096e-06, + "loss": 0.0029, + "step": 121470 + }, + { + "epoch": 0.7791349857135069, + "grad_norm": 0.07003653794527054, + "learning_rate": 7.656002194695308e-06, + "loss": 0.0026, + "step": 121480 + }, + { + "epoch": 0.779199122607293, + "grad_norm": 0.11345074325799942, + "learning_rate": 7.655527971508048e-06, + "loss": 0.0037, + "step": 121490 + }, + { + "epoch": 0.7792632595010791, + "grad_norm": 0.1076662465929985, + "learning_rate": 7.655053715045126e-06, + "loss": 0.0024, + "step": 121500 + }, + { + "epoch": 0.7793273963948651, + "grad_norm": 0.07580321282148361, + "learning_rate": 7.654579425312482e-06, + "loss": 0.0014, + "step": 121510 + }, + { + "epoch": 0.7793915332886513, + "grad_norm": 0.04696830362081528, + "learning_rate": 7.654105102316063e-06, + "loss": 0.0055, + "step": 121520 + }, + { + "epoch": 0.7794556701824374, + "grad_norm": 0.060965411365032196, + "learning_rate": 7.653630746061807e-06, + "loss": 0.003, + "step": 121530 + }, + { + "epoch": 0.7795198070762235, + "grad_norm": 0.03723333030939102, + "learning_rate": 7.653156356555662e-06, + "loss": 0.0028, + "step": 121540 + }, + { + "epoch": 0.7795839439700096, + "grad_norm": 0.13844603300094604, + "learning_rate": 7.652681933803573e-06, + "loss": 0.0031, + "step": 121550 + }, + { + "epoch": 0.7796480808637957, + "grad_norm": 0.1367502212524414, + "learning_rate": 7.652207477811484e-06, + "loss": 0.0019, + "step": 121560 + }, + { + "epoch": 0.7797122177575818, + "grad_norm": 0.13187959790229797, + "learning_rate": 7.651732988585338e-06, + "loss": 0.0022, + "step": 121570 + }, + { + "epoch": 0.7797763546513679, + "grad_norm": 0.13078394532203674, + "learning_rate": 7.651258466131083e-06, + "loss": 0.0024, + "step": 121580 + }, + { + "epoch": 0.779840491545154, + "grad_norm": 0.1649218499660492, + "learning_rate": 7.650783910454666e-06, + "loss": 0.0022, + "step": 121590 + }, + { + "epoch": 0.77990462843894, + "grad_norm": 0.09971890598535538, + "learning_rate": 7.65030932156203e-06, + "loss": 0.0026, + "step": 121600 + }, + { + "epoch": 0.7799687653327262, + "grad_norm": 0.03387978672981262, + "learning_rate": 7.649834699459124e-06, + "loss": 0.002, + "step": 121610 + }, + { + "epoch": 0.7800329022265122, + "grad_norm": 0.011331469751894474, + "learning_rate": 7.649360044151896e-06, + "loss": 0.0023, + "step": 121620 + }, + { + "epoch": 0.7800970391202984, + "grad_norm": 0.3146549165248871, + "learning_rate": 7.648885355646295e-06, + "loss": 0.0028, + "step": 121630 + }, + { + "epoch": 0.7801611760140844, + "grad_norm": 0.06297134608030319, + "learning_rate": 7.648410633948265e-06, + "loss": 0.0017, + "step": 121640 + }, + { + "epoch": 0.7802253129078706, + "grad_norm": 0.3688918948173523, + "learning_rate": 7.647935879063758e-06, + "loss": 0.0021, + "step": 121650 + }, + { + "epoch": 0.7802894498016567, + "grad_norm": 0.07675735652446747, + "learning_rate": 7.647461090998722e-06, + "loss": 0.0049, + "step": 121660 + }, + { + "epoch": 0.7803535866954427, + "grad_norm": 0.1179722249507904, + "learning_rate": 7.646986269759107e-06, + "loss": 0.0032, + "step": 121670 + }, + { + "epoch": 0.7804177235892289, + "grad_norm": 0.0887451022863388, + "learning_rate": 7.646511415350861e-06, + "loss": 0.0041, + "step": 121680 + }, + { + "epoch": 0.7804818604830149, + "grad_norm": 0.12331262975931168, + "learning_rate": 7.646036527779937e-06, + "loss": 0.0022, + "step": 121690 + }, + { + "epoch": 0.7805459973768011, + "grad_norm": 0.10870742052793503, + "learning_rate": 7.645561607052283e-06, + "loss": 0.003, + "step": 121700 + }, + { + "epoch": 0.7806101342705871, + "grad_norm": 0.10599672049283981, + "learning_rate": 7.64508665317385e-06, + "loss": 0.0027, + "step": 121710 + }, + { + "epoch": 0.7806742711643733, + "grad_norm": 0.12671251595020294, + "learning_rate": 7.644611666150593e-06, + "loss": 0.0045, + "step": 121720 + }, + { + "epoch": 0.7807384080581593, + "grad_norm": 0.18857617676258087, + "learning_rate": 7.644136645988463e-06, + "loss": 0.0022, + "step": 121730 + }, + { + "epoch": 0.7808025449519455, + "grad_norm": 0.08521877974271774, + "learning_rate": 7.643661592693408e-06, + "loss": 0.0025, + "step": 121740 + }, + { + "epoch": 0.7808666818457315, + "grad_norm": 0.10013040155172348, + "learning_rate": 7.643186506271386e-06, + "loss": 0.0015, + "step": 121750 + }, + { + "epoch": 0.7809308187395176, + "grad_norm": 0.14211426675319672, + "learning_rate": 7.642711386728346e-06, + "loss": 0.0015, + "step": 121760 + }, + { + "epoch": 0.7809949556333037, + "grad_norm": 0.05439943075180054, + "learning_rate": 7.642236234070246e-06, + "loss": 0.0058, + "step": 121770 + }, + { + "epoch": 0.7810590925270898, + "grad_norm": 0.11683088541030884, + "learning_rate": 7.641761048303037e-06, + "loss": 0.0027, + "step": 121780 + }, + { + "epoch": 0.7811232294208759, + "grad_norm": 0.023444602265954018, + "learning_rate": 7.641285829432671e-06, + "loss": 0.0057, + "step": 121790 + }, + { + "epoch": 0.781187366314662, + "grad_norm": 0.1272461712360382, + "learning_rate": 7.64081057746511e-06, + "loss": 0.0028, + "step": 121800 + }, + { + "epoch": 0.7812515032084482, + "grad_norm": 0.04274454340338707, + "learning_rate": 7.640335292406303e-06, + "loss": 0.0043, + "step": 121810 + }, + { + "epoch": 0.7813156401022342, + "grad_norm": 0.1526411920785904, + "learning_rate": 7.639859974262208e-06, + "loss": 0.0028, + "step": 121820 + }, + { + "epoch": 0.7813797769960203, + "grad_norm": 0.2035612016916275, + "learning_rate": 7.639384623038782e-06, + "loss": 0.0097, + "step": 121830 + }, + { + "epoch": 0.7814439138898064, + "grad_norm": 0.07125157862901688, + "learning_rate": 7.638909238741978e-06, + "loss": 0.003, + "step": 121840 + }, + { + "epoch": 0.7815080507835925, + "grad_norm": 0.07473345100879669, + "learning_rate": 7.638433821377756e-06, + "loss": 0.0015, + "step": 121850 + }, + { + "epoch": 0.7815721876773786, + "grad_norm": 0.12324604392051697, + "learning_rate": 7.637958370952074e-06, + "loss": 0.0018, + "step": 121860 + }, + { + "epoch": 0.7816363245711647, + "grad_norm": 0.08533834666013718, + "learning_rate": 7.637482887470886e-06, + "loss": 0.0026, + "step": 121870 + }, + { + "epoch": 0.7817004614649508, + "grad_norm": 0.12150117009878159, + "learning_rate": 7.637007370940155e-06, + "loss": 0.0029, + "step": 121880 + }, + { + "epoch": 0.7817645983587369, + "grad_norm": 0.09350825101137161, + "learning_rate": 7.636531821365835e-06, + "loss": 0.0025, + "step": 121890 + }, + { + "epoch": 0.7818287352525229, + "grad_norm": 0.024046439677476883, + "learning_rate": 7.636056238753888e-06, + "loss": 0.0012, + "step": 121900 + }, + { + "epoch": 0.7818928721463091, + "grad_norm": 0.23442338407039642, + "learning_rate": 7.635580623110273e-06, + "loss": 0.0039, + "step": 121910 + }, + { + "epoch": 0.7819570090400951, + "grad_norm": 0.08703475445508957, + "learning_rate": 7.635104974440948e-06, + "loss": 0.0032, + "step": 121920 + }, + { + "epoch": 0.7820211459338813, + "grad_norm": 0.19465945661067963, + "learning_rate": 7.634629292751874e-06, + "loss": 0.0021, + "step": 121930 + }, + { + "epoch": 0.7820852828276674, + "grad_norm": 0.09618143737316132, + "learning_rate": 7.634153578049014e-06, + "loss": 0.0045, + "step": 121940 + }, + { + "epoch": 0.7821494197214535, + "grad_norm": 0.004982649814337492, + "learning_rate": 7.633677830338326e-06, + "loss": 0.0036, + "step": 121950 + }, + { + "epoch": 0.7822135566152396, + "grad_norm": 0.15258103609085083, + "learning_rate": 7.633202049625772e-06, + "loss": 0.0037, + "step": 121960 + }, + { + "epoch": 0.7822776935090257, + "grad_norm": 0.03397071361541748, + "learning_rate": 7.632726235917314e-06, + "loss": 0.0035, + "step": 121970 + }, + { + "epoch": 0.7823418304028118, + "grad_norm": 0.14839141070842743, + "learning_rate": 7.632250389218917e-06, + "loss": 0.0031, + "step": 121980 + }, + { + "epoch": 0.7824059672965978, + "grad_norm": 0.15496550500392914, + "learning_rate": 7.631774509536538e-06, + "loss": 0.0021, + "step": 121990 + }, + { + "epoch": 0.782470104190384, + "grad_norm": 0.0802803486585617, + "learning_rate": 7.631298596876146e-06, + "loss": 0.0029, + "step": 122000 + }, + { + "epoch": 0.782470104190384, + "eval_loss": 0.0031755194067955017, + "eval_runtime": 3.3154, + "eval_samples_per_second": 60.325, + "eval_steps_per_second": 15.081, + "step": 122000 + }, + { + "epoch": 0.78253424108417, + "grad_norm": 0.07263299077749252, + "learning_rate": 7.6308226512437e-06, + "loss": 0.002, + "step": 122010 + }, + { + "epoch": 0.7825983779779562, + "grad_norm": 0.40417736768722534, + "learning_rate": 7.630346672645168e-06, + "loss": 0.0026, + "step": 122020 + }, + { + "epoch": 0.7826625148717422, + "grad_norm": 0.0935332328081131, + "learning_rate": 7.62987066108651e-06, + "loss": 0.0023, + "step": 122030 + }, + { + "epoch": 0.7827266517655284, + "grad_norm": 0.10363059490919113, + "learning_rate": 7.629394616573697e-06, + "loss": 0.0038, + "step": 122040 + }, + { + "epoch": 0.7827907886593144, + "grad_norm": 0.07930910587310791, + "learning_rate": 7.628918539112686e-06, + "loss": 0.002, + "step": 122050 + }, + { + "epoch": 0.7828549255531005, + "grad_norm": 0.06223560497164726, + "learning_rate": 7.628442428709449e-06, + "loss": 0.0018, + "step": 122060 + }, + { + "epoch": 0.7829190624468866, + "grad_norm": 0.08662588149309158, + "learning_rate": 7.62796628536995e-06, + "loss": 0.0016, + "step": 122070 + }, + { + "epoch": 0.7829831993406727, + "grad_norm": 0.07027466595172882, + "learning_rate": 7.6274901091001555e-06, + "loss": 0.0033, + "step": 122080 + }, + { + "epoch": 0.7830473362344589, + "grad_norm": 0.07510018348693848, + "learning_rate": 7.627013899906032e-06, + "loss": 0.0028, + "step": 122090 + }, + { + "epoch": 0.7831114731282449, + "grad_norm": 0.16565001010894775, + "learning_rate": 7.626537657793545e-06, + "loss": 0.003, + "step": 122100 + }, + { + "epoch": 0.7831756100220311, + "grad_norm": 0.09801848232746124, + "learning_rate": 7.626061382768666e-06, + "loss": 0.003, + "step": 122110 + }, + { + "epoch": 0.7832397469158171, + "grad_norm": 0.05887960270047188, + "learning_rate": 7.625585074837361e-06, + "loss": 0.0022, + "step": 122120 + }, + { + "epoch": 0.7833038838096033, + "grad_norm": 0.17415302991867065, + "learning_rate": 7.625108734005597e-06, + "loss": 0.0024, + "step": 122130 + }, + { + "epoch": 0.7833680207033893, + "grad_norm": 0.05905602499842644, + "learning_rate": 7.624632360279345e-06, + "loss": 0.0018, + "step": 122140 + }, + { + "epoch": 0.7834321575971754, + "grad_norm": 0.18371962010860443, + "learning_rate": 7.624155953664575e-06, + "loss": 0.0026, + "step": 122150 + }, + { + "epoch": 0.7834962944909615, + "grad_norm": 0.016537338495254517, + "learning_rate": 7.623679514167254e-06, + "loss": 0.0028, + "step": 122160 + }, + { + "epoch": 0.7835604313847476, + "grad_norm": 0.04831685498356819, + "learning_rate": 7.623203041793354e-06, + "loss": 0.0035, + "step": 122170 + }, + { + "epoch": 0.7836245682785337, + "grad_norm": 0.04566079005599022, + "learning_rate": 7.622726536548846e-06, + "loss": 0.0013, + "step": 122180 + }, + { + "epoch": 0.7836887051723198, + "grad_norm": 0.08743573725223541, + "learning_rate": 7.622249998439698e-06, + "loss": 0.0021, + "step": 122190 + }, + { + "epoch": 0.7837528420661058, + "grad_norm": 0.009777238592505455, + "learning_rate": 7.621773427471886e-06, + "loss": 0.0016, + "step": 122200 + }, + { + "epoch": 0.783816978959892, + "grad_norm": 0.16177983582019806, + "learning_rate": 7.621296823651376e-06, + "loss": 0.0017, + "step": 122210 + }, + { + "epoch": 0.7838811158536781, + "grad_norm": 0.17031234502792358, + "learning_rate": 7.620820186984146e-06, + "loss": 0.0035, + "step": 122220 + }, + { + "epoch": 0.7839452527474642, + "grad_norm": 0.07148251682519913, + "learning_rate": 7.620343517476165e-06, + "loss": 0.006, + "step": 122230 + }, + { + "epoch": 0.7840093896412503, + "grad_norm": 0.11304371803998947, + "learning_rate": 7.619866815133408e-06, + "loss": 0.0058, + "step": 122240 + }, + { + "epoch": 0.7840735265350364, + "grad_norm": 0.0622345469892025, + "learning_rate": 7.619390079961846e-06, + "loss": 0.002, + "step": 122250 + }, + { + "epoch": 0.7841376634288225, + "grad_norm": 0.0038027805276215076, + "learning_rate": 7.618913311967455e-06, + "loss": 0.0045, + "step": 122260 + }, + { + "epoch": 0.7842018003226086, + "grad_norm": 0.23910140991210938, + "learning_rate": 7.618436511156209e-06, + "loss": 0.0024, + "step": 122270 + }, + { + "epoch": 0.7842659372163947, + "grad_norm": 0.04277581349015236, + "learning_rate": 7.617959677534081e-06, + "loss": 0.0026, + "step": 122280 + }, + { + "epoch": 0.7843300741101807, + "grad_norm": 0.10653826594352722, + "learning_rate": 7.617482811107049e-06, + "loss": 0.0025, + "step": 122290 + }, + { + "epoch": 0.7843942110039669, + "grad_norm": 0.16174641251564026, + "learning_rate": 7.617005911881085e-06, + "loss": 0.0053, + "step": 122300 + }, + { + "epoch": 0.7844583478977529, + "grad_norm": 0.14989493787288666, + "learning_rate": 7.616528979862167e-06, + "loss": 0.0033, + "step": 122310 + }, + { + "epoch": 0.7845224847915391, + "grad_norm": 0.06515716761350632, + "learning_rate": 7.616052015056271e-06, + "loss": 0.0044, + "step": 122320 + }, + { + "epoch": 0.7845866216853251, + "grad_norm": 0.12088882178068161, + "learning_rate": 7.615575017469372e-06, + "loss": 0.0027, + "step": 122330 + }, + { + "epoch": 0.7846507585791113, + "grad_norm": 0.3991908133029938, + "learning_rate": 7.615097987107452e-06, + "loss": 0.0024, + "step": 122340 + }, + { + "epoch": 0.7847148954728973, + "grad_norm": 0.06363681703805923, + "learning_rate": 7.614620923976484e-06, + "loss": 0.0015, + "step": 122350 + }, + { + "epoch": 0.7847790323666834, + "grad_norm": 0.05574396252632141, + "learning_rate": 7.614143828082445e-06, + "loss": 0.0028, + "step": 122360 + }, + { + "epoch": 0.7848431692604696, + "grad_norm": 0.0889599472284317, + "learning_rate": 7.613666699431317e-06, + "loss": 0.0027, + "step": 122370 + }, + { + "epoch": 0.7849073061542556, + "grad_norm": 0.04563826695084572, + "learning_rate": 7.613189538029078e-06, + "loss": 0.0014, + "step": 122380 + }, + { + "epoch": 0.7849714430480418, + "grad_norm": 0.027129748836159706, + "learning_rate": 7.612712343881705e-06, + "loss": 0.0026, + "step": 122390 + }, + { + "epoch": 0.7850355799418278, + "grad_norm": 0.035565756261348724, + "learning_rate": 7.6122351169951795e-06, + "loss": 0.0023, + "step": 122400 + }, + { + "epoch": 0.785099716835614, + "grad_norm": 0.15562231838703156, + "learning_rate": 7.611757857375482e-06, + "loss": 0.0037, + "step": 122410 + }, + { + "epoch": 0.7851638537294, + "grad_norm": 0.05388416349887848, + "learning_rate": 7.611280565028592e-06, + "loss": 0.0024, + "step": 122420 + }, + { + "epoch": 0.7852279906231862, + "grad_norm": 0.11233717948198318, + "learning_rate": 7.610803239960489e-06, + "loss": 0.0021, + "step": 122430 + }, + { + "epoch": 0.7852921275169722, + "grad_norm": 0.08933355659246445, + "learning_rate": 7.610325882177156e-06, + "loss": 0.0024, + "step": 122440 + }, + { + "epoch": 0.7853562644107583, + "grad_norm": 0.06173817813396454, + "learning_rate": 7.609848491684575e-06, + "loss": 0.0029, + "step": 122450 + }, + { + "epoch": 0.7854204013045444, + "grad_norm": 0.21297572553157806, + "learning_rate": 7.609371068488727e-06, + "loss": 0.0019, + "step": 122460 + }, + { + "epoch": 0.7854845381983305, + "grad_norm": 0.043749645352363586, + "learning_rate": 7.608893612595594e-06, + "loss": 0.0035, + "step": 122470 + }, + { + "epoch": 0.7855486750921166, + "grad_norm": 0.16853967308998108, + "learning_rate": 7.608416124011158e-06, + "loss": 0.0042, + "step": 122480 + }, + { + "epoch": 0.7856128119859027, + "grad_norm": 0.0530732087790966, + "learning_rate": 7.607938602741407e-06, + "loss": 0.0022, + "step": 122490 + }, + { + "epoch": 0.7856769488796888, + "grad_norm": 0.12097512930631638, + "learning_rate": 7.6074610487923194e-06, + "loss": 0.0044, + "step": 122500 + }, + { + "epoch": 0.7857410857734749, + "grad_norm": 0.10567058622837067, + "learning_rate": 7.6069834621698815e-06, + "loss": 0.0031, + "step": 122510 + }, + { + "epoch": 0.785805222667261, + "grad_norm": 0.09319041669368744, + "learning_rate": 7.606505842880079e-06, + "loss": 0.0012, + "step": 122520 + }, + { + "epoch": 0.7858693595610471, + "grad_norm": 0.0469089075922966, + "learning_rate": 7.606028190928893e-06, + "loss": 0.0034, + "step": 122530 + }, + { + "epoch": 0.7859334964548332, + "grad_norm": 0.1647704839706421, + "learning_rate": 7.6055505063223125e-06, + "loss": 0.0026, + "step": 122540 + }, + { + "epoch": 0.7859976333486193, + "grad_norm": 0.1056341826915741, + "learning_rate": 7.605072789066322e-06, + "loss": 0.0029, + "step": 122550 + }, + { + "epoch": 0.7860617702424054, + "grad_norm": 0.07093048095703125, + "learning_rate": 7.604595039166909e-06, + "loss": 0.0017, + "step": 122560 + }, + { + "epoch": 0.7861259071361915, + "grad_norm": 0.034791190177202225, + "learning_rate": 7.604117256630057e-06, + "loss": 0.0016, + "step": 122570 + }, + { + "epoch": 0.7861900440299776, + "grad_norm": 0.046128518879413605, + "learning_rate": 7.603639441461755e-06, + "loss": 0.0043, + "step": 122580 + }, + { + "epoch": 0.7862541809237636, + "grad_norm": 0.0922919511795044, + "learning_rate": 7.603161593667989e-06, + "loss": 0.0031, + "step": 122590 + }, + { + "epoch": 0.7863183178175498, + "grad_norm": 0.25014355778694153, + "learning_rate": 7.60268371325475e-06, + "loss": 0.003, + "step": 122600 + }, + { + "epoch": 0.7863824547113358, + "grad_norm": 0.010387763381004333, + "learning_rate": 7.602205800228022e-06, + "loss": 0.0016, + "step": 122610 + }, + { + "epoch": 0.786446591605122, + "grad_norm": 0.03959250822663307, + "learning_rate": 7.601727854593796e-06, + "loss": 0.0021, + "step": 122620 + }, + { + "epoch": 0.786510728498908, + "grad_norm": 0.218663290143013, + "learning_rate": 7.601249876358061e-06, + "loss": 0.0048, + "step": 122630 + }, + { + "epoch": 0.7865748653926942, + "grad_norm": 0.07799277454614639, + "learning_rate": 7.600771865526807e-06, + "loss": 0.0039, + "step": 122640 + }, + { + "epoch": 0.7866390022864803, + "grad_norm": 0.11367930471897125, + "learning_rate": 7.600293822106022e-06, + "loss": 0.0024, + "step": 122650 + }, + { + "epoch": 0.7867031391802664, + "grad_norm": 0.29839062690734863, + "learning_rate": 7.599815746101696e-06, + "loss": 0.0023, + "step": 122660 + }, + { + "epoch": 0.7867672760740525, + "grad_norm": 0.07987088710069656, + "learning_rate": 7.599337637519821e-06, + "loss": 0.0014, + "step": 122670 + }, + { + "epoch": 0.7868314129678385, + "grad_norm": 0.05106490105390549, + "learning_rate": 7.598859496366389e-06, + "loss": 0.0028, + "step": 122680 + }, + { + "epoch": 0.7868955498616247, + "grad_norm": 0.2657345235347748, + "learning_rate": 7.598381322647388e-06, + "loss": 0.0031, + "step": 122690 + }, + { + "epoch": 0.7869596867554107, + "grad_norm": 0.09356044232845306, + "learning_rate": 7.597903116368813e-06, + "loss": 0.0027, + "step": 122700 + }, + { + "epoch": 0.7870238236491969, + "grad_norm": 0.06518931686878204, + "learning_rate": 7.597424877536656e-06, + "loss": 0.0014, + "step": 122710 + }, + { + "epoch": 0.7870879605429829, + "grad_norm": 0.1209230124950409, + "learning_rate": 7.596946606156908e-06, + "loss": 0.0035, + "step": 122720 + }, + { + "epoch": 0.7871520974367691, + "grad_norm": 0.10231737047433853, + "learning_rate": 7.5964683022355646e-06, + "loss": 0.0016, + "step": 122730 + }, + { + "epoch": 0.7872162343305551, + "grad_norm": 0.04111761599779129, + "learning_rate": 7.595989965778615e-06, + "loss": 0.0027, + "step": 122740 + }, + { + "epoch": 0.7872803712243412, + "grad_norm": 0.2692529857158661, + "learning_rate": 7.595511596792058e-06, + "loss": 0.0026, + "step": 122750 + }, + { + "epoch": 0.7873445081181273, + "grad_norm": 0.07208271324634552, + "learning_rate": 7.595033195281884e-06, + "loss": 0.0044, + "step": 122760 + }, + { + "epoch": 0.7874086450119134, + "grad_norm": 0.04921235144138336, + "learning_rate": 7.59455476125409e-06, + "loss": 0.0033, + "step": 122770 + }, + { + "epoch": 0.7874727819056995, + "grad_norm": 0.14421400427818298, + "learning_rate": 7.594076294714671e-06, + "loss": 0.003, + "step": 122780 + }, + { + "epoch": 0.7875369187994856, + "grad_norm": 0.012242033146321774, + "learning_rate": 7.593597795669623e-06, + "loss": 0.0025, + "step": 122790 + }, + { + "epoch": 0.7876010556932718, + "grad_norm": 0.04912176355719566, + "learning_rate": 7.59311926412494e-06, + "loss": 0.0049, + "step": 122800 + }, + { + "epoch": 0.7876651925870578, + "grad_norm": 0.0413503423333168, + "learning_rate": 7.592640700086619e-06, + "loss": 0.0008, + "step": 122810 + }, + { + "epoch": 0.787729329480844, + "grad_norm": 0.1055869534611702, + "learning_rate": 7.592162103560656e-06, + "loss": 0.0021, + "step": 122820 + }, + { + "epoch": 0.78779346637463, + "grad_norm": 0.05369802191853523, + "learning_rate": 7.591683474553052e-06, + "loss": 0.003, + "step": 122830 + }, + { + "epoch": 0.7878576032684161, + "grad_norm": 0.08172406256198883, + "learning_rate": 7.5912048130698004e-06, + "loss": 0.0022, + "step": 122840 + }, + { + "epoch": 0.7879217401622022, + "grad_norm": 0.20662006735801697, + "learning_rate": 7.5907261191169e-06, + "loss": 0.0037, + "step": 122850 + }, + { + "epoch": 0.7879858770559883, + "grad_norm": 0.1541481614112854, + "learning_rate": 7.59024739270035e-06, + "loss": 0.0023, + "step": 122860 + }, + { + "epoch": 0.7880500139497744, + "grad_norm": 0.17998526990413666, + "learning_rate": 7.589768633826151e-06, + "loss": 0.0023, + "step": 122870 + }, + { + "epoch": 0.7881141508435605, + "grad_norm": 0.06872714310884476, + "learning_rate": 7.589289842500298e-06, + "loss": 0.0023, + "step": 122880 + }, + { + "epoch": 0.7881782877373466, + "grad_norm": 0.12394532561302185, + "learning_rate": 7.588811018728793e-06, + "loss": 0.0053, + "step": 122890 + }, + { + "epoch": 0.7882424246311327, + "grad_norm": 0.060991834849119186, + "learning_rate": 7.588332162517636e-06, + "loss": 0.0017, + "step": 122900 + }, + { + "epoch": 0.7883065615249187, + "grad_norm": 0.018962649628520012, + "learning_rate": 7.587853273872827e-06, + "loss": 0.0022, + "step": 122910 + }, + { + "epoch": 0.7883706984187049, + "grad_norm": 0.18350961804389954, + "learning_rate": 7.587374352800367e-06, + "loss": 0.002, + "step": 122920 + }, + { + "epoch": 0.788434835312491, + "grad_norm": 0.014333275146782398, + "learning_rate": 7.5868953993062576e-06, + "loss": 0.0031, + "step": 122930 + }, + { + "epoch": 0.7884989722062771, + "grad_norm": 0.13420072197914124, + "learning_rate": 7.5864164133965e-06, + "loss": 0.0013, + "step": 122940 + }, + { + "epoch": 0.7885631091000632, + "grad_norm": 0.03388292342424393, + "learning_rate": 7.585937395077095e-06, + "loss": 0.0017, + "step": 122950 + }, + { + "epoch": 0.7886272459938493, + "grad_norm": 0.04059087857604027, + "learning_rate": 7.585458344354049e-06, + "loss": 0.003, + "step": 122960 + }, + { + "epoch": 0.7886913828876354, + "grad_norm": 0.03942510858178139, + "learning_rate": 7.5849792612333595e-06, + "loss": 0.0029, + "step": 122970 + }, + { + "epoch": 0.7887555197814214, + "grad_norm": 0.047274697571992874, + "learning_rate": 7.584500145721034e-06, + "loss": 0.0031, + "step": 122980 + }, + { + "epoch": 0.7888196566752076, + "grad_norm": 0.0786990150809288, + "learning_rate": 7.584020997823074e-06, + "loss": 0.0032, + "step": 122990 + }, + { + "epoch": 0.7888837935689936, + "grad_norm": 0.1254752278327942, + "learning_rate": 7.583541817545483e-06, + "loss": 0.0013, + "step": 123000 + }, + { + "epoch": 0.7889479304627798, + "grad_norm": 0.19860519468784332, + "learning_rate": 7.583062604894268e-06, + "loss": 0.0017, + "step": 123010 + }, + { + "epoch": 0.7890120673565658, + "grad_norm": 0.3162236213684082, + "learning_rate": 7.582583359875433e-06, + "loss": 0.0027, + "step": 123020 + }, + { + "epoch": 0.789076204250352, + "grad_norm": 0.12642884254455566, + "learning_rate": 7.582104082494981e-06, + "loss": 0.0033, + "step": 123030 + }, + { + "epoch": 0.789140341144138, + "grad_norm": 0.2137831300497055, + "learning_rate": 7.5816247727589195e-06, + "loss": 0.0025, + "step": 123040 + }, + { + "epoch": 0.7892044780379242, + "grad_norm": 0.06308590620756149, + "learning_rate": 7.581145430673256e-06, + "loss": 0.003, + "step": 123050 + }, + { + "epoch": 0.7892686149317102, + "grad_norm": 0.1454891562461853, + "learning_rate": 7.580666056243995e-06, + "loss": 0.002, + "step": 123060 + }, + { + "epoch": 0.7893327518254963, + "grad_norm": 0.1343536227941513, + "learning_rate": 7.580186649477144e-06, + "loss": 0.0038, + "step": 123070 + }, + { + "epoch": 0.7893968887192825, + "grad_norm": 0.20746780931949615, + "learning_rate": 7.579707210378709e-06, + "loss": 0.0024, + "step": 123080 + }, + { + "epoch": 0.7894610256130685, + "grad_norm": 0.08619903028011322, + "learning_rate": 7.579227738954701e-06, + "loss": 0.0029, + "step": 123090 + }, + { + "epoch": 0.7895251625068547, + "grad_norm": 0.024186434224247932, + "learning_rate": 7.578748235211124e-06, + "loss": 0.0009, + "step": 123100 + }, + { + "epoch": 0.7895892994006407, + "grad_norm": 0.049256592988967896, + "learning_rate": 7.5782686991539914e-06, + "loss": 0.0028, + "step": 123110 + }, + { + "epoch": 0.7896534362944269, + "grad_norm": 0.3305787742137909, + "learning_rate": 7.577789130789306e-06, + "loss": 0.0028, + "step": 123120 + }, + { + "epoch": 0.7897175731882129, + "grad_norm": 0.12885724008083344, + "learning_rate": 7.577309530123082e-06, + "loss": 0.0042, + "step": 123130 + }, + { + "epoch": 0.789781710081999, + "grad_norm": 0.10026602447032928, + "learning_rate": 7.576829897161327e-06, + "loss": 0.0032, + "step": 123140 + }, + { + "epoch": 0.7898458469757851, + "grad_norm": 0.03776301443576813, + "learning_rate": 7.5763502319100535e-06, + "loss": 0.0039, + "step": 123150 + }, + { + "epoch": 0.7899099838695712, + "grad_norm": 0.10966672003269196, + "learning_rate": 7.575870534375269e-06, + "loss": 0.0025, + "step": 123160 + }, + { + "epoch": 0.7899741207633573, + "grad_norm": 0.1907765418291092, + "learning_rate": 7.575390804562987e-06, + "loss": 0.0042, + "step": 123170 + }, + { + "epoch": 0.7900382576571434, + "grad_norm": 0.1846044957637787, + "learning_rate": 7.574911042479216e-06, + "loss": 0.0017, + "step": 123180 + }, + { + "epoch": 0.7901023945509295, + "grad_norm": 0.09555988758802414, + "learning_rate": 7.57443124812997e-06, + "loss": 0.0032, + "step": 123190 + }, + { + "epoch": 0.7901665314447156, + "grad_norm": 0.37532809376716614, + "learning_rate": 7.57395142152126e-06, + "loss": 0.0043, + "step": 123200 + }, + { + "epoch": 0.7902306683385018, + "grad_norm": 0.09359215199947357, + "learning_rate": 7.5734715626591004e-06, + "loss": 0.0021, + "step": 123210 + }, + { + "epoch": 0.7902948052322878, + "grad_norm": 0.013770547695457935, + "learning_rate": 7.572991671549503e-06, + "loss": 0.0018, + "step": 123220 + }, + { + "epoch": 0.7903589421260739, + "grad_norm": 0.06928499788045883, + "learning_rate": 7.57251174819848e-06, + "loss": 0.0051, + "step": 123230 + }, + { + "epoch": 0.79042307901986, + "grad_norm": 0.18177980184555054, + "learning_rate": 7.5720317926120455e-06, + "loss": 0.0044, + "step": 123240 + }, + { + "epoch": 0.7904872159136461, + "grad_norm": 0.16453734040260315, + "learning_rate": 7.571551804796216e-06, + "loss": 0.0028, + "step": 123250 + }, + { + "epoch": 0.7905513528074322, + "grad_norm": 0.15786118805408478, + "learning_rate": 7.571071784757004e-06, + "loss": 0.005, + "step": 123260 + }, + { + "epoch": 0.7906154897012183, + "grad_norm": 0.0929371789097786, + "learning_rate": 7.5705917325004254e-06, + "loss": 0.0027, + "step": 123270 + }, + { + "epoch": 0.7906796265950043, + "grad_norm": 0.08085958659648895, + "learning_rate": 7.570111648032494e-06, + "loss": 0.002, + "step": 123280 + }, + { + "epoch": 0.7907437634887905, + "grad_norm": 0.23638342320919037, + "learning_rate": 7.569631531359227e-06, + "loss": 0.0031, + "step": 123290 + }, + { + "epoch": 0.7908079003825765, + "grad_norm": 0.020316675305366516, + "learning_rate": 7.569151382486641e-06, + "loss": 0.0039, + "step": 123300 + }, + { + "epoch": 0.7908720372763627, + "grad_norm": 0.05101149156689644, + "learning_rate": 7.568671201420752e-06, + "loss": 0.0025, + "step": 123310 + }, + { + "epoch": 0.7909361741701487, + "grad_norm": 0.09833734482526779, + "learning_rate": 7.568190988167578e-06, + "loss": 0.0025, + "step": 123320 + }, + { + "epoch": 0.7910003110639349, + "grad_norm": 0.11523966491222382, + "learning_rate": 7.567710742733134e-06, + "loss": 0.002, + "step": 123330 + }, + { + "epoch": 0.7910644479577209, + "grad_norm": 0.04753712937235832, + "learning_rate": 7.567230465123441e-06, + "loss": 0.0036, + "step": 123340 + }, + { + "epoch": 0.791128584851507, + "grad_norm": 0.2613712251186371, + "learning_rate": 7.5667501553445135e-06, + "loss": 0.004, + "step": 123350 + }, + { + "epoch": 0.7911927217452932, + "grad_norm": 0.028088044375181198, + "learning_rate": 7.566269813402374e-06, + "loss": 0.0021, + "step": 123360 + }, + { + "epoch": 0.7912568586390792, + "grad_norm": 0.12382286041975021, + "learning_rate": 7.565789439303037e-06, + "loss": 0.0028, + "step": 123370 + }, + { + "epoch": 0.7913209955328654, + "grad_norm": 0.02080100029706955, + "learning_rate": 7.565309033052528e-06, + "loss": 0.0019, + "step": 123380 + }, + { + "epoch": 0.7913851324266514, + "grad_norm": 0.13737565279006958, + "learning_rate": 7.5648285946568615e-06, + "loss": 0.0045, + "step": 123390 + }, + { + "epoch": 0.7914492693204376, + "grad_norm": 0.020041542127728462, + "learning_rate": 7.5643481241220585e-06, + "loss": 0.0024, + "step": 123400 + }, + { + "epoch": 0.7915134062142236, + "grad_norm": 0.23024889826774597, + "learning_rate": 7.5638676214541425e-06, + "loss": 0.0033, + "step": 123410 + }, + { + "epoch": 0.7915775431080098, + "grad_norm": 0.22192806005477905, + "learning_rate": 7.563387086659133e-06, + "loss": 0.0029, + "step": 123420 + }, + { + "epoch": 0.7916416800017958, + "grad_norm": 0.08314814418554306, + "learning_rate": 7.562906519743051e-06, + "loss": 0.0018, + "step": 123430 + }, + { + "epoch": 0.791705816895582, + "grad_norm": 0.15509885549545288, + "learning_rate": 7.5624259207119174e-06, + "loss": 0.0054, + "step": 123440 + }, + { + "epoch": 0.791769953789368, + "grad_norm": 0.13001513481140137, + "learning_rate": 7.561945289571757e-06, + "loss": 0.0027, + "step": 123450 + }, + { + "epoch": 0.7918340906831541, + "grad_norm": 0.0911867767572403, + "learning_rate": 7.561464626328591e-06, + "loss": 0.0037, + "step": 123460 + }, + { + "epoch": 0.7918982275769402, + "grad_norm": 0.12942135334014893, + "learning_rate": 7.560983930988443e-06, + "loss": 0.0023, + "step": 123470 + }, + { + "epoch": 0.7919623644707263, + "grad_norm": 0.09583374112844467, + "learning_rate": 7.560503203557335e-06, + "loss": 0.0026, + "step": 123480 + }, + { + "epoch": 0.7920265013645125, + "grad_norm": 0.1424064189195633, + "learning_rate": 7.560022444041291e-06, + "loss": 0.0029, + "step": 123490 + }, + { + "epoch": 0.7920906382582985, + "grad_norm": 0.0760183185338974, + "learning_rate": 7.559541652446338e-06, + "loss": 0.0018, + "step": 123500 + }, + { + "epoch": 0.7921547751520847, + "grad_norm": 0.13475407660007477, + "learning_rate": 7.5590608287784995e-06, + "loss": 0.0027, + "step": 123510 + }, + { + "epoch": 0.7922189120458707, + "grad_norm": 0.12201106548309326, + "learning_rate": 7.558579973043798e-06, + "loss": 0.002, + "step": 123520 + }, + { + "epoch": 0.7922830489396568, + "grad_norm": 0.08769958466291428, + "learning_rate": 7.558099085248261e-06, + "loss": 0.0024, + "step": 123530 + }, + { + "epoch": 0.7923471858334429, + "grad_norm": 0.10830560326576233, + "learning_rate": 7.557618165397913e-06, + "loss": 0.0021, + "step": 123540 + }, + { + "epoch": 0.792411322727229, + "grad_norm": 0.06387370824813843, + "learning_rate": 7.557137213498784e-06, + "loss": 0.0015, + "step": 123550 + }, + { + "epoch": 0.7924754596210151, + "grad_norm": 0.03704097867012024, + "learning_rate": 7.5566562295568966e-06, + "loss": 0.0068, + "step": 123560 + }, + { + "epoch": 0.7925395965148012, + "grad_norm": 0.057072702795267105, + "learning_rate": 7.556175213578281e-06, + "loss": 0.0034, + "step": 123570 + }, + { + "epoch": 0.7926037334085873, + "grad_norm": 0.10741273313760757, + "learning_rate": 7.555694165568962e-06, + "loss": 0.0017, + "step": 123580 + }, + { + "epoch": 0.7926678703023734, + "grad_norm": 0.06526832282543182, + "learning_rate": 7.555213085534969e-06, + "loss": 0.0026, + "step": 123590 + }, + { + "epoch": 0.7927320071961594, + "grad_norm": 0.11733478307723999, + "learning_rate": 7.55473197348233e-06, + "loss": 0.0061, + "step": 123600 + }, + { + "epoch": 0.7927961440899456, + "grad_norm": 0.163051575422287, + "learning_rate": 7.554250829417072e-06, + "loss": 0.0049, + "step": 123610 + }, + { + "epoch": 0.7928602809837316, + "grad_norm": 0.11553701758384705, + "learning_rate": 7.553769653345227e-06, + "loss": 0.0043, + "step": 123620 + }, + { + "epoch": 0.7929244178775178, + "grad_norm": 0.1378750205039978, + "learning_rate": 7.553288445272823e-06, + "loss": 0.005, + "step": 123630 + }, + { + "epoch": 0.7929885547713039, + "grad_norm": 0.1761714071035385, + "learning_rate": 7.55280720520589e-06, + "loss": 0.0037, + "step": 123640 + }, + { + "epoch": 0.79305269166509, + "grad_norm": 0.13274933397769928, + "learning_rate": 7.552325933150458e-06, + "loss": 0.0014, + "step": 123650 + }, + { + "epoch": 0.7931168285588761, + "grad_norm": 0.25353050231933594, + "learning_rate": 7.551844629112559e-06, + "loss": 0.005, + "step": 123660 + }, + { + "epoch": 0.7931809654526621, + "grad_norm": 0.042722560465335846, + "learning_rate": 7.551363293098222e-06, + "loss": 0.0022, + "step": 123670 + }, + { + "epoch": 0.7932451023464483, + "grad_norm": 0.37281107902526855, + "learning_rate": 7.55088192511348e-06, + "loss": 0.0027, + "step": 123680 + }, + { + "epoch": 0.7933092392402343, + "grad_norm": 0.043687593191862106, + "learning_rate": 7.550400525164363e-06, + "loss": 0.0064, + "step": 123690 + }, + { + "epoch": 0.7933733761340205, + "grad_norm": 0.23913761973381042, + "learning_rate": 7.549919093256905e-06, + "loss": 0.0021, + "step": 123700 + }, + { + "epoch": 0.7934375130278065, + "grad_norm": 0.1854463666677475, + "learning_rate": 7.549437629397141e-06, + "loss": 0.0034, + "step": 123710 + }, + { + "epoch": 0.7935016499215927, + "grad_norm": 0.40040287375450134, + "learning_rate": 7.548956133591099e-06, + "loss": 0.0044, + "step": 123720 + }, + { + "epoch": 0.7935657868153787, + "grad_norm": 0.09918133914470673, + "learning_rate": 7.548474605844815e-06, + "loss": 0.0028, + "step": 123730 + }, + { + "epoch": 0.7936299237091649, + "grad_norm": 0.24481476843357086, + "learning_rate": 7.5479930461643235e-06, + "loss": 0.0019, + "step": 123740 + }, + { + "epoch": 0.7936940606029509, + "grad_norm": 0.14661909639835358, + "learning_rate": 7.547511454555657e-06, + "loss": 0.0022, + "step": 123750 + }, + { + "epoch": 0.793758197496737, + "grad_norm": 0.11050709336996078, + "learning_rate": 7.547029831024852e-06, + "loss": 0.0024, + "step": 123760 + }, + { + "epoch": 0.7938223343905232, + "grad_norm": 0.02962980791926384, + "learning_rate": 7.546548175577944e-06, + "loss": 0.0031, + "step": 123770 + }, + { + "epoch": 0.7938864712843092, + "grad_norm": 0.07772235572338104, + "learning_rate": 7.5460664882209655e-06, + "loss": 0.0024, + "step": 123780 + }, + { + "epoch": 0.7939506081780954, + "grad_norm": 0.12356381863355637, + "learning_rate": 7.545584768959956e-06, + "loss": 0.0043, + "step": 123790 + }, + { + "epoch": 0.7940147450718814, + "grad_norm": 0.1655791848897934, + "learning_rate": 7.545103017800948e-06, + "loss": 0.001, + "step": 123800 + }, + { + "epoch": 0.7940788819656676, + "grad_norm": 0.11550326645374298, + "learning_rate": 7.544621234749983e-06, + "loss": 0.0024, + "step": 123810 + }, + { + "epoch": 0.7941430188594536, + "grad_norm": 0.18776899576187134, + "learning_rate": 7.544139419813093e-06, + "loss": 0.0024, + "step": 123820 + }, + { + "epoch": 0.7942071557532397, + "grad_norm": 0.021684233099222183, + "learning_rate": 7.543657572996319e-06, + "loss": 0.0027, + "step": 123830 + }, + { + "epoch": 0.7942712926470258, + "grad_norm": 0.11533031612634659, + "learning_rate": 7.543175694305697e-06, + "loss": 0.0012, + "step": 123840 + }, + { + "epoch": 0.7943354295408119, + "grad_norm": 0.08878498524427414, + "learning_rate": 7.542693783747266e-06, + "loss": 0.0035, + "step": 123850 + }, + { + "epoch": 0.794399566434598, + "grad_norm": 0.06881114095449448, + "learning_rate": 7.542211841327065e-06, + "loss": 0.0023, + "step": 123860 + }, + { + "epoch": 0.7944637033283841, + "grad_norm": 0.34978440403938293, + "learning_rate": 7.541729867051133e-06, + "loss": 0.0022, + "step": 123870 + }, + { + "epoch": 0.7945278402221702, + "grad_norm": 0.30584773421287537, + "learning_rate": 7.541247860925508e-06, + "loss": 0.0052, + "step": 123880 + }, + { + "epoch": 0.7945919771159563, + "grad_norm": 0.17105741798877716, + "learning_rate": 7.5407658229562305e-06, + "loss": 0.0021, + "step": 123890 + }, + { + "epoch": 0.7946561140097423, + "grad_norm": 0.1544884592294693, + "learning_rate": 7.540283753149344e-06, + "loss": 0.0028, + "step": 123900 + }, + { + "epoch": 0.7947202509035285, + "grad_norm": 0.08491586148738861, + "learning_rate": 7.539801651510885e-06, + "loss": 0.0022, + "step": 123910 + }, + { + "epoch": 0.7947843877973146, + "grad_norm": 0.12208464741706848, + "learning_rate": 7.539319518046897e-06, + "loss": 0.0027, + "step": 123920 + }, + { + "epoch": 0.7948485246911007, + "grad_norm": 0.2068665474653244, + "learning_rate": 7.5388373527634195e-06, + "loss": 0.0025, + "step": 123930 + }, + { + "epoch": 0.7949126615848868, + "grad_norm": 0.12112890928983688, + "learning_rate": 7.538355155666496e-06, + "loss": 0.0035, + "step": 123940 + }, + { + "epoch": 0.7949767984786729, + "grad_norm": 0.11285851150751114, + "learning_rate": 7.537872926762168e-06, + "loss": 0.004, + "step": 123950 + }, + { + "epoch": 0.795040935372459, + "grad_norm": 0.1206316277384758, + "learning_rate": 7.537390666056479e-06, + "loss": 0.0027, + "step": 123960 + }, + { + "epoch": 0.795105072266245, + "grad_norm": 0.24223995208740234, + "learning_rate": 7.536908373555472e-06, + "loss": 0.0021, + "step": 123970 + }, + { + "epoch": 0.7951692091600312, + "grad_norm": 0.029918193817138672, + "learning_rate": 7.5364260492651886e-06, + "loss": 0.0019, + "step": 123980 + }, + { + "epoch": 0.7952333460538172, + "grad_norm": 0.23039722442626953, + "learning_rate": 7.535943693191674e-06, + "loss": 0.0013, + "step": 123990 + }, + { + "epoch": 0.7952974829476034, + "grad_norm": 0.1353999227285385, + "learning_rate": 7.535461305340974e-06, + "loss": 0.003, + "step": 124000 + }, + { + "epoch": 0.7953616198413894, + "grad_norm": 0.3367917537689209, + "learning_rate": 7.534978885719131e-06, + "loss": 0.0028, + "step": 124010 + }, + { + "epoch": 0.7954257567351756, + "grad_norm": 0.12652994692325592, + "learning_rate": 7.534496434332191e-06, + "loss": 0.0034, + "step": 124020 + }, + { + "epoch": 0.7954898936289616, + "grad_norm": 0.008631177246570587, + "learning_rate": 7.534013951186199e-06, + "loss": 0.0014, + "step": 124030 + }, + { + "epoch": 0.7955540305227478, + "grad_norm": 0.1561666578054428, + "learning_rate": 7.533531436287203e-06, + "loss": 0.0014, + "step": 124040 + }, + { + "epoch": 0.7956181674165338, + "grad_norm": 0.3468468189239502, + "learning_rate": 7.533048889641243e-06, + "loss": 0.0022, + "step": 124050 + }, + { + "epoch": 0.7956823043103199, + "grad_norm": 0.037443943321704865, + "learning_rate": 7.532566311254374e-06, + "loss": 0.001, + "step": 124060 + }, + { + "epoch": 0.7957464412041061, + "grad_norm": 0.04735523462295532, + "learning_rate": 7.532083701132637e-06, + "loss": 0.0024, + "step": 124070 + }, + { + "epoch": 0.7958105780978921, + "grad_norm": 0.12967167794704437, + "learning_rate": 7.531601059282083e-06, + "loss": 0.0034, + "step": 124080 + }, + { + "epoch": 0.7958747149916783, + "grad_norm": 0.17066501080989838, + "learning_rate": 7.531118385708758e-06, + "loss": 0.0028, + "step": 124090 + }, + { + "epoch": 0.7959388518854643, + "grad_norm": 0.06425528973340988, + "learning_rate": 7.53063568041871e-06, + "loss": 0.0023, + "step": 124100 + }, + { + "epoch": 0.7960029887792505, + "grad_norm": 0.09032629430294037, + "learning_rate": 7.53015294341799e-06, + "loss": 0.0011, + "step": 124110 + }, + { + "epoch": 0.7960671256730365, + "grad_norm": 0.2610722482204437, + "learning_rate": 7.529670174712643e-06, + "loss": 0.0057, + "step": 124120 + }, + { + "epoch": 0.7961312625668227, + "grad_norm": 0.23317117989063263, + "learning_rate": 7.529187374308723e-06, + "loss": 0.0024, + "step": 124130 + }, + { + "epoch": 0.7961953994606087, + "grad_norm": 0.08279764652252197, + "learning_rate": 7.528704542212276e-06, + "loss": 0.002, + "step": 124140 + }, + { + "epoch": 0.7962595363543948, + "grad_norm": 0.02375340461730957, + "learning_rate": 7.528221678429355e-06, + "loss": 0.0043, + "step": 124150 + }, + { + "epoch": 0.7963236732481809, + "grad_norm": 0.03122488595545292, + "learning_rate": 7.527738782966008e-06, + "loss": 0.0016, + "step": 124160 + }, + { + "epoch": 0.796387810141967, + "grad_norm": 0.10670062899589539, + "learning_rate": 7.52725585582829e-06, + "loss": 0.0015, + "step": 124170 + }, + { + "epoch": 0.7964519470357531, + "grad_norm": 0.05864892154932022, + "learning_rate": 7.526772897022247e-06, + "loss": 0.0022, + "step": 124180 + }, + { + "epoch": 0.7965160839295392, + "grad_norm": 0.06157020851969719, + "learning_rate": 7.5262899065539365e-06, + "loss": 0.0031, + "step": 124190 + }, + { + "epoch": 0.7965802208233254, + "grad_norm": 0.23451292514801025, + "learning_rate": 7.525806884429405e-06, + "loss": 0.0033, + "step": 124200 + }, + { + "epoch": 0.7966443577171114, + "grad_norm": 0.22916685044765472, + "learning_rate": 7.525323830654712e-06, + "loss": 0.0064, + "step": 124210 + }, + { + "epoch": 0.7967084946108975, + "grad_norm": 0.24594850838184357, + "learning_rate": 7.524840745235903e-06, + "loss": 0.0012, + "step": 124220 + }, + { + "epoch": 0.7967726315046836, + "grad_norm": 0.1346345990896225, + "learning_rate": 7.524357628179037e-06, + "loss": 0.0028, + "step": 124230 + }, + { + "epoch": 0.7968367683984697, + "grad_norm": 0.08220899105072021, + "learning_rate": 7.523874479490164e-06, + "loss": 0.002, + "step": 124240 + }, + { + "epoch": 0.7969009052922558, + "grad_norm": 0.017328398302197456, + "learning_rate": 7.52339129917534e-06, + "loss": 0.0025, + "step": 124250 + }, + { + "epoch": 0.7969650421860419, + "grad_norm": 0.0716826543211937, + "learning_rate": 7.5229080872406215e-06, + "loss": 0.002, + "step": 124260 + }, + { + "epoch": 0.797029179079828, + "grad_norm": 0.1872122883796692, + "learning_rate": 7.5224248436920596e-06, + "loss": 0.0027, + "step": 124270 + }, + { + "epoch": 0.7970933159736141, + "grad_norm": 0.29147228598594666, + "learning_rate": 7.5219415685357136e-06, + "loss": 0.0024, + "step": 124280 + }, + { + "epoch": 0.7971574528674001, + "grad_norm": 0.07466059178113937, + "learning_rate": 7.521458261777636e-06, + "loss": 0.0049, + "step": 124290 + }, + { + "epoch": 0.7972215897611863, + "grad_norm": 0.057647235691547394, + "learning_rate": 7.520974923423885e-06, + "loss": 0.0025, + "step": 124300 + }, + { + "epoch": 0.7972857266549723, + "grad_norm": 0.0419759601354599, + "learning_rate": 7.5204915534805154e-06, + "loss": 0.0009, + "step": 124310 + }, + { + "epoch": 0.7973498635487585, + "grad_norm": 0.19892175495624542, + "learning_rate": 7.520008151953586e-06, + "loss": 0.0026, + "step": 124320 + }, + { + "epoch": 0.7974140004425445, + "grad_norm": 0.10667607188224792, + "learning_rate": 7.519524718849154e-06, + "loss": 0.0012, + "step": 124330 + }, + { + "epoch": 0.7974781373363307, + "grad_norm": 0.9881979823112488, + "learning_rate": 7.519041254173276e-06, + "loss": 0.0028, + "step": 124340 + }, + { + "epoch": 0.7975422742301168, + "grad_norm": 0.2386818826198578, + "learning_rate": 7.518557757932011e-06, + "loss": 0.0051, + "step": 124350 + }, + { + "epoch": 0.7976064111239028, + "grad_norm": 0.12765030562877655, + "learning_rate": 7.518074230131418e-06, + "loss": 0.0024, + "step": 124360 + }, + { + "epoch": 0.797670548017689, + "grad_norm": 0.013037353754043579, + "learning_rate": 7.5175906707775534e-06, + "loss": 0.0015, + "step": 124370 + }, + { + "epoch": 0.797734684911475, + "grad_norm": 0.04132802411913872, + "learning_rate": 7.51710707987648e-06, + "loss": 0.0027, + "step": 124380 + }, + { + "epoch": 0.7977988218052612, + "grad_norm": 0.030632011592388153, + "learning_rate": 7.516623457434255e-06, + "loss": 0.0029, + "step": 124390 + }, + { + "epoch": 0.7978629586990472, + "grad_norm": 0.17068198323249817, + "learning_rate": 7.516139803456941e-06, + "loss": 0.0033, + "step": 124400 + }, + { + "epoch": 0.7979270955928334, + "grad_norm": 0.022175561636686325, + "learning_rate": 7.515656117950595e-06, + "loss": 0.0016, + "step": 124410 + }, + { + "epoch": 0.7979912324866194, + "grad_norm": 0.1440616101026535, + "learning_rate": 7.515172400921281e-06, + "loss": 0.0029, + "step": 124420 + }, + { + "epoch": 0.7980553693804056, + "grad_norm": 0.015570059418678284, + "learning_rate": 7.5146886523750596e-06, + "loss": 0.0036, + "step": 124430 + }, + { + "epoch": 0.7981195062741916, + "grad_norm": 0.05740491673350334, + "learning_rate": 7.514204872317991e-06, + "loss": 0.0016, + "step": 124440 + }, + { + "epoch": 0.7981836431679777, + "grad_norm": 0.07615818828344345, + "learning_rate": 7.51372106075614e-06, + "loss": 0.0026, + "step": 124450 + }, + { + "epoch": 0.7982477800617638, + "grad_norm": 0.11525259166955948, + "learning_rate": 7.513237217695566e-06, + "loss": 0.0028, + "step": 124460 + }, + { + "epoch": 0.7983119169555499, + "grad_norm": 0.01986979879438877, + "learning_rate": 7.512753343142334e-06, + "loss": 0.0025, + "step": 124470 + }, + { + "epoch": 0.7983760538493361, + "grad_norm": 0.1199314221739769, + "learning_rate": 7.512269437102506e-06, + "loss": 0.0023, + "step": 124480 + }, + { + "epoch": 0.7984401907431221, + "grad_norm": 0.05994654819369316, + "learning_rate": 7.5117854995821495e-06, + "loss": 0.0038, + "step": 124490 + }, + { + "epoch": 0.7985043276369083, + "grad_norm": 0.27077120542526245, + "learning_rate": 7.511301530587322e-06, + "loss": 0.0031, + "step": 124500 + }, + { + "epoch": 0.7985684645306943, + "grad_norm": 0.17333291471004486, + "learning_rate": 7.510817530124094e-06, + "loss": 0.0022, + "step": 124510 + }, + { + "epoch": 0.7986326014244804, + "grad_norm": 0.020280921831727028, + "learning_rate": 7.510333498198525e-06, + "loss": 0.0024, + "step": 124520 + }, + { + "epoch": 0.7986967383182665, + "grad_norm": 0.17286615073680878, + "learning_rate": 7.509849434816687e-06, + "loss": 0.0017, + "step": 124530 + }, + { + "epoch": 0.7987608752120526, + "grad_norm": 0.09009113162755966, + "learning_rate": 7.509365339984639e-06, + "loss": 0.003, + "step": 124540 + }, + { + "epoch": 0.7988250121058387, + "grad_norm": 0.034896817058324814, + "learning_rate": 7.508881213708451e-06, + "loss": 0.0019, + "step": 124550 + }, + { + "epoch": 0.7988891489996248, + "grad_norm": 0.15198229253292084, + "learning_rate": 7.508397055994188e-06, + "loss": 0.0032, + "step": 124560 + }, + { + "epoch": 0.7989532858934109, + "grad_norm": 0.1587715595960617, + "learning_rate": 7.507912866847918e-06, + "loss": 0.0031, + "step": 124570 + }, + { + "epoch": 0.799017422787197, + "grad_norm": 0.07122717797756195, + "learning_rate": 7.507428646275705e-06, + "loss": 0.0024, + "step": 124580 + }, + { + "epoch": 0.799081559680983, + "grad_norm": 0.3986320197582245, + "learning_rate": 7.506944394283622e-06, + "loss": 0.0031, + "step": 124590 + }, + { + "epoch": 0.7991456965747692, + "grad_norm": 0.1260489672422409, + "learning_rate": 7.5064601108777315e-06, + "loss": 0.0028, + "step": 124600 + }, + { + "epoch": 0.7992098334685552, + "grad_norm": 0.10430359840393066, + "learning_rate": 7.505975796064106e-06, + "loss": 0.0025, + "step": 124610 + }, + { + "epoch": 0.7992739703623414, + "grad_norm": 0.18059343099594116, + "learning_rate": 7.505491449848812e-06, + "loss": 0.0026, + "step": 124620 + }, + { + "epoch": 0.7993381072561275, + "grad_norm": 0.1164829358458519, + "learning_rate": 7.50500707223792e-06, + "loss": 0.0038, + "step": 124630 + }, + { + "epoch": 0.7994022441499136, + "grad_norm": 0.13949273526668549, + "learning_rate": 7.504522663237499e-06, + "loss": 0.0028, + "step": 124640 + }, + { + "epoch": 0.7994663810436997, + "grad_norm": 0.14682637155056, + "learning_rate": 7.504038222853619e-06, + "loss": 0.002, + "step": 124650 + }, + { + "epoch": 0.7995305179374858, + "grad_norm": 0.3267804682254791, + "learning_rate": 7.503553751092352e-06, + "loss": 0.0012, + "step": 124660 + }, + { + "epoch": 0.7995946548312719, + "grad_norm": 0.0706501454114914, + "learning_rate": 7.503069247959765e-06, + "loss": 0.0032, + "step": 124670 + }, + { + "epoch": 0.7996587917250579, + "grad_norm": 0.3418445289134979, + "learning_rate": 7.502584713461931e-06, + "loss": 0.005, + "step": 124680 + }, + { + "epoch": 0.7997229286188441, + "grad_norm": 0.1267576962709427, + "learning_rate": 7.5021001476049225e-06, + "loss": 0.0031, + "step": 124690 + }, + { + "epoch": 0.7997870655126301, + "grad_norm": 0.11980558186769485, + "learning_rate": 7.5016155503948116e-06, + "loss": 0.003, + "step": 124700 + }, + { + "epoch": 0.7998512024064163, + "grad_norm": 0.10078209638595581, + "learning_rate": 7.501130921837671e-06, + "loss": 0.0037, + "step": 124710 + }, + { + "epoch": 0.7999153393002023, + "grad_norm": 0.15275952219963074, + "learning_rate": 7.500646261939571e-06, + "loss": 0.0024, + "step": 124720 + }, + { + "epoch": 0.7999794761939885, + "grad_norm": 0.3661182224750519, + "learning_rate": 7.500161570706586e-06, + "loss": 0.0031, + "step": 124730 + }, + { + "epoch": 0.8000436130877745, + "grad_norm": 0.0601467490196228, + "learning_rate": 7.499676848144791e-06, + "loss": 0.0022, + "step": 124740 + }, + { + "epoch": 0.8001077499815606, + "grad_norm": 0.06747540086507797, + "learning_rate": 7.499192094260257e-06, + "loss": 0.0022, + "step": 124750 + }, + { + "epoch": 0.8001718868753468, + "grad_norm": 0.19947946071624756, + "learning_rate": 7.498707309059061e-06, + "loss": 0.0017, + "step": 124760 + }, + { + "epoch": 0.8002360237691328, + "grad_norm": 0.15963557362556458, + "learning_rate": 7.498222492547277e-06, + "loss": 0.0026, + "step": 124770 + }, + { + "epoch": 0.800300160662919, + "grad_norm": 0.1204855665564537, + "learning_rate": 7.497737644730979e-06, + "loss": 0.0015, + "step": 124780 + }, + { + "epoch": 0.800364297556705, + "grad_norm": 0.03258391097187996, + "learning_rate": 7.497252765616243e-06, + "loss": 0.0021, + "step": 124790 + }, + { + "epoch": 0.8004284344504912, + "grad_norm": 0.13139325380325317, + "learning_rate": 7.496767855209146e-06, + "loss": 0.0027, + "step": 124800 + }, + { + "epoch": 0.8004925713442772, + "grad_norm": 0.10871946066617966, + "learning_rate": 7.4962829135157625e-06, + "loss": 0.0024, + "step": 124810 + }, + { + "epoch": 0.8005567082380634, + "grad_norm": 0.1199418231844902, + "learning_rate": 7.495797940542169e-06, + "loss": 0.0014, + "step": 124820 + }, + { + "epoch": 0.8006208451318494, + "grad_norm": 0.12236741185188293, + "learning_rate": 7.495312936294445e-06, + "loss": 0.0034, + "step": 124830 + }, + { + "epoch": 0.8006849820256355, + "grad_norm": 0.15686768293380737, + "learning_rate": 7.494827900778667e-06, + "loss": 0.0028, + "step": 124840 + }, + { + "epoch": 0.8007491189194216, + "grad_norm": 0.203244149684906, + "learning_rate": 7.494342834000912e-06, + "loss": 0.0058, + "step": 124850 + }, + { + "epoch": 0.8008132558132077, + "grad_norm": 0.055326201021671295, + "learning_rate": 7.493857735967258e-06, + "loss": 0.003, + "step": 124860 + }, + { + "epoch": 0.8008773927069938, + "grad_norm": 0.1566278487443924, + "learning_rate": 7.493372606683784e-06, + "loss": 0.0028, + "step": 124870 + }, + { + "epoch": 0.8009415296007799, + "grad_norm": 0.02740250527858734, + "learning_rate": 7.49288744615657e-06, + "loss": 0.0022, + "step": 124880 + }, + { + "epoch": 0.801005666494566, + "grad_norm": 0.12024752795696259, + "learning_rate": 7.4924022543916955e-06, + "loss": 0.0019, + "step": 124890 + }, + { + "epoch": 0.8010698033883521, + "grad_norm": 0.02664109319448471, + "learning_rate": 7.491917031395237e-06, + "loss": 0.0033, + "step": 124900 + }, + { + "epoch": 0.8011339402821382, + "grad_norm": 0.12732449173927307, + "learning_rate": 7.49143177717328e-06, + "loss": 0.0025, + "step": 124910 + }, + { + "epoch": 0.8011980771759243, + "grad_norm": 0.06920778006315231, + "learning_rate": 7.490946491731901e-06, + "loss": 0.002, + "step": 124920 + }, + { + "epoch": 0.8012622140697104, + "grad_norm": 0.14909569919109344, + "learning_rate": 7.490461175077182e-06, + "loss": 0.0013, + "step": 124930 + }, + { + "epoch": 0.8013263509634965, + "grad_norm": 0.06066959351301193, + "learning_rate": 7.489975827215203e-06, + "loss": 0.0018, + "step": 124940 + }, + { + "epoch": 0.8013904878572826, + "grad_norm": 0.14901049435138702, + "learning_rate": 7.489490448152049e-06, + "loss": 0.0025, + "step": 124950 + }, + { + "epoch": 0.8014546247510687, + "grad_norm": 0.08036942780017853, + "learning_rate": 7.489005037893802e-06, + "loss": 0.004, + "step": 124960 + }, + { + "epoch": 0.8015187616448548, + "grad_norm": 0.4628899097442627, + "learning_rate": 7.4885195964465396e-06, + "loss": 0.0079, + "step": 124970 + }, + { + "epoch": 0.8015828985386408, + "grad_norm": 0.11064311861991882, + "learning_rate": 7.488034123816351e-06, + "loss": 0.0023, + "step": 124980 + }, + { + "epoch": 0.801647035432427, + "grad_norm": 0.20454680919647217, + "learning_rate": 7.487548620009315e-06, + "loss": 0.0027, + "step": 124990 + }, + { + "epoch": 0.801711172326213, + "grad_norm": 0.09361718595027924, + "learning_rate": 7.487063085031519e-06, + "loss": 0.0015, + "step": 125000 + }, + { + "epoch": 0.8017753092199992, + "grad_norm": 0.08985812216997147, + "learning_rate": 7.486577518889042e-06, + "loss": 0.0038, + "step": 125010 + }, + { + "epoch": 0.8018394461137852, + "grad_norm": 0.008312336169183254, + "learning_rate": 7.486091921587975e-06, + "loss": 0.003, + "step": 125020 + }, + { + "epoch": 0.8019035830075714, + "grad_norm": 0.11346910893917084, + "learning_rate": 7.485606293134397e-06, + "loss": 0.0019, + "step": 125030 + }, + { + "epoch": 0.8019677199013575, + "grad_norm": 0.11304417997598648, + "learning_rate": 7.485120633534396e-06, + "loss": 0.0028, + "step": 125040 + }, + { + "epoch": 0.8020318567951435, + "grad_norm": 0.33306020498275757, + "learning_rate": 7.484634942794058e-06, + "loss": 0.0026, + "step": 125050 + }, + { + "epoch": 0.8020959936889297, + "grad_norm": 0.10592376440763474, + "learning_rate": 7.484149220919468e-06, + "loss": 0.0014, + "step": 125060 + }, + { + "epoch": 0.8021601305827157, + "grad_norm": 0.11216168850660324, + "learning_rate": 7.483663467916712e-06, + "loss": 0.0015, + "step": 125070 + }, + { + "epoch": 0.8022242674765019, + "grad_norm": 0.1404658704996109, + "learning_rate": 7.483177683791879e-06, + "loss": 0.002, + "step": 125080 + }, + { + "epoch": 0.8022884043702879, + "grad_norm": 0.34358495473861694, + "learning_rate": 7.482691868551054e-06, + "loss": 0.002, + "step": 125090 + }, + { + "epoch": 0.8023525412640741, + "grad_norm": 0.008700758218765259, + "learning_rate": 7.482206022200327e-06, + "loss": 0.0022, + "step": 125100 + }, + { + "epoch": 0.8024166781578601, + "grad_norm": 0.0656890794634819, + "learning_rate": 7.481720144745783e-06, + "loss": 0.0028, + "step": 125110 + }, + { + "epoch": 0.8024808150516463, + "grad_norm": 0.06679105758666992, + "learning_rate": 7.481234236193513e-06, + "loss": 0.004, + "step": 125120 + }, + { + "epoch": 0.8025449519454323, + "grad_norm": 0.09332876652479172, + "learning_rate": 7.480748296549605e-06, + "loss": 0.0035, + "step": 125130 + }, + { + "epoch": 0.8026090888392184, + "grad_norm": 0.12459799647331238, + "learning_rate": 7.480262325820147e-06, + "loss": 0.0024, + "step": 125140 + }, + { + "epoch": 0.8026732257330045, + "grad_norm": 0.17064888775348663, + "learning_rate": 7.47977632401123e-06, + "loss": 0.0026, + "step": 125150 + }, + { + "epoch": 0.8027373626267906, + "grad_norm": 0.060531724244356155, + "learning_rate": 7.479290291128942e-06, + "loss": 0.0013, + "step": 125160 + }, + { + "epoch": 0.8028014995205767, + "grad_norm": 0.08411803096532822, + "learning_rate": 7.478804227179376e-06, + "loss": 0.0048, + "step": 125170 + }, + { + "epoch": 0.8028656364143628, + "grad_norm": 0.17802155017852783, + "learning_rate": 7.478318132168621e-06, + "loss": 0.0024, + "step": 125180 + }, + { + "epoch": 0.802929773308149, + "grad_norm": 0.12030982971191406, + "learning_rate": 7.47783200610277e-06, + "loss": 0.0029, + "step": 125190 + }, + { + "epoch": 0.802993910201935, + "grad_norm": 0.08102941513061523, + "learning_rate": 7.477345848987911e-06, + "loss": 0.0035, + "step": 125200 + }, + { + "epoch": 0.8030580470957212, + "grad_norm": 0.06955002248287201, + "learning_rate": 7.476859660830139e-06, + "loss": 0.0022, + "step": 125210 + }, + { + "epoch": 0.8031221839895072, + "grad_norm": 0.8118174076080322, + "learning_rate": 7.476373441635545e-06, + "loss": 0.0046, + "step": 125220 + }, + { + "epoch": 0.8031863208832933, + "grad_norm": 0.18293973803520203, + "learning_rate": 7.475887191410223e-06, + "loss": 0.0012, + "step": 125230 + }, + { + "epoch": 0.8032504577770794, + "grad_norm": 0.14403167366981506, + "learning_rate": 7.4754009101602635e-06, + "loss": 0.0018, + "step": 125240 + }, + { + "epoch": 0.8033145946708655, + "grad_norm": 0.29412204027175903, + "learning_rate": 7.474914597891763e-06, + "loss": 0.003, + "step": 125250 + }, + { + "epoch": 0.8033787315646516, + "grad_norm": 0.11173876374959946, + "learning_rate": 7.474428254610812e-06, + "loss": 0.0017, + "step": 125260 + }, + { + "epoch": 0.8034428684584377, + "grad_norm": 0.08756504207849503, + "learning_rate": 7.473941880323507e-06, + "loss": 0.0023, + "step": 125270 + }, + { + "epoch": 0.8035070053522237, + "grad_norm": 0.04347633942961693, + "learning_rate": 7.473455475035942e-06, + "loss": 0.004, + "step": 125280 + }, + { + "epoch": 0.8035711422460099, + "grad_norm": 0.029598882421851158, + "learning_rate": 7.472969038754214e-06, + "loss": 0.0013, + "step": 125290 + }, + { + "epoch": 0.8036352791397959, + "grad_norm": 0.09162858873605728, + "learning_rate": 7.472482571484414e-06, + "loss": 0.0033, + "step": 125300 + }, + { + "epoch": 0.8036994160335821, + "grad_norm": 0.08080438524484634, + "learning_rate": 7.471996073232641e-06, + "loss": 0.0038, + "step": 125310 + }, + { + "epoch": 0.8037635529273682, + "grad_norm": 0.1008782610297203, + "learning_rate": 7.471509544004993e-06, + "loss": 0.0031, + "step": 125320 + }, + { + "epoch": 0.8038276898211543, + "grad_norm": 0.12188203632831573, + "learning_rate": 7.471022983807561e-06, + "loss": 0.0015, + "step": 125330 + }, + { + "epoch": 0.8038918267149404, + "grad_norm": 0.056016705930233, + "learning_rate": 7.470536392646446e-06, + "loss": 0.0017, + "step": 125340 + }, + { + "epoch": 0.8039559636087265, + "grad_norm": 0.17744384706020355, + "learning_rate": 7.470049770527744e-06, + "loss": 0.0016, + "step": 125350 + }, + { + "epoch": 0.8040201005025126, + "grad_norm": 0.06114094331860542, + "learning_rate": 7.469563117457555e-06, + "loss": 0.0024, + "step": 125360 + }, + { + "epoch": 0.8040842373962986, + "grad_norm": 0.031616564840078354, + "learning_rate": 7.469076433441972e-06, + "loss": 0.0034, + "step": 125370 + }, + { + "epoch": 0.8041483742900848, + "grad_norm": 0.11400750279426575, + "learning_rate": 7.468589718487099e-06, + "loss": 0.0021, + "step": 125380 + }, + { + "epoch": 0.8042125111838708, + "grad_norm": 0.11139538139104843, + "learning_rate": 7.468102972599032e-06, + "loss": 0.0022, + "step": 125390 + }, + { + "epoch": 0.804276648077657, + "grad_norm": 0.011781953275203705, + "learning_rate": 7.467616195783871e-06, + "loss": 0.0041, + "step": 125400 + }, + { + "epoch": 0.804340784971443, + "grad_norm": 0.08708330988883972, + "learning_rate": 7.467129388047714e-06, + "loss": 0.0023, + "step": 125410 + }, + { + "epoch": 0.8044049218652292, + "grad_norm": 0.11689251661300659, + "learning_rate": 7.4666425493966646e-06, + "loss": 0.0022, + "step": 125420 + }, + { + "epoch": 0.8044690587590152, + "grad_norm": 0.16960015892982483, + "learning_rate": 7.46615567983682e-06, + "loss": 0.0034, + "step": 125430 + }, + { + "epoch": 0.8045331956528013, + "grad_norm": 0.13801692426204681, + "learning_rate": 7.4656687793742826e-06, + "loss": 0.0024, + "step": 125440 + }, + { + "epoch": 0.8045973325465874, + "grad_norm": 0.12104618549346924, + "learning_rate": 7.465181848015152e-06, + "loss": 0.0029, + "step": 125450 + }, + { + "epoch": 0.8046614694403735, + "grad_norm": 0.14934170246124268, + "learning_rate": 7.464694885765532e-06, + "loss": 0.0032, + "step": 125460 + }, + { + "epoch": 0.8047256063341597, + "grad_norm": 0.08867646753787994, + "learning_rate": 7.464207892631523e-06, + "loss": 0.0041, + "step": 125470 + }, + { + "epoch": 0.8047897432279457, + "grad_norm": 0.029540695250034332, + "learning_rate": 7.4637208686192295e-06, + "loss": 0.0027, + "step": 125480 + }, + { + "epoch": 0.8048538801217319, + "grad_norm": 0.042014963924884796, + "learning_rate": 7.463233813734752e-06, + "loss": 0.0021, + "step": 125490 + }, + { + "epoch": 0.8049180170155179, + "grad_norm": 0.07194870710372925, + "learning_rate": 7.462746727984193e-06, + "loss": 0.0023, + "step": 125500 + }, + { + "epoch": 0.804982153909304, + "grad_norm": 0.11041359603404999, + "learning_rate": 7.462259611373659e-06, + "loss": 0.0025, + "step": 125510 + }, + { + "epoch": 0.8050462908030901, + "grad_norm": 0.08371560275554657, + "learning_rate": 7.461772463909252e-06, + "loss": 0.0029, + "step": 125520 + }, + { + "epoch": 0.8051104276968762, + "grad_norm": 0.1286308616399765, + "learning_rate": 7.461285285597078e-06, + "loss": 0.0046, + "step": 125530 + }, + { + "epoch": 0.8051745645906623, + "grad_norm": 0.09027030318975449, + "learning_rate": 7.460798076443237e-06, + "loss": 0.0042, + "step": 125540 + }, + { + "epoch": 0.8052387014844484, + "grad_norm": 0.020330199971795082, + "learning_rate": 7.46031083645384e-06, + "loss": 0.0015, + "step": 125550 + }, + { + "epoch": 0.8053028383782345, + "grad_norm": 0.028711901977658272, + "learning_rate": 7.459823565634989e-06, + "loss": 0.0025, + "step": 125560 + }, + { + "epoch": 0.8053669752720206, + "grad_norm": 0.04168624058365822, + "learning_rate": 7.459336263992791e-06, + "loss": 0.0028, + "step": 125570 + }, + { + "epoch": 0.8054311121658067, + "grad_norm": 0.13903804123401642, + "learning_rate": 7.4588489315333514e-06, + "loss": 0.0028, + "step": 125580 + }, + { + "epoch": 0.8054952490595928, + "grad_norm": 0.07040738314390182, + "learning_rate": 7.458361568262778e-06, + "loss": 0.0016, + "step": 125590 + }, + { + "epoch": 0.8055593859533788, + "grad_norm": 0.03716859221458435, + "learning_rate": 7.457874174187176e-06, + "loss": 0.0037, + "step": 125600 + }, + { + "epoch": 0.805623522847165, + "grad_norm": 0.07053796201944351, + "learning_rate": 7.457386749312656e-06, + "loss": 0.0024, + "step": 125610 + }, + { + "epoch": 0.8056876597409511, + "grad_norm": 0.055847663432359695, + "learning_rate": 7.456899293645322e-06, + "loss": 0.0021, + "step": 125620 + }, + { + "epoch": 0.8057517966347372, + "grad_norm": 0.1960245817899704, + "learning_rate": 7.456411807191286e-06, + "loss": 0.0023, + "step": 125630 + }, + { + "epoch": 0.8058159335285233, + "grad_norm": 0.08720597624778748, + "learning_rate": 7.455924289956653e-06, + "loss": 0.0031, + "step": 125640 + }, + { + "epoch": 0.8058800704223094, + "grad_norm": 0.11513277888298035, + "learning_rate": 7.455436741947533e-06, + "loss": 0.0023, + "step": 125650 + }, + { + "epoch": 0.8059442073160955, + "grad_norm": 0.18705707788467407, + "learning_rate": 7.454949163170037e-06, + "loss": 0.0016, + "step": 125660 + }, + { + "epoch": 0.8060083442098815, + "grad_norm": 0.16182366013526917, + "learning_rate": 7.454461553630272e-06, + "loss": 0.0029, + "step": 125670 + }, + { + "epoch": 0.8060724811036677, + "grad_norm": 0.05336945131421089, + "learning_rate": 7.45397391333435e-06, + "loss": 0.0026, + "step": 125680 + }, + { + "epoch": 0.8061366179974537, + "grad_norm": 0.04091660678386688, + "learning_rate": 7.453486242288381e-06, + "loss": 0.0036, + "step": 125690 + }, + { + "epoch": 0.8062007548912399, + "grad_norm": 0.17214728891849518, + "learning_rate": 7.452998540498477e-06, + "loss": 0.0017, + "step": 125700 + }, + { + "epoch": 0.8062648917850259, + "grad_norm": 0.20068220794200897, + "learning_rate": 7.452510807970747e-06, + "loss": 0.0024, + "step": 125710 + }, + { + "epoch": 0.8063290286788121, + "grad_norm": 0.026574088260531425, + "learning_rate": 7.452023044711305e-06, + "loss": 0.0016, + "step": 125720 + }, + { + "epoch": 0.8063931655725981, + "grad_norm": 0.09764420241117477, + "learning_rate": 7.451535250726261e-06, + "loss": 0.0043, + "step": 125730 + }, + { + "epoch": 0.8064573024663843, + "grad_norm": 0.09181403368711472, + "learning_rate": 7.451047426021728e-06, + "loss": 0.0029, + "step": 125740 + }, + { + "epoch": 0.8065214393601704, + "grad_norm": 0.0713183730840683, + "learning_rate": 7.450559570603819e-06, + "loss": 0.002, + "step": 125750 + }, + { + "epoch": 0.8065855762539564, + "grad_norm": 0.1376122534275055, + "learning_rate": 7.450071684478648e-06, + "loss": 0.0015, + "step": 125760 + }, + { + "epoch": 0.8066497131477426, + "grad_norm": 0.08532290905714035, + "learning_rate": 7.449583767652326e-06, + "loss": 0.0018, + "step": 125770 + }, + { + "epoch": 0.8067138500415286, + "grad_norm": 0.09605459123849869, + "learning_rate": 7.449095820130972e-06, + "loss": 0.005, + "step": 125780 + }, + { + "epoch": 0.8067779869353148, + "grad_norm": 0.0583382323384285, + "learning_rate": 7.448607841920693e-06, + "loss": 0.0028, + "step": 125790 + }, + { + "epoch": 0.8068421238291008, + "grad_norm": 0.10395050793886185, + "learning_rate": 7.448119833027609e-06, + "loss": 0.003, + "step": 125800 + }, + { + "epoch": 0.806906260722887, + "grad_norm": 0.13060331344604492, + "learning_rate": 7.447631793457836e-06, + "loss": 0.0027, + "step": 125810 + }, + { + "epoch": 0.806970397616673, + "grad_norm": 0.09807195514440536, + "learning_rate": 7.447143723217485e-06, + "loss": 0.003, + "step": 125820 + }, + { + "epoch": 0.8070345345104591, + "grad_norm": 0.0894220769405365, + "learning_rate": 7.446655622312675e-06, + "loss": 0.0023, + "step": 125830 + }, + { + "epoch": 0.8070986714042452, + "grad_norm": 0.4230436682701111, + "learning_rate": 7.446167490749522e-06, + "loss": 0.0063, + "step": 125840 + }, + { + "epoch": 0.8071628082980313, + "grad_norm": 0.20460151135921478, + "learning_rate": 7.445679328534141e-06, + "loss": 0.0021, + "step": 125850 + }, + { + "epoch": 0.8072269451918174, + "grad_norm": 0.07890148460865021, + "learning_rate": 7.445191135672652e-06, + "loss": 0.0021, + "step": 125860 + }, + { + "epoch": 0.8072910820856035, + "grad_norm": 0.01286834292113781, + "learning_rate": 7.444702912171168e-06, + "loss": 0.0019, + "step": 125870 + }, + { + "epoch": 0.8073552189793896, + "grad_norm": 0.030550939962267876, + "learning_rate": 7.44421465803581e-06, + "loss": 0.0033, + "step": 125880 + }, + { + "epoch": 0.8074193558731757, + "grad_norm": 0.1814279854297638, + "learning_rate": 7.443726373272696e-06, + "loss": 0.0048, + "step": 125890 + }, + { + "epoch": 0.8074834927669619, + "grad_norm": 0.11485601961612701, + "learning_rate": 7.443238057887943e-06, + "loss": 0.0021, + "step": 125900 + }, + { + "epoch": 0.8075476296607479, + "grad_norm": 0.09258340299129486, + "learning_rate": 7.442749711887673e-06, + "loss": 0.0016, + "step": 125910 + }, + { + "epoch": 0.807611766554534, + "grad_norm": 0.10012565553188324, + "learning_rate": 7.442261335278003e-06, + "loss": 0.002, + "step": 125920 + }, + { + "epoch": 0.8076759034483201, + "grad_norm": 0.18450987339019775, + "learning_rate": 7.441772928065052e-06, + "loss": 0.0016, + "step": 125930 + }, + { + "epoch": 0.8077400403421062, + "grad_norm": 0.24779313802719116, + "learning_rate": 7.4412844902549405e-06, + "loss": 0.0046, + "step": 125940 + }, + { + "epoch": 0.8078041772358923, + "grad_norm": 0.2482410967350006, + "learning_rate": 7.440796021853792e-06, + "loss": 0.0042, + "step": 125950 + }, + { + "epoch": 0.8078683141296784, + "grad_norm": 0.1761637181043625, + "learning_rate": 7.440307522867722e-06, + "loss": 0.0031, + "step": 125960 + }, + { + "epoch": 0.8079324510234644, + "grad_norm": 0.21982888877391815, + "learning_rate": 7.4398189933028576e-06, + "loss": 0.004, + "step": 125970 + }, + { + "epoch": 0.8079965879172506, + "grad_norm": 0.022976882755756378, + "learning_rate": 7.439330433165315e-06, + "loss": 0.0025, + "step": 125980 + }, + { + "epoch": 0.8080607248110366, + "grad_norm": 0.06587019562721252, + "learning_rate": 7.438841842461221e-06, + "loss": 0.0018, + "step": 125990 + }, + { + "epoch": 0.8081248617048228, + "grad_norm": 0.09989341348409653, + "learning_rate": 7.438353221196693e-06, + "loss": 0.0023, + "step": 126000 + }, + { + "epoch": 0.8081889985986088, + "grad_norm": 0.15177001059055328, + "learning_rate": 7.437864569377858e-06, + "loss": 0.0022, + "step": 126010 + }, + { + "epoch": 0.808253135492395, + "grad_norm": 0.28216689825057983, + "learning_rate": 7.437375887010838e-06, + "loss": 0.0027, + "step": 126020 + }, + { + "epoch": 0.8083172723861811, + "grad_norm": 0.05767377093434334, + "learning_rate": 7.436887174101756e-06, + "loss": 0.0023, + "step": 126030 + }, + { + "epoch": 0.8083814092799672, + "grad_norm": 0.1304662525653839, + "learning_rate": 7.436398430656736e-06, + "loss": 0.0025, + "step": 126040 + }, + { + "epoch": 0.8084455461737533, + "grad_norm": 0.013885071501135826, + "learning_rate": 7.435909656681901e-06, + "loss": 0.0023, + "step": 126050 + }, + { + "epoch": 0.8085096830675393, + "grad_norm": 0.3168763816356659, + "learning_rate": 7.435420852183379e-06, + "loss": 0.0033, + "step": 126060 + }, + { + "epoch": 0.8085738199613255, + "grad_norm": 0.17479267716407776, + "learning_rate": 7.4349320171672926e-06, + "loss": 0.002, + "step": 126070 + }, + { + "epoch": 0.8086379568551115, + "grad_norm": 0.1837267279624939, + "learning_rate": 7.434443151639767e-06, + "loss": 0.0023, + "step": 126080 + }, + { + "epoch": 0.8087020937488977, + "grad_norm": 0.09188154339790344, + "learning_rate": 7.43395425560693e-06, + "loss": 0.0024, + "step": 126090 + }, + { + "epoch": 0.8087662306426837, + "grad_norm": 0.0633850023150444, + "learning_rate": 7.433465329074907e-06, + "loss": 0.0038, + "step": 126100 + }, + { + "epoch": 0.8088303675364699, + "grad_norm": 0.07520250976085663, + "learning_rate": 7.4329763720498226e-06, + "loss": 0.0019, + "step": 126110 + }, + { + "epoch": 0.8088945044302559, + "grad_norm": 0.09829988330602646, + "learning_rate": 7.432487384537806e-06, + "loss": 0.001, + "step": 126120 + }, + { + "epoch": 0.808958641324042, + "grad_norm": 0.09438778460025787, + "learning_rate": 7.431998366544985e-06, + "loss": 0.0026, + "step": 126130 + }, + { + "epoch": 0.8090227782178281, + "grad_norm": 0.085836261510849, + "learning_rate": 7.431509318077486e-06, + "loss": 0.0028, + "step": 126140 + }, + { + "epoch": 0.8090869151116142, + "grad_norm": 0.10999659448862076, + "learning_rate": 7.431020239141437e-06, + "loss": 0.0036, + "step": 126150 + }, + { + "epoch": 0.8091510520054003, + "grad_norm": 0.14788612723350525, + "learning_rate": 7.4305311297429665e-06, + "loss": 0.003, + "step": 126160 + }, + { + "epoch": 0.8092151888991864, + "grad_norm": 0.12257693707942963, + "learning_rate": 7.430041989888204e-06, + "loss": 0.0038, + "step": 126170 + }, + { + "epoch": 0.8092793257929726, + "grad_norm": 0.028312664479017258, + "learning_rate": 7.429552819583279e-06, + "loss": 0.0033, + "step": 126180 + }, + { + "epoch": 0.8093434626867586, + "grad_norm": 0.16652943193912506, + "learning_rate": 7.429063618834321e-06, + "loss": 0.002, + "step": 126190 + }, + { + "epoch": 0.8094075995805448, + "grad_norm": 0.12218911200761795, + "learning_rate": 7.428574387647459e-06, + "loss": 0.0032, + "step": 126200 + }, + { + "epoch": 0.8094717364743308, + "grad_norm": 0.14190293848514557, + "learning_rate": 7.428085126028826e-06, + "loss": 0.002, + "step": 126210 + }, + { + "epoch": 0.8095358733681169, + "grad_norm": 0.0556914247572422, + "learning_rate": 7.427595833984549e-06, + "loss": 0.0013, + "step": 126220 + }, + { + "epoch": 0.809600010261903, + "grad_norm": 0.10317368060350418, + "learning_rate": 7.427106511520762e-06, + "loss": 0.003, + "step": 126230 + }, + { + "epoch": 0.8096641471556891, + "grad_norm": 0.02198263444006443, + "learning_rate": 7.426617158643595e-06, + "loss": 0.0017, + "step": 126240 + }, + { + "epoch": 0.8097282840494752, + "grad_norm": 0.0942700058221817, + "learning_rate": 7.426127775359183e-06, + "loss": 0.0028, + "step": 126250 + }, + { + "epoch": 0.8097924209432613, + "grad_norm": 0.11407773941755295, + "learning_rate": 7.425638361673655e-06, + "loss": 0.0016, + "step": 126260 + }, + { + "epoch": 0.8098565578370474, + "grad_norm": 0.08928665518760681, + "learning_rate": 7.425148917593145e-06, + "loss": 0.0025, + "step": 126270 + }, + { + "epoch": 0.8099206947308335, + "grad_norm": 0.05590301752090454, + "learning_rate": 7.424659443123784e-06, + "loss": 0.0034, + "step": 126280 + }, + { + "epoch": 0.8099848316246195, + "grad_norm": 0.08511009812355042, + "learning_rate": 7.4241699382717095e-06, + "loss": 0.0033, + "step": 126290 + }, + { + "epoch": 0.8100489685184057, + "grad_norm": 0.10576985031366348, + "learning_rate": 7.423680403043052e-06, + "loss": 0.0021, + "step": 126300 + }, + { + "epoch": 0.8101131054121918, + "grad_norm": 0.09998160600662231, + "learning_rate": 7.423190837443948e-06, + "loss": 0.0025, + "step": 126310 + }, + { + "epoch": 0.8101772423059779, + "grad_norm": 0.3955870568752289, + "learning_rate": 7.422701241480529e-06, + "loss": 0.0019, + "step": 126320 + }, + { + "epoch": 0.810241379199764, + "grad_norm": 0.06922601163387299, + "learning_rate": 7.422211615158934e-06, + "loss": 0.0037, + "step": 126330 + }, + { + "epoch": 0.8103055160935501, + "grad_norm": 0.252462774515152, + "learning_rate": 7.421721958485295e-06, + "loss": 0.003, + "step": 126340 + }, + { + "epoch": 0.8103696529873362, + "grad_norm": 0.10331380367279053, + "learning_rate": 7.42123227146575e-06, + "loss": 0.0022, + "step": 126350 + }, + { + "epoch": 0.8104337898811222, + "grad_norm": 0.10928361862897873, + "learning_rate": 7.420742554106433e-06, + "loss": 0.003, + "step": 126360 + }, + { + "epoch": 0.8104979267749084, + "grad_norm": 0.5361838936805725, + "learning_rate": 7.420252806413482e-06, + "loss": 0.0019, + "step": 126370 + }, + { + "epoch": 0.8105620636686944, + "grad_norm": 0.1608307957649231, + "learning_rate": 7.419763028393034e-06, + "loss": 0.0026, + "step": 126380 + }, + { + "epoch": 0.8106262005624806, + "grad_norm": 0.36987513303756714, + "learning_rate": 7.419273220051226e-06, + "loss": 0.0085, + "step": 126390 + }, + { + "epoch": 0.8106903374562666, + "grad_norm": 0.0979725793004036, + "learning_rate": 7.418783381394195e-06, + "loss": 0.0018, + "step": 126400 + }, + { + "epoch": 0.8107544743500528, + "grad_norm": 0.15433906018733978, + "learning_rate": 7.418293512428081e-06, + "loss": 0.0026, + "step": 126410 + }, + { + "epoch": 0.8108186112438388, + "grad_norm": 0.05753166601061821, + "learning_rate": 7.41780361315902e-06, + "loss": 0.0022, + "step": 126420 + }, + { + "epoch": 0.810882748137625, + "grad_norm": 0.160835862159729, + "learning_rate": 7.417313683593152e-06, + "loss": 0.0023, + "step": 126430 + }, + { + "epoch": 0.810946885031411, + "grad_norm": 0.10924722999334335, + "learning_rate": 7.416823723736615e-06, + "loss": 0.0026, + "step": 126440 + }, + { + "epoch": 0.8110110219251971, + "grad_norm": 0.12125886976718903, + "learning_rate": 7.41633373359555e-06, + "loss": 0.0015, + "step": 126450 + }, + { + "epoch": 0.8110751588189833, + "grad_norm": 0.028079597279429436, + "learning_rate": 7.415843713176096e-06, + "loss": 0.0013, + "step": 126460 + }, + { + "epoch": 0.8111392957127693, + "grad_norm": 0.086025170981884, + "learning_rate": 7.4153536624843944e-06, + "loss": 0.0036, + "step": 126470 + }, + { + "epoch": 0.8112034326065555, + "grad_norm": 0.08843878656625748, + "learning_rate": 7.414863581526586e-06, + "loss": 0.0017, + "step": 126480 + }, + { + "epoch": 0.8112675695003415, + "grad_norm": 0.059826839715242386, + "learning_rate": 7.414373470308809e-06, + "loss": 0.0027, + "step": 126490 + }, + { + "epoch": 0.8113317063941277, + "grad_norm": 0.12478183209896088, + "learning_rate": 7.413883328837208e-06, + "loss": 0.0025, + "step": 126500 + }, + { + "epoch": 0.8113958432879137, + "grad_norm": 0.09462691098451614, + "learning_rate": 7.4133931571179244e-06, + "loss": 0.0027, + "step": 126510 + }, + { + "epoch": 0.8114599801816998, + "grad_norm": 0.26414474844932556, + "learning_rate": 7.412902955157097e-06, + "loss": 0.0028, + "step": 126520 + }, + { + "epoch": 0.8115241170754859, + "grad_norm": 0.1463708132505417, + "learning_rate": 7.412412722960875e-06, + "loss": 0.0028, + "step": 126530 + }, + { + "epoch": 0.811588253969272, + "grad_norm": 0.04612165316939354, + "learning_rate": 7.411922460535394e-06, + "loss": 0.0017, + "step": 126540 + }, + { + "epoch": 0.8116523908630581, + "grad_norm": 0.3414027690887451, + "learning_rate": 7.411432167886803e-06, + "loss": 0.0018, + "step": 126550 + }, + { + "epoch": 0.8117165277568442, + "grad_norm": 0.1312377154827118, + "learning_rate": 7.4109418450212424e-06, + "loss": 0.0032, + "step": 126560 + }, + { + "epoch": 0.8117806646506303, + "grad_norm": 0.1252509504556656, + "learning_rate": 7.410451491944859e-06, + "loss": 0.0023, + "step": 126570 + }, + { + "epoch": 0.8118448015444164, + "grad_norm": 0.09422841668128967, + "learning_rate": 7.409961108663794e-06, + "loss": 0.0011, + "step": 126580 + }, + { + "epoch": 0.8119089384382026, + "grad_norm": 0.17276807129383087, + "learning_rate": 7.409470695184193e-06, + "loss": 0.0035, + "step": 126590 + }, + { + "epoch": 0.8119730753319886, + "grad_norm": 0.1141679510474205, + "learning_rate": 7.4089802515122035e-06, + "loss": 0.0034, + "step": 126600 + }, + { + "epoch": 0.8120372122257747, + "grad_norm": 0.0753118246793747, + "learning_rate": 7.4084897776539695e-06, + "loss": 0.0022, + "step": 126610 + }, + { + "epoch": 0.8121013491195608, + "grad_norm": 0.25150066614151, + "learning_rate": 7.407999273615638e-06, + "loss": 0.0039, + "step": 126620 + }, + { + "epoch": 0.8121654860133469, + "grad_norm": 0.14544636011123657, + "learning_rate": 7.407508739403355e-06, + "loss": 0.0027, + "step": 126630 + }, + { + "epoch": 0.812229622907133, + "grad_norm": 0.08018797636032104, + "learning_rate": 7.407018175023265e-06, + "loss": 0.0027, + "step": 126640 + }, + { + "epoch": 0.8122937598009191, + "grad_norm": 0.2047436684370041, + "learning_rate": 7.406527580481519e-06, + "loss": 0.0055, + "step": 126650 + }, + { + "epoch": 0.8123578966947052, + "grad_norm": 0.10463303327560425, + "learning_rate": 7.406036955784261e-06, + "loss": 0.0035, + "step": 126660 + }, + { + "epoch": 0.8124220335884913, + "grad_norm": 0.1510915458202362, + "learning_rate": 7.405546300937641e-06, + "loss": 0.0033, + "step": 126670 + }, + { + "epoch": 0.8124861704822773, + "grad_norm": 0.10969695448875427, + "learning_rate": 7.405055615947807e-06, + "loss": 0.0017, + "step": 126680 + }, + { + "epoch": 0.8125503073760635, + "grad_norm": 0.37942472100257874, + "learning_rate": 7.4045649008209074e-06, + "loss": 0.003, + "step": 126690 + }, + { + "epoch": 0.8126144442698495, + "grad_norm": 0.08156800270080566, + "learning_rate": 7.40407415556309e-06, + "loss": 0.0035, + "step": 126700 + }, + { + "epoch": 0.8126785811636357, + "grad_norm": 0.2511783242225647, + "learning_rate": 7.403583380180506e-06, + "loss": 0.002, + "step": 126710 + }, + { + "epoch": 0.8127427180574217, + "grad_norm": 0.04892609268426895, + "learning_rate": 7.403092574679303e-06, + "loss": 0.0032, + "step": 126720 + }, + { + "epoch": 0.8128068549512079, + "grad_norm": 0.15845321118831635, + "learning_rate": 7.402601739065635e-06, + "loss": 0.0043, + "step": 126730 + }, + { + "epoch": 0.812870991844994, + "grad_norm": 0.11157383024692535, + "learning_rate": 7.402110873345649e-06, + "loss": 0.0026, + "step": 126740 + }, + { + "epoch": 0.81293512873878, + "grad_norm": 0.25845885276794434, + "learning_rate": 7.401619977525496e-06, + "loss": 0.0048, + "step": 126750 + }, + { + "epoch": 0.8129992656325662, + "grad_norm": 0.18509311974048615, + "learning_rate": 7.40112905161133e-06, + "loss": 0.0018, + "step": 126760 + }, + { + "epoch": 0.8130634025263522, + "grad_norm": 0.21958787739276886, + "learning_rate": 7.400638095609299e-06, + "loss": 0.0023, + "step": 126770 + }, + { + "epoch": 0.8131275394201384, + "grad_norm": 0.323677122592926, + "learning_rate": 7.40014710952556e-06, + "loss": 0.0032, + "step": 126780 + }, + { + "epoch": 0.8131916763139244, + "grad_norm": 0.19918932020664215, + "learning_rate": 7.399656093366259e-06, + "loss": 0.0015, + "step": 126790 + }, + { + "epoch": 0.8132558132077106, + "grad_norm": 0.2212468683719635, + "learning_rate": 7.399165047137554e-06, + "loss": 0.0034, + "step": 126800 + }, + { + "epoch": 0.8133199501014966, + "grad_norm": 0.22588036954402924, + "learning_rate": 7.398673970845596e-06, + "loss": 0.0019, + "step": 126810 + }, + { + "epoch": 0.8133840869952828, + "grad_norm": 0.13337135314941406, + "learning_rate": 7.3981828644965394e-06, + "loss": 0.0018, + "step": 126820 + }, + { + "epoch": 0.8134482238890688, + "grad_norm": 0.06398195773363113, + "learning_rate": 7.397691728096536e-06, + "loss": 0.0048, + "step": 126830 + }, + { + "epoch": 0.8135123607828549, + "grad_norm": 0.06610044091939926, + "learning_rate": 7.397200561651743e-06, + "loss": 0.0016, + "step": 126840 + }, + { + "epoch": 0.813576497676641, + "grad_norm": 0.05419051647186279, + "learning_rate": 7.396709365168313e-06, + "loss": 0.002, + "step": 126850 + }, + { + "epoch": 0.8136406345704271, + "grad_norm": 0.027123944833874702, + "learning_rate": 7.396218138652401e-06, + "loss": 0.0043, + "step": 126860 + }, + { + "epoch": 0.8137047714642132, + "grad_norm": 0.06095758080482483, + "learning_rate": 7.395726882110164e-06, + "loss": 0.0021, + "step": 126870 + }, + { + "epoch": 0.8137689083579993, + "grad_norm": 0.12250997871160507, + "learning_rate": 7.395235595547757e-06, + "loss": 0.0022, + "step": 126880 + }, + { + "epoch": 0.8138330452517855, + "grad_norm": 0.18670979142189026, + "learning_rate": 7.394744278971337e-06, + "loss": 0.0027, + "step": 126890 + }, + { + "epoch": 0.8138971821455715, + "grad_norm": 0.11035393923521042, + "learning_rate": 7.394252932387058e-06, + "loss": 0.0025, + "step": 126900 + }, + { + "epoch": 0.8139613190393576, + "grad_norm": 0.06221034377813339, + "learning_rate": 7.393761555801081e-06, + "loss": 0.0017, + "step": 126910 + }, + { + "epoch": 0.8140254559331437, + "grad_norm": 0.26314976811408997, + "learning_rate": 7.3932701492195594e-06, + "loss": 0.0026, + "step": 126920 + }, + { + "epoch": 0.8140895928269298, + "grad_norm": 0.11423099040985107, + "learning_rate": 7.3927787126486515e-06, + "loss": 0.0039, + "step": 126930 + }, + { + "epoch": 0.8141537297207159, + "grad_norm": 0.21977105736732483, + "learning_rate": 7.392287246094517e-06, + "loss": 0.0033, + "step": 126940 + }, + { + "epoch": 0.814217866614502, + "grad_norm": 0.3997001647949219, + "learning_rate": 7.391795749563315e-06, + "loss": 0.0025, + "step": 126950 + }, + { + "epoch": 0.814282003508288, + "grad_norm": 0.06910167634487152, + "learning_rate": 7.391304223061201e-06, + "loss": 0.0026, + "step": 126960 + }, + { + "epoch": 0.8143461404020742, + "grad_norm": 0.04946955293416977, + "learning_rate": 7.3908126665943355e-06, + "loss": 0.0022, + "step": 126970 + }, + { + "epoch": 0.8144102772958602, + "grad_norm": 0.05174172297120094, + "learning_rate": 7.390321080168879e-06, + "loss": 0.0016, + "step": 126980 + }, + { + "epoch": 0.8144744141896464, + "grad_norm": 0.19454753398895264, + "learning_rate": 7.389829463790993e-06, + "loss": 0.0032, + "step": 126990 + }, + { + "epoch": 0.8145385510834324, + "grad_norm": 0.03614850714802742, + "learning_rate": 7.389337817466834e-06, + "loss": 0.0015, + "step": 127000 + }, + { + "epoch": 0.8146026879772186, + "grad_norm": 0.15189678966999054, + "learning_rate": 7.388846141202566e-06, + "loss": 0.002, + "step": 127010 + }, + { + "epoch": 0.8146668248710047, + "grad_norm": 0.180029958486557, + "learning_rate": 7.388354435004346e-06, + "loss": 0.0027, + "step": 127020 + }, + { + "epoch": 0.8147309617647908, + "grad_norm": 0.07591405510902405, + "learning_rate": 7.387862698878341e-06, + "loss": 0.0014, + "step": 127030 + }, + { + "epoch": 0.8147950986585769, + "grad_norm": 0.24453487992286682, + "learning_rate": 7.387370932830709e-06, + "loss": 0.0034, + "step": 127040 + }, + { + "epoch": 0.814859235552363, + "grad_norm": 0.04842061176896095, + "learning_rate": 7.3868791368676125e-06, + "loss": 0.0042, + "step": 127050 + }, + { + "epoch": 0.8149233724461491, + "grad_norm": 0.1432727575302124, + "learning_rate": 7.386387310995213e-06, + "loss": 0.0025, + "step": 127060 + }, + { + "epoch": 0.8149875093399351, + "grad_norm": 0.005806133151054382, + "learning_rate": 7.385895455219677e-06, + "loss": 0.002, + "step": 127070 + }, + { + "epoch": 0.8150516462337213, + "grad_norm": 0.1719507873058319, + "learning_rate": 7.385403569547166e-06, + "loss": 0.0032, + "step": 127080 + }, + { + "epoch": 0.8151157831275073, + "grad_norm": 0.018636295571923256, + "learning_rate": 7.3849116539838415e-06, + "loss": 0.0074, + "step": 127090 + }, + { + "epoch": 0.8151799200212935, + "grad_norm": 0.09164224565029144, + "learning_rate": 7.384419708535872e-06, + "loss": 0.0027, + "step": 127100 + }, + { + "epoch": 0.8152440569150795, + "grad_norm": 0.1219768151640892, + "learning_rate": 7.383927733209417e-06, + "loss": 0.0015, + "step": 127110 + }, + { + "epoch": 0.8153081938088657, + "grad_norm": 0.02932930551469326, + "learning_rate": 7.383435728010646e-06, + "loss": 0.0019, + "step": 127120 + }, + { + "epoch": 0.8153723307026517, + "grad_norm": 0.056617170572280884, + "learning_rate": 7.38294369294572e-06, + "loss": 0.0012, + "step": 127130 + }, + { + "epoch": 0.8154364675964378, + "grad_norm": 0.11760145425796509, + "learning_rate": 7.382451628020808e-06, + "loss": 0.0015, + "step": 127140 + }, + { + "epoch": 0.8155006044902239, + "grad_norm": 0.19532443583011627, + "learning_rate": 7.381959533242074e-06, + "loss": 0.0028, + "step": 127150 + }, + { + "epoch": 0.81556474138401, + "grad_norm": 0.16045066714286804, + "learning_rate": 7.381467408615684e-06, + "loss": 0.0027, + "step": 127160 + }, + { + "epoch": 0.8156288782777962, + "grad_norm": 0.23277214169502258, + "learning_rate": 7.380975254147805e-06, + "loss": 0.0036, + "step": 127170 + }, + { + "epoch": 0.8156930151715822, + "grad_norm": 0.16956904530525208, + "learning_rate": 7.380483069844606e-06, + "loss": 0.003, + "step": 127180 + }, + { + "epoch": 0.8157571520653684, + "grad_norm": 0.0400191992521286, + "learning_rate": 7.379990855712251e-06, + "loss": 0.0015, + "step": 127190 + }, + { + "epoch": 0.8158212889591544, + "grad_norm": 0.2659646272659302, + "learning_rate": 7.3794986117569116e-06, + "loss": 0.0022, + "step": 127200 + }, + { + "epoch": 0.8158854258529405, + "grad_norm": 0.05728033557534218, + "learning_rate": 7.3790063379847525e-06, + "loss": 0.0048, + "step": 127210 + }, + { + "epoch": 0.8159495627467266, + "grad_norm": 0.09088273346424103, + "learning_rate": 7.378514034401944e-06, + "loss": 0.003, + "step": 127220 + }, + { + "epoch": 0.8160136996405127, + "grad_norm": 0.1122676208615303, + "learning_rate": 7.378021701014655e-06, + "loss": 0.0075, + "step": 127230 + }, + { + "epoch": 0.8160778365342988, + "grad_norm": 0.09521377086639404, + "learning_rate": 7.377529337829054e-06, + "loss": 0.0013, + "step": 127240 + }, + { + "epoch": 0.8161419734280849, + "grad_norm": 0.0657171756029129, + "learning_rate": 7.377036944851312e-06, + "loss": 0.0024, + "step": 127250 + }, + { + "epoch": 0.816206110321871, + "grad_norm": 0.08039627969264984, + "learning_rate": 7.3765445220875976e-06, + "loss": 0.0019, + "step": 127260 + }, + { + "epoch": 0.8162702472156571, + "grad_norm": 0.05933055281639099, + "learning_rate": 7.376052069544083e-06, + "loss": 0.0037, + "step": 127270 + }, + { + "epoch": 0.8163343841094431, + "grad_norm": 0.011957366019487381, + "learning_rate": 7.375559587226936e-06, + "loss": 0.0028, + "step": 127280 + }, + { + "epoch": 0.8163985210032293, + "grad_norm": 0.06866823136806488, + "learning_rate": 7.375067075142331e-06, + "loss": 0.0025, + "step": 127290 + }, + { + "epoch": 0.8164626578970154, + "grad_norm": 0.03869573399424553, + "learning_rate": 7.374574533296437e-06, + "loss": 0.0022, + "step": 127300 + }, + { + "epoch": 0.8165267947908015, + "grad_norm": 0.10332153737545013, + "learning_rate": 7.374081961695429e-06, + "loss": 0.0034, + "step": 127310 + }, + { + "epoch": 0.8165909316845876, + "grad_norm": 0.05911581963300705, + "learning_rate": 7.3735893603454755e-06, + "loss": 0.0024, + "step": 127320 + }, + { + "epoch": 0.8166550685783737, + "grad_norm": 0.056003425270318985, + "learning_rate": 7.373096729252751e-06, + "loss": 0.0061, + "step": 127330 + }, + { + "epoch": 0.8167192054721598, + "grad_norm": 0.07668650895357132, + "learning_rate": 7.372604068423429e-06, + "loss": 0.0019, + "step": 127340 + }, + { + "epoch": 0.8167833423659459, + "grad_norm": 0.11102576553821564, + "learning_rate": 7.372111377863682e-06, + "loss": 0.0033, + "step": 127350 + }, + { + "epoch": 0.816847479259732, + "grad_norm": 0.009732525795698166, + "learning_rate": 7.371618657579683e-06, + "loss": 0.0036, + "step": 127360 + }, + { + "epoch": 0.816911616153518, + "grad_norm": 0.05169150233268738, + "learning_rate": 7.37112590757761e-06, + "loss": 0.0027, + "step": 127370 + }, + { + "epoch": 0.8169757530473042, + "grad_norm": 0.14333856105804443, + "learning_rate": 7.370633127863634e-06, + "loss": 0.0022, + "step": 127380 + }, + { + "epoch": 0.8170398899410902, + "grad_norm": 0.09235555678606033, + "learning_rate": 7.370140318443929e-06, + "loss": 0.004, + "step": 127390 + }, + { + "epoch": 0.8171040268348764, + "grad_norm": 0.1903359591960907, + "learning_rate": 7.369647479324673e-06, + "loss": 0.0025, + "step": 127400 + }, + { + "epoch": 0.8171681637286624, + "grad_norm": 0.1300700604915619, + "learning_rate": 7.369154610512041e-06, + "loss": 0.0028, + "step": 127410 + }, + { + "epoch": 0.8172323006224486, + "grad_norm": 0.3448699712753296, + "learning_rate": 7.368661712012207e-06, + "loss": 0.0028, + "step": 127420 + }, + { + "epoch": 0.8172964375162346, + "grad_norm": 0.5382606387138367, + "learning_rate": 7.36816878383135e-06, + "loss": 0.0023, + "step": 127430 + }, + { + "epoch": 0.8173605744100207, + "grad_norm": 0.02058287337422371, + "learning_rate": 7.367675825975645e-06, + "loss": 0.0021, + "step": 127440 + }, + { + "epoch": 0.8174247113038069, + "grad_norm": 0.047119539231061935, + "learning_rate": 7.36718283845127e-06, + "loss": 0.0045, + "step": 127450 + }, + { + "epoch": 0.8174888481975929, + "grad_norm": 0.12040512263774872, + "learning_rate": 7.366689821264403e-06, + "loss": 0.0016, + "step": 127460 + }, + { + "epoch": 0.8175529850913791, + "grad_norm": 0.0925847664475441, + "learning_rate": 7.36619677442122e-06, + "loss": 0.0033, + "step": 127470 + }, + { + "epoch": 0.8176171219851651, + "grad_norm": 0.06187743693590164, + "learning_rate": 7.3657036979279015e-06, + "loss": 0.0023, + "step": 127480 + }, + { + "epoch": 0.8176812588789513, + "grad_norm": 0.2978494167327881, + "learning_rate": 7.365210591790623e-06, + "loss": 0.0039, + "step": 127490 + }, + { + "epoch": 0.8177453957727373, + "grad_norm": 0.04251876845955849, + "learning_rate": 7.364717456015569e-06, + "loss": 0.0023, + "step": 127500 + }, + { + "epoch": 0.8178095326665235, + "grad_norm": 0.06055247038602829, + "learning_rate": 7.364224290608911e-06, + "loss": 0.0026, + "step": 127510 + }, + { + "epoch": 0.8178736695603095, + "grad_norm": 0.0591123029589653, + "learning_rate": 7.363731095576836e-06, + "loss": 0.0022, + "step": 127520 + }, + { + "epoch": 0.8179378064540956, + "grad_norm": 0.29559198021888733, + "learning_rate": 7.3632378709255195e-06, + "loss": 0.0019, + "step": 127530 + }, + { + "epoch": 0.8180019433478817, + "grad_norm": 0.12792937457561493, + "learning_rate": 7.362744616661144e-06, + "loss": 0.0013, + "step": 127540 + }, + { + "epoch": 0.8180660802416678, + "grad_norm": 0.24491946399211884, + "learning_rate": 7.3622513327898894e-06, + "loss": 0.0019, + "step": 127550 + }, + { + "epoch": 0.8181302171354539, + "grad_norm": 0.2049376666545868, + "learning_rate": 7.361758019317939e-06, + "loss": 0.0023, + "step": 127560 + }, + { + "epoch": 0.81819435402924, + "grad_norm": 0.07258431613445282, + "learning_rate": 7.361264676251471e-06, + "loss": 0.0041, + "step": 127570 + }, + { + "epoch": 0.8182584909230262, + "grad_norm": 0.10781293362379074, + "learning_rate": 7.3607713035966675e-06, + "loss": 0.0028, + "step": 127580 + }, + { + "epoch": 0.8183226278168122, + "grad_norm": 0.05559421330690384, + "learning_rate": 7.360277901359716e-06, + "loss": 0.0024, + "step": 127590 + }, + { + "epoch": 0.8183867647105983, + "grad_norm": 0.1653282791376114, + "learning_rate": 7.3597844695467925e-06, + "loss": 0.0025, + "step": 127600 + }, + { + "epoch": 0.8184509016043844, + "grad_norm": 0.05223669484257698, + "learning_rate": 7.359291008164084e-06, + "loss": 0.0026, + "step": 127610 + }, + { + "epoch": 0.8185150384981705, + "grad_norm": 0.027781745418906212, + "learning_rate": 7.358797517217774e-06, + "loss": 0.0025, + "step": 127620 + }, + { + "epoch": 0.8185791753919566, + "grad_norm": 0.07809358090162277, + "learning_rate": 7.3583039967140446e-06, + "loss": 0.0025, + "step": 127630 + }, + { + "epoch": 0.8186433122857427, + "grad_norm": 0.22004097700119019, + "learning_rate": 7.357810446659081e-06, + "loss": 0.0034, + "step": 127640 + }, + { + "epoch": 0.8187074491795288, + "grad_norm": 0.3288693428039551, + "learning_rate": 7.357316867059068e-06, + "loss": 0.0034, + "step": 127650 + }, + { + "epoch": 0.8187715860733149, + "grad_norm": 0.13195857405662537, + "learning_rate": 7.356823257920188e-06, + "loss": 0.0017, + "step": 127660 + }, + { + "epoch": 0.8188357229671009, + "grad_norm": 0.1732707917690277, + "learning_rate": 7.35632961924863e-06, + "loss": 0.0034, + "step": 127670 + }, + { + "epoch": 0.8188998598608871, + "grad_norm": 0.057275883853435516, + "learning_rate": 7.355835951050576e-06, + "loss": 0.0011, + "step": 127680 + }, + { + "epoch": 0.8189639967546731, + "grad_norm": 0.05436362698674202, + "learning_rate": 7.355342253332216e-06, + "loss": 0.0037, + "step": 127690 + }, + { + "epoch": 0.8190281336484593, + "grad_norm": 0.17967228591442108, + "learning_rate": 7.3548485260997325e-06, + "loss": 0.0046, + "step": 127700 + }, + { + "epoch": 0.8190922705422453, + "grad_norm": 0.06257455050945282, + "learning_rate": 7.354354769359314e-06, + "loss": 0.0019, + "step": 127710 + }, + { + "epoch": 0.8191564074360315, + "grad_norm": 0.13455961644649506, + "learning_rate": 7.353860983117149e-06, + "loss": 0.0032, + "step": 127720 + }, + { + "epoch": 0.8192205443298176, + "grad_norm": 0.0575551763176918, + "learning_rate": 7.353367167379421e-06, + "loss": 0.0017, + "step": 127730 + }, + { + "epoch": 0.8192846812236037, + "grad_norm": 0.08193744719028473, + "learning_rate": 7.352873322152323e-06, + "loss": 0.0021, + "step": 127740 + }, + { + "epoch": 0.8193488181173898, + "grad_norm": 0.13074032962322235, + "learning_rate": 7.352379447442041e-06, + "loss": 0.0022, + "step": 127750 + }, + { + "epoch": 0.8194129550111758, + "grad_norm": 0.05399656295776367, + "learning_rate": 7.351885543254761e-06, + "loss": 0.0032, + "step": 127760 + }, + { + "epoch": 0.819477091904962, + "grad_norm": 0.13823533058166504, + "learning_rate": 7.351391609596675e-06, + "loss": 0.0027, + "step": 127770 + }, + { + "epoch": 0.819541228798748, + "grad_norm": 0.07404447346925735, + "learning_rate": 7.350897646473974e-06, + "loss": 0.0033, + "step": 127780 + }, + { + "epoch": 0.8196053656925342, + "grad_norm": 0.012039989233016968, + "learning_rate": 7.350403653892842e-06, + "loss": 0.0025, + "step": 127790 + }, + { + "epoch": 0.8196695025863202, + "grad_norm": 0.09605922549962997, + "learning_rate": 7.349909631859475e-06, + "loss": 0.0018, + "step": 127800 + }, + { + "epoch": 0.8197336394801064, + "grad_norm": 0.34952735900878906, + "learning_rate": 7.349415580380059e-06, + "loss": 0.0022, + "step": 127810 + }, + { + "epoch": 0.8197977763738924, + "grad_norm": 0.20634576678276062, + "learning_rate": 7.348921499460788e-06, + "loss": 0.0027, + "step": 127820 + }, + { + "epoch": 0.8198619132676785, + "grad_norm": 0.022601787000894547, + "learning_rate": 7.348427389107851e-06, + "loss": 0.0028, + "step": 127830 + }, + { + "epoch": 0.8199260501614646, + "grad_norm": 0.09635855257511139, + "learning_rate": 7.347933249327441e-06, + "loss": 0.0029, + "step": 127840 + }, + { + "epoch": 0.8199901870552507, + "grad_norm": 0.1902410238981247, + "learning_rate": 7.347439080125749e-06, + "loss": 0.003, + "step": 127850 + }, + { + "epoch": 0.8200543239490369, + "grad_norm": 0.19818753004074097, + "learning_rate": 7.346944881508967e-06, + "loss": 0.0023, + "step": 127860 + }, + { + "epoch": 0.8201184608428229, + "grad_norm": 0.10266011953353882, + "learning_rate": 7.346450653483289e-06, + "loss": 0.0013, + "step": 127870 + }, + { + "epoch": 0.8201825977366091, + "grad_norm": 0.18944767117500305, + "learning_rate": 7.3459563960549075e-06, + "loss": 0.0024, + "step": 127880 + }, + { + "epoch": 0.8202467346303951, + "grad_norm": 0.036455295979976654, + "learning_rate": 7.345462109230015e-06, + "loss": 0.0019, + "step": 127890 + }, + { + "epoch": 0.8203108715241813, + "grad_norm": 0.1035575121641159, + "learning_rate": 7.344967793014808e-06, + "loss": 0.0015, + "step": 127900 + }, + { + "epoch": 0.8203750084179673, + "grad_norm": 0.03167593851685524, + "learning_rate": 7.344473447415477e-06, + "loss": 0.002, + "step": 127910 + }, + { + "epoch": 0.8204391453117534, + "grad_norm": 0.05191560462117195, + "learning_rate": 7.3439790724382165e-06, + "loss": 0.0043, + "step": 127920 + }, + { + "epoch": 0.8205032822055395, + "grad_norm": 0.49309152364730835, + "learning_rate": 7.3434846680892255e-06, + "loss": 0.0038, + "step": 127930 + }, + { + "epoch": 0.8205674190993256, + "grad_norm": 0.17252881824970245, + "learning_rate": 7.342990234374696e-06, + "loss": 0.0018, + "step": 127940 + }, + { + "epoch": 0.8206315559931117, + "grad_norm": 0.07217154651880264, + "learning_rate": 7.342495771300825e-06, + "loss": 0.0028, + "step": 127950 + }, + { + "epoch": 0.8206956928868978, + "grad_norm": 0.03394459933042526, + "learning_rate": 7.342001278873807e-06, + "loss": 0.0026, + "step": 127960 + }, + { + "epoch": 0.8207598297806838, + "grad_norm": 0.09284265339374542, + "learning_rate": 7.3415067570998395e-06, + "loss": 0.0022, + "step": 127970 + }, + { + "epoch": 0.82082396667447, + "grad_norm": 0.10128198564052582, + "learning_rate": 7.341012205985119e-06, + "loss": 0.0034, + "step": 127980 + }, + { + "epoch": 0.820888103568256, + "grad_norm": 0.10883468389511108, + "learning_rate": 7.3405176255358425e-06, + "loss": 0.0023, + "step": 127990 + }, + { + "epoch": 0.8209522404620422, + "grad_norm": 0.07447699457406998, + "learning_rate": 7.3400230157582065e-06, + "loss": 0.0017, + "step": 128000 + }, + { + "epoch": 0.8210163773558283, + "grad_norm": 0.05366537719964981, + "learning_rate": 7.339528376658412e-06, + "loss": 0.0015, + "step": 128010 + }, + { + "epoch": 0.8210805142496144, + "grad_norm": 0.06155402213335037, + "learning_rate": 7.339033708242652e-06, + "loss": 0.0029, + "step": 128020 + }, + { + "epoch": 0.8211446511434005, + "grad_norm": 0.15945905447006226, + "learning_rate": 7.33853901051713e-06, + "loss": 0.0021, + "step": 128030 + }, + { + "epoch": 0.8212087880371866, + "grad_norm": 0.06520810723304749, + "learning_rate": 7.3380442834880414e-06, + "loss": 0.0041, + "step": 128040 + }, + { + "epoch": 0.8212729249309727, + "grad_norm": 0.2926897704601288, + "learning_rate": 7.337549527161588e-06, + "loss": 0.0019, + "step": 128050 + }, + { + "epoch": 0.8213370618247587, + "grad_norm": 0.04401979595422745, + "learning_rate": 7.337054741543968e-06, + "loss": 0.0014, + "step": 128060 + }, + { + "epoch": 0.8214011987185449, + "grad_norm": 0.07479792088270187, + "learning_rate": 7.336559926641381e-06, + "loss": 0.0031, + "step": 128070 + }, + { + "epoch": 0.8214653356123309, + "grad_norm": 0.13457804918289185, + "learning_rate": 7.33606508246003e-06, + "loss": 0.0026, + "step": 128080 + }, + { + "epoch": 0.8215294725061171, + "grad_norm": 0.08685542643070221, + "learning_rate": 7.335570209006112e-06, + "loss": 0.0029, + "step": 128090 + }, + { + "epoch": 0.8215936093999031, + "grad_norm": 0.09798700362443924, + "learning_rate": 7.335075306285832e-06, + "loss": 0.002, + "step": 128100 + }, + { + "epoch": 0.8216577462936893, + "grad_norm": 0.19563564658164978, + "learning_rate": 7.334580374305389e-06, + "loss": 0.0031, + "step": 128110 + }, + { + "epoch": 0.8217218831874753, + "grad_norm": 0.1523904949426651, + "learning_rate": 7.334085413070984e-06, + "loss": 0.0023, + "step": 128120 + }, + { + "epoch": 0.8217860200812614, + "grad_norm": 0.07425139844417572, + "learning_rate": 7.333590422588821e-06, + "loss": 0.002, + "step": 128130 + }, + { + "epoch": 0.8218501569750476, + "grad_norm": 0.06021131947636604, + "learning_rate": 7.333095402865104e-06, + "loss": 0.0014, + "step": 128140 + }, + { + "epoch": 0.8219142938688336, + "grad_norm": 0.21700631082057953, + "learning_rate": 7.332600353906032e-06, + "loss": 0.0022, + "step": 128150 + }, + { + "epoch": 0.8219784307626198, + "grad_norm": 0.049357131123542786, + "learning_rate": 7.332105275717812e-06, + "loss": 0.0019, + "step": 128160 + }, + { + "epoch": 0.8220425676564058, + "grad_norm": 0.320541113615036, + "learning_rate": 7.331610168306645e-06, + "loss": 0.0035, + "step": 128170 + }, + { + "epoch": 0.822106704550192, + "grad_norm": 0.23093421757221222, + "learning_rate": 7.3311150316787385e-06, + "loss": 0.0028, + "step": 128180 + }, + { + "epoch": 0.822170841443978, + "grad_norm": 0.12479311972856522, + "learning_rate": 7.330619865840292e-06, + "loss": 0.003, + "step": 128190 + }, + { + "epoch": 0.8222349783377642, + "grad_norm": 0.13449817895889282, + "learning_rate": 7.330124670797515e-06, + "loss": 0.0023, + "step": 128200 + }, + { + "epoch": 0.8222991152315502, + "grad_norm": 0.15000073611736298, + "learning_rate": 7.329629446556609e-06, + "loss": 0.0034, + "step": 128210 + }, + { + "epoch": 0.8223632521253363, + "grad_norm": 0.04748809337615967, + "learning_rate": 7.3291341931237825e-06, + "loss": 0.0083, + "step": 128220 + }, + { + "epoch": 0.8224273890191224, + "grad_norm": 0.13411863148212433, + "learning_rate": 7.328638910505238e-06, + "loss": 0.0025, + "step": 128230 + }, + { + "epoch": 0.8224915259129085, + "grad_norm": 0.03303762525320053, + "learning_rate": 7.3281435987071854e-06, + "loss": 0.0016, + "step": 128240 + }, + { + "epoch": 0.8225556628066946, + "grad_norm": 0.3325529396533966, + "learning_rate": 7.3276482577358285e-06, + "loss": 0.0024, + "step": 128250 + }, + { + "epoch": 0.8226197997004807, + "grad_norm": 0.02374950423836708, + "learning_rate": 7.327152887597377e-06, + "loss": 0.0027, + "step": 128260 + }, + { + "epoch": 0.8226839365942668, + "grad_norm": 0.04302360117435455, + "learning_rate": 7.326657488298035e-06, + "loss": 0.0013, + "step": 128270 + }, + { + "epoch": 0.8227480734880529, + "grad_norm": 0.0613829642534256, + "learning_rate": 7.326162059844012e-06, + "loss": 0.0032, + "step": 128280 + }, + { + "epoch": 0.822812210381839, + "grad_norm": 0.04095643386244774, + "learning_rate": 7.325666602241517e-06, + "loss": 0.0041, + "step": 128290 + }, + { + "epoch": 0.8228763472756251, + "grad_norm": 0.0421566404402256, + "learning_rate": 7.325171115496755e-06, + "loss": 0.0014, + "step": 128300 + }, + { + "epoch": 0.8229404841694112, + "grad_norm": 0.03124876134097576, + "learning_rate": 7.32467559961594e-06, + "loss": 0.0026, + "step": 128310 + }, + { + "epoch": 0.8230046210631973, + "grad_norm": 0.1574496179819107, + "learning_rate": 7.324180054605277e-06, + "loss": 0.0029, + "step": 128320 + }, + { + "epoch": 0.8230687579569834, + "grad_norm": 0.19296900928020477, + "learning_rate": 7.323684480470977e-06, + "loss": 0.002, + "step": 128330 + }, + { + "epoch": 0.8231328948507695, + "grad_norm": 0.10460526496171951, + "learning_rate": 7.323188877219249e-06, + "loss": 0.0017, + "step": 128340 + }, + { + "epoch": 0.8231970317445556, + "grad_norm": 0.0408439114689827, + "learning_rate": 7.322693244856305e-06, + "loss": 0.0021, + "step": 128350 + }, + { + "epoch": 0.8232611686383416, + "grad_norm": 0.2912435531616211, + "learning_rate": 7.322197583388354e-06, + "loss": 0.002, + "step": 128360 + }, + { + "epoch": 0.8233253055321278, + "grad_norm": 0.04117957875132561, + "learning_rate": 7.321701892821609e-06, + "loss": 0.0025, + "step": 128370 + }, + { + "epoch": 0.8233894424259138, + "grad_norm": 0.036341726779937744, + "learning_rate": 7.3212061731622785e-06, + "loss": 0.0017, + "step": 128380 + }, + { + "epoch": 0.8234535793197, + "grad_norm": 0.19118447601795197, + "learning_rate": 7.3207104244165765e-06, + "loss": 0.0018, + "step": 128390 + }, + { + "epoch": 0.823517716213486, + "grad_norm": 0.19198226928710938, + "learning_rate": 7.320214646590713e-06, + "loss": 0.0019, + "step": 128400 + }, + { + "epoch": 0.8235818531072722, + "grad_norm": 0.16182208061218262, + "learning_rate": 7.319718839690903e-06, + "loss": 0.0013, + "step": 128410 + }, + { + "epoch": 0.8236459900010582, + "grad_norm": 0.06703642755746841, + "learning_rate": 7.319223003723358e-06, + "loss": 0.0022, + "step": 128420 + }, + { + "epoch": 0.8237101268948444, + "grad_norm": 0.15633465349674225, + "learning_rate": 7.318727138694291e-06, + "loss": 0.0028, + "step": 128430 + }, + { + "epoch": 0.8237742637886305, + "grad_norm": 0.18169882893562317, + "learning_rate": 7.318231244609916e-06, + "loss": 0.0022, + "step": 128440 + }, + { + "epoch": 0.8238384006824165, + "grad_norm": 0.060424067080020905, + "learning_rate": 7.317735321476446e-06, + "loss": 0.0025, + "step": 128450 + }, + { + "epoch": 0.8239025375762027, + "grad_norm": 0.11214757710695267, + "learning_rate": 7.317239369300096e-06, + "loss": 0.002, + "step": 128460 + }, + { + "epoch": 0.8239666744699887, + "grad_norm": 0.19199307262897491, + "learning_rate": 7.316743388087081e-06, + "loss": 0.0018, + "step": 128470 + }, + { + "epoch": 0.8240308113637749, + "grad_norm": 0.22552119195461273, + "learning_rate": 7.316247377843616e-06, + "loss": 0.0021, + "step": 128480 + }, + { + "epoch": 0.8240949482575609, + "grad_norm": 0.1364929974079132, + "learning_rate": 7.315751338575916e-06, + "loss": 0.0027, + "step": 128490 + }, + { + "epoch": 0.8241590851513471, + "grad_norm": 0.26622894406318665, + "learning_rate": 7.315255270290196e-06, + "loss": 0.0045, + "step": 128500 + }, + { + "epoch": 0.8242232220451331, + "grad_norm": 0.09583581984043121, + "learning_rate": 7.314759172992671e-06, + "loss": 0.0055, + "step": 128510 + }, + { + "epoch": 0.8242873589389192, + "grad_norm": 0.1296004056930542, + "learning_rate": 7.314263046689562e-06, + "loss": 0.002, + "step": 128520 + }, + { + "epoch": 0.8243514958327053, + "grad_norm": 0.06543765962123871, + "learning_rate": 7.313766891387082e-06, + "loss": 0.0024, + "step": 128530 + }, + { + "epoch": 0.8244156327264914, + "grad_norm": 0.07670734822750092, + "learning_rate": 7.313270707091451e-06, + "loss": 0.0026, + "step": 128540 + }, + { + "epoch": 0.8244797696202775, + "grad_norm": 0.17300914227962494, + "learning_rate": 7.312774493808883e-06, + "loss": 0.0029, + "step": 128550 + }, + { + "epoch": 0.8245439065140636, + "grad_norm": 0.19316570460796356, + "learning_rate": 7.312278251545598e-06, + "loss": 0.0021, + "step": 128560 + }, + { + "epoch": 0.8246080434078498, + "grad_norm": 0.031232358887791634, + "learning_rate": 7.311781980307813e-06, + "loss": 0.0038, + "step": 128570 + }, + { + "epoch": 0.8246721803016358, + "grad_norm": 0.04309896379709244, + "learning_rate": 7.311285680101749e-06, + "loss": 0.0029, + "step": 128580 + }, + { + "epoch": 0.824736317195422, + "grad_norm": 0.4938768446445465, + "learning_rate": 7.310789350933623e-06, + "loss": 0.0044, + "step": 128590 + }, + { + "epoch": 0.824800454089208, + "grad_norm": 0.06435202807188034, + "learning_rate": 7.310292992809655e-06, + "loss": 0.0021, + "step": 128600 + }, + { + "epoch": 0.8248645909829941, + "grad_norm": 0.21440468728542328, + "learning_rate": 7.309796605736065e-06, + "loss": 0.0028, + "step": 128610 + }, + { + "epoch": 0.8249287278767802, + "grad_norm": 0.10133481025695801, + "learning_rate": 7.309300189719072e-06, + "loss": 0.0018, + "step": 128620 + }, + { + "epoch": 0.8249928647705663, + "grad_norm": 0.04468727484345436, + "learning_rate": 7.308803744764898e-06, + "loss": 0.004, + "step": 128630 + }, + { + "epoch": 0.8250570016643524, + "grad_norm": 0.06408118456602097, + "learning_rate": 7.308307270879762e-06, + "loss": 0.0015, + "step": 128640 + }, + { + "epoch": 0.8251211385581385, + "grad_norm": 0.1306520700454712, + "learning_rate": 7.307810768069888e-06, + "loss": 0.003, + "step": 128650 + }, + { + "epoch": 0.8251852754519245, + "grad_norm": 0.012615872547030449, + "learning_rate": 7.307314236341494e-06, + "loss": 0.0019, + "step": 128660 + }, + { + "epoch": 0.8252494123457107, + "grad_norm": 0.046122532337903976, + "learning_rate": 7.306817675700804e-06, + "loss": 0.0015, + "step": 128670 + }, + { + "epoch": 0.8253135492394967, + "grad_norm": 0.07508425414562225, + "learning_rate": 7.30632108615404e-06, + "loss": 0.0024, + "step": 128680 + }, + { + "epoch": 0.8253776861332829, + "grad_norm": 0.26308661699295044, + "learning_rate": 7.3058244677074245e-06, + "loss": 0.0016, + "step": 128690 + }, + { + "epoch": 0.8254418230270689, + "grad_norm": 0.15130212903022766, + "learning_rate": 7.305327820367179e-06, + "loss": 0.0014, + "step": 128700 + }, + { + "epoch": 0.8255059599208551, + "grad_norm": 0.06789848208427429, + "learning_rate": 7.304831144139531e-06, + "loss": 0.0019, + "step": 128710 + }, + { + "epoch": 0.8255700968146412, + "grad_norm": 0.009132793173193932, + "learning_rate": 7.3043344390307005e-06, + "loss": 0.0019, + "step": 128720 + }, + { + "epoch": 0.8256342337084273, + "grad_norm": 0.0689903199672699, + "learning_rate": 7.303837705046912e-06, + "loss": 0.002, + "step": 128730 + }, + { + "epoch": 0.8256983706022134, + "grad_norm": 0.028618192300200462, + "learning_rate": 7.303340942194391e-06, + "loss": 0.0038, + "step": 128740 + }, + { + "epoch": 0.8257625074959994, + "grad_norm": 0.2864423990249634, + "learning_rate": 7.302844150479362e-06, + "loss": 0.0044, + "step": 128750 + }, + { + "epoch": 0.8258266443897856, + "grad_norm": 0.165174663066864, + "learning_rate": 7.302347329908049e-06, + "loss": 0.0038, + "step": 128760 + }, + { + "epoch": 0.8258907812835716, + "grad_norm": 0.09707002341747284, + "learning_rate": 7.301850480486678e-06, + "loss": 0.0018, + "step": 128770 + }, + { + "epoch": 0.8259549181773578, + "grad_norm": 0.16221855580806732, + "learning_rate": 7.301353602221478e-06, + "loss": 0.0036, + "step": 128780 + }, + { + "epoch": 0.8260190550711438, + "grad_norm": 0.21278564631938934, + "learning_rate": 7.300856695118671e-06, + "loss": 0.0017, + "step": 128790 + }, + { + "epoch": 0.82608319196493, + "grad_norm": 0.0670095831155777, + "learning_rate": 7.300359759184485e-06, + "loss": 0.0033, + "step": 128800 + }, + { + "epoch": 0.826147328858716, + "grad_norm": 0.07428467273712158, + "learning_rate": 7.2998627944251476e-06, + "loss": 0.0034, + "step": 128810 + }, + { + "epoch": 0.8262114657525021, + "grad_norm": 0.2314709573984146, + "learning_rate": 7.299365800846887e-06, + "loss": 0.0022, + "step": 128820 + }, + { + "epoch": 0.8262756026462882, + "grad_norm": 0.07954928278923035, + "learning_rate": 7.298868778455928e-06, + "loss": 0.0026, + "step": 128830 + }, + { + "epoch": 0.8263397395400743, + "grad_norm": 0.06688231974840164, + "learning_rate": 7.2983717272585e-06, + "loss": 0.0015, + "step": 128840 + }, + { + "epoch": 0.8264038764338605, + "grad_norm": 0.1042524129152298, + "learning_rate": 7.297874647260832e-06, + "loss": 0.003, + "step": 128850 + }, + { + "epoch": 0.8264680133276465, + "grad_norm": 0.0997534990310669, + "learning_rate": 7.297377538469152e-06, + "loss": 0.0052, + "step": 128860 + }, + { + "epoch": 0.8265321502214327, + "grad_norm": 0.14298345148563385, + "learning_rate": 7.296880400889691e-06, + "loss": 0.0027, + "step": 128870 + }, + { + "epoch": 0.8265962871152187, + "grad_norm": 0.05371713638305664, + "learning_rate": 7.2963832345286766e-06, + "loss": 0.0058, + "step": 128880 + }, + { + "epoch": 0.8266604240090049, + "grad_norm": 0.14610141515731812, + "learning_rate": 7.295886039392337e-06, + "loss": 0.0021, + "step": 128890 + }, + { + "epoch": 0.8267245609027909, + "grad_norm": 0.19027216732501984, + "learning_rate": 7.295388815486905e-06, + "loss": 0.0029, + "step": 128900 + }, + { + "epoch": 0.826788697796577, + "grad_norm": 0.24345919489860535, + "learning_rate": 7.294891562818612e-06, + "loss": 0.004, + "step": 128910 + }, + { + "epoch": 0.8268528346903631, + "grad_norm": 0.19334959983825684, + "learning_rate": 7.294394281393689e-06, + "loss": 0.0031, + "step": 128920 + }, + { + "epoch": 0.8269169715841492, + "grad_norm": 0.18153344094753265, + "learning_rate": 7.293896971218362e-06, + "loss": 0.0029, + "step": 128930 + }, + { + "epoch": 0.8269811084779353, + "grad_norm": 0.14814141392707825, + "learning_rate": 7.2933996322988695e-06, + "loss": 0.0025, + "step": 128940 + }, + { + "epoch": 0.8270452453717214, + "grad_norm": 1.0534054040908813, + "learning_rate": 7.292902264641439e-06, + "loss": 0.0065, + "step": 128950 + }, + { + "epoch": 0.8271093822655075, + "grad_norm": 0.11299543082714081, + "learning_rate": 7.292404868252307e-06, + "loss": 0.0028, + "step": 128960 + }, + { + "epoch": 0.8271735191592936, + "grad_norm": 0.513168215751648, + "learning_rate": 7.2919074431377e-06, + "loss": 0.0018, + "step": 128970 + }, + { + "epoch": 0.8272376560530796, + "grad_norm": 0.08805724233388901, + "learning_rate": 7.291409989303857e-06, + "loss": 0.0021, + "step": 128980 + }, + { + "epoch": 0.8273017929468658, + "grad_norm": 0.028845224529504776, + "learning_rate": 7.290912506757009e-06, + "loss": 0.0021, + "step": 128990 + }, + { + "epoch": 0.8273659298406519, + "grad_norm": 0.13066190481185913, + "learning_rate": 7.29041499550339e-06, + "loss": 0.0024, + "step": 129000 + }, + { + "epoch": 0.827430066734438, + "grad_norm": 0.08103853464126587, + "learning_rate": 7.289917455549234e-06, + "loss": 0.002, + "step": 129010 + }, + { + "epoch": 0.8274942036282241, + "grad_norm": 0.07092414796352386, + "learning_rate": 7.289419886900775e-06, + "loss": 0.0026, + "step": 129020 + }, + { + "epoch": 0.8275583405220102, + "grad_norm": 0.0720643550157547, + "learning_rate": 7.288922289564249e-06, + "loss": 0.0022, + "step": 129030 + }, + { + "epoch": 0.8276224774157963, + "grad_norm": 0.0965786948800087, + "learning_rate": 7.288424663545891e-06, + "loss": 0.0027, + "step": 129040 + }, + { + "epoch": 0.8276866143095823, + "grad_norm": 0.027247527614235878, + "learning_rate": 7.287927008851937e-06, + "loss": 0.0031, + "step": 129050 + }, + { + "epoch": 0.8277507512033685, + "grad_norm": 0.041653770953416824, + "learning_rate": 7.2874293254886206e-06, + "loss": 0.0024, + "step": 129060 + }, + { + "epoch": 0.8278148880971545, + "grad_norm": 0.09017782658338547, + "learning_rate": 7.2869316134621825e-06, + "loss": 0.002, + "step": 129070 + }, + { + "epoch": 0.8278790249909407, + "grad_norm": 0.24532608687877655, + "learning_rate": 7.2864338727788556e-06, + "loss": 0.0016, + "step": 129080 + }, + { + "epoch": 0.8279431618847267, + "grad_norm": 0.00983749981969595, + "learning_rate": 7.285936103444878e-06, + "loss": 0.0021, + "step": 129090 + }, + { + "epoch": 0.8280072987785129, + "grad_norm": 0.20129895210266113, + "learning_rate": 7.2854383054664865e-06, + "loss": 0.0015, + "step": 129100 + }, + { + "epoch": 0.8280714356722989, + "grad_norm": 0.14603929221630096, + "learning_rate": 7.284940478849921e-06, + "loss": 0.0033, + "step": 129110 + }, + { + "epoch": 0.828135572566085, + "grad_norm": 0.14177480340003967, + "learning_rate": 7.284442623601417e-06, + "loss": 0.0038, + "step": 129120 + }, + { + "epoch": 0.8281997094598712, + "grad_norm": 0.024781549349427223, + "learning_rate": 7.2839447397272155e-06, + "loss": 0.0064, + "step": 129130 + }, + { + "epoch": 0.8282638463536572, + "grad_norm": 0.0348445363342762, + "learning_rate": 7.283446827233554e-06, + "loss": 0.0033, + "step": 129140 + }, + { + "epoch": 0.8283279832474434, + "grad_norm": 0.06163106858730316, + "learning_rate": 7.282948886126672e-06, + "loss": 0.0028, + "step": 129150 + }, + { + "epoch": 0.8283921201412294, + "grad_norm": 0.18024040758609772, + "learning_rate": 7.282450916412809e-06, + "loss": 0.0021, + "step": 129160 + }, + { + "epoch": 0.8284562570350156, + "grad_norm": 0.012435585260391235, + "learning_rate": 7.281952918098203e-06, + "loss": 0.0025, + "step": 129170 + }, + { + "epoch": 0.8285203939288016, + "grad_norm": 0.18005426228046417, + "learning_rate": 7.281454891189098e-06, + "loss": 0.0023, + "step": 129180 + }, + { + "epoch": 0.8285845308225878, + "grad_norm": 0.10598894953727722, + "learning_rate": 7.280956835691732e-06, + "loss": 0.0025, + "step": 129190 + }, + { + "epoch": 0.8286486677163738, + "grad_norm": 0.13215792179107666, + "learning_rate": 7.280458751612346e-06, + "loss": 0.0016, + "step": 129200 + }, + { + "epoch": 0.82871280461016, + "grad_norm": 0.12053846567869186, + "learning_rate": 7.279960638957182e-06, + "loss": 0.0041, + "step": 129210 + }, + { + "epoch": 0.828776941503946, + "grad_norm": 0.08754531294107437, + "learning_rate": 7.279462497732484e-06, + "loss": 0.0035, + "step": 129220 + }, + { + "epoch": 0.8288410783977321, + "grad_norm": 0.08480066806077957, + "learning_rate": 7.278964327944489e-06, + "loss": 0.0014, + "step": 129230 + }, + { + "epoch": 0.8289052152915182, + "grad_norm": 0.1272183209657669, + "learning_rate": 7.2784661295994445e-06, + "loss": 0.0029, + "step": 129240 + }, + { + "epoch": 0.8289693521853043, + "grad_norm": 0.03505934029817581, + "learning_rate": 7.277967902703589e-06, + "loss": 0.0019, + "step": 129250 + }, + { + "epoch": 0.8290334890790904, + "grad_norm": 0.08902698010206223, + "learning_rate": 7.277469647263169e-06, + "loss": 0.0015, + "step": 129260 + }, + { + "epoch": 0.8290976259728765, + "grad_norm": 0.053147412836551666, + "learning_rate": 7.276971363284424e-06, + "loss": 0.0027, + "step": 129270 + }, + { + "epoch": 0.8291617628666627, + "grad_norm": 0.07472405582666397, + "learning_rate": 7.276473050773604e-06, + "loss": 0.0026, + "step": 129280 + }, + { + "epoch": 0.8292258997604487, + "grad_norm": 0.01741631329059601, + "learning_rate": 7.275974709736947e-06, + "loss": 0.0023, + "step": 129290 + }, + { + "epoch": 0.8292900366542348, + "grad_norm": 0.12162137031555176, + "learning_rate": 7.275476340180703e-06, + "loss": 0.0027, + "step": 129300 + }, + { + "epoch": 0.8293541735480209, + "grad_norm": 0.10206171870231628, + "learning_rate": 7.2749779421111126e-06, + "loss": 0.002, + "step": 129310 + }, + { + "epoch": 0.829418310441807, + "grad_norm": 0.08548274636268616, + "learning_rate": 7.274479515534423e-06, + "loss": 0.0019, + "step": 129320 + }, + { + "epoch": 0.8294824473355931, + "grad_norm": 0.3585454523563385, + "learning_rate": 7.273981060456878e-06, + "loss": 0.0032, + "step": 129330 + }, + { + "epoch": 0.8295465842293792, + "grad_norm": 0.13993075489997864, + "learning_rate": 7.273482576884726e-06, + "loss": 0.0049, + "step": 129340 + }, + { + "epoch": 0.8296107211231653, + "grad_norm": 0.09641119092702866, + "learning_rate": 7.272984064824213e-06, + "loss": 0.0017, + "step": 129350 + }, + { + "epoch": 0.8296748580169514, + "grad_norm": 0.08146881312131882, + "learning_rate": 7.272485524281585e-06, + "loss": 0.0014, + "step": 129360 + }, + { + "epoch": 0.8297389949107374, + "grad_norm": 0.17681117355823517, + "learning_rate": 7.27198695526309e-06, + "loss": 0.0024, + "step": 129370 + }, + { + "epoch": 0.8298031318045236, + "grad_norm": 0.08128388226032257, + "learning_rate": 7.271488357774974e-06, + "loss": 0.0022, + "step": 129380 + }, + { + "epoch": 0.8298672686983096, + "grad_norm": 0.2925407886505127, + "learning_rate": 7.270989731823486e-06, + "loss": 0.0036, + "step": 129390 + }, + { + "epoch": 0.8299314055920958, + "grad_norm": 0.07133324444293976, + "learning_rate": 7.270491077414873e-06, + "loss": 0.0033, + "step": 129400 + }, + { + "epoch": 0.8299955424858819, + "grad_norm": 0.11704239249229431, + "learning_rate": 7.269992394555385e-06, + "loss": 0.0015, + "step": 129410 + }, + { + "epoch": 0.830059679379668, + "grad_norm": 0.0500614158809185, + "learning_rate": 7.269493683251269e-06, + "loss": 0.0029, + "step": 129420 + }, + { + "epoch": 0.8301238162734541, + "grad_norm": 0.2708382308483124, + "learning_rate": 7.268994943508776e-06, + "loss": 0.0036, + "step": 129430 + }, + { + "epoch": 0.8301879531672401, + "grad_norm": 0.1924009919166565, + "learning_rate": 7.268496175334153e-06, + "loss": 0.0024, + "step": 129440 + }, + { + "epoch": 0.8302520900610263, + "grad_norm": 0.1046387329697609, + "learning_rate": 7.2679973787336535e-06, + "loss": 0.0028, + "step": 129450 + }, + { + "epoch": 0.8303162269548123, + "grad_norm": 0.12897948920726776, + "learning_rate": 7.267498553713525e-06, + "loss": 0.0035, + "step": 129460 + }, + { + "epoch": 0.8303803638485985, + "grad_norm": 0.11709784716367722, + "learning_rate": 7.26699970028002e-06, + "loss": 0.0025, + "step": 129470 + }, + { + "epoch": 0.8304445007423845, + "grad_norm": 0.04053686186671257, + "learning_rate": 7.266500818439387e-06, + "loss": 0.0014, + "step": 129480 + }, + { + "epoch": 0.8305086376361707, + "grad_norm": 0.2003374546766281, + "learning_rate": 7.26600190819788e-06, + "loss": 0.0021, + "step": 129490 + }, + { + "epoch": 0.8305727745299567, + "grad_norm": 0.12915728986263275, + "learning_rate": 7.265502969561748e-06, + "loss": 0.0021, + "step": 129500 + }, + { + "epoch": 0.8306369114237429, + "grad_norm": 0.0786924734711647, + "learning_rate": 7.265004002537247e-06, + "loss": 0.002, + "step": 129510 + }, + { + "epoch": 0.8307010483175289, + "grad_norm": 0.2431574910879135, + "learning_rate": 7.264505007130627e-06, + "loss": 0.0023, + "step": 129520 + }, + { + "epoch": 0.830765185211315, + "grad_norm": 0.09418448060750961, + "learning_rate": 7.2640059833481395e-06, + "loss": 0.0034, + "step": 129530 + }, + { + "epoch": 0.8308293221051011, + "grad_norm": 0.09966879338026047, + "learning_rate": 7.26350693119604e-06, + "loss": 0.0031, + "step": 129540 + }, + { + "epoch": 0.8308934589988872, + "grad_norm": 0.020565232262015343, + "learning_rate": 7.263007850680579e-06, + "loss": 0.0018, + "step": 129550 + }, + { + "epoch": 0.8309575958926734, + "grad_norm": 0.04436400160193443, + "learning_rate": 7.262508741808014e-06, + "loss": 0.0019, + "step": 129560 + }, + { + "epoch": 0.8310217327864594, + "grad_norm": 0.12050460278987885, + "learning_rate": 7.2620096045845965e-06, + "loss": 0.003, + "step": 129570 + }, + { + "epoch": 0.8310858696802456, + "grad_norm": 0.10381973534822464, + "learning_rate": 7.261510439016583e-06, + "loss": 0.0032, + "step": 129580 + }, + { + "epoch": 0.8311500065740316, + "grad_norm": 0.16438117623329163, + "learning_rate": 7.261011245110227e-06, + "loss": 0.0025, + "step": 129590 + }, + { + "epoch": 0.8312141434678177, + "grad_norm": 0.2219746708869934, + "learning_rate": 7.260512022871785e-06, + "loss": 0.0047, + "step": 129600 + }, + { + "epoch": 0.8312782803616038, + "grad_norm": 0.14013536274433136, + "learning_rate": 7.260012772307511e-06, + "loss": 0.0017, + "step": 129610 + }, + { + "epoch": 0.8313424172553899, + "grad_norm": 0.16215287148952484, + "learning_rate": 7.259513493423661e-06, + "loss": 0.002, + "step": 129620 + }, + { + "epoch": 0.831406554149176, + "grad_norm": 0.27494677901268005, + "learning_rate": 7.259014186226493e-06, + "loss": 0.0025, + "step": 129630 + }, + { + "epoch": 0.8314706910429621, + "grad_norm": 0.03116893582046032, + "learning_rate": 7.258514850722263e-06, + "loss": 0.002, + "step": 129640 + }, + { + "epoch": 0.8315348279367482, + "grad_norm": 0.23137922585010529, + "learning_rate": 7.258015486917226e-06, + "loss": 0.0027, + "step": 129650 + }, + { + "epoch": 0.8315989648305343, + "grad_norm": 0.11415555328130722, + "learning_rate": 7.257516094817644e-06, + "loss": 0.0017, + "step": 129660 + }, + { + "epoch": 0.8316631017243203, + "grad_norm": 0.2785172164440155, + "learning_rate": 7.257016674429769e-06, + "loss": 0.0017, + "step": 129670 + }, + { + "epoch": 0.8317272386181065, + "grad_norm": 0.06461936235427856, + "learning_rate": 7.256517225759864e-06, + "loss": 0.0013, + "step": 129680 + }, + { + "epoch": 0.8317913755118926, + "grad_norm": 0.039516519755125046, + "learning_rate": 7.256017748814185e-06, + "loss": 0.0041, + "step": 129690 + }, + { + "epoch": 0.8318555124056787, + "grad_norm": 0.18391795456409454, + "learning_rate": 7.255518243598992e-06, + "loss": 0.0034, + "step": 129700 + }, + { + "epoch": 0.8319196492994648, + "grad_norm": 0.14168499410152435, + "learning_rate": 7.2550187101205416e-06, + "loss": 0.0019, + "step": 129710 + }, + { + "epoch": 0.8319837861932509, + "grad_norm": 0.16390486061573029, + "learning_rate": 7.254519148385095e-06, + "loss": 0.0024, + "step": 129720 + }, + { + "epoch": 0.832047923087037, + "grad_norm": 0.15905983746051788, + "learning_rate": 7.254019558398915e-06, + "loss": 0.0078, + "step": 129730 + }, + { + "epoch": 0.832112059980823, + "grad_norm": 0.046173542737960815, + "learning_rate": 7.253519940168256e-06, + "loss": 0.0023, + "step": 129740 + }, + { + "epoch": 0.8321761968746092, + "grad_norm": 0.14757530391216278, + "learning_rate": 7.253020293699385e-06, + "loss": 0.0023, + "step": 129750 + }, + { + "epoch": 0.8322403337683952, + "grad_norm": 0.07440400868654251, + "learning_rate": 7.252520618998555e-06, + "loss": 0.0029, + "step": 129760 + }, + { + "epoch": 0.8323044706621814, + "grad_norm": 0.14738357067108154, + "learning_rate": 7.252020916072035e-06, + "loss": 0.0014, + "step": 129770 + }, + { + "epoch": 0.8323686075559674, + "grad_norm": 0.19934849441051483, + "learning_rate": 7.251521184926083e-06, + "loss": 0.0059, + "step": 129780 + }, + { + "epoch": 0.8324327444497536, + "grad_norm": 0.07529287040233612, + "learning_rate": 7.251021425566962e-06, + "loss": 0.0021, + "step": 129790 + }, + { + "epoch": 0.8324968813435396, + "grad_norm": 0.06696192920207977, + "learning_rate": 7.250521638000931e-06, + "loss": 0.0016, + "step": 129800 + }, + { + "epoch": 0.8325610182373258, + "grad_norm": 0.17724058032035828, + "learning_rate": 7.250021822234259e-06, + "loss": 0.0031, + "step": 129810 + }, + { + "epoch": 0.8326251551311118, + "grad_norm": 0.12434009462594986, + "learning_rate": 7.2495219782732025e-06, + "loss": 0.0033, + "step": 129820 + }, + { + "epoch": 0.8326892920248979, + "grad_norm": 0.03853723779320717, + "learning_rate": 7.249022106124028e-06, + "loss": 0.002, + "step": 129830 + }, + { + "epoch": 0.8327534289186841, + "grad_norm": 0.09288733452558517, + "learning_rate": 7.248522205793002e-06, + "loss": 0.0034, + "step": 129840 + }, + { + "epoch": 0.8328175658124701, + "grad_norm": 0.3627587854862213, + "learning_rate": 7.248022277286384e-06, + "loss": 0.0031, + "step": 129850 + }, + { + "epoch": 0.8328817027062563, + "grad_norm": 0.06937572360038757, + "learning_rate": 7.247522320610441e-06, + "loss": 0.0025, + "step": 129860 + }, + { + "epoch": 0.8329458396000423, + "grad_norm": 0.062247905880212784, + "learning_rate": 7.247022335771436e-06, + "loss": 0.0036, + "step": 129870 + }, + { + "epoch": 0.8330099764938285, + "grad_norm": 0.06953172385692596, + "learning_rate": 7.246522322775635e-06, + "loss": 0.002, + "step": 129880 + }, + { + "epoch": 0.8330741133876145, + "grad_norm": 0.141593337059021, + "learning_rate": 7.2460222816293035e-06, + "loss": 0.0023, + "step": 129890 + }, + { + "epoch": 0.8331382502814006, + "grad_norm": 0.03468858078122139, + "learning_rate": 7.245522212338709e-06, + "loss": 0.0035, + "step": 129900 + }, + { + "epoch": 0.8332023871751867, + "grad_norm": 0.3736875057220459, + "learning_rate": 7.245022114910116e-06, + "loss": 0.0055, + "step": 129910 + }, + { + "epoch": 0.8332665240689728, + "grad_norm": 0.04381706193089485, + "learning_rate": 7.244521989349791e-06, + "loss": 0.0017, + "step": 129920 + }, + { + "epoch": 0.8333306609627589, + "grad_norm": 0.10753746330738068, + "learning_rate": 7.244021835664001e-06, + "loss": 0.0022, + "step": 129930 + }, + { + "epoch": 0.833394797856545, + "grad_norm": 0.03241729736328125, + "learning_rate": 7.243521653859015e-06, + "loss": 0.0033, + "step": 129940 + }, + { + "epoch": 0.8334589347503311, + "grad_norm": 0.06612688302993774, + "learning_rate": 7.243021443941098e-06, + "loss": 0.003, + "step": 129950 + }, + { + "epoch": 0.8335230716441172, + "grad_norm": 0.274962842464447, + "learning_rate": 7.2425212059165196e-06, + "loss": 0.0025, + "step": 129960 + }, + { + "epoch": 0.8335872085379032, + "grad_norm": 0.20958292484283447, + "learning_rate": 7.242020939791547e-06, + "loss": 0.003, + "step": 129970 + }, + { + "epoch": 0.8336513454316894, + "grad_norm": 0.11247469484806061, + "learning_rate": 7.2415206455724506e-06, + "loss": 0.002, + "step": 129980 + }, + { + "epoch": 0.8337154823254755, + "grad_norm": 0.06534750759601593, + "learning_rate": 7.2410203232655e-06, + "loss": 0.0017, + "step": 129990 + }, + { + "epoch": 0.8337796192192616, + "grad_norm": 0.17713600397109985, + "learning_rate": 7.240519972876961e-06, + "loss": 0.0058, + "step": 130000 + }, + { + "epoch": 0.8338437561130477, + "grad_norm": 0.06117572262883186, + "learning_rate": 7.2400195944131054e-06, + "loss": 0.0028, + "step": 130010 + }, + { + "epoch": 0.8339078930068338, + "grad_norm": 0.1474536657333374, + "learning_rate": 7.239519187880204e-06, + "loss": 0.0019, + "step": 130020 + }, + { + "epoch": 0.8339720299006199, + "grad_norm": 0.17380358278751373, + "learning_rate": 7.239018753284527e-06, + "loss": 0.0025, + "step": 130030 + }, + { + "epoch": 0.834036166794406, + "grad_norm": 0.03862884268164635, + "learning_rate": 7.238518290632343e-06, + "loss": 0.0013, + "step": 130040 + }, + { + "epoch": 0.8341003036881921, + "grad_norm": 0.11746586114168167, + "learning_rate": 7.238017799929926e-06, + "loss": 0.002, + "step": 130050 + }, + { + "epoch": 0.8341644405819781, + "grad_norm": 0.048239219933748245, + "learning_rate": 7.237517281183547e-06, + "loss": 0.004, + "step": 130060 + }, + { + "epoch": 0.8342285774757643, + "grad_norm": 0.14999274909496307, + "learning_rate": 7.237016734399478e-06, + "loss": 0.0025, + "step": 130070 + }, + { + "epoch": 0.8342927143695503, + "grad_norm": 0.037866171449422836, + "learning_rate": 7.2365161595839885e-06, + "loss": 0.0027, + "step": 130080 + }, + { + "epoch": 0.8343568512633365, + "grad_norm": 0.13778047263622284, + "learning_rate": 7.236015556743355e-06, + "loss": 0.0105, + "step": 130090 + }, + { + "epoch": 0.8344209881571225, + "grad_norm": 0.06160407513380051, + "learning_rate": 7.235514925883848e-06, + "loss": 0.002, + "step": 130100 + }, + { + "epoch": 0.8344851250509087, + "grad_norm": 0.10367187112569809, + "learning_rate": 7.235014267011742e-06, + "loss": 0.0052, + "step": 130110 + }, + { + "epoch": 0.8345492619446948, + "grad_norm": 0.11204732954502106, + "learning_rate": 7.234513580133307e-06, + "loss": 0.0022, + "step": 130120 + }, + { + "epoch": 0.8346133988384808, + "grad_norm": 0.24648122489452362, + "learning_rate": 7.234012865254822e-06, + "loss": 0.0027, + "step": 130130 + }, + { + "epoch": 0.834677535732267, + "grad_norm": 0.1494959592819214, + "learning_rate": 7.23351212238256e-06, + "loss": 0.0016, + "step": 130140 + }, + { + "epoch": 0.834741672626053, + "grad_norm": 0.16294920444488525, + "learning_rate": 7.233011351522794e-06, + "loss": 0.0026, + "step": 130150 + }, + { + "epoch": 0.8348058095198392, + "grad_norm": 0.07746896147727966, + "learning_rate": 7.2325105526818e-06, + "loss": 0.0034, + "step": 130160 + }, + { + "epoch": 0.8348699464136252, + "grad_norm": 0.1173417940735817, + "learning_rate": 7.232009725865853e-06, + "loss": 0.0018, + "step": 130170 + }, + { + "epoch": 0.8349340833074114, + "grad_norm": 0.3902769684791565, + "learning_rate": 7.231508871081228e-06, + "loss": 0.0021, + "step": 130180 + }, + { + "epoch": 0.8349982202011974, + "grad_norm": 0.05905061960220337, + "learning_rate": 7.231007988334202e-06, + "loss": 0.0025, + "step": 130190 + }, + { + "epoch": 0.8350623570949836, + "grad_norm": 0.17392592132091522, + "learning_rate": 7.230507077631054e-06, + "loss": 0.0028, + "step": 130200 + }, + { + "epoch": 0.8351264939887696, + "grad_norm": 0.09936745464801788, + "learning_rate": 7.230006138978055e-06, + "loss": 0.0033, + "step": 130210 + }, + { + "epoch": 0.8351906308825557, + "grad_norm": 0.2895844578742981, + "learning_rate": 7.229505172381488e-06, + "loss": 0.0046, + "step": 130220 + }, + { + "epoch": 0.8352547677763418, + "grad_norm": 0.0891617015004158, + "learning_rate": 7.229004177847626e-06, + "loss": 0.004, + "step": 130230 + }, + { + "epoch": 0.8353189046701279, + "grad_norm": 0.041115667670965195, + "learning_rate": 7.22850315538275e-06, + "loss": 0.0025, + "step": 130240 + }, + { + "epoch": 0.835383041563914, + "grad_norm": 0.10365470498800278, + "learning_rate": 7.228002104993135e-06, + "loss": 0.005, + "step": 130250 + }, + { + "epoch": 0.8354471784577001, + "grad_norm": 0.11892421543598175, + "learning_rate": 7.227501026685063e-06, + "loss": 0.0026, + "step": 130260 + }, + { + "epoch": 0.8355113153514863, + "grad_norm": 0.053219012916088104, + "learning_rate": 7.22699992046481e-06, + "loss": 0.0028, + "step": 130270 + }, + { + "epoch": 0.8355754522452723, + "grad_norm": 0.1299111396074295, + "learning_rate": 7.226498786338658e-06, + "loss": 0.003, + "step": 130280 + }, + { + "epoch": 0.8356395891390584, + "grad_norm": 0.04380191117525101, + "learning_rate": 7.225997624312883e-06, + "loss": 0.0015, + "step": 130290 + }, + { + "epoch": 0.8357037260328445, + "grad_norm": 0.1472407877445221, + "learning_rate": 7.225496434393769e-06, + "loss": 0.0036, + "step": 130300 + }, + { + "epoch": 0.8357678629266306, + "grad_norm": 0.13299348950386047, + "learning_rate": 7.224995216587592e-06, + "loss": 0.0023, + "step": 130310 + }, + { + "epoch": 0.8358319998204167, + "grad_norm": 0.0585072822868824, + "learning_rate": 7.224493970900636e-06, + "loss": 0.0038, + "step": 130320 + }, + { + "epoch": 0.8358961367142028, + "grad_norm": 0.078093983232975, + "learning_rate": 7.22399269733918e-06, + "loss": 0.0036, + "step": 130330 + }, + { + "epoch": 0.8359602736079889, + "grad_norm": 0.16029320657253265, + "learning_rate": 7.223491395909506e-06, + "loss": 0.0023, + "step": 130340 + }, + { + "epoch": 0.836024410501775, + "grad_norm": 0.08965877443552017, + "learning_rate": 7.2229900666178964e-06, + "loss": 0.0037, + "step": 130350 + }, + { + "epoch": 0.836088547395561, + "grad_norm": 0.12641486525535583, + "learning_rate": 7.2224887094706305e-06, + "loss": 0.0015, + "step": 130360 + }, + { + "epoch": 0.8361526842893472, + "grad_norm": 0.27190524339675903, + "learning_rate": 7.221987324473996e-06, + "loss": 0.0025, + "step": 130370 + }, + { + "epoch": 0.8362168211831332, + "grad_norm": 0.09276844561100006, + "learning_rate": 7.22148591163427e-06, + "loss": 0.0015, + "step": 130380 + }, + { + "epoch": 0.8362809580769194, + "grad_norm": 0.11088048666715622, + "learning_rate": 7.220984470957739e-06, + "loss": 0.0022, + "step": 130390 + }, + { + "epoch": 0.8363450949707055, + "grad_norm": 0.050899092108011246, + "learning_rate": 7.220483002450685e-06, + "loss": 0.0027, + "step": 130400 + }, + { + "epoch": 0.8364092318644916, + "grad_norm": 0.12364206463098526, + "learning_rate": 7.219981506119391e-06, + "loss": 0.0021, + "step": 130410 + }, + { + "epoch": 0.8364733687582777, + "grad_norm": 0.051249511539936066, + "learning_rate": 7.219479981970141e-06, + "loss": 0.0031, + "step": 130420 + }, + { + "epoch": 0.8365375056520638, + "grad_norm": 0.0800173357129097, + "learning_rate": 7.218978430009224e-06, + "loss": 0.0019, + "step": 130430 + }, + { + "epoch": 0.8366016425458499, + "grad_norm": 0.05738505348563194, + "learning_rate": 7.218476850242919e-06, + "loss": 0.0015, + "step": 130440 + }, + { + "epoch": 0.8366657794396359, + "grad_norm": 0.10963942855596542, + "learning_rate": 7.217975242677515e-06, + "loss": 0.0018, + "step": 130450 + }, + { + "epoch": 0.8367299163334221, + "grad_norm": 0.15522795915603638, + "learning_rate": 7.2174736073192954e-06, + "loss": 0.0027, + "step": 130460 + }, + { + "epoch": 0.8367940532272081, + "grad_norm": 0.11064627766609192, + "learning_rate": 7.216971944174547e-06, + "loss": 0.0031, + "step": 130470 + }, + { + "epoch": 0.8368581901209943, + "grad_norm": 0.2598922848701477, + "learning_rate": 7.216470253249554e-06, + "loss": 0.0022, + "step": 130480 + }, + { + "epoch": 0.8369223270147803, + "grad_norm": 0.15451756119728088, + "learning_rate": 7.215968534550608e-06, + "loss": 0.0032, + "step": 130490 + }, + { + "epoch": 0.8369864639085665, + "grad_norm": 0.12410331517457962, + "learning_rate": 7.21546678808399e-06, + "loss": 0.0022, + "step": 130500 + }, + { + "epoch": 0.8370506008023525, + "grad_norm": 0.3346932828426361, + "learning_rate": 7.214965013855992e-06, + "loss": 0.0046, + "step": 130510 + }, + { + "epoch": 0.8371147376961386, + "grad_norm": 0.03758867457509041, + "learning_rate": 7.214463211872896e-06, + "loss": 0.0025, + "step": 130520 + }, + { + "epoch": 0.8371788745899247, + "grad_norm": 0.18766102194786072, + "learning_rate": 7.213961382140995e-06, + "loss": 0.0034, + "step": 130530 + }, + { + "epoch": 0.8372430114837108, + "grad_norm": 0.404788613319397, + "learning_rate": 7.2134595246665766e-06, + "loss": 0.0031, + "step": 130540 + }, + { + "epoch": 0.837307148377497, + "grad_norm": 0.10620911419391632, + "learning_rate": 7.212957639455926e-06, + "loss": 0.0018, + "step": 130550 + }, + { + "epoch": 0.837371285271283, + "grad_norm": 0.1350063532590866, + "learning_rate": 7.212455726515337e-06, + "loss": 0.0041, + "step": 130560 + }, + { + "epoch": 0.8374354221650692, + "grad_norm": 0.20413848757743835, + "learning_rate": 7.2119537858510954e-06, + "loss": 0.0025, + "step": 130570 + }, + { + "epoch": 0.8374995590588552, + "grad_norm": 0.2801940143108368, + "learning_rate": 7.211451817469493e-06, + "loss": 0.0014, + "step": 130580 + }, + { + "epoch": 0.8375636959526414, + "grad_norm": 0.24905692040920258, + "learning_rate": 7.210949821376817e-06, + "loss": 0.0023, + "step": 130590 + }, + { + "epoch": 0.8376278328464274, + "grad_norm": 0.07550957798957825, + "learning_rate": 7.2104477975793605e-06, + "loss": 0.0017, + "step": 130600 + }, + { + "epoch": 0.8376919697402135, + "grad_norm": 0.04287779703736305, + "learning_rate": 7.209945746083413e-06, + "loss": 0.0014, + "step": 130610 + }, + { + "epoch": 0.8377561066339996, + "grad_norm": 0.07093918323516846, + "learning_rate": 7.209443666895269e-06, + "loss": 0.003, + "step": 130620 + }, + { + "epoch": 0.8378202435277857, + "grad_norm": 0.05075628682971001, + "learning_rate": 7.208941560021212e-06, + "loss": 0.0018, + "step": 130630 + }, + { + "epoch": 0.8378843804215718, + "grad_norm": 0.1476941704750061, + "learning_rate": 7.2084394254675415e-06, + "loss": 0.0033, + "step": 130640 + }, + { + "epoch": 0.8379485173153579, + "grad_norm": 0.14929373562335968, + "learning_rate": 7.207937263240546e-06, + "loss": 0.0025, + "step": 130650 + }, + { + "epoch": 0.838012654209144, + "grad_norm": 0.07781266421079636, + "learning_rate": 7.207435073346518e-06, + "loss": 0.0035, + "step": 130660 + }, + { + "epoch": 0.8380767911029301, + "grad_norm": 0.09879343956708908, + "learning_rate": 7.20693285579175e-06, + "loss": 0.0032, + "step": 130670 + }, + { + "epoch": 0.8381409279967162, + "grad_norm": 0.040806982666254044, + "learning_rate": 7.206430610582538e-06, + "loss": 0.0047, + "step": 130680 + }, + { + "epoch": 0.8382050648905023, + "grad_norm": 0.05319861322641373, + "learning_rate": 7.205928337725173e-06, + "loss": 0.0039, + "step": 130690 + }, + { + "epoch": 0.8382692017842884, + "grad_norm": 0.055010631680488586, + "learning_rate": 7.2054260372259486e-06, + "loss": 0.0036, + "step": 130700 + }, + { + "epoch": 0.8383333386780745, + "grad_norm": 0.05272675305604935, + "learning_rate": 7.204923709091162e-06, + "loss": 0.0037, + "step": 130710 + }, + { + "epoch": 0.8383974755718606, + "grad_norm": 0.11825592070817947, + "learning_rate": 7.204421353327104e-06, + "loss": 0.0026, + "step": 130720 + }, + { + "epoch": 0.8384616124656467, + "grad_norm": 0.07398737967014313, + "learning_rate": 7.2039189699400705e-06, + "loss": 0.0056, + "step": 130730 + }, + { + "epoch": 0.8385257493594328, + "grad_norm": 0.20622889697551727, + "learning_rate": 7.203416558936357e-06, + "loss": 0.0024, + "step": 130740 + }, + { + "epoch": 0.8385898862532188, + "grad_norm": 0.13918302953243256, + "learning_rate": 7.202914120322261e-06, + "loss": 0.0025, + "step": 130750 + }, + { + "epoch": 0.838654023147005, + "grad_norm": 0.2289324551820755, + "learning_rate": 7.202411654104074e-06, + "loss": 0.0015, + "step": 130760 + }, + { + "epoch": 0.838718160040791, + "grad_norm": 0.18719933927059174, + "learning_rate": 7.201909160288098e-06, + "loss": 0.0024, + "step": 130770 + }, + { + "epoch": 0.8387822969345772, + "grad_norm": 0.12550872564315796, + "learning_rate": 7.201406638880624e-06, + "loss": 0.0013, + "step": 130780 + }, + { + "epoch": 0.8388464338283632, + "grad_norm": 0.1475445032119751, + "learning_rate": 7.200904089887954e-06, + "loss": 0.0042, + "step": 130790 + }, + { + "epoch": 0.8389105707221494, + "grad_norm": 0.0035977119114249945, + "learning_rate": 7.20040151331638e-06, + "loss": 0.0016, + "step": 130800 + }, + { + "epoch": 0.8389747076159354, + "grad_norm": 0.16882912814617157, + "learning_rate": 7.199898909172202e-06, + "loss": 0.001, + "step": 130810 + }, + { + "epoch": 0.8390388445097215, + "grad_norm": 0.07918764650821686, + "learning_rate": 7.199396277461721e-06, + "loss": 0.0019, + "step": 130820 + }, + { + "epoch": 0.8391029814035077, + "grad_norm": 0.17589236795902252, + "learning_rate": 7.198893618191231e-06, + "loss": 0.0032, + "step": 130830 + }, + { + "epoch": 0.8391671182972937, + "grad_norm": 0.15713556110858917, + "learning_rate": 7.198390931367034e-06, + "loss": 0.004, + "step": 130840 + }, + { + "epoch": 0.8392312551910799, + "grad_norm": 0.12668083608150482, + "learning_rate": 7.197888216995428e-06, + "loss": 0.0027, + "step": 130850 + }, + { + "epoch": 0.8392953920848659, + "grad_norm": 0.05177538841962814, + "learning_rate": 7.19738547508271e-06, + "loss": 0.0023, + "step": 130860 + }, + { + "epoch": 0.8393595289786521, + "grad_norm": 0.045602746307849884, + "learning_rate": 7.196882705635182e-06, + "loss": 0.0014, + "step": 130870 + }, + { + "epoch": 0.8394236658724381, + "grad_norm": 0.09825082868337631, + "learning_rate": 7.196379908659144e-06, + "loss": 0.0027, + "step": 130880 + }, + { + "epoch": 0.8394878027662243, + "grad_norm": 0.0659918487071991, + "learning_rate": 7.195877084160895e-06, + "loss": 0.0016, + "step": 130890 + }, + { + "epoch": 0.8395519396600103, + "grad_norm": 0.42159461975097656, + "learning_rate": 7.195374232146738e-06, + "loss": 0.0078, + "step": 130900 + }, + { + "epoch": 0.8396160765537964, + "grad_norm": 0.08648031204938889, + "learning_rate": 7.194871352622972e-06, + "loss": 0.0044, + "step": 130910 + }, + { + "epoch": 0.8396802134475825, + "grad_norm": 0.12601043283939362, + "learning_rate": 7.1943684455959e-06, + "loss": 0.0025, + "step": 130920 + }, + { + "epoch": 0.8397443503413686, + "grad_norm": 0.14105676114559174, + "learning_rate": 7.193865511071822e-06, + "loss": 0.0029, + "step": 130930 + }, + { + "epoch": 0.8398084872351547, + "grad_norm": 0.12207061052322388, + "learning_rate": 7.1933625490570434e-06, + "loss": 0.0015, + "step": 130940 + }, + { + "epoch": 0.8398726241289408, + "grad_norm": 0.10712200403213501, + "learning_rate": 7.192859559557863e-06, + "loss": 0.0018, + "step": 130950 + }, + { + "epoch": 0.839936761022727, + "grad_norm": 0.2787437438964844, + "learning_rate": 7.192356542580585e-06, + "loss": 0.0032, + "step": 130960 + }, + { + "epoch": 0.840000897916513, + "grad_norm": 0.11498334258794785, + "learning_rate": 7.191853498131512e-06, + "loss": 0.0017, + "step": 130970 + }, + { + "epoch": 0.8400650348102991, + "grad_norm": 0.19720852375030518, + "learning_rate": 7.1913504262169495e-06, + "loss": 0.0036, + "step": 130980 + }, + { + "epoch": 0.8401291717040852, + "grad_norm": 0.04595879465341568, + "learning_rate": 7.1908473268432e-06, + "loss": 0.0025, + "step": 130990 + }, + { + "epoch": 0.8401933085978713, + "grad_norm": 0.19816502928733826, + "learning_rate": 7.190344200016568e-06, + "loss": 0.0016, + "step": 131000 + }, + { + "epoch": 0.8402574454916574, + "grad_norm": 0.08880461752414703, + "learning_rate": 7.189841045743357e-06, + "loss": 0.0019, + "step": 131010 + }, + { + "epoch": 0.8403215823854435, + "grad_norm": 0.05284334719181061, + "learning_rate": 7.189337864029872e-06, + "loss": 0.0023, + "step": 131020 + }, + { + "epoch": 0.8403857192792296, + "grad_norm": 0.05432449281215668, + "learning_rate": 7.18883465488242e-06, + "loss": 0.0035, + "step": 131030 + }, + { + "epoch": 0.8404498561730157, + "grad_norm": 0.1101267859339714, + "learning_rate": 7.188331418307304e-06, + "loss": 0.0011, + "step": 131040 + }, + { + "epoch": 0.8405139930668017, + "grad_norm": 0.18215373158454895, + "learning_rate": 7.1878281543108334e-06, + "loss": 0.0017, + "step": 131050 + }, + { + "epoch": 0.8405781299605879, + "grad_norm": 0.12219468504190445, + "learning_rate": 7.1873248628993105e-06, + "loss": 0.0057, + "step": 131060 + }, + { + "epoch": 0.8406422668543739, + "grad_norm": 0.10829061269760132, + "learning_rate": 7.186821544079046e-06, + "loss": 0.0026, + "step": 131070 + }, + { + "epoch": 0.8407064037481601, + "grad_norm": 0.12962056696414948, + "learning_rate": 7.186318197856343e-06, + "loss": 0.002, + "step": 131080 + }, + { + "epoch": 0.8407705406419461, + "grad_norm": 0.06855259090662003, + "learning_rate": 7.185814824237512e-06, + "loss": 0.0015, + "step": 131090 + }, + { + "epoch": 0.8408346775357323, + "grad_norm": 0.08255480229854584, + "learning_rate": 7.185311423228856e-06, + "loss": 0.0033, + "step": 131100 + }, + { + "epoch": 0.8408988144295184, + "grad_norm": 0.08055885881185532, + "learning_rate": 7.184807994836689e-06, + "loss": 0.0018, + "step": 131110 + }, + { + "epoch": 0.8409629513233045, + "grad_norm": 0.08228079974651337, + "learning_rate": 7.184304539067315e-06, + "loss": 0.0036, + "step": 131120 + }, + { + "epoch": 0.8410270882170906, + "grad_norm": 0.09898220002651215, + "learning_rate": 7.183801055927044e-06, + "loss": 0.004, + "step": 131130 + }, + { + "epoch": 0.8410912251108766, + "grad_norm": 0.13123354315757751, + "learning_rate": 7.183297545422185e-06, + "loss": 0.002, + "step": 131140 + }, + { + "epoch": 0.8411553620046628, + "grad_norm": 0.11321281641721725, + "learning_rate": 7.1827940075590486e-06, + "loss": 0.0037, + "step": 131150 + }, + { + "epoch": 0.8412194988984488, + "grad_norm": 0.046443551778793335, + "learning_rate": 7.1822904423439425e-06, + "loss": 0.0018, + "step": 131160 + }, + { + "epoch": 0.841283635792235, + "grad_norm": 0.12373586744070053, + "learning_rate": 7.1817868497831765e-06, + "loss": 0.002, + "step": 131170 + }, + { + "epoch": 0.841347772686021, + "grad_norm": 0.11009549349546432, + "learning_rate": 7.1812832298830626e-06, + "loss": 0.002, + "step": 131180 + }, + { + "epoch": 0.8414119095798072, + "grad_norm": 0.09203784167766571, + "learning_rate": 7.18077958264991e-06, + "loss": 0.0021, + "step": 131190 + }, + { + "epoch": 0.8414760464735932, + "grad_norm": 0.07105744630098343, + "learning_rate": 7.180275908090033e-06, + "loss": 0.0021, + "step": 131200 + }, + { + "epoch": 0.8415401833673793, + "grad_norm": 0.10325989872217178, + "learning_rate": 7.179772206209739e-06, + "loss": 0.0044, + "step": 131210 + }, + { + "epoch": 0.8416043202611654, + "grad_norm": 0.14352548122406006, + "learning_rate": 7.179268477015342e-06, + "loss": 0.0031, + "step": 131220 + }, + { + "epoch": 0.8416684571549515, + "grad_norm": 0.14424268901348114, + "learning_rate": 7.178764720513154e-06, + "loss": 0.0017, + "step": 131230 + }, + { + "epoch": 0.8417325940487376, + "grad_norm": 0.10106702893972397, + "learning_rate": 7.178260936709484e-06, + "loss": 0.003, + "step": 131240 + }, + { + "epoch": 0.8417967309425237, + "grad_norm": 0.20695577561855316, + "learning_rate": 7.17775712561065e-06, + "loss": 0.0017, + "step": 131250 + }, + { + "epoch": 0.8418608678363099, + "grad_norm": 0.19014497101306915, + "learning_rate": 7.177253287222964e-06, + "loss": 0.0033, + "step": 131260 + }, + { + "epoch": 0.8419250047300959, + "grad_norm": 0.14594174921512604, + "learning_rate": 7.176749421552736e-06, + "loss": 0.0046, + "step": 131270 + }, + { + "epoch": 0.841989141623882, + "grad_norm": 0.14901790022850037, + "learning_rate": 7.176245528606284e-06, + "loss": 0.0024, + "step": 131280 + }, + { + "epoch": 0.8420532785176681, + "grad_norm": 0.13513055443763733, + "learning_rate": 7.175741608389919e-06, + "loss": 0.0024, + "step": 131290 + }, + { + "epoch": 0.8421174154114542, + "grad_norm": 0.027130547910928726, + "learning_rate": 7.1752376609099575e-06, + "loss": 0.0017, + "step": 131300 + }, + { + "epoch": 0.8421815523052403, + "grad_norm": 0.12011363357305527, + "learning_rate": 7.174733686172712e-06, + "loss": 0.0052, + "step": 131310 + }, + { + "epoch": 0.8422456891990264, + "grad_norm": 0.05899891257286072, + "learning_rate": 7.1742296841845e-06, + "loss": 0.0048, + "step": 131320 + }, + { + "epoch": 0.8423098260928125, + "grad_norm": 0.06901489943265915, + "learning_rate": 7.173725654951636e-06, + "loss": 0.002, + "step": 131330 + }, + { + "epoch": 0.8423739629865986, + "grad_norm": 0.17242315411567688, + "learning_rate": 7.173221598480438e-06, + "loss": 0.0024, + "step": 131340 + }, + { + "epoch": 0.8424380998803846, + "grad_norm": 0.20778584480285645, + "learning_rate": 7.172717514777217e-06, + "loss": 0.0026, + "step": 131350 + }, + { + "epoch": 0.8425022367741708, + "grad_norm": 0.13705037534236908, + "learning_rate": 7.172213403848295e-06, + "loss": 0.0037, + "step": 131360 + }, + { + "epoch": 0.8425663736679568, + "grad_norm": 0.041558653116226196, + "learning_rate": 7.171709265699984e-06, + "loss": 0.0022, + "step": 131370 + }, + { + "epoch": 0.842630510561743, + "grad_norm": 0.2619161605834961, + "learning_rate": 7.171205100338605e-06, + "loss": 0.0024, + "step": 131380 + }, + { + "epoch": 0.8426946474555291, + "grad_norm": 0.004847593605518341, + "learning_rate": 7.170700907770476e-06, + "loss": 0.0025, + "step": 131390 + }, + { + "epoch": 0.8427587843493152, + "grad_norm": 0.06404387950897217, + "learning_rate": 7.170196688001911e-06, + "loss": 0.0021, + "step": 131400 + }, + { + "epoch": 0.8428229212431013, + "grad_norm": 0.09147094935178757, + "learning_rate": 7.169692441039233e-06, + "loss": 0.0031, + "step": 131410 + }, + { + "epoch": 0.8428870581368874, + "grad_norm": 0.07531020790338516, + "learning_rate": 7.1691881668887565e-06, + "loss": 0.0014, + "step": 131420 + }, + { + "epoch": 0.8429511950306735, + "grad_norm": 0.07780922949314117, + "learning_rate": 7.168683865556803e-06, + "loss": 0.0016, + "step": 131430 + }, + { + "epoch": 0.8430153319244595, + "grad_norm": 0.0524565689265728, + "learning_rate": 7.168179537049689e-06, + "loss": 0.0027, + "step": 131440 + }, + { + "epoch": 0.8430794688182457, + "grad_norm": 0.040026117116212845, + "learning_rate": 7.167675181373737e-06, + "loss": 0.0013, + "step": 131450 + }, + { + "epoch": 0.8431436057120317, + "grad_norm": 0.004006249364465475, + "learning_rate": 7.167170798535265e-06, + "loss": 0.0011, + "step": 131460 + }, + { + "epoch": 0.8432077426058179, + "grad_norm": 0.1320967972278595, + "learning_rate": 7.166666388540595e-06, + "loss": 0.0021, + "step": 131470 + }, + { + "epoch": 0.8432718794996039, + "grad_norm": 0.09947043657302856, + "learning_rate": 7.166161951396045e-06, + "loss": 0.0022, + "step": 131480 + }, + { + "epoch": 0.8433360163933901, + "grad_norm": 0.15966539084911346, + "learning_rate": 7.1656574871079394e-06, + "loss": 0.0022, + "step": 131490 + }, + { + "epoch": 0.8434001532871761, + "grad_norm": 0.1235571950674057, + "learning_rate": 7.165152995682597e-06, + "loss": 0.0023, + "step": 131500 + }, + { + "epoch": 0.8434642901809623, + "grad_norm": 0.020679375156760216, + "learning_rate": 7.1646484771263404e-06, + "loss": 0.0022, + "step": 131510 + }, + { + "epoch": 0.8435284270747483, + "grad_norm": 0.13467086851596832, + "learning_rate": 7.164143931445491e-06, + "loss": 0.0034, + "step": 131520 + }, + { + "epoch": 0.8435925639685344, + "grad_norm": 0.08703139424324036, + "learning_rate": 7.16363935864637e-06, + "loss": 0.0049, + "step": 131530 + }, + { + "epoch": 0.8436567008623206, + "grad_norm": 0.09485015273094177, + "learning_rate": 7.1631347587353035e-06, + "loss": 0.0033, + "step": 131540 + }, + { + "epoch": 0.8437208377561066, + "grad_norm": 0.024869795888662338, + "learning_rate": 7.162630131718613e-06, + "loss": 0.0016, + "step": 131550 + }, + { + "epoch": 0.8437849746498928, + "grad_norm": 0.1564255952835083, + "learning_rate": 7.16212547760262e-06, + "loss": 0.0036, + "step": 131560 + }, + { + "epoch": 0.8438491115436788, + "grad_norm": 0.44133636355400085, + "learning_rate": 7.16162079639365e-06, + "loss": 0.003, + "step": 131570 + }, + { + "epoch": 0.843913248437465, + "grad_norm": 0.07069187611341476, + "learning_rate": 7.161116088098026e-06, + "loss": 0.0036, + "step": 131580 + }, + { + "epoch": 0.843977385331251, + "grad_norm": 0.12002238631248474, + "learning_rate": 7.160611352722073e-06, + "loss": 0.0035, + "step": 131590 + }, + { + "epoch": 0.8440415222250371, + "grad_norm": 0.10347267240285873, + "learning_rate": 7.160106590272117e-06, + "loss": 0.0019, + "step": 131600 + }, + { + "epoch": 0.8441056591188232, + "grad_norm": 0.38686108589172363, + "learning_rate": 7.15960180075448e-06, + "loss": 0.0019, + "step": 131610 + }, + { + "epoch": 0.8441697960126093, + "grad_norm": 0.1088900938630104, + "learning_rate": 7.15909698417549e-06, + "loss": 0.0017, + "step": 131620 + }, + { + "epoch": 0.8442339329063954, + "grad_norm": 0.25590530037879944, + "learning_rate": 7.15859214054147e-06, + "loss": 0.003, + "step": 131630 + }, + { + "epoch": 0.8442980698001815, + "grad_norm": 0.22568881511688232, + "learning_rate": 7.15808726985875e-06, + "loss": 0.0026, + "step": 131640 + }, + { + "epoch": 0.8443622066939676, + "grad_norm": 0.09165618568658829, + "learning_rate": 7.157582372133653e-06, + "loss": 0.0027, + "step": 131650 + }, + { + "epoch": 0.8444263435877537, + "grad_norm": 0.23040997982025146, + "learning_rate": 7.157077447372507e-06, + "loss": 0.0025, + "step": 131660 + }, + { + "epoch": 0.8444904804815399, + "grad_norm": 0.0696287453174591, + "learning_rate": 7.15657249558164e-06, + "loss": 0.0023, + "step": 131670 + }, + { + "epoch": 0.8445546173753259, + "grad_norm": 0.11746630817651749, + "learning_rate": 7.1560675167673766e-06, + "loss": 0.0022, + "step": 131680 + }, + { + "epoch": 0.844618754269112, + "grad_norm": 0.13166771829128265, + "learning_rate": 7.155562510936047e-06, + "loss": 0.0026, + "step": 131690 + }, + { + "epoch": 0.8446828911628981, + "grad_norm": 0.16737747192382812, + "learning_rate": 7.155057478093979e-06, + "loss": 0.0033, + "step": 131700 + }, + { + "epoch": 0.8447470280566842, + "grad_norm": 0.0332891121506691, + "learning_rate": 7.1545524182475005e-06, + "loss": 0.0011, + "step": 131710 + }, + { + "epoch": 0.8448111649504703, + "grad_norm": 0.15007895231246948, + "learning_rate": 7.15404733140294e-06, + "loss": 0.0017, + "step": 131720 + }, + { + "epoch": 0.8448753018442564, + "grad_norm": 0.22700099647045135, + "learning_rate": 7.153542217566627e-06, + "loss": 0.0052, + "step": 131730 + }, + { + "epoch": 0.8449394387380424, + "grad_norm": 0.0855378583073616, + "learning_rate": 7.1530370767448894e-06, + "loss": 0.0029, + "step": 131740 + }, + { + "epoch": 0.8450035756318286, + "grad_norm": 0.3931421935558319, + "learning_rate": 7.152531908944061e-06, + "loss": 0.0035, + "step": 131750 + }, + { + "epoch": 0.8450677125256146, + "grad_norm": 0.04366033524274826, + "learning_rate": 7.152026714170468e-06, + "loss": 0.0021, + "step": 131760 + }, + { + "epoch": 0.8451318494194008, + "grad_norm": 0.07778988778591156, + "learning_rate": 7.151521492430443e-06, + "loss": 0.0025, + "step": 131770 + }, + { + "epoch": 0.8451959863131868, + "grad_norm": 0.26300501823425293, + "learning_rate": 7.151016243730316e-06, + "loss": 0.0025, + "step": 131780 + }, + { + "epoch": 0.845260123206973, + "grad_norm": 0.1211204007267952, + "learning_rate": 7.150510968076419e-06, + "loss": 0.005, + "step": 131790 + }, + { + "epoch": 0.845324260100759, + "grad_norm": 0.12193798273801804, + "learning_rate": 7.15000566547508e-06, + "loss": 0.0034, + "step": 131800 + }, + { + "epoch": 0.8453883969945452, + "grad_norm": 0.06321345269680023, + "learning_rate": 7.149500335932636e-06, + "loss": 0.0022, + "step": 131810 + }, + { + "epoch": 0.8454525338883313, + "grad_norm": 0.48327797651290894, + "learning_rate": 7.148994979455415e-06, + "loss": 0.0026, + "step": 131820 + }, + { + "epoch": 0.8455166707821173, + "grad_norm": 0.09827014058828354, + "learning_rate": 7.1484895960497515e-06, + "loss": 0.0018, + "step": 131830 + }, + { + "epoch": 0.8455808076759035, + "grad_norm": 0.045801226049661636, + "learning_rate": 7.1479841857219776e-06, + "loss": 0.0018, + "step": 131840 + }, + { + "epoch": 0.8456449445696895, + "grad_norm": 0.1387709081172943, + "learning_rate": 7.147478748478427e-06, + "loss": 0.0025, + "step": 131850 + }, + { + "epoch": 0.8457090814634757, + "grad_norm": 0.09701228141784668, + "learning_rate": 7.146973284325432e-06, + "loss": 0.0034, + "step": 131860 + }, + { + "epoch": 0.8457732183572617, + "grad_norm": 0.05086098238825798, + "learning_rate": 7.146467793269329e-06, + "loss": 0.0035, + "step": 131870 + }, + { + "epoch": 0.8458373552510479, + "grad_norm": 0.07943065464496613, + "learning_rate": 7.145962275316449e-06, + "loss": 0.0031, + "step": 131880 + }, + { + "epoch": 0.8459014921448339, + "grad_norm": 0.07321260869503021, + "learning_rate": 7.145456730473129e-06, + "loss": 0.003, + "step": 131890 + }, + { + "epoch": 0.84596562903862, + "grad_norm": 0.1799236238002777, + "learning_rate": 7.1449511587457035e-06, + "loss": 0.0035, + "step": 131900 + }, + { + "epoch": 0.8460297659324061, + "grad_norm": 0.13050727546215057, + "learning_rate": 7.144445560140505e-06, + "loss": 0.0036, + "step": 131910 + }, + { + "epoch": 0.8460939028261922, + "grad_norm": 0.11861727386713028, + "learning_rate": 7.143939934663873e-06, + "loss": 0.0036, + "step": 131920 + }, + { + "epoch": 0.8461580397199783, + "grad_norm": 0.04367021098732948, + "learning_rate": 7.143434282322139e-06, + "loss": 0.0017, + "step": 131930 + }, + { + "epoch": 0.8462221766137644, + "grad_norm": 0.12094981223344803, + "learning_rate": 7.142928603121644e-06, + "loss": 0.0052, + "step": 131940 + }, + { + "epoch": 0.8462863135075506, + "grad_norm": 0.16046714782714844, + "learning_rate": 7.142422897068719e-06, + "loss": 0.0026, + "step": 131950 + }, + { + "epoch": 0.8463504504013366, + "grad_norm": 0.045945905148983, + "learning_rate": 7.1419171641697075e-06, + "loss": 0.0021, + "step": 131960 + }, + { + "epoch": 0.8464145872951228, + "grad_norm": 0.13319644331932068, + "learning_rate": 7.141411404430941e-06, + "loss": 0.0016, + "step": 131970 + }, + { + "epoch": 0.8464787241889088, + "grad_norm": 0.12061929702758789, + "learning_rate": 7.14090561785876e-06, + "loss": 0.0033, + "step": 131980 + }, + { + "epoch": 0.8465428610826949, + "grad_norm": 0.09734722971916199, + "learning_rate": 7.140399804459501e-06, + "loss": 0.0021, + "step": 131990 + }, + { + "epoch": 0.846606997976481, + "grad_norm": 0.06309890747070312, + "learning_rate": 7.139893964239503e-06, + "loss": 0.0027, + "step": 132000 + }, + { + "epoch": 0.8466711348702671, + "grad_norm": 0.12926048040390015, + "learning_rate": 7.139388097205104e-06, + "loss": 0.002, + "step": 132010 + }, + { + "epoch": 0.8467352717640532, + "grad_norm": 0.10321187227964401, + "learning_rate": 7.138882203362645e-06, + "loss": 0.0028, + "step": 132020 + }, + { + "epoch": 0.8467994086578393, + "grad_norm": 0.15635032951831818, + "learning_rate": 7.138376282718461e-06, + "loss": 0.0016, + "step": 132030 + }, + { + "epoch": 0.8468635455516254, + "grad_norm": 0.13198229670524597, + "learning_rate": 7.137870335278896e-06, + "loss": 0.003, + "step": 132040 + }, + { + "epoch": 0.8469276824454115, + "grad_norm": 0.146623894572258, + "learning_rate": 7.137364361050285e-06, + "loss": 0.0032, + "step": 132050 + }, + { + "epoch": 0.8469918193391975, + "grad_norm": 0.14922136068344116, + "learning_rate": 7.136858360038973e-06, + "loss": 0.0026, + "step": 132060 + }, + { + "epoch": 0.8470559562329837, + "grad_norm": 0.14886027574539185, + "learning_rate": 7.136352332251297e-06, + "loss": 0.0017, + "step": 132070 + }, + { + "epoch": 0.8471200931267697, + "grad_norm": 0.10785438120365143, + "learning_rate": 7.135846277693602e-06, + "loss": 0.0011, + "step": 132080 + }, + { + "epoch": 0.8471842300205559, + "grad_norm": 0.13098682463169098, + "learning_rate": 7.135340196372225e-06, + "loss": 0.0027, + "step": 132090 + }, + { + "epoch": 0.847248366914342, + "grad_norm": 0.11557871848344803, + "learning_rate": 7.134834088293509e-06, + "loss": 0.0034, + "step": 132100 + }, + { + "epoch": 0.8473125038081281, + "grad_norm": 0.05154212936758995, + "learning_rate": 7.134327953463797e-06, + "loss": 0.0034, + "step": 132110 + }, + { + "epoch": 0.8473766407019142, + "grad_norm": 0.3776550889015198, + "learning_rate": 7.13382179188943e-06, + "loss": 0.0021, + "step": 132120 + }, + { + "epoch": 0.8474407775957002, + "grad_norm": 0.11731040477752686, + "learning_rate": 7.1333156035767506e-06, + "loss": 0.0032, + "step": 132130 + }, + { + "epoch": 0.8475049144894864, + "grad_norm": 0.024094602093100548, + "learning_rate": 7.132809388532101e-06, + "loss": 0.0034, + "step": 132140 + }, + { + "epoch": 0.8475690513832724, + "grad_norm": 0.15600042045116425, + "learning_rate": 7.132303146761828e-06, + "loss": 0.0016, + "step": 132150 + }, + { + "epoch": 0.8476331882770586, + "grad_norm": 0.07214893400669098, + "learning_rate": 7.13179687827227e-06, + "loss": 0.0025, + "step": 132160 + }, + { + "epoch": 0.8476973251708446, + "grad_norm": 0.18649162352085114, + "learning_rate": 7.131290583069776e-06, + "loss": 0.0021, + "step": 132170 + }, + { + "epoch": 0.8477614620646308, + "grad_norm": 0.01331428810954094, + "learning_rate": 7.1307842611606855e-06, + "loss": 0.0015, + "step": 132180 + }, + { + "epoch": 0.8478255989584168, + "grad_norm": 0.21441741287708282, + "learning_rate": 7.130277912551348e-06, + "loss": 0.0015, + "step": 132190 + }, + { + "epoch": 0.847889735852203, + "grad_norm": 0.04190351814031601, + "learning_rate": 7.129771537248104e-06, + "loss": 0.0056, + "step": 132200 + }, + { + "epoch": 0.847953872745989, + "grad_norm": 0.012922914698719978, + "learning_rate": 7.1292651352573014e-06, + "loss": 0.0022, + "step": 132210 + }, + { + "epoch": 0.8480180096397751, + "grad_norm": 0.12478888034820557, + "learning_rate": 7.128758706585284e-06, + "loss": 0.0018, + "step": 132220 + }, + { + "epoch": 0.8480821465335613, + "grad_norm": 0.10566743463277817, + "learning_rate": 7.128252251238399e-06, + "loss": 0.0045, + "step": 132230 + }, + { + "epoch": 0.8481462834273473, + "grad_norm": 0.1309993416070938, + "learning_rate": 7.127745769222992e-06, + "loss": 0.0026, + "step": 132240 + }, + { + "epoch": 0.8482104203211335, + "grad_norm": 0.07731198519468307, + "learning_rate": 7.12723926054541e-06, + "loss": 0.0043, + "step": 132250 + }, + { + "epoch": 0.8482745572149195, + "grad_norm": 0.020769502967596054, + "learning_rate": 7.126732725212e-06, + "loss": 0.003, + "step": 132260 + }, + { + "epoch": 0.8483386941087057, + "grad_norm": 0.09295207262039185, + "learning_rate": 7.126226163229109e-06, + "loss": 0.002, + "step": 132270 + }, + { + "epoch": 0.8484028310024917, + "grad_norm": 0.0527043417096138, + "learning_rate": 7.1257195746030835e-06, + "loss": 0.0019, + "step": 132280 + }, + { + "epoch": 0.8484669678962778, + "grad_norm": 0.09591345489025116, + "learning_rate": 7.125212959340273e-06, + "loss": 0.0039, + "step": 132290 + }, + { + "epoch": 0.8485311047900639, + "grad_norm": 0.24664068222045898, + "learning_rate": 7.124706317447026e-06, + "loss": 0.0024, + "step": 132300 + }, + { + "epoch": 0.84859524168385, + "grad_norm": 0.015033972449600697, + "learning_rate": 7.1241996489296906e-06, + "loss": 0.002, + "step": 132310 + }, + { + "epoch": 0.8486593785776361, + "grad_norm": 0.04619878903031349, + "learning_rate": 7.1236929537946146e-06, + "loss": 0.0015, + "step": 132320 + }, + { + "epoch": 0.8487235154714222, + "grad_norm": 0.09617872536182404, + "learning_rate": 7.123186232048147e-06, + "loss": 0.0019, + "step": 132330 + }, + { + "epoch": 0.8487876523652083, + "grad_norm": 0.09924177825450897, + "learning_rate": 7.12267948369664e-06, + "loss": 0.0014, + "step": 132340 + }, + { + "epoch": 0.8488517892589944, + "grad_norm": 0.09445368498563766, + "learning_rate": 7.122172708746442e-06, + "loss": 0.0013, + "step": 132350 + }, + { + "epoch": 0.8489159261527804, + "grad_norm": 0.4284706115722656, + "learning_rate": 7.121665907203903e-06, + "loss": 0.0063, + "step": 132360 + }, + { + "epoch": 0.8489800630465666, + "grad_norm": 0.2050025314092636, + "learning_rate": 7.121159079075374e-06, + "loss": 0.0033, + "step": 132370 + }, + { + "epoch": 0.8490441999403527, + "grad_norm": 0.15669779479503632, + "learning_rate": 7.120652224367206e-06, + "loss": 0.0032, + "step": 132380 + }, + { + "epoch": 0.8491083368341388, + "grad_norm": 0.015262565575540066, + "learning_rate": 7.120145343085749e-06, + "loss": 0.001, + "step": 132390 + }, + { + "epoch": 0.8491724737279249, + "grad_norm": 0.18097631633281708, + "learning_rate": 7.1196384352373574e-06, + "loss": 0.002, + "step": 132400 + }, + { + "epoch": 0.849236610621711, + "grad_norm": 0.1737731248140335, + "learning_rate": 7.11913150082838e-06, + "loss": 0.0028, + "step": 132410 + }, + { + "epoch": 0.8493007475154971, + "grad_norm": 0.1534372866153717, + "learning_rate": 7.118624539865171e-06, + "loss": 0.0021, + "step": 132420 + }, + { + "epoch": 0.8493648844092831, + "grad_norm": 0.1141214445233345, + "learning_rate": 7.118117552354082e-06, + "loss": 0.0018, + "step": 132430 + }, + { + "epoch": 0.8494290213030693, + "grad_norm": 0.07327088713645935, + "learning_rate": 7.117610538301465e-06, + "loss": 0.0031, + "step": 132440 + }, + { + "epoch": 0.8494931581968553, + "grad_norm": 0.0789964497089386, + "learning_rate": 7.117103497713676e-06, + "loss": 0.0018, + "step": 132450 + }, + { + "epoch": 0.8495572950906415, + "grad_norm": 0.11921041458845139, + "learning_rate": 7.116596430597067e-06, + "loss": 0.003, + "step": 132460 + }, + { + "epoch": 0.8496214319844275, + "grad_norm": 0.04953978583216667, + "learning_rate": 7.116089336957992e-06, + "loss": 0.0017, + "step": 132470 + }, + { + "epoch": 0.8496855688782137, + "grad_norm": 0.2878396213054657, + "learning_rate": 7.115582216802805e-06, + "loss": 0.0016, + "step": 132480 + }, + { + "epoch": 0.8497497057719997, + "grad_norm": 0.05667930841445923, + "learning_rate": 7.115075070137862e-06, + "loss": 0.0032, + "step": 132490 + }, + { + "epoch": 0.8498138426657859, + "grad_norm": 0.0869360864162445, + "learning_rate": 7.114567896969516e-06, + "loss": 0.0049, + "step": 132500 + }, + { + "epoch": 0.849877979559572, + "grad_norm": 0.40160295367240906, + "learning_rate": 7.1140606973041215e-06, + "loss": 0.0031, + "step": 132510 + }, + { + "epoch": 0.849942116453358, + "grad_norm": 0.09188877791166306, + "learning_rate": 7.113553471148037e-06, + "loss": 0.0017, + "step": 132520 + }, + { + "epoch": 0.8500062533471442, + "grad_norm": 0.10770701617002487, + "learning_rate": 7.113046218507618e-06, + "loss": 0.0027, + "step": 132530 + }, + { + "epoch": 0.8500703902409302, + "grad_norm": 0.06739377230405807, + "learning_rate": 7.1125389393892176e-06, + "loss": 0.003, + "step": 132540 + }, + { + "epoch": 0.8501345271347164, + "grad_norm": 0.1430237740278244, + "learning_rate": 7.112031633799196e-06, + "loss": 0.0027, + "step": 132550 + }, + { + "epoch": 0.8501986640285024, + "grad_norm": 0.07702884823083878, + "learning_rate": 7.111524301743907e-06, + "loss": 0.0032, + "step": 132560 + }, + { + "epoch": 0.8502628009222886, + "grad_norm": 0.01654994674026966, + "learning_rate": 7.1110169432297114e-06, + "loss": 0.002, + "step": 132570 + }, + { + "epoch": 0.8503269378160746, + "grad_norm": 0.12832903861999512, + "learning_rate": 7.110509558262963e-06, + "loss": 0.002, + "step": 132580 + }, + { + "epoch": 0.8503910747098608, + "grad_norm": 0.0472089983522892, + "learning_rate": 7.110002146850021e-06, + "loss": 0.0032, + "step": 132590 + }, + { + "epoch": 0.8504552116036468, + "grad_norm": 0.3265191912651062, + "learning_rate": 7.109494708997247e-06, + "loss": 0.0039, + "step": 132600 + }, + { + "epoch": 0.8505193484974329, + "grad_norm": 0.12164904177188873, + "learning_rate": 7.108987244710994e-06, + "loss": 0.0031, + "step": 132610 + }, + { + "epoch": 0.850583485391219, + "grad_norm": 0.09468712657690048, + "learning_rate": 7.108479753997626e-06, + "loss": 0.0025, + "step": 132620 + }, + { + "epoch": 0.8506476222850051, + "grad_norm": 0.1038513034582138, + "learning_rate": 7.107972236863498e-06, + "loss": 0.002, + "step": 132630 + }, + { + "epoch": 0.8507117591787912, + "grad_norm": 0.09218443930149078, + "learning_rate": 7.107464693314972e-06, + "loss": 0.0017, + "step": 132640 + }, + { + "epoch": 0.8507758960725773, + "grad_norm": 0.07426737248897552, + "learning_rate": 7.106957123358405e-06, + "loss": 0.0029, + "step": 132650 + }, + { + "epoch": 0.8508400329663635, + "grad_norm": 0.057585205882787704, + "learning_rate": 7.106449527000162e-06, + "loss": 0.0043, + "step": 132660 + }, + { + "epoch": 0.8509041698601495, + "grad_norm": 0.12777933478355408, + "learning_rate": 7.1059419042466005e-06, + "loss": 0.0016, + "step": 132670 + }, + { + "epoch": 0.8509683067539356, + "grad_norm": 0.11784519255161285, + "learning_rate": 7.105434255104083e-06, + "loss": 0.0027, + "step": 132680 + }, + { + "epoch": 0.8510324436477217, + "grad_norm": 0.13504132628440857, + "learning_rate": 7.104926579578967e-06, + "loss": 0.0027, + "step": 132690 + }, + { + "epoch": 0.8510965805415078, + "grad_norm": 0.0821489617228508, + "learning_rate": 7.104418877677618e-06, + "loss": 0.0029, + "step": 132700 + }, + { + "epoch": 0.8511607174352939, + "grad_norm": 0.19582541286945343, + "learning_rate": 7.103911149406395e-06, + "loss": 0.0018, + "step": 132710 + }, + { + "epoch": 0.85122485432908, + "grad_norm": 0.10627574473619461, + "learning_rate": 7.103403394771663e-06, + "loss": 0.002, + "step": 132720 + }, + { + "epoch": 0.851288991222866, + "grad_norm": 0.06380230188369751, + "learning_rate": 7.102895613779782e-06, + "loss": 0.0026, + "step": 132730 + }, + { + "epoch": 0.8513531281166522, + "grad_norm": 0.2278783768415451, + "learning_rate": 7.102387806437119e-06, + "loss": 0.006, + "step": 132740 + }, + { + "epoch": 0.8514172650104382, + "grad_norm": 1.431296706199646, + "learning_rate": 7.101879972750031e-06, + "loss": 0.0019, + "step": 132750 + }, + { + "epoch": 0.8514814019042244, + "grad_norm": 0.09681957960128784, + "learning_rate": 7.1013721127248865e-06, + "loss": 0.0024, + "step": 132760 + }, + { + "epoch": 0.8515455387980104, + "grad_norm": 0.14589335024356842, + "learning_rate": 7.100864226368047e-06, + "loss": 0.0021, + "step": 132770 + }, + { + "epoch": 0.8516096756917966, + "grad_norm": 0.021403413265943527, + "learning_rate": 7.100356313685877e-06, + "loss": 0.0016, + "step": 132780 + }, + { + "epoch": 0.8516738125855826, + "grad_norm": 0.047523729503154755, + "learning_rate": 7.099848374684743e-06, + "loss": 0.004, + "step": 132790 + }, + { + "epoch": 0.8517379494793688, + "grad_norm": 0.12544597685337067, + "learning_rate": 7.099340409371005e-06, + "loss": 0.0022, + "step": 132800 + }, + { + "epoch": 0.8518020863731549, + "grad_norm": 0.07675661146640778, + "learning_rate": 7.0988324177510335e-06, + "loss": 0.0017, + "step": 132810 + }, + { + "epoch": 0.851866223266941, + "grad_norm": 0.049117445945739746, + "learning_rate": 7.098324399831191e-06, + "loss": 0.0023, + "step": 132820 + }, + { + "epoch": 0.8519303601607271, + "grad_norm": 0.1694405972957611, + "learning_rate": 7.0978163556178455e-06, + "loss": 0.0024, + "step": 132830 + }, + { + "epoch": 0.8519944970545131, + "grad_norm": 0.043329883366823196, + "learning_rate": 7.09730828511736e-06, + "loss": 0.0017, + "step": 132840 + }, + { + "epoch": 0.8520586339482993, + "grad_norm": 0.12720754742622375, + "learning_rate": 7.096800188336105e-06, + "loss": 0.0016, + "step": 132850 + }, + { + "epoch": 0.8521227708420853, + "grad_norm": 0.25648581981658936, + "learning_rate": 7.096292065280444e-06, + "loss": 0.005, + "step": 132860 + }, + { + "epoch": 0.8521869077358715, + "grad_norm": 0.11244427412748337, + "learning_rate": 7.095783915956744e-06, + "loss": 0.0015, + "step": 132870 + }, + { + "epoch": 0.8522510446296575, + "grad_norm": 0.09432416409254074, + "learning_rate": 7.095275740371375e-06, + "loss": 0.0015, + "step": 132880 + }, + { + "epoch": 0.8523151815234437, + "grad_norm": 0.1023477166891098, + "learning_rate": 7.094767538530703e-06, + "loss": 0.0026, + "step": 132890 + }, + { + "epoch": 0.8523793184172297, + "grad_norm": 0.08102133870124817, + "learning_rate": 7.094259310441096e-06, + "loss": 0.0033, + "step": 132900 + }, + { + "epoch": 0.8524434553110158, + "grad_norm": 0.011946534737944603, + "learning_rate": 7.093751056108925e-06, + "loss": 0.0045, + "step": 132910 + }, + { + "epoch": 0.8525075922048019, + "grad_norm": 0.14268122613430023, + "learning_rate": 7.093242775540555e-06, + "loss": 0.0022, + "step": 132920 + }, + { + "epoch": 0.852571729098588, + "grad_norm": 0.045782607048749924, + "learning_rate": 7.092734468742358e-06, + "loss": 0.0026, + "step": 132930 + }, + { + "epoch": 0.8526358659923742, + "grad_norm": 0.21428555250167847, + "learning_rate": 7.092226135720702e-06, + "loss": 0.002, + "step": 132940 + }, + { + "epoch": 0.8527000028861602, + "grad_norm": 0.054481763392686844, + "learning_rate": 7.091717776481957e-06, + "loss": 0.0009, + "step": 132950 + }, + { + "epoch": 0.8527641397799464, + "grad_norm": 0.06863247603178024, + "learning_rate": 7.0912093910324946e-06, + "loss": 0.0028, + "step": 132960 + }, + { + "epoch": 0.8528282766737324, + "grad_norm": 0.01814761385321617, + "learning_rate": 7.090700979378682e-06, + "loss": 0.0027, + "step": 132970 + }, + { + "epoch": 0.8528924135675185, + "grad_norm": 0.04927373677492142, + "learning_rate": 7.0901925415268946e-06, + "loss": 0.0021, + "step": 132980 + }, + { + "epoch": 0.8529565504613046, + "grad_norm": 0.2550349831581116, + "learning_rate": 7.089684077483499e-06, + "loss": 0.0023, + "step": 132990 + }, + { + "epoch": 0.8530206873550907, + "grad_norm": 0.12768761813640594, + "learning_rate": 7.089175587254868e-06, + "loss": 0.0038, + "step": 133000 + }, + { + "epoch": 0.8530848242488768, + "grad_norm": 0.06169166415929794, + "learning_rate": 7.088667070847375e-06, + "loss": 0.0021, + "step": 133010 + }, + { + "epoch": 0.8531489611426629, + "grad_norm": 0.09823588281869888, + "learning_rate": 7.08815852826739e-06, + "loss": 0.0046, + "step": 133020 + }, + { + "epoch": 0.853213098036449, + "grad_norm": 0.025031019002199173, + "learning_rate": 7.087649959521286e-06, + "loss": 0.004, + "step": 133030 + }, + { + "epoch": 0.8532772349302351, + "grad_norm": 0.02685212716460228, + "learning_rate": 7.087141364615437e-06, + "loss": 0.0026, + "step": 133040 + }, + { + "epoch": 0.8533413718240211, + "grad_norm": 0.2141588181257248, + "learning_rate": 7.086632743556214e-06, + "loss": 0.0019, + "step": 133050 + }, + { + "epoch": 0.8534055087178073, + "grad_norm": 0.23564761877059937, + "learning_rate": 7.086124096349993e-06, + "loss": 0.0025, + "step": 133060 + }, + { + "epoch": 0.8534696456115933, + "grad_norm": 0.23499944806098938, + "learning_rate": 7.085615423003145e-06, + "loss": 0.0023, + "step": 133070 + }, + { + "epoch": 0.8535337825053795, + "grad_norm": 0.12187331169843674, + "learning_rate": 7.085106723522046e-06, + "loss": 0.0046, + "step": 133080 + }, + { + "epoch": 0.8535979193991656, + "grad_norm": 0.0823151245713234, + "learning_rate": 7.084597997913069e-06, + "loss": 0.0044, + "step": 133090 + }, + { + "epoch": 0.8536620562929517, + "grad_norm": 0.09182952344417572, + "learning_rate": 7.084089246182588e-06, + "loss": 0.0012, + "step": 133100 + }, + { + "epoch": 0.8537261931867378, + "grad_norm": 0.021208835765719414, + "learning_rate": 7.08358046833698e-06, + "loss": 0.0015, + "step": 133110 + }, + { + "epoch": 0.8537903300805239, + "grad_norm": 0.2756306231021881, + "learning_rate": 7.0830716643826206e-06, + "loss": 0.0023, + "step": 133120 + }, + { + "epoch": 0.85385446697431, + "grad_norm": 0.16646911203861237, + "learning_rate": 7.0825628343258835e-06, + "loss": 0.005, + "step": 133130 + }, + { + "epoch": 0.853918603868096, + "grad_norm": 0.07808910310268402, + "learning_rate": 7.082053978173147e-06, + "loss": 0.0015, + "step": 133140 + }, + { + "epoch": 0.8539827407618822, + "grad_norm": 0.10818828642368317, + "learning_rate": 7.081545095930784e-06, + "loss": 0.0014, + "step": 133150 + }, + { + "epoch": 0.8540468776556682, + "grad_norm": 0.05248650163412094, + "learning_rate": 7.081036187605175e-06, + "loss": 0.0013, + "step": 133160 + }, + { + "epoch": 0.8541110145494544, + "grad_norm": 0.046363357454538345, + "learning_rate": 7.080527253202695e-06, + "loss": 0.003, + "step": 133170 + }, + { + "epoch": 0.8541751514432404, + "grad_norm": 0.22602427005767822, + "learning_rate": 7.080018292729721e-06, + "loss": 0.0014, + "step": 133180 + }, + { + "epoch": 0.8542392883370266, + "grad_norm": 0.12549887597560883, + "learning_rate": 7.0795093061926325e-06, + "loss": 0.0015, + "step": 133190 + }, + { + "epoch": 0.8543034252308126, + "grad_norm": 0.15486212074756622, + "learning_rate": 7.079000293597804e-06, + "loss": 0.0025, + "step": 133200 + }, + { + "epoch": 0.8543675621245987, + "grad_norm": 0.11368501931428909, + "learning_rate": 7.0784912549516185e-06, + "loss": 0.003, + "step": 133210 + }, + { + "epoch": 0.8544316990183849, + "grad_norm": 0.12644527852535248, + "learning_rate": 7.077982190260451e-06, + "loss": 0.0048, + "step": 133220 + }, + { + "epoch": 0.8544958359121709, + "grad_norm": 0.10898713022470474, + "learning_rate": 7.077473099530681e-06, + "loss": 0.0024, + "step": 133230 + }, + { + "epoch": 0.8545599728059571, + "grad_norm": 0.10378258675336838, + "learning_rate": 7.0769639827686885e-06, + "loss": 0.0028, + "step": 133240 + }, + { + "epoch": 0.8546241096997431, + "grad_norm": 0.11458373069763184, + "learning_rate": 7.076454839980854e-06, + "loss": 0.0015, + "step": 133250 + }, + { + "epoch": 0.8546882465935293, + "grad_norm": 0.14008843898773193, + "learning_rate": 7.075945671173555e-06, + "loss": 0.0039, + "step": 133260 + }, + { + "epoch": 0.8547523834873153, + "grad_norm": 0.14388789236545563, + "learning_rate": 7.0754364763531744e-06, + "loss": 0.0018, + "step": 133270 + }, + { + "epoch": 0.8548165203811015, + "grad_norm": 0.030500268563628197, + "learning_rate": 7.07492725552609e-06, + "loss": 0.003, + "step": 133280 + }, + { + "epoch": 0.8548806572748875, + "grad_norm": 0.13390643894672394, + "learning_rate": 7.074418008698685e-06, + "loss": 0.003, + "step": 133290 + }, + { + "epoch": 0.8549447941686736, + "grad_norm": 0.06771231442689896, + "learning_rate": 7.073908735877339e-06, + "loss": 0.0013, + "step": 133300 + }, + { + "epoch": 0.8550089310624597, + "grad_norm": 0.11549648642539978, + "learning_rate": 7.0733994370684355e-06, + "loss": 0.0021, + "step": 133310 + }, + { + "epoch": 0.8550730679562458, + "grad_norm": 0.11787834763526917, + "learning_rate": 7.072890112278355e-06, + "loss": 0.004, + "step": 133320 + }, + { + "epoch": 0.8551372048500319, + "grad_norm": 0.13039426505565643, + "learning_rate": 7.072380761513478e-06, + "loss": 0.0018, + "step": 133330 + }, + { + "epoch": 0.855201341743818, + "grad_norm": 0.04777880758047104, + "learning_rate": 7.071871384780191e-06, + "loss": 0.0028, + "step": 133340 + }, + { + "epoch": 0.855265478637604, + "grad_norm": 0.033508896827697754, + "learning_rate": 7.0713619820848745e-06, + "loss": 0.0009, + "step": 133350 + }, + { + "epoch": 0.8553296155313902, + "grad_norm": 0.33922964334487915, + "learning_rate": 7.070852553433913e-06, + "loss": 0.0025, + "step": 133360 + }, + { + "epoch": 0.8553937524251763, + "grad_norm": 0.0977553054690361, + "learning_rate": 7.070343098833687e-06, + "loss": 0.0014, + "step": 133370 + }, + { + "epoch": 0.8554578893189624, + "grad_norm": 0.1850329041481018, + "learning_rate": 7.069833618290583e-06, + "loss": 0.0023, + "step": 133380 + }, + { + "epoch": 0.8555220262127485, + "grad_norm": 0.07582248747348785, + "learning_rate": 7.069324111810984e-06, + "loss": 0.0024, + "step": 133390 + }, + { + "epoch": 0.8555861631065346, + "grad_norm": 0.06357874721288681, + "learning_rate": 7.068814579401277e-06, + "loss": 0.0025, + "step": 133400 + }, + { + "epoch": 0.8556503000003207, + "grad_norm": 0.1265944540500641, + "learning_rate": 7.068305021067843e-06, + "loss": 0.0024, + "step": 133410 + }, + { + "epoch": 0.8557144368941068, + "grad_norm": 0.04861431568861008, + "learning_rate": 7.0677954368170694e-06, + "loss": 0.0019, + "step": 133420 + }, + { + "epoch": 0.8557785737878929, + "grad_norm": 0.2114597111940384, + "learning_rate": 7.067285826655341e-06, + "loss": 0.003, + "step": 133430 + }, + { + "epoch": 0.8558427106816789, + "grad_norm": 0.20950160920619965, + "learning_rate": 7.066776190589043e-06, + "loss": 0.0032, + "step": 133440 + }, + { + "epoch": 0.8559068475754651, + "grad_norm": 0.0692938044667244, + "learning_rate": 7.066266528624563e-06, + "loss": 0.0013, + "step": 133450 + }, + { + "epoch": 0.8559709844692511, + "grad_norm": 0.037902288138866425, + "learning_rate": 7.065756840768286e-06, + "loss": 0.0022, + "step": 133460 + }, + { + "epoch": 0.8560351213630373, + "grad_norm": 0.13756364583969116, + "learning_rate": 7.065247127026601e-06, + "loss": 0.0061, + "step": 133470 + }, + { + "epoch": 0.8560992582568233, + "grad_norm": 0.056961458176374435, + "learning_rate": 7.064737387405892e-06, + "loss": 0.0021, + "step": 133480 + }, + { + "epoch": 0.8561633951506095, + "grad_norm": 0.07422365248203278, + "learning_rate": 7.064227621912549e-06, + "loss": 0.0037, + "step": 133490 + }, + { + "epoch": 0.8562275320443956, + "grad_norm": 0.21851889789104462, + "learning_rate": 7.063717830552956e-06, + "loss": 0.003, + "step": 133500 + }, + { + "epoch": 0.8562916689381816, + "grad_norm": 0.09047169238328934, + "learning_rate": 7.063208013333507e-06, + "loss": 0.0039, + "step": 133510 + }, + { + "epoch": 0.8563558058319678, + "grad_norm": 0.13710708916187286, + "learning_rate": 7.062698170260585e-06, + "loss": 0.0022, + "step": 133520 + }, + { + "epoch": 0.8564199427257538, + "grad_norm": 0.15213985741138458, + "learning_rate": 7.062188301340582e-06, + "loss": 0.0108, + "step": 133530 + }, + { + "epoch": 0.85648407961954, + "grad_norm": 0.2374606877565384, + "learning_rate": 7.061678406579885e-06, + "loss": 0.0046, + "step": 133540 + }, + { + "epoch": 0.856548216513326, + "grad_norm": 0.08374125510454178, + "learning_rate": 7.061168485984885e-06, + "loss": 0.0031, + "step": 133550 + }, + { + "epoch": 0.8566123534071122, + "grad_norm": 0.13061614334583282, + "learning_rate": 7.060658539561969e-06, + "loss": 0.0018, + "step": 133560 + }, + { + "epoch": 0.8566764903008982, + "grad_norm": 0.030887693166732788, + "learning_rate": 7.060148567317531e-06, + "loss": 0.0029, + "step": 133570 + }, + { + "epoch": 0.8567406271946844, + "grad_norm": 0.19017326831817627, + "learning_rate": 7.059638569257957e-06, + "loss": 0.0034, + "step": 133580 + }, + { + "epoch": 0.8568047640884704, + "grad_norm": 0.23443512618541718, + "learning_rate": 7.0591285453896415e-06, + "loss": 0.0029, + "step": 133590 + }, + { + "epoch": 0.8568689009822565, + "grad_norm": 0.14593902230262756, + "learning_rate": 7.058618495718972e-06, + "loss": 0.0014, + "step": 133600 + }, + { + "epoch": 0.8569330378760426, + "grad_norm": 0.11629689484834671, + "learning_rate": 7.058108420252343e-06, + "loss": 0.0026, + "step": 133610 + }, + { + "epoch": 0.8569971747698287, + "grad_norm": 0.17862918972969055, + "learning_rate": 7.057598318996144e-06, + "loss": 0.0022, + "step": 133620 + }, + { + "epoch": 0.8570613116636148, + "grad_norm": 0.07450482994318008, + "learning_rate": 7.057088191956767e-06, + "loss": 0.0024, + "step": 133630 + }, + { + "epoch": 0.8571254485574009, + "grad_norm": 0.02601449377834797, + "learning_rate": 7.056578039140605e-06, + "loss": 0.0039, + "step": 133640 + }, + { + "epoch": 0.8571895854511871, + "grad_norm": 0.3861764371395111, + "learning_rate": 7.05606786055405e-06, + "loss": 0.003, + "step": 133650 + }, + { + "epoch": 0.8572537223449731, + "grad_norm": 0.042525045573711395, + "learning_rate": 7.055557656203497e-06, + "loss": 0.0022, + "step": 133660 + }, + { + "epoch": 0.8573178592387593, + "grad_norm": 0.12494376301765442, + "learning_rate": 7.055047426095336e-06, + "loss": 0.0031, + "step": 133670 + }, + { + "epoch": 0.8573819961325453, + "grad_norm": 0.06158117204904556, + "learning_rate": 7.054537170235962e-06, + "loss": 0.0017, + "step": 133680 + }, + { + "epoch": 0.8574461330263314, + "grad_norm": 0.12451702356338501, + "learning_rate": 7.054026888631769e-06, + "loss": 0.0029, + "step": 133690 + }, + { + "epoch": 0.8575102699201175, + "grad_norm": 0.15168841183185577, + "learning_rate": 7.053516581289153e-06, + "loss": 0.004, + "step": 133700 + }, + { + "epoch": 0.8575744068139036, + "grad_norm": 0.10787881910800934, + "learning_rate": 7.053006248214503e-06, + "loss": 0.0043, + "step": 133710 + }, + { + "epoch": 0.8576385437076897, + "grad_norm": 0.1075955405831337, + "learning_rate": 7.052495889414221e-06, + "loss": 0.0045, + "step": 133720 + }, + { + "epoch": 0.8577026806014758, + "grad_norm": 0.05712176486849785, + "learning_rate": 7.051985504894696e-06, + "loss": 0.0032, + "step": 133730 + }, + { + "epoch": 0.8577668174952618, + "grad_norm": 0.15225821733474731, + "learning_rate": 7.051475094662328e-06, + "loss": 0.0021, + "step": 133740 + }, + { + "epoch": 0.857830954389048, + "grad_norm": 0.37607520818710327, + "learning_rate": 7.0509646587235095e-06, + "loss": 0.0029, + "step": 133750 + }, + { + "epoch": 0.857895091282834, + "grad_norm": 0.07383402436971664, + "learning_rate": 7.050454197084638e-06, + "loss": 0.0058, + "step": 133760 + }, + { + "epoch": 0.8579592281766202, + "grad_norm": 0.0801803320646286, + "learning_rate": 7.04994370975211e-06, + "loss": 0.0023, + "step": 133770 + }, + { + "epoch": 0.8580233650704063, + "grad_norm": 0.06811663508415222, + "learning_rate": 7.049433196732324e-06, + "loss": 0.0027, + "step": 133780 + }, + { + "epoch": 0.8580875019641924, + "grad_norm": 0.06166665256023407, + "learning_rate": 7.048922658031674e-06, + "loss": 0.0018, + "step": 133790 + }, + { + "epoch": 0.8581516388579785, + "grad_norm": 0.045509107410907745, + "learning_rate": 7.048412093656558e-06, + "loss": 0.0022, + "step": 133800 + }, + { + "epoch": 0.8582157757517646, + "grad_norm": 0.2717222273349762, + "learning_rate": 7.0479015036133755e-06, + "loss": 0.0037, + "step": 133810 + }, + { + "epoch": 0.8582799126455507, + "grad_norm": 0.23772364854812622, + "learning_rate": 7.047390887908523e-06, + "loss": 0.0029, + "step": 133820 + }, + { + "epoch": 0.8583440495393367, + "grad_norm": 0.1597808301448822, + "learning_rate": 7.046880246548401e-06, + "loss": 0.0047, + "step": 133830 + }, + { + "epoch": 0.8584081864331229, + "grad_norm": 0.05817929282784462, + "learning_rate": 7.046369579539405e-06, + "loss": 0.0032, + "step": 133840 + }, + { + "epoch": 0.8584723233269089, + "grad_norm": 0.08475963771343231, + "learning_rate": 7.045858886887936e-06, + "loss": 0.0034, + "step": 133850 + }, + { + "epoch": 0.8585364602206951, + "grad_norm": 0.011824116110801697, + "learning_rate": 7.0453481686003926e-06, + "loss": 0.0024, + "step": 133860 + }, + { + "epoch": 0.8586005971144811, + "grad_norm": 0.14034906029701233, + "learning_rate": 7.044837424683175e-06, + "loss": 0.0023, + "step": 133870 + }, + { + "epoch": 0.8586647340082673, + "grad_norm": 0.1342957615852356, + "learning_rate": 7.044326655142682e-06, + "loss": 0.0044, + "step": 133880 + }, + { + "epoch": 0.8587288709020533, + "grad_norm": 0.2175799161195755, + "learning_rate": 7.043815859985318e-06, + "loss": 0.0034, + "step": 133890 + }, + { + "epoch": 0.8587930077958394, + "grad_norm": 0.2881358563899994, + "learning_rate": 7.043305039217478e-06, + "loss": 0.0015, + "step": 133900 + }, + { + "epoch": 0.8588571446896255, + "grad_norm": 0.14161698520183563, + "learning_rate": 7.0427941928455666e-06, + "loss": 0.0019, + "step": 133910 + }, + { + "epoch": 0.8589212815834116, + "grad_norm": 0.11514313519001007, + "learning_rate": 7.0422833208759845e-06, + "loss": 0.0016, + "step": 133920 + }, + { + "epoch": 0.8589854184771978, + "grad_norm": 0.026508232578635216, + "learning_rate": 7.0417724233151315e-06, + "loss": 0.0035, + "step": 133930 + }, + { + "epoch": 0.8590495553709838, + "grad_norm": 0.052615657448768616, + "learning_rate": 7.041261500169412e-06, + "loss": 0.0026, + "step": 133940 + }, + { + "epoch": 0.85911369226477, + "grad_norm": 0.295786052942276, + "learning_rate": 7.040750551445227e-06, + "loss": 0.0036, + "step": 133950 + }, + { + "epoch": 0.859177829158556, + "grad_norm": 0.09007281064987183, + "learning_rate": 7.040239577148978e-06, + "loss": 0.0019, + "step": 133960 + }, + { + "epoch": 0.8592419660523422, + "grad_norm": 0.06715760380029678, + "learning_rate": 7.039728577287069e-06, + "loss": 0.0035, + "step": 133970 + }, + { + "epoch": 0.8593061029461282, + "grad_norm": 0.2409805804491043, + "learning_rate": 7.039217551865904e-06, + "loss": 0.0021, + "step": 133980 + }, + { + "epoch": 0.8593702398399143, + "grad_norm": 0.06879192590713501, + "learning_rate": 7.038706500891885e-06, + "loss": 0.0027, + "step": 133990 + }, + { + "epoch": 0.8594343767337004, + "grad_norm": 0.18605777621269226, + "learning_rate": 7.0381954243714165e-06, + "loss": 0.0026, + "step": 134000 + }, + { + "epoch": 0.8594985136274865, + "grad_norm": 0.14286191761493683, + "learning_rate": 7.037684322310903e-06, + "loss": 0.002, + "step": 134010 + }, + { + "epoch": 0.8595626505212726, + "grad_norm": 0.036439698189496994, + "learning_rate": 7.037173194716748e-06, + "loss": 0.0025, + "step": 134020 + }, + { + "epoch": 0.8596267874150587, + "grad_norm": 0.11563778668642044, + "learning_rate": 7.036662041595358e-06, + "loss": 0.0018, + "step": 134030 + }, + { + "epoch": 0.8596909243088448, + "grad_norm": 0.13866159319877625, + "learning_rate": 7.036150862953137e-06, + "loss": 0.0023, + "step": 134040 + }, + { + "epoch": 0.8597550612026309, + "grad_norm": 0.27767258882522583, + "learning_rate": 7.03563965879649e-06, + "loss": 0.0035, + "step": 134050 + }, + { + "epoch": 0.859819198096417, + "grad_norm": 0.062224142253398895, + "learning_rate": 7.035128429131823e-06, + "loss": 0.0016, + "step": 134060 + }, + { + "epoch": 0.8598833349902031, + "grad_norm": 0.08909322321414948, + "learning_rate": 7.034617173965544e-06, + "loss": 0.0027, + "step": 134070 + }, + { + "epoch": 0.8599474718839892, + "grad_norm": 0.017653265967965126, + "learning_rate": 7.034105893304055e-06, + "loss": 0.0037, + "step": 134080 + }, + { + "epoch": 0.8600116087777753, + "grad_norm": 0.08256793767213821, + "learning_rate": 7.033594587153767e-06, + "loss": 0.0022, + "step": 134090 + }, + { + "epoch": 0.8600757456715614, + "grad_norm": 0.042854174971580505, + "learning_rate": 7.033083255521086e-06, + "loss": 0.0036, + "step": 134100 + }, + { + "epoch": 0.8601398825653475, + "grad_norm": 0.035144440829753876, + "learning_rate": 7.032571898412417e-06, + "loss": 0.0013, + "step": 134110 + }, + { + "epoch": 0.8602040194591336, + "grad_norm": 0.09312219172716141, + "learning_rate": 7.032060515834172e-06, + "loss": 0.0123, + "step": 134120 + }, + { + "epoch": 0.8602681563529196, + "grad_norm": 0.041414786130189896, + "learning_rate": 7.031549107792753e-06, + "loss": 0.0027, + "step": 134130 + }, + { + "epoch": 0.8603322932467058, + "grad_norm": 0.10964328050613403, + "learning_rate": 7.031037674294573e-06, + "loss": 0.0035, + "step": 134140 + }, + { + "epoch": 0.8603964301404918, + "grad_norm": 0.03819069638848305, + "learning_rate": 7.030526215346041e-06, + "loss": 0.0027, + "step": 134150 + }, + { + "epoch": 0.860460567034278, + "grad_norm": 0.19429278373718262, + "learning_rate": 7.030014730953563e-06, + "loss": 0.0033, + "step": 134160 + }, + { + "epoch": 0.860524703928064, + "grad_norm": 0.08986981213092804, + "learning_rate": 7.029503221123551e-06, + "loss": 0.0034, + "step": 134170 + }, + { + "epoch": 0.8605888408218502, + "grad_norm": 0.17730683088302612, + "learning_rate": 7.028991685862411e-06, + "loss": 0.0019, + "step": 134180 + }, + { + "epoch": 0.8606529777156362, + "grad_norm": 0.11101886630058289, + "learning_rate": 7.028480125176556e-06, + "loss": 0.0033, + "step": 134190 + }, + { + "epoch": 0.8607171146094224, + "grad_norm": 0.21876883506774902, + "learning_rate": 7.027968539072395e-06, + "loss": 0.0019, + "step": 134200 + }, + { + "epoch": 0.8607812515032085, + "grad_norm": 0.005220834631472826, + "learning_rate": 7.02745692755634e-06, + "loss": 0.0011, + "step": 134210 + }, + { + "epoch": 0.8608453883969945, + "grad_norm": 0.2566666901111603, + "learning_rate": 7.026945290634799e-06, + "loss": 0.0018, + "step": 134220 + }, + { + "epoch": 0.8609095252907807, + "grad_norm": 0.19004929065704346, + "learning_rate": 7.026433628314186e-06, + "loss": 0.0017, + "step": 134230 + }, + { + "epoch": 0.8609736621845667, + "grad_norm": 0.13197307288646698, + "learning_rate": 7.025921940600912e-06, + "loss": 0.0021, + "step": 134240 + }, + { + "epoch": 0.8610377990783529, + "grad_norm": 0.04679390415549278, + "learning_rate": 7.0254102275013855e-06, + "loss": 0.0033, + "step": 134250 + }, + { + "epoch": 0.8611019359721389, + "grad_norm": 0.21860431134700775, + "learning_rate": 7.024898489022023e-06, + "loss": 0.0034, + "step": 134260 + }, + { + "epoch": 0.8611660728659251, + "grad_norm": 0.10462875664234161, + "learning_rate": 7.024386725169236e-06, + "loss": 0.0033, + "step": 134270 + }, + { + "epoch": 0.8612302097597111, + "grad_norm": 0.09628218412399292, + "learning_rate": 7.023874935949435e-06, + "loss": 0.0021, + "step": 134280 + }, + { + "epoch": 0.8612943466534972, + "grad_norm": 0.08594681322574615, + "learning_rate": 7.023363121369037e-06, + "loss": 0.0015, + "step": 134290 + }, + { + "epoch": 0.8613584835472833, + "grad_norm": 0.05004475638270378, + "learning_rate": 7.022851281434451e-06, + "loss": 0.0026, + "step": 134300 + }, + { + "epoch": 0.8614226204410694, + "grad_norm": 0.06274831295013428, + "learning_rate": 7.0223394161520944e-06, + "loss": 0.002, + "step": 134310 + }, + { + "epoch": 0.8614867573348555, + "grad_norm": 0.06289125978946686, + "learning_rate": 7.0218275255283775e-06, + "loss": 0.0024, + "step": 134320 + }, + { + "epoch": 0.8615508942286416, + "grad_norm": 0.16910091042518616, + "learning_rate": 7.021315609569719e-06, + "loss": 0.0034, + "step": 134330 + }, + { + "epoch": 0.8616150311224277, + "grad_norm": 0.19005230069160461, + "learning_rate": 7.020803668282529e-06, + "loss": 0.002, + "step": 134340 + }, + { + "epoch": 0.8616791680162138, + "grad_norm": 0.06614792346954346, + "learning_rate": 7.020291701673225e-06, + "loss": 0.0028, + "step": 134350 + }, + { + "epoch": 0.86174330491, + "grad_norm": 0.04979289695620537, + "learning_rate": 7.019779709748223e-06, + "loss": 0.0013, + "step": 134360 + }, + { + "epoch": 0.861807441803786, + "grad_norm": 0.16481654345989227, + "learning_rate": 7.019267692513938e-06, + "loss": 0.0039, + "step": 134370 + }, + { + "epoch": 0.8618715786975721, + "grad_norm": 0.0695338174700737, + "learning_rate": 7.018755649976785e-06, + "loss": 0.0035, + "step": 134380 + }, + { + "epoch": 0.8619357155913582, + "grad_norm": 0.20109935104846954, + "learning_rate": 7.0182435821431815e-06, + "loss": 0.003, + "step": 134390 + }, + { + "epoch": 0.8619998524851443, + "grad_norm": 0.04521843045949936, + "learning_rate": 7.0177314890195435e-06, + "loss": 0.0025, + "step": 134400 + }, + { + "epoch": 0.8620639893789304, + "grad_norm": 0.024238646030426025, + "learning_rate": 7.017219370612287e-06, + "loss": 0.002, + "step": 134410 + }, + { + "epoch": 0.8621281262727165, + "grad_norm": 0.09983240067958832, + "learning_rate": 7.016707226927831e-06, + "loss": 0.0054, + "step": 134420 + }, + { + "epoch": 0.8621922631665025, + "grad_norm": 0.06918484717607498, + "learning_rate": 7.016195057972591e-06, + "loss": 0.003, + "step": 134430 + }, + { + "epoch": 0.8622564000602887, + "grad_norm": 0.08813716471195221, + "learning_rate": 7.015682863752988e-06, + "loss": 0.002, + "step": 134440 + }, + { + "epoch": 0.8623205369540747, + "grad_norm": 0.06289267539978027, + "learning_rate": 7.0151706442754365e-06, + "loss": 0.001, + "step": 134450 + }, + { + "epoch": 0.8623846738478609, + "grad_norm": 0.09562243521213531, + "learning_rate": 7.014658399546357e-06, + "loss": 0.0025, + "step": 134460 + }, + { + "epoch": 0.8624488107416469, + "grad_norm": 0.18738164007663727, + "learning_rate": 7.014146129572168e-06, + "loss": 0.0034, + "step": 134470 + }, + { + "epoch": 0.8625129476354331, + "grad_norm": 0.16660718619823456, + "learning_rate": 7.013633834359289e-06, + "loss": 0.0072, + "step": 134480 + }, + { + "epoch": 0.8625770845292192, + "grad_norm": 0.17574219405651093, + "learning_rate": 7.0131215139141385e-06, + "loss": 0.0038, + "step": 134490 + }, + { + "epoch": 0.8626412214230053, + "grad_norm": 0.1404060572385788, + "learning_rate": 7.0126091682431355e-06, + "loss": 0.0069, + "step": 134500 + }, + { + "epoch": 0.8627053583167914, + "grad_norm": 0.21174687147140503, + "learning_rate": 7.012096797352703e-06, + "loss": 0.0017, + "step": 134510 + }, + { + "epoch": 0.8627694952105774, + "grad_norm": 0.13424913585186005, + "learning_rate": 7.0115844012492585e-06, + "loss": 0.0027, + "step": 134520 + }, + { + "epoch": 0.8628336321043636, + "grad_norm": 0.08720983564853668, + "learning_rate": 7.011071979939225e-06, + "loss": 0.0024, + "step": 134530 + }, + { + "epoch": 0.8628977689981496, + "grad_norm": 0.02763362228870392, + "learning_rate": 7.0105595334290196e-06, + "loss": 0.0052, + "step": 134540 + }, + { + "epoch": 0.8629619058919358, + "grad_norm": 0.2591906189918518, + "learning_rate": 7.01004706172507e-06, + "loss": 0.0062, + "step": 134550 + }, + { + "epoch": 0.8630260427857218, + "grad_norm": 0.07465941458940506, + "learning_rate": 7.009534564833791e-06, + "loss": 0.0018, + "step": 134560 + }, + { + "epoch": 0.863090179679508, + "grad_norm": 0.2604738771915436, + "learning_rate": 7.00902204276161e-06, + "loss": 0.0027, + "step": 134570 + }, + { + "epoch": 0.863154316573294, + "grad_norm": 0.07500201463699341, + "learning_rate": 7.008509495514945e-06, + "loss": 0.0029, + "step": 134580 + }, + { + "epoch": 0.8632184534670801, + "grad_norm": 0.3114131689071655, + "learning_rate": 7.007996923100222e-06, + "loss": 0.0096, + "step": 134590 + }, + { + "epoch": 0.8632825903608662, + "grad_norm": 0.04153985530138016, + "learning_rate": 7.007484325523862e-06, + "loss": 0.0018, + "step": 134600 + }, + { + "epoch": 0.8633467272546523, + "grad_norm": 0.21154899895191193, + "learning_rate": 7.006971702792289e-06, + "loss": 0.0038, + "step": 134610 + }, + { + "epoch": 0.8634108641484384, + "grad_norm": 0.14301951229572296, + "learning_rate": 7.006459054911926e-06, + "loss": 0.0028, + "step": 134620 + }, + { + "epoch": 0.8634750010422245, + "grad_norm": 0.03243682533502579, + "learning_rate": 7.005946381889197e-06, + "loss": 0.0025, + "step": 134630 + }, + { + "epoch": 0.8635391379360107, + "grad_norm": 0.5242324471473694, + "learning_rate": 7.005433683730525e-06, + "loss": 0.0039, + "step": 134640 + }, + { + "epoch": 0.8636032748297967, + "grad_norm": 0.08665811270475388, + "learning_rate": 7.004920960442337e-06, + "loss": 0.0015, + "step": 134650 + }, + { + "epoch": 0.8636674117235829, + "grad_norm": 0.06900236010551453, + "learning_rate": 7.004408212031056e-06, + "loss": 0.0017, + "step": 134660 + }, + { + "epoch": 0.8637315486173689, + "grad_norm": 0.011264686472713947, + "learning_rate": 7.0038954385031085e-06, + "loss": 0.0029, + "step": 134670 + }, + { + "epoch": 0.863795685511155, + "grad_norm": 0.1056748479604721, + "learning_rate": 7.003382639864919e-06, + "loss": 0.0024, + "step": 134680 + }, + { + "epoch": 0.8638598224049411, + "grad_norm": 0.07375384867191315, + "learning_rate": 7.002869816122912e-06, + "loss": 0.0022, + "step": 134690 + }, + { + "epoch": 0.8639239592987272, + "grad_norm": 0.11620330810546875, + "learning_rate": 7.002356967283516e-06, + "loss": 0.0049, + "step": 134700 + }, + { + "epoch": 0.8639880961925133, + "grad_norm": 0.16854000091552734, + "learning_rate": 7.001844093353154e-06, + "loss": 0.0019, + "step": 134710 + }, + { + "epoch": 0.8640522330862994, + "grad_norm": 0.09560932219028473, + "learning_rate": 7.001331194338258e-06, + "loss": 0.001, + "step": 134720 + }, + { + "epoch": 0.8641163699800855, + "grad_norm": 0.12318872660398483, + "learning_rate": 7.000818270245249e-06, + "loss": 0.0032, + "step": 134730 + }, + { + "epoch": 0.8641805068738716, + "grad_norm": 0.15796954929828644, + "learning_rate": 7.000305321080559e-06, + "loss": 0.0043, + "step": 134740 + }, + { + "epoch": 0.8642446437676576, + "grad_norm": 0.049662332981824875, + "learning_rate": 6.999792346850613e-06, + "loss": 0.0034, + "step": 134750 + }, + { + "epoch": 0.8643087806614438, + "grad_norm": 0.1907750964164734, + "learning_rate": 6.99927934756184e-06, + "loss": 0.0028, + "step": 134760 + }, + { + "epoch": 0.8643729175552299, + "grad_norm": 0.1717313677072525, + "learning_rate": 6.998766323220667e-06, + "loss": 0.0023, + "step": 134770 + }, + { + "epoch": 0.864437054449016, + "grad_norm": 0.2066045105457306, + "learning_rate": 6.998253273833524e-06, + "loss": 0.002, + "step": 134780 + }, + { + "epoch": 0.8645011913428021, + "grad_norm": 0.1488698422908783, + "learning_rate": 6.99774019940684e-06, + "loss": 0.0035, + "step": 134790 + }, + { + "epoch": 0.8645653282365882, + "grad_norm": 0.16711297631263733, + "learning_rate": 6.997227099947043e-06, + "loss": 0.0034, + "step": 134800 + }, + { + "epoch": 0.8646294651303743, + "grad_norm": 0.05773639306426048, + "learning_rate": 6.996713975460563e-06, + "loss": 0.0025, + "step": 134810 + }, + { + "epoch": 0.8646936020241603, + "grad_norm": 0.06513752043247223, + "learning_rate": 6.996200825953829e-06, + "loss": 0.0017, + "step": 134820 + }, + { + "epoch": 0.8647577389179465, + "grad_norm": 0.22357122600078583, + "learning_rate": 6.995687651433273e-06, + "loss": 0.002, + "step": 134830 + }, + { + "epoch": 0.8648218758117325, + "grad_norm": 0.07101402431726456, + "learning_rate": 6.995174451905324e-06, + "loss": 0.0018, + "step": 134840 + }, + { + "epoch": 0.8648860127055187, + "grad_norm": 0.06312518566846848, + "learning_rate": 6.994661227376414e-06, + "loss": 0.002, + "step": 134850 + }, + { + "epoch": 0.8649501495993047, + "grad_norm": 0.07518059015274048, + "learning_rate": 6.994147977852972e-06, + "loss": 0.0033, + "step": 134860 + }, + { + "epoch": 0.8650142864930909, + "grad_norm": 0.0721321851015091, + "learning_rate": 6.993634703341432e-06, + "loss": 0.0015, + "step": 134870 + }, + { + "epoch": 0.8650784233868769, + "grad_norm": 0.07936231046915054, + "learning_rate": 6.993121403848223e-06, + "loss": 0.0032, + "step": 134880 + }, + { + "epoch": 0.865142560280663, + "grad_norm": 0.15310147404670715, + "learning_rate": 6.99260807937978e-06, + "loss": 0.0032, + "step": 134890 + }, + { + "epoch": 0.8652066971744491, + "grad_norm": 0.07506764680147171, + "learning_rate": 6.992094729942533e-06, + "loss": 0.002, + "step": 134900 + }, + { + "epoch": 0.8652708340682352, + "grad_norm": 0.05334783345460892, + "learning_rate": 6.991581355542915e-06, + "loss": 0.0016, + "step": 134910 + }, + { + "epoch": 0.8653349709620214, + "grad_norm": 0.17147868871688843, + "learning_rate": 6.991067956187359e-06, + "loss": 0.0033, + "step": 134920 + }, + { + "epoch": 0.8653991078558074, + "grad_norm": 0.16561748087406158, + "learning_rate": 6.990554531882299e-06, + "loss": 0.0015, + "step": 134930 + }, + { + "epoch": 0.8654632447495936, + "grad_norm": 0.08491192013025284, + "learning_rate": 6.9900410826341665e-06, + "loss": 0.0015, + "step": 134940 + }, + { + "epoch": 0.8655273816433796, + "grad_norm": 0.14289340376853943, + "learning_rate": 6.989527608449399e-06, + "loss": 0.0024, + "step": 134950 + }, + { + "epoch": 0.8655915185371658, + "grad_norm": 0.13716906309127808, + "learning_rate": 6.989014109334428e-06, + "loss": 0.0042, + "step": 134960 + }, + { + "epoch": 0.8656556554309518, + "grad_norm": 0.2592507004737854, + "learning_rate": 6.988500585295689e-06, + "loss": 0.0025, + "step": 134970 + }, + { + "epoch": 0.865719792324738, + "grad_norm": 0.09000234305858612, + "learning_rate": 6.987987036339616e-06, + "loss": 0.002, + "step": 134980 + }, + { + "epoch": 0.865783929218524, + "grad_norm": 0.13545222580432892, + "learning_rate": 6.9874734624726445e-06, + "loss": 0.0016, + "step": 134990 + }, + { + "epoch": 0.8658480661123101, + "grad_norm": 0.12626180052757263, + "learning_rate": 6.98695986370121e-06, + "loss": 0.0022, + "step": 135000 + }, + { + "epoch": 0.8659122030060962, + "grad_norm": 0.48018166422843933, + "learning_rate": 6.986446240031749e-06, + "loss": 0.0042, + "step": 135010 + }, + { + "epoch": 0.8659763398998823, + "grad_norm": 0.18012456595897675, + "learning_rate": 6.985932591470697e-06, + "loss": 0.0039, + "step": 135020 + }, + { + "epoch": 0.8660404767936684, + "grad_norm": 0.1186421662569046, + "learning_rate": 6.985418918024489e-06, + "loss": 0.0016, + "step": 135030 + }, + { + "epoch": 0.8661046136874545, + "grad_norm": 0.0692361518740654, + "learning_rate": 6.984905219699565e-06, + "loss": 0.0044, + "step": 135040 + }, + { + "epoch": 0.8661687505812407, + "grad_norm": 0.2168842852115631, + "learning_rate": 6.984391496502358e-06, + "loss": 0.0028, + "step": 135050 + }, + { + "epoch": 0.8662328874750267, + "grad_norm": 0.15261414647102356, + "learning_rate": 6.98387774843931e-06, + "loss": 0.0022, + "step": 135060 + }, + { + "epoch": 0.8662970243688128, + "grad_norm": 0.1448751986026764, + "learning_rate": 6.983363975516853e-06, + "loss": 0.0021, + "step": 135070 + }, + { + "epoch": 0.8663611612625989, + "grad_norm": 0.0858541801571846, + "learning_rate": 6.98285017774143e-06, + "loss": 0.003, + "step": 135080 + }, + { + "epoch": 0.866425298156385, + "grad_norm": 0.010609383694827557, + "learning_rate": 6.982336355119475e-06, + "loss": 0.0032, + "step": 135090 + }, + { + "epoch": 0.8664894350501711, + "grad_norm": 0.14291512966156006, + "learning_rate": 6.981822507657431e-06, + "loss": 0.0014, + "step": 135100 + }, + { + "epoch": 0.8665535719439572, + "grad_norm": 0.23925663530826569, + "learning_rate": 6.9813086353617335e-06, + "loss": 0.0012, + "step": 135110 + }, + { + "epoch": 0.8666177088377433, + "grad_norm": 0.27118679881095886, + "learning_rate": 6.980794738238823e-06, + "loss": 0.0025, + "step": 135120 + }, + { + "epoch": 0.8666818457315294, + "grad_norm": 0.15810494124889374, + "learning_rate": 6.980280816295138e-06, + "loss": 0.0016, + "step": 135130 + }, + { + "epoch": 0.8667459826253154, + "grad_norm": 0.22343648970127106, + "learning_rate": 6.97976686953712e-06, + "loss": 0.0059, + "step": 135140 + }, + { + "epoch": 0.8668101195191016, + "grad_norm": 0.09998579323291779, + "learning_rate": 6.979252897971208e-06, + "loss": 0.0018, + "step": 135150 + }, + { + "epoch": 0.8668742564128876, + "grad_norm": 0.18671497702598572, + "learning_rate": 6.978738901603843e-06, + "loss": 0.0037, + "step": 135160 + }, + { + "epoch": 0.8669383933066738, + "grad_norm": 0.021066153421998024, + "learning_rate": 6.978224880441464e-06, + "loss": 0.0031, + "step": 135170 + }, + { + "epoch": 0.8670025302004598, + "grad_norm": 0.11940399557352066, + "learning_rate": 6.977710834490515e-06, + "loss": 0.004, + "step": 135180 + }, + { + "epoch": 0.867066667094246, + "grad_norm": 0.046428464353084564, + "learning_rate": 6.977196763757436e-06, + "loss": 0.0015, + "step": 135190 + }, + { + "epoch": 0.8671308039880321, + "grad_norm": 0.07993770390748978, + "learning_rate": 6.976682668248667e-06, + "loss": 0.0015, + "step": 135200 + }, + { + "epoch": 0.8671949408818181, + "grad_norm": 0.07815032452344894, + "learning_rate": 6.976168547970652e-06, + "loss": 0.0034, + "step": 135210 + }, + { + "epoch": 0.8672590777756043, + "grad_norm": 0.12343888729810715, + "learning_rate": 6.9756544029298325e-06, + "loss": 0.0026, + "step": 135220 + }, + { + "epoch": 0.8673232146693903, + "grad_norm": 0.37997809052467346, + "learning_rate": 6.975140233132652e-06, + "loss": 0.0026, + "step": 135230 + }, + { + "epoch": 0.8673873515631765, + "grad_norm": 0.047730203717947006, + "learning_rate": 6.974626038585552e-06, + "loss": 0.0012, + "step": 135240 + }, + { + "epoch": 0.8674514884569625, + "grad_norm": 0.11115345358848572, + "learning_rate": 6.974111819294979e-06, + "loss": 0.0026, + "step": 135250 + }, + { + "epoch": 0.8675156253507487, + "grad_norm": 0.076470285654068, + "learning_rate": 6.973597575267371e-06, + "loss": 0.002, + "step": 135260 + }, + { + "epoch": 0.8675797622445347, + "grad_norm": 0.18768024444580078, + "learning_rate": 6.9730833065091765e-06, + "loss": 0.0037, + "step": 135270 + }, + { + "epoch": 0.8676438991383209, + "grad_norm": 0.06109945848584175, + "learning_rate": 6.972569013026837e-06, + "loss": 0.0025, + "step": 135280 + }, + { + "epoch": 0.8677080360321069, + "grad_norm": 0.10006918013095856, + "learning_rate": 6.972054694826799e-06, + "loss": 0.0037, + "step": 135290 + }, + { + "epoch": 0.867772172925893, + "grad_norm": 0.21546000242233276, + "learning_rate": 6.971540351915504e-06, + "loss": 0.0016, + "step": 135300 + }, + { + "epoch": 0.8678363098196791, + "grad_norm": 0.08633749186992645, + "learning_rate": 6.9710259842994025e-06, + "loss": 0.0045, + "step": 135310 + }, + { + "epoch": 0.8679004467134652, + "grad_norm": 0.05979597941040993, + "learning_rate": 6.970511591984936e-06, + "loss": 0.0018, + "step": 135320 + }, + { + "epoch": 0.8679645836072514, + "grad_norm": 0.11502660065889359, + "learning_rate": 6.96999717497855e-06, + "loss": 0.0038, + "step": 135330 + }, + { + "epoch": 0.8680287205010374, + "grad_norm": 0.20135074853897095, + "learning_rate": 6.969482733286691e-06, + "loss": 0.0047, + "step": 135340 + }, + { + "epoch": 0.8680928573948236, + "grad_norm": 0.297893226146698, + "learning_rate": 6.968968266915806e-06, + "loss": 0.0022, + "step": 135350 + }, + { + "epoch": 0.8681569942886096, + "grad_norm": 0.014258438721299171, + "learning_rate": 6.968453775872342e-06, + "loss": 0.0026, + "step": 135360 + }, + { + "epoch": 0.8682211311823957, + "grad_norm": 0.057328782975673676, + "learning_rate": 6.967939260162746e-06, + "loss": 0.0023, + "step": 135370 + }, + { + "epoch": 0.8682852680761818, + "grad_norm": 0.16010943055152893, + "learning_rate": 6.967424719793464e-06, + "loss": 0.0077, + "step": 135380 + }, + { + "epoch": 0.8683494049699679, + "grad_norm": 0.03984547406435013, + "learning_rate": 6.966910154770943e-06, + "loss": 0.0031, + "step": 135390 + }, + { + "epoch": 0.868413541863754, + "grad_norm": 0.0993046835064888, + "learning_rate": 6.966395565101634e-06, + "loss": 0.001, + "step": 135400 + }, + { + "epoch": 0.8684776787575401, + "grad_norm": 0.024639731273055077, + "learning_rate": 6.965880950791981e-06, + "loss": 0.0018, + "step": 135410 + }, + { + "epoch": 0.8685418156513262, + "grad_norm": 0.02593991719186306, + "learning_rate": 6.965366311848436e-06, + "loss": 0.002, + "step": 135420 + }, + { + "epoch": 0.8686059525451123, + "grad_norm": 0.1336241215467453, + "learning_rate": 6.9648516482774464e-06, + "loss": 0.0021, + "step": 135430 + }, + { + "epoch": 0.8686700894388983, + "grad_norm": 0.04180555418133736, + "learning_rate": 6.964336960085461e-06, + "loss": 0.0021, + "step": 135440 + }, + { + "epoch": 0.8687342263326845, + "grad_norm": 0.027477435767650604, + "learning_rate": 6.9638222472789305e-06, + "loss": 0.0053, + "step": 135450 + }, + { + "epoch": 0.8687983632264705, + "grad_norm": 0.13674120604991913, + "learning_rate": 6.963307509864303e-06, + "loss": 0.0023, + "step": 135460 + }, + { + "epoch": 0.8688625001202567, + "grad_norm": 0.08610444515943527, + "learning_rate": 6.96279274784803e-06, + "loss": 0.0024, + "step": 135470 + }, + { + "epoch": 0.8689266370140428, + "grad_norm": 0.10824882239103317, + "learning_rate": 6.962277961236561e-06, + "loss": 0.0012, + "step": 135480 + }, + { + "epoch": 0.8689907739078289, + "grad_norm": 0.12073953449726105, + "learning_rate": 6.961763150036346e-06, + "loss": 0.0038, + "step": 135490 + }, + { + "epoch": 0.869054910801615, + "grad_norm": 0.1749655157327652, + "learning_rate": 6.961248314253836e-06, + "loss": 0.0027, + "step": 135500 + }, + { + "epoch": 0.869119047695401, + "grad_norm": 0.30429336428642273, + "learning_rate": 6.960733453895485e-06, + "loss": 0.0033, + "step": 135510 + }, + { + "epoch": 0.8691831845891872, + "grad_norm": 0.09175138920545578, + "learning_rate": 6.960218568967741e-06, + "loss": 0.0015, + "step": 135520 + }, + { + "epoch": 0.8692473214829732, + "grad_norm": 0.1776876002550125, + "learning_rate": 6.9597036594770586e-06, + "loss": 0.003, + "step": 135530 + }, + { + "epoch": 0.8693114583767594, + "grad_norm": 0.026637470349669456, + "learning_rate": 6.9591887254298886e-06, + "loss": 0.0044, + "step": 135540 + }, + { + "epoch": 0.8693755952705454, + "grad_norm": 0.12641720473766327, + "learning_rate": 6.958673766832682e-06, + "loss": 0.0023, + "step": 135550 + }, + { + "epoch": 0.8694397321643316, + "grad_norm": 0.10737831890583038, + "learning_rate": 6.958158783691894e-06, + "loss": 0.0025, + "step": 135560 + }, + { + "epoch": 0.8695038690581176, + "grad_norm": 0.07200788706541061, + "learning_rate": 6.957643776013978e-06, + "loss": 0.0015, + "step": 135570 + }, + { + "epoch": 0.8695680059519038, + "grad_norm": 0.08781659603118896, + "learning_rate": 6.957128743805385e-06, + "loss": 0.0019, + "step": 135580 + }, + { + "epoch": 0.8696321428456898, + "grad_norm": 0.035964686423540115, + "learning_rate": 6.956613687072571e-06, + "loss": 0.0038, + "step": 135590 + }, + { + "epoch": 0.8696962797394759, + "grad_norm": 0.1329997181892395, + "learning_rate": 6.956098605821988e-06, + "loss": 0.0043, + "step": 135600 + }, + { + "epoch": 0.869760416633262, + "grad_norm": 0.07175031304359436, + "learning_rate": 6.955583500060093e-06, + "loss": 0.0028, + "step": 135610 + }, + { + "epoch": 0.8698245535270481, + "grad_norm": 0.055772650986909866, + "learning_rate": 6.955068369793338e-06, + "loss": 0.0023, + "step": 135620 + }, + { + "epoch": 0.8698886904208343, + "grad_norm": 0.08265043795108795, + "learning_rate": 6.954553215028181e-06, + "loss": 0.0033, + "step": 135630 + }, + { + "epoch": 0.8699528273146203, + "grad_norm": 0.020286694169044495, + "learning_rate": 6.954038035771073e-06, + "loss": 0.0015, + "step": 135640 + }, + { + "epoch": 0.8700169642084065, + "grad_norm": 0.16302058100700378, + "learning_rate": 6.953522832028473e-06, + "loss": 0.0018, + "step": 135650 + }, + { + "epoch": 0.8700811011021925, + "grad_norm": 0.0685119554400444, + "learning_rate": 6.953007603806835e-06, + "loss": 0.003, + "step": 135660 + }, + { + "epoch": 0.8701452379959786, + "grad_norm": 0.1629972606897354, + "learning_rate": 6.952492351112617e-06, + "loss": 0.0036, + "step": 135670 + }, + { + "epoch": 0.8702093748897647, + "grad_norm": 0.0794086903333664, + "learning_rate": 6.951977073952274e-06, + "loss": 0.0018, + "step": 135680 + }, + { + "epoch": 0.8702735117835508, + "grad_norm": 0.0831390917301178, + "learning_rate": 6.951461772332263e-06, + "loss": 0.0036, + "step": 135690 + }, + { + "epoch": 0.8703376486773369, + "grad_norm": 0.18447096645832062, + "learning_rate": 6.950946446259041e-06, + "loss": 0.0029, + "step": 135700 + }, + { + "epoch": 0.870401785571123, + "grad_norm": 0.07849343866109848, + "learning_rate": 6.950431095739065e-06, + "loss": 0.0016, + "step": 135710 + }, + { + "epoch": 0.8704659224649091, + "grad_norm": 0.046604666858911514, + "learning_rate": 6.9499157207787956e-06, + "loss": 0.0018, + "step": 135720 + }, + { + "epoch": 0.8705300593586952, + "grad_norm": 0.14687475562095642, + "learning_rate": 6.949400321384687e-06, + "loss": 0.0022, + "step": 135730 + }, + { + "epoch": 0.8705941962524812, + "grad_norm": 0.12612827122211456, + "learning_rate": 6.948884897563201e-06, + "loss": 0.0042, + "step": 135740 + }, + { + "epoch": 0.8706583331462674, + "grad_norm": 0.12573637068271637, + "learning_rate": 6.948369449320792e-06, + "loss": 0.0015, + "step": 135750 + }, + { + "epoch": 0.8707224700400535, + "grad_norm": 0.1647786647081375, + "learning_rate": 6.947853976663923e-06, + "loss": 0.003, + "step": 135760 + }, + { + "epoch": 0.8707866069338396, + "grad_norm": 0.10565929859876633, + "learning_rate": 6.94733847959905e-06, + "loss": 0.0014, + "step": 135770 + }, + { + "epoch": 0.8708507438276257, + "grad_norm": 0.06949920207262039, + "learning_rate": 6.946822958132635e-06, + "loss": 0.0019, + "step": 135780 + }, + { + "epoch": 0.8709148807214118, + "grad_norm": 0.27971917390823364, + "learning_rate": 6.946307412271136e-06, + "loss": 0.003, + "step": 135790 + }, + { + "epoch": 0.8709790176151979, + "grad_norm": 0.004446999169886112, + "learning_rate": 6.945791842021016e-06, + "loss": 0.002, + "step": 135800 + }, + { + "epoch": 0.871043154508984, + "grad_norm": 0.1649855375289917, + "learning_rate": 6.945276247388732e-06, + "loss": 0.0033, + "step": 135810 + }, + { + "epoch": 0.8711072914027701, + "grad_norm": 0.12075648456811905, + "learning_rate": 6.944760628380748e-06, + "loss": 0.0034, + "step": 135820 + }, + { + "epoch": 0.8711714282965561, + "grad_norm": 0.13726001977920532, + "learning_rate": 6.944244985003522e-06, + "loss": 0.0022, + "step": 135830 + }, + { + "epoch": 0.8712355651903423, + "grad_norm": 0.2607235908508301, + "learning_rate": 6.9437293172635175e-06, + "loss": 0.0013, + "step": 135840 + }, + { + "epoch": 0.8712997020841283, + "grad_norm": 0.15931189060211182, + "learning_rate": 6.9432136251671955e-06, + "loss": 0.0051, + "step": 135850 + }, + { + "epoch": 0.8713638389779145, + "grad_norm": 0.11268272995948792, + "learning_rate": 6.942697908721017e-06, + "loss": 0.0024, + "step": 135860 + }, + { + "epoch": 0.8714279758717005, + "grad_norm": 0.10024124383926392, + "learning_rate": 6.942182167931446e-06, + "loss": 0.0018, + "step": 135870 + }, + { + "epoch": 0.8714921127654867, + "grad_norm": 0.023353857919573784, + "learning_rate": 6.941666402804945e-06, + "loss": 0.0018, + "step": 135880 + }, + { + "epoch": 0.8715562496592727, + "grad_norm": 0.06861988455057144, + "learning_rate": 6.9411506133479756e-06, + "loss": 0.0028, + "step": 135890 + }, + { + "epoch": 0.8716203865530588, + "grad_norm": 0.012388293631374836, + "learning_rate": 6.940634799567002e-06, + "loss": 0.0023, + "step": 135900 + }, + { + "epoch": 0.871684523446845, + "grad_norm": 0.11425565928220749, + "learning_rate": 6.9401189614684875e-06, + "loss": 0.0045, + "step": 135910 + }, + { + "epoch": 0.871748660340631, + "grad_norm": 0.07764580100774765, + "learning_rate": 6.939603099058895e-06, + "loss": 0.0021, + "step": 135920 + }, + { + "epoch": 0.8718127972344172, + "grad_norm": 0.02401791699230671, + "learning_rate": 6.939087212344691e-06, + "loss": 0.0018, + "step": 135930 + }, + { + "epoch": 0.8718769341282032, + "grad_norm": 0.01430745143443346, + "learning_rate": 6.938571301332337e-06, + "loss": 0.0021, + "step": 135940 + }, + { + "epoch": 0.8719410710219894, + "grad_norm": 0.31946444511413574, + "learning_rate": 6.938055366028299e-06, + "loss": 0.0023, + "step": 135950 + }, + { + "epoch": 0.8720052079157754, + "grad_norm": 0.05516481027007103, + "learning_rate": 6.937539406439042e-06, + "loss": 0.001, + "step": 135960 + }, + { + "epoch": 0.8720693448095616, + "grad_norm": 0.020058229565620422, + "learning_rate": 6.9370234225710335e-06, + "loss": 0.0015, + "step": 135970 + }, + { + "epoch": 0.8721334817033476, + "grad_norm": 0.24442732334136963, + "learning_rate": 6.936507414430735e-06, + "loss": 0.0021, + "step": 135980 + }, + { + "epoch": 0.8721976185971337, + "grad_norm": 0.042491428554058075, + "learning_rate": 6.935991382024616e-06, + "loss": 0.0018, + "step": 135990 + }, + { + "epoch": 0.8722617554909198, + "grad_norm": 0.07440150529146194, + "learning_rate": 6.93547532535914e-06, + "loss": 0.0015, + "step": 136000 + }, + { + "epoch": 0.8723258923847059, + "grad_norm": 0.18436209857463837, + "learning_rate": 6.934959244440776e-06, + "loss": 0.004, + "step": 136010 + }, + { + "epoch": 0.872390029278492, + "grad_norm": 0.08648974448442459, + "learning_rate": 6.9344431392759895e-06, + "loss": 0.0042, + "step": 136020 + }, + { + "epoch": 0.8724541661722781, + "grad_norm": 0.030176298692822456, + "learning_rate": 6.933927009871249e-06, + "loss": 0.0023, + "step": 136030 + }, + { + "epoch": 0.8725183030660643, + "grad_norm": 0.1476580649614334, + "learning_rate": 6.933410856233018e-06, + "loss": 0.0026, + "step": 136040 + }, + { + "epoch": 0.8725824399598503, + "grad_norm": 0.09100686013698578, + "learning_rate": 6.932894678367769e-06, + "loss": 0.002, + "step": 136050 + }, + { + "epoch": 0.8726465768536364, + "grad_norm": 0.002788944635540247, + "learning_rate": 6.932378476281969e-06, + "loss": 0.0019, + "step": 136060 + }, + { + "epoch": 0.8727107137474225, + "grad_norm": 0.22135302424430847, + "learning_rate": 6.931862249982084e-06, + "loss": 0.0022, + "step": 136070 + }, + { + "epoch": 0.8727748506412086, + "grad_norm": 0.4503108561038971, + "learning_rate": 6.9313459994745855e-06, + "loss": 0.0031, + "step": 136080 + }, + { + "epoch": 0.8728389875349947, + "grad_norm": 0.11171503365039825, + "learning_rate": 6.930829724765941e-06, + "loss": 0.0017, + "step": 136090 + }, + { + "epoch": 0.8729031244287808, + "grad_norm": 0.12623871862888336, + "learning_rate": 6.93031342586262e-06, + "loss": 0.0022, + "step": 136100 + }, + { + "epoch": 0.8729672613225669, + "grad_norm": 0.3261496424674988, + "learning_rate": 6.929797102771092e-06, + "loss": 0.0037, + "step": 136110 + }, + { + "epoch": 0.873031398216353, + "grad_norm": 0.09023922681808472, + "learning_rate": 6.929280755497828e-06, + "loss": 0.0018, + "step": 136120 + }, + { + "epoch": 0.873095535110139, + "grad_norm": 0.0945887565612793, + "learning_rate": 6.9287643840492965e-06, + "loss": 0.0015, + "step": 136130 + }, + { + "epoch": 0.8731596720039252, + "grad_norm": 0.1977291852235794, + "learning_rate": 6.92824798843197e-06, + "loss": 0.0018, + "step": 136140 + }, + { + "epoch": 0.8732238088977112, + "grad_norm": 0.11037852615118027, + "learning_rate": 6.927731568652316e-06, + "loss": 0.0021, + "step": 136150 + }, + { + "epoch": 0.8732879457914974, + "grad_norm": 0.04763404279947281, + "learning_rate": 6.927215124716808e-06, + "loss": 0.0011, + "step": 136160 + }, + { + "epoch": 0.8733520826852834, + "grad_norm": 0.3576797544956207, + "learning_rate": 6.926698656631918e-06, + "loss": 0.0034, + "step": 136170 + }, + { + "epoch": 0.8734162195790696, + "grad_norm": 0.03536294028162956, + "learning_rate": 6.9261821644041185e-06, + "loss": 0.0021, + "step": 136180 + }, + { + "epoch": 0.8734803564728557, + "grad_norm": 0.051673565059900284, + "learning_rate": 6.925665648039876e-06, + "loss": 0.0022, + "step": 136190 + }, + { + "epoch": 0.8735444933666417, + "grad_norm": 0.0459916815161705, + "learning_rate": 6.92514910754567e-06, + "loss": 0.0033, + "step": 136200 + }, + { + "epoch": 0.8736086302604279, + "grad_norm": 0.10190891474485397, + "learning_rate": 6.924632542927968e-06, + "loss": 0.0009, + "step": 136210 + }, + { + "epoch": 0.8736727671542139, + "grad_norm": 0.02483881264925003, + "learning_rate": 6.924115954193247e-06, + "loss": 0.0019, + "step": 136220 + }, + { + "epoch": 0.8737369040480001, + "grad_norm": 0.081370048224926, + "learning_rate": 6.923599341347975e-06, + "loss": 0.0024, + "step": 136230 + }, + { + "epoch": 0.8738010409417861, + "grad_norm": 0.041823867708444595, + "learning_rate": 6.92308270439863e-06, + "loss": 0.0025, + "step": 136240 + }, + { + "epoch": 0.8738651778355723, + "grad_norm": 0.03406078368425369, + "learning_rate": 6.922566043351684e-06, + "loss": 0.0018, + "step": 136250 + }, + { + "epoch": 0.8739293147293583, + "grad_norm": 0.10117413848638535, + "learning_rate": 6.922049358213612e-06, + "loss": 0.0022, + "step": 136260 + }, + { + "epoch": 0.8739934516231445, + "grad_norm": 0.3346622586250305, + "learning_rate": 6.9215326489908875e-06, + "loss": 0.0023, + "step": 136270 + }, + { + "epoch": 0.8740575885169305, + "grad_norm": 0.05216212570667267, + "learning_rate": 6.921015915689985e-06, + "loss": 0.0012, + "step": 136280 + }, + { + "epoch": 0.8741217254107166, + "grad_norm": 0.062205005437135696, + "learning_rate": 6.9204991583173805e-06, + "loss": 0.0012, + "step": 136290 + }, + { + "epoch": 0.8741858623045027, + "grad_norm": 0.029633566737174988, + "learning_rate": 6.919982376879549e-06, + "loss": 0.0054, + "step": 136300 + }, + { + "epoch": 0.8742499991982888, + "grad_norm": 0.04072760045528412, + "learning_rate": 6.919465571382966e-06, + "loss": 0.0025, + "step": 136310 + }, + { + "epoch": 0.874314136092075, + "grad_norm": 0.14085076749324799, + "learning_rate": 6.9189487418341085e-06, + "loss": 0.0025, + "step": 136320 + }, + { + "epoch": 0.874378272985861, + "grad_norm": 0.21778567135334015, + "learning_rate": 6.918431888239452e-06, + "loss": 0.0041, + "step": 136330 + }, + { + "epoch": 0.8744424098796472, + "grad_norm": 0.06518618762493134, + "learning_rate": 6.917915010605471e-06, + "loss": 0.0036, + "step": 136340 + }, + { + "epoch": 0.8745065467734332, + "grad_norm": 0.10041414946317673, + "learning_rate": 6.917398108938646e-06, + "loss": 0.0025, + "step": 136350 + }, + { + "epoch": 0.8745706836672194, + "grad_norm": 0.4006623327732086, + "learning_rate": 6.9168811832454505e-06, + "loss": 0.0042, + "step": 136360 + }, + { + "epoch": 0.8746348205610054, + "grad_norm": 0.2387170046567917, + "learning_rate": 6.916364233532366e-06, + "loss": 0.0022, + "step": 136370 + }, + { + "epoch": 0.8746989574547915, + "grad_norm": 0.04596169665455818, + "learning_rate": 6.915847259805866e-06, + "loss": 0.0041, + "step": 136380 + }, + { + "epoch": 0.8747630943485776, + "grad_norm": 0.0908060297369957, + "learning_rate": 6.915330262072433e-06, + "loss": 0.0075, + "step": 136390 + }, + { + "epoch": 0.8748272312423637, + "grad_norm": 0.11420466750860214, + "learning_rate": 6.9148132403385405e-06, + "loss": 0.0024, + "step": 136400 + }, + { + "epoch": 0.8748913681361498, + "grad_norm": 0.09050630033016205, + "learning_rate": 6.9142961946106705e-06, + "loss": 0.0017, + "step": 136410 + }, + { + "epoch": 0.8749555050299359, + "grad_norm": 0.1539943814277649, + "learning_rate": 6.913779124895301e-06, + "loss": 0.0039, + "step": 136420 + }, + { + "epoch": 0.875019641923722, + "grad_norm": 0.18496167659759521, + "learning_rate": 6.913262031198911e-06, + "loss": 0.0029, + "step": 136430 + }, + { + "epoch": 0.8750837788175081, + "grad_norm": 0.08429907262325287, + "learning_rate": 6.9127449135279816e-06, + "loss": 0.0017, + "step": 136440 + }, + { + "epoch": 0.8751479157112941, + "grad_norm": 0.062099575996398926, + "learning_rate": 6.91222777188899e-06, + "loss": 0.0016, + "step": 136450 + }, + { + "epoch": 0.8752120526050803, + "grad_norm": 0.075572669506073, + "learning_rate": 6.911710606288419e-06, + "loss": 0.0013, + "step": 136460 + }, + { + "epoch": 0.8752761894988664, + "grad_norm": 0.11704915761947632, + "learning_rate": 6.911193416732747e-06, + "loss": 0.0013, + "step": 136470 + }, + { + "epoch": 0.8753403263926525, + "grad_norm": 0.2985520660877228, + "learning_rate": 6.910676203228456e-06, + "loss": 0.0023, + "step": 136480 + }, + { + "epoch": 0.8754044632864386, + "grad_norm": 0.1371159851551056, + "learning_rate": 6.910158965782025e-06, + "loss": 0.002, + "step": 136490 + }, + { + "epoch": 0.8754686001802247, + "grad_norm": 0.07423175871372223, + "learning_rate": 6.90964170439994e-06, + "loss": 0.003, + "step": 136500 + }, + { + "epoch": 0.8755327370740108, + "grad_norm": 0.05937831476330757, + "learning_rate": 6.909124419088678e-06, + "loss": 0.005, + "step": 136510 + }, + { + "epoch": 0.8755968739677968, + "grad_norm": 0.40971502661705017, + "learning_rate": 6.908607109854723e-06, + "loss": 0.0028, + "step": 136520 + }, + { + "epoch": 0.875661010861583, + "grad_norm": 0.1507779359817505, + "learning_rate": 6.908089776704555e-06, + "loss": 0.0016, + "step": 136530 + }, + { + "epoch": 0.875725147755369, + "grad_norm": 0.1629064679145813, + "learning_rate": 6.907572419644661e-06, + "loss": 0.0015, + "step": 136540 + }, + { + "epoch": 0.8757892846491552, + "grad_norm": 0.09428049623966217, + "learning_rate": 6.9070550386815185e-06, + "loss": 0.0026, + "step": 136550 + }, + { + "epoch": 0.8758534215429412, + "grad_norm": 0.20758113265037537, + "learning_rate": 6.906537633821616e-06, + "loss": 0.0041, + "step": 136560 + }, + { + "epoch": 0.8759175584367274, + "grad_norm": 0.028349295258522034, + "learning_rate": 6.906020205071433e-06, + "loss": 0.0036, + "step": 136570 + }, + { + "epoch": 0.8759816953305134, + "grad_norm": 0.16029034554958344, + "learning_rate": 6.905502752437455e-06, + "loss": 0.0028, + "step": 136580 + }, + { + "epoch": 0.8760458322242995, + "grad_norm": 0.10964485257863998, + "learning_rate": 6.904985275926166e-06, + "loss": 0.0032, + "step": 136590 + }, + { + "epoch": 0.8761099691180857, + "grad_norm": 0.1821531057357788, + "learning_rate": 6.90446777554405e-06, + "loss": 0.004, + "step": 136600 + }, + { + "epoch": 0.8761741060118717, + "grad_norm": 0.11746109277009964, + "learning_rate": 6.903950251297591e-06, + "loss": 0.0018, + "step": 136610 + }, + { + "epoch": 0.8762382429056579, + "grad_norm": 0.017772624269127846, + "learning_rate": 6.903432703193275e-06, + "loss": 0.0015, + "step": 136620 + }, + { + "epoch": 0.8763023797994439, + "grad_norm": 0.24109432101249695, + "learning_rate": 6.902915131237586e-06, + "loss": 0.0014, + "step": 136630 + }, + { + "epoch": 0.8763665166932301, + "grad_norm": 0.0824224203824997, + "learning_rate": 6.9023975354370125e-06, + "loss": 0.0015, + "step": 136640 + }, + { + "epoch": 0.8764306535870161, + "grad_norm": 0.15448006987571716, + "learning_rate": 6.901879915798036e-06, + "loss": 0.0023, + "step": 136650 + }, + { + "epoch": 0.8764947904808023, + "grad_norm": 0.16880998015403748, + "learning_rate": 6.901362272327147e-06, + "loss": 0.0029, + "step": 136660 + }, + { + "epoch": 0.8765589273745883, + "grad_norm": 0.32766395807266235, + "learning_rate": 6.900844605030829e-06, + "loss": 0.0043, + "step": 136670 + }, + { + "epoch": 0.8766230642683744, + "grad_norm": 0.07112014293670654, + "learning_rate": 6.900326913915569e-06, + "loss": 0.0031, + "step": 136680 + }, + { + "epoch": 0.8766872011621605, + "grad_norm": 0.15258969366550446, + "learning_rate": 6.899809198987855e-06, + "loss": 0.0023, + "step": 136690 + }, + { + "epoch": 0.8767513380559466, + "grad_norm": 0.16571097075939178, + "learning_rate": 6.8992914602541735e-06, + "loss": 0.0019, + "step": 136700 + }, + { + "epoch": 0.8768154749497327, + "grad_norm": 0.14944887161254883, + "learning_rate": 6.898773697721014e-06, + "loss": 0.0014, + "step": 136710 + }, + { + "epoch": 0.8768796118435188, + "grad_norm": 0.08200392127037048, + "learning_rate": 6.8982559113948625e-06, + "loss": 0.0011, + "step": 136720 + }, + { + "epoch": 0.8769437487373049, + "grad_norm": 0.12015662342309952, + "learning_rate": 6.897738101282208e-06, + "loss": 0.0037, + "step": 136730 + }, + { + "epoch": 0.877007885631091, + "grad_norm": 0.24516578018665314, + "learning_rate": 6.8972202673895375e-06, + "loss": 0.0032, + "step": 136740 + }, + { + "epoch": 0.8770720225248771, + "grad_norm": 0.07292426377534866, + "learning_rate": 6.896702409723342e-06, + "loss": 0.003, + "step": 136750 + }, + { + "epoch": 0.8771361594186632, + "grad_norm": 0.008474384434521198, + "learning_rate": 6.896184528290109e-06, + "loss": 0.0013, + "step": 136760 + }, + { + "epoch": 0.8772002963124493, + "grad_norm": 0.1347125917673111, + "learning_rate": 6.8956666230963284e-06, + "loss": 0.0035, + "step": 136770 + }, + { + "epoch": 0.8772644332062354, + "grad_norm": 0.151192769408226, + "learning_rate": 6.895148694148493e-06, + "loss": 0.0026, + "step": 136780 + }, + { + "epoch": 0.8773285701000215, + "grad_norm": 0.10084628313779831, + "learning_rate": 6.894630741453087e-06, + "loss": 0.003, + "step": 136790 + }, + { + "epoch": 0.8773927069938076, + "grad_norm": 0.017331156879663467, + "learning_rate": 6.8941127650166055e-06, + "loss": 0.002, + "step": 136800 + }, + { + "epoch": 0.8774568438875937, + "grad_norm": 0.045249033719301224, + "learning_rate": 6.893594764845535e-06, + "loss": 0.0021, + "step": 136810 + }, + { + "epoch": 0.8775209807813797, + "grad_norm": 0.08385881036520004, + "learning_rate": 6.893076740946371e-06, + "loss": 0.0053, + "step": 136820 + }, + { + "epoch": 0.8775851176751659, + "grad_norm": 0.11956585943698883, + "learning_rate": 6.892558693325602e-06, + "loss": 0.0015, + "step": 136830 + }, + { + "epoch": 0.8776492545689519, + "grad_norm": 0.19696132838726044, + "learning_rate": 6.892040621989721e-06, + "loss": 0.0017, + "step": 136840 + }, + { + "epoch": 0.8777133914627381, + "grad_norm": 0.04887941852211952, + "learning_rate": 6.8915225269452165e-06, + "loss": 0.0013, + "step": 136850 + }, + { + "epoch": 0.8777775283565241, + "grad_norm": 0.12483085691928864, + "learning_rate": 6.891004408198585e-06, + "loss": 0.0034, + "step": 136860 + }, + { + "epoch": 0.8778416652503103, + "grad_norm": 0.14218521118164062, + "learning_rate": 6.890486265756316e-06, + "loss": 0.0025, + "step": 136870 + }, + { + "epoch": 0.8779058021440964, + "grad_norm": 0.2279711663722992, + "learning_rate": 6.889968099624902e-06, + "loss": 0.0028, + "step": 136880 + }, + { + "epoch": 0.8779699390378825, + "grad_norm": 0.02050189860165119, + "learning_rate": 6.889449909810838e-06, + "loss": 0.002, + "step": 136890 + }, + { + "epoch": 0.8780340759316686, + "grad_norm": 0.08747979253530502, + "learning_rate": 6.8889316963206155e-06, + "loss": 0.0022, + "step": 136900 + }, + { + "epoch": 0.8780982128254546, + "grad_norm": 0.2141716182231903, + "learning_rate": 6.88841345916073e-06, + "loss": 0.0028, + "step": 136910 + }, + { + "epoch": 0.8781623497192408, + "grad_norm": 0.04946558550000191, + "learning_rate": 6.8878951983376725e-06, + "loss": 0.0016, + "step": 136920 + }, + { + "epoch": 0.8782264866130268, + "grad_norm": 0.24861359596252441, + "learning_rate": 6.887376913857939e-06, + "loss": 0.0022, + "step": 136930 + }, + { + "epoch": 0.878290623506813, + "grad_norm": 0.0350680835545063, + "learning_rate": 6.886858605728026e-06, + "loss": 0.0071, + "step": 136940 + }, + { + "epoch": 0.878354760400599, + "grad_norm": 0.02502746321260929, + "learning_rate": 6.886340273954425e-06, + "loss": 0.0016, + "step": 136950 + }, + { + "epoch": 0.8784188972943852, + "grad_norm": 0.072413869202137, + "learning_rate": 6.885821918543633e-06, + "loss": 0.0017, + "step": 136960 + }, + { + "epoch": 0.8784830341881712, + "grad_norm": 0.03558233380317688, + "learning_rate": 6.885303539502144e-06, + "loss": 0.0036, + "step": 136970 + }, + { + "epoch": 0.8785471710819573, + "grad_norm": 0.05169002711772919, + "learning_rate": 6.884785136836453e-06, + "loss": 0.0008, + "step": 136980 + }, + { + "epoch": 0.8786113079757434, + "grad_norm": 0.02297242172062397, + "learning_rate": 6.884266710553059e-06, + "loss": 0.0018, + "step": 136990 + }, + { + "epoch": 0.8786754448695295, + "grad_norm": 0.18363507091999054, + "learning_rate": 6.883748260658455e-06, + "loss": 0.0024, + "step": 137000 + }, + { + "epoch": 0.8787395817633156, + "grad_norm": 0.09243588894605637, + "learning_rate": 6.88322978715914e-06, + "loss": 0.0039, + "step": 137010 + }, + { + "epoch": 0.8788037186571017, + "grad_norm": 0.1842324137687683, + "learning_rate": 6.88271129006161e-06, + "loss": 0.0044, + "step": 137020 + }, + { + "epoch": 0.8788678555508879, + "grad_norm": 0.09502124041318893, + "learning_rate": 6.882192769372362e-06, + "loss": 0.0023, + "step": 137030 + }, + { + "epoch": 0.8789319924446739, + "grad_norm": 0.08689358085393906, + "learning_rate": 6.881674225097892e-06, + "loss": 0.0019, + "step": 137040 + }, + { + "epoch": 0.87899612933846, + "grad_norm": 0.03809691593050957, + "learning_rate": 6.881155657244702e-06, + "loss": 0.0015, + "step": 137050 + }, + { + "epoch": 0.8790602662322461, + "grad_norm": 0.13645468652248383, + "learning_rate": 6.880637065819284e-06, + "loss": 0.0043, + "step": 137060 + }, + { + "epoch": 0.8791244031260322, + "grad_norm": 0.14224575459957123, + "learning_rate": 6.880118450828142e-06, + "loss": 0.003, + "step": 137070 + }, + { + "epoch": 0.8791885400198183, + "grad_norm": 0.11373443156480789, + "learning_rate": 6.87959981227777e-06, + "loss": 0.0014, + "step": 137080 + }, + { + "epoch": 0.8792526769136044, + "grad_norm": 0.06638626754283905, + "learning_rate": 6.87908115017467e-06, + "loss": 0.003, + "step": 137090 + }, + { + "epoch": 0.8793168138073905, + "grad_norm": 0.0888357162475586, + "learning_rate": 6.87856246452534e-06, + "loss": 0.0025, + "step": 137100 + }, + { + "epoch": 0.8793809507011766, + "grad_norm": 0.26807594299316406, + "learning_rate": 6.8780437553362785e-06, + "loss": 0.0037, + "step": 137110 + }, + { + "epoch": 0.8794450875949626, + "grad_norm": 0.04233105480670929, + "learning_rate": 6.877525022613989e-06, + "loss": 0.0029, + "step": 137120 + }, + { + "epoch": 0.8795092244887488, + "grad_norm": 0.1305970698595047, + "learning_rate": 6.877006266364967e-06, + "loss": 0.0047, + "step": 137130 + }, + { + "epoch": 0.8795733613825348, + "grad_norm": 0.15733136236667633, + "learning_rate": 6.876487486595717e-06, + "loss": 0.0031, + "step": 137140 + }, + { + "epoch": 0.879637498276321, + "grad_norm": 0.023431070148944855, + "learning_rate": 6.875968683312737e-06, + "loss": 0.0015, + "step": 137150 + }, + { + "epoch": 0.879701635170107, + "grad_norm": 0.34440213441848755, + "learning_rate": 6.875449856522529e-06, + "loss": 0.0016, + "step": 137160 + }, + { + "epoch": 0.8797657720638932, + "grad_norm": 0.09305664151906967, + "learning_rate": 6.874931006231593e-06, + "loss": 0.0029, + "step": 137170 + }, + { + "epoch": 0.8798299089576793, + "grad_norm": 0.07814795523881912, + "learning_rate": 6.874412132446432e-06, + "loss": 0.0018, + "step": 137180 + }, + { + "epoch": 0.8798940458514654, + "grad_norm": 0.018024472519755363, + "learning_rate": 6.8738932351735465e-06, + "loss": 0.0066, + "step": 137190 + }, + { + "epoch": 0.8799581827452515, + "grad_norm": 0.06308527290821075, + "learning_rate": 6.873374314419441e-06, + "loss": 0.0024, + "step": 137200 + }, + { + "epoch": 0.8800223196390375, + "grad_norm": 0.06728116422891617, + "learning_rate": 6.872855370190615e-06, + "loss": 0.0018, + "step": 137210 + }, + { + "epoch": 0.8800864565328237, + "grad_norm": 0.12315388023853302, + "learning_rate": 6.8723364024935745e-06, + "loss": 0.0035, + "step": 137220 + }, + { + "epoch": 0.8801505934266097, + "grad_norm": 0.15933430194854736, + "learning_rate": 6.871817411334819e-06, + "loss": 0.0036, + "step": 137230 + }, + { + "epoch": 0.8802147303203959, + "grad_norm": 0.09797076880931854, + "learning_rate": 6.871298396720855e-06, + "loss": 0.0011, + "step": 137240 + }, + { + "epoch": 0.8802788672141819, + "grad_norm": 0.0730673298239708, + "learning_rate": 6.870779358658183e-06, + "loss": 0.0021, + "step": 137250 + }, + { + "epoch": 0.8803430041079681, + "grad_norm": 0.15262871980667114, + "learning_rate": 6.870260297153309e-06, + "loss": 0.0021, + "step": 137260 + }, + { + "epoch": 0.8804071410017541, + "grad_norm": 0.11767885833978653, + "learning_rate": 6.869741212212738e-06, + "loss": 0.0029, + "step": 137270 + }, + { + "epoch": 0.8804712778955402, + "grad_norm": 0.2819925844669342, + "learning_rate": 6.869222103842972e-06, + "loss": 0.0037, + "step": 137280 + }, + { + "epoch": 0.8805354147893263, + "grad_norm": 0.02453608252108097, + "learning_rate": 6.868702972050518e-06, + "loss": 0.0021, + "step": 137290 + }, + { + "epoch": 0.8805995516831124, + "grad_norm": 0.127029150724411, + "learning_rate": 6.86818381684188e-06, + "loss": 0.0013, + "step": 137300 + }, + { + "epoch": 0.8806636885768986, + "grad_norm": 0.04599674418568611, + "learning_rate": 6.8676646382235635e-06, + "loss": 0.002, + "step": 137310 + }, + { + "epoch": 0.8807278254706846, + "grad_norm": 0.07534373551607132, + "learning_rate": 6.867145436202074e-06, + "loss": 0.0037, + "step": 137320 + }, + { + "epoch": 0.8807919623644708, + "grad_norm": 0.2671842575073242, + "learning_rate": 6.866626210783918e-06, + "loss": 0.0034, + "step": 137330 + }, + { + "epoch": 0.8808560992582568, + "grad_norm": 0.16933603584766388, + "learning_rate": 6.866106961975602e-06, + "loss": 0.0061, + "step": 137340 + }, + { + "epoch": 0.880920236152043, + "grad_norm": 0.05069068819284439, + "learning_rate": 6.865587689783631e-06, + "loss": 0.0038, + "step": 137350 + }, + { + "epoch": 0.880984373045829, + "grad_norm": 0.0438336543738842, + "learning_rate": 6.865068394214514e-06, + "loss": 0.0012, + "step": 137360 + }, + { + "epoch": 0.8810485099396151, + "grad_norm": 0.10937488824129105, + "learning_rate": 6.8645490752747575e-06, + "loss": 0.0014, + "step": 137370 + }, + { + "epoch": 0.8811126468334012, + "grad_norm": 0.20446348190307617, + "learning_rate": 6.864029732970867e-06, + "loss": 0.0015, + "step": 137380 + }, + { + "epoch": 0.8811767837271873, + "grad_norm": 0.02750535123050213, + "learning_rate": 6.863510367309353e-06, + "loss": 0.002, + "step": 137390 + }, + { + "epoch": 0.8812409206209734, + "grad_norm": 0.10042654722929001, + "learning_rate": 6.862990978296722e-06, + "loss": 0.0021, + "step": 137400 + }, + { + "epoch": 0.8813050575147595, + "grad_norm": 0.01718439906835556, + "learning_rate": 6.862471565939482e-06, + "loss": 0.0025, + "step": 137410 + }, + { + "epoch": 0.8813691944085456, + "grad_norm": 0.33396032452583313, + "learning_rate": 6.861952130244143e-06, + "loss": 0.0027, + "step": 137420 + }, + { + "epoch": 0.8814333313023317, + "grad_norm": 0.12118028104305267, + "learning_rate": 6.861432671217212e-06, + "loss": 0.0016, + "step": 137430 + }, + { + "epoch": 0.8814974681961177, + "grad_norm": 0.20148040354251862, + "learning_rate": 6.8609131888652e-06, + "loss": 0.0034, + "step": 137440 + }, + { + "epoch": 0.8815616050899039, + "grad_norm": 0.08073599636554718, + "learning_rate": 6.8603936831946165e-06, + "loss": 0.0022, + "step": 137450 + }, + { + "epoch": 0.88162574198369, + "grad_norm": 0.22954554855823517, + "learning_rate": 6.859874154211969e-06, + "loss": 0.0032, + "step": 137460 + }, + { + "epoch": 0.8816898788774761, + "grad_norm": 0.07966429740190506, + "learning_rate": 6.859354601923769e-06, + "loss": 0.0022, + "step": 137470 + }, + { + "epoch": 0.8817540157712622, + "grad_norm": 0.19060322642326355, + "learning_rate": 6.858835026336529e-06, + "loss": 0.002, + "step": 137480 + }, + { + "epoch": 0.8818181526650483, + "grad_norm": 0.05918464809656143, + "learning_rate": 6.858315427456755e-06, + "loss": 0.0024, + "step": 137490 + }, + { + "epoch": 0.8818822895588344, + "grad_norm": 0.14439141750335693, + "learning_rate": 6.857795805290963e-06, + "loss": 0.0025, + "step": 137500 + }, + { + "epoch": 0.8819464264526204, + "grad_norm": 0.042854391038417816, + "learning_rate": 6.857276159845661e-06, + "loss": 0.0023, + "step": 137510 + }, + { + "epoch": 0.8820105633464066, + "grad_norm": 0.12424889206886292, + "learning_rate": 6.856756491127361e-06, + "loss": 0.0035, + "step": 137520 + }, + { + "epoch": 0.8820747002401926, + "grad_norm": 0.2014615684747696, + "learning_rate": 6.856236799142575e-06, + "loss": 0.0019, + "step": 137530 + }, + { + "epoch": 0.8821388371339788, + "grad_norm": 0.06214706599712372, + "learning_rate": 6.855717083897817e-06, + "loss": 0.0027, + "step": 137540 + }, + { + "epoch": 0.8822029740277648, + "grad_norm": 0.09805779159069061, + "learning_rate": 6.8551973453995954e-06, + "loss": 0.0013, + "step": 137550 + }, + { + "epoch": 0.882267110921551, + "grad_norm": 0.025579053908586502, + "learning_rate": 6.8546775836544264e-06, + "loss": 0.0014, + "step": 137560 + }, + { + "epoch": 0.882331247815337, + "grad_norm": 0.15716120600700378, + "learning_rate": 6.854157798668821e-06, + "loss": 0.0051, + "step": 137570 + }, + { + "epoch": 0.8823953847091232, + "grad_norm": 0.11696451902389526, + "learning_rate": 6.853637990449294e-06, + "loss": 0.0024, + "step": 137580 + }, + { + "epoch": 0.8824595216029093, + "grad_norm": 0.06991618126630783, + "learning_rate": 6.853118159002357e-06, + "loss": 0.003, + "step": 137590 + }, + { + "epoch": 0.8825236584966953, + "grad_norm": 0.18459399044513702, + "learning_rate": 6.852598304334528e-06, + "loss": 0.0023, + "step": 137600 + }, + { + "epoch": 0.8825877953904815, + "grad_norm": 0.07764824479818344, + "learning_rate": 6.852078426452315e-06, + "loss": 0.0016, + "step": 137610 + }, + { + "epoch": 0.8826519322842675, + "grad_norm": 0.04978380724787712, + "learning_rate": 6.851558525362236e-06, + "loss": 0.0023, + "step": 137620 + }, + { + "epoch": 0.8827160691780537, + "grad_norm": 0.08767449855804443, + "learning_rate": 6.851038601070808e-06, + "loss": 0.0009, + "step": 137630 + }, + { + "epoch": 0.8827802060718397, + "grad_norm": 0.009009142406284809, + "learning_rate": 6.8505186535845405e-06, + "loss": 0.002, + "step": 137640 + }, + { + "epoch": 0.8828443429656259, + "grad_norm": 0.2602931261062622, + "learning_rate": 6.849998682909953e-06, + "loss": 0.0027, + "step": 137650 + }, + { + "epoch": 0.8829084798594119, + "grad_norm": 0.2864200174808502, + "learning_rate": 6.849478689053559e-06, + "loss": 0.0057, + "step": 137660 + }, + { + "epoch": 0.882972616753198, + "grad_norm": 0.08893916755914688, + "learning_rate": 6.848958672021877e-06, + "loss": 0.0071, + "step": 137670 + }, + { + "epoch": 0.8830367536469841, + "grad_norm": 0.1800779104232788, + "learning_rate": 6.848438631821419e-06, + "loss": 0.0037, + "step": 137680 + }, + { + "epoch": 0.8831008905407702, + "grad_norm": 0.06310734897851944, + "learning_rate": 6.847918568458707e-06, + "loss": 0.0032, + "step": 137690 + }, + { + "epoch": 0.8831650274345563, + "grad_norm": 0.13706275820732117, + "learning_rate": 6.84739848194025e-06, + "loss": 0.003, + "step": 137700 + }, + { + "epoch": 0.8832291643283424, + "grad_norm": 0.031421709805727005, + "learning_rate": 6.846878372272574e-06, + "loss": 0.0024, + "step": 137710 + }, + { + "epoch": 0.8832933012221285, + "grad_norm": 0.10083822906017303, + "learning_rate": 6.8463582394621895e-06, + "loss": 0.0024, + "step": 137720 + }, + { + "epoch": 0.8833574381159146, + "grad_norm": 0.32716020941734314, + "learning_rate": 6.845838083515619e-06, + "loss": 0.0048, + "step": 137730 + }, + { + "epoch": 0.8834215750097008, + "grad_norm": 0.07156256586313248, + "learning_rate": 6.8453179044393755e-06, + "loss": 0.0017, + "step": 137740 + }, + { + "epoch": 0.8834857119034868, + "grad_norm": 0.6506041884422302, + "learning_rate": 6.844797702239983e-06, + "loss": 0.002, + "step": 137750 + }, + { + "epoch": 0.8835498487972729, + "grad_norm": 0.12119151651859283, + "learning_rate": 6.844277476923954e-06, + "loss": 0.0036, + "step": 137760 + }, + { + "epoch": 0.883613985691059, + "grad_norm": 0.008514746092259884, + "learning_rate": 6.843757228497811e-06, + "loss": 0.0012, + "step": 137770 + }, + { + "epoch": 0.8836781225848451, + "grad_norm": 0.13601401448249817, + "learning_rate": 6.843236956968072e-06, + "loss": 0.0032, + "step": 137780 + }, + { + "epoch": 0.8837422594786312, + "grad_norm": 0.320917546749115, + "learning_rate": 6.842716662341258e-06, + "loss": 0.003, + "step": 137790 + }, + { + "epoch": 0.8838063963724173, + "grad_norm": 0.052433934062719345, + "learning_rate": 6.842196344623886e-06, + "loss": 0.0021, + "step": 137800 + }, + { + "epoch": 0.8838705332662034, + "grad_norm": 0.12566226720809937, + "learning_rate": 6.841676003822477e-06, + "loss": 0.0019, + "step": 137810 + }, + { + "epoch": 0.8839346701599895, + "grad_norm": 0.10092577338218689, + "learning_rate": 6.841155639943552e-06, + "loss": 0.0063, + "step": 137820 + }, + { + "epoch": 0.8839988070537755, + "grad_norm": 0.044915057718753815, + "learning_rate": 6.84063525299363e-06, + "loss": 0.0025, + "step": 137830 + }, + { + "epoch": 0.8840629439475617, + "grad_norm": 0.11170393228530884, + "learning_rate": 6.8401148429792355e-06, + "loss": 0.0022, + "step": 137840 + }, + { + "epoch": 0.8841270808413477, + "grad_norm": 0.08210773766040802, + "learning_rate": 6.8395944099068835e-06, + "loss": 0.0016, + "step": 137850 + }, + { + "epoch": 0.8841912177351339, + "grad_norm": 0.12872573733329773, + "learning_rate": 6.839073953783101e-06, + "loss": 0.002, + "step": 137860 + }, + { + "epoch": 0.88425535462892, + "grad_norm": 0.1600671112537384, + "learning_rate": 6.838553474614407e-06, + "loss": 0.0035, + "step": 137870 + }, + { + "epoch": 0.8843194915227061, + "grad_norm": 0.06768742203712463, + "learning_rate": 6.838032972407324e-06, + "loss": 0.0025, + "step": 137880 + }, + { + "epoch": 0.8843836284164922, + "grad_norm": 0.04691407456994057, + "learning_rate": 6.837512447168373e-06, + "loss": 0.0023, + "step": 137890 + }, + { + "epoch": 0.8844477653102782, + "grad_norm": 0.06426296383142471, + "learning_rate": 6.83699189890408e-06, + "loss": 0.0026, + "step": 137900 + }, + { + "epoch": 0.8845119022040644, + "grad_norm": 0.24463340640068054, + "learning_rate": 6.836471327620964e-06, + "loss": 0.0025, + "step": 137910 + }, + { + "epoch": 0.8845760390978504, + "grad_norm": 0.07521167397499084, + "learning_rate": 6.8359507333255505e-06, + "loss": 0.0029, + "step": 137920 + }, + { + "epoch": 0.8846401759916366, + "grad_norm": 0.19357682764530182, + "learning_rate": 6.835430116024362e-06, + "loss": 0.0022, + "step": 137930 + }, + { + "epoch": 0.8847043128854226, + "grad_norm": 0.11128903925418854, + "learning_rate": 6.834909475723923e-06, + "loss": 0.0074, + "step": 137940 + }, + { + "epoch": 0.8847684497792088, + "grad_norm": 0.09577079862356186, + "learning_rate": 6.834388812430756e-06, + "loss": 0.0024, + "step": 137950 + }, + { + "epoch": 0.8848325866729948, + "grad_norm": 0.07646466046571732, + "learning_rate": 6.833868126151385e-06, + "loss": 0.002, + "step": 137960 + }, + { + "epoch": 0.884896723566781, + "grad_norm": 0.020462574437260628, + "learning_rate": 6.833347416892338e-06, + "loss": 0.0027, + "step": 137970 + }, + { + "epoch": 0.884960860460567, + "grad_norm": 0.16638454794883728, + "learning_rate": 6.832826684660137e-06, + "loss": 0.0035, + "step": 137980 + }, + { + "epoch": 0.8850249973543531, + "grad_norm": 0.14669720828533173, + "learning_rate": 6.832305929461307e-06, + "loss": 0.0025, + "step": 137990 + }, + { + "epoch": 0.8850891342481392, + "grad_norm": 0.09566580504179001, + "learning_rate": 6.831785151302373e-06, + "loss": 0.0032, + "step": 138000 + }, + { + "epoch": 0.8851532711419253, + "grad_norm": 0.04054740443825722, + "learning_rate": 6.831264350189866e-06, + "loss": 0.0031, + "step": 138010 + }, + { + "epoch": 0.8852174080357115, + "grad_norm": 0.15328174829483032, + "learning_rate": 6.8307435261303035e-06, + "loss": 0.0029, + "step": 138020 + }, + { + "epoch": 0.8852815449294975, + "grad_norm": 0.11700726300477982, + "learning_rate": 6.830222679130219e-06, + "loss": 0.0023, + "step": 138030 + }, + { + "epoch": 0.8853456818232837, + "grad_norm": 0.10539217293262482, + "learning_rate": 6.829701809196136e-06, + "loss": 0.0027, + "step": 138040 + }, + { + "epoch": 0.8854098187170697, + "grad_norm": 0.2931744158267975, + "learning_rate": 6.82918091633458e-06, + "loss": 0.0031, + "step": 138050 + }, + { + "epoch": 0.8854739556108558, + "grad_norm": 0.07683542370796204, + "learning_rate": 6.828660000552081e-06, + "loss": 0.0023, + "step": 138060 + }, + { + "epoch": 0.8855380925046419, + "grad_norm": 0.557079017162323, + "learning_rate": 6.828139061855165e-06, + "loss": 0.002, + "step": 138070 + }, + { + "epoch": 0.885602229398428, + "grad_norm": 0.08445441722869873, + "learning_rate": 6.82761810025036e-06, + "loss": 0.0065, + "step": 138080 + }, + { + "epoch": 0.8856663662922141, + "grad_norm": 0.10118082910776138, + "learning_rate": 6.827097115744195e-06, + "loss": 0.0017, + "step": 138090 + }, + { + "epoch": 0.8857305031860002, + "grad_norm": 0.09960382431745529, + "learning_rate": 6.826576108343195e-06, + "loss": 0.0028, + "step": 138100 + }, + { + "epoch": 0.8857946400797863, + "grad_norm": 0.12642377614974976, + "learning_rate": 6.826055078053893e-06, + "loss": 0.0026, + "step": 138110 + }, + { + "epoch": 0.8858587769735724, + "grad_norm": 0.20992155373096466, + "learning_rate": 6.825534024882815e-06, + "loss": 0.0026, + "step": 138120 + }, + { + "epoch": 0.8859229138673584, + "grad_norm": 0.1381525695323944, + "learning_rate": 6.8250129488364915e-06, + "loss": 0.0017, + "step": 138130 + }, + { + "epoch": 0.8859870507611446, + "grad_norm": 0.056542061269283295, + "learning_rate": 6.824491849921451e-06, + "loss": 0.0022, + "step": 138140 + }, + { + "epoch": 0.8860511876549307, + "grad_norm": 0.06584298610687256, + "learning_rate": 6.823970728144225e-06, + "loss": 0.0021, + "step": 138150 + }, + { + "epoch": 0.8861153245487168, + "grad_norm": 0.13849897682666779, + "learning_rate": 6.823449583511339e-06, + "loss": 0.0022, + "step": 138160 + }, + { + "epoch": 0.8861794614425029, + "grad_norm": 0.09438132494688034, + "learning_rate": 6.822928416029329e-06, + "loss": 0.0037, + "step": 138170 + }, + { + "epoch": 0.886243598336289, + "grad_norm": 0.08102027326822281, + "learning_rate": 6.8224072257047225e-06, + "loss": 0.0022, + "step": 138180 + }, + { + "epoch": 0.8863077352300751, + "grad_norm": 0.1010395735502243, + "learning_rate": 6.821886012544051e-06, + "loss": 0.0023, + "step": 138190 + }, + { + "epoch": 0.8863718721238611, + "grad_norm": 0.18256144225597382, + "learning_rate": 6.8213647765538475e-06, + "loss": 0.0019, + "step": 138200 + }, + { + "epoch": 0.8864360090176473, + "grad_norm": 0.08884212374687195, + "learning_rate": 6.820843517740638e-06, + "loss": 0.0017, + "step": 138210 + }, + { + "epoch": 0.8865001459114333, + "grad_norm": 0.20112930238246918, + "learning_rate": 6.820322236110961e-06, + "loss": 0.0021, + "step": 138220 + }, + { + "epoch": 0.8865642828052195, + "grad_norm": 0.09102907031774521, + "learning_rate": 6.8198009316713435e-06, + "loss": 0.0019, + "step": 138230 + }, + { + "epoch": 0.8866284196990055, + "grad_norm": 0.05903204157948494, + "learning_rate": 6.819279604428322e-06, + "loss": 0.0019, + "step": 138240 + }, + { + "epoch": 0.8866925565927917, + "grad_norm": 0.2465900033712387, + "learning_rate": 6.818758254388424e-06, + "loss": 0.0047, + "step": 138250 + }, + { + "epoch": 0.8867566934865777, + "grad_norm": 0.10835077613592148, + "learning_rate": 6.818236881558187e-06, + "loss": 0.0023, + "step": 138260 + }, + { + "epoch": 0.8868208303803639, + "grad_norm": 0.03426966816186905, + "learning_rate": 6.817715485944142e-06, + "loss": 0.002, + "step": 138270 + }, + { + "epoch": 0.8868849672741499, + "grad_norm": 0.049255553632974625, + "learning_rate": 6.817194067552824e-06, + "loss": 0.0028, + "step": 138280 + }, + { + "epoch": 0.886949104167936, + "grad_norm": 0.06245843693614006, + "learning_rate": 6.816672626390763e-06, + "loss": 0.0017, + "step": 138290 + }, + { + "epoch": 0.8870132410617222, + "grad_norm": 0.14906224608421326, + "learning_rate": 6.816151162464498e-06, + "loss": 0.0026, + "step": 138300 + }, + { + "epoch": 0.8870773779555082, + "grad_norm": 0.10730446875095367, + "learning_rate": 6.815629675780559e-06, + "loss": 0.0021, + "step": 138310 + }, + { + "epoch": 0.8871415148492944, + "grad_norm": 0.12750211358070374, + "learning_rate": 6.815108166345483e-06, + "loss": 0.0029, + "step": 138320 + }, + { + "epoch": 0.8872056517430804, + "grad_norm": 0.0802149772644043, + "learning_rate": 6.814586634165806e-06, + "loss": 0.0017, + "step": 138330 + }, + { + "epoch": 0.8872697886368666, + "grad_norm": 0.3117047846317291, + "learning_rate": 6.81406507924806e-06, + "loss": 0.0034, + "step": 138340 + }, + { + "epoch": 0.8873339255306526, + "grad_norm": 0.03203052654862404, + "learning_rate": 6.813543501598784e-06, + "loss": 0.0025, + "step": 138350 + }, + { + "epoch": 0.8873980624244387, + "grad_norm": 0.2867416739463806, + "learning_rate": 6.81302190122451e-06, + "loss": 0.004, + "step": 138360 + }, + { + "epoch": 0.8874621993182248, + "grad_norm": 0.1543174386024475, + "learning_rate": 6.812500278131776e-06, + "loss": 0.0024, + "step": 138370 + }, + { + "epoch": 0.8875263362120109, + "grad_norm": 0.07901715487241745, + "learning_rate": 6.811978632327119e-06, + "loss": 0.0023, + "step": 138380 + }, + { + "epoch": 0.887590473105797, + "grad_norm": 0.06686260551214218, + "learning_rate": 6.811456963817075e-06, + "loss": 0.0023, + "step": 138390 + }, + { + "epoch": 0.8876546099995831, + "grad_norm": 0.12487843632698059, + "learning_rate": 6.810935272608179e-06, + "loss": 0.0059, + "step": 138400 + }, + { + "epoch": 0.8877187468933692, + "grad_norm": 0.06749287992715836, + "learning_rate": 6.8104135587069704e-06, + "loss": 0.0014, + "step": 138410 + }, + { + "epoch": 0.8877828837871553, + "grad_norm": 0.05625376105308533, + "learning_rate": 6.8098918221199864e-06, + "loss": 0.0024, + "step": 138420 + }, + { + "epoch": 0.8878470206809415, + "grad_norm": 0.044884275645017624, + "learning_rate": 6.809370062853764e-06, + "loss": 0.0008, + "step": 138430 + }, + { + "epoch": 0.8879111575747275, + "grad_norm": 0.23536013066768646, + "learning_rate": 6.808848280914842e-06, + "loss": 0.003, + "step": 138440 + }, + { + "epoch": 0.8879752944685136, + "grad_norm": 0.06617274135351181, + "learning_rate": 6.808326476309759e-06, + "loss": 0.0022, + "step": 138450 + }, + { + "epoch": 0.8880394313622997, + "grad_norm": 0.05690762773156166, + "learning_rate": 6.80780464904505e-06, + "loss": 0.0013, + "step": 138460 + }, + { + "epoch": 0.8881035682560858, + "grad_norm": 0.13122613728046417, + "learning_rate": 6.807282799127259e-06, + "loss": 0.0034, + "step": 138470 + }, + { + "epoch": 0.8881677051498719, + "grad_norm": 0.00365577545017004, + "learning_rate": 6.806760926562922e-06, + "loss": 0.0015, + "step": 138480 + }, + { + "epoch": 0.888231842043658, + "grad_norm": 0.05660828575491905, + "learning_rate": 6.80623903135858e-06, + "loss": 0.0019, + "step": 138490 + }, + { + "epoch": 0.888295978937444, + "grad_norm": 0.29124340415000916, + "learning_rate": 6.805717113520771e-06, + "loss": 0.0024, + "step": 138500 + }, + { + "epoch": 0.8883601158312302, + "grad_norm": 0.06867077946662903, + "learning_rate": 6.805195173056039e-06, + "loss": 0.0028, + "step": 138510 + }, + { + "epoch": 0.8884242527250162, + "grad_norm": 0.18916812539100647, + "learning_rate": 6.804673209970918e-06, + "loss": 0.0032, + "step": 138520 + }, + { + "epoch": 0.8884883896188024, + "grad_norm": 0.06216156482696533, + "learning_rate": 6.804151224271954e-06, + "loss": 0.0039, + "step": 138530 + }, + { + "epoch": 0.8885525265125884, + "grad_norm": 0.24915002286434174, + "learning_rate": 6.803629215965684e-06, + "loss": 0.0026, + "step": 138540 + }, + { + "epoch": 0.8886166634063746, + "grad_norm": 0.09341257065534592, + "learning_rate": 6.803107185058651e-06, + "loss": 0.0022, + "step": 138550 + }, + { + "epoch": 0.8886808003001606, + "grad_norm": 0.05511720851063728, + "learning_rate": 6.802585131557398e-06, + "loss": 0.0009, + "step": 138560 + }, + { + "epoch": 0.8887449371939468, + "grad_norm": 0.13346849381923676, + "learning_rate": 6.802063055468464e-06, + "loss": 0.0026, + "step": 138570 + }, + { + "epoch": 0.8888090740877329, + "grad_norm": 0.1426878720521927, + "learning_rate": 6.801540956798393e-06, + "loss": 0.0039, + "step": 138580 + }, + { + "epoch": 0.888873210981519, + "grad_norm": 0.02670944854617119, + "learning_rate": 6.8010188355537245e-06, + "loss": 0.0012, + "step": 138590 + }, + { + "epoch": 0.8889373478753051, + "grad_norm": 0.12257665395736694, + "learning_rate": 6.800496691741004e-06, + "loss": 0.0042, + "step": 138600 + }, + { + "epoch": 0.8890014847690911, + "grad_norm": 0.12118265777826309, + "learning_rate": 6.799974525366773e-06, + "loss": 0.0026, + "step": 138610 + }, + { + "epoch": 0.8890656216628773, + "grad_norm": 0.058597929775714874, + "learning_rate": 6.7994523364375744e-06, + "loss": 0.0019, + "step": 138620 + }, + { + "epoch": 0.8891297585566633, + "grad_norm": 0.09288131445646286, + "learning_rate": 6.798930124959952e-06, + "loss": 0.0043, + "step": 138630 + }, + { + "epoch": 0.8891938954504495, + "grad_norm": 0.18174026906490326, + "learning_rate": 6.79840789094045e-06, + "loss": 0.002, + "step": 138640 + }, + { + "epoch": 0.8892580323442355, + "grad_norm": 0.049296993762254715, + "learning_rate": 6.797885634385612e-06, + "loss": 0.0033, + "step": 138650 + }, + { + "epoch": 0.8893221692380217, + "grad_norm": 0.07419116795063019, + "learning_rate": 6.797363355301981e-06, + "loss": 0.0035, + "step": 138660 + }, + { + "epoch": 0.8893863061318077, + "grad_norm": 0.17123816907405853, + "learning_rate": 6.796841053696102e-06, + "loss": 0.0036, + "step": 138670 + }, + { + "epoch": 0.8894504430255938, + "grad_norm": 0.08635471761226654, + "learning_rate": 6.796318729574522e-06, + "loss": 0.0026, + "step": 138680 + }, + { + "epoch": 0.8895145799193799, + "grad_norm": 0.15406401455402374, + "learning_rate": 6.7957963829437845e-06, + "loss": 0.0035, + "step": 138690 + }, + { + "epoch": 0.889578716813166, + "grad_norm": 0.08034338802099228, + "learning_rate": 6.795274013810435e-06, + "loss": 0.0023, + "step": 138700 + }, + { + "epoch": 0.8896428537069521, + "grad_norm": 0.11541954427957535, + "learning_rate": 6.794751622181018e-06, + "loss": 0.0024, + "step": 138710 + }, + { + "epoch": 0.8897069906007382, + "grad_norm": 0.15283158421516418, + "learning_rate": 6.794229208062081e-06, + "loss": 0.0023, + "step": 138720 + }, + { + "epoch": 0.8897711274945244, + "grad_norm": 0.044942475855350494, + "learning_rate": 6.79370677146017e-06, + "loss": 0.0034, + "step": 138730 + }, + { + "epoch": 0.8898352643883104, + "grad_norm": 0.08287149667739868, + "learning_rate": 6.793184312381831e-06, + "loss": 0.0031, + "step": 138740 + }, + { + "epoch": 0.8898994012820965, + "grad_norm": 0.07617488503456116, + "learning_rate": 6.792661830833611e-06, + "loss": 0.0016, + "step": 138750 + }, + { + "epoch": 0.8899635381758826, + "grad_norm": 0.13483263552188873, + "learning_rate": 6.792139326822056e-06, + "loss": 0.0044, + "step": 138760 + }, + { + "epoch": 0.8900276750696687, + "grad_norm": 0.16076132655143738, + "learning_rate": 6.791616800353716e-06, + "loss": 0.0019, + "step": 138770 + }, + { + "epoch": 0.8900918119634548, + "grad_norm": 0.10660827159881592, + "learning_rate": 6.791094251435137e-06, + "loss": 0.0021, + "step": 138780 + }, + { + "epoch": 0.8901559488572409, + "grad_norm": 0.1787632703781128, + "learning_rate": 6.7905716800728664e-06, + "loss": 0.0016, + "step": 138790 + }, + { + "epoch": 0.890220085751027, + "grad_norm": 0.04764917492866516, + "learning_rate": 6.7900490862734525e-06, + "loss": 0.0026, + "step": 138800 + }, + { + "epoch": 0.8902842226448131, + "grad_norm": 0.04433861002326012, + "learning_rate": 6.789526470043444e-06, + "loss": 0.0027, + "step": 138810 + }, + { + "epoch": 0.8903483595385991, + "grad_norm": 0.13696281611919403, + "learning_rate": 6.789003831389391e-06, + "loss": 0.0034, + "step": 138820 + }, + { + "epoch": 0.8904124964323853, + "grad_norm": 0.08294308930635452, + "learning_rate": 6.78848117031784e-06, + "loss": 0.0035, + "step": 138830 + }, + { + "epoch": 0.8904766333261713, + "grad_norm": 0.07750234752893448, + "learning_rate": 6.787958486835342e-06, + "loss": 0.005, + "step": 138840 + }, + { + "epoch": 0.8905407702199575, + "grad_norm": 0.1333630234003067, + "learning_rate": 6.787435780948448e-06, + "loss": 0.0021, + "step": 138850 + }, + { + "epoch": 0.8906049071137436, + "grad_norm": 0.3120582699775696, + "learning_rate": 6.786913052663705e-06, + "loss": 0.0024, + "step": 138860 + }, + { + "epoch": 0.8906690440075297, + "grad_norm": 0.21088089048862457, + "learning_rate": 6.786390301987664e-06, + "loss": 0.0021, + "step": 138870 + }, + { + "epoch": 0.8907331809013158, + "grad_norm": 0.14093145728111267, + "learning_rate": 6.785867528926877e-06, + "loss": 0.0044, + "step": 138880 + }, + { + "epoch": 0.8907973177951019, + "grad_norm": 0.12831830978393555, + "learning_rate": 6.785344733487892e-06, + "loss": 0.0045, + "step": 138890 + }, + { + "epoch": 0.890861454688888, + "grad_norm": 0.13694891333580017, + "learning_rate": 6.784821915677264e-06, + "loss": 0.0013, + "step": 138900 + }, + { + "epoch": 0.890925591582674, + "grad_norm": 0.09416618198156357, + "learning_rate": 6.784299075501539e-06, + "loss": 0.0016, + "step": 138910 + }, + { + "epoch": 0.8909897284764602, + "grad_norm": 0.2192876935005188, + "learning_rate": 6.7837762129672725e-06, + "loss": 0.0017, + "step": 138920 + }, + { + "epoch": 0.8910538653702462, + "grad_norm": 0.12382154911756516, + "learning_rate": 6.783253328081015e-06, + "loss": 0.0019, + "step": 138930 + }, + { + "epoch": 0.8911180022640324, + "grad_norm": 0.15769800543785095, + "learning_rate": 6.782730420849319e-06, + "loss": 0.002, + "step": 138940 + }, + { + "epoch": 0.8911821391578184, + "grad_norm": 0.017192568629980087, + "learning_rate": 6.782207491278738e-06, + "loss": 0.0024, + "step": 138950 + }, + { + "epoch": 0.8912462760516046, + "grad_norm": 0.07215219736099243, + "learning_rate": 6.781684539375822e-06, + "loss": 0.002, + "step": 138960 + }, + { + "epoch": 0.8913104129453906, + "grad_norm": 0.14700429141521454, + "learning_rate": 6.781161565147125e-06, + "loss": 0.0025, + "step": 138970 + }, + { + "epoch": 0.8913745498391767, + "grad_norm": 0.0878363698720932, + "learning_rate": 6.780638568599203e-06, + "loss": 0.0023, + "step": 138980 + }, + { + "epoch": 0.8914386867329628, + "grad_norm": 0.1149219200015068, + "learning_rate": 6.780115549738604e-06, + "loss": 0.003, + "step": 138990 + }, + { + "epoch": 0.8915028236267489, + "grad_norm": 0.16724520921707153, + "learning_rate": 6.779592508571886e-06, + "loss": 0.0014, + "step": 139000 + }, + { + "epoch": 0.8915669605205351, + "grad_norm": 0.2597067952156067, + "learning_rate": 6.779069445105603e-06, + "loss": 0.0025, + "step": 139010 + }, + { + "epoch": 0.8916310974143211, + "grad_norm": 0.08588556200265884, + "learning_rate": 6.7785463593463066e-06, + "loss": 0.0017, + "step": 139020 + }, + { + "epoch": 0.8916952343081073, + "grad_norm": 0.10413733124732971, + "learning_rate": 6.778023251300555e-06, + "loss": 0.0013, + "step": 139030 + }, + { + "epoch": 0.8917593712018933, + "grad_norm": 0.052861861884593964, + "learning_rate": 6.7775001209749005e-06, + "loss": 0.0018, + "step": 139040 + }, + { + "epoch": 0.8918235080956795, + "grad_norm": 0.2284519225358963, + "learning_rate": 6.776976968375899e-06, + "loss": 0.0041, + "step": 139050 + }, + { + "epoch": 0.8918876449894655, + "grad_norm": 0.21057339012622833, + "learning_rate": 6.776453793510106e-06, + "loss": 0.002, + "step": 139060 + }, + { + "epoch": 0.8919517818832516, + "grad_norm": 0.0584498755633831, + "learning_rate": 6.775930596384078e-06, + "loss": 0.0035, + "step": 139070 + }, + { + "epoch": 0.8920159187770377, + "grad_norm": 0.16429851949214935, + "learning_rate": 6.77540737700437e-06, + "loss": 0.0029, + "step": 139080 + }, + { + "epoch": 0.8920800556708238, + "grad_norm": 0.0653160810470581, + "learning_rate": 6.774884135377539e-06, + "loss": 0.0017, + "step": 139090 + }, + { + "epoch": 0.8921441925646099, + "grad_norm": 0.060079559683799744, + "learning_rate": 6.774360871510142e-06, + "loss": 0.0017, + "step": 139100 + }, + { + "epoch": 0.892208329458396, + "grad_norm": 0.049049098044633865, + "learning_rate": 6.773837585408734e-06, + "loss": 0.0015, + "step": 139110 + }, + { + "epoch": 0.892272466352182, + "grad_norm": 0.18999944627285004, + "learning_rate": 6.7733142770798735e-06, + "loss": 0.0031, + "step": 139120 + }, + { + "epoch": 0.8923366032459682, + "grad_norm": 0.15021562576293945, + "learning_rate": 6.772790946530118e-06, + "loss": 0.0018, + "step": 139130 + }, + { + "epoch": 0.8924007401397543, + "grad_norm": 0.2060428410768509, + "learning_rate": 6.7722675937660244e-06, + "loss": 0.002, + "step": 139140 + }, + { + "epoch": 0.8924648770335404, + "grad_norm": 0.12639163434505463, + "learning_rate": 6.771744218794152e-06, + "loss": 0.0016, + "step": 139150 + }, + { + "epoch": 0.8925290139273265, + "grad_norm": 0.27944356203079224, + "learning_rate": 6.771220821621057e-06, + "loss": 0.0028, + "step": 139160 + }, + { + "epoch": 0.8925931508211126, + "grad_norm": 0.1983378380537033, + "learning_rate": 6.770697402253299e-06, + "loss": 0.0028, + "step": 139170 + }, + { + "epoch": 0.8926572877148987, + "grad_norm": 0.16844449937343597, + "learning_rate": 6.770173960697439e-06, + "loss": 0.0026, + "step": 139180 + }, + { + "epoch": 0.8927214246086848, + "grad_norm": 0.1559169888496399, + "learning_rate": 6.769650496960033e-06, + "loss": 0.0015, + "step": 139190 + }, + { + "epoch": 0.8927855615024709, + "grad_norm": 0.01990179345011711, + "learning_rate": 6.769127011047642e-06, + "loss": 0.0013, + "step": 139200 + }, + { + "epoch": 0.8928496983962569, + "grad_norm": 0.07438834756612778, + "learning_rate": 6.7686035029668255e-06, + "loss": 0.0024, + "step": 139210 + }, + { + "epoch": 0.8929138352900431, + "grad_norm": 0.1177377924323082, + "learning_rate": 6.768079972724142e-06, + "loss": 0.0033, + "step": 139220 + }, + { + "epoch": 0.8929779721838291, + "grad_norm": 0.12049730867147446, + "learning_rate": 6.767556420326153e-06, + "loss": 0.0031, + "step": 139230 + }, + { + "epoch": 0.8930421090776153, + "grad_norm": 0.055463168770074844, + "learning_rate": 6.76703284577942e-06, + "loss": 0.0029, + "step": 139240 + }, + { + "epoch": 0.8931062459714013, + "grad_norm": 0.07392285764217377, + "learning_rate": 6.766509249090501e-06, + "loss": 0.0047, + "step": 139250 + }, + { + "epoch": 0.8931703828651875, + "grad_norm": 0.006894891150295734, + "learning_rate": 6.765985630265959e-06, + "loss": 0.0033, + "step": 139260 + }, + { + "epoch": 0.8932345197589735, + "grad_norm": 0.1036565899848938, + "learning_rate": 6.765461989312355e-06, + "loss": 0.0024, + "step": 139270 + }, + { + "epoch": 0.8932986566527596, + "grad_norm": 0.21890641748905182, + "learning_rate": 6.7649383262362524e-06, + "loss": 0.0027, + "step": 139280 + }, + { + "epoch": 0.8933627935465458, + "grad_norm": 0.0598384328186512, + "learning_rate": 6.764414641044208e-06, + "loss": 0.0028, + "step": 139290 + }, + { + "epoch": 0.8934269304403318, + "grad_norm": 0.10389411449432373, + "learning_rate": 6.763890933742789e-06, + "loss": 0.0029, + "step": 139300 + }, + { + "epoch": 0.893491067334118, + "grad_norm": 0.19174174964427948, + "learning_rate": 6.763367204338556e-06, + "loss": 0.0058, + "step": 139310 + }, + { + "epoch": 0.893555204227904, + "grad_norm": 0.054285917431116104, + "learning_rate": 6.762843452838072e-06, + "loss": 0.0016, + "step": 139320 + }, + { + "epoch": 0.8936193411216902, + "grad_norm": 0.08225361257791519, + "learning_rate": 6.762319679247898e-06, + "loss": 0.0039, + "step": 139330 + }, + { + "epoch": 0.8936834780154762, + "grad_norm": 0.17472833395004272, + "learning_rate": 6.7617958835746015e-06, + "loss": 0.0039, + "step": 139340 + }, + { + "epoch": 0.8937476149092624, + "grad_norm": 0.0652933418750763, + "learning_rate": 6.761272065824741e-06, + "loss": 0.0015, + "step": 139350 + }, + { + "epoch": 0.8938117518030484, + "grad_norm": 0.1431877613067627, + "learning_rate": 6.760748226004884e-06, + "loss": 0.0025, + "step": 139360 + }, + { + "epoch": 0.8938758886968345, + "grad_norm": 0.2542361915111542, + "learning_rate": 6.760224364121592e-06, + "loss": 0.0022, + "step": 139370 + }, + { + "epoch": 0.8939400255906206, + "grad_norm": 0.11593648046255112, + "learning_rate": 6.759700480181432e-06, + "loss": 0.0022, + "step": 139380 + }, + { + "epoch": 0.8940041624844067, + "grad_norm": 0.12163574993610382, + "learning_rate": 6.7591765741909665e-06, + "loss": 0.0023, + "step": 139390 + }, + { + "epoch": 0.8940682993781928, + "grad_norm": 0.14666952192783356, + "learning_rate": 6.758652646156763e-06, + "loss": 0.0038, + "step": 139400 + }, + { + "epoch": 0.8941324362719789, + "grad_norm": 0.14081643521785736, + "learning_rate": 6.758128696085383e-06, + "loss": 0.0016, + "step": 139410 + }, + { + "epoch": 0.8941965731657651, + "grad_norm": 0.10789891332387924, + "learning_rate": 6.757604723983394e-06, + "loss": 0.0015, + "step": 139420 + }, + { + "epoch": 0.8942607100595511, + "grad_norm": 0.07813584059476852, + "learning_rate": 6.757080729857364e-06, + "loss": 0.0021, + "step": 139430 + }, + { + "epoch": 0.8943248469533372, + "grad_norm": 0.10379879176616669, + "learning_rate": 6.756556713713853e-06, + "loss": 0.0019, + "step": 139440 + }, + { + "epoch": 0.8943889838471233, + "grad_norm": 0.12771467864513397, + "learning_rate": 6.756032675559434e-06, + "loss": 0.0014, + "step": 139450 + }, + { + "epoch": 0.8944531207409094, + "grad_norm": 0.10882225632667542, + "learning_rate": 6.75550861540067e-06, + "loss": 0.0021, + "step": 139460 + }, + { + "epoch": 0.8945172576346955, + "grad_norm": 0.08284783363342285, + "learning_rate": 6.754984533244128e-06, + "loss": 0.0061, + "step": 139470 + }, + { + "epoch": 0.8945813945284816, + "grad_norm": 0.11675601452589035, + "learning_rate": 6.754460429096374e-06, + "loss": 0.0021, + "step": 139480 + }, + { + "epoch": 0.8946455314222677, + "grad_norm": 0.1882113367319107, + "learning_rate": 6.753936302963979e-06, + "loss": 0.0025, + "step": 139490 + }, + { + "epoch": 0.8947096683160538, + "grad_norm": 0.4749677777290344, + "learning_rate": 6.753412154853508e-06, + "loss": 0.0023, + "step": 139500 + }, + { + "epoch": 0.8947738052098398, + "grad_norm": 0.16558413207530975, + "learning_rate": 6.75288798477153e-06, + "loss": 0.002, + "step": 139510 + }, + { + "epoch": 0.894837942103626, + "grad_norm": 0.13556015491485596, + "learning_rate": 6.752363792724612e-06, + "loss": 0.0021, + "step": 139520 + }, + { + "epoch": 0.894902078997412, + "grad_norm": 0.12252263724803925, + "learning_rate": 6.751839578719324e-06, + "loss": 0.0028, + "step": 139530 + }, + { + "epoch": 0.8949662158911982, + "grad_norm": 0.06946799159049988, + "learning_rate": 6.751315342762234e-06, + "loss": 0.002, + "step": 139540 + }, + { + "epoch": 0.8950303527849842, + "grad_norm": 0.0039226580411195755, + "learning_rate": 6.7507910848599105e-06, + "loss": 0.0019, + "step": 139550 + }, + { + "epoch": 0.8950944896787704, + "grad_norm": 0.1051669493317604, + "learning_rate": 6.750266805018924e-06, + "loss": 0.002, + "step": 139560 + }, + { + "epoch": 0.8951586265725565, + "grad_norm": 0.13654948770999908, + "learning_rate": 6.749742503245843e-06, + "loss": 0.0018, + "step": 139570 + }, + { + "epoch": 0.8952227634663426, + "grad_norm": 0.08528295904397964, + "learning_rate": 6.749218179547239e-06, + "loss": 0.0011, + "step": 139580 + }, + { + "epoch": 0.8952869003601287, + "grad_norm": 0.07897262275218964, + "learning_rate": 6.748693833929679e-06, + "loss": 0.0053, + "step": 139590 + }, + { + "epoch": 0.8953510372539147, + "grad_norm": 0.05403195694088936, + "learning_rate": 6.748169466399738e-06, + "loss": 0.0027, + "step": 139600 + }, + { + "epoch": 0.8954151741477009, + "grad_norm": 0.13913075625896454, + "learning_rate": 6.747645076963981e-06, + "loss": 0.0023, + "step": 139610 + }, + { + "epoch": 0.8954793110414869, + "grad_norm": 0.050196729600429535, + "learning_rate": 6.747120665628985e-06, + "loss": 0.0024, + "step": 139620 + }, + { + "epoch": 0.8955434479352731, + "grad_norm": 0.12445234507322311, + "learning_rate": 6.746596232401316e-06, + "loss": 0.0025, + "step": 139630 + }, + { + "epoch": 0.8956075848290591, + "grad_norm": 0.06994674354791641, + "learning_rate": 6.746071777287551e-06, + "loss": 0.0017, + "step": 139640 + }, + { + "epoch": 0.8956717217228453, + "grad_norm": 0.014794007875025272, + "learning_rate": 6.745547300294256e-06, + "loss": 0.0021, + "step": 139650 + }, + { + "epoch": 0.8957358586166313, + "grad_norm": 0.1464836597442627, + "learning_rate": 6.745022801428007e-06, + "loss": 0.0028, + "step": 139660 + }, + { + "epoch": 0.8957999955104174, + "grad_norm": 0.22069483995437622, + "learning_rate": 6.744498280695375e-06, + "loss": 0.0035, + "step": 139670 + }, + { + "epoch": 0.8958641324042035, + "grad_norm": 0.04780168458819389, + "learning_rate": 6.7439737381029335e-06, + "loss": 0.0035, + "step": 139680 + }, + { + "epoch": 0.8959282692979896, + "grad_norm": 0.14168903231620789, + "learning_rate": 6.743449173657254e-06, + "loss": 0.0065, + "step": 139690 + }, + { + "epoch": 0.8959924061917758, + "grad_norm": 0.04651365801692009, + "learning_rate": 6.742924587364911e-06, + "loss": 0.0021, + "step": 139700 + }, + { + "epoch": 0.8960565430855618, + "grad_norm": 0.05780575051903725, + "learning_rate": 6.742399979232477e-06, + "loss": 0.0026, + "step": 139710 + }, + { + "epoch": 0.896120679979348, + "grad_norm": 0.08799566328525543, + "learning_rate": 6.741875349266525e-06, + "loss": 0.0025, + "step": 139720 + }, + { + "epoch": 0.896184816873134, + "grad_norm": 0.1450275331735611, + "learning_rate": 6.74135069747363e-06, + "loss": 0.0019, + "step": 139730 + }, + { + "epoch": 0.8962489537669202, + "grad_norm": 0.11965475976467133, + "learning_rate": 6.740826023860368e-06, + "loss": 0.006, + "step": 139740 + }, + { + "epoch": 0.8963130906607062, + "grad_norm": 0.13536153733730316, + "learning_rate": 6.740301328433309e-06, + "loss": 0.0022, + "step": 139750 + }, + { + "epoch": 0.8963772275544923, + "grad_norm": 0.06510933488607407, + "learning_rate": 6.739776611199033e-06, + "loss": 0.0022, + "step": 139760 + }, + { + "epoch": 0.8964413644482784, + "grad_norm": 0.08963591605424881, + "learning_rate": 6.739251872164112e-06, + "loss": 0.002, + "step": 139770 + }, + { + "epoch": 0.8965055013420645, + "grad_norm": 0.1946440488100052, + "learning_rate": 6.738727111335122e-06, + "loss": 0.0032, + "step": 139780 + }, + { + "epoch": 0.8965696382358506, + "grad_norm": 0.06276450306177139, + "learning_rate": 6.738202328718639e-06, + "loss": 0.0018, + "step": 139790 + }, + { + "epoch": 0.8966337751296367, + "grad_norm": 0.15514005720615387, + "learning_rate": 6.737677524321238e-06, + "loss": 0.002, + "step": 139800 + }, + { + "epoch": 0.8966979120234227, + "grad_norm": 0.16424056887626648, + "learning_rate": 6.737152698149496e-06, + "loss": 0.0056, + "step": 139810 + }, + { + "epoch": 0.8967620489172089, + "grad_norm": 0.0827999860048294, + "learning_rate": 6.736627850209988e-06, + "loss": 0.0015, + "step": 139820 + }, + { + "epoch": 0.8968261858109949, + "grad_norm": 0.03634626790881157, + "learning_rate": 6.736102980509294e-06, + "loss": 0.0013, + "step": 139830 + }, + { + "epoch": 0.8968903227047811, + "grad_norm": 0.06929469108581543, + "learning_rate": 6.7355780890539865e-06, + "loss": 0.0024, + "step": 139840 + }, + { + "epoch": 0.8969544595985672, + "grad_norm": 0.1740657240152359, + "learning_rate": 6.7350531758506474e-06, + "loss": 0.0017, + "step": 139850 + }, + { + "epoch": 0.8970185964923533, + "grad_norm": 0.10940054059028625, + "learning_rate": 6.73452824090585e-06, + "loss": 0.0026, + "step": 139860 + }, + { + "epoch": 0.8970827333861394, + "grad_norm": 0.13584397733211517, + "learning_rate": 6.734003284226175e-06, + "loss": 0.0013, + "step": 139870 + }, + { + "epoch": 0.8971468702799255, + "grad_norm": 0.1612374633550644, + "learning_rate": 6.7334783058181995e-06, + "loss": 0.0056, + "step": 139880 + }, + { + "epoch": 0.8972110071737116, + "grad_norm": 0.07454238831996918, + "learning_rate": 6.732953305688502e-06, + "loss": 0.0022, + "step": 139890 + }, + { + "epoch": 0.8972751440674976, + "grad_norm": 0.056154754012823105, + "learning_rate": 6.732428283843661e-06, + "loss": 0.0016, + "step": 139900 + }, + { + "epoch": 0.8973392809612838, + "grad_norm": 0.09653551876544952, + "learning_rate": 6.731903240290256e-06, + "loss": 0.0011, + "step": 139910 + }, + { + "epoch": 0.8974034178550698, + "grad_norm": 0.12849965691566467, + "learning_rate": 6.731378175034866e-06, + "loss": 0.0018, + "step": 139920 + }, + { + "epoch": 0.897467554748856, + "grad_norm": 0.0481133833527565, + "learning_rate": 6.730853088084068e-06, + "loss": 0.002, + "step": 139930 + }, + { + "epoch": 0.897531691642642, + "grad_norm": 0.18197129666805267, + "learning_rate": 6.730327979444446e-06, + "loss": 0.0025, + "step": 139940 + }, + { + "epoch": 0.8975958285364282, + "grad_norm": 0.12247568368911743, + "learning_rate": 6.729802849122577e-06, + "loss": 0.0025, + "step": 139950 + }, + { + "epoch": 0.8976599654302142, + "grad_norm": 0.051559291779994965, + "learning_rate": 6.7292776971250415e-06, + "loss": 0.0019, + "step": 139960 + }, + { + "epoch": 0.8977241023240004, + "grad_norm": 0.10260986536741257, + "learning_rate": 6.728752523458421e-06, + "loss": 0.0019, + "step": 139970 + }, + { + "epoch": 0.8977882392177864, + "grad_norm": 0.16498993337154388, + "learning_rate": 6.728227328129296e-06, + "loss": 0.0042, + "step": 139980 + }, + { + "epoch": 0.8978523761115725, + "grad_norm": 0.1725826859474182, + "learning_rate": 6.727702111144245e-06, + "loss": 0.0038, + "step": 139990 + }, + { + "epoch": 0.8979165130053587, + "grad_norm": 0.028135206550359726, + "learning_rate": 6.727176872509855e-06, + "loss": 0.0015, + "step": 140000 + }, + { + "epoch": 0.8979806498991447, + "grad_norm": 0.058798953890800476, + "learning_rate": 6.726651612232703e-06, + "loss": 0.0034, + "step": 140010 + }, + { + "epoch": 0.8980447867929309, + "grad_norm": 0.08650743216276169, + "learning_rate": 6.726126330319373e-06, + "loss": 0.0026, + "step": 140020 + }, + { + "epoch": 0.8981089236867169, + "grad_norm": 0.030648980289697647, + "learning_rate": 6.725601026776446e-06, + "loss": 0.0019, + "step": 140030 + }, + { + "epoch": 0.8981730605805031, + "grad_norm": 0.08183928579092026, + "learning_rate": 6.725075701610505e-06, + "loss": 0.0024, + "step": 140040 + }, + { + "epoch": 0.8982371974742891, + "grad_norm": 0.06568136811256409, + "learning_rate": 6.724550354828132e-06, + "loss": 0.0021, + "step": 140050 + }, + { + "epoch": 0.8983013343680752, + "grad_norm": 0.2455543428659439, + "learning_rate": 6.724024986435912e-06, + "loss": 0.003, + "step": 140060 + }, + { + "epoch": 0.8983654712618613, + "grad_norm": 0.12305790930986404, + "learning_rate": 6.723499596440426e-06, + "loss": 0.0011, + "step": 140070 + }, + { + "epoch": 0.8984296081556474, + "grad_norm": 0.1043807864189148, + "learning_rate": 6.722974184848256e-06, + "loss": 0.0018, + "step": 140080 + }, + { + "epoch": 0.8984937450494335, + "grad_norm": 0.029284963384270668, + "learning_rate": 6.722448751665992e-06, + "loss": 0.0018, + "step": 140090 + }, + { + "epoch": 0.8985578819432196, + "grad_norm": 0.2810690104961395, + "learning_rate": 6.721923296900211e-06, + "loss": 0.0018, + "step": 140100 + }, + { + "epoch": 0.8986220188370057, + "grad_norm": 0.1602506786584854, + "learning_rate": 6.7213978205575025e-06, + "loss": 0.0018, + "step": 140110 + }, + { + "epoch": 0.8986861557307918, + "grad_norm": 0.04824730008840561, + "learning_rate": 6.7208723226444486e-06, + "loss": 0.0011, + "step": 140120 + }, + { + "epoch": 0.898750292624578, + "grad_norm": 0.20356017351150513, + "learning_rate": 6.720346803167634e-06, + "loss": 0.0029, + "step": 140130 + }, + { + "epoch": 0.898814429518364, + "grad_norm": 0.2512374520301819, + "learning_rate": 6.719821262133645e-06, + "loss": 0.0033, + "step": 140140 + }, + { + "epoch": 0.8988785664121501, + "grad_norm": 0.07396150380373001, + "learning_rate": 6.719295699549066e-06, + "loss": 0.0075, + "step": 140150 + }, + { + "epoch": 0.8989427033059362, + "grad_norm": 0.045413654297590256, + "learning_rate": 6.718770115420483e-06, + "loss": 0.0028, + "step": 140160 + }, + { + "epoch": 0.8990068401997223, + "grad_norm": 0.07554330676794052, + "learning_rate": 6.7182445097544835e-06, + "loss": 0.0025, + "step": 140170 + }, + { + "epoch": 0.8990709770935084, + "grad_norm": 0.10082416236400604, + "learning_rate": 6.7177188825576515e-06, + "loss": 0.0019, + "step": 140180 + }, + { + "epoch": 0.8991351139872945, + "grad_norm": 0.0690072625875473, + "learning_rate": 6.717193233836574e-06, + "loss": 0.0047, + "step": 140190 + }, + { + "epoch": 0.8991992508810805, + "grad_norm": 0.056680645793676376, + "learning_rate": 6.716667563597837e-06, + "loss": 0.0019, + "step": 140200 + }, + { + "epoch": 0.8992633877748667, + "grad_norm": 0.06069463863968849, + "learning_rate": 6.71614187184803e-06, + "loss": 0.0019, + "step": 140210 + }, + { + "epoch": 0.8993275246686527, + "grad_norm": 0.006808622274547815, + "learning_rate": 6.715616158593739e-06, + "loss": 0.0025, + "step": 140220 + }, + { + "epoch": 0.8993916615624389, + "grad_norm": 0.06863482296466827, + "learning_rate": 6.715090423841549e-06, + "loss": 0.0017, + "step": 140230 + }, + { + "epoch": 0.8994557984562249, + "grad_norm": 0.3106861412525177, + "learning_rate": 6.714564667598053e-06, + "loss": 0.0037, + "step": 140240 + }, + { + "epoch": 0.8995199353500111, + "grad_norm": 0.35957279801368713, + "learning_rate": 6.714038889869835e-06, + "loss": 0.0019, + "step": 140250 + }, + { + "epoch": 0.8995840722437971, + "grad_norm": 0.07534538209438324, + "learning_rate": 6.713513090663486e-06, + "loss": 0.0019, + "step": 140260 + }, + { + "epoch": 0.8996482091375833, + "grad_norm": 0.0364227332174778, + "learning_rate": 6.712987269985592e-06, + "loss": 0.0017, + "step": 140270 + }, + { + "epoch": 0.8997123460313694, + "grad_norm": 0.06626851856708527, + "learning_rate": 6.712461427842743e-06, + "loss": 0.0028, + "step": 140280 + }, + { + "epoch": 0.8997764829251554, + "grad_norm": 0.3194088637828827, + "learning_rate": 6.71193556424153e-06, + "loss": 0.0042, + "step": 140290 + }, + { + "epoch": 0.8998406198189416, + "grad_norm": 0.14776144921779633, + "learning_rate": 6.71140967918854e-06, + "loss": 0.0013, + "step": 140300 + }, + { + "epoch": 0.8999047567127276, + "grad_norm": 0.1562497466802597, + "learning_rate": 6.710883772690362e-06, + "loss": 0.0027, + "step": 140310 + }, + { + "epoch": 0.8999688936065138, + "grad_norm": 0.16863062977790833, + "learning_rate": 6.71035784475359e-06, + "loss": 0.0023, + "step": 140320 + }, + { + "epoch": 0.9000330305002998, + "grad_norm": 0.020131120458245277, + "learning_rate": 6.70983189538481e-06, + "loss": 0.0017, + "step": 140330 + }, + { + "epoch": 0.900097167394086, + "grad_norm": 0.236953467130661, + "learning_rate": 6.709305924590617e-06, + "loss": 0.0023, + "step": 140340 + }, + { + "epoch": 0.900161304287872, + "grad_norm": 0.14059460163116455, + "learning_rate": 6.708779932377596e-06, + "loss": 0.003, + "step": 140350 + }, + { + "epoch": 0.9002254411816581, + "grad_norm": 0.1643529087305069, + "learning_rate": 6.708253918752343e-06, + "loss": 0.0024, + "step": 140360 + }, + { + "epoch": 0.9002895780754442, + "grad_norm": 0.13108673691749573, + "learning_rate": 6.707727883721447e-06, + "loss": 0.0024, + "step": 140370 + }, + { + "epoch": 0.9003537149692303, + "grad_norm": 0.07430348545312881, + "learning_rate": 6.7072018272915e-06, + "loss": 0.0021, + "step": 140380 + }, + { + "epoch": 0.9004178518630164, + "grad_norm": 0.12285272777080536, + "learning_rate": 6.706675749469093e-06, + "loss": 0.0031, + "step": 140390 + }, + { + "epoch": 0.9004819887568025, + "grad_norm": 0.08578559756278992, + "learning_rate": 6.706149650260821e-06, + "loss": 0.005, + "step": 140400 + }, + { + "epoch": 0.9005461256505887, + "grad_norm": 0.011579250916838646, + "learning_rate": 6.705623529673274e-06, + "loss": 0.0012, + "step": 140410 + }, + { + "epoch": 0.9006102625443747, + "grad_norm": 0.09027321636676788, + "learning_rate": 6.705097387713046e-06, + "loss": 0.0016, + "step": 140420 + }, + { + "epoch": 0.9006743994381609, + "grad_norm": 0.045796554535627365, + "learning_rate": 6.704571224386728e-06, + "loss": 0.0016, + "step": 140430 + }, + { + "epoch": 0.9007385363319469, + "grad_norm": 0.07548578828573227, + "learning_rate": 6.704045039700914e-06, + "loss": 0.0016, + "step": 140440 + }, + { + "epoch": 0.900802673225733, + "grad_norm": 0.23142126202583313, + "learning_rate": 6.703518833662198e-06, + "loss": 0.0024, + "step": 140450 + }, + { + "epoch": 0.9008668101195191, + "grad_norm": 0.11072611808776855, + "learning_rate": 6.702992606277174e-06, + "loss": 0.0022, + "step": 140460 + }, + { + "epoch": 0.9009309470133052, + "grad_norm": 0.2234678566455841, + "learning_rate": 6.702466357552435e-06, + "loss": 0.0024, + "step": 140470 + }, + { + "epoch": 0.9009950839070913, + "grad_norm": 0.05514800548553467, + "learning_rate": 6.701940087494576e-06, + "loss": 0.0016, + "step": 140480 + }, + { + "epoch": 0.9010592208008774, + "grad_norm": 0.1039884090423584, + "learning_rate": 6.701413796110192e-06, + "loss": 0.0021, + "step": 140490 + }, + { + "epoch": 0.9011233576946635, + "grad_norm": 0.013716175220906734, + "learning_rate": 6.700887483405877e-06, + "loss": 0.0017, + "step": 140500 + }, + { + "epoch": 0.9011874945884496, + "grad_norm": 0.16248738765716553, + "learning_rate": 6.7003611493882255e-06, + "loss": 0.0032, + "step": 140510 + }, + { + "epoch": 0.9012516314822356, + "grad_norm": 0.207486093044281, + "learning_rate": 6.6998347940638345e-06, + "loss": 0.0019, + "step": 140520 + }, + { + "epoch": 0.9013157683760218, + "grad_norm": 0.0907140001654625, + "learning_rate": 6.699308417439298e-06, + "loss": 0.0013, + "step": 140530 + }, + { + "epoch": 0.9013799052698078, + "grad_norm": 0.028752660378813744, + "learning_rate": 6.698782019521213e-06, + "loss": 0.0012, + "step": 140540 + }, + { + "epoch": 0.901444042163594, + "grad_norm": 0.05771041288971901, + "learning_rate": 6.698255600316175e-06, + "loss": 0.0018, + "step": 140550 + }, + { + "epoch": 0.9015081790573801, + "grad_norm": 0.04292020574212074, + "learning_rate": 6.69772915983078e-06, + "loss": 0.004, + "step": 140560 + }, + { + "epoch": 0.9015723159511662, + "grad_norm": 0.16082461178302765, + "learning_rate": 6.697202698071626e-06, + "loss": 0.0022, + "step": 140570 + }, + { + "epoch": 0.9016364528449523, + "grad_norm": 0.0972842127084732, + "learning_rate": 6.696676215045309e-06, + "loss": 0.0027, + "step": 140580 + }, + { + "epoch": 0.9017005897387383, + "grad_norm": 0.00951351784169674, + "learning_rate": 6.696149710758426e-06, + "loss": 0.0029, + "step": 140590 + }, + { + "epoch": 0.9017647266325245, + "grad_norm": 0.09503789246082306, + "learning_rate": 6.695623185217576e-06, + "loss": 0.0042, + "step": 140600 + }, + { + "epoch": 0.9018288635263105, + "grad_norm": 0.13512814044952393, + "learning_rate": 6.695096638429355e-06, + "loss": 0.0029, + "step": 140610 + }, + { + "epoch": 0.9018930004200967, + "grad_norm": 0.24733853340148926, + "learning_rate": 6.6945700704003614e-06, + "loss": 0.0026, + "step": 140620 + }, + { + "epoch": 0.9019571373138827, + "grad_norm": 0.09181825071573257, + "learning_rate": 6.694043481137193e-06, + "loss": 0.0016, + "step": 140630 + }, + { + "epoch": 0.9020212742076689, + "grad_norm": 0.19485434889793396, + "learning_rate": 6.69351687064645e-06, + "loss": 0.004, + "step": 140640 + }, + { + "epoch": 0.9020854111014549, + "grad_norm": 0.029666827991604805, + "learning_rate": 6.692990238934731e-06, + "loss": 0.0016, + "step": 140650 + }, + { + "epoch": 0.902149547995241, + "grad_norm": 0.21938619017601013, + "learning_rate": 6.692463586008634e-06, + "loss": 0.003, + "step": 140660 + }, + { + "epoch": 0.9022136848890271, + "grad_norm": 0.21265892684459686, + "learning_rate": 6.691936911874758e-06, + "loss": 0.0036, + "step": 140670 + }, + { + "epoch": 0.9022778217828132, + "grad_norm": 0.14057812094688416, + "learning_rate": 6.691410216539705e-06, + "loss": 0.0027, + "step": 140680 + }, + { + "epoch": 0.9023419586765994, + "grad_norm": 0.19511573016643524, + "learning_rate": 6.69088350001007e-06, + "loss": 0.0015, + "step": 140690 + }, + { + "epoch": 0.9024060955703854, + "grad_norm": 0.031265996396541595, + "learning_rate": 6.690356762292459e-06, + "loss": 0.002, + "step": 140700 + }, + { + "epoch": 0.9024702324641716, + "grad_norm": 0.04314252734184265, + "learning_rate": 6.689830003393468e-06, + "loss": 0.0018, + "step": 140710 + }, + { + "epoch": 0.9025343693579576, + "grad_norm": 0.5250045657157898, + "learning_rate": 6.6893032233197e-06, + "loss": 0.0039, + "step": 140720 + }, + { + "epoch": 0.9025985062517438, + "grad_norm": 0.10032887011766434, + "learning_rate": 6.688776422077756e-06, + "loss": 0.0027, + "step": 140730 + }, + { + "epoch": 0.9026626431455298, + "grad_norm": 0.15504832565784454, + "learning_rate": 6.688249599674235e-06, + "loss": 0.0028, + "step": 140740 + }, + { + "epoch": 0.902726780039316, + "grad_norm": 0.1699640303850174, + "learning_rate": 6.687722756115742e-06, + "loss": 0.0018, + "step": 140750 + }, + { + "epoch": 0.902790916933102, + "grad_norm": 0.15671475231647491, + "learning_rate": 6.687195891408874e-06, + "loss": 0.0017, + "step": 140760 + }, + { + "epoch": 0.9028550538268881, + "grad_norm": 0.35086822509765625, + "learning_rate": 6.686669005560237e-06, + "loss": 0.0073, + "step": 140770 + }, + { + "epoch": 0.9029191907206742, + "grad_norm": 0.1215127483010292, + "learning_rate": 6.686142098576432e-06, + "loss": 0.0024, + "step": 140780 + }, + { + "epoch": 0.9029833276144603, + "grad_norm": 0.08526911586523056, + "learning_rate": 6.685615170464061e-06, + "loss": 0.0026, + "step": 140790 + }, + { + "epoch": 0.9030474645082464, + "grad_norm": 0.07515113055706024, + "learning_rate": 6.685088221229727e-06, + "loss": 0.0031, + "step": 140800 + }, + { + "epoch": 0.9031116014020325, + "grad_norm": 0.1216416209936142, + "learning_rate": 6.684561250880035e-06, + "loss": 0.0029, + "step": 140810 + }, + { + "epoch": 0.9031757382958185, + "grad_norm": 0.08766988664865494, + "learning_rate": 6.684034259421586e-06, + "loss": 0.001, + "step": 140820 + }, + { + "epoch": 0.9032398751896047, + "grad_norm": 0.10048586875200272, + "learning_rate": 6.683507246860984e-06, + "loss": 0.0026, + "step": 140830 + }, + { + "epoch": 0.9033040120833908, + "grad_norm": 0.10893585532903671, + "learning_rate": 6.682980213204832e-06, + "loss": 0.0019, + "step": 140840 + }, + { + "epoch": 0.9033681489771769, + "grad_norm": 0.06966837495565414, + "learning_rate": 6.682453158459736e-06, + "loss": 0.0021, + "step": 140850 + }, + { + "epoch": 0.903432285870963, + "grad_norm": 0.050560686737298965, + "learning_rate": 6.6819260826323e-06, + "loss": 0.0024, + "step": 140860 + }, + { + "epoch": 0.9034964227647491, + "grad_norm": 0.08276427537202835, + "learning_rate": 6.681398985729127e-06, + "loss": 0.0018, + "step": 140870 + }, + { + "epoch": 0.9035605596585352, + "grad_norm": 0.16475242376327515, + "learning_rate": 6.680871867756824e-06, + "loss": 0.0011, + "step": 140880 + }, + { + "epoch": 0.9036246965523212, + "grad_norm": 0.040519170463085175, + "learning_rate": 6.680344728721995e-06, + "loss": 0.0068, + "step": 140890 + }, + { + "epoch": 0.9036888334461074, + "grad_norm": 0.08458121120929718, + "learning_rate": 6.679817568631245e-06, + "loss": 0.0016, + "step": 140900 + }, + { + "epoch": 0.9037529703398934, + "grad_norm": 0.0888509452342987, + "learning_rate": 6.6792903874911805e-06, + "loss": 0.0015, + "step": 140910 + }, + { + "epoch": 0.9038171072336796, + "grad_norm": 0.1488405019044876, + "learning_rate": 6.678763185308408e-06, + "loss": 0.0027, + "step": 140920 + }, + { + "epoch": 0.9038812441274656, + "grad_norm": 0.04099796339869499, + "learning_rate": 6.678235962089531e-06, + "loss": 0.0017, + "step": 140930 + }, + { + "epoch": 0.9039453810212518, + "grad_norm": 0.09509284049272537, + "learning_rate": 6.67770871784116e-06, + "loss": 0.002, + "step": 140940 + }, + { + "epoch": 0.9040095179150378, + "grad_norm": 0.061781175434589386, + "learning_rate": 6.6771814525698984e-06, + "loss": 0.0044, + "step": 140950 + }, + { + "epoch": 0.904073654808824, + "grad_norm": 0.0422157421708107, + "learning_rate": 6.676654166282356e-06, + "loss": 0.0034, + "step": 140960 + }, + { + "epoch": 0.9041377917026101, + "grad_norm": 0.10214755684137344, + "learning_rate": 6.6761268589851384e-06, + "loss": 0.0017, + "step": 140970 + }, + { + "epoch": 0.9042019285963961, + "grad_norm": 0.15138445794582367, + "learning_rate": 6.675599530684853e-06, + "loss": 0.0019, + "step": 140980 + }, + { + "epoch": 0.9042660654901823, + "grad_norm": 0.06442516297101974, + "learning_rate": 6.675072181388107e-06, + "loss": 0.0025, + "step": 140990 + }, + { + "epoch": 0.9043302023839683, + "grad_norm": 0.19241341948509216, + "learning_rate": 6.674544811101511e-06, + "loss": 0.0014, + "step": 141000 + }, + { + "epoch": 0.9043943392777545, + "grad_norm": 0.09069796651601791, + "learning_rate": 6.67401741983167e-06, + "loss": 0.0019, + "step": 141010 + }, + { + "epoch": 0.9044584761715405, + "grad_norm": 0.26262545585632324, + "learning_rate": 6.673490007585196e-06, + "loss": 0.0026, + "step": 141020 + }, + { + "epoch": 0.9045226130653267, + "grad_norm": 0.32308170199394226, + "learning_rate": 6.672962574368695e-06, + "loss": 0.0018, + "step": 141030 + }, + { + "epoch": 0.9045867499591127, + "grad_norm": 0.09142335504293442, + "learning_rate": 6.672435120188778e-06, + "loss": 0.0023, + "step": 141040 + }, + { + "epoch": 0.9046508868528989, + "grad_norm": 0.1618300825357437, + "learning_rate": 6.671907645052054e-06, + "loss": 0.001, + "step": 141050 + }, + { + "epoch": 0.9047150237466849, + "grad_norm": 0.07678396999835968, + "learning_rate": 6.671380148965132e-06, + "loss": 0.0027, + "step": 141060 + }, + { + "epoch": 0.904779160640471, + "grad_norm": 0.04675278440117836, + "learning_rate": 6.670852631934621e-06, + "loss": 0.0014, + "step": 141070 + }, + { + "epoch": 0.9048432975342571, + "grad_norm": 0.15445196628570557, + "learning_rate": 6.670325093967133e-06, + "loss": 0.0022, + "step": 141080 + }, + { + "epoch": 0.9049074344280432, + "grad_norm": 0.009850629605352879, + "learning_rate": 6.669797535069278e-06, + "loss": 0.0011, + "step": 141090 + }, + { + "epoch": 0.9049715713218293, + "grad_norm": 0.1014852374792099, + "learning_rate": 6.669269955247666e-06, + "loss": 0.0015, + "step": 141100 + }, + { + "epoch": 0.9050357082156154, + "grad_norm": 0.15192469954490662, + "learning_rate": 6.66874235450891e-06, + "loss": 0.0011, + "step": 141110 + }, + { + "epoch": 0.9050998451094016, + "grad_norm": 0.08786418288946152, + "learning_rate": 6.668214732859618e-06, + "loss": 0.0023, + "step": 141120 + }, + { + "epoch": 0.9051639820031876, + "grad_norm": 0.02861233986914158, + "learning_rate": 6.667687090306405e-06, + "loss": 0.0025, + "step": 141130 + }, + { + "epoch": 0.9052281188969737, + "grad_norm": 0.08352043479681015, + "learning_rate": 6.667159426855878e-06, + "loss": 0.0026, + "step": 141140 + }, + { + "epoch": 0.9052922557907598, + "grad_norm": 0.0435749851167202, + "learning_rate": 6.666631742514655e-06, + "loss": 0.0021, + "step": 141150 + }, + { + "epoch": 0.9053563926845459, + "grad_norm": 0.06818929314613342, + "learning_rate": 6.666104037289343e-06, + "loss": 0.0023, + "step": 141160 + }, + { + "epoch": 0.905420529578332, + "grad_norm": 0.05088305473327637, + "learning_rate": 6.665576311186557e-06, + "loss": 0.0027, + "step": 141170 + }, + { + "epoch": 0.9054846664721181, + "grad_norm": 0.09777114540338516, + "learning_rate": 6.66504856421291e-06, + "loss": 0.004, + "step": 141180 + }, + { + "epoch": 0.9055488033659042, + "grad_norm": 0.1329837143421173, + "learning_rate": 6.6645207963750145e-06, + "loss": 0.002, + "step": 141190 + }, + { + "epoch": 0.9056129402596903, + "grad_norm": 0.08360269665718079, + "learning_rate": 6.663993007679484e-06, + "loss": 0.0022, + "step": 141200 + }, + { + "epoch": 0.9056770771534763, + "grad_norm": 0.15117284655570984, + "learning_rate": 6.663465198132932e-06, + "loss": 0.0011, + "step": 141210 + }, + { + "epoch": 0.9057412140472625, + "grad_norm": 0.04185052961111069, + "learning_rate": 6.66293736774197e-06, + "loss": 0.0022, + "step": 141220 + }, + { + "epoch": 0.9058053509410485, + "grad_norm": 0.27626991271972656, + "learning_rate": 6.662409516513217e-06, + "loss": 0.0027, + "step": 141230 + }, + { + "epoch": 0.9058694878348347, + "grad_norm": 0.05853215232491493, + "learning_rate": 6.661881644453284e-06, + "loss": 0.0032, + "step": 141240 + }, + { + "epoch": 0.9059336247286208, + "grad_norm": 0.005517345387488604, + "learning_rate": 6.661353751568787e-06, + "loss": 0.0036, + "step": 141250 + }, + { + "epoch": 0.9059977616224069, + "grad_norm": 0.07829311490058899, + "learning_rate": 6.660825837866338e-06, + "loss": 0.002, + "step": 141260 + }, + { + "epoch": 0.906061898516193, + "grad_norm": 0.13096913695335388, + "learning_rate": 6.660297903352556e-06, + "loss": 0.003, + "step": 141270 + }, + { + "epoch": 0.906126035409979, + "grad_norm": 0.17294195294380188, + "learning_rate": 6.659769948034054e-06, + "loss": 0.0027, + "step": 141280 + }, + { + "epoch": 0.9061901723037652, + "grad_norm": 0.0880667194724083, + "learning_rate": 6.659241971917447e-06, + "loss": 0.0024, + "step": 141290 + }, + { + "epoch": 0.9062543091975512, + "grad_norm": 0.13531440496444702, + "learning_rate": 6.6587139750093545e-06, + "loss": 0.0026, + "step": 141300 + }, + { + "epoch": 0.9063184460913374, + "grad_norm": 0.10861565917730331, + "learning_rate": 6.6581859573163895e-06, + "loss": 0.0021, + "step": 141310 + }, + { + "epoch": 0.9063825829851234, + "grad_norm": 0.14992816746234894, + "learning_rate": 6.657657918845169e-06, + "loss": 0.0038, + "step": 141320 + }, + { + "epoch": 0.9064467198789096, + "grad_norm": 0.19455134868621826, + "learning_rate": 6.657129859602312e-06, + "loss": 0.0018, + "step": 141330 + }, + { + "epoch": 0.9065108567726956, + "grad_norm": 0.1379806250333786, + "learning_rate": 6.656601779594431e-06, + "loss": 0.0017, + "step": 141340 + }, + { + "epoch": 0.9065749936664818, + "grad_norm": 0.08714434504508972, + "learning_rate": 6.656073678828147e-06, + "loss": 0.0017, + "step": 141350 + }, + { + "epoch": 0.9066391305602678, + "grad_norm": 0.05364006757736206, + "learning_rate": 6.655545557310077e-06, + "loss": 0.0078, + "step": 141360 + }, + { + "epoch": 0.9067032674540539, + "grad_norm": 0.019650449976325035, + "learning_rate": 6.655017415046836e-06, + "loss": 0.0037, + "step": 141370 + }, + { + "epoch": 0.90676740434784, + "grad_norm": 0.08904875814914703, + "learning_rate": 6.654489252045045e-06, + "loss": 0.002, + "step": 141380 + }, + { + "epoch": 0.9068315412416261, + "grad_norm": 0.1390581876039505, + "learning_rate": 6.653961068311321e-06, + "loss": 0.0024, + "step": 141390 + }, + { + "epoch": 0.9068956781354123, + "grad_norm": 0.06195981428027153, + "learning_rate": 6.653432863852284e-06, + "loss": 0.0018, + "step": 141400 + }, + { + "epoch": 0.9069598150291983, + "grad_norm": 0.019686702638864517, + "learning_rate": 6.65290463867455e-06, + "loss": 0.0013, + "step": 141410 + }, + { + "epoch": 0.9070239519229845, + "grad_norm": 0.09837204217910767, + "learning_rate": 6.65237639278474e-06, + "loss": 0.0017, + "step": 141420 + }, + { + "epoch": 0.9070880888167705, + "grad_norm": 0.08003837615251541, + "learning_rate": 6.651848126189473e-06, + "loss": 0.0017, + "step": 141430 + }, + { + "epoch": 0.9071522257105566, + "grad_norm": 0.03738339990377426, + "learning_rate": 6.651319838895367e-06, + "loss": 0.0009, + "step": 141440 + }, + { + "epoch": 0.9072163626043427, + "grad_norm": 0.07136432081460953, + "learning_rate": 6.650791530909045e-06, + "loss": 0.0016, + "step": 141450 + }, + { + "epoch": 0.9072804994981288, + "grad_norm": 0.05406304448843002, + "learning_rate": 6.650263202237125e-06, + "loss": 0.0017, + "step": 141460 + }, + { + "epoch": 0.9073446363919149, + "grad_norm": 0.30447080731391907, + "learning_rate": 6.649734852886228e-06, + "loss": 0.004, + "step": 141470 + }, + { + "epoch": 0.907408773285701, + "grad_norm": 0.07497084140777588, + "learning_rate": 6.6492064828629735e-06, + "loss": 0.0027, + "step": 141480 + }, + { + "epoch": 0.9074729101794871, + "grad_norm": 0.07167311012744904, + "learning_rate": 6.648678092173983e-06, + "loss": 0.0024, + "step": 141490 + }, + { + "epoch": 0.9075370470732732, + "grad_norm": 0.13172192871570587, + "learning_rate": 6.648149680825878e-06, + "loss": 0.0027, + "step": 141500 + }, + { + "epoch": 0.9076011839670592, + "grad_norm": 0.27396607398986816, + "learning_rate": 6.64762124882528e-06, + "loss": 0.0031, + "step": 141510 + }, + { + "epoch": 0.9076653208608454, + "grad_norm": 0.16255521774291992, + "learning_rate": 6.64709279617881e-06, + "loss": 0.0024, + "step": 141520 + }, + { + "epoch": 0.9077294577546314, + "grad_norm": 0.04003090411424637, + "learning_rate": 6.6465643228930895e-06, + "loss": 0.0018, + "step": 141530 + }, + { + "epoch": 0.9077935946484176, + "grad_norm": 0.11404585838317871, + "learning_rate": 6.646035828974742e-06, + "loss": 0.0025, + "step": 141540 + }, + { + "epoch": 0.9078577315422037, + "grad_norm": 0.5322808623313904, + "learning_rate": 6.645507314430389e-06, + "loss": 0.0052, + "step": 141550 + }, + { + "epoch": 0.9079218684359898, + "grad_norm": 0.15718133747577667, + "learning_rate": 6.644978779266652e-06, + "loss": 0.0027, + "step": 141560 + }, + { + "epoch": 0.9079860053297759, + "grad_norm": 0.031880151480436325, + "learning_rate": 6.644450223490158e-06, + "loss": 0.0024, + "step": 141570 + }, + { + "epoch": 0.908050142223562, + "grad_norm": 0.0900476947426796, + "learning_rate": 6.643921647107525e-06, + "loss": 0.0031, + "step": 141580 + }, + { + "epoch": 0.9081142791173481, + "grad_norm": 0.05382583290338516, + "learning_rate": 6.643393050125379e-06, + "loss": 0.0014, + "step": 141590 + }, + { + "epoch": 0.9081784160111341, + "grad_norm": 0.08408217877149582, + "learning_rate": 6.6428644325503445e-06, + "loss": 0.005, + "step": 141600 + }, + { + "epoch": 0.9082425529049203, + "grad_norm": 0.0819145068526268, + "learning_rate": 6.6423357943890456e-06, + "loss": 0.0018, + "step": 141610 + }, + { + "epoch": 0.9083066897987063, + "grad_norm": 0.05600690841674805, + "learning_rate": 6.641807135648104e-06, + "loss": 0.0025, + "step": 141620 + }, + { + "epoch": 0.9083708266924925, + "grad_norm": 0.09864833950996399, + "learning_rate": 6.641278456334145e-06, + "loss": 0.0017, + "step": 141630 + }, + { + "epoch": 0.9084349635862785, + "grad_norm": 0.08638419210910797, + "learning_rate": 6.640749756453795e-06, + "loss": 0.0022, + "step": 141640 + }, + { + "epoch": 0.9084991004800647, + "grad_norm": 0.10510845482349396, + "learning_rate": 6.640221036013678e-06, + "loss": 0.0017, + "step": 141650 + }, + { + "epoch": 0.9085632373738507, + "grad_norm": 0.3093767762184143, + "learning_rate": 6.639692295020419e-06, + "loss": 0.0021, + "step": 141660 + }, + { + "epoch": 0.9086273742676368, + "grad_norm": 0.08151751011610031, + "learning_rate": 6.6391635334806426e-06, + "loss": 0.0021, + "step": 141670 + }, + { + "epoch": 0.908691511161423, + "grad_norm": 0.02787480689585209, + "learning_rate": 6.638634751400978e-06, + "loss": 0.0017, + "step": 141680 + }, + { + "epoch": 0.908755648055209, + "grad_norm": 0.17475752532482147, + "learning_rate": 6.638105948788046e-06, + "loss": 0.0025, + "step": 141690 + }, + { + "epoch": 0.9088197849489952, + "grad_norm": 0.08409693837165833, + "learning_rate": 6.637577125648478e-06, + "loss": 0.002, + "step": 141700 + }, + { + "epoch": 0.9088839218427812, + "grad_norm": 0.2646850347518921, + "learning_rate": 6.637048281988896e-06, + "loss": 0.0036, + "step": 141710 + }, + { + "epoch": 0.9089480587365674, + "grad_norm": 0.10334274172782898, + "learning_rate": 6.636519417815932e-06, + "loss": 0.0068, + "step": 141720 + }, + { + "epoch": 0.9090121956303534, + "grad_norm": 0.07118162512779236, + "learning_rate": 6.635990533136207e-06, + "loss": 0.0018, + "step": 141730 + }, + { + "epoch": 0.9090763325241396, + "grad_norm": 0.10584995150566101, + "learning_rate": 6.6354616279563545e-06, + "loss": 0.003, + "step": 141740 + }, + { + "epoch": 0.9091404694179256, + "grad_norm": 0.01536076795309782, + "learning_rate": 6.634932702282997e-06, + "loss": 0.0024, + "step": 141750 + }, + { + "epoch": 0.9092046063117117, + "grad_norm": 0.10196884721517563, + "learning_rate": 6.634403756122765e-06, + "loss": 0.0033, + "step": 141760 + }, + { + "epoch": 0.9092687432054978, + "grad_norm": 0.014073599129915237, + "learning_rate": 6.6338747894822845e-06, + "loss": 0.001, + "step": 141770 + }, + { + "epoch": 0.9093328800992839, + "grad_norm": 0.24571390450000763, + "learning_rate": 6.633345802368185e-06, + "loss": 0.0022, + "step": 141780 + }, + { + "epoch": 0.90939701699307, + "grad_norm": 0.09062595665454865, + "learning_rate": 6.632816794787098e-06, + "loss": 0.0016, + "step": 141790 + }, + { + "epoch": 0.9094611538868561, + "grad_norm": 0.20013825595378876, + "learning_rate": 6.632287766745647e-06, + "loss": 0.0022, + "step": 141800 + }, + { + "epoch": 0.9095252907806421, + "grad_norm": 0.0434277281165123, + "learning_rate": 6.631758718250465e-06, + "loss": 0.0015, + "step": 141810 + }, + { + "epoch": 0.9095894276744283, + "grad_norm": 0.03250228241086006, + "learning_rate": 6.63122964930818e-06, + "loss": 0.0018, + "step": 141820 + }, + { + "epoch": 0.9096535645682144, + "grad_norm": 0.009286737069487572, + "learning_rate": 6.6307005599254215e-06, + "loss": 0.0021, + "step": 141830 + }, + { + "epoch": 0.9097177014620005, + "grad_norm": 0.07002612203359604, + "learning_rate": 6.63017145010882e-06, + "loss": 0.0017, + "step": 141840 + }, + { + "epoch": 0.9097818383557866, + "grad_norm": 0.1045675277709961, + "learning_rate": 6.629642319865004e-06, + "loss": 0.002, + "step": 141850 + }, + { + "epoch": 0.9098459752495727, + "grad_norm": 0.09787771105766296, + "learning_rate": 6.629113169200606e-06, + "loss": 0.0014, + "step": 141860 + }, + { + "epoch": 0.9099101121433588, + "grad_norm": 0.06352692097425461, + "learning_rate": 6.628583998122256e-06, + "loss": 0.0011, + "step": 141870 + }, + { + "epoch": 0.9099742490371449, + "grad_norm": 0.2079721987247467, + "learning_rate": 6.628054806636583e-06, + "loss": 0.0017, + "step": 141880 + }, + { + "epoch": 0.910038385930931, + "grad_norm": 0.11491197347640991, + "learning_rate": 6.627525594750221e-06, + "loss": 0.0022, + "step": 141890 + }, + { + "epoch": 0.910102522824717, + "grad_norm": 0.03413490578532219, + "learning_rate": 6.6269963624698e-06, + "loss": 0.0012, + "step": 141900 + }, + { + "epoch": 0.9101666597185032, + "grad_norm": 0.09412901848554611, + "learning_rate": 6.6264671098019505e-06, + "loss": 0.004, + "step": 141910 + }, + { + "epoch": 0.9102307966122892, + "grad_norm": 0.03136226907372475, + "learning_rate": 6.625937836753307e-06, + "loss": 0.0015, + "step": 141920 + }, + { + "epoch": 0.9102949335060754, + "grad_norm": 0.1041698008775711, + "learning_rate": 6.625408543330501e-06, + "loss": 0.0034, + "step": 141930 + }, + { + "epoch": 0.9103590703998614, + "grad_norm": 0.238687202334404, + "learning_rate": 6.624879229540162e-06, + "loss": 0.0069, + "step": 141940 + }, + { + "epoch": 0.9104232072936476, + "grad_norm": 0.14690439403057098, + "learning_rate": 6.6243498953889276e-06, + "loss": 0.0035, + "step": 141950 + }, + { + "epoch": 0.9104873441874337, + "grad_norm": 0.1875958889722824, + "learning_rate": 6.623820540883425e-06, + "loss": 0.0088, + "step": 141960 + }, + { + "epoch": 0.9105514810812197, + "grad_norm": 0.11991121619939804, + "learning_rate": 6.623291166030294e-06, + "loss": 0.0034, + "step": 141970 + }, + { + "epoch": 0.9106156179750059, + "grad_norm": 0.10912397503852844, + "learning_rate": 6.6227617708361625e-06, + "loss": 0.0024, + "step": 141980 + }, + { + "epoch": 0.9106797548687919, + "grad_norm": 0.009573899209499359, + "learning_rate": 6.6222323553076666e-06, + "loss": 0.0026, + "step": 141990 + }, + { + "epoch": 0.9107438917625781, + "grad_norm": 0.024858275428414345, + "learning_rate": 6.62170291945144e-06, + "loss": 0.0014, + "step": 142000 + }, + { + "epoch": 0.9108080286563641, + "grad_norm": 0.05668169632554054, + "learning_rate": 6.621173463274116e-06, + "loss": 0.0017, + "step": 142010 + }, + { + "epoch": 0.9108721655501503, + "grad_norm": 0.018332339823246002, + "learning_rate": 6.620643986782331e-06, + "loss": 0.0018, + "step": 142020 + }, + { + "epoch": 0.9109363024439363, + "grad_norm": 0.2745995819568634, + "learning_rate": 6.620114489982718e-06, + "loss": 0.0041, + "step": 142030 + }, + { + "epoch": 0.9110004393377225, + "grad_norm": 0.1281847506761551, + "learning_rate": 6.619584972881914e-06, + "loss": 0.0023, + "step": 142040 + }, + { + "epoch": 0.9110645762315085, + "grad_norm": 0.0568624772131443, + "learning_rate": 6.61905543548655e-06, + "loss": 0.001, + "step": 142050 + }, + { + "epoch": 0.9111287131252946, + "grad_norm": 0.03093951940536499, + "learning_rate": 6.618525877803267e-06, + "loss": 0.0021, + "step": 142060 + }, + { + "epoch": 0.9111928500190807, + "grad_norm": 0.18486446142196655, + "learning_rate": 6.617996299838696e-06, + "loss": 0.0016, + "step": 142070 + }, + { + "epoch": 0.9112569869128668, + "grad_norm": 0.11845473200082779, + "learning_rate": 6.6174667015994765e-06, + "loss": 0.0021, + "step": 142080 + }, + { + "epoch": 0.9113211238066529, + "grad_norm": 0.2680804133415222, + "learning_rate": 6.616937083092243e-06, + "loss": 0.0038, + "step": 142090 + }, + { + "epoch": 0.911385260700439, + "grad_norm": 0.1053524985909462, + "learning_rate": 6.616407444323631e-06, + "loss": 0.0013, + "step": 142100 + }, + { + "epoch": 0.9114493975942252, + "grad_norm": 0.18218301236629486, + "learning_rate": 6.61587778530028e-06, + "loss": 0.0019, + "step": 142110 + }, + { + "epoch": 0.9115135344880112, + "grad_norm": 0.2066538780927658, + "learning_rate": 6.615348106028825e-06, + "loss": 0.0026, + "step": 142120 + }, + { + "epoch": 0.9115776713817973, + "grad_norm": 0.05156905576586723, + "learning_rate": 6.614818406515904e-06, + "loss": 0.002, + "step": 142130 + }, + { + "epoch": 0.9116418082755834, + "grad_norm": 0.1713191121816635, + "learning_rate": 6.614288686768153e-06, + "loss": 0.0023, + "step": 142140 + }, + { + "epoch": 0.9117059451693695, + "grad_norm": 0.15974517166614532, + "learning_rate": 6.613758946792212e-06, + "loss": 0.0024, + "step": 142150 + }, + { + "epoch": 0.9117700820631556, + "grad_norm": 0.23358303308486938, + "learning_rate": 6.613229186594717e-06, + "loss": 0.0027, + "step": 142160 + }, + { + "epoch": 0.9118342189569417, + "grad_norm": 0.03668862581253052, + "learning_rate": 6.6126994061823094e-06, + "loss": 0.0014, + "step": 142170 + }, + { + "epoch": 0.9118983558507278, + "grad_norm": 0.0947398766875267, + "learning_rate": 6.612169605561625e-06, + "loss": 0.0041, + "step": 142180 + }, + { + "epoch": 0.9119624927445139, + "grad_norm": 0.10485409945249557, + "learning_rate": 6.611639784739303e-06, + "loss": 0.005, + "step": 142190 + }, + { + "epoch": 0.9120266296383, + "grad_norm": 0.4552534222602844, + "learning_rate": 6.6111099437219815e-06, + "loss": 0.0019, + "step": 142200 + }, + { + "epoch": 0.9120907665320861, + "grad_norm": 0.03338133916258812, + "learning_rate": 6.610580082516302e-06, + "loss": 0.0024, + "step": 142210 + }, + { + "epoch": 0.9121549034258721, + "grad_norm": 0.20968693494796753, + "learning_rate": 6.610050201128904e-06, + "loss": 0.0023, + "step": 142220 + }, + { + "epoch": 0.9122190403196583, + "grad_norm": 0.11558353900909424, + "learning_rate": 6.609520299566425e-06, + "loss": 0.003, + "step": 142230 + }, + { + "epoch": 0.9122831772134444, + "grad_norm": 0.22716671228408813, + "learning_rate": 6.6089903778355056e-06, + "loss": 0.0025, + "step": 142240 + }, + { + "epoch": 0.9123473141072305, + "grad_norm": 0.09540443867444992, + "learning_rate": 6.608460435942788e-06, + "loss": 0.0015, + "step": 142250 + }, + { + "epoch": 0.9124114510010166, + "grad_norm": 0.15700772404670715, + "learning_rate": 6.607930473894912e-06, + "loss": 0.0021, + "step": 142260 + }, + { + "epoch": 0.9124755878948027, + "grad_norm": 0.02807980217039585, + "learning_rate": 6.607400491698518e-06, + "loss": 0.002, + "step": 142270 + }, + { + "epoch": 0.9125397247885888, + "grad_norm": 0.1129683256149292, + "learning_rate": 6.606870489360245e-06, + "loss": 0.003, + "step": 142280 + }, + { + "epoch": 0.9126038616823748, + "grad_norm": 0.2775423228740692, + "learning_rate": 6.606340466886738e-06, + "loss": 0.004, + "step": 142290 + }, + { + "epoch": 0.912667998576161, + "grad_norm": 0.16915129125118256, + "learning_rate": 6.605810424284637e-06, + "loss": 0.0023, + "step": 142300 + }, + { + "epoch": 0.912732135469947, + "grad_norm": 0.06936042010784149, + "learning_rate": 6.605280361560583e-06, + "loss": 0.0017, + "step": 142310 + }, + { + "epoch": 0.9127962723637332, + "grad_norm": 0.3886728286743164, + "learning_rate": 6.604750278721219e-06, + "loss": 0.0033, + "step": 142320 + }, + { + "epoch": 0.9128604092575192, + "grad_norm": 0.18234775960445404, + "learning_rate": 6.604220175773188e-06, + "loss": 0.0015, + "step": 142330 + }, + { + "epoch": 0.9129245461513054, + "grad_norm": 0.03708178177475929, + "learning_rate": 6.6036900527231306e-06, + "loss": 0.0036, + "step": 142340 + }, + { + "epoch": 0.9129886830450914, + "grad_norm": 0.11459732055664062, + "learning_rate": 6.60315990957769e-06, + "loss": 0.0076, + "step": 142350 + }, + { + "epoch": 0.9130528199388775, + "grad_norm": 0.12645256519317627, + "learning_rate": 6.602629746343512e-06, + "loss": 0.0021, + "step": 142360 + }, + { + "epoch": 0.9131169568326636, + "grad_norm": 0.12674662470817566, + "learning_rate": 6.602099563027236e-06, + "loss": 0.0034, + "step": 142370 + }, + { + "epoch": 0.9131810937264497, + "grad_norm": 0.03884998336434364, + "learning_rate": 6.601569359635509e-06, + "loss": 0.0024, + "step": 142380 + }, + { + "epoch": 0.9132452306202359, + "grad_norm": 0.24977393448352814, + "learning_rate": 6.601039136174973e-06, + "loss": 0.004, + "step": 142390 + }, + { + "epoch": 0.9133093675140219, + "grad_norm": 0.0631488561630249, + "learning_rate": 6.600508892652273e-06, + "loss": 0.0034, + "step": 142400 + }, + { + "epoch": 0.9133735044078081, + "grad_norm": 0.206329807639122, + "learning_rate": 6.599978629074051e-06, + "loss": 0.0017, + "step": 142410 + }, + { + "epoch": 0.9134376413015941, + "grad_norm": 0.052349966019392014, + "learning_rate": 6.599448345446955e-06, + "loss": 0.0029, + "step": 142420 + }, + { + "epoch": 0.9135017781953803, + "grad_norm": 2.474010705947876, + "learning_rate": 6.598918041777626e-06, + "loss": 0.016, + "step": 142430 + }, + { + "epoch": 0.9135659150891663, + "grad_norm": 0.05902921408414841, + "learning_rate": 6.598387718072714e-06, + "loss": 0.0024, + "step": 142440 + }, + { + "epoch": 0.9136300519829524, + "grad_norm": 0.09081000089645386, + "learning_rate": 6.597857374338859e-06, + "loss": 0.0018, + "step": 142450 + }, + { + "epoch": 0.9136941888767385, + "grad_norm": 0.017076801508665085, + "learning_rate": 6.59732701058271e-06, + "loss": 0.0016, + "step": 142460 + }, + { + "epoch": 0.9137583257705246, + "grad_norm": 0.09036340564489365, + "learning_rate": 6.596796626810913e-06, + "loss": 0.0021, + "step": 142470 + }, + { + "epoch": 0.9138224626643107, + "grad_norm": 0.01195220835506916, + "learning_rate": 6.596266223030111e-06, + "loss": 0.0023, + "step": 142480 + }, + { + "epoch": 0.9138865995580968, + "grad_norm": 0.1422252207994461, + "learning_rate": 6.595735799246954e-06, + "loss": 0.0027, + "step": 142490 + }, + { + "epoch": 0.9139507364518829, + "grad_norm": 0.07921761274337769, + "learning_rate": 6.5952053554680865e-06, + "loss": 0.0024, + "step": 142500 + }, + { + "epoch": 0.914014873345669, + "grad_norm": 0.07937291264533997, + "learning_rate": 6.594674891700157e-06, + "loss": 0.0016, + "step": 142510 + }, + { + "epoch": 0.9140790102394551, + "grad_norm": 0.027815645560622215, + "learning_rate": 6.594144407949809e-06, + "loss": 0.0012, + "step": 142520 + }, + { + "epoch": 0.9141431471332412, + "grad_norm": 0.19019463658332825, + "learning_rate": 6.593613904223695e-06, + "loss": 0.0024, + "step": 142530 + }, + { + "epoch": 0.9142072840270273, + "grad_norm": 0.2717309594154358, + "learning_rate": 6.593083380528457e-06, + "loss": 0.0012, + "step": 142540 + }, + { + "epoch": 0.9142714209208134, + "grad_norm": 0.07188116759061813, + "learning_rate": 6.592552836870747e-06, + "loss": 0.0034, + "step": 142550 + }, + { + "epoch": 0.9143355578145995, + "grad_norm": 0.06552331894636154, + "learning_rate": 6.592022273257213e-06, + "loss": 0.0014, + "step": 142560 + }, + { + "epoch": 0.9143996947083856, + "grad_norm": 0.055768199265003204, + "learning_rate": 6.5914916896945e-06, + "loss": 0.0048, + "step": 142570 + }, + { + "epoch": 0.9144638316021717, + "grad_norm": 0.15143147110939026, + "learning_rate": 6.590961086189259e-06, + "loss": 0.0023, + "step": 142580 + }, + { + "epoch": 0.9145279684959577, + "grad_norm": 0.16667847335338593, + "learning_rate": 6.590430462748141e-06, + "loss": 0.0023, + "step": 142590 + }, + { + "epoch": 0.9145921053897439, + "grad_norm": 0.11879737675189972, + "learning_rate": 6.58989981937779e-06, + "loss": 0.0014, + "step": 142600 + }, + { + "epoch": 0.9146562422835299, + "grad_norm": 0.2997582256793976, + "learning_rate": 6.5893691560848595e-06, + "loss": 0.004, + "step": 142610 + }, + { + "epoch": 0.9147203791773161, + "grad_norm": 0.017078684642910957, + "learning_rate": 6.588838472875996e-06, + "loss": 0.0019, + "step": 142620 + }, + { + "epoch": 0.9147845160711021, + "grad_norm": 0.1263207495212555, + "learning_rate": 6.588307769757851e-06, + "loss": 0.0021, + "step": 142630 + }, + { + "epoch": 0.9148486529648883, + "grad_norm": 0.1354454904794693, + "learning_rate": 6.587777046737076e-06, + "loss": 0.0018, + "step": 142640 + }, + { + "epoch": 0.9149127898586743, + "grad_norm": 0.12904682755470276, + "learning_rate": 6.5872463038203185e-06, + "loss": 0.0045, + "step": 142650 + }, + { + "epoch": 0.9149769267524605, + "grad_norm": 0.12708112597465515, + "learning_rate": 6.586715541014232e-06, + "loss": 0.0016, + "step": 142660 + }, + { + "epoch": 0.9150410636462466, + "grad_norm": 0.12494788318872452, + "learning_rate": 6.5861847583254645e-06, + "loss": 0.0014, + "step": 142670 + }, + { + "epoch": 0.9151052005400326, + "grad_norm": 0.2556142210960388, + "learning_rate": 6.585653955760668e-06, + "loss": 0.0015, + "step": 142680 + }, + { + "epoch": 0.9151693374338188, + "grad_norm": 0.13654863834381104, + "learning_rate": 6.585123133326495e-06, + "loss": 0.0022, + "step": 142690 + }, + { + "epoch": 0.9152334743276048, + "grad_norm": 0.08078952133655548, + "learning_rate": 6.584592291029596e-06, + "loss": 0.002, + "step": 142700 + }, + { + "epoch": 0.915297611221391, + "grad_norm": 0.09141595661640167, + "learning_rate": 6.584061428876623e-06, + "loss": 0.0013, + "step": 142710 + }, + { + "epoch": 0.915361748115177, + "grad_norm": 0.1005011573433876, + "learning_rate": 6.583530546874228e-06, + "loss": 0.0028, + "step": 142720 + }, + { + "epoch": 0.9154258850089632, + "grad_norm": 0.11311080306768417, + "learning_rate": 6.582999645029065e-06, + "loss": 0.0019, + "step": 142730 + }, + { + "epoch": 0.9154900219027492, + "grad_norm": 0.2210063636302948, + "learning_rate": 6.5824687233477835e-06, + "loss": 0.0022, + "step": 142740 + }, + { + "epoch": 0.9155541587965353, + "grad_norm": 0.08203542232513428, + "learning_rate": 6.581937781837038e-06, + "loss": 0.0011, + "step": 142750 + }, + { + "epoch": 0.9156182956903214, + "grad_norm": 0.08673785626888275, + "learning_rate": 6.581406820503482e-06, + "loss": 0.0017, + "step": 142760 + }, + { + "epoch": 0.9156824325841075, + "grad_norm": 0.2789863646030426, + "learning_rate": 6.5808758393537685e-06, + "loss": 0.0026, + "step": 142770 + }, + { + "epoch": 0.9157465694778936, + "grad_norm": 0.05158243700861931, + "learning_rate": 6.580344838394551e-06, + "loss": 0.0023, + "step": 142780 + }, + { + "epoch": 0.9158107063716797, + "grad_norm": 0.13372482359409332, + "learning_rate": 6.579813817632482e-06, + "loss": 0.0035, + "step": 142790 + }, + { + "epoch": 0.9158748432654659, + "grad_norm": 0.17898721992969513, + "learning_rate": 6.579282777074218e-06, + "loss": 0.0023, + "step": 142800 + }, + { + "epoch": 0.9159389801592519, + "grad_norm": 0.07201813161373138, + "learning_rate": 6.578751716726411e-06, + "loss": 0.0056, + "step": 142810 + }, + { + "epoch": 0.916003117053038, + "grad_norm": 0.1174902692437172, + "learning_rate": 6.578220636595719e-06, + "loss": 0.0032, + "step": 142820 + }, + { + "epoch": 0.9160672539468241, + "grad_norm": 0.18898825347423553, + "learning_rate": 6.577689536688792e-06, + "loss": 0.0016, + "step": 142830 + }, + { + "epoch": 0.9161313908406102, + "grad_norm": 0.17814204096794128, + "learning_rate": 6.5771584170122885e-06, + "loss": 0.0022, + "step": 142840 + }, + { + "epoch": 0.9161955277343963, + "grad_norm": 0.19003327190876007, + "learning_rate": 6.576627277572863e-06, + "loss": 0.0039, + "step": 142850 + }, + { + "epoch": 0.9162596646281824, + "grad_norm": 0.16185325384140015, + "learning_rate": 6.576096118377171e-06, + "loss": 0.0044, + "step": 142860 + }, + { + "epoch": 0.9163238015219685, + "grad_norm": 0.16603900492191315, + "learning_rate": 6.5755649394318675e-06, + "loss": 0.0032, + "step": 142870 + }, + { + "epoch": 0.9163879384157546, + "grad_norm": 0.1000480130314827, + "learning_rate": 6.575033740743609e-06, + "loss": 0.0024, + "step": 142880 + }, + { + "epoch": 0.9164520753095406, + "grad_norm": 0.045742470771074295, + "learning_rate": 6.5745025223190535e-06, + "loss": 0.0041, + "step": 142890 + }, + { + "epoch": 0.9165162122033268, + "grad_norm": 0.1930939108133316, + "learning_rate": 6.573971284164855e-06, + "loss": 0.0022, + "step": 142900 + }, + { + "epoch": 0.9165803490971128, + "grad_norm": 0.27457255125045776, + "learning_rate": 6.573440026287673e-06, + "loss": 0.0027, + "step": 142910 + }, + { + "epoch": 0.916644485990899, + "grad_norm": 0.21804606914520264, + "learning_rate": 6.5729087486941615e-06, + "loss": 0.0028, + "step": 142920 + }, + { + "epoch": 0.916708622884685, + "grad_norm": 0.23076127469539642, + "learning_rate": 6.57237745139098e-06, + "loss": 0.0022, + "step": 142930 + }, + { + "epoch": 0.9167727597784712, + "grad_norm": 0.29028287529945374, + "learning_rate": 6.5718461343847835e-06, + "loss": 0.0034, + "step": 142940 + }, + { + "epoch": 0.9168368966722573, + "grad_norm": 0.15482687950134277, + "learning_rate": 6.571314797682234e-06, + "loss": 0.0019, + "step": 142950 + }, + { + "epoch": 0.9169010335660434, + "grad_norm": 0.13907399773597717, + "learning_rate": 6.570783441289985e-06, + "loss": 0.0023, + "step": 142960 + }, + { + "epoch": 0.9169651704598295, + "grad_norm": 0.089432492852211, + "learning_rate": 6.570252065214699e-06, + "loss": 0.0015, + "step": 142970 + }, + { + "epoch": 0.9170293073536155, + "grad_norm": 0.097746342420578, + "learning_rate": 6.56972066946303e-06, + "loss": 0.0013, + "step": 142980 + }, + { + "epoch": 0.9170934442474017, + "grad_norm": 0.11432447284460068, + "learning_rate": 6.56918925404164e-06, + "loss": 0.0023, + "step": 142990 + }, + { + "epoch": 0.9171575811411877, + "grad_norm": 0.06727922707796097, + "learning_rate": 6.568657818957188e-06, + "loss": 0.0027, + "step": 143000 + }, + { + "epoch": 0.9172217180349739, + "grad_norm": 0.07588520646095276, + "learning_rate": 6.5681263642163316e-06, + "loss": 0.002, + "step": 143010 + }, + { + "epoch": 0.9172858549287599, + "grad_norm": 0.03804763779044151, + "learning_rate": 6.567594889825733e-06, + "loss": 0.002, + "step": 143020 + }, + { + "epoch": 0.9173499918225461, + "grad_norm": 0.01098368689417839, + "learning_rate": 6.5670633957920475e-06, + "loss": 0.0029, + "step": 143030 + }, + { + "epoch": 0.9174141287163321, + "grad_norm": 0.13806407153606415, + "learning_rate": 6.566531882121938e-06, + "loss": 0.0016, + "step": 143040 + }, + { + "epoch": 0.9174782656101182, + "grad_norm": 0.10076095908880234, + "learning_rate": 6.566000348822066e-06, + "loss": 0.0033, + "step": 143050 + }, + { + "epoch": 0.9175424025039043, + "grad_norm": 0.03555780649185181, + "learning_rate": 6.565468795899088e-06, + "loss": 0.0013, + "step": 143060 + }, + { + "epoch": 0.9176065393976904, + "grad_norm": 0.3157411515712738, + "learning_rate": 6.5649372233596685e-06, + "loss": 0.0026, + "step": 143070 + }, + { + "epoch": 0.9176706762914765, + "grad_norm": 0.20569480955600739, + "learning_rate": 6.564405631210469e-06, + "loss": 0.0024, + "step": 143080 + }, + { + "epoch": 0.9177348131852626, + "grad_norm": 0.2229042947292328, + "learning_rate": 6.563874019458146e-06, + "loss": 0.0027, + "step": 143090 + }, + { + "epoch": 0.9177989500790488, + "grad_norm": 0.14811183512210846, + "learning_rate": 6.563342388109366e-06, + "loss": 0.003, + "step": 143100 + }, + { + "epoch": 0.9178630869728348, + "grad_norm": 0.10701605677604675, + "learning_rate": 6.562810737170787e-06, + "loss": 0.0017, + "step": 143110 + }, + { + "epoch": 0.917927223866621, + "grad_norm": 0.1293468326330185, + "learning_rate": 6.562279066649073e-06, + "loss": 0.0019, + "step": 143120 + }, + { + "epoch": 0.917991360760407, + "grad_norm": 0.08432118594646454, + "learning_rate": 6.561747376550887e-06, + "loss": 0.004, + "step": 143130 + }, + { + "epoch": 0.9180554976541931, + "grad_norm": 0.16910623013973236, + "learning_rate": 6.5612156668828885e-06, + "loss": 0.0019, + "step": 143140 + }, + { + "epoch": 0.9181196345479792, + "grad_norm": 0.07571445405483246, + "learning_rate": 6.560683937651743e-06, + "loss": 0.0016, + "step": 143150 + }, + { + "epoch": 0.9181837714417653, + "grad_norm": 0.07605468481779099, + "learning_rate": 6.560152188864112e-06, + "loss": 0.0026, + "step": 143160 + }, + { + "epoch": 0.9182479083355514, + "grad_norm": 0.061501987278461456, + "learning_rate": 6.559620420526659e-06, + "loss": 0.0028, + "step": 143170 + }, + { + "epoch": 0.9183120452293375, + "grad_norm": 0.03861086070537567, + "learning_rate": 6.559088632646048e-06, + "loss": 0.0053, + "step": 143180 + }, + { + "epoch": 0.9183761821231236, + "grad_norm": 0.4427987337112427, + "learning_rate": 6.558556825228941e-06, + "loss": 0.0013, + "step": 143190 + }, + { + "epoch": 0.9184403190169097, + "grad_norm": 0.11889459192752838, + "learning_rate": 6.5580249982820035e-06, + "loss": 0.0051, + "step": 143200 + }, + { + "epoch": 0.9185044559106957, + "grad_norm": 0.029812904074788094, + "learning_rate": 6.5574931518119e-06, + "loss": 0.0014, + "step": 143210 + }, + { + "epoch": 0.9185685928044819, + "grad_norm": 0.05113794654607773, + "learning_rate": 6.556961285825293e-06, + "loss": 0.0018, + "step": 143220 + }, + { + "epoch": 0.918632729698268, + "grad_norm": 0.06058286875486374, + "learning_rate": 6.55642940032885e-06, + "loss": 0.0017, + "step": 143230 + }, + { + "epoch": 0.9186968665920541, + "grad_norm": 0.070572130382061, + "learning_rate": 6.555897495329232e-06, + "loss": 0.0023, + "step": 143240 + }, + { + "epoch": 0.9187610034858402, + "grad_norm": 0.0866912454366684, + "learning_rate": 6.555365570833109e-06, + "loss": 0.0033, + "step": 143250 + }, + { + "epoch": 0.9188251403796263, + "grad_norm": 0.079118512570858, + "learning_rate": 6.5548336268471415e-06, + "loss": 0.0012, + "step": 143260 + }, + { + "epoch": 0.9188892772734124, + "grad_norm": 0.1792449653148651, + "learning_rate": 6.5543016633779985e-06, + "loss": 0.0021, + "step": 143270 + }, + { + "epoch": 0.9189534141671984, + "grad_norm": 0.1889599859714508, + "learning_rate": 6.553769680432345e-06, + "loss": 0.0026, + "step": 143280 + }, + { + "epoch": 0.9190175510609846, + "grad_norm": 0.1904626339673996, + "learning_rate": 6.553237678016847e-06, + "loss": 0.0022, + "step": 143290 + }, + { + "epoch": 0.9190816879547706, + "grad_norm": 0.08636914938688278, + "learning_rate": 6.55270565613817e-06, + "loss": 0.0031, + "step": 143300 + }, + { + "epoch": 0.9191458248485568, + "grad_norm": 0.07031205296516418, + "learning_rate": 6.552173614802982e-06, + "loss": 0.0017, + "step": 143310 + }, + { + "epoch": 0.9192099617423428, + "grad_norm": 0.08173853904008865, + "learning_rate": 6.551641554017949e-06, + "loss": 0.0017, + "step": 143320 + }, + { + "epoch": 0.919274098636129, + "grad_norm": 0.008939284831285477, + "learning_rate": 6.551109473789739e-06, + "loss": 0.0026, + "step": 143330 + }, + { + "epoch": 0.919338235529915, + "grad_norm": 0.14659515023231506, + "learning_rate": 6.550577374125018e-06, + "loss": 0.0021, + "step": 143340 + }, + { + "epoch": 0.9194023724237012, + "grad_norm": 0.0790574699640274, + "learning_rate": 6.5500452550304546e-06, + "loss": 0.0015, + "step": 143350 + }, + { + "epoch": 0.9194665093174872, + "grad_norm": 0.03465283662080765, + "learning_rate": 6.549513116512717e-06, + "loss": 0.0019, + "step": 143360 + }, + { + "epoch": 0.9195306462112733, + "grad_norm": 0.10260052233934402, + "learning_rate": 6.548980958578471e-06, + "loss": 0.0045, + "step": 143370 + }, + { + "epoch": 0.9195947831050595, + "grad_norm": 0.07524410635232925, + "learning_rate": 6.548448781234389e-06, + "loss": 0.0024, + "step": 143380 + }, + { + "epoch": 0.9196589199988455, + "grad_norm": 0.1725485920906067, + "learning_rate": 6.547916584487135e-06, + "loss": 0.0026, + "step": 143390 + }, + { + "epoch": 0.9197230568926317, + "grad_norm": 0.03351062163710594, + "learning_rate": 6.5473843683433815e-06, + "loss": 0.0019, + "step": 143400 + }, + { + "epoch": 0.9197871937864177, + "grad_norm": 0.17824223637580872, + "learning_rate": 6.5468521328097935e-06, + "loss": 0.0052, + "step": 143410 + }, + { + "epoch": 0.9198513306802039, + "grad_norm": 0.46944063901901245, + "learning_rate": 6.546319877893045e-06, + "loss": 0.0031, + "step": 143420 + }, + { + "epoch": 0.9199154675739899, + "grad_norm": 0.1069873496890068, + "learning_rate": 6.545787603599803e-06, + "loss": 0.0015, + "step": 143430 + }, + { + "epoch": 0.919979604467776, + "grad_norm": 0.01805989444255829, + "learning_rate": 6.545255309936736e-06, + "loss": 0.003, + "step": 143440 + }, + { + "epoch": 0.9200437413615621, + "grad_norm": 0.059725284576416016, + "learning_rate": 6.5447229969105166e-06, + "loss": 0.0025, + "step": 143450 + }, + { + "epoch": 0.9201078782553482, + "grad_norm": 0.07746428996324539, + "learning_rate": 6.544190664527814e-06, + "loss": 0.002, + "step": 143460 + }, + { + "epoch": 0.9201720151491343, + "grad_norm": 0.022890908643603325, + "learning_rate": 6.543658312795299e-06, + "loss": 0.0044, + "step": 143470 + }, + { + "epoch": 0.9202361520429204, + "grad_norm": 0.11122360080480576, + "learning_rate": 6.543125941719643e-06, + "loss": 0.0059, + "step": 143480 + }, + { + "epoch": 0.9203002889367065, + "grad_norm": 0.02852690778672695, + "learning_rate": 6.542593551307514e-06, + "loss": 0.0031, + "step": 143490 + }, + { + "epoch": 0.9203644258304926, + "grad_norm": 0.21097946166992188, + "learning_rate": 6.542061141565588e-06, + "loss": 0.0026, + "step": 143500 + }, + { + "epoch": 0.9204285627242788, + "grad_norm": 0.04008246585726738, + "learning_rate": 6.541528712500531e-06, + "loss": 0.002, + "step": 143510 + }, + { + "epoch": 0.9204926996180648, + "grad_norm": 0.2406572848558426, + "learning_rate": 6.54099626411902e-06, + "loss": 0.0026, + "step": 143520 + }, + { + "epoch": 0.9205568365118509, + "grad_norm": 0.21601951122283936, + "learning_rate": 6.540463796427723e-06, + "loss": 0.0016, + "step": 143530 + }, + { + "epoch": 0.920620973405637, + "grad_norm": 0.1107027605175972, + "learning_rate": 6.539931309433313e-06, + "loss": 0.0052, + "step": 143540 + }, + { + "epoch": 0.9206851102994231, + "grad_norm": 0.27741020917892456, + "learning_rate": 6.539398803142465e-06, + "loss": 0.0025, + "step": 143550 + }, + { + "epoch": 0.9207492471932092, + "grad_norm": 0.11819496750831604, + "learning_rate": 6.5388662775618485e-06, + "loss": 0.0011, + "step": 143560 + }, + { + "epoch": 0.9208133840869953, + "grad_norm": 0.10231605172157288, + "learning_rate": 6.538333732698139e-06, + "loss": 0.0022, + "step": 143570 + }, + { + "epoch": 0.9208775209807813, + "grad_norm": 0.2636953890323639, + "learning_rate": 6.537801168558007e-06, + "loss": 0.0036, + "step": 143580 + }, + { + "epoch": 0.9209416578745675, + "grad_norm": 0.2232527881860733, + "learning_rate": 6.537268585148127e-06, + "loss": 0.0029, + "step": 143590 + }, + { + "epoch": 0.9210057947683535, + "grad_norm": 0.1344190239906311, + "learning_rate": 6.536735982475174e-06, + "loss": 0.0017, + "step": 143600 + }, + { + "epoch": 0.9210699316621397, + "grad_norm": 0.08762157708406448, + "learning_rate": 6.536203360545821e-06, + "loss": 0.0019, + "step": 143610 + }, + { + "epoch": 0.9211340685559257, + "grad_norm": 0.14623139798641205, + "learning_rate": 6.535670719366741e-06, + "loss": 0.0033, + "step": 143620 + }, + { + "epoch": 0.9211982054497119, + "grad_norm": 0.04193584993481636, + "learning_rate": 6.535138058944611e-06, + "loss": 0.0028, + "step": 143630 + }, + { + "epoch": 0.9212623423434979, + "grad_norm": 0.1585153192281723, + "learning_rate": 6.534605379286103e-06, + "loss": 0.0092, + "step": 143640 + }, + { + "epoch": 0.9213264792372841, + "grad_norm": 0.2073761224746704, + "learning_rate": 6.534072680397893e-06, + "loss": 0.0019, + "step": 143650 + }, + { + "epoch": 0.9213906161310702, + "grad_norm": 0.11256733536720276, + "learning_rate": 6.533539962286655e-06, + "loss": 0.0023, + "step": 143660 + }, + { + "epoch": 0.9214547530248562, + "grad_norm": 0.03633716329932213, + "learning_rate": 6.533007224959067e-06, + "loss": 0.002, + "step": 143670 + }, + { + "epoch": 0.9215188899186424, + "grad_norm": 0.01778295449912548, + "learning_rate": 6.532474468421801e-06, + "loss": 0.0022, + "step": 143680 + }, + { + "epoch": 0.9215830268124284, + "grad_norm": 0.11002130806446075, + "learning_rate": 6.531941692681535e-06, + "loss": 0.0016, + "step": 143690 + }, + { + "epoch": 0.9216471637062146, + "grad_norm": 0.09549775719642639, + "learning_rate": 6.531408897744946e-06, + "loss": 0.0028, + "step": 143700 + }, + { + "epoch": 0.9217113006000006, + "grad_norm": 0.10814757645130157, + "learning_rate": 6.530876083618708e-06, + "loss": 0.0021, + "step": 143710 + }, + { + "epoch": 0.9217754374937868, + "grad_norm": 0.12686097621917725, + "learning_rate": 6.530343250309499e-06, + "loss": 0.0035, + "step": 143720 + }, + { + "epoch": 0.9218395743875728, + "grad_norm": 0.22388538718223572, + "learning_rate": 6.5298103978239946e-06, + "loss": 0.003, + "step": 143730 + }, + { + "epoch": 0.921903711281359, + "grad_norm": 0.12337753176689148, + "learning_rate": 6.529277526168873e-06, + "loss": 0.0096, + "step": 143740 + }, + { + "epoch": 0.921967848175145, + "grad_norm": 0.07037145644426346, + "learning_rate": 6.528744635350812e-06, + "loss": 0.0011, + "step": 143750 + }, + { + "epoch": 0.9220319850689311, + "grad_norm": 0.0782921090722084, + "learning_rate": 6.528211725376488e-06, + "loss": 0.0027, + "step": 143760 + }, + { + "epoch": 0.9220961219627172, + "grad_norm": 0.13416391611099243, + "learning_rate": 6.5276787962525775e-06, + "loss": 0.0019, + "step": 143770 + }, + { + "epoch": 0.9221602588565033, + "grad_norm": 0.08078598231077194, + "learning_rate": 6.5271458479857606e-06, + "loss": 0.0026, + "step": 143780 + }, + { + "epoch": 0.9222243957502895, + "grad_norm": 0.06023133918642998, + "learning_rate": 6.526612880582714e-06, + "loss": 0.0019, + "step": 143790 + }, + { + "epoch": 0.9222885326440755, + "grad_norm": 0.06747840344905853, + "learning_rate": 6.526079894050119e-06, + "loss": 0.002, + "step": 143800 + }, + { + "epoch": 0.9223526695378617, + "grad_norm": 0.020860394462943077, + "learning_rate": 6.525546888394651e-06, + "loss": 0.0025, + "step": 143810 + }, + { + "epoch": 0.9224168064316477, + "grad_norm": 0.1317310333251953, + "learning_rate": 6.5250138636229895e-06, + "loss": 0.0021, + "step": 143820 + }, + { + "epoch": 0.9224809433254338, + "grad_norm": 0.17135535180568695, + "learning_rate": 6.5244808197418145e-06, + "loss": 0.0024, + "step": 143830 + }, + { + "epoch": 0.9225450802192199, + "grad_norm": 0.1718551367521286, + "learning_rate": 6.523947756757807e-06, + "loss": 0.0021, + "step": 143840 + }, + { + "epoch": 0.922609217113006, + "grad_norm": 0.17481783032417297, + "learning_rate": 6.523414674677643e-06, + "loss": 0.0028, + "step": 143850 + }, + { + "epoch": 0.9226733540067921, + "grad_norm": 0.06103501841425896, + "learning_rate": 6.522881573508005e-06, + "loss": 0.0031, + "step": 143860 + }, + { + "epoch": 0.9227374909005782, + "grad_norm": 0.14446958899497986, + "learning_rate": 6.522348453255572e-06, + "loss": 0.0025, + "step": 143870 + }, + { + "epoch": 0.9228016277943643, + "grad_norm": 0.17649446427822113, + "learning_rate": 6.521815313927027e-06, + "loss": 0.0018, + "step": 143880 + }, + { + "epoch": 0.9228657646881504, + "grad_norm": 0.1549309492111206, + "learning_rate": 6.521282155529047e-06, + "loss": 0.0034, + "step": 143890 + }, + { + "epoch": 0.9229299015819364, + "grad_norm": 0.08576612919569016, + "learning_rate": 6.5207489780683146e-06, + "loss": 0.0022, + "step": 143900 + }, + { + "epoch": 0.9229940384757226, + "grad_norm": 0.07953940331935883, + "learning_rate": 6.52021578155151e-06, + "loss": 0.0019, + "step": 143910 + }, + { + "epoch": 0.9230581753695086, + "grad_norm": 0.08306507021188736, + "learning_rate": 6.519682565985315e-06, + "loss": 0.0023, + "step": 143920 + }, + { + "epoch": 0.9231223122632948, + "grad_norm": 0.034287504851818085, + "learning_rate": 6.519149331376413e-06, + "loss": 0.0019, + "step": 143930 + }, + { + "epoch": 0.9231864491570809, + "grad_norm": 0.027775052934885025, + "learning_rate": 6.518616077731482e-06, + "loss": 0.0037, + "step": 143940 + }, + { + "epoch": 0.923250586050867, + "grad_norm": 0.08457817882299423, + "learning_rate": 6.518082805057208e-06, + "loss": 0.003, + "step": 143950 + }, + { + "epoch": 0.9233147229446531, + "grad_norm": 0.19895368814468384, + "learning_rate": 6.5175495133602704e-06, + "loss": 0.002, + "step": 143960 + }, + { + "epoch": 0.9233788598384391, + "grad_norm": 0.06647168844938278, + "learning_rate": 6.517016202647354e-06, + "loss": 0.0029, + "step": 143970 + }, + { + "epoch": 0.9234429967322253, + "grad_norm": 0.08054918050765991, + "learning_rate": 6.5164828729251395e-06, + "loss": 0.0024, + "step": 143980 + }, + { + "epoch": 0.9235071336260113, + "grad_norm": 0.05321136489510536, + "learning_rate": 6.515949524200311e-06, + "loss": 0.0025, + "step": 143990 + }, + { + "epoch": 0.9235712705197975, + "grad_norm": 0.35143348574638367, + "learning_rate": 6.515416156479551e-06, + "loss": 0.0032, + "step": 144000 + }, + { + "epoch": 0.9236354074135835, + "grad_norm": 0.048732027411460876, + "learning_rate": 6.514882769769545e-06, + "loss": 0.0019, + "step": 144010 + }, + { + "epoch": 0.9236995443073697, + "grad_norm": 0.056389711797237396, + "learning_rate": 6.514349364076973e-06, + "loss": 0.0023, + "step": 144020 + }, + { + "epoch": 0.9237636812011557, + "grad_norm": 0.03896265849471092, + "learning_rate": 6.513815939408523e-06, + "loss": 0.0018, + "step": 144030 + }, + { + "epoch": 0.9238278180949419, + "grad_norm": 0.15420575439929962, + "learning_rate": 6.513282495770876e-06, + "loss": 0.0032, + "step": 144040 + }, + { + "epoch": 0.9238919549887279, + "grad_norm": 0.14683672785758972, + "learning_rate": 6.5127490331707184e-06, + "loss": 0.0017, + "step": 144050 + }, + { + "epoch": 0.923956091882514, + "grad_norm": 0.024806208908557892, + "learning_rate": 6.512215551614735e-06, + "loss": 0.0032, + "step": 144060 + }, + { + "epoch": 0.9240202287763002, + "grad_norm": 0.11284174770116806, + "learning_rate": 6.511682051109609e-06, + "loss": 0.0029, + "step": 144070 + }, + { + "epoch": 0.9240843656700862, + "grad_norm": 0.04989278316497803, + "learning_rate": 6.5111485316620284e-06, + "loss": 0.0011, + "step": 144080 + }, + { + "epoch": 0.9241485025638724, + "grad_norm": 0.14955037832260132, + "learning_rate": 6.510614993278674e-06, + "loss": 0.0031, + "step": 144090 + }, + { + "epoch": 0.9242126394576584, + "grad_norm": 0.09255171567201614, + "learning_rate": 6.510081435966235e-06, + "loss": 0.0015, + "step": 144100 + }, + { + "epoch": 0.9242767763514446, + "grad_norm": 0.1275353878736496, + "learning_rate": 6.509547859731398e-06, + "loss": 0.0024, + "step": 144110 + }, + { + "epoch": 0.9243409132452306, + "grad_norm": 0.07732610404491425, + "learning_rate": 6.509014264580846e-06, + "loss": 0.0023, + "step": 144120 + }, + { + "epoch": 0.9244050501390167, + "grad_norm": 0.05314570665359497, + "learning_rate": 6.508480650521266e-06, + "loss": 0.0018, + "step": 144130 + }, + { + "epoch": 0.9244691870328028, + "grad_norm": 0.13815511763095856, + "learning_rate": 6.507947017559347e-06, + "loss": 0.0022, + "step": 144140 + }, + { + "epoch": 0.9245333239265889, + "grad_norm": 0.07134740799665451, + "learning_rate": 6.507413365701773e-06, + "loss": 0.0017, + "step": 144150 + }, + { + "epoch": 0.924597460820375, + "grad_norm": 0.08940762281417847, + "learning_rate": 6.506879694955233e-06, + "loss": 0.0031, + "step": 144160 + }, + { + "epoch": 0.9246615977141611, + "grad_norm": 0.14411704242229462, + "learning_rate": 6.506346005326412e-06, + "loss": 0.0037, + "step": 144170 + }, + { + "epoch": 0.9247257346079472, + "grad_norm": 0.08996514976024628, + "learning_rate": 6.505812296822002e-06, + "loss": 0.0025, + "step": 144180 + }, + { + "epoch": 0.9247898715017333, + "grad_norm": 0.11479675769805908, + "learning_rate": 6.505278569448685e-06, + "loss": 0.0079, + "step": 144190 + }, + { + "epoch": 0.9248540083955193, + "grad_norm": 0.14187529683113098, + "learning_rate": 6.504744823213152e-06, + "loss": 0.0031, + "step": 144200 + }, + { + "epoch": 0.9249181452893055, + "grad_norm": 0.1020754873752594, + "learning_rate": 6.504211058122091e-06, + "loss": 0.0021, + "step": 144210 + }, + { + "epoch": 0.9249822821830916, + "grad_norm": 0.07498777657747269, + "learning_rate": 6.503677274182192e-06, + "loss": 0.003, + "step": 144220 + }, + { + "epoch": 0.9250464190768777, + "grad_norm": 0.05531124398112297, + "learning_rate": 6.5031434714001395e-06, + "loss": 0.0027, + "step": 144230 + }, + { + "epoch": 0.9251105559706638, + "grad_norm": 0.15193775296211243, + "learning_rate": 6.502609649782627e-06, + "loss": 0.0019, + "step": 144240 + }, + { + "epoch": 0.9251746928644499, + "grad_norm": 0.26516246795654297, + "learning_rate": 6.502075809336341e-06, + "loss": 0.0025, + "step": 144250 + }, + { + "epoch": 0.925238829758236, + "grad_norm": 0.23687143623828888, + "learning_rate": 6.501541950067971e-06, + "loss": 0.0045, + "step": 144260 + }, + { + "epoch": 0.925302966652022, + "grad_norm": 0.06759777665138245, + "learning_rate": 6.501008071984209e-06, + "loss": 0.0012, + "step": 144270 + }, + { + "epoch": 0.9253671035458082, + "grad_norm": 0.31802356243133545, + "learning_rate": 6.500474175091742e-06, + "loss": 0.0036, + "step": 144280 + }, + { + "epoch": 0.9254312404395942, + "grad_norm": 0.06832915544509888, + "learning_rate": 6.499940259397262e-06, + "loss": 0.002, + "step": 144290 + }, + { + "epoch": 0.9254953773333804, + "grad_norm": 0.04168454185128212, + "learning_rate": 6.4994063249074565e-06, + "loss": 0.0033, + "step": 144300 + }, + { + "epoch": 0.9255595142271664, + "grad_norm": 0.11130115389823914, + "learning_rate": 6.498872371629021e-06, + "loss": 0.0021, + "step": 144310 + }, + { + "epoch": 0.9256236511209526, + "grad_norm": 0.08546898514032364, + "learning_rate": 6.498338399568641e-06, + "loss": 0.0016, + "step": 144320 + }, + { + "epoch": 0.9256877880147386, + "grad_norm": 0.13469122350215912, + "learning_rate": 6.497804408733012e-06, + "loss": 0.0014, + "step": 144330 + }, + { + "epoch": 0.9257519249085248, + "grad_norm": 0.07961229234933853, + "learning_rate": 6.497270399128821e-06, + "loss": 0.0019, + "step": 144340 + }, + { + "epoch": 0.9258160618023108, + "grad_norm": 0.19842271506786346, + "learning_rate": 6.496736370762764e-06, + "loss": 0.0035, + "step": 144350 + }, + { + "epoch": 0.925880198696097, + "grad_norm": 0.347851037979126, + "learning_rate": 6.49620232364153e-06, + "loss": 0.0024, + "step": 144360 + }, + { + "epoch": 0.9259443355898831, + "grad_norm": 0.10434811562299728, + "learning_rate": 6.495668257771811e-06, + "loss": 0.0025, + "step": 144370 + }, + { + "epoch": 0.9260084724836691, + "grad_norm": 0.06839905679225922, + "learning_rate": 6.4951341731603e-06, + "loss": 0.0018, + "step": 144380 + }, + { + "epoch": 0.9260726093774553, + "grad_norm": 0.15542304515838623, + "learning_rate": 6.4946000698136876e-06, + "loss": 0.0025, + "step": 144390 + }, + { + "epoch": 0.9261367462712413, + "grad_norm": 0.03447660058736801, + "learning_rate": 6.494065947738672e-06, + "loss": 0.0029, + "step": 144400 + }, + { + "epoch": 0.9262008831650275, + "grad_norm": 0.1372140794992447, + "learning_rate": 6.49353180694194e-06, + "loss": 0.0021, + "step": 144410 + }, + { + "epoch": 0.9262650200588135, + "grad_norm": 0.10015080869197845, + "learning_rate": 6.492997647430186e-06, + "loss": 0.0041, + "step": 144420 + }, + { + "epoch": 0.9263291569525997, + "grad_norm": 0.19411984086036682, + "learning_rate": 6.492463469210106e-06, + "loss": 0.0023, + "step": 144430 + }, + { + "epoch": 0.9263932938463857, + "grad_norm": 0.028752420097589493, + "learning_rate": 6.491929272288392e-06, + "loss": 0.0025, + "step": 144440 + }, + { + "epoch": 0.9264574307401718, + "grad_norm": 0.08016210049390793, + "learning_rate": 6.491395056671736e-06, + "loss": 0.0031, + "step": 144450 + }, + { + "epoch": 0.9265215676339579, + "grad_norm": 0.25969594717025757, + "learning_rate": 6.490860822366838e-06, + "loss": 0.002, + "step": 144460 + }, + { + "epoch": 0.926585704527744, + "grad_norm": 0.09981193393468857, + "learning_rate": 6.490326569380385e-06, + "loss": 0.0026, + "step": 144470 + }, + { + "epoch": 0.9266498414215301, + "grad_norm": 0.12202033400535583, + "learning_rate": 6.489792297719076e-06, + "loss": 0.0025, + "step": 144480 + }, + { + "epoch": 0.9267139783153162, + "grad_norm": 0.16252553462982178, + "learning_rate": 6.489258007389605e-06, + "loss": 0.0018, + "step": 144490 + }, + { + "epoch": 0.9267781152091024, + "grad_norm": 0.19570359587669373, + "learning_rate": 6.488723698398667e-06, + "loss": 0.0023, + "step": 144500 + }, + { + "epoch": 0.9268422521028884, + "grad_norm": 0.08531318604946136, + "learning_rate": 6.4881893707529566e-06, + "loss": 0.0028, + "step": 144510 + }, + { + "epoch": 0.9269063889966745, + "grad_norm": 0.2799188196659088, + "learning_rate": 6.487655024459171e-06, + "loss": 0.0023, + "step": 144520 + }, + { + "epoch": 0.9269705258904606, + "grad_norm": 0.054953236132860184, + "learning_rate": 6.487120659524002e-06, + "loss": 0.0014, + "step": 144530 + }, + { + "epoch": 0.9270346627842467, + "grad_norm": 0.14100810885429382, + "learning_rate": 6.48658627595415e-06, + "loss": 0.0025, + "step": 144540 + }, + { + "epoch": 0.9270987996780328, + "grad_norm": 0.050611674785614014, + "learning_rate": 6.48605187375631e-06, + "loss": 0.0029, + "step": 144550 + }, + { + "epoch": 0.9271629365718189, + "grad_norm": 0.13280843198299408, + "learning_rate": 6.485517452937177e-06, + "loss": 0.0017, + "step": 144560 + }, + { + "epoch": 0.927227073465605, + "grad_norm": 0.06337030977010727, + "learning_rate": 6.48498301350345e-06, + "loss": 0.0026, + "step": 144570 + }, + { + "epoch": 0.9272912103593911, + "grad_norm": 0.08777425438165665, + "learning_rate": 6.484448555461823e-06, + "loss": 0.0011, + "step": 144580 + }, + { + "epoch": 0.9273553472531771, + "grad_norm": 0.06910335272550583, + "learning_rate": 6.483914078818995e-06, + "loss": 0.002, + "step": 144590 + }, + { + "epoch": 0.9274194841469633, + "grad_norm": 0.09499827027320862, + "learning_rate": 6.483379583581662e-06, + "loss": 0.0031, + "step": 144600 + }, + { + "epoch": 0.9274836210407493, + "grad_norm": 0.05050915479660034, + "learning_rate": 6.482845069756525e-06, + "loss": 0.0014, + "step": 144610 + }, + { + "epoch": 0.9275477579345355, + "grad_norm": 0.23042018711566925, + "learning_rate": 6.482310537350278e-06, + "loss": 0.0028, + "step": 144620 + }, + { + "epoch": 0.9276118948283215, + "grad_norm": 0.15545450150966644, + "learning_rate": 6.481775986369622e-06, + "loss": 0.0019, + "step": 144630 + }, + { + "epoch": 0.9276760317221077, + "grad_norm": 0.15038074553012848, + "learning_rate": 6.481241416821252e-06, + "loss": 0.0019, + "step": 144640 + }, + { + "epoch": 0.9277401686158938, + "grad_norm": 0.04699162021279335, + "learning_rate": 6.4807068287118705e-06, + "loss": 0.0022, + "step": 144650 + }, + { + "epoch": 0.9278043055096798, + "grad_norm": 0.1671448051929474, + "learning_rate": 6.480172222048172e-06, + "loss": 0.0021, + "step": 144660 + }, + { + "epoch": 0.927868442403466, + "grad_norm": 0.06620363146066666, + "learning_rate": 6.4796375968368594e-06, + "loss": 0.0033, + "step": 144670 + }, + { + "epoch": 0.927932579297252, + "grad_norm": 0.2066885381937027, + "learning_rate": 6.479102953084629e-06, + "loss": 0.0034, + "step": 144680 + }, + { + "epoch": 0.9279967161910382, + "grad_norm": 0.0637272372841835, + "learning_rate": 6.478568290798183e-06, + "loss": 0.0018, + "step": 144690 + }, + { + "epoch": 0.9280608530848242, + "grad_norm": 0.09442780911922455, + "learning_rate": 6.4780336099842175e-06, + "loss": 0.0035, + "step": 144700 + }, + { + "epoch": 0.9281249899786104, + "grad_norm": 0.25165337324142456, + "learning_rate": 6.477498910649437e-06, + "loss": 0.002, + "step": 144710 + }, + { + "epoch": 0.9281891268723964, + "grad_norm": 0.3011432886123657, + "learning_rate": 6.4769641928005365e-06, + "loss": 0.0053, + "step": 144720 + }, + { + "epoch": 0.9282532637661826, + "grad_norm": 0.14117687940597534, + "learning_rate": 6.476429456444222e-06, + "loss": 0.0051, + "step": 144730 + }, + { + "epoch": 0.9283174006599686, + "grad_norm": 0.09507281333208084, + "learning_rate": 6.475894701587189e-06, + "loss": 0.0023, + "step": 144740 + }, + { + "epoch": 0.9283815375537547, + "grad_norm": 0.08003487437963486, + "learning_rate": 6.475359928236141e-06, + "loss": 0.0029, + "step": 144750 + }, + { + "epoch": 0.9284456744475408, + "grad_norm": 0.13146911561489105, + "learning_rate": 6.47482513639778e-06, + "loss": 0.0018, + "step": 144760 + }, + { + "epoch": 0.9285098113413269, + "grad_norm": 0.559633731842041, + "learning_rate": 6.4742903260788036e-06, + "loss": 0.0019, + "step": 144770 + }, + { + "epoch": 0.9285739482351131, + "grad_norm": 0.1207059770822525, + "learning_rate": 6.473755497285918e-06, + "loss": 0.0013, + "step": 144780 + }, + { + "epoch": 0.9286380851288991, + "grad_norm": 0.03859927877783775, + "learning_rate": 6.473220650025822e-06, + "loss": 0.002, + "step": 144790 + }, + { + "epoch": 0.9287022220226853, + "grad_norm": 0.18498145043849945, + "learning_rate": 6.472685784305218e-06, + "loss": 0.0033, + "step": 144800 + }, + { + "epoch": 0.9287663589164713, + "grad_norm": 0.07351011037826538, + "learning_rate": 6.472150900130809e-06, + "loss": 0.003, + "step": 144810 + }, + { + "epoch": 0.9288304958102575, + "grad_norm": 0.08685048669576645, + "learning_rate": 6.4716159975092975e-06, + "loss": 0.002, + "step": 144820 + }, + { + "epoch": 0.9288946327040435, + "grad_norm": 0.16433115303516388, + "learning_rate": 6.471081076447385e-06, + "loss": 0.0012, + "step": 144830 + }, + { + "epoch": 0.9289587695978296, + "grad_norm": 0.14450232684612274, + "learning_rate": 6.470546136951776e-06, + "loss": 0.0029, + "step": 144840 + }, + { + "epoch": 0.9290229064916157, + "grad_norm": 0.05751143768429756, + "learning_rate": 6.470011179029172e-06, + "loss": 0.0011, + "step": 144850 + }, + { + "epoch": 0.9290870433854018, + "grad_norm": 0.0785200223326683, + "learning_rate": 6.4694762026862774e-06, + "loss": 0.0015, + "step": 144860 + }, + { + "epoch": 0.9291511802791879, + "grad_norm": 0.09653989970684052, + "learning_rate": 6.468941207929797e-06, + "loss": 0.0026, + "step": 144870 + }, + { + "epoch": 0.929215317172974, + "grad_norm": 0.12254025787115097, + "learning_rate": 6.468406194766433e-06, + "loss": 0.0021, + "step": 144880 + }, + { + "epoch": 0.92927945406676, + "grad_norm": 0.11895779520273209, + "learning_rate": 6.467871163202888e-06, + "loss": 0.0014, + "step": 144890 + }, + { + "epoch": 0.9293435909605462, + "grad_norm": 0.17474906146526337, + "learning_rate": 6.46733611324587e-06, + "loss": 0.0019, + "step": 144900 + }, + { + "epoch": 0.9294077278543322, + "grad_norm": 0.02767779491841793, + "learning_rate": 6.466801044902081e-06, + "loss": 0.0012, + "step": 144910 + }, + { + "epoch": 0.9294718647481184, + "grad_norm": 0.19321908056735992, + "learning_rate": 6.466265958178227e-06, + "loss": 0.0023, + "step": 144920 + }, + { + "epoch": 0.9295360016419045, + "grad_norm": 0.21164913475513458, + "learning_rate": 6.465730853081013e-06, + "loss": 0.0042, + "step": 144930 + }, + { + "epoch": 0.9296001385356906, + "grad_norm": 0.192432701587677, + "learning_rate": 6.465195729617144e-06, + "loss": 0.002, + "step": 144940 + }, + { + "epoch": 0.9296642754294767, + "grad_norm": 0.09866433590650558, + "learning_rate": 6.4646605877933255e-06, + "loss": 0.0014, + "step": 144950 + }, + { + "epoch": 0.9297284123232628, + "grad_norm": 0.15407894551753998, + "learning_rate": 6.464125427616261e-06, + "loss": 0.0017, + "step": 144960 + }, + { + "epoch": 0.9297925492170489, + "grad_norm": 0.05903768539428711, + "learning_rate": 6.46359024909266e-06, + "loss": 0.0026, + "step": 144970 + }, + { + "epoch": 0.9298566861108349, + "grad_norm": 0.1296510547399521, + "learning_rate": 6.463055052229227e-06, + "loss": 0.0027, + "step": 144980 + }, + { + "epoch": 0.9299208230046211, + "grad_norm": 0.22783802449703217, + "learning_rate": 6.4625198370326695e-06, + "loss": 0.0033, + "step": 144990 + }, + { + "epoch": 0.9299849598984071, + "grad_norm": 0.1219167411327362, + "learning_rate": 6.461984603509692e-06, + "loss": 0.0023, + "step": 145000 + }, + { + "epoch": 0.9300490967921933, + "grad_norm": 0.06050629913806915, + "learning_rate": 6.461449351667004e-06, + "loss": 0.0018, + "step": 145010 + }, + { + "epoch": 0.9301132336859793, + "grad_norm": 0.2846671938896179, + "learning_rate": 6.460914081511309e-06, + "loss": 0.0025, + "step": 145020 + }, + { + "epoch": 0.9301773705797655, + "grad_norm": 0.10349977016448975, + "learning_rate": 6.460378793049318e-06, + "loss": 0.0019, + "step": 145030 + }, + { + "epoch": 0.9302415074735515, + "grad_norm": 0.14169473946094513, + "learning_rate": 6.459843486287735e-06, + "loss": 0.002, + "step": 145040 + }, + { + "epoch": 0.9303056443673376, + "grad_norm": 0.2114722728729248, + "learning_rate": 6.459308161233273e-06, + "loss": 0.0018, + "step": 145050 + }, + { + "epoch": 0.9303697812611238, + "grad_norm": 0.13831239938735962, + "learning_rate": 6.458772817892635e-06, + "loss": 0.0019, + "step": 145060 + }, + { + "epoch": 0.9304339181549098, + "grad_norm": 0.1724303513765335, + "learning_rate": 6.458237456272532e-06, + "loss": 0.0024, + "step": 145070 + }, + { + "epoch": 0.930498055048696, + "grad_norm": 0.050732627511024475, + "learning_rate": 6.45770207637967e-06, + "loss": 0.0028, + "step": 145080 + }, + { + "epoch": 0.930562191942482, + "grad_norm": 0.27567073702812195, + "learning_rate": 6.457166678220761e-06, + "loss": 0.0018, + "step": 145090 + }, + { + "epoch": 0.9306263288362682, + "grad_norm": 0.01489699725061655, + "learning_rate": 6.4566312618025094e-06, + "loss": 0.0019, + "step": 145100 + }, + { + "epoch": 0.9306904657300542, + "grad_norm": 0.04095577076077461, + "learning_rate": 6.456095827131629e-06, + "loss": 0.0019, + "step": 145110 + }, + { + "epoch": 0.9307546026238404, + "grad_norm": 0.18893811106681824, + "learning_rate": 6.455560374214826e-06, + "loss": 0.0035, + "step": 145120 + }, + { + "epoch": 0.9308187395176264, + "grad_norm": 0.1257791519165039, + "learning_rate": 6.455024903058813e-06, + "loss": 0.0019, + "step": 145130 + }, + { + "epoch": 0.9308828764114125, + "grad_norm": 0.05663027986884117, + "learning_rate": 6.454489413670297e-06, + "loss": 0.0017, + "step": 145140 + }, + { + "epoch": 0.9309470133051986, + "grad_norm": 0.10859649628400803, + "learning_rate": 6.4539539060559896e-06, + "loss": 0.0017, + "step": 145150 + }, + { + "epoch": 0.9310111501989847, + "grad_norm": 0.04232072830200195, + "learning_rate": 6.4534183802226e-06, + "loss": 0.0026, + "step": 145160 + }, + { + "epoch": 0.9310752870927708, + "grad_norm": 0.07066509127616882, + "learning_rate": 6.452882836176839e-06, + "loss": 0.0019, + "step": 145170 + }, + { + "epoch": 0.9311394239865569, + "grad_norm": 0.267238587141037, + "learning_rate": 6.45234727392542e-06, + "loss": 0.0023, + "step": 145180 + }, + { + "epoch": 0.931203560880343, + "grad_norm": 0.264752596616745, + "learning_rate": 6.45181169347505e-06, + "loss": 0.0022, + "step": 145190 + }, + { + "epoch": 0.9312676977741291, + "grad_norm": 0.09979977458715439, + "learning_rate": 6.451276094832441e-06, + "loss": 0.0019, + "step": 145200 + }, + { + "epoch": 0.9313318346679152, + "grad_norm": 0.02484172396361828, + "learning_rate": 6.450740478004307e-06, + "loss": 0.0016, + "step": 145210 + }, + { + "epoch": 0.9313959715617013, + "grad_norm": 0.13323238492012024, + "learning_rate": 6.450204842997358e-06, + "loss": 0.0021, + "step": 145220 + }, + { + "epoch": 0.9314601084554874, + "grad_norm": 0.12850530445575714, + "learning_rate": 6.449669189818304e-06, + "loss": 0.0015, + "step": 145230 + }, + { + "epoch": 0.9315242453492735, + "grad_norm": 0.0978880450129509, + "learning_rate": 6.44913351847386e-06, + "loss": 0.0012, + "step": 145240 + }, + { + "epoch": 0.9315883822430596, + "grad_norm": 0.05493517220020294, + "learning_rate": 6.448597828970738e-06, + "loss": 0.0021, + "step": 145250 + }, + { + "epoch": 0.9316525191368457, + "grad_norm": 0.057626523077487946, + "learning_rate": 6.448062121315648e-06, + "loss": 0.0016, + "step": 145260 + }, + { + "epoch": 0.9317166560306318, + "grad_norm": 0.19279751181602478, + "learning_rate": 6.447526395515307e-06, + "loss": 0.0026, + "step": 145270 + }, + { + "epoch": 0.9317807929244178, + "grad_norm": 0.08471494168043137, + "learning_rate": 6.446990651576425e-06, + "loss": 0.0018, + "step": 145280 + }, + { + "epoch": 0.931844929818204, + "grad_norm": 0.16393303871154785, + "learning_rate": 6.446454889505715e-06, + "loss": 0.0031, + "step": 145290 + }, + { + "epoch": 0.93190906671199, + "grad_norm": 0.15938109159469604, + "learning_rate": 6.445919109309893e-06, + "loss": 0.0021, + "step": 145300 + }, + { + "epoch": 0.9319732036057762, + "grad_norm": 0.08110189437866211, + "learning_rate": 6.445383310995671e-06, + "loss": 0.0022, + "step": 145310 + }, + { + "epoch": 0.9320373404995622, + "grad_norm": 0.028752269223332405, + "learning_rate": 6.444847494569761e-06, + "loss": 0.0018, + "step": 145320 + }, + { + "epoch": 0.9321014773933484, + "grad_norm": 0.45185551047325134, + "learning_rate": 6.444311660038882e-06, + "loss": 0.0033, + "step": 145330 + }, + { + "epoch": 0.9321656142871345, + "grad_norm": 0.07926983386278152, + "learning_rate": 6.443775807409745e-06, + "loss": 0.0014, + "step": 145340 + }, + { + "epoch": 0.9322297511809206, + "grad_norm": 0.07370294630527496, + "learning_rate": 6.443239936689064e-06, + "loss": 0.0017, + "step": 145350 + }, + { + "epoch": 0.9322938880747067, + "grad_norm": 0.2275826334953308, + "learning_rate": 6.442704047883555e-06, + "loss": 0.0018, + "step": 145360 + }, + { + "epoch": 0.9323580249684927, + "grad_norm": 0.05165311321616173, + "learning_rate": 6.442168140999935e-06, + "loss": 0.0016, + "step": 145370 + }, + { + "epoch": 0.9324221618622789, + "grad_norm": 0.10057036578655243, + "learning_rate": 6.441632216044915e-06, + "loss": 0.0024, + "step": 145380 + }, + { + "epoch": 0.9324862987560649, + "grad_norm": 0.10764387249946594, + "learning_rate": 6.441096273025216e-06, + "loss": 0.0012, + "step": 145390 + }, + { + "epoch": 0.9325504356498511, + "grad_norm": 0.05163717269897461, + "learning_rate": 6.440560311947549e-06, + "loss": 0.0042, + "step": 145400 + }, + { + "epoch": 0.9326145725436371, + "grad_norm": 0.04539763927459717, + "learning_rate": 6.440024332818633e-06, + "loss": 0.0014, + "step": 145410 + }, + { + "epoch": 0.9326787094374233, + "grad_norm": 0.1427016705274582, + "learning_rate": 6.439488335645181e-06, + "loss": 0.0014, + "step": 145420 + }, + { + "epoch": 0.9327428463312093, + "grad_norm": 0.2075621485710144, + "learning_rate": 6.438952320433913e-06, + "loss": 0.0024, + "step": 145430 + }, + { + "epoch": 0.9328069832249954, + "grad_norm": 0.20911450684070587, + "learning_rate": 6.4384162871915425e-06, + "loss": 0.002, + "step": 145440 + }, + { + "epoch": 0.9328711201187815, + "grad_norm": 0.09059228003025055, + "learning_rate": 6.437880235924788e-06, + "loss": 0.0034, + "step": 145450 + }, + { + "epoch": 0.9329352570125676, + "grad_norm": 0.07383474707603455, + "learning_rate": 6.437344166640369e-06, + "loss": 0.0026, + "step": 145460 + }, + { + "epoch": 0.9329993939063537, + "grad_norm": 0.0879674181342125, + "learning_rate": 6.436808079344998e-06, + "loss": 0.0034, + "step": 145470 + }, + { + "epoch": 0.9330635308001398, + "grad_norm": 0.1926436424255371, + "learning_rate": 6.436271974045396e-06, + "loss": 0.0037, + "step": 145480 + }, + { + "epoch": 0.933127667693926, + "grad_norm": 0.06891998648643494, + "learning_rate": 6.435735850748279e-06, + "loss": 0.0022, + "step": 145490 + }, + { + "epoch": 0.933191804587712, + "grad_norm": 0.11858411133289337, + "learning_rate": 6.435199709460366e-06, + "loss": 0.0038, + "step": 145500 + }, + { + "epoch": 0.9332559414814982, + "grad_norm": 0.08976802974939346, + "learning_rate": 6.434663550188375e-06, + "loss": 0.0024, + "step": 145510 + }, + { + "epoch": 0.9333200783752842, + "grad_norm": 0.11608156561851501, + "learning_rate": 6.434127372939024e-06, + "loss": 0.0023, + "step": 145520 + }, + { + "epoch": 0.9333842152690703, + "grad_norm": 0.0822378545999527, + "learning_rate": 6.433591177719032e-06, + "loss": 0.0024, + "step": 145530 + }, + { + "epoch": 0.9334483521628564, + "grad_norm": 0.16821332275867462, + "learning_rate": 6.433054964535119e-06, + "loss": 0.0026, + "step": 145540 + }, + { + "epoch": 0.9335124890566425, + "grad_norm": 0.04819389805197716, + "learning_rate": 6.432518733394002e-06, + "loss": 0.0037, + "step": 145550 + }, + { + "epoch": 0.9335766259504286, + "grad_norm": 0.006087715271860361, + "learning_rate": 6.431982484302402e-06, + "loss": 0.0024, + "step": 145560 + }, + { + "epoch": 0.9336407628442147, + "grad_norm": 0.08751381188631058, + "learning_rate": 6.4314462172670375e-06, + "loss": 0.0018, + "step": 145570 + }, + { + "epoch": 0.9337048997380007, + "grad_norm": 0.060294367372989655, + "learning_rate": 6.43090993229463e-06, + "loss": 0.0021, + "step": 145580 + }, + { + "epoch": 0.9337690366317869, + "grad_norm": 0.06259045004844666, + "learning_rate": 6.430373629391897e-06, + "loss": 0.0028, + "step": 145590 + }, + { + "epoch": 0.9338331735255729, + "grad_norm": 0.16633011400699615, + "learning_rate": 6.4298373085655606e-06, + "loss": 0.0025, + "step": 145600 + }, + { + "epoch": 0.9338973104193591, + "grad_norm": 0.31002166867256165, + "learning_rate": 6.429300969822341e-06, + "loss": 0.0018, + "step": 145610 + }, + { + "epoch": 0.9339614473131452, + "grad_norm": 0.09039387851953506, + "learning_rate": 6.428764613168958e-06, + "loss": 0.0017, + "step": 145620 + }, + { + "epoch": 0.9340255842069313, + "grad_norm": 0.11676201969385147, + "learning_rate": 6.428228238612135e-06, + "loss": 0.0027, + "step": 145630 + }, + { + "epoch": 0.9340897211007174, + "grad_norm": 0.18348877131938934, + "learning_rate": 6.4276918461585906e-06, + "loss": 0.0013, + "step": 145640 + }, + { + "epoch": 0.9341538579945035, + "grad_norm": 0.1378244161605835, + "learning_rate": 6.427155435815047e-06, + "loss": 0.0038, + "step": 145650 + }, + { + "epoch": 0.9342179948882896, + "grad_norm": 0.027310635894536972, + "learning_rate": 6.426619007588225e-06, + "loss": 0.0014, + "step": 145660 + }, + { + "epoch": 0.9342821317820756, + "grad_norm": 0.10352227091789246, + "learning_rate": 6.426082561484848e-06, + "loss": 0.0022, + "step": 145670 + }, + { + "epoch": 0.9343462686758618, + "grad_norm": 0.11045675724744797, + "learning_rate": 6.425546097511637e-06, + "loss": 0.0018, + "step": 145680 + }, + { + "epoch": 0.9344104055696478, + "grad_norm": 0.055172890424728394, + "learning_rate": 6.425009615675316e-06, + "loss": 0.0016, + "step": 145690 + }, + { + "epoch": 0.934474542463434, + "grad_norm": 0.007064759731292725, + "learning_rate": 6.424473115982603e-06, + "loss": 0.0016, + "step": 145700 + }, + { + "epoch": 0.93453867935722, + "grad_norm": 0.18204259872436523, + "learning_rate": 6.423936598440228e-06, + "loss": 0.0035, + "step": 145710 + }, + { + "epoch": 0.9346028162510062, + "grad_norm": 0.07678377628326416, + "learning_rate": 6.4234000630549065e-06, + "loss": 0.0014, + "step": 145720 + }, + { + "epoch": 0.9346669531447922, + "grad_norm": 0.1188676655292511, + "learning_rate": 6.422863509833366e-06, + "loss": 0.0014, + "step": 145730 + }, + { + "epoch": 0.9347310900385783, + "grad_norm": 0.16479258239269257, + "learning_rate": 6.422326938782328e-06, + "loss": 0.0017, + "step": 145740 + }, + { + "epoch": 0.9347952269323644, + "grad_norm": 0.09554725140333176, + "learning_rate": 6.421790349908518e-06, + "loss": 0.0026, + "step": 145750 + }, + { + "epoch": 0.9348593638261505, + "grad_norm": 0.16004672646522522, + "learning_rate": 6.421253743218658e-06, + "loss": 0.0016, + "step": 145760 + }, + { + "epoch": 0.9349235007199367, + "grad_norm": 0.0543198436498642, + "learning_rate": 6.420717118719473e-06, + "loss": 0.0055, + "step": 145770 + }, + { + "epoch": 0.9349876376137227, + "grad_norm": 0.08797811716794968, + "learning_rate": 6.420180476417688e-06, + "loss": 0.0014, + "step": 145780 + }, + { + "epoch": 0.9350517745075089, + "grad_norm": 0.005532593000680208, + "learning_rate": 6.419643816320026e-06, + "loss": 0.002, + "step": 145790 + }, + { + "epoch": 0.9351159114012949, + "grad_norm": 0.14849628508090973, + "learning_rate": 6.419107138433211e-06, + "loss": 0.0023, + "step": 145800 + }, + { + "epoch": 0.9351800482950811, + "grad_norm": 0.07060623168945312, + "learning_rate": 6.41857044276397e-06, + "loss": 0.0018, + "step": 145810 + }, + { + "epoch": 0.9352441851888671, + "grad_norm": 0.031063968315720558, + "learning_rate": 6.41803372931903e-06, + "loss": 0.0018, + "step": 145820 + }, + { + "epoch": 0.9353083220826532, + "grad_norm": 0.13810567557811737, + "learning_rate": 6.417496998105112e-06, + "loss": 0.0023, + "step": 145830 + }, + { + "epoch": 0.9353724589764393, + "grad_norm": 0.17485027015209198, + "learning_rate": 6.416960249128943e-06, + "loss": 0.002, + "step": 145840 + }, + { + "epoch": 0.9354365958702254, + "grad_norm": 0.14516933262348175, + "learning_rate": 6.41642348239725e-06, + "loss": 0.0024, + "step": 145850 + }, + { + "epoch": 0.9355007327640115, + "grad_norm": 0.07958931475877762, + "learning_rate": 6.415886697916759e-06, + "loss": 0.0033, + "step": 145860 + }, + { + "epoch": 0.9355648696577976, + "grad_norm": 0.03135542571544647, + "learning_rate": 6.415349895694195e-06, + "loss": 0.0021, + "step": 145870 + }, + { + "epoch": 0.9356290065515837, + "grad_norm": 0.11623740941286087, + "learning_rate": 6.414813075736286e-06, + "loss": 0.002, + "step": 145880 + }, + { + "epoch": 0.9356931434453698, + "grad_norm": 0.11329323798418045, + "learning_rate": 6.4142762380497565e-06, + "loss": 0.0043, + "step": 145890 + }, + { + "epoch": 0.9357572803391558, + "grad_norm": 0.03688264265656471, + "learning_rate": 6.413739382641338e-06, + "loss": 0.0028, + "step": 145900 + }, + { + "epoch": 0.935821417232942, + "grad_norm": 0.06331092119216919, + "learning_rate": 6.413202509517752e-06, + "loss": 0.0031, + "step": 145910 + }, + { + "epoch": 0.9358855541267281, + "grad_norm": 0.1076519787311554, + "learning_rate": 6.412665618685729e-06, + "loss": 0.003, + "step": 145920 + }, + { + "epoch": 0.9359496910205142, + "grad_norm": 0.06783204525709152, + "learning_rate": 6.412128710151997e-06, + "loss": 0.0017, + "step": 145930 + }, + { + "epoch": 0.9360138279143003, + "grad_norm": 0.12745817005634308, + "learning_rate": 6.411591783923282e-06, + "loss": 0.0046, + "step": 145940 + }, + { + "epoch": 0.9360779648080864, + "grad_norm": 0.26193997263908386, + "learning_rate": 6.411054840006313e-06, + "loss": 0.0052, + "step": 145950 + }, + { + "epoch": 0.9361421017018725, + "grad_norm": 0.07764274626970291, + "learning_rate": 6.410517878407819e-06, + "loss": 0.0014, + "step": 145960 + }, + { + "epoch": 0.9362062385956585, + "grad_norm": 0.06901423633098602, + "learning_rate": 6.409980899134529e-06, + "loss": 0.0017, + "step": 145970 + }, + { + "epoch": 0.9362703754894447, + "grad_norm": 0.039977312088012695, + "learning_rate": 6.409443902193169e-06, + "loss": 0.0025, + "step": 145980 + }, + { + "epoch": 0.9363345123832307, + "grad_norm": 0.14563529193401337, + "learning_rate": 6.40890688759047e-06, + "loss": 0.0029, + "step": 145990 + }, + { + "epoch": 0.9363986492770169, + "grad_norm": 0.18063455820083618, + "learning_rate": 6.408369855333161e-06, + "loss": 0.0017, + "step": 146000 + }, + { + "epoch": 0.9364627861708029, + "grad_norm": 0.04741799831390381, + "learning_rate": 6.407832805427971e-06, + "loss": 0.0018, + "step": 146010 + }, + { + "epoch": 0.9365269230645891, + "grad_norm": 0.02267642319202423, + "learning_rate": 6.407295737881629e-06, + "loss": 0.0014, + "step": 146020 + }, + { + "epoch": 0.9365910599583751, + "grad_norm": 0.014480840414762497, + "learning_rate": 6.406758652700867e-06, + "loss": 0.0023, + "step": 146030 + }, + { + "epoch": 0.9366551968521613, + "grad_norm": 0.08123641461133957, + "learning_rate": 6.406221549892413e-06, + "loss": 0.0035, + "step": 146040 + }, + { + "epoch": 0.9367193337459474, + "grad_norm": 0.08571852743625641, + "learning_rate": 6.405684429463e-06, + "loss": 0.0026, + "step": 146050 + }, + { + "epoch": 0.9367834706397334, + "grad_norm": 0.11626120656728745, + "learning_rate": 6.405147291419352e-06, + "loss": 0.0026, + "step": 146060 + }, + { + "epoch": 0.9368476075335196, + "grad_norm": 0.02037181705236435, + "learning_rate": 6.404610135768208e-06, + "loss": 0.0034, + "step": 146070 + }, + { + "epoch": 0.9369117444273056, + "grad_norm": 0.24351300299167633, + "learning_rate": 6.404072962516293e-06, + "loss": 0.0036, + "step": 146080 + }, + { + "epoch": 0.9369758813210918, + "grad_norm": 0.12650911509990692, + "learning_rate": 6.403535771670342e-06, + "loss": 0.0031, + "step": 146090 + }, + { + "epoch": 0.9370400182148778, + "grad_norm": 0.11917576938867569, + "learning_rate": 6.402998563237084e-06, + "loss": 0.0019, + "step": 146100 + }, + { + "epoch": 0.937104155108664, + "grad_norm": 0.052274610847234726, + "learning_rate": 6.40246133722325e-06, + "loss": 0.0018, + "step": 146110 + }, + { + "epoch": 0.93716829200245, + "grad_norm": 0.14794179797172546, + "learning_rate": 6.401924093635574e-06, + "loss": 0.002, + "step": 146120 + }, + { + "epoch": 0.9372324288962361, + "grad_norm": 0.14314155280590057, + "learning_rate": 6.4013868324807885e-06, + "loss": 0.0018, + "step": 146130 + }, + { + "epoch": 0.9372965657900222, + "grad_norm": 0.13320375978946686, + "learning_rate": 6.400849553765622e-06, + "loss": 0.0035, + "step": 146140 + }, + { + "epoch": 0.9373607026838083, + "grad_norm": 0.019696349278092384, + "learning_rate": 6.400312257496812e-06, + "loss": 0.001, + "step": 146150 + }, + { + "epoch": 0.9374248395775944, + "grad_norm": 0.008471256121993065, + "learning_rate": 6.399774943681088e-06, + "loss": 0.0014, + "step": 146160 + }, + { + "epoch": 0.9374889764713805, + "grad_norm": 0.052590277045965195, + "learning_rate": 6.399237612325182e-06, + "loss": 0.0022, + "step": 146170 + }, + { + "epoch": 0.9375531133651666, + "grad_norm": 0.056718725711107254, + "learning_rate": 6.39870026343583e-06, + "loss": 0.0026, + "step": 146180 + }, + { + "epoch": 0.9376172502589527, + "grad_norm": 0.1991223394870758, + "learning_rate": 6.3981628970197625e-06, + "loss": 0.003, + "step": 146190 + }, + { + "epoch": 0.9376813871527389, + "grad_norm": 0.06018494814634323, + "learning_rate": 6.397625513083717e-06, + "loss": 0.0012, + "step": 146200 + }, + { + "epoch": 0.9377455240465249, + "grad_norm": 0.14340412616729736, + "learning_rate": 6.397088111634423e-06, + "loss": 0.0016, + "step": 146210 + }, + { + "epoch": 0.937809660940311, + "grad_norm": 0.05904007703065872, + "learning_rate": 6.396550692678618e-06, + "loss": 0.0037, + "step": 146220 + }, + { + "epoch": 0.9378737978340971, + "grad_norm": 0.13746248185634613, + "learning_rate": 6.396013256223034e-06, + "loss": 0.0032, + "step": 146230 + }, + { + "epoch": 0.9379379347278832, + "grad_norm": 0.05195393040776253, + "learning_rate": 6.395475802274407e-06, + "loss": 0.0034, + "step": 146240 + }, + { + "epoch": 0.9380020716216693, + "grad_norm": 0.040026769042015076, + "learning_rate": 6.394938330839468e-06, + "loss": 0.0045, + "step": 146250 + }, + { + "epoch": 0.9380662085154554, + "grad_norm": 0.11002197116613388, + "learning_rate": 6.394400841924959e-06, + "loss": 0.0012, + "step": 146260 + }, + { + "epoch": 0.9381303454092415, + "grad_norm": 0.1482343077659607, + "learning_rate": 6.393863335537608e-06, + "loss": 0.0017, + "step": 146270 + }, + { + "epoch": 0.9381944823030276, + "grad_norm": 0.0892859548330307, + "learning_rate": 6.393325811684154e-06, + "loss": 0.0018, + "step": 146280 + }, + { + "epoch": 0.9382586191968136, + "grad_norm": 0.07135730981826782, + "learning_rate": 6.392788270371332e-06, + "loss": 0.0018, + "step": 146290 + }, + { + "epoch": 0.9383227560905998, + "grad_norm": 0.12789849936962128, + "learning_rate": 6.392250711605876e-06, + "loss": 0.0033, + "step": 146300 + }, + { + "epoch": 0.9383868929843858, + "grad_norm": 0.11650566756725311, + "learning_rate": 6.391713135394526e-06, + "loss": 0.0032, + "step": 146310 + }, + { + "epoch": 0.938451029878172, + "grad_norm": 0.04727376624941826, + "learning_rate": 6.391175541744014e-06, + "loss": 0.0014, + "step": 146320 + }, + { + "epoch": 0.9385151667719581, + "grad_norm": 0.03639853373169899, + "learning_rate": 6.39063793066108e-06, + "loss": 0.0013, + "step": 146330 + }, + { + "epoch": 0.9385793036657442, + "grad_norm": 0.07490959763526917, + "learning_rate": 6.390100302152456e-06, + "loss": 0.0024, + "step": 146340 + }, + { + "epoch": 0.9386434405595303, + "grad_norm": 0.05077169090509415, + "learning_rate": 6.3895626562248845e-06, + "loss": 0.0025, + "step": 146350 + }, + { + "epoch": 0.9387075774533163, + "grad_norm": 0.04926026239991188, + "learning_rate": 6.389024992885099e-06, + "loss": 0.0024, + "step": 146360 + }, + { + "epoch": 0.9387717143471025, + "grad_norm": 0.15370850265026093, + "learning_rate": 6.388487312139837e-06, + "loss": 0.0024, + "step": 146370 + }, + { + "epoch": 0.9388358512408885, + "grad_norm": 0.0960734486579895, + "learning_rate": 6.387949613995838e-06, + "loss": 0.0016, + "step": 146380 + }, + { + "epoch": 0.9388999881346747, + "grad_norm": 0.11249937862157822, + "learning_rate": 6.387411898459836e-06, + "loss": 0.0022, + "step": 146390 + }, + { + "epoch": 0.9389641250284607, + "grad_norm": 0.16878405213356018, + "learning_rate": 6.386874165538573e-06, + "loss": 0.003, + "step": 146400 + }, + { + "epoch": 0.9390282619222469, + "grad_norm": 0.21584518253803253, + "learning_rate": 6.386336415238786e-06, + "loss": 0.002, + "step": 146410 + }, + { + "epoch": 0.9390923988160329, + "grad_norm": 0.22585085034370422, + "learning_rate": 6.385798647567212e-06, + "loss": 0.0031, + "step": 146420 + }, + { + "epoch": 0.939156535709819, + "grad_norm": 0.07451896369457245, + "learning_rate": 6.385260862530591e-06, + "loss": 0.0027, + "step": 146430 + }, + { + "epoch": 0.9392206726036051, + "grad_norm": 0.11135633289813995, + "learning_rate": 6.38472306013566e-06, + "loss": 0.0016, + "step": 146440 + }, + { + "epoch": 0.9392848094973912, + "grad_norm": 0.29068341851234436, + "learning_rate": 6.3841852403891604e-06, + "loss": 0.0042, + "step": 146450 + }, + { + "epoch": 0.9393489463911773, + "grad_norm": 0.03432978317141533, + "learning_rate": 6.3836474032978315e-06, + "loss": 0.0014, + "step": 146460 + }, + { + "epoch": 0.9394130832849634, + "grad_norm": 0.0034370392095297575, + "learning_rate": 6.383109548868411e-06, + "loss": 0.0009, + "step": 146470 + }, + { + "epoch": 0.9394772201787496, + "grad_norm": 0.14675849676132202, + "learning_rate": 6.3825716771076386e-06, + "loss": 0.0021, + "step": 146480 + }, + { + "epoch": 0.9395413570725356, + "grad_norm": 0.028965268284082413, + "learning_rate": 6.382033788022255e-06, + "loss": 0.0021, + "step": 146490 + }, + { + "epoch": 0.9396054939663218, + "grad_norm": 0.051502782851457596, + "learning_rate": 6.381495881619001e-06, + "loss": 0.0024, + "step": 146500 + }, + { + "epoch": 0.9396696308601078, + "grad_norm": 0.03157828748226166, + "learning_rate": 6.380957957904615e-06, + "loss": 0.0016, + "step": 146510 + }, + { + "epoch": 0.939733767753894, + "grad_norm": 0.18266567587852478, + "learning_rate": 6.380420016885841e-06, + "loss": 0.0024, + "step": 146520 + }, + { + "epoch": 0.93979790464768, + "grad_norm": 0.16004875302314758, + "learning_rate": 6.379882058569417e-06, + "loss": 0.0034, + "step": 146530 + }, + { + "epoch": 0.9398620415414661, + "grad_norm": 0.17029421031475067, + "learning_rate": 6.379344082962084e-06, + "loss": 0.0029, + "step": 146540 + }, + { + "epoch": 0.9399261784352522, + "grad_norm": 0.13152047991752625, + "learning_rate": 6.378806090070584e-06, + "loss": 0.0015, + "step": 146550 + }, + { + "epoch": 0.9399903153290383, + "grad_norm": 0.07515915483236313, + "learning_rate": 6.3782680799016584e-06, + "loss": 0.0032, + "step": 146560 + }, + { + "epoch": 0.9400544522228244, + "grad_norm": 0.1369394063949585, + "learning_rate": 6.377730052462048e-06, + "loss": 0.0033, + "step": 146570 + }, + { + "epoch": 0.9401185891166105, + "grad_norm": 0.020615696907043457, + "learning_rate": 6.377192007758497e-06, + "loss": 0.0026, + "step": 146580 + }, + { + "epoch": 0.9401827260103965, + "grad_norm": 0.009614650160074234, + "learning_rate": 6.376653945797744e-06, + "loss": 0.0018, + "step": 146590 + }, + { + "epoch": 0.9402468629041827, + "grad_norm": 0.1355360448360443, + "learning_rate": 6.376115866586534e-06, + "loss": 0.0015, + "step": 146600 + }, + { + "epoch": 0.9403109997979688, + "grad_norm": 0.2663950026035309, + "learning_rate": 6.3755777701316095e-06, + "loss": 0.0022, + "step": 146610 + }, + { + "epoch": 0.9403751366917549, + "grad_norm": 0.010578290559351444, + "learning_rate": 6.375039656439712e-06, + "loss": 0.0016, + "step": 146620 + }, + { + "epoch": 0.940439273585541, + "grad_norm": 0.32353025674819946, + "learning_rate": 6.374501525517585e-06, + "loss": 0.002, + "step": 146630 + }, + { + "epoch": 0.9405034104793271, + "grad_norm": 0.06088297441601753, + "learning_rate": 6.373963377371971e-06, + "loss": 0.0029, + "step": 146640 + }, + { + "epoch": 0.9405675473731132, + "grad_norm": 0.1083764135837555, + "learning_rate": 6.373425212009613e-06, + "loss": 0.0023, + "step": 146650 + }, + { + "epoch": 0.9406316842668992, + "grad_norm": 0.0899743065237999, + "learning_rate": 6.372887029437256e-06, + "loss": 0.0023, + "step": 146660 + }, + { + "epoch": 0.9406958211606854, + "grad_norm": 0.11933674663305283, + "learning_rate": 6.372348829661645e-06, + "loss": 0.0032, + "step": 146670 + }, + { + "epoch": 0.9407599580544714, + "grad_norm": 0.044247616082429886, + "learning_rate": 6.371810612689521e-06, + "loss": 0.0035, + "step": 146680 + }, + { + "epoch": 0.9408240949482576, + "grad_norm": 0.06650304794311523, + "learning_rate": 6.37127237852763e-06, + "loss": 0.0022, + "step": 146690 + }, + { + "epoch": 0.9408882318420436, + "grad_norm": 0.2597753703594208, + "learning_rate": 6.3707341271827165e-06, + "loss": 0.0021, + "step": 146700 + }, + { + "epoch": 0.9409523687358298, + "grad_norm": 0.050154637545347214, + "learning_rate": 6.370195858661523e-06, + "loss": 0.0017, + "step": 146710 + }, + { + "epoch": 0.9410165056296158, + "grad_norm": 0.0485159307718277, + "learning_rate": 6.369657572970798e-06, + "loss": 0.0019, + "step": 146720 + }, + { + "epoch": 0.941080642523402, + "grad_norm": 0.06361440569162369, + "learning_rate": 6.369119270117285e-06, + "loss": 0.0017, + "step": 146730 + }, + { + "epoch": 0.941144779417188, + "grad_norm": 0.24076053500175476, + "learning_rate": 6.3685809501077265e-06, + "loss": 0.002, + "step": 146740 + }, + { + "epoch": 0.9412089163109741, + "grad_norm": 0.4782833158969879, + "learning_rate": 6.368042612948872e-06, + "loss": 0.0045, + "step": 146750 + }, + { + "epoch": 0.9412730532047603, + "grad_norm": 0.09444776177406311, + "learning_rate": 6.3675042586474665e-06, + "loss": 0.0023, + "step": 146760 + }, + { + "epoch": 0.9413371900985463, + "grad_norm": 0.11751112341880798, + "learning_rate": 6.366965887210255e-06, + "loss": 0.0016, + "step": 146770 + }, + { + "epoch": 0.9414013269923325, + "grad_norm": 0.1627361923456192, + "learning_rate": 6.366427498643983e-06, + "loss": 0.0014, + "step": 146780 + }, + { + "epoch": 0.9414654638861185, + "grad_norm": 0.050041310489177704, + "learning_rate": 6.3658890929554e-06, + "loss": 0.0029, + "step": 146790 + }, + { + "epoch": 0.9415296007799047, + "grad_norm": 0.040631361305713654, + "learning_rate": 6.365350670151249e-06, + "loss": 0.0029, + "step": 146800 + }, + { + "epoch": 0.9415937376736907, + "grad_norm": 0.10871771723031998, + "learning_rate": 6.364812230238277e-06, + "loss": 0.0017, + "step": 146810 + }, + { + "epoch": 0.9416578745674768, + "grad_norm": 0.09430640190839767, + "learning_rate": 6.364273773223235e-06, + "loss": 0.0039, + "step": 146820 + }, + { + "epoch": 0.9417220114612629, + "grad_norm": 0.0646919384598732, + "learning_rate": 6.3637352991128654e-06, + "loss": 0.0021, + "step": 146830 + }, + { + "epoch": 0.941786148355049, + "grad_norm": 0.27819910645484924, + "learning_rate": 6.363196807913919e-06, + "loss": 0.0032, + "step": 146840 + }, + { + "epoch": 0.9418502852488351, + "grad_norm": 0.29656410217285156, + "learning_rate": 6.362658299633142e-06, + "loss": 0.0032, + "step": 146850 + }, + { + "epoch": 0.9419144221426212, + "grad_norm": 0.15691252052783966, + "learning_rate": 6.362119774277284e-06, + "loss": 0.0021, + "step": 146860 + }, + { + "epoch": 0.9419785590364073, + "grad_norm": 0.02931254915893078, + "learning_rate": 6.36158123185309e-06, + "loss": 0.0026, + "step": 146870 + }, + { + "epoch": 0.9420426959301934, + "grad_norm": 0.3193877637386322, + "learning_rate": 6.361042672367311e-06, + "loss": 0.0019, + "step": 146880 + }, + { + "epoch": 0.9421068328239796, + "grad_norm": 0.10397673398256302, + "learning_rate": 6.360504095826693e-06, + "loss": 0.0013, + "step": 146890 + }, + { + "epoch": 0.9421709697177656, + "grad_norm": 0.10111892223358154, + "learning_rate": 6.359965502237988e-06, + "loss": 0.0025, + "step": 146900 + }, + { + "epoch": 0.9422351066115517, + "grad_norm": 0.07842773199081421, + "learning_rate": 6.3594268916079425e-06, + "loss": 0.0018, + "step": 146910 + }, + { + "epoch": 0.9422992435053378, + "grad_norm": 0.016308825463056564, + "learning_rate": 6.358888263943307e-06, + "loss": 0.0015, + "step": 146920 + }, + { + "epoch": 0.9423633803991239, + "grad_norm": 0.12908826768398285, + "learning_rate": 6.35834961925083e-06, + "loss": 0.0022, + "step": 146930 + }, + { + "epoch": 0.94242751729291, + "grad_norm": 0.058444876223802567, + "learning_rate": 6.357810957537261e-06, + "loss": 0.0016, + "step": 146940 + }, + { + "epoch": 0.9424916541866961, + "grad_norm": 0.20450295507907867, + "learning_rate": 6.357272278809351e-06, + "loss": 0.0025, + "step": 146950 + }, + { + "epoch": 0.9425557910804822, + "grad_norm": 0.0351119339466095, + "learning_rate": 6.3567335830738494e-06, + "loss": 0.0021, + "step": 146960 + }, + { + "epoch": 0.9426199279742683, + "grad_norm": 0.003150160191580653, + "learning_rate": 6.356194870337507e-06, + "loss": 0.0026, + "step": 146970 + }, + { + "epoch": 0.9426840648680543, + "grad_norm": 0.08454978466033936, + "learning_rate": 6.3556561406070724e-06, + "loss": 0.0029, + "step": 146980 + }, + { + "epoch": 0.9427482017618405, + "grad_norm": 0.004040864296257496, + "learning_rate": 6.3551173938892984e-06, + "loss": 0.0012, + "step": 146990 + }, + { + "epoch": 0.9428123386556265, + "grad_norm": 0.08898429572582245, + "learning_rate": 6.3545786301909355e-06, + "loss": 0.0012, + "step": 147000 + }, + { + "epoch": 0.9428764755494127, + "grad_norm": 0.1371237337589264, + "learning_rate": 6.354039849518732e-06, + "loss": 0.002, + "step": 147010 + }, + { + "epoch": 0.9429406124431987, + "grad_norm": 0.07550517469644547, + "learning_rate": 6.353501051879441e-06, + "loss": 0.0018, + "step": 147020 + }, + { + "epoch": 0.9430047493369849, + "grad_norm": 0.137441948056221, + "learning_rate": 6.352962237279818e-06, + "loss": 0.0031, + "step": 147030 + }, + { + "epoch": 0.943068886230771, + "grad_norm": 0.1083943098783493, + "learning_rate": 6.352423405726609e-06, + "loss": 0.0028, + "step": 147040 + }, + { + "epoch": 0.943133023124557, + "grad_norm": 0.056081295013427734, + "learning_rate": 6.3518845572265685e-06, + "loss": 0.0015, + "step": 147050 + }, + { + "epoch": 0.9431971600183432, + "grad_norm": 0.37494710087776184, + "learning_rate": 6.351345691786448e-06, + "loss": 0.0016, + "step": 147060 + }, + { + "epoch": 0.9432612969121292, + "grad_norm": 0.03048224374651909, + "learning_rate": 6.350806809413001e-06, + "loss": 0.0014, + "step": 147070 + }, + { + "epoch": 0.9433254338059154, + "grad_norm": 0.09839826822280884, + "learning_rate": 6.35026791011298e-06, + "loss": 0.0033, + "step": 147080 + }, + { + "epoch": 0.9433895706997014, + "grad_norm": 0.08801209181547165, + "learning_rate": 6.349728993893135e-06, + "loss": 0.003, + "step": 147090 + }, + { + "epoch": 0.9434537075934876, + "grad_norm": 0.030154164880514145, + "learning_rate": 6.349190060760222e-06, + "loss": 0.0021, + "step": 147100 + }, + { + "epoch": 0.9435178444872736, + "grad_norm": 0.14104928076267242, + "learning_rate": 6.348651110720993e-06, + "loss": 0.003, + "step": 147110 + }, + { + "epoch": 0.9435819813810598, + "grad_norm": 0.02902240678668022, + "learning_rate": 6.348112143782203e-06, + "loss": 0.0028, + "step": 147120 + }, + { + "epoch": 0.9436461182748458, + "grad_norm": 0.007148304954171181, + "learning_rate": 6.347573159950603e-06, + "loss": 0.0017, + "step": 147130 + }, + { + "epoch": 0.9437102551686319, + "grad_norm": 0.12616923451423645, + "learning_rate": 6.347034159232948e-06, + "loss": 0.0014, + "step": 147140 + }, + { + "epoch": 0.943774392062418, + "grad_norm": 0.05462987348437309, + "learning_rate": 6.346495141635992e-06, + "loss": 0.0015, + "step": 147150 + }, + { + "epoch": 0.9438385289562041, + "grad_norm": 0.12261845171451569, + "learning_rate": 6.345956107166491e-06, + "loss": 0.0016, + "step": 147160 + }, + { + "epoch": 0.9439026658499903, + "grad_norm": 0.18668530881404877, + "learning_rate": 6.345417055831198e-06, + "loss": 0.0018, + "step": 147170 + }, + { + "epoch": 0.9439668027437763, + "grad_norm": 0.07512176036834717, + "learning_rate": 6.344877987636867e-06, + "loss": 0.0014, + "step": 147180 + }, + { + "epoch": 0.9440309396375625, + "grad_norm": 0.17846761643886566, + "learning_rate": 6.3443389025902535e-06, + "loss": 0.0021, + "step": 147190 + }, + { + "epoch": 0.9440950765313485, + "grad_norm": 0.03234937787055969, + "learning_rate": 6.343799800698114e-06, + "loss": 0.0016, + "step": 147200 + }, + { + "epoch": 0.9441592134251346, + "grad_norm": 0.095039963722229, + "learning_rate": 6.3432606819672006e-06, + "loss": 0.0044, + "step": 147210 + }, + { + "epoch": 0.9442233503189207, + "grad_norm": 0.09685095399618149, + "learning_rate": 6.342721546404272e-06, + "loss": 0.0017, + "step": 147220 + }, + { + "epoch": 0.9442874872127068, + "grad_norm": 0.12833555042743683, + "learning_rate": 6.342182394016083e-06, + "loss": 0.0024, + "step": 147230 + }, + { + "epoch": 0.9443516241064929, + "grad_norm": 0.06482169032096863, + "learning_rate": 6.34164322480939e-06, + "loss": 0.0021, + "step": 147240 + }, + { + "epoch": 0.944415761000279, + "grad_norm": 0.10324189066886902, + "learning_rate": 6.3411040387909464e-06, + "loss": 0.003, + "step": 147250 + }, + { + "epoch": 0.9444798978940651, + "grad_norm": 0.046588193625211716, + "learning_rate": 6.3405648359675136e-06, + "loss": 0.002, + "step": 147260 + }, + { + "epoch": 0.9445440347878512, + "grad_norm": 0.30959293246269226, + "learning_rate": 6.340025616345842e-06, + "loss": 0.0023, + "step": 147270 + }, + { + "epoch": 0.9446081716816372, + "grad_norm": 0.02668837085366249, + "learning_rate": 6.339486379932693e-06, + "loss": 0.001, + "step": 147280 + }, + { + "epoch": 0.9446723085754234, + "grad_norm": 0.187612384557724, + "learning_rate": 6.338947126734823e-06, + "loss": 0.003, + "step": 147290 + }, + { + "epoch": 0.9447364454692094, + "grad_norm": 0.14825215935707092, + "learning_rate": 6.338407856758988e-06, + "loss": 0.0027, + "step": 147300 + }, + { + "epoch": 0.9448005823629956, + "grad_norm": 0.1354726254940033, + "learning_rate": 6.337868570011946e-06, + "loss": 0.0034, + "step": 147310 + }, + { + "epoch": 0.9448647192567817, + "grad_norm": 0.04497535526752472, + "learning_rate": 6.337329266500456e-06, + "loss": 0.0025, + "step": 147320 + }, + { + "epoch": 0.9449288561505678, + "grad_norm": 0.1626058667898178, + "learning_rate": 6.336789946231272e-06, + "loss": 0.0012, + "step": 147330 + }, + { + "epoch": 0.9449929930443539, + "grad_norm": 0.5120843052864075, + "learning_rate": 6.3362506092111565e-06, + "loss": 0.0033, + "step": 147340 + }, + { + "epoch": 0.94505712993814, + "grad_norm": 0.08398311585187912, + "learning_rate": 6.335711255446866e-06, + "loss": 0.0026, + "step": 147350 + }, + { + "epoch": 0.9451212668319261, + "grad_norm": 0.24006327986717224, + "learning_rate": 6.3351718849451575e-06, + "loss": 0.0021, + "step": 147360 + }, + { + "epoch": 0.9451854037257121, + "grad_norm": 0.038649216294288635, + "learning_rate": 6.334632497712792e-06, + "loss": 0.0022, + "step": 147370 + }, + { + "epoch": 0.9452495406194983, + "grad_norm": 0.2920440435409546, + "learning_rate": 6.334093093756527e-06, + "loss": 0.0036, + "step": 147380 + }, + { + "epoch": 0.9453136775132843, + "grad_norm": 0.36874157190322876, + "learning_rate": 6.3335536730831225e-06, + "loss": 0.0045, + "step": 147390 + }, + { + "epoch": 0.9453778144070705, + "grad_norm": 0.13857769966125488, + "learning_rate": 6.333014235699338e-06, + "loss": 0.0016, + "step": 147400 + }, + { + "epoch": 0.9454419513008565, + "grad_norm": 0.11178428679704666, + "learning_rate": 6.332474781611931e-06, + "loss": 0.0021, + "step": 147410 + }, + { + "epoch": 0.9455060881946427, + "grad_norm": 0.349579781293869, + "learning_rate": 6.331935310827664e-06, + "loss": 0.0031, + "step": 147420 + }, + { + "epoch": 0.9455702250884287, + "grad_norm": 0.03589482605457306, + "learning_rate": 6.331395823353295e-06, + "loss": 0.0008, + "step": 147430 + }, + { + "epoch": 0.9456343619822148, + "grad_norm": 0.011520350351929665, + "learning_rate": 6.3308563191955844e-06, + "loss": 0.0012, + "step": 147440 + }, + { + "epoch": 0.9456984988760009, + "grad_norm": 0.33133986592292786, + "learning_rate": 6.330316798361294e-06, + "loss": 0.0037, + "step": 147450 + }, + { + "epoch": 0.945762635769787, + "grad_norm": 0.08037800341844559, + "learning_rate": 6.329777260857181e-06, + "loss": 0.0018, + "step": 147460 + }, + { + "epoch": 0.9458267726635732, + "grad_norm": 0.05772693455219269, + "learning_rate": 6.329237706690011e-06, + "loss": 0.0024, + "step": 147470 + }, + { + "epoch": 0.9458909095573592, + "grad_norm": 0.07055383920669556, + "learning_rate": 6.328698135866542e-06, + "loss": 0.002, + "step": 147480 + }, + { + "epoch": 0.9459550464511454, + "grad_norm": 0.0548737533390522, + "learning_rate": 6.3281585483935355e-06, + "loss": 0.002, + "step": 147490 + }, + { + "epoch": 0.9460191833449314, + "grad_norm": 0.1246550902724266, + "learning_rate": 6.3276189442777534e-06, + "loss": 0.0016, + "step": 147500 + }, + { + "epoch": 0.9460833202387176, + "grad_norm": 0.12380314618349075, + "learning_rate": 6.327079323525956e-06, + "loss": 0.0021, + "step": 147510 + }, + { + "epoch": 0.9461474571325036, + "grad_norm": 0.04626064375042915, + "learning_rate": 6.326539686144908e-06, + "loss": 0.0021, + "step": 147520 + }, + { + "epoch": 0.9462115940262897, + "grad_norm": 0.1103246882557869, + "learning_rate": 6.326000032141368e-06, + "loss": 0.0027, + "step": 147530 + }, + { + "epoch": 0.9462757309200758, + "grad_norm": 0.08117210865020752, + "learning_rate": 6.325460361522102e-06, + "loss": 0.002, + "step": 147540 + }, + { + "epoch": 0.9463398678138619, + "grad_norm": 0.09314190596342087, + "learning_rate": 6.324920674293868e-06, + "loss": 0.0017, + "step": 147550 + }, + { + "epoch": 0.946404004707648, + "grad_norm": 0.03479532524943352, + "learning_rate": 6.324380970463433e-06, + "loss": 0.0023, + "step": 147560 + }, + { + "epoch": 0.9464681416014341, + "grad_norm": 0.12663571536540985, + "learning_rate": 6.3238412500375564e-06, + "loss": 0.0016, + "step": 147570 + }, + { + "epoch": 0.9465322784952201, + "grad_norm": 0.04988948628306389, + "learning_rate": 6.323301513023004e-06, + "loss": 0.004, + "step": 147580 + }, + { + "epoch": 0.9465964153890063, + "grad_norm": 0.010820391587913036, + "learning_rate": 6.322761759426537e-06, + "loss": 0.0021, + "step": 147590 + }, + { + "epoch": 0.9466605522827924, + "grad_norm": 0.10937017947435379, + "learning_rate": 6.322221989254921e-06, + "loss": 0.0019, + "step": 147600 + }, + { + "epoch": 0.9467246891765785, + "grad_norm": 0.08107168227434158, + "learning_rate": 6.321682202514917e-06, + "loss": 0.0025, + "step": 147610 + }, + { + "epoch": 0.9467888260703646, + "grad_norm": 0.03284059092402458, + "learning_rate": 6.321142399213291e-06, + "loss": 0.0027, + "step": 147620 + }, + { + "epoch": 0.9468529629641507, + "grad_norm": 0.04868131875991821, + "learning_rate": 6.320602579356807e-06, + "loss": 0.001, + "step": 147630 + }, + { + "epoch": 0.9469170998579368, + "grad_norm": 0.19157037138938904, + "learning_rate": 6.320062742952229e-06, + "loss": 0.0033, + "step": 147640 + }, + { + "epoch": 0.9469812367517229, + "grad_norm": 0.1776953786611557, + "learning_rate": 6.319522890006319e-06, + "loss": 0.0016, + "step": 147650 + }, + { + "epoch": 0.947045373645509, + "grad_norm": 0.2312333732843399, + "learning_rate": 6.318983020525846e-06, + "loss": 0.0038, + "step": 147660 + }, + { + "epoch": 0.947109510539295, + "grad_norm": 0.036973197013139725, + "learning_rate": 6.318443134517573e-06, + "loss": 0.003, + "step": 147670 + }, + { + "epoch": 0.9471736474330812, + "grad_norm": 0.20453284680843353, + "learning_rate": 6.317903231988266e-06, + "loss": 0.0031, + "step": 147680 + }, + { + "epoch": 0.9472377843268672, + "grad_norm": 0.12957876920700073, + "learning_rate": 6.317363312944689e-06, + "loss": 0.0033, + "step": 147690 + }, + { + "epoch": 0.9473019212206534, + "grad_norm": 0.07207232713699341, + "learning_rate": 6.316823377393608e-06, + "loss": 0.0014, + "step": 147700 + }, + { + "epoch": 0.9473660581144394, + "grad_norm": 0.05391666665673256, + "learning_rate": 6.316283425341789e-06, + "loss": 0.0038, + "step": 147710 + }, + { + "epoch": 0.9474301950082256, + "grad_norm": 0.06373003125190735, + "learning_rate": 6.315743456795997e-06, + "loss": 0.002, + "step": 147720 + }, + { + "epoch": 0.9474943319020116, + "grad_norm": 0.03036843053996563, + "learning_rate": 6.315203471763001e-06, + "loss": 0.003, + "step": 147730 + }, + { + "epoch": 0.9475584687957977, + "grad_norm": 0.041159600019454956, + "learning_rate": 6.314663470249564e-06, + "loss": 0.0029, + "step": 147740 + }, + { + "epoch": 0.9476226056895839, + "grad_norm": 0.04843321442604065, + "learning_rate": 6.314123452262455e-06, + "loss": 0.0021, + "step": 147750 + }, + { + "epoch": 0.9476867425833699, + "grad_norm": 0.15022115409374237, + "learning_rate": 6.31358341780844e-06, + "loss": 0.0019, + "step": 147760 + }, + { + "epoch": 0.9477508794771561, + "grad_norm": 0.009687594138085842, + "learning_rate": 6.313043366894287e-06, + "loss": 0.0024, + "step": 147770 + }, + { + "epoch": 0.9478150163709421, + "grad_norm": 0.08602087944746017, + "learning_rate": 6.31250329952676e-06, + "loss": 0.0022, + "step": 147780 + }, + { + "epoch": 0.9478791532647283, + "grad_norm": 0.24058887362480164, + "learning_rate": 6.31196321571263e-06, + "loss": 0.0055, + "step": 147790 + }, + { + "epoch": 0.9479432901585143, + "grad_norm": 0.04920877516269684, + "learning_rate": 6.311423115458663e-06, + "loss": 0.0022, + "step": 147800 + }, + { + "epoch": 0.9480074270523005, + "grad_norm": 0.10023945569992065, + "learning_rate": 6.310882998771627e-06, + "loss": 0.002, + "step": 147810 + }, + { + "epoch": 0.9480715639460865, + "grad_norm": 0.19308824837207794, + "learning_rate": 6.31034286565829e-06, + "loss": 0.0022, + "step": 147820 + }, + { + "epoch": 0.9481357008398726, + "grad_norm": 0.05049790441989899, + "learning_rate": 6.30980271612542e-06, + "loss": 0.0013, + "step": 147830 + }, + { + "epoch": 0.9481998377336587, + "grad_norm": 0.1057187020778656, + "learning_rate": 6.309262550179787e-06, + "loss": 0.0028, + "step": 147840 + }, + { + "epoch": 0.9482639746274448, + "grad_norm": 0.14519499242305756, + "learning_rate": 6.308722367828158e-06, + "loss": 0.0029, + "step": 147850 + }, + { + "epoch": 0.9483281115212309, + "grad_norm": 0.047008804976940155, + "learning_rate": 6.308182169077301e-06, + "loss": 0.0027, + "step": 147860 + }, + { + "epoch": 0.948392248415017, + "grad_norm": 0.06405046582221985, + "learning_rate": 6.307641953933988e-06, + "loss": 0.002, + "step": 147870 + }, + { + "epoch": 0.9484563853088032, + "grad_norm": 0.06086205318570137, + "learning_rate": 6.307101722404987e-06, + "loss": 0.0009, + "step": 147880 + }, + { + "epoch": 0.9485205222025892, + "grad_norm": 0.26466992497444153, + "learning_rate": 6.306561474497066e-06, + "loss": 0.0038, + "step": 147890 + }, + { + "epoch": 0.9485846590963753, + "grad_norm": 0.045898932963609695, + "learning_rate": 6.306021210216998e-06, + "loss": 0.0018, + "step": 147900 + }, + { + "epoch": 0.9486487959901614, + "grad_norm": 0.0372375026345253, + "learning_rate": 6.305480929571549e-06, + "loss": 0.0025, + "step": 147910 + }, + { + "epoch": 0.9487129328839475, + "grad_norm": 0.009371262043714523, + "learning_rate": 6.304940632567492e-06, + "loss": 0.0012, + "step": 147920 + }, + { + "epoch": 0.9487770697777336, + "grad_norm": 0.19069337844848633, + "learning_rate": 6.304400319211596e-06, + "loss": 0.003, + "step": 147930 + }, + { + "epoch": 0.9488412066715197, + "grad_norm": 0.027903838083148003, + "learning_rate": 6.303859989510631e-06, + "loss": 0.0022, + "step": 147940 + }, + { + "epoch": 0.9489053435653058, + "grad_norm": 0.007853977382183075, + "learning_rate": 6.303319643471368e-06, + "loss": 0.0013, + "step": 147950 + }, + { + "epoch": 0.9489694804590919, + "grad_norm": 0.101915642619133, + "learning_rate": 6.302779281100581e-06, + "loss": 0.0025, + "step": 147960 + }, + { + "epoch": 0.949033617352878, + "grad_norm": 0.26874613761901855, + "learning_rate": 6.302238902405035e-06, + "loss": 0.0069, + "step": 147970 + }, + { + "epoch": 0.9490977542466641, + "grad_norm": 0.23425744473934174, + "learning_rate": 6.301698507391508e-06, + "loss": 0.0038, + "step": 147980 + }, + { + "epoch": 0.9491618911404501, + "grad_norm": 0.08065503090620041, + "learning_rate": 6.301158096066767e-06, + "loss": 0.0014, + "step": 147990 + }, + { + "epoch": 0.9492260280342363, + "grad_norm": 0.010573102161288261, + "learning_rate": 6.300617668437585e-06, + "loss": 0.0026, + "step": 148000 + }, + { + "epoch": 0.9492901649280223, + "grad_norm": 0.166752889752388, + "learning_rate": 6.300077224510734e-06, + "loss": 0.0016, + "step": 148010 + }, + { + "epoch": 0.9493543018218085, + "grad_norm": 0.02941727079451084, + "learning_rate": 6.299536764292987e-06, + "loss": 0.0039, + "step": 148020 + }, + { + "epoch": 0.9494184387155946, + "grad_norm": 0.012016337364912033, + "learning_rate": 6.298996287791116e-06, + "loss": 0.0021, + "step": 148030 + }, + { + "epoch": 0.9494825756093807, + "grad_norm": 0.2681630849838257, + "learning_rate": 6.298455795011892e-06, + "loss": 0.0024, + "step": 148040 + }, + { + "epoch": 0.9495467125031668, + "grad_norm": 0.06486742198467255, + "learning_rate": 6.2979152859620886e-06, + "loss": 0.0027, + "step": 148050 + }, + { + "epoch": 0.9496108493969528, + "grad_norm": 0.2404012829065323, + "learning_rate": 6.297374760648479e-06, + "loss": 0.0018, + "step": 148060 + }, + { + "epoch": 0.949674986290739, + "grad_norm": 0.21434836089611053, + "learning_rate": 6.296834219077837e-06, + "loss": 0.003, + "step": 148070 + }, + { + "epoch": 0.949739123184525, + "grad_norm": 0.17221471667289734, + "learning_rate": 6.2962936612569355e-06, + "loss": 0.0039, + "step": 148080 + }, + { + "epoch": 0.9498032600783112, + "grad_norm": 0.0725397989153862, + "learning_rate": 6.295753087192549e-06, + "loss": 0.0011, + "step": 148090 + }, + { + "epoch": 0.9498673969720972, + "grad_norm": 0.09452808648347855, + "learning_rate": 6.295212496891449e-06, + "loss": 0.0023, + "step": 148100 + }, + { + "epoch": 0.9499315338658834, + "grad_norm": 0.06127987802028656, + "learning_rate": 6.294671890360411e-06, + "loss": 0.0034, + "step": 148110 + }, + { + "epoch": 0.9499956707596694, + "grad_norm": 0.09638182073831558, + "learning_rate": 6.294131267606208e-06, + "loss": 0.002, + "step": 148120 + }, + { + "epoch": 0.9500598076534555, + "grad_norm": 0.0705321803689003, + "learning_rate": 6.293590628635617e-06, + "loss": 0.0031, + "step": 148130 + }, + { + "epoch": 0.9501239445472416, + "grad_norm": 0.1473604440689087, + "learning_rate": 6.2930499734554094e-06, + "loss": 0.0013, + "step": 148140 + }, + { + "epoch": 0.9501880814410277, + "grad_norm": 0.21525196731090546, + "learning_rate": 6.292509302072364e-06, + "loss": 0.005, + "step": 148150 + }, + { + "epoch": 0.9502522183348139, + "grad_norm": 0.03943829610943794, + "learning_rate": 6.29196861449325e-06, + "loss": 0.0018, + "step": 148160 + }, + { + "epoch": 0.9503163552285999, + "grad_norm": 0.2596535086631775, + "learning_rate": 6.291427910724849e-06, + "loss": 0.0011, + "step": 148170 + }, + { + "epoch": 0.9503804921223861, + "grad_norm": 0.1013018786907196, + "learning_rate": 6.290887190773931e-06, + "loss": 0.0029, + "step": 148180 + }, + { + "epoch": 0.9504446290161721, + "grad_norm": 0.5317591428756714, + "learning_rate": 6.2903464546472745e-06, + "loss": 0.0021, + "step": 148190 + }, + { + "epoch": 0.9505087659099583, + "grad_norm": 0.15050390362739563, + "learning_rate": 6.289805702351654e-06, + "loss": 0.0039, + "step": 148200 + }, + { + "epoch": 0.9505729028037443, + "grad_norm": 0.08444549143314362, + "learning_rate": 6.2892649338938474e-06, + "loss": 0.0019, + "step": 148210 + }, + { + "epoch": 0.9506370396975304, + "grad_norm": 0.15187714993953705, + "learning_rate": 6.2887241492806295e-06, + "loss": 0.0028, + "step": 148220 + }, + { + "epoch": 0.9507011765913165, + "grad_norm": 0.04425651952624321, + "learning_rate": 6.288183348518777e-06, + "loss": 0.0016, + "step": 148230 + }, + { + "epoch": 0.9507653134851026, + "grad_norm": 0.20383568108081818, + "learning_rate": 6.287642531615067e-06, + "loss": 0.0014, + "step": 148240 + }, + { + "epoch": 0.9508294503788887, + "grad_norm": 0.19574086368083954, + "learning_rate": 6.287101698576274e-06, + "loss": 0.0028, + "step": 148250 + }, + { + "epoch": 0.9508935872726748, + "grad_norm": 0.03341750428080559, + "learning_rate": 6.28656084940918e-06, + "loss": 0.0011, + "step": 148260 + }, + { + "epoch": 0.9509577241664608, + "grad_norm": 0.17480899393558502, + "learning_rate": 6.286019984120556e-06, + "loss": 0.0022, + "step": 148270 + }, + { + "epoch": 0.951021861060247, + "grad_norm": 0.14411622285842896, + "learning_rate": 6.2854791027171845e-06, + "loss": 0.0015, + "step": 148280 + }, + { + "epoch": 0.951085997954033, + "grad_norm": 0.048610031604766846, + "learning_rate": 6.284938205205839e-06, + "loss": 0.0019, + "step": 148290 + }, + { + "epoch": 0.9511501348478192, + "grad_norm": 0.1088913083076477, + "learning_rate": 6.2843972915933025e-06, + "loss": 0.0037, + "step": 148300 + }, + { + "epoch": 0.9512142717416053, + "grad_norm": 0.020047571510076523, + "learning_rate": 6.283856361886347e-06, + "loss": 0.0016, + "step": 148310 + }, + { + "epoch": 0.9512784086353914, + "grad_norm": 0.1922512650489807, + "learning_rate": 6.283315416091755e-06, + "loss": 0.0017, + "step": 148320 + }, + { + "epoch": 0.9513425455291775, + "grad_norm": 0.2446134388446808, + "learning_rate": 6.2827744542163035e-06, + "loss": 0.0029, + "step": 148330 + }, + { + "epoch": 0.9514066824229636, + "grad_norm": 0.22289535403251648, + "learning_rate": 6.282233476266773e-06, + "loss": 0.0024, + "step": 148340 + }, + { + "epoch": 0.9514708193167497, + "grad_norm": 0.25141197443008423, + "learning_rate": 6.281692482249938e-06, + "loss": 0.0034, + "step": 148350 + }, + { + "epoch": 0.9515349562105357, + "grad_norm": 0.0894373208284378, + "learning_rate": 6.281151472172581e-06, + "loss": 0.0022, + "step": 148360 + }, + { + "epoch": 0.9515990931043219, + "grad_norm": 0.151262566447258, + "learning_rate": 6.2806104460414805e-06, + "loss": 0.0013, + "step": 148370 + }, + { + "epoch": 0.9516632299981079, + "grad_norm": 0.15166862308979034, + "learning_rate": 6.280069403863416e-06, + "loss": 0.0015, + "step": 148380 + }, + { + "epoch": 0.9517273668918941, + "grad_norm": 0.11685135215520859, + "learning_rate": 6.279528345645168e-06, + "loss": 0.0018, + "step": 148390 + }, + { + "epoch": 0.9517915037856801, + "grad_norm": 0.06985295563936234, + "learning_rate": 6.278987271393514e-06, + "loss": 0.0024, + "step": 148400 + }, + { + "epoch": 0.9518556406794663, + "grad_norm": 0.2967289090156555, + "learning_rate": 6.278446181115237e-06, + "loss": 0.0038, + "step": 148410 + }, + { + "epoch": 0.9519197775732523, + "grad_norm": 0.14818093180656433, + "learning_rate": 6.277905074817112e-06, + "loss": 0.0024, + "step": 148420 + }, + { + "epoch": 0.9519839144670385, + "grad_norm": 0.1534287929534912, + "learning_rate": 6.277363952505926e-06, + "loss": 0.0036, + "step": 148430 + }, + { + "epoch": 0.9520480513608246, + "grad_norm": 0.06887183338403702, + "learning_rate": 6.276822814188457e-06, + "loss": 0.0026, + "step": 148440 + }, + { + "epoch": 0.9521121882546106, + "grad_norm": 0.08078733831644058, + "learning_rate": 6.276281659871485e-06, + "loss": 0.0028, + "step": 148450 + }, + { + "epoch": 0.9521763251483968, + "grad_norm": 0.4600955545902252, + "learning_rate": 6.275740489561791e-06, + "loss": 0.0048, + "step": 148460 + }, + { + "epoch": 0.9522404620421828, + "grad_norm": 0.06801516562700272, + "learning_rate": 6.275199303266158e-06, + "loss": 0.0022, + "step": 148470 + }, + { + "epoch": 0.952304598935969, + "grad_norm": 0.10994677245616913, + "learning_rate": 6.274658100991365e-06, + "loss": 0.0013, + "step": 148480 + }, + { + "epoch": 0.952368735829755, + "grad_norm": 0.05557885766029358, + "learning_rate": 6.274116882744197e-06, + "loss": 0.0014, + "step": 148490 + }, + { + "epoch": 0.9524328727235412, + "grad_norm": 0.07244043052196503, + "learning_rate": 6.273575648531433e-06, + "loss": 0.0016, + "step": 148500 + }, + { + "epoch": 0.9524970096173272, + "grad_norm": 0.11104308813810349, + "learning_rate": 6.2730343983598556e-06, + "loss": 0.0039, + "step": 148510 + }, + { + "epoch": 0.9525611465111133, + "grad_norm": 0.1490146517753601, + "learning_rate": 6.272493132236247e-06, + "loss": 0.0014, + "step": 148520 + }, + { + "epoch": 0.9526252834048994, + "grad_norm": 0.09591233730316162, + "learning_rate": 6.2719518501673905e-06, + "loss": 0.0018, + "step": 148530 + }, + { + "epoch": 0.9526894202986855, + "grad_norm": 0.03019757568836212, + "learning_rate": 6.271410552160069e-06, + "loss": 0.0015, + "step": 148540 + }, + { + "epoch": 0.9527535571924716, + "grad_norm": 0.06000197306275368, + "learning_rate": 6.270869238221064e-06, + "loss": 0.0019, + "step": 148550 + }, + { + "epoch": 0.9528176940862577, + "grad_norm": 0.11967896670103073, + "learning_rate": 6.270327908357159e-06, + "loss": 0.0021, + "step": 148560 + }, + { + "epoch": 0.9528818309800438, + "grad_norm": 0.03409275785088539, + "learning_rate": 6.269786562575136e-06, + "loss": 0.0026, + "step": 148570 + }, + { + "epoch": 0.9529459678738299, + "grad_norm": 0.42724716663360596, + "learning_rate": 6.269245200881781e-06, + "loss": 0.0026, + "step": 148580 + }, + { + "epoch": 0.953010104767616, + "grad_norm": 0.046714745461940765, + "learning_rate": 6.268703823283877e-06, + "loss": 0.0024, + "step": 148590 + }, + { + "epoch": 0.9530742416614021, + "grad_norm": 0.028787871822714806, + "learning_rate": 6.268162429788209e-06, + "loss": 0.0013, + "step": 148600 + }, + { + "epoch": 0.9531383785551882, + "grad_norm": 0.10607559978961945, + "learning_rate": 6.267621020401557e-06, + "loss": 0.0025, + "step": 148610 + }, + { + "epoch": 0.9532025154489743, + "grad_norm": 0.2284134030342102, + "learning_rate": 6.2670795951307085e-06, + "loss": 0.0036, + "step": 148620 + }, + { + "epoch": 0.9532666523427604, + "grad_norm": 0.027532169595360756, + "learning_rate": 6.266538153982446e-06, + "loss": 0.0018, + "step": 148630 + }, + { + "epoch": 0.9533307892365465, + "grad_norm": 0.093606136739254, + "learning_rate": 6.265996696963556e-06, + "loss": 0.0035, + "step": 148640 + }, + { + "epoch": 0.9533949261303326, + "grad_norm": 0.10961989313364029, + "learning_rate": 6.265455224080823e-06, + "loss": 0.0068, + "step": 148650 + }, + { + "epoch": 0.9534590630241186, + "grad_norm": 0.0573560893535614, + "learning_rate": 6.264913735341032e-06, + "loss": 0.0024, + "step": 148660 + }, + { + "epoch": 0.9535231999179048, + "grad_norm": 0.05544622987508774, + "learning_rate": 6.264372230750967e-06, + "loss": 0.0027, + "step": 148670 + }, + { + "epoch": 0.9535873368116908, + "grad_norm": 0.028547391295433044, + "learning_rate": 6.2638307103174145e-06, + "loss": 0.0013, + "step": 148680 + }, + { + "epoch": 0.953651473705477, + "grad_norm": 0.06154303252696991, + "learning_rate": 6.26328917404716e-06, + "loss": 0.0015, + "step": 148690 + }, + { + "epoch": 0.953715610599263, + "grad_norm": 0.12723928689956665, + "learning_rate": 6.26274762194699e-06, + "loss": 0.0022, + "step": 148700 + }, + { + "epoch": 0.9537797474930492, + "grad_norm": 0.060413897037506104, + "learning_rate": 6.262206054023688e-06, + "loss": 0.002, + "step": 148710 + }, + { + "epoch": 0.9538438843868352, + "grad_norm": 0.0428997203707695, + "learning_rate": 6.261664470284044e-06, + "loss": 0.0043, + "step": 148720 + }, + { + "epoch": 0.9539080212806214, + "grad_norm": 0.03595926612615585, + "learning_rate": 6.261122870734841e-06, + "loss": 0.0033, + "step": 148730 + }, + { + "epoch": 0.9539721581744075, + "grad_norm": 0.03506409004330635, + "learning_rate": 6.2605812553828675e-06, + "loss": 0.0018, + "step": 148740 + }, + { + "epoch": 0.9540362950681935, + "grad_norm": 0.034021202474832535, + "learning_rate": 6.26003962423491e-06, + "loss": 0.0021, + "step": 148750 + }, + { + "epoch": 0.9541004319619797, + "grad_norm": 0.004513194784522057, + "learning_rate": 6.259497977297756e-06, + "loss": 0.0015, + "step": 148760 + }, + { + "epoch": 0.9541645688557657, + "grad_norm": 0.05352545529603958, + "learning_rate": 6.258956314578193e-06, + "loss": 0.0029, + "step": 148770 + }, + { + "epoch": 0.9542287057495519, + "grad_norm": 0.04517129808664322, + "learning_rate": 6.258414636083005e-06, + "loss": 0.0021, + "step": 148780 + }, + { + "epoch": 0.9542928426433379, + "grad_norm": 0.026336653158068657, + "learning_rate": 6.257872941818984e-06, + "loss": 0.0023, + "step": 148790 + }, + { + "epoch": 0.9543569795371241, + "grad_norm": 0.09534820914268494, + "learning_rate": 6.257331231792915e-06, + "loss": 0.0018, + "step": 148800 + }, + { + "epoch": 0.9544211164309101, + "grad_norm": 0.06527793407440186, + "learning_rate": 6.256789506011588e-06, + "loss": 0.0013, + "step": 148810 + }, + { + "epoch": 0.9544852533246962, + "grad_norm": 0.16163085401058197, + "learning_rate": 6.25624776448179e-06, + "loss": 0.0031, + "step": 148820 + }, + { + "epoch": 0.9545493902184823, + "grad_norm": 0.0022701562847942114, + "learning_rate": 6.25570600721031e-06, + "loss": 0.0012, + "step": 148830 + }, + { + "epoch": 0.9546135271122684, + "grad_norm": 0.4319455325603485, + "learning_rate": 6.255164234203936e-06, + "loss": 0.0023, + "step": 148840 + }, + { + "epoch": 0.9546776640060545, + "grad_norm": 0.159915953874588, + "learning_rate": 6.254622445469458e-06, + "loss": 0.0025, + "step": 148850 + }, + { + "epoch": 0.9547418008998406, + "grad_norm": 0.24642303586006165, + "learning_rate": 6.254080641013662e-06, + "loss": 0.0043, + "step": 148860 + }, + { + "epoch": 0.9548059377936268, + "grad_norm": 0.09090591967105865, + "learning_rate": 6.253538820843341e-06, + "loss": 0.0029, + "step": 148870 + }, + { + "epoch": 0.9548700746874128, + "grad_norm": 0.16093572974205017, + "learning_rate": 6.252996984965283e-06, + "loss": 0.0024, + "step": 148880 + }, + { + "epoch": 0.954934211581199, + "grad_norm": 0.06859154254198074, + "learning_rate": 6.252455133386277e-06, + "loss": 0.0023, + "step": 148890 + }, + { + "epoch": 0.954998348474985, + "grad_norm": 0.09639650583267212, + "learning_rate": 6.251913266113112e-06, + "loss": 0.0029, + "step": 148900 + }, + { + "epoch": 0.9550624853687711, + "grad_norm": 0.09566006064414978, + "learning_rate": 6.25137138315258e-06, + "loss": 0.0028, + "step": 148910 + }, + { + "epoch": 0.9551266222625572, + "grad_norm": 0.08538588881492615, + "learning_rate": 6.250829484511469e-06, + "loss": 0.0026, + "step": 148920 + }, + { + "epoch": 0.9551907591563433, + "grad_norm": 0.08481060713529587, + "learning_rate": 6.2502875701965715e-06, + "loss": 0.0027, + "step": 148930 + }, + { + "epoch": 0.9552548960501294, + "grad_norm": 0.11255362629890442, + "learning_rate": 6.249745640214677e-06, + "loss": 0.0032, + "step": 148940 + }, + { + "epoch": 0.9553190329439155, + "grad_norm": 0.11876288056373596, + "learning_rate": 6.249203694572577e-06, + "loss": 0.0026, + "step": 148950 + }, + { + "epoch": 0.9553831698377016, + "grad_norm": 0.07284615933895111, + "learning_rate": 6.248661733277062e-06, + "loss": 0.0016, + "step": 148960 + }, + { + "epoch": 0.9554473067314877, + "grad_norm": 0.12033560872077942, + "learning_rate": 6.2481197563349215e-06, + "loss": 0.0034, + "step": 148970 + }, + { + "epoch": 0.9555114436252737, + "grad_norm": 0.15866336226463318, + "learning_rate": 6.24757776375295e-06, + "loss": 0.0019, + "step": 148980 + }, + { + "epoch": 0.9555755805190599, + "grad_norm": 0.024077093228697777, + "learning_rate": 6.247035755537937e-06, + "loss": 0.0027, + "step": 148990 + }, + { + "epoch": 0.9556397174128459, + "grad_norm": 0.10702551156282425, + "learning_rate": 6.246493731696676e-06, + "loss": 0.0043, + "step": 149000 + }, + { + "epoch": 0.9557038543066321, + "grad_norm": 0.13556323945522308, + "learning_rate": 6.245951692235955e-06, + "loss": 0.0028, + "step": 149010 + }, + { + "epoch": 0.9557679912004182, + "grad_norm": 0.08915980160236359, + "learning_rate": 6.2454096371625715e-06, + "loss": 0.0035, + "step": 149020 + }, + { + "epoch": 0.9558321280942043, + "grad_norm": 0.08045701682567596, + "learning_rate": 6.244867566483313e-06, + "loss": 0.0016, + "step": 149030 + }, + { + "epoch": 0.9558962649879904, + "grad_norm": 0.03717053681612015, + "learning_rate": 6.244325480204976e-06, + "loss": 0.002, + "step": 149040 + }, + { + "epoch": 0.9559604018817764, + "grad_norm": 0.0456254780292511, + "learning_rate": 6.243783378334349e-06, + "loss": 0.003, + "step": 149050 + }, + { + "epoch": 0.9560245387755626, + "grad_norm": 0.0967431589961052, + "learning_rate": 6.243241260878229e-06, + "loss": 0.0013, + "step": 149060 + }, + { + "epoch": 0.9560886756693486, + "grad_norm": 0.10826282948255539, + "learning_rate": 6.242699127843408e-06, + "loss": 0.002, + "step": 149070 + }, + { + "epoch": 0.9561528125631348, + "grad_norm": 0.04903615638613701, + "learning_rate": 6.242156979236678e-06, + "loss": 0.0014, + "step": 149080 + }, + { + "epoch": 0.9562169494569208, + "grad_norm": 0.0974036231637001, + "learning_rate": 6.241614815064833e-06, + "loss": 0.0015, + "step": 149090 + }, + { + "epoch": 0.956281086350707, + "grad_norm": 0.029251504689455032, + "learning_rate": 6.241072635334669e-06, + "loss": 0.0029, + "step": 149100 + }, + { + "epoch": 0.956345223244493, + "grad_norm": 0.0733976736664772, + "learning_rate": 6.240530440052976e-06, + "loss": 0.0017, + "step": 149110 + }, + { + "epoch": 0.9564093601382792, + "grad_norm": 0.04980620741844177, + "learning_rate": 6.23998822922655e-06, + "loss": 0.0012, + "step": 149120 + }, + { + "epoch": 0.9564734970320652, + "grad_norm": 0.004516866523772478, + "learning_rate": 6.239446002862186e-06, + "loss": 0.0016, + "step": 149130 + }, + { + "epoch": 0.9565376339258513, + "grad_norm": 0.0453466959297657, + "learning_rate": 6.238903760966677e-06, + "loss": 0.0012, + "step": 149140 + }, + { + "epoch": 0.9566017708196375, + "grad_norm": 0.23368796706199646, + "learning_rate": 6.238361503546819e-06, + "loss": 0.0037, + "step": 149150 + }, + { + "epoch": 0.9566659077134235, + "grad_norm": 0.1779107004404068, + "learning_rate": 6.237819230609407e-06, + "loss": 0.0029, + "step": 149160 + }, + { + "epoch": 0.9567300446072097, + "grad_norm": 0.10037069767713547, + "learning_rate": 6.237276942161234e-06, + "loss": 0.001, + "step": 149170 + }, + { + "epoch": 0.9567941815009957, + "grad_norm": 0.1792505979537964, + "learning_rate": 6.2367346382090964e-06, + "loss": 0.002, + "step": 149180 + }, + { + "epoch": 0.9568583183947819, + "grad_norm": 0.04580867290496826, + "learning_rate": 6.23619231875979e-06, + "loss": 0.0018, + "step": 149190 + }, + { + "epoch": 0.9569224552885679, + "grad_norm": 0.02684248611330986, + "learning_rate": 6.2356499838201115e-06, + "loss": 0.0026, + "step": 149200 + }, + { + "epoch": 0.956986592182354, + "grad_norm": 0.004145835526287556, + "learning_rate": 6.235107633396855e-06, + "loss": 0.0012, + "step": 149210 + }, + { + "epoch": 0.9570507290761401, + "grad_norm": 0.06484013795852661, + "learning_rate": 6.234565267496817e-06, + "loss": 0.0023, + "step": 149220 + }, + { + "epoch": 0.9571148659699262, + "grad_norm": 0.12458470463752747, + "learning_rate": 6.234022886126795e-06, + "loss": 0.002, + "step": 149230 + }, + { + "epoch": 0.9571790028637123, + "grad_norm": 0.021018436178565025, + "learning_rate": 6.233480489293583e-06, + "loss": 0.0017, + "step": 149240 + }, + { + "epoch": 0.9572431397574984, + "grad_norm": 0.20648528635501862, + "learning_rate": 6.23293807700398e-06, + "loss": 0.0019, + "step": 149250 + }, + { + "epoch": 0.9573072766512845, + "grad_norm": 0.17190244793891907, + "learning_rate": 6.23239564926478e-06, + "loss": 0.006, + "step": 149260 + }, + { + "epoch": 0.9573714135450706, + "grad_norm": 0.16801367700099945, + "learning_rate": 6.231853206082783e-06, + "loss": 0.0014, + "step": 149270 + }, + { + "epoch": 0.9574355504388566, + "grad_norm": 0.05028393119573593, + "learning_rate": 6.231310747464785e-06, + "loss": 0.0017, + "step": 149280 + }, + { + "epoch": 0.9574996873326428, + "grad_norm": 0.06627675890922546, + "learning_rate": 6.230768273417582e-06, + "loss": 0.002, + "step": 149290 + }, + { + "epoch": 0.9575638242264289, + "grad_norm": 0.17414219677448273, + "learning_rate": 6.230225783947975e-06, + "loss": 0.0035, + "step": 149300 + }, + { + "epoch": 0.957627961120215, + "grad_norm": 0.06667561084032059, + "learning_rate": 6.229683279062758e-06, + "loss": 0.0013, + "step": 149310 + }, + { + "epoch": 0.9576920980140011, + "grad_norm": 0.048860788345336914, + "learning_rate": 6.229140758768732e-06, + "loss": 0.0016, + "step": 149320 + }, + { + "epoch": 0.9577562349077872, + "grad_norm": 0.11049015820026398, + "learning_rate": 6.228598223072692e-06, + "loss": 0.0031, + "step": 149330 + }, + { + "epoch": 0.9578203718015733, + "grad_norm": 0.07773560285568237, + "learning_rate": 6.228055671981441e-06, + "loss": 0.0023, + "step": 149340 + }, + { + "epoch": 0.9578845086953593, + "grad_norm": 0.09300699084997177, + "learning_rate": 6.227513105501773e-06, + "loss": 0.0017, + "step": 149350 + }, + { + "epoch": 0.9579486455891455, + "grad_norm": 0.11326320469379425, + "learning_rate": 6.226970523640489e-06, + "loss": 0.0009, + "step": 149360 + }, + { + "epoch": 0.9580127824829315, + "grad_norm": 0.09000983834266663, + "learning_rate": 6.226427926404387e-06, + "loss": 0.0024, + "step": 149370 + }, + { + "epoch": 0.9580769193767177, + "grad_norm": 0.09515196084976196, + "learning_rate": 6.225885313800267e-06, + "loss": 0.0019, + "step": 149380 + }, + { + "epoch": 0.9581410562705037, + "grad_norm": 0.04229377210140228, + "learning_rate": 6.225342685834927e-06, + "loss": 0.0017, + "step": 149390 + }, + { + "epoch": 0.9582051931642899, + "grad_norm": 0.03817709535360336, + "learning_rate": 6.224800042515169e-06, + "loss": 0.0015, + "step": 149400 + }, + { + "epoch": 0.9582693300580759, + "grad_norm": 0.19991615414619446, + "learning_rate": 6.224257383847789e-06, + "loss": 0.0017, + "step": 149410 + }, + { + "epoch": 0.9583334669518621, + "grad_norm": 0.030445698648691177, + "learning_rate": 6.22371470983959e-06, + "loss": 0.002, + "step": 149420 + }, + { + "epoch": 0.9583976038456482, + "grad_norm": 0.08594322949647903, + "learning_rate": 6.223172020497372e-06, + "loss": 0.0017, + "step": 149430 + }, + { + "epoch": 0.9584617407394342, + "grad_norm": 0.047370102256536484, + "learning_rate": 6.2226293158279324e-06, + "loss": 0.0016, + "step": 149440 + }, + { + "epoch": 0.9585258776332204, + "grad_norm": 0.4207480847835541, + "learning_rate": 6.222086595838076e-06, + "loss": 0.0039, + "step": 149450 + }, + { + "epoch": 0.9585900145270064, + "grad_norm": 0.04545342177152634, + "learning_rate": 6.221543860534599e-06, + "loss": 0.0018, + "step": 149460 + }, + { + "epoch": 0.9586541514207926, + "grad_norm": 0.07949335128068924, + "learning_rate": 6.221001109924306e-06, + "loss": 0.0017, + "step": 149470 + }, + { + "epoch": 0.9587182883145786, + "grad_norm": 0.11357042193412781, + "learning_rate": 6.220458344013995e-06, + "loss": 0.002, + "step": 149480 + }, + { + "epoch": 0.9587824252083648, + "grad_norm": 0.0045299651101231575, + "learning_rate": 6.2199155628104715e-06, + "loss": 0.0012, + "step": 149490 + }, + { + "epoch": 0.9588465621021508, + "grad_norm": 0.037436194717884064, + "learning_rate": 6.219372766320531e-06, + "loss": 0.0014, + "step": 149500 + }, + { + "epoch": 0.958910698995937, + "grad_norm": 0.11348027735948563, + "learning_rate": 6.21882995455098e-06, + "loss": 0.0017, + "step": 149510 + }, + { + "epoch": 0.958974835889723, + "grad_norm": 0.08938112109899521, + "learning_rate": 6.218287127508618e-06, + "loss": 0.0014, + "step": 149520 + }, + { + "epoch": 0.9590389727835091, + "grad_norm": 0.008515220135450363, + "learning_rate": 6.217744285200248e-06, + "loss": 0.0048, + "step": 149530 + }, + { + "epoch": 0.9591031096772952, + "grad_norm": 0.20820820331573486, + "learning_rate": 6.217201427632671e-06, + "loss": 0.0072, + "step": 149540 + }, + { + "epoch": 0.9591672465710813, + "grad_norm": 0.0789012610912323, + "learning_rate": 6.216658554812691e-06, + "loss": 0.0016, + "step": 149550 + }, + { + "epoch": 0.9592313834648674, + "grad_norm": 0.07566607743501663, + "learning_rate": 6.216115666747109e-06, + "loss": 0.0053, + "step": 149560 + }, + { + "epoch": 0.9592955203586535, + "grad_norm": 0.1387602835893631, + "learning_rate": 6.215572763442729e-06, + "loss": 0.0043, + "step": 149570 + }, + { + "epoch": 0.9593596572524397, + "grad_norm": 0.133415088057518, + "learning_rate": 6.215029844906353e-06, + "loss": 0.0022, + "step": 149580 + }, + { + "epoch": 0.9594237941462257, + "grad_norm": 0.1288202852010727, + "learning_rate": 6.214486911144786e-06, + "loss": 0.0025, + "step": 149590 + }, + { + "epoch": 0.9594879310400118, + "grad_norm": 0.1541908234357834, + "learning_rate": 6.2139439621648275e-06, + "loss": 0.0017, + "step": 149600 + }, + { + "epoch": 0.9595520679337979, + "grad_norm": 0.3520669937133789, + "learning_rate": 6.213400997973286e-06, + "loss": 0.003, + "step": 149610 + }, + { + "epoch": 0.959616204827584, + "grad_norm": 0.040001530200242996, + "learning_rate": 6.212858018576962e-06, + "loss": 0.0012, + "step": 149620 + }, + { + "epoch": 0.9596803417213701, + "grad_norm": 0.17043310403823853, + "learning_rate": 6.2123150239826605e-06, + "loss": 0.0016, + "step": 149630 + }, + { + "epoch": 0.9597444786151562, + "grad_norm": 0.11625032871961594, + "learning_rate": 6.211772014197185e-06, + "loss": 0.0037, + "step": 149640 + }, + { + "epoch": 0.9598086155089423, + "grad_norm": 0.14066296815872192, + "learning_rate": 6.211228989227339e-06, + "loss": 0.0032, + "step": 149650 + }, + { + "epoch": 0.9598727524027284, + "grad_norm": 0.10146692395210266, + "learning_rate": 6.2106859490799305e-06, + "loss": 0.002, + "step": 149660 + }, + { + "epoch": 0.9599368892965144, + "grad_norm": 0.09130629897117615, + "learning_rate": 6.21014289376176e-06, + "loss": 0.0018, + "step": 149670 + }, + { + "epoch": 0.9600010261903006, + "grad_norm": 0.10161637514829636, + "learning_rate": 6.209599823279635e-06, + "loss": 0.002, + "step": 149680 + }, + { + "epoch": 0.9600651630840866, + "grad_norm": 0.07758409529924393, + "learning_rate": 6.20905673764036e-06, + "loss": 0.0021, + "step": 149690 + }, + { + "epoch": 0.9601292999778728, + "grad_norm": 0.06893595308065414, + "learning_rate": 6.208513636850739e-06, + "loss": 0.0021, + "step": 149700 + }, + { + "epoch": 0.9601934368716589, + "grad_norm": 0.11017578840255737, + "learning_rate": 6.207970520917579e-06, + "loss": 0.0015, + "step": 149710 + }, + { + "epoch": 0.960257573765445, + "grad_norm": 0.035975776612758636, + "learning_rate": 6.207427389847685e-06, + "loss": 0.0018, + "step": 149720 + }, + { + "epoch": 0.9603217106592311, + "grad_norm": 0.2288781851530075, + "learning_rate": 6.206884243647863e-06, + "loss": 0.0019, + "step": 149730 + }, + { + "epoch": 0.9603858475530171, + "grad_norm": 0.3251713216304779, + "learning_rate": 6.206341082324919e-06, + "loss": 0.0026, + "step": 149740 + }, + { + "epoch": 0.9604499844468033, + "grad_norm": 0.062379252165555954, + "learning_rate": 6.205797905885658e-06, + "loss": 0.0017, + "step": 149750 + }, + { + "epoch": 0.9605141213405893, + "grad_norm": 0.130185067653656, + "learning_rate": 6.205254714336889e-06, + "loss": 0.0017, + "step": 149760 + }, + { + "epoch": 0.9605782582343755, + "grad_norm": 0.04989314824342728, + "learning_rate": 6.204711507685416e-06, + "loss": 0.0035, + "step": 149770 + }, + { + "epoch": 0.9606423951281615, + "grad_norm": 0.09429958462715149, + "learning_rate": 6.204168285938046e-06, + "loss": 0.0027, + "step": 149780 + }, + { + "epoch": 0.9607065320219477, + "grad_norm": 0.13752683997154236, + "learning_rate": 6.20362504910159e-06, + "loss": 0.0022, + "step": 149790 + }, + { + "epoch": 0.9607706689157337, + "grad_norm": 0.05318170785903931, + "learning_rate": 6.203081797182848e-06, + "loss": 0.0009, + "step": 149800 + }, + { + "epoch": 0.9608348058095199, + "grad_norm": 0.0863172858953476, + "learning_rate": 6.2025385301886335e-06, + "loss": 0.0028, + "step": 149810 + }, + { + "epoch": 0.9608989427033059, + "grad_norm": 0.4169495105743408, + "learning_rate": 6.201995248125752e-06, + "loss": 0.0032, + "step": 149820 + }, + { + "epoch": 0.960963079597092, + "grad_norm": 0.08999016135931015, + "learning_rate": 6.201451951001009e-06, + "loss": 0.0033, + "step": 149830 + }, + { + "epoch": 0.9610272164908781, + "grad_norm": 0.09195482730865479, + "learning_rate": 6.200908638821216e-06, + "loss": 0.0019, + "step": 149840 + }, + { + "epoch": 0.9610913533846642, + "grad_norm": 0.09480104595422745, + "learning_rate": 6.20036531159318e-06, + "loss": 0.0011, + "step": 149850 + }, + { + "epoch": 0.9611554902784504, + "grad_norm": 0.19265435636043549, + "learning_rate": 6.199821969323707e-06, + "loss": 0.002, + "step": 149860 + }, + { + "epoch": 0.9612196271722364, + "grad_norm": 0.04201951250433922, + "learning_rate": 6.199278612019609e-06, + "loss": 0.0019, + "step": 149870 + }, + { + "epoch": 0.9612837640660226, + "grad_norm": 0.13683591783046722, + "learning_rate": 6.198735239687692e-06, + "loss": 0.0014, + "step": 149880 + }, + { + "epoch": 0.9613479009598086, + "grad_norm": 0.07254856079816818, + "learning_rate": 6.198191852334766e-06, + "loss": 0.0027, + "step": 149890 + }, + { + "epoch": 0.9614120378535947, + "grad_norm": 0.013781868852674961, + "learning_rate": 6.197648449967639e-06, + "loss": 0.0027, + "step": 149900 + }, + { + "epoch": 0.9614761747473808, + "grad_norm": 0.059696830809116364, + "learning_rate": 6.197105032593121e-06, + "loss": 0.0025, + "step": 149910 + }, + { + "epoch": 0.9615403116411669, + "grad_norm": 0.1655527651309967, + "learning_rate": 6.196561600218023e-06, + "loss": 0.0021, + "step": 149920 + }, + { + "epoch": 0.961604448534953, + "grad_norm": 0.11569607257843018, + "learning_rate": 6.19601815284915e-06, + "loss": 0.0025, + "step": 149930 + }, + { + "epoch": 0.9616685854287391, + "grad_norm": 0.0929485484957695, + "learning_rate": 6.1954746904933184e-06, + "loss": 0.0013, + "step": 149940 + }, + { + "epoch": 0.9617327223225252, + "grad_norm": 0.14032530784606934, + "learning_rate": 6.1949312131573315e-06, + "loss": 0.0033, + "step": 149950 + }, + { + "epoch": 0.9617968592163113, + "grad_norm": 0.02642715536057949, + "learning_rate": 6.194387720848003e-06, + "loss": 0.0023, + "step": 149960 + }, + { + "epoch": 0.9618609961100973, + "grad_norm": 0.15446075797080994, + "learning_rate": 6.193844213572143e-06, + "loss": 0.0025, + "step": 149970 + }, + { + "epoch": 0.9619251330038835, + "grad_norm": 0.16909636557102203, + "learning_rate": 6.193300691336563e-06, + "loss": 0.0025, + "step": 149980 + }, + { + "epoch": 0.9619892698976696, + "grad_norm": 0.12399910390377045, + "learning_rate": 6.192757154148071e-06, + "loss": 0.0042, + "step": 149990 + }, + { + "epoch": 0.9620534067914557, + "grad_norm": 0.1202448159456253, + "learning_rate": 6.192213602013481e-06, + "loss": 0.0015, + "step": 150000 + }, + { + "epoch": 0.9621175436852418, + "grad_norm": 0.20809784531593323, + "learning_rate": 6.191670034939602e-06, + "loss": 0.0021, + "step": 150010 + }, + { + "epoch": 0.9621816805790279, + "grad_norm": 0.03879670053720474, + "learning_rate": 6.191126452933246e-06, + "loss": 0.0022, + "step": 150020 + }, + { + "epoch": 0.962245817472814, + "grad_norm": 0.1990572065114975, + "learning_rate": 6.190582856001222e-06, + "loss": 0.0031, + "step": 150030 + }, + { + "epoch": 0.9623099543666, + "grad_norm": 0.04990717023611069, + "learning_rate": 6.190039244150348e-06, + "loss": 0.0027, + "step": 150040 + }, + { + "epoch": 0.9623740912603862, + "grad_norm": 0.10544680804014206, + "learning_rate": 6.189495617387428e-06, + "loss": 0.0017, + "step": 150050 + }, + { + "epoch": 0.9624382281541722, + "grad_norm": 0.18796586990356445, + "learning_rate": 6.1889519757192795e-06, + "loss": 0.0025, + "step": 150060 + }, + { + "epoch": 0.9625023650479584, + "grad_norm": 0.07660584151744843, + "learning_rate": 6.1884083191527125e-06, + "loss": 0.0015, + "step": 150070 + }, + { + "epoch": 0.9625665019417444, + "grad_norm": 0.10856002569198608, + "learning_rate": 6.187864647694541e-06, + "loss": 0.0016, + "step": 150080 + }, + { + "epoch": 0.9626306388355306, + "grad_norm": 0.10406231135129929, + "learning_rate": 6.187320961351575e-06, + "loss": 0.0043, + "step": 150090 + }, + { + "epoch": 0.9626947757293166, + "grad_norm": 0.06723621487617493, + "learning_rate": 6.18677726013063e-06, + "loss": 0.0022, + "step": 150100 + }, + { + "epoch": 0.9627589126231028, + "grad_norm": 0.13492071628570557, + "learning_rate": 6.186233544038517e-06, + "loss": 0.0018, + "step": 150110 + }, + { + "epoch": 0.9628230495168888, + "grad_norm": 0.09268075972795486, + "learning_rate": 6.18568981308205e-06, + "loss": 0.0026, + "step": 150120 + }, + { + "epoch": 0.962887186410675, + "grad_norm": 0.08964299410581589, + "learning_rate": 6.185146067268042e-06, + "loss": 0.0021, + "step": 150130 + }, + { + "epoch": 0.9629513233044611, + "grad_norm": 0.05227648839354515, + "learning_rate": 6.1846023066033066e-06, + "loss": 0.0056, + "step": 150140 + }, + { + "epoch": 0.9630154601982471, + "grad_norm": 0.0577087476849556, + "learning_rate": 6.184058531094659e-06, + "loss": 0.0016, + "step": 150150 + }, + { + "epoch": 0.9630795970920333, + "grad_norm": 0.04068737104535103, + "learning_rate": 6.183514740748911e-06, + "loss": 0.0021, + "step": 150160 + }, + { + "epoch": 0.9631437339858193, + "grad_norm": 0.1221189871430397, + "learning_rate": 6.182970935572877e-06, + "loss": 0.0017, + "step": 150170 + }, + { + "epoch": 0.9632078708796055, + "grad_norm": 0.11458059400320053, + "learning_rate": 6.1824271155733715e-06, + "loss": 0.0013, + "step": 150180 + }, + { + "epoch": 0.9632720077733915, + "grad_norm": 0.10058407485485077, + "learning_rate": 6.1818832807572106e-06, + "loss": 0.0018, + "step": 150190 + }, + { + "epoch": 0.9633361446671777, + "grad_norm": 0.1607140749692917, + "learning_rate": 6.181339431131205e-06, + "loss": 0.0029, + "step": 150200 + }, + { + "epoch": 0.9634002815609637, + "grad_norm": 0.4767524302005768, + "learning_rate": 6.1807955667021755e-06, + "loss": 0.0028, + "step": 150210 + }, + { + "epoch": 0.9634644184547498, + "grad_norm": 0.10355031490325928, + "learning_rate": 6.180251687476932e-06, + "loss": 0.0014, + "step": 150220 + }, + { + "epoch": 0.9635285553485359, + "grad_norm": 0.19108106195926666, + "learning_rate": 6.179707793462292e-06, + "loss": 0.0019, + "step": 150230 + }, + { + "epoch": 0.963592692242322, + "grad_norm": 0.13633432984352112, + "learning_rate": 6.179163884665068e-06, + "loss": 0.0017, + "step": 150240 + }, + { + "epoch": 0.9636568291361081, + "grad_norm": 0.05463302135467529, + "learning_rate": 6.1786199610920804e-06, + "loss": 0.0022, + "step": 150250 + }, + { + "epoch": 0.9637209660298942, + "grad_norm": 0.03905477002263069, + "learning_rate": 6.17807602275014e-06, + "loss": 0.0012, + "step": 150260 + }, + { + "epoch": 0.9637851029236802, + "grad_norm": 0.05089954286813736, + "learning_rate": 6.177532069646066e-06, + "loss": 0.0025, + "step": 150270 + }, + { + "epoch": 0.9638492398174664, + "grad_norm": 0.05219513922929764, + "learning_rate": 6.176988101786675e-06, + "loss": 0.0011, + "step": 150280 + }, + { + "epoch": 0.9639133767112525, + "grad_norm": 0.1544886827468872, + "learning_rate": 6.176444119178779e-06, + "loss": 0.002, + "step": 150290 + }, + { + "epoch": 0.9639775136050386, + "grad_norm": 0.21321026980876923, + "learning_rate": 6.1759001218292e-06, + "loss": 0.0019, + "step": 150300 + }, + { + "epoch": 0.9640416504988247, + "grad_norm": 0.05800163745880127, + "learning_rate": 6.175356109744751e-06, + "loss": 0.0016, + "step": 150310 + }, + { + "epoch": 0.9641057873926108, + "grad_norm": 0.08184763789176941, + "learning_rate": 6.174812082932251e-06, + "loss": 0.0012, + "step": 150320 + }, + { + "epoch": 0.9641699242863969, + "grad_norm": 0.16461877524852753, + "learning_rate": 6.174268041398514e-06, + "loss": 0.002, + "step": 150330 + }, + { + "epoch": 0.964234061180183, + "grad_norm": 0.10538577288389206, + "learning_rate": 6.17372398515036e-06, + "loss": 0.0011, + "step": 150340 + }, + { + "epoch": 0.9642981980739691, + "grad_norm": 0.07570752501487732, + "learning_rate": 6.173179914194606e-06, + "loss": 0.0018, + "step": 150350 + }, + { + "epoch": 0.9643623349677551, + "grad_norm": 0.04098653048276901, + "learning_rate": 6.172635828538069e-06, + "loss": 0.0018, + "step": 150360 + }, + { + "epoch": 0.9644264718615413, + "grad_norm": 0.10436978936195374, + "learning_rate": 6.172091728187568e-06, + "loss": 0.0037, + "step": 150370 + }, + { + "epoch": 0.9644906087553273, + "grad_norm": 0.059966154396533966, + "learning_rate": 6.17154761314992e-06, + "loss": 0.0034, + "step": 150380 + }, + { + "epoch": 0.9645547456491135, + "grad_norm": 0.14894920587539673, + "learning_rate": 6.171003483431941e-06, + "loss": 0.003, + "step": 150390 + }, + { + "epoch": 0.9646188825428995, + "grad_norm": 0.105308398604393, + "learning_rate": 6.170459339040453e-06, + "loss": 0.0038, + "step": 150400 + }, + { + "epoch": 0.9646830194366857, + "grad_norm": 0.07003585249185562, + "learning_rate": 6.169915179982272e-06, + "loss": 0.0029, + "step": 150410 + }, + { + "epoch": 0.9647471563304718, + "grad_norm": 0.10759350657463074, + "learning_rate": 6.16937100626422e-06, + "loss": 0.0029, + "step": 150420 + }, + { + "epoch": 0.9648112932242578, + "grad_norm": 0.12816260755062103, + "learning_rate": 6.168826817893111e-06, + "loss": 0.0017, + "step": 150430 + }, + { + "epoch": 0.964875430118044, + "grad_norm": 0.08761973679065704, + "learning_rate": 6.168282614875768e-06, + "loss": 0.0011, + "step": 150440 + }, + { + "epoch": 0.96493956701183, + "grad_norm": 0.1492198407649994, + "learning_rate": 6.167738397219008e-06, + "loss": 0.0033, + "step": 150450 + }, + { + "epoch": 0.9650037039056162, + "grad_norm": 0.6813758611679077, + "learning_rate": 6.167194164929652e-06, + "loss": 0.0084, + "step": 150460 + }, + { + "epoch": 0.9650678407994022, + "grad_norm": 0.1323823630809784, + "learning_rate": 6.166649918014518e-06, + "loss": 0.0034, + "step": 150470 + }, + { + "epoch": 0.9651319776931884, + "grad_norm": 0.048957593739032745, + "learning_rate": 6.166105656480426e-06, + "loss": 0.0017, + "step": 150480 + }, + { + "epoch": 0.9651961145869744, + "grad_norm": 0.10954766720533371, + "learning_rate": 6.165561380334199e-06, + "loss": 0.0019, + "step": 150490 + }, + { + "epoch": 0.9652602514807606, + "grad_norm": 0.06525308638811111, + "learning_rate": 6.165017089582654e-06, + "loss": 0.0021, + "step": 150500 + }, + { + "epoch": 0.9653243883745466, + "grad_norm": 0.042362719774246216, + "learning_rate": 6.1644727842326134e-06, + "loss": 0.003, + "step": 150510 + }, + { + "epoch": 0.9653885252683327, + "grad_norm": 0.26810696721076965, + "learning_rate": 6.163928464290895e-06, + "loss": 0.0035, + "step": 150520 + }, + { + "epoch": 0.9654526621621188, + "grad_norm": 0.12526772916316986, + "learning_rate": 6.1633841297643215e-06, + "loss": 0.0024, + "step": 150530 + }, + { + "epoch": 0.9655167990559049, + "grad_norm": 0.04780496284365654, + "learning_rate": 6.162839780659713e-06, + "loss": 0.0018, + "step": 150540 + }, + { + "epoch": 0.965580935949691, + "grad_norm": 0.02072593756020069, + "learning_rate": 6.162295416983892e-06, + "loss": 0.003, + "step": 150550 + }, + { + "epoch": 0.9656450728434771, + "grad_norm": 0.10294642299413681, + "learning_rate": 6.161751038743678e-06, + "loss": 0.002, + "step": 150560 + }, + { + "epoch": 0.9657092097372633, + "grad_norm": 0.09494373947381973, + "learning_rate": 6.161206645945893e-06, + "loss": 0.0022, + "step": 150570 + }, + { + "epoch": 0.9657733466310493, + "grad_norm": 0.18811984360218048, + "learning_rate": 6.160662238597359e-06, + "loss": 0.0021, + "step": 150580 + }, + { + "epoch": 0.9658374835248354, + "grad_norm": 0.08301776647567749, + "learning_rate": 6.160117816704898e-06, + "loss": 0.0019, + "step": 150590 + }, + { + "epoch": 0.9659016204186215, + "grad_norm": 0.21943216025829315, + "learning_rate": 6.159573380275331e-06, + "loss": 0.0018, + "step": 150600 + }, + { + "epoch": 0.9659657573124076, + "grad_norm": 0.0833280086517334, + "learning_rate": 6.1590289293154825e-06, + "loss": 0.002, + "step": 150610 + }, + { + "epoch": 0.9660298942061937, + "grad_norm": 0.6742835640907288, + "learning_rate": 6.1584844638321705e-06, + "loss": 0.0028, + "step": 150620 + }, + { + "epoch": 0.9660940310999798, + "grad_norm": 0.30826130509376526, + "learning_rate": 6.1579399838322216e-06, + "loss": 0.0015, + "step": 150630 + }, + { + "epoch": 0.9661581679937659, + "grad_norm": 0.24817430973052979, + "learning_rate": 6.1573954893224576e-06, + "loss": 0.0016, + "step": 150640 + }, + { + "epoch": 0.966222304887552, + "grad_norm": 0.11512727290391922, + "learning_rate": 6.1568509803097e-06, + "loss": 0.0014, + "step": 150650 + }, + { + "epoch": 0.966286441781338, + "grad_norm": 0.0032293221447616816, + "learning_rate": 6.1563064568007735e-06, + "loss": 0.0035, + "step": 150660 + }, + { + "epoch": 0.9663505786751242, + "grad_norm": 0.06835031509399414, + "learning_rate": 6.155761918802501e-06, + "loss": 0.0024, + "step": 150670 + }, + { + "epoch": 0.9664147155689102, + "grad_norm": 0.13923440873622894, + "learning_rate": 6.155217366321705e-06, + "loss": 0.0015, + "step": 150680 + }, + { + "epoch": 0.9664788524626964, + "grad_norm": 0.16111516952514648, + "learning_rate": 6.15467279936521e-06, + "loss": 0.0015, + "step": 150690 + }, + { + "epoch": 0.9665429893564825, + "grad_norm": 0.10677210241556168, + "learning_rate": 6.15412821793984e-06, + "loss": 0.0014, + "step": 150700 + }, + { + "epoch": 0.9666071262502686, + "grad_norm": 0.10801635682582855, + "learning_rate": 6.153583622052417e-06, + "loss": 0.0018, + "step": 150710 + }, + { + "epoch": 0.9666712631440547, + "grad_norm": 0.19023743271827698, + "learning_rate": 6.153039011709767e-06, + "loss": 0.0026, + "step": 150720 + }, + { + "epoch": 0.9667354000378408, + "grad_norm": 0.07062771171331406, + "learning_rate": 6.1524943869187145e-06, + "loss": 0.0021, + "step": 150730 + }, + { + "epoch": 0.9667995369316269, + "grad_norm": 0.051830243319272995, + "learning_rate": 6.151949747686085e-06, + "loss": 0.0016, + "step": 150740 + }, + { + "epoch": 0.9668636738254129, + "grad_norm": 0.0025231295730918646, + "learning_rate": 6.151405094018701e-06, + "loss": 0.001, + "step": 150750 + }, + { + "epoch": 0.9669278107191991, + "grad_norm": 0.2973938584327698, + "learning_rate": 6.1508604259233885e-06, + "loss": 0.0022, + "step": 150760 + }, + { + "epoch": 0.9669919476129851, + "grad_norm": 0.091313935816288, + "learning_rate": 6.150315743406972e-06, + "loss": 0.0012, + "step": 150770 + }, + { + "epoch": 0.9670560845067713, + "grad_norm": 0.0033550853841006756, + "learning_rate": 6.149771046476278e-06, + "loss": 0.0019, + "step": 150780 + }, + { + "epoch": 0.9671202214005573, + "grad_norm": 0.006705216597765684, + "learning_rate": 6.14922633513813e-06, + "loss": 0.0026, + "step": 150790 + }, + { + "epoch": 0.9671843582943435, + "grad_norm": 0.10822317749261856, + "learning_rate": 6.1486816093993555e-06, + "loss": 0.0021, + "step": 150800 + }, + { + "epoch": 0.9672484951881295, + "grad_norm": 0.07211865484714508, + "learning_rate": 6.148136869266778e-06, + "loss": 0.0018, + "step": 150810 + }, + { + "epoch": 0.9673126320819156, + "grad_norm": 0.10985633730888367, + "learning_rate": 6.147592114747225e-06, + "loss": 0.0021, + "step": 150820 + }, + { + "epoch": 0.9673767689757017, + "grad_norm": 0.29983705282211304, + "learning_rate": 6.147047345847524e-06, + "loss": 0.0029, + "step": 150830 + }, + { + "epoch": 0.9674409058694878, + "grad_norm": 0.06410083919763565, + "learning_rate": 6.1465025625745e-06, + "loss": 0.0012, + "step": 150840 + }, + { + "epoch": 0.967505042763274, + "grad_norm": 0.3119731545448303, + "learning_rate": 6.14595776493498e-06, + "loss": 0.0022, + "step": 150850 + }, + { + "epoch": 0.96756917965706, + "grad_norm": 0.17304588854312897, + "learning_rate": 6.1454129529357885e-06, + "loss": 0.003, + "step": 150860 + }, + { + "epoch": 0.9676333165508462, + "grad_norm": 0.11323154717683792, + "learning_rate": 6.144868126583755e-06, + "loss": 0.0034, + "step": 150870 + }, + { + "epoch": 0.9676974534446322, + "grad_norm": 0.11185193061828613, + "learning_rate": 6.1443232858857045e-06, + "loss": 0.002, + "step": 150880 + }, + { + "epoch": 0.9677615903384184, + "grad_norm": 0.09502777457237244, + "learning_rate": 6.143778430848467e-06, + "loss": 0.0027, + "step": 150890 + }, + { + "epoch": 0.9678257272322044, + "grad_norm": 0.11737873405218124, + "learning_rate": 6.1432335614788675e-06, + "loss": 0.0017, + "step": 150900 + }, + { + "epoch": 0.9678898641259905, + "grad_norm": 0.14510773122310638, + "learning_rate": 6.142688677783736e-06, + "loss": 0.0019, + "step": 150910 + }, + { + "epoch": 0.9679540010197766, + "grad_norm": 0.16018038988113403, + "learning_rate": 6.142143779769896e-06, + "loss": 0.0022, + "step": 150920 + }, + { + "epoch": 0.9680181379135627, + "grad_norm": 0.028643252328038216, + "learning_rate": 6.141598867444181e-06, + "loss": 0.0032, + "step": 150930 + }, + { + "epoch": 0.9680822748073488, + "grad_norm": 0.10917980223894119, + "learning_rate": 6.141053940813414e-06, + "loss": 0.0019, + "step": 150940 + }, + { + "epoch": 0.9681464117011349, + "grad_norm": 0.12204719334840775, + "learning_rate": 6.140508999884427e-06, + "loss": 0.0017, + "step": 150950 + }, + { + "epoch": 0.968210548594921, + "grad_norm": 0.7267501354217529, + "learning_rate": 6.139964044664046e-06, + "loss": 0.0021, + "step": 150960 + }, + { + "epoch": 0.9682746854887071, + "grad_norm": 0.2687130272388458, + "learning_rate": 6.139419075159101e-06, + "loss": 0.0014, + "step": 150970 + }, + { + "epoch": 0.9683388223824932, + "grad_norm": 0.22710467875003815, + "learning_rate": 6.138874091376421e-06, + "loss": 0.0025, + "step": 150980 + }, + { + "epoch": 0.9684029592762793, + "grad_norm": 0.05579902231693268, + "learning_rate": 6.1383290933228345e-06, + "loss": 0.0024, + "step": 150990 + }, + { + "epoch": 0.9684670961700654, + "grad_norm": 0.10017244517803192, + "learning_rate": 6.137784081005171e-06, + "loss": 0.0017, + "step": 151000 + }, + { + "epoch": 0.9685312330638515, + "grad_norm": 0.05790210887789726, + "learning_rate": 6.13723905443026e-06, + "loss": 0.0018, + "step": 151010 + }, + { + "epoch": 0.9685953699576376, + "grad_norm": 0.14402702450752258, + "learning_rate": 6.136694013604932e-06, + "loss": 0.0037, + "step": 151020 + }, + { + "epoch": 0.9686595068514237, + "grad_norm": 0.2564027011394501, + "learning_rate": 6.136148958536014e-06, + "loss": 0.0024, + "step": 151030 + }, + { + "epoch": 0.9687236437452098, + "grad_norm": 0.3121611475944519, + "learning_rate": 6.135603889230337e-06, + "loss": 0.0032, + "step": 151040 + }, + { + "epoch": 0.9687877806389958, + "grad_norm": 0.01328402291983366, + "learning_rate": 6.1350588056947325e-06, + "loss": 0.0018, + "step": 151050 + }, + { + "epoch": 0.968851917532782, + "grad_norm": 0.034130360931158066, + "learning_rate": 6.134513707936031e-06, + "loss": 0.0015, + "step": 151060 + }, + { + "epoch": 0.968916054426568, + "grad_norm": 0.05559052526950836, + "learning_rate": 6.13396859596106e-06, + "loss": 0.002, + "step": 151070 + }, + { + "epoch": 0.9689801913203542, + "grad_norm": 0.09205931425094604, + "learning_rate": 6.133423469776654e-06, + "loss": 0.0018, + "step": 151080 + }, + { + "epoch": 0.9690443282141402, + "grad_norm": 0.09927406907081604, + "learning_rate": 6.13287832938964e-06, + "loss": 0.002, + "step": 151090 + }, + { + "epoch": 0.9691084651079264, + "grad_norm": 0.08456621319055557, + "learning_rate": 6.132333174806851e-06, + "loss": 0.0021, + "step": 151100 + }, + { + "epoch": 0.9691726020017124, + "grad_norm": 0.0676923468708992, + "learning_rate": 6.131788006035119e-06, + "loss": 0.002, + "step": 151110 + }, + { + "epoch": 0.9692367388954986, + "grad_norm": 0.01282727625221014, + "learning_rate": 6.131242823081275e-06, + "loss": 0.0019, + "step": 151120 + }, + { + "epoch": 0.9693008757892847, + "grad_norm": 0.10641659796237946, + "learning_rate": 6.130697625952149e-06, + "loss": 0.0022, + "step": 151130 + }, + { + "epoch": 0.9693650126830707, + "grad_norm": 0.10187271982431412, + "learning_rate": 6.130152414654574e-06, + "loss": 0.0039, + "step": 151140 + }, + { + "epoch": 0.9694291495768569, + "grad_norm": 0.05137203633785248, + "learning_rate": 6.129607189195381e-06, + "loss": 0.0015, + "step": 151150 + }, + { + "epoch": 0.9694932864706429, + "grad_norm": 0.05988229066133499, + "learning_rate": 6.129061949581403e-06, + "loss": 0.0014, + "step": 151160 + }, + { + "epoch": 0.9695574233644291, + "grad_norm": 0.015055349096655846, + "learning_rate": 6.128516695819472e-06, + "loss": 0.0032, + "step": 151170 + }, + { + "epoch": 0.9696215602582151, + "grad_norm": 0.1137191578745842, + "learning_rate": 6.12797142791642e-06, + "loss": 0.002, + "step": 151180 + }, + { + "epoch": 0.9696856971520013, + "grad_norm": 0.06393451988697052, + "learning_rate": 6.12742614587908e-06, + "loss": 0.0022, + "step": 151190 + }, + { + "epoch": 0.9697498340457873, + "grad_norm": 0.03888490051031113, + "learning_rate": 6.126880849714284e-06, + "loss": 0.0014, + "step": 151200 + }, + { + "epoch": 0.9698139709395734, + "grad_norm": 0.03833886235952377, + "learning_rate": 6.126335539428867e-06, + "loss": 0.0027, + "step": 151210 + }, + { + "epoch": 0.9698781078333595, + "grad_norm": 0.06993034482002258, + "learning_rate": 6.1257902150296585e-06, + "loss": 0.0039, + "step": 151220 + }, + { + "epoch": 0.9699422447271456, + "grad_norm": 0.04533940553665161, + "learning_rate": 6.125244876523496e-06, + "loss": 0.0015, + "step": 151230 + }, + { + "epoch": 0.9700063816209317, + "grad_norm": 0.05946129187941551, + "learning_rate": 6.1246995239172105e-06, + "loss": 0.0013, + "step": 151240 + }, + { + "epoch": 0.9700705185147178, + "grad_norm": 0.0785088837146759, + "learning_rate": 6.124154157217637e-06, + "loss": 0.001, + "step": 151250 + }, + { + "epoch": 0.970134655408504, + "grad_norm": 0.09543221443891525, + "learning_rate": 6.123608776431606e-06, + "loss": 0.0032, + "step": 151260 + }, + { + "epoch": 0.97019879230229, + "grad_norm": 0.03559909388422966, + "learning_rate": 6.123063381565957e-06, + "loss": 0.0023, + "step": 151270 + }, + { + "epoch": 0.9702629291960762, + "grad_norm": 0.10257601737976074, + "learning_rate": 6.122517972627518e-06, + "loss": 0.002, + "step": 151280 + }, + { + "epoch": 0.9703270660898622, + "grad_norm": 0.2212112545967102, + "learning_rate": 6.121972549623129e-06, + "loss": 0.0034, + "step": 151290 + }, + { + "epoch": 0.9703912029836483, + "grad_norm": 0.04329894483089447, + "learning_rate": 6.121427112559622e-06, + "loss": 0.0024, + "step": 151300 + }, + { + "epoch": 0.9704553398774344, + "grad_norm": 0.03108963370323181, + "learning_rate": 6.120881661443831e-06, + "loss": 0.0026, + "step": 151310 + }, + { + "epoch": 0.9705194767712205, + "grad_norm": 0.006353151053190231, + "learning_rate": 6.1203361962825915e-06, + "loss": 0.0014, + "step": 151320 + }, + { + "epoch": 0.9705836136650066, + "grad_norm": 0.1244301050901413, + "learning_rate": 6.1197907170827385e-06, + "loss": 0.002, + "step": 151330 + }, + { + "epoch": 0.9706477505587927, + "grad_norm": 0.14038756489753723, + "learning_rate": 6.119245223851109e-06, + "loss": 0.0008, + "step": 151340 + }, + { + "epoch": 0.9707118874525787, + "grad_norm": 0.1935957670211792, + "learning_rate": 6.1186997165945364e-06, + "loss": 0.0014, + "step": 151350 + }, + { + "epoch": 0.9707760243463649, + "grad_norm": 0.03064032457768917, + "learning_rate": 6.118154195319857e-06, + "loss": 0.0023, + "step": 151360 + }, + { + "epoch": 0.9708401612401509, + "grad_norm": 0.20530641078948975, + "learning_rate": 6.117608660033904e-06, + "loss": 0.0022, + "step": 151370 + }, + { + "epoch": 0.9709042981339371, + "grad_norm": 0.009169038385152817, + "learning_rate": 6.117063110743518e-06, + "loss": 0.0023, + "step": 151380 + }, + { + "epoch": 0.9709684350277231, + "grad_norm": 0.02863924391567707, + "learning_rate": 6.116517547455533e-06, + "loss": 0.0047, + "step": 151390 + }, + { + "epoch": 0.9710325719215093, + "grad_norm": 0.0908161923289299, + "learning_rate": 6.115971970176785e-06, + "loss": 0.002, + "step": 151400 + }, + { + "epoch": 0.9710967088152954, + "grad_norm": 0.39482712745666504, + "learning_rate": 6.11542637891411e-06, + "loss": 0.0033, + "step": 151410 + }, + { + "epoch": 0.9711608457090815, + "grad_norm": 0.0724111795425415, + "learning_rate": 6.114880773674347e-06, + "loss": 0.0013, + "step": 151420 + }, + { + "epoch": 0.9712249826028676, + "grad_norm": 0.014322632923722267, + "learning_rate": 6.1143351544643285e-06, + "loss": 0.0028, + "step": 151430 + }, + { + "epoch": 0.9712891194966536, + "grad_norm": 0.050286244601011276, + "learning_rate": 6.113789521290896e-06, + "loss": 0.004, + "step": 151440 + }, + { + "epoch": 0.9713532563904398, + "grad_norm": 0.08359697461128235, + "learning_rate": 6.113243874160884e-06, + "loss": 0.0031, + "step": 151450 + }, + { + "epoch": 0.9714173932842258, + "grad_norm": 0.19185268878936768, + "learning_rate": 6.112698213081131e-06, + "loss": 0.0023, + "step": 151460 + }, + { + "epoch": 0.971481530178012, + "grad_norm": 0.10571939498186111, + "learning_rate": 6.112152538058473e-06, + "loss": 0.001, + "step": 151470 + }, + { + "epoch": 0.971545667071798, + "grad_norm": 0.09915226697921753, + "learning_rate": 6.111606849099749e-06, + "loss": 0.0028, + "step": 151480 + }, + { + "epoch": 0.9716098039655842, + "grad_norm": 0.0881165862083435, + "learning_rate": 6.111061146211797e-06, + "loss": 0.002, + "step": 151490 + }, + { + "epoch": 0.9716739408593702, + "grad_norm": 0.21176064014434814, + "learning_rate": 6.1105154294014565e-06, + "loss": 0.0029, + "step": 151500 + }, + { + "epoch": 0.9717380777531563, + "grad_norm": 0.09044768661260605, + "learning_rate": 6.109969698675562e-06, + "loss": 0.0034, + "step": 151510 + }, + { + "epoch": 0.9718022146469424, + "grad_norm": 0.13650836050510406, + "learning_rate": 6.109423954040956e-06, + "loss": 0.0032, + "step": 151520 + }, + { + "epoch": 0.9718663515407285, + "grad_norm": 0.08207231014966965, + "learning_rate": 6.108878195504473e-06, + "loss": 0.0029, + "step": 151530 + }, + { + "epoch": 0.9719304884345147, + "grad_norm": 0.12176788598299026, + "learning_rate": 6.108332423072953e-06, + "loss": 0.0029, + "step": 151540 + }, + { + "epoch": 0.9719946253283007, + "grad_norm": 0.09918273985385895, + "learning_rate": 6.107786636753238e-06, + "loss": 0.0026, + "step": 151550 + }, + { + "epoch": 0.9720587622220869, + "grad_norm": 0.01861521415412426, + "learning_rate": 6.107240836552163e-06, + "loss": 0.0018, + "step": 151560 + }, + { + "epoch": 0.9721228991158729, + "grad_norm": 0.08306858688592911, + "learning_rate": 6.106695022476571e-06, + "loss": 0.0017, + "step": 151570 + }, + { + "epoch": 0.9721870360096591, + "grad_norm": 0.16298122704029083, + "learning_rate": 6.106149194533297e-06, + "loss": 0.0023, + "step": 151580 + }, + { + "epoch": 0.9722511729034451, + "grad_norm": 0.08419349044561386, + "learning_rate": 6.105603352729184e-06, + "loss": 0.0022, + "step": 151590 + }, + { + "epoch": 0.9723153097972312, + "grad_norm": 0.0572366900742054, + "learning_rate": 6.105057497071071e-06, + "loss": 0.0024, + "step": 151600 + }, + { + "epoch": 0.9723794466910173, + "grad_norm": 0.049051739275455475, + "learning_rate": 6.104511627565799e-06, + "loss": 0.0021, + "step": 151610 + }, + { + "epoch": 0.9724435835848034, + "grad_norm": 0.15118838846683502, + "learning_rate": 6.103965744220205e-06, + "loss": 0.002, + "step": 151620 + }, + { + "epoch": 0.9725077204785895, + "grad_norm": 0.04342101886868477, + "learning_rate": 6.103419847041132e-06, + "loss": 0.0017, + "step": 151630 + }, + { + "epoch": 0.9725718573723756, + "grad_norm": 0.04763609543442726, + "learning_rate": 6.1028739360354195e-06, + "loss": 0.0019, + "step": 151640 + }, + { + "epoch": 0.9726359942661617, + "grad_norm": 0.026795970275998116, + "learning_rate": 6.102328011209909e-06, + "loss": 0.0013, + "step": 151650 + }, + { + "epoch": 0.9727001311599478, + "grad_norm": 0.04624636471271515, + "learning_rate": 6.10178207257144e-06, + "loss": 0.0029, + "step": 151660 + }, + { + "epoch": 0.9727642680537338, + "grad_norm": 0.08322446048259735, + "learning_rate": 6.101236120126853e-06, + "loss": 0.003, + "step": 151670 + }, + { + "epoch": 0.97282840494752, + "grad_norm": 0.07855825871229172, + "learning_rate": 6.100690153882993e-06, + "loss": 0.0017, + "step": 151680 + }, + { + "epoch": 0.9728925418413061, + "grad_norm": 0.09713820368051529, + "learning_rate": 6.100144173846697e-06, + "loss": 0.0015, + "step": 151690 + }, + { + "epoch": 0.9729566787350922, + "grad_norm": 0.14672186970710754, + "learning_rate": 6.099598180024809e-06, + "loss": 0.002, + "step": 151700 + }, + { + "epoch": 0.9730208156288783, + "grad_norm": 0.016113052144646645, + "learning_rate": 6.099052172424169e-06, + "loss": 0.0012, + "step": 151710 + }, + { + "epoch": 0.9730849525226644, + "grad_norm": 0.19978825747966766, + "learning_rate": 6.098506151051621e-06, + "loss": 0.0045, + "step": 151720 + }, + { + "epoch": 0.9731490894164505, + "grad_norm": 0.12873505055904388, + "learning_rate": 6.0979601159140055e-06, + "loss": 0.0013, + "step": 151730 + }, + { + "epoch": 0.9732132263102365, + "grad_norm": 0.30069053173065186, + "learning_rate": 6.097414067018164e-06, + "loss": 0.0034, + "step": 151740 + }, + { + "epoch": 0.9732773632040227, + "grad_norm": 0.05176600441336632, + "learning_rate": 6.0968680043709416e-06, + "loss": 0.0036, + "step": 151750 + }, + { + "epoch": 0.9733415000978087, + "grad_norm": 0.06669841706752777, + "learning_rate": 6.0963219279791786e-06, + "loss": 0.0028, + "step": 151760 + }, + { + "epoch": 0.9734056369915949, + "grad_norm": 0.21331311762332916, + "learning_rate": 6.095775837849718e-06, + "loss": 0.0019, + "step": 151770 + }, + { + "epoch": 0.9734697738853809, + "grad_norm": 0.07572337985038757, + "learning_rate": 6.095229733989403e-06, + "loss": 0.0024, + "step": 151780 + }, + { + "epoch": 0.9735339107791671, + "grad_norm": 0.13380320370197296, + "learning_rate": 6.094683616405076e-06, + "loss": 0.0019, + "step": 151790 + }, + { + "epoch": 0.9735980476729531, + "grad_norm": 0.018392594531178474, + "learning_rate": 6.094137485103583e-06, + "loss": 0.0013, + "step": 151800 + }, + { + "epoch": 0.9736621845667393, + "grad_norm": 0.23304355144500732, + "learning_rate": 6.093591340091763e-06, + "loss": 0.0016, + "step": 151810 + }, + { + "epoch": 0.9737263214605253, + "grad_norm": 0.09823311865329742, + "learning_rate": 6.093045181376462e-06, + "loss": 0.0022, + "step": 151820 + }, + { + "epoch": 0.9737904583543114, + "grad_norm": 0.025532811880111694, + "learning_rate": 6.092499008964526e-06, + "loss": 0.001, + "step": 151830 + }, + { + "epoch": 0.9738545952480976, + "grad_norm": 0.0746760368347168, + "learning_rate": 6.091952822862794e-06, + "loss": 0.0023, + "step": 151840 + }, + { + "epoch": 0.9739187321418836, + "grad_norm": 0.020240366458892822, + "learning_rate": 6.091406623078116e-06, + "loss": 0.0073, + "step": 151850 + }, + { + "epoch": 0.9739828690356698, + "grad_norm": 0.040793247520923615, + "learning_rate": 6.09086040961733e-06, + "loss": 0.0018, + "step": 151860 + }, + { + "epoch": 0.9740470059294558, + "grad_norm": 0.13199807703495026, + "learning_rate": 6.090314182487284e-06, + "loss": 0.0052, + "step": 151870 + }, + { + "epoch": 0.974111142823242, + "grad_norm": 0.16162413358688354, + "learning_rate": 6.089767941694822e-06, + "loss": 0.002, + "step": 151880 + }, + { + "epoch": 0.974175279717028, + "grad_norm": 0.06183703988790512, + "learning_rate": 6.0892216872467895e-06, + "loss": 0.0016, + "step": 151890 + }, + { + "epoch": 0.9742394166108141, + "grad_norm": 0.07440250366926193, + "learning_rate": 6.0886754191500296e-06, + "loss": 0.0018, + "step": 151900 + }, + { + "epoch": 0.9743035535046002, + "grad_norm": 0.041807644069194794, + "learning_rate": 6.088129137411391e-06, + "loss": 0.0038, + "step": 151910 + }, + { + "epoch": 0.9743676903983863, + "grad_norm": 0.07359262555837631, + "learning_rate": 6.087582842037715e-06, + "loss": 0.0033, + "step": 151920 + }, + { + "epoch": 0.9744318272921724, + "grad_norm": 0.09813549369573593, + "learning_rate": 6.08703653303585e-06, + "loss": 0.0017, + "step": 151930 + }, + { + "epoch": 0.9744959641859585, + "grad_norm": 0.169961079955101, + "learning_rate": 6.086490210412638e-06, + "loss": 0.0019, + "step": 151940 + }, + { + "epoch": 0.9745601010797446, + "grad_norm": 0.24065662920475006, + "learning_rate": 6.0859438741749286e-06, + "loss": 0.0052, + "step": 151950 + }, + { + "epoch": 0.9746242379735307, + "grad_norm": 0.08111688494682312, + "learning_rate": 6.085397524329566e-06, + "loss": 0.001, + "step": 151960 + }, + { + "epoch": 0.9746883748673169, + "grad_norm": 0.08499859273433685, + "learning_rate": 6.0848511608833984e-06, + "loss": 0.0035, + "step": 151970 + }, + { + "epoch": 0.9747525117611029, + "grad_norm": 0.07363910973072052, + "learning_rate": 6.0843047838432675e-06, + "loss": 0.0029, + "step": 151980 + }, + { + "epoch": 0.974816648654889, + "grad_norm": 0.05126811936497688, + "learning_rate": 6.083758393216025e-06, + "loss": 0.0018, + "step": 151990 + }, + { + "epoch": 0.9748807855486751, + "grad_norm": 0.11984333395957947, + "learning_rate": 6.083211989008514e-06, + "loss": 0.0017, + "step": 152000 + }, + { + "epoch": 0.9749449224424612, + "grad_norm": 0.1173730194568634, + "learning_rate": 6.082665571227584e-06, + "loss": 0.0025, + "step": 152010 + }, + { + "epoch": 0.9750090593362473, + "grad_norm": 0.1031876653432846, + "learning_rate": 6.08211913988008e-06, + "loss": 0.003, + "step": 152020 + }, + { + "epoch": 0.9750731962300334, + "grad_norm": 0.11623068153858185, + "learning_rate": 6.0815726949728484e-06, + "loss": 0.0016, + "step": 152030 + }, + { + "epoch": 0.9751373331238194, + "grad_norm": 0.07475735247135162, + "learning_rate": 6.081026236512739e-06, + "loss": 0.0026, + "step": 152040 + }, + { + "epoch": 0.9752014700176056, + "grad_norm": 0.022157786414027214, + "learning_rate": 6.080479764506598e-06, + "loss": 0.0022, + "step": 152050 + }, + { + "epoch": 0.9752656069113916, + "grad_norm": 0.09656548500061035, + "learning_rate": 6.079933278961274e-06, + "loss": 0.0038, + "step": 152060 + }, + { + "epoch": 0.9753297438051778, + "grad_norm": 0.12655095756053925, + "learning_rate": 6.079386779883614e-06, + "loss": 0.0019, + "step": 152070 + }, + { + "epoch": 0.9753938806989638, + "grad_norm": 0.019430868327617645, + "learning_rate": 6.078840267280467e-06, + "loss": 0.0012, + "step": 152080 + }, + { + "epoch": 0.97545801759275, + "grad_norm": 0.058887068182229996, + "learning_rate": 6.078293741158678e-06, + "loss": 0.0017, + "step": 152090 + }, + { + "epoch": 0.975522154486536, + "grad_norm": 0.07945910841226578, + "learning_rate": 6.0777472015251e-06, + "loss": 0.002, + "step": 152100 + }, + { + "epoch": 0.9755862913803222, + "grad_norm": 0.06385931372642517, + "learning_rate": 6.077200648386579e-06, + "loss": 0.0034, + "step": 152110 + }, + { + "epoch": 0.9756504282741083, + "grad_norm": 0.09562142193317413, + "learning_rate": 6.076654081749964e-06, + "loss": 0.0032, + "step": 152120 + }, + { + "epoch": 0.9757145651678943, + "grad_norm": 0.08291531354188919, + "learning_rate": 6.076107501622103e-06, + "loss": 0.0029, + "step": 152130 + }, + { + "epoch": 0.9757787020616805, + "grad_norm": 0.02452937886118889, + "learning_rate": 6.075560908009847e-06, + "loss": 0.0014, + "step": 152140 + }, + { + "epoch": 0.9758428389554665, + "grad_norm": 0.17652659118175507, + "learning_rate": 6.075014300920042e-06, + "loss": 0.0071, + "step": 152150 + }, + { + "epoch": 0.9759069758492527, + "grad_norm": 0.07808490842580795, + "learning_rate": 6.074467680359542e-06, + "loss": 0.0016, + "step": 152160 + }, + { + "epoch": 0.9759711127430387, + "grad_norm": 0.07041225582361221, + "learning_rate": 6.073921046335193e-06, + "loss": 0.0016, + "step": 152170 + }, + { + "epoch": 0.9760352496368249, + "grad_norm": 0.15718582272529602, + "learning_rate": 6.0733743988538455e-06, + "loss": 0.0021, + "step": 152180 + }, + { + "epoch": 0.9760993865306109, + "grad_norm": 0.00888227578252554, + "learning_rate": 6.072827737922351e-06, + "loss": 0.0014, + "step": 152190 + }, + { + "epoch": 0.976163523424397, + "grad_norm": 0.23891635239124298, + "learning_rate": 6.072281063547556e-06, + "loss": 0.0017, + "step": 152200 + }, + { + "epoch": 0.9762276603181831, + "grad_norm": 0.10157041996717453, + "learning_rate": 6.071734375736314e-06, + "loss": 0.0033, + "step": 152210 + }, + { + "epoch": 0.9762917972119692, + "grad_norm": 0.16198943555355072, + "learning_rate": 6.071187674495475e-06, + "loss": 0.0033, + "step": 152220 + }, + { + "epoch": 0.9763559341057553, + "grad_norm": 0.14039942622184753, + "learning_rate": 6.070640959831888e-06, + "loss": 0.0018, + "step": 152230 + }, + { + "epoch": 0.9764200709995414, + "grad_norm": 0.0957934558391571, + "learning_rate": 6.0700942317524054e-06, + "loss": 0.0022, + "step": 152240 + }, + { + "epoch": 0.9764842078933276, + "grad_norm": 0.1538853943347931, + "learning_rate": 6.069547490263876e-06, + "loss": 0.0024, + "step": 152250 + }, + { + "epoch": 0.9765483447871136, + "grad_norm": 0.4327002465724945, + "learning_rate": 6.069000735373153e-06, + "loss": 0.0025, + "step": 152260 + }, + { + "epoch": 0.9766124816808998, + "grad_norm": 0.07163172215223312, + "learning_rate": 6.068453967087087e-06, + "loss": 0.0017, + "step": 152270 + }, + { + "epoch": 0.9766766185746858, + "grad_norm": 0.15471971035003662, + "learning_rate": 6.067907185412528e-06, + "loss": 0.0022, + "step": 152280 + }, + { + "epoch": 0.9767407554684719, + "grad_norm": 0.04025840014219284, + "learning_rate": 6.067360390356331e-06, + "loss": 0.0018, + "step": 152290 + }, + { + "epoch": 0.976804892362258, + "grad_norm": 0.03471701219677925, + "learning_rate": 6.066813581925344e-06, + "loss": 0.0015, + "step": 152300 + }, + { + "epoch": 0.9768690292560441, + "grad_norm": 0.08985839784145355, + "learning_rate": 6.06626676012642e-06, + "loss": 0.0036, + "step": 152310 + }, + { + "epoch": 0.9769331661498302, + "grad_norm": 0.0038460553623735905, + "learning_rate": 6.065719924966412e-06, + "loss": 0.0013, + "step": 152320 + }, + { + "epoch": 0.9769973030436163, + "grad_norm": 0.14992263913154602, + "learning_rate": 6.065173076452172e-06, + "loss": 0.0017, + "step": 152330 + }, + { + "epoch": 0.9770614399374024, + "grad_norm": 0.016167109832167625, + "learning_rate": 6.064626214590552e-06, + "loss": 0.0014, + "step": 152340 + }, + { + "epoch": 0.9771255768311885, + "grad_norm": 0.14940617978572845, + "learning_rate": 6.064079339388404e-06, + "loss": 0.0033, + "step": 152350 + }, + { + "epoch": 0.9771897137249745, + "grad_norm": 0.07109025120735168, + "learning_rate": 6.063532450852582e-06, + "loss": 0.0049, + "step": 152360 + }, + { + "epoch": 0.9772538506187607, + "grad_norm": 0.11286960542201996, + "learning_rate": 6.062985548989939e-06, + "loss": 0.0061, + "step": 152370 + }, + { + "epoch": 0.9773179875125467, + "grad_norm": 0.20279167592525482, + "learning_rate": 6.062438633807326e-06, + "loss": 0.0017, + "step": 152380 + }, + { + "epoch": 0.9773821244063329, + "grad_norm": 0.019253971055150032, + "learning_rate": 6.061891705311597e-06, + "loss": 0.0019, + "step": 152390 + }, + { + "epoch": 0.977446261300119, + "grad_norm": 0.12412141263484955, + "learning_rate": 6.061344763509608e-06, + "loss": 0.0021, + "step": 152400 + }, + { + "epoch": 0.9775103981939051, + "grad_norm": 0.058596231043338776, + "learning_rate": 6.060797808408209e-06, + "loss": 0.0022, + "step": 152410 + }, + { + "epoch": 0.9775745350876912, + "grad_norm": 0.027793683111667633, + "learning_rate": 6.060250840014257e-06, + "loss": 0.0021, + "step": 152420 + }, + { + "epoch": 0.9776386719814772, + "grad_norm": 0.07490064948797226, + "learning_rate": 6.0597038583346026e-06, + "loss": 0.0022, + "step": 152430 + }, + { + "epoch": 0.9777028088752634, + "grad_norm": 0.029315203428268433, + "learning_rate": 6.059156863376102e-06, + "loss": 0.0023, + "step": 152440 + }, + { + "epoch": 0.9777669457690494, + "grad_norm": 0.14921054244041443, + "learning_rate": 6.058609855145608e-06, + "loss": 0.0027, + "step": 152450 + }, + { + "epoch": 0.9778310826628356, + "grad_norm": 0.06638119369745255, + "learning_rate": 6.058062833649978e-06, + "loss": 0.0035, + "step": 152460 + }, + { + "epoch": 0.9778952195566216, + "grad_norm": 0.09822449833154678, + "learning_rate": 6.057515798896063e-06, + "loss": 0.0024, + "step": 152470 + }, + { + "epoch": 0.9779593564504078, + "grad_norm": 0.11892236769199371, + "learning_rate": 6.0569687508907205e-06, + "loss": 0.0037, + "step": 152480 + }, + { + "epoch": 0.9780234933441938, + "grad_norm": 0.03293519467115402, + "learning_rate": 6.056421689640804e-06, + "loss": 0.0018, + "step": 152490 + }, + { + "epoch": 0.97808763023798, + "grad_norm": 0.33398517966270447, + "learning_rate": 6.055874615153168e-06, + "loss": 0.0045, + "step": 152500 + }, + { + "epoch": 0.978151767131766, + "grad_norm": 0.22353917360305786, + "learning_rate": 6.0553275274346675e-06, + "loss": 0.0017, + "step": 152510 + }, + { + "epoch": 0.9782159040255521, + "grad_norm": 0.0990443080663681, + "learning_rate": 6.054780426492161e-06, + "loss": 0.0057, + "step": 152520 + }, + { + "epoch": 0.9782800409193383, + "grad_norm": 0.10360170900821686, + "learning_rate": 6.0542333123325e-06, + "loss": 0.0021, + "step": 152530 + }, + { + "epoch": 0.9783441778131243, + "grad_norm": 0.089264877140522, + "learning_rate": 6.053686184962543e-06, + "loss": 0.0016, + "step": 152540 + }, + { + "epoch": 0.9784083147069105, + "grad_norm": 0.20201466977596283, + "learning_rate": 6.053139044389146e-06, + "loss": 0.0046, + "step": 152550 + }, + { + "epoch": 0.9784724516006965, + "grad_norm": 0.06233484297990799, + "learning_rate": 6.052591890619162e-06, + "loss": 0.0019, + "step": 152560 + }, + { + "epoch": 0.9785365884944827, + "grad_norm": 0.12971296906471252, + "learning_rate": 6.052044723659451e-06, + "loss": 0.002, + "step": 152570 + }, + { + "epoch": 0.9786007253882687, + "grad_norm": 0.033529192209243774, + "learning_rate": 6.051497543516867e-06, + "loss": 0.0022, + "step": 152580 + }, + { + "epoch": 0.9786648622820548, + "grad_norm": 0.34596654772758484, + "learning_rate": 6.050950350198268e-06, + "loss": 0.0042, + "step": 152590 + }, + { + "epoch": 0.9787289991758409, + "grad_norm": 0.16789695620536804, + "learning_rate": 6.050403143710508e-06, + "loss": 0.0017, + "step": 152600 + }, + { + "epoch": 0.978793136069627, + "grad_norm": 0.00438518263399601, + "learning_rate": 6.049855924060449e-06, + "loss": 0.0015, + "step": 152610 + }, + { + "epoch": 0.9788572729634131, + "grad_norm": 0.16074934601783752, + "learning_rate": 6.049308691254943e-06, + "loss": 0.0028, + "step": 152620 + }, + { + "epoch": 0.9789214098571992, + "grad_norm": 0.13383474946022034, + "learning_rate": 6.048761445300848e-06, + "loss": 0.0025, + "step": 152630 + }, + { + "epoch": 0.9789855467509853, + "grad_norm": 0.10629253834486008, + "learning_rate": 6.048214186205024e-06, + "loss": 0.0029, + "step": 152640 + }, + { + "epoch": 0.9790496836447714, + "grad_norm": 0.026463015004992485, + "learning_rate": 6.047666913974328e-06, + "loss": 0.0007, + "step": 152650 + }, + { + "epoch": 0.9791138205385574, + "grad_norm": 0.09996064752340317, + "learning_rate": 6.047119628615616e-06, + "loss": 0.0016, + "step": 152660 + }, + { + "epoch": 0.9791779574323436, + "grad_norm": 0.09978724271059036, + "learning_rate": 6.0465723301357456e-06, + "loss": 0.0022, + "step": 152670 + }, + { + "epoch": 0.9792420943261297, + "grad_norm": 0.08587059378623962, + "learning_rate": 6.046025018541577e-06, + "loss": 0.0017, + "step": 152680 + }, + { + "epoch": 0.9793062312199158, + "grad_norm": 0.10156667977571487, + "learning_rate": 6.045477693839968e-06, + "loss": 0.0016, + "step": 152690 + }, + { + "epoch": 0.9793703681137019, + "grad_norm": 0.05822337046265602, + "learning_rate": 6.044930356037775e-06, + "loss": 0.0061, + "step": 152700 + }, + { + "epoch": 0.979434505007488, + "grad_norm": 0.18206116557121277, + "learning_rate": 6.0443830051418596e-06, + "loss": 0.0012, + "step": 152710 + }, + { + "epoch": 0.9794986419012741, + "grad_norm": 0.11949578672647476, + "learning_rate": 6.043835641159077e-06, + "loss": 0.0024, + "step": 152720 + }, + { + "epoch": 0.9795627787950602, + "grad_norm": 0.0030041232239454985, + "learning_rate": 6.043288264096287e-06, + "loss": 0.0012, + "step": 152730 + }, + { + "epoch": 0.9796269156888463, + "grad_norm": 0.0638008862733841, + "learning_rate": 6.0427408739603525e-06, + "loss": 0.0023, + "step": 152740 + }, + { + "epoch": 0.9796910525826323, + "grad_norm": 0.49946504831314087, + "learning_rate": 6.0421934707581264e-06, + "loss": 0.0021, + "step": 152750 + }, + { + "epoch": 0.9797551894764185, + "grad_norm": 0.192970871925354, + "learning_rate": 6.041646054496474e-06, + "loss": 0.0018, + "step": 152760 + }, + { + "epoch": 0.9798193263702045, + "grad_norm": 0.10488071292638779, + "learning_rate": 6.04109862518225e-06, + "loss": 0.0012, + "step": 152770 + }, + { + "epoch": 0.9798834632639907, + "grad_norm": 0.14728890359401703, + "learning_rate": 6.040551182822317e-06, + "loss": 0.0015, + "step": 152780 + }, + { + "epoch": 0.9799476001577767, + "grad_norm": 0.09117814153432846, + "learning_rate": 6.0400037274235345e-06, + "loss": 0.0014, + "step": 152790 + }, + { + "epoch": 0.9800117370515629, + "grad_norm": 0.07769643515348434, + "learning_rate": 6.039456258992762e-06, + "loss": 0.0015, + "step": 152800 + }, + { + "epoch": 0.980075873945349, + "grad_norm": 0.14587846398353577, + "learning_rate": 6.038908777536858e-06, + "loss": 0.0017, + "step": 152810 + }, + { + "epoch": 0.980140010839135, + "grad_norm": 0.08112621307373047, + "learning_rate": 6.038361283062687e-06, + "loss": 0.0022, + "step": 152820 + }, + { + "epoch": 0.9802041477329212, + "grad_norm": 0.09943347424268723, + "learning_rate": 6.037813775577105e-06, + "loss": 0.0027, + "step": 152830 + }, + { + "epoch": 0.9802682846267072, + "grad_norm": 0.08695618063211441, + "learning_rate": 6.037266255086977e-06, + "loss": 0.0013, + "step": 152840 + }, + { + "epoch": 0.9803324215204934, + "grad_norm": 0.08281394094228745, + "learning_rate": 6.036718721599159e-06, + "loss": 0.0032, + "step": 152850 + }, + { + "epoch": 0.9803965584142794, + "grad_norm": 0.10669806599617004, + "learning_rate": 6.036171175120515e-06, + "loss": 0.0024, + "step": 152860 + }, + { + "epoch": 0.9804606953080656, + "grad_norm": 0.12385310232639313, + "learning_rate": 6.035623615657906e-06, + "loss": 0.0012, + "step": 152870 + }, + { + "epoch": 0.9805248322018516, + "grad_norm": 0.031515467911958694, + "learning_rate": 6.035076043218193e-06, + "loss": 0.0058, + "step": 152880 + }, + { + "epoch": 0.9805889690956378, + "grad_norm": 0.06842345744371414, + "learning_rate": 6.034528457808238e-06, + "loss": 0.0015, + "step": 152890 + }, + { + "epoch": 0.9806531059894238, + "grad_norm": 0.08759213984012604, + "learning_rate": 6.033980859434902e-06, + "loss": 0.0019, + "step": 152900 + }, + { + "epoch": 0.9807172428832099, + "grad_norm": 0.19722624123096466, + "learning_rate": 6.0334332481050465e-06, + "loss": 0.0035, + "step": 152910 + }, + { + "epoch": 0.980781379776996, + "grad_norm": 0.03208306431770325, + "learning_rate": 6.032885623825533e-06, + "loss": 0.0017, + "step": 152920 + }, + { + "epoch": 0.9808455166707821, + "grad_norm": 0.11563814431428909, + "learning_rate": 6.032337986603225e-06, + "loss": 0.0031, + "step": 152930 + }, + { + "epoch": 0.9809096535645682, + "grad_norm": 0.030628079548478127, + "learning_rate": 6.031790336444985e-06, + "loss": 0.0016, + "step": 152940 + }, + { + "epoch": 0.9809737904583543, + "grad_norm": 0.22559551894664764, + "learning_rate": 6.031242673357673e-06, + "loss": 0.0036, + "step": 152950 + }, + { + "epoch": 0.9810379273521405, + "grad_norm": 0.009765544906258583, + "learning_rate": 6.030694997348154e-06, + "loss": 0.0009, + "step": 152960 + }, + { + "epoch": 0.9811020642459265, + "grad_norm": 0.026936056092381477, + "learning_rate": 6.030147308423292e-06, + "loss": 0.0026, + "step": 152970 + }, + { + "epoch": 0.9811662011397126, + "grad_norm": 0.047007352113723755, + "learning_rate": 6.029599606589945e-06, + "loss": 0.0022, + "step": 152980 + }, + { + "epoch": 0.9812303380334987, + "grad_norm": 0.19669876992702484, + "learning_rate": 6.029051891854981e-06, + "loss": 0.0035, + "step": 152990 + }, + { + "epoch": 0.9812944749272848, + "grad_norm": 0.004361078608781099, + "learning_rate": 6.028504164225261e-06, + "loss": 0.0013, + "step": 153000 + }, + { + "epoch": 0.9813586118210709, + "grad_norm": 0.07227908819913864, + "learning_rate": 6.027956423707649e-06, + "loss": 0.0023, + "step": 153010 + }, + { + "epoch": 0.981422748714857, + "grad_norm": 0.30987516045570374, + "learning_rate": 6.0274086703090065e-06, + "loss": 0.0041, + "step": 153020 + }, + { + "epoch": 0.9814868856086431, + "grad_norm": 0.1345183104276657, + "learning_rate": 6.0268609040362e-06, + "loss": 0.0013, + "step": 153030 + }, + { + "epoch": 0.9815510225024292, + "grad_norm": 0.09673765301704407, + "learning_rate": 6.026313124896093e-06, + "loss": 0.0026, + "step": 153040 + }, + { + "epoch": 0.9816151593962152, + "grad_norm": 0.0684819445014, + "learning_rate": 6.02576533289555e-06, + "loss": 0.0011, + "step": 153050 + }, + { + "epoch": 0.9816792962900014, + "grad_norm": 0.03442485257983208, + "learning_rate": 6.025217528041433e-06, + "loss": 0.0021, + "step": 153060 + }, + { + "epoch": 0.9817434331837874, + "grad_norm": 0.14554892480373383, + "learning_rate": 6.024669710340609e-06, + "loss": 0.0021, + "step": 153070 + }, + { + "epoch": 0.9818075700775736, + "grad_norm": 0.04231959581375122, + "learning_rate": 6.024121879799939e-06, + "loss": 0.0021, + "step": 153080 + }, + { + "epoch": 0.9818717069713596, + "grad_norm": 0.21806782484054565, + "learning_rate": 6.023574036426291e-06, + "loss": 0.0014, + "step": 153090 + }, + { + "epoch": 0.9819358438651458, + "grad_norm": 0.1455833613872528, + "learning_rate": 6.023026180226529e-06, + "loss": 0.0024, + "step": 153100 + }, + { + "epoch": 0.9819999807589319, + "grad_norm": 0.07845161110162735, + "learning_rate": 6.022478311207517e-06, + "loss": 0.0029, + "step": 153110 + }, + { + "epoch": 0.982064117652718, + "grad_norm": 0.009655271656811237, + "learning_rate": 6.021930429376122e-06, + "loss": 0.0036, + "step": 153120 + }, + { + "epoch": 0.9821282545465041, + "grad_norm": 0.08508338779211044, + "learning_rate": 6.021382534739207e-06, + "loss": 0.0024, + "step": 153130 + }, + { + "epoch": 0.9821923914402901, + "grad_norm": 0.04608561098575592, + "learning_rate": 6.020834627303641e-06, + "loss": 0.0023, + "step": 153140 + }, + { + "epoch": 0.9822565283340763, + "grad_norm": 0.13315847516059875, + "learning_rate": 6.020286707076286e-06, + "loss": 0.0019, + "step": 153150 + }, + { + "epoch": 0.9823206652278623, + "grad_norm": 0.024399518966674805, + "learning_rate": 6.01973877406401e-06, + "loss": 0.002, + "step": 153160 + }, + { + "epoch": 0.9823848021216485, + "grad_norm": 0.06183422729372978, + "learning_rate": 6.019190828273678e-06, + "loss": 0.0022, + "step": 153170 + }, + { + "epoch": 0.9824489390154345, + "grad_norm": 0.18734464049339294, + "learning_rate": 6.018642869712158e-06, + "loss": 0.0024, + "step": 153180 + }, + { + "epoch": 0.9825130759092207, + "grad_norm": 0.10502655804157257, + "learning_rate": 6.018094898386313e-06, + "loss": 0.0016, + "step": 153190 + }, + { + "epoch": 0.9825772128030067, + "grad_norm": 0.09715384989976883, + "learning_rate": 6.017546914303012e-06, + "loss": 0.002, + "step": 153200 + }, + { + "epoch": 0.9826413496967928, + "grad_norm": 0.00909672025591135, + "learning_rate": 6.01699891746912e-06, + "loss": 0.0021, + "step": 153210 + }, + { + "epoch": 0.9827054865905789, + "grad_norm": 0.06395453959703445, + "learning_rate": 6.016450907891507e-06, + "loss": 0.0016, + "step": 153220 + }, + { + "epoch": 0.982769623484365, + "grad_norm": 0.0025687352754175663, + "learning_rate": 6.015902885577035e-06, + "loss": 0.0023, + "step": 153230 + }, + { + "epoch": 0.9828337603781512, + "grad_norm": 0.08915159851312637, + "learning_rate": 6.015354850532574e-06, + "loss": 0.0018, + "step": 153240 + }, + { + "epoch": 0.9828978972719372, + "grad_norm": 0.06700330972671509, + "learning_rate": 6.014806802764993e-06, + "loss": 0.0011, + "step": 153250 + }, + { + "epoch": 0.9829620341657234, + "grad_norm": 0.06353837996721268, + "learning_rate": 6.014258742281156e-06, + "loss": 0.0019, + "step": 153260 + }, + { + "epoch": 0.9830261710595094, + "grad_norm": 0.05698194354772568, + "learning_rate": 6.013710669087933e-06, + "loss": 0.0014, + "step": 153270 + }, + { + "epoch": 0.9830903079532956, + "grad_norm": 0.1636301726102829, + "learning_rate": 6.01316258319219e-06, + "loss": 0.0021, + "step": 153280 + }, + { + "epoch": 0.9831544448470816, + "grad_norm": 0.04641463980078697, + "learning_rate": 6.012614484600797e-06, + "loss": 0.003, + "step": 153290 + }, + { + "epoch": 0.9832185817408677, + "grad_norm": 0.1287623941898346, + "learning_rate": 6.01206637332062e-06, + "loss": 0.0021, + "step": 153300 + }, + { + "epoch": 0.9832827186346538, + "grad_norm": 0.1351720243692398, + "learning_rate": 6.0115182493585274e-06, + "loss": 0.0029, + "step": 153310 + }, + { + "epoch": 0.9833468555284399, + "grad_norm": 0.20169375836849213, + "learning_rate": 6.010970112721388e-06, + "loss": 0.0034, + "step": 153320 + }, + { + "epoch": 0.983410992422226, + "grad_norm": 0.11158497631549835, + "learning_rate": 6.010421963416073e-06, + "loss": 0.0022, + "step": 153330 + }, + { + "epoch": 0.9834751293160121, + "grad_norm": 0.05959600582718849, + "learning_rate": 6.009873801449446e-06, + "loss": 0.002, + "step": 153340 + }, + { + "epoch": 0.9835392662097981, + "grad_norm": 0.042437419295310974, + "learning_rate": 6.00932562682838e-06, + "loss": 0.0026, + "step": 153350 + }, + { + "epoch": 0.9836034031035843, + "grad_norm": 0.23058632016181946, + "learning_rate": 6.008777439559742e-06, + "loss": 0.0014, + "step": 153360 + }, + { + "epoch": 0.9836675399973703, + "grad_norm": 0.06583657115697861, + "learning_rate": 6.008229239650403e-06, + "loss": 0.0019, + "step": 153370 + }, + { + "epoch": 0.9837316768911565, + "grad_norm": 0.2148008495569229, + "learning_rate": 6.0076810271072284e-06, + "loss": 0.0021, + "step": 153380 + }, + { + "epoch": 0.9837958137849426, + "grad_norm": 0.08241555839776993, + "learning_rate": 6.0071328019370915e-06, + "loss": 0.003, + "step": 153390 + }, + { + "epoch": 0.9838599506787287, + "grad_norm": 0.1243964210152626, + "learning_rate": 6.00658456414686e-06, + "loss": 0.0024, + "step": 153400 + }, + { + "epoch": 0.9839240875725148, + "grad_norm": 0.12397447973489761, + "learning_rate": 6.006036313743407e-06, + "loss": 0.0032, + "step": 153410 + }, + { + "epoch": 0.9839882244663009, + "grad_norm": 0.1622055172920227, + "learning_rate": 6.0054880507335974e-06, + "loss": 0.0029, + "step": 153420 + }, + { + "epoch": 0.984052361360087, + "grad_norm": 0.10634105652570724, + "learning_rate": 6.004939775124306e-06, + "loss": 0.0023, + "step": 153430 + }, + { + "epoch": 0.984116498253873, + "grad_norm": 0.015624837949872017, + "learning_rate": 6.0043914869224e-06, + "loss": 0.0015, + "step": 153440 + }, + { + "epoch": 0.9841806351476592, + "grad_norm": 0.08204245567321777, + "learning_rate": 6.00384318613475e-06, + "loss": 0.0037, + "step": 153450 + }, + { + "epoch": 0.9842447720414452, + "grad_norm": 0.0344320572912693, + "learning_rate": 6.003294872768229e-06, + "loss": 0.0054, + "step": 153460 + }, + { + "epoch": 0.9843089089352314, + "grad_norm": 0.1673690527677536, + "learning_rate": 6.002746546829705e-06, + "loss": 0.0033, + "step": 153470 + }, + { + "epoch": 0.9843730458290174, + "grad_norm": 0.09421828389167786, + "learning_rate": 6.002198208326052e-06, + "loss": 0.0037, + "step": 153480 + }, + { + "epoch": 0.9844371827228036, + "grad_norm": 0.12709495425224304, + "learning_rate": 6.001649857264137e-06, + "loss": 0.002, + "step": 153490 + }, + { + "epoch": 0.9845013196165896, + "grad_norm": 0.22577492892742157, + "learning_rate": 6.001101493650835e-06, + "loss": 0.0021, + "step": 153500 + }, + { + "epoch": 0.9845654565103757, + "grad_norm": 0.10200013220310211, + "learning_rate": 6.0005531174930145e-06, + "loss": 0.0027, + "step": 153510 + }, + { + "epoch": 0.9846295934041619, + "grad_norm": 0.046134013682603836, + "learning_rate": 6.00000472879755e-06, + "loss": 0.0017, + "step": 153520 + }, + { + "epoch": 0.9846937302979479, + "grad_norm": 0.05572868511080742, + "learning_rate": 5.9994563275713106e-06, + "loss": 0.0018, + "step": 153530 + }, + { + "epoch": 0.9847578671917341, + "grad_norm": 0.06650326400995255, + "learning_rate": 5.998907913821169e-06, + "loss": 0.0018, + "step": 153540 + }, + { + "epoch": 0.9848220040855201, + "grad_norm": 0.0946025401353836, + "learning_rate": 5.998359487553996e-06, + "loss": 0.0022, + "step": 153550 + }, + { + "epoch": 0.9848861409793063, + "grad_norm": 0.06810727715492249, + "learning_rate": 5.997811048776668e-06, + "loss": 0.0026, + "step": 153560 + }, + { + "epoch": 0.9849502778730923, + "grad_norm": 0.06308284401893616, + "learning_rate": 5.997262597496052e-06, + "loss": 0.0014, + "step": 153570 + }, + { + "epoch": 0.9850144147668785, + "grad_norm": 0.13130496442317963, + "learning_rate": 5.9967141337190234e-06, + "loss": 0.0021, + "step": 153580 + }, + { + "epoch": 0.9850785516606645, + "grad_norm": 0.15117326378822327, + "learning_rate": 5.996165657452455e-06, + "loss": 0.0022, + "step": 153590 + }, + { + "epoch": 0.9851426885544506, + "grad_norm": 0.017763545736670494, + "learning_rate": 5.99561716870322e-06, + "loss": 0.0018, + "step": 153600 + }, + { + "epoch": 0.9852068254482367, + "grad_norm": 0.29442140460014343, + "learning_rate": 5.995068667478189e-06, + "loss": 0.0035, + "step": 153610 + }, + { + "epoch": 0.9852709623420228, + "grad_norm": 0.08706725388765335, + "learning_rate": 5.994520153784235e-06, + "loss": 0.002, + "step": 153620 + }, + { + "epoch": 0.9853350992358089, + "grad_norm": 0.1322675347328186, + "learning_rate": 5.993971627628235e-06, + "loss": 0.0022, + "step": 153630 + }, + { + "epoch": 0.985399236129595, + "grad_norm": 0.11531005799770355, + "learning_rate": 5.9934230890170595e-06, + "loss": 0.0012, + "step": 153640 + }, + { + "epoch": 0.985463373023381, + "grad_norm": 0.06310612708330154, + "learning_rate": 5.992874537957583e-06, + "loss": 0.0024, + "step": 153650 + }, + { + "epoch": 0.9855275099171672, + "grad_norm": 0.10038796067237854, + "learning_rate": 5.992325974456678e-06, + "loss": 0.0017, + "step": 153660 + }, + { + "epoch": 0.9855916468109533, + "grad_norm": 0.1436949521303177, + "learning_rate": 5.99177739852122e-06, + "loss": 0.0026, + "step": 153670 + }, + { + "epoch": 0.9856557837047394, + "grad_norm": 0.030475756153464317, + "learning_rate": 5.991228810158081e-06, + "loss": 0.0033, + "step": 153680 + }, + { + "epoch": 0.9857199205985255, + "grad_norm": 0.24867470562458038, + "learning_rate": 5.99068020937414e-06, + "loss": 0.0023, + "step": 153690 + }, + { + "epoch": 0.9857840574923116, + "grad_norm": 0.06362586468458176, + "learning_rate": 5.990131596176265e-06, + "loss": 0.002, + "step": 153700 + }, + { + "epoch": 0.9858481943860977, + "grad_norm": 0.2324649840593338, + "learning_rate": 5.989582970571335e-06, + "loss": 0.0026, + "step": 153710 + }, + { + "epoch": 0.9859123312798838, + "grad_norm": 0.1523626446723938, + "learning_rate": 5.989034332566221e-06, + "loss": 0.0026, + "step": 153720 + }, + { + "epoch": 0.9859764681736699, + "grad_norm": 0.09870940446853638, + "learning_rate": 5.988485682167801e-06, + "loss": 0.003, + "step": 153730 + }, + { + "epoch": 0.9860406050674559, + "grad_norm": 0.037186942994594574, + "learning_rate": 5.98793701938295e-06, + "loss": 0.001, + "step": 153740 + }, + { + "epoch": 0.9861047419612421, + "grad_norm": 0.06786558777093887, + "learning_rate": 5.987388344218541e-06, + "loss": 0.0032, + "step": 153750 + }, + { + "epoch": 0.9861688788550281, + "grad_norm": 0.2750147879123688, + "learning_rate": 5.986839656681451e-06, + "loss": 0.0027, + "step": 153760 + }, + { + "epoch": 0.9862330157488143, + "grad_norm": 0.010936897248029709, + "learning_rate": 5.986290956778554e-06, + "loss": 0.0016, + "step": 153770 + }, + { + "epoch": 0.9862971526426003, + "grad_norm": 0.07299142330884933, + "learning_rate": 5.985742244516727e-06, + "loss": 0.0028, + "step": 153780 + }, + { + "epoch": 0.9863612895363865, + "grad_norm": 0.17001250386238098, + "learning_rate": 5.9851935199028446e-06, + "loss": 0.0043, + "step": 153790 + }, + { + "epoch": 0.9864254264301726, + "grad_norm": 0.03887956961989403, + "learning_rate": 5.984644782943784e-06, + "loss": 0.0032, + "step": 153800 + }, + { + "epoch": 0.9864895633239587, + "grad_norm": 0.06760495901107788, + "learning_rate": 5.98409603364642e-06, + "loss": 0.0018, + "step": 153810 + }, + { + "epoch": 0.9865537002177448, + "grad_norm": 0.21361862123012543, + "learning_rate": 5.98354727201763e-06, + "loss": 0.0028, + "step": 153820 + }, + { + "epoch": 0.9866178371115308, + "grad_norm": 0.14941273629665375, + "learning_rate": 5.982998498064289e-06, + "loss": 0.005, + "step": 153830 + }, + { + "epoch": 0.986681974005317, + "grad_norm": 0.06045301631093025, + "learning_rate": 5.982449711793275e-06, + "loss": 0.0035, + "step": 153840 + }, + { + "epoch": 0.986746110899103, + "grad_norm": 0.15081606805324554, + "learning_rate": 5.981900913211464e-06, + "loss": 0.0024, + "step": 153850 + }, + { + "epoch": 0.9868102477928892, + "grad_norm": 0.08579900860786438, + "learning_rate": 5.981352102325733e-06, + "loss": 0.0041, + "step": 153860 + }, + { + "epoch": 0.9868743846866752, + "grad_norm": 0.2710689902305603, + "learning_rate": 5.980803279142958e-06, + "loss": 0.0016, + "step": 153870 + }, + { + "epoch": 0.9869385215804614, + "grad_norm": 0.04658711329102516, + "learning_rate": 5.9802544436700185e-06, + "loss": 0.0032, + "step": 153880 + }, + { + "epoch": 0.9870026584742474, + "grad_norm": 0.15454958379268646, + "learning_rate": 5.979705595913787e-06, + "loss": 0.004, + "step": 153890 + }, + { + "epoch": 0.9870667953680335, + "grad_norm": 0.06560371816158295, + "learning_rate": 5.979156735881148e-06, + "loss": 0.0009, + "step": 153900 + }, + { + "epoch": 0.9871309322618196, + "grad_norm": 0.0811891257762909, + "learning_rate": 5.9786078635789725e-06, + "loss": 0.003, + "step": 153910 + }, + { + "epoch": 0.9871950691556057, + "grad_norm": 0.06769920885562897, + "learning_rate": 5.978058979014143e-06, + "loss": 0.0018, + "step": 153920 + }, + { + "epoch": 0.9872592060493918, + "grad_norm": 0.14531618356704712, + "learning_rate": 5.977510082193534e-06, + "loss": 0.0022, + "step": 153930 + }, + { + "epoch": 0.9873233429431779, + "grad_norm": 0.12348922342061996, + "learning_rate": 5.976961173124025e-06, + "loss": 0.0019, + "step": 153940 + }, + { + "epoch": 0.9873874798369641, + "grad_norm": 0.14243733882904053, + "learning_rate": 5.976412251812496e-06, + "loss": 0.0024, + "step": 153950 + }, + { + "epoch": 0.9874516167307501, + "grad_norm": 0.39899900555610657, + "learning_rate": 5.975863318265822e-06, + "loss": 0.0028, + "step": 153960 + }, + { + "epoch": 0.9875157536245363, + "grad_norm": 0.06399998068809509, + "learning_rate": 5.975314372490884e-06, + "loss": 0.0018, + "step": 153970 + }, + { + "epoch": 0.9875798905183223, + "grad_norm": 0.2605004906654358, + "learning_rate": 5.974765414494559e-06, + "loss": 0.0023, + "step": 153980 + }, + { + "epoch": 0.9876440274121084, + "grad_norm": 0.14542421698570251, + "learning_rate": 5.974216444283728e-06, + "loss": 0.0023, + "step": 153990 + }, + { + "epoch": 0.9877081643058945, + "grad_norm": 0.04210500419139862, + "learning_rate": 5.9736674618652676e-06, + "loss": 0.002, + "step": 154000 + }, + { + "epoch": 0.9877723011996806, + "grad_norm": 0.05088496953248978, + "learning_rate": 5.9731184672460595e-06, + "loss": 0.0026, + "step": 154010 + }, + { + "epoch": 0.9878364380934667, + "grad_norm": 0.12533022463321686, + "learning_rate": 5.9725694604329795e-06, + "loss": 0.0022, + "step": 154020 + }, + { + "epoch": 0.9879005749872528, + "grad_norm": 0.22114339470863342, + "learning_rate": 5.972020441432911e-06, + "loss": 0.0019, + "step": 154030 + }, + { + "epoch": 0.9879647118810388, + "grad_norm": 0.0327499695122242, + "learning_rate": 5.971471410252729e-06, + "loss": 0.0023, + "step": 154040 + }, + { + "epoch": 0.988028848774825, + "grad_norm": 0.03905919939279556, + "learning_rate": 5.9709223668993185e-06, + "loss": 0.0016, + "step": 154050 + }, + { + "epoch": 0.988092985668611, + "grad_norm": 0.1768200546503067, + "learning_rate": 5.970373311379556e-06, + "loss": 0.0021, + "step": 154060 + }, + { + "epoch": 0.9881571225623972, + "grad_norm": 0.0032357678283005953, + "learning_rate": 5.9698242437003226e-06, + "loss": 0.0024, + "step": 154070 + }, + { + "epoch": 0.9882212594561833, + "grad_norm": 0.0414033979177475, + "learning_rate": 5.969275163868498e-06, + "loss": 0.0031, + "step": 154080 + }, + { + "epoch": 0.9882853963499694, + "grad_norm": 0.14489038288593292, + "learning_rate": 5.968726071890962e-06, + "loss": 0.0013, + "step": 154090 + }, + { + "epoch": 0.9883495332437555, + "grad_norm": 0.1014803797006607, + "learning_rate": 5.968176967774598e-06, + "loss": 0.0024, + "step": 154100 + }, + { + "epoch": 0.9884136701375416, + "grad_norm": 0.05862985551357269, + "learning_rate": 5.967627851526283e-06, + "loss": 0.0027, + "step": 154110 + }, + { + "epoch": 0.9884778070313277, + "grad_norm": 0.16196656227111816, + "learning_rate": 5.9670787231529005e-06, + "loss": 0.0022, + "step": 154120 + }, + { + "epoch": 0.9885419439251137, + "grad_norm": 0.043797098100185394, + "learning_rate": 5.96652958266133e-06, + "loss": 0.0031, + "step": 154130 + }, + { + "epoch": 0.9886060808188999, + "grad_norm": 0.07587334513664246, + "learning_rate": 5.965980430058453e-06, + "loss": 0.0022, + "step": 154140 + }, + { + "epoch": 0.9886702177126859, + "grad_norm": 0.11381826549768448, + "learning_rate": 5.96543126535115e-06, + "loss": 0.0009, + "step": 154150 + }, + { + "epoch": 0.9887343546064721, + "grad_norm": 0.040695976465940475, + "learning_rate": 5.964882088546305e-06, + "loss": 0.0029, + "step": 154160 + }, + { + "epoch": 0.9887984915002581, + "grad_norm": 0.10237430036067963, + "learning_rate": 5.9643328996507956e-06, + "loss": 0.0015, + "step": 154170 + }, + { + "epoch": 0.9888626283940443, + "grad_norm": 0.09190256893634796, + "learning_rate": 5.963783698671508e-06, + "loss": 0.0011, + "step": 154180 + }, + { + "epoch": 0.9889267652878303, + "grad_norm": 0.026083897799253464, + "learning_rate": 5.96323448561532e-06, + "loss": 0.0031, + "step": 154190 + }, + { + "epoch": 0.9889909021816164, + "grad_norm": 0.04641636833548546, + "learning_rate": 5.9626852604891175e-06, + "loss": 0.0015, + "step": 154200 + }, + { + "epoch": 0.9890550390754025, + "grad_norm": 0.040112677961587906, + "learning_rate": 5.962136023299779e-06, + "loss": 0.0017, + "step": 154210 + }, + { + "epoch": 0.9891191759691886, + "grad_norm": 0.05133315920829773, + "learning_rate": 5.96158677405419e-06, + "loss": 0.0025, + "step": 154220 + }, + { + "epoch": 0.9891833128629748, + "grad_norm": 0.08732984215021133, + "learning_rate": 5.96103751275923e-06, + "loss": 0.0018, + "step": 154230 + }, + { + "epoch": 0.9892474497567608, + "grad_norm": 0.041648976504802704, + "learning_rate": 5.9604882394217844e-06, + "loss": 0.0018, + "step": 154240 + }, + { + "epoch": 0.989311586650547, + "grad_norm": 0.11416812241077423, + "learning_rate": 5.959938954048734e-06, + "loss": 0.0021, + "step": 154250 + }, + { + "epoch": 0.989375723544333, + "grad_norm": 0.0667480081319809, + "learning_rate": 5.959389656646964e-06, + "loss": 0.0025, + "step": 154260 + }, + { + "epoch": 0.9894398604381192, + "grad_norm": 0.12027747929096222, + "learning_rate": 5.958840347223353e-06, + "loss": 0.001, + "step": 154270 + }, + { + "epoch": 0.9895039973319052, + "grad_norm": 0.13249050080776215, + "learning_rate": 5.958291025784789e-06, + "loss": 0.0037, + "step": 154280 + }, + { + "epoch": 0.9895681342256913, + "grad_norm": 0.1557137817144394, + "learning_rate": 5.957741692338152e-06, + "loss": 0.0026, + "step": 154290 + }, + { + "epoch": 0.9896322711194774, + "grad_norm": 0.01759687066078186, + "learning_rate": 5.9571923468903285e-06, + "loss": 0.002, + "step": 154300 + }, + { + "epoch": 0.9896964080132635, + "grad_norm": 0.04367992654442787, + "learning_rate": 5.956642989448201e-06, + "loss": 0.0019, + "step": 154310 + }, + { + "epoch": 0.9897605449070496, + "grad_norm": 0.18906162679195404, + "learning_rate": 5.956093620018652e-06, + "loss": 0.0038, + "step": 154320 + }, + { + "epoch": 0.9898246818008357, + "grad_norm": 0.07389261573553085, + "learning_rate": 5.955544238608568e-06, + "loss": 0.002, + "step": 154330 + }, + { + "epoch": 0.9898888186946218, + "grad_norm": 0.18635764718055725, + "learning_rate": 5.954994845224831e-06, + "loss": 0.0028, + "step": 154340 + }, + { + "epoch": 0.9899529555884079, + "grad_norm": 0.07302852720022202, + "learning_rate": 5.954445439874327e-06, + "loss": 0.0023, + "step": 154350 + }, + { + "epoch": 0.990017092482194, + "grad_norm": 0.048571355640888214, + "learning_rate": 5.953896022563938e-06, + "loss": 0.0014, + "step": 154360 + }, + { + "epoch": 0.9900812293759801, + "grad_norm": 0.3239956200122833, + "learning_rate": 5.953346593300552e-06, + "loss": 0.0027, + "step": 154370 + }, + { + "epoch": 0.9901453662697662, + "grad_norm": 0.05844731256365776, + "learning_rate": 5.95279715209105e-06, + "loss": 0.0018, + "step": 154380 + }, + { + "epoch": 0.9902095031635523, + "grad_norm": 0.0586627796292305, + "learning_rate": 5.9522476989423195e-06, + "loss": 0.0048, + "step": 154390 + }, + { + "epoch": 0.9902736400573384, + "grad_norm": 0.12048666924238205, + "learning_rate": 5.951698233861244e-06, + "loss": 0.0018, + "step": 154400 + }, + { + "epoch": 0.9903377769511245, + "grad_norm": 0.0442906953394413, + "learning_rate": 5.951148756854711e-06, + "loss": 0.0018, + "step": 154410 + }, + { + "epoch": 0.9904019138449106, + "grad_norm": 0.18814098834991455, + "learning_rate": 5.950599267929603e-06, + "loss": 0.0031, + "step": 154420 + }, + { + "epoch": 0.9904660507386966, + "grad_norm": 0.04006590694189072, + "learning_rate": 5.950049767092807e-06, + "loss": 0.0018, + "step": 154430 + }, + { + "epoch": 0.9905301876324828, + "grad_norm": 0.05012265965342522, + "learning_rate": 5.9495002543512085e-06, + "loss": 0.0023, + "step": 154440 + }, + { + "epoch": 0.9905943245262688, + "grad_norm": 0.11422944068908691, + "learning_rate": 5.948950729711694e-06, + "loss": 0.0015, + "step": 154450 + }, + { + "epoch": 0.990658461420055, + "grad_norm": 0.02385176159441471, + "learning_rate": 5.948401193181148e-06, + "loss": 0.0026, + "step": 154460 + }, + { + "epoch": 0.990722598313841, + "grad_norm": 0.3480783998966217, + "learning_rate": 5.947851644766456e-06, + "loss": 0.0035, + "step": 154470 + }, + { + "epoch": 0.9907867352076272, + "grad_norm": 0.08721592277288437, + "learning_rate": 5.947302084474508e-06, + "loss": 0.0025, + "step": 154480 + }, + { + "epoch": 0.9908508721014132, + "grad_norm": 0.06368129700422287, + "learning_rate": 5.946752512312186e-06, + "loss": 0.0029, + "step": 154490 + }, + { + "epoch": 0.9909150089951994, + "grad_norm": 0.14339937269687653, + "learning_rate": 5.94620292828638e-06, + "loss": 0.0041, + "step": 154500 + }, + { + "epoch": 0.9909791458889855, + "grad_norm": 0.11102969199419022, + "learning_rate": 5.945653332403973e-06, + "loss": 0.0048, + "step": 154510 + }, + { + "epoch": 0.9910432827827715, + "grad_norm": 0.06453274190425873, + "learning_rate": 5.945103724671856e-06, + "loss": 0.0018, + "step": 154520 + }, + { + "epoch": 0.9911074196765577, + "grad_norm": 0.16440774500370026, + "learning_rate": 5.944554105096911e-06, + "loss": 0.0029, + "step": 154530 + }, + { + "epoch": 0.9911715565703437, + "grad_norm": 0.08864869177341461, + "learning_rate": 5.944004473686031e-06, + "loss": 0.0034, + "step": 154540 + }, + { + "epoch": 0.9912356934641299, + "grad_norm": 0.10180410742759705, + "learning_rate": 5.943454830446097e-06, + "loss": 0.0025, + "step": 154550 + }, + { + "epoch": 0.9912998303579159, + "grad_norm": 0.2581990957260132, + "learning_rate": 5.9429051753840025e-06, + "loss": 0.0033, + "step": 154560 + }, + { + "epoch": 0.9913639672517021, + "grad_norm": 0.10305880755186081, + "learning_rate": 5.942355508506632e-06, + "loss": 0.0014, + "step": 154570 + }, + { + "epoch": 0.9914281041454881, + "grad_norm": 0.006125007756054401, + "learning_rate": 5.941805829820873e-06, + "loss": 0.0019, + "step": 154580 + }, + { + "epoch": 0.9914922410392742, + "grad_norm": 0.09211353957653046, + "learning_rate": 5.941256139333613e-06, + "loss": 0.0019, + "step": 154590 + }, + { + "epoch": 0.9915563779330603, + "grad_norm": 0.17578403651714325, + "learning_rate": 5.940706437051743e-06, + "loss": 0.0019, + "step": 154600 + }, + { + "epoch": 0.9916205148268464, + "grad_norm": 0.1794659048318863, + "learning_rate": 5.940156722982146e-06, + "loss": 0.0018, + "step": 154610 + }, + { + "epoch": 0.9916846517206325, + "grad_norm": 0.20038414001464844, + "learning_rate": 5.9396069971317176e-06, + "loss": 0.0026, + "step": 154620 + }, + { + "epoch": 0.9917487886144186, + "grad_norm": 0.09909538179636002, + "learning_rate": 5.9390572595073385e-06, + "loss": 0.0019, + "step": 154630 + }, + { + "epoch": 0.9918129255082047, + "grad_norm": 0.056216951459646225, + "learning_rate": 5.938507510115901e-06, + "loss": 0.0011, + "step": 154640 + }, + { + "epoch": 0.9918770624019908, + "grad_norm": 0.0871502012014389, + "learning_rate": 5.937957748964295e-06, + "loss": 0.0025, + "step": 154650 + }, + { + "epoch": 0.991941199295777, + "grad_norm": 0.35571610927581787, + "learning_rate": 5.937407976059407e-06, + "loss": 0.0025, + "step": 154660 + }, + { + "epoch": 0.992005336189563, + "grad_norm": 0.2239132970571518, + "learning_rate": 5.936858191408128e-06, + "loss": 0.0024, + "step": 154670 + }, + { + "epoch": 0.9920694730833491, + "grad_norm": 0.06647442281246185, + "learning_rate": 5.936308395017346e-06, + "loss": 0.0028, + "step": 154680 + }, + { + "epoch": 0.9921336099771352, + "grad_norm": 0.09694932401180267, + "learning_rate": 5.93575858689395e-06, + "loss": 0.0033, + "step": 154690 + }, + { + "epoch": 0.9921977468709213, + "grad_norm": 0.18823418021202087, + "learning_rate": 5.935208767044831e-06, + "loss": 0.0019, + "step": 154700 + }, + { + "epoch": 0.9922618837647074, + "grad_norm": 0.11843463778495789, + "learning_rate": 5.934658935476878e-06, + "loss": 0.0014, + "step": 154710 + }, + { + "epoch": 0.9923260206584935, + "grad_norm": 0.2229308784008026, + "learning_rate": 5.93410909219698e-06, + "loss": 0.0026, + "step": 154720 + }, + { + "epoch": 0.9923901575522796, + "grad_norm": 0.008210033178329468, + "learning_rate": 5.933559237212027e-06, + "loss": 0.0014, + "step": 154730 + }, + { + "epoch": 0.9924542944460657, + "grad_norm": 0.05394374579191208, + "learning_rate": 5.933009370528911e-06, + "loss": 0.0018, + "step": 154740 + }, + { + "epoch": 0.9925184313398517, + "grad_norm": 0.13600340485572815, + "learning_rate": 5.9324594921545195e-06, + "loss": 0.0017, + "step": 154750 + }, + { + "epoch": 0.9925825682336379, + "grad_norm": 0.04914219304919243, + "learning_rate": 5.931909602095744e-06, + "loss": 0.0025, + "step": 154760 + }, + { + "epoch": 0.9926467051274239, + "grad_norm": 0.09887050837278366, + "learning_rate": 5.931359700359476e-06, + "loss": 0.0015, + "step": 154770 + }, + { + "epoch": 0.9927108420212101, + "grad_norm": 0.006539844907820225, + "learning_rate": 5.930809786952605e-06, + "loss": 0.004, + "step": 154780 + }, + { + "epoch": 0.9927749789149962, + "grad_norm": 0.09673035889863968, + "learning_rate": 5.930259861882022e-06, + "loss": 0.0024, + "step": 154790 + }, + { + "epoch": 0.9928391158087823, + "grad_norm": 0.3432390093803406, + "learning_rate": 5.929709925154619e-06, + "loss": 0.0025, + "step": 154800 + }, + { + "epoch": 0.9929032527025684, + "grad_norm": 0.30649974942207336, + "learning_rate": 5.9291599767772844e-06, + "loss": 0.0035, + "step": 154810 + }, + { + "epoch": 0.9929673895963544, + "grad_norm": 0.33777597546577454, + "learning_rate": 5.928610016756913e-06, + "loss": 0.0024, + "step": 154820 + }, + { + "epoch": 0.9930315264901406, + "grad_norm": 0.13778480887413025, + "learning_rate": 5.928060045100392e-06, + "loss": 0.0015, + "step": 154830 + }, + { + "epoch": 0.9930956633839266, + "grad_norm": 0.06798578053712845, + "learning_rate": 5.927510061814618e-06, + "loss": 0.0027, + "step": 154840 + }, + { + "epoch": 0.9931598002777128, + "grad_norm": 0.2452346831560135, + "learning_rate": 5.9269600669064785e-06, + "loss": 0.003, + "step": 154850 + }, + { + "epoch": 0.9932239371714988, + "grad_norm": 0.10779757797718048, + "learning_rate": 5.9264100603828676e-06, + "loss": 0.0016, + "step": 154860 + }, + { + "epoch": 0.993288074065285, + "grad_norm": 0.07469695806503296, + "learning_rate": 5.925860042250676e-06, + "loss": 0.0029, + "step": 154870 + }, + { + "epoch": 0.993352210959071, + "grad_norm": 0.2282448261976242, + "learning_rate": 5.925310012516797e-06, + "loss": 0.0022, + "step": 154880 + }, + { + "epoch": 0.9934163478528572, + "grad_norm": 0.2129080593585968, + "learning_rate": 5.924759971188122e-06, + "loss": 0.0028, + "step": 154890 + }, + { + "epoch": 0.9934804847466432, + "grad_norm": 0.06968335807323456, + "learning_rate": 5.924209918271544e-06, + "loss": 0.0023, + "step": 154900 + }, + { + "epoch": 0.9935446216404293, + "grad_norm": 0.23044312000274658, + "learning_rate": 5.923659853773954e-06, + "loss": 0.002, + "step": 154910 + }, + { + "epoch": 0.9936087585342154, + "grad_norm": 0.3593491315841675, + "learning_rate": 5.923109777702247e-06, + "loss": 0.003, + "step": 154920 + }, + { + "epoch": 0.9936728954280015, + "grad_norm": 0.1535368710756302, + "learning_rate": 5.922559690063313e-06, + "loss": 0.0022, + "step": 154930 + }, + { + "epoch": 0.9937370323217877, + "grad_norm": 0.04858865216374397, + "learning_rate": 5.9220095908640495e-06, + "loss": 0.0017, + "step": 154940 + }, + { + "epoch": 0.9938011692155737, + "grad_norm": 0.020658988505601883, + "learning_rate": 5.9214594801113445e-06, + "loss": 0.0028, + "step": 154950 + }, + { + "epoch": 0.9938653061093599, + "grad_norm": 0.11902821063995361, + "learning_rate": 5.920909357812096e-06, + "loss": 0.0025, + "step": 154960 + }, + { + "epoch": 0.9939294430031459, + "grad_norm": 0.059825241565704346, + "learning_rate": 5.9203592239731935e-06, + "loss": 0.0024, + "step": 154970 + }, + { + "epoch": 0.993993579896932, + "grad_norm": 0.07929425686597824, + "learning_rate": 5.919809078601533e-06, + "loss": 0.0017, + "step": 154980 + }, + { + "epoch": 0.9940577167907181, + "grad_norm": 0.10015887022018433, + "learning_rate": 5.919258921704005e-06, + "loss": 0.0023, + "step": 154990 + }, + { + "epoch": 0.9941218536845042, + "grad_norm": 0.05563042685389519, + "learning_rate": 5.918708753287507e-06, + "loss": 0.002, + "step": 155000 + }, + { + "epoch": 0.9941859905782903, + "grad_norm": 0.1357041895389557, + "learning_rate": 5.918158573358934e-06, + "loss": 0.0023, + "step": 155010 + }, + { + "epoch": 0.9942501274720764, + "grad_norm": 0.19267672300338745, + "learning_rate": 5.917608381925175e-06, + "loss": 0.0023, + "step": 155020 + }, + { + "epoch": 0.9943142643658625, + "grad_norm": 0.06331752240657806, + "learning_rate": 5.917058178993129e-06, + "loss": 0.0022, + "step": 155030 + }, + { + "epoch": 0.9943784012596486, + "grad_norm": 0.0785926952958107, + "learning_rate": 5.916507964569688e-06, + "loss": 0.0021, + "step": 155040 + }, + { + "epoch": 0.9944425381534346, + "grad_norm": 0.13848865032196045, + "learning_rate": 5.9159577386617474e-06, + "loss": 0.0025, + "step": 155050 + }, + { + "epoch": 0.9945066750472208, + "grad_norm": 0.04544994607567787, + "learning_rate": 5.9154075012762e-06, + "loss": 0.0023, + "step": 155060 + }, + { + "epoch": 0.9945708119410069, + "grad_norm": 0.03753521293401718, + "learning_rate": 5.914857252419946e-06, + "loss": 0.0016, + "step": 155070 + }, + { + "epoch": 0.994634948834793, + "grad_norm": 0.008138508535921574, + "learning_rate": 5.914306992099873e-06, + "loss": 0.0101, + "step": 155080 + }, + { + "epoch": 0.9946990857285791, + "grad_norm": 0.11182375997304916, + "learning_rate": 5.913756720322883e-06, + "loss": 0.0017, + "step": 155090 + }, + { + "epoch": 0.9947632226223652, + "grad_norm": 0.13418009877204895, + "learning_rate": 5.9132064370958654e-06, + "loss": 0.0017, + "step": 155100 + }, + { + "epoch": 0.9948273595161513, + "grad_norm": 0.19320058822631836, + "learning_rate": 5.912656142425721e-06, + "loss": 0.0025, + "step": 155110 + }, + { + "epoch": 0.9948914964099373, + "grad_norm": 0.04462080076336861, + "learning_rate": 5.912105836319341e-06, + "loss": 0.0026, + "step": 155120 + }, + { + "epoch": 0.9949556333037235, + "grad_norm": 0.2760769724845886, + "learning_rate": 5.911555518783624e-06, + "loss": 0.0015, + "step": 155130 + }, + { + "epoch": 0.9950197701975095, + "grad_norm": 0.11476902663707733, + "learning_rate": 5.911005189825464e-06, + "loss": 0.0026, + "step": 155140 + }, + { + "epoch": 0.9950839070912957, + "grad_norm": 0.08436945080757141, + "learning_rate": 5.910454849451759e-06, + "loss": 0.0029, + "step": 155150 + }, + { + "epoch": 0.9951480439850817, + "grad_norm": 0.0776107907295227, + "learning_rate": 5.909904497669403e-06, + "loss": 0.0018, + "step": 155160 + }, + { + "epoch": 0.9952121808788679, + "grad_norm": 0.10979221016168594, + "learning_rate": 5.9093541344852935e-06, + "loss": 0.0023, + "step": 155170 + }, + { + "epoch": 0.9952763177726539, + "grad_norm": 0.10220757871866226, + "learning_rate": 5.908803759906328e-06, + "loss": 0.0023, + "step": 155180 + }, + { + "epoch": 0.99534045466644, + "grad_norm": 0.042567286640405655, + "learning_rate": 5.908253373939401e-06, + "loss": 0.0021, + "step": 155190 + }, + { + "epoch": 0.9954045915602261, + "grad_norm": 0.04859263077378273, + "learning_rate": 5.907702976591411e-06, + "loss": 0.0014, + "step": 155200 + }, + { + "epoch": 0.9954687284540122, + "grad_norm": 0.3271733522415161, + "learning_rate": 5.907152567869252e-06, + "loss": 0.0023, + "step": 155210 + }, + { + "epoch": 0.9955328653477984, + "grad_norm": 0.24706242978572845, + "learning_rate": 5.906602147779824e-06, + "loss": 0.0021, + "step": 155220 + }, + { + "epoch": 0.9955970022415844, + "grad_norm": 0.09822478890419006, + "learning_rate": 5.906051716330023e-06, + "loss": 0.0011, + "step": 155230 + }, + { + "epoch": 0.9956611391353706, + "grad_norm": 0.05063209682703018, + "learning_rate": 5.905501273526748e-06, + "loss": 0.0028, + "step": 155240 + }, + { + "epoch": 0.9957252760291566, + "grad_norm": 0.08223580569028854, + "learning_rate": 5.904950819376892e-06, + "loss": 0.0025, + "step": 155250 + }, + { + "epoch": 0.9957894129229428, + "grad_norm": 0.011897790245711803, + "learning_rate": 5.904400353887358e-06, + "loss": 0.0013, + "step": 155260 + }, + { + "epoch": 0.9958535498167288, + "grad_norm": 0.07343225181102753, + "learning_rate": 5.90384987706504e-06, + "loss": 0.0014, + "step": 155270 + }, + { + "epoch": 0.995917686710515, + "grad_norm": 0.033276788890361786, + "learning_rate": 5.903299388916837e-06, + "loss": 0.0012, + "step": 155280 + }, + { + "epoch": 0.995981823604301, + "grad_norm": 0.06330505758523941, + "learning_rate": 5.9027488894496475e-06, + "loss": 0.0019, + "step": 155290 + }, + { + "epoch": 0.9960459604980871, + "grad_norm": 0.12273988127708435, + "learning_rate": 5.90219837867037e-06, + "loss": 0.002, + "step": 155300 + }, + { + "epoch": 0.9961100973918732, + "grad_norm": 0.11730191111564636, + "learning_rate": 5.901647856585899e-06, + "loss": 0.0016, + "step": 155310 + }, + { + "epoch": 0.9961742342856593, + "grad_norm": 0.03262682631611824, + "learning_rate": 5.9010973232031395e-06, + "loss": 0.0018, + "step": 155320 + }, + { + "epoch": 0.9962383711794454, + "grad_norm": 0.10133693367242813, + "learning_rate": 5.900546778528985e-06, + "loss": 0.0018, + "step": 155330 + }, + { + "epoch": 0.9963025080732315, + "grad_norm": 0.44229885935783386, + "learning_rate": 5.899996222570336e-06, + "loss": 0.0039, + "step": 155340 + }, + { + "epoch": 0.9963666449670177, + "grad_norm": 0.06712737679481506, + "learning_rate": 5.899445655334091e-06, + "loss": 0.0017, + "step": 155350 + }, + { + "epoch": 0.9964307818608037, + "grad_norm": 0.028598163276910782, + "learning_rate": 5.898895076827148e-06, + "loss": 0.0038, + "step": 155360 + }, + { + "epoch": 0.9964949187545898, + "grad_norm": 0.02832469530403614, + "learning_rate": 5.898344487056409e-06, + "loss": 0.0029, + "step": 155370 + }, + { + "epoch": 0.9965590556483759, + "grad_norm": 0.14169839024543762, + "learning_rate": 5.897793886028771e-06, + "loss": 0.0028, + "step": 155380 + }, + { + "epoch": 0.996623192542162, + "grad_norm": 0.10831230133771896, + "learning_rate": 5.897243273751134e-06, + "loss": 0.0017, + "step": 155390 + }, + { + "epoch": 0.9966873294359481, + "grad_norm": 0.1491563618183136, + "learning_rate": 5.896692650230396e-06, + "loss": 0.0032, + "step": 155400 + }, + { + "epoch": 0.9967514663297342, + "grad_norm": 0.14146079123020172, + "learning_rate": 5.896142015473462e-06, + "loss": 0.0022, + "step": 155410 + }, + { + "epoch": 0.9968156032235203, + "grad_norm": 0.08077463507652283, + "learning_rate": 5.895591369487225e-06, + "loss": 0.0021, + "step": 155420 + }, + { + "epoch": 0.9968797401173064, + "grad_norm": 0.08268241584300995, + "learning_rate": 5.895040712278589e-06, + "loss": 0.0012, + "step": 155430 + }, + { + "epoch": 0.9969438770110924, + "grad_norm": 0.019326310604810715, + "learning_rate": 5.894490043854453e-06, + "loss": 0.0024, + "step": 155440 + }, + { + "epoch": 0.9970080139048786, + "grad_norm": 0.03024476021528244, + "learning_rate": 5.893939364221718e-06, + "loss": 0.0012, + "step": 155450 + }, + { + "epoch": 0.9970721507986646, + "grad_norm": 0.12479130178689957, + "learning_rate": 5.893388673387282e-06, + "loss": 0.0027, + "step": 155460 + }, + { + "epoch": 0.9971362876924508, + "grad_norm": 0.16635245084762573, + "learning_rate": 5.89283797135805e-06, + "loss": 0.0014, + "step": 155470 + }, + { + "epoch": 0.9972004245862368, + "grad_norm": 0.11341109126806259, + "learning_rate": 5.892287258140919e-06, + "loss": 0.0028, + "step": 155480 + }, + { + "epoch": 0.997264561480023, + "grad_norm": 0.06706415116786957, + "learning_rate": 5.8917365337427904e-06, + "loss": 0.0033, + "step": 155490 + }, + { + "epoch": 0.9973286983738091, + "grad_norm": 0.12035437673330307, + "learning_rate": 5.891185798170567e-06, + "loss": 0.0023, + "step": 155500 + }, + { + "epoch": 0.9973928352675951, + "grad_norm": 0.13776159286499023, + "learning_rate": 5.890635051431146e-06, + "loss": 0.0015, + "step": 155510 + }, + { + "epoch": 0.9974569721613813, + "grad_norm": 0.11789148300886154, + "learning_rate": 5.890084293531434e-06, + "loss": 0.0022, + "step": 155520 + }, + { + "epoch": 0.9975211090551673, + "grad_norm": 0.08669178932905197, + "learning_rate": 5.889533524478328e-06, + "loss": 0.0025, + "step": 155530 + }, + { + "epoch": 0.9975852459489535, + "grad_norm": 0.15548285841941833, + "learning_rate": 5.888982744278733e-06, + "loss": 0.0021, + "step": 155540 + }, + { + "epoch": 0.9976493828427395, + "grad_norm": 0.07993265241384506, + "learning_rate": 5.888431952939548e-06, + "loss": 0.0019, + "step": 155550 + }, + { + "epoch": 0.9977135197365257, + "grad_norm": 0.05593022331595421, + "learning_rate": 5.887881150467676e-06, + "loss": 0.0033, + "step": 155560 + }, + { + "epoch": 0.9977776566303117, + "grad_norm": 0.13642093539237976, + "learning_rate": 5.887330336870017e-06, + "loss": 0.0017, + "step": 155570 + }, + { + "epoch": 0.9978417935240979, + "grad_norm": 0.11116381734609604, + "learning_rate": 5.886779512153477e-06, + "loss": 0.002, + "step": 155580 + }, + { + "epoch": 0.9979059304178839, + "grad_norm": 0.26490068435668945, + "learning_rate": 5.886228676324953e-06, + "loss": 0.0016, + "step": 155590 + }, + { + "epoch": 0.99797006731167, + "grad_norm": 0.09997473657131195, + "learning_rate": 5.885677829391353e-06, + "loss": 0.0022, + "step": 155600 + }, + { + "epoch": 0.9980342042054561, + "grad_norm": 0.06118928641080856, + "learning_rate": 5.885126971359576e-06, + "loss": 0.0012, + "step": 155610 + }, + { + "epoch": 0.9980983410992422, + "grad_norm": 0.025356553494930267, + "learning_rate": 5.884576102236526e-06, + "loss": 0.0024, + "step": 155620 + }, + { + "epoch": 0.9981624779930284, + "grad_norm": 0.31826135516166687, + "learning_rate": 5.884025222029104e-06, + "loss": 0.0027, + "step": 155630 + }, + { + "epoch": 0.9982266148868144, + "grad_norm": 0.005015498027205467, + "learning_rate": 5.8834743307442145e-06, + "loss": 0.0021, + "step": 155640 + }, + { + "epoch": 0.9982907517806006, + "grad_norm": 0.1632152497768402, + "learning_rate": 5.88292342838876e-06, + "loss": 0.0028, + "step": 155650 + }, + { + "epoch": 0.9983548886743866, + "grad_norm": 0.0382956862449646, + "learning_rate": 5.882372514969644e-06, + "loss": 0.0035, + "step": 155660 + }, + { + "epoch": 0.9984190255681727, + "grad_norm": 0.11817727982997894, + "learning_rate": 5.881821590493772e-06, + "loss": 0.0019, + "step": 155670 + }, + { + "epoch": 0.9984831624619588, + "grad_norm": 0.12771518528461456, + "learning_rate": 5.881270654968042e-06, + "loss": 0.0027, + "step": 155680 + }, + { + "epoch": 0.9985472993557449, + "grad_norm": 0.03132863715291023, + "learning_rate": 5.880719708399363e-06, + "loss": 0.0056, + "step": 155690 + }, + { + "epoch": 0.998611436249531, + "grad_norm": 0.12629881501197815, + "learning_rate": 5.880168750794635e-06, + "loss": 0.0017, + "step": 155700 + }, + { + "epoch": 0.9986755731433171, + "grad_norm": 0.07862812280654907, + "learning_rate": 5.879617782160765e-06, + "loss": 0.0017, + "step": 155710 + }, + { + "epoch": 0.9987397100371032, + "grad_norm": 0.046499527990818024, + "learning_rate": 5.8790668025046536e-06, + "loss": 0.0031, + "step": 155720 + }, + { + "epoch": 0.9988038469308893, + "grad_norm": 0.11387953907251358, + "learning_rate": 5.878515811833209e-06, + "loss": 0.0023, + "step": 155730 + }, + { + "epoch": 0.9988679838246753, + "grad_norm": 0.14124180376529694, + "learning_rate": 5.877964810153333e-06, + "loss": 0.0026, + "step": 155740 + }, + { + "epoch": 0.9989321207184615, + "grad_norm": 0.005234932526946068, + "learning_rate": 5.87741379747193e-06, + "loss": 0.0034, + "step": 155750 + }, + { + "epoch": 0.9989962576122475, + "grad_norm": 0.1285937875509262, + "learning_rate": 5.876862773795905e-06, + "loss": 0.0017, + "step": 155760 + }, + { + "epoch": 0.9990603945060337, + "grad_norm": 0.10089916735887527, + "learning_rate": 5.876311739132164e-06, + "loss": 0.0014, + "step": 155770 + }, + { + "epoch": 0.9991245313998198, + "grad_norm": 0.06541144847869873, + "learning_rate": 5.875760693487607e-06, + "loss": 0.0014, + "step": 155780 + }, + { + "epoch": 0.9991886682936059, + "grad_norm": 0.011267658323049545, + "learning_rate": 5.875209636869147e-06, + "loss": 0.0019, + "step": 155790 + }, + { + "epoch": 0.999252805187392, + "grad_norm": 0.09115643799304962, + "learning_rate": 5.874658569283682e-06, + "loss": 0.0026, + "step": 155800 + }, + { + "epoch": 0.999316942081178, + "grad_norm": 0.09521937370300293, + "learning_rate": 5.8741074907381215e-06, + "loss": 0.0013, + "step": 155810 + }, + { + "epoch": 0.9993810789749642, + "grad_norm": 0.051444701850414276, + "learning_rate": 5.873556401239369e-06, + "loss": 0.0018, + "step": 155820 + }, + { + "epoch": 0.9994452158687502, + "grad_norm": 0.009652688167989254, + "learning_rate": 5.87300530079433e-06, + "loss": 0.0018, + "step": 155830 + }, + { + "epoch": 0.9995093527625364, + "grad_norm": 0.26273030042648315, + "learning_rate": 5.87245418940991e-06, + "loss": 0.0015, + "step": 155840 + }, + { + "epoch": 0.9995734896563224, + "grad_norm": 0.05302487686276436, + "learning_rate": 5.871903067093017e-06, + "loss": 0.0034, + "step": 155850 + }, + { + "epoch": 0.9996376265501086, + "grad_norm": 0.23049956560134888, + "learning_rate": 5.871351933850555e-06, + "loss": 0.0028, + "step": 155860 + }, + { + "epoch": 0.9997017634438946, + "grad_norm": 0.01473341602832079, + "learning_rate": 5.870800789689431e-06, + "loss": 0.0023, + "step": 155870 + }, + { + "epoch": 0.9997659003376808, + "grad_norm": 0.07029661536216736, + "learning_rate": 5.87024963461655e-06, + "loss": 0.0032, + "step": 155880 + }, + { + "epoch": 0.9998300372314668, + "grad_norm": 0.058633968234062195, + "learning_rate": 5.869698468638818e-06, + "loss": 0.0017, + "step": 155890 + }, + { + "epoch": 0.9998941741252529, + "grad_norm": 0.14927196502685547, + "learning_rate": 5.869147291763145e-06, + "loss": 0.0023, + "step": 155900 + }, + { + "epoch": 0.9999583110190391, + "grad_norm": 0.01744919642806053, + "learning_rate": 5.8685961039964334e-06, + "loss": 0.0018, + "step": 155910 + }, + { + "epoch": 1.0000224479128252, + "grad_norm": 0.09784498065710068, + "learning_rate": 5.868044905345594e-06, + "loss": 0.0013, + "step": 155920 + }, + { + "epoch": 1.0000865848066112, + "grad_norm": 0.01963322050869465, + "learning_rate": 5.867493695817529e-06, + "loss": 0.0011, + "step": 155930 + }, + { + "epoch": 1.0001507217003973, + "grad_norm": 0.0918341651558876, + "learning_rate": 5.86694247541915e-06, + "loss": 0.0009, + "step": 155940 + }, + { + "epoch": 1.0002148585941835, + "grad_norm": 0.1195211410522461, + "learning_rate": 5.866391244157361e-06, + "loss": 0.0023, + "step": 155950 + }, + { + "epoch": 1.0002789954879696, + "grad_norm": 0.07075042277574539, + "learning_rate": 5.865840002039072e-06, + "loss": 0.0012, + "step": 155960 + }, + { + "epoch": 1.0003431323817555, + "grad_norm": 0.11134473234415054, + "learning_rate": 5.8652887490711865e-06, + "loss": 0.0015, + "step": 155970 + }, + { + "epoch": 1.0004072692755417, + "grad_norm": 0.023944471031427383, + "learning_rate": 5.864737485260616e-06, + "loss": 0.0008, + "step": 155980 + }, + { + "epoch": 1.0004714061693278, + "grad_norm": 0.026289818808436394, + "learning_rate": 5.864186210614267e-06, + "loss": 0.0008, + "step": 155990 + }, + { + "epoch": 1.000535543063114, + "grad_norm": 0.0795121043920517, + "learning_rate": 5.863634925139046e-06, + "loss": 0.0011, + "step": 156000 + }, + { + "epoch": 1.0005996799569, + "grad_norm": 0.13002263009548187, + "learning_rate": 5.863083628841864e-06, + "loss": 0.0022, + "step": 156010 + }, + { + "epoch": 1.000663816850686, + "grad_norm": 0.02560841664671898, + "learning_rate": 5.862532321729625e-06, + "loss": 0.0018, + "step": 156020 + }, + { + "epoch": 1.0007279537444722, + "grad_norm": 0.1948590725660324, + "learning_rate": 5.8619810038092415e-06, + "loss": 0.0025, + "step": 156030 + }, + { + "epoch": 1.0007920906382584, + "grad_norm": 0.10446181893348694, + "learning_rate": 5.8614296750876184e-06, + "loss": 0.001, + "step": 156040 + }, + { + "epoch": 1.0008562275320443, + "grad_norm": 0.01794133521616459, + "learning_rate": 5.860878335571668e-06, + "loss": 0.001, + "step": 156050 + }, + { + "epoch": 1.0009203644258304, + "grad_norm": 0.06554514169692993, + "learning_rate": 5.8603269852682956e-06, + "loss": 0.0014, + "step": 156060 + }, + { + "epoch": 1.0009845013196166, + "grad_norm": 0.11025381088256836, + "learning_rate": 5.859775624184411e-06, + "loss": 0.0032, + "step": 156070 + }, + { + "epoch": 1.0010486382134027, + "grad_norm": 0.2035963237285614, + "learning_rate": 5.859224252326922e-06, + "loss": 0.0029, + "step": 156080 + }, + { + "epoch": 1.0011127751071889, + "grad_norm": 0.17359669506549835, + "learning_rate": 5.858672869702741e-06, + "loss": 0.0018, + "step": 156090 + }, + { + "epoch": 1.0011769120009748, + "grad_norm": 0.11393879354000092, + "learning_rate": 5.858121476318774e-06, + "loss": 0.0023, + "step": 156100 + }, + { + "epoch": 1.001241048894761, + "grad_norm": 0.04529412463307381, + "learning_rate": 5.857570072181933e-06, + "loss": 0.0017, + "step": 156110 + }, + { + "epoch": 1.001305185788547, + "grad_norm": 0.0755084976553917, + "learning_rate": 5.857018657299125e-06, + "loss": 0.0018, + "step": 156120 + }, + { + "epoch": 1.0013693226823333, + "grad_norm": 0.033754877746105194, + "learning_rate": 5.856467231677262e-06, + "loss": 0.0011, + "step": 156130 + }, + { + "epoch": 1.0014334595761192, + "grad_norm": 0.08789921551942825, + "learning_rate": 5.855915795323251e-06, + "loss": 0.002, + "step": 156140 + }, + { + "epoch": 1.0014975964699053, + "grad_norm": 0.07098563760519028, + "learning_rate": 5.8553643482440045e-06, + "loss": 0.0017, + "step": 156150 + }, + { + "epoch": 1.0015617333636915, + "grad_norm": 0.28439974784851074, + "learning_rate": 5.854812890446431e-06, + "loss": 0.002, + "step": 156160 + }, + { + "epoch": 1.0016258702574776, + "grad_norm": 0.13283652067184448, + "learning_rate": 5.8542614219374416e-06, + "loss": 0.003, + "step": 156170 + }, + { + "epoch": 1.0016900071512636, + "grad_norm": 0.12271107733249664, + "learning_rate": 5.853709942723945e-06, + "loss": 0.0025, + "step": 156180 + }, + { + "epoch": 1.0017541440450497, + "grad_norm": 0.17057596147060394, + "learning_rate": 5.853158452812854e-06, + "loss": 0.002, + "step": 156190 + }, + { + "epoch": 1.0018182809388358, + "grad_norm": 0.08077952265739441, + "learning_rate": 5.852606952211076e-06, + "loss": 0.0016, + "step": 156200 + }, + { + "epoch": 1.001882417832622, + "grad_norm": 0.07943473011255264, + "learning_rate": 5.852055440925524e-06, + "loss": 0.0015, + "step": 156210 + }, + { + "epoch": 1.0019465547264081, + "grad_norm": 0.08325579017400742, + "learning_rate": 5.851503918963109e-06, + "loss": 0.0015, + "step": 156220 + }, + { + "epoch": 1.002010691620194, + "grad_norm": 0.2811479866504669, + "learning_rate": 5.850952386330741e-06, + "loss": 0.0023, + "step": 156230 + }, + { + "epoch": 1.0020748285139802, + "grad_norm": 0.0512402318418026, + "learning_rate": 5.850400843035333e-06, + "loss": 0.0015, + "step": 156240 + }, + { + "epoch": 1.0021389654077664, + "grad_norm": 0.10010890662670135, + "learning_rate": 5.849849289083792e-06, + "loss": 0.0016, + "step": 156250 + }, + { + "epoch": 1.0022031023015525, + "grad_norm": 0.1832588016986847, + "learning_rate": 5.849297724483034e-06, + "loss": 0.0027, + "step": 156260 + }, + { + "epoch": 1.0022672391953384, + "grad_norm": 0.12112399190664291, + "learning_rate": 5.848746149239968e-06, + "loss": 0.0023, + "step": 156270 + }, + { + "epoch": 1.0023313760891246, + "grad_norm": 0.11804680526256561, + "learning_rate": 5.848194563361507e-06, + "loss": 0.0008, + "step": 156280 + }, + { + "epoch": 1.0023955129829107, + "grad_norm": 0.10468452423810959, + "learning_rate": 5.84764296685456e-06, + "loss": 0.001, + "step": 156290 + }, + { + "epoch": 1.002459649876697, + "grad_norm": 0.14020980894565582, + "learning_rate": 5.847091359726043e-06, + "loss": 0.0015, + "step": 156300 + }, + { + "epoch": 1.0025237867704828, + "grad_norm": 0.33313900232315063, + "learning_rate": 5.846539741982864e-06, + "loss": 0.0023, + "step": 156310 + }, + { + "epoch": 1.002587923664269, + "grad_norm": 0.013997141271829605, + "learning_rate": 5.845988113631937e-06, + "loss": 0.0013, + "step": 156320 + }, + { + "epoch": 1.0026520605580551, + "grad_norm": 0.05682804808020592, + "learning_rate": 5.845436474680175e-06, + "loss": 0.0018, + "step": 156330 + }, + { + "epoch": 1.0027161974518413, + "grad_norm": 0.0879458412528038, + "learning_rate": 5.844884825134489e-06, + "loss": 0.0009, + "step": 156340 + }, + { + "epoch": 1.0027803343456274, + "grad_norm": 0.09567782282829285, + "learning_rate": 5.844333165001794e-06, + "loss": 0.0022, + "step": 156350 + }, + { + "epoch": 1.0028444712394133, + "grad_norm": 0.023266635835170746, + "learning_rate": 5.843781494289001e-06, + "loss": 0.0026, + "step": 156360 + }, + { + "epoch": 1.0029086081331995, + "grad_norm": 0.007620238699018955, + "learning_rate": 5.843229813003022e-06, + "loss": 0.0013, + "step": 156370 + }, + { + "epoch": 1.0029727450269856, + "grad_norm": 0.19264978170394897, + "learning_rate": 5.84267812115077e-06, + "loss": 0.0019, + "step": 156380 + }, + { + "epoch": 1.0030368819207718, + "grad_norm": 0.12446966022253036, + "learning_rate": 5.84212641873916e-06, + "loss": 0.0018, + "step": 156390 + }, + { + "epoch": 1.0031010188145577, + "grad_norm": 0.20501558482646942, + "learning_rate": 5.841574705775103e-06, + "loss": 0.0027, + "step": 156400 + }, + { + "epoch": 1.0031651557083439, + "grad_norm": 0.11769339442253113, + "learning_rate": 5.841022982265515e-06, + "loss": 0.003, + "step": 156410 + }, + { + "epoch": 1.00322929260213, + "grad_norm": 0.09512394666671753, + "learning_rate": 5.8404712482173064e-06, + "loss": 0.0015, + "step": 156420 + }, + { + "epoch": 1.0032934294959162, + "grad_norm": 0.08365820348262787, + "learning_rate": 5.839919503637394e-06, + "loss": 0.0026, + "step": 156430 + }, + { + "epoch": 1.003357566389702, + "grad_norm": 0.17679233849048615, + "learning_rate": 5.839367748532689e-06, + "loss": 0.0019, + "step": 156440 + }, + { + "epoch": 1.0034217032834882, + "grad_norm": 0.30964887142181396, + "learning_rate": 5.838815982910107e-06, + "loss": 0.0028, + "step": 156450 + }, + { + "epoch": 1.0034858401772744, + "grad_norm": 0.10647038370370865, + "learning_rate": 5.8382642067765586e-06, + "loss": 0.0015, + "step": 156460 + }, + { + "epoch": 1.0035499770710605, + "grad_norm": 0.14962203800678253, + "learning_rate": 5.837712420138962e-06, + "loss": 0.0052, + "step": 156470 + }, + { + "epoch": 1.0036141139648467, + "grad_norm": 0.08887304365634918, + "learning_rate": 5.83716062300423e-06, + "loss": 0.0026, + "step": 156480 + }, + { + "epoch": 1.0036782508586326, + "grad_norm": 0.12420698255300522, + "learning_rate": 5.836608815379279e-06, + "loss": 0.0034, + "step": 156490 + }, + { + "epoch": 1.0037423877524188, + "grad_norm": 0.006879259832203388, + "learning_rate": 5.8360569972710206e-06, + "loss": 0.0032, + "step": 156500 + }, + { + "epoch": 1.003806524646205, + "grad_norm": 0.04893284663558006, + "learning_rate": 5.83550516868637e-06, + "loss": 0.0022, + "step": 156510 + }, + { + "epoch": 1.003870661539991, + "grad_norm": 0.2606249749660492, + "learning_rate": 5.834953329632243e-06, + "loss": 0.0019, + "step": 156520 + }, + { + "epoch": 1.003934798433777, + "grad_norm": 0.1031232699751854, + "learning_rate": 5.834401480115554e-06, + "loss": 0.0021, + "step": 156530 + }, + { + "epoch": 1.0039989353275631, + "grad_norm": 0.0825612023472786, + "learning_rate": 5.833849620143217e-06, + "loss": 0.0014, + "step": 156540 + }, + { + "epoch": 1.0040630722213493, + "grad_norm": 0.023826727643609047, + "learning_rate": 5.833297749722149e-06, + "loss": 0.0016, + "step": 156550 + }, + { + "epoch": 1.0041272091151354, + "grad_norm": 0.12111756950616837, + "learning_rate": 5.832745868859265e-06, + "loss": 0.0013, + "step": 156560 + }, + { + "epoch": 1.0041913460089213, + "grad_norm": 0.16003498435020447, + "learning_rate": 5.832193977561479e-06, + "loss": 0.0022, + "step": 156570 + }, + { + "epoch": 1.0042554829027075, + "grad_norm": 0.05248863250017166, + "learning_rate": 5.8316420758357094e-06, + "loss": 0.0013, + "step": 156580 + }, + { + "epoch": 1.0043196197964936, + "grad_norm": 0.14343620836734772, + "learning_rate": 5.83109016368887e-06, + "loss": 0.0016, + "step": 156590 + }, + { + "epoch": 1.0043837566902798, + "grad_norm": 0.2597375810146332, + "learning_rate": 5.830538241127876e-06, + "loss": 0.0013, + "step": 156600 + }, + { + "epoch": 1.0044478935840657, + "grad_norm": 0.3479706943035126, + "learning_rate": 5.8299863081596445e-06, + "loss": 0.0045, + "step": 156610 + }, + { + "epoch": 1.0045120304778519, + "grad_norm": 0.07276829332113266, + "learning_rate": 5.829434364791091e-06, + "loss": 0.0014, + "step": 156620 + }, + { + "epoch": 1.004576167371638, + "grad_norm": 0.01598191447556019, + "learning_rate": 5.828882411029133e-06, + "loss": 0.0019, + "step": 156630 + }, + { + "epoch": 1.0046403042654242, + "grad_norm": 0.05684473738074303, + "learning_rate": 5.828330446880686e-06, + "loss": 0.0024, + "step": 156640 + }, + { + "epoch": 1.0047044411592103, + "grad_norm": 0.06137927994132042, + "learning_rate": 5.827778472352666e-06, + "loss": 0.002, + "step": 156650 + }, + { + "epoch": 1.0047685780529962, + "grad_norm": 0.15637855231761932, + "learning_rate": 5.82722648745199e-06, + "loss": 0.002, + "step": 156660 + }, + { + "epoch": 1.0048327149467824, + "grad_norm": 0.09243360161781311, + "learning_rate": 5.8266744921855754e-06, + "loss": 0.0014, + "step": 156670 + }, + { + "epoch": 1.0048968518405685, + "grad_norm": 0.17562691867351532, + "learning_rate": 5.826122486560338e-06, + "loss": 0.0018, + "step": 156680 + }, + { + "epoch": 1.0049609887343547, + "grad_norm": 0.051522646099328995, + "learning_rate": 5.825570470583196e-06, + "loss": 0.0015, + "step": 156690 + }, + { + "epoch": 1.0050251256281406, + "grad_norm": 0.07359160482883453, + "learning_rate": 5.8250184442610646e-06, + "loss": 0.0019, + "step": 156700 + }, + { + "epoch": 1.0050892625219268, + "grad_norm": 0.09471216052770615, + "learning_rate": 5.824466407600865e-06, + "loss": 0.0019, + "step": 156710 + }, + { + "epoch": 1.005153399415713, + "grad_norm": 0.14061139523983002, + "learning_rate": 5.8239143606095095e-06, + "loss": 0.0015, + "step": 156720 + }, + { + "epoch": 1.005217536309499, + "grad_norm": 0.04952032119035721, + "learning_rate": 5.82336230329392e-06, + "loss": 0.0021, + "step": 156730 + }, + { + "epoch": 1.005281673203285, + "grad_norm": 0.02180866152048111, + "learning_rate": 5.822810235661011e-06, + "loss": 0.0075, + "step": 156740 + }, + { + "epoch": 1.0053458100970711, + "grad_norm": 0.2020481824874878, + "learning_rate": 5.822258157717704e-06, + "loss": 0.0019, + "step": 156750 + }, + { + "epoch": 1.0054099469908573, + "grad_norm": 0.0793982520699501, + "learning_rate": 5.821706069470913e-06, + "loss": 0.001, + "step": 156760 + }, + { + "epoch": 1.0054740838846434, + "grad_norm": 0.13440971076488495, + "learning_rate": 5.821153970927558e-06, + "loss": 0.0014, + "step": 156770 + }, + { + "epoch": 1.0055382207784296, + "grad_norm": 0.12930013239383698, + "learning_rate": 5.820601862094556e-06, + "loss": 0.0018, + "step": 156780 + }, + { + "epoch": 1.0056023576722155, + "grad_norm": 0.02354401908814907, + "learning_rate": 5.8200497429788275e-06, + "loss": 0.0016, + "step": 156790 + }, + { + "epoch": 1.0056664945660017, + "grad_norm": 0.0333816297352314, + "learning_rate": 5.819497613587288e-06, + "loss": 0.0022, + "step": 156800 + }, + { + "epoch": 1.0057306314597878, + "grad_norm": 0.030010614544153214, + "learning_rate": 5.81894547392686e-06, + "loss": 0.0021, + "step": 156810 + }, + { + "epoch": 1.005794768353574, + "grad_norm": 0.06879100948572159, + "learning_rate": 5.818393324004458e-06, + "loss": 0.0017, + "step": 156820 + }, + { + "epoch": 1.0058589052473599, + "grad_norm": 0.0591420978307724, + "learning_rate": 5.817841163827004e-06, + "loss": 0.002, + "step": 156830 + }, + { + "epoch": 1.005923042141146, + "grad_norm": 0.03935656696557999, + "learning_rate": 5.8172889934014135e-06, + "loss": 0.0023, + "step": 156840 + }, + { + "epoch": 1.0059871790349322, + "grad_norm": 0.07783859223127365, + "learning_rate": 5.81673681273461e-06, + "loss": 0.0018, + "step": 156850 + }, + { + "epoch": 1.0060513159287183, + "grad_norm": 0.03830932825803757, + "learning_rate": 5.8161846218335095e-06, + "loss": 0.0026, + "step": 156860 + }, + { + "epoch": 1.0061154528225043, + "grad_norm": 0.050277289003133774, + "learning_rate": 5.815632420705033e-06, + "loss": 0.0017, + "step": 156870 + }, + { + "epoch": 1.0061795897162904, + "grad_norm": 0.10210917890071869, + "learning_rate": 5.815080209356099e-06, + "loss": 0.0014, + "step": 156880 + }, + { + "epoch": 1.0062437266100765, + "grad_norm": 0.00457958597689867, + "learning_rate": 5.8145279877936275e-06, + "loss": 0.0016, + "step": 156890 + }, + { + "epoch": 1.0063078635038627, + "grad_norm": 0.0677952691912651, + "learning_rate": 5.813975756024538e-06, + "loss": 0.0017, + "step": 156900 + }, + { + "epoch": 1.0063720003976488, + "grad_norm": 0.13198815286159515, + "learning_rate": 5.8134235140557505e-06, + "loss": 0.0019, + "step": 156910 + }, + { + "epoch": 1.0064361372914348, + "grad_norm": 0.21554039418697357, + "learning_rate": 5.812871261894186e-06, + "loss": 0.0013, + "step": 156920 + }, + { + "epoch": 1.006500274185221, + "grad_norm": 0.11818122863769531, + "learning_rate": 5.8123189995467625e-06, + "loss": 0.0016, + "step": 156930 + }, + { + "epoch": 1.006564411079007, + "grad_norm": 0.17103898525238037, + "learning_rate": 5.811766727020403e-06, + "loss": 0.0016, + "step": 156940 + }, + { + "epoch": 1.0066285479727932, + "grad_norm": 0.12196987867355347, + "learning_rate": 5.811214444322024e-06, + "loss": 0.0014, + "step": 156950 + }, + { + "epoch": 1.0066926848665791, + "grad_norm": 0.05216473713517189, + "learning_rate": 5.81066215145855e-06, + "loss": 0.0012, + "step": 156960 + }, + { + "epoch": 1.0067568217603653, + "grad_norm": 0.02345466986298561, + "learning_rate": 5.8101098484368985e-06, + "loss": 0.0014, + "step": 156970 + }, + { + "epoch": 1.0068209586541514, + "grad_norm": 0.06300746649503708, + "learning_rate": 5.809557535263993e-06, + "loss": 0.0011, + "step": 156980 + }, + { + "epoch": 1.0068850955479376, + "grad_norm": 0.11322963982820511, + "learning_rate": 5.809005211946752e-06, + "loss": 0.0024, + "step": 156990 + }, + { + "epoch": 1.0069492324417235, + "grad_norm": 0.03215669095516205, + "learning_rate": 5.808452878492098e-06, + "loss": 0.0013, + "step": 157000 + }, + { + "epoch": 1.0070133693355097, + "grad_norm": 0.02180369570851326, + "learning_rate": 5.8079005349069516e-06, + "loss": 0.0019, + "step": 157010 + }, + { + "epoch": 1.0070775062292958, + "grad_norm": 0.20294401049613953, + "learning_rate": 5.807348181198235e-06, + "loss": 0.0031, + "step": 157020 + }, + { + "epoch": 1.007141643123082, + "grad_norm": 0.13072869181632996, + "learning_rate": 5.806795817372867e-06, + "loss": 0.0025, + "step": 157030 + }, + { + "epoch": 1.0072057800168681, + "grad_norm": 0.17437884211540222, + "learning_rate": 5.806243443437771e-06, + "loss": 0.0019, + "step": 157040 + }, + { + "epoch": 1.007269916910654, + "grad_norm": 0.2740894556045532, + "learning_rate": 5.805691059399869e-06, + "loss": 0.0023, + "step": 157050 + }, + { + "epoch": 1.0073340538044402, + "grad_norm": 0.055262111127376556, + "learning_rate": 5.8051386652660815e-06, + "loss": 0.0013, + "step": 157060 + }, + { + "epoch": 1.0073981906982263, + "grad_norm": 0.05367450788617134, + "learning_rate": 5.804586261043332e-06, + "loss": 0.0026, + "step": 157070 + }, + { + "epoch": 1.0074623275920125, + "grad_norm": 0.02127639576792717, + "learning_rate": 5.8040338467385416e-06, + "loss": 0.0014, + "step": 157080 + }, + { + "epoch": 1.0075264644857984, + "grad_norm": 0.08617508411407471, + "learning_rate": 5.803481422358632e-06, + "loss": 0.0011, + "step": 157090 + }, + { + "epoch": 1.0075906013795846, + "grad_norm": 0.08567603677511215, + "learning_rate": 5.802928987910527e-06, + "loss": 0.0018, + "step": 157100 + }, + { + "epoch": 1.0076547382733707, + "grad_norm": 0.11544500291347504, + "learning_rate": 5.802376543401147e-06, + "loss": 0.0034, + "step": 157110 + }, + { + "epoch": 1.0077188751671569, + "grad_norm": 0.15631578862667084, + "learning_rate": 5.801824088837414e-06, + "loss": 0.0019, + "step": 157120 + }, + { + "epoch": 1.0077830120609428, + "grad_norm": 0.059519752860069275, + "learning_rate": 5.801271624226255e-06, + "loss": 0.0013, + "step": 157130 + }, + { + "epoch": 1.007847148954729, + "grad_norm": 0.10539231449365616, + "learning_rate": 5.800719149574588e-06, + "loss": 0.0007, + "step": 157140 + }, + { + "epoch": 1.007911285848515, + "grad_norm": 0.24564160406589508, + "learning_rate": 5.80016666488934e-06, + "loss": 0.0013, + "step": 157150 + }, + { + "epoch": 1.0079754227423012, + "grad_norm": 0.07587874680757523, + "learning_rate": 5.799614170177429e-06, + "loss": 0.0036, + "step": 157160 + }, + { + "epoch": 1.0080395596360872, + "grad_norm": 0.08727598190307617, + "learning_rate": 5.799061665445782e-06, + "loss": 0.0017, + "step": 157170 + }, + { + "epoch": 1.0081036965298733, + "grad_norm": 0.07284372299909592, + "learning_rate": 5.798509150701321e-06, + "loss": 0.0021, + "step": 157180 + }, + { + "epoch": 1.0081678334236595, + "grad_norm": 0.033877890557050705, + "learning_rate": 5.79795662595097e-06, + "loss": 0.002, + "step": 157190 + }, + { + "epoch": 1.0082319703174456, + "grad_norm": 0.054351892322301865, + "learning_rate": 5.797404091201653e-06, + "loss": 0.001, + "step": 157200 + }, + { + "epoch": 1.0082961072112318, + "grad_norm": 0.34961628913879395, + "learning_rate": 5.796851546460292e-06, + "loss": 0.0041, + "step": 157210 + }, + { + "epoch": 1.0083602441050177, + "grad_norm": 0.06319102644920349, + "learning_rate": 5.796298991733812e-06, + "loss": 0.0018, + "step": 157220 + }, + { + "epoch": 1.0084243809988038, + "grad_norm": 0.1530248373746872, + "learning_rate": 5.795746427029136e-06, + "loss": 0.0019, + "step": 157230 + }, + { + "epoch": 1.00848851789259, + "grad_norm": 0.17742373049259186, + "learning_rate": 5.795193852353187e-06, + "loss": 0.001, + "step": 157240 + }, + { + "epoch": 1.0085526547863761, + "grad_norm": 0.028137022629380226, + "learning_rate": 5.794641267712894e-06, + "loss": 0.0041, + "step": 157250 + }, + { + "epoch": 1.008616791680162, + "grad_norm": 0.0809083953499794, + "learning_rate": 5.794088673115175e-06, + "loss": 0.0027, + "step": 157260 + }, + { + "epoch": 1.0086809285739482, + "grad_norm": 0.028177518397569656, + "learning_rate": 5.793536068566959e-06, + "loss": 0.0011, + "step": 157270 + }, + { + "epoch": 1.0087450654677343, + "grad_norm": 0.1421087384223938, + "learning_rate": 5.79298345407517e-06, + "loss": 0.0018, + "step": 157280 + }, + { + "epoch": 1.0088092023615205, + "grad_norm": 0.13536611199378967, + "learning_rate": 5.792430829646729e-06, + "loss": 0.0033, + "step": 157290 + }, + { + "epoch": 1.0088733392553064, + "grad_norm": 0.08360431343317032, + "learning_rate": 5.791878195288565e-06, + "loss": 0.0019, + "step": 157300 + }, + { + "epoch": 1.0089374761490926, + "grad_norm": 0.053090013563632965, + "learning_rate": 5.7913255510076e-06, + "loss": 0.0038, + "step": 157310 + }, + { + "epoch": 1.0090016130428787, + "grad_norm": 0.19956789910793304, + "learning_rate": 5.7907728968107635e-06, + "loss": 0.0022, + "step": 157320 + }, + { + "epoch": 1.0090657499366649, + "grad_norm": 0.07794226706027985, + "learning_rate": 5.790220232704974e-06, + "loss": 0.0008, + "step": 157330 + }, + { + "epoch": 1.009129886830451, + "grad_norm": 0.023244792595505714, + "learning_rate": 5.789667558697163e-06, + "loss": 0.0035, + "step": 157340 + }, + { + "epoch": 1.009194023724237, + "grad_norm": 0.01863129809498787, + "learning_rate": 5.789114874794251e-06, + "loss": 0.0017, + "step": 157350 + }, + { + "epoch": 1.009258160618023, + "grad_norm": 0.13609835505485535, + "learning_rate": 5.788562181003167e-06, + "loss": 0.002, + "step": 157360 + }, + { + "epoch": 1.0093222975118092, + "grad_norm": 0.040573108941316605, + "learning_rate": 5.788009477330834e-06, + "loss": 0.002, + "step": 157370 + }, + { + "epoch": 1.0093864344055954, + "grad_norm": 0.167245015501976, + "learning_rate": 5.78745676378418e-06, + "loss": 0.0026, + "step": 157380 + }, + { + "epoch": 1.0094505712993813, + "grad_norm": 0.012963230721652508, + "learning_rate": 5.786904040370128e-06, + "loss": 0.0017, + "step": 157390 + }, + { + "epoch": 1.0095147081931675, + "grad_norm": 0.13187387585639954, + "learning_rate": 5.786351307095608e-06, + "loss": 0.0017, + "step": 157400 + }, + { + "epoch": 1.0095788450869536, + "grad_norm": 0.07060447335243225, + "learning_rate": 5.785798563967544e-06, + "loss": 0.0011, + "step": 157410 + }, + { + "epoch": 1.0096429819807398, + "grad_norm": 0.06462021172046661, + "learning_rate": 5.785245810992861e-06, + "loss": 0.0017, + "step": 157420 + }, + { + "epoch": 1.0097071188745257, + "grad_norm": 0.04903660714626312, + "learning_rate": 5.784693048178489e-06, + "loss": 0.0019, + "step": 157430 + }, + { + "epoch": 1.0097712557683118, + "grad_norm": 0.18517525494098663, + "learning_rate": 5.7841402755313504e-06, + "loss": 0.0025, + "step": 157440 + }, + { + "epoch": 1.009835392662098, + "grad_norm": 0.19298167526721954, + "learning_rate": 5.783587493058376e-06, + "loss": 0.0022, + "step": 157450 + }, + { + "epoch": 1.0098995295558841, + "grad_norm": 0.004793180152773857, + "learning_rate": 5.783034700766487e-06, + "loss": 0.0025, + "step": 157460 + }, + { + "epoch": 1.0099636664496703, + "grad_norm": 0.10318158566951752, + "learning_rate": 5.782481898662616e-06, + "loss": 0.0031, + "step": 157470 + }, + { + "epoch": 1.0100278033434562, + "grad_norm": 0.04313928261399269, + "learning_rate": 5.781929086753687e-06, + "loss": 0.0048, + "step": 157480 + }, + { + "epoch": 1.0100919402372424, + "grad_norm": 0.011004787869751453, + "learning_rate": 5.7813762650466275e-06, + "loss": 0.0015, + "step": 157490 + }, + { + "epoch": 1.0101560771310285, + "grad_norm": 0.13316594064235687, + "learning_rate": 5.7808234335483644e-06, + "loss": 0.0016, + "step": 157500 + }, + { + "epoch": 1.0102202140248147, + "grad_norm": 0.1499415785074234, + "learning_rate": 5.780270592265827e-06, + "loss": 0.0023, + "step": 157510 + }, + { + "epoch": 1.0102843509186006, + "grad_norm": 0.0686010867357254, + "learning_rate": 5.7797177412059414e-06, + "loss": 0.0031, + "step": 157520 + }, + { + "epoch": 1.0103484878123867, + "grad_norm": 0.04718811437487602, + "learning_rate": 5.779164880375635e-06, + "loss": 0.0018, + "step": 157530 + }, + { + "epoch": 1.0104126247061729, + "grad_norm": 0.05614418536424637, + "learning_rate": 5.778612009781834e-06, + "loss": 0.0015, + "step": 157540 + }, + { + "epoch": 1.010476761599959, + "grad_norm": 0.11073336005210876, + "learning_rate": 5.778059129431469e-06, + "loss": 0.0015, + "step": 157550 + }, + { + "epoch": 1.010540898493745, + "grad_norm": 0.006827982142567635, + "learning_rate": 5.777506239331468e-06, + "loss": 0.0028, + "step": 157560 + }, + { + "epoch": 1.010605035387531, + "grad_norm": 0.15427444875240326, + "learning_rate": 5.776953339488758e-06, + "loss": 0.0018, + "step": 157570 + }, + { + "epoch": 1.0106691722813173, + "grad_norm": 0.24773503839969635, + "learning_rate": 5.776400429910267e-06, + "loss": 0.0031, + "step": 157580 + }, + { + "epoch": 1.0107333091751034, + "grad_norm": 0.03619404137134552, + "learning_rate": 5.7758475106029236e-06, + "loss": 0.0012, + "step": 157590 + }, + { + "epoch": 1.0107974460688895, + "grad_norm": 0.07290811091661453, + "learning_rate": 5.7752945815736574e-06, + "loss": 0.001, + "step": 157600 + }, + { + "epoch": 1.0108615829626755, + "grad_norm": 0.052982788532972336, + "learning_rate": 5.7747416428293935e-06, + "loss": 0.0014, + "step": 157610 + }, + { + "epoch": 1.0109257198564616, + "grad_norm": 0.021865442395210266, + "learning_rate": 5.774188694377066e-06, + "loss": 0.0025, + "step": 157620 + }, + { + "epoch": 1.0109898567502478, + "grad_norm": 0.2732413411140442, + "learning_rate": 5.773635736223598e-06, + "loss": 0.0019, + "step": 157630 + }, + { + "epoch": 1.011053993644034, + "grad_norm": 0.36807775497436523, + "learning_rate": 5.773082768375924e-06, + "loss": 0.0037, + "step": 157640 + }, + { + "epoch": 1.0111181305378198, + "grad_norm": 0.08362676203250885, + "learning_rate": 5.772529790840968e-06, + "loss": 0.0022, + "step": 157650 + }, + { + "epoch": 1.011182267431606, + "grad_norm": 0.1202588900923729, + "learning_rate": 5.771976803625664e-06, + "loss": 0.0018, + "step": 157660 + }, + { + "epoch": 1.0112464043253921, + "grad_norm": 0.08706741034984589, + "learning_rate": 5.771423806736938e-06, + "loss": 0.0025, + "step": 157670 + }, + { + "epoch": 1.0113105412191783, + "grad_norm": 0.12375295907258987, + "learning_rate": 5.770870800181721e-06, + "loss": 0.0063, + "step": 157680 + }, + { + "epoch": 1.0113746781129642, + "grad_norm": 0.034464508295059204, + "learning_rate": 5.77031778396694e-06, + "loss": 0.001, + "step": 157690 + }, + { + "epoch": 1.0114388150067504, + "grad_norm": 0.15537671744823456, + "learning_rate": 5.769764758099528e-06, + "loss": 0.0017, + "step": 157700 + }, + { + "epoch": 1.0115029519005365, + "grad_norm": 0.10750894993543625, + "learning_rate": 5.769211722586413e-06, + "loss": 0.0015, + "step": 157710 + }, + { + "epoch": 1.0115670887943227, + "grad_norm": 0.03844396024942398, + "learning_rate": 5.7686586774345265e-06, + "loss": 0.0027, + "step": 157720 + }, + { + "epoch": 1.0116312256881086, + "grad_norm": 0.02411005087196827, + "learning_rate": 5.768105622650796e-06, + "loss": 0.0014, + "step": 157730 + }, + { + "epoch": 1.0116953625818947, + "grad_norm": 0.08573506027460098, + "learning_rate": 5.767552558242153e-06, + "loss": 0.0011, + "step": 157740 + }, + { + "epoch": 1.011759499475681, + "grad_norm": 0.14994917809963226, + "learning_rate": 5.766999484215529e-06, + "loss": 0.0016, + "step": 157750 + }, + { + "epoch": 1.011823636369467, + "grad_norm": 0.11491748690605164, + "learning_rate": 5.766446400577852e-06, + "loss": 0.0013, + "step": 157760 + }, + { + "epoch": 1.0118877732632532, + "grad_norm": 0.05437860265374184, + "learning_rate": 5.765893307336055e-06, + "loss": 0.005, + "step": 157770 + }, + { + "epoch": 1.0119519101570391, + "grad_norm": 0.16857770085334778, + "learning_rate": 5.765340204497066e-06, + "loss": 0.0015, + "step": 157780 + }, + { + "epoch": 1.0120160470508253, + "grad_norm": 0.1880984902381897, + "learning_rate": 5.76478709206782e-06, + "loss": 0.0022, + "step": 157790 + }, + { + "epoch": 1.0120801839446114, + "grad_norm": 0.09091341495513916, + "learning_rate": 5.764233970055243e-06, + "loss": 0.0021, + "step": 157800 + }, + { + "epoch": 1.0121443208383976, + "grad_norm": 0.041069868952035904, + "learning_rate": 5.763680838466269e-06, + "loss": 0.0011, + "step": 157810 + }, + { + "epoch": 1.0122084577321835, + "grad_norm": 0.07052157074213028, + "learning_rate": 5.763127697307828e-06, + "loss": 0.0039, + "step": 157820 + }, + { + "epoch": 1.0122725946259696, + "grad_norm": 0.11400876939296722, + "learning_rate": 5.762574546586852e-06, + "loss": 0.0014, + "step": 157830 + }, + { + "epoch": 1.0123367315197558, + "grad_norm": 0.0032041589729487896, + "learning_rate": 5.76202138631027e-06, + "loss": 0.0015, + "step": 157840 + }, + { + "epoch": 1.012400868413542, + "grad_norm": 0.1367321014404297, + "learning_rate": 5.7614682164850176e-06, + "loss": 0.002, + "step": 157850 + }, + { + "epoch": 1.0124650053073279, + "grad_norm": 0.1709347665309906, + "learning_rate": 5.7609150371180234e-06, + "loss": 0.0012, + "step": 157860 + }, + { + "epoch": 1.012529142201114, + "grad_norm": 0.12345244735479355, + "learning_rate": 5.760361848216221e-06, + "loss": 0.0019, + "step": 157870 + }, + { + "epoch": 1.0125932790949002, + "grad_norm": 0.04564058780670166, + "learning_rate": 5.759808649786539e-06, + "loss": 0.0013, + "step": 157880 + }, + { + "epoch": 1.0126574159886863, + "grad_norm": 0.2694692313671112, + "learning_rate": 5.759255441835914e-06, + "loss": 0.0031, + "step": 157890 + }, + { + "epoch": 1.0127215528824725, + "grad_norm": 0.15322737395763397, + "learning_rate": 5.758702224371274e-06, + "loss": 0.0022, + "step": 157900 + }, + { + "epoch": 1.0127856897762584, + "grad_norm": 0.11926686018705368, + "learning_rate": 5.758148997399553e-06, + "loss": 0.0011, + "step": 157910 + }, + { + "epoch": 1.0128498266700445, + "grad_norm": 0.056959182024002075, + "learning_rate": 5.7575957609276845e-06, + "loss": 0.0017, + "step": 157920 + }, + { + "epoch": 1.0129139635638307, + "grad_norm": 0.05914493277668953, + "learning_rate": 5.757042514962599e-06, + "loss": 0.0027, + "step": 157930 + }, + { + "epoch": 1.0129781004576168, + "grad_norm": 0.0504847951233387, + "learning_rate": 5.756489259511228e-06, + "loss": 0.002, + "step": 157940 + }, + { + "epoch": 1.0130422373514028, + "grad_norm": 0.07084763795137405, + "learning_rate": 5.755935994580508e-06, + "loss": 0.0014, + "step": 157950 + }, + { + "epoch": 1.013106374245189, + "grad_norm": 0.11007464677095413, + "learning_rate": 5.75538272017737e-06, + "loss": 0.0014, + "step": 157960 + }, + { + "epoch": 1.013170511138975, + "grad_norm": 0.028109176084399223, + "learning_rate": 5.754829436308745e-06, + "loss": 0.0023, + "step": 157970 + }, + { + "epoch": 1.0132346480327612, + "grad_norm": 0.05885981023311615, + "learning_rate": 5.75427614298157e-06, + "loss": 0.0014, + "step": 157980 + }, + { + "epoch": 1.0132987849265471, + "grad_norm": 0.020199328660964966, + "learning_rate": 5.753722840202772e-06, + "loss": 0.0013, + "step": 157990 + }, + { + "epoch": 1.0133629218203333, + "grad_norm": 0.29015612602233887, + "learning_rate": 5.753169527979292e-06, + "loss": 0.002, + "step": 158000 + }, + { + "epoch": 1.0134270587141194, + "grad_norm": 0.27271541953086853, + "learning_rate": 5.752616206318057e-06, + "loss": 0.003, + "step": 158010 + }, + { + "epoch": 1.0134911956079056, + "grad_norm": 0.11496245115995407, + "learning_rate": 5.752062875226006e-06, + "loss": 0.0018, + "step": 158020 + }, + { + "epoch": 1.0135553325016917, + "grad_norm": 0.06919115036725998, + "learning_rate": 5.751509534710066e-06, + "loss": 0.0032, + "step": 158030 + }, + { + "epoch": 1.0136194693954776, + "grad_norm": 0.04158717393875122, + "learning_rate": 5.750956184777176e-06, + "loss": 0.0012, + "step": 158040 + }, + { + "epoch": 1.0136836062892638, + "grad_norm": 0.11757807433605194, + "learning_rate": 5.750402825434269e-06, + "loss": 0.0039, + "step": 158050 + }, + { + "epoch": 1.01374774318305, + "grad_norm": 0.021763648837804794, + "learning_rate": 5.749849456688279e-06, + "loss": 0.0009, + "step": 158060 + }, + { + "epoch": 1.013811880076836, + "grad_norm": 0.02642359957098961, + "learning_rate": 5.749296078546137e-06, + "loss": 0.0023, + "step": 158070 + }, + { + "epoch": 1.013876016970622, + "grad_norm": 0.029139714315533638, + "learning_rate": 5.748742691014781e-06, + "loss": 0.0017, + "step": 158080 + }, + { + "epoch": 1.0139401538644082, + "grad_norm": 0.20166301727294922, + "learning_rate": 5.748189294101144e-06, + "loss": 0.0022, + "step": 158090 + }, + { + "epoch": 1.0140042907581943, + "grad_norm": 0.05873652175068855, + "learning_rate": 5.747635887812161e-06, + "loss": 0.0011, + "step": 158100 + }, + { + "epoch": 1.0140684276519805, + "grad_norm": 0.12319185584783554, + "learning_rate": 5.747082472154764e-06, + "loss": 0.0012, + "step": 158110 + }, + { + "epoch": 1.0141325645457664, + "grad_norm": 0.07644834369421005, + "learning_rate": 5.746529047135891e-06, + "loss": 0.0018, + "step": 158120 + }, + { + "epoch": 1.0141967014395525, + "grad_norm": 0.04374532029032707, + "learning_rate": 5.745975612762477e-06, + "loss": 0.004, + "step": 158130 + }, + { + "epoch": 1.0142608383333387, + "grad_norm": 0.0785631611943245, + "learning_rate": 5.7454221690414524e-06, + "loss": 0.0026, + "step": 158140 + }, + { + "epoch": 1.0143249752271248, + "grad_norm": 0.12788932025432587, + "learning_rate": 5.744868715979758e-06, + "loss": 0.0026, + "step": 158150 + }, + { + "epoch": 1.0143891121209108, + "grad_norm": 0.1302683800458908, + "learning_rate": 5.7443152535843235e-06, + "loss": 0.0027, + "step": 158160 + }, + { + "epoch": 1.014453249014697, + "grad_norm": 0.23391394317150116, + "learning_rate": 5.743761781862089e-06, + "loss": 0.0016, + "step": 158170 + }, + { + "epoch": 1.014517385908483, + "grad_norm": 0.1533360779285431, + "learning_rate": 5.7432083008199876e-06, + "loss": 0.0039, + "step": 158180 + }, + { + "epoch": 1.0145815228022692, + "grad_norm": 0.03090854361653328, + "learning_rate": 5.742654810464955e-06, + "loss": 0.0045, + "step": 158190 + }, + { + "epoch": 1.0146456596960554, + "grad_norm": 0.1253776103258133, + "learning_rate": 5.742101310803926e-06, + "loss": 0.0023, + "step": 158200 + }, + { + "epoch": 1.0147097965898413, + "grad_norm": 0.1397521197795868, + "learning_rate": 5.741547801843839e-06, + "loss": 0.0015, + "step": 158210 + }, + { + "epoch": 1.0147739334836274, + "grad_norm": 0.03552157059311867, + "learning_rate": 5.740994283591626e-06, + "loss": 0.0017, + "step": 158220 + }, + { + "epoch": 1.0148380703774136, + "grad_norm": 0.10333353281021118, + "learning_rate": 5.740440756054226e-06, + "loss": 0.0012, + "step": 158230 + }, + { + "epoch": 1.0149022072711997, + "grad_norm": 0.0401446633040905, + "learning_rate": 5.739887219238574e-06, + "loss": 0.0018, + "step": 158240 + }, + { + "epoch": 1.0149663441649857, + "grad_norm": 0.2060912698507309, + "learning_rate": 5.739333673151606e-06, + "loss": 0.0015, + "step": 158250 + }, + { + "epoch": 1.0150304810587718, + "grad_norm": 0.07493453472852707, + "learning_rate": 5.738780117800259e-06, + "loss": 0.0016, + "step": 158260 + }, + { + "epoch": 1.015094617952558, + "grad_norm": 0.43839237093925476, + "learning_rate": 5.738226553191468e-06, + "loss": 0.0015, + "step": 158270 + }, + { + "epoch": 1.015158754846344, + "grad_norm": 0.09643466025590897, + "learning_rate": 5.737672979332173e-06, + "loss": 0.0025, + "step": 158280 + }, + { + "epoch": 1.01522289174013, + "grad_norm": 0.13256953656673431, + "learning_rate": 5.737119396229307e-06, + "loss": 0.0024, + "step": 158290 + }, + { + "epoch": 1.0152870286339162, + "grad_norm": 0.24836604297161102, + "learning_rate": 5.736565803889809e-06, + "loss": 0.002, + "step": 158300 + }, + { + "epoch": 1.0153511655277023, + "grad_norm": 0.027909912168979645, + "learning_rate": 5.736012202320614e-06, + "loss": 0.002, + "step": 158310 + }, + { + "epoch": 1.0154153024214885, + "grad_norm": 0.1804720014333725, + "learning_rate": 5.735458591528661e-06, + "loss": 0.0025, + "step": 158320 + }, + { + "epoch": 1.0154794393152746, + "grad_norm": 0.08508358895778656, + "learning_rate": 5.734904971520885e-06, + "loss": 0.0007, + "step": 158330 + }, + { + "epoch": 1.0155435762090606, + "grad_norm": 0.2949533462524414, + "learning_rate": 5.734351342304227e-06, + "loss": 0.0041, + "step": 158340 + }, + { + "epoch": 1.0156077131028467, + "grad_norm": 0.22988532483577728, + "learning_rate": 5.7337977038856186e-06, + "loss": 0.001, + "step": 158350 + }, + { + "epoch": 1.0156718499966328, + "grad_norm": 0.015086526051163673, + "learning_rate": 5.733244056272003e-06, + "loss": 0.0028, + "step": 158360 + }, + { + "epoch": 1.015735986890419, + "grad_norm": 0.1951785832643509, + "learning_rate": 5.732690399470313e-06, + "loss": 0.0026, + "step": 158370 + }, + { + "epoch": 1.015800123784205, + "grad_norm": 0.07197275757789612, + "learning_rate": 5.73213673348749e-06, + "loss": 0.0015, + "step": 158380 + }, + { + "epoch": 1.015864260677991, + "grad_norm": 0.30711835622787476, + "learning_rate": 5.7315830583304695e-06, + "loss": 0.002, + "step": 158390 + }, + { + "epoch": 1.0159283975717772, + "grad_norm": 0.053068507462739944, + "learning_rate": 5.731029374006192e-06, + "loss": 0.0014, + "step": 158400 + }, + { + "epoch": 1.0159925344655634, + "grad_norm": 0.05254053696990013, + "learning_rate": 5.730475680521593e-06, + "loss": 0.0023, + "step": 158410 + }, + { + "epoch": 1.0160566713593493, + "grad_norm": 0.05597158893942833, + "learning_rate": 5.7299219778836125e-06, + "loss": 0.0016, + "step": 158420 + }, + { + "epoch": 1.0161208082531354, + "grad_norm": 0.14286933839321136, + "learning_rate": 5.729368266099186e-06, + "loss": 0.0028, + "step": 158430 + }, + { + "epoch": 1.0161849451469216, + "grad_norm": 0.04776264727115631, + "learning_rate": 5.728814545175256e-06, + "loss": 0.0015, + "step": 158440 + }, + { + "epoch": 1.0162490820407077, + "grad_norm": 0.07032934576272964, + "learning_rate": 5.728260815118759e-06, + "loss": 0.0022, + "step": 158450 + }, + { + "epoch": 1.016313218934494, + "grad_norm": 0.1985510289669037, + "learning_rate": 5.727707075936632e-06, + "loss": 0.0021, + "step": 158460 + }, + { + "epoch": 1.0163773558282798, + "grad_norm": 0.05037515610456467, + "learning_rate": 5.727153327635816e-06, + "loss": 0.0025, + "step": 158470 + }, + { + "epoch": 1.016441492722066, + "grad_norm": 0.05321979522705078, + "learning_rate": 5.726599570223249e-06, + "loss": 0.0014, + "step": 158480 + }, + { + "epoch": 1.0165056296158521, + "grad_norm": 0.23675918579101562, + "learning_rate": 5.726045803705871e-06, + "loss": 0.0021, + "step": 158490 + }, + { + "epoch": 1.0165697665096383, + "grad_norm": 0.0526350662112236, + "learning_rate": 5.725492028090619e-06, + "loss": 0.0023, + "step": 158500 + }, + { + "epoch": 1.0166339034034242, + "grad_norm": 0.0651823952794075, + "learning_rate": 5.7249382433844335e-06, + "loss": 0.0051, + "step": 158510 + }, + { + "epoch": 1.0166980402972103, + "grad_norm": 0.08328308165073395, + "learning_rate": 5.724384449594253e-06, + "loss": 0.0012, + "step": 158520 + }, + { + "epoch": 1.0167621771909965, + "grad_norm": 0.1616986244916916, + "learning_rate": 5.72383064672702e-06, + "loss": 0.0014, + "step": 158530 + }, + { + "epoch": 1.0168263140847826, + "grad_norm": 0.11192335188388824, + "learning_rate": 5.72327683478967e-06, + "loss": 0.001, + "step": 158540 + }, + { + "epoch": 1.0168904509785686, + "grad_norm": 0.08834236860275269, + "learning_rate": 5.722723013789144e-06, + "loss": 0.0017, + "step": 158550 + }, + { + "epoch": 1.0169545878723547, + "grad_norm": 0.019593428820371628, + "learning_rate": 5.722169183732383e-06, + "loss": 0.0013, + "step": 158560 + }, + { + "epoch": 1.0170187247661409, + "grad_norm": 0.3054475784301758, + "learning_rate": 5.721615344626327e-06, + "loss": 0.0017, + "step": 158570 + }, + { + "epoch": 1.017082861659927, + "grad_norm": 0.07718338072299957, + "learning_rate": 5.721061496477913e-06, + "loss": 0.0025, + "step": 158580 + }, + { + "epoch": 1.017146998553713, + "grad_norm": 0.15053144097328186, + "learning_rate": 5.720507639294084e-06, + "loss": 0.0016, + "step": 158590 + }, + { + "epoch": 1.017211135447499, + "grad_norm": 0.1066005602478981, + "learning_rate": 5.719953773081779e-06, + "loss": 0.0024, + "step": 158600 + }, + { + "epoch": 1.0172752723412852, + "grad_norm": 0.15089969336986542, + "learning_rate": 5.719399897847938e-06, + "loss": 0.0026, + "step": 158610 + }, + { + "epoch": 1.0173394092350714, + "grad_norm": 0.09610164165496826, + "learning_rate": 5.718846013599504e-06, + "loss": 0.0015, + "step": 158620 + }, + { + "epoch": 1.0174035461288575, + "grad_norm": 0.16944530606269836, + "learning_rate": 5.718292120343414e-06, + "loss": 0.0017, + "step": 158630 + }, + { + "epoch": 1.0174676830226435, + "grad_norm": 0.08744259178638458, + "learning_rate": 5.7177382180866115e-06, + "loss": 0.001, + "step": 158640 + }, + { + "epoch": 1.0175318199164296, + "grad_norm": 0.22927281260490417, + "learning_rate": 5.717184306836036e-06, + "loss": 0.003, + "step": 158650 + }, + { + "epoch": 1.0175959568102158, + "grad_norm": 0.04453054070472717, + "learning_rate": 5.716630386598628e-06, + "loss": 0.0013, + "step": 158660 + }, + { + "epoch": 1.017660093704002, + "grad_norm": 0.09694628417491913, + "learning_rate": 5.716076457381329e-06, + "loss": 0.0025, + "step": 158670 + }, + { + "epoch": 1.0177242305977878, + "grad_norm": 0.05184922739863396, + "learning_rate": 5.715522519191081e-06, + "loss": 0.0019, + "step": 158680 + }, + { + "epoch": 1.017788367491574, + "grad_norm": 0.026720192283391953, + "learning_rate": 5.7149685720348235e-06, + "loss": 0.0017, + "step": 158690 + }, + { + "epoch": 1.0178525043853601, + "grad_norm": 0.08675410598516464, + "learning_rate": 5.7144146159195e-06, + "loss": 0.0077, + "step": 158700 + }, + { + "epoch": 1.0179166412791463, + "grad_norm": 0.15461985766887665, + "learning_rate": 5.71386065085205e-06, + "loss": 0.0019, + "step": 158710 + }, + { + "epoch": 1.0179807781729322, + "grad_norm": 0.01148910541087389, + "learning_rate": 5.7133066768394165e-06, + "loss": 0.0022, + "step": 158720 + }, + { + "epoch": 1.0180449150667183, + "grad_norm": 0.07814265042543411, + "learning_rate": 5.712752693888539e-06, + "loss": 0.0022, + "step": 158730 + }, + { + "epoch": 1.0181090519605045, + "grad_norm": 0.1866464614868164, + "learning_rate": 5.712198702006363e-06, + "loss": 0.0029, + "step": 158740 + }, + { + "epoch": 1.0181731888542906, + "grad_norm": 0.12418685853481293, + "learning_rate": 5.711644701199827e-06, + "loss": 0.0022, + "step": 158750 + }, + { + "epoch": 1.0182373257480768, + "grad_norm": 0.047584936022758484, + "learning_rate": 5.711090691475874e-06, + "loss": 0.0016, + "step": 158760 + }, + { + "epoch": 1.0183014626418627, + "grad_norm": 0.12022664397954941, + "learning_rate": 5.710536672841446e-06, + "loss": 0.002, + "step": 158770 + }, + { + "epoch": 1.0183655995356489, + "grad_norm": 0.1303713619709015, + "learning_rate": 5.709982645303487e-06, + "loss": 0.0028, + "step": 158780 + }, + { + "epoch": 1.018429736429435, + "grad_norm": 0.10671041160821915, + "learning_rate": 5.709428608868937e-06, + "loss": 0.0012, + "step": 158790 + }, + { + "epoch": 1.0184938733232212, + "grad_norm": 0.2769628167152405, + "learning_rate": 5.70887456354474e-06, + "loss": 0.0013, + "step": 158800 + }, + { + "epoch": 1.018558010217007, + "grad_norm": 0.04816930368542671, + "learning_rate": 5.708320509337839e-06, + "loss": 0.0022, + "step": 158810 + }, + { + "epoch": 1.0186221471107932, + "grad_norm": 0.009474515914916992, + "learning_rate": 5.707766446255174e-06, + "loss": 0.0016, + "step": 158820 + }, + { + "epoch": 1.0186862840045794, + "grad_norm": 0.061292871832847595, + "learning_rate": 5.707212374303691e-06, + "loss": 0.0023, + "step": 158830 + }, + { + "epoch": 1.0187504208983655, + "grad_norm": 0.06681514531373978, + "learning_rate": 5.706658293490331e-06, + "loss": 0.0025, + "step": 158840 + }, + { + "epoch": 1.0188145577921515, + "grad_norm": 0.42780590057373047, + "learning_rate": 5.706104203822038e-06, + "loss": 0.0018, + "step": 158850 + }, + { + "epoch": 1.0188786946859376, + "grad_norm": 0.025822345167398453, + "learning_rate": 5.705550105305754e-06, + "loss": 0.0019, + "step": 158860 + }, + { + "epoch": 1.0189428315797238, + "grad_norm": 0.21344026923179626, + "learning_rate": 5.704995997948424e-06, + "loss": 0.0023, + "step": 158870 + }, + { + "epoch": 1.01900696847351, + "grad_norm": 0.09337637573480606, + "learning_rate": 5.704441881756989e-06, + "loss": 0.0018, + "step": 158880 + }, + { + "epoch": 1.019071105367296, + "grad_norm": 0.06411875039339066, + "learning_rate": 5.703887756738394e-06, + "loss": 0.0028, + "step": 158890 + }, + { + "epoch": 1.019135242261082, + "grad_norm": 0.1680552363395691, + "learning_rate": 5.703333622899583e-06, + "loss": 0.0021, + "step": 158900 + }, + { + "epoch": 1.0191993791548681, + "grad_norm": 0.07275000214576721, + "learning_rate": 5.702779480247499e-06, + "loss": 0.0019, + "step": 158910 + }, + { + "epoch": 1.0192635160486543, + "grad_norm": 0.11038286238908768, + "learning_rate": 5.702225328789085e-06, + "loss": 0.0018, + "step": 158920 + }, + { + "epoch": 1.0193276529424404, + "grad_norm": 0.14055235683918, + "learning_rate": 5.701671168531287e-06, + "loss": 0.0012, + "step": 158930 + }, + { + "epoch": 1.0193917898362264, + "grad_norm": 0.01654187962412834, + "learning_rate": 5.701116999481047e-06, + "loss": 0.0011, + "step": 158940 + }, + { + "epoch": 1.0194559267300125, + "grad_norm": 0.06248614937067032, + "learning_rate": 5.70056282164531e-06, + "loss": 0.002, + "step": 158950 + }, + { + "epoch": 1.0195200636237987, + "grad_norm": 0.1035081297159195, + "learning_rate": 5.700008635031021e-06, + "loss": 0.0021, + "step": 158960 + }, + { + "epoch": 1.0195842005175848, + "grad_norm": 0.1015050858259201, + "learning_rate": 5.699454439645121e-06, + "loss": 0.0022, + "step": 158970 + }, + { + "epoch": 1.0196483374113707, + "grad_norm": 0.2446785718202591, + "learning_rate": 5.698900235494559e-06, + "loss": 0.0017, + "step": 158980 + }, + { + "epoch": 1.0197124743051569, + "grad_norm": 0.02375601790845394, + "learning_rate": 5.698346022586276e-06, + "loss": 0.0055, + "step": 158990 + }, + { + "epoch": 1.019776611198943, + "grad_norm": 0.08211459219455719, + "learning_rate": 5.6977918009272205e-06, + "loss": 0.0012, + "step": 159000 + }, + { + "epoch": 1.0198407480927292, + "grad_norm": 0.05030378699302673, + "learning_rate": 5.6972375705243345e-06, + "loss": 0.0019, + "step": 159010 + }, + { + "epoch": 1.0199048849865153, + "grad_norm": 0.10835476964712143, + "learning_rate": 5.696683331384563e-06, + "loss": 0.0014, + "step": 159020 + }, + { + "epoch": 1.0199690218803013, + "grad_norm": 0.13494984805583954, + "learning_rate": 5.6961290835148505e-06, + "loss": 0.0031, + "step": 159030 + }, + { + "epoch": 1.0200331587740874, + "grad_norm": 0.040680792182683945, + "learning_rate": 5.6955748269221445e-06, + "loss": 0.001, + "step": 159040 + }, + { + "epoch": 1.0200972956678735, + "grad_norm": 0.07557686418294907, + "learning_rate": 5.695020561613388e-06, + "loss": 0.0017, + "step": 159050 + }, + { + "epoch": 1.0201614325616597, + "grad_norm": 0.10823880881071091, + "learning_rate": 5.694466287595528e-06, + "loss": 0.0021, + "step": 159060 + }, + { + "epoch": 1.0202255694554456, + "grad_norm": 0.03785393759608269, + "learning_rate": 5.693912004875508e-06, + "loss": 0.0032, + "step": 159070 + }, + { + "epoch": 1.0202897063492318, + "grad_norm": 0.2585473656654358, + "learning_rate": 5.693357713460276e-06, + "loss": 0.0026, + "step": 159080 + }, + { + "epoch": 1.020353843243018, + "grad_norm": 0.21977251768112183, + "learning_rate": 5.692803413356774e-06, + "loss": 0.0022, + "step": 159090 + }, + { + "epoch": 1.020417980136804, + "grad_norm": 0.0885687991976738, + "learning_rate": 5.692249104571951e-06, + "loss": 0.0009, + "step": 159100 + }, + { + "epoch": 1.02048211703059, + "grad_norm": 0.10606767237186432, + "learning_rate": 5.6916947871127535e-06, + "loss": 0.0032, + "step": 159110 + }, + { + "epoch": 1.0205462539243761, + "grad_norm": 0.19184932112693787, + "learning_rate": 5.6911404609861245e-06, + "loss": 0.0018, + "step": 159120 + }, + { + "epoch": 1.0206103908181623, + "grad_norm": 0.1128690093755722, + "learning_rate": 5.690586126199012e-06, + "loss": 0.0023, + "step": 159130 + }, + { + "epoch": 1.0206745277119484, + "grad_norm": 0.12977398931980133, + "learning_rate": 5.690031782758362e-06, + "loss": 0.0024, + "step": 159140 + }, + { + "epoch": 1.0207386646057346, + "grad_norm": 0.20466890931129456, + "learning_rate": 5.6894774306711194e-06, + "loss": 0.0018, + "step": 159150 + }, + { + "epoch": 1.0208028014995205, + "grad_norm": 0.13838985562324524, + "learning_rate": 5.688923069944232e-06, + "loss": 0.0017, + "step": 159160 + }, + { + "epoch": 1.0208669383933067, + "grad_norm": 0.07172899693250656, + "learning_rate": 5.6883687005846475e-06, + "loss": 0.0013, + "step": 159170 + }, + { + "epoch": 1.0209310752870928, + "grad_norm": 0.014331742189824581, + "learning_rate": 5.687814322599311e-06, + "loss": 0.0026, + "step": 159180 + }, + { + "epoch": 1.020995212180879, + "grad_norm": 0.34021681547164917, + "learning_rate": 5.6872599359951695e-06, + "loss": 0.0016, + "step": 159190 + }, + { + "epoch": 1.021059349074665, + "grad_norm": 0.3392062485218048, + "learning_rate": 5.686705540779169e-06, + "loss": 0.0014, + "step": 159200 + }, + { + "epoch": 1.021123485968451, + "grad_norm": 0.11961116641759872, + "learning_rate": 5.686151136958258e-06, + "loss": 0.0019, + "step": 159210 + }, + { + "epoch": 1.0211876228622372, + "grad_norm": 0.04757817089557648, + "learning_rate": 5.685596724539383e-06, + "loss": 0.0018, + "step": 159220 + }, + { + "epoch": 1.0212517597560233, + "grad_norm": 0.009403668344020844, + "learning_rate": 5.685042303529491e-06, + "loss": 0.0021, + "step": 159230 + }, + { + "epoch": 1.0213158966498093, + "grad_norm": 0.05813450366258621, + "learning_rate": 5.684487873935528e-06, + "loss": 0.0029, + "step": 159240 + }, + { + "epoch": 1.0213800335435954, + "grad_norm": 0.09060076624155045, + "learning_rate": 5.683933435764445e-06, + "loss": 0.0018, + "step": 159250 + }, + { + "epoch": 1.0214441704373816, + "grad_norm": 0.06484679132699966, + "learning_rate": 5.6833789890231846e-06, + "loss": 0.0034, + "step": 159260 + }, + { + "epoch": 1.0215083073311677, + "grad_norm": 0.024051832035183907, + "learning_rate": 5.682824533718699e-06, + "loss": 0.0029, + "step": 159270 + }, + { + "epoch": 1.0215724442249536, + "grad_norm": 0.07731301337480545, + "learning_rate": 5.682270069857934e-06, + "loss": 0.0021, + "step": 159280 + }, + { + "epoch": 1.0216365811187398, + "grad_norm": 0.08671291172504425, + "learning_rate": 5.681715597447838e-06, + "loss": 0.0012, + "step": 159290 + }, + { + "epoch": 1.021700718012526, + "grad_norm": 0.14244763553142548, + "learning_rate": 5.681161116495356e-06, + "loss": 0.0014, + "step": 159300 + }, + { + "epoch": 1.021764854906312, + "grad_norm": 0.030874190852046013, + "learning_rate": 5.6806066270074385e-06, + "loss": 0.003, + "step": 159310 + }, + { + "epoch": 1.0218289918000982, + "grad_norm": 0.041841261088848114, + "learning_rate": 5.680052128991036e-06, + "loss": 0.004, + "step": 159320 + }, + { + "epoch": 1.0218931286938842, + "grad_norm": 0.1903955489397049, + "learning_rate": 5.679497622453093e-06, + "loss": 0.0014, + "step": 159330 + }, + { + "epoch": 1.0219572655876703, + "grad_norm": 0.10998605191707611, + "learning_rate": 5.67894310740056e-06, + "loss": 0.0013, + "step": 159340 + }, + { + "epoch": 1.0220214024814565, + "grad_norm": 0.032601822167634964, + "learning_rate": 5.678388583840383e-06, + "loss": 0.0009, + "step": 159350 + }, + { + "epoch": 1.0220855393752426, + "grad_norm": 0.13070465624332428, + "learning_rate": 5.677834051779513e-06, + "loss": 0.003, + "step": 159360 + }, + { + "epoch": 1.0221496762690285, + "grad_norm": 0.07420849800109863, + "learning_rate": 5.677279511224898e-06, + "loss": 0.0021, + "step": 159370 + }, + { + "epoch": 1.0222138131628147, + "grad_norm": 0.08946508914232254, + "learning_rate": 5.676724962183487e-06, + "loss": 0.0018, + "step": 159380 + }, + { + "epoch": 1.0222779500566008, + "grad_norm": 0.012758632190525532, + "learning_rate": 5.676170404662227e-06, + "loss": 0.0016, + "step": 159390 + }, + { + "epoch": 1.022342086950387, + "grad_norm": 0.09677497297525406, + "learning_rate": 5.6756158386680705e-06, + "loss": 0.0014, + "step": 159400 + }, + { + "epoch": 1.022406223844173, + "grad_norm": 0.04973935708403587, + "learning_rate": 5.675061264207964e-06, + "loss": 0.0018, + "step": 159410 + }, + { + "epoch": 1.022470360737959, + "grad_norm": 0.21082596480846405, + "learning_rate": 5.674506681288857e-06, + "loss": 0.0017, + "step": 159420 + }, + { + "epoch": 1.0225344976317452, + "grad_norm": 0.16517893970012665, + "learning_rate": 5.6739520899177e-06, + "loss": 0.0021, + "step": 159430 + }, + { + "epoch": 1.0225986345255313, + "grad_norm": 0.04861932992935181, + "learning_rate": 5.673397490101441e-06, + "loss": 0.0026, + "step": 159440 + }, + { + "epoch": 1.0226627714193175, + "grad_norm": 0.09634580463171005, + "learning_rate": 5.6728428818470306e-06, + "loss": 0.0031, + "step": 159450 + }, + { + "epoch": 1.0227269083131034, + "grad_norm": 0.0037230979651212692, + "learning_rate": 5.6722882651614165e-06, + "loss": 0.003, + "step": 159460 + }, + { + "epoch": 1.0227910452068896, + "grad_norm": 0.053129348903894424, + "learning_rate": 5.6717336400515524e-06, + "loss": 0.0012, + "step": 159470 + }, + { + "epoch": 1.0228551821006757, + "grad_norm": 0.005225110333412886, + "learning_rate": 5.671179006524383e-06, + "loss": 0.0011, + "step": 159480 + }, + { + "epoch": 1.0229193189944619, + "grad_norm": 0.11354188621044159, + "learning_rate": 5.6706243645868645e-06, + "loss": 0.0022, + "step": 159490 + }, + { + "epoch": 1.0229834558882478, + "grad_norm": 0.0946168527007103, + "learning_rate": 5.6700697142459404e-06, + "loss": 0.0018, + "step": 159500 + }, + { + "epoch": 1.023047592782034, + "grad_norm": 0.09126311540603638, + "learning_rate": 5.669515055508565e-06, + "loss": 0.0018, + "step": 159510 + }, + { + "epoch": 1.02311172967582, + "grad_norm": 0.04860438406467438, + "learning_rate": 5.668960388381687e-06, + "loss": 0.0021, + "step": 159520 + }, + { + "epoch": 1.0231758665696062, + "grad_norm": 0.0699162483215332, + "learning_rate": 5.668405712872257e-06, + "loss": 0.0026, + "step": 159530 + }, + { + "epoch": 1.0232400034633922, + "grad_norm": 0.028246797621250153, + "learning_rate": 5.667851028987226e-06, + "loss": 0.0014, + "step": 159540 + }, + { + "epoch": 1.0233041403571783, + "grad_norm": 0.06132975593209267, + "learning_rate": 5.667296336733545e-06, + "loss": 0.0037, + "step": 159550 + }, + { + "epoch": 1.0233682772509645, + "grad_norm": 0.04671689122915268, + "learning_rate": 5.666741636118164e-06, + "loss": 0.0023, + "step": 159560 + }, + { + "epoch": 1.0234324141447506, + "grad_norm": 0.06894919276237488, + "learning_rate": 5.666186927148033e-06, + "loss": 0.0018, + "step": 159570 + }, + { + "epoch": 1.0234965510385368, + "grad_norm": 0.12566019594669342, + "learning_rate": 5.665632209830104e-06, + "loss": 0.003, + "step": 159580 + }, + { + "epoch": 1.0235606879323227, + "grad_norm": 0.20520325005054474, + "learning_rate": 5.665077484171329e-06, + "loss": 0.0027, + "step": 159590 + }, + { + "epoch": 1.0236248248261088, + "grad_norm": 0.1556227058172226, + "learning_rate": 5.664522750178656e-06, + "loss": 0.0019, + "step": 159600 + }, + { + "epoch": 1.023688961719895, + "grad_norm": 0.2768998146057129, + "learning_rate": 5.663968007859039e-06, + "loss": 0.0015, + "step": 159610 + }, + { + "epoch": 1.0237530986136811, + "grad_norm": 0.09073995053768158, + "learning_rate": 5.663413257219426e-06, + "loss": 0.0016, + "step": 159620 + }, + { + "epoch": 1.023817235507467, + "grad_norm": 0.10886190086603165, + "learning_rate": 5.662858498266774e-06, + "loss": 0.0016, + "step": 159630 + }, + { + "epoch": 1.0238813724012532, + "grad_norm": 0.04841167852282524, + "learning_rate": 5.66230373100803e-06, + "loss": 0.0019, + "step": 159640 + }, + { + "epoch": 1.0239455092950394, + "grad_norm": 0.034094445407390594, + "learning_rate": 5.661748955450147e-06, + "loss": 0.0009, + "step": 159650 + }, + { + "epoch": 1.0240096461888255, + "grad_norm": 0.05819038674235344, + "learning_rate": 5.6611941716000765e-06, + "loss": 0.0018, + "step": 159660 + }, + { + "epoch": 1.0240737830826114, + "grad_norm": 0.19978097081184387, + "learning_rate": 5.66063937946477e-06, + "loss": 0.002, + "step": 159670 + }, + { + "epoch": 1.0241379199763976, + "grad_norm": 0.12936922907829285, + "learning_rate": 5.660084579051181e-06, + "loss": 0.0017, + "step": 159680 + }, + { + "epoch": 1.0242020568701837, + "grad_norm": 0.026892701163887978, + "learning_rate": 5.659529770366259e-06, + "loss": 0.0015, + "step": 159690 + }, + { + "epoch": 1.0242661937639699, + "grad_norm": 0.04490223154425621, + "learning_rate": 5.658974953416959e-06, + "loss": 0.0019, + "step": 159700 + }, + { + "epoch": 1.0243303306577558, + "grad_norm": 0.06686296314001083, + "learning_rate": 5.658420128210231e-06, + "loss": 0.0028, + "step": 159710 + }, + { + "epoch": 1.024394467551542, + "grad_norm": 0.004353153053671122, + "learning_rate": 5.6578652947530286e-06, + "loss": 0.0012, + "step": 159720 + }, + { + "epoch": 1.024458604445328, + "grad_norm": 0.06313478946685791, + "learning_rate": 5.657310453052304e-06, + "loss": 0.006, + "step": 159730 + }, + { + "epoch": 1.0245227413391143, + "grad_norm": 0.08250018954277039, + "learning_rate": 5.65675560311501e-06, + "loss": 0.0015, + "step": 159740 + }, + { + "epoch": 1.0245868782329004, + "grad_norm": 0.12308786809444427, + "learning_rate": 5.656200744948098e-06, + "loss": 0.0015, + "step": 159750 + }, + { + "epoch": 1.0246510151266863, + "grad_norm": 0.9503151774406433, + "learning_rate": 5.655645878558522e-06, + "loss": 0.002, + "step": 159760 + }, + { + "epoch": 1.0247151520204725, + "grad_norm": 0.03679700568318367, + "learning_rate": 5.655091003953235e-06, + "loss": 0.0016, + "step": 159770 + }, + { + "epoch": 1.0247792889142586, + "grad_norm": 0.1574845314025879, + "learning_rate": 5.65453612113919e-06, + "loss": 0.0048, + "step": 159780 + }, + { + "epoch": 1.0248434258080448, + "grad_norm": 0.09560144692659378, + "learning_rate": 5.653981230123338e-06, + "loss": 0.0017, + "step": 159790 + }, + { + "epoch": 1.0249075627018307, + "grad_norm": 0.02885236032307148, + "learning_rate": 5.653426330912636e-06, + "loss": 0.0027, + "step": 159800 + }, + { + "epoch": 1.0249716995956168, + "grad_norm": 0.168492391705513, + "learning_rate": 5.652871423514033e-06, + "loss": 0.003, + "step": 159810 + }, + { + "epoch": 1.025035836489403, + "grad_norm": 0.033362600952386856, + "learning_rate": 5.652316507934485e-06, + "loss": 0.0011, + "step": 159820 + }, + { + "epoch": 1.0250999733831891, + "grad_norm": 0.08808814734220505, + "learning_rate": 5.651761584180945e-06, + "loss": 0.0019, + "step": 159830 + }, + { + "epoch": 1.025164110276975, + "grad_norm": 0.045800428837537766, + "learning_rate": 5.651206652260366e-06, + "loss": 0.0016, + "step": 159840 + }, + { + "epoch": 1.0252282471707612, + "grad_norm": 0.1911831498146057, + "learning_rate": 5.650651712179703e-06, + "loss": 0.0023, + "step": 159850 + }, + { + "epoch": 1.0252923840645474, + "grad_norm": 0.14668892323970795, + "learning_rate": 5.650096763945909e-06, + "loss": 0.0028, + "step": 159860 + }, + { + "epoch": 1.0253565209583335, + "grad_norm": 0.13308972120285034, + "learning_rate": 5.6495418075659396e-06, + "loss": 0.0021, + "step": 159870 + }, + { + "epoch": 1.0254206578521197, + "grad_norm": 0.0018908643396571279, + "learning_rate": 5.648986843046745e-06, + "loss": 0.0029, + "step": 159880 + }, + { + "epoch": 1.0254847947459056, + "grad_norm": 0.03336883708834648, + "learning_rate": 5.648431870395281e-06, + "loss": 0.0027, + "step": 159890 + }, + { + "epoch": 1.0255489316396917, + "grad_norm": 0.006976774428039789, + "learning_rate": 5.6478768896185035e-06, + "loss": 0.0012, + "step": 159900 + }, + { + "epoch": 1.025613068533478, + "grad_norm": 0.005612578243017197, + "learning_rate": 5.647321900723364e-06, + "loss": 0.0012, + "step": 159910 + }, + { + "epoch": 1.025677205427264, + "grad_norm": 0.15938295423984528, + "learning_rate": 5.646766903716819e-06, + "loss": 0.0027, + "step": 159920 + }, + { + "epoch": 1.02574134232105, + "grad_norm": 0.10505570471286774, + "learning_rate": 5.646211898605823e-06, + "loss": 0.002, + "step": 159930 + }, + { + "epoch": 1.0258054792148361, + "grad_norm": 0.011524339206516743, + "learning_rate": 5.645656885397331e-06, + "loss": 0.0014, + "step": 159940 + }, + { + "epoch": 1.0258696161086223, + "grad_norm": 0.23351573944091797, + "learning_rate": 5.645101864098296e-06, + "loss": 0.0024, + "step": 159950 + }, + { + "epoch": 1.0259337530024084, + "grad_norm": 0.21484927833080292, + "learning_rate": 5.644546834715673e-06, + "loss": 0.0034, + "step": 159960 + }, + { + "epoch": 1.0259978898961943, + "grad_norm": 0.031179411336779594, + "learning_rate": 5.643991797256418e-06, + "loss": 0.0012, + "step": 159970 + }, + { + "epoch": 1.0260620267899805, + "grad_norm": 0.1138547882437706, + "learning_rate": 5.643436751727485e-06, + "loss": 0.0024, + "step": 159980 + }, + { + "epoch": 1.0261261636837666, + "grad_norm": 0.10956770181655884, + "learning_rate": 5.642881698135831e-06, + "loss": 0.0011, + "step": 159990 + }, + { + "epoch": 1.0261903005775528, + "grad_norm": 0.06331674754619598, + "learning_rate": 5.642326636488409e-06, + "loss": 0.0024, + "step": 160000 + }, + { + "epoch": 1.026254437471339, + "grad_norm": 0.13225817680358887, + "learning_rate": 5.641771566792176e-06, + "loss": 0.0014, + "step": 160010 + }, + { + "epoch": 1.0263185743651249, + "grad_norm": 0.18962177634239197, + "learning_rate": 5.641216489054084e-06, + "loss": 0.0023, + "step": 160020 + }, + { + "epoch": 1.026382711258911, + "grad_norm": 0.20619508624076843, + "learning_rate": 5.640661403281094e-06, + "loss": 0.002, + "step": 160030 + }, + { + "epoch": 1.0264468481526972, + "grad_norm": 0.17232321202754974, + "learning_rate": 5.640106309480158e-06, + "loss": 0.0021, + "step": 160040 + }, + { + "epoch": 1.0265109850464833, + "grad_norm": 0.07825799286365509, + "learning_rate": 5.639551207658232e-06, + "loss": 0.0028, + "step": 160050 + }, + { + "epoch": 1.0265751219402692, + "grad_norm": 0.09236768633127213, + "learning_rate": 5.638996097822271e-06, + "loss": 0.0021, + "step": 160060 + }, + { + "epoch": 1.0266392588340554, + "grad_norm": 0.03546392545104027, + "learning_rate": 5.638440979979234e-06, + "loss": 0.001, + "step": 160070 + }, + { + "epoch": 1.0267033957278415, + "grad_norm": 0.22646042704582214, + "learning_rate": 5.637885854136076e-06, + "loss": 0.0014, + "step": 160080 + }, + { + "epoch": 1.0267675326216277, + "grad_norm": 0.0024000273551791906, + "learning_rate": 5.6373307202997496e-06, + "loss": 0.0012, + "step": 160090 + }, + { + "epoch": 1.0268316695154136, + "grad_norm": 0.025319969281554222, + "learning_rate": 5.636775578477216e-06, + "loss": 0.0014, + "step": 160100 + }, + { + "epoch": 1.0268958064091998, + "grad_norm": 0.09891081601381302, + "learning_rate": 5.636220428675429e-06, + "loss": 0.0013, + "step": 160110 + }, + { + "epoch": 1.026959943302986, + "grad_norm": 0.09997166693210602, + "learning_rate": 5.635665270901345e-06, + "loss": 0.0015, + "step": 160120 + }, + { + "epoch": 1.027024080196772, + "grad_norm": 0.047390688210725784, + "learning_rate": 5.635110105161921e-06, + "loss": 0.0019, + "step": 160130 + }, + { + "epoch": 1.027088217090558, + "grad_norm": 0.051536865532398224, + "learning_rate": 5.634554931464113e-06, + "loss": 0.0029, + "step": 160140 + }, + { + "epoch": 1.0271523539843441, + "grad_norm": 0.13702762126922607, + "learning_rate": 5.633999749814879e-06, + "loss": 0.0014, + "step": 160150 + }, + { + "epoch": 1.0272164908781303, + "grad_norm": 0.04055806249380112, + "learning_rate": 5.633444560221174e-06, + "loss": 0.0011, + "step": 160160 + }, + { + "epoch": 1.0272806277719164, + "grad_norm": 0.06640499830245972, + "learning_rate": 5.6328893626899575e-06, + "loss": 0.0013, + "step": 160170 + }, + { + "epoch": 1.0273447646657026, + "grad_norm": 0.11427941173315048, + "learning_rate": 5.6323341572281835e-06, + "loss": 0.0013, + "step": 160180 + }, + { + "epoch": 1.0274089015594885, + "grad_norm": 0.14953333139419556, + "learning_rate": 5.631778943842812e-06, + "loss": 0.0011, + "step": 160190 + }, + { + "epoch": 1.0274730384532746, + "grad_norm": 0.02691211923956871, + "learning_rate": 5.6312237225407965e-06, + "loss": 0.0006, + "step": 160200 + }, + { + "epoch": 1.0275371753470608, + "grad_norm": 0.08211901783943176, + "learning_rate": 5.630668493329099e-06, + "loss": 0.0019, + "step": 160210 + }, + { + "epoch": 1.027601312240847, + "grad_norm": 0.08376616984605789, + "learning_rate": 5.630113256214673e-06, + "loss": 0.0029, + "step": 160220 + }, + { + "epoch": 1.0276654491346329, + "grad_norm": 0.07124131172895432, + "learning_rate": 5.629558011204479e-06, + "loss": 0.0015, + "step": 160230 + }, + { + "epoch": 1.027729586028419, + "grad_norm": 0.014714283868670464, + "learning_rate": 5.6290027583054704e-06, + "loss": 0.0021, + "step": 160240 + }, + { + "epoch": 1.0277937229222052, + "grad_norm": 0.07588217407464981, + "learning_rate": 5.628447497524611e-06, + "loss": 0.0016, + "step": 160250 + }, + { + "epoch": 1.0278578598159913, + "grad_norm": 0.18099211156368256, + "learning_rate": 5.6278922288688535e-06, + "loss": 0.0017, + "step": 160260 + }, + { + "epoch": 1.0279219967097772, + "grad_norm": 0.10302896797657013, + "learning_rate": 5.627336952345158e-06, + "loss": 0.0055, + "step": 160270 + }, + { + "epoch": 1.0279861336035634, + "grad_norm": 0.06174756959080696, + "learning_rate": 5.626781667960483e-06, + "loss": 0.0016, + "step": 160280 + }, + { + "epoch": 1.0280502704973495, + "grad_norm": 0.121791310608387, + "learning_rate": 5.626226375721784e-06, + "loss": 0.0024, + "step": 160290 + }, + { + "epoch": 1.0281144073911357, + "grad_norm": 0.19790798425674438, + "learning_rate": 5.625671075636021e-06, + "loss": 0.0017, + "step": 160300 + }, + { + "epoch": 1.0281785442849218, + "grad_norm": 0.12741999328136444, + "learning_rate": 5.625115767710154e-06, + "loss": 0.0023, + "step": 160310 + }, + { + "epoch": 1.0282426811787078, + "grad_norm": 0.11417978256940842, + "learning_rate": 5.624560451951138e-06, + "loss": 0.0072, + "step": 160320 + }, + { + "epoch": 1.028306818072494, + "grad_norm": 0.15278507769107819, + "learning_rate": 5.624005128365935e-06, + "loss": 0.0015, + "step": 160330 + }, + { + "epoch": 1.02837095496628, + "grad_norm": 0.06196146830916405, + "learning_rate": 5.6234497969615e-06, + "loss": 0.003, + "step": 160340 + }, + { + "epoch": 1.0284350918600662, + "grad_norm": 0.07730269432067871, + "learning_rate": 5.622894457744794e-06, + "loss": 0.0009, + "step": 160350 + }, + { + "epoch": 1.0284992287538521, + "grad_norm": 0.16394564509391785, + "learning_rate": 5.622339110722775e-06, + "loss": 0.002, + "step": 160360 + }, + { + "epoch": 1.0285633656476383, + "grad_norm": 0.04401955008506775, + "learning_rate": 5.621783755902402e-06, + "loss": 0.0009, + "step": 160370 + }, + { + "epoch": 1.0286275025414244, + "grad_norm": 0.057977233082056046, + "learning_rate": 5.621228393290636e-06, + "loss": 0.0018, + "step": 160380 + }, + { + "epoch": 1.0286916394352106, + "grad_norm": 0.05147771164774895, + "learning_rate": 5.620673022894433e-06, + "loss": 0.0022, + "step": 160390 + }, + { + "epoch": 1.0287557763289965, + "grad_norm": 0.02774648182094097, + "learning_rate": 5.620117644720754e-06, + "loss": 0.0014, + "step": 160400 + }, + { + "epoch": 1.0288199132227827, + "grad_norm": 0.1690646857023239, + "learning_rate": 5.619562258776556e-06, + "loss": 0.002, + "step": 160410 + }, + { + "epoch": 1.0288840501165688, + "grad_norm": 0.09440110623836517, + "learning_rate": 5.6190068650688025e-06, + "loss": 0.0019, + "step": 160420 + }, + { + "epoch": 1.028948187010355, + "grad_norm": 0.08699806034564972, + "learning_rate": 5.618451463604448e-06, + "loss": 0.0015, + "step": 160430 + }, + { + "epoch": 1.029012323904141, + "grad_norm": 0.07111769914627075, + "learning_rate": 5.617896054390457e-06, + "loss": 0.0019, + "step": 160440 + }, + { + "epoch": 1.029076460797927, + "grad_norm": 0.17294718325138092, + "learning_rate": 5.617340637433785e-06, + "loss": 0.0021, + "step": 160450 + }, + { + "epoch": 1.0291405976917132, + "grad_norm": 0.04871895909309387, + "learning_rate": 5.6167852127413955e-06, + "loss": 0.0015, + "step": 160460 + }, + { + "epoch": 1.0292047345854993, + "grad_norm": 0.05924814194440842, + "learning_rate": 5.616229780320246e-06, + "loss": 0.0016, + "step": 160470 + }, + { + "epoch": 1.0292688714792855, + "grad_norm": 0.10790330916643143, + "learning_rate": 5.615674340177296e-06, + "loss": 0.0008, + "step": 160480 + }, + { + "epoch": 1.0293330083730714, + "grad_norm": 0.20627789199352264, + "learning_rate": 5.615118892319507e-06, + "loss": 0.0032, + "step": 160490 + }, + { + "epoch": 1.0293971452668575, + "grad_norm": 0.08072004467248917, + "learning_rate": 5.614563436753839e-06, + "loss": 0.0014, + "step": 160500 + }, + { + "epoch": 1.0294612821606437, + "grad_norm": 0.10128284990787506, + "learning_rate": 5.6140079734872514e-06, + "loss": 0.0041, + "step": 160510 + }, + { + "epoch": 1.0295254190544298, + "grad_norm": 0.07587765157222748, + "learning_rate": 5.613452502526704e-06, + "loss": 0.001, + "step": 160520 + }, + { + "epoch": 1.0295895559482158, + "grad_norm": 0.06863145530223846, + "learning_rate": 5.61289702387916e-06, + "loss": 0.0013, + "step": 160530 + }, + { + "epoch": 1.029653692842002, + "grad_norm": 0.17337927222251892, + "learning_rate": 5.6123415375515775e-06, + "loss": 0.0038, + "step": 160540 + }, + { + "epoch": 1.029717829735788, + "grad_norm": 0.01374752726405859, + "learning_rate": 5.611786043550918e-06, + "loss": 0.0012, + "step": 160550 + }, + { + "epoch": 1.0297819666295742, + "grad_norm": 0.06106782332062721, + "learning_rate": 5.611230541884143e-06, + "loss": 0.0017, + "step": 160560 + }, + { + "epoch": 1.0298461035233604, + "grad_norm": 0.06003543734550476, + "learning_rate": 5.610675032558211e-06, + "loss": 0.0013, + "step": 160570 + }, + { + "epoch": 1.0299102404171463, + "grad_norm": 0.07925518602132797, + "learning_rate": 5.610119515580086e-06, + "loss": 0.0023, + "step": 160580 + }, + { + "epoch": 1.0299743773109324, + "grad_norm": 0.1774548888206482, + "learning_rate": 5.609563990956727e-06, + "loss": 0.0013, + "step": 160590 + }, + { + "epoch": 1.0300385142047186, + "grad_norm": 0.19831368327140808, + "learning_rate": 5.609008458695095e-06, + "loss": 0.003, + "step": 160600 + }, + { + "epoch": 1.0301026510985047, + "grad_norm": 0.029156174510717392, + "learning_rate": 5.608452918802152e-06, + "loss": 0.0017, + "step": 160610 + }, + { + "epoch": 1.0301667879922907, + "grad_norm": 0.06537648290395737, + "learning_rate": 5.607897371284858e-06, + "loss": 0.0014, + "step": 160620 + }, + { + "epoch": 1.0302309248860768, + "grad_norm": 0.1173158586025238, + "learning_rate": 5.607341816150177e-06, + "loss": 0.0024, + "step": 160630 + }, + { + "epoch": 1.030295061779863, + "grad_norm": 0.0031485699582844973, + "learning_rate": 5.6067862534050675e-06, + "loss": 0.0022, + "step": 160640 + }, + { + "epoch": 1.0303591986736491, + "grad_norm": 0.07841501384973526, + "learning_rate": 5.606230683056495e-06, + "loss": 0.0012, + "step": 160650 + }, + { + "epoch": 1.030423335567435, + "grad_norm": 0.09447195380926132, + "learning_rate": 5.605675105111416e-06, + "loss": 0.002, + "step": 160660 + }, + { + "epoch": 1.0304874724612212, + "grad_norm": 0.3483569324016571, + "learning_rate": 5.605119519576796e-06, + "loss": 0.0019, + "step": 160670 + }, + { + "epoch": 1.0305516093550073, + "grad_norm": 0.0792723298072815, + "learning_rate": 5.604563926459596e-06, + "loss": 0.0029, + "step": 160680 + }, + { + "epoch": 1.0306157462487935, + "grad_norm": 0.0928591936826706, + "learning_rate": 5.604008325766779e-06, + "loss": 0.0023, + "step": 160690 + }, + { + "epoch": 1.0306798831425796, + "grad_norm": 0.05987996980547905, + "learning_rate": 5.603452717505304e-06, + "loss": 0.0016, + "step": 160700 + }, + { + "epoch": 1.0307440200363656, + "grad_norm": 0.1154266893863678, + "learning_rate": 5.602897101682137e-06, + "loss": 0.0014, + "step": 160710 + }, + { + "epoch": 1.0308081569301517, + "grad_norm": 0.09690660983324051, + "learning_rate": 5.602341478304238e-06, + "loss": 0.0022, + "step": 160720 + }, + { + "epoch": 1.0308722938239379, + "grad_norm": 0.12438417971134186, + "learning_rate": 5.601785847378569e-06, + "loss": 0.0021, + "step": 160730 + }, + { + "epoch": 1.030936430717724, + "grad_norm": 0.03502361848950386, + "learning_rate": 5.601230208912094e-06, + "loss": 0.0024, + "step": 160740 + }, + { + "epoch": 1.03100056761151, + "grad_norm": 0.05004703998565674, + "learning_rate": 5.6006745629117745e-06, + "loss": 0.0015, + "step": 160750 + }, + { + "epoch": 1.031064704505296, + "grad_norm": 0.05685212463140488, + "learning_rate": 5.600118909384574e-06, + "loss": 0.0022, + "step": 160760 + }, + { + "epoch": 1.0311288413990822, + "grad_norm": 0.016093585640192032, + "learning_rate": 5.599563248337454e-06, + "loss": 0.0009, + "step": 160770 + }, + { + "epoch": 1.0311929782928684, + "grad_norm": 0.2270883470773697, + "learning_rate": 5.599007579777378e-06, + "loss": 0.0035, + "step": 160780 + }, + { + "epoch": 1.0312571151866543, + "grad_norm": 0.1231345608830452, + "learning_rate": 5.598451903711309e-06, + "loss": 0.0033, + "step": 160790 + }, + { + "epoch": 1.0313212520804405, + "grad_norm": 0.11265743523836136, + "learning_rate": 5.59789622014621e-06, + "loss": 0.0029, + "step": 160800 + }, + { + "epoch": 1.0313853889742266, + "grad_norm": 0.1232166662812233, + "learning_rate": 5.597340529089045e-06, + "loss": 0.0025, + "step": 160810 + }, + { + "epoch": 1.0314495258680128, + "grad_norm": 0.09138888865709305, + "learning_rate": 5.596784830546776e-06, + "loss": 0.0029, + "step": 160820 + }, + { + "epoch": 1.0315136627617987, + "grad_norm": 0.0028286930173635483, + "learning_rate": 5.596229124526364e-06, + "loss": 0.0018, + "step": 160830 + }, + { + "epoch": 1.0315777996555848, + "grad_norm": 0.14618414640426636, + "learning_rate": 5.595673411034779e-06, + "loss": 0.0014, + "step": 160840 + }, + { + "epoch": 1.031641936549371, + "grad_norm": 0.06368950754404068, + "learning_rate": 5.595117690078977e-06, + "loss": 0.0022, + "step": 160850 + }, + { + "epoch": 1.0317060734431571, + "grad_norm": 0.014613325707614422, + "learning_rate": 5.594561961665927e-06, + "loss": 0.0018, + "step": 160860 + }, + { + "epoch": 1.0317702103369433, + "grad_norm": 0.047521304339170456, + "learning_rate": 5.594006225802589e-06, + "loss": 0.0019, + "step": 160870 + }, + { + "epoch": 1.0318343472307292, + "grad_norm": 0.0785314068198204, + "learning_rate": 5.59345048249593e-06, + "loss": 0.0018, + "step": 160880 + }, + { + "epoch": 1.0318984841245153, + "grad_norm": 0.0063042123802006245, + "learning_rate": 5.592894731752912e-06, + "loss": 0.0022, + "step": 160890 + }, + { + "epoch": 1.0319626210183015, + "grad_norm": 0.029178213328123093, + "learning_rate": 5.592338973580499e-06, + "loss": 0.0015, + "step": 160900 + }, + { + "epoch": 1.0320267579120876, + "grad_norm": 0.13169245421886444, + "learning_rate": 5.591783207985656e-06, + "loss": 0.0021, + "step": 160910 + }, + { + "epoch": 1.0320908948058736, + "grad_norm": 0.1766376793384552, + "learning_rate": 5.591227434975345e-06, + "loss": 0.0015, + "step": 160920 + }, + { + "epoch": 1.0321550316996597, + "grad_norm": 0.03069526143372059, + "learning_rate": 5.590671654556533e-06, + "loss": 0.0014, + "step": 160930 + }, + { + "epoch": 1.0322191685934459, + "grad_norm": 0.16416823863983154, + "learning_rate": 5.590115866736182e-06, + "loss": 0.0019, + "step": 160940 + }, + { + "epoch": 1.032283305487232, + "grad_norm": 0.07134131342172623, + "learning_rate": 5.589560071521259e-06, + "loss": 0.0015, + "step": 160950 + }, + { + "epoch": 1.032347442381018, + "grad_norm": 0.06377499550580978, + "learning_rate": 5.589004268918725e-06, + "loss": 0.0026, + "step": 160960 + }, + { + "epoch": 1.032411579274804, + "grad_norm": 0.012781238183379173, + "learning_rate": 5.588448458935548e-06, + "loss": 0.0017, + "step": 160970 + }, + { + "epoch": 1.0324757161685902, + "grad_norm": 0.021176699548959732, + "learning_rate": 5.5878926415786906e-06, + "loss": 0.0018, + "step": 160980 + }, + { + "epoch": 1.0325398530623764, + "grad_norm": 0.023555800318717957, + "learning_rate": 5.587336816855119e-06, + "loss": 0.0018, + "step": 160990 + }, + { + "epoch": 1.0326039899561625, + "grad_norm": 0.06162658706307411, + "learning_rate": 5.586780984771797e-06, + "loss": 0.0014, + "step": 161000 + }, + { + "epoch": 1.0326681268499485, + "grad_norm": 0.13439489901065826, + "learning_rate": 5.5862251453356885e-06, + "loss": 0.0025, + "step": 161010 + }, + { + "epoch": 1.0327322637437346, + "grad_norm": 0.12414365261793137, + "learning_rate": 5.585669298553762e-06, + "loss": 0.0012, + "step": 161020 + }, + { + "epoch": 1.0327964006375208, + "grad_norm": 0.04816731438040733, + "learning_rate": 5.585113444432979e-06, + "loss": 0.0026, + "step": 161030 + }, + { + "epoch": 1.032860537531307, + "grad_norm": 0.10992000252008438, + "learning_rate": 5.584557582980308e-06, + "loss": 0.0006, + "step": 161040 + }, + { + "epoch": 1.0329246744250928, + "grad_norm": 0.45915332436561584, + "learning_rate": 5.584001714202711e-06, + "loss": 0.0011, + "step": 161050 + }, + { + "epoch": 1.032988811318879, + "grad_norm": 0.036483284085989, + "learning_rate": 5.583445838107156e-06, + "loss": 0.0023, + "step": 161060 + }, + { + "epoch": 1.0330529482126651, + "grad_norm": 0.07692298293113708, + "learning_rate": 5.5828899547006065e-06, + "loss": 0.0015, + "step": 161070 + }, + { + "epoch": 1.0331170851064513, + "grad_norm": 0.10438433289527893, + "learning_rate": 5.582334063990031e-06, + "loss": 0.0017, + "step": 161080 + }, + { + "epoch": 1.0331812220002372, + "grad_norm": 0.14653724431991577, + "learning_rate": 5.581778165982392e-06, + "loss": 0.002, + "step": 161090 + }, + { + "epoch": 1.0332453588940234, + "grad_norm": 0.11509974300861359, + "learning_rate": 5.581222260684659e-06, + "loss": 0.0017, + "step": 161100 + }, + { + "epoch": 1.0333094957878095, + "grad_norm": 0.20483095943927765, + "learning_rate": 5.580666348103794e-06, + "loss": 0.0025, + "step": 161110 + }, + { + "epoch": 1.0333736326815957, + "grad_norm": 0.02480766735970974, + "learning_rate": 5.5801104282467656e-06, + "loss": 0.0023, + "step": 161120 + }, + { + "epoch": 1.0334377695753818, + "grad_norm": 0.12931233644485474, + "learning_rate": 5.579554501120538e-06, + "loss": 0.0022, + "step": 161130 + }, + { + "epoch": 1.0335019064691677, + "grad_norm": 0.3534706234931946, + "learning_rate": 5.578998566732079e-06, + "loss": 0.0028, + "step": 161140 + }, + { + "epoch": 1.0335660433629539, + "grad_norm": 0.18303950130939484, + "learning_rate": 5.578442625088353e-06, + "loss": 0.0018, + "step": 161150 + }, + { + "epoch": 1.03363018025674, + "grad_norm": 0.10803184658288956, + "learning_rate": 5.5778866761963295e-06, + "loss": 0.0018, + "step": 161160 + }, + { + "epoch": 1.0336943171505262, + "grad_norm": 0.09353943169116974, + "learning_rate": 5.577330720062971e-06, + "loss": 0.002, + "step": 161170 + }, + { + "epoch": 1.033758454044312, + "grad_norm": 0.013269330374896526, + "learning_rate": 5.576774756695247e-06, + "loss": 0.0015, + "step": 161180 + }, + { + "epoch": 1.0338225909380983, + "grad_norm": 0.019221922382712364, + "learning_rate": 5.576218786100123e-06, + "loss": 0.0008, + "step": 161190 + }, + { + "epoch": 1.0338867278318844, + "grad_norm": 0.2175097018480301, + "learning_rate": 5.575662808284565e-06, + "loss": 0.0025, + "step": 161200 + }, + { + "epoch": 1.0339508647256705, + "grad_norm": 0.06522596627473831, + "learning_rate": 5.5751068232555414e-06, + "loss": 0.0015, + "step": 161210 + }, + { + "epoch": 1.0340150016194565, + "grad_norm": 0.06038666516542435, + "learning_rate": 5.574550831020018e-06, + "loss": 0.0013, + "step": 161220 + }, + { + "epoch": 1.0340791385132426, + "grad_norm": 0.06672700494527817, + "learning_rate": 5.5739948315849625e-06, + "loss": 0.0011, + "step": 161230 + }, + { + "epoch": 1.0341432754070288, + "grad_norm": 0.07927929610013962, + "learning_rate": 5.5734388249573404e-06, + "loss": 0.0011, + "step": 161240 + }, + { + "epoch": 1.034207412300815, + "grad_norm": 0.6204650402069092, + "learning_rate": 5.572882811144121e-06, + "loss": 0.0028, + "step": 161250 + }, + { + "epoch": 1.0342715491946008, + "grad_norm": 0.1258297860622406, + "learning_rate": 5.57232679015227e-06, + "loss": 0.0021, + "step": 161260 + }, + { + "epoch": 1.034335686088387, + "grad_norm": 0.1383894979953766, + "learning_rate": 5.571770761988756e-06, + "loss": 0.0032, + "step": 161270 + }, + { + "epoch": 1.0343998229821731, + "grad_norm": 0.08100338280200958, + "learning_rate": 5.5712147266605455e-06, + "loss": 0.0024, + "step": 161280 + }, + { + "epoch": 1.0344639598759593, + "grad_norm": 0.15081030130386353, + "learning_rate": 5.570658684174606e-06, + "loss": 0.002, + "step": 161290 + }, + { + "epoch": 1.0345280967697454, + "grad_norm": 0.18181532621383667, + "learning_rate": 5.570102634537905e-06, + "loss": 0.0019, + "step": 161300 + }, + { + "epoch": 1.0345922336635314, + "grad_norm": 0.5333714485168457, + "learning_rate": 5.569546577757412e-06, + "loss": 0.002, + "step": 161310 + }, + { + "epoch": 1.0346563705573175, + "grad_norm": 0.1616603285074234, + "learning_rate": 5.568990513840092e-06, + "loss": 0.0023, + "step": 161320 + }, + { + "epoch": 1.0347205074511037, + "grad_norm": 0.03816382586956024, + "learning_rate": 5.568434442792915e-06, + "loss": 0.0034, + "step": 161330 + }, + { + "epoch": 1.0347846443448898, + "grad_norm": 0.04157206788659096, + "learning_rate": 5.5678783646228466e-06, + "loss": 0.0014, + "step": 161340 + }, + { + "epoch": 1.0348487812386757, + "grad_norm": 0.10471261292695999, + "learning_rate": 5.5673222793368575e-06, + "loss": 0.0016, + "step": 161350 + }, + { + "epoch": 1.034912918132462, + "grad_norm": 0.124302439391613, + "learning_rate": 5.566766186941914e-06, + "loss": 0.0017, + "step": 161360 + }, + { + "epoch": 1.034977055026248, + "grad_norm": 0.09578802436590195, + "learning_rate": 5.566210087444986e-06, + "loss": 0.0018, + "step": 161370 + }, + { + "epoch": 1.0350411919200342, + "grad_norm": 0.09537933766841888, + "learning_rate": 5.565653980853042e-06, + "loss": 0.0019, + "step": 161380 + }, + { + "epoch": 1.0351053288138201, + "grad_norm": 0.11257769912481308, + "learning_rate": 5.5650978671730474e-06, + "loss": 0.0012, + "step": 161390 + }, + { + "epoch": 1.0351694657076063, + "grad_norm": 0.16283220052719116, + "learning_rate": 5.564541746411974e-06, + "loss": 0.0017, + "step": 161400 + }, + { + "epoch": 1.0352336026013924, + "grad_norm": 0.07024915516376495, + "learning_rate": 5.5639856185767885e-06, + "loss": 0.0026, + "step": 161410 + }, + { + "epoch": 1.0352977394951786, + "grad_norm": 0.06933924555778503, + "learning_rate": 5.56342948367446e-06, + "loss": 0.0021, + "step": 161420 + }, + { + "epoch": 1.0353618763889647, + "grad_norm": 0.09991660714149475, + "learning_rate": 5.562873341711958e-06, + "loss": 0.0029, + "step": 161430 + }, + { + "epoch": 1.0354260132827506, + "grad_norm": 0.07846569269895554, + "learning_rate": 5.562317192696248e-06, + "loss": 0.0019, + "step": 161440 + }, + { + "epoch": 1.0354901501765368, + "grad_norm": 0.06706856936216354, + "learning_rate": 5.561761036634304e-06, + "loss": 0.0022, + "step": 161450 + }, + { + "epoch": 1.035554287070323, + "grad_norm": 0.22754397988319397, + "learning_rate": 5.561204873533093e-06, + "loss": 0.0021, + "step": 161460 + }, + { + "epoch": 1.035618423964109, + "grad_norm": 0.012594277039170265, + "learning_rate": 5.5606487033995835e-06, + "loss": 0.0017, + "step": 161470 + }, + { + "epoch": 1.035682560857895, + "grad_norm": 0.11615916341543198, + "learning_rate": 5.560092526240746e-06, + "loss": 0.0012, + "step": 161480 + }, + { + "epoch": 1.0357466977516812, + "grad_norm": 0.01409253478050232, + "learning_rate": 5.559536342063548e-06, + "loss": 0.001, + "step": 161490 + }, + { + "epoch": 1.0358108346454673, + "grad_norm": 0.10805962234735489, + "learning_rate": 5.5589801508749595e-06, + "loss": 0.0016, + "step": 161500 + }, + { + "epoch": 1.0358749715392535, + "grad_norm": 0.22180473804473877, + "learning_rate": 5.558423952681949e-06, + "loss": 0.0019, + "step": 161510 + }, + { + "epoch": 1.0359391084330394, + "grad_norm": 0.2480551153421402, + "learning_rate": 5.55786774749149e-06, + "loss": 0.0017, + "step": 161520 + }, + { + "epoch": 1.0360032453268255, + "grad_norm": 0.08151610195636749, + "learning_rate": 5.557311535310548e-06, + "loss": 0.0013, + "step": 161530 + }, + { + "epoch": 1.0360673822206117, + "grad_norm": 0.03832826763391495, + "learning_rate": 5.556755316146094e-06, + "loss": 0.0014, + "step": 161540 + }, + { + "epoch": 1.0361315191143978, + "grad_norm": 0.13015124201774597, + "learning_rate": 5.556199090005098e-06, + "loss": 0.0022, + "step": 161550 + }, + { + "epoch": 1.036195656008184, + "grad_norm": 0.15211138129234314, + "learning_rate": 5.5556428568945306e-06, + "loss": 0.0016, + "step": 161560 + }, + { + "epoch": 1.03625979290197, + "grad_norm": 0.057695139199495316, + "learning_rate": 5.55508661682136e-06, + "loss": 0.0021, + "step": 161570 + }, + { + "epoch": 1.036323929795756, + "grad_norm": 0.17335347831249237, + "learning_rate": 5.554530369792558e-06, + "loss": 0.0022, + "step": 161580 + }, + { + "epoch": 1.0363880666895422, + "grad_norm": 0.03899018466472626, + "learning_rate": 5.553974115815094e-06, + "loss": 0.0014, + "step": 161590 + }, + { + "epoch": 1.0364522035833283, + "grad_norm": 0.11029978096485138, + "learning_rate": 5.553417854895938e-06, + "loss": 0.0034, + "step": 161600 + }, + { + "epoch": 1.0365163404771143, + "grad_norm": 0.07199625670909882, + "learning_rate": 5.552861587042062e-06, + "loss": 0.0032, + "step": 161610 + }, + { + "epoch": 1.0365804773709004, + "grad_norm": 0.12434910982847214, + "learning_rate": 5.552305312260435e-06, + "loss": 0.0011, + "step": 161620 + }, + { + "epoch": 1.0366446142646866, + "grad_norm": 0.11052077263593674, + "learning_rate": 5.551749030558027e-06, + "loss": 0.0037, + "step": 161630 + }, + { + "epoch": 1.0367087511584727, + "grad_norm": 0.374609112739563, + "learning_rate": 5.5511927419418084e-06, + "loss": 0.0017, + "step": 161640 + }, + { + "epoch": 1.0367728880522586, + "grad_norm": 0.0357525460422039, + "learning_rate": 5.550636446418754e-06, + "loss": 0.0022, + "step": 161650 + }, + { + "epoch": 1.0368370249460448, + "grad_norm": 0.20752693712711334, + "learning_rate": 5.550080143995827e-06, + "loss": 0.0034, + "step": 161660 + }, + { + "epoch": 1.036901161839831, + "grad_norm": 0.05699139088392258, + "learning_rate": 5.549523834680006e-06, + "loss": 0.0019, + "step": 161670 + }, + { + "epoch": 1.036965298733617, + "grad_norm": 0.03660396486520767, + "learning_rate": 5.5489675184782575e-06, + "loss": 0.0025, + "step": 161680 + }, + { + "epoch": 1.037029435627403, + "grad_norm": 0.049404483288526535, + "learning_rate": 5.548411195397554e-06, + "loss": 0.0028, + "step": 161690 + }, + { + "epoch": 1.0370935725211892, + "grad_norm": 0.08149591833353043, + "learning_rate": 5.547854865444866e-06, + "loss": 0.0019, + "step": 161700 + }, + { + "epoch": 1.0371577094149753, + "grad_norm": 0.10058657079935074, + "learning_rate": 5.547298528627165e-06, + "loss": 0.0015, + "step": 161710 + }, + { + "epoch": 1.0372218463087615, + "grad_norm": 0.07720974087715149, + "learning_rate": 5.546742184951422e-06, + "loss": 0.0017, + "step": 161720 + }, + { + "epoch": 1.0372859832025476, + "grad_norm": 0.1567755937576294, + "learning_rate": 5.546185834424609e-06, + "loss": 0.0034, + "step": 161730 + }, + { + "epoch": 1.0373501200963335, + "grad_norm": 0.015572108328342438, + "learning_rate": 5.545629477053697e-06, + "loss": 0.0017, + "step": 161740 + }, + { + "epoch": 1.0374142569901197, + "grad_norm": 0.031676653772592545, + "learning_rate": 5.545073112845657e-06, + "loss": 0.0008, + "step": 161750 + }, + { + "epoch": 1.0374783938839058, + "grad_norm": 0.0611712783575058, + "learning_rate": 5.544516741807463e-06, + "loss": 0.0019, + "step": 161760 + }, + { + "epoch": 1.037542530777692, + "grad_norm": 0.049083299934864044, + "learning_rate": 5.543960363946083e-06, + "loss": 0.0023, + "step": 161770 + }, + { + "epoch": 1.037606667671478, + "grad_norm": 0.1100301593542099, + "learning_rate": 5.543403979268494e-06, + "loss": 0.0026, + "step": 161780 + }, + { + "epoch": 1.037670804565264, + "grad_norm": 0.3066883087158203, + "learning_rate": 5.542847587781661e-06, + "loss": 0.0045, + "step": 161790 + }, + { + "epoch": 1.0377349414590502, + "grad_norm": 0.05478224158287048, + "learning_rate": 5.542291189492564e-06, + "loss": 0.0022, + "step": 161800 + }, + { + "epoch": 1.0377990783528364, + "grad_norm": 0.11197340488433838, + "learning_rate": 5.541734784408167e-06, + "loss": 0.0018, + "step": 161810 + }, + { + "epoch": 1.0378632152466223, + "grad_norm": 0.06742127239704132, + "learning_rate": 5.541178372535447e-06, + "loss": 0.0018, + "step": 161820 + }, + { + "epoch": 1.0379273521404084, + "grad_norm": 0.11610394716262817, + "learning_rate": 5.5406219538813745e-06, + "loss": 0.003, + "step": 161830 + }, + { + "epoch": 1.0379914890341946, + "grad_norm": 0.17880384624004364, + "learning_rate": 5.540065528452925e-06, + "loss": 0.0024, + "step": 161840 + }, + { + "epoch": 1.0380556259279807, + "grad_norm": 0.02726311981678009, + "learning_rate": 5.539509096257066e-06, + "loss": 0.0016, + "step": 161850 + }, + { + "epoch": 1.0381197628217669, + "grad_norm": 0.006308823358267546, + "learning_rate": 5.538952657300775e-06, + "loss": 0.0017, + "step": 161860 + }, + { + "epoch": 1.0381838997155528, + "grad_norm": 0.02790677733719349, + "learning_rate": 5.53839621159102e-06, + "loss": 0.0024, + "step": 161870 + }, + { + "epoch": 1.038248036609339, + "grad_norm": 0.2501605451107025, + "learning_rate": 5.537839759134776e-06, + "loss": 0.0022, + "step": 161880 + }, + { + "epoch": 1.038312173503125, + "grad_norm": 0.10462504625320435, + "learning_rate": 5.537283299939016e-06, + "loss": 0.0021, + "step": 161890 + }, + { + "epoch": 1.0383763103969113, + "grad_norm": 0.033071622252464294, + "learning_rate": 5.536726834010712e-06, + "loss": 0.0014, + "step": 161900 + }, + { + "epoch": 1.0384404472906972, + "grad_norm": 0.04038606584072113, + "learning_rate": 5.536170361356836e-06, + "loss": 0.0032, + "step": 161910 + }, + { + "epoch": 1.0385045841844833, + "grad_norm": 0.24799920618534088, + "learning_rate": 5.535613881984363e-06, + "loss": 0.0031, + "step": 161920 + }, + { + "epoch": 1.0385687210782695, + "grad_norm": 0.0339103527367115, + "learning_rate": 5.535057395900266e-06, + "loss": 0.0013, + "step": 161930 + }, + { + "epoch": 1.0386328579720556, + "grad_norm": 0.10233575850725174, + "learning_rate": 5.5345009031115175e-06, + "loss": 0.0015, + "step": 161940 + }, + { + "epoch": 1.0386969948658415, + "grad_norm": 0.1514114886522293, + "learning_rate": 5.53394440362509e-06, + "loss": 0.0029, + "step": 161950 + }, + { + "epoch": 1.0387611317596277, + "grad_norm": 0.11045278608798981, + "learning_rate": 5.5333878974479575e-06, + "loss": 0.0021, + "step": 161960 + }, + { + "epoch": 1.0388252686534138, + "grad_norm": 0.07066120207309723, + "learning_rate": 5.532831384587094e-06, + "loss": 0.0031, + "step": 161970 + }, + { + "epoch": 1.0388894055472, + "grad_norm": 0.26015713810920715, + "learning_rate": 5.532274865049472e-06, + "loss": 0.0064, + "step": 161980 + }, + { + "epoch": 1.0389535424409861, + "grad_norm": 0.14190037548542023, + "learning_rate": 5.531718338842066e-06, + "loss": 0.0024, + "step": 161990 + }, + { + "epoch": 1.039017679334772, + "grad_norm": 0.086383156478405, + "learning_rate": 5.531161805971848e-06, + "loss": 0.0011, + "step": 162000 + }, + { + "epoch": 1.0390818162285582, + "grad_norm": 0.05792558938264847, + "learning_rate": 5.530605266445795e-06, + "loss": 0.0039, + "step": 162010 + }, + { + "epoch": 1.0391459531223444, + "grad_norm": 0.059819743037223816, + "learning_rate": 5.530048720270876e-06, + "loss": 0.0015, + "step": 162020 + }, + { + "epoch": 1.0392100900161305, + "grad_norm": 0.1345950812101364, + "learning_rate": 5.52949216745407e-06, + "loss": 0.001, + "step": 162030 + }, + { + "epoch": 1.0392742269099164, + "grad_norm": 0.08652999997138977, + "learning_rate": 5.528935608002348e-06, + "loss": 0.002, + "step": 162040 + }, + { + "epoch": 1.0393383638037026, + "grad_norm": 0.034290753304958344, + "learning_rate": 5.528379041922686e-06, + "loss": 0.0012, + "step": 162050 + }, + { + "epoch": 1.0394025006974887, + "grad_norm": 0.04874274507164955, + "learning_rate": 5.527822469222055e-06, + "loss": 0.0034, + "step": 162060 + }, + { + "epoch": 1.039466637591275, + "grad_norm": 0.034296587109565735, + "learning_rate": 5.527265889907431e-06, + "loss": 0.0017, + "step": 162070 + }, + { + "epoch": 1.0395307744850608, + "grad_norm": 0.06329309940338135, + "learning_rate": 5.52670930398579e-06, + "loss": 0.0014, + "step": 162080 + }, + { + "epoch": 1.039594911378847, + "grad_norm": 0.1566283255815506, + "learning_rate": 5.526152711464104e-06, + "loss": 0.0018, + "step": 162090 + }, + { + "epoch": 1.0396590482726331, + "grad_norm": 0.20953132212162018, + "learning_rate": 5.525596112349351e-06, + "loss": 0.0015, + "step": 162100 + }, + { + "epoch": 1.0397231851664193, + "grad_norm": 0.16059410572052002, + "learning_rate": 5.5250395066485e-06, + "loss": 0.0007, + "step": 162110 + }, + { + "epoch": 1.0397873220602052, + "grad_norm": 0.032380376011133194, + "learning_rate": 5.52448289436853e-06, + "loss": 0.0017, + "step": 162120 + }, + { + "epoch": 1.0398514589539913, + "grad_norm": 0.11249042302370071, + "learning_rate": 5.523926275516413e-06, + "loss": 0.0024, + "step": 162130 + }, + { + "epoch": 1.0399155958477775, + "grad_norm": 0.1949484944343567, + "learning_rate": 5.523369650099128e-06, + "loss": 0.0033, + "step": 162140 + }, + { + "epoch": 1.0399797327415636, + "grad_norm": 0.2602667510509491, + "learning_rate": 5.522813018123646e-06, + "loss": 0.0026, + "step": 162150 + }, + { + "epoch": 1.0400438696353498, + "grad_norm": 0.1616698056459427, + "learning_rate": 5.522256379596943e-06, + "loss": 0.0014, + "step": 162160 + }, + { + "epoch": 1.0401080065291357, + "grad_norm": 0.20122376084327698, + "learning_rate": 5.521699734525995e-06, + "loss": 0.0018, + "step": 162170 + }, + { + "epoch": 1.0401721434229219, + "grad_norm": 0.0360245555639267, + "learning_rate": 5.521143082917776e-06, + "loss": 0.0009, + "step": 162180 + }, + { + "epoch": 1.040236280316708, + "grad_norm": 0.06166728213429451, + "learning_rate": 5.520586424779262e-06, + "loss": 0.0017, + "step": 162190 + }, + { + "epoch": 1.0403004172104942, + "grad_norm": 0.058297354727983475, + "learning_rate": 5.520029760117428e-06, + "loss": 0.0018, + "step": 162200 + }, + { + "epoch": 1.04036455410428, + "grad_norm": 0.11905381828546524, + "learning_rate": 5.519473088939247e-06, + "loss": 0.0019, + "step": 162210 + }, + { + "epoch": 1.0404286909980662, + "grad_norm": 0.06737947463989258, + "learning_rate": 5.5189164112517e-06, + "loss": 0.0009, + "step": 162220 + }, + { + "epoch": 1.0404928278918524, + "grad_norm": 0.14862266182899475, + "learning_rate": 5.518359727061757e-06, + "loss": 0.0027, + "step": 162230 + }, + { + "epoch": 1.0405569647856385, + "grad_norm": 0.15508893132209778, + "learning_rate": 5.517803036376399e-06, + "loss": 0.0017, + "step": 162240 + }, + { + "epoch": 1.0406211016794247, + "grad_norm": 0.09064960479736328, + "learning_rate": 5.5172463392025975e-06, + "loss": 0.0028, + "step": 162250 + }, + { + "epoch": 1.0406852385732106, + "grad_norm": 0.1738806515932083, + "learning_rate": 5.51668963554733e-06, + "loss": 0.0019, + "step": 162260 + }, + { + "epoch": 1.0407493754669968, + "grad_norm": 0.3050050735473633, + "learning_rate": 5.5161329254175715e-06, + "loss": 0.003, + "step": 162270 + }, + { + "epoch": 1.040813512360783, + "grad_norm": 0.04364198073744774, + "learning_rate": 5.515576208820299e-06, + "loss": 0.0014, + "step": 162280 + }, + { + "epoch": 1.040877649254569, + "grad_norm": 0.05057888850569725, + "learning_rate": 5.515019485762488e-06, + "loss": 0.0019, + "step": 162290 + }, + { + "epoch": 1.040941786148355, + "grad_norm": 0.1384192705154419, + "learning_rate": 5.514462756251114e-06, + "loss": 0.0025, + "step": 162300 + }, + { + "epoch": 1.0410059230421411, + "grad_norm": 0.03384971618652344, + "learning_rate": 5.513906020293156e-06, + "loss": 0.0017, + "step": 162310 + }, + { + "epoch": 1.0410700599359273, + "grad_norm": 0.07956333458423615, + "learning_rate": 5.513349277895587e-06, + "loss": 0.0013, + "step": 162320 + }, + { + "epoch": 1.0411341968297134, + "grad_norm": 0.07815622538328171, + "learning_rate": 5.512792529065385e-06, + "loss": 0.0022, + "step": 162330 + }, + { + "epoch": 1.0411983337234993, + "grad_norm": 0.08500978350639343, + "learning_rate": 5.512235773809526e-06, + "loss": 0.0013, + "step": 162340 + }, + { + "epoch": 1.0412624706172855, + "grad_norm": 0.08306215703487396, + "learning_rate": 5.511679012134987e-06, + "loss": 0.0014, + "step": 162350 + }, + { + "epoch": 1.0413266075110716, + "grad_norm": 0.28995710611343384, + "learning_rate": 5.511122244048744e-06, + "loss": 0.0017, + "step": 162360 + }, + { + "epoch": 1.0413907444048578, + "grad_norm": 0.16887091100215912, + "learning_rate": 5.510565469557774e-06, + "loss": 0.001, + "step": 162370 + }, + { + "epoch": 1.0414548812986437, + "grad_norm": 0.12421572208404541, + "learning_rate": 5.510008688669053e-06, + "loss": 0.0026, + "step": 162380 + }, + { + "epoch": 1.0415190181924299, + "grad_norm": 0.09098473191261292, + "learning_rate": 5.50945190138956e-06, + "loss": 0.0023, + "step": 162390 + }, + { + "epoch": 1.041583155086216, + "grad_norm": 0.15700791776180267, + "learning_rate": 5.508895107726269e-06, + "loss": 0.0015, + "step": 162400 + }, + { + "epoch": 1.0416472919800022, + "grad_norm": 0.13414964079856873, + "learning_rate": 5.50833830768616e-06, + "loss": 0.0012, + "step": 162410 + }, + { + "epoch": 1.0417114288737883, + "grad_norm": 0.1681639552116394, + "learning_rate": 5.507781501276207e-06, + "loss": 0.0026, + "step": 162420 + }, + { + "epoch": 1.0417755657675742, + "grad_norm": 0.14209985733032227, + "learning_rate": 5.50722468850339e-06, + "loss": 0.0025, + "step": 162430 + }, + { + "epoch": 1.0418397026613604, + "grad_norm": 0.11714351177215576, + "learning_rate": 5.506667869374685e-06, + "loss": 0.0027, + "step": 162440 + }, + { + "epoch": 1.0419038395551465, + "grad_norm": 0.07386796921491623, + "learning_rate": 5.50611104389707e-06, + "loss": 0.0015, + "step": 162450 + }, + { + "epoch": 1.0419679764489327, + "grad_norm": 0.3755844533443451, + "learning_rate": 5.505554212077522e-06, + "loss": 0.0019, + "step": 162460 + }, + { + "epoch": 1.0420321133427186, + "grad_norm": 0.035572972148656845, + "learning_rate": 5.504997373923018e-06, + "loss": 0.0013, + "step": 162470 + }, + { + "epoch": 1.0420962502365048, + "grad_norm": 0.04977955296635628, + "learning_rate": 5.504440529440536e-06, + "loss": 0.0012, + "step": 162480 + }, + { + "epoch": 1.042160387130291, + "grad_norm": 0.05770646035671234, + "learning_rate": 5.503883678637053e-06, + "loss": 0.0013, + "step": 162490 + }, + { + "epoch": 1.042224524024077, + "grad_norm": 0.006688651163130999, + "learning_rate": 5.503326821519549e-06, + "loss": 0.0018, + "step": 162500 + }, + { + "epoch": 1.042288660917863, + "grad_norm": 0.009053279645740986, + "learning_rate": 5.502769958094999e-06, + "loss": 0.0013, + "step": 162510 + }, + { + "epoch": 1.0423527978116491, + "grad_norm": 0.013610010035336018, + "learning_rate": 5.502213088370383e-06, + "loss": 0.0014, + "step": 162520 + }, + { + "epoch": 1.0424169347054353, + "grad_norm": 0.007402785588055849, + "learning_rate": 5.501656212352676e-06, + "loss": 0.0013, + "step": 162530 + }, + { + "epoch": 1.0424810715992214, + "grad_norm": 0.2344551831483841, + "learning_rate": 5.50109933004886e-06, + "loss": 0.0013, + "step": 162540 + }, + { + "epoch": 1.0425452084930076, + "grad_norm": 0.08997918665409088, + "learning_rate": 5.500542441465911e-06, + "loss": 0.0019, + "step": 162550 + }, + { + "epoch": 1.0426093453867935, + "grad_norm": 0.13036172091960907, + "learning_rate": 5.499985546610808e-06, + "loss": 0.0018, + "step": 162560 + }, + { + "epoch": 1.0426734822805797, + "grad_norm": 0.1589774489402771, + "learning_rate": 5.499428645490527e-06, + "loss": 0.0018, + "step": 162570 + }, + { + "epoch": 1.0427376191743658, + "grad_norm": 0.12301304936408997, + "learning_rate": 5.498871738112048e-06, + "loss": 0.0024, + "step": 162580 + }, + { + "epoch": 1.042801756068152, + "grad_norm": 0.09392508119344711, + "learning_rate": 5.498314824482351e-06, + "loss": 0.0023, + "step": 162590 + }, + { + "epoch": 1.0428658929619379, + "grad_norm": 0.01861235871911049, + "learning_rate": 5.497757904608414e-06, + "loss": 0.0022, + "step": 162600 + }, + { + "epoch": 1.042930029855724, + "grad_norm": 0.09133550524711609, + "learning_rate": 5.497200978497212e-06, + "loss": 0.0022, + "step": 162610 + }, + { + "epoch": 1.0429941667495102, + "grad_norm": 0.2399628758430481, + "learning_rate": 5.4966440461557295e-06, + "loss": 0.0012, + "step": 162620 + }, + { + "epoch": 1.0430583036432963, + "grad_norm": 0.19869330525398254, + "learning_rate": 5.49608710759094e-06, + "loss": 0.0034, + "step": 162630 + }, + { + "epoch": 1.0431224405370823, + "grad_norm": 0.1504802405834198, + "learning_rate": 5.4955301628098246e-06, + "loss": 0.0021, + "step": 162640 + }, + { + "epoch": 1.0431865774308684, + "grad_norm": 0.0682467445731163, + "learning_rate": 5.494973211819363e-06, + "loss": 0.0034, + "step": 162650 + }, + { + "epoch": 1.0432507143246545, + "grad_norm": 0.15123432874679565, + "learning_rate": 5.494416254626533e-06, + "loss": 0.0028, + "step": 162660 + }, + { + "epoch": 1.0433148512184407, + "grad_norm": 0.050001200288534164, + "learning_rate": 5.493859291238313e-06, + "loss": 0.0017, + "step": 162670 + }, + { + "epoch": 1.0433789881122268, + "grad_norm": 0.04714712128043175, + "learning_rate": 5.493302321661684e-06, + "loss": 0.0013, + "step": 162680 + }, + { + "epoch": 1.0434431250060128, + "grad_norm": 0.04859734699130058, + "learning_rate": 5.492745345903625e-06, + "loss": 0.0013, + "step": 162690 + }, + { + "epoch": 1.043507261899799, + "grad_norm": 0.04963231831789017, + "learning_rate": 5.492188363971114e-06, + "loss": 0.0023, + "step": 162700 + }, + { + "epoch": 1.043571398793585, + "grad_norm": 0.14139969646930695, + "learning_rate": 5.491631375871131e-06, + "loss": 0.0025, + "step": 162710 + }, + { + "epoch": 1.0436355356873712, + "grad_norm": 0.08460360765457153, + "learning_rate": 5.491074381610655e-06, + "loss": 0.0019, + "step": 162720 + }, + { + "epoch": 1.0436996725811571, + "grad_norm": 0.019998345524072647, + "learning_rate": 5.490517381196667e-06, + "loss": 0.0008, + "step": 162730 + }, + { + "epoch": 1.0437638094749433, + "grad_norm": 0.2822844386100769, + "learning_rate": 5.489960374636145e-06, + "loss": 0.0014, + "step": 162740 + }, + { + "epoch": 1.0438279463687294, + "grad_norm": 0.14481887221336365, + "learning_rate": 5.489403361936071e-06, + "loss": 0.0011, + "step": 162750 + }, + { + "epoch": 1.0438920832625156, + "grad_norm": 0.2062295526266098, + "learning_rate": 5.488846343103421e-06, + "loss": 0.0018, + "step": 162760 + }, + { + "epoch": 1.0439562201563015, + "grad_norm": 0.08027106523513794, + "learning_rate": 5.488289318145177e-06, + "loss": 0.002, + "step": 162770 + }, + { + "epoch": 1.0440203570500877, + "grad_norm": 0.0023542765993624926, + "learning_rate": 5.48773228706832e-06, + "loss": 0.0013, + "step": 162780 + }, + { + "epoch": 1.0440844939438738, + "grad_norm": 0.1881311982870102, + "learning_rate": 5.4871752498798284e-06, + "loss": 0.0028, + "step": 162790 + }, + { + "epoch": 1.04414863083766, + "grad_norm": 0.01573358289897442, + "learning_rate": 5.4866182065866824e-06, + "loss": 0.0016, + "step": 162800 + }, + { + "epoch": 1.044212767731446, + "grad_norm": 0.0733632743358612, + "learning_rate": 5.486061157195862e-06, + "loss": 0.0016, + "step": 162810 + }, + { + "epoch": 1.044276904625232, + "grad_norm": 0.12339013069868088, + "learning_rate": 5.485504101714349e-06, + "loss": 0.0029, + "step": 162820 + }, + { + "epoch": 1.0443410415190182, + "grad_norm": 0.13790413737297058, + "learning_rate": 5.484947040149122e-06, + "loss": 0.005, + "step": 162830 + }, + { + "epoch": 1.0444051784128043, + "grad_norm": 0.2742564082145691, + "learning_rate": 5.4843899725071635e-06, + "loss": 0.0032, + "step": 162840 + }, + { + "epoch": 1.0444693153065905, + "grad_norm": 0.06959738582372665, + "learning_rate": 5.483832898795452e-06, + "loss": 0.001, + "step": 162850 + }, + { + "epoch": 1.0445334522003764, + "grad_norm": 0.11053453385829926, + "learning_rate": 5.483275819020967e-06, + "loss": 0.0013, + "step": 162860 + }, + { + "epoch": 1.0445975890941626, + "grad_norm": 0.05350212752819061, + "learning_rate": 5.482718733190691e-06, + "loss": 0.0013, + "step": 162870 + }, + { + "epoch": 1.0446617259879487, + "grad_norm": 0.10666050016880035, + "learning_rate": 5.4821616413116055e-06, + "loss": 0.0026, + "step": 162880 + }, + { + "epoch": 1.0447258628817349, + "grad_norm": 0.08770793676376343, + "learning_rate": 5.481604543390688e-06, + "loss": 0.0011, + "step": 162890 + }, + { + "epoch": 1.0447899997755208, + "grad_norm": 0.03143342584371567, + "learning_rate": 5.481047439434923e-06, + "loss": 0.0013, + "step": 162900 + }, + { + "epoch": 1.044854136669307, + "grad_norm": 0.023717986419796944, + "learning_rate": 5.480490329451289e-06, + "loss": 0.0009, + "step": 162910 + }, + { + "epoch": 1.044918273563093, + "grad_norm": 0.035895369946956635, + "learning_rate": 5.479933213446768e-06, + "loss": 0.0013, + "step": 162920 + }, + { + "epoch": 1.0449824104568792, + "grad_norm": 0.13733455538749695, + "learning_rate": 5.479376091428341e-06, + "loss": 0.0025, + "step": 162930 + }, + { + "epoch": 1.0450465473506652, + "grad_norm": 0.10928450524806976, + "learning_rate": 5.478818963402989e-06, + "loss": 0.0034, + "step": 162940 + }, + { + "epoch": 1.0451106842444513, + "grad_norm": 0.017055131494998932, + "learning_rate": 5.478261829377694e-06, + "loss": 0.0022, + "step": 162950 + }, + { + "epoch": 1.0451748211382375, + "grad_norm": 0.28910472989082336, + "learning_rate": 5.477704689359435e-06, + "loss": 0.0023, + "step": 162960 + }, + { + "epoch": 1.0452389580320236, + "grad_norm": 0.03387433663010597, + "learning_rate": 5.477147543355195e-06, + "loss": 0.0018, + "step": 162970 + }, + { + "epoch": 1.0453030949258098, + "grad_norm": 0.029093138873577118, + "learning_rate": 5.476590391371956e-06, + "loss": 0.002, + "step": 162980 + }, + { + "epoch": 1.0453672318195957, + "grad_norm": 0.02908753789961338, + "learning_rate": 5.476033233416697e-06, + "loss": 0.0025, + "step": 162990 + }, + { + "epoch": 1.0454313687133818, + "grad_norm": 0.20391933619976044, + "learning_rate": 5.4754760694964035e-06, + "loss": 0.0022, + "step": 163000 + }, + { + "epoch": 1.045495505607168, + "grad_norm": 0.056450072675943375, + "learning_rate": 5.4749188996180545e-06, + "loss": 0.002, + "step": 163010 + }, + { + "epoch": 1.0455596425009541, + "grad_norm": 0.28571149706840515, + "learning_rate": 5.4743617237886315e-06, + "loss": 0.002, + "step": 163020 + }, + { + "epoch": 1.04562377939474, + "grad_norm": 0.05657358467578888, + "learning_rate": 5.4738045420151184e-06, + "loss": 0.0014, + "step": 163030 + }, + { + "epoch": 1.0456879162885262, + "grad_norm": 0.24326853454113007, + "learning_rate": 5.473247354304495e-06, + "loss": 0.0013, + "step": 163040 + }, + { + "epoch": 1.0457520531823123, + "grad_norm": 0.11728103458881378, + "learning_rate": 5.472690160663745e-06, + "loss": 0.0022, + "step": 163050 + }, + { + "epoch": 1.0458161900760985, + "grad_norm": 0.026328377425670624, + "learning_rate": 5.4721329610998475e-06, + "loss": 0.0028, + "step": 163060 + }, + { + "epoch": 1.0458803269698844, + "grad_norm": 0.05746069177985191, + "learning_rate": 5.471575755619788e-06, + "loss": 0.0016, + "step": 163070 + }, + { + "epoch": 1.0459444638636706, + "grad_norm": 0.02624838799238205, + "learning_rate": 5.471018544230546e-06, + "loss": 0.0016, + "step": 163080 + }, + { + "epoch": 1.0460086007574567, + "grad_norm": 0.06770443171262741, + "learning_rate": 5.470461326939107e-06, + "loss": 0.0019, + "step": 163090 + }, + { + "epoch": 1.0460727376512429, + "grad_norm": 0.09298177063465118, + "learning_rate": 5.46990410375245e-06, + "loss": 0.0033, + "step": 163100 + }, + { + "epoch": 1.046136874545029, + "grad_norm": 0.18723025918006897, + "learning_rate": 5.469346874677559e-06, + "loss": 0.0022, + "step": 163110 + }, + { + "epoch": 1.046201011438815, + "grad_norm": 0.07983297109603882, + "learning_rate": 5.468789639721416e-06, + "loss": 0.002, + "step": 163120 + }, + { + "epoch": 1.046265148332601, + "grad_norm": 0.15169022977352142, + "learning_rate": 5.468232398891004e-06, + "loss": 0.0012, + "step": 163130 + }, + { + "epoch": 1.0463292852263872, + "grad_norm": 0.02613871730864048, + "learning_rate": 5.4676751521933055e-06, + "loss": 0.0007, + "step": 163140 + }, + { + "epoch": 1.0463934221201734, + "grad_norm": 0.03867525979876518, + "learning_rate": 5.467117899635302e-06, + "loss": 0.0011, + "step": 163150 + }, + { + "epoch": 1.0464575590139593, + "grad_norm": 0.2037162482738495, + "learning_rate": 5.466560641223979e-06, + "loss": 0.0027, + "step": 163160 + }, + { + "epoch": 1.0465216959077455, + "grad_norm": 0.008311222307384014, + "learning_rate": 5.466003376966317e-06, + "loss": 0.0013, + "step": 163170 + }, + { + "epoch": 1.0465858328015316, + "grad_norm": 0.024490531533956528, + "learning_rate": 5.4654461068693e-06, + "loss": 0.0023, + "step": 163180 + }, + { + "epoch": 1.0466499696953178, + "grad_norm": 0.005609770305454731, + "learning_rate": 5.4648888309399104e-06, + "loss": 0.0018, + "step": 163190 + }, + { + "epoch": 1.0467141065891037, + "grad_norm": 0.18306541442871094, + "learning_rate": 5.464331549185131e-06, + "loss": 0.0022, + "step": 163200 + }, + { + "epoch": 1.0467782434828898, + "grad_norm": 0.08656865358352661, + "learning_rate": 5.463774261611946e-06, + "loss": 0.0015, + "step": 163210 + }, + { + "epoch": 1.046842380376676, + "grad_norm": 0.042053062468767166, + "learning_rate": 5.463216968227339e-06, + "loss": 0.002, + "step": 163220 + }, + { + "epoch": 1.0469065172704621, + "grad_norm": 0.10596334934234619, + "learning_rate": 5.4626596690382905e-06, + "loss": 0.0013, + "step": 163230 + }, + { + "epoch": 1.046970654164248, + "grad_norm": 0.09371647983789444, + "learning_rate": 5.462102364051787e-06, + "loss": 0.0016, + "step": 163240 + }, + { + "epoch": 1.0470347910580342, + "grad_norm": 0.07989853620529175, + "learning_rate": 5.46154505327481e-06, + "loss": 0.0021, + "step": 163250 + }, + { + "epoch": 1.0470989279518204, + "grad_norm": 0.04100971296429634, + "learning_rate": 5.460987736714344e-06, + "loss": 0.0019, + "step": 163260 + }, + { + "epoch": 1.0471630648456065, + "grad_norm": 0.041018567979335785, + "learning_rate": 5.460430414377371e-06, + "loss": 0.0017, + "step": 163270 + }, + { + "epoch": 1.0472272017393927, + "grad_norm": 0.0300181545317173, + "learning_rate": 5.459873086270876e-06, + "loss": 0.0022, + "step": 163280 + }, + { + "epoch": 1.0472913386331786, + "grad_norm": 0.26561641693115234, + "learning_rate": 5.459315752401843e-06, + "loss": 0.0027, + "step": 163290 + }, + { + "epoch": 1.0473554755269647, + "grad_norm": 0.10002687573432922, + "learning_rate": 5.4587584127772566e-06, + "loss": 0.0012, + "step": 163300 + }, + { + "epoch": 1.0474196124207509, + "grad_norm": 0.07808393239974976, + "learning_rate": 5.4582010674040985e-06, + "loss": 0.0024, + "step": 163310 + }, + { + "epoch": 1.047483749314537, + "grad_norm": 0.07971341907978058, + "learning_rate": 5.457643716289354e-06, + "loss": 0.0014, + "step": 163320 + }, + { + "epoch": 1.047547886208323, + "grad_norm": 0.03393528610467911, + "learning_rate": 5.457086359440006e-06, + "loss": 0.0021, + "step": 163330 + }, + { + "epoch": 1.047612023102109, + "grad_norm": 0.24799518287181854, + "learning_rate": 5.456528996863038e-06, + "loss": 0.0019, + "step": 163340 + }, + { + "epoch": 1.0476761599958953, + "grad_norm": 0.10391895473003387, + "learning_rate": 5.4559716285654385e-06, + "loss": 0.0018, + "step": 163350 + }, + { + "epoch": 1.0477402968896814, + "grad_norm": 0.07402423769235611, + "learning_rate": 5.4554142545541854e-06, + "loss": 0.0022, + "step": 163360 + }, + { + "epoch": 1.0478044337834673, + "grad_norm": 0.09968770295381546, + "learning_rate": 5.454856874836268e-06, + "loss": 0.0026, + "step": 163370 + }, + { + "epoch": 1.0478685706772535, + "grad_norm": 0.16183412075042725, + "learning_rate": 5.454299489418669e-06, + "loss": 0.0012, + "step": 163380 + }, + { + "epoch": 1.0479327075710396, + "grad_norm": 0.09767698496580124, + "learning_rate": 5.4537420983083725e-06, + "loss": 0.0014, + "step": 163390 + }, + { + "epoch": 1.0479968444648258, + "grad_norm": 0.11284589767456055, + "learning_rate": 5.453184701512362e-06, + "loss": 0.0029, + "step": 163400 + }, + { + "epoch": 1.048060981358612, + "grad_norm": 0.029451711103320122, + "learning_rate": 5.452627299037625e-06, + "loss": 0.0012, + "step": 163410 + }, + { + "epoch": 1.0481251182523978, + "grad_norm": 0.14543209969997406, + "learning_rate": 5.452069890891143e-06, + "loss": 0.0011, + "step": 163420 + }, + { + "epoch": 1.048189255146184, + "grad_norm": 0.14817456901073456, + "learning_rate": 5.451512477079904e-06, + "loss": 0.0019, + "step": 163430 + }, + { + "epoch": 1.0482533920399701, + "grad_norm": 0.012029202654957771, + "learning_rate": 5.450955057610889e-06, + "loss": 0.0023, + "step": 163440 + }, + { + "epoch": 1.0483175289337563, + "grad_norm": 0.06852412223815918, + "learning_rate": 5.450397632491087e-06, + "loss": 0.0025, + "step": 163450 + }, + { + "epoch": 1.0483816658275422, + "grad_norm": 0.057883575558662415, + "learning_rate": 5.449840201727479e-06, + "loss": 0.002, + "step": 163460 + }, + { + "epoch": 1.0484458027213284, + "grad_norm": 0.1246136873960495, + "learning_rate": 5.449282765327053e-06, + "loss": 0.0009, + "step": 163470 + }, + { + "epoch": 1.0485099396151145, + "grad_norm": 0.058335140347480774, + "learning_rate": 5.4487253232967915e-06, + "loss": 0.0009, + "step": 163480 + }, + { + "epoch": 1.0485740765089007, + "grad_norm": 0.17894196510314941, + "learning_rate": 5.448167875643681e-06, + "loss": 0.0012, + "step": 163490 + }, + { + "epoch": 1.0486382134026866, + "grad_norm": 0.06329632550477982, + "learning_rate": 5.447610422374709e-06, + "loss": 0.0012, + "step": 163500 + }, + { + "epoch": 1.0487023502964727, + "grad_norm": 0.0986696183681488, + "learning_rate": 5.447052963496855e-06, + "loss": 0.0022, + "step": 163510 + }, + { + "epoch": 1.048766487190259, + "grad_norm": 0.03802545368671417, + "learning_rate": 5.44649549901711e-06, + "loss": 0.0021, + "step": 163520 + }, + { + "epoch": 1.048830624084045, + "grad_norm": 0.11675931513309479, + "learning_rate": 5.445938028942456e-06, + "loss": 0.003, + "step": 163530 + }, + { + "epoch": 1.0488947609778312, + "grad_norm": 0.062087420374155045, + "learning_rate": 5.445380553279883e-06, + "loss": 0.0018, + "step": 163540 + }, + { + "epoch": 1.0489588978716171, + "grad_norm": 0.08303030580282211, + "learning_rate": 5.4448230720363705e-06, + "loss": 0.0032, + "step": 163550 + }, + { + "epoch": 1.0490230347654033, + "grad_norm": 0.07520963251590729, + "learning_rate": 5.4442655852189086e-06, + "loss": 0.0015, + "step": 163560 + }, + { + "epoch": 1.0490871716591894, + "grad_norm": 0.06167283281683922, + "learning_rate": 5.44370809283448e-06, + "loss": 0.0013, + "step": 163570 + }, + { + "epoch": 1.0491513085529756, + "grad_norm": 0.1583038866519928, + "learning_rate": 5.443150594890073e-06, + "loss": 0.0037, + "step": 163580 + }, + { + "epoch": 1.0492154454467615, + "grad_norm": 0.07042469084262848, + "learning_rate": 5.442593091392671e-06, + "loss": 0.0033, + "step": 163590 + }, + { + "epoch": 1.0492795823405476, + "grad_norm": 0.16713710129261017, + "learning_rate": 5.4420355823492624e-06, + "loss": 0.0014, + "step": 163600 + }, + { + "epoch": 1.0493437192343338, + "grad_norm": 0.04701533913612366, + "learning_rate": 5.441478067766831e-06, + "loss": 0.0011, + "step": 163610 + }, + { + "epoch": 1.04940785612812, + "grad_norm": 0.07531076669692993, + "learning_rate": 5.440920547652364e-06, + "loss": 0.002, + "step": 163620 + }, + { + "epoch": 1.0494719930219059, + "grad_norm": 0.022455720230937004, + "learning_rate": 5.440363022012849e-06, + "loss": 0.0019, + "step": 163630 + }, + { + "epoch": 1.049536129915692, + "grad_norm": 0.16131709516048431, + "learning_rate": 5.439805490855269e-06, + "loss": 0.0023, + "step": 163640 + }, + { + "epoch": 1.0496002668094782, + "grad_norm": 0.07646039873361588, + "learning_rate": 5.439247954186613e-06, + "loss": 0.0018, + "step": 163650 + }, + { + "epoch": 1.0496644037032643, + "grad_norm": 0.0025511719286441803, + "learning_rate": 5.438690412013865e-06, + "loss": 0.002, + "step": 163660 + }, + { + "epoch": 1.0497285405970502, + "grad_norm": 0.07018695771694183, + "learning_rate": 5.438132864344013e-06, + "loss": 0.0017, + "step": 163670 + }, + { + "epoch": 1.0497926774908364, + "grad_norm": 0.079002745449543, + "learning_rate": 5.4375753111840435e-06, + "loss": 0.0009, + "step": 163680 + }, + { + "epoch": 1.0498568143846225, + "grad_norm": 0.11101429909467697, + "learning_rate": 5.437017752540943e-06, + "loss": 0.0019, + "step": 163690 + }, + { + "epoch": 1.0499209512784087, + "grad_norm": 0.07598632574081421, + "learning_rate": 5.4364601884216974e-06, + "loss": 0.0013, + "step": 163700 + }, + { + "epoch": 1.0499850881721948, + "grad_norm": 0.12365309149026871, + "learning_rate": 5.435902618833293e-06, + "loss": 0.0034, + "step": 163710 + }, + { + "epoch": 1.0500492250659808, + "grad_norm": 0.05901951342821121, + "learning_rate": 5.435345043782717e-06, + "loss": 0.0013, + "step": 163720 + }, + { + "epoch": 1.050113361959767, + "grad_norm": 0.01743759587407112, + "learning_rate": 5.434787463276959e-06, + "loss": 0.0014, + "step": 163730 + }, + { + "epoch": 1.050177498853553, + "grad_norm": 0.13494150340557098, + "learning_rate": 5.434229877322999e-06, + "loss": 0.0023, + "step": 163740 + }, + { + "epoch": 1.0502416357473392, + "grad_norm": 0.09043806046247482, + "learning_rate": 5.433672285927831e-06, + "loss": 0.0016, + "step": 163750 + }, + { + "epoch": 1.0503057726411251, + "grad_norm": 0.03344087675213814, + "learning_rate": 5.433114689098439e-06, + "loss": 0.0014, + "step": 163760 + }, + { + "epoch": 1.0503699095349113, + "grad_norm": 0.023698963224887848, + "learning_rate": 5.432557086841811e-06, + "loss": 0.0024, + "step": 163770 + }, + { + "epoch": 1.0504340464286974, + "grad_norm": 0.22035405039787292, + "learning_rate": 5.4319994791649336e-06, + "loss": 0.0016, + "step": 163780 + }, + { + "epoch": 1.0504981833224836, + "grad_norm": 0.020499106496572495, + "learning_rate": 5.431441866074793e-06, + "loss": 0.001, + "step": 163790 + }, + { + "epoch": 1.0505623202162697, + "grad_norm": 0.03326013311743736, + "learning_rate": 5.430884247578379e-06, + "loss": 0.0016, + "step": 163800 + }, + { + "epoch": 1.0506264571100556, + "grad_norm": 0.24346397817134857, + "learning_rate": 5.4303266236826756e-06, + "loss": 0.0017, + "step": 163810 + }, + { + "epoch": 1.0506905940038418, + "grad_norm": 0.21626178920269012, + "learning_rate": 5.429768994394673e-06, + "loss": 0.0026, + "step": 163820 + }, + { + "epoch": 1.050754730897628, + "grad_norm": 0.03512268513441086, + "learning_rate": 5.429211359721357e-06, + "loss": 0.001, + "step": 163830 + }, + { + "epoch": 1.050818867791414, + "grad_norm": 0.09154361486434937, + "learning_rate": 5.4286537196697165e-06, + "loss": 0.0013, + "step": 163840 + }, + { + "epoch": 1.0508830046852, + "grad_norm": 0.2012699842453003, + "learning_rate": 5.428096074246738e-06, + "loss": 0.0014, + "step": 163850 + }, + { + "epoch": 1.0509471415789862, + "grad_norm": 0.07336778938770294, + "learning_rate": 5.427538423459411e-06, + "loss": 0.0015, + "step": 163860 + }, + { + "epoch": 1.0510112784727723, + "grad_norm": 0.10989232361316681, + "learning_rate": 5.426980767314721e-06, + "loss": 0.0013, + "step": 163870 + }, + { + "epoch": 1.0510754153665585, + "grad_norm": 0.007294870913028717, + "learning_rate": 5.426423105819658e-06, + "loss": 0.0019, + "step": 163880 + }, + { + "epoch": 1.0511395522603444, + "grad_norm": 0.13803347945213318, + "learning_rate": 5.425865438981207e-06, + "loss": 0.0016, + "step": 163890 + }, + { + "epoch": 1.0512036891541305, + "grad_norm": 0.0911358967423439, + "learning_rate": 5.425307766806359e-06, + "loss": 0.0015, + "step": 163900 + }, + { + "epoch": 1.0512678260479167, + "grad_norm": 0.18064068257808685, + "learning_rate": 5.4247500893021e-06, + "loss": 0.0019, + "step": 163910 + }, + { + "epoch": 1.0513319629417028, + "grad_norm": 0.1788107454776764, + "learning_rate": 5.4241924064754195e-06, + "loss": 0.0025, + "step": 163920 + }, + { + "epoch": 1.0513960998354888, + "grad_norm": 0.06165863946080208, + "learning_rate": 5.423634718333304e-06, + "loss": 0.002, + "step": 163930 + }, + { + "epoch": 1.051460236729275, + "grad_norm": 0.05214349180459976, + "learning_rate": 5.423077024882743e-06, + "loss": 0.0011, + "step": 163940 + }, + { + "epoch": 1.051524373623061, + "grad_norm": 0.10383394360542297, + "learning_rate": 5.422519326130725e-06, + "loss": 0.0027, + "step": 163950 + }, + { + "epoch": 1.0515885105168472, + "grad_norm": 0.1499389261007309, + "learning_rate": 5.421961622084239e-06, + "loss": 0.0026, + "step": 163960 + }, + { + "epoch": 1.0516526474106334, + "grad_norm": 0.17220918834209442, + "learning_rate": 5.42140391275027e-06, + "loss": 0.0012, + "step": 163970 + }, + { + "epoch": 1.0517167843044193, + "grad_norm": 0.058838699012994766, + "learning_rate": 5.420846198135808e-06, + "loss": 0.0022, + "step": 163980 + }, + { + "epoch": 1.0517809211982054, + "grad_norm": 0.011541483923792839, + "learning_rate": 5.420288478247846e-06, + "loss": 0.0028, + "step": 163990 + }, + { + "epoch": 1.0518450580919916, + "grad_norm": 0.13648605346679688, + "learning_rate": 5.419730753093366e-06, + "loss": 0.0037, + "step": 164000 + }, + { + "epoch": 1.0519091949857777, + "grad_norm": 0.12824368476867676, + "learning_rate": 5.419173022679361e-06, + "loss": 0.0023, + "step": 164010 + }, + { + "epoch": 1.0519733318795637, + "grad_norm": 0.01217710506170988, + "learning_rate": 5.418615287012818e-06, + "loss": 0.0018, + "step": 164020 + }, + { + "epoch": 1.0520374687733498, + "grad_norm": 0.05712335929274559, + "learning_rate": 5.418057546100726e-06, + "loss": 0.0011, + "step": 164030 + }, + { + "epoch": 1.052101605667136, + "grad_norm": 0.04380573332309723, + "learning_rate": 5.417499799950075e-06, + "loss": 0.0021, + "step": 164040 + }, + { + "epoch": 1.052165742560922, + "grad_norm": 0.015386135317385197, + "learning_rate": 5.416942048567853e-06, + "loss": 0.0016, + "step": 164050 + }, + { + "epoch": 1.052229879454708, + "grad_norm": 0.10284862667322159, + "learning_rate": 5.416384291961049e-06, + "loss": 0.0023, + "step": 164060 + }, + { + "epoch": 1.0522940163484942, + "grad_norm": 0.09124301373958588, + "learning_rate": 5.415826530136653e-06, + "loss": 0.0016, + "step": 164070 + }, + { + "epoch": 1.0523581532422803, + "grad_norm": 0.005526949185878038, + "learning_rate": 5.415268763101652e-06, + "loss": 0.0009, + "step": 164080 + }, + { + "epoch": 1.0524222901360665, + "grad_norm": 0.1582203507423401, + "learning_rate": 5.414710990863038e-06, + "loss": 0.002, + "step": 164090 + }, + { + "epoch": 1.0524864270298526, + "grad_norm": 0.046717043966054916, + "learning_rate": 5.4141532134277976e-06, + "loss": 0.0012, + "step": 164100 + }, + { + "epoch": 1.0525505639236385, + "grad_norm": 0.19061030447483063, + "learning_rate": 5.413595430802923e-06, + "loss": 0.0018, + "step": 164110 + }, + { + "epoch": 1.0526147008174247, + "grad_norm": 0.3718620836734772, + "learning_rate": 5.413037642995399e-06, + "loss": 0.0046, + "step": 164120 + }, + { + "epoch": 1.0526788377112108, + "grad_norm": 0.03796644136309624, + "learning_rate": 5.41247985001222e-06, + "loss": 0.0016, + "step": 164130 + }, + { + "epoch": 1.052742974604997, + "grad_norm": 0.14698080718517303, + "learning_rate": 5.411922051860373e-06, + "loss": 0.0022, + "step": 164140 + }, + { + "epoch": 1.052807111498783, + "grad_norm": 0.05480699986219406, + "learning_rate": 5.41136424854685e-06, + "loss": 0.0026, + "step": 164150 + }, + { + "epoch": 1.052871248392569, + "grad_norm": 0.11102596670389175, + "learning_rate": 5.410806440078637e-06, + "loss": 0.0017, + "step": 164160 + }, + { + "epoch": 1.0529353852863552, + "grad_norm": 0.057191140949726105, + "learning_rate": 5.4102486264627285e-06, + "loss": 0.0028, + "step": 164170 + }, + { + "epoch": 1.0529995221801414, + "grad_norm": 0.08213865011930466, + "learning_rate": 5.409690807706108e-06, + "loss": 0.0017, + "step": 164180 + }, + { + "epoch": 1.0530636590739273, + "grad_norm": 0.0631614625453949, + "learning_rate": 5.409132983815771e-06, + "loss": 0.0013, + "step": 164190 + }, + { + "epoch": 1.0531277959677134, + "grad_norm": 0.030576270073652267, + "learning_rate": 5.408575154798705e-06, + "loss": 0.0013, + "step": 164200 + }, + { + "epoch": 1.0531919328614996, + "grad_norm": 0.28509676456451416, + "learning_rate": 5.4080173206619e-06, + "loss": 0.0029, + "step": 164210 + }, + { + "epoch": 1.0532560697552857, + "grad_norm": 0.11434341222047806, + "learning_rate": 5.407459481412347e-06, + "loss": 0.0016, + "step": 164220 + }, + { + "epoch": 1.0533202066490719, + "grad_norm": 0.11937174946069717, + "learning_rate": 5.406901637057035e-06, + "loss": 0.0038, + "step": 164230 + }, + { + "epoch": 1.0533843435428578, + "grad_norm": 0.024643611162900925, + "learning_rate": 5.406343787602955e-06, + "loss": 0.0009, + "step": 164240 + }, + { + "epoch": 1.053448480436644, + "grad_norm": 0.06495968252420425, + "learning_rate": 5.405785933057097e-06, + "loss": 0.0022, + "step": 164250 + }, + { + "epoch": 1.0535126173304301, + "grad_norm": 0.05198515206575394, + "learning_rate": 5.4052280734264515e-06, + "loss": 0.0013, + "step": 164260 + }, + { + "epoch": 1.0535767542242163, + "grad_norm": 0.041042618453502655, + "learning_rate": 5.4046702087180066e-06, + "loss": 0.0028, + "step": 164270 + }, + { + "epoch": 1.0536408911180022, + "grad_norm": 0.05644712597131729, + "learning_rate": 5.4041123389387574e-06, + "loss": 0.0012, + "step": 164280 + }, + { + "epoch": 1.0537050280117883, + "grad_norm": 0.048884227871894836, + "learning_rate": 5.40355446409569e-06, + "loss": 0.0012, + "step": 164290 + }, + { + "epoch": 1.0537691649055745, + "grad_norm": 0.05040355026721954, + "learning_rate": 5.4029965841957985e-06, + "loss": 0.0023, + "step": 164300 + }, + { + "epoch": 1.0538333017993606, + "grad_norm": 0.06720659881830215, + "learning_rate": 5.40243869924607e-06, + "loss": 0.0013, + "step": 164310 + }, + { + "epoch": 1.0538974386931466, + "grad_norm": 0.12022145837545395, + "learning_rate": 5.401880809253498e-06, + "loss": 0.0013, + "step": 164320 + }, + { + "epoch": 1.0539615755869327, + "grad_norm": 0.0682666078209877, + "learning_rate": 5.401322914225071e-06, + "loss": 0.0016, + "step": 164330 + }, + { + "epoch": 1.0540257124807189, + "grad_norm": 0.07866264879703522, + "learning_rate": 5.400765014167782e-06, + "loss": 0.0016, + "step": 164340 + }, + { + "epoch": 1.054089849374505, + "grad_norm": 0.1084682047367096, + "learning_rate": 5.400207109088622e-06, + "loss": 0.0021, + "step": 164350 + }, + { + "epoch": 1.054153986268291, + "grad_norm": 0.05660034343600273, + "learning_rate": 5.399649198994578e-06, + "loss": 0.0025, + "step": 164360 + }, + { + "epoch": 1.054218123162077, + "grad_norm": 0.07491907477378845, + "learning_rate": 5.399091283892647e-06, + "loss": 0.0043, + "step": 164370 + }, + { + "epoch": 1.0542822600558632, + "grad_norm": 0.04296877607703209, + "learning_rate": 5.398533363789815e-06, + "loss": 0.0019, + "step": 164380 + }, + { + "epoch": 1.0543463969496494, + "grad_norm": 0.008898581378161907, + "learning_rate": 5.397975438693077e-06, + "loss": 0.0015, + "step": 164390 + }, + { + "epoch": 1.0544105338434355, + "grad_norm": 0.05675172060728073, + "learning_rate": 5.3974175086094205e-06, + "loss": 0.0022, + "step": 164400 + }, + { + "epoch": 1.0544746707372215, + "grad_norm": 0.023167546838521957, + "learning_rate": 5.39685957354584e-06, + "loss": 0.0018, + "step": 164410 + }, + { + "epoch": 1.0545388076310076, + "grad_norm": 0.03619055449962616, + "learning_rate": 5.396301633509324e-06, + "loss": 0.0015, + "step": 164420 + }, + { + "epoch": 1.0546029445247938, + "grad_norm": 0.0982537716627121, + "learning_rate": 5.395743688506867e-06, + "loss": 0.0014, + "step": 164430 + }, + { + "epoch": 1.05466708141858, + "grad_norm": 0.088741734623909, + "learning_rate": 5.395185738545456e-06, + "loss": 0.0019, + "step": 164440 + }, + { + "epoch": 1.0547312183123658, + "grad_norm": 0.0686255544424057, + "learning_rate": 5.394627783632088e-06, + "loss": 0.0025, + "step": 164450 + }, + { + "epoch": 1.054795355206152, + "grad_norm": 0.07357365638017654, + "learning_rate": 5.394069823773751e-06, + "loss": 0.0023, + "step": 164460 + }, + { + "epoch": 1.0548594920999381, + "grad_norm": 0.08306479454040527, + "learning_rate": 5.393511858977437e-06, + "loss": 0.0022, + "step": 164470 + }, + { + "epoch": 1.0549236289937243, + "grad_norm": 0.05139013007283211, + "learning_rate": 5.392953889250139e-06, + "loss": 0.0032, + "step": 164480 + }, + { + "epoch": 1.0549877658875102, + "grad_norm": 0.06120975688099861, + "learning_rate": 5.392395914598847e-06, + "loss": 0.0011, + "step": 164490 + }, + { + "epoch": 1.0550519027812963, + "grad_norm": 0.047991979867219925, + "learning_rate": 5.391837935030553e-06, + "loss": 0.0031, + "step": 164500 + }, + { + "epoch": 1.0551160396750825, + "grad_norm": 0.16589823365211487, + "learning_rate": 5.391279950552252e-06, + "loss": 0.0016, + "step": 164510 + }, + { + "epoch": 1.0551801765688686, + "grad_norm": 0.06756390631198883, + "learning_rate": 5.3907219611709315e-06, + "loss": 0.0013, + "step": 164520 + }, + { + "epoch": 1.0552443134626548, + "grad_norm": 0.040023867040872574, + "learning_rate": 5.390163966893587e-06, + "loss": 0.0054, + "step": 164530 + }, + { + "epoch": 1.0553084503564407, + "grad_norm": 0.1901106983423233, + "learning_rate": 5.389605967727208e-06, + "loss": 0.0009, + "step": 164540 + }, + { + "epoch": 1.0553725872502269, + "grad_norm": 0.1705317199230194, + "learning_rate": 5.389047963678788e-06, + "loss": 0.0031, + "step": 164550 + }, + { + "epoch": 1.055436724144013, + "grad_norm": 0.07654978334903717, + "learning_rate": 5.3884899547553195e-06, + "loss": 0.0013, + "step": 164560 + }, + { + "epoch": 1.0555008610377992, + "grad_norm": 0.04382341727614403, + "learning_rate": 5.387931940963793e-06, + "loss": 0.0007, + "step": 164570 + }, + { + "epoch": 1.055564997931585, + "grad_norm": 0.007866988889873028, + "learning_rate": 5.3873739223112045e-06, + "loss": 0.0087, + "step": 164580 + }, + { + "epoch": 1.0556291348253712, + "grad_norm": 0.17084579169750214, + "learning_rate": 5.386815898804541e-06, + "loss": 0.003, + "step": 164590 + }, + { + "epoch": 1.0556932717191574, + "grad_norm": 0.04956797882914543, + "learning_rate": 5.3862578704508005e-06, + "loss": 0.0008, + "step": 164600 + }, + { + "epoch": 1.0557574086129435, + "grad_norm": 0.10130944103002548, + "learning_rate": 5.385699837256971e-06, + "loss": 0.0014, + "step": 164610 + }, + { + "epoch": 1.0558215455067295, + "grad_norm": 0.039703045040369034, + "learning_rate": 5.385141799230048e-06, + "loss": 0.0015, + "step": 164620 + }, + { + "epoch": 1.0558856824005156, + "grad_norm": 0.03739073872566223, + "learning_rate": 5.384583756377022e-06, + "loss": 0.0011, + "step": 164630 + }, + { + "epoch": 1.0559498192943018, + "grad_norm": 0.1988016813993454, + "learning_rate": 5.384025708704887e-06, + "loss": 0.0008, + "step": 164640 + }, + { + "epoch": 1.056013956188088, + "grad_norm": 0.5328776836395264, + "learning_rate": 5.383467656220636e-06, + "loss": 0.002, + "step": 164650 + }, + { + "epoch": 1.056078093081874, + "grad_norm": 0.19334547221660614, + "learning_rate": 5.382909598931262e-06, + "loss": 0.0013, + "step": 164660 + }, + { + "epoch": 1.05614222997566, + "grad_norm": 0.1458764374256134, + "learning_rate": 5.382351536843755e-06, + "loss": 0.0016, + "step": 164670 + }, + { + "epoch": 1.0562063668694461, + "grad_norm": 0.05135856196284294, + "learning_rate": 5.38179346996511e-06, + "loss": 0.0016, + "step": 164680 + }, + { + "epoch": 1.0562705037632323, + "grad_norm": 0.004306500777602196, + "learning_rate": 5.381235398302321e-06, + "loss": 0.0016, + "step": 164690 + }, + { + "epoch": 1.0563346406570184, + "grad_norm": 0.09824187308549881, + "learning_rate": 5.38067732186238e-06, + "loss": 0.0019, + "step": 164700 + }, + { + "epoch": 1.0563987775508044, + "grad_norm": 0.10251548886299133, + "learning_rate": 5.3801192406522805e-06, + "loss": 0.0016, + "step": 164710 + }, + { + "epoch": 1.0564629144445905, + "grad_norm": 0.07564540952444077, + "learning_rate": 5.379561154679014e-06, + "loss": 0.0018, + "step": 164720 + }, + { + "epoch": 1.0565270513383767, + "grad_norm": 0.0791371539235115, + "learning_rate": 5.379003063949576e-06, + "loss": 0.0012, + "step": 164730 + }, + { + "epoch": 1.0565911882321628, + "grad_norm": 0.14272333681583405, + "learning_rate": 5.3784449684709585e-06, + "loss": 0.0023, + "step": 164740 + }, + { + "epoch": 1.0566553251259487, + "grad_norm": 0.22609438002109528, + "learning_rate": 5.377886868250155e-06, + "loss": 0.003, + "step": 164750 + }, + { + "epoch": 1.0567194620197349, + "grad_norm": 0.1458098441362381, + "learning_rate": 5.377328763294158e-06, + "loss": 0.0011, + "step": 164760 + }, + { + "epoch": 1.056783598913521, + "grad_norm": 0.10944253951311111, + "learning_rate": 5.376770653609964e-06, + "loss": 0.0022, + "step": 164770 + }, + { + "epoch": 1.0568477358073072, + "grad_norm": 0.07059153914451599, + "learning_rate": 5.376212539204563e-06, + "loss": 0.0032, + "step": 164780 + }, + { + "epoch": 1.056911872701093, + "grad_norm": 0.010585133917629719, + "learning_rate": 5.37565442008495e-06, + "loss": 0.0018, + "step": 164790 + }, + { + "epoch": 1.0569760095948793, + "grad_norm": 0.34683093428611755, + "learning_rate": 5.375096296258119e-06, + "loss": 0.0022, + "step": 164800 + }, + { + "epoch": 1.0570401464886654, + "grad_norm": 0.3620857894420624, + "learning_rate": 5.374538167731064e-06, + "loss": 0.0012, + "step": 164810 + }, + { + "epoch": 1.0571042833824515, + "grad_norm": 0.01606067828834057, + "learning_rate": 5.373980034510776e-06, + "loss": 0.0039, + "step": 164820 + }, + { + "epoch": 1.0571684202762377, + "grad_norm": 0.14603322744369507, + "learning_rate": 5.373421896604252e-06, + "loss": 0.0015, + "step": 164830 + }, + { + "epoch": 1.0572325571700236, + "grad_norm": 0.2772385776042938, + "learning_rate": 5.372863754018486e-06, + "loss": 0.0029, + "step": 164840 + }, + { + "epoch": 1.0572966940638098, + "grad_norm": 0.0029333140701055527, + "learning_rate": 5.372305606760468e-06, + "loss": 0.0012, + "step": 164850 + }, + { + "epoch": 1.057360830957596, + "grad_norm": 0.020547224208712578, + "learning_rate": 5.371747454837197e-06, + "loss": 0.0013, + "step": 164860 + }, + { + "epoch": 1.057424967851382, + "grad_norm": 0.010254314169287682, + "learning_rate": 5.371189298255664e-06, + "loss": 0.0013, + "step": 164870 + }, + { + "epoch": 1.057489104745168, + "grad_norm": 0.04542948305606842, + "learning_rate": 5.370631137022864e-06, + "loss": 0.0015, + "step": 164880 + }, + { + "epoch": 1.0575532416389541, + "grad_norm": 0.08468092978000641, + "learning_rate": 5.370072971145789e-06, + "loss": 0.0013, + "step": 164890 + }, + { + "epoch": 1.0576173785327403, + "grad_norm": 0.13973210752010345, + "learning_rate": 5.369514800631438e-06, + "loss": 0.002, + "step": 164900 + }, + { + "epoch": 1.0576815154265264, + "grad_norm": 0.056624799966812134, + "learning_rate": 5.368956625486801e-06, + "loss": 0.002, + "step": 164910 + }, + { + "epoch": 1.0577456523203124, + "grad_norm": 0.003999556880444288, + "learning_rate": 5.368398445718875e-06, + "loss": 0.0012, + "step": 164920 + }, + { + "epoch": 1.0578097892140985, + "grad_norm": 0.016761040315032005, + "learning_rate": 5.367840261334652e-06, + "loss": 0.0016, + "step": 164930 + }, + { + "epoch": 1.0578739261078847, + "grad_norm": 0.1763841211795807, + "learning_rate": 5.367282072341128e-06, + "loss": 0.0022, + "step": 164940 + }, + { + "epoch": 1.0579380630016708, + "grad_norm": 0.13624563813209534, + "learning_rate": 5.366723878745297e-06, + "loss": 0.0017, + "step": 164950 + }, + { + "epoch": 1.058002199895457, + "grad_norm": 0.06088218465447426, + "learning_rate": 5.366165680554154e-06, + "loss": 0.0032, + "step": 164960 + }, + { + "epoch": 1.058066336789243, + "grad_norm": 0.046900175511837006, + "learning_rate": 5.365607477774693e-06, + "loss": 0.0014, + "step": 164970 + }, + { + "epoch": 1.058130473683029, + "grad_norm": 0.1951567828655243, + "learning_rate": 5.36504927041391e-06, + "loss": 0.0028, + "step": 164980 + }, + { + "epoch": 1.0581946105768152, + "grad_norm": 0.11414501070976257, + "learning_rate": 5.364491058478797e-06, + "loss": 0.0015, + "step": 164990 + }, + { + "epoch": 1.0582587474706013, + "grad_norm": 0.13751763105392456, + "learning_rate": 5.3639328419763525e-06, + "loss": 0.0012, + "step": 165000 + }, + { + "epoch": 1.0583228843643873, + "grad_norm": 0.0704054906964302, + "learning_rate": 5.363374620913567e-06, + "loss": 0.0038, + "step": 165010 + }, + { + "epoch": 1.0583870212581734, + "grad_norm": 0.015720652416348457, + "learning_rate": 5.36281639529744e-06, + "loss": 0.0016, + "step": 165020 + }, + { + "epoch": 1.0584511581519596, + "grad_norm": 0.09596427530050278, + "learning_rate": 5.362258165134964e-06, + "loss": 0.0021, + "step": 165030 + }, + { + "epoch": 1.0585152950457457, + "grad_norm": 0.07461073249578476, + "learning_rate": 5.361699930433133e-06, + "loss": 0.0021, + "step": 165040 + }, + { + "epoch": 1.0585794319395316, + "grad_norm": 0.003062749747186899, + "learning_rate": 5.361141691198944e-06, + "loss": 0.0006, + "step": 165050 + }, + { + "epoch": 1.0586435688333178, + "grad_norm": 0.04284300282597542, + "learning_rate": 5.360583447439391e-06, + "loss": 0.0018, + "step": 165060 + }, + { + "epoch": 1.058707705727104, + "grad_norm": 0.4961910545825958, + "learning_rate": 5.3600251991614715e-06, + "loss": 0.0022, + "step": 165070 + }, + { + "epoch": 1.05877184262089, + "grad_norm": 0.04706254601478577, + "learning_rate": 5.359466946372177e-06, + "loss": 0.0013, + "step": 165080 + }, + { + "epoch": 1.0588359795146762, + "grad_norm": 0.07458434998989105, + "learning_rate": 5.358908689078506e-06, + "loss": 0.0017, + "step": 165090 + }, + { + "epoch": 1.0589001164084622, + "grad_norm": 0.11897841095924377, + "learning_rate": 5.358350427287452e-06, + "loss": 0.0021, + "step": 165100 + }, + { + "epoch": 1.0589642533022483, + "grad_norm": 0.09279097616672516, + "learning_rate": 5.357792161006011e-06, + "loss": 0.0019, + "step": 165110 + }, + { + "epoch": 1.0590283901960345, + "grad_norm": 0.35574957728385925, + "learning_rate": 5.357233890241178e-06, + "loss": 0.0023, + "step": 165120 + }, + { + "epoch": 1.0590925270898206, + "grad_norm": 0.021752241998910904, + "learning_rate": 5.356675614999951e-06, + "loss": 0.0019, + "step": 165130 + }, + { + "epoch": 1.0591566639836065, + "grad_norm": 0.10023855417966843, + "learning_rate": 5.356117335289321e-06, + "loss": 0.0034, + "step": 165140 + }, + { + "epoch": 1.0592208008773927, + "grad_norm": 0.07290735840797424, + "learning_rate": 5.355559051116289e-06, + "loss": 0.0017, + "step": 165150 + }, + { + "epoch": 1.0592849377711788, + "grad_norm": 0.04673566296696663, + "learning_rate": 5.3550007624878465e-06, + "loss": 0.0019, + "step": 165160 + }, + { + "epoch": 1.059349074664965, + "grad_norm": 0.06728404015302658, + "learning_rate": 5.354442469410991e-06, + "loss": 0.0016, + "step": 165170 + }, + { + "epoch": 1.059413211558751, + "grad_norm": 0.05923718586564064, + "learning_rate": 5.353884171892717e-06, + "loss": 0.0015, + "step": 165180 + }, + { + "epoch": 1.059477348452537, + "grad_norm": 0.037422552704811096, + "learning_rate": 5.3533258699400225e-06, + "loss": 0.0014, + "step": 165190 + }, + { + "epoch": 1.0595414853463232, + "grad_norm": 0.0014998482074588537, + "learning_rate": 5.352767563559902e-06, + "loss": 0.0022, + "step": 165200 + }, + { + "epoch": 1.0596056222401093, + "grad_norm": 0.03718739375472069, + "learning_rate": 5.352209252759352e-06, + "loss": 0.001, + "step": 165210 + }, + { + "epoch": 1.0596697591338953, + "grad_norm": 0.1297600120306015, + "learning_rate": 5.351650937545368e-06, + "loss": 0.0022, + "step": 165220 + }, + { + "epoch": 1.0597338960276814, + "grad_norm": 0.07515675574541092, + "learning_rate": 5.3510926179249465e-06, + "loss": 0.0026, + "step": 165230 + }, + { + "epoch": 1.0597980329214676, + "grad_norm": 0.02211691625416279, + "learning_rate": 5.350534293905084e-06, + "loss": 0.002, + "step": 165240 + }, + { + "epoch": 1.0598621698152537, + "grad_norm": 0.06225384399294853, + "learning_rate": 5.349975965492776e-06, + "loss": 0.0021, + "step": 165250 + }, + { + "epoch": 1.0599263067090399, + "grad_norm": 0.12936356663703918, + "learning_rate": 5.34941763269502e-06, + "loss": 0.0014, + "step": 165260 + }, + { + "epoch": 1.0599904436028258, + "grad_norm": 0.0758969858288765, + "learning_rate": 5.348859295518809e-06, + "loss": 0.0023, + "step": 165270 + }, + { + "epoch": 1.060054580496612, + "grad_norm": 0.14664317667484283, + "learning_rate": 5.348300953971144e-06, + "loss": 0.001, + "step": 165280 + }, + { + "epoch": 1.060118717390398, + "grad_norm": 0.03142325207591057, + "learning_rate": 5.347742608059017e-06, + "loss": 0.0016, + "step": 165290 + }, + { + "epoch": 1.0601828542841842, + "grad_norm": 0.01865263469517231, + "learning_rate": 5.347184257789428e-06, + "loss": 0.0033, + "step": 165300 + }, + { + "epoch": 1.0602469911779702, + "grad_norm": 0.12045206874608994, + "learning_rate": 5.346625903169372e-06, + "loss": 0.0024, + "step": 165310 + }, + { + "epoch": 1.0603111280717563, + "grad_norm": 0.15061624348163605, + "learning_rate": 5.346067544205846e-06, + "loss": 0.003, + "step": 165320 + }, + { + "epoch": 1.0603752649655425, + "grad_norm": 0.23420719802379608, + "learning_rate": 5.3455091809058456e-06, + "loss": 0.0021, + "step": 165330 + }, + { + "epoch": 1.0604394018593286, + "grad_norm": 0.10741454362869263, + "learning_rate": 5.34495081327637e-06, + "loss": 0.0013, + "step": 165340 + }, + { + "epoch": 1.0605035387531148, + "grad_norm": 0.03739853575825691, + "learning_rate": 5.344392441324412e-06, + "loss": 0.0006, + "step": 165350 + }, + { + "epoch": 1.0605676756469007, + "grad_norm": 0.25066447257995605, + "learning_rate": 5.343834065056972e-06, + "loss": 0.0024, + "step": 165360 + }, + { + "epoch": 1.0606318125406868, + "grad_norm": 0.08378434181213379, + "learning_rate": 5.343275684481044e-06, + "loss": 0.0011, + "step": 165370 + }, + { + "epoch": 1.060695949434473, + "grad_norm": 0.02976437658071518, + "learning_rate": 5.342717299603628e-06, + "loss": 0.0031, + "step": 165380 + }, + { + "epoch": 1.0607600863282591, + "grad_norm": 0.0805118978023529, + "learning_rate": 5.342158910431717e-06, + "loss": 0.0017, + "step": 165390 + }, + { + "epoch": 1.060824223222045, + "grad_norm": 0.05659005045890808, + "learning_rate": 5.341600516972312e-06, + "loss": 0.002, + "step": 165400 + }, + { + "epoch": 1.0608883601158312, + "grad_norm": 0.0376061350107193, + "learning_rate": 5.341042119232409e-06, + "loss": 0.0014, + "step": 165410 + }, + { + "epoch": 1.0609524970096174, + "grad_norm": 0.04787229374051094, + "learning_rate": 5.340483717219003e-06, + "loss": 0.0016, + "step": 165420 + }, + { + "epoch": 1.0610166339034035, + "grad_norm": 0.061529211699962616, + "learning_rate": 5.339925310939094e-06, + "loss": 0.0015, + "step": 165430 + }, + { + "epoch": 1.0610807707971894, + "grad_norm": 0.06910432875156403, + "learning_rate": 5.339366900399677e-06, + "loss": 0.001, + "step": 165440 + }, + { + "epoch": 1.0611449076909756, + "grad_norm": 0.041936878114938736, + "learning_rate": 5.33880848560775e-06, + "loss": 0.0019, + "step": 165450 + }, + { + "epoch": 1.0612090445847617, + "grad_norm": 0.021885309368371964, + "learning_rate": 5.338250066570311e-06, + "loss": 0.002, + "step": 165460 + }, + { + "epoch": 1.0612731814785479, + "grad_norm": 0.1264055222272873, + "learning_rate": 5.337691643294357e-06, + "loss": 0.002, + "step": 165470 + }, + { + "epoch": 1.0613373183723338, + "grad_norm": 0.12011872977018356, + "learning_rate": 5.337133215786885e-06, + "loss": 0.0015, + "step": 165480 + }, + { + "epoch": 1.06140145526612, + "grad_norm": 0.16393841803073883, + "learning_rate": 5.336574784054894e-06, + "loss": 0.0009, + "step": 165490 + }, + { + "epoch": 1.061465592159906, + "grad_norm": 0.0875626727938652, + "learning_rate": 5.336016348105379e-06, + "loss": 0.0014, + "step": 165500 + }, + { + "epoch": 1.0615297290536923, + "grad_norm": 0.10251889377832413, + "learning_rate": 5.33545790794534e-06, + "loss": 0.002, + "step": 165510 + }, + { + "epoch": 1.0615938659474784, + "grad_norm": 0.04495257884263992, + "learning_rate": 5.334899463581773e-06, + "loss": 0.0017, + "step": 165520 + }, + { + "epoch": 1.0616580028412643, + "grad_norm": 0.11217251420021057, + "learning_rate": 5.334341015021676e-06, + "loss": 0.0016, + "step": 165530 + }, + { + "epoch": 1.0617221397350505, + "grad_norm": 0.002483774209395051, + "learning_rate": 5.333782562272049e-06, + "loss": 0.0008, + "step": 165540 + }, + { + "epoch": 1.0617862766288366, + "grad_norm": 0.18159668147563934, + "learning_rate": 5.333224105339884e-06, + "loss": 0.0018, + "step": 165550 + }, + { + "epoch": 1.0618504135226228, + "grad_norm": 0.29747647047042847, + "learning_rate": 5.332665644232188e-06, + "loss": 0.0025, + "step": 165560 + }, + { + "epoch": 1.0619145504164087, + "grad_norm": 0.039432551711797714, + "learning_rate": 5.33210717895595e-06, + "loss": 0.0024, + "step": 165570 + }, + { + "epoch": 1.0619786873101948, + "grad_norm": 0.057367291301488876, + "learning_rate": 5.331548709518174e-06, + "loss": 0.0022, + "step": 165580 + }, + { + "epoch": 1.062042824203981, + "grad_norm": 0.11943091452121735, + "learning_rate": 5.330990235925853e-06, + "loss": 0.0011, + "step": 165590 + }, + { + "epoch": 1.0621069610977671, + "grad_norm": 0.006093861069530249, + "learning_rate": 5.330431758185991e-06, + "loss": 0.0015, + "step": 165600 + }, + { + "epoch": 1.062171097991553, + "grad_norm": 0.11781761795282364, + "learning_rate": 5.32987327630558e-06, + "loss": 0.0013, + "step": 165610 + }, + { + "epoch": 1.0622352348853392, + "grad_norm": 0.21625033020973206, + "learning_rate": 5.329314790291623e-06, + "loss": 0.0028, + "step": 165620 + }, + { + "epoch": 1.0622993717791254, + "grad_norm": 0.09720658510923386, + "learning_rate": 5.328756300151116e-06, + "loss": 0.0014, + "step": 165630 + }, + { + "epoch": 1.0623635086729115, + "grad_norm": 0.078163743019104, + "learning_rate": 5.328197805891058e-06, + "loss": 0.0033, + "step": 165640 + }, + { + "epoch": 1.0624276455666977, + "grad_norm": 0.09998316317796707, + "learning_rate": 5.327639307518444e-06, + "loss": 0.0024, + "step": 165650 + }, + { + "epoch": 1.0624917824604836, + "grad_norm": 0.14844781160354614, + "learning_rate": 5.327080805040278e-06, + "loss": 0.0019, + "step": 165660 + }, + { + "epoch": 1.0625559193542697, + "grad_norm": 0.09575474262237549, + "learning_rate": 5.326522298463556e-06, + "loss": 0.0029, + "step": 165670 + }, + { + "epoch": 1.0626200562480559, + "grad_norm": 0.06723390519618988, + "learning_rate": 5.325963787795275e-06, + "loss": 0.0008, + "step": 165680 + }, + { + "epoch": 1.062684193141842, + "grad_norm": 0.014396176673471928, + "learning_rate": 5.325405273042435e-06, + "loss": 0.0008, + "step": 165690 + }, + { + "epoch": 1.062748330035628, + "grad_norm": 0.05677991360425949, + "learning_rate": 5.3248467542120354e-06, + "loss": 0.0015, + "step": 165700 + }, + { + "epoch": 1.0628124669294141, + "grad_norm": 0.05875563621520996, + "learning_rate": 5.324288231311072e-06, + "loss": 0.0007, + "step": 165710 + }, + { + "epoch": 1.0628766038232003, + "grad_norm": 0.26707419753074646, + "learning_rate": 5.323729704346547e-06, + "loss": 0.0022, + "step": 165720 + }, + { + "epoch": 1.0629407407169864, + "grad_norm": 0.06900002807378769, + "learning_rate": 5.323171173325457e-06, + "loss": 0.0014, + "step": 165730 + }, + { + "epoch": 1.0630048776107723, + "grad_norm": 0.0487680621445179, + "learning_rate": 5.3226126382548e-06, + "loss": 0.0018, + "step": 165740 + }, + { + "epoch": 1.0630690145045585, + "grad_norm": 0.08753561228513718, + "learning_rate": 5.322054099141578e-06, + "loss": 0.0033, + "step": 165750 + }, + { + "epoch": 1.0631331513983446, + "grad_norm": 0.04199390485882759, + "learning_rate": 5.321495555992787e-06, + "loss": 0.0016, + "step": 165760 + }, + { + "epoch": 1.0631972882921308, + "grad_norm": 0.17838846147060394, + "learning_rate": 5.320937008815427e-06, + "loss": 0.0015, + "step": 165770 + }, + { + "epoch": 1.063261425185917, + "grad_norm": 0.09121342748403549, + "learning_rate": 5.320378457616498e-06, + "loss": 0.0007, + "step": 165780 + }, + { + "epoch": 1.0633255620797029, + "grad_norm": 0.10484384745359421, + "learning_rate": 5.319819902402996e-06, + "loss": 0.0013, + "step": 165790 + }, + { + "epoch": 1.063389698973489, + "grad_norm": 0.1039414256811142, + "learning_rate": 5.319261343181923e-06, + "loss": 0.0019, + "step": 165800 + }, + { + "epoch": 1.0634538358672752, + "grad_norm": 0.05744036287069321, + "learning_rate": 5.3187027799602775e-06, + "loss": 0.0027, + "step": 165810 + }, + { + "epoch": 1.0635179727610613, + "grad_norm": 0.018517503514885902, + "learning_rate": 5.318144212745058e-06, + "loss": 0.002, + "step": 165820 + }, + { + "epoch": 1.0635821096548472, + "grad_norm": 0.12544725835323334, + "learning_rate": 5.3175856415432645e-06, + "loss": 0.0026, + "step": 165830 + }, + { + "epoch": 1.0636462465486334, + "grad_norm": 0.07395651936531067, + "learning_rate": 5.317027066361895e-06, + "loss": 0.0021, + "step": 165840 + }, + { + "epoch": 1.0637103834424195, + "grad_norm": 0.07024452090263367, + "learning_rate": 5.316468487207951e-06, + "loss": 0.0034, + "step": 165850 + }, + { + "epoch": 1.0637745203362057, + "grad_norm": 0.10968262702226639, + "learning_rate": 5.31590990408843e-06, + "loss": 0.002, + "step": 165860 + }, + { + "epoch": 1.0638386572299916, + "grad_norm": 0.11000864207744598, + "learning_rate": 5.315351317010332e-06, + "loss": 0.002, + "step": 165870 + }, + { + "epoch": 1.0639027941237778, + "grad_norm": 0.015025916509330273, + "learning_rate": 5.314792725980657e-06, + "loss": 0.0012, + "step": 165880 + }, + { + "epoch": 1.063966931017564, + "grad_norm": 0.0507902167737484, + "learning_rate": 5.314234131006403e-06, + "loss": 0.0024, + "step": 165890 + }, + { + "epoch": 1.06403106791135, + "grad_norm": 0.09992703795433044, + "learning_rate": 5.313675532094572e-06, + "loss": 0.0015, + "step": 165900 + }, + { + "epoch": 1.064095204805136, + "grad_norm": 0.07698260992765427, + "learning_rate": 5.3131169292521625e-06, + "loss": 0.0016, + "step": 165910 + }, + { + "epoch": 1.0641593416989221, + "grad_norm": 0.04310933127999306, + "learning_rate": 5.312558322486174e-06, + "loss": 0.0011, + "step": 165920 + }, + { + "epoch": 1.0642234785927083, + "grad_norm": 0.22994256019592285, + "learning_rate": 5.311999711803605e-06, + "loss": 0.0014, + "step": 165930 + }, + { + "epoch": 1.0642876154864944, + "grad_norm": 0.15693657100200653, + "learning_rate": 5.311441097211458e-06, + "loss": 0.0013, + "step": 165940 + }, + { + "epoch": 1.0643517523802806, + "grad_norm": 0.07755468785762787, + "learning_rate": 5.310882478716731e-06, + "loss": 0.0014, + "step": 165950 + }, + { + "epoch": 1.0644158892740665, + "grad_norm": 0.05102725327014923, + "learning_rate": 5.310323856326426e-06, + "loss": 0.0022, + "step": 165960 + }, + { + "epoch": 1.0644800261678526, + "grad_norm": 0.39134690165519714, + "learning_rate": 5.309765230047539e-06, + "loss": 0.0016, + "step": 165970 + }, + { + "epoch": 1.0645441630616388, + "grad_norm": 0.060010071843862534, + "learning_rate": 5.309206599887074e-06, + "loss": 0.0017, + "step": 165980 + }, + { + "epoch": 1.064608299955425, + "grad_norm": 0.03553006052970886, + "learning_rate": 5.308647965852028e-06, + "loss": 0.0022, + "step": 165990 + }, + { + "epoch": 1.0646724368492109, + "grad_norm": 0.05214393138885498, + "learning_rate": 5.308089327949403e-06, + "loss": 0.0019, + "step": 166000 + }, + { + "epoch": 1.064736573742997, + "grad_norm": 0.05157877504825592, + "learning_rate": 5.3075306861861975e-06, + "loss": 0.0015, + "step": 166010 + }, + { + "epoch": 1.0648007106367832, + "grad_norm": 0.14204972982406616, + "learning_rate": 5.306972040569415e-06, + "loss": 0.003, + "step": 166020 + }, + { + "epoch": 1.0648648475305693, + "grad_norm": 0.05269039794802666, + "learning_rate": 5.306413391106051e-06, + "loss": 0.0015, + "step": 166030 + }, + { + "epoch": 1.0649289844243552, + "grad_norm": 0.08166810870170593, + "learning_rate": 5.305854737803109e-06, + "loss": 0.0012, + "step": 166040 + }, + { + "epoch": 1.0649931213181414, + "grad_norm": 0.14019718766212463, + "learning_rate": 5.305296080667588e-06, + "loss": 0.0009, + "step": 166050 + }, + { + "epoch": 1.0650572582119275, + "grad_norm": 0.12255555391311646, + "learning_rate": 5.30473741970649e-06, + "loss": 0.0011, + "step": 166060 + }, + { + "epoch": 1.0651213951057137, + "grad_norm": 0.06505221873521805, + "learning_rate": 5.304178754926813e-06, + "loss": 0.0023, + "step": 166070 + }, + { + "epoch": 1.0651855319994998, + "grad_norm": 0.00911052618175745, + "learning_rate": 5.303620086335561e-06, + "loss": 0.0025, + "step": 166080 + }, + { + "epoch": 1.0652496688932858, + "grad_norm": 0.12398932874202728, + "learning_rate": 5.30306141393973e-06, + "loss": 0.0032, + "step": 166090 + }, + { + "epoch": 1.065313805787072, + "grad_norm": 0.14058513939380646, + "learning_rate": 5.302502737746325e-06, + "loss": 0.0012, + "step": 166100 + }, + { + "epoch": 1.065377942680858, + "grad_norm": 0.15345489978790283, + "learning_rate": 5.301944057762344e-06, + "loss": 0.0027, + "step": 166110 + }, + { + "epoch": 1.0654420795746442, + "grad_norm": 0.07064566761255264, + "learning_rate": 5.301385373994786e-06, + "loss": 0.0019, + "step": 166120 + }, + { + "epoch": 1.0655062164684301, + "grad_norm": 0.1936577558517456, + "learning_rate": 5.300826686450656e-06, + "loss": 0.0024, + "step": 166130 + }, + { + "epoch": 1.0655703533622163, + "grad_norm": 0.1288345903158188, + "learning_rate": 5.300267995136953e-06, + "loss": 0.0016, + "step": 166140 + }, + { + "epoch": 1.0656344902560024, + "grad_norm": 0.19684065878391266, + "learning_rate": 5.299709300060677e-06, + "loss": 0.002, + "step": 166150 + }, + { + "epoch": 1.0656986271497886, + "grad_norm": 0.007303610909730196, + "learning_rate": 5.299150601228828e-06, + "loss": 0.0039, + "step": 166160 + }, + { + "epoch": 1.0657627640435745, + "grad_norm": 0.08438844233751297, + "learning_rate": 5.29859189864841e-06, + "loss": 0.0015, + "step": 166170 + }, + { + "epoch": 1.0658269009373607, + "grad_norm": 0.09508020430803299, + "learning_rate": 5.29803319232642e-06, + "loss": 0.0016, + "step": 166180 + }, + { + "epoch": 1.0658910378311468, + "grad_norm": 0.016180196776986122, + "learning_rate": 5.297474482269863e-06, + "loss": 0.0014, + "step": 166190 + }, + { + "epoch": 1.065955174724933, + "grad_norm": 0.2816241681575775, + "learning_rate": 5.2969157684857375e-06, + "loss": 0.0031, + "step": 166200 + }, + { + "epoch": 1.066019311618719, + "grad_norm": 0.0024928152561187744, + "learning_rate": 5.296357050981046e-06, + "loss": 0.0031, + "step": 166210 + }, + { + "epoch": 1.066083448512505, + "grad_norm": 0.06070615351200104, + "learning_rate": 5.2957983297627866e-06, + "loss": 0.0022, + "step": 166220 + }, + { + "epoch": 1.0661475854062912, + "grad_norm": 0.021533237770199776, + "learning_rate": 5.295239604837965e-06, + "loss": 0.001, + "step": 166230 + }, + { + "epoch": 1.0662117223000773, + "grad_norm": 0.43801963329315186, + "learning_rate": 5.2946808762135785e-06, + "loss": 0.0025, + "step": 166240 + }, + { + "epoch": 1.0662758591938635, + "grad_norm": 0.10720543563365936, + "learning_rate": 5.29412214389663e-06, + "loss": 0.002, + "step": 166250 + }, + { + "epoch": 1.0663399960876494, + "grad_norm": 0.03996255621314049, + "learning_rate": 5.293563407894122e-06, + "loss": 0.0023, + "step": 166260 + }, + { + "epoch": 1.0664041329814355, + "grad_norm": 0.10009384155273438, + "learning_rate": 5.293004668213055e-06, + "loss": 0.0028, + "step": 166270 + }, + { + "epoch": 1.0664682698752217, + "grad_norm": 0.16202972829341888, + "learning_rate": 5.2924459248604284e-06, + "loss": 0.0013, + "step": 166280 + }, + { + "epoch": 1.0665324067690078, + "grad_norm": 0.09987245500087738, + "learning_rate": 5.291887177843247e-06, + "loss": 0.0026, + "step": 166290 + }, + { + "epoch": 1.0665965436627938, + "grad_norm": 0.14244654774665833, + "learning_rate": 5.291328427168511e-06, + "loss": 0.0021, + "step": 166300 + }, + { + "epoch": 1.06666068055658, + "grad_norm": 0.1552550196647644, + "learning_rate": 5.290769672843219e-06, + "loss": 0.0019, + "step": 166310 + }, + { + "epoch": 1.066724817450366, + "grad_norm": 0.06859882175922394, + "learning_rate": 5.290210914874377e-06, + "loss": 0.0013, + "step": 166320 + }, + { + "epoch": 1.0667889543441522, + "grad_norm": 0.01697307638823986, + "learning_rate": 5.2896521532689845e-06, + "loss": 0.0023, + "step": 166330 + }, + { + "epoch": 1.0668530912379381, + "grad_norm": 0.12605133652687073, + "learning_rate": 5.289093388034045e-06, + "loss": 0.0013, + "step": 166340 + }, + { + "epoch": 1.0669172281317243, + "grad_norm": 0.04196297377347946, + "learning_rate": 5.288534619176556e-06, + "loss": 0.0017, + "step": 166350 + }, + { + "epoch": 1.0669813650255104, + "grad_norm": 0.04504567012190819, + "learning_rate": 5.287975846703525e-06, + "loss": 0.0013, + "step": 166360 + }, + { + "epoch": 1.0670455019192966, + "grad_norm": 0.059784576296806335, + "learning_rate": 5.287417070621947e-06, + "loss": 0.0019, + "step": 166370 + }, + { + "epoch": 1.0671096388130827, + "grad_norm": 0.026613537222146988, + "learning_rate": 5.2868582909388296e-06, + "loss": 0.0012, + "step": 166380 + }, + { + "epoch": 1.0671737757068687, + "grad_norm": 0.25359833240509033, + "learning_rate": 5.286299507661174e-06, + "loss": 0.0017, + "step": 166390 + }, + { + "epoch": 1.0672379126006548, + "grad_norm": 0.0881662517786026, + "learning_rate": 5.28574072079598e-06, + "loss": 0.0023, + "step": 166400 + }, + { + "epoch": 1.067302049494441, + "grad_norm": 0.05822227522730827, + "learning_rate": 5.285181930350251e-06, + "loss": 0.0012, + "step": 166410 + }, + { + "epoch": 1.0673661863882271, + "grad_norm": 0.05762477219104767, + "learning_rate": 5.284623136330988e-06, + "loss": 0.0015, + "step": 166420 + }, + { + "epoch": 1.067430323282013, + "grad_norm": 0.20448440313339233, + "learning_rate": 5.2840643387451924e-06, + "loss": 0.0024, + "step": 166430 + }, + { + "epoch": 1.0674944601757992, + "grad_norm": 0.1820959895849228, + "learning_rate": 5.28350553759987e-06, + "loss": 0.0025, + "step": 166440 + }, + { + "epoch": 1.0675585970695853, + "grad_norm": 0.14897200465202332, + "learning_rate": 5.282946732902019e-06, + "loss": 0.0033, + "step": 166450 + }, + { + "epoch": 1.0676227339633715, + "grad_norm": 0.013246786780655384, + "learning_rate": 5.282387924658644e-06, + "loss": 0.0024, + "step": 166460 + }, + { + "epoch": 1.0676868708571576, + "grad_norm": 0.017587296664714813, + "learning_rate": 5.2818291128767466e-06, + "loss": 0.0014, + "step": 166470 + }, + { + "epoch": 1.0677510077509436, + "grad_norm": 0.03909731283783913, + "learning_rate": 5.281270297563329e-06, + "loss": 0.0017, + "step": 166480 + }, + { + "epoch": 1.0678151446447297, + "grad_norm": 0.03878255560994148, + "learning_rate": 5.2807114787253935e-06, + "loss": 0.0017, + "step": 166490 + }, + { + "epoch": 1.0678792815385159, + "grad_norm": 0.04676719754934311, + "learning_rate": 5.280152656369941e-06, + "loss": 0.0013, + "step": 166500 + }, + { + "epoch": 1.067943418432302, + "grad_norm": 0.038249071687459946, + "learning_rate": 5.279593830503979e-06, + "loss": 0.002, + "step": 166510 + }, + { + "epoch": 1.068007555326088, + "grad_norm": 0.053986068814992905, + "learning_rate": 5.279035001134503e-06, + "loss": 0.0016, + "step": 166520 + }, + { + "epoch": 1.068071692219874, + "grad_norm": 0.11400827020406723, + "learning_rate": 5.278476168268522e-06, + "loss": 0.0012, + "step": 166530 + }, + { + "epoch": 1.0681358291136602, + "grad_norm": 0.03897121176123619, + "learning_rate": 5.277917331913033e-06, + "loss": 0.001, + "step": 166540 + }, + { + "epoch": 1.0681999660074464, + "grad_norm": 0.1743035912513733, + "learning_rate": 5.277358492075042e-06, + "loss": 0.0016, + "step": 166550 + }, + { + "epoch": 1.0682641029012323, + "grad_norm": 0.04287755861878395, + "learning_rate": 5.27679964876155e-06, + "loss": 0.0009, + "step": 166560 + }, + { + "epoch": 1.0683282397950185, + "grad_norm": 0.1285584568977356, + "learning_rate": 5.276240801979563e-06, + "loss": 0.005, + "step": 166570 + }, + { + "epoch": 1.0683923766888046, + "grad_norm": 0.021582355722784996, + "learning_rate": 5.275681951736079e-06, + "loss": 0.0016, + "step": 166580 + }, + { + "epoch": 1.0684565135825908, + "grad_norm": 0.2306017279624939, + "learning_rate": 5.275123098038104e-06, + "loss": 0.0027, + "step": 166590 + }, + { + "epoch": 1.0685206504763767, + "grad_norm": 0.05208316445350647, + "learning_rate": 5.27456424089264e-06, + "loss": 0.0009, + "step": 166600 + }, + { + "epoch": 1.0685847873701628, + "grad_norm": 0.3376952111721039, + "learning_rate": 5.274005380306689e-06, + "loss": 0.0029, + "step": 166610 + }, + { + "epoch": 1.068648924263949, + "grad_norm": 0.058090854436159134, + "learning_rate": 5.273446516287258e-06, + "loss": 0.0019, + "step": 166620 + }, + { + "epoch": 1.0687130611577351, + "grad_norm": 0.054444171488285065, + "learning_rate": 5.272887648841343e-06, + "loss": 0.0022, + "step": 166630 + }, + { + "epoch": 1.0687771980515213, + "grad_norm": 0.07919225096702576, + "learning_rate": 5.272328777975952e-06, + "loss": 0.0015, + "step": 166640 + }, + { + "epoch": 1.0688413349453072, + "grad_norm": 0.05821329727768898, + "learning_rate": 5.271769903698086e-06, + "loss": 0.0018, + "step": 166650 + }, + { + "epoch": 1.0689054718390933, + "grad_norm": 0.04042275249958038, + "learning_rate": 5.271211026014751e-06, + "loss": 0.0012, + "step": 166660 + }, + { + "epoch": 1.0689696087328795, + "grad_norm": 0.12543560564517975, + "learning_rate": 5.270652144932946e-06, + "loss": 0.0083, + "step": 166670 + }, + { + "epoch": 1.0690337456266656, + "grad_norm": 0.22743694484233856, + "learning_rate": 5.2700932604596774e-06, + "loss": 0.0039, + "step": 166680 + }, + { + "epoch": 1.0690978825204516, + "grad_norm": 0.02137444168329239, + "learning_rate": 5.269534372601946e-06, + "loss": 0.0036, + "step": 166690 + }, + { + "epoch": 1.0691620194142377, + "grad_norm": 0.018823714926838875, + "learning_rate": 5.268975481366758e-06, + "loss": 0.001, + "step": 166700 + }, + { + "epoch": 1.0692261563080239, + "grad_norm": 0.19334377348423004, + "learning_rate": 5.268416586761113e-06, + "loss": 0.0023, + "step": 166710 + }, + { + "epoch": 1.06929029320181, + "grad_norm": 0.052227046340703964, + "learning_rate": 5.267857688792019e-06, + "loss": 0.0014, + "step": 166720 + }, + { + "epoch": 1.069354430095596, + "grad_norm": 0.1632692813873291, + "learning_rate": 5.267298787466474e-06, + "loss": 0.0024, + "step": 166730 + }, + { + "epoch": 1.069418566989382, + "grad_norm": 0.08603489398956299, + "learning_rate": 5.266739882791485e-06, + "loss": 0.0012, + "step": 166740 + }, + { + "epoch": 1.0694827038831682, + "grad_norm": 0.2116672843694687, + "learning_rate": 5.2661809747740554e-06, + "loss": 0.0018, + "step": 166750 + }, + { + "epoch": 1.0695468407769544, + "grad_norm": 0.13084769248962402, + "learning_rate": 5.265622063421186e-06, + "loss": 0.001, + "step": 166760 + }, + { + "epoch": 1.0696109776707403, + "grad_norm": 0.057312317192554474, + "learning_rate": 5.265063148739885e-06, + "loss": 0.0019, + "step": 166770 + }, + { + "epoch": 1.0696751145645265, + "grad_norm": 0.11602775007486343, + "learning_rate": 5.26450423073715e-06, + "loss": 0.0014, + "step": 166780 + }, + { + "epoch": 1.0697392514583126, + "grad_norm": 0.04075274243950844, + "learning_rate": 5.2639453094199924e-06, + "loss": 0.0023, + "step": 166790 + }, + { + "epoch": 1.0698033883520988, + "grad_norm": 0.12920819222927094, + "learning_rate": 5.263386384795407e-06, + "loss": 0.0022, + "step": 166800 + }, + { + "epoch": 1.069867525245885, + "grad_norm": 0.2572612464427948, + "learning_rate": 5.262827456870404e-06, + "loss": 0.0013, + "step": 166810 + }, + { + "epoch": 1.0699316621396708, + "grad_norm": 0.14153194427490234, + "learning_rate": 5.262268525651985e-06, + "loss": 0.0016, + "step": 166820 + }, + { + "epoch": 1.069995799033457, + "grad_norm": 0.0538192056119442, + "learning_rate": 5.261709591147154e-06, + "loss": 0.001, + "step": 166830 + }, + { + "epoch": 1.0700599359272431, + "grad_norm": 0.09090349823236465, + "learning_rate": 5.261150653362914e-06, + "loss": 0.0036, + "step": 166840 + }, + { + "epoch": 1.0701240728210293, + "grad_norm": 0.08936433494091034, + "learning_rate": 5.260591712306271e-06, + "loss": 0.0014, + "step": 166850 + }, + { + "epoch": 1.0701882097148152, + "grad_norm": 0.1939397007226944, + "learning_rate": 5.2600327679842264e-06, + "loss": 0.0014, + "step": 166860 + }, + { + "epoch": 1.0702523466086014, + "grad_norm": 0.058595966547727585, + "learning_rate": 5.259473820403788e-06, + "loss": 0.0016, + "step": 166870 + }, + { + "epoch": 1.0703164835023875, + "grad_norm": 0.14676587283611298, + "learning_rate": 5.258914869571954e-06, + "loss": 0.0022, + "step": 166880 + }, + { + "epoch": 1.0703806203961737, + "grad_norm": 0.18484529852867126, + "learning_rate": 5.2583559154957334e-06, + "loss": 0.0018, + "step": 166890 + }, + { + "epoch": 1.0704447572899598, + "grad_norm": 0.10423953086137772, + "learning_rate": 5.257796958182127e-06, + "loss": 0.0013, + "step": 166900 + }, + { + "epoch": 1.0705088941837457, + "grad_norm": 0.07524426281452179, + "learning_rate": 5.257237997638143e-06, + "loss": 0.0014, + "step": 166910 + }, + { + "epoch": 1.0705730310775319, + "grad_norm": 0.03524652123451233, + "learning_rate": 5.256679033870781e-06, + "loss": 0.0024, + "step": 166920 + }, + { + "epoch": 1.070637167971318, + "grad_norm": 0.14464613795280457, + "learning_rate": 5.256120066887049e-06, + "loss": 0.002, + "step": 166930 + }, + { + "epoch": 1.0707013048651042, + "grad_norm": 0.15725456178188324, + "learning_rate": 5.255561096693947e-06, + "loss": 0.0014, + "step": 166940 + }, + { + "epoch": 1.07076544175889, + "grad_norm": 0.0924675464630127, + "learning_rate": 5.255002123298482e-06, + "loss": 0.0018, + "step": 166950 + }, + { + "epoch": 1.0708295786526763, + "grad_norm": 0.13643372058868408, + "learning_rate": 5.254443146707661e-06, + "loss": 0.0012, + "step": 166960 + }, + { + "epoch": 1.0708937155464624, + "grad_norm": 0.027466630563139915, + "learning_rate": 5.253884166928482e-06, + "loss": 0.0017, + "step": 166970 + }, + { + "epoch": 1.0709578524402485, + "grad_norm": 0.062290776520967484, + "learning_rate": 5.253325183967956e-06, + "loss": 0.0013, + "step": 166980 + }, + { + "epoch": 1.0710219893340345, + "grad_norm": 0.1609766185283661, + "learning_rate": 5.252766197833082e-06, + "loss": 0.0019, + "step": 166990 + }, + { + "epoch": 1.0710861262278206, + "grad_norm": 0.01689579151570797, + "learning_rate": 5.252207208530869e-06, + "loss": 0.0015, + "step": 167000 + }, + { + "epoch": 1.0711502631216068, + "grad_norm": 0.015078687109053135, + "learning_rate": 5.251648216068318e-06, + "loss": 0.0024, + "step": 167010 + }, + { + "epoch": 1.071214400015393, + "grad_norm": 0.11737733334302902, + "learning_rate": 5.251089220452436e-06, + "loss": 0.002, + "step": 167020 + }, + { + "epoch": 1.0712785369091788, + "grad_norm": 0.1474132090806961, + "learning_rate": 5.250530221690225e-06, + "loss": 0.0021, + "step": 167030 + }, + { + "epoch": 1.071342673802965, + "grad_norm": 0.12184314429759979, + "learning_rate": 5.249971219788693e-06, + "loss": 0.0019, + "step": 167040 + }, + { + "epoch": 1.0714068106967511, + "grad_norm": 0.04866328462958336, + "learning_rate": 5.2494122147548424e-06, + "loss": 0.002, + "step": 167050 + }, + { + "epoch": 1.0714709475905373, + "grad_norm": 0.07479048520326614, + "learning_rate": 5.248853206595679e-06, + "loss": 0.0029, + "step": 167060 + }, + { + "epoch": 1.0715350844843234, + "grad_norm": 0.21589982509613037, + "learning_rate": 5.248294195318205e-06, + "loss": 0.0018, + "step": 167070 + }, + { + "epoch": 1.0715992213781094, + "grad_norm": 0.02624637447297573, + "learning_rate": 5.24773518092943e-06, + "loss": 0.0019, + "step": 167080 + }, + { + "epoch": 1.0716633582718955, + "grad_norm": 0.0176876001060009, + "learning_rate": 5.247176163436354e-06, + "loss": 0.0013, + "step": 167090 + }, + { + "epoch": 1.0717274951656817, + "grad_norm": 0.0757228434085846, + "learning_rate": 5.246617142845985e-06, + "loss": 0.0016, + "step": 167100 + }, + { + "epoch": 1.0717916320594678, + "grad_norm": 0.12094847112894058, + "learning_rate": 5.246058119165327e-06, + "loss": 0.0014, + "step": 167110 + }, + { + "epoch": 1.0718557689532537, + "grad_norm": 0.02124212682247162, + "learning_rate": 5.245499092401384e-06, + "loss": 0.0031, + "step": 167120 + }, + { + "epoch": 1.0719199058470399, + "grad_norm": 0.07685308158397675, + "learning_rate": 5.244940062561164e-06, + "loss": 0.0024, + "step": 167130 + }, + { + "epoch": 1.071984042740826, + "grad_norm": 0.06877698004245758, + "learning_rate": 5.244381029651667e-06, + "loss": 0.0014, + "step": 167140 + }, + { + "epoch": 1.0720481796346122, + "grad_norm": 0.0037930766120553017, + "learning_rate": 5.243821993679904e-06, + "loss": 0.0026, + "step": 167150 + }, + { + "epoch": 1.0721123165283981, + "grad_norm": 0.08032867312431335, + "learning_rate": 5.243262954652875e-06, + "loss": 0.0014, + "step": 167160 + }, + { + "epoch": 1.0721764534221843, + "grad_norm": 0.11195434629917145, + "learning_rate": 5.242703912577588e-06, + "loss": 0.0017, + "step": 167170 + }, + { + "epoch": 1.0722405903159704, + "grad_norm": 0.07904714345932007, + "learning_rate": 5.242144867461047e-06, + "loss": 0.0012, + "step": 167180 + }, + { + "epoch": 1.0723047272097566, + "grad_norm": 0.13505138456821442, + "learning_rate": 5.241585819310258e-06, + "loss": 0.0027, + "step": 167190 + }, + { + "epoch": 1.0723688641035425, + "grad_norm": 0.12018563598394394, + "learning_rate": 5.241026768132227e-06, + "loss": 0.0014, + "step": 167200 + }, + { + "epoch": 1.0724330009973286, + "grad_norm": 0.014983379282057285, + "learning_rate": 5.240467713933957e-06, + "loss": 0.0015, + "step": 167210 + }, + { + "epoch": 1.0724971378911148, + "grad_norm": 0.12585660815238953, + "learning_rate": 5.239908656722455e-06, + "loss": 0.0019, + "step": 167220 + }, + { + "epoch": 1.072561274784901, + "grad_norm": 0.13963164389133453, + "learning_rate": 5.239349596504728e-06, + "loss": 0.0024, + "step": 167230 + }, + { + "epoch": 1.072625411678687, + "grad_norm": 0.1950705200433731, + "learning_rate": 5.238790533287778e-06, + "loss": 0.001, + "step": 167240 + }, + { + "epoch": 1.072689548572473, + "grad_norm": 0.1334400475025177, + "learning_rate": 5.2382314670786114e-06, + "loss": 0.0011, + "step": 167250 + }, + { + "epoch": 1.0727536854662592, + "grad_norm": 0.15896040201187134, + "learning_rate": 5.237672397884235e-06, + "loss": 0.0014, + "step": 167260 + }, + { + "epoch": 1.0728178223600453, + "grad_norm": 0.03526372089982033, + "learning_rate": 5.2371133257116535e-06, + "loss": 0.0018, + "step": 167270 + }, + { + "epoch": 1.0728819592538315, + "grad_norm": 0.06044013053178787, + "learning_rate": 5.236554250567873e-06, + "loss": 0.0023, + "step": 167280 + }, + { + "epoch": 1.0729460961476174, + "grad_norm": 0.11396222561597824, + "learning_rate": 5.235995172459898e-06, + "loss": 0.0019, + "step": 167290 + }, + { + "epoch": 1.0730102330414035, + "grad_norm": 0.03265209496021271, + "learning_rate": 5.235436091394735e-06, + "loss": 0.0029, + "step": 167300 + }, + { + "epoch": 1.0730743699351897, + "grad_norm": 0.11949179321527481, + "learning_rate": 5.234877007379388e-06, + "loss": 0.0019, + "step": 167310 + }, + { + "epoch": 1.0731385068289758, + "grad_norm": 0.18657705187797546, + "learning_rate": 5.234317920420867e-06, + "loss": 0.0035, + "step": 167320 + }, + { + "epoch": 1.073202643722762, + "grad_norm": 0.11149043589830399, + "learning_rate": 5.233758830526173e-06, + "loss": 0.0013, + "step": 167330 + }, + { + "epoch": 1.073266780616548, + "grad_norm": 0.0194416344165802, + "learning_rate": 5.233199737702314e-06, + "loss": 0.0024, + "step": 167340 + }, + { + "epoch": 1.073330917510334, + "grad_norm": 0.06449584662914276, + "learning_rate": 5.232640641956297e-06, + "loss": 0.0013, + "step": 167350 + }, + { + "epoch": 1.0733950544041202, + "grad_norm": 0.2929050028324127, + "learning_rate": 5.232081543295126e-06, + "loss": 0.0017, + "step": 167360 + }, + { + "epoch": 1.0734591912979063, + "grad_norm": 0.0034871790558099747, + "learning_rate": 5.231522441725806e-06, + "loss": 0.0019, + "step": 167370 + }, + { + "epoch": 1.0735233281916923, + "grad_norm": 0.01426205039024353, + "learning_rate": 5.2309633372553456e-06, + "loss": 0.002, + "step": 167380 + }, + { + "epoch": 1.0735874650854784, + "grad_norm": 0.002224660012871027, + "learning_rate": 5.23040422989075e-06, + "loss": 0.0012, + "step": 167390 + }, + { + "epoch": 1.0736516019792646, + "grad_norm": 0.05631047859787941, + "learning_rate": 5.229845119639025e-06, + "loss": 0.0015, + "step": 167400 + }, + { + "epoch": 1.0737157388730507, + "grad_norm": 0.07507754117250443, + "learning_rate": 5.229286006507174e-06, + "loss": 0.0029, + "step": 167410 + }, + { + "epoch": 1.0737798757668366, + "grad_norm": 0.1423020362854004, + "learning_rate": 5.228726890502207e-06, + "loss": 0.0019, + "step": 167420 + }, + { + "epoch": 1.0738440126606228, + "grad_norm": 0.12582719326019287, + "learning_rate": 5.228167771631129e-06, + "loss": 0.0018, + "step": 167430 + }, + { + "epoch": 1.073908149554409, + "grad_norm": 0.059179939329624176, + "learning_rate": 5.227608649900944e-06, + "loss": 0.001, + "step": 167440 + }, + { + "epoch": 1.073972286448195, + "grad_norm": 0.11064352840185165, + "learning_rate": 5.2270495253186615e-06, + "loss": 0.0014, + "step": 167450 + }, + { + "epoch": 1.074036423341981, + "grad_norm": 0.05817147344350815, + "learning_rate": 5.226490397891285e-06, + "loss": 0.0029, + "step": 167460 + }, + { + "epoch": 1.0741005602357672, + "grad_norm": 0.08750585466623306, + "learning_rate": 5.225931267625824e-06, + "loss": 0.0012, + "step": 167470 + }, + { + "epoch": 1.0741646971295533, + "grad_norm": 0.08948834985494614, + "learning_rate": 5.22537213452928e-06, + "loss": 0.0023, + "step": 167480 + }, + { + "epoch": 1.0742288340233395, + "grad_norm": 0.08464883267879486, + "learning_rate": 5.224812998608665e-06, + "loss": 0.0014, + "step": 167490 + }, + { + "epoch": 1.0742929709171256, + "grad_norm": 0.1435505747795105, + "learning_rate": 5.22425385987098e-06, + "loss": 0.0022, + "step": 167500 + }, + { + "epoch": 1.0743571078109115, + "grad_norm": 0.05624101310968399, + "learning_rate": 5.2236947183232356e-06, + "loss": 0.003, + "step": 167510 + }, + { + "epoch": 1.0744212447046977, + "grad_norm": 0.048413850367069244, + "learning_rate": 5.2231355739724355e-06, + "loss": 0.0026, + "step": 167520 + }, + { + "epoch": 1.0744853815984838, + "grad_norm": 0.1629071682691574, + "learning_rate": 5.222576426825588e-06, + "loss": 0.0016, + "step": 167530 + }, + { + "epoch": 1.07454951849227, + "grad_norm": 0.0808134377002716, + "learning_rate": 5.2220172768896986e-06, + "loss": 0.0028, + "step": 167540 + }, + { + "epoch": 1.074613655386056, + "grad_norm": 0.29669323563575745, + "learning_rate": 5.221458124171774e-06, + "loss": 0.0045, + "step": 167550 + }, + { + "epoch": 1.074677792279842, + "grad_norm": 0.052580058574676514, + "learning_rate": 5.220898968678821e-06, + "loss": 0.0029, + "step": 167560 + }, + { + "epoch": 1.0747419291736282, + "grad_norm": 0.05036512017250061, + "learning_rate": 5.220339810417844e-06, + "loss": 0.0017, + "step": 167570 + }, + { + "epoch": 1.0748060660674144, + "grad_norm": 0.004044400528073311, + "learning_rate": 5.219780649395852e-06, + "loss": 0.0009, + "step": 167580 + }, + { + "epoch": 1.0748702029612003, + "grad_norm": 0.0637139230966568, + "learning_rate": 5.219221485619854e-06, + "loss": 0.0013, + "step": 167590 + }, + { + "epoch": 1.0749343398549864, + "grad_norm": 0.05439816042780876, + "learning_rate": 5.218662319096852e-06, + "loss": 0.0014, + "step": 167600 + }, + { + "epoch": 1.0749984767487726, + "grad_norm": 0.1431669443845749, + "learning_rate": 5.218103149833856e-06, + "loss": 0.002, + "step": 167610 + }, + { + "epoch": 1.0750626136425587, + "grad_norm": 0.06507054716348648, + "learning_rate": 5.217543977837871e-06, + "loss": 0.0011, + "step": 167620 + }, + { + "epoch": 1.0751267505363449, + "grad_norm": 0.11313388496637344, + "learning_rate": 5.2169848031159065e-06, + "loss": 0.0016, + "step": 167630 + }, + { + "epoch": 1.0751908874301308, + "grad_norm": 0.1550733745098114, + "learning_rate": 5.216425625674965e-06, + "loss": 0.0012, + "step": 167640 + }, + { + "epoch": 1.075255024323917, + "grad_norm": 0.05684865638613701, + "learning_rate": 5.215866445522056e-06, + "loss": 0.0015, + "step": 167650 + }, + { + "epoch": 1.075319161217703, + "grad_norm": 0.11183615028858185, + "learning_rate": 5.215307262664187e-06, + "loss": 0.0031, + "step": 167660 + }, + { + "epoch": 1.0753832981114892, + "grad_norm": 0.05034491792321205, + "learning_rate": 5.214748077108363e-06, + "loss": 0.0026, + "step": 167670 + }, + { + "epoch": 1.0754474350052752, + "grad_norm": 0.055223409086465836, + "learning_rate": 5.214188888861593e-06, + "loss": 0.0018, + "step": 167680 + }, + { + "epoch": 1.0755115718990613, + "grad_norm": 0.006556411739438772, + "learning_rate": 5.213629697930883e-06, + "loss": 0.0018, + "step": 167690 + }, + { + "epoch": 1.0755757087928475, + "grad_norm": 0.1441619098186493, + "learning_rate": 5.213070504323241e-06, + "loss": 0.0014, + "step": 167700 + }, + { + "epoch": 1.0756398456866336, + "grad_norm": 0.1584549993276596, + "learning_rate": 5.212511308045672e-06, + "loss": 0.0036, + "step": 167710 + }, + { + "epoch": 1.0757039825804195, + "grad_norm": 0.14993031322956085, + "learning_rate": 5.211952109105185e-06, + "loss": 0.0018, + "step": 167720 + }, + { + "epoch": 1.0757681194742057, + "grad_norm": 0.2008506953716278, + "learning_rate": 5.211392907508785e-06, + "loss": 0.001, + "step": 167730 + }, + { + "epoch": 1.0758322563679918, + "grad_norm": 0.20662356913089752, + "learning_rate": 5.210833703263484e-06, + "loss": 0.0022, + "step": 167740 + }, + { + "epoch": 1.075896393261778, + "grad_norm": 0.1540125012397766, + "learning_rate": 5.210274496376284e-06, + "loss": 0.001, + "step": 167750 + }, + { + "epoch": 1.0759605301555641, + "grad_norm": 0.13144925236701965, + "learning_rate": 5.209715286854195e-06, + "loss": 0.0029, + "step": 167760 + }, + { + "epoch": 1.07602466704935, + "grad_norm": 0.06378944963216782, + "learning_rate": 5.209156074704222e-06, + "loss": 0.0034, + "step": 167770 + }, + { + "epoch": 1.0760888039431362, + "grad_norm": 0.0328482985496521, + "learning_rate": 5.208596859933376e-06, + "loss": 0.0026, + "step": 167780 + }, + { + "epoch": 1.0761529408369224, + "grad_norm": 0.0747612789273262, + "learning_rate": 5.208037642548661e-06, + "loss": 0.0012, + "step": 167790 + }, + { + "epoch": 1.0762170777307085, + "grad_norm": 0.13654185831546783, + "learning_rate": 5.2074784225570855e-06, + "loss": 0.0013, + "step": 167800 + }, + { + "epoch": 1.0762812146244944, + "grad_norm": 0.13586686551570892, + "learning_rate": 5.206919199965657e-06, + "loss": 0.0022, + "step": 167810 + }, + { + "epoch": 1.0763453515182806, + "grad_norm": 0.03900545835494995, + "learning_rate": 5.206359974781384e-06, + "loss": 0.0009, + "step": 167820 + }, + { + "epoch": 1.0764094884120667, + "grad_norm": 0.15561453998088837, + "learning_rate": 5.205800747011274e-06, + "loss": 0.0019, + "step": 167830 + }, + { + "epoch": 1.0764736253058529, + "grad_norm": 0.12779083847999573, + "learning_rate": 5.205241516662331e-06, + "loss": 0.0012, + "step": 167840 + }, + { + "epoch": 1.0765377621996388, + "grad_norm": 0.17967472970485687, + "learning_rate": 5.204682283741567e-06, + "loss": 0.0071, + "step": 167850 + }, + { + "epoch": 1.076601899093425, + "grad_norm": 0.010842211544513702, + "learning_rate": 5.2041230482559865e-06, + "loss": 0.0034, + "step": 167860 + }, + { + "epoch": 1.0766660359872111, + "grad_norm": 0.013351025059819221, + "learning_rate": 5.2035638102126e-06, + "loss": 0.001, + "step": 167870 + }, + { + "epoch": 1.0767301728809973, + "grad_norm": 0.22351759672164917, + "learning_rate": 5.20300456961841e-06, + "loss": 0.0034, + "step": 167880 + }, + { + "epoch": 1.0767943097747832, + "grad_norm": 0.03208925202488899, + "learning_rate": 5.202445326480432e-06, + "loss": 0.0027, + "step": 167890 + }, + { + "epoch": 1.0768584466685693, + "grad_norm": 0.10712125897407532, + "learning_rate": 5.201886080805666e-06, + "loss": 0.0015, + "step": 167900 + }, + { + "epoch": 1.0769225835623555, + "grad_norm": 0.05341329425573349, + "learning_rate": 5.201326832601125e-06, + "loss": 0.0014, + "step": 167910 + }, + { + "epoch": 1.0769867204561416, + "grad_norm": 0.1631436049938202, + "learning_rate": 5.200767581873815e-06, + "loss": 0.0025, + "step": 167920 + }, + { + "epoch": 1.0770508573499278, + "grad_norm": 0.19283632934093475, + "learning_rate": 5.200208328630743e-06, + "loss": 0.0035, + "step": 167930 + }, + { + "epoch": 1.0771149942437137, + "grad_norm": 0.032223086804151535, + "learning_rate": 5.199649072878917e-06, + "loss": 0.0015, + "step": 167940 + }, + { + "epoch": 1.0771791311374999, + "grad_norm": 0.12343668192625046, + "learning_rate": 5.199089814625345e-06, + "loss": 0.0017, + "step": 167950 + }, + { + "epoch": 1.077243268031286, + "grad_norm": 0.2309725284576416, + "learning_rate": 5.198530553877036e-06, + "loss": 0.0031, + "step": 167960 + }, + { + "epoch": 1.0773074049250722, + "grad_norm": 0.06467442959547043, + "learning_rate": 5.197971290640999e-06, + "loss": 0.001, + "step": 167970 + }, + { + "epoch": 1.077371541818858, + "grad_norm": 0.19849643111228943, + "learning_rate": 5.1974120249242374e-06, + "loss": 0.0015, + "step": 167980 + }, + { + "epoch": 1.0774356787126442, + "grad_norm": 0.0701538547873497, + "learning_rate": 5.196852756733765e-06, + "loss": 0.0021, + "step": 167990 + }, + { + "epoch": 1.0774998156064304, + "grad_norm": 0.10209740698337555, + "learning_rate": 5.1962934860765845e-06, + "loss": 0.0021, + "step": 168000 + }, + { + "epoch": 1.0775639525002165, + "grad_norm": 0.08195840567350388, + "learning_rate": 5.195734212959706e-06, + "loss": 0.0013, + "step": 168010 + }, + { + "epoch": 1.0776280893940027, + "grad_norm": 0.011830402538180351, + "learning_rate": 5.195174937390141e-06, + "loss": 0.002, + "step": 168020 + }, + { + "epoch": 1.0776922262877886, + "grad_norm": 0.05599063262343407, + "learning_rate": 5.194615659374892e-06, + "loss": 0.002, + "step": 168030 + }, + { + "epoch": 1.0777563631815748, + "grad_norm": 0.044811591506004333, + "learning_rate": 5.194056378920971e-06, + "loss": 0.0016, + "step": 168040 + }, + { + "epoch": 1.077820500075361, + "grad_norm": 0.10353126376867294, + "learning_rate": 5.193497096035384e-06, + "loss": 0.0017, + "step": 168050 + }, + { + "epoch": 1.077884636969147, + "grad_norm": 0.006115327589213848, + "learning_rate": 5.19293781072514e-06, + "loss": 0.0013, + "step": 168060 + }, + { + "epoch": 1.077948773862933, + "grad_norm": 0.024776805192232132, + "learning_rate": 5.192378522997248e-06, + "loss": 0.0015, + "step": 168070 + }, + { + "epoch": 1.0780129107567191, + "grad_norm": 0.034329865127801895, + "learning_rate": 5.1918192328587155e-06, + "loss": 0.0009, + "step": 168080 + }, + { + "epoch": 1.0780770476505053, + "grad_norm": 0.03323132172226906, + "learning_rate": 5.1912599403165495e-06, + "loss": 0.0014, + "step": 168090 + }, + { + "epoch": 1.0781411845442914, + "grad_norm": 0.06228185445070267, + "learning_rate": 5.190700645377762e-06, + "loss": 0.0023, + "step": 168100 + }, + { + "epoch": 1.0782053214380773, + "grad_norm": 0.012968168593943119, + "learning_rate": 5.190141348049358e-06, + "loss": 0.0018, + "step": 168110 + }, + { + "epoch": 1.0782694583318635, + "grad_norm": 0.24061404168605804, + "learning_rate": 5.189582048338348e-06, + "loss": 0.0025, + "step": 168120 + }, + { + "epoch": 1.0783335952256496, + "grad_norm": 0.060437899082899094, + "learning_rate": 5.189022746251737e-06, + "loss": 0.0011, + "step": 168130 + }, + { + "epoch": 1.0783977321194358, + "grad_norm": 0.06799522787332535, + "learning_rate": 5.188463441796538e-06, + "loss": 0.0024, + "step": 168140 + }, + { + "epoch": 1.0784618690132217, + "grad_norm": 0.04411352425813675, + "learning_rate": 5.187904134979758e-06, + "loss": 0.0014, + "step": 168150 + }, + { + "epoch": 1.0785260059070079, + "grad_norm": 0.08532782644033432, + "learning_rate": 5.187344825808403e-06, + "loss": 0.0015, + "step": 168160 + }, + { + "epoch": 1.078590142800794, + "grad_norm": 0.05462755635380745, + "learning_rate": 5.186785514289484e-06, + "loss": 0.0016, + "step": 168170 + }, + { + "epoch": 1.0786542796945802, + "grad_norm": 0.07991321384906769, + "learning_rate": 5.186226200430009e-06, + "loss": 0.0025, + "step": 168180 + }, + { + "epoch": 1.0787184165883663, + "grad_norm": 0.14190146327018738, + "learning_rate": 5.185666884236987e-06, + "loss": 0.0021, + "step": 168190 + }, + { + "epoch": 1.0787825534821522, + "grad_norm": 0.02735942229628563, + "learning_rate": 5.185107565717426e-06, + "loss": 0.0028, + "step": 168200 + }, + { + "epoch": 1.0788466903759384, + "grad_norm": 0.05069974437355995, + "learning_rate": 5.184548244878336e-06, + "loss": 0.0025, + "step": 168210 + }, + { + "epoch": 1.0789108272697245, + "grad_norm": 0.02090507000684738, + "learning_rate": 5.183988921726723e-06, + "loss": 0.0028, + "step": 168220 + }, + { + "epoch": 1.0789749641635107, + "grad_norm": 0.1305733025074005, + "learning_rate": 5.183429596269598e-06, + "loss": 0.003, + "step": 168230 + }, + { + "epoch": 1.0790391010572966, + "grad_norm": 0.13104850053787231, + "learning_rate": 5.182870268513969e-06, + "loss": 0.0015, + "step": 168240 + }, + { + "epoch": 1.0791032379510828, + "grad_norm": 0.1724039614200592, + "learning_rate": 5.182310938466845e-06, + "loss": 0.0025, + "step": 168250 + }, + { + "epoch": 1.079167374844869, + "grad_norm": 0.08119799941778183, + "learning_rate": 5.181751606135233e-06, + "loss": 0.0011, + "step": 168260 + }, + { + "epoch": 1.079231511738655, + "grad_norm": 0.043051231652498245, + "learning_rate": 5.1811922715261464e-06, + "loss": 0.0012, + "step": 168270 + }, + { + "epoch": 1.079295648632441, + "grad_norm": 0.05889670178294182, + "learning_rate": 5.180632934646589e-06, + "loss": 0.0014, + "step": 168280 + }, + { + "epoch": 1.0793597855262271, + "grad_norm": 0.05956847965717316, + "learning_rate": 5.180073595503572e-06, + "loss": 0.0013, + "step": 168290 + }, + { + "epoch": 1.0794239224200133, + "grad_norm": 0.10754121840000153, + "learning_rate": 5.179514254104106e-06, + "loss": 0.001, + "step": 168300 + }, + { + "epoch": 1.0794880593137994, + "grad_norm": 0.13765943050384521, + "learning_rate": 5.178954910455195e-06, + "loss": 0.0017, + "step": 168310 + }, + { + "epoch": 1.0795521962075854, + "grad_norm": 0.08927934616804123, + "learning_rate": 5.178395564563854e-06, + "loss": 0.0014, + "step": 168320 + }, + { + "epoch": 1.0796163331013715, + "grad_norm": 0.04931335151195526, + "learning_rate": 5.1778362164370875e-06, + "loss": 0.0016, + "step": 168330 + }, + { + "epoch": 1.0796804699951577, + "grad_norm": 0.011807376518845558, + "learning_rate": 5.1772768660819065e-06, + "loss": 0.0007, + "step": 168340 + }, + { + "epoch": 1.0797446068889438, + "grad_norm": 0.1913766860961914, + "learning_rate": 5.17671751350532e-06, + "loss": 0.0022, + "step": 168350 + }, + { + "epoch": 1.07980874378273, + "grad_norm": 0.12997888028621674, + "learning_rate": 5.176158158714335e-06, + "loss": 0.0018, + "step": 168360 + }, + { + "epoch": 1.0798728806765159, + "grad_norm": 0.10905221104621887, + "learning_rate": 5.175598801715964e-06, + "loss": 0.002, + "step": 168370 + }, + { + "epoch": 1.079937017570302, + "grad_norm": 0.05260107293725014, + "learning_rate": 5.1750394425172146e-06, + "loss": 0.0029, + "step": 168380 + }, + { + "epoch": 1.0800011544640882, + "grad_norm": 0.15370966494083405, + "learning_rate": 5.174480081125095e-06, + "loss": 0.0025, + "step": 168390 + }, + { + "epoch": 1.0800652913578743, + "grad_norm": 0.10007897019386292, + "learning_rate": 5.173920717546617e-06, + "loss": 0.001, + "step": 168400 + }, + { + "epoch": 1.0801294282516603, + "grad_norm": 0.13257601857185364, + "learning_rate": 5.173361351788786e-06, + "loss": 0.0015, + "step": 168410 + }, + { + "epoch": 1.0801935651454464, + "grad_norm": 0.06499084830284119, + "learning_rate": 5.172801983858615e-06, + "loss": 0.0011, + "step": 168420 + }, + { + "epoch": 1.0802577020392325, + "grad_norm": 0.08412903547286987, + "learning_rate": 5.172242613763111e-06, + "loss": 0.0016, + "step": 168430 + }, + { + "epoch": 1.0803218389330187, + "grad_norm": 0.05546203628182411, + "learning_rate": 5.171683241509284e-06, + "loss": 0.0017, + "step": 168440 + }, + { + "epoch": 1.0803859758268048, + "grad_norm": 0.10209858417510986, + "learning_rate": 5.171123867104143e-06, + "loss": 0.0022, + "step": 168450 + }, + { + "epoch": 1.0804501127205908, + "grad_norm": 0.01484663411974907, + "learning_rate": 5.170564490554698e-06, + "loss": 0.0012, + "step": 168460 + }, + { + "epoch": 1.080514249614377, + "grad_norm": 0.05575484409928322, + "learning_rate": 5.170005111867957e-06, + "loss": 0.0017, + "step": 168470 + }, + { + "epoch": 1.080578386508163, + "grad_norm": 0.04578129202127457, + "learning_rate": 5.169445731050933e-06, + "loss": 0.0013, + "step": 168480 + }, + { + "epoch": 1.0806425234019492, + "grad_norm": 0.123416967689991, + "learning_rate": 5.16888634811063e-06, + "loss": 0.0036, + "step": 168490 + }, + { + "epoch": 1.0807066602957351, + "grad_norm": 0.03386322036385536, + "learning_rate": 5.168326963054061e-06, + "loss": 0.0016, + "step": 168500 + }, + { + "epoch": 1.0807707971895213, + "grad_norm": 0.0203689094632864, + "learning_rate": 5.1677675758882366e-06, + "loss": 0.0015, + "step": 168510 + }, + { + "epoch": 1.0808349340833074, + "grad_norm": 0.0576355904340744, + "learning_rate": 5.167208186620163e-06, + "loss": 0.0015, + "step": 168520 + }, + { + "epoch": 1.0808990709770936, + "grad_norm": 0.027288375422358513, + "learning_rate": 5.1666487952568525e-06, + "loss": 0.0022, + "step": 168530 + }, + { + "epoch": 1.0809632078708795, + "grad_norm": 0.04407395422458649, + "learning_rate": 5.166089401805312e-06, + "loss": 0.0029, + "step": 168540 + }, + { + "epoch": 1.0810273447646657, + "grad_norm": 0.25099894404411316, + "learning_rate": 5.165530006272555e-06, + "loss": 0.0018, + "step": 168550 + }, + { + "epoch": 1.0810914816584518, + "grad_norm": 0.15912792086601257, + "learning_rate": 5.1649706086655856e-06, + "loss": 0.002, + "step": 168560 + }, + { + "epoch": 1.081155618552238, + "grad_norm": 0.08991295844316483, + "learning_rate": 5.164411208991419e-06, + "loss": 0.001, + "step": 168570 + }, + { + "epoch": 1.081219755446024, + "grad_norm": 0.05349145457148552, + "learning_rate": 5.163851807257061e-06, + "loss": 0.0022, + "step": 168580 + }, + { + "epoch": 1.08128389233981, + "grad_norm": 0.07239469140768051, + "learning_rate": 5.163292403469523e-06, + "loss": 0.0011, + "step": 168590 + }, + { + "epoch": 1.0813480292335962, + "grad_norm": 0.2052467167377472, + "learning_rate": 5.162732997635814e-06, + "loss": 0.0015, + "step": 168600 + }, + { + "epoch": 1.0814121661273823, + "grad_norm": 0.18487276136875153, + "learning_rate": 5.162173589762946e-06, + "loss": 0.0043, + "step": 168610 + }, + { + "epoch": 1.0814763030211685, + "grad_norm": 0.11724483221769333, + "learning_rate": 5.1616141798579255e-06, + "loss": 0.0013, + "step": 168620 + }, + { + "epoch": 1.0815404399149544, + "grad_norm": 0.0919305756688118, + "learning_rate": 5.161054767927765e-06, + "loss": 0.0021, + "step": 168630 + }, + { + "epoch": 1.0816045768087406, + "grad_norm": 0.10012010484933853, + "learning_rate": 5.160495353979472e-06, + "loss": 0.0018, + "step": 168640 + }, + { + "epoch": 1.0816687137025267, + "grad_norm": 0.1580265313386917, + "learning_rate": 5.159935938020058e-06, + "loss": 0.0021, + "step": 168650 + }, + { + "epoch": 1.0817328505963129, + "grad_norm": 0.05440143495798111, + "learning_rate": 5.159376520056532e-06, + "loss": 0.0009, + "step": 168660 + }, + { + "epoch": 1.0817969874900988, + "grad_norm": 0.09930847585201263, + "learning_rate": 5.158817100095904e-06, + "loss": 0.001, + "step": 168670 + }, + { + "epoch": 1.081861124383885, + "grad_norm": 0.04801664873957634, + "learning_rate": 5.158257678145185e-06, + "loss": 0.0035, + "step": 168680 + }, + { + "epoch": 1.081925261277671, + "grad_norm": 0.07561906427145004, + "learning_rate": 5.157698254211384e-06, + "loss": 0.0011, + "step": 168690 + }, + { + "epoch": 1.0819893981714572, + "grad_norm": 0.01799064502120018, + "learning_rate": 5.157138828301512e-06, + "loss": 0.0031, + "step": 168700 + }, + { + "epoch": 1.0820535350652432, + "grad_norm": 0.15777525305747986, + "learning_rate": 5.156579400422577e-06, + "loss": 0.0011, + "step": 168710 + }, + { + "epoch": 1.0821176719590293, + "grad_norm": 0.051772113889455795, + "learning_rate": 5.15601997058159e-06, + "loss": 0.0019, + "step": 168720 + }, + { + "epoch": 1.0821818088528155, + "grad_norm": 0.37480801343917847, + "learning_rate": 5.155460538785562e-06, + "loss": 0.0037, + "step": 168730 + }, + { + "epoch": 1.0822459457466016, + "grad_norm": 0.07378017157316208, + "learning_rate": 5.154901105041502e-06, + "loss": 0.0027, + "step": 168740 + }, + { + "epoch": 1.0823100826403875, + "grad_norm": 0.1874859482049942, + "learning_rate": 5.154341669356421e-06, + "loss": 0.0014, + "step": 168750 + }, + { + "epoch": 1.0823742195341737, + "grad_norm": 0.09158900380134583, + "learning_rate": 5.153782231737329e-06, + "loss": 0.0025, + "step": 168760 + }, + { + "epoch": 1.0824383564279598, + "grad_norm": 0.015413496643304825, + "learning_rate": 5.1532227921912335e-06, + "loss": 0.0015, + "step": 168770 + }, + { + "epoch": 1.082502493321746, + "grad_norm": 0.06986375898122787, + "learning_rate": 5.152663350725149e-06, + "loss": 0.0004, + "step": 168780 + }, + { + "epoch": 1.0825666302155321, + "grad_norm": 0.26239410042762756, + "learning_rate": 5.152103907346083e-06, + "loss": 0.0015, + "step": 168790 + }, + { + "epoch": 1.082630767109318, + "grad_norm": 0.004599603824317455, + "learning_rate": 5.151544462061047e-06, + "loss": 0.0027, + "step": 168800 + }, + { + "epoch": 1.0826949040031042, + "grad_norm": 0.12062758952379227, + "learning_rate": 5.150985014877049e-06, + "loss": 0.002, + "step": 168810 + }, + { + "epoch": 1.0827590408968903, + "grad_norm": 0.026245219632983208, + "learning_rate": 5.150425565801103e-06, + "loss": 0.0018, + "step": 168820 + }, + { + "epoch": 1.0828231777906765, + "grad_norm": 0.07881046831607819, + "learning_rate": 5.149866114840215e-06, + "loss": 0.0023, + "step": 168830 + }, + { + "epoch": 1.0828873146844624, + "grad_norm": 0.007862749509513378, + "learning_rate": 5.149306662001399e-06, + "loss": 0.0022, + "step": 168840 + }, + { + "epoch": 1.0829514515782486, + "grad_norm": 0.13096725940704346, + "learning_rate": 5.1487472072916624e-06, + "loss": 0.0016, + "step": 168850 + }, + { + "epoch": 1.0830155884720347, + "grad_norm": 0.08981695771217346, + "learning_rate": 5.148187750718018e-06, + "loss": 0.0026, + "step": 168860 + }, + { + "epoch": 1.0830797253658209, + "grad_norm": 0.12101157754659653, + "learning_rate": 5.1476282922874756e-06, + "loss": 0.0018, + "step": 168870 + }, + { + "epoch": 1.083143862259607, + "grad_norm": 0.3226020336151123, + "learning_rate": 5.147068832007044e-06, + "loss": 0.002, + "step": 168880 + }, + { + "epoch": 1.083207999153393, + "grad_norm": 0.05597984045743942, + "learning_rate": 5.146509369883735e-06, + "loss": 0.001, + "step": 168890 + }, + { + "epoch": 1.083272136047179, + "grad_norm": 0.271222859621048, + "learning_rate": 5.145949905924559e-06, + "loss": 0.002, + "step": 168900 + }, + { + "epoch": 1.0833362729409652, + "grad_norm": 0.08601807802915573, + "learning_rate": 5.145390440136527e-06, + "loss": 0.0013, + "step": 168910 + }, + { + "epoch": 1.0834004098347514, + "grad_norm": 0.11945120990276337, + "learning_rate": 5.144830972526649e-06, + "loss": 0.0014, + "step": 168920 + }, + { + "epoch": 1.0834645467285373, + "grad_norm": 0.09423484653234482, + "learning_rate": 5.144271503101935e-06, + "loss": 0.0011, + "step": 168930 + }, + { + "epoch": 1.0835286836223235, + "grad_norm": 0.04961375147104263, + "learning_rate": 5.143712031869396e-06, + "loss": 0.0011, + "step": 168940 + }, + { + "epoch": 1.0835928205161096, + "grad_norm": 0.06484077870845795, + "learning_rate": 5.143152558836042e-06, + "loss": 0.0013, + "step": 168950 + }, + { + "epoch": 1.0836569574098958, + "grad_norm": 0.042897775769233704, + "learning_rate": 5.142593084008884e-06, + "loss": 0.0009, + "step": 168960 + }, + { + "epoch": 1.0837210943036817, + "grad_norm": 0.0597703717648983, + "learning_rate": 5.142033607394934e-06, + "loss": 0.0018, + "step": 168970 + }, + { + "epoch": 1.0837852311974678, + "grad_norm": 0.0672975704073906, + "learning_rate": 5.1414741290012006e-06, + "loss": 0.0018, + "step": 168980 + }, + { + "epoch": 1.083849368091254, + "grad_norm": 0.14853569865226746, + "learning_rate": 5.140914648834696e-06, + "loss": 0.0018, + "step": 168990 + }, + { + "epoch": 1.0839135049850401, + "grad_norm": 0.06412370502948761, + "learning_rate": 5.140355166902429e-06, + "loss": 0.0011, + "step": 169000 + }, + { + "epoch": 1.083977641878826, + "grad_norm": 0.11806584894657135, + "learning_rate": 5.139795683211412e-06, + "loss": 0.0059, + "step": 169010 + }, + { + "epoch": 1.0840417787726122, + "grad_norm": 0.0590679794549942, + "learning_rate": 5.139236197768654e-06, + "loss": 0.0015, + "step": 169020 + }, + { + "epoch": 1.0841059156663984, + "grad_norm": 0.1295815408229828, + "learning_rate": 5.138676710581169e-06, + "loss": 0.0007, + "step": 169030 + }, + { + "epoch": 1.0841700525601845, + "grad_norm": 0.18576541543006897, + "learning_rate": 5.138117221655965e-06, + "loss": 0.0018, + "step": 169040 + }, + { + "epoch": 1.0842341894539707, + "grad_norm": 0.13462528586387634, + "learning_rate": 5.137557731000052e-06, + "loss": 0.0018, + "step": 169050 + }, + { + "epoch": 1.0842983263477566, + "grad_norm": 0.04111761972308159, + "learning_rate": 5.136998238620444e-06, + "loss": 0.0019, + "step": 169060 + }, + { + "epoch": 1.0843624632415427, + "grad_norm": 0.21343253552913666, + "learning_rate": 5.13643874452415e-06, + "loss": 0.0028, + "step": 169070 + }, + { + "epoch": 1.0844266001353289, + "grad_norm": 0.13954845070838928, + "learning_rate": 5.135879248718181e-06, + "loss": 0.0007, + "step": 169080 + }, + { + "epoch": 1.084490737029115, + "grad_norm": 0.07925862073898315, + "learning_rate": 5.1353197512095474e-06, + "loss": 0.0015, + "step": 169090 + }, + { + "epoch": 1.084554873922901, + "grad_norm": 0.05541840195655823, + "learning_rate": 5.13476025200526e-06, + "loss": 0.0014, + "step": 169100 + }, + { + "epoch": 1.084619010816687, + "grad_norm": 0.18047375977039337, + "learning_rate": 5.134200751112331e-06, + "loss": 0.0017, + "step": 169110 + }, + { + "epoch": 1.0846831477104732, + "grad_norm": 0.06819538027048111, + "learning_rate": 5.133641248537772e-06, + "loss": 0.0018, + "step": 169120 + }, + { + "epoch": 1.0847472846042594, + "grad_norm": 0.09758296608924866, + "learning_rate": 5.133081744288591e-06, + "loss": 0.0031, + "step": 169130 + }, + { + "epoch": 1.0848114214980453, + "grad_norm": 0.11406062543392181, + "learning_rate": 5.132522238371801e-06, + "loss": 0.0028, + "step": 169140 + }, + { + "epoch": 1.0848755583918315, + "grad_norm": 0.1453990787267685, + "learning_rate": 5.131962730794412e-06, + "loss": 0.0015, + "step": 169150 + }, + { + "epoch": 1.0849396952856176, + "grad_norm": 0.05270492285490036, + "learning_rate": 5.131403221563437e-06, + "loss": 0.0024, + "step": 169160 + }, + { + "epoch": 1.0850038321794038, + "grad_norm": 0.2334631234407425, + "learning_rate": 5.130843710685885e-06, + "loss": 0.0022, + "step": 169170 + }, + { + "epoch": 1.08506796907319, + "grad_norm": 0.1925625205039978, + "learning_rate": 5.130284198168769e-06, + "loss": 0.0011, + "step": 169180 + }, + { + "epoch": 1.0851321059669758, + "grad_norm": 0.00834691897034645, + "learning_rate": 5.129724684019098e-06, + "loss": 0.0012, + "step": 169190 + }, + { + "epoch": 1.085196242860762, + "grad_norm": 0.10060546547174454, + "learning_rate": 5.1291651682438835e-06, + "loss": 0.0013, + "step": 169200 + }, + { + "epoch": 1.0852603797545481, + "grad_norm": 0.07248177379369736, + "learning_rate": 5.128605650850137e-06, + "loss": 0.0024, + "step": 169210 + }, + { + "epoch": 1.0853245166483343, + "grad_norm": 0.11243856698274612, + "learning_rate": 5.128046131844869e-06, + "loss": 0.0012, + "step": 169220 + }, + { + "epoch": 1.0853886535421202, + "grad_norm": 0.05346961319446564, + "learning_rate": 5.127486611235094e-06, + "loss": 0.0013, + "step": 169230 + }, + { + "epoch": 1.0854527904359064, + "grad_norm": 0.11817878484725952, + "learning_rate": 5.1269270890278186e-06, + "loss": 0.0017, + "step": 169240 + }, + { + "epoch": 1.0855169273296925, + "grad_norm": 0.11961852759122849, + "learning_rate": 5.126367565230057e-06, + "loss": 0.0021, + "step": 169250 + }, + { + "epoch": 1.0855810642234787, + "grad_norm": 0.02972625009715557, + "learning_rate": 5.125808039848819e-06, + "loss": 0.0015, + "step": 169260 + }, + { + "epoch": 1.0856452011172646, + "grad_norm": 0.23780551552772522, + "learning_rate": 5.125248512891118e-06, + "loss": 0.0025, + "step": 169270 + }, + { + "epoch": 1.0857093380110507, + "grad_norm": 0.08383630216121674, + "learning_rate": 5.124688984363962e-06, + "loss": 0.0029, + "step": 169280 + }, + { + "epoch": 1.0857734749048369, + "grad_norm": 0.077206090092659, + "learning_rate": 5.124129454274363e-06, + "loss": 0.0014, + "step": 169290 + }, + { + "epoch": 1.085837611798623, + "grad_norm": 0.09887336194515228, + "learning_rate": 5.123569922629335e-06, + "loss": 0.0016, + "step": 169300 + }, + { + "epoch": 1.0859017486924092, + "grad_norm": 0.16712264716625214, + "learning_rate": 5.123010389435888e-06, + "loss": 0.0023, + "step": 169310 + }, + { + "epoch": 1.0859658855861951, + "grad_norm": 0.2610989809036255, + "learning_rate": 5.122450854701031e-06, + "loss": 0.0017, + "step": 169320 + }, + { + "epoch": 1.0860300224799813, + "grad_norm": 0.12500953674316406, + "learning_rate": 5.1218913184317785e-06, + "loss": 0.0014, + "step": 169330 + }, + { + "epoch": 1.0860941593737674, + "grad_norm": 0.04894835501909256, + "learning_rate": 5.121331780635139e-06, + "loss": 0.0018, + "step": 169340 + }, + { + "epoch": 1.0861582962675536, + "grad_norm": 0.0937710553407669, + "learning_rate": 5.1207722413181275e-06, + "loss": 0.0017, + "step": 169350 + }, + { + "epoch": 1.0862224331613395, + "grad_norm": 0.13560210168361664, + "learning_rate": 5.120212700487753e-06, + "loss": 0.0026, + "step": 169360 + }, + { + "epoch": 1.0862865700551256, + "grad_norm": 0.05050661042332649, + "learning_rate": 5.119653158151027e-06, + "loss": 0.0008, + "step": 169370 + }, + { + "epoch": 1.0863507069489118, + "grad_norm": 0.02115233615040779, + "learning_rate": 5.119093614314963e-06, + "loss": 0.0023, + "step": 169380 + }, + { + "epoch": 1.086414843842698, + "grad_norm": 0.06858409941196442, + "learning_rate": 5.118534068986568e-06, + "loss": 0.0013, + "step": 169390 + }, + { + "epoch": 1.0864789807364839, + "grad_norm": 0.03543127328157425, + "learning_rate": 5.11797452217286e-06, + "loss": 0.0013, + "step": 169400 + }, + { + "epoch": 1.08654311763027, + "grad_norm": 0.09301551431417465, + "learning_rate": 5.1174149738808434e-06, + "loss": 0.0014, + "step": 169410 + }, + { + "epoch": 1.0866072545240562, + "grad_norm": 0.07186420261859894, + "learning_rate": 5.116855424117535e-06, + "loss": 0.0031, + "step": 169420 + }, + { + "epoch": 1.0866713914178423, + "grad_norm": 0.09942799061536789, + "learning_rate": 5.116295872889944e-06, + "loss": 0.0009, + "step": 169430 + }, + { + "epoch": 1.0867355283116282, + "grad_norm": 0.0785299763083458, + "learning_rate": 5.115736320205083e-06, + "loss": 0.0011, + "step": 169440 + }, + { + "epoch": 1.0867996652054144, + "grad_norm": 0.13406498730182648, + "learning_rate": 5.115176766069964e-06, + "loss": 0.0013, + "step": 169450 + }, + { + "epoch": 1.0868638020992005, + "grad_norm": 0.095372274518013, + "learning_rate": 5.114617210491597e-06, + "loss": 0.0014, + "step": 169460 + }, + { + "epoch": 1.0869279389929867, + "grad_norm": 0.2556072771549225, + "learning_rate": 5.114057653476995e-06, + "loss": 0.0013, + "step": 169470 + }, + { + "epoch": 1.0869920758867728, + "grad_norm": 0.07904695719480515, + "learning_rate": 5.113498095033167e-06, + "loss": 0.0012, + "step": 169480 + }, + { + "epoch": 1.0870562127805588, + "grad_norm": 0.054306890815496445, + "learning_rate": 5.1129385351671276e-06, + "loss": 0.002, + "step": 169490 + }, + { + "epoch": 1.087120349674345, + "grad_norm": 0.04829462617635727, + "learning_rate": 5.11237897388589e-06, + "loss": 0.001, + "step": 169500 + }, + { + "epoch": 1.087184486568131, + "grad_norm": 0.10848630219697952, + "learning_rate": 5.111819411196462e-06, + "loss": 0.0011, + "step": 169510 + }, + { + "epoch": 1.0872486234619172, + "grad_norm": 0.06292233616113663, + "learning_rate": 5.1112598471058565e-06, + "loss": 0.0022, + "step": 169520 + }, + { + "epoch": 1.0873127603557031, + "grad_norm": 0.0438341461122036, + "learning_rate": 5.110700281621085e-06, + "loss": 0.001, + "step": 169530 + }, + { + "epoch": 1.0873768972494893, + "grad_norm": 0.004697320517152548, + "learning_rate": 5.110140714749161e-06, + "loss": 0.0012, + "step": 169540 + }, + { + "epoch": 1.0874410341432754, + "grad_norm": 0.09202726185321808, + "learning_rate": 5.109581146497094e-06, + "loss": 0.0015, + "step": 169550 + }, + { + "epoch": 1.0875051710370616, + "grad_norm": 0.3498629629611969, + "learning_rate": 5.109021576871898e-06, + "loss": 0.0042, + "step": 169560 + }, + { + "epoch": 1.0875693079308477, + "grad_norm": 0.05647391080856323, + "learning_rate": 5.108462005880584e-06, + "loss": 0.0012, + "step": 169570 + }, + { + "epoch": 1.0876334448246336, + "grad_norm": 0.04639855772256851, + "learning_rate": 5.107902433530163e-06, + "loss": 0.0014, + "step": 169580 + }, + { + "epoch": 1.0876975817184198, + "grad_norm": 0.1349206417798996, + "learning_rate": 5.107342859827648e-06, + "loss": 0.0021, + "step": 169590 + }, + { + "epoch": 1.087761718612206, + "grad_norm": 0.028598807752132416, + "learning_rate": 5.1067832847800495e-06, + "loss": 0.0024, + "step": 169600 + }, + { + "epoch": 1.087825855505992, + "grad_norm": 0.07462950795888901, + "learning_rate": 5.10622370839438e-06, + "loss": 0.0017, + "step": 169610 + }, + { + "epoch": 1.087889992399778, + "grad_norm": 0.11065831035375595, + "learning_rate": 5.105664130677651e-06, + "loss": 0.0025, + "step": 169620 + }, + { + "epoch": 1.0879541292935642, + "grad_norm": 0.15748350322246552, + "learning_rate": 5.105104551636877e-06, + "loss": 0.0025, + "step": 169630 + }, + { + "epoch": 1.0880182661873503, + "grad_norm": 0.09333031624555588, + "learning_rate": 5.104544971279067e-06, + "loss": 0.0018, + "step": 169640 + }, + { + "epoch": 1.0880824030811365, + "grad_norm": 0.15592175722122192, + "learning_rate": 5.103985389611233e-06, + "loss": 0.0031, + "step": 169650 + }, + { + "epoch": 1.0881465399749224, + "grad_norm": 0.12185309827327728, + "learning_rate": 5.103425806640387e-06, + "loss": 0.0017, + "step": 169660 + }, + { + "epoch": 1.0882106768687085, + "grad_norm": 0.11560821533203125, + "learning_rate": 5.102866222373544e-06, + "loss": 0.0012, + "step": 169670 + }, + { + "epoch": 1.0882748137624947, + "grad_norm": 0.038899004459381104, + "learning_rate": 5.102306636817712e-06, + "loss": 0.0017, + "step": 169680 + }, + { + "epoch": 1.0883389506562808, + "grad_norm": 0.053067803382873535, + "learning_rate": 5.101747049979906e-06, + "loss": 0.0025, + "step": 169690 + }, + { + "epoch": 1.0884030875500668, + "grad_norm": 0.081266388297081, + "learning_rate": 5.101187461867135e-06, + "loss": 0.0018, + "step": 169700 + }, + { + "epoch": 1.088467224443853, + "grad_norm": 0.2550745904445648, + "learning_rate": 5.100627872486415e-06, + "loss": 0.0017, + "step": 169710 + }, + { + "epoch": 1.088531361337639, + "grad_norm": 0.1860969513654709, + "learning_rate": 5.1000682818447546e-06, + "loss": 0.0013, + "step": 169720 + }, + { + "epoch": 1.0885954982314252, + "grad_norm": 0.4227175712585449, + "learning_rate": 5.099508689949166e-06, + "loss": 0.0027, + "step": 169730 + }, + { + "epoch": 1.0886596351252114, + "grad_norm": 0.16643047332763672, + "learning_rate": 5.098949096806665e-06, + "loss": 0.0025, + "step": 169740 + }, + { + "epoch": 1.0887237720189973, + "grad_norm": 0.20546631515026093, + "learning_rate": 5.098389502424259e-06, + "loss": 0.0015, + "step": 169750 + }, + { + "epoch": 1.0887879089127834, + "grad_norm": 0.024851031601428986, + "learning_rate": 5.0978299068089635e-06, + "loss": 0.0016, + "step": 169760 + }, + { + "epoch": 1.0888520458065696, + "grad_norm": 0.4731219708919525, + "learning_rate": 5.097270309967788e-06, + "loss": 0.0039, + "step": 169770 + }, + { + "epoch": 1.0889161827003557, + "grad_norm": 0.14127282798290253, + "learning_rate": 5.096710711907748e-06, + "loss": 0.002, + "step": 169780 + }, + { + "epoch": 1.0889803195941417, + "grad_norm": 0.023380475118756294, + "learning_rate": 5.096151112635851e-06, + "loss": 0.0015, + "step": 169790 + }, + { + "epoch": 1.0890444564879278, + "grad_norm": 0.14559754729270935, + "learning_rate": 5.095591512159114e-06, + "loss": 0.0017, + "step": 169800 + }, + { + "epoch": 1.089108593381714, + "grad_norm": 0.04824541509151459, + "learning_rate": 5.0950319104845455e-06, + "loss": 0.0013, + "step": 169810 + }, + { + "epoch": 1.0891727302755, + "grad_norm": 0.068598173558712, + "learning_rate": 5.0944723076191605e-06, + "loss": 0.0025, + "step": 169820 + }, + { + "epoch": 1.089236867169286, + "grad_norm": 0.022282173857092857, + "learning_rate": 5.09391270356997e-06, + "loss": 0.0017, + "step": 169830 + }, + { + "epoch": 1.0893010040630722, + "grad_norm": 0.08425862342119217, + "learning_rate": 5.093353098343986e-06, + "loss": 0.0033, + "step": 169840 + }, + { + "epoch": 1.0893651409568583, + "grad_norm": 0.09538999944925308, + "learning_rate": 5.09279349194822e-06, + "loss": 0.0016, + "step": 169850 + }, + { + "epoch": 1.0894292778506445, + "grad_norm": 0.16932521760463715, + "learning_rate": 5.092233884389685e-06, + "loss": 0.0021, + "step": 169860 + }, + { + "epoch": 1.0894934147444304, + "grad_norm": 0.17279605567455292, + "learning_rate": 5.0916742756753934e-06, + "loss": 0.0014, + "step": 169870 + }, + { + "epoch": 1.0895575516382165, + "grad_norm": 0.09527702629566193, + "learning_rate": 5.09111466581236e-06, + "loss": 0.001, + "step": 169880 + }, + { + "epoch": 1.0896216885320027, + "grad_norm": 0.02729853428900242, + "learning_rate": 5.090555054807592e-06, + "loss": 0.0011, + "step": 169890 + }, + { + "epoch": 1.0896858254257888, + "grad_norm": 0.042803362011909485, + "learning_rate": 5.089995442668106e-06, + "loss": 0.0021, + "step": 169900 + }, + { + "epoch": 1.089749962319575, + "grad_norm": 0.03751653805375099, + "learning_rate": 5.089435829400913e-06, + "loss": 0.0012, + "step": 169910 + }, + { + "epoch": 1.089814099213361, + "grad_norm": 0.10222543776035309, + "learning_rate": 5.088876215013024e-06, + "loss": 0.0014, + "step": 169920 + }, + { + "epoch": 1.089878236107147, + "grad_norm": 0.04198543727397919, + "learning_rate": 5.0883165995114536e-06, + "loss": 0.002, + "step": 169930 + }, + { + "epoch": 1.0899423730009332, + "grad_norm": 0.05111617222428322, + "learning_rate": 5.087756982903212e-06, + "loss": 0.0036, + "step": 169940 + }, + { + "epoch": 1.0900065098947194, + "grad_norm": 0.16573524475097656, + "learning_rate": 5.087197365195315e-06, + "loss": 0.0035, + "step": 169950 + }, + { + "epoch": 1.0900706467885053, + "grad_norm": 0.16933542490005493, + "learning_rate": 5.08663774639477e-06, + "loss": 0.0023, + "step": 169960 + }, + { + "epoch": 1.0901347836822914, + "grad_norm": 0.2192375808954239, + "learning_rate": 5.086078126508593e-06, + "loss": 0.0027, + "step": 169970 + }, + { + "epoch": 1.0901989205760776, + "grad_norm": 0.008624457754194736, + "learning_rate": 5.085518505543796e-06, + "loss": 0.0021, + "step": 169980 + }, + { + "epoch": 1.0902630574698637, + "grad_norm": 0.004744633100926876, + "learning_rate": 5.084958883507392e-06, + "loss": 0.0015, + "step": 169990 + }, + { + "epoch": 1.0903271943636499, + "grad_norm": 0.13655802607536316, + "learning_rate": 5.08439926040639e-06, + "loss": 0.0014, + "step": 170000 + }, + { + "epoch": 1.0903913312574358, + "grad_norm": 0.03899003937840462, + "learning_rate": 5.083839636247807e-06, + "loss": 0.0022, + "step": 170010 + }, + { + "epoch": 1.090455468151222, + "grad_norm": 0.08106729388237, + "learning_rate": 5.0832800110386515e-06, + "loss": 0.0018, + "step": 170020 + }, + { + "epoch": 1.0905196050450081, + "grad_norm": 0.08407328277826309, + "learning_rate": 5.08272038478594e-06, + "loss": 0.0011, + "step": 170030 + }, + { + "epoch": 1.0905837419387943, + "grad_norm": 0.05661727860569954, + "learning_rate": 5.0821607574966816e-06, + "loss": 0.0008, + "step": 170040 + }, + { + "epoch": 1.0906478788325802, + "grad_norm": 0.044677119702100754, + "learning_rate": 5.081601129177891e-06, + "loss": 0.0028, + "step": 170050 + }, + { + "epoch": 1.0907120157263663, + "grad_norm": 0.0651017352938652, + "learning_rate": 5.081041499836579e-06, + "loss": 0.002, + "step": 170060 + }, + { + "epoch": 1.0907761526201525, + "grad_norm": 0.11894354224205017, + "learning_rate": 5.0804818694797595e-06, + "loss": 0.0019, + "step": 170070 + }, + { + "epoch": 1.0908402895139386, + "grad_norm": 0.11313174664974213, + "learning_rate": 5.079922238114445e-06, + "loss": 0.0018, + "step": 170080 + }, + { + "epoch": 1.0909044264077246, + "grad_norm": 0.08070102334022522, + "learning_rate": 5.079362605747648e-06, + "loss": 0.0012, + "step": 170090 + }, + { + "epoch": 1.0909685633015107, + "grad_norm": 0.13334757089614868, + "learning_rate": 5.07880297238638e-06, + "loss": 0.0016, + "step": 170100 + }, + { + "epoch": 1.0910327001952969, + "grad_norm": 0.052368223667144775, + "learning_rate": 5.078243338037654e-06, + "loss": 0.0016, + "step": 170110 + }, + { + "epoch": 1.091096837089083, + "grad_norm": 0.19929657876491547, + "learning_rate": 5.077683702708485e-06, + "loss": 0.001, + "step": 170120 + }, + { + "epoch": 1.091160973982869, + "grad_norm": 0.1845984309911728, + "learning_rate": 5.077124066405882e-06, + "loss": 0.0022, + "step": 170130 + }, + { + "epoch": 1.091225110876655, + "grad_norm": 0.09705594182014465, + "learning_rate": 5.07656442913686e-06, + "loss": 0.0027, + "step": 170140 + }, + { + "epoch": 1.0912892477704412, + "grad_norm": 0.04924891144037247, + "learning_rate": 5.076004790908431e-06, + "loss": 0.0036, + "step": 170150 + }, + { + "epoch": 1.0913533846642274, + "grad_norm": 0.02453775703907013, + "learning_rate": 5.075445151727607e-06, + "loss": 0.0014, + "step": 170160 + }, + { + "epoch": 1.0914175215580135, + "grad_norm": 0.0791323110461235, + "learning_rate": 5.074885511601402e-06, + "loss": 0.0012, + "step": 170170 + }, + { + "epoch": 1.0914816584517995, + "grad_norm": 0.16462315618991852, + "learning_rate": 5.074325870536828e-06, + "loss": 0.0022, + "step": 170180 + }, + { + "epoch": 1.0915457953455856, + "grad_norm": 0.01788683421909809, + "learning_rate": 5.073766228540897e-06, + "loss": 0.0011, + "step": 170190 + }, + { + "epoch": 1.0916099322393717, + "grad_norm": 0.08107317239046097, + "learning_rate": 5.073206585620622e-06, + "loss": 0.0012, + "step": 170200 + }, + { + "epoch": 1.091674069133158, + "grad_norm": 0.05761834979057312, + "learning_rate": 5.072646941783018e-06, + "loss": 0.0022, + "step": 170210 + }, + { + "epoch": 1.0917382060269438, + "grad_norm": 0.010808841325342655, + "learning_rate": 5.072087297035095e-06, + "loss": 0.0016, + "step": 170220 + }, + { + "epoch": 1.09180234292073, + "grad_norm": 0.08491594344377518, + "learning_rate": 5.071527651383867e-06, + "loss": 0.0025, + "step": 170230 + }, + { + "epoch": 1.0918664798145161, + "grad_norm": 0.43619436025619507, + "learning_rate": 5.0709680048363466e-06, + "loss": 0.0024, + "step": 170240 + }, + { + "epoch": 1.0919306167083023, + "grad_norm": 0.10487474501132965, + "learning_rate": 5.070408357399544e-06, + "loss": 0.0018, + "step": 170250 + }, + { + "epoch": 1.0919947536020882, + "grad_norm": 0.06518199294805527, + "learning_rate": 5.069848709080476e-06, + "loss": 0.0023, + "step": 170260 + }, + { + "epoch": 1.0920588904958743, + "grad_norm": 0.08357866108417511, + "learning_rate": 5.069289059886156e-06, + "loss": 0.0021, + "step": 170270 + }, + { + "epoch": 1.0921230273896605, + "grad_norm": 0.0606221966445446, + "learning_rate": 5.068729409823591e-06, + "loss": 0.0013, + "step": 170280 + }, + { + "epoch": 1.0921871642834466, + "grad_norm": 0.136131152510643, + "learning_rate": 5.0681697588997995e-06, + "loss": 0.0033, + "step": 170290 + }, + { + "epoch": 1.0922513011772326, + "grad_norm": 0.1497824490070343, + "learning_rate": 5.0676101071217905e-06, + "loss": 0.0013, + "step": 170300 + }, + { + "epoch": 1.0923154380710187, + "grad_norm": 0.012980225495994091, + "learning_rate": 5.067050454496582e-06, + "loss": 0.001, + "step": 170310 + }, + { + "epoch": 1.0923795749648049, + "grad_norm": 0.13039623200893402, + "learning_rate": 5.06649080103118e-06, + "loss": 0.0012, + "step": 170320 + }, + { + "epoch": 1.092443711858591, + "grad_norm": 0.2750491201877594, + "learning_rate": 5.065931146732602e-06, + "loss": 0.0028, + "step": 170330 + }, + { + "epoch": 1.0925078487523772, + "grad_norm": 0.20053251087665558, + "learning_rate": 5.065371491607859e-06, + "loss": 0.0012, + "step": 170340 + }, + { + "epoch": 1.092571985646163, + "grad_norm": 0.106719009578228, + "learning_rate": 5.0648118356639654e-06, + "loss": 0.0015, + "step": 170350 + }, + { + "epoch": 1.0926361225399492, + "grad_norm": 0.08977479487657547, + "learning_rate": 5.064252178907933e-06, + "loss": 0.0029, + "step": 170360 + }, + { + "epoch": 1.0927002594337354, + "grad_norm": 0.10772501677274704, + "learning_rate": 5.063692521346775e-06, + "loss": 0.0041, + "step": 170370 + }, + { + "epoch": 1.0927643963275215, + "grad_norm": 0.001853933441452682, + "learning_rate": 5.063132862987502e-06, + "loss": 0.0029, + "step": 170380 + }, + { + "epoch": 1.0928285332213075, + "grad_norm": 0.031118426471948624, + "learning_rate": 5.062573203837132e-06, + "loss": 0.0011, + "step": 170390 + }, + { + "epoch": 1.0928926701150936, + "grad_norm": 0.08649729937314987, + "learning_rate": 5.062013543902673e-06, + "loss": 0.0016, + "step": 170400 + }, + { + "epoch": 1.0929568070088798, + "grad_norm": 0.05934537202119827, + "learning_rate": 5.061453883191141e-06, + "loss": 0.0018, + "step": 170410 + }, + { + "epoch": 1.093020943902666, + "grad_norm": 0.05794312432408333, + "learning_rate": 5.060894221709548e-06, + "loss": 0.0017, + "step": 170420 + }, + { + "epoch": 1.093085080796452, + "grad_norm": 0.06355617940425873, + "learning_rate": 5.060334559464905e-06, + "loss": 0.0013, + "step": 170430 + }, + { + "epoch": 1.093149217690238, + "grad_norm": 0.07892221957445145, + "learning_rate": 5.05977489646423e-06, + "loss": 0.001, + "step": 170440 + }, + { + "epoch": 1.0932133545840241, + "grad_norm": 0.13034789264202118, + "learning_rate": 5.0592152327145316e-06, + "loss": 0.0015, + "step": 170450 + }, + { + "epoch": 1.0932774914778103, + "grad_norm": 0.07530619204044342, + "learning_rate": 5.058655568222823e-06, + "loss": 0.0016, + "step": 170460 + }, + { + "epoch": 1.0933416283715964, + "grad_norm": 0.11381381005048752, + "learning_rate": 5.05809590299612e-06, + "loss": 0.0016, + "step": 170470 + }, + { + "epoch": 1.0934057652653824, + "grad_norm": 0.24479657411575317, + "learning_rate": 5.057536237041433e-06, + "loss": 0.0037, + "step": 170480 + }, + { + "epoch": 1.0934699021591685, + "grad_norm": 0.04012688249349594, + "learning_rate": 5.056976570365775e-06, + "loss": 0.0019, + "step": 170490 + }, + { + "epoch": 1.0935340390529547, + "grad_norm": 0.08466518670320511, + "learning_rate": 5.0564169029761615e-06, + "loss": 0.0008, + "step": 170500 + }, + { + "epoch": 1.0935981759467408, + "grad_norm": 0.16974368691444397, + "learning_rate": 5.055857234879602e-06, + "loss": 0.0014, + "step": 170510 + }, + { + "epoch": 1.0936623128405267, + "grad_norm": 0.027867648750543594, + "learning_rate": 5.055297566083113e-06, + "loss": 0.0012, + "step": 170520 + }, + { + "epoch": 1.0937264497343129, + "grad_norm": 0.4133170545101166, + "learning_rate": 5.054737896593704e-06, + "loss": 0.0035, + "step": 170530 + }, + { + "epoch": 1.093790586628099, + "grad_norm": 0.07212081551551819, + "learning_rate": 5.054178226418392e-06, + "loss": 0.0015, + "step": 170540 + }, + { + "epoch": 1.0938547235218852, + "grad_norm": 0.135950967669487, + "learning_rate": 5.053618555564187e-06, + "loss": 0.001, + "step": 170550 + }, + { + "epoch": 1.093918860415671, + "grad_norm": 0.2264435589313507, + "learning_rate": 5.053058884038103e-06, + "loss": 0.0021, + "step": 170560 + }, + { + "epoch": 1.0939829973094573, + "grad_norm": 0.0787590816617012, + "learning_rate": 5.052499211847155e-06, + "loss": 0.0016, + "step": 170570 + }, + { + "epoch": 1.0940471342032434, + "grad_norm": 0.005399610847234726, + "learning_rate": 5.051939538998352e-06, + "loss": 0.0014, + "step": 170580 + }, + { + "epoch": 1.0941112710970295, + "grad_norm": 0.057541925460100174, + "learning_rate": 5.0513798654987116e-06, + "loss": 0.0014, + "step": 170590 + }, + { + "epoch": 1.0941754079908157, + "grad_norm": 0.11907067894935608, + "learning_rate": 5.050820191355243e-06, + "loss": 0.0012, + "step": 170600 + }, + { + "epoch": 1.0942395448846016, + "grad_norm": 0.16507697105407715, + "learning_rate": 5.050260516574963e-06, + "loss": 0.0016, + "step": 170610 + }, + { + "epoch": 1.0943036817783878, + "grad_norm": 0.06532784551382065, + "learning_rate": 5.049700841164881e-06, + "loss": 0.0008, + "step": 170620 + }, + { + "epoch": 1.094367818672174, + "grad_norm": 0.06784983724355698, + "learning_rate": 5.049141165132012e-06, + "loss": 0.0026, + "step": 170630 + }, + { + "epoch": 1.09443195556596, + "grad_norm": 0.04982329532504082, + "learning_rate": 5.0485814884833695e-06, + "loss": 0.0022, + "step": 170640 + }, + { + "epoch": 1.094496092459746, + "grad_norm": 0.18449866771697998, + "learning_rate": 5.048021811225966e-06, + "loss": 0.0023, + "step": 170650 + }, + { + "epoch": 1.0945602293535321, + "grad_norm": 0.3333454430103302, + "learning_rate": 5.047462133366815e-06, + "loss": 0.0021, + "step": 170660 + }, + { + "epoch": 1.0946243662473183, + "grad_norm": 0.13253095746040344, + "learning_rate": 5.046902454912931e-06, + "loss": 0.0008, + "step": 170670 + }, + { + "epoch": 1.0946885031411044, + "grad_norm": 0.05479660630226135, + "learning_rate": 5.0463427758713234e-06, + "loss": 0.0013, + "step": 170680 + }, + { + "epoch": 1.0947526400348904, + "grad_norm": 0.10303059965372086, + "learning_rate": 5.04578309624901e-06, + "loss": 0.0024, + "step": 170690 + }, + { + "epoch": 1.0948167769286765, + "grad_norm": 0.06154974550008774, + "learning_rate": 5.045223416053e-06, + "loss": 0.0008, + "step": 170700 + }, + { + "epoch": 1.0948809138224627, + "grad_norm": 0.06780397891998291, + "learning_rate": 5.044663735290309e-06, + "loss": 0.0031, + "step": 170710 + }, + { + "epoch": 1.0949450507162488, + "grad_norm": 0.23820923268795013, + "learning_rate": 5.04410405396795e-06, + "loss": 0.0024, + "step": 170720 + }, + { + "epoch": 1.095009187610035, + "grad_norm": 0.1282614916563034, + "learning_rate": 5.0435443720929344e-06, + "loss": 0.0013, + "step": 170730 + }, + { + "epoch": 1.0950733245038209, + "grad_norm": 0.060902614146471024, + "learning_rate": 5.042984689672278e-06, + "loss": 0.0022, + "step": 170740 + }, + { + "epoch": 1.095137461397607, + "grad_norm": 0.21891924738883972, + "learning_rate": 5.042425006712992e-06, + "loss": 0.0014, + "step": 170750 + }, + { + "epoch": 1.0952015982913932, + "grad_norm": 0.014451575465500355, + "learning_rate": 5.041865323222089e-06, + "loss": 0.0004, + "step": 170760 + }, + { + "epoch": 1.0952657351851793, + "grad_norm": 0.044007159769535065, + "learning_rate": 5.041305639206585e-06, + "loss": 0.0006, + "step": 170770 + }, + { + "epoch": 1.0953298720789653, + "grad_norm": 0.0831480324268341, + "learning_rate": 5.040745954673492e-06, + "loss": 0.002, + "step": 170780 + }, + { + "epoch": 1.0953940089727514, + "grad_norm": 0.13365034759044647, + "learning_rate": 5.040186269629823e-06, + "loss": 0.0015, + "step": 170790 + }, + { + "epoch": 1.0954581458665376, + "grad_norm": 0.19277553260326385, + "learning_rate": 5.039626584082592e-06, + "loss": 0.0016, + "step": 170800 + }, + { + "epoch": 1.0955222827603237, + "grad_norm": 0.13959717750549316, + "learning_rate": 5.03906689803881e-06, + "loss": 0.0013, + "step": 170810 + }, + { + "epoch": 1.0955864196541096, + "grad_norm": 0.25731614232063293, + "learning_rate": 5.038507211505493e-06, + "loss": 0.0015, + "step": 170820 + }, + { + "epoch": 1.0956505565478958, + "grad_norm": 0.07411252707242966, + "learning_rate": 5.0379475244896525e-06, + "loss": 0.0025, + "step": 170830 + }, + { + "epoch": 1.095714693441682, + "grad_norm": 0.03390123322606087, + "learning_rate": 5.0373878369983035e-06, + "loss": 0.003, + "step": 170840 + }, + { + "epoch": 1.095778830335468, + "grad_norm": 0.036655180156230927, + "learning_rate": 5.036828149038456e-06, + "loss": 0.0019, + "step": 170850 + }, + { + "epoch": 1.0958429672292542, + "grad_norm": 0.022341804578900337, + "learning_rate": 5.036268460617128e-06, + "loss": 0.002, + "step": 170860 + }, + { + "epoch": 1.0959071041230402, + "grad_norm": 0.12349379062652588, + "learning_rate": 5.035708771741328e-06, + "loss": 0.0026, + "step": 170870 + }, + { + "epoch": 1.0959712410168263, + "grad_norm": 0.02770046703517437, + "learning_rate": 5.035149082418074e-06, + "loss": 0.001, + "step": 170880 + }, + { + "epoch": 1.0960353779106125, + "grad_norm": 0.10235182195901871, + "learning_rate": 5.034589392654374e-06, + "loss": 0.0016, + "step": 170890 + }, + { + "epoch": 1.0960995148043986, + "grad_norm": 0.0583653599023819, + "learning_rate": 5.034029702457246e-06, + "loss": 0.0013, + "step": 170900 + }, + { + "epoch": 1.0961636516981845, + "grad_norm": 0.09971941262483597, + "learning_rate": 5.033470011833701e-06, + "loss": 0.0006, + "step": 170910 + }, + { + "epoch": 1.0962277885919707, + "grad_norm": 0.16799309849739075, + "learning_rate": 5.0329103207907526e-06, + "loss": 0.0019, + "step": 170920 + }, + { + "epoch": 1.0962919254857568, + "grad_norm": 0.054120440036058426, + "learning_rate": 5.0323506293354156e-06, + "loss": 0.0013, + "step": 170930 + }, + { + "epoch": 1.096356062379543, + "grad_norm": 0.01602933183312416, + "learning_rate": 5.031790937474701e-06, + "loss": 0.0017, + "step": 170940 + }, + { + "epoch": 1.096420199273329, + "grad_norm": 0.03627896308898926, + "learning_rate": 5.031231245215623e-06, + "loss": 0.002, + "step": 170950 + }, + { + "epoch": 1.096484336167115, + "grad_norm": 0.11360101401805878, + "learning_rate": 5.030671552565196e-06, + "loss": 0.0011, + "step": 170960 + }, + { + "epoch": 1.0965484730609012, + "grad_norm": 0.1410820335149765, + "learning_rate": 5.030111859530432e-06, + "loss": 0.0021, + "step": 170970 + }, + { + "epoch": 1.0966126099546873, + "grad_norm": 0.15676705539226532, + "learning_rate": 5.029552166118345e-06, + "loss": 0.0016, + "step": 170980 + }, + { + "epoch": 1.0966767468484733, + "grad_norm": 0.03369128704071045, + "learning_rate": 5.028992472335949e-06, + "loss": 0.0019, + "step": 170990 + }, + { + "epoch": 1.0967408837422594, + "grad_norm": 0.05345707759261131, + "learning_rate": 5.0284327781902556e-06, + "loss": 0.0019, + "step": 171000 + }, + { + "epoch": 1.0968050206360456, + "grad_norm": 0.3105112314224243, + "learning_rate": 5.027873083688279e-06, + "loss": 0.0029, + "step": 171010 + }, + { + "epoch": 1.0968691575298317, + "grad_norm": 0.11570075154304504, + "learning_rate": 5.027313388837032e-06, + "loss": 0.0017, + "step": 171020 + }, + { + "epoch": 1.0969332944236179, + "grad_norm": 0.025331120938062668, + "learning_rate": 5.026753693643531e-06, + "loss": 0.0015, + "step": 171030 + }, + { + "epoch": 1.0969974313174038, + "grad_norm": 0.07241302728652954, + "learning_rate": 5.0261939981147855e-06, + "loss": 0.0012, + "step": 171040 + }, + { + "epoch": 1.09706156821119, + "grad_norm": 0.08699067682027817, + "learning_rate": 5.025634302257811e-06, + "loss": 0.0016, + "step": 171050 + }, + { + "epoch": 1.097125705104976, + "grad_norm": 0.04733594134449959, + "learning_rate": 5.025074606079621e-06, + "loss": 0.0016, + "step": 171060 + }, + { + "epoch": 1.0971898419987622, + "grad_norm": 0.22508540749549866, + "learning_rate": 5.024514909587228e-06, + "loss": 0.0018, + "step": 171070 + }, + { + "epoch": 1.0972539788925482, + "grad_norm": 0.09042717516422272, + "learning_rate": 5.023955212787644e-06, + "loss": 0.0011, + "step": 171080 + }, + { + "epoch": 1.0973181157863343, + "grad_norm": 0.2919725477695465, + "learning_rate": 5.0233955156878864e-06, + "loss": 0.0016, + "step": 171090 + }, + { + "epoch": 1.0973822526801205, + "grad_norm": 0.07168257981538773, + "learning_rate": 5.0228358182949645e-06, + "loss": 0.0014, + "step": 171100 + }, + { + "epoch": 1.0974463895739066, + "grad_norm": 0.10578005760908127, + "learning_rate": 5.0222761206158945e-06, + "loss": 0.0037, + "step": 171110 + }, + { + "epoch": 1.0975105264676928, + "grad_norm": 0.029910454526543617, + "learning_rate": 5.021716422657688e-06, + "loss": 0.0009, + "step": 171120 + }, + { + "epoch": 1.0975746633614787, + "grad_norm": 0.033341534435749054, + "learning_rate": 5.02115672442736e-06, + "loss": 0.0037, + "step": 171130 + }, + { + "epoch": 1.0976388002552648, + "grad_norm": 0.09771548211574554, + "learning_rate": 5.020597025931923e-06, + "loss": 0.0032, + "step": 171140 + }, + { + "epoch": 1.097702937149051, + "grad_norm": 0.03211095929145813, + "learning_rate": 5.02003732717839e-06, + "loss": 0.0015, + "step": 171150 + }, + { + "epoch": 1.0977670740428371, + "grad_norm": 0.04773737117648125, + "learning_rate": 5.019477628173775e-06, + "loss": 0.002, + "step": 171160 + }, + { + "epoch": 1.097831210936623, + "grad_norm": 0.03710533678531647, + "learning_rate": 5.018917928925092e-06, + "loss": 0.0013, + "step": 171170 + }, + { + "epoch": 1.0978953478304092, + "grad_norm": 0.6000286936759949, + "learning_rate": 5.018358229439354e-06, + "loss": 0.0025, + "step": 171180 + }, + { + "epoch": 1.0979594847241954, + "grad_norm": 0.013128839433193207, + "learning_rate": 5.017798529723573e-06, + "loss": 0.0015, + "step": 171190 + }, + { + "epoch": 1.0980236216179815, + "grad_norm": 0.01816055364906788, + "learning_rate": 5.017238829784766e-06, + "loss": 0.0015, + "step": 171200 + }, + { + "epoch": 1.0980877585117674, + "grad_norm": 0.0966353639960289, + "learning_rate": 5.016679129629942e-06, + "loss": 0.0016, + "step": 171210 + }, + { + "epoch": 1.0981518954055536, + "grad_norm": 0.18456101417541504, + "learning_rate": 5.016119429266118e-06, + "loss": 0.0019, + "step": 171220 + }, + { + "epoch": 1.0982160322993397, + "grad_norm": 0.07079457491636276, + "learning_rate": 5.015559728700304e-06, + "loss": 0.0029, + "step": 171230 + }, + { + "epoch": 1.0982801691931259, + "grad_norm": 0.0704764872789383, + "learning_rate": 5.015000027939518e-06, + "loss": 0.0018, + "step": 171240 + }, + { + "epoch": 1.0983443060869118, + "grad_norm": 0.14907720685005188, + "learning_rate": 5.014440326990769e-06, + "loss": 0.0025, + "step": 171250 + }, + { + "epoch": 1.098408442980698, + "grad_norm": 0.049817949533462524, + "learning_rate": 5.013880625861072e-06, + "loss": 0.0012, + "step": 171260 + }, + { + "epoch": 1.098472579874484, + "grad_norm": 0.08189506828784943, + "learning_rate": 5.013320924557443e-06, + "loss": 0.0026, + "step": 171270 + }, + { + "epoch": 1.0985367167682702, + "grad_norm": 0.13400381803512573, + "learning_rate": 5.012761223086891e-06, + "loss": 0.002, + "step": 171280 + }, + { + "epoch": 1.0986008536620564, + "grad_norm": 0.034556783735752106, + "learning_rate": 5.012201521456435e-06, + "loss": 0.0018, + "step": 171290 + }, + { + "epoch": 1.0986649905558423, + "grad_norm": 0.026275919750332832, + "learning_rate": 5.011641819673082e-06, + "loss": 0.0025, + "step": 171300 + }, + { + "epoch": 1.0987291274496285, + "grad_norm": 0.06027844920754433, + "learning_rate": 5.01108211774385e-06, + "loss": 0.001, + "step": 171310 + }, + { + "epoch": 1.0987932643434146, + "grad_norm": 0.07787550240755081, + "learning_rate": 5.010522415675751e-06, + "loss": 0.0012, + "step": 171320 + }, + { + "epoch": 1.0988574012372008, + "grad_norm": 0.10380320250988007, + "learning_rate": 5.009962713475799e-06, + "loss": 0.0009, + "step": 171330 + }, + { + "epoch": 1.0989215381309867, + "grad_norm": 0.20597901940345764, + "learning_rate": 5.009403011151006e-06, + "loss": 0.0038, + "step": 171340 + }, + { + "epoch": 1.0989856750247728, + "grad_norm": 0.05098361149430275, + "learning_rate": 5.008843308708389e-06, + "loss": 0.0012, + "step": 171350 + }, + { + "epoch": 1.099049811918559, + "grad_norm": 0.03223688155412674, + "learning_rate": 5.008283606154957e-06, + "loss": 0.0007, + "step": 171360 + }, + { + "epoch": 1.0991139488123451, + "grad_norm": 0.023974584415555, + "learning_rate": 5.007723903497725e-06, + "loss": 0.0015, + "step": 171370 + }, + { + "epoch": 1.099178085706131, + "grad_norm": 0.03929032385349274, + "learning_rate": 5.007164200743708e-06, + "loss": 0.0029, + "step": 171380 + }, + { + "epoch": 1.0992422225999172, + "grad_norm": 0.06479243189096451, + "learning_rate": 5.0066044978999185e-06, + "loss": 0.002, + "step": 171390 + }, + { + "epoch": 1.0993063594937034, + "grad_norm": 0.09911324083805084, + "learning_rate": 5.006044794973368e-06, + "loss": 0.0013, + "step": 171400 + }, + { + "epoch": 1.0993704963874895, + "grad_norm": 0.09658053517341614, + "learning_rate": 5.005485091971074e-06, + "loss": 0.0008, + "step": 171410 + }, + { + "epoch": 1.0994346332812754, + "grad_norm": 0.0684509128332138, + "learning_rate": 5.004925388900047e-06, + "loss": 0.0033, + "step": 171420 + }, + { + "epoch": 1.0994987701750616, + "grad_norm": 0.06243107095360756, + "learning_rate": 5.004365685767303e-06, + "loss": 0.0022, + "step": 171430 + }, + { + "epoch": 1.0995629070688477, + "grad_norm": 0.17232149839401245, + "learning_rate": 5.003805982579851e-06, + "loss": 0.0016, + "step": 171440 + }, + { + "epoch": 1.0996270439626339, + "grad_norm": 0.07372458279132843, + "learning_rate": 5.00324627934471e-06, + "loss": 0.0018, + "step": 171450 + }, + { + "epoch": 1.09969118085642, + "grad_norm": 0.04588346928358078, + "learning_rate": 5.002686576068889e-06, + "loss": 0.0022, + "step": 171460 + }, + { + "epoch": 1.099755317750206, + "grad_norm": 0.04140771925449371, + "learning_rate": 5.002126872759404e-06, + "loss": 0.0014, + "step": 171470 + }, + { + "epoch": 1.0998194546439921, + "grad_norm": 0.04907546937465668, + "learning_rate": 5.001567169423268e-06, + "loss": 0.0013, + "step": 171480 + }, + { + "epoch": 1.0998835915377783, + "grad_norm": 0.07015169411897659, + "learning_rate": 5.001007466067493e-06, + "loss": 0.0016, + "step": 171490 + }, + { + "epoch": 1.0999477284315644, + "grad_norm": 0.0467178113758564, + "learning_rate": 5.000447762699095e-06, + "loss": 0.0021, + "step": 171500 + }, + { + "epoch": 1.1000118653253503, + "grad_norm": 0.16069184243679047, + "learning_rate": 4.999888059325087e-06, + "loss": 0.0043, + "step": 171510 + }, + { + "epoch": 1.1000760022191365, + "grad_norm": 0.12354295700788498, + "learning_rate": 4.999328355952481e-06, + "loss": 0.0028, + "step": 171520 + }, + { + "epoch": 1.1001401391129226, + "grad_norm": 0.04084980860352516, + "learning_rate": 4.99876865258829e-06, + "loss": 0.0023, + "step": 171530 + }, + { + "epoch": 1.1002042760067088, + "grad_norm": 0.2395259439945221, + "learning_rate": 4.998208949239531e-06, + "loss": 0.0015, + "step": 171540 + }, + { + "epoch": 1.100268412900495, + "grad_norm": 0.14555861055850983, + "learning_rate": 4.997649245913213e-06, + "loss": 0.0015, + "step": 171550 + }, + { + "epoch": 1.1003325497942809, + "grad_norm": 0.008490769192576408, + "learning_rate": 4.9970895426163515e-06, + "loss": 0.0032, + "step": 171560 + }, + { + "epoch": 1.100396686688067, + "grad_norm": 0.11735638231039047, + "learning_rate": 4.996529839355961e-06, + "loss": 0.0016, + "step": 171570 + }, + { + "epoch": 1.1004608235818532, + "grad_norm": 0.058760784566402435, + "learning_rate": 4.995970136139054e-06, + "loss": 0.0009, + "step": 171580 + }, + { + "epoch": 1.1005249604756393, + "grad_norm": 0.2136850357055664, + "learning_rate": 4.995410432972643e-06, + "loss": 0.003, + "step": 171590 + }, + { + "epoch": 1.1005890973694252, + "grad_norm": 0.09907729923725128, + "learning_rate": 4.994850729863744e-06, + "loss": 0.0021, + "step": 171600 + }, + { + "epoch": 1.1006532342632114, + "grad_norm": 0.041656531393527985, + "learning_rate": 4.994291026819368e-06, + "loss": 0.0015, + "step": 171610 + }, + { + "epoch": 1.1007173711569975, + "grad_norm": 0.07977510988712311, + "learning_rate": 4.993731323846531e-06, + "loss": 0.0016, + "step": 171620 + }, + { + "epoch": 1.1007815080507837, + "grad_norm": 0.011695623397827148, + "learning_rate": 4.993171620952243e-06, + "loss": 0.0012, + "step": 171630 + }, + { + "epoch": 1.1008456449445696, + "grad_norm": 0.10950488597154617, + "learning_rate": 4.992611918143521e-06, + "loss": 0.001, + "step": 171640 + }, + { + "epoch": 1.1009097818383557, + "grad_norm": 0.06872362643480301, + "learning_rate": 4.992052215427376e-06, + "loss": 0.0034, + "step": 171650 + }, + { + "epoch": 1.100973918732142, + "grad_norm": 0.12201790511608124, + "learning_rate": 4.9914925128108235e-06, + "loss": 0.0017, + "step": 171660 + }, + { + "epoch": 1.101038055625928, + "grad_norm": 0.2046944946050644, + "learning_rate": 4.990932810300876e-06, + "loss": 0.0022, + "step": 171670 + }, + { + "epoch": 1.101102192519714, + "grad_norm": 0.18569254875183105, + "learning_rate": 4.9903731079045444e-06, + "loss": 0.0017, + "step": 171680 + }, + { + "epoch": 1.1011663294135001, + "grad_norm": 0.06764160841703415, + "learning_rate": 4.989813405628848e-06, + "loss": 0.0012, + "step": 171690 + }, + { + "epoch": 1.1012304663072863, + "grad_norm": 0.06488970667123795, + "learning_rate": 4.989253703480794e-06, + "loss": 0.0012, + "step": 171700 + }, + { + "epoch": 1.1012946032010724, + "grad_norm": 0.10774311423301697, + "learning_rate": 4.988694001467401e-06, + "loss": 0.0008, + "step": 171710 + }, + { + "epoch": 1.1013587400948586, + "grad_norm": 0.12963354587554932, + "learning_rate": 4.988134299595679e-06, + "loss": 0.0009, + "step": 171720 + }, + { + "epoch": 1.1014228769886445, + "grad_norm": 0.12337248772382736, + "learning_rate": 4.987574597872643e-06, + "loss": 0.0018, + "step": 171730 + }, + { + "epoch": 1.1014870138824306, + "grad_norm": 0.2637619972229004, + "learning_rate": 4.987014896305307e-06, + "loss": 0.002, + "step": 171740 + }, + { + "epoch": 1.1015511507762168, + "grad_norm": 0.13124048709869385, + "learning_rate": 4.986455194900683e-06, + "loss": 0.0016, + "step": 171750 + }, + { + "epoch": 1.101615287670003, + "grad_norm": 0.22929047048091888, + "learning_rate": 4.9858954936657845e-06, + "loss": 0.0022, + "step": 171760 + }, + { + "epoch": 1.1016794245637889, + "grad_norm": 0.06770718097686768, + "learning_rate": 4.9853357926076265e-06, + "loss": 0.0024, + "step": 171770 + }, + { + "epoch": 1.101743561457575, + "grad_norm": 0.1008705347776413, + "learning_rate": 4.98477609173322e-06, + "loss": 0.0032, + "step": 171780 + }, + { + "epoch": 1.1018076983513612, + "grad_norm": 0.10125420987606049, + "learning_rate": 4.984216391049582e-06, + "loss": 0.0019, + "step": 171790 + }, + { + "epoch": 1.1018718352451473, + "grad_norm": 0.0495884008705616, + "learning_rate": 4.983656690563722e-06, + "loss": 0.0034, + "step": 171800 + }, + { + "epoch": 1.1019359721389332, + "grad_norm": 0.006137746851891279, + "learning_rate": 4.983096990282657e-06, + "loss": 0.0012, + "step": 171810 + }, + { + "epoch": 1.1020001090327194, + "grad_norm": 0.039007000625133514, + "learning_rate": 4.982537290213398e-06, + "loss": 0.001, + "step": 171820 + }, + { + "epoch": 1.1020642459265055, + "grad_norm": 0.01688682846724987, + "learning_rate": 4.981977590362959e-06, + "loss": 0.0009, + "step": 171830 + }, + { + "epoch": 1.1021283828202917, + "grad_norm": 0.135686457157135, + "learning_rate": 4.981417890738354e-06, + "loss": 0.0037, + "step": 171840 + }, + { + "epoch": 1.1021925197140776, + "grad_norm": 0.0436604842543602, + "learning_rate": 4.980858191346595e-06, + "loss": 0.0015, + "step": 171850 + }, + { + "epoch": 1.1022566566078638, + "grad_norm": 0.1291503757238388, + "learning_rate": 4.980298492194699e-06, + "loss": 0.0018, + "step": 171860 + }, + { + "epoch": 1.10232079350165, + "grad_norm": 0.27027952671051025, + "learning_rate": 4.979738793289675e-06, + "loss": 0.0024, + "step": 171870 + }, + { + "epoch": 1.102384930395436, + "grad_norm": 0.014753194525837898, + "learning_rate": 4.9791790946385395e-06, + "loss": 0.0018, + "step": 171880 + }, + { + "epoch": 1.1024490672892222, + "grad_norm": 0.039702508598566055, + "learning_rate": 4.978619396248304e-06, + "loss": 0.0015, + "step": 171890 + }, + { + "epoch": 1.1025132041830081, + "grad_norm": 0.08132674545049667, + "learning_rate": 4.978059698125983e-06, + "loss": 0.0017, + "step": 171900 + }, + { + "epoch": 1.1025773410767943, + "grad_norm": 0.2327902913093567, + "learning_rate": 4.977500000278589e-06, + "loss": 0.0025, + "step": 171910 + }, + { + "epoch": 1.1026414779705804, + "grad_norm": 0.01180790364742279, + "learning_rate": 4.976940302713138e-06, + "loss": 0.0019, + "step": 171920 + }, + { + "epoch": 1.1027056148643666, + "grad_norm": 0.08049006760120392, + "learning_rate": 4.976380605436639e-06, + "loss": 0.0012, + "step": 171930 + }, + { + "epoch": 1.1027697517581525, + "grad_norm": 0.02945333532989025, + "learning_rate": 4.97582090845611e-06, + "loss": 0.0019, + "step": 171940 + }, + { + "epoch": 1.1028338886519387, + "grad_norm": 0.12358007580041885, + "learning_rate": 4.975261211778561e-06, + "loss": 0.0008, + "step": 171950 + }, + { + "epoch": 1.1028980255457248, + "grad_norm": 0.1592055857181549, + "learning_rate": 4.974701515411008e-06, + "loss": 0.0009, + "step": 171960 + }, + { + "epoch": 1.102962162439511, + "grad_norm": 0.22565963864326477, + "learning_rate": 4.974141819360461e-06, + "loss": 0.001, + "step": 171970 + }, + { + "epoch": 1.103026299333297, + "grad_norm": 0.2617228627204895, + "learning_rate": 4.973582123633936e-06, + "loss": 0.0016, + "step": 171980 + }, + { + "epoch": 1.103090436227083, + "grad_norm": 0.01660790480673313, + "learning_rate": 4.9730224282384475e-06, + "loss": 0.0014, + "step": 171990 + }, + { + "epoch": 1.1031545731208692, + "grad_norm": 0.009054155088961124, + "learning_rate": 4.972462733181006e-06, + "loss": 0.0014, + "step": 172000 + }, + { + "epoch": 1.1032187100146553, + "grad_norm": 0.09953287988901138, + "learning_rate": 4.971903038468627e-06, + "loss": 0.0018, + "step": 172010 + }, + { + "epoch": 1.1032828469084415, + "grad_norm": 0.0722479522228241, + "learning_rate": 4.971343344108322e-06, + "loss": 0.0014, + "step": 172020 + }, + { + "epoch": 1.1033469838022274, + "grad_norm": 0.11470306664705276, + "learning_rate": 4.970783650107107e-06, + "loss": 0.0014, + "step": 172030 + }, + { + "epoch": 1.1034111206960135, + "grad_norm": 0.03702601417899132, + "learning_rate": 4.970223956471992e-06, + "loss": 0.0013, + "step": 172040 + }, + { + "epoch": 1.1034752575897997, + "grad_norm": 0.17161774635314941, + "learning_rate": 4.969664263209993e-06, + "loss": 0.0026, + "step": 172050 + }, + { + "epoch": 1.1035393944835858, + "grad_norm": 0.12393510341644287, + "learning_rate": 4.969104570328122e-06, + "loss": 0.0011, + "step": 172060 + }, + { + "epoch": 1.1036035313773718, + "grad_norm": 0.09345302730798721, + "learning_rate": 4.968544877833393e-06, + "loss": 0.0024, + "step": 172070 + }, + { + "epoch": 1.103667668271158, + "grad_norm": 0.2323468178510666, + "learning_rate": 4.967985185732819e-06, + "loss": 0.0017, + "step": 172080 + }, + { + "epoch": 1.103731805164944, + "grad_norm": 0.06289396435022354, + "learning_rate": 4.967425494033414e-06, + "loss": 0.0011, + "step": 172090 + }, + { + "epoch": 1.1037959420587302, + "grad_norm": 0.03909099102020264, + "learning_rate": 4.96686580274219e-06, + "loss": 0.0041, + "step": 172100 + }, + { + "epoch": 1.1038600789525161, + "grad_norm": 0.1267758309841156, + "learning_rate": 4.9663061118661615e-06, + "loss": 0.0013, + "step": 172110 + }, + { + "epoch": 1.1039242158463023, + "grad_norm": 0.07649413496255875, + "learning_rate": 4.965746421412342e-06, + "loss": 0.0025, + "step": 172120 + }, + { + "epoch": 1.1039883527400884, + "grad_norm": 0.019668733701109886, + "learning_rate": 4.9651867313877435e-06, + "loss": 0.0014, + "step": 172130 + }, + { + "epoch": 1.1040524896338746, + "grad_norm": 0.11278221011161804, + "learning_rate": 4.9646270417993814e-06, + "loss": 0.0012, + "step": 172140 + }, + { + "epoch": 1.1041166265276607, + "grad_norm": 0.20489659905433655, + "learning_rate": 4.964067352654267e-06, + "loss": 0.0014, + "step": 172150 + }, + { + "epoch": 1.1041807634214467, + "grad_norm": 0.06713900715112686, + "learning_rate": 4.963507663959414e-06, + "loss": 0.0023, + "step": 172160 + }, + { + "epoch": 1.1042449003152328, + "grad_norm": 0.12010787427425385, + "learning_rate": 4.962947975721836e-06, + "loss": 0.0024, + "step": 172170 + }, + { + "epoch": 1.104309037209019, + "grad_norm": 0.10179290920495987, + "learning_rate": 4.962388287948548e-06, + "loss": 0.0009, + "step": 172180 + }, + { + "epoch": 1.104373174102805, + "grad_norm": 0.07030438631772995, + "learning_rate": 4.961828600646559e-06, + "loss": 0.0013, + "step": 172190 + }, + { + "epoch": 1.104437310996591, + "grad_norm": 0.1282183825969696, + "learning_rate": 4.961268913822887e-06, + "loss": 0.0008, + "step": 172200 + }, + { + "epoch": 1.1045014478903772, + "grad_norm": 0.18232722580432892, + "learning_rate": 4.960709227484542e-06, + "loss": 0.0013, + "step": 172210 + }, + { + "epoch": 1.1045655847841633, + "grad_norm": 0.00621228851377964, + "learning_rate": 4.960149541638539e-06, + "loss": 0.0013, + "step": 172220 + }, + { + "epoch": 1.1046297216779495, + "grad_norm": 0.034846991300582886, + "learning_rate": 4.9595898562918905e-06, + "loss": 0.0014, + "step": 172230 + }, + { + "epoch": 1.1046938585717354, + "grad_norm": 0.2832137942314148, + "learning_rate": 4.959030171451611e-06, + "loss": 0.0014, + "step": 172240 + }, + { + "epoch": 1.1047579954655216, + "grad_norm": 0.14668747782707214, + "learning_rate": 4.9584704871247114e-06, + "loss": 0.0015, + "step": 172250 + }, + { + "epoch": 1.1048221323593077, + "grad_norm": 0.08905400335788727, + "learning_rate": 4.9579108033182065e-06, + "loss": 0.0022, + "step": 172260 + }, + { + "epoch": 1.1048862692530939, + "grad_norm": 0.09415697306394577, + "learning_rate": 4.957351120039109e-06, + "loss": 0.0021, + "step": 172270 + }, + { + "epoch": 1.10495040614688, + "grad_norm": 0.15120045840740204, + "learning_rate": 4.956791437294433e-06, + "loss": 0.0026, + "step": 172280 + }, + { + "epoch": 1.105014543040666, + "grad_norm": 0.06795934587717056, + "learning_rate": 4.956231755091192e-06, + "loss": 0.0015, + "step": 172290 + }, + { + "epoch": 1.105078679934452, + "grad_norm": 0.09866806119680405, + "learning_rate": 4.955672073436397e-06, + "loss": 0.0013, + "step": 172300 + }, + { + "epoch": 1.1051428168282382, + "grad_norm": 0.06745009869337082, + "learning_rate": 4.9551123923370635e-06, + "loss": 0.0014, + "step": 172310 + }, + { + "epoch": 1.1052069537220244, + "grad_norm": 0.09626159071922302, + "learning_rate": 4.954552711800203e-06, + "loss": 0.0021, + "step": 172320 + }, + { + "epoch": 1.1052710906158103, + "grad_norm": 0.021619094535708427, + "learning_rate": 4.953993031832831e-06, + "loss": 0.0015, + "step": 172330 + }, + { + "epoch": 1.1053352275095965, + "grad_norm": 0.045136865228414536, + "learning_rate": 4.953433352441957e-06, + "loss": 0.0018, + "step": 172340 + }, + { + "epoch": 1.1053993644033826, + "grad_norm": 0.1451915055513382, + "learning_rate": 4.952873673634598e-06, + "loss": 0.0022, + "step": 172350 + }, + { + "epoch": 1.1054635012971687, + "grad_norm": 0.07904060184955597, + "learning_rate": 4.952313995417764e-06, + "loss": 0.0012, + "step": 172360 + }, + { + "epoch": 1.1055276381909547, + "grad_norm": 0.07658032327890396, + "learning_rate": 4.95175431779847e-06, + "loss": 0.0019, + "step": 172370 + }, + { + "epoch": 1.1055917750847408, + "grad_norm": 0.02343565784394741, + "learning_rate": 4.951194640783729e-06, + "loss": 0.0009, + "step": 172380 + }, + { + "epoch": 1.105655911978527, + "grad_norm": 0.018991762772202492, + "learning_rate": 4.950634964380556e-06, + "loss": 0.0017, + "step": 172390 + }, + { + "epoch": 1.1057200488723131, + "grad_norm": 0.054553721100091934, + "learning_rate": 4.95007528859596e-06, + "loss": 0.0015, + "step": 172400 + }, + { + "epoch": 1.1057841857660993, + "grad_norm": 0.03292379528284073, + "learning_rate": 4.949515613436958e-06, + "loss": 0.0023, + "step": 172410 + }, + { + "epoch": 1.1058483226598852, + "grad_norm": 0.06313908100128174, + "learning_rate": 4.94895593891056e-06, + "loss": 0.0019, + "step": 172420 + }, + { + "epoch": 1.1059124595536713, + "grad_norm": 0.21744129061698914, + "learning_rate": 4.9483962650237815e-06, + "loss": 0.0032, + "step": 172430 + }, + { + "epoch": 1.1059765964474575, + "grad_norm": 0.09825435280799866, + "learning_rate": 4.947836591783634e-06, + "loss": 0.0009, + "step": 172440 + }, + { + "epoch": 1.1060407333412436, + "grad_norm": 0.1280527561903, + "learning_rate": 4.947276919197132e-06, + "loss": 0.0014, + "step": 172450 + }, + { + "epoch": 1.1061048702350296, + "grad_norm": 0.08048540353775024, + "learning_rate": 4.946717247271287e-06, + "loss": 0.0046, + "step": 172460 + }, + { + "epoch": 1.1061690071288157, + "grad_norm": 0.16083328425884247, + "learning_rate": 4.946157576013114e-06, + "loss": 0.0022, + "step": 172470 + }, + { + "epoch": 1.1062331440226019, + "grad_norm": 0.0801376923918724, + "learning_rate": 4.9455979054296245e-06, + "loss": 0.0029, + "step": 172480 + }, + { + "epoch": 1.106297280916388, + "grad_norm": 0.04717901721596718, + "learning_rate": 4.945038235527833e-06, + "loss": 0.0008, + "step": 172490 + }, + { + "epoch": 1.106361417810174, + "grad_norm": 0.026985157281160355, + "learning_rate": 4.94447856631475e-06, + "loss": 0.0016, + "step": 172500 + }, + { + "epoch": 1.10642555470396, + "grad_norm": 0.26381930708885193, + "learning_rate": 4.943918897797391e-06, + "loss": 0.0018, + "step": 172510 + }, + { + "epoch": 1.1064896915977462, + "grad_norm": 0.16577814519405365, + "learning_rate": 4.9433592299827685e-06, + "loss": 0.0015, + "step": 172520 + }, + { + "epoch": 1.1065538284915324, + "grad_norm": 0.037880945950746536, + "learning_rate": 4.942799562877895e-06, + "loss": 0.0009, + "step": 172530 + }, + { + "epoch": 1.1066179653853183, + "grad_norm": 0.23803481459617615, + "learning_rate": 4.942239896489785e-06, + "loss": 0.0016, + "step": 172540 + }, + { + "epoch": 1.1066821022791045, + "grad_norm": 0.041799016296863556, + "learning_rate": 4.941680230825449e-06, + "loss": 0.0022, + "step": 172550 + }, + { + "epoch": 1.1067462391728906, + "grad_norm": 0.04286804422736168, + "learning_rate": 4.941120565891902e-06, + "loss": 0.0016, + "step": 172560 + }, + { + "epoch": 1.1068103760666768, + "grad_norm": 0.49322232604026794, + "learning_rate": 4.940560901696156e-06, + "loss": 0.0024, + "step": 172570 + }, + { + "epoch": 1.106874512960463, + "grad_norm": 0.08396865427494049, + "learning_rate": 4.940001238245225e-06, + "loss": 0.0013, + "step": 172580 + }, + { + "epoch": 1.1069386498542488, + "grad_norm": 0.1122252568602562, + "learning_rate": 4.9394415755461205e-06, + "loss": 0.0022, + "step": 172590 + }, + { + "epoch": 1.107002786748035, + "grad_norm": 0.01022116094827652, + "learning_rate": 4.9388819136058575e-06, + "loss": 0.0012, + "step": 172600 + }, + { + "epoch": 1.1070669236418211, + "grad_norm": 0.04051017016172409, + "learning_rate": 4.938322252431447e-06, + "loss": 0.0021, + "step": 172610 + }, + { + "epoch": 1.1071310605356073, + "grad_norm": 0.05071176216006279, + "learning_rate": 4.937762592029903e-06, + "loss": 0.0012, + "step": 172620 + }, + { + "epoch": 1.1071951974293932, + "grad_norm": 0.1994679868221283, + "learning_rate": 4.937202932408238e-06, + "loss": 0.0023, + "step": 172630 + }, + { + "epoch": 1.1072593343231794, + "grad_norm": 0.15775303542613983, + "learning_rate": 4.936643273573466e-06, + "loss": 0.0027, + "step": 172640 + }, + { + "epoch": 1.1073234712169655, + "grad_norm": 0.3187548518180847, + "learning_rate": 4.936083615532598e-06, + "loss": 0.0016, + "step": 172650 + }, + { + "epoch": 1.1073876081107517, + "grad_norm": 0.0747041329741478, + "learning_rate": 4.9355239582926485e-06, + "loss": 0.0016, + "step": 172660 + }, + { + "epoch": 1.1074517450045378, + "grad_norm": 0.059891946613788605, + "learning_rate": 4.934964301860629e-06, + "loss": 0.0012, + "step": 172670 + }, + { + "epoch": 1.1075158818983237, + "grad_norm": 0.06337383389472961, + "learning_rate": 4.934404646243553e-06, + "loss": 0.0024, + "step": 172680 + }, + { + "epoch": 1.1075800187921099, + "grad_norm": 0.1260882169008255, + "learning_rate": 4.933844991448436e-06, + "loss": 0.0023, + "step": 172690 + }, + { + "epoch": 1.107644155685896, + "grad_norm": 0.03950232267379761, + "learning_rate": 4.9332853374822865e-06, + "loss": 0.0009, + "step": 172700 + }, + { + "epoch": 1.1077082925796822, + "grad_norm": 0.005105361342430115, + "learning_rate": 4.932725684352121e-06, + "loss": 0.0015, + "step": 172710 + }, + { + "epoch": 1.107772429473468, + "grad_norm": 0.1591700166463852, + "learning_rate": 4.932166032064949e-06, + "loss": 0.0019, + "step": 172720 + }, + { + "epoch": 1.1078365663672542, + "grad_norm": 0.04399140551686287, + "learning_rate": 4.931606380627786e-06, + "loss": 0.0016, + "step": 172730 + }, + { + "epoch": 1.1079007032610404, + "grad_norm": 0.2605571448802948, + "learning_rate": 4.931046730047644e-06, + "loss": 0.0015, + "step": 172740 + }, + { + "epoch": 1.1079648401548265, + "grad_norm": 0.0853683203458786, + "learning_rate": 4.930487080331536e-06, + "loss": 0.0022, + "step": 172750 + }, + { + "epoch": 1.1080289770486125, + "grad_norm": 0.2431391328573227, + "learning_rate": 4.929927431486474e-06, + "loss": 0.0012, + "step": 172760 + }, + { + "epoch": 1.1080931139423986, + "grad_norm": 0.06449903547763824, + "learning_rate": 4.929367783519471e-06, + "loss": 0.0019, + "step": 172770 + }, + { + "epoch": 1.1081572508361848, + "grad_norm": 0.0941937267780304, + "learning_rate": 4.928808136437541e-06, + "loss": 0.0023, + "step": 172780 + }, + { + "epoch": 1.108221387729971, + "grad_norm": 0.019459959119558334, + "learning_rate": 4.928248490247696e-06, + "loss": 0.0012, + "step": 172790 + }, + { + "epoch": 1.1082855246237568, + "grad_norm": 0.15396666526794434, + "learning_rate": 4.927688844956947e-06, + "loss": 0.0022, + "step": 172800 + }, + { + "epoch": 1.108349661517543, + "grad_norm": 0.0548369325697422, + "learning_rate": 4.927129200572311e-06, + "loss": 0.0007, + "step": 172810 + }, + { + "epoch": 1.1084137984113291, + "grad_norm": 0.014471418224275112, + "learning_rate": 4.926569557100797e-06, + "loss": 0.0106, + "step": 172820 + }, + { + "epoch": 1.1084779353051153, + "grad_norm": 0.09189534187316895, + "learning_rate": 4.926009914549418e-06, + "loss": 0.0016, + "step": 172830 + }, + { + "epoch": 1.1085420721989014, + "grad_norm": 0.04406943917274475, + "learning_rate": 4.925450272925189e-06, + "loss": 0.0012, + "step": 172840 + }, + { + "epoch": 1.1086062090926874, + "grad_norm": 0.12239224463701248, + "learning_rate": 4.92489063223512e-06, + "loss": 0.0023, + "step": 172850 + }, + { + "epoch": 1.1086703459864735, + "grad_norm": 0.12262288480997086, + "learning_rate": 4.924330992486227e-06, + "loss": 0.0015, + "step": 172860 + }, + { + "epoch": 1.1087344828802597, + "grad_norm": 0.05069934204220772, + "learning_rate": 4.923771353685519e-06, + "loss": 0.0013, + "step": 172870 + }, + { + "epoch": 1.1087986197740458, + "grad_norm": 0.5160471200942993, + "learning_rate": 4.923211715840012e-06, + "loss": 0.0013, + "step": 172880 + }, + { + "epoch": 1.1088627566678317, + "grad_norm": 0.07519219815731049, + "learning_rate": 4.922652078956716e-06, + "loss": 0.001, + "step": 172890 + }, + { + "epoch": 1.1089268935616179, + "grad_norm": 0.24941521883010864, + "learning_rate": 4.922092443042645e-06, + "loss": 0.003, + "step": 172900 + }, + { + "epoch": 1.108991030455404, + "grad_norm": 0.34581393003463745, + "learning_rate": 4.921532808104811e-06, + "loss": 0.002, + "step": 172910 + }, + { + "epoch": 1.1090551673491902, + "grad_norm": 0.13571126759052277, + "learning_rate": 4.9209731741502275e-06, + "loss": 0.001, + "step": 172920 + }, + { + "epoch": 1.1091193042429761, + "grad_norm": 0.020635267719626427, + "learning_rate": 4.920413541185906e-06, + "loss": 0.0016, + "step": 172930 + }, + { + "epoch": 1.1091834411367623, + "grad_norm": 0.08942442387342453, + "learning_rate": 4.919853909218861e-06, + "loss": 0.0014, + "step": 172940 + }, + { + "epoch": 1.1092475780305484, + "grad_norm": 0.05175596475601196, + "learning_rate": 4.919294278256102e-06, + "loss": 0.0014, + "step": 172950 + }, + { + "epoch": 1.1093117149243346, + "grad_norm": 0.10908970981836319, + "learning_rate": 4.918734648304644e-06, + "loss": 0.0016, + "step": 172960 + }, + { + "epoch": 1.1093758518181205, + "grad_norm": 0.4310110807418823, + "learning_rate": 4.9181750193715e-06, + "loss": 0.0037, + "step": 172970 + }, + { + "epoch": 1.1094399887119066, + "grad_norm": 0.07702956348657608, + "learning_rate": 4.91761539146368e-06, + "loss": 0.003, + "step": 172980 + }, + { + "epoch": 1.1095041256056928, + "grad_norm": 0.10723809152841568, + "learning_rate": 4.917055764588199e-06, + "loss": 0.0016, + "step": 172990 + }, + { + "epoch": 1.109568262499479, + "grad_norm": 0.01755482703447342, + "learning_rate": 4.916496138752068e-06, + "loss": 0.0027, + "step": 173000 + }, + { + "epoch": 1.109632399393265, + "grad_norm": 0.07348005473613739, + "learning_rate": 4.915936513962301e-06, + "loss": 0.0018, + "step": 173010 + }, + { + "epoch": 1.109696536287051, + "grad_norm": 0.06056353077292442, + "learning_rate": 4.915376890225909e-06, + "loss": 0.002, + "step": 173020 + }, + { + "epoch": 1.1097606731808372, + "grad_norm": 0.011018101125955582, + "learning_rate": 4.914817267549905e-06, + "loss": 0.0011, + "step": 173030 + }, + { + "epoch": 1.1098248100746233, + "grad_norm": 0.04618438333272934, + "learning_rate": 4.914257645941301e-06, + "loss": 0.0009, + "step": 173040 + }, + { + "epoch": 1.1098889469684095, + "grad_norm": 0.09414304047822952, + "learning_rate": 4.913698025407111e-06, + "loss": 0.0018, + "step": 173050 + }, + { + "epoch": 1.1099530838621954, + "grad_norm": 0.042383674532175064, + "learning_rate": 4.913138405954345e-06, + "loss": 0.0012, + "step": 173060 + }, + { + "epoch": 1.1100172207559815, + "grad_norm": 0.04793866351246834, + "learning_rate": 4.912578787590018e-06, + "loss": 0.0008, + "step": 173070 + }, + { + "epoch": 1.1100813576497677, + "grad_norm": 0.03588831424713135, + "learning_rate": 4.912019170321141e-06, + "loss": 0.0006, + "step": 173080 + }, + { + "epoch": 1.1101454945435538, + "grad_norm": 0.1358732283115387, + "learning_rate": 4.9114595541547274e-06, + "loss": 0.0023, + "step": 173090 + }, + { + "epoch": 1.11020963143734, + "grad_norm": 0.10910496860742569, + "learning_rate": 4.910899939097787e-06, + "loss": 0.0013, + "step": 173100 + }, + { + "epoch": 1.110273768331126, + "grad_norm": 0.06244830787181854, + "learning_rate": 4.910340325157335e-06, + "loss": 0.0019, + "step": 173110 + }, + { + "epoch": 1.110337905224912, + "grad_norm": 0.06858513504266739, + "learning_rate": 4.909780712340381e-06, + "loss": 0.0009, + "step": 173120 + }, + { + "epoch": 1.1104020421186982, + "grad_norm": 0.23532763123512268, + "learning_rate": 4.909221100653942e-06, + "loss": 0.0029, + "step": 173130 + }, + { + "epoch": 1.1104661790124843, + "grad_norm": 0.01755049265921116, + "learning_rate": 4.908661490105026e-06, + "loss": 0.0019, + "step": 173140 + }, + { + "epoch": 1.1105303159062703, + "grad_norm": 0.1809210330247879, + "learning_rate": 4.908101880700648e-06, + "loss": 0.0019, + "step": 173150 + }, + { + "epoch": 1.1105944528000564, + "grad_norm": 0.1378970891237259, + "learning_rate": 4.907542272447819e-06, + "loss": 0.0026, + "step": 173160 + }, + { + "epoch": 1.1106585896938426, + "grad_norm": 0.1288081407546997, + "learning_rate": 4.906982665353551e-06, + "loss": 0.001, + "step": 173170 + }, + { + "epoch": 1.1107227265876287, + "grad_norm": 0.22905784845352173, + "learning_rate": 4.906423059424856e-06, + "loss": 0.0014, + "step": 173180 + }, + { + "epoch": 1.1107868634814146, + "grad_norm": 0.17911554872989655, + "learning_rate": 4.9058634546687474e-06, + "loss": 0.003, + "step": 173190 + }, + { + "epoch": 1.1108510003752008, + "grad_norm": 0.34671086072921753, + "learning_rate": 4.905303851092237e-06, + "loss": 0.0021, + "step": 173200 + }, + { + "epoch": 1.110915137268987, + "grad_norm": 0.07665128260850906, + "learning_rate": 4.904744248702338e-06, + "loss": 0.001, + "step": 173210 + }, + { + "epoch": 1.110979274162773, + "grad_norm": 0.20960409939289093, + "learning_rate": 4.90418464750606e-06, + "loss": 0.001, + "step": 173220 + }, + { + "epoch": 1.111043411056559, + "grad_norm": 0.1022791862487793, + "learning_rate": 4.903625047510418e-06, + "loss": 0.0009, + "step": 173230 + }, + { + "epoch": 1.1111075479503452, + "grad_norm": 0.03949712589383125, + "learning_rate": 4.903065448722423e-06, + "loss": 0.0017, + "step": 173240 + }, + { + "epoch": 1.1111716848441313, + "grad_norm": 0.10221009701490402, + "learning_rate": 4.902505851149086e-06, + "loss": 0.0015, + "step": 173250 + }, + { + "epoch": 1.1112358217379175, + "grad_norm": 0.09457410126924515, + "learning_rate": 4.901946254797422e-06, + "loss": 0.0008, + "step": 173260 + }, + { + "epoch": 1.1112999586317036, + "grad_norm": 0.11744176596403122, + "learning_rate": 4.90138665967444e-06, + "loss": 0.0011, + "step": 173270 + }, + { + "epoch": 1.1113640955254895, + "grad_norm": 0.16001646220684052, + "learning_rate": 4.9008270657871545e-06, + "loss": 0.0015, + "step": 173280 + }, + { + "epoch": 1.1114282324192757, + "grad_norm": 0.10524749755859375, + "learning_rate": 4.900267473142576e-06, + "loss": 0.0025, + "step": 173290 + }, + { + "epoch": 1.1114923693130618, + "grad_norm": 0.26658308506011963, + "learning_rate": 4.899707881747719e-06, + "loss": 0.0021, + "step": 173300 + }, + { + "epoch": 1.111556506206848, + "grad_norm": 0.057558316737413406, + "learning_rate": 4.899148291609593e-06, + "loss": 0.001, + "step": 173310 + }, + { + "epoch": 1.111620643100634, + "grad_norm": 0.04493267089128494, + "learning_rate": 4.898588702735211e-06, + "loss": 0.002, + "step": 173320 + }, + { + "epoch": 1.11168477999442, + "grad_norm": 0.0999845489859581, + "learning_rate": 4.898029115131584e-06, + "loss": 0.002, + "step": 173330 + }, + { + "epoch": 1.1117489168882062, + "grad_norm": 0.0713239312171936, + "learning_rate": 4.897469528805727e-06, + "loss": 0.0021, + "step": 173340 + }, + { + "epoch": 1.1118130537819924, + "grad_norm": 0.07967229932546616, + "learning_rate": 4.896909943764649e-06, + "loss": 0.001, + "step": 173350 + }, + { + "epoch": 1.1118771906757783, + "grad_norm": 0.1508815437555313, + "learning_rate": 4.896350360015363e-06, + "loss": 0.002, + "step": 173360 + }, + { + "epoch": 1.1119413275695644, + "grad_norm": 0.12152405083179474, + "learning_rate": 4.895790777564881e-06, + "loss": 0.0012, + "step": 173370 + }, + { + "epoch": 1.1120054644633506, + "grad_norm": 0.053805720061063766, + "learning_rate": 4.895231196420215e-06, + "loss": 0.0013, + "step": 173380 + }, + { + "epoch": 1.1120696013571367, + "grad_norm": 0.05876936763525009, + "learning_rate": 4.894671616588377e-06, + "loss": 0.0009, + "step": 173390 + }, + { + "epoch": 1.1121337382509227, + "grad_norm": 0.10661150515079498, + "learning_rate": 4.894112038076379e-06, + "loss": 0.001, + "step": 173400 + }, + { + "epoch": 1.1121978751447088, + "grad_norm": 0.06447902321815491, + "learning_rate": 4.893552460891234e-06, + "loss": 0.0015, + "step": 173410 + }, + { + "epoch": 1.112262012038495, + "grad_norm": 0.01974887028336525, + "learning_rate": 4.892992885039951e-06, + "loss": 0.0012, + "step": 173420 + }, + { + "epoch": 1.112326148932281, + "grad_norm": 0.20772825181484222, + "learning_rate": 4.892433310529545e-06, + "loss": 0.0024, + "step": 173430 + }, + { + "epoch": 1.1123902858260672, + "grad_norm": 0.029797615483403206, + "learning_rate": 4.891873737367025e-06, + "loss": 0.0014, + "step": 173440 + }, + { + "epoch": 1.1124544227198532, + "grad_norm": 0.3723593056201935, + "learning_rate": 4.891314165559406e-06, + "loss": 0.003, + "step": 173450 + }, + { + "epoch": 1.1125185596136393, + "grad_norm": 0.06410173326730728, + "learning_rate": 4.890754595113697e-06, + "loss": 0.0022, + "step": 173460 + }, + { + "epoch": 1.1125826965074255, + "grad_norm": 0.04267742112278938, + "learning_rate": 4.890195026036912e-06, + "loss": 0.0015, + "step": 173470 + }, + { + "epoch": 1.1126468334012116, + "grad_norm": 0.18614718317985535, + "learning_rate": 4.8896354583360606e-06, + "loss": 0.0015, + "step": 173480 + }, + { + "epoch": 1.1127109702949975, + "grad_norm": 0.06482775509357452, + "learning_rate": 4.889075892018157e-06, + "loss": 0.0029, + "step": 173490 + }, + { + "epoch": 1.1127751071887837, + "grad_norm": 0.049155496060848236, + "learning_rate": 4.88851632709021e-06, + "loss": 0.0012, + "step": 173500 + }, + { + "epoch": 1.1128392440825698, + "grad_norm": 0.08097552508115768, + "learning_rate": 4.887956763559235e-06, + "loss": 0.0021, + "step": 173510 + }, + { + "epoch": 1.112903380976356, + "grad_norm": 0.12890399992465973, + "learning_rate": 4.88739720143224e-06, + "loss": 0.0035, + "step": 173520 + }, + { + "epoch": 1.1129675178701421, + "grad_norm": 0.04127669334411621, + "learning_rate": 4.88683764071624e-06, + "loss": 0.0014, + "step": 173530 + }, + { + "epoch": 1.113031654763928, + "grad_norm": 0.060111287981271744, + "learning_rate": 4.886278081418245e-06, + "loss": 0.0033, + "step": 173540 + }, + { + "epoch": 1.1130957916577142, + "grad_norm": 0.10613421350717545, + "learning_rate": 4.8857185235452665e-06, + "loss": 0.001, + "step": 173550 + }, + { + "epoch": 1.1131599285515004, + "grad_norm": 0.2460632473230362, + "learning_rate": 4.885158967104317e-06, + "loss": 0.0017, + "step": 173560 + }, + { + "epoch": 1.1132240654452865, + "grad_norm": 0.05923857167363167, + "learning_rate": 4.884599412102408e-06, + "loss": 0.0017, + "step": 173570 + }, + { + "epoch": 1.1132882023390724, + "grad_norm": 0.05139973387122154, + "learning_rate": 4.88403985854655e-06, + "loss": 0.0018, + "step": 173580 + }, + { + "epoch": 1.1133523392328586, + "grad_norm": 0.13422226905822754, + "learning_rate": 4.883480306443756e-06, + "loss": 0.0029, + "step": 173590 + }, + { + "epoch": 1.1134164761266447, + "grad_norm": 0.030855519697070122, + "learning_rate": 4.882920755801037e-06, + "loss": 0.0014, + "step": 173600 + }, + { + "epoch": 1.1134806130204309, + "grad_norm": 0.06292476505041122, + "learning_rate": 4.8823612066254035e-06, + "loss": 0.0088, + "step": 173610 + }, + { + "epoch": 1.1135447499142168, + "grad_norm": 0.2309848517179489, + "learning_rate": 4.88180165892387e-06, + "loss": 0.0046, + "step": 173620 + }, + { + "epoch": 1.113608886808003, + "grad_norm": 0.028664829209446907, + "learning_rate": 4.881242112703445e-06, + "loss": 0.0015, + "step": 173630 + }, + { + "epoch": 1.113673023701789, + "grad_norm": 0.18330270051956177, + "learning_rate": 4.8806825679711424e-06, + "loss": 0.0018, + "step": 173640 + }, + { + "epoch": 1.1137371605955753, + "grad_norm": 0.10287895798683167, + "learning_rate": 4.880123024733971e-06, + "loss": 0.0011, + "step": 173650 + }, + { + "epoch": 1.1138012974893612, + "grad_norm": 0.0056119938381016254, + "learning_rate": 4.879563482998945e-06, + "loss": 0.0028, + "step": 173660 + }, + { + "epoch": 1.1138654343831473, + "grad_norm": 0.029498474672436714, + "learning_rate": 4.879003942773074e-06, + "loss": 0.0021, + "step": 173670 + }, + { + "epoch": 1.1139295712769335, + "grad_norm": 0.3677642345428467, + "learning_rate": 4.8784444040633685e-06, + "loss": 0.0021, + "step": 173680 + }, + { + "epoch": 1.1139937081707196, + "grad_norm": 0.06543778628110886, + "learning_rate": 4.8778848668768446e-06, + "loss": 0.0015, + "step": 173690 + }, + { + "epoch": 1.1140578450645058, + "grad_norm": 0.13932010531425476, + "learning_rate": 4.8773253312205085e-06, + "loss": 0.0024, + "step": 173700 + }, + { + "epoch": 1.1141219819582917, + "grad_norm": 0.1845843493938446, + "learning_rate": 4.8767657971013746e-06, + "loss": 0.002, + "step": 173710 + }, + { + "epoch": 1.1141861188520779, + "grad_norm": 0.05792969465255737, + "learning_rate": 4.876206264526452e-06, + "loss": 0.0011, + "step": 173720 + }, + { + "epoch": 1.114250255745864, + "grad_norm": 0.3107650876045227, + "learning_rate": 4.875646733502755e-06, + "loss": 0.0029, + "step": 173730 + }, + { + "epoch": 1.1143143926396502, + "grad_norm": 0.008603124879300594, + "learning_rate": 4.875087204037292e-06, + "loss": 0.0023, + "step": 173740 + }, + { + "epoch": 1.114378529533436, + "grad_norm": 0.07859355211257935, + "learning_rate": 4.8745276761370776e-06, + "loss": 0.0008, + "step": 173750 + }, + { + "epoch": 1.1144426664272222, + "grad_norm": 0.062411241233348846, + "learning_rate": 4.873968149809119e-06, + "loss": 0.0013, + "step": 173760 + }, + { + "epoch": 1.1145068033210084, + "grad_norm": 0.08033911138772964, + "learning_rate": 4.873408625060431e-06, + "loss": 0.0012, + "step": 173770 + }, + { + "epoch": 1.1145709402147945, + "grad_norm": 0.05683527886867523, + "learning_rate": 4.872849101898022e-06, + "loss": 0.0024, + "step": 173780 + }, + { + "epoch": 1.1146350771085805, + "grad_norm": 0.10068879276514053, + "learning_rate": 4.872289580328906e-06, + "loss": 0.0026, + "step": 173790 + }, + { + "epoch": 1.1146992140023666, + "grad_norm": 0.056821130216121674, + "learning_rate": 4.871730060360091e-06, + "loss": 0.0029, + "step": 173800 + }, + { + "epoch": 1.1147633508961527, + "grad_norm": 0.21229983866214752, + "learning_rate": 4.871170541998591e-06, + "loss": 0.0025, + "step": 173810 + }, + { + "epoch": 1.114827487789939, + "grad_norm": 0.12080463021993637, + "learning_rate": 4.870611025251417e-06, + "loss": 0.0016, + "step": 173820 + }, + { + "epoch": 1.114891624683725, + "grad_norm": 0.04254530742764473, + "learning_rate": 4.870051510125578e-06, + "loss": 0.0014, + "step": 173830 + }, + { + "epoch": 1.114955761577511, + "grad_norm": 0.12097093462944031, + "learning_rate": 4.8694919966280875e-06, + "loss": 0.0009, + "step": 173840 + }, + { + "epoch": 1.1150198984712971, + "grad_norm": 0.020847784355282784, + "learning_rate": 4.8689324847659556e-06, + "loss": 0.002, + "step": 173850 + }, + { + "epoch": 1.1150840353650833, + "grad_norm": 0.27949169278144836, + "learning_rate": 4.868372974546193e-06, + "loss": 0.0043, + "step": 173860 + }, + { + "epoch": 1.1151481722588694, + "grad_norm": 0.005513959098607302, + "learning_rate": 4.867813465975811e-06, + "loss": 0.0013, + "step": 173870 + }, + { + "epoch": 1.1152123091526553, + "grad_norm": 0.017530998215079308, + "learning_rate": 4.867253959061821e-06, + "loss": 0.0016, + "step": 173880 + }, + { + "epoch": 1.1152764460464415, + "grad_norm": 0.20806366205215454, + "learning_rate": 4.866694453811232e-06, + "loss": 0.0019, + "step": 173890 + }, + { + "epoch": 1.1153405829402276, + "grad_norm": 0.08184632658958435, + "learning_rate": 4.866134950231058e-06, + "loss": 0.0021, + "step": 173900 + }, + { + "epoch": 1.1154047198340138, + "grad_norm": 0.0540790855884552, + "learning_rate": 4.8655754483283095e-06, + "loss": 0.0014, + "step": 173910 + }, + { + "epoch": 1.1154688567277997, + "grad_norm": 0.09348951280117035, + "learning_rate": 4.8650159481099955e-06, + "loss": 0.0014, + "step": 173920 + }, + { + "epoch": 1.1155329936215859, + "grad_norm": 0.039533596485853195, + "learning_rate": 4.86445644958313e-06, + "loss": 0.002, + "step": 173930 + }, + { + "epoch": 1.115597130515372, + "grad_norm": 0.32928287982940674, + "learning_rate": 4.86389695275472e-06, + "loss": 0.0026, + "step": 173940 + }, + { + "epoch": 1.1156612674091582, + "grad_norm": 0.10675010085105896, + "learning_rate": 4.863337457631779e-06, + "loss": 0.0032, + "step": 173950 + }, + { + "epoch": 1.1157254043029443, + "grad_norm": 0.09021563082933426, + "learning_rate": 4.862777964221318e-06, + "loss": 0.0016, + "step": 173960 + }, + { + "epoch": 1.1157895411967302, + "grad_norm": 0.13088293373584747, + "learning_rate": 4.862218472530347e-06, + "loss": 0.0007, + "step": 173970 + }, + { + "epoch": 1.1158536780905164, + "grad_norm": 0.06926416605710983, + "learning_rate": 4.861658982565877e-06, + "loss": 0.002, + "step": 173980 + }, + { + "epoch": 1.1159178149843025, + "grad_norm": 0.2240966260433197, + "learning_rate": 4.8610994943349185e-06, + "loss": 0.0011, + "step": 173990 + }, + { + "epoch": 1.1159819518780887, + "grad_norm": 0.010228103958070278, + "learning_rate": 4.860540007844484e-06, + "loss": 0.0019, + "step": 174000 + }, + { + "epoch": 1.1160460887718746, + "grad_norm": 0.12614555656909943, + "learning_rate": 4.85998052310158e-06, + "loss": 0.0013, + "step": 174010 + }, + { + "epoch": 1.1161102256656608, + "grad_norm": 0.06303328275680542, + "learning_rate": 4.859421040113223e-06, + "loss": 0.0014, + "step": 174020 + }, + { + "epoch": 1.116174362559447, + "grad_norm": 0.04897018149495125, + "learning_rate": 4.85886155888642e-06, + "loss": 0.0013, + "step": 174030 + }, + { + "epoch": 1.116238499453233, + "grad_norm": 0.14528539776802063, + "learning_rate": 4.858302079428182e-06, + "loss": 0.0021, + "step": 174040 + }, + { + "epoch": 1.116302636347019, + "grad_norm": 0.08426199853420258, + "learning_rate": 4.85774260174552e-06, + "loss": 0.0013, + "step": 174050 + }, + { + "epoch": 1.1163667732408051, + "grad_norm": 0.027923064306378365, + "learning_rate": 4.857183125845446e-06, + "loss": 0.0016, + "step": 174060 + }, + { + "epoch": 1.1164309101345913, + "grad_norm": 0.14836865663528442, + "learning_rate": 4.8566236517349685e-06, + "loss": 0.0012, + "step": 174070 + }, + { + "epoch": 1.1164950470283774, + "grad_norm": 0.17420117557048798, + "learning_rate": 4.856064179421099e-06, + "loss": 0.0033, + "step": 174080 + }, + { + "epoch": 1.1165591839221634, + "grad_norm": 0.03189292922616005, + "learning_rate": 4.855504708910849e-06, + "loss": 0.0015, + "step": 174090 + }, + { + "epoch": 1.1166233208159495, + "grad_norm": 0.02180171012878418, + "learning_rate": 4.854945240211228e-06, + "loss": 0.0012, + "step": 174100 + }, + { + "epoch": 1.1166874577097357, + "grad_norm": 0.005740481428802013, + "learning_rate": 4.854385773329247e-06, + "loss": 0.0028, + "step": 174110 + }, + { + "epoch": 1.1167515946035218, + "grad_norm": 0.11378605663776398, + "learning_rate": 4.8538263082719155e-06, + "loss": 0.0016, + "step": 174120 + }, + { + "epoch": 1.116815731497308, + "grad_norm": 0.04232391342520714, + "learning_rate": 4.853266845046246e-06, + "loss": 0.0015, + "step": 174130 + }, + { + "epoch": 1.1168798683910939, + "grad_norm": 0.08134877681732178, + "learning_rate": 4.852707383659246e-06, + "loss": 0.0013, + "step": 174140 + }, + { + "epoch": 1.11694400528488, + "grad_norm": 0.13055117428302765, + "learning_rate": 4.85214792411793e-06, + "loss": 0.0012, + "step": 174150 + }, + { + "epoch": 1.1170081421786662, + "grad_norm": 0.027094565331935883, + "learning_rate": 4.851588466429305e-06, + "loss": 0.0011, + "step": 174160 + }, + { + "epoch": 1.1170722790724523, + "grad_norm": 0.05294719710946083, + "learning_rate": 4.851029010600383e-06, + "loss": 0.0013, + "step": 174170 + }, + { + "epoch": 1.1171364159662382, + "grad_norm": 0.2595820426940918, + "learning_rate": 4.850469556638173e-06, + "loss": 0.0036, + "step": 174180 + }, + { + "epoch": 1.1172005528600244, + "grad_norm": 0.08011367917060852, + "learning_rate": 4.849910104549687e-06, + "loss": 0.0022, + "step": 174190 + }, + { + "epoch": 1.1172646897538105, + "grad_norm": 0.05550553277134895, + "learning_rate": 4.849350654341934e-06, + "loss": 0.0008, + "step": 174200 + }, + { + "epoch": 1.1173288266475967, + "grad_norm": 0.06627099961042404, + "learning_rate": 4.848791206021927e-06, + "loss": 0.0027, + "step": 174210 + }, + { + "epoch": 1.1173929635413828, + "grad_norm": 0.05866013467311859, + "learning_rate": 4.848231759596672e-06, + "loss": 0.0022, + "step": 174220 + }, + { + "epoch": 1.1174571004351688, + "grad_norm": 0.05826423689723015, + "learning_rate": 4.847672315073182e-06, + "loss": 0.0028, + "step": 174230 + }, + { + "epoch": 1.117521237328955, + "grad_norm": 0.098142109811306, + "learning_rate": 4.847112872458467e-06, + "loss": 0.0016, + "step": 174240 + }, + { + "epoch": 1.117585374222741, + "grad_norm": 0.13749217987060547, + "learning_rate": 4.8465534317595375e-06, + "loss": 0.0022, + "step": 174250 + }, + { + "epoch": 1.1176495111165272, + "grad_norm": 0.08138950914144516, + "learning_rate": 4.845993992983404e-06, + "loss": 0.0018, + "step": 174260 + }, + { + "epoch": 1.1177136480103131, + "grad_norm": 0.1339346468448639, + "learning_rate": 4.8454345561370734e-06, + "loss": 0.0017, + "step": 174270 + }, + { + "epoch": 1.1177777849040993, + "grad_norm": 0.17551785707473755, + "learning_rate": 4.84487512122756e-06, + "loss": 0.0019, + "step": 174280 + }, + { + "epoch": 1.1178419217978854, + "grad_norm": 0.044013574719429016, + "learning_rate": 4.844315688261871e-06, + "loss": 0.0008, + "step": 174290 + }, + { + "epoch": 1.1179060586916716, + "grad_norm": 0.031187549233436584, + "learning_rate": 4.8437562572470195e-06, + "loss": 0.001, + "step": 174300 + }, + { + "epoch": 1.1179701955854575, + "grad_norm": 0.04440230131149292, + "learning_rate": 4.843196828190012e-06, + "loss": 0.0032, + "step": 174310 + }, + { + "epoch": 1.1180343324792437, + "grad_norm": 0.036718934774398804, + "learning_rate": 4.842637401097861e-06, + "loss": 0.0012, + "step": 174320 + }, + { + "epoch": 1.1180984693730298, + "grad_norm": 0.07007744163274765, + "learning_rate": 4.842077975977575e-06, + "loss": 0.0021, + "step": 174330 + }, + { + "epoch": 1.118162606266816, + "grad_norm": 0.11462406814098358, + "learning_rate": 4.841518552836167e-06, + "loss": 0.0019, + "step": 174340 + }, + { + "epoch": 1.1182267431606019, + "grad_norm": 0.06285811215639114, + "learning_rate": 4.840959131680643e-06, + "loss": 0.0031, + "step": 174350 + }, + { + "epoch": 1.118290880054388, + "grad_norm": 0.07035444676876068, + "learning_rate": 4.840399712518015e-06, + "loss": 0.0014, + "step": 174360 + }, + { + "epoch": 1.1183550169481742, + "grad_norm": 0.015017986297607422, + "learning_rate": 4.839840295355292e-06, + "loss": 0.0005, + "step": 174370 + }, + { + "epoch": 1.1184191538419603, + "grad_norm": 0.07589603215456009, + "learning_rate": 4.839280880199485e-06, + "loss": 0.0014, + "step": 174380 + }, + { + "epoch": 1.1184832907357465, + "grad_norm": 0.16272950172424316, + "learning_rate": 4.838721467057605e-06, + "loss": 0.0013, + "step": 174390 + }, + { + "epoch": 1.1185474276295324, + "grad_norm": 0.09640228748321533, + "learning_rate": 4.838162055936658e-06, + "loss": 0.0029, + "step": 174400 + }, + { + "epoch": 1.1186115645233186, + "grad_norm": 0.18411511182785034, + "learning_rate": 4.837602646843658e-06, + "loss": 0.0017, + "step": 174410 + }, + { + "epoch": 1.1186757014171047, + "grad_norm": 0.040111999958753586, + "learning_rate": 4.8370432397856115e-06, + "loss": 0.0013, + "step": 174420 + }, + { + "epoch": 1.1187398383108909, + "grad_norm": 0.02608310803771019, + "learning_rate": 4.836483834769531e-06, + "loss": 0.0017, + "step": 174430 + }, + { + "epoch": 1.1188039752046768, + "grad_norm": 0.17651981115341187, + "learning_rate": 4.835924431802423e-06, + "loss": 0.0011, + "step": 174440 + }, + { + "epoch": 1.118868112098463, + "grad_norm": 0.26195093989372253, + "learning_rate": 4.835365030891301e-06, + "loss": 0.001, + "step": 174450 + }, + { + "epoch": 1.118932248992249, + "grad_norm": 0.12918025255203247, + "learning_rate": 4.834805632043172e-06, + "loss": 0.0034, + "step": 174460 + }, + { + "epoch": 1.1189963858860352, + "grad_norm": 0.15502963960170746, + "learning_rate": 4.834246235265048e-06, + "loss": 0.0015, + "step": 174470 + }, + { + "epoch": 1.1190605227798212, + "grad_norm": 0.0754869133234024, + "learning_rate": 4.833686840563935e-06, + "loss": 0.0019, + "step": 174480 + }, + { + "epoch": 1.1191246596736073, + "grad_norm": 0.07098864763975143, + "learning_rate": 4.833127447946846e-06, + "loss": 0.0015, + "step": 174490 + }, + { + "epoch": 1.1191887965673935, + "grad_norm": 0.024236060678958893, + "learning_rate": 4.832568057420788e-06, + "loss": 0.0066, + "step": 174500 + }, + { + "epoch": 1.1192529334611796, + "grad_norm": 0.0722365453839302, + "learning_rate": 4.832008668992774e-06, + "loss": 0.0015, + "step": 174510 + }, + { + "epoch": 1.1193170703549655, + "grad_norm": 0.026653124019503593, + "learning_rate": 4.831449282669809e-06, + "loss": 0.0039, + "step": 174520 + }, + { + "epoch": 1.1193812072487517, + "grad_norm": 0.09310638904571533, + "learning_rate": 4.830889898458906e-06, + "loss": 0.0008, + "step": 174530 + }, + { + "epoch": 1.1194453441425378, + "grad_norm": 0.057517159730196, + "learning_rate": 4.830330516367074e-06, + "loss": 0.0017, + "step": 174540 + }, + { + "epoch": 1.119509481036324, + "grad_norm": 0.3690768778324127, + "learning_rate": 4.829771136401321e-06, + "loss": 0.0016, + "step": 174550 + }, + { + "epoch": 1.1195736179301101, + "grad_norm": 0.14363223314285278, + "learning_rate": 4.829211758568658e-06, + "loss": 0.0025, + "step": 174560 + }, + { + "epoch": 1.119637754823896, + "grad_norm": 0.10250331461429596, + "learning_rate": 4.828652382876093e-06, + "loss": 0.002, + "step": 174570 + }, + { + "epoch": 1.1197018917176822, + "grad_norm": 0.04894329234957695, + "learning_rate": 4.8280930093306376e-06, + "loss": 0.0016, + "step": 174580 + }, + { + "epoch": 1.1197660286114683, + "grad_norm": 0.13075852394104004, + "learning_rate": 4.827533637939298e-06, + "loss": 0.0013, + "step": 174590 + }, + { + "epoch": 1.1198301655052545, + "grad_norm": 0.07651156187057495, + "learning_rate": 4.826974268709087e-06, + "loss": 0.0013, + "step": 174600 + }, + { + "epoch": 1.1198943023990404, + "grad_norm": 0.05896775797009468, + "learning_rate": 4.826414901647009e-06, + "loss": 0.0017, + "step": 174610 + }, + { + "epoch": 1.1199584392928266, + "grad_norm": 0.18786385655403137, + "learning_rate": 4.825855536760079e-06, + "loss": 0.0018, + "step": 174620 + }, + { + "epoch": 1.1200225761866127, + "grad_norm": 0.096187062561512, + "learning_rate": 4.825296174055303e-06, + "loss": 0.0019, + "step": 174630 + }, + { + "epoch": 1.1200867130803989, + "grad_norm": 0.36271122097969055, + "learning_rate": 4.824736813539691e-06, + "loss": 0.0022, + "step": 174640 + }, + { + "epoch": 1.120150849974185, + "grad_norm": 0.15491759777069092, + "learning_rate": 4.824177455220249e-06, + "loss": 0.0016, + "step": 174650 + }, + { + "epoch": 1.120214986867971, + "grad_norm": 0.040893521159887314, + "learning_rate": 4.823618099103992e-06, + "loss": 0.001, + "step": 174660 + }, + { + "epoch": 1.120279123761757, + "grad_norm": 0.10148506611585617, + "learning_rate": 4.823058745197926e-06, + "loss": 0.0016, + "step": 174670 + }, + { + "epoch": 1.1203432606555432, + "grad_norm": 0.06153487041592598, + "learning_rate": 4.822499393509061e-06, + "loss": 0.0014, + "step": 174680 + }, + { + "epoch": 1.1204073975493294, + "grad_norm": 0.05802586302161217, + "learning_rate": 4.821940044044404e-06, + "loss": 0.0009, + "step": 174690 + }, + { + "epoch": 1.1204715344431153, + "grad_norm": 0.0913996621966362, + "learning_rate": 4.8213806968109675e-06, + "loss": 0.0021, + "step": 174700 + }, + { + "epoch": 1.1205356713369015, + "grad_norm": 0.055139727890491486, + "learning_rate": 4.820821351815757e-06, + "loss": 0.0012, + "step": 174710 + }, + { + "epoch": 1.1205998082306876, + "grad_norm": 0.07627728581428528, + "learning_rate": 4.820262009065784e-06, + "loss": 0.001, + "step": 174720 + }, + { + "epoch": 1.1206639451244738, + "grad_norm": 0.020158030092716217, + "learning_rate": 4.819702668568056e-06, + "loss": 0.0026, + "step": 174730 + }, + { + "epoch": 1.1207280820182597, + "grad_norm": 0.16587579250335693, + "learning_rate": 4.819143330329582e-06, + "loss": 0.0027, + "step": 174740 + }, + { + "epoch": 1.1207922189120458, + "grad_norm": 0.007621010299772024, + "learning_rate": 4.818583994357372e-06, + "loss": 0.0013, + "step": 174750 + }, + { + "epoch": 1.120856355805832, + "grad_norm": 0.10911239683628082, + "learning_rate": 4.818024660658434e-06, + "loss": 0.0022, + "step": 174760 + }, + { + "epoch": 1.1209204926996181, + "grad_norm": 0.07024364918470383, + "learning_rate": 4.817465329239776e-06, + "loss": 0.0013, + "step": 174770 + }, + { + "epoch": 1.120984629593404, + "grad_norm": 0.12400852888822556, + "learning_rate": 4.816906000108409e-06, + "loss": 0.0018, + "step": 174780 + }, + { + "epoch": 1.1210487664871902, + "grad_norm": 0.059531401842832565, + "learning_rate": 4.81634667327134e-06, + "loss": 0.003, + "step": 174790 + }, + { + "epoch": 1.1211129033809764, + "grad_norm": 0.17386078834533691, + "learning_rate": 4.815787348735579e-06, + "loss": 0.0027, + "step": 174800 + }, + { + "epoch": 1.1211770402747625, + "grad_norm": 0.22123262286186218, + "learning_rate": 4.815228026508135e-06, + "loss": 0.0016, + "step": 174810 + }, + { + "epoch": 1.1212411771685487, + "grad_norm": 0.37798458337783813, + "learning_rate": 4.814668706596014e-06, + "loss": 0.0156, + "step": 174820 + }, + { + "epoch": 1.1213053140623346, + "grad_norm": 0.2819300889968872, + "learning_rate": 4.814109389006228e-06, + "loss": 0.0019, + "step": 174830 + }, + { + "epoch": 1.1213694509561207, + "grad_norm": 0.1365143060684204, + "learning_rate": 4.813550073745783e-06, + "loss": 0.0012, + "step": 174840 + }, + { + "epoch": 1.1214335878499069, + "grad_norm": 0.07883584499359131, + "learning_rate": 4.81299076082169e-06, + "loss": 0.0017, + "step": 174850 + }, + { + "epoch": 1.121497724743693, + "grad_norm": 0.273945152759552, + "learning_rate": 4.812431450240956e-06, + "loss": 0.0014, + "step": 174860 + }, + { + "epoch": 1.121561861637479, + "grad_norm": 0.22897343337535858, + "learning_rate": 4.811872142010591e-06, + "loss": 0.0017, + "step": 174870 + }, + { + "epoch": 1.121625998531265, + "grad_norm": 0.1994645744562149, + "learning_rate": 4.8113128361376e-06, + "loss": 0.0026, + "step": 174880 + }, + { + "epoch": 1.1216901354250512, + "grad_norm": 0.052137140184640884, + "learning_rate": 4.810753532628997e-06, + "loss": 0.0021, + "step": 174890 + }, + { + "epoch": 1.1217542723188374, + "grad_norm": 0.049453653395175934, + "learning_rate": 4.810194231491785e-06, + "loss": 0.0008, + "step": 174900 + }, + { + "epoch": 1.1218184092126233, + "grad_norm": 0.09021912515163422, + "learning_rate": 4.809634932732976e-06, + "loss": 0.0017, + "step": 174910 + }, + { + "epoch": 1.1218825461064095, + "grad_norm": 0.11592074483633041, + "learning_rate": 4.809075636359578e-06, + "loss": 0.0023, + "step": 174920 + }, + { + "epoch": 1.1219466830001956, + "grad_norm": 0.01169917918741703, + "learning_rate": 4.808516342378597e-06, + "loss": 0.0023, + "step": 174930 + }, + { + "epoch": 1.1220108198939818, + "grad_norm": 0.0716211125254631, + "learning_rate": 4.807957050797045e-06, + "loss": 0.0035, + "step": 174940 + }, + { + "epoch": 1.1220749567877677, + "grad_norm": 0.14507988095283508, + "learning_rate": 4.807397761621926e-06, + "loss": 0.0023, + "step": 174950 + }, + { + "epoch": 1.1221390936815538, + "grad_norm": 0.04453321173787117, + "learning_rate": 4.806838474860253e-06, + "loss": 0.0014, + "step": 174960 + }, + { + "epoch": 1.12220323057534, + "grad_norm": 0.12254642695188522, + "learning_rate": 4.806279190519031e-06, + "loss": 0.0012, + "step": 174970 + }, + { + "epoch": 1.1222673674691261, + "grad_norm": 0.005856363568454981, + "learning_rate": 4.80571990860527e-06, + "loss": 0.0014, + "step": 174980 + }, + { + "epoch": 1.1223315043629123, + "grad_norm": 0.06946040689945221, + "learning_rate": 4.805160629125976e-06, + "loss": 0.0018, + "step": 174990 + }, + { + "epoch": 1.1223956412566982, + "grad_norm": 0.1354217231273651, + "learning_rate": 4.80460135208816e-06, + "loss": 0.0014, + "step": 175000 + }, + { + "epoch": 1.1224597781504844, + "grad_norm": 0.18229800462722778, + "learning_rate": 4.804042077498828e-06, + "loss": 0.0035, + "step": 175010 + }, + { + "epoch": 1.1225239150442705, + "grad_norm": 0.16235755383968353, + "learning_rate": 4.803482805364989e-06, + "loss": 0.0014, + "step": 175020 + }, + { + "epoch": 1.1225880519380567, + "grad_norm": 0.3516307473182678, + "learning_rate": 4.80292353569365e-06, + "loss": 0.0009, + "step": 175030 + }, + { + "epoch": 1.1226521888318426, + "grad_norm": 0.18743622303009033, + "learning_rate": 4.8023642684918216e-06, + "loss": 0.0032, + "step": 175040 + }, + { + "epoch": 1.1227163257256287, + "grad_norm": 0.04604848101735115, + "learning_rate": 4.801805003766509e-06, + "loss": 0.002, + "step": 175050 + }, + { + "epoch": 1.1227804626194149, + "grad_norm": 0.06320274621248245, + "learning_rate": 4.801245741524721e-06, + "loss": 0.0021, + "step": 175060 + }, + { + "epoch": 1.122844599513201, + "grad_norm": 0.020902549847960472, + "learning_rate": 4.8006864817734665e-06, + "loss": 0.001, + "step": 175070 + }, + { + "epoch": 1.1229087364069872, + "grad_norm": 0.1629502773284912, + "learning_rate": 4.800127224519753e-06, + "loss": 0.0024, + "step": 175080 + }, + { + "epoch": 1.1229728733007731, + "grad_norm": 0.014271809719502926, + "learning_rate": 4.799567969770588e-06, + "loss": 0.0018, + "step": 175090 + }, + { + "epoch": 1.1230370101945593, + "grad_norm": 0.11975744366645813, + "learning_rate": 4.799008717532979e-06, + "loss": 0.003, + "step": 175100 + }, + { + "epoch": 1.1231011470883454, + "grad_norm": 0.07389917969703674, + "learning_rate": 4.798449467813936e-06, + "loss": 0.0012, + "step": 175110 + }, + { + "epoch": 1.1231652839821316, + "grad_norm": 0.11914652585983276, + "learning_rate": 4.7978902206204634e-06, + "loss": 0.0012, + "step": 175120 + }, + { + "epoch": 1.1232294208759175, + "grad_norm": 0.03024482913315296, + "learning_rate": 4.797330975959572e-06, + "loss": 0.0008, + "step": 175130 + }, + { + "epoch": 1.1232935577697036, + "grad_norm": 0.04781707376241684, + "learning_rate": 4.796771733838268e-06, + "loss": 0.001, + "step": 175140 + }, + { + "epoch": 1.1233576946634898, + "grad_norm": 0.08307913690805435, + "learning_rate": 4.79621249426356e-06, + "loss": 0.0023, + "step": 175150 + }, + { + "epoch": 1.123421831557276, + "grad_norm": 0.0447513610124588, + "learning_rate": 4.795653257242454e-06, + "loss": 0.0015, + "step": 175160 + }, + { + "epoch": 1.1234859684510619, + "grad_norm": 0.13397501409053802, + "learning_rate": 4.795094022781959e-06, + "loss": 0.0013, + "step": 175170 + }, + { + "epoch": 1.123550105344848, + "grad_norm": 0.16908450424671173, + "learning_rate": 4.794534790889083e-06, + "loss": 0.0012, + "step": 175180 + }, + { + "epoch": 1.1236142422386342, + "grad_norm": 0.2635178565979004, + "learning_rate": 4.793975561570832e-06, + "loss": 0.0027, + "step": 175190 + }, + { + "epoch": 1.1236783791324203, + "grad_norm": 0.03389308601617813, + "learning_rate": 4.793416334834215e-06, + "loss": 0.0032, + "step": 175200 + }, + { + "epoch": 1.1237425160262062, + "grad_norm": 0.02887616865336895, + "learning_rate": 4.79285711068624e-06, + "loss": 0.0022, + "step": 175210 + }, + { + "epoch": 1.1238066529199924, + "grad_norm": 0.07931360602378845, + "learning_rate": 4.792297889133911e-06, + "loss": 0.0018, + "step": 175220 + }, + { + "epoch": 1.1238707898137785, + "grad_norm": 0.035023000091314316, + "learning_rate": 4.791738670184239e-06, + "loss": 0.0016, + "step": 175230 + }, + { + "epoch": 1.1239349267075647, + "grad_norm": 0.12552867829799652, + "learning_rate": 4.791179453844232e-06, + "loss": 0.0014, + "step": 175240 + }, + { + "epoch": 1.1239990636013508, + "grad_norm": 0.05981716141104698, + "learning_rate": 4.790620240120893e-06, + "loss": 0.0018, + "step": 175250 + }, + { + "epoch": 1.1240632004951367, + "grad_norm": 0.025745639577507973, + "learning_rate": 4.7900610290212335e-06, + "loss": 0.0021, + "step": 175260 + }, + { + "epoch": 1.124127337388923, + "grad_norm": 0.05925753712654114, + "learning_rate": 4.789501820552259e-06, + "loss": 0.0035, + "step": 175270 + }, + { + "epoch": 1.124191474282709, + "grad_norm": 0.05949115753173828, + "learning_rate": 4.788942614720978e-06, + "loss": 0.0027, + "step": 175280 + }, + { + "epoch": 1.1242556111764952, + "grad_norm": 0.10865527391433716, + "learning_rate": 4.7883834115343955e-06, + "loss": 0.0018, + "step": 175290 + }, + { + "epoch": 1.1243197480702811, + "grad_norm": 0.11768241971731186, + "learning_rate": 4.787824210999521e-06, + "loss": 0.0015, + "step": 175300 + }, + { + "epoch": 1.1243838849640673, + "grad_norm": 0.012277359142899513, + "learning_rate": 4.78726501312336e-06, + "loss": 0.0024, + "step": 175310 + }, + { + "epoch": 1.1244480218578534, + "grad_norm": 0.049140699207782745, + "learning_rate": 4.786705817912921e-06, + "loss": 0.0005, + "step": 175320 + }, + { + "epoch": 1.1245121587516396, + "grad_norm": 0.1447220891714096, + "learning_rate": 4.78614662537521e-06, + "loss": 0.0016, + "step": 175330 + }, + { + "epoch": 1.1245762956454255, + "grad_norm": 0.1460440307855606, + "learning_rate": 4.785587435517236e-06, + "loss": 0.0027, + "step": 175340 + }, + { + "epoch": 1.1246404325392116, + "grad_norm": 0.17473335564136505, + "learning_rate": 4.785028248346003e-06, + "loss": 0.0016, + "step": 175350 + }, + { + "epoch": 1.1247045694329978, + "grad_norm": 0.006431942339986563, + "learning_rate": 4.784469063868521e-06, + "loss": 0.0023, + "step": 175360 + }, + { + "epoch": 1.124768706326784, + "grad_norm": 0.10810311138629913, + "learning_rate": 4.783909882091795e-06, + "loss": 0.0018, + "step": 175370 + }, + { + "epoch": 1.1248328432205699, + "grad_norm": 0.04965673387050629, + "learning_rate": 4.783350703022832e-06, + "loss": 0.0016, + "step": 175380 + }, + { + "epoch": 1.124896980114356, + "grad_norm": 0.0024077396374195814, + "learning_rate": 4.782791526668641e-06, + "loss": 0.0022, + "step": 175390 + }, + { + "epoch": 1.1249611170081422, + "grad_norm": 0.05500560626387596, + "learning_rate": 4.7822323530362256e-06, + "loss": 0.001, + "step": 175400 + }, + { + "epoch": 1.1250252539019283, + "grad_norm": 0.07789402455091476, + "learning_rate": 4.781673182132596e-06, + "loss": 0.0015, + "step": 175410 + }, + { + "epoch": 1.1250893907957145, + "grad_norm": 0.07948887348175049, + "learning_rate": 4.781114013964756e-06, + "loss": 0.0021, + "step": 175420 + }, + { + "epoch": 1.1251535276895004, + "grad_norm": 0.2515040636062622, + "learning_rate": 4.7805548485397134e-06, + "loss": 0.0021, + "step": 175430 + }, + { + "epoch": 1.1252176645832865, + "grad_norm": 0.1324034333229065, + "learning_rate": 4.779995685864478e-06, + "loss": 0.0025, + "step": 175440 + }, + { + "epoch": 1.1252818014770727, + "grad_norm": 0.017076067626476288, + "learning_rate": 4.779436525946052e-06, + "loss": 0.0008, + "step": 175450 + }, + { + "epoch": 1.1253459383708588, + "grad_norm": 0.030340420082211494, + "learning_rate": 4.778877368791445e-06, + "loss": 0.0016, + "step": 175460 + }, + { + "epoch": 1.1254100752646448, + "grad_norm": 0.004908331669867039, + "learning_rate": 4.7783182144076616e-06, + "loss": 0.0008, + "step": 175470 + }, + { + "epoch": 1.125474212158431, + "grad_norm": 0.0785120502114296, + "learning_rate": 4.777759062801711e-06, + "loss": 0.0022, + "step": 175480 + }, + { + "epoch": 1.125538349052217, + "grad_norm": 0.02501097321510315, + "learning_rate": 4.777199913980596e-06, + "loss": 0.0015, + "step": 175490 + }, + { + "epoch": 1.1256024859460032, + "grad_norm": 0.06175035238265991, + "learning_rate": 4.776640767951327e-06, + "loss": 0.0014, + "step": 175500 + }, + { + "epoch": 1.1256666228397894, + "grad_norm": 0.023419445380568504, + "learning_rate": 4.776081624720908e-06, + "loss": 0.0019, + "step": 175510 + }, + { + "epoch": 1.1257307597335753, + "grad_norm": 0.8936617374420166, + "learning_rate": 4.7755224842963464e-06, + "loss": 0.004, + "step": 175520 + }, + { + "epoch": 1.1257948966273614, + "grad_norm": 0.04048040136694908, + "learning_rate": 4.77496334668465e-06, + "loss": 0.0012, + "step": 175530 + }, + { + "epoch": 1.1258590335211476, + "grad_norm": 0.027657775208353996, + "learning_rate": 4.7744042118928215e-06, + "loss": 0.0014, + "step": 175540 + }, + { + "epoch": 1.1259231704149337, + "grad_norm": 0.06054976209998131, + "learning_rate": 4.773845079927871e-06, + "loss": 0.0029, + "step": 175550 + }, + { + "epoch": 1.1259873073087197, + "grad_norm": 0.06709885597229004, + "learning_rate": 4.773285950796802e-06, + "loss": 0.0017, + "step": 175560 + }, + { + "epoch": 1.1260514442025058, + "grad_norm": 0.06731562316417694, + "learning_rate": 4.772726824506623e-06, + "loss": 0.0015, + "step": 175570 + }, + { + "epoch": 1.126115581096292, + "grad_norm": 0.023892225697636604, + "learning_rate": 4.772167701064338e-06, + "loss": 0.0005, + "step": 175580 + }, + { + "epoch": 1.126179717990078, + "grad_norm": 0.2002870738506317, + "learning_rate": 4.771608580476957e-06, + "loss": 0.0005, + "step": 175590 + }, + { + "epoch": 1.126243854883864, + "grad_norm": 0.02563397027552128, + "learning_rate": 4.7710494627514815e-06, + "loss": 0.0016, + "step": 175600 + }, + { + "epoch": 1.1263079917776502, + "grad_norm": 0.13682080805301666, + "learning_rate": 4.770490347894922e-06, + "loss": 0.0018, + "step": 175610 + }, + { + "epoch": 1.1263721286714363, + "grad_norm": 0.22487889230251312, + "learning_rate": 4.76993123591428e-06, + "loss": 0.002, + "step": 175620 + }, + { + "epoch": 1.1264362655652225, + "grad_norm": 0.14215342700481415, + "learning_rate": 4.7693721268165655e-06, + "loss": 0.0016, + "step": 175630 + }, + { + "epoch": 1.1265004024590084, + "grad_norm": 0.07587555795907974, + "learning_rate": 4.768813020608782e-06, + "loss": 0.0011, + "step": 175640 + }, + { + "epoch": 1.1265645393527945, + "grad_norm": 0.10688536614179611, + "learning_rate": 4.768253917297937e-06, + "loss": 0.0014, + "step": 175650 + }, + { + "epoch": 1.1266286762465807, + "grad_norm": 0.17090702056884766, + "learning_rate": 4.767694816891037e-06, + "loss": 0.0032, + "step": 175660 + }, + { + "epoch": 1.1266928131403668, + "grad_norm": 0.10280928760766983, + "learning_rate": 4.767135719395086e-06, + "loss": 0.0013, + "step": 175670 + }, + { + "epoch": 1.126756950034153, + "grad_norm": 0.03153495863080025, + "learning_rate": 4.766576624817091e-06, + "loss": 0.0056, + "step": 175680 + }, + { + "epoch": 1.126821086927939, + "grad_norm": 0.0600753091275692, + "learning_rate": 4.766017533164058e-06, + "loss": 0.0013, + "step": 175690 + }, + { + "epoch": 1.126885223821725, + "grad_norm": 0.0316365584731102, + "learning_rate": 4.765458444442993e-06, + "loss": 0.0012, + "step": 175700 + }, + { + "epoch": 1.1269493607155112, + "grad_norm": 0.001820790464989841, + "learning_rate": 4.7648993586609e-06, + "loss": 0.0019, + "step": 175710 + }, + { + "epoch": 1.1270134976092974, + "grad_norm": 0.011880267411470413, + "learning_rate": 4.764340275824787e-06, + "loss": 0.0015, + "step": 175720 + }, + { + "epoch": 1.1270776345030833, + "grad_norm": 0.03209491819143295, + "learning_rate": 4.763781195941659e-06, + "loss": 0.0014, + "step": 175730 + }, + { + "epoch": 1.1271417713968694, + "grad_norm": 0.22056789696216583, + "learning_rate": 4.763222119018521e-06, + "loss": 0.0027, + "step": 175740 + }, + { + "epoch": 1.1272059082906556, + "grad_norm": 0.12804964184761047, + "learning_rate": 4.762663045062379e-06, + "loss": 0.0021, + "step": 175750 + }, + { + "epoch": 1.1272700451844417, + "grad_norm": 0.05203420668840408, + "learning_rate": 4.762103974080239e-06, + "loss": 0.0027, + "step": 175760 + }, + { + "epoch": 1.1273341820782279, + "grad_norm": 0.06643746048212051, + "learning_rate": 4.761544906079106e-06, + "loss": 0.0018, + "step": 175770 + }, + { + "epoch": 1.1273983189720138, + "grad_norm": 0.16261453926563263, + "learning_rate": 4.760985841065985e-06, + "loss": 0.0013, + "step": 175780 + }, + { + "epoch": 1.1274624558658, + "grad_norm": 0.06322963535785675, + "learning_rate": 4.760426779047884e-06, + "loss": 0.002, + "step": 175790 + }, + { + "epoch": 1.127526592759586, + "grad_norm": 0.24147634208202362, + "learning_rate": 4.759867720031804e-06, + "loss": 0.0012, + "step": 175800 + }, + { + "epoch": 1.127590729653372, + "grad_norm": 0.18048495054244995, + "learning_rate": 4.759308664024756e-06, + "loss": 0.0011, + "step": 175810 + }, + { + "epoch": 1.1276548665471582, + "grad_norm": 0.07934437692165375, + "learning_rate": 4.75874961103374e-06, + "loss": 0.0018, + "step": 175820 + }, + { + "epoch": 1.1277190034409443, + "grad_norm": 0.08311376720666885, + "learning_rate": 4.758190561065766e-06, + "loss": 0.0017, + "step": 175830 + }, + { + "epoch": 1.1277831403347305, + "grad_norm": 0.2344166338443756, + "learning_rate": 4.757631514127836e-06, + "loss": 0.0021, + "step": 175840 + }, + { + "epoch": 1.1278472772285166, + "grad_norm": 0.11134286969900131, + "learning_rate": 4.757072470226957e-06, + "loss": 0.0013, + "step": 175850 + }, + { + "epoch": 1.1279114141223026, + "grad_norm": 0.04196736961603165, + "learning_rate": 4.756513429370132e-06, + "loss": 0.0019, + "step": 175860 + }, + { + "epoch": 1.1279755510160887, + "grad_norm": 0.07570148259401321, + "learning_rate": 4.755954391564369e-06, + "loss": 0.0013, + "step": 175870 + }, + { + "epoch": 1.1280396879098749, + "grad_norm": 0.14124351739883423, + "learning_rate": 4.755395356816672e-06, + "loss": 0.0011, + "step": 175880 + }, + { + "epoch": 1.128103824803661, + "grad_norm": 0.03067854233086109, + "learning_rate": 4.754836325134046e-06, + "loss": 0.0011, + "step": 175890 + }, + { + "epoch": 1.128167961697447, + "grad_norm": 0.018530651926994324, + "learning_rate": 4.7542772965234945e-06, + "loss": 0.0009, + "step": 175900 + }, + { + "epoch": 1.128232098591233, + "grad_norm": 0.13074007630348206, + "learning_rate": 4.753718270992026e-06, + "loss": 0.0011, + "step": 175910 + }, + { + "epoch": 1.1282962354850192, + "grad_norm": 0.033416058868169785, + "learning_rate": 4.753159248546643e-06, + "loss": 0.0011, + "step": 175920 + }, + { + "epoch": 1.1283603723788054, + "grad_norm": 0.06768912076950073, + "learning_rate": 4.75260022919435e-06, + "loss": 0.0011, + "step": 175930 + }, + { + "epoch": 1.1284245092725915, + "grad_norm": 0.3239620625972748, + "learning_rate": 4.752041212942154e-06, + "loss": 0.0018, + "step": 175940 + }, + { + "epoch": 1.1284886461663775, + "grad_norm": 0.14764080941677094, + "learning_rate": 4.7514821997970585e-06, + "loss": 0.0022, + "step": 175950 + }, + { + "epoch": 1.1285527830601636, + "grad_norm": 0.09093287587165833, + "learning_rate": 4.750923189766069e-06, + "loss": 0.0018, + "step": 175960 + }, + { + "epoch": 1.1286169199539497, + "grad_norm": 0.10408314317464828, + "learning_rate": 4.7503641828561895e-06, + "loss": 0.0033, + "step": 175970 + }, + { + "epoch": 1.128681056847736, + "grad_norm": 0.5459677577018738, + "learning_rate": 4.749805179074426e-06, + "loss": 0.0024, + "step": 175980 + }, + { + "epoch": 1.1287451937415218, + "grad_norm": 0.14407488703727722, + "learning_rate": 4.749246178427781e-06, + "loss": 0.0016, + "step": 175990 + }, + { + "epoch": 1.128809330635308, + "grad_norm": 0.08890324831008911, + "learning_rate": 4.748687180923262e-06, + "loss": 0.0014, + "step": 176000 + }, + { + "epoch": 1.1288734675290941, + "grad_norm": 0.1855904757976532, + "learning_rate": 4.748128186567871e-06, + "loss": 0.0029, + "step": 176010 + }, + { + "epoch": 1.1289376044228803, + "grad_norm": 0.14686384797096252, + "learning_rate": 4.7475691953686154e-06, + "loss": 0.0011, + "step": 176020 + }, + { + "epoch": 1.1290017413166662, + "grad_norm": 0.01927570253610611, + "learning_rate": 4.747010207332497e-06, + "loss": 0.0017, + "step": 176030 + }, + { + "epoch": 1.1290658782104523, + "grad_norm": 0.09091423451900482, + "learning_rate": 4.7464512224665225e-06, + "loss": 0.0017, + "step": 176040 + }, + { + "epoch": 1.1291300151042385, + "grad_norm": 0.028048235923051834, + "learning_rate": 4.745892240777694e-06, + "loss": 0.002, + "step": 176050 + }, + { + "epoch": 1.1291941519980246, + "grad_norm": 0.024108463898301125, + "learning_rate": 4.745333262273019e-06, + "loss": 0.003, + "step": 176060 + }, + { + "epoch": 1.1292582888918106, + "grad_norm": 0.14120781421661377, + "learning_rate": 4.7447742869594986e-06, + "loss": 0.0033, + "step": 176070 + }, + { + "epoch": 1.1293224257855967, + "grad_norm": 0.01841743476688862, + "learning_rate": 4.744215314844138e-06, + "loss": 0.0014, + "step": 176080 + }, + { + "epoch": 1.1293865626793829, + "grad_norm": 0.017169052734971046, + "learning_rate": 4.743656345933945e-06, + "loss": 0.0026, + "step": 176090 + }, + { + "epoch": 1.129450699573169, + "grad_norm": 0.13318702578544617, + "learning_rate": 4.74309738023592e-06, + "loss": 0.0017, + "step": 176100 + }, + { + "epoch": 1.1295148364669552, + "grad_norm": 0.1167878583073616, + "learning_rate": 4.7425384177570686e-06, + "loss": 0.0012, + "step": 176110 + }, + { + "epoch": 1.129578973360741, + "grad_norm": 0.008401491679251194, + "learning_rate": 4.741979458504394e-06, + "loss": 0.0013, + "step": 176120 + }, + { + "epoch": 1.1296431102545272, + "grad_norm": 0.0819748267531395, + "learning_rate": 4.741420502484903e-06, + "loss": 0.0024, + "step": 176130 + }, + { + "epoch": 1.1297072471483134, + "grad_norm": 0.004283037036657333, + "learning_rate": 4.740861549705595e-06, + "loss": 0.0022, + "step": 176140 + }, + { + "epoch": 1.1297713840420995, + "grad_norm": 0.061530210077762604, + "learning_rate": 4.740302600173479e-06, + "loss": 0.002, + "step": 176150 + }, + { + "epoch": 1.1298355209358855, + "grad_norm": 0.0016988730058073997, + "learning_rate": 4.739743653895557e-06, + "loss": 0.0007, + "step": 176160 + }, + { + "epoch": 1.1298996578296716, + "grad_norm": 0.10308869183063507, + "learning_rate": 4.739184710878833e-06, + "loss": 0.0014, + "step": 176170 + }, + { + "epoch": 1.1299637947234578, + "grad_norm": 0.01512821763753891, + "learning_rate": 4.73862577113031e-06, + "loss": 0.0034, + "step": 176180 + }, + { + "epoch": 1.130027931617244, + "grad_norm": 0.10347221046686172, + "learning_rate": 4.738066834656993e-06, + "loss": 0.0022, + "step": 176190 + }, + { + "epoch": 1.13009206851103, + "grad_norm": 0.13395991921424866, + "learning_rate": 4.737507901465886e-06, + "loss": 0.002, + "step": 176200 + }, + { + "epoch": 1.130156205404816, + "grad_norm": 0.08036302775144577, + "learning_rate": 4.736948971563993e-06, + "loss": 0.0018, + "step": 176210 + }, + { + "epoch": 1.1302203422986021, + "grad_norm": 0.05972522497177124, + "learning_rate": 4.736390044958317e-06, + "loss": 0.0018, + "step": 176220 + }, + { + "epoch": 1.1302844791923883, + "grad_norm": 0.3460785746574402, + "learning_rate": 4.735831121655863e-06, + "loss": 0.003, + "step": 176230 + }, + { + "epoch": 1.1303486160861742, + "grad_norm": 0.08491852134466171, + "learning_rate": 4.735272201663633e-06, + "loss": 0.0011, + "step": 176240 + }, + { + "epoch": 1.1304127529799604, + "grad_norm": 0.0766621083021164, + "learning_rate": 4.7347132849886315e-06, + "loss": 0.0011, + "step": 176250 + }, + { + "epoch": 1.1304768898737465, + "grad_norm": 0.054548390209674835, + "learning_rate": 4.7341543716378625e-06, + "loss": 0.0022, + "step": 176260 + }, + { + "epoch": 1.1305410267675327, + "grad_norm": 0.04001692309975624, + "learning_rate": 4.73359546161833e-06, + "loss": 0.0009, + "step": 176270 + }, + { + "epoch": 1.1306051636613188, + "grad_norm": 0.11134858429431915, + "learning_rate": 4.733036554937035e-06, + "loss": 0.0021, + "step": 176280 + }, + { + "epoch": 1.1306693005551047, + "grad_norm": 0.14896228909492493, + "learning_rate": 4.732477651600985e-06, + "loss": 0.001, + "step": 176290 + }, + { + "epoch": 1.1307334374488909, + "grad_norm": 0.07890468835830688, + "learning_rate": 4.731918751617179e-06, + "loss": 0.0025, + "step": 176300 + }, + { + "epoch": 1.130797574342677, + "grad_norm": 0.07719369977712631, + "learning_rate": 4.731359854992624e-06, + "loss": 0.0018, + "step": 176310 + }, + { + "epoch": 1.1308617112364632, + "grad_norm": 0.11463964730501175, + "learning_rate": 4.730800961734321e-06, + "loss": 0.0012, + "step": 176320 + }, + { + "epoch": 1.130925848130249, + "grad_norm": 0.024333612993359566, + "learning_rate": 4.730242071849275e-06, + "loss": 0.0018, + "step": 176330 + }, + { + "epoch": 1.1309899850240352, + "grad_norm": 0.32956039905548096, + "learning_rate": 4.729683185344488e-06, + "loss": 0.0019, + "step": 176340 + }, + { + "epoch": 1.1310541219178214, + "grad_norm": 0.07506587356328964, + "learning_rate": 4.729124302226965e-06, + "loss": 0.0013, + "step": 176350 + }, + { + "epoch": 1.1311182588116075, + "grad_norm": 0.08916376531124115, + "learning_rate": 4.728565422503708e-06, + "loss": 0.0012, + "step": 176360 + }, + { + "epoch": 1.1311823957053937, + "grad_norm": 0.01957746036350727, + "learning_rate": 4.728006546181718e-06, + "loss": 0.0009, + "step": 176370 + }, + { + "epoch": 1.1312465325991796, + "grad_norm": 0.22332695126533508, + "learning_rate": 4.727447673268003e-06, + "loss": 0.003, + "step": 176380 + }, + { + "epoch": 1.1313106694929658, + "grad_norm": 0.04445593059062958, + "learning_rate": 4.726888803769562e-06, + "loss": 0.0015, + "step": 176390 + }, + { + "epoch": 1.131374806386752, + "grad_norm": 0.15768571197986603, + "learning_rate": 4.726329937693401e-06, + "loss": 0.0015, + "step": 176400 + }, + { + "epoch": 1.131438943280538, + "grad_norm": 0.013671383261680603, + "learning_rate": 4.72577107504652e-06, + "loss": 0.0014, + "step": 176410 + }, + { + "epoch": 1.131503080174324, + "grad_norm": 0.3652510941028595, + "learning_rate": 4.725212215835925e-06, + "loss": 0.0021, + "step": 176420 + }, + { + "epoch": 1.1315672170681101, + "grad_norm": 0.15738549828529358, + "learning_rate": 4.724653360068616e-06, + "loss": 0.0019, + "step": 176430 + }, + { + "epoch": 1.1316313539618963, + "grad_norm": 0.09711114317178726, + "learning_rate": 4.724094507751598e-06, + "loss": 0.0062, + "step": 176440 + }, + { + "epoch": 1.1316954908556824, + "grad_norm": 0.2600592076778412, + "learning_rate": 4.723535658891872e-06, + "loss": 0.002, + "step": 176450 + }, + { + "epoch": 1.1317596277494686, + "grad_norm": 0.09903906285762787, + "learning_rate": 4.722976813496443e-06, + "loss": 0.0024, + "step": 176460 + }, + { + "epoch": 1.1318237646432545, + "grad_norm": 0.09826955199241638, + "learning_rate": 4.7224179715723105e-06, + "loss": 0.0022, + "step": 176470 + }, + { + "epoch": 1.1318879015370407, + "grad_norm": 0.22338512539863586, + "learning_rate": 4.721859133126481e-06, + "loss": 0.0068, + "step": 176480 + }, + { + "epoch": 1.1319520384308268, + "grad_norm": 0.08771412074565887, + "learning_rate": 4.721300298165954e-06, + "loss": 0.0006, + "step": 176490 + }, + { + "epoch": 1.1320161753246127, + "grad_norm": 0.16829173266887665, + "learning_rate": 4.720741466697734e-06, + "loss": 0.0012, + "step": 176500 + }, + { + "epoch": 1.1320803122183989, + "grad_norm": 0.15693411231040955, + "learning_rate": 4.7201826387288245e-06, + "loss": 0.0015, + "step": 176510 + }, + { + "epoch": 1.132144449112185, + "grad_norm": 0.20173071324825287, + "learning_rate": 4.719623814266224e-06, + "loss": 0.0013, + "step": 176520 + }, + { + "epoch": 1.1322085860059712, + "grad_norm": 0.1597852110862732, + "learning_rate": 4.71906499331694e-06, + "loss": 0.002, + "step": 176530 + }, + { + "epoch": 1.1322727228997573, + "grad_norm": 0.08356647938489914, + "learning_rate": 4.71850617588797e-06, + "loss": 0.0028, + "step": 176540 + }, + { + "epoch": 1.1323368597935433, + "grad_norm": 0.07380029559135437, + "learning_rate": 4.717947361986321e-06, + "loss": 0.0017, + "step": 176550 + }, + { + "epoch": 1.1324009966873294, + "grad_norm": 0.07805683463811874, + "learning_rate": 4.717388551618992e-06, + "loss": 0.0009, + "step": 176560 + }, + { + "epoch": 1.1324651335811156, + "grad_norm": 0.11640407145023346, + "learning_rate": 4.716829744792987e-06, + "loss": 0.0023, + "step": 176570 + }, + { + "epoch": 1.1325292704749017, + "grad_norm": 0.021123992279171944, + "learning_rate": 4.7162709415153065e-06, + "loss": 0.0025, + "step": 176580 + }, + { + "epoch": 1.1325934073686876, + "grad_norm": 0.048886850476264954, + "learning_rate": 4.715712141792955e-06, + "loss": 0.0017, + "step": 176590 + }, + { + "epoch": 1.1326575442624738, + "grad_norm": 0.08189143240451813, + "learning_rate": 4.715153345632933e-06, + "loss": 0.0026, + "step": 176600 + }, + { + "epoch": 1.13272168115626, + "grad_norm": 0.047019582241773605, + "learning_rate": 4.714594553042244e-06, + "loss": 0.0009, + "step": 176610 + }, + { + "epoch": 1.132785818050046, + "grad_norm": 0.0060059260576963425, + "learning_rate": 4.7140357640278875e-06, + "loss": 0.0019, + "step": 176620 + }, + { + "epoch": 1.1328499549438322, + "grad_norm": 0.16405409574508667, + "learning_rate": 4.7134769785968685e-06, + "loss": 0.0017, + "step": 176630 + }, + { + "epoch": 1.1329140918376182, + "grad_norm": 0.014878307469189167, + "learning_rate": 4.7129181967561876e-06, + "loss": 0.0011, + "step": 176640 + }, + { + "epoch": 1.1329782287314043, + "grad_norm": 0.07795178890228271, + "learning_rate": 4.712359418512846e-06, + "loss": 0.001, + "step": 176650 + }, + { + "epoch": 1.1330423656251905, + "grad_norm": 0.2748396694660187, + "learning_rate": 4.711800643873848e-06, + "loss": 0.0021, + "step": 176660 + }, + { + "epoch": 1.1331065025189766, + "grad_norm": 0.018300451338291168, + "learning_rate": 4.711241872846193e-06, + "loss": 0.002, + "step": 176670 + }, + { + "epoch": 1.1331706394127625, + "grad_norm": 0.03134751319885254, + "learning_rate": 4.710683105436884e-06, + "loss": 0.0008, + "step": 176680 + }, + { + "epoch": 1.1332347763065487, + "grad_norm": 0.10682273656129837, + "learning_rate": 4.710124341652922e-06, + "loss": 0.0015, + "step": 176690 + }, + { + "epoch": 1.1332989132003348, + "grad_norm": 0.17113542556762695, + "learning_rate": 4.70956558150131e-06, + "loss": 0.0019, + "step": 176700 + }, + { + "epoch": 1.133363050094121, + "grad_norm": 0.26188206672668457, + "learning_rate": 4.709006824989048e-06, + "loss": 0.0031, + "step": 176710 + }, + { + "epoch": 1.133427186987907, + "grad_norm": 0.47190165519714355, + "learning_rate": 4.708448072123139e-06, + "loss": 0.0017, + "step": 176720 + }, + { + "epoch": 1.133491323881693, + "grad_norm": 0.15740633010864258, + "learning_rate": 4.707889322910584e-06, + "loss": 0.0039, + "step": 176730 + }, + { + "epoch": 1.1335554607754792, + "grad_norm": 0.0610303096473217, + "learning_rate": 4.707330577358385e-06, + "loss": 0.0009, + "step": 176740 + }, + { + "epoch": 1.1336195976692653, + "grad_norm": 0.05434420332312584, + "learning_rate": 4.7067718354735405e-06, + "loss": 0.0014, + "step": 176750 + }, + { + "epoch": 1.1336837345630513, + "grad_norm": 0.1731656938791275, + "learning_rate": 4.706213097263057e-06, + "loss": 0.0019, + "step": 176760 + }, + { + "epoch": 1.1337478714568374, + "grad_norm": 0.044647444039583206, + "learning_rate": 4.705654362733933e-06, + "loss": 0.0009, + "step": 176770 + }, + { + "epoch": 1.1338120083506236, + "grad_norm": 0.08089784532785416, + "learning_rate": 4.705095631893169e-06, + "loss": 0.0095, + "step": 176780 + }, + { + "epoch": 1.1338761452444097, + "grad_norm": 0.09667832404375076, + "learning_rate": 4.704536904747769e-06, + "loss": 0.0039, + "step": 176790 + }, + { + "epoch": 1.1339402821381959, + "grad_norm": 0.06942083686590195, + "learning_rate": 4.703978181304732e-06, + "loss": 0.0008, + "step": 176800 + }, + { + "epoch": 1.1340044190319818, + "grad_norm": 0.06267499178647995, + "learning_rate": 4.70341946157106e-06, + "loss": 0.0018, + "step": 176810 + }, + { + "epoch": 1.134068555925768, + "grad_norm": 0.021174170076847076, + "learning_rate": 4.702860745553754e-06, + "loss": 0.0011, + "step": 176820 + }, + { + "epoch": 1.134132692819554, + "grad_norm": 0.13701143860816956, + "learning_rate": 4.702302033259816e-06, + "loss": 0.001, + "step": 176830 + }, + { + "epoch": 1.1341968297133402, + "grad_norm": 0.26283982396125793, + "learning_rate": 4.701743324696245e-06, + "loss": 0.0023, + "step": 176840 + }, + { + "epoch": 1.1342609666071262, + "grad_norm": 0.06547980010509491, + "learning_rate": 4.701184619870045e-06, + "loss": 0.0007, + "step": 176850 + }, + { + "epoch": 1.1343251035009123, + "grad_norm": 0.028654005378484726, + "learning_rate": 4.700625918788214e-06, + "loss": 0.0015, + "step": 176860 + }, + { + "epoch": 1.1343892403946985, + "grad_norm": 0.037154026329517365, + "learning_rate": 4.700067221457755e-06, + "loss": 0.0013, + "step": 176870 + }, + { + "epoch": 1.1344533772884846, + "grad_norm": 0.15115104615688324, + "learning_rate": 4.699508527885667e-06, + "loss": 0.0015, + "step": 176880 + }, + { + "epoch": 1.1345175141822708, + "grad_norm": 0.082732193171978, + "learning_rate": 4.6989498380789525e-06, + "loss": 0.0008, + "step": 176890 + }, + { + "epoch": 1.1345816510760567, + "grad_norm": 0.10726206004619598, + "learning_rate": 4.698391152044611e-06, + "loss": 0.0012, + "step": 176900 + }, + { + "epoch": 1.1346457879698428, + "grad_norm": 0.08087017387151718, + "learning_rate": 4.697832469789646e-06, + "loss": 0.0018, + "step": 176910 + }, + { + "epoch": 1.134709924863629, + "grad_norm": 0.012344476766884327, + "learning_rate": 4.697273791321054e-06, + "loss": 0.0028, + "step": 176920 + }, + { + "epoch": 1.134774061757415, + "grad_norm": 0.07980614900588989, + "learning_rate": 4.696715116645838e-06, + "loss": 0.001, + "step": 176930 + }, + { + "epoch": 1.134838198651201, + "grad_norm": 0.05606852471828461, + "learning_rate": 4.696156445770999e-06, + "loss": 0.004, + "step": 176940 + }, + { + "epoch": 1.1349023355449872, + "grad_norm": 0.2644835412502289, + "learning_rate": 4.695597778703535e-06, + "loss": 0.0015, + "step": 176950 + }, + { + "epoch": 1.1349664724387734, + "grad_norm": 0.17666363716125488, + "learning_rate": 4.695039115450449e-06, + "loss": 0.0015, + "step": 176960 + }, + { + "epoch": 1.1350306093325595, + "grad_norm": 0.02572619542479515, + "learning_rate": 4.694480456018743e-06, + "loss": 0.0009, + "step": 176970 + }, + { + "epoch": 1.1350947462263454, + "grad_norm": 0.07502380013465881, + "learning_rate": 4.693921800415413e-06, + "loss": 0.0018, + "step": 176980 + }, + { + "epoch": 1.1351588831201316, + "grad_norm": 0.03207005187869072, + "learning_rate": 4.693363148647464e-06, + "loss": 0.0011, + "step": 176990 + }, + { + "epoch": 1.1352230200139177, + "grad_norm": 0.25719138979911804, + "learning_rate": 4.692804500721892e-06, + "loss": 0.0016, + "step": 177000 + }, + { + "epoch": 1.1352871569077039, + "grad_norm": 0.0666014552116394, + "learning_rate": 4.6922458566457e-06, + "loss": 0.0025, + "step": 177010 + }, + { + "epoch": 1.1353512938014898, + "grad_norm": 0.14155615866184235, + "learning_rate": 4.691687216425887e-06, + "loss": 0.0026, + "step": 177020 + }, + { + "epoch": 1.135415430695276, + "grad_norm": 0.02840949408710003, + "learning_rate": 4.691128580069455e-06, + "loss": 0.0018, + "step": 177030 + }, + { + "epoch": 1.135479567589062, + "grad_norm": 0.06482551246881485, + "learning_rate": 4.6905699475834e-06, + "loss": 0.0015, + "step": 177040 + }, + { + "epoch": 1.1355437044828482, + "grad_norm": 0.20891007781028748, + "learning_rate": 4.6900113189747255e-06, + "loss": 0.0017, + "step": 177050 + }, + { + "epoch": 1.1356078413766344, + "grad_norm": 0.15411852300167084, + "learning_rate": 4.689452694250432e-06, + "loss": 0.0026, + "step": 177060 + }, + { + "epoch": 1.1356719782704203, + "grad_norm": 0.08161000162363052, + "learning_rate": 4.688894073417517e-06, + "loss": 0.0032, + "step": 177070 + }, + { + "epoch": 1.1357361151642065, + "grad_norm": 0.1426655650138855, + "learning_rate": 4.688335456482983e-06, + "loss": 0.0015, + "step": 177080 + }, + { + "epoch": 1.1358002520579926, + "grad_norm": 0.145971417427063, + "learning_rate": 4.687776843453826e-06, + "loss": 0.0017, + "step": 177090 + }, + { + "epoch": 1.1358643889517788, + "grad_norm": 0.07309820502996445, + "learning_rate": 4.687218234337051e-06, + "loss": 0.0014, + "step": 177100 + }, + { + "epoch": 1.1359285258455647, + "grad_norm": 0.16422462463378906, + "learning_rate": 4.686659629139652e-06, + "loss": 0.0052, + "step": 177110 + }, + { + "epoch": 1.1359926627393508, + "grad_norm": 0.05093219876289368, + "learning_rate": 4.686101027868635e-06, + "loss": 0.0023, + "step": 177120 + }, + { + "epoch": 1.136056799633137, + "grad_norm": 0.05091457441449165, + "learning_rate": 4.685542430530995e-06, + "loss": 0.003, + "step": 177130 + }, + { + "epoch": 1.1361209365269231, + "grad_norm": 0.1133175641298294, + "learning_rate": 4.6849838371337335e-06, + "loss": 0.002, + "step": 177140 + }, + { + "epoch": 1.136185073420709, + "grad_norm": 0.06035906448960304, + "learning_rate": 4.684425247683849e-06, + "loss": 0.0018, + "step": 177150 + }, + { + "epoch": 1.1362492103144952, + "grad_norm": 0.07222796976566315, + "learning_rate": 4.683866662188343e-06, + "loss": 0.0018, + "step": 177160 + }, + { + "epoch": 1.1363133472082814, + "grad_norm": 0.009993140585720539, + "learning_rate": 4.683308080654211e-06, + "loss": 0.0013, + "step": 177170 + }, + { + "epoch": 1.1363774841020675, + "grad_norm": 0.08702369034290314, + "learning_rate": 4.682749503088458e-06, + "loss": 0.0015, + "step": 177180 + }, + { + "epoch": 1.1364416209958534, + "grad_norm": 0.11006546020507812, + "learning_rate": 4.6821909294980785e-06, + "loss": 0.0018, + "step": 177190 + }, + { + "epoch": 1.1365057578896396, + "grad_norm": 0.05190633237361908, + "learning_rate": 4.6816323598900745e-06, + "loss": 0.0013, + "step": 177200 + }, + { + "epoch": 1.1365698947834257, + "grad_norm": 0.17885461449623108, + "learning_rate": 4.681073794271445e-06, + "loss": 0.0017, + "step": 177210 + }, + { + "epoch": 1.1366340316772119, + "grad_norm": 0.07297784090042114, + "learning_rate": 4.680515232649188e-06, + "loss": 0.0014, + "step": 177220 + }, + { + "epoch": 1.136698168570998, + "grad_norm": 0.05929350480437279, + "learning_rate": 4.679956675030304e-06, + "loss": 0.0024, + "step": 177230 + }, + { + "epoch": 1.136762305464784, + "grad_norm": 0.06213223561644554, + "learning_rate": 4.679398121421791e-06, + "loss": 0.0012, + "step": 177240 + }, + { + "epoch": 1.13682644235857, + "grad_norm": 0.05396553874015808, + "learning_rate": 4.678839571830649e-06, + "loss": 0.0009, + "step": 177250 + }, + { + "epoch": 1.1368905792523563, + "grad_norm": 0.20179583132266998, + "learning_rate": 4.678281026263876e-06, + "loss": 0.0025, + "step": 177260 + }, + { + "epoch": 1.1369547161461424, + "grad_norm": 0.08537101000547409, + "learning_rate": 4.6777224847284745e-06, + "loss": 0.0014, + "step": 177270 + }, + { + "epoch": 1.1370188530399283, + "grad_norm": 0.05290082469582558, + "learning_rate": 4.677163947231438e-06, + "loss": 0.0019, + "step": 177280 + }, + { + "epoch": 1.1370829899337145, + "grad_norm": 0.04260869696736336, + "learning_rate": 4.676605413779769e-06, + "loss": 0.002, + "step": 177290 + }, + { + "epoch": 1.1371471268275006, + "grad_norm": 0.024307915940880775, + "learning_rate": 4.676046884380464e-06, + "loss": 0.0014, + "step": 177300 + }, + { + "epoch": 1.1372112637212868, + "grad_norm": 0.05193071812391281, + "learning_rate": 4.675488359040524e-06, + "loss": 0.0091, + "step": 177310 + }, + { + "epoch": 1.137275400615073, + "grad_norm": 0.07003153860569, + "learning_rate": 4.6749298377669464e-06, + "loss": 0.0022, + "step": 177320 + }, + { + "epoch": 1.1373395375088589, + "grad_norm": 0.0878596380352974, + "learning_rate": 4.674371320566731e-06, + "loss": 0.0011, + "step": 177330 + }, + { + "epoch": 1.137403674402645, + "grad_norm": 0.019946128129959106, + "learning_rate": 4.673812807446875e-06, + "loss": 0.0011, + "step": 177340 + }, + { + "epoch": 1.1374678112964312, + "grad_norm": 0.08865027874708176, + "learning_rate": 4.673254298414378e-06, + "loss": 0.0014, + "step": 177350 + }, + { + "epoch": 1.137531948190217, + "grad_norm": 0.060385555028915405, + "learning_rate": 4.672695793476238e-06, + "loss": 0.0015, + "step": 177360 + }, + { + "epoch": 1.1375960850840032, + "grad_norm": 0.05343933776021004, + "learning_rate": 4.672137292639453e-06, + "loss": 0.001, + "step": 177370 + }, + { + "epoch": 1.1376602219777894, + "grad_norm": 0.1751498132944107, + "learning_rate": 4.6715787959110235e-06, + "loss": 0.0019, + "step": 177380 + }, + { + "epoch": 1.1377243588715755, + "grad_norm": 0.06436331570148468, + "learning_rate": 4.671020303297945e-06, + "loss": 0.0013, + "step": 177390 + }, + { + "epoch": 1.1377884957653617, + "grad_norm": 0.1137598305940628, + "learning_rate": 4.670461814807218e-06, + "loss": 0.0021, + "step": 177400 + }, + { + "epoch": 1.1378526326591476, + "grad_norm": 0.06752078235149384, + "learning_rate": 4.66990333044584e-06, + "loss": 0.0014, + "step": 177410 + }, + { + "epoch": 1.1379167695529337, + "grad_norm": 0.056894451379776, + "learning_rate": 4.669344850220809e-06, + "loss": 0.0036, + "step": 177420 + }, + { + "epoch": 1.13798090644672, + "grad_norm": 0.14695769548416138, + "learning_rate": 4.668786374139123e-06, + "loss": 0.0023, + "step": 177430 + }, + { + "epoch": 1.138045043340506, + "grad_norm": 0.14649368822574615, + "learning_rate": 4.668227902207782e-06, + "loss": 0.0016, + "step": 177440 + }, + { + "epoch": 1.138109180234292, + "grad_norm": 0.06601706147193909, + "learning_rate": 4.66766943443378e-06, + "loss": 0.0014, + "step": 177450 + }, + { + "epoch": 1.1381733171280781, + "grad_norm": 0.05942267179489136, + "learning_rate": 4.667110970824119e-06, + "loss": 0.0006, + "step": 177460 + }, + { + "epoch": 1.1382374540218643, + "grad_norm": 0.14742408692836761, + "learning_rate": 4.666552511385795e-06, + "loss": 0.0018, + "step": 177470 + }, + { + "epoch": 1.1383015909156504, + "grad_norm": 0.1130622997879982, + "learning_rate": 4.665994056125806e-06, + "loss": 0.0011, + "step": 177480 + }, + { + "epoch": 1.1383657278094366, + "grad_norm": 0.03683945909142494, + "learning_rate": 4.665435605051151e-06, + "loss": 0.0038, + "step": 177490 + }, + { + "epoch": 1.1384298647032225, + "grad_norm": 0.07685775309801102, + "learning_rate": 4.664877158168827e-06, + "loss": 0.0008, + "step": 177500 + }, + { + "epoch": 1.1384940015970086, + "grad_norm": 0.195655956864357, + "learning_rate": 4.664318715485832e-06, + "loss": 0.0012, + "step": 177510 + }, + { + "epoch": 1.1385581384907948, + "grad_norm": 0.002429707907140255, + "learning_rate": 4.6637602770091614e-06, + "loss": 0.0035, + "step": 177520 + }, + { + "epoch": 1.138622275384581, + "grad_norm": 0.09719683229923248, + "learning_rate": 4.663201842745818e-06, + "loss": 0.0013, + "step": 177530 + }, + { + "epoch": 1.1386864122783669, + "grad_norm": 0.03722328320145607, + "learning_rate": 4.662643412702793e-06, + "loss": 0.002, + "step": 177540 + }, + { + "epoch": 1.138750549172153, + "grad_norm": 0.015547439455986023, + "learning_rate": 4.662084986887089e-06, + "loss": 0.0016, + "step": 177550 + }, + { + "epoch": 1.1388146860659392, + "grad_norm": 0.11467263847589493, + "learning_rate": 4.661526565305701e-06, + "loss": 0.0014, + "step": 177560 + }, + { + "epoch": 1.1388788229597253, + "grad_norm": 0.06219152361154556, + "learning_rate": 4.660968147965628e-06, + "loss": 0.0024, + "step": 177570 + }, + { + "epoch": 1.1389429598535112, + "grad_norm": 0.05296056717634201, + "learning_rate": 4.6604097348738655e-06, + "loss": 0.0015, + "step": 177580 + }, + { + "epoch": 1.1390070967472974, + "grad_norm": 0.0316719226539135, + "learning_rate": 4.6598513260374125e-06, + "loss": 0.0018, + "step": 177590 + }, + { + "epoch": 1.1390712336410835, + "grad_norm": 0.10980919748544693, + "learning_rate": 4.659292921463264e-06, + "loss": 0.0008, + "step": 177600 + }, + { + "epoch": 1.1391353705348697, + "grad_norm": 0.09972270578145981, + "learning_rate": 4.658734521158421e-06, + "loss": 0.0014, + "step": 177610 + }, + { + "epoch": 1.1391995074286556, + "grad_norm": 0.03266414627432823, + "learning_rate": 4.658176125129876e-06, + "loss": 0.0018, + "step": 177620 + }, + { + "epoch": 1.1392636443224418, + "grad_norm": 0.013438074849545956, + "learning_rate": 4.657617733384628e-06, + "loss": 0.0015, + "step": 177630 + }, + { + "epoch": 1.139327781216228, + "grad_norm": 0.06101763993501663, + "learning_rate": 4.657059345929677e-06, + "loss": 0.0018, + "step": 177640 + }, + { + "epoch": 1.139391918110014, + "grad_norm": 0.06609926372766495, + "learning_rate": 4.656500962772016e-06, + "loss": 0.0012, + "step": 177650 + }, + { + "epoch": 1.1394560550038002, + "grad_norm": 0.22577722370624542, + "learning_rate": 4.655942583918644e-06, + "loss": 0.0015, + "step": 177660 + }, + { + "epoch": 1.1395201918975861, + "grad_norm": 0.2436590939760208, + "learning_rate": 4.655384209376557e-06, + "loss": 0.0024, + "step": 177670 + }, + { + "epoch": 1.1395843287913723, + "grad_norm": 0.04356739670038223, + "learning_rate": 4.654825839152753e-06, + "loss": 0.0017, + "step": 177680 + }, + { + "epoch": 1.1396484656851584, + "grad_norm": 0.07654337584972382, + "learning_rate": 4.6542674732542265e-06, + "loss": 0.0023, + "step": 177690 + }, + { + "epoch": 1.1397126025789446, + "grad_norm": 0.12162031978368759, + "learning_rate": 4.653709111687978e-06, + "loss": 0.002, + "step": 177700 + }, + { + "epoch": 1.1397767394727305, + "grad_norm": 0.027826106175780296, + "learning_rate": 4.653150754460999e-06, + "loss": 0.0017, + "step": 177710 + }, + { + "epoch": 1.1398408763665167, + "grad_norm": 0.03190990164875984, + "learning_rate": 4.652592401580288e-06, + "loss": 0.0014, + "step": 177720 + }, + { + "epoch": 1.1399050132603028, + "grad_norm": 0.20037496089935303, + "learning_rate": 4.652034053052846e-06, + "loss": 0.0019, + "step": 177730 + }, + { + "epoch": 1.139969150154089, + "grad_norm": 0.18473303318023682, + "learning_rate": 4.651475708885663e-06, + "loss": 0.0014, + "step": 177740 + }, + { + "epoch": 1.140033287047875, + "grad_norm": 0.23001250624656677, + "learning_rate": 4.6509173690857405e-06, + "loss": 0.0012, + "step": 177750 + }, + { + "epoch": 1.140097423941661, + "grad_norm": 0.09116110950708389, + "learning_rate": 4.650359033660073e-06, + "loss": 0.0012, + "step": 177760 + }, + { + "epoch": 1.1401615608354472, + "grad_norm": 0.11727047711610794, + "learning_rate": 4.649800702615656e-06, + "loss": 0.0016, + "step": 177770 + }, + { + "epoch": 1.1402256977292333, + "grad_norm": 0.17801989614963531, + "learning_rate": 4.649242375959486e-06, + "loss": 0.0016, + "step": 177780 + }, + { + "epoch": 1.1402898346230192, + "grad_norm": 0.06194036081433296, + "learning_rate": 4.64868405369856e-06, + "loss": 0.0012, + "step": 177790 + }, + { + "epoch": 1.1403539715168054, + "grad_norm": 0.10502773523330688, + "learning_rate": 4.648125735839875e-06, + "loss": 0.0015, + "step": 177800 + }, + { + "epoch": 1.1404181084105915, + "grad_norm": 0.06816016882658005, + "learning_rate": 4.6475674223904246e-06, + "loss": 0.0032, + "step": 177810 + }, + { + "epoch": 1.1404822453043777, + "grad_norm": 0.05017755180597305, + "learning_rate": 4.647009113357207e-06, + "loss": 0.0012, + "step": 177820 + }, + { + "epoch": 1.1405463821981638, + "grad_norm": 0.11262434720993042, + "learning_rate": 4.6464508087472175e-06, + "loss": 0.0012, + "step": 177830 + }, + { + "epoch": 1.1406105190919498, + "grad_norm": 0.035311099141836166, + "learning_rate": 4.645892508567452e-06, + "loss": 0.0015, + "step": 177840 + }, + { + "epoch": 1.140674655985736, + "grad_norm": 0.04332641884684563, + "learning_rate": 4.645334212824906e-06, + "loss": 0.0044, + "step": 177850 + }, + { + "epoch": 1.140738792879522, + "grad_norm": 0.0032705175690352917, + "learning_rate": 4.644775921526577e-06, + "loss": 0.0015, + "step": 177860 + }, + { + "epoch": 1.1408029297733082, + "grad_norm": 0.11926588416099548, + "learning_rate": 4.644217634679458e-06, + "loss": 0.0012, + "step": 177870 + }, + { + "epoch": 1.1408670666670941, + "grad_norm": 0.19587744772434235, + "learning_rate": 4.6436593522905484e-06, + "loss": 0.0038, + "step": 177880 + }, + { + "epoch": 1.1409312035608803, + "grad_norm": 0.08402374386787415, + "learning_rate": 4.643101074366839e-06, + "loss": 0.0018, + "step": 177890 + }, + { + "epoch": 1.1409953404546664, + "grad_norm": 0.09392978250980377, + "learning_rate": 4.6425428009153295e-06, + "loss": 0.0011, + "step": 177900 + }, + { + "epoch": 1.1410594773484526, + "grad_norm": 0.1906268298625946, + "learning_rate": 4.641984531943015e-06, + "loss": 0.0021, + "step": 177910 + }, + { + "epoch": 1.1411236142422387, + "grad_norm": 0.020670609548687935, + "learning_rate": 4.6414262674568885e-06, + "loss": 0.0012, + "step": 177920 + }, + { + "epoch": 1.1411877511360247, + "grad_norm": 0.1239483579993248, + "learning_rate": 4.640868007463949e-06, + "loss": 0.0013, + "step": 177930 + }, + { + "epoch": 1.1412518880298108, + "grad_norm": 0.230666384100914, + "learning_rate": 4.640309751971189e-06, + "loss": 0.0024, + "step": 177940 + }, + { + "epoch": 1.141316024923597, + "grad_norm": 0.07052595913410187, + "learning_rate": 4.639751500985606e-06, + "loss": 0.0014, + "step": 177950 + }, + { + "epoch": 1.141380161817383, + "grad_norm": 0.061315130442380905, + "learning_rate": 4.639193254514192e-06, + "loss": 0.0016, + "step": 177960 + }, + { + "epoch": 1.141444298711169, + "grad_norm": 0.08418230712413788, + "learning_rate": 4.638635012563946e-06, + "loss": 0.0011, + "step": 177970 + }, + { + "epoch": 1.1415084356049552, + "grad_norm": 0.05300601199269295, + "learning_rate": 4.638076775141861e-06, + "loss": 0.0024, + "step": 177980 + }, + { + "epoch": 1.1415725724987413, + "grad_norm": 0.02021416835486889, + "learning_rate": 4.637518542254934e-06, + "loss": 0.0018, + "step": 177990 + }, + { + "epoch": 1.1416367093925275, + "grad_norm": 0.006741389166563749, + "learning_rate": 4.6369603139101566e-06, + "loss": 0.001, + "step": 178000 + }, + { + "epoch": 1.1417008462863136, + "grad_norm": 0.040095072239637375, + "learning_rate": 4.636402090114527e-06, + "loss": 0.0037, + "step": 178010 + }, + { + "epoch": 1.1417649831800996, + "grad_norm": 0.1890638768672943, + "learning_rate": 4.635843870875038e-06, + "loss": 0.0025, + "step": 178020 + }, + { + "epoch": 1.1418291200738857, + "grad_norm": 0.007839915342628956, + "learning_rate": 4.6352856561986885e-06, + "loss": 0.0016, + "step": 178030 + }, + { + "epoch": 1.1418932569676719, + "grad_norm": 0.054487310349941254, + "learning_rate": 4.634727446092468e-06, + "loss": 0.0024, + "step": 178040 + }, + { + "epoch": 1.1419573938614578, + "grad_norm": 0.1597786843776703, + "learning_rate": 4.6341692405633725e-06, + "loss": 0.002, + "step": 178050 + }, + { + "epoch": 1.142021530755244, + "grad_norm": 0.012354972772300243, + "learning_rate": 4.6336110396184e-06, + "loss": 0.003, + "step": 178060 + }, + { + "epoch": 1.14208566764903, + "grad_norm": 0.14595657587051392, + "learning_rate": 4.633052843264542e-06, + "loss": 0.001, + "step": 178070 + }, + { + "epoch": 1.1421498045428162, + "grad_norm": 0.08125365525484085, + "learning_rate": 4.632494651508795e-06, + "loss": 0.0017, + "step": 178080 + }, + { + "epoch": 1.1422139414366024, + "grad_norm": 0.05589194595813751, + "learning_rate": 4.631936464358152e-06, + "loss": 0.0014, + "step": 178090 + }, + { + "epoch": 1.1422780783303883, + "grad_norm": 0.013811212033033371, + "learning_rate": 4.631378281819609e-06, + "loss": 0.0017, + "step": 178100 + }, + { + "epoch": 1.1423422152241745, + "grad_norm": 0.09815085679292679, + "learning_rate": 4.630820103900158e-06, + "loss": 0.0019, + "step": 178110 + }, + { + "epoch": 1.1424063521179606, + "grad_norm": 0.05617450922727585, + "learning_rate": 4.630261930606797e-06, + "loss": 0.0015, + "step": 178120 + }, + { + "epoch": 1.1424704890117467, + "grad_norm": 0.04724203050136566, + "learning_rate": 4.6297037619465164e-06, + "loss": 0.0012, + "step": 178130 + }, + { + "epoch": 1.1425346259055327, + "grad_norm": 0.06500443071126938, + "learning_rate": 4.629145597926314e-06, + "loss": 0.0013, + "step": 178140 + }, + { + "epoch": 1.1425987627993188, + "grad_norm": 0.0620625838637352, + "learning_rate": 4.628587438553181e-06, + "loss": 0.0019, + "step": 178150 + }, + { + "epoch": 1.142662899693105, + "grad_norm": 0.14742448925971985, + "learning_rate": 4.628029283834113e-06, + "loss": 0.0023, + "step": 178160 + }, + { + "epoch": 1.1427270365868911, + "grad_norm": 0.07294030487537384, + "learning_rate": 4.6274711337761045e-06, + "loss": 0.0011, + "step": 178170 + }, + { + "epoch": 1.1427911734806773, + "grad_norm": 0.036900971084833145, + "learning_rate": 4.6269129883861486e-06, + "loss": 0.0017, + "step": 178180 + }, + { + "epoch": 1.1428553103744632, + "grad_norm": 0.0273374542593956, + "learning_rate": 4.626354847671239e-06, + "loss": 0.0006, + "step": 178190 + }, + { + "epoch": 1.1429194472682493, + "grad_norm": 0.11742100119590759, + "learning_rate": 4.625796711638371e-06, + "loss": 0.0011, + "step": 178200 + }, + { + "epoch": 1.1429835841620355, + "grad_norm": 0.004678953904658556, + "learning_rate": 4.625238580294538e-06, + "loss": 0.0014, + "step": 178210 + }, + { + "epoch": 1.1430477210558216, + "grad_norm": 0.34453120827674866, + "learning_rate": 4.624680453646732e-06, + "loss": 0.0012, + "step": 178220 + }, + { + "epoch": 1.1431118579496076, + "grad_norm": 0.06346111744642258, + "learning_rate": 4.62412233170195e-06, + "loss": 0.0015, + "step": 178230 + }, + { + "epoch": 1.1431759948433937, + "grad_norm": 0.06714435666799545, + "learning_rate": 4.623564214467182e-06, + "loss": 0.0019, + "step": 178240 + }, + { + "epoch": 1.1432401317371799, + "grad_norm": 0.1403575986623764, + "learning_rate": 4.6230061019494244e-06, + "loss": 0.002, + "step": 178250 + }, + { + "epoch": 1.143304268630966, + "grad_norm": 0.2506372034549713, + "learning_rate": 4.62244799415567e-06, + "loss": 0.0035, + "step": 178260 + }, + { + "epoch": 1.143368405524752, + "grad_norm": 0.10004667192697525, + "learning_rate": 4.6218898910929115e-06, + "loss": 0.0017, + "step": 178270 + }, + { + "epoch": 1.143432542418538, + "grad_norm": 0.040075477212667465, + "learning_rate": 4.621331792768143e-06, + "loss": 0.0011, + "step": 178280 + }, + { + "epoch": 1.1434966793123242, + "grad_norm": 0.11705781519412994, + "learning_rate": 4.6207736991883575e-06, + "loss": 0.0022, + "step": 178290 + }, + { + "epoch": 1.1435608162061104, + "grad_norm": 0.1692320704460144, + "learning_rate": 4.620215610360549e-06, + "loss": 0.0009, + "step": 178300 + }, + { + "epoch": 1.1436249530998963, + "grad_norm": 0.09076292812824249, + "learning_rate": 4.619657526291711e-06, + "loss": 0.0019, + "step": 178310 + }, + { + "epoch": 1.1436890899936825, + "grad_norm": 0.18240463733673096, + "learning_rate": 4.619099446988834e-06, + "loss": 0.0011, + "step": 178320 + }, + { + "epoch": 1.1437532268874686, + "grad_norm": 0.05098031088709831, + "learning_rate": 4.6185413724589145e-06, + "loss": 0.0051, + "step": 178330 + }, + { + "epoch": 1.1438173637812548, + "grad_norm": 0.08685196936130524, + "learning_rate": 4.617983302708945e-06, + "loss": 0.0014, + "step": 178340 + }, + { + "epoch": 1.143881500675041, + "grad_norm": 0.048797864466905594, + "learning_rate": 4.617425237745916e-06, + "loss": 0.0017, + "step": 178350 + }, + { + "epoch": 1.1439456375688268, + "grad_norm": 0.049440789967775345, + "learning_rate": 4.616867177576824e-06, + "loss": 0.0016, + "step": 178360 + }, + { + "epoch": 1.144009774462613, + "grad_norm": 0.07405377179384232, + "learning_rate": 4.616309122208659e-06, + "loss": 0.0022, + "step": 178370 + }, + { + "epoch": 1.1440739113563991, + "grad_norm": 0.08482597768306732, + "learning_rate": 4.615751071648415e-06, + "loss": 0.0013, + "step": 178380 + }, + { + "epoch": 1.1441380482501853, + "grad_norm": 0.20172375440597534, + "learning_rate": 4.615193025903084e-06, + "loss": 0.0016, + "step": 178390 + }, + { + "epoch": 1.1442021851439712, + "grad_norm": 0.1691344678401947, + "learning_rate": 4.614634984979661e-06, + "loss": 0.002, + "step": 178400 + }, + { + "epoch": 1.1442663220377574, + "grad_norm": 0.010287750512361526, + "learning_rate": 4.614076948885136e-06, + "loss": 0.0011, + "step": 178410 + }, + { + "epoch": 1.1443304589315435, + "grad_norm": 0.09773595631122589, + "learning_rate": 4.6135189176265035e-06, + "loss": 0.0015, + "step": 178420 + }, + { + "epoch": 1.1443945958253297, + "grad_norm": 0.23118939995765686, + "learning_rate": 4.612960891210754e-06, + "loss": 0.0012, + "step": 178430 + }, + { + "epoch": 1.1444587327191158, + "grad_norm": 0.023415856063365936, + "learning_rate": 4.612402869644882e-06, + "loss": 0.0012, + "step": 178440 + }, + { + "epoch": 1.1445228696129017, + "grad_norm": 0.06214074790477753, + "learning_rate": 4.611844852935878e-06, + "loss": 0.003, + "step": 178450 + }, + { + "epoch": 1.1445870065066879, + "grad_norm": 0.08905795216560364, + "learning_rate": 4.611286841090738e-06, + "loss": 0.0015, + "step": 178460 + }, + { + "epoch": 1.144651143400474, + "grad_norm": 0.07331910729408264, + "learning_rate": 4.610728834116448e-06, + "loss": 0.0017, + "step": 178470 + }, + { + "epoch": 1.14471528029426, + "grad_norm": 0.10792273283004761, + "learning_rate": 4.610170832020005e-06, + "loss": 0.0016, + "step": 178480 + }, + { + "epoch": 1.144779417188046, + "grad_norm": 0.06411322206258774, + "learning_rate": 4.6096128348084e-06, + "loss": 0.0009, + "step": 178490 + }, + { + "epoch": 1.1448435540818322, + "grad_norm": 0.004302809480577707, + "learning_rate": 4.609054842488627e-06, + "loss": 0.0027, + "step": 178500 + }, + { + "epoch": 1.1449076909756184, + "grad_norm": 0.05978574976325035, + "learning_rate": 4.608496855067675e-06, + "loss": 0.0022, + "step": 178510 + }, + { + "epoch": 1.1449718278694045, + "grad_norm": 0.11780686676502228, + "learning_rate": 4.607938872552537e-06, + "loss": 0.0015, + "step": 178520 + }, + { + "epoch": 1.1450359647631905, + "grad_norm": 0.1133837178349495, + "learning_rate": 4.6073808949502054e-06, + "loss": 0.0012, + "step": 178530 + }, + { + "epoch": 1.1451001016569766, + "grad_norm": 0.12923049926757812, + "learning_rate": 4.606822922267673e-06, + "loss": 0.002, + "step": 178540 + }, + { + "epoch": 1.1451642385507628, + "grad_norm": 0.045091308653354645, + "learning_rate": 4.606264954511929e-06, + "loss": 0.0025, + "step": 178550 + }, + { + "epoch": 1.145228375444549, + "grad_norm": 0.10687202215194702, + "learning_rate": 4.605706991689967e-06, + "loss": 0.0013, + "step": 178560 + }, + { + "epoch": 1.1452925123383348, + "grad_norm": 0.11072013527154922, + "learning_rate": 4.605149033808778e-06, + "loss": 0.0013, + "step": 178570 + }, + { + "epoch": 1.145356649232121, + "grad_norm": 0.23663799464702606, + "learning_rate": 4.604591080875354e-06, + "loss": 0.0033, + "step": 178580 + }, + { + "epoch": 1.1454207861259071, + "grad_norm": 0.0915546789765358, + "learning_rate": 4.604033132896686e-06, + "loss": 0.0008, + "step": 178590 + }, + { + "epoch": 1.1454849230196933, + "grad_norm": 0.008109216578304768, + "learning_rate": 4.603475189879765e-06, + "loss": 0.0015, + "step": 178600 + }, + { + "epoch": 1.1455490599134794, + "grad_norm": 0.26255640387535095, + "learning_rate": 4.602917251831586e-06, + "loss": 0.0017, + "step": 178610 + }, + { + "epoch": 1.1456131968072654, + "grad_norm": 0.034671954810619354, + "learning_rate": 4.602359318759135e-06, + "loss": 0.001, + "step": 178620 + }, + { + "epoch": 1.1456773337010515, + "grad_norm": 0.09453405439853668, + "learning_rate": 4.6018013906694075e-06, + "loss": 0.0025, + "step": 178630 + }, + { + "epoch": 1.1457414705948377, + "grad_norm": 0.11215243488550186, + "learning_rate": 4.601243467569392e-06, + "loss": 0.0007, + "step": 178640 + }, + { + "epoch": 1.1458056074886238, + "grad_norm": 0.06050289794802666, + "learning_rate": 4.600685549466081e-06, + "loss": 0.0022, + "step": 178650 + }, + { + "epoch": 1.1458697443824097, + "grad_norm": 0.11573351174592972, + "learning_rate": 4.600127636366466e-06, + "loss": 0.0018, + "step": 178660 + }, + { + "epoch": 1.1459338812761959, + "grad_norm": 0.08109661936759949, + "learning_rate": 4.599569728277538e-06, + "loss": 0.0023, + "step": 178670 + }, + { + "epoch": 1.145998018169982, + "grad_norm": 0.11803097277879715, + "learning_rate": 4.599011825206287e-06, + "loss": 0.0016, + "step": 178680 + }, + { + "epoch": 1.1460621550637682, + "grad_norm": 0.15141747891902924, + "learning_rate": 4.598453927159704e-06, + "loss": 0.0015, + "step": 178690 + }, + { + "epoch": 1.146126291957554, + "grad_norm": 0.05062533915042877, + "learning_rate": 4.59789603414478e-06, + "loss": 0.0007, + "step": 178700 + }, + { + "epoch": 1.1461904288513403, + "grad_norm": 0.2250877469778061, + "learning_rate": 4.597338146168507e-06, + "loss": 0.0015, + "step": 178710 + }, + { + "epoch": 1.1462545657451264, + "grad_norm": 0.1328151375055313, + "learning_rate": 4.596780263237875e-06, + "loss": 0.0012, + "step": 178720 + }, + { + "epoch": 1.1463187026389126, + "grad_norm": 0.04682164266705513, + "learning_rate": 4.596222385359873e-06, + "loss": 0.0015, + "step": 178730 + }, + { + "epoch": 1.1463828395326985, + "grad_norm": 0.15815287828445435, + "learning_rate": 4.5956645125414945e-06, + "loss": 0.0014, + "step": 178740 + }, + { + "epoch": 1.1464469764264846, + "grad_norm": 0.0862506851553917, + "learning_rate": 4.595106644789727e-06, + "loss": 0.0012, + "step": 178750 + }, + { + "epoch": 1.1465111133202708, + "grad_norm": 0.10812395811080933, + "learning_rate": 4.594548782111564e-06, + "loss": 0.0011, + "step": 178760 + }, + { + "epoch": 1.146575250214057, + "grad_norm": 0.015825750306248665, + "learning_rate": 4.593990924513995e-06, + "loss": 0.0006, + "step": 178770 + }, + { + "epoch": 1.146639387107843, + "grad_norm": 0.09403803199529648, + "learning_rate": 4.593433072004009e-06, + "loss": 0.0019, + "step": 178780 + }, + { + "epoch": 1.146703524001629, + "grad_norm": 0.08201845735311508, + "learning_rate": 4.592875224588597e-06, + "loss": 0.0015, + "step": 178790 + }, + { + "epoch": 1.1467676608954152, + "grad_norm": 0.06644584983587265, + "learning_rate": 4.59231738227475e-06, + "loss": 0.0012, + "step": 178800 + }, + { + "epoch": 1.1468317977892013, + "grad_norm": 0.1825915426015854, + "learning_rate": 4.591759545069457e-06, + "loss": 0.0019, + "step": 178810 + }, + { + "epoch": 1.1468959346829875, + "grad_norm": 0.05877283215522766, + "learning_rate": 4.591201712979709e-06, + "loss": 0.0009, + "step": 178820 + }, + { + "epoch": 1.1469600715767734, + "grad_norm": 0.28253012895584106, + "learning_rate": 4.590643886012496e-06, + "loss": 0.0028, + "step": 178830 + }, + { + "epoch": 1.1470242084705595, + "grad_norm": 0.00932693388313055, + "learning_rate": 4.590086064174807e-06, + "loss": 0.0016, + "step": 178840 + }, + { + "epoch": 1.1470883453643457, + "grad_norm": 0.051429156213998795, + "learning_rate": 4.589528247473633e-06, + "loss": 0.0007, + "step": 178850 + }, + { + "epoch": 1.1471524822581318, + "grad_norm": 0.027532080188393593, + "learning_rate": 4.588970435915964e-06, + "loss": 0.0009, + "step": 178860 + }, + { + "epoch": 1.147216619151918, + "grad_norm": 0.25374674797058105, + "learning_rate": 4.588412629508788e-06, + "loss": 0.0016, + "step": 178870 + }, + { + "epoch": 1.147280756045704, + "grad_norm": 0.12644684314727783, + "learning_rate": 4.587854828259097e-06, + "loss": 0.0018, + "step": 178880 + }, + { + "epoch": 1.14734489293949, + "grad_norm": 0.12913778424263, + "learning_rate": 4.5872970321738784e-06, + "loss": 0.0021, + "step": 178890 + }, + { + "epoch": 1.1474090298332762, + "grad_norm": 0.14622823894023895, + "learning_rate": 4.586739241260123e-06, + "loss": 0.0014, + "step": 178900 + }, + { + "epoch": 1.1474731667270621, + "grad_norm": 0.18271958827972412, + "learning_rate": 4.586181455524821e-06, + "loss": 0.0014, + "step": 178910 + }, + { + "epoch": 1.1475373036208483, + "grad_norm": 0.09339526295661926, + "learning_rate": 4.58562367497496e-06, + "loss": 0.0022, + "step": 178920 + }, + { + "epoch": 1.1476014405146344, + "grad_norm": 0.009551736526191235, + "learning_rate": 4.585065899617532e-06, + "loss": 0.0006, + "step": 178930 + }, + { + "epoch": 1.1476655774084206, + "grad_norm": 0.05664249137043953, + "learning_rate": 4.584508129459524e-06, + "loss": 0.0009, + "step": 178940 + }, + { + "epoch": 1.1477297143022067, + "grad_norm": 0.13133834302425385, + "learning_rate": 4.583950364507927e-06, + "loss": 0.0016, + "step": 178950 + }, + { + "epoch": 1.1477938511959926, + "grad_norm": 0.05462121590971947, + "learning_rate": 4.583392604769728e-06, + "loss": 0.001, + "step": 178960 + }, + { + "epoch": 1.1478579880897788, + "grad_norm": 0.11168855428695679, + "learning_rate": 4.582834850251919e-06, + "loss": 0.0011, + "step": 178970 + }, + { + "epoch": 1.147922124983565, + "grad_norm": 0.10457391291856766, + "learning_rate": 4.582277100961485e-06, + "loss": 0.0019, + "step": 178980 + }, + { + "epoch": 1.147986261877351, + "grad_norm": 0.10076499730348587, + "learning_rate": 4.581719356905421e-06, + "loss": 0.0014, + "step": 178990 + }, + { + "epoch": 1.148050398771137, + "grad_norm": 0.04545801877975464, + "learning_rate": 4.581161618090709e-06, + "loss": 0.0013, + "step": 179000 + }, + { + "epoch": 1.1481145356649232, + "grad_norm": 0.07611383497714996, + "learning_rate": 4.580603884524343e-06, + "loss": 0.0009, + "step": 179010 + }, + { + "epoch": 1.1481786725587093, + "grad_norm": 0.16176478564739227, + "learning_rate": 4.5800461562133095e-06, + "loss": 0.002, + "step": 179020 + }, + { + "epoch": 1.1482428094524955, + "grad_norm": 0.10310656577348709, + "learning_rate": 4.579488433164599e-06, + "loss": 0.0014, + "step": 179030 + }, + { + "epoch": 1.1483069463462816, + "grad_norm": 0.1659184992313385, + "learning_rate": 4.578930715385197e-06, + "loss": 0.0019, + "step": 179040 + }, + { + "epoch": 1.1483710832400675, + "grad_norm": 0.15081000328063965, + "learning_rate": 4.5783730028820935e-06, + "loss": 0.0011, + "step": 179050 + }, + { + "epoch": 1.1484352201338537, + "grad_norm": 0.0816895142197609, + "learning_rate": 4.57781529566228e-06, + "loss": 0.0018, + "step": 179060 + }, + { + "epoch": 1.1484993570276398, + "grad_norm": 0.10530539602041245, + "learning_rate": 4.577257593732741e-06, + "loss": 0.0013, + "step": 179070 + }, + { + "epoch": 1.148563493921426, + "grad_norm": 0.28155598044395447, + "learning_rate": 4.576699897100468e-06, + "loss": 0.0045, + "step": 179080 + }, + { + "epoch": 1.148627630815212, + "grad_norm": 0.07171224057674408, + "learning_rate": 4.576142205772447e-06, + "loss": 0.0008, + "step": 179090 + }, + { + "epoch": 1.148691767708998, + "grad_norm": 0.028919516131281853, + "learning_rate": 4.575584519755668e-06, + "loss": 0.0023, + "step": 179100 + }, + { + "epoch": 1.1487559046027842, + "grad_norm": 0.11434286832809448, + "learning_rate": 4.575026839057116e-06, + "loss": 0.0021, + "step": 179110 + }, + { + "epoch": 1.1488200414965704, + "grad_norm": 0.022055277600884438, + "learning_rate": 4.5744691636837835e-06, + "loss": 0.0009, + "step": 179120 + }, + { + "epoch": 1.1488841783903563, + "grad_norm": 0.03330404683947563, + "learning_rate": 4.573911493642655e-06, + "loss": 0.0019, + "step": 179130 + }, + { + "epoch": 1.1489483152841424, + "grad_norm": 0.030812516808509827, + "learning_rate": 4.5733538289407215e-06, + "loss": 0.0009, + "step": 179140 + }, + { + "epoch": 1.1490124521779286, + "grad_norm": 0.0154456477612257, + "learning_rate": 4.572796169584968e-06, + "loss": 0.0009, + "step": 179150 + }, + { + "epoch": 1.1490765890717147, + "grad_norm": 0.18655256927013397, + "learning_rate": 4.572238515582385e-06, + "loss": 0.0007, + "step": 179160 + }, + { + "epoch": 1.1491407259655007, + "grad_norm": 0.056279927492141724, + "learning_rate": 4.571680866939959e-06, + "loss": 0.0028, + "step": 179170 + }, + { + "epoch": 1.1492048628592868, + "grad_norm": 0.1965060830116272, + "learning_rate": 4.571123223664677e-06, + "loss": 0.0012, + "step": 179180 + }, + { + "epoch": 1.149268999753073, + "grad_norm": 0.05386898294091225, + "learning_rate": 4.570565585763528e-06, + "loss": 0.0013, + "step": 179190 + }, + { + "epoch": 1.149333136646859, + "grad_norm": 0.23607292771339417, + "learning_rate": 4.570007953243499e-06, + "loss": 0.0022, + "step": 179200 + }, + { + "epoch": 1.1493972735406452, + "grad_norm": 0.030810948461294174, + "learning_rate": 4.5694503261115784e-06, + "loss": 0.0025, + "step": 179210 + }, + { + "epoch": 1.1494614104344312, + "grad_norm": 0.05335972458124161, + "learning_rate": 4.568892704374751e-06, + "loss": 0.0014, + "step": 179220 + }, + { + "epoch": 1.1495255473282173, + "grad_norm": 0.009272546507418156, + "learning_rate": 4.568335088040009e-06, + "loss": 0.0018, + "step": 179230 + }, + { + "epoch": 1.1495896842220035, + "grad_norm": 0.022868212312459946, + "learning_rate": 4.567777477114334e-06, + "loss": 0.0008, + "step": 179240 + }, + { + "epoch": 1.1496538211157896, + "grad_norm": 0.0760836973786354, + "learning_rate": 4.567219871604716e-06, + "loss": 0.0016, + "step": 179250 + }, + { + "epoch": 1.1497179580095755, + "grad_norm": 0.35971537232398987, + "learning_rate": 4.566662271518144e-06, + "loss": 0.0008, + "step": 179260 + }, + { + "epoch": 1.1497820949033617, + "grad_norm": 0.07496869564056396, + "learning_rate": 4.566104676861603e-06, + "loss": 0.0006, + "step": 179270 + }, + { + "epoch": 1.1498462317971478, + "grad_norm": 0.12314856052398682, + "learning_rate": 4.5655470876420814e-06, + "loss": 0.0046, + "step": 179280 + }, + { + "epoch": 1.149910368690934, + "grad_norm": 0.08336050063371658, + "learning_rate": 4.564989503866564e-06, + "loss": 0.0017, + "step": 179290 + }, + { + "epoch": 1.1499745055847201, + "grad_norm": 0.2124306708574295, + "learning_rate": 4.5644319255420405e-06, + "loss": 0.0024, + "step": 179300 + }, + { + "epoch": 1.150038642478506, + "grad_norm": 0.018601980060338974, + "learning_rate": 4.563874352675495e-06, + "loss": 0.0016, + "step": 179310 + }, + { + "epoch": 1.1501027793722922, + "grad_norm": 0.061966847628355026, + "learning_rate": 4.5633167852739154e-06, + "loss": 0.001, + "step": 179320 + }, + { + "epoch": 1.1501669162660784, + "grad_norm": 0.11580870300531387, + "learning_rate": 4.56275922334429e-06, + "loss": 0.0015, + "step": 179330 + }, + { + "epoch": 1.1502310531598643, + "grad_norm": 0.0410192608833313, + "learning_rate": 4.562201666893602e-06, + "loss": 0.0012, + "step": 179340 + }, + { + "epoch": 1.1502951900536504, + "grad_norm": 0.08091183006763458, + "learning_rate": 4.561644115928842e-06, + "loss": 0.0013, + "step": 179350 + }, + { + "epoch": 1.1503593269474366, + "grad_norm": 0.17633156478405, + "learning_rate": 4.5610865704569925e-06, + "loss": 0.0016, + "step": 179360 + }, + { + "epoch": 1.1504234638412227, + "grad_norm": 0.037320394068956375, + "learning_rate": 4.560529030485044e-06, + "loss": 0.0026, + "step": 179370 + }, + { + "epoch": 1.1504876007350089, + "grad_norm": 0.15806110203266144, + "learning_rate": 4.559971496019979e-06, + "loss": 0.0027, + "step": 179380 + }, + { + "epoch": 1.1505517376287948, + "grad_norm": 0.033490147441625595, + "learning_rate": 4.559413967068788e-06, + "loss": 0.0015, + "step": 179390 + }, + { + "epoch": 1.150615874522581, + "grad_norm": 0.07470483332872391, + "learning_rate": 4.558856443638452e-06, + "loss": 0.0011, + "step": 179400 + }, + { + "epoch": 1.150680011416367, + "grad_norm": 0.03577504679560661, + "learning_rate": 4.558298925735963e-06, + "loss": 0.0011, + "step": 179410 + }, + { + "epoch": 1.1507441483101533, + "grad_norm": 0.06793781369924545, + "learning_rate": 4.5577414133683016e-06, + "loss": 0.0021, + "step": 179420 + }, + { + "epoch": 1.1508082852039392, + "grad_norm": 0.04992002621293068, + "learning_rate": 4.557183906542458e-06, + "loss": 0.0023, + "step": 179430 + }, + { + "epoch": 1.1508724220977253, + "grad_norm": 0.24769335985183716, + "learning_rate": 4.556626405265415e-06, + "loss": 0.0013, + "step": 179440 + }, + { + "epoch": 1.1509365589915115, + "grad_norm": 0.06733288615942001, + "learning_rate": 4.55606890954416e-06, + "loss": 0.0014, + "step": 179450 + }, + { + "epoch": 1.1510006958852976, + "grad_norm": 0.025171691551804543, + "learning_rate": 4.555511419385681e-06, + "loss": 0.0025, + "step": 179460 + }, + { + "epoch": 1.1510648327790838, + "grad_norm": 0.13776138424873352, + "learning_rate": 4.554953934796959e-06, + "loss": 0.0017, + "step": 179470 + }, + { + "epoch": 1.1511289696728697, + "grad_norm": 0.05418798699975014, + "learning_rate": 4.554396455784985e-06, + "loss": 0.0016, + "step": 179480 + }, + { + "epoch": 1.1511931065666559, + "grad_norm": 0.05372137576341629, + "learning_rate": 4.5538389823567394e-06, + "loss": 0.0019, + "step": 179490 + }, + { + "epoch": 1.151257243460442, + "grad_norm": 0.01216061506420374, + "learning_rate": 4.553281514519212e-06, + "loss": 0.0014, + "step": 179500 + }, + { + "epoch": 1.1513213803542282, + "grad_norm": 0.05967012792825699, + "learning_rate": 4.552724052279386e-06, + "loss": 0.0021, + "step": 179510 + }, + { + "epoch": 1.151385517248014, + "grad_norm": 0.06882903724908829, + "learning_rate": 4.552166595644248e-06, + "loss": 0.001, + "step": 179520 + }, + { + "epoch": 1.1514496541418002, + "grad_norm": 0.07349782437086105, + "learning_rate": 4.55160914462078e-06, + "loss": 0.0022, + "step": 179530 + }, + { + "epoch": 1.1515137910355864, + "grad_norm": 0.023654308170080185, + "learning_rate": 4.551051699215973e-06, + "loss": 0.0014, + "step": 179540 + }, + { + "epoch": 1.1515779279293725, + "grad_norm": 0.199869766831398, + "learning_rate": 4.550494259436807e-06, + "loss": 0.0014, + "step": 179550 + }, + { + "epoch": 1.1516420648231587, + "grad_norm": 0.0609150230884552, + "learning_rate": 4.54993682529027e-06, + "loss": 0.0018, + "step": 179560 + }, + { + "epoch": 1.1517062017169446, + "grad_norm": 0.12952852249145508, + "learning_rate": 4.549379396783346e-06, + "loss": 0.001, + "step": 179570 + }, + { + "epoch": 1.1517703386107307, + "grad_norm": 0.06888972222805023, + "learning_rate": 4.548821973923021e-06, + "loss": 0.0006, + "step": 179580 + }, + { + "epoch": 1.151834475504517, + "grad_norm": 0.11483610421419144, + "learning_rate": 4.548264556716277e-06, + "loss": 0.0016, + "step": 179590 + }, + { + "epoch": 1.1518986123983028, + "grad_norm": 0.09012481570243835, + "learning_rate": 4.5477071451701025e-06, + "loss": 0.0014, + "step": 179600 + }, + { + "epoch": 1.151962749292089, + "grad_norm": 0.02963605895638466, + "learning_rate": 4.547149739291481e-06, + "loss": 0.0018, + "step": 179610 + }, + { + "epoch": 1.1520268861858751, + "grad_norm": 0.06909776479005814, + "learning_rate": 4.546592339087396e-06, + "loss": 0.0013, + "step": 179620 + }, + { + "epoch": 1.1520910230796613, + "grad_norm": 0.19308145344257355, + "learning_rate": 4.546034944564834e-06, + "loss": 0.0023, + "step": 179630 + }, + { + "epoch": 1.1521551599734474, + "grad_norm": 0.09878735989332199, + "learning_rate": 4.545477555730778e-06, + "loss": 0.0012, + "step": 179640 + }, + { + "epoch": 1.1522192968672333, + "grad_norm": 0.07624828815460205, + "learning_rate": 4.5449201725922135e-06, + "loss": 0.0023, + "step": 179650 + }, + { + "epoch": 1.1522834337610195, + "grad_norm": 0.24515004456043243, + "learning_rate": 4.544362795156123e-06, + "loss": 0.0021, + "step": 179660 + }, + { + "epoch": 1.1523475706548056, + "grad_norm": 0.13114790618419647, + "learning_rate": 4.543805423429495e-06, + "loss": 0.0019, + "step": 179670 + }, + { + "epoch": 1.1524117075485918, + "grad_norm": 0.05208076909184456, + "learning_rate": 4.5432480574193085e-06, + "loss": 0.0007, + "step": 179680 + }, + { + "epoch": 1.1524758444423777, + "grad_norm": 0.09520614892244339, + "learning_rate": 4.5426906971325515e-06, + "loss": 0.0015, + "step": 179690 + }, + { + "epoch": 1.1525399813361639, + "grad_norm": 0.04604622349143028, + "learning_rate": 4.542133342576207e-06, + "loss": 0.0011, + "step": 179700 + }, + { + "epoch": 1.15260411822995, + "grad_norm": 0.053334563970565796, + "learning_rate": 4.541575993757259e-06, + "loss": 0.0015, + "step": 179710 + }, + { + "epoch": 1.1526682551237362, + "grad_norm": 0.08976370096206665, + "learning_rate": 4.541018650682691e-06, + "loss": 0.003, + "step": 179720 + }, + { + "epoch": 1.1527323920175223, + "grad_norm": 0.15434318780899048, + "learning_rate": 4.540461313359487e-06, + "loss": 0.0023, + "step": 179730 + }, + { + "epoch": 1.1527965289113082, + "grad_norm": 0.14458923041820526, + "learning_rate": 4.5399039817946315e-06, + "loss": 0.0014, + "step": 179740 + }, + { + "epoch": 1.1528606658050944, + "grad_norm": 0.007658865302801132, + "learning_rate": 4.539346655995108e-06, + "loss": 0.0024, + "step": 179750 + }, + { + "epoch": 1.1529248026988805, + "grad_norm": 0.047519613057374954, + "learning_rate": 4.5387893359679e-06, + "loss": 0.0012, + "step": 179760 + }, + { + "epoch": 1.1529889395926667, + "grad_norm": 0.79911208152771, + "learning_rate": 4.538232021719992e-06, + "loss": 0.002, + "step": 179770 + }, + { + "epoch": 1.1530530764864526, + "grad_norm": 0.17785044014453888, + "learning_rate": 4.537674713258367e-06, + "loss": 0.0017, + "step": 179780 + }, + { + "epoch": 1.1531172133802388, + "grad_norm": 0.16367937624454498, + "learning_rate": 4.537117410590006e-06, + "loss": 0.0011, + "step": 179790 + }, + { + "epoch": 1.153181350274025, + "grad_norm": 0.1835791915655136, + "learning_rate": 4.536560113721897e-06, + "loss": 0.0013, + "step": 179800 + }, + { + "epoch": 1.153245487167811, + "grad_norm": 0.03765160217881203, + "learning_rate": 4.53600282266102e-06, + "loss": 0.0016, + "step": 179810 + }, + { + "epoch": 1.153309624061597, + "grad_norm": 0.04303538054227829, + "learning_rate": 4.535445537414359e-06, + "loss": 0.0015, + "step": 179820 + }, + { + "epoch": 1.1533737609553831, + "grad_norm": 0.004960588179528713, + "learning_rate": 4.534888257988898e-06, + "loss": 0.001, + "step": 179830 + }, + { + "epoch": 1.1534378978491693, + "grad_norm": 0.07767927646636963, + "learning_rate": 4.5343309843916194e-06, + "loss": 0.0028, + "step": 179840 + }, + { + "epoch": 1.1535020347429554, + "grad_norm": 0.04093633219599724, + "learning_rate": 4.533773716629506e-06, + "loss": 0.0016, + "step": 179850 + }, + { + "epoch": 1.1535661716367414, + "grad_norm": 0.03396964073181152, + "learning_rate": 4.533216454709542e-06, + "loss": 0.0011, + "step": 179860 + }, + { + "epoch": 1.1536303085305275, + "grad_norm": 0.043116357177495956, + "learning_rate": 4.532659198638708e-06, + "loss": 0.0018, + "step": 179870 + }, + { + "epoch": 1.1536944454243137, + "grad_norm": 0.09401235729455948, + "learning_rate": 4.53210194842399e-06, + "loss": 0.0027, + "step": 179880 + }, + { + "epoch": 1.1537585823180998, + "grad_norm": 0.17905250191688538, + "learning_rate": 4.531544704072367e-06, + "loss": 0.0008, + "step": 179890 + }, + { + "epoch": 1.153822719211886, + "grad_norm": 0.07407578080892563, + "learning_rate": 4.530987465590825e-06, + "loss": 0.001, + "step": 179900 + }, + { + "epoch": 1.1538868561056719, + "grad_norm": 0.040732480585575104, + "learning_rate": 4.5304302329863456e-06, + "loss": 0.0019, + "step": 179910 + }, + { + "epoch": 1.153950992999458, + "grad_norm": 0.003030008403584361, + "learning_rate": 4.52987300626591e-06, + "loss": 0.0007, + "step": 179920 + }, + { + "epoch": 1.1540151298932442, + "grad_norm": 0.1177324652671814, + "learning_rate": 4.529315785436502e-06, + "loss": 0.0018, + "step": 179930 + }, + { + "epoch": 1.1540792667870303, + "grad_norm": 0.03247679024934769, + "learning_rate": 4.5287585705051035e-06, + "loss": 0.0009, + "step": 179940 + }, + { + "epoch": 1.1541434036808162, + "grad_norm": 0.11346208304166794, + "learning_rate": 4.5282013614786976e-06, + "loss": 0.0016, + "step": 179950 + }, + { + "epoch": 1.1542075405746024, + "grad_norm": 0.017800046131014824, + "learning_rate": 4.527644158364265e-06, + "loss": 0.0013, + "step": 179960 + }, + { + "epoch": 1.1542716774683885, + "grad_norm": 0.11046946048736572, + "learning_rate": 4.527086961168789e-06, + "loss": 0.0019, + "step": 179970 + }, + { + "epoch": 1.1543358143621747, + "grad_norm": 0.04605415090918541, + "learning_rate": 4.526529769899251e-06, + "loss": 0.0012, + "step": 179980 + }, + { + "epoch": 1.1543999512559608, + "grad_norm": 0.040764909237623215, + "learning_rate": 4.525972584562635e-06, + "loss": 0.0014, + "step": 179990 + }, + { + "epoch": 1.1544640881497468, + "grad_norm": 0.05737825483083725, + "learning_rate": 4.525415405165919e-06, + "loss": 0.002, + "step": 180000 + }, + { + "epoch": 1.154528225043533, + "grad_norm": 0.05237356200814247, + "learning_rate": 4.524858231716088e-06, + "loss": 0.0022, + "step": 180010 + }, + { + "epoch": 1.154592361937319, + "grad_norm": 0.11954223364591599, + "learning_rate": 4.524301064220121e-06, + "loss": 0.0019, + "step": 180020 + }, + { + "epoch": 1.154656498831105, + "grad_norm": 0.022486301138997078, + "learning_rate": 4.523743902685004e-06, + "loss": 0.0032, + "step": 180030 + }, + { + "epoch": 1.1547206357248911, + "grad_norm": 0.08458501845598221, + "learning_rate": 4.523186747117715e-06, + "loss": 0.0019, + "step": 180040 + }, + { + "epoch": 1.1547847726186773, + "grad_norm": 0.016316739842295647, + "learning_rate": 4.522629597525239e-06, + "loss": 0.0009, + "step": 180050 + }, + { + "epoch": 1.1548489095124634, + "grad_norm": 0.0723700150847435, + "learning_rate": 4.5220724539145535e-06, + "loss": 0.0037, + "step": 180060 + }, + { + "epoch": 1.1549130464062496, + "grad_norm": 0.1529541164636612, + "learning_rate": 4.521515316292643e-06, + "loss": 0.0016, + "step": 180070 + }, + { + "epoch": 1.1549771833000355, + "grad_norm": 0.07079547643661499, + "learning_rate": 4.5209581846664865e-06, + "loss": 0.0025, + "step": 180080 + }, + { + "epoch": 1.1550413201938217, + "grad_norm": 0.05314158648252487, + "learning_rate": 4.520401059043067e-06, + "loss": 0.0011, + "step": 180090 + }, + { + "epoch": 1.1551054570876078, + "grad_norm": 0.011576839722692966, + "learning_rate": 4.519843939429364e-06, + "loss": 0.0017, + "step": 180100 + }, + { + "epoch": 1.155169593981394, + "grad_norm": 0.01953614130616188, + "learning_rate": 4.519286825832361e-06, + "loss": 0.0021, + "step": 180110 + }, + { + "epoch": 1.1552337308751799, + "grad_norm": 0.11375365406274796, + "learning_rate": 4.518729718259036e-06, + "loss": 0.0015, + "step": 180120 + }, + { + "epoch": 1.155297867768966, + "grad_norm": 0.060757264494895935, + "learning_rate": 4.518172616716374e-06, + "loss": 0.0008, + "step": 180130 + }, + { + "epoch": 1.1553620046627522, + "grad_norm": 0.02616855502128601, + "learning_rate": 4.517615521211351e-06, + "loss": 0.0011, + "step": 180140 + }, + { + "epoch": 1.1554261415565383, + "grad_norm": 0.035598281770944595, + "learning_rate": 4.517058431750952e-06, + "loss": 0.002, + "step": 180150 + }, + { + "epoch": 1.1554902784503245, + "grad_norm": 0.08569321781396866, + "learning_rate": 4.516501348342155e-06, + "loss": 0.001, + "step": 180160 + }, + { + "epoch": 1.1555544153441104, + "grad_norm": 0.030544036999344826, + "learning_rate": 4.515944270991941e-06, + "loss": 0.0009, + "step": 180170 + }, + { + "epoch": 1.1556185522378966, + "grad_norm": 0.013408086262643337, + "learning_rate": 4.515387199707293e-06, + "loss": 0.0015, + "step": 180180 + }, + { + "epoch": 1.1556826891316827, + "grad_norm": 0.028382427990436554, + "learning_rate": 4.514830134495188e-06, + "loss": 0.0012, + "step": 180190 + }, + { + "epoch": 1.1557468260254689, + "grad_norm": 0.05651894956827164, + "learning_rate": 4.51427307536261e-06, + "loss": 0.0009, + "step": 180200 + }, + { + "epoch": 1.1558109629192548, + "grad_norm": 0.19445115327835083, + "learning_rate": 4.513716022316536e-06, + "loss": 0.0015, + "step": 180210 + }, + { + "epoch": 1.155875099813041, + "grad_norm": 0.3186042606830597, + "learning_rate": 4.513158975363949e-06, + "loss": 0.0035, + "step": 180220 + }, + { + "epoch": 1.155939236706827, + "grad_norm": 0.09063344448804855, + "learning_rate": 4.512601934511827e-06, + "loss": 0.0018, + "step": 180230 + }, + { + "epoch": 1.1560033736006132, + "grad_norm": 0.16141769289970398, + "learning_rate": 4.5120448997671515e-06, + "loss": 0.0016, + "step": 180240 + }, + { + "epoch": 1.1560675104943992, + "grad_norm": 0.19638694822788239, + "learning_rate": 4.511487871136901e-06, + "loss": 0.0022, + "step": 180250 + }, + { + "epoch": 1.1561316473881853, + "grad_norm": 0.07716047018766403, + "learning_rate": 4.510930848628059e-06, + "loss": 0.0018, + "step": 180260 + }, + { + "epoch": 1.1561957842819715, + "grad_norm": 0.02431539073586464, + "learning_rate": 4.510373832247601e-06, + "loss": 0.0014, + "step": 180270 + }, + { + "epoch": 1.1562599211757576, + "grad_norm": 0.07726875692605972, + "learning_rate": 4.5098168220025104e-06, + "loss": 0.0014, + "step": 180280 + }, + { + "epoch": 1.1563240580695435, + "grad_norm": 0.09850283712148666, + "learning_rate": 4.509259817899764e-06, + "loss": 0.0009, + "step": 180290 + }, + { + "epoch": 1.1563881949633297, + "grad_norm": 0.008121363818645477, + "learning_rate": 4.508702819946343e-06, + "loss": 0.0022, + "step": 180300 + }, + { + "epoch": 1.1564523318571158, + "grad_norm": 0.036567483097314835, + "learning_rate": 4.508145828149227e-06, + "loss": 0.0034, + "step": 180310 + }, + { + "epoch": 1.156516468750902, + "grad_norm": 0.0764060914516449, + "learning_rate": 4.507588842515395e-06, + "loss": 0.0013, + "step": 180320 + }, + { + "epoch": 1.1565806056446881, + "grad_norm": 0.017248669639229774, + "learning_rate": 4.507031863051828e-06, + "loss": 0.0014, + "step": 180330 + }, + { + "epoch": 1.156644742538474, + "grad_norm": 0.09835748374462128, + "learning_rate": 4.506474889765502e-06, + "loss": 0.0016, + "step": 180340 + }, + { + "epoch": 1.1567088794322602, + "grad_norm": 0.46733415126800537, + "learning_rate": 4.5059179226633995e-06, + "loss": 0.0025, + "step": 180350 + }, + { + "epoch": 1.1567730163260463, + "grad_norm": 0.06276906281709671, + "learning_rate": 4.505360961752498e-06, + "loss": 0.0012, + "step": 180360 + }, + { + "epoch": 1.1568371532198325, + "grad_norm": 0.15917713940143585, + "learning_rate": 4.5048040070397795e-06, + "loss": 0.0012, + "step": 180370 + }, + { + "epoch": 1.1569012901136184, + "grad_norm": 0.07704220712184906, + "learning_rate": 4.504247058532218e-06, + "loss": 0.0015, + "step": 180380 + }, + { + "epoch": 1.1569654270074046, + "grad_norm": 0.12476428598165512, + "learning_rate": 4.5036901162367964e-06, + "loss": 0.0009, + "step": 180390 + }, + { + "epoch": 1.1570295639011907, + "grad_norm": 0.46193230152130127, + "learning_rate": 4.503133180160491e-06, + "loss": 0.0017, + "step": 180400 + }, + { + "epoch": 1.1570937007949769, + "grad_norm": 0.04050949588418007, + "learning_rate": 4.502576250310284e-06, + "loss": 0.0015, + "step": 180410 + }, + { + "epoch": 1.157157837688763, + "grad_norm": 0.19467301666736603, + "learning_rate": 4.50201932669315e-06, + "loss": 0.0009, + "step": 180420 + }, + { + "epoch": 1.157221974582549, + "grad_norm": 0.46448013186454773, + "learning_rate": 4.5014624093160715e-06, + "loss": 0.0016, + "step": 180430 + }, + { + "epoch": 1.157286111476335, + "grad_norm": 0.2336282879114151, + "learning_rate": 4.500905498186024e-06, + "loss": 0.0014, + "step": 180440 + }, + { + "epoch": 1.1573502483701212, + "grad_norm": 0.16977283358573914, + "learning_rate": 4.500348593309988e-06, + "loss": 0.0022, + "step": 180450 + }, + { + "epoch": 1.1574143852639072, + "grad_norm": 0.034816574305295944, + "learning_rate": 4.499791694694942e-06, + "loss": 0.0025, + "step": 180460 + }, + { + "epoch": 1.1574785221576933, + "grad_norm": 0.04419064521789551, + "learning_rate": 4.499234802347862e-06, + "loss": 0.0017, + "step": 180470 + }, + { + "epoch": 1.1575426590514795, + "grad_norm": 0.19311970472335815, + "learning_rate": 4.4986779162757284e-06, + "loss": 0.0024, + "step": 180480 + }, + { + "epoch": 1.1576067959452656, + "grad_norm": 0.10714399814605713, + "learning_rate": 4.498121036485519e-06, + "loss": 0.0018, + "step": 180490 + }, + { + "epoch": 1.1576709328390518, + "grad_norm": 0.3718133568763733, + "learning_rate": 4.497564162984212e-06, + "loss": 0.0039, + "step": 180500 + }, + { + "epoch": 1.1577350697328377, + "grad_norm": 0.039395760744810104, + "learning_rate": 4.497007295778784e-06, + "loss": 0.0015, + "step": 180510 + }, + { + "epoch": 1.1577992066266238, + "grad_norm": 0.08320226520299911, + "learning_rate": 4.496450434876215e-06, + "loss": 0.0008, + "step": 180520 + }, + { + "epoch": 1.15786334352041, + "grad_norm": 0.10056677460670471, + "learning_rate": 4.495893580283482e-06, + "loss": 0.001, + "step": 180530 + }, + { + "epoch": 1.1579274804141961, + "grad_norm": 0.3260595202445984, + "learning_rate": 4.495336732007562e-06, + "loss": 0.0033, + "step": 180540 + }, + { + "epoch": 1.157991617307982, + "grad_norm": 0.07820891588926315, + "learning_rate": 4.494779890055434e-06, + "loss": 0.0019, + "step": 180550 + }, + { + "epoch": 1.1580557542017682, + "grad_norm": 0.10223450511693954, + "learning_rate": 4.494223054434076e-06, + "loss": 0.0017, + "step": 180560 + }, + { + "epoch": 1.1581198910955544, + "grad_norm": 0.13785037398338318, + "learning_rate": 4.493666225150462e-06, + "loss": 0.0012, + "step": 180570 + }, + { + "epoch": 1.1581840279893405, + "grad_norm": 0.008315321058034897, + "learning_rate": 4.4931094022115755e-06, + "loss": 0.0009, + "step": 180580 + }, + { + "epoch": 1.1582481648831267, + "grad_norm": 0.009771126322448254, + "learning_rate": 4.492552585624387e-06, + "loss": 0.0026, + "step": 180590 + }, + { + "epoch": 1.1583123017769126, + "grad_norm": 0.043915245682001114, + "learning_rate": 4.491995775395879e-06, + "loss": 0.0015, + "step": 180600 + }, + { + "epoch": 1.1583764386706987, + "grad_norm": 0.05186295881867409, + "learning_rate": 4.491438971533027e-06, + "loss": 0.0012, + "step": 180610 + }, + { + "epoch": 1.1584405755644849, + "grad_norm": 0.04817241430282593, + "learning_rate": 4.490882174042808e-06, + "loss": 0.0018, + "step": 180620 + }, + { + "epoch": 1.158504712458271, + "grad_norm": 0.022592414170503616, + "learning_rate": 4.490325382932199e-06, + "loss": 0.0024, + "step": 180630 + }, + { + "epoch": 1.158568849352057, + "grad_norm": 0.06265100836753845, + "learning_rate": 4.489768598208177e-06, + "loss": 0.0037, + "step": 180640 + }, + { + "epoch": 1.158632986245843, + "grad_norm": 0.05838491767644882, + "learning_rate": 4.48921181987772e-06, + "loss": 0.0017, + "step": 180650 + }, + { + "epoch": 1.1586971231396292, + "grad_norm": 0.0408768430352211, + "learning_rate": 4.488655047947803e-06, + "loss": 0.0016, + "step": 180660 + }, + { + "epoch": 1.1587612600334154, + "grad_norm": 0.09586689621210098, + "learning_rate": 4.488098282425405e-06, + "loss": 0.0009, + "step": 180670 + }, + { + "epoch": 1.1588253969272013, + "grad_norm": 0.06256552040576935, + "learning_rate": 4.4875415233175e-06, + "loss": 0.0031, + "step": 180680 + }, + { + "epoch": 1.1588895338209875, + "grad_norm": 0.2888277769088745, + "learning_rate": 4.486984770631067e-06, + "loss": 0.0021, + "step": 180690 + }, + { + "epoch": 1.1589536707147736, + "grad_norm": 0.0159669890999794, + "learning_rate": 4.486428024373081e-06, + "loss": 0.001, + "step": 180700 + }, + { + "epoch": 1.1590178076085598, + "grad_norm": 0.04025883972644806, + "learning_rate": 4.485871284550519e-06, + "loss": 0.0021, + "step": 180710 + }, + { + "epoch": 1.1590819445023457, + "grad_norm": 0.131544291973114, + "learning_rate": 4.485314551170357e-06, + "loss": 0.0016, + "step": 180720 + }, + { + "epoch": 1.1591460813961318, + "grad_norm": 0.12201915681362152, + "learning_rate": 4.4847578242395715e-06, + "loss": 0.0017, + "step": 180730 + }, + { + "epoch": 1.159210218289918, + "grad_norm": 0.019768880680203438, + "learning_rate": 4.484201103765139e-06, + "loss": 0.0016, + "step": 180740 + }, + { + "epoch": 1.1592743551837041, + "grad_norm": 0.1573418527841568, + "learning_rate": 4.483644389754034e-06, + "loss": 0.0019, + "step": 180750 + }, + { + "epoch": 1.1593384920774903, + "grad_norm": 0.15485508739948273, + "learning_rate": 4.483087682213236e-06, + "loss": 0.0015, + "step": 180760 + }, + { + "epoch": 1.1594026289712762, + "grad_norm": 0.06819141656160355, + "learning_rate": 4.482530981149717e-06, + "loss": 0.0015, + "step": 180770 + }, + { + "epoch": 1.1594667658650624, + "grad_norm": 0.34133756160736084, + "learning_rate": 4.481974286570454e-06, + "loss": 0.0012, + "step": 180780 + }, + { + "epoch": 1.1595309027588485, + "grad_norm": 0.10663978010416031, + "learning_rate": 4.481417598482426e-06, + "loss": 0.0016, + "step": 180790 + }, + { + "epoch": 1.1595950396526347, + "grad_norm": 0.09885643422603607, + "learning_rate": 4.480860916892604e-06, + "loss": 0.0027, + "step": 180800 + }, + { + "epoch": 1.1596591765464206, + "grad_norm": 0.07657206803560257, + "learning_rate": 4.480304241807967e-06, + "loss": 0.0017, + "step": 180810 + }, + { + "epoch": 1.1597233134402067, + "grad_norm": 0.021962689235806465, + "learning_rate": 4.479747573235488e-06, + "loss": 0.0013, + "step": 180820 + }, + { + "epoch": 1.1597874503339929, + "grad_norm": 0.06031438335776329, + "learning_rate": 4.479190911182145e-06, + "loss": 0.0024, + "step": 180830 + }, + { + "epoch": 1.159851587227779, + "grad_norm": 0.014199194498360157, + "learning_rate": 4.4786342556549115e-06, + "loss": 0.0015, + "step": 180840 + }, + { + "epoch": 1.1599157241215652, + "grad_norm": 0.03586658835411072, + "learning_rate": 4.478077606660764e-06, + "loss": 0.001, + "step": 180850 + }, + { + "epoch": 1.159979861015351, + "grad_norm": 0.16620095074176788, + "learning_rate": 4.477520964206676e-06, + "loss": 0.0014, + "step": 180860 + }, + { + "epoch": 1.1600439979091373, + "grad_norm": 0.12353146821260452, + "learning_rate": 4.476964328299624e-06, + "loss": 0.0028, + "step": 180870 + }, + { + "epoch": 1.1601081348029234, + "grad_norm": 0.012884975410997868, + "learning_rate": 4.4764076989465845e-06, + "loss": 0.0019, + "step": 180880 + }, + { + "epoch": 1.1601722716967093, + "grad_norm": 0.08514288067817688, + "learning_rate": 4.475851076154529e-06, + "loss": 0.0016, + "step": 180890 + }, + { + "epoch": 1.1602364085904955, + "grad_norm": 0.06588922441005707, + "learning_rate": 4.475294459930435e-06, + "loss": 0.0024, + "step": 180900 + }, + { + "epoch": 1.1603005454842816, + "grad_norm": 0.05888795480132103, + "learning_rate": 4.4747378502812755e-06, + "loss": 0.0017, + "step": 180910 + }, + { + "epoch": 1.1603646823780678, + "grad_norm": 0.062410056591033936, + "learning_rate": 4.474181247214027e-06, + "loss": 0.0032, + "step": 180920 + }, + { + "epoch": 1.160428819271854, + "grad_norm": 0.08351512253284454, + "learning_rate": 4.473624650735663e-06, + "loss": 0.0018, + "step": 180930 + }, + { + "epoch": 1.1604929561656399, + "grad_norm": 0.09259256720542908, + "learning_rate": 4.473068060853159e-06, + "loss": 0.0038, + "step": 180940 + }, + { + "epoch": 1.160557093059426, + "grad_norm": 0.05656282231211662, + "learning_rate": 4.472511477573487e-06, + "loss": 0.002, + "step": 180950 + }, + { + "epoch": 1.1606212299532122, + "grad_norm": 0.04642675444483757, + "learning_rate": 4.471954900903625e-06, + "loss": 0.0014, + "step": 180960 + }, + { + "epoch": 1.1606853668469983, + "grad_norm": 0.09179715067148209, + "learning_rate": 4.471398330850544e-06, + "loss": 0.0021, + "step": 180970 + }, + { + "epoch": 1.1607495037407842, + "grad_norm": 0.00894141849130392, + "learning_rate": 4.470841767421221e-06, + "loss": 0.0015, + "step": 180980 + }, + { + "epoch": 1.1608136406345704, + "grad_norm": 0.012759844772517681, + "learning_rate": 4.470285210622628e-06, + "loss": 0.0011, + "step": 180990 + }, + { + "epoch": 1.1608777775283565, + "grad_norm": 0.2721600830554962, + "learning_rate": 4.4697286604617405e-06, + "loss": 0.0022, + "step": 181000 + }, + { + "epoch": 1.1609419144221427, + "grad_norm": 0.009490939788520336, + "learning_rate": 4.469172116945531e-06, + "loss": 0.0016, + "step": 181010 + }, + { + "epoch": 1.1610060513159288, + "grad_norm": 0.028010396286845207, + "learning_rate": 4.468615580080974e-06, + "loss": 0.0017, + "step": 181020 + }, + { + "epoch": 1.1610701882097147, + "grad_norm": 0.05484509468078613, + "learning_rate": 4.468059049875045e-06, + "loss": 0.001, + "step": 181030 + }, + { + "epoch": 1.161134325103501, + "grad_norm": 0.07855560630559921, + "learning_rate": 4.467502526334715e-06, + "loss": 0.0005, + "step": 181040 + }, + { + "epoch": 1.161198461997287, + "grad_norm": 0.13370922207832336, + "learning_rate": 4.466946009466961e-06, + "loss": 0.0031, + "step": 181050 + }, + { + "epoch": 1.1612625988910732, + "grad_norm": 0.05313895642757416, + "learning_rate": 4.466389499278751e-06, + "loss": 0.0017, + "step": 181060 + }, + { + "epoch": 1.1613267357848591, + "grad_norm": 0.12520724534988403, + "learning_rate": 4.4658329957770655e-06, + "loss": 0.0018, + "step": 181070 + }, + { + "epoch": 1.1613908726786453, + "grad_norm": 0.1403217315673828, + "learning_rate": 4.465276498968872e-06, + "loss": 0.0022, + "step": 181080 + }, + { + "epoch": 1.1614550095724314, + "grad_norm": 0.08550545573234558, + "learning_rate": 4.464720008861147e-06, + "loss": 0.0017, + "step": 181090 + }, + { + "epoch": 1.1615191464662176, + "grad_norm": 0.05030914023518562, + "learning_rate": 4.464163525460862e-06, + "loss": 0.0018, + "step": 181100 + }, + { + "epoch": 1.1615832833600037, + "grad_norm": 0.042203955352306366, + "learning_rate": 4.463607048774993e-06, + "loss": 0.0022, + "step": 181110 + }, + { + "epoch": 1.1616474202537896, + "grad_norm": 0.12186636030673981, + "learning_rate": 4.463050578810509e-06, + "loss": 0.0025, + "step": 181120 + }, + { + "epoch": 1.1617115571475758, + "grad_norm": 0.0916713997721672, + "learning_rate": 4.462494115574387e-06, + "loss": 0.0013, + "step": 181130 + }, + { + "epoch": 1.161775694041362, + "grad_norm": 0.24145111441612244, + "learning_rate": 4.461937659073595e-06, + "loss": 0.0026, + "step": 181140 + }, + { + "epoch": 1.1618398309351479, + "grad_norm": 0.07024310529232025, + "learning_rate": 4.461381209315111e-06, + "loss": 0.002, + "step": 181150 + }, + { + "epoch": 1.161903967828934, + "grad_norm": 0.039558593183755875, + "learning_rate": 4.460824766305904e-06, + "loss": 0.0015, + "step": 181160 + }, + { + "epoch": 1.1619681047227202, + "grad_norm": 0.35094472765922546, + "learning_rate": 4.4602683300529484e-06, + "loss": 0.0027, + "step": 181170 + }, + { + "epoch": 1.1620322416165063, + "grad_norm": 0.030600082129240036, + "learning_rate": 4.459711900563217e-06, + "loss": 0.0017, + "step": 181180 + }, + { + "epoch": 1.1620963785102925, + "grad_norm": 0.05822151154279709, + "learning_rate": 4.459155477843681e-06, + "loss": 0.0019, + "step": 181190 + }, + { + "epoch": 1.1621605154040784, + "grad_norm": 0.03679274767637253, + "learning_rate": 4.458599061901314e-06, + "loss": 0.0014, + "step": 181200 + }, + { + "epoch": 1.1622246522978645, + "grad_norm": 0.07842778414487839, + "learning_rate": 4.458042652743087e-06, + "loss": 0.0015, + "step": 181210 + }, + { + "epoch": 1.1622887891916507, + "grad_norm": 0.08766324818134308, + "learning_rate": 4.457486250375974e-06, + "loss": 0.0016, + "step": 181220 + }, + { + "epoch": 1.1623529260854368, + "grad_norm": 0.11263548582792282, + "learning_rate": 4.456929854806944e-06, + "loss": 0.0019, + "step": 181230 + }, + { + "epoch": 1.1624170629792228, + "grad_norm": 0.03930861875414848, + "learning_rate": 4.456373466042974e-06, + "loss": 0.0011, + "step": 181240 + }, + { + "epoch": 1.162481199873009, + "grad_norm": 0.16990233957767487, + "learning_rate": 4.45581708409103e-06, + "loss": 0.0023, + "step": 181250 + }, + { + "epoch": 1.162545336766795, + "grad_norm": 0.07150521129369736, + "learning_rate": 4.455260708958089e-06, + "loss": 0.0014, + "step": 181260 + }, + { + "epoch": 1.1626094736605812, + "grad_norm": 0.04427288472652435, + "learning_rate": 4.454704340651119e-06, + "loss": 0.0017, + "step": 181270 + }, + { + "epoch": 1.1626736105543674, + "grad_norm": 0.033710263669490814, + "learning_rate": 4.454147979177095e-06, + "loss": 0.0015, + "step": 181280 + }, + { + "epoch": 1.1627377474481533, + "grad_norm": 0.06993027776479721, + "learning_rate": 4.4535916245429855e-06, + "loss": 0.0014, + "step": 181290 + }, + { + "epoch": 1.1628018843419394, + "grad_norm": 0.04299509897828102, + "learning_rate": 4.4530352767557635e-06, + "loss": 0.0007, + "step": 181300 + }, + { + "epoch": 1.1628660212357256, + "grad_norm": 0.19627784192562103, + "learning_rate": 4.452478935822402e-06, + "loss": 0.0024, + "step": 181310 + }, + { + "epoch": 1.1629301581295117, + "grad_norm": 0.0833597183227539, + "learning_rate": 4.451922601749869e-06, + "loss": 0.001, + "step": 181320 + }, + { + "epoch": 1.1629942950232977, + "grad_norm": 0.11564180999994278, + "learning_rate": 4.451366274545138e-06, + "loss": 0.0021, + "step": 181330 + }, + { + "epoch": 1.1630584319170838, + "grad_norm": 0.05993635207414627, + "learning_rate": 4.4508099542151795e-06, + "loss": 0.002, + "step": 181340 + }, + { + "epoch": 1.16312256881087, + "grad_norm": 0.11969758570194244, + "learning_rate": 4.450253640766966e-06, + "loss": 0.0014, + "step": 181350 + }, + { + "epoch": 1.163186705704656, + "grad_norm": 0.07571439445018768, + "learning_rate": 4.449697334207465e-06, + "loss": 0.001, + "step": 181360 + }, + { + "epoch": 1.163250842598442, + "grad_norm": 0.39648115634918213, + "learning_rate": 4.449141034543653e-06, + "loss": 0.0023, + "step": 181370 + }, + { + "epoch": 1.1633149794922282, + "grad_norm": 0.04993022233247757, + "learning_rate": 4.448584741782495e-06, + "loss": 0.0014, + "step": 181380 + }, + { + "epoch": 1.1633791163860143, + "grad_norm": 0.04748213663697243, + "learning_rate": 4.448028455930965e-06, + "loss": 0.0025, + "step": 181390 + }, + { + "epoch": 1.1634432532798005, + "grad_norm": 0.26994580030441284, + "learning_rate": 4.447472176996033e-06, + "loss": 0.002, + "step": 181400 + }, + { + "epoch": 1.1635073901735864, + "grad_norm": 0.058311715722084045, + "learning_rate": 4.44691590498467e-06, + "loss": 0.0015, + "step": 181410 + }, + { + "epoch": 1.1635715270673725, + "grad_norm": 0.019568253308534622, + "learning_rate": 4.4463596399038444e-06, + "loss": 0.0018, + "step": 181420 + }, + { + "epoch": 1.1636356639611587, + "grad_norm": 0.07359485328197479, + "learning_rate": 4.44580338176053e-06, + "loss": 0.0029, + "step": 181430 + }, + { + "epoch": 1.1636998008549448, + "grad_norm": 0.07900948077440262, + "learning_rate": 4.445247130561694e-06, + "loss": 0.0013, + "step": 181440 + }, + { + "epoch": 1.163763937748731, + "grad_norm": 0.21989136934280396, + "learning_rate": 4.444690886314307e-06, + "loss": 0.0023, + "step": 181450 + }, + { + "epoch": 1.163828074642517, + "grad_norm": 0.040261659771203995, + "learning_rate": 4.4441346490253425e-06, + "loss": 0.0014, + "step": 181460 + }, + { + "epoch": 1.163892211536303, + "grad_norm": 0.05164617672562599, + "learning_rate": 4.443578418701766e-06, + "loss": 0.0018, + "step": 181470 + }, + { + "epoch": 1.1639563484300892, + "grad_norm": 0.12991642951965332, + "learning_rate": 4.443022195350551e-06, + "loss": 0.0011, + "step": 181480 + }, + { + "epoch": 1.1640204853238754, + "grad_norm": 0.1465480476617813, + "learning_rate": 4.4424659789786655e-06, + "loss": 0.002, + "step": 181490 + }, + { + "epoch": 1.1640846222176613, + "grad_norm": 0.051542460918426514, + "learning_rate": 4.4419097695930806e-06, + "loss": 0.0018, + "step": 181500 + }, + { + "epoch": 1.1641487591114474, + "grad_norm": 0.01723652519285679, + "learning_rate": 4.441353567200763e-06, + "loss": 0.0007, + "step": 181510 + }, + { + "epoch": 1.1642128960052336, + "grad_norm": 0.03253493085503578, + "learning_rate": 4.4407973718086876e-06, + "loss": 0.0035, + "step": 181520 + }, + { + "epoch": 1.1642770328990197, + "grad_norm": 0.4729132652282715, + "learning_rate": 4.440241183423818e-06, + "loss": 0.0011, + "step": 181530 + }, + { + "epoch": 1.1643411697928059, + "grad_norm": 0.18357275426387787, + "learning_rate": 4.439685002053128e-06, + "loss": 0.0023, + "step": 181540 + }, + { + "epoch": 1.1644053066865918, + "grad_norm": 0.06634698808193207, + "learning_rate": 4.439128827703583e-06, + "loss": 0.0023, + "step": 181550 + }, + { + "epoch": 1.164469443580378, + "grad_norm": 0.16304150223731995, + "learning_rate": 4.438572660382156e-06, + "loss": 0.0016, + "step": 181560 + }, + { + "epoch": 1.164533580474164, + "grad_norm": 0.030540192499756813, + "learning_rate": 4.438016500095815e-06, + "loss": 0.0016, + "step": 181570 + }, + { + "epoch": 1.16459771736795, + "grad_norm": 0.054729048162698746, + "learning_rate": 4.43746034685153e-06, + "loss": 0.0008, + "step": 181580 + }, + { + "epoch": 1.1646618542617362, + "grad_norm": 0.008067057467997074, + "learning_rate": 4.436904200656267e-06, + "loss": 0.0021, + "step": 181590 + }, + { + "epoch": 1.1647259911555223, + "grad_norm": 0.36333948373794556, + "learning_rate": 4.436348061516999e-06, + "loss": 0.0009, + "step": 181600 + }, + { + "epoch": 1.1647901280493085, + "grad_norm": 0.008159461431205273, + "learning_rate": 4.435791929440691e-06, + "loss": 0.0013, + "step": 181610 + }, + { + "epoch": 1.1648542649430946, + "grad_norm": 0.05168044939637184, + "learning_rate": 4.435235804434314e-06, + "loss": 0.001, + "step": 181620 + }, + { + "epoch": 1.1649184018368806, + "grad_norm": 0.05114075914025307, + "learning_rate": 4.434679686504836e-06, + "loss": 0.0011, + "step": 181630 + }, + { + "epoch": 1.1649825387306667, + "grad_norm": 0.024549264460802078, + "learning_rate": 4.4341235756592255e-06, + "loss": 0.0016, + "step": 181640 + }, + { + "epoch": 1.1650466756244529, + "grad_norm": 0.06715276092290878, + "learning_rate": 4.43356747190445e-06, + "loss": 0.0012, + "step": 181650 + }, + { + "epoch": 1.165110812518239, + "grad_norm": 0.06436097621917725, + "learning_rate": 4.433011375247481e-06, + "loss": 0.001, + "step": 181660 + }, + { + "epoch": 1.165174949412025, + "grad_norm": 0.13928824663162231, + "learning_rate": 4.4324552856952826e-06, + "loss": 0.0016, + "step": 181670 + }, + { + "epoch": 1.165239086305811, + "grad_norm": 0.18230652809143066, + "learning_rate": 4.431899203254827e-06, + "loss": 0.0028, + "step": 181680 + }, + { + "epoch": 1.1653032231995972, + "grad_norm": 0.09898312389850616, + "learning_rate": 4.4313431279330785e-06, + "loss": 0.0015, + "step": 181690 + }, + { + "epoch": 1.1653673600933834, + "grad_norm": 0.12875409424304962, + "learning_rate": 4.430787059737009e-06, + "loss": 0.0021, + "step": 181700 + }, + { + "epoch": 1.1654314969871695, + "grad_norm": 0.15830400586128235, + "learning_rate": 4.430230998673582e-06, + "loss": 0.0013, + "step": 181710 + }, + { + "epoch": 1.1654956338809555, + "grad_norm": 0.18438397347927094, + "learning_rate": 4.42967494474977e-06, + "loss": 0.0013, + "step": 181720 + }, + { + "epoch": 1.1655597707747416, + "grad_norm": 0.1835208535194397, + "learning_rate": 4.429118897972538e-06, + "loss": 0.0019, + "step": 181730 + }, + { + "epoch": 1.1656239076685277, + "grad_norm": 0.14134618639945984, + "learning_rate": 4.428562858348853e-06, + "loss": 0.001, + "step": 181740 + }, + { + "epoch": 1.165688044562314, + "grad_norm": 0.10361126065254211, + "learning_rate": 4.428006825885686e-06, + "loss": 0.0021, + "step": 181750 + }, + { + "epoch": 1.1657521814560998, + "grad_norm": 0.11805353313684464, + "learning_rate": 4.427450800590001e-06, + "loss": 0.0019, + "step": 181760 + }, + { + "epoch": 1.165816318349886, + "grad_norm": 0.13512155413627625, + "learning_rate": 4.4268947824687665e-06, + "loss": 0.0012, + "step": 181770 + }, + { + "epoch": 1.1658804552436721, + "grad_norm": 0.029260262846946716, + "learning_rate": 4.42633877152895e-06, + "loss": 0.0013, + "step": 181780 + }, + { + "epoch": 1.1659445921374583, + "grad_norm": 0.1251649260520935, + "learning_rate": 4.4257827677775185e-06, + "loss": 0.002, + "step": 181790 + }, + { + "epoch": 1.1660087290312442, + "grad_norm": 0.03417348861694336, + "learning_rate": 4.425226771221439e-06, + "loss": 0.0008, + "step": 181800 + }, + { + "epoch": 1.1660728659250303, + "grad_norm": 0.02154257707297802, + "learning_rate": 4.42467078186768e-06, + "loss": 0.0011, + "step": 181810 + }, + { + "epoch": 1.1661370028188165, + "grad_norm": 0.14985492825508118, + "learning_rate": 4.424114799723207e-06, + "loss": 0.0022, + "step": 181820 + }, + { + "epoch": 1.1662011397126026, + "grad_norm": 0.16996338963508606, + "learning_rate": 4.423558824794987e-06, + "loss": 0.0022, + "step": 181830 + }, + { + "epoch": 1.1662652766063886, + "grad_norm": 0.06400048732757568, + "learning_rate": 4.423002857089987e-06, + "loss": 0.0016, + "step": 181840 + }, + { + "epoch": 1.1663294135001747, + "grad_norm": 0.014239252544939518, + "learning_rate": 4.422446896615174e-06, + "loss": 0.0011, + "step": 181850 + }, + { + "epoch": 1.1663935503939609, + "grad_norm": 0.13408030569553375, + "learning_rate": 4.421890943377512e-06, + "loss": 0.0033, + "step": 181860 + }, + { + "epoch": 1.166457687287747, + "grad_norm": 0.12906645238399506, + "learning_rate": 4.421334997383971e-06, + "loss": 0.0012, + "step": 181870 + }, + { + "epoch": 1.1665218241815332, + "grad_norm": 0.014006029814481735, + "learning_rate": 4.420779058641517e-06, + "loss": 0.0012, + "step": 181880 + }, + { + "epoch": 1.166585961075319, + "grad_norm": 0.0008519539842382073, + "learning_rate": 4.420223127157114e-06, + "loss": 0.0006, + "step": 181890 + }, + { + "epoch": 1.1666500979691052, + "grad_norm": 0.05692529305815697, + "learning_rate": 4.419667202937731e-06, + "loss": 0.0016, + "step": 181900 + }, + { + "epoch": 1.1667142348628914, + "grad_norm": 0.023376788944005966, + "learning_rate": 4.4191112859903305e-06, + "loss": 0.0019, + "step": 181910 + }, + { + "epoch": 1.1667783717566775, + "grad_norm": 0.09955395013093948, + "learning_rate": 4.418555376321883e-06, + "loss": 0.0018, + "step": 181920 + }, + { + "epoch": 1.1668425086504635, + "grad_norm": 0.49170419573783875, + "learning_rate": 4.41799947393935e-06, + "loss": 0.0014, + "step": 181930 + }, + { + "epoch": 1.1669066455442496, + "grad_norm": 0.1264665573835373, + "learning_rate": 4.4174435788497025e-06, + "loss": 0.0017, + "step": 181940 + }, + { + "epoch": 1.1669707824380358, + "grad_norm": 0.041081368923187256, + "learning_rate": 4.416887691059901e-06, + "loss": 0.0014, + "step": 181950 + }, + { + "epoch": 1.167034919331822, + "grad_norm": 0.05159052088856697, + "learning_rate": 4.4163318105769145e-06, + "loss": 0.0009, + "step": 181960 + }, + { + "epoch": 1.167099056225608, + "grad_norm": 0.04607924073934555, + "learning_rate": 4.415775937407706e-06, + "loss": 0.0008, + "step": 181970 + }, + { + "epoch": 1.167163193119394, + "grad_norm": 0.23263250291347504, + "learning_rate": 4.415220071559244e-06, + "loss": 0.0027, + "step": 181980 + }, + { + "epoch": 1.1672273300131801, + "grad_norm": 0.14163386821746826, + "learning_rate": 4.4146642130384925e-06, + "loss": 0.0011, + "step": 181990 + }, + { + "epoch": 1.1672914669069663, + "grad_norm": 0.1206795796751976, + "learning_rate": 4.4141083618524175e-06, + "loss": 0.0019, + "step": 182000 + }, + { + "epoch": 1.1673556038007522, + "grad_norm": 0.02696717530488968, + "learning_rate": 4.413552518007981e-06, + "loss": 0.0016, + "step": 182010 + }, + { + "epoch": 1.1674197406945384, + "grad_norm": 0.07129814475774765, + "learning_rate": 4.412996681512152e-06, + "loss": 0.0012, + "step": 182020 + }, + { + "epoch": 1.1674838775883245, + "grad_norm": 0.09845761209726334, + "learning_rate": 4.412440852371894e-06, + "loss": 0.0019, + "step": 182030 + }, + { + "epoch": 1.1675480144821107, + "grad_norm": 0.07310761511325836, + "learning_rate": 4.411885030594172e-06, + "loss": 0.0016, + "step": 182040 + }, + { + "epoch": 1.1676121513758968, + "grad_norm": 0.030018869787454605, + "learning_rate": 4.411329216185951e-06, + "loss": 0.0017, + "step": 182050 + }, + { + "epoch": 1.1676762882696827, + "grad_norm": 0.17031699419021606, + "learning_rate": 4.410773409154195e-06, + "loss": 0.0029, + "step": 182060 + }, + { + "epoch": 1.1677404251634689, + "grad_norm": 0.09744299203157425, + "learning_rate": 4.4102176095058705e-06, + "loss": 0.0028, + "step": 182070 + }, + { + "epoch": 1.167804562057255, + "grad_norm": 0.014784046448767185, + "learning_rate": 4.40966181724794e-06, + "loss": 0.0011, + "step": 182080 + }, + { + "epoch": 1.1678686989510412, + "grad_norm": 0.08462289720773697, + "learning_rate": 4.40910603238737e-06, + "loss": 0.0015, + "step": 182090 + }, + { + "epoch": 1.167932835844827, + "grad_norm": 0.12970688939094543, + "learning_rate": 4.408550254931122e-06, + "loss": 0.0018, + "step": 182100 + }, + { + "epoch": 1.1679969727386132, + "grad_norm": 0.14601962268352509, + "learning_rate": 4.407994484886163e-06, + "loss": 0.0024, + "step": 182110 + }, + { + "epoch": 1.1680611096323994, + "grad_norm": 0.29048222303390503, + "learning_rate": 4.407438722259456e-06, + "loss": 0.0028, + "step": 182120 + }, + { + "epoch": 1.1681252465261855, + "grad_norm": 0.22827433049678802, + "learning_rate": 4.406882967057966e-06, + "loss": 0.0023, + "step": 182130 + }, + { + "epoch": 1.1681893834199717, + "grad_norm": 0.14444595575332642, + "learning_rate": 4.4063272192886555e-06, + "loss": 0.0017, + "step": 182140 + }, + { + "epoch": 1.1682535203137576, + "grad_norm": 0.08912989497184753, + "learning_rate": 4.405771478958488e-06, + "loss": 0.0022, + "step": 182150 + }, + { + "epoch": 1.1683176572075438, + "grad_norm": 0.006506162695586681, + "learning_rate": 4.405215746074431e-06, + "loss": 0.001, + "step": 182160 + }, + { + "epoch": 1.16838179410133, + "grad_norm": 0.16769647598266602, + "learning_rate": 4.404660020643444e-06, + "loss": 0.0025, + "step": 182170 + }, + { + "epoch": 1.168445930995116, + "grad_norm": 0.06533028930425644, + "learning_rate": 4.404104302672494e-06, + "loss": 0.0031, + "step": 182180 + }, + { + "epoch": 1.168510067888902, + "grad_norm": 0.23282302916049957, + "learning_rate": 4.403548592168541e-06, + "loss": 0.0027, + "step": 182190 + }, + { + "epoch": 1.1685742047826881, + "grad_norm": 0.03258645161986351, + "learning_rate": 4.4029928891385525e-06, + "loss": 0.0008, + "step": 182200 + }, + { + "epoch": 1.1686383416764743, + "grad_norm": 0.09809160977602005, + "learning_rate": 4.4024371935894885e-06, + "loss": 0.001, + "step": 182210 + }, + { + "epoch": 1.1687024785702604, + "grad_norm": 0.07028601318597794, + "learning_rate": 4.401881505528314e-06, + "loss": 0.0013, + "step": 182220 + }, + { + "epoch": 1.1687666154640464, + "grad_norm": 0.12784184515476227, + "learning_rate": 4.401325824961991e-06, + "loss": 0.0019, + "step": 182230 + }, + { + "epoch": 1.1688307523578325, + "grad_norm": 0.0225824024528265, + "learning_rate": 4.400770151897484e-06, + "loss": 0.0014, + "step": 182240 + }, + { + "epoch": 1.1688948892516187, + "grad_norm": 0.3825037181377411, + "learning_rate": 4.400214486341755e-06, + "loss": 0.0026, + "step": 182250 + }, + { + "epoch": 1.1689590261454048, + "grad_norm": 0.11530786007642746, + "learning_rate": 4.399658828301768e-06, + "loss": 0.0019, + "step": 182260 + }, + { + "epoch": 1.1690231630391907, + "grad_norm": 0.12124911695718765, + "learning_rate": 4.399103177784484e-06, + "loss": 0.0015, + "step": 182270 + }, + { + "epoch": 1.1690872999329769, + "grad_norm": 0.038760680705308914, + "learning_rate": 4.398547534796867e-06, + "loss": 0.0016, + "step": 182280 + }, + { + "epoch": 1.169151436826763, + "grad_norm": 0.0614054910838604, + "learning_rate": 4.397991899345879e-06, + "loss": 0.0016, + "step": 182290 + }, + { + "epoch": 1.1692155737205492, + "grad_norm": 0.11623246222734451, + "learning_rate": 4.397436271438482e-06, + "loss": 0.0013, + "step": 182300 + }, + { + "epoch": 1.1692797106143353, + "grad_norm": 0.14545269310474396, + "learning_rate": 4.396880651081638e-06, + "loss": 0.0019, + "step": 182310 + }, + { + "epoch": 1.1693438475081213, + "grad_norm": 0.07068877667188644, + "learning_rate": 4.396325038282313e-06, + "loss": 0.0031, + "step": 182320 + }, + { + "epoch": 1.1694079844019074, + "grad_norm": 0.09615910798311234, + "learning_rate": 4.395769433047466e-06, + "loss": 0.0015, + "step": 182330 + }, + { + "epoch": 1.1694721212956936, + "grad_norm": 0.03125937655568123, + "learning_rate": 4.395213835384061e-06, + "loss": 0.0021, + "step": 182340 + }, + { + "epoch": 1.1695362581894797, + "grad_norm": 0.21899282932281494, + "learning_rate": 4.3946582452990565e-06, + "loss": 0.0023, + "step": 182350 + }, + { + "epoch": 1.1696003950832656, + "grad_norm": 0.09769497811794281, + "learning_rate": 4.394102662799418e-06, + "loss": 0.0024, + "step": 182360 + }, + { + "epoch": 1.1696645319770518, + "grad_norm": 0.023459237068891525, + "learning_rate": 4.3935470878921076e-06, + "loss": 0.0018, + "step": 182370 + }, + { + "epoch": 1.169728668870838, + "grad_norm": 0.1508914679288864, + "learning_rate": 4.392991520584084e-06, + "loss": 0.0007, + "step": 182380 + }, + { + "epoch": 1.169792805764624, + "grad_norm": 0.2519901394844055, + "learning_rate": 4.392435960882311e-06, + "loss": 0.0013, + "step": 182390 + }, + { + "epoch": 1.1698569426584102, + "grad_norm": 0.11893133074045181, + "learning_rate": 4.39188040879375e-06, + "loss": 0.0008, + "step": 182400 + }, + { + "epoch": 1.1699210795521962, + "grad_norm": 0.1509605050086975, + "learning_rate": 4.391324864325361e-06, + "loss": 0.004, + "step": 182410 + }, + { + "epoch": 1.1699852164459823, + "grad_norm": 0.1959735006093979, + "learning_rate": 4.390769327484106e-06, + "loss": 0.0026, + "step": 182420 + }, + { + "epoch": 1.1700493533397684, + "grad_norm": 0.04640708118677139, + "learning_rate": 4.390213798276949e-06, + "loss": 0.0005, + "step": 182430 + }, + { + "epoch": 1.1701134902335544, + "grad_norm": 0.02035406418144703, + "learning_rate": 4.3896582767108465e-06, + "loss": 0.003, + "step": 182440 + }, + { + "epoch": 1.1701776271273405, + "grad_norm": 0.032201845198869705, + "learning_rate": 4.389102762792764e-06, + "loss": 0.0017, + "step": 182450 + }, + { + "epoch": 1.1702417640211267, + "grad_norm": 0.018200211226940155, + "learning_rate": 4.388547256529659e-06, + "loss": 0.0013, + "step": 182460 + }, + { + "epoch": 1.1703059009149128, + "grad_norm": 0.10391952842473984, + "learning_rate": 4.387991757928495e-06, + "loss": 0.0013, + "step": 182470 + }, + { + "epoch": 1.170370037808699, + "grad_norm": 0.08152526617050171, + "learning_rate": 4.387436266996231e-06, + "loss": 0.0009, + "step": 182480 + }, + { + "epoch": 1.170434174702485, + "grad_norm": 0.1280585378408432, + "learning_rate": 4.386880783739829e-06, + "loss": 0.0023, + "step": 182490 + }, + { + "epoch": 1.170498311596271, + "grad_norm": 0.02015378326177597, + "learning_rate": 4.386325308166249e-06, + "loss": 0.0011, + "step": 182500 + }, + { + "epoch": 1.1705624484900572, + "grad_norm": 0.13980241119861603, + "learning_rate": 4.38576984028245e-06, + "loss": 0.0013, + "step": 182510 + }, + { + "epoch": 1.1706265853838433, + "grad_norm": 0.08781352639198303, + "learning_rate": 4.385214380095395e-06, + "loss": 0.0017, + "step": 182520 + }, + { + "epoch": 1.1706907222776293, + "grad_norm": 0.19003696739673615, + "learning_rate": 4.3846589276120435e-06, + "loss": 0.0026, + "step": 182530 + }, + { + "epoch": 1.1707548591714154, + "grad_norm": 0.025535663589835167, + "learning_rate": 4.384103482839354e-06, + "loss": 0.0012, + "step": 182540 + }, + { + "epoch": 1.1708189960652016, + "grad_norm": 0.119336798787117, + "learning_rate": 4.383548045784291e-06, + "loss": 0.002, + "step": 182550 + }, + { + "epoch": 1.1708831329589877, + "grad_norm": 0.05758288875222206, + "learning_rate": 4.382992616453809e-06, + "loss": 0.002, + "step": 182560 + }, + { + "epoch": 1.1709472698527739, + "grad_norm": 0.025762133300304413, + "learning_rate": 4.38243719485487e-06, + "loss": 0.0023, + "step": 182570 + }, + { + "epoch": 1.1710114067465598, + "grad_norm": 0.2017737179994583, + "learning_rate": 4.381881780994436e-06, + "loss": 0.0042, + "step": 182580 + }, + { + "epoch": 1.171075543640346, + "grad_norm": 0.008837364614009857, + "learning_rate": 4.381326374879464e-06, + "loss": 0.0018, + "step": 182590 + }, + { + "epoch": 1.171139680534132, + "grad_norm": 0.0664515420794487, + "learning_rate": 4.380770976516916e-06, + "loss": 0.0033, + "step": 182600 + }, + { + "epoch": 1.1712038174279182, + "grad_norm": 0.07497132569551468, + "learning_rate": 4.380215585913749e-06, + "loss": 0.0015, + "step": 182610 + }, + { + "epoch": 1.1712679543217042, + "grad_norm": 0.10247337073087692, + "learning_rate": 4.3796602030769245e-06, + "loss": 0.0026, + "step": 182620 + }, + { + "epoch": 1.1713320912154903, + "grad_norm": 0.07052148878574371, + "learning_rate": 4.3791048280133995e-06, + "loss": 0.0019, + "step": 182630 + }, + { + "epoch": 1.1713962281092765, + "grad_norm": 0.17377875745296478, + "learning_rate": 4.378549460730136e-06, + "loss": 0.0011, + "step": 182640 + }, + { + "epoch": 1.1714603650030626, + "grad_norm": 0.10041338950395584, + "learning_rate": 4.377994101234092e-06, + "loss": 0.0014, + "step": 182650 + }, + { + "epoch": 1.1715245018968488, + "grad_norm": 0.04387228563427925, + "learning_rate": 4.377438749532226e-06, + "loss": 0.001, + "step": 182660 + }, + { + "epoch": 1.1715886387906347, + "grad_norm": 0.14890913665294647, + "learning_rate": 4.376883405631497e-06, + "loss": 0.0015, + "step": 182670 + }, + { + "epoch": 1.1716527756844208, + "grad_norm": 0.11959514766931534, + "learning_rate": 4.376328069538865e-06, + "loss": 0.0017, + "step": 182680 + }, + { + "epoch": 1.171716912578207, + "grad_norm": 0.1006447970867157, + "learning_rate": 4.3757727412612874e-06, + "loss": 0.0011, + "step": 182690 + }, + { + "epoch": 1.171781049471993, + "grad_norm": 0.137902170419693, + "learning_rate": 4.375217420805725e-06, + "loss": 0.0019, + "step": 182700 + }, + { + "epoch": 1.171845186365779, + "grad_norm": 0.1885043978691101, + "learning_rate": 4.374662108179133e-06, + "loss": 0.0009, + "step": 182710 + }, + { + "epoch": 1.1719093232595652, + "grad_norm": 0.0794215202331543, + "learning_rate": 4.374106803388472e-06, + "loss": 0.0021, + "step": 182720 + }, + { + "epoch": 1.1719734601533514, + "grad_norm": 0.3572762906551361, + "learning_rate": 4.373551506440701e-06, + "loss": 0.0031, + "step": 182730 + }, + { + "epoch": 1.1720375970471375, + "grad_norm": 0.02289850264787674, + "learning_rate": 4.372996217342776e-06, + "loss": 0.0028, + "step": 182740 + }, + { + "epoch": 1.1721017339409234, + "grad_norm": 0.02222614735364914, + "learning_rate": 4.3724409361016585e-06, + "loss": 0.0011, + "step": 182750 + }, + { + "epoch": 1.1721658708347096, + "grad_norm": 0.11943326145410538, + "learning_rate": 4.371885662724302e-06, + "loss": 0.0019, + "step": 182760 + }, + { + "epoch": 1.1722300077284957, + "grad_norm": 0.04962974786758423, + "learning_rate": 4.3713303972176705e-06, + "loss": 0.0011, + "step": 182770 + }, + { + "epoch": 1.1722941446222819, + "grad_norm": 0.09099887311458588, + "learning_rate": 4.370775139588715e-06, + "loss": 0.0019, + "step": 182780 + }, + { + "epoch": 1.1723582815160678, + "grad_norm": 0.08444700390100479, + "learning_rate": 4.370219889844399e-06, + "loss": 0.0018, + "step": 182790 + }, + { + "epoch": 1.172422418409854, + "grad_norm": 0.1226532906293869, + "learning_rate": 4.3696646479916766e-06, + "loss": 0.0016, + "step": 182800 + }, + { + "epoch": 1.17248655530364, + "grad_norm": 0.05741210654377937, + "learning_rate": 4.369109414037508e-06, + "loss": 0.001, + "step": 182810 + }, + { + "epoch": 1.1725506921974262, + "grad_norm": 0.08488724380731583, + "learning_rate": 4.368554187988847e-06, + "loss": 0.0013, + "step": 182820 + }, + { + "epoch": 1.1726148290912124, + "grad_norm": 0.2592843174934387, + "learning_rate": 4.367998969852656e-06, + "loss": 0.0012, + "step": 182830 + }, + { + "epoch": 1.1726789659849983, + "grad_norm": 0.03624601289629936, + "learning_rate": 4.367443759635888e-06, + "loss": 0.0008, + "step": 182840 + }, + { + "epoch": 1.1727431028787845, + "grad_norm": 0.07296912372112274, + "learning_rate": 4.366888557345503e-06, + "loss": 0.0014, + "step": 182850 + }, + { + "epoch": 1.1728072397725706, + "grad_norm": 0.08172671496868134, + "learning_rate": 4.366333362988455e-06, + "loss": 0.0016, + "step": 182860 + }, + { + "epoch": 1.1728713766663568, + "grad_norm": 0.12368035316467285, + "learning_rate": 4.3657781765717026e-06, + "loss": 0.002, + "step": 182870 + }, + { + "epoch": 1.1729355135601427, + "grad_norm": 0.0981890931725502, + "learning_rate": 4.365222998102205e-06, + "loss": 0.0026, + "step": 182880 + }, + { + "epoch": 1.1729996504539288, + "grad_norm": 0.08738455921411514, + "learning_rate": 4.364667827586915e-06, + "loss": 0.0011, + "step": 182890 + }, + { + "epoch": 1.173063787347715, + "grad_norm": 0.08175847679376602, + "learning_rate": 4.364112665032793e-06, + "loss": 0.0013, + "step": 182900 + }, + { + "epoch": 1.1731279242415011, + "grad_norm": 0.14890995621681213, + "learning_rate": 4.363557510446792e-06, + "loss": 0.0016, + "step": 182910 + }, + { + "epoch": 1.173192061135287, + "grad_norm": 0.04660869762301445, + "learning_rate": 4.363002363835872e-06, + "loss": 0.0019, + "step": 182920 + }, + { + "epoch": 1.1732561980290732, + "grad_norm": 0.003985627554357052, + "learning_rate": 4.3624472252069865e-06, + "loss": 0.0008, + "step": 182930 + }, + { + "epoch": 1.1733203349228594, + "grad_norm": 0.0942840576171875, + "learning_rate": 4.361892094567094e-06, + "loss": 0.0018, + "step": 182940 + }, + { + "epoch": 1.1733844718166455, + "grad_norm": 0.10258271545171738, + "learning_rate": 4.361336971923149e-06, + "loss": 0.0014, + "step": 182950 + }, + { + "epoch": 1.1734486087104314, + "grad_norm": 0.12442044168710709, + "learning_rate": 4.360781857282108e-06, + "loss": 0.001, + "step": 182960 + }, + { + "epoch": 1.1735127456042176, + "grad_norm": 0.07931763678789139, + "learning_rate": 4.360226750650926e-06, + "loss": 0.0019, + "step": 182970 + }, + { + "epoch": 1.1735768824980037, + "grad_norm": 0.018039437010884285, + "learning_rate": 4.359671652036562e-06, + "loss": 0.001, + "step": 182980 + }, + { + "epoch": 1.1736410193917899, + "grad_norm": 0.03374273329973221, + "learning_rate": 4.359116561445969e-06, + "loss": 0.004, + "step": 182990 + }, + { + "epoch": 1.173705156285576, + "grad_norm": 0.003924379590898752, + "learning_rate": 4.358561478886103e-06, + "loss": 0.0023, + "step": 183000 + }, + { + "epoch": 1.173705156285576, + "eval_loss": 0.0024000562261790037, + "eval_runtime": 3.3153, + "eval_samples_per_second": 60.326, + "eval_steps_per_second": 15.082, + "step": 183000 + }, + { + "epoch": 1.173769293179362, + "grad_norm": 0.05327145382761955, + "learning_rate": 4.358006404363921e-06, + "loss": 0.0025, + "step": 183010 + }, + { + "epoch": 1.173833430073148, + "grad_norm": 0.014441578648984432, + "learning_rate": 4.357451337886377e-06, + "loss": 0.0015, + "step": 183020 + }, + { + "epoch": 1.1738975669669343, + "grad_norm": 0.18125401437282562, + "learning_rate": 4.356896279460428e-06, + "loss": 0.0013, + "step": 183030 + }, + { + "epoch": 1.1739617038607204, + "grad_norm": 0.05702318251132965, + "learning_rate": 4.356341229093028e-06, + "loss": 0.0022, + "step": 183040 + }, + { + "epoch": 1.1740258407545063, + "grad_norm": 0.22551800310611725, + "learning_rate": 4.355786186791132e-06, + "loss": 0.0019, + "step": 183050 + }, + { + "epoch": 1.1740899776482925, + "grad_norm": 0.04299104958772659, + "learning_rate": 4.355231152561694e-06, + "loss": 0.002, + "step": 183060 + }, + { + "epoch": 1.1741541145420786, + "grad_norm": 0.14469648897647858, + "learning_rate": 4.354676126411672e-06, + "loss": 0.0016, + "step": 183070 + }, + { + "epoch": 1.1742182514358648, + "grad_norm": 0.06454824656248093, + "learning_rate": 4.354121108348018e-06, + "loss": 0.0011, + "step": 183080 + }, + { + "epoch": 1.174282388329651, + "grad_norm": 0.09450476616621017, + "learning_rate": 4.353566098377689e-06, + "loss": 0.0013, + "step": 183090 + }, + { + "epoch": 1.1743465252234369, + "grad_norm": 0.08068623393774033, + "learning_rate": 4.35301109650764e-06, + "loss": 0.0017, + "step": 183100 + }, + { + "epoch": 1.174410662117223, + "grad_norm": 0.06034604832530022, + "learning_rate": 4.352456102744823e-06, + "loss": 0.0016, + "step": 183110 + }, + { + "epoch": 1.1744747990110092, + "grad_norm": 0.06156300753355026, + "learning_rate": 4.3519011170961945e-06, + "loss": 0.0013, + "step": 183120 + }, + { + "epoch": 1.174538935904795, + "grad_norm": 0.017585139721632004, + "learning_rate": 4.351346139568708e-06, + "loss": 0.0016, + "step": 183130 + }, + { + "epoch": 1.1746030727985812, + "grad_norm": 0.08957716077566147, + "learning_rate": 4.3507911701693175e-06, + "loss": 0.0021, + "step": 183140 + }, + { + "epoch": 1.1746672096923674, + "grad_norm": 0.018210873007774353, + "learning_rate": 4.350236208904978e-06, + "loss": 0.0022, + "step": 183150 + }, + { + "epoch": 1.1747313465861535, + "grad_norm": 0.26158273220062256, + "learning_rate": 4.349681255782643e-06, + "loss": 0.0011, + "step": 183160 + }, + { + "epoch": 1.1747954834799397, + "grad_norm": 0.08066187053918839, + "learning_rate": 4.3491263108092675e-06, + "loss": 0.0012, + "step": 183170 + }, + { + "epoch": 1.1748596203737256, + "grad_norm": 0.07623965293169022, + "learning_rate": 4.348571373991803e-06, + "loss": 0.0006, + "step": 183180 + }, + { + "epoch": 1.1749237572675117, + "grad_norm": 0.11561774462461472, + "learning_rate": 4.348016445337206e-06, + "loss": 0.0015, + "step": 183190 + }, + { + "epoch": 1.174987894161298, + "grad_norm": 0.1563999056816101, + "learning_rate": 4.347461524852429e-06, + "loss": 0.0013, + "step": 183200 + }, + { + "epoch": 1.175052031055084, + "grad_norm": 0.0295012928545475, + "learning_rate": 4.346906612544425e-06, + "loss": 0.0006, + "step": 183210 + }, + { + "epoch": 1.17511616794887, + "grad_norm": 0.08366100490093231, + "learning_rate": 4.346351708420147e-06, + "loss": 0.0021, + "step": 183220 + }, + { + "epoch": 1.1751803048426561, + "grad_norm": 0.05583413690328598, + "learning_rate": 4.345796812486552e-06, + "loss": 0.0014, + "step": 183230 + }, + { + "epoch": 1.1752444417364423, + "grad_norm": 0.09528223425149918, + "learning_rate": 4.345241924750588e-06, + "loss": 0.0021, + "step": 183240 + }, + { + "epoch": 1.1753085786302284, + "grad_norm": 0.08164983987808228, + "learning_rate": 4.344687045219212e-06, + "loss": 0.0012, + "step": 183250 + }, + { + "epoch": 1.1753727155240146, + "grad_norm": 0.027242237702012062, + "learning_rate": 4.344132173899374e-06, + "loss": 0.002, + "step": 183260 + }, + { + "epoch": 1.1754368524178005, + "grad_norm": 0.20938929915428162, + "learning_rate": 4.343577310798028e-06, + "loss": 0.0012, + "step": 183270 + }, + { + "epoch": 1.1755009893115866, + "grad_norm": 0.14060692489147186, + "learning_rate": 4.3430224559221305e-06, + "loss": 0.0017, + "step": 183280 + }, + { + "epoch": 1.1755651262053728, + "grad_norm": 0.18034429848194122, + "learning_rate": 4.342467609278629e-06, + "loss": 0.0012, + "step": 183290 + }, + { + "epoch": 1.175629263099159, + "grad_norm": 0.029719088226556778, + "learning_rate": 4.341912770874479e-06, + "loss": 0.0012, + "step": 183300 + }, + { + "epoch": 1.1756933999929449, + "grad_norm": 0.06746480613946915, + "learning_rate": 4.341357940716631e-06, + "loss": 0.0014, + "step": 183310 + }, + { + "epoch": 1.175757536886731, + "grad_norm": 0.07167258113622665, + "learning_rate": 4.340803118812042e-06, + "loss": 0.0023, + "step": 183320 + }, + { + "epoch": 1.1758216737805172, + "grad_norm": 0.4483695924282074, + "learning_rate": 4.340248305167658e-06, + "loss": 0.003, + "step": 183330 + }, + { + "epoch": 1.1758858106743033, + "grad_norm": 0.029790734872221947, + "learning_rate": 4.339693499790436e-06, + "loss": 0.0028, + "step": 183340 + }, + { + "epoch": 1.1759499475680892, + "grad_norm": 0.10472667962312698, + "learning_rate": 4.3391387026873246e-06, + "loss": 0.001, + "step": 183350 + }, + { + "epoch": 1.1760140844618754, + "grad_norm": 0.17653949558734894, + "learning_rate": 4.3385839138652796e-06, + "loss": 0.002, + "step": 183360 + }, + { + "epoch": 1.1760782213556615, + "grad_norm": 0.043926745653152466, + "learning_rate": 4.338029133331249e-06, + "loss": 0.0015, + "step": 183370 + }, + { + "epoch": 1.1761423582494477, + "grad_norm": 0.14336556196212769, + "learning_rate": 4.3374743610921886e-06, + "loss": 0.0014, + "step": 183380 + }, + { + "epoch": 1.1762064951432336, + "grad_norm": 0.04863275960087776, + "learning_rate": 4.336919597155046e-06, + "loss": 0.0022, + "step": 183390 + }, + { + "epoch": 1.1762706320370198, + "grad_norm": 0.06869960576295853, + "learning_rate": 4.336364841526775e-06, + "loss": 0.0009, + "step": 183400 + }, + { + "epoch": 1.176334768930806, + "grad_norm": 0.17010624706745148, + "learning_rate": 4.335810094214327e-06, + "loss": 0.0025, + "step": 183410 + }, + { + "epoch": 1.176398905824592, + "grad_norm": 0.03901492431759834, + "learning_rate": 4.335255355224653e-06, + "loss": 0.0018, + "step": 183420 + }, + { + "epoch": 1.1764630427183782, + "grad_norm": 0.10105922818183899, + "learning_rate": 4.334700624564706e-06, + "loss": 0.0012, + "step": 183430 + }, + { + "epoch": 1.1765271796121641, + "grad_norm": 0.07125986367464066, + "learning_rate": 4.334145902241434e-06, + "loss": 0.0019, + "step": 183440 + }, + { + "epoch": 1.1765913165059503, + "grad_norm": 0.022353487089276314, + "learning_rate": 4.333591188261791e-06, + "loss": 0.0011, + "step": 183450 + }, + { + "epoch": 1.1766554533997364, + "grad_norm": 0.07350007444620132, + "learning_rate": 4.333036482632725e-06, + "loss": 0.0008, + "step": 183460 + }, + { + "epoch": 1.1767195902935226, + "grad_norm": 0.1433224380016327, + "learning_rate": 4.332481785361191e-06, + "loss": 0.0016, + "step": 183470 + }, + { + "epoch": 1.1767837271873085, + "grad_norm": 0.04887795448303223, + "learning_rate": 4.331927096454135e-06, + "loss": 0.0012, + "step": 183480 + }, + { + "epoch": 1.1768478640810947, + "grad_norm": 0.13807959854602814, + "learning_rate": 4.331372415918511e-06, + "loss": 0.0023, + "step": 183490 + }, + { + "epoch": 1.1769120009748808, + "grad_norm": 0.04304143041372299, + "learning_rate": 4.330817743761267e-06, + "loss": 0.0008, + "step": 183500 + }, + { + "epoch": 1.176976137868667, + "grad_norm": 0.09120394289493561, + "learning_rate": 4.330263079989358e-06, + "loss": 0.0012, + "step": 183510 + }, + { + "epoch": 1.177040274762453, + "grad_norm": 0.07101622223854065, + "learning_rate": 4.3297084246097285e-06, + "loss": 0.0013, + "step": 183520 + }, + { + "epoch": 1.177104411656239, + "grad_norm": 0.07521035522222519, + "learning_rate": 4.329153777629333e-06, + "loss": 0.0016, + "step": 183530 + }, + { + "epoch": 1.1771685485500252, + "grad_norm": 0.14560550451278687, + "learning_rate": 4.328599139055119e-06, + "loss": 0.0017, + "step": 183540 + }, + { + "epoch": 1.1772326854438113, + "grad_norm": 0.08797882497310638, + "learning_rate": 4.328044508894039e-06, + "loss": 0.0024, + "step": 183550 + }, + { + "epoch": 1.1772968223375972, + "grad_norm": 0.1541559100151062, + "learning_rate": 4.32748988715304e-06, + "loss": 0.0018, + "step": 183560 + }, + { + "epoch": 1.1773609592313834, + "grad_norm": 0.1894298642873764, + "learning_rate": 4.326935273839074e-06, + "loss": 0.0018, + "step": 183570 + }, + { + "epoch": 1.1774250961251695, + "grad_norm": 0.05562448874115944, + "learning_rate": 4.326380668959091e-06, + "loss": 0.0017, + "step": 183580 + }, + { + "epoch": 1.1774892330189557, + "grad_norm": 0.14997988939285278, + "learning_rate": 4.3258260725200374e-06, + "loss": 0.0023, + "step": 183590 + }, + { + "epoch": 1.1775533699127418, + "grad_norm": 0.23341451585292816, + "learning_rate": 4.325271484528867e-06, + "loss": 0.0018, + "step": 183600 + }, + { + "epoch": 1.1776175068065278, + "grad_norm": 0.2117612212896347, + "learning_rate": 4.324716904992527e-06, + "loss": 0.0022, + "step": 183610 + }, + { + "epoch": 1.177681643700314, + "grad_norm": 0.18917398154735565, + "learning_rate": 4.3241623339179665e-06, + "loss": 0.003, + "step": 183620 + }, + { + "epoch": 1.1777457805941, + "grad_norm": 0.07607435435056686, + "learning_rate": 4.323607771312134e-06, + "loss": 0.0015, + "step": 183630 + }, + { + "epoch": 1.1778099174878862, + "grad_norm": 0.08164612948894501, + "learning_rate": 4.323053217181981e-06, + "loss": 0.0018, + "step": 183640 + }, + { + "epoch": 1.1778740543816721, + "grad_norm": 0.03821833059191704, + "learning_rate": 4.322498671534453e-06, + "loss": 0.0016, + "step": 183650 + }, + { + "epoch": 1.1779381912754583, + "grad_norm": 0.018987243995070457, + "learning_rate": 4.321944134376503e-06, + "loss": 0.0017, + "step": 183660 + }, + { + "epoch": 1.1780023281692444, + "grad_norm": 0.0716153010725975, + "learning_rate": 4.321389605715076e-06, + "loss": 0.001, + "step": 183670 + }, + { + "epoch": 1.1780664650630306, + "grad_norm": 0.12727735936641693, + "learning_rate": 4.3208350855571235e-06, + "loss": 0.0013, + "step": 183680 + }, + { + "epoch": 1.1781306019568167, + "grad_norm": 0.3035914897918701, + "learning_rate": 4.320280573909592e-06, + "loss": 0.002, + "step": 183690 + }, + { + "epoch": 1.1781947388506027, + "grad_norm": 0.22567832469940186, + "learning_rate": 4.3197260707794315e-06, + "loss": 0.0017, + "step": 183700 + }, + { + "epoch": 1.1782588757443888, + "grad_norm": 0.12561731040477753, + "learning_rate": 4.319171576173588e-06, + "loss": 0.0006, + "step": 183710 + }, + { + "epoch": 1.178323012638175, + "grad_norm": 0.11747381836175919, + "learning_rate": 4.318617090099012e-06, + "loss": 0.0017, + "step": 183720 + }, + { + "epoch": 1.178387149531961, + "grad_norm": 0.10898769646883011, + "learning_rate": 4.3180626125626515e-06, + "loss": 0.0027, + "step": 183730 + }, + { + "epoch": 1.178451286425747, + "grad_norm": 0.31439948081970215, + "learning_rate": 4.317508143571453e-06, + "loss": 0.0032, + "step": 183740 + }, + { + "epoch": 1.1785154233195332, + "grad_norm": 0.0507233664393425, + "learning_rate": 4.316953683132365e-06, + "loss": 0.0019, + "step": 183750 + }, + { + "epoch": 1.1785795602133193, + "grad_norm": 0.08064527064561844, + "learning_rate": 4.316399231252336e-06, + "loss": 0.0012, + "step": 183760 + }, + { + "epoch": 1.1786436971071055, + "grad_norm": 0.04937786981463432, + "learning_rate": 4.3158447879383145e-06, + "loss": 0.0018, + "step": 183770 + }, + { + "epoch": 1.1787078340008914, + "grad_norm": 0.17828668653964996, + "learning_rate": 4.315290353197244e-06, + "loss": 0.0014, + "step": 183780 + }, + { + "epoch": 1.1787719708946776, + "grad_norm": 0.08378896117210388, + "learning_rate": 4.314735927036078e-06, + "loss": 0.002, + "step": 183790 + }, + { + "epoch": 1.1788361077884637, + "grad_norm": 0.092821404337883, + "learning_rate": 4.314181509461758e-06, + "loss": 0.0017, + "step": 183800 + }, + { + "epoch": 1.1789002446822499, + "grad_norm": 0.013002404943108559, + "learning_rate": 4.313627100481234e-06, + "loss": 0.0017, + "step": 183810 + }, + { + "epoch": 1.1789643815760358, + "grad_norm": 0.004985139239579439, + "learning_rate": 4.313072700101454e-06, + "loss": 0.0013, + "step": 183820 + }, + { + "epoch": 1.179028518469822, + "grad_norm": 0.11837707459926605, + "learning_rate": 4.312518308329364e-06, + "loss": 0.0019, + "step": 183830 + }, + { + "epoch": 1.179092655363608, + "grad_norm": 0.22910629212856293, + "learning_rate": 4.311963925171908e-06, + "loss": 0.0011, + "step": 183840 + }, + { + "epoch": 1.1791567922573942, + "grad_norm": 0.01819387823343277, + "learning_rate": 4.31140955063604e-06, + "loss": 0.002, + "step": 183850 + }, + { + "epoch": 1.1792209291511804, + "grad_norm": 0.06624264270067215, + "learning_rate": 4.3108551847287e-06, + "loss": 0.001, + "step": 183860 + }, + { + "epoch": 1.1792850660449663, + "grad_norm": 0.16025210916996002, + "learning_rate": 4.310300827456838e-06, + "loss": 0.0017, + "step": 183870 + }, + { + "epoch": 1.1793492029387525, + "grad_norm": 0.11636680364608765, + "learning_rate": 4.309746478827399e-06, + "loss": 0.0022, + "step": 183880 + }, + { + "epoch": 1.1794133398325386, + "grad_norm": 0.005025495775043964, + "learning_rate": 4.3091921388473305e-06, + "loss": 0.0017, + "step": 183890 + }, + { + "epoch": 1.1794774767263247, + "grad_norm": 0.04582427814602852, + "learning_rate": 4.308637807523578e-06, + "loss": 0.0009, + "step": 183900 + }, + { + "epoch": 1.1795416136201107, + "grad_norm": 0.10134973376989365, + "learning_rate": 4.308083484863088e-06, + "loss": 0.0016, + "step": 183910 + }, + { + "epoch": 1.1796057505138968, + "grad_norm": 0.013092214241623878, + "learning_rate": 4.307529170872806e-06, + "loss": 0.0028, + "step": 183920 + }, + { + "epoch": 1.179669887407683, + "grad_norm": 0.06885766983032227, + "learning_rate": 4.30697486555968e-06, + "loss": 0.0032, + "step": 183930 + }, + { + "epoch": 1.1797340243014691, + "grad_norm": 0.02871783636510372, + "learning_rate": 4.306420568930652e-06, + "loss": 0.0019, + "step": 183940 + }, + { + "epoch": 1.1797981611952553, + "grad_norm": 0.04335256665945053, + "learning_rate": 4.305866280992671e-06, + "loss": 0.0006, + "step": 183950 + }, + { + "epoch": 1.1798622980890412, + "grad_norm": 0.061511993408203125, + "learning_rate": 4.305312001752682e-06, + "loss": 0.0012, + "step": 183960 + }, + { + "epoch": 1.1799264349828273, + "grad_norm": 0.07183343172073364, + "learning_rate": 4.30475773121763e-06, + "loss": 0.0012, + "step": 183970 + }, + { + "epoch": 1.1799905718766135, + "grad_norm": 0.109437957406044, + "learning_rate": 4.30420346939446e-06, + "loss": 0.0013, + "step": 183980 + }, + { + "epoch": 1.1800547087703994, + "grad_norm": 0.15795163810253143, + "learning_rate": 4.303649216290117e-06, + "loss": 0.0023, + "step": 183990 + }, + { + "epoch": 1.1801188456641856, + "grad_norm": 0.12437650561332703, + "learning_rate": 4.303094971911548e-06, + "loss": 0.002, + "step": 184000 + }, + { + "epoch": 1.1801829825579717, + "grad_norm": 0.07483706623315811, + "learning_rate": 4.302540736265697e-06, + "loss": 0.0017, + "step": 184010 + }, + { + "epoch": 1.1802471194517579, + "grad_norm": 0.00944068469107151, + "learning_rate": 4.301986509359509e-06, + "loss": 0.0016, + "step": 184020 + }, + { + "epoch": 1.180311256345544, + "grad_norm": 0.09464728832244873, + "learning_rate": 4.301432291199929e-06, + "loss": 0.0019, + "step": 184030 + }, + { + "epoch": 1.18037539323933, + "grad_norm": 0.0372900627553463, + "learning_rate": 4.300878081793902e-06, + "loss": 0.0011, + "step": 184040 + }, + { + "epoch": 1.180439530133116, + "grad_norm": 0.23563946783542633, + "learning_rate": 4.300323881148371e-06, + "loss": 0.0013, + "step": 184050 + }, + { + "epoch": 1.1805036670269022, + "grad_norm": 0.17711228132247925, + "learning_rate": 4.299769689270284e-06, + "loss": 0.002, + "step": 184060 + }, + { + "epoch": 1.1805678039206884, + "grad_norm": 0.12385842949151993, + "learning_rate": 4.299215506166581e-06, + "loss": 0.0014, + "step": 184070 + }, + { + "epoch": 1.1806319408144743, + "grad_norm": 0.04377797245979309, + "learning_rate": 4.29866133184421e-06, + "loss": 0.0012, + "step": 184080 + }, + { + "epoch": 1.1806960777082605, + "grad_norm": 0.07182195037603378, + "learning_rate": 4.2981071663101135e-06, + "loss": 0.0014, + "step": 184090 + }, + { + "epoch": 1.1807602146020466, + "grad_norm": 0.47728675603866577, + "learning_rate": 4.297553009571236e-06, + "loss": 0.0013, + "step": 184100 + }, + { + "epoch": 1.1808243514958328, + "grad_norm": 0.05756598711013794, + "learning_rate": 4.2969988616345205e-06, + "loss": 0.0014, + "step": 184110 + }, + { + "epoch": 1.180888488389619, + "grad_norm": 0.04733746498823166, + "learning_rate": 4.296444722506911e-06, + "loss": 0.0017, + "step": 184120 + }, + { + "epoch": 1.1809526252834048, + "grad_norm": 0.09320191293954849, + "learning_rate": 4.295890592195354e-06, + "loss": 0.002, + "step": 184130 + }, + { + "epoch": 1.181016762177191, + "grad_norm": 0.10781149566173553, + "learning_rate": 4.29533647070679e-06, + "loss": 0.001, + "step": 184140 + }, + { + "epoch": 1.1810808990709771, + "grad_norm": 0.07742494344711304, + "learning_rate": 4.294782358048164e-06, + "loss": 0.0007, + "step": 184150 + }, + { + "epoch": 1.1811450359647633, + "grad_norm": 0.11263524740934372, + "learning_rate": 4.294228254226418e-06, + "loss": 0.0012, + "step": 184160 + }, + { + "epoch": 1.1812091728585492, + "grad_norm": 0.14303967356681824, + "learning_rate": 4.293674159248498e-06, + "loss": 0.0016, + "step": 184170 + }, + { + "epoch": 1.1812733097523354, + "grad_norm": 0.07030445337295532, + "learning_rate": 4.293120073121345e-06, + "loss": 0.0015, + "step": 184180 + }, + { + "epoch": 1.1813374466461215, + "grad_norm": 0.06604164838790894, + "learning_rate": 4.292565995851903e-06, + "loss": 0.0016, + "step": 184190 + }, + { + "epoch": 1.1814015835399077, + "grad_norm": 0.04574945569038391, + "learning_rate": 4.292011927447114e-06, + "loss": 0.0011, + "step": 184200 + }, + { + "epoch": 1.1814657204336938, + "grad_norm": 0.21651262044906616, + "learning_rate": 4.2914578679139215e-06, + "loss": 0.0012, + "step": 184210 + }, + { + "epoch": 1.1815298573274797, + "grad_norm": 0.007107165176421404, + "learning_rate": 4.290903817259269e-06, + "loss": 0.0009, + "step": 184220 + }, + { + "epoch": 1.1815939942212659, + "grad_norm": 0.08238311111927032, + "learning_rate": 4.290349775490098e-06, + "loss": 0.0019, + "step": 184230 + }, + { + "epoch": 1.181658131115052, + "grad_norm": 0.026919526979327202, + "learning_rate": 4.289795742613351e-06, + "loss": 0.0009, + "step": 184240 + }, + { + "epoch": 1.181722268008838, + "grad_norm": 0.021025434136390686, + "learning_rate": 4.289241718635972e-06, + "loss": 0.001, + "step": 184250 + }, + { + "epoch": 1.181786404902624, + "grad_norm": 0.008072410710155964, + "learning_rate": 4.288687703564901e-06, + "loss": 0.0012, + "step": 184260 + }, + { + "epoch": 1.1818505417964102, + "grad_norm": 0.09548212587833405, + "learning_rate": 4.288133697407082e-06, + "loss": 0.0014, + "step": 184270 + }, + { + "epoch": 1.1819146786901964, + "grad_norm": 0.1398821920156479, + "learning_rate": 4.287579700169457e-06, + "loss": 0.0024, + "step": 184280 + }, + { + "epoch": 1.1819788155839825, + "grad_norm": 0.12649239599704742, + "learning_rate": 4.287025711858966e-06, + "loss": 0.0027, + "step": 184290 + }, + { + "epoch": 1.1820429524777685, + "grad_norm": 0.07590962946414948, + "learning_rate": 4.2864717324825546e-06, + "loss": 0.0023, + "step": 184300 + }, + { + "epoch": 1.1821070893715546, + "grad_norm": 0.09035839885473251, + "learning_rate": 4.285917762047161e-06, + "loss": 0.002, + "step": 184310 + }, + { + "epoch": 1.1821712262653408, + "grad_norm": 0.2527417242527008, + "learning_rate": 4.285363800559728e-06, + "loss": 0.0015, + "step": 184320 + }, + { + "epoch": 1.182235363159127, + "grad_norm": 0.07209846377372742, + "learning_rate": 4.284809848027198e-06, + "loss": 0.001, + "step": 184330 + }, + { + "epoch": 1.1822995000529128, + "grad_norm": 0.10733459144830704, + "learning_rate": 4.2842559044565126e-06, + "loss": 0.0014, + "step": 184340 + }, + { + "epoch": 1.182363636946699, + "grad_norm": 0.021622339263558388, + "learning_rate": 4.28370196985461e-06, + "loss": 0.0018, + "step": 184350 + }, + { + "epoch": 1.1824277738404851, + "grad_norm": 0.18197694420814514, + "learning_rate": 4.283148044228435e-06, + "loss": 0.001, + "step": 184360 + }, + { + "epoch": 1.1824919107342713, + "grad_norm": 0.07683996856212616, + "learning_rate": 4.282594127584928e-06, + "loss": 0.0018, + "step": 184370 + }, + { + "epoch": 1.1825560476280574, + "grad_norm": 0.025498775765299797, + "learning_rate": 4.282040219931028e-06, + "loss": 0.0017, + "step": 184380 + }, + { + "epoch": 1.1826201845218434, + "grad_norm": 0.22006182372570038, + "learning_rate": 4.281486321273678e-06, + "loss": 0.0016, + "step": 184390 + }, + { + "epoch": 1.1826843214156295, + "grad_norm": 0.19186091423034668, + "learning_rate": 4.280932431619818e-06, + "loss": 0.0037, + "step": 184400 + }, + { + "epoch": 1.1827484583094157, + "grad_norm": 0.06715280562639236, + "learning_rate": 4.280378550976388e-06, + "loss": 0.0015, + "step": 184410 + }, + { + "epoch": 1.1828125952032016, + "grad_norm": 0.1739199012517929, + "learning_rate": 4.279824679350328e-06, + "loss": 0.0025, + "step": 184420 + }, + { + "epoch": 1.1828767320969877, + "grad_norm": 0.13191701471805573, + "learning_rate": 4.2792708167485805e-06, + "loss": 0.0022, + "step": 184430 + }, + { + "epoch": 1.1829408689907739, + "grad_norm": 0.01909823529422283, + "learning_rate": 4.278716963178085e-06, + "loss": 0.0018, + "step": 184440 + }, + { + "epoch": 1.18300500588456, + "grad_norm": 0.08328601717948914, + "learning_rate": 4.2781631186457815e-06, + "loss": 0.0026, + "step": 184450 + }, + { + "epoch": 1.1830691427783462, + "grad_norm": 0.06265491992235184, + "learning_rate": 4.27760928315861e-06, + "loss": 0.0018, + "step": 184460 + }, + { + "epoch": 1.183133279672132, + "grad_norm": 0.011295688338577747, + "learning_rate": 4.2770554567235104e-06, + "loss": 0.0025, + "step": 184470 + }, + { + "epoch": 1.1831974165659183, + "grad_norm": 0.09248891472816467, + "learning_rate": 4.276501639347423e-06, + "loss": 0.0036, + "step": 184480 + }, + { + "epoch": 1.1832615534597044, + "grad_norm": 0.0904218927025795, + "learning_rate": 4.275947831037287e-06, + "loss": 0.0016, + "step": 184490 + }, + { + "epoch": 1.1833256903534906, + "grad_norm": 0.02539573423564434, + "learning_rate": 4.275394031800041e-06, + "loss": 0.0017, + "step": 184500 + }, + { + "epoch": 1.1833898272472765, + "grad_norm": 0.1429576724767685, + "learning_rate": 4.274840241642627e-06, + "loss": 0.0019, + "step": 184510 + }, + { + "epoch": 1.1834539641410626, + "grad_norm": 0.09809023141860962, + "learning_rate": 4.274286460571981e-06, + "loss": 0.002, + "step": 184520 + }, + { + "epoch": 1.1835181010348488, + "grad_norm": 0.004953475669026375, + "learning_rate": 4.273732688595047e-06, + "loss": 0.0004, + "step": 184530 + }, + { + "epoch": 1.183582237928635, + "grad_norm": 0.02463797852396965, + "learning_rate": 4.27317892571876e-06, + "loss": 0.0029, + "step": 184540 + }, + { + "epoch": 1.183646374822421, + "grad_norm": 0.03655168041586876, + "learning_rate": 4.272625171950061e-06, + "loss": 0.0006, + "step": 184550 + }, + { + "epoch": 1.183710511716207, + "grad_norm": 0.1086079478263855, + "learning_rate": 4.272071427295887e-06, + "loss": 0.0014, + "step": 184560 + }, + { + "epoch": 1.1837746486099932, + "grad_norm": 0.005866074003279209, + "learning_rate": 4.27151769176318e-06, + "loss": 0.0014, + "step": 184570 + }, + { + "epoch": 1.1838387855037793, + "grad_norm": 0.050801679491996765, + "learning_rate": 4.270963965358877e-06, + "loss": 0.0011, + "step": 184580 + }, + { + "epoch": 1.1839029223975654, + "grad_norm": 0.014840539544820786, + "learning_rate": 4.270410248089915e-06, + "loss": 0.0008, + "step": 184590 + }, + { + "epoch": 1.1839670592913514, + "grad_norm": 0.08119440078735352, + "learning_rate": 4.269856539963236e-06, + "loss": 0.0017, + "step": 184600 + }, + { + "epoch": 1.1840311961851375, + "grad_norm": 0.6708487272262573, + "learning_rate": 4.269302840985774e-06, + "loss": 0.0014, + "step": 184610 + }, + { + "epoch": 1.1840953330789237, + "grad_norm": 0.028421934694051743, + "learning_rate": 4.26874915116447e-06, + "loss": 0.0004, + "step": 184620 + }, + { + "epoch": 1.1841594699727098, + "grad_norm": 0.3628992438316345, + "learning_rate": 4.268195470506263e-06, + "loss": 0.0013, + "step": 184630 + }, + { + "epoch": 1.184223606866496, + "grad_norm": 0.014118288643658161, + "learning_rate": 4.267641799018089e-06, + "loss": 0.0012, + "step": 184640 + }, + { + "epoch": 1.184287743760282, + "grad_norm": 0.014202838763594627, + "learning_rate": 4.267088136706888e-06, + "loss": 0.0011, + "step": 184650 + }, + { + "epoch": 1.184351880654068, + "grad_norm": 0.05995582044124603, + "learning_rate": 4.266534483579595e-06, + "loss": 0.0038, + "step": 184660 + }, + { + "epoch": 1.1844160175478542, + "grad_norm": 0.07392224669456482, + "learning_rate": 4.26598083964315e-06, + "loss": 0.0021, + "step": 184670 + }, + { + "epoch": 1.1844801544416401, + "grad_norm": 0.04062545299530029, + "learning_rate": 4.26542720490449e-06, + "loss": 0.0011, + "step": 184680 + }, + { + "epoch": 1.1845442913354263, + "grad_norm": 0.020193586125969887, + "learning_rate": 4.26487357937055e-06, + "loss": 0.0009, + "step": 184690 + }, + { + "epoch": 1.1846084282292124, + "grad_norm": 0.13494987785816193, + "learning_rate": 4.264319963048272e-06, + "loss": 0.0012, + "step": 184700 + }, + { + "epoch": 1.1846725651229986, + "grad_norm": 0.09614096581935883, + "learning_rate": 4.263766355944589e-06, + "loss": 0.001, + "step": 184710 + }, + { + "epoch": 1.1847367020167847, + "grad_norm": 0.10363675653934479, + "learning_rate": 4.263212758066441e-06, + "loss": 0.0015, + "step": 184720 + }, + { + "epoch": 1.1848008389105706, + "grad_norm": 0.1137445792555809, + "learning_rate": 4.262659169420762e-06, + "loss": 0.0014, + "step": 184730 + }, + { + "epoch": 1.1848649758043568, + "grad_norm": 0.06114194169640541, + "learning_rate": 4.262105590014492e-06, + "loss": 0.0008, + "step": 184740 + }, + { + "epoch": 1.184929112698143, + "grad_norm": 0.047655001282691956, + "learning_rate": 4.261552019854566e-06, + "loss": 0.0013, + "step": 184750 + }, + { + "epoch": 1.184993249591929, + "grad_norm": 0.01726294681429863, + "learning_rate": 4.260998458947922e-06, + "loss": 0.003, + "step": 184760 + }, + { + "epoch": 1.185057386485715, + "grad_norm": 0.031104477122426033, + "learning_rate": 4.260444907301494e-06, + "loss": 0.0021, + "step": 184770 + }, + { + "epoch": 1.1851215233795012, + "grad_norm": 0.037550829350948334, + "learning_rate": 4.259891364922221e-06, + "loss": 0.0007, + "step": 184780 + }, + { + "epoch": 1.1851856602732873, + "grad_norm": 0.13632871210575104, + "learning_rate": 4.259337831817037e-06, + "loss": 0.0016, + "step": 184790 + }, + { + "epoch": 1.1852497971670735, + "grad_norm": 0.017351537942886353, + "learning_rate": 4.25878430799288e-06, + "loss": 0.0023, + "step": 184800 + }, + { + "epoch": 1.1853139340608596, + "grad_norm": 0.11563106626272202, + "learning_rate": 4.258230793456685e-06, + "loss": 0.0012, + "step": 184810 + }, + { + "epoch": 1.1853780709546455, + "grad_norm": 0.059192679822444916, + "learning_rate": 4.257677288215388e-06, + "loss": 0.0024, + "step": 184820 + }, + { + "epoch": 1.1854422078484317, + "grad_norm": 0.09861285239458084, + "learning_rate": 4.257123792275925e-06, + "loss": 0.0016, + "step": 184830 + }, + { + "epoch": 1.1855063447422178, + "grad_norm": 0.32034093141555786, + "learning_rate": 4.256570305645233e-06, + "loss": 0.0026, + "step": 184840 + }, + { + "epoch": 1.185570481636004, + "grad_norm": 0.19994030892848969, + "learning_rate": 4.256016828330245e-06, + "loss": 0.0023, + "step": 184850 + }, + { + "epoch": 1.18563461852979, + "grad_norm": 0.12482646107673645, + "learning_rate": 4.255463360337898e-06, + "loss": 0.0027, + "step": 184860 + }, + { + "epoch": 1.185698755423576, + "grad_norm": 0.17621171474456787, + "learning_rate": 4.254909901675128e-06, + "loss": 0.0011, + "step": 184870 + }, + { + "epoch": 1.1857628923173622, + "grad_norm": 0.0534793920814991, + "learning_rate": 4.254356452348869e-06, + "loss": 0.001, + "step": 184880 + }, + { + "epoch": 1.1858270292111484, + "grad_norm": 0.1162564679980278, + "learning_rate": 4.253803012366058e-06, + "loss": 0.0024, + "step": 184890 + }, + { + "epoch": 1.1858911661049343, + "grad_norm": 0.006866424344480038, + "learning_rate": 4.253249581733626e-06, + "loss": 0.0014, + "step": 184900 + }, + { + "epoch": 1.1859553029987204, + "grad_norm": 0.0665106326341629, + "learning_rate": 4.252696160458513e-06, + "loss": 0.0012, + "step": 184910 + }, + { + "epoch": 1.1860194398925066, + "grad_norm": 0.1312754899263382, + "learning_rate": 4.252142748547649e-06, + "loss": 0.0019, + "step": 184920 + }, + { + "epoch": 1.1860835767862927, + "grad_norm": 0.12573768198490143, + "learning_rate": 4.2515893460079725e-06, + "loss": 0.0022, + "step": 184930 + }, + { + "epoch": 1.1861477136800787, + "grad_norm": 0.04333149641752243, + "learning_rate": 4.251035952846415e-06, + "loss": 0.0012, + "step": 184940 + }, + { + "epoch": 1.1862118505738648, + "grad_norm": 0.02859134040772915, + "learning_rate": 4.250482569069913e-06, + "loss": 0.0022, + "step": 184950 + }, + { + "epoch": 1.186275987467651, + "grad_norm": 0.08747965097427368, + "learning_rate": 4.249929194685401e-06, + "loss": 0.002, + "step": 184960 + }, + { + "epoch": 1.186340124361437, + "grad_norm": 0.005212867632508278, + "learning_rate": 4.24937582969981e-06, + "loss": 0.001, + "step": 184970 + }, + { + "epoch": 1.1864042612552232, + "grad_norm": 0.08888068795204163, + "learning_rate": 4.248822474120078e-06, + "loss": 0.0023, + "step": 184980 + }, + { + "epoch": 1.1864683981490092, + "grad_norm": 0.04227092117071152, + "learning_rate": 4.2482691279531365e-06, + "loss": 0.0016, + "step": 184990 + }, + { + "epoch": 1.1865325350427953, + "grad_norm": 0.11541248857975006, + "learning_rate": 4.247715791205921e-06, + "loss": 0.0016, + "step": 185000 + }, + { + "epoch": 1.1865966719365815, + "grad_norm": 0.27483734488487244, + "learning_rate": 4.247162463885363e-06, + "loss": 0.0013, + "step": 185010 + }, + { + "epoch": 1.1866608088303676, + "grad_norm": 0.04213074594736099, + "learning_rate": 4.246609145998399e-06, + "loss": 0.0022, + "step": 185020 + }, + { + "epoch": 1.1867249457241535, + "grad_norm": 0.048484332859516144, + "learning_rate": 4.2460558375519585e-06, + "loss": 0.0038, + "step": 185030 + }, + { + "epoch": 1.1867890826179397, + "grad_norm": 0.05892828106880188, + "learning_rate": 4.2455025385529795e-06, + "loss": 0.0015, + "step": 185040 + }, + { + "epoch": 1.1868532195117258, + "grad_norm": 0.030466919764876366, + "learning_rate": 4.2449492490083924e-06, + "loss": 0.0016, + "step": 185050 + }, + { + "epoch": 1.186917356405512, + "grad_norm": 0.07284680008888245, + "learning_rate": 4.24439596892513e-06, + "loss": 0.0012, + "step": 185060 + }, + { + "epoch": 1.1869814932992981, + "grad_norm": 0.03833737596869469, + "learning_rate": 4.243842698310126e-06, + "loss": 0.001, + "step": 185070 + }, + { + "epoch": 1.187045630193084, + "grad_norm": 0.1180029883980751, + "learning_rate": 4.243289437170316e-06, + "loss": 0.0016, + "step": 185080 + }, + { + "epoch": 1.1871097670868702, + "grad_norm": 0.10787667334079742, + "learning_rate": 4.242736185512627e-06, + "loss": 0.002, + "step": 185090 + }, + { + "epoch": 1.1871739039806564, + "grad_norm": 0.11979163438081741, + "learning_rate": 4.2421829433439965e-06, + "loss": 0.0014, + "step": 185100 + }, + { + "epoch": 1.1872380408744423, + "grad_norm": 0.28249624371528625, + "learning_rate": 4.241629710671355e-06, + "loss": 0.0021, + "step": 185110 + }, + { + "epoch": 1.1873021777682284, + "grad_norm": 0.12857112288475037, + "learning_rate": 4.241076487501634e-06, + "loss": 0.0019, + "step": 185120 + }, + { + "epoch": 1.1873663146620146, + "grad_norm": 0.031790681183338165, + "learning_rate": 4.240523273841768e-06, + "loss": 0.0037, + "step": 185130 + }, + { + "epoch": 1.1874304515558007, + "grad_norm": 0.16708554327487946, + "learning_rate": 4.239970069698688e-06, + "loss": 0.0016, + "step": 185140 + }, + { + "epoch": 1.1874945884495869, + "grad_norm": 0.07488280534744263, + "learning_rate": 4.239416875079327e-06, + "loss": 0.0012, + "step": 185150 + }, + { + "epoch": 1.1875587253433728, + "grad_norm": 0.005816930904984474, + "learning_rate": 4.238863689990615e-06, + "loss": 0.0014, + "step": 185160 + }, + { + "epoch": 1.187622862237159, + "grad_norm": 0.053667064756155014, + "learning_rate": 4.238310514439487e-06, + "loss": 0.001, + "step": 185170 + }, + { + "epoch": 1.187686999130945, + "grad_norm": 0.13718998432159424, + "learning_rate": 4.23775734843287e-06, + "loss": 0.0021, + "step": 185180 + }, + { + "epoch": 1.1877511360247313, + "grad_norm": 0.06031809374690056, + "learning_rate": 4.2372041919777e-06, + "loss": 0.0015, + "step": 185190 + }, + { + "epoch": 1.1878152729185172, + "grad_norm": 0.11881702393293381, + "learning_rate": 4.236651045080905e-06, + "loss": 0.0021, + "step": 185200 + }, + { + "epoch": 1.1878794098123033, + "grad_norm": 0.06994739919900894, + "learning_rate": 4.236097907749419e-06, + "loss": 0.0016, + "step": 185210 + }, + { + "epoch": 1.1879435467060895, + "grad_norm": 0.13131435215473175, + "learning_rate": 4.2355447799901725e-06, + "loss": 0.0009, + "step": 185220 + }, + { + "epoch": 1.1880076835998756, + "grad_norm": 0.019481072202324867, + "learning_rate": 4.234991661810096e-06, + "loss": 0.0022, + "step": 185230 + }, + { + "epoch": 1.1880718204936618, + "grad_norm": 0.08921419829130173, + "learning_rate": 4.2344385532161205e-06, + "loss": 0.0022, + "step": 185240 + }, + { + "epoch": 1.1881359573874477, + "grad_norm": 0.09565063565969467, + "learning_rate": 4.233885454215178e-06, + "loss": 0.0019, + "step": 185250 + }, + { + "epoch": 1.1882000942812339, + "grad_norm": 0.13402371108531952, + "learning_rate": 4.233332364814197e-06, + "loss": 0.001, + "step": 185260 + }, + { + "epoch": 1.18826423117502, + "grad_norm": 0.18935437500476837, + "learning_rate": 4.232779285020109e-06, + "loss": 0.0011, + "step": 185270 + }, + { + "epoch": 1.1883283680688062, + "grad_norm": 0.00180476950481534, + "learning_rate": 4.232226214839845e-06, + "loss": 0.0005, + "step": 185280 + }, + { + "epoch": 1.188392504962592, + "grad_norm": 0.11445319652557373, + "learning_rate": 4.231673154280336e-06, + "loss": 0.0024, + "step": 185290 + }, + { + "epoch": 1.1884566418563782, + "grad_norm": 0.07702426612377167, + "learning_rate": 4.231120103348512e-06, + "loss": 0.0008, + "step": 185300 + }, + { + "epoch": 1.1885207787501644, + "grad_norm": 0.1344224214553833, + "learning_rate": 4.230567062051301e-06, + "loss": 0.0011, + "step": 185310 + }, + { + "epoch": 1.1885849156439505, + "grad_norm": 0.1541663408279419, + "learning_rate": 4.230014030395637e-06, + "loss": 0.0015, + "step": 185320 + }, + { + "epoch": 1.1886490525377365, + "grad_norm": 0.06729359924793243, + "learning_rate": 4.229461008388446e-06, + "loss": 0.0006, + "step": 185330 + }, + { + "epoch": 1.1887131894315226, + "grad_norm": 0.09541870653629303, + "learning_rate": 4.22890799603666e-06, + "loss": 0.0026, + "step": 185340 + }, + { + "epoch": 1.1887773263253087, + "grad_norm": 0.11226841062307358, + "learning_rate": 4.228354993347207e-06, + "loss": 0.0019, + "step": 185350 + }, + { + "epoch": 1.188841463219095, + "grad_norm": 0.05591916665434837, + "learning_rate": 4.227802000327018e-06, + "loss": 0.0011, + "step": 185360 + }, + { + "epoch": 1.1889056001128808, + "grad_norm": 0.027111517265439034, + "learning_rate": 4.227249016983021e-06, + "loss": 0.001, + "step": 185370 + }, + { + "epoch": 1.188969737006667, + "grad_norm": 0.11823323369026184, + "learning_rate": 4.226696043322145e-06, + "loss": 0.0014, + "step": 185380 + }, + { + "epoch": 1.1890338739004531, + "grad_norm": 0.06831841170787811, + "learning_rate": 4.2261430793513215e-06, + "loss": 0.0017, + "step": 185390 + }, + { + "epoch": 1.1890980107942393, + "grad_norm": 0.17060433328151703, + "learning_rate": 4.225590125077479e-06, + "loss": 0.0013, + "step": 185400 + }, + { + "epoch": 1.1891621476880254, + "grad_norm": 0.063255675137043, + "learning_rate": 4.2250371805075455e-06, + "loss": 0.0025, + "step": 185410 + }, + { + "epoch": 1.1892262845818113, + "grad_norm": 0.04975636675953865, + "learning_rate": 4.224484245648451e-06, + "loss": 0.0016, + "step": 185420 + }, + { + "epoch": 1.1892904214755975, + "grad_norm": 0.12806348502635956, + "learning_rate": 4.223931320507121e-06, + "loss": 0.0046, + "step": 185430 + }, + { + "epoch": 1.1893545583693836, + "grad_norm": 0.047647055238485336, + "learning_rate": 4.223378405090487e-06, + "loss": 0.0017, + "step": 185440 + }, + { + "epoch": 1.1894186952631698, + "grad_norm": 0.09078560769557953, + "learning_rate": 4.222825499405477e-06, + "loss": 0.0015, + "step": 185450 + }, + { + "epoch": 1.1894828321569557, + "grad_norm": 0.05512853339314461, + "learning_rate": 4.222272603459019e-06, + "loss": 0.0011, + "step": 185460 + }, + { + "epoch": 1.1895469690507419, + "grad_norm": 0.23027078807353973, + "learning_rate": 4.22171971725804e-06, + "loss": 0.0026, + "step": 185470 + }, + { + "epoch": 1.189611105944528, + "grad_norm": 0.10206654667854309, + "learning_rate": 4.221166840809472e-06, + "loss": 0.0022, + "step": 185480 + }, + { + "epoch": 1.1896752428383142, + "grad_norm": 0.0034956876188516617, + "learning_rate": 4.220613974120237e-06, + "loss": 0.0014, + "step": 185490 + }, + { + "epoch": 1.1897393797321003, + "grad_norm": 0.011971892789006233, + "learning_rate": 4.220061117197268e-06, + "loss": 0.0029, + "step": 185500 + }, + { + "epoch": 1.1898035166258862, + "grad_norm": 0.2612984776496887, + "learning_rate": 4.219508270047489e-06, + "loss": 0.0018, + "step": 185510 + }, + { + "epoch": 1.1898676535196724, + "grad_norm": 0.045755352824926376, + "learning_rate": 4.21895543267783e-06, + "loss": 0.0015, + "step": 185520 + }, + { + "epoch": 1.1899317904134585, + "grad_norm": 0.13760651648044586, + "learning_rate": 4.218402605095218e-06, + "loss": 0.001, + "step": 185530 + }, + { + "epoch": 1.1899959273072445, + "grad_norm": 0.03199196606874466, + "learning_rate": 4.2178497873065784e-06, + "loss": 0.0014, + "step": 185540 + }, + { + "epoch": 1.1900600642010306, + "grad_norm": 0.11631520092487335, + "learning_rate": 4.217296979318841e-06, + "loss": 0.0014, + "step": 185550 + }, + { + "epoch": 1.1901242010948168, + "grad_norm": 0.15202581882476807, + "learning_rate": 4.216744181138932e-06, + "loss": 0.0021, + "step": 185560 + }, + { + "epoch": 1.190188337988603, + "grad_norm": 0.15063878893852234, + "learning_rate": 4.216191392773779e-06, + "loss": 0.0026, + "step": 185570 + }, + { + "epoch": 1.190252474882389, + "grad_norm": 0.20501667261123657, + "learning_rate": 4.215638614230306e-06, + "loss": 0.0011, + "step": 185580 + }, + { + "epoch": 1.190316611776175, + "grad_norm": 0.009964863769710064, + "learning_rate": 4.215085845515444e-06, + "loss": 0.0021, + "step": 185590 + }, + { + "epoch": 1.1903807486699611, + "grad_norm": 0.097783662378788, + "learning_rate": 4.214533086636115e-06, + "loss": 0.0022, + "step": 185600 + }, + { + "epoch": 1.1904448855637473, + "grad_norm": 0.20840519666671753, + "learning_rate": 4.21398033759925e-06, + "loss": 0.0021, + "step": 185610 + }, + { + "epoch": 1.1905090224575334, + "grad_norm": 0.2879226505756378, + "learning_rate": 4.213427598411771e-06, + "loss": 0.0024, + "step": 185620 + }, + { + "epoch": 1.1905731593513194, + "grad_norm": 0.00205128057859838, + "learning_rate": 4.212874869080609e-06, + "loss": 0.0013, + "step": 185630 + }, + { + "epoch": 1.1906372962451055, + "grad_norm": 0.26543793082237244, + "learning_rate": 4.2123221496126844e-06, + "loss": 0.001, + "step": 185640 + }, + { + "epoch": 1.1907014331388917, + "grad_norm": 0.14912432432174683, + "learning_rate": 4.211769440014929e-06, + "loss": 0.0012, + "step": 185650 + }, + { + "epoch": 1.1907655700326778, + "grad_norm": 0.15746237337589264, + "learning_rate": 4.211216740294264e-06, + "loss": 0.0012, + "step": 185660 + }, + { + "epoch": 1.190829706926464, + "grad_norm": 0.0665852278470993, + "learning_rate": 4.210664050457618e-06, + "loss": 0.0011, + "step": 185670 + }, + { + "epoch": 1.1908938438202499, + "grad_norm": 0.12235524505376816, + "learning_rate": 4.210111370511915e-06, + "loss": 0.002, + "step": 185680 + }, + { + "epoch": 1.190957980714036, + "grad_norm": 0.06834182143211365, + "learning_rate": 4.20955870046408e-06, + "loss": 0.0011, + "step": 185690 + }, + { + "epoch": 1.1910221176078222, + "grad_norm": 0.029557999223470688, + "learning_rate": 4.2090060403210405e-06, + "loss": 0.0015, + "step": 185700 + }, + { + "epoch": 1.1910862545016083, + "grad_norm": 0.09428150951862335, + "learning_rate": 4.20845339008972e-06, + "loss": 0.0019, + "step": 185710 + }, + { + "epoch": 1.1911503913953942, + "grad_norm": 0.05321458354592323, + "learning_rate": 4.207900749777046e-06, + "loss": 0.0009, + "step": 185720 + }, + { + "epoch": 1.1912145282891804, + "grad_norm": 0.10113570839166641, + "learning_rate": 4.207348119389941e-06, + "loss": 0.0015, + "step": 185730 + }, + { + "epoch": 1.1912786651829665, + "grad_norm": 0.06574378907680511, + "learning_rate": 4.206795498935331e-06, + "loss": 0.0013, + "step": 185740 + }, + { + "epoch": 1.1913428020767527, + "grad_norm": 0.06594528257846832, + "learning_rate": 4.206242888420139e-06, + "loss": 0.0009, + "step": 185750 + }, + { + "epoch": 1.1914069389705388, + "grad_norm": 0.03930355980992317, + "learning_rate": 4.205690287851291e-06, + "loss": 0.0023, + "step": 185760 + }, + { + "epoch": 1.1914710758643248, + "grad_norm": 0.14651933312416077, + "learning_rate": 4.205137697235712e-06, + "loss": 0.0017, + "step": 185770 + }, + { + "epoch": 1.191535212758111, + "grad_norm": 0.02262321673333645, + "learning_rate": 4.204585116580325e-06, + "loss": 0.0008, + "step": 185780 + }, + { + "epoch": 1.191599349651897, + "grad_norm": 0.00292450119741261, + "learning_rate": 4.204032545892056e-06, + "loss": 0.0014, + "step": 185790 + }, + { + "epoch": 1.191663486545683, + "grad_norm": 0.1417255699634552, + "learning_rate": 4.203479985177827e-06, + "loss": 0.0025, + "step": 185800 + }, + { + "epoch": 1.1917276234394691, + "grad_norm": 0.030075931921601295, + "learning_rate": 4.202927434444564e-06, + "loss": 0.0018, + "step": 185810 + }, + { + "epoch": 1.1917917603332553, + "grad_norm": 0.10921056568622589, + "learning_rate": 4.2023748936991885e-06, + "loss": 0.0015, + "step": 185820 + }, + { + "epoch": 1.1918558972270414, + "grad_norm": 0.1951446533203125, + "learning_rate": 4.2018223629486275e-06, + "loss": 0.0018, + "step": 185830 + }, + { + "epoch": 1.1919200341208276, + "grad_norm": 0.1522318720817566, + "learning_rate": 4.201269842199801e-06, + "loss": 0.001, + "step": 185840 + }, + { + "epoch": 1.1919841710146135, + "grad_norm": 0.07772860676050186, + "learning_rate": 4.200717331459636e-06, + "loss": 0.0012, + "step": 185850 + }, + { + "epoch": 1.1920483079083997, + "grad_norm": 0.13243059813976288, + "learning_rate": 4.200164830735053e-06, + "loss": 0.0027, + "step": 185860 + }, + { + "epoch": 1.1921124448021858, + "grad_norm": 0.09397582709789276, + "learning_rate": 4.1996123400329765e-06, + "loss": 0.0021, + "step": 185870 + }, + { + "epoch": 1.192176581695972, + "grad_norm": 0.06552623957395554, + "learning_rate": 4.199059859360329e-06, + "loss": 0.0018, + "step": 185880 + }, + { + "epoch": 1.1922407185897579, + "grad_norm": 0.015774687752127647, + "learning_rate": 4.198507388724034e-06, + "loss": 0.0006, + "step": 185890 + }, + { + "epoch": 1.192304855483544, + "grad_norm": 0.12974444031715393, + "learning_rate": 4.197954928131015e-06, + "loss": 0.0031, + "step": 185900 + }, + { + "epoch": 1.1923689923773302, + "grad_norm": 0.08671166747808456, + "learning_rate": 4.1974024775881935e-06, + "loss": 0.002, + "step": 185910 + }, + { + "epoch": 1.1924331292711163, + "grad_norm": 0.07681435346603394, + "learning_rate": 4.196850037102492e-06, + "loss": 0.0014, + "step": 185920 + }, + { + "epoch": 1.1924972661649025, + "grad_norm": 0.1029948741197586, + "learning_rate": 4.196297606680835e-06, + "loss": 0.0027, + "step": 185930 + }, + { + "epoch": 1.1925614030586884, + "grad_norm": 0.12408502399921417, + "learning_rate": 4.195745186330142e-06, + "loss": 0.0021, + "step": 185940 + }, + { + "epoch": 1.1926255399524746, + "grad_norm": 0.19858390092849731, + "learning_rate": 4.1951927760573375e-06, + "loss": 0.0022, + "step": 185950 + }, + { + "epoch": 1.1926896768462607, + "grad_norm": 0.030382227152585983, + "learning_rate": 4.194640375869342e-06, + "loss": 0.0012, + "step": 185960 + }, + { + "epoch": 1.1927538137400466, + "grad_norm": 0.15151441097259521, + "learning_rate": 4.194087985773078e-06, + "loss": 0.0025, + "step": 185970 + }, + { + "epoch": 1.1928179506338328, + "grad_norm": 0.08990645408630371, + "learning_rate": 4.193535605775468e-06, + "loss": 0.002, + "step": 185980 + }, + { + "epoch": 1.192882087527619, + "grad_norm": 0.1135379821062088, + "learning_rate": 4.192983235883433e-06, + "loss": 0.001, + "step": 185990 + }, + { + "epoch": 1.192946224421405, + "grad_norm": 0.07216603308916092, + "learning_rate": 4.192430876103896e-06, + "loss": 0.0009, + "step": 186000 + }, + { + "epoch": 1.1930103613151912, + "grad_norm": 0.0714292898774147, + "learning_rate": 4.191878526443776e-06, + "loss": 0.0016, + "step": 186010 + }, + { + "epoch": 1.1930744982089772, + "grad_norm": 0.00590480025857687, + "learning_rate": 4.191326186909996e-06, + "loss": 0.0013, + "step": 186020 + }, + { + "epoch": 1.1931386351027633, + "grad_norm": 0.0996394157409668, + "learning_rate": 4.190773857509476e-06, + "loss": 0.002, + "step": 186030 + }, + { + "epoch": 1.1932027719965494, + "grad_norm": 0.2634541988372803, + "learning_rate": 4.19022153824914e-06, + "loss": 0.0042, + "step": 186040 + }, + { + "epoch": 1.1932669088903356, + "grad_norm": 0.23692826926708221, + "learning_rate": 4.189669229135904e-06, + "loss": 0.0022, + "step": 186050 + }, + { + "epoch": 1.1933310457841215, + "grad_norm": 0.2381436675786972, + "learning_rate": 4.189116930176694e-06, + "loss": 0.0025, + "step": 186060 + }, + { + "epoch": 1.1933951826779077, + "grad_norm": 0.06733310967683792, + "learning_rate": 4.188564641378427e-06, + "loss": 0.001, + "step": 186070 + }, + { + "epoch": 1.1934593195716938, + "grad_norm": 0.030237965285778046, + "learning_rate": 4.188012362748026e-06, + "loss": 0.0018, + "step": 186080 + }, + { + "epoch": 1.19352345646548, + "grad_norm": 0.29854798316955566, + "learning_rate": 4.18746009429241e-06, + "loss": 0.0014, + "step": 186090 + }, + { + "epoch": 1.1935875933592661, + "grad_norm": 0.10096389055252075, + "learning_rate": 4.1869078360185e-06, + "loss": 0.0027, + "step": 186100 + }, + { + "epoch": 1.193651730253052, + "grad_norm": 0.06695820391178131, + "learning_rate": 4.186355587933215e-06, + "loss": 0.0007, + "step": 186110 + }, + { + "epoch": 1.1937158671468382, + "grad_norm": 0.0895819142460823, + "learning_rate": 4.185803350043476e-06, + "loss": 0.0025, + "step": 186120 + }, + { + "epoch": 1.1937800040406243, + "grad_norm": 0.13711774349212646, + "learning_rate": 4.185251122356204e-06, + "loss": 0.0014, + "step": 186130 + }, + { + "epoch": 1.1938441409344105, + "grad_norm": 0.31235578656196594, + "learning_rate": 4.184698904878316e-06, + "loss": 0.0023, + "step": 186140 + }, + { + "epoch": 1.1939082778281964, + "grad_norm": 0.05649501457810402, + "learning_rate": 4.184146697616735e-06, + "loss": 0.0024, + "step": 186150 + }, + { + "epoch": 1.1939724147219826, + "grad_norm": 0.015902062878012657, + "learning_rate": 4.183594500578379e-06, + "loss": 0.0011, + "step": 186160 + }, + { + "epoch": 1.1940365516157687, + "grad_norm": 0.06633001565933228, + "learning_rate": 4.183042313770166e-06, + "loss": 0.0018, + "step": 186170 + }, + { + "epoch": 1.1941006885095549, + "grad_norm": 0.032952580600976944, + "learning_rate": 4.1824901371990175e-06, + "loss": 0.0013, + "step": 186180 + }, + { + "epoch": 1.194164825403341, + "grad_norm": 0.10262813419103622, + "learning_rate": 4.181937970871852e-06, + "loss": 0.0016, + "step": 186190 + }, + { + "epoch": 1.194228962297127, + "grad_norm": 0.10096637904644012, + "learning_rate": 4.181385814795589e-06, + "loss": 0.0025, + "step": 186200 + }, + { + "epoch": 1.194293099190913, + "grad_norm": 0.03392057120800018, + "learning_rate": 4.180833668977145e-06, + "loss": 0.0024, + "step": 186210 + }, + { + "epoch": 1.1943572360846992, + "grad_norm": 0.0377911701798439, + "learning_rate": 4.180281533423444e-06, + "loss": 0.0018, + "step": 186220 + }, + { + "epoch": 1.1944213729784852, + "grad_norm": 0.16003689169883728, + "learning_rate": 4.179729408141398e-06, + "loss": 0.0019, + "step": 186230 + }, + { + "epoch": 1.1944855098722713, + "grad_norm": 0.08728373795747757, + "learning_rate": 4.179177293137929e-06, + "loss": 0.0011, + "step": 186240 + }, + { + "epoch": 1.1945496467660575, + "grad_norm": 0.06210342422127724, + "learning_rate": 4.178625188419957e-06, + "loss": 0.0014, + "step": 186250 + }, + { + "epoch": 1.1946137836598436, + "grad_norm": 0.17774981260299683, + "learning_rate": 4.178073093994397e-06, + "loss": 0.0014, + "step": 186260 + }, + { + "epoch": 1.1946779205536298, + "grad_norm": 0.013091071508824825, + "learning_rate": 4.177521009868169e-06, + "loss": 0.0018, + "step": 186270 + }, + { + "epoch": 1.1947420574474157, + "grad_norm": 0.04876753315329552, + "learning_rate": 4.17696893604819e-06, + "loss": 0.0012, + "step": 186280 + }, + { + "epoch": 1.1948061943412018, + "grad_norm": 0.023355981335043907, + "learning_rate": 4.176416872541379e-06, + "loss": 0.0009, + "step": 186290 + }, + { + "epoch": 1.194870331234988, + "grad_norm": 0.052153367549180984, + "learning_rate": 4.175864819354653e-06, + "loss": 0.0016, + "step": 186300 + }, + { + "epoch": 1.1949344681287741, + "grad_norm": 0.10881591588258743, + "learning_rate": 4.175312776494931e-06, + "loss": 0.0037, + "step": 186310 + }, + { + "epoch": 1.19499860502256, + "grad_norm": 0.1673198789358139, + "learning_rate": 4.1747607439691275e-06, + "loss": 0.0014, + "step": 186320 + }, + { + "epoch": 1.1950627419163462, + "grad_norm": 0.043181002140045166, + "learning_rate": 4.174208721784162e-06, + "loss": 0.0009, + "step": 186330 + }, + { + "epoch": 1.1951268788101324, + "grad_norm": 0.048188578337430954, + "learning_rate": 4.173656709946952e-06, + "loss": 0.002, + "step": 186340 + }, + { + "epoch": 1.1951910157039185, + "grad_norm": 0.07603321224451065, + "learning_rate": 4.173104708464414e-06, + "loss": 0.0016, + "step": 186350 + }, + { + "epoch": 1.1952551525977047, + "grad_norm": 0.04499555751681328, + "learning_rate": 4.172552717343464e-06, + "loss": 0.001, + "step": 186360 + }, + { + "epoch": 1.1953192894914906, + "grad_norm": 0.08442261815071106, + "learning_rate": 4.172000736591022e-06, + "loss": 0.0017, + "step": 186370 + }, + { + "epoch": 1.1953834263852767, + "grad_norm": 0.2813013195991516, + "learning_rate": 4.171448766214e-06, + "loss": 0.0014, + "step": 186380 + }, + { + "epoch": 1.1954475632790629, + "grad_norm": 0.038745105266571045, + "learning_rate": 4.170896806219319e-06, + "loss": 0.0023, + "step": 186390 + }, + { + "epoch": 1.195511700172849, + "grad_norm": 0.006805906537920237, + "learning_rate": 4.170344856613893e-06, + "loss": 0.0025, + "step": 186400 + }, + { + "epoch": 1.195575837066635, + "grad_norm": 0.04771881923079491, + "learning_rate": 4.169792917404638e-06, + "loss": 0.0012, + "step": 186410 + }, + { + "epoch": 1.195639973960421, + "grad_norm": 0.20832189917564392, + "learning_rate": 4.169240988598472e-06, + "loss": 0.0012, + "step": 186420 + }, + { + "epoch": 1.1957041108542072, + "grad_norm": 0.04566992074251175, + "learning_rate": 4.1686890702023095e-06, + "loss": 0.0015, + "step": 186430 + }, + { + "epoch": 1.1957682477479934, + "grad_norm": 0.0640728622674942, + "learning_rate": 4.1681371622230685e-06, + "loss": 0.003, + "step": 186440 + }, + { + "epoch": 1.1958323846417793, + "grad_norm": 0.09161300212144852, + "learning_rate": 4.1675852646676615e-06, + "loss": 0.0019, + "step": 186450 + }, + { + "epoch": 1.1958965215355655, + "grad_norm": 0.03013503924012184, + "learning_rate": 4.167033377543008e-06, + "loss": 0.0007, + "step": 186460 + }, + { + "epoch": 1.1959606584293516, + "grad_norm": 0.1995771825313568, + "learning_rate": 4.166481500856019e-06, + "loss": 0.0012, + "step": 186470 + }, + { + "epoch": 1.1960247953231378, + "grad_norm": 0.0016916844760999084, + "learning_rate": 4.165929634613615e-06, + "loss": 0.0011, + "step": 186480 + }, + { + "epoch": 1.1960889322169237, + "grad_norm": 0.11496210843324661, + "learning_rate": 4.1653777788227075e-06, + "loss": 0.003, + "step": 186490 + }, + { + "epoch": 1.1961530691107098, + "grad_norm": 0.06641450524330139, + "learning_rate": 4.164825933490214e-06, + "loss": 0.0013, + "step": 186500 + }, + { + "epoch": 1.196217206004496, + "grad_norm": 0.12112980335950851, + "learning_rate": 4.164274098623047e-06, + "loss": 0.0019, + "step": 186510 + }, + { + "epoch": 1.1962813428982821, + "grad_norm": 0.08294852077960968, + "learning_rate": 4.163722274228124e-06, + "loss": 0.001, + "step": 186520 + }, + { + "epoch": 1.1963454797920683, + "grad_norm": 0.09129433333873749, + "learning_rate": 4.163170460312358e-06, + "loss": 0.0024, + "step": 186530 + }, + { + "epoch": 1.1964096166858542, + "grad_norm": 0.159889355301857, + "learning_rate": 4.162618656882663e-06, + "loss": 0.0011, + "step": 186540 + }, + { + "epoch": 1.1964737535796404, + "grad_norm": 0.045074403285980225, + "learning_rate": 4.162066863945957e-06, + "loss": 0.0009, + "step": 186550 + }, + { + "epoch": 1.1965378904734265, + "grad_norm": 0.092170350253582, + "learning_rate": 4.161515081509151e-06, + "loss": 0.0007, + "step": 186560 + }, + { + "epoch": 1.1966020273672127, + "grad_norm": 0.009705310687422752, + "learning_rate": 4.160963309579161e-06, + "loss": 0.002, + "step": 186570 + }, + { + "epoch": 1.1966661642609986, + "grad_norm": 0.06332354247570038, + "learning_rate": 4.160411548162899e-06, + "loss": 0.0012, + "step": 186580 + }, + { + "epoch": 1.1967303011547847, + "grad_norm": 0.1114995926618576, + "learning_rate": 4.159859797267282e-06, + "loss": 0.0011, + "step": 186590 + }, + { + "epoch": 1.1967944380485709, + "grad_norm": 0.04476296901702881, + "learning_rate": 4.15930805689922e-06, + "loss": 0.0014, + "step": 186600 + }, + { + "epoch": 1.196858574942357, + "grad_norm": 0.043185412883758545, + "learning_rate": 4.15875632706563e-06, + "loss": 0.0017, + "step": 186610 + }, + { + "epoch": 1.1969227118361432, + "grad_norm": 0.09429524093866348, + "learning_rate": 4.158204607773425e-06, + "loss": 0.0017, + "step": 186620 + }, + { + "epoch": 1.196986848729929, + "grad_norm": 0.053793590515851974, + "learning_rate": 4.157652899029518e-06, + "loss": 0.0017, + "step": 186630 + }, + { + "epoch": 1.1970509856237153, + "grad_norm": 0.1297648549079895, + "learning_rate": 4.157101200840821e-06, + "loss": 0.0018, + "step": 186640 + }, + { + "epoch": 1.1971151225175014, + "grad_norm": 0.021457897499203682, + "learning_rate": 4.15654951321425e-06, + "loss": 0.0008, + "step": 186650 + }, + { + "epoch": 1.1971792594112873, + "grad_norm": 0.06702035665512085, + "learning_rate": 4.1559978361567145e-06, + "loss": 0.001, + "step": 186660 + }, + { + "epoch": 1.1972433963050735, + "grad_norm": 0.08455851674079895, + "learning_rate": 4.15544616967513e-06, + "loss": 0.0019, + "step": 186670 + }, + { + "epoch": 1.1973075331988596, + "grad_norm": 0.13208948075771332, + "learning_rate": 4.15489451377641e-06, + "loss": 0.0013, + "step": 186680 + }, + { + "epoch": 1.1973716700926458, + "grad_norm": 0.08098907023668289, + "learning_rate": 4.154342868467463e-06, + "loss": 0.001, + "step": 186690 + }, + { + "epoch": 1.197435806986432, + "grad_norm": 0.07625220715999603, + "learning_rate": 4.153791233755207e-06, + "loss": 0.001, + "step": 186700 + }, + { + "epoch": 1.1974999438802179, + "grad_norm": 0.13422337174415588, + "learning_rate": 4.15323960964655e-06, + "loss": 0.0012, + "step": 186710 + }, + { + "epoch": 1.197564080774004, + "grad_norm": 0.07337094098329544, + "learning_rate": 4.152687996148407e-06, + "loss": 0.0021, + "step": 186720 + }, + { + "epoch": 1.1976282176677902, + "grad_norm": 0.11599288135766983, + "learning_rate": 4.152136393267688e-06, + "loss": 0.0023, + "step": 186730 + }, + { + "epoch": 1.1976923545615763, + "grad_norm": 0.045450177043676376, + "learning_rate": 4.151584801011306e-06, + "loss": 0.0024, + "step": 186740 + }, + { + "epoch": 1.1977564914553622, + "grad_norm": 0.13330058753490448, + "learning_rate": 4.1510332193861725e-06, + "loss": 0.0014, + "step": 186750 + }, + { + "epoch": 1.1978206283491484, + "grad_norm": 0.09642498195171356, + "learning_rate": 4.1504816483992005e-06, + "loss": 0.0017, + "step": 186760 + }, + { + "epoch": 1.1978847652429345, + "grad_norm": 0.05989083647727966, + "learning_rate": 4.1499300880573e-06, + "loss": 0.0015, + "step": 186770 + }, + { + "epoch": 1.1979489021367207, + "grad_norm": 0.12496864050626755, + "learning_rate": 4.149378538367383e-06, + "loss": 0.001, + "step": 186780 + }, + { + "epoch": 1.1980130390305068, + "grad_norm": 0.04755425453186035, + "learning_rate": 4.14882699933636e-06, + "loss": 0.0013, + "step": 186790 + }, + { + "epoch": 1.1980771759242927, + "grad_norm": 0.14057539403438568, + "learning_rate": 4.148275470971145e-06, + "loss": 0.0029, + "step": 186800 + }, + { + "epoch": 1.198141312818079, + "grad_norm": 0.14890030026435852, + "learning_rate": 4.147723953278646e-06, + "loss": 0.0012, + "step": 186810 + }, + { + "epoch": 1.198205449711865, + "grad_norm": 0.04012436792254448, + "learning_rate": 4.1471724462657745e-06, + "loss": 0.0008, + "step": 186820 + }, + { + "epoch": 1.1982695866056512, + "grad_norm": 0.16928061842918396, + "learning_rate": 4.1466209499394424e-06, + "loss": 0.0021, + "step": 186830 + }, + { + "epoch": 1.1983337234994371, + "grad_norm": 0.2661837935447693, + "learning_rate": 4.14606946430656e-06, + "loss": 0.001, + "step": 186840 + }, + { + "epoch": 1.1983978603932233, + "grad_norm": 0.09210334718227386, + "learning_rate": 4.145517989374037e-06, + "loss": 0.0034, + "step": 186850 + }, + { + "epoch": 1.1984619972870094, + "grad_norm": 0.02662188746035099, + "learning_rate": 4.1449665251487844e-06, + "loss": 0.0013, + "step": 186860 + }, + { + "epoch": 1.1985261341807956, + "grad_norm": 0.13997481763362885, + "learning_rate": 4.144415071637713e-06, + "loss": 0.0028, + "step": 186870 + }, + { + "epoch": 1.1985902710745815, + "grad_norm": 0.0501549169421196, + "learning_rate": 4.143863628847731e-06, + "loss": 0.0014, + "step": 186880 + }, + { + "epoch": 1.1986544079683676, + "grad_norm": 0.06980713456869125, + "learning_rate": 4.1433121967857505e-06, + "loss": 0.0025, + "step": 186890 + }, + { + "epoch": 1.1987185448621538, + "grad_norm": 0.0924331322312355, + "learning_rate": 4.1427607754586794e-06, + "loss": 0.0017, + "step": 186900 + }, + { + "epoch": 1.19878268175594, + "grad_norm": 0.004991814494132996, + "learning_rate": 4.142209364873428e-06, + "loss": 0.0019, + "step": 186910 + }, + { + "epoch": 1.1988468186497259, + "grad_norm": 0.18857893347740173, + "learning_rate": 4.141657965036907e-06, + "loss": 0.0014, + "step": 186920 + }, + { + "epoch": 1.198910955543512, + "grad_norm": 0.05855089798569679, + "learning_rate": 4.141106575956026e-06, + "loss": 0.0019, + "step": 186930 + }, + { + "epoch": 1.1989750924372982, + "grad_norm": 0.18651893734931946, + "learning_rate": 4.140555197637692e-06, + "loss": 0.0022, + "step": 186940 + }, + { + "epoch": 1.1990392293310843, + "grad_norm": 0.09940574318170547, + "learning_rate": 4.140003830088817e-06, + "loss": 0.001, + "step": 186950 + }, + { + "epoch": 1.1991033662248705, + "grad_norm": 0.0719454437494278, + "learning_rate": 4.139452473316308e-06, + "loss": 0.0009, + "step": 186960 + }, + { + "epoch": 1.1991675031186564, + "grad_norm": 0.06848664581775665, + "learning_rate": 4.138901127327075e-06, + "loss": 0.0025, + "step": 186970 + }, + { + "epoch": 1.1992316400124425, + "grad_norm": 0.03205366060137749, + "learning_rate": 4.138349792128026e-06, + "loss": 0.0022, + "step": 186980 + }, + { + "epoch": 1.1992957769062287, + "grad_norm": 0.06204473599791527, + "learning_rate": 4.137798467726071e-06, + "loss": 0.0021, + "step": 186990 + }, + { + "epoch": 1.1993599138000148, + "grad_norm": 0.17390339076519012, + "learning_rate": 4.137247154128116e-06, + "loss": 0.001, + "step": 187000 + }, + { + "epoch": 1.1994240506938008, + "grad_norm": 0.14126235246658325, + "learning_rate": 4.136695851341071e-06, + "loss": 0.0025, + "step": 187010 + }, + { + "epoch": 1.199488187587587, + "grad_norm": 0.07059001922607422, + "learning_rate": 4.136144559371844e-06, + "loss": 0.0015, + "step": 187020 + }, + { + "epoch": 1.199552324481373, + "grad_norm": 0.022132446989417076, + "learning_rate": 4.135593278227344e-06, + "loss": 0.0009, + "step": 187030 + }, + { + "epoch": 1.1996164613751592, + "grad_norm": 0.0732007697224617, + "learning_rate": 4.135042007914477e-06, + "loss": 0.0011, + "step": 187040 + }, + { + "epoch": 1.1996805982689454, + "grad_norm": 0.007789216935634613, + "learning_rate": 4.134490748440152e-06, + "loss": 0.0019, + "step": 187050 + }, + { + "epoch": 1.1997447351627313, + "grad_norm": 0.047100041061639786, + "learning_rate": 4.133939499811276e-06, + "loss": 0.0006, + "step": 187060 + }, + { + "epoch": 1.1998088720565174, + "grad_norm": 0.06680265069007874, + "learning_rate": 4.133388262034758e-06, + "loss": 0.0017, + "step": 187070 + }, + { + "epoch": 1.1998730089503036, + "grad_norm": 0.04481464624404907, + "learning_rate": 4.132837035117504e-06, + "loss": 0.0015, + "step": 187080 + }, + { + "epoch": 1.1999371458440895, + "grad_norm": 0.050326891243457794, + "learning_rate": 4.1322858190664215e-06, + "loss": 0.0012, + "step": 187090 + }, + { + "epoch": 1.2000012827378757, + "grad_norm": 0.006268339231610298, + "learning_rate": 4.131734613888418e-06, + "loss": 0.0012, + "step": 187100 + }, + { + "epoch": 1.2000654196316618, + "grad_norm": 0.11378975212574005, + "learning_rate": 4.1311834195904e-06, + "loss": 0.0013, + "step": 187110 + }, + { + "epoch": 1.200129556525448, + "grad_norm": 0.08344443142414093, + "learning_rate": 4.130632236179276e-06, + "loss": 0.0007, + "step": 187120 + }, + { + "epoch": 1.200193693419234, + "grad_norm": 0.10512486845254898, + "learning_rate": 4.1300810636619496e-06, + "loss": 0.0008, + "step": 187130 + }, + { + "epoch": 1.20025783031302, + "grad_norm": 0.08657458424568176, + "learning_rate": 4.129529902045331e-06, + "loss": 0.0011, + "step": 187140 + }, + { + "epoch": 1.2003219672068062, + "grad_norm": 0.015405516140162945, + "learning_rate": 4.128978751336322e-06, + "loss": 0.001, + "step": 187150 + }, + { + "epoch": 1.2003861041005923, + "grad_norm": 0.05375182628631592, + "learning_rate": 4.128427611541836e-06, + "loss": 0.0014, + "step": 187160 + }, + { + "epoch": 1.2004502409943785, + "grad_norm": 0.23793500661849976, + "learning_rate": 4.127876482668771e-06, + "loss": 0.0033, + "step": 187170 + }, + { + "epoch": 1.2005143778881644, + "grad_norm": 0.08199360966682434, + "learning_rate": 4.127325364724039e-06, + "loss": 0.0015, + "step": 187180 + }, + { + "epoch": 1.2005785147819505, + "grad_norm": 0.011276515200734138, + "learning_rate": 4.126774257714543e-06, + "loss": 0.0025, + "step": 187190 + }, + { + "epoch": 1.2006426516757367, + "grad_norm": 0.008790786378085613, + "learning_rate": 4.126223161647191e-06, + "loss": 0.0014, + "step": 187200 + }, + { + "epoch": 1.2007067885695228, + "grad_norm": 0.15151667594909668, + "learning_rate": 4.125672076528886e-06, + "loss": 0.0013, + "step": 187210 + }, + { + "epoch": 1.200770925463309, + "grad_norm": 0.03541888669133186, + "learning_rate": 4.125121002366535e-06, + "loss": 0.0012, + "step": 187220 + }, + { + "epoch": 1.200835062357095, + "grad_norm": 0.1084711104631424, + "learning_rate": 4.124569939167043e-06, + "loss": 0.0012, + "step": 187230 + }, + { + "epoch": 1.200899199250881, + "grad_norm": 0.04629380628466606, + "learning_rate": 4.124018886937315e-06, + "loss": 0.0015, + "step": 187240 + }, + { + "epoch": 1.2009633361446672, + "grad_norm": 0.042566780000925064, + "learning_rate": 4.1234678456842575e-06, + "loss": 0.0012, + "step": 187250 + }, + { + "epoch": 1.2010274730384534, + "grad_norm": 0.05979502201080322, + "learning_rate": 4.122916815414773e-06, + "loss": 0.001, + "step": 187260 + }, + { + "epoch": 1.2010916099322393, + "grad_norm": 0.02722802944481373, + "learning_rate": 4.122365796135769e-06, + "loss": 0.0019, + "step": 187270 + }, + { + "epoch": 1.2011557468260254, + "grad_norm": 0.1804477423429489, + "learning_rate": 4.1218147878541485e-06, + "loss": 0.0012, + "step": 187280 + }, + { + "epoch": 1.2012198837198116, + "grad_norm": 0.017981648445129395, + "learning_rate": 4.121263790576816e-06, + "loss": 0.0011, + "step": 187290 + }, + { + "epoch": 1.2012840206135977, + "grad_norm": 0.029202884063124657, + "learning_rate": 4.120712804310676e-06, + "loss": 0.0016, + "step": 187300 + }, + { + "epoch": 1.2013481575073839, + "grad_norm": 0.03807961195707321, + "learning_rate": 4.120161829062633e-06, + "loss": 0.0017, + "step": 187310 + }, + { + "epoch": 1.2014122944011698, + "grad_norm": 0.061058592051267624, + "learning_rate": 4.119610864839592e-06, + "loss": 0.0019, + "step": 187320 + }, + { + "epoch": 1.201476431294956, + "grad_norm": 0.05313233286142349, + "learning_rate": 4.1190599116484554e-06, + "loss": 0.0014, + "step": 187330 + }, + { + "epoch": 1.201540568188742, + "grad_norm": 0.015431663021445274, + "learning_rate": 4.118508969496127e-06, + "loss": 0.0011, + "step": 187340 + }, + { + "epoch": 1.201604705082528, + "grad_norm": 0.023360364139080048, + "learning_rate": 4.117958038389512e-06, + "loss": 0.0009, + "step": 187350 + }, + { + "epoch": 1.2016688419763142, + "grad_norm": 0.20973680913448334, + "learning_rate": 4.117407118335513e-06, + "loss": 0.002, + "step": 187360 + }, + { + "epoch": 1.2017329788701003, + "grad_norm": 0.0811159685254097, + "learning_rate": 4.116856209341034e-06, + "loss": 0.006, + "step": 187370 + }, + { + "epoch": 1.2017971157638865, + "grad_norm": 0.2112804502248764, + "learning_rate": 4.116305311412978e-06, + "loss": 0.0018, + "step": 187380 + }, + { + "epoch": 1.2018612526576726, + "grad_norm": 0.09988492727279663, + "learning_rate": 4.115754424558247e-06, + "loss": 0.001, + "step": 187390 + }, + { + "epoch": 1.2019253895514586, + "grad_norm": 0.18315206468105316, + "learning_rate": 4.115203548783746e-06, + "loss": 0.0026, + "step": 187400 + }, + { + "epoch": 1.2019895264452447, + "grad_norm": 0.07950010895729065, + "learning_rate": 4.114652684096375e-06, + "loss": 0.0022, + "step": 187410 + }, + { + "epoch": 1.2020536633390309, + "grad_norm": 0.12662889063358307, + "learning_rate": 4.114101830503041e-06, + "loss": 0.0021, + "step": 187420 + }, + { + "epoch": 1.202117800232817, + "grad_norm": 0.15062464773654938, + "learning_rate": 4.1135509880106425e-06, + "loss": 0.002, + "step": 187430 + }, + { + "epoch": 1.202181937126603, + "grad_norm": 0.07892768830060959, + "learning_rate": 4.113000156626086e-06, + "loss": 0.0025, + "step": 187440 + }, + { + "epoch": 1.202246074020389, + "grad_norm": 0.4865648150444031, + "learning_rate": 4.112449336356269e-06, + "loss": 0.0038, + "step": 187450 + }, + { + "epoch": 1.2023102109141752, + "grad_norm": 0.03853175416588783, + "learning_rate": 4.111898527208097e-06, + "loss": 0.001, + "step": 187460 + }, + { + "epoch": 1.2023743478079614, + "grad_norm": 0.009135020896792412, + "learning_rate": 4.111347729188471e-06, + "loss": 0.0013, + "step": 187470 + }, + { + "epoch": 1.2024384847017475, + "grad_norm": 0.05706874281167984, + "learning_rate": 4.110796942304294e-06, + "loss": 0.0019, + "step": 187480 + }, + { + "epoch": 1.2025026215955334, + "grad_norm": 0.11223699152469635, + "learning_rate": 4.1102461665624655e-06, + "loss": 0.0015, + "step": 187490 + }, + { + "epoch": 1.2025667584893196, + "grad_norm": 0.17788241803646088, + "learning_rate": 4.1096954019698895e-06, + "loss": 0.002, + "step": 187500 + }, + { + "epoch": 1.2026308953831057, + "grad_norm": 0.042884886264801025, + "learning_rate": 4.109144648533466e-06, + "loss": 0.0015, + "step": 187510 + }, + { + "epoch": 1.2026950322768917, + "grad_norm": 0.1537342518568039, + "learning_rate": 4.108593906260096e-06, + "loss": 0.0019, + "step": 187520 + }, + { + "epoch": 1.2027591691706778, + "grad_norm": 0.12015387415885925, + "learning_rate": 4.108043175156682e-06, + "loss": 0.0015, + "step": 187530 + }, + { + "epoch": 1.202823306064464, + "grad_norm": 0.11079380661249161, + "learning_rate": 4.107492455230124e-06, + "loss": 0.0013, + "step": 187540 + }, + { + "epoch": 1.2028874429582501, + "grad_norm": 0.04511912539601326, + "learning_rate": 4.106941746487325e-06, + "loss": 0.0017, + "step": 187550 + }, + { + "epoch": 1.2029515798520363, + "grad_norm": 0.02514353021979332, + "learning_rate": 4.106391048935183e-06, + "loss": 0.0012, + "step": 187560 + }, + { + "epoch": 1.2030157167458222, + "grad_norm": 0.07930656522512436, + "learning_rate": 4.105840362580601e-06, + "loss": 0.0017, + "step": 187570 + }, + { + "epoch": 1.2030798536396083, + "grad_norm": 0.3285084366798401, + "learning_rate": 4.1052896874304756e-06, + "loss": 0.0006, + "step": 187580 + }, + { + "epoch": 1.2031439905333945, + "grad_norm": 0.058521948754787445, + "learning_rate": 4.1047390234917124e-06, + "loss": 0.0017, + "step": 187590 + }, + { + "epoch": 1.2032081274271806, + "grad_norm": 0.15210366249084473, + "learning_rate": 4.104188370771208e-06, + "loss": 0.0028, + "step": 187600 + }, + { + "epoch": 1.2032722643209666, + "grad_norm": 0.1036456972360611, + "learning_rate": 4.103637729275864e-06, + "loss": 0.0015, + "step": 187610 + }, + { + "epoch": 1.2033364012147527, + "grad_norm": 0.6061391234397888, + "learning_rate": 4.103087099012579e-06, + "loss": 0.0023, + "step": 187620 + }, + { + "epoch": 1.2034005381085389, + "grad_norm": 0.31037306785583496, + "learning_rate": 4.102536479988255e-06, + "loss": 0.0023, + "step": 187630 + }, + { + "epoch": 1.203464675002325, + "grad_norm": 0.08877530694007874, + "learning_rate": 4.101985872209789e-06, + "loss": 0.001, + "step": 187640 + }, + { + "epoch": 1.2035288118961112, + "grad_norm": 0.37374791502952576, + "learning_rate": 4.101435275684083e-06, + "loss": 0.0023, + "step": 187650 + }, + { + "epoch": 1.203592948789897, + "grad_norm": 0.04684571921825409, + "learning_rate": 4.100884690418035e-06, + "loss": 0.0016, + "step": 187660 + }, + { + "epoch": 1.2036570856836832, + "grad_norm": 0.11537224054336548, + "learning_rate": 4.100334116418542e-06, + "loss": 0.0016, + "step": 187670 + }, + { + "epoch": 1.2037212225774694, + "grad_norm": 0.0287192240357399, + "learning_rate": 4.099783553692507e-06, + "loss": 0.0013, + "step": 187680 + }, + { + "epoch": 1.2037853594712555, + "grad_norm": 0.0894940122961998, + "learning_rate": 4.0992330022468285e-06, + "loss": 0.0012, + "step": 187690 + }, + { + "epoch": 1.2038494963650415, + "grad_norm": 0.056172262877225876, + "learning_rate": 4.098682462088403e-06, + "loss": 0.0015, + "step": 187700 + }, + { + "epoch": 1.2039136332588276, + "grad_norm": 0.0038929262664169073, + "learning_rate": 4.098131933224132e-06, + "loss": 0.007, + "step": 187710 + }, + { + "epoch": 1.2039777701526138, + "grad_norm": 0.07283658534288406, + "learning_rate": 4.0975814156609106e-06, + "loss": 0.0026, + "step": 187720 + }, + { + "epoch": 1.2040419070464, + "grad_norm": 0.07869023084640503, + "learning_rate": 4.097030909405641e-06, + "loss": 0.0022, + "step": 187730 + }, + { + "epoch": 1.204106043940186, + "grad_norm": 0.007316358853131533, + "learning_rate": 4.0964804144652176e-06, + "loss": 0.0014, + "step": 187740 + }, + { + "epoch": 1.204170180833972, + "grad_norm": 0.07600446045398712, + "learning_rate": 4.095929930846542e-06, + "loss": 0.0016, + "step": 187750 + }, + { + "epoch": 1.2042343177277581, + "grad_norm": 0.055925771594047546, + "learning_rate": 4.095379458556509e-06, + "loss": 0.0008, + "step": 187760 + }, + { + "epoch": 1.2042984546215443, + "grad_norm": 0.09064169973134995, + "learning_rate": 4.094828997602019e-06, + "loss": 0.0016, + "step": 187770 + }, + { + "epoch": 1.2043625915153302, + "grad_norm": 0.04664687439799309, + "learning_rate": 4.094278547989967e-06, + "loss": 0.0015, + "step": 187780 + }, + { + "epoch": 1.2044267284091164, + "grad_norm": 0.22472380101680756, + "learning_rate": 4.0937281097272545e-06, + "loss": 0.0014, + "step": 187790 + }, + { + "epoch": 1.2044908653029025, + "grad_norm": 0.028136566281318665, + "learning_rate": 4.093177682820775e-06, + "loss": 0.0016, + "step": 187800 + }, + { + "epoch": 1.2045550021966887, + "grad_norm": 0.46360525488853455, + "learning_rate": 4.092627267277427e-06, + "loss": 0.0014, + "step": 187810 + }, + { + "epoch": 1.2046191390904748, + "grad_norm": 0.11265137046575546, + "learning_rate": 4.092076863104109e-06, + "loss": 0.0013, + "step": 187820 + }, + { + "epoch": 1.2046832759842607, + "grad_norm": 0.293995201587677, + "learning_rate": 4.0915264703077154e-06, + "loss": 0.0017, + "step": 187830 + }, + { + "epoch": 1.2047474128780469, + "grad_norm": 0.16387131810188293, + "learning_rate": 4.090976088895145e-06, + "loss": 0.0025, + "step": 187840 + }, + { + "epoch": 1.204811549771833, + "grad_norm": 0.1470591127872467, + "learning_rate": 4.090425718873294e-06, + "loss": 0.0018, + "step": 187850 + }, + { + "epoch": 1.2048756866656192, + "grad_norm": 0.01563205197453499, + "learning_rate": 4.089875360249059e-06, + "loss": 0.0019, + "step": 187860 + }, + { + "epoch": 1.204939823559405, + "grad_norm": 0.08468756824731827, + "learning_rate": 4.089325013029335e-06, + "loss": 0.0012, + "step": 187870 + }, + { + "epoch": 1.2050039604531912, + "grad_norm": 0.09946174174547195, + "learning_rate": 4.0887746772210215e-06, + "loss": 0.0006, + "step": 187880 + }, + { + "epoch": 1.2050680973469774, + "grad_norm": 0.057054974138736725, + "learning_rate": 4.088224352831011e-06, + "loss": 0.0031, + "step": 187890 + }, + { + "epoch": 1.2051322342407635, + "grad_norm": 0.2079000622034073, + "learning_rate": 4.0876740398662015e-06, + "loss": 0.0021, + "step": 187900 + }, + { + "epoch": 1.2051963711345497, + "grad_norm": 0.05940638482570648, + "learning_rate": 4.0871237383334885e-06, + "loss": 0.0021, + "step": 187910 + }, + { + "epoch": 1.2052605080283356, + "grad_norm": 0.028715074062347412, + "learning_rate": 4.086573448239768e-06, + "loss": 0.0014, + "step": 187920 + }, + { + "epoch": 1.2053246449221218, + "grad_norm": 0.23639696836471558, + "learning_rate": 4.086023169591934e-06, + "loss": 0.0016, + "step": 187930 + }, + { + "epoch": 1.205388781815908, + "grad_norm": 0.07437796145677567, + "learning_rate": 4.085472902396882e-06, + "loss": 0.0013, + "step": 187940 + }, + { + "epoch": 1.205452918709694, + "grad_norm": 0.042225006967782974, + "learning_rate": 4.08492264666151e-06, + "loss": 0.0008, + "step": 187950 + }, + { + "epoch": 1.20551705560348, + "grad_norm": 0.1663634181022644, + "learning_rate": 4.08437240239271e-06, + "loss": 0.0012, + "step": 187960 + }, + { + "epoch": 1.2055811924972661, + "grad_norm": 0.08279787749052048, + "learning_rate": 4.083822169597379e-06, + "loss": 0.0011, + "step": 187970 + }, + { + "epoch": 1.2056453293910523, + "grad_norm": 0.07078959792852402, + "learning_rate": 4.08327194828241e-06, + "loss": 0.0038, + "step": 187980 + }, + { + "epoch": 1.2057094662848384, + "grad_norm": 0.013743606396019459, + "learning_rate": 4.082721738454701e-06, + "loss": 0.0018, + "step": 187990 + }, + { + "epoch": 1.2057736031786244, + "grad_norm": 0.01445829588919878, + "learning_rate": 4.082171540121141e-06, + "loss": 0.0017, + "step": 188000 + }, + { + "epoch": 1.2058377400724105, + "grad_norm": 0.08816064149141312, + "learning_rate": 4.08162135328863e-06, + "loss": 0.0018, + "step": 188010 + }, + { + "epoch": 1.2059018769661967, + "grad_norm": 0.08746741712093353, + "learning_rate": 4.081071177964058e-06, + "loss": 0.0019, + "step": 188020 + }, + { + "epoch": 1.2059660138599828, + "grad_norm": 0.029904184862971306, + "learning_rate": 4.080521014154323e-06, + "loss": 0.0022, + "step": 188030 + }, + { + "epoch": 1.2060301507537687, + "grad_norm": 0.07069440931081772, + "learning_rate": 4.079970861866315e-06, + "loss": 0.0014, + "step": 188040 + }, + { + "epoch": 1.2060942876475549, + "grad_norm": 0.08658213168382645, + "learning_rate": 4.079420721106931e-06, + "loss": 0.0014, + "step": 188050 + }, + { + "epoch": 1.206158424541341, + "grad_norm": 0.05616581067442894, + "learning_rate": 4.078870591883062e-06, + "loss": 0.0013, + "step": 188060 + }, + { + "epoch": 1.2062225614351272, + "grad_norm": 0.02350578084588051, + "learning_rate": 4.078320474201604e-06, + "loss": 0.0011, + "step": 188070 + }, + { + "epoch": 1.2062866983289133, + "grad_norm": 0.04117431864142418, + "learning_rate": 4.077770368069448e-06, + "loss": 0.0013, + "step": 188080 + }, + { + "epoch": 1.2063508352226993, + "grad_norm": 0.04477888345718384, + "learning_rate": 4.077220273493488e-06, + "loss": 0.0019, + "step": 188090 + }, + { + "epoch": 1.2064149721164854, + "grad_norm": 0.14388786256313324, + "learning_rate": 4.076670190480619e-06, + "loss": 0.0029, + "step": 188100 + }, + { + "epoch": 1.2064791090102716, + "grad_norm": 0.015612471848726273, + "learning_rate": 4.076120119037731e-06, + "loss": 0.0016, + "step": 188110 + }, + { + "epoch": 1.2065432459040577, + "grad_norm": 0.03613625839352608, + "learning_rate": 4.075570059171719e-06, + "loss": 0.0026, + "step": 188120 + }, + { + "epoch": 1.2066073827978436, + "grad_norm": 0.05011439695954323, + "learning_rate": 4.075020010889475e-06, + "loss": 0.0011, + "step": 188130 + }, + { + "epoch": 1.2066715196916298, + "grad_norm": 0.05125297233462334, + "learning_rate": 4.074469974197892e-06, + "loss": 0.002, + "step": 188140 + }, + { + "epoch": 1.206735656585416, + "grad_norm": 0.10764499753713608, + "learning_rate": 4.07391994910386e-06, + "loss": 0.0017, + "step": 188150 + }, + { + "epoch": 1.206799793479202, + "grad_norm": 0.005238198209553957, + "learning_rate": 4.073369935614274e-06, + "loss": 0.0016, + "step": 188160 + }, + { + "epoch": 1.2068639303729882, + "grad_norm": 0.04456604644656181, + "learning_rate": 4.072819933736025e-06, + "loss": 0.0014, + "step": 188170 + }, + { + "epoch": 1.2069280672667742, + "grad_norm": 0.14825411140918732, + "learning_rate": 4.072269943476005e-06, + "loss": 0.0013, + "step": 188180 + }, + { + "epoch": 1.2069922041605603, + "grad_norm": 0.04572425037622452, + "learning_rate": 4.071719964841104e-06, + "loss": 0.0013, + "step": 188190 + }, + { + "epoch": 1.2070563410543464, + "grad_norm": 0.29229500889778137, + "learning_rate": 4.0711699978382174e-06, + "loss": 0.0015, + "step": 188200 + }, + { + "epoch": 1.2071204779481324, + "grad_norm": 0.25380653142929077, + "learning_rate": 4.0706200424742335e-06, + "loss": 0.0017, + "step": 188210 + }, + { + "epoch": 1.2071846148419185, + "grad_norm": 0.0806683674454689, + "learning_rate": 4.0700700987560455e-06, + "loss": 0.0015, + "step": 188220 + }, + { + "epoch": 1.2072487517357047, + "grad_norm": 0.04647250473499298, + "learning_rate": 4.069520166690543e-06, + "loss": 0.002, + "step": 188230 + }, + { + "epoch": 1.2073128886294908, + "grad_norm": 0.03942198306322098, + "learning_rate": 4.068970246284617e-06, + "loss": 0.0019, + "step": 188240 + }, + { + "epoch": 1.207377025523277, + "grad_norm": 0.31986311078071594, + "learning_rate": 4.06842033754516e-06, + "loss": 0.0012, + "step": 188250 + }, + { + "epoch": 1.207441162417063, + "grad_norm": 0.17748479545116425, + "learning_rate": 4.067870440479062e-06, + "loss": 0.0021, + "step": 188260 + }, + { + "epoch": 1.207505299310849, + "grad_norm": 0.08459348231554031, + "learning_rate": 4.067320555093214e-06, + "loss": 0.0011, + "step": 188270 + }, + { + "epoch": 1.2075694362046352, + "grad_norm": 0.06454211473464966, + "learning_rate": 4.066770681394506e-06, + "loss": 0.0009, + "step": 188280 + }, + { + "epoch": 1.2076335730984213, + "grad_norm": 0.11118533462285995, + "learning_rate": 4.066220819389829e-06, + "loss": 0.0036, + "step": 188290 + }, + { + "epoch": 1.2076977099922073, + "grad_norm": 0.025810057297348976, + "learning_rate": 4.06567096908607e-06, + "loss": 0.0012, + "step": 188300 + }, + { + "epoch": 1.2077618468859934, + "grad_norm": 0.09371408075094223, + "learning_rate": 4.065121130490124e-06, + "loss": 0.0012, + "step": 188310 + }, + { + "epoch": 1.2078259837797796, + "grad_norm": 0.018266141414642334, + "learning_rate": 4.064571303608877e-06, + "loss": 0.0022, + "step": 188320 + }, + { + "epoch": 1.2078901206735657, + "grad_norm": 0.14298661053180695, + "learning_rate": 4.064021488449221e-06, + "loss": 0.0028, + "step": 188330 + }, + { + "epoch": 1.2079542575673519, + "grad_norm": 0.019672038033604622, + "learning_rate": 4.063471685018043e-06, + "loss": 0.0007, + "step": 188340 + }, + { + "epoch": 1.2080183944611378, + "grad_norm": 0.0716666504740715, + "learning_rate": 4.062921893322236e-06, + "loss": 0.002, + "step": 188350 + }, + { + "epoch": 1.208082531354924, + "grad_norm": 0.10400012135505676, + "learning_rate": 4.0623721133686866e-06, + "loss": 0.0016, + "step": 188360 + }, + { + "epoch": 1.20814666824871, + "grad_norm": 0.05531886965036392, + "learning_rate": 4.061822345164286e-06, + "loss": 0.0029, + "step": 188370 + }, + { + "epoch": 1.2082108051424962, + "grad_norm": 0.02966497465968132, + "learning_rate": 4.0612725887159195e-06, + "loss": 0.0018, + "step": 188380 + }, + { + "epoch": 1.2082749420362822, + "grad_norm": 0.07674681395292282, + "learning_rate": 4.060722844030479e-06, + "loss": 0.0014, + "step": 188390 + }, + { + "epoch": 1.2083390789300683, + "grad_norm": 0.030127577483654022, + "learning_rate": 4.060173111114854e-06, + "loss": 0.001, + "step": 188400 + }, + { + "epoch": 1.2084032158238545, + "grad_norm": 0.08961062878370285, + "learning_rate": 4.05962338997593e-06, + "loss": 0.001, + "step": 188410 + }, + { + "epoch": 1.2084673527176406, + "grad_norm": 0.035098202526569366, + "learning_rate": 4.059073680620599e-06, + "loss": 0.0009, + "step": 188420 + }, + { + "epoch": 1.2085314896114265, + "grad_norm": 0.04928535968065262, + "learning_rate": 4.058523983055745e-06, + "loss": 0.0028, + "step": 188430 + }, + { + "epoch": 1.2085956265052127, + "grad_norm": 0.021472038701176643, + "learning_rate": 4.057974297288258e-06, + "loss": 0.002, + "step": 188440 + }, + { + "epoch": 1.2086597633989988, + "grad_norm": 0.135204017162323, + "learning_rate": 4.057424623325028e-06, + "loss": 0.0012, + "step": 188450 + }, + { + "epoch": 1.208723900292785, + "grad_norm": 0.006811929401010275, + "learning_rate": 4.056874961172941e-06, + "loss": 0.0027, + "step": 188460 + }, + { + "epoch": 1.208788037186571, + "grad_norm": 0.09208974242210388, + "learning_rate": 4.056325310838885e-06, + "loss": 0.0017, + "step": 188470 + }, + { + "epoch": 1.208852174080357, + "grad_norm": 0.161551833152771, + "learning_rate": 4.055775672329746e-06, + "loss": 0.0011, + "step": 188480 + }, + { + "epoch": 1.2089163109741432, + "grad_norm": 0.01946430653333664, + "learning_rate": 4.055226045652414e-06, + "loss": 0.0015, + "step": 188490 + }, + { + "epoch": 1.2089804478679294, + "grad_norm": 0.08680370450019836, + "learning_rate": 4.054676430813774e-06, + "loss": 0.0018, + "step": 188500 + }, + { + "epoch": 1.2090445847617155, + "grad_norm": 0.08462203294038773, + "learning_rate": 4.054126827820714e-06, + "loss": 0.0014, + "step": 188510 + }, + { + "epoch": 1.2091087216555014, + "grad_norm": 0.06439220905303955, + "learning_rate": 4.053577236680122e-06, + "loss": 0.0011, + "step": 188520 + }, + { + "epoch": 1.2091728585492876, + "grad_norm": 0.07089382410049438, + "learning_rate": 4.053027657398882e-06, + "loss": 0.0014, + "step": 188530 + }, + { + "epoch": 1.2092369954430737, + "grad_norm": 0.025199666619300842, + "learning_rate": 4.052478089983883e-06, + "loss": 0.0009, + "step": 188540 + }, + { + "epoch": 1.2093011323368599, + "grad_norm": 0.039101798087358475, + "learning_rate": 4.051928534442011e-06, + "loss": 0.0014, + "step": 188550 + }, + { + "epoch": 1.2093652692306458, + "grad_norm": 0.10798163712024689, + "learning_rate": 4.051378990780153e-06, + "loss": 0.0013, + "step": 188560 + }, + { + "epoch": 1.209429406124432, + "grad_norm": 0.07373690605163574, + "learning_rate": 4.050829459005192e-06, + "loss": 0.0013, + "step": 188570 + }, + { + "epoch": 1.209493543018218, + "grad_norm": 0.18303203582763672, + "learning_rate": 4.050279939124018e-06, + "loss": 0.0015, + "step": 188580 + }, + { + "epoch": 1.2095576799120042, + "grad_norm": 0.04567175731062889, + "learning_rate": 4.049730431143514e-06, + "loss": 0.0015, + "step": 188590 + }, + { + "epoch": 1.2096218168057904, + "grad_norm": 0.07699242234230042, + "learning_rate": 4.049180935070567e-06, + "loss": 0.0009, + "step": 188600 + }, + { + "epoch": 1.2096859536995763, + "grad_norm": 0.07643191516399384, + "learning_rate": 4.048631450912063e-06, + "loss": 0.002, + "step": 188610 + }, + { + "epoch": 1.2097500905933625, + "grad_norm": 0.05156608670949936, + "learning_rate": 4.048081978674886e-06, + "loss": 0.0012, + "step": 188620 + }, + { + "epoch": 1.2098142274871486, + "grad_norm": 0.3705749213695526, + "learning_rate": 4.047532518365923e-06, + "loss": 0.0035, + "step": 188630 + }, + { + "epoch": 1.2098783643809345, + "grad_norm": 0.043635543435811996, + "learning_rate": 4.046983069992058e-06, + "loss": 0.0019, + "step": 188640 + }, + { + "epoch": 1.2099425012747207, + "grad_norm": 0.0662478655576706, + "learning_rate": 4.046433633560176e-06, + "loss": 0.0024, + "step": 188650 + }, + { + "epoch": 1.2100066381685068, + "grad_norm": 0.1132093146443367, + "learning_rate": 4.045884209077162e-06, + "loss": 0.0023, + "step": 188660 + }, + { + "epoch": 1.210070775062293, + "grad_norm": 0.022825339809060097, + "learning_rate": 4.045334796549901e-06, + "loss": 0.0017, + "step": 188670 + }, + { + "epoch": 1.2101349119560791, + "grad_norm": 0.006263209972530603, + "learning_rate": 4.044785395985277e-06, + "loss": 0.0016, + "step": 188680 + }, + { + "epoch": 1.210199048849865, + "grad_norm": 0.052848342806100845, + "learning_rate": 4.044236007390176e-06, + "loss": 0.0022, + "step": 188690 + }, + { + "epoch": 1.2102631857436512, + "grad_norm": 0.023211365565657616, + "learning_rate": 4.043686630771479e-06, + "loss": 0.0021, + "step": 188700 + }, + { + "epoch": 1.2103273226374374, + "grad_norm": 0.03306528925895691, + "learning_rate": 4.043137266136074e-06, + "loss": 0.0024, + "step": 188710 + }, + { + "epoch": 1.2103914595312235, + "grad_norm": 0.14097969233989716, + "learning_rate": 4.042587913490841e-06, + "loss": 0.002, + "step": 188720 + }, + { + "epoch": 1.2104555964250094, + "grad_norm": 0.08027029782533646, + "learning_rate": 4.042038572842667e-06, + "loss": 0.0032, + "step": 188730 + }, + { + "epoch": 1.2105197333187956, + "grad_norm": 0.014452873729169369, + "learning_rate": 4.041489244198432e-06, + "loss": 0.0011, + "step": 188740 + }, + { + "epoch": 1.2105838702125817, + "grad_norm": 0.07585106045007706, + "learning_rate": 4.040939927565025e-06, + "loss": 0.0021, + "step": 188750 + }, + { + "epoch": 1.2106480071063679, + "grad_norm": 0.14823010563850403, + "learning_rate": 4.0403906229493236e-06, + "loss": 0.0037, + "step": 188760 + }, + { + "epoch": 1.210712144000154, + "grad_norm": 0.058975305408239365, + "learning_rate": 4.039841330358216e-06, + "loss": 0.0018, + "step": 188770 + }, + { + "epoch": 1.21077628089394, + "grad_norm": 0.06043016538023949, + "learning_rate": 4.039292049798581e-06, + "loss": 0.0014, + "step": 188780 + }, + { + "epoch": 1.210840417787726, + "grad_norm": 0.005526017397642136, + "learning_rate": 4.0387427812773025e-06, + "loss": 0.0013, + "step": 188790 + }, + { + "epoch": 1.2109045546815123, + "grad_norm": 0.09409480541944504, + "learning_rate": 4.0381935248012664e-06, + "loss": 0.0016, + "step": 188800 + }, + { + "epoch": 1.2109686915752984, + "grad_norm": 0.11467521637678146, + "learning_rate": 4.0376442803773504e-06, + "loss": 0.0024, + "step": 188810 + }, + { + "epoch": 1.2110328284690843, + "grad_norm": 0.018768569454550743, + "learning_rate": 4.037095048012441e-06, + "loss": 0.0018, + "step": 188820 + }, + { + "epoch": 1.2110969653628705, + "grad_norm": 0.1887967586517334, + "learning_rate": 4.0365458277134174e-06, + "loss": 0.0018, + "step": 188830 + }, + { + "epoch": 1.2111611022566566, + "grad_norm": 0.13555163145065308, + "learning_rate": 4.035996619487165e-06, + "loss": 0.0023, + "step": 188840 + }, + { + "epoch": 1.2112252391504428, + "grad_norm": 0.0160826463252306, + "learning_rate": 4.0354474233405624e-06, + "loss": 0.0039, + "step": 188850 + }, + { + "epoch": 1.2112893760442287, + "grad_norm": 0.053795114159584045, + "learning_rate": 4.0348982392804934e-06, + "loss": 0.002, + "step": 188860 + }, + { + "epoch": 1.2113535129380149, + "grad_norm": 0.16556724905967712, + "learning_rate": 4.034349067313839e-06, + "loss": 0.0016, + "step": 188870 + }, + { + "epoch": 1.211417649831801, + "grad_norm": 0.08626790344715118, + "learning_rate": 4.0337999074474824e-06, + "loss": 0.0012, + "step": 188880 + }, + { + "epoch": 1.2114817867255872, + "grad_norm": 0.2976478040218353, + "learning_rate": 4.033250759688301e-06, + "loss": 0.0014, + "step": 188890 + }, + { + "epoch": 1.211545923619373, + "grad_norm": 1.178741216659546, + "learning_rate": 4.03270162404318e-06, + "loss": 0.0019, + "step": 188900 + }, + { + "epoch": 1.2116100605131592, + "grad_norm": 0.04616977274417877, + "learning_rate": 4.0321525005189984e-06, + "loss": 0.0028, + "step": 188910 + }, + { + "epoch": 1.2116741974069454, + "grad_norm": 0.025343716144561768, + "learning_rate": 4.031603389122639e-06, + "loss": 0.0007, + "step": 188920 + }, + { + "epoch": 1.2117383343007315, + "grad_norm": 0.03278636932373047, + "learning_rate": 4.03105428986098e-06, + "loss": 0.0019, + "step": 188930 + }, + { + "epoch": 1.2118024711945177, + "grad_norm": 0.1040743812918663, + "learning_rate": 4.030505202740903e-06, + "loss": 0.002, + "step": 188940 + }, + { + "epoch": 1.2118666080883036, + "grad_norm": 0.13504858314990997, + "learning_rate": 4.02995612776929e-06, + "loss": 0.002, + "step": 188950 + }, + { + "epoch": 1.2119307449820897, + "grad_norm": 0.1743793785572052, + "learning_rate": 4.029407064953019e-06, + "loss": 0.0016, + "step": 188960 + }, + { + "epoch": 1.211994881875876, + "grad_norm": 0.0381179116666317, + "learning_rate": 4.028858014298971e-06, + "loss": 0.0015, + "step": 188970 + }, + { + "epoch": 1.212059018769662, + "grad_norm": 0.12197365611791611, + "learning_rate": 4.028308975814027e-06, + "loss": 0.0014, + "step": 188980 + }, + { + "epoch": 1.212123155663448, + "grad_norm": 0.14338000118732452, + "learning_rate": 4.0277599495050655e-06, + "loss": 0.0008, + "step": 188990 + }, + { + "epoch": 1.2121872925572341, + "grad_norm": 0.05253363400697708, + "learning_rate": 4.0272109353789665e-06, + "loss": 0.0017, + "step": 189000 + }, + { + "epoch": 1.2122514294510203, + "grad_norm": 0.13448677957057953, + "learning_rate": 4.02666193344261e-06, + "loss": 0.0011, + "step": 189010 + }, + { + "epoch": 1.2123155663448064, + "grad_norm": 0.07924114912748337, + "learning_rate": 4.026112943702874e-06, + "loss": 0.0019, + "step": 189020 + }, + { + "epoch": 1.2123797032385926, + "grad_norm": 0.16758912801742554, + "learning_rate": 4.025563966166641e-06, + "loss": 0.0019, + "step": 189030 + }, + { + "epoch": 1.2124438401323785, + "grad_norm": 0.13628177344799042, + "learning_rate": 4.025015000840786e-06, + "loss": 0.0018, + "step": 189040 + }, + { + "epoch": 1.2125079770261646, + "grad_norm": 0.006133451592177153, + "learning_rate": 4.024466047732192e-06, + "loss": 0.0009, + "step": 189050 + }, + { + "epoch": 1.2125721139199508, + "grad_norm": 0.06621977686882019, + "learning_rate": 4.023917106847733e-06, + "loss": 0.0023, + "step": 189060 + }, + { + "epoch": 1.2126362508137367, + "grad_norm": 0.190654456615448, + "learning_rate": 4.023368178194292e-06, + "loss": 0.0013, + "step": 189070 + }, + { + "epoch": 1.2127003877075229, + "grad_norm": 0.04014454036951065, + "learning_rate": 4.022819261778745e-06, + "loss": 0.0013, + "step": 189080 + }, + { + "epoch": 1.212764524601309, + "grad_norm": 0.07272602617740631, + "learning_rate": 4.022270357607971e-06, + "loss": 0.0015, + "step": 189090 + }, + { + "epoch": 1.2128286614950952, + "grad_norm": 0.012182219885289669, + "learning_rate": 4.021721465688849e-06, + "loss": 0.0009, + "step": 189100 + }, + { + "epoch": 1.2128927983888813, + "grad_norm": 0.08933998644351959, + "learning_rate": 4.021172586028255e-06, + "loss": 0.0007, + "step": 189110 + }, + { + "epoch": 1.2129569352826672, + "grad_norm": 0.10960163176059723, + "learning_rate": 4.020623718633069e-06, + "loss": 0.0016, + "step": 189120 + }, + { + "epoch": 1.2130210721764534, + "grad_norm": 0.04663372039794922, + "learning_rate": 4.020074863510167e-06, + "loss": 0.0009, + "step": 189130 + }, + { + "epoch": 1.2130852090702395, + "grad_norm": 0.1042468473315239, + "learning_rate": 4.019526020666429e-06, + "loss": 0.0023, + "step": 189140 + }, + { + "epoch": 1.2131493459640257, + "grad_norm": 0.02915082685649395, + "learning_rate": 4.018977190108729e-06, + "loss": 0.0014, + "step": 189150 + }, + { + "epoch": 1.2132134828578116, + "grad_norm": 0.06580620259046555, + "learning_rate": 4.018428371843946e-06, + "loss": 0.0015, + "step": 189160 + }, + { + "epoch": 1.2132776197515978, + "grad_norm": 0.05101105570793152, + "learning_rate": 4.017879565878957e-06, + "loss": 0.0008, + "step": 189170 + }, + { + "epoch": 1.213341756645384, + "grad_norm": 0.15847432613372803, + "learning_rate": 4.01733077222064e-06, + "loss": 0.0015, + "step": 189180 + }, + { + "epoch": 1.21340589353917, + "grad_norm": 0.1405104398727417, + "learning_rate": 4.01678199087587e-06, + "loss": 0.0022, + "step": 189190 + }, + { + "epoch": 1.2134700304329562, + "grad_norm": 0.045638903975486755, + "learning_rate": 4.016233221851525e-06, + "loss": 0.0009, + "step": 189200 + }, + { + "epoch": 1.2135341673267421, + "grad_norm": 0.061501361429691315, + "learning_rate": 4.015684465154477e-06, + "loss": 0.0027, + "step": 189210 + }, + { + "epoch": 1.2135983042205283, + "grad_norm": 0.1591787338256836, + "learning_rate": 4.015135720791611e-06, + "loss": 0.0012, + "step": 189220 + }, + { + "epoch": 1.2136624411143144, + "grad_norm": 0.18680493533611298, + "learning_rate": 4.014586988769796e-06, + "loss": 0.0012, + "step": 189230 + }, + { + "epoch": 1.2137265780081006, + "grad_norm": 0.10476205497980118, + "learning_rate": 4.014038269095911e-06, + "loss": 0.0025, + "step": 189240 + }, + { + "epoch": 1.2137907149018865, + "grad_norm": 0.11433443427085876, + "learning_rate": 4.013489561776831e-06, + "loss": 0.0022, + "step": 189250 + }, + { + "epoch": 1.2138548517956727, + "grad_norm": 0.1732834428548813, + "learning_rate": 4.012940866819432e-06, + "loss": 0.0018, + "step": 189260 + }, + { + "epoch": 1.2139189886894588, + "grad_norm": 0.08467309176921844, + "learning_rate": 4.012392184230589e-06, + "loss": 0.0021, + "step": 189270 + }, + { + "epoch": 1.213983125583245, + "grad_norm": 0.06844529509544373, + "learning_rate": 4.011843514017178e-06, + "loss": 0.0013, + "step": 189280 + }, + { + "epoch": 1.214047262477031, + "grad_norm": 0.1597221940755844, + "learning_rate": 4.0112948561860745e-06, + "loss": 0.0037, + "step": 189290 + }, + { + "epoch": 1.214111399370817, + "grad_norm": 0.10579892247915268, + "learning_rate": 4.010746210744153e-06, + "loss": 0.0011, + "step": 189300 + }, + { + "epoch": 1.2141755362646032, + "grad_norm": 0.07977374643087387, + "learning_rate": 4.010197577698287e-06, + "loss": 0.0011, + "step": 189310 + }, + { + "epoch": 1.2142396731583893, + "grad_norm": 0.1373594105243683, + "learning_rate": 4.0096489570553554e-06, + "loss": 0.0016, + "step": 189320 + }, + { + "epoch": 1.2143038100521752, + "grad_norm": 0.03002183511853218, + "learning_rate": 4.009100348822228e-06, + "loss": 0.0035, + "step": 189330 + }, + { + "epoch": 1.2143679469459614, + "grad_norm": 0.1346518099308014, + "learning_rate": 4.0085517530057825e-06, + "loss": 0.0012, + "step": 189340 + }, + { + "epoch": 1.2144320838397475, + "grad_norm": 0.07705426216125488, + "learning_rate": 4.008003169612891e-06, + "loss": 0.0008, + "step": 189350 + }, + { + "epoch": 1.2144962207335337, + "grad_norm": 0.12079522758722305, + "learning_rate": 4.007454598650429e-06, + "loss": 0.0012, + "step": 189360 + }, + { + "epoch": 1.2145603576273198, + "grad_norm": 0.43452203273773193, + "learning_rate": 4.0069060401252715e-06, + "loss": 0.0028, + "step": 189370 + }, + { + "epoch": 1.2146244945211058, + "grad_norm": 0.06812382489442825, + "learning_rate": 4.006357494044289e-06, + "loss": 0.0022, + "step": 189380 + }, + { + "epoch": 1.214688631414892, + "grad_norm": 0.18466074764728546, + "learning_rate": 4.005808960414359e-06, + "loss": 0.0022, + "step": 189390 + }, + { + "epoch": 1.214752768308678, + "grad_norm": 0.08517183363437653, + "learning_rate": 4.005260439242354e-06, + "loss": 0.0009, + "step": 189400 + }, + { + "epoch": 1.2148169052024642, + "grad_norm": 0.16007645428180695, + "learning_rate": 4.004711930535145e-06, + "loss": 0.001, + "step": 189410 + }, + { + "epoch": 1.2148810420962501, + "grad_norm": 0.04669779911637306, + "learning_rate": 4.004163434299606e-06, + "loss": 0.0013, + "step": 189420 + }, + { + "epoch": 1.2149451789900363, + "grad_norm": 0.0766749456524849, + "learning_rate": 4.003614950542614e-06, + "loss": 0.0026, + "step": 189430 + }, + { + "epoch": 1.2150093158838224, + "grad_norm": 0.03686497360467911, + "learning_rate": 4.003066479271035e-06, + "loss": 0.0038, + "step": 189440 + }, + { + "epoch": 1.2150734527776086, + "grad_norm": 0.05317874625325203, + "learning_rate": 4.002518020491748e-06, + "loss": 0.0022, + "step": 189450 + }, + { + "epoch": 1.2151375896713947, + "grad_norm": 0.0613647922873497, + "learning_rate": 4.001969574211623e-06, + "loss": 0.0016, + "step": 189460 + }, + { + "epoch": 1.2152017265651807, + "grad_norm": 0.20941519737243652, + "learning_rate": 4.001421140437533e-06, + "loss": 0.001, + "step": 189470 + }, + { + "epoch": 1.2152658634589668, + "grad_norm": 0.043185021728277206, + "learning_rate": 4.000872719176347e-06, + "loss": 0.0012, + "step": 189480 + }, + { + "epoch": 1.215330000352753, + "grad_norm": 0.03593922033905983, + "learning_rate": 4.000324310434943e-06, + "loss": 0.0018, + "step": 189490 + }, + { + "epoch": 1.215394137246539, + "grad_norm": 0.14687849581241608, + "learning_rate": 3.999775914220188e-06, + "loss": 0.0013, + "step": 189500 + }, + { + "epoch": 1.215458274140325, + "grad_norm": 0.043099600821733475, + "learning_rate": 3.9992275305389555e-06, + "loss": 0.0022, + "step": 189510 + }, + { + "epoch": 1.2155224110341112, + "grad_norm": 0.10225958377122879, + "learning_rate": 3.998679159398118e-06, + "loss": 0.0019, + "step": 189520 + }, + { + "epoch": 1.2155865479278973, + "grad_norm": 0.051480717957019806, + "learning_rate": 3.9981308008045464e-06, + "loss": 0.001, + "step": 189530 + }, + { + "epoch": 1.2156506848216835, + "grad_norm": 0.11180371046066284, + "learning_rate": 3.997582454765112e-06, + "loss": 0.0018, + "step": 189540 + }, + { + "epoch": 1.2157148217154694, + "grad_norm": 0.07143254578113556, + "learning_rate": 3.997034121286685e-06, + "loss": 0.0014, + "step": 189550 + }, + { + "epoch": 1.2157789586092556, + "grad_norm": 0.08327856659889221, + "learning_rate": 3.9964858003761385e-06, + "loss": 0.0014, + "step": 189560 + }, + { + "epoch": 1.2158430955030417, + "grad_norm": 0.06372194737195969, + "learning_rate": 3.995937492040341e-06, + "loss": 0.0011, + "step": 189570 + }, + { + "epoch": 1.2159072323968279, + "grad_norm": 0.29604244232177734, + "learning_rate": 3.995389196286165e-06, + "loss": 0.003, + "step": 189580 + }, + { + "epoch": 1.2159713692906138, + "grad_norm": 0.11042928695678711, + "learning_rate": 3.99484091312048e-06, + "loss": 0.0022, + "step": 189590 + }, + { + "epoch": 1.2160355061844, + "grad_norm": 0.070286825299263, + "learning_rate": 3.9942926425501574e-06, + "loss": 0.0019, + "step": 189600 + }, + { + "epoch": 1.216099643078186, + "grad_norm": 0.016711633652448654, + "learning_rate": 3.993744384582065e-06, + "loss": 0.0029, + "step": 189610 + }, + { + "epoch": 1.2161637799719722, + "grad_norm": 0.030171751976013184, + "learning_rate": 3.993196139223077e-06, + "loss": 0.0022, + "step": 189620 + }, + { + "epoch": 1.2162279168657584, + "grad_norm": 0.1727365404367447, + "learning_rate": 3.99264790648006e-06, + "loss": 0.0015, + "step": 189630 + }, + { + "epoch": 1.2162920537595443, + "grad_norm": 0.00967460311949253, + "learning_rate": 3.992099686359883e-06, + "loss": 0.0017, + "step": 189640 + }, + { + "epoch": 1.2163561906533304, + "grad_norm": 0.1014925017952919, + "learning_rate": 3.99155147886942e-06, + "loss": 0.0019, + "step": 189650 + }, + { + "epoch": 1.2164203275471166, + "grad_norm": 0.20213359594345093, + "learning_rate": 3.9910032840155355e-06, + "loss": 0.0013, + "step": 189660 + }, + { + "epoch": 1.2164844644409027, + "grad_norm": 0.045818161219358444, + "learning_rate": 3.990455101805102e-06, + "loss": 0.0018, + "step": 189670 + }, + { + "epoch": 1.2165486013346887, + "grad_norm": 0.06107959896326065, + "learning_rate": 3.989906932244987e-06, + "loss": 0.0013, + "step": 189680 + }, + { + "epoch": 1.2166127382284748, + "grad_norm": 0.10544388741254807, + "learning_rate": 3.989358775342062e-06, + "loss": 0.0012, + "step": 189690 + }, + { + "epoch": 1.216676875122261, + "grad_norm": 0.05415049567818642, + "learning_rate": 3.9888106311031914e-06, + "loss": 0.0021, + "step": 189700 + }, + { + "epoch": 1.2167410120160471, + "grad_norm": 0.2054257094860077, + "learning_rate": 3.988262499535248e-06, + "loss": 0.0014, + "step": 189710 + }, + { + "epoch": 1.2168051489098333, + "grad_norm": 0.1231830045580864, + "learning_rate": 3.987714380645097e-06, + "loss": 0.0018, + "step": 189720 + }, + { + "epoch": 1.2168692858036192, + "grad_norm": 0.025744184851646423, + "learning_rate": 3.987166274439609e-06, + "loss": 0.0007, + "step": 189730 + }, + { + "epoch": 1.2169334226974053, + "grad_norm": 0.0822049155831337, + "learning_rate": 3.9866181809256515e-06, + "loss": 0.0014, + "step": 189740 + }, + { + "epoch": 1.2169975595911915, + "grad_norm": 0.014606112614274025, + "learning_rate": 3.986070100110093e-06, + "loss": 0.0018, + "step": 189750 + }, + { + "epoch": 1.2170616964849774, + "grad_norm": 0.08604684472084045, + "learning_rate": 3.9855220319998e-06, + "loss": 0.0026, + "step": 189760 + }, + { + "epoch": 1.2171258333787636, + "grad_norm": 0.0861067995429039, + "learning_rate": 3.984973976601641e-06, + "loss": 0.0013, + "step": 189770 + }, + { + "epoch": 1.2171899702725497, + "grad_norm": 0.29283806681632996, + "learning_rate": 3.9844259339224835e-06, + "loss": 0.0016, + "step": 189780 + }, + { + "epoch": 1.2172541071663359, + "grad_norm": 0.10819019377231598, + "learning_rate": 3.983877903969195e-06, + "loss": 0.0013, + "step": 189790 + }, + { + "epoch": 1.217318244060122, + "grad_norm": 0.062073834240436554, + "learning_rate": 3.983329886748643e-06, + "loss": 0.0009, + "step": 189800 + }, + { + "epoch": 1.217382380953908, + "grad_norm": 0.09325316548347473, + "learning_rate": 3.982781882267693e-06, + "loss": 0.0019, + "step": 189810 + }, + { + "epoch": 1.217446517847694, + "grad_norm": 0.14637985825538635, + "learning_rate": 3.9822338905332145e-06, + "loss": 0.0023, + "step": 189820 + }, + { + "epoch": 1.2175106547414802, + "grad_norm": 0.05455147847533226, + "learning_rate": 3.981685911552072e-06, + "loss": 0.0009, + "step": 189830 + }, + { + "epoch": 1.2175747916352664, + "grad_norm": 0.040302470326423645, + "learning_rate": 3.981137945331133e-06, + "loss": 0.0016, + "step": 189840 + }, + { + "epoch": 1.2176389285290523, + "grad_norm": 0.1425817310810089, + "learning_rate": 3.980589991877263e-06, + "loss": 0.0019, + "step": 189850 + }, + { + "epoch": 1.2177030654228385, + "grad_norm": 0.1364559829235077, + "learning_rate": 3.98004205119733e-06, + "loss": 0.0013, + "step": 189860 + }, + { + "epoch": 1.2177672023166246, + "grad_norm": 0.1536790430545807, + "learning_rate": 3.9794941232981985e-06, + "loss": 0.0014, + "step": 189870 + }, + { + "epoch": 1.2178313392104108, + "grad_norm": 0.09738431870937347, + "learning_rate": 3.978946208186736e-06, + "loss": 0.0017, + "step": 189880 + }, + { + "epoch": 1.217895476104197, + "grad_norm": 0.06185058876872063, + "learning_rate": 3.978398305869805e-06, + "loss": 0.0011, + "step": 189890 + }, + { + "epoch": 1.2179596129979828, + "grad_norm": 0.20613211393356323, + "learning_rate": 3.977850416354275e-06, + "loss": 0.001, + "step": 189900 + }, + { + "epoch": 1.218023749891769, + "grad_norm": 0.16253237426280975, + "learning_rate": 3.9773025396470095e-06, + "loss": 0.0014, + "step": 189910 + }, + { + "epoch": 1.2180878867855551, + "grad_norm": 0.11593171209096909, + "learning_rate": 3.976754675754875e-06, + "loss": 0.0017, + "step": 189920 + }, + { + "epoch": 1.2181520236793413, + "grad_norm": 0.16497549414634705, + "learning_rate": 3.976206824684734e-06, + "loss": 0.001, + "step": 189930 + }, + { + "epoch": 1.2182161605731272, + "grad_norm": 0.14754164218902588, + "learning_rate": 3.975658986443454e-06, + "loss": 0.0029, + "step": 189940 + }, + { + "epoch": 1.2182802974669134, + "grad_norm": 0.09771906584501266, + "learning_rate": 3.975111161037899e-06, + "loss": 0.0021, + "step": 189950 + }, + { + "epoch": 1.2183444343606995, + "grad_norm": 0.08924322575330734, + "learning_rate": 3.974563348474933e-06, + "loss": 0.0017, + "step": 189960 + }, + { + "epoch": 1.2184085712544857, + "grad_norm": 0.021901067346334457, + "learning_rate": 3.9740155487614196e-06, + "loss": 0.0011, + "step": 189970 + }, + { + "epoch": 1.2184727081482716, + "grad_norm": 0.07129445672035217, + "learning_rate": 3.9734677619042276e-06, + "loss": 0.0022, + "step": 189980 + }, + { + "epoch": 1.2185368450420577, + "grad_norm": 0.13516168296337128, + "learning_rate": 3.972919987910217e-06, + "loss": 0.0028, + "step": 189990 + }, + { + "epoch": 1.2186009819358439, + "grad_norm": 0.15106359124183655, + "learning_rate": 3.972372226786253e-06, + "loss": 0.0011, + "step": 190000 + }, + { + "epoch": 1.21866511882963, + "grad_norm": 0.1575210988521576, + "learning_rate": 3.9718244785392e-06, + "loss": 0.0014, + "step": 190010 + }, + { + "epoch": 1.218729255723416, + "grad_norm": 0.03287835791707039, + "learning_rate": 3.9712767431759216e-06, + "loss": 0.0013, + "step": 190020 + }, + { + "epoch": 1.218793392617202, + "grad_norm": 0.16302621364593506, + "learning_rate": 3.9707290207032785e-06, + "loss": 0.0017, + "step": 190030 + }, + { + "epoch": 1.2188575295109882, + "grad_norm": 0.03399442136287689, + "learning_rate": 3.970181311128139e-06, + "loss": 0.0008, + "step": 190040 + }, + { + "epoch": 1.2189216664047744, + "grad_norm": 0.010786185972392559, + "learning_rate": 3.9696336144573625e-06, + "loss": 0.0011, + "step": 190050 + }, + { + "epoch": 1.2189858032985605, + "grad_norm": 0.044280312955379486, + "learning_rate": 3.969085930697812e-06, + "loss": 0.001, + "step": 190060 + }, + { + "epoch": 1.2190499401923465, + "grad_norm": 0.009728172793984413, + "learning_rate": 3.968538259856354e-06, + "loss": 0.001, + "step": 190070 + }, + { + "epoch": 1.2191140770861326, + "grad_norm": 0.029003439471125603, + "learning_rate": 3.9679906019398475e-06, + "loss": 0.0021, + "step": 190080 + }, + { + "epoch": 1.2191782139799188, + "grad_norm": 0.07220987975597382, + "learning_rate": 3.967442956955157e-06, + "loss": 0.0012, + "step": 190090 + }, + { + "epoch": 1.219242350873705, + "grad_norm": 0.2752041816711426, + "learning_rate": 3.966895324909143e-06, + "loss": 0.0076, + "step": 190100 + }, + { + "epoch": 1.2193064877674908, + "grad_norm": 0.024868693202733994, + "learning_rate": 3.96634770580867e-06, + "loss": 0.0018, + "step": 190110 + }, + { + "epoch": 1.219370624661277, + "grad_norm": 0.07499602437019348, + "learning_rate": 3.965800099660598e-06, + "loss": 0.0016, + "step": 190120 + }, + { + "epoch": 1.2194347615550631, + "grad_norm": 0.2519768476486206, + "learning_rate": 3.965252506471791e-06, + "loss": 0.0018, + "step": 190130 + }, + { + "epoch": 1.2194988984488493, + "grad_norm": 0.02175537869334221, + "learning_rate": 3.964704926249109e-06, + "loss": 0.0006, + "step": 190140 + }, + { + "epoch": 1.2195630353426354, + "grad_norm": 0.04755594581365585, + "learning_rate": 3.964157358999415e-06, + "loss": 0.0009, + "step": 190150 + }, + { + "epoch": 1.2196271722364214, + "grad_norm": 0.09518511593341827, + "learning_rate": 3.963609804729568e-06, + "loss": 0.0012, + "step": 190160 + }, + { + "epoch": 1.2196913091302075, + "grad_norm": 0.12631119787693024, + "learning_rate": 3.963062263446432e-06, + "loss": 0.0015, + "step": 190170 + }, + { + "epoch": 1.2197554460239937, + "grad_norm": 0.08314120769500732, + "learning_rate": 3.962514735156867e-06, + "loss": 0.0026, + "step": 190180 + }, + { + "epoch": 1.2198195829177796, + "grad_norm": 0.1589404046535492, + "learning_rate": 3.961967219867734e-06, + "loss": 0.003, + "step": 190190 + }, + { + "epoch": 1.2198837198115657, + "grad_norm": 0.04208589717745781, + "learning_rate": 3.961419717585892e-06, + "loss": 0.002, + "step": 190200 + }, + { + "epoch": 1.2199478567053519, + "grad_norm": 0.09710143506526947, + "learning_rate": 3.960872228318204e-06, + "loss": 0.0014, + "step": 190210 + }, + { + "epoch": 1.220011993599138, + "grad_norm": 0.06639356911182404, + "learning_rate": 3.96032475207153e-06, + "loss": 0.0015, + "step": 190220 + }, + { + "epoch": 1.2200761304929242, + "grad_norm": 0.01445319876074791, + "learning_rate": 3.95977728885273e-06, + "loss": 0.0011, + "step": 190230 + }, + { + "epoch": 1.22014026738671, + "grad_norm": 0.07329652458429337, + "learning_rate": 3.959229838668665e-06, + "loss": 0.0022, + "step": 190240 + }, + { + "epoch": 1.2202044042804963, + "grad_norm": 0.03763607144355774, + "learning_rate": 3.958682401526191e-06, + "loss": 0.0031, + "step": 190250 + }, + { + "epoch": 1.2202685411742824, + "grad_norm": 0.10107597708702087, + "learning_rate": 3.9581349774321736e-06, + "loss": 0.0012, + "step": 190260 + }, + { + "epoch": 1.2203326780680686, + "grad_norm": 0.07468398660421371, + "learning_rate": 3.957587566393468e-06, + "loss": 0.0015, + "step": 190270 + }, + { + "epoch": 1.2203968149618545, + "grad_norm": 0.1615559607744217, + "learning_rate": 3.957040168416936e-06, + "loss": 0.0012, + "step": 190280 + }, + { + "epoch": 1.2204609518556406, + "grad_norm": 0.03799016401171684, + "learning_rate": 3.956492783509436e-06, + "loss": 0.002, + "step": 190290 + }, + { + "epoch": 1.2205250887494268, + "grad_norm": 0.06666628271341324, + "learning_rate": 3.955945411677827e-06, + "loss": 0.0011, + "step": 190300 + }, + { + "epoch": 1.220589225643213, + "grad_norm": 0.06822290271520615, + "learning_rate": 3.955398052928968e-06, + "loss": 0.0012, + "step": 190310 + }, + { + "epoch": 1.220653362536999, + "grad_norm": 0.030858352780342102, + "learning_rate": 3.954850707269718e-06, + "loss": 0.0022, + "step": 190320 + }, + { + "epoch": 1.220717499430785, + "grad_norm": 0.09506650269031525, + "learning_rate": 3.9543033747069355e-06, + "loss": 0.0009, + "step": 190330 + }, + { + "epoch": 1.2207816363245712, + "grad_norm": 0.014402428641915321, + "learning_rate": 3.95375605524748e-06, + "loss": 0.0014, + "step": 190340 + }, + { + "epoch": 1.2208457732183573, + "grad_norm": 0.11958342790603638, + "learning_rate": 3.953208748898208e-06, + "loss": 0.0012, + "step": 190350 + }, + { + "epoch": 1.2209099101121434, + "grad_norm": 0.07521621882915497, + "learning_rate": 3.952661455665978e-06, + "loss": 0.0016, + "step": 190360 + }, + { + "epoch": 1.2209740470059294, + "grad_norm": 0.11715155839920044, + "learning_rate": 3.952114175557649e-06, + "loss": 0.0051, + "step": 190370 + }, + { + "epoch": 1.2210381838997155, + "grad_norm": 0.0628410205245018, + "learning_rate": 3.951566908580078e-06, + "loss": 0.0013, + "step": 190380 + }, + { + "epoch": 1.2211023207935017, + "grad_norm": 0.01683405227959156, + "learning_rate": 3.951019654740124e-06, + "loss": 0.0009, + "step": 190390 + }, + { + "epoch": 1.2211664576872878, + "grad_norm": 0.17968235909938812, + "learning_rate": 3.950472414044642e-06, + "loss": 0.0014, + "step": 190400 + }, + { + "epoch": 1.2212305945810737, + "grad_norm": 0.05620647221803665, + "learning_rate": 3.949925186500492e-06, + "loss": 0.0024, + "step": 190410 + }, + { + "epoch": 1.22129473147486, + "grad_norm": 0.12351801246404648, + "learning_rate": 3.949377972114529e-06, + "loss": 0.0014, + "step": 190420 + }, + { + "epoch": 1.221358868368646, + "grad_norm": 0.1264180839061737, + "learning_rate": 3.948830770893612e-06, + "loss": 0.0019, + "step": 190430 + }, + { + "epoch": 1.2214230052624322, + "grad_norm": 0.11731204390525818, + "learning_rate": 3.948283582844595e-06, + "loss": 0.0027, + "step": 190440 + }, + { + "epoch": 1.2214871421562181, + "grad_norm": 0.06938435882329941, + "learning_rate": 3.9477364079743386e-06, + "loss": 0.0011, + "step": 190450 + }, + { + "epoch": 1.2215512790500043, + "grad_norm": 0.06576994806528091, + "learning_rate": 3.947189246289695e-06, + "loss": 0.0016, + "step": 190460 + }, + { + "epoch": 1.2216154159437904, + "grad_norm": 0.09067405760288239, + "learning_rate": 3.946642097797524e-06, + "loss": 0.0016, + "step": 190470 + }, + { + "epoch": 1.2216795528375766, + "grad_norm": 0.0050344220362603664, + "learning_rate": 3.946094962504679e-06, + "loss": 0.0013, + "step": 190480 + }, + { + "epoch": 1.2217436897313627, + "grad_norm": 0.05471819266676903, + "learning_rate": 3.945547840418017e-06, + "loss": 0.0011, + "step": 190490 + }, + { + "epoch": 1.2218078266251486, + "grad_norm": 0.12141404300928116, + "learning_rate": 3.945000731544396e-06, + "loss": 0.0047, + "step": 190500 + }, + { + "epoch": 1.2218719635189348, + "grad_norm": 0.011203257367014885, + "learning_rate": 3.944453635890668e-06, + "loss": 0.0017, + "step": 190510 + }, + { + "epoch": 1.221936100412721, + "grad_norm": 0.15600796043872833, + "learning_rate": 3.943906553463693e-06, + "loss": 0.0044, + "step": 190520 + }, + { + "epoch": 1.222000237306507, + "grad_norm": 0.060006964951753616, + "learning_rate": 3.943359484270321e-06, + "loss": 0.0025, + "step": 190530 + }, + { + "epoch": 1.222064374200293, + "grad_norm": 0.06969252228736877, + "learning_rate": 3.9428124283174116e-06, + "loss": 0.0012, + "step": 190540 + }, + { + "epoch": 1.2221285110940792, + "grad_norm": 0.1027282178401947, + "learning_rate": 3.942265385611817e-06, + "loss": 0.0013, + "step": 190550 + }, + { + "epoch": 1.2221926479878653, + "grad_norm": 0.010004268027842045, + "learning_rate": 3.941718356160393e-06, + "loss": 0.0009, + "step": 190560 + }, + { + "epoch": 1.2222567848816515, + "grad_norm": 0.04696183651685715, + "learning_rate": 3.941171339969995e-06, + "loss": 0.0028, + "step": 190570 + }, + { + "epoch": 1.2223209217754376, + "grad_norm": 0.02193659543991089, + "learning_rate": 3.9406243370474776e-06, + "loss": 0.0019, + "step": 190580 + }, + { + "epoch": 1.2223850586692235, + "grad_norm": 0.11040711402893066, + "learning_rate": 3.940077347399693e-06, + "loss": 0.0019, + "step": 190590 + }, + { + "epoch": 1.2224491955630097, + "grad_norm": 0.08952760696411133, + "learning_rate": 3.939530371033496e-06, + "loss": 0.0018, + "step": 190600 + }, + { + "epoch": 1.2225133324567958, + "grad_norm": 0.0674658939242363, + "learning_rate": 3.938983407955742e-06, + "loss": 0.001, + "step": 190610 + }, + { + "epoch": 1.2225774693505818, + "grad_norm": 0.16172173619270325, + "learning_rate": 3.938436458173286e-06, + "loss": 0.0022, + "step": 190620 + }, + { + "epoch": 1.222641606244368, + "grad_norm": 0.03608992323279381, + "learning_rate": 3.937889521692978e-06, + "loss": 0.0013, + "step": 190630 + }, + { + "epoch": 1.222705743138154, + "grad_norm": 0.05420034006237984, + "learning_rate": 3.9373425985216725e-06, + "loss": 0.0022, + "step": 190640 + }, + { + "epoch": 1.2227698800319402, + "grad_norm": 0.038899801671504974, + "learning_rate": 3.936795688666226e-06, + "loss": 0.002, + "step": 190650 + }, + { + "epoch": 1.2228340169257264, + "grad_norm": 0.14675188064575195, + "learning_rate": 3.936248792133488e-06, + "loss": 0.002, + "step": 190660 + }, + { + "epoch": 1.2228981538195123, + "grad_norm": 0.03291318193078041, + "learning_rate": 3.935701908930314e-06, + "loss": 0.0014, + "step": 190670 + }, + { + "epoch": 1.2229622907132984, + "grad_norm": 0.22007884085178375, + "learning_rate": 3.935155039063555e-06, + "loss": 0.0022, + "step": 190680 + }, + { + "epoch": 1.2230264276070846, + "grad_norm": 0.023851877078413963, + "learning_rate": 3.934608182540065e-06, + "loss": 0.0006, + "step": 190690 + }, + { + "epoch": 1.2230905645008707, + "grad_norm": 0.01468751858919859, + "learning_rate": 3.934061339366694e-06, + "loss": 0.0015, + "step": 190700 + }, + { + "epoch": 1.2231547013946567, + "grad_norm": 0.08310827612876892, + "learning_rate": 3.933514509550298e-06, + "loss": 0.0014, + "step": 190710 + }, + { + "epoch": 1.2232188382884428, + "grad_norm": 0.0643293634057045, + "learning_rate": 3.932967693097727e-06, + "loss": 0.0061, + "step": 190720 + }, + { + "epoch": 1.223282975182229, + "grad_norm": 0.025908587500452995, + "learning_rate": 3.932420890015834e-06, + "loss": 0.0015, + "step": 190730 + }, + { + "epoch": 1.223347112076015, + "grad_norm": 0.0017419327050447464, + "learning_rate": 3.931874100311468e-06, + "loss": 0.0029, + "step": 190740 + }, + { + "epoch": 1.2234112489698012, + "grad_norm": 0.07896079123020172, + "learning_rate": 3.931327323991484e-06, + "loss": 0.0016, + "step": 190750 + }, + { + "epoch": 1.2234753858635872, + "grad_norm": 0.03480801731348038, + "learning_rate": 3.930780561062733e-06, + "loss": 0.0015, + "step": 190760 + }, + { + "epoch": 1.2235395227573733, + "grad_norm": 0.10877152532339096, + "learning_rate": 3.930233811532066e-06, + "loss": 0.001, + "step": 190770 + }, + { + "epoch": 1.2236036596511595, + "grad_norm": 0.14966726303100586, + "learning_rate": 3.929687075406332e-06, + "loss": 0.0016, + "step": 190780 + }, + { + "epoch": 1.2236677965449456, + "grad_norm": 0.09734129905700684, + "learning_rate": 3.929140352692387e-06, + "loss": 0.0013, + "step": 190790 + }, + { + "epoch": 1.2237319334387315, + "grad_norm": 0.11551859229803085, + "learning_rate": 3.928593643397076e-06, + "loss": 0.0016, + "step": 190800 + }, + { + "epoch": 1.2237960703325177, + "grad_norm": 0.07634082436561584, + "learning_rate": 3.928046947527254e-06, + "loss": 0.0014, + "step": 190810 + }, + { + "epoch": 1.2238602072263038, + "grad_norm": 0.04561929404735565, + "learning_rate": 3.927500265089769e-06, + "loss": 0.0021, + "step": 190820 + }, + { + "epoch": 1.22392434412009, + "grad_norm": 0.026405436918139458, + "learning_rate": 3.926953596091473e-06, + "loss": 0.0006, + "step": 190830 + }, + { + "epoch": 1.2239884810138761, + "grad_norm": 0.05360418185591698, + "learning_rate": 3.926406940539215e-06, + "loss": 0.0008, + "step": 190840 + }, + { + "epoch": 1.224052617907662, + "grad_norm": 0.10631723701953888, + "learning_rate": 3.925860298439845e-06, + "loss": 0.0015, + "step": 190850 + }, + { + "epoch": 1.2241167548014482, + "grad_norm": 0.044981710612773895, + "learning_rate": 3.925313669800213e-06, + "loss": 0.0016, + "step": 190860 + }, + { + "epoch": 1.2241808916952344, + "grad_norm": 0.006090010050684214, + "learning_rate": 3.92476705462717e-06, + "loss": 0.0021, + "step": 190870 + }, + { + "epoch": 1.2242450285890203, + "grad_norm": 0.07183487713336945, + "learning_rate": 3.924220452927563e-06, + "loss": 0.0028, + "step": 190880 + }, + { + "epoch": 1.2243091654828064, + "grad_norm": 0.10996677726507187, + "learning_rate": 3.9236738647082435e-06, + "loss": 0.0014, + "step": 190890 + }, + { + "epoch": 1.2243733023765926, + "grad_norm": 0.12826906144618988, + "learning_rate": 3.923127289976059e-06, + "loss": 0.0012, + "step": 190900 + }, + { + "epoch": 1.2244374392703787, + "grad_norm": 0.06789213418960571, + "learning_rate": 3.922580728737858e-06, + "loss": 0.0011, + "step": 190910 + }, + { + "epoch": 1.2245015761641649, + "grad_norm": 0.18491800129413605, + "learning_rate": 3.922034181000492e-06, + "loss": 0.0033, + "step": 190920 + }, + { + "epoch": 1.2245657130579508, + "grad_norm": 0.04658259078860283, + "learning_rate": 3.921487646770808e-06, + "loss": 0.0015, + "step": 190930 + }, + { + "epoch": 1.224629849951737, + "grad_norm": 0.16573143005371094, + "learning_rate": 3.9209411260556555e-06, + "loss": 0.0019, + "step": 190940 + }, + { + "epoch": 1.224693986845523, + "grad_norm": 0.04675838351249695, + "learning_rate": 3.92039461886188e-06, + "loss": 0.0013, + "step": 190950 + }, + { + "epoch": 1.2247581237393093, + "grad_norm": 0.025246139615774155, + "learning_rate": 3.919848125196333e-06, + "loss": 0.0014, + "step": 190960 + }, + { + "epoch": 1.2248222606330952, + "grad_norm": 0.046005018055438995, + "learning_rate": 3.919301645065861e-06, + "loss": 0.0042, + "step": 190970 + }, + { + "epoch": 1.2248863975268813, + "grad_norm": 0.14488635957241058, + "learning_rate": 3.918755178477311e-06, + "loss": 0.0034, + "step": 190980 + }, + { + "epoch": 1.2249505344206675, + "grad_norm": 0.0798855572938919, + "learning_rate": 3.918208725437531e-06, + "loss": 0.002, + "step": 190990 + }, + { + "epoch": 1.2250146713144536, + "grad_norm": 0.03376210480928421, + "learning_rate": 3.9176622859533695e-06, + "loss": 0.002, + "step": 191000 + }, + { + "epoch": 1.2250788082082398, + "grad_norm": 0.07722453027963638, + "learning_rate": 3.917115860031673e-06, + "loss": 0.0014, + "step": 191010 + }, + { + "epoch": 1.2251429451020257, + "grad_norm": 0.17970149219036102, + "learning_rate": 3.91656944767929e-06, + "loss": 0.0018, + "step": 191020 + }, + { + "epoch": 1.2252070819958119, + "grad_norm": 0.2177116721868515, + "learning_rate": 3.916023048903063e-06, + "loss": 0.0013, + "step": 191030 + }, + { + "epoch": 1.225271218889598, + "grad_norm": 0.1654694676399231, + "learning_rate": 3.915476663709845e-06, + "loss": 0.0024, + "step": 191040 + }, + { + "epoch": 1.2253353557833842, + "grad_norm": 0.10967286676168442, + "learning_rate": 3.914930292106477e-06, + "loss": 0.002, + "step": 191050 + }, + { + "epoch": 1.22539949267717, + "grad_norm": 0.09925783425569534, + "learning_rate": 3.914383934099809e-06, + "loss": 0.0017, + "step": 191060 + }, + { + "epoch": 1.2254636295709562, + "grad_norm": 0.2531794309616089, + "learning_rate": 3.913837589696687e-06, + "loss": 0.0018, + "step": 191070 + }, + { + "epoch": 1.2255277664647424, + "grad_norm": 0.025740792974829674, + "learning_rate": 3.913291258903955e-06, + "loss": 0.001, + "step": 191080 + }, + { + "epoch": 1.2255919033585285, + "grad_norm": 0.02483651600778103, + "learning_rate": 3.912744941728461e-06, + "loss": 0.0018, + "step": 191090 + }, + { + "epoch": 1.2256560402523144, + "grad_norm": 0.04175538569688797, + "learning_rate": 3.912198638177049e-06, + "loss": 0.0017, + "step": 191100 + }, + { + "epoch": 1.2257201771461006, + "grad_norm": 0.2951119840145111, + "learning_rate": 3.911652348256567e-06, + "loss": 0.0015, + "step": 191110 + }, + { + "epoch": 1.2257843140398867, + "grad_norm": 0.3507690131664276, + "learning_rate": 3.911106071973858e-06, + "loss": 0.0015, + "step": 191120 + }, + { + "epoch": 1.225848450933673, + "grad_norm": 0.06317662447690964, + "learning_rate": 3.910559809335769e-06, + "loss": 0.002, + "step": 191130 + }, + { + "epoch": 1.2259125878274588, + "grad_norm": 0.0028338609263300896, + "learning_rate": 3.910013560349143e-06, + "loss": 0.0012, + "step": 191140 + }, + { + "epoch": 1.225976724721245, + "grad_norm": 0.03527184948325157, + "learning_rate": 3.909467325020826e-06, + "loss": 0.0017, + "step": 191150 + }, + { + "epoch": 1.2260408616150311, + "grad_norm": 0.014810839667916298, + "learning_rate": 3.908921103357663e-06, + "loss": 0.0022, + "step": 191160 + }, + { + "epoch": 1.2261049985088173, + "grad_norm": 0.1442299634218216, + "learning_rate": 3.908374895366499e-06, + "loss": 0.0015, + "step": 191170 + }, + { + "epoch": 1.2261691354026034, + "grad_norm": 0.15262559056282043, + "learning_rate": 3.907828701054177e-06, + "loss": 0.0017, + "step": 191180 + }, + { + "epoch": 1.2262332722963893, + "grad_norm": 0.03376549482345581, + "learning_rate": 3.907282520427543e-06, + "loss": 0.0015, + "step": 191190 + }, + { + "epoch": 1.2262974091901755, + "grad_norm": 0.16245616972446442, + "learning_rate": 3.9067363534934385e-06, + "loss": 0.0017, + "step": 191200 + }, + { + "epoch": 1.2263615460839616, + "grad_norm": 0.09553241729736328, + "learning_rate": 3.906190200258709e-06, + "loss": 0.0025, + "step": 191210 + }, + { + "epoch": 1.2264256829777478, + "grad_norm": 0.05627526715397835, + "learning_rate": 3.9056440607302e-06, + "loss": 0.0011, + "step": 191220 + }, + { + "epoch": 1.2264898198715337, + "grad_norm": 0.028180358931422234, + "learning_rate": 3.905097934914751e-06, + "loss": 0.002, + "step": 191230 + }, + { + "epoch": 1.2265539567653199, + "grad_norm": 0.07987452298402786, + "learning_rate": 3.9045518228192085e-06, + "loss": 0.0014, + "step": 191240 + }, + { + "epoch": 1.226618093659106, + "grad_norm": 0.04825502634048462, + "learning_rate": 3.904005724450413e-06, + "loss": 0.0015, + "step": 191250 + }, + { + "epoch": 1.2266822305528922, + "grad_norm": 0.0545743964612484, + "learning_rate": 3.903459639815212e-06, + "loss": 0.0023, + "step": 191260 + }, + { + "epoch": 1.2267463674466783, + "grad_norm": 0.1914282590150833, + "learning_rate": 3.902913568920443e-06, + "loss": 0.0016, + "step": 191270 + }, + { + "epoch": 1.2268105043404642, + "grad_norm": 0.05901381000876427, + "learning_rate": 3.902367511772952e-06, + "loss": 0.0011, + "step": 191280 + }, + { + "epoch": 1.2268746412342504, + "grad_norm": 0.07726976275444031, + "learning_rate": 3.90182146837958e-06, + "loss": 0.0011, + "step": 191290 + }, + { + "epoch": 1.2269387781280365, + "grad_norm": 0.08275720477104187, + "learning_rate": 3.901275438747171e-06, + "loss": 0.0018, + "step": 191300 + }, + { + "epoch": 1.2270029150218225, + "grad_norm": 0.09165611863136292, + "learning_rate": 3.900729422882564e-06, + "loss": 0.0014, + "step": 191310 + }, + { + "epoch": 1.2270670519156086, + "grad_norm": 0.0632534846663475, + "learning_rate": 3.900183420792605e-06, + "loss": 0.0014, + "step": 191320 + }, + { + "epoch": 1.2271311888093948, + "grad_norm": 0.024312160909175873, + "learning_rate": 3.899637432484132e-06, + "loss": 0.0022, + "step": 191330 + }, + { + "epoch": 1.227195325703181, + "grad_norm": 0.05788853392004967, + "learning_rate": 3.899091457963989e-06, + "loss": 0.0012, + "step": 191340 + }, + { + "epoch": 1.227259462596967, + "grad_norm": 0.0016625310527160764, + "learning_rate": 3.898545497239018e-06, + "loss": 0.0011, + "step": 191350 + }, + { + "epoch": 1.227323599490753, + "grad_norm": 0.13773536682128906, + "learning_rate": 3.897999550316057e-06, + "loss": 0.002, + "step": 191360 + }, + { + "epoch": 1.2273877363845391, + "grad_norm": 0.0493728332221508, + "learning_rate": 3.8974536172019514e-06, + "loss": 0.0017, + "step": 191370 + }, + { + "epoch": 1.2274518732783253, + "grad_norm": 0.1362447440624237, + "learning_rate": 3.896907697903538e-06, + "loss": 0.0019, + "step": 191380 + }, + { + "epoch": 1.2275160101721114, + "grad_norm": 0.0662623718380928, + "learning_rate": 3.896361792427662e-06, + "loss": 0.0015, + "step": 191390 + }, + { + "epoch": 1.2275801470658974, + "grad_norm": 0.264980286359787, + "learning_rate": 3.895815900781159e-06, + "loss": 0.0027, + "step": 191400 + }, + { + "epoch": 1.2276442839596835, + "grad_norm": 0.20691430568695068, + "learning_rate": 3.895270022970873e-06, + "loss": 0.002, + "step": 191410 + }, + { + "epoch": 1.2277084208534697, + "grad_norm": 0.045289549976587296, + "learning_rate": 3.8947241590036425e-06, + "loss": 0.0023, + "step": 191420 + }, + { + "epoch": 1.2277725577472558, + "grad_norm": 0.07116792351007462, + "learning_rate": 3.8941783088863094e-06, + "loss": 0.0022, + "step": 191430 + }, + { + "epoch": 1.227836694641042, + "grad_norm": 0.09005143493413925, + "learning_rate": 3.893632472625711e-06, + "loss": 0.0018, + "step": 191440 + }, + { + "epoch": 1.2279008315348279, + "grad_norm": 0.001668777083978057, + "learning_rate": 3.89308665022869e-06, + "loss": 0.0017, + "step": 191450 + }, + { + "epoch": 1.227964968428614, + "grad_norm": 0.11686328798532486, + "learning_rate": 3.892540841702083e-06, + "loss": 0.0012, + "step": 191460 + }, + { + "epoch": 1.2280291053224002, + "grad_norm": 0.04654378071427345, + "learning_rate": 3.8919950470527326e-06, + "loss": 0.0018, + "step": 191470 + }, + { + "epoch": 1.2280932422161863, + "grad_norm": 0.0269235260784626, + "learning_rate": 3.891449266287474e-06, + "loss": 0.0007, + "step": 191480 + }, + { + "epoch": 1.2281573791099722, + "grad_norm": 0.11890744417905807, + "learning_rate": 3.890903499413148e-06, + "loss": 0.0028, + "step": 191490 + }, + { + "epoch": 1.2282215160037584, + "grad_norm": 0.17330829799175262, + "learning_rate": 3.890357746436594e-06, + "loss": 0.0032, + "step": 191500 + }, + { + "epoch": 1.2282856528975445, + "grad_norm": 0.04410916566848755, + "learning_rate": 3.8898120073646515e-06, + "loss": 0.0006, + "step": 191510 + }, + { + "epoch": 1.2283497897913307, + "grad_norm": 0.046860549598932266, + "learning_rate": 3.889266282204158e-06, + "loss": 0.002, + "step": 191520 + }, + { + "epoch": 1.2284139266851166, + "grad_norm": 0.060270313173532486, + "learning_rate": 3.888720570961952e-06, + "loss": 0.0012, + "step": 191530 + }, + { + "epoch": 1.2284780635789028, + "grad_norm": 0.2538609802722931, + "learning_rate": 3.888174873644871e-06, + "loss": 0.0019, + "step": 191540 + }, + { + "epoch": 1.228542200472689, + "grad_norm": 0.04857063293457031, + "learning_rate": 3.887629190259754e-06, + "loss": 0.0013, + "step": 191550 + }, + { + "epoch": 1.228606337366475, + "grad_norm": 0.16587914526462555, + "learning_rate": 3.887083520813438e-06, + "loss": 0.0052, + "step": 191560 + }, + { + "epoch": 1.228670474260261, + "grad_norm": 0.050497233867645264, + "learning_rate": 3.886537865312761e-06, + "loss": 0.0007, + "step": 191570 + }, + { + "epoch": 1.2287346111540471, + "grad_norm": 0.010793877765536308, + "learning_rate": 3.88599222376456e-06, + "loss": 0.0021, + "step": 191580 + }, + { + "epoch": 1.2287987480478333, + "grad_norm": 0.1318853795528412, + "learning_rate": 3.885446596175673e-06, + "loss": 0.0014, + "step": 191590 + }, + { + "epoch": 1.2288628849416194, + "grad_norm": 0.17666688561439514, + "learning_rate": 3.884900982552936e-06, + "loss": 0.0017, + "step": 191600 + }, + { + "epoch": 1.2289270218354056, + "grad_norm": 0.04485802352428436, + "learning_rate": 3.884355382903187e-06, + "loss": 0.0012, + "step": 191610 + }, + { + "epoch": 1.2289911587291915, + "grad_norm": 0.05705088749527931, + "learning_rate": 3.8838097972332625e-06, + "loss": 0.0005, + "step": 191620 + }, + { + "epoch": 1.2290552956229777, + "grad_norm": 0.12357481569051743, + "learning_rate": 3.883264225549999e-06, + "loss": 0.0012, + "step": 191630 + }, + { + "epoch": 1.2291194325167638, + "grad_norm": 0.08283048868179321, + "learning_rate": 3.882718667860233e-06, + "loss": 0.0009, + "step": 191640 + }, + { + "epoch": 1.22918356941055, + "grad_norm": 0.041520070284605026, + "learning_rate": 3.8821731241708e-06, + "loss": 0.0045, + "step": 191650 + }, + { + "epoch": 1.2292477063043359, + "grad_norm": 0.030925313010811806, + "learning_rate": 3.881627594488538e-06, + "loss": 0.0031, + "step": 191660 + }, + { + "epoch": 1.229311843198122, + "grad_norm": 0.06241489574313164, + "learning_rate": 3.88108207882028e-06, + "loss": 0.0036, + "step": 191670 + }, + { + "epoch": 1.2293759800919082, + "grad_norm": 0.0917147770524025, + "learning_rate": 3.8805365771728646e-06, + "loss": 0.0022, + "step": 191680 + }, + { + "epoch": 1.2294401169856943, + "grad_norm": 0.31132322549819946, + "learning_rate": 3.879991089553125e-06, + "loss": 0.0018, + "step": 191690 + }, + { + "epoch": 1.2295042538794805, + "grad_norm": 0.061488717794418335, + "learning_rate": 3.879445615967897e-06, + "loss": 0.0008, + "step": 191700 + }, + { + "epoch": 1.2295683907732664, + "grad_norm": 0.24310879409313202, + "learning_rate": 3.878900156424017e-06, + "loss": 0.0019, + "step": 191710 + }, + { + "epoch": 1.2296325276670526, + "grad_norm": 0.00773091334849596, + "learning_rate": 3.87835471092832e-06, + "loss": 0.0012, + "step": 191720 + }, + { + "epoch": 1.2296966645608387, + "grad_norm": 0.055883124470710754, + "learning_rate": 3.877809279487638e-06, + "loss": 0.0017, + "step": 191730 + }, + { + "epoch": 1.2297608014546246, + "grad_norm": 0.06533777713775635, + "learning_rate": 3.87726386210881e-06, + "loss": 0.0012, + "step": 191740 + }, + { + "epoch": 1.2298249383484108, + "grad_norm": 0.038957130163908005, + "learning_rate": 3.876718458798667e-06, + "loss": 0.0011, + "step": 191750 + }, + { + "epoch": 1.229889075242197, + "grad_norm": 0.04319208115339279, + "learning_rate": 3.876173069564045e-06, + "loss": 0.0008, + "step": 191760 + }, + { + "epoch": 1.229953212135983, + "grad_norm": 0.03332489728927612, + "learning_rate": 3.875627694411778e-06, + "loss": 0.0011, + "step": 191770 + }, + { + "epoch": 1.2300173490297692, + "grad_norm": 0.05945609509944916, + "learning_rate": 3.875082333348699e-06, + "loss": 0.0015, + "step": 191780 + }, + { + "epoch": 1.2300814859235552, + "grad_norm": 0.11271724104881287, + "learning_rate": 3.874536986381643e-06, + "loss": 0.0015, + "step": 191790 + }, + { + "epoch": 1.2301456228173413, + "grad_norm": 0.06493068486452103, + "learning_rate": 3.873991653517441e-06, + "loss": 0.0011, + "step": 191800 + }, + { + "epoch": 1.2302097597111274, + "grad_norm": 0.008765444159507751, + "learning_rate": 3.87344633476293e-06, + "loss": 0.0024, + "step": 191810 + }, + { + "epoch": 1.2302738966049136, + "grad_norm": 0.0825427919626236, + "learning_rate": 3.872901030124941e-06, + "loss": 0.001, + "step": 191820 + }, + { + "epoch": 1.2303380334986995, + "grad_norm": 0.16172820329666138, + "learning_rate": 3.8723557396103084e-06, + "loss": 0.0012, + "step": 191830 + }, + { + "epoch": 1.2304021703924857, + "grad_norm": 0.05429816246032715, + "learning_rate": 3.871810463225863e-06, + "loss": 0.002, + "step": 191840 + }, + { + "epoch": 1.2304663072862718, + "grad_norm": 0.008258895017206669, + "learning_rate": 3.8712652009784404e-06, + "loss": 0.0019, + "step": 191850 + }, + { + "epoch": 1.230530444180058, + "grad_norm": 0.20216979086399078, + "learning_rate": 3.8707199528748694e-06, + "loss": 0.0014, + "step": 191860 + }, + { + "epoch": 1.2305945810738441, + "grad_norm": 0.01203607302159071, + "learning_rate": 3.870174718921987e-06, + "loss": 0.0009, + "step": 191870 + }, + { + "epoch": 1.23065871796763, + "grad_norm": 0.012646661140024662, + "learning_rate": 3.869629499126621e-06, + "loss": 0.0013, + "step": 191880 + }, + { + "epoch": 1.2307228548614162, + "grad_norm": 0.0920863151550293, + "learning_rate": 3.869084293495606e-06, + "loss": 0.0021, + "step": 191890 + }, + { + "epoch": 1.2307869917552023, + "grad_norm": 0.01504463329911232, + "learning_rate": 3.868539102035772e-06, + "loss": 0.0015, + "step": 191900 + }, + { + "epoch": 1.2308511286489885, + "grad_norm": 0.05006036534905434, + "learning_rate": 3.867993924753952e-06, + "loss": 0.0011, + "step": 191910 + }, + { + "epoch": 1.2309152655427744, + "grad_norm": 0.20346948504447937, + "learning_rate": 3.867448761656979e-06, + "loss": 0.0019, + "step": 191920 + }, + { + "epoch": 1.2309794024365606, + "grad_norm": 0.0386669859290123, + "learning_rate": 3.86690361275168e-06, + "loss": 0.0015, + "step": 191930 + }, + { + "epoch": 1.2310435393303467, + "grad_norm": 0.05314434692263603, + "learning_rate": 3.8663584780448905e-06, + "loss": 0.0019, + "step": 191940 + }, + { + "epoch": 1.2311076762241329, + "grad_norm": 0.02070430852472782, + "learning_rate": 3.865813357543438e-06, + "loss": 0.0025, + "step": 191950 + }, + { + "epoch": 1.2311718131179188, + "grad_norm": 0.1890772134065628, + "learning_rate": 3.865268251254156e-06, + "loss": 0.0019, + "step": 191960 + }, + { + "epoch": 1.231235950011705, + "grad_norm": 0.06091101095080376, + "learning_rate": 3.864723159183873e-06, + "loss": 0.0019, + "step": 191970 + }, + { + "epoch": 1.231300086905491, + "grad_norm": 0.08680860698223114, + "learning_rate": 3.86417808133942e-06, + "loss": 0.0012, + "step": 191980 + }, + { + "epoch": 1.2313642237992772, + "grad_norm": 0.0516524463891983, + "learning_rate": 3.863633017727628e-06, + "loss": 0.0015, + "step": 191990 + }, + { + "epoch": 1.2314283606930632, + "grad_norm": 0.0733397975564003, + "learning_rate": 3.863087968355327e-06, + "loss": 0.0059, + "step": 192000 + }, + { + "epoch": 1.2314924975868493, + "grad_norm": 0.03720054402947426, + "learning_rate": 3.862542933229345e-06, + "loss": 0.001, + "step": 192010 + }, + { + "epoch": 1.2315566344806355, + "grad_norm": 0.03688714653253555, + "learning_rate": 3.861997912356513e-06, + "loss": 0.0018, + "step": 192020 + }, + { + "epoch": 1.2316207713744216, + "grad_norm": 0.05015870928764343, + "learning_rate": 3.861452905743661e-06, + "loss": 0.0015, + "step": 192030 + }, + { + "epoch": 1.2316849082682078, + "grad_norm": 0.15229342877864838, + "learning_rate": 3.8609079133976176e-06, + "loss": 0.0012, + "step": 192040 + }, + { + "epoch": 1.2317490451619937, + "grad_norm": 0.16295719146728516, + "learning_rate": 3.860362935325213e-06, + "loss": 0.0018, + "step": 192050 + }, + { + "epoch": 1.2318131820557798, + "grad_norm": 0.035660114139318466, + "learning_rate": 3.859817971533273e-06, + "loss": 0.0017, + "step": 192060 + }, + { + "epoch": 1.231877318949566, + "grad_norm": 0.030620815232396126, + "learning_rate": 3.859273022028631e-06, + "loss": 0.0026, + "step": 192070 + }, + { + "epoch": 1.2319414558433521, + "grad_norm": 0.1859862059354782, + "learning_rate": 3.858728086818112e-06, + "loss": 0.0014, + "step": 192080 + }, + { + "epoch": 1.232005592737138, + "grad_norm": 0.009169869124889374, + "learning_rate": 3.8581831659085474e-06, + "loss": 0.0015, + "step": 192090 + }, + { + "epoch": 1.2320697296309242, + "grad_norm": 0.01506097987294197, + "learning_rate": 3.8576382593067616e-06, + "loss": 0.0011, + "step": 192100 + }, + { + "epoch": 1.2321338665247104, + "grad_norm": 0.03212521970272064, + "learning_rate": 3.857093367019587e-06, + "loss": 0.0014, + "step": 192110 + }, + { + "epoch": 1.2321980034184965, + "grad_norm": 0.23379331827163696, + "learning_rate": 3.856548489053847e-06, + "loss": 0.0022, + "step": 192120 + }, + { + "epoch": 1.2322621403122827, + "grad_norm": 0.0456843227148056, + "learning_rate": 3.856003625416374e-06, + "loss": 0.0013, + "step": 192130 + }, + { + "epoch": 1.2323262772060686, + "grad_norm": 0.07977654039859772, + "learning_rate": 3.855458776113991e-06, + "loss": 0.0015, + "step": 192140 + }, + { + "epoch": 1.2323904140998547, + "grad_norm": 0.09059882909059525, + "learning_rate": 3.85491394115353e-06, + "loss": 0.0009, + "step": 192150 + }, + { + "epoch": 1.2324545509936409, + "grad_norm": 0.23698201775550842, + "learning_rate": 3.854369120541814e-06, + "loss": 0.0018, + "step": 192160 + }, + { + "epoch": 1.2325186878874268, + "grad_norm": 0.2211540937423706, + "learning_rate": 3.853824314285672e-06, + "loss": 0.0021, + "step": 192170 + }, + { + "epoch": 1.232582824781213, + "grad_norm": 0.0691179633140564, + "learning_rate": 3.85327952239193e-06, + "loss": 0.0018, + "step": 192180 + }, + { + "epoch": 1.232646961674999, + "grad_norm": 0.03404263034462929, + "learning_rate": 3.852734744867415e-06, + "loss": 0.0014, + "step": 192190 + }, + { + "epoch": 1.2327110985687852, + "grad_norm": 0.022241869941353798, + "learning_rate": 3.852189981718955e-06, + "loss": 0.0021, + "step": 192200 + }, + { + "epoch": 1.2327752354625714, + "grad_norm": 0.10520397126674652, + "learning_rate": 3.851645232953373e-06, + "loss": 0.0012, + "step": 192210 + }, + { + "epoch": 1.2328393723563573, + "grad_norm": 0.13404682278633118, + "learning_rate": 3.851100498577499e-06, + "loss": 0.0015, + "step": 192220 + }, + { + "epoch": 1.2329035092501435, + "grad_norm": 0.07412949204444885, + "learning_rate": 3.8505557785981555e-06, + "loss": 0.0013, + "step": 192230 + }, + { + "epoch": 1.2329676461439296, + "grad_norm": 0.06194952502846718, + "learning_rate": 3.8500110730221705e-06, + "loss": 0.0012, + "step": 192240 + }, + { + "epoch": 1.2330317830377158, + "grad_norm": 0.09382586926221848, + "learning_rate": 3.849466381856367e-06, + "loss": 0.0011, + "step": 192250 + }, + { + "epoch": 1.2330959199315017, + "grad_norm": 0.08719581365585327, + "learning_rate": 3.848921705107574e-06, + "loss": 0.0029, + "step": 192260 + }, + { + "epoch": 1.2331600568252878, + "grad_norm": 0.012377760373055935, + "learning_rate": 3.8483770427826125e-06, + "loss": 0.0016, + "step": 192270 + }, + { + "epoch": 1.233224193719074, + "grad_norm": 0.00940409954637289, + "learning_rate": 3.84783239488831e-06, + "loss": 0.0022, + "step": 192280 + }, + { + "epoch": 1.2332883306128601, + "grad_norm": 0.006755847949534655, + "learning_rate": 3.847287761431493e-06, + "loss": 0.0015, + "step": 192290 + }, + { + "epoch": 1.2333524675066463, + "grad_norm": 0.07054046541452408, + "learning_rate": 3.846743142418983e-06, + "loss": 0.0016, + "step": 192300 + }, + { + "epoch": 1.2334166044004322, + "grad_norm": 0.3278670310974121, + "learning_rate": 3.846198537857606e-06, + "loss": 0.001, + "step": 192310 + }, + { + "epoch": 1.2334807412942184, + "grad_norm": 0.038489580154418945, + "learning_rate": 3.845653947754186e-06, + "loss": 0.0008, + "step": 192320 + }, + { + "epoch": 1.2335448781880045, + "grad_norm": 0.38875246047973633, + "learning_rate": 3.845109372115545e-06, + "loss": 0.0021, + "step": 192330 + }, + { + "epoch": 1.2336090150817907, + "grad_norm": 0.07522208243608475, + "learning_rate": 3.844564810948511e-06, + "loss": 0.0011, + "step": 192340 + }, + { + "epoch": 1.2336731519755766, + "grad_norm": 0.06156201288104057, + "learning_rate": 3.844020264259906e-06, + "loss": 0.0014, + "step": 192350 + }, + { + "epoch": 1.2337372888693627, + "grad_norm": 0.024865301325917244, + "learning_rate": 3.843475732056553e-06, + "loss": 0.0012, + "step": 192360 + }, + { + "epoch": 1.2338014257631489, + "grad_norm": 0.09704294055700302, + "learning_rate": 3.842931214345275e-06, + "loss": 0.0014, + "step": 192370 + }, + { + "epoch": 1.233865562656935, + "grad_norm": 0.05504216253757477, + "learning_rate": 3.8423867111328965e-06, + "loss": 0.0014, + "step": 192380 + }, + { + "epoch": 1.2339296995507212, + "grad_norm": 0.17114925384521484, + "learning_rate": 3.841842222426239e-06, + "loss": 0.0024, + "step": 192390 + }, + { + "epoch": 1.233993836444507, + "grad_norm": 0.15212135016918182, + "learning_rate": 3.8412977482321275e-06, + "loss": 0.0015, + "step": 192400 + }, + { + "epoch": 1.2340579733382933, + "grad_norm": 0.11747518926858902, + "learning_rate": 3.840753288557382e-06, + "loss": 0.0013, + "step": 192410 + }, + { + "epoch": 1.2341221102320794, + "grad_norm": 0.016201209276914597, + "learning_rate": 3.840208843408827e-06, + "loss": 0.0015, + "step": 192420 + }, + { + "epoch": 1.2341862471258653, + "grad_norm": 0.0847243070602417, + "learning_rate": 3.8396644127932835e-06, + "loss": 0.0015, + "step": 192430 + }, + { + "epoch": 1.2342503840196515, + "grad_norm": 0.13864751160144806, + "learning_rate": 3.839119996717576e-06, + "loss": 0.0028, + "step": 192440 + }, + { + "epoch": 1.2343145209134376, + "grad_norm": 0.09642212837934494, + "learning_rate": 3.838575595188522e-06, + "loss": 0.0016, + "step": 192450 + }, + { + "epoch": 1.2343786578072238, + "grad_norm": 0.01905280351638794, + "learning_rate": 3.838031208212947e-06, + "loss": 0.0016, + "step": 192460 + }, + { + "epoch": 1.23444279470101, + "grad_norm": 0.06971075385808945, + "learning_rate": 3.837486835797672e-06, + "loss": 0.0009, + "step": 192470 + }, + { + "epoch": 1.2345069315947959, + "grad_norm": 0.03184329718351364, + "learning_rate": 3.836942477949517e-06, + "loss": 0.0019, + "step": 192480 + }, + { + "epoch": 1.234571068488582, + "grad_norm": 0.04337267577648163, + "learning_rate": 3.836398134675304e-06, + "loss": 0.0018, + "step": 192490 + }, + { + "epoch": 1.2346352053823682, + "grad_norm": 1.0930646657943726, + "learning_rate": 3.835853805981854e-06, + "loss": 0.0012, + "step": 192500 + }, + { + "epoch": 1.2346993422761543, + "grad_norm": 0.06390325725078583, + "learning_rate": 3.835309491875989e-06, + "loss": 0.0013, + "step": 192510 + }, + { + "epoch": 1.2347634791699402, + "grad_norm": 0.07073578983545303, + "learning_rate": 3.834765192364527e-06, + "loss": 0.0027, + "step": 192520 + }, + { + "epoch": 1.2348276160637264, + "grad_norm": 0.05255277454853058, + "learning_rate": 3.834220907454291e-06, + "loss": 0.0014, + "step": 192530 + }, + { + "epoch": 1.2348917529575125, + "grad_norm": 0.01460962649434805, + "learning_rate": 3.833676637152099e-06, + "loss": 0.0013, + "step": 192540 + }, + { + "epoch": 1.2349558898512987, + "grad_norm": 0.02730894461274147, + "learning_rate": 3.833132381464773e-06, + "loss": 0.001, + "step": 192550 + }, + { + "epoch": 1.2350200267450848, + "grad_norm": 0.011701928451657295, + "learning_rate": 3.832588140399131e-06, + "loss": 0.001, + "step": 192560 + }, + { + "epoch": 1.2350841636388707, + "grad_norm": 0.08750313520431519, + "learning_rate": 3.832043913961997e-06, + "loss": 0.0016, + "step": 192570 + }, + { + "epoch": 1.235148300532657, + "grad_norm": 0.06760246306657791, + "learning_rate": 3.8314997021601846e-06, + "loss": 0.0025, + "step": 192580 + }, + { + "epoch": 1.235212437426443, + "grad_norm": 0.02466864138841629, + "learning_rate": 3.830955505000518e-06, + "loss": 0.0027, + "step": 192590 + }, + { + "epoch": 1.2352765743202292, + "grad_norm": 0.02973290905356407, + "learning_rate": 3.830411322489812e-06, + "loss": 0.0014, + "step": 192600 + }, + { + "epoch": 1.2353407112140151, + "grad_norm": 0.04889494553208351, + "learning_rate": 3.829867154634889e-06, + "loss": 0.0016, + "step": 192610 + }, + { + "epoch": 1.2354048481078013, + "grad_norm": 0.10187707841396332, + "learning_rate": 3.829323001442568e-06, + "loss": 0.0013, + "step": 192620 + }, + { + "epoch": 1.2354689850015874, + "grad_norm": 0.1182231605052948, + "learning_rate": 3.8287788629196655e-06, + "loss": 0.0017, + "step": 192630 + }, + { + "epoch": 1.2355331218953736, + "grad_norm": 0.07204227894544601, + "learning_rate": 3.8282347390730015e-06, + "loss": 0.0006, + "step": 192640 + }, + { + "epoch": 1.2355972587891595, + "grad_norm": 0.09443531930446625, + "learning_rate": 3.827690629909393e-06, + "loss": 0.001, + "step": 192650 + }, + { + "epoch": 1.2356613956829456, + "grad_norm": 0.03801809251308441, + "learning_rate": 3.82714653543566e-06, + "loss": 0.001, + "step": 192660 + }, + { + "epoch": 1.2357255325767318, + "grad_norm": 0.10350924730300903, + "learning_rate": 3.8266024556586164e-06, + "loss": 0.001, + "step": 192670 + }, + { + "epoch": 1.235789669470518, + "grad_norm": 0.13763123750686646, + "learning_rate": 3.826058390585086e-06, + "loss": 0.003, + "step": 192680 + }, + { + "epoch": 1.2358538063643039, + "grad_norm": 0.09756220132112503, + "learning_rate": 3.82551434022188e-06, + "loss": 0.0008, + "step": 192690 + }, + { + "epoch": 1.23591794325809, + "grad_norm": 0.09421417117118835, + "learning_rate": 3.824970304575821e-06, + "loss": 0.0017, + "step": 192700 + }, + { + "epoch": 1.2359820801518762, + "grad_norm": 0.08317472785711288, + "learning_rate": 3.824426283653723e-06, + "loss": 0.0016, + "step": 192710 + }, + { + "epoch": 1.2360462170456623, + "grad_norm": 0.17168138921260834, + "learning_rate": 3.823882277462404e-06, + "loss": 0.0015, + "step": 192720 + }, + { + "epoch": 1.2361103539394485, + "grad_norm": 0.025490155443549156, + "learning_rate": 3.82333828600868e-06, + "loss": 0.0011, + "step": 192730 + }, + { + "epoch": 1.2361744908332344, + "grad_norm": 0.13631509244441986, + "learning_rate": 3.822794309299369e-06, + "loss": 0.0009, + "step": 192740 + }, + { + "epoch": 1.2362386277270205, + "grad_norm": 0.28083887696266174, + "learning_rate": 3.822250347341286e-06, + "loss": 0.0026, + "step": 192750 + }, + { + "epoch": 1.2363027646208067, + "grad_norm": 0.17060667276382446, + "learning_rate": 3.8217064001412475e-06, + "loss": 0.0015, + "step": 192760 + }, + { + "epoch": 1.2363669015145928, + "grad_norm": 0.10084377229213715, + "learning_rate": 3.821162467706071e-06, + "loss": 0.0011, + "step": 192770 + }, + { + "epoch": 1.2364310384083788, + "grad_norm": 0.13612842559814453, + "learning_rate": 3.820618550042571e-06, + "loss": 0.003, + "step": 192780 + }, + { + "epoch": 1.236495175302165, + "grad_norm": 0.030563458800315857, + "learning_rate": 3.8200746471575635e-06, + "loss": 0.0029, + "step": 192790 + }, + { + "epoch": 1.236559312195951, + "grad_norm": 0.053338564932346344, + "learning_rate": 3.819530759057863e-06, + "loss": 0.0017, + "step": 192800 + }, + { + "epoch": 1.2366234490897372, + "grad_norm": 0.02539745718240738, + "learning_rate": 3.818986885750287e-06, + "loss": 0.0023, + "step": 192810 + }, + { + "epoch": 1.2366875859835234, + "grad_norm": 0.03159044310450554, + "learning_rate": 3.818443027241648e-06, + "loss": 0.0016, + "step": 192820 + }, + { + "epoch": 1.2367517228773093, + "grad_norm": 0.11882440745830536, + "learning_rate": 3.817899183538765e-06, + "loss": 0.0015, + "step": 192830 + }, + { + "epoch": 1.2368158597710954, + "grad_norm": 0.1520904302597046, + "learning_rate": 3.8173553546484475e-06, + "loss": 0.0014, + "step": 192840 + }, + { + "epoch": 1.2368799966648816, + "grad_norm": 0.025404080748558044, + "learning_rate": 3.816811540577513e-06, + "loss": 0.0013, + "step": 192850 + }, + { + "epoch": 1.2369441335586675, + "grad_norm": 0.23404638469219208, + "learning_rate": 3.8162677413327755e-06, + "loss": 0.0015, + "step": 192860 + }, + { + "epoch": 1.2370082704524537, + "grad_norm": 0.17075438797473907, + "learning_rate": 3.815723956921051e-06, + "loss": 0.0019, + "step": 192870 + }, + { + "epoch": 1.2370724073462398, + "grad_norm": 0.1039583832025528, + "learning_rate": 3.815180187349149e-06, + "loss": 0.0018, + "step": 192880 + }, + { + "epoch": 1.237136544240026, + "grad_norm": 0.0913623720407486, + "learning_rate": 3.8146364326238884e-06, + "loss": 0.0011, + "step": 192890 + }, + { + "epoch": 1.237200681133812, + "grad_norm": 0.11237393319606781, + "learning_rate": 3.8140926927520795e-06, + "loss": 0.0037, + "step": 192900 + }, + { + "epoch": 1.237264818027598, + "grad_norm": 0.12427417188882828, + "learning_rate": 3.8135489677405358e-06, + "loss": 0.0015, + "step": 192910 + }, + { + "epoch": 1.2373289549213842, + "grad_norm": 0.010974938049912453, + "learning_rate": 3.813005257596073e-06, + "loss": 0.0017, + "step": 192920 + }, + { + "epoch": 1.2373930918151703, + "grad_norm": 0.08517418056726456, + "learning_rate": 3.8124615623255012e-06, + "loss": 0.0021, + "step": 192930 + }, + { + "epoch": 1.2374572287089565, + "grad_norm": 0.006680286023765802, + "learning_rate": 3.8119178819356362e-06, + "loss": 0.0019, + "step": 192940 + }, + { + "epoch": 1.2375213656027424, + "grad_norm": 0.2067411094903946, + "learning_rate": 3.811374216433288e-06, + "loss": 0.0011, + "step": 192950 + }, + { + "epoch": 1.2375855024965285, + "grad_norm": 0.3387925326824188, + "learning_rate": 3.8108305658252716e-06, + "loss": 0.0016, + "step": 192960 + }, + { + "epoch": 1.2376496393903147, + "grad_norm": 0.09132348001003265, + "learning_rate": 3.810286930118397e-06, + "loss": 0.0021, + "step": 192970 + }, + { + "epoch": 1.2377137762841008, + "grad_norm": 0.20602019131183624, + "learning_rate": 3.809743309319478e-06, + "loss": 0.0019, + "step": 192980 + }, + { + "epoch": 1.237777913177887, + "grad_norm": 0.16022828221321106, + "learning_rate": 3.8091997034353256e-06, + "loss": 0.0028, + "step": 192990 + }, + { + "epoch": 1.237842050071673, + "grad_norm": 0.0357336662709713, + "learning_rate": 3.808656112472753e-06, + "loss": 0.0039, + "step": 193000 + }, + { + "epoch": 1.237906186965459, + "grad_norm": 0.0015852287178859115, + "learning_rate": 3.8081125364385697e-06, + "loss": 0.0023, + "step": 193010 + }, + { + "epoch": 1.2379703238592452, + "grad_norm": 0.06841090321540833, + "learning_rate": 3.807568975339588e-06, + "loss": 0.0012, + "step": 193020 + }, + { + "epoch": 1.2380344607530314, + "grad_norm": 0.0926680862903595, + "learning_rate": 3.80702542918262e-06, + "loss": 0.0025, + "step": 193030 + }, + { + "epoch": 1.2380985976468173, + "grad_norm": 0.1930045485496521, + "learning_rate": 3.806481897974474e-06, + "loss": 0.0018, + "step": 193040 + }, + { + "epoch": 1.2381627345406034, + "grad_norm": 0.10639449208974838, + "learning_rate": 3.8059383817219644e-06, + "loss": 0.0014, + "step": 193050 + }, + { + "epoch": 1.2382268714343896, + "grad_norm": 0.11121968179941177, + "learning_rate": 3.805394880431901e-06, + "loss": 0.0025, + "step": 193060 + }, + { + "epoch": 1.2382910083281757, + "grad_norm": 0.02013281360268593, + "learning_rate": 3.804851394111092e-06, + "loss": 0.001, + "step": 193070 + }, + { + "epoch": 1.2383551452219617, + "grad_norm": 0.3018694818019867, + "learning_rate": 3.8043079227663504e-06, + "loss": 0.0021, + "step": 193080 + }, + { + "epoch": 1.2384192821157478, + "grad_norm": 0.06745024770498276, + "learning_rate": 3.8037644664044845e-06, + "loss": 0.0017, + "step": 193090 + }, + { + "epoch": 1.238483419009534, + "grad_norm": 0.04946301877498627, + "learning_rate": 3.803221025032305e-06, + "loss": 0.001, + "step": 193100 + }, + { + "epoch": 1.23854755590332, + "grad_norm": 0.011163666844367981, + "learning_rate": 3.802677598656621e-06, + "loss": 0.0028, + "step": 193110 + }, + { + "epoch": 1.238611692797106, + "grad_norm": 0.0768004059791565, + "learning_rate": 3.802134187284244e-06, + "loss": 0.0014, + "step": 193120 + }, + { + "epoch": 1.2386758296908922, + "grad_norm": 0.10020840913057327, + "learning_rate": 3.8015907909219797e-06, + "loss": 0.0007, + "step": 193130 + }, + { + "epoch": 1.2387399665846783, + "grad_norm": 0.005092281382530928, + "learning_rate": 3.80104740957664e-06, + "loss": 0.001, + "step": 193140 + }, + { + "epoch": 1.2388041034784645, + "grad_norm": 0.03567197546362877, + "learning_rate": 3.8005040432550334e-06, + "loss": 0.0019, + "step": 193150 + }, + { + "epoch": 1.2388682403722506, + "grad_norm": 0.1282946914434433, + "learning_rate": 3.7999606919639687e-06, + "loss": 0.0027, + "step": 193160 + }, + { + "epoch": 1.2389323772660366, + "grad_norm": 0.020889919251203537, + "learning_rate": 3.799417355710253e-06, + "loss": 0.0013, + "step": 193170 + }, + { + "epoch": 1.2389965141598227, + "grad_norm": 0.08679716289043427, + "learning_rate": 3.798874034500696e-06, + "loss": 0.002, + "step": 193180 + }, + { + "epoch": 1.2390606510536089, + "grad_norm": 0.11425179988145828, + "learning_rate": 3.798330728342107e-06, + "loss": 0.0013, + "step": 193190 + }, + { + "epoch": 1.239124787947395, + "grad_norm": 0.032662127166986465, + "learning_rate": 3.797787437241292e-06, + "loss": 0.0012, + "step": 193200 + }, + { + "epoch": 1.239188924841181, + "grad_norm": 0.1015615239739418, + "learning_rate": 3.79724416120506e-06, + "loss": 0.0022, + "step": 193210 + }, + { + "epoch": 1.239253061734967, + "grad_norm": 0.27617600560188293, + "learning_rate": 3.7967009002402176e-06, + "loss": 0.0016, + "step": 193220 + }, + { + "epoch": 1.2393171986287532, + "grad_norm": 0.11621396988630295, + "learning_rate": 3.7961576543535745e-06, + "loss": 0.0015, + "step": 193230 + }, + { + "epoch": 1.2393813355225394, + "grad_norm": 0.028171326965093613, + "learning_rate": 3.7956144235519343e-06, + "loss": 0.0016, + "step": 193240 + }, + { + "epoch": 1.2394454724163255, + "grad_norm": 0.039086826145648956, + "learning_rate": 3.795071207842108e-06, + "loss": 0.0024, + "step": 193250 + }, + { + "epoch": 1.2395096093101114, + "grad_norm": 0.1428215205669403, + "learning_rate": 3.794528007230899e-06, + "loss": 0.0018, + "step": 193260 + }, + { + "epoch": 1.2395737462038976, + "grad_norm": 0.006755185779184103, + "learning_rate": 3.793984821725117e-06, + "loss": 0.003, + "step": 193270 + }, + { + "epoch": 1.2396378830976837, + "grad_norm": 0.023260431364178658, + "learning_rate": 3.793441651331566e-06, + "loss": 0.0026, + "step": 193280 + }, + { + "epoch": 1.2397020199914697, + "grad_norm": 0.03583270683884621, + "learning_rate": 3.7928984960570542e-06, + "loss": 0.0008, + "step": 193290 + }, + { + "epoch": 1.2397661568852558, + "grad_norm": 0.04842666909098625, + "learning_rate": 3.792355355908386e-06, + "loss": 0.0012, + "step": 193300 + }, + { + "epoch": 1.239830293779042, + "grad_norm": 0.07545134425163269, + "learning_rate": 3.7918122308923682e-06, + "loss": 0.0017, + "step": 193310 + }, + { + "epoch": 1.2398944306728281, + "grad_norm": 0.12448123842477798, + "learning_rate": 3.7912691210158083e-06, + "loss": 0.0015, + "step": 193320 + }, + { + "epoch": 1.2399585675666143, + "grad_norm": 0.07896067202091217, + "learning_rate": 3.7907260262855083e-06, + "loss": 0.0014, + "step": 193330 + }, + { + "epoch": 1.2400227044604002, + "grad_norm": 0.11490961909294128, + "learning_rate": 3.790182946708277e-06, + "loss": 0.0018, + "step": 193340 + }, + { + "epoch": 1.2400868413541863, + "grad_norm": 0.12536735832691193, + "learning_rate": 3.7896398822909166e-06, + "loss": 0.0007, + "step": 193350 + }, + { + "epoch": 1.2401509782479725, + "grad_norm": 0.04173259809613228, + "learning_rate": 3.789096833040235e-06, + "loss": 0.0013, + "step": 193360 + }, + { + "epoch": 1.2402151151417586, + "grad_norm": 0.11744063347578049, + "learning_rate": 3.788553798963034e-06, + "loss": 0.001, + "step": 193370 + }, + { + "epoch": 1.2402792520355446, + "grad_norm": 0.05858948081731796, + "learning_rate": 3.788010780066121e-06, + "loss": 0.0014, + "step": 193380 + }, + { + "epoch": 1.2403433889293307, + "grad_norm": 0.30108389258384705, + "learning_rate": 3.7874677763562982e-06, + "loss": 0.0008, + "step": 193390 + }, + { + "epoch": 1.2404075258231169, + "grad_norm": 0.07916703075170517, + "learning_rate": 3.7869247878403714e-06, + "loss": 0.0015, + "step": 193400 + }, + { + "epoch": 1.240471662716903, + "grad_norm": 0.039126984775066376, + "learning_rate": 3.7863818145251437e-06, + "loss": 0.0009, + "step": 193410 + }, + { + "epoch": 1.2405357996106892, + "grad_norm": 0.08537046611309052, + "learning_rate": 3.7858388564174197e-06, + "loss": 0.0014, + "step": 193420 + }, + { + "epoch": 1.240599936504475, + "grad_norm": 0.052776217460632324, + "learning_rate": 3.7852959135240016e-06, + "loss": 0.0009, + "step": 193430 + }, + { + "epoch": 1.2406640733982612, + "grad_norm": 0.03654957935214043, + "learning_rate": 3.7847529858516948e-06, + "loss": 0.0017, + "step": 193440 + }, + { + "epoch": 1.2407282102920474, + "grad_norm": 0.12807244062423706, + "learning_rate": 3.7842100734073006e-06, + "loss": 0.0012, + "step": 193450 + }, + { + "epoch": 1.2407923471858335, + "grad_norm": 0.04586097598075867, + "learning_rate": 3.7836671761976228e-06, + "loss": 0.0009, + "step": 193460 + }, + { + "epoch": 1.2408564840796195, + "grad_norm": 0.028308771550655365, + "learning_rate": 3.783124294229466e-06, + "loss": 0.0028, + "step": 193470 + }, + { + "epoch": 1.2409206209734056, + "grad_norm": 0.08119162172079086, + "learning_rate": 3.7825814275096306e-06, + "loss": 0.0013, + "step": 193480 + }, + { + "epoch": 1.2409847578671918, + "grad_norm": 0.06081259623169899, + "learning_rate": 3.782038576044921e-06, + "loss": 0.0016, + "step": 193490 + }, + { + "epoch": 1.241048894760978, + "grad_norm": 0.06799715757369995, + "learning_rate": 3.781495739842137e-06, + "loss": 0.002, + "step": 193500 + }, + { + "epoch": 1.2411130316547638, + "grad_norm": 0.049341876059770584, + "learning_rate": 3.7809529189080837e-06, + "loss": 0.0017, + "step": 193510 + }, + { + "epoch": 1.24117716854855, + "grad_norm": 0.052619002759456635, + "learning_rate": 3.78041011324956e-06, + "loss": 0.0014, + "step": 193520 + }, + { + "epoch": 1.2412413054423361, + "grad_norm": 0.09561476856470108, + "learning_rate": 3.779867322873371e-06, + "loss": 0.0016, + "step": 193530 + }, + { + "epoch": 1.2413054423361223, + "grad_norm": 0.056680865585803986, + "learning_rate": 3.779324547786315e-06, + "loss": 0.0018, + "step": 193540 + }, + { + "epoch": 1.2413695792299082, + "grad_norm": 0.09741806983947754, + "learning_rate": 3.778781787995196e-06, + "loss": 0.0023, + "step": 193550 + }, + { + "epoch": 1.2414337161236944, + "grad_norm": 0.05924725532531738, + "learning_rate": 3.778239043506813e-06, + "loss": 0.0011, + "step": 193560 + }, + { + "epoch": 1.2414978530174805, + "grad_norm": 0.05741953104734421, + "learning_rate": 3.777696314327969e-06, + "loss": 0.0025, + "step": 193570 + }, + { + "epoch": 1.2415619899112667, + "grad_norm": 0.09546720236539841, + "learning_rate": 3.777153600465462e-06, + "loss": 0.0016, + "step": 193580 + }, + { + "epoch": 1.2416261268050528, + "grad_norm": 0.009045228362083435, + "learning_rate": 3.776610901926096e-06, + "loss": 0.005, + "step": 193590 + }, + { + "epoch": 1.2416902636988387, + "grad_norm": 0.19120807945728302, + "learning_rate": 3.7760682187166685e-06, + "loss": 0.0016, + "step": 193600 + }, + { + "epoch": 1.2417544005926249, + "grad_norm": 0.14705318212509155, + "learning_rate": 3.7755255508439813e-06, + "loss": 0.0019, + "step": 193610 + }, + { + "epoch": 1.241818537486411, + "grad_norm": 0.06438015401363373, + "learning_rate": 3.7749828983148352e-06, + "loss": 0.0028, + "step": 193620 + }, + { + "epoch": 1.2418826743801972, + "grad_norm": 0.0024094143882393837, + "learning_rate": 3.7744402611360276e-06, + "loss": 0.0009, + "step": 193630 + }, + { + "epoch": 1.241946811273983, + "grad_norm": 0.04401683062314987, + "learning_rate": 3.77389763931436e-06, + "loss": 0.0017, + "step": 193640 + }, + { + "epoch": 1.2420109481677692, + "grad_norm": 0.031052129343152046, + "learning_rate": 3.7733550328566308e-06, + "loss": 0.0015, + "step": 193650 + }, + { + "epoch": 1.2420750850615554, + "grad_norm": 0.07920389622449875, + "learning_rate": 3.772812441769641e-06, + "loss": 0.0013, + "step": 193660 + }, + { + "epoch": 1.2421392219553415, + "grad_norm": 0.02331819385290146, + "learning_rate": 3.7722698660601864e-06, + "loss": 0.0017, + "step": 193670 + }, + { + "epoch": 1.2422033588491277, + "grad_norm": 0.147809699177742, + "learning_rate": 3.7717273057350697e-06, + "loss": 0.001, + "step": 193680 + }, + { + "epoch": 1.2422674957429136, + "grad_norm": 0.22028706967830658, + "learning_rate": 3.7711847608010867e-06, + "loss": 0.0018, + "step": 193690 + }, + { + "epoch": 1.2423316326366998, + "grad_norm": 0.05800031125545502, + "learning_rate": 3.770642231265037e-06, + "loss": 0.001, + "step": 193700 + }, + { + "epoch": 1.242395769530486, + "grad_norm": 0.18299369513988495, + "learning_rate": 3.7700997171337184e-06, + "loss": 0.0011, + "step": 193710 + }, + { + "epoch": 1.2424599064242718, + "grad_norm": 0.20967887341976166, + "learning_rate": 3.76955721841393e-06, + "loss": 0.0027, + "step": 193720 + }, + { + "epoch": 1.242524043318058, + "grad_norm": 0.011691104620695114, + "learning_rate": 3.769014735112468e-06, + "loss": 0.0011, + "step": 193730 + }, + { + "epoch": 1.2425881802118441, + "grad_norm": 0.34137067198753357, + "learning_rate": 3.7684722672361328e-06, + "loss": 0.001, + "step": 193740 + }, + { + "epoch": 1.2426523171056303, + "grad_norm": 0.0727938637137413, + "learning_rate": 3.7679298147917186e-06, + "loss": 0.0016, + "step": 193750 + }, + { + "epoch": 1.2427164539994164, + "grad_norm": 0.04760196432471275, + "learning_rate": 3.767387377786025e-06, + "loss": 0.0013, + "step": 193760 + }, + { + "epoch": 1.2427805908932024, + "grad_norm": 0.28860580921173096, + "learning_rate": 3.766844956225849e-06, + "loss": 0.0012, + "step": 193770 + }, + { + "epoch": 1.2428447277869885, + "grad_norm": 0.04993847385048866, + "learning_rate": 3.766302550117986e-06, + "loss": 0.0013, + "step": 193780 + }, + { + "epoch": 1.2429088646807747, + "grad_norm": 0.06602156162261963, + "learning_rate": 3.765760159469235e-06, + "loss": 0.0029, + "step": 193790 + }, + { + "epoch": 1.2429730015745608, + "grad_norm": 0.027851253747940063, + "learning_rate": 3.765217784286389e-06, + "loss": 0.0009, + "step": 193800 + }, + { + "epoch": 1.2430371384683467, + "grad_norm": 0.10883858799934387, + "learning_rate": 3.764675424576248e-06, + "loss": 0.0011, + "step": 193810 + }, + { + "epoch": 1.2431012753621329, + "grad_norm": 0.053645309060811996, + "learning_rate": 3.764133080345608e-06, + "loss": 0.0011, + "step": 193820 + }, + { + "epoch": 1.243165412255919, + "grad_norm": 0.11289020627737045, + "learning_rate": 3.763590751601262e-06, + "loss": 0.0022, + "step": 193830 + }, + { + "epoch": 1.2432295491497052, + "grad_norm": 0.06499308347702026, + "learning_rate": 3.763048438350009e-06, + "loss": 0.0059, + "step": 193840 + }, + { + "epoch": 1.2432936860434913, + "grad_norm": 0.028254959732294083, + "learning_rate": 3.762506140598642e-06, + "loss": 0.0014, + "step": 193850 + }, + { + "epoch": 1.2433578229372773, + "grad_norm": 0.07718160003423691, + "learning_rate": 3.7619638583539587e-06, + "loss": 0.0014, + "step": 193860 + }, + { + "epoch": 1.2434219598310634, + "grad_norm": 0.036642514169216156, + "learning_rate": 3.7614215916227515e-06, + "loss": 0.0012, + "step": 193870 + }, + { + "epoch": 1.2434860967248496, + "grad_norm": 0.09880058467388153, + "learning_rate": 3.760879340411817e-06, + "loss": 0.0009, + "step": 193880 + }, + { + "epoch": 1.2435502336186357, + "grad_norm": 0.10891883075237274, + "learning_rate": 3.7603371047279517e-06, + "loss": 0.0015, + "step": 193890 + }, + { + "epoch": 1.2436143705124216, + "grad_norm": 0.2018737643957138, + "learning_rate": 3.759794884577947e-06, + "loss": 0.0014, + "step": 193900 + }, + { + "epoch": 1.2436785074062078, + "grad_norm": 0.0744648426771164, + "learning_rate": 3.7592526799686e-06, + "loss": 0.0009, + "step": 193910 + }, + { + "epoch": 1.243742644299994, + "grad_norm": 0.07834817469120026, + "learning_rate": 3.7587104909067024e-06, + "loss": 0.003, + "step": 193920 + }, + { + "epoch": 1.24380678119378, + "grad_norm": 0.06488928943872452, + "learning_rate": 3.758168317399051e-06, + "loss": 0.0017, + "step": 193930 + }, + { + "epoch": 1.2438709180875662, + "grad_norm": 0.08567792922258377, + "learning_rate": 3.757626159452436e-06, + "loss": 0.0008, + "step": 193940 + }, + { + "epoch": 1.2439350549813522, + "grad_norm": 0.07470423728227615, + "learning_rate": 3.757084017073655e-06, + "loss": 0.0012, + "step": 193950 + }, + { + "epoch": 1.2439991918751383, + "grad_norm": 0.03506644070148468, + "learning_rate": 3.7565418902694984e-06, + "loss": 0.0015, + "step": 193960 + }, + { + "epoch": 1.2440633287689244, + "grad_norm": 0.029685180634260178, + "learning_rate": 3.7559997790467616e-06, + "loss": 0.0024, + "step": 193970 + }, + { + "epoch": 1.2441274656627104, + "grad_norm": 0.1393328756093979, + "learning_rate": 3.755457683412236e-06, + "loss": 0.0013, + "step": 193980 + }, + { + "epoch": 1.2441916025564965, + "grad_norm": 0.08571834862232208, + "learning_rate": 3.754915603372715e-06, + "loss": 0.0022, + "step": 193990 + }, + { + "epoch": 1.2442557394502827, + "grad_norm": 0.06823822110891342, + "learning_rate": 3.7543735389349913e-06, + "loss": 0.0021, + "step": 194000 + }, + { + "epoch": 1.2443198763440688, + "grad_norm": 0.022043902426958084, + "learning_rate": 3.753831490105858e-06, + "loss": 0.0014, + "step": 194010 + }, + { + "epoch": 1.244384013237855, + "grad_norm": 0.09544781595468521, + "learning_rate": 3.7532894568921057e-06, + "loss": 0.0019, + "step": 194020 + }, + { + "epoch": 1.244448150131641, + "grad_norm": 0.21995370090007782, + "learning_rate": 3.752747439300528e-06, + "loss": 0.0015, + "step": 194030 + }, + { + "epoch": 1.244512287025427, + "grad_norm": 0.050760120153427124, + "learning_rate": 3.752205437337917e-06, + "loss": 0.0014, + "step": 194040 + }, + { + "epoch": 1.2445764239192132, + "grad_norm": 0.12651541829109192, + "learning_rate": 3.7516634510110628e-06, + "loss": 0.0031, + "step": 194050 + }, + { + "epoch": 1.2446405608129993, + "grad_norm": 0.08600185811519623, + "learning_rate": 3.751121480326758e-06, + "loss": 0.0017, + "step": 194060 + }, + { + "epoch": 1.2447046977067853, + "grad_norm": 0.3223367929458618, + "learning_rate": 3.750579525291793e-06, + "loss": 0.0033, + "step": 194070 + }, + { + "epoch": 1.2447688346005714, + "grad_norm": 0.2447756975889206, + "learning_rate": 3.7500375859129613e-06, + "loss": 0.0015, + "step": 194080 + }, + { + "epoch": 1.2448329714943576, + "grad_norm": 0.0507052056491375, + "learning_rate": 3.7494956621970503e-06, + "loss": 0.008, + "step": 194090 + }, + { + "epoch": 1.2448971083881437, + "grad_norm": 0.07987413555383682, + "learning_rate": 3.748953754150854e-06, + "loss": 0.0016, + "step": 194100 + }, + { + "epoch": 1.2449612452819299, + "grad_norm": 0.055381108075380325, + "learning_rate": 3.7484118617811593e-06, + "loss": 0.0012, + "step": 194110 + }, + { + "epoch": 1.2450253821757158, + "grad_norm": 0.06680621951818466, + "learning_rate": 3.7478699850947597e-06, + "loss": 0.0029, + "step": 194120 + }, + { + "epoch": 1.245089519069502, + "grad_norm": 0.15559466183185577, + "learning_rate": 3.7473281240984426e-06, + "loss": 0.0026, + "step": 194130 + }, + { + "epoch": 1.245153655963288, + "grad_norm": 0.11484317481517792, + "learning_rate": 3.7467862787990018e-06, + "loss": 0.0015, + "step": 194140 + }, + { + "epoch": 1.2452177928570742, + "grad_norm": 0.11385003477334976, + "learning_rate": 3.7462444492032223e-06, + "loss": 0.0025, + "step": 194150 + }, + { + "epoch": 1.2452819297508602, + "grad_norm": 0.027558807283639908, + "learning_rate": 3.7457026353178965e-06, + "loss": 0.0023, + "step": 194160 + }, + { + "epoch": 1.2453460666446463, + "grad_norm": 0.036400206387043, + "learning_rate": 3.7451608371498137e-06, + "loss": 0.0012, + "step": 194170 + }, + { + "epoch": 1.2454102035384325, + "grad_norm": 0.0022631220053881407, + "learning_rate": 3.7446190547057622e-06, + "loss": 0.0023, + "step": 194180 + }, + { + "epoch": 1.2454743404322186, + "grad_norm": 0.04237014800310135, + "learning_rate": 3.7440772879925313e-06, + "loss": 0.001, + "step": 194190 + }, + { + "epoch": 1.2455384773260045, + "grad_norm": 0.08778490126132965, + "learning_rate": 3.743535537016909e-06, + "loss": 0.0017, + "step": 194200 + }, + { + "epoch": 1.2456026142197907, + "grad_norm": 0.057334933429956436, + "learning_rate": 3.742993801785686e-06, + "loss": 0.0017, + "step": 194210 + }, + { + "epoch": 1.2456667511135768, + "grad_norm": 0.08290781825780869, + "learning_rate": 3.742452082305647e-06, + "loss": 0.0028, + "step": 194220 + }, + { + "epoch": 1.245730888007363, + "grad_norm": 0.042327698320150375, + "learning_rate": 3.7419103785835835e-06, + "loss": 0.001, + "step": 194230 + }, + { + "epoch": 1.245795024901149, + "grad_norm": 0.0877726674079895, + "learning_rate": 3.7413686906262813e-06, + "loss": 0.0018, + "step": 194240 + }, + { + "epoch": 1.245859161794935, + "grad_norm": 0.08212432265281677, + "learning_rate": 3.7408270184405293e-06, + "loss": 0.0013, + "step": 194250 + }, + { + "epoch": 1.2459232986887212, + "grad_norm": 0.047862276434898376, + "learning_rate": 3.740285362033115e-06, + "loss": 0.0015, + "step": 194260 + }, + { + "epoch": 1.2459874355825074, + "grad_norm": 0.03472644463181496, + "learning_rate": 3.739743721410825e-06, + "loss": 0.0019, + "step": 194270 + }, + { + "epoch": 1.2460515724762935, + "grad_norm": 0.045804962515830994, + "learning_rate": 3.7392020965804464e-06, + "loss": 0.001, + "step": 194280 + }, + { + "epoch": 1.2461157093700794, + "grad_norm": 0.03998519852757454, + "learning_rate": 3.7386604875487675e-06, + "loss": 0.0008, + "step": 194290 + }, + { + "epoch": 1.2461798462638656, + "grad_norm": 0.031306635588407516, + "learning_rate": 3.738118894322574e-06, + "loss": 0.0014, + "step": 194300 + }, + { + "epoch": 1.2462439831576517, + "grad_norm": 0.12886030972003937, + "learning_rate": 3.7375773169086515e-06, + "loss": 0.0012, + "step": 194310 + }, + { + "epoch": 1.2463081200514379, + "grad_norm": 0.0673123300075531, + "learning_rate": 3.7370357553137895e-06, + "loss": 0.0012, + "step": 194320 + }, + { + "epoch": 1.2463722569452238, + "grad_norm": 0.0477038212120533, + "learning_rate": 3.73649420954477e-06, + "loss": 0.0022, + "step": 194330 + }, + { + "epoch": 1.24643639383901, + "grad_norm": 0.0366206169128418, + "learning_rate": 3.735952679608382e-06, + "loss": 0.0012, + "step": 194340 + }, + { + "epoch": 1.246500530732796, + "grad_norm": 0.06619582325220108, + "learning_rate": 3.73541116551141e-06, + "loss": 0.0026, + "step": 194350 + }, + { + "epoch": 1.2465646676265822, + "grad_norm": 0.0772685706615448, + "learning_rate": 3.734869667260641e-06, + "loss": 0.0008, + "step": 194360 + }, + { + "epoch": 1.2466288045203684, + "grad_norm": 0.08183934539556503, + "learning_rate": 3.734328184862858e-06, + "loss": 0.0009, + "step": 194370 + }, + { + "epoch": 1.2466929414141543, + "grad_norm": 0.3086586892604828, + "learning_rate": 3.733786718324849e-06, + "loss": 0.0023, + "step": 194380 + }, + { + "epoch": 1.2467570783079405, + "grad_norm": 0.07298220694065094, + "learning_rate": 3.7332452676533954e-06, + "loss": 0.0013, + "step": 194390 + }, + { + "epoch": 1.2468212152017266, + "grad_norm": 0.022465460002422333, + "learning_rate": 3.732703832855286e-06, + "loss": 0.0017, + "step": 194400 + }, + { + "epoch": 1.2468853520955125, + "grad_norm": 0.04418754205107689, + "learning_rate": 3.732162413937301e-06, + "loss": 0.0007, + "step": 194410 + }, + { + "epoch": 1.2469494889892987, + "grad_norm": 0.13295075297355652, + "learning_rate": 3.731621010906229e-06, + "loss": 0.0017, + "step": 194420 + }, + { + "epoch": 1.2470136258830848, + "grad_norm": 0.10908317565917969, + "learning_rate": 3.7310796237688506e-06, + "loss": 0.0008, + "step": 194430 + }, + { + "epoch": 1.247077762776871, + "grad_norm": 0.2138085514307022, + "learning_rate": 3.7305382525319527e-06, + "loss": 0.0018, + "step": 194440 + }, + { + "epoch": 1.2471418996706571, + "grad_norm": 0.09427808225154877, + "learning_rate": 3.729996897202317e-06, + "loss": 0.0007, + "step": 194450 + }, + { + "epoch": 1.247206036564443, + "grad_norm": 0.11858411878347397, + "learning_rate": 3.7294555577867277e-06, + "loss": 0.0011, + "step": 194460 + }, + { + "epoch": 1.2472701734582292, + "grad_norm": 0.08571857213973999, + "learning_rate": 3.7289142342919695e-06, + "loss": 0.0008, + "step": 194470 + }, + { + "epoch": 1.2473343103520154, + "grad_norm": 0.0037415132392197847, + "learning_rate": 3.7283729267248226e-06, + "loss": 0.0012, + "step": 194480 + }, + { + "epoch": 1.2473984472458015, + "grad_norm": 0.027030210942029953, + "learning_rate": 3.727831635092073e-06, + "loss": 0.0009, + "step": 194490 + }, + { + "epoch": 1.2474625841395874, + "grad_norm": 0.06800957024097443, + "learning_rate": 3.7272903594005015e-06, + "loss": 0.0009, + "step": 194500 + }, + { + "epoch": 1.2475267210333736, + "grad_norm": 0.07498878240585327, + "learning_rate": 3.7267490996568924e-06, + "loss": 0.0014, + "step": 194510 + }, + { + "epoch": 1.2475908579271597, + "grad_norm": 0.0452398918569088, + "learning_rate": 3.726207855868026e-06, + "loss": 0.0023, + "step": 194520 + }, + { + "epoch": 1.2476549948209459, + "grad_norm": 0.1459784060716629, + "learning_rate": 3.7256666280406866e-06, + "loss": 0.0011, + "step": 194530 + }, + { + "epoch": 1.247719131714732, + "grad_norm": 0.15869516134262085, + "learning_rate": 3.7251254161816543e-06, + "loss": 0.0018, + "step": 194540 + }, + { + "epoch": 1.247783268608518, + "grad_norm": 0.01833445392549038, + "learning_rate": 3.7245842202977124e-06, + "loss": 0.0008, + "step": 194550 + }, + { + "epoch": 1.247847405502304, + "grad_norm": 0.06426620483398438, + "learning_rate": 3.7240430403956407e-06, + "loss": 0.0013, + "step": 194560 + }, + { + "epoch": 1.2479115423960903, + "grad_norm": 0.094499871134758, + "learning_rate": 3.7235018764822206e-06, + "loss": 0.0022, + "step": 194570 + }, + { + "epoch": 1.2479756792898764, + "grad_norm": 0.06422349065542221, + "learning_rate": 3.7229607285642354e-06, + "loss": 0.0026, + "step": 194580 + }, + { + "epoch": 1.2480398161836623, + "grad_norm": 0.04425198957324028, + "learning_rate": 3.7224195966484656e-06, + "loss": 0.0022, + "step": 194590 + }, + { + "epoch": 1.2481039530774485, + "grad_norm": 0.025903604924678802, + "learning_rate": 3.7218784807416906e-06, + "loss": 0.0014, + "step": 194600 + }, + { + "epoch": 1.2481680899712346, + "grad_norm": 0.05532994121313095, + "learning_rate": 3.7213373808506925e-06, + "loss": 0.001, + "step": 194610 + }, + { + "epoch": 1.2482322268650208, + "grad_norm": 0.20678691565990448, + "learning_rate": 3.720796296982251e-06, + "loss": 0.0019, + "step": 194620 + }, + { + "epoch": 1.2482963637588067, + "grad_norm": 0.041809648275375366, + "learning_rate": 3.720255229143146e-06, + "loss": 0.0011, + "step": 194630 + }, + { + "epoch": 1.2483605006525929, + "grad_norm": 0.04943133518099785, + "learning_rate": 3.719714177340157e-06, + "loss": 0.0019, + "step": 194640 + }, + { + "epoch": 1.248424637546379, + "grad_norm": 0.11647520959377289, + "learning_rate": 3.719173141580065e-06, + "loss": 0.0031, + "step": 194650 + }, + { + "epoch": 1.2484887744401651, + "grad_norm": 0.10881810635328293, + "learning_rate": 3.718632121869648e-06, + "loss": 0.0015, + "step": 194660 + }, + { + "epoch": 1.248552911333951, + "grad_norm": 0.07503806054592133, + "learning_rate": 3.7180911182156874e-06, + "loss": 0.0011, + "step": 194670 + }, + { + "epoch": 1.2486170482277372, + "grad_norm": 0.113071009516716, + "learning_rate": 3.7175501306249607e-06, + "loss": 0.0009, + "step": 194680 + }, + { + "epoch": 1.2486811851215234, + "grad_norm": 0.017294185236096382, + "learning_rate": 3.7170091591042478e-06, + "loss": 0.0017, + "step": 194690 + }, + { + "epoch": 1.2487453220153095, + "grad_norm": 0.01370396837592125, + "learning_rate": 3.716468203660326e-06, + "loss": 0.0017, + "step": 194700 + }, + { + "epoch": 1.2488094589090957, + "grad_norm": 0.11356718838214874, + "learning_rate": 3.715927264299977e-06, + "loss": 0.0006, + "step": 194710 + }, + { + "epoch": 1.2488735958028816, + "grad_norm": 0.1287156343460083, + "learning_rate": 3.7153863410299747e-06, + "loss": 0.0013, + "step": 194720 + }, + { + "epoch": 1.2489377326966677, + "grad_norm": 0.04905663803219795, + "learning_rate": 3.7148454338571e-06, + "loss": 0.0012, + "step": 194730 + }, + { + "epoch": 1.249001869590454, + "grad_norm": 0.2283930629491806, + "learning_rate": 3.714304542788132e-06, + "loss": 0.0018, + "step": 194740 + }, + { + "epoch": 1.24906600648424, + "grad_norm": 0.05940965563058853, + "learning_rate": 3.7137636678298457e-06, + "loss": 0.0017, + "step": 194750 + }, + { + "epoch": 1.249130143378026, + "grad_norm": 0.07616443932056427, + "learning_rate": 3.713222808989021e-06, + "loss": 0.0011, + "step": 194760 + }, + { + "epoch": 1.2491942802718121, + "grad_norm": 0.19304047524929047, + "learning_rate": 3.712681966272433e-06, + "loss": 0.0023, + "step": 194770 + }, + { + "epoch": 1.2492584171655983, + "grad_norm": 0.008352248929440975, + "learning_rate": 3.7121411396868614e-06, + "loss": 0.0026, + "step": 194780 + }, + { + "epoch": 1.2493225540593844, + "grad_norm": 0.0745990201830864, + "learning_rate": 3.71160032923908e-06, + "loss": 0.0015, + "step": 194790 + }, + { + "epoch": 1.2493866909531706, + "grad_norm": 0.029070056974887848, + "learning_rate": 3.7110595349358684e-06, + "loss": 0.0013, + "step": 194800 + }, + { + "epoch": 1.2494508278469565, + "grad_norm": 0.17112939059734344, + "learning_rate": 3.7105187567840016e-06, + "loss": 0.0012, + "step": 194810 + }, + { + "epoch": 1.2495149647407426, + "grad_norm": 0.0685807392001152, + "learning_rate": 3.709977994790257e-06, + "loss": 0.0014, + "step": 194820 + }, + { + "epoch": 1.2495791016345288, + "grad_norm": 0.11804874986410141, + "learning_rate": 3.7094372489614087e-06, + "loss": 0.0022, + "step": 194830 + }, + { + "epoch": 1.2496432385283147, + "grad_norm": 0.02202121540904045, + "learning_rate": 3.708896519304236e-06, + "loss": 0.0013, + "step": 194840 + }, + { + "epoch": 1.2497073754221009, + "grad_norm": 0.2399996519088745, + "learning_rate": 3.7083558058255107e-06, + "loss": 0.0017, + "step": 194850 + }, + { + "epoch": 1.249771512315887, + "grad_norm": 0.07525387406349182, + "learning_rate": 3.7078151085320117e-06, + "loss": 0.0018, + "step": 194860 + }, + { + "epoch": 1.2498356492096732, + "grad_norm": 0.09805445373058319, + "learning_rate": 3.707274427430512e-06, + "loss": 0.0014, + "step": 194870 + }, + { + "epoch": 1.2498997861034593, + "grad_norm": 0.14419735968112946, + "learning_rate": 3.706733762527788e-06, + "loss": 0.0025, + "step": 194880 + }, + { + "epoch": 1.2499639229972452, + "grad_norm": 0.11062031984329224, + "learning_rate": 3.706193113830614e-06, + "loss": 0.001, + "step": 194890 + }, + { + "epoch": 1.2500280598910314, + "grad_norm": 0.041483424603939056, + "learning_rate": 3.705652481345765e-06, + "loss": 0.0012, + "step": 194900 + }, + { + "epoch": 1.2500921967848175, + "grad_norm": 0.08496949076652527, + "learning_rate": 3.705111865080016e-06, + "loss": 0.0011, + "step": 194910 + }, + { + "epoch": 1.2501563336786037, + "grad_norm": 0.050424862653017044, + "learning_rate": 3.70457126504014e-06, + "loss": 0.0029, + "step": 194920 + }, + { + "epoch": 1.2502204705723896, + "grad_norm": 0.15065942704677582, + "learning_rate": 3.704030681232913e-06, + "loss": 0.0016, + "step": 194930 + }, + { + "epoch": 1.2502846074661758, + "grad_norm": 0.07603470981121063, + "learning_rate": 3.703490113665106e-06, + "loss": 0.0026, + "step": 194940 + }, + { + "epoch": 1.250348744359962, + "grad_norm": 0.1060481071472168, + "learning_rate": 3.702949562343496e-06, + "loss": 0.0011, + "step": 194950 + }, + { + "epoch": 1.250412881253748, + "grad_norm": 0.12667687237262726, + "learning_rate": 3.7024090272748547e-06, + "loss": 0.0019, + "step": 194960 + }, + { + "epoch": 1.2504770181475342, + "grad_norm": 0.04505964368581772, + "learning_rate": 3.7018685084659556e-06, + "loss": 0.0009, + "step": 194970 + }, + { + "epoch": 1.2505411550413201, + "grad_norm": 0.0329747311770916, + "learning_rate": 3.701328005923571e-06, + "loss": 0.002, + "step": 194980 + }, + { + "epoch": 1.2506052919351063, + "grad_norm": 0.049436502158641815, + "learning_rate": 3.700787519654476e-06, + "loss": 0.0012, + "step": 194990 + }, + { + "epoch": 1.2506694288288924, + "grad_norm": 0.06754521280527115, + "learning_rate": 3.700247049665441e-06, + "loss": 0.0018, + "step": 195000 + }, + { + "epoch": 1.2507335657226784, + "grad_norm": 0.028261858969926834, + "learning_rate": 3.6997065959632406e-06, + "loss": 0.0007, + "step": 195010 + }, + { + "epoch": 1.2507977026164645, + "grad_norm": 0.0627891793847084, + "learning_rate": 3.699166158554644e-06, + "loss": 0.0009, + "step": 195020 + }, + { + "epoch": 1.2508618395102507, + "grad_norm": 0.06759367138147354, + "learning_rate": 3.698625737446426e-06, + "loss": 0.0011, + "step": 195030 + }, + { + "epoch": 1.2509259764040368, + "grad_norm": 0.05140414461493492, + "learning_rate": 3.698085332645358e-06, + "loss": 0.0014, + "step": 195040 + }, + { + "epoch": 1.250990113297823, + "grad_norm": 0.11699049174785614, + "learning_rate": 3.6975449441582102e-06, + "loss": 0.0011, + "step": 195050 + }, + { + "epoch": 1.251054250191609, + "grad_norm": 0.1346450001001358, + "learning_rate": 3.6970045719917562e-06, + "loss": 0.0014, + "step": 195060 + }, + { + "epoch": 1.251118387085395, + "grad_norm": 0.04619720205664635, + "learning_rate": 3.6964642161527652e-06, + "loss": 0.0009, + "step": 195070 + }, + { + "epoch": 1.2511825239791812, + "grad_norm": 0.030455434694886208, + "learning_rate": 3.6959238766480105e-06, + "loss": 0.0018, + "step": 195080 + }, + { + "epoch": 1.2512466608729673, + "grad_norm": 0.0943940281867981, + "learning_rate": 3.6953835534842603e-06, + "loss": 0.0015, + "step": 195090 + }, + { + "epoch": 1.2513107977667532, + "grad_norm": 0.13932521641254425, + "learning_rate": 3.6948432466682875e-06, + "loss": 0.0009, + "step": 195100 + }, + { + "epoch": 1.2513749346605394, + "grad_norm": 0.15518449246883392, + "learning_rate": 3.694302956206861e-06, + "loss": 0.0012, + "step": 195110 + }, + { + "epoch": 1.2514390715543255, + "grad_norm": 0.04630433768033981, + "learning_rate": 3.6937626821067525e-06, + "loss": 0.0019, + "step": 195120 + }, + { + "epoch": 1.2515032084481117, + "grad_norm": 0.044243235141038895, + "learning_rate": 3.6932224243747296e-06, + "loss": 0.0011, + "step": 195130 + }, + { + "epoch": 1.2515673453418978, + "grad_norm": 0.11855106055736542, + "learning_rate": 3.692682183017565e-06, + "loss": 0.0012, + "step": 195140 + }, + { + "epoch": 1.2516314822356838, + "grad_norm": 0.07241770625114441, + "learning_rate": 3.6921419580420265e-06, + "loss": 0.0008, + "step": 195150 + }, + { + "epoch": 1.25169561912947, + "grad_norm": 0.06299462169408798, + "learning_rate": 3.691601749454883e-06, + "loss": 0.0029, + "step": 195160 + }, + { + "epoch": 1.251759756023256, + "grad_norm": 0.04221626743674278, + "learning_rate": 3.6910615572629066e-06, + "loss": 0.0012, + "step": 195170 + }, + { + "epoch": 1.2518238929170422, + "grad_norm": 0.09005774557590485, + "learning_rate": 3.690521381472863e-06, + "loss": 0.0014, + "step": 195180 + }, + { + "epoch": 1.2518880298108281, + "grad_norm": 0.07019791007041931, + "learning_rate": 3.6899812220915233e-06, + "loss": 0.0006, + "step": 195190 + }, + { + "epoch": 1.2519521667046143, + "grad_norm": 0.07578397542238235, + "learning_rate": 3.689441079125654e-06, + "loss": 0.0019, + "step": 195200 + }, + { + "epoch": 1.2520163035984004, + "grad_norm": 0.08820551633834839, + "learning_rate": 3.6889009525820264e-06, + "loss": 0.0022, + "step": 195210 + }, + { + "epoch": 1.2520804404921866, + "grad_norm": 0.03909339755773544, + "learning_rate": 3.688360842467405e-06, + "loss": 0.0013, + "step": 195220 + }, + { + "epoch": 1.2521445773859727, + "grad_norm": 0.04139334708452225, + "learning_rate": 3.687820748788561e-06, + "loss": 0.0007, + "step": 195230 + }, + { + "epoch": 1.2522087142797587, + "grad_norm": 0.07054650038480759, + "learning_rate": 3.6872806715522597e-06, + "loss": 0.0043, + "step": 195240 + }, + { + "epoch": 1.2522728511735448, + "grad_norm": 0.25521770119667053, + "learning_rate": 3.686740610765271e-06, + "loss": 0.0026, + "step": 195250 + }, + { + "epoch": 1.252336988067331, + "grad_norm": 0.28910234570503235, + "learning_rate": 3.68620056643436e-06, + "loss": 0.0015, + "step": 195260 + }, + { + "epoch": 1.2524011249611169, + "grad_norm": 0.005549044813960791, + "learning_rate": 3.6856605385662957e-06, + "loss": 0.0017, + "step": 195270 + }, + { + "epoch": 1.252465261854903, + "grad_norm": 0.17769359052181244, + "learning_rate": 3.685120527167843e-06, + "loss": 0.0013, + "step": 195280 + }, + { + "epoch": 1.2525293987486892, + "grad_norm": 0.036505963653326035, + "learning_rate": 3.684580532245771e-06, + "loss": 0.0016, + "step": 195290 + }, + { + "epoch": 1.2525935356424753, + "grad_norm": 0.05288667976856232, + "learning_rate": 3.684040553806844e-06, + "loss": 0.001, + "step": 195300 + }, + { + "epoch": 1.2526576725362615, + "grad_norm": 0.005735976621508598, + "learning_rate": 3.6835005918578293e-06, + "loss": 0.0023, + "step": 195310 + }, + { + "epoch": 1.2527218094300476, + "grad_norm": 0.15252001583576202, + "learning_rate": 3.6829606464054945e-06, + "loss": 0.002, + "step": 195320 + }, + { + "epoch": 1.2527859463238336, + "grad_norm": 0.074702188372612, + "learning_rate": 3.6824207174566018e-06, + "loss": 0.0012, + "step": 195330 + }, + { + "epoch": 1.2528500832176197, + "grad_norm": 0.06723365187644958, + "learning_rate": 3.68188080501792e-06, + "loss": 0.0013, + "step": 195340 + }, + { + "epoch": 1.2529142201114059, + "grad_norm": 0.09764128923416138, + "learning_rate": 3.6813409090962148e-06, + "loss": 0.0018, + "step": 195350 + }, + { + "epoch": 1.2529783570051918, + "grad_norm": 0.02114538475871086, + "learning_rate": 3.68080102969825e-06, + "loss": 0.0025, + "step": 195360 + }, + { + "epoch": 1.253042493898978, + "grad_norm": 0.10238329321146011, + "learning_rate": 3.680261166830791e-06, + "loss": 0.0011, + "step": 195370 + }, + { + "epoch": 1.253106630792764, + "grad_norm": 0.29062432050704956, + "learning_rate": 3.679721320500603e-06, + "loss": 0.0011, + "step": 195380 + }, + { + "epoch": 1.2531707676865502, + "grad_norm": 0.129994198679924, + "learning_rate": 3.6791814907144505e-06, + "loss": 0.002, + "step": 195390 + }, + { + "epoch": 1.2532349045803364, + "grad_norm": 0.04898214712738991, + "learning_rate": 3.678641677479098e-06, + "loss": 0.0009, + "step": 195400 + }, + { + "epoch": 1.2532990414741223, + "grad_norm": 0.08681108057498932, + "learning_rate": 3.6781018808013097e-06, + "loss": 0.0017, + "step": 195410 + }, + { + "epoch": 1.2533631783679084, + "grad_norm": 0.07974491268396378, + "learning_rate": 3.6775621006878487e-06, + "loss": 0.0018, + "step": 195420 + }, + { + "epoch": 1.2534273152616946, + "grad_norm": 0.05810292437672615, + "learning_rate": 3.6770223371454797e-06, + "loss": 0.0018, + "step": 195430 + }, + { + "epoch": 1.2534914521554805, + "grad_norm": 0.021946711465716362, + "learning_rate": 3.6764825901809673e-06, + "loss": 0.0015, + "step": 195440 + }, + { + "epoch": 1.2535555890492667, + "grad_norm": 0.1336854249238968, + "learning_rate": 3.6759428598010738e-06, + "loss": 0.0011, + "step": 195450 + }, + { + "epoch": 1.2536197259430528, + "grad_norm": 0.07914441078901291, + "learning_rate": 3.675403146012563e-06, + "loss": 0.0007, + "step": 195460 + }, + { + "epoch": 1.253683862836839, + "grad_norm": 0.05006599798798561, + "learning_rate": 3.6748634488221955e-06, + "loss": 0.0013, + "step": 195470 + }, + { + "epoch": 1.2537479997306251, + "grad_norm": 0.14474913477897644, + "learning_rate": 3.6743237682367385e-06, + "loss": 0.0012, + "step": 195480 + }, + { + "epoch": 1.2538121366244113, + "grad_norm": 0.03993711993098259, + "learning_rate": 3.67378410426295e-06, + "loss": 0.0009, + "step": 195490 + }, + { + "epoch": 1.2538762735181972, + "grad_norm": 0.002169192535802722, + "learning_rate": 3.6732444569075964e-06, + "loss": 0.0012, + "step": 195500 + }, + { + "epoch": 1.2539404104119833, + "grad_norm": 0.011117948219180107, + "learning_rate": 3.672704826177437e-06, + "loss": 0.002, + "step": 195510 + }, + { + "epoch": 1.2540045473057695, + "grad_norm": 0.026117559522390366, + "learning_rate": 3.672165212079235e-06, + "loss": 0.0014, + "step": 195520 + }, + { + "epoch": 1.2540686841995554, + "grad_norm": 0.06564907729625702, + "learning_rate": 3.671625614619751e-06, + "loss": 0.0009, + "step": 195530 + }, + { + "epoch": 1.2541328210933416, + "grad_norm": 0.3652568459510803, + "learning_rate": 3.6710860338057485e-06, + "loss": 0.0027, + "step": 195540 + }, + { + "epoch": 1.2541969579871277, + "grad_norm": 0.08847367018461227, + "learning_rate": 3.6705464696439875e-06, + "loss": 0.0015, + "step": 195550 + }, + { + "epoch": 1.2542610948809139, + "grad_norm": 0.0992204025387764, + "learning_rate": 3.6700069221412295e-06, + "loss": 0.0013, + "step": 195560 + }, + { + "epoch": 1.2543252317747, + "grad_norm": 0.03547690063714981, + "learning_rate": 3.6694673913042345e-06, + "loss": 0.0017, + "step": 195570 + }, + { + "epoch": 1.254389368668486, + "grad_norm": 0.05089672654867172, + "learning_rate": 3.668927877139764e-06, + "loss": 0.0013, + "step": 195580 + }, + { + "epoch": 1.254453505562272, + "grad_norm": 0.015458229929208755, + "learning_rate": 3.668388379654579e-06, + "loss": 0.0021, + "step": 195590 + }, + { + "epoch": 1.2545176424560582, + "grad_norm": 0.1813635677099228, + "learning_rate": 3.6678488988554393e-06, + "loss": 0.0027, + "step": 195600 + }, + { + "epoch": 1.2545817793498444, + "grad_norm": 0.0283439289778471, + "learning_rate": 3.6673094347491055e-06, + "loss": 0.0012, + "step": 195610 + }, + { + "epoch": 1.2546459162436303, + "grad_norm": 0.06188954412937164, + "learning_rate": 3.6667699873423363e-06, + "loss": 0.0014, + "step": 195620 + }, + { + "epoch": 1.2547100531374165, + "grad_norm": 0.0191205944865942, + "learning_rate": 3.6662305566418926e-06, + "loss": 0.0011, + "step": 195630 + }, + { + "epoch": 1.2547741900312026, + "grad_norm": 0.018281230702996254, + "learning_rate": 3.665691142654532e-06, + "loss": 0.002, + "step": 195640 + }, + { + "epoch": 1.2548383269249888, + "grad_norm": 0.06265388429164886, + "learning_rate": 3.665151745387017e-06, + "loss": 0.0017, + "step": 195650 + }, + { + "epoch": 1.254902463818775, + "grad_norm": 0.19270409643650055, + "learning_rate": 3.664612364846103e-06, + "loss": 0.0025, + "step": 195660 + }, + { + "epoch": 1.2549666007125608, + "grad_norm": 0.033852968364953995, + "learning_rate": 3.664073001038551e-06, + "loss": 0.0015, + "step": 195670 + }, + { + "epoch": 1.255030737606347, + "grad_norm": 0.00962325744330883, + "learning_rate": 3.6635336539711188e-06, + "loss": 0.0013, + "step": 195680 + }, + { + "epoch": 1.2550948745001331, + "grad_norm": 0.15461745858192444, + "learning_rate": 3.6629943236505655e-06, + "loss": 0.0015, + "step": 195690 + }, + { + "epoch": 1.255159011393919, + "grad_norm": 0.03359571844339371, + "learning_rate": 3.662455010083648e-06, + "loss": 0.0016, + "step": 195700 + }, + { + "epoch": 1.2552231482877052, + "grad_norm": 0.11851087957620621, + "learning_rate": 3.6619157132771265e-06, + "loss": 0.001, + "step": 195710 + }, + { + "epoch": 1.2552872851814914, + "grad_norm": 0.014558130875229836, + "learning_rate": 3.661376433237756e-06, + "loss": 0.0042, + "step": 195720 + }, + { + "epoch": 1.2553514220752775, + "grad_norm": 0.039042163640260696, + "learning_rate": 3.6608371699722954e-06, + "loss": 0.001, + "step": 195730 + }, + { + "epoch": 1.2554155589690636, + "grad_norm": 0.05535927414894104, + "learning_rate": 3.6602979234875036e-06, + "loss": 0.0019, + "step": 195740 + }, + { + "epoch": 1.2554796958628498, + "grad_norm": 0.03454405069351196, + "learning_rate": 3.6597586937901353e-06, + "loss": 0.0049, + "step": 195750 + }, + { + "epoch": 1.2555438327566357, + "grad_norm": 0.008221879601478577, + "learning_rate": 3.6592194808869498e-06, + "loss": 0.0011, + "step": 195760 + }, + { + "epoch": 1.2556079696504219, + "grad_norm": 0.004276182036846876, + "learning_rate": 3.6586802847847e-06, + "loss": 0.0011, + "step": 195770 + }, + { + "epoch": 1.255672106544208, + "grad_norm": 0.10540279000997543, + "learning_rate": 3.658141105490147e-06, + "loss": 0.0024, + "step": 195780 + }, + { + "epoch": 1.255736243437994, + "grad_norm": 0.16785524785518646, + "learning_rate": 3.657601943010044e-06, + "loss": 0.0016, + "step": 195790 + }, + { + "epoch": 1.25580038033178, + "grad_norm": 0.1500561386346817, + "learning_rate": 3.6570627973511487e-06, + "loss": 0.001, + "step": 195800 + }, + { + "epoch": 1.2558645172255662, + "grad_norm": 0.15236826241016388, + "learning_rate": 3.6565236685202154e-06, + "loss": 0.0018, + "step": 195810 + }, + { + "epoch": 1.2559286541193524, + "grad_norm": 0.17366255819797516, + "learning_rate": 3.6559845565240014e-06, + "loss": 0.002, + "step": 195820 + }, + { + "epoch": 1.2559927910131385, + "grad_norm": 0.08815667033195496, + "learning_rate": 3.6554454613692613e-06, + "loss": 0.0019, + "step": 195830 + }, + { + "epoch": 1.2560569279069245, + "grad_norm": 0.00293713784776628, + "learning_rate": 3.6549063830627506e-06, + "loss": 0.0029, + "step": 195840 + }, + { + "epoch": 1.2561210648007106, + "grad_norm": 0.04876834899187088, + "learning_rate": 3.654367321611224e-06, + "loss": 0.0018, + "step": 195850 + }, + { + "epoch": 1.2561852016944968, + "grad_norm": 0.04722928628325462, + "learning_rate": 3.653828277021437e-06, + "loss": 0.0018, + "step": 195860 + }, + { + "epoch": 1.2562493385882827, + "grad_norm": 0.002980181248858571, + "learning_rate": 3.6532892493001433e-06, + "loss": 0.002, + "step": 195870 + }, + { + "epoch": 1.2563134754820688, + "grad_norm": 0.04678097739815712, + "learning_rate": 3.652750238454097e-06, + "loss": 0.0012, + "step": 195880 + }, + { + "epoch": 1.256377612375855, + "grad_norm": 0.11403562128543854, + "learning_rate": 3.652211244490055e-06, + "loss": 0.0019, + "step": 195890 + }, + { + "epoch": 1.2564417492696411, + "grad_norm": 0.08019541203975677, + "learning_rate": 3.6516722674147675e-06, + "loss": 0.0017, + "step": 195900 + }, + { + "epoch": 1.2565058861634273, + "grad_norm": 0.3112826347351074, + "learning_rate": 3.651133307234992e-06, + "loss": 0.0016, + "step": 195910 + }, + { + "epoch": 1.2565700230572134, + "grad_norm": 0.07466558367013931, + "learning_rate": 3.6505943639574782e-06, + "loss": 0.0017, + "step": 195920 + }, + { + "epoch": 1.2566341599509994, + "grad_norm": 0.06018274277448654, + "learning_rate": 3.650055437588983e-06, + "loss": 0.0025, + "step": 195930 + }, + { + "epoch": 1.2566982968447855, + "grad_norm": 0.14753106236457825, + "learning_rate": 3.6495165281362573e-06, + "loss": 0.0019, + "step": 195940 + }, + { + "epoch": 1.2567624337385717, + "grad_norm": 0.09227093309164047, + "learning_rate": 3.6489776356060546e-06, + "loss": 0.0014, + "step": 195950 + }, + { + "epoch": 1.2568265706323576, + "grad_norm": 0.019015872851014137, + "learning_rate": 3.6484387600051276e-06, + "loss": 0.0013, + "step": 195960 + }, + { + "epoch": 1.2568907075261437, + "grad_norm": 0.006597777362912893, + "learning_rate": 3.64789990134023e-06, + "loss": 0.001, + "step": 195970 + }, + { + "epoch": 1.2569548444199299, + "grad_norm": 0.0689472034573555, + "learning_rate": 3.647361059618111e-06, + "loss": 0.0021, + "step": 195980 + }, + { + "epoch": 1.257018981313716, + "grad_norm": 0.12803134322166443, + "learning_rate": 3.646822234845527e-06, + "loss": 0.0016, + "step": 195990 + }, + { + "epoch": 1.2570831182075022, + "grad_norm": 0.05919404327869415, + "learning_rate": 3.646283427029226e-06, + "loss": 0.0024, + "step": 196000 + }, + { + "epoch": 1.257147255101288, + "grad_norm": 0.2608695328235626, + "learning_rate": 3.6457446361759603e-06, + "loss": 0.0022, + "step": 196010 + }, + { + "epoch": 1.2572113919950743, + "grad_norm": 0.02095581591129303, + "learning_rate": 3.6452058622924836e-06, + "loss": 0.0016, + "step": 196020 + }, + { + "epoch": 1.2572755288888604, + "grad_norm": 0.1533578336238861, + "learning_rate": 3.6446671053855453e-06, + "loss": 0.0015, + "step": 196030 + }, + { + "epoch": 1.2573396657826466, + "grad_norm": 0.13423244655132294, + "learning_rate": 3.6441283654618975e-06, + "loss": 0.0016, + "step": 196040 + }, + { + "epoch": 1.2574038026764325, + "grad_norm": 0.09977749735116959, + "learning_rate": 3.643589642528289e-06, + "loss": 0.0025, + "step": 196050 + }, + { + "epoch": 1.2574679395702186, + "grad_norm": 0.10623639076948166, + "learning_rate": 3.6430509365914723e-06, + "loss": 0.0012, + "step": 196060 + }, + { + "epoch": 1.2575320764640048, + "grad_norm": 0.08471555262804031, + "learning_rate": 3.642512247658197e-06, + "loss": 0.0018, + "step": 196070 + }, + { + "epoch": 1.257596213357791, + "grad_norm": 0.043866001069545746, + "learning_rate": 3.6419735757352144e-06, + "loss": 0.0016, + "step": 196080 + }, + { + "epoch": 1.257660350251577, + "grad_norm": 0.027464529499411583, + "learning_rate": 3.641434920829272e-06, + "loss": 0.0015, + "step": 196090 + }, + { + "epoch": 1.257724487145363, + "grad_norm": 0.0067048026248812675, + "learning_rate": 3.6408962829471205e-06, + "loss": 0.0012, + "step": 196100 + }, + { + "epoch": 1.2577886240391492, + "grad_norm": 0.007362800184637308, + "learning_rate": 3.6403576620955117e-06, + "loss": 0.0015, + "step": 196110 + }, + { + "epoch": 1.2578527609329353, + "grad_norm": 0.10874085128307343, + "learning_rate": 3.6398190582811922e-06, + "loss": 0.0023, + "step": 196120 + }, + { + "epoch": 1.2579168978267212, + "grad_norm": 0.05695396289229393, + "learning_rate": 3.639280471510912e-06, + "loss": 0.0011, + "step": 196130 + }, + { + "epoch": 1.2579810347205074, + "grad_norm": 0.02212439477443695, + "learning_rate": 3.6387419017914204e-06, + "loss": 0.0017, + "step": 196140 + }, + { + "epoch": 1.2580451716142935, + "grad_norm": 0.04459254443645477, + "learning_rate": 3.638203349129465e-06, + "loss": 0.0011, + "step": 196150 + }, + { + "epoch": 1.2581093085080797, + "grad_norm": 0.10103648900985718, + "learning_rate": 3.637664813531796e-06, + "loss": 0.001, + "step": 196160 + }, + { + "epoch": 1.2581734454018658, + "grad_norm": 0.04929735139012337, + "learning_rate": 3.6371262950051593e-06, + "loss": 0.0016, + "step": 196170 + }, + { + "epoch": 1.258237582295652, + "grad_norm": 0.060934390872716904, + "learning_rate": 3.6365877935563055e-06, + "loss": 0.0018, + "step": 196180 + }, + { + "epoch": 1.258301719189438, + "grad_norm": 0.007423396687954664, + "learning_rate": 3.63604930919198e-06, + "loss": 0.0008, + "step": 196190 + }, + { + "epoch": 1.258365856083224, + "grad_norm": 0.039432283490896225, + "learning_rate": 3.635510841918932e-06, + "loss": 0.0013, + "step": 196200 + }, + { + "epoch": 1.2584299929770102, + "grad_norm": 0.003703708527609706, + "learning_rate": 3.634972391743908e-06, + "loss": 0.001, + "step": 196210 + }, + { + "epoch": 1.2584941298707961, + "grad_norm": 0.17724007368087769, + "learning_rate": 3.6344339586736568e-06, + "loss": 0.002, + "step": 196220 + }, + { + "epoch": 1.2585582667645823, + "grad_norm": 0.11132031679153442, + "learning_rate": 3.633895542714922e-06, + "loss": 0.0013, + "step": 196230 + }, + { + "epoch": 1.2586224036583684, + "grad_norm": 0.11109644174575806, + "learning_rate": 3.6333571438744542e-06, + "loss": 0.0012, + "step": 196240 + }, + { + "epoch": 1.2586865405521546, + "grad_norm": 0.08999479562044144, + "learning_rate": 3.632818762158996e-06, + "loss": 0.0026, + "step": 196250 + }, + { + "epoch": 1.2587506774459407, + "grad_norm": 0.08881781250238419, + "learning_rate": 3.632280397575298e-06, + "loss": 0.0019, + "step": 196260 + }, + { + "epoch": 1.2588148143397266, + "grad_norm": 0.0790308266878128, + "learning_rate": 3.6317420501301027e-06, + "loss": 0.0022, + "step": 196270 + }, + { + "epoch": 1.2588789512335128, + "grad_norm": 0.034874871373176575, + "learning_rate": 3.6312037198301576e-06, + "loss": 0.0019, + "step": 196280 + }, + { + "epoch": 1.258943088127299, + "grad_norm": 0.08324344456195831, + "learning_rate": 3.6306654066822087e-06, + "loss": 0.001, + "step": 196290 + }, + { + "epoch": 1.259007225021085, + "grad_norm": 0.009703489020466805, + "learning_rate": 3.630127110693e-06, + "loss": 0.0012, + "step": 196300 + }, + { + "epoch": 1.259071361914871, + "grad_norm": 0.3019203245639801, + "learning_rate": 3.629588831869279e-06, + "loss": 0.0013, + "step": 196310 + }, + { + "epoch": 1.2591354988086572, + "grad_norm": 0.110463447868824, + "learning_rate": 3.629050570217788e-06, + "loss": 0.0063, + "step": 196320 + }, + { + "epoch": 1.2591996357024433, + "grad_norm": 0.0900101438164711, + "learning_rate": 3.628512325745275e-06, + "loss": 0.0014, + "step": 196330 + }, + { + "epoch": 1.2592637725962295, + "grad_norm": 0.11893414705991745, + "learning_rate": 3.6279740984584804e-06, + "loss": 0.0016, + "step": 196340 + }, + { + "epoch": 1.2593279094900156, + "grad_norm": 0.03216111287474632, + "learning_rate": 3.6274358883641525e-06, + "loss": 0.002, + "step": 196350 + }, + { + "epoch": 1.2593920463838015, + "grad_norm": 0.12460221350193024, + "learning_rate": 3.626897695469033e-06, + "loss": 0.0019, + "step": 196360 + }, + { + "epoch": 1.2594561832775877, + "grad_norm": 0.08771844208240509, + "learning_rate": 3.626359519779868e-06, + "loss": 0.0012, + "step": 196370 + }, + { + "epoch": 1.2595203201713738, + "grad_norm": 0.07012537121772766, + "learning_rate": 3.6258213613033983e-06, + "loss": 0.0008, + "step": 196380 + }, + { + "epoch": 1.2595844570651598, + "grad_norm": 0.051890429109334946, + "learning_rate": 3.6252832200463708e-06, + "loss": 0.0014, + "step": 196390 + }, + { + "epoch": 1.259648593958946, + "grad_norm": 0.06170021370053291, + "learning_rate": 3.624745096015525e-06, + "loss": 0.0009, + "step": 196400 + }, + { + "epoch": 1.259712730852732, + "grad_norm": 0.17516081035137177, + "learning_rate": 3.624206989217608e-06, + "loss": 0.0023, + "step": 196410 + }, + { + "epoch": 1.2597768677465182, + "grad_norm": 0.09847494959831238, + "learning_rate": 3.6236688996593593e-06, + "loss": 0.0012, + "step": 196420 + }, + { + "epoch": 1.2598410046403044, + "grad_norm": 0.07129335403442383, + "learning_rate": 3.623130827347523e-06, + "loss": 0.0009, + "step": 196430 + }, + { + "epoch": 1.2599051415340905, + "grad_norm": 0.010882940143346786, + "learning_rate": 3.622592772288842e-06, + "loss": 0.0012, + "step": 196440 + }, + { + "epoch": 1.2599692784278764, + "grad_norm": 0.22375863790512085, + "learning_rate": 3.622054734490058e-06, + "loss": 0.0025, + "step": 196450 + }, + { + "epoch": 1.2600334153216626, + "grad_norm": 0.1855129450559616, + "learning_rate": 3.6215167139579133e-06, + "loss": 0.0033, + "step": 196460 + }, + { + "epoch": 1.2600975522154487, + "grad_norm": 0.08187199383974075, + "learning_rate": 3.620978710699148e-06, + "loss": 0.002, + "step": 196470 + }, + { + "epoch": 1.2601616891092347, + "grad_norm": 0.0629785805940628, + "learning_rate": 3.6204407247205066e-06, + "loss": 0.0013, + "step": 196480 + }, + { + "epoch": 1.2602258260030208, + "grad_norm": 0.10981062799692154, + "learning_rate": 3.619902756028728e-06, + "loss": 0.0026, + "step": 196490 + }, + { + "epoch": 1.260289962896807, + "grad_norm": 0.015897827222943306, + "learning_rate": 3.6193648046305553e-06, + "loss": 0.0015, + "step": 196500 + }, + { + "epoch": 1.260354099790593, + "grad_norm": 0.10848476737737656, + "learning_rate": 3.618826870532727e-06, + "loss": 0.0021, + "step": 196510 + }, + { + "epoch": 1.2604182366843792, + "grad_norm": 0.023016218096017838, + "learning_rate": 3.618288953741986e-06, + "loss": 0.0024, + "step": 196520 + }, + { + "epoch": 1.2604823735781652, + "grad_norm": 0.0073949359357357025, + "learning_rate": 3.6177510542650705e-06, + "loss": 0.0027, + "step": 196530 + }, + { + "epoch": 1.2605465104719513, + "grad_norm": 0.15369051694869995, + "learning_rate": 3.617213172108724e-06, + "loss": 0.0008, + "step": 196540 + }, + { + "epoch": 1.2606106473657375, + "grad_norm": 0.3139120638370514, + "learning_rate": 3.6166753072796834e-06, + "loss": 0.0035, + "step": 196550 + }, + { + "epoch": 1.2606747842595234, + "grad_norm": 0.09201942384243011, + "learning_rate": 3.616137459784691e-06, + "loss": 0.0035, + "step": 196560 + }, + { + "epoch": 1.2607389211533095, + "grad_norm": 0.12248411774635315, + "learning_rate": 3.615599629630484e-06, + "loss": 0.0009, + "step": 196570 + }, + { + "epoch": 1.2608030580470957, + "grad_norm": 0.132732555270195, + "learning_rate": 3.615061816823803e-06, + "loss": 0.0011, + "step": 196580 + }, + { + "epoch": 1.2608671949408818, + "grad_norm": 0.038817718625068665, + "learning_rate": 3.6145240213713884e-06, + "loss": 0.001, + "step": 196590 + }, + { + "epoch": 1.260931331834668, + "grad_norm": 0.04961353912949562, + "learning_rate": 3.613986243279977e-06, + "loss": 0.0015, + "step": 196600 + }, + { + "epoch": 1.2609954687284541, + "grad_norm": 0.01886233128607273, + "learning_rate": 3.61344848255631e-06, + "loss": 0.0017, + "step": 196610 + }, + { + "epoch": 1.26105960562224, + "grad_norm": 0.03975687548518181, + "learning_rate": 3.6129107392071227e-06, + "loss": 0.0012, + "step": 196620 + }, + { + "epoch": 1.2611237425160262, + "grad_norm": 0.048187654465436935, + "learning_rate": 3.6123730132391565e-06, + "loss": 0.0038, + "step": 196630 + }, + { + "epoch": 1.2611878794098124, + "grad_norm": 0.07477468997240067, + "learning_rate": 3.611835304659147e-06, + "loss": 0.002, + "step": 196640 + }, + { + "epoch": 1.2612520163035983, + "grad_norm": 0.007399613503366709, + "learning_rate": 3.6112976134738342e-06, + "loss": 0.0014, + "step": 196650 + }, + { + "epoch": 1.2613161531973844, + "grad_norm": 0.12148452550172806, + "learning_rate": 3.610759939689954e-06, + "loss": 0.0011, + "step": 196660 + }, + { + "epoch": 1.2613802900911706, + "grad_norm": 0.09646649658679962, + "learning_rate": 3.6102222833142452e-06, + "loss": 0.0021, + "step": 196670 + }, + { + "epoch": 1.2614444269849567, + "grad_norm": 0.013172101229429245, + "learning_rate": 3.609684644353444e-06, + "loss": 0.001, + "step": 196680 + }, + { + "epoch": 1.2615085638787429, + "grad_norm": 0.052144456654787064, + "learning_rate": 3.6091470228142888e-06, + "loss": 0.0032, + "step": 196690 + }, + { + "epoch": 1.2615727007725288, + "grad_norm": 0.06968370079994202, + "learning_rate": 3.6086094187035148e-06, + "loss": 0.0009, + "step": 196700 + }, + { + "epoch": 1.261636837666315, + "grad_norm": 0.05902137607336044, + "learning_rate": 3.6080718320278595e-06, + "loss": 0.0012, + "step": 196710 + }, + { + "epoch": 1.261700974560101, + "grad_norm": 0.13474467396736145, + "learning_rate": 3.607534262794058e-06, + "loss": 0.0016, + "step": 196720 + }, + { + "epoch": 1.2617651114538873, + "grad_norm": 0.02709767036139965, + "learning_rate": 3.6069967110088477e-06, + "loss": 0.0015, + "step": 196730 + }, + { + "epoch": 1.2618292483476732, + "grad_norm": 0.13237573206424713, + "learning_rate": 3.606459176678965e-06, + "loss": 0.0013, + "step": 196740 + }, + { + "epoch": 1.2618933852414593, + "grad_norm": 0.10878362506628036, + "learning_rate": 3.6059216598111437e-06, + "loss": 0.001, + "step": 196750 + }, + { + "epoch": 1.2619575221352455, + "grad_norm": 0.05179942399263382, + "learning_rate": 3.6053841604121213e-06, + "loss": 0.0016, + "step": 196760 + }, + { + "epoch": 1.2620216590290316, + "grad_norm": 0.014150524511933327, + "learning_rate": 3.6048466784886316e-06, + "loss": 0.0037, + "step": 196770 + }, + { + "epoch": 1.2620857959228178, + "grad_norm": 0.14497435092926025, + "learning_rate": 3.6043092140474107e-06, + "loss": 0.0012, + "step": 196780 + }, + { + "epoch": 1.2621499328166037, + "grad_norm": 0.12207596004009247, + "learning_rate": 3.6037717670951916e-06, + "loss": 0.0019, + "step": 196790 + }, + { + "epoch": 1.2622140697103899, + "grad_norm": 0.13010969758033752, + "learning_rate": 3.603234337638711e-06, + "loss": 0.0021, + "step": 196800 + }, + { + "epoch": 1.262278206604176, + "grad_norm": 0.14584875106811523, + "learning_rate": 3.602696925684702e-06, + "loss": 0.0011, + "step": 196810 + }, + { + "epoch": 1.262342343497962, + "grad_norm": 0.04134012758731842, + "learning_rate": 3.6021595312399003e-06, + "loss": 0.0008, + "step": 196820 + }, + { + "epoch": 1.262406480391748, + "grad_norm": 0.03213493153452873, + "learning_rate": 3.6016221543110373e-06, + "loss": 0.0012, + "step": 196830 + }, + { + "epoch": 1.2624706172855342, + "grad_norm": 0.061812907457351685, + "learning_rate": 3.6010847949048487e-06, + "loss": 0.0023, + "step": 196840 + }, + { + "epoch": 1.2625347541793204, + "grad_norm": 0.004594553727656603, + "learning_rate": 3.600547453028067e-06, + "loss": 0.002, + "step": 196850 + }, + { + "epoch": 1.2625988910731065, + "grad_norm": 0.011848662979900837, + "learning_rate": 3.6000101286874254e-06, + "loss": 0.002, + "step": 196860 + }, + { + "epoch": 1.2626630279668927, + "grad_norm": 0.007808353286236525, + "learning_rate": 3.599472821889657e-06, + "loss": 0.001, + "step": 196870 + }, + { + "epoch": 1.2627271648606786, + "grad_norm": 0.042417317628860474, + "learning_rate": 3.598935532641497e-06, + "loss": 0.0015, + "step": 196880 + }, + { + "epoch": 1.2627913017544647, + "grad_norm": 0.16768033802509308, + "learning_rate": 3.598398260949675e-06, + "loss": 0.0021, + "step": 196890 + }, + { + "epoch": 1.262855438648251, + "grad_norm": 0.0659007728099823, + "learning_rate": 3.597861006820926e-06, + "loss": 0.001, + "step": 196900 + }, + { + "epoch": 1.2629195755420368, + "grad_norm": 0.006474540568888187, + "learning_rate": 3.597323770261979e-06, + "loss": 0.0014, + "step": 196910 + }, + { + "epoch": 1.262983712435823, + "grad_norm": 0.20306262373924255, + "learning_rate": 3.5967865512795684e-06, + "loss": 0.0014, + "step": 196920 + }, + { + "epoch": 1.2630478493296091, + "grad_norm": 0.10493241995573044, + "learning_rate": 3.5962493498804244e-06, + "loss": 0.0019, + "step": 196930 + }, + { + "epoch": 1.2631119862233953, + "grad_norm": 0.12724147737026215, + "learning_rate": 3.595712166071281e-06, + "loss": 0.0013, + "step": 196940 + }, + { + "epoch": 1.2631761231171814, + "grad_norm": 0.014561139978468418, + "learning_rate": 3.595174999858866e-06, + "loss": 0.0013, + "step": 196950 + }, + { + "epoch": 1.2632402600109673, + "grad_norm": 0.05698208510875702, + "learning_rate": 3.5946378512499137e-06, + "loss": 0.002, + "step": 196960 + }, + { + "epoch": 1.2633043969047535, + "grad_norm": 0.12596088647842407, + "learning_rate": 3.5941007202511526e-06, + "loss": 0.0011, + "step": 196970 + }, + { + "epoch": 1.2633685337985396, + "grad_norm": 0.08804207295179367, + "learning_rate": 3.5935636068693147e-06, + "loss": 0.0008, + "step": 196980 + }, + { + "epoch": 1.2634326706923256, + "grad_norm": 0.15855345129966736, + "learning_rate": 3.593026511111129e-06, + "loss": 0.0016, + "step": 196990 + }, + { + "epoch": 1.2634968075861117, + "grad_norm": 0.16150519251823425, + "learning_rate": 3.592489432983327e-06, + "loss": 0.0027, + "step": 197000 + }, + { + "epoch": 1.2635609444798979, + "grad_norm": 0.08217993378639221, + "learning_rate": 3.5919523724926396e-06, + "loss": 0.0012, + "step": 197010 + }, + { + "epoch": 1.263625081373684, + "grad_norm": 0.04182827100157738, + "learning_rate": 3.5914153296457936e-06, + "loss": 0.0015, + "step": 197020 + }, + { + "epoch": 1.2636892182674702, + "grad_norm": 0.09404095262289047, + "learning_rate": 3.5908783044495215e-06, + "loss": 0.0013, + "step": 197030 + }, + { + "epoch": 1.2637533551612563, + "grad_norm": 0.11161879450082779, + "learning_rate": 3.5903412969105507e-06, + "loss": 0.002, + "step": 197040 + }, + { + "epoch": 1.2638174920550422, + "grad_norm": 0.08677653223276138, + "learning_rate": 3.5898043070356114e-06, + "loss": 0.0014, + "step": 197050 + }, + { + "epoch": 1.2638816289488284, + "grad_norm": 0.13284312188625336, + "learning_rate": 3.589267334831431e-06, + "loss": 0.007, + "step": 197060 + }, + { + "epoch": 1.2639457658426145, + "grad_norm": 0.1533959060907364, + "learning_rate": 3.58873038030474e-06, + "loss": 0.0016, + "step": 197070 + }, + { + "epoch": 1.2640099027364005, + "grad_norm": 0.28346744179725647, + "learning_rate": 3.5881934434622655e-06, + "loss": 0.0015, + "step": 197080 + }, + { + "epoch": 1.2640740396301866, + "grad_norm": 0.025374893099069595, + "learning_rate": 3.5876565243107377e-06, + "loss": 0.0013, + "step": 197090 + }, + { + "epoch": 1.2641381765239728, + "grad_norm": 0.0803384780883789, + "learning_rate": 3.5871196228568807e-06, + "loss": 0.0014, + "step": 197100 + }, + { + "epoch": 1.264202313417759, + "grad_norm": 0.04518803954124451, + "learning_rate": 3.5865827391074265e-06, + "loss": 0.0011, + "step": 197110 + }, + { + "epoch": 1.264266450311545, + "grad_norm": 0.08995330333709717, + "learning_rate": 3.5860458730690996e-06, + "loss": 0.0013, + "step": 197120 + }, + { + "epoch": 1.264330587205331, + "grad_norm": 0.06358237564563751, + "learning_rate": 3.585509024748628e-06, + "loss": 0.0008, + "step": 197130 + }, + { + "epoch": 1.2643947240991171, + "grad_norm": 0.05482769012451172, + "learning_rate": 3.5849721941527405e-06, + "loss": 0.002, + "step": 197140 + }, + { + "epoch": 1.2644588609929033, + "grad_norm": 0.019504649564623833, + "learning_rate": 3.5844353812881617e-06, + "loss": 0.0016, + "step": 197150 + }, + { + "epoch": 1.2645229978866894, + "grad_norm": 0.05306436866521835, + "learning_rate": 3.5838985861616205e-06, + "loss": 0.0008, + "step": 197160 + }, + { + "epoch": 1.2645871347804754, + "grad_norm": 0.09248397499322891, + "learning_rate": 3.5833618087798406e-06, + "loss": 0.0017, + "step": 197170 + }, + { + "epoch": 1.2646512716742615, + "grad_norm": 0.04191691800951958, + "learning_rate": 3.582825049149551e-06, + "loss": 0.0011, + "step": 197180 + }, + { + "epoch": 1.2647154085680476, + "grad_norm": 0.009918549098074436, + "learning_rate": 3.582288307277475e-06, + "loss": 0.0011, + "step": 197190 + }, + { + "epoch": 1.2647795454618338, + "grad_norm": 0.007974385283887386, + "learning_rate": 3.5817515831703413e-06, + "loss": 0.0015, + "step": 197200 + }, + { + "epoch": 1.26484368235562, + "grad_norm": 0.1345491111278534, + "learning_rate": 3.5812148768348724e-06, + "loss": 0.0017, + "step": 197210 + }, + { + "epoch": 1.2649078192494059, + "grad_norm": 0.07586783915758133, + "learning_rate": 3.5806781882777963e-06, + "loss": 0.0028, + "step": 197220 + }, + { + "epoch": 1.264971956143192, + "grad_norm": 0.060350608080625534, + "learning_rate": 3.580141517505836e-06, + "loss": 0.0016, + "step": 197230 + }, + { + "epoch": 1.2650360930369782, + "grad_norm": 0.07729600369930267, + "learning_rate": 3.5796048645257185e-06, + "loss": 0.0013, + "step": 197240 + }, + { + "epoch": 1.265100229930764, + "grad_norm": 0.11275080591440201, + "learning_rate": 3.579068229344166e-06, + "loss": 0.0021, + "step": 197250 + }, + { + "epoch": 1.2651643668245502, + "grad_norm": 0.03644602745771408, + "learning_rate": 3.5785316119679047e-06, + "loss": 0.0024, + "step": 197260 + }, + { + "epoch": 1.2652285037183364, + "grad_norm": 0.13671661913394928, + "learning_rate": 3.5779950124036574e-06, + "loss": 0.003, + "step": 197270 + }, + { + "epoch": 1.2652926406121225, + "grad_norm": 0.008413456380367279, + "learning_rate": 3.5774584306581482e-06, + "loss": 0.0009, + "step": 197280 + }, + { + "epoch": 1.2653567775059087, + "grad_norm": 0.040338508784770966, + "learning_rate": 3.576921866738104e-06, + "loss": 0.0029, + "step": 197290 + }, + { + "epoch": 1.2654209143996948, + "grad_norm": 0.012562062591314316, + "learning_rate": 3.576385320650244e-06, + "loss": 0.0023, + "step": 197300 + }, + { + "epoch": 1.2654850512934808, + "grad_norm": 0.011955642141401768, + "learning_rate": 3.5758487924012942e-06, + "loss": 0.0013, + "step": 197310 + }, + { + "epoch": 1.265549188187267, + "grad_norm": 0.10264961421489716, + "learning_rate": 3.575312281997976e-06, + "loss": 0.0016, + "step": 197320 + }, + { + "epoch": 1.265613325081053, + "grad_norm": 0.06680679321289062, + "learning_rate": 3.5747757894470147e-06, + "loss": 0.0009, + "step": 197330 + }, + { + "epoch": 1.265677461974839, + "grad_norm": 0.03692662715911865, + "learning_rate": 3.57423931475513e-06, + "loss": 0.0017, + "step": 197340 + }, + { + "epoch": 1.2657415988686251, + "grad_norm": 0.1036771759390831, + "learning_rate": 3.5737028579290468e-06, + "loss": 0.0022, + "step": 197350 + }, + { + "epoch": 1.2658057357624113, + "grad_norm": 0.11271841078996658, + "learning_rate": 3.5731664189754845e-06, + "loss": 0.001, + "step": 197360 + }, + { + "epoch": 1.2658698726561974, + "grad_norm": 0.09648439288139343, + "learning_rate": 3.572629997901168e-06, + "loss": 0.0018, + "step": 197370 + }, + { + "epoch": 1.2659340095499836, + "grad_norm": 0.04070811718702316, + "learning_rate": 3.572093594712817e-06, + "loss": 0.0011, + "step": 197380 + }, + { + "epoch": 1.2659981464437695, + "grad_norm": 0.02357848361134529, + "learning_rate": 3.5715572094171548e-06, + "loss": 0.0013, + "step": 197390 + }, + { + "epoch": 1.2660622833375557, + "grad_norm": 0.0517885759472847, + "learning_rate": 3.5710208420209003e-06, + "loss": 0.0013, + "step": 197400 + }, + { + "epoch": 1.2661264202313418, + "grad_norm": 0.12000174075365067, + "learning_rate": 3.5704844925307775e-06, + "loss": 0.0021, + "step": 197410 + }, + { + "epoch": 1.2661905571251277, + "grad_norm": 0.06956811994314194, + "learning_rate": 3.5699481609535047e-06, + "loss": 0.0022, + "step": 197420 + }, + { + "epoch": 1.2662546940189139, + "grad_norm": 0.05584663152694702, + "learning_rate": 3.569411847295803e-06, + "loss": 0.001, + "step": 197430 + }, + { + "epoch": 1.2663188309127, + "grad_norm": 0.10756741464138031, + "learning_rate": 3.5688755515643943e-06, + "loss": 0.0021, + "step": 197440 + }, + { + "epoch": 1.2663829678064862, + "grad_norm": 0.06895508617162704, + "learning_rate": 3.5683392737659967e-06, + "loss": 0.0008, + "step": 197450 + }, + { + "epoch": 1.2664471047002723, + "grad_norm": 0.11756862699985504, + "learning_rate": 3.567803013907332e-06, + "loss": 0.0027, + "step": 197460 + }, + { + "epoch": 1.2665112415940585, + "grad_norm": 0.20282131433486938, + "learning_rate": 3.567266771995119e-06, + "loss": 0.0018, + "step": 197470 + }, + { + "epoch": 1.2665753784878444, + "grad_norm": 0.12561297416687012, + "learning_rate": 3.5667305480360776e-06, + "loss": 0.0012, + "step": 197480 + }, + { + "epoch": 1.2666395153816306, + "grad_norm": 0.1691175252199173, + "learning_rate": 3.5661943420369263e-06, + "loss": 0.0014, + "step": 197490 + }, + { + "epoch": 1.2667036522754167, + "grad_norm": 0.08796347677707672, + "learning_rate": 3.5656581540043843e-06, + "loss": 0.0016, + "step": 197500 + }, + { + "epoch": 1.2667677891692026, + "grad_norm": 0.072384774684906, + "learning_rate": 3.5651219839451712e-06, + "loss": 0.0013, + "step": 197510 + }, + { + "epoch": 1.2668319260629888, + "grad_norm": 0.09652019292116165, + "learning_rate": 3.5645858318660053e-06, + "loss": 0.0005, + "step": 197520 + }, + { + "epoch": 1.266896062956775, + "grad_norm": 0.0962836816906929, + "learning_rate": 3.5640496977736044e-06, + "loss": 0.0016, + "step": 197530 + }, + { + "epoch": 1.266960199850561, + "grad_norm": 0.12505964934825897, + "learning_rate": 3.563513581674688e-06, + "loss": 0.003, + "step": 197540 + }, + { + "epoch": 1.2670243367443472, + "grad_norm": 0.2275208979845047, + "learning_rate": 3.562977483575971e-06, + "loss": 0.0016, + "step": 197550 + }, + { + "epoch": 1.2670884736381332, + "grad_norm": 0.11794976890087128, + "learning_rate": 3.562441403484176e-06, + "loss": 0.002, + "step": 197560 + }, + { + "epoch": 1.2671526105319193, + "grad_norm": 0.2552700936794281, + "learning_rate": 3.561905341406015e-06, + "loss": 0.0016, + "step": 197570 + }, + { + "epoch": 1.2672167474257054, + "grad_norm": 0.09053491055965424, + "learning_rate": 3.5613692973482083e-06, + "loss": 0.0017, + "step": 197580 + }, + { + "epoch": 1.2672808843194916, + "grad_norm": 0.10517799854278564, + "learning_rate": 3.5608332713174743e-06, + "loss": 0.0008, + "step": 197590 + }, + { + "epoch": 1.2673450212132775, + "grad_norm": 0.03843609243631363, + "learning_rate": 3.5602972633205257e-06, + "loss": 0.0007, + "step": 197600 + }, + { + "epoch": 1.2674091581070637, + "grad_norm": 0.07241566479206085, + "learning_rate": 3.559761273364083e-06, + "loss": 0.0013, + "step": 197610 + }, + { + "epoch": 1.2674732950008498, + "grad_norm": 0.04209311306476593, + "learning_rate": 3.55922530145486e-06, + "loss": 0.0011, + "step": 197620 + }, + { + "epoch": 1.267537431894636, + "grad_norm": 0.0937863141298294, + "learning_rate": 3.558689347599572e-06, + "loss": 0.0011, + "step": 197630 + }, + { + "epoch": 1.2676015687884221, + "grad_norm": 0.20069929957389832, + "learning_rate": 3.5581534118049394e-06, + "loss": 0.0026, + "step": 197640 + }, + { + "epoch": 1.267665705682208, + "grad_norm": 0.10028954595327377, + "learning_rate": 3.5576174940776735e-06, + "loss": 0.0015, + "step": 197650 + }, + { + "epoch": 1.2677298425759942, + "grad_norm": 0.3673674166202545, + "learning_rate": 3.557081594424493e-06, + "loss": 0.0026, + "step": 197660 + }, + { + "epoch": 1.2677939794697803, + "grad_norm": 0.10088752955198288, + "learning_rate": 3.5565457128521096e-06, + "loss": 0.0011, + "step": 197670 + }, + { + "epoch": 1.2678581163635663, + "grad_norm": 0.09068503230810165, + "learning_rate": 3.5560098493672413e-06, + "loss": 0.0017, + "step": 197680 + }, + { + "epoch": 1.2679222532573524, + "grad_norm": 0.08756272494792938, + "learning_rate": 3.5554740039766e-06, + "loss": 0.0017, + "step": 197690 + }, + { + "epoch": 1.2679863901511386, + "grad_norm": 0.19590413570404053, + "learning_rate": 3.5549381766869027e-06, + "loss": 0.0021, + "step": 197700 + }, + { + "epoch": 1.2680505270449247, + "grad_norm": 0.11053825169801712, + "learning_rate": 3.5544023675048634e-06, + "loss": 0.0014, + "step": 197710 + }, + { + "epoch": 1.2681146639387109, + "grad_norm": 0.02658925950527191, + "learning_rate": 3.553866576437195e-06, + "loss": 0.0016, + "step": 197720 + }, + { + "epoch": 1.268178800832497, + "grad_norm": 0.039638325572013855, + "learning_rate": 3.553330803490612e-06, + "loss": 0.0011, + "step": 197730 + }, + { + "epoch": 1.268242937726283, + "grad_norm": 0.04033947363495827, + "learning_rate": 3.5527950486718275e-06, + "loss": 0.001, + "step": 197740 + }, + { + "epoch": 1.268307074620069, + "grad_norm": 0.361795037984848, + "learning_rate": 3.552259311987557e-06, + "loss": 0.0047, + "step": 197750 + }, + { + "epoch": 1.2683712115138552, + "grad_norm": 0.10919544100761414, + "learning_rate": 3.55172359344451e-06, + "loss": 0.0014, + "step": 197760 + }, + { + "epoch": 1.2684353484076412, + "grad_norm": 0.08699346333742142, + "learning_rate": 3.5511878930494028e-06, + "loss": 0.0011, + "step": 197770 + }, + { + "epoch": 1.2684994853014273, + "grad_norm": 0.020397085696458817, + "learning_rate": 3.550652210808946e-06, + "loss": 0.0011, + "step": 197780 + }, + { + "epoch": 1.2685636221952135, + "grad_norm": 0.4498283267021179, + "learning_rate": 3.5501165467298538e-06, + "loss": 0.0036, + "step": 197790 + }, + { + "epoch": 1.2686277590889996, + "grad_norm": 0.007840072736144066, + "learning_rate": 3.5495809008188357e-06, + "loss": 0.0009, + "step": 197800 + }, + { + "epoch": 1.2686918959827858, + "grad_norm": 0.16372261941432953, + "learning_rate": 3.5490452730826073e-06, + "loss": 0.0015, + "step": 197810 + }, + { + "epoch": 1.2687560328765717, + "grad_norm": 0.13359896838665009, + "learning_rate": 3.548509663527877e-06, + "loss": 0.0021, + "step": 197820 + }, + { + "epoch": 1.2688201697703578, + "grad_norm": 0.063264861702919, + "learning_rate": 3.54797407216136e-06, + "loss": 0.0012, + "step": 197830 + }, + { + "epoch": 1.268884306664144, + "grad_norm": 0.08021801710128784, + "learning_rate": 3.5474384989897637e-06, + "loss": 0.0013, + "step": 197840 + }, + { + "epoch": 1.2689484435579301, + "grad_norm": 0.07261926680803299, + "learning_rate": 3.546902944019801e-06, + "loss": 0.0014, + "step": 197850 + }, + { + "epoch": 1.269012580451716, + "grad_norm": 0.02300884947180748, + "learning_rate": 3.546367407258185e-06, + "loss": 0.0015, + "step": 197860 + }, + { + "epoch": 1.2690767173455022, + "grad_norm": 0.050239454954862595, + "learning_rate": 3.545831888711623e-06, + "loss": 0.0013, + "step": 197870 + }, + { + "epoch": 1.2691408542392884, + "grad_norm": 0.10576912015676498, + "learning_rate": 3.545296388386827e-06, + "loss": 0.0024, + "step": 197880 + }, + { + "epoch": 1.2692049911330745, + "grad_norm": 0.0587228424847126, + "learning_rate": 3.5447609062905062e-06, + "loss": 0.0008, + "step": 197890 + }, + { + "epoch": 1.2692691280268606, + "grad_norm": 0.13328789174556732, + "learning_rate": 3.544225442429372e-06, + "loss": 0.002, + "step": 197900 + }, + { + "epoch": 1.2693332649206466, + "grad_norm": 0.07111755758523941, + "learning_rate": 3.5436899968101334e-06, + "loss": 0.0007, + "step": 197910 + }, + { + "epoch": 1.2693974018144327, + "grad_norm": 0.08294267952442169, + "learning_rate": 3.5431545694394996e-06, + "loss": 0.0009, + "step": 197920 + }, + { + "epoch": 1.2694615387082189, + "grad_norm": 0.11793050915002823, + "learning_rate": 3.5426191603241805e-06, + "loss": 0.0018, + "step": 197930 + }, + { + "epoch": 1.2695256756020048, + "grad_norm": 0.06387563049793243, + "learning_rate": 3.5420837694708853e-06, + "loss": 0.0016, + "step": 197940 + }, + { + "epoch": 1.269589812495791, + "grad_norm": 0.18619148433208466, + "learning_rate": 3.541548396886322e-06, + "loss": 0.0014, + "step": 197950 + }, + { + "epoch": 1.269653949389577, + "grad_norm": 0.07638446241617203, + "learning_rate": 3.5410130425771993e-06, + "loss": 0.0047, + "step": 197960 + }, + { + "epoch": 1.2697180862833632, + "grad_norm": 0.14112454652786255, + "learning_rate": 3.540477706550226e-06, + "loss": 0.0019, + "step": 197970 + }, + { + "epoch": 1.2697822231771494, + "grad_norm": 0.10112203657627106, + "learning_rate": 3.53994238881211e-06, + "loss": 0.0012, + "step": 197980 + }, + { + "epoch": 1.2698463600709355, + "grad_norm": 0.05991072580218315, + "learning_rate": 3.5394070893695598e-06, + "loss": 0.0016, + "step": 197990 + }, + { + "epoch": 1.2699104969647215, + "grad_norm": 0.012906217016279697, + "learning_rate": 3.538871808229283e-06, + "loss": 0.0017, + "step": 198000 + }, + { + "epoch": 1.2699746338585076, + "grad_norm": 0.12138118594884872, + "learning_rate": 3.538336545397986e-06, + "loss": 0.0036, + "step": 198010 + }, + { + "epoch": 1.2700387707522938, + "grad_norm": 0.20799575746059418, + "learning_rate": 3.5378013008823765e-06, + "loss": 0.0016, + "step": 198020 + }, + { + "epoch": 1.2701029076460797, + "grad_norm": 0.05465441942214966, + "learning_rate": 3.537266074689163e-06, + "loss": 0.0015, + "step": 198030 + }, + { + "epoch": 1.2701670445398658, + "grad_norm": 0.07412154972553253, + "learning_rate": 3.53673086682505e-06, + "loss": 0.0012, + "step": 198040 + }, + { + "epoch": 1.270231181433652, + "grad_norm": 0.0858510211110115, + "learning_rate": 3.5361956772967453e-06, + "loss": 0.0017, + "step": 198050 + }, + { + "epoch": 1.2702953183274381, + "grad_norm": 0.05278509110212326, + "learning_rate": 3.5356605061109545e-06, + "loss": 0.0015, + "step": 198060 + }, + { + "epoch": 1.2703594552212243, + "grad_norm": 0.05897562950849533, + "learning_rate": 3.5351253532743847e-06, + "loss": 0.001, + "step": 198070 + }, + { + "epoch": 1.2704235921150102, + "grad_norm": 0.12483041733503342, + "learning_rate": 3.534590218793741e-06, + "loss": 0.0031, + "step": 198080 + }, + { + "epoch": 1.2704877290087964, + "grad_norm": 0.15329022705554962, + "learning_rate": 3.534055102675729e-06, + "loss": 0.0019, + "step": 198090 + }, + { + "epoch": 1.2705518659025825, + "grad_norm": 0.09980618208646774, + "learning_rate": 3.5335200049270537e-06, + "loss": 0.0019, + "step": 198100 + }, + { + "epoch": 1.2706160027963684, + "grad_norm": 0.004729558248072863, + "learning_rate": 3.5329849255544226e-06, + "loss": 0.0023, + "step": 198110 + }, + { + "epoch": 1.2706801396901546, + "grad_norm": 0.020556021481752396, + "learning_rate": 3.5324498645645368e-06, + "loss": 0.0009, + "step": 198120 + }, + { + "epoch": 1.2707442765839407, + "grad_norm": 0.037179723381996155, + "learning_rate": 3.531914821964103e-06, + "loss": 0.0016, + "step": 198130 + }, + { + "epoch": 1.2708084134777269, + "grad_norm": 0.289968341588974, + "learning_rate": 3.5313797977598276e-06, + "loss": 0.0021, + "step": 198140 + }, + { + "epoch": 1.270872550371513, + "grad_norm": 0.1045757308602333, + "learning_rate": 3.530844791958411e-06, + "loss": 0.0009, + "step": 198150 + }, + { + "epoch": 1.2709366872652992, + "grad_norm": 0.16217878460884094, + "learning_rate": 3.5303098045665607e-06, + "loss": 0.0012, + "step": 198160 + }, + { + "epoch": 1.271000824159085, + "grad_norm": 0.07823773473501205, + "learning_rate": 3.529774835590978e-06, + "loss": 0.0015, + "step": 198170 + }, + { + "epoch": 1.2710649610528713, + "grad_norm": 0.08206193894147873, + "learning_rate": 3.529239885038368e-06, + "loss": 0.0006, + "step": 198180 + }, + { + "epoch": 1.2711290979466574, + "grad_norm": 0.15786902606487274, + "learning_rate": 3.5287049529154325e-06, + "loss": 0.0016, + "step": 198190 + }, + { + "epoch": 1.2711932348404433, + "grad_norm": 0.018060648813843727, + "learning_rate": 3.528170039228877e-06, + "loss": 0.001, + "step": 198200 + }, + { + "epoch": 1.2712573717342295, + "grad_norm": 0.11646221578121185, + "learning_rate": 3.5276351439854013e-06, + "loss": 0.0019, + "step": 198210 + }, + { + "epoch": 1.2713215086280156, + "grad_norm": 0.13924136757850647, + "learning_rate": 3.527100267191711e-06, + "loss": 0.0014, + "step": 198220 + }, + { + "epoch": 1.2713856455218018, + "grad_norm": 0.19885863363742828, + "learning_rate": 3.526565408854505e-06, + "loss": 0.0021, + "step": 198230 + }, + { + "epoch": 1.271449782415588, + "grad_norm": 0.08520758152008057, + "learning_rate": 3.526030568980489e-06, + "loss": 0.0011, + "step": 198240 + }, + { + "epoch": 1.2715139193093739, + "grad_norm": 0.07740870863199234, + "learning_rate": 3.525495747576363e-06, + "loss": 0.0011, + "step": 198250 + }, + { + "epoch": 1.27157805620316, + "grad_norm": 0.05698053911328316, + "learning_rate": 3.5249609446488293e-06, + "loss": 0.0022, + "step": 198260 + }, + { + "epoch": 1.2716421930969461, + "grad_norm": 0.10947985202074051, + "learning_rate": 3.5244261602045882e-06, + "loss": 0.0012, + "step": 198270 + }, + { + "epoch": 1.2717063299907323, + "grad_norm": 0.03766002878546715, + "learning_rate": 3.523891394250342e-06, + "loss": 0.0012, + "step": 198280 + }, + { + "epoch": 1.2717704668845182, + "grad_norm": 0.05742770805954933, + "learning_rate": 3.523356646792793e-06, + "loss": 0.001, + "step": 198290 + }, + { + "epoch": 1.2718346037783044, + "grad_norm": 0.22869835793972015, + "learning_rate": 3.522821917838639e-06, + "loss": 0.0016, + "step": 198300 + }, + { + "epoch": 1.2718987406720905, + "grad_norm": 0.034320537000894547, + "learning_rate": 3.5222872073945835e-06, + "loss": 0.0011, + "step": 198310 + }, + { + "epoch": 1.2719628775658767, + "grad_norm": 0.016153911128640175, + "learning_rate": 3.5217525154673243e-06, + "loss": 0.0014, + "step": 198320 + }, + { + "epoch": 1.2720270144596628, + "grad_norm": 0.0031908380333334208, + "learning_rate": 3.5212178420635635e-06, + "loss": 0.0015, + "step": 198330 + }, + { + "epoch": 1.2720911513534487, + "grad_norm": 0.031341325491666794, + "learning_rate": 3.5206831871899994e-06, + "loss": 0.0018, + "step": 198340 + }, + { + "epoch": 1.272155288247235, + "grad_norm": 0.11928802728652954, + "learning_rate": 3.520148550853333e-06, + "loss": 0.0017, + "step": 198350 + }, + { + "epoch": 1.272219425141021, + "grad_norm": 0.0955466702580452, + "learning_rate": 3.5196139330602615e-06, + "loss": 0.0025, + "step": 198360 + }, + { + "epoch": 1.272283562034807, + "grad_norm": 0.0947674885392189, + "learning_rate": 3.5190793338174865e-06, + "loss": 0.002, + "step": 198370 + }, + { + "epoch": 1.2723476989285931, + "grad_norm": 0.0821545422077179, + "learning_rate": 3.518544753131705e-06, + "loss": 0.0017, + "step": 198380 + }, + { + "epoch": 1.2724118358223793, + "grad_norm": 0.10674940794706345, + "learning_rate": 3.5180101910096175e-06, + "loss": 0.0011, + "step": 198390 + }, + { + "epoch": 1.2724759727161654, + "grad_norm": 0.16639532148838043, + "learning_rate": 3.5174756474579188e-06, + "loss": 0.0011, + "step": 198400 + }, + { + "epoch": 1.2725401096099516, + "grad_norm": 0.0952320322394371, + "learning_rate": 3.5169411224833124e-06, + "loss": 0.0019, + "step": 198410 + }, + { + "epoch": 1.2726042465037377, + "grad_norm": 0.07761390507221222, + "learning_rate": 3.5164066160924924e-06, + "loss": 0.0017, + "step": 198420 + }, + { + "epoch": 1.2726683833975236, + "grad_norm": 0.22065448760986328, + "learning_rate": 3.5158721282921592e-06, + "loss": 0.0024, + "step": 198430 + }, + { + "epoch": 1.2727325202913098, + "grad_norm": 0.08059868961572647, + "learning_rate": 3.5153376590890074e-06, + "loss": 0.0017, + "step": 198440 + }, + { + "epoch": 1.272796657185096, + "grad_norm": 0.1412181556224823, + "learning_rate": 3.5148032084897375e-06, + "loss": 0.001, + "step": 198450 + }, + { + "epoch": 1.2728607940788819, + "grad_norm": 0.13328877091407776, + "learning_rate": 3.5142687765010443e-06, + "loss": 0.0014, + "step": 198460 + }, + { + "epoch": 1.272924930972668, + "grad_norm": 0.0085500068962574, + "learning_rate": 3.513734363129625e-06, + "loss": 0.0009, + "step": 198470 + }, + { + "epoch": 1.2729890678664542, + "grad_norm": 0.1177920401096344, + "learning_rate": 3.513199968382176e-06, + "loss": 0.002, + "step": 198480 + }, + { + "epoch": 1.2730532047602403, + "grad_norm": 0.12280911952257156, + "learning_rate": 3.5126655922653953e-06, + "loss": 0.0014, + "step": 198490 + }, + { + "epoch": 1.2731173416540265, + "grad_norm": 0.007653483655303717, + "learning_rate": 3.512131234785976e-06, + "loss": 0.0012, + "step": 198500 + }, + { + "epoch": 1.2731814785478124, + "grad_norm": 0.0819479450583458, + "learning_rate": 3.5115968959506164e-06, + "loss": 0.0022, + "step": 198510 + }, + { + "epoch": 1.2732456154415985, + "grad_norm": 0.05366813763976097, + "learning_rate": 3.511062575766011e-06, + "loss": 0.0016, + "step": 198520 + }, + { + "epoch": 1.2733097523353847, + "grad_norm": 0.017890458926558495, + "learning_rate": 3.510528274238857e-06, + "loss": 0.0009, + "step": 198530 + }, + { + "epoch": 1.2733738892291706, + "grad_norm": 0.014618410728871822, + "learning_rate": 3.509993991375847e-06, + "loss": 0.0018, + "step": 198540 + }, + { + "epoch": 1.2734380261229568, + "grad_norm": 0.136849507689476, + "learning_rate": 3.509459727183677e-06, + "loss": 0.0017, + "step": 198550 + }, + { + "epoch": 1.273502163016743, + "grad_norm": 0.05653097480535507, + "learning_rate": 3.5089254816690425e-06, + "loss": 0.0025, + "step": 198560 + }, + { + "epoch": 1.273566299910529, + "grad_norm": 0.14733551442623138, + "learning_rate": 3.5083912548386367e-06, + "loss": 0.0013, + "step": 198570 + }, + { + "epoch": 1.2736304368043152, + "grad_norm": 0.3166915774345398, + "learning_rate": 3.5078570466991556e-06, + "loss": 0.0022, + "step": 198580 + }, + { + "epoch": 1.2736945736981014, + "grad_norm": 0.07953108847141266, + "learning_rate": 3.5073228572572914e-06, + "loss": 0.002, + "step": 198590 + }, + { + "epoch": 1.2737587105918873, + "grad_norm": 0.09786271303892136, + "learning_rate": 3.5067886865197397e-06, + "loss": 0.0042, + "step": 198600 + }, + { + "epoch": 1.2738228474856734, + "grad_norm": 0.06881196796894073, + "learning_rate": 3.5062545344931915e-06, + "loss": 0.0011, + "step": 198610 + }, + { + "epoch": 1.2738869843794596, + "grad_norm": 0.007864178158342838, + "learning_rate": 3.5057204011843427e-06, + "loss": 0.0022, + "step": 198620 + }, + { + "epoch": 1.2739511212732455, + "grad_norm": 0.15022243559360504, + "learning_rate": 3.5051862865998843e-06, + "loss": 0.0028, + "step": 198630 + }, + { + "epoch": 1.2740152581670317, + "grad_norm": 0.13478389382362366, + "learning_rate": 3.50465219074651e-06, + "loss": 0.0015, + "step": 198640 + }, + { + "epoch": 1.2740793950608178, + "grad_norm": 0.30176836252212524, + "learning_rate": 3.5041181136309133e-06, + "loss": 0.0009, + "step": 198650 + }, + { + "epoch": 1.274143531954604, + "grad_norm": 0.054974887520074844, + "learning_rate": 3.5035840552597855e-06, + "loss": 0.0013, + "step": 198660 + }, + { + "epoch": 1.27420766884839, + "grad_norm": 0.11352365463972092, + "learning_rate": 3.5030500156398182e-06, + "loss": 0.0014, + "step": 198670 + }, + { + "epoch": 1.274271805742176, + "grad_norm": 0.08226925879716873, + "learning_rate": 3.5025159947777055e-06, + "loss": 0.0011, + "step": 198680 + }, + { + "epoch": 1.2743359426359622, + "grad_norm": 0.07407528907060623, + "learning_rate": 3.5019819926801357e-06, + "loss": 0.0013, + "step": 198690 + }, + { + "epoch": 1.2744000795297483, + "grad_norm": 0.15557901561260223, + "learning_rate": 3.5014480093538027e-06, + "loss": 0.0017, + "step": 198700 + }, + { + "epoch": 1.2744642164235345, + "grad_norm": 0.07050323486328125, + "learning_rate": 3.500914044805398e-06, + "loss": 0.0007, + "step": 198710 + }, + { + "epoch": 1.2745283533173204, + "grad_norm": 0.09494238346815109, + "learning_rate": 3.5003800990416117e-06, + "loss": 0.0018, + "step": 198720 + }, + { + "epoch": 1.2745924902111065, + "grad_norm": 0.05900660529732704, + "learning_rate": 3.499846172069134e-06, + "loss": 0.001, + "step": 198730 + }, + { + "epoch": 1.2746566271048927, + "grad_norm": 0.07017749547958374, + "learning_rate": 3.4993122638946555e-06, + "loss": 0.0032, + "step": 198740 + }, + { + "epoch": 1.2747207639986788, + "grad_norm": 0.04791183024644852, + "learning_rate": 3.498778374524868e-06, + "loss": 0.0015, + "step": 198750 + }, + { + "epoch": 1.274784900892465, + "grad_norm": 0.10131379961967468, + "learning_rate": 3.498244503966459e-06, + "loss": 0.0026, + "step": 198760 + }, + { + "epoch": 1.274849037786251, + "grad_norm": 0.11673296242952347, + "learning_rate": 3.497710652226122e-06, + "loss": 0.0009, + "step": 198770 + }, + { + "epoch": 1.274913174680037, + "grad_norm": 0.032778967171907425, + "learning_rate": 3.4971768193105414e-06, + "loss": 0.001, + "step": 198780 + }, + { + "epoch": 1.2749773115738232, + "grad_norm": 0.044548217207193375, + "learning_rate": 3.4966430052264112e-06, + "loss": 0.0014, + "step": 198790 + }, + { + "epoch": 1.2750414484676091, + "grad_norm": 0.12870903313159943, + "learning_rate": 3.496109209980417e-06, + "loss": 0.0018, + "step": 198800 + }, + { + "epoch": 1.2751055853613953, + "grad_norm": 0.2227126806974411, + "learning_rate": 3.4955754335792513e-06, + "loss": 0.0012, + "step": 198810 + }, + { + "epoch": 1.2751697222551814, + "grad_norm": 0.09919065237045288, + "learning_rate": 3.4950416760295987e-06, + "loss": 0.0013, + "step": 198820 + }, + { + "epoch": 1.2752338591489676, + "grad_norm": 0.18281763792037964, + "learning_rate": 3.4945079373381496e-06, + "loss": 0.0024, + "step": 198830 + }, + { + "epoch": 1.2752979960427537, + "grad_norm": 0.06877867132425308, + "learning_rate": 3.493974217511593e-06, + "loss": 0.0015, + "step": 198840 + }, + { + "epoch": 1.2753621329365399, + "grad_norm": 0.03930193558335304, + "learning_rate": 3.4934405165566153e-06, + "loss": 0.0029, + "step": 198850 + }, + { + "epoch": 1.2754262698303258, + "grad_norm": 0.058170489966869354, + "learning_rate": 3.492906834479905e-06, + "loss": 0.0008, + "step": 198860 + }, + { + "epoch": 1.275490406724112, + "grad_norm": 0.02838187851011753, + "learning_rate": 3.4923731712881483e-06, + "loss": 0.0007, + "step": 198870 + }, + { + "epoch": 1.275554543617898, + "grad_norm": 0.20764176547527313, + "learning_rate": 3.4918395269880345e-06, + "loss": 0.002, + "step": 198880 + }, + { + "epoch": 1.275618680511684, + "grad_norm": 0.18721093237400055, + "learning_rate": 3.491305901586248e-06, + "loss": 0.0011, + "step": 198890 + }, + { + "epoch": 1.2756828174054702, + "grad_norm": 0.027380438521504402, + "learning_rate": 3.4907722950894785e-06, + "loss": 0.001, + "step": 198900 + }, + { + "epoch": 1.2757469542992563, + "grad_norm": 0.04297863319516182, + "learning_rate": 3.4902387075044085e-06, + "loss": 0.0016, + "step": 198910 + }, + { + "epoch": 1.2758110911930425, + "grad_norm": 0.013423757627606392, + "learning_rate": 3.489705138837729e-06, + "loss": 0.0024, + "step": 198920 + }, + { + "epoch": 1.2758752280868286, + "grad_norm": 0.21460917592048645, + "learning_rate": 3.4891715890961215e-06, + "loss": 0.0032, + "step": 198930 + }, + { + "epoch": 1.2759393649806146, + "grad_norm": 0.09087955951690674, + "learning_rate": 3.488638058286275e-06, + "loss": 0.0011, + "step": 198940 + }, + { + "epoch": 1.2760035018744007, + "grad_norm": 0.18837442994117737, + "learning_rate": 3.488104546414873e-06, + "loss": 0.0017, + "step": 198950 + }, + { + "epoch": 1.2760676387681869, + "grad_norm": 0.02254685014486313, + "learning_rate": 3.487571053488602e-06, + "loss": 0.0009, + "step": 198960 + }, + { + "epoch": 1.2761317756619728, + "grad_norm": 0.10164394229650497, + "learning_rate": 3.4870375795141463e-06, + "loss": 0.001, + "step": 198970 + }, + { + "epoch": 1.276195912555759, + "grad_norm": 0.12734057009220123, + "learning_rate": 3.4865041244981906e-06, + "loss": 0.0015, + "step": 198980 + }, + { + "epoch": 1.276260049449545, + "grad_norm": 0.04082336649298668, + "learning_rate": 3.4859706884474207e-06, + "loss": 0.0007, + "step": 198990 + }, + { + "epoch": 1.2763241863433312, + "grad_norm": 0.2712097764015198, + "learning_rate": 3.48543727136852e-06, + "loss": 0.0023, + "step": 199000 + }, + { + "epoch": 1.2763883232371174, + "grad_norm": 0.1718614399433136, + "learning_rate": 3.484903873268173e-06, + "loss": 0.0029, + "step": 199010 + }, + { + "epoch": 1.2764524601309035, + "grad_norm": 0.04521147906780243, + "learning_rate": 3.484370494153062e-06, + "loss": 0.0008, + "step": 199020 + }, + { + "epoch": 1.2765165970246894, + "grad_norm": 0.04617341235280037, + "learning_rate": 3.483837134029874e-06, + "loss": 0.0015, + "step": 199030 + }, + { + "epoch": 1.2765807339184756, + "grad_norm": 0.057335786521434784, + "learning_rate": 3.483303792905288e-06, + "loss": 0.0011, + "step": 199040 + }, + { + "epoch": 1.2766448708122617, + "grad_norm": 0.010286826640367508, + "learning_rate": 3.4827704707859917e-06, + "loss": 0.0026, + "step": 199050 + }, + { + "epoch": 1.2767090077060477, + "grad_norm": 0.014647098258137703, + "learning_rate": 3.482237167678664e-06, + "loss": 0.0021, + "step": 199060 + }, + { + "epoch": 1.2767731445998338, + "grad_norm": 0.0392424501478672, + "learning_rate": 3.481703883589991e-06, + "loss": 0.0019, + "step": 199070 + }, + { + "epoch": 1.27683728149362, + "grad_norm": 0.08423841744661331, + "learning_rate": 3.4811706185266514e-06, + "loss": 0.0018, + "step": 199080 + }, + { + "epoch": 1.2769014183874061, + "grad_norm": 0.08013307303190231, + "learning_rate": 3.480637372495331e-06, + "loss": 0.0015, + "step": 199090 + }, + { + "epoch": 1.2769655552811923, + "grad_norm": 0.43441662192344666, + "learning_rate": 3.4801041455027094e-06, + "loss": 0.0016, + "step": 199100 + }, + { + "epoch": 1.2770296921749782, + "grad_norm": 0.15558330714702606, + "learning_rate": 3.47957093755547e-06, + "loss": 0.0012, + "step": 199110 + }, + { + "epoch": 1.2770938290687643, + "grad_norm": 0.07779597491025925, + "learning_rate": 3.479037748660292e-06, + "loss": 0.0012, + "step": 199120 + }, + { + "epoch": 1.2771579659625505, + "grad_norm": 0.1093960627913475, + "learning_rate": 3.4785045788238582e-06, + "loss": 0.0017, + "step": 199130 + }, + { + "epoch": 1.2772221028563366, + "grad_norm": 0.0921272486448288, + "learning_rate": 3.4779714280528507e-06, + "loss": 0.0014, + "step": 199140 + }, + { + "epoch": 1.2772862397501226, + "grad_norm": 0.004315617028623819, + "learning_rate": 3.4774382963539475e-06, + "loss": 0.0018, + "step": 199150 + }, + { + "epoch": 1.2773503766439087, + "grad_norm": 0.1703188568353653, + "learning_rate": 3.4769051837338303e-06, + "loss": 0.0021, + "step": 199160 + }, + { + "epoch": 1.2774145135376949, + "grad_norm": 0.1463497132062912, + "learning_rate": 3.4763720901991815e-06, + "loss": 0.0009, + "step": 199170 + }, + { + "epoch": 1.277478650431481, + "grad_norm": 0.018715834245085716, + "learning_rate": 3.475839015756678e-06, + "loss": 0.0018, + "step": 199180 + }, + { + "epoch": 1.2775427873252672, + "grad_norm": 0.08665967732667923, + "learning_rate": 3.475305960413002e-06, + "loss": 0.001, + "step": 199190 + }, + { + "epoch": 1.277606924219053, + "grad_norm": 0.024206936359405518, + "learning_rate": 3.4747729241748308e-06, + "loss": 0.0009, + "step": 199200 + }, + { + "epoch": 1.2776710611128392, + "grad_norm": 0.1050475686788559, + "learning_rate": 3.4742399070488464e-06, + "loss": 0.0011, + "step": 199210 + }, + { + "epoch": 1.2777351980066254, + "grad_norm": 0.156494602560997, + "learning_rate": 3.473706909041725e-06, + "loss": 0.0017, + "step": 199220 + }, + { + "epoch": 1.2777993349004113, + "grad_norm": 0.07716964930295944, + "learning_rate": 3.4731739301601482e-06, + "loss": 0.0014, + "step": 199230 + }, + { + "epoch": 1.2778634717941975, + "grad_norm": 0.2524273991584778, + "learning_rate": 3.472640970410792e-06, + "loss": 0.0037, + "step": 199240 + }, + { + "epoch": 1.2779276086879836, + "grad_norm": 0.05351315438747406, + "learning_rate": 3.4721080298003364e-06, + "loss": 0.0009, + "step": 199250 + }, + { + "epoch": 1.2779917455817698, + "grad_norm": 0.08141786605119705, + "learning_rate": 3.47157510833546e-06, + "loss": 0.0015, + "step": 199260 + }, + { + "epoch": 1.278055882475556, + "grad_norm": 0.09562107920646667, + "learning_rate": 3.4710422060228384e-06, + "loss": 0.0018, + "step": 199270 + }, + { + "epoch": 1.278120019369342, + "grad_norm": 0.03243833780288696, + "learning_rate": 3.470509322869152e-06, + "loss": 0.0013, + "step": 199280 + }, + { + "epoch": 1.278184156263128, + "grad_norm": 0.10546350479125977, + "learning_rate": 3.469976458881076e-06, + "loss": 0.0011, + "step": 199290 + }, + { + "epoch": 1.2782482931569141, + "grad_norm": 0.09585004299879074, + "learning_rate": 3.469443614065289e-06, + "loss": 0.0011, + "step": 199300 + }, + { + "epoch": 1.2783124300507003, + "grad_norm": 0.13913419842720032, + "learning_rate": 3.4689107884284666e-06, + "loss": 0.0023, + "step": 199310 + }, + { + "epoch": 1.2783765669444862, + "grad_norm": 0.09405070543289185, + "learning_rate": 3.4683779819772877e-06, + "loss": 0.0015, + "step": 199320 + }, + { + "epoch": 1.2784407038382724, + "grad_norm": 0.07846540957689285, + "learning_rate": 3.467845194718426e-06, + "loss": 0.0019, + "step": 199330 + }, + { + "epoch": 1.2785048407320585, + "grad_norm": 0.09102808684110641, + "learning_rate": 3.46731242665856e-06, + "loss": 0.0011, + "step": 199340 + }, + { + "epoch": 1.2785689776258446, + "grad_norm": 0.15697677433490753, + "learning_rate": 3.466779677804364e-06, + "loss": 0.0016, + "step": 199350 + }, + { + "epoch": 1.2786331145196308, + "grad_norm": 0.029571913182735443, + "learning_rate": 3.466246948162515e-06, + "loss": 0.0012, + "step": 199360 + }, + { + "epoch": 1.2786972514134167, + "grad_norm": 0.04870545119047165, + "learning_rate": 3.4657142377396876e-06, + "loss": 0.0015, + "step": 199370 + }, + { + "epoch": 1.2787613883072029, + "grad_norm": 0.059037283062934875, + "learning_rate": 3.4651815465425577e-06, + "loss": 0.001, + "step": 199380 + }, + { + "epoch": 1.278825525200989, + "grad_norm": 0.047760073095560074, + "learning_rate": 3.464648874577799e-06, + "loss": 0.0012, + "step": 199390 + }, + { + "epoch": 1.2788896620947752, + "grad_norm": 0.1385759860277176, + "learning_rate": 3.4641162218520875e-06, + "loss": 0.0015, + "step": 199400 + }, + { + "epoch": 1.278953798988561, + "grad_norm": 0.06457321345806122, + "learning_rate": 3.463583588372098e-06, + "loss": 0.0013, + "step": 199410 + }, + { + "epoch": 1.2790179358823472, + "grad_norm": 0.08694420754909515, + "learning_rate": 3.4630509741445027e-06, + "loss": 0.0011, + "step": 199420 + }, + { + "epoch": 1.2790820727761334, + "grad_norm": 0.18832582235336304, + "learning_rate": 3.4625183791759793e-06, + "loss": 0.001, + "step": 199430 + }, + { + "epoch": 1.2791462096699195, + "grad_norm": 0.07723814249038696, + "learning_rate": 3.461985803473198e-06, + "loss": 0.0014, + "step": 199440 + }, + { + "epoch": 1.2792103465637057, + "grad_norm": 0.10175478458404541, + "learning_rate": 3.4614532470428346e-06, + "loss": 0.0014, + "step": 199450 + }, + { + "epoch": 1.2792744834574916, + "grad_norm": 0.1596599817276001, + "learning_rate": 3.460920709891561e-06, + "loss": 0.0012, + "step": 199460 + }, + { + "epoch": 1.2793386203512778, + "grad_norm": 0.015792615711688995, + "learning_rate": 3.4603881920260516e-06, + "loss": 0.0011, + "step": 199470 + }, + { + "epoch": 1.279402757245064, + "grad_norm": 0.11992931365966797, + "learning_rate": 3.459855693452977e-06, + "loss": 0.0012, + "step": 199480 + }, + { + "epoch": 1.2794668941388498, + "grad_norm": 0.05175817757844925, + "learning_rate": 3.4593232141790134e-06, + "loss": 0.001, + "step": 199490 + }, + { + "epoch": 1.279531031032636, + "grad_norm": 0.12967315316200256, + "learning_rate": 3.458790754210829e-06, + "loss": 0.003, + "step": 199500 + }, + { + "epoch": 1.2795951679264221, + "grad_norm": 0.04164445027709007, + "learning_rate": 3.4582583135550995e-06, + "loss": 0.0012, + "step": 199510 + }, + { + "epoch": 1.2796593048202083, + "grad_norm": 0.052503302693367004, + "learning_rate": 3.457725892218494e-06, + "loss": 0.0009, + "step": 199520 + }, + { + "epoch": 1.2797234417139944, + "grad_norm": 0.18722525238990784, + "learning_rate": 3.4571934902076866e-06, + "loss": 0.0015, + "step": 199530 + }, + { + "epoch": 1.2797875786077806, + "grad_norm": 0.163239523768425, + "learning_rate": 3.4566611075293455e-06, + "loss": 0.0012, + "step": 199540 + }, + { + "epoch": 1.2798517155015665, + "grad_norm": 0.07324928045272827, + "learning_rate": 3.456128744190145e-06, + "loss": 0.0013, + "step": 199550 + }, + { + "epoch": 1.2799158523953527, + "grad_norm": 0.09207472205162048, + "learning_rate": 3.4555964001967544e-06, + "loss": 0.0018, + "step": 199560 + }, + { + "epoch": 1.2799799892891388, + "grad_norm": 0.07849516719579697, + "learning_rate": 3.455064075555845e-06, + "loss": 0.0015, + "step": 199570 + }, + { + "epoch": 1.2800441261829247, + "grad_norm": 0.09054175764322281, + "learning_rate": 3.454531770274087e-06, + "loss": 0.0031, + "step": 199580 + }, + { + "epoch": 1.2801082630767109, + "grad_norm": 0.01391641702502966, + "learning_rate": 3.45399948435815e-06, + "loss": 0.0015, + "step": 199590 + }, + { + "epoch": 1.280172399970497, + "grad_norm": 0.04894445464015007, + "learning_rate": 3.453467217814705e-06, + "loss": 0.0011, + "step": 199600 + }, + { + "epoch": 1.2802365368642832, + "grad_norm": 0.0364600270986557, + "learning_rate": 3.45293497065042e-06, + "loss": 0.0019, + "step": 199610 + }, + { + "epoch": 1.2803006737580693, + "grad_norm": 0.011941754259169102, + "learning_rate": 3.4524027428719664e-06, + "loss": 0.0015, + "step": 199620 + }, + { + "epoch": 1.2803648106518553, + "grad_norm": 0.0831613540649414, + "learning_rate": 3.4518705344860122e-06, + "loss": 0.0009, + "step": 199630 + }, + { + "epoch": 1.2804289475456414, + "grad_norm": 0.19725944101810455, + "learning_rate": 3.4513383454992268e-06, + "loss": 0.0016, + "step": 199640 + }, + { + "epoch": 1.2804930844394276, + "grad_norm": 0.03266933560371399, + "learning_rate": 3.4508061759182783e-06, + "loss": 0.0024, + "step": 199650 + }, + { + "epoch": 1.2805572213332135, + "grad_norm": 0.07610664516687393, + "learning_rate": 3.4502740257498363e-06, + "loss": 0.0009, + "step": 199660 + }, + { + "epoch": 1.2806213582269996, + "grad_norm": 0.12725989520549774, + "learning_rate": 3.4497418950005673e-06, + "loss": 0.0006, + "step": 199670 + }, + { + "epoch": 1.2806854951207858, + "grad_norm": 0.08239573985338211, + "learning_rate": 3.4492097836771405e-06, + "loss": 0.0018, + "step": 199680 + }, + { + "epoch": 1.280749632014572, + "grad_norm": 0.313229501247406, + "learning_rate": 3.4486776917862242e-06, + "loss": 0.0016, + "step": 199690 + }, + { + "epoch": 1.280813768908358, + "grad_norm": 0.07731663435697556, + "learning_rate": 3.448145619334484e-06, + "loss": 0.0017, + "step": 199700 + }, + { + "epoch": 1.2808779058021442, + "grad_norm": 0.0655188336968422, + "learning_rate": 3.4476135663285905e-06, + "loss": 0.0036, + "step": 199710 + }, + { + "epoch": 1.2809420426959301, + "grad_norm": 0.012748525477945805, + "learning_rate": 3.447081532775206e-06, + "loss": 0.0009, + "step": 199720 + }, + { + "epoch": 1.2810061795897163, + "grad_norm": 0.06347203254699707, + "learning_rate": 3.4465495186810016e-06, + "loss": 0.0012, + "step": 199730 + }, + { + "epoch": 1.2810703164835024, + "grad_norm": 0.23465777933597565, + "learning_rate": 3.4460175240526407e-06, + "loss": 0.0019, + "step": 199740 + }, + { + "epoch": 1.2811344533772884, + "grad_norm": 0.06513968110084534, + "learning_rate": 3.445485548896792e-06, + "loss": 0.0011, + "step": 199750 + }, + { + "epoch": 1.2811985902710745, + "grad_norm": 0.10150118172168732, + "learning_rate": 3.444953593220119e-06, + "loss": 0.0023, + "step": 199760 + }, + { + "epoch": 1.2812627271648607, + "grad_norm": 0.12035603821277618, + "learning_rate": 3.44442165702929e-06, + "loss": 0.0019, + "step": 199770 + }, + { + "epoch": 1.2813268640586468, + "grad_norm": 0.18347781896591187, + "learning_rate": 3.4438897403309686e-06, + "loss": 0.002, + "step": 199780 + }, + { + "epoch": 1.281391000952433, + "grad_norm": 0.033217377960681915, + "learning_rate": 3.4433578431318216e-06, + "loss": 0.0017, + "step": 199790 + }, + { + "epoch": 1.281455137846219, + "grad_norm": 0.1518799513578415, + "learning_rate": 3.4428259654385123e-06, + "loss": 0.0063, + "step": 199800 + }, + { + "epoch": 1.281519274740005, + "grad_norm": 0.08355305343866348, + "learning_rate": 3.4422941072577076e-06, + "loss": 0.0015, + "step": 199810 + }, + { + "epoch": 1.2815834116337912, + "grad_norm": 0.11808858811855316, + "learning_rate": 3.4417622685960704e-06, + "loss": 0.002, + "step": 199820 + }, + { + "epoch": 1.2816475485275773, + "grad_norm": 0.06710100173950195, + "learning_rate": 3.4412304494602654e-06, + "loss": 0.003, + "step": 199830 + }, + { + "epoch": 1.2817116854213633, + "grad_norm": 0.15470421314239502, + "learning_rate": 3.4406986498569576e-06, + "loss": 0.0018, + "step": 199840 + }, + { + "epoch": 1.2817758223151494, + "grad_norm": 0.17547038197517395, + "learning_rate": 3.4401668697928094e-06, + "loss": 0.0009, + "step": 199850 + }, + { + "epoch": 1.2818399592089356, + "grad_norm": 0.044894181191921234, + "learning_rate": 3.4396351092744864e-06, + "loss": 0.0009, + "step": 199860 + }, + { + "epoch": 1.2819040961027217, + "grad_norm": 0.23050405085086823, + "learning_rate": 3.4391033683086492e-06, + "loss": 0.0046, + "step": 199870 + }, + { + "epoch": 1.2819682329965079, + "grad_norm": 0.016979113221168518, + "learning_rate": 3.4385716469019637e-06, + "loss": 0.0009, + "step": 199880 + }, + { + "epoch": 1.2820323698902938, + "grad_norm": 0.04797173663973808, + "learning_rate": 3.43803994506109e-06, + "loss": 0.0027, + "step": 199890 + }, + { + "epoch": 1.28209650678408, + "grad_norm": 0.08218462020158768, + "learning_rate": 3.4375082627926935e-06, + "loss": 0.0017, + "step": 199900 + }, + { + "epoch": 1.282160643677866, + "grad_norm": 0.2115473449230194, + "learning_rate": 3.436976600103434e-06, + "loss": 0.0009, + "step": 199910 + }, + { + "epoch": 1.282224780571652, + "grad_norm": 0.08777006715536118, + "learning_rate": 3.436444956999976e-06, + "loss": 0.0016, + "step": 199920 + }, + { + "epoch": 1.2822889174654382, + "grad_norm": 0.012184024788439274, + "learning_rate": 3.4359133334889773e-06, + "loss": 0.0006, + "step": 199930 + }, + { + "epoch": 1.2823530543592243, + "grad_norm": 0.13068903982639313, + "learning_rate": 3.435381729577104e-06, + "loss": 0.0024, + "step": 199940 + }, + { + "epoch": 1.2824171912530105, + "grad_norm": 0.04617651551961899, + "learning_rate": 3.434850145271016e-06, + "loss": 0.0012, + "step": 199950 + }, + { + "epoch": 1.2824813281467966, + "grad_norm": 0.012335099279880524, + "learning_rate": 3.434318580577375e-06, + "loss": 0.0009, + "step": 199960 + }, + { + "epoch": 1.2825454650405828, + "grad_norm": 0.09198637306690216, + "learning_rate": 3.4337870355028404e-06, + "loss": 0.0011, + "step": 199970 + }, + { + "epoch": 1.2826096019343687, + "grad_norm": 0.4390782415866852, + "learning_rate": 3.433255510054074e-06, + "loss": 0.0034, + "step": 199980 + }, + { + "epoch": 1.2826737388281548, + "grad_norm": 0.08791748434305191, + "learning_rate": 3.432724004237736e-06, + "loss": 0.001, + "step": 199990 + }, + { + "epoch": 1.282737875721941, + "grad_norm": 0.1234566867351532, + "learning_rate": 3.4321925180604863e-06, + "loss": 0.0023, + "step": 200000 + }, + { + "epoch": 1.282802012615727, + "grad_norm": 0.1424219161272049, + "learning_rate": 3.431661051528984e-06, + "loss": 0.0008, + "step": 200010 + }, + { + "epoch": 1.282866149509513, + "grad_norm": 0.10655222088098526, + "learning_rate": 3.431129604649891e-06, + "loss": 0.0017, + "step": 200020 + }, + { + "epoch": 1.2829302864032992, + "grad_norm": 0.15433649718761444, + "learning_rate": 3.4305981774298636e-06, + "loss": 0.0014, + "step": 200030 + }, + { + "epoch": 1.2829944232970854, + "grad_norm": 0.15786220133304596, + "learning_rate": 3.430066769875564e-06, + "loss": 0.0013, + "step": 200040 + }, + { + "epoch": 1.2830585601908715, + "grad_norm": 0.09799902141094208, + "learning_rate": 3.4295353819936495e-06, + "loss": 0.0021, + "step": 200050 + }, + { + "epoch": 1.2831226970846574, + "grad_norm": 0.16291195154190063, + "learning_rate": 3.429004013790779e-06, + "loss": 0.0034, + "step": 200060 + }, + { + "epoch": 1.2831868339784436, + "grad_norm": 0.19031967222690582, + "learning_rate": 3.4284726652736113e-06, + "loss": 0.0027, + "step": 200070 + }, + { + "epoch": 1.2832509708722297, + "grad_norm": 0.1304776966571808, + "learning_rate": 3.4279413364488036e-06, + "loss": 0.0008, + "step": 200080 + }, + { + "epoch": 1.2833151077660157, + "grad_norm": 0.04710371419787407, + "learning_rate": 3.427410027323015e-06, + "loss": 0.0011, + "step": 200090 + }, + { + "epoch": 1.2833792446598018, + "grad_norm": 0.25472894310951233, + "learning_rate": 3.426878737902901e-06, + "loss": 0.0033, + "step": 200100 + }, + { + "epoch": 1.283443381553588, + "grad_norm": 0.05922931805253029, + "learning_rate": 3.4263474681951233e-06, + "loss": 0.0013, + "step": 200110 + }, + { + "epoch": 1.283507518447374, + "grad_norm": 0.05359066650271416, + "learning_rate": 3.4258162182063347e-06, + "loss": 0.0013, + "step": 200120 + }, + { + "epoch": 1.2835716553411602, + "grad_norm": 0.10407944023609161, + "learning_rate": 3.425284987943195e-06, + "loss": 0.0017, + "step": 200130 + }, + { + "epoch": 1.2836357922349464, + "grad_norm": 0.042828936129808426, + "learning_rate": 3.424753777412359e-06, + "loss": 0.0007, + "step": 200140 + }, + { + "epoch": 1.2836999291287323, + "grad_norm": 0.21707478165626526, + "learning_rate": 3.424222586620485e-06, + "loss": 0.0009, + "step": 200150 + }, + { + "epoch": 1.2837640660225185, + "grad_norm": 0.14766912162303925, + "learning_rate": 3.423691415574227e-06, + "loss": 0.0013, + "step": 200160 + }, + { + "epoch": 1.2838282029163046, + "grad_norm": 0.11225715279579163, + "learning_rate": 3.423160264280243e-06, + "loss": 0.0018, + "step": 200170 + }, + { + "epoch": 1.2838923398100905, + "grad_norm": 0.09696038067340851, + "learning_rate": 3.4226291327451877e-06, + "loss": 0.0013, + "step": 200180 + }, + { + "epoch": 1.2839564767038767, + "grad_norm": 0.04146742448210716, + "learning_rate": 3.422098020975717e-06, + "loss": 0.001, + "step": 200190 + }, + { + "epoch": 1.2840206135976628, + "grad_norm": 0.014756908640265465, + "learning_rate": 3.4215669289784847e-06, + "loss": 0.0016, + "step": 200200 + }, + { + "epoch": 1.284084750491449, + "grad_norm": 0.06989117711782455, + "learning_rate": 3.4210358567601477e-06, + "loss": 0.0009, + "step": 200210 + }, + { + "epoch": 1.2841488873852351, + "grad_norm": 0.14492103457450867, + "learning_rate": 3.4205048043273594e-06, + "loss": 0.002, + "step": 200220 + }, + { + "epoch": 1.284213024279021, + "grad_norm": 0.06944374740123749, + "learning_rate": 3.4199737716867753e-06, + "loss": 0.0008, + "step": 200230 + }, + { + "epoch": 1.2842771611728072, + "grad_norm": 0.07351551949977875, + "learning_rate": 3.4194427588450485e-06, + "loss": 0.0011, + "step": 200240 + }, + { + "epoch": 1.2843412980665934, + "grad_norm": 0.6855619549751282, + "learning_rate": 3.4189117658088332e-06, + "loss": 0.0021, + "step": 200250 + }, + { + "epoch": 1.2844054349603795, + "grad_norm": 0.12001349776983261, + "learning_rate": 3.418380792584785e-06, + "loss": 0.0008, + "step": 200260 + }, + { + "epoch": 1.2844695718541654, + "grad_norm": 0.04269237071275711, + "learning_rate": 3.4178498391795546e-06, + "loss": 0.0025, + "step": 200270 + }, + { + "epoch": 1.2845337087479516, + "grad_norm": 0.04182310402393341, + "learning_rate": 3.4173189055997973e-06, + "loss": 0.0008, + "step": 200280 + }, + { + "epoch": 1.2845978456417377, + "grad_norm": 0.17033688724040985, + "learning_rate": 3.416787991852164e-06, + "loss": 0.0016, + "step": 200290 + }, + { + "epoch": 1.2846619825355239, + "grad_norm": 0.0695016160607338, + "learning_rate": 3.41625709794331e-06, + "loss": 0.001, + "step": 200300 + }, + { + "epoch": 1.28472611942931, + "grad_norm": 0.026342421770095825, + "learning_rate": 3.4157262238798857e-06, + "loss": 0.0023, + "step": 200310 + }, + { + "epoch": 1.284790256323096, + "grad_norm": 0.05523712933063507, + "learning_rate": 3.415195369668545e-06, + "loss": 0.0005, + "step": 200320 + }, + { + "epoch": 1.284854393216882, + "grad_norm": 0.02777286432683468, + "learning_rate": 3.4146645353159372e-06, + "loss": 0.001, + "step": 200330 + }, + { + "epoch": 1.2849185301106683, + "grad_norm": 0.0390302874147892, + "learning_rate": 3.414133720828717e-06, + "loss": 0.0014, + "step": 200340 + }, + { + "epoch": 1.2849826670044542, + "grad_norm": 0.03369353339076042, + "learning_rate": 3.4136029262135345e-06, + "loss": 0.0009, + "step": 200350 + }, + { + "epoch": 1.2850468038982403, + "grad_norm": 0.047813788056373596, + "learning_rate": 3.4130721514770416e-06, + "loss": 0.002, + "step": 200360 + }, + { + "epoch": 1.2851109407920265, + "grad_norm": 0.04628170654177666, + "learning_rate": 3.412541396625888e-06, + "loss": 0.0034, + "step": 200370 + }, + { + "epoch": 1.2851750776858126, + "grad_norm": 0.011677786707878113, + "learning_rate": 3.412010661666726e-06, + "loss": 0.0015, + "step": 200380 + }, + { + "epoch": 1.2852392145795988, + "grad_norm": 0.11463964730501175, + "learning_rate": 3.4114799466062043e-06, + "loss": 0.002, + "step": 200390 + }, + { + "epoch": 1.285303351473385, + "grad_norm": 0.05677622929215431, + "learning_rate": 3.4109492514509746e-06, + "loss": 0.0013, + "step": 200400 + }, + { + "epoch": 1.2853674883671709, + "grad_norm": 0.07445462048053741, + "learning_rate": 3.410418576207687e-06, + "loss": 0.0026, + "step": 200410 + }, + { + "epoch": 1.285431625260957, + "grad_norm": 0.03686346486210823, + "learning_rate": 3.4098879208829895e-06, + "loss": 0.0015, + "step": 200420 + }, + { + "epoch": 1.2854957621547431, + "grad_norm": 0.16804060339927673, + "learning_rate": 3.4093572854835346e-06, + "loss": 0.0017, + "step": 200430 + }, + { + "epoch": 1.285559899048529, + "grad_norm": 0.007531581912189722, + "learning_rate": 3.4088266700159677e-06, + "loss": 0.0014, + "step": 200440 + }, + { + "epoch": 1.2856240359423152, + "grad_norm": 0.15468910336494446, + "learning_rate": 3.4082960744869414e-06, + "loss": 0.0027, + "step": 200450 + }, + { + "epoch": 1.2856881728361014, + "grad_norm": 0.3094685673713684, + "learning_rate": 3.4077654989031017e-06, + "loss": 0.0024, + "step": 200460 + }, + { + "epoch": 1.2857523097298875, + "grad_norm": 0.04157442972064018, + "learning_rate": 3.4072349432710992e-06, + "loss": 0.0013, + "step": 200470 + }, + { + "epoch": 1.2858164466236737, + "grad_norm": 0.02950865402817726, + "learning_rate": 3.406704407597581e-06, + "loss": 0.0009, + "step": 200480 + }, + { + "epoch": 1.2858805835174596, + "grad_norm": 0.016590271145105362, + "learning_rate": 3.4061738918891952e-06, + "loss": 0.0022, + "step": 200490 + }, + { + "epoch": 1.2859447204112457, + "grad_norm": 0.026118848472833633, + "learning_rate": 3.405643396152589e-06, + "loss": 0.0016, + "step": 200500 + }, + { + "epoch": 1.286008857305032, + "grad_norm": 0.0074649169109761715, + "learning_rate": 3.4051129203944117e-06, + "loss": 0.0019, + "step": 200510 + }, + { + "epoch": 1.2860729941988178, + "grad_norm": 0.03584938496351242, + "learning_rate": 3.4045824646213083e-06, + "loss": 0.0018, + "step": 200520 + }, + { + "epoch": 1.286137131092604, + "grad_norm": 0.24542315304279327, + "learning_rate": 3.404052028839927e-06, + "loss": 0.0039, + "step": 200530 + }, + { + "epoch": 1.2862012679863901, + "grad_norm": 0.12411786615848541, + "learning_rate": 3.4035216130569147e-06, + "loss": 0.0005, + "step": 200540 + }, + { + "epoch": 1.2862654048801763, + "grad_norm": 0.03921869397163391, + "learning_rate": 3.402991217278917e-06, + "loss": 0.001, + "step": 200550 + }, + { + "epoch": 1.2863295417739624, + "grad_norm": 0.1604154407978058, + "learning_rate": 3.402460841512582e-06, + "loss": 0.0021, + "step": 200560 + }, + { + "epoch": 1.2863936786677486, + "grad_norm": 0.048219580203294754, + "learning_rate": 3.401930485764553e-06, + "loss": 0.0008, + "step": 200570 + }, + { + "epoch": 1.2864578155615345, + "grad_norm": 0.09091049432754517, + "learning_rate": 3.4014001500414784e-06, + "loss": 0.001, + "step": 200580 + }, + { + "epoch": 1.2865219524553206, + "grad_norm": 0.05343707278370857, + "learning_rate": 3.400869834350001e-06, + "loss": 0.0017, + "step": 200590 + }, + { + "epoch": 1.2865860893491068, + "grad_norm": 0.013386723585426807, + "learning_rate": 3.4003395386967692e-06, + "loss": 0.0023, + "step": 200600 + }, + { + "epoch": 1.2866502262428927, + "grad_norm": 0.09424804896116257, + "learning_rate": 3.399809263088425e-06, + "loss": 0.0026, + "step": 200610 + }, + { + "epoch": 1.2867143631366789, + "grad_norm": 0.0879783183336258, + "learning_rate": 3.3992790075316152e-06, + "loss": 0.0032, + "step": 200620 + }, + { + "epoch": 1.286778500030465, + "grad_norm": 0.12786969542503357, + "learning_rate": 3.398748772032982e-06, + "loss": 0.0017, + "step": 200630 + }, + { + "epoch": 1.2868426369242512, + "grad_norm": 0.11305782198905945, + "learning_rate": 3.398218556599173e-06, + "loss": 0.0022, + "step": 200640 + }, + { + "epoch": 1.2869067738180373, + "grad_norm": 0.3130101263523102, + "learning_rate": 3.3976883612368284e-06, + "loss": 0.0023, + "step": 200650 + }, + { + "epoch": 1.2869709107118232, + "grad_norm": 0.09633992612361908, + "learning_rate": 3.397158185952595e-06, + "loss": 0.002, + "step": 200660 + }, + { + "epoch": 1.2870350476056094, + "grad_norm": 0.05635792389512062, + "learning_rate": 3.396628030753114e-06, + "loss": 0.0011, + "step": 200670 + }, + { + "epoch": 1.2870991844993955, + "grad_norm": 0.03625056520104408, + "learning_rate": 3.39609789564503e-06, + "loss": 0.0008, + "step": 200680 + }, + { + "epoch": 1.2871633213931817, + "grad_norm": 0.06167403236031532, + "learning_rate": 3.395567780634987e-06, + "loss": 0.0015, + "step": 200690 + }, + { + "epoch": 1.2872274582869676, + "grad_norm": 0.036683231592178345, + "learning_rate": 3.395037685729624e-06, + "loss": 0.0011, + "step": 200700 + }, + { + "epoch": 1.2872915951807538, + "grad_norm": 0.09551402181386948, + "learning_rate": 3.3945076109355866e-06, + "loss": 0.0013, + "step": 200710 + }, + { + "epoch": 1.28735573207454, + "grad_norm": 0.2631330192089081, + "learning_rate": 3.3939775562595173e-06, + "loss": 0.0028, + "step": 200720 + }, + { + "epoch": 1.287419868968326, + "grad_norm": 0.014659715816378593, + "learning_rate": 3.3934475217080563e-06, + "loss": 0.0024, + "step": 200730 + }, + { + "epoch": 1.2874840058621122, + "grad_norm": 0.08092175424098969, + "learning_rate": 3.3929175072878475e-06, + "loss": 0.0029, + "step": 200740 + }, + { + "epoch": 1.2875481427558981, + "grad_norm": 0.17531432211399078, + "learning_rate": 3.3923875130055284e-06, + "loss": 0.0032, + "step": 200750 + }, + { + "epoch": 1.2876122796496843, + "grad_norm": 0.06754172593355179, + "learning_rate": 3.391857538867745e-06, + "loss": 0.0015, + "step": 200760 + }, + { + "epoch": 1.2876764165434704, + "grad_norm": 0.04346820339560509, + "learning_rate": 3.3913275848811346e-06, + "loss": 0.0015, + "step": 200770 + }, + { + "epoch": 1.2877405534372564, + "grad_norm": 0.048482779413461685, + "learning_rate": 3.39079765105234e-06, + "loss": 0.0005, + "step": 200780 + }, + { + "epoch": 1.2878046903310425, + "grad_norm": 0.3787684142589569, + "learning_rate": 3.3902677373880005e-06, + "loss": 0.0012, + "step": 200790 + }, + { + "epoch": 1.2878688272248286, + "grad_norm": 0.06011801213026047, + "learning_rate": 3.3897378438947557e-06, + "loss": 0.0005, + "step": 200800 + }, + { + "epoch": 1.2879329641186148, + "grad_norm": 0.04073334485292435, + "learning_rate": 3.389207970579249e-06, + "loss": 0.0016, + "step": 200810 + }, + { + "epoch": 1.287997101012401, + "grad_norm": 0.052258770912885666, + "learning_rate": 3.3886781174481156e-06, + "loss": 0.001, + "step": 200820 + }, + { + "epoch": 1.288061237906187, + "grad_norm": 0.06269658356904984, + "learning_rate": 3.3881482845079983e-06, + "loss": 0.0022, + "step": 200830 + }, + { + "epoch": 1.288125374799973, + "grad_norm": 0.12791575491428375, + "learning_rate": 3.3876184717655337e-06, + "loss": 0.002, + "step": 200840 + }, + { + "epoch": 1.2881895116937592, + "grad_norm": 0.11520896852016449, + "learning_rate": 3.387088679227364e-06, + "loss": 0.0034, + "step": 200850 + }, + { + "epoch": 1.2882536485875453, + "grad_norm": 0.059444136917591095, + "learning_rate": 3.386558906900124e-06, + "loss": 0.001, + "step": 200860 + }, + { + "epoch": 1.2883177854813312, + "grad_norm": 0.05978221446275711, + "learning_rate": 3.3860291547904545e-06, + "loss": 0.001, + "step": 200870 + }, + { + "epoch": 1.2883819223751174, + "grad_norm": 0.06346441805362701, + "learning_rate": 3.385499422904993e-06, + "loss": 0.0014, + "step": 200880 + }, + { + "epoch": 1.2884460592689035, + "grad_norm": 0.08050800859928131, + "learning_rate": 3.384969711250379e-06, + "loss": 0.0012, + "step": 200890 + }, + { + "epoch": 1.2885101961626897, + "grad_norm": 0.05469054728746414, + "learning_rate": 3.384440019833247e-06, + "loss": 0.0017, + "step": 200900 + }, + { + "epoch": 1.2885743330564758, + "grad_norm": 0.262292742729187, + "learning_rate": 3.3839103486602365e-06, + "loss": 0.0047, + "step": 200910 + }, + { + "epoch": 1.2886384699502618, + "grad_norm": 0.16505391895771027, + "learning_rate": 3.3833806977379845e-06, + "loss": 0.0008, + "step": 200920 + }, + { + "epoch": 1.288702606844048, + "grad_norm": 0.08264858275651932, + "learning_rate": 3.3828510670731285e-06, + "loss": 0.0016, + "step": 200930 + }, + { + "epoch": 1.288766743737834, + "grad_norm": 0.05656171962618828, + "learning_rate": 3.382321456672303e-06, + "loss": 0.0011, + "step": 200940 + }, + { + "epoch": 1.2888308806316202, + "grad_norm": 0.07540126889944077, + "learning_rate": 3.3817918665421463e-06, + "loss": 0.0008, + "step": 200950 + }, + { + "epoch": 1.2888950175254061, + "grad_norm": 0.08787442743778229, + "learning_rate": 3.381262296689295e-06, + "loss": 0.001, + "step": 200960 + }, + { + "epoch": 1.2889591544191923, + "grad_norm": 0.06413739174604416, + "learning_rate": 3.380732747120383e-06, + "loss": 0.0014, + "step": 200970 + }, + { + "epoch": 1.2890232913129784, + "grad_norm": 0.04403886944055557, + "learning_rate": 3.3802032178420473e-06, + "loss": 0.0008, + "step": 200980 + }, + { + "epoch": 1.2890874282067646, + "grad_norm": 0.09438447654247284, + "learning_rate": 3.379673708860922e-06, + "loss": 0.0014, + "step": 200990 + }, + { + "epoch": 1.2891515651005507, + "grad_norm": 0.017013145610690117, + "learning_rate": 3.379144220183645e-06, + "loss": 0.0009, + "step": 201000 + }, + { + "epoch": 1.2892157019943367, + "grad_norm": 0.013759625144302845, + "learning_rate": 3.3786147518168476e-06, + "loss": 0.0014, + "step": 201010 + }, + { + "epoch": 1.2892798388881228, + "grad_norm": 0.11689459532499313, + "learning_rate": 3.378085303767168e-06, + "loss": 0.0014, + "step": 201020 + }, + { + "epoch": 1.289343975781909, + "grad_norm": 0.08445636928081512, + "learning_rate": 3.377555876041237e-06, + "loss": 0.0017, + "step": 201030 + }, + { + "epoch": 1.2894081126756949, + "grad_norm": 0.02180854044854641, + "learning_rate": 3.377026468645691e-06, + "loss": 0.001, + "step": 201040 + }, + { + "epoch": 1.289472249569481, + "grad_norm": 0.04893924295902252, + "learning_rate": 3.3764970815871633e-06, + "loss": 0.001, + "step": 201050 + }, + { + "epoch": 1.2895363864632672, + "grad_norm": 0.06638488173484802, + "learning_rate": 3.375967714872288e-06, + "loss": 0.0011, + "step": 201060 + }, + { + "epoch": 1.2896005233570533, + "grad_norm": 0.050552599132061005, + "learning_rate": 3.375438368507697e-06, + "loss": 0.0018, + "step": 201070 + }, + { + "epoch": 1.2896646602508395, + "grad_norm": 0.43968114256858826, + "learning_rate": 3.374909042500025e-06, + "loss": 0.0029, + "step": 201080 + }, + { + "epoch": 1.2897287971446256, + "grad_norm": 0.07450058311223984, + "learning_rate": 3.3743797368559035e-06, + "loss": 0.0015, + "step": 201090 + }, + { + "epoch": 1.2897929340384116, + "grad_norm": 0.03740369901061058, + "learning_rate": 3.3738504515819654e-06, + "loss": 0.0013, + "step": 201100 + }, + { + "epoch": 1.2898570709321977, + "grad_norm": 0.06631144881248474, + "learning_rate": 3.3733211866848447e-06, + "loss": 0.001, + "step": 201110 + }, + { + "epoch": 1.2899212078259839, + "grad_norm": 0.10576901584863663, + "learning_rate": 3.372791942171171e-06, + "loss": 0.001, + "step": 201120 + }, + { + "epoch": 1.2899853447197698, + "grad_norm": 0.053370777517557144, + "learning_rate": 3.3722627180475774e-06, + "loss": 0.0021, + "step": 201130 + }, + { + "epoch": 1.290049481613556, + "grad_norm": 0.15974250435829163, + "learning_rate": 3.3717335143206952e-06, + "loss": 0.0035, + "step": 201140 + }, + { + "epoch": 1.290113618507342, + "grad_norm": 0.03818970546126366, + "learning_rate": 3.3712043309971567e-06, + "loss": 0.0014, + "step": 201150 + }, + { + "epoch": 1.2901777554011282, + "grad_norm": 0.12067518383264542, + "learning_rate": 3.370675168083591e-06, + "loss": 0.0012, + "step": 201160 + }, + { + "epoch": 1.2902418922949144, + "grad_norm": 0.053740669041872025, + "learning_rate": 3.37014602558663e-06, + "loss": 0.0018, + "step": 201170 + }, + { + "epoch": 1.2903060291887003, + "grad_norm": 0.10573643445968628, + "learning_rate": 3.3696169035129046e-06, + "loss": 0.0021, + "step": 201180 + }, + { + "epoch": 1.2903701660824864, + "grad_norm": 0.09610594063997269, + "learning_rate": 3.369087801869045e-06, + "loss": 0.0009, + "step": 201190 + }, + { + "epoch": 1.2904343029762726, + "grad_norm": 0.0808788537979126, + "learning_rate": 3.3685587206616793e-06, + "loss": 0.0017, + "step": 201200 + }, + { + "epoch": 1.2904984398700585, + "grad_norm": 0.03952576592564583, + "learning_rate": 3.3680296598974406e-06, + "loss": 0.0009, + "step": 201210 + }, + { + "epoch": 1.2905625767638447, + "grad_norm": 0.24757136404514313, + "learning_rate": 3.3675006195829554e-06, + "loss": 0.0027, + "step": 201220 + }, + { + "epoch": 1.2906267136576308, + "grad_norm": 0.15349692106246948, + "learning_rate": 3.366971599724855e-06, + "loss": 0.0023, + "step": 201230 + }, + { + "epoch": 1.290690850551417, + "grad_norm": 0.04370679333806038, + "learning_rate": 3.366442600329766e-06, + "loss": 0.0014, + "step": 201240 + }, + { + "epoch": 1.2907549874452031, + "grad_norm": 0.03327915817499161, + "learning_rate": 3.3659136214043197e-06, + "loss": 0.0013, + "step": 201250 + }, + { + "epoch": 1.2908191243389893, + "grad_norm": 0.11615893989801407, + "learning_rate": 3.365384662955144e-06, + "loss": 0.0015, + "step": 201260 + }, + { + "epoch": 1.2908832612327752, + "grad_norm": 0.04019397497177124, + "learning_rate": 3.3648557249888657e-06, + "loss": 0.0007, + "step": 201270 + }, + { + "epoch": 1.2909473981265613, + "grad_norm": 0.08190418779850006, + "learning_rate": 3.364326807512115e-06, + "loss": 0.0025, + "step": 201280 + }, + { + "epoch": 1.2910115350203475, + "grad_norm": 0.09775541722774506, + "learning_rate": 3.3637979105315175e-06, + "loss": 0.0009, + "step": 201290 + }, + { + "epoch": 1.2910756719141334, + "grad_norm": 0.045272570103406906, + "learning_rate": 3.363269034053702e-06, + "loss": 0.0012, + "step": 201300 + }, + { + "epoch": 1.2911398088079196, + "grad_norm": 0.12820862233638763, + "learning_rate": 3.3627401780852952e-06, + "loss": 0.0024, + "step": 201310 + }, + { + "epoch": 1.2912039457017057, + "grad_norm": 0.16251589357852936, + "learning_rate": 3.362211342632925e-06, + "loss": 0.0018, + "step": 201320 + }, + { + "epoch": 1.2912680825954919, + "grad_norm": 0.041628845036029816, + "learning_rate": 3.3616825277032154e-06, + "loss": 0.0008, + "step": 201330 + }, + { + "epoch": 1.291332219489278, + "grad_norm": 0.05819320306181908, + "learning_rate": 3.361153733302796e-06, + "loss": 0.0023, + "step": 201340 + }, + { + "epoch": 1.291396356383064, + "grad_norm": 0.051456280052661896, + "learning_rate": 3.3606249594382905e-06, + "loss": 0.0022, + "step": 201350 + }, + { + "epoch": 1.29146049327685, + "grad_norm": 0.11780291050672531, + "learning_rate": 3.360096206116327e-06, + "loss": 0.0022, + "step": 201360 + }, + { + "epoch": 1.2915246301706362, + "grad_norm": 0.0045365807600319386, + "learning_rate": 3.359567473343529e-06, + "loss": 0.0007, + "step": 201370 + }, + { + "epoch": 1.2915887670644224, + "grad_norm": 0.04459698870778084, + "learning_rate": 3.359038761126523e-06, + "loss": 0.0018, + "step": 201380 + }, + { + "epoch": 1.2916529039582083, + "grad_norm": 0.4114333689212799, + "learning_rate": 3.358510069471935e-06, + "loss": 0.0018, + "step": 201390 + }, + { + "epoch": 1.2917170408519945, + "grad_norm": 0.17456431686878204, + "learning_rate": 3.3579813983863884e-06, + "loss": 0.0015, + "step": 201400 + }, + { + "epoch": 1.2917811777457806, + "grad_norm": 0.2068919539451599, + "learning_rate": 3.357452747876509e-06, + "loss": 0.002, + "step": 201410 + }, + { + "epoch": 1.2918453146395668, + "grad_norm": 0.0049244496040046215, + "learning_rate": 3.3569241179489197e-06, + "loss": 0.0018, + "step": 201420 + }, + { + "epoch": 1.291909451533353, + "grad_norm": 0.03238717094063759, + "learning_rate": 3.356395508610246e-06, + "loss": 0.001, + "step": 201430 + }, + { + "epoch": 1.2919735884271388, + "grad_norm": 0.15247681736946106, + "learning_rate": 3.3558669198671113e-06, + "loss": 0.0014, + "step": 201440 + }, + { + "epoch": 1.292037725320925, + "grad_norm": 0.06962453573942184, + "learning_rate": 3.3553383517261395e-06, + "loss": 0.0021, + "step": 201450 + }, + { + "epoch": 1.2921018622147111, + "grad_norm": 0.047201476991176605, + "learning_rate": 3.3548098041939515e-06, + "loss": 0.0012, + "step": 201460 + }, + { + "epoch": 1.292165999108497, + "grad_norm": 0.11179137974977493, + "learning_rate": 3.3542812772771737e-06, + "loss": 0.0016, + "step": 201470 + }, + { + "epoch": 1.2922301360022832, + "grad_norm": 0.27629971504211426, + "learning_rate": 3.3537527709824286e-06, + "loss": 0.0015, + "step": 201480 + }, + { + "epoch": 1.2922942728960694, + "grad_norm": 0.05948624014854431, + "learning_rate": 3.3532242853163366e-06, + "loss": 0.0023, + "step": 201490 + }, + { + "epoch": 1.2923584097898555, + "grad_norm": 0.0237751342356205, + "learning_rate": 3.3526958202855227e-06, + "loss": 0.0014, + "step": 201500 + }, + { + "epoch": 1.2924225466836416, + "grad_norm": 0.02121659927070141, + "learning_rate": 3.352167375896606e-06, + "loss": 0.0013, + "step": 201510 + }, + { + "epoch": 1.2924866835774278, + "grad_norm": 0.08977126330137253, + "learning_rate": 3.35163895215621e-06, + "loss": 0.0013, + "step": 201520 + }, + { + "epoch": 1.2925508204712137, + "grad_norm": 0.1702510118484497, + "learning_rate": 3.3511105490709572e-06, + "loss": 0.0008, + "step": 201530 + }, + { + "epoch": 1.2926149573649999, + "grad_norm": 0.21891912817955017, + "learning_rate": 3.3505821666474657e-06, + "loss": 0.0022, + "step": 201540 + }, + { + "epoch": 1.292679094258786, + "grad_norm": 0.06410212069749832, + "learning_rate": 3.3500538048923603e-06, + "loss": 0.0012, + "step": 201550 + }, + { + "epoch": 1.292743231152572, + "grad_norm": 0.070395328104496, + "learning_rate": 3.3495254638122593e-06, + "loss": 0.0018, + "step": 201560 + }, + { + "epoch": 1.292807368046358, + "grad_norm": 0.07691259682178497, + "learning_rate": 3.348997143413785e-06, + "loss": 0.0026, + "step": 201570 + }, + { + "epoch": 1.2928715049401442, + "grad_norm": 0.10150125622749329, + "learning_rate": 3.348468843703554e-06, + "loss": 0.0021, + "step": 201580 + }, + { + "epoch": 1.2929356418339304, + "grad_norm": 0.03191756084561348, + "learning_rate": 3.347940564688191e-06, + "loss": 0.0013, + "step": 201590 + }, + { + "epoch": 1.2929997787277165, + "grad_norm": 0.13160976767539978, + "learning_rate": 3.3474123063743114e-06, + "loss": 0.0018, + "step": 201600 + }, + { + "epoch": 1.2930639156215025, + "grad_norm": 0.07916834205389023, + "learning_rate": 3.3468840687685383e-06, + "loss": 0.0014, + "step": 201610 + }, + { + "epoch": 1.2931280525152886, + "grad_norm": 0.010582217015326023, + "learning_rate": 3.3463558518774888e-06, + "loss": 0.0017, + "step": 201620 + }, + { + "epoch": 1.2931921894090748, + "grad_norm": 0.0967269316315651, + "learning_rate": 3.3458276557077823e-06, + "loss": 0.0011, + "step": 201630 + }, + { + "epoch": 1.2932563263028607, + "grad_norm": 0.07778430730104446, + "learning_rate": 3.3452994802660367e-06, + "loss": 0.0009, + "step": 201640 + }, + { + "epoch": 1.2933204631966468, + "grad_norm": 0.15006621181964874, + "learning_rate": 3.3447713255588717e-06, + "loss": 0.0021, + "step": 201650 + }, + { + "epoch": 1.293384600090433, + "grad_norm": 0.02836078777909279, + "learning_rate": 3.344243191592905e-06, + "loss": 0.0017, + "step": 201660 + }, + { + "epoch": 1.2934487369842191, + "grad_norm": 0.23487643897533417, + "learning_rate": 3.343715078374755e-06, + "loss": 0.0018, + "step": 201670 + }, + { + "epoch": 1.2935128738780053, + "grad_norm": 0.01940734125673771, + "learning_rate": 3.3431869859110387e-06, + "loss": 0.0017, + "step": 201680 + }, + { + "epoch": 1.2935770107717914, + "grad_norm": 0.061511676758527756, + "learning_rate": 3.342658914208373e-06, + "loss": 0.0017, + "step": 201690 + }, + { + "epoch": 1.2936411476655774, + "grad_norm": 0.049369022250175476, + "learning_rate": 3.342130863273376e-06, + "loss": 0.0018, + "step": 201700 + }, + { + "epoch": 1.2937052845593635, + "grad_norm": 0.05317164212465286, + "learning_rate": 3.3416028331126638e-06, + "loss": 0.0011, + "step": 201710 + }, + { + "epoch": 1.2937694214531497, + "grad_norm": 0.025014236569404602, + "learning_rate": 3.3410748237328537e-06, + "loss": 0.0011, + "step": 201720 + }, + { + "epoch": 1.2938335583469356, + "grad_norm": 0.025604791939258575, + "learning_rate": 3.340546835140561e-06, + "loss": 0.0024, + "step": 201730 + }, + { + "epoch": 1.2938976952407217, + "grad_norm": 0.03990999609231949, + "learning_rate": 3.340018867342404e-06, + "loss": 0.0015, + "step": 201740 + }, + { + "epoch": 1.2939618321345079, + "grad_norm": 0.10523956269025803, + "learning_rate": 3.3394909203449953e-06, + "loss": 0.0012, + "step": 201750 + }, + { + "epoch": 1.294025969028294, + "grad_norm": 0.06308460980653763, + "learning_rate": 3.338962994154954e-06, + "loss": 0.0013, + "step": 201760 + }, + { + "epoch": 1.2940901059220802, + "grad_norm": 0.06793212890625, + "learning_rate": 3.338435088778891e-06, + "loss": 0.0011, + "step": 201770 + }, + { + "epoch": 1.294154242815866, + "grad_norm": 0.03244560584425926, + "learning_rate": 3.3379072042234263e-06, + "loss": 0.0015, + "step": 201780 + }, + { + "epoch": 1.2942183797096523, + "grad_norm": 0.12184517830610275, + "learning_rate": 3.3373793404951704e-06, + "loss": 0.0015, + "step": 201790 + }, + { + "epoch": 1.2942825166034384, + "grad_norm": 0.03447624668478966, + "learning_rate": 3.3368514976007394e-06, + "loss": 0.0012, + "step": 201800 + }, + { + "epoch": 1.2943466534972246, + "grad_norm": 0.07783801108598709, + "learning_rate": 3.3363236755467498e-06, + "loss": 0.0019, + "step": 201810 + }, + { + "epoch": 1.2944107903910105, + "grad_norm": 0.06861250102519989, + "learning_rate": 3.335795874339812e-06, + "loss": 0.0014, + "step": 201820 + }, + { + "epoch": 1.2944749272847966, + "grad_norm": 0.03938888758420944, + "learning_rate": 3.3352680939865417e-06, + "loss": 0.0024, + "step": 201830 + }, + { + "epoch": 1.2945390641785828, + "grad_norm": 0.09895730018615723, + "learning_rate": 3.3347403344935513e-06, + "loss": 0.0012, + "step": 201840 + }, + { + "epoch": 1.294603201072369, + "grad_norm": 0.19778893887996674, + "learning_rate": 3.334212595867456e-06, + "loss": 0.002, + "step": 201850 + }, + { + "epoch": 1.294667337966155, + "grad_norm": 0.059761881828308105, + "learning_rate": 3.333684878114866e-06, + "loss": 0.0009, + "step": 201860 + }, + { + "epoch": 1.294731474859941, + "grad_norm": 0.2661759555339813, + "learning_rate": 3.333157181242397e-06, + "loss": 0.0011, + "step": 201870 + }, + { + "epoch": 1.2947956117537271, + "grad_norm": 0.059345848858356476, + "learning_rate": 3.332629505256658e-06, + "loss": 0.0008, + "step": 201880 + }, + { + "epoch": 1.2948597486475133, + "grad_norm": 0.04656009376049042, + "learning_rate": 3.3321018501642642e-06, + "loss": 0.0013, + "step": 201890 + }, + { + "epoch": 1.2949238855412992, + "grad_norm": 0.1327023208141327, + "learning_rate": 3.3315742159718256e-06, + "loss": 0.0018, + "step": 201900 + }, + { + "epoch": 1.2949880224350854, + "grad_norm": 0.05535271763801575, + "learning_rate": 3.3310466026859555e-06, + "loss": 0.001, + "step": 201910 + }, + { + "epoch": 1.2950521593288715, + "grad_norm": 0.20933690667152405, + "learning_rate": 3.3305190103132627e-06, + "loss": 0.0011, + "step": 201920 + }, + { + "epoch": 1.2951162962226577, + "grad_norm": 0.6115767359733582, + "learning_rate": 3.3299914388603614e-06, + "loss": 0.0018, + "step": 201930 + }, + { + "epoch": 1.2951804331164438, + "grad_norm": 0.09265130013227463, + "learning_rate": 3.3294638883338592e-06, + "loss": 0.0005, + "step": 201940 + }, + { + "epoch": 1.29524457001023, + "grad_norm": 0.044155530631542206, + "learning_rate": 3.328936358740369e-06, + "loss": 0.0018, + "step": 201950 + }, + { + "epoch": 1.295308706904016, + "grad_norm": 0.0016786607448011637, + "learning_rate": 3.328408850086502e-06, + "loss": 0.0014, + "step": 201960 + }, + { + "epoch": 1.295372843797802, + "grad_norm": 0.07835692912340164, + "learning_rate": 3.3278813623788646e-06, + "loss": 0.0025, + "step": 201970 + }, + { + "epoch": 1.2954369806915882, + "grad_norm": 0.06830964237451553, + "learning_rate": 3.3273538956240704e-06, + "loss": 0.0007, + "step": 201980 + }, + { + "epoch": 1.2955011175853741, + "grad_norm": 0.0658147931098938, + "learning_rate": 3.326826449828726e-06, + "loss": 0.002, + "step": 201990 + }, + { + "epoch": 1.2955652544791603, + "grad_norm": 0.13605722784996033, + "learning_rate": 3.326299024999443e-06, + "loss": 0.0017, + "step": 202000 + }, + { + "epoch": 1.2956293913729464, + "grad_norm": 0.045622818171978, + "learning_rate": 3.325771621142829e-06, + "loss": 0.0013, + "step": 202010 + }, + { + "epoch": 1.2956935282667326, + "grad_norm": 0.07887290418148041, + "learning_rate": 3.3252442382654937e-06, + "loss": 0.0015, + "step": 202020 + }, + { + "epoch": 1.2957576651605187, + "grad_norm": 0.006410001777112484, + "learning_rate": 3.3247168763740446e-06, + "loss": 0.0016, + "step": 202030 + }, + { + "epoch": 1.2958218020543046, + "grad_norm": 0.05459125339984894, + "learning_rate": 3.3241895354750903e-06, + "loss": 0.0009, + "step": 202040 + }, + { + "epoch": 1.2958859389480908, + "grad_norm": 0.02828606776893139, + "learning_rate": 3.3236622155752386e-06, + "loss": 0.0005, + "step": 202050 + }, + { + "epoch": 1.295950075841877, + "grad_norm": 0.017393292859196663, + "learning_rate": 3.323134916681098e-06, + "loss": 0.0021, + "step": 202060 + }, + { + "epoch": 1.2960142127356629, + "grad_norm": 0.047871265560388565, + "learning_rate": 3.3226076387992744e-06, + "loss": 0.0012, + "step": 202070 + }, + { + "epoch": 1.296078349629449, + "grad_norm": 0.35553261637687683, + "learning_rate": 3.3220803819363765e-06, + "loss": 0.0012, + "step": 202080 + }, + { + "epoch": 1.2961424865232352, + "grad_norm": 0.13506869971752167, + "learning_rate": 3.3215531460990103e-06, + "loss": 0.0011, + "step": 202090 + }, + { + "epoch": 1.2962066234170213, + "grad_norm": 0.09168893843889236, + "learning_rate": 3.3210259312937826e-06, + "loss": 0.0029, + "step": 202100 + }, + { + "epoch": 1.2962707603108075, + "grad_norm": 0.09741288423538208, + "learning_rate": 3.320498737527301e-06, + "loss": 0.0014, + "step": 202110 + }, + { + "epoch": 1.2963348972045936, + "grad_norm": 0.0015789240133017302, + "learning_rate": 3.3199715648061693e-06, + "loss": 0.0009, + "step": 202120 + }, + { + "epoch": 1.2963990340983795, + "grad_norm": 0.08950482308864594, + "learning_rate": 3.3194444131369957e-06, + "loss": 0.0005, + "step": 202130 + }, + { + "epoch": 1.2964631709921657, + "grad_norm": 0.0017960354452952743, + "learning_rate": 3.318917282526384e-06, + "loss": 0.0028, + "step": 202140 + }, + { + "epoch": 1.2965273078859518, + "grad_norm": 0.06722722947597504, + "learning_rate": 3.31839017298094e-06, + "loss": 0.0015, + "step": 202150 + }, + { + "epoch": 1.2965914447797378, + "grad_norm": 0.3773379623889923, + "learning_rate": 3.317863084507269e-06, + "loss": 0.0023, + "step": 202160 + }, + { + "epoch": 1.296655581673524, + "grad_norm": 0.09458949416875839, + "learning_rate": 3.3173360171119766e-06, + "loss": 0.0017, + "step": 202170 + }, + { + "epoch": 1.29671971856731, + "grad_norm": 0.02694663032889366, + "learning_rate": 3.316808970801666e-06, + "loss": 0.0018, + "step": 202180 + }, + { + "epoch": 1.2967838554610962, + "grad_norm": 0.07020226866006851, + "learning_rate": 3.316281945582942e-06, + "loss": 0.0019, + "step": 202190 + }, + { + "epoch": 1.2968479923548824, + "grad_norm": 0.20809036493301392, + "learning_rate": 3.315754941462408e-06, + "loss": 0.0018, + "step": 202200 + }, + { + "epoch": 1.2969121292486683, + "grad_norm": 0.07734943926334381, + "learning_rate": 3.3152279584466697e-06, + "loss": 0.002, + "step": 202210 + }, + { + "epoch": 1.2969762661424544, + "grad_norm": 0.08855696767568588, + "learning_rate": 3.3147009965423273e-06, + "loss": 0.0023, + "step": 202220 + }, + { + "epoch": 1.2970404030362406, + "grad_norm": 0.0852886363863945, + "learning_rate": 3.3141740557559854e-06, + "loss": 0.0009, + "step": 202230 + }, + { + "epoch": 1.2971045399300267, + "grad_norm": 0.06497237831354141, + "learning_rate": 3.313647136094248e-06, + "loss": 0.0024, + "step": 202240 + }, + { + "epoch": 1.2971686768238126, + "grad_norm": 0.1269858032464981, + "learning_rate": 3.3131202375637183e-06, + "loss": 0.0011, + "step": 202250 + }, + { + "epoch": 1.2972328137175988, + "grad_norm": 0.03508013114333153, + "learning_rate": 3.312593360170997e-06, + "loss": 0.0019, + "step": 202260 + }, + { + "epoch": 1.297296950611385, + "grad_norm": 0.07374950498342514, + "learning_rate": 3.312066503922688e-06, + "loss": 0.0017, + "step": 202270 + }, + { + "epoch": 1.297361087505171, + "grad_norm": 0.01798911578953266, + "learning_rate": 3.3115396688253907e-06, + "loss": 0.0023, + "step": 202280 + }, + { + "epoch": 1.2974252243989572, + "grad_norm": 0.17165930569171906, + "learning_rate": 3.311012854885709e-06, + "loss": 0.0015, + "step": 202290 + }, + { + "epoch": 1.2974893612927432, + "grad_norm": 0.32450318336486816, + "learning_rate": 3.3104860621102426e-06, + "loss": 0.0015, + "step": 202300 + }, + { + "epoch": 1.2975534981865293, + "grad_norm": 0.03590445592999458, + "learning_rate": 3.3099592905055944e-06, + "loss": 0.0017, + "step": 202310 + }, + { + "epoch": 1.2976176350803155, + "grad_norm": 0.11629247665405273, + "learning_rate": 3.3094325400783627e-06, + "loss": 0.0019, + "step": 202320 + }, + { + "epoch": 1.2976817719741014, + "grad_norm": 0.0783039852976799, + "learning_rate": 3.3089058108351507e-06, + "loss": 0.0017, + "step": 202330 + }, + { + "epoch": 1.2977459088678875, + "grad_norm": 0.1197119727730751, + "learning_rate": 3.308379102782556e-06, + "loss": 0.0012, + "step": 202340 + }, + { + "epoch": 1.2978100457616737, + "grad_norm": 0.0840274766087532, + "learning_rate": 3.3078524159271817e-06, + "loss": 0.0014, + "step": 202350 + }, + { + "epoch": 1.2978741826554598, + "grad_norm": 0.06660163402557373, + "learning_rate": 3.307325750275625e-06, + "loss": 0.0027, + "step": 202360 + }, + { + "epoch": 1.297938319549246, + "grad_norm": 0.10205428302288055, + "learning_rate": 3.3067991058344866e-06, + "loss": 0.001, + "step": 202370 + }, + { + "epoch": 1.2980024564430321, + "grad_norm": 0.2115112692117691, + "learning_rate": 3.3062724826103664e-06, + "loss": 0.0028, + "step": 202380 + }, + { + "epoch": 1.298066593336818, + "grad_norm": 0.44339513778686523, + "learning_rate": 3.3057458806098616e-06, + "loss": 0.0019, + "step": 202390 + }, + { + "epoch": 1.2981307302306042, + "grad_norm": 0.007985997945070267, + "learning_rate": 3.3052192998395722e-06, + "loss": 0.0015, + "step": 202400 + }, + { + "epoch": 1.2981948671243904, + "grad_norm": 0.020172620192170143, + "learning_rate": 3.304692740306096e-06, + "loss": 0.0016, + "step": 202410 + }, + { + "epoch": 1.2982590040181763, + "grad_norm": 0.015433751977980137, + "learning_rate": 3.3041662020160314e-06, + "loss": 0.0011, + "step": 202420 + }, + { + "epoch": 1.2983231409119624, + "grad_norm": 0.05586683005094528, + "learning_rate": 3.3036396849759755e-06, + "loss": 0.0019, + "step": 202430 + }, + { + "epoch": 1.2983872778057486, + "grad_norm": 0.04309661686420441, + "learning_rate": 3.303113189192528e-06, + "loss": 0.0018, + "step": 202440 + }, + { + "epoch": 1.2984514146995347, + "grad_norm": 0.09828870743513107, + "learning_rate": 3.3025867146722844e-06, + "loss": 0.0009, + "step": 202450 + }, + { + "epoch": 1.2985155515933209, + "grad_norm": 0.04972812533378601, + "learning_rate": 3.3020602614218434e-06, + "loss": 0.001, + "step": 202460 + }, + { + "epoch": 1.2985796884871068, + "grad_norm": 0.17699630558490753, + "learning_rate": 3.301533829447799e-06, + "loss": 0.0019, + "step": 202470 + }, + { + "epoch": 1.298643825380893, + "grad_norm": 0.03271789848804474, + "learning_rate": 3.301007418756751e-06, + "loss": 0.002, + "step": 202480 + }, + { + "epoch": 1.298707962274679, + "grad_norm": 0.02394431084394455, + "learning_rate": 3.3004810293552936e-06, + "loss": 0.0015, + "step": 202490 + }, + { + "epoch": 1.2987720991684653, + "grad_norm": 0.13419033586978912, + "learning_rate": 3.299954661250023e-06, + "loss": 0.0022, + "step": 202500 + }, + { + "epoch": 1.2988362360622512, + "grad_norm": 0.11430850625038147, + "learning_rate": 3.2994283144475368e-06, + "loss": 0.0014, + "step": 202510 + }, + { + "epoch": 1.2989003729560373, + "grad_norm": 0.007823416963219643, + "learning_rate": 3.298901988954428e-06, + "loss": 0.001, + "step": 202520 + }, + { + "epoch": 1.2989645098498235, + "grad_norm": 0.10548365116119385, + "learning_rate": 3.298375684777294e-06, + "loss": 0.002, + "step": 202530 + }, + { + "epoch": 1.2990286467436096, + "grad_norm": 0.031271420419216156, + "learning_rate": 3.297849401922728e-06, + "loss": 0.0009, + "step": 202540 + }, + { + "epoch": 1.2990927836373958, + "grad_norm": 0.04257241263985634, + "learning_rate": 3.2973231403973265e-06, + "loss": 0.0017, + "step": 202550 + }, + { + "epoch": 1.2991569205311817, + "grad_norm": 0.12754876911640167, + "learning_rate": 3.2967969002076816e-06, + "loss": 0.0011, + "step": 202560 + }, + { + "epoch": 1.2992210574249679, + "grad_norm": 0.111076220870018, + "learning_rate": 3.2962706813603904e-06, + "loss": 0.0022, + "step": 202570 + }, + { + "epoch": 1.299285194318754, + "grad_norm": 0.05372251942753792, + "learning_rate": 3.295744483862044e-06, + "loss": 0.0011, + "step": 202580 + }, + { + "epoch": 1.29934933121254, + "grad_norm": 0.1858329176902771, + "learning_rate": 3.295218307719238e-06, + "loss": 0.0033, + "step": 202590 + }, + { + "epoch": 1.299413468106326, + "grad_norm": 0.1459503173828125, + "learning_rate": 3.2946921529385645e-06, + "loss": 0.0017, + "step": 202600 + }, + { + "epoch": 1.2994776050001122, + "grad_norm": 0.17990605533123016, + "learning_rate": 3.294166019526617e-06, + "loss": 0.003, + "step": 202610 + }, + { + "epoch": 1.2995417418938984, + "grad_norm": 0.5927944183349609, + "learning_rate": 3.2936399074899884e-06, + "loss": 0.0012, + "step": 202620 + }, + { + "epoch": 1.2996058787876845, + "grad_norm": 0.11808649450540543, + "learning_rate": 3.2931138168352715e-06, + "loss": 0.0015, + "step": 202630 + }, + { + "epoch": 1.2996700156814707, + "grad_norm": 0.05937317758798599, + "learning_rate": 3.292587747569058e-06, + "loss": 0.0011, + "step": 202640 + }, + { + "epoch": 1.2997341525752566, + "grad_norm": 0.0408489927649498, + "learning_rate": 3.2920616996979406e-06, + "loss": 0.0019, + "step": 202650 + }, + { + "epoch": 1.2997982894690427, + "grad_norm": 0.1323373168706894, + "learning_rate": 3.2915356732285116e-06, + "loss": 0.0011, + "step": 202660 + }, + { + "epoch": 1.299862426362829, + "grad_norm": 0.28618696331977844, + "learning_rate": 3.29100966816736e-06, + "loss": 0.0021, + "step": 202670 + }, + { + "epoch": 1.2999265632566148, + "grad_norm": 0.06622526049613953, + "learning_rate": 3.2904836845210796e-06, + "loss": 0.0008, + "step": 202680 + }, + { + "epoch": 1.299990700150401, + "grad_norm": 0.017771361395716667, + "learning_rate": 3.2899577222962604e-06, + "loss": 0.0011, + "step": 202690 + }, + { + "epoch": 1.3000548370441871, + "grad_norm": 0.04204076528549194, + "learning_rate": 3.2894317814994934e-06, + "loss": 0.0011, + "step": 202700 + }, + { + "epoch": 1.3001189739379733, + "grad_norm": 0.05336964130401611, + "learning_rate": 3.2889058621373674e-06, + "loss": 0.0015, + "step": 202710 + }, + { + "epoch": 1.3001831108317594, + "grad_norm": 0.0902399867773056, + "learning_rate": 3.288379964216476e-06, + "loss": 0.0011, + "step": 202720 + }, + { + "epoch": 1.3002472477255453, + "grad_norm": 0.1343262940645218, + "learning_rate": 3.287854087743405e-06, + "loss": 0.0015, + "step": 202730 + }, + { + "epoch": 1.3003113846193315, + "grad_norm": 0.10670477896928787, + "learning_rate": 3.287328232724747e-06, + "loss": 0.0011, + "step": 202740 + }, + { + "epoch": 1.3003755215131176, + "grad_norm": 0.01778768189251423, + "learning_rate": 3.2868023991670895e-06, + "loss": 0.0013, + "step": 202750 + }, + { + "epoch": 1.3004396584069036, + "grad_norm": 0.06783147901296616, + "learning_rate": 3.286276587077023e-06, + "loss": 0.0013, + "step": 202760 + }, + { + "epoch": 1.3005037953006897, + "grad_norm": 0.045526184141635895, + "learning_rate": 3.2857507964611347e-06, + "loss": 0.0011, + "step": 202770 + }, + { + "epoch": 1.3005679321944759, + "grad_norm": 0.04004519432783127, + "learning_rate": 3.2852250273260155e-06, + "loss": 0.003, + "step": 202780 + }, + { + "epoch": 1.300632069088262, + "grad_norm": 0.08523906022310257, + "learning_rate": 3.2846992796782507e-06, + "loss": 0.0016, + "step": 202790 + }, + { + "epoch": 1.3006962059820482, + "grad_norm": 0.14614760875701904, + "learning_rate": 3.28417355352443e-06, + "loss": 0.0009, + "step": 202800 + }, + { + "epoch": 1.3007603428758343, + "grad_norm": 0.08484523743391037, + "learning_rate": 3.283647848871142e-06, + "loss": 0.0009, + "step": 202810 + }, + { + "epoch": 1.3008244797696202, + "grad_norm": 0.08262226730585098, + "learning_rate": 3.2831221657249723e-06, + "loss": 0.0014, + "step": 202820 + }, + { + "epoch": 1.3008886166634064, + "grad_norm": 0.09609952569007874, + "learning_rate": 3.28259650409251e-06, + "loss": 0.001, + "step": 202830 + }, + { + "epoch": 1.3009527535571925, + "grad_norm": 0.036871932446956635, + "learning_rate": 3.2820708639803396e-06, + "loss": 0.0021, + "step": 202840 + }, + { + "epoch": 1.3010168904509785, + "grad_norm": 0.218247190117836, + "learning_rate": 3.28154524539505e-06, + "loss": 0.0014, + "step": 202850 + }, + { + "epoch": 1.3010810273447646, + "grad_norm": 0.2301071435213089, + "learning_rate": 3.2810196483432255e-06, + "loss": 0.0017, + "step": 202860 + }, + { + "epoch": 1.3011451642385508, + "grad_norm": 0.07522774487733841, + "learning_rate": 3.280494072831455e-06, + "loss": 0.0007, + "step": 202870 + }, + { + "epoch": 1.301209301132337, + "grad_norm": 0.11489323526620865, + "learning_rate": 3.279968518866321e-06, + "loss": 0.0028, + "step": 202880 + }, + { + "epoch": 1.301273438026123, + "grad_norm": 0.01686144433915615, + "learning_rate": 3.2794429864544126e-06, + "loss": 0.0012, + "step": 202890 + }, + { + "epoch": 1.301337574919909, + "grad_norm": 0.011467288248240948, + "learning_rate": 3.278917475602311e-06, + "loss": 0.0017, + "step": 202900 + }, + { + "epoch": 1.3014017118136951, + "grad_norm": 0.13039171695709229, + "learning_rate": 3.278391986316606e-06, + "loss": 0.0011, + "step": 202910 + }, + { + "epoch": 1.3014658487074813, + "grad_norm": 0.019838912412524223, + "learning_rate": 3.2778665186038776e-06, + "loss": 0.0009, + "step": 202920 + }, + { + "epoch": 1.3015299856012674, + "grad_norm": 0.05785344913601875, + "learning_rate": 3.277341072470714e-06, + "loss": 0.0015, + "step": 202930 + }, + { + "epoch": 1.3015941224950534, + "grad_norm": 0.04003293067216873, + "learning_rate": 3.276815647923697e-06, + "loss": 0.0016, + "step": 202940 + }, + { + "epoch": 1.3016582593888395, + "grad_norm": 0.11377974599599838, + "learning_rate": 3.276290244969411e-06, + "loss": 0.0015, + "step": 202950 + }, + { + "epoch": 1.3017223962826256, + "grad_norm": 0.025902625173330307, + "learning_rate": 3.2757648636144413e-06, + "loss": 0.0017, + "step": 202960 + }, + { + "epoch": 1.3017865331764118, + "grad_norm": 0.08864615112543106, + "learning_rate": 3.2752395038653693e-06, + "loss": 0.0005, + "step": 202970 + }, + { + "epoch": 1.301850670070198, + "grad_norm": 0.12506291270256042, + "learning_rate": 3.2747141657287806e-06, + "loss": 0.0026, + "step": 202980 + }, + { + "epoch": 1.3019148069639839, + "grad_norm": 0.05017014592885971, + "learning_rate": 3.2741888492112534e-06, + "loss": 0.0016, + "step": 202990 + }, + { + "epoch": 1.30197894385777, + "grad_norm": 0.0649946928024292, + "learning_rate": 3.2736635543193753e-06, + "loss": 0.002, + "step": 203000 + }, + { + "epoch": 1.3020430807515562, + "grad_norm": 0.3042117655277252, + "learning_rate": 3.2731382810597267e-06, + "loss": 0.0015, + "step": 203010 + }, + { + "epoch": 1.302107217645342, + "grad_norm": 0.013076065108180046, + "learning_rate": 3.2726130294388892e-06, + "loss": 0.0011, + "step": 203020 + }, + { + "epoch": 1.3021713545391282, + "grad_norm": 0.12743835151195526, + "learning_rate": 3.272087799463446e-06, + "loss": 0.0014, + "step": 203030 + }, + { + "epoch": 1.3022354914329144, + "grad_norm": 0.04182353615760803, + "learning_rate": 3.271562591139976e-06, + "loss": 0.0012, + "step": 203040 + }, + { + "epoch": 1.3022996283267005, + "grad_norm": 0.13796482980251312, + "learning_rate": 3.2710374044750638e-06, + "loss": 0.0015, + "step": 203050 + }, + { + "epoch": 1.3023637652204867, + "grad_norm": 0.021277979016304016, + "learning_rate": 3.270512239475287e-06, + "loss": 0.0012, + "step": 203060 + }, + { + "epoch": 1.3024279021142728, + "grad_norm": 0.003118517342954874, + "learning_rate": 3.269987096147228e-06, + "loss": 0.0019, + "step": 203070 + }, + { + "epoch": 1.3024920390080588, + "grad_norm": 0.042708396911621094, + "learning_rate": 3.269461974497468e-06, + "loss": 0.001, + "step": 203080 + }, + { + "epoch": 1.302556175901845, + "grad_norm": 0.12176183611154556, + "learning_rate": 3.2689368745325854e-06, + "loss": 0.0033, + "step": 203090 + }, + { + "epoch": 1.302620312795631, + "grad_norm": 0.09801366925239563, + "learning_rate": 3.2684117962591624e-06, + "loss": 0.0009, + "step": 203100 + }, + { + "epoch": 1.302684449689417, + "grad_norm": 0.020501457154750824, + "learning_rate": 3.2678867396837753e-06, + "loss": 0.0012, + "step": 203110 + }, + { + "epoch": 1.3027485865832031, + "grad_norm": 0.1360543817281723, + "learning_rate": 3.267361704813007e-06, + "loss": 0.0013, + "step": 203120 + }, + { + "epoch": 1.3028127234769893, + "grad_norm": 0.16810686886310577, + "learning_rate": 3.2668366916534334e-06, + "loss": 0.0016, + "step": 203130 + }, + { + "epoch": 1.3028768603707754, + "grad_norm": 0.05818602070212364, + "learning_rate": 3.2663117002116363e-06, + "loss": 0.0012, + "step": 203140 + }, + { + "epoch": 1.3029409972645616, + "grad_norm": 0.026946822181344032, + "learning_rate": 3.2657867304941913e-06, + "loss": 0.0012, + "step": 203150 + }, + { + "epoch": 1.3030051341583475, + "grad_norm": 0.01007855124771595, + "learning_rate": 3.265261782507679e-06, + "loss": 0.001, + "step": 203160 + }, + { + "epoch": 1.3030692710521337, + "grad_norm": 0.06413888186216354, + "learning_rate": 3.264736856258675e-06, + "loss": 0.0008, + "step": 203170 + }, + { + "epoch": 1.3031334079459198, + "grad_norm": 0.09537085145711899, + "learning_rate": 3.2642119517537597e-06, + "loss": 0.0012, + "step": 203180 + }, + { + "epoch": 1.3031975448397057, + "grad_norm": 0.1698506772518158, + "learning_rate": 3.263687068999508e-06, + "loss": 0.0013, + "step": 203190 + }, + { + "epoch": 1.3032616817334919, + "grad_norm": 0.09424944967031479, + "learning_rate": 3.2631622080024992e-06, + "loss": 0.0018, + "step": 203200 + }, + { + "epoch": 1.303325818627278, + "grad_norm": 0.040745921432971954, + "learning_rate": 3.262637368769309e-06, + "loss": 0.0012, + "step": 203210 + }, + { + "epoch": 1.3033899555210642, + "grad_norm": 0.05310925841331482, + "learning_rate": 3.2621125513065132e-06, + "loss": 0.001, + "step": 203220 + }, + { + "epoch": 1.3034540924148503, + "grad_norm": 0.06866107881069183, + "learning_rate": 3.261587755620691e-06, + "loss": 0.0005, + "step": 203230 + }, + { + "epoch": 1.3035182293086365, + "grad_norm": 0.028232771903276443, + "learning_rate": 3.261062981718415e-06, + "loss": 0.0016, + "step": 203240 + }, + { + "epoch": 1.3035823662024224, + "grad_norm": 0.11395175009965897, + "learning_rate": 3.2605382296062643e-06, + "loss": 0.0014, + "step": 203250 + }, + { + "epoch": 1.3036465030962086, + "grad_norm": 0.05476236343383789, + "learning_rate": 3.2600134992908115e-06, + "loss": 0.0016, + "step": 203260 + }, + { + "epoch": 1.3037106399899947, + "grad_norm": 0.01740194670855999, + "learning_rate": 3.259488790778634e-06, + "loss": 0.0035, + "step": 203270 + }, + { + "epoch": 1.3037747768837806, + "grad_norm": 0.05768860876560211, + "learning_rate": 3.258964104076305e-06, + "loss": 0.0014, + "step": 203280 + }, + { + "epoch": 1.3038389137775668, + "grad_norm": 0.013930111192166805, + "learning_rate": 3.2584394391904008e-06, + "loss": 0.0011, + "step": 203290 + }, + { + "epoch": 1.303903050671353, + "grad_norm": 0.040215425193309784, + "learning_rate": 3.257914796127494e-06, + "loss": 0.0015, + "step": 203300 + }, + { + "epoch": 1.303967187565139, + "grad_norm": 0.0704159140586853, + "learning_rate": 3.2573901748941615e-06, + "loss": 0.0012, + "step": 203310 + }, + { + "epoch": 1.3040313244589252, + "grad_norm": 0.03699749708175659, + "learning_rate": 3.2568655754969736e-06, + "loss": 0.0011, + "step": 203320 + }, + { + "epoch": 1.3040954613527111, + "grad_norm": 0.007879172451794147, + "learning_rate": 3.2563409979425076e-06, + "loss": 0.0043, + "step": 203330 + }, + { + "epoch": 1.3041595982464973, + "grad_norm": 0.057002849876880646, + "learning_rate": 3.255816442237334e-06, + "loss": 0.0023, + "step": 203340 + }, + { + "epoch": 1.3042237351402834, + "grad_norm": 0.05223952978849411, + "learning_rate": 3.2552919083880273e-06, + "loss": 0.0034, + "step": 203350 + }, + { + "epoch": 1.3042878720340696, + "grad_norm": 0.14209382236003876, + "learning_rate": 3.25476739640116e-06, + "loss": 0.0014, + "step": 203360 + }, + { + "epoch": 1.3043520089278555, + "grad_norm": 0.1263195425271988, + "learning_rate": 3.254242906283304e-06, + "loss": 0.0018, + "step": 203370 + }, + { + "epoch": 1.3044161458216417, + "grad_norm": 0.008843827061355114, + "learning_rate": 3.2537184380410324e-06, + "loss": 0.0018, + "step": 203380 + }, + { + "epoch": 1.3044802827154278, + "grad_norm": 0.051901768893003464, + "learning_rate": 3.253193991680917e-06, + "loss": 0.0041, + "step": 203390 + }, + { + "epoch": 1.304544419609214, + "grad_norm": 0.04232050105929375, + "learning_rate": 3.2526695672095297e-06, + "loss": 0.0014, + "step": 203400 + }, + { + "epoch": 1.3046085565030001, + "grad_norm": 0.04878690093755722, + "learning_rate": 3.25214516463344e-06, + "loss": 0.0014, + "step": 203410 + }, + { + "epoch": 1.304672693396786, + "grad_norm": 0.04045327007770538, + "learning_rate": 3.2516207839592225e-06, + "loss": 0.0015, + "step": 203420 + }, + { + "epoch": 1.3047368302905722, + "grad_norm": 0.024009767919778824, + "learning_rate": 3.2510964251934447e-06, + "loss": 0.002, + "step": 203430 + }, + { + "epoch": 1.3048009671843583, + "grad_norm": 0.05732010677456856, + "learning_rate": 3.2505720883426793e-06, + "loss": 0.0012, + "step": 203440 + }, + { + "epoch": 1.3048651040781443, + "grad_norm": 0.025046516209840775, + "learning_rate": 3.2500477734134954e-06, + "loss": 0.0013, + "step": 203450 + }, + { + "epoch": 1.3049292409719304, + "grad_norm": 0.05671786889433861, + "learning_rate": 3.2495234804124644e-06, + "loss": 0.0011, + "step": 203460 + }, + { + "epoch": 1.3049933778657166, + "grad_norm": 0.019109154120087624, + "learning_rate": 3.248999209346154e-06, + "loss": 0.0007, + "step": 203470 + }, + { + "epoch": 1.3050575147595027, + "grad_norm": 0.09901275485754013, + "learning_rate": 3.2484749602211363e-06, + "loss": 0.0016, + "step": 203480 + }, + { + "epoch": 1.3051216516532889, + "grad_norm": 0.03167424350976944, + "learning_rate": 3.2479507330439788e-06, + "loss": 0.0016, + "step": 203490 + }, + { + "epoch": 1.305185788547075, + "grad_norm": 0.08982674032449722, + "learning_rate": 3.2474265278212495e-06, + "loss": 0.0014, + "step": 203500 + }, + { + "epoch": 1.305249925440861, + "grad_norm": 0.08459123969078064, + "learning_rate": 3.2469023445595197e-06, + "loss": 0.0012, + "step": 203510 + }, + { + "epoch": 1.305314062334647, + "grad_norm": 0.025580601766705513, + "learning_rate": 3.246378183265356e-06, + "loss": 0.0019, + "step": 203520 + }, + { + "epoch": 1.3053781992284332, + "grad_norm": 0.023656470701098442, + "learning_rate": 3.2458540439453273e-06, + "loss": 0.0016, + "step": 203530 + }, + { + "epoch": 1.3054423361222192, + "grad_norm": 0.13090649247169495, + "learning_rate": 3.245329926606e-06, + "loss": 0.0013, + "step": 203540 + }, + { + "epoch": 1.3055064730160053, + "grad_norm": 0.02131395787000656, + "learning_rate": 3.244805831253944e-06, + "loss": 0.0009, + "step": 203550 + }, + { + "epoch": 1.3055706099097915, + "grad_norm": 0.022348342463374138, + "learning_rate": 3.2442817578957253e-06, + "loss": 0.0013, + "step": 203560 + }, + { + "epoch": 1.3056347468035776, + "grad_norm": 0.03129712492227554, + "learning_rate": 3.243757706537911e-06, + "loss": 0.0006, + "step": 203570 + }, + { + "epoch": 1.3056988836973638, + "grad_norm": 0.09756151586771011, + "learning_rate": 3.243233677187067e-06, + "loss": 0.0034, + "step": 203580 + }, + { + "epoch": 1.3057630205911497, + "grad_norm": 0.086264967918396, + "learning_rate": 3.242709669849762e-06, + "loss": 0.0018, + "step": 203590 + }, + { + "epoch": 1.3058271574849358, + "grad_norm": 0.0013568139402195811, + "learning_rate": 3.242185684532559e-06, + "loss": 0.0017, + "step": 203600 + }, + { + "epoch": 1.305891294378722, + "grad_norm": 0.0020245579071342945, + "learning_rate": 3.2416617212420263e-06, + "loss": 0.002, + "step": 203610 + }, + { + "epoch": 1.305955431272508, + "grad_norm": 0.0683000385761261, + "learning_rate": 3.241137779984729e-06, + "loss": 0.0009, + "step": 203620 + }, + { + "epoch": 1.306019568166294, + "grad_norm": 0.02258705347776413, + "learning_rate": 3.2406138607672334e-06, + "loss": 0.0016, + "step": 203630 + }, + { + "epoch": 1.3060837050600802, + "grad_norm": 0.06761499494314194, + "learning_rate": 3.2400899635961014e-06, + "loss": 0.0012, + "step": 203640 + }, + { + "epoch": 1.3061478419538664, + "grad_norm": 0.20406056940555573, + "learning_rate": 3.2395660884779007e-06, + "loss": 0.0027, + "step": 203650 + }, + { + "epoch": 1.3062119788476525, + "grad_norm": 0.12400095909833908, + "learning_rate": 3.2390422354191964e-06, + "loss": 0.001, + "step": 203660 + }, + { + "epoch": 1.3062761157414386, + "grad_norm": 0.054169487208127975, + "learning_rate": 3.2385184044265495e-06, + "loss": 0.001, + "step": 203670 + }, + { + "epoch": 1.3063402526352246, + "grad_norm": 0.02641112729907036, + "learning_rate": 3.237994595506527e-06, + "loss": 0.0014, + "step": 203680 + }, + { + "epoch": 1.3064043895290107, + "grad_norm": 0.11586663126945496, + "learning_rate": 3.2374708086656916e-06, + "loss": 0.0023, + "step": 203690 + }, + { + "epoch": 1.3064685264227969, + "grad_norm": 0.11719394475221634, + "learning_rate": 3.2369470439106065e-06, + "loss": 0.0022, + "step": 203700 + }, + { + "epoch": 1.3065326633165828, + "grad_norm": 0.1640966832637787, + "learning_rate": 3.236423301247835e-06, + "loss": 0.0021, + "step": 203710 + }, + { + "epoch": 1.306596800210369, + "grad_norm": 0.08211014419794083, + "learning_rate": 3.23589958068394e-06, + "loss": 0.0021, + "step": 203720 + }, + { + "epoch": 1.306660937104155, + "grad_norm": 0.03999345377087593, + "learning_rate": 3.235375882225483e-06, + "loss": 0.0009, + "step": 203730 + }, + { + "epoch": 1.3067250739979412, + "grad_norm": 0.02322913520038128, + "learning_rate": 3.2348522058790287e-06, + "loss": 0.0016, + "step": 203740 + }, + { + "epoch": 1.3067892108917274, + "grad_norm": 0.14985918998718262, + "learning_rate": 3.234328551651137e-06, + "loss": 0.0008, + "step": 203750 + }, + { + "epoch": 1.3068533477855133, + "grad_norm": 0.07556784898042679, + "learning_rate": 3.2338049195483696e-06, + "loss": 0.0011, + "step": 203760 + }, + { + "epoch": 1.3069174846792995, + "grad_norm": 0.06410104036331177, + "learning_rate": 3.2332813095772897e-06, + "loss": 0.0015, + "step": 203770 + }, + { + "epoch": 1.3069816215730856, + "grad_norm": 0.10119643062353134, + "learning_rate": 3.232757721744458e-06, + "loss": 0.003, + "step": 203780 + }, + { + "epoch": 1.3070457584668718, + "grad_norm": 0.2246820330619812, + "learning_rate": 3.232234156056435e-06, + "loss": 0.0012, + "step": 203790 + }, + { + "epoch": 1.3071098953606577, + "grad_norm": 0.09073050320148468, + "learning_rate": 3.2317106125197816e-06, + "loss": 0.0012, + "step": 203800 + }, + { + "epoch": 1.3071740322544438, + "grad_norm": 0.04273083433508873, + "learning_rate": 3.231187091141057e-06, + "loss": 0.0011, + "step": 203810 + }, + { + "epoch": 1.30723816914823, + "grad_norm": 0.1075194776058197, + "learning_rate": 3.2306635919268235e-06, + "loss": 0.0027, + "step": 203820 + }, + { + "epoch": 1.3073023060420161, + "grad_norm": 0.03114445134997368, + "learning_rate": 3.23014011488364e-06, + "loss": 0.001, + "step": 203830 + }, + { + "epoch": 1.3073664429358023, + "grad_norm": 0.04628058150410652, + "learning_rate": 3.229616660018065e-06, + "loss": 0.0013, + "step": 203840 + }, + { + "epoch": 1.3074305798295882, + "grad_norm": 0.05554237216711044, + "learning_rate": 3.229093227336658e-06, + "loss": 0.0008, + "step": 203850 + }, + { + "epoch": 1.3074947167233744, + "grad_norm": 0.04712125286459923, + "learning_rate": 3.22856981684598e-06, + "loss": 0.0028, + "step": 203860 + }, + { + "epoch": 1.3075588536171605, + "grad_norm": 0.05716227367520332, + "learning_rate": 3.228046428552587e-06, + "loss": 0.0025, + "step": 203870 + }, + { + "epoch": 1.3076229905109464, + "grad_norm": 0.025026440620422363, + "learning_rate": 3.2275230624630395e-06, + "loss": 0.0012, + "step": 203880 + }, + { + "epoch": 1.3076871274047326, + "grad_norm": 0.17739082872867584, + "learning_rate": 3.2269997185838935e-06, + "loss": 0.0012, + "step": 203890 + }, + { + "epoch": 1.3077512642985187, + "grad_norm": 0.04024747386574745, + "learning_rate": 3.2264763969217094e-06, + "loss": 0.0012, + "step": 203900 + }, + { + "epoch": 1.3078154011923049, + "grad_norm": 0.011896566487848759, + "learning_rate": 3.225953097483043e-06, + "loss": 0.0008, + "step": 203910 + }, + { + "epoch": 1.307879538086091, + "grad_norm": 0.06987634301185608, + "learning_rate": 3.2254298202744516e-06, + "loss": 0.0015, + "step": 203920 + }, + { + "epoch": 1.3079436749798772, + "grad_norm": 0.0587100051343441, + "learning_rate": 3.2249065653024948e-06, + "loss": 0.0013, + "step": 203930 + }, + { + "epoch": 1.308007811873663, + "grad_norm": 0.03610633313655853, + "learning_rate": 3.224383332573725e-06, + "loss": 0.0013, + "step": 203940 + }, + { + "epoch": 1.3080719487674493, + "grad_norm": 0.09071983397006989, + "learning_rate": 3.223860122094703e-06, + "loss": 0.0016, + "step": 203950 + }, + { + "epoch": 1.3081360856612354, + "grad_norm": 0.21553845703601837, + "learning_rate": 3.2233369338719816e-06, + "loss": 0.0016, + "step": 203960 + }, + { + "epoch": 1.3082002225550213, + "grad_norm": 0.06973786652088165, + "learning_rate": 3.2228137679121198e-06, + "loss": 0.0016, + "step": 203970 + }, + { + "epoch": 1.3082643594488075, + "grad_norm": 0.10736697912216187, + "learning_rate": 3.22229062422167e-06, + "loss": 0.0011, + "step": 203980 + }, + { + "epoch": 1.3083284963425936, + "grad_norm": 0.04209981858730316, + "learning_rate": 3.2217675028071905e-06, + "loss": 0.0006, + "step": 203990 + }, + { + "epoch": 1.3083926332363798, + "grad_norm": 0.14557282626628876, + "learning_rate": 3.2212444036752335e-06, + "loss": 0.0014, + "step": 204000 + }, + { + "epoch": 1.308456770130166, + "grad_norm": 0.11464909464120865, + "learning_rate": 3.220721326832357e-06, + "loss": 0.0013, + "step": 204010 + }, + { + "epoch": 1.3085209070239519, + "grad_norm": 0.0317268893122673, + "learning_rate": 3.2201982722851127e-06, + "loss": 0.0011, + "step": 204020 + }, + { + "epoch": 1.308585043917738, + "grad_norm": 0.13303732872009277, + "learning_rate": 3.2196752400400573e-06, + "loss": 0.0024, + "step": 204030 + }, + { + "epoch": 1.3086491808115241, + "grad_norm": 0.03285135701298714, + "learning_rate": 3.219152230103742e-06, + "loss": 0.0023, + "step": 204040 + }, + { + "epoch": 1.3087133177053103, + "grad_norm": 0.12097574770450592, + "learning_rate": 3.2186292424827236e-06, + "loss": 0.0025, + "step": 204050 + }, + { + "epoch": 1.3087774545990962, + "grad_norm": 0.07854858040809631, + "learning_rate": 3.2181062771835524e-06, + "loss": 0.0019, + "step": 204060 + }, + { + "epoch": 1.3088415914928824, + "grad_norm": 0.0640355572104454, + "learning_rate": 3.2175833342127834e-06, + "loss": 0.0015, + "step": 204070 + }, + { + "epoch": 1.3089057283866685, + "grad_norm": 0.10648632049560547, + "learning_rate": 3.21706041357697e-06, + "loss": 0.0021, + "step": 204080 + }, + { + "epoch": 1.3089698652804547, + "grad_norm": 0.06015864014625549, + "learning_rate": 3.216537515282663e-06, + "loss": 0.0017, + "step": 204090 + }, + { + "epoch": 1.3090340021742408, + "grad_norm": 0.13343878090381622, + "learning_rate": 3.216014639336416e-06, + "loss": 0.0017, + "step": 204100 + }, + { + "epoch": 1.3090981390680267, + "grad_norm": 0.13053520023822784, + "learning_rate": 3.2154917857447797e-06, + "loss": 0.0028, + "step": 204110 + }, + { + "epoch": 1.309162275961813, + "grad_norm": 0.03047873079776764, + "learning_rate": 3.214968954514308e-06, + "loss": 0.002, + "step": 204120 + }, + { + "epoch": 1.309226412855599, + "grad_norm": 0.09146006405353546, + "learning_rate": 3.21444614565155e-06, + "loss": 0.0018, + "step": 204130 + }, + { + "epoch": 1.309290549749385, + "grad_norm": 0.024952685460448265, + "learning_rate": 3.2139233591630593e-06, + "loss": 0.0008, + "step": 204140 + }, + { + "epoch": 1.3093546866431711, + "grad_norm": 0.12278847396373749, + "learning_rate": 3.213400595055384e-06, + "loss": 0.0014, + "step": 204150 + }, + { + "epoch": 1.3094188235369573, + "grad_norm": 0.03779635950922966, + "learning_rate": 3.2128778533350767e-06, + "loss": 0.0009, + "step": 204160 + }, + { + "epoch": 1.3094829604307434, + "grad_norm": 0.012328768149018288, + "learning_rate": 3.2123551340086868e-06, + "loss": 0.001, + "step": 204170 + }, + { + "epoch": 1.3095470973245296, + "grad_norm": 0.070521779358387, + "learning_rate": 3.2118324370827654e-06, + "loss": 0.0013, + "step": 204180 + }, + { + "epoch": 1.3096112342183155, + "grad_norm": 0.061533670872449875, + "learning_rate": 3.2113097625638606e-06, + "loss": 0.001, + "step": 204190 + }, + { + "epoch": 1.3096753711121016, + "grad_norm": 0.33698606491088867, + "learning_rate": 3.2107871104585238e-06, + "loss": 0.0034, + "step": 204200 + }, + { + "epoch": 1.3097395080058878, + "grad_norm": 0.06746309995651245, + "learning_rate": 3.210264480773302e-06, + "loss": 0.0007, + "step": 204210 + }, + { + "epoch": 1.309803644899674, + "grad_norm": 0.006428118795156479, + "learning_rate": 3.209741873514746e-06, + "loss": 0.0007, + "step": 204220 + }, + { + "epoch": 1.3098677817934599, + "grad_norm": 0.1033342257142067, + "learning_rate": 3.209219288689404e-06, + "loss": 0.002, + "step": 204230 + }, + { + "epoch": 1.309931918687246, + "grad_norm": 0.14611609280109406, + "learning_rate": 3.2086967263038236e-06, + "loss": 0.0017, + "step": 204240 + }, + { + "epoch": 1.3099960555810322, + "grad_norm": 0.12221353501081467, + "learning_rate": 3.2081741863645543e-06, + "loss": 0.0011, + "step": 204250 + }, + { + "epoch": 1.3100601924748183, + "grad_norm": 0.02614627592265606, + "learning_rate": 3.207651668878142e-06, + "loss": 0.001, + "step": 204260 + }, + { + "epoch": 1.3101243293686045, + "grad_norm": 0.4079796075820923, + "learning_rate": 3.207129173851137e-06, + "loss": 0.0018, + "step": 204270 + }, + { + "epoch": 1.3101884662623904, + "grad_norm": 0.19869622588157654, + "learning_rate": 3.206606701290083e-06, + "loss": 0.0018, + "step": 204280 + }, + { + "epoch": 1.3102526031561765, + "grad_norm": 0.05197782814502716, + "learning_rate": 3.20608425120153e-06, + "loss": 0.0018, + "step": 204290 + }, + { + "epoch": 1.3103167400499627, + "grad_norm": 0.017865058034658432, + "learning_rate": 3.2055618235920226e-06, + "loss": 0.001, + "step": 204300 + }, + { + "epoch": 1.3103808769437486, + "grad_norm": 0.04671267047524452, + "learning_rate": 3.205039418468109e-06, + "loss": 0.0013, + "step": 204310 + }, + { + "epoch": 1.3104450138375348, + "grad_norm": 0.016545766964554787, + "learning_rate": 3.2045170358363333e-06, + "loss": 0.0011, + "step": 204320 + }, + { + "epoch": 1.310509150731321, + "grad_norm": 0.12884260714054108, + "learning_rate": 3.2039946757032423e-06, + "loss": 0.0023, + "step": 204330 + }, + { + "epoch": 1.310573287625107, + "grad_norm": 0.05837222933769226, + "learning_rate": 3.203472338075382e-06, + "loss": 0.002, + "step": 204340 + }, + { + "epoch": 1.3106374245188932, + "grad_norm": 0.06461817026138306, + "learning_rate": 3.202950022959297e-06, + "loss": 0.0012, + "step": 204350 + }, + { + "epoch": 1.3107015614126794, + "grad_norm": 0.14301276206970215, + "learning_rate": 3.2024277303615323e-06, + "loss": 0.001, + "step": 204360 + }, + { + "epoch": 1.3107656983064653, + "grad_norm": 0.024913959205150604, + "learning_rate": 3.2019054602886334e-06, + "loss": 0.001, + "step": 204370 + }, + { + "epoch": 1.3108298352002514, + "grad_norm": 0.06206691637635231, + "learning_rate": 3.2013832127471445e-06, + "loss": 0.0015, + "step": 204380 + }, + { + "epoch": 1.3108939720940376, + "grad_norm": 0.008530830033123493, + "learning_rate": 3.2008609877436087e-06, + "loss": 0.0008, + "step": 204390 + }, + { + "epoch": 1.3109581089878235, + "grad_norm": 0.10439231246709824, + "learning_rate": 3.2003387852845714e-06, + "loss": 0.0011, + "step": 204400 + }, + { + "epoch": 1.3110222458816096, + "grad_norm": 0.059677399694919586, + "learning_rate": 3.1998166053765746e-06, + "loss": 0.0017, + "step": 204410 + }, + { + "epoch": 1.3110863827753958, + "grad_norm": 0.121797576546669, + "learning_rate": 3.199294448026162e-06, + "loss": 0.0012, + "step": 204420 + }, + { + "epoch": 1.311150519669182, + "grad_norm": 0.04569542035460472, + "learning_rate": 3.198772313239877e-06, + "loss": 0.0021, + "step": 204430 + }, + { + "epoch": 1.311214656562968, + "grad_norm": 0.03476222604513168, + "learning_rate": 3.1982502010242633e-06, + "loss": 0.0008, + "step": 204440 + }, + { + "epoch": 1.311278793456754, + "grad_norm": 0.08952942490577698, + "learning_rate": 3.1977281113858615e-06, + "loss": 0.001, + "step": 204450 + }, + { + "epoch": 1.3113429303505402, + "grad_norm": 0.10412070155143738, + "learning_rate": 3.197206044331215e-06, + "loss": 0.0035, + "step": 204460 + }, + { + "epoch": 1.3114070672443263, + "grad_norm": 0.07542761415243149, + "learning_rate": 3.196683999866864e-06, + "loss": 0.0013, + "step": 204470 + }, + { + "epoch": 1.3114712041381125, + "grad_norm": 0.056854650378227234, + "learning_rate": 3.1961619779993524e-06, + "loss": 0.0014, + "step": 204480 + }, + { + "epoch": 1.3115353410318984, + "grad_norm": 0.04510832577943802, + "learning_rate": 3.19563997873522e-06, + "loss": 0.0006, + "step": 204490 + }, + { + "epoch": 1.3115994779256845, + "grad_norm": 0.1429297924041748, + "learning_rate": 3.195118002081008e-06, + "loss": 0.002, + "step": 204500 + }, + { + "epoch": 1.3116636148194707, + "grad_norm": 0.05365953966975212, + "learning_rate": 3.194596048043258e-06, + "loss": 0.0011, + "step": 204510 + }, + { + "epoch": 1.3117277517132568, + "grad_norm": 0.017902769148349762, + "learning_rate": 3.1940741166285088e-06, + "loss": 0.0017, + "step": 204520 + }, + { + "epoch": 1.311791888607043, + "grad_norm": 0.09438332170248032, + "learning_rate": 3.193552207843301e-06, + "loss": 0.001, + "step": 204530 + }, + { + "epoch": 1.311856025500829, + "grad_norm": 0.054045744240283966, + "learning_rate": 3.1930303216941773e-06, + "loss": 0.001, + "step": 204540 + }, + { + "epoch": 1.311920162394615, + "grad_norm": 0.09531563520431519, + "learning_rate": 3.192508458187673e-06, + "loss": 0.0038, + "step": 204550 + }, + { + "epoch": 1.3119842992884012, + "grad_norm": 0.04847593978047371, + "learning_rate": 3.1919866173303316e-06, + "loss": 0.0005, + "step": 204560 + }, + { + "epoch": 1.3120484361821871, + "grad_norm": 0.09458363056182861, + "learning_rate": 3.1914647991286886e-06, + "loss": 0.0013, + "step": 204570 + }, + { + "epoch": 1.3121125730759733, + "grad_norm": 0.24712799489498138, + "learning_rate": 3.190943003589285e-06, + "loss": 0.0017, + "step": 204580 + }, + { + "epoch": 1.3121767099697594, + "grad_norm": 0.05329671502113342, + "learning_rate": 3.1904212307186576e-06, + "loss": 0.0027, + "step": 204590 + }, + { + "epoch": 1.3122408468635456, + "grad_norm": 0.0810551792383194, + "learning_rate": 3.189899480523347e-06, + "loss": 0.0019, + "step": 204600 + }, + { + "epoch": 1.3123049837573317, + "grad_norm": 0.11516894400119781, + "learning_rate": 3.189377753009888e-06, + "loss": 0.0019, + "step": 204610 + }, + { + "epoch": 1.3123691206511179, + "grad_norm": 0.011796182952821255, + "learning_rate": 3.1888560481848195e-06, + "loss": 0.0014, + "step": 204620 + }, + { + "epoch": 1.3124332575449038, + "grad_norm": 0.07899974286556244, + "learning_rate": 3.188334366054681e-06, + "loss": 0.0011, + "step": 204630 + }, + { + "epoch": 1.31249739443869, + "grad_norm": 0.08089365065097809, + "learning_rate": 3.187812706626007e-06, + "loss": 0.0022, + "step": 204640 + }, + { + "epoch": 1.312561531332476, + "grad_norm": 0.12527890503406525, + "learning_rate": 3.1872910699053343e-06, + "loss": 0.0013, + "step": 204650 + }, + { + "epoch": 1.312625668226262, + "grad_norm": 0.0680837482213974, + "learning_rate": 3.1867694558992e-06, + "loss": 0.0018, + "step": 204660 + }, + { + "epoch": 1.3126898051200482, + "grad_norm": 0.048125989735126495, + "learning_rate": 3.1862478646141413e-06, + "loss": 0.0021, + "step": 204670 + }, + { + "epoch": 1.3127539420138343, + "grad_norm": 0.042054932564496994, + "learning_rate": 3.185726296056692e-06, + "loss": 0.0007, + "step": 204680 + }, + { + "epoch": 1.3128180789076205, + "grad_norm": 0.11337859183549881, + "learning_rate": 3.18520475023339e-06, + "loss": 0.001, + "step": 204690 + }, + { + "epoch": 1.3128822158014066, + "grad_norm": 0.10501635074615479, + "learning_rate": 3.184683227150768e-06, + "loss": 0.0009, + "step": 204700 + }, + { + "epoch": 1.3129463526951926, + "grad_norm": 0.044662632048130035, + "learning_rate": 3.1841617268153647e-06, + "loss": 0.0026, + "step": 204710 + }, + { + "epoch": 1.3130104895889787, + "grad_norm": 0.12997126579284668, + "learning_rate": 3.1836402492337105e-06, + "loss": 0.0016, + "step": 204720 + }, + { + "epoch": 1.3130746264827649, + "grad_norm": 0.08274967968463898, + "learning_rate": 3.1831187944123435e-06, + "loss": 0.0012, + "step": 204730 + }, + { + "epoch": 1.3131387633765508, + "grad_norm": 0.02656915783882141, + "learning_rate": 3.1825973623577954e-06, + "loss": 0.0019, + "step": 204740 + }, + { + "epoch": 1.313202900270337, + "grad_norm": 0.024764280766248703, + "learning_rate": 3.1820759530766026e-06, + "loss": 0.0021, + "step": 204750 + }, + { + "epoch": 1.313267037164123, + "grad_norm": 0.057234641164541245, + "learning_rate": 3.1815545665752966e-06, + "loss": 0.0011, + "step": 204760 + }, + { + "epoch": 1.3133311740579092, + "grad_norm": 0.10282032936811447, + "learning_rate": 3.1810332028604106e-06, + "loss": 0.0011, + "step": 204770 + }, + { + "epoch": 1.3133953109516954, + "grad_norm": 0.06660402566194534, + "learning_rate": 3.18051186193848e-06, + "loss": 0.0008, + "step": 204780 + }, + { + "epoch": 1.3134594478454815, + "grad_norm": 0.09857561439275742, + "learning_rate": 3.179990543816035e-06, + "loss": 0.0013, + "step": 204790 + }, + { + "epoch": 1.3135235847392674, + "grad_norm": 0.019108690321445465, + "learning_rate": 3.179469248499611e-06, + "loss": 0.0009, + "step": 204800 + }, + { + "epoch": 1.3135877216330536, + "grad_norm": 0.008525248616933823, + "learning_rate": 3.1789479759957366e-06, + "loss": 0.0018, + "step": 204810 + }, + { + "epoch": 1.3136518585268397, + "grad_norm": 0.06714153289794922, + "learning_rate": 3.1784267263109467e-06, + "loss": 0.0016, + "step": 204820 + }, + { + "epoch": 1.3137159954206257, + "grad_norm": 0.0973796471953392, + "learning_rate": 3.177905499451771e-06, + "loss": 0.001, + "step": 204830 + }, + { + "epoch": 1.3137801323144118, + "grad_norm": 0.039386920630931854, + "learning_rate": 3.1773842954247423e-06, + "loss": 0.0006, + "step": 204840 + }, + { + "epoch": 1.313844269208198, + "grad_norm": 0.09478655457496643, + "learning_rate": 3.1768631142363902e-06, + "loss": 0.0012, + "step": 204850 + }, + { + "epoch": 1.3139084061019841, + "grad_norm": 0.10358711332082748, + "learning_rate": 3.1763419558932473e-06, + "loss": 0.0014, + "step": 204860 + }, + { + "epoch": 1.3139725429957703, + "grad_norm": 0.12413185834884644, + "learning_rate": 3.175820820401842e-06, + "loss": 0.0011, + "step": 204870 + }, + { + "epoch": 1.3140366798895562, + "grad_norm": 0.03639234974980354, + "learning_rate": 3.1752997077687063e-06, + "loss": 0.0013, + "step": 204880 + }, + { + "epoch": 1.3141008167833423, + "grad_norm": 0.25842973589897156, + "learning_rate": 3.1747786180003687e-06, + "loss": 0.0013, + "step": 204890 + }, + { + "epoch": 1.3141649536771285, + "grad_norm": 0.11785202473402023, + "learning_rate": 3.17425755110336e-06, + "loss": 0.0007, + "step": 204900 + }, + { + "epoch": 1.3142290905709146, + "grad_norm": 0.209381565451622, + "learning_rate": 3.1737365070842084e-06, + "loss": 0.0027, + "step": 204910 + }, + { + "epoch": 1.3142932274647006, + "grad_norm": 0.10094190388917923, + "learning_rate": 3.1732154859494436e-06, + "loss": 0.0029, + "step": 204920 + }, + { + "epoch": 1.3143573643584867, + "grad_norm": 0.12673942744731903, + "learning_rate": 3.1726944877055955e-06, + "loss": 0.0009, + "step": 204930 + }, + { + "epoch": 1.3144215012522729, + "grad_norm": 0.146784707903862, + "learning_rate": 3.17217351235919e-06, + "loss": 0.001, + "step": 204940 + }, + { + "epoch": 1.314485638146059, + "grad_norm": 0.025839293375611305, + "learning_rate": 3.171652559916758e-06, + "loss": 0.0015, + "step": 204950 + }, + { + "epoch": 1.3145497750398452, + "grad_norm": 0.16551709175109863, + "learning_rate": 3.171131630384825e-06, + "loss": 0.0016, + "step": 204960 + }, + { + "epoch": 1.314613911933631, + "grad_norm": 0.10145284980535507, + "learning_rate": 3.1706107237699214e-06, + "loss": 0.0015, + "step": 204970 + }, + { + "epoch": 1.3146780488274172, + "grad_norm": 0.11628744751214981, + "learning_rate": 3.1700898400785707e-06, + "loss": 0.0023, + "step": 204980 + }, + { + "epoch": 1.3147421857212034, + "grad_norm": 0.13586631417274475, + "learning_rate": 3.169568979317304e-06, + "loss": 0.0011, + "step": 204990 + }, + { + "epoch": 1.3148063226149893, + "grad_norm": 0.09128490835428238, + "learning_rate": 3.169048141492644e-06, + "loss": 0.0018, + "step": 205000 + }, + { + "epoch": 1.3148704595087755, + "grad_norm": 0.15765392780303955, + "learning_rate": 3.168527326611122e-06, + "loss": 0.0024, + "step": 205010 + }, + { + "epoch": 1.3149345964025616, + "grad_norm": 0.1263551115989685, + "learning_rate": 3.16800653467926e-06, + "loss": 0.0012, + "step": 205020 + }, + { + "epoch": 1.3149987332963478, + "grad_norm": 0.02617996372282505, + "learning_rate": 3.1674857657035863e-06, + "loss": 0.0007, + "step": 205030 + }, + { + "epoch": 1.315062870190134, + "grad_norm": 0.04973071441054344, + "learning_rate": 3.1669650196906243e-06, + "loss": 0.0013, + "step": 205040 + }, + { + "epoch": 1.31512700708392, + "grad_norm": 0.0865158811211586, + "learning_rate": 3.1664442966469016e-06, + "loss": 0.001, + "step": 205050 + }, + { + "epoch": 1.315191143977706, + "grad_norm": 0.058341436088085175, + "learning_rate": 3.1659235965789416e-06, + "loss": 0.0015, + "step": 205060 + }, + { + "epoch": 1.3152552808714921, + "grad_norm": 0.04228498786687851, + "learning_rate": 3.16540291949327e-06, + "loss": 0.0024, + "step": 205070 + }, + { + "epoch": 1.3153194177652783, + "grad_norm": 0.10057054460048676, + "learning_rate": 3.1648822653964117e-06, + "loss": 0.0013, + "step": 205080 + }, + { + "epoch": 1.3153835546590642, + "grad_norm": 0.1200881227850914, + "learning_rate": 3.164361634294889e-06, + "loss": 0.0011, + "step": 205090 + }, + { + "epoch": 1.3154476915528504, + "grad_norm": 0.08531105518341064, + "learning_rate": 3.163841026195228e-06, + "loss": 0.0009, + "step": 205100 + }, + { + "epoch": 1.3155118284466365, + "grad_norm": 0.21463236212730408, + "learning_rate": 3.1633204411039498e-06, + "loss": 0.0017, + "step": 205110 + }, + { + "epoch": 1.3155759653404226, + "grad_norm": 0.019257543608546257, + "learning_rate": 3.162799879027581e-06, + "loss": 0.0025, + "step": 205120 + }, + { + "epoch": 1.3156401022342088, + "grad_norm": 0.17167098820209503, + "learning_rate": 3.1622793399726415e-06, + "loss": 0.002, + "step": 205130 + }, + { + "epoch": 1.3157042391279947, + "grad_norm": 0.03428174555301666, + "learning_rate": 3.1617588239456557e-06, + "loss": 0.001, + "step": 205140 + }, + { + "epoch": 1.3157683760217809, + "grad_norm": 0.06448190659284592, + "learning_rate": 3.1612383309531452e-06, + "loss": 0.0013, + "step": 205150 + }, + { + "epoch": 1.315832512915567, + "grad_norm": 0.03777821734547615, + "learning_rate": 3.160717861001633e-06, + "loss": 0.0013, + "step": 205160 + }, + { + "epoch": 1.315896649809353, + "grad_norm": 0.418274462223053, + "learning_rate": 3.1601974140976404e-06, + "loss": 0.0015, + "step": 205170 + }, + { + "epoch": 1.315960786703139, + "grad_norm": 0.07767599821090698, + "learning_rate": 3.15967699024769e-06, + "loss": 0.0005, + "step": 205180 + }, + { + "epoch": 1.3160249235969252, + "grad_norm": 0.06448792666196823, + "learning_rate": 3.1591565894583005e-06, + "loss": 0.0006, + "step": 205190 + }, + { + "epoch": 1.3160890604907114, + "grad_norm": 0.34093043208122253, + "learning_rate": 3.158636211735995e-06, + "loss": 0.0017, + "step": 205200 + }, + { + "epoch": 1.3161531973844975, + "grad_norm": 0.11581496149301529, + "learning_rate": 3.1581158570872936e-06, + "loss": 0.0025, + "step": 205210 + }, + { + "epoch": 1.3162173342782837, + "grad_norm": 0.13731037080287933, + "learning_rate": 3.1575955255187173e-06, + "loss": 0.0018, + "step": 205220 + }, + { + "epoch": 1.3162814711720696, + "grad_norm": 0.06086349859833717, + "learning_rate": 3.157075217036787e-06, + "loss": 0.0023, + "step": 205230 + }, + { + "epoch": 1.3163456080658558, + "grad_norm": 0.0504254549741745, + "learning_rate": 3.1565549316480192e-06, + "loss": 0.0014, + "step": 205240 + }, + { + "epoch": 1.316409744959642, + "grad_norm": 0.0985199511051178, + "learning_rate": 3.1560346693589372e-06, + "loss": 0.0017, + "step": 205250 + }, + { + "epoch": 1.3164738818534278, + "grad_norm": 0.03697388619184494, + "learning_rate": 3.1555144301760576e-06, + "loss": 0.0014, + "step": 205260 + }, + { + "epoch": 1.316538018747214, + "grad_norm": 0.16653156280517578, + "learning_rate": 3.1549942141059014e-06, + "loss": 0.0029, + "step": 205270 + }, + { + "epoch": 1.3166021556410001, + "grad_norm": 0.09604401141405106, + "learning_rate": 3.154474021154985e-06, + "loss": 0.0006, + "step": 205280 + }, + { + "epoch": 1.3166662925347863, + "grad_norm": 0.10724960267543793, + "learning_rate": 3.153953851329827e-06, + "loss": 0.0008, + "step": 205290 + }, + { + "epoch": 1.3167304294285724, + "grad_norm": 0.022921591997146606, + "learning_rate": 3.15343370463695e-06, + "loss": 0.0006, + "step": 205300 + }, + { + "epoch": 1.3167945663223584, + "grad_norm": 0.023986762389540672, + "learning_rate": 3.152913581082866e-06, + "loss": 0.0012, + "step": 205310 + }, + { + "epoch": 1.3168587032161445, + "grad_norm": 0.3583962321281433, + "learning_rate": 3.1523934806740965e-06, + "loss": 0.001, + "step": 205320 + }, + { + "epoch": 1.3169228401099307, + "grad_norm": 0.0671306699514389, + "learning_rate": 3.151873403417156e-06, + "loss": 0.0008, + "step": 205330 + }, + { + "epoch": 1.3169869770037168, + "grad_norm": 0.1393251121044159, + "learning_rate": 3.151353349318562e-06, + "loss": 0.0016, + "step": 205340 + }, + { + "epoch": 1.3170511138975027, + "grad_norm": 0.09741727262735367, + "learning_rate": 3.1508333183848337e-06, + "loss": 0.0018, + "step": 205350 + }, + { + "epoch": 1.3171152507912889, + "grad_norm": 0.022273661568760872, + "learning_rate": 3.1503133106224844e-06, + "loss": 0.0032, + "step": 205360 + }, + { + "epoch": 1.317179387685075, + "grad_norm": 0.05251382291316986, + "learning_rate": 3.149793326038032e-06, + "loss": 0.002, + "step": 205370 + }, + { + "epoch": 1.3172435245788612, + "grad_norm": 0.13902156054973602, + "learning_rate": 3.1492733646379903e-06, + "loss": 0.001, + "step": 205380 + }, + { + "epoch": 1.3173076614726473, + "grad_norm": 0.07224063575267792, + "learning_rate": 3.148753426428877e-06, + "loss": 0.0024, + "step": 205390 + }, + { + "epoch": 1.3173717983664333, + "grad_norm": 0.06636285781860352, + "learning_rate": 3.148233511417206e-06, + "loss": 0.0008, + "step": 205400 + }, + { + "epoch": 1.3174359352602194, + "grad_norm": 0.05493977665901184, + "learning_rate": 3.1477136196094926e-06, + "loss": 0.001, + "step": 205410 + }, + { + "epoch": 1.3175000721540056, + "grad_norm": 0.026711495593190193, + "learning_rate": 3.1471937510122515e-06, + "loss": 0.0013, + "step": 205420 + }, + { + "epoch": 1.3175642090477915, + "grad_norm": 0.18658442795276642, + "learning_rate": 3.146673905631997e-06, + "loss": 0.0014, + "step": 205430 + }, + { + "epoch": 1.3176283459415776, + "grad_norm": 0.09886343777179718, + "learning_rate": 3.1461540834752423e-06, + "loss": 0.0009, + "step": 205440 + }, + { + "epoch": 1.3176924828353638, + "grad_norm": 0.06812240928411484, + "learning_rate": 3.1456342845485032e-06, + "loss": 0.0015, + "step": 205450 + }, + { + "epoch": 1.31775661972915, + "grad_norm": 0.05942175164818764, + "learning_rate": 3.1451145088582903e-06, + "loss": 0.0016, + "step": 205460 + }, + { + "epoch": 1.317820756622936, + "grad_norm": 0.13044652342796326, + "learning_rate": 3.144594756411118e-06, + "loss": 0.0012, + "step": 205470 + }, + { + "epoch": 1.3178848935167222, + "grad_norm": 0.011788193136453629, + "learning_rate": 3.1440750272135013e-06, + "loss": 0.0015, + "step": 205480 + }, + { + "epoch": 1.3179490304105081, + "grad_norm": 0.07968220859766006, + "learning_rate": 3.1435553212719495e-06, + "loss": 0.0022, + "step": 205490 + }, + { + "epoch": 1.3180131673042943, + "grad_norm": 0.2021966427564621, + "learning_rate": 3.1430356385929774e-06, + "loss": 0.0011, + "step": 205500 + }, + { + "epoch": 1.3180773041980804, + "grad_norm": 0.07877519726753235, + "learning_rate": 3.1425159791830947e-06, + "loss": 0.0018, + "step": 205510 + }, + { + "epoch": 1.3181414410918664, + "grad_norm": 0.10816589742898941, + "learning_rate": 3.1419963430488155e-06, + "loss": 0.0017, + "step": 205520 + }, + { + "epoch": 1.3182055779856525, + "grad_norm": 0.24587659537792206, + "learning_rate": 3.141476730196649e-06, + "loss": 0.0011, + "step": 205530 + }, + { + "epoch": 1.3182697148794387, + "grad_norm": 0.058466412127017975, + "learning_rate": 3.1409571406331076e-06, + "loss": 0.001, + "step": 205540 + }, + { + "epoch": 1.3183338517732248, + "grad_norm": 0.04519858956336975, + "learning_rate": 3.140437574364702e-06, + "loss": 0.002, + "step": 205550 + }, + { + "epoch": 1.318397988667011, + "grad_norm": 0.12952274084091187, + "learning_rate": 3.1399180313979425e-06, + "loss": 0.001, + "step": 205560 + }, + { + "epoch": 1.318462125560797, + "grad_norm": 0.006296441424638033, + "learning_rate": 3.1393985117393392e-06, + "loss": 0.0011, + "step": 205570 + }, + { + "epoch": 1.318526262454583, + "grad_norm": 0.06386483460664749, + "learning_rate": 3.1388790153954034e-06, + "loss": 0.0025, + "step": 205580 + }, + { + "epoch": 1.3185903993483692, + "grad_norm": 0.024813035503029823, + "learning_rate": 3.138359542372642e-06, + "loss": 0.0012, + "step": 205590 + }, + { + "epoch": 1.3186545362421551, + "grad_norm": 0.1011386513710022, + "learning_rate": 3.1378400926775675e-06, + "loss": 0.0016, + "step": 205600 + }, + { + "epoch": 1.3187186731359413, + "grad_norm": 0.08756448328495026, + "learning_rate": 3.137320666316687e-06, + "loss": 0.0013, + "step": 205610 + }, + { + "epoch": 1.3187828100297274, + "grad_norm": 0.06075456738471985, + "learning_rate": 3.1368012632965088e-06, + "loss": 0.001, + "step": 205620 + }, + { + "epoch": 1.3188469469235136, + "grad_norm": 0.07185124605894089, + "learning_rate": 3.136281883623544e-06, + "loss": 0.0009, + "step": 205630 + }, + { + "epoch": 1.3189110838172997, + "grad_norm": 0.05814353749155998, + "learning_rate": 3.135762527304298e-06, + "loss": 0.0013, + "step": 205640 + }, + { + "epoch": 1.3189752207110859, + "grad_norm": 0.17648516595363617, + "learning_rate": 3.135243194345281e-06, + "loss": 0.0014, + "step": 205650 + }, + { + "epoch": 1.3190393576048718, + "grad_norm": 0.016797835007309914, + "learning_rate": 3.134723884752998e-06, + "loss": 0.0032, + "step": 205660 + }, + { + "epoch": 1.319103494498658, + "grad_norm": 0.08698790520429611, + "learning_rate": 3.1342045985339598e-06, + "loss": 0.001, + "step": 205670 + }, + { + "epoch": 1.319167631392444, + "grad_norm": 0.08872439712285995, + "learning_rate": 3.1336853356946695e-06, + "loss": 0.0008, + "step": 205680 + }, + { + "epoch": 1.31923176828623, + "grad_norm": 0.2343255579471588, + "learning_rate": 3.133166096241637e-06, + "loss": 0.0015, + "step": 205690 + }, + { + "epoch": 1.3192959051800162, + "grad_norm": 0.03147163614630699, + "learning_rate": 3.132646880181367e-06, + "loss": 0.0033, + "step": 205700 + }, + { + "epoch": 1.3193600420738023, + "grad_norm": 0.059024930000305176, + "learning_rate": 3.1321276875203664e-06, + "loss": 0.0022, + "step": 205710 + }, + { + "epoch": 1.3194241789675885, + "grad_norm": 0.09479758143424988, + "learning_rate": 3.13160851826514e-06, + "loss": 0.0007, + "step": 205720 + }, + { + "epoch": 1.3194883158613746, + "grad_norm": 0.08601735532283783, + "learning_rate": 3.131089372422196e-06, + "loss": 0.0009, + "step": 205730 + }, + { + "epoch": 1.3195524527551605, + "grad_norm": 0.07166753709316254, + "learning_rate": 3.130570249998036e-06, + "loss": 0.0017, + "step": 205740 + }, + { + "epoch": 1.3196165896489467, + "grad_norm": 0.026545114815235138, + "learning_rate": 3.130051150999168e-06, + "loss": 0.001, + "step": 205750 + }, + { + "epoch": 1.3196807265427328, + "grad_norm": 0.031212162226438522, + "learning_rate": 3.1295320754320946e-06, + "loss": 0.0019, + "step": 205760 + }, + { + "epoch": 1.319744863436519, + "grad_norm": 0.21869143843650818, + "learning_rate": 3.129013023303321e-06, + "loss": 0.0019, + "step": 205770 + }, + { + "epoch": 1.319809000330305, + "grad_norm": 0.03965316712856293, + "learning_rate": 3.128493994619353e-06, + "loss": 0.002, + "step": 205780 + }, + { + "epoch": 1.319873137224091, + "grad_norm": 0.03560245782136917, + "learning_rate": 3.127974989386691e-06, + "loss": 0.0024, + "step": 205790 + }, + { + "epoch": 1.3199372741178772, + "grad_norm": 0.06623739004135132, + "learning_rate": 3.1274560076118423e-06, + "loss": 0.0009, + "step": 205800 + }, + { + "epoch": 1.3200014110116634, + "grad_norm": 0.06063716113567352, + "learning_rate": 3.126937049301306e-06, + "loss": 0.002, + "step": 205810 + }, + { + "epoch": 1.3200655479054495, + "grad_norm": 0.08815579116344452, + "learning_rate": 3.1264181144615877e-06, + "loss": 0.0011, + "step": 205820 + }, + { + "epoch": 1.3201296847992354, + "grad_norm": 0.04997415840625763, + "learning_rate": 3.12589920309919e-06, + "loss": 0.0019, + "step": 205830 + }, + { + "epoch": 1.3201938216930216, + "grad_norm": 0.09462188929319382, + "learning_rate": 3.1253803152206153e-06, + "loss": 0.0014, + "step": 205840 + }, + { + "epoch": 1.3202579585868077, + "grad_norm": 0.2312559187412262, + "learning_rate": 3.1248614508323632e-06, + "loss": 0.0014, + "step": 205850 + }, + { + "epoch": 1.3203220954805936, + "grad_norm": 0.05016280338168144, + "learning_rate": 3.124342609940939e-06, + "loss": 0.0013, + "step": 205860 + }, + { + "epoch": 1.3203862323743798, + "grad_norm": 0.11266014724969864, + "learning_rate": 3.1238237925528414e-06, + "loss": 0.0018, + "step": 205870 + }, + { + "epoch": 1.320450369268166, + "grad_norm": 0.06293545663356781, + "learning_rate": 3.1233049986745727e-06, + "loss": 0.0017, + "step": 205880 + }, + { + "epoch": 1.320514506161952, + "grad_norm": 0.022486304864287376, + "learning_rate": 3.122786228312633e-06, + "loss": 0.0014, + "step": 205890 + }, + { + "epoch": 1.3205786430557382, + "grad_norm": 0.2625771760940552, + "learning_rate": 3.1222674814735256e-06, + "loss": 0.0017, + "step": 205900 + }, + { + "epoch": 1.3206427799495244, + "grad_norm": 0.0930396020412445, + "learning_rate": 3.121748758163746e-06, + "loss": 0.0015, + "step": 205910 + }, + { + "epoch": 1.3207069168433103, + "grad_norm": 0.02263970486819744, + "learning_rate": 3.121230058389798e-06, + "loss": 0.0008, + "step": 205920 + }, + { + "epoch": 1.3207710537370965, + "grad_norm": 0.2657495141029358, + "learning_rate": 3.120711382158181e-06, + "loss": 0.0016, + "step": 205930 + }, + { + "epoch": 1.3208351906308826, + "grad_norm": 0.13245417177677155, + "learning_rate": 3.120192729475392e-06, + "loss": 0.0011, + "step": 205940 + }, + { + "epoch": 1.3208993275246685, + "grad_norm": 0.24873270094394684, + "learning_rate": 3.1196741003479324e-06, + "loss": 0.0016, + "step": 205950 + }, + { + "epoch": 1.3209634644184547, + "grad_norm": 0.02384812943637371, + "learning_rate": 3.1191554947823e-06, + "loss": 0.0013, + "step": 205960 + }, + { + "epoch": 1.3210276013122408, + "grad_norm": 0.03216005489230156, + "learning_rate": 3.1186369127849934e-06, + "loss": 0.0037, + "step": 205970 + }, + { + "epoch": 1.321091738206027, + "grad_norm": 0.039360105991363525, + "learning_rate": 3.118118354362511e-06, + "loss": 0.0012, + "step": 205980 + }, + { + "epoch": 1.3211558750998131, + "grad_norm": 0.19784843921661377, + "learning_rate": 3.1175998195213508e-06, + "loss": 0.0012, + "step": 205990 + }, + { + "epoch": 1.321220011993599, + "grad_norm": 0.06966821849346161, + "learning_rate": 3.1170813082680094e-06, + "loss": 0.0024, + "step": 206000 + }, + { + "epoch": 1.3212841488873852, + "grad_norm": 0.02089749276638031, + "learning_rate": 3.116562820608986e-06, + "loss": 0.0007, + "step": 206010 + }, + { + "epoch": 1.3213482857811714, + "grad_norm": 0.039188411086797714, + "learning_rate": 3.116044356550776e-06, + "loss": 0.0016, + "step": 206020 + }, + { + "epoch": 1.3214124226749575, + "grad_norm": 0.13055889308452606, + "learning_rate": 3.1155259160998767e-06, + "loss": 0.0017, + "step": 206030 + }, + { + "epoch": 1.3214765595687434, + "grad_norm": 0.1189856305718422, + "learning_rate": 3.1150074992627844e-06, + "loss": 0.0012, + "step": 206040 + }, + { + "epoch": 1.3215406964625296, + "grad_norm": 0.08534970134496689, + "learning_rate": 3.114489106045995e-06, + "loss": 0.0015, + "step": 206050 + }, + { + "epoch": 1.3216048333563157, + "grad_norm": 0.05722140148282051, + "learning_rate": 3.1139707364560035e-06, + "loss": 0.0012, + "step": 206060 + }, + { + "epoch": 1.3216689702501019, + "grad_norm": 0.04165022820234299, + "learning_rate": 3.1134523904993088e-06, + "loss": 0.0009, + "step": 206070 + }, + { + "epoch": 1.321733107143888, + "grad_norm": 0.09680133312940598, + "learning_rate": 3.112934068182402e-06, + "loss": 0.001, + "step": 206080 + }, + { + "epoch": 1.321797244037674, + "grad_norm": 0.1394525021314621, + "learning_rate": 3.112415769511782e-06, + "loss": 0.0012, + "step": 206090 + }, + { + "epoch": 1.32186138093146, + "grad_norm": 0.06444688141345978, + "learning_rate": 3.11189749449394e-06, + "loss": 0.001, + "step": 206100 + }, + { + "epoch": 1.3219255178252463, + "grad_norm": 0.13324974477291107, + "learning_rate": 3.111379243135373e-06, + "loss": 0.0016, + "step": 206110 + }, + { + "epoch": 1.3219896547190322, + "grad_norm": 0.21047130227088928, + "learning_rate": 3.1108610154425733e-06, + "loss": 0.0016, + "step": 206120 + }, + { + "epoch": 1.3220537916128183, + "grad_norm": 0.03437228500843048, + "learning_rate": 3.110342811422036e-06, + "loss": 0.0018, + "step": 206130 + }, + { + "epoch": 1.3221179285066045, + "grad_norm": 0.07776031643152237, + "learning_rate": 3.109824631080252e-06, + "loss": 0.0017, + "step": 206140 + }, + { + "epoch": 1.3221820654003906, + "grad_norm": 0.0849027931690216, + "learning_rate": 3.1093064744237177e-06, + "loss": 0.0012, + "step": 206150 + }, + { + "epoch": 1.3222462022941768, + "grad_norm": 0.005785741843283176, + "learning_rate": 3.108788341458924e-06, + "loss": 0.0028, + "step": 206160 + }, + { + "epoch": 1.322310339187963, + "grad_norm": 0.1434858739376068, + "learning_rate": 3.108270232192364e-06, + "loss": 0.0007, + "step": 206170 + }, + { + "epoch": 1.3223744760817489, + "grad_norm": 0.12548981606960297, + "learning_rate": 3.1077521466305304e-06, + "loss": 0.0012, + "step": 206180 + }, + { + "epoch": 1.322438612975535, + "grad_norm": 0.2050582319498062, + "learning_rate": 3.1072340847799137e-06, + "loss": 0.0025, + "step": 206190 + }, + { + "epoch": 1.3225027498693211, + "grad_norm": 0.18678708374500275, + "learning_rate": 3.106716046647008e-06, + "loss": 0.0016, + "step": 206200 + }, + { + "epoch": 1.322566886763107, + "grad_norm": 0.12373144179582596, + "learning_rate": 3.106198032238302e-06, + "loss": 0.0015, + "step": 206210 + }, + { + "epoch": 1.3226310236568932, + "grad_norm": 0.013299357146024704, + "learning_rate": 3.10568004156029e-06, + "loss": 0.0015, + "step": 206220 + }, + { + "epoch": 1.3226951605506794, + "grad_norm": 0.23023241758346558, + "learning_rate": 3.1051620746194595e-06, + "loss": 0.0019, + "step": 206230 + }, + { + "epoch": 1.3227592974444655, + "grad_norm": 0.054234281182289124, + "learning_rate": 3.104644131422303e-06, + "loss": 0.0018, + "step": 206240 + }, + { + "epoch": 1.3228234343382517, + "grad_norm": 0.1322822868824005, + "learning_rate": 3.1041262119753097e-06, + "loss": 0.0017, + "step": 206250 + }, + { + "epoch": 1.3228875712320376, + "grad_norm": 0.16184493899345398, + "learning_rate": 3.1036083162849706e-06, + "loss": 0.0016, + "step": 206260 + }, + { + "epoch": 1.3229517081258237, + "grad_norm": 0.057139068841934204, + "learning_rate": 3.1030904443577736e-06, + "loss": 0.0008, + "step": 206270 + }, + { + "epoch": 1.32301584501961, + "grad_norm": 0.042741693556308746, + "learning_rate": 3.10257259620021e-06, + "loss": 0.0012, + "step": 206280 + }, + { + "epoch": 1.3230799819133958, + "grad_norm": 0.008121524937450886, + "learning_rate": 3.1020547718187673e-06, + "loss": 0.0019, + "step": 206290 + }, + { + "epoch": 1.323144118807182, + "grad_norm": 0.09957344830036163, + "learning_rate": 3.101536971219935e-06, + "loss": 0.002, + "step": 206300 + }, + { + "epoch": 1.3232082557009681, + "grad_norm": 0.028351394459605217, + "learning_rate": 3.1010191944102007e-06, + "loss": 0.001, + "step": 206310 + }, + { + "epoch": 1.3232723925947543, + "grad_norm": 0.046477172523736954, + "learning_rate": 3.1005014413960534e-06, + "loss": 0.0031, + "step": 206320 + }, + { + "epoch": 1.3233365294885404, + "grad_norm": 0.21906918287277222, + "learning_rate": 3.0999837121839817e-06, + "loss": 0.0015, + "step": 206330 + }, + { + "epoch": 1.3234006663823266, + "grad_norm": 0.03294725343585014, + "learning_rate": 3.09946600678047e-06, + "loss": 0.0011, + "step": 206340 + }, + { + "epoch": 1.3234648032761125, + "grad_norm": 0.048295244574546814, + "learning_rate": 3.098948325192009e-06, + "loss": 0.0013, + "step": 206350 + }, + { + "epoch": 1.3235289401698986, + "grad_norm": 0.1487571895122528, + "learning_rate": 3.098430667425083e-06, + "loss": 0.0016, + "step": 206360 + }, + { + "epoch": 1.3235930770636848, + "grad_norm": 0.07452013343572617, + "learning_rate": 3.097913033486182e-06, + "loss": 0.0011, + "step": 206370 + }, + { + "epoch": 1.3236572139574707, + "grad_norm": 0.011145166121423244, + "learning_rate": 3.097395423381787e-06, + "loss": 0.001, + "step": 206380 + }, + { + "epoch": 1.3237213508512569, + "grad_norm": 0.05800461024045944, + "learning_rate": 3.0968778371183893e-06, + "loss": 0.0011, + "step": 206390 + }, + { + "epoch": 1.323785487745043, + "grad_norm": 0.033336617052555084, + "learning_rate": 3.0963602747024717e-06, + "loss": 0.0014, + "step": 206400 + }, + { + "epoch": 1.3238496246388292, + "grad_norm": 0.06338828057050705, + "learning_rate": 3.0958427361405207e-06, + "loss": 0.0008, + "step": 206410 + }, + { + "epoch": 1.3239137615326153, + "grad_norm": 0.09692800045013428, + "learning_rate": 3.0953252214390207e-06, + "loss": 0.0019, + "step": 206420 + }, + { + "epoch": 1.3239778984264012, + "grad_norm": 0.2921394407749176, + "learning_rate": 3.094807730604458e-06, + "loss": 0.0012, + "step": 206430 + }, + { + "epoch": 1.3240420353201874, + "grad_norm": 0.04953905940055847, + "learning_rate": 3.094290263643315e-06, + "loss": 0.0019, + "step": 206440 + }, + { + "epoch": 1.3241061722139735, + "grad_norm": 0.047770023345947266, + "learning_rate": 3.093772820562077e-06, + "loss": 0.0019, + "step": 206450 + }, + { + "epoch": 1.3241703091077597, + "grad_norm": 0.06971164792776108, + "learning_rate": 3.0932554013672277e-06, + "loss": 0.0013, + "step": 206460 + }, + { + "epoch": 1.3242344460015456, + "grad_norm": 0.16142980754375458, + "learning_rate": 3.0927380060652512e-06, + "loss": 0.0019, + "step": 206470 + }, + { + "epoch": 1.3242985828953318, + "grad_norm": 0.14970916509628296, + "learning_rate": 3.092220634662631e-06, + "loss": 0.0013, + "step": 206480 + }, + { + "epoch": 1.324362719789118, + "grad_norm": 0.03817512094974518, + "learning_rate": 3.091703287165849e-06, + "loss": 0.0013, + "step": 206490 + }, + { + "epoch": 1.324426856682904, + "grad_norm": 0.0302517581731081, + "learning_rate": 3.0911859635813896e-06, + "loss": 0.0014, + "step": 206500 + }, + { + "epoch": 1.3244909935766902, + "grad_norm": 0.03984169661998749, + "learning_rate": 3.0906686639157335e-06, + "loss": 0.0008, + "step": 206510 + }, + { + "epoch": 1.3245551304704761, + "grad_norm": 0.09860210120677948, + "learning_rate": 3.090151388175364e-06, + "loss": 0.0032, + "step": 206520 + }, + { + "epoch": 1.3246192673642623, + "grad_norm": 0.1225770115852356, + "learning_rate": 3.089634136366762e-06, + "loss": 0.0011, + "step": 206530 + }, + { + "epoch": 1.3246834042580484, + "grad_norm": 0.028699776157736778, + "learning_rate": 3.0891169084964106e-06, + "loss": 0.0008, + "step": 206540 + }, + { + "epoch": 1.3247475411518344, + "grad_norm": 0.01483263447880745, + "learning_rate": 3.088599704570789e-06, + "loss": 0.0011, + "step": 206550 + }, + { + "epoch": 1.3248116780456205, + "grad_norm": 0.08767399191856384, + "learning_rate": 3.0880825245963796e-06, + "loss": 0.0017, + "step": 206560 + }, + { + "epoch": 1.3248758149394066, + "grad_norm": 0.11258254200220108, + "learning_rate": 3.0875653685796624e-06, + "loss": 0.0012, + "step": 206570 + }, + { + "epoch": 1.3249399518331928, + "grad_norm": 0.012691079638898373, + "learning_rate": 3.087048236527119e-06, + "loss": 0.0015, + "step": 206580 + }, + { + "epoch": 1.325004088726979, + "grad_norm": 0.06870340555906296, + "learning_rate": 3.086531128445227e-06, + "loss": 0.001, + "step": 206590 + }, + { + "epoch": 1.325068225620765, + "grad_norm": 0.3031693994998932, + "learning_rate": 3.0860140443404686e-06, + "loss": 0.0024, + "step": 206600 + }, + { + "epoch": 1.325132362514551, + "grad_norm": 0.1562807261943817, + "learning_rate": 3.0854969842193206e-06, + "loss": 0.0008, + "step": 206610 + }, + { + "epoch": 1.3251964994083372, + "grad_norm": 0.1482306867837906, + "learning_rate": 3.084979948088265e-06, + "loss": 0.0012, + "step": 206620 + }, + { + "epoch": 1.3252606363021233, + "grad_norm": 0.19808292388916016, + "learning_rate": 3.0844629359537788e-06, + "loss": 0.0022, + "step": 206630 + }, + { + "epoch": 1.3253247731959092, + "grad_norm": 0.18784743547439575, + "learning_rate": 3.0839459478223404e-06, + "loss": 0.0023, + "step": 206640 + }, + { + "epoch": 1.3253889100896954, + "grad_norm": 0.06134854257106781, + "learning_rate": 3.08342898370043e-06, + "loss": 0.0016, + "step": 206650 + }, + { + "epoch": 1.3254530469834815, + "grad_norm": 0.05111366882920265, + "learning_rate": 3.082912043594523e-06, + "loss": 0.0012, + "step": 206660 + }, + { + "epoch": 1.3255171838772677, + "grad_norm": 0.0958254337310791, + "learning_rate": 3.0823951275111e-06, + "loss": 0.0012, + "step": 206670 + }, + { + "epoch": 1.3255813207710538, + "grad_norm": 0.075313039124012, + "learning_rate": 3.0818782354566346e-06, + "loss": 0.0017, + "step": 206680 + }, + { + "epoch": 1.3256454576648398, + "grad_norm": 0.07610893994569778, + "learning_rate": 3.0813613674376064e-06, + "loss": 0.0012, + "step": 206690 + }, + { + "epoch": 1.325709594558626, + "grad_norm": 0.01589725725352764, + "learning_rate": 3.080844523460491e-06, + "loss": 0.0016, + "step": 206700 + }, + { + "epoch": 1.325773731452412, + "grad_norm": 0.06068975478410721, + "learning_rate": 3.080327703531767e-06, + "loss": 0.0052, + "step": 206710 + }, + { + "epoch": 1.325837868346198, + "grad_norm": 0.06539834290742874, + "learning_rate": 3.0798109076579074e-06, + "loss": 0.0018, + "step": 206720 + }, + { + "epoch": 1.3259020052399841, + "grad_norm": 0.06082136556506157, + "learning_rate": 3.07929413584539e-06, + "loss": 0.002, + "step": 206730 + }, + { + "epoch": 1.3259661421337703, + "grad_norm": 0.1441694051027298, + "learning_rate": 3.0787773881006887e-06, + "loss": 0.002, + "step": 206740 + }, + { + "epoch": 1.3260302790275564, + "grad_norm": 0.03906743600964546, + "learning_rate": 3.0782606644302816e-06, + "loss": 0.0015, + "step": 206750 + }, + { + "epoch": 1.3260944159213426, + "grad_norm": 0.014613121747970581, + "learning_rate": 3.0777439648406404e-06, + "loss": 0.001, + "step": 206760 + }, + { + "epoch": 1.3261585528151287, + "grad_norm": 0.06862866133451462, + "learning_rate": 3.0772272893382416e-06, + "loss": 0.0014, + "step": 206770 + }, + { + "epoch": 1.3262226897089147, + "grad_norm": 0.05738750472664833, + "learning_rate": 3.07671063792956e-06, + "loss": 0.0035, + "step": 206780 + }, + { + "epoch": 1.3262868266027008, + "grad_norm": 0.10485716909170151, + "learning_rate": 3.0761940106210665e-06, + "loss": 0.0019, + "step": 206790 + }, + { + "epoch": 1.326350963496487, + "grad_norm": 0.08685830235481262, + "learning_rate": 3.0756774074192385e-06, + "loss": 0.0021, + "step": 206800 + }, + { + "epoch": 1.3264151003902729, + "grad_norm": 0.10155344754457474, + "learning_rate": 3.075160828330548e-06, + "loss": 0.0026, + "step": 206810 + }, + { + "epoch": 1.326479237284059, + "grad_norm": 0.09273690730333328, + "learning_rate": 3.0746442733614655e-06, + "loss": 0.0014, + "step": 206820 + }, + { + "epoch": 1.3265433741778452, + "grad_norm": 0.10257264971733093, + "learning_rate": 3.074127742518469e-06, + "loss": 0.0016, + "step": 206830 + }, + { + "epoch": 1.3266075110716313, + "grad_norm": 0.0632232204079628, + "learning_rate": 3.0736112358080274e-06, + "loss": 0.0012, + "step": 206840 + }, + { + "epoch": 1.3266716479654175, + "grad_norm": 0.14479869604110718, + "learning_rate": 3.073094753236614e-06, + "loss": 0.0023, + "step": 206850 + }, + { + "epoch": 1.3267357848592034, + "grad_norm": 0.10152845829725266, + "learning_rate": 3.072578294810701e-06, + "loss": 0.0005, + "step": 206860 + }, + { + "epoch": 1.3267999217529896, + "grad_norm": 0.0303544569760561, + "learning_rate": 3.072061860536759e-06, + "loss": 0.002, + "step": 206870 + }, + { + "epoch": 1.3268640586467757, + "grad_norm": 0.05967644974589348, + "learning_rate": 3.071545450421259e-06, + "loss": 0.0019, + "step": 206880 + }, + { + "epoch": 1.3269281955405619, + "grad_norm": 0.03381352126598358, + "learning_rate": 3.0710290644706732e-06, + "loss": 0.0024, + "step": 206890 + }, + { + "epoch": 1.3269923324343478, + "grad_norm": 0.01893250085413456, + "learning_rate": 3.0705127026914726e-06, + "loss": 0.0009, + "step": 206900 + }, + { + "epoch": 1.327056469328134, + "grad_norm": 0.128096342086792, + "learning_rate": 3.069996365090126e-06, + "loss": 0.0012, + "step": 206910 + }, + { + "epoch": 1.32712060622192, + "grad_norm": 0.08662473410367966, + "learning_rate": 3.069480051673105e-06, + "loss": 0.0011, + "step": 206920 + }, + { + "epoch": 1.3271847431157062, + "grad_norm": 0.019852977246046066, + "learning_rate": 3.0689637624468783e-06, + "loss": 0.0012, + "step": 206930 + }, + { + "epoch": 1.3272488800094924, + "grad_norm": 0.1014653667807579, + "learning_rate": 3.0684474974179163e-06, + "loss": 0.0014, + "step": 206940 + }, + { + "epoch": 1.3273130169032783, + "grad_norm": 0.03182956203818321, + "learning_rate": 3.0679312565926865e-06, + "loss": 0.0014, + "step": 206950 + }, + { + "epoch": 1.3273771537970644, + "grad_norm": 0.2458362728357315, + "learning_rate": 3.0674150399776603e-06, + "loss": 0.002, + "step": 206960 + }, + { + "epoch": 1.3274412906908506, + "grad_norm": 0.09880047291517258, + "learning_rate": 3.066898847579303e-06, + "loss": 0.001, + "step": 206970 + }, + { + "epoch": 1.3275054275846365, + "grad_norm": 0.09074307978153229, + "learning_rate": 3.0663826794040863e-06, + "loss": 0.0008, + "step": 206980 + }, + { + "epoch": 1.3275695644784227, + "grad_norm": 0.020720677450299263, + "learning_rate": 3.065866535458476e-06, + "loss": 0.0017, + "step": 206990 + }, + { + "epoch": 1.3276337013722088, + "grad_norm": 0.07115289568901062, + "learning_rate": 3.0653504157489407e-06, + "loss": 0.002, + "step": 207000 + }, + { + "epoch": 1.327697838265995, + "grad_norm": 0.15864945948123932, + "learning_rate": 3.064834320281946e-06, + "loss": 0.0012, + "step": 207010 + }, + { + "epoch": 1.3277619751597811, + "grad_norm": 0.09046715497970581, + "learning_rate": 3.064318249063962e-06, + "loss": 0.0022, + "step": 207020 + }, + { + "epoch": 1.3278261120535673, + "grad_norm": 0.07268185168504715, + "learning_rate": 3.063802202101453e-06, + "loss": 0.0007, + "step": 207030 + }, + { + "epoch": 1.3278902489473532, + "grad_norm": 0.10096162557601929, + "learning_rate": 3.0632861794008858e-06, + "loss": 0.001, + "step": 207040 + }, + { + "epoch": 1.3279543858411393, + "grad_norm": 0.11104384809732437, + "learning_rate": 3.062770180968728e-06, + "loss": 0.0012, + "step": 207050 + }, + { + "epoch": 1.3280185227349255, + "grad_norm": 0.13525117933750153, + "learning_rate": 3.062254206811443e-06, + "loss": 0.0016, + "step": 207060 + }, + { + "epoch": 1.3280826596287114, + "grad_norm": 0.026424003764986992, + "learning_rate": 3.0617382569354993e-06, + "loss": 0.003, + "step": 207070 + }, + { + "epoch": 1.3281467965224976, + "grad_norm": 0.1317053735256195, + "learning_rate": 3.0612223313473587e-06, + "loss": 0.0011, + "step": 207080 + }, + { + "epoch": 1.3282109334162837, + "grad_norm": 0.17248418927192688, + "learning_rate": 3.0607064300534893e-06, + "loss": 0.0013, + "step": 207090 + }, + { + "epoch": 1.3282750703100699, + "grad_norm": 0.041471950709819794, + "learning_rate": 3.060190553060353e-06, + "loss": 0.0014, + "step": 207100 + }, + { + "epoch": 1.328339207203856, + "grad_norm": 0.008954065851867199, + "learning_rate": 3.059674700374417e-06, + "loss": 0.0009, + "step": 207110 + }, + { + "epoch": 1.328403344097642, + "grad_norm": 0.20160478353500366, + "learning_rate": 3.0591588720021427e-06, + "loss": 0.0016, + "step": 207120 + }, + { + "epoch": 1.328467480991428, + "grad_norm": 0.06734311580657959, + "learning_rate": 3.0586430679499956e-06, + "loss": 0.0016, + "step": 207130 + }, + { + "epoch": 1.3285316178852142, + "grad_norm": 0.15078191459178925, + "learning_rate": 3.0581272882244374e-06, + "loss": 0.0034, + "step": 207140 + }, + { + "epoch": 1.3285957547790002, + "grad_norm": 0.06340338289737701, + "learning_rate": 3.0576115328319334e-06, + "loss": 0.0015, + "step": 207150 + }, + { + "epoch": 1.3286598916727863, + "grad_norm": 0.028959598392248154, + "learning_rate": 3.057095801778943e-06, + "loss": 0.0007, + "step": 207160 + }, + { + "epoch": 1.3287240285665725, + "grad_norm": 0.1481342911720276, + "learning_rate": 3.0565800950719317e-06, + "loss": 0.0013, + "step": 207170 + }, + { + "epoch": 1.3287881654603586, + "grad_norm": 0.031880080699920654, + "learning_rate": 3.0560644127173614e-06, + "loss": 0.0013, + "step": 207180 + }, + { + "epoch": 1.3288523023541448, + "grad_norm": 0.08684264123439789, + "learning_rate": 3.0555487547216927e-06, + "loss": 0.0016, + "step": 207190 + }, + { + "epoch": 1.328916439247931, + "grad_norm": 0.2443089485168457, + "learning_rate": 3.0550331210913887e-06, + "loss": 0.0036, + "step": 207200 + }, + { + "epoch": 1.3289805761417168, + "grad_norm": 0.09374405443668365, + "learning_rate": 3.0545175118329085e-06, + "loss": 0.0012, + "step": 207210 + }, + { + "epoch": 1.329044713035503, + "grad_norm": 0.02969098649919033, + "learning_rate": 3.0540019269527155e-06, + "loss": 0.0008, + "step": 207220 + }, + { + "epoch": 1.3291088499292891, + "grad_norm": 0.12158779054880142, + "learning_rate": 3.0534863664572686e-06, + "loss": 0.0016, + "step": 207230 + }, + { + "epoch": 1.329172986823075, + "grad_norm": 0.013619618490338326, + "learning_rate": 3.052970830353029e-06, + "loss": 0.0007, + "step": 207240 + }, + { + "epoch": 1.3292371237168612, + "grad_norm": 0.12970755994319916, + "learning_rate": 3.0524553186464555e-06, + "loss": 0.0019, + "step": 207250 + }, + { + "epoch": 1.3293012606106474, + "grad_norm": 0.15070787072181702, + "learning_rate": 3.0519398313440094e-06, + "loss": 0.0009, + "step": 207260 + }, + { + "epoch": 1.3293653975044335, + "grad_norm": 0.31933197379112244, + "learning_rate": 3.0514243684521496e-06, + "loss": 0.0017, + "step": 207270 + }, + { + "epoch": 1.3294295343982196, + "grad_norm": 0.09706978499889374, + "learning_rate": 3.0509089299773354e-06, + "loss": 0.0015, + "step": 207280 + }, + { + "epoch": 1.3294936712920056, + "grad_norm": 0.021569713950157166, + "learning_rate": 3.0503935159260234e-06, + "loss": 0.0012, + "step": 207290 + }, + { + "epoch": 1.3295578081857917, + "grad_norm": 0.031222287565469742, + "learning_rate": 3.0498781263046763e-06, + "loss": 0.001, + "step": 207300 + }, + { + "epoch": 1.3296219450795779, + "grad_norm": 0.0353911817073822, + "learning_rate": 3.0493627611197483e-06, + "loss": 0.004, + "step": 207310 + }, + { + "epoch": 1.329686081973364, + "grad_norm": 0.2569740414619446, + "learning_rate": 3.0488474203776996e-06, + "loss": 0.003, + "step": 207320 + }, + { + "epoch": 1.32975021886715, + "grad_norm": 0.12398944050073624, + "learning_rate": 3.048332104084988e-06, + "loss": 0.0023, + "step": 207330 + }, + { + "epoch": 1.329814355760936, + "grad_norm": 0.07236245274543762, + "learning_rate": 3.047816812248069e-06, + "loss": 0.0013, + "step": 207340 + }, + { + "epoch": 1.3298784926547222, + "grad_norm": 0.1026138961315155, + "learning_rate": 3.047301544873401e-06, + "loss": 0.0018, + "step": 207350 + }, + { + "epoch": 1.3299426295485084, + "grad_norm": 0.0675225779414177, + "learning_rate": 3.04678630196744e-06, + "loss": 0.0011, + "step": 207360 + }, + { + "epoch": 1.3300067664422945, + "grad_norm": 0.1957700103521347, + "learning_rate": 3.0462710835366426e-06, + "loss": 0.0014, + "step": 207370 + }, + { + "epoch": 1.3300709033360805, + "grad_norm": 0.2093002051115036, + "learning_rate": 3.045755889587465e-06, + "loss": 0.0015, + "step": 207380 + }, + { + "epoch": 1.3301350402298666, + "grad_norm": 0.07760210335254669, + "learning_rate": 3.0452407201263633e-06, + "loss": 0.0014, + "step": 207390 + }, + { + "epoch": 1.3301991771236528, + "grad_norm": 0.3380934000015259, + "learning_rate": 3.044725575159791e-06, + "loss": 0.001, + "step": 207400 + }, + { + "epoch": 1.3302633140174387, + "grad_norm": 0.13043218851089478, + "learning_rate": 3.044210454694206e-06, + "loss": 0.0019, + "step": 207410 + }, + { + "epoch": 1.3303274509112248, + "grad_norm": 0.010576743632555008, + "learning_rate": 3.0436953587360607e-06, + "loss": 0.0019, + "step": 207420 + }, + { + "epoch": 1.330391587805011, + "grad_norm": 0.13296468555927277, + "learning_rate": 3.0431802872918114e-06, + "loss": 0.0079, + "step": 207430 + }, + { + "epoch": 1.3304557246987971, + "grad_norm": 0.06005353108048439, + "learning_rate": 3.0426652403679115e-06, + "loss": 0.0024, + "step": 207440 + }, + { + "epoch": 1.3305198615925833, + "grad_norm": 0.1177959218621254, + "learning_rate": 3.042150217970815e-06, + "loss": 0.0009, + "step": 207450 + }, + { + "epoch": 1.3305839984863694, + "grad_norm": 0.005490519572049379, + "learning_rate": 3.0416352201069753e-06, + "loss": 0.0015, + "step": 207460 + }, + { + "epoch": 1.3306481353801554, + "grad_norm": 0.42011868953704834, + "learning_rate": 3.041120246782846e-06, + "loss": 0.0025, + "step": 207470 + }, + { + "epoch": 1.3307122722739415, + "grad_norm": 0.2927258312702179, + "learning_rate": 3.0406052980048807e-06, + "loss": 0.0008, + "step": 207480 + }, + { + "epoch": 1.3307764091677277, + "grad_norm": 0.2572489380836487, + "learning_rate": 3.04009037377953e-06, + "loss": 0.0016, + "step": 207490 + }, + { + "epoch": 1.3308405460615136, + "grad_norm": 0.022028865292668343, + "learning_rate": 3.039575474113249e-06, + "loss": 0.0009, + "step": 207500 + }, + { + "epoch": 1.3309046829552997, + "grad_norm": 0.08524273335933685, + "learning_rate": 3.0390605990124875e-06, + "loss": 0.001, + "step": 207510 + }, + { + "epoch": 1.3309688198490859, + "grad_norm": 0.03125406801700592, + "learning_rate": 3.0385457484836987e-06, + "loss": 0.0008, + "step": 207520 + }, + { + "epoch": 1.331032956742872, + "grad_norm": 0.2823640704154968, + "learning_rate": 3.038030922533333e-06, + "loss": 0.0026, + "step": 207530 + }, + { + "epoch": 1.3310970936366582, + "grad_norm": 0.0846913754940033, + "learning_rate": 3.0375161211678426e-06, + "loss": 0.001, + "step": 207540 + }, + { + "epoch": 1.331161230530444, + "grad_norm": 0.13597342371940613, + "learning_rate": 3.0370013443936773e-06, + "loss": 0.0014, + "step": 207550 + }, + { + "epoch": 1.3312253674242303, + "grad_norm": 0.06595153361558914, + "learning_rate": 3.036486592217289e-06, + "loss": 0.0016, + "step": 207560 + }, + { + "epoch": 1.3312895043180164, + "grad_norm": 0.0930728167295456, + "learning_rate": 3.035971864645126e-06, + "loss": 0.0012, + "step": 207570 + }, + { + "epoch": 1.3313536412118026, + "grad_norm": 0.018666686490178108, + "learning_rate": 3.0354571616836398e-06, + "loss": 0.0013, + "step": 207580 + }, + { + "epoch": 1.3314177781055885, + "grad_norm": 0.07905947417020798, + "learning_rate": 3.0349424833392765e-06, + "loss": 0.0016, + "step": 207590 + }, + { + "epoch": 1.3314819149993746, + "grad_norm": 0.14699843525886536, + "learning_rate": 3.0344278296184913e-06, + "loss": 0.0014, + "step": 207600 + }, + { + "epoch": 1.3315460518931608, + "grad_norm": 0.12528088688850403, + "learning_rate": 3.0339132005277294e-06, + "loss": 0.0011, + "step": 207610 + }, + { + "epoch": 1.331610188786947, + "grad_norm": 0.09150472283363342, + "learning_rate": 3.033398596073441e-06, + "loss": 0.0011, + "step": 207620 + }, + { + "epoch": 1.331674325680733, + "grad_norm": 0.016758302226662636, + "learning_rate": 3.032884016262073e-06, + "loss": 0.0009, + "step": 207630 + }, + { + "epoch": 1.331738462574519, + "grad_norm": 0.10561700910329819, + "learning_rate": 3.0323694611000753e-06, + "loss": 0.001, + "step": 207640 + }, + { + "epoch": 1.3318025994683051, + "grad_norm": 0.06780321896076202, + "learning_rate": 3.031854930593893e-06, + "loss": 0.003, + "step": 207650 + }, + { + "epoch": 1.3318667363620913, + "grad_norm": 0.0012612667633220553, + "learning_rate": 3.0313404247499763e-06, + "loss": 0.0018, + "step": 207660 + }, + { + "epoch": 1.3319308732558772, + "grad_norm": 0.009958547540009022, + "learning_rate": 3.03082594357477e-06, + "loss": 0.0015, + "step": 207670 + }, + { + "epoch": 1.3319950101496634, + "grad_norm": 0.019118620082736015, + "learning_rate": 3.030311487074723e-06, + "loss": 0.0041, + "step": 207680 + }, + { + "epoch": 1.3320591470434495, + "grad_norm": 0.1367831826210022, + "learning_rate": 3.0297970552562793e-06, + "loss": 0.0021, + "step": 207690 + }, + { + "epoch": 1.3321232839372357, + "grad_norm": 0.14062421023845673, + "learning_rate": 3.0292826481258887e-06, + "loss": 0.0018, + "step": 207700 + }, + { + "epoch": 1.3321874208310218, + "grad_norm": 0.05483415350317955, + "learning_rate": 3.028768265689993e-06, + "loss": 0.0007, + "step": 207710 + }, + { + "epoch": 1.332251557724808, + "grad_norm": 0.09449375420808792, + "learning_rate": 3.028253907955041e-06, + "loss": 0.0006, + "step": 207720 + }, + { + "epoch": 1.332315694618594, + "grad_norm": 0.279594361782074, + "learning_rate": 3.0277395749274757e-06, + "loss": 0.0012, + "step": 207730 + }, + { + "epoch": 1.33237983151238, + "grad_norm": 0.12302122265100479, + "learning_rate": 3.027225266613743e-06, + "loss": 0.0016, + "step": 207740 + }, + { + "epoch": 1.3324439684061662, + "grad_norm": 0.11502698063850403, + "learning_rate": 3.026710983020289e-06, + "loss": 0.0016, + "step": 207750 + }, + { + "epoch": 1.3325081052999521, + "grad_norm": 0.030506573617458344, + "learning_rate": 3.026196724153555e-06, + "loss": 0.0012, + "step": 207760 + }, + { + "epoch": 1.3325722421937383, + "grad_norm": 0.07216215878725052, + "learning_rate": 3.0256824900199886e-06, + "loss": 0.0015, + "step": 207770 + }, + { + "epoch": 1.3326363790875244, + "grad_norm": 0.06424123793840408, + "learning_rate": 3.0251682806260298e-06, + "loss": 0.0011, + "step": 207780 + }, + { + "epoch": 1.3327005159813106, + "grad_norm": 0.05916834995150566, + "learning_rate": 3.0246540959781257e-06, + "loss": 0.0011, + "step": 207790 + }, + { + "epoch": 1.3327646528750967, + "grad_norm": 0.3115961253643036, + "learning_rate": 3.0241399360827157e-06, + "loss": 0.0021, + "step": 207800 + }, + { + "epoch": 1.3328287897688826, + "grad_norm": 0.05232136696577072, + "learning_rate": 3.023625800946246e-06, + "loss": 0.0013, + "step": 207810 + }, + { + "epoch": 1.3328929266626688, + "grad_norm": 0.15427418053150177, + "learning_rate": 3.0231116905751554e-06, + "loss": 0.0013, + "step": 207820 + }, + { + "epoch": 1.332957063556455, + "grad_norm": 0.07504183799028397, + "learning_rate": 3.022597604975891e-06, + "loss": 0.0011, + "step": 207830 + }, + { + "epoch": 1.3330212004502409, + "grad_norm": 0.02206926792860031, + "learning_rate": 3.0220835441548896e-06, + "loss": 0.0006, + "step": 207840 + }, + { + "epoch": 1.333085337344027, + "grad_norm": 0.04043566435575485, + "learning_rate": 3.021569508118596e-06, + "loss": 0.0011, + "step": 207850 + }, + { + "epoch": 1.3331494742378132, + "grad_norm": 0.06484486162662506, + "learning_rate": 3.0210554968734495e-06, + "loss": 0.0014, + "step": 207860 + }, + { + "epoch": 1.3332136111315993, + "grad_norm": 0.05188364535570145, + "learning_rate": 3.0205415104258934e-06, + "loss": 0.0033, + "step": 207870 + }, + { + "epoch": 1.3332777480253855, + "grad_norm": 0.1079925149679184, + "learning_rate": 3.020027548782366e-06, + "loss": 0.0011, + "step": 207880 + }, + { + "epoch": 1.3333418849191716, + "grad_norm": 0.1064772754907608, + "learning_rate": 3.019513611949308e-06, + "loss": 0.0007, + "step": 207890 + }, + { + "epoch": 1.3334060218129575, + "grad_norm": 0.09681066870689392, + "learning_rate": 3.0189996999331606e-06, + "loss": 0.0009, + "step": 207900 + }, + { + "epoch": 1.3334701587067437, + "grad_norm": 0.06284935772418976, + "learning_rate": 3.018485812740363e-06, + "loss": 0.0012, + "step": 207910 + }, + { + "epoch": 1.3335342956005298, + "grad_norm": 0.05767025426030159, + "learning_rate": 3.0179719503773543e-06, + "loss": 0.0015, + "step": 207920 + }, + { + "epoch": 1.3335984324943158, + "grad_norm": 0.09643470495939255, + "learning_rate": 3.017458112850572e-06, + "loss": 0.0024, + "step": 207930 + }, + { + "epoch": 1.333662569388102, + "grad_norm": 0.13457752764225006, + "learning_rate": 3.0169443001664585e-06, + "loss": 0.0024, + "step": 207940 + }, + { + "epoch": 1.333726706281888, + "grad_norm": 0.06010362133383751, + "learning_rate": 3.0164305123314484e-06, + "loss": 0.001, + "step": 207950 + }, + { + "epoch": 1.3337908431756742, + "grad_norm": 0.04925031214952469, + "learning_rate": 3.0159167493519836e-06, + "loss": 0.0008, + "step": 207960 + }, + { + "epoch": 1.3338549800694603, + "grad_norm": 0.08997722715139389, + "learning_rate": 3.0154030112344985e-06, + "loss": 0.0011, + "step": 207970 + }, + { + "epoch": 1.3339191169632463, + "grad_norm": 0.13654863834381104, + "learning_rate": 3.014889297985433e-06, + "loss": 0.0018, + "step": 207980 + }, + { + "epoch": 1.3339832538570324, + "grad_norm": 0.18833725154399872, + "learning_rate": 3.014375609611222e-06, + "loss": 0.0014, + "step": 207990 + }, + { + "epoch": 1.3340473907508186, + "grad_norm": 0.10654346644878387, + "learning_rate": 3.013861946118305e-06, + "loss": 0.0012, + "step": 208000 + }, + { + "epoch": 1.3341115276446047, + "grad_norm": 0.046681273728609085, + "learning_rate": 3.0133483075131165e-06, + "loss": 0.0031, + "step": 208010 + }, + { + "epoch": 1.3341756645383906, + "grad_norm": 0.23288017511367798, + "learning_rate": 3.0128346938020938e-06, + "loss": 0.0012, + "step": 208020 + }, + { + "epoch": 1.3342398014321768, + "grad_norm": 0.039609394967556, + "learning_rate": 3.012321104991673e-06, + "loss": 0.0016, + "step": 208030 + }, + { + "epoch": 1.334303938325963, + "grad_norm": 0.14077578485012054, + "learning_rate": 3.0118075410882886e-06, + "loss": 0.002, + "step": 208040 + }, + { + "epoch": 1.334368075219749, + "grad_norm": 0.0437595397233963, + "learning_rate": 3.0112940020983773e-06, + "loss": 0.001, + "step": 208050 + }, + { + "epoch": 1.3344322121135352, + "grad_norm": 0.21762453019618988, + "learning_rate": 3.0107804880283726e-06, + "loss": 0.0011, + "step": 208060 + }, + { + "epoch": 1.3344963490073212, + "grad_norm": 0.21231357753276825, + "learning_rate": 3.0102669988847112e-06, + "loss": 0.0015, + "step": 208070 + }, + { + "epoch": 1.3345604859011073, + "grad_norm": 0.18998970091342926, + "learning_rate": 3.009753534673825e-06, + "loss": 0.0013, + "step": 208080 + }, + { + "epoch": 1.3346246227948935, + "grad_norm": 0.004365130327641964, + "learning_rate": 3.0092400954021507e-06, + "loss": 0.0004, + "step": 208090 + }, + { + "epoch": 1.3346887596886794, + "grad_norm": 0.011583004146814346, + "learning_rate": 3.00872668107612e-06, + "loss": 0.0018, + "step": 208100 + }, + { + "epoch": 1.3347528965824655, + "grad_norm": 0.13997438549995422, + "learning_rate": 3.0082132917021677e-06, + "loss": 0.0022, + "step": 208110 + }, + { + "epoch": 1.3348170334762517, + "grad_norm": 0.08748896420001984, + "learning_rate": 3.0076999272867256e-06, + "loss": 0.0011, + "step": 208120 + }, + { + "epoch": 1.3348811703700378, + "grad_norm": 0.1436755210161209, + "learning_rate": 3.007186587836228e-06, + "loss": 0.0018, + "step": 208130 + }, + { + "epoch": 1.334945307263824, + "grad_norm": 0.05570292845368385, + "learning_rate": 3.0066732733571064e-06, + "loss": 0.0016, + "step": 208140 + }, + { + "epoch": 1.3350094441576101, + "grad_norm": 0.13503798842430115, + "learning_rate": 3.006159983855794e-06, + "loss": 0.0019, + "step": 208150 + }, + { + "epoch": 1.335073581051396, + "grad_norm": 0.033364132046699524, + "learning_rate": 3.0056467193387207e-06, + "loss": 0.001, + "step": 208160 + }, + { + "epoch": 1.3351377179451822, + "grad_norm": 0.03804779797792435, + "learning_rate": 3.0051334798123192e-06, + "loss": 0.002, + "step": 208170 + }, + { + "epoch": 1.3352018548389684, + "grad_norm": 0.07062738388776779, + "learning_rate": 3.0046202652830226e-06, + "loss": 0.0014, + "step": 208180 + }, + { + "epoch": 1.3352659917327543, + "grad_norm": 0.046227481216192245, + "learning_rate": 3.004107075757259e-06, + "loss": 0.0011, + "step": 208190 + }, + { + "epoch": 1.3353301286265404, + "grad_norm": 0.0878714993596077, + "learning_rate": 3.0035939112414603e-06, + "loss": 0.001, + "step": 208200 + }, + { + "epoch": 1.3353942655203266, + "grad_norm": 0.09363577514886856, + "learning_rate": 3.0030807717420562e-06, + "loss": 0.0015, + "step": 208210 + }, + { + "epoch": 1.3354584024141127, + "grad_norm": 0.18899093568325043, + "learning_rate": 3.002567657265479e-06, + "loss": 0.0019, + "step": 208220 + }, + { + "epoch": 1.3355225393078989, + "grad_norm": 0.00698311859741807, + "learning_rate": 3.0020545678181547e-06, + "loss": 0.0012, + "step": 208230 + }, + { + "epoch": 1.3355866762016848, + "grad_norm": 0.1096363216638565, + "learning_rate": 3.0015415034065155e-06, + "loss": 0.0022, + "step": 208240 + }, + { + "epoch": 1.335650813095471, + "grad_norm": 0.10652846097946167, + "learning_rate": 3.001028464036989e-06, + "loss": 0.0024, + "step": 208250 + }, + { + "epoch": 1.335714949989257, + "grad_norm": 0.05483076721429825, + "learning_rate": 3.0005154497160054e-06, + "loss": 0.0016, + "step": 208260 + }, + { + "epoch": 1.335779086883043, + "grad_norm": 0.08351773768663406, + "learning_rate": 3.000002460449991e-06, + "loss": 0.0015, + "step": 208270 + }, + { + "epoch": 1.3358432237768292, + "grad_norm": 0.059546101838350296, + "learning_rate": 2.999489496245377e-06, + "loss": 0.0011, + "step": 208280 + }, + { + "epoch": 1.3359073606706153, + "grad_norm": 0.1459716260433197, + "learning_rate": 2.998976557108587e-06, + "loss": 0.001, + "step": 208290 + }, + { + "epoch": 1.3359714975644015, + "grad_norm": 0.48013800382614136, + "learning_rate": 2.998463643046053e-06, + "loss": 0.0024, + "step": 208300 + }, + { + "epoch": 1.3360356344581876, + "grad_norm": 0.06204058229923248, + "learning_rate": 2.9979507540641985e-06, + "loss": 0.0022, + "step": 208310 + }, + { + "epoch": 1.3360997713519738, + "grad_norm": 0.0903536006808281, + "learning_rate": 2.997437890169452e-06, + "loss": 0.0009, + "step": 208320 + }, + { + "epoch": 1.3361639082457597, + "grad_norm": 0.04781496152281761, + "learning_rate": 2.9969250513682408e-06, + "loss": 0.0015, + "step": 208330 + }, + { + "epoch": 1.3362280451395459, + "grad_norm": 0.15592113137245178, + "learning_rate": 2.996412237666989e-06, + "loss": 0.0008, + "step": 208340 + }, + { + "epoch": 1.336292182033332, + "grad_norm": 0.3037644624710083, + "learning_rate": 2.9958994490721257e-06, + "loss": 0.0019, + "step": 208350 + }, + { + "epoch": 1.336356318927118, + "grad_norm": 0.019961733371019363, + "learning_rate": 2.9953866855900716e-06, + "loss": 0.0014, + "step": 208360 + }, + { + "epoch": 1.336420455820904, + "grad_norm": 0.1020020991563797, + "learning_rate": 2.994873947227257e-06, + "loss": 0.0011, + "step": 208370 + }, + { + "epoch": 1.3364845927146902, + "grad_norm": 0.10029342025518417, + "learning_rate": 2.9943612339901052e-06, + "loss": 0.0013, + "step": 208380 + }, + { + "epoch": 1.3365487296084764, + "grad_norm": 0.04823361337184906, + "learning_rate": 2.9938485458850396e-06, + "loss": 0.0016, + "step": 208390 + }, + { + "epoch": 1.3366128665022625, + "grad_norm": 0.053015630692243576, + "learning_rate": 2.9933358829184867e-06, + "loss": 0.0019, + "step": 208400 + }, + { + "epoch": 1.3366770033960484, + "grad_norm": 0.0986398383975029, + "learning_rate": 2.992823245096868e-06, + "loss": 0.0016, + "step": 208410 + }, + { + "epoch": 1.3367411402898346, + "grad_norm": 0.10989446192979813, + "learning_rate": 2.99231063242661e-06, + "loss": 0.0018, + "step": 208420 + }, + { + "epoch": 1.3368052771836207, + "grad_norm": 0.03991841524839401, + "learning_rate": 2.9917980449141336e-06, + "loss": 0.001, + "step": 208430 + }, + { + "epoch": 1.336869414077407, + "grad_norm": 0.08873841166496277, + "learning_rate": 2.991285482565862e-06, + "loss": 0.0012, + "step": 208440 + }, + { + "epoch": 1.3369335509711928, + "grad_norm": 0.24083659052848816, + "learning_rate": 2.9907729453882213e-06, + "loss": 0.0016, + "step": 208450 + }, + { + "epoch": 1.336997687864979, + "grad_norm": 0.13094668090343475, + "learning_rate": 2.9902604333876293e-06, + "loss": 0.0016, + "step": 208460 + }, + { + "epoch": 1.3370618247587651, + "grad_norm": 0.02347368746995926, + "learning_rate": 2.9897479465705127e-06, + "loss": 0.0009, + "step": 208470 + }, + { + "epoch": 1.3371259616525513, + "grad_norm": 0.20845891535282135, + "learning_rate": 2.989235484943289e-06, + "loss": 0.0024, + "step": 208480 + }, + { + "epoch": 1.3371900985463374, + "grad_norm": 0.0839250236749649, + "learning_rate": 2.9887230485123834e-06, + "loss": 0.0011, + "step": 208490 + }, + { + "epoch": 1.3372542354401233, + "grad_norm": 0.03127298131585121, + "learning_rate": 2.9882106372842147e-06, + "loss": 0.0015, + "step": 208500 + }, + { + "epoch": 1.3373183723339095, + "grad_norm": 0.051121849566698074, + "learning_rate": 2.9876982512652042e-06, + "loss": 0.0017, + "step": 208510 + }, + { + "epoch": 1.3373825092276956, + "grad_norm": 0.09151150286197662, + "learning_rate": 2.987185890461773e-06, + "loss": 0.0012, + "step": 208520 + }, + { + "epoch": 1.3374466461214816, + "grad_norm": 0.039352428168058395, + "learning_rate": 2.986673554880342e-06, + "loss": 0.0022, + "step": 208530 + }, + { + "epoch": 1.3375107830152677, + "grad_norm": 0.056529849767684937, + "learning_rate": 2.9861612445273287e-06, + "loss": 0.0009, + "step": 208540 + }, + { + "epoch": 1.3375749199090539, + "grad_norm": 0.02233145758509636, + "learning_rate": 2.9856489594091555e-06, + "loss": 0.0016, + "step": 208550 + }, + { + "epoch": 1.33763905680284, + "grad_norm": 0.1725313365459442, + "learning_rate": 2.9851366995322393e-06, + "loss": 0.0015, + "step": 208560 + }, + { + "epoch": 1.3377031936966262, + "grad_norm": 0.004792868159711361, + "learning_rate": 2.984624464903002e-06, + "loss": 0.0013, + "step": 208570 + }, + { + "epoch": 1.3377673305904123, + "grad_norm": 0.045336589217185974, + "learning_rate": 2.984112255527859e-06, + "loss": 0.0008, + "step": 208580 + }, + { + "epoch": 1.3378314674841982, + "grad_norm": 0.08572478592395782, + "learning_rate": 2.9836000714132297e-06, + "loss": 0.0009, + "step": 208590 + }, + { + "epoch": 1.3378956043779844, + "grad_norm": 0.041878607124090195, + "learning_rate": 2.9830879125655343e-06, + "loss": 0.0008, + "step": 208600 + }, + { + "epoch": 1.3379597412717705, + "grad_norm": 0.03267974033951759, + "learning_rate": 2.9825757789911874e-06, + "loss": 0.0011, + "step": 208610 + }, + { + "epoch": 1.3380238781655565, + "grad_norm": 0.006213339976966381, + "learning_rate": 2.9820636706966087e-06, + "loss": 0.0017, + "step": 208620 + }, + { + "epoch": 1.3380880150593426, + "grad_norm": 0.09869519621133804, + "learning_rate": 2.981551587688214e-06, + "loss": 0.001, + "step": 208630 + }, + { + "epoch": 1.3381521519531288, + "grad_norm": 0.06236009672284126, + "learning_rate": 2.9810395299724214e-06, + "loss": 0.0012, + "step": 208640 + }, + { + "epoch": 1.338216288846915, + "grad_norm": 0.10517404228448868, + "learning_rate": 2.9805274975556453e-06, + "loss": 0.0013, + "step": 208650 + }, + { + "epoch": 1.338280425740701, + "grad_norm": 0.014546317979693413, + "learning_rate": 2.9800154904443034e-06, + "loss": 0.0009, + "step": 208660 + }, + { + "epoch": 1.338344562634487, + "grad_norm": 0.03995361179113388, + "learning_rate": 2.979503508644811e-06, + "loss": 0.0008, + "step": 208670 + }, + { + "epoch": 1.3384086995282731, + "grad_norm": 0.07176606357097626, + "learning_rate": 2.9789915521635837e-06, + "loss": 0.0007, + "step": 208680 + }, + { + "epoch": 1.3384728364220593, + "grad_norm": 0.03574638441205025, + "learning_rate": 2.9784796210070368e-06, + "loss": 0.0016, + "step": 208690 + }, + { + "epoch": 1.3385369733158452, + "grad_norm": 0.057587962597608566, + "learning_rate": 2.977967715181585e-06, + "loss": 0.0008, + "step": 208700 + }, + { + "epoch": 1.3386011102096314, + "grad_norm": 0.11445263028144836, + "learning_rate": 2.977455834693642e-06, + "loss": 0.0013, + "step": 208710 + }, + { + "epoch": 1.3386652471034175, + "grad_norm": 0.023943578824400902, + "learning_rate": 2.9769439795496247e-06, + "loss": 0.002, + "step": 208720 + }, + { + "epoch": 1.3387293839972036, + "grad_norm": 0.2436387985944748, + "learning_rate": 2.9764321497559435e-06, + "loss": 0.0028, + "step": 208730 + }, + { + "epoch": 1.3387935208909898, + "grad_norm": 0.033301565796136856, + "learning_rate": 2.975920345319013e-06, + "loss": 0.001, + "step": 208740 + }, + { + "epoch": 1.338857657784776, + "grad_norm": 0.09697895497083664, + "learning_rate": 2.9754085662452494e-06, + "loss": 0.002, + "step": 208750 + }, + { + "epoch": 1.3389217946785619, + "grad_norm": 0.04288691282272339, + "learning_rate": 2.9748968125410617e-06, + "loss": 0.0029, + "step": 208760 + }, + { + "epoch": 1.338985931572348, + "grad_norm": 0.06362093240022659, + "learning_rate": 2.9743850842128657e-06, + "loss": 0.002, + "step": 208770 + }, + { + "epoch": 1.3390500684661342, + "grad_norm": 0.04038805514574051, + "learning_rate": 2.973873381267071e-06, + "loss": 0.0011, + "step": 208780 + }, + { + "epoch": 1.33911420535992, + "grad_norm": 0.020341023802757263, + "learning_rate": 2.973361703710092e-06, + "loss": 0.0009, + "step": 208790 + }, + { + "epoch": 1.3391783422537062, + "grad_norm": 0.03332603722810745, + "learning_rate": 2.9728500515483383e-06, + "loss": 0.0011, + "step": 208800 + }, + { + "epoch": 1.3392424791474924, + "grad_norm": 0.06648506224155426, + "learning_rate": 2.972338424788223e-06, + "loss": 0.0024, + "step": 208810 + }, + { + "epoch": 1.3393066160412785, + "grad_norm": 0.008569886907935143, + "learning_rate": 2.971826823436156e-06, + "loss": 0.0014, + "step": 208820 + }, + { + "epoch": 1.3393707529350647, + "grad_norm": 0.15676802396774292, + "learning_rate": 2.9713152474985485e-06, + "loss": 0.0012, + "step": 208830 + }, + { + "epoch": 1.3394348898288506, + "grad_norm": 0.0957457646727562, + "learning_rate": 2.9708036969818106e-06, + "loss": 0.0013, + "step": 208840 + }, + { + "epoch": 1.3394990267226368, + "grad_norm": 0.016529573127627373, + "learning_rate": 2.970292171892353e-06, + "loss": 0.0012, + "step": 208850 + }, + { + "epoch": 1.339563163616423, + "grad_norm": 0.11083996295928955, + "learning_rate": 2.9697806722365845e-06, + "loss": 0.001, + "step": 208860 + }, + { + "epoch": 1.339627300510209, + "grad_norm": 0.14161944389343262, + "learning_rate": 2.9692691980209153e-06, + "loss": 0.0116, + "step": 208870 + }, + { + "epoch": 1.339691437403995, + "grad_norm": 0.07772056758403778, + "learning_rate": 2.968757749251755e-06, + "loss": 0.0008, + "step": 208880 + }, + { + "epoch": 1.3397555742977811, + "grad_norm": 0.053042441606521606, + "learning_rate": 2.968246325935511e-06, + "loss": 0.0013, + "step": 208890 + }, + { + "epoch": 1.3398197111915673, + "grad_norm": 0.027649134397506714, + "learning_rate": 2.9677349280785937e-06, + "loss": 0.0013, + "step": 208900 + }, + { + "epoch": 1.3398838480853534, + "grad_norm": 0.029094886034727097, + "learning_rate": 2.9672235556874085e-06, + "loss": 0.0012, + "step": 208910 + }, + { + "epoch": 1.3399479849791396, + "grad_norm": 0.0654555931687355, + "learning_rate": 2.966712208768367e-06, + "loss": 0.0014, + "step": 208920 + }, + { + "epoch": 1.3400121218729255, + "grad_norm": 0.07652648538351059, + "learning_rate": 2.966200887327873e-06, + "loss": 0.0012, + "step": 208930 + }, + { + "epoch": 1.3400762587667117, + "grad_norm": 0.0004798894515261054, + "learning_rate": 2.9656895913723365e-06, + "loss": 0.0012, + "step": 208940 + }, + { + "epoch": 1.3401403956604978, + "grad_norm": 0.020167993381619453, + "learning_rate": 2.965178320908163e-06, + "loss": 0.0015, + "step": 208950 + }, + { + "epoch": 1.3402045325542837, + "grad_norm": 0.049159470945596695, + "learning_rate": 2.9646670759417595e-06, + "loss": 0.0014, + "step": 208960 + }, + { + "epoch": 1.3402686694480699, + "grad_norm": 0.23737210035324097, + "learning_rate": 2.9641558564795315e-06, + "loss": 0.0018, + "step": 208970 + }, + { + "epoch": 1.340332806341856, + "grad_norm": 0.11929892003536224, + "learning_rate": 2.963644662527887e-06, + "loss": 0.0025, + "step": 208980 + }, + { + "epoch": 1.3403969432356422, + "grad_norm": 0.09051161259412766, + "learning_rate": 2.963133494093229e-06, + "loss": 0.0017, + "step": 208990 + }, + { + "epoch": 1.3404610801294283, + "grad_norm": 0.060351449996232986, + "learning_rate": 2.962622351181964e-06, + "loss": 0.0019, + "step": 209000 + }, + { + "epoch": 1.3405252170232145, + "grad_norm": 0.05083949491381645, + "learning_rate": 2.9621112338004978e-06, + "loss": 0.0019, + "step": 209010 + }, + { + "epoch": 1.3405893539170004, + "grad_norm": 0.11279977858066559, + "learning_rate": 2.961600141955233e-06, + "loss": 0.0012, + "step": 209020 + }, + { + "epoch": 1.3406534908107866, + "grad_norm": 0.06853433698415756, + "learning_rate": 2.961089075652577e-06, + "loss": 0.0007, + "step": 209030 + }, + { + "epoch": 1.3407176277045727, + "grad_norm": 0.08879048377275467, + "learning_rate": 2.9605780348989305e-06, + "loss": 0.0018, + "step": 209040 + }, + { + "epoch": 1.3407817645983586, + "grad_norm": 0.06870577484369278, + "learning_rate": 2.9600670197006997e-06, + "loss": 0.001, + "step": 209050 + }, + { + "epoch": 1.3408459014921448, + "grad_norm": 0.13088449835777283, + "learning_rate": 2.959556030064287e-06, + "loss": 0.0013, + "step": 209060 + }, + { + "epoch": 1.340910038385931, + "grad_norm": 0.08416897058486938, + "learning_rate": 2.9590450659960958e-06, + "loss": 0.0057, + "step": 209070 + }, + { + "epoch": 1.340974175279717, + "grad_norm": 0.05533396080136299, + "learning_rate": 2.9585341275025277e-06, + "loss": 0.0021, + "step": 209080 + }, + { + "epoch": 1.3410383121735032, + "grad_norm": 0.019153635948896408, + "learning_rate": 2.958023214589987e-06, + "loss": 0.0026, + "step": 209090 + }, + { + "epoch": 1.3411024490672891, + "grad_norm": 0.19014737010002136, + "learning_rate": 2.957512327264873e-06, + "loss": 0.0015, + "step": 209100 + }, + { + "epoch": 1.3411665859610753, + "grad_norm": 0.22254988551139832, + "learning_rate": 2.9570014655335917e-06, + "loss": 0.0013, + "step": 209110 + }, + { + "epoch": 1.3412307228548614, + "grad_norm": 0.03758307546377182, + "learning_rate": 2.95649062940254e-06, + "loss": 0.0011, + "step": 209120 + }, + { + "epoch": 1.3412948597486476, + "grad_norm": 0.0588693767786026, + "learning_rate": 2.9559798188781208e-06, + "loss": 0.0017, + "step": 209130 + }, + { + "epoch": 1.3413589966424335, + "grad_norm": 0.05812332406640053, + "learning_rate": 2.955469033966737e-06, + "loss": 0.0011, + "step": 209140 + }, + { + "epoch": 1.3414231335362197, + "grad_norm": 0.09899233281612396, + "learning_rate": 2.9549582746747872e-06, + "loss": 0.0015, + "step": 209150 + }, + { + "epoch": 1.3414872704300058, + "grad_norm": 0.021945517510175705, + "learning_rate": 2.9544475410086715e-06, + "loss": 0.0006, + "step": 209160 + }, + { + "epoch": 1.341551407323792, + "grad_norm": 0.007202756125479937, + "learning_rate": 2.95393683297479e-06, + "loss": 0.0024, + "step": 209170 + }, + { + "epoch": 1.3416155442175781, + "grad_norm": 0.007555840536952019, + "learning_rate": 2.9534261505795426e-06, + "loss": 0.0006, + "step": 209180 + }, + { + "epoch": 1.341679681111364, + "grad_norm": 0.24433951079845428, + "learning_rate": 2.952915493829328e-06, + "loss": 0.003, + "step": 209190 + }, + { + "epoch": 1.3417438180051502, + "grad_norm": 0.04254264757037163, + "learning_rate": 2.9524048627305455e-06, + "loss": 0.0013, + "step": 209200 + }, + { + "epoch": 1.3418079548989363, + "grad_norm": 0.2464970499277115, + "learning_rate": 2.9518942572895937e-06, + "loss": 0.0035, + "step": 209210 + }, + { + "epoch": 1.3418720917927223, + "grad_norm": 0.1973329335451126, + "learning_rate": 2.95138367751287e-06, + "loss": 0.0022, + "step": 209220 + }, + { + "epoch": 1.3419362286865084, + "grad_norm": 0.06293509900569916, + "learning_rate": 2.950873123406773e-06, + "loss": 0.0011, + "step": 209230 + }, + { + "epoch": 1.3420003655802946, + "grad_norm": 0.03426901251077652, + "learning_rate": 2.9503625949777003e-06, + "loss": 0.0023, + "step": 209240 + }, + { + "epoch": 1.3420645024740807, + "grad_norm": 0.22345155477523804, + "learning_rate": 2.94985209223205e-06, + "loss": 0.0006, + "step": 209250 + }, + { + "epoch": 1.3421286393678669, + "grad_norm": 0.011404551565647125, + "learning_rate": 2.9493416151762173e-06, + "loss": 0.0012, + "step": 209260 + }, + { + "epoch": 1.342192776261653, + "grad_norm": 0.06039094179868698, + "learning_rate": 2.9488311638166e-06, + "loss": 0.0012, + "step": 209270 + }, + { + "epoch": 1.342256913155439, + "grad_norm": 0.024154851213097572, + "learning_rate": 2.948320738159594e-06, + "loss": 0.0017, + "step": 209280 + }, + { + "epoch": 1.342321050049225, + "grad_norm": 0.13007420301437378, + "learning_rate": 2.947810338211595e-06, + "loss": 0.0019, + "step": 209290 + }, + { + "epoch": 1.3423851869430112, + "grad_norm": 0.1291894167661667, + "learning_rate": 2.947299963979e-06, + "loss": 0.0016, + "step": 209300 + }, + { + "epoch": 1.3424493238367972, + "grad_norm": 0.024135563522577286, + "learning_rate": 2.946789615468203e-06, + "loss": 0.0012, + "step": 209310 + }, + { + "epoch": 1.3425134607305833, + "grad_norm": 0.07220418006181717, + "learning_rate": 2.9462792926856002e-06, + "loss": 0.0017, + "step": 209320 + }, + { + "epoch": 1.3425775976243695, + "grad_norm": 0.08306324481964111, + "learning_rate": 2.9457689956375847e-06, + "loss": 0.0009, + "step": 209330 + }, + { + "epoch": 1.3426417345181556, + "grad_norm": 0.15459685027599335, + "learning_rate": 2.945258724330553e-06, + "loss": 0.002, + "step": 209340 + }, + { + "epoch": 1.3427058714119418, + "grad_norm": 0.06739833205938339, + "learning_rate": 2.944748478770897e-06, + "loss": 0.0017, + "step": 209350 + }, + { + "epoch": 1.3427700083057277, + "grad_norm": 0.03368918597698212, + "learning_rate": 2.944238258965012e-06, + "loss": 0.0012, + "step": 209360 + }, + { + "epoch": 1.3428341451995138, + "grad_norm": 0.1809409111738205, + "learning_rate": 2.94372806491929e-06, + "loss": 0.002, + "step": 209370 + }, + { + "epoch": 1.3428982820933, + "grad_norm": 0.10510099679231644, + "learning_rate": 2.943217896640126e-06, + "loss": 0.0014, + "step": 209380 + }, + { + "epoch": 1.342962418987086, + "grad_norm": 0.1288333386182785, + "learning_rate": 2.942707754133911e-06, + "loss": 0.0043, + "step": 209390 + }, + { + "epoch": 1.343026555880872, + "grad_norm": 0.1137746199965477, + "learning_rate": 2.9421976374070393e-06, + "loss": 0.0015, + "step": 209400 + }, + { + "epoch": 1.3430906927746582, + "grad_norm": 0.009048394858837128, + "learning_rate": 2.9416875464659e-06, + "loss": 0.0012, + "step": 209410 + }, + { + "epoch": 1.3431548296684443, + "grad_norm": 0.1014087125658989, + "learning_rate": 2.941177481316888e-06, + "loss": 0.0009, + "step": 209420 + }, + { + "epoch": 1.3432189665622305, + "grad_norm": 0.07595310360193253, + "learning_rate": 2.9406674419663935e-06, + "loss": 0.0014, + "step": 209430 + }, + { + "epoch": 1.3432831034560166, + "grad_norm": 0.164398655295372, + "learning_rate": 2.9401574284208068e-06, + "loss": 0.0041, + "step": 209440 + }, + { + "epoch": 1.3433472403498026, + "grad_norm": 0.10600937157869339, + "learning_rate": 2.939647440686521e-06, + "loss": 0.0013, + "step": 209450 + }, + { + "epoch": 1.3434113772435887, + "grad_norm": 0.23413997888565063, + "learning_rate": 2.9391374787699243e-06, + "loss": 0.0017, + "step": 209460 + }, + { + "epoch": 1.3434755141373749, + "grad_norm": 0.04459110274910927, + "learning_rate": 2.938627542677409e-06, + "loss": 0.0013, + "step": 209470 + }, + { + "epoch": 1.3435396510311608, + "grad_norm": 0.08799204230308533, + "learning_rate": 2.938117632415363e-06, + "loss": 0.0012, + "step": 209480 + }, + { + "epoch": 1.343603787924947, + "grad_norm": 0.09129440039396286, + "learning_rate": 2.9376077479901767e-06, + "loss": 0.0017, + "step": 209490 + }, + { + "epoch": 1.343667924818733, + "grad_norm": 0.04995739459991455, + "learning_rate": 2.937097889408239e-06, + "loss": 0.0012, + "step": 209500 + }, + { + "epoch": 1.3437320617125192, + "grad_norm": 0.07876341789960861, + "learning_rate": 2.93658805667594e-06, + "loss": 0.0034, + "step": 209510 + }, + { + "epoch": 1.3437961986063054, + "grad_norm": 0.06519567221403122, + "learning_rate": 2.9360782497996664e-06, + "loss": 0.0013, + "step": 209520 + }, + { + "epoch": 1.3438603355000913, + "grad_norm": 0.07680631428956985, + "learning_rate": 2.935568468785809e-06, + "loss": 0.0018, + "step": 209530 + }, + { + "epoch": 1.3439244723938775, + "grad_norm": 0.1277865618467331, + "learning_rate": 2.9350587136407517e-06, + "loss": 0.0009, + "step": 209540 + }, + { + "epoch": 1.3439886092876636, + "grad_norm": 0.04611477628350258, + "learning_rate": 2.9345489843708863e-06, + "loss": 0.0009, + "step": 209550 + }, + { + "epoch": 1.3440527461814498, + "grad_norm": 0.1415407657623291, + "learning_rate": 2.9340392809825976e-06, + "loss": 0.0013, + "step": 209560 + }, + { + "epoch": 1.3441168830752357, + "grad_norm": 0.14277635514736176, + "learning_rate": 2.9335296034822737e-06, + "loss": 0.0012, + "step": 209570 + }, + { + "epoch": 1.3441810199690218, + "grad_norm": 0.1470096856355667, + "learning_rate": 2.9330199518763002e-06, + "loss": 0.001, + "step": 209580 + }, + { + "epoch": 1.344245156862808, + "grad_norm": 0.06251547485589981, + "learning_rate": 2.932510326171063e-06, + "loss": 0.0011, + "step": 209590 + }, + { + "epoch": 1.3443092937565941, + "grad_norm": 0.14789541065692902, + "learning_rate": 2.932000726372951e-06, + "loss": 0.0018, + "step": 209600 + }, + { + "epoch": 1.3443734306503803, + "grad_norm": 0.03225419297814369, + "learning_rate": 2.9314911524883466e-06, + "loss": 0.0011, + "step": 209610 + }, + { + "epoch": 1.3444375675441662, + "grad_norm": 0.11897369474172592, + "learning_rate": 2.930981604523637e-06, + "loss": 0.0019, + "step": 209620 + }, + { + "epoch": 1.3445017044379524, + "grad_norm": 0.062377601861953735, + "learning_rate": 2.930472082485206e-06, + "loss": 0.0013, + "step": 209630 + }, + { + "epoch": 1.3445658413317385, + "grad_norm": 0.10768963396549225, + "learning_rate": 2.92996258637944e-06, + "loss": 0.002, + "step": 209640 + }, + { + "epoch": 1.3446299782255244, + "grad_norm": 0.13047385215759277, + "learning_rate": 2.9294531162127216e-06, + "loss": 0.0014, + "step": 209650 + }, + { + "epoch": 1.3446941151193106, + "grad_norm": 0.03815917298197746, + "learning_rate": 2.9289436719914353e-06, + "loss": 0.0013, + "step": 209660 + }, + { + "epoch": 1.3447582520130967, + "grad_norm": 0.392093688249588, + "learning_rate": 2.928434253721965e-06, + "loss": 0.0023, + "step": 209670 + }, + { + "epoch": 1.3448223889068829, + "grad_norm": 0.14308254420757294, + "learning_rate": 2.9279248614106948e-06, + "loss": 0.0044, + "step": 209680 + }, + { + "epoch": 1.344886525800669, + "grad_norm": 0.09599711000919342, + "learning_rate": 2.9274154950640053e-06, + "loss": 0.0013, + "step": 209690 + }, + { + "epoch": 1.3449506626944552, + "grad_norm": 0.09055355936288834, + "learning_rate": 2.926906154688283e-06, + "loss": 0.0016, + "step": 209700 + }, + { + "epoch": 1.345014799588241, + "grad_norm": 0.09291297197341919, + "learning_rate": 2.9263968402899064e-06, + "loss": 0.0013, + "step": 209710 + }, + { + "epoch": 1.3450789364820273, + "grad_norm": 0.08032003790140152, + "learning_rate": 2.9258875518752595e-06, + "loss": 0.0018, + "step": 209720 + }, + { + "epoch": 1.3451430733758134, + "grad_norm": 0.007770441472530365, + "learning_rate": 2.9253782894507248e-06, + "loss": 0.0015, + "step": 209730 + }, + { + "epoch": 1.3452072102695993, + "grad_norm": 0.02273477427661419, + "learning_rate": 2.924869053022682e-06, + "loss": 0.0012, + "step": 209740 + }, + { + "epoch": 1.3452713471633855, + "grad_norm": 0.07384491711854935, + "learning_rate": 2.924359842597514e-06, + "loss": 0.0021, + "step": 209750 + }, + { + "epoch": 1.3453354840571716, + "grad_norm": 0.03380212560296059, + "learning_rate": 2.9238506581815997e-06, + "loss": 0.0066, + "step": 209760 + }, + { + "epoch": 1.3453996209509578, + "grad_norm": 0.009146971628069878, + "learning_rate": 2.9233414997813213e-06, + "loss": 0.0029, + "step": 209770 + }, + { + "epoch": 1.345463757844744, + "grad_norm": 0.07387570291757584, + "learning_rate": 2.922832367403058e-06, + "loss": 0.0018, + "step": 209780 + }, + { + "epoch": 1.3455278947385299, + "grad_norm": 0.039091628044843674, + "learning_rate": 2.9223232610531894e-06, + "loss": 0.001, + "step": 209790 + }, + { + "epoch": 1.345592031632316, + "grad_norm": 0.09510093182325363, + "learning_rate": 2.9218141807380947e-06, + "loss": 0.0022, + "step": 209800 + }, + { + "epoch": 1.3456561685261021, + "grad_norm": 0.07570884376764297, + "learning_rate": 2.9213051264641546e-06, + "loss": 0.0013, + "step": 209810 + }, + { + "epoch": 1.345720305419888, + "grad_norm": 0.026463299989700317, + "learning_rate": 2.9207960982377457e-06, + "loss": 0.0022, + "step": 209820 + }, + { + "epoch": 1.3457844423136742, + "grad_norm": 0.05057981610298157, + "learning_rate": 2.9202870960652486e-06, + "loss": 0.0022, + "step": 209830 + }, + { + "epoch": 1.3458485792074604, + "grad_norm": 0.1473167985677719, + "learning_rate": 2.91977811995304e-06, + "loss": 0.0009, + "step": 209840 + }, + { + "epoch": 1.3459127161012465, + "grad_norm": 0.05318130925297737, + "learning_rate": 2.9192691699074993e-06, + "loss": 0.0006, + "step": 209850 + }, + { + "epoch": 1.3459768529950327, + "grad_norm": 0.09713903814554214, + "learning_rate": 2.9187602459350016e-06, + "loss": 0.0018, + "step": 209860 + }, + { + "epoch": 1.3460409898888188, + "grad_norm": 0.039324406534433365, + "learning_rate": 2.9182513480419262e-06, + "loss": 0.001, + "step": 209870 + }, + { + "epoch": 1.3461051267826047, + "grad_norm": 0.049848772585392, + "learning_rate": 2.917742476234648e-06, + "loss": 0.0017, + "step": 209880 + }, + { + "epoch": 1.346169263676391, + "grad_norm": 0.1317090094089508, + "learning_rate": 2.917233630519544e-06, + "loss": 0.0012, + "step": 209890 + }, + { + "epoch": 1.346233400570177, + "grad_norm": 0.17113488912582397, + "learning_rate": 2.9167248109029934e-06, + "loss": 0.0017, + "step": 209900 + }, + { + "epoch": 1.346297537463963, + "grad_norm": 0.030517883598804474, + "learning_rate": 2.91621601739137e-06, + "loss": 0.0018, + "step": 209910 + }, + { + "epoch": 1.3463616743577491, + "grad_norm": 0.10433640331029892, + "learning_rate": 2.9157072499910487e-06, + "loss": 0.0017, + "step": 209920 + }, + { + "epoch": 1.3464258112515353, + "grad_norm": 0.045640259981155396, + "learning_rate": 2.915198508708404e-06, + "loss": 0.001, + "step": 209930 + }, + { + "epoch": 1.3464899481453214, + "grad_norm": 0.030804645270109177, + "learning_rate": 2.9146897935498133e-06, + "loss": 0.0013, + "step": 209940 + }, + { + "epoch": 1.3465540850391076, + "grad_norm": 0.1606251299381256, + "learning_rate": 2.91418110452165e-06, + "loss": 0.0015, + "step": 209950 + }, + { + "epoch": 1.3466182219328935, + "grad_norm": 0.006368404719978571, + "learning_rate": 2.9136724416302887e-06, + "loss": 0.0013, + "step": 209960 + }, + { + "epoch": 1.3466823588266796, + "grad_norm": 0.00760697154328227, + "learning_rate": 2.9131638048821e-06, + "loss": 0.0006, + "step": 209970 + }, + { + "epoch": 1.3467464957204658, + "grad_norm": 0.052641209214925766, + "learning_rate": 2.9126551942834625e-06, + "loss": 0.001, + "step": 209980 + }, + { + "epoch": 1.346810632614252, + "grad_norm": 0.011346855200827122, + "learning_rate": 2.912146609840747e-06, + "loss": 0.0015, + "step": 209990 + }, + { + "epoch": 1.3468747695080379, + "grad_norm": 0.1732601821422577, + "learning_rate": 2.911638051560325e-06, + "loss": 0.0035, + "step": 210000 + }, + { + "epoch": 1.346938906401824, + "grad_norm": 0.029262270778417587, + "learning_rate": 2.911129519448573e-06, + "loss": 0.0012, + "step": 210010 + }, + { + "epoch": 1.3470030432956102, + "grad_norm": 0.10278654098510742, + "learning_rate": 2.9106210135118608e-06, + "loss": 0.0011, + "step": 210020 + }, + { + "epoch": 1.3470671801893963, + "grad_norm": 0.08118873089551926, + "learning_rate": 2.91011253375656e-06, + "loss": 0.0006, + "step": 210030 + }, + { + "epoch": 1.3471313170831825, + "grad_norm": 0.1753062754869461, + "learning_rate": 2.9096040801890413e-06, + "loss": 0.002, + "step": 210040 + }, + { + "epoch": 1.3471954539769684, + "grad_norm": 0.09469661861658096, + "learning_rate": 2.9090956528156793e-06, + "loss": 0.0015, + "step": 210050 + }, + { + "epoch": 1.3472595908707545, + "grad_norm": 0.04578879848122597, + "learning_rate": 2.908587251642843e-06, + "loss": 0.0012, + "step": 210060 + }, + { + "epoch": 1.3473237277645407, + "grad_norm": 0.12490465492010117, + "learning_rate": 2.9080788766769036e-06, + "loss": 0.0017, + "step": 210070 + }, + { + "epoch": 1.3473878646583266, + "grad_norm": 0.12824605405330658, + "learning_rate": 2.9075705279242284e-06, + "loss": 0.0012, + "step": 210080 + }, + { + "epoch": 1.3474520015521128, + "grad_norm": 0.06864582747220993, + "learning_rate": 2.9070622053911925e-06, + "loss": 0.0011, + "step": 210090 + }, + { + "epoch": 1.347516138445899, + "grad_norm": 0.182582825422287, + "learning_rate": 2.9065539090841623e-06, + "loss": 0.0019, + "step": 210100 + }, + { + "epoch": 1.347580275339685, + "grad_norm": 0.006342856679111719, + "learning_rate": 2.9060456390095082e-06, + "loss": 0.0019, + "step": 210110 + }, + { + "epoch": 1.3476444122334712, + "grad_norm": 0.020330281928181648, + "learning_rate": 2.9055373951735966e-06, + "loss": 0.0017, + "step": 210120 + }, + { + "epoch": 1.3477085491272573, + "grad_norm": 0.14057806134223938, + "learning_rate": 2.905029177582801e-06, + "loss": 0.0011, + "step": 210130 + }, + { + "epoch": 1.3477726860210433, + "grad_norm": 0.0436885803937912, + "learning_rate": 2.9045209862434864e-06, + "loss": 0.0006, + "step": 210140 + }, + { + "epoch": 1.3478368229148294, + "grad_norm": 0.459991991519928, + "learning_rate": 2.9040128211620195e-06, + "loss": 0.0012, + "step": 210150 + }, + { + "epoch": 1.3479009598086156, + "grad_norm": 0.09436246752738953, + "learning_rate": 2.903504682344772e-06, + "loss": 0.0013, + "step": 210160 + }, + { + "epoch": 1.3479650967024015, + "grad_norm": 0.13223658502101898, + "learning_rate": 2.902996569798109e-06, + "loss": 0.0033, + "step": 210170 + }, + { + "epoch": 1.3480292335961876, + "grad_norm": 0.13869208097457886, + "learning_rate": 2.9024884835283978e-06, + "loss": 0.0013, + "step": 210180 + }, + { + "epoch": 1.3480933704899738, + "grad_norm": 0.1832425892353058, + "learning_rate": 2.901980423542003e-06, + "loss": 0.0014, + "step": 210190 + }, + { + "epoch": 1.34815750738376, + "grad_norm": 0.03327884525060654, + "learning_rate": 2.9014723898452946e-06, + "loss": 0.0013, + "step": 210200 + }, + { + "epoch": 1.348221644277546, + "grad_norm": 0.003701785346493125, + "learning_rate": 2.9009643824446377e-06, + "loss": 0.0029, + "step": 210210 + }, + { + "epoch": 1.348285781171332, + "grad_norm": 0.08859538286924362, + "learning_rate": 2.9004564013463964e-06, + "loss": 0.0012, + "step": 210220 + }, + { + "epoch": 1.3483499180651182, + "grad_norm": 0.00830372329801321, + "learning_rate": 2.899948446556936e-06, + "loss": 0.0007, + "step": 210230 + }, + { + "epoch": 1.3484140549589043, + "grad_norm": 0.044589921832084656, + "learning_rate": 2.8994405180826234e-06, + "loss": 0.0013, + "step": 210240 + }, + { + "epoch": 1.3484781918526902, + "grad_norm": 0.07862818986177444, + "learning_rate": 2.8989326159298225e-06, + "loss": 0.002, + "step": 210250 + }, + { + "epoch": 1.3485423287464764, + "grad_norm": 0.06766141206026077, + "learning_rate": 2.8984247401048983e-06, + "loss": 0.0015, + "step": 210260 + }, + { + "epoch": 1.3486064656402625, + "grad_norm": 0.06975769996643066, + "learning_rate": 2.8979168906142114e-06, + "loss": 0.0018, + "step": 210270 + }, + { + "epoch": 1.3486706025340487, + "grad_norm": 0.08193672448396683, + "learning_rate": 2.8974090674641304e-06, + "loss": 0.0022, + "step": 210280 + }, + { + "epoch": 1.3487347394278348, + "grad_norm": 0.10991158336400986, + "learning_rate": 2.896901270661017e-06, + "loss": 0.001, + "step": 210290 + }, + { + "epoch": 1.348798876321621, + "grad_norm": 0.08881914615631104, + "learning_rate": 2.896393500211231e-06, + "loss": 0.0021, + "step": 210300 + }, + { + "epoch": 1.348863013215407, + "grad_norm": 0.06696899235248566, + "learning_rate": 2.8958857561211394e-06, + "loss": 0.0018, + "step": 210310 + }, + { + "epoch": 1.348927150109193, + "grad_norm": 0.04735543578863144, + "learning_rate": 2.8953780383971043e-06, + "loss": 0.0013, + "step": 210320 + }, + { + "epoch": 1.3489912870029792, + "grad_norm": 0.15906420350074768, + "learning_rate": 2.8948703470454854e-06, + "loss": 0.0017, + "step": 210330 + }, + { + "epoch": 1.3490554238967651, + "grad_norm": 0.040080178529024124, + "learning_rate": 2.8943626820726446e-06, + "loss": 0.0009, + "step": 210340 + }, + { + "epoch": 1.3491195607905513, + "grad_norm": 0.055117517709732056, + "learning_rate": 2.8938550434849454e-06, + "loss": 0.0015, + "step": 210350 + }, + { + "epoch": 1.3491836976843374, + "grad_norm": 0.2058384120464325, + "learning_rate": 2.8933474312887477e-06, + "loss": 0.0044, + "step": 210360 + }, + { + "epoch": 1.3492478345781236, + "grad_norm": 0.13802611827850342, + "learning_rate": 2.8928398454904127e-06, + "loss": 0.0016, + "step": 210370 + }, + { + "epoch": 1.3493119714719097, + "grad_norm": 0.15008658170700073, + "learning_rate": 2.8923322860962986e-06, + "loss": 0.0014, + "step": 210380 + }, + { + "epoch": 1.3493761083656957, + "grad_norm": 0.03854681923985481, + "learning_rate": 2.8918247531127696e-06, + "loss": 0.001, + "step": 210390 + }, + { + "epoch": 1.3494402452594818, + "grad_norm": 0.0791763886809349, + "learning_rate": 2.8913172465461824e-06, + "loss": 0.001, + "step": 210400 + }, + { + "epoch": 1.349504382153268, + "grad_norm": 0.1294926255941391, + "learning_rate": 2.890809766402897e-06, + "loss": 0.0016, + "step": 210410 + }, + { + "epoch": 1.349568519047054, + "grad_norm": 0.06261022388935089, + "learning_rate": 2.8903023126892714e-06, + "loss": 0.0011, + "step": 210420 + }, + { + "epoch": 1.34963265594084, + "grad_norm": 0.06749068945646286, + "learning_rate": 2.8897948854116676e-06, + "loss": 0.0016, + "step": 210430 + }, + { + "epoch": 1.3496967928346262, + "grad_norm": 0.24904891848564148, + "learning_rate": 2.8892874845764414e-06, + "loss": 0.0017, + "step": 210440 + }, + { + "epoch": 1.3497609297284123, + "grad_norm": 0.07309553027153015, + "learning_rate": 2.8887801101899495e-06, + "loss": 0.0018, + "step": 210450 + }, + { + "epoch": 1.3498250666221985, + "grad_norm": 0.08268938958644867, + "learning_rate": 2.888272762258554e-06, + "loss": 0.0007, + "step": 210460 + }, + { + "epoch": 1.3498892035159846, + "grad_norm": 0.11225002259016037, + "learning_rate": 2.8877654407886102e-06, + "loss": 0.0026, + "step": 210470 + }, + { + "epoch": 1.3499533404097706, + "grad_norm": 0.12459053844213486, + "learning_rate": 2.8872581457864747e-06, + "loss": 0.0027, + "step": 210480 + }, + { + "epoch": 1.3500174773035567, + "grad_norm": 0.09756121784448624, + "learning_rate": 2.886750877258503e-06, + "loss": 0.0016, + "step": 210490 + }, + { + "epoch": 1.3500816141973428, + "grad_norm": 0.05711054429411888, + "learning_rate": 2.8862436352110543e-06, + "loss": 0.0014, + "step": 210500 + }, + { + "epoch": 1.3501457510911288, + "grad_norm": 0.2802426218986511, + "learning_rate": 2.8857364196504843e-06, + "loss": 0.0022, + "step": 210510 + }, + { + "epoch": 1.350209887984915, + "grad_norm": 0.01650386117398739, + "learning_rate": 2.8852292305831473e-06, + "loss": 0.0007, + "step": 210520 + }, + { + "epoch": 1.350274024878701, + "grad_norm": 0.03775866702198982, + "learning_rate": 2.8847220680153983e-06, + "loss": 0.0008, + "step": 210530 + }, + { + "epoch": 1.3503381617724872, + "grad_norm": 0.07077381014823914, + "learning_rate": 2.884214931953595e-06, + "loss": 0.0015, + "step": 210540 + }, + { + "epoch": 1.3504022986662734, + "grad_norm": 0.0790569856762886, + "learning_rate": 2.8837078224040904e-06, + "loss": 0.0021, + "step": 210550 + }, + { + "epoch": 1.3504664355600595, + "grad_norm": 0.08557630330324173, + "learning_rate": 2.8832007393732404e-06, + "loss": 0.0006, + "step": 210560 + }, + { + "epoch": 1.3505305724538454, + "grad_norm": 0.034811779856681824, + "learning_rate": 2.8826936828673953e-06, + "loss": 0.0011, + "step": 210570 + }, + { + "epoch": 1.3505947093476316, + "grad_norm": 0.01789713464677334, + "learning_rate": 2.8821866528929133e-06, + "loss": 0.0011, + "step": 210580 + }, + { + "epoch": 1.3506588462414177, + "grad_norm": 0.11298376321792603, + "learning_rate": 2.8816796494561468e-06, + "loss": 0.0009, + "step": 210590 + }, + { + "epoch": 1.3507229831352037, + "grad_norm": 0.0268674548715353, + "learning_rate": 2.881172672563446e-06, + "loss": 0.0038, + "step": 210600 + }, + { + "epoch": 1.3507871200289898, + "grad_norm": 0.07442095130681992, + "learning_rate": 2.8806657222211677e-06, + "loss": 0.001, + "step": 210610 + }, + { + "epoch": 1.350851256922776, + "grad_norm": 0.05544663220643997, + "learning_rate": 2.8801587984356623e-06, + "loss": 0.0011, + "step": 210620 + }, + { + "epoch": 1.3509153938165621, + "grad_norm": 0.03473219275474548, + "learning_rate": 2.879651901213283e-06, + "loss": 0.0005, + "step": 210630 + }, + { + "epoch": 1.3509795307103483, + "grad_norm": 0.039361413568258286, + "learning_rate": 2.8791450305603773e-06, + "loss": 0.0016, + "step": 210640 + }, + { + "epoch": 1.3510436676041342, + "grad_norm": 0.0213471706956625, + "learning_rate": 2.8786381864833014e-06, + "loss": 0.0008, + "step": 210650 + }, + { + "epoch": 1.3511078044979203, + "grad_norm": 0.03584079071879387, + "learning_rate": 2.8781313689884073e-06, + "loss": 0.002, + "step": 210660 + }, + { + "epoch": 1.3511719413917065, + "grad_norm": 0.08981915563344955, + "learning_rate": 2.877624578082043e-06, + "loss": 0.0016, + "step": 210670 + }, + { + "epoch": 1.3512360782854926, + "grad_norm": 0.08490435034036636, + "learning_rate": 2.8771178137705595e-06, + "loss": 0.0018, + "step": 210680 + }, + { + "epoch": 1.3513002151792786, + "grad_norm": 0.07120810449123383, + "learning_rate": 2.876611076060305e-06, + "loss": 0.002, + "step": 210690 + }, + { + "epoch": 1.3513643520730647, + "grad_norm": 0.04556477442383766, + "learning_rate": 2.876104364957634e-06, + "loss": 0.0021, + "step": 210700 + }, + { + "epoch": 1.3514284889668509, + "grad_norm": 0.0411594994366169, + "learning_rate": 2.875597680468892e-06, + "loss": 0.0014, + "step": 210710 + }, + { + "epoch": 1.351492625860637, + "grad_norm": 0.13774462044239044, + "learning_rate": 2.8750910226004287e-06, + "loss": 0.002, + "step": 210720 + }, + { + "epoch": 1.3515567627544232, + "grad_norm": 0.2863829731941223, + "learning_rate": 2.8745843913585946e-06, + "loss": 0.0016, + "step": 210730 + }, + { + "epoch": 1.351620899648209, + "grad_norm": 0.02558436617255211, + "learning_rate": 2.874077786749738e-06, + "loss": 0.0011, + "step": 210740 + }, + { + "epoch": 1.3516850365419952, + "grad_norm": 0.10683951526880264, + "learning_rate": 2.8735712087802055e-06, + "loss": 0.0038, + "step": 210750 + }, + { + "epoch": 1.3517491734357814, + "grad_norm": 0.07721851021051407, + "learning_rate": 2.8730646574563437e-06, + "loss": 0.0022, + "step": 210760 + }, + { + "epoch": 1.3518133103295673, + "grad_norm": 0.024722186848521233, + "learning_rate": 2.8725581327845033e-06, + "loss": 0.002, + "step": 210770 + }, + { + "epoch": 1.3518774472233535, + "grad_norm": 0.2504269480705261, + "learning_rate": 2.8720516347710305e-06, + "loss": 0.0026, + "step": 210780 + }, + { + "epoch": 1.3519415841171396, + "grad_norm": 0.059513602405786514, + "learning_rate": 2.8715451634222713e-06, + "loss": 0.0016, + "step": 210790 + }, + { + "epoch": 1.3520057210109258, + "grad_norm": 0.04534576088190079, + "learning_rate": 2.8710387187445697e-06, + "loss": 0.002, + "step": 210800 + }, + { + "epoch": 1.352069857904712, + "grad_norm": 0.005066273733973503, + "learning_rate": 2.8705323007442774e-06, + "loss": 0.0007, + "step": 210810 + }, + { + "epoch": 1.352133994798498, + "grad_norm": 0.04880732670426369, + "learning_rate": 2.870025909427737e-06, + "loss": 0.0007, + "step": 210820 + }, + { + "epoch": 1.352198131692284, + "grad_norm": 0.10243187099695206, + "learning_rate": 2.869519544801295e-06, + "loss": 0.0015, + "step": 210830 + }, + { + "epoch": 1.3522622685860701, + "grad_norm": 0.08573068678379059, + "learning_rate": 2.8690132068712926e-06, + "loss": 0.0012, + "step": 210840 + }, + { + "epoch": 1.3523264054798563, + "grad_norm": 0.15311363339424133, + "learning_rate": 2.8685068956440803e-06, + "loss": 0.0012, + "step": 210850 + }, + { + "epoch": 1.3523905423736422, + "grad_norm": 0.2949298918247223, + "learning_rate": 2.868000611126e-06, + "loss": 0.0019, + "step": 210860 + }, + { + "epoch": 1.3524546792674284, + "grad_norm": 0.02028181403875351, + "learning_rate": 2.8674943533233936e-06, + "loss": 0.0014, + "step": 210870 + }, + { + "epoch": 1.3525188161612145, + "grad_norm": 0.05514419823884964, + "learning_rate": 2.8669881222426086e-06, + "loss": 0.0019, + "step": 210880 + }, + { + "epoch": 1.3525829530550006, + "grad_norm": 0.03877931460738182, + "learning_rate": 2.8664819178899876e-06, + "loss": 0.0011, + "step": 210890 + }, + { + "epoch": 1.3526470899487868, + "grad_norm": 0.04640922695398331, + "learning_rate": 2.8659757402718724e-06, + "loss": 0.0021, + "step": 210900 + }, + { + "epoch": 1.3527112268425727, + "grad_norm": 0.008041328750550747, + "learning_rate": 2.865469589394605e-06, + "loss": 0.0009, + "step": 210910 + }, + { + "epoch": 1.3527753637363589, + "grad_norm": 0.0224269088357687, + "learning_rate": 2.864963465264531e-06, + "loss": 0.0008, + "step": 210920 + }, + { + "epoch": 1.352839500630145, + "grad_norm": 0.07984951883554459, + "learning_rate": 2.8644573678879907e-06, + "loss": 0.0008, + "step": 210930 + }, + { + "epoch": 1.352903637523931, + "grad_norm": 0.00870156567543745, + "learning_rate": 2.8639512972713253e-06, + "loss": 0.0021, + "step": 210940 + }, + { + "epoch": 1.352967774417717, + "grad_norm": 0.029904576018452644, + "learning_rate": 2.8634452534208747e-06, + "loss": 0.0007, + "step": 210950 + }, + { + "epoch": 1.3530319113115032, + "grad_norm": 0.01964622177183628, + "learning_rate": 2.862939236342984e-06, + "loss": 0.002, + "step": 210960 + }, + { + "epoch": 1.3530960482052894, + "grad_norm": 0.05954081937670708, + "learning_rate": 2.8624332460439928e-06, + "loss": 0.0007, + "step": 210970 + }, + { + "epoch": 1.3531601850990755, + "grad_norm": 0.07545377314090729, + "learning_rate": 2.8619272825302402e-06, + "loss": 0.0011, + "step": 210980 + }, + { + "epoch": 1.3532243219928617, + "grad_norm": 0.07013353705406189, + "learning_rate": 2.8614213458080653e-06, + "loss": 0.0017, + "step": 210990 + }, + { + "epoch": 1.3532884588866476, + "grad_norm": 0.011843344196677208, + "learning_rate": 2.860915435883811e-06, + "loss": 0.0005, + "step": 211000 + }, + { + "epoch": 1.3533525957804338, + "grad_norm": 0.07124199718236923, + "learning_rate": 2.860409552763815e-06, + "loss": 0.0011, + "step": 211010 + }, + { + "epoch": 1.35341673267422, + "grad_norm": 0.19323082268238068, + "learning_rate": 2.8599036964544147e-06, + "loss": 0.0014, + "step": 211020 + }, + { + "epoch": 1.3534808695680058, + "grad_norm": 0.045922085642814636, + "learning_rate": 2.8593978669619522e-06, + "loss": 0.0007, + "step": 211030 + }, + { + "epoch": 1.353545006461792, + "grad_norm": 0.42880502343177795, + "learning_rate": 2.858892064292764e-06, + "loss": 0.0008, + "step": 211040 + }, + { + "epoch": 1.3536091433555781, + "grad_norm": 0.06439348310232162, + "learning_rate": 2.858386288453189e-06, + "loss": 0.0007, + "step": 211050 + }, + { + "epoch": 1.3536732802493643, + "grad_norm": 0.13360503315925598, + "learning_rate": 2.8578805394495623e-06, + "loss": 0.0013, + "step": 211060 + }, + { + "epoch": 1.3537374171431504, + "grad_norm": 0.10818067193031311, + "learning_rate": 2.8573748172882254e-06, + "loss": 0.0012, + "step": 211070 + }, + { + "epoch": 1.3538015540369364, + "grad_norm": 0.1894940733909607, + "learning_rate": 2.8568691219755133e-06, + "loss": 0.0031, + "step": 211080 + }, + { + "epoch": 1.3538656909307225, + "grad_norm": 0.02911425195634365, + "learning_rate": 2.856363453517763e-06, + "loss": 0.001, + "step": 211090 + }, + { + "epoch": 1.3539298278245087, + "grad_norm": 0.15279807150363922, + "learning_rate": 2.855857811921309e-06, + "loss": 0.0015, + "step": 211100 + }, + { + "epoch": 1.3539939647182948, + "grad_norm": 0.004417724907398224, + "learning_rate": 2.8553521971924903e-06, + "loss": 0.0009, + "step": 211110 + }, + { + "epoch": 1.3540581016120807, + "grad_norm": 0.14657741785049438, + "learning_rate": 2.8548466093376415e-06, + "loss": 0.0008, + "step": 211120 + }, + { + "epoch": 1.3541222385058669, + "grad_norm": 0.474303275346756, + "learning_rate": 2.8543410483630974e-06, + "loss": 0.0025, + "step": 211130 + }, + { + "epoch": 1.354186375399653, + "grad_norm": 0.08662206679582596, + "learning_rate": 2.853835514275192e-06, + "loss": 0.0017, + "step": 211140 + }, + { + "epoch": 1.3542505122934392, + "grad_norm": 0.0414050854742527, + "learning_rate": 2.853330007080264e-06, + "loss": 0.0012, + "step": 211150 + }, + { + "epoch": 1.3543146491872253, + "grad_norm": 0.1096024289727211, + "learning_rate": 2.8528245267846446e-06, + "loss": 0.0016, + "step": 211160 + }, + { + "epoch": 1.3543787860810113, + "grad_norm": 0.19043126702308655, + "learning_rate": 2.852319073394666e-06, + "loss": 0.001, + "step": 211170 + }, + { + "epoch": 1.3544429229747974, + "grad_norm": 0.045862916857004166, + "learning_rate": 2.851813646916667e-06, + "loss": 0.0011, + "step": 211180 + }, + { + "epoch": 1.3545070598685836, + "grad_norm": 0.1857152283191681, + "learning_rate": 2.8513082473569775e-06, + "loss": 0.0022, + "step": 211190 + }, + { + "epoch": 1.3545711967623695, + "grad_norm": 0.09480617940425873, + "learning_rate": 2.850802874721932e-06, + "loss": 0.0023, + "step": 211200 + }, + { + "epoch": 1.3546353336561556, + "grad_norm": 0.01438390463590622, + "learning_rate": 2.8502975290178604e-06, + "loss": 0.0008, + "step": 211210 + }, + { + "epoch": 1.3546994705499418, + "grad_norm": 0.053283028304576874, + "learning_rate": 2.849792210251099e-06, + "loss": 0.0012, + "step": 211220 + }, + { + "epoch": 1.354763607443728, + "grad_norm": 0.014264298602938652, + "learning_rate": 2.849286918427978e-06, + "loss": 0.0013, + "step": 211230 + }, + { + "epoch": 1.354827744337514, + "grad_norm": 0.017701173201203346, + "learning_rate": 2.8487816535548286e-06, + "loss": 0.0051, + "step": 211240 + }, + { + "epoch": 1.3548918812313002, + "grad_norm": 0.03873252496123314, + "learning_rate": 2.848276415637981e-06, + "loss": 0.0009, + "step": 211250 + }, + { + "epoch": 1.3549560181250861, + "grad_norm": 0.015451488085091114, + "learning_rate": 2.8477712046837697e-06, + "loss": 0.0015, + "step": 211260 + }, + { + "epoch": 1.3550201550188723, + "grad_norm": 0.08412735164165497, + "learning_rate": 2.8472660206985237e-06, + "loss": 0.0011, + "step": 211270 + }, + { + "epoch": 1.3550842919126584, + "grad_norm": 0.06948544085025787, + "learning_rate": 2.84676086368857e-06, + "loss": 0.0023, + "step": 211280 + }, + { + "epoch": 1.3551484288064444, + "grad_norm": 0.07955335080623627, + "learning_rate": 2.846255733660245e-06, + "loss": 0.0008, + "step": 211290 + }, + { + "epoch": 1.3552125657002305, + "grad_norm": 0.0016547439154237509, + "learning_rate": 2.8457506306198733e-06, + "loss": 0.0007, + "step": 211300 + }, + { + "epoch": 1.3552767025940167, + "grad_norm": 0.0419953428208828, + "learning_rate": 2.8452455545737866e-06, + "loss": 0.0013, + "step": 211310 + }, + { + "epoch": 1.3553408394878028, + "grad_norm": 0.14658237993717194, + "learning_rate": 2.8447405055283117e-06, + "loss": 0.001, + "step": 211320 + }, + { + "epoch": 1.355404976381589, + "grad_norm": 0.0616254098713398, + "learning_rate": 2.8442354834897793e-06, + "loss": 0.0008, + "step": 211330 + }, + { + "epoch": 1.355469113275375, + "grad_norm": 0.013792560435831547, + "learning_rate": 2.843730488464518e-06, + "loss": 0.0017, + "step": 211340 + }, + { + "epoch": 1.355533250169161, + "grad_norm": 0.062466397881507874, + "learning_rate": 2.8432255204588542e-06, + "loss": 0.0011, + "step": 211350 + }, + { + "epoch": 1.3555973870629472, + "grad_norm": 0.0883217379450798, + "learning_rate": 2.8427205794791146e-06, + "loss": 0.0012, + "step": 211360 + }, + { + "epoch": 1.3556615239567331, + "grad_norm": 0.11724873632192612, + "learning_rate": 2.8422156655316297e-06, + "loss": 0.0014, + "step": 211370 + }, + { + "epoch": 1.3557256608505193, + "grad_norm": 0.07271930575370789, + "learning_rate": 2.8417107786227244e-06, + "loss": 0.0012, + "step": 211380 + }, + { + "epoch": 1.3557897977443054, + "grad_norm": 0.06599129736423492, + "learning_rate": 2.841205918758726e-06, + "loss": 0.0016, + "step": 211390 + }, + { + "epoch": 1.3558539346380916, + "grad_norm": 0.0861605778336525, + "learning_rate": 2.8407010859459577e-06, + "loss": 0.0024, + "step": 211400 + }, + { + "epoch": 1.3559180715318777, + "grad_norm": 0.12029363214969635, + "learning_rate": 2.84019628019075e-06, + "loss": 0.001, + "step": 211410 + }, + { + "epoch": 1.3559822084256639, + "grad_norm": 0.10664620250463486, + "learning_rate": 2.839691501499425e-06, + "loss": 0.0026, + "step": 211420 + }, + { + "epoch": 1.3560463453194498, + "grad_norm": 0.06973236799240112, + "learning_rate": 2.8391867498783117e-06, + "loss": 0.0014, + "step": 211430 + }, + { + "epoch": 1.356110482213236, + "grad_norm": 0.07850681245326996, + "learning_rate": 2.83868202533373e-06, + "loss": 0.0014, + "step": 211440 + }, + { + "epoch": 1.356174619107022, + "grad_norm": 0.07012001425027847, + "learning_rate": 2.838177327872009e-06, + "loss": 0.0015, + "step": 211450 + }, + { + "epoch": 1.356238756000808, + "grad_norm": 0.05666491761803627, + "learning_rate": 2.8376726574994722e-06, + "loss": 0.0025, + "step": 211460 + }, + { + "epoch": 1.3563028928945942, + "grad_norm": 0.13862815499305725, + "learning_rate": 2.8371680142224422e-06, + "loss": 0.0019, + "step": 211470 + }, + { + "epoch": 1.3563670297883803, + "grad_norm": 0.1430405229330063, + "learning_rate": 2.83666339804724e-06, + "loss": 0.001, + "step": 211480 + }, + { + "epoch": 1.3564311666821665, + "grad_norm": 0.04503755271434784, + "learning_rate": 2.836158808980194e-06, + "loss": 0.0011, + "step": 211490 + }, + { + "epoch": 1.3564953035759526, + "grad_norm": 0.22144374251365662, + "learning_rate": 2.8356542470276242e-06, + "loss": 0.0012, + "step": 211500 + }, + { + "epoch": 1.3565594404697385, + "grad_norm": 0.1325940489768982, + "learning_rate": 2.8351497121958536e-06, + "loss": 0.001, + "step": 211510 + }, + { + "epoch": 1.3566235773635247, + "grad_norm": 0.159137561917305, + "learning_rate": 2.8346452044912033e-06, + "loss": 0.0027, + "step": 211520 + }, + { + "epoch": 1.3566877142573108, + "grad_norm": 0.043283578008413315, + "learning_rate": 2.834140723919997e-06, + "loss": 0.001, + "step": 211530 + }, + { + "epoch": 1.356751851151097, + "grad_norm": 0.14707869291305542, + "learning_rate": 2.8336362704885554e-06, + "loss": 0.001, + "step": 211540 + }, + { + "epoch": 1.356815988044883, + "grad_norm": 0.10080327838659286, + "learning_rate": 2.833131844203198e-06, + "loss": 0.0017, + "step": 211550 + }, + { + "epoch": 1.356880124938669, + "grad_norm": 0.09956194460391998, + "learning_rate": 2.8326274450702497e-06, + "loss": 0.0018, + "step": 211560 + }, + { + "epoch": 1.3569442618324552, + "grad_norm": 0.24537304043769836, + "learning_rate": 2.8321230730960274e-06, + "loss": 0.0013, + "step": 211570 + }, + { + "epoch": 1.3570083987262413, + "grad_norm": 0.0012119578896090388, + "learning_rate": 2.831618728286853e-06, + "loss": 0.0006, + "step": 211580 + }, + { + "epoch": 1.3570725356200275, + "grad_norm": 0.06794049590826035, + "learning_rate": 2.8311144106490435e-06, + "loss": 0.0015, + "step": 211590 + }, + { + "epoch": 1.3571366725138134, + "grad_norm": 0.1144900992512703, + "learning_rate": 2.8306101201889226e-06, + "loss": 0.0014, + "step": 211600 + }, + { + "epoch": 1.3572008094075996, + "grad_norm": 0.0597737655043602, + "learning_rate": 2.830105856912807e-06, + "loss": 0.0013, + "step": 211610 + }, + { + "epoch": 1.3572649463013857, + "grad_norm": 0.08430390805006027, + "learning_rate": 2.8296016208270162e-06, + "loss": 0.0017, + "step": 211620 + }, + { + "epoch": 1.3573290831951716, + "grad_norm": 0.06578479707241058, + "learning_rate": 2.829097411937867e-06, + "loss": 0.0006, + "step": 211630 + }, + { + "epoch": 1.3573932200889578, + "grad_norm": 0.16109733283519745, + "learning_rate": 2.8285932302516794e-06, + "loss": 0.002, + "step": 211640 + }, + { + "epoch": 1.357457356982744, + "grad_norm": 0.021322287619113922, + "learning_rate": 2.828089075774771e-06, + "loss": 0.0006, + "step": 211650 + }, + { + "epoch": 1.35752149387653, + "grad_norm": 0.08686596155166626, + "learning_rate": 2.827584948513459e-06, + "loss": 0.0011, + "step": 211660 + }, + { + "epoch": 1.3575856307703162, + "grad_norm": 0.031903836876153946, + "learning_rate": 2.8270808484740576e-06, + "loss": 0.0011, + "step": 211670 + }, + { + "epoch": 1.3576497676641024, + "grad_norm": 0.12447334825992584, + "learning_rate": 2.8265767756628883e-06, + "loss": 0.0016, + "step": 211680 + }, + { + "epoch": 1.3577139045578883, + "grad_norm": 0.28986862301826477, + "learning_rate": 2.8260727300862655e-06, + "loss": 0.0021, + "step": 211690 + }, + { + "epoch": 1.3577780414516745, + "grad_norm": 0.024045251309871674, + "learning_rate": 2.825568711750503e-06, + "loss": 0.0024, + "step": 211700 + }, + { + "epoch": 1.3578421783454606, + "grad_norm": 0.10672322660684586, + "learning_rate": 2.825064720661921e-06, + "loss": 0.0017, + "step": 211710 + }, + { + "epoch": 1.3579063152392465, + "grad_norm": 0.06658641993999481, + "learning_rate": 2.824560756826832e-06, + "loss": 0.0016, + "step": 211720 + }, + { + "epoch": 1.3579704521330327, + "grad_norm": 0.011623168364167213, + "learning_rate": 2.8240568202515517e-06, + "loss": 0.0015, + "step": 211730 + }, + { + "epoch": 1.3580345890268188, + "grad_norm": 0.029520215466618538, + "learning_rate": 2.823552910942392e-06, + "loss": 0.0012, + "step": 211740 + }, + { + "epoch": 1.358098725920605, + "grad_norm": 0.04139503836631775, + "learning_rate": 2.823049028905672e-06, + "loss": 0.0004, + "step": 211750 + }, + { + "epoch": 1.3581628628143911, + "grad_norm": 0.11593854427337646, + "learning_rate": 2.8225451741477037e-06, + "loss": 0.0016, + "step": 211760 + }, + { + "epoch": 1.358226999708177, + "grad_norm": 0.02130136266350746, + "learning_rate": 2.8220413466748008e-06, + "loss": 0.0017, + "step": 211770 + }, + { + "epoch": 1.3582911366019632, + "grad_norm": 0.07793150842189789, + "learning_rate": 2.8215375464932748e-06, + "loss": 0.0014, + "step": 211780 + }, + { + "epoch": 1.3583552734957494, + "grad_norm": 0.07984554022550583, + "learning_rate": 2.8210337736094417e-06, + "loss": 0.0008, + "step": 211790 + }, + { + "epoch": 1.3584194103895353, + "grad_norm": 0.04542630538344383, + "learning_rate": 2.8205300280296123e-06, + "loss": 0.0023, + "step": 211800 + }, + { + "epoch": 1.3584835472833214, + "grad_norm": 0.05402369797229767, + "learning_rate": 2.8200263097601e-06, + "loss": 0.0013, + "step": 211810 + }, + { + "epoch": 1.3585476841771076, + "grad_norm": 0.09029842913150787, + "learning_rate": 2.8195226188072135e-06, + "loss": 0.0014, + "step": 211820 + }, + { + "epoch": 1.3586118210708937, + "grad_norm": 0.06096288934350014, + "learning_rate": 2.8190189551772695e-06, + "loss": 0.0016, + "step": 211830 + }, + { + "epoch": 1.3586759579646799, + "grad_norm": 0.09616797417402267, + "learning_rate": 2.818515318876576e-06, + "loss": 0.0008, + "step": 211840 + }, + { + "epoch": 1.358740094858466, + "grad_norm": 0.09054041653871536, + "learning_rate": 2.818011709911443e-06, + "loss": 0.0017, + "step": 211850 + }, + { + "epoch": 1.358804231752252, + "grad_norm": 0.10492989420890808, + "learning_rate": 2.8175081282881843e-06, + "loss": 0.0008, + "step": 211860 + }, + { + "epoch": 1.358868368646038, + "grad_norm": 0.49018484354019165, + "learning_rate": 2.8170045740131093e-06, + "loss": 0.0048, + "step": 211870 + }, + { + "epoch": 1.3589325055398243, + "grad_norm": 0.30528512597084045, + "learning_rate": 2.816501047092527e-06, + "loss": 0.0017, + "step": 211880 + }, + { + "epoch": 1.3589966424336102, + "grad_norm": 0.035619039088487625, + "learning_rate": 2.815997547532745e-06, + "loss": 0.0013, + "step": 211890 + }, + { + "epoch": 1.3590607793273963, + "grad_norm": 0.08016897737979889, + "learning_rate": 2.8154940753400763e-06, + "loss": 0.0013, + "step": 211900 + }, + { + "epoch": 1.3591249162211825, + "grad_norm": 0.058081433176994324, + "learning_rate": 2.8149906305208285e-06, + "loss": 0.0007, + "step": 211910 + }, + { + "epoch": 1.3591890531149686, + "grad_norm": 0.03151792287826538, + "learning_rate": 2.8144872130813097e-06, + "loss": 0.0017, + "step": 211920 + }, + { + "epoch": 1.3592531900087548, + "grad_norm": 0.20671361684799194, + "learning_rate": 2.813983823027826e-06, + "loss": 0.0025, + "step": 211930 + }, + { + "epoch": 1.3593173269025407, + "grad_norm": 0.0331951342523098, + "learning_rate": 2.8134804603666887e-06, + "loss": 0.0017, + "step": 211940 + }, + { + "epoch": 1.3593814637963268, + "grad_norm": 0.09501717239618301, + "learning_rate": 2.812977125104205e-06, + "loss": 0.0032, + "step": 211950 + }, + { + "epoch": 1.359445600690113, + "grad_norm": 0.14686033129692078, + "learning_rate": 2.8124738172466805e-06, + "loss": 0.0013, + "step": 211960 + }, + { + "epoch": 1.3595097375838991, + "grad_norm": 0.08381103724241257, + "learning_rate": 2.81197053680042e-06, + "loss": 0.0018, + "step": 211970 + }, + { + "epoch": 1.359573874477685, + "grad_norm": 0.17146036028862, + "learning_rate": 2.811467283771735e-06, + "loss": 0.0013, + "step": 211980 + }, + { + "epoch": 1.3596380113714712, + "grad_norm": 0.19006793200969696, + "learning_rate": 2.810964058166929e-06, + "loss": 0.0018, + "step": 211990 + }, + { + "epoch": 1.3597021482652574, + "grad_norm": 0.023726634681224823, + "learning_rate": 2.8104608599923055e-06, + "loss": 0.0029, + "step": 212000 + }, + { + "epoch": 1.3597662851590435, + "grad_norm": 0.07961488515138626, + "learning_rate": 2.8099576892541746e-06, + "loss": 0.0015, + "step": 212010 + }, + { + "epoch": 1.3598304220528297, + "grad_norm": 0.17800407111644745, + "learning_rate": 2.809454545958839e-06, + "loss": 0.0013, + "step": 212020 + }, + { + "epoch": 1.3598945589466156, + "grad_norm": 0.12595905363559723, + "learning_rate": 2.808951430112603e-06, + "loss": 0.0012, + "step": 212030 + }, + { + "epoch": 1.3599586958404017, + "grad_norm": 0.09590614587068558, + "learning_rate": 2.80844834172177e-06, + "loss": 0.0019, + "step": 212040 + }, + { + "epoch": 1.360022832734188, + "grad_norm": 0.03253532946109772, + "learning_rate": 2.8079452807926477e-06, + "loss": 0.0007, + "step": 212050 + }, + { + "epoch": 1.3600869696279738, + "grad_norm": 0.0494966059923172, + "learning_rate": 2.8074422473315376e-06, + "loss": 0.0015, + "step": 212060 + }, + { + "epoch": 1.36015110652176, + "grad_norm": 0.0349554680287838, + "learning_rate": 2.8069392413447425e-06, + "loss": 0.0033, + "step": 212070 + }, + { + "epoch": 1.3602152434155461, + "grad_norm": 0.0674915537238121, + "learning_rate": 2.806436262838565e-06, + "loss": 0.0021, + "step": 212080 + }, + { + "epoch": 1.3602793803093323, + "grad_norm": 0.0689818412065506, + "learning_rate": 2.8059333118193103e-06, + "loss": 0.0014, + "step": 212090 + }, + { + "epoch": 1.3603435172031184, + "grad_norm": 0.052752457559108734, + "learning_rate": 2.80543038829328e-06, + "loss": 0.001, + "step": 212100 + }, + { + "epoch": 1.3604076540969046, + "grad_norm": 0.12120924890041351, + "learning_rate": 2.8049274922667745e-06, + "loss": 0.0013, + "step": 212110 + }, + { + "epoch": 1.3604717909906905, + "grad_norm": 0.23242832720279694, + "learning_rate": 2.8044246237460944e-06, + "loss": 0.0013, + "step": 212120 + }, + { + "epoch": 1.3605359278844766, + "grad_norm": 0.15054942667484283, + "learning_rate": 2.8039217827375455e-06, + "loss": 0.0027, + "step": 212130 + }, + { + "epoch": 1.3606000647782628, + "grad_norm": 0.06705403327941895, + "learning_rate": 2.8034189692474257e-06, + "loss": 0.0011, + "step": 212140 + }, + { + "epoch": 1.3606642016720487, + "grad_norm": 0.06358052045106888, + "learning_rate": 2.802916183282034e-06, + "loss": 0.001, + "step": 212150 + }, + { + "epoch": 1.3607283385658349, + "grad_norm": 0.027282526716589928, + "learning_rate": 2.8024134248476746e-06, + "loss": 0.0043, + "step": 212160 + }, + { + "epoch": 1.360792475459621, + "grad_norm": 0.016235968098044395, + "learning_rate": 2.8019106939506458e-06, + "loss": 0.0017, + "step": 212170 + }, + { + "epoch": 1.3608566123534072, + "grad_norm": 0.10143746435642242, + "learning_rate": 2.8014079905972467e-06, + "loss": 0.0014, + "step": 212180 + }, + { + "epoch": 1.3609207492471933, + "grad_norm": 0.08578982949256897, + "learning_rate": 2.800905314793776e-06, + "loss": 0.0014, + "step": 212190 + }, + { + "epoch": 1.3609848861409792, + "grad_norm": 0.14914195239543915, + "learning_rate": 2.8004026665465324e-06, + "loss": 0.002, + "step": 212200 + }, + { + "epoch": 1.3610490230347654, + "grad_norm": 0.14445620775222778, + "learning_rate": 2.7999000458618176e-06, + "loss": 0.0009, + "step": 212210 + }, + { + "epoch": 1.3611131599285515, + "grad_norm": 0.04894477128982544, + "learning_rate": 2.799397452745927e-06, + "loss": 0.0009, + "step": 212220 + }, + { + "epoch": 1.3611772968223377, + "grad_norm": 0.09352356940507889, + "learning_rate": 2.7988948872051596e-06, + "loss": 0.0014, + "step": 212230 + }, + { + "epoch": 1.3612414337161236, + "grad_norm": 0.021016614511609077, + "learning_rate": 2.798392349245811e-06, + "loss": 0.0011, + "step": 212240 + }, + { + "epoch": 1.3613055706099098, + "grad_norm": 0.038588326424360275, + "learning_rate": 2.7978898388741805e-06, + "loss": 0.001, + "step": 212250 + }, + { + "epoch": 1.361369707503696, + "grad_norm": 0.25138717889785767, + "learning_rate": 2.7973873560965647e-06, + "loss": 0.0015, + "step": 212260 + }, + { + "epoch": 1.361433844397482, + "grad_norm": 0.06818045675754547, + "learning_rate": 2.796884900919258e-06, + "loss": 0.0015, + "step": 212270 + }, + { + "epoch": 1.3614979812912682, + "grad_norm": 0.07317348569631577, + "learning_rate": 2.796382473348559e-06, + "loss": 0.0012, + "step": 212280 + }, + { + "epoch": 1.3615621181850541, + "grad_norm": 0.12382987886667252, + "learning_rate": 2.795880073390763e-06, + "loss": 0.0012, + "step": 212290 + }, + { + "epoch": 1.3616262550788403, + "grad_norm": 0.12995034456253052, + "learning_rate": 2.7953777010521656e-06, + "loss": 0.0044, + "step": 212300 + }, + { + "epoch": 1.3616903919726264, + "grad_norm": 0.016672872006893158, + "learning_rate": 2.7948753563390585e-06, + "loss": 0.0021, + "step": 212310 + }, + { + "epoch": 1.3617545288664124, + "grad_norm": 0.07339946180582047, + "learning_rate": 2.7943730392577416e-06, + "loss": 0.0013, + "step": 212320 + }, + { + "epoch": 1.3618186657601985, + "grad_norm": 0.1623236984014511, + "learning_rate": 2.793870749814506e-06, + "loss": 0.0015, + "step": 212330 + }, + { + "epoch": 1.3618828026539846, + "grad_norm": 0.11059384793043137, + "learning_rate": 2.7933684880156475e-06, + "loss": 0.0015, + "step": 212340 + }, + { + "epoch": 1.3619469395477708, + "grad_norm": 0.06458184868097305, + "learning_rate": 2.792866253867457e-06, + "loss": 0.001, + "step": 212350 + }, + { + "epoch": 1.362011076441557, + "grad_norm": 0.032684240490198135, + "learning_rate": 2.7923640473762307e-06, + "loss": 0.0006, + "step": 212360 + }, + { + "epoch": 1.362075213335343, + "grad_norm": 0.14689889550209045, + "learning_rate": 2.791861868548261e-06, + "loss": 0.003, + "step": 212370 + }, + { + "epoch": 1.362139350229129, + "grad_norm": 0.039106424897909164, + "learning_rate": 2.791359717389841e-06, + "loss": 0.0016, + "step": 212380 + }, + { + "epoch": 1.3622034871229152, + "grad_norm": 0.16570846736431122, + "learning_rate": 2.79085759390726e-06, + "loss": 0.002, + "step": 212390 + }, + { + "epoch": 1.3622676240167013, + "grad_norm": 0.02795327454805374, + "learning_rate": 2.7903554981068135e-06, + "loss": 0.0014, + "step": 212400 + }, + { + "epoch": 1.3623317609104872, + "grad_norm": 0.08838541805744171, + "learning_rate": 2.789853429994792e-06, + "loss": 0.0012, + "step": 212410 + }, + { + "epoch": 1.3623958978042734, + "grad_norm": 0.09232831746339798, + "learning_rate": 2.7893513895774856e-06, + "loss": 0.002, + "step": 212420 + }, + { + "epoch": 1.3624600346980595, + "grad_norm": 0.07343509048223495, + "learning_rate": 2.7888493768611867e-06, + "loss": 0.0023, + "step": 212430 + }, + { + "epoch": 1.3625241715918457, + "grad_norm": 0.044543057680130005, + "learning_rate": 2.7883473918521864e-06, + "loss": 0.0028, + "step": 212440 + }, + { + "epoch": 1.3625883084856318, + "grad_norm": 0.693203330039978, + "learning_rate": 2.787845434556774e-06, + "loss": 0.0024, + "step": 212450 + }, + { + "epoch": 1.3626524453794178, + "grad_norm": 0.20361362397670746, + "learning_rate": 2.787343504981237e-06, + "loss": 0.0016, + "step": 212460 + }, + { + "epoch": 1.362716582273204, + "grad_norm": 0.2686789035797119, + "learning_rate": 2.786841603131869e-06, + "loss": 0.0023, + "step": 212470 + }, + { + "epoch": 1.36278071916699, + "grad_norm": 0.09884592890739441, + "learning_rate": 2.786339729014958e-06, + "loss": 0.0015, + "step": 212480 + }, + { + "epoch": 1.362844856060776, + "grad_norm": 0.021662624552845955, + "learning_rate": 2.7858378826367914e-06, + "loss": 0.0015, + "step": 212490 + }, + { + "epoch": 1.3629089929545621, + "grad_norm": 0.03224967420101166, + "learning_rate": 2.7853360640036577e-06, + "loss": 0.0014, + "step": 212500 + }, + { + "epoch": 1.3629731298483483, + "grad_norm": 0.1176484078168869, + "learning_rate": 2.784834273121847e-06, + "loss": 0.0013, + "step": 212510 + }, + { + "epoch": 1.3630372667421344, + "grad_norm": 0.10865365713834763, + "learning_rate": 2.7843325099976467e-06, + "loss": 0.0011, + "step": 212520 + }, + { + "epoch": 1.3631014036359206, + "grad_norm": 0.026730181649327278, + "learning_rate": 2.7838307746373427e-06, + "loss": 0.0011, + "step": 212530 + }, + { + "epoch": 1.3631655405297067, + "grad_norm": 0.1156671792268753, + "learning_rate": 2.783329067047222e-06, + "loss": 0.0011, + "step": 212540 + }, + { + "epoch": 1.3632296774234927, + "grad_norm": 0.09126152098178864, + "learning_rate": 2.7828273872335738e-06, + "loss": 0.0012, + "step": 212550 + }, + { + "epoch": 1.3632938143172788, + "grad_norm": 0.021603649482131004, + "learning_rate": 2.7823257352026826e-06, + "loss": 0.0019, + "step": 212560 + }, + { + "epoch": 1.363357951211065, + "grad_norm": 0.009608804248273373, + "learning_rate": 2.7818241109608333e-06, + "loss": 0.0019, + "step": 212570 + }, + { + "epoch": 1.3634220881048509, + "grad_norm": 0.07074563205242157, + "learning_rate": 2.781322514514315e-06, + "loss": 0.0034, + "step": 212580 + }, + { + "epoch": 1.363486224998637, + "grad_norm": 0.16082030534744263, + "learning_rate": 2.7808209458694117e-06, + "loss": 0.0013, + "step": 212590 + }, + { + "epoch": 1.3635503618924232, + "grad_norm": 0.06372162699699402, + "learning_rate": 2.7803194050324078e-06, + "loss": 0.0015, + "step": 212600 + }, + { + "epoch": 1.3636144987862093, + "grad_norm": 0.00480593740940094, + "learning_rate": 2.7798178920095865e-06, + "loss": 0.0018, + "step": 212610 + }, + { + "epoch": 1.3636786356799955, + "grad_norm": 0.07020413130521774, + "learning_rate": 2.7793164068072353e-06, + "loss": 0.0011, + "step": 212620 + }, + { + "epoch": 1.3637427725737814, + "grad_norm": 0.0518869049847126, + "learning_rate": 2.7788149494316364e-06, + "loss": 0.0013, + "step": 212630 + }, + { + "epoch": 1.3638069094675676, + "grad_norm": 0.04014595225453377, + "learning_rate": 2.7783135198890743e-06, + "loss": 0.0021, + "step": 212640 + }, + { + "epoch": 1.3638710463613537, + "grad_norm": 0.012117862701416016, + "learning_rate": 2.7778121181858285e-06, + "loss": 0.001, + "step": 212650 + }, + { + "epoch": 1.3639351832551398, + "grad_norm": 0.0709318220615387, + "learning_rate": 2.7773107443281877e-06, + "loss": 0.0023, + "step": 212660 + }, + { + "epoch": 1.3639993201489258, + "grad_norm": 0.013128736987709999, + "learning_rate": 2.776809398322432e-06, + "loss": 0.0012, + "step": 212670 + }, + { + "epoch": 1.364063457042712, + "grad_norm": 0.0026822881773114204, + "learning_rate": 2.7763080801748434e-06, + "loss": 0.0009, + "step": 212680 + }, + { + "epoch": 1.364127593936498, + "grad_norm": 0.008389109745621681, + "learning_rate": 2.7758067898917018e-06, + "loss": 0.0013, + "step": 212690 + }, + { + "epoch": 1.3641917308302842, + "grad_norm": 0.018718862906098366, + "learning_rate": 2.775305527479293e-06, + "loss": 0.0009, + "step": 212700 + }, + { + "epoch": 1.3642558677240704, + "grad_norm": 0.09819506108760834, + "learning_rate": 2.774804292943895e-06, + "loss": 0.002, + "step": 212710 + }, + { + "epoch": 1.3643200046178563, + "grad_norm": 0.008475651033222675, + "learning_rate": 2.774303086291788e-06, + "loss": 0.0011, + "step": 212720 + }, + { + "epoch": 1.3643841415116424, + "grad_norm": 0.05434533953666687, + "learning_rate": 2.773801907529256e-06, + "loss": 0.0009, + "step": 212730 + }, + { + "epoch": 1.3644482784054286, + "grad_norm": 0.017436152324080467, + "learning_rate": 2.773300756662577e-06, + "loss": 0.0009, + "step": 212740 + }, + { + "epoch": 1.3645124152992145, + "grad_norm": 0.05302887409925461, + "learning_rate": 2.772799633698031e-06, + "loss": 0.0015, + "step": 212750 + }, + { + "epoch": 1.3645765521930007, + "grad_norm": 0.09424480050802231, + "learning_rate": 2.7722985386418957e-06, + "loss": 0.0011, + "step": 212760 + }, + { + "epoch": 1.3646406890867868, + "grad_norm": 0.007592364680022001, + "learning_rate": 2.7717974715004534e-06, + "loss": 0.0008, + "step": 212770 + }, + { + "epoch": 1.364704825980573, + "grad_norm": 0.03045915625989437, + "learning_rate": 2.771296432279982e-06, + "loss": 0.001, + "step": 212780 + }, + { + "epoch": 1.3647689628743591, + "grad_norm": 0.040771279484033585, + "learning_rate": 2.7707954209867584e-06, + "loss": 0.0003, + "step": 212790 + }, + { + "epoch": 1.3648330997681453, + "grad_norm": 0.15090760588645935, + "learning_rate": 2.770294437627059e-06, + "loss": 0.0013, + "step": 212800 + }, + { + "epoch": 1.3648972366619312, + "grad_norm": 0.1347474604845047, + "learning_rate": 2.769793482207167e-06, + "loss": 0.0013, + "step": 212810 + }, + { + "epoch": 1.3649613735557173, + "grad_norm": 0.07247631996870041, + "learning_rate": 2.769292554733356e-06, + "loss": 0.0014, + "step": 212820 + }, + { + "epoch": 1.3650255104495035, + "grad_norm": 0.15575724840164185, + "learning_rate": 2.768791655211903e-06, + "loss": 0.0022, + "step": 212830 + }, + { + "epoch": 1.3650896473432894, + "grad_norm": 0.0690620094537735, + "learning_rate": 2.7682907836490834e-06, + "loss": 0.0006, + "step": 212840 + }, + { + "epoch": 1.3651537842370756, + "grad_norm": 0.020767055451869965, + "learning_rate": 2.767789940051178e-06, + "loss": 0.0006, + "step": 212850 + }, + { + "epoch": 1.3652179211308617, + "grad_norm": 0.07257429510354996, + "learning_rate": 2.7672891244244593e-06, + "loss": 0.0007, + "step": 212860 + }, + { + "epoch": 1.3652820580246479, + "grad_norm": 0.007378075271844864, + "learning_rate": 2.7667883367752024e-06, + "loss": 0.0006, + "step": 212870 + }, + { + "epoch": 1.365346194918434, + "grad_norm": 0.09765730053186417, + "learning_rate": 2.7662875771096847e-06, + "loss": 0.0026, + "step": 212880 + }, + { + "epoch": 1.36541033181222, + "grad_norm": 0.04521140828728676, + "learning_rate": 2.765786845434181e-06, + "loss": 0.0018, + "step": 212890 + }, + { + "epoch": 1.365474468706006, + "grad_norm": 0.14008601009845734, + "learning_rate": 2.765286141754964e-06, + "loss": 0.0014, + "step": 212900 + }, + { + "epoch": 1.3655386055997922, + "grad_norm": 0.20395956933498383, + "learning_rate": 2.7647854660783067e-06, + "loss": 0.0012, + "step": 212910 + }, + { + "epoch": 1.3656027424935782, + "grad_norm": 0.11808919161558151, + "learning_rate": 2.7642848184104876e-06, + "loss": 0.0017, + "step": 212920 + }, + { + "epoch": 1.3656668793873643, + "grad_norm": 0.16811446845531464, + "learning_rate": 2.7637841987577775e-06, + "loss": 0.0024, + "step": 212930 + }, + { + "epoch": 1.3657310162811505, + "grad_norm": 0.09902886301279068, + "learning_rate": 2.763283607126449e-06, + "loss": 0.0011, + "step": 212940 + }, + { + "epoch": 1.3657951531749366, + "grad_norm": 0.18342621624469757, + "learning_rate": 2.7627830435227744e-06, + "loss": 0.0019, + "step": 212950 + }, + { + "epoch": 1.3658592900687228, + "grad_norm": 0.03563809022307396, + "learning_rate": 2.762282507953027e-06, + "loss": 0.0015, + "step": 212960 + }, + { + "epoch": 1.365923426962509, + "grad_norm": 0.004312540870159864, + "learning_rate": 2.761782000423481e-06, + "loss": 0.0015, + "step": 212970 + }, + { + "epoch": 1.3659875638562948, + "grad_norm": 0.08165877312421799, + "learning_rate": 2.7612815209404052e-06, + "loss": 0.0017, + "step": 212980 + }, + { + "epoch": 1.366051700750081, + "grad_norm": 0.05772337317466736, + "learning_rate": 2.760781069510071e-06, + "loss": 0.0023, + "step": 212990 + }, + { + "epoch": 1.3661158376438671, + "grad_norm": 0.0420399084687233, + "learning_rate": 2.760280646138752e-06, + "loss": 0.0012, + "step": 213000 + }, + { + "epoch": 1.366179974537653, + "grad_norm": 0.04587164148688316, + "learning_rate": 2.759780250832717e-06, + "loss": 0.001, + "step": 213010 + }, + { + "epoch": 1.3662441114314392, + "grad_norm": 0.1739441156387329, + "learning_rate": 2.759279883598237e-06, + "loss": 0.0011, + "step": 213020 + }, + { + "epoch": 1.3663082483252253, + "grad_norm": 0.29219284653663635, + "learning_rate": 2.7587795444415787e-06, + "loss": 0.0018, + "step": 213030 + }, + { + "epoch": 1.3663723852190115, + "grad_norm": 0.0494915209710598, + "learning_rate": 2.7582792333690174e-06, + "loss": 0.0017, + "step": 213040 + }, + { + "epoch": 1.3664365221127976, + "grad_norm": 0.14492544531822205, + "learning_rate": 2.757778950386819e-06, + "loss": 0.0012, + "step": 213050 + }, + { + "epoch": 1.3665006590065836, + "grad_norm": 0.14945034682750702, + "learning_rate": 2.7572786955012527e-06, + "loss": 0.0008, + "step": 213060 + }, + { + "epoch": 1.3665647959003697, + "grad_norm": 0.1814311295747757, + "learning_rate": 2.756778468718585e-06, + "loss": 0.0023, + "step": 213070 + }, + { + "epoch": 1.3666289327941559, + "grad_norm": 0.11481145769357681, + "learning_rate": 2.7562782700450883e-06, + "loss": 0.0016, + "step": 213080 + }, + { + "epoch": 1.366693069687942, + "grad_norm": 0.05379915237426758, + "learning_rate": 2.7557780994870298e-06, + "loss": 0.0011, + "step": 213090 + }, + { + "epoch": 1.366757206581728, + "grad_norm": 0.09029082208871841, + "learning_rate": 2.7552779570506743e-06, + "loss": 0.0015, + "step": 213100 + }, + { + "epoch": 1.366821343475514, + "grad_norm": 0.20908387005329132, + "learning_rate": 2.754777842742289e-06, + "loss": 0.001, + "step": 213110 + }, + { + "epoch": 1.3668854803693002, + "grad_norm": 0.10487104207277298, + "learning_rate": 2.7542777565681434e-06, + "loss": 0.0008, + "step": 213120 + }, + { + "epoch": 1.3669496172630864, + "grad_norm": 0.13100507855415344, + "learning_rate": 2.7537776985345023e-06, + "loss": 0.0015, + "step": 213130 + }, + { + "epoch": 1.3670137541568725, + "grad_norm": 0.01375681720674038, + "learning_rate": 2.7532776686476302e-06, + "loss": 0.0024, + "step": 213140 + }, + { + "epoch": 1.3670778910506585, + "grad_norm": 0.1232491284608841, + "learning_rate": 2.752777666913797e-06, + "loss": 0.0155, + "step": 213150 + }, + { + "epoch": 1.3671420279444446, + "grad_norm": 0.057872459292411804, + "learning_rate": 2.7522776933392657e-06, + "loss": 0.0014, + "step": 213160 + }, + { + "epoch": 1.3672061648382308, + "grad_norm": 0.23585091531276703, + "learning_rate": 2.751777747930301e-06, + "loss": 0.0025, + "step": 213170 + }, + { + "epoch": 1.3672703017320167, + "grad_norm": 0.10261121392250061, + "learning_rate": 2.751277830693166e-06, + "loss": 0.0016, + "step": 213180 + }, + { + "epoch": 1.3673344386258028, + "grad_norm": 0.1702728569507599, + "learning_rate": 2.7507779416341286e-06, + "loss": 0.0019, + "step": 213190 + }, + { + "epoch": 1.367398575519589, + "grad_norm": 0.1250714808702469, + "learning_rate": 2.7502780807594513e-06, + "loss": 0.0023, + "step": 213200 + }, + { + "epoch": 1.3674627124133751, + "grad_norm": 0.049842942506074905, + "learning_rate": 2.7497782480753976e-06, + "loss": 0.0018, + "step": 213210 + }, + { + "epoch": 1.3675268493071613, + "grad_norm": 0.1522701531648636, + "learning_rate": 2.749278443588229e-06, + "loss": 0.0012, + "step": 213220 + }, + { + "epoch": 1.3675909862009474, + "grad_norm": 0.157740518450737, + "learning_rate": 2.7487786673042115e-06, + "loss": 0.0011, + "step": 213230 + }, + { + "epoch": 1.3676551230947334, + "grad_norm": 0.1830168515443802, + "learning_rate": 2.748278919229607e-06, + "loss": 0.0038, + "step": 213240 + }, + { + "epoch": 1.3677192599885195, + "grad_norm": 0.13043898344039917, + "learning_rate": 2.7477791993706737e-06, + "loss": 0.0017, + "step": 213250 + }, + { + "epoch": 1.3677833968823057, + "grad_norm": 0.11714408546686172, + "learning_rate": 2.74727950773368e-06, + "loss": 0.0012, + "step": 213260 + }, + { + "epoch": 1.3678475337760916, + "grad_norm": 0.10734383761882782, + "learning_rate": 2.7467798443248827e-06, + "loss": 0.0009, + "step": 213270 + }, + { + "epoch": 1.3679116706698777, + "grad_norm": 0.14148421585559845, + "learning_rate": 2.7462802091505452e-06, + "loss": 0.0011, + "step": 213280 + }, + { + "epoch": 1.3679758075636639, + "grad_norm": 0.11280106753110886, + "learning_rate": 2.7457806022169253e-06, + "loss": 0.0017, + "step": 213290 + }, + { + "epoch": 1.36803994445745, + "grad_norm": 0.02165481075644493, + "learning_rate": 2.745281023530287e-06, + "loss": 0.001, + "step": 213300 + }, + { + "epoch": 1.3681040813512362, + "grad_norm": 0.08553720265626907, + "learning_rate": 2.7447814730968904e-06, + "loss": 0.0014, + "step": 213310 + }, + { + "epoch": 1.368168218245022, + "grad_norm": 0.10544779151678085, + "learning_rate": 2.7442819509229924e-06, + "loss": 0.0009, + "step": 213320 + }, + { + "epoch": 1.3682323551388083, + "grad_norm": 0.051266372203826904, + "learning_rate": 2.743782457014852e-06, + "loss": 0.0004, + "step": 213330 + }, + { + "epoch": 1.3682964920325944, + "grad_norm": 0.027301175519824028, + "learning_rate": 2.7432829913787316e-06, + "loss": 0.001, + "step": 213340 + }, + { + "epoch": 1.3683606289263803, + "grad_norm": 0.11781658232212067, + "learning_rate": 2.742783554020888e-06, + "loss": 0.001, + "step": 213350 + }, + { + "epoch": 1.3684247658201665, + "grad_norm": 0.1479956954717636, + "learning_rate": 2.74228414494758e-06, + "loss": 0.0031, + "step": 213360 + }, + { + "epoch": 1.3684889027139526, + "grad_norm": 0.13904689252376556, + "learning_rate": 2.7417847641650642e-06, + "loss": 0.0008, + "step": 213370 + }, + { + "epoch": 1.3685530396077388, + "grad_norm": 0.04022185504436493, + "learning_rate": 2.7412854116795996e-06, + "loss": 0.0008, + "step": 213380 + }, + { + "epoch": 1.368617176501525, + "grad_norm": 0.04929453134536743, + "learning_rate": 2.740786087497444e-06, + "loss": 0.0014, + "step": 213390 + }, + { + "epoch": 1.368681313395311, + "grad_norm": 0.16074256598949432, + "learning_rate": 2.740286791624851e-06, + "loss": 0.0017, + "step": 213400 + }, + { + "epoch": 1.368745450289097, + "grad_norm": 0.02083122730255127, + "learning_rate": 2.7397875240680816e-06, + "loss": 0.0013, + "step": 213410 + }, + { + "epoch": 1.3688095871828831, + "grad_norm": 0.0831194743514061, + "learning_rate": 2.7392882848333902e-06, + "loss": 0.0019, + "step": 213420 + }, + { + "epoch": 1.3688737240766693, + "grad_norm": 0.08221425116062164, + "learning_rate": 2.738789073927032e-06, + "loss": 0.0022, + "step": 213430 + }, + { + "epoch": 1.3689378609704552, + "grad_norm": 0.13809236884117126, + "learning_rate": 2.738289891355261e-06, + "loss": 0.0022, + "step": 213440 + }, + { + "epoch": 1.3690019978642414, + "grad_norm": 0.04812110587954521, + "learning_rate": 2.737790737124336e-06, + "loss": 0.001, + "step": 213450 + }, + { + "epoch": 1.3690661347580275, + "grad_norm": 0.0709865540266037, + "learning_rate": 2.7372916112405097e-06, + "loss": 0.0011, + "step": 213460 + }, + { + "epoch": 1.3691302716518137, + "grad_norm": 0.11143574863672256, + "learning_rate": 2.7367925137100364e-06, + "loss": 0.0008, + "step": 213470 + }, + { + "epoch": 1.3691944085455998, + "grad_norm": 0.21924534440040588, + "learning_rate": 2.7362934445391686e-06, + "loss": 0.0015, + "step": 213480 + }, + { + "epoch": 1.3692585454393857, + "grad_norm": 0.11467649787664413, + "learning_rate": 2.7357944037341644e-06, + "loss": 0.0029, + "step": 213490 + }, + { + "epoch": 1.369322682333172, + "grad_norm": 0.04351959004998207, + "learning_rate": 2.7352953913012737e-06, + "loss": 0.001, + "step": 213500 + }, + { + "epoch": 1.369386819226958, + "grad_norm": 0.11486602574586868, + "learning_rate": 2.7347964072467503e-06, + "loss": 0.0014, + "step": 213510 + }, + { + "epoch": 1.3694509561207442, + "grad_norm": 0.07183624058961868, + "learning_rate": 2.734297451576845e-06, + "loss": 0.0016, + "step": 213520 + }, + { + "epoch": 1.3695150930145301, + "grad_norm": 0.07201708853244781, + "learning_rate": 2.7337985242978144e-06, + "loss": 0.0004, + "step": 213530 + }, + { + "epoch": 1.3695792299083163, + "grad_norm": 0.1011878177523613, + "learning_rate": 2.733299625415907e-06, + "loss": 0.0012, + "step": 213540 + }, + { + "epoch": 1.3696433668021024, + "grad_norm": 0.1852533221244812, + "learning_rate": 2.732800754937374e-06, + "loss": 0.0015, + "step": 213550 + }, + { + "epoch": 1.3697075036958886, + "grad_norm": 0.14826343953609467, + "learning_rate": 2.7323019128684703e-06, + "loss": 0.0017, + "step": 213560 + }, + { + "epoch": 1.3697716405896747, + "grad_norm": 0.03523099422454834, + "learning_rate": 2.731803099215443e-06, + "loss": 0.0005, + "step": 213570 + }, + { + "epoch": 1.3698357774834606, + "grad_norm": 0.16956846415996552, + "learning_rate": 2.7313043139845452e-06, + "loss": 0.0013, + "step": 213580 + }, + { + "epoch": 1.3698999143772468, + "grad_norm": 0.12136631458997726, + "learning_rate": 2.730805557182023e-06, + "loss": 0.0012, + "step": 213590 + }, + { + "epoch": 1.369964051271033, + "grad_norm": 0.15683306753635406, + "learning_rate": 2.730306828814131e-06, + "loss": 0.0015, + "step": 213600 + }, + { + "epoch": 1.3700281881648189, + "grad_norm": 0.00855253729969263, + "learning_rate": 2.729808128887117e-06, + "loss": 0.0028, + "step": 213610 + }, + { + "epoch": 1.370092325058605, + "grad_norm": 0.09091190993785858, + "learning_rate": 2.7293094574072295e-06, + "loss": 0.0015, + "step": 213620 + }, + { + "epoch": 1.3701564619523912, + "grad_norm": 0.06666356325149536, + "learning_rate": 2.728810814380715e-06, + "loss": 0.001, + "step": 213630 + }, + { + "epoch": 1.3702205988461773, + "grad_norm": 0.14627714455127716, + "learning_rate": 2.7283121998138264e-06, + "loss": 0.0009, + "step": 213640 + }, + { + "epoch": 1.3702847357399635, + "grad_norm": 0.13065119087696075, + "learning_rate": 2.7278136137128097e-06, + "loss": 0.0012, + "step": 213650 + }, + { + "epoch": 1.3703488726337496, + "grad_norm": 0.013947075232863426, + "learning_rate": 2.727315056083912e-06, + "loss": 0.0016, + "step": 213660 + }, + { + "epoch": 1.3704130095275355, + "grad_norm": 0.0493902862071991, + "learning_rate": 2.726816526933379e-06, + "loss": 0.0008, + "step": 213670 + }, + { + "epoch": 1.3704771464213217, + "grad_norm": 0.01956510730087757, + "learning_rate": 2.7263180262674617e-06, + "loss": 0.001, + "step": 213680 + }, + { + "epoch": 1.3705412833151078, + "grad_norm": 0.08916755765676498, + "learning_rate": 2.725819554092404e-06, + "loss": 0.0017, + "step": 213690 + }, + { + "epoch": 1.3706054202088938, + "grad_norm": 0.15487140417099, + "learning_rate": 2.7253211104144504e-06, + "loss": 0.0016, + "step": 213700 + }, + { + "epoch": 1.37066955710268, + "grad_norm": 0.017485613003373146, + "learning_rate": 2.724822695239852e-06, + "loss": 0.0017, + "step": 213710 + }, + { + "epoch": 1.370733693996466, + "grad_norm": 0.11065894365310669, + "learning_rate": 2.724324308574849e-06, + "loss": 0.0012, + "step": 213720 + }, + { + "epoch": 1.3707978308902522, + "grad_norm": 0.1534070074558258, + "learning_rate": 2.7238259504256904e-06, + "loss": 0.0015, + "step": 213730 + }, + { + "epoch": 1.3708619677840383, + "grad_norm": 0.0838715210556984, + "learning_rate": 2.7233276207986196e-06, + "loss": 0.0021, + "step": 213740 + }, + { + "epoch": 1.3709261046778243, + "grad_norm": 0.17326146364212036, + "learning_rate": 2.722829319699879e-06, + "loss": 0.0018, + "step": 213750 + }, + { + "epoch": 1.3709902415716104, + "grad_norm": 0.07356669008731842, + "learning_rate": 2.7223310471357165e-06, + "loss": 0.001, + "step": 213760 + }, + { + "epoch": 1.3710543784653966, + "grad_norm": 0.17165057361125946, + "learning_rate": 2.721832803112373e-06, + "loss": 0.0017, + "step": 213770 + }, + { + "epoch": 1.3711185153591827, + "grad_norm": 0.11722222715616226, + "learning_rate": 2.721334587636094e-06, + "loss": 0.0022, + "step": 213780 + }, + { + "epoch": 1.3711826522529686, + "grad_norm": 0.14171120524406433, + "learning_rate": 2.720836400713118e-06, + "loss": 0.0026, + "step": 213790 + }, + { + "epoch": 1.3712467891467548, + "grad_norm": 0.19339559972286224, + "learning_rate": 2.7203382423496938e-06, + "loss": 0.001, + "step": 213800 + }, + { + "epoch": 1.371310926040541, + "grad_norm": 0.06070531904697418, + "learning_rate": 2.71984011255206e-06, + "loss": 0.001, + "step": 213810 + }, + { + "epoch": 1.371375062934327, + "grad_norm": 0.024490779265761375, + "learning_rate": 2.7193420113264566e-06, + "loss": 0.0009, + "step": 213820 + }, + { + "epoch": 1.3714391998281132, + "grad_norm": 0.1451950967311859, + "learning_rate": 2.718843938679131e-06, + "loss": 0.0027, + "step": 213830 + }, + { + "epoch": 1.3715033367218992, + "grad_norm": 0.039366308599710464, + "learning_rate": 2.718345894616321e-06, + "loss": 0.0017, + "step": 213840 + }, + { + "epoch": 1.3715674736156853, + "grad_norm": 0.06168130040168762, + "learning_rate": 2.717847879144267e-06, + "loss": 0.0013, + "step": 213850 + }, + { + "epoch": 1.3716316105094715, + "grad_norm": 0.01753639057278633, + "learning_rate": 2.717349892269208e-06, + "loss": 0.0022, + "step": 213860 + }, + { + "epoch": 1.3716957474032574, + "grad_norm": 0.023625411093235016, + "learning_rate": 2.7168519339973887e-06, + "loss": 0.0009, + "step": 213870 + }, + { + "epoch": 1.3717598842970435, + "grad_norm": 0.2518775165081024, + "learning_rate": 2.716354004335047e-06, + "loss": 0.0018, + "step": 213880 + }, + { + "epoch": 1.3718240211908297, + "grad_norm": 0.05535493046045303, + "learning_rate": 2.71585610328842e-06, + "loss": 0.0008, + "step": 213890 + }, + { + "epoch": 1.3718881580846158, + "grad_norm": 0.18779072165489197, + "learning_rate": 2.7153582308637485e-06, + "loss": 0.0018, + "step": 213900 + }, + { + "epoch": 1.371952294978402, + "grad_norm": 0.06707113981246948, + "learning_rate": 2.714860387067272e-06, + "loss": 0.0004, + "step": 213910 + }, + { + "epoch": 1.3720164318721881, + "grad_norm": 0.12983901798725128, + "learning_rate": 2.714362571905228e-06, + "loss": 0.0012, + "step": 213920 + }, + { + "epoch": 1.372080568765974, + "grad_norm": 0.06924022734165192, + "learning_rate": 2.7138647853838546e-06, + "loss": 0.0013, + "step": 213930 + }, + { + "epoch": 1.3721447056597602, + "grad_norm": 0.06443751603364944, + "learning_rate": 2.713367027509387e-06, + "loss": 0.0014, + "step": 213940 + }, + { + "epoch": 1.3722088425535464, + "grad_norm": 0.05126974359154701, + "learning_rate": 2.712869298288068e-06, + "loss": 0.0024, + "step": 213950 + }, + { + "epoch": 1.3722729794473323, + "grad_norm": 0.10546655207872391, + "learning_rate": 2.71237159772613e-06, + "loss": 0.0018, + "step": 213960 + }, + { + "epoch": 1.3723371163411184, + "grad_norm": 0.18614305555820465, + "learning_rate": 2.7118739258298094e-06, + "loss": 0.001, + "step": 213970 + }, + { + "epoch": 1.3724012532349046, + "grad_norm": 0.05279052257537842, + "learning_rate": 2.711376282605346e-06, + "loss": 0.0012, + "step": 213980 + }, + { + "epoch": 1.3724653901286907, + "grad_norm": 0.005365374032407999, + "learning_rate": 2.710878668058973e-06, + "loss": 0.0015, + "step": 213990 + }, + { + "epoch": 1.3725295270224769, + "grad_norm": 0.06920674443244934, + "learning_rate": 2.7103810821969264e-06, + "loss": 0.0012, + "step": 214000 + }, + { + "epoch": 1.3725936639162628, + "grad_norm": 0.06458396464586258, + "learning_rate": 2.709883525025439e-06, + "loss": 0.0022, + "step": 214010 + }, + { + "epoch": 1.372657800810049, + "grad_norm": 0.10044469684362411, + "learning_rate": 2.7093859965507496e-06, + "loss": 0.0017, + "step": 214020 + }, + { + "epoch": 1.372721937703835, + "grad_norm": 0.1628868579864502, + "learning_rate": 2.708888496779092e-06, + "loss": 0.0017, + "step": 214030 + }, + { + "epoch": 1.372786074597621, + "grad_norm": 0.07906507700681686, + "learning_rate": 2.708391025716698e-06, + "loss": 0.0011, + "step": 214040 + }, + { + "epoch": 1.3728502114914072, + "grad_norm": 0.0659327283501625, + "learning_rate": 2.7078935833698e-06, + "loss": 0.0016, + "step": 214050 + }, + { + "epoch": 1.3729143483851933, + "grad_norm": 0.08163490891456604, + "learning_rate": 2.707396169744636e-06, + "loss": 0.0013, + "step": 214060 + }, + { + "epoch": 1.3729784852789795, + "grad_norm": 0.07006902247667313, + "learning_rate": 2.706898784847436e-06, + "loss": 0.0016, + "step": 214070 + }, + { + "epoch": 1.3730426221727656, + "grad_norm": 0.12226539850234985, + "learning_rate": 2.7064014286844337e-06, + "loss": 0.0015, + "step": 214080 + }, + { + "epoch": 1.3731067590665518, + "grad_norm": 0.07548625767230988, + "learning_rate": 2.7059041012618583e-06, + "loss": 0.0013, + "step": 214090 + }, + { + "epoch": 1.3731708959603377, + "grad_norm": 0.016052765771746635, + "learning_rate": 2.7054068025859463e-06, + "loss": 0.0025, + "step": 214100 + }, + { + "epoch": 1.3732350328541238, + "grad_norm": 0.009808254428207874, + "learning_rate": 2.704909532662927e-06, + "loss": 0.0013, + "step": 214110 + }, + { + "epoch": 1.37329916974791, + "grad_norm": 0.16814613342285156, + "learning_rate": 2.7044122914990296e-06, + "loss": 0.0012, + "step": 214120 + }, + { + "epoch": 1.373363306641696, + "grad_norm": 0.2443917989730835, + "learning_rate": 2.703915079100489e-06, + "loss": 0.0014, + "step": 214130 + }, + { + "epoch": 1.373427443535482, + "grad_norm": 0.10541815310716629, + "learning_rate": 2.7034178954735325e-06, + "loss": 0.0012, + "step": 214140 + }, + { + "epoch": 1.3734915804292682, + "grad_norm": 0.03079073876142502, + "learning_rate": 2.702920740624392e-06, + "loss": 0.0008, + "step": 214150 + }, + { + "epoch": 1.3735557173230544, + "grad_norm": 0.05303191766142845, + "learning_rate": 2.7024236145592942e-06, + "loss": 0.0016, + "step": 214160 + }, + { + "epoch": 1.3736198542168405, + "grad_norm": 0.03506298363208771, + "learning_rate": 2.701926517284472e-06, + "loss": 0.0012, + "step": 214170 + }, + { + "epoch": 1.3736839911106264, + "grad_norm": 0.043985120952129364, + "learning_rate": 2.701429448806153e-06, + "loss": 0.0027, + "step": 214180 + }, + { + "epoch": 1.3737481280044126, + "grad_norm": 0.11024189740419388, + "learning_rate": 2.7009324091305667e-06, + "loss": 0.0018, + "step": 214190 + }, + { + "epoch": 1.3738122648981987, + "grad_norm": 0.006982483901083469, + "learning_rate": 2.7004353982639375e-06, + "loss": 0.0009, + "step": 214200 + }, + { + "epoch": 1.373876401791985, + "grad_norm": 0.02276814542710781, + "learning_rate": 2.6999384162124987e-06, + "loss": 0.0012, + "step": 214210 + }, + { + "epoch": 1.3739405386857708, + "grad_norm": 0.1164424791932106, + "learning_rate": 2.699441462982475e-06, + "loss": 0.0009, + "step": 214220 + }, + { + "epoch": 1.374004675579557, + "grad_norm": 0.09967939555644989, + "learning_rate": 2.698944538580095e-06, + "loss": 0.0007, + "step": 214230 + }, + { + "epoch": 1.3740688124733431, + "grad_norm": 0.10169283300638199, + "learning_rate": 2.6984476430115815e-06, + "loss": 0.001, + "step": 214240 + }, + { + "epoch": 1.3741329493671293, + "grad_norm": 0.202654629945755, + "learning_rate": 2.6979507762831657e-06, + "loss": 0.0013, + "step": 214250 + }, + { + "epoch": 1.3741970862609154, + "grad_norm": 0.17559348046779633, + "learning_rate": 2.6974539384010722e-06, + "loss": 0.0014, + "step": 214260 + }, + { + "epoch": 1.3742612231547013, + "grad_norm": 0.11496882885694504, + "learning_rate": 2.6969571293715247e-06, + "loss": 0.0018, + "step": 214270 + }, + { + "epoch": 1.3743253600484875, + "grad_norm": 0.14585083723068237, + "learning_rate": 2.6964603492007523e-06, + "loss": 0.0011, + "step": 214280 + }, + { + "epoch": 1.3743894969422736, + "grad_norm": 0.15473544597625732, + "learning_rate": 2.6959635978949785e-06, + "loss": 0.0013, + "step": 214290 + }, + { + "epoch": 1.3744536338360596, + "grad_norm": 0.006460663862526417, + "learning_rate": 2.6954668754604264e-06, + "loss": 0.0008, + "step": 214300 + }, + { + "epoch": 1.3745177707298457, + "grad_norm": 0.1418498307466507, + "learning_rate": 2.69497018190332e-06, + "loss": 0.002, + "step": 214310 + }, + { + "epoch": 1.3745819076236319, + "grad_norm": 0.06111471354961395, + "learning_rate": 2.694473517229886e-06, + "loss": 0.0011, + "step": 214320 + }, + { + "epoch": 1.374646044517418, + "grad_norm": 0.07857625931501389, + "learning_rate": 2.6939768814463472e-06, + "loss": 0.0013, + "step": 214330 + }, + { + "epoch": 1.3747101814112042, + "grad_norm": 0.041253603994846344, + "learning_rate": 2.6934802745589265e-06, + "loss": 0.0014, + "step": 214340 + }, + { + "epoch": 1.3747743183049903, + "grad_norm": 0.0759199857711792, + "learning_rate": 2.692983696573844e-06, + "loss": 0.0023, + "step": 214350 + }, + { + "epoch": 1.3748384551987762, + "grad_norm": 0.01071109902113676, + "learning_rate": 2.692487147497327e-06, + "loss": 0.0008, + "step": 214360 + }, + { + "epoch": 1.3749025920925624, + "grad_norm": 0.13840435445308685, + "learning_rate": 2.6919906273355947e-06, + "loss": 0.0016, + "step": 214370 + }, + { + "epoch": 1.3749667289863485, + "grad_norm": 0.11041421443223953, + "learning_rate": 2.691494136094869e-06, + "loss": 0.0013, + "step": 214380 + }, + { + "epoch": 1.3750308658801345, + "grad_norm": 0.08328073471784592, + "learning_rate": 2.69099767378137e-06, + "loss": 0.001, + "step": 214390 + }, + { + "epoch": 1.3750950027739206, + "grad_norm": 0.11724260449409485, + "learning_rate": 2.6905012404013227e-06, + "loss": 0.0015, + "step": 214400 + }, + { + "epoch": 1.3751591396677068, + "grad_norm": 0.005760582629591227, + "learning_rate": 2.690004835960945e-06, + "loss": 0.0007, + "step": 214410 + }, + { + "epoch": 1.375223276561493, + "grad_norm": 0.032109711319208145, + "learning_rate": 2.6895084604664556e-06, + "loss": 0.0013, + "step": 214420 + }, + { + "epoch": 1.375287413455279, + "grad_norm": 0.06785446405410767, + "learning_rate": 2.689012113924079e-06, + "loss": 0.0019, + "step": 214430 + }, + { + "epoch": 1.375351550349065, + "grad_norm": 0.001295348978601396, + "learning_rate": 2.6885157963400315e-06, + "loss": 0.0005, + "step": 214440 + }, + { + "epoch": 1.3754156872428511, + "grad_norm": 0.007197576574981213, + "learning_rate": 2.6880195077205333e-06, + "loss": 0.0028, + "step": 214450 + }, + { + "epoch": 1.3754798241366373, + "grad_norm": 0.0747002586722374, + "learning_rate": 2.6875232480718014e-06, + "loss": 0.0007, + "step": 214460 + }, + { + "epoch": 1.3755439610304232, + "grad_norm": 0.08409067243337631, + "learning_rate": 2.687027017400058e-06, + "loss": 0.0013, + "step": 214470 + }, + { + "epoch": 1.3756080979242093, + "grad_norm": 0.06002112478017807, + "learning_rate": 2.6865308157115167e-06, + "loss": 0.0012, + "step": 214480 + }, + { + "epoch": 1.3756722348179955, + "grad_norm": 0.17536841332912445, + "learning_rate": 2.6860346430123995e-06, + "loss": 0.0023, + "step": 214490 + }, + { + "epoch": 1.3757363717117816, + "grad_norm": 0.04730650782585144, + "learning_rate": 2.685538499308922e-06, + "loss": 0.0016, + "step": 214500 + }, + { + "epoch": 1.3758005086055678, + "grad_norm": 0.03322217985987663, + "learning_rate": 2.6850423846073e-06, + "loss": 0.0015, + "step": 214510 + }, + { + "epoch": 1.375864645499354, + "grad_norm": 0.03648802265524864, + "learning_rate": 2.6845462989137528e-06, + "loss": 0.0009, + "step": 214520 + }, + { + "epoch": 1.3759287823931399, + "grad_norm": 0.04079330340027809, + "learning_rate": 2.6840502422344955e-06, + "loss": 0.0014, + "step": 214530 + }, + { + "epoch": 1.375992919286926, + "grad_norm": 0.0969911515712738, + "learning_rate": 2.683554214575742e-06, + "loss": 0.002, + "step": 214540 + }, + { + "epoch": 1.3760570561807122, + "grad_norm": 0.29174312949180603, + "learning_rate": 2.683058215943711e-06, + "loss": 0.0029, + "step": 214550 + }, + { + "epoch": 1.376121193074498, + "grad_norm": 0.026156960055232048, + "learning_rate": 2.682562246344617e-06, + "loss": 0.0014, + "step": 214560 + }, + { + "epoch": 1.3761853299682842, + "grad_norm": 0.0699620246887207, + "learning_rate": 2.682066305784674e-06, + "loss": 0.0022, + "step": 214570 + }, + { + "epoch": 1.3762494668620704, + "grad_norm": 0.06607739627361298, + "learning_rate": 2.6815703942700954e-06, + "loss": 0.0009, + "step": 214580 + }, + { + "epoch": 1.3763136037558565, + "grad_norm": 0.021971357986330986, + "learning_rate": 2.681074511807098e-06, + "loss": 0.0018, + "step": 214590 + }, + { + "epoch": 1.3763777406496427, + "grad_norm": 0.062438640743494034, + "learning_rate": 2.680578658401895e-06, + "loss": 0.001, + "step": 214600 + }, + { + "epoch": 1.3764418775434286, + "grad_norm": 0.11185724288225174, + "learning_rate": 2.680082834060699e-06, + "loss": 0.0011, + "step": 214610 + }, + { + "epoch": 1.3765060144372148, + "grad_norm": 0.00310879317112267, + "learning_rate": 2.6795870387897205e-06, + "loss": 0.0008, + "step": 214620 + }, + { + "epoch": 1.376570151331001, + "grad_norm": 0.06282266974449158, + "learning_rate": 2.6790912725951766e-06, + "loss": 0.0008, + "step": 214630 + }, + { + "epoch": 1.376634288224787, + "grad_norm": 0.30241039395332336, + "learning_rate": 2.678595535483278e-06, + "loss": 0.0026, + "step": 214640 + }, + { + "epoch": 1.376698425118573, + "grad_norm": 0.035346679389476776, + "learning_rate": 2.6780998274602376e-06, + "loss": 0.0022, + "step": 214650 + }, + { + "epoch": 1.3767625620123591, + "grad_norm": 0.0476689487695694, + "learning_rate": 2.677604148532262e-06, + "loss": 0.0019, + "step": 214660 + }, + { + "epoch": 1.3768266989061453, + "grad_norm": 0.1020970419049263, + "learning_rate": 2.6771084987055694e-06, + "loss": 0.0013, + "step": 214670 + }, + { + "epoch": 1.3768908357999314, + "grad_norm": 0.18499650061130524, + "learning_rate": 2.6766128779863677e-06, + "loss": 0.0018, + "step": 214680 + }, + { + "epoch": 1.3769549726937176, + "grad_norm": 0.028281651437282562, + "learning_rate": 2.6761172863808645e-06, + "loss": 0.0023, + "step": 214690 + }, + { + "epoch": 1.3770191095875035, + "grad_norm": 0.047749750316143036, + "learning_rate": 2.675621723895274e-06, + "loss": 0.0008, + "step": 214700 + }, + { + "epoch": 1.3770832464812897, + "grad_norm": 0.12156499922275543, + "learning_rate": 2.6751261905358055e-06, + "loss": 0.0015, + "step": 214710 + }, + { + "epoch": 1.3771473833750758, + "grad_norm": 0.06353110820055008, + "learning_rate": 2.6746306863086668e-06, + "loss": 0.0007, + "step": 214720 + }, + { + "epoch": 1.3772115202688617, + "grad_norm": 0.09507710486650467, + "learning_rate": 2.674135211220066e-06, + "loss": 0.0014, + "step": 214730 + }, + { + "epoch": 1.3772756571626479, + "grad_norm": 0.18407589197158813, + "learning_rate": 2.673639765276215e-06, + "loss": 0.0021, + "step": 214740 + }, + { + "epoch": 1.377339794056434, + "grad_norm": 0.0212041474878788, + "learning_rate": 2.673144348483321e-06, + "loss": 0.0016, + "step": 214750 + }, + { + "epoch": 1.3774039309502202, + "grad_norm": 0.1254289299249649, + "learning_rate": 2.6726489608475905e-06, + "loss": 0.0022, + "step": 214760 + }, + { + "epoch": 1.3774680678440063, + "grad_norm": 0.2893882989883423, + "learning_rate": 2.6721536023752297e-06, + "loss": 0.0015, + "step": 214770 + }, + { + "epoch": 1.3775322047377925, + "grad_norm": 0.0767577588558197, + "learning_rate": 2.6716582730724505e-06, + "loss": 0.0007, + "step": 214780 + }, + { + "epoch": 1.3775963416315784, + "grad_norm": 0.02579805813729763, + "learning_rate": 2.6711629729454568e-06, + "loss": 0.0007, + "step": 214790 + }, + { + "epoch": 1.3776604785253646, + "grad_norm": 0.05162614583969116, + "learning_rate": 2.6706677020004558e-06, + "loss": 0.0025, + "step": 214800 + }, + { + "epoch": 1.3777246154191507, + "grad_norm": 0.04368935525417328, + "learning_rate": 2.6701724602436506e-06, + "loss": 0.0019, + "step": 214810 + }, + { + "epoch": 1.3777887523129366, + "grad_norm": 0.14431016147136688, + "learning_rate": 2.6696772476812527e-06, + "loss": 0.0012, + "step": 214820 + }, + { + "epoch": 1.3778528892067228, + "grad_norm": 0.05523088201880455, + "learning_rate": 2.669182064319463e-06, + "loss": 0.001, + "step": 214830 + }, + { + "epoch": 1.377917026100509, + "grad_norm": 0.06342064589262009, + "learning_rate": 2.6686869101644867e-06, + "loss": 0.0013, + "step": 214840 + }, + { + "epoch": 1.377981162994295, + "grad_norm": 0.08518742024898529, + "learning_rate": 2.6681917852225315e-06, + "loss": 0.0016, + "step": 214850 + }, + { + "epoch": 1.3780452998880812, + "grad_norm": 0.01703224517405033, + "learning_rate": 2.6676966894998003e-06, + "loss": 0.0012, + "step": 214860 + }, + { + "epoch": 1.3781094367818671, + "grad_norm": 0.10826463997364044, + "learning_rate": 2.6672016230024955e-06, + "loss": 0.001, + "step": 214870 + }, + { + "epoch": 1.3781735736756533, + "grad_norm": 0.1422380805015564, + "learning_rate": 2.6667065857368203e-06, + "loss": 0.002, + "step": 214880 + }, + { + "epoch": 1.3782377105694394, + "grad_norm": 0.10455601662397385, + "learning_rate": 2.6662115777089805e-06, + "loss": 0.0014, + "step": 214890 + }, + { + "epoch": 1.3783018474632254, + "grad_norm": 0.026486804708838463, + "learning_rate": 2.6657165989251777e-06, + "loss": 0.0009, + "step": 214900 + }, + { + "epoch": 1.3783659843570115, + "grad_norm": 0.12323911488056183, + "learning_rate": 2.665221649391614e-06, + "loss": 0.0025, + "step": 214910 + }, + { + "epoch": 1.3784301212507977, + "grad_norm": 0.08645933866500854, + "learning_rate": 2.66472672911449e-06, + "loss": 0.0019, + "step": 214920 + }, + { + "epoch": 1.3784942581445838, + "grad_norm": 0.004943096078932285, + "learning_rate": 2.664231838100011e-06, + "loss": 0.0014, + "step": 214930 + }, + { + "epoch": 1.37855839503837, + "grad_norm": 0.006645245011895895, + "learning_rate": 2.663736976354377e-06, + "loss": 0.0008, + "step": 214940 + }, + { + "epoch": 1.3786225319321561, + "grad_norm": 0.08196337521076202, + "learning_rate": 2.663242143883786e-06, + "loss": 0.0011, + "step": 214950 + }, + { + "epoch": 1.378686668825942, + "grad_norm": 0.04176272079348564, + "learning_rate": 2.6627473406944426e-06, + "loss": 0.0007, + "step": 214960 + }, + { + "epoch": 1.3787508057197282, + "grad_norm": 0.02642265520989895, + "learning_rate": 2.662252566792546e-06, + "loss": 0.0014, + "step": 214970 + }, + { + "epoch": 1.3788149426135143, + "grad_norm": 0.07213565707206726, + "learning_rate": 2.661757822184295e-06, + "loss": 0.0014, + "step": 214980 + }, + { + "epoch": 1.3788790795073003, + "grad_norm": 0.02878113090991974, + "learning_rate": 2.6612631068758876e-06, + "loss": 0.0008, + "step": 214990 + }, + { + "epoch": 1.3789432164010864, + "grad_norm": 0.10633989423513412, + "learning_rate": 2.6607684208735275e-06, + "loss": 0.0007, + "step": 215000 + }, + { + "epoch": 1.3790073532948726, + "grad_norm": 0.05150622874498367, + "learning_rate": 2.6602737641834105e-06, + "loss": 0.0021, + "step": 215010 + }, + { + "epoch": 1.3790714901886587, + "grad_norm": 0.05158638954162598, + "learning_rate": 2.6597791368117354e-06, + "loss": 0.0013, + "step": 215020 + }, + { + "epoch": 1.3791356270824449, + "grad_norm": 0.07146673649549484, + "learning_rate": 2.659284538764698e-06, + "loss": 0.0005, + "step": 215030 + }, + { + "epoch": 1.3791997639762308, + "grad_norm": 0.2744249105453491, + "learning_rate": 2.6587899700485008e-06, + "loss": 0.0029, + "step": 215040 + }, + { + "epoch": 1.379263900870017, + "grad_norm": 0.14671044051647186, + "learning_rate": 2.6582954306693383e-06, + "loss": 0.0013, + "step": 215050 + }, + { + "epoch": 1.379328037763803, + "grad_norm": 0.19606345891952515, + "learning_rate": 2.6578009206334076e-06, + "loss": 0.0015, + "step": 215060 + }, + { + "epoch": 1.3793921746575892, + "grad_norm": 0.07785133272409439, + "learning_rate": 2.657306439946903e-06, + "loss": 0.003, + "step": 215070 + }, + { + "epoch": 1.3794563115513752, + "grad_norm": 0.017010271549224854, + "learning_rate": 2.6568119886160255e-06, + "loss": 0.0012, + "step": 215080 + }, + { + "epoch": 1.3795204484451613, + "grad_norm": 0.047582823783159256, + "learning_rate": 2.6563175666469683e-06, + "loss": 0.0015, + "step": 215090 + }, + { + "epoch": 1.3795845853389475, + "grad_norm": 0.09325523674488068, + "learning_rate": 2.655823174045925e-06, + "loss": 0.0015, + "step": 215100 + }, + { + "epoch": 1.3796487222327336, + "grad_norm": 0.10350356996059418, + "learning_rate": 2.6553288108190943e-06, + "loss": 0.001, + "step": 215110 + }, + { + "epoch": 1.3797128591265198, + "grad_norm": 0.03015591949224472, + "learning_rate": 2.6548344769726696e-06, + "loss": 0.0017, + "step": 215120 + }, + { + "epoch": 1.3797769960203057, + "grad_norm": 0.0348021537065506, + "learning_rate": 2.654340172512845e-06, + "loss": 0.0008, + "step": 215130 + }, + { + "epoch": 1.3798411329140918, + "grad_norm": 0.04149578511714935, + "learning_rate": 2.6538458974458128e-06, + "loss": 0.0015, + "step": 215140 + }, + { + "epoch": 1.379905269807878, + "grad_norm": 0.04008261114358902, + "learning_rate": 2.65335165177777e-06, + "loss": 0.0008, + "step": 215150 + }, + { + "epoch": 1.379969406701664, + "grad_norm": 0.026236629113554955, + "learning_rate": 2.652857435514908e-06, + "loss": 0.001, + "step": 215160 + }, + { + "epoch": 1.38003354359545, + "grad_norm": 0.15502652525901794, + "learning_rate": 2.65236324866342e-06, + "loss": 0.0012, + "step": 215170 + }, + { + "epoch": 1.3800976804892362, + "grad_norm": 0.0263929795473814, + "learning_rate": 2.6518690912294957e-06, + "loss": 0.0016, + "step": 215180 + }, + { + "epoch": 1.3801618173830223, + "grad_norm": 0.16136017441749573, + "learning_rate": 2.651374963219333e-06, + "loss": 0.0017, + "step": 215190 + }, + { + "epoch": 1.3802259542768085, + "grad_norm": 0.10542784631252289, + "learning_rate": 2.6508808646391193e-06, + "loss": 0.0024, + "step": 215200 + }, + { + "epoch": 1.3802900911705946, + "grad_norm": 0.14071311056613922, + "learning_rate": 2.650386795495048e-06, + "loss": 0.0021, + "step": 215210 + }, + { + "epoch": 1.3803542280643806, + "grad_norm": 0.16117265820503235, + "learning_rate": 2.6498927557933072e-06, + "loss": 0.0012, + "step": 215220 + }, + { + "epoch": 1.3804183649581667, + "grad_norm": 0.029635494574904442, + "learning_rate": 2.6493987455400923e-06, + "loss": 0.0014, + "step": 215230 + }, + { + "epoch": 1.3804825018519529, + "grad_norm": 0.11301979422569275, + "learning_rate": 2.64890476474159e-06, + "loss": 0.0023, + "step": 215240 + }, + { + "epoch": 1.3805466387457388, + "grad_norm": 0.03197695314884186, + "learning_rate": 2.6484108134039903e-06, + "loss": 0.0016, + "step": 215250 + }, + { + "epoch": 1.380610775639525, + "grad_norm": 0.05557979643344879, + "learning_rate": 2.6479168915334834e-06, + "loss": 0.0029, + "step": 215260 + }, + { + "epoch": 1.380674912533311, + "grad_norm": 0.13175056874752045, + "learning_rate": 2.647422999136261e-06, + "loss": 0.0018, + "step": 215270 + }, + { + "epoch": 1.3807390494270972, + "grad_norm": 0.06291336566209793, + "learning_rate": 2.646929136218509e-06, + "loss": 0.0015, + "step": 215280 + }, + { + "epoch": 1.3808031863208834, + "grad_norm": 0.12071221321821213, + "learning_rate": 2.6464353027864177e-06, + "loss": 0.0018, + "step": 215290 + }, + { + "epoch": 1.3808673232146693, + "grad_norm": 0.09303078055381775, + "learning_rate": 2.645941498846172e-06, + "loss": 0.0012, + "step": 215300 + }, + { + "epoch": 1.3809314601084555, + "grad_norm": 0.03669220209121704, + "learning_rate": 2.645447724403963e-06, + "loss": 0.0018, + "step": 215310 + }, + { + "epoch": 1.3809955970022416, + "grad_norm": 0.16656337678432465, + "learning_rate": 2.6449539794659775e-06, + "loss": 0.0038, + "step": 215320 + }, + { + "epoch": 1.3810597338960278, + "grad_norm": 0.08406962454319, + "learning_rate": 2.6444602640384008e-06, + "loss": 0.0009, + "step": 215330 + }, + { + "epoch": 1.3811238707898137, + "grad_norm": 0.043212637305259705, + "learning_rate": 2.6439665781274193e-06, + "loss": 0.0014, + "step": 215340 + }, + { + "epoch": 1.3811880076835998, + "grad_norm": 0.025267785415053368, + "learning_rate": 2.6434729217392223e-06, + "loss": 0.0022, + "step": 215350 + }, + { + "epoch": 1.381252144577386, + "grad_norm": 0.15569503605365753, + "learning_rate": 2.6429792948799935e-06, + "loss": 0.0012, + "step": 215360 + }, + { + "epoch": 1.3813162814711721, + "grad_norm": 0.07439172267913818, + "learning_rate": 2.6424856975559177e-06, + "loss": 0.0011, + "step": 215370 + }, + { + "epoch": 1.3813804183649583, + "grad_norm": 0.15520213544368744, + "learning_rate": 2.6419921297731823e-06, + "loss": 0.0012, + "step": 215380 + }, + { + "epoch": 1.3814445552587442, + "grad_norm": 0.012888588942587376, + "learning_rate": 2.64149859153797e-06, + "loss": 0.0012, + "step": 215390 + }, + { + "epoch": 1.3815086921525304, + "grad_norm": 0.05528208240866661, + "learning_rate": 2.641005082856467e-06, + "loss": 0.0022, + "step": 215400 + }, + { + "epoch": 1.3815728290463165, + "grad_norm": 0.13787513971328735, + "learning_rate": 2.6405116037348534e-06, + "loss": 0.0011, + "step": 215410 + }, + { + "epoch": 1.3816369659401024, + "grad_norm": 0.14957444369792938, + "learning_rate": 2.640018154179318e-06, + "loss": 0.0014, + "step": 215420 + }, + { + "epoch": 1.3817011028338886, + "grad_norm": 0.06772604584693909, + "learning_rate": 2.6395247341960424e-06, + "loss": 0.0007, + "step": 215430 + }, + { + "epoch": 1.3817652397276747, + "grad_norm": 0.05124377831816673, + "learning_rate": 2.6390313437912083e-06, + "loss": 0.0015, + "step": 215440 + }, + { + "epoch": 1.3818293766214609, + "grad_norm": 0.13782000541687012, + "learning_rate": 2.638537982970997e-06, + "loss": 0.0012, + "step": 215450 + }, + { + "epoch": 1.381893513515247, + "grad_norm": 0.06755907833576202, + "learning_rate": 2.638044651741594e-06, + "loss": 0.002, + "step": 215460 + }, + { + "epoch": 1.3819576504090332, + "grad_norm": 0.0827302485704422, + "learning_rate": 2.6375513501091803e-06, + "loss": 0.0008, + "step": 215470 + }, + { + "epoch": 1.382021787302819, + "grad_norm": 0.26965776085853577, + "learning_rate": 2.637058078079936e-06, + "loss": 0.0017, + "step": 215480 + }, + { + "epoch": 1.3820859241966053, + "grad_norm": 0.04469255357980728, + "learning_rate": 2.6365648356600414e-06, + "loss": 0.0011, + "step": 215490 + }, + { + "epoch": 1.3821500610903914, + "grad_norm": 0.2724514305591583, + "learning_rate": 2.6360716228556805e-06, + "loss": 0.0014, + "step": 215500 + }, + { + "epoch": 1.3822141979841773, + "grad_norm": 0.05116065591573715, + "learning_rate": 2.6355784396730313e-06, + "loss": 0.0015, + "step": 215510 + }, + { + "epoch": 1.3822783348779635, + "grad_norm": 0.0161726213991642, + "learning_rate": 2.635085286118272e-06, + "loss": 0.0008, + "step": 215520 + }, + { + "epoch": 1.3823424717717496, + "grad_norm": 0.0626625269651413, + "learning_rate": 2.634592162197586e-06, + "loss": 0.0015, + "step": 215530 + }, + { + "epoch": 1.3824066086655358, + "grad_norm": 0.2643812596797943, + "learning_rate": 2.6340990679171508e-06, + "loss": 0.0021, + "step": 215540 + }, + { + "epoch": 1.382470745559322, + "grad_norm": 0.3324837386608124, + "learning_rate": 2.6336060032831457e-06, + "loss": 0.0028, + "step": 215550 + }, + { + "epoch": 1.3825348824531078, + "grad_norm": 0.11031866818666458, + "learning_rate": 2.6331129683017465e-06, + "loss": 0.001, + "step": 215560 + }, + { + "epoch": 1.382599019346894, + "grad_norm": 0.06517376005649567, + "learning_rate": 2.6326199629791347e-06, + "loss": 0.0011, + "step": 215570 + }, + { + "epoch": 1.3826631562406801, + "grad_norm": 0.060064904391765594, + "learning_rate": 2.632126987321488e-06, + "loss": 0.0021, + "step": 215580 + }, + { + "epoch": 1.382727293134466, + "grad_norm": 0.05003609135746956, + "learning_rate": 2.6316340413349817e-06, + "loss": 0.0011, + "step": 215590 + }, + { + "epoch": 1.3827914300282522, + "grad_norm": 0.01165304146707058, + "learning_rate": 2.6311411250257914e-06, + "loss": 0.0009, + "step": 215600 + }, + { + "epoch": 1.3828555669220384, + "grad_norm": 0.0635843276977539, + "learning_rate": 2.6306482384000974e-06, + "loss": 0.0027, + "step": 215610 + }, + { + "epoch": 1.3829197038158245, + "grad_norm": 0.023913035169243813, + "learning_rate": 2.630155381464075e-06, + "loss": 0.0012, + "step": 215620 + }, + { + "epoch": 1.3829838407096107, + "grad_norm": 0.029272524639964104, + "learning_rate": 2.6296625542238995e-06, + "loss": 0.0021, + "step": 215630 + }, + { + "epoch": 1.3830479776033968, + "grad_norm": 0.002349895890802145, + "learning_rate": 2.6291697566857436e-06, + "loss": 0.0009, + "step": 215640 + }, + { + "epoch": 1.3831121144971827, + "grad_norm": 0.038314275443553925, + "learning_rate": 2.6286769888557878e-06, + "loss": 0.002, + "step": 215650 + }, + { + "epoch": 1.383176251390969, + "grad_norm": 0.04881530627608299, + "learning_rate": 2.6281842507402034e-06, + "loss": 0.0018, + "step": 215660 + }, + { + "epoch": 1.383240388284755, + "grad_norm": 0.04537365213036537, + "learning_rate": 2.6276915423451645e-06, + "loss": 0.0006, + "step": 215670 + }, + { + "epoch": 1.383304525178541, + "grad_norm": 0.07577371597290039, + "learning_rate": 2.6271988636768465e-06, + "loss": 0.0017, + "step": 215680 + }, + { + "epoch": 1.3833686620723271, + "grad_norm": 0.18578864634037018, + "learning_rate": 2.6267062147414237e-06, + "loss": 0.0019, + "step": 215690 + }, + { + "epoch": 1.3834327989661133, + "grad_norm": 0.06750933080911636, + "learning_rate": 2.626213595545067e-06, + "loss": 0.0014, + "step": 215700 + }, + { + "epoch": 1.3834969358598994, + "grad_norm": 0.052284397184848785, + "learning_rate": 2.62572100609395e-06, + "loss": 0.0012, + "step": 215710 + }, + { + "epoch": 1.3835610727536856, + "grad_norm": 0.12799391150474548, + "learning_rate": 2.6252284463942468e-06, + "loss": 0.0025, + "step": 215720 + }, + { + "epoch": 1.3836252096474715, + "grad_norm": 0.11345848441123962, + "learning_rate": 2.624735916452129e-06, + "loss": 0.0012, + "step": 215730 + }, + { + "epoch": 1.3836893465412576, + "grad_norm": 0.06445007771253586, + "learning_rate": 2.624243416273767e-06, + "loss": 0.0017, + "step": 215740 + }, + { + "epoch": 1.3837534834350438, + "grad_norm": 0.07878265529870987, + "learning_rate": 2.623750945865331e-06, + "loss": 0.0019, + "step": 215750 + }, + { + "epoch": 1.38381762032883, + "grad_norm": 0.13059911131858826, + "learning_rate": 2.6232585052329964e-06, + "loss": 0.0014, + "step": 215760 + }, + { + "epoch": 1.3838817572226159, + "grad_norm": 0.09587746113538742, + "learning_rate": 2.6227660943829315e-06, + "loss": 0.001, + "step": 215770 + }, + { + "epoch": 1.383945894116402, + "grad_norm": 0.1065930724143982, + "learning_rate": 2.622273713321306e-06, + "loss": 0.0028, + "step": 215780 + }, + { + "epoch": 1.3840100310101882, + "grad_norm": 0.062053028494119644, + "learning_rate": 2.621781362054288e-06, + "loss": 0.0014, + "step": 215790 + }, + { + "epoch": 1.3840741679039743, + "grad_norm": 0.05019015073776245, + "learning_rate": 2.6212890405880516e-06, + "loss": 0.0016, + "step": 215800 + }, + { + "epoch": 1.3841383047977605, + "grad_norm": 0.10837151855230331, + "learning_rate": 2.620796748928763e-06, + "loss": 0.0019, + "step": 215810 + }, + { + "epoch": 1.3842024416915464, + "grad_norm": 0.04327791556715965, + "learning_rate": 2.62030448708259e-06, + "loss": 0.0013, + "step": 215820 + }, + { + "epoch": 1.3842665785853325, + "grad_norm": 0.14705218374729156, + "learning_rate": 2.6198122550557047e-06, + "loss": 0.0013, + "step": 215830 + }, + { + "epoch": 1.3843307154791187, + "grad_norm": 0.062407251447439194, + "learning_rate": 2.619320052854273e-06, + "loss": 0.0017, + "step": 215840 + }, + { + "epoch": 1.3843948523729046, + "grad_norm": 0.0543547049164772, + "learning_rate": 2.618827880484462e-06, + "loss": 0.0021, + "step": 215850 + }, + { + "epoch": 1.3844589892666908, + "grad_norm": 0.05428921431303024, + "learning_rate": 2.6183357379524376e-06, + "loss": 0.0013, + "step": 215860 + }, + { + "epoch": 1.384523126160477, + "grad_norm": 0.04059014469385147, + "learning_rate": 2.6178436252643703e-06, + "loss": 0.0051, + "step": 215870 + }, + { + "epoch": 1.384587263054263, + "grad_norm": 0.05927395448088646, + "learning_rate": 2.617351542426425e-06, + "loss": 0.0026, + "step": 215880 + }, + { + "epoch": 1.3846513999480492, + "grad_norm": 0.24868649244308472, + "learning_rate": 2.6168594894447687e-06, + "loss": 0.0013, + "step": 215890 + }, + { + "epoch": 1.3847155368418353, + "grad_norm": 0.09808999300003052, + "learning_rate": 2.6163674663255635e-06, + "loss": 0.0013, + "step": 215900 + }, + { + "epoch": 1.3847796737356213, + "grad_norm": 0.04929284378886223, + "learning_rate": 2.615875473074979e-06, + "loss": 0.0009, + "step": 215910 + }, + { + "epoch": 1.3848438106294074, + "grad_norm": 0.030347779393196106, + "learning_rate": 2.6153835096991793e-06, + "loss": 0.0012, + "step": 215920 + }, + { + "epoch": 1.3849079475231936, + "grad_norm": 0.1598779261112213, + "learning_rate": 2.614891576204328e-06, + "loss": 0.0043, + "step": 215930 + }, + { + "epoch": 1.3849720844169795, + "grad_norm": 0.3156174123287201, + "learning_rate": 2.614399672596588e-06, + "loss": 0.0023, + "step": 215940 + }, + { + "epoch": 1.3850362213107656, + "grad_norm": 0.14330817759037018, + "learning_rate": 2.613907798882128e-06, + "loss": 0.0018, + "step": 215950 + }, + { + "epoch": 1.3851003582045518, + "grad_norm": 0.30222201347351074, + "learning_rate": 2.6134159550671072e-06, + "loss": 0.0008, + "step": 215960 + }, + { + "epoch": 1.385164495098338, + "grad_norm": 0.07947871834039688, + "learning_rate": 2.612924141157689e-06, + "loss": 0.0016, + "step": 215970 + }, + { + "epoch": 1.385228631992124, + "grad_norm": 0.1046389490365982, + "learning_rate": 2.612432357160039e-06, + "loss": 0.0012, + "step": 215980 + }, + { + "epoch": 1.38529276888591, + "grad_norm": 0.005430108867585659, + "learning_rate": 2.6119406030803186e-06, + "loss": 0.0015, + "step": 215990 + }, + { + "epoch": 1.3853569057796962, + "grad_norm": 0.08242988586425781, + "learning_rate": 2.6114488789246882e-06, + "loss": 0.0005, + "step": 216000 + }, + { + "epoch": 1.3854210426734823, + "grad_norm": 0.03959507867693901, + "learning_rate": 2.610957184699309e-06, + "loss": 0.001, + "step": 216010 + }, + { + "epoch": 1.3854851795672682, + "grad_norm": 0.03189368173480034, + "learning_rate": 2.6104655204103435e-06, + "loss": 0.0014, + "step": 216020 + }, + { + "epoch": 1.3855493164610544, + "grad_norm": 0.0017806560499593616, + "learning_rate": 2.6099738860639547e-06, + "loss": 0.0021, + "step": 216030 + }, + { + "epoch": 1.3856134533548405, + "grad_norm": 0.02551179751753807, + "learning_rate": 2.6094822816663013e-06, + "loss": 0.0007, + "step": 216040 + }, + { + "epoch": 1.3856775902486267, + "grad_norm": 0.03730745241045952, + "learning_rate": 2.6089907072235436e-06, + "loss": 0.0009, + "step": 216050 + }, + { + "epoch": 1.3857417271424128, + "grad_norm": 0.09445857256650925, + "learning_rate": 2.608499162741839e-06, + "loss": 0.0027, + "step": 216060 + }, + { + "epoch": 1.385805864036199, + "grad_norm": 0.12052536755800247, + "learning_rate": 2.608007648227351e-06, + "loss": 0.0018, + "step": 216070 + }, + { + "epoch": 1.385870000929985, + "grad_norm": 0.04084416851401329, + "learning_rate": 2.6075161636862368e-06, + "loss": 0.0009, + "step": 216080 + }, + { + "epoch": 1.385934137823771, + "grad_norm": 0.01814129576086998, + "learning_rate": 2.607024709124653e-06, + "loss": 0.0014, + "step": 216090 + }, + { + "epoch": 1.3859982747175572, + "grad_norm": 0.35136717557907104, + "learning_rate": 2.606533284548762e-06, + "loss": 0.0013, + "step": 216100 + }, + { + "epoch": 1.3860624116113431, + "grad_norm": 0.03940017893910408, + "learning_rate": 2.6060418899647188e-06, + "loss": 0.002, + "step": 216110 + }, + { + "epoch": 1.3861265485051293, + "grad_norm": 0.01433576736599207, + "learning_rate": 2.6055505253786827e-06, + "loss": 0.0008, + "step": 216120 + }, + { + "epoch": 1.3861906853989154, + "grad_norm": 0.016492605209350586, + "learning_rate": 2.605059190796807e-06, + "loss": 0.0024, + "step": 216130 + }, + { + "epoch": 1.3862548222927016, + "grad_norm": 0.08506309986114502, + "learning_rate": 2.604567886225254e-06, + "loss": 0.0016, + "step": 216140 + }, + { + "epoch": 1.3863189591864877, + "grad_norm": 0.081624835729599, + "learning_rate": 2.6040766116701764e-06, + "loss": 0.0014, + "step": 216150 + }, + { + "epoch": 1.3863830960802737, + "grad_norm": 0.04499850049614906, + "learning_rate": 2.603585367137732e-06, + "loss": 0.0009, + "step": 216160 + }, + { + "epoch": 1.3864472329740598, + "grad_norm": 0.025472009554505348, + "learning_rate": 2.6030941526340737e-06, + "loss": 0.0013, + "step": 216170 + }, + { + "epoch": 1.386511369867846, + "grad_norm": 0.5035187005996704, + "learning_rate": 2.60260296816536e-06, + "loss": 0.0027, + "step": 216180 + }, + { + "epoch": 1.386575506761632, + "grad_norm": 0.07505131512880325, + "learning_rate": 2.602111813737745e-06, + "loss": 0.0013, + "step": 216190 + }, + { + "epoch": 1.386639643655418, + "grad_norm": 0.13377723097801208, + "learning_rate": 2.6016206893573825e-06, + "loss": 0.0015, + "step": 216200 + }, + { + "epoch": 1.3867037805492042, + "grad_norm": 0.28014901280403137, + "learning_rate": 2.601129595030425e-06, + "loss": 0.0014, + "step": 216210 + }, + { + "epoch": 1.3867679174429903, + "grad_norm": 0.008496072143316269, + "learning_rate": 2.60063853076303e-06, + "loss": 0.0023, + "step": 216220 + }, + { + "epoch": 1.3868320543367765, + "grad_norm": 0.08742309361696243, + "learning_rate": 2.6001474965613493e-06, + "loss": 0.0008, + "step": 216230 + }, + { + "epoch": 1.3868961912305626, + "grad_norm": 0.10961107164621353, + "learning_rate": 2.5996564924315332e-06, + "loss": 0.0024, + "step": 216240 + }, + { + "epoch": 1.3869603281243486, + "grad_norm": 0.1274971067905426, + "learning_rate": 2.5991655183797394e-06, + "loss": 0.0028, + "step": 216250 + }, + { + "epoch": 1.3870244650181347, + "grad_norm": 0.07134983688592911, + "learning_rate": 2.5986745744121168e-06, + "loss": 0.0016, + "step": 216260 + }, + { + "epoch": 1.3870886019119208, + "grad_norm": 0.07663287222385406, + "learning_rate": 2.5981836605348174e-06, + "loss": 0.0007, + "step": 216270 + }, + { + "epoch": 1.3871527388057068, + "grad_norm": 0.15232987701892853, + "learning_rate": 2.597692776753993e-06, + "loss": 0.0028, + "step": 216280 + }, + { + "epoch": 1.387216875699493, + "grad_norm": 0.04419165849685669, + "learning_rate": 2.597201923075796e-06, + "loss": 0.0006, + "step": 216290 + }, + { + "epoch": 1.387281012593279, + "grad_norm": 0.051558490842580795, + "learning_rate": 2.596711099506376e-06, + "loss": 0.0016, + "step": 216300 + }, + { + "epoch": 1.3873451494870652, + "grad_norm": 0.041023433208465576, + "learning_rate": 2.596220306051884e-06, + "loss": 0.0012, + "step": 216310 + }, + { + "epoch": 1.3874092863808514, + "grad_norm": 0.04962332919239998, + "learning_rate": 2.595729542718467e-06, + "loss": 0.0015, + "step": 216320 + }, + { + "epoch": 1.3874734232746375, + "grad_norm": 0.023958439007401466, + "learning_rate": 2.59523880951228e-06, + "loss": 0.0007, + "step": 216330 + }, + { + "epoch": 1.3875375601684234, + "grad_norm": 0.0671641007065773, + "learning_rate": 2.5947481064394688e-06, + "loss": 0.0006, + "step": 216340 + }, + { + "epoch": 1.3876016970622096, + "grad_norm": 0.13715045154094696, + "learning_rate": 2.5942574335061826e-06, + "loss": 0.001, + "step": 216350 + }, + { + "epoch": 1.3876658339559957, + "grad_norm": 0.1365310549736023, + "learning_rate": 2.593766790718568e-06, + "loss": 0.0012, + "step": 216360 + }, + { + "epoch": 1.3877299708497817, + "grad_norm": 0.06390999257564545, + "learning_rate": 2.593276178082778e-06, + "loss": 0.0014, + "step": 216370 + }, + { + "epoch": 1.3877941077435678, + "grad_norm": 0.047890402376651764, + "learning_rate": 2.5927855956049576e-06, + "loss": 0.0015, + "step": 216380 + }, + { + "epoch": 1.387858244637354, + "grad_norm": 0.12084142118692398, + "learning_rate": 2.5922950432912515e-06, + "loss": 0.0011, + "step": 216390 + }, + { + "epoch": 1.3879223815311401, + "grad_norm": 0.027084503322839737, + "learning_rate": 2.5918045211478116e-06, + "loss": 0.0017, + "step": 216400 + }, + { + "epoch": 1.3879865184249263, + "grad_norm": 0.1095394641160965, + "learning_rate": 2.5913140291807814e-06, + "loss": 0.0017, + "step": 216410 + }, + { + "epoch": 1.3880506553187122, + "grad_norm": 0.06118927150964737, + "learning_rate": 2.590823567396309e-06, + "loss": 0.0012, + "step": 216420 + }, + { + "epoch": 1.3881147922124983, + "grad_norm": 0.13544799387454987, + "learning_rate": 2.5903331358005364e-06, + "loss": 0.0024, + "step": 216430 + }, + { + "epoch": 1.3881789291062845, + "grad_norm": 0.04669766500592232, + "learning_rate": 2.5898427343996136e-06, + "loss": 0.0017, + "step": 216440 + }, + { + "epoch": 1.3882430660000704, + "grad_norm": 0.17931942641735077, + "learning_rate": 2.589352363199684e-06, + "loss": 0.0016, + "step": 216450 + }, + { + "epoch": 1.3883072028938566, + "grad_norm": 0.042885906994342804, + "learning_rate": 2.588862022206893e-06, + "loss": 0.001, + "step": 216460 + }, + { + "epoch": 1.3883713397876427, + "grad_norm": 0.025600217282772064, + "learning_rate": 2.588371711427381e-06, + "loss": 0.0035, + "step": 216470 + }, + { + "epoch": 1.3884354766814289, + "grad_norm": 0.23572660982608795, + "learning_rate": 2.587881430867298e-06, + "loss": 0.0009, + "step": 216480 + }, + { + "epoch": 1.388499613575215, + "grad_norm": 0.025940345600247383, + "learning_rate": 2.5873911805327835e-06, + "loss": 0.0011, + "step": 216490 + }, + { + "epoch": 1.3885637504690012, + "grad_norm": 0.06433112919330597, + "learning_rate": 2.586900960429982e-06, + "loss": 0.0011, + "step": 216500 + }, + { + "epoch": 1.388627887362787, + "grad_norm": 0.06253568828105927, + "learning_rate": 2.5864107705650356e-06, + "loss": 0.0012, + "step": 216510 + }, + { + "epoch": 1.3886920242565732, + "grad_norm": 0.04966779425740242, + "learning_rate": 2.585920610944087e-06, + "loss": 0.0029, + "step": 216520 + }, + { + "epoch": 1.3887561611503594, + "grad_norm": 0.0927979052066803, + "learning_rate": 2.58543048157328e-06, + "loss": 0.0015, + "step": 216530 + }, + { + "epoch": 1.3888202980441453, + "grad_norm": 0.029799990355968475, + "learning_rate": 2.584940382458752e-06, + "loss": 0.0026, + "step": 216540 + }, + { + "epoch": 1.3888844349379315, + "grad_norm": 0.023917008191347122, + "learning_rate": 2.584450313606649e-06, + "loss": 0.0006, + "step": 216550 + }, + { + "epoch": 1.3889485718317176, + "grad_norm": 0.009702787734568119, + "learning_rate": 2.5839602750231106e-06, + "loss": 0.0007, + "step": 216560 + }, + { + "epoch": 1.3890127087255038, + "grad_norm": 0.092452771961689, + "learning_rate": 2.5834702667142765e-06, + "loss": 0.0012, + "step": 216570 + }, + { + "epoch": 1.38907684561929, + "grad_norm": 0.057914964854717255, + "learning_rate": 2.5829802886862845e-06, + "loss": 0.0011, + "step": 216580 + }, + { + "epoch": 1.3891409825130758, + "grad_norm": 0.11667285114526749, + "learning_rate": 2.582490340945279e-06, + "loss": 0.0015, + "step": 216590 + }, + { + "epoch": 1.389205119406862, + "grad_norm": 0.12485719472169876, + "learning_rate": 2.582000423497397e-06, + "loss": 0.0015, + "step": 216600 + }, + { + "epoch": 1.3892692563006481, + "grad_norm": 0.06877756863832474, + "learning_rate": 2.5815105363487792e-06, + "loss": 0.0014, + "step": 216610 + }, + { + "epoch": 1.3893333931944343, + "grad_norm": 0.10036937892436981, + "learning_rate": 2.5810206795055593e-06, + "loss": 0.0021, + "step": 216620 + }, + { + "epoch": 1.3893975300882202, + "grad_norm": 0.053731124848127365, + "learning_rate": 2.5805308529738814e-06, + "loss": 0.0013, + "step": 216630 + }, + { + "epoch": 1.3894616669820063, + "grad_norm": 0.03762771934270859, + "learning_rate": 2.5800410567598823e-06, + "loss": 0.0012, + "step": 216640 + }, + { + "epoch": 1.3895258038757925, + "grad_norm": 0.0443277582526207, + "learning_rate": 2.5795512908696953e-06, + "loss": 0.0017, + "step": 216650 + }, + { + "epoch": 1.3895899407695786, + "grad_norm": 0.054403021931648254, + "learning_rate": 2.579061555309462e-06, + "loss": 0.0018, + "step": 216660 + }, + { + "epoch": 1.3896540776633648, + "grad_norm": 0.09955320507287979, + "learning_rate": 2.5785718500853186e-06, + "loss": 0.0021, + "step": 216670 + }, + { + "epoch": 1.3897182145571507, + "grad_norm": 0.22564105689525604, + "learning_rate": 2.5780821752034003e-06, + "loss": 0.0012, + "step": 216680 + }, + { + "epoch": 1.3897823514509369, + "grad_norm": 0.11402849853038788, + "learning_rate": 2.5775925306698408e-06, + "loss": 0.0018, + "step": 216690 + }, + { + "epoch": 1.389846488344723, + "grad_norm": 0.1865815371274948, + "learning_rate": 2.57710291649078e-06, + "loss": 0.0011, + "step": 216700 + }, + { + "epoch": 1.389910625238509, + "grad_norm": 0.036226801574230194, + "learning_rate": 2.576613332672352e-06, + "loss": 0.0016, + "step": 216710 + }, + { + "epoch": 1.389974762132295, + "grad_norm": 0.09096764773130417, + "learning_rate": 2.5761237792206904e-06, + "loss": 0.0013, + "step": 216720 + }, + { + "epoch": 1.3900388990260812, + "grad_norm": 0.03716425597667694, + "learning_rate": 2.575634256141929e-06, + "loss": 0.0007, + "step": 216730 + }, + { + "epoch": 1.3901030359198674, + "grad_norm": 0.04131145030260086, + "learning_rate": 2.5751447634422037e-06, + "loss": 0.0013, + "step": 216740 + }, + { + "epoch": 1.3901671728136535, + "grad_norm": 0.06567847728729248, + "learning_rate": 2.5746553011276488e-06, + "loss": 0.0015, + "step": 216750 + }, + { + "epoch": 1.3902313097074397, + "grad_norm": 0.03881232813000679, + "learning_rate": 2.574165869204396e-06, + "loss": 0.002, + "step": 216760 + }, + { + "epoch": 1.3902954466012256, + "grad_norm": 0.15144580602645874, + "learning_rate": 2.5736764676785758e-06, + "loss": 0.0014, + "step": 216770 + }, + { + "epoch": 1.3903595834950118, + "grad_norm": 0.0609845295548439, + "learning_rate": 2.5731870965563244e-06, + "loss": 0.0017, + "step": 216780 + }, + { + "epoch": 1.390423720388798, + "grad_norm": 0.07088928669691086, + "learning_rate": 2.572697755843775e-06, + "loss": 0.0009, + "step": 216790 + }, + { + "epoch": 1.3904878572825838, + "grad_norm": 0.16477276384830475, + "learning_rate": 2.5722084455470564e-06, + "loss": 0.0023, + "step": 216800 + }, + { + "epoch": 1.39055199417637, + "grad_norm": 0.021267952397465706, + "learning_rate": 2.5717191656723005e-06, + "loss": 0.0005, + "step": 216810 + }, + { + "epoch": 1.3906161310701561, + "grad_norm": 0.03198913484811783, + "learning_rate": 2.5712299162256404e-06, + "loss": 0.001, + "step": 216820 + }, + { + "epoch": 1.3906802679639423, + "grad_norm": 0.025601960718631744, + "learning_rate": 2.5707406972132053e-06, + "loss": 0.0022, + "step": 216830 + }, + { + "epoch": 1.3907444048577284, + "grad_norm": 0.0331542082130909, + "learning_rate": 2.5702515086411252e-06, + "loss": 0.0008, + "step": 216840 + }, + { + "epoch": 1.3908085417515144, + "grad_norm": 0.1769125908613205, + "learning_rate": 2.5697623505155283e-06, + "loss": 0.0014, + "step": 216850 + }, + { + "epoch": 1.3908726786453005, + "grad_norm": 0.04075758531689644, + "learning_rate": 2.569273222842548e-06, + "loss": 0.0014, + "step": 216860 + }, + { + "epoch": 1.3909368155390867, + "grad_norm": 0.09046512097120285, + "learning_rate": 2.568784125628312e-06, + "loss": 0.0016, + "step": 216870 + }, + { + "epoch": 1.3910009524328728, + "grad_norm": 0.05227779224514961, + "learning_rate": 2.568295058878948e-06, + "loss": 0.0012, + "step": 216880 + }, + { + "epoch": 1.3910650893266587, + "grad_norm": 0.084907665848732, + "learning_rate": 2.5678060226005826e-06, + "loss": 0.0014, + "step": 216890 + }, + { + "epoch": 1.3911292262204449, + "grad_norm": 0.08666041493415833, + "learning_rate": 2.567317016799348e-06, + "loss": 0.0012, + "step": 216900 + }, + { + "epoch": 1.391193363114231, + "grad_norm": 0.16974017024040222, + "learning_rate": 2.56682804148137e-06, + "loss": 0.0015, + "step": 216910 + }, + { + "epoch": 1.3912575000080172, + "grad_norm": 0.16871021687984467, + "learning_rate": 2.5663390966527734e-06, + "loss": 0.0013, + "step": 216920 + }, + { + "epoch": 1.3913216369018033, + "grad_norm": 0.034537214785814285, + "learning_rate": 2.5658501823196903e-06, + "loss": 0.0016, + "step": 216930 + }, + { + "epoch": 1.3913857737955893, + "grad_norm": 0.038643766194581985, + "learning_rate": 2.5653612984882425e-06, + "loss": 0.0008, + "step": 216940 + }, + { + "epoch": 1.3914499106893754, + "grad_norm": 0.09909739345312119, + "learning_rate": 2.564872445164559e-06, + "loss": 0.0011, + "step": 216950 + }, + { + "epoch": 1.3915140475831616, + "grad_norm": 0.07292510569095612, + "learning_rate": 2.564383622354761e-06, + "loss": 0.0018, + "step": 216960 + }, + { + "epoch": 1.3915781844769475, + "grad_norm": 0.028376858681440353, + "learning_rate": 2.5638948300649795e-06, + "loss": 0.0015, + "step": 216970 + }, + { + "epoch": 1.3916423213707336, + "grad_norm": 0.016274534165859222, + "learning_rate": 2.563406068301337e-06, + "loss": 0.0017, + "step": 216980 + }, + { + "epoch": 1.3917064582645198, + "grad_norm": 0.022052470594644547, + "learning_rate": 2.5629173370699575e-06, + "loss": 0.0006, + "step": 216990 + }, + { + "epoch": 1.391770595158306, + "grad_norm": 0.019932232797145844, + "learning_rate": 2.5624286363769635e-06, + "loss": 0.0015, + "step": 217000 + }, + { + "epoch": 1.391834732052092, + "grad_norm": 0.07324390858411789, + "learning_rate": 2.561939966228483e-06, + "loss": 0.0013, + "step": 217010 + }, + { + "epoch": 1.3918988689458782, + "grad_norm": 0.1380704641342163, + "learning_rate": 2.5614513266306374e-06, + "loss": 0.0019, + "step": 217020 + }, + { + "epoch": 1.3919630058396641, + "grad_norm": 0.09188283234834671, + "learning_rate": 2.560962717589549e-06, + "loss": 0.0006, + "step": 217030 + }, + { + "epoch": 1.3920271427334503, + "grad_norm": 0.07266037911176682, + "learning_rate": 2.5604741391113384e-06, + "loss": 0.0014, + "step": 217040 + }, + { + "epoch": 1.3920912796272364, + "grad_norm": 0.077426478266716, + "learning_rate": 2.5599855912021334e-06, + "loss": 0.0016, + "step": 217050 + }, + { + "epoch": 1.3921554165210224, + "grad_norm": 0.053078003227710724, + "learning_rate": 2.5594970738680526e-06, + "loss": 0.0026, + "step": 217060 + }, + { + "epoch": 1.3922195534148085, + "grad_norm": 0.002474989043548703, + "learning_rate": 2.559008587115216e-06, + "loss": 0.0007, + "step": 217070 + }, + { + "epoch": 1.3922836903085947, + "grad_norm": 0.06136370450258255, + "learning_rate": 2.5585201309497477e-06, + "loss": 0.0036, + "step": 217080 + }, + { + "epoch": 1.3923478272023808, + "grad_norm": 0.11670518666505814, + "learning_rate": 2.558031705377767e-06, + "loss": 0.001, + "step": 217090 + }, + { + "epoch": 1.392411964096167, + "grad_norm": 0.0774407833814621, + "learning_rate": 2.5575433104053945e-06, + "loss": 0.0012, + "step": 217100 + }, + { + "epoch": 1.392476100989953, + "grad_norm": 0.18761223554611206, + "learning_rate": 2.557054946038748e-06, + "loss": 0.0011, + "step": 217110 + }, + { + "epoch": 1.392540237883739, + "grad_norm": 0.1756909042596817, + "learning_rate": 2.5565666122839507e-06, + "loss": 0.0013, + "step": 217120 + }, + { + "epoch": 1.3926043747775252, + "grad_norm": 0.044896550476551056, + "learning_rate": 2.5560783091471203e-06, + "loss": 0.0015, + "step": 217130 + }, + { + "epoch": 1.3926685116713111, + "grad_norm": 0.255946546792984, + "learning_rate": 2.555590036634375e-06, + "loss": 0.0028, + "step": 217140 + }, + { + "epoch": 1.3927326485650973, + "grad_norm": 0.06978725641965866, + "learning_rate": 2.555101794751832e-06, + "loss": 0.0014, + "step": 217150 + }, + { + "epoch": 1.3927967854588834, + "grad_norm": 0.03100358322262764, + "learning_rate": 2.554613583505613e-06, + "loss": 0.0016, + "step": 217160 + }, + { + "epoch": 1.3928609223526696, + "grad_norm": 0.04570413753390312, + "learning_rate": 2.5541254029018324e-06, + "loss": 0.0016, + "step": 217170 + }, + { + "epoch": 1.3929250592464557, + "grad_norm": 0.0653790831565857, + "learning_rate": 2.5536372529466093e-06, + "loss": 0.0016, + "step": 217180 + }, + { + "epoch": 1.3929891961402419, + "grad_norm": 0.19819806516170502, + "learning_rate": 2.553149133646058e-06, + "loss": 0.0021, + "step": 217190 + }, + { + "epoch": 1.3930533330340278, + "grad_norm": 0.1131213903427124, + "learning_rate": 2.5526610450062983e-06, + "loss": 0.0012, + "step": 217200 + }, + { + "epoch": 1.393117469927814, + "grad_norm": 0.09659481793642044, + "learning_rate": 2.5521729870334454e-06, + "loss": 0.001, + "step": 217210 + }, + { + "epoch": 1.3931816068216, + "grad_norm": 0.05584048852324486, + "learning_rate": 2.5516849597336125e-06, + "loss": 0.0009, + "step": 217220 + }, + { + "epoch": 1.393245743715386, + "grad_norm": 0.12968671321868896, + "learning_rate": 2.551196963112918e-06, + "loss": 0.0025, + "step": 217230 + }, + { + "epoch": 1.3933098806091722, + "grad_norm": 0.15531879663467407, + "learning_rate": 2.5507089971774758e-06, + "loss": 0.0015, + "step": 217240 + }, + { + "epoch": 1.3933740175029583, + "grad_norm": 0.0011866064742207527, + "learning_rate": 2.5502210619334013e-06, + "loss": 0.0009, + "step": 217250 + }, + { + "epoch": 1.3934381543967445, + "grad_norm": 0.06492505222558975, + "learning_rate": 2.5497331573868046e-06, + "loss": 0.0014, + "step": 217260 + }, + { + "epoch": 1.3935022912905306, + "grad_norm": 0.07042653113603592, + "learning_rate": 2.5492452835438055e-06, + "loss": 0.0017, + "step": 217270 + }, + { + "epoch": 1.3935664281843165, + "grad_norm": 0.008883239701390266, + "learning_rate": 2.5487574404105135e-06, + "loss": 0.0012, + "step": 217280 + }, + { + "epoch": 1.3936305650781027, + "grad_norm": 0.5036296248435974, + "learning_rate": 2.5482696279930435e-06, + "loss": 0.001, + "step": 217290 + }, + { + "epoch": 1.3936947019718888, + "grad_norm": 0.07283367216587067, + "learning_rate": 2.547781846297505e-06, + "loss": 0.0012, + "step": 217300 + }, + { + "epoch": 1.393758838865675, + "grad_norm": 0.017748216167092323, + "learning_rate": 2.5472940953300146e-06, + "loss": 0.002, + "step": 217310 + }, + { + "epoch": 1.393822975759461, + "grad_norm": 0.10280224680900574, + "learning_rate": 2.546806375096682e-06, + "loss": 0.0009, + "step": 217320 + }, + { + "epoch": 1.393887112653247, + "grad_norm": 0.017901591956615448, + "learning_rate": 2.5463186856036194e-06, + "loss": 0.0017, + "step": 217330 + }, + { + "epoch": 1.3939512495470332, + "grad_norm": 0.050007883459329605, + "learning_rate": 2.5458310268569343e-06, + "loss": 0.0013, + "step": 217340 + }, + { + "epoch": 1.3940153864408193, + "grad_norm": 0.08111511170864105, + "learning_rate": 2.545343398862743e-06, + "loss": 0.001, + "step": 217350 + }, + { + "epoch": 1.3940795233346055, + "grad_norm": 0.13947226107120514, + "learning_rate": 2.544855801627153e-06, + "loss": 0.0018, + "step": 217360 + }, + { + "epoch": 1.3941436602283914, + "grad_norm": 0.04099639132618904, + "learning_rate": 2.5443682351562727e-06, + "loss": 0.0009, + "step": 217370 + }, + { + "epoch": 1.3942077971221776, + "grad_norm": 0.0020156067330390215, + "learning_rate": 2.543880699456215e-06, + "loss": 0.0006, + "step": 217380 + }, + { + "epoch": 1.3942719340159637, + "grad_norm": 0.001230254303663969, + "learning_rate": 2.543393194533088e-06, + "loss": 0.0008, + "step": 217390 + }, + { + "epoch": 1.3943360709097496, + "grad_norm": 0.07310499995946884, + "learning_rate": 2.542905720393e-06, + "loss": 0.0019, + "step": 217400 + }, + { + "epoch": 1.3944002078035358, + "grad_norm": 0.11221767961978912, + "learning_rate": 2.542418277042057e-06, + "loss": 0.0008, + "step": 217410 + }, + { + "epoch": 1.394464344697322, + "grad_norm": 0.19872452318668365, + "learning_rate": 2.5419308644863717e-06, + "loss": 0.0018, + "step": 217420 + }, + { + "epoch": 1.394528481591108, + "grad_norm": 0.0393829271197319, + "learning_rate": 2.541443482732049e-06, + "loss": 0.0011, + "step": 217430 + }, + { + "epoch": 1.3945926184848942, + "grad_norm": 0.012060633860528469, + "learning_rate": 2.540956131785197e-06, + "loss": 0.0013, + "step": 217440 + }, + { + "epoch": 1.3946567553786804, + "grad_norm": 0.0534462034702301, + "learning_rate": 2.54046881165192e-06, + "loss": 0.0018, + "step": 217450 + }, + { + "epoch": 1.3947208922724663, + "grad_norm": 0.013612331822514534, + "learning_rate": 2.5399815223383286e-06, + "loss": 0.0011, + "step": 217460 + }, + { + "epoch": 1.3947850291662525, + "grad_norm": 0.05973077192902565, + "learning_rate": 2.539494263850527e-06, + "loss": 0.001, + "step": 217470 + }, + { + "epoch": 1.3948491660600386, + "grad_norm": 0.0985378846526146, + "learning_rate": 2.5390070361946205e-06, + "loss": 0.0012, + "step": 217480 + }, + { + "epoch": 1.3949133029538245, + "grad_norm": 0.22525636851787567, + "learning_rate": 2.538519839376713e-06, + "loss": 0.0013, + "step": 217490 + }, + { + "epoch": 1.3949774398476107, + "grad_norm": 0.1863296777009964, + "learning_rate": 2.5380326734029127e-06, + "loss": 0.001, + "step": 217500 + }, + { + "epoch": 1.3950415767413968, + "grad_norm": 0.0970177873969078, + "learning_rate": 2.5375455382793223e-06, + "loss": 0.0016, + "step": 217510 + }, + { + "epoch": 1.395105713635183, + "grad_norm": 0.06821413338184357, + "learning_rate": 2.5370584340120453e-06, + "loss": 0.0016, + "step": 217520 + }, + { + "epoch": 1.3951698505289691, + "grad_norm": 0.06517302244901657, + "learning_rate": 2.5365713606071874e-06, + "loss": 0.0009, + "step": 217530 + }, + { + "epoch": 1.395233987422755, + "grad_norm": 0.012658648192882538, + "learning_rate": 2.5360843180708516e-06, + "loss": 0.0014, + "step": 217540 + }, + { + "epoch": 1.3952981243165412, + "grad_norm": 0.16197888553142548, + "learning_rate": 2.5355973064091378e-06, + "loss": 0.0014, + "step": 217550 + }, + { + "epoch": 1.3953622612103274, + "grad_norm": 0.11316743493080139, + "learning_rate": 2.535110325628153e-06, + "loss": 0.0012, + "step": 217560 + }, + { + "epoch": 1.3954263981041133, + "grad_norm": 0.42287132143974304, + "learning_rate": 2.5346233757339956e-06, + "loss": 0.0028, + "step": 217570 + }, + { + "epoch": 1.3954905349978994, + "grad_norm": 0.06374204158782959, + "learning_rate": 2.534136456732771e-06, + "loss": 0.002, + "step": 217580 + }, + { + "epoch": 1.3955546718916856, + "grad_norm": 0.18178769946098328, + "learning_rate": 2.5336495686305796e-06, + "loss": 0.0011, + "step": 217590 + }, + { + "epoch": 1.3956188087854717, + "grad_norm": 0.147816002368927, + "learning_rate": 2.5331627114335213e-06, + "loss": 0.0023, + "step": 217600 + }, + { + "epoch": 1.3956829456792579, + "grad_norm": 0.2753295302391052, + "learning_rate": 2.532675885147695e-06, + "loss": 0.0028, + "step": 217610 + }, + { + "epoch": 1.395747082573044, + "grad_norm": 0.07842117547988892, + "learning_rate": 2.5321890897792057e-06, + "loss": 0.0015, + "step": 217620 + }, + { + "epoch": 1.39581121946683, + "grad_norm": 0.04347194358706474, + "learning_rate": 2.53170232533415e-06, + "loss": 0.0018, + "step": 217630 + }, + { + "epoch": 1.395875356360616, + "grad_norm": 0.11565347760915756, + "learning_rate": 2.531215591818627e-06, + "loss": 0.001, + "step": 217640 + }, + { + "epoch": 1.3959394932544023, + "grad_norm": 0.2027328461408615, + "learning_rate": 2.5307288892387395e-06, + "loss": 0.0023, + "step": 217650 + }, + { + "epoch": 1.3960036301481882, + "grad_norm": 0.03963017091155052, + "learning_rate": 2.5302422176005824e-06, + "loss": 0.0011, + "step": 217660 + }, + { + "epoch": 1.3960677670419743, + "grad_norm": 0.22279760241508484, + "learning_rate": 2.5297555769102564e-06, + "loss": 0.0024, + "step": 217670 + }, + { + "epoch": 1.3961319039357605, + "grad_norm": 0.12143002450466156, + "learning_rate": 2.5292689671738567e-06, + "loss": 0.0011, + "step": 217680 + }, + { + "epoch": 1.3961960408295466, + "grad_norm": 0.046854518353939056, + "learning_rate": 2.5287823883974837e-06, + "loss": 0.0032, + "step": 217690 + }, + { + "epoch": 1.3962601777233328, + "grad_norm": 0.06825122237205505, + "learning_rate": 2.528295840587234e-06, + "loss": 0.0015, + "step": 217700 + }, + { + "epoch": 1.3963243146171187, + "grad_norm": 0.10842904448509216, + "learning_rate": 2.5278093237492042e-06, + "loss": 0.0023, + "step": 217710 + }, + { + "epoch": 1.3963884515109048, + "grad_norm": 0.016050230711698532, + "learning_rate": 2.527322837889488e-06, + "loss": 0.0012, + "step": 217720 + }, + { + "epoch": 1.396452588404691, + "grad_norm": 0.1674279272556305, + "learning_rate": 2.526836383014186e-06, + "loss": 0.0011, + "step": 217730 + }, + { + "epoch": 1.3965167252984771, + "grad_norm": 0.2426510751247406, + "learning_rate": 2.526349959129392e-06, + "loss": 0.002, + "step": 217740 + }, + { + "epoch": 1.396580862192263, + "grad_norm": 0.0029317704029381275, + "learning_rate": 2.5258635662412007e-06, + "loss": 0.0015, + "step": 217750 + }, + { + "epoch": 1.3966449990860492, + "grad_norm": 0.15908031165599823, + "learning_rate": 2.525377204355705e-06, + "loss": 0.0011, + "step": 217760 + }, + { + "epoch": 1.3967091359798354, + "grad_norm": 0.008479459211230278, + "learning_rate": 2.5248908734790035e-06, + "loss": 0.0011, + "step": 217770 + }, + { + "epoch": 1.3967732728736215, + "grad_norm": 0.024743076413869858, + "learning_rate": 2.5244045736171883e-06, + "loss": 0.0014, + "step": 217780 + }, + { + "epoch": 1.3968374097674077, + "grad_norm": 0.16069556772708893, + "learning_rate": 2.5239183047763514e-06, + "loss": 0.0012, + "step": 217790 + }, + { + "epoch": 1.3969015466611936, + "grad_norm": 0.06781712919473648, + "learning_rate": 2.523432066962589e-06, + "loss": 0.0005, + "step": 217800 + }, + { + "epoch": 1.3969656835549797, + "grad_norm": 0.13242220878601074, + "learning_rate": 2.5229458601819934e-06, + "loss": 0.0016, + "step": 217810 + }, + { + "epoch": 1.397029820448766, + "grad_norm": 0.03151949495077133, + "learning_rate": 2.522459684440657e-06, + "loss": 0.0014, + "step": 217820 + }, + { + "epoch": 1.3970939573425518, + "grad_norm": 0.09716480225324631, + "learning_rate": 2.5219735397446686e-06, + "loss": 0.0006, + "step": 217830 + }, + { + "epoch": 1.397158094236338, + "grad_norm": 0.12030621618032455, + "learning_rate": 2.521487426100125e-06, + "loss": 0.0009, + "step": 217840 + }, + { + "epoch": 1.3972222311301241, + "grad_norm": 0.06489705294370651, + "learning_rate": 2.521001343513115e-06, + "loss": 0.0007, + "step": 217850 + }, + { + "epoch": 1.3972863680239103, + "grad_norm": 0.13355490565299988, + "learning_rate": 2.5205152919897304e-06, + "loss": 0.0017, + "step": 217860 + }, + { + "epoch": 1.3973505049176964, + "grad_norm": 0.020991241559386253, + "learning_rate": 2.5200292715360588e-06, + "loss": 0.0007, + "step": 217870 + }, + { + "epoch": 1.3974146418114826, + "grad_norm": 0.10901031643152237, + "learning_rate": 2.519543282158195e-06, + "loss": 0.0012, + "step": 217880 + }, + { + "epoch": 1.3974787787052685, + "grad_norm": 0.04401480406522751, + "learning_rate": 2.5190573238622266e-06, + "loss": 0.0021, + "step": 217890 + }, + { + "epoch": 1.3975429155990546, + "grad_norm": 0.026542415842413902, + "learning_rate": 2.5185713966542435e-06, + "loss": 0.001, + "step": 217900 + }, + { + "epoch": 1.3976070524928408, + "grad_norm": 0.09319578111171722, + "learning_rate": 2.5180855005403316e-06, + "loss": 0.0016, + "step": 217910 + }, + { + "epoch": 1.3976711893866267, + "grad_norm": 0.09715227037668228, + "learning_rate": 2.517599635526584e-06, + "loss": 0.0012, + "step": 217920 + }, + { + "epoch": 1.3977353262804129, + "grad_norm": 0.157813161611557, + "learning_rate": 2.517113801619088e-06, + "loss": 0.0022, + "step": 217930 + }, + { + "epoch": 1.397799463174199, + "grad_norm": 0.09481701999902725, + "learning_rate": 2.5166279988239283e-06, + "loss": 0.0014, + "step": 217940 + }, + { + "epoch": 1.3978636000679852, + "grad_norm": 0.046457622200250626, + "learning_rate": 2.5161422271471964e-06, + "loss": 0.0011, + "step": 217950 + }, + { + "epoch": 1.3979277369617713, + "grad_norm": 0.0647912546992302, + "learning_rate": 2.5156564865949774e-06, + "loss": 0.0027, + "step": 217960 + }, + { + "epoch": 1.3979918738555572, + "grad_norm": 0.05202125012874603, + "learning_rate": 2.515170777173358e-06, + "loss": 0.0016, + "step": 217970 + }, + { + "epoch": 1.3980560107493434, + "grad_norm": 0.13212570548057556, + "learning_rate": 2.514685098888423e-06, + "loss": 0.0014, + "step": 217980 + }, + { + "epoch": 1.3981201476431295, + "grad_norm": 0.019459405913949013, + "learning_rate": 2.514199451746262e-06, + "loss": 0.0008, + "step": 217990 + }, + { + "epoch": 1.3981842845369155, + "grad_norm": 0.054449573159217834, + "learning_rate": 2.513713835752959e-06, + "loss": 0.0008, + "step": 218000 + }, + { + "epoch": 1.3982484214307016, + "grad_norm": 0.08177845180034637, + "learning_rate": 2.513228250914598e-06, + "loss": 0.001, + "step": 218010 + }, + { + "epoch": 1.3983125583244878, + "grad_norm": 0.05067718029022217, + "learning_rate": 2.512742697237262e-06, + "loss": 0.0014, + "step": 218020 + }, + { + "epoch": 1.398376695218274, + "grad_norm": 0.1587323546409607, + "learning_rate": 2.5122571747270397e-06, + "loss": 0.0009, + "step": 218030 + }, + { + "epoch": 1.39844083211206, + "grad_norm": 0.08408209681510925, + "learning_rate": 2.511771683390013e-06, + "loss": 0.0014, + "step": 218040 + }, + { + "epoch": 1.3985049690058462, + "grad_norm": 0.011681445874273777, + "learning_rate": 2.5112862232322654e-06, + "loss": 0.0009, + "step": 218050 + }, + { + "epoch": 1.3985691058996321, + "grad_norm": 0.034590236842632294, + "learning_rate": 2.510800794259879e-06, + "loss": 0.0012, + "step": 218060 + }, + { + "epoch": 1.3986332427934183, + "grad_norm": 0.03324037417769432, + "learning_rate": 2.5103153964789386e-06, + "loss": 0.0009, + "step": 218070 + }, + { + "epoch": 1.3986973796872044, + "grad_norm": 0.019120942801237106, + "learning_rate": 2.5098300298955257e-06, + "loss": 0.0019, + "step": 218080 + }, + { + "epoch": 1.3987615165809903, + "grad_norm": 0.1737535446882248, + "learning_rate": 2.5093446945157206e-06, + "loss": 0.0012, + "step": 218090 + }, + { + "epoch": 1.3988256534747765, + "grad_norm": 0.04208459332585335, + "learning_rate": 2.508859390345608e-06, + "loss": 0.0018, + "step": 218100 + }, + { + "epoch": 1.3988897903685626, + "grad_norm": 0.04920925945043564, + "learning_rate": 2.508374117391268e-06, + "loss": 0.0015, + "step": 218110 + }, + { + "epoch": 1.3989539272623488, + "grad_norm": 0.027007663622498512, + "learning_rate": 2.507888875658781e-06, + "loss": 0.0015, + "step": 218120 + }, + { + "epoch": 1.399018064156135, + "grad_norm": 0.09326251596212387, + "learning_rate": 2.5074036651542256e-06, + "loss": 0.0014, + "step": 218130 + }, + { + "epoch": 1.3990822010499209, + "grad_norm": 0.033007364720106125, + "learning_rate": 2.506918485883686e-06, + "loss": 0.0011, + "step": 218140 + }, + { + "epoch": 1.399146337943707, + "grad_norm": 0.3016238510608673, + "learning_rate": 2.506433337853239e-06, + "loss": 0.0016, + "step": 218150 + }, + { + "epoch": 1.3992104748374932, + "grad_norm": 0.025880996137857437, + "learning_rate": 2.5059482210689645e-06, + "loss": 0.0011, + "step": 218160 + }, + { + "epoch": 1.3992746117312793, + "grad_norm": 0.0889752134680748, + "learning_rate": 2.505463135536939e-06, + "loss": 0.0008, + "step": 218170 + }, + { + "epoch": 1.3993387486250652, + "grad_norm": 0.08893129974603653, + "learning_rate": 2.5049780812632457e-06, + "loss": 0.0018, + "step": 218180 + }, + { + "epoch": 1.3994028855188514, + "grad_norm": 0.05968914180994034, + "learning_rate": 2.50449305825396e-06, + "loss": 0.0017, + "step": 218190 + }, + { + "epoch": 1.3994670224126375, + "grad_norm": 0.06970475614070892, + "learning_rate": 2.50400806651516e-06, + "loss": 0.0008, + "step": 218200 + }, + { + "epoch": 1.3995311593064237, + "grad_norm": 0.16024905443191528, + "learning_rate": 2.503523106052921e-06, + "loss": 0.0015, + "step": 218210 + }, + { + "epoch": 1.3995952962002098, + "grad_norm": 0.06745795160531998, + "learning_rate": 2.5030381768733223e-06, + "loss": 0.0008, + "step": 218220 + }, + { + "epoch": 1.3996594330939958, + "grad_norm": 0.07381200790405273, + "learning_rate": 2.5025532789824414e-06, + "loss": 0.0008, + "step": 218230 + }, + { + "epoch": 1.399723569987782, + "grad_norm": 0.08183572441339493, + "learning_rate": 2.5020684123863503e-06, + "loss": 0.0012, + "step": 218240 + }, + { + "epoch": 1.399787706881568, + "grad_norm": 0.0365438275039196, + "learning_rate": 2.501583577091129e-06, + "loss": 0.0011, + "step": 218250 + }, + { + "epoch": 1.399851843775354, + "grad_norm": 0.0748719871044159, + "learning_rate": 2.501098773102851e-06, + "loss": 0.001, + "step": 218260 + }, + { + "epoch": 1.3999159806691401, + "grad_norm": 0.04895782470703125, + "learning_rate": 2.5006140004275913e-06, + "loss": 0.0015, + "step": 218270 + }, + { + "epoch": 1.3999801175629263, + "grad_norm": 0.022944876924157143, + "learning_rate": 2.500129259071423e-06, + "loss": 0.0009, + "step": 218280 + }, + { + "epoch": 1.4000442544567124, + "grad_norm": 0.057753268629312515, + "learning_rate": 2.4996445490404237e-06, + "loss": 0.0006, + "step": 218290 + }, + { + "epoch": 1.4001083913504986, + "grad_norm": 0.05829066038131714, + "learning_rate": 2.499159870340665e-06, + "loss": 0.0007, + "step": 218300 + }, + { + "epoch": 1.4001725282442847, + "grad_norm": 0.11744900047779083, + "learning_rate": 2.498675222978218e-06, + "loss": 0.0007, + "step": 218310 + }, + { + "epoch": 1.4002366651380707, + "grad_norm": 0.047571297734975815, + "learning_rate": 2.4981906069591603e-06, + "loss": 0.0011, + "step": 218320 + }, + { + "epoch": 1.4003008020318568, + "grad_norm": 0.06034592166543007, + "learning_rate": 2.497706022289561e-06, + "loss": 0.0008, + "step": 218330 + }, + { + "epoch": 1.400364938925643, + "grad_norm": 0.12814146280288696, + "learning_rate": 2.4972214689754948e-06, + "loss": 0.001, + "step": 218340 + }, + { + "epoch": 1.4004290758194289, + "grad_norm": 0.005401868373155594, + "learning_rate": 2.496736947023033e-06, + "loss": 0.0011, + "step": 218350 + }, + { + "epoch": 1.400493212713215, + "grad_norm": 0.17886695265769958, + "learning_rate": 2.4962524564382435e-06, + "loss": 0.0053, + "step": 218360 + }, + { + "epoch": 1.4005573496070012, + "grad_norm": 0.005190698895603418, + "learning_rate": 2.4957679972272023e-06, + "loss": 0.0014, + "step": 218370 + }, + { + "epoch": 1.4006214865007873, + "grad_norm": 0.0008826800039969385, + "learning_rate": 2.4952835693959782e-06, + "loss": 0.0007, + "step": 218380 + }, + { + "epoch": 1.4006856233945735, + "grad_norm": 0.15037375688552856, + "learning_rate": 2.494799172950641e-06, + "loss": 0.001, + "step": 218390 + }, + { + "epoch": 1.4007497602883594, + "grad_norm": 0.32553181052207947, + "learning_rate": 2.4943148078972586e-06, + "loss": 0.0011, + "step": 218400 + }, + { + "epoch": 1.4008138971821456, + "grad_norm": 0.011083441786468029, + "learning_rate": 2.4938304742419045e-06, + "loss": 0.0007, + "step": 218410 + }, + { + "epoch": 1.4008780340759317, + "grad_norm": 0.16851891577243805, + "learning_rate": 2.4933461719906455e-06, + "loss": 0.001, + "step": 218420 + }, + { + "epoch": 1.4009421709697178, + "grad_norm": 0.19955788552761078, + "learning_rate": 2.49286190114955e-06, + "loss": 0.001, + "step": 218430 + }, + { + "epoch": 1.4010063078635038, + "grad_norm": 0.07683160156011581, + "learning_rate": 2.492377661724686e-06, + "loss": 0.0007, + "step": 218440 + }, + { + "epoch": 1.40107044475729, + "grad_norm": 0.025752320885658264, + "learning_rate": 2.4918934537221228e-06, + "loss": 0.0011, + "step": 218450 + }, + { + "epoch": 1.401134581651076, + "grad_norm": 0.25075531005859375, + "learning_rate": 2.4914092771479276e-06, + "loss": 0.0028, + "step": 218460 + }, + { + "epoch": 1.4011987185448622, + "grad_norm": 0.07408151775598526, + "learning_rate": 2.4909251320081674e-06, + "loss": 0.001, + "step": 218470 + }, + { + "epoch": 1.4012628554386484, + "grad_norm": 0.10768114030361176, + "learning_rate": 2.490441018308906e-06, + "loss": 0.002, + "step": 218480 + }, + { + "epoch": 1.4013269923324343, + "grad_norm": 0.21442459523677826, + "learning_rate": 2.489956936056214e-06, + "loss": 0.0013, + "step": 218490 + }, + { + "epoch": 1.4013911292262204, + "grad_norm": 0.20392319560050964, + "learning_rate": 2.489472885256156e-06, + "loss": 0.0013, + "step": 218500 + }, + { + "epoch": 1.4014552661200066, + "grad_norm": 0.022533521056175232, + "learning_rate": 2.4889888659147947e-06, + "loss": 0.0006, + "step": 218510 + }, + { + "epoch": 1.4015194030137925, + "grad_norm": 0.0441557802259922, + "learning_rate": 2.4885048780382e-06, + "loss": 0.0043, + "step": 218520 + }, + { + "epoch": 1.4015835399075787, + "grad_norm": 0.04519323632121086, + "learning_rate": 2.4880209216324335e-06, + "loss": 0.0013, + "step": 218530 + }, + { + "epoch": 1.4016476768013648, + "grad_norm": 0.07286236435174942, + "learning_rate": 2.4875369967035607e-06, + "loss": 0.0011, + "step": 218540 + }, + { + "epoch": 1.401711813695151, + "grad_norm": 0.02912171557545662, + "learning_rate": 2.487053103257642e-06, + "loss": 0.0011, + "step": 218550 + }, + { + "epoch": 1.4017759505889371, + "grad_norm": 0.16293995082378387, + "learning_rate": 2.486569241300747e-06, + "loss": 0.0021, + "step": 218560 + }, + { + "epoch": 1.4018400874827233, + "grad_norm": 0.1001424491405487, + "learning_rate": 2.4860854108389353e-06, + "loss": 0.0015, + "step": 218570 + }, + { + "epoch": 1.4019042243765092, + "grad_norm": 0.09254918247461319, + "learning_rate": 2.48560161187827e-06, + "loss": 0.0011, + "step": 218580 + }, + { + "epoch": 1.4019683612702953, + "grad_norm": 0.024134088307619095, + "learning_rate": 2.4851178444248112e-06, + "loss": 0.0011, + "step": 218590 + }, + { + "epoch": 1.4020324981640815, + "grad_norm": 0.03021530620753765, + "learning_rate": 2.484634108484626e-06, + "loss": 0.0016, + "step": 218600 + }, + { + "epoch": 1.4020966350578674, + "grad_norm": 0.05822308361530304, + "learning_rate": 2.4841504040637726e-06, + "loss": 0.0008, + "step": 218610 + }, + { + "epoch": 1.4021607719516536, + "grad_norm": 0.5333791971206665, + "learning_rate": 2.483666731168313e-06, + "loss": 0.0039, + "step": 218620 + }, + { + "epoch": 1.4022249088454397, + "grad_norm": 0.05283120274543762, + "learning_rate": 2.4831830898043054e-06, + "loss": 0.0013, + "step": 218630 + }, + { + "epoch": 1.4022890457392259, + "grad_norm": 0.12147217243909836, + "learning_rate": 2.4826994799778143e-06, + "loss": 0.0007, + "step": 218640 + }, + { + "epoch": 1.402353182633012, + "grad_norm": 0.07120306044816971, + "learning_rate": 2.4822159016948975e-06, + "loss": 0.0013, + "step": 218650 + }, + { + "epoch": 1.402417319526798, + "grad_norm": 0.0498415008187294, + "learning_rate": 2.4817323549616134e-06, + "loss": 0.0014, + "step": 218660 + }, + { + "epoch": 1.402481456420584, + "grad_norm": 0.04400544986128807, + "learning_rate": 2.4812488397840246e-06, + "loss": 0.0009, + "step": 218670 + }, + { + "epoch": 1.4025455933143702, + "grad_norm": 0.023885253816843033, + "learning_rate": 2.480765356168188e-06, + "loss": 0.0006, + "step": 218680 + }, + { + "epoch": 1.4026097302081562, + "grad_norm": 0.09544313699007034, + "learning_rate": 2.480281904120162e-06, + "loss": 0.0027, + "step": 218690 + }, + { + "epoch": 1.4026738671019423, + "grad_norm": 0.034635111689567566, + "learning_rate": 2.479798483646002e-06, + "loss": 0.0008, + "step": 218700 + }, + { + "epoch": 1.4027380039957285, + "grad_norm": 0.10411494970321655, + "learning_rate": 2.4793150947517706e-06, + "loss": 0.0025, + "step": 218710 + }, + { + "epoch": 1.4028021408895146, + "grad_norm": 0.06943442672491074, + "learning_rate": 2.4788317374435227e-06, + "loss": 0.002, + "step": 218720 + }, + { + "epoch": 1.4028662777833008, + "grad_norm": 0.11532283574342728, + "learning_rate": 2.478348411727315e-06, + "loss": 0.0011, + "step": 218730 + }, + { + "epoch": 1.402930414677087, + "grad_norm": 0.08214882761240005, + "learning_rate": 2.4778651176092018e-06, + "loss": 0.0024, + "step": 218740 + }, + { + "epoch": 1.4029945515708728, + "grad_norm": 0.1970735341310501, + "learning_rate": 2.4773818550952435e-06, + "loss": 0.0009, + "step": 218750 + }, + { + "epoch": 1.403058688464659, + "grad_norm": 0.6348500847816467, + "learning_rate": 2.4768986241914926e-06, + "loss": 0.0009, + "step": 218760 + }, + { + "epoch": 1.4031228253584451, + "grad_norm": 0.059380240738391876, + "learning_rate": 2.476415424904004e-06, + "loss": 0.0009, + "step": 218770 + }, + { + "epoch": 1.403186962252231, + "grad_norm": 0.22803273797035217, + "learning_rate": 2.4759322572388357e-06, + "loss": 0.0011, + "step": 218780 + }, + { + "epoch": 1.4032510991460172, + "grad_norm": 0.1198737621307373, + "learning_rate": 2.475449121202039e-06, + "loss": 0.0012, + "step": 218790 + }, + { + "epoch": 1.4033152360398033, + "grad_norm": 0.1235053688287735, + "learning_rate": 2.4749660167996702e-06, + "loss": 0.0007, + "step": 218800 + }, + { + "epoch": 1.4033793729335895, + "grad_norm": 0.034360114485025406, + "learning_rate": 2.47448294403778e-06, + "loss": 0.0017, + "step": 218810 + }, + { + "epoch": 1.4034435098273756, + "grad_norm": 0.16802871227264404, + "learning_rate": 2.473999902922425e-06, + "loss": 0.0015, + "step": 218820 + }, + { + "epoch": 1.4035076467211616, + "grad_norm": 0.001983431400731206, + "learning_rate": 2.4735168934596567e-06, + "loss": 0.0007, + "step": 218830 + }, + { + "epoch": 1.4035717836149477, + "grad_norm": 0.12489792704582214, + "learning_rate": 2.4730339156555266e-06, + "loss": 0.0008, + "step": 218840 + }, + { + "epoch": 1.4036359205087339, + "grad_norm": 0.37852925062179565, + "learning_rate": 2.4725509695160863e-06, + "loss": 0.0026, + "step": 218850 + }, + { + "epoch": 1.40370005740252, + "grad_norm": 0.1418200582265854, + "learning_rate": 2.472068055047391e-06, + "loss": 0.0022, + "step": 218860 + }, + { + "epoch": 1.403764194296306, + "grad_norm": 0.02384890243411064, + "learning_rate": 2.4715851722554886e-06, + "loss": 0.0022, + "step": 218870 + }, + { + "epoch": 1.403828331190092, + "grad_norm": 0.10498286038637161, + "learning_rate": 2.471102321146432e-06, + "loss": 0.0009, + "step": 218880 + }, + { + "epoch": 1.4038924680838782, + "grad_norm": 0.05484259873628616, + "learning_rate": 2.470619501726268e-06, + "loss": 0.001, + "step": 218890 + }, + { + "epoch": 1.4039566049776644, + "grad_norm": 0.11952408403158188, + "learning_rate": 2.470136714001052e-06, + "loss": 0.0046, + "step": 218900 + }, + { + "epoch": 1.4040207418714505, + "grad_norm": 0.0364367812871933, + "learning_rate": 2.46965395797683e-06, + "loss": 0.0006, + "step": 218910 + }, + { + "epoch": 1.4040848787652365, + "grad_norm": 0.06499797850847244, + "learning_rate": 2.469171233659651e-06, + "loss": 0.0013, + "step": 218920 + }, + { + "epoch": 1.4041490156590226, + "grad_norm": 0.05752279981970787, + "learning_rate": 2.468688541055568e-06, + "loss": 0.0008, + "step": 218930 + }, + { + "epoch": 1.4042131525528088, + "grad_norm": 0.09352587163448334, + "learning_rate": 2.4682058801706256e-06, + "loss": 0.001, + "step": 218940 + }, + { + "epoch": 1.4042772894465947, + "grad_norm": 0.00825769267976284, + "learning_rate": 2.4677232510108732e-06, + "loss": 0.0038, + "step": 218950 + }, + { + "epoch": 1.4043414263403808, + "grad_norm": 0.062322016805410385, + "learning_rate": 2.467240653582356e-06, + "loss": 0.001, + "step": 218960 + }, + { + "epoch": 1.404405563234167, + "grad_norm": 0.09102189540863037, + "learning_rate": 2.4667580878911265e-06, + "loss": 0.0011, + "step": 218970 + }, + { + "epoch": 1.4044697001279531, + "grad_norm": 0.23846127092838287, + "learning_rate": 2.4662755539432277e-06, + "loss": 0.002, + "step": 218980 + }, + { + "epoch": 1.4045338370217393, + "grad_norm": 0.06262645870447159, + "learning_rate": 2.4657930517447076e-06, + "loss": 0.0011, + "step": 218990 + }, + { + "epoch": 1.4045979739155254, + "grad_norm": 0.0890268087387085, + "learning_rate": 2.4653105813016095e-06, + "loss": 0.0022, + "step": 219000 + }, + { + "epoch": 1.4046621108093114, + "grad_norm": 0.017491163685917854, + "learning_rate": 2.464828142619983e-06, + "loss": 0.0008, + "step": 219010 + }, + { + "epoch": 1.4047262477030975, + "grad_norm": 0.05489290505647659, + "learning_rate": 2.4643457357058718e-06, + "loss": 0.0013, + "step": 219020 + }, + { + "epoch": 1.4047903845968837, + "grad_norm": 0.15791866183280945, + "learning_rate": 2.463863360565321e-06, + "loss": 0.0012, + "step": 219030 + }, + { + "epoch": 1.4048545214906696, + "grad_norm": 0.10572674870491028, + "learning_rate": 2.463381017204373e-06, + "loss": 0.0014, + "step": 219040 + }, + { + "epoch": 1.4049186583844557, + "grad_norm": 0.6303642988204956, + "learning_rate": 2.462898705629076e-06, + "loss": 0.0014, + "step": 219050 + }, + { + "epoch": 1.4049827952782419, + "grad_norm": 0.052637193351984024, + "learning_rate": 2.4624164258454707e-06, + "loss": 0.0009, + "step": 219060 + }, + { + "epoch": 1.405046932172028, + "grad_norm": 0.10681580752134323, + "learning_rate": 2.4619341778595994e-06, + "loss": 0.0024, + "step": 219070 + }, + { + "epoch": 1.4051110690658142, + "grad_norm": 0.051800213754177094, + "learning_rate": 2.4614519616775075e-06, + "loss": 0.0042, + "step": 219080 + }, + { + "epoch": 1.4051752059596, + "grad_norm": 0.11543156206607819, + "learning_rate": 2.460969777305238e-06, + "loss": 0.0012, + "step": 219090 + }, + { + "epoch": 1.4052393428533863, + "grad_norm": 0.05147361382842064, + "learning_rate": 2.4604876247488317e-06, + "loss": 0.0017, + "step": 219100 + }, + { + "epoch": 1.4053034797471724, + "grad_norm": 0.09925807267427444, + "learning_rate": 2.4600055040143308e-06, + "loss": 0.0006, + "step": 219110 + }, + { + "epoch": 1.4053676166409583, + "grad_norm": 0.11557259410619736, + "learning_rate": 2.459523415107774e-06, + "loss": 0.0012, + "step": 219120 + }, + { + "epoch": 1.4054317535347445, + "grad_norm": 0.07576493918895721, + "learning_rate": 2.459041358035207e-06, + "loss": 0.0041, + "step": 219130 + }, + { + "epoch": 1.4054958904285306, + "grad_norm": 0.07128460705280304, + "learning_rate": 2.458559332802667e-06, + "loss": 0.0013, + "step": 219140 + }, + { + "epoch": 1.4055600273223168, + "grad_norm": 0.07296294718980789, + "learning_rate": 2.4580773394161954e-06, + "loss": 0.0011, + "step": 219150 + }, + { + "epoch": 1.405624164216103, + "grad_norm": 0.11052407324314117, + "learning_rate": 2.4575953778818295e-06, + "loss": 0.0009, + "step": 219160 + }, + { + "epoch": 1.405688301109889, + "grad_norm": 0.035698726773262024, + "learning_rate": 2.457113448205612e-06, + "loss": 0.0009, + "step": 219170 + }, + { + "epoch": 1.405752438003675, + "grad_norm": 0.11392034590244293, + "learning_rate": 2.4566315503935813e-06, + "loss": 0.0017, + "step": 219180 + }, + { + "epoch": 1.4058165748974611, + "grad_norm": 0.12178272753953934, + "learning_rate": 2.4561496844517717e-06, + "loss": 0.0011, + "step": 219190 + }, + { + "epoch": 1.4058807117912473, + "grad_norm": 0.1255182921886444, + "learning_rate": 2.4556678503862276e-06, + "loss": 0.0028, + "step": 219200 + }, + { + "epoch": 1.4059448486850332, + "grad_norm": 0.06408878415822983, + "learning_rate": 2.4551860482029834e-06, + "loss": 0.0008, + "step": 219210 + }, + { + "epoch": 1.4060089855788194, + "grad_norm": 0.061047110706567764, + "learning_rate": 2.454704277908077e-06, + "loss": 0.0011, + "step": 219220 + }, + { + "epoch": 1.4060731224726055, + "grad_norm": 0.04080299288034439, + "learning_rate": 2.4542225395075433e-06, + "loss": 0.0014, + "step": 219230 + }, + { + "epoch": 1.4061372593663917, + "grad_norm": 0.011935588903725147, + "learning_rate": 2.453740833007422e-06, + "loss": 0.0029, + "step": 219240 + }, + { + "epoch": 1.4062013962601778, + "grad_norm": 0.1219937726855278, + "learning_rate": 2.453259158413748e-06, + "loss": 0.0013, + "step": 219250 + }, + { + "epoch": 1.4062655331539637, + "grad_norm": 0.03379470854997635, + "learning_rate": 2.4527775157325568e-06, + "loss": 0.0007, + "step": 219260 + }, + { + "epoch": 1.40632967004775, + "grad_norm": 0.0320466049015522, + "learning_rate": 2.4522959049698817e-06, + "loss": 0.0009, + "step": 219270 + }, + { + "epoch": 1.406393806941536, + "grad_norm": 0.07323256880044937, + "learning_rate": 2.4518143261317624e-06, + "loss": 0.0013, + "step": 219280 + }, + { + "epoch": 1.4064579438353222, + "grad_norm": 0.04394732415676117, + "learning_rate": 2.45133277922423e-06, + "loss": 0.0011, + "step": 219290 + }, + { + "epoch": 1.4065220807291081, + "grad_norm": 0.04408658668398857, + "learning_rate": 2.45085126425332e-06, + "loss": 0.0008, + "step": 219300 + }, + { + "epoch": 1.4065862176228943, + "grad_norm": 0.10613957792520523, + "learning_rate": 2.4503697812250633e-06, + "loss": 0.0009, + "step": 219310 + }, + { + "epoch": 1.4066503545166804, + "grad_norm": 0.26895952224731445, + "learning_rate": 2.4498883301454972e-06, + "loss": 0.0022, + "step": 219320 + }, + { + "epoch": 1.4067144914104666, + "grad_norm": 0.05045659467577934, + "learning_rate": 2.449406911020653e-06, + "loss": 0.0007, + "step": 219330 + }, + { + "epoch": 1.4067786283042527, + "grad_norm": 0.02221301943063736, + "learning_rate": 2.448925523856561e-06, + "loss": 0.0009, + "step": 219340 + }, + { + "epoch": 1.4068427651980386, + "grad_norm": 0.0894397646188736, + "learning_rate": 2.448444168659258e-06, + "loss": 0.0016, + "step": 219350 + }, + { + "epoch": 1.4069069020918248, + "grad_norm": 0.12538520991802216, + "learning_rate": 2.4479628454347726e-06, + "loss": 0.0017, + "step": 219360 + }, + { + "epoch": 1.406971038985611, + "grad_norm": 0.05737454816699028, + "learning_rate": 2.4474815541891366e-06, + "loss": 0.001, + "step": 219370 + }, + { + "epoch": 1.4070351758793969, + "grad_norm": 0.11879002302885056, + "learning_rate": 2.4470002949283795e-06, + "loss": 0.0009, + "step": 219380 + }, + { + "epoch": 1.407099312773183, + "grad_norm": 0.14492501318454742, + "learning_rate": 2.4465190676585343e-06, + "loss": 0.0013, + "step": 219390 + }, + { + "epoch": 1.4071634496669692, + "grad_norm": 0.07216792553663254, + "learning_rate": 2.446037872385631e-06, + "loss": 0.0014, + "step": 219400 + }, + { + "epoch": 1.4072275865607553, + "grad_norm": 0.10150250792503357, + "learning_rate": 2.445556709115698e-06, + "loss": 0.0012, + "step": 219410 + }, + { + "epoch": 1.4072917234545415, + "grad_norm": 0.11663208901882172, + "learning_rate": 2.4450755778547637e-06, + "loss": 0.0009, + "step": 219420 + }, + { + "epoch": 1.4073558603483276, + "grad_norm": 0.11327947676181793, + "learning_rate": 2.44459447860886e-06, + "loss": 0.001, + "step": 219430 + }, + { + "epoch": 1.4074199972421135, + "grad_norm": 0.08630412817001343, + "learning_rate": 2.444113411384013e-06, + "loss": 0.0013, + "step": 219440 + }, + { + "epoch": 1.4074841341358997, + "grad_norm": 0.06219204142689705, + "learning_rate": 2.443632376186253e-06, + "loss": 0.0018, + "step": 219450 + }, + { + "epoch": 1.4075482710296858, + "grad_norm": 0.28655245900154114, + "learning_rate": 2.4431513730216034e-06, + "loss": 0.0017, + "step": 219460 + }, + { + "epoch": 1.4076124079234718, + "grad_norm": 0.18242192268371582, + "learning_rate": 2.442670401896097e-06, + "loss": 0.0025, + "step": 219470 + }, + { + "epoch": 1.407676544817258, + "grad_norm": 0.16763143241405487, + "learning_rate": 2.4421894628157577e-06, + "loss": 0.0043, + "step": 219480 + }, + { + "epoch": 1.407740681711044, + "grad_norm": 0.017439650371670723, + "learning_rate": 2.441708555786611e-06, + "loss": 0.0021, + "step": 219490 + }, + { + "epoch": 1.4078048186048302, + "grad_norm": 0.029463430866599083, + "learning_rate": 2.441227680814686e-06, + "loss": 0.0019, + "step": 219500 + }, + { + "epoch": 1.4078689554986163, + "grad_norm": 0.13877111673355103, + "learning_rate": 2.440746837906007e-06, + "loss": 0.0018, + "step": 219510 + }, + { + "epoch": 1.4079330923924023, + "grad_norm": 0.20989437401294708, + "learning_rate": 2.440266027066599e-06, + "loss": 0.0022, + "step": 219520 + }, + { + "epoch": 1.4079972292861884, + "grad_norm": 0.07056647539138794, + "learning_rate": 2.439785248302486e-06, + "loss": 0.0013, + "step": 219530 + }, + { + "epoch": 1.4080613661799746, + "grad_norm": 0.1661432981491089, + "learning_rate": 2.4393045016196946e-06, + "loss": 0.0019, + "step": 219540 + }, + { + "epoch": 1.4081255030737605, + "grad_norm": 0.035545654594898224, + "learning_rate": 2.4388237870242483e-06, + "loss": 0.0019, + "step": 219550 + }, + { + "epoch": 1.4081896399675466, + "grad_norm": 0.04041499271988869, + "learning_rate": 2.43834310452217e-06, + "loss": 0.0008, + "step": 219560 + }, + { + "epoch": 1.4082537768613328, + "grad_norm": 0.011176417581737041, + "learning_rate": 2.4378624541194816e-06, + "loss": 0.0011, + "step": 219570 + }, + { + "epoch": 1.408317913755119, + "grad_norm": 0.1042177826166153, + "learning_rate": 2.4373818358222102e-06, + "loss": 0.0011, + "step": 219580 + }, + { + "epoch": 1.408382050648905, + "grad_norm": 0.014329710975289345, + "learning_rate": 2.436901249636375e-06, + "loss": 0.0008, + "step": 219590 + }, + { + "epoch": 1.4084461875426912, + "grad_norm": 0.08484010398387909, + "learning_rate": 2.436420695568e-06, + "loss": 0.0022, + "step": 219600 + }, + { + "epoch": 1.4085103244364772, + "grad_norm": 0.14927707612514496, + "learning_rate": 2.4359401736231035e-06, + "loss": 0.0012, + "step": 219610 + }, + { + "epoch": 1.4085744613302633, + "grad_norm": 0.0252826064825058, + "learning_rate": 2.435459683807711e-06, + "loss": 0.0013, + "step": 219620 + }, + { + "epoch": 1.4086385982240495, + "grad_norm": 0.11428942531347275, + "learning_rate": 2.4349792261278416e-06, + "loss": 0.0017, + "step": 219630 + }, + { + "epoch": 1.4087027351178354, + "grad_norm": 0.008819522336125374, + "learning_rate": 2.434498800589514e-06, + "loss": 0.0009, + "step": 219640 + }, + { + "epoch": 1.4087668720116215, + "grad_norm": 0.20233199000358582, + "learning_rate": 2.434018407198751e-06, + "loss": 0.0008, + "step": 219650 + }, + { + "epoch": 1.4088310089054077, + "grad_norm": 0.12796108424663544, + "learning_rate": 2.433538045961572e-06, + "loss": 0.001, + "step": 219660 + }, + { + "epoch": 1.4088951457991938, + "grad_norm": 0.030231822282075882, + "learning_rate": 2.4330577168839953e-06, + "loss": 0.001, + "step": 219670 + }, + { + "epoch": 1.40895928269298, + "grad_norm": 0.015168020501732826, + "learning_rate": 2.4325774199720382e-06, + "loss": 0.001, + "step": 219680 + }, + { + "epoch": 1.409023419586766, + "grad_norm": 0.14559921622276306, + "learning_rate": 2.4320971552317223e-06, + "loss": 0.0011, + "step": 219690 + }, + { + "epoch": 1.409087556480552, + "grad_norm": 0.07003036141395569, + "learning_rate": 2.431616922669065e-06, + "loss": 0.0012, + "step": 219700 + }, + { + "epoch": 1.4091516933743382, + "grad_norm": 0.12697798013687134, + "learning_rate": 2.431136722290083e-06, + "loss": 0.0011, + "step": 219710 + }, + { + "epoch": 1.4092158302681244, + "grad_norm": 0.039292674511671066, + "learning_rate": 2.4306565541007914e-06, + "loss": 0.0015, + "step": 219720 + }, + { + "epoch": 1.4092799671619103, + "grad_norm": 0.022258060052990913, + "learning_rate": 2.4301764181072118e-06, + "loss": 0.0012, + "step": 219730 + }, + { + "epoch": 1.4093441040556964, + "grad_norm": 0.055856283754110336, + "learning_rate": 2.4296963143153586e-06, + "loss": 0.0009, + "step": 219740 + }, + { + "epoch": 1.4094082409494826, + "grad_norm": 0.2606965899467468, + "learning_rate": 2.429216242731247e-06, + "loss": 0.0024, + "step": 219750 + }, + { + "epoch": 1.4094723778432687, + "grad_norm": 0.08281605690717697, + "learning_rate": 2.428736203360892e-06, + "loss": 0.0012, + "step": 219760 + }, + { + "epoch": 1.4095365147370549, + "grad_norm": 0.007565287407487631, + "learning_rate": 2.428256196210311e-06, + "loss": 0.0018, + "step": 219770 + }, + { + "epoch": 1.4096006516308408, + "grad_norm": 0.0865720584988594, + "learning_rate": 2.4277762212855186e-06, + "loss": 0.0009, + "step": 219780 + }, + { + "epoch": 1.409664788524627, + "grad_norm": 0.005745246075093746, + "learning_rate": 2.4272962785925265e-06, + "loss": 0.0036, + "step": 219790 + }, + { + "epoch": 1.409728925418413, + "grad_norm": 0.05915892496705055, + "learning_rate": 2.4268163681373526e-06, + "loss": 0.0013, + "step": 219800 + }, + { + "epoch": 1.409793062312199, + "grad_norm": 0.007312369532883167, + "learning_rate": 2.4263364899260083e-06, + "loss": 0.0008, + "step": 219810 + }, + { + "epoch": 1.4098571992059852, + "grad_norm": 0.09697067737579346, + "learning_rate": 2.425856643964507e-06, + "loss": 0.0016, + "step": 219820 + }, + { + "epoch": 1.4099213360997713, + "grad_norm": 0.10934215039014816, + "learning_rate": 2.4253768302588606e-06, + "loss": 0.0006, + "step": 219830 + }, + { + "epoch": 1.4099854729935575, + "grad_norm": 0.05217369273304939, + "learning_rate": 2.4248970488150834e-06, + "loss": 0.0008, + "step": 219840 + }, + { + "epoch": 1.4100496098873436, + "grad_norm": 0.04329046979546547, + "learning_rate": 2.4244172996391857e-06, + "loss": 0.001, + "step": 219850 + }, + { + "epoch": 1.4101137467811298, + "grad_norm": 0.06280362606048584, + "learning_rate": 2.423937582737181e-06, + "loss": 0.0011, + "step": 219860 + }, + { + "epoch": 1.4101778836749157, + "grad_norm": 0.0004947282723151147, + "learning_rate": 2.42345789811508e-06, + "loss": 0.0008, + "step": 219870 + }, + { + "epoch": 1.4102420205687018, + "grad_norm": 0.24201305210590363, + "learning_rate": 2.422978245778891e-06, + "loss": 0.0019, + "step": 219880 + }, + { + "epoch": 1.410306157462488, + "grad_norm": 0.025508929044008255, + "learning_rate": 2.4224986257346284e-06, + "loss": 0.0024, + "step": 219890 + }, + { + "epoch": 1.410370294356274, + "grad_norm": 0.10216487944126129, + "learning_rate": 2.4220190379882996e-06, + "loss": 0.0011, + "step": 219900 + }, + { + "epoch": 1.41043443125006, + "grad_norm": 0.053904447704553604, + "learning_rate": 2.421539482545913e-06, + "loss": 0.0011, + "step": 219910 + }, + { + "epoch": 1.4104985681438462, + "grad_norm": 0.0518166646361351, + "learning_rate": 2.4210599594134822e-06, + "loss": 0.002, + "step": 219920 + }, + { + "epoch": 1.4105627050376324, + "grad_norm": 0.10732424259185791, + "learning_rate": 2.4205804685970126e-06, + "loss": 0.0012, + "step": 219930 + }, + { + "epoch": 1.4106268419314185, + "grad_norm": 0.07041086256504059, + "learning_rate": 2.420101010102513e-06, + "loss": 0.0012, + "step": 219940 + }, + { + "epoch": 1.4106909788252044, + "grad_norm": 0.05897292494773865, + "learning_rate": 2.4196215839359898e-06, + "loss": 0.0031, + "step": 219950 + }, + { + "epoch": 1.4107551157189906, + "grad_norm": 0.03396107628941536, + "learning_rate": 2.4191421901034547e-06, + "loss": 0.0023, + "step": 219960 + }, + { + "epoch": 1.4108192526127767, + "grad_norm": 0.14326755702495575, + "learning_rate": 2.4186628286109116e-06, + "loss": 0.0033, + "step": 219970 + }, + { + "epoch": 1.410883389506563, + "grad_norm": 0.10896255820989609, + "learning_rate": 2.418183499464369e-06, + "loss": 0.0014, + "step": 219980 + }, + { + "epoch": 1.4109475264003488, + "grad_norm": 0.09689708054065704, + "learning_rate": 2.4177042026698294e-06, + "loss": 0.0013, + "step": 219990 + }, + { + "epoch": 1.411011663294135, + "grad_norm": 0.04778188467025757, + "learning_rate": 2.4172249382333046e-06, + "loss": 0.0029, + "step": 220000 + }, + { + "epoch": 1.4110758001879211, + "grad_norm": 0.04208539426326752, + "learning_rate": 2.416745706160797e-06, + "loss": 0.0012, + "step": 220010 + }, + { + "epoch": 1.4111399370817073, + "grad_norm": 0.05550342798233032, + "learning_rate": 2.4162665064583112e-06, + "loss": 0.0012, + "step": 220020 + }, + { + "epoch": 1.4112040739754934, + "grad_norm": 0.08074969053268433, + "learning_rate": 2.4157873391318514e-06, + "loss": 0.002, + "step": 220030 + }, + { + "epoch": 1.4112682108692793, + "grad_norm": 0.10883128643035889, + "learning_rate": 2.4153082041874248e-06, + "loss": 0.001, + "step": 220040 + }, + { + "epoch": 1.4113323477630655, + "grad_norm": 0.11793199181556702, + "learning_rate": 2.4148291016310338e-06, + "loss": 0.0012, + "step": 220050 + }, + { + "epoch": 1.4113964846568516, + "grad_norm": 0.030807217583060265, + "learning_rate": 2.4143500314686797e-06, + "loss": 0.001, + "step": 220060 + }, + { + "epoch": 1.4114606215506376, + "grad_norm": 0.09902536869049072, + "learning_rate": 2.41387099370637e-06, + "loss": 0.0009, + "step": 220070 + }, + { + "epoch": 1.4115247584444237, + "grad_norm": 0.018539322540163994, + "learning_rate": 2.4133919883501043e-06, + "loss": 0.0019, + "step": 220080 + }, + { + "epoch": 1.4115888953382099, + "grad_norm": 0.004996740259230137, + "learning_rate": 2.412913015405886e-06, + "loss": 0.0007, + "step": 220090 + }, + { + "epoch": 1.411653032231996, + "grad_norm": 0.1072591245174408, + "learning_rate": 2.412434074879715e-06, + "loss": 0.0014, + "step": 220100 + }, + { + "epoch": 1.4117171691257822, + "grad_norm": 0.23977114260196686, + "learning_rate": 2.411955166777596e-06, + "loss": 0.0017, + "step": 220110 + }, + { + "epoch": 1.4117813060195683, + "grad_norm": 0.08273182064294815, + "learning_rate": 2.4114762911055282e-06, + "loss": 0.0016, + "step": 220120 + }, + { + "epoch": 1.4118454429133542, + "grad_norm": 0.03678559139370918, + "learning_rate": 2.4109974478695124e-06, + "loss": 0.0015, + "step": 220130 + }, + { + "epoch": 1.4119095798071404, + "grad_norm": 0.16956031322479248, + "learning_rate": 2.4105186370755475e-06, + "loss": 0.001, + "step": 220140 + }, + { + "epoch": 1.4119737167009265, + "grad_norm": 0.03240702301263809, + "learning_rate": 2.4100398587296365e-06, + "loss": 0.0018, + "step": 220150 + }, + { + "epoch": 1.4120378535947125, + "grad_norm": 0.20417430996894836, + "learning_rate": 2.4095611128377767e-06, + "loss": 0.0009, + "step": 220160 + }, + { + "epoch": 1.4121019904884986, + "grad_norm": 0.031480804085731506, + "learning_rate": 2.4090823994059674e-06, + "loss": 0.0007, + "step": 220170 + }, + { + "epoch": 1.4121661273822848, + "grad_norm": 0.045793939381837845, + "learning_rate": 2.4086037184402062e-06, + "loss": 0.0009, + "step": 220180 + }, + { + "epoch": 1.412230264276071, + "grad_norm": 0.051683228462934494, + "learning_rate": 2.4081250699464937e-06, + "loss": 0.0007, + "step": 220190 + }, + { + "epoch": 1.412294401169857, + "grad_norm": 0.39436984062194824, + "learning_rate": 2.407646453930827e-06, + "loss": 0.0011, + "step": 220200 + }, + { + "epoch": 1.412358538063643, + "grad_norm": 0.036083146929740906, + "learning_rate": 2.407167870399201e-06, + "loss": 0.0011, + "step": 220210 + }, + { + "epoch": 1.4124226749574291, + "grad_norm": 0.10143862664699554, + "learning_rate": 2.4066893193576157e-06, + "loss": 0.0008, + "step": 220220 + }, + { + "epoch": 1.4124868118512153, + "grad_norm": 0.1924578696489334, + "learning_rate": 2.4062108008120676e-06, + "loss": 0.0018, + "step": 220230 + }, + { + "epoch": 1.4125509487450012, + "grad_norm": 0.09693806618452072, + "learning_rate": 2.4057323147685513e-06, + "loss": 0.0015, + "step": 220240 + }, + { + "epoch": 1.4126150856387873, + "grad_norm": 0.17619289457798004, + "learning_rate": 2.405253861233062e-06, + "loss": 0.0009, + "step": 220250 + }, + { + "epoch": 1.4126792225325735, + "grad_norm": 0.03629330173134804, + "learning_rate": 2.4047754402115974e-06, + "loss": 0.0012, + "step": 220260 + }, + { + "epoch": 1.4127433594263596, + "grad_norm": 0.06974545866250992, + "learning_rate": 2.4042970517101516e-06, + "loss": 0.001, + "step": 220270 + }, + { + "epoch": 1.4128074963201458, + "grad_norm": 0.17821314930915833, + "learning_rate": 2.403818695734719e-06, + "loss": 0.0007, + "step": 220280 + }, + { + "epoch": 1.412871633213932, + "grad_norm": 0.02364698238670826, + "learning_rate": 2.403340372291292e-06, + "loss": 0.0008, + "step": 220290 + }, + { + "epoch": 1.4129357701077179, + "grad_norm": 0.00746135413646698, + "learning_rate": 2.402862081385867e-06, + "loss": 0.0025, + "step": 220300 + }, + { + "epoch": 1.412999907001504, + "grad_norm": 0.13213053345680237, + "learning_rate": 2.402383823024437e-06, + "loss": 0.001, + "step": 220310 + }, + { + "epoch": 1.4130640438952902, + "grad_norm": 0.04691189527511597, + "learning_rate": 2.4019055972129933e-06, + "loss": 0.0007, + "step": 220320 + }, + { + "epoch": 1.413128180789076, + "grad_norm": 0.03543318808078766, + "learning_rate": 2.401427403957528e-06, + "loss": 0.001, + "step": 220330 + }, + { + "epoch": 1.4131923176828622, + "grad_norm": 0.19499273598194122, + "learning_rate": 2.400949243264037e-06, + "loss": 0.0028, + "step": 220340 + }, + { + "epoch": 1.4132564545766484, + "grad_norm": 0.023899951949715614, + "learning_rate": 2.4004711151385086e-06, + "loss": 0.0021, + "step": 220350 + }, + { + "epoch": 1.4133205914704345, + "grad_norm": 0.06745419651269913, + "learning_rate": 2.399993019586933e-06, + "loss": 0.0029, + "step": 220360 + }, + { + "epoch": 1.4133847283642207, + "grad_norm": 0.37230822443962097, + "learning_rate": 2.399514956615306e-06, + "loss": 0.0016, + "step": 220370 + }, + { + "epoch": 1.4134488652580066, + "grad_norm": 0.03617741912603378, + "learning_rate": 2.3990369262296143e-06, + "loss": 0.0016, + "step": 220380 + }, + { + "epoch": 1.4135130021517928, + "grad_norm": 0.17530429363250732, + "learning_rate": 2.398558928435849e-06, + "loss": 0.0013, + "step": 220390 + }, + { + "epoch": 1.413577139045579, + "grad_norm": 0.11301398277282715, + "learning_rate": 2.3980809632399975e-06, + "loss": 0.0015, + "step": 220400 + }, + { + "epoch": 1.413641275939365, + "grad_norm": 0.056706465780735016, + "learning_rate": 2.397603030648053e-06, + "loss": 0.0016, + "step": 220410 + }, + { + "epoch": 1.413705412833151, + "grad_norm": 0.09726312756538391, + "learning_rate": 2.3971251306660027e-06, + "loss": 0.0017, + "step": 220420 + }, + { + "epoch": 1.4137695497269371, + "grad_norm": 0.07375887036323547, + "learning_rate": 2.3966472632998345e-06, + "loss": 0.0015, + "step": 220430 + }, + { + "epoch": 1.4138336866207233, + "grad_norm": 0.1622479259967804, + "learning_rate": 2.3961694285555347e-06, + "loss": 0.0012, + "step": 220440 + }, + { + "epoch": 1.4138978235145094, + "grad_norm": 0.14091528952121735, + "learning_rate": 2.3956916264390952e-06, + "loss": 0.0012, + "step": 220450 + }, + { + "epoch": 1.4139619604082956, + "grad_norm": 0.1288294792175293, + "learning_rate": 2.3952138569565003e-06, + "loss": 0.0015, + "step": 220460 + }, + { + "epoch": 1.4140260973020815, + "grad_norm": 0.20033596456050873, + "learning_rate": 2.394736120113736e-06, + "loss": 0.0016, + "step": 220470 + }, + { + "epoch": 1.4140902341958677, + "grad_norm": 0.15678802132606506, + "learning_rate": 2.394258415916792e-06, + "loss": 0.0025, + "step": 220480 + }, + { + "epoch": 1.4141543710896538, + "grad_norm": 0.006190237123519182, + "learning_rate": 2.393780744371652e-06, + "loss": 0.0006, + "step": 220490 + }, + { + "epoch": 1.4142185079834397, + "grad_norm": 0.029406633228063583, + "learning_rate": 2.393303105484302e-06, + "loss": 0.0031, + "step": 220500 + }, + { + "epoch": 1.4142826448772259, + "grad_norm": 0.04776811972260475, + "learning_rate": 2.3928254992607253e-06, + "loss": 0.0004, + "step": 220510 + }, + { + "epoch": 1.414346781771012, + "grad_norm": 0.09439283609390259, + "learning_rate": 2.3923479257069105e-06, + "loss": 0.0018, + "step": 220520 + }, + { + "epoch": 1.4144109186647982, + "grad_norm": 0.0398576594889164, + "learning_rate": 2.3918703848288396e-06, + "loss": 0.0015, + "step": 220530 + }, + { + "epoch": 1.4144750555585843, + "grad_norm": 0.06165533885359764, + "learning_rate": 2.391392876632497e-06, + "loss": 0.0009, + "step": 220540 + }, + { + "epoch": 1.4145391924523705, + "grad_norm": 0.01763291470706463, + "learning_rate": 2.3909154011238634e-06, + "loss": 0.0006, + "step": 220550 + }, + { + "epoch": 1.4146033293461564, + "grad_norm": 0.21147936582565308, + "learning_rate": 2.390437958308927e-06, + "loss": 0.0024, + "step": 220560 + }, + { + "epoch": 1.4146674662399426, + "grad_norm": 0.24931780993938446, + "learning_rate": 2.3899605481936678e-06, + "loss": 0.0014, + "step": 220570 + }, + { + "epoch": 1.4147316031337287, + "grad_norm": 0.11878237873315811, + "learning_rate": 2.3894831707840682e-06, + "loss": 0.0011, + "step": 220580 + }, + { + "epoch": 1.4147957400275146, + "grad_norm": 0.08321509510278702, + "learning_rate": 2.3890058260861086e-06, + "loss": 0.0018, + "step": 220590 + }, + { + "epoch": 1.4148598769213008, + "grad_norm": 0.425908625125885, + "learning_rate": 2.388528514105774e-06, + "loss": 0.0012, + "step": 220600 + }, + { + "epoch": 1.414924013815087, + "grad_norm": 0.09089425206184387, + "learning_rate": 2.3880512348490412e-06, + "loss": 0.0023, + "step": 220610 + }, + { + "epoch": 1.414988150708873, + "grad_norm": 0.07154347002506256, + "learning_rate": 2.3875739883218953e-06, + "loss": 0.0014, + "step": 220620 + }, + { + "epoch": 1.4150522876026592, + "grad_norm": 0.12015087157487869, + "learning_rate": 2.3870967745303124e-06, + "loss": 0.001, + "step": 220630 + }, + { + "epoch": 1.4151164244964451, + "grad_norm": 0.17344476282596588, + "learning_rate": 2.3866195934802765e-06, + "loss": 0.0014, + "step": 220640 + }, + { + "epoch": 1.4151805613902313, + "grad_norm": 0.024174202233552933, + "learning_rate": 2.3861424451777643e-06, + "loss": 0.0013, + "step": 220650 + }, + { + "epoch": 1.4152446982840174, + "grad_norm": 0.06502870470285416, + "learning_rate": 2.3856653296287557e-06, + "loss": 0.0015, + "step": 220660 + }, + { + "epoch": 1.4153088351778034, + "grad_norm": 0.0520755797624588, + "learning_rate": 2.3851882468392275e-06, + "loss": 0.0007, + "step": 220670 + }, + { + "epoch": 1.4153729720715895, + "grad_norm": 0.10467202961444855, + "learning_rate": 2.3847111968151605e-06, + "loss": 0.001, + "step": 220680 + }, + { + "epoch": 1.4154371089653757, + "grad_norm": 0.07006729394197464, + "learning_rate": 2.384234179562532e-06, + "loss": 0.0013, + "step": 220690 + }, + { + "epoch": 1.4155012458591618, + "grad_norm": 0.05697028711438179, + "learning_rate": 2.383757195087318e-06, + "loss": 0.0016, + "step": 220700 + }, + { + "epoch": 1.415565382752948, + "grad_norm": 0.07601667940616608, + "learning_rate": 2.383280243395495e-06, + "loss": 0.0011, + "step": 220710 + }, + { + "epoch": 1.4156295196467341, + "grad_norm": 0.1048382818698883, + "learning_rate": 2.3828033244930423e-06, + "loss": 0.0011, + "step": 220720 + }, + { + "epoch": 1.41569365654052, + "grad_norm": 0.04876832291483879, + "learning_rate": 2.382326438385934e-06, + "loss": 0.003, + "step": 220730 + }, + { + "epoch": 1.4157577934343062, + "grad_norm": 0.12612700462341309, + "learning_rate": 2.381849585080144e-06, + "loss": 0.0009, + "step": 220740 + }, + { + "epoch": 1.4158219303280923, + "grad_norm": 0.08230948448181152, + "learning_rate": 2.381372764581653e-06, + "loss": 0.0018, + "step": 220750 + }, + { + "epoch": 1.4158860672218783, + "grad_norm": 0.21726293861865997, + "learning_rate": 2.3808959768964317e-06, + "loss": 0.002, + "step": 220760 + }, + { + "epoch": 1.4159502041156644, + "grad_norm": 0.13905607163906097, + "learning_rate": 2.3804192220304565e-06, + "loss": 0.0011, + "step": 220770 + }, + { + "epoch": 1.4160143410094506, + "grad_norm": 0.07558777183294296, + "learning_rate": 2.3799424999896983e-06, + "loss": 0.0014, + "step": 220780 + }, + { + "epoch": 1.4160784779032367, + "grad_norm": 0.08255726844072342, + "learning_rate": 2.379465810780135e-06, + "loss": 0.001, + "step": 220790 + }, + { + "epoch": 1.4161426147970229, + "grad_norm": 0.01143329031765461, + "learning_rate": 2.378989154407738e-06, + "loss": 0.0012, + "step": 220800 + }, + { + "epoch": 1.4162067516908088, + "grad_norm": 0.18541577458381653, + "learning_rate": 2.37851253087848e-06, + "loss": 0.0018, + "step": 220810 + }, + { + "epoch": 1.416270888584595, + "grad_norm": 0.10772611200809479, + "learning_rate": 2.378035940198332e-06, + "loss": 0.003, + "step": 220820 + }, + { + "epoch": 1.416335025478381, + "grad_norm": 0.12028023600578308, + "learning_rate": 2.377559382373269e-06, + "loss": 0.0026, + "step": 220830 + }, + { + "epoch": 1.4163991623721672, + "grad_norm": 0.052633851766586304, + "learning_rate": 2.3770828574092614e-06, + "loss": 0.0013, + "step": 220840 + }, + { + "epoch": 1.4164632992659532, + "grad_norm": 0.10359262675046921, + "learning_rate": 2.3766063653122807e-06, + "loss": 0.0025, + "step": 220850 + }, + { + "epoch": 1.4165274361597393, + "grad_norm": 0.04219682142138481, + "learning_rate": 2.376129906088294e-06, + "loss": 0.0011, + "step": 220860 + }, + { + "epoch": 1.4165915730535255, + "grad_norm": 0.08940007537603378, + "learning_rate": 2.375653479743278e-06, + "loss": 0.0014, + "step": 220870 + }, + { + "epoch": 1.4166557099473116, + "grad_norm": 0.0020488423760980368, + "learning_rate": 2.3751770862831985e-06, + "loss": 0.002, + "step": 220880 + }, + { + "epoch": 1.4167198468410978, + "grad_norm": 0.06613387912511826, + "learning_rate": 2.374700725714025e-06, + "loss": 0.0009, + "step": 220890 + }, + { + "epoch": 1.4167839837348837, + "grad_norm": 0.04910537600517273, + "learning_rate": 2.374224398041729e-06, + "loss": 0.0013, + "step": 220900 + }, + { + "epoch": 1.4168481206286698, + "grad_norm": 0.09544882923364639, + "learning_rate": 2.3737481032722782e-06, + "loss": 0.0016, + "step": 220910 + }, + { + "epoch": 1.416912257522456, + "grad_norm": 0.01734582521021366, + "learning_rate": 2.3732718414116403e-06, + "loss": 0.0007, + "step": 220920 + }, + { + "epoch": 1.416976394416242, + "grad_norm": 0.06376821547746658, + "learning_rate": 2.372795612465782e-06, + "loss": 0.001, + "step": 220930 + }, + { + "epoch": 1.417040531310028, + "grad_norm": 0.08851666003465652, + "learning_rate": 2.372319416440674e-06, + "loss": 0.0021, + "step": 220940 + }, + { + "epoch": 1.4171046682038142, + "grad_norm": 0.06368345767259598, + "learning_rate": 2.3718432533422813e-06, + "loss": 0.0018, + "step": 220950 + }, + { + "epoch": 1.4171688050976003, + "grad_norm": 0.06570161134004593, + "learning_rate": 2.3713671231765718e-06, + "loss": 0.0012, + "step": 220960 + }, + { + "epoch": 1.4172329419913865, + "grad_norm": 0.08560293912887573, + "learning_rate": 2.3708910259495082e-06, + "loss": 0.0005, + "step": 220970 + }, + { + "epoch": 1.4172970788851726, + "grad_norm": 0.15812426805496216, + "learning_rate": 2.3704149616670615e-06, + "loss": 0.0012, + "step": 220980 + }, + { + "epoch": 1.4173612157789586, + "grad_norm": 0.309256374835968, + "learning_rate": 2.3699389303351946e-06, + "loss": 0.0012, + "step": 220990 + }, + { + "epoch": 1.4174253526727447, + "grad_norm": 0.03634125366806984, + "learning_rate": 2.3694629319598724e-06, + "loss": 0.0011, + "step": 221000 + }, + { + "epoch": 1.4174894895665309, + "grad_norm": 0.11074472218751907, + "learning_rate": 2.3689869665470575e-06, + "loss": 0.0015, + "step": 221010 + }, + { + "epoch": 1.4175536264603168, + "grad_norm": 0.01838160865008831, + "learning_rate": 2.3685110341027183e-06, + "loss": 0.001, + "step": 221020 + }, + { + "epoch": 1.417617763354103, + "grad_norm": 0.16619424521923065, + "learning_rate": 2.368035134632817e-06, + "loss": 0.0018, + "step": 221030 + }, + { + "epoch": 1.417681900247889, + "grad_norm": 0.10735753178596497, + "learning_rate": 2.3675592681433142e-06, + "loss": 0.001, + "step": 221040 + }, + { + "epoch": 1.4177460371416752, + "grad_norm": 0.0535336509346962, + "learning_rate": 2.3670834346401773e-06, + "loss": 0.0011, + "step": 221050 + }, + { + "epoch": 1.4178101740354614, + "grad_norm": 0.004761670250445604, + "learning_rate": 2.3666076341293664e-06, + "loss": 0.0013, + "step": 221060 + }, + { + "epoch": 1.4178743109292473, + "grad_norm": 0.043905891478061676, + "learning_rate": 2.3661318666168444e-06, + "loss": 0.0009, + "step": 221070 + }, + { + "epoch": 1.4179384478230335, + "grad_norm": 0.06458326429128647, + "learning_rate": 2.36565613210857e-06, + "loss": 0.0011, + "step": 221080 + }, + { + "epoch": 1.4180025847168196, + "grad_norm": 0.24945113062858582, + "learning_rate": 2.3651804306105094e-06, + "loss": 0.0016, + "step": 221090 + }, + { + "epoch": 1.4180667216106055, + "grad_norm": 0.025887012481689453, + "learning_rate": 2.364704762128621e-06, + "loss": 0.0014, + "step": 221100 + }, + { + "epoch": 1.4181308585043917, + "grad_norm": 0.04768415540456772, + "learning_rate": 2.364229126668865e-06, + "loss": 0.001, + "step": 221110 + }, + { + "epoch": 1.4181949953981778, + "grad_norm": 0.14740866422653198, + "learning_rate": 2.3637535242372002e-06, + "loss": 0.001, + "step": 221120 + }, + { + "epoch": 1.418259132291964, + "grad_norm": 0.05745894834399223, + "learning_rate": 2.363277954839589e-06, + "loss": 0.0015, + "step": 221130 + }, + { + "epoch": 1.4183232691857501, + "grad_norm": 0.07447264343500137, + "learning_rate": 2.3628024184819903e-06, + "loss": 0.0011, + "step": 221140 + }, + { + "epoch": 1.4183874060795363, + "grad_norm": 0.03310972824692726, + "learning_rate": 2.362326915170362e-06, + "loss": 0.0026, + "step": 221150 + }, + { + "epoch": 1.4184515429733222, + "grad_norm": 0.04251949116587639, + "learning_rate": 2.3618514449106606e-06, + "loss": 0.0013, + "step": 221160 + }, + { + "epoch": 1.4185156798671084, + "grad_norm": 0.03919053077697754, + "learning_rate": 2.361376007708848e-06, + "loss": 0.0016, + "step": 221170 + }, + { + "epoch": 1.4185798167608945, + "grad_norm": 0.04729697108268738, + "learning_rate": 2.3609006035708793e-06, + "loss": 0.0009, + "step": 221180 + }, + { + "epoch": 1.4186439536546804, + "grad_norm": 0.06630100309848785, + "learning_rate": 2.3604252325027106e-06, + "loss": 0.0011, + "step": 221190 + }, + { + "epoch": 1.4187080905484666, + "grad_norm": 0.0515078529715538, + "learning_rate": 2.359949894510302e-06, + "loss": 0.0005, + "step": 221200 + }, + { + "epoch": 1.4187722274422527, + "grad_norm": 0.07539720833301544, + "learning_rate": 2.3594745895996083e-06, + "loss": 0.0012, + "step": 221210 + }, + { + "epoch": 1.4188363643360389, + "grad_norm": 0.022874781861901283, + "learning_rate": 2.3589993177765854e-06, + "loss": 0.0021, + "step": 221220 + }, + { + "epoch": 1.418900501229825, + "grad_norm": 0.03904331475496292, + "learning_rate": 2.3585240790471862e-06, + "loss": 0.0006, + "step": 221230 + }, + { + "epoch": 1.418964638123611, + "grad_norm": 0.07771909236907959, + "learning_rate": 2.3580488734173703e-06, + "loss": 0.0008, + "step": 221240 + }, + { + "epoch": 1.419028775017397, + "grad_norm": 0.08516875654459, + "learning_rate": 2.35757370089309e-06, + "loss": 0.0005, + "step": 221250 + }, + { + "epoch": 1.4190929119111833, + "grad_norm": 0.053227923810482025, + "learning_rate": 2.3570985614803e-06, + "loss": 0.0016, + "step": 221260 + }, + { + "epoch": 1.4191570488049694, + "grad_norm": 0.12357582151889801, + "learning_rate": 2.3566234551849514e-06, + "loss": 0.0011, + "step": 221270 + }, + { + "epoch": 1.4192211856987553, + "grad_norm": 0.1287904679775238, + "learning_rate": 2.356148382013002e-06, + "loss": 0.0012, + "step": 221280 + }, + { + "epoch": 1.4192853225925415, + "grad_norm": 0.16033320128917694, + "learning_rate": 2.3556733419704036e-06, + "loss": 0.0107, + "step": 221290 + }, + { + "epoch": 1.4193494594863276, + "grad_norm": 0.08888640999794006, + "learning_rate": 2.355198335063108e-06, + "loss": 0.0022, + "step": 221300 + }, + { + "epoch": 1.4194135963801138, + "grad_norm": 0.13831044733524323, + "learning_rate": 2.354723361297065e-06, + "loss": 0.0012, + "step": 221310 + }, + { + "epoch": 1.4194777332739, + "grad_norm": 0.10133393108844757, + "learning_rate": 2.3542484206782313e-06, + "loss": 0.0024, + "step": 221320 + }, + { + "epoch": 1.4195418701676858, + "grad_norm": 0.1301027089357376, + "learning_rate": 2.3537735132125555e-06, + "loss": 0.001, + "step": 221330 + }, + { + "epoch": 1.419606007061472, + "grad_norm": 0.22867679595947266, + "learning_rate": 2.3532986389059868e-06, + "loss": 0.0009, + "step": 221340 + }, + { + "epoch": 1.4196701439552581, + "grad_norm": 0.036261752247810364, + "learning_rate": 2.35282379776448e-06, + "loss": 0.0005, + "step": 221350 + }, + { + "epoch": 1.419734280849044, + "grad_norm": 0.02306281588971615, + "learning_rate": 2.352348989793983e-06, + "loss": 0.0019, + "step": 221360 + }, + { + "epoch": 1.4197984177428302, + "grad_norm": 0.1047319546341896, + "learning_rate": 2.3518742150004447e-06, + "loss": 0.0012, + "step": 221370 + }, + { + "epoch": 1.4198625546366164, + "grad_norm": 0.07301588356494904, + "learning_rate": 2.351399473389813e-06, + "loss": 0.0007, + "step": 221380 + }, + { + "epoch": 1.4199266915304025, + "grad_norm": 0.2079521119594574, + "learning_rate": 2.35092476496804e-06, + "loss": 0.0008, + "step": 221390 + }, + { + "epoch": 1.4199908284241887, + "grad_norm": 0.09328661859035492, + "learning_rate": 2.350450089741074e-06, + "loss": 0.002, + "step": 221400 + }, + { + "epoch": 1.4200549653179748, + "grad_norm": 0.13493525981903076, + "learning_rate": 2.3499754477148623e-06, + "loss": 0.0011, + "step": 221410 + }, + { + "epoch": 1.4201191022117607, + "grad_norm": 0.03143034875392914, + "learning_rate": 2.3495008388953523e-06, + "loss": 0.0006, + "step": 221420 + }, + { + "epoch": 1.420183239105547, + "grad_norm": 0.10459790378808975, + "learning_rate": 2.349026263288489e-06, + "loss": 0.0019, + "step": 221430 + }, + { + "epoch": 1.420247375999333, + "grad_norm": 0.06142476946115494, + "learning_rate": 2.348551720900223e-06, + "loss": 0.0011, + "step": 221440 + }, + { + "epoch": 1.420311512893119, + "grad_norm": 0.3689160645008087, + "learning_rate": 2.348077211736499e-06, + "loss": 0.0022, + "step": 221450 + }, + { + "epoch": 1.4203756497869051, + "grad_norm": 0.06635049730539322, + "learning_rate": 2.347602735803261e-06, + "loss": 0.0014, + "step": 221460 + }, + { + "epoch": 1.4204397866806913, + "grad_norm": 0.07540370523929596, + "learning_rate": 2.347128293106458e-06, + "loss": 0.001, + "step": 221470 + }, + { + "epoch": 1.4205039235744774, + "grad_norm": 0.12796327471733093, + "learning_rate": 2.3466538836520337e-06, + "loss": 0.0011, + "step": 221480 + }, + { + "epoch": 1.4205680604682636, + "grad_norm": 0.1090424507856369, + "learning_rate": 2.3461795074459327e-06, + "loss": 0.0016, + "step": 221490 + }, + { + "epoch": 1.4206321973620495, + "grad_norm": 0.0716809332370758, + "learning_rate": 2.345705164494097e-06, + "loss": 0.0009, + "step": 221500 + }, + { + "epoch": 1.4206963342558356, + "grad_norm": 0.03105447068810463, + "learning_rate": 2.345230854802475e-06, + "loss": 0.001, + "step": 221510 + }, + { + "epoch": 1.4207604711496218, + "grad_norm": 0.03507548198103905, + "learning_rate": 2.344756578377007e-06, + "loss": 0.0014, + "step": 221520 + }, + { + "epoch": 1.420824608043408, + "grad_norm": 0.12229614704847336, + "learning_rate": 2.344282335223637e-06, + "loss": 0.0012, + "step": 221530 + }, + { + "epoch": 1.4208887449371939, + "grad_norm": 0.19771310687065125, + "learning_rate": 2.343808125348305e-06, + "loss": 0.0021, + "step": 221540 + }, + { + "epoch": 1.42095288183098, + "grad_norm": 0.21561577916145325, + "learning_rate": 2.3433339487569586e-06, + "loss": 0.0014, + "step": 221550 + }, + { + "epoch": 1.4210170187247662, + "grad_norm": 0.006400718353688717, + "learning_rate": 2.342859805455536e-06, + "loss": 0.0011, + "step": 221560 + }, + { + "epoch": 1.4210811556185523, + "grad_norm": 0.042118512094020844, + "learning_rate": 2.3423856954499783e-06, + "loss": 0.0028, + "step": 221570 + }, + { + "epoch": 1.4211452925123385, + "grad_norm": 0.18844324350357056, + "learning_rate": 2.341911618746226e-06, + "loss": 0.0024, + "step": 221580 + }, + { + "epoch": 1.4212094294061244, + "grad_norm": 0.15068097412586212, + "learning_rate": 2.341437575350223e-06, + "loss": 0.0012, + "step": 221590 + }, + { + "epoch": 1.4212735662999105, + "grad_norm": 0.12804320454597473, + "learning_rate": 2.3409635652679068e-06, + "loss": 0.0013, + "step": 221600 + }, + { + "epoch": 1.4213377031936967, + "grad_norm": 0.22675733268260956, + "learning_rate": 2.3404895885052156e-06, + "loss": 0.0025, + "step": 221610 + }, + { + "epoch": 1.4214018400874826, + "grad_norm": 0.01598006673157215, + "learning_rate": 2.340015645068093e-06, + "loss": 0.0016, + "step": 221620 + }, + { + "epoch": 1.4214659769812688, + "grad_norm": 0.0054878066293895245, + "learning_rate": 2.3395417349624744e-06, + "loss": 0.0011, + "step": 221630 + }, + { + "epoch": 1.421530113875055, + "grad_norm": 0.0077287182211875916, + "learning_rate": 2.3390678581943e-06, + "loss": 0.0008, + "step": 221640 + }, + { + "epoch": 1.421594250768841, + "grad_norm": 0.11687889695167542, + "learning_rate": 2.3385940147695053e-06, + "loss": 0.0013, + "step": 221650 + }, + { + "epoch": 1.4216583876626272, + "grad_norm": 0.09536377340555191, + "learning_rate": 2.338120204694031e-06, + "loss": 0.0012, + "step": 221660 + }, + { + "epoch": 1.4217225245564131, + "grad_norm": 0.05910412594676018, + "learning_rate": 2.3376464279738133e-06, + "loss": 0.001, + "step": 221670 + }, + { + "epoch": 1.4217866614501993, + "grad_norm": 0.03483949229121208, + "learning_rate": 2.3371726846147886e-06, + "loss": 0.0018, + "step": 221680 + }, + { + "epoch": 1.4218507983439854, + "grad_norm": 0.026624565944075584, + "learning_rate": 2.3366989746228914e-06, + "loss": 0.0007, + "step": 221690 + }, + { + "epoch": 1.4219149352377716, + "grad_norm": 0.015865344554185867, + "learning_rate": 2.3362252980040614e-06, + "loss": 0.0018, + "step": 221700 + }, + { + "epoch": 1.4219790721315575, + "grad_norm": 0.03469441086053848, + "learning_rate": 2.3357516547642316e-06, + "loss": 0.001, + "step": 221710 + }, + { + "epoch": 1.4220432090253436, + "grad_norm": 0.14101850986480713, + "learning_rate": 2.335278044909338e-06, + "loss": 0.0012, + "step": 221720 + }, + { + "epoch": 1.4221073459191298, + "grad_norm": 0.03780830278992653, + "learning_rate": 2.3348044684453138e-06, + "loss": 0.0013, + "step": 221730 + }, + { + "epoch": 1.422171482812916, + "grad_norm": 0.044076595455408096, + "learning_rate": 2.3343309253780953e-06, + "loss": 0.0008, + "step": 221740 + }, + { + "epoch": 1.422235619706702, + "grad_norm": 0.11094122380018234, + "learning_rate": 2.3338574157136155e-06, + "loss": 0.0016, + "step": 221750 + }, + { + "epoch": 1.422299756600488, + "grad_norm": 0.1418447047472, + "learning_rate": 2.3333839394578067e-06, + "loss": 0.0019, + "step": 221760 + }, + { + "epoch": 1.4223638934942742, + "grad_norm": 0.07506245374679565, + "learning_rate": 2.3329104966166045e-06, + "loss": 0.0007, + "step": 221770 + }, + { + "epoch": 1.4224280303880603, + "grad_norm": 0.018780136480927467, + "learning_rate": 2.3324370871959393e-06, + "loss": 0.0009, + "step": 221780 + }, + { + "epoch": 1.4224921672818462, + "grad_norm": 0.1984405517578125, + "learning_rate": 2.3319637112017447e-06, + "loss": 0.0018, + "step": 221790 + }, + { + "epoch": 1.4225563041756324, + "grad_norm": 0.1646849513053894, + "learning_rate": 2.3314903686399493e-06, + "loss": 0.0012, + "step": 221800 + }, + { + "epoch": 1.4226204410694185, + "grad_norm": 0.02109588496387005, + "learning_rate": 2.3310170595164884e-06, + "loss": 0.0011, + "step": 221810 + }, + { + "epoch": 1.4226845779632047, + "grad_norm": 0.24100464582443237, + "learning_rate": 2.3305437838372913e-06, + "loss": 0.0017, + "step": 221820 + }, + { + "epoch": 1.4227487148569908, + "grad_norm": 0.05594455450773239, + "learning_rate": 2.3300705416082888e-06, + "loss": 0.0009, + "step": 221830 + }, + { + "epoch": 1.422812851750777, + "grad_norm": 0.14231975376605988, + "learning_rate": 2.329597332835409e-06, + "loss": 0.0014, + "step": 221840 + }, + { + "epoch": 1.422876988644563, + "grad_norm": 0.008400573395192623, + "learning_rate": 2.329124157524584e-06, + "loss": 0.002, + "step": 221850 + }, + { + "epoch": 1.422941125538349, + "grad_norm": 0.19097235798835754, + "learning_rate": 2.3286510156817426e-06, + "loss": 0.0027, + "step": 221860 + }, + { + "epoch": 1.4230052624321352, + "grad_norm": 0.028585517778992653, + "learning_rate": 2.3281779073128135e-06, + "loss": 0.0009, + "step": 221870 + }, + { + "epoch": 1.4230693993259211, + "grad_norm": 0.009553569369018078, + "learning_rate": 2.3277048324237222e-06, + "loss": 0.0009, + "step": 221880 + }, + { + "epoch": 1.4231335362197073, + "grad_norm": 0.12681032717227936, + "learning_rate": 2.327231791020402e-06, + "loss": 0.0019, + "step": 221890 + }, + { + "epoch": 1.4231976731134934, + "grad_norm": 0.00455677043646574, + "learning_rate": 2.326758783108777e-06, + "loss": 0.0013, + "step": 221900 + }, + { + "epoch": 1.4232618100072796, + "grad_norm": 0.008080567233264446, + "learning_rate": 2.326285808694773e-06, + "loss": 0.0013, + "step": 221910 + }, + { + "epoch": 1.4233259469010657, + "grad_norm": 0.0857100561261177, + "learning_rate": 2.325812867784321e-06, + "loss": 0.001, + "step": 221920 + }, + { + "epoch": 1.4233900837948517, + "grad_norm": 0.19669803977012634, + "learning_rate": 2.3253399603833448e-06, + "loss": 0.0025, + "step": 221930 + }, + { + "epoch": 1.4234542206886378, + "grad_norm": 0.10235214233398438, + "learning_rate": 2.3248670864977706e-06, + "loss": 0.0011, + "step": 221940 + }, + { + "epoch": 1.423518357582424, + "grad_norm": 0.2035292088985443, + "learning_rate": 2.324394246133522e-06, + "loss": 0.0012, + "step": 221950 + }, + { + "epoch": 1.42358249447621, + "grad_norm": 0.05004888400435448, + "learning_rate": 2.323921439296527e-06, + "loss": 0.0014, + "step": 221960 + }, + { + "epoch": 1.423646631369996, + "grad_norm": 0.017358137294650078, + "learning_rate": 2.323448665992709e-06, + "loss": 0.0049, + "step": 221970 + }, + { + "epoch": 1.4237107682637822, + "grad_norm": 0.022545376792550087, + "learning_rate": 2.322975926227992e-06, + "loss": 0.0011, + "step": 221980 + }, + { + "epoch": 1.4237749051575683, + "grad_norm": 0.06279610842466354, + "learning_rate": 2.322503220008299e-06, + "loss": 0.0015, + "step": 221990 + }, + { + "epoch": 1.4238390420513545, + "grad_norm": 0.0311025008559227, + "learning_rate": 2.322030547339555e-06, + "loss": 0.0006, + "step": 222000 + }, + { + "epoch": 1.4239031789451406, + "grad_norm": 0.1442340612411499, + "learning_rate": 2.321557908227682e-06, + "loss": 0.0017, + "step": 222010 + }, + { + "epoch": 1.4239673158389266, + "grad_norm": 0.05317877233028412, + "learning_rate": 2.321085302678604e-06, + "loss": 0.0014, + "step": 222020 + }, + { + "epoch": 1.4240314527327127, + "grad_norm": 0.05801217257976532, + "learning_rate": 2.320612730698239e-06, + "loss": 0.0012, + "step": 222030 + }, + { + "epoch": 1.4240955896264988, + "grad_norm": 0.15718373656272888, + "learning_rate": 2.3201401922925132e-06, + "loss": 0.0018, + "step": 222040 + }, + { + "epoch": 1.4241597265202848, + "grad_norm": 0.040582649409770966, + "learning_rate": 2.3196676874673464e-06, + "loss": 0.001, + "step": 222050 + }, + { + "epoch": 1.424223863414071, + "grad_norm": 0.0020285227801650763, + "learning_rate": 2.3191952162286576e-06, + "loss": 0.0009, + "step": 222060 + }, + { + "epoch": 1.424288000307857, + "grad_norm": 0.04461637884378433, + "learning_rate": 2.3187227785823696e-06, + "loss": 0.0012, + "step": 222070 + }, + { + "epoch": 1.4243521372016432, + "grad_norm": 0.07278832048177719, + "learning_rate": 2.318250374534402e-06, + "loss": 0.0021, + "step": 222080 + }, + { + "epoch": 1.4244162740954294, + "grad_norm": 0.05430842563509941, + "learning_rate": 2.3177780040906735e-06, + "loss": 0.002, + "step": 222090 + }, + { + "epoch": 1.4244804109892155, + "grad_norm": 0.1495872437953949, + "learning_rate": 2.317305667257102e-06, + "loss": 0.001, + "step": 222100 + }, + { + "epoch": 1.4245445478830014, + "grad_norm": 0.10312050580978394, + "learning_rate": 2.3168333640396097e-06, + "loss": 0.0009, + "step": 222110 + }, + { + "epoch": 1.4246086847767876, + "grad_norm": 0.1687595248222351, + "learning_rate": 2.316361094444113e-06, + "loss": 0.0008, + "step": 222120 + }, + { + "epoch": 1.4246728216705737, + "grad_norm": 0.0017926287837326527, + "learning_rate": 2.3158888584765295e-06, + "loss": 0.0009, + "step": 222130 + }, + { + "epoch": 1.4247369585643597, + "grad_norm": 0.020870963111519814, + "learning_rate": 2.3154166561427748e-06, + "loss": 0.0013, + "step": 222140 + }, + { + "epoch": 1.4248010954581458, + "grad_norm": 0.03439674898982048, + "learning_rate": 2.314944487448769e-06, + "loss": 0.0007, + "step": 222150 + }, + { + "epoch": 1.424865232351932, + "grad_norm": 0.07166893780231476, + "learning_rate": 2.3144723524004286e-06, + "loss": 0.0015, + "step": 222160 + }, + { + "epoch": 1.4249293692457181, + "grad_norm": 0.02373315952718258, + "learning_rate": 2.3140002510036693e-06, + "loss": 0.001, + "step": 222170 + }, + { + "epoch": 1.4249935061395043, + "grad_norm": 0.07971645891666412, + "learning_rate": 2.3135281832644045e-06, + "loss": 0.0009, + "step": 222180 + }, + { + "epoch": 1.4250576430332902, + "grad_norm": 0.12441080808639526, + "learning_rate": 2.3130561491885538e-06, + "loss": 0.001, + "step": 222190 + }, + { + "epoch": 1.4251217799270763, + "grad_norm": 0.09575378149747849, + "learning_rate": 2.3125841487820293e-06, + "loss": 0.0008, + "step": 222200 + }, + { + "epoch": 1.4251859168208625, + "grad_norm": 0.0775391086935997, + "learning_rate": 2.312112182050747e-06, + "loss": 0.0011, + "step": 222210 + }, + { + "epoch": 1.4252500537146484, + "grad_norm": 0.022984299808740616, + "learning_rate": 2.3116402490006173e-06, + "loss": 0.0014, + "step": 222220 + }, + { + "epoch": 1.4253141906084346, + "grad_norm": 0.028220055624842644, + "learning_rate": 2.311168349637559e-06, + "loss": 0.0028, + "step": 222230 + }, + { + "epoch": 1.4253783275022207, + "grad_norm": 0.05225247144699097, + "learning_rate": 2.310696483967483e-06, + "loss": 0.0028, + "step": 222240 + }, + { + "epoch": 1.4254424643960069, + "grad_norm": 0.07785075157880783, + "learning_rate": 2.310224651996302e-06, + "loss": 0.0007, + "step": 222250 + }, + { + "epoch": 1.425506601289793, + "grad_norm": 0.15400755405426025, + "learning_rate": 2.3097528537299263e-06, + "loss": 0.0017, + "step": 222260 + }, + { + "epoch": 1.4255707381835792, + "grad_norm": 0.029253359884023666, + "learning_rate": 2.309281089174272e-06, + "loss": 0.002, + "step": 222270 + }, + { + "epoch": 1.425634875077365, + "grad_norm": 0.0710631012916565, + "learning_rate": 2.308809358335249e-06, + "loss": 0.0012, + "step": 222280 + }, + { + "epoch": 1.4256990119711512, + "grad_norm": 0.07595669478178024, + "learning_rate": 2.3083376612187676e-06, + "loss": 0.0014, + "step": 222290 + }, + { + "epoch": 1.4257631488649374, + "grad_norm": 0.08065669238567352, + "learning_rate": 2.307865997830737e-06, + "loss": 0.0009, + "step": 222300 + }, + { + "epoch": 1.4258272857587233, + "grad_norm": 0.04362192004919052, + "learning_rate": 2.3073943681770723e-06, + "loss": 0.0018, + "step": 222310 + }, + { + "epoch": 1.4258914226525095, + "grad_norm": 0.07967459410429001, + "learning_rate": 2.30692277226368e-06, + "loss": 0.0011, + "step": 222320 + }, + { + "epoch": 1.4259555595462956, + "grad_norm": 0.06728452444076538, + "learning_rate": 2.3064512100964683e-06, + "loss": 0.0023, + "step": 222330 + }, + { + "epoch": 1.4260196964400818, + "grad_norm": 0.08936009556055069, + "learning_rate": 2.3059796816813494e-06, + "loss": 0.0012, + "step": 222340 + }, + { + "epoch": 1.426083833333868, + "grad_norm": 0.06235896050930023, + "learning_rate": 2.3055081870242314e-06, + "loss": 0.0013, + "step": 222350 + }, + { + "epoch": 1.4261479702276538, + "grad_norm": 0.16244564950466156, + "learning_rate": 2.3050367261310207e-06, + "loss": 0.0015, + "step": 222360 + }, + { + "epoch": 1.42621210712144, + "grad_norm": 0.06734142452478409, + "learning_rate": 2.3045652990076245e-06, + "loss": 0.0017, + "step": 222370 + }, + { + "epoch": 1.4262762440152261, + "grad_norm": 0.18772275745868683, + "learning_rate": 2.3040939056599533e-06, + "loss": 0.0011, + "step": 222380 + }, + { + "epoch": 1.4263403809090123, + "grad_norm": 0.27170705795288086, + "learning_rate": 2.3036225460939115e-06, + "loss": 0.0012, + "step": 222390 + }, + { + "epoch": 1.4264045178027982, + "grad_norm": 0.042087312787771225, + "learning_rate": 2.303151220315407e-06, + "loss": 0.0007, + "step": 222400 + }, + { + "epoch": 1.4264686546965843, + "grad_norm": 0.08965945243835449, + "learning_rate": 2.302679928330343e-06, + "loss": 0.0011, + "step": 222410 + }, + { + "epoch": 1.4265327915903705, + "grad_norm": 0.05479520559310913, + "learning_rate": 2.3022086701446292e-06, + "loss": 0.0009, + "step": 222420 + }, + { + "epoch": 1.4265969284841566, + "grad_norm": 0.06495223194360733, + "learning_rate": 2.301737445764169e-06, + "loss": 0.0006, + "step": 222430 + }, + { + "epoch": 1.4266610653779428, + "grad_norm": 0.17523230612277985, + "learning_rate": 2.301266255194865e-06, + "loss": 0.0015, + "step": 222440 + }, + { + "epoch": 1.4267252022717287, + "grad_norm": 0.06626857817173004, + "learning_rate": 2.3007950984426254e-06, + "loss": 0.0017, + "step": 222450 + }, + { + "epoch": 1.4267893391655149, + "grad_norm": 0.2895970046520233, + "learning_rate": 2.300323975513352e-06, + "loss": 0.003, + "step": 222460 + }, + { + "epoch": 1.426853476059301, + "grad_norm": 0.06247478350996971, + "learning_rate": 2.2998528864129495e-06, + "loss": 0.0006, + "step": 222470 + }, + { + "epoch": 1.426917612953087, + "grad_norm": 0.044517505913972855, + "learning_rate": 2.2993818311473175e-06, + "loss": 0.0012, + "step": 222480 + }, + { + "epoch": 1.426981749846873, + "grad_norm": 0.05764401704072952, + "learning_rate": 2.2989108097223635e-06, + "loss": 0.0011, + "step": 222490 + }, + { + "epoch": 1.4270458867406592, + "grad_norm": 0.041933346539735794, + "learning_rate": 2.2984398221439873e-06, + "loss": 0.0008, + "step": 222500 + }, + { + "epoch": 1.4271100236344454, + "grad_norm": 0.06117698922753334, + "learning_rate": 2.297968868418091e-06, + "loss": 0.0012, + "step": 222510 + }, + { + "epoch": 1.4271741605282315, + "grad_norm": 0.04828890040516853, + "learning_rate": 2.2974979485505737e-06, + "loss": 0.0021, + "step": 222520 + }, + { + "epoch": 1.4272382974220177, + "grad_norm": 0.08611414581537247, + "learning_rate": 2.2970270625473407e-06, + "loss": 0.0026, + "step": 222530 + }, + { + "epoch": 1.4273024343158036, + "grad_norm": 0.017762210220098495, + "learning_rate": 2.29655621041429e-06, + "loss": 0.0007, + "step": 222540 + }, + { + "epoch": 1.4273665712095898, + "grad_norm": 0.1064884141087532, + "learning_rate": 2.2960853921573227e-06, + "loss": 0.001, + "step": 222550 + }, + { + "epoch": 1.427430708103376, + "grad_norm": 0.08850418776273727, + "learning_rate": 2.295614607782336e-06, + "loss": 0.0031, + "step": 222560 + }, + { + "epoch": 1.4274948449971618, + "grad_norm": 0.09420426189899445, + "learning_rate": 2.295143857295233e-06, + "loss": 0.002, + "step": 222570 + }, + { + "epoch": 1.427558981890948, + "grad_norm": 0.03913057968020439, + "learning_rate": 2.29467314070191e-06, + "loss": 0.0008, + "step": 222580 + }, + { + "epoch": 1.4276231187847341, + "grad_norm": 0.03345941752195358, + "learning_rate": 2.2942024580082643e-06, + "loss": 0.0016, + "step": 222590 + }, + { + "epoch": 1.4276872556785203, + "grad_norm": 0.1600685566663742, + "learning_rate": 2.2937318092201975e-06, + "loss": 0.0014, + "step": 222600 + }, + { + "epoch": 1.4277513925723064, + "grad_norm": 0.18811281025409698, + "learning_rate": 2.2932611943436055e-06, + "loss": 0.0024, + "step": 222610 + }, + { + "epoch": 1.4278155294660924, + "grad_norm": 0.10699602216482162, + "learning_rate": 2.2927906133843853e-06, + "loss": 0.0008, + "step": 222620 + }, + { + "epoch": 1.4278796663598785, + "grad_norm": 0.0679832175374031, + "learning_rate": 2.2923200663484315e-06, + "loss": 0.0013, + "step": 222630 + }, + { + "epoch": 1.4279438032536647, + "grad_norm": 0.03156968206167221, + "learning_rate": 2.2918495532416444e-06, + "loss": 0.0014, + "step": 222640 + }, + { + "epoch": 1.4280079401474506, + "grad_norm": 0.09393204748630524, + "learning_rate": 2.291379074069918e-06, + "loss": 0.0007, + "step": 222650 + }, + { + "epoch": 1.4280720770412367, + "grad_norm": 0.04491729289293289, + "learning_rate": 2.290908628839147e-06, + "loss": 0.0011, + "step": 222660 + }, + { + "epoch": 1.4281362139350229, + "grad_norm": 0.3788091540336609, + "learning_rate": 2.290438217555226e-06, + "loss": 0.0053, + "step": 222670 + }, + { + "epoch": 1.428200350828809, + "grad_norm": 0.08632595092058182, + "learning_rate": 2.2899678402240516e-06, + "loss": 0.001, + "step": 222680 + }, + { + "epoch": 1.4282644877225952, + "grad_norm": 0.20936362445354462, + "learning_rate": 2.2894974968515176e-06, + "loss": 0.002, + "step": 222690 + }, + { + "epoch": 1.4283286246163813, + "grad_norm": 0.0063627613708376884, + "learning_rate": 2.289027187443517e-06, + "loss": 0.0004, + "step": 222700 + }, + { + "epoch": 1.4283927615101673, + "grad_norm": 0.019038209691643715, + "learning_rate": 2.2885569120059415e-06, + "loss": 0.0019, + "step": 222710 + }, + { + "epoch": 1.4284568984039534, + "grad_norm": 0.06620853394269943, + "learning_rate": 2.288086670544687e-06, + "loss": 0.0013, + "step": 222720 + }, + { + "epoch": 1.4285210352977395, + "grad_norm": 0.04086804389953613, + "learning_rate": 2.2876164630656455e-06, + "loss": 0.0014, + "step": 222730 + }, + { + "epoch": 1.4285851721915255, + "grad_norm": 0.03152358904480934, + "learning_rate": 2.287146289574706e-06, + "loss": 0.0012, + "step": 222740 + }, + { + "epoch": 1.4286493090853116, + "grad_norm": 0.2787061631679535, + "learning_rate": 2.286676150077764e-06, + "loss": 0.0018, + "step": 222750 + }, + { + "epoch": 1.4287134459790978, + "grad_norm": 0.021292492747306824, + "learning_rate": 2.2862060445807094e-06, + "loss": 0.0009, + "step": 222760 + }, + { + "epoch": 1.428777582872884, + "grad_norm": 0.06899149715900421, + "learning_rate": 2.285735973089432e-06, + "loss": 0.0008, + "step": 222770 + }, + { + "epoch": 1.42884171976667, + "grad_norm": 0.08117298036813736, + "learning_rate": 2.2852659356098215e-06, + "loss": 0.0007, + "step": 222780 + }, + { + "epoch": 1.428905856660456, + "grad_norm": 0.024597637355327606, + "learning_rate": 2.284795932147771e-06, + "loss": 0.0009, + "step": 222790 + }, + { + "epoch": 1.4289699935542421, + "grad_norm": 0.10690958052873611, + "learning_rate": 2.2843259627091676e-06, + "loss": 0.0009, + "step": 222800 + }, + { + "epoch": 1.4290341304480283, + "grad_norm": 0.0217173770070076, + "learning_rate": 2.2838560272999006e-06, + "loss": 0.001, + "step": 222810 + }, + { + "epoch": 1.4290982673418144, + "grad_norm": 0.0378669835627079, + "learning_rate": 2.283386125925857e-06, + "loss": 0.001, + "step": 222820 + }, + { + "epoch": 1.4291624042356004, + "grad_norm": 0.05231548845767975, + "learning_rate": 2.2829162585929287e-06, + "loss": 0.0012, + "step": 222830 + }, + { + "epoch": 1.4292265411293865, + "grad_norm": 0.08824943006038666, + "learning_rate": 2.2824464253070017e-06, + "loss": 0.0007, + "step": 222840 + }, + { + "epoch": 1.4292906780231727, + "grad_norm": 0.01857241988182068, + "learning_rate": 2.281976626073963e-06, + "loss": 0.0006, + "step": 222850 + }, + { + "epoch": 1.4293548149169588, + "grad_norm": 0.12173985689878464, + "learning_rate": 2.281506860899698e-06, + "loss": 0.0019, + "step": 222860 + }, + { + "epoch": 1.429418951810745, + "grad_norm": 0.08885741978883743, + "learning_rate": 2.281037129790097e-06, + "loss": 0.005, + "step": 222870 + }, + { + "epoch": 1.429483088704531, + "grad_norm": 0.054606009274721146, + "learning_rate": 2.2805674327510436e-06, + "loss": 0.0015, + "step": 222880 + }, + { + "epoch": 1.429547225598317, + "grad_norm": 0.03347182646393776, + "learning_rate": 2.280097769788422e-06, + "loss": 0.0012, + "step": 222890 + }, + { + "epoch": 1.4296113624921032, + "grad_norm": 0.1394302248954773, + "learning_rate": 2.279628140908121e-06, + "loss": 0.0007, + "step": 222900 + }, + { + "epoch": 1.4296754993858891, + "grad_norm": 0.044962868094444275, + "learning_rate": 2.2791585461160222e-06, + "loss": 0.0014, + "step": 222910 + }, + { + "epoch": 1.4297396362796753, + "grad_norm": 0.12735018134117126, + "learning_rate": 2.278688985418013e-06, + "loss": 0.0032, + "step": 222920 + }, + { + "epoch": 1.4298037731734614, + "grad_norm": 0.21698321402072906, + "learning_rate": 2.278219458819975e-06, + "loss": 0.0018, + "step": 222930 + }, + { + "epoch": 1.4298679100672476, + "grad_norm": 0.01788831502199173, + "learning_rate": 2.277749966327791e-06, + "loss": 0.0008, + "step": 222940 + }, + { + "epoch": 1.4299320469610337, + "grad_norm": 0.1518547683954239, + "learning_rate": 2.2772805079473482e-06, + "loss": 0.0016, + "step": 222950 + }, + { + "epoch": 1.4299961838548199, + "grad_norm": 0.10488652437925339, + "learning_rate": 2.276811083684526e-06, + "loss": 0.0022, + "step": 222960 + }, + { + "epoch": 1.4300603207486058, + "grad_norm": 0.06715139001607895, + "learning_rate": 2.2763416935452064e-06, + "loss": 0.0028, + "step": 222970 + }, + { + "epoch": 1.430124457642392, + "grad_norm": 0.10721565783023834, + "learning_rate": 2.275872337535271e-06, + "loss": 0.0007, + "step": 222980 + }, + { + "epoch": 1.430188594536178, + "grad_norm": 0.14324164390563965, + "learning_rate": 2.275403015660604e-06, + "loss": 0.0019, + "step": 222990 + }, + { + "epoch": 1.430252731429964, + "grad_norm": 0.12048004567623138, + "learning_rate": 2.274933727927084e-06, + "loss": 0.0008, + "step": 223000 + }, + { + "epoch": 1.4303168683237502, + "grad_norm": 0.09764410555362701, + "learning_rate": 2.2744644743405903e-06, + "loss": 0.0015, + "step": 223010 + }, + { + "epoch": 1.4303810052175363, + "grad_norm": 0.1218528151512146, + "learning_rate": 2.273995254907006e-06, + "loss": 0.0008, + "step": 223020 + }, + { + "epoch": 1.4304451421113225, + "grad_norm": 0.08948386460542679, + "learning_rate": 2.2735260696322097e-06, + "loss": 0.0015, + "step": 223030 + }, + { + "epoch": 1.4305092790051086, + "grad_norm": 0.056394536048173904, + "learning_rate": 2.2730569185220807e-06, + "loss": 0.001, + "step": 223040 + }, + { + "epoch": 1.4305734158988945, + "grad_norm": 0.11490045487880707, + "learning_rate": 2.2725878015824954e-06, + "loss": 0.0019, + "step": 223050 + }, + { + "epoch": 1.4306375527926807, + "grad_norm": 0.03355501964688301, + "learning_rate": 2.272118718819336e-06, + "loss": 0.0007, + "step": 223060 + }, + { + "epoch": 1.4307016896864668, + "grad_norm": 0.052423711866140366, + "learning_rate": 2.2716496702384787e-06, + "loss": 0.0015, + "step": 223070 + }, + { + "epoch": 1.4307658265802528, + "grad_norm": 0.10398556292057037, + "learning_rate": 2.271180655845801e-06, + "loss": 0.0012, + "step": 223080 + }, + { + "epoch": 1.430829963474039, + "grad_norm": 0.07617367804050446, + "learning_rate": 2.2707116756471783e-06, + "loss": 0.0017, + "step": 223090 + }, + { + "epoch": 1.430894100367825, + "grad_norm": 0.0866626724600792, + "learning_rate": 2.2702427296484903e-06, + "loss": 0.0023, + "step": 223100 + }, + { + "epoch": 1.4309582372616112, + "grad_norm": 0.1561611443758011, + "learning_rate": 2.2697738178556118e-06, + "loss": 0.0013, + "step": 223110 + }, + { + "epoch": 1.4310223741553973, + "grad_norm": 0.04214513301849365, + "learning_rate": 2.269304940274419e-06, + "loss": 0.0012, + "step": 223120 + }, + { + "epoch": 1.4310865110491835, + "grad_norm": 0.03349952772259712, + "learning_rate": 2.2688360969107847e-06, + "loss": 0.0012, + "step": 223130 + }, + { + "epoch": 1.4311506479429694, + "grad_norm": 0.14768363535404205, + "learning_rate": 2.2683672877705876e-06, + "loss": 0.0009, + "step": 223140 + }, + { + "epoch": 1.4312147848367556, + "grad_norm": 0.06566477566957474, + "learning_rate": 2.2678985128597016e-06, + "loss": 0.0013, + "step": 223150 + }, + { + "epoch": 1.4312789217305417, + "grad_norm": 0.09701356291770935, + "learning_rate": 2.2674297721839973e-06, + "loss": 0.0012, + "step": 223160 + }, + { + "epoch": 1.4313430586243276, + "grad_norm": 0.10374977439641953, + "learning_rate": 2.266961065749353e-06, + "loss": 0.0009, + "step": 223170 + }, + { + "epoch": 1.4314071955181138, + "grad_norm": 0.004872579593211412, + "learning_rate": 2.2664923935616397e-06, + "loss": 0.0017, + "step": 223180 + }, + { + "epoch": 1.4314713324119, + "grad_norm": 0.06999684870243073, + "learning_rate": 2.26602375562673e-06, + "loss": 0.0009, + "step": 223190 + }, + { + "epoch": 1.431535469305686, + "grad_norm": 0.05309106409549713, + "learning_rate": 2.2655551519504955e-06, + "loss": 0.0012, + "step": 223200 + }, + { + "epoch": 1.4315996061994722, + "grad_norm": 0.1891021579504013, + "learning_rate": 2.2650865825388103e-06, + "loss": 0.0008, + "step": 223210 + }, + { + "epoch": 1.4316637430932582, + "grad_norm": 0.06925483047962189, + "learning_rate": 2.2646180473975454e-06, + "loss": 0.0008, + "step": 223220 + }, + { + "epoch": 1.4317278799870443, + "grad_norm": 0.16873957216739655, + "learning_rate": 2.2641495465325707e-06, + "loss": 0.0006, + "step": 223230 + }, + { + "epoch": 1.4317920168808305, + "grad_norm": 0.12946078181266785, + "learning_rate": 2.263681079949756e-06, + "loss": 0.0011, + "step": 223240 + }, + { + "epoch": 1.4318561537746166, + "grad_norm": 0.04395328834652901, + "learning_rate": 2.263212647654975e-06, + "loss": 0.0017, + "step": 223250 + }, + { + "epoch": 1.4319202906684025, + "grad_norm": 0.11219099164009094, + "learning_rate": 2.262744249654095e-06, + "loss": 0.0015, + "step": 223260 + }, + { + "epoch": 1.4319844275621887, + "grad_norm": 0.04642408341169357, + "learning_rate": 2.2622758859529866e-06, + "loss": 0.0011, + "step": 223270 + }, + { + "epoch": 1.4320485644559748, + "grad_norm": 0.02733568102121353, + "learning_rate": 2.261807556557516e-06, + "loss": 0.0015, + "step": 223280 + }, + { + "epoch": 1.432112701349761, + "grad_norm": 0.08607029914855957, + "learning_rate": 2.261339261473555e-06, + "loss": 0.001, + "step": 223290 + }, + { + "epoch": 1.4321768382435471, + "grad_norm": 0.14403820037841797, + "learning_rate": 2.2608710007069706e-06, + "loss": 0.0015, + "step": 223300 + }, + { + "epoch": 1.432240975137333, + "grad_norm": 0.09584439545869827, + "learning_rate": 2.260402774263628e-06, + "loss": 0.0013, + "step": 223310 + }, + { + "epoch": 1.4323051120311192, + "grad_norm": 0.012513621710240841, + "learning_rate": 2.259934582149399e-06, + "loss": 0.0009, + "step": 223320 + }, + { + "epoch": 1.4323692489249054, + "grad_norm": 0.09077856689691544, + "learning_rate": 2.2594664243701474e-06, + "loss": 0.0023, + "step": 223330 + }, + { + "epoch": 1.4324333858186913, + "grad_norm": 0.09028846770524979, + "learning_rate": 2.2589983009317405e-06, + "loss": 0.0011, + "step": 223340 + }, + { + "epoch": 1.4324975227124774, + "grad_norm": 0.05965743586421013, + "learning_rate": 2.258530211840042e-06, + "loss": 0.001, + "step": 223350 + }, + { + "epoch": 1.4325616596062636, + "grad_norm": 0.007242657709866762, + "learning_rate": 2.258062157100921e-06, + "loss": 0.0015, + "step": 223360 + }, + { + "epoch": 1.4326257965000497, + "grad_norm": 0.2847703695297241, + "learning_rate": 2.2575941367202407e-06, + "loss": 0.0015, + "step": 223370 + }, + { + "epoch": 1.4326899333938359, + "grad_norm": 0.02166147716343403, + "learning_rate": 2.257126150703866e-06, + "loss": 0.0008, + "step": 223380 + }, + { + "epoch": 1.432754070287622, + "grad_norm": 0.08338756859302521, + "learning_rate": 2.2566581990576592e-06, + "loss": 0.0009, + "step": 223390 + }, + { + "epoch": 1.432818207181408, + "grad_norm": 0.2065654993057251, + "learning_rate": 2.2561902817874877e-06, + "loss": 0.0016, + "step": 223400 + }, + { + "epoch": 1.432882344075194, + "grad_norm": 0.09669201076030731, + "learning_rate": 2.2557223988992127e-06, + "loss": 0.0021, + "step": 223410 + }, + { + "epoch": 1.4329464809689803, + "grad_norm": 0.04104436933994293, + "learning_rate": 2.255254550398697e-06, + "loss": 0.0009, + "step": 223420 + }, + { + "epoch": 1.4330106178627662, + "grad_norm": 0.03617101535201073, + "learning_rate": 2.2547867362918026e-06, + "loss": 0.0008, + "step": 223430 + }, + { + "epoch": 1.4330747547565523, + "grad_norm": 0.07173281908035278, + "learning_rate": 2.2543189565843938e-06, + "loss": 0.0024, + "step": 223440 + }, + { + "epoch": 1.4331388916503385, + "grad_norm": 0.06350976228713989, + "learning_rate": 2.2538512112823303e-06, + "loss": 0.0008, + "step": 223450 + }, + { + "epoch": 1.4332030285441246, + "grad_norm": 0.004857209045439959, + "learning_rate": 2.253383500391472e-06, + "loss": 0.0007, + "step": 223460 + }, + { + "epoch": 1.4332671654379108, + "grad_norm": 0.0834898054599762, + "learning_rate": 2.2529158239176835e-06, + "loss": 0.0011, + "step": 223470 + }, + { + "epoch": 1.4333313023316967, + "grad_norm": 0.09479456394910812, + "learning_rate": 2.252448181866823e-06, + "loss": 0.0017, + "step": 223480 + }, + { + "epoch": 1.4333954392254828, + "grad_norm": 0.2217305600643158, + "learning_rate": 2.2519805742447504e-06, + "loss": 0.0022, + "step": 223490 + }, + { + "epoch": 1.433459576119269, + "grad_norm": 0.10038892924785614, + "learning_rate": 2.2515130010573234e-06, + "loss": 0.0014, + "step": 223500 + }, + { + "epoch": 1.4335237130130551, + "grad_norm": 0.07915161550045013, + "learning_rate": 2.251045462310405e-06, + "loss": 0.0015, + "step": 223510 + }, + { + "epoch": 1.433587849906841, + "grad_norm": 0.0070143286138772964, + "learning_rate": 2.250577958009851e-06, + "loss": 0.0015, + "step": 223520 + }, + { + "epoch": 1.4336519868006272, + "grad_norm": 0.11248385161161423, + "learning_rate": 2.2501104881615205e-06, + "loss": 0.0016, + "step": 223530 + }, + { + "epoch": 1.4337161236944134, + "grad_norm": 0.003413565456867218, + "learning_rate": 2.2496430527712687e-06, + "loss": 0.0014, + "step": 223540 + }, + { + "epoch": 1.4337802605881995, + "grad_norm": 0.12429042160511017, + "learning_rate": 2.2491756518449576e-06, + "loss": 0.0009, + "step": 223550 + }, + { + "epoch": 1.4338443974819857, + "grad_norm": 0.16163988411426544, + "learning_rate": 2.2487082853884413e-06, + "loss": 0.0016, + "step": 223560 + }, + { + "epoch": 1.4339085343757716, + "grad_norm": 0.15755464136600494, + "learning_rate": 2.248240953407576e-06, + "loss": 0.0011, + "step": 223570 + }, + { + "epoch": 1.4339726712695577, + "grad_norm": 0.04327183961868286, + "learning_rate": 2.247773655908217e-06, + "loss": 0.001, + "step": 223580 + }, + { + "epoch": 1.434036808163344, + "grad_norm": 0.024731261655688286, + "learning_rate": 2.2473063928962222e-06, + "loss": 0.0021, + "step": 223590 + }, + { + "epoch": 1.4341009450571298, + "grad_norm": 0.25946247577667236, + "learning_rate": 2.2468391643774464e-06, + "loss": 0.0017, + "step": 223600 + }, + { + "epoch": 1.434165081950916, + "grad_norm": 0.039348818361759186, + "learning_rate": 2.2463719703577414e-06, + "loss": 0.001, + "step": 223610 + }, + { + "epoch": 1.4342292188447021, + "grad_norm": 0.08746260404586792, + "learning_rate": 2.2459048108429653e-06, + "loss": 0.0011, + "step": 223620 + }, + { + "epoch": 1.4342933557384883, + "grad_norm": 0.04812712222337723, + "learning_rate": 2.2454376858389704e-06, + "loss": 0.0015, + "step": 223630 + }, + { + "epoch": 1.4343574926322744, + "grad_norm": 0.025965077802538872, + "learning_rate": 2.24497059535161e-06, + "loss": 0.0006, + "step": 223640 + }, + { + "epoch": 1.4344216295260606, + "grad_norm": 0.13733486831188202, + "learning_rate": 2.244503539386735e-06, + "loss": 0.0013, + "step": 223650 + }, + { + "epoch": 1.4344857664198465, + "grad_norm": 0.030825136229395866, + "learning_rate": 2.244036517950202e-06, + "loss": 0.0013, + "step": 223660 + }, + { + "epoch": 1.4345499033136326, + "grad_norm": 0.019314579665660858, + "learning_rate": 2.2435695310478587e-06, + "loss": 0.0012, + "step": 223670 + }, + { + "epoch": 1.4346140402074188, + "grad_norm": 0.07342612743377686, + "learning_rate": 2.2431025786855614e-06, + "loss": 0.0023, + "step": 223680 + }, + { + "epoch": 1.4346781771012047, + "grad_norm": 0.0634700134396553, + "learning_rate": 2.2426356608691597e-06, + "loss": 0.0024, + "step": 223690 + }, + { + "epoch": 1.4347423139949909, + "grad_norm": 0.011114954017102718, + "learning_rate": 2.2421687776045016e-06, + "loss": 0.0012, + "step": 223700 + }, + { + "epoch": 1.434806450888777, + "grad_norm": 0.0006286423886194825, + "learning_rate": 2.241701928897441e-06, + "loss": 0.0014, + "step": 223710 + }, + { + "epoch": 1.4348705877825632, + "grad_norm": 0.06351950019598007, + "learning_rate": 2.241235114753827e-06, + "loss": 0.0006, + "step": 223720 + }, + { + "epoch": 1.4349347246763493, + "grad_norm": 0.05163135752081871, + "learning_rate": 2.240768335179507e-06, + "loss": 0.0008, + "step": 223730 + }, + { + "epoch": 1.4349988615701352, + "grad_norm": 0.014766098000109196, + "learning_rate": 2.2403015901803333e-06, + "loss": 0.0006, + "step": 223740 + }, + { + "epoch": 1.4350629984639214, + "grad_norm": 0.034667257219552994, + "learning_rate": 2.2398348797621528e-06, + "loss": 0.0011, + "step": 223750 + }, + { + "epoch": 1.4351271353577075, + "grad_norm": 0.025669017806649208, + "learning_rate": 2.2393682039308147e-06, + "loss": 0.0021, + "step": 223760 + }, + { + "epoch": 1.4351912722514935, + "grad_norm": 0.04082329198718071, + "learning_rate": 2.2389015626921634e-06, + "loss": 0.0004, + "step": 223770 + }, + { + "epoch": 1.4352554091452796, + "grad_norm": 0.05806554853916168, + "learning_rate": 2.238434956052051e-06, + "loss": 0.0041, + "step": 223780 + }, + { + "epoch": 1.4353195460390658, + "grad_norm": 0.10913745313882828, + "learning_rate": 2.237968384016322e-06, + "loss": 0.001, + "step": 223790 + }, + { + "epoch": 1.435383682932852, + "grad_norm": 0.04209039360284805, + "learning_rate": 2.237501846590823e-06, + "loss": 0.0009, + "step": 223800 + }, + { + "epoch": 1.435447819826638, + "grad_norm": 0.1168067678809166, + "learning_rate": 2.2370353437813992e-06, + "loss": 0.001, + "step": 223810 + }, + { + "epoch": 1.4355119567204242, + "grad_norm": 0.05427335202693939, + "learning_rate": 2.2365688755938986e-06, + "loss": 0.0013, + "step": 223820 + }, + { + "epoch": 1.4355760936142101, + "grad_norm": 0.08518806844949722, + "learning_rate": 2.236102442034165e-06, + "loss": 0.0023, + "step": 223830 + }, + { + "epoch": 1.4356402305079963, + "grad_norm": 0.3001389801502228, + "learning_rate": 2.2356360431080427e-06, + "loss": 0.0023, + "step": 223840 + }, + { + "epoch": 1.4357043674017824, + "grad_norm": 0.10559581220149994, + "learning_rate": 2.235169678821375e-06, + "loss": 0.0016, + "step": 223850 + }, + { + "epoch": 1.4357685042955683, + "grad_norm": 0.07048898935317993, + "learning_rate": 2.234703349180009e-06, + "loss": 0.001, + "step": 223860 + }, + { + "epoch": 1.4358326411893545, + "grad_norm": 0.11938511580228806, + "learning_rate": 2.2342370541897867e-06, + "loss": 0.0007, + "step": 223870 + }, + { + "epoch": 1.4358967780831406, + "grad_norm": 0.09405989199876785, + "learning_rate": 2.2337707938565485e-06, + "loss": 0.0008, + "step": 223880 + }, + { + "epoch": 1.4359609149769268, + "grad_norm": 0.11577337235212326, + "learning_rate": 2.233304568186141e-06, + "loss": 0.0043, + "step": 223890 + }, + { + "epoch": 1.436025051870713, + "grad_norm": 0.6096563339233398, + "learning_rate": 2.232838377184405e-06, + "loss": 0.0039, + "step": 223900 + }, + { + "epoch": 1.4360891887644989, + "grad_norm": 0.0584028996527195, + "learning_rate": 2.2323722208571813e-06, + "loss": 0.0016, + "step": 223910 + }, + { + "epoch": 1.436153325658285, + "grad_norm": 0.058712344616651535, + "learning_rate": 2.23190609921031e-06, + "loss": 0.0012, + "step": 223920 + }, + { + "epoch": 1.4362174625520712, + "grad_norm": 0.022509023547172546, + "learning_rate": 2.231440012249636e-06, + "loss": 0.0013, + "step": 223930 + }, + { + "epoch": 1.4362815994458573, + "grad_norm": 0.1276644617319107, + "learning_rate": 2.2309739599809967e-06, + "loss": 0.0015, + "step": 223940 + }, + { + "epoch": 1.4363457363396432, + "grad_norm": 0.08014482259750366, + "learning_rate": 2.2305079424102328e-06, + "loss": 0.0012, + "step": 223950 + }, + { + "epoch": 1.4364098732334294, + "grad_norm": 0.2725202441215515, + "learning_rate": 2.230041959543182e-06, + "loss": 0.0027, + "step": 223960 + }, + { + "epoch": 1.4364740101272155, + "grad_norm": 0.24841365218162537, + "learning_rate": 2.229576011385687e-06, + "loss": 0.0027, + "step": 223970 + }, + { + "epoch": 1.4365381470210017, + "grad_norm": 0.0672532245516777, + "learning_rate": 2.229110097943584e-06, + "loss": 0.001, + "step": 223980 + }, + { + "epoch": 1.4366022839147878, + "grad_norm": 0.12797874212265015, + "learning_rate": 2.2286442192227126e-06, + "loss": 0.0017, + "step": 223990 + }, + { + "epoch": 1.4366664208085738, + "grad_norm": 0.10601053386926651, + "learning_rate": 2.228178375228907e-06, + "loss": 0.0011, + "step": 224000 + }, + { + "epoch": 1.43673055770236, + "grad_norm": 0.028491705656051636, + "learning_rate": 2.22771256596801e-06, + "loss": 0.0022, + "step": 224010 + }, + { + "epoch": 1.436794694596146, + "grad_norm": 0.01704668626189232, + "learning_rate": 2.2272467914458557e-06, + "loss": 0.0008, + "step": 224020 + }, + { + "epoch": 1.436858831489932, + "grad_norm": 0.053478218615055084, + "learning_rate": 2.2267810516682793e-06, + "loss": 0.0008, + "step": 224030 + }, + { + "epoch": 1.4369229683837181, + "grad_norm": 0.13523180782794952, + "learning_rate": 2.226315346641119e-06, + "loss": 0.0015, + "step": 224040 + }, + { + "epoch": 1.4369871052775043, + "grad_norm": 0.035574037581682205, + "learning_rate": 2.2258496763702107e-06, + "loss": 0.0021, + "step": 224050 + }, + { + "epoch": 1.4370512421712904, + "grad_norm": 0.01855083554983139, + "learning_rate": 2.2253840408613885e-06, + "loss": 0.001, + "step": 224060 + }, + { + "epoch": 1.4371153790650766, + "grad_norm": 0.004640830680727959, + "learning_rate": 2.2249184401204854e-06, + "loss": 0.0029, + "step": 224070 + }, + { + "epoch": 1.4371795159588627, + "grad_norm": 0.02465139515697956, + "learning_rate": 2.2244528741533388e-06, + "loss": 0.0025, + "step": 224080 + }, + { + "epoch": 1.4372436528526487, + "grad_norm": 0.2250843048095703, + "learning_rate": 2.223987342965782e-06, + "loss": 0.0023, + "step": 224090 + }, + { + "epoch": 1.4373077897464348, + "grad_norm": 0.21969127655029297, + "learning_rate": 2.2235218465636477e-06, + "loss": 0.0025, + "step": 224100 + }, + { + "epoch": 1.437371926640221, + "grad_norm": 0.13385099172592163, + "learning_rate": 2.223056384952767e-06, + "loss": 0.0012, + "step": 224110 + }, + { + "epoch": 1.4374360635340069, + "grad_norm": 0.08626440167427063, + "learning_rate": 2.222590958138976e-06, + "loss": 0.0012, + "step": 224120 + }, + { + "epoch": 1.437500200427793, + "grad_norm": 0.029763473197817802, + "learning_rate": 2.222125566128106e-06, + "loss": 0.0016, + "step": 224130 + }, + { + "epoch": 1.4375643373215792, + "grad_norm": 0.037327855825424194, + "learning_rate": 2.2216602089259852e-06, + "loss": 0.0012, + "step": 224140 + }, + { + "epoch": 1.4376284742153653, + "grad_norm": 0.09519702196121216, + "learning_rate": 2.2211948865384503e-06, + "loss": 0.0007, + "step": 224150 + }, + { + "epoch": 1.4376926111091515, + "grad_norm": 0.08022797852754593, + "learning_rate": 2.220729598971329e-06, + "loss": 0.0015, + "step": 224160 + }, + { + "epoch": 1.4377567480029374, + "grad_norm": 0.20702189207077026, + "learning_rate": 2.2202643462304525e-06, + "loss": 0.0022, + "step": 224170 + }, + { + "epoch": 1.4378208848967236, + "grad_norm": 0.11773733794689178, + "learning_rate": 2.2197991283216486e-06, + "loss": 0.0017, + "step": 224180 + }, + { + "epoch": 1.4378850217905097, + "grad_norm": 0.019129108637571335, + "learning_rate": 2.21933394525075e-06, + "loss": 0.0008, + "step": 224190 + }, + { + "epoch": 1.4379491586842956, + "grad_norm": 0.08117559552192688, + "learning_rate": 2.2188687970235846e-06, + "loss": 0.0024, + "step": 224200 + }, + { + "epoch": 1.4380132955780818, + "grad_norm": 0.062179937958717346, + "learning_rate": 2.2184036836459805e-06, + "loss": 0.001, + "step": 224210 + }, + { + "epoch": 1.438077432471868, + "grad_norm": 0.2298547625541687, + "learning_rate": 2.2179386051237652e-06, + "loss": 0.0015, + "step": 224220 + }, + { + "epoch": 1.438141569365654, + "grad_norm": 0.11879534274339676, + "learning_rate": 2.2174735614627685e-06, + "loss": 0.0011, + "step": 224230 + }, + { + "epoch": 1.4382057062594402, + "grad_norm": 0.2470162957906723, + "learning_rate": 2.2170085526688166e-06, + "loss": 0.0014, + "step": 224240 + }, + { + "epoch": 1.4382698431532264, + "grad_norm": 0.012568817473948002, + "learning_rate": 2.2165435787477376e-06, + "loss": 0.0012, + "step": 224250 + }, + { + "epoch": 1.4383339800470123, + "grad_norm": 0.06997620314359665, + "learning_rate": 2.216078639705354e-06, + "loss": 0.0011, + "step": 224260 + }, + { + "epoch": 1.4383981169407984, + "grad_norm": 0.03136972337961197, + "learning_rate": 2.2156137355474967e-06, + "loss": 0.004, + "step": 224270 + }, + { + "epoch": 1.4384622538345846, + "grad_norm": 0.047197647392749786, + "learning_rate": 2.21514886627999e-06, + "loss": 0.001, + "step": 224280 + }, + { + "epoch": 1.4385263907283705, + "grad_norm": 0.04600229859352112, + "learning_rate": 2.2146840319086554e-06, + "loss": 0.0016, + "step": 224290 + }, + { + "epoch": 1.4385905276221567, + "grad_norm": 0.03101489320397377, + "learning_rate": 2.214219232439323e-06, + "loss": 0.001, + "step": 224300 + }, + { + "epoch": 1.4386546645159428, + "grad_norm": 0.2436067909002304, + "learning_rate": 2.2137544678778145e-06, + "loss": 0.0021, + "step": 224310 + }, + { + "epoch": 1.438718801409729, + "grad_norm": 0.07516494393348694, + "learning_rate": 2.2132897382299532e-06, + "loss": 0.0021, + "step": 224320 + }, + { + "epoch": 1.4387829383035151, + "grad_norm": 0.12822185456752777, + "learning_rate": 2.2128250435015618e-06, + "loss": 0.0012, + "step": 224330 + }, + { + "epoch": 1.438847075197301, + "grad_norm": 0.0714908316731453, + "learning_rate": 2.212360383698467e-06, + "loss": 0.0009, + "step": 224340 + }, + { + "epoch": 1.4389112120910872, + "grad_norm": 0.10385997593402863, + "learning_rate": 2.211895758826488e-06, + "loss": 0.0008, + "step": 224350 + }, + { + "epoch": 1.4389753489848733, + "grad_norm": 0.06676853448152542, + "learning_rate": 2.211431168891448e-06, + "loss": 0.0009, + "step": 224360 + }, + { + "epoch": 1.4390394858786595, + "grad_norm": 0.10909586399793625, + "learning_rate": 2.210966613899167e-06, + "loss": 0.0013, + "step": 224370 + }, + { + "epoch": 1.4391036227724454, + "grad_norm": 0.09288477152585983, + "learning_rate": 2.2105020938554687e-06, + "loss": 0.0009, + "step": 224380 + }, + { + "epoch": 1.4391677596662316, + "grad_norm": 0.07626969367265701, + "learning_rate": 2.2100376087661736e-06, + "loss": 0.0016, + "step": 224390 + }, + { + "epoch": 1.4392318965600177, + "grad_norm": 0.07371053844690323, + "learning_rate": 2.2095731586371007e-06, + "loss": 0.0006, + "step": 224400 + }, + { + "epoch": 1.4392960334538039, + "grad_norm": 0.03710919991135597, + "learning_rate": 2.209108743474069e-06, + "loss": 0.0028, + "step": 224410 + }, + { + "epoch": 1.43936017034759, + "grad_norm": 0.10803516954183578, + "learning_rate": 2.208644363282901e-06, + "loss": 0.0011, + "step": 224420 + }, + { + "epoch": 1.439424307241376, + "grad_norm": 0.12155324965715408, + "learning_rate": 2.2081800180694135e-06, + "loss": 0.0009, + "step": 224430 + }, + { + "epoch": 1.439488444135162, + "grad_norm": 0.0163084976375103, + "learning_rate": 2.2077157078394244e-06, + "loss": 0.0017, + "step": 224440 + }, + { + "epoch": 1.4395525810289482, + "grad_norm": 0.07152056694030762, + "learning_rate": 2.2072514325987532e-06, + "loss": 0.0009, + "step": 224450 + }, + { + "epoch": 1.4396167179227342, + "grad_norm": 0.03252122923731804, + "learning_rate": 2.2067871923532186e-06, + "loss": 0.0008, + "step": 224460 + }, + { + "epoch": 1.4396808548165203, + "grad_norm": 0.07736662775278091, + "learning_rate": 2.206322987108638e-06, + "loss": 0.0017, + "step": 224470 + }, + { + "epoch": 1.4397449917103065, + "grad_norm": 0.0351140983402729, + "learning_rate": 2.2058588168708257e-06, + "loss": 0.0023, + "step": 224480 + }, + { + "epoch": 1.4398091286040926, + "grad_norm": 0.11911506950855255, + "learning_rate": 2.2053946816455985e-06, + "loss": 0.0012, + "step": 224490 + }, + { + "epoch": 1.4398732654978788, + "grad_norm": 0.11416828632354736, + "learning_rate": 2.2049305814387746e-06, + "loss": 0.0016, + "step": 224500 + }, + { + "epoch": 1.439937402391665, + "grad_norm": 0.08931335061788559, + "learning_rate": 2.204466516256168e-06, + "loss": 0.001, + "step": 224510 + }, + { + "epoch": 1.4400015392854508, + "grad_norm": 0.1183854192495346, + "learning_rate": 2.204002486103594e-06, + "loss": 0.0015, + "step": 224520 + }, + { + "epoch": 1.440065676179237, + "grad_norm": 0.08022509515285492, + "learning_rate": 2.203538490986865e-06, + "loss": 0.0014, + "step": 224530 + }, + { + "epoch": 1.4401298130730231, + "grad_norm": 0.05324234440922737, + "learning_rate": 2.2030745309117995e-06, + "loss": 0.0015, + "step": 224540 + }, + { + "epoch": 1.440193949966809, + "grad_norm": 0.09862230718135834, + "learning_rate": 2.2026106058842094e-06, + "loss": 0.0025, + "step": 224550 + }, + { + "epoch": 1.4402580868605952, + "grad_norm": 0.024814411997795105, + "learning_rate": 2.2021467159099055e-06, + "loss": 0.0012, + "step": 224560 + }, + { + "epoch": 1.4403222237543813, + "grad_norm": 0.07038882374763489, + "learning_rate": 2.2016828609947044e-06, + "loss": 0.0017, + "step": 224570 + }, + { + "epoch": 1.4403863606481675, + "grad_norm": 0.06504993885755539, + "learning_rate": 2.201219041144417e-06, + "loss": 0.0011, + "step": 224580 + }, + { + "epoch": 1.4404504975419536, + "grad_norm": 0.029528573155403137, + "learning_rate": 2.200755256364856e-06, + "loss": 0.0018, + "step": 224590 + }, + { + "epoch": 1.4405146344357396, + "grad_norm": 0.03728478401899338, + "learning_rate": 2.20029150666183e-06, + "loss": 0.001, + "step": 224600 + }, + { + "epoch": 1.4405787713295257, + "grad_norm": 0.0857059583067894, + "learning_rate": 2.1998277920411536e-06, + "loss": 0.0011, + "step": 224610 + }, + { + "epoch": 1.4406429082233119, + "grad_norm": 0.058394331485033035, + "learning_rate": 2.199364112508637e-06, + "loss": 0.0011, + "step": 224620 + }, + { + "epoch": 1.4407070451170978, + "grad_norm": 0.0408020094037056, + "learning_rate": 2.1989004680700893e-06, + "loss": 0.0014, + "step": 224630 + }, + { + "epoch": 1.440771182010884, + "grad_norm": 0.06660662591457367, + "learning_rate": 2.1984368587313186e-06, + "loss": 0.0015, + "step": 224640 + }, + { + "epoch": 1.44083531890467, + "grad_norm": 0.02765144221484661, + "learning_rate": 2.197973284498139e-06, + "loss": 0.0009, + "step": 224650 + }, + { + "epoch": 1.4408994557984562, + "grad_norm": 0.13821285963058472, + "learning_rate": 2.197509745376356e-06, + "loss": 0.0025, + "step": 224660 + }, + { + "epoch": 1.4409635926922424, + "grad_norm": 0.07172411680221558, + "learning_rate": 2.197046241371779e-06, + "loss": 0.0015, + "step": 224670 + }, + { + "epoch": 1.4410277295860285, + "grad_norm": 0.06663408875465393, + "learning_rate": 2.196582772490214e-06, + "loss": 0.0018, + "step": 224680 + }, + { + "epoch": 1.4410918664798145, + "grad_norm": 0.021494129672646523, + "learning_rate": 2.196119338737472e-06, + "loss": 0.0014, + "step": 224690 + }, + { + "epoch": 1.4411560033736006, + "grad_norm": 0.043371740728616714, + "learning_rate": 2.195655940119359e-06, + "loss": 0.0019, + "step": 224700 + }, + { + "epoch": 1.4412201402673868, + "grad_norm": 0.03123489022254944, + "learning_rate": 2.1951925766416795e-06, + "loss": 0.0007, + "step": 224710 + }, + { + "epoch": 1.4412842771611727, + "grad_norm": 0.11745089292526245, + "learning_rate": 2.194729248310243e-06, + "loss": 0.0015, + "step": 224720 + }, + { + "epoch": 1.4413484140549588, + "grad_norm": 0.03425348922610283, + "learning_rate": 2.1942659551308536e-06, + "loss": 0.0017, + "step": 224730 + }, + { + "epoch": 1.441412550948745, + "grad_norm": 0.15983393788337708, + "learning_rate": 2.193802697109318e-06, + "loss": 0.0014, + "step": 224740 + }, + { + "epoch": 1.4414766878425311, + "grad_norm": 0.09651978313922882, + "learning_rate": 2.1933394742514376e-06, + "loss": 0.002, + "step": 224750 + }, + { + "epoch": 1.4415408247363173, + "grad_norm": 0.034692175686359406, + "learning_rate": 2.1928762865630215e-06, + "loss": 0.0017, + "step": 224760 + }, + { + "epoch": 1.4416049616301032, + "grad_norm": 0.0047668395563960075, + "learning_rate": 2.1924131340498716e-06, + "loss": 0.0014, + "step": 224770 + }, + { + "epoch": 1.4416690985238894, + "grad_norm": 0.08082887530326843, + "learning_rate": 2.191950016717792e-06, + "loss": 0.0011, + "step": 224780 + }, + { + "epoch": 1.4417332354176755, + "grad_norm": 0.018363196402788162, + "learning_rate": 2.1914869345725836e-06, + "loss": 0.0028, + "step": 224790 + }, + { + "epoch": 1.4417973723114617, + "grad_norm": 0.0388919971883297, + "learning_rate": 2.1910238876200533e-06, + "loss": 0.0006, + "step": 224800 + }, + { + "epoch": 1.4418615092052476, + "grad_norm": 0.05781109258532524, + "learning_rate": 2.190560875866001e-06, + "loss": 0.001, + "step": 224810 + }, + { + "epoch": 1.4419256460990337, + "grad_norm": 0.04180987924337387, + "learning_rate": 2.1900978993162284e-06, + "loss": 0.0015, + "step": 224820 + }, + { + "epoch": 1.4419897829928199, + "grad_norm": 0.05864508077502251, + "learning_rate": 2.1896349579765356e-06, + "loss": 0.001, + "step": 224830 + }, + { + "epoch": 1.442053919886606, + "grad_norm": 0.045534711331129074, + "learning_rate": 2.1891720518527276e-06, + "loss": 0.0015, + "step": 224840 + }, + { + "epoch": 1.4421180567803922, + "grad_norm": 0.24148043990135193, + "learning_rate": 2.188709180950603e-06, + "loss": 0.0025, + "step": 224850 + }, + { + "epoch": 1.442182193674178, + "grad_norm": 0.05315045267343521, + "learning_rate": 2.188246345275959e-06, + "loss": 0.0023, + "step": 224860 + }, + { + "epoch": 1.4422463305679643, + "grad_norm": 0.039694495499134064, + "learning_rate": 2.1877835448346e-06, + "loss": 0.0027, + "step": 224870 + }, + { + "epoch": 1.4423104674617504, + "grad_norm": 0.12355036288499832, + "learning_rate": 2.1873207796323227e-06, + "loss": 0.0011, + "step": 224880 + }, + { + "epoch": 1.4423746043555363, + "grad_norm": 0.003701229579746723, + "learning_rate": 2.186858049674926e-06, + "loss": 0.0012, + "step": 224890 + }, + { + "epoch": 1.4424387412493225, + "grad_norm": 0.04159437492489815, + "learning_rate": 2.186395354968207e-06, + "loss": 0.001, + "step": 224900 + }, + { + "epoch": 1.4425028781431086, + "grad_norm": 0.041146691888570786, + "learning_rate": 2.185932695517967e-06, + "loss": 0.0014, + "step": 224910 + }, + { + "epoch": 1.4425670150368948, + "grad_norm": 0.08462332934141159, + "learning_rate": 2.1854700713300015e-06, + "loss": 0.0032, + "step": 224920 + }, + { + "epoch": 1.442631151930681, + "grad_norm": 0.10139299184083939, + "learning_rate": 2.1850074824101068e-06, + "loss": 0.0014, + "step": 224930 + }, + { + "epoch": 1.442695288824467, + "grad_norm": 0.25560396909713745, + "learning_rate": 2.184544928764079e-06, + "loss": 0.002, + "step": 224940 + }, + { + "epoch": 1.442759425718253, + "grad_norm": 0.10392890125513077, + "learning_rate": 2.1840824103977175e-06, + "loss": 0.0014, + "step": 224950 + }, + { + "epoch": 1.4428235626120391, + "grad_norm": 0.04784127697348595, + "learning_rate": 2.183619927316816e-06, + "loss": 0.0017, + "step": 224960 + }, + { + "epoch": 1.4428876995058253, + "grad_norm": 0.11029145866632462, + "learning_rate": 2.183157479527169e-06, + "loss": 0.0015, + "step": 224970 + }, + { + "epoch": 1.4429518363996112, + "grad_norm": 0.05119152367115021, + "learning_rate": 2.1826950670345702e-06, + "loss": 0.0017, + "step": 224980 + }, + { + "epoch": 1.4430159732933974, + "grad_norm": 0.1125304326415062, + "learning_rate": 2.1822326898448175e-06, + "loss": 0.0013, + "step": 224990 + }, + { + "epoch": 1.4430801101871835, + "grad_norm": 0.0023442390374839306, + "learning_rate": 2.181770347963704e-06, + "loss": 0.0007, + "step": 225000 + }, + { + "epoch": 1.4431442470809697, + "grad_norm": 0.04524316266179085, + "learning_rate": 2.1813080413970196e-06, + "loss": 0.0007, + "step": 225010 + }, + { + "epoch": 1.4432083839747558, + "grad_norm": 0.07342428714036942, + "learning_rate": 2.1808457701505615e-06, + "loss": 0.0007, + "step": 225020 + }, + { + "epoch": 1.4432725208685417, + "grad_norm": 0.02977115474641323, + "learning_rate": 2.180383534230121e-06, + "loss": 0.0012, + "step": 225030 + }, + { + "epoch": 1.443336657762328, + "grad_norm": 0.29319748282432556, + "learning_rate": 2.1799213336414897e-06, + "loss": 0.0014, + "step": 225040 + }, + { + "epoch": 1.443400794656114, + "grad_norm": 0.10643231123685837, + "learning_rate": 2.1794591683904582e-06, + "loss": 0.0021, + "step": 225050 + }, + { + "epoch": 1.4434649315499002, + "grad_norm": 0.011669746600091457, + "learning_rate": 2.178997038482821e-06, + "loss": 0.0015, + "step": 225060 + }, + { + "epoch": 1.4435290684436861, + "grad_norm": 0.019415656104683876, + "learning_rate": 2.1785349439243664e-06, + "loss": 0.0008, + "step": 225070 + }, + { + "epoch": 1.4435932053374723, + "grad_norm": 0.09339431673288345, + "learning_rate": 2.178072884720886e-06, + "loss": 0.0009, + "step": 225080 + }, + { + "epoch": 1.4436573422312584, + "grad_norm": 0.03157876431941986, + "learning_rate": 2.177610860878167e-06, + "loss": 0.0009, + "step": 225090 + }, + { + "epoch": 1.4437214791250446, + "grad_norm": 0.07756247371435165, + "learning_rate": 2.1771488724020037e-06, + "loss": 0.0015, + "step": 225100 + }, + { + "epoch": 1.4437856160188307, + "grad_norm": 0.07325758785009384, + "learning_rate": 2.1766869192981814e-06, + "loss": 0.0023, + "step": 225110 + }, + { + "epoch": 1.4438497529126166, + "grad_norm": 0.03857431933283806, + "learning_rate": 2.17622500157249e-06, + "loss": 0.0007, + "step": 225120 + }, + { + "epoch": 1.4439138898064028, + "grad_norm": 0.001007712329737842, + "learning_rate": 2.1757631192307162e-06, + "loss": 0.0013, + "step": 225130 + }, + { + "epoch": 1.443978026700189, + "grad_norm": 0.042061109095811844, + "learning_rate": 2.1753012722786502e-06, + "loss": 0.0009, + "step": 225140 + }, + { + "epoch": 1.4440421635939749, + "grad_norm": 0.062346864491701126, + "learning_rate": 2.1748394607220786e-06, + "loss": 0.0012, + "step": 225150 + }, + { + "epoch": 1.444106300487761, + "grad_norm": 0.053323470056056976, + "learning_rate": 2.174377684566785e-06, + "loss": 0.0015, + "step": 225160 + }, + { + "epoch": 1.4441704373815472, + "grad_norm": 0.04464549198746681, + "learning_rate": 2.1739159438185608e-06, + "loss": 0.0024, + "step": 225170 + }, + { + "epoch": 1.4442345742753333, + "grad_norm": 0.034703902900218964, + "learning_rate": 2.1734542384831898e-06, + "loss": 0.0043, + "step": 225180 + }, + { + "epoch": 1.4442987111691195, + "grad_norm": 0.022805416956543922, + "learning_rate": 2.172992568566457e-06, + "loss": 0.0015, + "step": 225190 + }, + { + "epoch": 1.4443628480629056, + "grad_norm": 0.0011144662275910378, + "learning_rate": 2.1725309340741464e-06, + "loss": 0.0011, + "step": 225200 + }, + { + "epoch": 1.4444269849566915, + "grad_norm": 0.09161978960037231, + "learning_rate": 2.1720693350120433e-06, + "loss": 0.002, + "step": 225210 + }, + { + "epoch": 1.4444911218504777, + "grad_norm": 0.03671182692050934, + "learning_rate": 2.1716077713859345e-06, + "loss": 0.0019, + "step": 225220 + }, + { + "epoch": 1.4445552587442638, + "grad_norm": 0.14577430486679077, + "learning_rate": 2.171146243201602e-06, + "loss": 0.0022, + "step": 225230 + }, + { + "epoch": 1.4446193956380498, + "grad_norm": 0.055364321917295456, + "learning_rate": 2.1706847504648286e-06, + "loss": 0.0016, + "step": 225240 + }, + { + "epoch": 1.444683532531836, + "grad_norm": 0.016351599246263504, + "learning_rate": 2.1702232931813956e-06, + "loss": 0.0011, + "step": 225250 + }, + { + "epoch": 1.444747669425622, + "grad_norm": 0.04354793205857277, + "learning_rate": 2.169761871357089e-06, + "loss": 0.0011, + "step": 225260 + }, + { + "epoch": 1.4448118063194082, + "grad_norm": 0.04667946696281433, + "learning_rate": 2.169300484997689e-06, + "loss": 0.0007, + "step": 225270 + }, + { + "epoch": 1.4448759432131943, + "grad_norm": 0.039875585585832596, + "learning_rate": 2.1688391341089748e-06, + "loss": 0.001, + "step": 225280 + }, + { + "epoch": 1.4449400801069803, + "grad_norm": 0.1300768107175827, + "learning_rate": 2.168377818696732e-06, + "loss": 0.0021, + "step": 225290 + }, + { + "epoch": 1.4450042170007664, + "grad_norm": 0.07227897644042969, + "learning_rate": 2.1679165387667386e-06, + "loss": 0.0008, + "step": 225300 + }, + { + "epoch": 1.4450683538945526, + "grad_norm": 0.07283520698547363, + "learning_rate": 2.167455294324775e-06, + "loss": 0.0011, + "step": 225310 + }, + { + "epoch": 1.4451324907883385, + "grad_norm": 0.011886750347912312, + "learning_rate": 2.166994085376619e-06, + "loss": 0.0006, + "step": 225320 + }, + { + "epoch": 1.4451966276821246, + "grad_norm": 0.030358009040355682, + "learning_rate": 2.1665329119280537e-06, + "loss": 0.0015, + "step": 225330 + }, + { + "epoch": 1.4452607645759108, + "grad_norm": 0.0716029554605484, + "learning_rate": 2.1660717739848565e-06, + "loss": 0.0017, + "step": 225340 + }, + { + "epoch": 1.445324901469697, + "grad_norm": 0.0746510773897171, + "learning_rate": 2.1656106715528045e-06, + "loss": 0.0025, + "step": 225350 + }, + { + "epoch": 1.445389038363483, + "grad_norm": 0.0027565364725887775, + "learning_rate": 2.165149604637676e-06, + "loss": 0.0017, + "step": 225360 + }, + { + "epoch": 1.4454531752572692, + "grad_norm": 0.06034074351191521, + "learning_rate": 2.1646885732452492e-06, + "loss": 0.0007, + "step": 225370 + }, + { + "epoch": 1.4455173121510552, + "grad_norm": 0.06511419266462326, + "learning_rate": 2.164227577381302e-06, + "loss": 0.0017, + "step": 225380 + }, + { + "epoch": 1.4455814490448413, + "grad_norm": 0.03042871318757534, + "learning_rate": 2.1637666170516096e-06, + "loss": 0.0006, + "step": 225390 + }, + { + "epoch": 1.4456455859386275, + "grad_norm": 0.13117969036102295, + "learning_rate": 2.163305692261947e-06, + "loss": 0.0017, + "step": 225400 + }, + { + "epoch": 1.4457097228324134, + "grad_norm": 0.14985649287700653, + "learning_rate": 2.1628448030180933e-06, + "loss": 0.0012, + "step": 225410 + }, + { + "epoch": 1.4457738597261995, + "grad_norm": 0.004519871901720762, + "learning_rate": 2.1623839493258213e-06, + "loss": 0.0013, + "step": 225420 + }, + { + "epoch": 1.4458379966199857, + "grad_norm": 0.0251761544495821, + "learning_rate": 2.161923131190905e-06, + "loss": 0.0006, + "step": 225430 + }, + { + "epoch": 1.4459021335137718, + "grad_norm": 0.05949077755212784, + "learning_rate": 2.161462348619122e-06, + "loss": 0.002, + "step": 225440 + }, + { + "epoch": 1.445966270407558, + "grad_norm": 0.04768809303641319, + "learning_rate": 2.1610016016162443e-06, + "loss": 0.0011, + "step": 225450 + }, + { + "epoch": 1.446030407301344, + "grad_norm": 0.005062917247414589, + "learning_rate": 2.1605408901880458e-06, + "loss": 0.0007, + "step": 225460 + }, + { + "epoch": 1.44609454419513, + "grad_norm": 0.10849764943122864, + "learning_rate": 2.160080214340297e-06, + "loss": 0.0016, + "step": 225470 + }, + { + "epoch": 1.4461586810889162, + "grad_norm": 0.01204383559525013, + "learning_rate": 2.159619574078775e-06, + "loss": 0.0015, + "step": 225480 + }, + { + "epoch": 1.4462228179827024, + "grad_norm": 0.03433408588171005, + "learning_rate": 2.1591589694092496e-06, + "loss": 0.0012, + "step": 225490 + }, + { + "epoch": 1.4462869548764883, + "grad_norm": 0.12959915399551392, + "learning_rate": 2.158698400337493e-06, + "loss": 0.0008, + "step": 225500 + }, + { + "epoch": 1.4463510917702744, + "grad_norm": 0.015933355316519737, + "learning_rate": 2.1582378668692738e-06, + "loss": 0.0009, + "step": 225510 + }, + { + "epoch": 1.4464152286640606, + "grad_norm": 0.005979140289127827, + "learning_rate": 2.157777369010367e-06, + "loss": 0.0022, + "step": 225520 + }, + { + "epoch": 1.4464793655578467, + "grad_norm": 0.08340150117874146, + "learning_rate": 2.1573169067665413e-06, + "loss": 0.0019, + "step": 225530 + }, + { + "epoch": 1.4465435024516329, + "grad_norm": 0.01124319713562727, + "learning_rate": 2.156856480143566e-06, + "loss": 0.0015, + "step": 225540 + }, + { + "epoch": 1.4466076393454188, + "grad_norm": 0.05296528711915016, + "learning_rate": 2.1563960891472096e-06, + "loss": 0.0006, + "step": 225550 + }, + { + "epoch": 1.446671776239205, + "grad_norm": 0.06643268465995789, + "learning_rate": 2.1559357337832436e-06, + "loss": 0.0009, + "step": 225560 + }, + { + "epoch": 1.446735913132991, + "grad_norm": 0.06128658354282379, + "learning_rate": 2.1554754140574364e-06, + "loss": 0.0008, + "step": 225570 + }, + { + "epoch": 1.446800050026777, + "grad_norm": 0.049856431782245636, + "learning_rate": 2.1550151299755527e-06, + "loss": 0.0012, + "step": 225580 + }, + { + "epoch": 1.4468641869205632, + "grad_norm": 0.07697924226522446, + "learning_rate": 2.1545548815433647e-06, + "loss": 0.0013, + "step": 225590 + }, + { + "epoch": 1.4469283238143493, + "grad_norm": 0.05229620262980461, + "learning_rate": 2.154094668766638e-06, + "loss": 0.0012, + "step": 225600 + }, + { + "epoch": 1.4469924607081355, + "grad_norm": 0.07874248921871185, + "learning_rate": 2.1536344916511387e-06, + "loss": 0.0022, + "step": 225610 + }, + { + "epoch": 1.4470565976019216, + "grad_norm": 0.03423517569899559, + "learning_rate": 2.1531743502026318e-06, + "loss": 0.0009, + "step": 225620 + }, + { + "epoch": 1.4471207344957078, + "grad_norm": 0.21702076494693756, + "learning_rate": 2.1527142444268866e-06, + "loss": 0.0015, + "step": 225630 + }, + { + "epoch": 1.4471848713894937, + "grad_norm": 0.0025034372229129076, + "learning_rate": 2.152254174329667e-06, + "loss": 0.002, + "step": 225640 + }, + { + "epoch": 1.4472490082832798, + "grad_norm": 0.0951765701174736, + "learning_rate": 2.1517941399167373e-06, + "loss": 0.0017, + "step": 225650 + }, + { + "epoch": 1.447313145177066, + "grad_norm": 0.01177617721259594, + "learning_rate": 2.1513341411938614e-06, + "loss": 0.0009, + "step": 225660 + }, + { + "epoch": 1.447377282070852, + "grad_norm": 0.04554301127791405, + "learning_rate": 2.1508741781668064e-06, + "loss": 0.0012, + "step": 225670 + }, + { + "epoch": 1.447441418964638, + "grad_norm": 0.07917334139347076, + "learning_rate": 2.150414250841334e-06, + "loss": 0.0012, + "step": 225680 + }, + { + "epoch": 1.4475055558584242, + "grad_norm": 0.21603022515773773, + "learning_rate": 2.1499543592232075e-06, + "loss": 0.001, + "step": 225690 + }, + { + "epoch": 1.4475696927522104, + "grad_norm": 0.2080763280391693, + "learning_rate": 2.149494503318188e-06, + "loss": 0.0013, + "step": 225700 + }, + { + "epoch": 1.4476338296459965, + "grad_norm": 0.12338167428970337, + "learning_rate": 2.149034683132042e-06, + "loss": 0.002, + "step": 225710 + }, + { + "epoch": 1.4476979665397824, + "grad_norm": 0.06685017049312592, + "learning_rate": 2.1485748986705285e-06, + "loss": 0.0005, + "step": 225720 + }, + { + "epoch": 1.4477621034335686, + "grad_norm": 0.17813804745674133, + "learning_rate": 2.148115149939407e-06, + "loss": 0.001, + "step": 225730 + }, + { + "epoch": 1.4478262403273547, + "grad_norm": 0.11285127699375153, + "learning_rate": 2.1476554369444438e-06, + "loss": 0.001, + "step": 225740 + }, + { + "epoch": 1.4478903772211407, + "grad_norm": 0.05985259637236595, + "learning_rate": 2.1471957596913957e-06, + "loss": 0.0012, + "step": 225750 + }, + { + "epoch": 1.4479545141149268, + "grad_norm": 0.06689460575580597, + "learning_rate": 2.146736118186024e-06, + "loss": 0.0015, + "step": 225760 + }, + { + "epoch": 1.448018651008713, + "grad_norm": 0.05374080315232277, + "learning_rate": 2.146276512434086e-06, + "loss": 0.0011, + "step": 225770 + }, + { + "epoch": 1.4480827879024991, + "grad_norm": 0.003331320360302925, + "learning_rate": 2.1458169424413445e-06, + "loss": 0.0006, + "step": 225780 + }, + { + "epoch": 1.4481469247962853, + "grad_norm": 0.03331978991627693, + "learning_rate": 2.145357408213557e-06, + "loss": 0.0009, + "step": 225790 + }, + { + "epoch": 1.4482110616900714, + "grad_norm": 0.18725016713142395, + "learning_rate": 2.144897909756481e-06, + "loss": 0.0011, + "step": 225800 + }, + { + "epoch": 1.4482751985838573, + "grad_norm": 0.1007186695933342, + "learning_rate": 2.144438447075873e-06, + "loss": 0.0016, + "step": 225810 + }, + { + "epoch": 1.4483393354776435, + "grad_norm": 0.02833637408912182, + "learning_rate": 2.143979020177494e-06, + "loss": 0.0011, + "step": 225820 + }, + { + "epoch": 1.4484034723714296, + "grad_norm": 0.06066592410206795, + "learning_rate": 2.143519629067099e-06, + "loss": 0.0019, + "step": 225830 + }, + { + "epoch": 1.4484676092652156, + "grad_norm": 0.1573522984981537, + "learning_rate": 2.143060273750444e-06, + "loss": 0.0018, + "step": 225840 + }, + { + "epoch": 1.4485317461590017, + "grad_norm": 0.045502424240112305, + "learning_rate": 2.142600954233284e-06, + "loss": 0.0022, + "step": 225850 + }, + { + "epoch": 1.4485958830527879, + "grad_norm": 0.0707055926322937, + "learning_rate": 2.142141670521379e-06, + "loss": 0.0008, + "step": 225860 + }, + { + "epoch": 1.448660019946574, + "grad_norm": 0.22369283437728882, + "learning_rate": 2.14168242262048e-06, + "loss": 0.0022, + "step": 225870 + }, + { + "epoch": 1.4487241568403602, + "grad_norm": 0.071811243891716, + "learning_rate": 2.141223210536342e-06, + "loss": 0.0015, + "step": 225880 + }, + { + "epoch": 1.448788293734146, + "grad_norm": 0.03724861890077591, + "learning_rate": 2.140764034274722e-06, + "loss": 0.0012, + "step": 225890 + }, + { + "epoch": 1.4488524306279322, + "grad_norm": 0.11753320693969727, + "learning_rate": 2.140304893841372e-06, + "loss": 0.0019, + "step": 225900 + }, + { + "epoch": 1.4489165675217184, + "grad_norm": 0.04428223520517349, + "learning_rate": 2.1398457892420454e-06, + "loss": 0.0011, + "step": 225910 + }, + { + "epoch": 1.4489807044155045, + "grad_norm": 0.05743437632918358, + "learning_rate": 2.1393867204824936e-06, + "loss": 0.001, + "step": 225920 + }, + { + "epoch": 1.4490448413092905, + "grad_norm": 0.08938732743263245, + "learning_rate": 2.1389276875684727e-06, + "loss": 0.0011, + "step": 225930 + }, + { + "epoch": 1.4491089782030766, + "grad_norm": 0.00183452432975173, + "learning_rate": 2.138468690505732e-06, + "loss": 0.0018, + "step": 225940 + }, + { + "epoch": 1.4491731150968628, + "grad_norm": 0.12324772030115128, + "learning_rate": 2.138009729300024e-06, + "loss": 0.0023, + "step": 225950 + }, + { + "epoch": 1.449237251990649, + "grad_norm": 0.002847007242962718, + "learning_rate": 2.137550803957098e-06, + "loss": 0.0008, + "step": 225960 + }, + { + "epoch": 1.449301388884435, + "grad_norm": 0.13642844557762146, + "learning_rate": 2.137091914482706e-06, + "loss": 0.0009, + "step": 225970 + }, + { + "epoch": 1.449365525778221, + "grad_norm": 0.01883120648562908, + "learning_rate": 2.1366330608826004e-06, + "loss": 0.0011, + "step": 225980 + }, + { + "epoch": 1.4494296626720071, + "grad_norm": 0.04838910698890686, + "learning_rate": 2.136174243162528e-06, + "loss": 0.0016, + "step": 225990 + }, + { + "epoch": 1.4494937995657933, + "grad_norm": 0.14629925787448883, + "learning_rate": 2.135715461328238e-06, + "loss": 0.0023, + "step": 226000 + }, + { + "epoch": 1.4495579364595792, + "grad_norm": 0.17296472191810608, + "learning_rate": 2.1352567153854825e-06, + "loss": 0.0011, + "step": 226010 + }, + { + "epoch": 1.4496220733533653, + "grad_norm": 0.10599583387374878, + "learning_rate": 2.1347980053400076e-06, + "loss": 0.001, + "step": 226020 + }, + { + "epoch": 1.4496862102471515, + "grad_norm": 0.11168216913938522, + "learning_rate": 2.1343393311975612e-06, + "loss": 0.0016, + "step": 226030 + }, + { + "epoch": 1.4497503471409376, + "grad_norm": 0.155610591173172, + "learning_rate": 2.133880692963889e-06, + "loss": 0.0019, + "step": 226040 + }, + { + "epoch": 1.4498144840347238, + "grad_norm": 0.03576747328042984, + "learning_rate": 2.133422090644742e-06, + "loss": 0.0008, + "step": 226050 + }, + { + "epoch": 1.44987862092851, + "grad_norm": 0.06736559420824051, + "learning_rate": 2.1329635242458653e-06, + "loss": 0.0013, + "step": 226060 + }, + { + "epoch": 1.4499427578222959, + "grad_norm": 0.04706592112779617, + "learning_rate": 2.1325049937730045e-06, + "loss": 0.0007, + "step": 226070 + }, + { + "epoch": 1.450006894716082, + "grad_norm": 0.16581174731254578, + "learning_rate": 2.1320464992319035e-06, + "loss": 0.002, + "step": 226080 + }, + { + "epoch": 1.4500710316098682, + "grad_norm": 0.1650496870279312, + "learning_rate": 2.131588040628312e-06, + "loss": 0.0014, + "step": 226090 + }, + { + "epoch": 1.450135168503654, + "grad_norm": 0.19719888269901276, + "learning_rate": 2.1311296179679715e-06, + "loss": 0.0007, + "step": 226100 + }, + { + "epoch": 1.4501993053974402, + "grad_norm": 0.02815418690443039, + "learning_rate": 2.1306712312566257e-06, + "loss": 0.0011, + "step": 226110 + }, + { + "epoch": 1.4502634422912264, + "grad_norm": 0.05859525874257088, + "learning_rate": 2.1302128805000223e-06, + "loss": 0.0011, + "step": 226120 + }, + { + "epoch": 1.4503275791850125, + "grad_norm": 0.118864044547081, + "learning_rate": 2.129754565703902e-06, + "loss": 0.0025, + "step": 226130 + }, + { + "epoch": 1.4503917160787987, + "grad_norm": 0.09736819565296173, + "learning_rate": 2.1292962868740084e-06, + "loss": 0.0036, + "step": 226140 + }, + { + "epoch": 1.4504558529725846, + "grad_norm": 0.1656569093465805, + "learning_rate": 2.128838044016083e-06, + "loss": 0.0022, + "step": 226150 + }, + { + "epoch": 1.4505199898663708, + "grad_norm": 0.0474659688770771, + "learning_rate": 2.1283798371358694e-06, + "loss": 0.0007, + "step": 226160 + }, + { + "epoch": 1.450584126760157, + "grad_norm": 0.0348595455288887, + "learning_rate": 2.1279216662391094e-06, + "loss": 0.0005, + "step": 226170 + }, + { + "epoch": 1.4506482636539428, + "grad_norm": 0.09374228119850159, + "learning_rate": 2.1274635313315434e-06, + "loss": 0.0015, + "step": 226180 + }, + { + "epoch": 1.450712400547729, + "grad_norm": 0.17273084819316864, + "learning_rate": 2.127005432418911e-06, + "loss": 0.0015, + "step": 226190 + }, + { + "epoch": 1.4507765374415151, + "grad_norm": 0.24151749908924103, + "learning_rate": 2.126547369506955e-06, + "loss": 0.002, + "step": 226200 + }, + { + "epoch": 1.4508406743353013, + "grad_norm": 0.010909780859947205, + "learning_rate": 2.1260893426014145e-06, + "loss": 0.0003, + "step": 226210 + }, + { + "epoch": 1.4509048112290874, + "grad_norm": 0.09100042283535004, + "learning_rate": 2.1256313517080284e-06, + "loss": 0.0007, + "step": 226220 + }, + { + "epoch": 1.4509689481228736, + "grad_norm": 0.12479095906019211, + "learning_rate": 2.125173396832534e-06, + "loss": 0.0024, + "step": 226230 + }, + { + "epoch": 1.4510330850166595, + "grad_norm": 0.1252729892730713, + "learning_rate": 2.1247154779806726e-06, + "loss": 0.0007, + "step": 226240 + }, + { + "epoch": 1.4510972219104457, + "grad_norm": 0.05272957310080528, + "learning_rate": 2.1242575951581823e-06, + "loss": 0.0021, + "step": 226250 + }, + { + "epoch": 1.4511613588042318, + "grad_norm": 0.01092084962874651, + "learning_rate": 2.1237997483707974e-06, + "loss": 0.0016, + "step": 226260 + }, + { + "epoch": 1.4512254956980177, + "grad_norm": 0.09231545776128769, + "learning_rate": 2.1233419376242587e-06, + "loss": 0.0018, + "step": 226270 + }, + { + "epoch": 1.4512896325918039, + "grad_norm": 0.08255818486213684, + "learning_rate": 2.122884162924302e-06, + "loss": 0.0039, + "step": 226280 + }, + { + "epoch": 1.45135376948559, + "grad_norm": 0.050203077495098114, + "learning_rate": 2.1224264242766625e-06, + "loss": 0.001, + "step": 226290 + }, + { + "epoch": 1.4514179063793762, + "grad_norm": 0.13449114561080933, + "learning_rate": 2.1219687216870747e-06, + "loss": 0.0025, + "step": 226300 + }, + { + "epoch": 1.4514820432731623, + "grad_norm": 0.047507259994745255, + "learning_rate": 2.121511055161278e-06, + "loss": 0.0008, + "step": 226310 + }, + { + "epoch": 1.4515461801669483, + "grad_norm": 0.22218233346939087, + "learning_rate": 2.1210534247050045e-06, + "loss": 0.0017, + "step": 226320 + }, + { + "epoch": 1.4516103170607344, + "grad_norm": 0.09313784539699554, + "learning_rate": 2.1205958303239892e-06, + "loss": 0.0026, + "step": 226330 + }, + { + "epoch": 1.4516744539545205, + "grad_norm": 0.25622984766960144, + "learning_rate": 2.1201382720239644e-06, + "loss": 0.0022, + "step": 226340 + }, + { + "epoch": 1.4517385908483067, + "grad_norm": 0.08348685503005981, + "learning_rate": 2.119680749810667e-06, + "loss": 0.0006, + "step": 226350 + }, + { + "epoch": 1.4518027277420926, + "grad_norm": 0.007414282765239477, + "learning_rate": 2.1192232636898285e-06, + "loss": 0.0028, + "step": 226360 + }, + { + "epoch": 1.4518668646358788, + "grad_norm": 0.06608813256025314, + "learning_rate": 2.118765813667181e-06, + "loss": 0.0006, + "step": 226370 + }, + { + "epoch": 1.451931001529665, + "grad_norm": 0.07415993511676788, + "learning_rate": 2.1183083997484554e-06, + "loss": 0.001, + "step": 226380 + }, + { + "epoch": 1.451995138423451, + "grad_norm": 0.09311382472515106, + "learning_rate": 2.1178510219393873e-06, + "loss": 0.0017, + "step": 226390 + }, + { + "epoch": 1.4520592753172372, + "grad_norm": 0.04318315535783768, + "learning_rate": 2.117393680245705e-06, + "loss": 0.0015, + "step": 226400 + }, + { + "epoch": 1.4521234122110231, + "grad_norm": 0.028543667867779732, + "learning_rate": 2.116936374673139e-06, + "loss": 0.0005, + "step": 226410 + }, + { + "epoch": 1.4521875491048093, + "grad_norm": 0.0671742632985115, + "learning_rate": 2.116479105227422e-06, + "loss": 0.0019, + "step": 226420 + }, + { + "epoch": 1.4522516859985954, + "grad_norm": 0.07890462130308151, + "learning_rate": 2.1160218719142827e-06, + "loss": 0.0014, + "step": 226430 + }, + { + "epoch": 1.4523158228923814, + "grad_norm": 0.08221903443336487, + "learning_rate": 2.1155646747394505e-06, + "loss": 0.0016, + "step": 226440 + }, + { + "epoch": 1.4523799597861675, + "grad_norm": 0.2833114564418793, + "learning_rate": 2.1151075137086522e-06, + "loss": 0.0014, + "step": 226450 + }, + { + "epoch": 1.4524440966799537, + "grad_norm": 0.11079540848731995, + "learning_rate": 2.1146503888276214e-06, + "loss": 0.0016, + "step": 226460 + }, + { + "epoch": 1.4525082335737398, + "grad_norm": 0.18482473492622375, + "learning_rate": 2.1141933001020826e-06, + "loss": 0.0015, + "step": 226470 + }, + { + "epoch": 1.452572370467526, + "grad_norm": 0.03776835277676582, + "learning_rate": 2.1137362475377643e-06, + "loss": 0.0018, + "step": 226480 + }, + { + "epoch": 1.4526365073613121, + "grad_norm": 0.018275147303938866, + "learning_rate": 2.1132792311403922e-06, + "loss": 0.0011, + "step": 226490 + }, + { + "epoch": 1.452700644255098, + "grad_norm": 0.01807580702006817, + "learning_rate": 2.112822250915696e-06, + "loss": 0.0009, + "step": 226500 + }, + { + "epoch": 1.4527647811488842, + "grad_norm": 0.05914665013551712, + "learning_rate": 2.1123653068694e-06, + "loss": 0.0009, + "step": 226510 + }, + { + "epoch": 1.4528289180426703, + "grad_norm": 0.006592872552573681, + "learning_rate": 2.111908399007231e-06, + "loss": 0.001, + "step": 226520 + }, + { + "epoch": 1.4528930549364563, + "grad_norm": 0.04994434863328934, + "learning_rate": 2.111451527334912e-06, + "loss": 0.001, + "step": 226530 + }, + { + "epoch": 1.4529571918302424, + "grad_norm": 0.04584444686770439, + "learning_rate": 2.1109946918581715e-06, + "loss": 0.0011, + "step": 226540 + }, + { + "epoch": 1.4530213287240286, + "grad_norm": 0.0671549141407013, + "learning_rate": 2.1105378925827317e-06, + "loss": 0.0015, + "step": 226550 + }, + { + "epoch": 1.4530854656178147, + "grad_norm": 0.05410590395331383, + "learning_rate": 2.110081129514316e-06, + "loss": 0.0008, + "step": 226560 + }, + { + "epoch": 1.4531496025116009, + "grad_norm": 0.09354747831821442, + "learning_rate": 2.109624402658651e-06, + "loss": 0.0014, + "step": 226570 + }, + { + "epoch": 1.4532137394053868, + "grad_norm": 0.0746600553393364, + "learning_rate": 2.1091677120214577e-06, + "loss": 0.0013, + "step": 226580 + }, + { + "epoch": 1.453277876299173, + "grad_norm": 0.08220326155424118, + "learning_rate": 2.108711057608459e-06, + "loss": 0.0013, + "step": 226590 + }, + { + "epoch": 1.453342013192959, + "grad_norm": 0.09785089641809464, + "learning_rate": 2.108254439425375e-06, + "loss": 0.0017, + "step": 226600 + }, + { + "epoch": 1.4534061500867452, + "grad_norm": 0.016085678711533546, + "learning_rate": 2.107797857477932e-06, + "loss": 0.0009, + "step": 226610 + }, + { + "epoch": 1.4534702869805312, + "grad_norm": 0.11783857643604279, + "learning_rate": 2.1073413117718488e-06, + "loss": 0.0009, + "step": 226620 + }, + { + "epoch": 1.4535344238743173, + "grad_norm": 0.2694298326969147, + "learning_rate": 2.1068848023128464e-06, + "loss": 0.0016, + "step": 226630 + }, + { + "epoch": 1.4535985607681035, + "grad_norm": 0.10376250743865967, + "learning_rate": 2.1064283291066427e-06, + "loss": 0.0018, + "step": 226640 + }, + { + "epoch": 1.4536626976618896, + "grad_norm": 0.030363505706191063, + "learning_rate": 2.105971892158962e-06, + "loss": 0.0014, + "step": 226650 + }, + { + "epoch": 1.4537268345556758, + "grad_norm": 0.020838048309087753, + "learning_rate": 2.105515491475522e-06, + "loss": 0.0019, + "step": 226660 + }, + { + "epoch": 1.4537909714494617, + "grad_norm": 0.08939540386199951, + "learning_rate": 2.105059127062042e-06, + "loss": 0.0018, + "step": 226670 + }, + { + "epoch": 1.4538551083432478, + "grad_norm": 0.1325758397579193, + "learning_rate": 2.104602798924238e-06, + "loss": 0.0007, + "step": 226680 + }, + { + "epoch": 1.453919245237034, + "grad_norm": 0.060680270195007324, + "learning_rate": 2.104146507067832e-06, + "loss": 0.0013, + "step": 226690 + }, + { + "epoch": 1.45398338213082, + "grad_norm": 0.08166959881782532, + "learning_rate": 2.1036902514985396e-06, + "loss": 0.0011, + "step": 226700 + }, + { + "epoch": 1.454047519024606, + "grad_norm": 0.15600843727588654, + "learning_rate": 2.1032340322220766e-06, + "loss": 0.0009, + "step": 226710 + }, + { + "epoch": 1.4541116559183922, + "grad_norm": 0.006155435461550951, + "learning_rate": 2.1027778492441636e-06, + "loss": 0.0009, + "step": 226720 + }, + { + "epoch": 1.4541757928121783, + "grad_norm": 0.03900561481714249, + "learning_rate": 2.1023217025705143e-06, + "loss": 0.0013, + "step": 226730 + }, + { + "epoch": 1.4542399297059645, + "grad_norm": 0.04580393061041832, + "learning_rate": 2.1018655922068436e-06, + "loss": 0.0013, + "step": 226740 + }, + { + "epoch": 1.4543040665997506, + "grad_norm": 0.00805017538368702, + "learning_rate": 2.1014095181588703e-06, + "loss": 0.0011, + "step": 226750 + }, + { + "epoch": 1.4543682034935366, + "grad_norm": 0.10442374646663666, + "learning_rate": 2.1009534804323045e-06, + "loss": 0.0007, + "step": 226760 + }, + { + "epoch": 1.4544323403873227, + "grad_norm": 0.15928815305233002, + "learning_rate": 2.1004974790328665e-06, + "loss": 0.0007, + "step": 226770 + }, + { + "epoch": 1.4544964772811089, + "grad_norm": 0.010206308215856552, + "learning_rate": 2.1000415139662667e-06, + "loss": 0.0016, + "step": 226780 + }, + { + "epoch": 1.4545606141748948, + "grad_norm": 0.25000104308128357, + "learning_rate": 2.099585585238219e-06, + "loss": 0.0015, + "step": 226790 + }, + { + "epoch": 1.454624751068681, + "grad_norm": 0.05201121047139168, + "learning_rate": 2.0991296928544357e-06, + "loss": 0.001, + "step": 226800 + }, + { + "epoch": 1.454688887962467, + "grad_norm": 0.010511612519621849, + "learning_rate": 2.098673836820632e-06, + "loss": 0.0006, + "step": 226810 + }, + { + "epoch": 1.4547530248562532, + "grad_norm": 0.08724335581064224, + "learning_rate": 2.098218017142519e-06, + "loss": 0.0024, + "step": 226820 + }, + { + "epoch": 1.4548171617500394, + "grad_norm": 0.14763133227825165, + "learning_rate": 2.0977622338258056e-06, + "loss": 0.0009, + "step": 226830 + }, + { + "epoch": 1.4548812986438253, + "grad_norm": 0.17446410655975342, + "learning_rate": 2.0973064868762084e-06, + "loss": 0.0012, + "step": 226840 + }, + { + "epoch": 1.4549454355376115, + "grad_norm": 0.028958257287740707, + "learning_rate": 2.096850776299435e-06, + "loss": 0.0014, + "step": 226850 + }, + { + "epoch": 1.4550095724313976, + "grad_norm": 0.1068197637796402, + "learning_rate": 2.0963951021011965e-06, + "loss": 0.0013, + "step": 226860 + }, + { + "epoch": 1.4550737093251835, + "grad_norm": 0.016174737364053726, + "learning_rate": 2.0959394642871998e-06, + "loss": 0.0008, + "step": 226870 + }, + { + "epoch": 1.4551378462189697, + "grad_norm": 0.07588784396648407, + "learning_rate": 2.09548386286316e-06, + "loss": 0.0029, + "step": 226880 + }, + { + "epoch": 1.4552019831127558, + "grad_norm": 0.02686605043709278, + "learning_rate": 2.0950282978347826e-06, + "loss": 0.0011, + "step": 226890 + }, + { + "epoch": 1.455266120006542, + "grad_norm": 0.05538346245884895, + "learning_rate": 2.094572769207777e-06, + "loss": 0.0007, + "step": 226900 + }, + { + "epoch": 1.4553302569003281, + "grad_norm": 0.08940229564905167, + "learning_rate": 2.094117276987849e-06, + "loss": 0.0012, + "step": 226910 + }, + { + "epoch": 1.4553943937941143, + "grad_norm": 0.022856472060084343, + "learning_rate": 2.09366182118071e-06, + "loss": 0.0007, + "step": 226920 + }, + { + "epoch": 1.4554585306879002, + "grad_norm": 0.0348527617752552, + "learning_rate": 2.093206401792066e-06, + "loss": 0.0011, + "step": 226930 + }, + { + "epoch": 1.4555226675816864, + "grad_norm": 0.13491670787334442, + "learning_rate": 2.0927510188276233e-06, + "loss": 0.0017, + "step": 226940 + }, + { + "epoch": 1.4555868044754725, + "grad_norm": 0.05308113247156143, + "learning_rate": 2.092295672293086e-06, + "loss": 0.0017, + "step": 226950 + }, + { + "epoch": 1.4556509413692584, + "grad_norm": 0.06905005872249603, + "learning_rate": 2.091840362194164e-06, + "loss": 0.0014, + "step": 226960 + }, + { + "epoch": 1.4557150782630446, + "grad_norm": 0.2603563666343689, + "learning_rate": 2.0913850885365603e-06, + "loss": 0.0007, + "step": 226970 + }, + { + "epoch": 1.4557792151568307, + "grad_norm": 0.05765556916594505, + "learning_rate": 2.090929851325979e-06, + "loss": 0.0011, + "step": 226980 + }, + { + "epoch": 1.4558433520506169, + "grad_norm": 0.06922811269760132, + "learning_rate": 2.0904746505681273e-06, + "loss": 0.0013, + "step": 226990 + }, + { + "epoch": 1.455907488944403, + "grad_norm": 0.044415734708309174, + "learning_rate": 2.0900194862687077e-06, + "loss": 0.0007, + "step": 227000 + }, + { + "epoch": 1.455971625838189, + "grad_norm": 0.1271929293870926, + "learning_rate": 2.0895643584334233e-06, + "loss": 0.0015, + "step": 227010 + }, + { + "epoch": 1.456035762731975, + "grad_norm": 0.058753691613674164, + "learning_rate": 2.0891092670679764e-06, + "loss": 0.0005, + "step": 227020 + }, + { + "epoch": 1.4560998996257613, + "grad_norm": 0.0076009416952729225, + "learning_rate": 2.088654212178072e-06, + "loss": 0.001, + "step": 227030 + }, + { + "epoch": 1.4561640365195474, + "grad_norm": 0.06783096492290497, + "learning_rate": 2.0881991937694114e-06, + "loss": 0.001, + "step": 227040 + }, + { + "epoch": 1.4562281734133333, + "grad_norm": 0.03720617666840553, + "learning_rate": 2.0877442118476952e-06, + "loss": 0.0014, + "step": 227050 + }, + { + "epoch": 1.4562923103071195, + "grad_norm": 0.1779642254114151, + "learning_rate": 2.0872892664186246e-06, + "loss": 0.0024, + "step": 227060 + }, + { + "epoch": 1.4563564472009056, + "grad_norm": 0.07750676572322845, + "learning_rate": 2.0868343574879025e-06, + "loss": 0.001, + "step": 227070 + }, + { + "epoch": 1.4564205840946918, + "grad_norm": 0.11309818923473358, + "learning_rate": 2.086379485061228e-06, + "loss": 0.0008, + "step": 227080 + }, + { + "epoch": 1.456484720988478, + "grad_norm": 0.14714545011520386, + "learning_rate": 2.0859246491443007e-06, + "loss": 0.0022, + "step": 227090 + }, + { + "epoch": 1.4565488578822638, + "grad_norm": 0.10395433753728867, + "learning_rate": 2.0854698497428182e-06, + "loss": 0.0017, + "step": 227100 + }, + { + "epoch": 1.45661299477605, + "grad_norm": 0.042010944336652756, + "learning_rate": 2.085015086862484e-06, + "loss": 0.0012, + "step": 227110 + }, + { + "epoch": 1.4566771316698361, + "grad_norm": 0.0029472862370312214, + "learning_rate": 2.0845603605089936e-06, + "loss": 0.0011, + "step": 227120 + }, + { + "epoch": 1.456741268563622, + "grad_norm": 0.00931242760270834, + "learning_rate": 2.0841056706880437e-06, + "loss": 0.0007, + "step": 227130 + }, + { + "epoch": 1.4568054054574082, + "grad_norm": 0.11214316636323929, + "learning_rate": 2.0836510174053355e-06, + "loss": 0.0049, + "step": 227140 + }, + { + "epoch": 1.4568695423511944, + "grad_norm": 0.10304298996925354, + "learning_rate": 2.0831964006665644e-06, + "loss": 0.0007, + "step": 227150 + }, + { + "epoch": 1.4569336792449805, + "grad_norm": 0.044274721294641495, + "learning_rate": 2.082741820477427e-06, + "loss": 0.0007, + "step": 227160 + }, + { + "epoch": 1.4569978161387667, + "grad_norm": 0.10848188400268555, + "learning_rate": 2.0822872768436177e-06, + "loss": 0.0012, + "step": 227170 + }, + { + "epoch": 1.4570619530325528, + "grad_norm": 0.0019466973608359694, + "learning_rate": 2.081832769770836e-06, + "loss": 0.0013, + "step": 227180 + }, + { + "epoch": 1.4571260899263387, + "grad_norm": 0.09821311384439468, + "learning_rate": 2.081378299264775e-06, + "loss": 0.001, + "step": 227190 + }, + { + "epoch": 1.457190226820125, + "grad_norm": 0.10010962188243866, + "learning_rate": 2.0809238653311305e-06, + "loss": 0.001, + "step": 227200 + }, + { + "epoch": 1.457254363713911, + "grad_norm": 0.07062168419361115, + "learning_rate": 2.080469467975593e-06, + "loss": 0.0012, + "step": 227210 + }, + { + "epoch": 1.457318500607697, + "grad_norm": 0.050293177366256714, + "learning_rate": 2.080015107203863e-06, + "loss": 0.0011, + "step": 227220 + }, + { + "epoch": 1.4573826375014831, + "grad_norm": 0.032794367522001266, + "learning_rate": 2.0795607830216295e-06, + "loss": 0.0014, + "step": 227230 + }, + { + "epoch": 1.4574467743952693, + "grad_norm": 0.25751736760139465, + "learning_rate": 2.0791064954345873e-06, + "loss": 0.0013, + "step": 227240 + }, + { + "epoch": 1.4575109112890554, + "grad_norm": 0.14639756083488464, + "learning_rate": 2.0786522444484257e-06, + "loss": 0.0015, + "step": 227250 + }, + { + "epoch": 1.4575750481828416, + "grad_norm": 0.048349980264902115, + "learning_rate": 2.078198030068842e-06, + "loss": 0.0014, + "step": 227260 + }, + { + "epoch": 1.4576391850766275, + "grad_norm": 0.05995164439082146, + "learning_rate": 2.077743852301525e-06, + "loss": 0.0008, + "step": 227270 + }, + { + "epoch": 1.4577033219704136, + "grad_norm": 0.04522133246064186, + "learning_rate": 2.077289711152164e-06, + "loss": 0.0008, + "step": 227280 + }, + { + "epoch": 1.4577674588641998, + "grad_norm": 0.17663951218128204, + "learning_rate": 2.0768356066264532e-06, + "loss": 0.0009, + "step": 227290 + }, + { + "epoch": 1.4578315957579857, + "grad_norm": 0.006134978495538235, + "learning_rate": 2.0763815387300817e-06, + "loss": 0.001, + "step": 227300 + }, + { + "epoch": 1.4578957326517719, + "grad_norm": 0.03722411394119263, + "learning_rate": 2.075927507468739e-06, + "loss": 0.0013, + "step": 227310 + }, + { + "epoch": 1.457959869545558, + "grad_norm": 0.15329819917678833, + "learning_rate": 2.075473512848113e-06, + "loss": 0.0014, + "step": 227320 + }, + { + "epoch": 1.4580240064393442, + "grad_norm": 0.062296733260154724, + "learning_rate": 2.075019554873895e-06, + "loss": 0.0008, + "step": 227330 + }, + { + "epoch": 1.4580881433331303, + "grad_norm": 0.12302592396736145, + "learning_rate": 2.074565633551773e-06, + "loss": 0.0027, + "step": 227340 + }, + { + "epoch": 1.4581522802269165, + "grad_norm": 0.09618202596902847, + "learning_rate": 2.0741117488874347e-06, + "loss": 0.0013, + "step": 227350 + }, + { + "epoch": 1.4582164171207024, + "grad_norm": 0.024977801367640495, + "learning_rate": 2.0736579008865647e-06, + "loss": 0.0016, + "step": 227360 + }, + { + "epoch": 1.4582805540144885, + "grad_norm": 0.08137394487857819, + "learning_rate": 2.073204089554855e-06, + "loss": 0.0011, + "step": 227370 + }, + { + "epoch": 1.4583446909082747, + "grad_norm": 0.008541885763406754, + "learning_rate": 2.0727503148979894e-06, + "loss": 0.0021, + "step": 227380 + }, + { + "epoch": 1.4584088278020606, + "grad_norm": 0.05951809138059616, + "learning_rate": 2.0722965769216548e-06, + "loss": 0.0008, + "step": 227390 + }, + { + "epoch": 1.4584729646958468, + "grad_norm": 0.03887954726815224, + "learning_rate": 2.0718428756315346e-06, + "loss": 0.0013, + "step": 227400 + }, + { + "epoch": 1.458537101589633, + "grad_norm": 0.01444583386182785, + "learning_rate": 2.0713892110333173e-06, + "loss": 0.0015, + "step": 227410 + }, + { + "epoch": 1.458601238483419, + "grad_norm": 0.07182031124830246, + "learning_rate": 2.070935583132687e-06, + "loss": 0.0021, + "step": 227420 + }, + { + "epoch": 1.4586653753772052, + "grad_norm": 0.09865667670965195, + "learning_rate": 2.0704819919353246e-06, + "loss": 0.001, + "step": 227430 + }, + { + "epoch": 1.4587295122709911, + "grad_norm": 0.09204527735710144, + "learning_rate": 2.0700284374469185e-06, + "loss": 0.0017, + "step": 227440 + }, + { + "epoch": 1.4587936491647773, + "grad_norm": 0.017316631972789764, + "learning_rate": 2.069574919673151e-06, + "loss": 0.0016, + "step": 227450 + }, + { + "epoch": 1.4588577860585634, + "grad_norm": 0.1520482748746872, + "learning_rate": 2.0691214386197027e-06, + "loss": 0.0015, + "step": 227460 + }, + { + "epoch": 1.4589219229523496, + "grad_norm": 0.053171031177043915, + "learning_rate": 2.068667994292257e-06, + "loss": 0.0011, + "step": 227470 + }, + { + "epoch": 1.4589860598461355, + "grad_norm": 0.03078148141503334, + "learning_rate": 2.068214586696497e-06, + "loss": 0.0005, + "step": 227480 + }, + { + "epoch": 1.4590501967399216, + "grad_norm": 0.14004504680633545, + "learning_rate": 2.0677612158381043e-06, + "loss": 0.0011, + "step": 227490 + }, + { + "epoch": 1.4591143336337078, + "grad_norm": 0.05386776477098465, + "learning_rate": 2.067307881722757e-06, + "loss": 0.0022, + "step": 227500 + }, + { + "epoch": 1.459178470527494, + "grad_norm": 0.040485769510269165, + "learning_rate": 2.06685458435614e-06, + "loss": 0.0013, + "step": 227510 + }, + { + "epoch": 1.45924260742128, + "grad_norm": 0.20594653487205505, + "learning_rate": 2.066401323743929e-06, + "loss": 0.0011, + "step": 227520 + }, + { + "epoch": 1.459306744315066, + "grad_norm": 0.22772714495658875, + "learning_rate": 2.0659480998918085e-06, + "loss": 0.0007, + "step": 227530 + }, + { + "epoch": 1.4593708812088522, + "grad_norm": 0.14145562052726746, + "learning_rate": 2.0654949128054548e-06, + "loss": 0.0007, + "step": 227540 + }, + { + "epoch": 1.4594350181026383, + "grad_norm": 0.057922638952732086, + "learning_rate": 2.0650417624905454e-06, + "loss": 0.0006, + "step": 227550 + }, + { + "epoch": 1.4594991549964242, + "grad_norm": 0.09675946831703186, + "learning_rate": 2.064588648952762e-06, + "loss": 0.0017, + "step": 227560 + }, + { + "epoch": 1.4595632918902104, + "grad_norm": 0.06728710979223251, + "learning_rate": 2.064135572197781e-06, + "loss": 0.0011, + "step": 227570 + }, + { + "epoch": 1.4596274287839965, + "grad_norm": 0.008515803143382072, + "learning_rate": 2.0636825322312794e-06, + "loss": 0.001, + "step": 227580 + }, + { + "epoch": 1.4596915656777827, + "grad_norm": 0.06523746252059937, + "learning_rate": 2.0632295290589326e-06, + "loss": 0.001, + "step": 227590 + }, + { + "epoch": 1.4597557025715688, + "grad_norm": 0.05311696231365204, + "learning_rate": 2.0627765626864197e-06, + "loss": 0.001, + "step": 227600 + }, + { + "epoch": 1.459819839465355, + "grad_norm": 0.14546065032482147, + "learning_rate": 2.062323633119417e-06, + "loss": 0.0013, + "step": 227610 + }, + { + "epoch": 1.459883976359141, + "grad_norm": 0.06579133868217468, + "learning_rate": 2.061870740363598e-06, + "loss": 0.0008, + "step": 227620 + }, + { + "epoch": 1.459948113252927, + "grad_norm": 0.05342791602015495, + "learning_rate": 2.061417884424636e-06, + "loss": 0.0011, + "step": 227630 + }, + { + "epoch": 1.4600122501467132, + "grad_norm": 0.058039650321006775, + "learning_rate": 2.0609650653082108e-06, + "loss": 0.0009, + "step": 227640 + }, + { + "epoch": 1.4600763870404991, + "grad_norm": 0.08071907609701157, + "learning_rate": 2.0605122830199936e-06, + "loss": 0.0006, + "step": 227650 + }, + { + "epoch": 1.4601405239342853, + "grad_norm": 0.12008611112833023, + "learning_rate": 2.0600595375656584e-06, + "loss": 0.0014, + "step": 227660 + }, + { + "epoch": 1.4602046608280714, + "grad_norm": 0.04277434200048447, + "learning_rate": 2.0596068289508766e-06, + "loss": 0.0018, + "step": 227670 + }, + { + "epoch": 1.4602687977218576, + "grad_norm": 0.18823428452014923, + "learning_rate": 2.059154157181324e-06, + "loss": 0.0011, + "step": 227680 + }, + { + "epoch": 1.4603329346156437, + "grad_norm": 0.06276317685842514, + "learning_rate": 2.058701522262672e-06, + "loss": 0.0031, + "step": 227690 + }, + { + "epoch": 1.4603970715094297, + "grad_norm": 0.03390146791934967, + "learning_rate": 2.0582489242005897e-06, + "loss": 0.0008, + "step": 227700 + }, + { + "epoch": 1.4604612084032158, + "grad_norm": 0.18487323820590973, + "learning_rate": 2.057796363000753e-06, + "loss": 0.0018, + "step": 227710 + }, + { + "epoch": 1.460525345297002, + "grad_norm": 0.14110855758190155, + "learning_rate": 2.057343838668831e-06, + "loss": 0.0007, + "step": 227720 + }, + { + "epoch": 1.4605894821907879, + "grad_norm": 0.10188963264226913, + "learning_rate": 2.0568913512104934e-06, + "loss": 0.001, + "step": 227730 + }, + { + "epoch": 1.460653619084574, + "grad_norm": 0.1425182968378067, + "learning_rate": 2.056438900631409e-06, + "loss": 0.0019, + "step": 227740 + }, + { + "epoch": 1.4607177559783602, + "grad_norm": 0.0360860750079155, + "learning_rate": 2.05598648693725e-06, + "loss": 0.0008, + "step": 227750 + }, + { + "epoch": 1.4607818928721463, + "grad_norm": 0.028131086379289627, + "learning_rate": 2.055534110133686e-06, + "loss": 0.0009, + "step": 227760 + }, + { + "epoch": 1.4608460297659325, + "grad_norm": 0.24010440707206726, + "learning_rate": 2.0550817702263824e-06, + "loss": 0.0026, + "step": 227770 + }, + { + "epoch": 1.4609101666597186, + "grad_norm": 0.14275701344013214, + "learning_rate": 2.0546294672210075e-06, + "loss": 0.0014, + "step": 227780 + }, + { + "epoch": 1.4609743035535045, + "grad_norm": 0.06743670254945755, + "learning_rate": 2.0541772011232327e-06, + "loss": 0.0009, + "step": 227790 + }, + { + "epoch": 1.4610384404472907, + "grad_norm": 0.052330587059259415, + "learning_rate": 2.053724971938722e-06, + "loss": 0.0015, + "step": 227800 + }, + { + "epoch": 1.4611025773410768, + "grad_norm": 0.10786642879247665, + "learning_rate": 2.053272779673144e-06, + "loss": 0.0019, + "step": 227810 + }, + { + "epoch": 1.4611667142348628, + "grad_norm": 0.0722866803407669, + "learning_rate": 2.0528206243321618e-06, + "loss": 0.0006, + "step": 227820 + }, + { + "epoch": 1.461230851128649, + "grad_norm": 0.2144901007413864, + "learning_rate": 2.0523685059214452e-06, + "loss": 0.0012, + "step": 227830 + }, + { + "epoch": 1.461294988022435, + "grad_norm": 0.11507140100002289, + "learning_rate": 2.0519164244466583e-06, + "loss": 0.0014, + "step": 227840 + }, + { + "epoch": 1.4613591249162212, + "grad_norm": 0.052062615752220154, + "learning_rate": 2.051464379913463e-06, + "loss": 0.0007, + "step": 227850 + }, + { + "epoch": 1.4614232618100074, + "grad_norm": 0.0925881564617157, + "learning_rate": 2.051012372327529e-06, + "loss": 0.0015, + "step": 227860 + }, + { + "epoch": 1.4614873987037933, + "grad_norm": 0.07081978023052216, + "learning_rate": 2.0505604016945176e-06, + "loss": 0.0031, + "step": 227870 + }, + { + "epoch": 1.4615515355975794, + "grad_norm": 0.14238618314266205, + "learning_rate": 2.0501084680200915e-06, + "loss": 0.0016, + "step": 227880 + }, + { + "epoch": 1.4616156724913656, + "grad_norm": 0.07094322890043259, + "learning_rate": 2.049656571309913e-06, + "loss": 0.0012, + "step": 227890 + }, + { + "epoch": 1.4616798093851517, + "grad_norm": 0.011230080388486385, + "learning_rate": 2.0492047115696483e-06, + "loss": 0.0014, + "step": 227900 + }, + { + "epoch": 1.4617439462789377, + "grad_norm": 0.04832952097058296, + "learning_rate": 2.0487528888049574e-06, + "loss": 0.0011, + "step": 227910 + }, + { + "epoch": 1.4618080831727238, + "grad_norm": 0.03298899158835411, + "learning_rate": 2.048301103021502e-06, + "loss": 0.0027, + "step": 227920 + }, + { + "epoch": 1.46187222006651, + "grad_norm": 0.1945764720439911, + "learning_rate": 2.0478493542249417e-06, + "loss": 0.0019, + "step": 227930 + }, + { + "epoch": 1.4619363569602961, + "grad_norm": 0.02438526414334774, + "learning_rate": 2.04739764242094e-06, + "loss": 0.0006, + "step": 227940 + }, + { + "epoch": 1.4620004938540823, + "grad_norm": 0.2396112084388733, + "learning_rate": 2.0469459676151566e-06, + "loss": 0.0012, + "step": 227950 + }, + { + "epoch": 1.4620646307478682, + "grad_norm": 0.06391119956970215, + "learning_rate": 2.046494329813249e-06, + "loss": 0.0009, + "step": 227960 + }, + { + "epoch": 1.4621287676416543, + "grad_norm": 0.2302028387784958, + "learning_rate": 2.04604272902088e-06, + "loss": 0.002, + "step": 227970 + }, + { + "epoch": 1.4621929045354405, + "grad_norm": 0.02100854367017746, + "learning_rate": 2.0455911652437066e-06, + "loss": 0.0114, + "step": 227980 + }, + { + "epoch": 1.4622570414292264, + "grad_norm": 0.007054073270410299, + "learning_rate": 2.045139638487387e-06, + "loss": 0.0012, + "step": 227990 + }, + { + "epoch": 1.4623211783230126, + "grad_norm": 0.15014074742794037, + "learning_rate": 2.0446881487575785e-06, + "loss": 0.0014, + "step": 228000 + }, + { + "epoch": 1.4623853152167987, + "grad_norm": 0.1818699687719345, + "learning_rate": 2.0442366960599415e-06, + "loss": 0.0018, + "step": 228010 + }, + { + "epoch": 1.4624494521105849, + "grad_norm": 0.16917431354522705, + "learning_rate": 2.0437852804001305e-06, + "loss": 0.0021, + "step": 228020 + }, + { + "epoch": 1.462513589004371, + "grad_norm": 0.18695397675037384, + "learning_rate": 2.0433339017838032e-06, + "loss": 0.001, + "step": 228030 + }, + { + "epoch": 1.4625777258981572, + "grad_norm": 0.10904522985219955, + "learning_rate": 2.0428825602166132e-06, + "loss": 0.0009, + "step": 228040 + }, + { + "epoch": 1.462641862791943, + "grad_norm": 0.007502240594476461, + "learning_rate": 2.04243125570422e-06, + "loss": 0.0007, + "step": 228050 + }, + { + "epoch": 1.4627059996857292, + "grad_norm": 0.12464974075555801, + "learning_rate": 2.041979988252277e-06, + "loss": 0.0014, + "step": 228060 + }, + { + "epoch": 1.4627701365795154, + "grad_norm": 0.12314174324274063, + "learning_rate": 2.0415287578664383e-06, + "loss": 0.0014, + "step": 228070 + }, + { + "epoch": 1.4628342734733013, + "grad_norm": 0.006173981353640556, + "learning_rate": 2.041077564552357e-06, + "loss": 0.0008, + "step": 228080 + }, + { + "epoch": 1.4628984103670875, + "grad_norm": 0.048853591084480286, + "learning_rate": 2.0406264083156905e-06, + "loss": 0.0007, + "step": 228090 + }, + { + "epoch": 1.4629625472608736, + "grad_norm": 0.04714033752679825, + "learning_rate": 2.0401752891620908e-06, + "loss": 0.0011, + "step": 228100 + }, + { + "epoch": 1.4630266841546598, + "grad_norm": 0.01387617364525795, + "learning_rate": 2.039724207097207e-06, + "loss": 0.0008, + "step": 228110 + }, + { + "epoch": 1.463090821048446, + "grad_norm": 0.03569384664297104, + "learning_rate": 2.0392731621266974e-06, + "loss": 0.001, + "step": 228120 + }, + { + "epoch": 1.4631549579422318, + "grad_norm": 0.010431944392621517, + "learning_rate": 2.038822154256211e-06, + "loss": 0.0022, + "step": 228130 + }, + { + "epoch": 1.463219094836018, + "grad_norm": 0.08307234197854996, + "learning_rate": 2.038371183491399e-06, + "loss": 0.0019, + "step": 228140 + }, + { + "epoch": 1.4632832317298041, + "grad_norm": 0.16739168763160706, + "learning_rate": 2.037920249837911e-06, + "loss": 0.0012, + "step": 228150 + }, + { + "epoch": 1.4633473686235903, + "grad_norm": 0.13285072147846222, + "learning_rate": 2.0374693533014007e-06, + "loss": 0.0008, + "step": 228160 + }, + { + "epoch": 1.4634115055173762, + "grad_norm": 0.03239947184920311, + "learning_rate": 2.037018493887517e-06, + "loss": 0.0014, + "step": 228170 + }, + { + "epoch": 1.4634756424111623, + "grad_norm": 0.03774484246969223, + "learning_rate": 2.0365676716019095e-06, + "loss": 0.002, + "step": 228180 + }, + { + "epoch": 1.4635397793049485, + "grad_norm": 0.007905969396233559, + "learning_rate": 2.0361168864502246e-06, + "loss": 0.0011, + "step": 228190 + }, + { + "epoch": 1.4636039161987346, + "grad_norm": 0.12281139940023422, + "learning_rate": 2.0356661384381155e-06, + "loss": 0.0012, + "step": 228200 + }, + { + "epoch": 1.4636680530925208, + "grad_norm": 0.0835944414138794, + "learning_rate": 2.035215427571228e-06, + "loss": 0.0013, + "step": 228210 + }, + { + "epoch": 1.4637321899863067, + "grad_norm": 0.0011360858334228396, + "learning_rate": 2.0347647538552105e-06, + "loss": 0.0007, + "step": 228220 + }, + { + "epoch": 1.4637963268800929, + "grad_norm": 0.1155383288860321, + "learning_rate": 2.0343141172957077e-06, + "loss": 0.0053, + "step": 228230 + }, + { + "epoch": 1.463860463773879, + "grad_norm": 0.16193082928657532, + "learning_rate": 2.0338635178983706e-06, + "loss": 0.0029, + "step": 228240 + }, + { + "epoch": 1.463924600667665, + "grad_norm": 0.019801519811153412, + "learning_rate": 2.033412955668844e-06, + "loss": 0.0014, + "step": 228250 + }, + { + "epoch": 1.463988737561451, + "grad_norm": 0.015870848670601845, + "learning_rate": 2.0329624306127705e-06, + "loss": 0.0005, + "step": 228260 + }, + { + "epoch": 1.4640528744552372, + "grad_norm": 0.07326389104127884, + "learning_rate": 2.032511942735798e-06, + "loss": 0.0014, + "step": 228270 + }, + { + "epoch": 1.4641170113490234, + "grad_norm": 0.014281432144343853, + "learning_rate": 2.0320614920435744e-06, + "loss": 0.0014, + "step": 228280 + }, + { + "epoch": 1.4641811482428095, + "grad_norm": 0.0774741917848587, + "learning_rate": 2.0316110785417414e-06, + "loss": 0.0007, + "step": 228290 + }, + { + "epoch": 1.4642452851365957, + "grad_norm": 0.014599175192415714, + "learning_rate": 2.0311607022359424e-06, + "loss": 0.002, + "step": 228300 + }, + { + "epoch": 1.4643094220303816, + "grad_norm": 0.09474321454763412, + "learning_rate": 2.0307103631318202e-06, + "loss": 0.0013, + "step": 228310 + }, + { + "epoch": 1.4643735589241678, + "grad_norm": 0.1536872386932373, + "learning_rate": 2.0302600612350217e-06, + "loss": 0.0011, + "step": 228320 + }, + { + "epoch": 1.464437695817954, + "grad_norm": 0.09565769135951996, + "learning_rate": 2.029809796551186e-06, + "loss": 0.0012, + "step": 228330 + }, + { + "epoch": 1.4645018327117398, + "grad_norm": 0.0661081001162529, + "learning_rate": 2.0293595690859567e-06, + "loss": 0.0008, + "step": 228340 + }, + { + "epoch": 1.464565969605526, + "grad_norm": 0.02482161857187748, + "learning_rate": 2.028909378844973e-06, + "loss": 0.0009, + "step": 228350 + }, + { + "epoch": 1.4646301064993121, + "grad_norm": 0.11742063611745834, + "learning_rate": 2.028459225833881e-06, + "loss": 0.0007, + "step": 228360 + }, + { + "epoch": 1.4646942433930983, + "grad_norm": 0.05392173305153847, + "learning_rate": 2.0280091100583175e-06, + "loss": 0.0014, + "step": 228370 + }, + { + "epoch": 1.4647583802868844, + "grad_norm": 0.014185560867190361, + "learning_rate": 2.027559031523923e-06, + "loss": 0.0015, + "step": 228380 + }, + { + "epoch": 1.4648225171806704, + "grad_norm": 0.013879602774977684, + "learning_rate": 2.0271089902363394e-06, + "loss": 0.0008, + "step": 228390 + }, + { + "epoch": 1.4648866540744565, + "grad_norm": 0.011047734878957272, + "learning_rate": 2.0266589862012047e-06, + "loss": 0.003, + "step": 228400 + }, + { + "epoch": 1.4649507909682427, + "grad_norm": 0.024848774075508118, + "learning_rate": 2.0262090194241585e-06, + "loss": 0.0006, + "step": 228410 + }, + { + "epoch": 1.4650149278620286, + "grad_norm": 0.023540036752820015, + "learning_rate": 2.0257590899108364e-06, + "loss": 0.0013, + "step": 228420 + }, + { + "epoch": 1.4650790647558147, + "grad_norm": 0.04605693742632866, + "learning_rate": 2.025309197666881e-06, + "loss": 0.0022, + "step": 228430 + }, + { + "epoch": 1.4651432016496009, + "grad_norm": 0.14158214628696442, + "learning_rate": 2.0248593426979268e-06, + "loss": 0.0009, + "step": 228440 + }, + { + "epoch": 1.465207338543387, + "grad_norm": 0.11355796456336975, + "learning_rate": 2.0244095250096115e-06, + "loss": 0.0013, + "step": 228450 + }, + { + "epoch": 1.4652714754371732, + "grad_norm": 0.08286843448877335, + "learning_rate": 2.0239597446075692e-06, + "loss": 0.0004, + "step": 228460 + }, + { + "epoch": 1.4653356123309593, + "grad_norm": 0.08982168883085251, + "learning_rate": 2.02351000149744e-06, + "loss": 0.0006, + "step": 228470 + }, + { + "epoch": 1.4653997492247453, + "grad_norm": 0.06646949797868729, + "learning_rate": 2.023060295684859e-06, + "loss": 0.0016, + "step": 228480 + }, + { + "epoch": 1.4654638861185314, + "grad_norm": 0.016652148216962814, + "learning_rate": 2.0226106271754587e-06, + "loss": 0.0012, + "step": 228490 + }, + { + "epoch": 1.4655280230123175, + "grad_norm": 0.05408656597137451, + "learning_rate": 2.022160995974874e-06, + "loss": 0.0013, + "step": 228500 + }, + { + "epoch": 1.4655921599061035, + "grad_norm": 0.023705298081040382, + "learning_rate": 2.0217114020887416e-06, + "loss": 0.0013, + "step": 228510 + }, + { + "epoch": 1.4656562967998896, + "grad_norm": 0.06567654013633728, + "learning_rate": 2.0212618455226947e-06, + "loss": 0.0012, + "step": 228520 + }, + { + "epoch": 1.4657204336936758, + "grad_norm": 0.12240105867385864, + "learning_rate": 2.0208123262823633e-06, + "loss": 0.0019, + "step": 228530 + }, + { + "epoch": 1.465784570587462, + "grad_norm": 0.0822448655962944, + "learning_rate": 2.020362844373385e-06, + "loss": 0.0011, + "step": 228540 + }, + { + "epoch": 1.465848707481248, + "grad_norm": 0.0353735014796257, + "learning_rate": 2.0199133998013892e-06, + "loss": 0.0013, + "step": 228550 + }, + { + "epoch": 1.465912844375034, + "grad_norm": 0.050326962023973465, + "learning_rate": 2.0194639925720083e-06, + "loss": 0.0007, + "step": 228560 + }, + { + "epoch": 1.4659769812688201, + "grad_norm": 0.06709080189466476, + "learning_rate": 2.0190146226908726e-06, + "loss": 0.001, + "step": 228570 + }, + { + "epoch": 1.4660411181626063, + "grad_norm": 0.23265334963798523, + "learning_rate": 2.0185652901636147e-06, + "loss": 0.0018, + "step": 228580 + }, + { + "epoch": 1.4661052550563924, + "grad_norm": 0.0635458454489708, + "learning_rate": 2.0181159949958655e-06, + "loss": 0.0012, + "step": 228590 + }, + { + "epoch": 1.4661693919501784, + "grad_norm": 0.06184995546936989, + "learning_rate": 2.017666737193254e-06, + "loss": 0.0009, + "step": 228600 + }, + { + "epoch": 1.4662335288439645, + "grad_norm": 0.04848029091954231, + "learning_rate": 2.0172175167614076e-06, + "loss": 0.0005, + "step": 228610 + }, + { + "epoch": 1.4662976657377507, + "grad_norm": 0.03075077198445797, + "learning_rate": 2.0167683337059597e-06, + "loss": 0.0012, + "step": 228620 + }, + { + "epoch": 1.4663618026315368, + "grad_norm": 0.09495376795530319, + "learning_rate": 2.016319188032536e-06, + "loss": 0.0014, + "step": 228630 + }, + { + "epoch": 1.466425939525323, + "grad_norm": 0.015723099932074547, + "learning_rate": 2.0158700797467663e-06, + "loss": 0.0013, + "step": 228640 + }, + { + "epoch": 1.466490076419109, + "grad_norm": 0.017615314573049545, + "learning_rate": 2.015421008854274e-06, + "loss": 0.0009, + "step": 228650 + }, + { + "epoch": 1.466554213312895, + "grad_norm": 0.008050485514104366, + "learning_rate": 2.0149719753606927e-06, + "loss": 0.0021, + "step": 228660 + }, + { + "epoch": 1.4666183502066812, + "grad_norm": 0.0542878732085228, + "learning_rate": 2.014522979271645e-06, + "loss": 0.0009, + "step": 228670 + }, + { + "epoch": 1.4666824871004671, + "grad_norm": 0.057286377996206284, + "learning_rate": 2.0140740205927567e-06, + "loss": 0.0009, + "step": 228680 + }, + { + "epoch": 1.4667466239942533, + "grad_norm": 0.09914997220039368, + "learning_rate": 2.013625099329657e-06, + "loss": 0.0021, + "step": 228690 + }, + { + "epoch": 1.4668107608880394, + "grad_norm": 0.014204105362296104, + "learning_rate": 2.0131762154879688e-06, + "loss": 0.0006, + "step": 228700 + }, + { + "epoch": 1.4668748977818256, + "grad_norm": 0.10019271820783615, + "learning_rate": 2.012727369073317e-06, + "loss": 0.0016, + "step": 228710 + }, + { + "epoch": 1.4669390346756117, + "grad_norm": 0.12342901527881622, + "learning_rate": 2.012278560091324e-06, + "loss": 0.0018, + "step": 228720 + }, + { + "epoch": 1.4670031715693979, + "grad_norm": 0.031092116609215736, + "learning_rate": 2.011829788547619e-06, + "loss": 0.0011, + "step": 228730 + }, + { + "epoch": 1.4670673084631838, + "grad_norm": 0.038071438670158386, + "learning_rate": 2.0113810544478213e-06, + "loss": 0.0006, + "step": 228740 + }, + { + "epoch": 1.46713144535697, + "grad_norm": 0.02734973281621933, + "learning_rate": 2.0109323577975553e-06, + "loss": 0.0012, + "step": 228750 + }, + { + "epoch": 1.467195582250756, + "grad_norm": 0.1813708245754242, + "learning_rate": 2.0104836986024416e-06, + "loss": 0.0014, + "step": 228760 + }, + { + "epoch": 1.467259719144542, + "grad_norm": 0.03087799809873104, + "learning_rate": 2.0100350768681054e-06, + "loss": 0.0015, + "step": 228770 + }, + { + "epoch": 1.4673238560383282, + "grad_norm": 0.04848742485046387, + "learning_rate": 2.0095864926001664e-06, + "loss": 0.0018, + "step": 228780 + }, + { + "epoch": 1.4673879929321143, + "grad_norm": 0.10303984582424164, + "learning_rate": 2.0091379458042458e-06, + "loss": 0.001, + "step": 228790 + }, + { + "epoch": 1.4674521298259005, + "grad_norm": 0.12709379196166992, + "learning_rate": 2.008689436485963e-06, + "loss": 0.0011, + "step": 228800 + }, + { + "epoch": 1.4675162667196866, + "grad_norm": 0.0049931686371564865, + "learning_rate": 2.0082409646509406e-06, + "loss": 0.0004, + "step": 228810 + }, + { + "epoch": 1.4675804036134725, + "grad_norm": 0.09220277518033981, + "learning_rate": 2.0077925303047975e-06, + "loss": 0.0014, + "step": 228820 + }, + { + "epoch": 1.4676445405072587, + "grad_norm": 0.049771569669246674, + "learning_rate": 2.007344133453151e-06, + "loss": 0.0007, + "step": 228830 + }, + { + "epoch": 1.4677086774010448, + "grad_norm": 0.17239652574062347, + "learning_rate": 2.006895774101622e-06, + "loss": 0.0007, + "step": 228840 + }, + { + "epoch": 1.4677728142948308, + "grad_norm": 0.044996198266744614, + "learning_rate": 2.0064474522558287e-06, + "loss": 0.0017, + "step": 228850 + }, + { + "epoch": 1.467836951188617, + "grad_norm": 0.016608484089374542, + "learning_rate": 2.0059991679213884e-06, + "loss": 0.001, + "step": 228860 + }, + { + "epoch": 1.467901088082403, + "grad_norm": 0.05175834149122238, + "learning_rate": 2.0055509211039166e-06, + "loss": 0.0007, + "step": 228870 + }, + { + "epoch": 1.4679652249761892, + "grad_norm": 0.1386476755142212, + "learning_rate": 2.005102711809033e-06, + "loss": 0.0009, + "step": 228880 + }, + { + "epoch": 1.4680293618699753, + "grad_norm": 0.1274915337562561, + "learning_rate": 2.004654540042354e-06, + "loss": 0.0015, + "step": 228890 + }, + { + "epoch": 1.4680934987637615, + "grad_norm": 0.18073634803295135, + "learning_rate": 2.0042064058094934e-06, + "loss": 0.0017, + "step": 228900 + }, + { + "epoch": 1.4681576356575474, + "grad_norm": 0.053620342165231705, + "learning_rate": 2.003758309116066e-06, + "loss": 0.0017, + "step": 228910 + }, + { + "epoch": 1.4682217725513336, + "grad_norm": 0.03877738118171692, + "learning_rate": 2.0033102499676904e-06, + "loss": 0.0008, + "step": 228920 + }, + { + "epoch": 1.4682859094451197, + "grad_norm": 0.05406402796506882, + "learning_rate": 2.002862228369979e-06, + "loss": 0.001, + "step": 228930 + }, + { + "epoch": 1.4683500463389056, + "grad_norm": 0.05574464052915573, + "learning_rate": 2.0024142443285455e-06, + "loss": 0.0017, + "step": 228940 + }, + { + "epoch": 1.4684141832326918, + "grad_norm": 0.1522226333618164, + "learning_rate": 2.001966297849003e-06, + "loss": 0.001, + "step": 228950 + }, + { + "epoch": 1.468478320126478, + "grad_norm": 0.08424288779497147, + "learning_rate": 2.001518388936966e-06, + "loss": 0.001, + "step": 228960 + }, + { + "epoch": 1.468542457020264, + "grad_norm": 0.0023943937849253416, + "learning_rate": 2.001070517598048e-06, + "loss": 0.0005, + "step": 228970 + }, + { + "epoch": 1.4686065939140502, + "grad_norm": 0.12704569101333618, + "learning_rate": 2.000622683837857e-06, + "loss": 0.0009, + "step": 228980 + }, + { + "epoch": 1.4686707308078362, + "grad_norm": 0.10262220352888107, + "learning_rate": 2.000174887662009e-06, + "loss": 0.0008, + "step": 228990 + }, + { + "epoch": 1.4687348677016223, + "grad_norm": 0.041612930595874786, + "learning_rate": 1.9997271290761137e-06, + "loss": 0.0017, + "step": 229000 + }, + { + "epoch": 1.4687990045954085, + "grad_norm": 0.10134538263082504, + "learning_rate": 1.999279408085782e-06, + "loss": 0.0014, + "step": 229010 + }, + { + "epoch": 1.4688631414891946, + "grad_norm": 0.0696941614151001, + "learning_rate": 1.998831724696622e-06, + "loss": 0.002, + "step": 229020 + }, + { + "epoch": 1.4689272783829805, + "grad_norm": 0.22982709109783173, + "learning_rate": 1.9983840789142474e-06, + "loss": 0.003, + "step": 229030 + }, + { + "epoch": 1.4689914152767667, + "grad_norm": 0.019180903211236, + "learning_rate": 1.997936470744263e-06, + "loss": 0.001, + "step": 229040 + }, + { + "epoch": 1.4690555521705528, + "grad_norm": 0.06342509388923645, + "learning_rate": 1.997488900192283e-06, + "loss": 0.0012, + "step": 229050 + }, + { + "epoch": 1.469119689064339, + "grad_norm": 0.07248317450284958, + "learning_rate": 1.997041367263913e-06, + "loss": 0.0009, + "step": 229060 + }, + { + "epoch": 1.4691838259581251, + "grad_norm": 0.06499683111906052, + "learning_rate": 1.9965938719647582e-06, + "loss": 0.0006, + "step": 229070 + }, + { + "epoch": 1.469247962851911, + "grad_norm": 0.15315909683704376, + "learning_rate": 1.9961464143004306e-06, + "loss": 0.0008, + "step": 229080 + }, + { + "epoch": 1.4693120997456972, + "grad_norm": 0.09500911831855774, + "learning_rate": 1.9956989942765353e-06, + "loss": 0.0011, + "step": 229090 + }, + { + "epoch": 1.4693762366394834, + "grad_norm": 0.03162289038300514, + "learning_rate": 1.9952516118986775e-06, + "loss": 0.0018, + "step": 229100 + }, + { + "epoch": 1.4694403735332693, + "grad_norm": 0.013298217207193375, + "learning_rate": 1.9948042671724655e-06, + "loss": 0.0014, + "step": 229110 + }, + { + "epoch": 1.4695045104270554, + "grad_norm": 0.0648544505238533, + "learning_rate": 1.9943569601035045e-06, + "loss": 0.0017, + "step": 229120 + }, + { + "epoch": 1.4695686473208416, + "grad_norm": 0.35140907764434814, + "learning_rate": 1.9939096906973985e-06, + "loss": 0.0025, + "step": 229130 + }, + { + "epoch": 1.4696327842146277, + "grad_norm": 0.03840072825551033, + "learning_rate": 1.9934624589597506e-06, + "loss": 0.0005, + "step": 229140 + }, + { + "epoch": 1.4696969211084139, + "grad_norm": 0.016085276380181313, + "learning_rate": 1.9930152648961686e-06, + "loss": 0.0011, + "step": 229150 + }, + { + "epoch": 1.4697610580022, + "grad_norm": 0.0642709732055664, + "learning_rate": 1.992568108512255e-06, + "loss": 0.0016, + "step": 229160 + }, + { + "epoch": 1.469825194895986, + "grad_norm": 0.12338618189096451, + "learning_rate": 1.9921209898136117e-06, + "loss": 0.0011, + "step": 229170 + }, + { + "epoch": 1.469889331789772, + "grad_norm": 0.07437125593423843, + "learning_rate": 1.9916739088058406e-06, + "loss": 0.0021, + "step": 229180 + }, + { + "epoch": 1.4699534686835583, + "grad_norm": 0.02611471898853779, + "learning_rate": 1.991226865494547e-06, + "loss": 0.0013, + "step": 229190 + }, + { + "epoch": 1.4700176055773442, + "grad_norm": 0.06297191232442856, + "learning_rate": 1.990779859885332e-06, + "loss": 0.0018, + "step": 229200 + }, + { + "epoch": 1.4700817424711303, + "grad_norm": 0.08756767213344574, + "learning_rate": 1.990332891983795e-06, + "loss": 0.0012, + "step": 229210 + }, + { + "epoch": 1.4701458793649165, + "grad_norm": 0.03923594579100609, + "learning_rate": 1.9898859617955363e-06, + "loss": 0.0007, + "step": 229220 + }, + { + "epoch": 1.4702100162587026, + "grad_norm": 0.04241190478205681, + "learning_rate": 1.9894390693261595e-06, + "loss": 0.0012, + "step": 229230 + }, + { + "epoch": 1.4702741531524888, + "grad_norm": 0.18856562674045563, + "learning_rate": 1.988992214581263e-06, + "loss": 0.0012, + "step": 229240 + }, + { + "epoch": 1.4703382900462747, + "grad_norm": 0.07870613783597946, + "learning_rate": 1.9885453975664443e-06, + "loss": 0.0008, + "step": 229250 + }, + { + "epoch": 1.4704024269400608, + "grad_norm": 0.04163774102926254, + "learning_rate": 1.988098618287306e-06, + "loss": 0.0013, + "step": 229260 + }, + { + "epoch": 1.470466563833847, + "grad_norm": 0.07428430020809174, + "learning_rate": 1.987651876749444e-06, + "loss": 0.0011, + "step": 229270 + }, + { + "epoch": 1.470530700727633, + "grad_norm": 0.10029874742031097, + "learning_rate": 1.9872051729584567e-06, + "loss": 0.0024, + "step": 229280 + }, + { + "epoch": 1.470594837621419, + "grad_norm": 0.052629999816417694, + "learning_rate": 1.9867585069199407e-06, + "loss": 0.0011, + "step": 229290 + }, + { + "epoch": 1.4706589745152052, + "grad_norm": 0.060825642198324203, + "learning_rate": 1.986311878639495e-06, + "loss": 0.001, + "step": 229300 + }, + { + "epoch": 1.4707231114089914, + "grad_norm": 0.0829131156206131, + "learning_rate": 1.9858652881227162e-06, + "loss": 0.0011, + "step": 229310 + }, + { + "epoch": 1.4707872483027775, + "grad_norm": 0.03715585544705391, + "learning_rate": 1.9854187353751996e-06, + "loss": 0.0009, + "step": 229320 + }, + { + "epoch": 1.4708513851965637, + "grad_norm": 0.20530331134796143, + "learning_rate": 1.9849722204025385e-06, + "loss": 0.0027, + "step": 229330 + }, + { + "epoch": 1.4709155220903496, + "grad_norm": 0.13014550507068634, + "learning_rate": 1.984525743210332e-06, + "loss": 0.002, + "step": 229340 + }, + { + "epoch": 1.4709796589841357, + "grad_norm": 0.16381634771823883, + "learning_rate": 1.9840793038041733e-06, + "loss": 0.0008, + "step": 229350 + }, + { + "epoch": 1.471043795877922, + "grad_norm": 0.05737442895770073, + "learning_rate": 1.9836329021896567e-06, + "loss": 0.0005, + "step": 229360 + }, + { + "epoch": 1.4711079327717078, + "grad_norm": 0.06670114398002625, + "learning_rate": 1.983186538372373e-06, + "loss": 0.0022, + "step": 229370 + }, + { + "epoch": 1.471172069665494, + "grad_norm": 0.18281473219394684, + "learning_rate": 1.9827402123579204e-06, + "loss": 0.0019, + "step": 229380 + }, + { + "epoch": 1.4712362065592801, + "grad_norm": 0.06288686394691467, + "learning_rate": 1.982293924151889e-06, + "loss": 0.0012, + "step": 229390 + }, + { + "epoch": 1.4713003434530663, + "grad_norm": 0.12208051234483719, + "learning_rate": 1.98184767375987e-06, + "loss": 0.0008, + "step": 229400 + }, + { + "epoch": 1.4713644803468524, + "grad_norm": 0.06759564578533173, + "learning_rate": 1.981401461187459e-06, + "loss": 0.0006, + "step": 229410 + }, + { + "epoch": 1.4714286172406383, + "grad_norm": 0.10475599765777588, + "learning_rate": 1.9809552864402437e-06, + "loss": 0.0014, + "step": 229420 + }, + { + "epoch": 1.4714927541344245, + "grad_norm": 0.1359255462884903, + "learning_rate": 1.980509149523817e-06, + "loss": 0.0011, + "step": 229430 + }, + { + "epoch": 1.4715568910282106, + "grad_norm": 0.07360190153121948, + "learning_rate": 1.9800630504437672e-06, + "loss": 0.0014, + "step": 229440 + }, + { + "epoch": 1.4716210279219968, + "grad_norm": 0.17912261188030243, + "learning_rate": 1.9796169892056873e-06, + "loss": 0.0026, + "step": 229450 + }, + { + "epoch": 1.4716851648157827, + "grad_norm": 0.11364316940307617, + "learning_rate": 1.9791709658151647e-06, + "loss": 0.0019, + "step": 229460 + }, + { + "epoch": 1.4717493017095689, + "grad_norm": 0.008240544237196445, + "learning_rate": 1.9787249802777897e-06, + "loss": 0.0008, + "step": 229470 + }, + { + "epoch": 1.471813438603355, + "grad_norm": 0.10144824534654617, + "learning_rate": 1.9782790325991474e-06, + "loss": 0.0013, + "step": 229480 + }, + { + "epoch": 1.4718775754971412, + "grad_norm": 0.11618056893348694, + "learning_rate": 1.9778331227848307e-06, + "loss": 0.0011, + "step": 229490 + }, + { + "epoch": 1.4719417123909273, + "grad_norm": 0.06600002944469452, + "learning_rate": 1.9773872508404244e-06, + "loss": 0.0049, + "step": 229500 + }, + { + "epoch": 1.4720058492847132, + "grad_norm": 0.027490144595503807, + "learning_rate": 1.976941416771516e-06, + "loss": 0.0007, + "step": 229510 + }, + { + "epoch": 1.4720699861784994, + "grad_norm": 0.07673504948616028, + "learning_rate": 1.976495620583691e-06, + "loss": 0.0033, + "step": 229520 + }, + { + "epoch": 1.4721341230722855, + "grad_norm": 0.024866528809070587, + "learning_rate": 1.9760498622825385e-06, + "loss": 0.0008, + "step": 229530 + }, + { + "epoch": 1.4721982599660715, + "grad_norm": 0.14301668107509613, + "learning_rate": 1.975604141873642e-06, + "loss": 0.0024, + "step": 229540 + }, + { + "epoch": 1.4722623968598576, + "grad_norm": 0.3289594054222107, + "learning_rate": 1.9751584593625855e-06, + "loss": 0.003, + "step": 229550 + }, + { + "epoch": 1.4723265337536438, + "grad_norm": 0.20589904487133026, + "learning_rate": 1.974712814754957e-06, + "loss": 0.0012, + "step": 229560 + }, + { + "epoch": 1.47239067064743, + "grad_norm": 0.05686769261956215, + "learning_rate": 1.9742672080563395e-06, + "loss": 0.0009, + "step": 229570 + }, + { + "epoch": 1.472454807541216, + "grad_norm": 0.07623685151338577, + "learning_rate": 1.973821639272316e-06, + "loss": 0.0013, + "step": 229580 + }, + { + "epoch": 1.4725189444350022, + "grad_norm": 0.09793905168771744, + "learning_rate": 1.9733761084084687e-06, + "loss": 0.0012, + "step": 229590 + }, + { + "epoch": 1.4725830813287881, + "grad_norm": 0.03943793103098869, + "learning_rate": 1.972930615470383e-06, + "loss": 0.0011, + "step": 229600 + }, + { + "epoch": 1.4726472182225743, + "grad_norm": 0.0713537186384201, + "learning_rate": 1.97248516046364e-06, + "loss": 0.0006, + "step": 229610 + }, + { + "epoch": 1.4727113551163604, + "grad_norm": 0.07061032205820084, + "learning_rate": 1.9720397433938227e-06, + "loss": 0.0012, + "step": 229620 + }, + { + "epoch": 1.4727754920101463, + "grad_norm": 0.06999431550502777, + "learning_rate": 1.9715943642665094e-06, + "loss": 0.0012, + "step": 229630 + }, + { + "epoch": 1.4728396289039325, + "grad_norm": 0.030215317383408546, + "learning_rate": 1.9711490230872843e-06, + "loss": 0.0007, + "step": 229640 + }, + { + "epoch": 1.4729037657977186, + "grad_norm": 0.07707786560058594, + "learning_rate": 1.970703719861727e-06, + "loss": 0.0015, + "step": 229650 + }, + { + "epoch": 1.4729679026915048, + "grad_norm": 0.04843216761946678, + "learning_rate": 1.970258454595415e-06, + "loss": 0.0014, + "step": 229660 + }, + { + "epoch": 1.473032039585291, + "grad_norm": 0.12600107491016388, + "learning_rate": 1.969813227293932e-06, + "loss": 0.0012, + "step": 229670 + }, + { + "epoch": 1.4730961764790769, + "grad_norm": 0.041636981070041656, + "learning_rate": 1.9693680379628543e-06, + "loss": 0.0023, + "step": 229680 + }, + { + "epoch": 1.473160313372863, + "grad_norm": 0.11662957817316055, + "learning_rate": 1.968922886607762e-06, + "loss": 0.0025, + "step": 229690 + }, + { + "epoch": 1.4732244502666492, + "grad_norm": 0.09565620124340057, + "learning_rate": 1.9684777732342296e-06, + "loss": 0.0012, + "step": 229700 + }, + { + "epoch": 1.4732885871604353, + "grad_norm": 0.005680976435542107, + "learning_rate": 1.9680326978478393e-06, + "loss": 0.0009, + "step": 229710 + }, + { + "epoch": 1.4733527240542212, + "grad_norm": 0.01353106927126646, + "learning_rate": 1.9675876604541665e-06, + "loss": 0.001, + "step": 229720 + }, + { + "epoch": 1.4734168609480074, + "grad_norm": 0.0855177491903305, + "learning_rate": 1.9671426610587873e-06, + "loss": 0.0017, + "step": 229730 + }, + { + "epoch": 1.4734809978417935, + "grad_norm": 0.0511997751891613, + "learning_rate": 1.9666976996672764e-06, + "loss": 0.0014, + "step": 229740 + }, + { + "epoch": 1.4735451347355797, + "grad_norm": 0.053706057369709015, + "learning_rate": 1.966252776285213e-06, + "loss": 0.0014, + "step": 229750 + }, + { + "epoch": 1.4736092716293658, + "grad_norm": 0.016953039914369583, + "learning_rate": 1.9658078909181705e-06, + "loss": 0.0022, + "step": 229760 + }, + { + "epoch": 1.4736734085231518, + "grad_norm": 0.030180564150214195, + "learning_rate": 1.9653630435717237e-06, + "loss": 0.0011, + "step": 229770 + }, + { + "epoch": 1.473737545416938, + "grad_norm": 0.03028823249042034, + "learning_rate": 1.964918234251445e-06, + "loss": 0.0007, + "step": 229780 + }, + { + "epoch": 1.473801682310724, + "grad_norm": 0.12614458799362183, + "learning_rate": 1.9644734629629114e-06, + "loss": 0.0017, + "step": 229790 + }, + { + "epoch": 1.47386581920451, + "grad_norm": 0.11758097261190414, + "learning_rate": 1.9640287297116933e-06, + "loss": 0.0018, + "step": 229800 + }, + { + "epoch": 1.4739299560982961, + "grad_norm": 0.038990456610918045, + "learning_rate": 1.9635840345033667e-06, + "loss": 0.0016, + "step": 229810 + }, + { + "epoch": 1.4739940929920823, + "grad_norm": 0.08074386417865753, + "learning_rate": 1.9631393773435006e-06, + "loss": 0.0008, + "step": 229820 + }, + { + "epoch": 1.4740582298858684, + "grad_norm": 0.061525046825408936, + "learning_rate": 1.9626947582376704e-06, + "loss": 0.0017, + "step": 229830 + }, + { + "epoch": 1.4741223667796546, + "grad_norm": 0.1928442120552063, + "learning_rate": 1.9622501771914454e-06, + "loss": 0.0011, + "step": 229840 + }, + { + "epoch": 1.4741865036734407, + "grad_norm": 0.012257641181349754, + "learning_rate": 1.9618056342103965e-06, + "loss": 0.0006, + "step": 229850 + }, + { + "epoch": 1.4742506405672267, + "grad_norm": 0.05344541743397713, + "learning_rate": 1.9613611293000927e-06, + "loss": 0.0011, + "step": 229860 + }, + { + "epoch": 1.4743147774610128, + "grad_norm": 0.08402790874242783, + "learning_rate": 1.9609166624661076e-06, + "loss": 0.0023, + "step": 229870 + }, + { + "epoch": 1.474378914354799, + "grad_norm": 0.0019207809818908572, + "learning_rate": 1.9604722337140086e-06, + "loss": 0.0006, + "step": 229880 + }, + { + "epoch": 1.4744430512485849, + "grad_norm": 0.01550615206360817, + "learning_rate": 1.960027843049365e-06, + "loss": 0.0016, + "step": 229890 + }, + { + "epoch": 1.474507188142371, + "grad_norm": 0.10568740963935852, + "learning_rate": 1.9595834904777434e-06, + "loss": 0.0023, + "step": 229900 + }, + { + "epoch": 1.4745713250361572, + "grad_norm": 0.1658589094877243, + "learning_rate": 1.9591391760047153e-06, + "loss": 0.0007, + "step": 229910 + }, + { + "epoch": 1.4746354619299433, + "grad_norm": 0.3455018401145935, + "learning_rate": 1.9586948996358463e-06, + "loss": 0.003, + "step": 229920 + }, + { + "epoch": 1.4746995988237295, + "grad_norm": 0.025642002001404762, + "learning_rate": 1.9582506613767023e-06, + "loss": 0.0045, + "step": 229930 + }, + { + "epoch": 1.4747637357175154, + "grad_norm": 0.18122798204421997, + "learning_rate": 1.957806461232853e-06, + "loss": 0.0018, + "step": 229940 + }, + { + "epoch": 1.4748278726113015, + "grad_norm": 0.047227855771780014, + "learning_rate": 1.957362299209863e-06, + "loss": 0.0008, + "step": 229950 + }, + { + "epoch": 1.4748920095050877, + "grad_norm": 0.059645794332027435, + "learning_rate": 1.9569181753132982e-06, + "loss": 0.0014, + "step": 229960 + }, + { + "epoch": 1.4749561463988736, + "grad_norm": 0.044832829385995865, + "learning_rate": 1.9564740895487217e-06, + "loss": 0.0013, + "step": 229970 + }, + { + "epoch": 1.4750202832926598, + "grad_norm": 0.009424911811947823, + "learning_rate": 1.9560300419217015e-06, + "loss": 0.0013, + "step": 229980 + }, + { + "epoch": 1.475084420186446, + "grad_norm": 0.08541758358478546, + "learning_rate": 1.9555860324378005e-06, + "loss": 0.0012, + "step": 229990 + }, + { + "epoch": 1.475148557080232, + "grad_norm": 0.08901616185903549, + "learning_rate": 1.955142061102582e-06, + "loss": 0.0011, + "step": 230000 + }, + { + "epoch": 1.4752126939740182, + "grad_norm": 0.09359852969646454, + "learning_rate": 1.954698127921609e-06, + "loss": 0.0015, + "step": 230010 + }, + { + "epoch": 1.4752768308678044, + "grad_norm": 0.1309373825788498, + "learning_rate": 1.9542542329004456e-06, + "loss": 0.0013, + "step": 230020 + }, + { + "epoch": 1.4753409677615903, + "grad_norm": 0.12006743997335434, + "learning_rate": 1.9538103760446537e-06, + "loss": 0.0013, + "step": 230030 + }, + { + "epoch": 1.4754051046553764, + "grad_norm": 0.06533078104257584, + "learning_rate": 1.953366557359795e-06, + "loss": 0.0012, + "step": 230040 + }, + { + "epoch": 1.4754692415491626, + "grad_norm": 0.0857715979218483, + "learning_rate": 1.9529227768514286e-06, + "loss": 0.0018, + "step": 230050 + }, + { + "epoch": 1.4755333784429485, + "grad_norm": 0.02155408076941967, + "learning_rate": 1.9524790345251193e-06, + "loss": 0.0012, + "step": 230060 + }, + { + "epoch": 1.4755975153367347, + "grad_norm": 0.05905630812048912, + "learning_rate": 1.952035330386426e-06, + "loss": 0.0007, + "step": 230070 + }, + { + "epoch": 1.4756616522305208, + "grad_norm": 0.014506034553050995, + "learning_rate": 1.951591664440907e-06, + "loss": 0.0005, + "step": 230080 + }, + { + "epoch": 1.475725789124307, + "grad_norm": 0.24017755687236786, + "learning_rate": 1.9511480366941245e-06, + "loss": 0.0019, + "step": 230090 + }, + { + "epoch": 1.4757899260180931, + "grad_norm": 0.031952865421772, + "learning_rate": 1.9507044471516358e-06, + "loss": 0.0012, + "step": 230100 + }, + { + "epoch": 1.475854062911879, + "grad_norm": 0.15895450115203857, + "learning_rate": 1.9502608958189995e-06, + "loss": 0.0014, + "step": 230110 + }, + { + "epoch": 1.4759181998056652, + "grad_norm": 0.260976105928421, + "learning_rate": 1.9498173827017724e-06, + "loss": 0.0016, + "step": 230120 + }, + { + "epoch": 1.4759823366994513, + "grad_norm": 0.030834907665848732, + "learning_rate": 1.949373907805515e-06, + "loss": 0.0009, + "step": 230130 + }, + { + "epoch": 1.4760464735932375, + "grad_norm": 0.03008120320737362, + "learning_rate": 1.948930471135783e-06, + "loss": 0.0012, + "step": 230140 + }, + { + "epoch": 1.4761106104870234, + "grad_norm": 0.19386741518974304, + "learning_rate": 1.948487072698133e-06, + "loss": 0.0009, + "step": 230150 + }, + { + "epoch": 1.4761747473808096, + "grad_norm": 0.0772666484117508, + "learning_rate": 1.948043712498119e-06, + "loss": 0.0008, + "step": 230160 + }, + { + "epoch": 1.4762388842745957, + "grad_norm": 0.007100511807948351, + "learning_rate": 1.9476003905413e-06, + "loss": 0.0022, + "step": 230170 + }, + { + "epoch": 1.4763030211683819, + "grad_norm": 0.09792197495698929, + "learning_rate": 1.9471571068332295e-06, + "loss": 0.0008, + "step": 230180 + }, + { + "epoch": 1.476367158062168, + "grad_norm": 0.04310609772801399, + "learning_rate": 1.946713861379463e-06, + "loss": 0.001, + "step": 230190 + }, + { + "epoch": 1.476431294955954, + "grad_norm": 0.08101110905408859, + "learning_rate": 1.9462706541855514e-06, + "loss": 0.0017, + "step": 230200 + }, + { + "epoch": 1.47649543184974, + "grad_norm": 0.05013372004032135, + "learning_rate": 1.945827485257053e-06, + "loss": 0.0014, + "step": 230210 + }, + { + "epoch": 1.4765595687435262, + "grad_norm": 0.38013651967048645, + "learning_rate": 1.945384354599519e-06, + "loss": 0.002, + "step": 230220 + }, + { + "epoch": 1.4766237056373122, + "grad_norm": 0.06051740422844887, + "learning_rate": 1.9449412622185004e-06, + "loss": 0.0006, + "step": 230230 + }, + { + "epoch": 1.4766878425310983, + "grad_norm": 0.001617398695088923, + "learning_rate": 1.944498208119553e-06, + "loss": 0.0014, + "step": 230240 + }, + { + "epoch": 1.4767519794248845, + "grad_norm": 0.18351486325263977, + "learning_rate": 1.9440551923082266e-06, + "loss": 0.0012, + "step": 230250 + }, + { + "epoch": 1.4768161163186706, + "grad_norm": 0.0058859786950051785, + "learning_rate": 1.9436122147900733e-06, + "loss": 0.0009, + "step": 230260 + }, + { + "epoch": 1.4768802532124568, + "grad_norm": 0.0904245376586914, + "learning_rate": 1.943169275570641e-06, + "loss": 0.0016, + "step": 230270 + }, + { + "epoch": 1.476944390106243, + "grad_norm": 0.08254070580005646, + "learning_rate": 1.9427263746554843e-06, + "loss": 0.0014, + "step": 230280 + }, + { + "epoch": 1.4770085270000288, + "grad_norm": 0.054536763578653336, + "learning_rate": 1.942283512050151e-06, + "loss": 0.0006, + "step": 230290 + }, + { + "epoch": 1.477072663893815, + "grad_norm": 0.058397021144628525, + "learning_rate": 1.94184068776019e-06, + "loss": 0.0011, + "step": 230300 + }, + { + "epoch": 1.4771368007876011, + "grad_norm": 0.009756239131093025, + "learning_rate": 1.9413979017911504e-06, + "loss": 0.0015, + "step": 230310 + }, + { + "epoch": 1.477200937681387, + "grad_norm": 0.02611563727259636, + "learning_rate": 1.940955154148582e-06, + "loss": 0.0013, + "step": 230320 + }, + { + "epoch": 1.4772650745751732, + "grad_norm": 0.15121859312057495, + "learning_rate": 1.940512444838032e-06, + "loss": 0.0011, + "step": 230330 + }, + { + "epoch": 1.4773292114689593, + "grad_norm": 0.11068111658096313, + "learning_rate": 1.9400697738650474e-06, + "loss": 0.0011, + "step": 230340 + }, + { + "epoch": 1.4773933483627455, + "grad_norm": 0.014362462796270847, + "learning_rate": 1.9396271412351737e-06, + "loss": 0.0009, + "step": 230350 + }, + { + "epoch": 1.4774574852565316, + "grad_norm": 0.07700163125991821, + "learning_rate": 1.939184546953961e-06, + "loss": 0.001, + "step": 230360 + }, + { + "epoch": 1.4775216221503176, + "grad_norm": 0.05529524013400078, + "learning_rate": 1.9387419910269536e-06, + "loss": 0.0013, + "step": 230370 + }, + { + "epoch": 1.4775857590441037, + "grad_norm": 0.12332439422607422, + "learning_rate": 1.9382994734596948e-06, + "loss": 0.0022, + "step": 230380 + }, + { + "epoch": 1.4776498959378899, + "grad_norm": 0.1013512909412384, + "learning_rate": 1.937856994257734e-06, + "loss": 0.001, + "step": 230390 + }, + { + "epoch": 1.4777140328316758, + "grad_norm": 0.02150823548436165, + "learning_rate": 1.9374145534266133e-06, + "loss": 0.0011, + "step": 230400 + }, + { + "epoch": 1.477778169725462, + "grad_norm": 0.042761147022247314, + "learning_rate": 1.936972150971877e-06, + "loss": 0.0019, + "step": 230410 + }, + { + "epoch": 1.477842306619248, + "grad_norm": 0.16678951680660248, + "learning_rate": 1.936529786899067e-06, + "loss": 0.001, + "step": 230420 + }, + { + "epoch": 1.4779064435130342, + "grad_norm": 0.07481800019741058, + "learning_rate": 1.93608746121373e-06, + "loss": 0.0012, + "step": 230430 + }, + { + "epoch": 1.4779705804068204, + "grad_norm": 0.020131539553403854, + "learning_rate": 1.9356451739214067e-06, + "loss": 0.0005, + "step": 230440 + }, + { + "epoch": 1.4780347173006065, + "grad_norm": 0.0174866896122694, + "learning_rate": 1.93520292502764e-06, + "loss": 0.0011, + "step": 230450 + }, + { + "epoch": 1.4780988541943925, + "grad_norm": 0.03777153789997101, + "learning_rate": 1.934760714537969e-06, + "loss": 0.0013, + "step": 230460 + }, + { + "epoch": 1.4781629910881786, + "grad_norm": 0.021376833319664, + "learning_rate": 1.934318542457938e-06, + "loss": 0.0008, + "step": 230470 + }, + { + "epoch": 1.4782271279819648, + "grad_norm": 0.047228723764419556, + "learning_rate": 1.9338764087930873e-06, + "loss": 0.0013, + "step": 230480 + }, + { + "epoch": 1.4782912648757507, + "grad_norm": 0.026556644588708878, + "learning_rate": 1.9334343135489564e-06, + "loss": 0.0012, + "step": 230490 + }, + { + "epoch": 1.4783554017695368, + "grad_norm": 0.157155841588974, + "learning_rate": 1.9329922567310833e-06, + "loss": 0.0023, + "step": 230500 + }, + { + "epoch": 1.478419538663323, + "grad_norm": 0.08791355788707733, + "learning_rate": 1.9325502383450114e-06, + "loss": 0.0013, + "step": 230510 + }, + { + "epoch": 1.4784836755571091, + "grad_norm": 0.21543775498867035, + "learning_rate": 1.932108258396277e-06, + "loss": 0.002, + "step": 230520 + }, + { + "epoch": 1.4785478124508953, + "grad_norm": 0.1585678607225418, + "learning_rate": 1.931666316890417e-06, + "loss": 0.0024, + "step": 230530 + }, + { + "epoch": 1.4786119493446812, + "grad_norm": 0.036143168807029724, + "learning_rate": 1.931224413832973e-06, + "loss": 0.0007, + "step": 230540 + }, + { + "epoch": 1.4786760862384674, + "grad_norm": 0.07579084485769272, + "learning_rate": 1.9307825492294798e-06, + "loss": 0.0013, + "step": 230550 + }, + { + "epoch": 1.4787402231322535, + "grad_norm": 0.2692042589187622, + "learning_rate": 1.930340723085475e-06, + "loss": 0.0019, + "step": 230560 + }, + { + "epoch": 1.4788043600260397, + "grad_norm": 0.10390500724315643, + "learning_rate": 1.929898935406493e-06, + "loss": 0.0009, + "step": 230570 + }, + { + "epoch": 1.4788684969198256, + "grad_norm": 0.03958144411444664, + "learning_rate": 1.929457186198071e-06, + "loss": 0.001, + "step": 230580 + }, + { + "epoch": 1.4789326338136117, + "grad_norm": 0.06917309761047363, + "learning_rate": 1.9290154754657474e-06, + "loss": 0.0011, + "step": 230590 + }, + { + "epoch": 1.4789967707073979, + "grad_norm": 0.05723357945680618, + "learning_rate": 1.928573803215055e-06, + "loss": 0.0012, + "step": 230600 + }, + { + "epoch": 1.479060907601184, + "grad_norm": 0.10763806849718094, + "learning_rate": 1.928132169451527e-06, + "loss": 0.001, + "step": 230610 + }, + { + "epoch": 1.4791250444949702, + "grad_norm": 0.21228329837322235, + "learning_rate": 1.927690574180697e-06, + "loss": 0.0017, + "step": 230620 + }, + { + "epoch": 1.479189181388756, + "grad_norm": 0.10918084532022476, + "learning_rate": 1.9272490174081015e-06, + "loss": 0.0011, + "step": 230630 + }, + { + "epoch": 1.4792533182825423, + "grad_norm": 0.061497002840042114, + "learning_rate": 1.9268074991392722e-06, + "loss": 0.002, + "step": 230640 + }, + { + "epoch": 1.4793174551763284, + "grad_norm": 0.11596682667732239, + "learning_rate": 1.9263660193797397e-06, + "loss": 0.0013, + "step": 230650 + }, + { + "epoch": 1.4793815920701143, + "grad_norm": 0.0664464458823204, + "learning_rate": 1.9259245781350387e-06, + "loss": 0.0018, + "step": 230660 + }, + { + "epoch": 1.4794457289639005, + "grad_norm": 0.06875289231538773, + "learning_rate": 1.9254831754107e-06, + "loss": 0.0006, + "step": 230670 + }, + { + "epoch": 1.4795098658576866, + "grad_norm": 0.2601640224456787, + "learning_rate": 1.925041811212255e-06, + "loss": 0.0019, + "step": 230680 + }, + { + "epoch": 1.4795740027514728, + "grad_norm": 0.10457682609558105, + "learning_rate": 1.9246004855452314e-06, + "loss": 0.001, + "step": 230690 + }, + { + "epoch": 1.479638139645259, + "grad_norm": 0.16158710420131683, + "learning_rate": 1.9241591984151636e-06, + "loss": 0.0008, + "step": 230700 + }, + { + "epoch": 1.479702276539045, + "grad_norm": 0.11226168274879456, + "learning_rate": 1.9237179498275792e-06, + "loss": 0.0007, + "step": 230710 + }, + { + "epoch": 1.479766413432831, + "grad_norm": 0.09860129654407501, + "learning_rate": 1.923276739788008e-06, + "loss": 0.0016, + "step": 230720 + }, + { + "epoch": 1.4798305503266171, + "grad_norm": 0.07697834819555283, + "learning_rate": 1.922835568301976e-06, + "loss": 0.0023, + "step": 230730 + }, + { + "epoch": 1.4798946872204033, + "grad_norm": 0.10646352916955948, + "learning_rate": 1.922394435375015e-06, + "loss": 0.0014, + "step": 230740 + }, + { + "epoch": 1.4799588241141892, + "grad_norm": 0.06855232268571854, + "learning_rate": 1.9219533410126517e-06, + "loss": 0.0013, + "step": 230750 + }, + { + "epoch": 1.4800229610079754, + "grad_norm": 0.1098976582288742, + "learning_rate": 1.9215122852204125e-06, + "loss": 0.0007, + "step": 230760 + }, + { + "epoch": 1.4800870979017615, + "grad_norm": 0.15310770273208618, + "learning_rate": 1.9210712680038236e-06, + "loss": 0.0009, + "step": 230770 + }, + { + "epoch": 1.4801512347955477, + "grad_norm": 0.02309637889266014, + "learning_rate": 1.920630289368413e-06, + "loss": 0.0024, + "step": 230780 + }, + { + "epoch": 1.4802153716893338, + "grad_norm": 0.12637177109718323, + "learning_rate": 1.920189349319706e-06, + "loss": 0.0017, + "step": 230790 + }, + { + "epoch": 1.4802795085831197, + "grad_norm": 0.07150278985500336, + "learning_rate": 1.919748447863226e-06, + "loss": 0.0011, + "step": 230800 + }, + { + "epoch": 1.480343645476906, + "grad_norm": 0.07935880124568939, + "learning_rate": 1.9193075850045006e-06, + "loss": 0.0011, + "step": 230810 + }, + { + "epoch": 1.480407782370692, + "grad_norm": 0.011261014267802238, + "learning_rate": 1.9188667607490533e-06, + "loss": 0.0012, + "step": 230820 + }, + { + "epoch": 1.480471919264478, + "grad_norm": 0.10852573066949844, + "learning_rate": 1.9184259751024076e-06, + "loss": 0.001, + "step": 230830 + }, + { + "epoch": 1.4805360561582641, + "grad_norm": 0.044947221875190735, + "learning_rate": 1.917985228070085e-06, + "loss": 0.0018, + "step": 230840 + }, + { + "epoch": 1.4806001930520503, + "grad_norm": 0.0234210267663002, + "learning_rate": 1.917544519657612e-06, + "loss": 0.0007, + "step": 230850 + }, + { + "epoch": 1.4806643299458364, + "grad_norm": 0.10135694593191147, + "learning_rate": 1.917103849870509e-06, + "loss": 0.0023, + "step": 230860 + }, + { + "epoch": 1.4807284668396226, + "grad_norm": 0.011645328253507614, + "learning_rate": 1.916663218714298e-06, + "loss": 0.0006, + "step": 230870 + }, + { + "epoch": 1.4807926037334087, + "grad_norm": 0.08838336914777756, + "learning_rate": 1.9162226261944988e-06, + "loss": 0.0008, + "step": 230880 + }, + { + "epoch": 1.4808567406271946, + "grad_norm": 0.06066448986530304, + "learning_rate": 1.915782072316636e-06, + "loss": 0.0012, + "step": 230890 + }, + { + "epoch": 1.4809208775209808, + "grad_norm": 0.2691197395324707, + "learning_rate": 1.9153415570862275e-06, + "loss": 0.0015, + "step": 230900 + }, + { + "epoch": 1.480985014414767, + "grad_norm": 0.00545494956895709, + "learning_rate": 1.9149010805087947e-06, + "loss": 0.0015, + "step": 230910 + }, + { + "epoch": 1.4810491513085529, + "grad_norm": 0.06407414376735687, + "learning_rate": 1.9144606425898545e-06, + "loss": 0.0021, + "step": 230920 + }, + { + "epoch": 1.481113288202339, + "grad_norm": 0.024175606667995453, + "learning_rate": 1.9140202433349288e-06, + "loss": 0.0005, + "step": 230930 + }, + { + "epoch": 1.4811774250961252, + "grad_norm": 0.10273516178131104, + "learning_rate": 1.913579882749535e-06, + "loss": 0.0013, + "step": 230940 + }, + { + "epoch": 1.4812415619899113, + "grad_norm": 0.065152108669281, + "learning_rate": 1.9131395608391893e-06, + "loss": 0.0007, + "step": 230950 + }, + { + "epoch": 1.4813056988836975, + "grad_norm": 0.05232097581028938, + "learning_rate": 1.9126992776094133e-06, + "loss": 0.0007, + "step": 230960 + }, + { + "epoch": 1.4813698357774834, + "grad_norm": 0.03106767311692238, + "learning_rate": 1.9122590330657215e-06, + "loss": 0.001, + "step": 230970 + }, + { + "epoch": 1.4814339726712695, + "grad_norm": 0.10196743160486221, + "learning_rate": 1.9118188272136305e-06, + "loss": 0.001, + "step": 230980 + }, + { + "epoch": 1.4814981095650557, + "grad_norm": 0.08105448633432388, + "learning_rate": 1.9113786600586555e-06, + "loss": 0.0008, + "step": 230990 + }, + { + "epoch": 1.4815622464588418, + "grad_norm": 0.0760020911693573, + "learning_rate": 1.9109385316063146e-06, + "loss": 0.0009, + "step": 231000 + }, + { + "epoch": 1.4816263833526278, + "grad_norm": 0.02205081842839718, + "learning_rate": 1.910498441862122e-06, + "loss": 0.001, + "step": 231010 + }, + { + "epoch": 1.481690520246414, + "grad_norm": 0.09554306417703629, + "learning_rate": 1.9100583908315917e-06, + "loss": 0.0009, + "step": 231020 + }, + { + "epoch": 1.4817546571402, + "grad_norm": 0.008705779910087585, + "learning_rate": 1.9096183785202365e-06, + "loss": 0.0017, + "step": 231030 + }, + { + "epoch": 1.4818187940339862, + "grad_norm": 0.014837021008133888, + "learning_rate": 1.9091784049335733e-06, + "loss": 0.0016, + "step": 231040 + }, + { + "epoch": 1.4818829309277723, + "grad_norm": 0.08490317314863205, + "learning_rate": 1.908738470077114e-06, + "loss": 0.0009, + "step": 231050 + }, + { + "epoch": 1.4819470678215583, + "grad_norm": 0.2210465967655182, + "learning_rate": 1.9082985739563703e-06, + "loss": 0.0023, + "step": 231060 + }, + { + "epoch": 1.4820112047153444, + "grad_norm": 0.09002111852169037, + "learning_rate": 1.9078587165768537e-06, + "loss": 0.0008, + "step": 231070 + }, + { + "epoch": 1.4820753416091306, + "grad_norm": 0.07184798270463943, + "learning_rate": 1.9074188979440796e-06, + "loss": 0.0008, + "step": 231080 + }, + { + "epoch": 1.4821394785029165, + "grad_norm": 0.04037946090102196, + "learning_rate": 1.906979118063556e-06, + "loss": 0.0016, + "step": 231090 + }, + { + "epoch": 1.4822036153967026, + "grad_norm": 0.007163423113524914, + "learning_rate": 1.9065393769407931e-06, + "loss": 0.0009, + "step": 231100 + }, + { + "epoch": 1.4822677522904888, + "grad_norm": 0.3299335837364197, + "learning_rate": 1.9060996745813049e-06, + "loss": 0.002, + "step": 231110 + }, + { + "epoch": 1.482331889184275, + "grad_norm": 0.09048480540513992, + "learning_rate": 1.905660010990598e-06, + "loss": 0.0008, + "step": 231120 + }, + { + "epoch": 1.482396026078061, + "grad_norm": 0.0650215595960617, + "learning_rate": 1.9052203861741837e-06, + "loss": 0.0021, + "step": 231130 + }, + { + "epoch": 1.4824601629718472, + "grad_norm": 0.0951550230383873, + "learning_rate": 1.9047808001375679e-06, + "loss": 0.0009, + "step": 231140 + }, + { + "epoch": 1.4825242998656332, + "grad_norm": 0.027630243450403214, + "learning_rate": 1.904341252886262e-06, + "loss": 0.0006, + "step": 231150 + }, + { + "epoch": 1.4825884367594193, + "grad_norm": 0.1635369211435318, + "learning_rate": 1.9039017444257728e-06, + "loss": 0.0028, + "step": 231160 + }, + { + "epoch": 1.4826525736532055, + "grad_norm": 0.06888850033283234, + "learning_rate": 1.9034622747616078e-06, + "loss": 0.0014, + "step": 231170 + }, + { + "epoch": 1.4827167105469914, + "grad_norm": 0.10007062554359436, + "learning_rate": 1.9030228438992716e-06, + "loss": 0.001, + "step": 231180 + }, + { + "epoch": 1.4827808474407775, + "grad_norm": 0.058289770036935806, + "learning_rate": 1.9025834518442737e-06, + "loss": 0.0008, + "step": 231190 + }, + { + "epoch": 1.4828449843345637, + "grad_norm": 0.052228596061468124, + "learning_rate": 1.9021440986021196e-06, + "loss": 0.001, + "step": 231200 + }, + { + "epoch": 1.4829091212283498, + "grad_norm": 0.050283003598451614, + "learning_rate": 1.9017047841783137e-06, + "loss": 0.001, + "step": 231210 + }, + { + "epoch": 1.482973258122136, + "grad_norm": 0.16117650270462036, + "learning_rate": 1.90126550857836e-06, + "loss": 0.0013, + "step": 231220 + }, + { + "epoch": 1.483037395015922, + "grad_norm": 0.024334462359547615, + "learning_rate": 1.9008262718077653e-06, + "loss": 0.0016, + "step": 231230 + }, + { + "epoch": 1.483101531909708, + "grad_norm": 0.04623173549771309, + "learning_rate": 1.9003870738720321e-06, + "loss": 0.0008, + "step": 231240 + }, + { + "epoch": 1.4831656688034942, + "grad_norm": 0.15785768628120422, + "learning_rate": 1.8999479147766625e-06, + "loss": 0.0018, + "step": 231250 + }, + { + "epoch": 1.4832298056972804, + "grad_norm": 0.20995059609413147, + "learning_rate": 1.8995087945271634e-06, + "loss": 0.0021, + "step": 231260 + }, + { + "epoch": 1.4832939425910663, + "grad_norm": 0.02085651084780693, + "learning_rate": 1.8990697131290341e-06, + "loss": 0.0014, + "step": 231270 + }, + { + "epoch": 1.4833580794848524, + "grad_norm": 0.13163235783576965, + "learning_rate": 1.8986306705877782e-06, + "loss": 0.0011, + "step": 231280 + }, + { + "epoch": 1.4834222163786386, + "grad_norm": 0.02859206683933735, + "learning_rate": 1.8981916669088947e-06, + "loss": 0.0015, + "step": 231290 + }, + { + "epoch": 1.4834863532724247, + "grad_norm": 0.45611247420310974, + "learning_rate": 1.897752702097888e-06, + "loss": 0.0146, + "step": 231300 + }, + { + "epoch": 1.4835504901662109, + "grad_norm": 0.29005199670791626, + "learning_rate": 1.8973137761602572e-06, + "loss": 0.0014, + "step": 231310 + }, + { + "epoch": 1.4836146270599968, + "grad_norm": 0.03413574770092964, + "learning_rate": 1.8968748891015021e-06, + "loss": 0.0035, + "step": 231320 + }, + { + "epoch": 1.483678763953783, + "grad_norm": 0.037481214851140976, + "learning_rate": 1.896436040927121e-06, + "loss": 0.0012, + "step": 231330 + }, + { + "epoch": 1.483742900847569, + "grad_norm": 0.0579238161444664, + "learning_rate": 1.895997231642614e-06, + "loss": 0.0007, + "step": 231340 + }, + { + "epoch": 1.483807037741355, + "grad_norm": 0.027532974258065224, + "learning_rate": 1.8955584612534821e-06, + "loss": 0.0018, + "step": 231350 + }, + { + "epoch": 1.4838711746351412, + "grad_norm": 0.11463338881731033, + "learning_rate": 1.8951197297652214e-06, + "loss": 0.0013, + "step": 231360 + }, + { + "epoch": 1.4839353115289273, + "grad_norm": 0.09564340114593506, + "learning_rate": 1.8946810371833273e-06, + "loss": 0.0017, + "step": 231370 + }, + { + "epoch": 1.4839994484227135, + "grad_norm": 0.04451832175254822, + "learning_rate": 1.8942423835133012e-06, + "loss": 0.0012, + "step": 231380 + }, + { + "epoch": 1.4840635853164996, + "grad_norm": 0.09709803760051727, + "learning_rate": 1.8938037687606376e-06, + "loss": 0.0006, + "step": 231390 + }, + { + "epoch": 1.4841277222102858, + "grad_norm": 0.05841127783060074, + "learning_rate": 1.8933651929308328e-06, + "loss": 0.0015, + "step": 231400 + }, + { + "epoch": 1.4841918591040717, + "grad_norm": 0.05287181958556175, + "learning_rate": 1.8929266560293807e-06, + "loss": 0.0008, + "step": 231410 + }, + { + "epoch": 1.4842559959978578, + "grad_norm": 0.0692257508635521, + "learning_rate": 1.8924881580617804e-06, + "loss": 0.0005, + "step": 231420 + }, + { + "epoch": 1.484320132891644, + "grad_norm": 0.07513230293989182, + "learning_rate": 1.8920496990335235e-06, + "loss": 0.0005, + "step": 231430 + }, + { + "epoch": 1.48438426978543, + "grad_norm": 0.2818194031715393, + "learning_rate": 1.8916112789501057e-06, + "loss": 0.0016, + "step": 231440 + }, + { + "epoch": 1.484448406679216, + "grad_norm": 0.012203599326312542, + "learning_rate": 1.891172897817018e-06, + "loss": 0.0012, + "step": 231450 + }, + { + "epoch": 1.4845125435730022, + "grad_norm": 0.02082725055515766, + "learning_rate": 1.8907345556397577e-06, + "loss": 0.0011, + "step": 231460 + }, + { + "epoch": 1.4845766804667884, + "grad_norm": 0.12193366885185242, + "learning_rate": 1.8902962524238156e-06, + "loss": 0.0009, + "step": 231470 + }, + { + "epoch": 1.4846408173605745, + "grad_norm": 0.1180684044957161, + "learning_rate": 1.8898579881746842e-06, + "loss": 0.0015, + "step": 231480 + }, + { + "epoch": 1.4847049542543604, + "grad_norm": 0.0809510126709938, + "learning_rate": 1.8894197628978528e-06, + "loss": 0.0012, + "step": 231490 + }, + { + "epoch": 1.4847690911481466, + "grad_norm": 0.17628474533557892, + "learning_rate": 1.8889815765988168e-06, + "loss": 0.0015, + "step": 231500 + }, + { + "epoch": 1.4848332280419327, + "grad_norm": 0.05376352742314339, + "learning_rate": 1.8885434292830652e-06, + "loss": 0.0037, + "step": 231510 + }, + { + "epoch": 1.4848973649357187, + "grad_norm": 0.02923703007400036, + "learning_rate": 1.8881053209560857e-06, + "loss": 0.0009, + "step": 231520 + }, + { + "epoch": 1.4849615018295048, + "grad_norm": 0.026265686377882957, + "learning_rate": 1.8876672516233729e-06, + "loss": 0.0013, + "step": 231530 + }, + { + "epoch": 1.485025638723291, + "grad_norm": 0.06677863746881485, + "learning_rate": 1.8872292212904136e-06, + "loss": 0.0018, + "step": 231540 + }, + { + "epoch": 1.4850897756170771, + "grad_norm": 0.06208925321698189, + "learning_rate": 1.8867912299626973e-06, + "loss": 0.0014, + "step": 231550 + }, + { + "epoch": 1.4851539125108633, + "grad_norm": 0.1605261266231537, + "learning_rate": 1.8863532776457094e-06, + "loss": 0.0018, + "step": 231560 + }, + { + "epoch": 1.4852180494046494, + "grad_norm": 0.039795663207769394, + "learning_rate": 1.8859153643449424e-06, + "loss": 0.0008, + "step": 231570 + }, + { + "epoch": 1.4852821862984353, + "grad_norm": 0.0179697647690773, + "learning_rate": 1.8854774900658812e-06, + "loss": 0.0011, + "step": 231580 + }, + { + "epoch": 1.4853463231922215, + "grad_norm": 0.1934625506401062, + "learning_rate": 1.8850396548140133e-06, + "loss": 0.0011, + "step": 231590 + }, + { + "epoch": 1.4854104600860076, + "grad_norm": 0.07390321791172028, + "learning_rate": 1.8846018585948228e-06, + "loss": 0.0013, + "step": 231600 + }, + { + "epoch": 1.4854745969797936, + "grad_norm": 0.06746132671833038, + "learning_rate": 1.8841641014137995e-06, + "loss": 0.0008, + "step": 231610 + }, + { + "epoch": 1.4855387338735797, + "grad_norm": 0.06024260073900223, + "learning_rate": 1.8837263832764268e-06, + "loss": 0.0018, + "step": 231620 + }, + { + "epoch": 1.4856028707673659, + "grad_norm": 0.06886883825063705, + "learning_rate": 1.8832887041881876e-06, + "loss": 0.0014, + "step": 231630 + }, + { + "epoch": 1.485667007661152, + "grad_norm": 0.05687910318374634, + "learning_rate": 1.8828510641545705e-06, + "loss": 0.0008, + "step": 231640 + }, + { + "epoch": 1.4857311445549382, + "grad_norm": 0.08611790090799332, + "learning_rate": 1.8824134631810575e-06, + "loss": 0.0021, + "step": 231650 + }, + { + "epoch": 1.485795281448724, + "grad_norm": 0.028475651517510414, + "learning_rate": 1.8819759012731314e-06, + "loss": 0.0015, + "step": 231660 + }, + { + "epoch": 1.4858594183425102, + "grad_norm": 0.08990021049976349, + "learning_rate": 1.8815383784362745e-06, + "loss": 0.0017, + "step": 231670 + }, + { + "epoch": 1.4859235552362964, + "grad_norm": 0.04032914713025093, + "learning_rate": 1.8811008946759718e-06, + "loss": 0.0006, + "step": 231680 + }, + { + "epoch": 1.4859876921300825, + "grad_norm": 0.061242811381816864, + "learning_rate": 1.880663449997704e-06, + "loss": 0.0014, + "step": 231690 + }, + { + "epoch": 1.4860518290238685, + "grad_norm": 0.05517999827861786, + "learning_rate": 1.8802260444069526e-06, + "loss": 0.0022, + "step": 231700 + }, + { + "epoch": 1.4861159659176546, + "grad_norm": 0.046025365591049194, + "learning_rate": 1.8797886779091968e-06, + "loss": 0.0012, + "step": 231710 + }, + { + "epoch": 1.4861801028114408, + "grad_norm": 0.01580861397087574, + "learning_rate": 1.879351350509921e-06, + "loss": 0.0009, + "step": 231720 + }, + { + "epoch": 1.486244239705227, + "grad_norm": 0.115535669028759, + "learning_rate": 1.8789140622146025e-06, + "loss": 0.0009, + "step": 231730 + }, + { + "epoch": 1.486308376599013, + "grad_norm": 0.08806314319372177, + "learning_rate": 1.8784768130287211e-06, + "loss": 0.001, + "step": 231740 + }, + { + "epoch": 1.486372513492799, + "grad_norm": 0.17606164515018463, + "learning_rate": 1.8780396029577551e-06, + "loss": 0.0015, + "step": 231750 + }, + { + "epoch": 1.4864366503865851, + "grad_norm": 0.01907699555158615, + "learning_rate": 1.8776024320071856e-06, + "loss": 0.0012, + "step": 231760 + }, + { + "epoch": 1.4865007872803713, + "grad_norm": 0.09007932245731354, + "learning_rate": 1.8771653001824891e-06, + "loss": 0.0007, + "step": 231770 + }, + { + "epoch": 1.4865649241741572, + "grad_norm": 0.11589324474334717, + "learning_rate": 1.876728207489142e-06, + "loss": 0.0009, + "step": 231780 + }, + { + "epoch": 1.4866290610679433, + "grad_norm": 0.05713924765586853, + "learning_rate": 1.8762911539326234e-06, + "loss": 0.0021, + "step": 231790 + }, + { + "epoch": 1.4866931979617295, + "grad_norm": 0.04543076828122139, + "learning_rate": 1.8758541395184099e-06, + "loss": 0.001, + "step": 231800 + }, + { + "epoch": 1.4867573348555156, + "grad_norm": 0.0943898931145668, + "learning_rate": 1.8754171642519763e-06, + "loss": 0.0013, + "step": 231810 + }, + { + "epoch": 1.4868214717493018, + "grad_norm": 0.03184668347239494, + "learning_rate": 1.8749802281387975e-06, + "loss": 0.001, + "step": 231820 + }, + { + "epoch": 1.486885608643088, + "grad_norm": 0.06906899809837341, + "learning_rate": 1.8745433311843514e-06, + "loss": 0.0023, + "step": 231830 + }, + { + "epoch": 1.4869497455368739, + "grad_norm": 0.050902411341667175, + "learning_rate": 1.8741064733941112e-06, + "loss": 0.001, + "step": 231840 + }, + { + "epoch": 1.48701388243066, + "grad_norm": 0.0598636195063591, + "learning_rate": 1.8736696547735505e-06, + "loss": 0.0016, + "step": 231850 + }, + { + "epoch": 1.4870780193244462, + "grad_norm": 0.12425347417593002, + "learning_rate": 1.873232875328142e-06, + "loss": 0.0024, + "step": 231860 + }, + { + "epoch": 1.487142156218232, + "grad_norm": 0.04981955885887146, + "learning_rate": 1.8727961350633622e-06, + "loss": 0.001, + "step": 231870 + }, + { + "epoch": 1.4872062931120182, + "grad_norm": 0.0955575481057167, + "learning_rate": 1.8723594339846813e-06, + "loss": 0.0014, + "step": 231880 + }, + { + "epoch": 1.4872704300058044, + "grad_norm": 0.04553009197115898, + "learning_rate": 1.8719227720975725e-06, + "loss": 0.0019, + "step": 231890 + }, + { + "epoch": 1.4873345668995905, + "grad_norm": 0.09013471007347107, + "learning_rate": 1.8714861494075048e-06, + "loss": 0.0012, + "step": 231900 + }, + { + "epoch": 1.4873987037933767, + "grad_norm": 0.07908090204000473, + "learning_rate": 1.8710495659199535e-06, + "loss": 0.0026, + "step": 231910 + }, + { + "epoch": 1.4874628406871626, + "grad_norm": 0.08770821243524551, + "learning_rate": 1.8706130216403872e-06, + "loss": 0.0026, + "step": 231920 + }, + { + "epoch": 1.4875269775809488, + "grad_norm": 0.02814689837396145, + "learning_rate": 1.8701765165742746e-06, + "loss": 0.0008, + "step": 231930 + }, + { + "epoch": 1.487591114474735, + "grad_norm": 0.05187418684363365, + "learning_rate": 1.8697400507270891e-06, + "loss": 0.0011, + "step": 231940 + }, + { + "epoch": 1.4876552513685208, + "grad_norm": 0.05968382582068443, + "learning_rate": 1.8693036241042978e-06, + "loss": 0.0016, + "step": 231950 + }, + { + "epoch": 1.487719388262307, + "grad_norm": 0.08687455952167511, + "learning_rate": 1.8688672367113691e-06, + "loss": 0.0005, + "step": 231960 + }, + { + "epoch": 1.4877835251560931, + "grad_norm": 0.04117651283740997, + "learning_rate": 1.8684308885537704e-06, + "loss": 0.0014, + "step": 231970 + }, + { + "epoch": 1.4878476620498793, + "grad_norm": 0.10416851192712784, + "learning_rate": 1.8679945796369725e-06, + "loss": 0.001, + "step": 231980 + }, + { + "epoch": 1.4879117989436654, + "grad_norm": 0.059600580483675, + "learning_rate": 1.8675583099664413e-06, + "loss": 0.001, + "step": 231990 + }, + { + "epoch": 1.4879759358374516, + "grad_norm": 0.04816209897398949, + "learning_rate": 1.8671220795476424e-06, + "loss": 0.0005, + "step": 232000 + }, + { + "epoch": 1.4880400727312375, + "grad_norm": 0.018877891823649406, + "learning_rate": 1.8666858883860417e-06, + "loss": 0.0016, + "step": 232010 + }, + { + "epoch": 1.4881042096250237, + "grad_norm": 0.11844898760318756, + "learning_rate": 1.866249736487108e-06, + "loss": 0.0009, + "step": 232020 + }, + { + "epoch": 1.4881683465188098, + "grad_norm": 0.07525431364774704, + "learning_rate": 1.8658136238563041e-06, + "loss": 0.0008, + "step": 232030 + }, + { + "epoch": 1.4882324834125957, + "grad_norm": 0.08898802101612091, + "learning_rate": 1.8653775504990962e-06, + "loss": 0.0013, + "step": 232040 + }, + { + "epoch": 1.4882966203063819, + "grad_norm": 0.11834735423326492, + "learning_rate": 1.8649415164209455e-06, + "loss": 0.0009, + "step": 232050 + }, + { + "epoch": 1.488360757200168, + "grad_norm": 0.09863437712192535, + "learning_rate": 1.8645055216273206e-06, + "loss": 0.001, + "step": 232060 + }, + { + "epoch": 1.4884248940939542, + "grad_norm": 0.07873088121414185, + "learning_rate": 1.8640695661236818e-06, + "loss": 0.0014, + "step": 232070 + }, + { + "epoch": 1.4884890309877403, + "grad_norm": 0.030200175940990448, + "learning_rate": 1.863633649915491e-06, + "loss": 0.0028, + "step": 232080 + }, + { + "epoch": 1.4885531678815263, + "grad_norm": 0.038778483867645264, + "learning_rate": 1.863197773008214e-06, + "loss": 0.001, + "step": 232090 + }, + { + "epoch": 1.4886173047753124, + "grad_norm": 0.06872984766960144, + "learning_rate": 1.862761935407309e-06, + "loss": 0.0007, + "step": 232100 + }, + { + "epoch": 1.4886814416690985, + "grad_norm": 0.15356433391571045, + "learning_rate": 1.8623261371182405e-06, + "loss": 0.0011, + "step": 232110 + }, + { + "epoch": 1.4887455785628847, + "grad_norm": 0.06912410259246826, + "learning_rate": 1.861890378146468e-06, + "loss": 0.0012, + "step": 232120 + }, + { + "epoch": 1.4888097154566706, + "grad_norm": 0.010716703720390797, + "learning_rate": 1.8614546584974503e-06, + "loss": 0.0021, + "step": 232130 + }, + { + "epoch": 1.4888738523504568, + "grad_norm": 0.28601473569869995, + "learning_rate": 1.8610189781766507e-06, + "loss": 0.0026, + "step": 232140 + }, + { + "epoch": 1.488937989244243, + "grad_norm": 0.0352194719016552, + "learning_rate": 1.8605833371895266e-06, + "loss": 0.0009, + "step": 232150 + }, + { + "epoch": 1.489002126138029, + "grad_norm": 0.2125111073255539, + "learning_rate": 1.860147735541537e-06, + "loss": 0.001, + "step": 232160 + }, + { + "epoch": 1.4890662630318152, + "grad_norm": 0.08166087418794632, + "learning_rate": 1.8597121732381384e-06, + "loss": 0.0007, + "step": 232170 + }, + { + "epoch": 1.4891303999256011, + "grad_norm": 0.08743234723806381, + "learning_rate": 1.8592766502847925e-06, + "loss": 0.0012, + "step": 232180 + }, + { + "epoch": 1.4891945368193873, + "grad_norm": 0.06199329346418381, + "learning_rate": 1.8588411666869544e-06, + "loss": 0.0008, + "step": 232190 + }, + { + "epoch": 1.4892586737131734, + "grad_norm": 0.023580171167850494, + "learning_rate": 1.8584057224500801e-06, + "loss": 0.0007, + "step": 232200 + }, + { + "epoch": 1.4893228106069594, + "grad_norm": 0.01461642887443304, + "learning_rate": 1.8579703175796293e-06, + "loss": 0.0012, + "step": 232210 + }, + { + "epoch": 1.4893869475007455, + "grad_norm": 0.007897169329226017, + "learning_rate": 1.8575349520810553e-06, + "loss": 0.0013, + "step": 232220 + }, + { + "epoch": 1.4894510843945317, + "grad_norm": 0.04625585302710533, + "learning_rate": 1.8570996259598152e-06, + "loss": 0.0013, + "step": 232230 + }, + { + "epoch": 1.4895152212883178, + "grad_norm": 0.05158587545156479, + "learning_rate": 1.8566643392213607e-06, + "loss": 0.0009, + "step": 232240 + }, + { + "epoch": 1.489579358182104, + "grad_norm": 0.045665670186281204, + "learning_rate": 1.8562290918711502e-06, + "loss": 0.0006, + "step": 232250 + }, + { + "epoch": 1.4896434950758901, + "grad_norm": 0.003189841518178582, + "learning_rate": 1.8557938839146362e-06, + "loss": 0.0026, + "step": 232260 + }, + { + "epoch": 1.489707631969676, + "grad_norm": 0.16319698095321655, + "learning_rate": 1.8553587153572716e-06, + "loss": 0.0016, + "step": 232270 + }, + { + "epoch": 1.4897717688634622, + "grad_norm": 0.17188839614391327, + "learning_rate": 1.8549235862045085e-06, + "loss": 0.0015, + "step": 232280 + }, + { + "epoch": 1.4898359057572483, + "grad_norm": 0.09171821922063828, + "learning_rate": 1.8544884964618021e-06, + "loss": 0.0013, + "step": 232290 + }, + { + "epoch": 1.4899000426510343, + "grad_norm": 0.060288283973932266, + "learning_rate": 1.8540534461346027e-06, + "loss": 0.0014, + "step": 232300 + }, + { + "epoch": 1.4899641795448204, + "grad_norm": 0.05129498243331909, + "learning_rate": 1.853618435228362e-06, + "loss": 0.0013, + "step": 232310 + }, + { + "epoch": 1.4900283164386066, + "grad_norm": 0.11541414260864258, + "learning_rate": 1.8531834637485297e-06, + "loss": 0.0021, + "step": 232320 + }, + { + "epoch": 1.4900924533323927, + "grad_norm": 0.08661103248596191, + "learning_rate": 1.852748531700559e-06, + "loss": 0.0017, + "step": 232330 + }, + { + "epoch": 1.4901565902261789, + "grad_norm": 0.05976423621177673, + "learning_rate": 1.8523136390898982e-06, + "loss": 0.0017, + "step": 232340 + }, + { + "epoch": 1.4902207271199648, + "grad_norm": 0.11782398074865341, + "learning_rate": 1.851878785921996e-06, + "loss": 0.0012, + "step": 232350 + }, + { + "epoch": 1.490284864013751, + "grad_norm": 0.03229362145066261, + "learning_rate": 1.8514439722023043e-06, + "loss": 0.0017, + "step": 232360 + }, + { + "epoch": 1.490349000907537, + "grad_norm": 0.20834103226661682, + "learning_rate": 1.8510091979362693e-06, + "loss": 0.002, + "step": 232370 + }, + { + "epoch": 1.490413137801323, + "grad_norm": 0.08213939517736435, + "learning_rate": 1.85057446312934e-06, + "loss": 0.0009, + "step": 232380 + }, + { + "epoch": 1.4904772746951092, + "grad_norm": 0.06685581803321838, + "learning_rate": 1.850139767786962e-06, + "loss": 0.0009, + "step": 232390 + }, + { + "epoch": 1.4905414115888953, + "grad_norm": 0.1658812165260315, + "learning_rate": 1.8497051119145854e-06, + "loss": 0.0011, + "step": 232400 + }, + { + "epoch": 1.4906055484826815, + "grad_norm": 0.10321488976478577, + "learning_rate": 1.849270495517655e-06, + "loss": 0.0012, + "step": 232410 + }, + { + "epoch": 1.4906696853764676, + "grad_norm": 0.11038286238908768, + "learning_rate": 1.8488359186016168e-06, + "loss": 0.0012, + "step": 232420 + }, + { + "epoch": 1.4907338222702538, + "grad_norm": 0.1400638371706009, + "learning_rate": 1.848401381171916e-06, + "loss": 0.002, + "step": 232430 + }, + { + "epoch": 1.4907979591640397, + "grad_norm": 0.006335919257253408, + "learning_rate": 1.8479668832339992e-06, + "loss": 0.0012, + "step": 232440 + }, + { + "epoch": 1.4908620960578258, + "grad_norm": 0.1237395852804184, + "learning_rate": 1.8475324247933103e-06, + "loss": 0.0014, + "step": 232450 + }, + { + "epoch": 1.490926232951612, + "grad_norm": 0.046299826353788376, + "learning_rate": 1.847098005855293e-06, + "loss": 0.0024, + "step": 232460 + }, + { + "epoch": 1.490990369845398, + "grad_norm": 0.0490373857319355, + "learning_rate": 1.8466636264253895e-06, + "loss": 0.001, + "step": 232470 + }, + { + "epoch": 1.491054506739184, + "grad_norm": 0.09268473088741302, + "learning_rate": 1.8462292865090458e-06, + "loss": 0.0014, + "step": 232480 + }, + { + "epoch": 1.4911186436329702, + "grad_norm": 0.07769180834293365, + "learning_rate": 1.8457949861117035e-06, + "loss": 0.0016, + "step": 232490 + }, + { + "epoch": 1.4911827805267563, + "grad_norm": 0.03023562766611576, + "learning_rate": 1.8453607252388022e-06, + "loss": 0.0027, + "step": 232500 + }, + { + "epoch": 1.4912469174205425, + "grad_norm": 0.1785176545381546, + "learning_rate": 1.8449265038957882e-06, + "loss": 0.0019, + "step": 232510 + }, + { + "epoch": 1.4913110543143284, + "grad_norm": 0.02365567721426487, + "learning_rate": 1.844492322088099e-06, + "loss": 0.0009, + "step": 232520 + }, + { + "epoch": 1.4913751912081146, + "grad_norm": 0.02931872569024563, + "learning_rate": 1.8440581798211765e-06, + "loss": 0.0008, + "step": 232530 + }, + { + "epoch": 1.4914393281019007, + "grad_norm": 0.13341720402240753, + "learning_rate": 1.8436240771004587e-06, + "loss": 0.0012, + "step": 232540 + }, + { + "epoch": 1.4915034649956869, + "grad_norm": 0.043740373104810715, + "learning_rate": 1.843190013931389e-06, + "loss": 0.0007, + "step": 232550 + }, + { + "epoch": 1.4915676018894728, + "grad_norm": 0.13154718279838562, + "learning_rate": 1.8427559903194049e-06, + "loss": 0.0014, + "step": 232560 + }, + { + "epoch": 1.491631738783259, + "grad_norm": 0.0338272899389267, + "learning_rate": 1.8423220062699442e-06, + "loss": 0.0009, + "step": 232570 + }, + { + "epoch": 1.491695875677045, + "grad_norm": 0.06865895539522171, + "learning_rate": 1.8418880617884438e-06, + "loss": 0.0013, + "step": 232580 + }, + { + "epoch": 1.4917600125708312, + "grad_norm": 0.033184465020895004, + "learning_rate": 1.841454156880345e-06, + "loss": 0.0009, + "step": 232590 + }, + { + "epoch": 1.4918241494646174, + "grad_norm": 0.021861722692847252, + "learning_rate": 1.8410202915510827e-06, + "loss": 0.0007, + "step": 232600 + }, + { + "epoch": 1.4918882863584033, + "grad_norm": 0.028117230162024498, + "learning_rate": 1.840586465806094e-06, + "loss": 0.0009, + "step": 232610 + }, + { + "epoch": 1.4919524232521895, + "grad_norm": 0.035108163952827454, + "learning_rate": 1.8401526796508135e-06, + "loss": 0.0005, + "step": 232620 + }, + { + "epoch": 1.4920165601459756, + "grad_norm": 0.1248382031917572, + "learning_rate": 1.8397189330906794e-06, + "loss": 0.0017, + "step": 232630 + }, + { + "epoch": 1.4920806970397615, + "grad_norm": 0.08763181418180466, + "learning_rate": 1.839285226131126e-06, + "loss": 0.0008, + "step": 232640 + }, + { + "epoch": 1.4921448339335477, + "grad_norm": 0.016087012365460396, + "learning_rate": 1.8388515587775857e-06, + "loss": 0.002, + "step": 232650 + }, + { + "epoch": 1.4922089708273338, + "grad_norm": 0.06415554136037827, + "learning_rate": 1.8384179310354966e-06, + "loss": 0.0007, + "step": 232660 + }, + { + "epoch": 1.49227310772112, + "grad_norm": 0.0232497900724411, + "learning_rate": 1.83798434291029e-06, + "loss": 0.0013, + "step": 232670 + }, + { + "epoch": 1.4923372446149061, + "grad_norm": 0.10639127343893051, + "learning_rate": 1.8375507944074e-06, + "loss": 0.0012, + "step": 232680 + }, + { + "epoch": 1.4924013815086923, + "grad_norm": 0.03844868019223213, + "learning_rate": 1.8371172855322567e-06, + "loss": 0.0022, + "step": 232690 + }, + { + "epoch": 1.4924655184024782, + "grad_norm": 0.05413614958524704, + "learning_rate": 1.8366838162902956e-06, + "loss": 0.001, + "step": 232700 + }, + { + "epoch": 1.4925296552962644, + "grad_norm": 0.0037933976855129004, + "learning_rate": 1.8362503866869474e-06, + "loss": 0.0012, + "step": 232710 + }, + { + "epoch": 1.4925937921900505, + "grad_norm": 0.0512174591422081, + "learning_rate": 1.8358169967276435e-06, + "loss": 0.0011, + "step": 232720 + }, + { + "epoch": 1.4926579290838364, + "grad_norm": 0.04273045063018799, + "learning_rate": 1.8353836464178115e-06, + "loss": 0.0008, + "step": 232730 + }, + { + "epoch": 1.4927220659776226, + "grad_norm": 0.03175722062587738, + "learning_rate": 1.834950335762886e-06, + "loss": 0.0013, + "step": 232740 + }, + { + "epoch": 1.4927862028714087, + "grad_norm": 0.07337773591279984, + "learning_rate": 1.8345170647682953e-06, + "loss": 0.0009, + "step": 232750 + }, + { + "epoch": 1.4928503397651949, + "grad_norm": 0.03609747067093849, + "learning_rate": 1.8340838334394679e-06, + "loss": 0.0012, + "step": 232760 + }, + { + "epoch": 1.492914476658981, + "grad_norm": 0.08420764654874802, + "learning_rate": 1.8336506417818306e-06, + "loss": 0.0016, + "step": 232770 + }, + { + "epoch": 1.492978613552767, + "grad_norm": 0.11893970519304276, + "learning_rate": 1.833217489800816e-06, + "loss": 0.0025, + "step": 232780 + }, + { + "epoch": 1.493042750446553, + "grad_norm": 0.023968994617462158, + "learning_rate": 1.8327843775018495e-06, + "loss": 0.001, + "step": 232790 + }, + { + "epoch": 1.4931068873403393, + "grad_norm": 0.13853110373020172, + "learning_rate": 1.8323513048903563e-06, + "loss": 0.0013, + "step": 232800 + }, + { + "epoch": 1.4931710242341254, + "grad_norm": 0.04194265604019165, + "learning_rate": 1.8319182719717665e-06, + "loss": 0.001, + "step": 232810 + }, + { + "epoch": 1.4932351611279113, + "grad_norm": 0.028974315151572227, + "learning_rate": 1.8314852787515053e-06, + "loss": 0.0007, + "step": 232820 + }, + { + "epoch": 1.4932992980216975, + "grad_norm": 0.01592678390443325, + "learning_rate": 1.8310523252349982e-06, + "loss": 0.0019, + "step": 232830 + }, + { + "epoch": 1.4933634349154836, + "grad_norm": 0.024703364819288254, + "learning_rate": 1.8306194114276687e-06, + "loss": 0.002, + "step": 232840 + }, + { + "epoch": 1.4934275718092698, + "grad_norm": 0.13141685724258423, + "learning_rate": 1.8301865373349448e-06, + "loss": 0.0014, + "step": 232850 + }, + { + "epoch": 1.493491708703056, + "grad_norm": 0.10000346601009369, + "learning_rate": 1.8297537029622487e-06, + "loss": 0.0012, + "step": 232860 + }, + { + "epoch": 1.4935558455968418, + "grad_norm": 0.021517250686883926, + "learning_rate": 1.8293209083150027e-06, + "loss": 0.0008, + "step": 232870 + }, + { + "epoch": 1.493619982490628, + "grad_norm": 0.032985907047986984, + "learning_rate": 1.8288881533986335e-06, + "loss": 0.0012, + "step": 232880 + }, + { + "epoch": 1.4936841193844141, + "grad_norm": 0.17923562228679657, + "learning_rate": 1.8284554382185604e-06, + "loss": 0.0019, + "step": 232890 + }, + { + "epoch": 1.4937482562782, + "grad_norm": 0.048135485500097275, + "learning_rate": 1.8280227627802093e-06, + "loss": 0.0011, + "step": 232900 + }, + { + "epoch": 1.4938123931719862, + "grad_norm": 0.2491879016160965, + "learning_rate": 1.827590127089e-06, + "loss": 0.0008, + "step": 232910 + }, + { + "epoch": 1.4938765300657724, + "grad_norm": 0.06684473156929016, + "learning_rate": 1.8271575311503514e-06, + "loss": 0.0005, + "step": 232920 + }, + { + "epoch": 1.4939406669595585, + "grad_norm": 0.13278910517692566, + "learning_rate": 1.8267249749696886e-06, + "loss": 0.0012, + "step": 232930 + }, + { + "epoch": 1.4940048038533447, + "grad_norm": 0.016812479123473167, + "learning_rate": 1.82629245855243e-06, + "loss": 0.0019, + "step": 232940 + }, + { + "epoch": 1.4940689407471308, + "grad_norm": 0.0660853311419487, + "learning_rate": 1.8258599819039952e-06, + "loss": 0.0007, + "step": 232950 + }, + { + "epoch": 1.4941330776409167, + "grad_norm": 0.05519381910562515, + "learning_rate": 1.825427545029801e-06, + "loss": 0.0006, + "step": 232960 + }, + { + "epoch": 1.4941972145347029, + "grad_norm": 0.04533018544316292, + "learning_rate": 1.8249951479352707e-06, + "loss": 0.0006, + "step": 232970 + }, + { + "epoch": 1.494261351428489, + "grad_norm": 0.05164116993546486, + "learning_rate": 1.8245627906258201e-06, + "loss": 0.0023, + "step": 232980 + }, + { + "epoch": 1.494325488322275, + "grad_norm": 0.012422630563378334, + "learning_rate": 1.8241304731068671e-06, + "loss": 0.0008, + "step": 232990 + }, + { + "epoch": 1.4943896252160611, + "grad_norm": 0.17956890165805817, + "learning_rate": 1.8236981953838278e-06, + "loss": 0.0011, + "step": 233000 + }, + { + "epoch": 1.4944537621098473, + "grad_norm": 0.12385495007038116, + "learning_rate": 1.8232659574621215e-06, + "loss": 0.0009, + "step": 233010 + }, + { + "epoch": 1.4945178990036334, + "grad_norm": 0.14306782186031342, + "learning_rate": 1.8228337593471635e-06, + "loss": 0.0016, + "step": 233020 + }, + { + "epoch": 1.4945820358974196, + "grad_norm": 0.05540962889790535, + "learning_rate": 1.8224016010443685e-06, + "loss": 0.0006, + "step": 233030 + }, + { + "epoch": 1.4946461727912055, + "grad_norm": 0.18952268362045288, + "learning_rate": 1.8219694825591511e-06, + "loss": 0.0007, + "step": 233040 + }, + { + "epoch": 1.4947103096849916, + "grad_norm": 0.2383899837732315, + "learning_rate": 1.8215374038969292e-06, + "loss": 0.0015, + "step": 233050 + }, + { + "epoch": 1.4947744465787778, + "grad_norm": 0.1607988029718399, + "learning_rate": 1.8211053650631155e-06, + "loss": 0.0008, + "step": 233060 + }, + { + "epoch": 1.4948385834725637, + "grad_norm": 0.04241831228137016, + "learning_rate": 1.820673366063121e-06, + "loss": 0.0021, + "step": 233070 + }, + { + "epoch": 1.4949027203663499, + "grad_norm": 0.10005103796720505, + "learning_rate": 1.820241406902364e-06, + "loss": 0.0019, + "step": 233080 + }, + { + "epoch": 1.494966857260136, + "grad_norm": 0.04470842704176903, + "learning_rate": 1.8198094875862543e-06, + "loss": 0.0014, + "step": 233090 + }, + { + "epoch": 1.4950309941539222, + "grad_norm": 0.021665040403604507, + "learning_rate": 1.8193776081202048e-06, + "loss": 0.0009, + "step": 233100 + }, + { + "epoch": 1.4950951310477083, + "grad_norm": 0.057689227163791656, + "learning_rate": 1.8189457685096246e-06, + "loss": 0.0027, + "step": 233110 + }, + { + "epoch": 1.4951592679414945, + "grad_norm": 0.02833256684243679, + "learning_rate": 1.8185139687599297e-06, + "loss": 0.0011, + "step": 233120 + }, + { + "epoch": 1.4952234048352804, + "grad_norm": 0.0563499815762043, + "learning_rate": 1.8180822088765288e-06, + "loss": 0.001, + "step": 233130 + }, + { + "epoch": 1.4952875417290665, + "grad_norm": 0.016010211780667305, + "learning_rate": 1.8176504888648312e-06, + "loss": 0.0008, + "step": 233140 + }, + { + "epoch": 1.4953516786228527, + "grad_norm": 0.042862121015787125, + "learning_rate": 1.8172188087302461e-06, + "loss": 0.0014, + "step": 233150 + }, + { + "epoch": 1.4954158155166386, + "grad_norm": 0.057495709508657455, + "learning_rate": 1.8167871684781856e-06, + "loss": 0.001, + "step": 233160 + }, + { + "epoch": 1.4954799524104248, + "grad_norm": 0.08322770148515701, + "learning_rate": 1.816355568114057e-06, + "loss": 0.0011, + "step": 233170 + }, + { + "epoch": 1.495544089304211, + "grad_norm": 0.04599379003047943, + "learning_rate": 1.8159240076432682e-06, + "loss": 0.0012, + "step": 233180 + }, + { + "epoch": 1.495608226197997, + "grad_norm": 0.08682698011398315, + "learning_rate": 1.815492487071226e-06, + "loss": 0.0013, + "step": 233190 + }, + { + "epoch": 1.4956723630917832, + "grad_norm": 0.08469253778457642, + "learning_rate": 1.8150610064033404e-06, + "loss": 0.0016, + "step": 233200 + }, + { + "epoch": 1.4957364999855691, + "grad_norm": 0.0031433776021003723, + "learning_rate": 1.8146295656450164e-06, + "loss": 0.0011, + "step": 233210 + }, + { + "epoch": 1.4958006368793553, + "grad_norm": 0.029707908630371094, + "learning_rate": 1.8141981648016587e-06, + "loss": 0.0007, + "step": 233220 + }, + { + "epoch": 1.4958647737731414, + "grad_norm": 0.08981744199991226, + "learning_rate": 1.8137668038786765e-06, + "loss": 0.0024, + "step": 233230 + }, + { + "epoch": 1.4959289106669276, + "grad_norm": 0.29700136184692383, + "learning_rate": 1.8133354828814736e-06, + "loss": 0.0013, + "step": 233240 + }, + { + "epoch": 1.4959930475607135, + "grad_norm": 0.019079823046922684, + "learning_rate": 1.8129042018154546e-06, + "loss": 0.0015, + "step": 233250 + }, + { + "epoch": 1.4960571844544996, + "grad_norm": 0.09006282687187195, + "learning_rate": 1.8124729606860225e-06, + "loss": 0.0008, + "step": 233260 + }, + { + "epoch": 1.4961213213482858, + "grad_norm": 0.13939876854419708, + "learning_rate": 1.8120417594985834e-06, + "loss": 0.001, + "step": 233270 + }, + { + "epoch": 1.496185458242072, + "grad_norm": 0.1402837634086609, + "learning_rate": 1.8116105982585397e-06, + "loss": 0.0012, + "step": 233280 + }, + { + "epoch": 1.496249595135858, + "grad_norm": 0.1734987199306488, + "learning_rate": 1.8111794769712937e-06, + "loss": 0.0038, + "step": 233290 + }, + { + "epoch": 1.496313732029644, + "grad_norm": 0.05005556717514992, + "learning_rate": 1.8107483956422462e-06, + "loss": 0.0024, + "step": 233300 + }, + { + "epoch": 1.4963778689234302, + "grad_norm": 0.1206803098320961, + "learning_rate": 1.8103173542768026e-06, + "loss": 0.0017, + "step": 233310 + }, + { + "epoch": 1.4964420058172163, + "grad_norm": 0.14100918173789978, + "learning_rate": 1.8098863528803616e-06, + "loss": 0.0013, + "step": 233320 + }, + { + "epoch": 1.4965061427110022, + "grad_norm": 0.16625139117240906, + "learning_rate": 1.8094553914583235e-06, + "loss": 0.001, + "step": 233330 + }, + { + "epoch": 1.4965702796047884, + "grad_norm": 0.019594913348555565, + "learning_rate": 1.8090244700160909e-06, + "loss": 0.0007, + "step": 233340 + }, + { + "epoch": 1.4966344164985745, + "grad_norm": 0.04821164533495903, + "learning_rate": 1.8085935885590627e-06, + "loss": 0.0012, + "step": 233350 + }, + { + "epoch": 1.4966985533923607, + "grad_norm": 0.04124406352639198, + "learning_rate": 1.8081627470926372e-06, + "loss": 0.0033, + "step": 233360 + }, + { + "epoch": 1.4967626902861468, + "grad_norm": 0.08549986779689789, + "learning_rate": 1.807731945622212e-06, + "loss": 0.0015, + "step": 233370 + }, + { + "epoch": 1.496826827179933, + "grad_norm": 0.07066275924444199, + "learning_rate": 1.807301184153189e-06, + "loss": 0.0008, + "step": 233380 + }, + { + "epoch": 1.496890964073719, + "grad_norm": 0.00854248832911253, + "learning_rate": 1.806870462690964e-06, + "loss": 0.0028, + "step": 233390 + }, + { + "epoch": 1.496955100967505, + "grad_norm": 0.10337699204683304, + "learning_rate": 1.806439781240934e-06, + "loss": 0.0007, + "step": 233400 + }, + { + "epoch": 1.4970192378612912, + "grad_norm": 0.06711836904287338, + "learning_rate": 1.806009139808494e-06, + "loss": 0.0011, + "step": 233410 + }, + { + "epoch": 1.4970833747550771, + "grad_norm": 0.07099798321723938, + "learning_rate": 1.8055785383990442e-06, + "loss": 0.0016, + "step": 233420 + }, + { + "epoch": 1.4971475116488633, + "grad_norm": 0.00854500662535429, + "learning_rate": 1.805147977017978e-06, + "loss": 0.0011, + "step": 233430 + }, + { + "epoch": 1.4972116485426494, + "grad_norm": 0.026742344722151756, + "learning_rate": 1.8047174556706914e-06, + "loss": 0.0013, + "step": 233440 + }, + { + "epoch": 1.4972757854364356, + "grad_norm": 0.00517117977142334, + "learning_rate": 1.804286974362577e-06, + "loss": 0.0013, + "step": 233450 + }, + { + "epoch": 1.4973399223302217, + "grad_norm": 0.08385056257247925, + "learning_rate": 1.803856533099032e-06, + "loss": 0.0014, + "step": 233460 + }, + { + "epoch": 1.4974040592240077, + "grad_norm": 0.06410916149616241, + "learning_rate": 1.803426131885449e-06, + "loss": 0.0014, + "step": 233470 + }, + { + "epoch": 1.4974681961177938, + "grad_norm": 0.16342325508594513, + "learning_rate": 1.80299577072722e-06, + "loss": 0.0014, + "step": 233480 + }, + { + "epoch": 1.49753233301158, + "grad_norm": 0.12051145732402802, + "learning_rate": 1.80256544962974e-06, + "loss": 0.0014, + "step": 233490 + }, + { + "epoch": 1.4975964699053659, + "grad_norm": 0.10693907737731934, + "learning_rate": 1.8021351685984001e-06, + "loss": 0.001, + "step": 233500 + }, + { + "epoch": 1.497660606799152, + "grad_norm": 0.12433069199323654, + "learning_rate": 1.8017049276385924e-06, + "loss": 0.0015, + "step": 233510 + }, + { + "epoch": 1.4977247436929382, + "grad_norm": 0.023863285779953003, + "learning_rate": 1.8012747267557057e-06, + "loss": 0.0018, + "step": 233520 + }, + { + "epoch": 1.4977888805867243, + "grad_norm": 0.040423374623060226, + "learning_rate": 1.8008445659551344e-06, + "loss": 0.002, + "step": 233530 + }, + { + "epoch": 1.4978530174805105, + "grad_norm": 0.109131820499897, + "learning_rate": 1.8004144452422672e-06, + "loss": 0.0013, + "step": 233540 + }, + { + "epoch": 1.4979171543742966, + "grad_norm": 0.11498875170946121, + "learning_rate": 1.799984364622494e-06, + "loss": 0.0013, + "step": 233550 + }, + { + "epoch": 1.4979812912680825, + "grad_norm": 0.07260105013847351, + "learning_rate": 1.7995543241012014e-06, + "loss": 0.0011, + "step": 233560 + }, + { + "epoch": 1.4980454281618687, + "grad_norm": 0.11735733598470688, + "learning_rate": 1.799124323683783e-06, + "loss": 0.0018, + "step": 233570 + }, + { + "epoch": 1.4981095650556548, + "grad_norm": 0.06831369549036026, + "learning_rate": 1.7986943633756238e-06, + "loss": 0.0008, + "step": 233580 + }, + { + "epoch": 1.4981737019494408, + "grad_norm": 0.045636653900146484, + "learning_rate": 1.7982644431821122e-06, + "loss": 0.0016, + "step": 233590 + }, + { + "epoch": 1.498237838843227, + "grad_norm": 0.12789089977741241, + "learning_rate": 1.7978345631086342e-06, + "loss": 0.0007, + "step": 233600 + }, + { + "epoch": 1.498301975737013, + "grad_norm": 0.0596468448638916, + "learning_rate": 1.797404723160579e-06, + "loss": 0.0017, + "step": 233610 + }, + { + "epoch": 1.4983661126307992, + "grad_norm": 0.05858698487281799, + "learning_rate": 1.7969749233433314e-06, + "loss": 0.0012, + "step": 233620 + }, + { + "epoch": 1.4984302495245854, + "grad_norm": 0.4808373749256134, + "learning_rate": 1.7965451636622755e-06, + "loss": 0.0013, + "step": 233630 + }, + { + "epoch": 1.4984943864183713, + "grad_norm": 0.058651797473430634, + "learning_rate": 1.7961154441227986e-06, + "loss": 0.001, + "step": 233640 + }, + { + "epoch": 1.4985585233121574, + "grad_norm": 0.14847874641418457, + "learning_rate": 1.795685764730286e-06, + "loss": 0.0014, + "step": 233650 + }, + { + "epoch": 1.4986226602059436, + "grad_norm": 0.030869372189044952, + "learning_rate": 1.7952561254901214e-06, + "loss": 0.0009, + "step": 233660 + }, + { + "epoch": 1.4986867970997297, + "grad_norm": 0.0621880367398262, + "learning_rate": 1.7948265264076876e-06, + "loss": 0.0006, + "step": 233670 + }, + { + "epoch": 1.4987509339935157, + "grad_norm": 0.14163510501384735, + "learning_rate": 1.7943969674883666e-06, + "loss": 0.0007, + "step": 233680 + }, + { + "epoch": 1.4988150708873018, + "grad_norm": 0.032575275748968124, + "learning_rate": 1.7939674487375442e-06, + "loss": 0.0008, + "step": 233690 + }, + { + "epoch": 1.498879207781088, + "grad_norm": 0.012831765227019787, + "learning_rate": 1.793537970160601e-06, + "loss": 0.0017, + "step": 233700 + }, + { + "epoch": 1.4989433446748741, + "grad_norm": 0.20057491958141327, + "learning_rate": 1.7931085317629187e-06, + "loss": 0.002, + "step": 233710 + }, + { + "epoch": 1.4990074815686603, + "grad_norm": 0.1405409276485443, + "learning_rate": 1.7926791335498767e-06, + "loss": 0.0021, + "step": 233720 + }, + { + "epoch": 1.4990716184624462, + "grad_norm": 0.028645657002925873, + "learning_rate": 1.7922497755268592e-06, + "loss": 0.001, + "step": 233730 + }, + { + "epoch": 1.4991357553562323, + "grad_norm": 0.02900051139295101, + "learning_rate": 1.7918204576992443e-06, + "loss": 0.001, + "step": 233740 + }, + { + "epoch": 1.4991998922500185, + "grad_norm": 0.013951257802546024, + "learning_rate": 1.7913911800724104e-06, + "loss": 0.0007, + "step": 233750 + }, + { + "epoch": 1.4992640291438044, + "grad_norm": 0.044111721217632294, + "learning_rate": 1.7909619426517399e-06, + "loss": 0.0007, + "step": 233760 + }, + { + "epoch": 1.4993281660375906, + "grad_norm": 0.02437959797680378, + "learning_rate": 1.7905327454426096e-06, + "loss": 0.0008, + "step": 233770 + }, + { + "epoch": 1.4993923029313767, + "grad_norm": 0.15801259875297546, + "learning_rate": 1.7901035884503976e-06, + "loss": 0.0015, + "step": 233780 + }, + { + "epoch": 1.4994564398251629, + "grad_norm": 0.12411099672317505, + "learning_rate": 1.7896744716804805e-06, + "loss": 0.0008, + "step": 233790 + }, + { + "epoch": 1.499520576718949, + "grad_norm": 0.0033227538224309683, + "learning_rate": 1.7892453951382383e-06, + "loss": 0.0006, + "step": 233800 + }, + { + "epoch": 1.4995847136127352, + "grad_norm": 0.046695925295352936, + "learning_rate": 1.7888163588290453e-06, + "loss": 0.0015, + "step": 233810 + }, + { + "epoch": 1.499648850506521, + "grad_norm": 0.12680582702159882, + "learning_rate": 1.7883873627582794e-06, + "loss": 0.0016, + "step": 233820 + }, + { + "epoch": 1.4997129874003072, + "grad_norm": 0.2100486159324646, + "learning_rate": 1.7879584069313127e-06, + "loss": 0.0031, + "step": 233830 + }, + { + "epoch": 1.4997771242940934, + "grad_norm": 0.06719649583101273, + "learning_rate": 1.7875294913535246e-06, + "loss": 0.0013, + "step": 233840 + }, + { + "epoch": 1.4998412611878793, + "grad_norm": 0.016559133306145668, + "learning_rate": 1.787100616030288e-06, + "loss": 0.0009, + "step": 233850 + }, + { + "epoch": 1.4999053980816655, + "grad_norm": 0.0677509531378746, + "learning_rate": 1.7866717809669765e-06, + "loss": 0.0021, + "step": 233860 + }, + { + "epoch": 1.4999695349754516, + "grad_norm": 0.17736130952835083, + "learning_rate": 1.7862429861689628e-06, + "loss": 0.0012, + "step": 233870 + }, + { + "epoch": 1.5000336718692378, + "grad_norm": 0.06214691326022148, + "learning_rate": 1.7858142316416232e-06, + "loss": 0.001, + "step": 233880 + }, + { + "epoch": 1.500097808763024, + "grad_norm": 0.08927460014820099, + "learning_rate": 1.7853855173903284e-06, + "loss": 0.0014, + "step": 233890 + }, + { + "epoch": 1.50016194565681, + "grad_norm": 0.075447678565979, + "learning_rate": 1.7849568434204484e-06, + "loss": 0.0011, + "step": 233900 + }, + { + "epoch": 1.500226082550596, + "grad_norm": 0.007112096529453993, + "learning_rate": 1.7845282097373589e-06, + "loss": 0.001, + "step": 233910 + }, + { + "epoch": 1.5002902194443821, + "grad_norm": 0.028644679114222527, + "learning_rate": 1.7840996163464285e-06, + "loss": 0.0013, + "step": 233920 + }, + { + "epoch": 1.500354356338168, + "grad_norm": 0.03432578966021538, + "learning_rate": 1.783671063253029e-06, + "loss": 0.0016, + "step": 233930 + }, + { + "epoch": 1.5004184932319542, + "grad_norm": 0.03648257628083229, + "learning_rate": 1.7832425504625278e-06, + "loss": 0.0012, + "step": 233940 + }, + { + "epoch": 1.5004826301257403, + "grad_norm": 0.05791352316737175, + "learning_rate": 1.7828140779802984e-06, + "loss": 0.001, + "step": 233950 + }, + { + "epoch": 1.5005467670195265, + "grad_norm": 0.13585078716278076, + "learning_rate": 1.7823856458117072e-06, + "loss": 0.001, + "step": 233960 + }, + { + "epoch": 1.5006109039133126, + "grad_norm": 0.007192340213805437, + "learning_rate": 1.7819572539621239e-06, + "loss": 0.0008, + "step": 233970 + }, + { + "epoch": 1.5006750408070988, + "grad_norm": 0.18648450076580048, + "learning_rate": 1.7815289024369142e-06, + "loss": 0.0013, + "step": 233980 + }, + { + "epoch": 1.5007391777008847, + "grad_norm": 0.0246658306568861, + "learning_rate": 1.7811005912414498e-06, + "loss": 0.0007, + "step": 233990 + }, + { + "epoch": 1.5008033145946709, + "grad_norm": 0.09747783094644547, + "learning_rate": 1.7806723203810949e-06, + "loss": 0.0015, + "step": 234000 + }, + { + "epoch": 1.500867451488457, + "grad_norm": 0.12793777883052826, + "learning_rate": 1.7802440898612166e-06, + "loss": 0.0022, + "step": 234010 + }, + { + "epoch": 1.500931588382243, + "grad_norm": 0.0568283386528492, + "learning_rate": 1.7798158996871795e-06, + "loss": 0.0022, + "step": 234020 + }, + { + "epoch": 1.500995725276029, + "grad_norm": 0.056288767606019974, + "learning_rate": 1.7793877498643524e-06, + "loss": 0.0028, + "step": 234030 + }, + { + "epoch": 1.5010598621698152, + "grad_norm": 0.27575936913490295, + "learning_rate": 1.7789596403980985e-06, + "loss": 0.0013, + "step": 234040 + }, + { + "epoch": 1.5011239990636014, + "grad_norm": 0.01003996655344963, + "learning_rate": 1.7785315712937805e-06, + "loss": 0.0025, + "step": 234050 + }, + { + "epoch": 1.5011881359573875, + "grad_norm": 0.07622134685516357, + "learning_rate": 1.778103542556766e-06, + "loss": 0.0015, + "step": 234060 + }, + { + "epoch": 1.5012522728511737, + "grad_norm": 0.004882059525698423, + "learning_rate": 1.7776755541924169e-06, + "loss": 0.0006, + "step": 234070 + }, + { + "epoch": 1.5013164097449596, + "grad_norm": 0.1184699684381485, + "learning_rate": 1.7772476062060963e-06, + "loss": 0.0022, + "step": 234080 + }, + { + "epoch": 1.5013805466387458, + "grad_norm": 0.18747399747371674, + "learning_rate": 1.7768196986031643e-06, + "loss": 0.0018, + "step": 234090 + }, + { + "epoch": 1.5014446835325317, + "grad_norm": 0.0672893077135086, + "learning_rate": 1.7763918313889867e-06, + "loss": 0.0014, + "step": 234100 + }, + { + "epoch": 1.5015088204263178, + "grad_norm": 0.04679718241095543, + "learning_rate": 1.7759640045689235e-06, + "loss": 0.0014, + "step": 234110 + }, + { + "epoch": 1.501572957320104, + "grad_norm": 0.051002953201532364, + "learning_rate": 1.775536218148336e-06, + "loss": 0.001, + "step": 234120 + }, + { + "epoch": 1.5016370942138901, + "grad_norm": 0.0018901376752182841, + "learning_rate": 1.7751084721325818e-06, + "loss": 0.001, + "step": 234130 + }, + { + "epoch": 1.5017012311076763, + "grad_norm": 0.04492555931210518, + "learning_rate": 1.774680766527025e-06, + "loss": 0.0013, + "step": 234140 + }, + { + "epoch": 1.5017653680014624, + "grad_norm": 0.00817918125540018, + "learning_rate": 1.7742531013370235e-06, + "loss": 0.002, + "step": 234150 + }, + { + "epoch": 1.5018295048952486, + "grad_norm": 0.0407526008784771, + "learning_rate": 1.7738254765679364e-06, + "loss": 0.0026, + "step": 234160 + }, + { + "epoch": 1.5018936417890345, + "grad_norm": 0.020076848566532135, + "learning_rate": 1.7733978922251193e-06, + "loss": 0.0015, + "step": 234170 + }, + { + "epoch": 1.5019577786828207, + "grad_norm": 0.17962588369846344, + "learning_rate": 1.7729703483139348e-06, + "loss": 0.0013, + "step": 234180 + }, + { + "epoch": 1.5020219155766066, + "grad_norm": 0.06566172093153, + "learning_rate": 1.772542844839738e-06, + "loss": 0.0015, + "step": 234190 + }, + { + "epoch": 1.5020860524703927, + "grad_norm": 0.02670532651245594, + "learning_rate": 1.7721153818078845e-06, + "loss": 0.0006, + "step": 234200 + }, + { + "epoch": 1.5021501893641789, + "grad_norm": 0.13443441689014435, + "learning_rate": 1.771687959223734e-06, + "loss": 0.0012, + "step": 234210 + }, + { + "epoch": 1.502214326257965, + "grad_norm": 0.12669919431209564, + "learning_rate": 1.77126057709264e-06, + "loss": 0.0008, + "step": 234220 + }, + { + "epoch": 1.5022784631517512, + "grad_norm": 0.020429085940122604, + "learning_rate": 1.7708332354199592e-06, + "loss": 0.0006, + "step": 234230 + }, + { + "epoch": 1.5023426000455373, + "grad_norm": 0.2921810448169708, + "learning_rate": 1.7704059342110435e-06, + "loss": 0.0013, + "step": 234240 + }, + { + "epoch": 1.5024067369393233, + "grad_norm": 0.33811965584754944, + "learning_rate": 1.7699786734712515e-06, + "loss": 0.0034, + "step": 234250 + }, + { + "epoch": 1.5024708738331094, + "grad_norm": 0.1620689481496811, + "learning_rate": 1.7695514532059355e-06, + "loss": 0.0011, + "step": 234260 + }, + { + "epoch": 1.5025350107268953, + "grad_norm": 0.05258363485336304, + "learning_rate": 1.7691242734204483e-06, + "loss": 0.0019, + "step": 234270 + }, + { + "epoch": 1.5025991476206815, + "grad_norm": 0.0416279137134552, + "learning_rate": 1.7686971341201413e-06, + "loss": 0.0017, + "step": 234280 + }, + { + "epoch": 1.5026632845144676, + "grad_norm": 0.10482791811227798, + "learning_rate": 1.7682700353103704e-06, + "loss": 0.0013, + "step": 234290 + }, + { + "epoch": 1.5027274214082538, + "grad_norm": 0.08230151236057281, + "learning_rate": 1.7678429769964856e-06, + "loss": 0.0011, + "step": 234300 + }, + { + "epoch": 1.50279155830204, + "grad_norm": 0.2138132005929947, + "learning_rate": 1.7674159591838385e-06, + "loss": 0.0012, + "step": 234310 + }, + { + "epoch": 1.502855695195826, + "grad_norm": 0.3787125051021576, + "learning_rate": 1.7669889818777774e-06, + "loss": 0.0009, + "step": 234320 + }, + { + "epoch": 1.5029198320896122, + "grad_norm": 0.08036356419324875, + "learning_rate": 1.7665620450836574e-06, + "loss": 0.0008, + "step": 234330 + }, + { + "epoch": 1.5029839689833981, + "grad_norm": 0.02135232463479042, + "learning_rate": 1.7661351488068257e-06, + "loss": 0.0005, + "step": 234340 + }, + { + "epoch": 1.5030481058771843, + "grad_norm": 0.055322859436273575, + "learning_rate": 1.7657082930526298e-06, + "loss": 0.0014, + "step": 234350 + }, + { + "epoch": 1.5031122427709702, + "grad_norm": 0.05045409873127937, + "learning_rate": 1.7652814778264222e-06, + "loss": 0.0014, + "step": 234360 + }, + { + "epoch": 1.5031763796647564, + "grad_norm": 0.03834830969572067, + "learning_rate": 1.7648547031335495e-06, + "loss": 0.002, + "step": 234370 + }, + { + "epoch": 1.5032405165585425, + "grad_norm": 0.006903606466948986, + "learning_rate": 1.7644279689793598e-06, + "loss": 0.0013, + "step": 234380 + }, + { + "epoch": 1.5033046534523287, + "grad_norm": 0.07117222249507904, + "learning_rate": 1.7640012753691982e-06, + "loss": 0.0008, + "step": 234390 + }, + { + "epoch": 1.5033687903461148, + "grad_norm": 0.012395719066262245, + "learning_rate": 1.7635746223084132e-06, + "loss": 0.0016, + "step": 234400 + }, + { + "epoch": 1.503432927239901, + "grad_norm": 0.07572619616985321, + "learning_rate": 1.7631480098023535e-06, + "loss": 0.0015, + "step": 234410 + }, + { + "epoch": 1.503497064133687, + "grad_norm": 0.0958796814084053, + "learning_rate": 1.7627214378563618e-06, + "loss": 0.0008, + "step": 234420 + }, + { + "epoch": 1.503561201027473, + "grad_norm": 0.02184971049427986, + "learning_rate": 1.7622949064757843e-06, + "loss": 0.0007, + "step": 234430 + }, + { + "epoch": 1.5036253379212592, + "grad_norm": 0.179481640458107, + "learning_rate": 1.7618684156659643e-06, + "loss": 0.0013, + "step": 234440 + }, + { + "epoch": 1.5036894748150451, + "grad_norm": 0.0073411292396485806, + "learning_rate": 1.761441965432249e-06, + "loss": 0.0004, + "step": 234450 + }, + { + "epoch": 1.5037536117088313, + "grad_norm": 0.16384795308113098, + "learning_rate": 1.7610155557799803e-06, + "loss": 0.0022, + "step": 234460 + }, + { + "epoch": 1.5038177486026174, + "grad_norm": 0.057359084486961365, + "learning_rate": 1.7605891867144997e-06, + "loss": 0.0031, + "step": 234470 + }, + { + "epoch": 1.5038818854964036, + "grad_norm": 0.04206176847219467, + "learning_rate": 1.7601628582411535e-06, + "loss": 0.001, + "step": 234480 + }, + { + "epoch": 1.5039460223901897, + "grad_norm": 0.00203709676861763, + "learning_rate": 1.7597365703652819e-06, + "loss": 0.0012, + "step": 234490 + }, + { + "epoch": 1.5040101592839759, + "grad_norm": 0.059611234813928604, + "learning_rate": 1.7593103230922276e-06, + "loss": 0.0015, + "step": 234500 + }, + { + "epoch": 1.5040742961777618, + "grad_norm": 0.04611333832144737, + "learning_rate": 1.7588841164273285e-06, + "loss": 0.0007, + "step": 234510 + }, + { + "epoch": 1.504138433071548, + "grad_norm": 0.02621953934431076, + "learning_rate": 1.7584579503759303e-06, + "loss": 0.001, + "step": 234520 + }, + { + "epoch": 1.5042025699653339, + "grad_norm": 0.05761010944843292, + "learning_rate": 1.7580318249433698e-06, + "loss": 0.0009, + "step": 234530 + }, + { + "epoch": 1.50426670685912, + "grad_norm": 0.03204973414540291, + "learning_rate": 1.7576057401349882e-06, + "loss": 0.0012, + "step": 234540 + }, + { + "epoch": 1.5043308437529062, + "grad_norm": 0.02957121655344963, + "learning_rate": 1.7571796959561221e-06, + "loss": 0.0009, + "step": 234550 + }, + { + "epoch": 1.5043949806466923, + "grad_norm": 0.1492205262184143, + "learning_rate": 1.7567536924121137e-06, + "loss": 0.0019, + "step": 234560 + }, + { + "epoch": 1.5044591175404785, + "grad_norm": 0.049612388014793396, + "learning_rate": 1.7563277295082993e-06, + "loss": 0.0008, + "step": 234570 + }, + { + "epoch": 1.5045232544342646, + "grad_norm": 0.267138808965683, + "learning_rate": 1.7559018072500173e-06, + "loss": 0.002, + "step": 234580 + }, + { + "epoch": 1.5045873913280507, + "grad_norm": 0.060233224183321, + "learning_rate": 1.7554759256426017e-06, + "loss": 0.0032, + "step": 234590 + }, + { + "epoch": 1.5046515282218367, + "grad_norm": 0.017759747803211212, + "learning_rate": 1.7550500846913937e-06, + "loss": 0.0023, + "step": 234600 + }, + { + "epoch": 1.5047156651156228, + "grad_norm": 0.040159065276384354, + "learning_rate": 1.7546242844017269e-06, + "loss": 0.0004, + "step": 234610 + }, + { + "epoch": 1.5047798020094088, + "grad_norm": 0.14336469769477844, + "learning_rate": 1.7541985247789362e-06, + "loss": 0.0012, + "step": 234620 + }, + { + "epoch": 1.504843938903195, + "grad_norm": 0.023215042427182198, + "learning_rate": 1.7537728058283588e-06, + "loss": 0.0023, + "step": 234630 + }, + { + "epoch": 1.504908075796981, + "grad_norm": 0.008424768224358559, + "learning_rate": 1.7533471275553281e-06, + "loss": 0.0012, + "step": 234640 + }, + { + "epoch": 1.5049722126907672, + "grad_norm": 0.008528662845492363, + "learning_rate": 1.7529214899651787e-06, + "loss": 0.0005, + "step": 234650 + }, + { + "epoch": 1.5050363495845533, + "grad_norm": 0.027422424405813217, + "learning_rate": 1.7524958930632418e-06, + "loss": 0.0012, + "step": 234660 + }, + { + "epoch": 1.5051004864783395, + "grad_norm": 0.037562474608421326, + "learning_rate": 1.7520703368548541e-06, + "loss": 0.0007, + "step": 234670 + }, + { + "epoch": 1.5051646233721254, + "grad_norm": 0.08821986615657806, + "learning_rate": 1.7516448213453463e-06, + "loss": 0.0013, + "step": 234680 + }, + { + "epoch": 1.5052287602659116, + "grad_norm": 0.044554661959409714, + "learning_rate": 1.7512193465400506e-06, + "loss": 0.0016, + "step": 234690 + }, + { + "epoch": 1.5052928971596975, + "grad_norm": 0.031345486640930176, + "learning_rate": 1.7507939124442963e-06, + "loss": 0.003, + "step": 234700 + }, + { + "epoch": 1.5053570340534836, + "grad_norm": 0.07827392965555191, + "learning_rate": 1.7503685190634185e-06, + "loss": 0.0018, + "step": 234710 + }, + { + "epoch": 1.5054211709472698, + "grad_norm": 0.013680531643331051, + "learning_rate": 1.7499431664027456e-06, + "loss": 0.0012, + "step": 234720 + }, + { + "epoch": 1.505485307841056, + "grad_norm": 0.05149161443114281, + "learning_rate": 1.7495178544676072e-06, + "loss": 0.0011, + "step": 234730 + }, + { + "epoch": 1.505549444734842, + "grad_norm": 0.1659402996301651, + "learning_rate": 1.7490925832633315e-06, + "loss": 0.0013, + "step": 234740 + }, + { + "epoch": 1.5056135816286282, + "grad_norm": 0.06993673741817474, + "learning_rate": 1.7486673527952513e-06, + "loss": 0.0019, + "step": 234750 + }, + { + "epoch": 1.5056777185224144, + "grad_norm": 0.10557802021503448, + "learning_rate": 1.7482421630686925e-06, + "loss": 0.0011, + "step": 234760 + }, + { + "epoch": 1.5057418554162003, + "grad_norm": 0.005402317736297846, + "learning_rate": 1.7478170140889822e-06, + "loss": 0.0006, + "step": 234770 + }, + { + "epoch": 1.5058059923099865, + "grad_norm": 0.07126693427562714, + "learning_rate": 1.7473919058614502e-06, + "loss": 0.0008, + "step": 234780 + }, + { + "epoch": 1.5058701292037724, + "grad_norm": 0.05289481580257416, + "learning_rate": 1.7469668383914223e-06, + "loss": 0.0008, + "step": 234790 + }, + { + "epoch": 1.5059342660975585, + "grad_norm": 0.09493552893400192, + "learning_rate": 1.7465418116842248e-06, + "loss": 0.0013, + "step": 234800 + }, + { + "epoch": 1.5059984029913447, + "grad_norm": 0.10266318917274475, + "learning_rate": 1.7461168257451822e-06, + "loss": 0.0008, + "step": 234810 + }, + { + "epoch": 1.5060625398851308, + "grad_norm": 0.04841642081737518, + "learning_rate": 1.7456918805796224e-06, + "loss": 0.0014, + "step": 234820 + }, + { + "epoch": 1.506126676778917, + "grad_norm": 0.15532329678535461, + "learning_rate": 1.7452669761928697e-06, + "loss": 0.002, + "step": 234830 + }, + { + "epoch": 1.5061908136727031, + "grad_norm": 0.10063052922487259, + "learning_rate": 1.7448421125902476e-06, + "loss": 0.0008, + "step": 234840 + }, + { + "epoch": 1.506254950566489, + "grad_norm": 0.05639910325407982, + "learning_rate": 1.744417289777079e-06, + "loss": 0.0022, + "step": 234850 + }, + { + "epoch": 1.5063190874602752, + "grad_norm": 0.04638287425041199, + "learning_rate": 1.74399250775869e-06, + "loss": 0.0009, + "step": 234860 + }, + { + "epoch": 1.5063832243540614, + "grad_norm": 0.056501440703868866, + "learning_rate": 1.7435677665404016e-06, + "loss": 0.001, + "step": 234870 + }, + { + "epoch": 1.5064473612478473, + "grad_norm": 0.056153517216444016, + "learning_rate": 1.7431430661275367e-06, + "loss": 0.0015, + "step": 234880 + }, + { + "epoch": 1.5065114981416334, + "grad_norm": 0.0634610652923584, + "learning_rate": 1.7427184065254149e-06, + "loss": 0.0008, + "step": 234890 + }, + { + "epoch": 1.5065756350354196, + "grad_norm": 0.11776232719421387, + "learning_rate": 1.7422937877393613e-06, + "loss": 0.0023, + "step": 234900 + }, + { + "epoch": 1.5066397719292057, + "grad_norm": 0.0619511753320694, + "learning_rate": 1.7418692097746949e-06, + "loss": 0.0012, + "step": 234910 + }, + { + "epoch": 1.5067039088229919, + "grad_norm": 0.10542446374893188, + "learning_rate": 1.741444672636734e-06, + "loss": 0.0015, + "step": 234920 + }, + { + "epoch": 1.506768045716778, + "grad_norm": 0.023263636976480484, + "learning_rate": 1.7410201763308022e-06, + "loss": 0.0012, + "step": 234930 + }, + { + "epoch": 1.506832182610564, + "grad_norm": 0.028178084641695023, + "learning_rate": 1.7405957208622158e-06, + "loss": 0.001, + "step": 234940 + }, + { + "epoch": 1.50689631950435, + "grad_norm": 0.08219774812459946, + "learning_rate": 1.7401713062362957e-06, + "loss": 0.0011, + "step": 234950 + }, + { + "epoch": 1.506960456398136, + "grad_norm": 0.16498440504074097, + "learning_rate": 1.7397469324583567e-06, + "loss": 0.0009, + "step": 234960 + }, + { + "epoch": 1.5070245932919222, + "grad_norm": 0.013587296940386295, + "learning_rate": 1.7393225995337198e-06, + "loss": 0.0016, + "step": 234970 + }, + { + "epoch": 1.5070887301857083, + "grad_norm": 0.051530029624700546, + "learning_rate": 1.7388983074677018e-06, + "loss": 0.0006, + "step": 234980 + }, + { + "epoch": 1.5071528670794945, + "grad_norm": 0.04435497522354126, + "learning_rate": 1.7384740562656188e-06, + "loss": 0.0007, + "step": 234990 + }, + { + "epoch": 1.5072170039732806, + "grad_norm": 0.08541511744260788, + "learning_rate": 1.7380498459327844e-06, + "loss": 0.0017, + "step": 235000 + }, + { + "epoch": 1.5072811408670668, + "grad_norm": 0.0650327205657959, + "learning_rate": 1.7376256764745191e-06, + "loss": 0.0013, + "step": 235010 + }, + { + "epoch": 1.507345277760853, + "grad_norm": 0.06552031636238098, + "learning_rate": 1.7372015478961357e-06, + "loss": 0.0011, + "step": 235020 + }, + { + "epoch": 1.5074094146546388, + "grad_norm": 0.051747698336839676, + "learning_rate": 1.736777460202948e-06, + "loss": 0.0015, + "step": 235030 + }, + { + "epoch": 1.507473551548425, + "grad_norm": 0.06316728889942169, + "learning_rate": 1.7363534134002702e-06, + "loss": 0.0021, + "step": 235040 + }, + { + "epoch": 1.507537688442211, + "grad_norm": 0.05629882588982582, + "learning_rate": 1.7359294074934175e-06, + "loss": 0.0013, + "step": 235050 + }, + { + "epoch": 1.507601825335997, + "grad_norm": 0.044074613600969315, + "learning_rate": 1.7355054424877027e-06, + "loss": 0.0008, + "step": 235060 + }, + { + "epoch": 1.5076659622297832, + "grad_norm": 0.015521244145929813, + "learning_rate": 1.7350815183884355e-06, + "loss": 0.0013, + "step": 235070 + }, + { + "epoch": 1.5077300991235694, + "grad_norm": 0.030993226915597916, + "learning_rate": 1.7346576352009326e-06, + "loss": 0.0008, + "step": 235080 + }, + { + "epoch": 1.5077942360173555, + "grad_norm": 0.02457059919834137, + "learning_rate": 1.734233792930503e-06, + "loss": 0.0007, + "step": 235090 + }, + { + "epoch": 1.5078583729111417, + "grad_norm": 0.14740078151226044, + "learning_rate": 1.733809991582458e-06, + "loss": 0.0007, + "step": 235100 + }, + { + "epoch": 1.5079225098049276, + "grad_norm": 0.29353275895118713, + "learning_rate": 1.7333862311621064e-06, + "loss": 0.0016, + "step": 235110 + }, + { + "epoch": 1.5079866466987137, + "grad_norm": 0.030130386352539062, + "learning_rate": 1.7329625116747617e-06, + "loss": 0.0007, + "step": 235120 + }, + { + "epoch": 1.5080507835924997, + "grad_norm": 0.06130951642990112, + "learning_rate": 1.7325388331257315e-06, + "loss": 0.0015, + "step": 235130 + }, + { + "epoch": 1.5081149204862858, + "grad_norm": 0.08484799414873123, + "learning_rate": 1.7321151955203252e-06, + "loss": 0.001, + "step": 235140 + }, + { + "epoch": 1.508179057380072, + "grad_norm": 0.09276269376277924, + "learning_rate": 1.7316915988638488e-06, + "loss": 0.0014, + "step": 235150 + }, + { + "epoch": 1.5082431942738581, + "grad_norm": 0.06885452568531036, + "learning_rate": 1.731268043161613e-06, + "loss": 0.0009, + "step": 235160 + }, + { + "epoch": 1.5083073311676443, + "grad_norm": 0.08210889250040054, + "learning_rate": 1.7308445284189262e-06, + "loss": 0.0011, + "step": 235170 + }, + { + "epoch": 1.5083714680614304, + "grad_norm": 0.04074737802147865, + "learning_rate": 1.7304210546410944e-06, + "loss": 0.0013, + "step": 235180 + }, + { + "epoch": 1.5084356049552166, + "grad_norm": 0.006532349623739719, + "learning_rate": 1.7299976218334215e-06, + "loss": 0.0022, + "step": 235190 + }, + { + "epoch": 1.5084997418490025, + "grad_norm": 0.06377600133419037, + "learning_rate": 1.729574230001217e-06, + "loss": 0.0008, + "step": 235200 + }, + { + "epoch": 1.5085638787427886, + "grad_norm": 0.35375145077705383, + "learning_rate": 1.7291508791497845e-06, + "loss": 0.0016, + "step": 235210 + }, + { + "epoch": 1.5086280156365746, + "grad_norm": 0.04930626600980759, + "learning_rate": 1.728727569284429e-06, + "loss": 0.0008, + "step": 235220 + }, + { + "epoch": 1.5086921525303607, + "grad_norm": 0.1520637422800064, + "learning_rate": 1.728304300410454e-06, + "loss": 0.0027, + "step": 235230 + }, + { + "epoch": 1.5087562894241469, + "grad_norm": 0.1196468248963356, + "learning_rate": 1.7278810725331657e-06, + "loss": 0.0013, + "step": 235240 + }, + { + "epoch": 1.508820426317933, + "grad_norm": 0.1320398598909378, + "learning_rate": 1.7274578856578661e-06, + "loss": 0.0016, + "step": 235250 + }, + { + "epoch": 1.5088845632117192, + "grad_norm": 0.04028046503663063, + "learning_rate": 1.7270347397898585e-06, + "loss": 0.0007, + "step": 235260 + }, + { + "epoch": 1.5089487001055053, + "grad_norm": 0.09058906137943268, + "learning_rate": 1.7266116349344425e-06, + "loss": 0.0015, + "step": 235270 + }, + { + "epoch": 1.5090128369992915, + "grad_norm": 0.002862320514395833, + "learning_rate": 1.726188571096924e-06, + "loss": 0.0016, + "step": 235280 + }, + { + "epoch": 1.5090769738930774, + "grad_norm": 0.03169231116771698, + "learning_rate": 1.7257655482826024e-06, + "loss": 0.0015, + "step": 235290 + }, + { + "epoch": 1.5091411107868635, + "grad_norm": 0.0070249466225504875, + "learning_rate": 1.7253425664967787e-06, + "loss": 0.0015, + "step": 235300 + }, + { + "epoch": 1.5092052476806495, + "grad_norm": 0.050528980791568756, + "learning_rate": 1.7249196257447515e-06, + "loss": 0.0017, + "step": 235310 + }, + { + "epoch": 1.5092693845744356, + "grad_norm": 0.16391852498054504, + "learning_rate": 1.724496726031823e-06, + "loss": 0.0014, + "step": 235320 + }, + { + "epoch": 1.5093335214682218, + "grad_norm": 0.10123415291309357, + "learning_rate": 1.7240738673632923e-06, + "loss": 0.0023, + "step": 235330 + }, + { + "epoch": 1.509397658362008, + "grad_norm": 0.0017691326793283224, + "learning_rate": 1.7236510497444552e-06, + "loss": 0.0009, + "step": 235340 + }, + { + "epoch": 1.509461795255794, + "grad_norm": 0.015997136011719704, + "learning_rate": 1.7232282731806137e-06, + "loss": 0.0022, + "step": 235350 + }, + { + "epoch": 1.5095259321495802, + "grad_norm": 0.04225035384297371, + "learning_rate": 1.722805537677064e-06, + "loss": 0.0008, + "step": 235360 + }, + { + "epoch": 1.5095900690433661, + "grad_norm": 0.012225965037941933, + "learning_rate": 1.7223828432391031e-06, + "loss": 0.0008, + "step": 235370 + }, + { + "epoch": 1.5096542059371523, + "grad_norm": 0.06684717535972595, + "learning_rate": 1.7219601898720256e-06, + "loss": 0.0014, + "step": 235380 + }, + { + "epoch": 1.5097183428309382, + "grad_norm": 0.21332567930221558, + "learning_rate": 1.7215375775811315e-06, + "loss": 0.0014, + "step": 235390 + }, + { + "epoch": 1.5097824797247243, + "grad_norm": 0.058626167476177216, + "learning_rate": 1.7211150063717153e-06, + "loss": 0.0011, + "step": 235400 + }, + { + "epoch": 1.5098466166185105, + "grad_norm": 0.15646931529045105, + "learning_rate": 1.7206924762490706e-06, + "loss": 0.0011, + "step": 235410 + }, + { + "epoch": 1.5099107535122966, + "grad_norm": 0.013812965713441372, + "learning_rate": 1.7202699872184919e-06, + "loss": 0.0005, + "step": 235420 + }, + { + "epoch": 1.5099748904060828, + "grad_norm": 0.11039616912603378, + "learning_rate": 1.7198475392852754e-06, + "loss": 0.001, + "step": 235430 + }, + { + "epoch": 1.510039027299869, + "grad_norm": 0.010939417406916618, + "learning_rate": 1.7194251324547134e-06, + "loss": 0.0006, + "step": 235440 + }, + { + "epoch": 1.510103164193655, + "grad_norm": 0.10745838284492493, + "learning_rate": 1.719002766732098e-06, + "loss": 0.0015, + "step": 235450 + }, + { + "epoch": 1.510167301087441, + "grad_norm": 0.10931447893381119, + "learning_rate": 1.7185804421227242e-06, + "loss": 0.0013, + "step": 235460 + }, + { + "epoch": 1.5102314379812272, + "grad_norm": 0.017885947600007057, + "learning_rate": 1.7181581586318823e-06, + "loss": 0.0012, + "step": 235470 + }, + { + "epoch": 1.510295574875013, + "grad_norm": 0.0009909237269312143, + "learning_rate": 1.717735916264865e-06, + "loss": 0.0009, + "step": 235480 + }, + { + "epoch": 1.5103597117687992, + "grad_norm": 0.06282038241624832, + "learning_rate": 1.7173137150269597e-06, + "loss": 0.0009, + "step": 235490 + }, + { + "epoch": 1.5104238486625854, + "grad_norm": 0.08964784443378448, + "learning_rate": 1.716891554923462e-06, + "loss": 0.001, + "step": 235500 + }, + { + "epoch": 1.5104879855563715, + "grad_norm": 0.034846872091293335, + "learning_rate": 1.7164694359596595e-06, + "loss": 0.0011, + "step": 235510 + }, + { + "epoch": 1.5105521224501577, + "grad_norm": 0.06375276297330856, + "learning_rate": 1.7160473581408415e-06, + "loss": 0.0028, + "step": 235520 + }, + { + "epoch": 1.5106162593439438, + "grad_norm": 0.11003471910953522, + "learning_rate": 1.7156253214722956e-06, + "loss": 0.0005, + "step": 235530 + }, + { + "epoch": 1.5106803962377298, + "grad_norm": 0.23813316226005554, + "learning_rate": 1.7152033259593137e-06, + "loss": 0.0025, + "step": 235540 + }, + { + "epoch": 1.510744533131516, + "grad_norm": 0.009359259158372879, + "learning_rate": 1.7147813716071815e-06, + "loss": 0.0012, + "step": 235550 + }, + { + "epoch": 1.510808670025302, + "grad_norm": 0.08566562831401825, + "learning_rate": 1.7143594584211864e-06, + "loss": 0.0016, + "step": 235560 + }, + { + "epoch": 1.510872806919088, + "grad_norm": 0.0824081152677536, + "learning_rate": 1.7139375864066137e-06, + "loss": 0.0012, + "step": 235570 + }, + { + "epoch": 1.5109369438128741, + "grad_norm": 0.026241037994623184, + "learning_rate": 1.7135157555687537e-06, + "loss": 0.0013, + "step": 235580 + }, + { + "epoch": 1.5110010807066603, + "grad_norm": 0.3644322454929352, + "learning_rate": 1.7130939659128897e-06, + "loss": 0.0026, + "step": 235590 + }, + { + "epoch": 1.5110652176004464, + "grad_norm": 0.015625417232513428, + "learning_rate": 1.712672217444306e-06, + "loss": 0.0014, + "step": 235600 + }, + { + "epoch": 1.5111293544942326, + "grad_norm": 0.057176608592271805, + "learning_rate": 1.7122505101682901e-06, + "loss": 0.0012, + "step": 235610 + }, + { + "epoch": 1.5111934913880187, + "grad_norm": 0.006172089837491512, + "learning_rate": 1.7118288440901255e-06, + "loss": 0.0016, + "step": 235620 + }, + { + "epoch": 1.5112576282818047, + "grad_norm": 0.023188453167676926, + "learning_rate": 1.7114072192150954e-06, + "loss": 0.0025, + "step": 235630 + }, + { + "epoch": 1.5113217651755908, + "grad_norm": 0.06347131729125977, + "learning_rate": 1.7109856355484817e-06, + "loss": 0.001, + "step": 235640 + }, + { + "epoch": 1.5113859020693767, + "grad_norm": 0.04603705182671547, + "learning_rate": 1.7105640930955697e-06, + "loss": 0.0005, + "step": 235650 + }, + { + "epoch": 1.5114500389631629, + "grad_norm": 0.15269768238067627, + "learning_rate": 1.710142591861641e-06, + "loss": 0.0018, + "step": 235660 + }, + { + "epoch": 1.511514175856949, + "grad_norm": 0.06650374829769135, + "learning_rate": 1.7097211318519769e-06, + "loss": 0.0011, + "step": 235670 + }, + { + "epoch": 1.5115783127507352, + "grad_norm": 0.007529903668910265, + "learning_rate": 1.7092997130718564e-06, + "loss": 0.0009, + "step": 235680 + }, + { + "epoch": 1.5116424496445213, + "grad_norm": 0.0774153545498848, + "learning_rate": 1.7088783355265643e-06, + "loss": 0.003, + "step": 235690 + }, + { + "epoch": 1.5117065865383075, + "grad_norm": 0.06964616477489471, + "learning_rate": 1.708456999221379e-06, + "loss": 0.0008, + "step": 235700 + }, + { + "epoch": 1.5117707234320936, + "grad_norm": 0.15337079763412476, + "learning_rate": 1.7080357041615797e-06, + "loss": 0.0007, + "step": 235710 + }, + { + "epoch": 1.5118348603258795, + "grad_norm": 0.05863737314939499, + "learning_rate": 1.707614450352444e-06, + "loss": 0.0009, + "step": 235720 + }, + { + "epoch": 1.5118989972196657, + "grad_norm": 0.21024541556835175, + "learning_rate": 1.707193237799254e-06, + "loss": 0.001, + "step": 235730 + }, + { + "epoch": 1.5119631341134516, + "grad_norm": 0.12702852487564087, + "learning_rate": 1.7067720665072861e-06, + "loss": 0.002, + "step": 235740 + }, + { + "epoch": 1.5120272710072378, + "grad_norm": 0.07385962456464767, + "learning_rate": 1.7063509364818158e-06, + "loss": 0.001, + "step": 235750 + }, + { + "epoch": 1.512091407901024, + "grad_norm": 0.06825349479913712, + "learning_rate": 1.7059298477281243e-06, + "loss": 0.0011, + "step": 235760 + }, + { + "epoch": 1.51215554479481, + "grad_norm": 0.1421101689338684, + "learning_rate": 1.7055088002514864e-06, + "loss": 0.001, + "step": 235770 + }, + { + "epoch": 1.5122196816885962, + "grad_norm": 0.08823364228010178, + "learning_rate": 1.705087794057177e-06, + "loss": 0.0012, + "step": 235780 + }, + { + "epoch": 1.5122838185823824, + "grad_norm": 0.028474045917391777, + "learning_rate": 1.704666829150471e-06, + "loss": 0.0008, + "step": 235790 + }, + { + "epoch": 1.5123479554761683, + "grad_norm": 0.03342844173312187, + "learning_rate": 1.7042459055366467e-06, + "loss": 0.0009, + "step": 235800 + }, + { + "epoch": 1.5124120923699544, + "grad_norm": 0.061231207102537155, + "learning_rate": 1.7038250232209764e-06, + "loss": 0.001, + "step": 235810 + }, + { + "epoch": 1.5124762292637404, + "grad_norm": 0.04442818462848663, + "learning_rate": 1.7034041822087343e-06, + "loss": 0.0012, + "step": 235820 + }, + { + "epoch": 1.5125403661575265, + "grad_norm": 0.14335766434669495, + "learning_rate": 1.7029833825051922e-06, + "loss": 0.002, + "step": 235830 + }, + { + "epoch": 1.5126045030513127, + "grad_norm": 0.09852230548858643, + "learning_rate": 1.7025626241156263e-06, + "loss": 0.0012, + "step": 235840 + }, + { + "epoch": 1.5126686399450988, + "grad_norm": 0.11228171736001968, + "learning_rate": 1.7021419070453076e-06, + "loss": 0.0012, + "step": 235850 + }, + { + "epoch": 1.512732776838885, + "grad_norm": 0.11434180289506912, + "learning_rate": 1.7017212312995074e-06, + "loss": 0.0038, + "step": 235860 + }, + { + "epoch": 1.5127969137326711, + "grad_norm": 0.03543776646256447, + "learning_rate": 1.7013005968834962e-06, + "loss": 0.001, + "step": 235870 + }, + { + "epoch": 1.5128610506264573, + "grad_norm": 0.04616093263030052, + "learning_rate": 1.7008800038025469e-06, + "loss": 0.0034, + "step": 235880 + }, + { + "epoch": 1.5129251875202432, + "grad_norm": 0.06281872093677521, + "learning_rate": 1.70045945206193e-06, + "loss": 0.0012, + "step": 235890 + }, + { + "epoch": 1.5129893244140293, + "grad_norm": 0.14692969620227814, + "learning_rate": 1.7000389416669115e-06, + "loss": 0.0016, + "step": 235900 + }, + { + "epoch": 1.5130534613078153, + "grad_norm": 0.00830824300646782, + "learning_rate": 1.699618472622766e-06, + "loss": 0.0017, + "step": 235910 + }, + { + "epoch": 1.5131175982016014, + "grad_norm": 0.03200173005461693, + "learning_rate": 1.69919804493476e-06, + "loss": 0.0007, + "step": 235920 + }, + { + "epoch": 1.5131817350953876, + "grad_norm": 0.11528050899505615, + "learning_rate": 1.6987776586081595e-06, + "loss": 0.0009, + "step": 235930 + }, + { + "epoch": 1.5132458719891737, + "grad_norm": 0.06540056318044662, + "learning_rate": 1.6983573136482355e-06, + "loss": 0.0015, + "step": 235940 + }, + { + "epoch": 1.5133100088829599, + "grad_norm": 0.1667458862066269, + "learning_rate": 1.6979370100602533e-06, + "loss": 0.0022, + "step": 235950 + }, + { + "epoch": 1.513374145776746, + "grad_norm": 0.030666688457131386, + "learning_rate": 1.6975167478494814e-06, + "loss": 0.0015, + "step": 235960 + }, + { + "epoch": 1.513438282670532, + "grad_norm": 0.1034374013543129, + "learning_rate": 1.6970965270211847e-06, + "loss": 0.0026, + "step": 235970 + }, + { + "epoch": 1.513502419564318, + "grad_norm": 0.15055187046527863, + "learning_rate": 1.6966763475806297e-06, + "loss": 0.0016, + "step": 235980 + }, + { + "epoch": 1.5135665564581042, + "grad_norm": 0.00941514316946268, + "learning_rate": 1.6962562095330787e-06, + "loss": 0.0014, + "step": 235990 + }, + { + "epoch": 1.5136306933518902, + "grad_norm": 0.11294365674257278, + "learning_rate": 1.6958361128838002e-06, + "loss": 0.0018, + "step": 236000 + }, + { + "epoch": 1.5136948302456763, + "grad_norm": 0.02906079776585102, + "learning_rate": 1.6954160576380568e-06, + "loss": 0.0011, + "step": 236010 + }, + { + "epoch": 1.5137589671394625, + "grad_norm": 0.058754779398441315, + "learning_rate": 1.6949960438011104e-06, + "loss": 0.0007, + "step": 236020 + }, + { + "epoch": 1.5138231040332486, + "grad_norm": 0.10251867771148682, + "learning_rate": 1.6945760713782273e-06, + "loss": 0.0013, + "step": 236030 + }, + { + "epoch": 1.5138872409270347, + "grad_norm": 0.008062995970249176, + "learning_rate": 1.694156140374668e-06, + "loss": 0.0008, + "step": 236040 + }, + { + "epoch": 1.513951377820821, + "grad_norm": 0.12957392632961273, + "learning_rate": 1.6937362507956955e-06, + "loss": 0.0007, + "step": 236050 + }, + { + "epoch": 1.5140155147146068, + "grad_norm": 0.05608009546995163, + "learning_rate": 1.6933164026465688e-06, + "loss": 0.0008, + "step": 236060 + }, + { + "epoch": 1.514079651608393, + "grad_norm": 0.07690117508172989, + "learning_rate": 1.6928965959325527e-06, + "loss": 0.0012, + "step": 236070 + }, + { + "epoch": 1.514143788502179, + "grad_norm": 0.05481581389904022, + "learning_rate": 1.6924768306589056e-06, + "loss": 0.0008, + "step": 236080 + }, + { + "epoch": 1.514207925395965, + "grad_norm": 0.0975133627653122, + "learning_rate": 1.6920571068308871e-06, + "loss": 0.0013, + "step": 236090 + }, + { + "epoch": 1.5142720622897512, + "grad_norm": 0.029730889946222305, + "learning_rate": 1.6916374244537564e-06, + "loss": 0.0009, + "step": 236100 + }, + { + "epoch": 1.5143361991835373, + "grad_norm": 0.0672646313905716, + "learning_rate": 1.6912177835327749e-06, + "loss": 0.0009, + "step": 236110 + }, + { + "epoch": 1.5144003360773235, + "grad_norm": 0.05796928331255913, + "learning_rate": 1.6907981840731984e-06, + "loss": 0.0015, + "step": 236120 + }, + { + "epoch": 1.5144644729711096, + "grad_norm": 0.13216884434223175, + "learning_rate": 1.6903786260802868e-06, + "loss": 0.0022, + "step": 236130 + }, + { + "epoch": 1.5145286098648958, + "grad_norm": 0.08694811165332794, + "learning_rate": 1.6899591095592939e-06, + "loss": 0.0009, + "step": 236140 + }, + { + "epoch": 1.5145927467586817, + "grad_norm": 0.19647616147994995, + "learning_rate": 1.6895396345154812e-06, + "loss": 0.0021, + "step": 236150 + }, + { + "epoch": 1.5146568836524679, + "grad_norm": 0.025054756551980972, + "learning_rate": 1.6891202009541024e-06, + "loss": 0.0014, + "step": 236160 + }, + { + "epoch": 1.5147210205462538, + "grad_norm": 0.09283973276615143, + "learning_rate": 1.6887008088804124e-06, + "loss": 0.0015, + "step": 236170 + }, + { + "epoch": 1.51478515744004, + "grad_norm": 0.07384605705738068, + "learning_rate": 1.6882814582996693e-06, + "loss": 0.0014, + "step": 236180 + }, + { + "epoch": 1.514849294333826, + "grad_norm": 0.09666016697883606, + "learning_rate": 1.6878621492171265e-06, + "loss": 0.0006, + "step": 236190 + }, + { + "epoch": 1.5149134312276122, + "grad_norm": 0.224246546626091, + "learning_rate": 1.6874428816380379e-06, + "loss": 0.0011, + "step": 236200 + }, + { + "epoch": 1.5149775681213984, + "grad_norm": 0.05371362343430519, + "learning_rate": 1.6870236555676561e-06, + "loss": 0.0013, + "step": 236210 + }, + { + "epoch": 1.5150417050151845, + "grad_norm": 0.034396693110466, + "learning_rate": 1.6866044710112366e-06, + "loss": 0.0013, + "step": 236220 + }, + { + "epoch": 1.5151058419089705, + "grad_norm": 0.16512033343315125, + "learning_rate": 1.686185327974032e-06, + "loss": 0.0017, + "step": 236230 + }, + { + "epoch": 1.5151699788027566, + "grad_norm": 0.057726021856069565, + "learning_rate": 1.6857662264612934e-06, + "loss": 0.0005, + "step": 236240 + }, + { + "epoch": 1.5152341156965425, + "grad_norm": 0.08658675104379654, + "learning_rate": 1.6853471664782706e-06, + "loss": 0.0011, + "step": 236250 + }, + { + "epoch": 1.5152982525903287, + "grad_norm": 0.15499667823314667, + "learning_rate": 1.6849281480302187e-06, + "loss": 0.0013, + "step": 236260 + }, + { + "epoch": 1.5153623894841148, + "grad_norm": 0.09169743955135345, + "learning_rate": 1.684509171122386e-06, + "loss": 0.0019, + "step": 236270 + }, + { + "epoch": 1.515426526377901, + "grad_norm": 0.005327288061380386, + "learning_rate": 1.6840902357600235e-06, + "loss": 0.0012, + "step": 236280 + }, + { + "epoch": 1.5154906632716871, + "grad_norm": 0.04662052541971207, + "learning_rate": 1.683671341948378e-06, + "loss": 0.0015, + "step": 236290 + }, + { + "epoch": 1.5155548001654733, + "grad_norm": 0.05670848861336708, + "learning_rate": 1.6832524896927027e-06, + "loss": 0.0014, + "step": 236300 + }, + { + "epoch": 1.5156189370592594, + "grad_norm": 0.026139413937926292, + "learning_rate": 1.6828336789982442e-06, + "loss": 0.0128, + "step": 236310 + }, + { + "epoch": 1.5156830739530454, + "grad_norm": 0.10478154569864273, + "learning_rate": 1.6824149098702486e-06, + "loss": 0.0009, + "step": 236320 + }, + { + "epoch": 1.5157472108468315, + "grad_norm": 0.05814342573285103, + "learning_rate": 1.6819961823139668e-06, + "loss": 0.0009, + "step": 236330 + }, + { + "epoch": 1.5158113477406174, + "grad_norm": 0.09379421174526215, + "learning_rate": 1.6815774963346443e-06, + "loss": 0.0011, + "step": 236340 + }, + { + "epoch": 1.5158754846344036, + "grad_norm": 0.01812293566763401, + "learning_rate": 1.6811588519375276e-06, + "loss": 0.0012, + "step": 236350 + }, + { + "epoch": 1.5159396215281897, + "grad_norm": 0.09073521941900253, + "learning_rate": 1.6807402491278607e-06, + "loss": 0.0018, + "step": 236360 + }, + { + "epoch": 1.5160037584219759, + "grad_norm": 0.09301798045635223, + "learning_rate": 1.6803216879108919e-06, + "loss": 0.0008, + "step": 236370 + }, + { + "epoch": 1.516067895315762, + "grad_norm": 0.26336193084716797, + "learning_rate": 1.6799031682918654e-06, + "loss": 0.0011, + "step": 236380 + }, + { + "epoch": 1.5161320322095482, + "grad_norm": 0.02151043340563774, + "learning_rate": 1.6794846902760249e-06, + "loss": 0.0009, + "step": 236390 + }, + { + "epoch": 1.516196169103334, + "grad_norm": 0.12041262537240982, + "learning_rate": 1.6790662538686126e-06, + "loss": 0.0009, + "step": 236400 + }, + { + "epoch": 1.5162603059971203, + "grad_norm": 0.059923578053712845, + "learning_rate": 1.6786478590748756e-06, + "loss": 0.0008, + "step": 236410 + }, + { + "epoch": 1.5163244428909064, + "grad_norm": 0.053933363407850266, + "learning_rate": 1.678229505900054e-06, + "loss": 0.0014, + "step": 236420 + }, + { + "epoch": 1.5163885797846923, + "grad_norm": 0.045569125562906265, + "learning_rate": 1.6778111943493914e-06, + "loss": 0.001, + "step": 236430 + }, + { + "epoch": 1.5164527166784785, + "grad_norm": 0.12082313746213913, + "learning_rate": 1.6773929244281266e-06, + "loss": 0.002, + "step": 236440 + }, + { + "epoch": 1.5165168535722646, + "grad_norm": 0.1428079605102539, + "learning_rate": 1.676974696141505e-06, + "loss": 0.0025, + "step": 236450 + }, + { + "epoch": 1.5165809904660508, + "grad_norm": 0.19562873244285583, + "learning_rate": 1.6765565094947656e-06, + "loss": 0.0019, + "step": 236460 + }, + { + "epoch": 1.516645127359837, + "grad_norm": 0.11447757482528687, + "learning_rate": 1.6761383644931468e-06, + "loss": 0.0016, + "step": 236470 + }, + { + "epoch": 1.516709264253623, + "grad_norm": 0.10640858113765717, + "learning_rate": 1.675720261141891e-06, + "loss": 0.0009, + "step": 236480 + }, + { + "epoch": 1.516773401147409, + "grad_norm": 0.026647871360182762, + "learning_rate": 1.6753021994462366e-06, + "loss": 0.0012, + "step": 236490 + }, + { + "epoch": 1.5168375380411951, + "grad_norm": 0.08135776221752167, + "learning_rate": 1.6748841794114218e-06, + "loss": 0.0015, + "step": 236500 + }, + { + "epoch": 1.516901674934981, + "grad_norm": 0.02475298009812832, + "learning_rate": 1.6744662010426828e-06, + "loss": 0.0015, + "step": 236510 + }, + { + "epoch": 1.5169658118287672, + "grad_norm": 0.026814064010977745, + "learning_rate": 1.6740482643452605e-06, + "loss": 0.0028, + "step": 236520 + }, + { + "epoch": 1.5170299487225534, + "grad_norm": 0.08240596950054169, + "learning_rate": 1.6736303693243904e-06, + "loss": 0.0006, + "step": 236530 + }, + { + "epoch": 1.5170940856163395, + "grad_norm": 0.0356484018266201, + "learning_rate": 1.6732125159853097e-06, + "loss": 0.0013, + "step": 236540 + }, + { + "epoch": 1.5171582225101257, + "grad_norm": 0.06164219230413437, + "learning_rate": 1.6727947043332515e-06, + "loss": 0.0013, + "step": 236550 + }, + { + "epoch": 1.5172223594039118, + "grad_norm": 0.03566623479127884, + "learning_rate": 1.6723769343734552e-06, + "loss": 0.0013, + "step": 236560 + }, + { + "epoch": 1.517286496297698, + "grad_norm": 0.061721839010715485, + "learning_rate": 1.6719592061111545e-06, + "loss": 0.001, + "step": 236570 + }, + { + "epoch": 1.5173506331914839, + "grad_norm": 0.1656349152326584, + "learning_rate": 1.6715415195515838e-06, + "loss": 0.0014, + "step": 236580 + }, + { + "epoch": 1.51741477008527, + "grad_norm": 0.053992122411727905, + "learning_rate": 1.6711238746999742e-06, + "loss": 0.0007, + "step": 236590 + }, + { + "epoch": 1.517478906979056, + "grad_norm": 0.0747436061501503, + "learning_rate": 1.6707062715615635e-06, + "loss": 0.0008, + "step": 236600 + }, + { + "epoch": 1.5175430438728421, + "grad_norm": 0.06978696584701538, + "learning_rate": 1.6702887101415833e-06, + "loss": 0.0022, + "step": 236610 + }, + { + "epoch": 1.5176071807666283, + "grad_norm": 0.15435577929019928, + "learning_rate": 1.6698711904452625e-06, + "loss": 0.0014, + "step": 236620 + }, + { + "epoch": 1.5176713176604144, + "grad_norm": 0.077399343252182, + "learning_rate": 1.6694537124778383e-06, + "loss": 0.0004, + "step": 236630 + }, + { + "epoch": 1.5177354545542006, + "grad_norm": 0.17518320679664612, + "learning_rate": 1.669036276244539e-06, + "loss": 0.0021, + "step": 236640 + }, + { + "epoch": 1.5177995914479867, + "grad_norm": 0.047856125980615616, + "learning_rate": 1.6686188817505955e-06, + "loss": 0.0102, + "step": 236650 + }, + { + "epoch": 1.5178637283417726, + "grad_norm": 0.06831234693527222, + "learning_rate": 1.6682015290012372e-06, + "loss": 0.0018, + "step": 236660 + }, + { + "epoch": 1.5179278652355588, + "grad_norm": 0.048504315316677094, + "learning_rate": 1.6677842180016963e-06, + "loss": 0.001, + "step": 236670 + }, + { + "epoch": 1.5179920021293447, + "grad_norm": 0.017820101231336594, + "learning_rate": 1.6673669487572003e-06, + "loss": 0.0017, + "step": 236680 + }, + { + "epoch": 1.5180561390231309, + "grad_norm": 0.07297179102897644, + "learning_rate": 1.6669497212729785e-06, + "loss": 0.0007, + "step": 236690 + }, + { + "epoch": 1.518120275916917, + "grad_norm": 0.027433795854449272, + "learning_rate": 1.6665325355542573e-06, + "loss": 0.0009, + "step": 236700 + }, + { + "epoch": 1.5181844128107032, + "grad_norm": 0.0877075120806694, + "learning_rate": 1.6661153916062656e-06, + "loss": 0.0019, + "step": 236710 + }, + { + "epoch": 1.5182485497044893, + "grad_norm": 0.25056546926498413, + "learning_rate": 1.6656982894342327e-06, + "loss": 0.001, + "step": 236720 + }, + { + "epoch": 1.5183126865982755, + "grad_norm": 0.14040808379650116, + "learning_rate": 1.6652812290433829e-06, + "loss": 0.0012, + "step": 236730 + }, + { + "epoch": 1.5183768234920616, + "grad_norm": 0.15343688428401947, + "learning_rate": 1.6648642104389413e-06, + "loss": 0.0009, + "step": 236740 + }, + { + "epoch": 1.5184409603858475, + "grad_norm": 0.13074518740177155, + "learning_rate": 1.6644472336261363e-06, + "loss": 0.0013, + "step": 236750 + }, + { + "epoch": 1.5185050972796337, + "grad_norm": 0.03336082026362419, + "learning_rate": 1.6640302986101908e-06, + "loss": 0.0033, + "step": 236760 + }, + { + "epoch": 1.5185692341734196, + "grad_norm": 0.10020921379327774, + "learning_rate": 1.6636134053963304e-06, + "loss": 0.0011, + "step": 236770 + }, + { + "epoch": 1.5186333710672058, + "grad_norm": 0.1573495715856552, + "learning_rate": 1.6631965539897765e-06, + "loss": 0.002, + "step": 236780 + }, + { + "epoch": 1.518697507960992, + "grad_norm": 0.03277795761823654, + "learning_rate": 1.6627797443957567e-06, + "loss": 0.0006, + "step": 236790 + }, + { + "epoch": 1.518761644854778, + "grad_norm": 0.03879289701581001, + "learning_rate": 1.6623629766194914e-06, + "loss": 0.0022, + "step": 236800 + }, + { + "epoch": 1.5188257817485642, + "grad_norm": 0.13124696910381317, + "learning_rate": 1.6619462506662037e-06, + "loss": 0.0012, + "step": 236810 + }, + { + "epoch": 1.5188899186423503, + "grad_norm": 0.08229606598615646, + "learning_rate": 1.661529566541113e-06, + "loss": 0.0023, + "step": 236820 + }, + { + "epoch": 1.5189540555361365, + "grad_norm": 0.07102247327566147, + "learning_rate": 1.661112924249445e-06, + "loss": 0.0008, + "step": 236830 + }, + { + "epoch": 1.5190181924299224, + "grad_norm": 0.04948734492063522, + "learning_rate": 1.660696323796418e-06, + "loss": 0.0017, + "step": 236840 + }, + { + "epoch": 1.5190823293237086, + "grad_norm": 0.013510667718946934, + "learning_rate": 1.6602797651872527e-06, + "loss": 0.0008, + "step": 236850 + }, + { + "epoch": 1.5191464662174945, + "grad_norm": 0.0725022703409195, + "learning_rate": 1.659863248427167e-06, + "loss": 0.0007, + "step": 236860 + }, + { + "epoch": 1.5192106031112806, + "grad_norm": 0.08922433108091354, + "learning_rate": 1.6594467735213838e-06, + "loss": 0.0007, + "step": 236870 + }, + { + "epoch": 1.5192747400050668, + "grad_norm": 0.013453935272991657, + "learning_rate": 1.6590303404751195e-06, + "loss": 0.001, + "step": 236880 + }, + { + "epoch": 1.519338876898853, + "grad_norm": 0.0375833734869957, + "learning_rate": 1.6586139492935922e-06, + "loss": 0.0015, + "step": 236890 + }, + { + "epoch": 1.519403013792639, + "grad_norm": 0.14216962456703186, + "learning_rate": 1.658197599982021e-06, + "loss": 0.0021, + "step": 236900 + }, + { + "epoch": 1.5194671506864252, + "grad_norm": 0.08312832564115524, + "learning_rate": 1.6577812925456221e-06, + "loss": 0.0015, + "step": 236910 + }, + { + "epoch": 1.5195312875802112, + "grad_norm": 0.0007432155543938279, + "learning_rate": 1.6573650269896125e-06, + "loss": 0.001, + "step": 236920 + }, + { + "epoch": 1.5195954244739973, + "grad_norm": 0.13070756196975708, + "learning_rate": 1.6569488033192066e-06, + "loss": 0.001, + "step": 236930 + }, + { + "epoch": 1.5196595613677832, + "grad_norm": 0.016720326617360115, + "learning_rate": 1.6565326215396227e-06, + "loss": 0.0016, + "step": 236940 + }, + { + "epoch": 1.5197236982615694, + "grad_norm": 0.03852033615112305, + "learning_rate": 1.656116481656075e-06, + "loss": 0.001, + "step": 236950 + }, + { + "epoch": 1.5197878351553555, + "grad_norm": 0.24319837987422943, + "learning_rate": 1.6557003836737773e-06, + "loss": 0.0019, + "step": 236960 + }, + { + "epoch": 1.5198519720491417, + "grad_norm": 0.02781999483704567, + "learning_rate": 1.6552843275979424e-06, + "loss": 0.0008, + "step": 236970 + }, + { + "epoch": 1.5199161089429278, + "grad_norm": 0.1802973598241806, + "learning_rate": 1.6548683134337873e-06, + "loss": 0.0019, + "step": 236980 + }, + { + "epoch": 1.519980245836714, + "grad_norm": 0.02273796685039997, + "learning_rate": 1.6544523411865226e-06, + "loss": 0.0015, + "step": 236990 + }, + { + "epoch": 1.5200443827305001, + "grad_norm": 0.03507988527417183, + "learning_rate": 1.654036410861361e-06, + "loss": 0.0021, + "step": 237000 + }, + { + "epoch": 1.520108519624286, + "grad_norm": 0.07036600261926651, + "learning_rate": 1.6536205224635132e-06, + "loss": 0.0021, + "step": 237010 + }, + { + "epoch": 1.5201726565180722, + "grad_norm": 0.06622333824634552, + "learning_rate": 1.6532046759981934e-06, + "loss": 0.0004, + "step": 237020 + }, + { + "epoch": 1.5202367934118581, + "grad_norm": 0.06492827832698822, + "learning_rate": 1.652788871470611e-06, + "loss": 0.0011, + "step": 237030 + }, + { + "epoch": 1.5203009303056443, + "grad_norm": 0.09560419619083405, + "learning_rate": 1.652373108885974e-06, + "loss": 0.0025, + "step": 237040 + }, + { + "epoch": 1.5203650671994304, + "grad_norm": 0.3649875223636627, + "learning_rate": 1.6519573882494966e-06, + "loss": 0.0021, + "step": 237050 + }, + { + "epoch": 1.5204292040932166, + "grad_norm": 0.07574035227298737, + "learning_rate": 1.6515417095663861e-06, + "loss": 0.0016, + "step": 237060 + }, + { + "epoch": 1.5204933409870027, + "grad_norm": 0.006303989328444004, + "learning_rate": 1.651126072841851e-06, + "loss": 0.0011, + "step": 237070 + }, + { + "epoch": 1.5205574778807889, + "grad_norm": 0.05555268004536629, + "learning_rate": 1.650710478081098e-06, + "loss": 0.0011, + "step": 237080 + }, + { + "epoch": 1.5206216147745748, + "grad_norm": 0.33239391446113586, + "learning_rate": 1.6502949252893374e-06, + "loss": 0.0026, + "step": 237090 + }, + { + "epoch": 1.520685751668361, + "grad_norm": 0.02349112741649151, + "learning_rate": 1.6498794144717761e-06, + "loss": 0.003, + "step": 237100 + }, + { + "epoch": 1.520749888562147, + "grad_norm": 0.13826599717140198, + "learning_rate": 1.6494639456336198e-06, + "loss": 0.001, + "step": 237110 + }, + { + "epoch": 1.520814025455933, + "grad_norm": 0.04017220437526703, + "learning_rate": 1.6490485187800731e-06, + "loss": 0.001, + "step": 237120 + }, + { + "epoch": 1.5208781623497192, + "grad_norm": 0.05414574220776558, + "learning_rate": 1.6486331339163452e-06, + "loss": 0.0017, + "step": 237130 + }, + { + "epoch": 1.5209422992435053, + "grad_norm": 0.03123663365840912, + "learning_rate": 1.6482177910476393e-06, + "loss": 0.001, + "step": 237140 + }, + { + "epoch": 1.5210064361372915, + "grad_norm": 0.054575365036726, + "learning_rate": 1.6478024901791583e-06, + "loss": 0.0011, + "step": 237150 + }, + { + "epoch": 1.5210705730310776, + "grad_norm": 0.25057604908943176, + "learning_rate": 1.6473872313161094e-06, + "loss": 0.0015, + "step": 237160 + }, + { + "epoch": 1.5211347099248638, + "grad_norm": 0.05438575521111488, + "learning_rate": 1.6469720144636946e-06, + "loss": 0.0012, + "step": 237170 + }, + { + "epoch": 1.5211988468186497, + "grad_norm": 0.13939109444618225, + "learning_rate": 1.646556839627117e-06, + "loss": 0.0018, + "step": 237180 + }, + { + "epoch": 1.5212629837124358, + "grad_norm": 0.19489020109176636, + "learning_rate": 1.646141706811577e-06, + "loss": 0.0014, + "step": 237190 + }, + { + "epoch": 1.5213271206062218, + "grad_norm": 0.04001326486468315, + "learning_rate": 1.64572661602228e-06, + "loss": 0.0011, + "step": 237200 + }, + { + "epoch": 1.521391257500008, + "grad_norm": 0.07058380544185638, + "learning_rate": 1.645311567264426e-06, + "loss": 0.001, + "step": 237210 + }, + { + "epoch": 1.521455394393794, + "grad_norm": 0.14475677907466888, + "learning_rate": 1.6448965605432155e-06, + "loss": 0.0017, + "step": 237220 + }, + { + "epoch": 1.5215195312875802, + "grad_norm": 0.20844435691833496, + "learning_rate": 1.6444815958638471e-06, + "loss": 0.001, + "step": 237230 + }, + { + "epoch": 1.5215836681813664, + "grad_norm": 0.09307936578989029, + "learning_rate": 1.6440666732315248e-06, + "loss": 0.0009, + "step": 237240 + }, + { + "epoch": 1.5216478050751525, + "grad_norm": 0.1734868884086609, + "learning_rate": 1.6436517926514444e-06, + "loss": 0.0012, + "step": 237250 + }, + { + "epoch": 1.5217119419689387, + "grad_norm": 0.02707342617213726, + "learning_rate": 1.6432369541288068e-06, + "loss": 0.0007, + "step": 237260 + }, + { + "epoch": 1.5217760788627246, + "grad_norm": 0.0013223428977653384, + "learning_rate": 1.642822157668807e-06, + "loss": 0.0005, + "step": 237270 + }, + { + "epoch": 1.5218402157565107, + "grad_norm": 0.10929550230503082, + "learning_rate": 1.6424074032766469e-06, + "loss": 0.0063, + "step": 237280 + }, + { + "epoch": 1.5219043526502967, + "grad_norm": 0.03251928463578224, + "learning_rate": 1.6419926909575217e-06, + "loss": 0.0013, + "step": 237290 + }, + { + "epoch": 1.5219684895440828, + "grad_norm": 0.037062376737594604, + "learning_rate": 1.641578020716626e-06, + "loss": 0.0012, + "step": 237300 + }, + { + "epoch": 1.522032626437869, + "grad_norm": 0.14792048931121826, + "learning_rate": 1.6411633925591597e-06, + "loss": 0.0027, + "step": 237310 + }, + { + "epoch": 1.5220967633316551, + "grad_norm": 0.04708145186305046, + "learning_rate": 1.6407488064903165e-06, + "loss": 0.0022, + "step": 237320 + }, + { + "epoch": 1.5221609002254413, + "grad_norm": 0.025157131254673004, + "learning_rate": 1.6403342625152918e-06, + "loss": 0.0014, + "step": 237330 + }, + { + "epoch": 1.5222250371192274, + "grad_norm": 0.04621124267578125, + "learning_rate": 1.6399197606392787e-06, + "loss": 0.0006, + "step": 237340 + }, + { + "epoch": 1.5222891740130133, + "grad_norm": 0.18020835518836975, + "learning_rate": 1.6395053008674732e-06, + "loss": 0.0018, + "step": 237350 + }, + { + "epoch": 1.5223533109067995, + "grad_norm": 0.02694026753306389, + "learning_rate": 1.6390908832050683e-06, + "loss": 0.0023, + "step": 237360 + }, + { + "epoch": 1.5224174478005854, + "grad_norm": 0.021097462624311447, + "learning_rate": 1.638676507657257e-06, + "loss": 0.0011, + "step": 237370 + }, + { + "epoch": 1.5224815846943716, + "grad_norm": 0.1032140851020813, + "learning_rate": 1.6382621742292292e-06, + "loss": 0.001, + "step": 237380 + }, + { + "epoch": 1.5225457215881577, + "grad_norm": 0.026876067742705345, + "learning_rate": 1.6378478829261806e-06, + "loss": 0.001, + "step": 237390 + }, + { + "epoch": 1.5226098584819439, + "grad_norm": 0.042561158537864685, + "learning_rate": 1.6374336337533015e-06, + "loss": 0.0004, + "step": 237400 + }, + { + "epoch": 1.52267399537573, + "grad_norm": 0.06443782150745392, + "learning_rate": 1.637019426715782e-06, + "loss": 0.0013, + "step": 237410 + }, + { + "epoch": 1.5227381322695162, + "grad_norm": 0.0814862996339798, + "learning_rate": 1.6366052618188105e-06, + "loss": 0.0006, + "step": 237420 + }, + { + "epoch": 1.5228022691633023, + "grad_norm": 0.09525315463542938, + "learning_rate": 1.6361911390675806e-06, + "loss": 0.0011, + "step": 237430 + }, + { + "epoch": 1.5228664060570882, + "grad_norm": 0.27986252307891846, + "learning_rate": 1.63577705846728e-06, + "loss": 0.0005, + "step": 237440 + }, + { + "epoch": 1.5229305429508744, + "grad_norm": 0.043385230004787445, + "learning_rate": 1.6353630200230951e-06, + "loss": 0.0012, + "step": 237450 + }, + { + "epoch": 1.5229946798446603, + "grad_norm": 0.05866341292858124, + "learning_rate": 1.634949023740217e-06, + "loss": 0.0016, + "step": 237460 + }, + { + "epoch": 1.5230588167384465, + "grad_norm": 0.06352284550666809, + "learning_rate": 1.6345350696238339e-06, + "loss": 0.0016, + "step": 237470 + }, + { + "epoch": 1.5231229536322326, + "grad_norm": 0.01987922377884388, + "learning_rate": 1.6341211576791317e-06, + "loss": 0.0011, + "step": 237480 + }, + { + "epoch": 1.5231870905260187, + "grad_norm": 0.28827333450317383, + "learning_rate": 1.633707287911297e-06, + "loss": 0.0015, + "step": 237490 + }, + { + "epoch": 1.523251227419805, + "grad_norm": 0.018926220014691353, + "learning_rate": 1.633293460325514e-06, + "loss": 0.0014, + "step": 237500 + }, + { + "epoch": 1.523315364313591, + "grad_norm": 0.14290401339530945, + "learning_rate": 1.6328796749269715e-06, + "loss": 0.0015, + "step": 237510 + }, + { + "epoch": 1.523379501207377, + "grad_norm": 0.040627621114254, + "learning_rate": 1.6324659317208536e-06, + "loss": 0.0011, + "step": 237520 + }, + { + "epoch": 1.5234436381011631, + "grad_norm": 0.11878825724124908, + "learning_rate": 1.6320522307123443e-06, + "loss": 0.0007, + "step": 237530 + }, + { + "epoch": 1.5235077749949493, + "grad_norm": 0.07322997599840164, + "learning_rate": 1.6316385719066258e-06, + "loss": 0.0012, + "step": 237540 + }, + { + "epoch": 1.5235719118887352, + "grad_norm": 0.07629713416099548, + "learning_rate": 1.631224955308885e-06, + "loss": 0.0021, + "step": 237550 + }, + { + "epoch": 1.5236360487825213, + "grad_norm": 0.11985758692026138, + "learning_rate": 1.6308113809243026e-06, + "loss": 0.0016, + "step": 237560 + }, + { + "epoch": 1.5237001856763075, + "grad_norm": 0.06853505223989487, + "learning_rate": 1.6303978487580607e-06, + "loss": 0.0011, + "step": 237570 + }, + { + "epoch": 1.5237643225700936, + "grad_norm": 0.06833287328481674, + "learning_rate": 1.629984358815343e-06, + "loss": 0.0005, + "step": 237580 + }, + { + "epoch": 1.5238284594638798, + "grad_norm": 0.07232213020324707, + "learning_rate": 1.6295709111013302e-06, + "loss": 0.0006, + "step": 237590 + }, + { + "epoch": 1.523892596357666, + "grad_norm": 0.04098551347851753, + "learning_rate": 1.6291575056212029e-06, + "loss": 0.001, + "step": 237600 + }, + { + "epoch": 1.5239567332514519, + "grad_norm": 0.16344477236270905, + "learning_rate": 1.6287441423801386e-06, + "loss": 0.0019, + "step": 237610 + }, + { + "epoch": 1.524020870145238, + "grad_norm": 0.003833626862615347, + "learning_rate": 1.628330821383322e-06, + "loss": 0.001, + "step": 237620 + }, + { + "epoch": 1.524085007039024, + "grad_norm": 0.13997580111026764, + "learning_rate": 1.6279175426359295e-06, + "loss": 0.0017, + "step": 237630 + }, + { + "epoch": 1.52414914393281, + "grad_norm": 0.15628334879875183, + "learning_rate": 1.6275043061431406e-06, + "loss": 0.0006, + "step": 237640 + }, + { + "epoch": 1.5242132808265962, + "grad_norm": 0.20656491816043854, + "learning_rate": 1.6270911119101313e-06, + "loss": 0.0017, + "step": 237650 + }, + { + "epoch": 1.5242774177203824, + "grad_norm": 0.03471900895237923, + "learning_rate": 1.6266779599420823e-06, + "loss": 0.0013, + "step": 237660 + }, + { + "epoch": 1.5243415546141685, + "grad_norm": 0.15333348512649536, + "learning_rate": 1.6262648502441697e-06, + "loss": 0.0009, + "step": 237670 + }, + { + "epoch": 1.5244056915079547, + "grad_norm": 0.06840582937002182, + "learning_rate": 1.6258517828215692e-06, + "loss": 0.001, + "step": 237680 + }, + { + "epoch": 1.5244698284017408, + "grad_norm": 0.01534581370651722, + "learning_rate": 1.6254387576794562e-06, + "loss": 0.0007, + "step": 237690 + }, + { + "epoch": 1.5245339652955268, + "grad_norm": 0.1612095683813095, + "learning_rate": 1.6250257748230086e-06, + "loss": 0.0014, + "step": 237700 + }, + { + "epoch": 1.524598102189313, + "grad_norm": 0.06327960640192032, + "learning_rate": 1.6246128342574002e-06, + "loss": 0.0022, + "step": 237710 + }, + { + "epoch": 1.5246622390830988, + "grad_norm": 0.03815099224448204, + "learning_rate": 1.624199935987804e-06, + "loss": 0.0008, + "step": 237720 + }, + { + "epoch": 1.524726375976885, + "grad_norm": 0.10183555632829666, + "learning_rate": 1.6237870800193966e-06, + "loss": 0.0019, + "step": 237730 + }, + { + "epoch": 1.5247905128706711, + "grad_norm": 0.30045872926712036, + "learning_rate": 1.6233742663573503e-06, + "loss": 0.0024, + "step": 237740 + }, + { + "epoch": 1.5248546497644573, + "grad_norm": 0.029955746605992317, + "learning_rate": 1.6229614950068374e-06, + "loss": 0.001, + "step": 237750 + }, + { + "epoch": 1.5249187866582434, + "grad_norm": 0.01774410903453827, + "learning_rate": 1.6225487659730288e-06, + "loss": 0.0026, + "step": 237760 + }, + { + "epoch": 1.5249829235520296, + "grad_norm": 0.07637742906808853, + "learning_rate": 1.6221360792611002e-06, + "loss": 0.0009, + "step": 237770 + }, + { + "epoch": 1.5250470604458155, + "grad_norm": 0.05893915146589279, + "learning_rate": 1.6217234348762202e-06, + "loss": 0.0016, + "step": 237780 + }, + { + "epoch": 1.5251111973396017, + "grad_norm": 0.025320112705230713, + "learning_rate": 1.62131083282356e-06, + "loss": 0.0012, + "step": 237790 + }, + { + "epoch": 1.5251753342333876, + "grad_norm": 0.24433287978172302, + "learning_rate": 1.6208982731082879e-06, + "loss": 0.0014, + "step": 237800 + }, + { + "epoch": 1.5252394711271737, + "grad_norm": 0.12436637282371521, + "learning_rate": 1.6204857557355775e-06, + "loss": 0.0011, + "step": 237810 + }, + { + "epoch": 1.5253036080209599, + "grad_norm": 0.11953530460596085, + "learning_rate": 1.6200732807105956e-06, + "loss": 0.0023, + "step": 237820 + }, + { + "epoch": 1.525367744914746, + "grad_norm": 0.06200635805726051, + "learning_rate": 1.6196608480385112e-06, + "loss": 0.0009, + "step": 237830 + }, + { + "epoch": 1.5254318818085322, + "grad_norm": 0.14688628911972046, + "learning_rate": 1.6192484577244904e-06, + "loss": 0.0012, + "step": 237840 + }, + { + "epoch": 1.5254960187023183, + "grad_norm": 0.0018314635381102562, + "learning_rate": 1.6188361097737043e-06, + "loss": 0.0008, + "step": 237850 + }, + { + "epoch": 1.5255601555961045, + "grad_norm": 0.05635478347539902, + "learning_rate": 1.6184238041913187e-06, + "loss": 0.001, + "step": 237860 + }, + { + "epoch": 1.5256242924898904, + "grad_norm": 0.0111836614087224, + "learning_rate": 1.6180115409824976e-06, + "loss": 0.0013, + "step": 237870 + }, + { + "epoch": 1.5256884293836765, + "grad_norm": 0.004777786787599325, + "learning_rate": 1.6175993201524103e-06, + "loss": 0.0012, + "step": 237880 + }, + { + "epoch": 1.5257525662774625, + "grad_norm": 0.08003873378038406, + "learning_rate": 1.6171871417062218e-06, + "loss": 0.0017, + "step": 237890 + }, + { + "epoch": 1.5258167031712486, + "grad_norm": 0.024621382355690002, + "learning_rate": 1.6167750056490955e-06, + "loss": 0.0021, + "step": 237900 + }, + { + "epoch": 1.5258808400650348, + "grad_norm": 0.08933726698160172, + "learning_rate": 1.6163629119861945e-06, + "loss": 0.0013, + "step": 237910 + }, + { + "epoch": 1.525944976958821, + "grad_norm": 0.038684818893671036, + "learning_rate": 1.6159508607226865e-06, + "loss": 0.0009, + "step": 237920 + }, + { + "epoch": 1.526009113852607, + "grad_norm": 0.1163448691368103, + "learning_rate": 1.6155388518637327e-06, + "loss": 0.0011, + "step": 237930 + }, + { + "epoch": 1.5260732507463932, + "grad_norm": 0.07280529290437698, + "learning_rate": 1.6151268854144958e-06, + "loss": 0.0012, + "step": 237940 + }, + { + "epoch": 1.5261373876401791, + "grad_norm": 0.09436652809381485, + "learning_rate": 1.614714961380136e-06, + "loss": 0.0008, + "step": 237950 + }, + { + "epoch": 1.5262015245339653, + "grad_norm": 0.039469748735427856, + "learning_rate": 1.6143030797658194e-06, + "loss": 0.0021, + "step": 237960 + }, + { + "epoch": 1.5262656614277514, + "grad_norm": 0.009606624953448772, + "learning_rate": 1.6138912405767048e-06, + "loss": 0.0004, + "step": 237970 + }, + { + "epoch": 1.5263297983215374, + "grad_norm": 0.18109820783138275, + "learning_rate": 1.6134794438179534e-06, + "loss": 0.002, + "step": 237980 + }, + { + "epoch": 1.5263939352153235, + "grad_norm": 0.1913572996854782, + "learning_rate": 1.6130676894947228e-06, + "loss": 0.0026, + "step": 237990 + }, + { + "epoch": 1.5264580721091097, + "grad_norm": 0.027836723253130913, + "learning_rate": 1.6126559776121764e-06, + "loss": 0.0008, + "step": 238000 + }, + { + "epoch": 1.5265222090028958, + "grad_norm": 0.11762955784797668, + "learning_rate": 1.6122443081754713e-06, + "loss": 0.0009, + "step": 238010 + }, + { + "epoch": 1.526586345896682, + "grad_norm": 0.06743264943361282, + "learning_rate": 1.611832681189765e-06, + "loss": 0.0015, + "step": 238020 + }, + { + "epoch": 1.526650482790468, + "grad_norm": 0.005941579584032297, + "learning_rate": 1.611421096660218e-06, + "loss": 0.0012, + "step": 238030 + }, + { + "epoch": 1.526714619684254, + "grad_norm": 0.13325421512126923, + "learning_rate": 1.6110095545919863e-06, + "loss": 0.0016, + "step": 238040 + }, + { + "epoch": 1.5267787565780402, + "grad_norm": 0.032834045588970184, + "learning_rate": 1.6105980549902273e-06, + "loss": 0.0006, + "step": 238050 + }, + { + "epoch": 1.5268428934718261, + "grad_norm": 0.09828424453735352, + "learning_rate": 1.610186597860095e-06, + "loss": 0.0011, + "step": 238060 + }, + { + "epoch": 1.5269070303656123, + "grad_norm": 0.11012592911720276, + "learning_rate": 1.6097751832067492e-06, + "loss": 0.0016, + "step": 238070 + }, + { + "epoch": 1.5269711672593984, + "grad_norm": 0.10598249733448029, + "learning_rate": 1.6093638110353432e-06, + "loss": 0.0011, + "step": 238080 + }, + { + "epoch": 1.5270353041531846, + "grad_norm": 0.007695229258388281, + "learning_rate": 1.6089524813510321e-06, + "loss": 0.0009, + "step": 238090 + }, + { + "epoch": 1.5270994410469707, + "grad_norm": 0.2664151191711426, + "learning_rate": 1.6085411941589685e-06, + "loss": 0.0028, + "step": 238100 + }, + { + "epoch": 1.5271635779407569, + "grad_norm": 0.14196178317070007, + "learning_rate": 1.6081299494643087e-06, + "loss": 0.0017, + "step": 238110 + }, + { + "epoch": 1.527227714834543, + "grad_norm": 0.1498994082212448, + "learning_rate": 1.607718747272205e-06, + "loss": 0.0007, + "step": 238120 + }, + { + "epoch": 1.527291851728329, + "grad_norm": 0.10175507515668869, + "learning_rate": 1.6073075875878097e-06, + "loss": 0.0053, + "step": 238130 + }, + { + "epoch": 1.527355988622115, + "grad_norm": 0.007635892368853092, + "learning_rate": 1.6068964704162736e-06, + "loss": 0.0018, + "step": 238140 + }, + { + "epoch": 1.527420125515901, + "grad_norm": 0.022415148094296455, + "learning_rate": 1.6064853957627513e-06, + "loss": 0.0009, + "step": 238150 + }, + { + "epoch": 1.5274842624096872, + "grad_norm": 0.18747855722904205, + "learning_rate": 1.6060743636323923e-06, + "loss": 0.0016, + "step": 238160 + }, + { + "epoch": 1.5275483993034733, + "grad_norm": 0.126417875289917, + "learning_rate": 1.6056633740303456e-06, + "loss": 0.0016, + "step": 238170 + }, + { + "epoch": 1.5276125361972595, + "grad_norm": 0.1865348368883133, + "learning_rate": 1.6052524269617641e-06, + "loss": 0.0019, + "step": 238180 + }, + { + "epoch": 1.5276766730910456, + "grad_norm": 0.20272643864154816, + "learning_rate": 1.604841522431796e-06, + "loss": 0.0011, + "step": 238190 + }, + { + "epoch": 1.5277408099848317, + "grad_norm": 0.08572908490896225, + "learning_rate": 1.60443066044559e-06, + "loss": 0.0015, + "step": 238200 + }, + { + "epoch": 1.5278049468786177, + "grad_norm": 0.09908934682607651, + "learning_rate": 1.6040198410082936e-06, + "loss": 0.0028, + "step": 238210 + }, + { + "epoch": 1.5278690837724038, + "grad_norm": 0.009173417463898659, + "learning_rate": 1.6036090641250568e-06, + "loss": 0.002, + "step": 238220 + }, + { + "epoch": 1.5279332206661898, + "grad_norm": 0.04009650647640228, + "learning_rate": 1.603198329801024e-06, + "loss": 0.0011, + "step": 238230 + }, + { + "epoch": 1.527997357559976, + "grad_norm": 0.08476738631725311, + "learning_rate": 1.6027876380413453e-06, + "loss": 0.0011, + "step": 238240 + }, + { + "epoch": 1.528061494453762, + "grad_norm": 0.19845592975616455, + "learning_rate": 1.602376988851166e-06, + "loss": 0.0016, + "step": 238250 + }, + { + "epoch": 1.5281256313475482, + "grad_norm": 0.09100116789340973, + "learning_rate": 1.601966382235629e-06, + "loss": 0.0014, + "step": 238260 + }, + { + "epoch": 1.5281897682413343, + "grad_norm": 0.019955476745963097, + "learning_rate": 1.6015558181998842e-06, + "loss": 0.0009, + "step": 238270 + }, + { + "epoch": 1.5282539051351205, + "grad_norm": 0.07049417495727539, + "learning_rate": 1.6011452967490732e-06, + "loss": 0.0016, + "step": 238280 + }, + { + "epoch": 1.5283180420289066, + "grad_norm": 0.01399952918291092, + "learning_rate": 1.6007348178883391e-06, + "loss": 0.0018, + "step": 238290 + }, + { + "epoch": 1.5283821789226926, + "grad_norm": 0.01669745333492756, + "learning_rate": 1.6003243816228292e-06, + "loss": 0.0007, + "step": 238300 + }, + { + "epoch": 1.5284463158164787, + "grad_norm": 0.07379358261823654, + "learning_rate": 1.5999139879576842e-06, + "loss": 0.0015, + "step": 238310 + }, + { + "epoch": 1.5285104527102646, + "grad_norm": 0.014796216040849686, + "learning_rate": 1.5995036368980472e-06, + "loss": 0.0006, + "step": 238320 + }, + { + "epoch": 1.5285745896040508, + "grad_norm": 0.09059396386146545, + "learning_rate": 1.599093328449058e-06, + "loss": 0.001, + "step": 238330 + }, + { + "epoch": 1.528638726497837, + "grad_norm": 0.39618799090385437, + "learning_rate": 1.5986830626158618e-06, + "loss": 0.003, + "step": 238340 + }, + { + "epoch": 1.528702863391623, + "grad_norm": 0.021616334095597267, + "learning_rate": 1.598272839403598e-06, + "loss": 0.0006, + "step": 238350 + }, + { + "epoch": 1.5287670002854092, + "grad_norm": 0.0020888724830001593, + "learning_rate": 1.5978626588174061e-06, + "loss": 0.0007, + "step": 238360 + }, + { + "epoch": 1.5288311371791954, + "grad_norm": 0.012589544989168644, + "learning_rate": 1.5974525208624253e-06, + "loss": 0.0022, + "step": 238370 + }, + { + "epoch": 1.5288952740729815, + "grad_norm": 0.004906138405203819, + "learning_rate": 1.5970424255437977e-06, + "loss": 0.0014, + "step": 238380 + }, + { + "epoch": 1.5289594109667675, + "grad_norm": 0.0733562558889389, + "learning_rate": 1.5966323728666606e-06, + "loss": 0.0008, + "step": 238390 + }, + { + "epoch": 1.5290235478605536, + "grad_norm": 0.03818623349070549, + "learning_rate": 1.5962223628361523e-06, + "loss": 0.0016, + "step": 238400 + }, + { + "epoch": 1.5290876847543395, + "grad_norm": 0.0623963326215744, + "learning_rate": 1.5958123954574084e-06, + "loss": 0.001, + "step": 238410 + }, + { + "epoch": 1.5291518216481257, + "grad_norm": 0.08566606044769287, + "learning_rate": 1.5954024707355703e-06, + "loss": 0.0009, + "step": 238420 + }, + { + "epoch": 1.5292159585419118, + "grad_norm": 0.036736611276865005, + "learning_rate": 1.5949925886757722e-06, + "loss": 0.0012, + "step": 238430 + }, + { + "epoch": 1.529280095435698, + "grad_norm": 0.05121005326509476, + "learning_rate": 1.5945827492831484e-06, + "loss": 0.0014, + "step": 238440 + }, + { + "epoch": 1.5293442323294841, + "grad_norm": 0.07882276922464371, + "learning_rate": 1.594172952562839e-06, + "loss": 0.0013, + "step": 238450 + }, + { + "epoch": 1.5294083692232703, + "grad_norm": 0.10243957489728928, + "learning_rate": 1.5937631985199764e-06, + "loss": 0.0065, + "step": 238460 + }, + { + "epoch": 1.5294725061170562, + "grad_norm": 0.11681561917066574, + "learning_rate": 1.5933534871596952e-06, + "loss": 0.0012, + "step": 238470 + }, + { + "epoch": 1.5295366430108424, + "grad_norm": 0.026318980380892754, + "learning_rate": 1.5929438184871277e-06, + "loss": 0.0012, + "step": 238480 + }, + { + "epoch": 1.5296007799046283, + "grad_norm": 0.05359599366784096, + "learning_rate": 1.5925341925074112e-06, + "loss": 0.0052, + "step": 238490 + }, + { + "epoch": 1.5296649167984144, + "grad_norm": 0.054443489760160446, + "learning_rate": 1.5921246092256758e-06, + "loss": 0.0008, + "step": 238500 + }, + { + "epoch": 1.5297290536922006, + "grad_norm": 0.003965005744248629, + "learning_rate": 1.591715068647055e-06, + "loss": 0.0005, + "step": 238510 + }, + { + "epoch": 1.5297931905859867, + "grad_norm": 0.034113626927137375, + "learning_rate": 1.5913055707766788e-06, + "loss": 0.0019, + "step": 238520 + }, + { + "epoch": 1.5298573274797729, + "grad_norm": 0.012696630321443081, + "learning_rate": 1.5908961156196818e-06, + "loss": 0.0011, + "step": 238530 + }, + { + "epoch": 1.529921464373559, + "grad_norm": 0.13981802761554718, + "learning_rate": 1.5904867031811926e-06, + "loss": 0.001, + "step": 238540 + }, + { + "epoch": 1.5299856012673452, + "grad_norm": 0.10802409797906876, + "learning_rate": 1.5900773334663417e-06, + "loss": 0.0008, + "step": 238550 + }, + { + "epoch": 1.530049738161131, + "grad_norm": 0.07713709026575089, + "learning_rate": 1.5896680064802573e-06, + "loss": 0.0015, + "step": 238560 + }, + { + "epoch": 1.5301138750549172, + "grad_norm": 0.08833787590265274, + "learning_rate": 1.589258722228072e-06, + "loss": 0.0012, + "step": 238570 + }, + { + "epoch": 1.5301780119487032, + "grad_norm": 0.02495083026587963, + "learning_rate": 1.5888494807149118e-06, + "loss": 0.0007, + "step": 238580 + }, + { + "epoch": 1.5302421488424893, + "grad_norm": 0.12479456514120102, + "learning_rate": 1.5884402819459044e-06, + "loss": 0.001, + "step": 238590 + }, + { + "epoch": 1.5303062857362755, + "grad_norm": 0.04055798798799515, + "learning_rate": 1.5880311259261806e-06, + "loss": 0.0012, + "step": 238600 + }, + { + "epoch": 1.5303704226300616, + "grad_norm": 0.06542731076478958, + "learning_rate": 1.5876220126608643e-06, + "loss": 0.0011, + "step": 238610 + }, + { + "epoch": 1.5304345595238478, + "grad_norm": 0.07246585935354233, + "learning_rate": 1.5872129421550836e-06, + "loss": 0.0017, + "step": 238620 + }, + { + "epoch": 1.530498696417634, + "grad_norm": 0.06228356808423996, + "learning_rate": 1.586803914413962e-06, + "loss": 0.0013, + "step": 238630 + }, + { + "epoch": 1.5305628333114198, + "grad_norm": 0.004853600636124611, + "learning_rate": 1.5863949294426284e-06, + "loss": 0.0009, + "step": 238640 + }, + { + "epoch": 1.530626970205206, + "grad_norm": 0.0747649073600769, + "learning_rate": 1.5859859872462058e-06, + "loss": 0.0012, + "step": 238650 + }, + { + "epoch": 1.5306911070989921, + "grad_norm": 0.0786866620182991, + "learning_rate": 1.5855770878298188e-06, + "loss": 0.001, + "step": 238660 + }, + { + "epoch": 1.530755243992778, + "grad_norm": 0.0508040115237236, + "learning_rate": 1.58516823119859e-06, + "loss": 0.0016, + "step": 238670 + }, + { + "epoch": 1.5308193808865642, + "grad_norm": 0.05188383534550667, + "learning_rate": 1.5847594173576447e-06, + "loss": 0.0016, + "step": 238680 + }, + { + "epoch": 1.5308835177803504, + "grad_norm": 0.08505114167928696, + "learning_rate": 1.584350646312105e-06, + "loss": 0.0012, + "step": 238690 + }, + { + "epoch": 1.5309476546741365, + "grad_norm": 0.10715041309595108, + "learning_rate": 1.5839419180670935e-06, + "loss": 0.0009, + "step": 238700 + }, + { + "epoch": 1.5310117915679227, + "grad_norm": 0.0594995841383934, + "learning_rate": 1.5835332326277287e-06, + "loss": 0.0008, + "step": 238710 + }, + { + "epoch": 1.5310759284617088, + "grad_norm": 0.0047898245975375175, + "learning_rate": 1.583124589999136e-06, + "loss": 0.0005, + "step": 238720 + }, + { + "epoch": 1.5311400653554947, + "grad_norm": 0.19723492860794067, + "learning_rate": 1.5827159901864342e-06, + "loss": 0.0012, + "step": 238730 + }, + { + "epoch": 1.5312042022492809, + "grad_norm": 0.053472548723220825, + "learning_rate": 1.5823074331947418e-06, + "loss": 0.0008, + "step": 238740 + }, + { + "epoch": 1.5312683391430668, + "grad_norm": 0.15455129742622375, + "learning_rate": 1.5818989190291816e-06, + "loss": 0.0007, + "step": 238750 + }, + { + "epoch": 1.531332476036853, + "grad_norm": 0.06002508103847504, + "learning_rate": 1.5814904476948707e-06, + "loss": 0.0008, + "step": 238760 + }, + { + "epoch": 1.5313966129306391, + "grad_norm": 0.1651560366153717, + "learning_rate": 1.5810820191969278e-06, + "loss": 0.0009, + "step": 238770 + }, + { + "epoch": 1.5314607498244253, + "grad_norm": 0.22709596157073975, + "learning_rate": 1.5806736335404688e-06, + "loss": 0.0018, + "step": 238780 + }, + { + "epoch": 1.5315248867182114, + "grad_norm": 0.12977087497711182, + "learning_rate": 1.5802652907306148e-06, + "loss": 0.0014, + "step": 238790 + }, + { + "epoch": 1.5315890236119976, + "grad_norm": 0.06415867060422897, + "learning_rate": 1.5798569907724804e-06, + "loss": 0.001, + "step": 238800 + }, + { + "epoch": 1.5316531605057837, + "grad_norm": 0.12139426171779633, + "learning_rate": 1.5794487336711827e-06, + "loss": 0.0014, + "step": 238810 + }, + { + "epoch": 1.5317172973995696, + "grad_norm": 0.025406209751963615, + "learning_rate": 1.5790405194318354e-06, + "loss": 0.0012, + "step": 238820 + }, + { + "epoch": 1.5317814342933558, + "grad_norm": 0.01880517229437828, + "learning_rate": 1.5786323480595562e-06, + "loss": 0.0011, + "step": 238830 + }, + { + "epoch": 1.5318455711871417, + "grad_norm": 0.04645241051912308, + "learning_rate": 1.5782242195594594e-06, + "loss": 0.0009, + "step": 238840 + }, + { + "epoch": 1.5319097080809279, + "grad_norm": 0.13483476638793945, + "learning_rate": 1.5778161339366572e-06, + "loss": 0.0013, + "step": 238850 + }, + { + "epoch": 1.531973844974714, + "grad_norm": 0.06605926156044006, + "learning_rate": 1.5774080911962657e-06, + "loss": 0.0016, + "step": 238860 + }, + { + "epoch": 1.5320379818685002, + "grad_norm": 0.15618863701820374, + "learning_rate": 1.5770000913433974e-06, + "loss": 0.0016, + "step": 238870 + }, + { + "epoch": 1.5321021187622863, + "grad_norm": 0.0538754016160965, + "learning_rate": 1.5765921343831642e-06, + "loss": 0.001, + "step": 238880 + }, + { + "epoch": 1.5321662556560725, + "grad_norm": 0.031060660257935524, + "learning_rate": 1.5761842203206767e-06, + "loss": 0.0009, + "step": 238890 + }, + { + "epoch": 1.5322303925498584, + "grad_norm": 0.02626187354326248, + "learning_rate": 1.5757763491610494e-06, + "loss": 0.0003, + "step": 238900 + }, + { + "epoch": 1.5322945294436445, + "grad_norm": 0.146221324801445, + "learning_rate": 1.5753685209093917e-06, + "loss": 0.0011, + "step": 238910 + }, + { + "epoch": 1.5323586663374305, + "grad_norm": 0.16122718155384064, + "learning_rate": 1.5749607355708142e-06, + "loss": 0.0012, + "step": 238920 + }, + { + "epoch": 1.5324228032312166, + "grad_norm": 0.06554492563009262, + "learning_rate": 1.5745529931504243e-06, + "loss": 0.0006, + "step": 238930 + }, + { + "epoch": 1.5324869401250028, + "grad_norm": 0.15830129384994507, + "learning_rate": 1.5741452936533358e-06, + "loss": 0.0015, + "step": 238940 + }, + { + "epoch": 1.532551077018789, + "grad_norm": 0.23866844177246094, + "learning_rate": 1.5737376370846547e-06, + "loss": 0.0024, + "step": 238950 + }, + { + "epoch": 1.532615213912575, + "grad_norm": 0.16389961540699005, + "learning_rate": 1.5733300234494903e-06, + "loss": 0.0014, + "step": 238960 + }, + { + "epoch": 1.5326793508063612, + "grad_norm": 0.023869449272751808, + "learning_rate": 1.5729224527529474e-06, + "loss": 0.0009, + "step": 238970 + }, + { + "epoch": 1.5327434877001473, + "grad_norm": 0.044073037803173065, + "learning_rate": 1.5725149250001377e-06, + "loss": 0.0019, + "step": 238980 + }, + { + "epoch": 1.5328076245939333, + "grad_norm": 0.0290644820779562, + "learning_rate": 1.5721074401961633e-06, + "loss": 0.0005, + "step": 238990 + }, + { + "epoch": 1.5328717614877194, + "grad_norm": 0.055113472044467926, + "learning_rate": 1.5716999983461344e-06, + "loss": 0.0011, + "step": 239000 + }, + { + "epoch": 1.5329358983815053, + "grad_norm": 0.007592161186039448, + "learning_rate": 1.5712925994551536e-06, + "loss": 0.0024, + "step": 239010 + }, + { + "epoch": 1.5330000352752915, + "grad_norm": 0.14502429962158203, + "learning_rate": 1.5708852435283283e-06, + "loss": 0.002, + "step": 239020 + }, + { + "epoch": 1.5330641721690776, + "grad_norm": 0.1949986070394516, + "learning_rate": 1.5704779305707613e-06, + "loss": 0.0028, + "step": 239030 + }, + { + "epoch": 1.5331283090628638, + "grad_norm": 0.06624270975589752, + "learning_rate": 1.570070660587557e-06, + "loss": 0.001, + "step": 239040 + }, + { + "epoch": 1.53319244595665, + "grad_norm": 0.044614873826503754, + "learning_rate": 1.5696634335838172e-06, + "loss": 0.0024, + "step": 239050 + }, + { + "epoch": 1.533256582850436, + "grad_norm": 0.0646383985877037, + "learning_rate": 1.569256249564648e-06, + "loss": 0.0008, + "step": 239060 + }, + { + "epoch": 1.533320719744222, + "grad_norm": 0.025071272626519203, + "learning_rate": 1.5688491085351499e-06, + "loss": 0.0009, + "step": 239070 + }, + { + "epoch": 1.5333848566380082, + "grad_norm": 0.027934769168496132, + "learning_rate": 1.5684420105004245e-06, + "loss": 0.0005, + "step": 239080 + }, + { + "epoch": 1.5334489935317943, + "grad_norm": 0.1389744132757187, + "learning_rate": 1.5680349554655716e-06, + "loss": 0.0016, + "step": 239090 + }, + { + "epoch": 1.5335131304255802, + "grad_norm": 0.025445854291319847, + "learning_rate": 1.567627943435695e-06, + "loss": 0.0005, + "step": 239100 + }, + { + "epoch": 1.5335772673193664, + "grad_norm": 0.0023066888097673655, + "learning_rate": 1.5672209744158935e-06, + "loss": 0.0007, + "step": 239110 + }, + { + "epoch": 1.5336414042131525, + "grad_norm": 0.11088065803050995, + "learning_rate": 1.5668140484112649e-06, + "loss": 0.0015, + "step": 239120 + }, + { + "epoch": 1.5337055411069387, + "grad_norm": 0.09681985527276993, + "learning_rate": 1.5664071654269114e-06, + "loss": 0.001, + "step": 239130 + }, + { + "epoch": 1.5337696780007248, + "grad_norm": 0.04255175217986107, + "learning_rate": 1.5660003254679302e-06, + "loss": 0.0012, + "step": 239140 + }, + { + "epoch": 1.533833814894511, + "grad_norm": 0.05023466795682907, + "learning_rate": 1.565593528539419e-06, + "loss": 0.001, + "step": 239150 + }, + { + "epoch": 1.533897951788297, + "grad_norm": 0.026395542547106743, + "learning_rate": 1.5651867746464743e-06, + "loss": 0.0015, + "step": 239160 + }, + { + "epoch": 1.533962088682083, + "grad_norm": 0.03340686485171318, + "learning_rate": 1.564780063794195e-06, + "loss": 0.0006, + "step": 239170 + }, + { + "epoch": 1.534026225575869, + "grad_norm": 0.011547114700078964, + "learning_rate": 1.5643733959876772e-06, + "loss": 0.0013, + "step": 239180 + }, + { + "epoch": 1.5340903624696551, + "grad_norm": 0.02173069305717945, + "learning_rate": 1.5639667712320161e-06, + "loss": 0.0015, + "step": 239190 + }, + { + "epoch": 1.5341544993634413, + "grad_norm": 0.10959511250257492, + "learning_rate": 1.5635601895323054e-06, + "loss": 0.0019, + "step": 239200 + }, + { + "epoch": 1.5342186362572274, + "grad_norm": 0.0560414157807827, + "learning_rate": 1.563153650893643e-06, + "loss": 0.0021, + "step": 239210 + }, + { + "epoch": 1.5342827731510136, + "grad_norm": 0.060674846172332764, + "learning_rate": 1.5627471553211216e-06, + "loss": 0.001, + "step": 239220 + }, + { + "epoch": 1.5343469100447997, + "grad_norm": 0.06418942660093307, + "learning_rate": 1.562340702819835e-06, + "loss": 0.0004, + "step": 239230 + }, + { + "epoch": 1.5344110469385859, + "grad_norm": 0.16282793879508972, + "learning_rate": 1.561934293394875e-06, + "loss": 0.0013, + "step": 239240 + }, + { + "epoch": 1.5344751838323718, + "grad_norm": 0.08704288303852081, + "learning_rate": 1.5615279270513367e-06, + "loss": 0.0011, + "step": 239250 + }, + { + "epoch": 1.534539320726158, + "grad_norm": 0.12967506051063538, + "learning_rate": 1.5611216037943105e-06, + "loss": 0.0011, + "step": 239260 + }, + { + "epoch": 1.5346034576199439, + "grad_norm": 0.038755375891923904, + "learning_rate": 1.5607153236288874e-06, + "loss": 0.0005, + "step": 239270 + }, + { + "epoch": 1.53466759451373, + "grad_norm": 0.048440322279930115, + "learning_rate": 1.5603090865601605e-06, + "loss": 0.0006, + "step": 239280 + }, + { + "epoch": 1.5347317314075162, + "grad_norm": 0.03399894759058952, + "learning_rate": 1.559902892593219e-06, + "loss": 0.0029, + "step": 239290 + }, + { + "epoch": 1.5347958683013023, + "grad_norm": 0.16211147606372833, + "learning_rate": 1.5594967417331536e-06, + "loss": 0.0008, + "step": 239300 + }, + { + "epoch": 1.5348600051950885, + "grad_norm": 0.08329859375953674, + "learning_rate": 1.5590906339850504e-06, + "loss": 0.0009, + "step": 239310 + }, + { + "epoch": 1.5349241420888746, + "grad_norm": 0.24930784106254578, + "learning_rate": 1.558684569354003e-06, + "loss": 0.0014, + "step": 239320 + }, + { + "epoch": 1.5349882789826605, + "grad_norm": 0.06121993437409401, + "learning_rate": 1.5582785478450968e-06, + "loss": 0.0011, + "step": 239330 + }, + { + "epoch": 1.5350524158764467, + "grad_norm": 0.1486685872077942, + "learning_rate": 1.5578725694634207e-06, + "loss": 0.001, + "step": 239340 + }, + { + "epoch": 1.5351165527702326, + "grad_norm": 0.04166239872574806, + "learning_rate": 1.5574666342140598e-06, + "loss": 0.001, + "step": 239350 + }, + { + "epoch": 1.5351806896640188, + "grad_norm": 0.027320783585309982, + "learning_rate": 1.5570607421021032e-06, + "loss": 0.0008, + "step": 239360 + }, + { + "epoch": 1.535244826557805, + "grad_norm": 0.12129577994346619, + "learning_rate": 1.556654893132637e-06, + "loss": 0.0011, + "step": 239370 + }, + { + "epoch": 1.535308963451591, + "grad_norm": 0.03990350291132927, + "learning_rate": 1.5562490873107456e-06, + "loss": 0.0012, + "step": 239380 + }, + { + "epoch": 1.5353731003453772, + "grad_norm": 0.09766650199890137, + "learning_rate": 1.5558433246415123e-06, + "loss": 0.0016, + "step": 239390 + }, + { + "epoch": 1.5354372372391634, + "grad_norm": 0.22297364473342896, + "learning_rate": 1.5554376051300258e-06, + "loss": 0.002, + "step": 239400 + }, + { + "epoch": 1.5355013741329495, + "grad_norm": 0.06788810342550278, + "learning_rate": 1.5550319287813675e-06, + "loss": 0.0013, + "step": 239410 + }, + { + "epoch": 1.5355655110267354, + "grad_norm": 0.1882997751235962, + "learning_rate": 1.5546262956006197e-06, + "loss": 0.0016, + "step": 239420 + }, + { + "epoch": 1.5356296479205216, + "grad_norm": 0.01625201851129532, + "learning_rate": 1.5542207055928688e-06, + "loss": 0.0006, + "step": 239430 + }, + { + "epoch": 1.5356937848143075, + "grad_norm": 0.04416879639029503, + "learning_rate": 1.553815158763195e-06, + "loss": 0.0009, + "step": 239440 + }, + { + "epoch": 1.5357579217080937, + "grad_norm": 0.06767366826534271, + "learning_rate": 1.55340965511668e-06, + "loss": 0.0008, + "step": 239450 + }, + { + "epoch": 1.5358220586018798, + "grad_norm": 0.003938053268939257, + "learning_rate": 1.5530041946584035e-06, + "loss": 0.0007, + "step": 239460 + }, + { + "epoch": 1.535886195495666, + "grad_norm": 0.12138058990240097, + "learning_rate": 1.5525987773934499e-06, + "loss": 0.0011, + "step": 239470 + }, + { + "epoch": 1.535950332389452, + "grad_norm": 0.06975291669368744, + "learning_rate": 1.552193403326897e-06, + "loss": 0.0011, + "step": 239480 + }, + { + "epoch": 1.5360144692832383, + "grad_norm": 0.07297302782535553, + "learning_rate": 1.5517880724638258e-06, + "loss": 0.0016, + "step": 239490 + }, + { + "epoch": 1.5360786061770242, + "grad_norm": 0.19614681601524353, + "learning_rate": 1.5513827848093115e-06, + "loss": 0.0013, + "step": 239500 + }, + { + "epoch": 1.5361427430708103, + "grad_norm": 0.04527741298079491, + "learning_rate": 1.5509775403684385e-06, + "loss": 0.0006, + "step": 239510 + }, + { + "epoch": 1.5362068799645965, + "grad_norm": 0.07013577222824097, + "learning_rate": 1.5505723391462813e-06, + "loss": 0.0014, + "step": 239520 + }, + { + "epoch": 1.5362710168583824, + "grad_norm": 0.047192350029945374, + "learning_rate": 1.550167181147918e-06, + "loss": 0.0015, + "step": 239530 + }, + { + "epoch": 1.5363351537521686, + "grad_norm": 0.1701509952545166, + "learning_rate": 1.5497620663784236e-06, + "loss": 0.0009, + "step": 239540 + }, + { + "epoch": 1.5363992906459547, + "grad_norm": 0.06133796274662018, + "learning_rate": 1.5493569948428783e-06, + "loss": 0.0011, + "step": 239550 + }, + { + "epoch": 1.5364634275397409, + "grad_norm": 0.08307241648435593, + "learning_rate": 1.5489519665463559e-06, + "loss": 0.001, + "step": 239560 + }, + { + "epoch": 1.536527564433527, + "grad_norm": 0.06485676020383835, + "learning_rate": 1.5485469814939303e-06, + "loss": 0.001, + "step": 239570 + }, + { + "epoch": 1.5365917013273132, + "grad_norm": 0.0776052251458168, + "learning_rate": 1.5481420396906793e-06, + "loss": 0.0025, + "step": 239580 + }, + { + "epoch": 1.536655838221099, + "grad_norm": 0.04508012533187866, + "learning_rate": 1.5477371411416753e-06, + "loss": 0.0012, + "step": 239590 + }, + { + "epoch": 1.5367199751148852, + "grad_norm": 0.03473687916994095, + "learning_rate": 1.5473322858519924e-06, + "loss": 0.001, + "step": 239600 + }, + { + "epoch": 1.5367841120086712, + "grad_norm": 0.10467745363712311, + "learning_rate": 1.546927473826702e-06, + "loss": 0.0022, + "step": 239610 + }, + { + "epoch": 1.5368482489024573, + "grad_norm": 0.11767324060201645, + "learning_rate": 1.5465227050708797e-06, + "loss": 0.0009, + "step": 239620 + }, + { + "epoch": 1.5369123857962435, + "grad_norm": 0.033213626593351364, + "learning_rate": 1.5461179795895963e-06, + "loss": 0.0011, + "step": 239630 + }, + { + "epoch": 1.5369765226900296, + "grad_norm": 0.03318759426474571, + "learning_rate": 1.545713297387923e-06, + "loss": 0.0008, + "step": 239640 + }, + { + "epoch": 1.5370406595838157, + "grad_norm": 0.21723335981369019, + "learning_rate": 1.5453086584709286e-06, + "loss": 0.0021, + "step": 239650 + }, + { + "epoch": 1.537104796477602, + "grad_norm": 0.2502657175064087, + "learning_rate": 1.5449040628436884e-06, + "loss": 0.0019, + "step": 239660 + }, + { + "epoch": 1.537168933371388, + "grad_norm": 0.03230128437280655, + "learning_rate": 1.5444995105112686e-06, + "loss": 0.0006, + "step": 239670 + }, + { + "epoch": 1.537233070265174, + "grad_norm": 0.12768696248531342, + "learning_rate": 1.5440950014787404e-06, + "loss": 0.0026, + "step": 239680 + }, + { + "epoch": 1.5372972071589601, + "grad_norm": 0.07762544602155685, + "learning_rate": 1.5436905357511694e-06, + "loss": 0.0033, + "step": 239690 + }, + { + "epoch": 1.537361344052746, + "grad_norm": 0.06683100759983063, + "learning_rate": 1.5432861133336285e-06, + "loss": 0.0008, + "step": 239700 + }, + { + "epoch": 1.5374254809465322, + "grad_norm": 0.2564246356487274, + "learning_rate": 1.5428817342311825e-06, + "loss": 0.0033, + "step": 239710 + }, + { + "epoch": 1.5374896178403183, + "grad_norm": 0.15359140932559967, + "learning_rate": 1.5424773984488978e-06, + "loss": 0.0017, + "step": 239720 + }, + { + "epoch": 1.5375537547341045, + "grad_norm": 0.015263685956597328, + "learning_rate": 1.5420731059918436e-06, + "loss": 0.0015, + "step": 239730 + }, + { + "epoch": 1.5376178916278906, + "grad_norm": 0.07375656068325043, + "learning_rate": 1.5416688568650856e-06, + "loss": 0.001, + "step": 239740 + }, + { + "epoch": 1.5376820285216768, + "grad_norm": 0.03587993606925011, + "learning_rate": 1.5412646510736878e-06, + "loss": 0.0009, + "step": 239750 + }, + { + "epoch": 1.5377461654154627, + "grad_norm": 0.11929059773683548, + "learning_rate": 1.540860488622714e-06, + "loss": 0.0011, + "step": 239760 + }, + { + "epoch": 1.5378103023092489, + "grad_norm": 0.08358773589134216, + "learning_rate": 1.5404563695172309e-06, + "loss": 0.001, + "step": 239770 + }, + { + "epoch": 1.5378744392030348, + "grad_norm": 0.009313437156379223, + "learning_rate": 1.5400522937623035e-06, + "loss": 0.0005, + "step": 239780 + }, + { + "epoch": 1.537938576096821, + "grad_norm": 0.1198863759636879, + "learning_rate": 1.5396482613629937e-06, + "loss": 0.0019, + "step": 239790 + }, + { + "epoch": 1.538002712990607, + "grad_norm": 0.032433778047561646, + "learning_rate": 1.539244272324364e-06, + "loss": 0.0016, + "step": 239800 + }, + { + "epoch": 1.5380668498843932, + "grad_norm": 0.042525772005319595, + "learning_rate": 1.5388403266514756e-06, + "loss": 0.0004, + "step": 239810 + }, + { + "epoch": 1.5381309867781794, + "grad_norm": 0.21475815773010254, + "learning_rate": 1.5384364243493932e-06, + "loss": 0.001, + "step": 239820 + }, + { + "epoch": 1.5381951236719655, + "grad_norm": 0.09270118921995163, + "learning_rate": 1.5380325654231764e-06, + "loss": 0.0006, + "step": 239830 + }, + { + "epoch": 1.5382592605657517, + "grad_norm": 0.05335585027933121, + "learning_rate": 1.5376287498778841e-06, + "loss": 0.0016, + "step": 239840 + }, + { + "epoch": 1.5383233974595376, + "grad_norm": 0.046682726591825485, + "learning_rate": 1.5372249777185793e-06, + "loss": 0.0013, + "step": 239850 + }, + { + "epoch": 1.5383875343533238, + "grad_norm": 0.02034573256969452, + "learning_rate": 1.5368212489503208e-06, + "loss": 0.0015, + "step": 239860 + }, + { + "epoch": 1.5384516712471097, + "grad_norm": 0.08550441265106201, + "learning_rate": 1.536417563578167e-06, + "loss": 0.0009, + "step": 239870 + }, + { + "epoch": 1.5385158081408958, + "grad_norm": 0.024634793400764465, + "learning_rate": 1.5360139216071746e-06, + "loss": 0.0015, + "step": 239880 + }, + { + "epoch": 1.538579945034682, + "grad_norm": 0.0035681596491485834, + "learning_rate": 1.5356103230424057e-06, + "loss": 0.0015, + "step": 239890 + }, + { + "epoch": 1.5386440819284681, + "grad_norm": 0.18973346054553986, + "learning_rate": 1.535206767888915e-06, + "loss": 0.0012, + "step": 239900 + }, + { + "epoch": 1.5387082188222543, + "grad_norm": 0.014074346981942654, + "learning_rate": 1.5348032561517596e-06, + "loss": 0.0014, + "step": 239910 + }, + { + "epoch": 1.5387723557160404, + "grad_norm": 0.13889437913894653, + "learning_rate": 1.5343997878359945e-06, + "loss": 0.0014, + "step": 239920 + }, + { + "epoch": 1.5388364926098266, + "grad_norm": 0.19967631995677948, + "learning_rate": 1.5339963629466787e-06, + "loss": 0.0023, + "step": 239930 + }, + { + "epoch": 1.5389006295036125, + "grad_norm": 0.03955758363008499, + "learning_rate": 1.5335929814888656e-06, + "loss": 0.0013, + "step": 239940 + }, + { + "epoch": 1.5389647663973987, + "grad_norm": 0.05924701690673828, + "learning_rate": 1.5331896434676096e-06, + "loss": 0.0006, + "step": 239950 + }, + { + "epoch": 1.5390289032911846, + "grad_norm": 0.06783130764961243, + "learning_rate": 1.5327863488879635e-06, + "loss": 0.0007, + "step": 239960 + }, + { + "epoch": 1.5390930401849707, + "grad_norm": 0.009174141101539135, + "learning_rate": 1.532383097754984e-06, + "loss": 0.0017, + "step": 239970 + }, + { + "epoch": 1.5391571770787569, + "grad_norm": 0.5989391803741455, + "learning_rate": 1.5319798900737226e-06, + "loss": 0.0021, + "step": 239980 + }, + { + "epoch": 1.539221313972543, + "grad_norm": 0.09201222658157349, + "learning_rate": 1.5315767258492304e-06, + "loss": 0.0008, + "step": 239990 + }, + { + "epoch": 1.5392854508663292, + "grad_norm": 0.048332955688238144, + "learning_rate": 1.5311736050865616e-06, + "loss": 0.0011, + "step": 240000 + }, + { + "epoch": 1.5393495877601153, + "grad_norm": 0.2488795667886734, + "learning_rate": 1.530770527790767e-06, + "loss": 0.0021, + "step": 240010 + }, + { + "epoch": 1.5394137246539012, + "grad_norm": 0.04531652107834816, + "learning_rate": 1.5303674939668971e-06, + "loss": 0.001, + "step": 240020 + }, + { + "epoch": 1.5394778615476874, + "grad_norm": 0.012171772308647633, + "learning_rate": 1.529964503620001e-06, + "loss": 0.0013, + "step": 240030 + }, + { + "epoch": 1.5395419984414733, + "grad_norm": 0.019362622871994972, + "learning_rate": 1.5295615567551307e-06, + "loss": 0.0012, + "step": 240040 + }, + { + "epoch": 1.5396061353352595, + "grad_norm": 0.01918141543865204, + "learning_rate": 1.5291586533773351e-06, + "loss": 0.0007, + "step": 240050 + }, + { + "epoch": 1.5396702722290456, + "grad_norm": 0.050688810646533966, + "learning_rate": 1.5287557934916615e-06, + "loss": 0.001, + "step": 240060 + }, + { + "epoch": 1.5397344091228318, + "grad_norm": 0.07629798352718353, + "learning_rate": 1.5283529771031568e-06, + "loss": 0.0027, + "step": 240070 + }, + { + "epoch": 1.539798546016618, + "grad_norm": 0.08818703144788742, + "learning_rate": 1.5279502042168726e-06, + "loss": 0.0013, + "step": 240080 + }, + { + "epoch": 1.539862682910404, + "grad_norm": 0.07173456251621246, + "learning_rate": 1.527547474837854e-06, + "loss": 0.0015, + "step": 240090 + }, + { + "epoch": 1.5399268198041902, + "grad_norm": 0.04634719341993332, + "learning_rate": 1.5271447889711466e-06, + "loss": 0.0005, + "step": 240100 + }, + { + "epoch": 1.5399909566979761, + "grad_norm": 0.0994994193315506, + "learning_rate": 1.5267421466217958e-06, + "loss": 0.001, + "step": 240110 + }, + { + "epoch": 1.5400550935917623, + "grad_norm": 0.20136234164237976, + "learning_rate": 1.52633954779485e-06, + "loss": 0.0009, + "step": 240120 + }, + { + "epoch": 1.5401192304855482, + "grad_norm": 0.09540402889251709, + "learning_rate": 1.525936992495352e-06, + "loss": 0.0008, + "step": 240130 + }, + { + "epoch": 1.5401833673793344, + "grad_norm": 0.18061742186546326, + "learning_rate": 1.5255344807283445e-06, + "loss": 0.0011, + "step": 240140 + }, + { + "epoch": 1.5402475042731205, + "grad_norm": 0.04126046970486641, + "learning_rate": 1.525132012498875e-06, + "loss": 0.0013, + "step": 240150 + }, + { + "epoch": 1.5403116411669067, + "grad_norm": 0.090067058801651, + "learning_rate": 1.524729587811985e-06, + "loss": 0.0003, + "step": 240160 + }, + { + "epoch": 1.5403757780606928, + "grad_norm": 0.006891035940498114, + "learning_rate": 1.5243272066727167e-06, + "loss": 0.002, + "step": 240170 + }, + { + "epoch": 1.540439914954479, + "grad_norm": 0.06617258489131927, + "learning_rate": 1.5239248690861109e-06, + "loss": 0.0012, + "step": 240180 + }, + { + "epoch": 1.5405040518482649, + "grad_norm": 0.14071519672870636, + "learning_rate": 1.5235225750572124e-06, + "loss": 0.0012, + "step": 240190 + }, + { + "epoch": 1.540568188742051, + "grad_norm": 0.04268965870141983, + "learning_rate": 1.5231203245910608e-06, + "loss": 0.0008, + "step": 240200 + }, + { + "epoch": 1.5406323256358372, + "grad_norm": 0.07042060047388077, + "learning_rate": 1.522718117692697e-06, + "loss": 0.0013, + "step": 240210 + }, + { + "epoch": 1.5406964625296231, + "grad_norm": 0.03442941606044769, + "learning_rate": 1.522315954367158e-06, + "loss": 0.0007, + "step": 240220 + }, + { + "epoch": 1.5407605994234093, + "grad_norm": 0.05070033669471741, + "learning_rate": 1.5219138346194873e-06, + "loss": 0.0018, + "step": 240230 + }, + { + "epoch": 1.5408247363171954, + "grad_norm": 0.09447235614061356, + "learning_rate": 1.5215117584547219e-06, + "loss": 0.0018, + "step": 240240 + }, + { + "epoch": 1.5408888732109816, + "grad_norm": 0.07985574752092361, + "learning_rate": 1.5211097258779e-06, + "loss": 0.0031, + "step": 240250 + }, + { + "epoch": 1.5409530101047677, + "grad_norm": 0.08799561113119125, + "learning_rate": 1.5207077368940587e-06, + "loss": 0.0009, + "step": 240260 + }, + { + "epoch": 1.5410171469985539, + "grad_norm": 0.07382316887378693, + "learning_rate": 1.520305791508237e-06, + "loss": 0.0017, + "step": 240270 + }, + { + "epoch": 1.5410812838923398, + "grad_norm": 0.10267768800258636, + "learning_rate": 1.5199038897254709e-06, + "loss": 0.0012, + "step": 240280 + }, + { + "epoch": 1.541145420786126, + "grad_norm": 0.06217135861515999, + "learning_rate": 1.5195020315507947e-06, + "loss": 0.0009, + "step": 240290 + }, + { + "epoch": 1.5412095576799119, + "grad_norm": 0.07185224443674088, + "learning_rate": 1.5191002169892472e-06, + "loss": 0.0006, + "step": 240300 + }, + { + "epoch": 1.541273694573698, + "grad_norm": 0.0013148905709385872, + "learning_rate": 1.5186984460458614e-06, + "loss": 0.0008, + "step": 240310 + }, + { + "epoch": 1.5413378314674842, + "grad_norm": 0.06411554664373398, + "learning_rate": 1.5182967187256725e-06, + "loss": 0.0011, + "step": 240320 + }, + { + "epoch": 1.5414019683612703, + "grad_norm": 0.16318665444850922, + "learning_rate": 1.5178950350337123e-06, + "loss": 0.0016, + "step": 240330 + }, + { + "epoch": 1.5414661052550565, + "grad_norm": 0.21505574882030487, + "learning_rate": 1.5174933949750176e-06, + "loss": 0.0015, + "step": 240340 + }, + { + "epoch": 1.5415302421488426, + "grad_norm": 0.1235521212220192, + "learning_rate": 1.5170917985546191e-06, + "loss": 0.0013, + "step": 240350 + }, + { + "epoch": 1.5415943790426287, + "grad_norm": 0.16214965283870697, + "learning_rate": 1.51669024577755e-06, + "loss": 0.0013, + "step": 240360 + }, + { + "epoch": 1.5416585159364147, + "grad_norm": 0.04509899392724037, + "learning_rate": 1.51628873664884e-06, + "loss": 0.0012, + "step": 240370 + }, + { + "epoch": 1.5417226528302008, + "grad_norm": 0.04285109043121338, + "learning_rate": 1.5158872711735234e-06, + "loss": 0.0009, + "step": 240380 + }, + { + "epoch": 1.5417867897239868, + "grad_norm": 0.043615441769361496, + "learning_rate": 1.5154858493566294e-06, + "loss": 0.0031, + "step": 240390 + }, + { + "epoch": 1.541850926617773, + "grad_norm": 0.01779012195765972, + "learning_rate": 1.5150844712031882e-06, + "loss": 0.0005, + "step": 240400 + }, + { + "epoch": 1.541915063511559, + "grad_norm": 0.08573545515537262, + "learning_rate": 1.5146831367182275e-06, + "loss": 0.0033, + "step": 240410 + }, + { + "epoch": 1.5419792004053452, + "grad_norm": 0.026266250759363174, + "learning_rate": 1.5142818459067792e-06, + "loss": 0.0012, + "step": 240420 + }, + { + "epoch": 1.5420433372991313, + "grad_norm": 0.13313041627407074, + "learning_rate": 1.5138805987738715e-06, + "loss": 0.001, + "step": 240430 + }, + { + "epoch": 1.5421074741929175, + "grad_norm": 0.11342868208885193, + "learning_rate": 1.5134793953245291e-06, + "loss": 0.0012, + "step": 240440 + }, + { + "epoch": 1.5421716110867034, + "grad_norm": 0.09087756276130676, + "learning_rate": 1.513078235563783e-06, + "loss": 0.0014, + "step": 240450 + }, + { + "epoch": 1.5422357479804896, + "grad_norm": 0.046084512025117874, + "learning_rate": 1.5126771194966595e-06, + "loss": 0.0017, + "step": 240460 + }, + { + "epoch": 1.5422998848742755, + "grad_norm": 0.03875308111310005, + "learning_rate": 1.5122760471281833e-06, + "loss": 0.0007, + "step": 240470 + }, + { + "epoch": 1.5423640217680616, + "grad_norm": 0.1758212000131607, + "learning_rate": 1.5118750184633796e-06, + "loss": 0.0009, + "step": 240480 + }, + { + "epoch": 1.5424281586618478, + "grad_norm": 0.060830261558294296, + "learning_rate": 1.5114740335072764e-06, + "loss": 0.0009, + "step": 240490 + }, + { + "epoch": 1.542492295555634, + "grad_norm": 0.10947144776582718, + "learning_rate": 1.5110730922648969e-06, + "loss": 0.0028, + "step": 240500 + }, + { + "epoch": 1.54255643244942, + "grad_norm": 0.09652144461870193, + "learning_rate": 1.510672194741265e-06, + "loss": 0.0009, + "step": 240510 + }, + { + "epoch": 1.5426205693432062, + "grad_norm": 0.09939628094434738, + "learning_rate": 1.5102713409414028e-06, + "loss": 0.0007, + "step": 240520 + }, + { + "epoch": 1.5426847062369924, + "grad_norm": 0.05884421616792679, + "learning_rate": 1.5098705308703344e-06, + "loss": 0.0017, + "step": 240530 + }, + { + "epoch": 1.5427488431307783, + "grad_norm": 0.013617325574159622, + "learning_rate": 1.5094697645330841e-06, + "loss": 0.0006, + "step": 240540 + }, + { + "epoch": 1.5428129800245645, + "grad_norm": 0.09015436470508575, + "learning_rate": 1.5090690419346726e-06, + "loss": 0.001, + "step": 240550 + }, + { + "epoch": 1.5428771169183504, + "grad_norm": 0.12895537912845612, + "learning_rate": 1.5086683630801197e-06, + "loss": 0.0027, + "step": 240560 + }, + { + "epoch": 1.5429412538121365, + "grad_norm": 0.03182035684585571, + "learning_rate": 1.5082677279744485e-06, + "loss": 0.0017, + "step": 240570 + }, + { + "epoch": 1.5430053907059227, + "grad_norm": 0.007782531436532736, + "learning_rate": 1.5078671366226783e-06, + "loss": 0.0008, + "step": 240580 + }, + { + "epoch": 1.5430695275997088, + "grad_norm": 0.051679827272892, + "learning_rate": 1.5074665890298285e-06, + "loss": 0.0009, + "step": 240590 + }, + { + "epoch": 1.543133664493495, + "grad_norm": 0.11652804166078568, + "learning_rate": 1.5070660852009173e-06, + "loss": 0.0015, + "step": 240600 + }, + { + "epoch": 1.5431978013872811, + "grad_norm": 0.09914771467447281, + "learning_rate": 1.5066656251409656e-06, + "loss": 0.0017, + "step": 240610 + }, + { + "epoch": 1.543261938281067, + "grad_norm": 0.04378265142440796, + "learning_rate": 1.5062652088549907e-06, + "loss": 0.001, + "step": 240620 + }, + { + "epoch": 1.5433260751748532, + "grad_norm": 0.06831402331590652, + "learning_rate": 1.5058648363480088e-06, + "loss": 0.0008, + "step": 240630 + }, + { + "epoch": 1.5433902120686394, + "grad_norm": 0.052788347005844116, + "learning_rate": 1.5054645076250368e-06, + "loss": 0.0031, + "step": 240640 + }, + { + "epoch": 1.5434543489624253, + "grad_norm": 0.006500875577330589, + "learning_rate": 1.5050642226910938e-06, + "loss": 0.0003, + "step": 240650 + }, + { + "epoch": 1.5435184858562114, + "grad_norm": 0.08855942636728287, + "learning_rate": 1.5046639815511932e-06, + "loss": 0.0016, + "step": 240660 + }, + { + "epoch": 1.5435826227499976, + "grad_norm": 0.03987079858779907, + "learning_rate": 1.5042637842103514e-06, + "loss": 0.0012, + "step": 240670 + }, + { + "epoch": 1.5436467596437837, + "grad_norm": 0.09060493856668472, + "learning_rate": 1.5038636306735815e-06, + "loss": 0.0008, + "step": 240680 + }, + { + "epoch": 1.5437108965375699, + "grad_norm": 0.029795430600643158, + "learning_rate": 1.5034635209458998e-06, + "loss": 0.0009, + "step": 240690 + }, + { + "epoch": 1.543775033431356, + "grad_norm": 0.10571033507585526, + "learning_rate": 1.5030634550323198e-06, + "loss": 0.0007, + "step": 240700 + }, + { + "epoch": 1.543839170325142, + "grad_norm": 0.05886177346110344, + "learning_rate": 1.502663432937852e-06, + "loss": 0.0021, + "step": 240710 + }, + { + "epoch": 1.543903307218928, + "grad_norm": 0.07125724852085114, + "learning_rate": 1.5022634546675124e-06, + "loss": 0.0013, + "step": 240720 + }, + { + "epoch": 1.543967444112714, + "grad_norm": 0.15605969727039337, + "learning_rate": 1.5018635202263115e-06, + "loss": 0.002, + "step": 240730 + }, + { + "epoch": 1.5440315810065002, + "grad_norm": 0.06762544065713882, + "learning_rate": 1.5014636296192607e-06, + "loss": 0.0014, + "step": 240740 + }, + { + "epoch": 1.5440957179002863, + "grad_norm": 0.0794999822974205, + "learning_rate": 1.5010637828513702e-06, + "loss": 0.0018, + "step": 240750 + }, + { + "epoch": 1.5441598547940725, + "grad_norm": 0.07379082590341568, + "learning_rate": 1.5006639799276518e-06, + "loss": 0.0009, + "step": 240760 + }, + { + "epoch": 1.5442239916878586, + "grad_norm": 0.06329061836004257, + "learning_rate": 1.5002642208531154e-06, + "loss": 0.0017, + "step": 240770 + }, + { + "epoch": 1.5442881285816448, + "grad_norm": 0.0044218008406460285, + "learning_rate": 1.4998645056327687e-06, + "loss": 0.0012, + "step": 240780 + }, + { + "epoch": 1.544352265475431, + "grad_norm": 0.13181927800178528, + "learning_rate": 1.4994648342716205e-06, + "loss": 0.0017, + "step": 240790 + }, + { + "epoch": 1.5444164023692168, + "grad_norm": 0.06018625944852829, + "learning_rate": 1.4990652067746808e-06, + "loss": 0.0006, + "step": 240800 + }, + { + "epoch": 1.544480539263003, + "grad_norm": 0.03753812611103058, + "learning_rate": 1.498665623146956e-06, + "loss": 0.0007, + "step": 240810 + }, + { + "epoch": 1.544544676156789, + "grad_norm": 0.10939286649227142, + "learning_rate": 1.4982660833934521e-06, + "loss": 0.0009, + "step": 240820 + }, + { + "epoch": 1.544608813050575, + "grad_norm": 0.05444050952792168, + "learning_rate": 1.4978665875191784e-06, + "loss": 0.0012, + "step": 240830 + }, + { + "epoch": 1.5446729499443612, + "grad_norm": 0.03752541542053223, + "learning_rate": 1.4974671355291393e-06, + "loss": 0.0017, + "step": 240840 + }, + { + "epoch": 1.5447370868381474, + "grad_norm": 0.0661284327507019, + "learning_rate": 1.49706772742834e-06, + "loss": 0.001, + "step": 240850 + }, + { + "epoch": 1.5448012237319335, + "grad_norm": 0.22921940684318542, + "learning_rate": 1.4966683632217843e-06, + "loss": 0.0011, + "step": 240860 + }, + { + "epoch": 1.5448653606257197, + "grad_norm": 0.16091661155223846, + "learning_rate": 1.496269042914479e-06, + "loss": 0.001, + "step": 240870 + }, + { + "epoch": 1.5449294975195056, + "grad_norm": 0.19941392540931702, + "learning_rate": 1.4958697665114268e-06, + "loss": 0.0015, + "step": 240880 + }, + { + "epoch": 1.5449936344132917, + "grad_norm": 0.05221044272184372, + "learning_rate": 1.4954705340176312e-06, + "loss": 0.0013, + "step": 240890 + }, + { + "epoch": 1.5450577713070777, + "grad_norm": 0.12489048391580582, + "learning_rate": 1.4950713454380922e-06, + "loss": 0.0013, + "step": 240900 + }, + { + "epoch": 1.5451219082008638, + "grad_norm": 0.0692276880145073, + "learning_rate": 1.4946722007778164e-06, + "loss": 0.0007, + "step": 240910 + }, + { + "epoch": 1.54518604509465, + "grad_norm": 0.07412971556186676, + "learning_rate": 1.4942731000418026e-06, + "loss": 0.0017, + "step": 240920 + }, + { + "epoch": 1.5452501819884361, + "grad_norm": 0.12431248277425766, + "learning_rate": 1.4938740432350525e-06, + "loss": 0.0008, + "step": 240930 + }, + { + "epoch": 1.5453143188822223, + "grad_norm": 0.019095465540885925, + "learning_rate": 1.493475030362565e-06, + "loss": 0.0011, + "step": 240940 + }, + { + "epoch": 1.5453784557760084, + "grad_norm": 0.09157022833824158, + "learning_rate": 1.4930760614293432e-06, + "loss": 0.0009, + "step": 240950 + }, + { + "epoch": 1.5454425926697946, + "grad_norm": 0.30129361152648926, + "learning_rate": 1.492677136440384e-06, + "loss": 0.0017, + "step": 240960 + }, + { + "epoch": 1.5455067295635805, + "grad_norm": 0.07098261266946793, + "learning_rate": 1.4922782554006859e-06, + "loss": 0.0011, + "step": 240970 + }, + { + "epoch": 1.5455708664573666, + "grad_norm": 0.0788029134273529, + "learning_rate": 1.4918794183152497e-06, + "loss": 0.0027, + "step": 240980 + }, + { + "epoch": 1.5456350033511526, + "grad_norm": 0.004866393748670816, + "learning_rate": 1.4914806251890717e-06, + "loss": 0.0018, + "step": 240990 + }, + { + "epoch": 1.5456991402449387, + "grad_norm": 0.033007655292749405, + "learning_rate": 1.491081876027149e-06, + "loss": 0.0014, + "step": 241000 + }, + { + "epoch": 1.5457632771387249, + "grad_norm": 0.08490005135536194, + "learning_rate": 1.4906831708344767e-06, + "loss": 0.0015, + "step": 241010 + }, + { + "epoch": 1.545827414032511, + "grad_norm": 0.034369032829999924, + "learning_rate": 1.4902845096160534e-06, + "loss": 0.0021, + "step": 241020 + }, + { + "epoch": 1.5458915509262972, + "grad_norm": 0.17891032993793488, + "learning_rate": 1.489885892376874e-06, + "loss": 0.0013, + "step": 241030 + }, + { + "epoch": 1.5459556878200833, + "grad_norm": 0.031783584505319595, + "learning_rate": 1.4894873191219329e-06, + "loss": 0.0009, + "step": 241040 + }, + { + "epoch": 1.5460198247138692, + "grad_norm": 0.06378553062677383, + "learning_rate": 1.4890887898562228e-06, + "loss": 0.0005, + "step": 241050 + }, + { + "epoch": 1.5460839616076554, + "grad_norm": 0.10979326069355011, + "learning_rate": 1.4886903045847412e-06, + "loss": 0.0021, + "step": 241060 + }, + { + "epoch": 1.5461480985014415, + "grad_norm": 0.015144680626690388, + "learning_rate": 1.4882918633124794e-06, + "loss": 0.0011, + "step": 241070 + }, + { + "epoch": 1.5462122353952275, + "grad_norm": 0.07192160934209824, + "learning_rate": 1.4878934660444305e-06, + "loss": 0.0025, + "step": 241080 + }, + { + "epoch": 1.5462763722890136, + "grad_norm": 0.14805211126804352, + "learning_rate": 1.4874951127855847e-06, + "loss": 0.002, + "step": 241090 + }, + { + "epoch": 1.5463405091827997, + "grad_norm": 0.048894334584474564, + "learning_rate": 1.4870968035409371e-06, + "loss": 0.0007, + "step": 241100 + }, + { + "epoch": 1.546404646076586, + "grad_norm": 0.04320673272013664, + "learning_rate": 1.4866985383154775e-06, + "loss": 0.0019, + "step": 241110 + }, + { + "epoch": 1.546468782970372, + "grad_norm": 0.25919121503829956, + "learning_rate": 1.486300317114195e-06, + "loss": 0.0071, + "step": 241120 + }, + { + "epoch": 1.5465329198641582, + "grad_norm": 0.10468928515911102, + "learning_rate": 1.4859021399420813e-06, + "loss": 0.0015, + "step": 241130 + }, + { + "epoch": 1.5465970567579441, + "grad_norm": 0.329969197511673, + "learning_rate": 1.485504006804126e-06, + "loss": 0.0014, + "step": 241140 + }, + { + "epoch": 1.5466611936517303, + "grad_norm": 0.24276787042617798, + "learning_rate": 1.4851059177053167e-06, + "loss": 0.0009, + "step": 241150 + }, + { + "epoch": 1.5467253305455162, + "grad_norm": 0.011052812449634075, + "learning_rate": 1.4847078726506409e-06, + "loss": 0.0013, + "step": 241160 + }, + { + "epoch": 1.5467894674393023, + "grad_norm": 0.06549164652824402, + "learning_rate": 1.4843098716450893e-06, + "loss": 0.0008, + "step": 241170 + }, + { + "epoch": 1.5468536043330885, + "grad_norm": 0.24330514669418335, + "learning_rate": 1.483911914693648e-06, + "loss": 0.002, + "step": 241180 + }, + { + "epoch": 1.5469177412268746, + "grad_norm": 0.3681797981262207, + "learning_rate": 1.4835140018013033e-06, + "loss": 0.0023, + "step": 241190 + }, + { + "epoch": 1.5469818781206608, + "grad_norm": 0.012566491961479187, + "learning_rate": 1.4831161329730392e-06, + "loss": 0.0016, + "step": 241200 + }, + { + "epoch": 1.547046015014447, + "grad_norm": 0.128724604845047, + "learning_rate": 1.4827183082138457e-06, + "loss": 0.0009, + "step": 241210 + }, + { + "epoch": 1.547110151908233, + "grad_norm": 0.12924446165561676, + "learning_rate": 1.482320527528705e-06, + "loss": 0.001, + "step": 241220 + }, + { + "epoch": 1.547174288802019, + "grad_norm": 0.04774237424135208, + "learning_rate": 1.4819227909226025e-06, + "loss": 0.001, + "step": 241230 + }, + { + "epoch": 1.5472384256958052, + "grad_norm": 0.01152403000742197, + "learning_rate": 1.4815250984005203e-06, + "loss": 0.001, + "step": 241240 + }, + { + "epoch": 1.547302562589591, + "grad_norm": 0.014252632856369019, + "learning_rate": 1.4811274499674444e-06, + "loss": 0.0016, + "step": 241250 + }, + { + "epoch": 1.5473666994833772, + "grad_norm": 0.16456815600395203, + "learning_rate": 1.480729845628357e-06, + "loss": 0.0006, + "step": 241260 + }, + { + "epoch": 1.5474308363771634, + "grad_norm": 0.013565735891461372, + "learning_rate": 1.480332285388238e-06, + "loss": 0.001, + "step": 241270 + }, + { + "epoch": 1.5474949732709495, + "grad_norm": 0.09282597154378891, + "learning_rate": 1.4799347692520722e-06, + "loss": 0.0016, + "step": 241280 + }, + { + "epoch": 1.5475591101647357, + "grad_norm": 0.028873804956674576, + "learning_rate": 1.4795372972248378e-06, + "loss": 0.001, + "step": 241290 + }, + { + "epoch": 1.5476232470585218, + "grad_norm": 0.08476100862026215, + "learning_rate": 1.4791398693115195e-06, + "loss": 0.0012, + "step": 241300 + }, + { + "epoch": 1.5476873839523078, + "grad_norm": 0.22884000837802887, + "learning_rate": 1.478742485517094e-06, + "loss": 0.001, + "step": 241310 + }, + { + "epoch": 1.547751520846094, + "grad_norm": 0.019175561144948006, + "learning_rate": 1.4783451458465409e-06, + "loss": 0.0029, + "step": 241320 + }, + { + "epoch": 1.5478156577398798, + "grad_norm": 0.029204215854406357, + "learning_rate": 1.477947850304841e-06, + "loss": 0.001, + "step": 241330 + }, + { + "epoch": 1.547879794633666, + "grad_norm": 0.00403748732060194, + "learning_rate": 1.4775505988969723e-06, + "loss": 0.0006, + "step": 241340 + }, + { + "epoch": 1.5479439315274521, + "grad_norm": 0.040969155728816986, + "learning_rate": 1.477153391627912e-06, + "loss": 0.001, + "step": 241350 + }, + { + "epoch": 1.5480080684212383, + "grad_norm": 0.0407954677939415, + "learning_rate": 1.4767562285026355e-06, + "loss": 0.0018, + "step": 241360 + }, + { + "epoch": 1.5480722053150244, + "grad_norm": 0.10405800491571426, + "learning_rate": 1.4763591095261233e-06, + "loss": 0.0011, + "step": 241370 + }, + { + "epoch": 1.5481363422088106, + "grad_norm": 0.25405964255332947, + "learning_rate": 1.4759620347033493e-06, + "loss": 0.002, + "step": 241380 + }, + { + "epoch": 1.5482004791025967, + "grad_norm": 0.13294708728790283, + "learning_rate": 1.4755650040392888e-06, + "loss": 0.0012, + "step": 241390 + }, + { + "epoch": 1.5482646159963827, + "grad_norm": 0.05426466092467308, + "learning_rate": 1.4751680175389188e-06, + "loss": 0.001, + "step": 241400 + }, + { + "epoch": 1.5483287528901688, + "grad_norm": 0.09514619410037994, + "learning_rate": 1.4747710752072125e-06, + "loss": 0.0011, + "step": 241410 + }, + { + "epoch": 1.5483928897839547, + "grad_norm": 0.16939108073711395, + "learning_rate": 1.4743741770491443e-06, + "loss": 0.0015, + "step": 241420 + }, + { + "epoch": 1.5484570266777409, + "grad_norm": 0.14232565462589264, + "learning_rate": 1.4739773230696858e-06, + "loss": 0.0015, + "step": 241430 + }, + { + "epoch": 1.548521163571527, + "grad_norm": 0.08041399717330933, + "learning_rate": 1.4735805132738135e-06, + "loss": 0.0009, + "step": 241440 + }, + { + "epoch": 1.5485853004653132, + "grad_norm": 0.06873752921819687, + "learning_rate": 1.4731837476664967e-06, + "loss": 0.0006, + "step": 241450 + }, + { + "epoch": 1.5486494373590993, + "grad_norm": 0.0607171431183815, + "learning_rate": 1.472787026252709e-06, + "loss": 0.0017, + "step": 241460 + }, + { + "epoch": 1.5487135742528855, + "grad_norm": 0.07531336694955826, + "learning_rate": 1.4723903490374186e-06, + "loss": 0.0009, + "step": 241470 + }, + { + "epoch": 1.5487777111466716, + "grad_norm": 0.1684890240430832, + "learning_rate": 1.4719937160256004e-06, + "loss": 0.0012, + "step": 241480 + }, + { + "epoch": 1.5488418480404575, + "grad_norm": 0.05508307367563248, + "learning_rate": 1.4715971272222217e-06, + "loss": 0.0009, + "step": 241490 + }, + { + "epoch": 1.5489059849342437, + "grad_norm": 0.15135546028614044, + "learning_rate": 1.471200582632253e-06, + "loss": 0.0014, + "step": 241500 + }, + { + "epoch": 1.5489701218280296, + "grad_norm": 0.005747431889176369, + "learning_rate": 1.4708040822606618e-06, + "loss": 0.0005, + "step": 241510 + }, + { + "epoch": 1.5490342587218158, + "grad_norm": 0.12473436444997787, + "learning_rate": 1.4704076261124183e-06, + "loss": 0.0011, + "step": 241520 + }, + { + "epoch": 1.549098395615602, + "grad_norm": 0.032045476138591766, + "learning_rate": 1.4700112141924905e-06, + "loss": 0.0011, + "step": 241530 + }, + { + "epoch": 1.549162532509388, + "grad_norm": 0.13783413171768188, + "learning_rate": 1.4696148465058436e-06, + "loss": 0.0015, + "step": 241540 + }, + { + "epoch": 1.5492266694031742, + "grad_norm": 0.06455527991056442, + "learning_rate": 1.4692185230574475e-06, + "loss": 0.0014, + "step": 241550 + }, + { + "epoch": 1.5492908062969604, + "grad_norm": 0.04851328209042549, + "learning_rate": 1.4688222438522658e-06, + "loss": 0.0014, + "step": 241560 + }, + { + "epoch": 1.5493549431907463, + "grad_norm": 0.02985536865890026, + "learning_rate": 1.4684260088952663e-06, + "loss": 0.0013, + "step": 241570 + }, + { + "epoch": 1.5494190800845324, + "grad_norm": 0.013616991229355335, + "learning_rate": 1.4680298181914105e-06, + "loss": 0.0035, + "step": 241580 + }, + { + "epoch": 1.5494832169783184, + "grad_norm": 0.001772357034496963, + "learning_rate": 1.4676336717456668e-06, + "loss": 0.0009, + "step": 241590 + }, + { + "epoch": 1.5495473538721045, + "grad_norm": 0.07002270221710205, + "learning_rate": 1.4672375695629982e-06, + "loss": 0.0012, + "step": 241600 + }, + { + "epoch": 1.5496114907658907, + "grad_norm": 0.07833683490753174, + "learning_rate": 1.4668415116483675e-06, + "loss": 0.0012, + "step": 241610 + }, + { + "epoch": 1.5496756276596768, + "grad_norm": 0.3495336174964905, + "learning_rate": 1.4664454980067362e-06, + "loss": 0.002, + "step": 241620 + }, + { + "epoch": 1.549739764553463, + "grad_norm": 0.06402402371168137, + "learning_rate": 1.4660495286430699e-06, + "loss": 0.0016, + "step": 241630 + }, + { + "epoch": 1.549803901447249, + "grad_norm": 0.05305173620581627, + "learning_rate": 1.4656536035623286e-06, + "loss": 0.0013, + "step": 241640 + }, + { + "epoch": 1.5498680383410353, + "grad_norm": 0.1703844666481018, + "learning_rate": 1.4652577227694737e-06, + "loss": 0.001, + "step": 241650 + }, + { + "epoch": 1.5499321752348212, + "grad_norm": 0.01733911968767643, + "learning_rate": 1.4648618862694636e-06, + "loss": 0.0009, + "step": 241660 + }, + { + "epoch": 1.5499963121286073, + "grad_norm": 0.08173426240682602, + "learning_rate": 1.4644660940672628e-06, + "loss": 0.0012, + "step": 241670 + }, + { + "epoch": 1.5500604490223933, + "grad_norm": 0.006655998528003693, + "learning_rate": 1.4640703461678285e-06, + "loss": 0.0007, + "step": 241680 + }, + { + "epoch": 1.5501245859161794, + "grad_norm": 0.06824464350938797, + "learning_rate": 1.4636746425761183e-06, + "loss": 0.001, + "step": 241690 + }, + { + "epoch": 1.5501887228099656, + "grad_norm": 0.04127271845936775, + "learning_rate": 1.463278983297094e-06, + "loss": 0.0004, + "step": 241700 + }, + { + "epoch": 1.5502528597037517, + "grad_norm": 0.08334328234195709, + "learning_rate": 1.4628833683357114e-06, + "loss": 0.0016, + "step": 241710 + }, + { + "epoch": 1.5503169965975379, + "grad_norm": 0.07784296572208405, + "learning_rate": 1.4624877976969282e-06, + "loss": 0.001, + "step": 241720 + }, + { + "epoch": 1.550381133491324, + "grad_norm": 0.19750766456127167, + "learning_rate": 1.4620922713856994e-06, + "loss": 0.0027, + "step": 241730 + }, + { + "epoch": 1.55044527038511, + "grad_norm": 0.2604537606239319, + "learning_rate": 1.4616967894069845e-06, + "loss": 0.0015, + "step": 241740 + }, + { + "epoch": 1.550509407278896, + "grad_norm": 0.08659055083990097, + "learning_rate": 1.4613013517657377e-06, + "loss": 0.0009, + "step": 241750 + }, + { + "epoch": 1.5505735441726822, + "grad_norm": 0.019921543076634407, + "learning_rate": 1.4609059584669139e-06, + "loss": 0.0008, + "step": 241760 + }, + { + "epoch": 1.5506376810664682, + "grad_norm": 0.001479236176237464, + "learning_rate": 1.4605106095154664e-06, + "loss": 0.0007, + "step": 241770 + }, + { + "epoch": 1.5507018179602543, + "grad_norm": 0.07658054679632187, + "learning_rate": 1.4601153049163525e-06, + "loss": 0.0012, + "step": 241780 + }, + { + "epoch": 1.5507659548540405, + "grad_norm": 0.10939273238182068, + "learning_rate": 1.4597200446745235e-06, + "loss": 0.0015, + "step": 241790 + }, + { + "epoch": 1.5508300917478266, + "grad_norm": 0.008693967945873737, + "learning_rate": 1.4593248287949324e-06, + "loss": 0.0003, + "step": 241800 + }, + { + "epoch": 1.5508942286416127, + "grad_norm": 0.2519848942756653, + "learning_rate": 1.4589296572825302e-06, + "loss": 0.0017, + "step": 241810 + }, + { + "epoch": 1.550958365535399, + "grad_norm": 0.022765902802348137, + "learning_rate": 1.4585345301422715e-06, + "loss": 0.001, + "step": 241820 + }, + { + "epoch": 1.5510225024291848, + "grad_norm": 0.02760549820959568, + "learning_rate": 1.4581394473791067e-06, + "loss": 0.0014, + "step": 241830 + }, + { + "epoch": 1.551086639322971, + "grad_norm": 0.15331031382083893, + "learning_rate": 1.4577444089979837e-06, + "loss": 0.0014, + "step": 241840 + }, + { + "epoch": 1.551150776216757, + "grad_norm": 0.03634566441178322, + "learning_rate": 1.457349415003857e-06, + "loss": 0.0019, + "step": 241850 + }, + { + "epoch": 1.551214913110543, + "grad_norm": 0.07276799529790878, + "learning_rate": 1.4569544654016737e-06, + "loss": 0.001, + "step": 241860 + }, + { + "epoch": 1.5512790500043292, + "grad_norm": 0.09183241426944733, + "learning_rate": 1.4565595601963833e-06, + "loss": 0.0015, + "step": 241870 + }, + { + "epoch": 1.5513431868981153, + "grad_norm": 0.03877013549208641, + "learning_rate": 1.4561646993929323e-06, + "loss": 0.0007, + "step": 241880 + }, + { + "epoch": 1.5514073237919015, + "grad_norm": 0.03545617312192917, + "learning_rate": 1.4557698829962724e-06, + "loss": 0.0014, + "step": 241890 + }, + { + "epoch": 1.5514714606856876, + "grad_norm": 0.29456937313079834, + "learning_rate": 1.4553751110113484e-06, + "loss": 0.0008, + "step": 241900 + }, + { + "epoch": 1.5515355975794738, + "grad_norm": 0.20917344093322754, + "learning_rate": 1.4549803834431076e-06, + "loss": 0.0014, + "step": 241910 + }, + { + "epoch": 1.5515997344732597, + "grad_norm": 0.03617027401924133, + "learning_rate": 1.4545857002964948e-06, + "loss": 0.0008, + "step": 241920 + }, + { + "epoch": 1.5516638713670459, + "grad_norm": 0.1423276662826538, + "learning_rate": 1.4541910615764587e-06, + "loss": 0.0012, + "step": 241930 + }, + { + "epoch": 1.5517280082608318, + "grad_norm": 0.08496689796447754, + "learning_rate": 1.4537964672879422e-06, + "loss": 0.0016, + "step": 241940 + }, + { + "epoch": 1.551792145154618, + "grad_norm": 0.049270179122686386, + "learning_rate": 1.453401917435891e-06, + "loss": 0.0013, + "step": 241950 + }, + { + "epoch": 1.551856282048404, + "grad_norm": 0.039366334676742554, + "learning_rate": 1.4530074120252468e-06, + "loss": 0.0009, + "step": 241960 + }, + { + "epoch": 1.5519204189421902, + "grad_norm": 0.1436959207057953, + "learning_rate": 1.452612951060956e-06, + "loss": 0.002, + "step": 241970 + }, + { + "epoch": 1.5519845558359764, + "grad_norm": 0.05382394790649414, + "learning_rate": 1.4522185345479606e-06, + "loss": 0.0019, + "step": 241980 + }, + { + "epoch": 1.5520486927297625, + "grad_norm": 0.12463415414094925, + "learning_rate": 1.451824162491201e-06, + "loss": 0.0008, + "step": 241990 + }, + { + "epoch": 1.5521128296235485, + "grad_norm": 0.15907691419124603, + "learning_rate": 1.451429834895622e-06, + "loss": 0.0012, + "step": 242000 + }, + { + "epoch": 1.5521769665173346, + "grad_norm": 0.0672299712896347, + "learning_rate": 1.4510355517661628e-06, + "loss": 0.0013, + "step": 242010 + }, + { + "epoch": 1.5522411034111205, + "grad_norm": 0.1971874237060547, + "learning_rate": 1.4506413131077652e-06, + "loss": 0.0022, + "step": 242020 + }, + { + "epoch": 1.5523052403049067, + "grad_norm": 0.07928959280252457, + "learning_rate": 1.4502471189253665e-06, + "loss": 0.0023, + "step": 242030 + }, + { + "epoch": 1.5523693771986928, + "grad_norm": 0.019396668300032616, + "learning_rate": 1.44985296922391e-06, + "loss": 0.0013, + "step": 242040 + }, + { + "epoch": 1.552433514092479, + "grad_norm": 0.010010046884417534, + "learning_rate": 1.4494588640083334e-06, + "loss": 0.0014, + "step": 242050 + }, + { + "epoch": 1.5524976509862651, + "grad_norm": 0.12092921137809753, + "learning_rate": 1.449064803283573e-06, + "loss": 0.0012, + "step": 242060 + }, + { + "epoch": 1.5525617878800513, + "grad_norm": 0.2388553023338318, + "learning_rate": 1.4486707870545701e-06, + "loss": 0.0019, + "step": 242070 + }, + { + "epoch": 1.5526259247738374, + "grad_norm": 0.004728635307401419, + "learning_rate": 1.4482768153262583e-06, + "loss": 0.0015, + "step": 242080 + }, + { + "epoch": 1.5526900616676234, + "grad_norm": 0.10769958049058914, + "learning_rate": 1.4478828881035783e-06, + "loss": 0.0009, + "step": 242090 + }, + { + "epoch": 1.5527541985614095, + "grad_norm": 0.1482594609260559, + "learning_rate": 1.4474890053914647e-06, + "loss": 0.0011, + "step": 242100 + }, + { + "epoch": 1.5528183354551954, + "grad_norm": 0.05761986970901489, + "learning_rate": 1.4470951671948508e-06, + "loss": 0.001, + "step": 242110 + }, + { + "epoch": 1.5528824723489816, + "grad_norm": 0.008168700151145458, + "learning_rate": 1.4467013735186752e-06, + "loss": 0.0012, + "step": 242120 + }, + { + "epoch": 1.5529466092427677, + "grad_norm": 0.09967954456806183, + "learning_rate": 1.4463076243678714e-06, + "loss": 0.0017, + "step": 242130 + }, + { + "epoch": 1.5530107461365539, + "grad_norm": 0.04914829134941101, + "learning_rate": 1.4459139197473725e-06, + "loss": 0.0009, + "step": 242140 + }, + { + "epoch": 1.55307488303034, + "grad_norm": 0.009536635130643845, + "learning_rate": 1.445520259662111e-06, + "loss": 0.0011, + "step": 242150 + }, + { + "epoch": 1.5531390199241262, + "grad_norm": 0.08600223809480667, + "learning_rate": 1.445126644117022e-06, + "loss": 0.001, + "step": 242160 + }, + { + "epoch": 1.553203156817912, + "grad_norm": 0.034827813506126404, + "learning_rate": 1.4447330731170372e-06, + "loss": 0.0005, + "step": 242170 + }, + { + "epoch": 1.5532672937116982, + "grad_norm": 0.07760866731405258, + "learning_rate": 1.444339546667088e-06, + "loss": 0.002, + "step": 242180 + }, + { + "epoch": 1.5533314306054844, + "grad_norm": 0.004131306428462267, + "learning_rate": 1.4439460647721032e-06, + "loss": 0.0008, + "step": 242190 + }, + { + "epoch": 1.5533955674992703, + "grad_norm": 0.15165965259075165, + "learning_rate": 1.4435526274370176e-06, + "loss": 0.0013, + "step": 242200 + }, + { + "epoch": 1.5534597043930565, + "grad_norm": 0.07331079989671707, + "learning_rate": 1.4431592346667595e-06, + "loss": 0.0006, + "step": 242210 + }, + { + "epoch": 1.5535238412868426, + "grad_norm": 0.10047654062509537, + "learning_rate": 1.4427658864662585e-06, + "loss": 0.0015, + "step": 242220 + }, + { + "epoch": 1.5535879781806288, + "grad_norm": 0.1110883429646492, + "learning_rate": 1.4423725828404411e-06, + "loss": 0.0015, + "step": 242230 + }, + { + "epoch": 1.553652115074415, + "grad_norm": 0.0955362617969513, + "learning_rate": 1.4419793237942397e-06, + "loss": 0.0012, + "step": 242240 + }, + { + "epoch": 1.553716251968201, + "grad_norm": 0.043106190860271454, + "learning_rate": 1.4415861093325805e-06, + "loss": 0.0008, + "step": 242250 + }, + { + "epoch": 1.553780388861987, + "grad_norm": 0.008243992924690247, + "learning_rate": 1.4411929394603884e-06, + "loss": 0.0025, + "step": 242260 + }, + { + "epoch": 1.5538445257557731, + "grad_norm": 0.05364123731851578, + "learning_rate": 1.440799814182594e-06, + "loss": 0.0016, + "step": 242270 + }, + { + "epoch": 1.553908662649559, + "grad_norm": 0.043370697647333145, + "learning_rate": 1.4404067335041216e-06, + "loss": 0.0013, + "step": 242280 + }, + { + "epoch": 1.5539727995433452, + "grad_norm": 0.04179252311587334, + "learning_rate": 1.4400136974298972e-06, + "loss": 0.0011, + "step": 242290 + }, + { + "epoch": 1.5540369364371314, + "grad_norm": 0.00294058327563107, + "learning_rate": 1.4396207059648438e-06, + "loss": 0.0014, + "step": 242300 + }, + { + "epoch": 1.5541010733309175, + "grad_norm": 0.07783500850200653, + "learning_rate": 1.4392277591138886e-06, + "loss": 0.0018, + "step": 242310 + }, + { + "epoch": 1.5541652102247037, + "grad_norm": 0.10862353444099426, + "learning_rate": 1.438834856881955e-06, + "loss": 0.0007, + "step": 242320 + }, + { + "epoch": 1.5542293471184898, + "grad_norm": 0.12284133583307266, + "learning_rate": 1.438441999273965e-06, + "loss": 0.0015, + "step": 242330 + }, + { + "epoch": 1.554293484012276, + "grad_norm": 0.05826522037386894, + "learning_rate": 1.4380491862948415e-06, + "loss": 0.0015, + "step": 242340 + }, + { + "epoch": 1.5543576209060619, + "grad_norm": 0.0899071991443634, + "learning_rate": 1.4376564179495085e-06, + "loss": 0.0033, + "step": 242350 + }, + { + "epoch": 1.554421757799848, + "grad_norm": 0.03002636320888996, + "learning_rate": 1.4372636942428863e-06, + "loss": 0.0022, + "step": 242360 + }, + { + "epoch": 1.554485894693634, + "grad_norm": 0.16218431293964386, + "learning_rate": 1.436871015179897e-06, + "loss": 0.0014, + "step": 242370 + }, + { + "epoch": 1.5545500315874201, + "grad_norm": 0.02801361121237278, + "learning_rate": 1.436478380765458e-06, + "loss": 0.0011, + "step": 242380 + }, + { + "epoch": 1.5546141684812063, + "grad_norm": 0.07750725001096725, + "learning_rate": 1.436085791004494e-06, + "loss": 0.0015, + "step": 242390 + }, + { + "epoch": 1.5546783053749924, + "grad_norm": 0.15338924527168274, + "learning_rate": 1.4356932459019218e-06, + "loss": 0.0015, + "step": 242400 + }, + { + "epoch": 1.5547424422687786, + "grad_norm": 0.21196606755256653, + "learning_rate": 1.4353007454626588e-06, + "loss": 0.001, + "step": 242410 + }, + { + "epoch": 1.5548065791625647, + "grad_norm": 0.0607130192220211, + "learning_rate": 1.4349082896916273e-06, + "loss": 0.001, + "step": 242420 + }, + { + "epoch": 1.5548707160563506, + "grad_norm": 0.12506140768527985, + "learning_rate": 1.434515878593742e-06, + "loss": 0.0018, + "step": 242430 + }, + { + "epoch": 1.5549348529501368, + "grad_norm": 0.11620277166366577, + "learning_rate": 1.4341235121739216e-06, + "loss": 0.002, + "step": 242440 + }, + { + "epoch": 1.5549989898439227, + "grad_norm": 0.12097823619842529, + "learning_rate": 1.4337311904370804e-06, + "loss": 0.0019, + "step": 242450 + }, + { + "epoch": 1.5550631267377089, + "grad_norm": 0.03330722823739052, + "learning_rate": 1.4333389133881375e-06, + "loss": 0.0025, + "step": 242460 + }, + { + "epoch": 1.555127263631495, + "grad_norm": 0.017193831503391266, + "learning_rate": 1.432946681032007e-06, + "loss": 0.0005, + "step": 242470 + }, + { + "epoch": 1.5551914005252812, + "grad_norm": 0.08073843270540237, + "learning_rate": 1.4325544933736047e-06, + "loss": 0.0008, + "step": 242480 + }, + { + "epoch": 1.5552555374190673, + "grad_norm": 0.0536016970872879, + "learning_rate": 1.4321623504178416e-06, + "loss": 0.0012, + "step": 242490 + }, + { + "epoch": 1.5553196743128535, + "grad_norm": 0.004435609094798565, + "learning_rate": 1.4317702521696364e-06, + "loss": 0.0014, + "step": 242500 + }, + { + "epoch": 1.5553838112066396, + "grad_norm": 0.1516411006450653, + "learning_rate": 1.4313781986338998e-06, + "loss": 0.0011, + "step": 242510 + }, + { + "epoch": 1.5554479481004255, + "grad_norm": 0.07027588039636612, + "learning_rate": 1.4309861898155453e-06, + "loss": 0.0015, + "step": 242520 + }, + { + "epoch": 1.5555120849942117, + "grad_norm": 0.06725809723138809, + "learning_rate": 1.430594225719482e-06, + "loss": 0.001, + "step": 242530 + }, + { + "epoch": 1.5555762218879976, + "grad_norm": 0.16939279437065125, + "learning_rate": 1.4302023063506265e-06, + "loss": 0.0015, + "step": 242540 + }, + { + "epoch": 1.5556403587817837, + "grad_norm": 0.0354507751762867, + "learning_rate": 1.4298104317138873e-06, + "loss": 0.0013, + "step": 242550 + }, + { + "epoch": 1.55570449567557, + "grad_norm": 0.019089942798018456, + "learning_rate": 1.4294186018141732e-06, + "loss": 0.0006, + "step": 242560 + }, + { + "epoch": 1.555768632569356, + "grad_norm": 0.03581133484840393, + "learning_rate": 1.4290268166563975e-06, + "loss": 0.0012, + "step": 242570 + }, + { + "epoch": 1.5558327694631422, + "grad_norm": 0.05963753163814545, + "learning_rate": 1.4286350762454682e-06, + "loss": 0.0015, + "step": 242580 + }, + { + "epoch": 1.5558969063569283, + "grad_norm": 0.07994631677865982, + "learning_rate": 1.4282433805862933e-06, + "loss": 0.001, + "step": 242590 + }, + { + "epoch": 1.5559610432507143, + "grad_norm": 0.09030909091234207, + "learning_rate": 1.4278517296837807e-06, + "loss": 0.0021, + "step": 242600 + }, + { + "epoch": 1.5560251801445004, + "grad_norm": 0.042248863726854324, + "learning_rate": 1.4274601235428399e-06, + "loss": 0.001, + "step": 242610 + }, + { + "epoch": 1.5560893170382866, + "grad_norm": 0.11223195493221283, + "learning_rate": 1.4270685621683772e-06, + "loss": 0.0015, + "step": 242620 + }, + { + "epoch": 1.5561534539320725, + "grad_norm": 0.003470065537840128, + "learning_rate": 1.4266770455652984e-06, + "loss": 0.0015, + "step": 242630 + }, + { + "epoch": 1.5562175908258586, + "grad_norm": 0.09900204092264175, + "learning_rate": 1.426285573738509e-06, + "loss": 0.0006, + "step": 242640 + }, + { + "epoch": 1.5562817277196448, + "grad_norm": 0.06904611736536026, + "learning_rate": 1.4258941466929171e-06, + "loss": 0.0003, + "step": 242650 + }, + { + "epoch": 1.556345864613431, + "grad_norm": 0.06262891739606857, + "learning_rate": 1.4255027644334257e-06, + "loss": 0.0012, + "step": 242660 + }, + { + "epoch": 1.556410001507217, + "grad_norm": 0.11479669064283371, + "learning_rate": 1.4251114269649379e-06, + "loss": 0.001, + "step": 242670 + }, + { + "epoch": 1.5564741384010032, + "grad_norm": 0.19921335577964783, + "learning_rate": 1.4247201342923605e-06, + "loss": 0.0015, + "step": 242680 + }, + { + "epoch": 1.5565382752947892, + "grad_norm": 0.023834677413105965, + "learning_rate": 1.4243288864205945e-06, + "loss": 0.001, + "step": 242690 + }, + { + "epoch": 1.5566024121885753, + "grad_norm": 0.05833800137042999, + "learning_rate": 1.4239376833545437e-06, + "loss": 0.001, + "step": 242700 + }, + { + "epoch": 1.5566665490823612, + "grad_norm": 0.16412882506847382, + "learning_rate": 1.4235465250991076e-06, + "loss": 0.0015, + "step": 242710 + }, + { + "epoch": 1.5567306859761474, + "grad_norm": 0.03514918312430382, + "learning_rate": 1.4231554116591912e-06, + "loss": 0.0013, + "step": 242720 + }, + { + "epoch": 1.5567948228699335, + "grad_norm": 0.026855146512389183, + "learning_rate": 1.4227643430396938e-06, + "loss": 0.001, + "step": 242730 + }, + { + "epoch": 1.5568589597637197, + "grad_norm": 0.047292310744524, + "learning_rate": 1.422373319245516e-06, + "loss": 0.0013, + "step": 242740 + }, + { + "epoch": 1.5569230966575058, + "grad_norm": 0.07855642586946487, + "learning_rate": 1.4219823402815559e-06, + "loss": 0.0014, + "step": 242750 + }, + { + "epoch": 1.556987233551292, + "grad_norm": 0.02176888845860958, + "learning_rate": 1.4215914061527152e-06, + "loss": 0.0011, + "step": 242760 + }, + { + "epoch": 1.5570513704450781, + "grad_norm": 0.12822465598583221, + "learning_rate": 1.421200516863892e-06, + "loss": 0.0004, + "step": 242770 + }, + { + "epoch": 1.557115507338864, + "grad_norm": 0.07081867754459381, + "learning_rate": 1.4208096724199843e-06, + "loss": 0.0014, + "step": 242780 + }, + { + "epoch": 1.5571796442326502, + "grad_norm": 0.11112422496080399, + "learning_rate": 1.4204188728258877e-06, + "loss": 0.0017, + "step": 242790 + }, + { + "epoch": 1.5572437811264361, + "grad_norm": 0.025584004819393158, + "learning_rate": 1.4200281180865023e-06, + "loss": 0.0008, + "step": 242800 + }, + { + "epoch": 1.5573079180202223, + "grad_norm": 0.1054731011390686, + "learning_rate": 1.4196374082067231e-06, + "loss": 0.0016, + "step": 242810 + }, + { + "epoch": 1.5573720549140084, + "grad_norm": 0.08611925691366196, + "learning_rate": 1.4192467431914446e-06, + "loss": 0.0007, + "step": 242820 + }, + { + "epoch": 1.5574361918077946, + "grad_norm": 0.043091047555208206, + "learning_rate": 1.4188561230455632e-06, + "loss": 0.0015, + "step": 242830 + }, + { + "epoch": 1.5575003287015807, + "grad_norm": 0.03824600949883461, + "learning_rate": 1.4184655477739763e-06, + "loss": 0.0008, + "step": 242840 + }, + { + "epoch": 1.5575644655953669, + "grad_norm": 0.03904568403959274, + "learning_rate": 1.4180750173815756e-06, + "loss": 0.0018, + "step": 242850 + }, + { + "epoch": 1.5576286024891528, + "grad_norm": 0.10202863812446594, + "learning_rate": 1.417684531873254e-06, + "loss": 0.0014, + "step": 242860 + }, + { + "epoch": 1.557692739382939, + "grad_norm": 0.035224538296461105, + "learning_rate": 1.4172940912539045e-06, + "loss": 0.0025, + "step": 242870 + }, + { + "epoch": 1.5577568762767249, + "grad_norm": 0.12914836406707764, + "learning_rate": 1.4169036955284227e-06, + "loss": 0.0009, + "step": 242880 + }, + { + "epoch": 1.557821013170511, + "grad_norm": 0.030876312404870987, + "learning_rate": 1.4165133447016976e-06, + "loss": 0.0008, + "step": 242890 + }, + { + "epoch": 1.5578851500642972, + "grad_norm": 0.1389792114496231, + "learning_rate": 1.4161230387786217e-06, + "loss": 0.0019, + "step": 242900 + }, + { + "epoch": 1.5579492869580833, + "grad_norm": 0.009573440998792648, + "learning_rate": 1.415732777764084e-06, + "loss": 0.001, + "step": 242910 + }, + { + "epoch": 1.5580134238518695, + "grad_norm": 0.05179290100932121, + "learning_rate": 1.4153425616629773e-06, + "loss": 0.001, + "step": 242920 + }, + { + "epoch": 1.5580775607456556, + "grad_norm": 0.04464186728000641, + "learning_rate": 1.414952390480191e-06, + "loss": 0.0012, + "step": 242930 + }, + { + "epoch": 1.5581416976394418, + "grad_norm": 0.07343059033155441, + "learning_rate": 1.4145622642206113e-06, + "loss": 0.0021, + "step": 242940 + }, + { + "epoch": 1.5582058345332277, + "grad_norm": 0.0018839394906535745, + "learning_rate": 1.4141721828891303e-06, + "loss": 0.0006, + "step": 242950 + }, + { + "epoch": 1.5582699714270138, + "grad_norm": 0.03524048253893852, + "learning_rate": 1.4137821464906349e-06, + "loss": 0.0017, + "step": 242960 + }, + { + "epoch": 1.5583341083207998, + "grad_norm": 0.07066163420677185, + "learning_rate": 1.4133921550300122e-06, + "loss": 0.0008, + "step": 242970 + }, + { + "epoch": 1.558398245214586, + "grad_norm": 0.04661337658762932, + "learning_rate": 1.4130022085121475e-06, + "loss": 0.0008, + "step": 242980 + }, + { + "epoch": 1.558462382108372, + "grad_norm": 0.024441925808787346, + "learning_rate": 1.4126123069419307e-06, + "loss": 0.0012, + "step": 242990 + }, + { + "epoch": 1.5585265190021582, + "grad_norm": 0.11181625723838806, + "learning_rate": 1.412222450324245e-06, + "loss": 0.0017, + "step": 243000 + }, + { + "epoch": 1.5585906558959444, + "grad_norm": 0.0369691401720047, + "learning_rate": 1.4118326386639764e-06, + "loss": 0.0031, + "step": 243010 + }, + { + "epoch": 1.5586547927897305, + "grad_norm": 0.002632532501593232, + "learning_rate": 1.4114428719660078e-06, + "loss": 0.0006, + "step": 243020 + }, + { + "epoch": 1.5587189296835167, + "grad_norm": 0.13097435235977173, + "learning_rate": 1.411053150235226e-06, + "loss": 0.0015, + "step": 243030 + }, + { + "epoch": 1.5587830665773026, + "grad_norm": 0.11004441231489182, + "learning_rate": 1.4106634734765135e-06, + "loss": 0.0017, + "step": 243040 + }, + { + "epoch": 1.5588472034710887, + "grad_norm": 0.1229131743311882, + "learning_rate": 1.4102738416947525e-06, + "loss": 0.0019, + "step": 243050 + }, + { + "epoch": 1.5589113403648747, + "grad_norm": 0.03745304048061371, + "learning_rate": 1.409884254894825e-06, + "loss": 0.0019, + "step": 243060 + }, + { + "epoch": 1.5589754772586608, + "grad_norm": 0.018195126205682755, + "learning_rate": 1.4094947130816144e-06, + "loss": 0.0014, + "step": 243070 + }, + { + "epoch": 1.559039614152447, + "grad_norm": 0.12419760972261429, + "learning_rate": 1.4091052162600017e-06, + "loss": 0.0011, + "step": 243080 + }, + { + "epoch": 1.559103751046233, + "grad_norm": 0.32271960377693176, + "learning_rate": 1.4087157644348648e-06, + "loss": 0.0016, + "step": 243090 + }, + { + "epoch": 1.5591678879400193, + "grad_norm": 0.03508146479725838, + "learning_rate": 1.4083263576110885e-06, + "loss": 0.0009, + "step": 243100 + }, + { + "epoch": 1.5592320248338054, + "grad_norm": 0.08492378890514374, + "learning_rate": 1.4079369957935491e-06, + "loss": 0.001, + "step": 243110 + }, + { + "epoch": 1.5592961617275913, + "grad_norm": 0.3226666748523712, + "learning_rate": 1.4075476789871267e-06, + "loss": 0.0009, + "step": 243120 + }, + { + "epoch": 1.5593602986213775, + "grad_norm": 0.03416430577635765, + "learning_rate": 1.4071584071966976e-06, + "loss": 0.001, + "step": 243130 + }, + { + "epoch": 1.5594244355151634, + "grad_norm": 0.062238939106464386, + "learning_rate": 1.4067691804271433e-06, + "loss": 0.0007, + "step": 243140 + }, + { + "epoch": 1.5594885724089496, + "grad_norm": 0.10507658123970032, + "learning_rate": 1.4063799986833388e-06, + "loss": 0.0014, + "step": 243150 + }, + { + "epoch": 1.5595527093027357, + "grad_norm": 0.10475585609674454, + "learning_rate": 1.4059908619701612e-06, + "loss": 0.0009, + "step": 243160 + }, + { + "epoch": 1.5596168461965219, + "grad_norm": 0.006586553994566202, + "learning_rate": 1.4056017702924858e-06, + "loss": 0.0018, + "step": 243170 + }, + { + "epoch": 1.559680983090308, + "grad_norm": 0.1432701051235199, + "learning_rate": 1.40521272365519e-06, + "loss": 0.0011, + "step": 243180 + }, + { + "epoch": 1.5597451199840942, + "grad_norm": 0.016658388078212738, + "learning_rate": 1.4048237220631484e-06, + "loss": 0.0004, + "step": 243190 + }, + { + "epoch": 1.5598092568778803, + "grad_norm": 0.09033188968896866, + "learning_rate": 1.4044347655212343e-06, + "loss": 0.001, + "step": 243200 + }, + { + "epoch": 1.5598733937716662, + "grad_norm": 0.09436356276273727, + "learning_rate": 1.4040458540343215e-06, + "loss": 0.0014, + "step": 243210 + }, + { + "epoch": 1.5599375306654524, + "grad_norm": 0.12356075644493103, + "learning_rate": 1.4036569876072853e-06, + "loss": 0.0009, + "step": 243220 + }, + { + "epoch": 1.5600016675592383, + "grad_norm": 0.07750432938337326, + "learning_rate": 1.4032681662449976e-06, + "loss": 0.001, + "step": 243230 + }, + { + "epoch": 1.5600658044530245, + "grad_norm": 0.0030151098035275936, + "learning_rate": 1.4028793899523285e-06, + "loss": 0.0008, + "step": 243240 + }, + { + "epoch": 1.5601299413468106, + "grad_norm": 0.032839518040418625, + "learning_rate": 1.402490658734153e-06, + "loss": 0.0009, + "step": 243250 + }, + { + "epoch": 1.5601940782405967, + "grad_norm": 0.06680786609649658, + "learning_rate": 1.4021019725953405e-06, + "loss": 0.0007, + "step": 243260 + }, + { + "epoch": 1.560258215134383, + "grad_norm": 0.05233680456876755, + "learning_rate": 1.4017133315407622e-06, + "loss": 0.0008, + "step": 243270 + }, + { + "epoch": 1.560322352028169, + "grad_norm": 0.04931798204779625, + "learning_rate": 1.4013247355752858e-06, + "loss": 0.0009, + "step": 243280 + }, + { + "epoch": 1.560386488921955, + "grad_norm": 0.0011380620999261737, + "learning_rate": 1.4009361847037833e-06, + "loss": 0.0014, + "step": 243290 + }, + { + "epoch": 1.5604506258157411, + "grad_norm": 0.0662262886762619, + "learning_rate": 1.4005476789311233e-06, + "loss": 0.0022, + "step": 243300 + }, + { + "epoch": 1.5605147627095273, + "grad_norm": 0.15421129763126373, + "learning_rate": 1.4001592182621732e-06, + "loss": 0.0014, + "step": 243310 + }, + { + "epoch": 1.5605788996033132, + "grad_norm": 0.12788690626621246, + "learning_rate": 1.3997708027017993e-06, + "loss": 0.0009, + "step": 243320 + }, + { + "epoch": 1.5606430364970993, + "grad_norm": 0.1507088989019394, + "learning_rate": 1.3993824322548721e-06, + "loss": 0.0013, + "step": 243330 + }, + { + "epoch": 1.5607071733908855, + "grad_norm": 0.09280886501073837, + "learning_rate": 1.3989941069262558e-06, + "loss": 0.001, + "step": 243340 + }, + { + "epoch": 1.5607713102846716, + "grad_norm": 0.06773676723241806, + "learning_rate": 1.3986058267208174e-06, + "loss": 0.0009, + "step": 243350 + }, + { + "epoch": 1.5608354471784578, + "grad_norm": 0.1687345951795578, + "learning_rate": 1.3982175916434204e-06, + "loss": 0.0012, + "step": 243360 + }, + { + "epoch": 1.560899584072244, + "grad_norm": 0.10132893174886703, + "learning_rate": 1.3978294016989324e-06, + "loss": 0.0007, + "step": 243370 + }, + { + "epoch": 1.5609637209660299, + "grad_norm": 0.030050620436668396, + "learning_rate": 1.3974412568922163e-06, + "loss": 0.0012, + "step": 243380 + }, + { + "epoch": 1.561027857859816, + "grad_norm": 0.07169070839881897, + "learning_rate": 1.397053157228135e-06, + "loss": 0.0005, + "step": 243390 + }, + { + "epoch": 1.561091994753602, + "grad_norm": 0.1907750368118286, + "learning_rate": 1.3966651027115536e-06, + "loss": 0.003, + "step": 243400 + }, + { + "epoch": 1.561156131647388, + "grad_norm": 0.03356742486357689, + "learning_rate": 1.3962770933473336e-06, + "loss": 0.0012, + "step": 243410 + }, + { + "epoch": 1.5612202685411742, + "grad_norm": 0.05747605115175247, + "learning_rate": 1.3958891291403376e-06, + "loss": 0.0016, + "step": 243420 + }, + { + "epoch": 1.5612844054349604, + "grad_norm": 0.05902548506855965, + "learning_rate": 1.3955012100954246e-06, + "loss": 0.0016, + "step": 243430 + }, + { + "epoch": 1.5613485423287465, + "grad_norm": 0.09562114626169205, + "learning_rate": 1.3951133362174595e-06, + "loss": 0.0006, + "step": 243440 + }, + { + "epoch": 1.5614126792225327, + "grad_norm": 0.04821772128343582, + "learning_rate": 1.394725507511301e-06, + "loss": 0.0011, + "step": 243450 + }, + { + "epoch": 1.5614768161163188, + "grad_norm": 0.04168270155787468, + "learning_rate": 1.3943377239818078e-06, + "loss": 0.0006, + "step": 243460 + }, + { + "epoch": 1.5615409530101048, + "grad_norm": 0.127223938703537, + "learning_rate": 1.3939499856338384e-06, + "loss": 0.0013, + "step": 243470 + }, + { + "epoch": 1.561605089903891, + "grad_norm": 0.0999385342001915, + "learning_rate": 1.3935622924722546e-06, + "loss": 0.0009, + "step": 243480 + }, + { + "epoch": 1.5616692267976768, + "grad_norm": 0.15948669612407684, + "learning_rate": 1.3931746445019134e-06, + "loss": 0.0019, + "step": 243490 + }, + { + "epoch": 1.561733363691463, + "grad_norm": 0.09697796404361725, + "learning_rate": 1.3927870417276707e-06, + "loss": 0.0012, + "step": 243500 + }, + { + "epoch": 1.5617975005852491, + "grad_norm": 0.07811637222766876, + "learning_rate": 1.3923994841543836e-06, + "loss": 0.0007, + "step": 243510 + }, + { + "epoch": 1.5618616374790353, + "grad_norm": 0.0629824548959732, + "learning_rate": 1.3920119717869102e-06, + "loss": 0.0015, + "step": 243520 + }, + { + "epoch": 1.5619257743728214, + "grad_norm": 0.1268361210823059, + "learning_rate": 1.3916245046301058e-06, + "loss": 0.0009, + "step": 243530 + }, + { + "epoch": 1.5619899112666076, + "grad_norm": 0.023304827511310577, + "learning_rate": 1.3912370826888232e-06, + "loss": 0.001, + "step": 243540 + }, + { + "epoch": 1.5620540481603935, + "grad_norm": 0.04373375326395035, + "learning_rate": 1.390849705967921e-06, + "loss": 0.0011, + "step": 243550 + }, + { + "epoch": 1.5621181850541797, + "grad_norm": 0.04374853894114494, + "learning_rate": 1.3904623744722517e-06, + "loss": 0.0028, + "step": 243560 + }, + { + "epoch": 1.5621823219479656, + "grad_norm": 0.0007684393785893917, + "learning_rate": 1.3900750882066683e-06, + "loss": 0.0012, + "step": 243570 + }, + { + "epoch": 1.5622464588417517, + "grad_norm": 0.04392901062965393, + "learning_rate": 1.3896878471760222e-06, + "loss": 0.0011, + "step": 243580 + }, + { + "epoch": 1.5623105957355379, + "grad_norm": 0.07232941687107086, + "learning_rate": 1.3893006513851676e-06, + "loss": 0.0012, + "step": 243590 + }, + { + "epoch": 1.562374732629324, + "grad_norm": 0.026323221623897552, + "learning_rate": 1.3889135008389582e-06, + "loss": 0.0015, + "step": 243600 + }, + { + "epoch": 1.5624388695231102, + "grad_norm": 0.06476714462041855, + "learning_rate": 1.3885263955422434e-06, + "loss": 0.0008, + "step": 243610 + }, + { + "epoch": 1.5625030064168963, + "grad_norm": 0.1663721352815628, + "learning_rate": 1.388139335499874e-06, + "loss": 0.0016, + "step": 243620 + }, + { + "epoch": 1.5625671433106825, + "grad_norm": 0.06588054448366165, + "learning_rate": 1.3877523207166982e-06, + "loss": 0.0015, + "step": 243630 + }, + { + "epoch": 1.5626312802044684, + "grad_norm": 0.0852726399898529, + "learning_rate": 1.3873653511975694e-06, + "loss": 0.0013, + "step": 243640 + }, + { + "epoch": 1.5626954170982545, + "grad_norm": 0.0007486481335945427, + "learning_rate": 1.3869784269473347e-06, + "loss": 0.0012, + "step": 243650 + }, + { + "epoch": 1.5627595539920405, + "grad_norm": 0.00634699035435915, + "learning_rate": 1.386591547970841e-06, + "loss": 0.0026, + "step": 243660 + }, + { + "epoch": 1.5628236908858266, + "grad_norm": 0.033408116549253464, + "learning_rate": 1.3862047142729385e-06, + "loss": 0.0012, + "step": 243670 + }, + { + "epoch": 1.5628878277796128, + "grad_norm": 0.09897807240486145, + "learning_rate": 1.3858179258584743e-06, + "loss": 0.0007, + "step": 243680 + }, + { + "epoch": 1.562951964673399, + "grad_norm": 0.04074937850236893, + "learning_rate": 1.3854311827322942e-06, + "loss": 0.002, + "step": 243690 + }, + { + "epoch": 1.563016101567185, + "grad_norm": 0.04539360851049423, + "learning_rate": 1.3850444848992433e-06, + "loss": 0.0007, + "step": 243700 + }, + { + "epoch": 1.5630802384609712, + "grad_norm": 0.013356728479266167, + "learning_rate": 1.3846578323641702e-06, + "loss": 0.0006, + "step": 243710 + }, + { + "epoch": 1.5631443753547571, + "grad_norm": 0.15393126010894775, + "learning_rate": 1.3842712251319185e-06, + "loss": 0.0012, + "step": 243720 + }, + { + "epoch": 1.5632085122485433, + "grad_norm": 0.056364256888628006, + "learning_rate": 1.3838846632073316e-06, + "loss": 0.0014, + "step": 243730 + }, + { + "epoch": 1.5632726491423294, + "grad_norm": 0.04982461407780647, + "learning_rate": 1.3834981465952535e-06, + "loss": 0.0018, + "step": 243740 + }, + { + "epoch": 1.5633367860361154, + "grad_norm": 0.07150271534919739, + "learning_rate": 1.3831116753005297e-06, + "loss": 0.0015, + "step": 243750 + }, + { + "epoch": 1.5634009229299015, + "grad_norm": 0.047069478780031204, + "learning_rate": 1.3827252493280014e-06, + "loss": 0.0011, + "step": 243760 + }, + { + "epoch": 1.5634650598236877, + "grad_norm": 0.1507633626461029, + "learning_rate": 1.3823388686825112e-06, + "loss": 0.0009, + "step": 243770 + }, + { + "epoch": 1.5635291967174738, + "grad_norm": 0.12099693715572357, + "learning_rate": 1.3819525333688989e-06, + "loss": 0.0011, + "step": 243780 + }, + { + "epoch": 1.56359333361126, + "grad_norm": 0.020730815827846527, + "learning_rate": 1.3815662433920084e-06, + "loss": 0.0012, + "step": 243790 + }, + { + "epoch": 1.563657470505046, + "grad_norm": 0.007649121806025505, + "learning_rate": 1.3811799987566794e-06, + "loss": 0.0022, + "step": 243800 + }, + { + "epoch": 1.563721607398832, + "grad_norm": 0.014868194237351418, + "learning_rate": 1.3807937994677494e-06, + "loss": 0.0009, + "step": 243810 + }, + { + "epoch": 1.5637857442926182, + "grad_norm": 0.15606722235679626, + "learning_rate": 1.3804076455300614e-06, + "loss": 0.0013, + "step": 243820 + }, + { + "epoch": 1.5638498811864041, + "grad_norm": 0.016681639477610588, + "learning_rate": 1.3800215369484521e-06, + "loss": 0.0011, + "step": 243830 + }, + { + "epoch": 1.5639140180801903, + "grad_norm": 0.0037738841492682695, + "learning_rate": 1.3796354737277607e-06, + "loss": 0.0006, + "step": 243840 + }, + { + "epoch": 1.5639781549739764, + "grad_norm": 0.03227373585104942, + "learning_rate": 1.3792494558728226e-06, + "loss": 0.0021, + "step": 243850 + }, + { + "epoch": 1.5640422918677626, + "grad_norm": 0.07239340245723724, + "learning_rate": 1.378863483388478e-06, + "loss": 0.0012, + "step": 243860 + }, + { + "epoch": 1.5641064287615487, + "grad_norm": 0.06314176321029663, + "learning_rate": 1.3784775562795617e-06, + "loss": 0.0011, + "step": 243870 + }, + { + "epoch": 1.5641705656553349, + "grad_norm": 0.06834463030099869, + "learning_rate": 1.3780916745509099e-06, + "loss": 0.0011, + "step": 243880 + }, + { + "epoch": 1.564234702549121, + "grad_norm": 0.039398193359375, + "learning_rate": 1.3777058382073566e-06, + "loss": 0.0004, + "step": 243890 + }, + { + "epoch": 1.564298839442907, + "grad_norm": 0.1559874713420868, + "learning_rate": 1.3773200472537396e-06, + "loss": 0.0013, + "step": 243900 + }, + { + "epoch": 1.564362976336693, + "grad_norm": 0.028561554849147797, + "learning_rate": 1.3769343016948911e-06, + "loss": 0.0008, + "step": 243910 + }, + { + "epoch": 1.564427113230479, + "grad_norm": 0.042328860610723495, + "learning_rate": 1.3765486015356455e-06, + "loss": 0.0008, + "step": 243920 + }, + { + "epoch": 1.5644912501242652, + "grad_norm": 0.08399327844381332, + "learning_rate": 1.376162946780834e-06, + "loss": 0.0008, + "step": 243930 + }, + { + "epoch": 1.5645553870180513, + "grad_norm": 0.08977387845516205, + "learning_rate": 1.3757773374352918e-06, + "loss": 0.0006, + "step": 243940 + }, + { + "epoch": 1.5646195239118375, + "grad_norm": 0.005761916283518076, + "learning_rate": 1.3753917735038503e-06, + "loss": 0.0015, + "step": 243950 + }, + { + "epoch": 1.5646836608056236, + "grad_norm": 0.00901766400784254, + "learning_rate": 1.3750062549913383e-06, + "loss": 0.0018, + "step": 243960 + }, + { + "epoch": 1.5647477976994097, + "grad_norm": 0.05700463801622391, + "learning_rate": 1.374620781902591e-06, + "loss": 0.0016, + "step": 243970 + }, + { + "epoch": 1.5648119345931957, + "grad_norm": 0.11297713965177536, + "learning_rate": 1.374235354242436e-06, + "loss": 0.0015, + "step": 243980 + }, + { + "epoch": 1.5648760714869818, + "grad_norm": 0.03825245797634125, + "learning_rate": 1.3738499720157028e-06, + "loss": 0.0005, + "step": 243990 + }, + { + "epoch": 1.5649402083807677, + "grad_norm": 0.11012471467256546, + "learning_rate": 1.3734646352272202e-06, + "loss": 0.0018, + "step": 244000 + }, + { + "epoch": 1.5649402083807677, + "eval_loss": 0.0021118263248354197, + "eval_runtime": 3.3204, + "eval_samples_per_second": 60.233, + "eval_steps_per_second": 15.058, + "step": 244000 + }, + { + "epoch": 1.565004345274554, + "grad_norm": 0.051630180329084396, + "learning_rate": 1.3730793438818184e-06, + "loss": 0.0011, + "step": 244010 + }, + { + "epoch": 1.56506848216834, + "grad_norm": 0.1360810250043869, + "learning_rate": 1.372694097984325e-06, + "loss": 0.0008, + "step": 244020 + }, + { + "epoch": 1.5651326190621262, + "grad_norm": 0.027779752388596535, + "learning_rate": 1.3723088975395671e-06, + "loss": 0.0006, + "step": 244030 + }, + { + "epoch": 1.5651967559559123, + "grad_norm": 0.01636786386370659, + "learning_rate": 1.3719237425523695e-06, + "loss": 0.0009, + "step": 244040 + }, + { + "epoch": 1.5652608928496985, + "grad_norm": 0.017599863931536674, + "learning_rate": 1.371538633027562e-06, + "loss": 0.0008, + "step": 244050 + }, + { + "epoch": 1.5653250297434846, + "grad_norm": 0.08058811724185944, + "learning_rate": 1.3711535689699684e-06, + "loss": 0.0011, + "step": 244060 + }, + { + "epoch": 1.5653891666372706, + "grad_norm": 0.17835091054439545, + "learning_rate": 1.3707685503844142e-06, + "loss": 0.0023, + "step": 244070 + }, + { + "epoch": 1.5654533035310567, + "grad_norm": 0.05642031878232956, + "learning_rate": 1.3703835772757229e-06, + "loss": 0.0014, + "step": 244080 + }, + { + "epoch": 1.5655174404248426, + "grad_norm": 0.03311365097761154, + "learning_rate": 1.3699986496487206e-06, + "loss": 0.0011, + "step": 244090 + }, + { + "epoch": 1.5655815773186288, + "grad_norm": 0.004921520594507456, + "learning_rate": 1.369613767508229e-06, + "loss": 0.0013, + "step": 244100 + }, + { + "epoch": 1.565645714212415, + "grad_norm": 0.024157697334885597, + "learning_rate": 1.3692289308590706e-06, + "loss": 0.0014, + "step": 244110 + }, + { + "epoch": 1.565709851106201, + "grad_norm": 0.003105662763118744, + "learning_rate": 1.3688441397060698e-06, + "loss": 0.0012, + "step": 244120 + }, + { + "epoch": 1.5657739879999872, + "grad_norm": 0.3345547020435333, + "learning_rate": 1.3684593940540468e-06, + "loss": 0.0008, + "step": 244130 + }, + { + "epoch": 1.5658381248937734, + "grad_norm": 0.010019885376095772, + "learning_rate": 1.3680746939078237e-06, + "loss": 0.0017, + "step": 244140 + }, + { + "epoch": 1.5659022617875593, + "grad_norm": 0.05200977995991707, + "learning_rate": 1.3676900392722186e-06, + "loss": 0.0014, + "step": 244150 + }, + { + "epoch": 1.5659663986813455, + "grad_norm": 0.014318738132715225, + "learning_rate": 1.3673054301520545e-06, + "loss": 0.0008, + "step": 244160 + }, + { + "epoch": 1.5660305355751316, + "grad_norm": 0.10073602199554443, + "learning_rate": 1.36692086655215e-06, + "loss": 0.0006, + "step": 244170 + }, + { + "epoch": 1.5660946724689175, + "grad_norm": 0.04899398609995842, + "learning_rate": 1.3665363484773237e-06, + "loss": 0.0009, + "step": 244180 + }, + { + "epoch": 1.5661588093627037, + "grad_norm": 0.03722739592194557, + "learning_rate": 1.3661518759323916e-06, + "loss": 0.0009, + "step": 244190 + }, + { + "epoch": 1.5662229462564898, + "grad_norm": 0.14708061516284943, + "learning_rate": 1.3657674489221756e-06, + "loss": 0.001, + "step": 244200 + }, + { + "epoch": 1.566287083150276, + "grad_norm": 0.16129310429096222, + "learning_rate": 1.3653830674514906e-06, + "loss": 0.0011, + "step": 244210 + }, + { + "epoch": 1.5663512200440621, + "grad_norm": 0.04316999763250351, + "learning_rate": 1.3649987315251534e-06, + "loss": 0.0007, + "step": 244220 + }, + { + "epoch": 1.5664153569378483, + "grad_norm": 0.6100648641586304, + "learning_rate": 1.3646144411479784e-06, + "loss": 0.0018, + "step": 244230 + }, + { + "epoch": 1.5664794938316342, + "grad_norm": 0.07713521271944046, + "learning_rate": 1.3642301963247845e-06, + "loss": 0.0007, + "step": 244240 + }, + { + "epoch": 1.5665436307254204, + "grad_norm": 0.014485377818346024, + "learning_rate": 1.363845997060384e-06, + "loss": 0.0008, + "step": 244250 + }, + { + "epoch": 1.5666077676192063, + "grad_norm": 0.05376194417476654, + "learning_rate": 1.3634618433595908e-06, + "loss": 0.0047, + "step": 244260 + }, + { + "epoch": 1.5666719045129924, + "grad_norm": 0.018168184906244278, + "learning_rate": 1.3630777352272212e-06, + "loss": 0.0008, + "step": 244270 + }, + { + "epoch": 1.5667360414067786, + "grad_norm": 0.05175149813294411, + "learning_rate": 1.3626936726680867e-06, + "loss": 0.0009, + "step": 244280 + }, + { + "epoch": 1.5668001783005647, + "grad_norm": 0.0020207969937473536, + "learning_rate": 1.3623096556870003e-06, + "loss": 0.0009, + "step": 244290 + }, + { + "epoch": 1.5668643151943509, + "grad_norm": 0.11341476440429688, + "learning_rate": 1.3619256842887724e-06, + "loss": 0.0005, + "step": 244300 + }, + { + "epoch": 1.566928452088137, + "grad_norm": 0.0669422447681427, + "learning_rate": 1.361541758478217e-06, + "loss": 0.0006, + "step": 244310 + }, + { + "epoch": 1.5669925889819232, + "grad_norm": 0.12491405010223389, + "learning_rate": 1.3611578782601436e-06, + "loss": 0.001, + "step": 244320 + }, + { + "epoch": 1.567056725875709, + "grad_norm": 0.21512703597545624, + "learning_rate": 1.3607740436393624e-06, + "loss": 0.0007, + "step": 244330 + }, + { + "epoch": 1.5671208627694952, + "grad_norm": 0.0756843164563179, + "learning_rate": 1.3603902546206826e-06, + "loss": 0.0004, + "step": 244340 + }, + { + "epoch": 1.5671849996632812, + "grad_norm": 0.01778343692421913, + "learning_rate": 1.360006511208915e-06, + "loss": 0.0012, + "step": 244350 + }, + { + "epoch": 1.5672491365570673, + "grad_norm": 0.03670975938439369, + "learning_rate": 1.3596228134088657e-06, + "loss": 0.0011, + "step": 244360 + }, + { + "epoch": 1.5673132734508535, + "grad_norm": 0.13029594719409943, + "learning_rate": 1.3592391612253465e-06, + "loss": 0.0018, + "step": 244370 + }, + { + "epoch": 1.5673774103446396, + "grad_norm": 0.04360777512192726, + "learning_rate": 1.3588555546631605e-06, + "loss": 0.0015, + "step": 244380 + }, + { + "epoch": 1.5674415472384258, + "grad_norm": 0.03885827213525772, + "learning_rate": 1.3584719937271184e-06, + "loss": 0.0006, + "step": 244390 + }, + { + "epoch": 1.567505684132212, + "grad_norm": 0.07572092860937119, + "learning_rate": 1.3580884784220243e-06, + "loss": 0.0025, + "step": 244400 + }, + { + "epoch": 1.5675698210259978, + "grad_norm": 0.013776548206806183, + "learning_rate": 1.357705008752685e-06, + "loss": 0.0005, + "step": 244410 + }, + { + "epoch": 1.567633957919784, + "grad_norm": 0.09575240314006805, + "learning_rate": 1.3573215847239034e-06, + "loss": 0.0011, + "step": 244420 + }, + { + "epoch": 1.56769809481357, + "grad_norm": 0.05258919671177864, + "learning_rate": 1.3569382063404873e-06, + "loss": 0.0008, + "step": 244430 + }, + { + "epoch": 1.567762231707356, + "grad_norm": 0.04056648537516594, + "learning_rate": 1.3565548736072394e-06, + "loss": 0.0007, + "step": 244440 + }, + { + "epoch": 1.5678263686011422, + "grad_norm": 0.017687644809484482, + "learning_rate": 1.356171586528962e-06, + "loss": 0.001, + "step": 244450 + }, + { + "epoch": 1.5678905054949284, + "grad_norm": 0.08572002500295639, + "learning_rate": 1.3557883451104581e-06, + "loss": 0.001, + "step": 244460 + }, + { + "epoch": 1.5679546423887145, + "grad_norm": 0.028661692515015602, + "learning_rate": 1.3554051493565317e-06, + "loss": 0.0024, + "step": 244470 + }, + { + "epoch": 1.5680187792825007, + "grad_norm": 0.10158579796552658, + "learning_rate": 1.3550219992719838e-06, + "loss": 0.0015, + "step": 244480 + }, + { + "epoch": 1.5680829161762868, + "grad_norm": 0.012906504794955254, + "learning_rate": 1.3546388948616152e-06, + "loss": 0.001, + "step": 244490 + }, + { + "epoch": 1.5681470530700727, + "grad_norm": 0.07433255761861801, + "learning_rate": 1.354255836130225e-06, + "loss": 0.002, + "step": 244500 + }, + { + "epoch": 1.5682111899638589, + "grad_norm": 0.06329713761806488, + "learning_rate": 1.3538728230826165e-06, + "loss": 0.0006, + "step": 244510 + }, + { + "epoch": 1.5682753268576448, + "grad_norm": 0.16259931027889252, + "learning_rate": 1.353489855723587e-06, + "loss": 0.0009, + "step": 244520 + }, + { + "epoch": 1.568339463751431, + "grad_norm": 0.017799677327275276, + "learning_rate": 1.3531069340579345e-06, + "loss": 0.0021, + "step": 244530 + }, + { + "epoch": 1.568403600645217, + "grad_norm": 0.16020628809928894, + "learning_rate": 1.35272405809046e-06, + "loss": 0.0012, + "step": 244540 + }, + { + "epoch": 1.5684677375390033, + "grad_norm": 0.0366208590567112, + "learning_rate": 1.3523412278259595e-06, + "loss": 0.0013, + "step": 244550 + }, + { + "epoch": 1.5685318744327894, + "grad_norm": 0.11714744567871094, + "learning_rate": 1.3519584432692305e-06, + "loss": 0.0016, + "step": 244560 + }, + { + "epoch": 1.5685960113265756, + "grad_norm": 0.026635903865098953, + "learning_rate": 1.3515757044250683e-06, + "loss": 0.003, + "step": 244570 + }, + { + "epoch": 1.5686601482203617, + "grad_norm": 0.012604729272425175, + "learning_rate": 1.3511930112982713e-06, + "loss": 0.0006, + "step": 244580 + }, + { + "epoch": 1.5687242851141476, + "grad_norm": 0.12273949384689331, + "learning_rate": 1.3508103638936332e-06, + "loss": 0.0012, + "step": 244590 + }, + { + "epoch": 1.5687884220079338, + "grad_norm": 0.1338241696357727, + "learning_rate": 1.3504277622159495e-06, + "loss": 0.002, + "step": 244600 + }, + { + "epoch": 1.5688525589017197, + "grad_norm": 0.02673218958079815, + "learning_rate": 1.3500452062700132e-06, + "loss": 0.001, + "step": 244610 + }, + { + "epoch": 1.5689166957955059, + "grad_norm": 0.09174564480781555, + "learning_rate": 1.3496626960606202e-06, + "loss": 0.0014, + "step": 244620 + }, + { + "epoch": 1.568980832689292, + "grad_norm": 0.069851815700531, + "learning_rate": 1.3492802315925623e-06, + "loss": 0.0026, + "step": 244630 + }, + { + "epoch": 1.5690449695830782, + "grad_norm": 0.12030567228794098, + "learning_rate": 1.3488978128706309e-06, + "loss": 0.0012, + "step": 244640 + }, + { + "epoch": 1.5691091064768643, + "grad_norm": 0.1622411608695984, + "learning_rate": 1.3485154398996209e-06, + "loss": 0.0026, + "step": 244650 + }, + { + "epoch": 1.5691732433706505, + "grad_norm": 0.010083461180329323, + "learning_rate": 1.3481331126843216e-06, + "loss": 0.0008, + "step": 244660 + }, + { + "epoch": 1.5692373802644364, + "grad_norm": 0.14678412675857544, + "learning_rate": 1.347750831229525e-06, + "loss": 0.0014, + "step": 244670 + }, + { + "epoch": 1.5693015171582225, + "grad_norm": 0.07875506579875946, + "learning_rate": 1.347368595540019e-06, + "loss": 0.0021, + "step": 244680 + }, + { + "epoch": 1.5693656540520085, + "grad_norm": 0.03652176260948181, + "learning_rate": 1.3469864056205962e-06, + "loss": 0.0005, + "step": 244690 + }, + { + "epoch": 1.5694297909457946, + "grad_norm": 0.14352712035179138, + "learning_rate": 1.3466042614760449e-06, + "loss": 0.0022, + "step": 244700 + }, + { + "epoch": 1.5694939278395807, + "grad_norm": 0.40054547786712646, + "learning_rate": 1.3462221631111533e-06, + "loss": 0.0047, + "step": 244710 + }, + { + "epoch": 1.569558064733367, + "grad_norm": 0.08101187646389008, + "learning_rate": 1.3458401105307073e-06, + "loss": 0.0009, + "step": 244720 + }, + { + "epoch": 1.569622201627153, + "grad_norm": 0.1281840205192566, + "learning_rate": 1.3454581037394981e-06, + "loss": 0.0007, + "step": 244730 + }, + { + "epoch": 1.5696863385209392, + "grad_norm": 0.05733279883861542, + "learning_rate": 1.345076142742311e-06, + "loss": 0.0011, + "step": 244740 + }, + { + "epoch": 1.5697504754147253, + "grad_norm": 0.36465564370155334, + "learning_rate": 1.3446942275439317e-06, + "loss": 0.0047, + "step": 244750 + }, + { + "epoch": 1.5698146123085113, + "grad_norm": 0.0006671261508017778, + "learning_rate": 1.3443123581491446e-06, + "loss": 0.001, + "step": 244760 + }, + { + "epoch": 1.5698787492022974, + "grad_norm": 0.05007866397500038, + "learning_rate": 1.343930534562738e-06, + "loss": 0.0011, + "step": 244770 + }, + { + "epoch": 1.5699428860960833, + "grad_norm": 0.05445602163672447, + "learning_rate": 1.3435487567894944e-06, + "loss": 0.0012, + "step": 244780 + }, + { + "epoch": 1.5700070229898695, + "grad_norm": 0.17970049381256104, + "learning_rate": 1.3431670248341965e-06, + "loss": 0.0011, + "step": 244790 + }, + { + "epoch": 1.5700711598836556, + "grad_norm": 0.0806487500667572, + "learning_rate": 1.3427853387016315e-06, + "loss": 0.001, + "step": 244800 + }, + { + "epoch": 1.5701352967774418, + "grad_norm": 0.11729707568883896, + "learning_rate": 1.3424036983965793e-06, + "loss": 0.0014, + "step": 244810 + }, + { + "epoch": 1.570199433671228, + "grad_norm": 0.08794651180505753, + "learning_rate": 1.3420221039238235e-06, + "loss": 0.0015, + "step": 244820 + }, + { + "epoch": 1.570263570565014, + "grad_norm": 0.09539645165205002, + "learning_rate": 1.3416405552881429e-06, + "loss": 0.002, + "step": 244830 + }, + { + "epoch": 1.5703277074588, + "grad_norm": 0.08823912590742111, + "learning_rate": 1.3412590524943226e-06, + "loss": 0.0014, + "step": 244840 + }, + { + "epoch": 1.5703918443525862, + "grad_norm": 0.11009037494659424, + "learning_rate": 1.340877595547141e-06, + "loss": 0.0013, + "step": 244850 + }, + { + "epoch": 1.5704559812463723, + "grad_norm": 0.09003601223230362, + "learning_rate": 1.3404961844513786e-06, + "loss": 0.0014, + "step": 244860 + }, + { + "epoch": 1.5705201181401582, + "grad_norm": 0.007920468226075172, + "learning_rate": 1.3401148192118124e-06, + "loss": 0.0013, + "step": 244870 + }, + { + "epoch": 1.5705842550339444, + "grad_norm": 0.004617893602699041, + "learning_rate": 1.3397334998332252e-06, + "loss": 0.0006, + "step": 244880 + }, + { + "epoch": 1.5706483919277305, + "grad_norm": 0.10888317227363586, + "learning_rate": 1.339352226320393e-06, + "loss": 0.0012, + "step": 244890 + }, + { + "epoch": 1.5707125288215167, + "grad_norm": 0.0815168246626854, + "learning_rate": 1.3389709986780935e-06, + "loss": 0.0012, + "step": 244900 + }, + { + "epoch": 1.5707766657153028, + "grad_norm": 0.09804923832416534, + "learning_rate": 1.3385898169111028e-06, + "loss": 0.0017, + "step": 244910 + }, + { + "epoch": 1.570840802609089, + "grad_norm": 0.016148488968610764, + "learning_rate": 1.3382086810241995e-06, + "loss": 0.0006, + "step": 244920 + }, + { + "epoch": 1.570904939502875, + "grad_norm": 0.02452525682747364, + "learning_rate": 1.337827591022159e-06, + "loss": 0.0024, + "step": 244930 + }, + { + "epoch": 1.570969076396661, + "grad_norm": 0.10429196804761887, + "learning_rate": 1.3374465469097536e-06, + "loss": 0.0015, + "step": 244940 + }, + { + "epoch": 1.571033213290447, + "grad_norm": 0.01082930900156498, + "learning_rate": 1.3370655486917627e-06, + "loss": 0.0016, + "step": 244950 + }, + { + "epoch": 1.5710973501842331, + "grad_norm": 0.030714496970176697, + "learning_rate": 1.3366845963729586e-06, + "loss": 0.0006, + "step": 244960 + }, + { + "epoch": 1.5711614870780193, + "grad_norm": 0.13202424347400665, + "learning_rate": 1.3363036899581145e-06, + "loss": 0.0016, + "step": 244970 + }, + { + "epoch": 1.5712256239718054, + "grad_norm": 0.05811617895960808, + "learning_rate": 1.3359228294520017e-06, + "loss": 0.0008, + "step": 244980 + }, + { + "epoch": 1.5712897608655916, + "grad_norm": 0.014790991321206093, + "learning_rate": 1.3355420148593961e-06, + "loss": 0.0016, + "step": 244990 + }, + { + "epoch": 1.5713538977593777, + "grad_norm": 0.12423159182071686, + "learning_rate": 1.3351612461850682e-06, + "loss": 0.0009, + "step": 245000 + }, + { + "epoch": 1.5714180346531639, + "grad_norm": 0.03937933221459389, + "learning_rate": 1.334780523433789e-06, + "loss": 0.0006, + "step": 245010 + }, + { + "epoch": 1.5714821715469498, + "grad_norm": 0.13783757388591766, + "learning_rate": 1.334399846610328e-06, + "loss": 0.0017, + "step": 245020 + }, + { + "epoch": 1.571546308440736, + "grad_norm": 0.03599138185381889, + "learning_rate": 1.3340192157194581e-06, + "loss": 0.0012, + "step": 245030 + }, + { + "epoch": 1.5716104453345219, + "grad_norm": 0.30758318305015564, + "learning_rate": 1.3336386307659477e-06, + "loss": 0.0015, + "step": 245040 + }, + { + "epoch": 1.571674582228308, + "grad_norm": 0.01626293547451496, + "learning_rate": 1.333258091754565e-06, + "loss": 0.0016, + "step": 245050 + }, + { + "epoch": 1.5717387191220942, + "grad_norm": 0.13304047286510468, + "learning_rate": 1.3328775986900782e-06, + "loss": 0.0007, + "step": 245060 + }, + { + "epoch": 1.5718028560158803, + "grad_norm": 0.034299153834581375, + "learning_rate": 1.3324971515772566e-06, + "loss": 0.0009, + "step": 245070 + }, + { + "epoch": 1.5718669929096665, + "grad_norm": 0.05086997151374817, + "learning_rate": 1.3321167504208677e-06, + "loss": 0.0004, + "step": 245080 + }, + { + "epoch": 1.5719311298034526, + "grad_norm": 0.10344081372022629, + "learning_rate": 1.331736395225675e-06, + "loss": 0.0006, + "step": 245090 + }, + { + "epoch": 1.5719952666972385, + "grad_norm": 0.21350620687007904, + "learning_rate": 1.3313560859964493e-06, + "loss": 0.0017, + "step": 245100 + }, + { + "epoch": 1.5720594035910247, + "grad_norm": 0.18175390362739563, + "learning_rate": 1.3309758227379533e-06, + "loss": 0.0012, + "step": 245110 + }, + { + "epoch": 1.5721235404848106, + "grad_norm": 0.06544242054224014, + "learning_rate": 1.3305956054549508e-06, + "loss": 0.0008, + "step": 245120 + }, + { + "epoch": 1.5721876773785968, + "grad_norm": 0.0935523584485054, + "learning_rate": 1.3302154341522093e-06, + "loss": 0.0005, + "step": 245130 + }, + { + "epoch": 1.572251814272383, + "grad_norm": 0.11086033284664154, + "learning_rate": 1.32983530883449e-06, + "loss": 0.0016, + "step": 245140 + }, + { + "epoch": 1.572315951166169, + "grad_norm": 0.038969676941633224, + "learning_rate": 1.3294552295065588e-06, + "loss": 0.0019, + "step": 245150 + }, + { + "epoch": 1.5723800880599552, + "grad_norm": 0.02033829689025879, + "learning_rate": 1.3290751961731769e-06, + "loss": 0.0021, + "step": 245160 + }, + { + "epoch": 1.5724442249537414, + "grad_norm": 0.20166823267936707, + "learning_rate": 1.3286952088391058e-06, + "loss": 0.0023, + "step": 245170 + }, + { + "epoch": 1.5725083618475275, + "grad_norm": 0.05726677551865578, + "learning_rate": 1.3283152675091065e-06, + "loss": 0.0008, + "step": 245180 + }, + { + "epoch": 1.5725724987413134, + "grad_norm": 0.05271828919649124, + "learning_rate": 1.3279353721879429e-06, + "loss": 0.0008, + "step": 245190 + }, + { + "epoch": 1.5726366356350996, + "grad_norm": 0.3088632822036743, + "learning_rate": 1.3275555228803726e-06, + "loss": 0.002, + "step": 245200 + }, + { + "epoch": 1.5727007725288855, + "grad_norm": 0.08474955707788467, + "learning_rate": 1.3271757195911556e-06, + "loss": 0.0006, + "step": 245210 + }, + { + "epoch": 1.5727649094226717, + "grad_norm": 0.07594852894544601, + "learning_rate": 1.3267959623250526e-06, + "loss": 0.0053, + "step": 245220 + }, + { + "epoch": 1.5728290463164578, + "grad_norm": 0.03569547086954117, + "learning_rate": 1.3264162510868216e-06, + "loss": 0.0004, + "step": 245230 + }, + { + "epoch": 1.572893183210244, + "grad_norm": 0.008282431401312351, + "learning_rate": 1.3260365858812208e-06, + "loss": 0.0008, + "step": 245240 + }, + { + "epoch": 1.57295732010403, + "grad_norm": 0.0521833673119545, + "learning_rate": 1.3256569667130053e-06, + "loss": 0.0012, + "step": 245250 + }, + { + "epoch": 1.5730214569978163, + "grad_norm": 0.14588910341262817, + "learning_rate": 1.3252773935869357e-06, + "loss": 0.0019, + "step": 245260 + }, + { + "epoch": 1.5730855938916022, + "grad_norm": 0.11893438547849655, + "learning_rate": 1.3248978665077667e-06, + "loss": 0.0008, + "step": 245270 + }, + { + "epoch": 1.5731497307853883, + "grad_norm": 0.04997969791293144, + "learning_rate": 1.324518385480254e-06, + "loss": 0.0009, + "step": 245280 + }, + { + "epoch": 1.5732138676791745, + "grad_norm": 0.10066279768943787, + "learning_rate": 1.3241389505091513e-06, + "loss": 0.0019, + "step": 245290 + }, + { + "epoch": 1.5732780045729604, + "grad_norm": 0.13450700044631958, + "learning_rate": 1.323759561599216e-06, + "loss": 0.0014, + "step": 245300 + }, + { + "epoch": 1.5733421414667466, + "grad_norm": 0.11641976237297058, + "learning_rate": 1.3233802187552007e-06, + "loss": 0.0023, + "step": 245310 + }, + { + "epoch": 1.5734062783605327, + "grad_norm": 0.042439937591552734, + "learning_rate": 1.3230009219818596e-06, + "loss": 0.001, + "step": 245320 + }, + { + "epoch": 1.5734704152543189, + "grad_norm": 0.027549132704734802, + "learning_rate": 1.322621671283943e-06, + "loss": 0.0012, + "step": 245330 + }, + { + "epoch": 1.573534552148105, + "grad_norm": 0.03462856262922287, + "learning_rate": 1.322242466666206e-06, + "loss": 0.001, + "step": 245340 + }, + { + "epoch": 1.5735986890418912, + "grad_norm": 0.0055800797417759895, + "learning_rate": 1.3218633081333997e-06, + "loss": 0.0023, + "step": 245350 + }, + { + "epoch": 1.573662825935677, + "grad_norm": 0.05559716001152992, + "learning_rate": 1.3214841956902735e-06, + "loss": 0.0011, + "step": 245360 + }, + { + "epoch": 1.5737269628294632, + "grad_norm": 0.4655143618583679, + "learning_rate": 1.3211051293415805e-06, + "loss": 0.0014, + "step": 245370 + }, + { + "epoch": 1.5737910997232492, + "grad_norm": 0.08998212218284607, + "learning_rate": 1.3207261090920698e-06, + "loss": 0.0012, + "step": 245380 + }, + { + "epoch": 1.5738552366170353, + "grad_norm": 0.02035832405090332, + "learning_rate": 1.3203471349464903e-06, + "loss": 0.0014, + "step": 245390 + }, + { + "epoch": 1.5739193735108215, + "grad_norm": 0.14554955065250397, + "learning_rate": 1.3199682069095899e-06, + "loss": 0.0013, + "step": 245400 + }, + { + "epoch": 1.5739835104046076, + "grad_norm": 0.20792771875858307, + "learning_rate": 1.3195893249861192e-06, + "loss": 0.0009, + "step": 245410 + }, + { + "epoch": 1.5740476472983937, + "grad_norm": 0.15382707118988037, + "learning_rate": 1.3192104891808244e-06, + "loss": 0.0012, + "step": 245420 + }, + { + "epoch": 1.57411178419218, + "grad_norm": 0.01003416907042265, + "learning_rate": 1.318831699498453e-06, + "loss": 0.0003, + "step": 245430 + }, + { + "epoch": 1.574175921085966, + "grad_norm": 0.06154649332165718, + "learning_rate": 1.3184529559437498e-06, + "loss": 0.0019, + "step": 245440 + }, + { + "epoch": 1.574240057979752, + "grad_norm": 0.23963524401187897, + "learning_rate": 1.3180742585214635e-06, + "loss": 0.0015, + "step": 245450 + }, + { + "epoch": 1.5743041948735381, + "grad_norm": 0.18416304886341095, + "learning_rate": 1.3176956072363384e-06, + "loss": 0.0014, + "step": 245460 + }, + { + "epoch": 1.574368331767324, + "grad_norm": 0.05095000937581062, + "learning_rate": 1.3173170020931187e-06, + "loss": 0.0006, + "step": 245470 + }, + { + "epoch": 1.5744324686611102, + "grad_norm": 0.03930210322141647, + "learning_rate": 1.3169384430965483e-06, + "loss": 0.0013, + "step": 245480 + }, + { + "epoch": 1.5744966055548963, + "grad_norm": 0.11303441971540451, + "learning_rate": 1.316559930251372e-06, + "loss": 0.0011, + "step": 245490 + }, + { + "epoch": 1.5745607424486825, + "grad_norm": 0.10995394736528397, + "learning_rate": 1.3161814635623327e-06, + "loss": 0.0009, + "step": 245500 + }, + { + "epoch": 1.5746248793424686, + "grad_norm": 0.0212956964969635, + "learning_rate": 1.3158030430341707e-06, + "loss": 0.0006, + "step": 245510 + }, + { + "epoch": 1.5746890162362548, + "grad_norm": 0.004086362197995186, + "learning_rate": 1.3154246686716315e-06, + "loss": 0.001, + "step": 245520 + }, + { + "epoch": 1.5747531531300407, + "grad_norm": 0.10942432284355164, + "learning_rate": 1.3150463404794544e-06, + "loss": 0.0017, + "step": 245530 + }, + { + "epoch": 1.5748172900238269, + "grad_norm": 0.1038489043712616, + "learning_rate": 1.31466805846238e-06, + "loss": 0.001, + "step": 245540 + }, + { + "epoch": 1.5748814269176128, + "grad_norm": 0.06717261672019958, + "learning_rate": 1.3142898226251478e-06, + "loss": 0.0012, + "step": 245550 + }, + { + "epoch": 1.574945563811399, + "grad_norm": 0.10273327678442001, + "learning_rate": 1.3139116329724994e-06, + "loss": 0.0011, + "step": 245560 + }, + { + "epoch": 1.575009700705185, + "grad_norm": 0.374106228351593, + "learning_rate": 1.3135334895091723e-06, + "loss": 0.0018, + "step": 245570 + }, + { + "epoch": 1.5750738375989712, + "grad_norm": 0.13840097188949585, + "learning_rate": 1.3131553922399053e-06, + "loss": 0.001, + "step": 245580 + }, + { + "epoch": 1.5751379744927574, + "grad_norm": 0.030233973637223244, + "learning_rate": 1.3127773411694356e-06, + "loss": 0.001, + "step": 245590 + }, + { + "epoch": 1.5752021113865435, + "grad_norm": 0.06467781215906143, + "learning_rate": 1.3123993363025018e-06, + "loss": 0.001, + "step": 245600 + }, + { + "epoch": 1.5752662482803297, + "grad_norm": 0.08963991701602936, + "learning_rate": 1.3120213776438395e-06, + "loss": 0.0016, + "step": 245610 + }, + { + "epoch": 1.5753303851741156, + "grad_norm": 0.11008745431900024, + "learning_rate": 1.3116434651981857e-06, + "loss": 0.0009, + "step": 245620 + }, + { + "epoch": 1.5753945220679018, + "grad_norm": 0.09108911454677582, + "learning_rate": 1.3112655989702733e-06, + "loss": 0.0012, + "step": 245630 + }, + { + "epoch": 1.5754586589616877, + "grad_norm": 0.10402543097734451, + "learning_rate": 1.3108877789648412e-06, + "loss": 0.0006, + "step": 245640 + }, + { + "epoch": 1.5755227958554738, + "grad_norm": 0.18037079274654388, + "learning_rate": 1.3105100051866214e-06, + "loss": 0.0021, + "step": 245650 + }, + { + "epoch": 1.57558693274926, + "grad_norm": 0.26776322722435, + "learning_rate": 1.3101322776403468e-06, + "loss": 0.002, + "step": 245660 + }, + { + "epoch": 1.5756510696430461, + "grad_norm": 0.11705923825502396, + "learning_rate": 1.309754596330754e-06, + "loss": 0.0017, + "step": 245670 + }, + { + "epoch": 1.5757152065368323, + "grad_norm": 0.08753743767738342, + "learning_rate": 1.3093769612625729e-06, + "loss": 0.001, + "step": 245680 + }, + { + "epoch": 1.5757793434306184, + "grad_norm": 0.05915207788348198, + "learning_rate": 1.3089993724405358e-06, + "loss": 0.0009, + "step": 245690 + }, + { + "epoch": 1.5758434803244044, + "grad_norm": 0.23128096759319305, + "learning_rate": 1.3086218298693732e-06, + "loss": 0.001, + "step": 245700 + }, + { + "epoch": 1.5759076172181905, + "grad_norm": 0.08078275620937347, + "learning_rate": 1.3082443335538185e-06, + "loss": 0.0004, + "step": 245710 + }, + { + "epoch": 1.5759717541119767, + "grad_norm": 0.11938001960515976, + "learning_rate": 1.3078668834986013e-06, + "loss": 0.001, + "step": 245720 + }, + { + "epoch": 1.5760358910057626, + "grad_norm": 0.1649659276008606, + "learning_rate": 1.3074894797084508e-06, + "loss": 0.0015, + "step": 245730 + }, + { + "epoch": 1.5761000278995487, + "grad_norm": 0.12822167575359344, + "learning_rate": 1.3071121221880945e-06, + "loss": 0.0008, + "step": 245740 + }, + { + "epoch": 1.5761641647933349, + "grad_norm": 0.06904137134552002, + "learning_rate": 1.306734810942264e-06, + "loss": 0.0005, + "step": 245750 + }, + { + "epoch": 1.576228301687121, + "grad_norm": 0.010498573072254658, + "learning_rate": 1.3063575459756855e-06, + "loss": 0.0018, + "step": 245760 + }, + { + "epoch": 1.5762924385809072, + "grad_norm": 0.017474694177508354, + "learning_rate": 1.3059803272930871e-06, + "loss": 0.0006, + "step": 245770 + }, + { + "epoch": 1.5763565754746933, + "grad_norm": 0.066599041223526, + "learning_rate": 1.3056031548991937e-06, + "loss": 0.0015, + "step": 245780 + }, + { + "epoch": 1.5764207123684792, + "grad_norm": 0.01642470993101597, + "learning_rate": 1.3052260287987351e-06, + "loss": 0.0011, + "step": 245790 + }, + { + "epoch": 1.5764848492622654, + "grad_norm": 0.09349098056554794, + "learning_rate": 1.3048489489964344e-06, + "loss": 0.0011, + "step": 245800 + }, + { + "epoch": 1.5765489861560513, + "grad_norm": 0.1274600625038147, + "learning_rate": 1.304471915497016e-06, + "loss": 0.0011, + "step": 245810 + }, + { + "epoch": 1.5766131230498375, + "grad_norm": 0.12524327635765076, + "learning_rate": 1.3040949283052069e-06, + "loss": 0.0017, + "step": 245820 + }, + { + "epoch": 1.5766772599436236, + "grad_norm": 0.01571890339255333, + "learning_rate": 1.30371798742573e-06, + "loss": 0.0013, + "step": 245830 + }, + { + "epoch": 1.5767413968374098, + "grad_norm": 0.0890233963727951, + "learning_rate": 1.3033410928633083e-06, + "loss": 0.0015, + "step": 245840 + }, + { + "epoch": 1.576805533731196, + "grad_norm": 0.10049532353878021, + "learning_rate": 1.3029642446226632e-06, + "loss": 0.0009, + "step": 245850 + }, + { + "epoch": 1.576869670624982, + "grad_norm": 0.06716424971818924, + "learning_rate": 1.3025874427085193e-06, + "loss": 0.0005, + "step": 245860 + }, + { + "epoch": 1.5769338075187682, + "grad_norm": 0.04313703626394272, + "learning_rate": 1.302210687125598e-06, + "loss": 0.001, + "step": 245870 + }, + { + "epoch": 1.5769979444125541, + "grad_norm": 0.04454496130347252, + "learning_rate": 1.3018339778786193e-06, + "loss": 0.0008, + "step": 245880 + }, + { + "epoch": 1.5770620813063403, + "grad_norm": 0.28844645619392395, + "learning_rate": 1.3014573149723025e-06, + "loss": 0.0016, + "step": 245890 + }, + { + "epoch": 1.5771262182001262, + "grad_norm": 0.0529690720140934, + "learning_rate": 1.301080698411369e-06, + "loss": 0.0023, + "step": 245900 + }, + { + "epoch": 1.5771903550939124, + "grad_norm": 0.07780619710683823, + "learning_rate": 1.300704128200539e-06, + "loss": 0.0007, + "step": 245910 + }, + { + "epoch": 1.5772544919876985, + "grad_norm": 0.040550705045461655, + "learning_rate": 1.3003276043445307e-06, + "loss": 0.0013, + "step": 245920 + }, + { + "epoch": 1.5773186288814847, + "grad_norm": 0.04916974902153015, + "learning_rate": 1.2999511268480598e-06, + "loss": 0.0007, + "step": 245930 + }, + { + "epoch": 1.5773827657752708, + "grad_norm": 0.006002301815897226, + "learning_rate": 1.2995746957158466e-06, + "loss": 0.001, + "step": 245940 + }, + { + "epoch": 1.577446902669057, + "grad_norm": 0.10196304321289062, + "learning_rate": 1.299198310952608e-06, + "loss": 0.0009, + "step": 245950 + }, + { + "epoch": 1.5775110395628429, + "grad_norm": 0.035125818103551865, + "learning_rate": 1.298821972563059e-06, + "loss": 0.0017, + "step": 245960 + }, + { + "epoch": 1.577575176456629, + "grad_norm": 0.011328734457492828, + "learning_rate": 1.2984456805519146e-06, + "loss": 0.0007, + "step": 245970 + }, + { + "epoch": 1.577639313350415, + "grad_norm": 0.007865257561206818, + "learning_rate": 1.2980694349238925e-06, + "loss": 0.0013, + "step": 245980 + }, + { + "epoch": 1.577703450244201, + "grad_norm": 0.023871131241321564, + "learning_rate": 1.2976932356837064e-06, + "loss": 0.0011, + "step": 245990 + }, + { + "epoch": 1.5777675871379873, + "grad_norm": 0.060003168880939484, + "learning_rate": 1.2973170828360698e-06, + "loss": 0.0008, + "step": 246000 + }, + { + "epoch": 1.5778317240317734, + "grad_norm": 0.04944562539458275, + "learning_rate": 1.2969409763856954e-06, + "loss": 0.0009, + "step": 246010 + }, + { + "epoch": 1.5778958609255596, + "grad_norm": 0.13507193326950073, + "learning_rate": 1.2965649163372985e-06, + "loss": 0.0013, + "step": 246020 + }, + { + "epoch": 1.5779599978193457, + "grad_norm": 0.009508341550827026, + "learning_rate": 1.2961889026955898e-06, + "loss": 0.0008, + "step": 246030 + }, + { + "epoch": 1.5780241347131319, + "grad_norm": 0.15763916075229645, + "learning_rate": 1.2958129354652809e-06, + "loss": 0.0021, + "step": 246040 + }, + { + "epoch": 1.5780882716069178, + "grad_norm": 0.1158578023314476, + "learning_rate": 1.295437014651082e-06, + "loss": 0.003, + "step": 246050 + }, + { + "epoch": 1.578152408500704, + "grad_norm": 0.08165355771780014, + "learning_rate": 1.2950611402577068e-06, + "loss": 0.002, + "step": 246060 + }, + { + "epoch": 1.5782165453944899, + "grad_norm": 0.11118276417255402, + "learning_rate": 1.2946853122898629e-06, + "loss": 0.0008, + "step": 246070 + }, + { + "epoch": 1.578280682288276, + "grad_norm": 0.025926100090146065, + "learning_rate": 1.2943095307522592e-06, + "loss": 0.0012, + "step": 246080 + }, + { + "epoch": 1.5783448191820622, + "grad_norm": 0.0540606826543808, + "learning_rate": 1.2939337956496062e-06, + "loss": 0.0008, + "step": 246090 + }, + { + "epoch": 1.5784089560758483, + "grad_norm": 0.07775036990642548, + "learning_rate": 1.2935581069866122e-06, + "loss": 0.0028, + "step": 246100 + }, + { + "epoch": 1.5784730929696345, + "grad_norm": 0.09212201088666916, + "learning_rate": 1.2931824647679837e-06, + "loss": 0.0008, + "step": 246110 + }, + { + "epoch": 1.5785372298634206, + "grad_norm": 0.013664871454238892, + "learning_rate": 1.2928068689984268e-06, + "loss": 0.0016, + "step": 246120 + }, + { + "epoch": 1.5786013667572067, + "grad_norm": 0.04631384462118149, + "learning_rate": 1.2924313196826504e-06, + "loss": 0.0007, + "step": 246130 + }, + { + "epoch": 1.5786655036509927, + "grad_norm": 0.08882184326648712, + "learning_rate": 1.29205581682536e-06, + "loss": 0.0011, + "step": 246140 + }, + { + "epoch": 1.5787296405447788, + "grad_norm": 0.016570575535297394, + "learning_rate": 1.29168036043126e-06, + "loss": 0.0008, + "step": 246150 + }, + { + "epoch": 1.5787937774385647, + "grad_norm": 0.11887503415346146, + "learning_rate": 1.2913049505050535e-06, + "loss": 0.0006, + "step": 246160 + }, + { + "epoch": 1.578857914332351, + "grad_norm": 0.08305158466100693, + "learning_rate": 1.2909295870514482e-06, + "loss": 0.0012, + "step": 246170 + }, + { + "epoch": 1.578922051226137, + "grad_norm": 0.0012892017839476466, + "learning_rate": 1.2905542700751461e-06, + "loss": 0.0005, + "step": 246180 + }, + { + "epoch": 1.5789861881199232, + "grad_norm": 0.005535861011594534, + "learning_rate": 1.2901789995808506e-06, + "loss": 0.0005, + "step": 246190 + }, + { + "epoch": 1.5790503250137093, + "grad_norm": 0.020395416766405106, + "learning_rate": 1.2898037755732613e-06, + "loss": 0.0087, + "step": 246200 + }, + { + "epoch": 1.5791144619074955, + "grad_norm": 0.0902070477604866, + "learning_rate": 1.2894285980570842e-06, + "loss": 0.0016, + "step": 246210 + }, + { + "epoch": 1.5791785988012814, + "grad_norm": 0.06517552584409714, + "learning_rate": 1.2890534670370187e-06, + "loss": 0.0015, + "step": 246220 + }, + { + "epoch": 1.5792427356950676, + "grad_norm": 0.05345199629664421, + "learning_rate": 1.288678382517764e-06, + "loss": 0.0011, + "step": 246230 + }, + { + "epoch": 1.5793068725888535, + "grad_norm": 0.07760341465473175, + "learning_rate": 1.2883033445040228e-06, + "loss": 0.0009, + "step": 246240 + }, + { + "epoch": 1.5793710094826396, + "grad_norm": 0.1683817356824875, + "learning_rate": 1.2879283530004932e-06, + "loss": 0.0009, + "step": 246250 + }, + { + "epoch": 1.5794351463764258, + "grad_norm": 0.0635901466012001, + "learning_rate": 1.2875534080118751e-06, + "loss": 0.0012, + "step": 246260 + }, + { + "epoch": 1.579499283270212, + "grad_norm": 0.03265683725476265, + "learning_rate": 1.287178509542864e-06, + "loss": 0.0019, + "step": 246270 + }, + { + "epoch": 1.579563420163998, + "grad_norm": 0.08981554955244064, + "learning_rate": 1.2868036575981613e-06, + "loss": 0.0011, + "step": 246280 + }, + { + "epoch": 1.5796275570577842, + "grad_norm": 0.052812643349170685, + "learning_rate": 1.2864288521824625e-06, + "loss": 0.0022, + "step": 246290 + }, + { + "epoch": 1.5796916939515704, + "grad_norm": 0.030123114585876465, + "learning_rate": 1.2860540933004645e-06, + "loss": 0.0009, + "step": 246300 + }, + { + "epoch": 1.5797558308453563, + "grad_norm": 0.027613233774900436, + "learning_rate": 1.2856793809568613e-06, + "loss": 0.0012, + "step": 246310 + }, + { + "epoch": 1.5798199677391425, + "grad_norm": 0.07647305727005005, + "learning_rate": 1.2853047151563513e-06, + "loss": 0.0013, + "step": 246320 + }, + { + "epoch": 1.5798841046329284, + "grad_norm": 0.09239564836025238, + "learning_rate": 1.2849300959036287e-06, + "loss": 0.0009, + "step": 246330 + }, + { + "epoch": 1.5799482415267145, + "grad_norm": 0.05426836013793945, + "learning_rate": 1.2845555232033852e-06, + "loss": 0.0009, + "step": 246340 + }, + { + "epoch": 1.5800123784205007, + "grad_norm": 0.051104720681905746, + "learning_rate": 1.2841809970603176e-06, + "loss": 0.0009, + "step": 246350 + }, + { + "epoch": 1.5800765153142868, + "grad_norm": 0.15226265788078308, + "learning_rate": 1.2838065174791182e-06, + "loss": 0.0012, + "step": 246360 + }, + { + "epoch": 1.580140652208073, + "grad_norm": 0.07135067135095596, + "learning_rate": 1.2834320844644788e-06, + "loss": 0.0026, + "step": 246370 + }, + { + "epoch": 1.5802047891018591, + "grad_norm": 0.01079430803656578, + "learning_rate": 1.2830576980210906e-06, + "loss": 0.0013, + "step": 246380 + }, + { + "epoch": 1.580268925995645, + "grad_norm": 0.051921162754297256, + "learning_rate": 1.282683358153647e-06, + "loss": 0.0012, + "step": 246390 + }, + { + "epoch": 1.5803330628894312, + "grad_norm": 0.0851326659321785, + "learning_rate": 1.2823090648668375e-06, + "loss": 0.0019, + "step": 246400 + }, + { + "epoch": 1.5803971997832171, + "grad_norm": 0.042742229998111725, + "learning_rate": 1.281934818165353e-06, + "loss": 0.002, + "step": 246410 + }, + { + "epoch": 1.5804613366770033, + "grad_norm": 0.11216244101524353, + "learning_rate": 1.2815606180538804e-06, + "loss": 0.0023, + "step": 246420 + }, + { + "epoch": 1.5805254735707894, + "grad_norm": 0.010424695909023285, + "learning_rate": 1.2811864645371124e-06, + "loss": 0.0007, + "step": 246430 + }, + { + "epoch": 1.5805896104645756, + "grad_norm": 0.04899062588810921, + "learning_rate": 1.2808123576197356e-06, + "loss": 0.0013, + "step": 246440 + }, + { + "epoch": 1.5806537473583617, + "grad_norm": 0.1219649612903595, + "learning_rate": 1.2804382973064383e-06, + "loss": 0.0012, + "step": 246450 + }, + { + "epoch": 1.5807178842521479, + "grad_norm": 0.018149536103010178, + "learning_rate": 1.2800642836019062e-06, + "loss": 0.0009, + "step": 246460 + }, + { + "epoch": 1.580782021145934, + "grad_norm": 0.11209143698215485, + "learning_rate": 1.2796903165108282e-06, + "loss": 0.0025, + "step": 246470 + }, + { + "epoch": 1.58084615803972, + "grad_norm": 0.024495825171470642, + "learning_rate": 1.2793163960378896e-06, + "loss": 0.001, + "step": 246480 + }, + { + "epoch": 1.580910294933506, + "grad_norm": 0.08707740902900696, + "learning_rate": 1.2789425221877743e-06, + "loss": 0.0014, + "step": 246490 + }, + { + "epoch": 1.580974431827292, + "grad_norm": 0.09110743552446365, + "learning_rate": 1.2785686949651704e-06, + "loss": 0.0011, + "step": 246500 + }, + { + "epoch": 1.5810385687210782, + "grad_norm": 0.02921900525689125, + "learning_rate": 1.2781949143747603e-06, + "loss": 0.0006, + "step": 246510 + }, + { + "epoch": 1.5811027056148643, + "grad_norm": 0.08461631089448929, + "learning_rate": 1.277821180421227e-06, + "loss": 0.0012, + "step": 246520 + }, + { + "epoch": 1.5811668425086505, + "grad_norm": 0.09141971170902252, + "learning_rate": 1.277447493109254e-06, + "loss": 0.001, + "step": 246530 + }, + { + "epoch": 1.5812309794024366, + "grad_norm": 0.0596066415309906, + "learning_rate": 1.2770738524435255e-06, + "loss": 0.0017, + "step": 246540 + }, + { + "epoch": 1.5812951162962228, + "grad_norm": 0.07852587848901749, + "learning_rate": 1.2767002584287224e-06, + "loss": 0.002, + "step": 246550 + }, + { + "epoch": 1.581359253190009, + "grad_norm": 0.0018973442493006587, + "learning_rate": 1.276326711069526e-06, + "loss": 0.0006, + "step": 246560 + }, + { + "epoch": 1.5814233900837948, + "grad_norm": 0.12400216609239578, + "learning_rate": 1.2759532103706157e-06, + "loss": 0.002, + "step": 246570 + }, + { + "epoch": 1.581487526977581, + "grad_norm": 0.07634778320789337, + "learning_rate": 1.275579756336675e-06, + "loss": 0.0007, + "step": 246580 + }, + { + "epoch": 1.581551663871367, + "grad_norm": 0.025330260396003723, + "learning_rate": 1.2752063489723816e-06, + "loss": 0.0006, + "step": 246590 + }, + { + "epoch": 1.581615800765153, + "grad_norm": 0.07662412524223328, + "learning_rate": 1.2748329882824146e-06, + "loss": 0.0016, + "step": 246600 + }, + { + "epoch": 1.5816799376589392, + "grad_norm": 0.06542450189590454, + "learning_rate": 1.274459674271451e-06, + "loss": 0.0023, + "step": 246610 + }, + { + "epoch": 1.5817440745527254, + "grad_norm": 0.037552472203969955, + "learning_rate": 1.2740864069441716e-06, + "loss": 0.0006, + "step": 246620 + }, + { + "epoch": 1.5818082114465115, + "grad_norm": 0.0035921186208724976, + "learning_rate": 1.2737131863052526e-06, + "loss": 0.0018, + "step": 246630 + }, + { + "epoch": 1.5818723483402977, + "grad_norm": 0.1471884846687317, + "learning_rate": 1.2733400123593692e-06, + "loss": 0.001, + "step": 246640 + }, + { + "epoch": 1.5819364852340836, + "grad_norm": 0.04834791645407677, + "learning_rate": 1.2729668851111987e-06, + "loss": 0.001, + "step": 246650 + }, + { + "epoch": 1.5820006221278697, + "grad_norm": 0.08057847619056702, + "learning_rate": 1.2725938045654185e-06, + "loss": 0.0017, + "step": 246660 + }, + { + "epoch": 1.5820647590216557, + "grad_norm": 0.07877923548221588, + "learning_rate": 1.272220770726702e-06, + "loss": 0.0007, + "step": 246670 + }, + { + "epoch": 1.5821288959154418, + "grad_norm": 0.07737217843532562, + "learning_rate": 1.2718477835997229e-06, + "loss": 0.001, + "step": 246680 + }, + { + "epoch": 1.582193032809228, + "grad_norm": 0.08197028189897537, + "learning_rate": 1.2714748431891545e-06, + "loss": 0.0011, + "step": 246690 + }, + { + "epoch": 1.582257169703014, + "grad_norm": 0.20422714948654175, + "learning_rate": 1.2711019494996723e-06, + "loss": 0.002, + "step": 246700 + }, + { + "epoch": 1.5823213065968003, + "grad_norm": 0.15007200837135315, + "learning_rate": 1.2707291025359475e-06, + "loss": 0.001, + "step": 246710 + }, + { + "epoch": 1.5823854434905864, + "grad_norm": 0.08318213373422623, + "learning_rate": 1.2703563023026527e-06, + "loss": 0.0017, + "step": 246720 + }, + { + "epoch": 1.5824495803843726, + "grad_norm": 0.03313397616147995, + "learning_rate": 1.269983548804457e-06, + "loss": 0.0016, + "step": 246730 + }, + { + "epoch": 1.5825137172781585, + "grad_norm": 0.11351385712623596, + "learning_rate": 1.2696108420460352e-06, + "loss": 0.0008, + "step": 246740 + }, + { + "epoch": 1.5825778541719446, + "grad_norm": 0.07731682062149048, + "learning_rate": 1.2692381820320554e-06, + "loss": 0.0015, + "step": 246750 + }, + { + "epoch": 1.5826419910657306, + "grad_norm": 0.13076956570148468, + "learning_rate": 1.2688655687671864e-06, + "loss": 0.0007, + "step": 246760 + }, + { + "epoch": 1.5827061279595167, + "grad_norm": 0.13700582087039948, + "learning_rate": 1.2684930022560992e-06, + "loss": 0.0017, + "step": 246770 + }, + { + "epoch": 1.5827702648533029, + "grad_norm": 0.08193062245845795, + "learning_rate": 1.2681204825034621e-06, + "loss": 0.0007, + "step": 246780 + }, + { + "epoch": 1.582834401747089, + "grad_norm": 0.08168069273233414, + "learning_rate": 1.2677480095139427e-06, + "loss": 0.0011, + "step": 246790 + }, + { + "epoch": 1.5828985386408752, + "grad_norm": 0.1415787637233734, + "learning_rate": 1.2673755832922064e-06, + "loss": 0.0013, + "step": 246800 + }, + { + "epoch": 1.5829626755346613, + "grad_norm": 0.08244706690311432, + "learning_rate": 1.2670032038429225e-06, + "loss": 0.0015, + "step": 246810 + }, + { + "epoch": 1.5830268124284472, + "grad_norm": 0.017394885420799255, + "learning_rate": 1.266630871170757e-06, + "loss": 0.0009, + "step": 246820 + }, + { + "epoch": 1.5830909493222334, + "grad_norm": 0.047168806195259094, + "learning_rate": 1.266258585280375e-06, + "loss": 0.0004, + "step": 246830 + }, + { + "epoch": 1.5831550862160195, + "grad_norm": 0.019143542274832726, + "learning_rate": 1.2658863461764398e-06, + "loss": 0.0008, + "step": 246840 + }, + { + "epoch": 1.5832192231098055, + "grad_norm": 0.12793073058128357, + "learning_rate": 1.265514153863619e-06, + "loss": 0.0008, + "step": 246850 + }, + { + "epoch": 1.5832833600035916, + "grad_norm": 0.028690339997410774, + "learning_rate": 1.2651420083465748e-06, + "loss": 0.0007, + "step": 246860 + }, + { + "epoch": 1.5833474968973777, + "grad_norm": 0.060283955186605453, + "learning_rate": 1.2647699096299703e-06, + "loss": 0.0018, + "step": 246870 + }, + { + "epoch": 1.583411633791164, + "grad_norm": 0.1377541571855545, + "learning_rate": 1.2643978577184668e-06, + "loss": 0.0026, + "step": 246880 + }, + { + "epoch": 1.58347577068495, + "grad_norm": 0.005368927028030157, + "learning_rate": 1.2640258526167298e-06, + "loss": 0.0013, + "step": 246890 + }, + { + "epoch": 1.5835399075787362, + "grad_norm": 0.0832783579826355, + "learning_rate": 1.2636538943294186e-06, + "loss": 0.0013, + "step": 246900 + }, + { + "epoch": 1.5836040444725221, + "grad_norm": 0.05644654110074043, + "learning_rate": 1.2632819828611931e-06, + "loss": 0.0021, + "step": 246910 + }, + { + "epoch": 1.5836681813663083, + "grad_norm": 0.06918276101350784, + "learning_rate": 1.2629101182167164e-06, + "loss": 0.0031, + "step": 246920 + }, + { + "epoch": 1.5837323182600942, + "grad_norm": 0.10213880240917206, + "learning_rate": 1.262538300400647e-06, + "loss": 0.0038, + "step": 246930 + }, + { + "epoch": 1.5837964551538803, + "grad_norm": 0.11579898744821548, + "learning_rate": 1.2621665294176433e-06, + "loss": 0.0011, + "step": 246940 + }, + { + "epoch": 1.5838605920476665, + "grad_norm": 0.04726351425051689, + "learning_rate": 1.2617948052723633e-06, + "loss": 0.0008, + "step": 246950 + }, + { + "epoch": 1.5839247289414526, + "grad_norm": 0.1257619559764862, + "learning_rate": 1.2614231279694678e-06, + "loss": 0.0012, + "step": 246960 + }, + { + "epoch": 1.5839888658352388, + "grad_norm": 0.040273942053318024, + "learning_rate": 1.2610514975136118e-06, + "loss": 0.0006, + "step": 246970 + }, + { + "epoch": 1.584053002729025, + "grad_norm": 0.19437241554260254, + "learning_rate": 1.2606799139094529e-06, + "loss": 0.0017, + "step": 246980 + }, + { + "epoch": 1.584117139622811, + "grad_norm": 0.16303570568561554, + "learning_rate": 1.2603083771616459e-06, + "loss": 0.0012, + "step": 246990 + }, + { + "epoch": 1.584181276516597, + "grad_norm": 0.050766024738550186, + "learning_rate": 1.2599368872748491e-06, + "loss": 0.0008, + "step": 247000 + }, + { + "epoch": 1.5842454134103832, + "grad_norm": 0.11944007128477097, + "learning_rate": 1.2595654442537158e-06, + "loss": 0.002, + "step": 247010 + }, + { + "epoch": 1.584309550304169, + "grad_norm": 0.0842583030462265, + "learning_rate": 1.259194048102901e-06, + "loss": 0.0011, + "step": 247020 + }, + { + "epoch": 1.5843736871979552, + "grad_norm": 0.09873521327972412, + "learning_rate": 1.2588226988270564e-06, + "loss": 0.0014, + "step": 247030 + }, + { + "epoch": 1.5844378240917414, + "grad_norm": 0.07508692145347595, + "learning_rate": 1.2584513964308393e-06, + "loss": 0.0011, + "step": 247040 + }, + { + "epoch": 1.5845019609855275, + "grad_norm": 0.10218508541584015, + "learning_rate": 1.2580801409188997e-06, + "loss": 0.0009, + "step": 247050 + }, + { + "epoch": 1.5845660978793137, + "grad_norm": 0.08793481439352036, + "learning_rate": 1.257708932295889e-06, + "loss": 0.0009, + "step": 247060 + }, + { + "epoch": 1.5846302347730998, + "grad_norm": 0.04642735421657562, + "learning_rate": 1.2573377705664613e-06, + "loss": 0.0015, + "step": 247070 + }, + { + "epoch": 1.5846943716668858, + "grad_norm": 0.1290217638015747, + "learning_rate": 1.2569666557352662e-06, + "loss": 0.0011, + "step": 247080 + }, + { + "epoch": 1.584758508560672, + "grad_norm": 0.06868187338113785, + "learning_rate": 1.256595587806954e-06, + "loss": 0.0013, + "step": 247090 + }, + { + "epoch": 1.5848226454544578, + "grad_norm": 0.05559730902314186, + "learning_rate": 1.2562245667861727e-06, + "loss": 0.0006, + "step": 247100 + }, + { + "epoch": 1.584886782348244, + "grad_norm": 0.09011746197938919, + "learning_rate": 1.2558535926775757e-06, + "loss": 0.0034, + "step": 247110 + }, + { + "epoch": 1.5849509192420301, + "grad_norm": 0.07295114547014236, + "learning_rate": 1.2554826654858081e-06, + "loss": 0.0007, + "step": 247120 + }, + { + "epoch": 1.5850150561358163, + "grad_norm": 0.14845941960811615, + "learning_rate": 1.2551117852155192e-06, + "loss": 0.0011, + "step": 247130 + }, + { + "epoch": 1.5850791930296024, + "grad_norm": 0.0017486442811787128, + "learning_rate": 1.2547409518713543e-06, + "loss": 0.001, + "step": 247140 + }, + { + "epoch": 1.5851433299233886, + "grad_norm": 0.0336947999894619, + "learning_rate": 1.2543701654579637e-06, + "loss": 0.001, + "step": 247150 + }, + { + "epoch": 1.5852074668171747, + "grad_norm": 0.22223295271396637, + "learning_rate": 1.253999425979992e-06, + "loss": 0.0007, + "step": 247160 + }, + { + "epoch": 1.5852716037109607, + "grad_norm": 0.0942402258515358, + "learning_rate": 1.2536287334420848e-06, + "loss": 0.0018, + "step": 247170 + }, + { + "epoch": 1.5853357406047468, + "grad_norm": 0.029933787882328033, + "learning_rate": 1.2532580878488854e-06, + "loss": 0.0024, + "step": 247180 + }, + { + "epoch": 1.5853998774985327, + "grad_norm": 0.12282829731702805, + "learning_rate": 1.2528874892050414e-06, + "loss": 0.0012, + "step": 247190 + }, + { + "epoch": 1.5854640143923189, + "grad_norm": 0.05031512677669525, + "learning_rate": 1.2525169375151953e-06, + "loss": 0.0007, + "step": 247200 + }, + { + "epoch": 1.585528151286105, + "grad_norm": 0.06032634153962135, + "learning_rate": 1.2521464327839882e-06, + "loss": 0.0009, + "step": 247210 + }, + { + "epoch": 1.5855922881798912, + "grad_norm": 0.057162925601005554, + "learning_rate": 1.2517759750160667e-06, + "loss": 0.0011, + "step": 247220 + }, + { + "epoch": 1.5856564250736773, + "grad_norm": 0.07621420919895172, + "learning_rate": 1.2514055642160716e-06, + "loss": 0.0036, + "step": 247230 + }, + { + "epoch": 1.5857205619674635, + "grad_norm": 0.0914858728647232, + "learning_rate": 1.2510352003886432e-06, + "loss": 0.0009, + "step": 247240 + }, + { + "epoch": 1.5857846988612494, + "grad_norm": 0.0333448126912117, + "learning_rate": 1.2506648835384221e-06, + "loss": 0.0013, + "step": 247250 + }, + { + "epoch": 1.5858488357550355, + "grad_norm": 0.05894660949707031, + "learning_rate": 1.2502946136700507e-06, + "loss": 0.0018, + "step": 247260 + }, + { + "epoch": 1.5859129726488217, + "grad_norm": 0.08252187818288803, + "learning_rate": 1.2499243907881676e-06, + "loss": 0.0009, + "step": 247270 + }, + { + "epoch": 1.5859771095426076, + "grad_norm": 0.020556898787617683, + "learning_rate": 1.2495542148974121e-06, + "loss": 0.0022, + "step": 247280 + }, + { + "epoch": 1.5860412464363938, + "grad_norm": 0.09331326931715012, + "learning_rate": 1.2491840860024212e-06, + "loss": 0.0015, + "step": 247290 + }, + { + "epoch": 1.58610538333018, + "grad_norm": 0.1433272659778595, + "learning_rate": 1.2488140041078362e-06, + "loss": 0.0012, + "step": 247300 + }, + { + "epoch": 1.586169520223966, + "grad_norm": 0.21614956855773926, + "learning_rate": 1.2484439692182926e-06, + "loss": 0.0017, + "step": 247310 + }, + { + "epoch": 1.5862336571177522, + "grad_norm": 0.02656024694442749, + "learning_rate": 1.2480739813384268e-06, + "loss": 0.0014, + "step": 247320 + }, + { + "epoch": 1.5862977940115384, + "grad_norm": 0.12968099117279053, + "learning_rate": 1.2477040404728741e-06, + "loss": 0.0017, + "step": 247330 + }, + { + "epoch": 1.5863619309053243, + "grad_norm": 0.015749948099255562, + "learning_rate": 1.2473341466262734e-06, + "loss": 0.0015, + "step": 247340 + }, + { + "epoch": 1.5864260677991104, + "grad_norm": 0.07174625247716904, + "learning_rate": 1.2469642998032577e-06, + "loss": 0.001, + "step": 247350 + }, + { + "epoch": 1.5864902046928964, + "grad_norm": 0.05340525507926941, + "learning_rate": 1.24659450000846e-06, + "loss": 0.0005, + "step": 247360 + }, + { + "epoch": 1.5865543415866825, + "grad_norm": 0.15386264026165009, + "learning_rate": 1.2462247472465172e-06, + "loss": 0.0018, + "step": 247370 + }, + { + "epoch": 1.5866184784804687, + "grad_norm": 0.02587992511689663, + "learning_rate": 1.245855041522061e-06, + "loss": 0.0012, + "step": 247380 + }, + { + "epoch": 1.5866826153742548, + "grad_norm": 0.21163691580295563, + "learning_rate": 1.245485382839724e-06, + "loss": 0.0023, + "step": 247390 + }, + { + "epoch": 1.586746752268041, + "grad_norm": 0.26643458008766174, + "learning_rate": 1.2451157712041374e-06, + "loss": 0.0034, + "step": 247400 + }, + { + "epoch": 1.586810889161827, + "grad_norm": 0.04723113775253296, + "learning_rate": 1.244746206619935e-06, + "loss": 0.0009, + "step": 247410 + }, + { + "epoch": 1.5868750260556133, + "grad_norm": 0.01681624911725521, + "learning_rate": 1.2443766890917452e-06, + "loss": 0.0006, + "step": 247420 + }, + { + "epoch": 1.5869391629493992, + "grad_norm": 0.014629567973315716, + "learning_rate": 1.2440072186242008e-06, + "loss": 0.0021, + "step": 247430 + }, + { + "epoch": 1.5870032998431853, + "grad_norm": 0.06550707668066025, + "learning_rate": 1.243637795221931e-06, + "loss": 0.0012, + "step": 247440 + }, + { + "epoch": 1.5870674367369713, + "grad_norm": 0.10899477452039719, + "learning_rate": 1.2432684188895616e-06, + "loss": 0.0006, + "step": 247450 + }, + { + "epoch": 1.5871315736307574, + "grad_norm": 0.10097301006317139, + "learning_rate": 1.242899089631726e-06, + "loss": 0.0006, + "step": 247460 + }, + { + "epoch": 1.5871957105245436, + "grad_norm": 0.12945745885372162, + "learning_rate": 1.2425298074530502e-06, + "loss": 0.0012, + "step": 247470 + }, + { + "epoch": 1.5872598474183297, + "grad_norm": 0.06421015411615372, + "learning_rate": 1.2421605723581593e-06, + "loss": 0.0013, + "step": 247480 + }, + { + "epoch": 1.5873239843121159, + "grad_norm": 0.06142069771885872, + "learning_rate": 1.2417913843516838e-06, + "loss": 0.0015, + "step": 247490 + }, + { + "epoch": 1.587388121205902, + "grad_norm": 0.09488467872142792, + "learning_rate": 1.2414222434382483e-06, + "loss": 0.0015, + "step": 247500 + }, + { + "epoch": 1.587452258099688, + "grad_norm": 0.037198759615421295, + "learning_rate": 1.2410531496224782e-06, + "loss": 0.0013, + "step": 247510 + }, + { + "epoch": 1.587516394993474, + "grad_norm": 0.04325252026319504, + "learning_rate": 1.2406841029089972e-06, + "loss": 0.0013, + "step": 247520 + }, + { + "epoch": 1.58758053188726, + "grad_norm": 0.0072103929705917835, + "learning_rate": 1.240315103302433e-06, + "loss": 0.0006, + "step": 247530 + }, + { + "epoch": 1.5876446687810462, + "grad_norm": 0.0305376797914505, + "learning_rate": 1.2399461508074067e-06, + "loss": 0.002, + "step": 247540 + }, + { + "epoch": 1.5877088056748323, + "grad_norm": 0.10464880615472794, + "learning_rate": 1.239577245428543e-06, + "loss": 0.0017, + "step": 247550 + }, + { + "epoch": 1.5877729425686185, + "grad_norm": 0.18822775781154633, + "learning_rate": 1.2392083871704624e-06, + "loss": 0.0025, + "step": 247560 + }, + { + "epoch": 1.5878370794624046, + "grad_norm": 0.07013606280088425, + "learning_rate": 1.2388395760377896e-06, + "loss": 0.0012, + "step": 247570 + }, + { + "epoch": 1.5879012163561907, + "grad_norm": 0.03351839259266853, + "learning_rate": 1.2384708120351458e-06, + "loss": 0.0008, + "step": 247580 + }, + { + "epoch": 1.587965353249977, + "grad_norm": 0.04853971675038338, + "learning_rate": 1.2381020951671502e-06, + "loss": 0.0018, + "step": 247590 + }, + { + "epoch": 1.5880294901437628, + "grad_norm": 0.08642683178186417, + "learning_rate": 1.2377334254384232e-06, + "loss": 0.0012, + "step": 247600 + }, + { + "epoch": 1.588093627037549, + "grad_norm": 0.08164126425981522, + "learning_rate": 1.2373648028535862e-06, + "loss": 0.0013, + "step": 247610 + }, + { + "epoch": 1.588157763931335, + "grad_norm": 0.12997667491436005, + "learning_rate": 1.2369962274172575e-06, + "loss": 0.0012, + "step": 247620 + }, + { + "epoch": 1.588221900825121, + "grad_norm": 0.42265182733535767, + "learning_rate": 1.2366276991340537e-06, + "loss": 0.0017, + "step": 247630 + }, + { + "epoch": 1.5882860377189072, + "grad_norm": 0.025046564638614655, + "learning_rate": 1.2362592180085963e-06, + "loss": 0.0005, + "step": 247640 + }, + { + "epoch": 1.5883501746126933, + "grad_norm": 0.010669128969311714, + "learning_rate": 1.2358907840455004e-06, + "loss": 0.0013, + "step": 247650 + }, + { + "epoch": 1.5884143115064795, + "grad_norm": 0.019329898059368134, + "learning_rate": 1.2355223972493835e-06, + "loss": 0.0017, + "step": 247660 + }, + { + "epoch": 1.5884784484002656, + "grad_norm": 0.032687701284885406, + "learning_rate": 1.23515405762486e-06, + "loss": 0.0011, + "step": 247670 + }, + { + "epoch": 1.5885425852940518, + "grad_norm": 0.028135398402810097, + "learning_rate": 1.2347857651765483e-06, + "loss": 0.0005, + "step": 247680 + }, + { + "epoch": 1.5886067221878377, + "grad_norm": 0.018525226041674614, + "learning_rate": 1.2344175199090613e-06, + "loss": 0.0011, + "step": 247690 + }, + { + "epoch": 1.5886708590816239, + "grad_norm": 0.045167919248342514, + "learning_rate": 1.234049321827015e-06, + "loss": 0.0017, + "step": 247700 + }, + { + "epoch": 1.5887349959754098, + "grad_norm": 0.10461270064115524, + "learning_rate": 1.23368117093502e-06, + "loss": 0.0018, + "step": 247710 + }, + { + "epoch": 1.588799132869196, + "grad_norm": 0.06808051466941833, + "learning_rate": 1.233313067237693e-06, + "loss": 0.0021, + "step": 247720 + }, + { + "epoch": 1.588863269762982, + "grad_norm": 0.0033471379429101944, + "learning_rate": 1.2329450107396456e-06, + "loss": 0.0013, + "step": 247730 + }, + { + "epoch": 1.5889274066567682, + "grad_norm": 0.1340583711862564, + "learning_rate": 1.2325770014454897e-06, + "loss": 0.0012, + "step": 247740 + }, + { + "epoch": 1.5889915435505544, + "grad_norm": 0.029970020055770874, + "learning_rate": 1.2322090393598352e-06, + "loss": 0.0015, + "step": 247750 + }, + { + "epoch": 1.5890556804443405, + "grad_norm": 0.034044284373521805, + "learning_rate": 1.2318411244872952e-06, + "loss": 0.001, + "step": 247760 + }, + { + "epoch": 1.5891198173381265, + "grad_norm": 0.06569530069828033, + "learning_rate": 1.231473256832479e-06, + "loss": 0.0005, + "step": 247770 + }, + { + "epoch": 1.5891839542319126, + "grad_norm": 0.1073935404419899, + "learning_rate": 1.2311054363999948e-06, + "loss": 0.0016, + "step": 247780 + }, + { + "epoch": 1.5892480911256985, + "grad_norm": 0.011293603107333183, + "learning_rate": 1.2307376631944545e-06, + "loss": 0.0011, + "step": 247790 + }, + { + "epoch": 1.5893122280194847, + "grad_norm": 0.004708931315690279, + "learning_rate": 1.2303699372204653e-06, + "loss": 0.0018, + "step": 247800 + }, + { + "epoch": 1.5893763649132708, + "grad_norm": 0.0963144302368164, + "learning_rate": 1.230002258482635e-06, + "loss": 0.001, + "step": 247810 + }, + { + "epoch": 1.589440501807057, + "grad_norm": 0.009647555649280548, + "learning_rate": 1.229634626985569e-06, + "loss": 0.0012, + "step": 247820 + }, + { + "epoch": 1.5895046387008431, + "grad_norm": 0.09639555215835571, + "learning_rate": 1.2292670427338777e-06, + "loss": 0.0017, + "step": 247830 + }, + { + "epoch": 1.5895687755946293, + "grad_norm": 0.04794756695628166, + "learning_rate": 1.2288995057321645e-06, + "loss": 0.0012, + "step": 247840 + }, + { + "epoch": 1.5896329124884154, + "grad_norm": 0.10684481263160706, + "learning_rate": 1.2285320159850362e-06, + "loss": 0.0025, + "step": 247850 + }, + { + "epoch": 1.5896970493822014, + "grad_norm": 0.03137262538075447, + "learning_rate": 1.2281645734970953e-06, + "loss": 0.0009, + "step": 247860 + }, + { + "epoch": 1.5897611862759875, + "grad_norm": 0.036332130432128906, + "learning_rate": 1.2277971782729503e-06, + "loss": 0.0007, + "step": 247870 + }, + { + "epoch": 1.5898253231697734, + "grad_norm": 0.0729440450668335, + "learning_rate": 1.2274298303172017e-06, + "loss": 0.0021, + "step": 247880 + }, + { + "epoch": 1.5898894600635596, + "grad_norm": 0.03348057344555855, + "learning_rate": 1.2270625296344541e-06, + "loss": 0.0017, + "step": 247890 + }, + { + "epoch": 1.5899535969573457, + "grad_norm": 0.00622418150305748, + "learning_rate": 1.2266952762293078e-06, + "loss": 0.002, + "step": 247900 + }, + { + "epoch": 1.5900177338511319, + "grad_norm": 0.07951758056879044, + "learning_rate": 1.2263280701063678e-06, + "loss": 0.0008, + "step": 247910 + }, + { + "epoch": 1.590081870744918, + "grad_norm": 0.06803692132234573, + "learning_rate": 1.2259609112702342e-06, + "loss": 0.0011, + "step": 247920 + }, + { + "epoch": 1.5901460076387042, + "grad_norm": 0.11719736456871033, + "learning_rate": 1.2255937997255064e-06, + "loss": 0.0011, + "step": 247930 + }, + { + "epoch": 1.59021014453249, + "grad_norm": 0.07720184326171875, + "learning_rate": 1.2252267354767866e-06, + "loss": 0.0018, + "step": 247940 + }, + { + "epoch": 1.5902742814262762, + "grad_norm": 0.10507367551326752, + "learning_rate": 1.2248597185286742e-06, + "loss": 0.0012, + "step": 247950 + }, + { + "epoch": 1.5903384183200622, + "grad_norm": 0.08309304714202881, + "learning_rate": 1.2244927488857678e-06, + "loss": 0.0006, + "step": 247960 + }, + { + "epoch": 1.5904025552138483, + "grad_norm": 0.020326273515820503, + "learning_rate": 1.2241258265526635e-06, + "loss": 0.0008, + "step": 247970 + }, + { + "epoch": 1.5904666921076345, + "grad_norm": 0.02808596007525921, + "learning_rate": 1.2237589515339627e-06, + "loss": 0.0006, + "step": 247980 + }, + { + "epoch": 1.5905308290014206, + "grad_norm": 0.08803045749664307, + "learning_rate": 1.223392123834261e-06, + "loss": 0.001, + "step": 247990 + }, + { + "epoch": 1.5905949658952068, + "grad_norm": 0.023076031357049942, + "learning_rate": 1.2230253434581558e-06, + "loss": 0.0012, + "step": 248000 + }, + { + "epoch": 1.590659102788993, + "grad_norm": 0.08916833996772766, + "learning_rate": 1.2226586104102407e-06, + "loss": 0.0014, + "step": 248010 + }, + { + "epoch": 1.590723239682779, + "grad_norm": 0.004460031166672707, + "learning_rate": 1.2222919246951136e-06, + "loss": 0.0011, + "step": 248020 + }, + { + "epoch": 1.590787376576565, + "grad_norm": 0.11745785176753998, + "learning_rate": 1.2219252863173692e-06, + "loss": 0.0012, + "step": 248030 + }, + { + "epoch": 1.5908515134703511, + "grad_norm": 0.04275527969002724, + "learning_rate": 1.2215586952815994e-06, + "loss": 0.0014, + "step": 248040 + }, + { + "epoch": 1.590915650364137, + "grad_norm": 0.10326236486434937, + "learning_rate": 1.2211921515924014e-06, + "loss": 0.0007, + "step": 248050 + }, + { + "epoch": 1.5909797872579232, + "grad_norm": 0.015973402187228203, + "learning_rate": 1.2208256552543657e-06, + "loss": 0.001, + "step": 248060 + }, + { + "epoch": 1.5910439241517094, + "grad_norm": 0.03947042301297188, + "learning_rate": 1.220459206272086e-06, + "loss": 0.0051, + "step": 248070 + }, + { + "epoch": 1.5911080610454955, + "grad_norm": 0.2029953896999359, + "learning_rate": 1.220092804650152e-06, + "loss": 0.0009, + "step": 248080 + }, + { + "epoch": 1.5911721979392817, + "grad_norm": 0.04274044185876846, + "learning_rate": 1.2197264503931584e-06, + "loss": 0.0006, + "step": 248090 + }, + { + "epoch": 1.5912363348330678, + "grad_norm": 0.13463370501995087, + "learning_rate": 1.219360143505694e-06, + "loss": 0.0018, + "step": 248100 + }, + { + "epoch": 1.591300471726854, + "grad_norm": 0.04177447780966759, + "learning_rate": 1.2189938839923487e-06, + "loss": 0.0016, + "step": 248110 + }, + { + "epoch": 1.5913646086206399, + "grad_norm": 0.04472409188747406, + "learning_rate": 1.218627671857711e-06, + "loss": 0.0017, + "step": 248120 + }, + { + "epoch": 1.591428745514426, + "grad_norm": 0.04733484610915184, + "learning_rate": 1.2182615071063724e-06, + "loss": 0.0016, + "step": 248130 + }, + { + "epoch": 1.591492882408212, + "grad_norm": 0.012340148910880089, + "learning_rate": 1.2178953897429202e-06, + "loss": 0.0008, + "step": 248140 + }, + { + "epoch": 1.591557019301998, + "grad_norm": 0.1512427031993866, + "learning_rate": 1.2175293197719413e-06, + "loss": 0.0008, + "step": 248150 + }, + { + "epoch": 1.5916211561957843, + "grad_norm": 0.1732378602027893, + "learning_rate": 1.2171632971980225e-06, + "loss": 0.0014, + "step": 248160 + }, + { + "epoch": 1.5916852930895704, + "grad_norm": 0.020908314734697342, + "learning_rate": 1.2167973220257517e-06, + "loss": 0.0011, + "step": 248170 + }, + { + "epoch": 1.5917494299833566, + "grad_norm": 0.0377255454659462, + "learning_rate": 1.216431394259715e-06, + "loss": 0.0017, + "step": 248180 + }, + { + "epoch": 1.5918135668771427, + "grad_norm": 0.06305290758609772, + "learning_rate": 1.2160655139044953e-06, + "loss": 0.0008, + "step": 248190 + }, + { + "epoch": 1.5918777037709286, + "grad_norm": 0.16171769797801971, + "learning_rate": 1.2156996809646792e-06, + "loss": 0.0028, + "step": 248200 + }, + { + "epoch": 1.5919418406647148, + "grad_norm": 0.010947500355541706, + "learning_rate": 1.2153338954448518e-06, + "loss": 0.0006, + "step": 248210 + }, + { + "epoch": 1.5920059775585007, + "grad_norm": 0.007494628429412842, + "learning_rate": 1.214968157349596e-06, + "loss": 0.0013, + "step": 248220 + }, + { + "epoch": 1.5920701144522869, + "grad_norm": 0.026027843356132507, + "learning_rate": 1.2146024666834944e-06, + "loss": 0.0013, + "step": 248230 + }, + { + "epoch": 1.592134251346073, + "grad_norm": 0.06451816856861115, + "learning_rate": 1.214236823451127e-06, + "loss": 0.0015, + "step": 248240 + }, + { + "epoch": 1.5921983882398592, + "grad_norm": 0.10120180249214172, + "learning_rate": 1.2138712276570802e-06, + "loss": 0.0007, + "step": 248250 + }, + { + "epoch": 1.5922625251336453, + "grad_norm": 0.1435142308473587, + "learning_rate": 1.2135056793059325e-06, + "loss": 0.0008, + "step": 248260 + }, + { + "epoch": 1.5923266620274314, + "grad_norm": 0.04157077521085739, + "learning_rate": 1.2131401784022646e-06, + "loss": 0.001, + "step": 248270 + }, + { + "epoch": 1.5923907989212176, + "grad_norm": 0.06308285892009735, + "learning_rate": 1.2127747249506554e-06, + "loss": 0.0009, + "step": 248280 + }, + { + "epoch": 1.5924549358150035, + "grad_norm": 0.05132739245891571, + "learning_rate": 1.212409318955687e-06, + "loss": 0.001, + "step": 248290 + }, + { + "epoch": 1.5925190727087897, + "grad_norm": 0.03832164406776428, + "learning_rate": 1.212043960421937e-06, + "loss": 0.0007, + "step": 248300 + }, + { + "epoch": 1.5925832096025756, + "grad_norm": 0.019853752106428146, + "learning_rate": 1.2116786493539818e-06, + "loss": 0.0008, + "step": 248310 + }, + { + "epoch": 1.5926473464963617, + "grad_norm": 0.02733612060546875, + "learning_rate": 1.2113133857564018e-06, + "loss": 0.0014, + "step": 248320 + }, + { + "epoch": 1.592711483390148, + "grad_norm": 0.007777262479066849, + "learning_rate": 1.2109481696337732e-06, + "loss": 0.0013, + "step": 248330 + }, + { + "epoch": 1.592775620283934, + "grad_norm": 0.0869772881269455, + "learning_rate": 1.2105830009906716e-06, + "loss": 0.0012, + "step": 248340 + }, + { + "epoch": 1.5928397571777202, + "grad_norm": 0.042146243155002594, + "learning_rate": 1.2102178798316722e-06, + "loss": 0.0013, + "step": 248350 + }, + { + "epoch": 1.5929038940715063, + "grad_norm": 0.06737937778234482, + "learning_rate": 1.2098528061613523e-06, + "loss": 0.0014, + "step": 248360 + }, + { + "epoch": 1.5929680309652923, + "grad_norm": 0.18272994458675385, + "learning_rate": 1.2094877799842858e-06, + "loss": 0.0014, + "step": 248370 + }, + { + "epoch": 1.5930321678590784, + "grad_norm": 0.03615164756774902, + "learning_rate": 1.2091228013050466e-06, + "loss": 0.0006, + "step": 248380 + }, + { + "epoch": 1.5930963047528646, + "grad_norm": 0.027232449501752853, + "learning_rate": 1.2087578701282065e-06, + "loss": 0.0017, + "step": 248390 + }, + { + "epoch": 1.5931604416466505, + "grad_norm": 0.05842263624072075, + "learning_rate": 1.208392986458341e-06, + "loss": 0.0015, + "step": 248400 + }, + { + "epoch": 1.5932245785404366, + "grad_norm": 0.05320729687809944, + "learning_rate": 1.2080281503000214e-06, + "loss": 0.0017, + "step": 248410 + }, + { + "epoch": 1.5932887154342228, + "grad_norm": 0.03330248221755028, + "learning_rate": 1.2076633616578192e-06, + "loss": 0.0008, + "step": 248420 + }, + { + "epoch": 1.593352852328009, + "grad_norm": 0.09903709590435028, + "learning_rate": 1.2072986205363041e-06, + "loss": 0.0013, + "step": 248430 + }, + { + "epoch": 1.593416989221795, + "grad_norm": 0.1738354116678238, + "learning_rate": 1.2069339269400493e-06, + "loss": 0.0012, + "step": 248440 + }, + { + "epoch": 1.5934811261155812, + "grad_norm": 0.04957576468586922, + "learning_rate": 1.206569280873623e-06, + "loss": 0.0008, + "step": 248450 + }, + { + "epoch": 1.5935452630093672, + "grad_norm": 0.03456306830048561, + "learning_rate": 1.2062046823415936e-06, + "loss": 0.0009, + "step": 248460 + }, + { + "epoch": 1.5936093999031533, + "grad_norm": 0.037498582154512405, + "learning_rate": 1.2058401313485318e-06, + "loss": 0.0006, + "step": 248470 + }, + { + "epoch": 1.5936735367969392, + "grad_norm": 0.027157435193657875, + "learning_rate": 1.2054756278990053e-06, + "loss": 0.0012, + "step": 248480 + }, + { + "epoch": 1.5937376736907254, + "grad_norm": 0.11722490191459656, + "learning_rate": 1.205111171997581e-06, + "loss": 0.0013, + "step": 248490 + }, + { + "epoch": 1.5938018105845115, + "grad_norm": 0.04586930572986603, + "learning_rate": 1.204746763648824e-06, + "loss": 0.0012, + "step": 248500 + }, + { + "epoch": 1.5938659474782977, + "grad_norm": 0.0408138781785965, + "learning_rate": 1.2043824028573049e-06, + "loss": 0.0011, + "step": 248510 + }, + { + "epoch": 1.5939300843720838, + "grad_norm": 0.2502545714378357, + "learning_rate": 1.204018089627586e-06, + "loss": 0.0014, + "step": 248520 + }, + { + "epoch": 1.59399422126587, + "grad_norm": 0.0775919258594513, + "learning_rate": 1.2036538239642336e-06, + "loss": 0.001, + "step": 248530 + }, + { + "epoch": 1.5940583581596561, + "grad_norm": 0.07798996567726135, + "learning_rate": 1.2032896058718109e-06, + "loss": 0.0012, + "step": 248540 + }, + { + "epoch": 1.594122495053442, + "grad_norm": 0.08263470977544785, + "learning_rate": 1.2029254353548841e-06, + "loss": 0.0008, + "step": 248550 + }, + { + "epoch": 1.5941866319472282, + "grad_norm": 0.11589933186769485, + "learning_rate": 1.2025613124180158e-06, + "loss": 0.0009, + "step": 248560 + }, + { + "epoch": 1.5942507688410141, + "grad_norm": 0.07335735857486725, + "learning_rate": 1.2021972370657676e-06, + "loss": 0.001, + "step": 248570 + }, + { + "epoch": 1.5943149057348003, + "grad_norm": 0.07589791715145111, + "learning_rate": 1.2018332093027014e-06, + "loss": 0.0004, + "step": 248580 + }, + { + "epoch": 1.5943790426285864, + "grad_norm": 0.06774845719337463, + "learning_rate": 1.20146922913338e-06, + "loss": 0.0007, + "step": 248590 + }, + { + "epoch": 1.5944431795223726, + "grad_norm": 0.09183141589164734, + "learning_rate": 1.2011052965623648e-06, + "loss": 0.0011, + "step": 248600 + }, + { + "epoch": 1.5945073164161587, + "grad_norm": 0.1584300845861435, + "learning_rate": 1.200741411594214e-06, + "loss": 0.001, + "step": 248610 + }, + { + "epoch": 1.5945714533099449, + "grad_norm": 0.05256221443414688, + "learning_rate": 1.2003775742334894e-06, + "loss": 0.0008, + "step": 248620 + }, + { + "epoch": 1.5946355902037308, + "grad_norm": 0.04152030125260353, + "learning_rate": 1.2000137844847497e-06, + "loss": 0.0011, + "step": 248630 + }, + { + "epoch": 1.594699727097517, + "grad_norm": 0.06338044255971909, + "learning_rate": 1.199650042352553e-06, + "loss": 0.0009, + "step": 248640 + }, + { + "epoch": 1.5947638639913029, + "grad_norm": 0.05661037191748619, + "learning_rate": 1.1992863478414563e-06, + "loss": 0.0006, + "step": 248650 + }, + { + "epoch": 1.594828000885089, + "grad_norm": 0.11422684043645859, + "learning_rate": 1.198922700956019e-06, + "loss": 0.0017, + "step": 248660 + }, + { + "epoch": 1.5948921377788752, + "grad_norm": 0.06677471846342087, + "learning_rate": 1.198559101700797e-06, + "loss": 0.001, + "step": 248670 + }, + { + "epoch": 1.5949562746726613, + "grad_norm": 0.22182701528072357, + "learning_rate": 1.1981955500803461e-06, + "loss": 0.0013, + "step": 248680 + }, + { + "epoch": 1.5950204115664475, + "grad_norm": 0.03416885808110237, + "learning_rate": 1.1978320460992204e-06, + "loss": 0.001, + "step": 248690 + }, + { + "epoch": 1.5950845484602336, + "grad_norm": 0.16863662004470825, + "learning_rate": 1.1974685897619786e-06, + "loss": 0.0017, + "step": 248700 + }, + { + "epoch": 1.5951486853540198, + "grad_norm": 0.2889634966850281, + "learning_rate": 1.1971051810731726e-06, + "loss": 0.0012, + "step": 248710 + }, + { + "epoch": 1.5952128222478057, + "grad_norm": 0.09325895458459854, + "learning_rate": 1.1967418200373565e-06, + "loss": 0.0032, + "step": 248720 + }, + { + "epoch": 1.5952769591415918, + "grad_norm": 0.15792018175125122, + "learning_rate": 1.1963785066590827e-06, + "loss": 0.0009, + "step": 248730 + }, + { + "epoch": 1.5953410960353778, + "grad_norm": 0.06073154881596565, + "learning_rate": 1.1960152409429055e-06, + "loss": 0.0013, + "step": 248740 + }, + { + "epoch": 1.595405232929164, + "grad_norm": 0.0029850320424884558, + "learning_rate": 1.195652022893376e-06, + "loss": 0.0006, + "step": 248750 + }, + { + "epoch": 1.59546936982295, + "grad_norm": 0.06377291679382324, + "learning_rate": 1.1952888525150441e-06, + "loss": 0.0011, + "step": 248760 + }, + { + "epoch": 1.5955335067167362, + "grad_norm": 0.05814136937260628, + "learning_rate": 1.1949257298124639e-06, + "loss": 0.0013, + "step": 248770 + }, + { + "epoch": 1.5955976436105224, + "grad_norm": 0.14128145575523376, + "learning_rate": 1.1945626547901835e-06, + "loss": 0.0008, + "step": 248780 + }, + { + "epoch": 1.5956617805043085, + "grad_norm": 0.12270446121692657, + "learning_rate": 1.1941996274527528e-06, + "loss": 0.0021, + "step": 248790 + }, + { + "epoch": 1.5957259173980944, + "grad_norm": 0.02536684088408947, + "learning_rate": 1.1938366478047192e-06, + "loss": 0.0012, + "step": 248800 + }, + { + "epoch": 1.5957900542918806, + "grad_norm": 0.12321645766496658, + "learning_rate": 1.1934737158506342e-06, + "loss": 0.0014, + "step": 248810 + }, + { + "epoch": 1.5958541911856667, + "grad_norm": 0.046535737812519073, + "learning_rate": 1.193110831595044e-06, + "loss": 0.001, + "step": 248820 + }, + { + "epoch": 1.5959183280794527, + "grad_norm": 0.07171449065208435, + "learning_rate": 1.1927479950424958e-06, + "loss": 0.0009, + "step": 248830 + }, + { + "epoch": 1.5959824649732388, + "grad_norm": 0.07412496209144592, + "learning_rate": 1.192385206197535e-06, + "loss": 0.0009, + "step": 248840 + }, + { + "epoch": 1.596046601867025, + "grad_norm": 0.026464832946658134, + "learning_rate": 1.19202246506471e-06, + "loss": 0.0008, + "step": 248850 + }, + { + "epoch": 1.596110738760811, + "grad_norm": 0.1299687623977661, + "learning_rate": 1.1916597716485651e-06, + "loss": 0.0007, + "step": 248860 + }, + { + "epoch": 1.5961748756545973, + "grad_norm": 0.009185717441141605, + "learning_rate": 1.191297125953645e-06, + "loss": 0.0013, + "step": 248870 + }, + { + "epoch": 1.5962390125483834, + "grad_norm": 0.060487210750579834, + "learning_rate": 1.1909345279844925e-06, + "loss": 0.0022, + "step": 248880 + }, + { + "epoch": 1.5963031494421693, + "grad_norm": 0.04519510641694069, + "learning_rate": 1.1905719777456537e-06, + "loss": 0.0004, + "step": 248890 + }, + { + "epoch": 1.5963672863359555, + "grad_norm": 0.03752319887280464, + "learning_rate": 1.190209475241671e-06, + "loss": 0.0019, + "step": 248900 + }, + { + "epoch": 1.5964314232297414, + "grad_norm": 0.01071302779018879, + "learning_rate": 1.189847020477085e-06, + "loss": 0.0011, + "step": 248910 + }, + { + "epoch": 1.5964955601235276, + "grad_norm": 0.10751044005155563, + "learning_rate": 1.1894846134564402e-06, + "loss": 0.0011, + "step": 248920 + }, + { + "epoch": 1.5965596970173137, + "grad_norm": 0.0483706034719944, + "learning_rate": 1.1891222541842767e-06, + "loss": 0.0009, + "step": 248930 + }, + { + "epoch": 1.5966238339110999, + "grad_norm": 0.1471995711326599, + "learning_rate": 1.1887599426651353e-06, + "loss": 0.0025, + "step": 248940 + }, + { + "epoch": 1.596687970804886, + "grad_norm": 0.10588864982128143, + "learning_rate": 1.1883976789035533e-06, + "loss": 0.0008, + "step": 248950 + }, + { + "epoch": 1.5967521076986722, + "grad_norm": 0.06941290944814682, + "learning_rate": 1.188035462904073e-06, + "loss": 0.0005, + "step": 248960 + }, + { + "epoch": 1.5968162445924583, + "grad_norm": 0.006535564083606005, + "learning_rate": 1.1876732946712344e-06, + "loss": 0.0016, + "step": 248970 + }, + { + "epoch": 1.5968803814862442, + "grad_norm": 0.07977228611707687, + "learning_rate": 1.1873111742095739e-06, + "loss": 0.0009, + "step": 248980 + }, + { + "epoch": 1.5969445183800304, + "grad_norm": 0.0827580988407135, + "learning_rate": 1.186949101523629e-06, + "loss": 0.0017, + "step": 248990 + }, + { + "epoch": 1.5970086552738163, + "grad_norm": 0.08859512954950333, + "learning_rate": 1.186587076617936e-06, + "loss": 0.0007, + "step": 249000 + }, + { + "epoch": 1.5970727921676025, + "grad_norm": 0.0018137848237529397, + "learning_rate": 1.186225099497033e-06, + "loss": 0.0011, + "step": 249010 + }, + { + "epoch": 1.5971369290613886, + "grad_norm": 0.2125101238489151, + "learning_rate": 1.185863170165456e-06, + "loss": 0.002, + "step": 249020 + }, + { + "epoch": 1.5972010659551747, + "grad_norm": 0.008683650754392147, + "learning_rate": 1.1855012886277377e-06, + "loss": 0.0007, + "step": 249030 + }, + { + "epoch": 1.597265202848961, + "grad_norm": 0.153066024184227, + "learning_rate": 1.185139454888416e-06, + "loss": 0.0016, + "step": 249040 + }, + { + "epoch": 1.597329339742747, + "grad_norm": 0.062498725950717926, + "learning_rate": 1.1847776689520228e-06, + "loss": 0.0012, + "step": 249050 + }, + { + "epoch": 1.597393476636533, + "grad_norm": 0.10802311450242996, + "learning_rate": 1.1844159308230924e-06, + "loss": 0.001, + "step": 249060 + }, + { + "epoch": 1.5974576135303191, + "grad_norm": 0.457075834274292, + "learning_rate": 1.1840542405061562e-06, + "loss": 0.0018, + "step": 249070 + }, + { + "epoch": 1.597521750424105, + "grad_norm": 0.06506223976612091, + "learning_rate": 1.183692598005749e-06, + "loss": 0.0004, + "step": 249080 + }, + { + "epoch": 1.5975858873178912, + "grad_norm": 0.15700525045394897, + "learning_rate": 1.1833310033264006e-06, + "loss": 0.0011, + "step": 249090 + }, + { + "epoch": 1.5976500242116773, + "grad_norm": 0.024480275809764862, + "learning_rate": 1.182969456472643e-06, + "loss": 0.001, + "step": 249100 + }, + { + "epoch": 1.5977141611054635, + "grad_norm": 0.48664844036102295, + "learning_rate": 1.182607957449004e-06, + "loss": 0.0009, + "step": 249110 + }, + { + "epoch": 1.5977782979992496, + "grad_norm": 0.0950402095913887, + "learning_rate": 1.1822465062600175e-06, + "loss": 0.0016, + "step": 249120 + }, + { + "epoch": 1.5978424348930358, + "grad_norm": 0.004985040053725243, + "learning_rate": 1.1818851029102108e-06, + "loss": 0.0011, + "step": 249130 + }, + { + "epoch": 1.597906571786822, + "grad_norm": 0.0396341010928154, + "learning_rate": 1.181523747404112e-06, + "loss": 0.0019, + "step": 249140 + }, + { + "epoch": 1.5979707086806079, + "grad_norm": 0.08913438767194748, + "learning_rate": 1.181162439746249e-06, + "loss": 0.0009, + "step": 249150 + }, + { + "epoch": 1.598034845574394, + "grad_norm": 0.055692918598651886, + "learning_rate": 1.1808011799411507e-06, + "loss": 0.0009, + "step": 249160 + }, + { + "epoch": 1.59809898246818, + "grad_norm": 0.07682760059833527, + "learning_rate": 1.180439967993343e-06, + "loss": 0.0007, + "step": 249170 + }, + { + "epoch": 1.598163119361966, + "grad_norm": 0.17688927054405212, + "learning_rate": 1.1800788039073519e-06, + "loss": 0.0017, + "step": 249180 + }, + { + "epoch": 1.5982272562557522, + "grad_norm": 0.12076979875564575, + "learning_rate": 1.1797176876877037e-06, + "loss": 0.0009, + "step": 249190 + }, + { + "epoch": 1.5982913931495384, + "grad_norm": 0.030076563358306885, + "learning_rate": 1.1793566193389239e-06, + "loss": 0.0011, + "step": 249200 + }, + { + "epoch": 1.5983555300433245, + "grad_norm": 0.17592324316501617, + "learning_rate": 1.1789955988655361e-06, + "loss": 0.0018, + "step": 249210 + }, + { + "epoch": 1.5984196669371107, + "grad_norm": 0.16255876421928406, + "learning_rate": 1.1786346262720628e-06, + "loss": 0.0018, + "step": 249220 + }, + { + "epoch": 1.5984838038308968, + "grad_norm": 0.04533237963914871, + "learning_rate": 1.1782737015630302e-06, + "loss": 0.0012, + "step": 249230 + }, + { + "epoch": 1.5985479407246828, + "grad_norm": 0.0027576833963394165, + "learning_rate": 1.1779128247429594e-06, + "loss": 0.0012, + "step": 249240 + }, + { + "epoch": 1.598612077618469, + "grad_norm": 0.0029595845844596624, + "learning_rate": 1.1775519958163723e-06, + "loss": 0.0017, + "step": 249250 + }, + { + "epoch": 1.5986762145122548, + "grad_norm": 0.07872213423252106, + "learning_rate": 1.1771912147877896e-06, + "loss": 0.0012, + "step": 249260 + }, + { + "epoch": 1.598740351406041, + "grad_norm": 0.025151098147034645, + "learning_rate": 1.1768304816617344e-06, + "loss": 0.0011, + "step": 249270 + }, + { + "epoch": 1.5988044882998271, + "grad_norm": 0.041764579713344574, + "learning_rate": 1.1764697964427253e-06, + "loss": 0.0012, + "step": 249280 + }, + { + "epoch": 1.5988686251936133, + "grad_norm": 0.039692021906375885, + "learning_rate": 1.1761091591352825e-06, + "loss": 0.0014, + "step": 249290 + }, + { + "epoch": 1.5989327620873994, + "grad_norm": 0.09026049077510834, + "learning_rate": 1.1757485697439235e-06, + "loss": 0.0009, + "step": 249300 + }, + { + "epoch": 1.5989968989811856, + "grad_norm": 0.026388583704829216, + "learning_rate": 1.1753880282731694e-06, + "loss": 0.0022, + "step": 249310 + }, + { + "epoch": 1.5990610358749715, + "grad_norm": 0.044500820338726044, + "learning_rate": 1.175027534727537e-06, + "loss": 0.0006, + "step": 249320 + }, + { + "epoch": 1.5991251727687577, + "grad_norm": 0.08768253773450851, + "learning_rate": 1.1746670891115414e-06, + "loss": 0.0019, + "step": 249330 + }, + { + "epoch": 1.5991893096625436, + "grad_norm": 0.08217465132474899, + "learning_rate": 1.1743066914297025e-06, + "loss": 0.0008, + "step": 249340 + }, + { + "epoch": 1.5992534465563297, + "grad_norm": 0.17016156017780304, + "learning_rate": 1.1739463416865348e-06, + "loss": 0.0016, + "step": 249350 + }, + { + "epoch": 1.5993175834501159, + "grad_norm": 0.0020344434306025505, + "learning_rate": 1.1735860398865546e-06, + "loss": 0.0012, + "step": 249360 + }, + { + "epoch": 1.599381720343902, + "grad_norm": 0.02283032238483429, + "learning_rate": 1.1732257860342743e-06, + "loss": 0.0008, + "step": 249370 + }, + { + "epoch": 1.5994458572376882, + "grad_norm": 0.024351265281438828, + "learning_rate": 1.172865580134211e-06, + "loss": 0.0013, + "step": 249380 + }, + { + "epoch": 1.5995099941314743, + "grad_norm": 0.22470252215862274, + "learning_rate": 1.1725054221908772e-06, + "loss": 0.0017, + "step": 249390 + }, + { + "epoch": 1.5995741310252605, + "grad_norm": 0.027585037052631378, + "learning_rate": 1.1721453122087862e-06, + "loss": 0.001, + "step": 249400 + }, + { + "epoch": 1.5996382679190464, + "grad_norm": 0.09092612564563751, + "learning_rate": 1.1717852501924487e-06, + "loss": 0.0007, + "step": 249410 + }, + { + "epoch": 1.5997024048128325, + "grad_norm": 0.04048805311322212, + "learning_rate": 1.1714252361463791e-06, + "loss": 0.0005, + "step": 249420 + }, + { + "epoch": 1.5997665417066185, + "grad_norm": 0.10383453220129013, + "learning_rate": 1.1710652700750879e-06, + "loss": 0.0013, + "step": 249430 + }, + { + "epoch": 1.5998306786004046, + "grad_norm": 0.03964724391698837, + "learning_rate": 1.1707053519830852e-06, + "loss": 0.001, + "step": 249440 + }, + { + "epoch": 1.5998948154941908, + "grad_norm": 0.05186668410897255, + "learning_rate": 1.17034548187488e-06, + "loss": 0.0007, + "step": 249450 + }, + { + "epoch": 1.599958952387977, + "grad_norm": 0.17879439890384674, + "learning_rate": 1.1699856597549842e-06, + "loss": 0.0006, + "step": 249460 + }, + { + "epoch": 1.600023089281763, + "grad_norm": 0.03267485648393631, + "learning_rate": 1.1696258856279053e-06, + "loss": 0.0017, + "step": 249470 + }, + { + "epoch": 1.6000872261755492, + "grad_norm": 0.0976705327630043, + "learning_rate": 1.1692661594981502e-06, + "loss": 0.0011, + "step": 249480 + }, + { + "epoch": 1.6001513630693351, + "grad_norm": 0.03639264777302742, + "learning_rate": 1.1689064813702295e-06, + "loss": 0.0014, + "step": 249490 + }, + { + "epoch": 1.6002154999631213, + "grad_norm": 0.007496272213757038, + "learning_rate": 1.1685468512486481e-06, + "loss": 0.0008, + "step": 249500 + }, + { + "epoch": 1.6002796368569072, + "grad_norm": 0.041140880435705185, + "learning_rate": 1.1681872691379132e-06, + "loss": 0.0022, + "step": 249510 + }, + { + "epoch": 1.6003437737506934, + "grad_norm": 0.16042396426200867, + "learning_rate": 1.1678277350425293e-06, + "loss": 0.0077, + "step": 249520 + }, + { + "epoch": 1.6004079106444795, + "grad_norm": 0.0523374006152153, + "learning_rate": 1.1674682489670036e-06, + "loss": 0.0015, + "step": 249530 + }, + { + "epoch": 1.6004720475382657, + "grad_norm": 0.07310651242733002, + "learning_rate": 1.1671088109158402e-06, + "loss": 0.0009, + "step": 249540 + }, + { + "epoch": 1.6005361844320518, + "grad_norm": 0.04398871958255768, + "learning_rate": 1.1667494208935426e-06, + "loss": 0.0018, + "step": 249550 + }, + { + "epoch": 1.600600321325838, + "grad_norm": 0.06598342210054398, + "learning_rate": 1.166390078904613e-06, + "loss": 0.0009, + "step": 249560 + }, + { + "epoch": 1.600664458219624, + "grad_norm": 0.0393049456179142, + "learning_rate": 1.1660307849535569e-06, + "loss": 0.0006, + "step": 249570 + }, + { + "epoch": 1.60072859511341, + "grad_norm": 0.10699399560689926, + "learning_rate": 1.1656715390448746e-06, + "loss": 0.0013, + "step": 249580 + }, + { + "epoch": 1.6007927320071962, + "grad_norm": 0.04361341521143913, + "learning_rate": 1.165312341183069e-06, + "loss": 0.0014, + "step": 249590 + }, + { + "epoch": 1.600856868900982, + "grad_norm": 0.0037963781505823135, + "learning_rate": 1.1649531913726392e-06, + "loss": 0.0013, + "step": 249600 + }, + { + "epoch": 1.6009210057947683, + "grad_norm": 0.003534812480211258, + "learning_rate": 1.1645940896180874e-06, + "loss": 0.0007, + "step": 249610 + }, + { + "epoch": 1.6009851426885544, + "grad_norm": 0.0005362582160159945, + "learning_rate": 1.1642350359239136e-06, + "loss": 0.0006, + "step": 249620 + }, + { + "epoch": 1.6010492795823406, + "grad_norm": 0.11060915887355804, + "learning_rate": 1.163876030294614e-06, + "loss": 0.001, + "step": 249630 + }, + { + "epoch": 1.6011134164761267, + "grad_norm": 0.03232883661985397, + "learning_rate": 1.1635170727346912e-06, + "loss": 0.0006, + "step": 249640 + }, + { + "epoch": 1.6011775533699129, + "grad_norm": 0.040262069553136826, + "learning_rate": 1.1631581632486411e-06, + "loss": 0.0014, + "step": 249650 + }, + { + "epoch": 1.601241690263699, + "grad_norm": 0.045741088688373566, + "learning_rate": 1.1627993018409616e-06, + "loss": 0.0009, + "step": 249660 + }, + { + "epoch": 1.601305827157485, + "grad_norm": 0.03924969211220741, + "learning_rate": 1.1624404885161478e-06, + "loss": 0.0004, + "step": 249670 + }, + { + "epoch": 1.601369964051271, + "grad_norm": 0.038819339126348495, + "learning_rate": 1.1620817232786986e-06, + "loss": 0.0003, + "step": 249680 + }, + { + "epoch": 1.601434100945057, + "grad_norm": 0.046807948499917984, + "learning_rate": 1.1617230061331085e-06, + "loss": 0.0006, + "step": 249690 + }, + { + "epoch": 1.6014982378388432, + "grad_norm": 0.014780609868466854, + "learning_rate": 1.1613643370838723e-06, + "loss": 0.0039, + "step": 249700 + }, + { + "epoch": 1.6015623747326293, + "grad_norm": 0.0041016568429768085, + "learning_rate": 1.1610057161354827e-06, + "loss": 0.001, + "step": 249710 + }, + { + "epoch": 1.6016265116264154, + "grad_norm": 0.09573370218276978, + "learning_rate": 1.1606471432924353e-06, + "loss": 0.0022, + "step": 249720 + }, + { + "epoch": 1.6016906485202016, + "grad_norm": 0.016817208379507065, + "learning_rate": 1.1602886185592248e-06, + "loss": 0.0004, + "step": 249730 + }, + { + "epoch": 1.6017547854139877, + "grad_norm": 0.048654478043317795, + "learning_rate": 1.159930141940342e-06, + "loss": 0.0015, + "step": 249740 + }, + { + "epoch": 1.6018189223077737, + "grad_norm": 0.07242769747972488, + "learning_rate": 1.1595717134402778e-06, + "loss": 0.0029, + "step": 249750 + }, + { + "epoch": 1.6018830592015598, + "grad_norm": 0.01238231360912323, + "learning_rate": 1.1592133330635258e-06, + "loss": 0.0008, + "step": 249760 + }, + { + "epoch": 1.6019471960953457, + "grad_norm": 0.04801921918988228, + "learning_rate": 1.1588550008145756e-06, + "loss": 0.0008, + "step": 249770 + }, + { + "epoch": 1.602011332989132, + "grad_norm": 0.10079651325941086, + "learning_rate": 1.1584967166979178e-06, + "loss": 0.0008, + "step": 249780 + }, + { + "epoch": 1.602075469882918, + "grad_norm": 0.1167251393198967, + "learning_rate": 1.1581384807180402e-06, + "loss": 0.0022, + "step": 249790 + }, + { + "epoch": 1.6021396067767042, + "grad_norm": 0.1415635645389557, + "learning_rate": 1.1577802928794341e-06, + "loss": 0.0008, + "step": 249800 + }, + { + "epoch": 1.6022037436704903, + "grad_norm": 0.04449000209569931, + "learning_rate": 1.1574221531865875e-06, + "loss": 0.0008, + "step": 249810 + }, + { + "epoch": 1.6022678805642765, + "grad_norm": 0.2547750174999237, + "learning_rate": 1.1570640616439872e-06, + "loss": 0.0031, + "step": 249820 + }, + { + "epoch": 1.6023320174580626, + "grad_norm": 0.05963004752993584, + "learning_rate": 1.1567060182561196e-06, + "loss": 0.0005, + "step": 249830 + }, + { + "epoch": 1.6023961543518486, + "grad_norm": 0.171538308262825, + "learning_rate": 1.1563480230274738e-06, + "loss": 0.0012, + "step": 249840 + }, + { + "epoch": 1.6024602912456347, + "grad_norm": 0.17668305337429047, + "learning_rate": 1.1559900759625336e-06, + "loss": 0.0013, + "step": 249850 + }, + { + "epoch": 1.6025244281394206, + "grad_norm": 0.01708364672958851, + "learning_rate": 1.1556321770657858e-06, + "loss": 0.002, + "step": 249860 + }, + { + "epoch": 1.6025885650332068, + "grad_norm": 0.36142441630363464, + "learning_rate": 1.1552743263417126e-06, + "loss": 0.0008, + "step": 249870 + }, + { + "epoch": 1.602652701926993, + "grad_norm": 0.048225026577711105, + "learning_rate": 1.1549165237948011e-06, + "loss": 0.0008, + "step": 249880 + }, + { + "epoch": 1.602716838820779, + "grad_norm": 0.0728774294257164, + "learning_rate": 1.154558769429534e-06, + "loss": 0.0023, + "step": 249890 + }, + { + "epoch": 1.6027809757145652, + "grad_norm": 0.08327235281467438, + "learning_rate": 1.1542010632503925e-06, + "loss": 0.0008, + "step": 249900 + }, + { + "epoch": 1.6028451126083514, + "grad_norm": 0.18347333371639252, + "learning_rate": 1.1538434052618614e-06, + "loss": 0.0017, + "step": 249910 + }, + { + "epoch": 1.6029092495021373, + "grad_norm": 0.008992303162813187, + "learning_rate": 1.153485795468421e-06, + "loss": 0.0007, + "step": 249920 + }, + { + "epoch": 1.6029733863959235, + "grad_norm": 0.06550116837024689, + "learning_rate": 1.153128233874553e-06, + "loss": 0.0019, + "step": 249930 + }, + { + "epoch": 1.6030375232897096, + "grad_norm": 0.07155051827430725, + "learning_rate": 1.152770720484736e-06, + "loss": 0.0008, + "step": 249940 + }, + { + "epoch": 1.6031016601834955, + "grad_norm": 0.08370451629161835, + "learning_rate": 1.1524132553034528e-06, + "loss": 0.0008, + "step": 249950 + }, + { + "epoch": 1.6031657970772817, + "grad_norm": 0.07363441586494446, + "learning_rate": 1.1520558383351816e-06, + "loss": 0.0009, + "step": 249960 + }, + { + "epoch": 1.6032299339710678, + "grad_norm": 0.060655880719423294, + "learning_rate": 1.1516984695844002e-06, + "loss": 0.0003, + "step": 249970 + }, + { + "epoch": 1.603294070864854, + "grad_norm": 0.02170303836464882, + "learning_rate": 1.1513411490555865e-06, + "loss": 0.0013, + "step": 249980 + }, + { + "epoch": 1.6033582077586401, + "grad_norm": 0.2270852029323578, + "learning_rate": 1.15098387675322e-06, + "loss": 0.0008, + "step": 249990 + }, + { + "epoch": 1.6034223446524263, + "grad_norm": 0.06730823218822479, + "learning_rate": 1.1506266526817766e-06, + "loss": 0.0011, + "step": 250000 + }, + { + "epoch": 1.6034864815462122, + "grad_norm": 0.004074290860444307, + "learning_rate": 1.1502694768457317e-06, + "loss": 0.0011, + "step": 250010 + }, + { + "epoch": 1.6035506184399984, + "grad_norm": 0.09978527575731277, + "learning_rate": 1.1499123492495607e-06, + "loss": 0.0003, + "step": 250020 + }, + { + "epoch": 1.6036147553337843, + "grad_norm": 0.035276979207992554, + "learning_rate": 1.1495552698977414e-06, + "loss": 0.0007, + "step": 250030 + }, + { + "epoch": 1.6036788922275704, + "grad_norm": 0.13160935044288635, + "learning_rate": 1.149198238794746e-06, + "loss": 0.0009, + "step": 250040 + }, + { + "epoch": 1.6037430291213566, + "grad_norm": 0.1706625372171402, + "learning_rate": 1.1488412559450473e-06, + "loss": 0.0008, + "step": 250050 + }, + { + "epoch": 1.6038071660151427, + "grad_norm": 0.10292987525463104, + "learning_rate": 1.1484843213531216e-06, + "loss": 0.0013, + "step": 250060 + }, + { + "epoch": 1.6038713029089289, + "grad_norm": 0.13077609241008759, + "learning_rate": 1.1481274350234395e-06, + "loss": 0.0011, + "step": 250070 + }, + { + "epoch": 1.603935439802715, + "grad_norm": 0.06850180774927139, + "learning_rate": 1.147770596960474e-06, + "loss": 0.0037, + "step": 250080 + }, + { + "epoch": 1.6039995766965012, + "grad_norm": 0.04928059130907059, + "learning_rate": 1.1474138071686947e-06, + "loss": 0.0012, + "step": 250090 + }, + { + "epoch": 1.604063713590287, + "grad_norm": 0.04577067866921425, + "learning_rate": 1.1470570656525754e-06, + "loss": 0.001, + "step": 250100 + }, + { + "epoch": 1.6041278504840732, + "grad_norm": 0.14815518260002136, + "learning_rate": 1.1467003724165842e-06, + "loss": 0.0007, + "step": 250110 + }, + { + "epoch": 1.6041919873778592, + "grad_norm": 0.06266386806964874, + "learning_rate": 1.1463437274651918e-06, + "loss": 0.001, + "step": 250120 + }, + { + "epoch": 1.6042561242716453, + "grad_norm": 0.002447890816256404, + "learning_rate": 1.1459871308028647e-06, + "loss": 0.0011, + "step": 250130 + }, + { + "epoch": 1.6043202611654315, + "grad_norm": 0.12594091892242432, + "learning_rate": 1.1456305824340746e-06, + "loss": 0.0009, + "step": 250140 + }, + { + "epoch": 1.6043843980592176, + "grad_norm": 0.06531865894794464, + "learning_rate": 1.1452740823632885e-06, + "loss": 0.001, + "step": 250150 + }, + { + "epoch": 1.6044485349530038, + "grad_norm": 0.07496868818998337, + "learning_rate": 1.1449176305949717e-06, + "loss": 0.001, + "step": 250160 + }, + { + "epoch": 1.60451267184679, + "grad_norm": 0.08994642645120621, + "learning_rate": 1.1445612271335933e-06, + "loss": 0.0014, + "step": 250170 + }, + { + "epoch": 1.6045768087405758, + "grad_norm": 0.17000466585159302, + "learning_rate": 1.144204871983618e-06, + "loss": 0.0008, + "step": 250180 + }, + { + "epoch": 1.604640945634362, + "grad_norm": 0.08831164985895157, + "learning_rate": 1.1438485651495117e-06, + "loss": 0.0007, + "step": 250190 + }, + { + "epoch": 1.604705082528148, + "grad_norm": 0.0376860573887825, + "learning_rate": 1.1434923066357374e-06, + "loss": 0.0006, + "step": 250200 + }, + { + "epoch": 1.604769219421934, + "grad_norm": 0.047595515847206116, + "learning_rate": 1.143136096446762e-06, + "loss": 0.002, + "step": 250210 + }, + { + "epoch": 1.6048333563157202, + "grad_norm": 0.1046251505613327, + "learning_rate": 1.1427799345870478e-06, + "loss": 0.001, + "step": 250220 + }, + { + "epoch": 1.6048974932095064, + "grad_norm": 0.03919575735926628, + "learning_rate": 1.1424238210610577e-06, + "loss": 0.0009, + "step": 250230 + }, + { + "epoch": 1.6049616301032925, + "grad_norm": 0.09882248193025589, + "learning_rate": 1.1420677558732523e-06, + "loss": 0.0017, + "step": 250240 + }, + { + "epoch": 1.6050257669970787, + "grad_norm": 0.060251470655202866, + "learning_rate": 1.1417117390280975e-06, + "loss": 0.0008, + "step": 250250 + }, + { + "epoch": 1.6050899038908648, + "grad_norm": 0.09244880825281143, + "learning_rate": 1.141355770530051e-06, + "loss": 0.0013, + "step": 250260 + }, + { + "epoch": 1.6051540407846507, + "grad_norm": 0.005103874485939741, + "learning_rate": 1.1409998503835751e-06, + "loss": 0.0013, + "step": 250270 + }, + { + "epoch": 1.6052181776784369, + "grad_norm": 0.06546703726053238, + "learning_rate": 1.1406439785931272e-06, + "loss": 0.0006, + "step": 250280 + }, + { + "epoch": 1.6052823145722228, + "grad_norm": 0.008968470618128777, + "learning_rate": 1.1402881551631706e-06, + "loss": 0.0007, + "step": 250290 + }, + { + "epoch": 1.605346451466009, + "grad_norm": 0.16478286683559418, + "learning_rate": 1.1399323800981616e-06, + "loss": 0.0028, + "step": 250300 + }, + { + "epoch": 1.605410588359795, + "grad_norm": 0.10185612738132477, + "learning_rate": 1.139576653402557e-06, + "loss": 0.0006, + "step": 250310 + }, + { + "epoch": 1.6054747252535813, + "grad_norm": 0.07352697849273682, + "learning_rate": 1.1392209750808175e-06, + "loss": 0.0006, + "step": 250320 + }, + { + "epoch": 1.6055388621473674, + "grad_norm": 0.006930564530193806, + "learning_rate": 1.1388653451373983e-06, + "loss": 0.0007, + "step": 250330 + }, + { + "epoch": 1.6056029990411536, + "grad_norm": 0.058381129056215286, + "learning_rate": 1.138509763576756e-06, + "loss": 0.0008, + "step": 250340 + }, + { + "epoch": 1.6056671359349395, + "grad_norm": 0.01590009219944477, + "learning_rate": 1.1381542304033443e-06, + "loss": 0.0008, + "step": 250350 + }, + { + "epoch": 1.6057312728287256, + "grad_norm": 0.07030168175697327, + "learning_rate": 1.1377987456216222e-06, + "loss": 0.0014, + "step": 250360 + }, + { + "epoch": 1.6057954097225118, + "grad_norm": 0.0982590839266777, + "learning_rate": 1.1374433092360416e-06, + "loss": 0.0017, + "step": 250370 + }, + { + "epoch": 1.6058595466162977, + "grad_norm": 0.17898105084896088, + "learning_rate": 1.1370879212510572e-06, + "loss": 0.0011, + "step": 250380 + }, + { + "epoch": 1.6059236835100839, + "grad_norm": 0.38064634799957275, + "learning_rate": 1.1367325816711205e-06, + "loss": 0.0016, + "step": 250390 + }, + { + "epoch": 1.60598782040387, + "grad_norm": 0.05069749429821968, + "learning_rate": 1.1363772905006865e-06, + "loss": 0.0008, + "step": 250400 + }, + { + "epoch": 1.6060519572976562, + "grad_norm": 0.4793483316898346, + "learning_rate": 1.1360220477442068e-06, + "loss": 0.0012, + "step": 250410 + }, + { + "epoch": 1.6061160941914423, + "grad_norm": 0.0342763252556324, + "learning_rate": 1.135666853406132e-06, + "loss": 0.0008, + "step": 250420 + }, + { + "epoch": 1.6061802310852284, + "grad_norm": 0.10136108845472336, + "learning_rate": 1.1353117074909125e-06, + "loss": 0.0007, + "step": 250430 + }, + { + "epoch": 1.6062443679790144, + "grad_norm": 0.0018044215394183993, + "learning_rate": 1.1349566100030007e-06, + "loss": 0.0012, + "step": 250440 + }, + { + "epoch": 1.6063085048728005, + "grad_norm": 0.1991158425807953, + "learning_rate": 1.1346015609468446e-06, + "loss": 0.0011, + "step": 250450 + }, + { + "epoch": 1.6063726417665865, + "grad_norm": 0.002864201320335269, + "learning_rate": 1.134246560326892e-06, + "loss": 0.0009, + "step": 250460 + }, + { + "epoch": 1.6064367786603726, + "grad_norm": 0.022196555510163307, + "learning_rate": 1.1338916081475943e-06, + "loss": 0.0008, + "step": 250470 + }, + { + "epoch": 1.6065009155541587, + "grad_norm": 0.1637929528951645, + "learning_rate": 1.1335367044133965e-06, + "loss": 0.001, + "step": 250480 + }, + { + "epoch": 1.606565052447945, + "grad_norm": 0.20712658762931824, + "learning_rate": 1.1331818491287483e-06, + "loss": 0.0023, + "step": 250490 + }, + { + "epoch": 1.606629189341731, + "grad_norm": 0.016757261008024216, + "learning_rate": 1.1328270422980953e-06, + "loss": 0.0011, + "step": 250500 + }, + { + "epoch": 1.6066933262355172, + "grad_norm": 0.0070112538523972034, + "learning_rate": 1.1324722839258823e-06, + "loss": 0.001, + "step": 250510 + }, + { + "epoch": 1.6067574631293033, + "grad_norm": 0.02961020916700363, + "learning_rate": 1.1321175740165563e-06, + "loss": 0.0007, + "step": 250520 + }, + { + "epoch": 1.6068216000230893, + "grad_norm": 0.09048043936491013, + "learning_rate": 1.1317629125745616e-06, + "loss": 0.0008, + "step": 250530 + }, + { + "epoch": 1.6068857369168754, + "grad_norm": 0.02654772624373436, + "learning_rate": 1.1314082996043424e-06, + "loss": 0.0007, + "step": 250540 + }, + { + "epoch": 1.6069498738106613, + "grad_norm": 0.050176508724689484, + "learning_rate": 1.1310537351103407e-06, + "loss": 0.0005, + "step": 250550 + }, + { + "epoch": 1.6070140107044475, + "grad_norm": 0.08712086081504822, + "learning_rate": 1.1306992190970023e-06, + "loss": 0.0013, + "step": 250560 + }, + { + "epoch": 1.6070781475982336, + "grad_norm": 0.019818203523755074, + "learning_rate": 1.1303447515687676e-06, + "loss": 0.0009, + "step": 250570 + }, + { + "epoch": 1.6071422844920198, + "grad_norm": 0.03647455945611, + "learning_rate": 1.1299903325300775e-06, + "loss": 0.0008, + "step": 250580 + }, + { + "epoch": 1.607206421385806, + "grad_norm": 0.030462345108389854, + "learning_rate": 1.129635961985376e-06, + "loss": 0.0024, + "step": 250590 + }, + { + "epoch": 1.607270558279592, + "grad_norm": 0.004395130090415478, + "learning_rate": 1.1292816399391022e-06, + "loss": 0.0016, + "step": 250600 + }, + { + "epoch": 1.607334695173378, + "grad_norm": 0.02232508361339569, + "learning_rate": 1.1289273663956951e-06, + "loss": 0.0008, + "step": 250610 + }, + { + "epoch": 1.6073988320671642, + "grad_norm": 0.018504155799746513, + "learning_rate": 1.128573141359594e-06, + "loss": 0.0009, + "step": 250620 + }, + { + "epoch": 1.60746296896095, + "grad_norm": 0.08404509723186493, + "learning_rate": 1.1282189648352393e-06, + "loss": 0.0006, + "step": 250630 + }, + { + "epoch": 1.6075271058547362, + "grad_norm": 0.04173661395907402, + "learning_rate": 1.1278648368270683e-06, + "loss": 0.0011, + "step": 250640 + }, + { + "epoch": 1.6075912427485224, + "grad_norm": 0.064873106777668, + "learning_rate": 1.1275107573395183e-06, + "loss": 0.001, + "step": 250650 + }, + { + "epoch": 1.6076553796423085, + "grad_norm": 0.0069096386432647705, + "learning_rate": 1.1271567263770244e-06, + "loss": 0.0006, + "step": 250660 + }, + { + "epoch": 1.6077195165360947, + "grad_norm": 0.08207154273986816, + "learning_rate": 1.126802743944026e-06, + "loss": 0.0078, + "step": 250670 + }, + { + "epoch": 1.6077836534298808, + "grad_norm": 0.09320911765098572, + "learning_rate": 1.1264488100449577e-06, + "loss": 0.0014, + "step": 250680 + }, + { + "epoch": 1.607847790323667, + "grad_norm": 0.043957896530628204, + "learning_rate": 1.1260949246842539e-06, + "loss": 0.0011, + "step": 250690 + }, + { + "epoch": 1.607911927217453, + "grad_norm": 0.055686213076114655, + "learning_rate": 1.1257410878663482e-06, + "loss": 0.0008, + "step": 250700 + }, + { + "epoch": 1.607976064111239, + "grad_norm": 0.027599196881055832, + "learning_rate": 1.1253872995956772e-06, + "loss": 0.0005, + "step": 250710 + }, + { + "epoch": 1.608040201005025, + "grad_norm": 0.03367011249065399, + "learning_rate": 1.125033559876672e-06, + "loss": 0.0008, + "step": 250720 + }, + { + "epoch": 1.6081043378988111, + "grad_norm": 0.09349153935909271, + "learning_rate": 1.1246798687137644e-06, + "loss": 0.0017, + "step": 250730 + }, + { + "epoch": 1.6081684747925973, + "grad_norm": 0.024527819827198982, + "learning_rate": 1.124326226111389e-06, + "loss": 0.0012, + "step": 250740 + }, + { + "epoch": 1.6082326116863834, + "grad_norm": 0.032239992171525955, + "learning_rate": 1.1239726320739758e-06, + "loss": 0.0011, + "step": 250750 + }, + { + "epoch": 1.6082967485801696, + "grad_norm": 0.11960357427597046, + "learning_rate": 1.123619086605956e-06, + "loss": 0.002, + "step": 250760 + }, + { + "epoch": 1.6083608854739557, + "grad_norm": 0.016015082597732544, + "learning_rate": 1.1232655897117579e-06, + "loss": 0.0014, + "step": 250770 + }, + { + "epoch": 1.6084250223677417, + "grad_norm": 0.06744474917650223, + "learning_rate": 1.1229121413958144e-06, + "loss": 0.0005, + "step": 250780 + }, + { + "epoch": 1.6084891592615278, + "grad_norm": 0.035480957478284836, + "learning_rate": 1.1225587416625521e-06, + "loss": 0.0014, + "step": 250790 + }, + { + "epoch": 1.608553296155314, + "grad_norm": 0.07199794054031372, + "learning_rate": 1.1222053905163999e-06, + "loss": 0.0013, + "step": 250800 + }, + { + "epoch": 1.6086174330490999, + "grad_norm": 0.09330529719591141, + "learning_rate": 1.1218520879617845e-06, + "loss": 0.003, + "step": 250810 + }, + { + "epoch": 1.608681569942886, + "grad_norm": 0.06037352606654167, + "learning_rate": 1.1214988340031357e-06, + "loss": 0.0007, + "step": 250820 + }, + { + "epoch": 1.6087457068366722, + "grad_norm": 0.10916303098201752, + "learning_rate": 1.121145628644878e-06, + "loss": 0.0009, + "step": 250830 + }, + { + "epoch": 1.6088098437304583, + "grad_norm": 0.05477078631520271, + "learning_rate": 1.120792471891438e-06, + "loss": 0.0017, + "step": 250840 + }, + { + "epoch": 1.6088739806242445, + "grad_norm": 0.14304757118225098, + "learning_rate": 1.1204393637472394e-06, + "loss": 0.0037, + "step": 250850 + }, + { + "epoch": 1.6089381175180306, + "grad_norm": 0.08332877606153488, + "learning_rate": 1.1200863042167093e-06, + "loss": 0.0012, + "step": 250860 + }, + { + "epoch": 1.6090022544118165, + "grad_norm": 0.04952731728553772, + "learning_rate": 1.119733293304271e-06, + "loss": 0.0014, + "step": 250870 + }, + { + "epoch": 1.6090663913056027, + "grad_norm": 0.039637889713048935, + "learning_rate": 1.1193803310143463e-06, + "loss": 0.0016, + "step": 250880 + }, + { + "epoch": 1.6091305281993886, + "grad_norm": 0.0378977470099926, + "learning_rate": 1.119027417351361e-06, + "loss": 0.0008, + "step": 250890 + }, + { + "epoch": 1.6091946650931748, + "grad_norm": 0.05773506313562393, + "learning_rate": 1.1186745523197357e-06, + "loss": 0.0014, + "step": 250900 + }, + { + "epoch": 1.609258801986961, + "grad_norm": 0.11306332051753998, + "learning_rate": 1.1183217359238924e-06, + "loss": 0.0009, + "step": 250910 + }, + { + "epoch": 1.609322938880747, + "grad_norm": 0.11664940416812897, + "learning_rate": 1.1179689681682504e-06, + "loss": 0.001, + "step": 250920 + }, + { + "epoch": 1.6093870757745332, + "grad_norm": 0.06874285638332367, + "learning_rate": 1.1176162490572328e-06, + "loss": 0.0024, + "step": 250930 + }, + { + "epoch": 1.6094512126683194, + "grad_norm": 0.007244887761771679, + "learning_rate": 1.1172635785952584e-06, + "loss": 0.0014, + "step": 250940 + }, + { + "epoch": 1.6095153495621055, + "grad_norm": 0.08755049109458923, + "learning_rate": 1.116910956786747e-06, + "loss": 0.0016, + "step": 250950 + }, + { + "epoch": 1.6095794864558914, + "grad_norm": 0.11688628792762756, + "learning_rate": 1.1165583836361143e-06, + "loss": 0.0011, + "step": 250960 + }, + { + "epoch": 1.6096436233496776, + "grad_norm": 0.03535597771406174, + "learning_rate": 1.116205859147782e-06, + "loss": 0.0023, + "step": 250970 + }, + { + "epoch": 1.6097077602434635, + "grad_norm": 0.06629591435194016, + "learning_rate": 1.1158533833261665e-06, + "loss": 0.0011, + "step": 250980 + }, + { + "epoch": 1.6097718971372497, + "grad_norm": 0.03955451771616936, + "learning_rate": 1.1155009561756836e-06, + "loss": 0.0015, + "step": 250990 + }, + { + "epoch": 1.6098360340310358, + "grad_norm": 0.08270388096570969, + "learning_rate": 1.1151485777007487e-06, + "loss": 0.0008, + "step": 251000 + }, + { + "epoch": 1.609900170924822, + "grad_norm": 0.10194525867700577, + "learning_rate": 1.11479624790578e-06, + "loss": 0.0016, + "step": 251010 + }, + { + "epoch": 1.609964307818608, + "grad_norm": 0.08749617636203766, + "learning_rate": 1.1144439667951906e-06, + "loss": 0.0012, + "step": 251020 + }, + { + "epoch": 1.6100284447123943, + "grad_norm": 0.052722033113241196, + "learning_rate": 1.1140917343733943e-06, + "loss": 0.0011, + "step": 251030 + }, + { + "epoch": 1.6100925816061802, + "grad_norm": 0.05054409056901932, + "learning_rate": 1.1137395506448074e-06, + "loss": 0.0004, + "step": 251040 + }, + { + "epoch": 1.6101567184999663, + "grad_norm": 0.021686574444174767, + "learning_rate": 1.1133874156138407e-06, + "loss": 0.0006, + "step": 251050 + }, + { + "epoch": 1.6102208553937523, + "grad_norm": 0.017417414113879204, + "learning_rate": 1.1130353292849083e-06, + "loss": 0.0007, + "step": 251060 + }, + { + "epoch": 1.6102849922875384, + "grad_norm": 0.003439707215875387, + "learning_rate": 1.1126832916624192e-06, + "loss": 0.001, + "step": 251070 + }, + { + "epoch": 1.6103491291813246, + "grad_norm": 0.023953434079885483, + "learning_rate": 1.1123313027507882e-06, + "loss": 0.0009, + "step": 251080 + }, + { + "epoch": 1.6104132660751107, + "grad_norm": 0.01200141292065382, + "learning_rate": 1.1119793625544246e-06, + "loss": 0.0006, + "step": 251090 + }, + { + "epoch": 1.6104774029688969, + "grad_norm": 0.10512842237949371, + "learning_rate": 1.111627471077738e-06, + "loss": 0.0012, + "step": 251100 + }, + { + "epoch": 1.610541539862683, + "grad_norm": 0.10881675034761429, + "learning_rate": 1.111275628325137e-06, + "loss": 0.0014, + "step": 251110 + }, + { + "epoch": 1.6106056767564692, + "grad_norm": 0.11241288483142853, + "learning_rate": 1.1109238343010326e-06, + "loss": 0.0016, + "step": 251120 + }, + { + "epoch": 1.610669813650255, + "grad_norm": 0.10109782963991165, + "learning_rate": 1.1105720890098327e-06, + "loss": 0.0016, + "step": 251130 + }, + { + "epoch": 1.6107339505440412, + "grad_norm": 0.22030378878116608, + "learning_rate": 1.1102203924559441e-06, + "loss": 0.0008, + "step": 251140 + }, + { + "epoch": 1.6107980874378272, + "grad_norm": 0.0337819904088974, + "learning_rate": 1.1098687446437722e-06, + "loss": 0.001, + "step": 251150 + }, + { + "epoch": 1.6108622243316133, + "grad_norm": 0.01923406682908535, + "learning_rate": 1.1095171455777264e-06, + "loss": 0.0014, + "step": 251160 + }, + { + "epoch": 1.6109263612253995, + "grad_norm": 0.08116120845079422, + "learning_rate": 1.109165595262212e-06, + "loss": 0.0007, + "step": 251170 + }, + { + "epoch": 1.6109904981191856, + "grad_norm": 0.014466686174273491, + "learning_rate": 1.1088140937016318e-06, + "loss": 0.0008, + "step": 251180 + }, + { + "epoch": 1.6110546350129717, + "grad_norm": 0.052190881222486496, + "learning_rate": 1.108462640900393e-06, + "loss": 0.0013, + "step": 251190 + }, + { + "epoch": 1.611118771906758, + "grad_norm": 0.11071562767028809, + "learning_rate": 1.1081112368628989e-06, + "loss": 0.0034, + "step": 251200 + }, + { + "epoch": 1.611182908800544, + "grad_norm": 0.015977121889591217, + "learning_rate": 1.107759881593552e-06, + "loss": 0.0006, + "step": 251210 + }, + { + "epoch": 1.61124704569433, + "grad_norm": 0.10085508227348328, + "learning_rate": 1.1074085750967545e-06, + "loss": 0.0014, + "step": 251220 + }, + { + "epoch": 1.6113111825881161, + "grad_norm": 0.11096709966659546, + "learning_rate": 1.107057317376911e-06, + "loss": 0.001, + "step": 251230 + }, + { + "epoch": 1.611375319481902, + "grad_norm": 0.04569392651319504, + "learning_rate": 1.1067061084384218e-06, + "loss": 0.0012, + "step": 251240 + }, + { + "epoch": 1.6114394563756882, + "grad_norm": 0.10049768537282944, + "learning_rate": 1.1063549482856855e-06, + "loss": 0.0011, + "step": 251250 + }, + { + "epoch": 1.6115035932694743, + "grad_norm": 0.09229201078414917, + "learning_rate": 1.1060038369231063e-06, + "loss": 0.002, + "step": 251260 + }, + { + "epoch": 1.6115677301632605, + "grad_norm": 0.030462343245744705, + "learning_rate": 1.1056527743550805e-06, + "loss": 0.0016, + "step": 251270 + }, + { + "epoch": 1.6116318670570466, + "grad_norm": 0.05826370045542717, + "learning_rate": 1.10530176058601e-06, + "loss": 0.001, + "step": 251280 + }, + { + "epoch": 1.6116960039508328, + "grad_norm": 0.07355988025665283, + "learning_rate": 1.104950795620292e-06, + "loss": 0.0007, + "step": 251290 + }, + { + "epoch": 1.6117601408446187, + "grad_norm": 0.11956194043159485, + "learning_rate": 1.1045998794623231e-06, + "loss": 0.0017, + "step": 251300 + }, + { + "epoch": 1.6118242777384049, + "grad_norm": 0.02855396270751953, + "learning_rate": 1.1042490121165033e-06, + "loss": 0.0009, + "step": 251310 + }, + { + "epoch": 1.6118884146321908, + "grad_norm": 0.010830280371010303, + "learning_rate": 1.1038981935872272e-06, + "loss": 0.0013, + "step": 251320 + }, + { + "epoch": 1.611952551525977, + "grad_norm": 0.06761819124221802, + "learning_rate": 1.1035474238788912e-06, + "loss": 0.0012, + "step": 251330 + }, + { + "epoch": 1.612016688419763, + "grad_norm": 0.02414173260331154, + "learning_rate": 1.1031967029958897e-06, + "loss": 0.0006, + "step": 251340 + }, + { + "epoch": 1.6120808253135492, + "grad_norm": 0.0437890999019146, + "learning_rate": 1.1028460309426193e-06, + "loss": 0.001, + "step": 251350 + }, + { + "epoch": 1.6121449622073354, + "grad_norm": 0.0029892411548644304, + "learning_rate": 1.102495407723474e-06, + "loss": 0.0027, + "step": 251360 + }, + { + "epoch": 1.6122090991011215, + "grad_norm": 0.0340309739112854, + "learning_rate": 1.1021448333428464e-06, + "loss": 0.0016, + "step": 251370 + }, + { + "epoch": 1.6122732359949077, + "grad_norm": 0.09791620820760727, + "learning_rate": 1.1017943078051285e-06, + "loss": 0.0006, + "step": 251380 + }, + { + "epoch": 1.6123373728886936, + "grad_norm": 0.09915640205144882, + "learning_rate": 1.1014438311147152e-06, + "loss": 0.0016, + "step": 251390 + }, + { + "epoch": 1.6124015097824798, + "grad_norm": 0.1071750596165657, + "learning_rate": 1.1010934032759968e-06, + "loss": 0.001, + "step": 251400 + }, + { + "epoch": 1.6124656466762657, + "grad_norm": 0.1516011655330658, + "learning_rate": 1.1007430242933653e-06, + "loss": 0.0036, + "step": 251410 + }, + { + "epoch": 1.6125297835700518, + "grad_norm": 0.09297031909227371, + "learning_rate": 1.1003926941712085e-06, + "loss": 0.0008, + "step": 251420 + }, + { + "epoch": 1.612593920463838, + "grad_norm": 0.22856420278549194, + "learning_rate": 1.1000424129139197e-06, + "loss": 0.0013, + "step": 251430 + }, + { + "epoch": 1.6126580573576241, + "grad_norm": 0.1917046159505844, + "learning_rate": 1.0996921805258864e-06, + "loss": 0.0023, + "step": 251440 + }, + { + "epoch": 1.6127221942514103, + "grad_norm": 0.05130043625831604, + "learning_rate": 1.0993419970114966e-06, + "loss": 0.0015, + "step": 251450 + }, + { + "epoch": 1.6127863311451964, + "grad_norm": 0.01813359372317791, + "learning_rate": 1.098991862375141e-06, + "loss": 0.0019, + "step": 251460 + }, + { + "epoch": 1.6128504680389824, + "grad_norm": 0.01646178402006626, + "learning_rate": 1.098641776621205e-06, + "loss": 0.0012, + "step": 251470 + }, + { + "epoch": 1.6129146049327685, + "grad_norm": 0.2576580345630646, + "learning_rate": 1.098291739754076e-06, + "loss": 0.0007, + "step": 251480 + }, + { + "epoch": 1.6129787418265547, + "grad_norm": 0.0731213241815567, + "learning_rate": 1.0979417517781383e-06, + "loss": 0.001, + "step": 251490 + }, + { + "epoch": 1.6130428787203406, + "grad_norm": 0.00965266302227974, + "learning_rate": 1.097591812697781e-06, + "loss": 0.0015, + "step": 251500 + }, + { + "epoch": 1.6131070156141267, + "grad_norm": 0.075978122651577, + "learning_rate": 1.0972419225173869e-06, + "loss": 0.001, + "step": 251510 + }, + { + "epoch": 1.6131711525079129, + "grad_norm": 0.05839274823665619, + "learning_rate": 1.0968920812413409e-06, + "loss": 0.0008, + "step": 251520 + }, + { + "epoch": 1.613235289401699, + "grad_norm": 0.12388291209936142, + "learning_rate": 1.0965422888740252e-06, + "loss": 0.0016, + "step": 251530 + }, + { + "epoch": 1.6132994262954852, + "grad_norm": 0.06515951454639435, + "learning_rate": 1.0961925454198257e-06, + "loss": 0.001, + "step": 251540 + }, + { + "epoch": 1.6133635631892713, + "grad_norm": 0.04512935131788254, + "learning_rate": 1.0958428508831237e-06, + "loss": 0.0005, + "step": 251550 + }, + { + "epoch": 1.6134277000830572, + "grad_norm": 0.008951231837272644, + "learning_rate": 1.0954932052683014e-06, + "loss": 0.0017, + "step": 251560 + }, + { + "epoch": 1.6134918369768434, + "grad_norm": 0.20069415867328644, + "learning_rate": 1.0951436085797378e-06, + "loss": 0.0006, + "step": 251570 + }, + { + "epoch": 1.6135559738706293, + "grad_norm": 0.006812704261392355, + "learning_rate": 1.0947940608218171e-06, + "loss": 0.0009, + "step": 251580 + }, + { + "epoch": 1.6136201107644155, + "grad_norm": 0.14547978341579437, + "learning_rate": 1.094444561998918e-06, + "loss": 0.0016, + "step": 251590 + }, + { + "epoch": 1.6136842476582016, + "grad_norm": 0.07378130406141281, + "learning_rate": 1.0940951121154187e-06, + "loss": 0.0016, + "step": 251600 + }, + { + "epoch": 1.6137483845519878, + "grad_norm": 0.06077616661787033, + "learning_rate": 1.0937457111757e-06, + "loss": 0.0024, + "step": 251610 + }, + { + "epoch": 1.613812521445774, + "grad_norm": 0.0864984542131424, + "learning_rate": 1.0933963591841395e-06, + "loss": 0.0012, + "step": 251620 + }, + { + "epoch": 1.61387665833956, + "grad_norm": 0.0956496149301529, + "learning_rate": 1.0930470561451145e-06, + "loss": 0.0008, + "step": 251630 + }, + { + "epoch": 1.6139407952333462, + "grad_norm": 0.09259046614170074, + "learning_rate": 1.0926978020630008e-06, + "loss": 0.0009, + "step": 251640 + }, + { + "epoch": 1.6140049321271321, + "grad_norm": 0.03744299337267876, + "learning_rate": 1.0923485969421776e-06, + "loss": 0.0009, + "step": 251650 + }, + { + "epoch": 1.6140690690209183, + "grad_norm": 0.03746733069419861, + "learning_rate": 1.0919994407870194e-06, + "loss": 0.0012, + "step": 251660 + }, + { + "epoch": 1.6141332059147042, + "grad_norm": 0.023150503635406494, + "learning_rate": 1.0916503336019008e-06, + "loss": 0.0046, + "step": 251670 + }, + { + "epoch": 1.6141973428084904, + "grad_norm": 0.05050637200474739, + "learning_rate": 1.0913012753911955e-06, + "loss": 0.0004, + "step": 251680 + }, + { + "epoch": 1.6142614797022765, + "grad_norm": 0.14014700055122375, + "learning_rate": 1.0909522661592804e-06, + "loss": 0.0014, + "step": 251690 + }, + { + "epoch": 1.6143256165960627, + "grad_norm": 0.012953144498169422, + "learning_rate": 1.090603305910527e-06, + "loss": 0.003, + "step": 251700 + }, + { + "epoch": 1.6143897534898488, + "grad_norm": 0.09172007441520691, + "learning_rate": 1.0902543946493083e-06, + "loss": 0.0008, + "step": 251710 + }, + { + "epoch": 1.614453890383635, + "grad_norm": 0.005768218543380499, + "learning_rate": 1.0899055323799945e-06, + "loss": 0.0008, + "step": 251720 + }, + { + "epoch": 1.6145180272774209, + "grad_norm": 0.14142289757728577, + "learning_rate": 1.0895567191069605e-06, + "loss": 0.0008, + "step": 251730 + }, + { + "epoch": 1.614582164171207, + "grad_norm": 0.07299283146858215, + "learning_rate": 1.0892079548345758e-06, + "loss": 0.0011, + "step": 251740 + }, + { + "epoch": 1.614646301064993, + "grad_norm": 0.0013321398291736841, + "learning_rate": 1.0888592395672087e-06, + "loss": 0.0009, + "step": 251750 + }, + { + "epoch": 1.614710437958779, + "grad_norm": 0.07189654558897018, + "learning_rate": 1.0885105733092322e-06, + "loss": 0.0017, + "step": 251760 + }, + { + "epoch": 1.6147745748525653, + "grad_norm": 0.10784239321947098, + "learning_rate": 1.0881619560650137e-06, + "loss": 0.0017, + "step": 251770 + }, + { + "epoch": 1.6148387117463514, + "grad_norm": 0.07716381549835205, + "learning_rate": 1.0878133878389213e-06, + "loss": 0.0017, + "step": 251780 + }, + { + "epoch": 1.6149028486401376, + "grad_norm": 0.041925061494112015, + "learning_rate": 1.0874648686353224e-06, + "loss": 0.0007, + "step": 251790 + }, + { + "epoch": 1.6149669855339237, + "grad_norm": 0.05732448399066925, + "learning_rate": 1.0871163984585859e-06, + "loss": 0.0015, + "step": 251800 + }, + { + "epoch": 1.6150311224277099, + "grad_norm": 0.05569880083203316, + "learning_rate": 1.0867679773130775e-06, + "loss": 0.0009, + "step": 251810 + }, + { + "epoch": 1.6150952593214958, + "grad_norm": 0.06757721304893494, + "learning_rate": 1.0864196052031627e-06, + "loss": 0.001, + "step": 251820 + }, + { + "epoch": 1.615159396215282, + "grad_norm": 0.3683226406574249, + "learning_rate": 1.0860712821332064e-06, + "loss": 0.002, + "step": 251830 + }, + { + "epoch": 1.6152235331090679, + "grad_norm": 0.05867931991815567, + "learning_rate": 1.0857230081075754e-06, + "loss": 0.0007, + "step": 251840 + }, + { + "epoch": 1.615287670002854, + "grad_norm": 0.2559502422809601, + "learning_rate": 1.085374783130632e-06, + "loss": 0.0009, + "step": 251850 + }, + { + "epoch": 1.6153518068966402, + "grad_norm": 0.08006120473146439, + "learning_rate": 1.08502660720674e-06, + "loss": 0.0012, + "step": 251860 + }, + { + "epoch": 1.6154159437904263, + "grad_norm": 0.01886802911758423, + "learning_rate": 1.0846784803402633e-06, + "loss": 0.0008, + "step": 251870 + }, + { + "epoch": 1.6154800806842124, + "grad_norm": 0.05986326187849045, + "learning_rate": 1.0843304025355638e-06, + "loss": 0.001, + "step": 251880 + }, + { + "epoch": 1.6155442175779986, + "grad_norm": 0.012768320739269257, + "learning_rate": 1.083982373797003e-06, + "loss": 0.0011, + "step": 251890 + }, + { + "epoch": 1.6156083544717845, + "grad_norm": 0.13626137375831604, + "learning_rate": 1.0836343941289395e-06, + "loss": 0.0006, + "step": 251900 + }, + { + "epoch": 1.6156724913655707, + "grad_norm": 0.06842995434999466, + "learning_rate": 1.0832864635357382e-06, + "loss": 0.0011, + "step": 251910 + }, + { + "epoch": 1.6157366282593568, + "grad_norm": 0.11791258305311203, + "learning_rate": 1.082938582021757e-06, + "loss": 0.0012, + "step": 251920 + }, + { + "epoch": 1.6158007651531427, + "grad_norm": 0.11330414563417435, + "learning_rate": 1.082590749591354e-06, + "loss": 0.0019, + "step": 251930 + }, + { + "epoch": 1.615864902046929, + "grad_norm": 0.013453952036798, + "learning_rate": 1.0822429662488875e-06, + "loss": 0.0008, + "step": 251940 + }, + { + "epoch": 1.615929038940715, + "grad_norm": 0.06207551807165146, + "learning_rate": 1.0818952319987187e-06, + "loss": 0.0008, + "step": 251950 + }, + { + "epoch": 1.6159931758345012, + "grad_norm": 0.05609503015875816, + "learning_rate": 1.081547546845202e-06, + "loss": 0.0009, + "step": 251960 + }, + { + "epoch": 1.6160573127282873, + "grad_norm": 0.2726113498210907, + "learning_rate": 1.0811999107926958e-06, + "loss": 0.0015, + "step": 251970 + }, + { + "epoch": 1.6161214496220735, + "grad_norm": 0.04356217011809349, + "learning_rate": 1.0808523238455532e-06, + "loss": 0.0008, + "step": 251980 + }, + { + "epoch": 1.6161855865158594, + "grad_norm": 0.04935280233621597, + "learning_rate": 1.0805047860081335e-06, + "loss": 0.0014, + "step": 251990 + }, + { + "epoch": 1.6162497234096456, + "grad_norm": 0.15753525495529175, + "learning_rate": 1.0801572972847907e-06, + "loss": 0.0025, + "step": 252000 + }, + { + "epoch": 1.6163138603034315, + "grad_norm": 0.0707324743270874, + "learning_rate": 1.0798098576798766e-06, + "loss": 0.0007, + "step": 252010 + }, + { + "epoch": 1.6163779971972176, + "grad_norm": 0.05470231920480728, + "learning_rate": 1.0794624671977465e-06, + "loss": 0.0015, + "step": 252020 + }, + { + "epoch": 1.6164421340910038, + "grad_norm": 0.13599084317684174, + "learning_rate": 1.0791151258427557e-06, + "loss": 0.0015, + "step": 252030 + }, + { + "epoch": 1.61650627098479, + "grad_norm": 0.021795066073536873, + "learning_rate": 1.0787678336192542e-06, + "loss": 0.0015, + "step": 252040 + }, + { + "epoch": 1.616570407878576, + "grad_norm": 0.07633227854967117, + "learning_rate": 1.0784205905315941e-06, + "loss": 0.0012, + "step": 252050 + }, + { + "epoch": 1.6166345447723622, + "grad_norm": 0.008000954985618591, + "learning_rate": 1.078073396584125e-06, + "loss": 0.0011, + "step": 252060 + }, + { + "epoch": 1.6166986816661484, + "grad_norm": 0.01430139597505331, + "learning_rate": 1.0777262517812014e-06, + "loss": 0.0009, + "step": 252070 + }, + { + "epoch": 1.6167628185599343, + "grad_norm": 0.05942973494529724, + "learning_rate": 1.0773791561271706e-06, + "loss": 0.0012, + "step": 252080 + }, + { + "epoch": 1.6168269554537205, + "grad_norm": 0.004552983678877354, + "learning_rate": 1.0770321096263825e-06, + "loss": 0.0011, + "step": 252090 + }, + { + "epoch": 1.6168910923475064, + "grad_norm": 0.12817177176475525, + "learning_rate": 1.0766851122831845e-06, + "loss": 0.001, + "step": 252100 + }, + { + "epoch": 1.6169552292412925, + "grad_norm": 0.1432831883430481, + "learning_rate": 1.0763381641019272e-06, + "loss": 0.0011, + "step": 252110 + }, + { + "epoch": 1.6170193661350787, + "grad_norm": 0.050865817815065384, + "learning_rate": 1.075991265086957e-06, + "loss": 0.0009, + "step": 252120 + }, + { + "epoch": 1.6170835030288648, + "grad_norm": 0.10968196392059326, + "learning_rate": 1.0756444152426192e-06, + "loss": 0.0012, + "step": 252130 + }, + { + "epoch": 1.617147639922651, + "grad_norm": 0.08784846216440201, + "learning_rate": 1.075297614573263e-06, + "loss": 0.0009, + "step": 252140 + }, + { + "epoch": 1.6172117768164371, + "grad_norm": 0.0864066407084465, + "learning_rate": 1.0749508630832329e-06, + "loss": 0.0008, + "step": 252150 + }, + { + "epoch": 1.617275913710223, + "grad_norm": 0.15843220055103302, + "learning_rate": 1.074604160776873e-06, + "loss": 0.0015, + "step": 252160 + }, + { + "epoch": 1.6173400506040092, + "grad_norm": 0.12567143142223358, + "learning_rate": 1.0742575076585276e-06, + "loss": 0.0016, + "step": 252170 + }, + { + "epoch": 1.6174041874977951, + "grad_norm": 0.048958681523799896, + "learning_rate": 1.0739109037325423e-06, + "loss": 0.0006, + "step": 252180 + }, + { + "epoch": 1.6174683243915813, + "grad_norm": 0.08918514847755432, + "learning_rate": 1.07356434900326e-06, + "loss": 0.001, + "step": 252190 + }, + { + "epoch": 1.6175324612853674, + "grad_norm": 0.004656277596950531, + "learning_rate": 1.0732178434750218e-06, + "loss": 0.0012, + "step": 252200 + }, + { + "epoch": 1.6175965981791536, + "grad_norm": 0.13030418753623962, + "learning_rate": 1.0728713871521695e-06, + "loss": 0.0016, + "step": 252210 + }, + { + "epoch": 1.6176607350729397, + "grad_norm": 0.03686818480491638, + "learning_rate": 1.0725249800390468e-06, + "loss": 0.0011, + "step": 252220 + }, + { + "epoch": 1.6177248719667259, + "grad_norm": 0.0056649139150977135, + "learning_rate": 1.0721786221399928e-06, + "loss": 0.0012, + "step": 252230 + }, + { + "epoch": 1.617789008860512, + "grad_norm": 0.06054788827896118, + "learning_rate": 1.0718323134593477e-06, + "loss": 0.0031, + "step": 252240 + }, + { + "epoch": 1.617853145754298, + "grad_norm": 0.0929122045636177, + "learning_rate": 1.0714860540014504e-06, + "loss": 0.001, + "step": 252250 + }, + { + "epoch": 1.617917282648084, + "grad_norm": 0.09619086980819702, + "learning_rate": 1.0711398437706416e-06, + "loss": 0.0007, + "step": 252260 + }, + { + "epoch": 1.61798141954187, + "grad_norm": 0.06638479977846146, + "learning_rate": 1.0707936827712584e-06, + "loss": 0.0013, + "step": 252270 + }, + { + "epoch": 1.6180455564356562, + "grad_norm": 0.172328919172287, + "learning_rate": 1.0704475710076367e-06, + "loss": 0.0012, + "step": 252280 + }, + { + "epoch": 1.6181096933294423, + "grad_norm": 0.010810944251716137, + "learning_rate": 1.0701015084841171e-06, + "loss": 0.001, + "step": 252290 + }, + { + "epoch": 1.6181738302232285, + "grad_norm": 0.04878225550055504, + "learning_rate": 1.0697554952050342e-06, + "loss": 0.0017, + "step": 252300 + }, + { + "epoch": 1.6182379671170146, + "grad_norm": 0.06370214372873306, + "learning_rate": 1.0694095311747243e-06, + "loss": 0.0008, + "step": 252310 + }, + { + "epoch": 1.6183021040108008, + "grad_norm": 0.04039781913161278, + "learning_rate": 1.0690636163975204e-06, + "loss": 0.0008, + "step": 252320 + }, + { + "epoch": 1.6183662409045867, + "grad_norm": 0.07960638403892517, + "learning_rate": 1.0687177508777602e-06, + "loss": 0.0015, + "step": 252330 + }, + { + "epoch": 1.6184303777983728, + "grad_norm": 0.43764522671699524, + "learning_rate": 1.0683719346197758e-06, + "loss": 0.0012, + "step": 252340 + }, + { + "epoch": 1.618494514692159, + "grad_norm": 0.03488235920667648, + "learning_rate": 1.0680261676279014e-06, + "loss": 0.0007, + "step": 252350 + }, + { + "epoch": 1.618558651585945, + "grad_norm": 0.035237208008766174, + "learning_rate": 1.067680449906468e-06, + "loss": 0.0007, + "step": 252360 + }, + { + "epoch": 1.618622788479731, + "grad_norm": 0.07528963685035706, + "learning_rate": 1.0673347814598101e-06, + "loss": 0.0012, + "step": 252370 + }, + { + "epoch": 1.6186869253735172, + "grad_norm": 0.20800499618053436, + "learning_rate": 1.066989162292258e-06, + "loss": 0.0029, + "step": 252380 + }, + { + "epoch": 1.6187510622673034, + "grad_norm": 0.13910309970378876, + "learning_rate": 1.0666435924081424e-06, + "loss": 0.0012, + "step": 252390 + }, + { + "epoch": 1.6188151991610895, + "grad_norm": 0.005096248351037502, + "learning_rate": 1.0662980718117927e-06, + "loss": 0.0012, + "step": 252400 + }, + { + "epoch": 1.6188793360548757, + "grad_norm": 0.13247188925743103, + "learning_rate": 1.065952600507541e-06, + "loss": 0.0014, + "step": 252410 + }, + { + "epoch": 1.6189434729486616, + "grad_norm": 0.05428318679332733, + "learning_rate": 1.0656071784997147e-06, + "loss": 0.0008, + "step": 252420 + }, + { + "epoch": 1.6190076098424477, + "grad_norm": 0.09457910805940628, + "learning_rate": 1.0652618057926405e-06, + "loss": 0.0008, + "step": 252430 + }, + { + "epoch": 1.6190717467362337, + "grad_norm": 0.07830148190259933, + "learning_rate": 1.06491648239065e-06, + "loss": 0.0015, + "step": 252440 + }, + { + "epoch": 1.6191358836300198, + "grad_norm": 0.05774727463722229, + "learning_rate": 1.064571208298068e-06, + "loss": 0.0007, + "step": 252450 + }, + { + "epoch": 1.619200020523806, + "grad_norm": 0.08623258024454117, + "learning_rate": 1.0642259835192215e-06, + "loss": 0.0012, + "step": 252460 + }, + { + "epoch": 1.619264157417592, + "grad_norm": 0.051753610372543335, + "learning_rate": 1.0638808080584346e-06, + "loss": 0.0024, + "step": 252470 + }, + { + "epoch": 1.6193282943113783, + "grad_norm": 0.028300661593675613, + "learning_rate": 1.063535681920036e-06, + "loss": 0.001, + "step": 252480 + }, + { + "epoch": 1.6193924312051644, + "grad_norm": 0.09492386877536774, + "learning_rate": 1.0631906051083484e-06, + "loss": 0.0018, + "step": 252490 + }, + { + "epoch": 1.6194565680989506, + "grad_norm": 0.0015070786466822028, + "learning_rate": 1.0628455776276964e-06, + "loss": 0.0003, + "step": 252500 + }, + { + "epoch": 1.6195207049927365, + "grad_norm": 0.03143575042486191, + "learning_rate": 1.0625005994824017e-06, + "loss": 0.0017, + "step": 252510 + }, + { + "epoch": 1.6195848418865226, + "grad_norm": 0.07570742070674896, + "learning_rate": 1.06215567067679e-06, + "loss": 0.0008, + "step": 252520 + }, + { + "epoch": 1.6196489787803086, + "grad_norm": 0.036942899227142334, + "learning_rate": 1.0618107912151815e-06, + "loss": 0.0014, + "step": 252530 + }, + { + "epoch": 1.6197131156740947, + "grad_norm": 0.06867800652980804, + "learning_rate": 1.0614659611018991e-06, + "loss": 0.0008, + "step": 252540 + }, + { + "epoch": 1.6197772525678809, + "grad_norm": 0.14280128479003906, + "learning_rate": 1.0611211803412614e-06, + "loss": 0.0009, + "step": 252550 + }, + { + "epoch": 1.619841389461667, + "grad_norm": 0.03174474462866783, + "learning_rate": 1.0607764489375915e-06, + "loss": 0.0014, + "step": 252560 + }, + { + "epoch": 1.6199055263554532, + "grad_norm": 0.24680443108081818, + "learning_rate": 1.0604317668952084e-06, + "loss": 0.0011, + "step": 252570 + }, + { + "epoch": 1.6199696632492393, + "grad_norm": 0.05565698444843292, + "learning_rate": 1.0600871342184294e-06, + "loss": 0.0009, + "step": 252580 + }, + { + "epoch": 1.6200338001430252, + "grad_norm": 0.2579374313354492, + "learning_rate": 1.0597425509115756e-06, + "loss": 0.0009, + "step": 252590 + }, + { + "epoch": 1.6200979370368114, + "grad_norm": 0.007848929613828659, + "learning_rate": 1.0593980169789636e-06, + "loss": 0.0016, + "step": 252600 + }, + { + "epoch": 1.6201620739305973, + "grad_norm": 0.004670563619583845, + "learning_rate": 1.0590535324249113e-06, + "loss": 0.0008, + "step": 252610 + }, + { + "epoch": 1.6202262108243835, + "grad_norm": 0.03399378061294556, + "learning_rate": 1.0587090972537327e-06, + "loss": 0.0017, + "step": 252620 + }, + { + "epoch": 1.6202903477181696, + "grad_norm": 0.10650131106376648, + "learning_rate": 1.0583647114697483e-06, + "loss": 0.0008, + "step": 252630 + }, + { + "epoch": 1.6203544846119557, + "grad_norm": 0.1858557015657425, + "learning_rate": 1.05802037507727e-06, + "loss": 0.0016, + "step": 252640 + }, + { + "epoch": 1.620418621505742, + "grad_norm": 0.08053423464298248, + "learning_rate": 1.0576760880806142e-06, + "loss": 0.0015, + "step": 252650 + }, + { + "epoch": 1.620482758399528, + "grad_norm": 0.09590917080640793, + "learning_rate": 1.0573318504840935e-06, + "loss": 0.0017, + "step": 252660 + }, + { + "epoch": 1.6205468952933142, + "grad_norm": 0.03918692097067833, + "learning_rate": 1.0569876622920232e-06, + "loss": 0.0003, + "step": 252670 + }, + { + "epoch": 1.6206110321871001, + "grad_norm": 0.14594435691833496, + "learning_rate": 1.0566435235087157e-06, + "loss": 0.001, + "step": 252680 + }, + { + "epoch": 1.6206751690808863, + "grad_norm": 0.10581952333450317, + "learning_rate": 1.0562994341384835e-06, + "loss": 0.0016, + "step": 252690 + }, + { + "epoch": 1.6207393059746722, + "grad_norm": 0.06139199808239937, + "learning_rate": 1.0559553941856365e-06, + "loss": 0.0012, + "step": 252700 + }, + { + "epoch": 1.6208034428684583, + "grad_norm": 0.09698482602834702, + "learning_rate": 1.0556114036544878e-06, + "loss": 0.0014, + "step": 252710 + }, + { + "epoch": 1.6208675797622445, + "grad_norm": 0.09465713798999786, + "learning_rate": 1.055267462549348e-06, + "loss": 0.0009, + "step": 252720 + }, + { + "epoch": 1.6209317166560306, + "grad_norm": 0.11443684250116348, + "learning_rate": 1.0549235708745249e-06, + "loss": 0.0043, + "step": 252730 + }, + { + "epoch": 1.6209958535498168, + "grad_norm": 0.1651214212179184, + "learning_rate": 1.0545797286343296e-06, + "loss": 0.001, + "step": 252740 + }, + { + "epoch": 1.621059990443603, + "grad_norm": 0.06214063987135887, + "learning_rate": 1.0542359358330707e-06, + "loss": 0.0019, + "step": 252750 + }, + { + "epoch": 1.621124127337389, + "grad_norm": 0.034032728523015976, + "learning_rate": 1.053892192475055e-06, + "loss": 0.0013, + "step": 252760 + }, + { + "epoch": 1.621188264231175, + "grad_norm": 0.011761156842112541, + "learning_rate": 1.0535484985645895e-06, + "loss": 0.0007, + "step": 252770 + }, + { + "epoch": 1.6212524011249612, + "grad_norm": 0.1583254039287567, + "learning_rate": 1.0532048541059814e-06, + "loss": 0.0007, + "step": 252780 + }, + { + "epoch": 1.621316538018747, + "grad_norm": 0.15233290195465088, + "learning_rate": 1.0528612591035386e-06, + "loss": 0.0008, + "step": 252790 + }, + { + "epoch": 1.6213806749125332, + "grad_norm": 0.004722410812973976, + "learning_rate": 1.0525177135615656e-06, + "loss": 0.0011, + "step": 252800 + }, + { + "epoch": 1.6214448118063194, + "grad_norm": 0.10750328749418259, + "learning_rate": 1.0521742174843663e-06, + "loss": 0.0017, + "step": 252810 + }, + { + "epoch": 1.6215089487001055, + "grad_norm": 0.020725587382912636, + "learning_rate": 1.0518307708762448e-06, + "loss": 0.0007, + "step": 252820 + }, + { + "epoch": 1.6215730855938917, + "grad_norm": 0.18014080822467804, + "learning_rate": 1.0514873737415065e-06, + "loss": 0.001, + "step": 252830 + }, + { + "epoch": 1.6216372224876778, + "grad_norm": 0.010807321406900883, + "learning_rate": 1.051144026084453e-06, + "loss": 0.0011, + "step": 252840 + }, + { + "epoch": 1.6217013593814638, + "grad_norm": 0.059095799922943115, + "learning_rate": 1.0508007279093862e-06, + "loss": 0.0012, + "step": 252850 + }, + { + "epoch": 1.62176549627525, + "grad_norm": 0.10498159378767014, + "learning_rate": 1.0504574792206101e-06, + "loss": 0.0014, + "step": 252860 + }, + { + "epoch": 1.6218296331690358, + "grad_norm": 0.13310320675373077, + "learning_rate": 1.050114280022424e-06, + "loss": 0.0014, + "step": 252870 + }, + { + "epoch": 1.621893770062822, + "grad_norm": 0.04748896136879921, + "learning_rate": 1.0497711303191294e-06, + "loss": 0.0006, + "step": 252880 + }, + { + "epoch": 1.6219579069566081, + "grad_norm": 0.01747206225991249, + "learning_rate": 1.049428030115024e-06, + "loss": 0.0006, + "step": 252890 + }, + { + "epoch": 1.6220220438503943, + "grad_norm": 0.017674749717116356, + "learning_rate": 1.0490849794144103e-06, + "loss": 0.0014, + "step": 252900 + }, + { + "epoch": 1.6220861807441804, + "grad_norm": 0.008640342392027378, + "learning_rate": 1.0487419782215858e-06, + "loss": 0.0005, + "step": 252910 + }, + { + "epoch": 1.6221503176379666, + "grad_norm": 0.04351125285029411, + "learning_rate": 1.0483990265408477e-06, + "loss": 0.0008, + "step": 252920 + }, + { + "epoch": 1.6222144545317527, + "grad_norm": 0.004246466793119907, + "learning_rate": 1.048056124376493e-06, + "loss": 0.0007, + "step": 252930 + }, + { + "epoch": 1.6222785914255387, + "grad_norm": 0.013899121433496475, + "learning_rate": 1.0477132717328208e-06, + "loss": 0.0004, + "step": 252940 + }, + { + "epoch": 1.6223427283193248, + "grad_norm": 0.1409715712070465, + "learning_rate": 1.0473704686141261e-06, + "loss": 0.0024, + "step": 252950 + }, + { + "epoch": 1.6224068652131107, + "grad_norm": 0.024969857186079025, + "learning_rate": 1.0470277150247039e-06, + "loss": 0.0016, + "step": 252960 + }, + { + "epoch": 1.6224710021068969, + "grad_norm": 0.16803643107414246, + "learning_rate": 1.0466850109688487e-06, + "loss": 0.0032, + "step": 252970 + }, + { + "epoch": 1.622535139000683, + "grad_norm": 0.11032666265964508, + "learning_rate": 1.0463423564508567e-06, + "loss": 0.0014, + "step": 252980 + }, + { + "epoch": 1.6225992758944692, + "grad_norm": 0.07094945013523102, + "learning_rate": 1.0459997514750204e-06, + "loss": 0.0013, + "step": 252990 + }, + { + "epoch": 1.6226634127882553, + "grad_norm": 0.014905665069818497, + "learning_rate": 1.0456571960456324e-06, + "loss": 0.0011, + "step": 253000 + }, + { + "epoch": 1.6227275496820415, + "grad_norm": 0.08949162811040878, + "learning_rate": 1.0453146901669863e-06, + "loss": 0.0015, + "step": 253010 + }, + { + "epoch": 1.6227916865758274, + "grad_norm": 0.06372623890638351, + "learning_rate": 1.0449722338433743e-06, + "loss": 0.0014, + "step": 253020 + }, + { + "epoch": 1.6228558234696135, + "grad_norm": 0.003106101183220744, + "learning_rate": 1.0446298270790866e-06, + "loss": 0.0017, + "step": 253030 + }, + { + "epoch": 1.6229199603633997, + "grad_norm": 0.0415562242269516, + "learning_rate": 1.0442874698784128e-06, + "loss": 0.0011, + "step": 253040 + }, + { + "epoch": 1.6229840972571856, + "grad_norm": 0.12806938588619232, + "learning_rate": 1.0439451622456453e-06, + "loss": 0.0009, + "step": 253050 + }, + { + "epoch": 1.6230482341509718, + "grad_norm": 0.0016295817913487554, + "learning_rate": 1.043602904185072e-06, + "loss": 0.0011, + "step": 253060 + }, + { + "epoch": 1.623112371044758, + "grad_norm": 0.04967926815152168, + "learning_rate": 1.0432606957009823e-06, + "loss": 0.0009, + "step": 253070 + }, + { + "epoch": 1.623176507938544, + "grad_norm": 0.08386543393135071, + "learning_rate": 1.0429185367976625e-06, + "loss": 0.0006, + "step": 253080 + }, + { + "epoch": 1.6232406448323302, + "grad_norm": 0.05398569256067276, + "learning_rate": 1.0425764274794032e-06, + "loss": 0.001, + "step": 253090 + }, + { + "epoch": 1.6233047817261164, + "grad_norm": 0.02327839843928814, + "learning_rate": 1.0422343677504888e-06, + "loss": 0.0016, + "step": 253100 + }, + { + "epoch": 1.6233689186199023, + "grad_norm": 0.053075287491083145, + "learning_rate": 1.0418923576152069e-06, + "loss": 0.0011, + "step": 253110 + }, + { + "epoch": 1.6234330555136884, + "grad_norm": 0.10003472864627838, + "learning_rate": 1.0415503970778411e-06, + "loss": 0.0008, + "step": 253120 + }, + { + "epoch": 1.6234971924074744, + "grad_norm": 0.23413614928722382, + "learning_rate": 1.041208486142679e-06, + "loss": 0.0011, + "step": 253130 + }, + { + "epoch": 1.6235613293012605, + "grad_norm": 0.17361804842948914, + "learning_rate": 1.0408666248140043e-06, + "loss": 0.0013, + "step": 253140 + }, + { + "epoch": 1.6236254661950467, + "grad_norm": 0.0991687998175621, + "learning_rate": 1.0405248130960988e-06, + "loss": 0.0018, + "step": 253150 + }, + { + "epoch": 1.6236896030888328, + "grad_norm": 0.21054257452487946, + "learning_rate": 1.0401830509932488e-06, + "loss": 0.0024, + "step": 253160 + }, + { + "epoch": 1.623753739982619, + "grad_norm": 0.13935822248458862, + "learning_rate": 1.0398413385097345e-06, + "loss": 0.001, + "step": 253170 + }, + { + "epoch": 1.623817876876405, + "grad_norm": 0.09200213849544525, + "learning_rate": 1.0394996756498394e-06, + "loss": 0.0006, + "step": 253180 + }, + { + "epoch": 1.6238820137701913, + "grad_norm": 0.0637202262878418, + "learning_rate": 1.0391580624178416e-06, + "loss": 0.0005, + "step": 253190 + }, + { + "epoch": 1.6239461506639772, + "grad_norm": 0.020669076591730118, + "learning_rate": 1.0388164988180261e-06, + "loss": 0.0013, + "step": 253200 + }, + { + "epoch": 1.6240102875577633, + "grad_norm": 0.08464609831571579, + "learning_rate": 1.0384749848546704e-06, + "loss": 0.0007, + "step": 253210 + }, + { + "epoch": 1.6240744244515493, + "grad_norm": 0.031027456745505333, + "learning_rate": 1.0381335205320547e-06, + "loss": 0.0009, + "step": 253220 + }, + { + "epoch": 1.6241385613453354, + "grad_norm": 0.05245270952582359, + "learning_rate": 1.0377921058544567e-06, + "loss": 0.0009, + "step": 253230 + }, + { + "epoch": 1.6242026982391216, + "grad_norm": 0.04250001534819603, + "learning_rate": 1.0374507408261558e-06, + "loss": 0.0014, + "step": 253240 + }, + { + "epoch": 1.6242668351329077, + "grad_norm": 0.028570299968123436, + "learning_rate": 1.0371094254514292e-06, + "loss": 0.0006, + "step": 253250 + }, + { + "epoch": 1.6243309720266939, + "grad_norm": 0.18932245671749115, + "learning_rate": 1.0367681597345541e-06, + "loss": 0.0014, + "step": 253260 + }, + { + "epoch": 1.62439510892048, + "grad_norm": 0.003532364498823881, + "learning_rate": 1.0364269436798053e-06, + "loss": 0.0006, + "step": 253270 + }, + { + "epoch": 1.624459245814266, + "grad_norm": 0.0064608012326061726, + "learning_rate": 1.0360857772914601e-06, + "loss": 0.0008, + "step": 253280 + }, + { + "epoch": 1.624523382708052, + "grad_norm": 0.08528894931077957, + "learning_rate": 1.0357446605737936e-06, + "loss": 0.0009, + "step": 253290 + }, + { + "epoch": 1.624587519601838, + "grad_norm": 0.14303399622440338, + "learning_rate": 1.0354035935310785e-06, + "loss": 0.0012, + "step": 253300 + }, + { + "epoch": 1.6246516564956242, + "grad_norm": 0.08092918246984482, + "learning_rate": 1.0350625761675908e-06, + "loss": 0.0006, + "step": 253310 + }, + { + "epoch": 1.6247157933894103, + "grad_norm": 0.09207907319068909, + "learning_rate": 1.0347216084876033e-06, + "loss": 0.0005, + "step": 253320 + }, + { + "epoch": 1.6247799302831964, + "grad_norm": 0.06599237024784088, + "learning_rate": 1.0343806904953873e-06, + "loss": 0.0012, + "step": 253330 + }, + { + "epoch": 1.6248440671769826, + "grad_norm": 0.06715761870145798, + "learning_rate": 1.0340398221952146e-06, + "loss": 0.0007, + "step": 253340 + }, + { + "epoch": 1.6249082040707687, + "grad_norm": 0.174531027674675, + "learning_rate": 1.0336990035913586e-06, + "loss": 0.0008, + "step": 253350 + }, + { + "epoch": 1.624972340964555, + "grad_norm": 0.06355522572994232, + "learning_rate": 1.0333582346880887e-06, + "loss": 0.0006, + "step": 253360 + }, + { + "epoch": 1.6250364778583408, + "grad_norm": 0.22836199402809143, + "learning_rate": 1.0330175154896748e-06, + "loss": 0.0015, + "step": 253370 + }, + { + "epoch": 1.625100614752127, + "grad_norm": 0.16075459122657776, + "learning_rate": 1.0326768460003856e-06, + "loss": 0.0006, + "step": 253380 + }, + { + "epoch": 1.625164751645913, + "grad_norm": 0.011506589129567146, + "learning_rate": 1.0323362262244923e-06, + "loss": 0.0005, + "step": 253390 + }, + { + "epoch": 1.625228888539699, + "grad_norm": 0.09543559700250626, + "learning_rate": 1.0319956561662615e-06, + "loss": 0.0006, + "step": 253400 + }, + { + "epoch": 1.6252930254334852, + "grad_norm": 0.09004479646682739, + "learning_rate": 1.0316551358299614e-06, + "loss": 0.001, + "step": 253410 + }, + { + "epoch": 1.6253571623272713, + "grad_norm": 0.22792024910449982, + "learning_rate": 1.0313146652198568e-06, + "loss": 0.0018, + "step": 253420 + }, + { + "epoch": 1.6254212992210575, + "grad_norm": 0.0872546061873436, + "learning_rate": 1.030974244340217e-06, + "loss": 0.0013, + "step": 253430 + }, + { + "epoch": 1.6254854361148436, + "grad_norm": 0.04959293082356453, + "learning_rate": 1.030633873195307e-06, + "loss": 0.0021, + "step": 253440 + }, + { + "epoch": 1.6255495730086296, + "grad_norm": 0.15947020053863525, + "learning_rate": 1.0302935517893897e-06, + "loss": 0.0014, + "step": 253450 + }, + { + "epoch": 1.6256137099024157, + "grad_norm": 0.08254580199718475, + "learning_rate": 1.029953280126733e-06, + "loss": 0.0005, + "step": 253460 + }, + { + "epoch": 1.6256778467962019, + "grad_norm": 0.11837852001190186, + "learning_rate": 1.0296130582115992e-06, + "loss": 0.0012, + "step": 253470 + }, + { + "epoch": 1.6257419836899878, + "grad_norm": 0.025882352143526077, + "learning_rate": 1.029272886048251e-06, + "loss": 0.001, + "step": 253480 + }, + { + "epoch": 1.625806120583774, + "grad_norm": 0.036259740591049194, + "learning_rate": 1.0289327636409502e-06, + "loss": 0.0007, + "step": 253490 + }, + { + "epoch": 1.62587025747756, + "grad_norm": 0.20507407188415527, + "learning_rate": 1.028592690993961e-06, + "loss": 0.002, + "step": 253500 + }, + { + "epoch": 1.6259343943713462, + "grad_norm": 0.19284480810165405, + "learning_rate": 1.028252668111544e-06, + "loss": 0.0016, + "step": 253510 + }, + { + "epoch": 1.6259985312651324, + "grad_norm": 0.05104293301701546, + "learning_rate": 1.0279126949979596e-06, + "loss": 0.0009, + "step": 253520 + }, + { + "epoch": 1.6260626681589185, + "grad_norm": 0.06307905167341232, + "learning_rate": 1.0275727716574663e-06, + "loss": 0.0006, + "step": 253530 + }, + { + "epoch": 1.6261268050527045, + "grad_norm": 0.026986945420503616, + "learning_rate": 1.0272328980943269e-06, + "loss": 0.0012, + "step": 253540 + }, + { + "epoch": 1.6261909419464906, + "grad_norm": 0.19290678203105927, + "learning_rate": 1.0268930743127976e-06, + "loss": 0.0009, + "step": 253550 + }, + { + "epoch": 1.6262550788402765, + "grad_norm": 0.003200072795152664, + "learning_rate": 1.0265533003171379e-06, + "loss": 0.005, + "step": 253560 + }, + { + "epoch": 1.6263192157340627, + "grad_norm": 0.049872659146785736, + "learning_rate": 1.026213576111605e-06, + "loss": 0.001, + "step": 253570 + }, + { + "epoch": 1.6263833526278488, + "grad_norm": 0.12477219849824905, + "learning_rate": 1.0258739017004565e-06, + "loss": 0.001, + "step": 253580 + }, + { + "epoch": 1.626447489521635, + "grad_norm": 0.06833262741565704, + "learning_rate": 1.0255342770879484e-06, + "loss": 0.0007, + "step": 253590 + }, + { + "epoch": 1.6265116264154211, + "grad_norm": 0.20073255896568298, + "learning_rate": 1.0251947022783365e-06, + "loss": 0.0016, + "step": 253600 + }, + { + "epoch": 1.6265757633092073, + "grad_norm": 0.05578920617699623, + "learning_rate": 1.024855177275874e-06, + "loss": 0.0012, + "step": 253610 + }, + { + "epoch": 1.6266399002029934, + "grad_norm": 0.2767143249511719, + "learning_rate": 1.024515702084819e-06, + "loss": 0.0015, + "step": 253620 + }, + { + "epoch": 1.6267040370967794, + "grad_norm": 0.20888754725456238, + "learning_rate": 1.0241762767094231e-06, + "loss": 0.0018, + "step": 253630 + }, + { + "epoch": 1.6267681739905655, + "grad_norm": 0.11888570338487625, + "learning_rate": 1.0238369011539406e-06, + "loss": 0.0009, + "step": 253640 + }, + { + "epoch": 1.6268323108843514, + "grad_norm": 0.054734617471694946, + "learning_rate": 1.0234975754226212e-06, + "loss": 0.0011, + "step": 253650 + }, + { + "epoch": 1.6268964477781376, + "grad_norm": 0.08102140575647354, + "learning_rate": 1.0231582995197208e-06, + "loss": 0.0006, + "step": 253660 + }, + { + "epoch": 1.6269605846719237, + "grad_norm": 0.02955351211130619, + "learning_rate": 1.0228190734494897e-06, + "loss": 0.0012, + "step": 253670 + }, + { + "epoch": 1.6270247215657099, + "grad_norm": 0.014702515676617622, + "learning_rate": 1.0224798972161776e-06, + "loss": 0.0006, + "step": 253680 + }, + { + "epoch": 1.627088858459496, + "grad_norm": 0.03445766866207123, + "learning_rate": 1.0221407708240333e-06, + "loss": 0.0004, + "step": 253690 + }, + { + "epoch": 1.6271529953532822, + "grad_norm": 0.10714350640773773, + "learning_rate": 1.0218016942773102e-06, + "loss": 0.0022, + "step": 253700 + }, + { + "epoch": 1.627217132247068, + "grad_norm": 0.21742475032806396, + "learning_rate": 1.0214626675802547e-06, + "loss": 0.0008, + "step": 253710 + }, + { + "epoch": 1.6272812691408542, + "grad_norm": 0.05936504900455475, + "learning_rate": 1.0211236907371141e-06, + "loss": 0.0011, + "step": 253720 + }, + { + "epoch": 1.6273454060346402, + "grad_norm": 0.4004567265510559, + "learning_rate": 1.0207847637521385e-06, + "loss": 0.0041, + "step": 253730 + }, + { + "epoch": 1.6274095429284263, + "grad_norm": 0.1915207952260971, + "learning_rate": 1.0204458866295736e-06, + "loss": 0.0027, + "step": 253740 + }, + { + "epoch": 1.6274736798222125, + "grad_norm": 0.10918500274419785, + "learning_rate": 1.0201070593736661e-06, + "loss": 0.0011, + "step": 253750 + }, + { + "epoch": 1.6275378167159986, + "grad_norm": 0.09684329479932785, + "learning_rate": 1.01976828198866e-06, + "loss": 0.0007, + "step": 253760 + }, + { + "epoch": 1.6276019536097848, + "grad_norm": 0.10650018602609634, + "learning_rate": 1.0194295544788036e-06, + "loss": 0.0006, + "step": 253770 + }, + { + "epoch": 1.627666090503571, + "grad_norm": 0.028103556483983994, + "learning_rate": 1.0190908768483398e-06, + "loss": 0.001, + "step": 253780 + }, + { + "epoch": 1.627730227397357, + "grad_norm": 0.07362200319766998, + "learning_rate": 1.0187522491015122e-06, + "loss": 0.0007, + "step": 253790 + }, + { + "epoch": 1.627794364291143, + "grad_norm": 0.11591235548257828, + "learning_rate": 1.018413671242563e-06, + "loss": 0.0011, + "step": 253800 + }, + { + "epoch": 1.6278585011849291, + "grad_norm": 0.14773188531398773, + "learning_rate": 1.0180751432757374e-06, + "loss": 0.0013, + "step": 253810 + }, + { + "epoch": 1.627922638078715, + "grad_norm": 0.12079880386590958, + "learning_rate": 1.0177366652052762e-06, + "loss": 0.0017, + "step": 253820 + }, + { + "epoch": 1.6279867749725012, + "grad_norm": 0.048897046595811844, + "learning_rate": 1.0173982370354192e-06, + "loss": 0.0013, + "step": 253830 + }, + { + "epoch": 1.6280509118662874, + "grad_norm": 0.0130966417491436, + "learning_rate": 1.01705985877041e-06, + "loss": 0.0018, + "step": 253840 + }, + { + "epoch": 1.6281150487600735, + "grad_norm": 0.06076972186565399, + "learning_rate": 1.0167215304144872e-06, + "loss": 0.0011, + "step": 253850 + }, + { + "epoch": 1.6281791856538597, + "grad_norm": 0.0678781121969223, + "learning_rate": 1.0163832519718907e-06, + "loss": 0.0009, + "step": 253860 + }, + { + "epoch": 1.6282433225476458, + "grad_norm": 0.09457668662071228, + "learning_rate": 1.0160450234468578e-06, + "loss": 0.0007, + "step": 253870 + }, + { + "epoch": 1.6283074594414317, + "grad_norm": 0.13398870825767517, + "learning_rate": 1.015706844843629e-06, + "loss": 0.0017, + "step": 253880 + }, + { + "epoch": 1.6283715963352179, + "grad_norm": 0.13375312089920044, + "learning_rate": 1.015368716166441e-06, + "loss": 0.0019, + "step": 253890 + }, + { + "epoch": 1.628435733229004, + "grad_norm": 0.10975903272628784, + "learning_rate": 1.0150306374195313e-06, + "loss": 0.0023, + "step": 253900 + }, + { + "epoch": 1.62849987012279, + "grad_norm": 0.13212823867797852, + "learning_rate": 1.0146926086071334e-06, + "loss": 0.0013, + "step": 253910 + }, + { + "epoch": 1.628564007016576, + "grad_norm": 0.05868707224726677, + "learning_rate": 1.0143546297334878e-06, + "loss": 0.0006, + "step": 253920 + }, + { + "epoch": 1.6286281439103623, + "grad_norm": 0.10828299075365067, + "learning_rate": 1.0140167008028267e-06, + "loss": 0.0005, + "step": 253930 + }, + { + "epoch": 1.6286922808041484, + "grad_norm": 0.09012256562709808, + "learning_rate": 1.013678821819385e-06, + "loss": 0.0009, + "step": 253940 + }, + { + "epoch": 1.6287564176979346, + "grad_norm": 0.1700572669506073, + "learning_rate": 1.0133409927873954e-06, + "loss": 0.0017, + "step": 253950 + }, + { + "epoch": 1.6288205545917207, + "grad_norm": 0.0477653443813324, + "learning_rate": 1.0130032137110935e-06, + "loss": 0.001, + "step": 253960 + }, + { + "epoch": 1.6288846914855066, + "grad_norm": 0.12062278389930725, + "learning_rate": 1.012665484594711e-06, + "loss": 0.0035, + "step": 253970 + }, + { + "epoch": 1.6289488283792928, + "grad_norm": 0.10053012520074844, + "learning_rate": 1.0123278054424784e-06, + "loss": 0.001, + "step": 253980 + }, + { + "epoch": 1.6290129652730787, + "grad_norm": 0.1188049167394638, + "learning_rate": 1.0119901762586298e-06, + "loss": 0.0016, + "step": 253990 + }, + { + "epoch": 1.6290771021668649, + "grad_norm": 0.03905399143695831, + "learning_rate": 1.0116525970473945e-06, + "loss": 0.0008, + "step": 254000 + }, + { + "epoch": 1.629141239060651, + "grad_norm": 0.026444246992468834, + "learning_rate": 1.0113150678130024e-06, + "loss": 0.0006, + "step": 254010 + }, + { + "epoch": 1.6292053759544372, + "grad_norm": 0.08806001394987106, + "learning_rate": 1.0109775885596818e-06, + "loss": 0.0008, + "step": 254020 + }, + { + "epoch": 1.6292695128482233, + "grad_norm": 0.21622851490974426, + "learning_rate": 1.0106401592916644e-06, + "loss": 0.0012, + "step": 254030 + }, + { + "epoch": 1.6293336497420094, + "grad_norm": 0.09526892006397247, + "learning_rate": 1.0103027800131765e-06, + "loss": 0.0009, + "step": 254040 + }, + { + "epoch": 1.6293977866357956, + "grad_norm": 0.17161071300506592, + "learning_rate": 1.0099654507284467e-06, + "loss": 0.0014, + "step": 254050 + }, + { + "epoch": 1.6294619235295815, + "grad_norm": 0.06431101262569427, + "learning_rate": 1.0096281714417e-06, + "loss": 0.0008, + "step": 254060 + }, + { + "epoch": 1.6295260604233677, + "grad_norm": 0.04051023721694946, + "learning_rate": 1.009290942157165e-06, + "loss": 0.0005, + "step": 254070 + }, + { + "epoch": 1.6295901973171536, + "grad_norm": 0.12363533675670624, + "learning_rate": 1.0089537628790675e-06, + "loss": 0.0037, + "step": 254080 + }, + { + "epoch": 1.6296543342109397, + "grad_norm": 0.1416805237531662, + "learning_rate": 1.008616633611631e-06, + "loss": 0.0064, + "step": 254090 + }, + { + "epoch": 1.629718471104726, + "grad_norm": 0.028971588239073753, + "learning_rate": 1.0082795543590796e-06, + "loss": 0.0007, + "step": 254100 + }, + { + "epoch": 1.629782607998512, + "grad_norm": 0.14429962635040283, + "learning_rate": 1.0079425251256397e-06, + "loss": 0.0009, + "step": 254110 + }, + { + "epoch": 1.6298467448922982, + "grad_norm": 0.1125936433672905, + "learning_rate": 1.0076055459155327e-06, + "loss": 0.0025, + "step": 254120 + }, + { + "epoch": 1.6299108817860843, + "grad_norm": 0.15618206560611725, + "learning_rate": 1.0072686167329803e-06, + "loss": 0.0008, + "step": 254130 + }, + { + "epoch": 1.6299750186798703, + "grad_norm": 0.010613231919705868, + "learning_rate": 1.0069317375822073e-06, + "loss": 0.0013, + "step": 254140 + }, + { + "epoch": 1.6300391555736564, + "grad_norm": 0.020671576261520386, + "learning_rate": 1.0065949084674332e-06, + "loss": 0.0008, + "step": 254150 + }, + { + "epoch": 1.6301032924674423, + "grad_norm": 0.13317108154296875, + "learning_rate": 1.0062581293928791e-06, + "loss": 0.002, + "step": 254160 + }, + { + "epoch": 1.6301674293612285, + "grad_norm": 0.1296268254518509, + "learning_rate": 1.0059214003627638e-06, + "loss": 0.0014, + "step": 254170 + }, + { + "epoch": 1.6302315662550146, + "grad_norm": 0.3353487551212311, + "learning_rate": 1.0055847213813085e-06, + "loss": 0.0017, + "step": 254180 + }, + { + "epoch": 1.6302957031488008, + "grad_norm": 0.025075187906622887, + "learning_rate": 1.005248092452732e-06, + "loss": 0.0005, + "step": 254190 + }, + { + "epoch": 1.630359840042587, + "grad_norm": 0.0145181929692626, + "learning_rate": 1.0049115135812514e-06, + "loss": 0.0016, + "step": 254200 + }, + { + "epoch": 1.630423976936373, + "grad_norm": 0.07771790772676468, + "learning_rate": 1.004574984771084e-06, + "loss": 0.0014, + "step": 254210 + }, + { + "epoch": 1.6304881138301592, + "grad_norm": 0.01438713725656271, + "learning_rate": 1.0042385060264486e-06, + "loss": 0.0012, + "step": 254220 + }, + { + "epoch": 1.6305522507239452, + "grad_norm": 0.008997242897748947, + "learning_rate": 1.0039020773515607e-06, + "loss": 0.0019, + "step": 254230 + }, + { + "epoch": 1.6306163876177313, + "grad_norm": 0.004172218032181263, + "learning_rate": 1.0035656987506354e-06, + "loss": 0.0015, + "step": 254240 + }, + { + "epoch": 1.6306805245115172, + "grad_norm": 0.10217877477407455, + "learning_rate": 1.0032293702278866e-06, + "loss": 0.0013, + "step": 254250 + }, + { + "epoch": 1.6307446614053034, + "grad_norm": 0.06913498789072037, + "learning_rate": 1.0028930917875323e-06, + "loss": 0.001, + "step": 254260 + }, + { + "epoch": 1.6308087982990895, + "grad_norm": 0.11332852393388748, + "learning_rate": 1.0025568634337834e-06, + "loss": 0.0013, + "step": 254270 + }, + { + "epoch": 1.6308729351928757, + "grad_norm": 0.09933172911405563, + "learning_rate": 1.0022206851708532e-06, + "loss": 0.0015, + "step": 254280 + }, + { + "epoch": 1.6309370720866618, + "grad_norm": 0.05286799743771553, + "learning_rate": 1.0018845570029562e-06, + "loss": 0.0006, + "step": 254290 + }, + { + "epoch": 1.631001208980448, + "grad_norm": 0.03247305378317833, + "learning_rate": 1.0015484789343027e-06, + "loss": 0.0014, + "step": 254300 + }, + { + "epoch": 1.6310653458742341, + "grad_norm": 0.17723588645458221, + "learning_rate": 1.0012124509691034e-06, + "loss": 0.0016, + "step": 254310 + }, + { + "epoch": 1.63112948276802, + "grad_norm": 0.07294376939535141, + "learning_rate": 1.0008764731115717e-06, + "loss": 0.0025, + "step": 254320 + }, + { + "epoch": 1.6311936196618062, + "grad_norm": 0.21547554433345795, + "learning_rate": 1.000540545365914e-06, + "loss": 0.0008, + "step": 254330 + }, + { + "epoch": 1.6312577565555921, + "grad_norm": 0.06939996778964996, + "learning_rate": 1.000204667736343e-06, + "loss": 0.0014, + "step": 254340 + }, + { + "epoch": 1.6313218934493783, + "grad_norm": 0.14365023374557495, + "learning_rate": 9.998688402270667e-07, + "loss": 0.0012, + "step": 254350 + }, + { + "epoch": 1.6313860303431644, + "grad_norm": 0.004856092389672995, + "learning_rate": 9.995330628422923e-07, + "loss": 0.0008, + "step": 254360 + }, + { + "epoch": 1.6314501672369506, + "grad_norm": 0.046167537569999695, + "learning_rate": 9.99197335586226e-07, + "loss": 0.0011, + "step": 254370 + }, + { + "epoch": 1.6315143041307367, + "grad_norm": 0.15383437275886536, + "learning_rate": 9.988616584630783e-07, + "loss": 0.0025, + "step": 254380 + }, + { + "epoch": 1.6315784410245229, + "grad_norm": 0.016113227233290672, + "learning_rate": 9.985260314770533e-07, + "loss": 0.0008, + "step": 254390 + }, + { + "epoch": 1.6316425779183088, + "grad_norm": 0.19796684384346008, + "learning_rate": 9.981904546323555e-07, + "loss": 0.0031, + "step": 254400 + }, + { + "epoch": 1.631706714812095, + "grad_norm": 0.10615178197622299, + "learning_rate": 9.978549279331934e-07, + "loss": 0.0006, + "step": 254410 + }, + { + "epoch": 1.6317708517058809, + "grad_norm": 0.020054157823324203, + "learning_rate": 9.975194513837687e-07, + "loss": 0.0011, + "step": 254420 + }, + { + "epoch": 1.631834988599667, + "grad_norm": 0.06269645690917969, + "learning_rate": 9.971840249882859e-07, + "loss": 0.0021, + "step": 254430 + }, + { + "epoch": 1.6318991254934532, + "grad_norm": 0.1866074651479721, + "learning_rate": 9.968486487509466e-07, + "loss": 0.0013, + "step": 254440 + }, + { + "epoch": 1.6319632623872393, + "grad_norm": 0.12030469626188278, + "learning_rate": 9.965133226759566e-07, + "loss": 0.0014, + "step": 254450 + }, + { + "epoch": 1.6320273992810255, + "grad_norm": 0.10025019943714142, + "learning_rate": 9.961780467675153e-07, + "loss": 0.001, + "step": 254460 + }, + { + "epoch": 1.6320915361748116, + "grad_norm": 0.07710816711187363, + "learning_rate": 9.95842821029825e-07, + "loss": 0.0006, + "step": 254470 + }, + { + "epoch": 1.6321556730685978, + "grad_norm": 0.08118700981140137, + "learning_rate": 9.955076454670842e-07, + "loss": 0.0011, + "step": 254480 + }, + { + "epoch": 1.6322198099623837, + "grad_norm": 0.001708224997855723, + "learning_rate": 9.951725200834961e-07, + "loss": 0.0008, + "step": 254490 + }, + { + "epoch": 1.6322839468561698, + "grad_norm": 0.1567011922597885, + "learning_rate": 9.94837444883258e-07, + "loss": 0.0009, + "step": 254500 + }, + { + "epoch": 1.6323480837499558, + "grad_norm": 0.0870925784111023, + "learning_rate": 9.9450241987057e-07, + "loss": 0.0015, + "step": 254510 + }, + { + "epoch": 1.632412220643742, + "grad_norm": 0.05215726047754288, + "learning_rate": 9.941674450496276e-07, + "loss": 0.0006, + "step": 254520 + }, + { + "epoch": 1.632476357537528, + "grad_norm": 0.07498734444379807, + "learning_rate": 9.938325204246313e-07, + "loss": 0.001, + "step": 254530 + }, + { + "epoch": 1.6325404944313142, + "grad_norm": 0.10156609863042831, + "learning_rate": 9.934976459997768e-07, + "loss": 0.0022, + "step": 254540 + }, + { + "epoch": 1.6326046313251004, + "grad_norm": 0.01245181169360876, + "learning_rate": 9.931628217792593e-07, + "loss": 0.0014, + "step": 254550 + }, + { + "epoch": 1.6326687682188865, + "grad_norm": 0.12639538943767548, + "learning_rate": 9.928280477672764e-07, + "loss": 0.0009, + "step": 254560 + }, + { + "epoch": 1.6327329051126724, + "grad_norm": 0.01934858411550522, + "learning_rate": 9.924933239680213e-07, + "loss": 0.0026, + "step": 254570 + }, + { + "epoch": 1.6327970420064586, + "grad_norm": 0.006678812671452761, + "learning_rate": 9.921586503856894e-07, + "loss": 0.001, + "step": 254580 + }, + { + "epoch": 1.6328611789002447, + "grad_norm": 0.14333570003509521, + "learning_rate": 9.918240270244727e-07, + "loss": 0.0022, + "step": 254590 + }, + { + "epoch": 1.6329253157940307, + "grad_norm": 0.06659631431102753, + "learning_rate": 9.914894538885671e-07, + "loss": 0.0011, + "step": 254600 + }, + { + "epoch": 1.6329894526878168, + "grad_norm": 0.020249219611287117, + "learning_rate": 9.911549309821632e-07, + "loss": 0.0007, + "step": 254610 + }, + { + "epoch": 1.633053589581603, + "grad_norm": 0.03794463723897934, + "learning_rate": 9.908204583094533e-07, + "loss": 0.0008, + "step": 254620 + }, + { + "epoch": 1.633117726475389, + "grad_norm": 0.07175882905721664, + "learning_rate": 9.904860358746265e-07, + "loss": 0.0028, + "step": 254630 + }, + { + "epoch": 1.6331818633691753, + "grad_norm": 0.00746492063626647, + "learning_rate": 9.90151663681877e-07, + "loss": 0.0006, + "step": 254640 + }, + { + "epoch": 1.6332460002629614, + "grad_norm": 0.019557328894734383, + "learning_rate": 9.898173417353929e-07, + "loss": 0.001, + "step": 254650 + }, + { + "epoch": 1.6333101371567473, + "grad_norm": 0.13451708853244781, + "learning_rate": 9.894830700393637e-07, + "loss": 0.0017, + "step": 254660 + }, + { + "epoch": 1.6333742740505335, + "grad_norm": 0.0878053605556488, + "learning_rate": 9.891488485979762e-07, + "loss": 0.0014, + "step": 254670 + }, + { + "epoch": 1.6334384109443194, + "grad_norm": 0.27910155057907104, + "learning_rate": 9.888146774154217e-07, + "loss": 0.0012, + "step": 254680 + }, + { + "epoch": 1.6335025478381056, + "grad_norm": 0.039628490805625916, + "learning_rate": 9.88480556495886e-07, + "loss": 0.0015, + "step": 254690 + }, + { + "epoch": 1.6335666847318917, + "grad_norm": 0.10816241055727005, + "learning_rate": 9.881464858435547e-07, + "loss": 0.0006, + "step": 254700 + }, + { + "epoch": 1.6336308216256779, + "grad_norm": 0.035373102873563766, + "learning_rate": 9.878124654626165e-07, + "loss": 0.0025, + "step": 254710 + }, + { + "epoch": 1.633694958519464, + "grad_norm": 0.08186127990484238, + "learning_rate": 9.874784953572553e-07, + "loss": 0.0008, + "step": 254720 + }, + { + "epoch": 1.6337590954132502, + "grad_norm": 0.004696046933531761, + "learning_rate": 9.871445755316562e-07, + "loss": 0.0011, + "step": 254730 + }, + { + "epoch": 1.6338232323070363, + "grad_norm": 0.054276540875434875, + "learning_rate": 9.868107059900024e-07, + "loss": 0.0013, + "step": 254740 + }, + { + "epoch": 1.6338873692008222, + "grad_norm": 0.03867164999246597, + "learning_rate": 9.8647688673648e-07, + "loss": 0.001, + "step": 254750 + }, + { + "epoch": 1.6339515060946084, + "grad_norm": 0.07352355122566223, + "learning_rate": 9.861431177752706e-07, + "loss": 0.0012, + "step": 254760 + }, + { + "epoch": 1.6340156429883943, + "grad_norm": 0.05755196884274483, + "learning_rate": 9.858093991105567e-07, + "loss": 0.0009, + "step": 254770 + }, + { + "epoch": 1.6340797798821804, + "grad_norm": 0.08385112136602402, + "learning_rate": 9.854757307465185e-07, + "loss": 0.0011, + "step": 254780 + }, + { + "epoch": 1.6341439167759666, + "grad_norm": 0.10787700861692429, + "learning_rate": 9.851421126873395e-07, + "loss": 0.0018, + "step": 254790 + }, + { + "epoch": 1.6342080536697527, + "grad_norm": 0.051003411412239075, + "learning_rate": 9.848085449371998e-07, + "loss": 0.0012, + "step": 254800 + }, + { + "epoch": 1.634272190563539, + "grad_norm": 0.28282269835472107, + "learning_rate": 9.844750275002784e-07, + "loss": 0.0013, + "step": 254810 + }, + { + "epoch": 1.634336327457325, + "grad_norm": 0.059521906077861786, + "learning_rate": 9.841415603807536e-07, + "loss": 0.001, + "step": 254820 + }, + { + "epoch": 1.634400464351111, + "grad_norm": 0.11180324852466583, + "learning_rate": 9.838081435828061e-07, + "loss": 0.0006, + "step": 254830 + }, + { + "epoch": 1.6344646012448971, + "grad_norm": 0.02087988890707493, + "learning_rate": 9.834747771106128e-07, + "loss": 0.001, + "step": 254840 + }, + { + "epoch": 1.634528738138683, + "grad_norm": 0.042987458407878876, + "learning_rate": 9.831414609683503e-07, + "loss": 0.0013, + "step": 254850 + }, + { + "epoch": 1.6345928750324692, + "grad_norm": 0.08517789840698242, + "learning_rate": 9.82808195160197e-07, + "loss": 0.0013, + "step": 254860 + }, + { + "epoch": 1.6346570119262553, + "grad_norm": 0.07343614101409912, + "learning_rate": 9.824749796903281e-07, + "loss": 0.001, + "step": 254870 + }, + { + "epoch": 1.6347211488200415, + "grad_norm": 0.04992000758647919, + "learning_rate": 9.821418145629196e-07, + "loss": 0.0008, + "step": 254880 + }, + { + "epoch": 1.6347852857138276, + "grad_norm": 0.10211850702762604, + "learning_rate": 9.818086997821436e-07, + "loss": 0.0007, + "step": 254890 + }, + { + "epoch": 1.6348494226076138, + "grad_norm": 0.0167512446641922, + "learning_rate": 9.814756353521782e-07, + "loss": 0.0013, + "step": 254900 + }, + { + "epoch": 1.6349135595014, + "grad_norm": 0.14388491213321686, + "learning_rate": 9.811426212771946e-07, + "loss": 0.0006, + "step": 254910 + }, + { + "epoch": 1.6349776963951859, + "grad_norm": 0.021333513781428337, + "learning_rate": 9.808096575613668e-07, + "loss": 0.0006, + "step": 254920 + }, + { + "epoch": 1.635041833288972, + "grad_norm": 0.0028631179593503475, + "learning_rate": 9.804767442088647e-07, + "loss": 0.001, + "step": 254930 + }, + { + "epoch": 1.635105970182758, + "grad_norm": 0.052888594567775726, + "learning_rate": 9.801438812238627e-07, + "loss": 0.0007, + "step": 254940 + }, + { + "epoch": 1.635170107076544, + "grad_norm": 0.133683443069458, + "learning_rate": 9.798110686105317e-07, + "loss": 0.0009, + "step": 254950 + }, + { + "epoch": 1.6352342439703302, + "grad_norm": 0.07119552791118622, + "learning_rate": 9.794783063730407e-07, + "loss": 0.0009, + "step": 254960 + }, + { + "epoch": 1.6352983808641164, + "grad_norm": 0.11453062295913696, + "learning_rate": 9.791455945155588e-07, + "loss": 0.0008, + "step": 254970 + }, + { + "epoch": 1.6353625177579025, + "grad_norm": 0.04229366406798363, + "learning_rate": 9.788129330422568e-07, + "loss": 0.0008, + "step": 254980 + }, + { + "epoch": 1.6354266546516887, + "grad_norm": 0.1479012817144394, + "learning_rate": 9.784803219573036e-07, + "loss": 0.0022, + "step": 254990 + }, + { + "epoch": 1.6354907915454746, + "grad_norm": 0.03219074010848999, + "learning_rate": 9.781477612648644e-07, + "loss": 0.0007, + "step": 255000 + }, + { + "epoch": 1.6355549284392608, + "grad_norm": 0.05763757973909378, + "learning_rate": 9.778152509691097e-07, + "loss": 0.0006, + "step": 255010 + }, + { + "epoch": 1.635619065333047, + "grad_norm": 0.04857158660888672, + "learning_rate": 9.77482791074204e-07, + "loss": 0.0008, + "step": 255020 + }, + { + "epoch": 1.6356832022268328, + "grad_norm": 0.08321210741996765, + "learning_rate": 9.771503815843143e-07, + "loss": 0.0015, + "step": 255030 + }, + { + "epoch": 1.635747339120619, + "grad_norm": 0.01625431887805462, + "learning_rate": 9.76818022503604e-07, + "loss": 0.0012, + "step": 255040 + }, + { + "epoch": 1.6358114760144051, + "grad_norm": 0.1378721445798874, + "learning_rate": 9.764857138362405e-07, + "loss": 0.0012, + "step": 255050 + }, + { + "epoch": 1.6358756129081913, + "grad_norm": 0.036976706236600876, + "learning_rate": 9.76153455586386e-07, + "loss": 0.0012, + "step": 255060 + }, + { + "epoch": 1.6359397498019774, + "grad_norm": 0.005930387880653143, + "learning_rate": 9.758212477582052e-07, + "loss": 0.0005, + "step": 255070 + }, + { + "epoch": 1.6360038866957636, + "grad_norm": 0.09951237589120865, + "learning_rate": 9.75489090355859e-07, + "loss": 0.001, + "step": 255080 + }, + { + "epoch": 1.6360680235895495, + "grad_norm": 0.005069872364401817, + "learning_rate": 9.7515698338351e-07, + "loss": 0.0007, + "step": 255090 + }, + { + "epoch": 1.6361321604833357, + "grad_norm": 0.01948101818561554, + "learning_rate": 9.748249268453225e-07, + "loss": 0.001, + "step": 255100 + }, + { + "epoch": 1.6361962973771216, + "grad_norm": 0.0026461135130375624, + "learning_rate": 9.744929207454552e-07, + "loss": 0.0017, + "step": 255110 + }, + { + "epoch": 1.6362604342709077, + "grad_norm": 0.05868653580546379, + "learning_rate": 9.741609650880674e-07, + "loss": 0.0016, + "step": 255120 + }, + { + "epoch": 1.6363245711646939, + "grad_norm": 0.2810801565647125, + "learning_rate": 9.738290598773209e-07, + "loss": 0.0011, + "step": 255130 + }, + { + "epoch": 1.63638870805848, + "grad_norm": 0.01811443269252777, + "learning_rate": 9.734972051173742e-07, + "loss": 0.0017, + "step": 255140 + }, + { + "epoch": 1.6364528449522662, + "grad_norm": 0.15347135066986084, + "learning_rate": 9.731654008123848e-07, + "loss": 0.001, + "step": 255150 + }, + { + "epoch": 1.6365169818460523, + "grad_norm": 0.041657716035842896, + "learning_rate": 9.728336469665096e-07, + "loss": 0.0019, + "step": 255160 + }, + { + "epoch": 1.6365811187398385, + "grad_norm": 0.05868009477853775, + "learning_rate": 9.725019435839085e-07, + "loss": 0.0015, + "step": 255170 + }, + { + "epoch": 1.6366452556336244, + "grad_norm": 0.1378551423549652, + "learning_rate": 9.721702906687363e-07, + "loss": 0.0009, + "step": 255180 + }, + { + "epoch": 1.6367093925274105, + "grad_norm": 0.015106326900422573, + "learning_rate": 9.71838688225149e-07, + "loss": 0.001, + "step": 255190 + }, + { + "epoch": 1.6367735294211965, + "grad_norm": 0.12061250954866409, + "learning_rate": 9.715071362573002e-07, + "loss": 0.0011, + "step": 255200 + }, + { + "epoch": 1.6368376663149826, + "grad_norm": 0.05114997923374176, + "learning_rate": 9.71175634769348e-07, + "loss": 0.0009, + "step": 255210 + }, + { + "epoch": 1.6369018032087688, + "grad_norm": 0.009737277403473854, + "learning_rate": 9.708441837654441e-07, + "loss": 0.0013, + "step": 255220 + }, + { + "epoch": 1.636965940102555, + "grad_norm": 0.08145773410797119, + "learning_rate": 9.705127832497423e-07, + "loss": 0.0009, + "step": 255230 + }, + { + "epoch": 1.637030076996341, + "grad_norm": 0.3063494861125946, + "learning_rate": 9.701814332263943e-07, + "loss": 0.0017, + "step": 255240 + }, + { + "epoch": 1.6370942138901272, + "grad_norm": 0.09998942911624908, + "learning_rate": 9.698501336995536e-07, + "loss": 0.002, + "step": 255250 + }, + { + "epoch": 1.6371583507839131, + "grad_norm": 0.11387070268392563, + "learning_rate": 9.695188846733712e-07, + "loss": 0.0015, + "step": 255260 + }, + { + "epoch": 1.6372224876776993, + "grad_norm": 0.043187957257032394, + "learning_rate": 9.691876861519967e-07, + "loss": 0.0024, + "step": 255270 + }, + { + "epoch": 1.6372866245714852, + "grad_norm": 0.12112016975879669, + "learning_rate": 9.68856538139582e-07, + "loss": 0.0012, + "step": 255280 + }, + { + "epoch": 1.6373507614652714, + "grad_norm": 0.03679222613573074, + "learning_rate": 9.685254406402767e-07, + "loss": 0.0009, + "step": 255290 + }, + { + "epoch": 1.6374148983590575, + "grad_norm": 0.033384256064891815, + "learning_rate": 9.681943936582282e-07, + "loss": 0.0008, + "step": 255300 + }, + { + "epoch": 1.6374790352528437, + "grad_norm": 0.07566209882497787, + "learning_rate": 9.678633971975842e-07, + "loss": 0.0013, + "step": 255310 + }, + { + "epoch": 1.6375431721466298, + "grad_norm": 0.1315520852804184, + "learning_rate": 9.67532451262495e-07, + "loss": 0.0024, + "step": 255320 + }, + { + "epoch": 1.637607309040416, + "grad_norm": 0.042651720345020294, + "learning_rate": 9.672015558571063e-07, + "loss": 0.0013, + "step": 255330 + }, + { + "epoch": 1.637671445934202, + "grad_norm": 0.11850108206272125, + "learning_rate": 9.668707109855646e-07, + "loss": 0.0015, + "step": 255340 + }, + { + "epoch": 1.637735582827988, + "grad_norm": 0.06423966586589813, + "learning_rate": 9.665399166520135e-07, + "loss": 0.0006, + "step": 255350 + }, + { + "epoch": 1.6377997197217742, + "grad_norm": 0.04013515263795853, + "learning_rate": 9.66209172860601e-07, + "loss": 0.0007, + "step": 255360 + }, + { + "epoch": 1.63786385661556, + "grad_norm": 0.1369294822216034, + "learning_rate": 9.658784796154714e-07, + "loss": 0.0017, + "step": 255370 + }, + { + "epoch": 1.6379279935093463, + "grad_norm": 0.04467201605439186, + "learning_rate": 9.655478369207665e-07, + "loss": 0.0018, + "step": 255380 + }, + { + "epoch": 1.6379921304031324, + "grad_norm": 0.01031948160380125, + "learning_rate": 9.652172447806302e-07, + "loss": 0.001, + "step": 255390 + }, + { + "epoch": 1.6380562672969186, + "grad_norm": 0.03769548982381821, + "learning_rate": 9.64886703199206e-07, + "loss": 0.0009, + "step": 255400 + }, + { + "epoch": 1.6381204041907047, + "grad_norm": 0.02971324324607849, + "learning_rate": 9.645562121806357e-07, + "loss": 0.0011, + "step": 255410 + }, + { + "epoch": 1.6381845410844909, + "grad_norm": 0.08204713463783264, + "learning_rate": 9.642257717290586e-07, + "loss": 0.0017, + "step": 255420 + }, + { + "epoch": 1.6382486779782768, + "grad_norm": 0.0884409248828888, + "learning_rate": 9.638953818486185e-07, + "loss": 0.0009, + "step": 255430 + }, + { + "epoch": 1.638312814872063, + "grad_norm": 0.029870890080928802, + "learning_rate": 9.635650425434534e-07, + "loss": 0.001, + "step": 255440 + }, + { + "epoch": 1.638376951765849, + "grad_norm": 0.05527724698185921, + "learning_rate": 9.632347538177033e-07, + "loss": 0.0015, + "step": 255450 + }, + { + "epoch": 1.638441088659635, + "grad_norm": 0.013997524045407772, + "learning_rate": 9.629045156755057e-07, + "loss": 0.0013, + "step": 255460 + }, + { + "epoch": 1.6385052255534212, + "grad_norm": 0.04378235340118408, + "learning_rate": 9.62574328121001e-07, + "loss": 0.0007, + "step": 255470 + }, + { + "epoch": 1.6385693624472073, + "grad_norm": 0.014343179762363434, + "learning_rate": 9.622441911583253e-07, + "loss": 0.0012, + "step": 255480 + }, + { + "epoch": 1.6386334993409934, + "grad_norm": 0.05192306637763977, + "learning_rate": 9.619141047916158e-07, + "loss": 0.0007, + "step": 255490 + }, + { + "epoch": 1.6386976362347796, + "grad_norm": 0.016144398599863052, + "learning_rate": 9.615840690250073e-07, + "loss": 0.0012, + "step": 255500 + }, + { + "epoch": 1.6387617731285657, + "grad_norm": 0.04556126892566681, + "learning_rate": 9.612540838626378e-07, + "loss": 0.0019, + "step": 255510 + }, + { + "epoch": 1.6388259100223517, + "grad_norm": 0.015942873433232307, + "learning_rate": 9.609241493086413e-07, + "loss": 0.0014, + "step": 255520 + }, + { + "epoch": 1.6388900469161378, + "grad_norm": 0.049452684819698334, + "learning_rate": 9.605942653671503e-07, + "loss": 0.0008, + "step": 255530 + }, + { + "epoch": 1.6389541838099237, + "grad_norm": 0.25146812200546265, + "learning_rate": 9.602644320423011e-07, + "loss": 0.0013, + "step": 255540 + }, + { + "epoch": 1.63901832070371, + "grad_norm": 0.11109595745801926, + "learning_rate": 9.59934649338226e-07, + "loss": 0.0012, + "step": 255550 + }, + { + "epoch": 1.639082457597496, + "grad_norm": 0.09703154861927032, + "learning_rate": 9.596049172590577e-07, + "loss": 0.0009, + "step": 255560 + }, + { + "epoch": 1.6391465944912822, + "grad_norm": 0.07881749421358109, + "learning_rate": 9.592752358089253e-07, + "loss": 0.0018, + "step": 255570 + }, + { + "epoch": 1.6392107313850683, + "grad_norm": 0.02280495874583721, + "learning_rate": 9.589456049919638e-07, + "loss": 0.0006, + "step": 255580 + }, + { + "epoch": 1.6392748682788545, + "grad_norm": 0.070301353931427, + "learning_rate": 9.586160248123015e-07, + "loss": 0.0015, + "step": 255590 + }, + { + "epoch": 1.6393390051726406, + "grad_norm": 0.034235622733831406, + "learning_rate": 9.582864952740694e-07, + "loss": 0.0012, + "step": 255600 + }, + { + "epoch": 1.6394031420664266, + "grad_norm": 0.010166875086724758, + "learning_rate": 9.579570163813945e-07, + "loss": 0.0004, + "step": 255610 + }, + { + "epoch": 1.6394672789602127, + "grad_norm": 0.06755946576595306, + "learning_rate": 9.576275881384084e-07, + "loss": 0.0011, + "step": 255620 + }, + { + "epoch": 1.6395314158539986, + "grad_norm": 0.022154757753014565, + "learning_rate": 9.572982105492373e-07, + "loss": 0.0009, + "step": 255630 + }, + { + "epoch": 1.6395955527477848, + "grad_norm": 0.12332911789417267, + "learning_rate": 9.56968883618009e-07, + "loss": 0.001, + "step": 255640 + }, + { + "epoch": 1.639659689641571, + "grad_norm": 0.02179088443517685, + "learning_rate": 9.566396073488487e-07, + "loss": 0.0017, + "step": 255650 + }, + { + "epoch": 1.639723826535357, + "grad_norm": 0.12347091734409332, + "learning_rate": 9.563103817458857e-07, + "loss": 0.0028, + "step": 255660 + }, + { + "epoch": 1.6397879634291432, + "grad_norm": 0.010825304314494133, + "learning_rate": 9.559812068132434e-07, + "loss": 0.0012, + "step": 255670 + }, + { + "epoch": 1.6398521003229294, + "grad_norm": 0.1055217906832695, + "learning_rate": 9.556520825550453e-07, + "loss": 0.0012, + "step": 255680 + }, + { + "epoch": 1.6399162372167153, + "grad_norm": 0.02588566020131111, + "learning_rate": 9.553230089754184e-07, + "loss": 0.0013, + "step": 255690 + }, + { + "epoch": 1.6399803741105015, + "grad_norm": 0.05414152890443802, + "learning_rate": 9.549939860784852e-07, + "loss": 0.0013, + "step": 255700 + }, + { + "epoch": 1.6400445110042874, + "grad_norm": 0.11356854438781738, + "learning_rate": 9.546650138683678e-07, + "loss": 0.0014, + "step": 255710 + }, + { + "epoch": 1.6401086478980735, + "grad_norm": 0.11140979826450348, + "learning_rate": 9.543360923491878e-07, + "loss": 0.0019, + "step": 255720 + }, + { + "epoch": 1.6401727847918597, + "grad_norm": 0.03561192378401756, + "learning_rate": 9.540072215250689e-07, + "loss": 0.0008, + "step": 255730 + }, + { + "epoch": 1.6402369216856458, + "grad_norm": 0.26978716254234314, + "learning_rate": 9.536784014001316e-07, + "loss": 0.0016, + "step": 255740 + }, + { + "epoch": 1.640301058579432, + "grad_norm": 0.0954686626791954, + "learning_rate": 9.533496319784957e-07, + "loss": 0.0011, + "step": 255750 + }, + { + "epoch": 1.6403651954732181, + "grad_norm": 0.021966198459267616, + "learning_rate": 9.530209132642799e-07, + "loss": 0.002, + "step": 255760 + }, + { + "epoch": 1.6404293323670043, + "grad_norm": 0.1717972755432129, + "learning_rate": 9.526922452616055e-07, + "loss": 0.0009, + "step": 255770 + }, + { + "epoch": 1.6404934692607902, + "grad_norm": 0.12879596650600433, + "learning_rate": 9.523636279745901e-07, + "loss": 0.0011, + "step": 255780 + }, + { + "epoch": 1.6405576061545764, + "grad_norm": 0.05737556889653206, + "learning_rate": 9.520350614073509e-07, + "loss": 0.0011, + "step": 255790 + }, + { + "epoch": 1.6406217430483623, + "grad_norm": 0.10769285261631012, + "learning_rate": 9.51706545564004e-07, + "loss": 0.0009, + "step": 255800 + }, + { + "epoch": 1.6406858799421484, + "grad_norm": 0.06674765050411224, + "learning_rate": 9.513780804486688e-07, + "loss": 0.0007, + "step": 255810 + }, + { + "epoch": 1.6407500168359346, + "grad_norm": 0.18882626295089722, + "learning_rate": 9.5104966606546e-07, + "loss": 0.0025, + "step": 255820 + }, + { + "epoch": 1.6408141537297207, + "grad_norm": 0.3161608576774597, + "learning_rate": 9.507213024184914e-07, + "loss": 0.0015, + "step": 255830 + }, + { + "epoch": 1.6408782906235069, + "grad_norm": 0.05102437362074852, + "learning_rate": 9.503929895118796e-07, + "loss": 0.0031, + "step": 255840 + }, + { + "epoch": 1.640942427517293, + "grad_norm": 0.14979737997055054, + "learning_rate": 9.500647273497371e-07, + "loss": 0.0011, + "step": 255850 + }, + { + "epoch": 1.6410065644110792, + "grad_norm": 0.13412176072597504, + "learning_rate": 9.497365159361788e-07, + "loss": 0.0011, + "step": 255860 + }, + { + "epoch": 1.641070701304865, + "grad_norm": 0.025658756494522095, + "learning_rate": 9.494083552753169e-07, + "loss": 0.0028, + "step": 255870 + }, + { + "epoch": 1.6411348381986512, + "grad_norm": 0.05824677646160126, + "learning_rate": 9.490802453712622e-07, + "loss": 0.0004, + "step": 255880 + }, + { + "epoch": 1.6411989750924372, + "grad_norm": 0.07874336838722229, + "learning_rate": 9.487521862281279e-07, + "loss": 0.0011, + "step": 255890 + }, + { + "epoch": 1.6412631119862233, + "grad_norm": 0.3560166656970978, + "learning_rate": 9.484241778500247e-07, + "loss": 0.0011, + "step": 255900 + }, + { + "epoch": 1.6413272488800095, + "grad_norm": 0.03607160225510597, + "learning_rate": 9.480962202410615e-07, + "loss": 0.0008, + "step": 255910 + }, + { + "epoch": 1.6413913857737956, + "grad_norm": 0.13168150186538696, + "learning_rate": 9.477683134053478e-07, + "loss": 0.0017, + "step": 255920 + }, + { + "epoch": 1.6414555226675818, + "grad_norm": 0.16879135370254517, + "learning_rate": 9.474404573469942e-07, + "loss": 0.0008, + "step": 255930 + }, + { + "epoch": 1.641519659561368, + "grad_norm": 0.021642599254846573, + "learning_rate": 9.471126520701079e-07, + "loss": 0.0011, + "step": 255940 + }, + { + "epoch": 1.6415837964551538, + "grad_norm": 0.044197067618370056, + "learning_rate": 9.467848975787958e-07, + "loss": 0.001, + "step": 255950 + }, + { + "epoch": 1.64164793334894, + "grad_norm": 0.09164709597826004, + "learning_rate": 9.464571938771666e-07, + "loss": 0.0031, + "step": 255960 + }, + { + "epoch": 1.641712070242726, + "grad_norm": 0.03152585029602051, + "learning_rate": 9.461295409693261e-07, + "loss": 0.0011, + "step": 255970 + }, + { + "epoch": 1.641776207136512, + "grad_norm": 0.11910726130008698, + "learning_rate": 9.458019388593792e-07, + "loss": 0.0011, + "step": 255980 + }, + { + "epoch": 1.6418403440302982, + "grad_norm": 0.12369369715452194, + "learning_rate": 9.454743875514305e-07, + "loss": 0.001, + "step": 255990 + }, + { + "epoch": 1.6419044809240844, + "grad_norm": 0.09841001033782959, + "learning_rate": 9.451468870495867e-07, + "loss": 0.0011, + "step": 256000 + }, + { + "epoch": 1.6419686178178705, + "grad_norm": 0.06547404080629349, + "learning_rate": 9.448194373579505e-07, + "loss": 0.0011, + "step": 256010 + }, + { + "epoch": 1.6420327547116567, + "grad_norm": 0.07814065366983414, + "learning_rate": 9.44492038480625e-07, + "loss": 0.0006, + "step": 256020 + }, + { + "epoch": 1.6420968916054428, + "grad_norm": 0.06707212328910828, + "learning_rate": 9.441646904217112e-07, + "loss": 0.0016, + "step": 256030 + }, + { + "epoch": 1.6421610284992287, + "grad_norm": 0.16191032528877258, + "learning_rate": 9.43837393185314e-07, + "loss": 0.0017, + "step": 256040 + }, + { + "epoch": 1.6422251653930149, + "grad_norm": 0.2640500068664551, + "learning_rate": 9.435101467755331e-07, + "loss": 0.0011, + "step": 256050 + }, + { + "epoch": 1.6422893022868008, + "grad_norm": 0.032831743359565735, + "learning_rate": 9.431829511964691e-07, + "loss": 0.0009, + "step": 256060 + }, + { + "epoch": 1.642353439180587, + "grad_norm": 0.07627349346876144, + "learning_rate": 9.428558064522208e-07, + "loss": 0.0019, + "step": 256070 + }, + { + "epoch": 1.642417576074373, + "grad_norm": 0.01395474374294281, + "learning_rate": 9.425287125468902e-07, + "loss": 0.0014, + "step": 256080 + }, + { + "epoch": 1.6424817129681593, + "grad_norm": 0.07457228749990463, + "learning_rate": 9.422016694845743e-07, + "loss": 0.0009, + "step": 256090 + }, + { + "epoch": 1.6425458498619454, + "grad_norm": 0.08780708909034729, + "learning_rate": 9.418746772693704e-07, + "loss": 0.0011, + "step": 256100 + }, + { + "epoch": 1.6426099867557316, + "grad_norm": 0.4448014497756958, + "learning_rate": 9.415477359053787e-07, + "loss": 0.0033, + "step": 256110 + }, + { + "epoch": 1.6426741236495175, + "grad_norm": 0.1905025988817215, + "learning_rate": 9.412208453966942e-07, + "loss": 0.0014, + "step": 256120 + }, + { + "epoch": 1.6427382605433036, + "grad_norm": 0.03435979783535004, + "learning_rate": 9.408940057474136e-07, + "loss": 0.0009, + "step": 256130 + }, + { + "epoch": 1.6428023974370898, + "grad_norm": 0.07999928295612335, + "learning_rate": 9.405672169616304e-07, + "loss": 0.0017, + "step": 256140 + }, + { + "epoch": 1.6428665343308757, + "grad_norm": 0.0338425487279892, + "learning_rate": 9.402404790434427e-07, + "loss": 0.0013, + "step": 256150 + }, + { + "epoch": 1.6429306712246619, + "grad_norm": 0.07838539034128189, + "learning_rate": 9.399137919969431e-07, + "loss": 0.0007, + "step": 256160 + }, + { + "epoch": 1.642994808118448, + "grad_norm": 0.02995946630835533, + "learning_rate": 9.395871558262254e-07, + "loss": 0.0009, + "step": 256170 + }, + { + "epoch": 1.6430589450122342, + "grad_norm": 0.01181158795952797, + "learning_rate": 9.392605705353813e-07, + "loss": 0.001, + "step": 256180 + }, + { + "epoch": 1.6431230819060203, + "grad_norm": 0.24913929402828217, + "learning_rate": 9.389340361285059e-07, + "loss": 0.0022, + "step": 256190 + }, + { + "epoch": 1.6431872187998064, + "grad_norm": 0.1120237410068512, + "learning_rate": 9.386075526096888e-07, + "loss": 0.0036, + "step": 256200 + }, + { + "epoch": 1.6432513556935924, + "grad_norm": 0.17576159536838531, + "learning_rate": 9.382811199830227e-07, + "loss": 0.0013, + "step": 256210 + }, + { + "epoch": 1.6433154925873785, + "grad_norm": 0.09315807372331619, + "learning_rate": 9.37954738252595e-07, + "loss": 0.0014, + "step": 256220 + }, + { + "epoch": 1.6433796294811645, + "grad_norm": 0.15503202378749847, + "learning_rate": 9.376284074224989e-07, + "loss": 0.001, + "step": 256230 + }, + { + "epoch": 1.6434437663749506, + "grad_norm": 0.13876628875732422, + "learning_rate": 9.373021274968225e-07, + "loss": 0.0015, + "step": 256240 + }, + { + "epoch": 1.6435079032687367, + "grad_norm": 0.1393558830022812, + "learning_rate": 9.369758984796523e-07, + "loss": 0.001, + "step": 256250 + }, + { + "epoch": 1.643572040162523, + "grad_norm": 0.04984534904360771, + "learning_rate": 9.366497203750796e-07, + "loss": 0.0007, + "step": 256260 + }, + { + "epoch": 1.643636177056309, + "grad_norm": 0.08660076558589935, + "learning_rate": 9.3632359318719e-07, + "loss": 0.001, + "step": 256270 + }, + { + "epoch": 1.6437003139500952, + "grad_norm": 0.055148929357528687, + "learning_rate": 9.359975169200696e-07, + "loss": 0.0007, + "step": 256280 + }, + { + "epoch": 1.6437644508438813, + "grad_norm": 0.13627982139587402, + "learning_rate": 9.356714915778037e-07, + "loss": 0.0031, + "step": 256290 + }, + { + "epoch": 1.6438285877376673, + "grad_norm": 0.08665461093187332, + "learning_rate": 9.353455171644798e-07, + "loss": 0.0005, + "step": 256300 + }, + { + "epoch": 1.6438927246314534, + "grad_norm": 0.08246444910764694, + "learning_rate": 9.350195936841822e-07, + "loss": 0.0012, + "step": 256310 + }, + { + "epoch": 1.6439568615252393, + "grad_norm": 0.04495377838611603, + "learning_rate": 9.346937211409939e-07, + "loss": 0.0005, + "step": 256320 + }, + { + "epoch": 1.6440209984190255, + "grad_norm": 0.06548351049423218, + "learning_rate": 9.34367899538997e-07, + "loss": 0.001, + "step": 256330 + }, + { + "epoch": 1.6440851353128116, + "grad_norm": 0.09427938610315323, + "learning_rate": 9.340421288822782e-07, + "loss": 0.0015, + "step": 256340 + }, + { + "epoch": 1.6441492722065978, + "grad_norm": 0.07400583475828171, + "learning_rate": 9.337164091749168e-07, + "loss": 0.0006, + "step": 256350 + }, + { + "epoch": 1.644213409100384, + "grad_norm": 0.19087454676628113, + "learning_rate": 9.333907404209947e-07, + "loss": 0.0019, + "step": 256360 + }, + { + "epoch": 1.64427754599417, + "grad_norm": 0.03530288115143776, + "learning_rate": 9.330651226245924e-07, + "loss": 0.003, + "step": 256370 + }, + { + "epoch": 1.644341682887956, + "grad_norm": 0.04272129759192467, + "learning_rate": 9.327395557897911e-07, + "loss": 0.0018, + "step": 256380 + }, + { + "epoch": 1.6444058197817422, + "grad_norm": 0.1905973255634308, + "learning_rate": 9.32414039920671e-07, + "loss": 0.0014, + "step": 256390 + }, + { + "epoch": 1.644469956675528, + "grad_norm": 0.004057542886584997, + "learning_rate": 9.320885750213083e-07, + "loss": 0.0008, + "step": 256400 + }, + { + "epoch": 1.6445340935693142, + "grad_norm": 0.02324562892317772, + "learning_rate": 9.317631610957839e-07, + "loss": 0.0014, + "step": 256410 + }, + { + "epoch": 1.6445982304631004, + "grad_norm": 0.008663343265652657, + "learning_rate": 9.314377981481754e-07, + "loss": 0.001, + "step": 256420 + }, + { + "epoch": 1.6446623673568865, + "grad_norm": 0.06866946071386337, + "learning_rate": 9.311124861825582e-07, + "loss": 0.0013, + "step": 256430 + }, + { + "epoch": 1.6447265042506727, + "grad_norm": 0.13391518592834473, + "learning_rate": 9.307872252030087e-07, + "loss": 0.0007, + "step": 256440 + }, + { + "epoch": 1.6447906411444588, + "grad_norm": 0.013321959413588047, + "learning_rate": 9.304620152136052e-07, + "loss": 0.0013, + "step": 256450 + }, + { + "epoch": 1.644854778038245, + "grad_norm": 0.055423554033041, + "learning_rate": 9.301368562184204e-07, + "loss": 0.0008, + "step": 256460 + }, + { + "epoch": 1.644918914932031, + "grad_norm": 0.11180834472179413, + "learning_rate": 9.298117482215296e-07, + "loss": 0.0011, + "step": 256470 + }, + { + "epoch": 1.644983051825817, + "grad_norm": 0.34718215465545654, + "learning_rate": 9.294866912270056e-07, + "loss": 0.0018, + "step": 256480 + }, + { + "epoch": 1.645047188719603, + "grad_norm": 0.02721422351896763, + "learning_rate": 9.291616852389235e-07, + "loss": 0.0005, + "step": 256490 + }, + { + "epoch": 1.6451113256133891, + "grad_norm": 0.006650662515312433, + "learning_rate": 9.288367302613549e-07, + "loss": 0.0012, + "step": 256500 + }, + { + "epoch": 1.6451754625071753, + "grad_norm": 0.062067050486803055, + "learning_rate": 9.285118262983717e-07, + "loss": 0.0009, + "step": 256510 + }, + { + "epoch": 1.6452395994009614, + "grad_norm": 0.10535760223865509, + "learning_rate": 9.281869733540443e-07, + "loss": 0.0012, + "step": 256520 + }, + { + "epoch": 1.6453037362947476, + "grad_norm": 0.06388656049966812, + "learning_rate": 9.278621714324449e-07, + "loss": 0.001, + "step": 256530 + }, + { + "epoch": 1.6453678731885337, + "grad_norm": 0.10174249112606049, + "learning_rate": 9.275374205376431e-07, + "loss": 0.0014, + "step": 256540 + }, + { + "epoch": 1.6454320100823197, + "grad_norm": 0.11160119622945786, + "learning_rate": 9.272127206737064e-07, + "loss": 0.0008, + "step": 256550 + }, + { + "epoch": 1.6454961469761058, + "grad_norm": 0.08544647693634033, + "learning_rate": 9.268880718447066e-07, + "loss": 0.001, + "step": 256560 + }, + { + "epoch": 1.645560283869892, + "grad_norm": 0.07638426870107651, + "learning_rate": 9.265634740547103e-07, + "loss": 0.0006, + "step": 256570 + }, + { + "epoch": 1.6456244207636779, + "grad_norm": 0.004329252522438765, + "learning_rate": 9.262389273077849e-07, + "loss": 0.0009, + "step": 256580 + }, + { + "epoch": 1.645688557657464, + "grad_norm": 0.002356065670028329, + "learning_rate": 9.25914431607996e-07, + "loss": 0.0007, + "step": 256590 + }, + { + "epoch": 1.6457526945512502, + "grad_norm": 0.01471665408462286, + "learning_rate": 9.255899869594121e-07, + "loss": 0.0015, + "step": 256600 + }, + { + "epoch": 1.6458168314450363, + "grad_norm": 0.053834833204746246, + "learning_rate": 9.252655933660964e-07, + "loss": 0.0011, + "step": 256610 + }, + { + "epoch": 1.6458809683388225, + "grad_norm": 0.17222963273525238, + "learning_rate": 9.249412508321159e-07, + "loss": 0.0024, + "step": 256620 + }, + { + "epoch": 1.6459451052326086, + "grad_norm": 0.10242783278226852, + "learning_rate": 9.246169593615345e-07, + "loss": 0.0007, + "step": 256630 + }, + { + "epoch": 1.6460092421263945, + "grad_norm": 0.11901787668466568, + "learning_rate": 9.242927189584139e-07, + "loss": 0.0007, + "step": 256640 + }, + { + "epoch": 1.6460733790201807, + "grad_norm": 0.030185380950570107, + "learning_rate": 9.239685296268191e-07, + "loss": 0.001, + "step": 256650 + }, + { + "epoch": 1.6461375159139666, + "grad_norm": 0.07891599088907242, + "learning_rate": 9.236443913708121e-07, + "loss": 0.0008, + "step": 256660 + }, + { + "epoch": 1.6462016528077528, + "grad_norm": 0.0033202078193426132, + "learning_rate": 9.233203041944533e-07, + "loss": 0.0011, + "step": 256670 + }, + { + "epoch": 1.646265789701539, + "grad_norm": 0.23745547235012054, + "learning_rate": 9.229962681018051e-07, + "loss": 0.0014, + "step": 256680 + }, + { + "epoch": 1.646329926595325, + "grad_norm": 0.016615508124232292, + "learning_rate": 9.226722830969281e-07, + "loss": 0.0008, + "step": 256690 + }, + { + "epoch": 1.6463940634891112, + "grad_norm": 0.09162529557943344, + "learning_rate": 9.223483491838814e-07, + "loss": 0.0014, + "step": 256700 + }, + { + "epoch": 1.6464582003828974, + "grad_norm": 0.09584107249975204, + "learning_rate": 9.220244663667228e-07, + "loss": 0.0009, + "step": 256710 + }, + { + "epoch": 1.6465223372766835, + "grad_norm": 0.1296747475862503, + "learning_rate": 9.217006346495128e-07, + "loss": 0.0007, + "step": 256720 + }, + { + "epoch": 1.6465864741704694, + "grad_norm": 0.06498198956251144, + "learning_rate": 9.213768540363094e-07, + "loss": 0.0011, + "step": 256730 + }, + { + "epoch": 1.6466506110642556, + "grad_norm": 0.09656433016061783, + "learning_rate": 9.210531245311683e-07, + "loss": 0.0006, + "step": 256740 + }, + { + "epoch": 1.6467147479580415, + "grad_norm": 0.12453159689903259, + "learning_rate": 9.207294461381461e-07, + "loss": 0.0013, + "step": 256750 + }, + { + "epoch": 1.6467788848518277, + "grad_norm": 0.24564692378044128, + "learning_rate": 9.204058188613002e-07, + "loss": 0.0035, + "step": 256760 + }, + { + "epoch": 1.6468430217456138, + "grad_norm": 0.08555837720632553, + "learning_rate": 9.200822427046852e-07, + "loss": 0.0012, + "step": 256770 + }, + { + "epoch": 1.6469071586394, + "grad_norm": 0.030364805832505226, + "learning_rate": 9.197587176723555e-07, + "loss": 0.0009, + "step": 256780 + }, + { + "epoch": 1.646971295533186, + "grad_norm": 0.056859008967876434, + "learning_rate": 9.194352437683635e-07, + "loss": 0.0011, + "step": 256790 + }, + { + "epoch": 1.6470354324269723, + "grad_norm": 0.0025923969224095345, + "learning_rate": 9.191118209967658e-07, + "loss": 0.0006, + "step": 256800 + }, + { + "epoch": 1.6470995693207582, + "grad_norm": 0.01009892113506794, + "learning_rate": 9.18788449361614e-07, + "loss": 0.0006, + "step": 256810 + }, + { + "epoch": 1.6471637062145443, + "grad_norm": 0.12013409286737442, + "learning_rate": 9.184651288669582e-07, + "loss": 0.0014, + "step": 256820 + }, + { + "epoch": 1.6472278431083303, + "grad_norm": 0.11209302395582199, + "learning_rate": 9.181418595168523e-07, + "loss": 0.0015, + "step": 256830 + }, + { + "epoch": 1.6472919800021164, + "grad_norm": 0.03150581568479538, + "learning_rate": 9.178186413153468e-07, + "loss": 0.0012, + "step": 256840 + }, + { + "epoch": 1.6473561168959026, + "grad_norm": 0.040904004126787186, + "learning_rate": 9.174954742664904e-07, + "loss": 0.0011, + "step": 256850 + }, + { + "epoch": 1.6474202537896887, + "grad_norm": 0.10026594251394272, + "learning_rate": 9.171723583743325e-07, + "loss": 0.0009, + "step": 256860 + }, + { + "epoch": 1.6474843906834749, + "grad_norm": 0.006834973581135273, + "learning_rate": 9.168492936429246e-07, + "loss": 0.0006, + "step": 256870 + }, + { + "epoch": 1.647548527577261, + "grad_norm": 0.15026934444904327, + "learning_rate": 9.165262800763126e-07, + "loss": 0.0017, + "step": 256880 + }, + { + "epoch": 1.6476126644710472, + "grad_norm": 0.2972351908683777, + "learning_rate": 9.16203317678545e-07, + "loss": 0.0016, + "step": 256890 + }, + { + "epoch": 1.647676801364833, + "grad_norm": 0.05031905695796013, + "learning_rate": 9.158804064536675e-07, + "loss": 0.0011, + "step": 256900 + }, + { + "epoch": 1.6477409382586192, + "grad_norm": 0.1165706142783165, + "learning_rate": 9.155575464057282e-07, + "loss": 0.0011, + "step": 256910 + }, + { + "epoch": 1.6478050751524052, + "grad_norm": 0.06868688762187958, + "learning_rate": 9.152347375387721e-07, + "loss": 0.0011, + "step": 256920 + }, + { + "epoch": 1.6478692120461913, + "grad_norm": 0.14829511940479279, + "learning_rate": 9.14911979856844e-07, + "loss": 0.0032, + "step": 256930 + }, + { + "epoch": 1.6479333489399774, + "grad_norm": 0.06655465066432953, + "learning_rate": 9.145892733639872e-07, + "loss": 0.0004, + "step": 256940 + }, + { + "epoch": 1.6479974858337636, + "grad_norm": 0.0601753368973732, + "learning_rate": 9.142666180642479e-07, + "loss": 0.0008, + "step": 256950 + }, + { + "epoch": 1.6480616227275497, + "grad_norm": 0.01680414192378521, + "learning_rate": 9.139440139616679e-07, + "loss": 0.001, + "step": 256960 + }, + { + "epoch": 1.648125759621336, + "grad_norm": 0.06670185923576355, + "learning_rate": 9.136214610602884e-07, + "loss": 0.0014, + "step": 256970 + }, + { + "epoch": 1.6481898965151218, + "grad_norm": 0.030908556655049324, + "learning_rate": 9.132989593641534e-07, + "loss": 0.0005, + "step": 256980 + }, + { + "epoch": 1.648254033408908, + "grad_norm": 0.18697044253349304, + "learning_rate": 9.129765088773035e-07, + "loss": 0.001, + "step": 256990 + }, + { + "epoch": 1.6483181703026941, + "grad_norm": 0.4722413122653961, + "learning_rate": 9.126541096037789e-07, + "loss": 0.001, + "step": 257000 + }, + { + "epoch": 1.64838230719648, + "grad_norm": 0.026497313752770424, + "learning_rate": 9.123317615476185e-07, + "loss": 0.0009, + "step": 257010 + }, + { + "epoch": 1.6484464440902662, + "grad_norm": 0.03699392452836037, + "learning_rate": 9.12009464712863e-07, + "loss": 0.0007, + "step": 257020 + }, + { + "epoch": 1.6485105809840523, + "grad_norm": 0.0715600997209549, + "learning_rate": 9.116872191035514e-07, + "loss": 0.0005, + "step": 257030 + }, + { + "epoch": 1.6485747178778385, + "grad_norm": 0.05829404294490814, + "learning_rate": 9.113650247237205e-07, + "loss": 0.0008, + "step": 257040 + }, + { + "epoch": 1.6486388547716246, + "grad_norm": 0.06432265043258667, + "learning_rate": 9.110428815774064e-07, + "loss": 0.0015, + "step": 257050 + }, + { + "epoch": 1.6487029916654108, + "grad_norm": 0.1736939251422882, + "learning_rate": 9.107207896686487e-07, + "loss": 0.0017, + "step": 257060 + }, + { + "epoch": 1.6487671285591967, + "grad_norm": 0.02131580002605915, + "learning_rate": 9.103987490014826e-07, + "loss": 0.0012, + "step": 257070 + }, + { + "epoch": 1.6488312654529829, + "grad_norm": 0.049157217144966125, + "learning_rate": 9.100767595799426e-07, + "loss": 0.001, + "step": 257080 + }, + { + "epoch": 1.6488954023467688, + "grad_norm": 0.0992347002029419, + "learning_rate": 9.09754821408062e-07, + "loss": 0.001, + "step": 257090 + }, + { + "epoch": 1.648959539240555, + "grad_norm": 0.08398938924074173, + "learning_rate": 9.094329344898789e-07, + "loss": 0.0011, + "step": 257100 + }, + { + "epoch": 1.649023676134341, + "grad_norm": 0.010240110568702221, + "learning_rate": 9.091110988294244e-07, + "loss": 0.0011, + "step": 257110 + }, + { + "epoch": 1.6490878130281272, + "grad_norm": 0.018708644434809685, + "learning_rate": 9.087893144307297e-07, + "loss": 0.0013, + "step": 257120 + }, + { + "epoch": 1.6491519499219134, + "grad_norm": 0.10159168392419815, + "learning_rate": 9.084675812978305e-07, + "loss": 0.0015, + "step": 257130 + }, + { + "epoch": 1.6492160868156995, + "grad_norm": 0.02465192601084709, + "learning_rate": 9.081458994347569e-07, + "loss": 0.0007, + "step": 257140 + }, + { + "epoch": 1.6492802237094857, + "grad_norm": 0.09112152457237244, + "learning_rate": 9.078242688455397e-07, + "loss": 0.0014, + "step": 257150 + }, + { + "epoch": 1.6493443606032716, + "grad_norm": 0.002627485664561391, + "learning_rate": 9.075026895342071e-07, + "loss": 0.0006, + "step": 257160 + }, + { + "epoch": 1.6494084974970578, + "grad_norm": 0.1102386862039566, + "learning_rate": 9.071811615047926e-07, + "loss": 0.0021, + "step": 257170 + }, + { + "epoch": 1.6494726343908437, + "grad_norm": 0.16178393363952637, + "learning_rate": 9.068596847613226e-07, + "loss": 0.0025, + "step": 257180 + }, + { + "epoch": 1.6495367712846298, + "grad_norm": 0.03818779066205025, + "learning_rate": 9.065382593078265e-07, + "loss": 0.0007, + "step": 257190 + }, + { + "epoch": 1.649600908178416, + "grad_norm": 0.0884946957230568, + "learning_rate": 9.062168851483299e-07, + "loss": 0.0006, + "step": 257200 + }, + { + "epoch": 1.6496650450722021, + "grad_norm": 0.1244484931230545, + "learning_rate": 9.058955622868631e-07, + "loss": 0.0011, + "step": 257210 + }, + { + "epoch": 1.6497291819659883, + "grad_norm": 0.01862574927508831, + "learning_rate": 9.055742907274511e-07, + "loss": 0.0012, + "step": 257220 + }, + { + "epoch": 1.6497933188597744, + "grad_norm": 0.012687386944890022, + "learning_rate": 9.052530704741191e-07, + "loss": 0.0011, + "step": 257230 + }, + { + "epoch": 1.6498574557535604, + "grad_norm": 0.014412990771234035, + "learning_rate": 9.049319015308916e-07, + "loss": 0.0008, + "step": 257240 + }, + { + "epoch": 1.6499215926473465, + "grad_norm": 0.00819874182343483, + "learning_rate": 9.046107839017954e-07, + "loss": 0.0018, + "step": 257250 + }, + { + "epoch": 1.6499857295411324, + "grad_norm": 0.08654952794313431, + "learning_rate": 9.042897175908527e-07, + "loss": 0.0031, + "step": 257260 + }, + { + "epoch": 1.6500498664349186, + "grad_norm": 0.44848760962486267, + "learning_rate": 9.039687026020861e-07, + "loss": 0.0014, + "step": 257270 + }, + { + "epoch": 1.6501140033287047, + "grad_norm": 0.05857900530099869, + "learning_rate": 9.036477389395204e-07, + "loss": 0.001, + "step": 257280 + }, + { + "epoch": 1.6501781402224909, + "grad_norm": 0.09503237903118134, + "learning_rate": 9.033268266071755e-07, + "loss": 0.0008, + "step": 257290 + }, + { + "epoch": 1.650242277116277, + "grad_norm": 0.13392576575279236, + "learning_rate": 9.03005965609074e-07, + "loss": 0.0018, + "step": 257300 + }, + { + "epoch": 1.6503064140100632, + "grad_norm": 0.09986912459135056, + "learning_rate": 9.026851559492339e-07, + "loss": 0.0007, + "step": 257310 + }, + { + "epoch": 1.6503705509038493, + "grad_norm": 0.013021270744502544, + "learning_rate": 9.023643976316787e-07, + "loss": 0.0018, + "step": 257320 + }, + { + "epoch": 1.6504346877976352, + "grad_norm": 0.025929611176252365, + "learning_rate": 9.020436906604263e-07, + "loss": 0.0008, + "step": 257330 + }, + { + "epoch": 1.6504988246914214, + "grad_norm": 0.017933866009116173, + "learning_rate": 9.017230350394951e-07, + "loss": 0.0009, + "step": 257340 + }, + { + "epoch": 1.6505629615852073, + "grad_norm": 0.08063351362943649, + "learning_rate": 9.014024307729019e-07, + "loss": 0.0018, + "step": 257350 + }, + { + "epoch": 1.6506270984789935, + "grad_norm": 0.018969247117638588, + "learning_rate": 9.010818778646668e-07, + "loss": 0.0008, + "step": 257360 + }, + { + "epoch": 1.6506912353727796, + "grad_norm": 0.011855973862111568, + "learning_rate": 9.00761376318805e-07, + "loss": 0.0011, + "step": 257370 + }, + { + "epoch": 1.6507553722665658, + "grad_norm": 0.036045532673597336, + "learning_rate": 9.004409261393321e-07, + "loss": 0.0011, + "step": 257380 + }, + { + "epoch": 1.650819509160352, + "grad_norm": 0.029906436800956726, + "learning_rate": 9.001205273302638e-07, + "loss": 0.0012, + "step": 257390 + }, + { + "epoch": 1.650883646054138, + "grad_norm": 0.13861581683158875, + "learning_rate": 8.998001798956168e-07, + "loss": 0.0005, + "step": 257400 + }, + { + "epoch": 1.6509477829479242, + "grad_norm": 0.10438334196805954, + "learning_rate": 8.994798838394043e-07, + "loss": 0.0006, + "step": 257410 + }, + { + "epoch": 1.6510119198417101, + "grad_norm": 0.1229967400431633, + "learning_rate": 8.991596391656388e-07, + "loss": 0.0012, + "step": 257420 + }, + { + "epoch": 1.6510760567354963, + "grad_norm": 0.05250271409749985, + "learning_rate": 8.988394458783334e-07, + "loss": 0.002, + "step": 257430 + }, + { + "epoch": 1.6511401936292822, + "grad_norm": 0.0711706355214119, + "learning_rate": 8.985193039815015e-07, + "loss": 0.0007, + "step": 257440 + }, + { + "epoch": 1.6512043305230684, + "grad_norm": 0.12365922331809998, + "learning_rate": 8.98199213479154e-07, + "loss": 0.002, + "step": 257450 + }, + { + "epoch": 1.6512684674168545, + "grad_norm": 0.03708312660455704, + "learning_rate": 8.978791743753018e-07, + "loss": 0.0012, + "step": 257460 + }, + { + "epoch": 1.6513326043106407, + "grad_norm": 0.04477045685052872, + "learning_rate": 8.975591866739541e-07, + "loss": 0.0017, + "step": 257470 + }, + { + "epoch": 1.6513967412044268, + "grad_norm": 0.01089094765484333, + "learning_rate": 8.972392503791233e-07, + "loss": 0.0012, + "step": 257480 + }, + { + "epoch": 1.651460878098213, + "grad_norm": 0.014865393750369549, + "learning_rate": 8.969193654948166e-07, + "loss": 0.0016, + "step": 257490 + }, + { + "epoch": 1.6515250149919989, + "grad_norm": 0.22002476453781128, + "learning_rate": 8.965995320250415e-07, + "loss": 0.001, + "step": 257500 + }, + { + "epoch": 1.651589151885785, + "grad_norm": 0.1237795427441597, + "learning_rate": 8.962797499738085e-07, + "loss": 0.0018, + "step": 257510 + }, + { + "epoch": 1.651653288779571, + "grad_norm": 0.01695345714688301, + "learning_rate": 8.959600193451229e-07, + "loss": 0.0003, + "step": 257520 + }, + { + "epoch": 1.651717425673357, + "grad_norm": 0.07024957239627838, + "learning_rate": 8.956403401429909e-07, + "loss": 0.0006, + "step": 257530 + }, + { + "epoch": 1.6517815625671433, + "grad_norm": 0.003555310657247901, + "learning_rate": 8.95320712371418e-07, + "loss": 0.0007, + "step": 257540 + }, + { + "epoch": 1.6518456994609294, + "grad_norm": 0.0036182524636387825, + "learning_rate": 8.950011360344113e-07, + "loss": 0.0011, + "step": 257550 + }, + { + "epoch": 1.6519098363547156, + "grad_norm": 0.12117797136306763, + "learning_rate": 8.946816111359741e-07, + "loss": 0.0012, + "step": 257560 + }, + { + "epoch": 1.6519739732485017, + "grad_norm": 0.09137025475502014, + "learning_rate": 8.943621376801103e-07, + "loss": 0.0008, + "step": 257570 + }, + { + "epoch": 1.6520381101422879, + "grad_norm": 0.09539289027452469, + "learning_rate": 8.940427156708225e-07, + "loss": 0.0013, + "step": 257580 + }, + { + "epoch": 1.6521022470360738, + "grad_norm": 0.004371563903987408, + "learning_rate": 8.937233451121147e-07, + "loss": 0.0013, + "step": 257590 + }, + { + "epoch": 1.65216638392986, + "grad_norm": 0.08195861428976059, + "learning_rate": 8.934040260079879e-07, + "loss": 0.0006, + "step": 257600 + }, + { + "epoch": 1.6522305208236459, + "grad_norm": 0.25516414642333984, + "learning_rate": 8.930847583624441e-07, + "loss": 0.0006, + "step": 257610 + }, + { + "epoch": 1.652294657717432, + "grad_norm": 0.0634370967745781, + "learning_rate": 8.927655421794817e-07, + "loss": 0.0006, + "step": 257620 + }, + { + "epoch": 1.6523587946112182, + "grad_norm": 0.12932150065898895, + "learning_rate": 8.924463774631043e-07, + "loss": 0.0014, + "step": 257630 + }, + { + "epoch": 1.6524229315050043, + "grad_norm": 0.01701982319355011, + "learning_rate": 8.92127264217309e-07, + "loss": 0.0036, + "step": 257640 + }, + { + "epoch": 1.6524870683987904, + "grad_norm": 0.10885673016309738, + "learning_rate": 8.918082024460934e-07, + "loss": 0.0014, + "step": 257650 + }, + { + "epoch": 1.6525512052925766, + "grad_norm": 0.06675291806459427, + "learning_rate": 8.914891921534591e-07, + "loss": 0.0008, + "step": 257660 + }, + { + "epoch": 1.6526153421863625, + "grad_norm": 0.13544189929962158, + "learning_rate": 8.911702333434008e-07, + "loss": 0.0008, + "step": 257670 + }, + { + "epoch": 1.6526794790801487, + "grad_norm": 0.3474222421646118, + "learning_rate": 8.908513260199165e-07, + "loss": 0.0023, + "step": 257680 + }, + { + "epoch": 1.6527436159739348, + "grad_norm": 0.07608381658792496, + "learning_rate": 8.905324701870006e-07, + "loss": 0.001, + "step": 257690 + }, + { + "epoch": 1.6528077528677207, + "grad_norm": 0.10198512673377991, + "learning_rate": 8.902136658486504e-07, + "loss": 0.0016, + "step": 257700 + }, + { + "epoch": 1.652871889761507, + "grad_norm": 0.09177470952272415, + "learning_rate": 8.898949130088607e-07, + "loss": 0.001, + "step": 257710 + }, + { + "epoch": 1.652936026655293, + "grad_norm": 0.1004767194390297, + "learning_rate": 8.895762116716256e-07, + "loss": 0.0009, + "step": 257720 + }, + { + "epoch": 1.6530001635490792, + "grad_norm": 0.06865187734365463, + "learning_rate": 8.892575618409365e-07, + "loss": 0.0011, + "step": 257730 + }, + { + "epoch": 1.6530643004428653, + "grad_norm": 0.1096527948975563, + "learning_rate": 8.889389635207896e-07, + "loss": 0.0008, + "step": 257740 + }, + { + "epoch": 1.6531284373366515, + "grad_norm": 0.20047782361507416, + "learning_rate": 8.886204167151752e-07, + "loss": 0.0018, + "step": 257750 + }, + { + "epoch": 1.6531925742304374, + "grad_norm": 0.16751068830490112, + "learning_rate": 8.883019214280853e-07, + "loss": 0.0012, + "step": 257760 + }, + { + "epoch": 1.6532567111242236, + "grad_norm": 0.06954125314950943, + "learning_rate": 8.879834776635099e-07, + "loss": 0.001, + "step": 257770 + }, + { + "epoch": 1.6533208480180095, + "grad_norm": 0.009285716339945793, + "learning_rate": 8.876650854254421e-07, + "loss": 0.0008, + "step": 257780 + }, + { + "epoch": 1.6533849849117956, + "grad_norm": 0.17273807525634766, + "learning_rate": 8.873467447178691e-07, + "loss": 0.0023, + "step": 257790 + }, + { + "epoch": 1.6534491218055818, + "grad_norm": 0.021548360586166382, + "learning_rate": 8.870284555447794e-07, + "loss": 0.0008, + "step": 257800 + }, + { + "epoch": 1.653513258699368, + "grad_norm": 0.09591946005821228, + "learning_rate": 8.867102179101645e-07, + "loss": 0.0008, + "step": 257810 + }, + { + "epoch": 1.653577395593154, + "grad_norm": 0.007451815530657768, + "learning_rate": 8.863920318180097e-07, + "loss": 0.0005, + "step": 257820 + }, + { + "epoch": 1.6536415324869402, + "grad_norm": 0.05306859314441681, + "learning_rate": 8.860738972723026e-07, + "loss": 0.0012, + "step": 257830 + }, + { + "epoch": 1.6537056693807264, + "grad_norm": 0.013590164482593536, + "learning_rate": 8.85755814277029e-07, + "loss": 0.0016, + "step": 257840 + }, + { + "epoch": 1.6537698062745123, + "grad_norm": 0.31237760186195374, + "learning_rate": 8.854377828361765e-07, + "loss": 0.0014, + "step": 257850 + }, + { + "epoch": 1.6538339431682985, + "grad_norm": 0.08388779312372208, + "learning_rate": 8.851198029537295e-07, + "loss": 0.0007, + "step": 257860 + }, + { + "epoch": 1.6538980800620844, + "grad_norm": 0.020440956577658653, + "learning_rate": 8.848018746336717e-07, + "loss": 0.0006, + "step": 257870 + }, + { + "epoch": 1.6539622169558705, + "grad_norm": 0.03452529013156891, + "learning_rate": 8.844839978799868e-07, + "loss": 0.0008, + "step": 257880 + }, + { + "epoch": 1.6540263538496567, + "grad_norm": 0.10935361683368683, + "learning_rate": 8.841661726966593e-07, + "loss": 0.0009, + "step": 257890 + }, + { + "epoch": 1.6540904907434428, + "grad_norm": 0.21043239533901215, + "learning_rate": 8.838483990876718e-07, + "loss": 0.0016, + "step": 257900 + }, + { + "epoch": 1.654154627637229, + "grad_norm": 0.10948438197374344, + "learning_rate": 8.835306770570051e-07, + "loss": 0.0012, + "step": 257910 + }, + { + "epoch": 1.6542187645310151, + "grad_norm": 0.10672084242105484, + "learning_rate": 8.832130066086403e-07, + "loss": 0.0005, + "step": 257920 + }, + { + "epoch": 1.654282901424801, + "grad_norm": 0.023894185200333595, + "learning_rate": 8.828953877465595e-07, + "loss": 0.0008, + "step": 257930 + }, + { + "epoch": 1.6543470383185872, + "grad_norm": 0.16079075634479523, + "learning_rate": 8.825778204747426e-07, + "loss": 0.0008, + "step": 257940 + }, + { + "epoch": 1.6544111752123731, + "grad_norm": 0.019003797322511673, + "learning_rate": 8.822603047971667e-07, + "loss": 0.0008, + "step": 257950 + }, + { + "epoch": 1.6544753121061593, + "grad_norm": 0.001286782673560083, + "learning_rate": 8.819428407178133e-07, + "loss": 0.0007, + "step": 257960 + }, + { + "epoch": 1.6545394489999454, + "grad_norm": 0.09896622598171234, + "learning_rate": 8.816254282406589e-07, + "loss": 0.0008, + "step": 257970 + }, + { + "epoch": 1.6546035858937316, + "grad_norm": 0.12817074358463287, + "learning_rate": 8.813080673696817e-07, + "loss": 0.0011, + "step": 257980 + }, + { + "epoch": 1.6546677227875177, + "grad_norm": 0.07022719830274582, + "learning_rate": 8.809907581088567e-07, + "loss": 0.0035, + "step": 257990 + }, + { + "epoch": 1.6547318596813039, + "grad_norm": 0.072791688144207, + "learning_rate": 8.806735004621625e-07, + "loss": 0.0011, + "step": 258000 + }, + { + "epoch": 1.65479599657509, + "grad_norm": 0.12035372108221054, + "learning_rate": 8.803562944335731e-07, + "loss": 0.0032, + "step": 258010 + }, + { + "epoch": 1.654860133468876, + "grad_norm": 0.06667357683181763, + "learning_rate": 8.800391400270641e-07, + "loss": 0.0019, + "step": 258020 + }, + { + "epoch": 1.654924270362662, + "grad_norm": 0.11507948487997055, + "learning_rate": 8.797220372466075e-07, + "loss": 0.0021, + "step": 258030 + }, + { + "epoch": 1.654988407256448, + "grad_norm": 0.06681770831346512, + "learning_rate": 8.7940498609618e-07, + "loss": 0.0008, + "step": 258040 + }, + { + "epoch": 1.6550525441502342, + "grad_norm": 0.125620499253273, + "learning_rate": 8.790879865797525e-07, + "loss": 0.0008, + "step": 258050 + }, + { + "epoch": 1.6551166810440203, + "grad_norm": 0.09714993834495544, + "learning_rate": 8.787710387012982e-07, + "loss": 0.0007, + "step": 258060 + }, + { + "epoch": 1.6551808179378065, + "grad_norm": 0.005753469653427601, + "learning_rate": 8.784541424647869e-07, + "loss": 0.0006, + "step": 258070 + }, + { + "epoch": 1.6552449548315926, + "grad_norm": 0.1216263622045517, + "learning_rate": 8.78137297874192e-07, + "loss": 0.001, + "step": 258080 + }, + { + "epoch": 1.6553090917253788, + "grad_norm": 0.026845403015613556, + "learning_rate": 8.778205049334825e-07, + "loss": 0.0013, + "step": 258090 + }, + { + "epoch": 1.6553732286191647, + "grad_norm": 0.04864067956805229, + "learning_rate": 8.775037636466271e-07, + "loss": 0.0011, + "step": 258100 + }, + { + "epoch": 1.6554373655129508, + "grad_norm": 0.03315316513180733, + "learning_rate": 8.77187074017597e-07, + "loss": 0.0019, + "step": 258110 + }, + { + "epoch": 1.655501502406737, + "grad_norm": 0.08306470513343811, + "learning_rate": 8.768704360503594e-07, + "loss": 0.0008, + "step": 258120 + }, + { + "epoch": 1.655565639300523, + "grad_norm": 0.07637351006269455, + "learning_rate": 8.765538497488818e-07, + "loss": 0.0013, + "step": 258130 + }, + { + "epoch": 1.655629776194309, + "grad_norm": 0.1510131061077118, + "learning_rate": 8.762373151171305e-07, + "loss": 0.0026, + "step": 258140 + }, + { + "epoch": 1.6556939130880952, + "grad_norm": 0.1989763081073761, + "learning_rate": 8.759208321590723e-07, + "loss": 0.001, + "step": 258150 + }, + { + "epoch": 1.6557580499818814, + "grad_norm": 0.07850760966539383, + "learning_rate": 8.756044008786751e-07, + "loss": 0.0009, + "step": 258160 + }, + { + "epoch": 1.6558221868756675, + "grad_norm": 0.03400076925754547, + "learning_rate": 8.752880212799025e-07, + "loss": 0.001, + "step": 258170 + }, + { + "epoch": 1.6558863237694537, + "grad_norm": 0.04656525328755379, + "learning_rate": 8.74971693366719e-07, + "loss": 0.0007, + "step": 258180 + }, + { + "epoch": 1.6559504606632396, + "grad_norm": 0.12720754742622375, + "learning_rate": 8.746554171430871e-07, + "loss": 0.0011, + "step": 258190 + }, + { + "epoch": 1.6560145975570257, + "grad_norm": 0.05890100821852684, + "learning_rate": 8.743391926129723e-07, + "loss": 0.002, + "step": 258200 + }, + { + "epoch": 1.6560787344508117, + "grad_norm": 0.19628125429153442, + "learning_rate": 8.740230197803356e-07, + "loss": 0.0012, + "step": 258210 + }, + { + "epoch": 1.6561428713445978, + "grad_norm": 0.04698095843195915, + "learning_rate": 8.737068986491388e-07, + "loss": 0.0012, + "step": 258220 + }, + { + "epoch": 1.656207008238384, + "grad_norm": 0.031655896455049515, + "learning_rate": 8.733908292233439e-07, + "loss": 0.0007, + "step": 258230 + }, + { + "epoch": 1.65627114513217, + "grad_norm": 0.11383464187383652, + "learning_rate": 8.730748115069116e-07, + "loss": 0.0011, + "step": 258240 + }, + { + "epoch": 1.6563352820259563, + "grad_norm": 0.015196473337709904, + "learning_rate": 8.727588455038017e-07, + "loss": 0.002, + "step": 258250 + }, + { + "epoch": 1.6563994189197424, + "grad_norm": 0.18308258056640625, + "learning_rate": 8.724429312179716e-07, + "loss": 0.0013, + "step": 258260 + }, + { + "epoch": 1.6564635558135286, + "grad_norm": 0.007341694552451372, + "learning_rate": 8.721270686533823e-07, + "loss": 0.0011, + "step": 258270 + }, + { + "epoch": 1.6565276927073145, + "grad_norm": 0.21798130869865417, + "learning_rate": 8.718112578139915e-07, + "loss": 0.001, + "step": 258280 + }, + { + "epoch": 1.6565918296011006, + "grad_norm": 0.0013603041879832745, + "learning_rate": 8.714954987037561e-07, + "loss": 0.0009, + "step": 258290 + }, + { + "epoch": 1.6566559664948866, + "grad_norm": 0.11801481246948242, + "learning_rate": 8.711797913266312e-07, + "loss": 0.0009, + "step": 258300 + }, + { + "epoch": 1.6567201033886727, + "grad_norm": 0.0527237206697464, + "learning_rate": 8.708641356865755e-07, + "loss": 0.0008, + "step": 258310 + }, + { + "epoch": 1.6567842402824589, + "grad_norm": 0.04094007611274719, + "learning_rate": 8.705485317875434e-07, + "loss": 0.0013, + "step": 258320 + }, + { + "epoch": 1.656848377176245, + "grad_norm": 0.04453378543257713, + "learning_rate": 8.702329796334896e-07, + "loss": 0.0014, + "step": 258330 + }, + { + "epoch": 1.6569125140700312, + "grad_norm": 0.13029615581035614, + "learning_rate": 8.699174792283666e-07, + "loss": 0.0027, + "step": 258340 + }, + { + "epoch": 1.6569766509638173, + "grad_norm": 0.11282777786254883, + "learning_rate": 8.696020305761305e-07, + "loss": 0.0017, + "step": 258350 + }, + { + "epoch": 1.6570407878576032, + "grad_norm": 0.14039598405361176, + "learning_rate": 8.692866336807332e-07, + "loss": 0.0018, + "step": 258360 + }, + { + "epoch": 1.6571049247513894, + "grad_norm": 0.05331820994615555, + "learning_rate": 8.689712885461249e-07, + "loss": 0.0008, + "step": 258370 + }, + { + "epoch": 1.6571690616451753, + "grad_norm": 0.025747543200850487, + "learning_rate": 8.686559951762602e-07, + "loss": 0.0011, + "step": 258380 + }, + { + "epoch": 1.6572331985389614, + "grad_norm": 0.08664846420288086, + "learning_rate": 8.683407535750887e-07, + "loss": 0.0009, + "step": 258390 + }, + { + "epoch": 1.6572973354327476, + "grad_norm": 0.07032646983861923, + "learning_rate": 8.680255637465601e-07, + "loss": 0.001, + "step": 258400 + }, + { + "epoch": 1.6573614723265337, + "grad_norm": 0.08050806075334549, + "learning_rate": 8.677104256946233e-07, + "loss": 0.0008, + "step": 258410 + }, + { + "epoch": 1.65742560922032, + "grad_norm": 0.07885205745697021, + "learning_rate": 8.673953394232293e-07, + "loss": 0.0007, + "step": 258420 + }, + { + "epoch": 1.657489746114106, + "grad_norm": 0.056627798825502396, + "learning_rate": 8.670803049363252e-07, + "loss": 0.0018, + "step": 258430 + }, + { + "epoch": 1.6575538830078922, + "grad_norm": 0.09750901162624359, + "learning_rate": 8.667653222378586e-07, + "loss": 0.002, + "step": 258440 + }, + { + "epoch": 1.6576180199016781, + "grad_norm": 0.015911463648080826, + "learning_rate": 8.664503913317751e-07, + "loss": 0.0008, + "step": 258450 + }, + { + "epoch": 1.6576821567954643, + "grad_norm": 0.03874349594116211, + "learning_rate": 8.661355122220239e-07, + "loss": 0.0014, + "step": 258460 + }, + { + "epoch": 1.6577462936892502, + "grad_norm": 0.09996067732572556, + "learning_rate": 8.658206849125495e-07, + "loss": 0.001, + "step": 258470 + }, + { + "epoch": 1.6578104305830363, + "grad_norm": 0.07658535242080688, + "learning_rate": 8.65505909407296e-07, + "loss": 0.0026, + "step": 258480 + }, + { + "epoch": 1.6578745674768225, + "grad_norm": 0.0181189626455307, + "learning_rate": 8.651911857102075e-07, + "loss": 0.0021, + "step": 258490 + }, + { + "epoch": 1.6579387043706086, + "grad_norm": 0.024657348170876503, + "learning_rate": 8.64876513825229e-07, + "loss": 0.0012, + "step": 258500 + }, + { + "epoch": 1.6580028412643948, + "grad_norm": 0.1810661256313324, + "learning_rate": 8.645618937563039e-07, + "loss": 0.0011, + "step": 258510 + }, + { + "epoch": 1.658066978158181, + "grad_norm": 0.068379245698452, + "learning_rate": 8.642473255073725e-07, + "loss": 0.0013, + "step": 258520 + }, + { + "epoch": 1.6581311150519669, + "grad_norm": 0.0052925460040569305, + "learning_rate": 8.639328090823789e-07, + "loss": 0.0005, + "step": 258530 + }, + { + "epoch": 1.658195251945753, + "grad_norm": 0.01999668963253498, + "learning_rate": 8.636183444852631e-07, + "loss": 0.0023, + "step": 258540 + }, + { + "epoch": 1.6582593888395392, + "grad_norm": 0.059351321309804916, + "learning_rate": 8.63303931719966e-07, + "loss": 0.0009, + "step": 258550 + }, + { + "epoch": 1.658323525733325, + "grad_norm": 0.12230983376502991, + "learning_rate": 8.629895707904256e-07, + "loss": 0.003, + "step": 258560 + }, + { + "epoch": 1.6583876626271112, + "grad_norm": 0.11306685209274292, + "learning_rate": 8.62675261700584e-07, + "loss": 0.0008, + "step": 258570 + }, + { + "epoch": 1.6584517995208974, + "grad_norm": 0.038956791162490845, + "learning_rate": 8.623610044543779e-07, + "loss": 0.0025, + "step": 258580 + }, + { + "epoch": 1.6585159364146835, + "grad_norm": 0.10281708836555481, + "learning_rate": 8.620467990557458e-07, + "loss": 0.0021, + "step": 258590 + }, + { + "epoch": 1.6585800733084697, + "grad_norm": 0.08976875990629196, + "learning_rate": 8.617326455086234e-07, + "loss": 0.0012, + "step": 258600 + }, + { + "epoch": 1.6586442102022558, + "grad_norm": 0.21373844146728516, + "learning_rate": 8.614185438169498e-07, + "loss": 0.0017, + "step": 258610 + }, + { + "epoch": 1.6587083470960418, + "grad_norm": 0.05366050451993942, + "learning_rate": 8.611044939846597e-07, + "loss": 0.0009, + "step": 258620 + }, + { + "epoch": 1.658772483989828, + "grad_norm": 0.26793211698532104, + "learning_rate": 8.607904960156882e-07, + "loss": 0.0014, + "step": 258630 + }, + { + "epoch": 1.6588366208836138, + "grad_norm": 0.05795462056994438, + "learning_rate": 8.604765499139689e-07, + "loss": 0.0011, + "step": 258640 + }, + { + "epoch": 1.6589007577774, + "grad_norm": 0.018418608233332634, + "learning_rate": 8.60162655683438e-07, + "loss": 0.0005, + "step": 258650 + }, + { + "epoch": 1.6589648946711861, + "grad_norm": 0.07220429927110672, + "learning_rate": 8.598488133280281e-07, + "loss": 0.0008, + "step": 258660 + }, + { + "epoch": 1.6590290315649723, + "grad_norm": 0.17509478330612183, + "learning_rate": 8.595350228516697e-07, + "loss": 0.0009, + "step": 258670 + }, + { + "epoch": 1.6590931684587584, + "grad_norm": 0.018411090597510338, + "learning_rate": 8.592212842582981e-07, + "loss": 0.0014, + "step": 258680 + }, + { + "epoch": 1.6591573053525446, + "grad_norm": 0.10979661345481873, + "learning_rate": 8.589075975518435e-07, + "loss": 0.0007, + "step": 258690 + }, + { + "epoch": 1.6592214422463307, + "grad_norm": 0.015542004257440567, + "learning_rate": 8.585939627362356e-07, + "loss": 0.0009, + "step": 258700 + }, + { + "epoch": 1.6592855791401167, + "grad_norm": 0.09888337552547455, + "learning_rate": 8.582803798154044e-07, + "loss": 0.0012, + "step": 258710 + }, + { + "epoch": 1.6593497160339028, + "grad_norm": 0.08750156313180923, + "learning_rate": 8.579668487932813e-07, + "loss": 0.0006, + "step": 258720 + }, + { + "epoch": 1.6594138529276887, + "grad_norm": 0.04003286361694336, + "learning_rate": 8.576533696737937e-07, + "loss": 0.0007, + "step": 258730 + }, + { + "epoch": 1.6594779898214749, + "grad_norm": 0.09341073781251907, + "learning_rate": 8.573399424608703e-07, + "loss": 0.0013, + "step": 258740 + }, + { + "epoch": 1.659542126715261, + "grad_norm": 0.12593533098697662, + "learning_rate": 8.570265671584361e-07, + "loss": 0.0013, + "step": 258750 + }, + { + "epoch": 1.6596062636090472, + "grad_norm": 0.08734682947397232, + "learning_rate": 8.567132437704217e-07, + "loss": 0.0007, + "step": 258760 + }, + { + "epoch": 1.6596704005028333, + "grad_norm": 0.03187654912471771, + "learning_rate": 8.563999723007515e-07, + "loss": 0.0009, + "step": 258770 + }, + { + "epoch": 1.6597345373966195, + "grad_norm": 0.07724946737289429, + "learning_rate": 8.560867527533512e-07, + "loss": 0.0006, + "step": 258780 + }, + { + "epoch": 1.6597986742904054, + "grad_norm": 0.09878451377153397, + "learning_rate": 8.557735851321442e-07, + "loss": 0.0013, + "step": 258790 + }, + { + "epoch": 1.6598628111841915, + "grad_norm": 0.005886028986424208, + "learning_rate": 8.554604694410568e-07, + "loss": 0.0007, + "step": 258800 + }, + { + "epoch": 1.6599269480779775, + "grad_norm": 0.05878474935889244, + "learning_rate": 8.55147405684012e-07, + "loss": 0.0008, + "step": 258810 + }, + { + "epoch": 1.6599910849717636, + "grad_norm": 0.061283230781555176, + "learning_rate": 8.548343938649312e-07, + "loss": 0.0013, + "step": 258820 + }, + { + "epoch": 1.6600552218655498, + "grad_norm": 0.06955339014530182, + "learning_rate": 8.545214339877395e-07, + "loss": 0.0013, + "step": 258830 + }, + { + "epoch": 1.660119358759336, + "grad_norm": 0.054750386625528336, + "learning_rate": 8.542085260563565e-07, + "loss": 0.0016, + "step": 258840 + }, + { + "epoch": 1.660183495653122, + "grad_norm": 0.0939524918794632, + "learning_rate": 8.538956700747042e-07, + "loss": 0.0005, + "step": 258850 + }, + { + "epoch": 1.6602476325469082, + "grad_norm": 0.10427170246839523, + "learning_rate": 8.535828660467005e-07, + "loss": 0.0018, + "step": 258860 + }, + { + "epoch": 1.6603117694406944, + "grad_norm": 0.18324019014835358, + "learning_rate": 8.532701139762683e-07, + "loss": 0.0015, + "step": 258870 + }, + { + "epoch": 1.6603759063344803, + "grad_norm": 0.03328492119908333, + "learning_rate": 8.529574138673258e-07, + "loss": 0.001, + "step": 258880 + }, + { + "epoch": 1.6604400432282664, + "grad_norm": 0.058263231068849564, + "learning_rate": 8.526447657237901e-07, + "loss": 0.0011, + "step": 258890 + }, + { + "epoch": 1.6605041801220524, + "grad_norm": 0.09907954931259155, + "learning_rate": 8.523321695495785e-07, + "loss": 0.0005, + "step": 258900 + }, + { + "epoch": 1.6605683170158385, + "grad_norm": 0.13083921372890472, + "learning_rate": 8.52019625348609e-07, + "loss": 0.001, + "step": 258910 + }, + { + "epoch": 1.6606324539096247, + "grad_norm": 0.008092692121863365, + "learning_rate": 8.517071331247995e-07, + "loss": 0.001, + "step": 258920 + }, + { + "epoch": 1.6606965908034108, + "grad_norm": 0.08676736056804657, + "learning_rate": 8.513946928820644e-07, + "loss": 0.001, + "step": 258930 + }, + { + "epoch": 1.660760727697197, + "grad_norm": 0.05568745732307434, + "learning_rate": 8.510823046243171e-07, + "loss": 0.0018, + "step": 258940 + }, + { + "epoch": 1.660824864590983, + "grad_norm": 0.08479592204093933, + "learning_rate": 8.507699683554755e-07, + "loss": 0.0013, + "step": 258950 + }, + { + "epoch": 1.6608890014847693, + "grad_norm": 0.27204254269599915, + "learning_rate": 8.504576840794516e-07, + "loss": 0.0009, + "step": 258960 + }, + { + "epoch": 1.6609531383785552, + "grad_norm": 0.09179872274398804, + "learning_rate": 8.501454518001579e-07, + "loss": 0.0009, + "step": 258970 + }, + { + "epoch": 1.6610172752723413, + "grad_norm": 0.08390912413597107, + "learning_rate": 8.498332715215068e-07, + "loss": 0.001, + "step": 258980 + }, + { + "epoch": 1.6610814121661273, + "grad_norm": 0.007385471370071173, + "learning_rate": 8.495211432474121e-07, + "loss": 0.0014, + "step": 258990 + }, + { + "epoch": 1.6611455490599134, + "grad_norm": 0.03575975447893143, + "learning_rate": 8.492090669817837e-07, + "loss": 0.0009, + "step": 259000 + }, + { + "epoch": 1.6612096859536996, + "grad_norm": 0.12160173803567886, + "learning_rate": 8.48897042728532e-07, + "loss": 0.0014, + "step": 259010 + }, + { + "epoch": 1.6612738228474857, + "grad_norm": 0.13452289998531342, + "learning_rate": 8.485850704915665e-07, + "loss": 0.0019, + "step": 259020 + }, + { + "epoch": 1.6613379597412719, + "grad_norm": 0.12849852442741394, + "learning_rate": 8.482731502747976e-07, + "loss": 0.0012, + "step": 259030 + }, + { + "epoch": 1.661402096635058, + "grad_norm": 0.028622470796108246, + "learning_rate": 8.479612820821337e-07, + "loss": 0.0019, + "step": 259040 + }, + { + "epoch": 1.661466233528844, + "grad_norm": 0.11926902085542679, + "learning_rate": 8.476494659174822e-07, + "loss": 0.001, + "step": 259050 + }, + { + "epoch": 1.66153037042263, + "grad_norm": 0.11900042742490768, + "learning_rate": 8.473377017847495e-07, + "loss": 0.0015, + "step": 259060 + }, + { + "epoch": 1.661594507316416, + "grad_norm": 0.09407677501440048, + "learning_rate": 8.470259896878442e-07, + "loss": 0.0021, + "step": 259070 + }, + { + "epoch": 1.6616586442102022, + "grad_norm": 0.09666857123374939, + "learning_rate": 8.467143296306718e-07, + "loss": 0.001, + "step": 259080 + }, + { + "epoch": 1.6617227811039883, + "grad_norm": 0.0920310914516449, + "learning_rate": 8.464027216171356e-07, + "loss": 0.0015, + "step": 259090 + }, + { + "epoch": 1.6617869179977744, + "grad_norm": 0.09433083236217499, + "learning_rate": 8.460911656511428e-07, + "loss": 0.0009, + "step": 259100 + }, + { + "epoch": 1.6618510548915606, + "grad_norm": 0.13251636922359467, + "learning_rate": 8.45779661736597e-07, + "loss": 0.0015, + "step": 259110 + }, + { + "epoch": 1.6619151917853467, + "grad_norm": 0.010854779742658138, + "learning_rate": 8.454682098774003e-07, + "loss": 0.0002, + "step": 259120 + }, + { + "epoch": 1.661979328679133, + "grad_norm": 0.11809447407722473, + "learning_rate": 8.451568100774554e-07, + "loss": 0.0013, + "step": 259130 + }, + { + "epoch": 1.6620434655729188, + "grad_norm": 0.19234363734722137, + "learning_rate": 8.448454623406666e-07, + "loss": 0.0006, + "step": 259140 + }, + { + "epoch": 1.662107602466705, + "grad_norm": 0.027598833665251732, + "learning_rate": 8.445341666709334e-07, + "loss": 0.0028, + "step": 259150 + }, + { + "epoch": 1.662171739360491, + "grad_norm": 0.03952634334564209, + "learning_rate": 8.442229230721572e-07, + "loss": 0.0016, + "step": 259160 + }, + { + "epoch": 1.662235876254277, + "grad_norm": 0.10926931351423264, + "learning_rate": 8.439117315482359e-07, + "loss": 0.0004, + "step": 259170 + }, + { + "epoch": 1.6623000131480632, + "grad_norm": 0.061621829867362976, + "learning_rate": 8.436005921030732e-07, + "loss": 0.0018, + "step": 259180 + }, + { + "epoch": 1.6623641500418493, + "grad_norm": 0.025558849796652794, + "learning_rate": 8.432895047405648e-07, + "loss": 0.0018, + "step": 259190 + }, + { + "epoch": 1.6624282869356355, + "grad_norm": 0.010318063199520111, + "learning_rate": 8.429784694646098e-07, + "loss": 0.0013, + "step": 259200 + }, + { + "epoch": 1.6624924238294216, + "grad_norm": 0.10450685024261475, + "learning_rate": 8.426674862791046e-07, + "loss": 0.0012, + "step": 259210 + }, + { + "epoch": 1.6625565607232076, + "grad_norm": 0.0915796086192131, + "learning_rate": 8.423565551879482e-07, + "loss": 0.0021, + "step": 259220 + }, + { + "epoch": 1.6626206976169937, + "grad_norm": 0.12747132778167725, + "learning_rate": 8.420456761950357e-07, + "loss": 0.001, + "step": 259230 + }, + { + "epoch": 1.6626848345107799, + "grad_norm": 0.19605652987957, + "learning_rate": 8.417348493042609e-07, + "loss": 0.0021, + "step": 259240 + }, + { + "epoch": 1.6627489714045658, + "grad_norm": 0.033247210085392, + "learning_rate": 8.414240745195218e-07, + "loss": 0.0019, + "step": 259250 + }, + { + "epoch": 1.662813108298352, + "grad_norm": 0.12935180962085724, + "learning_rate": 8.411133518447113e-07, + "loss": 0.0008, + "step": 259260 + }, + { + "epoch": 1.662877245192138, + "grad_norm": 0.06682842969894409, + "learning_rate": 8.408026812837222e-07, + "loss": 0.0011, + "step": 259270 + }, + { + "epoch": 1.6629413820859242, + "grad_norm": 0.07753019034862518, + "learning_rate": 8.404920628404473e-07, + "loss": 0.0015, + "step": 259280 + }, + { + "epoch": 1.6630055189797104, + "grad_norm": 0.030806515365839005, + "learning_rate": 8.40181496518781e-07, + "loss": 0.0012, + "step": 259290 + }, + { + "epoch": 1.6630696558734965, + "grad_norm": 0.028927655890583992, + "learning_rate": 8.398709823226131e-07, + "loss": 0.0013, + "step": 259300 + }, + { + "epoch": 1.6631337927672825, + "grad_norm": 0.3859172463417053, + "learning_rate": 8.395605202558349e-07, + "loss": 0.0028, + "step": 259310 + }, + { + "epoch": 1.6631979296610686, + "grad_norm": 0.004698690492659807, + "learning_rate": 8.392501103223361e-07, + "loss": 0.0005, + "step": 259320 + }, + { + "epoch": 1.6632620665548545, + "grad_norm": 0.18692345917224884, + "learning_rate": 8.389397525260079e-07, + "loss": 0.0013, + "step": 259330 + }, + { + "epoch": 1.6633262034486407, + "grad_norm": 0.10133519023656845, + "learning_rate": 8.386294468707384e-07, + "loss": 0.0025, + "step": 259340 + }, + { + "epoch": 1.6633903403424268, + "grad_norm": 0.058625735342502594, + "learning_rate": 8.383191933604151e-07, + "loss": 0.0006, + "step": 259350 + }, + { + "epoch": 1.663454477236213, + "grad_norm": 0.14901982247829437, + "learning_rate": 8.380089919989276e-07, + "loss": 0.0015, + "step": 259360 + }, + { + "epoch": 1.6635186141299991, + "grad_norm": 0.03865260258316994, + "learning_rate": 8.376988427901622e-07, + "loss": 0.0039, + "step": 259370 + }, + { + "epoch": 1.6635827510237853, + "grad_norm": 0.03860464319586754, + "learning_rate": 8.373887457380047e-07, + "loss": 0.0011, + "step": 259380 + }, + { + "epoch": 1.6636468879175714, + "grad_norm": 0.06766288727521896, + "learning_rate": 8.370787008463404e-07, + "loss": 0.0011, + "step": 259390 + }, + { + "epoch": 1.6637110248113574, + "grad_norm": 0.07876670360565186, + "learning_rate": 8.367687081190562e-07, + "loss": 0.0013, + "step": 259400 + }, + { + "epoch": 1.6637751617051435, + "grad_norm": 0.13151301443576813, + "learning_rate": 8.364587675600355e-07, + "loss": 0.0018, + "step": 259410 + }, + { + "epoch": 1.6638392985989294, + "grad_norm": 0.040049389004707336, + "learning_rate": 8.36148879173162e-07, + "loss": 0.0006, + "step": 259420 + }, + { + "epoch": 1.6639034354927156, + "grad_norm": 0.057279810309410095, + "learning_rate": 8.358390429623181e-07, + "loss": 0.0008, + "step": 259430 + }, + { + "epoch": 1.6639675723865017, + "grad_norm": 0.11955063790082932, + "learning_rate": 8.355292589313879e-07, + "loss": 0.0009, + "step": 259440 + }, + { + "epoch": 1.6640317092802879, + "grad_norm": 0.1107025220990181, + "learning_rate": 8.352195270842522e-07, + "loss": 0.0013, + "step": 259450 + }, + { + "epoch": 1.664095846174074, + "grad_norm": 0.048308372497558594, + "learning_rate": 8.349098474247924e-07, + "loss": 0.0007, + "step": 259460 + }, + { + "epoch": 1.6641599830678602, + "grad_norm": 0.03436093404889107, + "learning_rate": 8.346002199568881e-07, + "loss": 0.0009, + "step": 259470 + }, + { + "epoch": 1.664224119961646, + "grad_norm": 0.0168591495603323, + "learning_rate": 8.342906446844212e-07, + "loss": 0.0051, + "step": 259480 + }, + { + "epoch": 1.6642882568554322, + "grad_norm": 0.07112542539834976, + "learning_rate": 8.339811216112698e-07, + "loss": 0.0011, + "step": 259490 + }, + { + "epoch": 1.6643523937492182, + "grad_norm": 0.13269291818141937, + "learning_rate": 8.336716507413107e-07, + "loss": 0.0016, + "step": 259500 + }, + { + "epoch": 1.6644165306430043, + "grad_norm": 0.1593928039073944, + "learning_rate": 8.333622320784246e-07, + "loss": 0.0038, + "step": 259510 + }, + { + "epoch": 1.6644806675367905, + "grad_norm": 0.09930547326803207, + "learning_rate": 8.33052865626488e-07, + "loss": 0.0011, + "step": 259520 + }, + { + "epoch": 1.6645448044305766, + "grad_norm": 0.034947656095027924, + "learning_rate": 8.327435513893767e-07, + "loss": 0.0009, + "step": 259530 + }, + { + "epoch": 1.6646089413243628, + "grad_norm": 0.05803503096103668, + "learning_rate": 8.324342893709664e-07, + "loss": 0.0015, + "step": 259540 + }, + { + "epoch": 1.664673078218149, + "grad_norm": 0.05112419277429581, + "learning_rate": 8.321250795751334e-07, + "loss": 0.0005, + "step": 259550 + }, + { + "epoch": 1.664737215111935, + "grad_norm": 0.04191881790757179, + "learning_rate": 8.318159220057526e-07, + "loss": 0.0005, + "step": 259560 + }, + { + "epoch": 1.664801352005721, + "grad_norm": 0.08693952113389969, + "learning_rate": 8.315068166666967e-07, + "loss": 0.0023, + "step": 259570 + }, + { + "epoch": 1.6648654888995071, + "grad_norm": 0.025236818939447403, + "learning_rate": 8.311977635618385e-07, + "loss": 0.0009, + "step": 259580 + }, + { + "epoch": 1.664929625793293, + "grad_norm": 0.05903942137956619, + "learning_rate": 8.30888762695053e-07, + "loss": 0.001, + "step": 259590 + }, + { + "epoch": 1.6649937626870792, + "grad_norm": 0.06373009830713272, + "learning_rate": 8.30579814070211e-07, + "loss": 0.0009, + "step": 259600 + }, + { + "epoch": 1.6650578995808654, + "grad_norm": 0.040365107357501984, + "learning_rate": 8.302709176911838e-07, + "loss": 0.0013, + "step": 259610 + }, + { + "epoch": 1.6651220364746515, + "grad_norm": 0.13631953299045563, + "learning_rate": 8.299620735618408e-07, + "loss": 0.0016, + "step": 259620 + }, + { + "epoch": 1.6651861733684377, + "grad_norm": 0.14050503075122833, + "learning_rate": 8.296532816860542e-07, + "loss": 0.001, + "step": 259630 + }, + { + "epoch": 1.6652503102622238, + "grad_norm": 0.07822521030902863, + "learning_rate": 8.293445420676927e-07, + "loss": 0.0011, + "step": 259640 + }, + { + "epoch": 1.6653144471560097, + "grad_norm": 0.05127008259296417, + "learning_rate": 8.290358547106237e-07, + "loss": 0.001, + "step": 259650 + }, + { + "epoch": 1.6653785840497959, + "grad_norm": 0.07713084667921066, + "learning_rate": 8.287272196187174e-07, + "loss": 0.0012, + "step": 259660 + }, + { + "epoch": 1.665442720943582, + "grad_norm": 0.07243801653385162, + "learning_rate": 8.284186367958402e-07, + "loss": 0.0028, + "step": 259670 + }, + { + "epoch": 1.665506857837368, + "grad_norm": 0.046422865241765976, + "learning_rate": 8.28110106245858e-07, + "loss": 0.0008, + "step": 259680 + }, + { + "epoch": 1.665570994731154, + "grad_norm": 0.3993385136127472, + "learning_rate": 8.278016279726386e-07, + "loss": 0.0012, + "step": 259690 + }, + { + "epoch": 1.6656351316249403, + "grad_norm": 0.011559012345969677, + "learning_rate": 8.274932019800458e-07, + "loss": 0.0011, + "step": 259700 + }, + { + "epoch": 1.6656992685187264, + "grad_norm": 0.20504416525363922, + "learning_rate": 8.271848282719463e-07, + "loss": 0.0024, + "step": 259710 + }, + { + "epoch": 1.6657634054125126, + "grad_norm": 0.007328205741941929, + "learning_rate": 8.268765068522028e-07, + "loss": 0.0012, + "step": 259720 + }, + { + "epoch": 1.6658275423062987, + "grad_norm": 0.06418116390705109, + "learning_rate": 8.265682377246797e-07, + "loss": 0.002, + "step": 259730 + }, + { + "epoch": 1.6658916792000846, + "grad_norm": 0.0644039437174797, + "learning_rate": 8.262600208932375e-07, + "loss": 0.0013, + "step": 259740 + }, + { + "epoch": 1.6659558160938708, + "grad_norm": 0.0812007486820221, + "learning_rate": 8.259518563617419e-07, + "loss": 0.001, + "step": 259750 + }, + { + "epoch": 1.6660199529876567, + "grad_norm": 0.22765526175498962, + "learning_rate": 8.256437441340525e-07, + "loss": 0.0044, + "step": 259760 + }, + { + "epoch": 1.6660840898814429, + "grad_norm": 0.0717020258307457, + "learning_rate": 8.253356842140293e-07, + "loss": 0.0008, + "step": 259770 + }, + { + "epoch": 1.666148226775229, + "grad_norm": 0.11149182170629501, + "learning_rate": 8.250276766055343e-07, + "loss": 0.0014, + "step": 259780 + }, + { + "epoch": 1.6662123636690152, + "grad_norm": 0.05467083305120468, + "learning_rate": 8.247197213124269e-07, + "loss": 0.0007, + "step": 259790 + }, + { + "epoch": 1.6662765005628013, + "grad_norm": 0.030334288254380226, + "learning_rate": 8.244118183385652e-07, + "loss": 0.0021, + "step": 259800 + }, + { + "epoch": 1.6663406374565874, + "grad_norm": 0.044191643595695496, + "learning_rate": 8.24103967687806e-07, + "loss": 0.0012, + "step": 259810 + }, + { + "epoch": 1.6664047743503736, + "grad_norm": 0.047531258314847946, + "learning_rate": 8.237961693640101e-07, + "loss": 0.0015, + "step": 259820 + }, + { + "epoch": 1.6664689112441595, + "grad_norm": 0.10695334523916245, + "learning_rate": 8.234884233710333e-07, + "loss": 0.0012, + "step": 259830 + }, + { + "epoch": 1.6665330481379457, + "grad_norm": 0.06404314935207367, + "learning_rate": 8.231807297127309e-07, + "loss": 0.0014, + "step": 259840 + }, + { + "epoch": 1.6665971850317316, + "grad_norm": 0.05008331313729286, + "learning_rate": 8.228730883929581e-07, + "loss": 0.0011, + "step": 259850 + }, + { + "epoch": 1.6666613219255177, + "grad_norm": 0.0490146204829216, + "learning_rate": 8.225654994155718e-07, + "loss": 0.0019, + "step": 259860 + }, + { + "epoch": 1.666725458819304, + "grad_norm": 0.002646214794367552, + "learning_rate": 8.222579627844257e-07, + "loss": 0.0011, + "step": 259870 + }, + { + "epoch": 1.66678959571309, + "grad_norm": 0.008767073974013329, + "learning_rate": 8.219504785033733e-07, + "loss": 0.0018, + "step": 259880 + }, + { + "epoch": 1.6668537326068762, + "grad_norm": 0.009336371906101704, + "learning_rate": 8.216430465762659e-07, + "loss": 0.001, + "step": 259890 + }, + { + "epoch": 1.6669178695006623, + "grad_norm": 0.047865401953458786, + "learning_rate": 8.213356670069583e-07, + "loss": 0.0021, + "step": 259900 + }, + { + "epoch": 1.6669820063944483, + "grad_norm": 0.023457759991288185, + "learning_rate": 8.210283397993018e-07, + "loss": 0.0012, + "step": 259910 + }, + { + "epoch": 1.6670461432882344, + "grad_norm": 0.20852166414260864, + "learning_rate": 8.207210649571451e-07, + "loss": 0.0031, + "step": 259920 + }, + { + "epoch": 1.6671102801820203, + "grad_norm": 0.017165524885058403, + "learning_rate": 8.204138424843422e-07, + "loss": 0.0005, + "step": 259930 + }, + { + "epoch": 1.6671744170758065, + "grad_norm": 0.020751260221004486, + "learning_rate": 8.201066723847406e-07, + "loss": 0.0011, + "step": 259940 + }, + { + "epoch": 1.6672385539695926, + "grad_norm": 0.042478736490011215, + "learning_rate": 8.197995546621895e-07, + "loss": 0.0004, + "step": 259950 + }, + { + "epoch": 1.6673026908633788, + "grad_norm": 0.05359106883406639, + "learning_rate": 8.194924893205363e-07, + "loss": 0.001, + "step": 259960 + }, + { + "epoch": 1.667366827757165, + "grad_norm": 0.1899213045835495, + "learning_rate": 8.191854763636314e-07, + "loss": 0.0014, + "step": 259970 + }, + { + "epoch": 1.667430964650951, + "grad_norm": 0.02799876406788826, + "learning_rate": 8.188785157953205e-07, + "loss": 0.0007, + "step": 259980 + }, + { + "epoch": 1.6674951015447372, + "grad_norm": 0.0149038415402174, + "learning_rate": 8.1857160761945e-07, + "loss": 0.0013, + "step": 259990 + }, + { + "epoch": 1.6675592384385232, + "grad_norm": 0.03285687044262886, + "learning_rate": 8.182647518398645e-07, + "loss": 0.0015, + "step": 260000 + }, + { + "epoch": 1.6676233753323093, + "grad_norm": 0.09179037064313889, + "learning_rate": 8.179579484604116e-07, + "loss": 0.002, + "step": 260010 + }, + { + "epoch": 1.6676875122260952, + "grad_norm": 0.08945292234420776, + "learning_rate": 8.176511974849344e-07, + "loss": 0.0005, + "step": 260020 + }, + { + "epoch": 1.6677516491198814, + "grad_norm": 0.14974276721477509, + "learning_rate": 8.173444989172763e-07, + "loss": 0.002, + "step": 260030 + }, + { + "epoch": 1.6678157860136675, + "grad_norm": 0.044630106538534164, + "learning_rate": 8.170378527612804e-07, + "loss": 0.0011, + "step": 260040 + }, + { + "epoch": 1.6678799229074537, + "grad_norm": 0.026970086619257927, + "learning_rate": 8.167312590207905e-07, + "loss": 0.0006, + "step": 260050 + }, + { + "epoch": 1.6679440598012398, + "grad_norm": 0.019978389143943787, + "learning_rate": 8.164247176996476e-07, + "loss": 0.0013, + "step": 260060 + }, + { + "epoch": 1.668008196695026, + "grad_norm": 0.2422911822795868, + "learning_rate": 8.161182288016922e-07, + "loss": 0.0009, + "step": 260070 + }, + { + "epoch": 1.668072333588812, + "grad_norm": 0.061068106442689896, + "learning_rate": 8.158117923307662e-07, + "loss": 0.0008, + "step": 260080 + }, + { + "epoch": 1.668136470482598, + "grad_norm": 0.020502885803580284, + "learning_rate": 8.155054082907093e-07, + "loss": 0.0013, + "step": 260090 + }, + { + "epoch": 1.6682006073763842, + "grad_norm": 0.05441015586256981, + "learning_rate": 8.151990766853602e-07, + "loss": 0.0008, + "step": 260100 + }, + { + "epoch": 1.6682647442701701, + "grad_norm": 0.08081910759210587, + "learning_rate": 8.148927975185561e-07, + "loss": 0.0013, + "step": 260110 + }, + { + "epoch": 1.6683288811639563, + "grad_norm": 0.0780453011393547, + "learning_rate": 8.145865707941375e-07, + "loss": 0.0016, + "step": 260120 + }, + { + "epoch": 1.6683930180577424, + "grad_norm": 0.46676942706108093, + "learning_rate": 8.142803965159407e-07, + "loss": 0.0006, + "step": 260130 + }, + { + "epoch": 1.6684571549515286, + "grad_norm": 0.07748177647590637, + "learning_rate": 8.139742746878021e-07, + "loss": 0.0019, + "step": 260140 + }, + { + "epoch": 1.6685212918453147, + "grad_norm": 0.05093437433242798, + "learning_rate": 8.136682053135558e-07, + "loss": 0.0012, + "step": 260150 + }, + { + "epoch": 1.6685854287391009, + "grad_norm": 0.3314497768878937, + "learning_rate": 8.133621883970405e-07, + "loss": 0.0019, + "step": 260160 + }, + { + "epoch": 1.6686495656328868, + "grad_norm": 0.06093122810125351, + "learning_rate": 8.130562239420886e-07, + "loss": 0.0011, + "step": 260170 + }, + { + "epoch": 1.668713702526673, + "grad_norm": 0.08686614036560059, + "learning_rate": 8.127503119525349e-07, + "loss": 0.0013, + "step": 260180 + }, + { + "epoch": 1.6687778394204589, + "grad_norm": 0.0773782730102539, + "learning_rate": 8.124444524322111e-07, + "loss": 0.0023, + "step": 260190 + }, + { + "epoch": 1.668841976314245, + "grad_norm": 0.15026801824569702, + "learning_rate": 8.121386453849528e-07, + "loss": 0.0018, + "step": 260200 + }, + { + "epoch": 1.6689061132080312, + "grad_norm": 0.0033452792558819056, + "learning_rate": 8.118328908145895e-07, + "loss": 0.0014, + "step": 260210 + }, + { + "epoch": 1.6689702501018173, + "grad_norm": 0.12269829213619232, + "learning_rate": 8.115271887249527e-07, + "loss": 0.0008, + "step": 260220 + }, + { + "epoch": 1.6690343869956035, + "grad_norm": 0.050269342958927155, + "learning_rate": 8.112215391198747e-07, + "loss": 0.0004, + "step": 260230 + }, + { + "epoch": 1.6690985238893896, + "grad_norm": 0.047745510935783386, + "learning_rate": 8.109159420031843e-07, + "loss": 0.0015, + "step": 260240 + }, + { + "epoch": 1.6691626607831758, + "grad_norm": 0.12586195766925812, + "learning_rate": 8.106103973787116e-07, + "loss": 0.0015, + "step": 260250 + }, + { + "epoch": 1.6692267976769617, + "grad_norm": 0.22415123879909515, + "learning_rate": 8.103049052502832e-07, + "loss": 0.0008, + "step": 260260 + }, + { + "epoch": 1.6692909345707478, + "grad_norm": 0.13841083645820618, + "learning_rate": 8.099994656217297e-07, + "loss": 0.0014, + "step": 260270 + }, + { + "epoch": 1.6693550714645338, + "grad_norm": 0.04546148329973221, + "learning_rate": 8.096940784968776e-07, + "loss": 0.0013, + "step": 260280 + }, + { + "epoch": 1.66941920835832, + "grad_norm": 0.2795056700706482, + "learning_rate": 8.093887438795534e-07, + "loss": 0.0019, + "step": 260290 + }, + { + "epoch": 1.669483345252106, + "grad_norm": 0.18928854167461395, + "learning_rate": 8.090834617735816e-07, + "loss": 0.0016, + "step": 260300 + }, + { + "epoch": 1.6695474821458922, + "grad_norm": 0.1277276575565338, + "learning_rate": 8.087782321827909e-07, + "loss": 0.0011, + "step": 260310 + }, + { + "epoch": 1.6696116190396784, + "grad_norm": 0.048505738377571106, + "learning_rate": 8.084730551110043e-07, + "loss": 0.0012, + "step": 260320 + }, + { + "epoch": 1.6696757559334645, + "grad_norm": 0.040552858263254166, + "learning_rate": 8.08167930562046e-07, + "loss": 0.0023, + "step": 260330 + }, + { + "epoch": 1.6697398928272504, + "grad_norm": 0.06651022285223007, + "learning_rate": 8.078628585397386e-07, + "loss": 0.001, + "step": 260340 + }, + { + "epoch": 1.6698040297210366, + "grad_norm": 0.13170164823532104, + "learning_rate": 8.075578390479061e-07, + "loss": 0.0015, + "step": 260350 + }, + { + "epoch": 1.6698681666148225, + "grad_norm": 0.09963278472423553, + "learning_rate": 8.072528720903705e-07, + "loss": 0.0006, + "step": 260360 + }, + { + "epoch": 1.6699323035086087, + "grad_norm": 0.04172990471124649, + "learning_rate": 8.069479576709521e-07, + "loss": 0.0013, + "step": 260370 + }, + { + "epoch": 1.6699964404023948, + "grad_norm": 0.0824194923043251, + "learning_rate": 8.066430957934729e-07, + "loss": 0.0004, + "step": 260380 + }, + { + "epoch": 1.670060577296181, + "grad_norm": 0.06737431138753891, + "learning_rate": 8.063382864617536e-07, + "loss": 0.0012, + "step": 260390 + }, + { + "epoch": 1.670124714189967, + "grad_norm": 0.1556253284215927, + "learning_rate": 8.060335296796118e-07, + "loss": 0.0009, + "step": 260400 + }, + { + "epoch": 1.6701888510837533, + "grad_norm": 0.01636998914182186, + "learning_rate": 8.057288254508667e-07, + "loss": 0.0008, + "step": 260410 + }, + { + "epoch": 1.6702529879775394, + "grad_norm": 0.010741271078586578, + "learning_rate": 8.054241737793378e-07, + "loss": 0.002, + "step": 260420 + }, + { + "epoch": 1.6703171248713253, + "grad_norm": 0.023520736023783684, + "learning_rate": 8.051195746688423e-07, + "loss": 0.0008, + "step": 260430 + }, + { + "epoch": 1.6703812617651115, + "grad_norm": 0.03630809485912323, + "learning_rate": 8.048150281231953e-07, + "loss": 0.0009, + "step": 260440 + }, + { + "epoch": 1.6704453986588974, + "grad_norm": 0.09315015375614166, + "learning_rate": 8.04510534146215e-07, + "loss": 0.0009, + "step": 260450 + }, + { + "epoch": 1.6705095355526836, + "grad_norm": 0.08812452107667923, + "learning_rate": 8.04206092741715e-07, + "loss": 0.0013, + "step": 260460 + }, + { + "epoch": 1.6705736724464697, + "grad_norm": 0.10664328932762146, + "learning_rate": 8.039017039135133e-07, + "loss": 0.0009, + "step": 260470 + }, + { + "epoch": 1.6706378093402559, + "grad_norm": 0.13169336318969727, + "learning_rate": 8.035973676654212e-07, + "loss": 0.0009, + "step": 260480 + }, + { + "epoch": 1.670701946234042, + "grad_norm": 0.19953066110610962, + "learning_rate": 8.032930840012527e-07, + "loss": 0.0013, + "step": 260490 + }, + { + "epoch": 1.6707660831278281, + "grad_norm": 0.04290478676557541, + "learning_rate": 8.02988852924822e-07, + "loss": 0.0013, + "step": 260500 + }, + { + "epoch": 1.6708302200216143, + "grad_norm": 0.03743245452642441, + "learning_rate": 8.026846744399408e-07, + "loss": 0.0012, + "step": 260510 + }, + { + "epoch": 1.6708943569154002, + "grad_norm": 0.1103561744093895, + "learning_rate": 8.023805485504204e-07, + "loss": 0.0015, + "step": 260520 + }, + { + "epoch": 1.6709584938091864, + "grad_norm": 0.0029104137793183327, + "learning_rate": 8.020764752600702e-07, + "loss": 0.001, + "step": 260530 + }, + { + "epoch": 1.6710226307029723, + "grad_norm": 0.04735827445983887, + "learning_rate": 8.017724545727035e-07, + "loss": 0.0012, + "step": 260540 + }, + { + "epoch": 1.6710867675967584, + "grad_norm": 0.12044599652290344, + "learning_rate": 8.014684864921279e-07, + "loss": 0.0009, + "step": 260550 + }, + { + "epoch": 1.6711509044905446, + "grad_norm": 0.009900239296257496, + "learning_rate": 8.011645710221533e-07, + "loss": 0.0009, + "step": 260560 + }, + { + "epoch": 1.6712150413843307, + "grad_norm": 0.04917309433221817, + "learning_rate": 8.008607081665864e-07, + "loss": 0.0005, + "step": 260570 + }, + { + "epoch": 1.671279178278117, + "grad_norm": 0.12651683390140533, + "learning_rate": 8.005568979292367e-07, + "loss": 0.0019, + "step": 260580 + }, + { + "epoch": 1.671343315171903, + "grad_norm": 0.0106301698833704, + "learning_rate": 8.002531403139108e-07, + "loss": 0.0033, + "step": 260590 + }, + { + "epoch": 1.671407452065689, + "grad_norm": 0.10813277214765549, + "learning_rate": 7.999494353244142e-07, + "loss": 0.0009, + "step": 260600 + }, + { + "epoch": 1.6714715889594751, + "grad_norm": 0.05767657235264778, + "learning_rate": 7.996457829645516e-07, + "loss": 0.0023, + "step": 260610 + }, + { + "epoch": 1.671535725853261, + "grad_norm": 0.02989642694592476, + "learning_rate": 7.99342183238131e-07, + "loss": 0.0009, + "step": 260620 + }, + { + "epoch": 1.6715998627470472, + "grad_norm": 0.053140442818403244, + "learning_rate": 7.990386361489544e-07, + "loss": 0.0031, + "step": 260630 + }, + { + "epoch": 1.6716639996408333, + "grad_norm": 0.09720054268836975, + "learning_rate": 7.987351417008249e-07, + "loss": 0.0006, + "step": 260640 + }, + { + "epoch": 1.6717281365346195, + "grad_norm": 0.05448118597269058, + "learning_rate": 7.984316998975484e-07, + "loss": 0.0018, + "step": 260650 + }, + { + "epoch": 1.6717922734284056, + "grad_norm": 0.12441521883010864, + "learning_rate": 7.98128310742925e-07, + "loss": 0.0015, + "step": 260660 + }, + { + "epoch": 1.6718564103221918, + "grad_norm": 0.045425478368997574, + "learning_rate": 7.978249742407573e-07, + "loss": 0.0015, + "step": 260670 + }, + { + "epoch": 1.671920547215978, + "grad_norm": 0.019230369478464127, + "learning_rate": 7.975216903948446e-07, + "loss": 0.0005, + "step": 260680 + }, + { + "epoch": 1.6719846841097639, + "grad_norm": 0.04719553887844086, + "learning_rate": 7.972184592089899e-07, + "loss": 0.0008, + "step": 260690 + }, + { + "epoch": 1.67204882100355, + "grad_norm": 0.04122736304998398, + "learning_rate": 7.969152806869912e-07, + "loss": 0.001, + "step": 260700 + }, + { + "epoch": 1.672112957897336, + "grad_norm": 0.07827562838792801, + "learning_rate": 7.966121548326483e-07, + "loss": 0.0009, + "step": 260710 + }, + { + "epoch": 1.672177094791122, + "grad_norm": 0.1230246052145958, + "learning_rate": 7.963090816497576e-07, + "loss": 0.0015, + "step": 260720 + }, + { + "epoch": 1.6722412316849082, + "grad_norm": 0.5096327066421509, + "learning_rate": 7.9600606114212e-07, + "loss": 0.0013, + "step": 260730 + }, + { + "epoch": 1.6723053685786944, + "grad_norm": 0.19383084774017334, + "learning_rate": 7.957030933135312e-07, + "loss": 0.0015, + "step": 260740 + }, + { + "epoch": 1.6723695054724805, + "grad_norm": 0.056503500789403915, + "learning_rate": 7.954001781677872e-07, + "loss": 0.0018, + "step": 260750 + }, + { + "epoch": 1.6724336423662667, + "grad_norm": 0.16064177453517914, + "learning_rate": 7.950973157086827e-07, + "loss": 0.0007, + "step": 260760 + }, + { + "epoch": 1.6724977792600526, + "grad_norm": 0.038757264614105225, + "learning_rate": 7.947945059400153e-07, + "loss": 0.0011, + "step": 260770 + }, + { + "epoch": 1.6725619161538388, + "grad_norm": 0.080266073346138, + "learning_rate": 7.944917488655779e-07, + "loss": 0.0014, + "step": 260780 + }, + { + "epoch": 1.672626053047625, + "grad_norm": 0.0057196966372430325, + "learning_rate": 7.941890444891637e-07, + "loss": 0.0006, + "step": 260790 + }, + { + "epoch": 1.6726901899414108, + "grad_norm": 0.14382421970367432, + "learning_rate": 7.938863928145674e-07, + "loss": 0.0012, + "step": 260800 + }, + { + "epoch": 1.672754326835197, + "grad_norm": 0.15724822878837585, + "learning_rate": 7.935837938455809e-07, + "loss": 0.0007, + "step": 260810 + }, + { + "epoch": 1.6728184637289831, + "grad_norm": 0.06264682859182358, + "learning_rate": 7.932812475859958e-07, + "loss": 0.0006, + "step": 260820 + }, + { + "epoch": 1.6728826006227693, + "grad_norm": 0.1835131198167801, + "learning_rate": 7.929787540396016e-07, + "loss": 0.0012, + "step": 260830 + }, + { + "epoch": 1.6729467375165554, + "grad_norm": 0.020518098026514053, + "learning_rate": 7.926763132101923e-07, + "loss": 0.0008, + "step": 260840 + }, + { + "epoch": 1.6730108744103416, + "grad_norm": 0.03425362706184387, + "learning_rate": 7.92373925101555e-07, + "loss": 0.0012, + "step": 260850 + }, + { + "epoch": 1.6730750113041275, + "grad_norm": 0.010144386440515518, + "learning_rate": 7.920715897174796e-07, + "loss": 0.0014, + "step": 260860 + }, + { + "epoch": 1.6731391481979137, + "grad_norm": 0.020300816744565964, + "learning_rate": 7.917693070617538e-07, + "loss": 0.001, + "step": 260870 + }, + { + "epoch": 1.6732032850916996, + "grad_norm": 0.14274899661540985, + "learning_rate": 7.914670771381671e-07, + "loss": 0.0011, + "step": 260880 + }, + { + "epoch": 1.6732674219854857, + "grad_norm": 0.09293682128190994, + "learning_rate": 7.911648999505056e-07, + "loss": 0.0007, + "step": 260890 + }, + { + "epoch": 1.6733315588792719, + "grad_norm": 0.004996034782379866, + "learning_rate": 7.908627755025561e-07, + "loss": 0.0012, + "step": 260900 + }, + { + "epoch": 1.673395695773058, + "grad_norm": 0.07686932384967804, + "learning_rate": 7.905607037981028e-07, + "loss": 0.001, + "step": 260910 + }, + { + "epoch": 1.6734598326668442, + "grad_norm": 0.10814297199249268, + "learning_rate": 7.902586848409338e-07, + "loss": 0.0012, + "step": 260920 + }, + { + "epoch": 1.6735239695606303, + "grad_norm": 0.2382301688194275, + "learning_rate": 7.899567186348322e-07, + "loss": 0.0008, + "step": 260930 + }, + { + "epoch": 1.6735881064544165, + "grad_norm": 0.032378844916820526, + "learning_rate": 7.896548051835801e-07, + "loss": 0.0013, + "step": 260940 + }, + { + "epoch": 1.6736522433482024, + "grad_norm": 0.0709405466914177, + "learning_rate": 7.893529444909642e-07, + "loss": 0.0008, + "step": 260950 + }, + { + "epoch": 1.6737163802419885, + "grad_norm": 0.09986542165279388, + "learning_rate": 7.890511365607645e-07, + "loss": 0.0006, + "step": 260960 + }, + { + "epoch": 1.6737805171357745, + "grad_norm": 0.2603265047073364, + "learning_rate": 7.887493813967639e-07, + "loss": 0.0011, + "step": 260970 + }, + { + "epoch": 1.6738446540295606, + "grad_norm": 0.03708268702030182, + "learning_rate": 7.884476790027424e-07, + "loss": 0.0008, + "step": 260980 + }, + { + "epoch": 1.6739087909233468, + "grad_norm": 0.11559338122606277, + "learning_rate": 7.881460293824827e-07, + "loss": 0.0013, + "step": 260990 + }, + { + "epoch": 1.673972927817133, + "grad_norm": 0.039508212357759476, + "learning_rate": 7.878444325397628e-07, + "loss": 0.0006, + "step": 261000 + }, + { + "epoch": 1.674037064710919, + "grad_norm": 0.1299441009759903, + "learning_rate": 7.875428884783631e-07, + "loss": 0.0012, + "step": 261010 + }, + { + "epoch": 1.6741012016047052, + "grad_norm": 0.07982337474822998, + "learning_rate": 7.872413972020598e-07, + "loss": 0.0011, + "step": 261020 + }, + { + "epoch": 1.6741653384984911, + "grad_norm": 0.0564100444316864, + "learning_rate": 7.869399587146342e-07, + "loss": 0.0011, + "step": 261030 + }, + { + "epoch": 1.6742294753922773, + "grad_norm": 0.17207171022891998, + "learning_rate": 7.866385730198623e-07, + "loss": 0.0006, + "step": 261040 + }, + { + "epoch": 1.6742936122860632, + "grad_norm": 0.033203672617673874, + "learning_rate": 7.863372401215186e-07, + "loss": 0.0008, + "step": 261050 + }, + { + "epoch": 1.6743577491798494, + "grad_norm": 0.037467509508132935, + "learning_rate": 7.860359600233819e-07, + "loss": 0.0012, + "step": 261060 + }, + { + "epoch": 1.6744218860736355, + "grad_norm": 0.035042110830545425, + "learning_rate": 7.857347327292269e-07, + "loss": 0.001, + "step": 261070 + }, + { + "epoch": 1.6744860229674217, + "grad_norm": 0.006249886937439442, + "learning_rate": 7.854335582428269e-07, + "loss": 0.0014, + "step": 261080 + }, + { + "epoch": 1.6745501598612078, + "grad_norm": 0.07606332749128342, + "learning_rate": 7.851324365679557e-07, + "loss": 0.0017, + "step": 261090 + }, + { + "epoch": 1.674614296754994, + "grad_norm": 0.032138582319021225, + "learning_rate": 7.848313677083885e-07, + "loss": 0.0009, + "step": 261100 + }, + { + "epoch": 1.67467843364878, + "grad_norm": 0.06362374871969223, + "learning_rate": 7.845303516678964e-07, + "loss": 0.0011, + "step": 261110 + }, + { + "epoch": 1.674742570542566, + "grad_norm": 0.027711695060133934, + "learning_rate": 7.842293884502523e-07, + "loss": 0.0012, + "step": 261120 + }, + { + "epoch": 1.6748067074363522, + "grad_norm": 0.034594226628541946, + "learning_rate": 7.83928478059226e-07, + "loss": 0.0018, + "step": 261130 + }, + { + "epoch": 1.674870844330138, + "grad_norm": 0.024958940222859383, + "learning_rate": 7.836276204985894e-07, + "loss": 0.0009, + "step": 261140 + }, + { + "epoch": 1.6749349812239243, + "grad_norm": 0.11396825313568115, + "learning_rate": 7.833268157721124e-07, + "loss": 0.0014, + "step": 261150 + }, + { + "epoch": 1.6749991181177104, + "grad_norm": 0.1585187166929245, + "learning_rate": 7.830260638835646e-07, + "loss": 0.001, + "step": 261160 + }, + { + "epoch": 1.6750632550114966, + "grad_norm": 0.027392275631427765, + "learning_rate": 7.827253648367122e-07, + "loss": 0.0006, + "step": 261170 + }, + { + "epoch": 1.6751273919052827, + "grad_norm": 0.08222039043903351, + "learning_rate": 7.824247186353262e-07, + "loss": 0.001, + "step": 261180 + }, + { + "epoch": 1.6751915287990689, + "grad_norm": 0.06914021819829941, + "learning_rate": 7.821241252831729e-07, + "loss": 0.0016, + "step": 261190 + }, + { + "epoch": 1.6752556656928548, + "grad_norm": 0.027489222586154938, + "learning_rate": 7.818235847840178e-07, + "loss": 0.001, + "step": 261200 + }, + { + "epoch": 1.675319802586641, + "grad_norm": 0.14536058902740479, + "learning_rate": 7.815230971416277e-07, + "loss": 0.0011, + "step": 261210 + }, + { + "epoch": 1.675383939480427, + "grad_norm": 0.05501720681786537, + "learning_rate": 7.812226623597691e-07, + "loss": 0.0012, + "step": 261220 + }, + { + "epoch": 1.675448076374213, + "grad_norm": 0.4984779357910156, + "learning_rate": 7.809222804422062e-07, + "loss": 0.0067, + "step": 261230 + }, + { + "epoch": 1.6755122132679992, + "grad_norm": 0.14796331524848938, + "learning_rate": 7.806219513927016e-07, + "loss": 0.0016, + "step": 261240 + }, + { + "epoch": 1.6755763501617853, + "grad_norm": 0.052086081355810165, + "learning_rate": 7.803216752150189e-07, + "loss": 0.0006, + "step": 261250 + }, + { + "epoch": 1.6756404870555714, + "grad_norm": 0.09541890025138855, + "learning_rate": 7.800214519129223e-07, + "loss": 0.0012, + "step": 261260 + }, + { + "epoch": 1.6757046239493576, + "grad_norm": 0.11986565589904785, + "learning_rate": 7.797212814901733e-07, + "loss": 0.0013, + "step": 261270 + }, + { + "epoch": 1.6757687608431437, + "grad_norm": 0.09449843317270279, + "learning_rate": 7.794211639505322e-07, + "loss": 0.0008, + "step": 261280 + }, + { + "epoch": 1.6758328977369297, + "grad_norm": 0.07100570201873779, + "learning_rate": 7.791210992977599e-07, + "loss": 0.0014, + "step": 261290 + }, + { + "epoch": 1.6758970346307158, + "grad_norm": 0.04841234162449837, + "learning_rate": 7.788210875356173e-07, + "loss": 0.0014, + "step": 261300 + }, + { + "epoch": 1.6759611715245017, + "grad_norm": 0.0634155198931694, + "learning_rate": 7.78521128667864e-07, + "loss": 0.0009, + "step": 261310 + }, + { + "epoch": 1.676025308418288, + "grad_norm": 0.04266565665602684, + "learning_rate": 7.782212226982561e-07, + "loss": 0.0013, + "step": 261320 + }, + { + "epoch": 1.676089445312074, + "grad_norm": 0.2701398730278015, + "learning_rate": 7.779213696305554e-07, + "loss": 0.0013, + "step": 261330 + }, + { + "epoch": 1.6761535822058602, + "grad_norm": 0.09580012410879135, + "learning_rate": 7.776215694685169e-07, + "loss": 0.0012, + "step": 261340 + }, + { + "epoch": 1.6762177190996463, + "grad_norm": 0.09963560849428177, + "learning_rate": 7.773218222158979e-07, + "loss": 0.0008, + "step": 261350 + }, + { + "epoch": 1.6762818559934325, + "grad_norm": 0.06957130879163742, + "learning_rate": 7.770221278764534e-07, + "loss": 0.0015, + "step": 261360 + }, + { + "epoch": 1.6763459928872186, + "grad_norm": 0.08795663714408875, + "learning_rate": 7.767224864539408e-07, + "loss": 0.0006, + "step": 261370 + }, + { + "epoch": 1.6764101297810046, + "grad_norm": 0.059120964258909225, + "learning_rate": 7.764228979521138e-07, + "loss": 0.0019, + "step": 261380 + }, + { + "epoch": 1.6764742666747907, + "grad_norm": 0.052831437438726425, + "learning_rate": 7.761233623747261e-07, + "loss": 0.0009, + "step": 261390 + }, + { + "epoch": 1.6765384035685766, + "grad_norm": 0.024905595928430557, + "learning_rate": 7.758238797255308e-07, + "loss": 0.0014, + "step": 261400 + }, + { + "epoch": 1.6766025404623628, + "grad_norm": 0.09078676253557205, + "learning_rate": 7.755244500082821e-07, + "loss": 0.0008, + "step": 261410 + }, + { + "epoch": 1.676666677356149, + "grad_norm": 0.055515043437480927, + "learning_rate": 7.752250732267314e-07, + "loss": 0.0009, + "step": 261420 + }, + { + "epoch": 1.676730814249935, + "grad_norm": 0.02581734210252762, + "learning_rate": 7.749257493846301e-07, + "loss": 0.0005, + "step": 261430 + }, + { + "epoch": 1.6767949511437212, + "grad_norm": 0.2283240556716919, + "learning_rate": 7.746264784857271e-07, + "loss": 0.0015, + "step": 261440 + }, + { + "epoch": 1.6768590880375074, + "grad_norm": 0.030655836686491966, + "learning_rate": 7.743272605337754e-07, + "loss": 0.0007, + "step": 261450 + }, + { + "epoch": 1.6769232249312933, + "grad_norm": 0.1032186895608902, + "learning_rate": 7.740280955325236e-07, + "loss": 0.0014, + "step": 261460 + }, + { + "epoch": 1.6769873618250795, + "grad_norm": 0.05850433185696602, + "learning_rate": 7.737289834857187e-07, + "loss": 0.001, + "step": 261470 + }, + { + "epoch": 1.6770514987188654, + "grad_norm": 0.5587889552116394, + "learning_rate": 7.734299243971111e-07, + "loss": 0.0005, + "step": 261480 + }, + { + "epoch": 1.6771156356126515, + "grad_norm": 0.21267381310462952, + "learning_rate": 7.731309182704471e-07, + "loss": 0.0009, + "step": 261490 + }, + { + "epoch": 1.6771797725064377, + "grad_norm": 0.104644276201725, + "learning_rate": 7.72831965109474e-07, + "loss": 0.0017, + "step": 261500 + }, + { + "epoch": 1.6772439094002238, + "grad_norm": 0.05525534972548485, + "learning_rate": 7.725330649179364e-07, + "loss": 0.0005, + "step": 261510 + }, + { + "epoch": 1.67730804629401, + "grad_norm": 0.04056679457426071, + "learning_rate": 7.722342176995817e-07, + "loss": 0.0013, + "step": 261520 + }, + { + "epoch": 1.6773721831877961, + "grad_norm": 0.016694676131010056, + "learning_rate": 7.719354234581539e-07, + "loss": 0.0013, + "step": 261530 + }, + { + "epoch": 1.6774363200815823, + "grad_norm": 0.20411895215511322, + "learning_rate": 7.716366821973964e-07, + "loss": 0.0009, + "step": 261540 + }, + { + "epoch": 1.6775004569753682, + "grad_norm": 0.02913813479244709, + "learning_rate": 7.71337993921053e-07, + "loss": 0.001, + "step": 261550 + }, + { + "epoch": 1.6775645938691544, + "grad_norm": 0.06505188345909119, + "learning_rate": 7.710393586328674e-07, + "loss": 0.0009, + "step": 261560 + }, + { + "epoch": 1.6776287307629403, + "grad_norm": 0.07389794290065765, + "learning_rate": 7.707407763365809e-07, + "loss": 0.0016, + "step": 261570 + }, + { + "epoch": 1.6776928676567264, + "grad_norm": 0.13428865373134613, + "learning_rate": 7.704422470359352e-07, + "loss": 0.0008, + "step": 261580 + }, + { + "epoch": 1.6777570045505126, + "grad_norm": 0.13568255305290222, + "learning_rate": 7.701437707346698e-07, + "loss": 0.0011, + "step": 261590 + }, + { + "epoch": 1.6778211414442987, + "grad_norm": 0.043442100286483765, + "learning_rate": 7.698453474365275e-07, + "loss": 0.0006, + "step": 261600 + }, + { + "epoch": 1.6778852783380849, + "grad_norm": 0.07020683586597443, + "learning_rate": 7.695469771452458e-07, + "loss": 0.0008, + "step": 261610 + }, + { + "epoch": 1.677949415231871, + "grad_norm": 0.1260029524564743, + "learning_rate": 7.692486598645632e-07, + "loss": 0.0008, + "step": 261620 + }, + { + "epoch": 1.678013552125657, + "grad_norm": 0.11657378077507019, + "learning_rate": 7.689503955982192e-07, + "loss": 0.0009, + "step": 261630 + }, + { + "epoch": 1.678077689019443, + "grad_norm": 0.18355035781860352, + "learning_rate": 7.686521843499512e-07, + "loss": 0.0019, + "step": 261640 + }, + { + "epoch": 1.6781418259132292, + "grad_norm": 0.0605897530913353, + "learning_rate": 7.683540261234951e-07, + "loss": 0.0012, + "step": 261650 + }, + { + "epoch": 1.6782059628070152, + "grad_norm": 0.18452134728431702, + "learning_rate": 7.680559209225863e-07, + "loss": 0.0027, + "step": 261660 + }, + { + "epoch": 1.6782700997008013, + "grad_norm": 0.027823036536574364, + "learning_rate": 7.677578687509624e-07, + "loss": 0.0008, + "step": 261670 + }, + { + "epoch": 1.6783342365945875, + "grad_norm": 0.055968355387449265, + "learning_rate": 7.674598696123569e-07, + "loss": 0.0016, + "step": 261680 + }, + { + "epoch": 1.6783983734883736, + "grad_norm": 0.08254148066043854, + "learning_rate": 7.671619235105043e-07, + "loss": 0.001, + "step": 261690 + }, + { + "epoch": 1.6784625103821598, + "grad_norm": 0.0800231546163559, + "learning_rate": 7.668640304491371e-07, + "loss": 0.0017, + "step": 261700 + }, + { + "epoch": 1.678526647275946, + "grad_norm": 0.1368565559387207, + "learning_rate": 7.665661904319898e-07, + "loss": 0.0012, + "step": 261710 + }, + { + "epoch": 1.6785907841697318, + "grad_norm": 0.0406535267829895, + "learning_rate": 7.662684034627943e-07, + "loss": 0.0021, + "step": 261720 + }, + { + "epoch": 1.678654921063518, + "grad_norm": 0.07403713464736938, + "learning_rate": 7.659706695452807e-07, + "loss": 0.0011, + "step": 261730 + }, + { + "epoch": 1.678719057957304, + "grad_norm": 0.09942308813333511, + "learning_rate": 7.656729886831798e-07, + "loss": 0.0008, + "step": 261740 + }, + { + "epoch": 1.67878319485109, + "grad_norm": 0.06193510442972183, + "learning_rate": 7.653753608802239e-07, + "loss": 0.0014, + "step": 261750 + }, + { + "epoch": 1.6788473317448762, + "grad_norm": 0.11304626613855362, + "learning_rate": 7.650777861401415e-07, + "loss": 0.0011, + "step": 261760 + }, + { + "epoch": 1.6789114686386624, + "grad_norm": 0.2045489251613617, + "learning_rate": 7.647802644666591e-07, + "loss": 0.0013, + "step": 261770 + }, + { + "epoch": 1.6789756055324485, + "grad_norm": 0.1679089516401291, + "learning_rate": 7.644827958635087e-07, + "loss": 0.0006, + "step": 261780 + }, + { + "epoch": 1.6790397424262347, + "grad_norm": 0.012728678993880749, + "learning_rate": 7.641853803344157e-07, + "loss": 0.0024, + "step": 261790 + }, + { + "epoch": 1.6791038793200208, + "grad_norm": 0.15526632964611053, + "learning_rate": 7.638880178831071e-07, + "loss": 0.0014, + "step": 261800 + }, + { + "epoch": 1.6791680162138067, + "grad_norm": 0.047305576503276825, + "learning_rate": 7.635907085133082e-07, + "loss": 0.0016, + "step": 261810 + }, + { + "epoch": 1.6792321531075929, + "grad_norm": 0.09887392073869705, + "learning_rate": 7.63293452228746e-07, + "loss": 0.0009, + "step": 261820 + }, + { + "epoch": 1.6792962900013788, + "grad_norm": 0.09882711619138718, + "learning_rate": 7.629962490331456e-07, + "loss": 0.0018, + "step": 261830 + }, + { + "epoch": 1.679360426895165, + "grad_norm": 0.015573171898722649, + "learning_rate": 7.626990989302296e-07, + "loss": 0.0006, + "step": 261840 + }, + { + "epoch": 1.679424563788951, + "grad_norm": 0.07946422696113586, + "learning_rate": 7.624020019237216e-07, + "loss": 0.0013, + "step": 261850 + }, + { + "epoch": 1.6794887006827373, + "grad_norm": 0.013404951430857182, + "learning_rate": 7.621049580173461e-07, + "loss": 0.0006, + "step": 261860 + }, + { + "epoch": 1.6795528375765234, + "grad_norm": 0.03741779923439026, + "learning_rate": 7.618079672148243e-07, + "loss": 0.0015, + "step": 261870 + }, + { + "epoch": 1.6796169744703096, + "grad_norm": 0.0537244938313961, + "learning_rate": 7.615110295198774e-07, + "loss": 0.001, + "step": 261880 + }, + { + "epoch": 1.6796811113640955, + "grad_norm": 0.07449512183666229, + "learning_rate": 7.612141449362254e-07, + "loss": 0.0011, + "step": 261890 + }, + { + "epoch": 1.6797452482578816, + "grad_norm": 0.05764267221093178, + "learning_rate": 7.609173134675912e-07, + "loss": 0.0011, + "step": 261900 + }, + { + "epoch": 1.6798093851516676, + "grad_norm": 0.3346785604953766, + "learning_rate": 7.606205351176921e-07, + "loss": 0.0029, + "step": 261910 + }, + { + "epoch": 1.6798735220454537, + "grad_norm": 0.04754569008946419, + "learning_rate": 7.603238098902466e-07, + "loss": 0.0008, + "step": 261920 + }, + { + "epoch": 1.6799376589392399, + "grad_norm": 0.03843769431114197, + "learning_rate": 7.600271377889751e-07, + "loss": 0.0006, + "step": 261930 + }, + { + "epoch": 1.680001795833026, + "grad_norm": 0.040142547339200974, + "learning_rate": 7.597305188175941e-07, + "loss": 0.0018, + "step": 261940 + }, + { + "epoch": 1.6800659327268122, + "grad_norm": 0.016071323305368423, + "learning_rate": 7.594339529798195e-07, + "loss": 0.0009, + "step": 261950 + }, + { + "epoch": 1.6801300696205983, + "grad_norm": 0.014691210351884365, + "learning_rate": 7.591374402793672e-07, + "loss": 0.0007, + "step": 261960 + }, + { + "epoch": 1.6801942065143844, + "grad_norm": 0.11548521369695663, + "learning_rate": 7.588409807199537e-07, + "loss": 0.0011, + "step": 261970 + }, + { + "epoch": 1.6802583434081704, + "grad_norm": 0.033998552709817886, + "learning_rate": 7.585445743052955e-07, + "loss": 0.0005, + "step": 261980 + }, + { + "epoch": 1.6803224803019565, + "grad_norm": 0.050724223256111145, + "learning_rate": 7.582482210391046e-07, + "loss": 0.001, + "step": 261990 + }, + { + "epoch": 1.6803866171957424, + "grad_norm": 0.1336205154657364, + "learning_rate": 7.579519209250946e-07, + "loss": 0.0007, + "step": 262000 + }, + { + "epoch": 1.6804507540895286, + "grad_norm": 0.004237947519868612, + "learning_rate": 7.576556739669782e-07, + "loss": 0.0011, + "step": 262010 + }, + { + "epoch": 1.6805148909833147, + "grad_norm": 0.10108738392591476, + "learning_rate": 7.573594801684692e-07, + "loss": 0.001, + "step": 262020 + }, + { + "epoch": 1.680579027877101, + "grad_norm": 0.06919895857572556, + "learning_rate": 7.570633395332783e-07, + "loss": 0.0054, + "step": 262030 + }, + { + "epoch": 1.680643164770887, + "grad_norm": 0.07569548487663269, + "learning_rate": 7.567672520651148e-07, + "loss": 0.0011, + "step": 262040 + }, + { + "epoch": 1.6807073016646732, + "grad_norm": 0.020496781915426254, + "learning_rate": 7.564712177676914e-07, + "loss": 0.0009, + "step": 262050 + }, + { + "epoch": 1.6807714385584593, + "grad_norm": 0.12332148849964142, + "learning_rate": 7.561752366447162e-07, + "loss": 0.0018, + "step": 262060 + }, + { + "epoch": 1.6808355754522453, + "grad_norm": 0.0759996846318245, + "learning_rate": 7.558793086998989e-07, + "loss": 0.0012, + "step": 262070 + }, + { + "epoch": 1.6808997123460314, + "grad_norm": 0.004803712945431471, + "learning_rate": 7.555834339369456e-07, + "loss": 0.0009, + "step": 262080 + }, + { + "epoch": 1.6809638492398173, + "grad_norm": 0.04900961369276047, + "learning_rate": 7.55287612359566e-07, + "loss": 0.0007, + "step": 262090 + }, + { + "epoch": 1.6810279861336035, + "grad_norm": 0.06744549423456192, + "learning_rate": 7.549918439714671e-07, + "loss": 0.0028, + "step": 262100 + }, + { + "epoch": 1.6810921230273896, + "grad_norm": 0.09023269265890121, + "learning_rate": 7.546961287763538e-07, + "loss": 0.0019, + "step": 262110 + }, + { + "epoch": 1.6811562599211758, + "grad_norm": 0.09666185826063156, + "learning_rate": 7.544004667779309e-07, + "loss": 0.0008, + "step": 262120 + }, + { + "epoch": 1.681220396814962, + "grad_norm": 0.045062899589538574, + "learning_rate": 7.541048579799054e-07, + "loss": 0.0014, + "step": 262130 + }, + { + "epoch": 1.681284533708748, + "grad_norm": 0.13950562477111816, + "learning_rate": 7.53809302385981e-07, + "loss": 0.0011, + "step": 262140 + }, + { + "epoch": 1.681348670602534, + "grad_norm": 0.015909569337964058, + "learning_rate": 7.535137999998604e-07, + "loss": 0.0007, + "step": 262150 + }, + { + "epoch": 1.6814128074963202, + "grad_norm": 0.0681985542178154, + "learning_rate": 7.532183508252455e-07, + "loss": 0.0007, + "step": 262160 + }, + { + "epoch": 1.681476944390106, + "grad_norm": 0.08667725324630737, + "learning_rate": 7.529229548658412e-07, + "loss": 0.0006, + "step": 262170 + }, + { + "epoch": 1.6815410812838922, + "grad_norm": 0.05679089203476906, + "learning_rate": 7.526276121253479e-07, + "loss": 0.0014, + "step": 262180 + }, + { + "epoch": 1.6816052181776784, + "grad_norm": 0.14505784213542938, + "learning_rate": 7.523323226074647e-07, + "loss": 0.0011, + "step": 262190 + }, + { + "epoch": 1.6816693550714645, + "grad_norm": 0.08209065347909927, + "learning_rate": 7.520370863158943e-07, + "loss": 0.0013, + "step": 262200 + }, + { + "epoch": 1.6817334919652507, + "grad_norm": 0.15775032341480255, + "learning_rate": 7.517419032543355e-07, + "loss": 0.0018, + "step": 262210 + }, + { + "epoch": 1.6817976288590368, + "grad_norm": 0.1044556275010109, + "learning_rate": 7.514467734264869e-07, + "loss": 0.0012, + "step": 262220 + }, + { + "epoch": 1.681861765752823, + "grad_norm": 0.06325215846300125, + "learning_rate": 7.511516968360449e-07, + "loss": 0.0012, + "step": 262230 + }, + { + "epoch": 1.681925902646609, + "grad_norm": 0.2255081981420517, + "learning_rate": 7.508566734867107e-07, + "loss": 0.0015, + "step": 262240 + }, + { + "epoch": 1.681990039540395, + "grad_norm": 0.04119641333818436, + "learning_rate": 7.505617033821783e-07, + "loss": 0.0004, + "step": 262250 + }, + { + "epoch": 1.682054176434181, + "grad_norm": 0.10483510047197342, + "learning_rate": 7.502667865261454e-07, + "loss": 0.0011, + "step": 262260 + }, + { + "epoch": 1.6821183133279671, + "grad_norm": 0.1358007788658142, + "learning_rate": 7.499719229223057e-07, + "loss": 0.0015, + "step": 262270 + }, + { + "epoch": 1.6821824502217533, + "grad_norm": 0.02440463751554489, + "learning_rate": 7.496771125743563e-07, + "loss": 0.0008, + "step": 262280 + }, + { + "epoch": 1.6822465871155394, + "grad_norm": 0.08051423728466034, + "learning_rate": 7.493823554859903e-07, + "loss": 0.0016, + "step": 262290 + }, + { + "epoch": 1.6823107240093256, + "grad_norm": 0.06842823326587677, + "learning_rate": 7.490876516609013e-07, + "loss": 0.0014, + "step": 262300 + }, + { + "epoch": 1.6823748609031117, + "grad_norm": 0.04999329894781113, + "learning_rate": 7.48793001102781e-07, + "loss": 0.0006, + "step": 262310 + }, + { + "epoch": 1.6824389977968977, + "grad_norm": 0.05793393775820732, + "learning_rate": 7.484984038153237e-07, + "loss": 0.0012, + "step": 262320 + }, + { + "epoch": 1.6825031346906838, + "grad_norm": 0.14233599603176117, + "learning_rate": 7.482038598022202e-07, + "loss": 0.0005, + "step": 262330 + }, + { + "epoch": 1.68256727158447, + "grad_norm": 0.07395566999912262, + "learning_rate": 7.479093690671596e-07, + "loss": 0.0007, + "step": 262340 + }, + { + "epoch": 1.6826314084782559, + "grad_norm": 0.12719450891017914, + "learning_rate": 7.476149316138348e-07, + "loss": 0.0007, + "step": 262350 + }, + { + "epoch": 1.682695545372042, + "grad_norm": 0.13135480880737305, + "learning_rate": 7.473205474459339e-07, + "loss": 0.0013, + "step": 262360 + }, + { + "epoch": 1.6827596822658282, + "grad_norm": 0.09198420494794846, + "learning_rate": 7.470262165671461e-07, + "loss": 0.0015, + "step": 262370 + }, + { + "epoch": 1.6828238191596143, + "grad_norm": 0.13832350075244904, + "learning_rate": 7.467319389811578e-07, + "loss": 0.0007, + "step": 262380 + }, + { + "epoch": 1.6828879560534005, + "grad_norm": 0.08932088315486908, + "learning_rate": 7.464377146916595e-07, + "loss": 0.0012, + "step": 262390 + }, + { + "epoch": 1.6829520929471866, + "grad_norm": 0.0009030341752804816, + "learning_rate": 7.461435437023368e-07, + "loss": 0.0017, + "step": 262400 + }, + { + "epoch": 1.6830162298409725, + "grad_norm": 0.09307923167943954, + "learning_rate": 7.458494260168753e-07, + "loss": 0.0007, + "step": 262410 + }, + { + "epoch": 1.6830803667347587, + "grad_norm": 0.03176266327500343, + "learning_rate": 7.455553616389599e-07, + "loss": 0.0015, + "step": 262420 + }, + { + "epoch": 1.6831445036285446, + "grad_norm": 0.021300937980413437, + "learning_rate": 7.452613505722778e-07, + "loss": 0.0011, + "step": 262430 + }, + { + "epoch": 1.6832086405223308, + "grad_norm": 0.01835530996322632, + "learning_rate": 7.449673928205114e-07, + "loss": 0.0012, + "step": 262440 + }, + { + "epoch": 1.683272777416117, + "grad_norm": 0.03712568059563637, + "learning_rate": 7.446734883873452e-07, + "loss": 0.0009, + "step": 262450 + }, + { + "epoch": 1.683336914309903, + "grad_norm": 0.08349623531103134, + "learning_rate": 7.4437963727646e-07, + "loss": 0.0012, + "step": 262460 + }, + { + "epoch": 1.6834010512036892, + "grad_norm": 0.15113046765327454, + "learning_rate": 7.440858394915401e-07, + "loss": 0.0011, + "step": 262470 + }, + { + "epoch": 1.6834651880974754, + "grad_norm": 0.2494426816701889, + "learning_rate": 7.437920950362665e-07, + "loss": 0.001, + "step": 262480 + }, + { + "epoch": 1.6835293249912615, + "grad_norm": 0.07854034751653671, + "learning_rate": 7.434984039143189e-07, + "loss": 0.0011, + "step": 262490 + }, + { + "epoch": 1.6835934618850474, + "grad_norm": 0.03320141136646271, + "learning_rate": 7.432047661293795e-07, + "loss": 0.0007, + "step": 262500 + }, + { + "epoch": 1.6836575987788336, + "grad_norm": 0.09847461432218552, + "learning_rate": 7.429111816851264e-07, + "loss": 0.0005, + "step": 262510 + }, + { + "epoch": 1.6837217356726195, + "grad_norm": 0.02717522718012333, + "learning_rate": 7.42617650585239e-07, + "loss": 0.0011, + "step": 262520 + }, + { + "epoch": 1.6837858725664057, + "grad_norm": 0.2049603909254074, + "learning_rate": 7.423241728333941e-07, + "loss": 0.0021, + "step": 262530 + }, + { + "epoch": 1.6838500094601918, + "grad_norm": 0.011489232070744038, + "learning_rate": 7.420307484332711e-07, + "loss": 0.0007, + "step": 262540 + }, + { + "epoch": 1.683914146353978, + "grad_norm": 0.10288581997156143, + "learning_rate": 7.417373773885461e-07, + "loss": 0.0022, + "step": 262550 + }, + { + "epoch": 1.683978283247764, + "grad_norm": 0.09101805835962296, + "learning_rate": 7.414440597028949e-07, + "loss": 0.0017, + "step": 262560 + }, + { + "epoch": 1.6840424201415503, + "grad_norm": 0.01015832182019949, + "learning_rate": 7.411507953799918e-07, + "loss": 0.0009, + "step": 262570 + }, + { + "epoch": 1.6841065570353362, + "grad_norm": 0.10437482595443726, + "learning_rate": 7.408575844235144e-07, + "loss": 0.0018, + "step": 262580 + }, + { + "epoch": 1.6841706939291223, + "grad_norm": 0.03975355252623558, + "learning_rate": 7.405644268371359e-07, + "loss": 0.0028, + "step": 262590 + }, + { + "epoch": 1.6842348308229083, + "grad_norm": 0.12639813125133514, + "learning_rate": 7.402713226245284e-07, + "loss": 0.002, + "step": 262600 + }, + { + "epoch": 1.6842989677166944, + "grad_norm": 0.19275104999542236, + "learning_rate": 7.39978271789365e-07, + "loss": 0.0017, + "step": 262610 + }, + { + "epoch": 1.6843631046104806, + "grad_norm": 0.018935957923531532, + "learning_rate": 7.396852743353194e-07, + "loss": 0.0011, + "step": 262620 + }, + { + "epoch": 1.6844272415042667, + "grad_norm": 0.015886522829532623, + "learning_rate": 7.393923302660627e-07, + "loss": 0.0006, + "step": 262630 + }, + { + "epoch": 1.6844913783980529, + "grad_norm": 0.04877450317144394, + "learning_rate": 7.390994395852636e-07, + "loss": 0.0025, + "step": 262640 + }, + { + "epoch": 1.684555515291839, + "grad_norm": 0.01553063653409481, + "learning_rate": 7.388066022965946e-07, + "loss": 0.0009, + "step": 262650 + }, + { + "epoch": 1.6846196521856251, + "grad_norm": 0.02764376997947693, + "learning_rate": 7.385138184037244e-07, + "loss": 0.0011, + "step": 262660 + }, + { + "epoch": 1.684683789079411, + "grad_norm": 0.03206116333603859, + "learning_rate": 7.382210879103219e-07, + "loss": 0.0007, + "step": 262670 + }, + { + "epoch": 1.6847479259731972, + "grad_norm": 0.10185568779706955, + "learning_rate": 7.379284108200535e-07, + "loss": 0.0025, + "step": 262680 + }, + { + "epoch": 1.6848120628669832, + "grad_norm": 0.05674508213996887, + "learning_rate": 7.376357871365897e-07, + "loss": 0.0007, + "step": 262690 + }, + { + "epoch": 1.6848761997607693, + "grad_norm": 0.07970869541168213, + "learning_rate": 7.373432168635958e-07, + "loss": 0.0011, + "step": 262700 + }, + { + "epoch": 1.6849403366545554, + "grad_norm": 0.02939658612012863, + "learning_rate": 7.370507000047378e-07, + "loss": 0.0007, + "step": 262710 + }, + { + "epoch": 1.6850044735483416, + "grad_norm": 0.03765963017940521, + "learning_rate": 7.3675823656368e-07, + "loss": 0.0015, + "step": 262720 + }, + { + "epoch": 1.6850686104421277, + "grad_norm": 0.007897039875388145, + "learning_rate": 7.364658265440894e-07, + "loss": 0.0009, + "step": 262730 + }, + { + "epoch": 1.685132747335914, + "grad_norm": 0.09889094531536102, + "learning_rate": 7.361734699496282e-07, + "loss": 0.0011, + "step": 262740 + }, + { + "epoch": 1.6851968842296998, + "grad_norm": 0.09377726912498474, + "learning_rate": 7.358811667839616e-07, + "loss": 0.0009, + "step": 262750 + }, + { + "epoch": 1.685261021123486, + "grad_norm": 0.17835475504398346, + "learning_rate": 7.355889170507502e-07, + "loss": 0.001, + "step": 262760 + }, + { + "epoch": 1.6853251580172721, + "grad_norm": 0.07152102142572403, + "learning_rate": 7.352967207536587e-07, + "loss": 0.0009, + "step": 262770 + }, + { + "epoch": 1.685389294911058, + "grad_norm": 0.013958334922790527, + "learning_rate": 7.350045778963477e-07, + "loss": 0.0027, + "step": 262780 + }, + { + "epoch": 1.6854534318048442, + "grad_norm": 0.09204661846160889, + "learning_rate": 7.347124884824769e-07, + "loss": 0.0012, + "step": 262790 + }, + { + "epoch": 1.6855175686986303, + "grad_norm": 0.1123444065451622, + "learning_rate": 7.344204525157061e-07, + "loss": 0.001, + "step": 262800 + }, + { + "epoch": 1.6855817055924165, + "grad_norm": 0.033399078994989395, + "learning_rate": 7.34128469999697e-07, + "loss": 0.0013, + "step": 262810 + }, + { + "epoch": 1.6856458424862026, + "grad_norm": 0.09275560826063156, + "learning_rate": 7.338365409381065e-07, + "loss": 0.0009, + "step": 262820 + }, + { + "epoch": 1.6857099793799888, + "grad_norm": 0.12270651757717133, + "learning_rate": 7.335446653345934e-07, + "loss": 0.0012, + "step": 262830 + }, + { + "epoch": 1.6857741162737747, + "grad_norm": 0.10209984332323074, + "learning_rate": 7.332528431928133e-07, + "loss": 0.0011, + "step": 262840 + }, + { + "epoch": 1.6858382531675609, + "grad_norm": 0.04941209778189659, + "learning_rate": 7.329610745164262e-07, + "loss": 0.001, + "step": 262850 + }, + { + "epoch": 1.6859023900613468, + "grad_norm": 0.36141422390937805, + "learning_rate": 7.326693593090861e-07, + "loss": 0.0008, + "step": 262860 + }, + { + "epoch": 1.685966526955133, + "grad_norm": 0.031035954132676125, + "learning_rate": 7.323776975744484e-07, + "loss": 0.0012, + "step": 262870 + }, + { + "epoch": 1.686030663848919, + "grad_norm": 0.029855938628315926, + "learning_rate": 7.320860893161674e-07, + "loss": 0.0016, + "step": 262880 + }, + { + "epoch": 1.6860948007427052, + "grad_norm": 0.03218485787510872, + "learning_rate": 7.317945345378991e-07, + "loss": 0.0016, + "step": 262890 + }, + { + "epoch": 1.6861589376364914, + "grad_norm": 0.07458245754241943, + "learning_rate": 7.315030332432959e-07, + "loss": 0.0016, + "step": 262900 + }, + { + "epoch": 1.6862230745302775, + "grad_norm": 0.08994331955909729, + "learning_rate": 7.312115854360091e-07, + "loss": 0.0038, + "step": 262910 + }, + { + "epoch": 1.6862872114240637, + "grad_norm": 0.1847568303346634, + "learning_rate": 7.309201911196928e-07, + "loss": 0.0008, + "step": 262920 + }, + { + "epoch": 1.6863513483178496, + "grad_norm": 0.0190438125282526, + "learning_rate": 7.30628850297998e-07, + "loss": 0.0009, + "step": 262930 + }, + { + "epoch": 1.6864154852116358, + "grad_norm": 0.06274163722991943, + "learning_rate": 7.303375629745746e-07, + "loss": 0.0009, + "step": 262940 + }, + { + "epoch": 1.6864796221054217, + "grad_norm": 0.07734082639217377, + "learning_rate": 7.300463291530718e-07, + "loss": 0.0008, + "step": 262950 + }, + { + "epoch": 1.6865437589992078, + "grad_norm": 0.019327746704220772, + "learning_rate": 7.297551488371418e-07, + "loss": 0.0032, + "step": 262960 + }, + { + "epoch": 1.686607895892994, + "grad_norm": 0.007246449589729309, + "learning_rate": 7.294640220304317e-07, + "loss": 0.0006, + "step": 262970 + }, + { + "epoch": 1.6866720327867801, + "grad_norm": 0.006289687007665634, + "learning_rate": 7.291729487365889e-07, + "loss": 0.0009, + "step": 262980 + }, + { + "epoch": 1.6867361696805663, + "grad_norm": 0.1553562432527542, + "learning_rate": 7.288819289592608e-07, + "loss": 0.0026, + "step": 262990 + }, + { + "epoch": 1.6868003065743524, + "grad_norm": 0.2426321655511856, + "learning_rate": 7.285909627020954e-07, + "loss": 0.0018, + "step": 263000 + }, + { + "epoch": 1.6868644434681384, + "grad_norm": 0.19575554132461548, + "learning_rate": 7.283000499687382e-07, + "loss": 0.0006, + "step": 263010 + }, + { + "epoch": 1.6869285803619245, + "grad_norm": 0.1894301176071167, + "learning_rate": 7.280091907628334e-07, + "loss": 0.001, + "step": 263020 + }, + { + "epoch": 1.6869927172557104, + "grad_norm": 0.17330430448055267, + "learning_rate": 7.277183850880271e-07, + "loss": 0.0014, + "step": 263030 + }, + { + "epoch": 1.6870568541494966, + "grad_norm": 0.10145927220582962, + "learning_rate": 7.274276329479635e-07, + "loss": 0.0011, + "step": 263040 + }, + { + "epoch": 1.6871209910432827, + "grad_norm": 0.07053583860397339, + "learning_rate": 7.271369343462848e-07, + "loss": 0.0008, + "step": 263050 + }, + { + "epoch": 1.6871851279370689, + "grad_norm": 0.12517698109149933, + "learning_rate": 7.268462892866329e-07, + "loss": 0.0009, + "step": 263060 + }, + { + "epoch": 1.687249264830855, + "grad_norm": 0.04059239849448204, + "learning_rate": 7.265556977726523e-07, + "loss": 0.0007, + "step": 263070 + }, + { + "epoch": 1.6873134017246412, + "grad_norm": 0.0846027135848999, + "learning_rate": 7.262651598079829e-07, + "loss": 0.002, + "step": 263080 + }, + { + "epoch": 1.6873775386184273, + "grad_norm": 0.07531607151031494, + "learning_rate": 7.259746753962655e-07, + "loss": 0.0013, + "step": 263090 + }, + { + "epoch": 1.6874416755122132, + "grad_norm": 0.051252126693725586, + "learning_rate": 7.256842445411389e-07, + "loss": 0.0006, + "step": 263100 + }, + { + "epoch": 1.6875058124059994, + "grad_norm": 0.05703764408826828, + "learning_rate": 7.253938672462446e-07, + "loss": 0.0013, + "step": 263110 + }, + { + "epoch": 1.6875699492997853, + "grad_norm": 0.03158862888813019, + "learning_rate": 7.251035435152198e-07, + "loss": 0.001, + "step": 263120 + }, + { + "epoch": 1.6876340861935715, + "grad_norm": 0.02299869991838932, + "learning_rate": 7.248132733517032e-07, + "loss": 0.0006, + "step": 263130 + }, + { + "epoch": 1.6876982230873576, + "grad_norm": 0.17391365766525269, + "learning_rate": 7.245230567593308e-07, + "loss": 0.0011, + "step": 263140 + }, + { + "epoch": 1.6877623599811438, + "grad_norm": 0.013422899879515171, + "learning_rate": 7.242328937417409e-07, + "loss": 0.0008, + "step": 263150 + }, + { + "epoch": 1.68782649687493, + "grad_norm": 0.10892119258642197, + "learning_rate": 7.239427843025687e-07, + "loss": 0.0014, + "step": 263160 + }, + { + "epoch": 1.687890633768716, + "grad_norm": 0.1053953766822815, + "learning_rate": 7.236527284454487e-07, + "loss": 0.0006, + "step": 263170 + }, + { + "epoch": 1.687954770662502, + "grad_norm": 0.07258967310190201, + "learning_rate": 7.233627261740167e-07, + "loss": 0.0025, + "step": 263180 + }, + { + "epoch": 1.6880189075562881, + "grad_norm": 0.00420130742713809, + "learning_rate": 7.230727774919066e-07, + "loss": 0.0015, + "step": 263190 + }, + { + "epoch": 1.6880830444500743, + "grad_norm": 0.01840096153318882, + "learning_rate": 7.227828824027516e-07, + "loss": 0.0017, + "step": 263200 + }, + { + "epoch": 1.6881471813438602, + "grad_norm": 0.12298330664634705, + "learning_rate": 7.224930409101827e-07, + "loss": 0.0007, + "step": 263210 + }, + { + "epoch": 1.6882113182376464, + "grad_norm": 0.014765714295208454, + "learning_rate": 7.222032530178342e-07, + "loss": 0.0014, + "step": 263220 + }, + { + "epoch": 1.6882754551314325, + "grad_norm": 0.04172176122665405, + "learning_rate": 7.219135187293357e-07, + "loss": 0.0015, + "step": 263230 + }, + { + "epoch": 1.6883395920252187, + "grad_norm": 0.015325518324971199, + "learning_rate": 7.216238380483188e-07, + "loss": 0.0027, + "step": 263240 + }, + { + "epoch": 1.6884037289190048, + "grad_norm": 0.07766814529895782, + "learning_rate": 7.213342109784117e-07, + "loss": 0.0007, + "step": 263250 + }, + { + "epoch": 1.688467865812791, + "grad_norm": 0.11536253988742828, + "learning_rate": 7.21044637523246e-07, + "loss": 0.0013, + "step": 263260 + }, + { + "epoch": 1.6885320027065769, + "grad_norm": 0.12471003085374832, + "learning_rate": 7.207551176864491e-07, + "loss": 0.0014, + "step": 263270 + }, + { + "epoch": 1.688596139600363, + "grad_norm": 0.025006921961903572, + "learning_rate": 7.204656514716491e-07, + "loss": 0.0007, + "step": 263280 + }, + { + "epoch": 1.688660276494149, + "grad_norm": 0.22946634888648987, + "learning_rate": 7.201762388824717e-07, + "loss": 0.001, + "step": 263290 + }, + { + "epoch": 1.688724413387935, + "grad_norm": 0.06974420696496964, + "learning_rate": 7.198868799225455e-07, + "loss": 0.0005, + "step": 263300 + }, + { + "epoch": 1.6887885502817213, + "grad_norm": 0.007004431914538145, + "learning_rate": 7.195975745954964e-07, + "loss": 0.0014, + "step": 263310 + }, + { + "epoch": 1.6888526871755074, + "grad_norm": 0.05225522071123123, + "learning_rate": 7.193083229049475e-07, + "loss": 0.004, + "step": 263320 + }, + { + "epoch": 1.6889168240692936, + "grad_norm": 0.007790552452206612, + "learning_rate": 7.190191248545253e-07, + "loss": 0.001, + "step": 263330 + }, + { + "epoch": 1.6889809609630797, + "grad_norm": 0.001854132628068328, + "learning_rate": 7.187299804478536e-07, + "loss": 0.0008, + "step": 263340 + }, + { + "epoch": 1.6890450978568659, + "grad_norm": 0.06629274040460587, + "learning_rate": 7.184408896885547e-07, + "loss": 0.0009, + "step": 263350 + }, + { + "epoch": 1.6891092347506518, + "grad_norm": 0.0625310093164444, + "learning_rate": 7.181518525802505e-07, + "loss": 0.001, + "step": 263360 + }, + { + "epoch": 1.689173371644438, + "grad_norm": 0.04694930836558342, + "learning_rate": 7.178628691265649e-07, + "loss": 0.0008, + "step": 263370 + }, + { + "epoch": 1.6892375085382239, + "grad_norm": 0.00229465588927269, + "learning_rate": 7.175739393311176e-07, + "loss": 0.0008, + "step": 263380 + }, + { + "epoch": 1.68930164543201, + "grad_norm": 0.11056386679410934, + "learning_rate": 7.172850631975298e-07, + "loss": 0.0007, + "step": 263390 + }, + { + "epoch": 1.6893657823257962, + "grad_norm": 0.23887838423252106, + "learning_rate": 7.169962407294201e-07, + "loss": 0.0008, + "step": 263400 + }, + { + "epoch": 1.6894299192195823, + "grad_norm": 0.034622084349393845, + "learning_rate": 7.16707471930409e-07, + "loss": 0.0014, + "step": 263410 + }, + { + "epoch": 1.6894940561133684, + "grad_norm": 0.011238383129239082, + "learning_rate": 7.164187568041154e-07, + "loss": 0.0009, + "step": 263420 + }, + { + "epoch": 1.6895581930071546, + "grad_norm": 0.006505804136395454, + "learning_rate": 7.161300953541556e-07, + "loss": 0.001, + "step": 263430 + }, + { + "epoch": 1.6896223299009405, + "grad_norm": 0.19434146583080292, + "learning_rate": 7.158414875841457e-07, + "loss": 0.0006, + "step": 263440 + }, + { + "epoch": 1.6896864667947267, + "grad_norm": 0.03185209631919861, + "learning_rate": 7.155529334977057e-07, + "loss": 0.0009, + "step": 263450 + }, + { + "epoch": 1.6897506036885126, + "grad_norm": 0.13237544894218445, + "learning_rate": 7.152644330984493e-07, + "loss": 0.0009, + "step": 263460 + }, + { + "epoch": 1.6898147405822987, + "grad_norm": 0.03689480945467949, + "learning_rate": 7.149759863899908e-07, + "loss": 0.0006, + "step": 263470 + }, + { + "epoch": 1.689878877476085, + "grad_norm": 0.1426839977502823, + "learning_rate": 7.146875933759467e-07, + "loss": 0.0011, + "step": 263480 + }, + { + "epoch": 1.689943014369871, + "grad_norm": 0.26433318853378296, + "learning_rate": 7.143992540599293e-07, + "loss": 0.0015, + "step": 263490 + }, + { + "epoch": 1.6900071512636572, + "grad_norm": 0.04924926534295082, + "learning_rate": 7.141109684455517e-07, + "loss": 0.0014, + "step": 263500 + }, + { + "epoch": 1.6900712881574433, + "grad_norm": 0.09768155217170715, + "learning_rate": 7.138227365364275e-07, + "loss": 0.0016, + "step": 263510 + }, + { + "epoch": 1.6901354250512295, + "grad_norm": 0.07902403920888901, + "learning_rate": 7.135345583361663e-07, + "loss": 0.001, + "step": 263520 + }, + { + "epoch": 1.6901995619450154, + "grad_norm": 0.09526874125003815, + "learning_rate": 7.132464338483818e-07, + "loss": 0.0009, + "step": 263530 + }, + { + "epoch": 1.6902636988388016, + "grad_norm": 0.08909189701080322, + "learning_rate": 7.129583630766834e-07, + "loss": 0.0012, + "step": 263540 + }, + { + "epoch": 1.6903278357325875, + "grad_norm": 0.04073075205087662, + "learning_rate": 7.126703460246809e-07, + "loss": 0.0018, + "step": 263550 + }, + { + "epoch": 1.6903919726263736, + "grad_norm": 0.09088864177465439, + "learning_rate": 7.123823826959813e-07, + "loss": 0.0009, + "step": 263560 + }, + { + "epoch": 1.6904561095201598, + "grad_norm": 0.002805843949317932, + "learning_rate": 7.120944730941964e-07, + "loss": 0.0006, + "step": 263570 + }, + { + "epoch": 1.690520246413946, + "grad_norm": 0.0623597651720047, + "learning_rate": 7.118066172229321e-07, + "loss": 0.001, + "step": 263580 + }, + { + "epoch": 1.690584383307732, + "grad_norm": 0.1109582707285881, + "learning_rate": 7.115188150857943e-07, + "loss": 0.0014, + "step": 263590 + }, + { + "epoch": 1.6906485202015182, + "grad_norm": 0.013657116331160069, + "learning_rate": 7.11231066686392e-07, + "loss": 0.0007, + "step": 263600 + }, + { + "epoch": 1.6907126570953044, + "grad_norm": 0.06236774101853371, + "learning_rate": 7.109433720283299e-07, + "loss": 0.0006, + "step": 263610 + }, + { + "epoch": 1.6907767939890903, + "grad_norm": 0.025372015312314034, + "learning_rate": 7.106557311152118e-07, + "loss": 0.0014, + "step": 263620 + }, + { + "epoch": 1.6908409308828765, + "grad_norm": 0.11978617310523987, + "learning_rate": 7.103681439506427e-07, + "loss": 0.0013, + "step": 263630 + }, + { + "epoch": 1.6909050677766624, + "grad_norm": 0.20812635123729706, + "learning_rate": 7.100806105382275e-07, + "loss": 0.0016, + "step": 263640 + }, + { + "epoch": 1.6909692046704485, + "grad_norm": 0.056038640439510345, + "learning_rate": 7.097931308815676e-07, + "loss": 0.0008, + "step": 263650 + }, + { + "epoch": 1.6910333415642347, + "grad_norm": 0.017140207812190056, + "learning_rate": 7.095057049842662e-07, + "loss": 0.0008, + "step": 263660 + }, + { + "epoch": 1.6910974784580208, + "grad_norm": 0.10366932302713394, + "learning_rate": 7.092183328499241e-07, + "loss": 0.0009, + "step": 263670 + }, + { + "epoch": 1.691161615351807, + "grad_norm": 0.10067028552293777, + "learning_rate": 7.089310144821432e-07, + "loss": 0.0009, + "step": 263680 + }, + { + "epoch": 1.6912257522455931, + "grad_norm": 0.12825380265712738, + "learning_rate": 7.086437498845239e-07, + "loss": 0.0012, + "step": 263690 + }, + { + "epoch": 1.691289889139379, + "grad_norm": 0.04232438653707504, + "learning_rate": 7.083565390606656e-07, + "loss": 0.0016, + "step": 263700 + }, + { + "epoch": 1.6913540260331652, + "grad_norm": 0.01695011369884014, + "learning_rate": 7.080693820141654e-07, + "loss": 0.0007, + "step": 263710 + }, + { + "epoch": 1.6914181629269511, + "grad_norm": 0.0885840430855751, + "learning_rate": 7.077822787486249e-07, + "loss": 0.0019, + "step": 263720 + }, + { + "epoch": 1.6914822998207373, + "grad_norm": 0.0035042043309658766, + "learning_rate": 7.074952292676396e-07, + "loss": 0.0006, + "step": 263730 + }, + { + "epoch": 1.6915464367145234, + "grad_norm": 0.07663535326719284, + "learning_rate": 7.07208233574806e-07, + "loss": 0.0021, + "step": 263740 + }, + { + "epoch": 1.6916105736083096, + "grad_norm": 0.07760876417160034, + "learning_rate": 7.069212916737223e-07, + "loss": 0.0012, + "step": 263750 + }, + { + "epoch": 1.6916747105020957, + "grad_norm": 0.05023767799139023, + "learning_rate": 7.066344035679828e-07, + "loss": 0.0008, + "step": 263760 + }, + { + "epoch": 1.6917388473958819, + "grad_norm": 0.048591502010822296, + "learning_rate": 7.063475692611832e-07, + "loss": 0.0009, + "step": 263770 + }, + { + "epoch": 1.691802984289668, + "grad_norm": 0.08918386697769165, + "learning_rate": 7.06060788756916e-07, + "loss": 0.001, + "step": 263780 + }, + { + "epoch": 1.691867121183454, + "grad_norm": 0.031152892857789993, + "learning_rate": 7.057740620587766e-07, + "loss": 0.0009, + "step": 263790 + }, + { + "epoch": 1.69193125807724, + "grad_norm": 0.07061514258384705, + "learning_rate": 7.054873891703573e-07, + "loss": 0.0009, + "step": 263800 + }, + { + "epoch": 1.691995394971026, + "grad_norm": 0.14635823667049408, + "learning_rate": 7.052007700952506e-07, + "loss": 0.001, + "step": 263810 + }, + { + "epoch": 1.6920595318648122, + "grad_norm": 0.10236475616693497, + "learning_rate": 7.049142048370461e-07, + "loss": 0.0016, + "step": 263820 + }, + { + "epoch": 1.6921236687585983, + "grad_norm": 0.10531385987997055, + "learning_rate": 7.046276933993379e-07, + "loss": 0.0016, + "step": 263830 + }, + { + "epoch": 1.6921878056523845, + "grad_norm": 0.04722611606121063, + "learning_rate": 7.043412357857144e-07, + "loss": 0.0015, + "step": 263840 + }, + { + "epoch": 1.6922519425461706, + "grad_norm": 0.03881454840302467, + "learning_rate": 7.040548319997653e-07, + "loss": 0.0009, + "step": 263850 + }, + { + "epoch": 1.6923160794399568, + "grad_norm": 0.08548258990049362, + "learning_rate": 7.03768482045078e-07, + "loss": 0.0007, + "step": 263860 + }, + { + "epoch": 1.6923802163337427, + "grad_norm": 0.4337688088417053, + "learning_rate": 7.034821859252433e-07, + "loss": 0.0016, + "step": 263870 + }, + { + "epoch": 1.6924443532275288, + "grad_norm": 0.09530623257160187, + "learning_rate": 7.03195943643848e-07, + "loss": 0.0014, + "step": 263880 + }, + { + "epoch": 1.6925084901213148, + "grad_norm": 0.09382256120443344, + "learning_rate": 7.029097552044767e-07, + "loss": 0.0007, + "step": 263890 + }, + { + "epoch": 1.692572627015101, + "grad_norm": 0.07203307747840881, + "learning_rate": 7.026236206107185e-07, + "loss": 0.0023, + "step": 263900 + }, + { + "epoch": 1.692636763908887, + "grad_norm": 0.0677449181675911, + "learning_rate": 7.02337539866158e-07, + "loss": 0.0013, + "step": 263910 + }, + { + "epoch": 1.6927009008026732, + "grad_norm": 0.09249774366617203, + "learning_rate": 7.020515129743794e-07, + "loss": 0.001, + "step": 263920 + }, + { + "epoch": 1.6927650376964594, + "grad_norm": 0.19315855205059052, + "learning_rate": 7.017655399389656e-07, + "loss": 0.0009, + "step": 263930 + }, + { + "epoch": 1.6928291745902455, + "grad_norm": 0.04837198182940483, + "learning_rate": 7.014796207635027e-07, + "loss": 0.0008, + "step": 263940 + }, + { + "epoch": 1.6928933114840317, + "grad_norm": 0.3748023211956024, + "learning_rate": 7.011937554515724e-07, + "loss": 0.0014, + "step": 263950 + }, + { + "epoch": 1.6929574483778176, + "grad_norm": 0.15534189343452454, + "learning_rate": 7.009079440067567e-07, + "loss": 0.0011, + "step": 263960 + }, + { + "epoch": 1.6930215852716037, + "grad_norm": 0.04076743870973587, + "learning_rate": 7.006221864326357e-07, + "loss": 0.0009, + "step": 263970 + }, + { + "epoch": 1.6930857221653897, + "grad_norm": 0.01948699913918972, + "learning_rate": 7.00336482732793e-07, + "loss": 0.0014, + "step": 263980 + }, + { + "epoch": 1.6931498590591758, + "grad_norm": 0.04300503805279732, + "learning_rate": 7.000508329108063e-07, + "loss": 0.0008, + "step": 263990 + }, + { + "epoch": 1.693213995952962, + "grad_norm": 0.25529250502586365, + "learning_rate": 6.997652369702562e-07, + "loss": 0.0011, + "step": 264000 + }, + { + "epoch": 1.693278132846748, + "grad_norm": 0.03575824573636055, + "learning_rate": 6.994796949147204e-07, + "loss": 0.0003, + "step": 264010 + }, + { + "epoch": 1.6933422697405343, + "grad_norm": 0.0745188519358635, + "learning_rate": 6.99194206747778e-07, + "loss": 0.0007, + "step": 264020 + }, + { + "epoch": 1.6934064066343204, + "grad_norm": 0.027460288256406784, + "learning_rate": 6.989087724730059e-07, + "loss": 0.001, + "step": 264030 + }, + { + "epoch": 1.6934705435281066, + "grad_norm": 0.04555510729551315, + "learning_rate": 6.9862339209398e-07, + "loss": 0.0007, + "step": 264040 + }, + { + "epoch": 1.6935346804218925, + "grad_norm": 0.02433430403470993, + "learning_rate": 6.983380656142785e-07, + "loss": 0.0012, + "step": 264050 + }, + { + "epoch": 1.6935988173156786, + "grad_norm": 0.06247928366065025, + "learning_rate": 6.980527930374748e-07, + "loss": 0.0009, + "step": 264060 + }, + { + "epoch": 1.6936629542094646, + "grad_norm": 0.022036660462617874, + "learning_rate": 6.977675743671448e-07, + "loss": 0.0007, + "step": 264070 + }, + { + "epoch": 1.6937270911032507, + "grad_norm": 0.10363399982452393, + "learning_rate": 6.9748240960686e-07, + "loss": 0.0019, + "step": 264080 + }, + { + "epoch": 1.6937912279970369, + "grad_norm": 0.041434530168771744, + "learning_rate": 6.971972987601972e-07, + "loss": 0.0005, + "step": 264090 + }, + { + "epoch": 1.693855364890823, + "grad_norm": 0.051926229149103165, + "learning_rate": 6.969122418307272e-07, + "loss": 0.001, + "step": 264100 + }, + { + "epoch": 1.6939195017846091, + "grad_norm": 0.12077579647302628, + "learning_rate": 6.966272388220224e-07, + "loss": 0.0011, + "step": 264110 + }, + { + "epoch": 1.6939836386783953, + "grad_norm": 0.23285746574401855, + "learning_rate": 6.963422897376526e-07, + "loss": 0.0012, + "step": 264120 + }, + { + "epoch": 1.6940477755721812, + "grad_norm": 0.08923560380935669, + "learning_rate": 6.96057394581191e-07, + "loss": 0.0006, + "step": 264130 + }, + { + "epoch": 1.6941119124659674, + "grad_norm": 0.03662756457924843, + "learning_rate": 6.95772553356206e-07, + "loss": 0.0012, + "step": 264140 + }, + { + "epoch": 1.6941760493597533, + "grad_norm": 0.04590865969657898, + "learning_rate": 6.954877660662673e-07, + "loss": 0.0004, + "step": 264150 + }, + { + "epoch": 1.6942401862535394, + "grad_norm": 0.07533108443021774, + "learning_rate": 6.952030327149417e-07, + "loss": 0.0007, + "step": 264160 + }, + { + "epoch": 1.6943043231473256, + "grad_norm": 0.0856962725520134, + "learning_rate": 6.949183533058002e-07, + "loss": 0.0013, + "step": 264170 + }, + { + "epoch": 1.6943684600411117, + "grad_norm": 0.09973156452178955, + "learning_rate": 6.946337278424087e-07, + "loss": 0.0009, + "step": 264180 + }, + { + "epoch": 1.694432596934898, + "grad_norm": 0.10728974640369415, + "learning_rate": 6.943491563283322e-07, + "loss": 0.001, + "step": 264190 + }, + { + "epoch": 1.694496733828684, + "grad_norm": 0.10567191243171692, + "learning_rate": 6.94064638767139e-07, + "loss": 0.0008, + "step": 264200 + }, + { + "epoch": 1.6945608707224702, + "grad_norm": 0.021192071959376335, + "learning_rate": 6.937801751623935e-07, + "loss": 0.0013, + "step": 264210 + }, + { + "epoch": 1.6946250076162561, + "grad_norm": 0.006901530083268881, + "learning_rate": 6.9349576551766e-07, + "loss": 0.0012, + "step": 264220 + }, + { + "epoch": 1.6946891445100423, + "grad_norm": 0.05930528789758682, + "learning_rate": 6.932114098365006e-07, + "loss": 0.0016, + "step": 264230 + }, + { + "epoch": 1.6947532814038282, + "grad_norm": 0.11704102903604507, + "learning_rate": 6.92927108122482e-07, + "loss": 0.0014, + "step": 264240 + }, + { + "epoch": 1.6948174182976143, + "grad_norm": 0.08013855665922165, + "learning_rate": 6.926428603791652e-07, + "loss": 0.0012, + "step": 264250 + }, + { + "epoch": 1.6948815551914005, + "grad_norm": 0.09500796347856522, + "learning_rate": 6.923586666101112e-07, + "loss": 0.0012, + "step": 264260 + }, + { + "epoch": 1.6949456920851866, + "grad_norm": 0.10292333364486694, + "learning_rate": 6.920745268188811e-07, + "loss": 0.001, + "step": 264270 + }, + { + "epoch": 1.6950098289789728, + "grad_norm": 0.11834751069545746, + "learning_rate": 6.917904410090359e-07, + "loss": 0.0012, + "step": 264280 + }, + { + "epoch": 1.695073965872759, + "grad_norm": 0.03635682910680771, + "learning_rate": 6.915064091841361e-07, + "loss": 0.0024, + "step": 264290 + }, + { + "epoch": 1.6951381027665449, + "grad_norm": 0.22080478072166443, + "learning_rate": 6.912224313477406e-07, + "loss": 0.0018, + "step": 264300 + }, + { + "epoch": 1.695202239660331, + "grad_norm": 0.0906863883137703, + "learning_rate": 6.909385075034065e-07, + "loss": 0.001, + "step": 264310 + }, + { + "epoch": 1.6952663765541172, + "grad_norm": 0.0966591164469719, + "learning_rate": 6.906546376546936e-07, + "loss": 0.0009, + "step": 264320 + }, + { + "epoch": 1.695330513447903, + "grad_norm": 0.04243626818060875, + "learning_rate": 6.90370821805158e-07, + "loss": 0.0018, + "step": 264330 + }, + { + "epoch": 1.6953946503416892, + "grad_norm": 0.01818661577999592, + "learning_rate": 6.900870599583559e-07, + "loss": 0.0008, + "step": 264340 + }, + { + "epoch": 1.6954587872354754, + "grad_norm": 0.0733444094657898, + "learning_rate": 6.898033521178421e-07, + "loss": 0.0014, + "step": 264350 + }, + { + "epoch": 1.6955229241292615, + "grad_norm": 0.0397791862487793, + "learning_rate": 6.895196982871738e-07, + "loss": 0.0009, + "step": 264360 + }, + { + "epoch": 1.6955870610230477, + "grad_norm": 0.0348215214908123, + "learning_rate": 6.892360984699042e-07, + "loss": 0.0034, + "step": 264370 + }, + { + "epoch": 1.6956511979168338, + "grad_norm": 0.056535977870225906, + "learning_rate": 6.889525526695873e-07, + "loss": 0.001, + "step": 264380 + }, + { + "epoch": 1.6957153348106198, + "grad_norm": 0.002477928763255477, + "learning_rate": 6.88669060889775e-07, + "loss": 0.001, + "step": 264390 + }, + { + "epoch": 1.695779471704406, + "grad_norm": 0.05568133294582367, + "learning_rate": 6.883856231340219e-07, + "loss": 0.0008, + "step": 264400 + }, + { + "epoch": 1.6958436085981918, + "grad_norm": 0.3743601143360138, + "learning_rate": 6.881022394058779e-07, + "loss": 0.0011, + "step": 264410 + }, + { + "epoch": 1.695907745491978, + "grad_norm": 0.11811339855194092, + "learning_rate": 6.87818909708895e-07, + "loss": 0.0014, + "step": 264420 + }, + { + "epoch": 1.6959718823857641, + "grad_norm": 0.10599221289157867, + "learning_rate": 6.875356340466216e-07, + "loss": 0.0012, + "step": 264430 + }, + { + "epoch": 1.6960360192795503, + "grad_norm": 0.10912429541349411, + "learning_rate": 6.872524124226099e-07, + "loss": 0.001, + "step": 264440 + }, + { + "epoch": 1.6961001561733364, + "grad_norm": 0.04976571723818779, + "learning_rate": 6.869692448404081e-07, + "loss": 0.0013, + "step": 264450 + }, + { + "epoch": 1.6961642930671226, + "grad_norm": 0.12095761299133301, + "learning_rate": 6.866861313035622e-07, + "loss": 0.0007, + "step": 264460 + }, + { + "epoch": 1.6962284299609087, + "grad_norm": 0.07463040202856064, + "learning_rate": 6.864030718156234e-07, + "loss": 0.0008, + "step": 264470 + }, + { + "epoch": 1.6962925668546947, + "grad_norm": 0.04961828142404556, + "learning_rate": 6.861200663801371e-07, + "loss": 0.0009, + "step": 264480 + }, + { + "epoch": 1.6963567037484808, + "grad_norm": 0.03794373199343681, + "learning_rate": 6.858371150006493e-07, + "loss": 0.0015, + "step": 264490 + }, + { + "epoch": 1.6964208406422667, + "grad_norm": 0.013013191521167755, + "learning_rate": 6.855542176807051e-07, + "loss": 0.0004, + "step": 264500 + }, + { + "epoch": 1.6964849775360529, + "grad_norm": 0.018160870298743248, + "learning_rate": 6.852713744238504e-07, + "loss": 0.0018, + "step": 264510 + }, + { + "epoch": 1.696549114429839, + "grad_norm": 0.14764241874217987, + "learning_rate": 6.849885852336297e-07, + "loss": 0.0023, + "step": 264520 + }, + { + "epoch": 1.6966132513236252, + "grad_norm": 0.05118638649582863, + "learning_rate": 6.847058501135856e-07, + "loss": 0.0009, + "step": 264530 + }, + { + "epoch": 1.6966773882174113, + "grad_norm": 0.17762254178524017, + "learning_rate": 6.844231690672599e-07, + "loss": 0.001, + "step": 264540 + }, + { + "epoch": 1.6967415251111975, + "grad_norm": 0.07955456525087357, + "learning_rate": 6.841405420981978e-07, + "loss": 0.0023, + "step": 264550 + }, + { + "epoch": 1.6968056620049834, + "grad_norm": 0.08135933429002762, + "learning_rate": 6.838579692099389e-07, + "loss": 0.0041, + "step": 264560 + }, + { + "epoch": 1.6968697988987695, + "grad_norm": 0.04895102232694626, + "learning_rate": 6.835754504060244e-07, + "loss": 0.0013, + "step": 264570 + }, + { + "epoch": 1.6969339357925555, + "grad_norm": 0.12795418500900269, + "learning_rate": 6.832929856899934e-07, + "loss": 0.0013, + "step": 264580 + }, + { + "epoch": 1.6969980726863416, + "grad_norm": 0.08570278435945511, + "learning_rate": 6.830105750653875e-07, + "loss": 0.001, + "step": 264590 + }, + { + "epoch": 1.6970622095801278, + "grad_norm": 0.03363915905356407, + "learning_rate": 6.827282185357442e-07, + "loss": 0.001, + "step": 264600 + }, + { + "epoch": 1.697126346473914, + "grad_norm": 0.03974626213312149, + "learning_rate": 6.82445916104601e-07, + "loss": 0.0009, + "step": 264610 + }, + { + "epoch": 1.6971904833677, + "grad_norm": 0.024083245545625687, + "learning_rate": 6.821636677754967e-07, + "loss": 0.0009, + "step": 264620 + }, + { + "epoch": 1.6972546202614862, + "grad_norm": 0.036198630928993225, + "learning_rate": 6.818814735519685e-07, + "loss": 0.0011, + "step": 264630 + }, + { + "epoch": 1.6973187571552724, + "grad_norm": 0.06512334942817688, + "learning_rate": 6.815993334375509e-07, + "loss": 0.0012, + "step": 264640 + }, + { + "epoch": 1.6973828940490583, + "grad_norm": 0.08642488718032837, + "learning_rate": 6.813172474357788e-07, + "loss": 0.0009, + "step": 264650 + }, + { + "epoch": 1.6974470309428444, + "grad_norm": 0.025644278153777122, + "learning_rate": 6.810352155501898e-07, + "loss": 0.0013, + "step": 264660 + }, + { + "epoch": 1.6975111678366304, + "grad_norm": 0.1016627624630928, + "learning_rate": 6.807532377843157e-07, + "loss": 0.0009, + "step": 264670 + }, + { + "epoch": 1.6975753047304165, + "grad_norm": 0.2199431210756302, + "learning_rate": 6.804713141416907e-07, + "loss": 0.0009, + "step": 264680 + }, + { + "epoch": 1.6976394416242027, + "grad_norm": 0.008409027941524982, + "learning_rate": 6.801894446258462e-07, + "loss": 0.0009, + "step": 264690 + }, + { + "epoch": 1.6977035785179888, + "grad_norm": 0.10022395849227905, + "learning_rate": 6.799076292403161e-07, + "loss": 0.0008, + "step": 264700 + }, + { + "epoch": 1.697767715411775, + "grad_norm": 0.11597537249326706, + "learning_rate": 6.796258679886309e-07, + "loss": 0.0013, + "step": 264710 + }, + { + "epoch": 1.697831852305561, + "grad_norm": 0.06105426326394081, + "learning_rate": 6.793441608743206e-07, + "loss": 0.0018, + "step": 264720 + }, + { + "epoch": 1.697895989199347, + "grad_norm": 0.01340585295110941, + "learning_rate": 6.790625079009167e-07, + "loss": 0.0014, + "step": 264730 + }, + { + "epoch": 1.6979601260931332, + "grad_norm": 0.2167714387178421, + "learning_rate": 6.787809090719477e-07, + "loss": 0.0018, + "step": 264740 + }, + { + "epoch": 1.6980242629869193, + "grad_norm": 0.249318465590477, + "learning_rate": 6.784993643909426e-07, + "loss": 0.0016, + "step": 264750 + }, + { + "epoch": 1.6980883998807053, + "grad_norm": 0.08137817680835724, + "learning_rate": 6.782178738614276e-07, + "loss": 0.0005, + "step": 264760 + }, + { + "epoch": 1.6981525367744914, + "grad_norm": 0.0265290979295969, + "learning_rate": 6.779364374869324e-07, + "loss": 0.0009, + "step": 264770 + }, + { + "epoch": 1.6982166736682776, + "grad_norm": 0.12711146473884583, + "learning_rate": 6.776550552709827e-07, + "loss": 0.0005, + "step": 264780 + }, + { + "epoch": 1.6982808105620637, + "grad_norm": 0.08862216770648956, + "learning_rate": 6.773737272171043e-07, + "loss": 0.0007, + "step": 264790 + }, + { + "epoch": 1.6983449474558499, + "grad_norm": 0.08041568100452423, + "learning_rate": 6.770924533288215e-07, + "loss": 0.0008, + "step": 264800 + }, + { + "epoch": 1.698409084349636, + "grad_norm": 0.0358443446457386, + "learning_rate": 6.768112336096605e-07, + "loss": 0.0009, + "step": 264810 + }, + { + "epoch": 1.698473221243422, + "grad_norm": 0.02899232506752014, + "learning_rate": 6.765300680631448e-07, + "loss": 0.0027, + "step": 264820 + }, + { + "epoch": 1.698537358137208, + "grad_norm": 0.012916501611471176, + "learning_rate": 6.762489566927971e-07, + "loss": 0.0008, + "step": 264830 + }, + { + "epoch": 1.698601495030994, + "grad_norm": 0.06844992935657501, + "learning_rate": 6.75967899502139e-07, + "loss": 0.0011, + "step": 264840 + }, + { + "epoch": 1.6986656319247802, + "grad_norm": 0.1103900671005249, + "learning_rate": 6.756868964946944e-07, + "loss": 0.0012, + "step": 264850 + }, + { + "epoch": 1.6987297688185663, + "grad_norm": 0.17282582819461823, + "learning_rate": 6.754059476739838e-07, + "loss": 0.0027, + "step": 264860 + }, + { + "epoch": 1.6987939057123524, + "grad_norm": 0.1048382967710495, + "learning_rate": 6.751250530435261e-07, + "loss": 0.0019, + "step": 264870 + }, + { + "epoch": 1.6988580426061386, + "grad_norm": 0.03646853566169739, + "learning_rate": 6.748442126068439e-07, + "loss": 0.0006, + "step": 264880 + }, + { + "epoch": 1.6989221794999247, + "grad_norm": 0.030923152342438698, + "learning_rate": 6.745634263674544e-07, + "loss": 0.0016, + "step": 264890 + }, + { + "epoch": 1.698986316393711, + "grad_norm": 0.061948273330926895, + "learning_rate": 6.742826943288771e-07, + "loss": 0.0009, + "step": 264900 + }, + { + "epoch": 1.6990504532874968, + "grad_norm": 0.0893976241350174, + "learning_rate": 6.740020164946276e-07, + "loss": 0.0005, + "step": 264910 + }, + { + "epoch": 1.699114590181283, + "grad_norm": 0.15201431512832642, + "learning_rate": 6.737213928682257e-07, + "loss": 0.002, + "step": 264920 + }, + { + "epoch": 1.699178727075069, + "grad_norm": 0.03452815115451813, + "learning_rate": 6.734408234531869e-07, + "loss": 0.0008, + "step": 264930 + }, + { + "epoch": 1.699242863968855, + "grad_norm": 0.04301924630999565, + "learning_rate": 6.731603082530269e-07, + "loss": 0.0013, + "step": 264940 + }, + { + "epoch": 1.6993070008626412, + "grad_norm": 0.0761362686753273, + "learning_rate": 6.728798472712589e-07, + "loss": 0.001, + "step": 264950 + }, + { + "epoch": 1.6993711377564273, + "grad_norm": 0.07112876325845718, + "learning_rate": 6.725994405114006e-07, + "loss": 0.0007, + "step": 264960 + }, + { + "epoch": 1.6994352746502135, + "grad_norm": 0.05872325971722603, + "learning_rate": 6.723190879769637e-07, + "loss": 0.0006, + "step": 264970 + }, + { + "epoch": 1.6994994115439996, + "grad_norm": 0.09243052452802658, + "learning_rate": 6.720387896714619e-07, + "loss": 0.0008, + "step": 264980 + }, + { + "epoch": 1.6995635484377856, + "grad_norm": 0.008637145161628723, + "learning_rate": 6.717585455984061e-07, + "loss": 0.0008, + "step": 264990 + }, + { + "epoch": 1.6996276853315717, + "grad_norm": 0.05439627543091774, + "learning_rate": 6.714783557613097e-07, + "loss": 0.0007, + "step": 265000 + }, + { + "epoch": 1.6996918222253576, + "grad_norm": 0.025693070143461227, + "learning_rate": 6.711982201636835e-07, + "loss": 0.0007, + "step": 265010 + }, + { + "epoch": 1.6997559591191438, + "grad_norm": 0.048525068908929825, + "learning_rate": 6.709181388090357e-07, + "loss": 0.0006, + "step": 265020 + }, + { + "epoch": 1.69982009601293, + "grad_norm": 0.05880725756287575, + "learning_rate": 6.706381117008793e-07, + "loss": 0.0008, + "step": 265030 + }, + { + "epoch": 1.699884232906716, + "grad_norm": 0.11127229779958725, + "learning_rate": 6.703581388427199e-07, + "loss": 0.0012, + "step": 265040 + }, + { + "epoch": 1.6999483698005022, + "grad_norm": 0.047811251133680344, + "learning_rate": 6.70078220238069e-07, + "loss": 0.0039, + "step": 265050 + }, + { + "epoch": 1.7000125066942884, + "grad_norm": 0.2942372262477875, + "learning_rate": 6.697983558904325e-07, + "loss": 0.0011, + "step": 265060 + }, + { + "epoch": 1.7000766435880745, + "grad_norm": 0.041605714708566666, + "learning_rate": 6.69518545803316e-07, + "loss": 0.0016, + "step": 265070 + }, + { + "epoch": 1.7001407804818605, + "grad_norm": 0.09894415736198425, + "learning_rate": 6.692387899802283e-07, + "loss": 0.0014, + "step": 265080 + }, + { + "epoch": 1.7002049173756466, + "grad_norm": 0.1805630326271057, + "learning_rate": 6.689590884246733e-07, + "loss": 0.0005, + "step": 265090 + }, + { + "epoch": 1.7002690542694325, + "grad_norm": 0.04582097753882408, + "learning_rate": 6.686794411401564e-07, + "loss": 0.0014, + "step": 265100 + }, + { + "epoch": 1.7003331911632187, + "grad_norm": 0.1644166111946106, + "learning_rate": 6.683998481301812e-07, + "loss": 0.0013, + "step": 265110 + }, + { + "epoch": 1.7003973280570048, + "grad_norm": 0.17809748649597168, + "learning_rate": 6.681203093982519e-07, + "loss": 0.0013, + "step": 265120 + }, + { + "epoch": 1.700461464950791, + "grad_norm": 0.034539878368377686, + "learning_rate": 6.678408249478719e-07, + "loss": 0.0018, + "step": 265130 + }, + { + "epoch": 1.7005256018445771, + "grad_norm": 0.007027273066341877, + "learning_rate": 6.67561394782541e-07, + "loss": 0.0009, + "step": 265140 + }, + { + "epoch": 1.7005897387383633, + "grad_norm": 0.0024962613824754953, + "learning_rate": 6.672820189057628e-07, + "loss": 0.0013, + "step": 265150 + }, + { + "epoch": 1.7006538756321494, + "grad_norm": 0.3956187069416046, + "learning_rate": 6.670026973210381e-07, + "loss": 0.0021, + "step": 265160 + }, + { + "epoch": 1.7007180125259354, + "grad_norm": 0.19051364064216614, + "learning_rate": 6.667234300318659e-07, + "loss": 0.0008, + "step": 265170 + }, + { + "epoch": 1.7007821494197215, + "grad_norm": 0.47155243158340454, + "learning_rate": 6.664442170417457e-07, + "loss": 0.0011, + "step": 265180 + }, + { + "epoch": 1.7008462863135074, + "grad_norm": 0.06036653369665146, + "learning_rate": 6.661650583541767e-07, + "loss": 0.0012, + "step": 265190 + }, + { + "epoch": 1.7009104232072936, + "grad_norm": 0.03911900520324707, + "learning_rate": 6.658859539726575e-07, + "loss": 0.001, + "step": 265200 + }, + { + "epoch": 1.7009745601010797, + "grad_norm": 0.025036673992872238, + "learning_rate": 6.656069039006846e-07, + "loss": 0.0018, + "step": 265210 + }, + { + "epoch": 1.7010386969948659, + "grad_norm": 0.10333983600139618, + "learning_rate": 6.653279081417541e-07, + "loss": 0.0014, + "step": 265220 + }, + { + "epoch": 1.701102833888652, + "grad_norm": 0.15717697143554688, + "learning_rate": 6.650489666993637e-07, + "loss": 0.0017, + "step": 265230 + }, + { + "epoch": 1.7011669707824382, + "grad_norm": 0.03343525528907776, + "learning_rate": 6.647700795770084e-07, + "loss": 0.0014, + "step": 265240 + }, + { + "epoch": 1.701231107676224, + "grad_norm": 0.028773343190550804, + "learning_rate": 6.644912467781816e-07, + "loss": 0.0005, + "step": 265250 + }, + { + "epoch": 1.7012952445700102, + "grad_norm": 0.054235756397247314, + "learning_rate": 6.642124683063772e-07, + "loss": 0.0006, + "step": 265260 + }, + { + "epoch": 1.7013593814637962, + "grad_norm": 0.08739051222801208, + "learning_rate": 6.639337441650906e-07, + "loss": 0.0011, + "step": 265270 + }, + { + "epoch": 1.7014235183575823, + "grad_norm": 0.17551854252815247, + "learning_rate": 6.63655074357813e-07, + "loss": 0.0011, + "step": 265280 + }, + { + "epoch": 1.7014876552513685, + "grad_norm": 0.14753693342208862, + "learning_rate": 6.633764588880354e-07, + "loss": 0.0009, + "step": 265290 + }, + { + "epoch": 1.7015517921451546, + "grad_norm": 0.02053745463490486, + "learning_rate": 6.630978977592512e-07, + "loss": 0.0006, + "step": 265300 + }, + { + "epoch": 1.7016159290389408, + "grad_norm": 0.04896795004606247, + "learning_rate": 6.628193909749497e-07, + "loss": 0.0012, + "step": 265310 + }, + { + "epoch": 1.701680065932727, + "grad_norm": 0.027453456073999405, + "learning_rate": 6.625409385386211e-07, + "loss": 0.0008, + "step": 265320 + }, + { + "epoch": 1.701744202826513, + "grad_norm": 0.02024935744702816, + "learning_rate": 6.622625404537536e-07, + "loss": 0.0006, + "step": 265330 + }, + { + "epoch": 1.701808339720299, + "grad_norm": 0.14231550693511963, + "learning_rate": 6.619841967238378e-07, + "loss": 0.0009, + "step": 265340 + }, + { + "epoch": 1.7018724766140851, + "grad_norm": 0.027371495962142944, + "learning_rate": 6.617059073523597e-07, + "loss": 0.0007, + "step": 265350 + }, + { + "epoch": 1.701936613507871, + "grad_norm": 0.10268767923116684, + "learning_rate": 6.614276723428076e-07, + "loss": 0.0013, + "step": 265360 + }, + { + "epoch": 1.7020007504016572, + "grad_norm": 0.2743382751941681, + "learning_rate": 6.611494916986661e-07, + "loss": 0.0019, + "step": 265370 + }, + { + "epoch": 1.7020648872954434, + "grad_norm": 0.052332375198602676, + "learning_rate": 6.608713654234239e-07, + "loss": 0.0011, + "step": 265380 + }, + { + "epoch": 1.7021290241892295, + "grad_norm": 0.001781732658855617, + "learning_rate": 6.605932935205644e-07, + "loss": 0.0009, + "step": 265390 + }, + { + "epoch": 1.7021931610830157, + "grad_norm": 0.004635539371520281, + "learning_rate": 6.603152759935722e-07, + "loss": 0.0011, + "step": 265400 + }, + { + "epoch": 1.7022572979768018, + "grad_norm": 0.06639865785837173, + "learning_rate": 6.600373128459298e-07, + "loss": 0.0023, + "step": 265410 + }, + { + "epoch": 1.7023214348705877, + "grad_norm": 0.12824387848377228, + "learning_rate": 6.597594040811228e-07, + "loss": 0.0009, + "step": 265420 + }, + { + "epoch": 1.7023855717643739, + "grad_norm": 0.05182370916008949, + "learning_rate": 6.59481549702633e-07, + "loss": 0.0011, + "step": 265430 + }, + { + "epoch": 1.7024497086581598, + "grad_norm": 0.14896632730960846, + "learning_rate": 6.592037497139397e-07, + "loss": 0.002, + "step": 265440 + }, + { + "epoch": 1.702513845551946, + "grad_norm": 0.012180998921394348, + "learning_rate": 6.58926004118527e-07, + "loss": 0.0018, + "step": 265450 + }, + { + "epoch": 1.702577982445732, + "grad_norm": 0.1370176523923874, + "learning_rate": 6.586483129198739e-07, + "loss": 0.0009, + "step": 265460 + }, + { + "epoch": 1.7026421193395183, + "grad_norm": 0.03550407662987709, + "learning_rate": 6.583706761214603e-07, + "loss": 0.0008, + "step": 265470 + }, + { + "epoch": 1.7027062562333044, + "grad_norm": 0.21676664054393768, + "learning_rate": 6.580930937267644e-07, + "loss": 0.0014, + "step": 265480 + }, + { + "epoch": 1.7027703931270906, + "grad_norm": 0.10779733210802078, + "learning_rate": 6.578155657392654e-07, + "loss": 0.0015, + "step": 265490 + }, + { + "epoch": 1.7028345300208767, + "grad_norm": 0.03631667420268059, + "learning_rate": 6.575380921624414e-07, + "loss": 0.001, + "step": 265500 + }, + { + "epoch": 1.7028986669146626, + "grad_norm": 0.126630499958992, + "learning_rate": 6.572606729997682e-07, + "loss": 0.001, + "step": 265510 + }, + { + "epoch": 1.7029628038084488, + "grad_norm": 0.006550406105816364, + "learning_rate": 6.569833082547217e-07, + "loss": 0.0013, + "step": 265520 + }, + { + "epoch": 1.7030269407022347, + "grad_norm": 0.13078109920024872, + "learning_rate": 6.567059979307788e-07, + "loss": 0.0019, + "step": 265530 + }, + { + "epoch": 1.7030910775960209, + "grad_norm": 0.0882938951253891, + "learning_rate": 6.564287420314142e-07, + "loss": 0.0005, + "step": 265540 + }, + { + "epoch": 1.703155214489807, + "grad_norm": 0.0037018307484686375, + "learning_rate": 6.561515405601021e-07, + "loss": 0.0038, + "step": 265550 + }, + { + "epoch": 1.7032193513835931, + "grad_norm": 0.015162280760705471, + "learning_rate": 6.558743935203138e-07, + "loss": 0.002, + "step": 265560 + }, + { + "epoch": 1.7032834882773793, + "grad_norm": 0.07943790405988693, + "learning_rate": 6.555973009155259e-07, + "loss": 0.0006, + "step": 265570 + }, + { + "epoch": 1.7033476251711654, + "grad_norm": 0.042144257575273514, + "learning_rate": 6.553202627492084e-07, + "loss": 0.0004, + "step": 265580 + }, + { + "epoch": 1.7034117620649516, + "grad_norm": 0.1977560967206955, + "learning_rate": 6.550432790248318e-07, + "loss": 0.0012, + "step": 265590 + }, + { + "epoch": 1.7034758989587375, + "grad_norm": 0.06872736662626266, + "learning_rate": 6.547663497458695e-07, + "loss": 0.0007, + "step": 265600 + }, + { + "epoch": 1.7035400358525237, + "grad_norm": 0.07133138179779053, + "learning_rate": 6.544894749157904e-07, + "loss": 0.0015, + "step": 265610 + }, + { + "epoch": 1.7036041727463096, + "grad_norm": 0.09437665343284607, + "learning_rate": 6.542126545380639e-07, + "loss": 0.001, + "step": 265620 + }, + { + "epoch": 1.7036683096400957, + "grad_norm": 0.09219174832105637, + "learning_rate": 6.539358886161573e-07, + "loss": 0.0007, + "step": 265630 + }, + { + "epoch": 1.703732446533882, + "grad_norm": 0.007705396506935358, + "learning_rate": 6.536591771535417e-07, + "loss": 0.006, + "step": 265640 + }, + { + "epoch": 1.703796583427668, + "grad_norm": 0.030093500390648842, + "learning_rate": 6.533825201536826e-07, + "loss": 0.0007, + "step": 265650 + }, + { + "epoch": 1.7038607203214542, + "grad_norm": 0.053052615374326706, + "learning_rate": 6.531059176200472e-07, + "loss": 0.0027, + "step": 265660 + }, + { + "epoch": 1.7039248572152403, + "grad_norm": 0.10145606845617294, + "learning_rate": 6.528293695560999e-07, + "loss": 0.0012, + "step": 265670 + }, + { + "epoch": 1.7039889941090263, + "grad_norm": 0.055144038051366806, + "learning_rate": 6.525528759653088e-07, + "loss": 0.0007, + "step": 265680 + }, + { + "epoch": 1.7040531310028124, + "grad_norm": 0.10749773681163788, + "learning_rate": 6.522764368511375e-07, + "loss": 0.001, + "step": 265690 + }, + { + "epoch": 1.7041172678965983, + "grad_norm": 0.1056133508682251, + "learning_rate": 6.520000522170488e-07, + "loss": 0.0021, + "step": 265700 + }, + { + "epoch": 1.7041814047903845, + "grad_norm": 0.034532222896814346, + "learning_rate": 6.517237220665068e-07, + "loss": 0.0008, + "step": 265710 + }, + { + "epoch": 1.7042455416841706, + "grad_norm": 0.15641459822654724, + "learning_rate": 6.514474464029752e-07, + "loss": 0.0014, + "step": 265720 + }, + { + "epoch": 1.7043096785779568, + "grad_norm": 0.010333506390452385, + "learning_rate": 6.511712252299146e-07, + "loss": 0.0007, + "step": 265730 + }, + { + "epoch": 1.704373815471743, + "grad_norm": 0.013161303475499153, + "learning_rate": 6.508950585507856e-07, + "loss": 0.0005, + "step": 265740 + }, + { + "epoch": 1.704437952365529, + "grad_norm": 0.06176147982478142, + "learning_rate": 6.50618946369051e-07, + "loss": 0.0014, + "step": 265750 + }, + { + "epoch": 1.7045020892593152, + "grad_norm": 0.12416189163923264, + "learning_rate": 6.50342888688169e-07, + "loss": 0.0013, + "step": 265760 + }, + { + "epoch": 1.7045662261531012, + "grad_norm": 0.03362249210476875, + "learning_rate": 6.500668855115999e-07, + "loss": 0.001, + "step": 265770 + }, + { + "epoch": 1.7046303630468873, + "grad_norm": 0.004374053794890642, + "learning_rate": 6.497909368428002e-07, + "loss": 0.0013, + "step": 265780 + }, + { + "epoch": 1.7046944999406732, + "grad_norm": 0.06230396404862404, + "learning_rate": 6.495150426852298e-07, + "loss": 0.0011, + "step": 265790 + }, + { + "epoch": 1.7047586368344594, + "grad_norm": 0.04657968878746033, + "learning_rate": 6.492392030423444e-07, + "loss": 0.0015, + "step": 265800 + }, + { + "epoch": 1.7048227737282455, + "grad_norm": 0.00979990791529417, + "learning_rate": 6.489634179176025e-07, + "loss": 0.0009, + "step": 265810 + }, + { + "epoch": 1.7048869106220317, + "grad_norm": 0.08935471624135971, + "learning_rate": 6.486876873144587e-07, + "loss": 0.001, + "step": 265820 + }, + { + "epoch": 1.7049510475158178, + "grad_norm": 0.021802430972456932, + "learning_rate": 6.484120112363667e-07, + "loss": 0.0007, + "step": 265830 + }, + { + "epoch": 1.705015184409604, + "grad_norm": 0.04074636101722717, + "learning_rate": 6.481363896867832e-07, + "loss": 0.0018, + "step": 265840 + }, + { + "epoch": 1.70507932130339, + "grad_norm": 0.04045892506837845, + "learning_rate": 6.478608226691613e-07, + "loss": 0.0017, + "step": 265850 + }, + { + "epoch": 1.705143458197176, + "grad_norm": 0.009271270595490932, + "learning_rate": 6.475853101869523e-07, + "loss": 0.0004, + "step": 265860 + }, + { + "epoch": 1.7052075950909622, + "grad_norm": 0.08048803359270096, + "learning_rate": 6.473098522436111e-07, + "loss": 0.0013, + "step": 265870 + }, + { + "epoch": 1.7052717319847481, + "grad_norm": 0.007005834486335516, + "learning_rate": 6.470344488425884e-07, + "loss": 0.0018, + "step": 265880 + }, + { + "epoch": 1.7053358688785343, + "grad_norm": 0.01865677908062935, + "learning_rate": 6.467590999873352e-07, + "loss": 0.0019, + "step": 265890 + }, + { + "epoch": 1.7054000057723204, + "grad_norm": 0.17345519363880157, + "learning_rate": 6.464838056813006e-07, + "loss": 0.0017, + "step": 265900 + }, + { + "epoch": 1.7054641426661066, + "grad_norm": 0.202212855219841, + "learning_rate": 6.46208565927936e-07, + "loss": 0.0011, + "step": 265910 + }, + { + "epoch": 1.7055282795598927, + "grad_norm": 0.008908011019229889, + "learning_rate": 6.4593338073069e-07, + "loss": 0.0007, + "step": 265920 + }, + { + "epoch": 1.7055924164536789, + "grad_norm": 0.2655397355556488, + "learning_rate": 6.456582500930103e-07, + "loss": 0.0007, + "step": 265930 + }, + { + "epoch": 1.7056565533474648, + "grad_norm": 0.2044960856437683, + "learning_rate": 6.453831740183441e-07, + "loss": 0.0018, + "step": 265940 + }, + { + "epoch": 1.705720690241251, + "grad_norm": 0.08434181660413742, + "learning_rate": 6.451081525101399e-07, + "loss": 0.0013, + "step": 265950 + }, + { + "epoch": 1.7057848271350369, + "grad_norm": 0.004985256120562553, + "learning_rate": 6.448331855718426e-07, + "loss": 0.0021, + "step": 265960 + }, + { + "epoch": 1.705848964028823, + "grad_norm": 0.10638520866632462, + "learning_rate": 6.445582732068984e-07, + "loss": 0.001, + "step": 265970 + }, + { + "epoch": 1.7059131009226092, + "grad_norm": 0.07057823240756989, + "learning_rate": 6.442834154187505e-07, + "loss": 0.0008, + "step": 265980 + }, + { + "epoch": 1.7059772378163953, + "grad_norm": 0.0023433228489011526, + "learning_rate": 6.440086122108458e-07, + "loss": 0.0055, + "step": 265990 + }, + { + "epoch": 1.7060413747101815, + "grad_norm": 0.08313106000423431, + "learning_rate": 6.437338635866258e-07, + "loss": 0.0012, + "step": 266000 + }, + { + "epoch": 1.7061055116039676, + "grad_norm": 0.06899221241474152, + "learning_rate": 6.434591695495335e-07, + "loss": 0.0014, + "step": 266010 + }, + { + "epoch": 1.7061696484977538, + "grad_norm": 0.19987016916275024, + "learning_rate": 6.431845301030115e-07, + "loss": 0.0015, + "step": 266020 + }, + { + "epoch": 1.7062337853915397, + "grad_norm": 0.11432585120201111, + "learning_rate": 6.429099452505023e-07, + "loss": 0.0018, + "step": 266030 + }, + { + "epoch": 1.7062979222853258, + "grad_norm": 0.13025084137916565, + "learning_rate": 6.426354149954445e-07, + "loss": 0.0008, + "step": 266040 + }, + { + "epoch": 1.7063620591791118, + "grad_norm": 0.09899530559778214, + "learning_rate": 6.423609393412783e-07, + "loss": 0.0008, + "step": 266050 + }, + { + "epoch": 1.706426196072898, + "grad_norm": 0.0024011684581637383, + "learning_rate": 6.420865182914454e-07, + "loss": 0.0006, + "step": 266060 + }, + { + "epoch": 1.706490332966684, + "grad_norm": 0.06698352843523026, + "learning_rate": 6.418121518493825e-07, + "loss": 0.0009, + "step": 266070 + }, + { + "epoch": 1.7065544698604702, + "grad_norm": 0.12909150123596191, + "learning_rate": 6.41537840018528e-07, + "loss": 0.0007, + "step": 266080 + }, + { + "epoch": 1.7066186067542564, + "grad_norm": 0.12435998022556305, + "learning_rate": 6.412635828023183e-07, + "loss": 0.0009, + "step": 266090 + }, + { + "epoch": 1.7066827436480425, + "grad_norm": 0.44490617513656616, + "learning_rate": 6.409893802041927e-07, + "loss": 0.0017, + "step": 266100 + }, + { + "epoch": 1.7067468805418284, + "grad_norm": 0.0475863553583622, + "learning_rate": 6.40715232227585e-07, + "loss": 0.0011, + "step": 266110 + }, + { + "epoch": 1.7068110174356146, + "grad_norm": 0.043053992092609406, + "learning_rate": 6.404411388759307e-07, + "loss": 0.0011, + "step": 266120 + }, + { + "epoch": 1.7068751543294005, + "grad_norm": 0.05306117981672287, + "learning_rate": 6.401671001526644e-07, + "loss": 0.0013, + "step": 266130 + }, + { + "epoch": 1.7069392912231867, + "grad_norm": 0.032787881791591644, + "learning_rate": 6.398931160612204e-07, + "loss": 0.0007, + "step": 266140 + }, + { + "epoch": 1.7070034281169728, + "grad_norm": 0.027778422459959984, + "learning_rate": 6.396191866050328e-07, + "loss": 0.001, + "step": 266150 + }, + { + "epoch": 1.707067565010759, + "grad_norm": 0.2105724811553955, + "learning_rate": 6.393453117875314e-07, + "loss": 0.0022, + "step": 266160 + }, + { + "epoch": 1.707131701904545, + "grad_norm": 0.03211251273751259, + "learning_rate": 6.390714916121505e-07, + "loss": 0.0006, + "step": 266170 + }, + { + "epoch": 1.7071958387983313, + "grad_norm": 0.0578240305185318, + "learning_rate": 6.387977260823214e-07, + "loss": 0.0004, + "step": 266180 + }, + { + "epoch": 1.7072599756921174, + "grad_norm": 0.07418222725391388, + "learning_rate": 6.385240152014732e-07, + "loss": 0.0009, + "step": 266190 + }, + { + "epoch": 1.7073241125859033, + "grad_norm": 0.09191425144672394, + "learning_rate": 6.382503589730349e-07, + "loss": 0.0011, + "step": 266200 + }, + { + "epoch": 1.7073882494796895, + "grad_norm": 0.051483865827322006, + "learning_rate": 6.37976757400438e-07, + "loss": 0.0007, + "step": 266210 + }, + { + "epoch": 1.7074523863734754, + "grad_norm": 0.05915215611457825, + "learning_rate": 6.377032104871101e-07, + "loss": 0.0013, + "step": 266220 + }, + { + "epoch": 1.7075165232672616, + "grad_norm": 0.01516183651983738, + "learning_rate": 6.374297182364786e-07, + "loss": 0.0012, + "step": 266230 + }, + { + "epoch": 1.7075806601610477, + "grad_norm": 0.04072074219584465, + "learning_rate": 6.371562806519699e-07, + "loss": 0.0007, + "step": 266240 + }, + { + "epoch": 1.7076447970548339, + "grad_norm": 0.11622676998376846, + "learning_rate": 6.368828977370117e-07, + "loss": 0.0012, + "step": 266250 + }, + { + "epoch": 1.70770893394862, + "grad_norm": 0.12206113338470459, + "learning_rate": 6.366095694950292e-07, + "loss": 0.0011, + "step": 266260 + }, + { + "epoch": 1.7077730708424061, + "grad_norm": 0.17869877815246582, + "learning_rate": 6.363362959294472e-07, + "loss": 0.0019, + "step": 266270 + }, + { + "epoch": 1.707837207736192, + "grad_norm": 0.11581534892320633, + "learning_rate": 6.360630770436887e-07, + "loss": 0.0009, + "step": 266280 + }, + { + "epoch": 1.7079013446299782, + "grad_norm": 0.04566968232393265, + "learning_rate": 6.357899128411804e-07, + "loss": 0.0005, + "step": 266290 + }, + { + "epoch": 1.7079654815237644, + "grad_norm": 0.011661666445434093, + "learning_rate": 6.355168033253428e-07, + "loss": 0.001, + "step": 266300 + }, + { + "epoch": 1.7080296184175503, + "grad_norm": 0.19823497533798218, + "learning_rate": 6.352437484995988e-07, + "loss": 0.0012, + "step": 266310 + }, + { + "epoch": 1.7080937553113364, + "grad_norm": 0.07741516083478928, + "learning_rate": 6.349707483673706e-07, + "loss": 0.0042, + "step": 266320 + }, + { + "epoch": 1.7081578922051226, + "grad_norm": 0.05639111250638962, + "learning_rate": 6.346978029320782e-07, + "loss": 0.0009, + "step": 266330 + }, + { + "epoch": 1.7082220290989087, + "grad_norm": 0.2746449112892151, + "learning_rate": 6.34424912197143e-07, + "loss": 0.0006, + "step": 266340 + }, + { + "epoch": 1.708286165992695, + "grad_norm": 0.08014754205942154, + "learning_rate": 6.341520761659819e-07, + "loss": 0.0014, + "step": 266350 + }, + { + "epoch": 1.708350302886481, + "grad_norm": 0.00769168371334672, + "learning_rate": 6.33879294842017e-07, + "loss": 0.001, + "step": 266360 + }, + { + "epoch": 1.708414439780267, + "grad_norm": 0.08942686766386032, + "learning_rate": 6.336065682286647e-07, + "loss": 0.0011, + "step": 266370 + }, + { + "epoch": 1.7084785766740531, + "grad_norm": 0.08934418857097626, + "learning_rate": 6.333338963293428e-07, + "loss": 0.0011, + "step": 266380 + }, + { + "epoch": 1.708542713567839, + "grad_norm": 0.004581431858241558, + "learning_rate": 6.330612791474666e-07, + "loss": 0.0012, + "step": 266390 + }, + { + "epoch": 1.7086068504616252, + "grad_norm": 0.0772194117307663, + "learning_rate": 6.327887166864549e-07, + "loss": 0.0013, + "step": 266400 + }, + { + "epoch": 1.7086709873554113, + "grad_norm": 0.0690506175160408, + "learning_rate": 6.325162089497223e-07, + "loss": 0.0028, + "step": 266410 + }, + { + "epoch": 1.7087351242491975, + "grad_norm": 0.04467339441180229, + "learning_rate": 6.322437559406825e-07, + "loss": 0.001, + "step": 266420 + }, + { + "epoch": 1.7087992611429836, + "grad_norm": 0.06026383116841316, + "learning_rate": 6.31971357662749e-07, + "loss": 0.0054, + "step": 266430 + }, + { + "epoch": 1.7088633980367698, + "grad_norm": 0.037136465311050415, + "learning_rate": 6.316990141193368e-07, + "loss": 0.0008, + "step": 266440 + }, + { + "epoch": 1.708927534930556, + "grad_norm": 0.08824778348207474, + "learning_rate": 6.314267253138584e-07, + "loss": 0.0009, + "step": 266450 + }, + { + "epoch": 1.7089916718243419, + "grad_norm": 0.029125427827239037, + "learning_rate": 6.311544912497241e-07, + "loss": 0.0007, + "step": 266460 + }, + { + "epoch": 1.709055808718128, + "grad_norm": 0.030814319849014282, + "learning_rate": 6.308823119303481e-07, + "loss": 0.0012, + "step": 266470 + }, + { + "epoch": 1.709119945611914, + "grad_norm": 0.0323149673640728, + "learning_rate": 6.306101873591386e-07, + "loss": 0.0013, + "step": 266480 + }, + { + "epoch": 1.7091840825057, + "grad_norm": 0.041041210293769836, + "learning_rate": 6.303381175395062e-07, + "loss": 0.0019, + "step": 266490 + }, + { + "epoch": 1.7092482193994862, + "grad_norm": 0.0773228108882904, + "learning_rate": 6.300661024748595e-07, + "loss": 0.0009, + "step": 266500 + }, + { + "epoch": 1.7093123562932724, + "grad_norm": 0.17675377428531647, + "learning_rate": 6.297941421686083e-07, + "loss": 0.0012, + "step": 266510 + }, + { + "epoch": 1.7093764931870585, + "grad_norm": 0.02773331105709076, + "learning_rate": 6.295222366241599e-07, + "loss": 0.0012, + "step": 266520 + }, + { + "epoch": 1.7094406300808447, + "grad_norm": 0.0747687891125679, + "learning_rate": 6.292503858449217e-07, + "loss": 0.0009, + "step": 266530 + }, + { + "epoch": 1.7095047669746306, + "grad_norm": 0.272279292345047, + "learning_rate": 6.289785898342987e-07, + "loss": 0.0025, + "step": 266540 + }, + { + "epoch": 1.7095689038684168, + "grad_norm": 0.011230595409870148, + "learning_rate": 6.287068485956988e-07, + "loss": 0.001, + "step": 266550 + }, + { + "epoch": 1.7096330407622027, + "grad_norm": 0.08038709312677383, + "learning_rate": 6.284351621325263e-07, + "loss": 0.0011, + "step": 266560 + }, + { + "epoch": 1.7096971776559888, + "grad_norm": 0.03474096208810806, + "learning_rate": 6.281635304481847e-07, + "loss": 0.0008, + "step": 266570 + }, + { + "epoch": 1.709761314549775, + "grad_norm": 0.10244005173444748, + "learning_rate": 6.278919535460781e-07, + "loss": 0.0007, + "step": 266580 + }, + { + "epoch": 1.7098254514435611, + "grad_norm": 0.06485792249441147, + "learning_rate": 6.276204314296113e-07, + "loss": 0.0006, + "step": 266590 + }, + { + "epoch": 1.7098895883373473, + "grad_norm": 0.022783532738685608, + "learning_rate": 6.273489641021857e-07, + "loss": 0.0011, + "step": 266600 + }, + { + "epoch": 1.7099537252311334, + "grad_norm": 0.005095191765576601, + "learning_rate": 6.270775515672023e-07, + "loss": 0.0009, + "step": 266610 + }, + { + "epoch": 1.7100178621249196, + "grad_norm": 0.082610122859478, + "learning_rate": 6.268061938280612e-07, + "loss": 0.0012, + "step": 266620 + }, + { + "epoch": 1.7100819990187055, + "grad_norm": 0.049736011773347855, + "learning_rate": 6.265348908881658e-07, + "loss": 0.0011, + "step": 266630 + }, + { + "epoch": 1.7101461359124916, + "grad_norm": 0.06456831097602844, + "learning_rate": 6.262636427509133e-07, + "loss": 0.0005, + "step": 266640 + }, + { + "epoch": 1.7102102728062776, + "grad_norm": 0.09709274023771286, + "learning_rate": 6.259924494197034e-07, + "loss": 0.0022, + "step": 266650 + }, + { + "epoch": 1.7102744097000637, + "grad_norm": 0.0323229655623436, + "learning_rate": 6.257213108979326e-07, + "loss": 0.0009, + "step": 266660 + }, + { + "epoch": 1.7103385465938499, + "grad_norm": 0.11305923014879227, + "learning_rate": 6.254502271890017e-07, + "loss": 0.0006, + "step": 266670 + }, + { + "epoch": 1.710402683487636, + "grad_norm": 0.060932356864213943, + "learning_rate": 6.251791982963057e-07, + "loss": 0.0012, + "step": 266680 + }, + { + "epoch": 1.7104668203814222, + "grad_norm": 0.10568568855524063, + "learning_rate": 6.249082242232407e-07, + "loss": 0.0011, + "step": 266690 + }, + { + "epoch": 1.7105309572752083, + "grad_norm": 0.007015223614871502, + "learning_rate": 6.246373049732018e-07, + "loss": 0.0005, + "step": 266700 + }, + { + "epoch": 1.7105950941689945, + "grad_norm": 0.05034947395324707, + "learning_rate": 6.243664405495858e-07, + "loss": 0.0032, + "step": 266710 + }, + { + "epoch": 1.7106592310627804, + "grad_norm": 0.17234694957733154, + "learning_rate": 6.24095630955785e-07, + "loss": 0.0012, + "step": 266720 + }, + { + "epoch": 1.7107233679565665, + "grad_norm": 0.06869015097618103, + "learning_rate": 6.238248761951926e-07, + "loss": 0.0014, + "step": 266730 + }, + { + "epoch": 1.7107875048503525, + "grad_norm": 0.04930241405963898, + "learning_rate": 6.235541762712027e-07, + "loss": 0.001, + "step": 266740 + }, + { + "epoch": 1.7108516417441386, + "grad_norm": 0.0738460049033165, + "learning_rate": 6.232835311872071e-07, + "loss": 0.0004, + "step": 266750 + }, + { + "epoch": 1.7109157786379248, + "grad_norm": 0.08131708949804306, + "learning_rate": 6.230129409465968e-07, + "loss": 0.0018, + "step": 266760 + }, + { + "epoch": 1.710979915531711, + "grad_norm": 0.12405913323163986, + "learning_rate": 6.22742405552762e-07, + "loss": 0.0013, + "step": 266770 + }, + { + "epoch": 1.711044052425497, + "grad_norm": 0.039367906749248505, + "learning_rate": 6.224719250090938e-07, + "loss": 0.0011, + "step": 266780 + }, + { + "epoch": 1.7111081893192832, + "grad_norm": 0.1633806675672531, + "learning_rate": 6.222014993189806e-07, + "loss": 0.001, + "step": 266790 + }, + { + "epoch": 1.7111723262130691, + "grad_norm": 0.008292242884635925, + "learning_rate": 6.21931128485812e-07, + "loss": 0.0006, + "step": 266800 + }, + { + "epoch": 1.7112364631068553, + "grad_norm": 0.01935173198580742, + "learning_rate": 6.216608125129741e-07, + "loss": 0.0007, + "step": 266810 + }, + { + "epoch": 1.7113006000006412, + "grad_norm": 0.013553868047893047, + "learning_rate": 6.213905514038559e-07, + "loss": 0.0011, + "step": 266820 + }, + { + "epoch": 1.7113647368944274, + "grad_norm": 0.2309228479862213, + "learning_rate": 6.211203451618441e-07, + "loss": 0.0022, + "step": 266830 + }, + { + "epoch": 1.7114288737882135, + "grad_norm": 0.4577714204788208, + "learning_rate": 6.208501937903227e-07, + "loss": 0.0027, + "step": 266840 + }, + { + "epoch": 1.7114930106819997, + "grad_norm": 0.0713023692369461, + "learning_rate": 6.205800972926789e-07, + "loss": 0.0013, + "step": 266850 + }, + { + "epoch": 1.7115571475757858, + "grad_norm": 0.02272789739072323, + "learning_rate": 6.203100556722969e-07, + "loss": 0.0006, + "step": 266860 + }, + { + "epoch": 1.711621284469572, + "grad_norm": 0.06002601981163025, + "learning_rate": 6.200400689325597e-07, + "loss": 0.0011, + "step": 266870 + }, + { + "epoch": 1.711685421363358, + "grad_norm": 0.13648687303066254, + "learning_rate": 6.197701370768494e-07, + "loss": 0.0012, + "step": 266880 + }, + { + "epoch": 1.711749558257144, + "grad_norm": 0.05618472024798393, + "learning_rate": 6.195002601085515e-07, + "loss": 0.0006, + "step": 266890 + }, + { + "epoch": 1.7118136951509302, + "grad_norm": 0.02872016280889511, + "learning_rate": 6.192304380310455e-07, + "loss": 0.001, + "step": 266900 + }, + { + "epoch": 1.711877832044716, + "grad_norm": 0.1917131394147873, + "learning_rate": 6.189606708477131e-07, + "loss": 0.001, + "step": 266910 + }, + { + "epoch": 1.7119419689385023, + "grad_norm": 0.009861587546765804, + "learning_rate": 6.186909585619339e-07, + "loss": 0.0018, + "step": 266920 + }, + { + "epoch": 1.7120061058322884, + "grad_norm": 0.05207042768597603, + "learning_rate": 6.18421301177089e-07, + "loss": 0.0008, + "step": 266930 + }, + { + "epoch": 1.7120702427260746, + "grad_norm": 0.0531453862786293, + "learning_rate": 6.181516986965569e-07, + "loss": 0.0008, + "step": 266940 + }, + { + "epoch": 1.7121343796198607, + "grad_norm": 0.06557393819093704, + "learning_rate": 6.178821511237154e-07, + "loss": 0.0015, + "step": 266950 + }, + { + "epoch": 1.7121985165136469, + "grad_norm": 0.06830243021249771, + "learning_rate": 6.176126584619413e-07, + "loss": 0.0015, + "step": 266960 + }, + { + "epoch": 1.7122626534074328, + "grad_norm": 0.1856570839881897, + "learning_rate": 6.173432207146135e-07, + "loss": 0.0022, + "step": 266970 + }, + { + "epoch": 1.712326790301219, + "grad_norm": 0.05492860823869705, + "learning_rate": 6.170738378851077e-07, + "loss": 0.0011, + "step": 266980 + }, + { + "epoch": 1.7123909271950049, + "grad_norm": 0.008831695653498173, + "learning_rate": 6.168045099767983e-07, + "loss": 0.0009, + "step": 266990 + }, + { + "epoch": 1.712455064088791, + "grad_norm": 0.0019844414200633764, + "learning_rate": 6.165352369930616e-07, + "loss": 0.0007, + "step": 267000 + }, + { + "epoch": 1.7125192009825771, + "grad_norm": 0.20661035180091858, + "learning_rate": 6.162660189372716e-07, + "loss": 0.0016, + "step": 267010 + }, + { + "epoch": 1.7125833378763633, + "grad_norm": 0.13168048858642578, + "learning_rate": 6.15996855812801e-07, + "loss": 0.0011, + "step": 267020 + }, + { + "epoch": 1.7126474747701494, + "grad_norm": 0.04329398646950722, + "learning_rate": 6.157277476230222e-07, + "loss": 0.0008, + "step": 267030 + }, + { + "epoch": 1.7127116116639356, + "grad_norm": 0.08694454282522202, + "learning_rate": 6.154586943713092e-07, + "loss": 0.001, + "step": 267040 + }, + { + "epoch": 1.7127757485577217, + "grad_norm": 0.06795962154865265, + "learning_rate": 6.151896960610326e-07, + "loss": 0.0009, + "step": 267050 + }, + { + "epoch": 1.7128398854515077, + "grad_norm": 0.13790574669837952, + "learning_rate": 6.149207526955625e-07, + "loss": 0.0011, + "step": 267060 + }, + { + "epoch": 1.7129040223452938, + "grad_norm": 0.052158210426568985, + "learning_rate": 6.146518642782684e-07, + "loss": 0.0023, + "step": 267070 + }, + { + "epoch": 1.7129681592390797, + "grad_norm": 0.015230941586196423, + "learning_rate": 6.143830308125215e-07, + "loss": 0.0215, + "step": 267080 + }, + { + "epoch": 1.713032296132866, + "grad_norm": 0.19992482662200928, + "learning_rate": 6.141142523016896e-07, + "loss": 0.0025, + "step": 267090 + }, + { + "epoch": 1.713096433026652, + "grad_norm": 0.005246074870228767, + "learning_rate": 6.138455287491408e-07, + "loss": 0.0004, + "step": 267100 + }, + { + "epoch": 1.7131605699204382, + "grad_norm": 0.10086475312709808, + "learning_rate": 6.13576860158241e-07, + "loss": 0.0011, + "step": 267110 + }, + { + "epoch": 1.7132247068142243, + "grad_norm": 0.0043176934123039246, + "learning_rate": 6.133082465323592e-07, + "loss": 0.0012, + "step": 267120 + }, + { + "epoch": 1.7132888437080105, + "grad_norm": 0.2594849169254303, + "learning_rate": 6.130396878748607e-07, + "loss": 0.001, + "step": 267130 + }, + { + "epoch": 1.7133529806017966, + "grad_norm": 0.03310069814324379, + "learning_rate": 6.127711841891087e-07, + "loss": 0.0008, + "step": 267140 + }, + { + "epoch": 1.7134171174955826, + "grad_norm": 0.06937534362077713, + "learning_rate": 6.125027354784702e-07, + "loss": 0.0014, + "step": 267150 + }, + { + "epoch": 1.7134812543893687, + "grad_norm": 0.04508813098073006, + "learning_rate": 6.122343417463083e-07, + "loss": 0.0009, + "step": 267160 + }, + { + "epoch": 1.7135453912831546, + "grad_norm": 0.06019587814807892, + "learning_rate": 6.119660029959862e-07, + "loss": 0.0013, + "step": 267170 + }, + { + "epoch": 1.7136095281769408, + "grad_norm": 0.01498024258762598, + "learning_rate": 6.116977192308649e-07, + "loss": 0.0005, + "step": 267180 + }, + { + "epoch": 1.713673665070727, + "grad_norm": 0.010349423624575138, + "learning_rate": 6.114294904543084e-07, + "loss": 0.0013, + "step": 267190 + }, + { + "epoch": 1.713737801964513, + "grad_norm": 0.09700113534927368, + "learning_rate": 6.111613166696767e-07, + "loss": 0.001, + "step": 267200 + }, + { + "epoch": 1.7138019388582992, + "grad_norm": 0.22153259813785553, + "learning_rate": 6.108931978803306e-07, + "loss": 0.0022, + "step": 267210 + }, + { + "epoch": 1.7138660757520854, + "grad_norm": 0.011544063687324524, + "learning_rate": 6.106251340896285e-07, + "loss": 0.0016, + "step": 267220 + }, + { + "epoch": 1.7139302126458713, + "grad_norm": 0.04484653100371361, + "learning_rate": 6.103571253009316e-07, + "loss": 0.0007, + "step": 267230 + }, + { + "epoch": 1.7139943495396575, + "grad_norm": 0.15743902325630188, + "learning_rate": 6.100891715175972e-07, + "loss": 0.0011, + "step": 267240 + }, + { + "epoch": 1.7140584864334434, + "grad_norm": 0.034544993191957474, + "learning_rate": 6.098212727429825e-07, + "loss": 0.0007, + "step": 267250 + }, + { + "epoch": 1.7141226233272295, + "grad_norm": 0.058348409831523895, + "learning_rate": 6.095534289804444e-07, + "loss": 0.0016, + "step": 267260 + }, + { + "epoch": 1.7141867602210157, + "grad_norm": 0.10489243268966675, + "learning_rate": 6.092856402333403e-07, + "loss": 0.001, + "step": 267270 + }, + { + "epoch": 1.7142508971148018, + "grad_norm": 0.02318517304956913, + "learning_rate": 6.090179065050256e-07, + "loss": 0.0008, + "step": 267280 + }, + { + "epoch": 1.714315034008588, + "grad_norm": 0.06452568620443344, + "learning_rate": 6.087502277988533e-07, + "loss": 0.001, + "step": 267290 + }, + { + "epoch": 1.7143791709023741, + "grad_norm": 0.05898898467421532, + "learning_rate": 6.084826041181796e-07, + "loss": 0.0013, + "step": 267300 + }, + { + "epoch": 1.7144433077961603, + "grad_norm": 0.046029724180698395, + "learning_rate": 6.08215035466358e-07, + "loss": 0.0013, + "step": 267310 + }, + { + "epoch": 1.7145074446899462, + "grad_norm": 0.05649979040026665, + "learning_rate": 6.079475218467407e-07, + "loss": 0.0009, + "step": 267320 + }, + { + "epoch": 1.7145715815837324, + "grad_norm": 0.09914553165435791, + "learning_rate": 6.076800632626789e-07, + "loss": 0.0007, + "step": 267330 + }, + { + "epoch": 1.7146357184775183, + "grad_norm": 0.1583593189716339, + "learning_rate": 6.074126597175251e-07, + "loss": 0.001, + "step": 267340 + }, + { + "epoch": 1.7146998553713044, + "grad_norm": 0.00976317748427391, + "learning_rate": 6.071453112146309e-07, + "loss": 0.0004, + "step": 267350 + }, + { + "epoch": 1.7147639922650906, + "grad_norm": 0.0697932094335556, + "learning_rate": 6.068780177573458e-07, + "loss": 0.0007, + "step": 267360 + }, + { + "epoch": 1.7148281291588767, + "grad_norm": 0.0736718475818634, + "learning_rate": 6.066107793490183e-07, + "loss": 0.0013, + "step": 267370 + }, + { + "epoch": 1.7148922660526629, + "grad_norm": 0.037221480160951614, + "learning_rate": 6.063435959929969e-07, + "loss": 0.0011, + "step": 267380 + }, + { + "epoch": 1.714956402946449, + "grad_norm": 0.0539449043571949, + "learning_rate": 6.060764676926317e-07, + "loss": 0.0009, + "step": 267390 + }, + { + "epoch": 1.715020539840235, + "grad_norm": 0.10681122541427612, + "learning_rate": 6.058093944512683e-07, + "loss": 0.0021, + "step": 267400 + }, + { + "epoch": 1.715084676734021, + "grad_norm": 0.06330878287553787, + "learning_rate": 6.055423762722523e-07, + "loss": 0.0023, + "step": 267410 + }, + { + "epoch": 1.7151488136278072, + "grad_norm": 0.0955391600728035, + "learning_rate": 6.052754131589328e-07, + "loss": 0.0008, + "step": 267420 + }, + { + "epoch": 1.7152129505215932, + "grad_norm": 0.20529933273792267, + "learning_rate": 6.050085051146525e-07, + "loss": 0.0021, + "step": 267430 + }, + { + "epoch": 1.7152770874153793, + "grad_norm": 0.08644850552082062, + "learning_rate": 6.047416521427574e-07, + "loss": 0.0009, + "step": 267440 + }, + { + "epoch": 1.7153412243091655, + "grad_norm": 0.020164771005511284, + "learning_rate": 6.044748542465889e-07, + "loss": 0.004, + "step": 267450 + }, + { + "epoch": 1.7154053612029516, + "grad_norm": 0.061093926429748535, + "learning_rate": 6.042081114294934e-07, + "loss": 0.0007, + "step": 267460 + }, + { + "epoch": 1.7154694980967378, + "grad_norm": 0.052653685212135315, + "learning_rate": 6.039414236948116e-07, + "loss": 0.0014, + "step": 267470 + }, + { + "epoch": 1.715533634990524, + "grad_norm": 0.13485048711299896, + "learning_rate": 6.036747910458856e-07, + "loss": 0.0008, + "step": 267480 + }, + { + "epoch": 1.7155977718843098, + "grad_norm": 0.11048126965761185, + "learning_rate": 6.034082134860558e-07, + "loss": 0.0018, + "step": 267490 + }, + { + "epoch": 1.715661908778096, + "grad_norm": 0.19369938969612122, + "learning_rate": 6.031416910186644e-07, + "loss": 0.001, + "step": 267500 + }, + { + "epoch": 1.715726045671882, + "grad_norm": 0.026004096493124962, + "learning_rate": 6.028752236470492e-07, + "loss": 0.0007, + "step": 267510 + }, + { + "epoch": 1.715790182565668, + "grad_norm": 0.1416434645652771, + "learning_rate": 6.026088113745509e-07, + "loss": 0.0009, + "step": 267520 + }, + { + "epoch": 1.7158543194594542, + "grad_norm": 0.02691793628036976, + "learning_rate": 6.023424542045053e-07, + "loss": 0.0009, + "step": 267530 + }, + { + "epoch": 1.7159184563532404, + "grad_norm": 0.08257123082876205, + "learning_rate": 6.020761521402529e-07, + "loss": 0.0013, + "step": 267540 + }, + { + "epoch": 1.7159825932470265, + "grad_norm": 0.0846862643957138, + "learning_rate": 6.018099051851295e-07, + "loss": 0.0005, + "step": 267550 + }, + { + "epoch": 1.7160467301408127, + "grad_norm": 0.07328194379806519, + "learning_rate": 6.015437133424706e-07, + "loss": 0.001, + "step": 267560 + }, + { + "epoch": 1.7161108670345988, + "grad_norm": 0.06155369058251381, + "learning_rate": 6.012775766156131e-07, + "loss": 0.0028, + "step": 267570 + }, + { + "epoch": 1.7161750039283847, + "grad_norm": 0.07231937348842621, + "learning_rate": 6.010114950078916e-07, + "loss": 0.0016, + "step": 267580 + }, + { + "epoch": 1.7162391408221709, + "grad_norm": 0.05894768238067627, + "learning_rate": 6.0074546852264e-07, + "loss": 0.0017, + "step": 267590 + }, + { + "epoch": 1.7163032777159568, + "grad_norm": 0.003771646646782756, + "learning_rate": 6.004794971631905e-07, + "loss": 0.002, + "step": 267600 + }, + { + "epoch": 1.716367414609743, + "grad_norm": 0.03629041835665703, + "learning_rate": 6.002135809328785e-07, + "loss": 0.0017, + "step": 267610 + }, + { + "epoch": 1.716431551503529, + "grad_norm": 0.04036949202418327, + "learning_rate": 5.999477198350346e-07, + "loss": 0.0006, + "step": 267620 + }, + { + "epoch": 1.7164956883973153, + "grad_norm": 0.06391513347625732, + "learning_rate": 5.996819138729904e-07, + "loss": 0.0015, + "step": 267630 + }, + { + "epoch": 1.7165598252911014, + "grad_norm": 0.03568914532661438, + "learning_rate": 5.994161630500755e-07, + "loss": 0.0014, + "step": 267640 + }, + { + "epoch": 1.7166239621848876, + "grad_norm": 0.08988387137651443, + "learning_rate": 5.991504673696224e-07, + "loss": 0.0013, + "step": 267650 + }, + { + "epoch": 1.7166880990786735, + "grad_norm": 0.04272187128663063, + "learning_rate": 5.988848268349589e-07, + "loss": 0.0015, + "step": 267660 + }, + { + "epoch": 1.7167522359724596, + "grad_norm": 0.11087754368782043, + "learning_rate": 5.986192414494146e-07, + "loss": 0.0009, + "step": 267670 + }, + { + "epoch": 1.7168163728662456, + "grad_norm": 0.11257560551166534, + "learning_rate": 5.983537112163151e-07, + "loss": 0.0028, + "step": 267680 + }, + { + "epoch": 1.7168805097600317, + "grad_norm": 0.12729914486408234, + "learning_rate": 5.980882361389906e-07, + "loss": 0.0004, + "step": 267690 + }, + { + "epoch": 1.7169446466538179, + "grad_norm": 0.09516214579343796, + "learning_rate": 5.978228162207666e-07, + "loss": 0.0033, + "step": 267700 + }, + { + "epoch": 1.717008783547604, + "grad_norm": 0.0547090545296669, + "learning_rate": 5.975574514649679e-07, + "loss": 0.0009, + "step": 267710 + }, + { + "epoch": 1.7170729204413901, + "grad_norm": 0.20721198618412018, + "learning_rate": 5.972921418749222e-07, + "loss": 0.0018, + "step": 267720 + }, + { + "epoch": 1.7171370573351763, + "grad_norm": 0.16879446804523468, + "learning_rate": 5.97026887453952e-07, + "loss": 0.0015, + "step": 267730 + }, + { + "epoch": 1.7172011942289624, + "grad_norm": 0.02988448366522789, + "learning_rate": 5.967616882053818e-07, + "loss": 0.0008, + "step": 267740 + }, + { + "epoch": 1.7172653311227484, + "grad_norm": 0.033318206667900085, + "learning_rate": 5.964965441325332e-07, + "loss": 0.0038, + "step": 267750 + }, + { + "epoch": 1.7173294680165345, + "grad_norm": 0.07636163383722305, + "learning_rate": 5.962314552387317e-07, + "loss": 0.0009, + "step": 267760 + }, + { + "epoch": 1.7173936049103204, + "grad_norm": 0.04419083893299103, + "learning_rate": 5.95966421527297e-07, + "loss": 0.0006, + "step": 267770 + }, + { + "epoch": 1.7174577418041066, + "grad_norm": 0.009929543361067772, + "learning_rate": 5.957014430015506e-07, + "loss": 0.0008, + "step": 267780 + }, + { + "epoch": 1.7175218786978927, + "grad_norm": 0.07677434384822845, + "learning_rate": 5.954365196648116e-07, + "loss": 0.0091, + "step": 267790 + }, + { + "epoch": 1.717586015591679, + "grad_norm": 0.042411331087350845, + "learning_rate": 5.95171651520402e-07, + "loss": 0.0013, + "step": 267800 + }, + { + "epoch": 1.717650152485465, + "grad_norm": 0.036884430795907974, + "learning_rate": 5.9490683857164e-07, + "loss": 0.0022, + "step": 267810 + }, + { + "epoch": 1.7177142893792512, + "grad_norm": 0.19136400520801544, + "learning_rate": 5.946420808218434e-07, + "loss": 0.0015, + "step": 267820 + }, + { + "epoch": 1.7177784262730371, + "grad_norm": 0.04579498991370201, + "learning_rate": 5.94377378274329e-07, + "loss": 0.0008, + "step": 267830 + }, + { + "epoch": 1.7178425631668233, + "grad_norm": 0.020937394350767136, + "learning_rate": 5.941127309324157e-07, + "loss": 0.0007, + "step": 267840 + }, + { + "epoch": 1.7179067000606094, + "grad_norm": 0.06400826573371887, + "learning_rate": 5.938481387994189e-07, + "loss": 0.001, + "step": 267850 + }, + { + "epoch": 1.7179708369543953, + "grad_norm": 0.23463304340839386, + "learning_rate": 5.935836018786523e-07, + "loss": 0.002, + "step": 267860 + }, + { + "epoch": 1.7180349738481815, + "grad_norm": 0.048746898770332336, + "learning_rate": 5.933191201734339e-07, + "loss": 0.0003, + "step": 267870 + }, + { + "epoch": 1.7180991107419676, + "grad_norm": 0.1165740042924881, + "learning_rate": 5.930546936870762e-07, + "loss": 0.0009, + "step": 267880 + }, + { + "epoch": 1.7181632476357538, + "grad_norm": 0.06149594485759735, + "learning_rate": 5.927903224228932e-07, + "loss": 0.0016, + "step": 267890 + }, + { + "epoch": 1.71822738452954, + "grad_norm": 0.09030690044164658, + "learning_rate": 5.925260063841959e-07, + "loss": 0.0011, + "step": 267900 + }, + { + "epoch": 1.718291521423326, + "grad_norm": 0.07678499817848206, + "learning_rate": 5.922617455742985e-07, + "loss": 0.0011, + "step": 267910 + }, + { + "epoch": 1.718355658317112, + "grad_norm": 0.18253855407238007, + "learning_rate": 5.919975399965117e-07, + "loss": 0.0009, + "step": 267920 + }, + { + "epoch": 1.7184197952108982, + "grad_norm": 0.023488642647862434, + "learning_rate": 5.917333896541466e-07, + "loss": 0.001, + "step": 267930 + }, + { + "epoch": 1.718483932104684, + "grad_norm": 0.05363985523581505, + "learning_rate": 5.914692945505112e-07, + "loss": 0.0006, + "step": 267940 + }, + { + "epoch": 1.7185480689984702, + "grad_norm": 0.03072049282491207, + "learning_rate": 5.912052546889175e-07, + "loss": 0.0012, + "step": 267950 + }, + { + "epoch": 1.7186122058922564, + "grad_norm": 0.12611393630504608, + "learning_rate": 5.909412700726725e-07, + "loss": 0.0013, + "step": 267960 + }, + { + "epoch": 1.7186763427860425, + "grad_norm": 0.04329914599657059, + "learning_rate": 5.906773407050847e-07, + "loss": 0.0017, + "step": 267970 + }, + { + "epoch": 1.7187404796798287, + "grad_norm": 0.05544757843017578, + "learning_rate": 5.904134665894601e-07, + "loss": 0.0013, + "step": 267980 + }, + { + "epoch": 1.7188046165736148, + "grad_norm": 0.0072180950082838535, + "learning_rate": 5.901496477291069e-07, + "loss": 0.0008, + "step": 267990 + }, + { + "epoch": 1.718868753467401, + "grad_norm": 0.055339131504297256, + "learning_rate": 5.898858841273309e-07, + "loss": 0.0004, + "step": 268000 + }, + { + "epoch": 1.718932890361187, + "grad_norm": 0.05021030455827713, + "learning_rate": 5.896221757874354e-07, + "loss": 0.0012, + "step": 268010 + }, + { + "epoch": 1.718997027254973, + "grad_norm": 0.20326822996139526, + "learning_rate": 5.893585227127269e-07, + "loss": 0.0009, + "step": 268020 + }, + { + "epoch": 1.719061164148759, + "grad_norm": 0.0853961855173111, + "learning_rate": 5.890949249065086e-07, + "loss": 0.0007, + "step": 268030 + }, + { + "epoch": 1.7191253010425451, + "grad_norm": 0.06888889521360397, + "learning_rate": 5.888313823720831e-07, + "loss": 0.0015, + "step": 268040 + }, + { + "epoch": 1.7191894379363313, + "grad_norm": 0.11359435319900513, + "learning_rate": 5.885678951127521e-07, + "loss": 0.0007, + "step": 268050 + }, + { + "epoch": 1.7192535748301174, + "grad_norm": 0.06066417321562767, + "learning_rate": 5.883044631318191e-07, + "loss": 0.0015, + "step": 268060 + }, + { + "epoch": 1.7193177117239036, + "grad_norm": 0.034054309129714966, + "learning_rate": 5.880410864325847e-07, + "loss": 0.0017, + "step": 268070 + }, + { + "epoch": 1.7193818486176897, + "grad_norm": 0.04914059489965439, + "learning_rate": 5.87777765018348e-07, + "loss": 0.0011, + "step": 268080 + }, + { + "epoch": 1.7194459855114756, + "grad_norm": 0.0036719120107591152, + "learning_rate": 5.875144988924087e-07, + "loss": 0.001, + "step": 268090 + }, + { + "epoch": 1.7195101224052618, + "grad_norm": 0.023349685594439507, + "learning_rate": 5.872512880580661e-07, + "loss": 0.0008, + "step": 268100 + }, + { + "epoch": 1.7195742592990477, + "grad_norm": 0.03897149860858917, + "learning_rate": 5.869881325186194e-07, + "loss": 0.0011, + "step": 268110 + }, + { + "epoch": 1.7196383961928339, + "grad_norm": 0.07962550222873688, + "learning_rate": 5.867250322773655e-07, + "loss": 0.0008, + "step": 268120 + }, + { + "epoch": 1.71970253308662, + "grad_norm": 0.055032357573509216, + "learning_rate": 5.864619873376004e-07, + "loss": 0.0006, + "step": 268130 + }, + { + "epoch": 1.7197666699804062, + "grad_norm": 0.06639142334461212, + "learning_rate": 5.861989977026211e-07, + "loss": 0.0008, + "step": 268140 + }, + { + "epoch": 1.7198308068741923, + "grad_norm": 0.006149010267108679, + "learning_rate": 5.859360633757238e-07, + "loss": 0.0017, + "step": 268150 + }, + { + "epoch": 1.7198949437679785, + "grad_norm": 0.0026477861683815718, + "learning_rate": 5.856731843602015e-07, + "loss": 0.0007, + "step": 268160 + }, + { + "epoch": 1.7199590806617646, + "grad_norm": 0.15994895994663239, + "learning_rate": 5.854103606593487e-07, + "loss": 0.0016, + "step": 268170 + }, + { + "epoch": 1.7200232175555505, + "grad_norm": 0.05148687586188316, + "learning_rate": 5.851475922764599e-07, + "loss": 0.0011, + "step": 268180 + }, + { + "epoch": 1.7200873544493367, + "grad_norm": 0.03837016224861145, + "learning_rate": 5.84884879214827e-07, + "loss": 0.0008, + "step": 268190 + }, + { + "epoch": 1.7201514913431226, + "grad_norm": 0.14075350761413574, + "learning_rate": 5.84622221477742e-07, + "loss": 0.0011, + "step": 268200 + }, + { + "epoch": 1.7202156282369088, + "grad_norm": 0.15034602582454681, + "learning_rate": 5.843596190684952e-07, + "loss": 0.0012, + "step": 268210 + }, + { + "epoch": 1.720279765130695, + "grad_norm": 0.03877944499254227, + "learning_rate": 5.840970719903788e-07, + "loss": 0.0013, + "step": 268220 + }, + { + "epoch": 1.720343902024481, + "grad_norm": 0.02334008365869522, + "learning_rate": 5.838345802466827e-07, + "loss": 0.001, + "step": 268230 + }, + { + "epoch": 1.7204080389182672, + "grad_norm": 0.11830996721982956, + "learning_rate": 5.835721438406955e-07, + "loss": 0.0009, + "step": 268240 + }, + { + "epoch": 1.7204721758120534, + "grad_norm": 0.03914085403084755, + "learning_rate": 5.833097627757039e-07, + "loss": 0.0021, + "step": 268250 + }, + { + "epoch": 1.7205363127058393, + "grad_norm": 0.11923909187316895, + "learning_rate": 5.830474370549993e-07, + "loss": 0.001, + "step": 268260 + }, + { + "epoch": 1.7206004495996254, + "grad_norm": 0.03597554564476013, + "learning_rate": 5.827851666818668e-07, + "loss": 0.0013, + "step": 268270 + }, + { + "epoch": 1.7206645864934116, + "grad_norm": 0.019106004387140274, + "learning_rate": 5.82522951659592e-07, + "loss": 0.0005, + "step": 268280 + }, + { + "epoch": 1.7207287233871975, + "grad_norm": 0.054433081299066544, + "learning_rate": 5.822607919914631e-07, + "loss": 0.0012, + "step": 268290 + }, + { + "epoch": 1.7207928602809837, + "grad_norm": 0.11670947819948196, + "learning_rate": 5.819986876807638e-07, + "loss": 0.0014, + "step": 268300 + }, + { + "epoch": 1.7208569971747698, + "grad_norm": 0.001161582418717444, + "learning_rate": 5.817366387307782e-07, + "loss": 0.0014, + "step": 268310 + }, + { + "epoch": 1.720921134068556, + "grad_norm": 0.05353518947958946, + "learning_rate": 5.814746451447889e-07, + "loss": 0.0007, + "step": 268320 + }, + { + "epoch": 1.720985270962342, + "grad_norm": 0.0915118157863617, + "learning_rate": 5.812127069260814e-07, + "loss": 0.001, + "step": 268330 + }, + { + "epoch": 1.7210494078561283, + "grad_norm": 0.03967313468456268, + "learning_rate": 5.809508240779371e-07, + "loss": 0.0019, + "step": 268340 + }, + { + "epoch": 1.7211135447499142, + "grad_norm": 0.010144086554646492, + "learning_rate": 5.806889966036372e-07, + "loss": 0.0008, + "step": 268350 + }, + { + "epoch": 1.7211776816437003, + "grad_norm": 0.0049812509678304195, + "learning_rate": 5.804272245064613e-07, + "loss": 0.0009, + "step": 268360 + }, + { + "epoch": 1.7212418185374863, + "grad_norm": 0.004335071891546249, + "learning_rate": 5.801655077896917e-07, + "loss": 0.0007, + "step": 268370 + }, + { + "epoch": 1.7213059554312724, + "grad_norm": 0.07853275537490845, + "learning_rate": 5.799038464566075e-07, + "loss": 0.0012, + "step": 268380 + }, + { + "epoch": 1.7213700923250586, + "grad_norm": 0.0015121111646294594, + "learning_rate": 5.796422405104868e-07, + "loss": 0.0021, + "step": 268390 + }, + { + "epoch": 1.7214342292188447, + "grad_norm": 0.022543083876371384, + "learning_rate": 5.793806899546072e-07, + "loss": 0.0008, + "step": 268400 + }, + { + "epoch": 1.7214983661126309, + "grad_norm": 0.07074515521526337, + "learning_rate": 5.791191947922481e-07, + "loss": 0.0017, + "step": 268410 + }, + { + "epoch": 1.721562503006417, + "grad_norm": 0.07344797253608704, + "learning_rate": 5.788577550266844e-07, + "loss": 0.0006, + "step": 268420 + }, + { + "epoch": 1.7216266399002031, + "grad_norm": 0.10367267578840256, + "learning_rate": 5.785963706611925e-07, + "loss": 0.0008, + "step": 268430 + }, + { + "epoch": 1.721690776793989, + "grad_norm": 0.03328879177570343, + "learning_rate": 5.783350416990485e-07, + "loss": 0.0008, + "step": 268440 + }, + { + "epoch": 1.7217549136877752, + "grad_norm": 0.07013612240552902, + "learning_rate": 5.780737681435267e-07, + "loss": 0.0007, + "step": 268450 + }, + { + "epoch": 1.7218190505815612, + "grad_norm": 0.027138646692037582, + "learning_rate": 5.778125499979009e-07, + "loss": 0.0008, + "step": 268460 + }, + { + "epoch": 1.7218831874753473, + "grad_norm": 0.1326294243335724, + "learning_rate": 5.775513872654436e-07, + "loss": 0.0015, + "step": 268470 + }, + { + "epoch": 1.7219473243691334, + "grad_norm": 0.015880443155765533, + "learning_rate": 5.77290279949429e-07, + "loss": 0.0016, + "step": 268480 + }, + { + "epoch": 1.7220114612629196, + "grad_norm": 0.14516927301883698, + "learning_rate": 5.770292280531276e-07, + "loss": 0.0017, + "step": 268490 + }, + { + "epoch": 1.7220755981567057, + "grad_norm": 0.0028718379326164722, + "learning_rate": 5.767682315798117e-07, + "loss": 0.0006, + "step": 268500 + }, + { + "epoch": 1.722139735050492, + "grad_norm": 0.041969895362854004, + "learning_rate": 5.765072905327496e-07, + "loss": 0.0025, + "step": 268510 + }, + { + "epoch": 1.7222038719442778, + "grad_norm": 0.06452200561761856, + "learning_rate": 5.762464049152139e-07, + "loss": 0.0014, + "step": 268520 + }, + { + "epoch": 1.722268008838064, + "grad_norm": 0.1101684421300888, + "learning_rate": 5.759855747304722e-07, + "loss": 0.0008, + "step": 268530 + }, + { + "epoch": 1.72233214573185, + "grad_norm": 0.15614256262779236, + "learning_rate": 5.757247999817917e-07, + "loss": 0.001, + "step": 268540 + }, + { + "epoch": 1.722396282625636, + "grad_norm": 0.0508844330906868, + "learning_rate": 5.754640806724426e-07, + "loss": 0.001, + "step": 268550 + }, + { + "epoch": 1.7224604195194222, + "grad_norm": 0.017746128141880035, + "learning_rate": 5.752034168056908e-07, + "loss": 0.0008, + "step": 268560 + }, + { + "epoch": 1.7225245564132083, + "grad_norm": 0.042515430599451065, + "learning_rate": 5.74942808384803e-07, + "loss": 0.0006, + "step": 268570 + }, + { + "epoch": 1.7225886933069945, + "grad_norm": 0.046711161732673645, + "learning_rate": 5.746822554130427e-07, + "loss": 0.0013, + "step": 268580 + }, + { + "epoch": 1.7226528302007806, + "grad_norm": 0.04456254094839096, + "learning_rate": 5.744217578936778e-07, + "loss": 0.001, + "step": 268590 + }, + { + "epoch": 1.7227169670945668, + "grad_norm": 0.020523108541965485, + "learning_rate": 5.741613158299714e-07, + "loss": 0.0008, + "step": 268600 + }, + { + "epoch": 1.7227811039883527, + "grad_norm": 0.023720821365714073, + "learning_rate": 5.739009292251863e-07, + "loss": 0.0014, + "step": 268610 + }, + { + "epoch": 1.7228452408821389, + "grad_norm": 0.15102943778038025, + "learning_rate": 5.736405980825849e-07, + "loss": 0.0011, + "step": 268620 + }, + { + "epoch": 1.7229093777759248, + "grad_norm": 0.004818478133529425, + "learning_rate": 5.733803224054319e-07, + "loss": 0.0008, + "step": 268630 + }, + { + "epoch": 1.722973514669711, + "grad_norm": 0.0735711082816124, + "learning_rate": 5.731201021969868e-07, + "loss": 0.0017, + "step": 268640 + }, + { + "epoch": 1.723037651563497, + "grad_norm": 0.21588671207427979, + "learning_rate": 5.728599374605104e-07, + "loss": 0.001, + "step": 268650 + }, + { + "epoch": 1.7231017884572832, + "grad_norm": 0.1779114305973053, + "learning_rate": 5.725998281992618e-07, + "loss": 0.0026, + "step": 268660 + }, + { + "epoch": 1.7231659253510694, + "grad_norm": 0.07624183595180511, + "learning_rate": 5.723397744165032e-07, + "loss": 0.0013, + "step": 268670 + }, + { + "epoch": 1.7232300622448555, + "grad_norm": 0.07083241641521454, + "learning_rate": 5.720797761154911e-07, + "loss": 0.0009, + "step": 268680 + }, + { + "epoch": 1.7232941991386417, + "grad_norm": 0.0490984283387661, + "learning_rate": 5.718198332994835e-07, + "loss": 0.0019, + "step": 268690 + }, + { + "epoch": 1.7233583360324276, + "grad_norm": 0.20232270658016205, + "learning_rate": 5.715599459717386e-07, + "loss": 0.0008, + "step": 268700 + }, + { + "epoch": 1.7234224729262138, + "grad_norm": 0.00593222351744771, + "learning_rate": 5.713001141355129e-07, + "loss": 0.0012, + "step": 268710 + }, + { + "epoch": 1.7234866098199997, + "grad_norm": 0.04960239306092262, + "learning_rate": 5.710403377940621e-07, + "loss": 0.0012, + "step": 268720 + }, + { + "epoch": 1.7235507467137858, + "grad_norm": 0.07858838140964508, + "learning_rate": 5.707806169506397e-07, + "loss": 0.0008, + "step": 268730 + }, + { + "epoch": 1.723614883607572, + "grad_norm": 0.1905651092529297, + "learning_rate": 5.70520951608503e-07, + "loss": 0.0011, + "step": 268740 + }, + { + "epoch": 1.7236790205013581, + "grad_norm": 0.043486468493938446, + "learning_rate": 5.702613417709046e-07, + "loss": 0.0007, + "step": 268750 + }, + { + "epoch": 1.7237431573951443, + "grad_norm": 0.11901059001684189, + "learning_rate": 5.700017874410973e-07, + "loss": 0.0013, + "step": 268760 + }, + { + "epoch": 1.7238072942889304, + "grad_norm": 0.0030484062153846025, + "learning_rate": 5.697422886223325e-07, + "loss": 0.0008, + "step": 268770 + }, + { + "epoch": 1.7238714311827164, + "grad_norm": 0.1276695430278778, + "learning_rate": 5.694828453178641e-07, + "loss": 0.0011, + "step": 268780 + }, + { + "epoch": 1.7239355680765025, + "grad_norm": 0.0690717101097107, + "learning_rate": 5.692234575309424e-07, + "loss": 0.0012, + "step": 268790 + }, + { + "epoch": 1.7239997049702884, + "grad_norm": 0.014858945272862911, + "learning_rate": 5.689641252648171e-07, + "loss": 0.0006, + "step": 268800 + }, + { + "epoch": 1.7240638418640746, + "grad_norm": 0.07536854594945908, + "learning_rate": 5.687048485227376e-07, + "loss": 0.001, + "step": 268810 + }, + { + "epoch": 1.7241279787578607, + "grad_norm": 0.06748121231794357, + "learning_rate": 5.684456273079536e-07, + "loss": 0.0017, + "step": 268820 + }, + { + "epoch": 1.7241921156516469, + "grad_norm": 0.060129065066576004, + "learning_rate": 5.681864616237137e-07, + "loss": 0.0009, + "step": 268830 + }, + { + "epoch": 1.724256252545433, + "grad_norm": 0.09345393627882004, + "learning_rate": 5.679273514732636e-07, + "loss": 0.0021, + "step": 268840 + }, + { + "epoch": 1.7243203894392192, + "grad_norm": 0.016838548704981804, + "learning_rate": 5.676682968598523e-07, + "loss": 0.0021, + "step": 268850 + }, + { + "epoch": 1.7243845263330053, + "grad_norm": 0.006551853846758604, + "learning_rate": 5.674092977867252e-07, + "loss": 0.0008, + "step": 268860 + }, + { + "epoch": 1.7244486632267912, + "grad_norm": 0.06902817636728287, + "learning_rate": 5.67150354257126e-07, + "loss": 0.0011, + "step": 268870 + }, + { + "epoch": 1.7245128001205774, + "grad_norm": 0.07074497640132904, + "learning_rate": 5.668914662743025e-07, + "loss": 0.0009, + "step": 268880 + }, + { + "epoch": 1.7245769370143633, + "grad_norm": 0.12114270031452179, + "learning_rate": 5.666326338414962e-07, + "loss": 0.0007, + "step": 268890 + }, + { + "epoch": 1.7246410739081495, + "grad_norm": 0.06865867972373962, + "learning_rate": 5.66373856961952e-07, + "loss": 0.0008, + "step": 268900 + }, + { + "epoch": 1.7247052108019356, + "grad_norm": 0.10277780890464783, + "learning_rate": 5.661151356389127e-07, + "loss": 0.0004, + "step": 268910 + }, + { + "epoch": 1.7247693476957218, + "grad_norm": 0.02711491286754608, + "learning_rate": 5.658564698756192e-07, + "loss": 0.0009, + "step": 268920 + }, + { + "epoch": 1.724833484589508, + "grad_norm": 0.04675956070423126, + "learning_rate": 5.65597859675312e-07, + "loss": 0.0015, + "step": 268930 + }, + { + "epoch": 1.724897621483294, + "grad_norm": 0.17096774280071259, + "learning_rate": 5.653393050412343e-07, + "loss": 0.0016, + "step": 268940 + }, + { + "epoch": 1.72496175837708, + "grad_norm": 0.12755827605724335, + "learning_rate": 5.650808059766238e-07, + "loss": 0.0023, + "step": 268950 + }, + { + "epoch": 1.7250258952708661, + "grad_norm": 0.030916327610611916, + "learning_rate": 5.648223624847199e-07, + "loss": 0.0009, + "step": 268960 + }, + { + "epoch": 1.7250900321646523, + "grad_norm": 0.1278943568468094, + "learning_rate": 5.645639745687625e-07, + "loss": 0.0011, + "step": 268970 + }, + { + "epoch": 1.7251541690584382, + "grad_norm": 0.009564920328557491, + "learning_rate": 5.643056422319887e-07, + "loss": 0.0007, + "step": 268980 + }, + { + "epoch": 1.7252183059522244, + "grad_norm": 0.07532178610563278, + "learning_rate": 5.640473654776352e-07, + "loss": 0.0017, + "step": 268990 + }, + { + "epoch": 1.7252824428460105, + "grad_norm": 0.06400129944086075, + "learning_rate": 5.637891443089372e-07, + "loss": 0.0006, + "step": 269000 + }, + { + "epoch": 1.7253465797397967, + "grad_norm": 0.0003619497874751687, + "learning_rate": 5.635309787291332e-07, + "loss": 0.001, + "step": 269010 + }, + { + "epoch": 1.7254107166335828, + "grad_norm": 0.09920549392700195, + "learning_rate": 5.632728687414562e-07, + "loss": 0.0017, + "step": 269020 + }, + { + "epoch": 1.725474853527369, + "grad_norm": 0.031354062259197235, + "learning_rate": 5.630148143491415e-07, + "loss": 0.0009, + "step": 269030 + }, + { + "epoch": 1.7255389904211549, + "grad_norm": 0.05927729234099388, + "learning_rate": 5.627568155554214e-07, + "loss": 0.0028, + "step": 269040 + }, + { + "epoch": 1.725603127314941, + "grad_norm": 0.0024785532150417566, + "learning_rate": 5.6249887236353e-07, + "loss": 0.0029, + "step": 269050 + }, + { + "epoch": 1.725667264208727, + "grad_norm": 0.1858125627040863, + "learning_rate": 5.622409847766997e-07, + "loss": 0.0017, + "step": 269060 + }, + { + "epoch": 1.725731401102513, + "grad_norm": 0.1651456207036972, + "learning_rate": 5.619831527981612e-07, + "loss": 0.001, + "step": 269070 + }, + { + "epoch": 1.7257955379962993, + "grad_norm": 0.04127020016312599, + "learning_rate": 5.617253764311447e-07, + "loss": 0.0009, + "step": 269080 + }, + { + "epoch": 1.7258596748900854, + "grad_norm": 0.01615344174206257, + "learning_rate": 5.614676556788823e-07, + "loss": 0.0008, + "step": 269090 + }, + { + "epoch": 1.7259238117838716, + "grad_norm": 0.013099998235702515, + "learning_rate": 5.612099905446022e-07, + "loss": 0.001, + "step": 269100 + }, + { + "epoch": 1.7259879486776577, + "grad_norm": 0.08057832717895508, + "learning_rate": 5.609523810315326e-07, + "loss": 0.0007, + "step": 269110 + }, + { + "epoch": 1.7260520855714439, + "grad_norm": 0.06329803913831711, + "learning_rate": 5.60694827142903e-07, + "loss": 0.0011, + "step": 269120 + }, + { + "epoch": 1.7261162224652298, + "grad_norm": 0.09694880247116089, + "learning_rate": 5.604373288819398e-07, + "loss": 0.001, + "step": 269130 + }, + { + "epoch": 1.726180359359016, + "grad_norm": 0.21163076162338257, + "learning_rate": 5.601798862518698e-07, + "loss": 0.0021, + "step": 269140 + }, + { + "epoch": 1.7262444962528019, + "grad_norm": 0.1984589546918869, + "learning_rate": 5.599224992559177e-07, + "loss": 0.0011, + "step": 269150 + }, + { + "epoch": 1.726308633146588, + "grad_norm": 0.19185489416122437, + "learning_rate": 5.596651678973114e-07, + "loss": 0.0015, + "step": 269160 + }, + { + "epoch": 1.7263727700403741, + "grad_norm": 0.20477338135242462, + "learning_rate": 5.594078921792739e-07, + "loss": 0.0021, + "step": 269170 + }, + { + "epoch": 1.7264369069341603, + "grad_norm": 0.0421905592083931, + "learning_rate": 5.591506721050294e-07, + "loss": 0.0008, + "step": 269180 + }, + { + "epoch": 1.7265010438279464, + "grad_norm": 0.09347651898860931, + "learning_rate": 5.588935076777991e-07, + "loss": 0.0025, + "step": 269190 + }, + { + "epoch": 1.7265651807217326, + "grad_norm": 0.05665479600429535, + "learning_rate": 5.586363989008087e-07, + "loss": 0.001, + "step": 269200 + }, + { + "epoch": 1.7266293176155185, + "grad_norm": 0.08920852839946747, + "learning_rate": 5.583793457772785e-07, + "loss": 0.0013, + "step": 269210 + }, + { + "epoch": 1.7266934545093047, + "grad_norm": 0.02189306542277336, + "learning_rate": 5.581223483104292e-07, + "loss": 0.0013, + "step": 269220 + }, + { + "epoch": 1.7267575914030906, + "grad_norm": 0.09904073178768158, + "learning_rate": 5.578654065034806e-07, + "loss": 0.001, + "step": 269230 + }, + { + "epoch": 1.7268217282968767, + "grad_norm": 0.059837743639945984, + "learning_rate": 5.576085203596537e-07, + "loss": 0.001, + "step": 269240 + }, + { + "epoch": 1.726885865190663, + "grad_norm": 0.09818316251039505, + "learning_rate": 5.573516898821674e-07, + "loss": 0.0009, + "step": 269250 + }, + { + "epoch": 1.726950002084449, + "grad_norm": 0.12516026198863983, + "learning_rate": 5.570949150742383e-07, + "loss": 0.0014, + "step": 269260 + }, + { + "epoch": 1.7270141389782352, + "grad_norm": 0.1935839205980301, + "learning_rate": 5.568381959390868e-07, + "loss": 0.0008, + "step": 269270 + }, + { + "epoch": 1.7270782758720213, + "grad_norm": 0.05724336579442024, + "learning_rate": 5.565815324799278e-07, + "loss": 0.0021, + "step": 269280 + }, + { + "epoch": 1.7271424127658075, + "grad_norm": 0.040515750646591187, + "learning_rate": 5.563249246999774e-07, + "loss": 0.001, + "step": 269290 + }, + { + "epoch": 1.7272065496595934, + "grad_norm": 0.05509898066520691, + "learning_rate": 5.560683726024512e-07, + "loss": 0.0005, + "step": 269300 + }, + { + "epoch": 1.7272706865533796, + "grad_norm": 0.01175855565816164, + "learning_rate": 5.558118761905651e-07, + "loss": 0.0006, + "step": 269310 + }, + { + "epoch": 1.7273348234471655, + "grad_norm": 0.15045689046382904, + "learning_rate": 5.555554354675325e-07, + "loss": 0.0012, + "step": 269320 + }, + { + "epoch": 1.7273989603409516, + "grad_norm": 0.029485128819942474, + "learning_rate": 5.552990504365669e-07, + "loss": 0.0018, + "step": 269330 + }, + { + "epoch": 1.7274630972347378, + "grad_norm": 0.042644254863262177, + "learning_rate": 5.550427211008791e-07, + "loss": 0.0007, + "step": 269340 + }, + { + "epoch": 1.727527234128524, + "grad_norm": 0.069488525390625, + "learning_rate": 5.547864474636844e-07, + "loss": 0.001, + "step": 269350 + }, + { + "epoch": 1.72759137102231, + "grad_norm": 0.025307761505246162, + "learning_rate": 5.545302295281924e-07, + "loss": 0.0022, + "step": 269360 + }, + { + "epoch": 1.7276555079160962, + "grad_norm": 0.1392243653535843, + "learning_rate": 5.542740672976133e-07, + "loss": 0.0011, + "step": 269370 + }, + { + "epoch": 1.7277196448098822, + "grad_norm": 0.08107449114322662, + "learning_rate": 5.540179607751566e-07, + "loss": 0.0005, + "step": 269380 + }, + { + "epoch": 1.7277837817036683, + "grad_norm": 0.18233659863471985, + "learning_rate": 5.537619099640335e-07, + "loss": 0.001, + "step": 269390 + }, + { + "epoch": 1.7278479185974545, + "grad_norm": 0.014074004255235195, + "learning_rate": 5.535059148674516e-07, + "loss": 0.0006, + "step": 269400 + }, + { + "epoch": 1.7279120554912404, + "grad_norm": 0.044057007879018784, + "learning_rate": 5.532499754886173e-07, + "loss": 0.0005, + "step": 269410 + }, + { + "epoch": 1.7279761923850265, + "grad_norm": 0.12658320367336273, + "learning_rate": 5.529940918307392e-07, + "loss": 0.0016, + "step": 269420 + }, + { + "epoch": 1.7280403292788127, + "grad_norm": 0.1030837744474411, + "learning_rate": 5.527382638970241e-07, + "loss": 0.0028, + "step": 269430 + }, + { + "epoch": 1.7281044661725988, + "grad_norm": 0.07192330062389374, + "learning_rate": 5.524824916906768e-07, + "loss": 0.0012, + "step": 269440 + }, + { + "epoch": 1.728168603066385, + "grad_norm": 0.11342022567987442, + "learning_rate": 5.522267752149013e-07, + "loss": 0.0013, + "step": 269450 + }, + { + "epoch": 1.7282327399601711, + "grad_norm": 0.01136010605841875, + "learning_rate": 5.51971114472904e-07, + "loss": 0.0006, + "step": 269460 + }, + { + "epoch": 1.728296876853957, + "grad_norm": 0.11182946711778641, + "learning_rate": 5.517155094678877e-07, + "loss": 0.0016, + "step": 269470 + }, + { + "epoch": 1.7283610137477432, + "grad_norm": 0.16093896329402924, + "learning_rate": 5.514599602030557e-07, + "loss": 0.0021, + "step": 269480 + }, + { + "epoch": 1.7284251506415291, + "grad_norm": 0.1454562246799469, + "learning_rate": 5.51204466681608e-07, + "loss": 0.0007, + "step": 269490 + }, + { + "epoch": 1.7284892875353153, + "grad_norm": 0.17309823632240295, + "learning_rate": 5.509490289067493e-07, + "loss": 0.0008, + "step": 269500 + }, + { + "epoch": 1.7285534244291014, + "grad_norm": 0.025239739567041397, + "learning_rate": 5.506936468816787e-07, + "loss": 0.0013, + "step": 269510 + }, + { + "epoch": 1.7286175613228876, + "grad_norm": 0.05094781517982483, + "learning_rate": 5.504383206095971e-07, + "loss": 0.001, + "step": 269520 + }, + { + "epoch": 1.7286816982166737, + "grad_norm": 0.025469429790973663, + "learning_rate": 5.501830500937016e-07, + "loss": 0.0004, + "step": 269530 + }, + { + "epoch": 1.7287458351104599, + "grad_norm": 0.030855637043714523, + "learning_rate": 5.49927835337194e-07, + "loss": 0.0004, + "step": 269540 + }, + { + "epoch": 1.728809972004246, + "grad_norm": 0.05780933424830437, + "learning_rate": 5.496726763432714e-07, + "loss": 0.0012, + "step": 269550 + }, + { + "epoch": 1.728874108898032, + "grad_norm": 0.08418560773134232, + "learning_rate": 5.494175731151297e-07, + "loss": 0.0011, + "step": 269560 + }, + { + "epoch": 1.728938245791818, + "grad_norm": 0.09374580532312393, + "learning_rate": 5.491625256559674e-07, + "loss": 0.0006, + "step": 269570 + }, + { + "epoch": 1.729002382685604, + "grad_norm": 0.12030810117721558, + "learning_rate": 5.489075339689792e-07, + "loss": 0.001, + "step": 269580 + }, + { + "epoch": 1.7290665195793902, + "grad_norm": 0.07812446355819702, + "learning_rate": 5.486525980573615e-07, + "loss": 0.0006, + "step": 269590 + }, + { + "epoch": 1.7291306564731763, + "grad_norm": 0.01384024415165186, + "learning_rate": 5.483977179243066e-07, + "loss": 0.0018, + "step": 269600 + }, + { + "epoch": 1.7291947933669625, + "grad_norm": 0.054847002029418945, + "learning_rate": 5.48142893573011e-07, + "loss": 0.0018, + "step": 269610 + }, + { + "epoch": 1.7292589302607486, + "grad_norm": 0.007031548768281937, + "learning_rate": 5.478881250066665e-07, + "loss": 0.0011, + "step": 269620 + }, + { + "epoch": 1.7293230671545348, + "grad_norm": 0.20511378347873688, + "learning_rate": 5.476334122284649e-07, + "loss": 0.0017, + "step": 269630 + }, + { + "epoch": 1.7293872040483207, + "grad_norm": 0.09294286370277405, + "learning_rate": 5.473787552415994e-07, + "loss": 0.0021, + "step": 269640 + }, + { + "epoch": 1.7294513409421068, + "grad_norm": 0.1279413104057312, + "learning_rate": 5.471241540492595e-07, + "loss": 0.0021, + "step": 269650 + }, + { + "epoch": 1.7295154778358928, + "grad_norm": 0.0042890217155218124, + "learning_rate": 5.468696086546377e-07, + "loss": 0.0012, + "step": 269660 + }, + { + "epoch": 1.729579614729679, + "grad_norm": 0.06577875465154648, + "learning_rate": 5.466151190609215e-07, + "loss": 0.0008, + "step": 269670 + }, + { + "epoch": 1.729643751623465, + "grad_norm": 0.08991734683513641, + "learning_rate": 5.463606852713005e-07, + "loss": 0.0015, + "step": 269680 + }, + { + "epoch": 1.7297078885172512, + "grad_norm": 0.08856498450040817, + "learning_rate": 5.461063072889633e-07, + "loss": 0.001, + "step": 269690 + }, + { + "epoch": 1.7297720254110374, + "grad_norm": 0.049498848617076874, + "learning_rate": 5.458519851170979e-07, + "loss": 0.0014, + "step": 269700 + }, + { + "epoch": 1.7298361623048235, + "grad_norm": 0.02303713746368885, + "learning_rate": 5.455977187588902e-07, + "loss": 0.001, + "step": 269710 + }, + { + "epoch": 1.7299002991986097, + "grad_norm": 0.04629380255937576, + "learning_rate": 5.453435082175256e-07, + "loss": 0.0006, + "step": 269720 + }, + { + "epoch": 1.7299644360923956, + "grad_norm": 0.05294347181916237, + "learning_rate": 5.450893534961915e-07, + "loss": 0.0013, + "step": 269730 + }, + { + "epoch": 1.7300285729861817, + "grad_norm": 0.2689642906188965, + "learning_rate": 5.448352545980717e-07, + "loss": 0.0014, + "step": 269740 + }, + { + "epoch": 1.7300927098799677, + "grad_norm": 0.10071782022714615, + "learning_rate": 5.4458121152635e-07, + "loss": 0.0007, + "step": 269750 + }, + { + "epoch": 1.7301568467737538, + "grad_norm": 0.23420055210590363, + "learning_rate": 5.44327224284209e-07, + "loss": 0.0018, + "step": 269760 + }, + { + "epoch": 1.73022098366754, + "grad_norm": 0.11843283474445343, + "learning_rate": 5.440732928748338e-07, + "loss": 0.0006, + "step": 269770 + }, + { + "epoch": 1.730285120561326, + "grad_norm": 0.05789335444569588, + "learning_rate": 5.438194173014044e-07, + "loss": 0.0011, + "step": 269780 + }, + { + "epoch": 1.7303492574551123, + "grad_norm": 0.021529171615839005, + "learning_rate": 5.435655975671029e-07, + "loss": 0.0011, + "step": 269790 + }, + { + "epoch": 1.7304133943488984, + "grad_norm": 0.02813744731247425, + "learning_rate": 5.433118336751081e-07, + "loss": 0.0009, + "step": 269800 + }, + { + "epoch": 1.7304775312426843, + "grad_norm": 0.1608094424009323, + "learning_rate": 5.430581256286022e-07, + "loss": 0.0011, + "step": 269810 + }, + { + "epoch": 1.7305416681364705, + "grad_norm": 0.1470644772052765, + "learning_rate": 5.42804473430763e-07, + "loss": 0.0022, + "step": 269820 + }, + { + "epoch": 1.7306058050302566, + "grad_norm": 0.029143664985895157, + "learning_rate": 5.425508770847687e-07, + "loss": 0.0009, + "step": 269830 + }, + { + "epoch": 1.7306699419240426, + "grad_norm": 0.0821867510676384, + "learning_rate": 5.422973365937983e-07, + "loss": 0.001, + "step": 269840 + }, + { + "epoch": 1.7307340788178287, + "grad_norm": 0.004571834579110146, + "learning_rate": 5.420438519610282e-07, + "loss": 0.0021, + "step": 269850 + }, + { + "epoch": 1.7307982157116149, + "grad_norm": 0.04594600945711136, + "learning_rate": 5.417904231896348e-07, + "loss": 0.0009, + "step": 269860 + }, + { + "epoch": 1.730862352605401, + "grad_norm": 0.1579095721244812, + "learning_rate": 5.415370502827927e-07, + "loss": 0.0025, + "step": 269870 + }, + { + "epoch": 1.7309264894991871, + "grad_norm": 0.10846943408250809, + "learning_rate": 5.412837332436788e-07, + "loss": 0.0014, + "step": 269880 + }, + { + "epoch": 1.7309906263929733, + "grad_norm": 0.19232290983200073, + "learning_rate": 5.410304720754661e-07, + "loss": 0.0014, + "step": 269890 + }, + { + "epoch": 1.7310547632867592, + "grad_norm": 0.08476459980010986, + "learning_rate": 5.407772667813288e-07, + "loss": 0.0013, + "step": 269900 + }, + { + "epoch": 1.7311189001805454, + "grad_norm": 0.04643384739756584, + "learning_rate": 5.405241173644377e-07, + "loss": 0.0015, + "step": 269910 + }, + { + "epoch": 1.7311830370743313, + "grad_norm": 0.10197514295578003, + "learning_rate": 5.402710238279679e-07, + "loss": 0.0008, + "step": 269920 + }, + { + "epoch": 1.7312471739681174, + "grad_norm": 0.12310665845870972, + "learning_rate": 5.400179861750898e-07, + "loss": 0.0014, + "step": 269930 + }, + { + "epoch": 1.7313113108619036, + "grad_norm": 0.11327383667230606, + "learning_rate": 5.39765004408973e-07, + "loss": 0.0008, + "step": 269940 + }, + { + "epoch": 1.7313754477556897, + "grad_norm": 0.11353304237127304, + "learning_rate": 5.395120785327878e-07, + "loss": 0.0021, + "step": 269950 + }, + { + "epoch": 1.731439584649476, + "grad_norm": 0.020031224936246872, + "learning_rate": 5.392592085497055e-07, + "loss": 0.0008, + "step": 269960 + }, + { + "epoch": 1.731503721543262, + "grad_norm": 0.04257189854979515, + "learning_rate": 5.390063944628932e-07, + "loss": 0.0009, + "step": 269970 + }, + { + "epoch": 1.7315678584370482, + "grad_norm": 0.09973517805337906, + "learning_rate": 5.387536362755175e-07, + "loss": 0.0008, + "step": 269980 + }, + { + "epoch": 1.7316319953308341, + "grad_norm": 0.05843477323651314, + "learning_rate": 5.385009339907487e-07, + "loss": 0.0009, + "step": 269990 + }, + { + "epoch": 1.7316961322246203, + "grad_norm": 0.09853708744049072, + "learning_rate": 5.382482876117511e-07, + "loss": 0.0011, + "step": 270000 + }, + { + "epoch": 1.7317602691184062, + "grad_norm": 0.05056915804743767, + "learning_rate": 5.379956971416922e-07, + "loss": 0.001, + "step": 270010 + }, + { + "epoch": 1.7318244060121923, + "grad_norm": 0.15489324927330017, + "learning_rate": 5.377431625837343e-07, + "loss": 0.0012, + "step": 270020 + }, + { + "epoch": 1.7318885429059785, + "grad_norm": 0.08503349870443344, + "learning_rate": 5.374906839410455e-07, + "loss": 0.0007, + "step": 270030 + }, + { + "epoch": 1.7319526797997646, + "grad_norm": 0.13210877776145935, + "learning_rate": 5.37238261216787e-07, + "loss": 0.0013, + "step": 270040 + }, + { + "epoch": 1.7320168166935508, + "grad_norm": 0.06503061950206757, + "learning_rate": 5.369858944141226e-07, + "loss": 0.001, + "step": 270050 + }, + { + "epoch": 1.732080953587337, + "grad_norm": 0.038855258375406265, + "learning_rate": 5.367335835362141e-07, + "loss": 0.0012, + "step": 270060 + }, + { + "epoch": 1.7321450904811229, + "grad_norm": 0.01080508716404438, + "learning_rate": 5.364813285862242e-07, + "loss": 0.0011, + "step": 270070 + }, + { + "epoch": 1.732209227374909, + "grad_norm": 0.013003021478652954, + "learning_rate": 5.362291295673139e-07, + "loss": 0.0033, + "step": 270080 + }, + { + "epoch": 1.732273364268695, + "grad_norm": 0.010630217380821705, + "learning_rate": 5.35976986482642e-07, + "loss": 0.0011, + "step": 270090 + }, + { + "epoch": 1.732337501162481, + "grad_norm": 0.11707303673028946, + "learning_rate": 5.357248993353686e-07, + "loss": 0.0004, + "step": 270100 + }, + { + "epoch": 1.7324016380562672, + "grad_norm": 0.0049722082912921906, + "learning_rate": 5.35472868128653e-07, + "loss": 0.0008, + "step": 270110 + }, + { + "epoch": 1.7324657749500534, + "grad_norm": 0.0259892288595438, + "learning_rate": 5.352208928656539e-07, + "loss": 0.0006, + "step": 270120 + }, + { + "epoch": 1.7325299118438395, + "grad_norm": 0.05452693626284599, + "learning_rate": 5.349689735495267e-07, + "loss": 0.0009, + "step": 270130 + }, + { + "epoch": 1.7325940487376257, + "grad_norm": 0.012979741208255291, + "learning_rate": 5.347171101834297e-07, + "loss": 0.0021, + "step": 270140 + }, + { + "epoch": 1.7326581856314118, + "grad_norm": 0.057723190635442734, + "learning_rate": 5.344653027705193e-07, + "loss": 0.001, + "step": 270150 + }, + { + "epoch": 1.7327223225251978, + "grad_norm": 0.12237024307250977, + "learning_rate": 5.342135513139501e-07, + "loss": 0.0011, + "step": 270160 + }, + { + "epoch": 1.732786459418984, + "grad_norm": 0.05576246604323387, + "learning_rate": 5.339618558168757e-07, + "loss": 0.0008, + "step": 270170 + }, + { + "epoch": 1.7328505963127698, + "grad_norm": 0.004119261167943478, + "learning_rate": 5.33710216282452e-07, + "loss": 0.0025, + "step": 270180 + }, + { + "epoch": 1.732914733206556, + "grad_norm": 0.0020740805193781853, + "learning_rate": 5.334586327138313e-07, + "loss": 0.001, + "step": 270190 + }, + { + "epoch": 1.7329788701003421, + "grad_norm": 0.02012786827981472, + "learning_rate": 5.332071051141663e-07, + "loss": 0.0013, + "step": 270200 + }, + { + "epoch": 1.7330430069941283, + "grad_norm": 0.01470683142542839, + "learning_rate": 5.329556334866071e-07, + "loss": 0.0025, + "step": 270210 + }, + { + "epoch": 1.7331071438879144, + "grad_norm": 0.09086151421070099, + "learning_rate": 5.327042178343078e-07, + "loss": 0.0068, + "step": 270220 + }, + { + "epoch": 1.7331712807817006, + "grad_norm": 0.19252285361289978, + "learning_rate": 5.324528581604177e-07, + "loss": 0.0013, + "step": 270230 + }, + { + "epoch": 1.7332354176754867, + "grad_norm": 0.15432101488113403, + "learning_rate": 5.322015544680848e-07, + "loss": 0.0019, + "step": 270240 + }, + { + "epoch": 1.7332995545692726, + "grad_norm": 0.10898509621620178, + "learning_rate": 5.319503067604604e-07, + "loss": 0.0016, + "step": 270250 + }, + { + "epoch": 1.7333636914630588, + "grad_norm": 0.14141759276390076, + "learning_rate": 5.31699115040692e-07, + "loss": 0.0012, + "step": 270260 + }, + { + "epoch": 1.7334278283568447, + "grad_norm": 0.009414039552211761, + "learning_rate": 5.314479793119271e-07, + "loss": 0.0022, + "step": 270270 + }, + { + "epoch": 1.7334919652506309, + "grad_norm": 0.13237473368644714, + "learning_rate": 5.311968995773115e-07, + "loss": 0.0008, + "step": 270280 + }, + { + "epoch": 1.733556102144417, + "grad_norm": 0.04768795520067215, + "learning_rate": 5.309458758399938e-07, + "loss": 0.0038, + "step": 270290 + }, + { + "epoch": 1.7336202390382032, + "grad_norm": 0.12000446021556854, + "learning_rate": 5.306949081031182e-07, + "loss": 0.0021, + "step": 270300 + }, + { + "epoch": 1.7336843759319893, + "grad_norm": 0.08753681182861328, + "learning_rate": 5.304439963698299e-07, + "loss": 0.0051, + "step": 270310 + }, + { + "epoch": 1.7337485128257755, + "grad_norm": 0.15666162967681885, + "learning_rate": 5.301931406432715e-07, + "loss": 0.0009, + "step": 270320 + }, + { + "epoch": 1.7338126497195614, + "grad_norm": 0.04588661715388298, + "learning_rate": 5.29942340926588e-07, + "loss": 0.0014, + "step": 270330 + }, + { + "epoch": 1.7338767866133475, + "grad_norm": 0.012371247634291649, + "learning_rate": 5.296915972229222e-07, + "loss": 0.0015, + "step": 270340 + }, + { + "epoch": 1.7339409235071335, + "grad_norm": 0.010006232187151909, + "learning_rate": 5.294409095354158e-07, + "loss": 0.0007, + "step": 270350 + }, + { + "epoch": 1.7340050604009196, + "grad_norm": 0.06989938765764236, + "learning_rate": 5.291902778672081e-07, + "loss": 0.0011, + "step": 270360 + }, + { + "epoch": 1.7340691972947058, + "grad_norm": 0.14981599152088165, + "learning_rate": 5.289397022214432e-07, + "loss": 0.0009, + "step": 270370 + }, + { + "epoch": 1.734133334188492, + "grad_norm": 0.014810253866016865, + "learning_rate": 5.286891826012591e-07, + "loss": 0.0015, + "step": 270380 + }, + { + "epoch": 1.734197471082278, + "grad_norm": 0.004750589840114117, + "learning_rate": 5.28438719009794e-07, + "loss": 0.0012, + "step": 270390 + }, + { + "epoch": 1.7342616079760642, + "grad_norm": 0.004686721134930849, + "learning_rate": 5.281883114501874e-07, + "loss": 0.0008, + "step": 270400 + }, + { + "epoch": 1.7343257448698504, + "grad_norm": 0.09390250593423843, + "learning_rate": 5.279379599255779e-07, + "loss": 0.001, + "step": 270410 + }, + { + "epoch": 1.7343898817636363, + "grad_norm": 0.10690487176179886, + "learning_rate": 5.276876644391027e-07, + "loss": 0.0011, + "step": 270420 + }, + { + "epoch": 1.7344540186574224, + "grad_norm": 0.08885901421308517, + "learning_rate": 5.274374249938969e-07, + "loss": 0.0008, + "step": 270430 + }, + { + "epoch": 1.7345181555512084, + "grad_norm": 0.04019516333937645, + "learning_rate": 5.271872415930956e-07, + "loss": 0.0012, + "step": 270440 + }, + { + "epoch": 1.7345822924449945, + "grad_norm": 0.027625810354948044, + "learning_rate": 5.26937114239836e-07, + "loss": 0.0017, + "step": 270450 + }, + { + "epoch": 1.7346464293387807, + "grad_norm": 0.03272608667612076, + "learning_rate": 5.266870429372511e-07, + "loss": 0.0019, + "step": 270460 + }, + { + "epoch": 1.7347105662325668, + "grad_norm": 0.023706305772066116, + "learning_rate": 5.264370276884751e-07, + "loss": 0.0023, + "step": 270470 + }, + { + "epoch": 1.734774703126353, + "grad_norm": 0.07645612955093384, + "learning_rate": 5.261870684966386e-07, + "loss": 0.0008, + "step": 270480 + }, + { + "epoch": 1.734838840020139, + "grad_norm": 0.11616411805152893, + "learning_rate": 5.259371653648771e-07, + "loss": 0.0035, + "step": 270490 + }, + { + "epoch": 1.734902976913925, + "grad_norm": 0.010350296273827553, + "learning_rate": 5.256873182963201e-07, + "loss": 0.0005, + "step": 270500 + }, + { + "epoch": 1.7349671138077112, + "grad_norm": 0.19959133863449097, + "learning_rate": 5.254375272940982e-07, + "loss": 0.0016, + "step": 270510 + }, + { + "epoch": 1.7350312507014973, + "grad_norm": 0.10781977325677872, + "learning_rate": 5.251877923613424e-07, + "loss": 0.0013, + "step": 270520 + }, + { + "epoch": 1.7350953875952833, + "grad_norm": 0.11725848913192749, + "learning_rate": 5.249381135011822e-07, + "loss": 0.0016, + "step": 270530 + }, + { + "epoch": 1.7351595244890694, + "grad_norm": 0.08592963963747025, + "learning_rate": 5.246884907167454e-07, + "loss": 0.0005, + "step": 270540 + }, + { + "epoch": 1.7352236613828556, + "grad_norm": 0.0015895924298092723, + "learning_rate": 5.244389240111591e-07, + "loss": 0.0006, + "step": 270550 + }, + { + "epoch": 1.7352877982766417, + "grad_norm": 0.00917302630841732, + "learning_rate": 5.241894133875531e-07, + "loss": 0.0015, + "step": 270560 + }, + { + "epoch": 1.7353519351704279, + "grad_norm": 0.08128763735294342, + "learning_rate": 5.239399588490524e-07, + "loss": 0.0016, + "step": 270570 + }, + { + "epoch": 1.735416072064214, + "grad_norm": 0.10477113723754883, + "learning_rate": 5.236905603987829e-07, + "loss": 0.001, + "step": 270580 + }, + { + "epoch": 1.735480208958, + "grad_norm": 0.05633659288287163, + "learning_rate": 5.23441218039869e-07, + "loss": 0.0007, + "step": 270590 + }, + { + "epoch": 1.735544345851786, + "grad_norm": 0.16217748820781708, + "learning_rate": 5.231919317754369e-07, + "loss": 0.0021, + "step": 270600 + }, + { + "epoch": 1.735608482745572, + "grad_norm": 0.11766723543405533, + "learning_rate": 5.229427016086097e-07, + "loss": 0.0016, + "step": 270610 + }, + { + "epoch": 1.7356726196393581, + "grad_norm": 0.15290795266628265, + "learning_rate": 5.226935275425099e-07, + "loss": 0.0009, + "step": 270620 + }, + { + "epoch": 1.7357367565331443, + "grad_norm": 0.2578399181365967, + "learning_rate": 5.224444095802589e-07, + "loss": 0.0015, + "step": 270630 + }, + { + "epoch": 1.7358008934269304, + "grad_norm": 0.04691920056939125, + "learning_rate": 5.221953477249808e-07, + "loss": 0.001, + "step": 270640 + }, + { + "epoch": 1.7358650303207166, + "grad_norm": 0.048594821244478226, + "learning_rate": 5.219463419797949e-07, + "loss": 0.001, + "step": 270650 + }, + { + "epoch": 1.7359291672145027, + "grad_norm": 0.11301165819168091, + "learning_rate": 5.216973923478214e-07, + "loss": 0.001, + "step": 270660 + }, + { + "epoch": 1.735993304108289, + "grad_norm": 0.0807356983423233, + "learning_rate": 5.214484988321805e-07, + "loss": 0.0009, + "step": 270670 + }, + { + "epoch": 1.7360574410020748, + "grad_norm": 0.06519937515258789, + "learning_rate": 5.21199661435991e-07, + "loss": 0.0008, + "step": 270680 + }, + { + "epoch": 1.736121577895861, + "grad_norm": 0.1602220982313156, + "learning_rate": 5.209508801623708e-07, + "loss": 0.0008, + "step": 270690 + }, + { + "epoch": 1.736185714789647, + "grad_norm": 0.051953308284282684, + "learning_rate": 5.207021550144359e-07, + "loss": 0.0012, + "step": 270700 + }, + { + "epoch": 1.736249851683433, + "grad_norm": 0.010182995349168777, + "learning_rate": 5.204534859953053e-07, + "loss": 0.0009, + "step": 270710 + }, + { + "epoch": 1.7363139885772192, + "grad_norm": 0.1255737692117691, + "learning_rate": 5.202048731080944e-07, + "loss": 0.0012, + "step": 270720 + }, + { + "epoch": 1.7363781254710053, + "grad_norm": 0.016661204397678375, + "learning_rate": 5.199563163559179e-07, + "loss": 0.0009, + "step": 270730 + }, + { + "epoch": 1.7364422623647915, + "grad_norm": 0.06500456482172012, + "learning_rate": 5.197078157418894e-07, + "loss": 0.0004, + "step": 270740 + }, + { + "epoch": 1.7365063992585776, + "grad_norm": 0.06425906717777252, + "learning_rate": 5.194593712691254e-07, + "loss": 0.0015, + "step": 270750 + }, + { + "epoch": 1.7365705361523636, + "grad_norm": 0.17025528848171234, + "learning_rate": 5.192109829407371e-07, + "loss": 0.0016, + "step": 270760 + }, + { + "epoch": 1.7366346730461497, + "grad_norm": 0.07801761478185654, + "learning_rate": 5.189626507598377e-07, + "loss": 0.0011, + "step": 270770 + }, + { + "epoch": 1.7366988099399356, + "grad_norm": 0.10675777494907379, + "learning_rate": 5.187143747295381e-07, + "loss": 0.0013, + "step": 270780 + }, + { + "epoch": 1.7367629468337218, + "grad_norm": 0.07541347295045853, + "learning_rate": 5.184661548529513e-07, + "loss": 0.0008, + "step": 270790 + }, + { + "epoch": 1.736827083727508, + "grad_norm": 0.07384781539440155, + "learning_rate": 5.182179911331858e-07, + "loss": 0.0008, + "step": 270800 + }, + { + "epoch": 1.736891220621294, + "grad_norm": 0.024426599964499474, + "learning_rate": 5.179698835733515e-07, + "loss": 0.0011, + "step": 270810 + }, + { + "epoch": 1.7369553575150802, + "grad_norm": 0.05507689341902733, + "learning_rate": 5.177218321765587e-07, + "loss": 0.001, + "step": 270820 + }, + { + "epoch": 1.7370194944088664, + "grad_norm": 0.0703328549861908, + "learning_rate": 5.174738369459148e-07, + "loss": 0.0006, + "step": 270830 + }, + { + "epoch": 1.7370836313026525, + "grad_norm": 0.047399815171957016, + "learning_rate": 5.172258978845274e-07, + "loss": 0.001, + "step": 270840 + }, + { + "epoch": 1.7371477681964385, + "grad_norm": 0.02468428760766983, + "learning_rate": 5.169780149955017e-07, + "loss": 0.0029, + "step": 270850 + }, + { + "epoch": 1.7372119050902246, + "grad_norm": 0.00034481112379580736, + "learning_rate": 5.16730188281947e-07, + "loss": 0.0014, + "step": 270860 + }, + { + "epoch": 1.7372760419840105, + "grad_norm": 0.13004070520401, + "learning_rate": 5.164824177469673e-07, + "loss": 0.0013, + "step": 270870 + }, + { + "epoch": 1.7373401788777967, + "grad_norm": 0.0290438923984766, + "learning_rate": 5.162347033936671e-07, + "loss": 0.0007, + "step": 270880 + }, + { + "epoch": 1.7374043157715828, + "grad_norm": 0.012381301261484623, + "learning_rate": 5.159870452251492e-07, + "loss": 0.0007, + "step": 270890 + }, + { + "epoch": 1.737468452665369, + "grad_norm": 0.04207621514797211, + "learning_rate": 5.157394432445195e-07, + "loss": 0.0022, + "step": 270900 + }, + { + "epoch": 1.7375325895591551, + "grad_norm": 0.0601922944188118, + "learning_rate": 5.154918974548795e-07, + "loss": 0.0014, + "step": 270910 + }, + { + "epoch": 1.7375967264529413, + "grad_norm": 0.03622348606586456, + "learning_rate": 5.15244407859331e-07, + "loss": 0.0008, + "step": 270920 + }, + { + "epoch": 1.7376608633467272, + "grad_norm": 0.00952757615596056, + "learning_rate": 5.149969744609745e-07, + "loss": 0.0004, + "step": 270930 + }, + { + "epoch": 1.7377250002405134, + "grad_norm": 0.0805339366197586, + "learning_rate": 5.147495972629124e-07, + "loss": 0.0009, + "step": 270940 + }, + { + "epoch": 1.7377891371342995, + "grad_norm": 0.049618132412433624, + "learning_rate": 5.145022762682427e-07, + "loss": 0.001, + "step": 270950 + }, + { + "epoch": 1.7378532740280854, + "grad_norm": 0.07571809738874435, + "learning_rate": 5.142550114800649e-07, + "loss": 0.0012, + "step": 270960 + }, + { + "epoch": 1.7379174109218716, + "grad_norm": 0.019243987277150154, + "learning_rate": 5.140078029014783e-07, + "loss": 0.0013, + "step": 270970 + }, + { + "epoch": 1.7379815478156577, + "grad_norm": 0.11308278143405914, + "learning_rate": 5.137606505355802e-07, + "loss": 0.001, + "step": 270980 + }, + { + "epoch": 1.7380456847094439, + "grad_norm": 0.014698876067996025, + "learning_rate": 5.135135543854675e-07, + "loss": 0.0011, + "step": 270990 + }, + { + "epoch": 1.73810982160323, + "grad_norm": 0.041699737310409546, + "learning_rate": 5.132665144542354e-07, + "loss": 0.0009, + "step": 271000 + }, + { + "epoch": 1.7381739584970162, + "grad_norm": 0.066974937915802, + "learning_rate": 5.130195307449815e-07, + "loss": 0.001, + "step": 271010 + }, + { + "epoch": 1.738238095390802, + "grad_norm": 0.11968187242746353, + "learning_rate": 5.127726032607994e-07, + "loss": 0.0016, + "step": 271020 + }, + { + "epoch": 1.7383022322845882, + "grad_norm": 0.21375861763954163, + "learning_rate": 5.125257320047839e-07, + "loss": 0.001, + "step": 271030 + }, + { + "epoch": 1.7383663691783742, + "grad_norm": 0.11760486662387848, + "learning_rate": 5.122789169800269e-07, + "loss": 0.001, + "step": 271040 + }, + { + "epoch": 1.7384305060721603, + "grad_norm": 0.08709526062011719, + "learning_rate": 5.120321581896237e-07, + "loss": 0.0006, + "step": 271050 + }, + { + "epoch": 1.7384946429659465, + "grad_norm": 0.08717769384384155, + "learning_rate": 5.117854556366647e-07, + "loss": 0.0016, + "step": 271060 + }, + { + "epoch": 1.7385587798597326, + "grad_norm": 0.047181129455566406, + "learning_rate": 5.115388093242418e-07, + "loss": 0.001, + "step": 271070 + }, + { + "epoch": 1.7386229167535188, + "grad_norm": 0.1129605770111084, + "learning_rate": 5.112922192554443e-07, + "loss": 0.0012, + "step": 271080 + }, + { + "epoch": 1.738687053647305, + "grad_norm": 0.15419110655784607, + "learning_rate": 5.110456854333645e-07, + "loss": 0.001, + "step": 271090 + }, + { + "epoch": 1.738751190541091, + "grad_norm": 0.01996695250272751, + "learning_rate": 5.107992078610901e-07, + "loss": 0.0007, + "step": 271100 + }, + { + "epoch": 1.738815327434877, + "grad_norm": 0.0014193024253472686, + "learning_rate": 5.105527865417098e-07, + "loss": 0.0008, + "step": 271110 + }, + { + "epoch": 1.7388794643286631, + "grad_norm": 0.0876384973526001, + "learning_rate": 5.103064214783121e-07, + "loss": 0.0014, + "step": 271120 + }, + { + "epoch": 1.738943601222449, + "grad_norm": 0.04958288371562958, + "learning_rate": 5.100601126739835e-07, + "loss": 0.0007, + "step": 271130 + }, + { + "epoch": 1.7390077381162352, + "grad_norm": 0.036646727472543716, + "learning_rate": 5.098138601318109e-07, + "loss": 0.0008, + "step": 271140 + }, + { + "epoch": 1.7390718750100214, + "grad_norm": 0.05905251204967499, + "learning_rate": 5.095676638548786e-07, + "loss": 0.0009, + "step": 271150 + }, + { + "epoch": 1.7391360119038075, + "grad_norm": 0.05176056921482086, + "learning_rate": 5.093215238462728e-07, + "loss": 0.0006, + "step": 271160 + }, + { + "epoch": 1.7392001487975937, + "grad_norm": 0.06435713917016983, + "learning_rate": 5.090754401090787e-07, + "loss": 0.0008, + "step": 271170 + }, + { + "epoch": 1.7392642856913798, + "grad_norm": 0.007998064160346985, + "learning_rate": 5.088294126463789e-07, + "loss": 0.001, + "step": 271180 + }, + { + "epoch": 1.7393284225851657, + "grad_norm": 0.016270240768790245, + "learning_rate": 5.085834414612567e-07, + "loss": 0.0009, + "step": 271190 + }, + { + "epoch": 1.7393925594789519, + "grad_norm": 0.08200455456972122, + "learning_rate": 5.083375265567925e-07, + "loss": 0.0009, + "step": 271200 + }, + { + "epoch": 1.7394566963727378, + "grad_norm": 0.15137460827827454, + "learning_rate": 5.080916679360703e-07, + "loss": 0.0008, + "step": 271210 + }, + { + "epoch": 1.739520833266524, + "grad_norm": 0.05072480067610741, + "learning_rate": 5.078458656021701e-07, + "loss": 0.001, + "step": 271220 + }, + { + "epoch": 1.73958497016031, + "grad_norm": 0.15417474508285522, + "learning_rate": 5.076001195581709e-07, + "loss": 0.0012, + "step": 271230 + }, + { + "epoch": 1.7396491070540963, + "grad_norm": 0.032537128776311874, + "learning_rate": 5.073544298071531e-07, + "loss": 0.001, + "step": 271240 + }, + { + "epoch": 1.7397132439478824, + "grad_norm": 0.004596466664224863, + "learning_rate": 5.071087963521959e-07, + "loss": 0.0011, + "step": 271250 + }, + { + "epoch": 1.7397773808416686, + "grad_norm": 0.09249936789274216, + "learning_rate": 5.068632191963758e-07, + "loss": 0.0009, + "step": 271260 + }, + { + "epoch": 1.7398415177354547, + "grad_norm": 0.013414569199085236, + "learning_rate": 5.066176983427701e-07, + "loss": 0.0017, + "step": 271270 + }, + { + "epoch": 1.7399056546292406, + "grad_norm": 0.11013256013393402, + "learning_rate": 5.063722337944571e-07, + "loss": 0.0018, + "step": 271280 + }, + { + "epoch": 1.7399697915230268, + "grad_norm": 0.16548310220241547, + "learning_rate": 5.061268255545115e-07, + "loss": 0.0025, + "step": 271290 + }, + { + "epoch": 1.7400339284168127, + "grad_norm": 0.15287838876247406, + "learning_rate": 5.058814736260087e-07, + "loss": 0.0012, + "step": 271300 + }, + { + "epoch": 1.7400980653105989, + "grad_norm": 0.11675088852643967, + "learning_rate": 5.056361780120216e-07, + "loss": 0.0016, + "step": 271310 + }, + { + "epoch": 1.740162202204385, + "grad_norm": 0.04161589592695236, + "learning_rate": 5.053909387156264e-07, + "loss": 0.0027, + "step": 271320 + }, + { + "epoch": 1.7402263390981711, + "grad_norm": 0.07786667346954346, + "learning_rate": 5.051457557398948e-07, + "loss": 0.0013, + "step": 271330 + }, + { + "epoch": 1.7402904759919573, + "grad_norm": 0.08022894710302353, + "learning_rate": 5.049006290878993e-07, + "loss": 0.0024, + "step": 271340 + }, + { + "epoch": 1.7403546128857434, + "grad_norm": 0.10996535420417786, + "learning_rate": 5.046555587627111e-07, + "loss": 0.0016, + "step": 271350 + }, + { + "epoch": 1.7404187497795294, + "grad_norm": 0.2262801229953766, + "learning_rate": 5.044105447674019e-07, + "loss": 0.0017, + "step": 271360 + }, + { + "epoch": 1.7404828866733155, + "grad_norm": 0.044142354279756546, + "learning_rate": 5.041655871050416e-07, + "loss": 0.0008, + "step": 271370 + }, + { + "epoch": 1.7405470235671017, + "grad_norm": 0.16979172825813293, + "learning_rate": 5.039206857786988e-07, + "loss": 0.0008, + "step": 271380 + }, + { + "epoch": 1.7406111604608876, + "grad_norm": 0.12596730887889862, + "learning_rate": 5.036758407914444e-07, + "loss": 0.0014, + "step": 271390 + }, + { + "epoch": 1.7406752973546737, + "grad_norm": 0.015053360722959042, + "learning_rate": 5.034310521463449e-07, + "loss": 0.0011, + "step": 271400 + }, + { + "epoch": 1.74073943424846, + "grad_norm": 0.10763084143400192, + "learning_rate": 5.031863198464676e-07, + "loss": 0.0015, + "step": 271410 + }, + { + "epoch": 1.740803571142246, + "grad_norm": 0.08218323439359665, + "learning_rate": 5.029416438948786e-07, + "loss": 0.0011, + "step": 271420 + }, + { + "epoch": 1.7408677080360322, + "grad_norm": 0.0035591446794569492, + "learning_rate": 5.026970242946466e-07, + "loss": 0.0014, + "step": 271430 + }, + { + "epoch": 1.7409318449298183, + "grad_norm": 0.025590816512703896, + "learning_rate": 5.024524610488341e-07, + "loss": 0.0008, + "step": 271440 + }, + { + "epoch": 1.7409959818236043, + "grad_norm": 0.10720288008451462, + "learning_rate": 5.022079541605074e-07, + "loss": 0.001, + "step": 271450 + }, + { + "epoch": 1.7410601187173904, + "grad_norm": 0.10830137878656387, + "learning_rate": 5.019635036327281e-07, + "loss": 0.0014, + "step": 271460 + }, + { + "epoch": 1.7411242556111763, + "grad_norm": 0.03661242872476578, + "learning_rate": 5.01719109468562e-07, + "loss": 0.0005, + "step": 271470 + }, + { + "epoch": 1.7411883925049625, + "grad_norm": 0.04848403111100197, + "learning_rate": 5.0147477167107e-07, + "loss": 0.0009, + "step": 271480 + }, + { + "epoch": 1.7412525293987486, + "grad_norm": 0.010289512574672699, + "learning_rate": 5.01230490243314e-07, + "loss": 0.0005, + "step": 271490 + }, + { + "epoch": 1.7413166662925348, + "grad_norm": 0.05726618319749832, + "learning_rate": 5.009862651883546e-07, + "loss": 0.0012, + "step": 271500 + }, + { + "epoch": 1.741380803186321, + "grad_norm": 0.007875513285398483, + "learning_rate": 5.00742096509254e-07, + "loss": 0.0013, + "step": 271510 + }, + { + "epoch": 1.741444940080107, + "grad_norm": 0.12497909367084503, + "learning_rate": 5.004979842090702e-07, + "loss": 0.0015, + "step": 271520 + }, + { + "epoch": 1.7415090769738932, + "grad_norm": 0.07876826077699661, + "learning_rate": 5.002539282908614e-07, + "loss": 0.0032, + "step": 271530 + }, + { + "epoch": 1.7415732138676792, + "grad_norm": 0.052569467574357986, + "learning_rate": 5.000099287576876e-07, + "loss": 0.0009, + "step": 271540 + }, + { + "epoch": 1.7416373507614653, + "grad_norm": 0.1996789127588272, + "learning_rate": 4.997659856126053e-07, + "loss": 0.0012, + "step": 271550 + }, + { + "epoch": 1.7417014876552512, + "grad_norm": 0.05426711589097977, + "learning_rate": 4.995220988586719e-07, + "loss": 0.001, + "step": 271560 + }, + { + "epoch": 1.7417656245490374, + "grad_norm": 0.16309188306331635, + "learning_rate": 4.992782684989422e-07, + "loss": 0.0009, + "step": 271570 + }, + { + "epoch": 1.7418297614428235, + "grad_norm": 0.10447095334529877, + "learning_rate": 4.990344945364727e-07, + "loss": 0.0011, + "step": 271580 + }, + { + "epoch": 1.7418938983366097, + "grad_norm": 0.14355474710464478, + "learning_rate": 4.987907769743183e-07, + "loss": 0.0004, + "step": 271590 + }, + { + "epoch": 1.7419580352303958, + "grad_norm": 0.06109283119440079, + "learning_rate": 4.985471158155325e-07, + "loss": 0.0009, + "step": 271600 + }, + { + "epoch": 1.742022172124182, + "grad_norm": 0.012738275341689587, + "learning_rate": 4.983035110631674e-07, + "loss": 0.0013, + "step": 271610 + }, + { + "epoch": 1.742086309017968, + "grad_norm": 0.13235825300216675, + "learning_rate": 4.980599627202776e-07, + "loss": 0.0007, + "step": 271620 + }, + { + "epoch": 1.742150445911754, + "grad_norm": 0.05144060403108597, + "learning_rate": 4.978164707899142e-07, + "loss": 0.0009, + "step": 271630 + }, + { + "epoch": 1.74221458280554, + "grad_norm": 0.04663949832320213, + "learning_rate": 4.975730352751273e-07, + "loss": 0.0005, + "step": 271640 + }, + { + "epoch": 1.7422787196993261, + "grad_norm": 0.03589535877108574, + "learning_rate": 4.973296561789676e-07, + "loss": 0.001, + "step": 271650 + }, + { + "epoch": 1.7423428565931123, + "grad_norm": 0.030549991875886917, + "learning_rate": 4.970863335044867e-07, + "loss": 0.0006, + "step": 271660 + }, + { + "epoch": 1.7424069934868984, + "grad_norm": 0.03686181828379631, + "learning_rate": 4.968430672547314e-07, + "loss": 0.0018, + "step": 271670 + }, + { + "epoch": 1.7424711303806846, + "grad_norm": 0.01910620741546154, + "learning_rate": 4.965998574327508e-07, + "loss": 0.0013, + "step": 271680 + }, + { + "epoch": 1.7425352672744707, + "grad_norm": 0.05751664936542511, + "learning_rate": 4.963567040415929e-07, + "loss": 0.0009, + "step": 271690 + }, + { + "epoch": 1.7425994041682569, + "grad_norm": 0.014478910714387894, + "learning_rate": 4.961136070843043e-07, + "loss": 0.0009, + "step": 271700 + }, + { + "epoch": 1.7426635410620428, + "grad_norm": 0.01925645023584366, + "learning_rate": 4.958705665639308e-07, + "loss": 0.0009, + "step": 271710 + }, + { + "epoch": 1.742727677955829, + "grad_norm": 0.05361935496330261, + "learning_rate": 4.956275824835177e-07, + "loss": 0.0012, + "step": 271720 + }, + { + "epoch": 1.7427918148496149, + "grad_norm": 0.051223274320364, + "learning_rate": 4.953846548461105e-07, + "loss": 0.0013, + "step": 271730 + }, + { + "epoch": 1.742855951743401, + "grad_norm": 0.08455495536327362, + "learning_rate": 4.951417836547539e-07, + "loss": 0.002, + "step": 271740 + }, + { + "epoch": 1.7429200886371872, + "grad_norm": 0.05032658204436302, + "learning_rate": 4.948989689124894e-07, + "loss": 0.0012, + "step": 271750 + }, + { + "epoch": 1.7429842255309733, + "grad_norm": 0.047818951308727264, + "learning_rate": 4.946562106223602e-07, + "loss": 0.0014, + "step": 271760 + }, + { + "epoch": 1.7430483624247595, + "grad_norm": 0.005683389492332935, + "learning_rate": 4.944135087874097e-07, + "loss": 0.0017, + "step": 271770 + }, + { + "epoch": 1.7431124993185456, + "grad_norm": 0.024180131033062935, + "learning_rate": 4.941708634106773e-07, + "loss": 0.0013, + "step": 271780 + }, + { + "epoch": 1.7431766362123318, + "grad_norm": 0.12976869940757751, + "learning_rate": 4.939282744952051e-07, + "loss": 0.0007, + "step": 271790 + }, + { + "epoch": 1.7432407731061177, + "grad_norm": 0.14844253659248352, + "learning_rate": 4.936857420440306e-07, + "loss": 0.001, + "step": 271800 + }, + { + "epoch": 1.7433049099999038, + "grad_norm": 0.059311579912900925, + "learning_rate": 4.934432660601951e-07, + "loss": 0.0007, + "step": 271810 + }, + { + "epoch": 1.7433690468936898, + "grad_norm": 0.07273940742015839, + "learning_rate": 4.932008465467369e-07, + "loss": 0.0019, + "step": 271820 + }, + { + "epoch": 1.743433183787476, + "grad_norm": 0.04856349900364876, + "learning_rate": 4.929584835066914e-07, + "loss": 0.001, + "step": 271830 + }, + { + "epoch": 1.743497320681262, + "grad_norm": 0.11532484740018845, + "learning_rate": 4.927161769430989e-07, + "loss": 0.0014, + "step": 271840 + }, + { + "epoch": 1.7435614575750482, + "grad_norm": 0.08793105185031891, + "learning_rate": 4.924739268589934e-07, + "loss": 0.0012, + "step": 271850 + }, + { + "epoch": 1.7436255944688344, + "grad_norm": 0.057791031897068024, + "learning_rate": 4.922317332574117e-07, + "loss": 0.0022, + "step": 271860 + }, + { + "epoch": 1.7436897313626205, + "grad_norm": 0.2020353376865387, + "learning_rate": 4.919895961413867e-07, + "loss": 0.0021, + "step": 271870 + }, + { + "epoch": 1.7437538682564064, + "grad_norm": 0.1663372665643692, + "learning_rate": 4.917475155139545e-07, + "loss": 0.0013, + "step": 271880 + }, + { + "epoch": 1.7438180051501926, + "grad_norm": 0.1889677196741104, + "learning_rate": 4.915054913781486e-07, + "loss": 0.001, + "step": 271890 + }, + { + "epoch": 1.7438821420439785, + "grad_norm": 0.1728479266166687, + "learning_rate": 4.912635237370006e-07, + "loss": 0.0012, + "step": 271900 + }, + { + "epoch": 1.7439462789377647, + "grad_norm": 0.06438399851322174, + "learning_rate": 4.910216125935424e-07, + "loss": 0.0007, + "step": 271910 + }, + { + "epoch": 1.7440104158315508, + "grad_norm": 0.06103010103106499, + "learning_rate": 4.907797579508066e-07, + "loss": 0.0005, + "step": 271920 + }, + { + "epoch": 1.744074552725337, + "grad_norm": 0.04396438226103783, + "learning_rate": 4.905379598118221e-07, + "loss": 0.0013, + "step": 271930 + }, + { + "epoch": 1.744138689619123, + "grad_norm": 0.4433193504810333, + "learning_rate": 4.902962181796211e-07, + "loss": 0.0027, + "step": 271940 + }, + { + "epoch": 1.7442028265129093, + "grad_norm": 0.006359719205647707, + "learning_rate": 4.900545330572304e-07, + "loss": 0.0004, + "step": 271950 + }, + { + "epoch": 1.7442669634066954, + "grad_norm": 0.060575541108846664, + "learning_rate": 4.898129044476802e-07, + "loss": 0.0022, + "step": 271960 + }, + { + "epoch": 1.7443311003004813, + "grad_norm": 0.032161895185709, + "learning_rate": 4.89571332353998e-07, + "loss": 0.001, + "step": 271970 + }, + { + "epoch": 1.7443952371942675, + "grad_norm": 0.043320585042238235, + "learning_rate": 4.89329816779211e-07, + "loss": 0.0019, + "step": 271980 + }, + { + "epoch": 1.7444593740880534, + "grad_norm": 0.0536777563393116, + "learning_rate": 4.890883577263439e-07, + "loss": 0.0005, + "step": 271990 + }, + { + "epoch": 1.7445235109818396, + "grad_norm": 0.011271800845861435, + "learning_rate": 4.888469551984243e-07, + "loss": 0.0005, + "step": 272000 + }, + { + "epoch": 1.7445876478756257, + "grad_norm": 0.08536845445632935, + "learning_rate": 4.886056091984764e-07, + "loss": 0.002, + "step": 272010 + }, + { + "epoch": 1.7446517847694119, + "grad_norm": 0.015522822737693787, + "learning_rate": 4.883643197295246e-07, + "loss": 0.0009, + "step": 272020 + }, + { + "epoch": 1.744715921663198, + "grad_norm": 0.09444174915552139, + "learning_rate": 4.881230867945913e-07, + "loss": 0.001, + "step": 272030 + }, + { + "epoch": 1.7447800585569841, + "grad_norm": 0.05753888562321663, + "learning_rate": 4.878819103967014e-07, + "loss": 0.0024, + "step": 272040 + }, + { + "epoch": 1.74484419545077, + "grad_norm": 0.130371555685997, + "learning_rate": 4.876407905388758e-07, + "loss": 0.0007, + "step": 272050 + }, + { + "epoch": 1.7449083323445562, + "grad_norm": 0.13810470700263977, + "learning_rate": 4.87399727224136e-07, + "loss": 0.0006, + "step": 272060 + }, + { + "epoch": 1.7449724692383424, + "grad_norm": 0.10591180622577667, + "learning_rate": 4.871587204555018e-07, + "loss": 0.0013, + "step": 272070 + }, + { + "epoch": 1.7450366061321283, + "grad_norm": 0.1073022112250328, + "learning_rate": 4.869177702359951e-07, + "loss": 0.0008, + "step": 272080 + }, + { + "epoch": 1.7451007430259144, + "grad_norm": 0.036559127271175385, + "learning_rate": 4.866768765686342e-07, + "loss": 0.0016, + "step": 272090 + }, + { + "epoch": 1.7451648799197006, + "grad_norm": 0.008580600842833519, + "learning_rate": 4.864360394564366e-07, + "loss": 0.0006, + "step": 272100 + }, + { + "epoch": 1.7452290168134867, + "grad_norm": 0.06898060441017151, + "learning_rate": 4.861952589024222e-07, + "loss": 0.0015, + "step": 272110 + }, + { + "epoch": 1.745293153707273, + "grad_norm": 0.060005057603120804, + "learning_rate": 4.859545349096073e-07, + "loss": 0.0022, + "step": 272120 + }, + { + "epoch": 1.745357290601059, + "grad_norm": 0.04038242995738983, + "learning_rate": 4.857138674810081e-07, + "loss": 0.0025, + "step": 272130 + }, + { + "epoch": 1.745421427494845, + "grad_norm": 0.10667458921670914, + "learning_rate": 4.854732566196397e-07, + "loss": 0.0015, + "step": 272140 + }, + { + "epoch": 1.7454855643886311, + "grad_norm": 0.10232573002576828, + "learning_rate": 4.852327023285186e-07, + "loss": 0.0021, + "step": 272150 + }, + { + "epoch": 1.745549701282417, + "grad_norm": 0.06841970235109329, + "learning_rate": 4.849922046106581e-07, + "loss": 0.0015, + "step": 272160 + }, + { + "epoch": 1.7456138381762032, + "grad_norm": 0.04854750633239746, + "learning_rate": 4.847517634690729e-07, + "loss": 0.0006, + "step": 272170 + }, + { + "epoch": 1.7456779750699893, + "grad_norm": 0.05373238027095795, + "learning_rate": 4.845113789067735e-07, + "loss": 0.0005, + "step": 272180 + }, + { + "epoch": 1.7457421119637755, + "grad_norm": 0.031732819974422455, + "learning_rate": 4.84271050926775e-07, + "loss": 0.0013, + "step": 272190 + }, + { + "epoch": 1.7458062488575616, + "grad_norm": 0.0857602208852768, + "learning_rate": 4.840307795320876e-07, + "loss": 0.0016, + "step": 272200 + }, + { + "epoch": 1.7458703857513478, + "grad_norm": 0.064203180372715, + "learning_rate": 4.837905647257207e-07, + "loss": 0.0048, + "step": 272210 + }, + { + "epoch": 1.745934522645134, + "grad_norm": 0.1207960918545723, + "learning_rate": 4.835504065106872e-07, + "loss": 0.0008, + "step": 272220 + }, + { + "epoch": 1.7459986595389199, + "grad_norm": 0.024911632761359215, + "learning_rate": 4.833103048899946e-07, + "loss": 0.0013, + "step": 272230 + }, + { + "epoch": 1.746062796432706, + "grad_norm": 0.052240040153265, + "learning_rate": 4.83070259866652e-07, + "loss": 0.0006, + "step": 272240 + }, + { + "epoch": 1.746126933326492, + "grad_norm": 0.3158170282840729, + "learning_rate": 4.828302714436661e-07, + "loss": 0.0017, + "step": 272250 + }, + { + "epoch": 1.746191070220278, + "grad_norm": 0.10014260560274124, + "learning_rate": 4.825903396240461e-07, + "loss": 0.0012, + "step": 272260 + }, + { + "epoch": 1.7462552071140642, + "grad_norm": 0.06072854995727539, + "learning_rate": 4.823504644107985e-07, + "loss": 0.0006, + "step": 272270 + }, + { + "epoch": 1.7463193440078504, + "grad_norm": 0.018507638946175575, + "learning_rate": 4.821106458069275e-07, + "loss": 0.0012, + "step": 272280 + }, + { + "epoch": 1.7463834809016365, + "grad_norm": 0.023834386840462685, + "learning_rate": 4.818708838154384e-07, + "loss": 0.0008, + "step": 272290 + }, + { + "epoch": 1.7464476177954227, + "grad_norm": 0.07017380744218826, + "learning_rate": 4.816311784393368e-07, + "loss": 0.0008, + "step": 272300 + }, + { + "epoch": 1.7465117546892086, + "grad_norm": 0.07759445160627365, + "learning_rate": 4.813915296816263e-07, + "loss": 0.0034, + "step": 272310 + }, + { + "epoch": 1.7465758915829948, + "grad_norm": 0.025435132905840874, + "learning_rate": 4.811519375453089e-07, + "loss": 0.0004, + "step": 272320 + }, + { + "epoch": 1.7466400284767807, + "grad_norm": 0.12056136876344681, + "learning_rate": 4.809124020333867e-07, + "loss": 0.0012, + "step": 272330 + }, + { + "epoch": 1.7467041653705668, + "grad_norm": 0.1359078586101532, + "learning_rate": 4.806729231488622e-07, + "loss": 0.0011, + "step": 272340 + }, + { + "epoch": 1.746768302264353, + "grad_norm": 0.008710467256605625, + "learning_rate": 4.804335008947364e-07, + "loss": 0.0009, + "step": 272350 + }, + { + "epoch": 1.7468324391581391, + "grad_norm": 0.15154539048671722, + "learning_rate": 4.80194135274008e-07, + "loss": 0.0006, + "step": 272360 + }, + { + "epoch": 1.7468965760519253, + "grad_norm": 0.16552463173866272, + "learning_rate": 4.799548262896781e-07, + "loss": 0.0014, + "step": 272370 + }, + { + "epoch": 1.7469607129457114, + "grad_norm": 0.05478297173976898, + "learning_rate": 4.797155739447445e-07, + "loss": 0.0007, + "step": 272380 + }, + { + "epoch": 1.7470248498394976, + "grad_norm": 0.010676765814423561, + "learning_rate": 4.794763782422057e-07, + "loss": 0.0006, + "step": 272390 + }, + { + "epoch": 1.7470889867332835, + "grad_norm": 0.06147603318095207, + "learning_rate": 4.792372391850575e-07, + "loss": 0.001, + "step": 272400 + }, + { + "epoch": 1.7471531236270696, + "grad_norm": 0.06963121891021729, + "learning_rate": 4.789981567762986e-07, + "loss": 0.001, + "step": 272410 + }, + { + "epoch": 1.7472172605208556, + "grad_norm": 0.2152966409921646, + "learning_rate": 4.787591310189238e-07, + "loss": 0.0009, + "step": 272420 + }, + { + "epoch": 1.7472813974146417, + "grad_norm": 0.1609608232975006, + "learning_rate": 4.785201619159285e-07, + "loss": 0.0017, + "step": 272430 + }, + { + "epoch": 1.7473455343084279, + "grad_norm": 0.006035391241312027, + "learning_rate": 4.782812494703066e-07, + "loss": 0.0006, + "step": 272440 + }, + { + "epoch": 1.747409671202214, + "grad_norm": 0.06160098686814308, + "learning_rate": 4.780423936850526e-07, + "loss": 0.0009, + "step": 272450 + }, + { + "epoch": 1.7474738080960002, + "grad_norm": 0.06009820103645325, + "learning_rate": 4.778035945631593e-07, + "loss": 0.0013, + "step": 272460 + }, + { + "epoch": 1.7475379449897863, + "grad_norm": 0.03193667158484459, + "learning_rate": 4.775648521076187e-07, + "loss": 0.0011, + "step": 272470 + }, + { + "epoch": 1.7476020818835722, + "grad_norm": 0.11648708581924438, + "learning_rate": 4.773261663214218e-07, + "loss": 0.0023, + "step": 272480 + }, + { + "epoch": 1.7476662187773584, + "grad_norm": 0.056543439626693726, + "learning_rate": 4.770875372075618e-07, + "loss": 0.0007, + "step": 272490 + }, + { + "epoch": 1.7477303556711445, + "grad_norm": 0.0710235983133316, + "learning_rate": 4.768489647690266e-07, + "loss": 0.006, + "step": 272500 + }, + { + "epoch": 1.7477944925649305, + "grad_norm": 0.06654216349124908, + "learning_rate": 4.7661044900880636e-07, + "loss": 0.0006, + "step": 272510 + }, + { + "epoch": 1.7478586294587166, + "grad_norm": 0.3334129750728607, + "learning_rate": 4.7637198992989075e-07, + "loss": 0.0011, + "step": 272520 + }, + { + "epoch": 1.7479227663525028, + "grad_norm": 0.06493246555328369, + "learning_rate": 4.7613358753526686e-07, + "loss": 0.0009, + "step": 272530 + }, + { + "epoch": 1.747986903246289, + "grad_norm": 0.04161922633647919, + "learning_rate": 4.7589524182792226e-07, + "loss": 0.0007, + "step": 272540 + }, + { + "epoch": 1.748051040140075, + "grad_norm": 0.02677379921078682, + "learning_rate": 4.756569528108429e-07, + "loss": 0.0004, + "step": 272550 + }, + { + "epoch": 1.7481151770338612, + "grad_norm": 0.002950383350253105, + "learning_rate": 4.7541872048701585e-07, + "loss": 0.0009, + "step": 272560 + }, + { + "epoch": 1.7481793139276471, + "grad_norm": 0.06629134714603424, + "learning_rate": 4.7518054485942653e-07, + "loss": 0.0011, + "step": 272570 + }, + { + "epoch": 1.7482434508214333, + "grad_norm": 0.022829001769423485, + "learning_rate": 4.7494242593105867e-07, + "loss": 0.0006, + "step": 272580 + }, + { + "epoch": 1.7483075877152192, + "grad_norm": 0.020911620929837227, + "learning_rate": 4.7470436370489535e-07, + "loss": 0.0003, + "step": 272590 + }, + { + "epoch": 1.7483717246090054, + "grad_norm": 0.040540631860494614, + "learning_rate": 4.7446635818392093e-07, + "loss": 0.0008, + "step": 272600 + }, + { + "epoch": 1.7484358615027915, + "grad_norm": 0.06236898899078369, + "learning_rate": 4.7422840937111804e-07, + "loss": 0.0011, + "step": 272610 + }, + { + "epoch": 1.7484999983965777, + "grad_norm": 0.0678824707865715, + "learning_rate": 4.7399051726946754e-07, + "loss": 0.0009, + "step": 272620 + }, + { + "epoch": 1.7485641352903638, + "grad_norm": 0.00921363290399313, + "learning_rate": 4.7375268188194934e-07, + "loss": 0.0008, + "step": 272630 + }, + { + "epoch": 1.74862827218415, + "grad_norm": 0.3204735815525055, + "learning_rate": 4.73514903211546e-07, + "loss": 0.0032, + "step": 272640 + }, + { + "epoch": 1.748692409077936, + "grad_norm": 0.03617285192012787, + "learning_rate": 4.7327718126123577e-07, + "loss": 0.0015, + "step": 272650 + }, + { + "epoch": 1.748756545971722, + "grad_norm": 0.022229550406336784, + "learning_rate": 4.7303951603399736e-07, + "loss": 0.0006, + "step": 272660 + }, + { + "epoch": 1.7488206828655082, + "grad_norm": 0.006759267766028643, + "learning_rate": 4.7280190753280943e-07, + "loss": 0.0006, + "step": 272670 + }, + { + "epoch": 1.748884819759294, + "grad_norm": 0.1989506185054779, + "learning_rate": 4.7256435576064963e-07, + "loss": 0.0006, + "step": 272680 + }, + { + "epoch": 1.7489489566530803, + "grad_norm": 0.06484976410865784, + "learning_rate": 4.7232686072049394e-07, + "loss": 0.0009, + "step": 272690 + }, + { + "epoch": 1.7490130935468664, + "grad_norm": 0.0020673118997365236, + "learning_rate": 4.720894224153172e-07, + "loss": 0.001, + "step": 272700 + }, + { + "epoch": 1.7490772304406526, + "grad_norm": 0.10880999267101288, + "learning_rate": 4.718520408480964e-07, + "loss": 0.0019, + "step": 272710 + }, + { + "epoch": 1.7491413673344387, + "grad_norm": 0.40636178851127625, + "learning_rate": 4.716147160218071e-07, + "loss": 0.001, + "step": 272720 + }, + { + "epoch": 1.7492055042282249, + "grad_norm": 0.316981703042984, + "learning_rate": 4.713774479394212e-07, + "loss": 0.0019, + "step": 272730 + }, + { + "epoch": 1.7492696411220108, + "grad_norm": 0.11160846799612045, + "learning_rate": 4.711402366039125e-07, + "loss": 0.0017, + "step": 272740 + }, + { + "epoch": 1.749333778015797, + "grad_norm": 0.04493987560272217, + "learning_rate": 4.7090308201825305e-07, + "loss": 0.0008, + "step": 272750 + }, + { + "epoch": 1.7493979149095829, + "grad_norm": 0.06282531470060349, + "learning_rate": 4.70665984185415e-07, + "loss": 0.0007, + "step": 272760 + }, + { + "epoch": 1.749462051803369, + "grad_norm": 0.08844546228647232, + "learning_rate": 4.7042894310836975e-07, + "loss": 0.0009, + "step": 272770 + }, + { + "epoch": 1.7495261886971551, + "grad_norm": 0.06802531331777573, + "learning_rate": 4.7019195879008603e-07, + "loss": 0.001, + "step": 272780 + }, + { + "epoch": 1.7495903255909413, + "grad_norm": 0.15322864055633545, + "learning_rate": 4.699550312335355e-07, + "loss": 0.001, + "step": 272790 + }, + { + "epoch": 1.7496544624847274, + "grad_norm": 0.13658012449741364, + "learning_rate": 4.697181604416856e-07, + "loss": 0.0004, + "step": 272800 + }, + { + "epoch": 1.7497185993785136, + "grad_norm": 0.07306113094091415, + "learning_rate": 4.694813464175052e-07, + "loss": 0.0008, + "step": 272810 + }, + { + "epoch": 1.7497827362722997, + "grad_norm": 0.03323390707373619, + "learning_rate": 4.6924458916396076e-07, + "loss": 0.0014, + "step": 272820 + }, + { + "epoch": 1.7498468731660857, + "grad_norm": 0.1885232925415039, + "learning_rate": 4.690078886840199e-07, + "loss": 0.0016, + "step": 272830 + }, + { + "epoch": 1.7499110100598718, + "grad_norm": 0.09597447514533997, + "learning_rate": 4.687712449806492e-07, + "loss": 0.0012, + "step": 272840 + }, + { + "epoch": 1.7499751469536577, + "grad_norm": 0.2891257703304291, + "learning_rate": 4.6853465805681287e-07, + "loss": 0.0015, + "step": 272850 + }, + { + "epoch": 1.750039283847444, + "grad_norm": 0.1429951786994934, + "learning_rate": 4.682981279154747e-07, + "loss": 0.0007, + "step": 272860 + }, + { + "epoch": 1.75010342074123, + "grad_norm": 0.01997167058289051, + "learning_rate": 4.680616545596012e-07, + "loss": 0.0009, + "step": 272870 + }, + { + "epoch": 1.7501675576350162, + "grad_norm": 0.04769844189286232, + "learning_rate": 4.6782523799215384e-07, + "loss": 0.0009, + "step": 272880 + }, + { + "epoch": 1.7502316945288023, + "grad_norm": 0.07950573414564133, + "learning_rate": 4.6758887821609534e-07, + "loss": 0.0007, + "step": 272890 + }, + { + "epoch": 1.7502958314225885, + "grad_norm": 0.06623969227075577, + "learning_rate": 4.673525752343866e-07, + "loss": 0.0005, + "step": 272900 + }, + { + "epoch": 1.7503599683163744, + "grad_norm": 0.03988812863826752, + "learning_rate": 4.671163290499903e-07, + "loss": 0.0005, + "step": 272910 + }, + { + "epoch": 1.7504241052101606, + "grad_norm": 0.11533651500940323, + "learning_rate": 4.668801396658662e-07, + "loss": 0.001, + "step": 272920 + }, + { + "epoch": 1.7504882421039467, + "grad_norm": 0.08430314064025879, + "learning_rate": 4.666440070849726e-07, + "loss": 0.0008, + "step": 272930 + }, + { + "epoch": 1.7505523789977326, + "grad_norm": 0.11702308803796768, + "learning_rate": 4.664079313102704e-07, + "loss": 0.0016, + "step": 272940 + }, + { + "epoch": 1.7506165158915188, + "grad_norm": 0.051689263433218, + "learning_rate": 4.6617191234471726e-07, + "loss": 0.0005, + "step": 272950 + }, + { + "epoch": 1.750680652785305, + "grad_norm": 0.09569631516933441, + "learning_rate": 4.6593595019127023e-07, + "loss": 0.0018, + "step": 272960 + }, + { + "epoch": 1.750744789679091, + "grad_norm": 0.09990435838699341, + "learning_rate": 4.657000448528848e-07, + "loss": 0.0012, + "step": 272970 + }, + { + "epoch": 1.7508089265728772, + "grad_norm": 0.09022072702646255, + "learning_rate": 4.6546419633252016e-07, + "loss": 0.0015, + "step": 272980 + }, + { + "epoch": 1.7508730634666634, + "grad_norm": 0.20424622297286987, + "learning_rate": 4.6522840463312904e-07, + "loss": 0.0007, + "step": 272990 + }, + { + "epoch": 1.7509372003604493, + "grad_norm": 0.18562482297420502, + "learning_rate": 4.649926697576679e-07, + "loss": 0.0006, + "step": 273000 + }, + { + "epoch": 1.7510013372542355, + "grad_norm": 0.038311198353767395, + "learning_rate": 4.6475699170908826e-07, + "loss": 0.0007, + "step": 273010 + }, + { + "epoch": 1.7510654741480214, + "grad_norm": 0.040191225707530975, + "learning_rate": 4.645213704903456e-07, + "loss": 0.0007, + "step": 273020 + }, + { + "epoch": 1.7511296110418075, + "grad_norm": 0.011406010948121548, + "learning_rate": 4.64285806104392e-07, + "loss": 0.0021, + "step": 273030 + }, + { + "epoch": 1.7511937479355937, + "grad_norm": 0.08132412284612656, + "learning_rate": 4.640502985541784e-07, + "loss": 0.0004, + "step": 273040 + }, + { + "epoch": 1.7512578848293798, + "grad_norm": 0.07212943583726883, + "learning_rate": 4.638148478426557e-07, + "loss": 0.0013, + "step": 273050 + }, + { + "epoch": 1.751322021723166, + "grad_norm": 0.055701203644275665, + "learning_rate": 4.6357945397277615e-07, + "loss": 0.0009, + "step": 273060 + }, + { + "epoch": 1.7513861586169521, + "grad_norm": 0.10213173925876617, + "learning_rate": 4.6334411694748785e-07, + "loss": 0.0013, + "step": 273070 + }, + { + "epoch": 1.7514502955107383, + "grad_norm": 0.047767817974090576, + "learning_rate": 4.6310883676973895e-07, + "loss": 0.0006, + "step": 273080 + }, + { + "epoch": 1.7515144324045242, + "grad_norm": 0.07404826581478119, + "learning_rate": 4.6287361344247995e-07, + "loss": 0.001, + "step": 273090 + }, + { + "epoch": 1.7515785692983104, + "grad_norm": 0.014767042361199856, + "learning_rate": 4.6263844696865734e-07, + "loss": 0.0028, + "step": 273100 + }, + { + "epoch": 1.7516427061920963, + "grad_norm": 0.06555477529764175, + "learning_rate": 4.6240333735121765e-07, + "loss": 0.0009, + "step": 273110 + }, + { + "epoch": 1.7517068430858824, + "grad_norm": 0.028044844046235085, + "learning_rate": 4.621682845931064e-07, + "loss": 0.0039, + "step": 273120 + }, + { + "epoch": 1.7517709799796686, + "grad_norm": 0.06414242833852768, + "learning_rate": 4.619332886972699e-07, + "loss": 0.0009, + "step": 273130 + }, + { + "epoch": 1.7518351168734547, + "grad_norm": 0.29313191771507263, + "learning_rate": 4.6169834966665326e-07, + "loss": 0.0013, + "step": 273140 + }, + { + "epoch": 1.7518992537672409, + "grad_norm": 0.2261309027671814, + "learning_rate": 4.6146346750419957e-07, + "loss": 0.0013, + "step": 273150 + }, + { + "epoch": 1.751963390661027, + "grad_norm": 0.0017110253684222698, + "learning_rate": 4.6122864221285144e-07, + "loss": 0.0011, + "step": 273160 + }, + { + "epoch": 1.752027527554813, + "grad_norm": 0.11952899396419525, + "learning_rate": 4.6099387379555327e-07, + "loss": 0.0017, + "step": 273170 + }, + { + "epoch": 1.752091664448599, + "grad_norm": 0.026024747639894485, + "learning_rate": 4.607591622552454e-07, + "loss": 0.0005, + "step": 273180 + }, + { + "epoch": 1.752155801342385, + "grad_norm": 0.022143952548503876, + "learning_rate": 4.6052450759487e-07, + "loss": 0.0012, + "step": 273190 + }, + { + "epoch": 1.7522199382361712, + "grad_norm": 0.049013178795576096, + "learning_rate": 4.6028990981736577e-07, + "loss": 0.0023, + "step": 273200 + }, + { + "epoch": 1.7522840751299573, + "grad_norm": 0.05707814171910286, + "learning_rate": 4.600553689256737e-07, + "loss": 0.0027, + "step": 273210 + }, + { + "epoch": 1.7523482120237435, + "grad_norm": 0.23792365193367004, + "learning_rate": 4.5982088492273316e-07, + "loss": 0.001, + "step": 273220 + }, + { + "epoch": 1.7524123489175296, + "grad_norm": 0.09483388811349869, + "learning_rate": 4.595864578114806e-07, + "loss": 0.0013, + "step": 273230 + }, + { + "epoch": 1.7524764858113158, + "grad_norm": 0.0022677434608340263, + "learning_rate": 4.5935208759485593e-07, + "loss": 0.0006, + "step": 273240 + }, + { + "epoch": 1.752540622705102, + "grad_norm": 0.04843544960021973, + "learning_rate": 4.5911777427579464e-07, + "loss": 0.0014, + "step": 273250 + }, + { + "epoch": 1.7526047595988878, + "grad_norm": 0.0260971337556839, + "learning_rate": 4.5888351785723264e-07, + "loss": 0.0011, + "step": 273260 + }, + { + "epoch": 1.752668896492674, + "grad_norm": 0.04305571690201759, + "learning_rate": 4.586493183421048e-07, + "loss": 0.0025, + "step": 273270 + }, + { + "epoch": 1.75273303338646, + "grad_norm": 0.011727412231266499, + "learning_rate": 4.584151757333477e-07, + "loss": 0.0007, + "step": 273280 + }, + { + "epoch": 1.752797170280246, + "grad_norm": 0.07655268907546997, + "learning_rate": 4.58181090033894e-07, + "loss": 0.0008, + "step": 273290 + }, + { + "epoch": 1.7528613071740322, + "grad_norm": 0.10501454025506973, + "learning_rate": 4.57947061246678e-07, + "loss": 0.0009, + "step": 273300 + }, + { + "epoch": 1.7529254440678184, + "grad_norm": 0.016707416623830795, + "learning_rate": 4.5771308937462957e-07, + "loss": 0.0017, + "step": 273310 + }, + { + "epoch": 1.7529895809616045, + "grad_norm": 0.03645702451467514, + "learning_rate": 4.574791744206841e-07, + "loss": 0.001, + "step": 273320 + }, + { + "epoch": 1.7530537178553907, + "grad_norm": 0.09454400092363358, + "learning_rate": 4.5724531638777105e-07, + "loss": 0.0007, + "step": 273330 + }, + { + "epoch": 1.7531178547491768, + "grad_norm": 0.05410655215382576, + "learning_rate": 4.5701151527882014e-07, + "loss": 0.0008, + "step": 273340 + }, + { + "epoch": 1.7531819916429627, + "grad_norm": 0.040957093238830566, + "learning_rate": 4.5677777109676134e-07, + "loss": 0.0014, + "step": 273350 + }, + { + "epoch": 1.7532461285367489, + "grad_norm": 0.04725410416722298, + "learning_rate": 4.5654408384452507e-07, + "loss": 0.0009, + "step": 273360 + }, + { + "epoch": 1.7533102654305348, + "grad_norm": 0.043523017317056656, + "learning_rate": 4.5631045352503844e-07, + "loss": 0.004, + "step": 273370 + }, + { + "epoch": 1.753374402324321, + "grad_norm": 0.04890415817499161, + "learning_rate": 4.560768801412285e-07, + "loss": 0.0015, + "step": 273380 + }, + { + "epoch": 1.753438539218107, + "grad_norm": 0.12464313954114914, + "learning_rate": 4.5584336369602355e-07, + "loss": 0.0012, + "step": 273390 + }, + { + "epoch": 1.7535026761118933, + "grad_norm": 0.07013463228940964, + "learning_rate": 4.556099041923484e-07, + "loss": 0.0015, + "step": 273400 + }, + { + "epoch": 1.7535668130056794, + "grad_norm": 0.09630367159843445, + "learning_rate": 4.553765016331296e-07, + "loss": 0.0013, + "step": 273410 + }, + { + "epoch": 1.7536309498994656, + "grad_norm": 0.018023649230599403, + "learning_rate": 4.551431560212899e-07, + "loss": 0.0008, + "step": 273420 + }, + { + "epoch": 1.7536950867932515, + "grad_norm": 0.07699010521173477, + "learning_rate": 4.5490986735975573e-07, + "loss": 0.0006, + "step": 273430 + }, + { + "epoch": 1.7537592236870376, + "grad_norm": 0.02094586379826069, + "learning_rate": 4.5467663565144924e-07, + "loss": 0.0018, + "step": 273440 + }, + { + "epoch": 1.7538233605808236, + "grad_norm": 0.10399764031171799, + "learning_rate": 4.544434608992931e-07, + "loss": 0.0015, + "step": 273450 + }, + { + "epoch": 1.7538874974746097, + "grad_norm": 0.013622358441352844, + "learning_rate": 4.5421034310620784e-07, + "loss": 0.0016, + "step": 273460 + }, + { + "epoch": 1.7539516343683959, + "grad_norm": 0.04761362448334694, + "learning_rate": 4.5397728227511596e-07, + "loss": 0.0005, + "step": 273470 + }, + { + "epoch": 1.754015771262182, + "grad_norm": 0.003122537164017558, + "learning_rate": 4.537442784089391e-07, + "loss": 0.001, + "step": 273480 + }, + { + "epoch": 1.7540799081559681, + "grad_norm": 0.31933242082595825, + "learning_rate": 4.535113315105949e-07, + "loss": 0.0015, + "step": 273490 + }, + { + "epoch": 1.7541440450497543, + "grad_norm": 0.09475366771221161, + "learning_rate": 4.532784415830027e-07, + "loss": 0.001, + "step": 273500 + }, + { + "epoch": 1.7542081819435404, + "grad_norm": 0.1212468072772026, + "learning_rate": 4.530456086290813e-07, + "loss": 0.0012, + "step": 273510 + }, + { + "epoch": 1.7542723188373264, + "grad_norm": 0.005724911577999592, + "learning_rate": 4.528128326517489e-07, + "loss": 0.0005, + "step": 273520 + }, + { + "epoch": 1.7543364557311125, + "grad_norm": 0.027791861444711685, + "learning_rate": 4.5258011365392096e-07, + "loss": 0.0006, + "step": 273530 + }, + { + "epoch": 1.7544005926248984, + "grad_norm": 0.09632658958435059, + "learning_rate": 4.5234745163851346e-07, + "loss": 0.0011, + "step": 273540 + }, + { + "epoch": 1.7544647295186846, + "grad_norm": 0.08549169450998306, + "learning_rate": 4.521148466084435e-07, + "loss": 0.0007, + "step": 273550 + }, + { + "epoch": 1.7545288664124707, + "grad_norm": 0.1568332314491272, + "learning_rate": 4.5188229856662493e-07, + "loss": 0.0008, + "step": 273560 + }, + { + "epoch": 1.754593003306257, + "grad_norm": 0.09158727526664734, + "learning_rate": 4.5164980751597143e-07, + "loss": 0.0009, + "step": 273570 + }, + { + "epoch": 1.754657140200043, + "grad_norm": 0.06931840628385544, + "learning_rate": 4.5141737345939573e-07, + "loss": 0.0009, + "step": 273580 + }, + { + "epoch": 1.7547212770938292, + "grad_norm": 0.00592627702280879, + "learning_rate": 4.5118499639981216e-07, + "loss": 0.0007, + "step": 273590 + }, + { + "epoch": 1.7547854139876151, + "grad_norm": 0.15602008998394012, + "learning_rate": 4.509526763401312e-07, + "loss": 0.0013, + "step": 273600 + }, + { + "epoch": 1.7548495508814013, + "grad_norm": 0.01364865992218256, + "learning_rate": 4.507204132832649e-07, + "loss": 0.001, + "step": 273610 + }, + { + "epoch": 1.7549136877751874, + "grad_norm": 0.020498182624578476, + "learning_rate": 4.504882072321215e-07, + "loss": 0.0012, + "step": 273620 + }, + { + "epoch": 1.7549778246689733, + "grad_norm": 0.046825554221868515, + "learning_rate": 4.5025605818961373e-07, + "loss": 0.0014, + "step": 273630 + }, + { + "epoch": 1.7550419615627595, + "grad_norm": 0.11790932714939117, + "learning_rate": 4.500239661586492e-07, + "loss": 0.001, + "step": 273640 + }, + { + "epoch": 1.7551060984565456, + "grad_norm": 0.018157294020056725, + "learning_rate": 4.4979193114213506e-07, + "loss": 0.0015, + "step": 273650 + }, + { + "epoch": 1.7551702353503318, + "grad_norm": 0.06198253110051155, + "learning_rate": 4.4955995314298117e-07, + "loss": 0.0038, + "step": 273660 + }, + { + "epoch": 1.755234372244118, + "grad_norm": 0.22804534435272217, + "learning_rate": 4.493280321640936e-07, + "loss": 0.0015, + "step": 273670 + }, + { + "epoch": 1.755298509137904, + "grad_norm": 0.0053673069924116135, + "learning_rate": 4.490961682083772e-07, + "loss": 0.0006, + "step": 273680 + }, + { + "epoch": 1.75536264603169, + "grad_norm": 0.04484537988901138, + "learning_rate": 4.48864361278738e-07, + "loss": 0.0008, + "step": 273690 + }, + { + "epoch": 1.7554267829254762, + "grad_norm": 0.16247770190238953, + "learning_rate": 4.48632611378082e-07, + "loss": 0.0011, + "step": 273700 + }, + { + "epoch": 1.755490919819262, + "grad_norm": 0.07464831322431564, + "learning_rate": 4.4840091850931187e-07, + "loss": 0.0031, + "step": 273710 + }, + { + "epoch": 1.7555550567130482, + "grad_norm": 0.04992212727665901, + "learning_rate": 4.4816928267533144e-07, + "loss": 0.0033, + "step": 273720 + }, + { + "epoch": 1.7556191936068344, + "grad_norm": 0.04683858901262283, + "learning_rate": 4.479377038790417e-07, + "loss": 0.0006, + "step": 273730 + }, + { + "epoch": 1.7556833305006205, + "grad_norm": 0.03431175649166107, + "learning_rate": 4.4770618212334695e-07, + "loss": 0.0013, + "step": 273740 + }, + { + "epoch": 1.7557474673944067, + "grad_norm": 0.00959078874439001, + "learning_rate": 4.4747471741114713e-07, + "loss": 0.0006, + "step": 273750 + }, + { + "epoch": 1.7558116042881928, + "grad_norm": 0.3120195269584656, + "learning_rate": 4.4724330974534327e-07, + "loss": 0.0016, + "step": 273760 + }, + { + "epoch": 1.755875741181979, + "grad_norm": 0.20491768419742584, + "learning_rate": 4.47011959128833e-07, + "loss": 0.0007, + "step": 273770 + }, + { + "epoch": 1.755939878075765, + "grad_norm": 0.06042253226041794, + "learning_rate": 4.4678066556451793e-07, + "loss": 0.0006, + "step": 273780 + }, + { + "epoch": 1.756004014969551, + "grad_norm": 0.08034548163414001, + "learning_rate": 4.4654942905529577e-07, + "loss": 0.0009, + "step": 273790 + }, + { + "epoch": 1.756068151863337, + "grad_norm": 0.039186157286167145, + "learning_rate": 4.463182496040619e-07, + "loss": 0.0014, + "step": 273800 + }, + { + "epoch": 1.7561322887571231, + "grad_norm": 0.13175533711910248, + "learning_rate": 4.4608712721371626e-07, + "loss": 0.0008, + "step": 273810 + }, + { + "epoch": 1.7561964256509093, + "grad_norm": 0.09573426097631454, + "learning_rate": 4.4585606188715325e-07, + "loss": 0.0022, + "step": 273820 + }, + { + "epoch": 1.7562605625446954, + "grad_norm": 0.09324001520872116, + "learning_rate": 4.456250536272688e-07, + "loss": 0.001, + "step": 273830 + }, + { + "epoch": 1.7563246994384816, + "grad_norm": 0.17717893421649933, + "learning_rate": 4.4539410243695624e-07, + "loss": 0.0009, + "step": 273840 + }, + { + "epoch": 1.7563888363322677, + "grad_norm": 0.06860344111919403, + "learning_rate": 4.451632083191121e-07, + "loss": 0.0028, + "step": 273850 + }, + { + "epoch": 1.7564529732260536, + "grad_norm": 0.07243098318576813, + "learning_rate": 4.449323712766279e-07, + "loss": 0.0023, + "step": 273860 + }, + { + "epoch": 1.7565171101198398, + "grad_norm": 0.1243899017572403, + "learning_rate": 4.4470159131239645e-07, + "loss": 0.0009, + "step": 273870 + }, + { + "epoch": 1.7565812470136257, + "grad_norm": 0.10625339299440384, + "learning_rate": 4.444708684293086e-07, + "loss": 0.0012, + "step": 273880 + }, + { + "epoch": 1.7566453839074119, + "grad_norm": 0.02705708146095276, + "learning_rate": 4.442402026302578e-07, + "loss": 0.0011, + "step": 273890 + }, + { + "epoch": 1.756709520801198, + "grad_norm": 0.008359714411199093, + "learning_rate": 4.4400959391813323e-07, + "loss": 0.0005, + "step": 273900 + }, + { + "epoch": 1.7567736576949842, + "grad_norm": 0.006910772528499365, + "learning_rate": 4.4377904229582434e-07, + "loss": 0.0014, + "step": 273910 + }, + { + "epoch": 1.7568377945887703, + "grad_norm": 0.12756577134132385, + "learning_rate": 4.435485477662199e-07, + "loss": 0.0021, + "step": 273920 + }, + { + "epoch": 1.7569019314825565, + "grad_norm": 0.2769004702568054, + "learning_rate": 4.433181103322093e-07, + "loss": 0.001, + "step": 273930 + }, + { + "epoch": 1.7569660683763426, + "grad_norm": 0.0014730626717209816, + "learning_rate": 4.4308772999667905e-07, + "loss": 0.0009, + "step": 273940 + }, + { + "epoch": 1.7570302052701285, + "grad_norm": 0.0016581410309299827, + "learning_rate": 4.428574067625158e-07, + "loss": 0.0011, + "step": 273950 + }, + { + "epoch": 1.7570943421639147, + "grad_norm": 0.26757508516311646, + "learning_rate": 4.426271406326066e-07, + "loss": 0.0026, + "step": 273960 + }, + { + "epoch": 1.7571584790577006, + "grad_norm": 0.01904025487601757, + "learning_rate": 4.4239693160983696e-07, + "loss": 0.0014, + "step": 273970 + }, + { + "epoch": 1.7572226159514868, + "grad_norm": 0.04785741865634918, + "learning_rate": 4.4216677969709076e-07, + "loss": 0.0013, + "step": 273980 + }, + { + "epoch": 1.757286752845273, + "grad_norm": 0.12253019213676453, + "learning_rate": 4.419366848972517e-07, + "loss": 0.0012, + "step": 273990 + }, + { + "epoch": 1.757350889739059, + "grad_norm": 0.1177787259221077, + "learning_rate": 4.4170664721320424e-07, + "loss": 0.0013, + "step": 274000 + }, + { + "epoch": 1.7574150266328452, + "grad_norm": 0.10684992372989655, + "learning_rate": 4.4147666664782984e-07, + "loss": 0.0008, + "step": 274010 + }, + { + "epoch": 1.7574791635266314, + "grad_norm": 0.012389772571623325, + "learning_rate": 4.412467432040113e-07, + "loss": 0.0015, + "step": 274020 + }, + { + "epoch": 1.7575433004204173, + "grad_norm": 0.036616671830415726, + "learning_rate": 4.4101687688462793e-07, + "loss": 0.0008, + "step": 274030 + }, + { + "epoch": 1.7576074373142034, + "grad_norm": 0.03530687093734741, + "learning_rate": 4.4078706769256243e-07, + "loss": 0.002, + "step": 274040 + }, + { + "epoch": 1.7576715742079896, + "grad_norm": 0.09993518888950348, + "learning_rate": 4.405573156306936e-07, + "loss": 0.0015, + "step": 274050 + }, + { + "epoch": 1.7577357111017755, + "grad_norm": 0.07789759337902069, + "learning_rate": 4.4032762070189917e-07, + "loss": 0.0012, + "step": 274060 + }, + { + "epoch": 1.7577998479955617, + "grad_norm": 0.11408476531505585, + "learning_rate": 4.40097982909059e-07, + "loss": 0.0005, + "step": 274070 + }, + { + "epoch": 1.7578639848893478, + "grad_norm": 0.07201806455850601, + "learning_rate": 4.3986840225505036e-07, + "loss": 0.0009, + "step": 274080 + }, + { + "epoch": 1.757928121783134, + "grad_norm": 0.1237541139125824, + "learning_rate": 4.3963887874274915e-07, + "loss": 0.001, + "step": 274090 + }, + { + "epoch": 1.75799225867692, + "grad_norm": 0.05876180902123451, + "learning_rate": 4.3940941237503143e-07, + "loss": 0.0008, + "step": 274100 + }, + { + "epoch": 1.7580563955707063, + "grad_norm": 0.030227554962038994, + "learning_rate": 4.3918000315477436e-07, + "loss": 0.001, + "step": 274110 + }, + { + "epoch": 1.7581205324644922, + "grad_norm": 0.018378805369138718, + "learning_rate": 4.3895065108485124e-07, + "loss": 0.0011, + "step": 274120 + }, + { + "epoch": 1.7581846693582783, + "grad_norm": 0.006053559482097626, + "learning_rate": 4.387213561681364e-07, + "loss": 0.0008, + "step": 274130 + }, + { + "epoch": 1.7582488062520643, + "grad_norm": 0.07609471678733826, + "learning_rate": 4.3849211840750137e-07, + "loss": 0.0013, + "step": 274140 + }, + { + "epoch": 1.7583129431458504, + "grad_norm": 0.13260923326015472, + "learning_rate": 4.3826293780582175e-07, + "loss": 0.0007, + "step": 274150 + }, + { + "epoch": 1.7583770800396366, + "grad_norm": 0.05366222932934761, + "learning_rate": 4.380338143659674e-07, + "loss": 0.001, + "step": 274160 + }, + { + "epoch": 1.7584412169334227, + "grad_norm": 0.0023712320253252983, + "learning_rate": 4.3780474809080985e-07, + "loss": 0.0012, + "step": 274170 + }, + { + "epoch": 1.7585053538272089, + "grad_norm": 0.10853615403175354, + "learning_rate": 4.3757573898321803e-07, + "loss": 0.0013, + "step": 274180 + }, + { + "epoch": 1.758569490720995, + "grad_norm": 0.046302780508995056, + "learning_rate": 4.373467870460646e-07, + "loss": 0.0014, + "step": 274190 + }, + { + "epoch": 1.7586336276147811, + "grad_norm": 0.045307353138923645, + "learning_rate": 4.3711789228221615e-07, + "loss": 0.0016, + "step": 274200 + }, + { + "epoch": 1.758697764508567, + "grad_norm": 0.08859158307313919, + "learning_rate": 4.368890546945409e-07, + "loss": 0.0011, + "step": 274210 + }, + { + "epoch": 1.7587619014023532, + "grad_norm": 0.015490715391933918, + "learning_rate": 4.3666027428590775e-07, + "loss": 0.0006, + "step": 274220 + }, + { + "epoch": 1.7588260382961391, + "grad_norm": 0.002372288377955556, + "learning_rate": 4.364315510591821e-07, + "loss": 0.0008, + "step": 274230 + }, + { + "epoch": 1.7588901751899253, + "grad_norm": 0.11505340039730072, + "learning_rate": 4.362028850172312e-07, + "loss": 0.0013, + "step": 274240 + }, + { + "epoch": 1.7589543120837114, + "grad_norm": 0.006366999354213476, + "learning_rate": 4.3597427616292044e-07, + "loss": 0.0012, + "step": 274250 + }, + { + "epoch": 1.7590184489774976, + "grad_norm": 0.0795503780245781, + "learning_rate": 4.3574572449911254e-07, + "loss": 0.0022, + "step": 274260 + }, + { + "epoch": 1.7590825858712837, + "grad_norm": 0.0016061549540609121, + "learning_rate": 4.3551723002867364e-07, + "loss": 0.0011, + "step": 274270 + }, + { + "epoch": 1.75914672276507, + "grad_norm": 0.016629310324788094, + "learning_rate": 4.352887927544658e-07, + "loss": 0.0013, + "step": 274280 + }, + { + "epoch": 1.7592108596588558, + "grad_norm": 0.3661215007305145, + "learning_rate": 4.350604126793523e-07, + "loss": 0.0012, + "step": 274290 + }, + { + "epoch": 1.759274996552642, + "grad_norm": 0.11058757454156876, + "learning_rate": 4.348320898061931e-07, + "loss": 0.0014, + "step": 274300 + }, + { + "epoch": 1.759339133446428, + "grad_norm": 0.0846678763628006, + "learning_rate": 4.346038241378514e-07, + "loss": 0.0009, + "step": 274310 + }, + { + "epoch": 1.759403270340214, + "grad_norm": 0.09430932998657227, + "learning_rate": 4.3437561567718667e-07, + "loss": 0.0016, + "step": 274320 + }, + { + "epoch": 1.7594674072340002, + "grad_norm": 0.20638717710971832, + "learning_rate": 4.3414746442705767e-07, + "loss": 0.0013, + "step": 274330 + }, + { + "epoch": 1.7595315441277863, + "grad_norm": 0.1938396543264389, + "learning_rate": 4.3391937039032496e-07, + "loss": 0.0011, + "step": 274340 + }, + { + "epoch": 1.7595956810215725, + "grad_norm": 0.05062471330165863, + "learning_rate": 4.3369133356984563e-07, + "loss": 0.0023, + "step": 274350 + }, + { + "epoch": 1.7596598179153586, + "grad_norm": 0.13887950778007507, + "learning_rate": 4.3346335396847736e-07, + "loss": 0.0006, + "step": 274360 + }, + { + "epoch": 1.7597239548091448, + "grad_norm": 0.09052564948797226, + "learning_rate": 4.3323543158907635e-07, + "loss": 0.0003, + "step": 274370 + }, + { + "epoch": 1.7597880917029307, + "grad_norm": 0.008419949561357498, + "learning_rate": 4.3300756643450016e-07, + "loss": 0.0004, + "step": 274380 + }, + { + "epoch": 1.7598522285967169, + "grad_norm": 0.0025691508781164885, + "learning_rate": 4.3277975850760266e-07, + "loss": 0.0004, + "step": 274390 + }, + { + "epoch": 1.7599163654905028, + "grad_norm": 0.04689428210258484, + "learning_rate": 4.325520078112394e-07, + "loss": 0.0009, + "step": 274400 + }, + { + "epoch": 1.759980502384289, + "grad_norm": 0.04605294391512871, + "learning_rate": 4.323243143482625e-07, + "loss": 0.0015, + "step": 274410 + }, + { + "epoch": 1.760044639278075, + "grad_norm": 0.03803924471139908, + "learning_rate": 4.3209667812152745e-07, + "loss": 0.0007, + "step": 274420 + }, + { + "epoch": 1.7601087761718612, + "grad_norm": 0.041317418217659, + "learning_rate": 4.3186909913388587e-07, + "loss": 0.0014, + "step": 274430 + }, + { + "epoch": 1.7601729130656474, + "grad_norm": 0.050265710800886154, + "learning_rate": 4.3164157738818935e-07, + "loss": 0.001, + "step": 274440 + }, + { + "epoch": 1.7602370499594335, + "grad_norm": 0.026506226509809494, + "learning_rate": 4.3141411288728783e-07, + "loss": 0.0005, + "step": 274450 + }, + { + "epoch": 1.7603011868532195, + "grad_norm": 0.07355380058288574, + "learning_rate": 4.3118670563403296e-07, + "loss": 0.0011, + "step": 274460 + }, + { + "epoch": 1.7603653237470056, + "grad_norm": 0.04049091786146164, + "learning_rate": 4.309593556312747e-07, + "loss": 0.0013, + "step": 274470 + }, + { + "epoch": 1.7604294606407918, + "grad_norm": 0.02454739809036255, + "learning_rate": 4.3073206288185956e-07, + "loss": 0.0028, + "step": 274480 + }, + { + "epoch": 1.7604935975345777, + "grad_norm": 0.0465393029153347, + "learning_rate": 4.3050482738863866e-07, + "loss": 0.0007, + "step": 274490 + }, + { + "epoch": 1.7605577344283638, + "grad_norm": 0.15284514427185059, + "learning_rate": 4.30277649154458e-07, + "loss": 0.0007, + "step": 274500 + }, + { + "epoch": 1.76062187132215, + "grad_norm": 0.1650029420852661, + "learning_rate": 4.300505281821643e-07, + "loss": 0.001, + "step": 274510 + }, + { + "epoch": 1.7606860082159361, + "grad_norm": 0.022082634270191193, + "learning_rate": 4.2982346447460245e-07, + "loss": 0.0002, + "step": 274520 + }, + { + "epoch": 1.7607501451097223, + "grad_norm": 0.0654936209321022, + "learning_rate": 4.2959645803462014e-07, + "loss": 0.0007, + "step": 274530 + }, + { + "epoch": 1.7608142820035084, + "grad_norm": 0.13372991979122162, + "learning_rate": 4.2936950886506014e-07, + "loss": 0.0016, + "step": 274540 + }, + { + "epoch": 1.7608784188972944, + "grad_norm": 0.02332022599875927, + "learning_rate": 4.2914261696876736e-07, + "loss": 0.0008, + "step": 274550 + }, + { + "epoch": 1.7609425557910805, + "grad_norm": 0.0456223227083683, + "learning_rate": 4.289157823485829e-07, + "loss": 0.0008, + "step": 274560 + }, + { + "epoch": 1.7610066926848664, + "grad_norm": 0.03842933103442192, + "learning_rate": 4.2868900500735166e-07, + "loss": 0.0011, + "step": 274570 + }, + { + "epoch": 1.7610708295786526, + "grad_norm": 0.15203480422496796, + "learning_rate": 4.284622849479142e-07, + "loss": 0.0023, + "step": 274580 + }, + { + "epoch": 1.7611349664724387, + "grad_norm": 0.06362316757440567, + "learning_rate": 4.2823562217311154e-07, + "loss": 0.0008, + "step": 274590 + }, + { + "epoch": 1.7611991033662249, + "grad_norm": 0.03955472260713577, + "learning_rate": 4.280090166857831e-07, + "loss": 0.0009, + "step": 274600 + }, + { + "epoch": 1.761263240260011, + "grad_norm": 0.12518471479415894, + "learning_rate": 4.2778246848877045e-07, + "loss": 0.0011, + "step": 274610 + }, + { + "epoch": 1.7613273771537972, + "grad_norm": 0.023512301966547966, + "learning_rate": 4.2755597758491084e-07, + "loss": 0.0006, + "step": 274620 + }, + { + "epoch": 1.7613915140475833, + "grad_norm": 0.007494448684155941, + "learning_rate": 4.2732954397704196e-07, + "loss": 0.0013, + "step": 274630 + }, + { + "epoch": 1.7614556509413692, + "grad_norm": 0.3269810378551483, + "learning_rate": 4.2710316766800264e-07, + "loss": 0.0012, + "step": 274640 + }, + { + "epoch": 1.7615197878351554, + "grad_norm": 0.017750240862369537, + "learning_rate": 4.2687684866062897e-07, + "loss": 0.0007, + "step": 274650 + }, + { + "epoch": 1.7615839247289413, + "grad_norm": 0.010823042131960392, + "learning_rate": 4.266505869577564e-07, + "loss": 0.0012, + "step": 274660 + }, + { + "epoch": 1.7616480616227275, + "grad_norm": 0.019104070961475372, + "learning_rate": 4.2642438256221996e-07, + "loss": 0.0013, + "step": 274670 + }, + { + "epoch": 1.7617121985165136, + "grad_norm": 0.001704822527244687, + "learning_rate": 4.2619823547685567e-07, + "loss": 0.0019, + "step": 274680 + }, + { + "epoch": 1.7617763354102998, + "grad_norm": 0.10967900604009628, + "learning_rate": 4.259721457044963e-07, + "loss": 0.0012, + "step": 274690 + }, + { + "epoch": 1.761840472304086, + "grad_norm": 0.04793315753340721, + "learning_rate": 4.2574611324797455e-07, + "loss": 0.0015, + "step": 274700 + }, + { + "epoch": 1.761904609197872, + "grad_norm": 0.1262887865304947, + "learning_rate": 4.255201381101226e-07, + "loss": 0.0013, + "step": 274710 + }, + { + "epoch": 1.761968746091658, + "grad_norm": 0.0792112872004509, + "learning_rate": 4.2529422029377376e-07, + "loss": 0.0012, + "step": 274720 + }, + { + "epoch": 1.7620328829854441, + "grad_norm": 0.16655921936035156, + "learning_rate": 4.250683598017574e-07, + "loss": 0.0011, + "step": 274730 + }, + { + "epoch": 1.76209701987923, + "grad_norm": 0.10308097302913666, + "learning_rate": 4.2484255663690466e-07, + "loss": 0.0008, + "step": 274740 + }, + { + "epoch": 1.7621611567730162, + "grad_norm": 0.0619034506380558, + "learning_rate": 4.246168108020432e-07, + "loss": 0.001, + "step": 274750 + }, + { + "epoch": 1.7622252936668024, + "grad_norm": 0.06137575954198837, + "learning_rate": 4.2439112230000414e-07, + "loss": 0.0006, + "step": 274760 + }, + { + "epoch": 1.7622894305605885, + "grad_norm": 0.021134236827492714, + "learning_rate": 4.2416549113361407e-07, + "loss": 0.0009, + "step": 274770 + }, + { + "epoch": 1.7623535674543747, + "grad_norm": 0.08857366442680359, + "learning_rate": 4.2393991730570016e-07, + "loss": 0.0008, + "step": 274780 + }, + { + "epoch": 1.7624177043481608, + "grad_norm": 0.12476522475481033, + "learning_rate": 4.2371440081909023e-07, + "loss": 0.0006, + "step": 274790 + }, + { + "epoch": 1.762481841241947, + "grad_norm": 0.017562726512551308, + "learning_rate": 4.234889416766097e-07, + "loss": 0.0009, + "step": 274800 + }, + { + "epoch": 1.7625459781357329, + "grad_norm": 0.010540449991822243, + "learning_rate": 4.232635398810836e-07, + "loss": 0.0012, + "step": 274810 + }, + { + "epoch": 1.762610115029519, + "grad_norm": 0.03732389584183693, + "learning_rate": 4.230381954353352e-07, + "loss": 0.0007, + "step": 274820 + }, + { + "epoch": 1.762674251923305, + "grad_norm": 0.1333763152360916, + "learning_rate": 4.2281290834219e-07, + "loss": 0.0013, + "step": 274830 + }, + { + "epoch": 1.762738388817091, + "grad_norm": 0.039669591933488846, + "learning_rate": 4.225876786044708e-07, + "loss": 0.0008, + "step": 274840 + }, + { + "epoch": 1.7628025257108773, + "grad_norm": 0.11995638161897659, + "learning_rate": 4.223625062249992e-07, + "loss": 0.0013, + "step": 274850 + }, + { + "epoch": 1.7628666626046634, + "grad_norm": 0.04716256260871887, + "learning_rate": 4.2213739120659626e-07, + "loss": 0.0006, + "step": 274860 + }, + { + "epoch": 1.7629307994984496, + "grad_norm": 0.15836253762245178, + "learning_rate": 4.2191233355208415e-07, + "loss": 0.0008, + "step": 274870 + }, + { + "epoch": 1.7629949363922357, + "grad_norm": 0.07259972393512726, + "learning_rate": 4.2168733326428235e-07, + "loss": 0.0008, + "step": 274880 + }, + { + "epoch": 1.7630590732860218, + "grad_norm": 0.05724453553557396, + "learning_rate": 4.214623903460108e-07, + "loss": 0.0008, + "step": 274890 + }, + { + "epoch": 1.7631232101798078, + "grad_norm": 0.06909746676683426, + "learning_rate": 4.2123750480008607e-07, + "loss": 0.0006, + "step": 274900 + }, + { + "epoch": 1.763187347073594, + "grad_norm": 0.18209044635295868, + "learning_rate": 4.2101267662932934e-07, + "loss": 0.0015, + "step": 274910 + }, + { + "epoch": 1.7632514839673799, + "grad_norm": 0.07382436841726303, + "learning_rate": 4.20787905836556e-07, + "loss": 0.0006, + "step": 274920 + }, + { + "epoch": 1.763315620861166, + "grad_norm": 0.06956379115581512, + "learning_rate": 4.2056319242458235e-07, + "loss": 0.0008, + "step": 274930 + }, + { + "epoch": 1.7633797577549521, + "grad_norm": 0.0836181566119194, + "learning_rate": 4.2033853639622534e-07, + "loss": 0.001, + "step": 274940 + }, + { + "epoch": 1.7634438946487383, + "grad_norm": 0.1506095826625824, + "learning_rate": 4.201139377543001e-07, + "loss": 0.0011, + "step": 274950 + }, + { + "epoch": 1.7635080315425244, + "grad_norm": 0.01491470541805029, + "learning_rate": 4.1988939650161986e-07, + "loss": 0.001, + "step": 274960 + }, + { + "epoch": 1.7635721684363106, + "grad_norm": 0.12581266462802887, + "learning_rate": 4.1966491264099797e-07, + "loss": 0.0019, + "step": 274970 + }, + { + "epoch": 1.7636363053300965, + "grad_norm": 0.012968640774488449, + "learning_rate": 4.1944048617524934e-07, + "loss": 0.0009, + "step": 274980 + }, + { + "epoch": 1.7637004422238827, + "grad_norm": 0.16107068955898285, + "learning_rate": 4.1921611710718404e-07, + "loss": 0.0017, + "step": 274990 + }, + { + "epoch": 1.7637645791176686, + "grad_norm": 0.0013026761589571834, + "learning_rate": 4.1899180543961524e-07, + "loss": 0.0026, + "step": 275000 + }, + { + "epoch": 1.7638287160114547, + "grad_norm": 0.005042532924562693, + "learning_rate": 4.187675511753536e-07, + "loss": 0.0018, + "step": 275010 + }, + { + "epoch": 1.763892852905241, + "grad_norm": 0.32132092118263245, + "learning_rate": 4.1854335431720794e-07, + "loss": 0.0013, + "step": 275020 + }, + { + "epoch": 1.763956989799027, + "grad_norm": 0.05832437425851822, + "learning_rate": 4.1831921486798876e-07, + "loss": 0.0009, + "step": 275030 + }, + { + "epoch": 1.7640211266928132, + "grad_norm": 0.20390057563781738, + "learning_rate": 4.180951328305044e-07, + "loss": 0.0009, + "step": 275040 + }, + { + "epoch": 1.7640852635865993, + "grad_norm": 0.044995591044425964, + "learning_rate": 4.1787110820756205e-07, + "loss": 0.0005, + "step": 275050 + }, + { + "epoch": 1.7641494004803855, + "grad_norm": 0.08996668457984924, + "learning_rate": 4.1764714100197003e-07, + "loss": 0.0008, + "step": 275060 + }, + { + "epoch": 1.7642135373741714, + "grad_norm": 0.09389866888523102, + "learning_rate": 4.174232312165344e-07, + "loss": 0.0007, + "step": 275070 + }, + { + "epoch": 1.7642776742679576, + "grad_norm": 0.0043205274268984795, + "learning_rate": 4.171993788540613e-07, + "loss": 0.0009, + "step": 275080 + }, + { + "epoch": 1.7643418111617435, + "grad_norm": 0.11533765494823456, + "learning_rate": 4.169755839173539e-07, + "loss": 0.0011, + "step": 275090 + }, + { + "epoch": 1.7644059480555296, + "grad_norm": 0.008159862831234932, + "learning_rate": 4.167518464092185e-07, + "loss": 0.0014, + "step": 275100 + }, + { + "epoch": 1.7644700849493158, + "grad_norm": 0.07528112083673477, + "learning_rate": 4.165281663324583e-07, + "loss": 0.0011, + "step": 275110 + }, + { + "epoch": 1.764534221843102, + "grad_norm": 0.04019409045577049, + "learning_rate": 4.1630454368987605e-07, + "loss": 0.001, + "step": 275120 + }, + { + "epoch": 1.764598358736888, + "grad_norm": 0.08170922100543976, + "learning_rate": 4.160809784842729e-07, + "loss": 0.0013, + "step": 275130 + }, + { + "epoch": 1.7646624956306742, + "grad_norm": 0.16807445883750916, + "learning_rate": 4.158574707184521e-07, + "loss": 0.0012, + "step": 275140 + }, + { + "epoch": 1.7647266325244602, + "grad_norm": 0.019600650295615196, + "learning_rate": 4.156340203952136e-07, + "loss": 0.0008, + "step": 275150 + }, + { + "epoch": 1.7647907694182463, + "grad_norm": 0.02643808163702488, + "learning_rate": 4.1541062751735696e-07, + "loss": 0.001, + "step": 275160 + }, + { + "epoch": 1.7648549063120325, + "grad_norm": 0.15279154479503632, + "learning_rate": 4.1518729208768096e-07, + "loss": 0.0011, + "step": 275170 + }, + { + "epoch": 1.7649190432058184, + "grad_norm": 0.1854482889175415, + "learning_rate": 4.1496401410898557e-07, + "loss": 0.0013, + "step": 275180 + }, + { + "epoch": 1.7649831800996045, + "grad_norm": 0.06766447424888611, + "learning_rate": 4.1474079358406806e-07, + "loss": 0.0009, + "step": 275190 + }, + { + "epoch": 1.7650473169933907, + "grad_norm": 0.13233855366706848, + "learning_rate": 4.1451763051572444e-07, + "loss": 0.0015, + "step": 275200 + }, + { + "epoch": 1.7651114538871768, + "grad_norm": 0.041932497173547745, + "learning_rate": 4.1429452490675315e-07, + "loss": 0.0012, + "step": 275210 + }, + { + "epoch": 1.765175590780963, + "grad_norm": 0.08011277765035629, + "learning_rate": 4.1407147675994855e-07, + "loss": 0.0004, + "step": 275220 + }, + { + "epoch": 1.7652397276747491, + "grad_norm": 0.01628202199935913, + "learning_rate": 4.138484860781061e-07, + "loss": 0.002, + "step": 275230 + }, + { + "epoch": 1.765303864568535, + "grad_norm": 0.059850651770830154, + "learning_rate": 4.136255528640193e-07, + "loss": 0.001, + "step": 275240 + }, + { + "epoch": 1.7653680014623212, + "grad_norm": 0.029925214126706123, + "learning_rate": 4.1340267712048245e-07, + "loss": 0.0017, + "step": 275250 + }, + { + "epoch": 1.7654321383561071, + "grad_norm": 0.06737666577100754, + "learning_rate": 4.131798588502883e-07, + "loss": 0.001, + "step": 275260 + }, + { + "epoch": 1.7654962752498933, + "grad_norm": 0.027920279651880264, + "learning_rate": 4.129570980562286e-07, + "loss": 0.0009, + "step": 275270 + }, + { + "epoch": 1.7655604121436794, + "grad_norm": 0.05020119249820709, + "learning_rate": 4.1273439474109387e-07, + "loss": 0.0008, + "step": 275280 + }, + { + "epoch": 1.7656245490374656, + "grad_norm": 0.034163858741521835, + "learning_rate": 4.125117489076763e-07, + "loss": 0.0012, + "step": 275290 + }, + { + "epoch": 1.7656886859312517, + "grad_norm": 0.04959217458963394, + "learning_rate": 4.122891605587653e-07, + "loss": 0.0009, + "step": 275300 + }, + { + "epoch": 1.7657528228250379, + "grad_norm": 0.19557246565818787, + "learning_rate": 4.1206662969715037e-07, + "loss": 0.0009, + "step": 275310 + }, + { + "epoch": 1.765816959718824, + "grad_norm": 0.10723380744457245, + "learning_rate": 4.1184415632561815e-07, + "loss": 0.0011, + "step": 275320 + }, + { + "epoch": 1.76588109661261, + "grad_norm": 0.02903168648481369, + "learning_rate": 4.116217404469586e-07, + "loss": 0.0003, + "step": 275330 + }, + { + "epoch": 1.765945233506396, + "grad_norm": 0.01404721848666668, + "learning_rate": 4.1139938206395837e-07, + "loss": 0.0018, + "step": 275340 + }, + { + "epoch": 1.766009370400182, + "grad_norm": 0.06477724015712738, + "learning_rate": 4.111770811794019e-07, + "loss": 0.0014, + "step": 275350 + }, + { + "epoch": 1.7660735072939682, + "grad_norm": 0.020367076620459557, + "learning_rate": 4.10954837796077e-07, + "loss": 0.0009, + "step": 275360 + }, + { + "epoch": 1.7661376441877543, + "grad_norm": 0.03996472433209419, + "learning_rate": 4.107326519167681e-07, + "loss": 0.0013, + "step": 275370 + }, + { + "epoch": 1.7662017810815405, + "grad_norm": 0.06612200289964676, + "learning_rate": 4.1051052354425915e-07, + "loss": 0.0013, + "step": 275380 + }, + { + "epoch": 1.7662659179753266, + "grad_norm": 0.11471281200647354, + "learning_rate": 4.1028845268133223e-07, + "loss": 0.0013, + "step": 275390 + }, + { + "epoch": 1.7663300548691128, + "grad_norm": 0.03563837707042694, + "learning_rate": 4.1006643933077184e-07, + "loss": 0.0012, + "step": 275400 + }, + { + "epoch": 1.7663941917628987, + "grad_norm": 0.09462326765060425, + "learning_rate": 4.0984448349535967e-07, + "loss": 0.0011, + "step": 275410 + }, + { + "epoch": 1.7664583286566848, + "grad_norm": 0.11228634417057037, + "learning_rate": 4.096225851778768e-07, + "loss": 0.0007, + "step": 275420 + }, + { + "epoch": 1.7665224655504708, + "grad_norm": 0.048812948167324066, + "learning_rate": 4.094007443811021e-07, + "loss": 0.0006, + "step": 275430 + }, + { + "epoch": 1.766586602444257, + "grad_norm": 0.0776638612151146, + "learning_rate": 4.091789611078184e-07, + "loss": 0.0008, + "step": 275440 + }, + { + "epoch": 1.766650739338043, + "grad_norm": 0.12345968186855316, + "learning_rate": 4.0895723536080343e-07, + "loss": 0.0015, + "step": 275450 + }, + { + "epoch": 1.7667148762318292, + "grad_norm": 0.01290800143033266, + "learning_rate": 4.08735567142835e-07, + "loss": 0.0012, + "step": 275460 + }, + { + "epoch": 1.7667790131256154, + "grad_norm": 0.11628074944019318, + "learning_rate": 4.085139564566909e-07, + "loss": 0.0009, + "step": 275470 + }, + { + "epoch": 1.7668431500194015, + "grad_norm": 0.014658177271485329, + "learning_rate": 4.0829240330514885e-07, + "loss": 0.001, + "step": 275480 + }, + { + "epoch": 1.7669072869131877, + "grad_norm": 0.07219427824020386, + "learning_rate": 4.0807090769098447e-07, + "loss": 0.0012, + "step": 275490 + }, + { + "epoch": 1.7669714238069736, + "grad_norm": 0.005928349681198597, + "learning_rate": 4.078494696169727e-07, + "loss": 0.001, + "step": 275500 + }, + { + "epoch": 1.7670355607007597, + "grad_norm": 0.0487651452422142, + "learning_rate": 4.0762808908589026e-07, + "loss": 0.0007, + "step": 275510 + }, + { + "epoch": 1.7670996975945457, + "grad_norm": 0.07806593924760818, + "learning_rate": 4.0740676610050987e-07, + "loss": 0.0014, + "step": 275520 + }, + { + "epoch": 1.7671638344883318, + "grad_norm": 0.03947426751255989, + "learning_rate": 4.0718550066360496e-07, + "loss": 0.0011, + "step": 275530 + }, + { + "epoch": 1.767227971382118, + "grad_norm": 0.11940759420394897, + "learning_rate": 4.0696429277794713e-07, + "loss": 0.0009, + "step": 275540 + }, + { + "epoch": 1.767292108275904, + "grad_norm": 0.08800876885652542, + "learning_rate": 4.067431424463103e-07, + "loss": 0.0009, + "step": 275550 + }, + { + "epoch": 1.7673562451696903, + "grad_norm": 0.09916777163743973, + "learning_rate": 4.065220496714645e-07, + "loss": 0.0013, + "step": 275560 + }, + { + "epoch": 1.7674203820634764, + "grad_norm": 0.042468562722206116, + "learning_rate": 4.063010144561802e-07, + "loss": 0.0009, + "step": 275570 + }, + { + "epoch": 1.7674845189572623, + "grad_norm": 0.12761539220809937, + "learning_rate": 4.0608003680322696e-07, + "loss": 0.0013, + "step": 275580 + }, + { + "epoch": 1.7675486558510485, + "grad_norm": 0.14592377841472626, + "learning_rate": 4.0585911671537415e-07, + "loss": 0.002, + "step": 275590 + }, + { + "epoch": 1.7676127927448346, + "grad_norm": 0.13312381505966187, + "learning_rate": 4.056382541953907e-07, + "loss": 0.0007, + "step": 275600 + }, + { + "epoch": 1.7676769296386206, + "grad_norm": 0.01981111615896225, + "learning_rate": 4.054174492460433e-07, + "loss": 0.0003, + "step": 275610 + }, + { + "epoch": 1.7677410665324067, + "grad_norm": 0.10597145557403564, + "learning_rate": 4.051967018700975e-07, + "loss": 0.001, + "step": 275620 + }, + { + "epoch": 1.7678052034261929, + "grad_norm": 0.011349072679877281, + "learning_rate": 4.049760120703228e-07, + "loss": 0.0008, + "step": 275630 + }, + { + "epoch": 1.767869340319979, + "grad_norm": 0.03977828845381737, + "learning_rate": 4.047553798494819e-07, + "loss": 0.0011, + "step": 275640 + }, + { + "epoch": 1.7679334772137651, + "grad_norm": 0.06345690786838531, + "learning_rate": 4.045348052103393e-07, + "loss": 0.0008, + "step": 275650 + }, + { + "epoch": 1.7679976141075513, + "grad_norm": 0.0018058223649859428, + "learning_rate": 4.043142881556611e-07, + "loss": 0.0028, + "step": 275660 + }, + { + "epoch": 1.7680617510013372, + "grad_norm": 0.028217557817697525, + "learning_rate": 4.040938286882096e-07, + "loss": 0.001, + "step": 275670 + }, + { + "epoch": 1.7681258878951234, + "grad_norm": 0.054113343358039856, + "learning_rate": 4.038734268107469e-07, + "loss": 0.0012, + "step": 275680 + }, + { + "epoch": 1.7681900247889093, + "grad_norm": 0.03654279187321663, + "learning_rate": 4.0365308252603377e-07, + "loss": 0.001, + "step": 275690 + }, + { + "epoch": 1.7682541616826954, + "grad_norm": 0.05833207815885544, + "learning_rate": 4.034327958368339e-07, + "loss": 0.001, + "step": 275700 + }, + { + "epoch": 1.7683182985764816, + "grad_norm": 0.05758131295442581, + "learning_rate": 4.032125667459058e-07, + "loss": 0.0008, + "step": 275710 + }, + { + "epoch": 1.7683824354702677, + "grad_norm": 0.1753859519958496, + "learning_rate": 4.029923952560094e-07, + "loss": 0.0018, + "step": 275720 + }, + { + "epoch": 1.768446572364054, + "grad_norm": 0.08543030172586441, + "learning_rate": 4.027722813699031e-07, + "loss": 0.0012, + "step": 275730 + }, + { + "epoch": 1.76851070925784, + "grad_norm": 0.01350217405706644, + "learning_rate": 4.0255222509034685e-07, + "loss": 0.0007, + "step": 275740 + }, + { + "epoch": 1.7685748461516262, + "grad_norm": 0.12346943467855453, + "learning_rate": 4.0233222642009627e-07, + "loss": 0.0007, + "step": 275750 + }, + { + "epoch": 1.7686389830454121, + "grad_norm": 0.032266777008771896, + "learning_rate": 4.021122853619086e-07, + "loss": 0.0011, + "step": 275760 + }, + { + "epoch": 1.7687031199391983, + "grad_norm": 0.1392628252506256, + "learning_rate": 4.0189240191854e-07, + "loss": 0.0013, + "step": 275770 + }, + { + "epoch": 1.7687672568329842, + "grad_norm": 0.070569708943367, + "learning_rate": 4.0167257609274645e-07, + "loss": 0.0007, + "step": 275780 + }, + { + "epoch": 1.7688313937267703, + "grad_norm": 0.026905816048383713, + "learning_rate": 4.0145280788728147e-07, + "loss": 0.0006, + "step": 275790 + }, + { + "epoch": 1.7688955306205565, + "grad_norm": 0.042029283940792084, + "learning_rate": 4.012330973049e-07, + "loss": 0.0016, + "step": 275800 + }, + { + "epoch": 1.7689596675143426, + "grad_norm": 0.04697522148489952, + "learning_rate": 4.010134443483532e-07, + "loss": 0.0007, + "step": 275810 + }, + { + "epoch": 1.7690238044081288, + "grad_norm": 0.23765172064304352, + "learning_rate": 4.007938490203961e-07, + "loss": 0.0007, + "step": 275820 + }, + { + "epoch": 1.769087941301915, + "grad_norm": 0.062108952552080154, + "learning_rate": 4.005743113237787e-07, + "loss": 0.0007, + "step": 275830 + }, + { + "epoch": 1.7691520781957009, + "grad_norm": 0.06532338261604309, + "learning_rate": 4.003548312612526e-07, + "loss": 0.0007, + "step": 275840 + }, + { + "epoch": 1.769216215089487, + "grad_norm": 0.0470815934240818, + "learning_rate": 4.001354088355669e-07, + "loss": 0.0011, + "step": 275850 + }, + { + "epoch": 1.769280351983273, + "grad_norm": 0.054930299520492554, + "learning_rate": 3.999160440494726e-07, + "loss": 0.0009, + "step": 275860 + }, + { + "epoch": 1.769344488877059, + "grad_norm": 0.04019542038440704, + "learning_rate": 3.9969673690571807e-07, + "loss": 0.0005, + "step": 275870 + }, + { + "epoch": 1.7694086257708452, + "grad_norm": 0.10912688821554184, + "learning_rate": 3.994774874070512e-07, + "loss": 0.0016, + "step": 275880 + }, + { + "epoch": 1.7694727626646314, + "grad_norm": 0.018208475783467293, + "learning_rate": 3.9925829555621853e-07, + "loss": 0.001, + "step": 275890 + }, + { + "epoch": 1.7695368995584175, + "grad_norm": 0.0015009446069598198, + "learning_rate": 3.9903916135596854e-07, + "loss": 0.0006, + "step": 275900 + }, + { + "epoch": 1.7696010364522037, + "grad_norm": 0.011169308796525002, + "learning_rate": 3.9882008480904564e-07, + "loss": 0.0006, + "step": 275910 + }, + { + "epoch": 1.7696651733459898, + "grad_norm": 0.1605800986289978, + "learning_rate": 3.986010659181949e-07, + "loss": 0.0011, + "step": 275920 + }, + { + "epoch": 1.7697293102397758, + "grad_norm": 0.06483528763055801, + "learning_rate": 3.9838210468616244e-07, + "loss": 0.0005, + "step": 275930 + }, + { + "epoch": 1.769793447133562, + "grad_norm": 0.03819932043552399, + "learning_rate": 3.9816320111569107e-07, + "loss": 0.0007, + "step": 275940 + }, + { + "epoch": 1.7698575840273478, + "grad_norm": 0.07846676558256149, + "learning_rate": 3.97944355209523e-07, + "loss": 0.0009, + "step": 275950 + }, + { + "epoch": 1.769921720921134, + "grad_norm": 0.13251625001430511, + "learning_rate": 3.977255669704011e-07, + "loss": 0.0007, + "step": 275960 + }, + { + "epoch": 1.7699858578149201, + "grad_norm": 0.0833633616566658, + "learning_rate": 3.97506836401067e-07, + "loss": 0.0011, + "step": 275970 + }, + { + "epoch": 1.7700499947087063, + "grad_norm": 0.0796852707862854, + "learning_rate": 3.9728816350426247e-07, + "loss": 0.0007, + "step": 275980 + }, + { + "epoch": 1.7701141316024924, + "grad_norm": 0.09615079313516617, + "learning_rate": 3.9706954828272636e-07, + "loss": 0.0008, + "step": 275990 + }, + { + "epoch": 1.7701782684962786, + "grad_norm": 0.003064911812543869, + "learning_rate": 3.9685099073919766e-07, + "loss": 0.0006, + "step": 276000 + }, + { + "epoch": 1.7702424053900645, + "grad_norm": 0.07344446331262589, + "learning_rate": 3.9663249087641684e-07, + "loss": 0.0013, + "step": 276010 + }, + { + "epoch": 1.7703065422838506, + "grad_norm": 0.04304369166493416, + "learning_rate": 3.964140486971207e-07, + "loss": 0.0008, + "step": 276020 + }, + { + "epoch": 1.7703706791776368, + "grad_norm": 0.2215459793806076, + "learning_rate": 3.9619566420404543e-07, + "loss": 0.0011, + "step": 276030 + }, + { + "epoch": 1.7704348160714227, + "grad_norm": 0.004388847388327122, + "learning_rate": 3.9597733739993037e-07, + "loss": 0.0005, + "step": 276040 + }, + { + "epoch": 1.7704989529652089, + "grad_norm": 0.047044143080711365, + "learning_rate": 3.9575906828750956e-07, + "loss": 0.002, + "step": 276050 + }, + { + "epoch": 1.770563089858995, + "grad_norm": 0.07690353691577911, + "learning_rate": 3.9554085686951795e-07, + "loss": 0.0013, + "step": 276060 + }, + { + "epoch": 1.7706272267527812, + "grad_norm": 0.021951548755168915, + "learning_rate": 3.9532270314868947e-07, + "loss": 0.001, + "step": 276070 + }, + { + "epoch": 1.7706913636465673, + "grad_norm": 0.14238323271274567, + "learning_rate": 3.951046071277592e-07, + "loss": 0.0015, + "step": 276080 + }, + { + "epoch": 1.7707555005403535, + "grad_norm": 0.04025707021355629, + "learning_rate": 3.948865688094594e-07, + "loss": 0.0008, + "step": 276090 + }, + { + "epoch": 1.7708196374341394, + "grad_norm": 0.027796175330877304, + "learning_rate": 3.9466858819652174e-07, + "loss": 0.0011, + "step": 276100 + }, + { + "epoch": 1.7708837743279255, + "grad_norm": 0.003692495170980692, + "learning_rate": 3.944506652916774e-07, + "loss": 0.0024, + "step": 276110 + }, + { + "epoch": 1.7709479112217115, + "grad_norm": 0.18867793679237366, + "learning_rate": 3.942328000976586e-07, + "loss": 0.0021, + "step": 276120 + }, + { + "epoch": 1.7710120481154976, + "grad_norm": 0.0782652497291565, + "learning_rate": 3.9401499261719487e-07, + "loss": 0.001, + "step": 276130 + }, + { + "epoch": 1.7710761850092838, + "grad_norm": 0.06763576716184616, + "learning_rate": 3.937972428530151e-07, + "loss": 0.0009, + "step": 276140 + }, + { + "epoch": 1.77114032190307, + "grad_norm": 0.00766996992751956, + "learning_rate": 3.935795508078466e-07, + "loss": 0.0026, + "step": 276150 + }, + { + "epoch": 1.771204458796856, + "grad_norm": 0.023206427693367004, + "learning_rate": 3.9336191648441935e-07, + "loss": 0.0008, + "step": 276160 + }, + { + "epoch": 1.7712685956906422, + "grad_norm": 0.008450353518128395, + "learning_rate": 3.931443398854601e-07, + "loss": 0.0013, + "step": 276170 + }, + { + "epoch": 1.7713327325844284, + "grad_norm": 0.027441825717687607, + "learning_rate": 3.9292682101369326e-07, + "loss": 0.001, + "step": 276180 + }, + { + "epoch": 1.7713968694782143, + "grad_norm": 0.03814779594540596, + "learning_rate": 3.9270935987184734e-07, + "loss": 0.0016, + "step": 276190 + }, + { + "epoch": 1.7714610063720004, + "grad_norm": 0.061811137944459915, + "learning_rate": 3.924919564626456e-07, + "loss": 0.0019, + "step": 276200 + }, + { + "epoch": 1.7715251432657864, + "grad_norm": 0.08312131464481354, + "learning_rate": 3.9227461078881314e-07, + "loss": 0.001, + "step": 276210 + }, + { + "epoch": 1.7715892801595725, + "grad_norm": 0.057990048080682755, + "learning_rate": 3.920573228530716e-07, + "loss": 0.0003, + "step": 276220 + }, + { + "epoch": 1.7716534170533587, + "grad_norm": 0.01682116463780403, + "learning_rate": 3.918400926581456e-07, + "loss": 0.0004, + "step": 276230 + }, + { + "epoch": 1.7717175539471448, + "grad_norm": 0.009075605310499668, + "learning_rate": 3.916229202067573e-07, + "loss": 0.0009, + "step": 276240 + }, + { + "epoch": 1.771781690840931, + "grad_norm": 0.058820053935050964, + "learning_rate": 3.914058055016268e-07, + "loss": 0.0007, + "step": 276250 + }, + { + "epoch": 1.771845827734717, + "grad_norm": 0.06107432767748833, + "learning_rate": 3.911887485454746e-07, + "loss": 0.0009, + "step": 276260 + }, + { + "epoch": 1.771909964628503, + "grad_norm": 0.04917144402861595, + "learning_rate": 3.90971749341022e-07, + "loss": 0.0007, + "step": 276270 + }, + { + "epoch": 1.7719741015222892, + "grad_norm": 0.11266804486513138, + "learning_rate": 3.9075480789098786e-07, + "loss": 0.0012, + "step": 276280 + }, + { + "epoch": 1.772038238416075, + "grad_norm": 0.03330082818865776, + "learning_rate": 3.9053792419809e-07, + "loss": 0.0013, + "step": 276290 + }, + { + "epoch": 1.7721023753098613, + "grad_norm": 0.07728027552366257, + "learning_rate": 3.9032109826504516e-07, + "loss": 0.0013, + "step": 276300 + }, + { + "epoch": 1.7721665122036474, + "grad_norm": 0.09894976019859314, + "learning_rate": 3.9010433009457225e-07, + "loss": 0.0009, + "step": 276310 + }, + { + "epoch": 1.7722306490974336, + "grad_norm": 0.04832207411527634, + "learning_rate": 3.8988761968938693e-07, + "loss": 0.0012, + "step": 276320 + }, + { + "epoch": 1.7722947859912197, + "grad_norm": 0.03660057112574577, + "learning_rate": 3.8967096705220363e-07, + "loss": 0.002, + "step": 276330 + }, + { + "epoch": 1.7723589228850058, + "grad_norm": 0.059815533459186554, + "learning_rate": 3.894543721857391e-07, + "loss": 0.001, + "step": 276340 + }, + { + "epoch": 1.772423059778792, + "grad_norm": 0.08611498773097992, + "learning_rate": 3.8923783509270616e-07, + "loss": 0.001, + "step": 276350 + }, + { + "epoch": 1.772487196672578, + "grad_norm": 0.030227821320295334, + "learning_rate": 3.8902135577581825e-07, + "loss": 0.0008, + "step": 276360 + }, + { + "epoch": 1.772551333566364, + "grad_norm": 0.09007013589143753, + "learning_rate": 3.8880493423778755e-07, + "loss": 0.0008, + "step": 276370 + }, + { + "epoch": 1.77261547046015, + "grad_norm": 0.27165305614471436, + "learning_rate": 3.885885704813275e-07, + "loss": 0.002, + "step": 276380 + }, + { + "epoch": 1.7726796073539361, + "grad_norm": 0.01975778490304947, + "learning_rate": 3.883722645091487e-07, + "loss": 0.0009, + "step": 276390 + }, + { + "epoch": 1.7727437442477223, + "grad_norm": 0.01760447584092617, + "learning_rate": 3.881560163239606e-07, + "loss": 0.0016, + "step": 276400 + }, + { + "epoch": 1.7728078811415084, + "grad_norm": 0.0017506623407825828, + "learning_rate": 3.8793982592847333e-07, + "loss": 0.0015, + "step": 276410 + }, + { + "epoch": 1.7728720180352946, + "grad_norm": 0.07523943483829498, + "learning_rate": 3.8772369332539695e-07, + "loss": 0.0006, + "step": 276420 + }, + { + "epoch": 1.7729361549290807, + "grad_norm": 0.028977354988455772, + "learning_rate": 3.8750761851743866e-07, + "loss": 0.0006, + "step": 276430 + }, + { + "epoch": 1.773000291822867, + "grad_norm": 0.0565430223941803, + "learning_rate": 3.8729160150730694e-07, + "loss": 0.0012, + "step": 276440 + }, + { + "epoch": 1.7730644287166528, + "grad_norm": 0.03902720287442207, + "learning_rate": 3.870756422977068e-07, + "loss": 0.001, + "step": 276450 + }, + { + "epoch": 1.773128565610439, + "grad_norm": 0.02282317355275154, + "learning_rate": 3.868597408913466e-07, + "loss": 0.0008, + "step": 276460 + }, + { + "epoch": 1.773192702504225, + "grad_norm": 0.05966460704803467, + "learning_rate": 3.8664389729093144e-07, + "loss": 0.0007, + "step": 276470 + }, + { + "epoch": 1.773256839398011, + "grad_norm": 0.038154445588588715, + "learning_rate": 3.864281114991636e-07, + "loss": 0.0011, + "step": 276480 + }, + { + "epoch": 1.7733209762917972, + "grad_norm": 0.07106415182352066, + "learning_rate": 3.862123835187503e-07, + "loss": 0.0005, + "step": 276490 + }, + { + "epoch": 1.7733851131855833, + "grad_norm": 0.05983132869005203, + "learning_rate": 3.8599671335239284e-07, + "loss": 0.0026, + "step": 276500 + }, + { + "epoch": 1.7734492500793695, + "grad_norm": 0.06823556870222092, + "learning_rate": 3.8578110100279455e-07, + "loss": 0.0012, + "step": 276510 + }, + { + "epoch": 1.7735133869731556, + "grad_norm": 0.03860345482826233, + "learning_rate": 3.85565546472656e-07, + "loss": 0.0006, + "step": 276520 + }, + { + "epoch": 1.7735775238669416, + "grad_norm": 0.045919954776763916, + "learning_rate": 3.853500497646784e-07, + "loss": 0.0007, + "step": 276530 + }, + { + "epoch": 1.7736416607607277, + "grad_norm": 0.0455821193754673, + "learning_rate": 3.85134610881564e-07, + "loss": 0.0005, + "step": 276540 + }, + { + "epoch": 1.7737057976545136, + "grad_norm": 0.05296953395009041, + "learning_rate": 3.849192298260113e-07, + "loss": 0.0014, + "step": 276550 + }, + { + "epoch": 1.7737699345482998, + "grad_norm": 0.11851990222930908, + "learning_rate": 3.8470390660071855e-07, + "loss": 0.0005, + "step": 276560 + }, + { + "epoch": 1.773834071442086, + "grad_norm": 0.06372379511594772, + "learning_rate": 3.844886412083837e-07, + "loss": 0.0019, + "step": 276570 + }, + { + "epoch": 1.773898208335872, + "grad_norm": 0.06602787226438522, + "learning_rate": 3.8427343365170567e-07, + "loss": 0.0008, + "step": 276580 + }, + { + "epoch": 1.7739623452296582, + "grad_norm": 0.09497271478176117, + "learning_rate": 3.8405828393338063e-07, + "loss": 0.0012, + "step": 276590 + }, + { + "epoch": 1.7740264821234444, + "grad_norm": 0.038306642323732376, + "learning_rate": 3.838431920561031e-07, + "loss": 0.0014, + "step": 276600 + }, + { + "epoch": 1.7740906190172305, + "grad_norm": 0.15213309228420258, + "learning_rate": 3.8362815802257035e-07, + "loss": 0.0014, + "step": 276610 + }, + { + "epoch": 1.7741547559110165, + "grad_norm": 0.04893713817000389, + "learning_rate": 3.8341318183547583e-07, + "loss": 0.0005, + "step": 276620 + }, + { + "epoch": 1.7742188928048026, + "grad_norm": 0.07411212474107742, + "learning_rate": 3.83198263497514e-07, + "loss": 0.001, + "step": 276630 + }, + { + "epoch": 1.7742830296985885, + "grad_norm": 0.08615033328533173, + "learning_rate": 3.829834030113766e-07, + "loss": 0.0008, + "step": 276640 + }, + { + "epoch": 1.7743471665923747, + "grad_norm": 0.03488355875015259, + "learning_rate": 3.8276860037975704e-07, + "loss": 0.001, + "step": 276650 + }, + { + "epoch": 1.7744113034861608, + "grad_norm": 0.06693438440561295, + "learning_rate": 3.825538556053476e-07, + "loss": 0.0012, + "step": 276660 + }, + { + "epoch": 1.774475440379947, + "grad_norm": 0.10357903689146042, + "learning_rate": 3.8233916869083845e-07, + "loss": 0.0009, + "step": 276670 + }, + { + "epoch": 1.7745395772737331, + "grad_norm": 0.21852293610572815, + "learning_rate": 3.8212453963891837e-07, + "loss": 0.0009, + "step": 276680 + }, + { + "epoch": 1.7746037141675193, + "grad_norm": 0.11806097626686096, + "learning_rate": 3.819099684522792e-07, + "loss": 0.0013, + "step": 276690 + }, + { + "epoch": 1.7746678510613052, + "grad_norm": 0.250988245010376, + "learning_rate": 3.816954551336088e-07, + "loss": 0.0019, + "step": 276700 + }, + { + "epoch": 1.7747319879550914, + "grad_norm": 0.04955058917403221, + "learning_rate": 3.8148099968559447e-07, + "loss": 0.0004, + "step": 276710 + }, + { + "epoch": 1.7747961248488775, + "grad_norm": 0.05638645961880684, + "learning_rate": 3.812666021109235e-07, + "loss": 0.0009, + "step": 276720 + }, + { + "epoch": 1.7748602617426634, + "grad_norm": 0.07330942153930664, + "learning_rate": 3.810522624122842e-07, + "loss": 0.0005, + "step": 276730 + }, + { + "epoch": 1.7749243986364496, + "grad_norm": 0.05153987556695938, + "learning_rate": 3.808379805923607e-07, + "loss": 0.0009, + "step": 276740 + }, + { + "epoch": 1.7749885355302357, + "grad_norm": 0.09960044920444489, + "learning_rate": 3.806237566538379e-07, + "loss": 0.0019, + "step": 276750 + }, + { + "epoch": 1.7750526724240219, + "grad_norm": 0.030330004170536995, + "learning_rate": 3.8040959059940154e-07, + "loss": 0.0019, + "step": 276760 + }, + { + "epoch": 1.775116809317808, + "grad_norm": 0.08835256099700928, + "learning_rate": 3.8019548243173444e-07, + "loss": 0.0006, + "step": 276770 + }, + { + "epoch": 1.7751809462115942, + "grad_norm": 0.02172674797475338, + "learning_rate": 3.7998143215352e-07, + "loss": 0.0009, + "step": 276780 + }, + { + "epoch": 1.77524508310538, + "grad_norm": 0.3368646800518036, + "learning_rate": 3.797674397674389e-07, + "loss": 0.002, + "step": 276790 + }, + { + "epoch": 1.7753092199991662, + "grad_norm": 0.039189331233501434, + "learning_rate": 3.795535052761751e-07, + "loss": 0.0012, + "step": 276800 + }, + { + "epoch": 1.7753733568929522, + "grad_norm": 0.1254708170890808, + "learning_rate": 3.7933962868240805e-07, + "loss": 0.0012, + "step": 276810 + }, + { + "epoch": 1.7754374937867383, + "grad_norm": 0.03662301227450371, + "learning_rate": 3.7912580998881734e-07, + "loss": 0.0013, + "step": 276820 + }, + { + "epoch": 1.7755016306805245, + "grad_norm": 0.08107379078865051, + "learning_rate": 3.7891204919808243e-07, + "loss": 0.0009, + "step": 276830 + }, + { + "epoch": 1.7755657675743106, + "grad_norm": 0.012480441480875015, + "learning_rate": 3.786983463128824e-07, + "loss": 0.0014, + "step": 276840 + }, + { + "epoch": 1.7756299044680968, + "grad_norm": 0.09728964418172836, + "learning_rate": 3.7848470133589557e-07, + "loss": 0.0014, + "step": 276850 + }, + { + "epoch": 1.775694041361883, + "grad_norm": 0.1525079309940338, + "learning_rate": 3.7827111426979755e-07, + "loss": 0.001, + "step": 276860 + }, + { + "epoch": 1.775758178255669, + "grad_norm": 0.09387625008821487, + "learning_rate": 3.780575851172652e-07, + "loss": 0.0014, + "step": 276870 + }, + { + "epoch": 1.775822315149455, + "grad_norm": 0.08427508920431137, + "learning_rate": 3.7784411388097577e-07, + "loss": 0.0007, + "step": 276880 + }, + { + "epoch": 1.7758864520432411, + "grad_norm": 0.07875356823205948, + "learning_rate": 3.776307005636026e-07, + "loss": 0.0007, + "step": 276890 + }, + { + "epoch": 1.775950588937027, + "grad_norm": 0.04849980026483536, + "learning_rate": 3.7741734516781925e-07, + "loss": 0.0007, + "step": 276900 + }, + { + "epoch": 1.7760147258308132, + "grad_norm": 0.050537627190351486, + "learning_rate": 3.7720404769630124e-07, + "loss": 0.0015, + "step": 276910 + }, + { + "epoch": 1.7760788627245994, + "grad_norm": 0.25381091237068176, + "learning_rate": 3.76990808151721e-07, + "loss": 0.0006, + "step": 276920 + }, + { + "epoch": 1.7761429996183855, + "grad_norm": 0.022908836603164673, + "learning_rate": 3.76777626536749e-07, + "loss": 0.0007, + "step": 276930 + }, + { + "epoch": 1.7762071365121717, + "grad_norm": 0.030104728415608406, + "learning_rate": 3.765645028540571e-07, + "loss": 0.0007, + "step": 276940 + }, + { + "epoch": 1.7762712734059578, + "grad_norm": 0.03946729749441147, + "learning_rate": 3.7635143710631706e-07, + "loss": 0.0018, + "step": 276950 + }, + { + "epoch": 1.7763354102997437, + "grad_norm": 0.08555560559034348, + "learning_rate": 3.7613842929619837e-07, + "loss": 0.0008, + "step": 276960 + }, + { + "epoch": 1.7763995471935299, + "grad_norm": 0.046086009591817856, + "learning_rate": 3.7592547942636946e-07, + "loss": 0.001, + "step": 276970 + }, + { + "epoch": 1.7764636840873158, + "grad_norm": 0.06498829275369644, + "learning_rate": 3.757125874994982e-07, + "loss": 0.0011, + "step": 276980 + }, + { + "epoch": 1.776527820981102, + "grad_norm": 0.04270976409316063, + "learning_rate": 3.754997535182542e-07, + "loss": 0.0018, + "step": 276990 + }, + { + "epoch": 1.776591957874888, + "grad_norm": 0.09135537594556808, + "learning_rate": 3.75286977485303e-07, + "loss": 0.0012, + "step": 277000 + }, + { + "epoch": 1.7766560947686743, + "grad_norm": 0.030918428674340248, + "learning_rate": 3.750742594033113e-07, + "loss": 0.0005, + "step": 277010 + }, + { + "epoch": 1.7767202316624604, + "grad_norm": 0.07951313257217407, + "learning_rate": 3.748615992749438e-07, + "loss": 0.0008, + "step": 277020 + }, + { + "epoch": 1.7767843685562466, + "grad_norm": 0.018732817843556404, + "learning_rate": 3.7464899710286717e-07, + "loss": 0.0005, + "step": 277030 + }, + { + "epoch": 1.7768485054500327, + "grad_norm": 0.2585754990577698, + "learning_rate": 3.7443645288974374e-07, + "loss": 0.0009, + "step": 277040 + }, + { + "epoch": 1.7769126423438186, + "grad_norm": 0.0019640575628727674, + "learning_rate": 3.742239666382369e-07, + "loss": 0.0008, + "step": 277050 + }, + { + "epoch": 1.7769767792376048, + "grad_norm": 0.11195459216833115, + "learning_rate": 3.740115383510107e-07, + "loss": 0.0008, + "step": 277060 + }, + { + "epoch": 1.7770409161313907, + "grad_norm": 0.03852207213640213, + "learning_rate": 3.7379916803072567e-07, + "loss": 0.0008, + "step": 277070 + }, + { + "epoch": 1.7771050530251769, + "grad_norm": 0.09934749454259872, + "learning_rate": 3.735868556800432e-07, + "loss": 0.0007, + "step": 277080 + }, + { + "epoch": 1.777169189918963, + "grad_norm": 0.03736523538827896, + "learning_rate": 3.7337460130162375e-07, + "loss": 0.0008, + "step": 277090 + }, + { + "epoch": 1.7772333268127491, + "grad_norm": 0.0296183992177248, + "learning_rate": 3.731624048981275e-07, + "loss": 0.0008, + "step": 277100 + }, + { + "epoch": 1.7772974637065353, + "grad_norm": 0.03917883336544037, + "learning_rate": 3.729502664722129e-07, + "loss": 0.001, + "step": 277110 + }, + { + "epoch": 1.7773616006003214, + "grad_norm": 0.13903005421161652, + "learning_rate": 3.727381860265389e-07, + "loss": 0.0012, + "step": 277120 + }, + { + "epoch": 1.7774257374941074, + "grad_norm": 0.12451034784317017, + "learning_rate": 3.7252616356376117e-07, + "loss": 0.0005, + "step": 277130 + }, + { + "epoch": 1.7774898743878935, + "grad_norm": 0.0074807340279221535, + "learning_rate": 3.723141990865392e-07, + "loss": 0.0007, + "step": 277140 + }, + { + "epoch": 1.7775540112816797, + "grad_norm": 0.1273653656244278, + "learning_rate": 3.7210229259752706e-07, + "loss": 0.0011, + "step": 277150 + }, + { + "epoch": 1.7776181481754656, + "grad_norm": 0.04760697856545448, + "learning_rate": 3.718904440993809e-07, + "loss": 0.0022, + "step": 277160 + }, + { + "epoch": 1.7776822850692517, + "grad_norm": 0.14566820859909058, + "learning_rate": 3.716786535947542e-07, + "loss": 0.001, + "step": 277170 + }, + { + "epoch": 1.777746421963038, + "grad_norm": 0.045055124908685684, + "learning_rate": 3.714669210863031e-07, + "loss": 0.0007, + "step": 277180 + }, + { + "epoch": 1.777810558856824, + "grad_norm": 0.013329172506928444, + "learning_rate": 3.712552465766789e-07, + "loss": 0.0013, + "step": 277190 + }, + { + "epoch": 1.7778746957506102, + "grad_norm": 0.05233705788850784, + "learning_rate": 3.710436300685338e-07, + "loss": 0.0011, + "step": 277200 + }, + { + "epoch": 1.7779388326443963, + "grad_norm": 0.08523570001125336, + "learning_rate": 3.708320715645214e-07, + "loss": 0.0016, + "step": 277210 + }, + { + "epoch": 1.7780029695381823, + "grad_norm": 0.054902657866477966, + "learning_rate": 3.7062057106729166e-07, + "loss": 0.0014, + "step": 277220 + }, + { + "epoch": 1.7780671064319684, + "grad_norm": 0.004034021403640509, + "learning_rate": 3.7040912857949474e-07, + "loss": 0.0015, + "step": 277230 + }, + { + "epoch": 1.7781312433257543, + "grad_norm": 0.020426299422979355, + "learning_rate": 3.7019774410377905e-07, + "loss": 0.0011, + "step": 277240 + }, + { + "epoch": 1.7781953802195405, + "grad_norm": 0.272157222032547, + "learning_rate": 3.699864176427953e-07, + "loss": 0.0012, + "step": 277250 + }, + { + "epoch": 1.7782595171133266, + "grad_norm": 0.02593575045466423, + "learning_rate": 3.6977514919919075e-07, + "loss": 0.0007, + "step": 277260 + }, + { + "epoch": 1.7783236540071128, + "grad_norm": 0.15078026056289673, + "learning_rate": 3.695639387756128e-07, + "loss": 0.0007, + "step": 277270 + }, + { + "epoch": 1.778387790900899, + "grad_norm": 0.08224067836999893, + "learning_rate": 3.6935278637470705e-07, + "loss": 0.0007, + "step": 277280 + }, + { + "epoch": 1.778451927794685, + "grad_norm": 0.02865617163479328, + "learning_rate": 3.6914169199912086e-07, + "loss": 0.0022, + "step": 277290 + }, + { + "epoch": 1.7785160646884712, + "grad_norm": 0.006676721386611462, + "learning_rate": 3.689306556514993e-07, + "loss": 0.001, + "step": 277300 + }, + { + "epoch": 1.7785802015822572, + "grad_norm": 0.10433896631002426, + "learning_rate": 3.6871967733448646e-07, + "loss": 0.0009, + "step": 277310 + }, + { + "epoch": 1.7786443384760433, + "grad_norm": 0.019970644265413284, + "learning_rate": 3.685087570507251e-07, + "loss": 0.0018, + "step": 277320 + }, + { + "epoch": 1.7787084753698292, + "grad_norm": 0.058814890682697296, + "learning_rate": 3.6829789480285983e-07, + "loss": 0.0013, + "step": 277330 + }, + { + "epoch": 1.7787726122636154, + "grad_norm": 0.024515783414244652, + "learning_rate": 3.6808709059353244e-07, + "loss": 0.0005, + "step": 277340 + }, + { + "epoch": 1.7788367491574015, + "grad_norm": 0.08331423252820969, + "learning_rate": 3.6787634442538414e-07, + "loss": 0.0013, + "step": 277350 + }, + { + "epoch": 1.7789008860511877, + "grad_norm": 0.07859785109758377, + "learning_rate": 3.67665656301055e-07, + "loss": 0.0007, + "step": 277360 + }, + { + "epoch": 1.7789650229449738, + "grad_norm": 0.010937534272670746, + "learning_rate": 3.674550262231869e-07, + "loss": 0.0008, + "step": 277370 + }, + { + "epoch": 1.77902915983876, + "grad_norm": 0.1252000331878662, + "learning_rate": 3.672444541944176e-07, + "loss": 0.0009, + "step": 277380 + }, + { + "epoch": 1.779093296732546, + "grad_norm": 0.14219404757022858, + "learning_rate": 3.670339402173867e-07, + "loss": 0.0015, + "step": 277390 + }, + { + "epoch": 1.779157433626332, + "grad_norm": 0.1581021100282669, + "learning_rate": 3.6682348429473044e-07, + "loss": 0.0008, + "step": 277400 + }, + { + "epoch": 1.779221570520118, + "grad_norm": 0.16713345050811768, + "learning_rate": 3.6661308642908846e-07, + "loss": 0.0011, + "step": 277410 + }, + { + "epoch": 1.7792857074139041, + "grad_norm": 0.058367762714624405, + "learning_rate": 3.664027466230957e-07, + "loss": 0.001, + "step": 277420 + }, + { + "epoch": 1.7793498443076903, + "grad_norm": 0.012379063293337822, + "learning_rate": 3.66192464879388e-07, + "loss": 0.001, + "step": 277430 + }, + { + "epoch": 1.7794139812014764, + "grad_norm": 0.08468479663133621, + "learning_rate": 3.6598224120059977e-07, + "loss": 0.0012, + "step": 277440 + }, + { + "epoch": 1.7794781180952626, + "grad_norm": 0.1617065668106079, + "learning_rate": 3.657720755893668e-07, + "loss": 0.0017, + "step": 277450 + }, + { + "epoch": 1.7795422549890487, + "grad_norm": 0.03813833370804787, + "learning_rate": 3.655619680483213e-07, + "loss": 0.0021, + "step": 277460 + }, + { + "epoch": 1.7796063918828349, + "grad_norm": 0.18245063722133636, + "learning_rate": 3.6535191858009633e-07, + "loss": 0.0011, + "step": 277470 + }, + { + "epoch": 1.7796705287766208, + "grad_norm": 0.0410914272069931, + "learning_rate": 3.6514192718732467e-07, + "loss": 0.0038, + "step": 277480 + }, + { + "epoch": 1.779734665670407, + "grad_norm": 0.07085295766592026, + "learning_rate": 3.64931993872637e-07, + "loss": 0.0009, + "step": 277490 + }, + { + "epoch": 1.7797988025641929, + "grad_norm": 0.18684187531471252, + "learning_rate": 3.647221186386646e-07, + "loss": 0.0013, + "step": 277500 + }, + { + "epoch": 1.779862939457979, + "grad_norm": 0.13839027285575867, + "learning_rate": 3.6451230148803583e-07, + "loss": 0.0012, + "step": 277510 + }, + { + "epoch": 1.7799270763517652, + "grad_norm": 0.05822043865919113, + "learning_rate": 3.643025424233815e-07, + "loss": 0.0008, + "step": 277520 + }, + { + "epoch": 1.7799912132455513, + "grad_norm": 0.013622069731354713, + "learning_rate": 3.640928414473294e-07, + "loss": 0.0012, + "step": 277530 + }, + { + "epoch": 1.7800553501393375, + "grad_norm": 0.24358125030994415, + "learning_rate": 3.6388319856250744e-07, + "loss": 0.0007, + "step": 277540 + }, + { + "epoch": 1.7801194870331236, + "grad_norm": 0.11311017721891403, + "learning_rate": 3.636736137715413e-07, + "loss": 0.0009, + "step": 277550 + }, + { + "epoch": 1.7801836239269095, + "grad_norm": 0.037950318306684494, + "learning_rate": 3.6346408707705893e-07, + "loss": 0.0011, + "step": 277560 + }, + { + "epoch": 1.7802477608206957, + "grad_norm": 0.062418099492788315, + "learning_rate": 3.63254618481686e-07, + "loss": 0.0012, + "step": 277570 + }, + { + "epoch": 1.7803118977144818, + "grad_norm": 0.01619354449212551, + "learning_rate": 3.6304520798804587e-07, + "loss": 0.0008, + "step": 277580 + }, + { + "epoch": 1.7803760346082678, + "grad_norm": 0.030265532433986664, + "learning_rate": 3.6283585559876265e-07, + "loss": 0.0014, + "step": 277590 + }, + { + "epoch": 1.780440171502054, + "grad_norm": 0.15462017059326172, + "learning_rate": 3.626265613164609e-07, + "loss": 0.0008, + "step": 277600 + }, + { + "epoch": 1.78050430839584, + "grad_norm": 0.0385175496339798, + "learning_rate": 3.6241732514376295e-07, + "loss": 0.0007, + "step": 277610 + }, + { + "epoch": 1.7805684452896262, + "grad_norm": 0.08664373308420181, + "learning_rate": 3.6220814708328945e-07, + "loss": 0.0011, + "step": 277620 + }, + { + "epoch": 1.7806325821834124, + "grad_norm": 0.007938371039927006, + "learning_rate": 3.6199902713766334e-07, + "loss": 0.0006, + "step": 277630 + }, + { + "epoch": 1.7806967190771985, + "grad_norm": 0.1576695740222931, + "learning_rate": 3.617899653095042e-07, + "loss": 0.001, + "step": 277640 + }, + { + "epoch": 1.7807608559709844, + "grad_norm": 0.10778723657131195, + "learning_rate": 3.6158096160143096e-07, + "loss": 0.0018, + "step": 277650 + }, + { + "epoch": 1.7808249928647706, + "grad_norm": 0.03832042217254639, + "learning_rate": 3.613720160160633e-07, + "loss": 0.0016, + "step": 277660 + }, + { + "epoch": 1.7808891297585565, + "grad_norm": 0.14303508400917053, + "learning_rate": 3.611631285560202e-07, + "loss": 0.0022, + "step": 277670 + }, + { + "epoch": 1.7809532666523427, + "grad_norm": 0.05030818283557892, + "learning_rate": 3.609542992239179e-07, + "loss": 0.0011, + "step": 277680 + }, + { + "epoch": 1.7810174035461288, + "grad_norm": 0.15469315648078918, + "learning_rate": 3.607455280223743e-07, + "loss": 0.0024, + "step": 277690 + }, + { + "epoch": 1.781081540439915, + "grad_norm": 0.005536719225347042, + "learning_rate": 3.6053681495400337e-07, + "loss": 0.0017, + "step": 277700 + }, + { + "epoch": 1.781145677333701, + "grad_norm": 0.017671020701527596, + "learning_rate": 3.603281600214231e-07, + "loss": 0.001, + "step": 277710 + }, + { + "epoch": 1.7812098142274873, + "grad_norm": 0.22595839202404022, + "learning_rate": 3.601195632272475e-07, + "loss": 0.0017, + "step": 277720 + }, + { + "epoch": 1.7812739511212734, + "grad_norm": 0.1403406709432602, + "learning_rate": 3.5991102457408834e-07, + "loss": 0.0018, + "step": 277730 + }, + { + "epoch": 1.7813380880150593, + "grad_norm": 0.09702686965465546, + "learning_rate": 3.5970254406456126e-07, + "loss": 0.0011, + "step": 277740 + }, + { + "epoch": 1.7814022249088455, + "grad_norm": 0.10749652981758118, + "learning_rate": 3.594941217012776e-07, + "loss": 0.0007, + "step": 277750 + }, + { + "epoch": 1.7814663618026314, + "grad_norm": 0.054250333458185196, + "learning_rate": 3.592857574868491e-07, + "loss": 0.0007, + "step": 277760 + }, + { + "epoch": 1.7815304986964176, + "grad_norm": 0.04100211337208748, + "learning_rate": 3.5907745142388597e-07, + "loss": 0.0009, + "step": 277770 + }, + { + "epoch": 1.7815946355902037, + "grad_norm": 0.08517192304134369, + "learning_rate": 3.588692035149999e-07, + "loss": 0.0013, + "step": 277780 + }, + { + "epoch": 1.7816587724839898, + "grad_norm": 0.23140820860862732, + "learning_rate": 3.5866101376279995e-07, + "loss": 0.002, + "step": 277790 + }, + { + "epoch": 1.781722909377776, + "grad_norm": 0.023030906915664673, + "learning_rate": 3.5845288216989407e-07, + "loss": 0.0013, + "step": 277800 + }, + { + "epoch": 1.7817870462715621, + "grad_norm": 0.0510365292429924, + "learning_rate": 3.5824480873889064e-07, + "loss": 0.002, + "step": 277810 + }, + { + "epoch": 1.781851183165348, + "grad_norm": 0.018052220344543457, + "learning_rate": 3.5803679347239775e-07, + "loss": 0.001, + "step": 277820 + }, + { + "epoch": 1.7819153200591342, + "grad_norm": 0.06128367409110069, + "learning_rate": 3.578288363730215e-07, + "loss": 0.0011, + "step": 277830 + }, + { + "epoch": 1.7819794569529201, + "grad_norm": 0.0588102862238884, + "learning_rate": 3.5762093744336754e-07, + "loss": 0.0008, + "step": 277840 + }, + { + "epoch": 1.7820435938467063, + "grad_norm": 0.06334016472101212, + "learning_rate": 3.5741309668604007e-07, + "loss": 0.0014, + "step": 277850 + }, + { + "epoch": 1.7821077307404924, + "grad_norm": 0.06539490818977356, + "learning_rate": 3.5720531410364523e-07, + "loss": 0.0019, + "step": 277860 + }, + { + "epoch": 1.7821718676342786, + "grad_norm": 0.0030477459076792, + "learning_rate": 3.569975896987865e-07, + "loss": 0.0012, + "step": 277870 + }, + { + "epoch": 1.7822360045280647, + "grad_norm": 0.2873289883136749, + "learning_rate": 3.5678992347406517e-07, + "loss": 0.0009, + "step": 277880 + }, + { + "epoch": 1.782300141421851, + "grad_norm": 0.06125490739941597, + "learning_rate": 3.5658231543208523e-07, + "loss": 0.001, + "step": 277890 + }, + { + "epoch": 1.782364278315637, + "grad_norm": 0.001798335462808609, + "learning_rate": 3.563747655754479e-07, + "loss": 0.0008, + "step": 277900 + }, + { + "epoch": 1.782428415209423, + "grad_norm": 0.04958849400281906, + "learning_rate": 3.5616727390675343e-07, + "loss": 0.0014, + "step": 277910 + }, + { + "epoch": 1.7824925521032091, + "grad_norm": 0.00632866658270359, + "learning_rate": 3.5595984042860076e-07, + "loss": 0.0018, + "step": 277920 + }, + { + "epoch": 1.782556688996995, + "grad_norm": 0.048083748668432236, + "learning_rate": 3.557524651435912e-07, + "loss": 0.0034, + "step": 277930 + }, + { + "epoch": 1.7826208258907812, + "grad_norm": 0.14223527908325195, + "learning_rate": 3.555451480543226e-07, + "loss": 0.0009, + "step": 277940 + }, + { + "epoch": 1.7826849627845673, + "grad_norm": 0.007624818477779627, + "learning_rate": 3.553378891633924e-07, + "loss": 0.001, + "step": 277950 + }, + { + "epoch": 1.7827490996783535, + "grad_norm": 0.08095641434192657, + "learning_rate": 3.551306884733974e-07, + "loss": 0.0006, + "step": 277960 + }, + { + "epoch": 1.7828132365721396, + "grad_norm": 0.22837106883525848, + "learning_rate": 3.5492354598693547e-07, + "loss": 0.0018, + "step": 277970 + }, + { + "epoch": 1.7828773734659258, + "grad_norm": 0.2520412504673004, + "learning_rate": 3.5471646170660067e-07, + "loss": 0.0016, + "step": 277980 + }, + { + "epoch": 1.782941510359712, + "grad_norm": 0.05908200144767761, + "learning_rate": 3.545094356349893e-07, + "loss": 0.0013, + "step": 277990 + }, + { + "epoch": 1.7830056472534979, + "grad_norm": 0.06598391383886337, + "learning_rate": 3.543024677746937e-07, + "loss": 0.0008, + "step": 278000 + }, + { + "epoch": 1.783069784147284, + "grad_norm": 0.013274770230054855, + "learning_rate": 3.54095558128309e-07, + "loss": 0.0014, + "step": 278010 + }, + { + "epoch": 1.78313392104107, + "grad_norm": 0.15073542296886444, + "learning_rate": 3.5388870669842755e-07, + "loss": 0.0015, + "step": 278020 + }, + { + "epoch": 1.783198057934856, + "grad_norm": 0.01688840240240097, + "learning_rate": 3.5368191348764013e-07, + "loss": 0.0012, + "step": 278030 + }, + { + "epoch": 1.7832621948286422, + "grad_norm": 0.0731649175286293, + "learning_rate": 3.534751784985396e-07, + "loss": 0.0028, + "step": 278040 + }, + { + "epoch": 1.7833263317224284, + "grad_norm": 0.02688288502395153, + "learning_rate": 3.532685017337167e-07, + "loss": 0.0013, + "step": 278050 + }, + { + "epoch": 1.7833904686162145, + "grad_norm": 0.05125247314572334, + "learning_rate": 3.5306188319575886e-07, + "loss": 0.0007, + "step": 278060 + }, + { + "epoch": 1.7834546055100007, + "grad_norm": 0.17364509403705597, + "learning_rate": 3.5285532288725777e-07, + "loss": 0.0014, + "step": 278070 + }, + { + "epoch": 1.7835187424037866, + "grad_norm": 0.3780440390110016, + "learning_rate": 3.526488208108003e-07, + "loss": 0.001, + "step": 278080 + }, + { + "epoch": 1.7835828792975728, + "grad_norm": 0.012333092279732227, + "learning_rate": 3.52442376968975e-07, + "loss": 0.0007, + "step": 278090 + }, + { + "epoch": 1.7836470161913587, + "grad_norm": 0.0849262997508049, + "learning_rate": 3.5223599136436806e-07, + "loss": 0.0004, + "step": 278100 + }, + { + "epoch": 1.7837111530851448, + "grad_norm": 0.03807647153735161, + "learning_rate": 3.520296639995663e-07, + "loss": 0.0003, + "step": 278110 + }, + { + "epoch": 1.783775289978931, + "grad_norm": 0.033494483679533005, + "learning_rate": 3.5182339487715377e-07, + "loss": 0.0008, + "step": 278120 + }, + { + "epoch": 1.7838394268727171, + "grad_norm": 0.1657082736492157, + "learning_rate": 3.516171839997168e-07, + "loss": 0.0009, + "step": 278130 + }, + { + "epoch": 1.7839035637665033, + "grad_norm": 0.16438868641853333, + "learning_rate": 3.514110313698388e-07, + "loss": 0.0031, + "step": 278140 + }, + { + "epoch": 1.7839677006602894, + "grad_norm": 0.3702642321586609, + "learning_rate": 3.512049369901016e-07, + "loss": 0.0013, + "step": 278150 + }, + { + "epoch": 1.7840318375540756, + "grad_norm": 0.10200533270835876, + "learning_rate": 3.509989008630904e-07, + "loss": 0.0013, + "step": 278160 + }, + { + "epoch": 1.7840959744478615, + "grad_norm": 0.08121436089277267, + "learning_rate": 3.5079292299138535e-07, + "loss": 0.0009, + "step": 278170 + }, + { + "epoch": 1.7841601113416476, + "grad_norm": 0.044688694179058075, + "learning_rate": 3.505870033775677e-07, + "loss": 0.0009, + "step": 278180 + }, + { + "epoch": 1.7842242482354336, + "grad_norm": 0.038585562258958817, + "learning_rate": 3.5038114202421705e-07, + "loss": 0.0015, + "step": 278190 + }, + { + "epoch": 1.7842883851292197, + "grad_norm": 0.024512559175491333, + "learning_rate": 3.5017533893391466e-07, + "loss": 0.0005, + "step": 278200 + }, + { + "epoch": 1.7843525220230059, + "grad_norm": 0.039037760347127914, + "learning_rate": 3.4996959410923793e-07, + "loss": 0.0011, + "step": 278210 + }, + { + "epoch": 1.784416658916792, + "grad_norm": 0.05863253399729729, + "learning_rate": 3.497639075527659e-07, + "loss": 0.0018, + "step": 278220 + }, + { + "epoch": 1.7844807958105782, + "grad_norm": 0.006160229444503784, + "learning_rate": 3.4955827926707485e-07, + "loss": 0.0006, + "step": 278230 + }, + { + "epoch": 1.7845449327043643, + "grad_norm": 0.0604865700006485, + "learning_rate": 3.4935270925474274e-07, + "loss": 0.001, + "step": 278240 + }, + { + "epoch": 1.7846090695981502, + "grad_norm": 0.07260777056217194, + "learning_rate": 3.491471975183452e-07, + "loss": 0.0014, + "step": 278250 + }, + { + "epoch": 1.7846732064919364, + "grad_norm": 0.012632405385375023, + "learning_rate": 3.48941744060457e-07, + "loss": 0.0005, + "step": 278260 + }, + { + "epoch": 1.7847373433857225, + "grad_norm": 0.1146724745631218, + "learning_rate": 3.4873634888365206e-07, + "loss": 0.0009, + "step": 278270 + }, + { + "epoch": 1.7848014802795085, + "grad_norm": 0.1695510447025299, + "learning_rate": 3.4853101199050553e-07, + "loss": 0.0014, + "step": 278280 + }, + { + "epoch": 1.7848656171732946, + "grad_norm": 0.006848647724837065, + "learning_rate": 3.4832573338358986e-07, + "loss": 0.0006, + "step": 278290 + }, + { + "epoch": 1.7849297540670808, + "grad_norm": 0.0386105477809906, + "learning_rate": 3.4812051306547637e-07, + "loss": 0.0008, + "step": 278300 + }, + { + "epoch": 1.784993890960867, + "grad_norm": 0.0190505962818861, + "learning_rate": 3.479153510387384e-07, + "loss": 0.0009, + "step": 278310 + }, + { + "epoch": 1.785058027854653, + "grad_norm": 0.048402898013591766, + "learning_rate": 3.4771024730594627e-07, + "loss": 0.001, + "step": 278320 + }, + { + "epoch": 1.7851221647484392, + "grad_norm": 0.025702862069010735, + "learning_rate": 3.47505201869669e-07, + "loss": 0.0007, + "step": 278330 + }, + { + "epoch": 1.7851863016422251, + "grad_norm": 0.03252828121185303, + "learning_rate": 3.473002147324761e-07, + "loss": 0.0012, + "step": 278340 + }, + { + "epoch": 1.7852504385360113, + "grad_norm": 0.02330513298511505, + "learning_rate": 3.470952858969379e-07, + "loss": 0.0007, + "step": 278350 + }, + { + "epoch": 1.7853145754297972, + "grad_norm": 0.023918023332953453, + "learning_rate": 3.4689041536562053e-07, + "loss": 0.0007, + "step": 278360 + }, + { + "epoch": 1.7853787123235834, + "grad_norm": 0.06483355164527893, + "learning_rate": 3.466856031410926e-07, + "loss": 0.0024, + "step": 278370 + }, + { + "epoch": 1.7854428492173695, + "grad_norm": 0.04346863552927971, + "learning_rate": 3.4648084922591806e-07, + "loss": 0.0007, + "step": 278380 + }, + { + "epoch": 1.7855069861111557, + "grad_norm": 0.06587924808263779, + "learning_rate": 3.4627615362266554e-07, + "loss": 0.0012, + "step": 278390 + }, + { + "epoch": 1.7855711230049418, + "grad_norm": 0.05727564916014671, + "learning_rate": 3.4607151633389903e-07, + "loss": 0.0012, + "step": 278400 + }, + { + "epoch": 1.785635259898728, + "grad_norm": 0.2357838749885559, + "learning_rate": 3.4586693736218204e-07, + "loss": 0.0016, + "step": 278410 + }, + { + "epoch": 1.785699396792514, + "grad_norm": 0.002354859374463558, + "learning_rate": 3.4566241671007806e-07, + "loss": 0.0005, + "step": 278420 + }, + { + "epoch": 1.7857635336863, + "grad_norm": 0.3780546188354492, + "learning_rate": 3.4545795438015115e-07, + "loss": 0.0043, + "step": 278430 + }, + { + "epoch": 1.7858276705800862, + "grad_norm": 0.04920445755124092, + "learning_rate": 3.452535503749621e-07, + "loss": 0.0016, + "step": 278440 + }, + { + "epoch": 1.785891807473872, + "grad_norm": 0.13359010219573975, + "learning_rate": 3.450492046970727e-07, + "loss": 0.0005, + "step": 278450 + }, + { + "epoch": 1.7859559443676583, + "grad_norm": 0.024329397827386856, + "learning_rate": 3.4484491734904423e-07, + "loss": 0.0012, + "step": 278460 + }, + { + "epoch": 1.7860200812614444, + "grad_norm": 0.03433268144726753, + "learning_rate": 3.446406883334358e-07, + "loss": 0.0042, + "step": 278470 + }, + { + "epoch": 1.7860842181552306, + "grad_norm": 0.06557345390319824, + "learning_rate": 3.4443651765280697e-07, + "loss": 0.0007, + "step": 278480 + }, + { + "epoch": 1.7861483550490167, + "grad_norm": 0.0781419649720192, + "learning_rate": 3.442324053097146e-07, + "loss": 0.0011, + "step": 278490 + }, + { + "epoch": 1.7862124919428028, + "grad_norm": 0.009028265252709389, + "learning_rate": 3.440283513067183e-07, + "loss": 0.0011, + "step": 278500 + }, + { + "epoch": 1.7862766288365888, + "grad_norm": 0.04159869998693466, + "learning_rate": 3.4382435564637496e-07, + "loss": 0.0014, + "step": 278510 + }, + { + "epoch": 1.786340765730375, + "grad_norm": 0.06454797834157944, + "learning_rate": 3.4362041833123973e-07, + "loss": 0.0005, + "step": 278520 + }, + { + "epoch": 1.7864049026241609, + "grad_norm": 0.04843491315841675, + "learning_rate": 3.4341653936386776e-07, + "loss": 0.0012, + "step": 278530 + }, + { + "epoch": 1.786469039517947, + "grad_norm": 0.017590438947081566, + "learning_rate": 3.4321271874681595e-07, + "loss": 0.0009, + "step": 278540 + }, + { + "epoch": 1.7865331764117331, + "grad_norm": 0.08637532591819763, + "learning_rate": 3.430089564826361e-07, + "loss": 0.0009, + "step": 278550 + }, + { + "epoch": 1.7865973133055193, + "grad_norm": 0.09738405793905258, + "learning_rate": 3.4280525257388284e-07, + "loss": 0.0012, + "step": 278560 + }, + { + "epoch": 1.7866614501993054, + "grad_norm": 0.005397073924541473, + "learning_rate": 3.4260160702310697e-07, + "loss": 0.0009, + "step": 278570 + }, + { + "epoch": 1.7867255870930916, + "grad_norm": 0.0016158544458448887, + "learning_rate": 3.4239801983286305e-07, + "loss": 0.0006, + "step": 278580 + }, + { + "epoch": 1.7867897239868777, + "grad_norm": 0.004870242904871702, + "learning_rate": 3.421944910057001e-07, + "loss": 0.0005, + "step": 278590 + }, + { + "epoch": 1.7868538608806637, + "grad_norm": 0.07605539262294769, + "learning_rate": 3.4199102054416843e-07, + "loss": 0.0014, + "step": 278600 + }, + { + "epoch": 1.7869179977744498, + "grad_norm": 0.023373080417513847, + "learning_rate": 3.4178760845081925e-07, + "loss": 0.0038, + "step": 278610 + }, + { + "epoch": 1.7869821346682357, + "grad_norm": 0.12973308563232422, + "learning_rate": 3.415842547282e-07, + "loss": 0.0012, + "step": 278620 + }, + { + "epoch": 1.787046271562022, + "grad_norm": 0.07479102164506912, + "learning_rate": 3.413809593788597e-07, + "loss": 0.0015, + "step": 278630 + }, + { + "epoch": 1.787110408455808, + "grad_norm": 0.009775402024388313, + "learning_rate": 3.4117772240534474e-07, + "loss": 0.0008, + "step": 278640 + }, + { + "epoch": 1.7871745453495942, + "grad_norm": 0.17945508658885956, + "learning_rate": 3.4097454381020356e-07, + "loss": 0.0009, + "step": 278650 + }, + { + "epoch": 1.7872386822433803, + "grad_norm": 0.20023804903030396, + "learning_rate": 3.4077142359598024e-07, + "loss": 0.0011, + "step": 278660 + }, + { + "epoch": 1.7873028191371665, + "grad_norm": 0.09040382504463196, + "learning_rate": 3.4056836176522166e-07, + "loss": 0.0008, + "step": 278670 + }, + { + "epoch": 1.7873669560309524, + "grad_norm": 0.041846659034490585, + "learning_rate": 3.403653583204708e-07, + "loss": 0.0009, + "step": 278680 + }, + { + "epoch": 1.7874310929247386, + "grad_norm": 0.04396964609622955, + "learning_rate": 3.4016241326427223e-07, + "loss": 0.0012, + "step": 278690 + }, + { + "epoch": 1.7874952298185247, + "grad_norm": 0.053148720413446426, + "learning_rate": 3.399595265991695e-07, + "loss": 0.0012, + "step": 278700 + }, + { + "epoch": 1.7875593667123106, + "grad_norm": 0.053744640201330185, + "learning_rate": 3.397566983277045e-07, + "loss": 0.0013, + "step": 278710 + }, + { + "epoch": 1.7876235036060968, + "grad_norm": 0.1659751683473587, + "learning_rate": 3.39553928452418e-07, + "loss": 0.001, + "step": 278720 + }, + { + "epoch": 1.787687640499883, + "grad_norm": 0.12354079633951187, + "learning_rate": 3.393512169758517e-07, + "loss": 0.0008, + "step": 278730 + }, + { + "epoch": 1.787751777393669, + "grad_norm": 0.06363257020711899, + "learning_rate": 3.3914856390054594e-07, + "loss": 0.001, + "step": 278740 + }, + { + "epoch": 1.7878159142874552, + "grad_norm": 0.05239519476890564, + "learning_rate": 3.3894596922903923e-07, + "loss": 0.0008, + "step": 278750 + }, + { + "epoch": 1.7878800511812414, + "grad_norm": 0.035413049161434174, + "learning_rate": 3.387434329638711e-07, + "loss": 0.0033, + "step": 278760 + }, + { + "epoch": 1.7879441880750273, + "grad_norm": 0.08839958161115646, + "learning_rate": 3.385409551075791e-07, + "loss": 0.0009, + "step": 278770 + }, + { + "epoch": 1.7880083249688135, + "grad_norm": 0.021664628759026527, + "learning_rate": 3.383385356627006e-07, + "loss": 0.0213, + "step": 278780 + }, + { + "epoch": 1.7880724618625994, + "grad_norm": 0.07869093120098114, + "learning_rate": 3.381361746317713e-07, + "loss": 0.002, + "step": 278790 + }, + { + "epoch": 1.7881365987563855, + "grad_norm": 0.07586157321929932, + "learning_rate": 3.379338720173275e-07, + "loss": 0.0009, + "step": 278800 + }, + { + "epoch": 1.7882007356501717, + "grad_norm": 0.10144965350627899, + "learning_rate": 3.3773162782190505e-07, + "loss": 0.0009, + "step": 278810 + }, + { + "epoch": 1.7882648725439578, + "grad_norm": 0.1888962835073471, + "learning_rate": 3.3752944204803626e-07, + "loss": 0.0011, + "step": 278820 + }, + { + "epoch": 1.788329009437744, + "grad_norm": 0.002420370001345873, + "learning_rate": 3.373273146982564e-07, + "loss": 0.0013, + "step": 278830 + }, + { + "epoch": 1.7883931463315301, + "grad_norm": 0.0930965393781662, + "learning_rate": 3.371252457750973e-07, + "loss": 0.0015, + "step": 278840 + }, + { + "epoch": 1.7884572832253163, + "grad_norm": 0.08777619153261185, + "learning_rate": 3.369232352810919e-07, + "loss": 0.0014, + "step": 278850 + }, + { + "epoch": 1.7885214201191022, + "grad_norm": 0.10435951501131058, + "learning_rate": 3.3672128321877104e-07, + "loss": 0.0011, + "step": 278860 + }, + { + "epoch": 1.7885855570128883, + "grad_norm": 0.013379015028476715, + "learning_rate": 3.365193895906649e-07, + "loss": 0.0014, + "step": 278870 + }, + { + "epoch": 1.7886496939066743, + "grad_norm": 0.09842795878648758, + "learning_rate": 3.3631755439930415e-07, + "loss": 0.0019, + "step": 278880 + }, + { + "epoch": 1.7887138308004604, + "grad_norm": 0.04331498220562935, + "learning_rate": 3.3611577764721735e-07, + "loss": 0.001, + "step": 278890 + }, + { + "epoch": 1.7887779676942466, + "grad_norm": 0.14871858060359955, + "learning_rate": 3.359140593369331e-07, + "loss": 0.0009, + "step": 278900 + }, + { + "epoch": 1.7888421045880327, + "grad_norm": 0.0670400857925415, + "learning_rate": 3.357123994709782e-07, + "loss": 0.0012, + "step": 278910 + }, + { + "epoch": 1.7889062414818189, + "grad_norm": 0.12930205464363098, + "learning_rate": 3.3551079805188183e-07, + "loss": 0.0007, + "step": 278920 + }, + { + "epoch": 1.788970378375605, + "grad_norm": 0.025455178692936897, + "learning_rate": 3.353092550821685e-07, + "loss": 0.001, + "step": 278930 + }, + { + "epoch": 1.789034515269391, + "grad_norm": 0.05392056703567505, + "learning_rate": 3.351077705643641e-07, + "loss": 0.001, + "step": 278940 + }, + { + "epoch": 1.789098652163177, + "grad_norm": 0.05469081178307533, + "learning_rate": 3.3490634450099203e-07, + "loss": 0.0007, + "step": 278950 + }, + { + "epoch": 1.789162789056963, + "grad_norm": 0.03116176836192608, + "learning_rate": 3.347049768945787e-07, + "loss": 0.001, + "step": 278960 + }, + { + "epoch": 1.7892269259507492, + "grad_norm": 0.08433020114898682, + "learning_rate": 3.3450366774764655e-07, + "loss": 0.002, + "step": 278970 + }, + { + "epoch": 1.7892910628445353, + "grad_norm": 0.07080574333667755, + "learning_rate": 3.343024170627174e-07, + "loss": 0.0012, + "step": 278980 + }, + { + "epoch": 1.7893551997383215, + "grad_norm": 0.2883979082107544, + "learning_rate": 3.3410122484231254e-07, + "loss": 0.0021, + "step": 278990 + }, + { + "epoch": 1.7894193366321076, + "grad_norm": 0.4545295536518097, + "learning_rate": 3.3390009108895503e-07, + "loss": 0.0072, + "step": 279000 + }, + { + "epoch": 1.7894834735258938, + "grad_norm": 0.20828689634799957, + "learning_rate": 3.336990158051645e-07, + "loss": 0.0018, + "step": 279010 + }, + { + "epoch": 1.78954761041968, + "grad_norm": 0.10651051998138428, + "learning_rate": 3.334979989934589e-07, + "loss": 0.0008, + "step": 279020 + }, + { + "epoch": 1.7896117473134658, + "grad_norm": 0.06715662032365799, + "learning_rate": 3.332970406563596e-07, + "loss": 0.0008, + "step": 279030 + }, + { + "epoch": 1.789675884207252, + "grad_norm": 0.05971822887659073, + "learning_rate": 3.330961407963834e-07, + "loss": 0.0011, + "step": 279040 + }, + { + "epoch": 1.789740021101038, + "grad_norm": 0.04333239048719406, + "learning_rate": 3.3289529941604836e-07, + "loss": 0.0006, + "step": 279050 + }, + { + "epoch": 1.789804157994824, + "grad_norm": 0.3848743736743927, + "learning_rate": 3.3269451651786966e-07, + "loss": 0.0014, + "step": 279060 + }, + { + "epoch": 1.7898682948886102, + "grad_norm": 0.0060179815627634525, + "learning_rate": 3.324937921043653e-07, + "loss": 0.0007, + "step": 279070 + }, + { + "epoch": 1.7899324317823964, + "grad_norm": 0.08131309598684311, + "learning_rate": 3.322931261780493e-07, + "loss": 0.0008, + "step": 279080 + }, + { + "epoch": 1.7899965686761825, + "grad_norm": 0.16063711047172546, + "learning_rate": 3.3209251874143645e-07, + "loss": 0.0005, + "step": 279090 + }, + { + "epoch": 1.7900607055699687, + "grad_norm": 0.11836498230695724, + "learning_rate": 3.318919697970396e-07, + "loss": 0.0018, + "step": 279100 + }, + { + "epoch": 1.7901248424637546, + "grad_norm": 0.012969082221388817, + "learning_rate": 3.3169147934737355e-07, + "loss": 0.0018, + "step": 279110 + }, + { + "epoch": 1.7901889793575407, + "grad_norm": 0.13354268670082092, + "learning_rate": 3.3149104739494954e-07, + "loss": 0.0009, + "step": 279120 + }, + { + "epoch": 1.7902531162513269, + "grad_norm": 0.023537177592515945, + "learning_rate": 3.312906739422794e-07, + "loss": 0.0004, + "step": 279130 + }, + { + "epoch": 1.7903172531451128, + "grad_norm": 0.018479079008102417, + "learning_rate": 3.310903589918729e-07, + "loss": 0.0012, + "step": 279140 + }, + { + "epoch": 1.790381390038899, + "grad_norm": 0.07216761261224747, + "learning_rate": 3.308901025462419e-07, + "loss": 0.0009, + "step": 279150 + }, + { + "epoch": 1.790445526932685, + "grad_norm": 0.012507503852248192, + "learning_rate": 3.306899046078949e-07, + "loss": 0.0005, + "step": 279160 + }, + { + "epoch": 1.7905096638264713, + "grad_norm": 0.05727556720376015, + "learning_rate": 3.3048976517933984e-07, + "loss": 0.0012, + "step": 279170 + }, + { + "epoch": 1.7905738007202574, + "grad_norm": 0.06608857214450836, + "learning_rate": 3.302896842630854e-07, + "loss": 0.0014, + "step": 279180 + }, + { + "epoch": 1.7906379376140436, + "grad_norm": 0.09351908415555954, + "learning_rate": 3.300896618616395e-07, + "loss": 0.0006, + "step": 279190 + }, + { + "epoch": 1.7907020745078295, + "grad_norm": 0.09092344343662262, + "learning_rate": 3.298896979775068e-07, + "loss": 0.0011, + "step": 279200 + }, + { + "epoch": 1.7907662114016156, + "grad_norm": 0.13009698688983917, + "learning_rate": 3.296897926131937e-07, + "loss": 0.0008, + "step": 279210 + }, + { + "epoch": 1.7908303482954016, + "grad_norm": 0.05622050166130066, + "learning_rate": 3.2948994577120585e-07, + "loss": 0.001, + "step": 279220 + }, + { + "epoch": 1.7908944851891877, + "grad_norm": 0.03919670730829239, + "learning_rate": 3.292901574540469e-07, + "loss": 0.0005, + "step": 279230 + }, + { + "epoch": 1.7909586220829739, + "grad_norm": 0.08288165181875229, + "learning_rate": 3.2909042766422037e-07, + "loss": 0.0007, + "step": 279240 + }, + { + "epoch": 1.79102275897676, + "grad_norm": 0.09894700348377228, + "learning_rate": 3.2889075640422876e-07, + "loss": 0.0013, + "step": 279250 + }, + { + "epoch": 1.7910868958705461, + "grad_norm": 0.14007550477981567, + "learning_rate": 3.2869114367657437e-07, + "loss": 0.0012, + "step": 279260 + }, + { + "epoch": 1.7911510327643323, + "grad_norm": 0.07528212666511536, + "learning_rate": 3.284915894837587e-07, + "loss": 0.0007, + "step": 279270 + }, + { + "epoch": 1.7912151696581184, + "grad_norm": 0.036512792110443115, + "learning_rate": 3.282920938282824e-07, + "loss": 0.0007, + "step": 279280 + }, + { + "epoch": 1.7912793065519044, + "grad_norm": 0.08713310956954956, + "learning_rate": 3.280926567126441e-07, + "loss": 0.0007, + "step": 279290 + }, + { + "epoch": 1.7913434434456905, + "grad_norm": 0.03951220214366913, + "learning_rate": 3.278932781393446e-07, + "loss": 0.0008, + "step": 279300 + }, + { + "epoch": 1.7914075803394764, + "grad_norm": 0.2905653715133667, + "learning_rate": 3.276939581108812e-07, + "loss": 0.0018, + "step": 279310 + }, + { + "epoch": 1.7914717172332626, + "grad_norm": 0.030956579372286797, + "learning_rate": 3.2749469662975096e-07, + "loss": 0.0009, + "step": 279320 + }, + { + "epoch": 1.7915358541270487, + "grad_norm": 0.11684050410985947, + "learning_rate": 3.272954936984524e-07, + "loss": 0.0011, + "step": 279330 + }, + { + "epoch": 1.791599991020835, + "grad_norm": 0.307887464761734, + "learning_rate": 3.270963493194812e-07, + "loss": 0.0015, + "step": 279340 + }, + { + "epoch": 1.791664127914621, + "grad_norm": 0.049778252840042114, + "learning_rate": 3.2689726349533213e-07, + "loss": 0.0008, + "step": 279350 + }, + { + "epoch": 1.7917282648084072, + "grad_norm": 0.04836338013410568, + "learning_rate": 3.266982362284993e-07, + "loss": 0.0012, + "step": 279360 + }, + { + "epoch": 1.7917924017021931, + "grad_norm": 0.03864509239792824, + "learning_rate": 3.2649926752147843e-07, + "loss": 0.0011, + "step": 279370 + }, + { + "epoch": 1.7918565385959793, + "grad_norm": 0.004542256239801645, + "learning_rate": 3.2630035737676203e-07, + "loss": 0.0007, + "step": 279380 + }, + { + "epoch": 1.7919206754897652, + "grad_norm": 0.26342859864234924, + "learning_rate": 3.2610150579684195e-07, + "loss": 0.002, + "step": 279390 + }, + { + "epoch": 1.7919848123835513, + "grad_norm": 0.07165306806564331, + "learning_rate": 3.259027127842096e-07, + "loss": 0.002, + "step": 279400 + }, + { + "epoch": 1.7920489492773375, + "grad_norm": 0.09093828499317169, + "learning_rate": 3.257039783413579e-07, + "loss": 0.0008, + "step": 279410 + }, + { + "epoch": 1.7921130861711236, + "grad_norm": 0.019808035343885422, + "learning_rate": 3.2550530247077605e-07, + "loss": 0.0007, + "step": 279420 + }, + { + "epoch": 1.7921772230649098, + "grad_norm": 0.02690022811293602, + "learning_rate": 3.253066851749526e-07, + "loss": 0.0016, + "step": 279430 + }, + { + "epoch": 1.792241359958696, + "grad_norm": 0.04569307714700699, + "learning_rate": 3.2510812645637824e-07, + "loss": 0.0008, + "step": 279440 + }, + { + "epoch": 1.792305496852482, + "grad_norm": 0.21289020776748657, + "learning_rate": 3.2490962631754e-07, + "loss": 0.0016, + "step": 279450 + }, + { + "epoch": 1.792369633746268, + "grad_norm": 0.09713505208492279, + "learning_rate": 3.247111847609252e-07, + "loss": 0.0009, + "step": 279460 + }, + { + "epoch": 1.7924337706400542, + "grad_norm": 0.04144668206572533, + "learning_rate": 3.2451280178902033e-07, + "loss": 0.0003, + "step": 279470 + }, + { + "epoch": 1.79249790753384, + "grad_norm": 0.21650367975234985, + "learning_rate": 3.2431447740431224e-07, + "loss": 0.0025, + "step": 279480 + }, + { + "epoch": 1.7925620444276262, + "grad_norm": 0.04582495987415314, + "learning_rate": 3.2411621160928497e-07, + "loss": 0.001, + "step": 279490 + }, + { + "epoch": 1.7926261813214124, + "grad_norm": 0.11708521842956543, + "learning_rate": 3.2391800440642384e-07, + "loss": 0.001, + "step": 279500 + }, + { + "epoch": 1.7926903182151985, + "grad_norm": 0.016406027600169182, + "learning_rate": 3.237198557982113e-07, + "loss": 0.0011, + "step": 279510 + }, + { + "epoch": 1.7927544551089847, + "grad_norm": 0.0726175531744957, + "learning_rate": 3.2352176578713144e-07, + "loss": 0.0007, + "step": 279520 + }, + { + "epoch": 1.7928185920027708, + "grad_norm": 0.1482061743736267, + "learning_rate": 3.233237343756662e-07, + "loss": 0.0027, + "step": 279530 + }, + { + "epoch": 1.792882728896557, + "grad_norm": 0.0564088337123394, + "learning_rate": 3.2312576156629747e-07, + "loss": 0.0008, + "step": 279540 + }, + { + "epoch": 1.792946865790343, + "grad_norm": 0.09703831374645233, + "learning_rate": 3.229278473615044e-07, + "loss": 0.001, + "step": 279550 + }, + { + "epoch": 1.793011002684129, + "grad_norm": 0.03012095019221306, + "learning_rate": 3.227299917637683e-07, + "loss": 0.0014, + "step": 279560 + }, + { + "epoch": 1.793075139577915, + "grad_norm": 0.023541104048490524, + "learning_rate": 3.2253219477556886e-07, + "loss": 0.0008, + "step": 279570 + }, + { + "epoch": 1.7931392764717011, + "grad_norm": 0.059293895959854126, + "learning_rate": 3.22334456399383e-07, + "loss": 0.001, + "step": 279580 + }, + { + "epoch": 1.7932034133654873, + "grad_norm": 0.03531581535935402, + "learning_rate": 3.221367766376893e-07, + "loss": 0.0015, + "step": 279590 + }, + { + "epoch": 1.7932675502592734, + "grad_norm": 0.014433217234909534, + "learning_rate": 3.2193915549296575e-07, + "loss": 0.0011, + "step": 279600 + }, + { + "epoch": 1.7933316871530596, + "grad_norm": 0.030258553102612495, + "learning_rate": 3.2174159296768816e-07, + "loss": 0.0009, + "step": 279610 + }, + { + "epoch": 1.7933958240468457, + "grad_norm": 0.04009813070297241, + "learning_rate": 3.2154408906433174e-07, + "loss": 0.001, + "step": 279620 + }, + { + "epoch": 1.7934599609406316, + "grad_norm": 0.08422304689884186, + "learning_rate": 3.2134664378537075e-07, + "loss": 0.0014, + "step": 279630 + }, + { + "epoch": 1.7935240978344178, + "grad_norm": 0.02794818766415119, + "learning_rate": 3.2114925713328084e-07, + "loss": 0.0011, + "step": 279640 + }, + { + "epoch": 1.7935882347282037, + "grad_norm": 0.1486392766237259, + "learning_rate": 3.209519291105351e-07, + "loss": 0.0008, + "step": 279650 + }, + { + "epoch": 1.7936523716219899, + "grad_norm": 0.033261239528656006, + "learning_rate": 3.207546597196054e-07, + "loss": 0.0012, + "step": 279660 + }, + { + "epoch": 1.793716508515776, + "grad_norm": 0.06807536631822586, + "learning_rate": 3.2055744896296313e-07, + "loss": 0.0016, + "step": 279670 + }, + { + "epoch": 1.7937806454095622, + "grad_norm": 0.025683945044875145, + "learning_rate": 3.2036029684308133e-07, + "loss": 0.0007, + "step": 279680 + }, + { + "epoch": 1.7938447823033483, + "grad_norm": 0.012448211200535297, + "learning_rate": 3.201632033624297e-07, + "loss": 0.0017, + "step": 279690 + }, + { + "epoch": 1.7939089191971345, + "grad_norm": 0.00342944567091763, + "learning_rate": 3.1996616852347673e-07, + "loss": 0.001, + "step": 279700 + }, + { + "epoch": 1.7939730560909206, + "grad_norm": 0.007278515491634607, + "learning_rate": 3.197691923286933e-07, + "loss": 0.0014, + "step": 279710 + }, + { + "epoch": 1.7940371929847065, + "grad_norm": 0.05002468824386597, + "learning_rate": 3.195722747805463e-07, + "loss": 0.0005, + "step": 279720 + }, + { + "epoch": 1.7941013298784927, + "grad_norm": 0.03734104707837105, + "learning_rate": 3.193754158815043e-07, + "loss": 0.0011, + "step": 279730 + }, + { + "epoch": 1.7941654667722786, + "grad_norm": 0.06979603320360184, + "learning_rate": 3.191786156340326e-07, + "loss": 0.0003, + "step": 279740 + }, + { + "epoch": 1.7942296036660648, + "grad_norm": 0.0816522017121315, + "learning_rate": 3.189818740405992e-07, + "loss": 0.0009, + "step": 279750 + }, + { + "epoch": 1.794293740559851, + "grad_norm": 0.10368962585926056, + "learning_rate": 3.1878519110366824e-07, + "loss": 0.001, + "step": 279760 + }, + { + "epoch": 1.794357877453637, + "grad_norm": 0.0599248968064785, + "learning_rate": 3.185885668257044e-07, + "loss": 0.001, + "step": 279770 + }, + { + "epoch": 1.7944220143474232, + "grad_norm": 0.11840607225894928, + "learning_rate": 3.183920012091707e-07, + "loss": 0.0006, + "step": 279780 + }, + { + "epoch": 1.7944861512412094, + "grad_norm": 0.24062080681324005, + "learning_rate": 3.1819549425653186e-07, + "loss": 0.0024, + "step": 279790 + }, + { + "epoch": 1.7945502881349953, + "grad_norm": 0.08701277524232864, + "learning_rate": 3.1799904597024976e-07, + "loss": 0.0005, + "step": 279800 + }, + { + "epoch": 1.7946144250287814, + "grad_norm": 0.11865556240081787, + "learning_rate": 3.1780265635278583e-07, + "loss": 0.0005, + "step": 279810 + }, + { + "epoch": 1.7946785619225676, + "grad_norm": 0.10071233659982681, + "learning_rate": 3.1760632540659975e-07, + "loss": 0.0007, + "step": 279820 + }, + { + "epoch": 1.7947426988163535, + "grad_norm": 0.029392078518867493, + "learning_rate": 3.1741005313415394e-07, + "loss": 0.0007, + "step": 279830 + }, + { + "epoch": 1.7948068357101397, + "grad_norm": 0.08132757991552353, + "learning_rate": 3.172138395379065e-07, + "loss": 0.0018, + "step": 279840 + }, + { + "epoch": 1.7948709726039258, + "grad_norm": 0.1531228870153427, + "learning_rate": 3.1701768462031603e-07, + "loss": 0.0013, + "step": 279850 + }, + { + "epoch": 1.794935109497712, + "grad_norm": 0.07538601756095886, + "learning_rate": 3.168215883838416e-07, + "loss": 0.001, + "step": 279860 + }, + { + "epoch": 1.794999246391498, + "grad_norm": 0.0838082954287529, + "learning_rate": 3.1662555083093906e-07, + "loss": 0.0007, + "step": 279870 + }, + { + "epoch": 1.7950633832852843, + "grad_norm": 0.01654684729874134, + "learning_rate": 3.1642957196406586e-07, + "loss": 0.0011, + "step": 279880 + }, + { + "epoch": 1.7951275201790702, + "grad_norm": 0.04836370795965195, + "learning_rate": 3.1623365178567677e-07, + "loss": 0.0008, + "step": 279890 + }, + { + "epoch": 1.7951916570728563, + "grad_norm": 0.04304756596684456, + "learning_rate": 3.160377902982287e-07, + "loss": 0.0007, + "step": 279900 + }, + { + "epoch": 1.7952557939666423, + "grad_norm": 0.07920753955841064, + "learning_rate": 3.1584198750417403e-07, + "loss": 0.0012, + "step": 279910 + }, + { + "epoch": 1.7953199308604284, + "grad_norm": 0.07729573547840118, + "learning_rate": 3.1564624340596704e-07, + "loss": 0.0011, + "step": 279920 + }, + { + "epoch": 1.7953840677542146, + "grad_norm": 0.13962076604366302, + "learning_rate": 3.154505580060596e-07, + "loss": 0.0013, + "step": 279930 + }, + { + "epoch": 1.7954482046480007, + "grad_norm": 0.09074907004833221, + "learning_rate": 3.152549313069059e-07, + "loss": 0.0009, + "step": 279940 + }, + { + "epoch": 1.7955123415417868, + "grad_norm": 0.0053997826762497425, + "learning_rate": 3.1505936331095557e-07, + "loss": 0.0005, + "step": 279950 + }, + { + "epoch": 1.795576478435573, + "grad_norm": 0.24059101939201355, + "learning_rate": 3.1486385402066e-07, + "loss": 0.002, + "step": 279960 + }, + { + "epoch": 1.7956406153293591, + "grad_norm": 0.04343503713607788, + "learning_rate": 3.1466840343846785e-07, + "loss": 0.0014, + "step": 279970 + }, + { + "epoch": 1.795704752223145, + "grad_norm": 0.16977642476558685, + "learning_rate": 3.144730115668304e-07, + "loss": 0.0015, + "step": 279980 + }, + { + "epoch": 1.7957688891169312, + "grad_norm": 0.026483383029699326, + "learning_rate": 3.142776784081941e-07, + "loss": 0.0005, + "step": 279990 + }, + { + "epoch": 1.7958330260107171, + "grad_norm": 0.22658784687519073, + "learning_rate": 3.14082403965007e-07, + "loss": 0.0012, + "step": 280000 + }, + { + "epoch": 1.7958971629045033, + "grad_norm": 0.027925442904233932, + "learning_rate": 3.138871882397171e-07, + "loss": 0.0009, + "step": 280010 + }, + { + "epoch": 1.7959612997982894, + "grad_norm": 0.06454706937074661, + "learning_rate": 3.1369203123476964e-07, + "loss": 0.0016, + "step": 280020 + }, + { + "epoch": 1.7960254366920756, + "grad_norm": 0.056851208209991455, + "learning_rate": 3.134969329526105e-07, + "loss": 0.0005, + "step": 280030 + }, + { + "epoch": 1.7960895735858617, + "grad_norm": 0.03306282311677933, + "learning_rate": 3.133018933956833e-07, + "loss": 0.0004, + "step": 280040 + }, + { + "epoch": 1.796153710479648, + "grad_norm": 0.018016841262578964, + "learning_rate": 3.131069125664332e-07, + "loss": 0.0005, + "step": 280050 + }, + { + "epoch": 1.7962178473734338, + "grad_norm": 0.9242339134216309, + "learning_rate": 3.1291199046730393e-07, + "loss": 0.0069, + "step": 280060 + }, + { + "epoch": 1.79628198426722, + "grad_norm": 0.01819530874490738, + "learning_rate": 3.1271712710073675e-07, + "loss": 0.0009, + "step": 280070 + }, + { + "epoch": 1.796346121161006, + "grad_norm": 0.013147273100912571, + "learning_rate": 3.1252232246917314e-07, + "loss": 0.0006, + "step": 280080 + }, + { + "epoch": 1.796410258054792, + "grad_norm": 0.011244059540331364, + "learning_rate": 3.1232757657505605e-07, + "loss": 0.0008, + "step": 280090 + }, + { + "epoch": 1.7964743949485782, + "grad_norm": 0.036678630858659744, + "learning_rate": 3.1213288942082474e-07, + "loss": 0.0008, + "step": 280100 + }, + { + "epoch": 1.7965385318423643, + "grad_norm": 0.03329041227698326, + "learning_rate": 3.119382610089183e-07, + "loss": 0.0003, + "step": 280110 + }, + { + "epoch": 1.7966026687361505, + "grad_norm": 0.059137530624866486, + "learning_rate": 3.117436913417754e-07, + "loss": 0.0012, + "step": 280120 + }, + { + "epoch": 1.7966668056299366, + "grad_norm": 0.05240783840417862, + "learning_rate": 3.115491804218351e-07, + "loss": 0.0009, + "step": 280130 + }, + { + "epoch": 1.7967309425237228, + "grad_norm": 0.14184416830539703, + "learning_rate": 3.1135472825153447e-07, + "loss": 0.0016, + "step": 280140 + }, + { + "epoch": 1.7967950794175087, + "grad_norm": 0.05168509483337402, + "learning_rate": 3.111603348333098e-07, + "loss": 0.0007, + "step": 280150 + }, + { + "epoch": 1.7968592163112949, + "grad_norm": 0.044936347752809525, + "learning_rate": 3.109660001695974e-07, + "loss": 0.0017, + "step": 280160 + }, + { + "epoch": 1.7969233532050808, + "grad_norm": 0.04893936589360237, + "learning_rate": 3.107717242628322e-07, + "loss": 0.0006, + "step": 280170 + }, + { + "epoch": 1.796987490098867, + "grad_norm": 0.07789768278598785, + "learning_rate": 3.105775071154493e-07, + "loss": 0.0026, + "step": 280180 + }, + { + "epoch": 1.797051626992653, + "grad_norm": 0.03610196337103844, + "learning_rate": 3.1038334872988016e-07, + "loss": 0.0011, + "step": 280190 + }, + { + "epoch": 1.7971157638864392, + "grad_norm": 0.003820534097030759, + "learning_rate": 3.1018924910856064e-07, + "loss": 0.0005, + "step": 280200 + }, + { + "epoch": 1.7971799007802254, + "grad_norm": 0.032815784215927124, + "learning_rate": 3.099952082539209e-07, + "loss": 0.0007, + "step": 280210 + }, + { + "epoch": 1.7972440376740115, + "grad_norm": 0.053888414055109024, + "learning_rate": 3.0980122616839357e-07, + "loss": 0.0015, + "step": 280220 + }, + { + "epoch": 1.7973081745677975, + "grad_norm": 0.14324425160884857, + "learning_rate": 3.096073028544083e-07, + "loss": 0.0012, + "step": 280230 + }, + { + "epoch": 1.7973723114615836, + "grad_norm": 0.08394581079483032, + "learning_rate": 3.0941343831439597e-07, + "loss": 0.0009, + "step": 280240 + }, + { + "epoch": 1.7974364483553698, + "grad_norm": 0.04315227270126343, + "learning_rate": 3.0921963255078625e-07, + "loss": 0.0009, + "step": 280250 + }, + { + "epoch": 1.7975005852491557, + "grad_norm": 0.011538634076714516, + "learning_rate": 3.090258855660061e-07, + "loss": 0.0007, + "step": 280260 + }, + { + "epoch": 1.7975647221429418, + "grad_norm": 0.06566084176301956, + "learning_rate": 3.0883219736248417e-07, + "loss": 0.0005, + "step": 280270 + }, + { + "epoch": 1.797628859036728, + "grad_norm": 0.07556148618459702, + "learning_rate": 3.086385679426479e-07, + "loss": 0.0012, + "step": 280280 + }, + { + "epoch": 1.7976929959305141, + "grad_norm": 0.11681356281042099, + "learning_rate": 3.0844499730892376e-07, + "loss": 0.0008, + "step": 280290 + }, + { + "epoch": 1.7977571328243003, + "grad_norm": 0.051375046372413635, + "learning_rate": 3.082514854637353e-07, + "loss": 0.0009, + "step": 280300 + }, + { + "epoch": 1.7978212697180864, + "grad_norm": 0.08355208486318588, + "learning_rate": 3.0805803240951e-07, + "loss": 0.0009, + "step": 280310 + }, + { + "epoch": 1.7978854066118723, + "grad_norm": 0.16111016273498535, + "learning_rate": 3.0786463814867106e-07, + "loss": 0.0007, + "step": 280320 + }, + { + "epoch": 1.7979495435056585, + "grad_norm": 0.13881529867649078, + "learning_rate": 3.076713026836414e-07, + "loss": 0.0012, + "step": 280330 + }, + { + "epoch": 1.7980136803994444, + "grad_norm": 0.17221049964427948, + "learning_rate": 3.074780260168431e-07, + "loss": 0.001, + "step": 280340 + }, + { + "epoch": 1.7980778172932306, + "grad_norm": 0.10530173778533936, + "learning_rate": 3.072848081506996e-07, + "loss": 0.001, + "step": 280350 + }, + { + "epoch": 1.7981419541870167, + "grad_norm": 0.0914389118552208, + "learning_rate": 3.0709164908763025e-07, + "loss": 0.001, + "step": 280360 + }, + { + "epoch": 1.7982060910808029, + "grad_norm": 0.10517113655805588, + "learning_rate": 3.0689854883005746e-07, + "loss": 0.0007, + "step": 280370 + }, + { + "epoch": 1.798270227974589, + "grad_norm": 0.002558621345087886, + "learning_rate": 3.067055073804004e-07, + "loss": 0.0016, + "step": 280380 + }, + { + "epoch": 1.7983343648683752, + "grad_norm": 0.11635466665029526, + "learning_rate": 3.0651252474107607e-07, + "loss": 0.0012, + "step": 280390 + }, + { + "epoch": 1.7983985017621613, + "grad_norm": 0.09409769624471664, + "learning_rate": 3.063196009145053e-07, + "loss": 0.0007, + "step": 280400 + }, + { + "epoch": 1.7984626386559472, + "grad_norm": 0.05665315315127373, + "learning_rate": 3.061267359031045e-07, + "loss": 0.0015, + "step": 280410 + }, + { + "epoch": 1.7985267755497334, + "grad_norm": 0.025883320719003677, + "learning_rate": 3.0593392970929004e-07, + "loss": 0.0011, + "step": 280420 + }, + { + "epoch": 1.7985909124435193, + "grad_norm": 0.1190020889043808, + "learning_rate": 3.0574118233547834e-07, + "loss": 0.0011, + "step": 280430 + }, + { + "epoch": 1.7986550493373055, + "grad_norm": 0.06334960460662842, + "learning_rate": 3.055484937840847e-07, + "loss": 0.0007, + "step": 280440 + }, + { + "epoch": 1.7987191862310916, + "grad_norm": 0.014685377478599548, + "learning_rate": 3.053558640575238e-07, + "loss": 0.0009, + "step": 280450 + }, + { + "epoch": 1.7987833231248778, + "grad_norm": 0.04168510064482689, + "learning_rate": 3.051632931582077e-07, + "loss": 0.0007, + "step": 280460 + }, + { + "epoch": 1.798847460018664, + "grad_norm": 0.0745929628610611, + "learning_rate": 3.0497078108855214e-07, + "loss": 0.001, + "step": 280470 + }, + { + "epoch": 1.79891159691245, + "grad_norm": 0.06373525410890579, + "learning_rate": 3.047783278509681e-07, + "loss": 0.0013, + "step": 280480 + }, + { + "epoch": 1.798975733806236, + "grad_norm": 0.09083700180053711, + "learning_rate": 3.0458593344786737e-07, + "loss": 0.001, + "step": 280490 + }, + { + "epoch": 1.7990398707000221, + "grad_norm": 0.08681952208280563, + "learning_rate": 3.0439359788165977e-07, + "loss": 0.001, + "step": 280500 + }, + { + "epoch": 1.799104007593808, + "grad_norm": 0.07414544373750687, + "learning_rate": 3.042013211547573e-07, + "loss": 0.0009, + "step": 280510 + }, + { + "epoch": 1.7991681444875942, + "grad_norm": 0.005429443903267384, + "learning_rate": 3.0400910326956803e-07, + "loss": 0.0012, + "step": 280520 + }, + { + "epoch": 1.7992322813813804, + "grad_norm": 0.00619896687567234, + "learning_rate": 3.0381694422850107e-07, + "loss": 0.0008, + "step": 280530 + }, + { + "epoch": 1.7992964182751665, + "grad_norm": 0.04532570391893387, + "learning_rate": 3.0362484403396286e-07, + "loss": 0.0005, + "step": 280540 + }, + { + "epoch": 1.7993605551689527, + "grad_norm": 0.2156553417444229, + "learning_rate": 3.0343280268836315e-07, + "loss": 0.0014, + "step": 280550 + }, + { + "epoch": 1.7994246920627388, + "grad_norm": 0.02299816720187664, + "learning_rate": 3.032408201941067e-07, + "loss": 0.0007, + "step": 280560 + }, + { + "epoch": 1.799488828956525, + "grad_norm": 0.22444169223308563, + "learning_rate": 3.0304889655359884e-07, + "loss": 0.0017, + "step": 280570 + }, + { + "epoch": 1.7995529658503109, + "grad_norm": 0.18806947767734528, + "learning_rate": 3.0285703176924587e-07, + "loss": 0.0008, + "step": 280580 + }, + { + "epoch": 1.799617102744097, + "grad_norm": 0.11151966452598572, + "learning_rate": 3.0266522584345147e-07, + "loss": 0.0012, + "step": 280590 + }, + { + "epoch": 1.799681239637883, + "grad_norm": 0.0499407984316349, + "learning_rate": 3.024734787786188e-07, + "loss": 0.0017, + "step": 280600 + }, + { + "epoch": 1.799745376531669, + "grad_norm": 0.019490646198391914, + "learning_rate": 3.022817905771497e-07, + "loss": 0.0006, + "step": 280610 + }, + { + "epoch": 1.7998095134254553, + "grad_norm": 0.04655775800347328, + "learning_rate": 3.020901612414484e-07, + "loss": 0.0006, + "step": 280620 + }, + { + "epoch": 1.7998736503192414, + "grad_norm": 0.06663881987333298, + "learning_rate": 3.018985907739147e-07, + "loss": 0.0004, + "step": 280630 + }, + { + "epoch": 1.7999377872130276, + "grad_norm": 0.010833787731826305, + "learning_rate": 3.0170707917694885e-07, + "loss": 0.0011, + "step": 280640 + }, + { + "epoch": 1.8000019241068137, + "grad_norm": 0.09659522026777267, + "learning_rate": 3.0151562645295116e-07, + "loss": 0.0011, + "step": 280650 + }, + { + "epoch": 1.8000660610005996, + "grad_norm": 0.08453579992055893, + "learning_rate": 3.0132423260432084e-07, + "loss": 0.0007, + "step": 280660 + }, + { + "epoch": 1.8001301978943858, + "grad_norm": 0.03279484063386917, + "learning_rate": 3.011328976334565e-07, + "loss": 0.002, + "step": 280670 + }, + { + "epoch": 1.800194334788172, + "grad_norm": 0.30616578459739685, + "learning_rate": 3.009416215427546e-07, + "loss": 0.001, + "step": 280680 + }, + { + "epoch": 1.8002584716819579, + "grad_norm": 0.046144042164087296, + "learning_rate": 3.0075040433461213e-07, + "loss": 0.0014, + "step": 280690 + }, + { + "epoch": 1.800322608575744, + "grad_norm": 0.14769472181797028, + "learning_rate": 3.005592460114265e-07, + "loss": 0.0006, + "step": 280700 + }, + { + "epoch": 1.8003867454695301, + "grad_norm": 0.09131012111902237, + "learning_rate": 3.003681465755914e-07, + "loss": 0.0013, + "step": 280710 + }, + { + "epoch": 1.8004508823633163, + "grad_norm": 0.08563213795423508, + "learning_rate": 3.001771060295022e-07, + "loss": 0.0007, + "step": 280720 + }, + { + "epoch": 1.8005150192571024, + "grad_norm": 0.0615537166595459, + "learning_rate": 2.9998612437555365e-07, + "loss": 0.0005, + "step": 280730 + }, + { + "epoch": 1.8005791561508886, + "grad_norm": 0.008855455555021763, + "learning_rate": 2.997952016161376e-07, + "loss": 0.002, + "step": 280740 + }, + { + "epoch": 1.8006432930446745, + "grad_norm": 0.07653220742940903, + "learning_rate": 2.9960433775364674e-07, + "loss": 0.0006, + "step": 280750 + }, + { + "epoch": 1.8007074299384607, + "grad_norm": 0.0029082682449370623, + "learning_rate": 2.994135327904724e-07, + "loss": 0.0005, + "step": 280760 + }, + { + "epoch": 1.8007715668322466, + "grad_norm": 0.10379134863615036, + "learning_rate": 2.992227867290065e-07, + "loss": 0.0008, + "step": 280770 + }, + { + "epoch": 1.8008357037260327, + "grad_norm": 0.06596032530069351, + "learning_rate": 2.990320995716389e-07, + "loss": 0.002, + "step": 280780 + }, + { + "epoch": 1.800899840619819, + "grad_norm": 0.08790586143732071, + "learning_rate": 2.988414713207588e-07, + "loss": 0.0007, + "step": 280790 + }, + { + "epoch": 1.800963977513605, + "grad_norm": 0.04251585528254509, + "learning_rate": 2.986509019787537e-07, + "loss": 0.0008, + "step": 280800 + }, + { + "epoch": 1.8010281144073912, + "grad_norm": 0.06360335648059845, + "learning_rate": 2.984603915480139e-07, + "loss": 0.0014, + "step": 280810 + }, + { + "epoch": 1.8010922513011773, + "grad_norm": 0.03347192704677582, + "learning_rate": 2.982699400309258e-07, + "loss": 0.0007, + "step": 280820 + }, + { + "epoch": 1.8011563881949635, + "grad_norm": 0.10828061401844025, + "learning_rate": 2.9807954742987534e-07, + "loss": 0.0016, + "step": 280830 + }, + { + "epoch": 1.8012205250887494, + "grad_norm": 0.018905604258179665, + "learning_rate": 2.978892137472478e-07, + "loss": 0.0025, + "step": 280840 + }, + { + "epoch": 1.8012846619825356, + "grad_norm": 0.06821412593126297, + "learning_rate": 2.976989389854301e-07, + "loss": 0.0028, + "step": 280850 + }, + { + "epoch": 1.8013487988763215, + "grad_norm": 0.09935334324836731, + "learning_rate": 2.975087231468049e-07, + "loss": 0.001, + "step": 280860 + }, + { + "epoch": 1.8014129357701076, + "grad_norm": 0.0035753112751990557, + "learning_rate": 2.973185662337558e-07, + "loss": 0.0013, + "step": 280870 + }, + { + "epoch": 1.8014770726638938, + "grad_norm": 0.1754932999610901, + "learning_rate": 2.971284682486664e-07, + "loss": 0.0013, + "step": 280880 + }, + { + "epoch": 1.80154120955768, + "grad_norm": 0.0921214148402214, + "learning_rate": 2.969384291939187e-07, + "loss": 0.0011, + "step": 280890 + }, + { + "epoch": 1.801605346451466, + "grad_norm": 0.1432764232158661, + "learning_rate": 2.967484490718936e-07, + "loss": 0.0012, + "step": 280900 + }, + { + "epoch": 1.8016694833452522, + "grad_norm": 0.140858456492424, + "learning_rate": 2.9655852788497087e-07, + "loss": 0.0026, + "step": 280910 + }, + { + "epoch": 1.8017336202390382, + "grad_norm": 0.14326424896717072, + "learning_rate": 2.963686656355319e-07, + "loss": 0.0021, + "step": 280920 + }, + { + "epoch": 1.8017977571328243, + "grad_norm": 0.12035234272480011, + "learning_rate": 2.9617886232595596e-07, + "loss": 0.0019, + "step": 280930 + }, + { + "epoch": 1.8018618940266102, + "grad_norm": 0.05423832684755325, + "learning_rate": 2.9598911795862e-07, + "loss": 0.0006, + "step": 280940 + }, + { + "epoch": 1.8019260309203964, + "grad_norm": 0.07330697029829025, + "learning_rate": 2.957994325359015e-07, + "loss": 0.0021, + "step": 280950 + }, + { + "epoch": 1.8019901678141825, + "grad_norm": 0.07319719344377518, + "learning_rate": 2.9560980606017917e-07, + "loss": 0.0011, + "step": 280960 + }, + { + "epoch": 1.8020543047079687, + "grad_norm": 0.013270118273794651, + "learning_rate": 2.954202385338284e-07, + "loss": 0.0007, + "step": 280970 + }, + { + "epoch": 1.8021184416017548, + "grad_norm": 0.004556635860353708, + "learning_rate": 2.9523072995922387e-07, + "loss": 0.0007, + "step": 280980 + }, + { + "epoch": 1.802182578495541, + "grad_norm": 0.11629913747310638, + "learning_rate": 2.9504128033874035e-07, + "loss": 0.001, + "step": 280990 + }, + { + "epoch": 1.8022467153893271, + "grad_norm": 0.11676929891109467, + "learning_rate": 2.9485188967475263e-07, + "loss": 0.0014, + "step": 281000 + }, + { + "epoch": 1.802310852283113, + "grad_norm": 0.019509943202137947, + "learning_rate": 2.9466255796963327e-07, + "loss": 0.0008, + "step": 281010 + }, + { + "epoch": 1.8023749891768992, + "grad_norm": 0.21810665726661682, + "learning_rate": 2.944732852257548e-07, + "loss": 0.0008, + "step": 281020 + }, + { + "epoch": 1.8024391260706851, + "grad_norm": 0.009947647340595722, + "learning_rate": 2.942840714454892e-07, + "loss": 0.0005, + "step": 281030 + }, + { + "epoch": 1.8025032629644713, + "grad_norm": 0.17453940212726593, + "learning_rate": 2.9409491663120736e-07, + "loss": 0.0008, + "step": 281040 + }, + { + "epoch": 1.8025673998582574, + "grad_norm": 0.06499435007572174, + "learning_rate": 2.9390582078528016e-07, + "loss": 0.001, + "step": 281050 + }, + { + "epoch": 1.8026315367520436, + "grad_norm": 0.09117833524942398, + "learning_rate": 2.9371678391007516e-07, + "loss": 0.002, + "step": 281060 + }, + { + "epoch": 1.8026956736458297, + "grad_norm": 0.03328854963183403, + "learning_rate": 2.9352780600796274e-07, + "loss": 0.0009, + "step": 281070 + }, + { + "epoch": 1.8027598105396159, + "grad_norm": 0.18647193908691406, + "learning_rate": 2.933388870813114e-07, + "loss": 0.0006, + "step": 281080 + }, + { + "epoch": 1.802823947433402, + "grad_norm": 0.12472377717494965, + "learning_rate": 2.9315002713248717e-07, + "loss": 0.0007, + "step": 281090 + }, + { + "epoch": 1.802888084327188, + "grad_norm": 0.07018400728702545, + "learning_rate": 2.9296122616385646e-07, + "loss": 0.0011, + "step": 281100 + }, + { + "epoch": 1.802952221220974, + "grad_norm": 0.0946158692240715, + "learning_rate": 2.9277248417778624e-07, + "loss": 0.0009, + "step": 281110 + }, + { + "epoch": 1.80301635811476, + "grad_norm": 0.030763905495405197, + "learning_rate": 2.925838011766402e-07, + "loss": 0.0015, + "step": 281120 + }, + { + "epoch": 1.8030804950085462, + "grad_norm": 0.10628003627061844, + "learning_rate": 2.923951771627842e-07, + "loss": 0.0006, + "step": 281130 + }, + { + "epoch": 1.8031446319023323, + "grad_norm": 0.003375616390258074, + "learning_rate": 2.9220661213858024e-07, + "loss": 0.0019, + "step": 281140 + }, + { + "epoch": 1.8032087687961185, + "grad_norm": 0.08748704940080643, + "learning_rate": 2.9201810610639316e-07, + "loss": 0.0024, + "step": 281150 + }, + { + "epoch": 1.8032729056899046, + "grad_norm": 0.050831131637096405, + "learning_rate": 2.9182965906858375e-07, + "loss": 0.0008, + "step": 281160 + }, + { + "epoch": 1.8033370425836908, + "grad_norm": 0.15017278492450714, + "learning_rate": 2.9164127102751347e-07, + "loss": 0.0006, + "step": 281170 + }, + { + "epoch": 1.8034011794774767, + "grad_norm": 0.04059740900993347, + "learning_rate": 2.9145294198554275e-07, + "loss": 0.0005, + "step": 281180 + }, + { + "epoch": 1.8034653163712628, + "grad_norm": 0.07971083372831345, + "learning_rate": 2.9126467194503236e-07, + "loss": 0.0016, + "step": 281190 + }, + { + "epoch": 1.8035294532650488, + "grad_norm": 0.09287826716899872, + "learning_rate": 2.910764609083405e-07, + "loss": 0.0009, + "step": 281200 + }, + { + "epoch": 1.803593590158835, + "grad_norm": 0.08002916723489761, + "learning_rate": 2.908883088778264e-07, + "loss": 0.0012, + "step": 281210 + }, + { + "epoch": 1.803657727052621, + "grad_norm": 0.13122466206550598, + "learning_rate": 2.90700215855847e-07, + "loss": 0.0015, + "step": 281220 + }, + { + "epoch": 1.8037218639464072, + "grad_norm": 0.001795127522200346, + "learning_rate": 2.905121818447598e-07, + "loss": 0.0004, + "step": 281230 + }, + { + "epoch": 1.8037860008401934, + "grad_norm": 0.08344212174415588, + "learning_rate": 2.9032420684692085e-07, + "loss": 0.0007, + "step": 281240 + }, + { + "epoch": 1.8038501377339795, + "grad_norm": 0.04882136732339859, + "learning_rate": 2.901362908646854e-07, + "loss": 0.0009, + "step": 281250 + }, + { + "epoch": 1.8039142746277657, + "grad_norm": 0.047372933477163315, + "learning_rate": 2.8994843390040773e-07, + "loss": 0.0012, + "step": 281260 + }, + { + "epoch": 1.8039784115215516, + "grad_norm": 0.16857458651065826, + "learning_rate": 2.8976063595644313e-07, + "loss": 0.0012, + "step": 281270 + }, + { + "epoch": 1.8040425484153377, + "grad_norm": 0.057195521891117096, + "learning_rate": 2.8957289703514413e-07, + "loss": 0.0009, + "step": 281280 + }, + { + "epoch": 1.8041066853091237, + "grad_norm": 0.07683075964450836, + "learning_rate": 2.893852171388628e-07, + "loss": 0.0009, + "step": 281290 + }, + { + "epoch": 1.8041708222029098, + "grad_norm": 0.0845569521188736, + "learning_rate": 2.891975962699517e-07, + "loss": 0.0017, + "step": 281300 + }, + { + "epoch": 1.804234959096696, + "grad_norm": 0.12468987703323364, + "learning_rate": 2.8901003443076113e-07, + "loss": 0.0013, + "step": 281310 + }, + { + "epoch": 1.804299095990482, + "grad_norm": 0.0669076219201088, + "learning_rate": 2.888225316236426e-07, + "loss": 0.0004, + "step": 281320 + }, + { + "epoch": 1.8043632328842683, + "grad_norm": 0.010885614901781082, + "learning_rate": 2.8863508785094305e-07, + "loss": 0.0018, + "step": 281330 + }, + { + "epoch": 1.8044273697780544, + "grad_norm": 0.09682101011276245, + "learning_rate": 2.884477031150146e-07, + "loss": 0.0013, + "step": 281340 + }, + { + "epoch": 1.8044915066718403, + "grad_norm": 0.016672270372509956, + "learning_rate": 2.8826037741820357e-07, + "loss": 0.0014, + "step": 281350 + }, + { + "epoch": 1.8045556435656265, + "grad_norm": 0.08441387116909027, + "learning_rate": 2.8807311076285705e-07, + "loss": 0.0012, + "step": 281360 + }, + { + "epoch": 1.8046197804594124, + "grad_norm": 0.002428964478895068, + "learning_rate": 2.8788590315132146e-07, + "loss": 0.0009, + "step": 281370 + }, + { + "epoch": 1.8046839173531986, + "grad_norm": 0.021949807181954384, + "learning_rate": 2.876987545859439e-07, + "loss": 0.0013, + "step": 281380 + }, + { + "epoch": 1.8047480542469847, + "grad_norm": 0.016486287117004395, + "learning_rate": 2.8751166506906904e-07, + "loss": 0.0008, + "step": 281390 + }, + { + "epoch": 1.8048121911407708, + "grad_norm": 0.08122608065605164, + "learning_rate": 2.873246346030406e-07, + "loss": 0.0008, + "step": 281400 + }, + { + "epoch": 1.804876328034557, + "grad_norm": 0.01998315192759037, + "learning_rate": 2.871376631902023e-07, + "loss": 0.0008, + "step": 281410 + }, + { + "epoch": 1.8049404649283431, + "grad_norm": 0.05530071258544922, + "learning_rate": 2.8695075083289727e-07, + "loss": 0.0008, + "step": 281420 + }, + { + "epoch": 1.8050046018221293, + "grad_norm": 0.00521240197122097, + "learning_rate": 2.8676389753346856e-07, + "loss": 0.0023, + "step": 281430 + }, + { + "epoch": 1.8050687387159152, + "grad_norm": 0.004941079765558243, + "learning_rate": 2.865771032942555e-07, + "loss": 0.001, + "step": 281440 + }, + { + "epoch": 1.8051328756097014, + "grad_norm": 0.02477288246154785, + "learning_rate": 2.863903681176006e-07, + "loss": 0.0011, + "step": 281450 + }, + { + "epoch": 1.8051970125034873, + "grad_norm": 0.454287052154541, + "learning_rate": 2.8620369200584363e-07, + "loss": 0.0029, + "step": 281460 + }, + { + "epoch": 1.8052611493972734, + "grad_norm": 0.19727076590061188, + "learning_rate": 2.8601707496132283e-07, + "loss": 0.0009, + "step": 281470 + }, + { + "epoch": 1.8053252862910596, + "grad_norm": 0.03952730819582939, + "learning_rate": 2.858305169863768e-07, + "loss": 0.0009, + "step": 281480 + }, + { + "epoch": 1.8053894231848457, + "grad_norm": 0.09670371562242508, + "learning_rate": 2.856440180833442e-07, + "loss": 0.0009, + "step": 281490 + }, + { + "epoch": 1.805453560078632, + "grad_norm": 0.045216407626867294, + "learning_rate": 2.85457578254561e-07, + "loss": 0.0022, + "step": 281500 + }, + { + "epoch": 1.805517696972418, + "grad_norm": 0.1280045360326767, + "learning_rate": 2.852711975023642e-07, + "loss": 0.0012, + "step": 281510 + }, + { + "epoch": 1.8055818338662042, + "grad_norm": 0.07866466045379639, + "learning_rate": 2.8508487582908804e-07, + "loss": 0.0006, + "step": 281520 + }, + { + "epoch": 1.8056459707599901, + "grad_norm": 0.25505441427230835, + "learning_rate": 2.8489861323706844e-07, + "loss": 0.0013, + "step": 281530 + }, + { + "epoch": 1.8057101076537763, + "grad_norm": 0.057884521782398224, + "learning_rate": 2.847124097286397e-07, + "loss": 0.0015, + "step": 281540 + }, + { + "epoch": 1.8057742445475622, + "grad_norm": 0.12495142966508865, + "learning_rate": 2.8452626530613314e-07, + "loss": 0.0008, + "step": 281550 + }, + { + "epoch": 1.8058383814413483, + "grad_norm": 0.022214218974113464, + "learning_rate": 2.843401799718837e-07, + "loss": 0.0006, + "step": 281560 + }, + { + "epoch": 1.8059025183351345, + "grad_norm": 0.028516335412859917, + "learning_rate": 2.8415415372822164e-07, + "loss": 0.0013, + "step": 281570 + }, + { + "epoch": 1.8059666552289206, + "grad_norm": 0.08100759983062744, + "learning_rate": 2.839681865774785e-07, + "loss": 0.0035, + "step": 281580 + }, + { + "epoch": 1.8060307921227068, + "grad_norm": 0.20059432089328766, + "learning_rate": 2.83782278521984e-07, + "loss": 0.0027, + "step": 281590 + }, + { + "epoch": 1.806094929016493, + "grad_norm": 0.020009048283100128, + "learning_rate": 2.835964295640686e-07, + "loss": 0.0013, + "step": 281600 + }, + { + "epoch": 1.8061590659102789, + "grad_norm": 0.06283935159444809, + "learning_rate": 2.83410639706061e-07, + "loss": 0.0008, + "step": 281610 + }, + { + "epoch": 1.806223202804065, + "grad_norm": 0.041120126843452454, + "learning_rate": 2.832249089502886e-07, + "loss": 0.0017, + "step": 281620 + }, + { + "epoch": 1.806287339697851, + "grad_norm": 0.10960491001605988, + "learning_rate": 2.8303923729907866e-07, + "loss": 0.0007, + "step": 281630 + }, + { + "epoch": 1.806351476591637, + "grad_norm": 0.15289542078971863, + "learning_rate": 2.828536247547592e-07, + "loss": 0.001, + "step": 281640 + }, + { + "epoch": 1.8064156134854232, + "grad_norm": 0.013551203534007072, + "learning_rate": 2.826680713196545e-07, + "loss": 0.001, + "step": 281650 + }, + { + "epoch": 1.8064797503792094, + "grad_norm": 0.09905155748128891, + "learning_rate": 2.8248257699609095e-07, + "loss": 0.0007, + "step": 281660 + }, + { + "epoch": 1.8065438872729955, + "grad_norm": 0.038905225694179535, + "learning_rate": 2.8229714178639125e-07, + "loss": 0.0004, + "step": 281670 + }, + { + "epoch": 1.8066080241667817, + "grad_norm": 0.10164858400821686, + "learning_rate": 2.8211176569288065e-07, + "loss": 0.0017, + "step": 281680 + }, + { + "epoch": 1.8066721610605678, + "grad_norm": 0.00880381092429161, + "learning_rate": 2.8192644871788135e-07, + "loss": 0.0011, + "step": 281690 + }, + { + "epoch": 1.8067362979543538, + "grad_norm": 0.06276466697454453, + "learning_rate": 2.817411908637152e-07, + "loss": 0.0018, + "step": 281700 + }, + { + "epoch": 1.80680043484814, + "grad_norm": 0.089014932513237, + "learning_rate": 2.815559921327049e-07, + "loss": 0.0013, + "step": 281710 + }, + { + "epoch": 1.8068645717419258, + "grad_norm": 0.7244876623153687, + "learning_rate": 2.8137085252716965e-07, + "loss": 0.001, + "step": 281720 + }, + { + "epoch": 1.806928708635712, + "grad_norm": 0.08272527158260345, + "learning_rate": 2.811857720494304e-07, + "loss": 0.0021, + "step": 281730 + }, + { + "epoch": 1.8069928455294981, + "grad_norm": 0.09128829091787338, + "learning_rate": 2.8100075070180535e-07, + "loss": 0.0011, + "step": 281740 + }, + { + "epoch": 1.8070569824232843, + "grad_norm": 0.1517772674560547, + "learning_rate": 2.808157884866136e-07, + "loss": 0.0012, + "step": 281750 + }, + { + "epoch": 1.8071211193170704, + "grad_norm": 0.14261551201343536, + "learning_rate": 2.8063088540617345e-07, + "loss": 0.0011, + "step": 281760 + }, + { + "epoch": 1.8071852562108566, + "grad_norm": 0.05205344036221504, + "learning_rate": 2.8044604146280074e-07, + "loss": 0.0005, + "step": 281770 + }, + { + "epoch": 1.8072493931046425, + "grad_norm": 0.058461472392082214, + "learning_rate": 2.802612566588114e-07, + "loss": 0.0017, + "step": 281780 + }, + { + "epoch": 1.8073135299984286, + "grad_norm": 0.05705380439758301, + "learning_rate": 2.800765309965225e-07, + "loss": 0.0011, + "step": 281790 + }, + { + "epoch": 1.8073776668922148, + "grad_norm": 0.0523599311709404, + "learning_rate": 2.7989186447824834e-07, + "loss": 0.001, + "step": 281800 + }, + { + "epoch": 1.8074418037860007, + "grad_norm": 0.07191793620586395, + "learning_rate": 2.7970725710630196e-07, + "loss": 0.0008, + "step": 281810 + }, + { + "epoch": 1.8075059406797869, + "grad_norm": 0.08435988426208496, + "learning_rate": 2.7952270888299657e-07, + "loss": 0.0016, + "step": 281820 + }, + { + "epoch": 1.807570077573573, + "grad_norm": 0.006404428742825985, + "learning_rate": 2.793382198106465e-07, + "loss": 0.0007, + "step": 281830 + }, + { + "epoch": 1.8076342144673592, + "grad_norm": 0.0974842756986618, + "learning_rate": 2.7915378989156193e-07, + "loss": 0.0008, + "step": 281840 + }, + { + "epoch": 1.8076983513611453, + "grad_norm": 0.05271318182349205, + "learning_rate": 2.789694191280534e-07, + "loss": 0.0012, + "step": 281850 + }, + { + "epoch": 1.8077624882549315, + "grad_norm": 0.04372788220643997, + "learning_rate": 2.7878510752243294e-07, + "loss": 0.0006, + "step": 281860 + }, + { + "epoch": 1.8078266251487174, + "grad_norm": 0.06479683518409729, + "learning_rate": 2.7860085507700975e-07, + "loss": 0.0011, + "step": 281870 + }, + { + "epoch": 1.8078907620425035, + "grad_norm": 0.10288426280021667, + "learning_rate": 2.784166617940914e-07, + "loss": 0.0019, + "step": 281880 + }, + { + "epoch": 1.8079548989362895, + "grad_norm": 0.09815745800733566, + "learning_rate": 2.7823252767598675e-07, + "loss": 0.0008, + "step": 281890 + }, + { + "epoch": 1.8080190358300756, + "grad_norm": 0.02030204050242901, + "learning_rate": 2.780484527250027e-07, + "loss": 0.0006, + "step": 281900 + }, + { + "epoch": 1.8080831727238618, + "grad_norm": 0.018891580402851105, + "learning_rate": 2.7786443694344745e-07, + "loss": 0.0023, + "step": 281910 + }, + { + "epoch": 1.808147309617648, + "grad_norm": 0.005142646841704845, + "learning_rate": 2.776804803336253e-07, + "loss": 0.0007, + "step": 281920 + }, + { + "epoch": 1.808211446511434, + "grad_norm": 0.10481272637844086, + "learning_rate": 2.7749658289784156e-07, + "loss": 0.0011, + "step": 281930 + }, + { + "epoch": 1.8082755834052202, + "grad_norm": 0.011906717903912067, + "learning_rate": 2.773127446384005e-07, + "loss": 0.0014, + "step": 281940 + }, + { + "epoch": 1.8083397202990064, + "grad_norm": 0.11263014376163483, + "learning_rate": 2.771289655576065e-07, + "loss": 0.0009, + "step": 281950 + }, + { + "epoch": 1.8084038571927923, + "grad_norm": 0.03821062296628952, + "learning_rate": 2.76945245657762e-07, + "loss": 0.0007, + "step": 281960 + }, + { + "epoch": 1.8084679940865784, + "grad_norm": 0.008966549299657345, + "learning_rate": 2.7676158494116814e-07, + "loss": 0.0023, + "step": 281970 + }, + { + "epoch": 1.8085321309803644, + "grad_norm": 0.00295799458399415, + "learning_rate": 2.765779834101279e-07, + "loss": 0.0011, + "step": 281980 + }, + { + "epoch": 1.8085962678741505, + "grad_norm": 0.06502897292375565, + "learning_rate": 2.7639444106694167e-07, + "loss": 0.0012, + "step": 281990 + }, + { + "epoch": 1.8086604047679367, + "grad_norm": 0.01179441250860691, + "learning_rate": 2.7621095791390883e-07, + "loss": 0.0007, + "step": 282000 + }, + { + "epoch": 1.8087245416617228, + "grad_norm": 0.07027900218963623, + "learning_rate": 2.7602753395332803e-07, + "loss": 0.0008, + "step": 282010 + }, + { + "epoch": 1.808788678555509, + "grad_norm": 0.10831480473279953, + "learning_rate": 2.758441691874991e-07, + "loss": 0.0012, + "step": 282020 + }, + { + "epoch": 1.808852815449295, + "grad_norm": 0.07933515310287476, + "learning_rate": 2.756608636187191e-07, + "loss": 0.0011, + "step": 282030 + }, + { + "epoch": 1.808916952343081, + "grad_norm": 0.05271373316645622, + "learning_rate": 2.7547761724928515e-07, + "loss": 0.0005, + "step": 282040 + }, + { + "epoch": 1.8089810892368672, + "grad_norm": 0.06447888910770416, + "learning_rate": 2.7529443008149193e-07, + "loss": 0.0009, + "step": 282050 + }, + { + "epoch": 1.809045226130653, + "grad_norm": 0.04930812120437622, + "learning_rate": 2.7511130211763773e-07, + "loss": 0.0008, + "step": 282060 + }, + { + "epoch": 1.8091093630244393, + "grad_norm": 0.05755256116390228, + "learning_rate": 2.749282333600151e-07, + "loss": 0.0015, + "step": 282070 + }, + { + "epoch": 1.8091734999182254, + "grad_norm": 0.11437110602855682, + "learning_rate": 2.7474522381091883e-07, + "loss": 0.0014, + "step": 282080 + }, + { + "epoch": 1.8092376368120116, + "grad_norm": 0.029845241457223892, + "learning_rate": 2.745622734726411e-07, + "loss": 0.0026, + "step": 282090 + }, + { + "epoch": 1.8093017737057977, + "grad_norm": 0.14690016210079193, + "learning_rate": 2.743793823474761e-07, + "loss": 0.0005, + "step": 282100 + }, + { + "epoch": 1.8093659105995838, + "grad_norm": 0.06361275166273117, + "learning_rate": 2.7419655043771474e-07, + "loss": 0.0018, + "step": 282110 + }, + { + "epoch": 1.80943004749337, + "grad_norm": 0.04907489940524101, + "learning_rate": 2.7401377774564754e-07, + "loss": 0.0009, + "step": 282120 + }, + { + "epoch": 1.809494184387156, + "grad_norm": 0.08087644726037979, + "learning_rate": 2.7383106427356645e-07, + "loss": 0.0007, + "step": 282130 + }, + { + "epoch": 1.809558321280942, + "grad_norm": 0.07917283475399017, + "learning_rate": 2.736484100237591e-07, + "loss": 0.0011, + "step": 282140 + }, + { + "epoch": 1.809622458174728, + "grad_norm": 0.05790490657091141, + "learning_rate": 2.734658149985153e-07, + "loss": 0.001, + "step": 282150 + }, + { + "epoch": 1.8096865950685141, + "grad_norm": 0.22517386078834534, + "learning_rate": 2.7328327920012223e-07, + "loss": 0.0011, + "step": 282160 + }, + { + "epoch": 1.8097507319623003, + "grad_norm": 0.06193617731332779, + "learning_rate": 2.7310080263086847e-07, + "loss": 0.0013, + "step": 282170 + }, + { + "epoch": 1.8098148688560864, + "grad_norm": 0.02145802229642868, + "learning_rate": 2.7291838529304005e-07, + "loss": 0.0012, + "step": 282180 + }, + { + "epoch": 1.8098790057498726, + "grad_norm": 0.057392776012420654, + "learning_rate": 2.727360271889223e-07, + "loss": 0.001, + "step": 282190 + }, + { + "epoch": 1.8099431426436587, + "grad_norm": 0.0896657407283783, + "learning_rate": 2.725537283208002e-07, + "loss": 0.0012, + "step": 282200 + }, + { + "epoch": 1.8100072795374447, + "grad_norm": 0.0054199169389903545, + "learning_rate": 2.7237148869095955e-07, + "loss": 0.0006, + "step": 282210 + }, + { + "epoch": 1.8100714164312308, + "grad_norm": 0.10844659805297852, + "learning_rate": 2.7218930830168247e-07, + "loss": 0.0005, + "step": 282220 + }, + { + "epoch": 1.810135553325017, + "grad_norm": 0.10597152262926102, + "learning_rate": 2.720071871552521e-07, + "loss": 0.001, + "step": 282230 + }, + { + "epoch": 1.810199690218803, + "grad_norm": 0.04104155674576759, + "learning_rate": 2.7182512525395055e-07, + "loss": 0.0006, + "step": 282240 + }, + { + "epoch": 1.810263827112589, + "grad_norm": 0.07009421288967133, + "learning_rate": 2.7164312260005986e-07, + "loss": 0.0011, + "step": 282250 + }, + { + "epoch": 1.8103279640063752, + "grad_norm": 0.1096557155251503, + "learning_rate": 2.714611791958599e-07, + "loss": 0.001, + "step": 282260 + }, + { + "epoch": 1.8103921009001613, + "grad_norm": 0.19809821248054504, + "learning_rate": 2.7127929504363047e-07, + "loss": 0.001, + "step": 282270 + }, + { + "epoch": 1.8104562377939475, + "grad_norm": 0.052505962550640106, + "learning_rate": 2.7109747014565146e-07, + "loss": 0.0012, + "step": 282280 + }, + { + "epoch": 1.8105203746877336, + "grad_norm": 0.029269928112626076, + "learning_rate": 2.70915704504201e-07, + "loss": 0.0009, + "step": 282290 + }, + { + "epoch": 1.8105845115815196, + "grad_norm": 0.12355450540781021, + "learning_rate": 2.707339981215568e-07, + "loss": 0.0037, + "step": 282300 + }, + { + "epoch": 1.8106486484753057, + "grad_norm": 0.06523124873638153, + "learning_rate": 2.705523509999941e-07, + "loss": 0.0007, + "step": 282310 + }, + { + "epoch": 1.8107127853690916, + "grad_norm": 0.25369080901145935, + "learning_rate": 2.703707631417918e-07, + "loss": 0.0019, + "step": 282320 + }, + { + "epoch": 1.8107769222628778, + "grad_norm": 0.02460874617099762, + "learning_rate": 2.701892345492241e-07, + "loss": 0.0007, + "step": 282330 + }, + { + "epoch": 1.810841059156664, + "grad_norm": 0.06876687705516815, + "learning_rate": 2.7000776522456586e-07, + "loss": 0.0012, + "step": 282340 + }, + { + "epoch": 1.81090519605045, + "grad_norm": 0.047911740839481354, + "learning_rate": 2.698263551700897e-07, + "loss": 0.0008, + "step": 282350 + }, + { + "epoch": 1.8109693329442362, + "grad_norm": 0.05371854081749916, + "learning_rate": 2.696450043880705e-07, + "loss": 0.0008, + "step": 282360 + }, + { + "epoch": 1.8110334698380224, + "grad_norm": 0.010016710497438908, + "learning_rate": 2.694637128807803e-07, + "loss": 0.001, + "step": 282370 + }, + { + "epoch": 1.8110976067318085, + "grad_norm": 0.0178392231464386, + "learning_rate": 2.692824806504907e-07, + "loss": 0.0009, + "step": 282380 + }, + { + "epoch": 1.8111617436255945, + "grad_norm": 0.044535715132951736, + "learning_rate": 2.6910130769947194e-07, + "loss": 0.0005, + "step": 282390 + }, + { + "epoch": 1.8112258805193806, + "grad_norm": 0.12505608797073364, + "learning_rate": 2.6892019402999515e-07, + "loss": 0.001, + "step": 282400 + }, + { + "epoch": 1.8112900174131665, + "grad_norm": 0.011626729741692543, + "learning_rate": 2.687391396443301e-07, + "loss": 0.0011, + "step": 282410 + }, + { + "epoch": 1.8113541543069527, + "grad_norm": 0.07806049287319183, + "learning_rate": 2.6855814454474437e-07, + "loss": 0.0012, + "step": 282420 + }, + { + "epoch": 1.8114182912007388, + "grad_norm": 0.16373804211616516, + "learning_rate": 2.683772087335068e-07, + "loss": 0.001, + "step": 282430 + }, + { + "epoch": 1.811482428094525, + "grad_norm": 0.11224564164876938, + "learning_rate": 2.68196332212885e-07, + "loss": 0.0011, + "step": 282440 + }, + { + "epoch": 1.8115465649883111, + "grad_norm": 0.017139988020062447, + "learning_rate": 2.6801551498514434e-07, + "loss": 0.0012, + "step": 282450 + }, + { + "epoch": 1.8116107018820973, + "grad_norm": 0.05777726694941521, + "learning_rate": 2.678347570525508e-07, + "loss": 0.0011, + "step": 282460 + }, + { + "epoch": 1.8116748387758832, + "grad_norm": 0.07671855390071869, + "learning_rate": 2.676540584173709e-07, + "loss": 0.001, + "step": 282470 + }, + { + "epoch": 1.8117389756696693, + "grad_norm": 0.0009651641012169421, + "learning_rate": 2.674734190818673e-07, + "loss": 0.0007, + "step": 282480 + }, + { + "epoch": 1.8118031125634553, + "grad_norm": 0.13907742500305176, + "learning_rate": 2.6729283904830426e-07, + "loss": 0.0007, + "step": 282490 + }, + { + "epoch": 1.8118672494572414, + "grad_norm": 0.07996530830860138, + "learning_rate": 2.671123183189434e-07, + "loss": 0.001, + "step": 282500 + }, + { + "epoch": 1.8119313863510276, + "grad_norm": 0.062143053859472275, + "learning_rate": 2.669318568960483e-07, + "loss": 0.0006, + "step": 282510 + }, + { + "epoch": 1.8119955232448137, + "grad_norm": 0.23566403985023499, + "learning_rate": 2.6675145478188003e-07, + "loss": 0.0014, + "step": 282520 + }, + { + "epoch": 1.8120596601385999, + "grad_norm": 0.04087146371603012, + "learning_rate": 2.66571111978699e-07, + "loss": 0.0008, + "step": 282530 + }, + { + "epoch": 1.812123797032386, + "grad_norm": 0.0075711836107075214, + "learning_rate": 2.66390828488764e-07, + "loss": 0.0014, + "step": 282540 + }, + { + "epoch": 1.8121879339261722, + "grad_norm": 0.03502900153398514, + "learning_rate": 2.662106043143353e-07, + "loss": 0.0018, + "step": 282550 + }, + { + "epoch": 1.812252070819958, + "grad_norm": 0.035383906215429306, + "learning_rate": 2.660304394576713e-07, + "loss": 0.001, + "step": 282560 + }, + { + "epoch": 1.8123162077137442, + "grad_norm": 0.42719870805740356, + "learning_rate": 2.658503339210289e-07, + "loss": 0.0017, + "step": 282570 + }, + { + "epoch": 1.8123803446075302, + "grad_norm": 0.13851973414421082, + "learning_rate": 2.6567028770666525e-07, + "loss": 0.0012, + "step": 282580 + }, + { + "epoch": 1.8124444815013163, + "grad_norm": 0.02888554334640503, + "learning_rate": 2.654903008168369e-07, + "loss": 0.0004, + "step": 282590 + }, + { + "epoch": 1.8125086183951025, + "grad_norm": 0.05228433385491371, + "learning_rate": 2.653103732537987e-07, + "loss": 0.0012, + "step": 282600 + }, + { + "epoch": 1.8125727552888886, + "grad_norm": 0.06343115121126175, + "learning_rate": 2.651305050198044e-07, + "loss": 0.0011, + "step": 282610 + }, + { + "epoch": 1.8126368921826748, + "grad_norm": 0.016875579953193665, + "learning_rate": 2.6495069611710944e-07, + "loss": 0.0016, + "step": 282620 + }, + { + "epoch": 1.812701029076461, + "grad_norm": 0.07538674771785736, + "learning_rate": 2.64770946547967e-07, + "loss": 0.0013, + "step": 282630 + }, + { + "epoch": 1.812765165970247, + "grad_norm": 0.06355781108140945, + "learning_rate": 2.64591256314628e-07, + "loss": 0.0006, + "step": 282640 + }, + { + "epoch": 1.812829302864033, + "grad_norm": 0.06597629189491272, + "learning_rate": 2.644116254193446e-07, + "loss": 0.0009, + "step": 282650 + }, + { + "epoch": 1.8128934397578191, + "grad_norm": 0.09554356336593628, + "learning_rate": 2.6423205386436834e-07, + "loss": 0.0012, + "step": 282660 + }, + { + "epoch": 1.812957576651605, + "grad_norm": 0.20009282231330872, + "learning_rate": 2.640525416519496e-07, + "loss": 0.0015, + "step": 282670 + }, + { + "epoch": 1.8130217135453912, + "grad_norm": 0.06588278710842133, + "learning_rate": 2.6387308878433713e-07, + "loss": 0.0009, + "step": 282680 + }, + { + "epoch": 1.8130858504391774, + "grad_norm": 0.06026269868016243, + "learning_rate": 2.636936952637786e-07, + "loss": 0.0012, + "step": 282690 + }, + { + "epoch": 1.8131499873329635, + "grad_norm": 0.24051862955093384, + "learning_rate": 2.635143610925245e-07, + "loss": 0.0009, + "step": 282700 + }, + { + "epoch": 1.8132141242267497, + "grad_norm": 0.1734119951725006, + "learning_rate": 2.6333508627282014e-07, + "loss": 0.002, + "step": 282710 + }, + { + "epoch": 1.8132782611205358, + "grad_norm": 0.023682259023189545, + "learning_rate": 2.6315587080691205e-07, + "loss": 0.0009, + "step": 282720 + }, + { + "epoch": 1.8133423980143217, + "grad_norm": 0.18128125369548798, + "learning_rate": 2.629767146970463e-07, + "loss": 0.0019, + "step": 282730 + }, + { + "epoch": 1.8134065349081079, + "grad_norm": 0.04932695999741554, + "learning_rate": 2.627976179454683e-07, + "loss": 0.0017, + "step": 282740 + }, + { + "epoch": 1.8134706718018938, + "grad_norm": 0.030380118638277054, + "learning_rate": 2.626185805544218e-07, + "loss": 0.0007, + "step": 282750 + }, + { + "epoch": 1.81353480869568, + "grad_norm": 0.13956759870052338, + "learning_rate": 2.6243960252614997e-07, + "loss": 0.0006, + "step": 282760 + }, + { + "epoch": 1.813598945589466, + "grad_norm": 0.13544772565364838, + "learning_rate": 2.622606838628949e-07, + "loss": 0.0012, + "step": 282770 + }, + { + "epoch": 1.8136630824832523, + "grad_norm": 0.06107968091964722, + "learning_rate": 2.6208182456690044e-07, + "loss": 0.0005, + "step": 282780 + }, + { + "epoch": 1.8137272193770384, + "grad_norm": 0.13277402520179749, + "learning_rate": 2.6190302464040693e-07, + "loss": 0.0015, + "step": 282790 + }, + { + "epoch": 1.8137913562708246, + "grad_norm": 0.024062488228082657, + "learning_rate": 2.6172428408565485e-07, + "loss": 0.001, + "step": 282800 + }, + { + "epoch": 1.8138554931646107, + "grad_norm": 0.06757381558418274, + "learning_rate": 2.615456029048835e-07, + "loss": 0.0009, + "step": 282810 + }, + { + "epoch": 1.8139196300583966, + "grad_norm": 0.1526866853237152, + "learning_rate": 2.613669811003322e-07, + "loss": 0.0009, + "step": 282820 + }, + { + "epoch": 1.8139837669521828, + "grad_norm": 0.06795057654380798, + "learning_rate": 2.611884186742397e-07, + "loss": 0.0013, + "step": 282830 + }, + { + "epoch": 1.8140479038459687, + "grad_norm": 0.0031683510169386864, + "learning_rate": 2.610099156288426e-07, + "loss": 0.0013, + "step": 282840 + }, + { + "epoch": 1.8141120407397548, + "grad_norm": 0.04895066097378731, + "learning_rate": 2.608314719663785e-07, + "loss": 0.0008, + "step": 282850 + }, + { + "epoch": 1.814176177633541, + "grad_norm": 0.006946314591914415, + "learning_rate": 2.6065308768908335e-07, + "loss": 0.0014, + "step": 282860 + }, + { + "epoch": 1.8142403145273271, + "grad_norm": 0.08963599801063538, + "learning_rate": 2.604747627991922e-07, + "loss": 0.0008, + "step": 282870 + }, + { + "epoch": 1.8143044514211133, + "grad_norm": 0.15497802197933197, + "learning_rate": 2.602964972989386e-07, + "loss": 0.0021, + "step": 282880 + }, + { + "epoch": 1.8143685883148994, + "grad_norm": 0.0698586031794548, + "learning_rate": 2.601182911905581e-07, + "loss": 0.0007, + "step": 282890 + }, + { + "epoch": 1.8144327252086854, + "grad_norm": 0.047951746731996536, + "learning_rate": 2.599401444762828e-07, + "loss": 0.0009, + "step": 282900 + }, + { + "epoch": 1.8144968621024715, + "grad_norm": 0.02806960605084896, + "learning_rate": 2.5976205715834533e-07, + "loss": 0.0006, + "step": 282910 + }, + { + "epoch": 1.8145609989962574, + "grad_norm": 0.06804698705673218, + "learning_rate": 2.5958402923897616e-07, + "loss": 0.001, + "step": 282920 + }, + { + "epoch": 1.8146251358900436, + "grad_norm": 0.14505638182163239, + "learning_rate": 2.5940606072040795e-07, + "loss": 0.0014, + "step": 282930 + }, + { + "epoch": 1.8146892727838297, + "grad_norm": 0.031782738864421844, + "learning_rate": 2.5922815160487003e-07, + "loss": 0.0006, + "step": 282940 + }, + { + "epoch": 1.814753409677616, + "grad_norm": 0.11933920532464981, + "learning_rate": 2.590503018945911e-07, + "loss": 0.0019, + "step": 282950 + }, + { + "epoch": 1.814817546571402, + "grad_norm": 0.05999316647648811, + "learning_rate": 2.5887251159180005e-07, + "loss": 0.0006, + "step": 282960 + }, + { + "epoch": 1.8148816834651882, + "grad_norm": 0.06679647415876389, + "learning_rate": 2.58694780698725e-07, + "loss": 0.0007, + "step": 282970 + }, + { + "epoch": 1.8149458203589743, + "grad_norm": 0.10151786357164383, + "learning_rate": 2.585171092175931e-07, + "loss": 0.0017, + "step": 282980 + }, + { + "epoch": 1.8150099572527603, + "grad_norm": 0.04049347713589668, + "learning_rate": 2.5833949715063035e-07, + "loss": 0.0011, + "step": 282990 + }, + { + "epoch": 1.8150740941465464, + "grad_norm": 0.06263840943574905, + "learning_rate": 2.581619445000627e-07, + "loss": 0.0004, + "step": 283000 + }, + { + "epoch": 1.8151382310403323, + "grad_norm": 0.04469954967498779, + "learning_rate": 2.5798445126811455e-07, + "loss": 0.0003, + "step": 283010 + }, + { + "epoch": 1.8152023679341185, + "grad_norm": 0.11815565824508667, + "learning_rate": 2.5780701745701073e-07, + "loss": 0.0014, + "step": 283020 + }, + { + "epoch": 1.8152665048279046, + "grad_norm": 0.030553000047802925, + "learning_rate": 2.576296430689734e-07, + "loss": 0.0013, + "step": 283030 + }, + { + "epoch": 1.8153306417216908, + "grad_norm": 0.04269164428114891, + "learning_rate": 2.5745232810622634e-07, + "loss": 0.0015, + "step": 283040 + }, + { + "epoch": 1.815394778615477, + "grad_norm": 0.04332788288593292, + "learning_rate": 2.572750725709916e-07, + "loss": 0.0009, + "step": 283050 + }, + { + "epoch": 1.815458915509263, + "grad_norm": 0.4782222807407379, + "learning_rate": 2.5709787646548967e-07, + "loss": 0.0019, + "step": 283060 + }, + { + "epoch": 1.8155230524030492, + "grad_norm": 0.09612571448087692, + "learning_rate": 2.569207397919399e-07, + "loss": 0.0007, + "step": 283070 + }, + { + "epoch": 1.8155871892968352, + "grad_norm": 0.11113991588354111, + "learning_rate": 2.5674366255256433e-07, + "loss": 0.0012, + "step": 283080 + }, + { + "epoch": 1.8156513261906213, + "grad_norm": 0.10252439230680466, + "learning_rate": 2.5656664474958014e-07, + "loss": 0.001, + "step": 283090 + }, + { + "epoch": 1.8157154630844072, + "grad_norm": 0.041576530784368515, + "learning_rate": 2.5638968638520613e-07, + "loss": 0.001, + "step": 283100 + }, + { + "epoch": 1.8157795999781934, + "grad_norm": 0.05730690062046051, + "learning_rate": 2.562127874616588e-07, + "loss": 0.0033, + "step": 283110 + }, + { + "epoch": 1.8158437368719795, + "grad_norm": 0.06864220649003983, + "learning_rate": 2.560359479811564e-07, + "loss": 0.001, + "step": 283120 + }, + { + "epoch": 1.8159078737657657, + "grad_norm": 0.06728434562683105, + "learning_rate": 2.5585916794591437e-07, + "loss": 0.0014, + "step": 283130 + }, + { + "epoch": 1.8159720106595518, + "grad_norm": 0.11732706427574158, + "learning_rate": 2.556824473581465e-07, + "loss": 0.0011, + "step": 283140 + }, + { + "epoch": 1.816036147553338, + "grad_norm": 0.029023462906479836, + "learning_rate": 2.5550578622006885e-07, + "loss": 0.0014, + "step": 283150 + }, + { + "epoch": 1.816100284447124, + "grad_norm": 0.027747154235839844, + "learning_rate": 2.5532918453389456e-07, + "loss": 0.0008, + "step": 283160 + }, + { + "epoch": 1.81616442134091, + "grad_norm": 0.0065784878097474575, + "learning_rate": 2.551526423018369e-07, + "loss": 0.0008, + "step": 283170 + }, + { + "epoch": 1.816228558234696, + "grad_norm": 0.06812255829572678, + "learning_rate": 2.549761595261069e-07, + "loss": 0.0009, + "step": 283180 + }, + { + "epoch": 1.8162926951284821, + "grad_norm": 0.15295302867889404, + "learning_rate": 2.5479973620891716e-07, + "loss": 0.001, + "step": 283190 + }, + { + "epoch": 1.8163568320222683, + "grad_norm": 0.06800895184278488, + "learning_rate": 2.5462337235247824e-07, + "loss": 0.0013, + "step": 283200 + }, + { + "epoch": 1.8164209689160544, + "grad_norm": 0.1769361048936844, + "learning_rate": 2.5444706795899996e-07, + "loss": 0.0016, + "step": 283210 + }, + { + "epoch": 1.8164851058098406, + "grad_norm": 0.02690737694501877, + "learning_rate": 2.5427082303069116e-07, + "loss": 0.0016, + "step": 283220 + }, + { + "epoch": 1.8165492427036267, + "grad_norm": 0.023523934185504913, + "learning_rate": 2.5409463756976117e-07, + "loss": 0.0019, + "step": 283230 + }, + { + "epoch": 1.8166133795974129, + "grad_norm": 0.09415130317211151, + "learning_rate": 2.539185115784171e-07, + "loss": 0.0006, + "step": 283240 + }, + { + "epoch": 1.8166775164911988, + "grad_norm": 0.01110894326120615, + "learning_rate": 2.537424450588655e-07, + "loss": 0.0005, + "step": 283250 + }, + { + "epoch": 1.816741653384985, + "grad_norm": 0.023204559460282326, + "learning_rate": 2.5356643801331346e-07, + "loss": 0.0011, + "step": 283260 + }, + { + "epoch": 1.8168057902787709, + "grad_norm": 0.017421049997210503, + "learning_rate": 2.533904904439666e-07, + "loss": 0.0007, + "step": 283270 + }, + { + "epoch": 1.816869927172557, + "grad_norm": 0.03126649558544159, + "learning_rate": 2.532146023530291e-07, + "loss": 0.0012, + "step": 283280 + }, + { + "epoch": 1.8169340640663432, + "grad_norm": 0.06004180759191513, + "learning_rate": 2.5303877374270426e-07, + "loss": 0.0009, + "step": 283290 + }, + { + "epoch": 1.8169982009601293, + "grad_norm": 0.015872960910201073, + "learning_rate": 2.5286300461519754e-07, + "loss": 0.0013, + "step": 283300 + }, + { + "epoch": 1.8170623378539155, + "grad_norm": 0.014313939958810806, + "learning_rate": 2.526872949727094e-07, + "loss": 0.0006, + "step": 283310 + }, + { + "epoch": 1.8171264747477016, + "grad_norm": 0.06666509062051773, + "learning_rate": 2.525116448174425e-07, + "loss": 0.0008, + "step": 283320 + }, + { + "epoch": 1.8171906116414875, + "grad_norm": 0.20529133081436157, + "learning_rate": 2.523360541515968e-07, + "loss": 0.001, + "step": 283330 + }, + { + "epoch": 1.8172547485352737, + "grad_norm": 0.11298438906669617, + "learning_rate": 2.5216052297737437e-07, + "loss": 0.001, + "step": 283340 + }, + { + "epoch": 1.8173188854290598, + "grad_norm": 0.08906236290931702, + "learning_rate": 2.5198505129697406e-07, + "loss": 0.0007, + "step": 283350 + }, + { + "epoch": 1.8173830223228458, + "grad_norm": 0.024035204201936722, + "learning_rate": 2.51809639112594e-07, + "loss": 0.0007, + "step": 283360 + }, + { + "epoch": 1.817447159216632, + "grad_norm": 0.12804993987083435, + "learning_rate": 2.516342864264321e-07, + "loss": 0.001, + "step": 283370 + }, + { + "epoch": 1.817511296110418, + "grad_norm": 0.03600526973605156, + "learning_rate": 2.514589932406869e-07, + "loss": 0.0011, + "step": 283380 + }, + { + "epoch": 1.8175754330042042, + "grad_norm": 0.017293879762291908, + "learning_rate": 2.51283759557554e-07, + "loss": 0.0004, + "step": 283390 + }, + { + "epoch": 1.8176395698979904, + "grad_norm": 0.043514467775821686, + "learning_rate": 2.5110858537922944e-07, + "loss": 0.0008, + "step": 283400 + }, + { + "epoch": 1.8177037067917765, + "grad_norm": 0.12087168544530869, + "learning_rate": 2.509334707079081e-07, + "loss": 0.001, + "step": 283410 + }, + { + "epoch": 1.8177678436855624, + "grad_norm": 0.15204310417175293, + "learning_rate": 2.507584155457843e-07, + "loss": 0.0009, + "step": 283420 + }, + { + "epoch": 1.8178319805793486, + "grad_norm": 0.1251821219921112, + "learning_rate": 2.5058341989505244e-07, + "loss": 0.0007, + "step": 283430 + }, + { + "epoch": 1.8178961174731345, + "grad_norm": 0.05553895980119705, + "learning_rate": 2.5040848375790463e-07, + "loss": 0.0016, + "step": 283440 + }, + { + "epoch": 1.8179602543669207, + "grad_norm": 0.094098299741745, + "learning_rate": 2.5023360713653244e-07, + "loss": 0.0009, + "step": 283450 + }, + { + "epoch": 1.8180243912607068, + "grad_norm": 0.050325796008110046, + "learning_rate": 2.5005879003312805e-07, + "loss": 0.0009, + "step": 283460 + }, + { + "epoch": 1.818088528154493, + "grad_norm": 0.03272897005081177, + "learning_rate": 2.4988403244988246e-07, + "loss": 0.0009, + "step": 283470 + }, + { + "epoch": 1.818152665048279, + "grad_norm": 0.03459775820374489, + "learning_rate": 2.4970933438898394e-07, + "loss": 0.0007, + "step": 283480 + }, + { + "epoch": 1.8182168019420653, + "grad_norm": 0.010766434483230114, + "learning_rate": 2.495346958526224e-07, + "loss": 0.0013, + "step": 283490 + }, + { + "epoch": 1.8182809388358514, + "grad_norm": 0.12688294053077698, + "learning_rate": 2.4936011684298657e-07, + "loss": 0.0011, + "step": 283500 + }, + { + "epoch": 1.8183450757296373, + "grad_norm": 0.19279158115386963, + "learning_rate": 2.4918559736226364e-07, + "loss": 0.0025, + "step": 283510 + }, + { + "epoch": 1.8184092126234235, + "grad_norm": 0.07030703127384186, + "learning_rate": 2.490111374126403e-07, + "loss": 0.0006, + "step": 283520 + }, + { + "epoch": 1.8184733495172094, + "grad_norm": 0.10207617282867432, + "learning_rate": 2.488367369963035e-07, + "loss": 0.0006, + "step": 283530 + }, + { + "epoch": 1.8185374864109956, + "grad_norm": 0.045885562896728516, + "learning_rate": 2.4866239611543774e-07, + "loss": 0.0011, + "step": 283540 + }, + { + "epoch": 1.8186016233047817, + "grad_norm": 0.1675550937652588, + "learning_rate": 2.484881147722279e-07, + "loss": 0.0013, + "step": 283550 + }, + { + "epoch": 1.8186657601985678, + "grad_norm": 0.08820303529500961, + "learning_rate": 2.483138929688572e-07, + "loss": 0.0007, + "step": 283560 + }, + { + "epoch": 1.818729897092354, + "grad_norm": 0.07986678183078766, + "learning_rate": 2.481397307075101e-07, + "loss": 0.0009, + "step": 283570 + }, + { + "epoch": 1.8187940339861401, + "grad_norm": 0.026571124792099, + "learning_rate": 2.4796562799036806e-07, + "loss": 0.0007, + "step": 283580 + }, + { + "epoch": 1.818858170879926, + "grad_norm": 0.01867252215743065, + "learning_rate": 2.4779158481961327e-07, + "loss": 0.0014, + "step": 283590 + }, + { + "epoch": 1.8189223077737122, + "grad_norm": 0.02360674925148487, + "learning_rate": 2.476176011974252e-07, + "loss": 0.0034, + "step": 283600 + }, + { + "epoch": 1.8189864446674981, + "grad_norm": 0.10880941897630692, + "learning_rate": 2.474436771259864e-07, + "loss": 0.001, + "step": 283610 + }, + { + "epoch": 1.8190505815612843, + "grad_norm": 0.05003716051578522, + "learning_rate": 2.4726981260747405e-07, + "loss": 0.0018, + "step": 283620 + }, + { + "epoch": 1.8191147184550704, + "grad_norm": 0.06162728741765022, + "learning_rate": 2.4709600764406813e-07, + "loss": 0.0015, + "step": 283630 + }, + { + "epoch": 1.8191788553488566, + "grad_norm": 0.13946183025836945, + "learning_rate": 2.469222622379458e-07, + "loss": 0.001, + "step": 283640 + }, + { + "epoch": 1.8192429922426427, + "grad_norm": 0.004668702837079763, + "learning_rate": 2.467485763912847e-07, + "loss": 0.0013, + "step": 283650 + }, + { + "epoch": 1.819307129136429, + "grad_norm": 0.015313344076275826, + "learning_rate": 2.465749501062609e-07, + "loss": 0.0009, + "step": 283660 + }, + { + "epoch": 1.819371266030215, + "grad_norm": 0.12870186567306519, + "learning_rate": 2.464013833850498e-07, + "loss": 0.0019, + "step": 283670 + }, + { + "epoch": 1.819435402924001, + "grad_norm": 0.06950012594461441, + "learning_rate": 2.462278762298276e-07, + "loss": 0.0019, + "step": 283680 + }, + { + "epoch": 1.8194995398177871, + "grad_norm": 0.08987858146429062, + "learning_rate": 2.4605442864276685e-07, + "loss": 0.0005, + "step": 283690 + }, + { + "epoch": 1.819563676711573, + "grad_norm": 0.003726414404809475, + "learning_rate": 2.4588104062604257e-07, + "loss": 0.0008, + "step": 283700 + }, + { + "epoch": 1.8196278136053592, + "grad_norm": 0.09197190403938293, + "learning_rate": 2.457077121818252e-07, + "loss": 0.0009, + "step": 283710 + }, + { + "epoch": 1.8196919504991453, + "grad_norm": 0.04489089176058769, + "learning_rate": 2.4553444331228916e-07, + "loss": 0.001, + "step": 283720 + }, + { + "epoch": 1.8197560873929315, + "grad_norm": 0.09359274804592133, + "learning_rate": 2.453612340196043e-07, + "loss": 0.0014, + "step": 283730 + }, + { + "epoch": 1.8198202242867176, + "grad_norm": 0.6765559315681458, + "learning_rate": 2.4518808430594123e-07, + "loss": 0.0067, + "step": 283740 + }, + { + "epoch": 1.8198843611805038, + "grad_norm": 0.0031113028526306152, + "learning_rate": 2.450149941734692e-07, + "loss": 0.0005, + "step": 283750 + }, + { + "epoch": 1.8199484980742897, + "grad_norm": 0.010244605131447315, + "learning_rate": 2.448419636243582e-07, + "loss": 0.0008, + "step": 283760 + }, + { + "epoch": 1.8200126349680759, + "grad_norm": 0.013047495856881142, + "learning_rate": 2.4466899266077594e-07, + "loss": 0.0009, + "step": 283770 + }, + { + "epoch": 1.820076771861862, + "grad_norm": 0.12160877883434296, + "learning_rate": 2.4449608128488955e-07, + "loss": 0.0015, + "step": 283780 + }, + { + "epoch": 1.820140908755648, + "grad_norm": 0.07258080691099167, + "learning_rate": 2.443232294988657e-07, + "loss": 0.0026, + "step": 283790 + }, + { + "epoch": 1.820205045649434, + "grad_norm": 0.13584206998348236, + "learning_rate": 2.4415043730487084e-07, + "loss": 0.0012, + "step": 283800 + }, + { + "epoch": 1.8202691825432202, + "grad_norm": 0.03979848325252533, + "learning_rate": 2.439777047050701e-07, + "loss": 0.0014, + "step": 283810 + }, + { + "epoch": 1.8203333194370064, + "grad_norm": 0.0566716194152832, + "learning_rate": 2.438050317016272e-07, + "loss": 0.0026, + "step": 283820 + }, + { + "epoch": 1.8203974563307925, + "grad_norm": 0.09523949772119522, + "learning_rate": 2.43632418296707e-07, + "loss": 0.0007, + "step": 283830 + }, + { + "epoch": 1.8204615932245787, + "grad_norm": 0.21772432327270508, + "learning_rate": 2.434598644924724e-07, + "loss": 0.0012, + "step": 283840 + }, + { + "epoch": 1.8205257301183646, + "grad_norm": 0.029549170285463333, + "learning_rate": 2.432873702910843e-07, + "loss": 0.0018, + "step": 283850 + }, + { + "epoch": 1.8205898670121508, + "grad_norm": 0.018051231279969215, + "learning_rate": 2.431149356947049e-07, + "loss": 0.0025, + "step": 283860 + }, + { + "epoch": 1.8206540039059367, + "grad_norm": 0.004038951825350523, + "learning_rate": 2.429425607054958e-07, + "loss": 0.0009, + "step": 283870 + }, + { + "epoch": 1.8207181407997228, + "grad_norm": 0.11834798008203506, + "learning_rate": 2.4277024532561586e-07, + "loss": 0.0008, + "step": 283880 + }, + { + "epoch": 1.820782277693509, + "grad_norm": 0.07488865405321121, + "learning_rate": 2.4259798955722504e-07, + "loss": 0.0006, + "step": 283890 + }, + { + "epoch": 1.8208464145872951, + "grad_norm": 0.0020906440913677216, + "learning_rate": 2.4242579340248096e-07, + "loss": 0.0006, + "step": 283900 + }, + { + "epoch": 1.8209105514810813, + "grad_norm": 0.03752012923359871, + "learning_rate": 2.4225365686354195e-07, + "loss": 0.0004, + "step": 283910 + }, + { + "epoch": 1.8209746883748674, + "grad_norm": 0.31805944442749023, + "learning_rate": 2.420815799425652e-07, + "loss": 0.0007, + "step": 283920 + }, + { + "epoch": 1.8210388252686536, + "grad_norm": 0.05737582966685295, + "learning_rate": 2.4190956264170673e-07, + "loss": 0.0003, + "step": 283930 + }, + { + "epoch": 1.8211029621624395, + "grad_norm": 0.03898722305893898, + "learning_rate": 2.417376049631215e-07, + "loss": 0.0005, + "step": 283940 + }, + { + "epoch": 1.8211670990562256, + "grad_norm": 0.11303561925888062, + "learning_rate": 2.41565706908965e-07, + "loss": 0.0021, + "step": 283950 + }, + { + "epoch": 1.8212312359500116, + "grad_norm": 0.07213585823774338, + "learning_rate": 2.4139386848139156e-07, + "loss": 0.0008, + "step": 283960 + }, + { + "epoch": 1.8212953728437977, + "grad_norm": 0.0378580316901207, + "learning_rate": 2.412220896825529e-07, + "loss": 0.001, + "step": 283970 + }, + { + "epoch": 1.8213595097375839, + "grad_norm": 0.027136176824569702, + "learning_rate": 2.4105037051460277e-07, + "loss": 0.0007, + "step": 283980 + }, + { + "epoch": 1.82142364663137, + "grad_norm": 0.1134774312376976, + "learning_rate": 2.408787109796934e-07, + "loss": 0.0009, + "step": 283990 + }, + { + "epoch": 1.8214877835251562, + "grad_norm": 0.06808760017156601, + "learning_rate": 2.4070711107997467e-07, + "loss": 0.0013, + "step": 284000 + }, + { + "epoch": 1.8215519204189423, + "grad_norm": 0.03774027153849602, + "learning_rate": 2.4053557081759657e-07, + "loss": 0.0008, + "step": 284010 + }, + { + "epoch": 1.8216160573127282, + "grad_norm": 0.04879768192768097, + "learning_rate": 2.403640901947102e-07, + "loss": 0.0015, + "step": 284020 + }, + { + "epoch": 1.8216801942065144, + "grad_norm": 0.0474613681435585, + "learning_rate": 2.401926692134632e-07, + "loss": 0.0008, + "step": 284030 + }, + { + "epoch": 1.8217443311003003, + "grad_norm": 0.14003092050552368, + "learning_rate": 2.400213078760039e-07, + "loss": 0.0015, + "step": 284040 + }, + { + "epoch": 1.8218084679940865, + "grad_norm": 0.10043643414974213, + "learning_rate": 2.3985000618447884e-07, + "loss": 0.0014, + "step": 284050 + }, + { + "epoch": 1.8218726048878726, + "grad_norm": 0.08790823072195053, + "learning_rate": 2.3967876414103586e-07, + "loss": 0.0006, + "step": 284060 + }, + { + "epoch": 1.8219367417816588, + "grad_norm": 0.05833596736192703, + "learning_rate": 2.3950758174781984e-07, + "loss": 0.0011, + "step": 284070 + }, + { + "epoch": 1.822000878675445, + "grad_norm": 0.05277148261666298, + "learning_rate": 2.393364590069769e-07, + "loss": 0.0017, + "step": 284080 + }, + { + "epoch": 1.822065015569231, + "grad_norm": 0.03449109569191933, + "learning_rate": 2.391653959206491e-07, + "loss": 0.003, + "step": 284090 + }, + { + "epoch": 1.8221291524630172, + "grad_norm": 0.12976112961769104, + "learning_rate": 2.3899439249098264e-07, + "loss": 0.0011, + "step": 284100 + }, + { + "epoch": 1.8221932893568031, + "grad_norm": 0.04298318549990654, + "learning_rate": 2.3882344872011854e-07, + "loss": 0.0012, + "step": 284110 + }, + { + "epoch": 1.8222574262505893, + "grad_norm": 0.0845508873462677, + "learning_rate": 2.386525646101989e-07, + "loss": 0.003, + "step": 284120 + }, + { + "epoch": 1.8223215631443752, + "grad_norm": 0.01747436821460724, + "learning_rate": 2.3848174016336657e-07, + "loss": 0.002, + "step": 284130 + }, + { + "epoch": 1.8223857000381614, + "grad_norm": 0.017732495442032814, + "learning_rate": 2.3831097538176086e-07, + "loss": 0.0008, + "step": 284140 + }, + { + "epoch": 1.8224498369319475, + "grad_norm": 0.04959338530898094, + "learning_rate": 2.3814027026752118e-07, + "loss": 0.0008, + "step": 284150 + }, + { + "epoch": 1.8225139738257337, + "grad_norm": 0.027588363736867905, + "learning_rate": 2.3796962482278697e-07, + "loss": 0.001, + "step": 284160 + }, + { + "epoch": 1.8225781107195198, + "grad_norm": 0.09255266934633255, + "learning_rate": 2.377990390496976e-07, + "loss": 0.0013, + "step": 284170 + }, + { + "epoch": 1.822642247613306, + "grad_norm": 0.028061656281352043, + "learning_rate": 2.376285129503897e-07, + "loss": 0.0018, + "step": 284180 + }, + { + "epoch": 1.822706384507092, + "grad_norm": 0.007896821945905685, + "learning_rate": 2.3745804652699933e-07, + "loss": 0.0011, + "step": 284190 + }, + { + "epoch": 1.822770521400878, + "grad_norm": 0.05308055505156517, + "learning_rate": 2.372876397816637e-07, + "loss": 0.0008, + "step": 284200 + }, + { + "epoch": 1.8228346582946642, + "grad_norm": 0.05304637551307678, + "learning_rate": 2.3711729271651774e-07, + "loss": 0.0018, + "step": 284210 + }, + { + "epoch": 1.82289879518845, + "grad_norm": 0.0017153015360236168, + "learning_rate": 2.3694700533369641e-07, + "loss": 0.0007, + "step": 284220 + }, + { + "epoch": 1.8229629320822363, + "grad_norm": 0.015460219234228134, + "learning_rate": 2.36776777635333e-07, + "loss": 0.0013, + "step": 284230 + }, + { + "epoch": 1.8230270689760224, + "grad_norm": 0.049073509871959686, + "learning_rate": 2.3660660962356086e-07, + "loss": 0.0018, + "step": 284240 + }, + { + "epoch": 1.8230912058698086, + "grad_norm": 0.11652687191963196, + "learning_rate": 2.364365013005121e-07, + "loss": 0.0016, + "step": 284250 + }, + { + "epoch": 1.8231553427635947, + "grad_norm": 0.008816922083497047, + "learning_rate": 2.3626645266831893e-07, + "loss": 0.0019, + "step": 284260 + }, + { + "epoch": 1.8232194796573808, + "grad_norm": 0.03230687603354454, + "learning_rate": 2.3609646372911133e-07, + "loss": 0.0008, + "step": 284270 + }, + { + "epoch": 1.8232836165511668, + "grad_norm": 0.07474133372306824, + "learning_rate": 2.3592653448501924e-07, + "loss": 0.001, + "step": 284280 + }, + { + "epoch": 1.823347753444953, + "grad_norm": 0.0740194246172905, + "learning_rate": 2.3575666493817373e-07, + "loss": 0.0008, + "step": 284290 + }, + { + "epoch": 1.8234118903387388, + "grad_norm": 0.07866983115673065, + "learning_rate": 2.3558685509070146e-07, + "loss": 0.0018, + "step": 284300 + }, + { + "epoch": 1.823476027232525, + "grad_norm": 0.004387131426483393, + "learning_rate": 2.3541710494473124e-07, + "loss": 0.0011, + "step": 284310 + }, + { + "epoch": 1.8235401641263111, + "grad_norm": 0.03439735248684883, + "learning_rate": 2.352474145023892e-07, + "loss": 0.0007, + "step": 284320 + }, + { + "epoch": 1.8236043010200973, + "grad_norm": 0.11432670801877975, + "learning_rate": 2.3507778376580303e-07, + "loss": 0.0006, + "step": 284330 + }, + { + "epoch": 1.8236684379138834, + "grad_norm": 0.05256522446870804, + "learning_rate": 2.3490821273709774e-07, + "loss": 0.0015, + "step": 284340 + }, + { + "epoch": 1.8237325748076696, + "grad_norm": 0.11117374151945114, + "learning_rate": 2.3473870141839772e-07, + "loss": 0.001, + "step": 284350 + }, + { + "epoch": 1.8237967117014557, + "grad_norm": 0.06200719624757767, + "learning_rate": 2.345692498118274e-07, + "loss": 0.001, + "step": 284360 + }, + { + "epoch": 1.8238608485952417, + "grad_norm": 0.044623877853155136, + "learning_rate": 2.3439985791951058e-07, + "loss": 0.0008, + "step": 284370 + }, + { + "epoch": 1.8239249854890278, + "grad_norm": 0.018259888514876366, + "learning_rate": 2.3423052574356898e-07, + "loss": 0.0006, + "step": 284380 + }, + { + "epoch": 1.8239891223828137, + "grad_norm": 0.0028362676966935396, + "learning_rate": 2.3406125328612472e-07, + "loss": 0.0004, + "step": 284390 + }, + { + "epoch": 1.8240532592766, + "grad_norm": 0.014020822010934353, + "learning_rate": 2.3389204054930005e-07, + "loss": 0.0011, + "step": 284400 + }, + { + "epoch": 1.824117396170386, + "grad_norm": 0.021346304565668106, + "learning_rate": 2.3372288753521433e-07, + "loss": 0.0011, + "step": 284410 + }, + { + "epoch": 1.8241815330641722, + "grad_norm": 0.12803484499454498, + "learning_rate": 2.3355379424598702e-07, + "loss": 0.0016, + "step": 284420 + }, + { + "epoch": 1.8242456699579583, + "grad_norm": 0.07047094404697418, + "learning_rate": 2.333847606837364e-07, + "loss": 0.005, + "step": 284430 + }, + { + "epoch": 1.8243098068517445, + "grad_norm": 0.21268074214458466, + "learning_rate": 2.332157868505819e-07, + "loss": 0.0006, + "step": 284440 + }, + { + "epoch": 1.8243739437455304, + "grad_norm": 0.06028466299176216, + "learning_rate": 2.330468727486407e-07, + "loss": 0.002, + "step": 284450 + }, + { + "epoch": 1.8244380806393166, + "grad_norm": 0.020619206130504608, + "learning_rate": 2.328780183800289e-07, + "loss": 0.0008, + "step": 284460 + }, + { + "epoch": 1.8245022175331025, + "grad_norm": 0.004687672946602106, + "learning_rate": 2.32709223746862e-07, + "loss": 0.0024, + "step": 284470 + }, + { + "epoch": 1.8245663544268886, + "grad_norm": 0.007976637221872807, + "learning_rate": 2.325404888512567e-07, + "loss": 0.0005, + "step": 284480 + }, + { + "epoch": 1.8246304913206748, + "grad_norm": 0.07855124026536942, + "learning_rate": 2.323718136953257e-07, + "loss": 0.0008, + "step": 284490 + }, + { + "epoch": 1.824694628214461, + "grad_norm": 0.1139468252658844, + "learning_rate": 2.3220319828118342e-07, + "loss": 0.0008, + "step": 284500 + }, + { + "epoch": 1.824758765108247, + "grad_norm": 0.0543724000453949, + "learning_rate": 2.320346426109421e-07, + "loss": 0.0006, + "step": 284510 + }, + { + "epoch": 1.8248229020020332, + "grad_norm": 0.024786775931715965, + "learning_rate": 2.3186614668671504e-07, + "loss": 0.0007, + "step": 284520 + }, + { + "epoch": 1.8248870388958194, + "grad_norm": 0.07218330353498459, + "learning_rate": 2.3169771051061274e-07, + "loss": 0.0008, + "step": 284530 + }, + { + "epoch": 1.8249511757896053, + "grad_norm": 0.023913349956274033, + "learning_rate": 2.315293340847452e-07, + "loss": 0.0013, + "step": 284540 + }, + { + "epoch": 1.8250153126833915, + "grad_norm": 0.07372229546308517, + "learning_rate": 2.3136101741122352e-07, + "loss": 0.0013, + "step": 284550 + }, + { + "epoch": 1.8250794495771774, + "grad_norm": 0.04746563732624054, + "learning_rate": 2.311927604921571e-07, + "loss": 0.0011, + "step": 284560 + }, + { + "epoch": 1.8251435864709635, + "grad_norm": 0.08703367412090302, + "learning_rate": 2.3102456332965317e-07, + "loss": 0.001, + "step": 284570 + }, + { + "epoch": 1.8252077233647497, + "grad_norm": 0.10042295604944229, + "learning_rate": 2.3085642592581892e-07, + "loss": 0.0009, + "step": 284580 + }, + { + "epoch": 1.8252718602585358, + "grad_norm": 0.059683751314878464, + "learning_rate": 2.306883482827632e-07, + "loss": 0.0013, + "step": 284590 + }, + { + "epoch": 1.825335997152322, + "grad_norm": 0.21237105131149292, + "learning_rate": 2.3052033040259048e-07, + "loss": 0.0009, + "step": 284600 + }, + { + "epoch": 1.8254001340461081, + "grad_norm": 0.047385189682245255, + "learning_rate": 2.3035237228740735e-07, + "loss": 0.0006, + "step": 284610 + }, + { + "epoch": 1.8254642709398943, + "grad_norm": 0.02855606935918331, + "learning_rate": 2.3018447393931664e-07, + "loss": 0.0013, + "step": 284620 + }, + { + "epoch": 1.8255284078336802, + "grad_norm": 0.011425605043768883, + "learning_rate": 2.300166353604244e-07, + "loss": 0.0012, + "step": 284630 + }, + { + "epoch": 1.8255925447274663, + "grad_norm": 0.04062425717711449, + "learning_rate": 2.2984885655283285e-07, + "loss": 0.0014, + "step": 284640 + }, + { + "epoch": 1.8256566816212523, + "grad_norm": 0.17988109588623047, + "learning_rate": 2.2968113751864417e-07, + "loss": 0.0007, + "step": 284650 + }, + { + "epoch": 1.8257208185150384, + "grad_norm": 0.013708336278796196, + "learning_rate": 2.295134782599595e-07, + "loss": 0.0016, + "step": 284660 + }, + { + "epoch": 1.8257849554088246, + "grad_norm": 0.03549422323703766, + "learning_rate": 2.2934587877888105e-07, + "loss": 0.0014, + "step": 284670 + }, + { + "epoch": 1.8258490923026107, + "grad_norm": 0.02807396836578846, + "learning_rate": 2.291783390775082e-07, + "loss": 0.0015, + "step": 284680 + }, + { + "epoch": 1.8259132291963969, + "grad_norm": 0.041874222457408905, + "learning_rate": 2.2901085915793986e-07, + "loss": 0.0019, + "step": 284690 + }, + { + "epoch": 1.825977366090183, + "grad_norm": 0.1899644285440445, + "learning_rate": 2.2884343902227547e-07, + "loss": 0.0018, + "step": 284700 + }, + { + "epoch": 1.826041502983969, + "grad_norm": 0.07992995530366898, + "learning_rate": 2.2867607867261332e-07, + "loss": 0.0013, + "step": 284710 + }, + { + "epoch": 1.826105639877755, + "grad_norm": 0.04594377428293228, + "learning_rate": 2.2850877811104955e-07, + "loss": 0.0019, + "step": 284720 + }, + { + "epoch": 1.826169776771541, + "grad_norm": 0.09122800827026367, + "learning_rate": 2.2834153733968023e-07, + "loss": 0.0011, + "step": 284730 + }, + { + "epoch": 1.8262339136653272, + "grad_norm": 0.07416558265686035, + "learning_rate": 2.2817435636060258e-07, + "loss": 0.0014, + "step": 284740 + }, + { + "epoch": 1.8262980505591133, + "grad_norm": 0.0800134614109993, + "learning_rate": 2.280072351759105e-07, + "loss": 0.0003, + "step": 284750 + }, + { + "epoch": 1.8263621874528995, + "grad_norm": 0.04095055162906647, + "learning_rate": 2.2784017378769784e-07, + "loss": 0.0017, + "step": 284760 + }, + { + "epoch": 1.8264263243466856, + "grad_norm": 0.0665946677327156, + "learning_rate": 2.276731721980585e-07, + "loss": 0.0007, + "step": 284770 + }, + { + "epoch": 1.8264904612404718, + "grad_norm": 0.06196899712085724, + "learning_rate": 2.2750623040908527e-07, + "loss": 0.0006, + "step": 284780 + }, + { + "epoch": 1.826554598134258, + "grad_norm": 0.05296425148844719, + "learning_rate": 2.2733934842286975e-07, + "loss": 0.0011, + "step": 284790 + }, + { + "epoch": 1.8266187350280438, + "grad_norm": 0.026383887976408005, + "learning_rate": 2.271725262415031e-07, + "loss": 0.0018, + "step": 284800 + }, + { + "epoch": 1.82668287192183, + "grad_norm": 0.06328203529119492, + "learning_rate": 2.2700576386707584e-07, + "loss": 0.0007, + "step": 284810 + }, + { + "epoch": 1.826747008815616, + "grad_norm": 0.007411746773868799, + "learning_rate": 2.2683906130167742e-07, + "loss": 0.0006, + "step": 284820 + }, + { + "epoch": 1.826811145709402, + "grad_norm": 0.13926292955875397, + "learning_rate": 2.266724185473973e-07, + "loss": 0.0007, + "step": 284830 + }, + { + "epoch": 1.8268752826031882, + "grad_norm": 0.014773097820580006, + "learning_rate": 2.2650583560632266e-07, + "loss": 0.0005, + "step": 284840 + }, + { + "epoch": 1.8269394194969744, + "grad_norm": 0.04269682243466377, + "learning_rate": 2.2633931248054187e-07, + "loss": 0.0007, + "step": 284850 + }, + { + "epoch": 1.8270035563907605, + "grad_norm": 0.017428934574127197, + "learning_rate": 2.26172849172141e-07, + "loss": 0.0009, + "step": 284860 + }, + { + "epoch": 1.8270676932845467, + "grad_norm": 0.060745932161808014, + "learning_rate": 2.2600644568320618e-07, + "loss": 0.001, + "step": 284870 + }, + { + "epoch": 1.8271318301783326, + "grad_norm": 0.0948595181107521, + "learning_rate": 2.258401020158224e-07, + "loss": 0.0012, + "step": 284880 + }, + { + "epoch": 1.8271959670721187, + "grad_norm": 0.03808773308992386, + "learning_rate": 2.2567381817207412e-07, + "loss": 0.0017, + "step": 284890 + }, + { + "epoch": 1.8272601039659049, + "grad_norm": 0.01708976924419403, + "learning_rate": 2.2550759415404577e-07, + "loss": 0.0009, + "step": 284900 + }, + { + "epoch": 1.8273242408596908, + "grad_norm": 0.04463403299450874, + "learning_rate": 2.2534142996381902e-07, + "loss": 0.0007, + "step": 284910 + }, + { + "epoch": 1.827388377753477, + "grad_norm": 0.049894046038389206, + "learning_rate": 2.251753256034761e-07, + "loss": 0.001, + "step": 284920 + }, + { + "epoch": 1.827452514647263, + "grad_norm": 0.09170003980398178, + "learning_rate": 2.2500928107509923e-07, + "loss": 0.0005, + "step": 284930 + }, + { + "epoch": 1.8275166515410493, + "grad_norm": 0.0323021374642849, + "learning_rate": 2.2484329638076896e-07, + "loss": 0.0008, + "step": 284940 + }, + { + "epoch": 1.8275807884348354, + "grad_norm": 0.02366352640092373, + "learning_rate": 2.2467737152256474e-07, + "loss": 0.0024, + "step": 284950 + }, + { + "epoch": 1.8276449253286216, + "grad_norm": 0.03775579854846001, + "learning_rate": 2.2451150650256548e-07, + "loss": 0.0008, + "step": 284960 + }, + { + "epoch": 1.8277090622224075, + "grad_norm": 0.0916261374950409, + "learning_rate": 2.243457013228506e-07, + "loss": 0.0015, + "step": 284970 + }, + { + "epoch": 1.8277731991161936, + "grad_norm": 0.06144082918763161, + "learning_rate": 2.2417995598549735e-07, + "loss": 0.0023, + "step": 284980 + }, + { + "epoch": 1.8278373360099796, + "grad_norm": 0.44196850061416626, + "learning_rate": 2.2401427049258239e-07, + "loss": 0.0015, + "step": 284990 + }, + { + "epoch": 1.8279014729037657, + "grad_norm": 0.5167443752288818, + "learning_rate": 2.238486448461813e-07, + "loss": 0.0034, + "step": 285000 + }, + { + "epoch": 1.8279656097975518, + "grad_norm": 0.031271178275346756, + "learning_rate": 2.2368307904837072e-07, + "loss": 0.0015, + "step": 285010 + }, + { + "epoch": 1.828029746691338, + "grad_norm": 0.05039814114570618, + "learning_rate": 2.2351757310122458e-07, + "loss": 0.0015, + "step": 285020 + }, + { + "epoch": 1.8280938835851241, + "grad_norm": 0.0538700595498085, + "learning_rate": 2.233521270068173e-07, + "loss": 0.0013, + "step": 285030 + }, + { + "epoch": 1.8281580204789103, + "grad_norm": 0.011439146474003792, + "learning_rate": 2.2318674076722114e-07, + "loss": 0.0005, + "step": 285040 + }, + { + "epoch": 1.8282221573726964, + "grad_norm": 0.030994849279522896, + "learning_rate": 2.230214143845094e-07, + "loss": 0.0006, + "step": 285050 + }, + { + "epoch": 1.8282862942664824, + "grad_norm": 0.04261430725455284, + "learning_rate": 2.2285614786075382e-07, + "loss": 0.0008, + "step": 285060 + }, + { + "epoch": 1.8283504311602685, + "grad_norm": 0.010509190149605274, + "learning_rate": 2.2269094119802438e-07, + "loss": 0.0011, + "step": 285070 + }, + { + "epoch": 1.8284145680540544, + "grad_norm": 0.07600738108158112, + "learning_rate": 2.2252579439839105e-07, + "loss": 0.0008, + "step": 285080 + }, + { + "epoch": 1.8284787049478406, + "grad_norm": 0.04539591446518898, + "learning_rate": 2.22360707463925e-07, + "loss": 0.0006, + "step": 285090 + }, + { + "epoch": 1.8285428418416267, + "grad_norm": 0.0028495138976722956, + "learning_rate": 2.2219568039669347e-07, + "loss": 0.0015, + "step": 285100 + }, + { + "epoch": 1.828606978735413, + "grad_norm": 0.07312221825122833, + "learning_rate": 2.2203071319876422e-07, + "loss": 0.0015, + "step": 285110 + }, + { + "epoch": 1.828671115629199, + "grad_norm": 0.060156095772981644, + "learning_rate": 2.2186580587220562e-07, + "loss": 0.0012, + "step": 285120 + }, + { + "epoch": 1.8287352525229852, + "grad_norm": 0.18297149240970612, + "learning_rate": 2.2170095841908322e-07, + "loss": 0.0012, + "step": 285130 + }, + { + "epoch": 1.8287993894167711, + "grad_norm": 0.06519244611263275, + "learning_rate": 2.2153617084146316e-07, + "loss": 0.0016, + "step": 285140 + }, + { + "epoch": 1.8288635263105573, + "grad_norm": 0.16978222131729126, + "learning_rate": 2.2137144314140879e-07, + "loss": 0.0008, + "step": 285150 + }, + { + "epoch": 1.8289276632043432, + "grad_norm": 0.061636921018362045, + "learning_rate": 2.2120677532098677e-07, + "loss": 0.0005, + "step": 285160 + }, + { + "epoch": 1.8289918000981293, + "grad_norm": 0.07009384781122208, + "learning_rate": 2.2104216738225882e-07, + "loss": 0.0017, + "step": 285170 + }, + { + "epoch": 1.8290559369919155, + "grad_norm": 0.09590679407119751, + "learning_rate": 2.2087761932728768e-07, + "loss": 0.0015, + "step": 285180 + }, + { + "epoch": 1.8291200738857016, + "grad_norm": 0.06868293881416321, + "learning_rate": 2.207131311581351e-07, + "loss": 0.0012, + "step": 285190 + }, + { + "epoch": 1.8291842107794878, + "grad_norm": 0.06806357949972153, + "learning_rate": 2.205487028768638e-07, + "loss": 0.0016, + "step": 285200 + }, + { + "epoch": 1.829248347673274, + "grad_norm": 0.040417108684778214, + "learning_rate": 2.2038433448553275e-07, + "loss": 0.0007, + "step": 285210 + }, + { + "epoch": 1.82931248456706, + "grad_norm": 0.06480779498815536, + "learning_rate": 2.2022002598620084e-07, + "loss": 0.0024, + "step": 285220 + }, + { + "epoch": 1.829376621460846, + "grad_norm": 0.051375679671764374, + "learning_rate": 2.200557773809292e-07, + "loss": 0.001, + "step": 285230 + }, + { + "epoch": 1.8294407583546322, + "grad_norm": 0.049823835492134094, + "learning_rate": 2.1989158867177506e-07, + "loss": 0.0004, + "step": 285240 + }, + { + "epoch": 1.829504895248418, + "grad_norm": 0.1641385555267334, + "learning_rate": 2.1972745986079513e-07, + "loss": 0.0011, + "step": 285250 + }, + { + "epoch": 1.8295690321422042, + "grad_norm": 0.0011703071650117636, + "learning_rate": 2.1956339095004608e-07, + "loss": 0.0014, + "step": 285260 + }, + { + "epoch": 1.8296331690359904, + "grad_norm": 0.1630324125289917, + "learning_rate": 2.1939938194158517e-07, + "loss": 0.0015, + "step": 285270 + }, + { + "epoch": 1.8296973059297765, + "grad_norm": 0.007984250783920288, + "learning_rate": 2.192354328374663e-07, + "loss": 0.0013, + "step": 285280 + }, + { + "epoch": 1.8297614428235627, + "grad_norm": 0.03937610611319542, + "learning_rate": 2.1907154363974392e-07, + "loss": 0.0013, + "step": 285290 + }, + { + "epoch": 1.8298255797173488, + "grad_norm": 0.04710065573453903, + "learning_rate": 2.18907714350472e-07, + "loss": 0.0012, + "step": 285300 + }, + { + "epoch": 1.8298897166111348, + "grad_norm": 0.10098345577716827, + "learning_rate": 2.187439449717038e-07, + "loss": 0.0008, + "step": 285310 + }, + { + "epoch": 1.829953853504921, + "grad_norm": 0.015499911271035671, + "learning_rate": 2.1858023550549113e-07, + "loss": 0.0009, + "step": 285320 + }, + { + "epoch": 1.830017990398707, + "grad_norm": 0.04580983147025108, + "learning_rate": 2.1841658595388503e-07, + "loss": 0.0009, + "step": 285330 + }, + { + "epoch": 1.830082127292493, + "grad_norm": 0.06571226567029953, + "learning_rate": 2.1825299631893615e-07, + "loss": 0.0008, + "step": 285340 + }, + { + "epoch": 1.8301462641862791, + "grad_norm": 0.10527317970991135, + "learning_rate": 2.1808946660269503e-07, + "loss": 0.0015, + "step": 285350 + }, + { + "epoch": 1.8302104010800653, + "grad_norm": 0.023459885269403458, + "learning_rate": 2.179259968072106e-07, + "loss": 0.0008, + "step": 285360 + }, + { + "epoch": 1.8302745379738514, + "grad_norm": 0.08581940084695816, + "learning_rate": 2.1776258693453068e-07, + "loss": 0.001, + "step": 285370 + }, + { + "epoch": 1.8303386748676376, + "grad_norm": 0.10950316488742828, + "learning_rate": 2.1759923698670415e-07, + "loss": 0.0014, + "step": 285380 + }, + { + "epoch": 1.8304028117614237, + "grad_norm": 0.24941828846931458, + "learning_rate": 2.1743594696577662e-07, + "loss": 0.0021, + "step": 285390 + }, + { + "epoch": 1.8304669486552096, + "grad_norm": 0.099155955016613, + "learning_rate": 2.172727168737948e-07, + "loss": 0.0005, + "step": 285400 + }, + { + "epoch": 1.8305310855489958, + "grad_norm": 0.039132773876190186, + "learning_rate": 2.171095467128037e-07, + "loss": 0.0007, + "step": 285410 + }, + { + "epoch": 1.8305952224427817, + "grad_norm": 0.12010085582733154, + "learning_rate": 2.1694643648484892e-07, + "loss": 0.0013, + "step": 285420 + }, + { + "epoch": 1.8306593593365679, + "grad_norm": 0.04474587365984917, + "learning_rate": 2.1678338619197325e-07, + "loss": 0.0006, + "step": 285430 + }, + { + "epoch": 1.830723496230354, + "grad_norm": 0.08558463305234909, + "learning_rate": 2.1662039583622064e-07, + "loss": 0.0008, + "step": 285440 + }, + { + "epoch": 1.8307876331241402, + "grad_norm": 0.005080286413431168, + "learning_rate": 2.164574654196322e-07, + "loss": 0.0015, + "step": 285450 + }, + { + "epoch": 1.8308517700179263, + "grad_norm": 0.055858634412288666, + "learning_rate": 2.1629459494425131e-07, + "loss": 0.001, + "step": 285460 + }, + { + "epoch": 1.8309159069117125, + "grad_norm": 0.018276643007993698, + "learning_rate": 2.1613178441211745e-07, + "loss": 0.0006, + "step": 285470 + }, + { + "epoch": 1.8309800438054986, + "grad_norm": 0.04330141469836235, + "learning_rate": 2.1596903382527178e-07, + "loss": 0.0009, + "step": 285480 + }, + { + "epoch": 1.8310441806992845, + "grad_norm": 0.061170391738414764, + "learning_rate": 2.1580634318575265e-07, + "loss": 0.0021, + "step": 285490 + }, + { + "epoch": 1.8311083175930707, + "grad_norm": 0.016236858442425728, + "learning_rate": 2.1564371249560012e-07, + "loss": 0.0005, + "step": 285500 + }, + { + "epoch": 1.8311724544868566, + "grad_norm": 0.19410637021064758, + "learning_rate": 2.1548114175685086e-07, + "loss": 0.0022, + "step": 285510 + }, + { + "epoch": 1.8312365913806428, + "grad_norm": 0.13653182983398438, + "learning_rate": 2.153186309715416e-07, + "loss": 0.0013, + "step": 285520 + }, + { + "epoch": 1.831300728274429, + "grad_norm": 0.2021653950214386, + "learning_rate": 2.1515618014171014e-07, + "loss": 0.0038, + "step": 285530 + }, + { + "epoch": 1.831364865168215, + "grad_norm": 0.15602578222751617, + "learning_rate": 2.1499378926939152e-07, + "loss": 0.0007, + "step": 285540 + }, + { + "epoch": 1.8314290020620012, + "grad_norm": 0.01986047253012657, + "learning_rate": 2.1483145835662021e-07, + "loss": 0.0008, + "step": 285550 + }, + { + "epoch": 1.8314931389557874, + "grad_norm": 0.09452295303344727, + "learning_rate": 2.1466918740543075e-07, + "loss": 0.001, + "step": 285560 + }, + { + "epoch": 1.8315572758495733, + "grad_norm": 0.09902264922857285, + "learning_rate": 2.1450697641785646e-07, + "loss": 0.0009, + "step": 285570 + }, + { + "epoch": 1.8316214127433594, + "grad_norm": 0.002422323450446129, + "learning_rate": 2.143448253959296e-07, + "loss": 0.0011, + "step": 285580 + }, + { + "epoch": 1.8316855496371454, + "grad_norm": 0.1396687924861908, + "learning_rate": 2.1418273434168303e-07, + "loss": 0.001, + "step": 285590 + }, + { + "epoch": 1.8317496865309315, + "grad_norm": 0.5556753873825073, + "learning_rate": 2.140207032571462e-07, + "loss": 0.0022, + "step": 285600 + }, + { + "epoch": 1.8318138234247177, + "grad_norm": 0.052254196256399155, + "learning_rate": 2.1385873214435082e-07, + "loss": 0.0004, + "step": 285610 + }, + { + "epoch": 1.8318779603185038, + "grad_norm": 0.129730686545372, + "learning_rate": 2.136968210053264e-07, + "loss": 0.0012, + "step": 285620 + }, + { + "epoch": 1.83194209721229, + "grad_norm": 0.1199645921587944, + "learning_rate": 2.1353496984210187e-07, + "loss": 0.0017, + "step": 285630 + }, + { + "epoch": 1.832006234106076, + "grad_norm": 0.01759057678282261, + "learning_rate": 2.1337317865670393e-07, + "loss": 0.0006, + "step": 285640 + }, + { + "epoch": 1.8320703709998623, + "grad_norm": 0.21323983371257782, + "learning_rate": 2.1321144745116206e-07, + "loss": 0.0011, + "step": 285650 + }, + { + "epoch": 1.8321345078936482, + "grad_norm": 0.023735884577035904, + "learning_rate": 2.1304977622750135e-07, + "loss": 0.0004, + "step": 285660 + }, + { + "epoch": 1.8321986447874343, + "grad_norm": 0.09157714247703552, + "learning_rate": 2.1288816498774733e-07, + "loss": 0.0034, + "step": 285670 + }, + { + "epoch": 1.8322627816812203, + "grad_norm": 0.05838904529809952, + "learning_rate": 2.1272661373392734e-07, + "loss": 0.0014, + "step": 285680 + }, + { + "epoch": 1.8323269185750064, + "grad_norm": 0.14488168060779572, + "learning_rate": 2.125651224680636e-07, + "loss": 0.0014, + "step": 285690 + }, + { + "epoch": 1.8323910554687926, + "grad_norm": 0.13726484775543213, + "learning_rate": 2.1240369119218062e-07, + "loss": 0.0007, + "step": 285700 + }, + { + "epoch": 1.8324551923625787, + "grad_norm": 0.03192019462585449, + "learning_rate": 2.1224231990830013e-07, + "loss": 0.001, + "step": 285710 + }, + { + "epoch": 1.8325193292563648, + "grad_norm": 0.0978066623210907, + "learning_rate": 2.120810086184455e-07, + "loss": 0.001, + "step": 285720 + }, + { + "epoch": 1.832583466150151, + "grad_norm": 0.0054242261685431, + "learning_rate": 2.1191975732463843e-07, + "loss": 0.0005, + "step": 285730 + }, + { + "epoch": 1.832647603043937, + "grad_norm": 0.054589856415987015, + "learning_rate": 2.1175856602889843e-07, + "loss": 0.0012, + "step": 285740 + }, + { + "epoch": 1.832711739937723, + "grad_norm": 0.1770869642496109, + "learning_rate": 2.115974347332461e-07, + "loss": 0.0007, + "step": 285750 + }, + { + "epoch": 1.8327758768315092, + "grad_norm": 0.12868542969226837, + "learning_rate": 2.114363634396993e-07, + "loss": 0.0013, + "step": 285760 + }, + { + "epoch": 1.8328400137252951, + "grad_norm": 0.08010537177324295, + "learning_rate": 2.112753521502775e-07, + "loss": 0.0015, + "step": 285770 + }, + { + "epoch": 1.8329041506190813, + "grad_norm": 0.17784012854099274, + "learning_rate": 2.1111440086699854e-07, + "loss": 0.0013, + "step": 285780 + }, + { + "epoch": 1.8329682875128674, + "grad_norm": 0.21651434898376465, + "learning_rate": 2.10953509591878e-07, + "loss": 0.0016, + "step": 285790 + }, + { + "epoch": 1.8330324244066536, + "grad_norm": 0.0007737832493148744, + "learning_rate": 2.1079267832693317e-07, + "loss": 0.0006, + "step": 285800 + }, + { + "epoch": 1.8330965613004397, + "grad_norm": 0.0973321944475174, + "learning_rate": 2.1063190707417858e-07, + "loss": 0.0013, + "step": 285810 + }, + { + "epoch": 1.833160698194226, + "grad_norm": 0.015331475995481014, + "learning_rate": 2.1047119583562926e-07, + "loss": 0.0023, + "step": 285820 + }, + { + "epoch": 1.8332248350880118, + "grad_norm": 0.09351062774658203, + "learning_rate": 2.103105446132986e-07, + "loss": 0.0016, + "step": 285830 + }, + { + "epoch": 1.833288971981798, + "grad_norm": 0.06600060313940048, + "learning_rate": 2.1014995340919998e-07, + "loss": 0.0008, + "step": 285840 + }, + { + "epoch": 1.833353108875584, + "grad_norm": 0.06511164456605911, + "learning_rate": 2.0998942222534624e-07, + "loss": 0.0013, + "step": 285850 + }, + { + "epoch": 1.83341724576937, + "grad_norm": 0.01580311357975006, + "learning_rate": 2.0982895106374802e-07, + "loss": 0.001, + "step": 285860 + }, + { + "epoch": 1.8334813826631562, + "grad_norm": 0.09493190795183182, + "learning_rate": 2.0966853992641644e-07, + "loss": 0.0013, + "step": 285870 + }, + { + "epoch": 1.8335455195569423, + "grad_norm": 0.09049346297979355, + "learning_rate": 2.095081888153616e-07, + "loss": 0.0012, + "step": 285880 + }, + { + "epoch": 1.8336096564507285, + "grad_norm": 0.1104205846786499, + "learning_rate": 2.0934789773259355e-07, + "loss": 0.001, + "step": 285890 + }, + { + "epoch": 1.8336737933445146, + "grad_norm": 0.010128783993422985, + "learning_rate": 2.0918766668011959e-07, + "loss": 0.0015, + "step": 285900 + }, + { + "epoch": 1.8337379302383008, + "grad_norm": 0.03706973418593407, + "learning_rate": 2.0902749565994751e-07, + "loss": 0.0024, + "step": 285910 + }, + { + "epoch": 1.8338020671320867, + "grad_norm": 0.04837115854024887, + "learning_rate": 2.088673846740863e-07, + "loss": 0.0007, + "step": 285920 + }, + { + "epoch": 1.8338662040258729, + "grad_norm": 0.08827932924032211, + "learning_rate": 2.0870733372454045e-07, + "loss": 0.0012, + "step": 285930 + }, + { + "epoch": 1.8339303409196588, + "grad_norm": 0.12149443477392197, + "learning_rate": 2.0854734281331502e-07, + "loss": 0.0014, + "step": 285940 + }, + { + "epoch": 1.833994477813445, + "grad_norm": 0.11144936829805374, + "learning_rate": 2.0838741194241675e-07, + "loss": 0.0015, + "step": 285950 + }, + { + "epoch": 1.834058614707231, + "grad_norm": 0.08110851794481277, + "learning_rate": 2.0822754111384846e-07, + "loss": 0.0037, + "step": 285960 + }, + { + "epoch": 1.8341227516010172, + "grad_norm": 0.024030309170484543, + "learning_rate": 2.0806773032961414e-07, + "loss": 0.0011, + "step": 285970 + }, + { + "epoch": 1.8341868884948034, + "grad_norm": 0.0025723432190716267, + "learning_rate": 2.0790797959171494e-07, + "loss": 0.0006, + "step": 285980 + }, + { + "epoch": 1.8342510253885895, + "grad_norm": 0.08574619889259338, + "learning_rate": 2.077482889021548e-07, + "loss": 0.0018, + "step": 285990 + }, + { + "epoch": 1.8343151622823755, + "grad_norm": 0.039204102009534836, + "learning_rate": 2.0758865826293274e-07, + "loss": 0.0016, + "step": 286000 + }, + { + "epoch": 1.8343792991761616, + "grad_norm": 0.03519874066114426, + "learning_rate": 2.0742908767605042e-07, + "loss": 0.0009, + "step": 286010 + }, + { + "epoch": 1.8344434360699475, + "grad_norm": 0.11675609648227692, + "learning_rate": 2.0726957714350626e-07, + "loss": 0.0004, + "step": 286020 + }, + { + "epoch": 1.8345075729637337, + "grad_norm": 0.01122630387544632, + "learning_rate": 2.071101266672998e-07, + "loss": 0.0007, + "step": 286030 + }, + { + "epoch": 1.8345717098575198, + "grad_norm": 0.10321499407291412, + "learning_rate": 2.069507362494294e-07, + "loss": 0.0006, + "step": 286040 + }, + { + "epoch": 1.834635846751306, + "grad_norm": 0.055491261184215546, + "learning_rate": 2.0679140589189128e-07, + "loss": 0.0015, + "step": 286050 + }, + { + "epoch": 1.8346999836450921, + "grad_norm": 0.08569338172674179, + "learning_rate": 2.0663213559668215e-07, + "loss": 0.0003, + "step": 286060 + }, + { + "epoch": 1.8347641205388783, + "grad_norm": 0.17324191331863403, + "learning_rate": 2.0647292536579877e-07, + "loss": 0.0004, + "step": 286070 + }, + { + "epoch": 1.8348282574326644, + "grad_norm": 0.10901259630918503, + "learning_rate": 2.0631377520123563e-07, + "loss": 0.0005, + "step": 286080 + }, + { + "epoch": 1.8348923943264503, + "grad_norm": 0.027271650731563568, + "learning_rate": 2.0615468510498616e-07, + "loss": 0.0006, + "step": 286090 + }, + { + "epoch": 1.8349565312202365, + "grad_norm": 0.16619277000427246, + "learning_rate": 2.0599565507904539e-07, + "loss": 0.0016, + "step": 286100 + }, + { + "epoch": 1.8350206681140224, + "grad_norm": 0.03477492928504944, + "learning_rate": 2.0583668512540512e-07, + "loss": 0.0006, + "step": 286110 + }, + { + "epoch": 1.8350848050078086, + "grad_norm": 0.04874105006456375, + "learning_rate": 2.0567777524605704e-07, + "loss": 0.0009, + "step": 286120 + }, + { + "epoch": 1.8351489419015947, + "grad_norm": 0.026079542934894562, + "learning_rate": 2.055189254429929e-07, + "loss": 0.0009, + "step": 286130 + }, + { + "epoch": 1.8352130787953809, + "grad_norm": 0.08594903349876404, + "learning_rate": 2.0536013571820336e-07, + "loss": 0.0011, + "step": 286140 + }, + { + "epoch": 1.835277215689167, + "grad_norm": 0.1158260926604271, + "learning_rate": 2.0520140607367845e-07, + "loss": 0.0013, + "step": 286150 + }, + { + "epoch": 1.8353413525829532, + "grad_norm": 0.13311445713043213, + "learning_rate": 2.050427365114066e-07, + "loss": 0.001, + "step": 286160 + }, + { + "epoch": 1.8354054894767393, + "grad_norm": 0.04005707800388336, + "learning_rate": 2.0488412703337512e-07, + "loss": 0.0008, + "step": 286170 + }, + { + "epoch": 1.8354696263705252, + "grad_norm": 0.1504872590303421, + "learning_rate": 2.0472557764157352e-07, + "loss": 0.0011, + "step": 286180 + }, + { + "epoch": 1.8355337632643114, + "grad_norm": 0.05102236196398735, + "learning_rate": 2.04567088337988e-07, + "loss": 0.0009, + "step": 286190 + }, + { + "epoch": 1.8355979001580973, + "grad_norm": 0.05543968081474304, + "learning_rate": 2.0440865912460362e-07, + "loss": 0.0017, + "step": 286200 + }, + { + "epoch": 1.8356620370518835, + "grad_norm": 0.16379182040691376, + "learning_rate": 2.0425029000340546e-07, + "loss": 0.0012, + "step": 286210 + }, + { + "epoch": 1.8357261739456696, + "grad_norm": 0.04222199320793152, + "learning_rate": 2.0409198097637917e-07, + "loss": 0.0007, + "step": 286220 + }, + { + "epoch": 1.8357903108394558, + "grad_norm": 0.2061203569173813, + "learning_rate": 2.0393373204550815e-07, + "loss": 0.0007, + "step": 286230 + }, + { + "epoch": 1.835854447733242, + "grad_norm": 0.07101015746593475, + "learning_rate": 2.0377554321277414e-07, + "loss": 0.0007, + "step": 286240 + }, + { + "epoch": 1.835918584627028, + "grad_norm": 0.02212749794125557, + "learning_rate": 2.036174144801617e-07, + "loss": 0.0028, + "step": 286250 + }, + { + "epoch": 1.835982721520814, + "grad_norm": 0.05497482046484947, + "learning_rate": 2.0345934584965034e-07, + "loss": 0.001, + "step": 286260 + }, + { + "epoch": 1.8360468584146001, + "grad_norm": 0.014361917041242123, + "learning_rate": 2.0330133732322177e-07, + "loss": 0.0005, + "step": 286270 + }, + { + "epoch": 1.836110995308386, + "grad_norm": 0.2536948025226593, + "learning_rate": 2.03143388902855e-07, + "loss": 0.001, + "step": 286280 + }, + { + "epoch": 1.8361751322021722, + "grad_norm": 0.13963524997234344, + "learning_rate": 2.0298550059053012e-07, + "loss": 0.0022, + "step": 286290 + }, + { + "epoch": 1.8362392690959584, + "grad_norm": 0.16756793856620789, + "learning_rate": 2.0282767238822553e-07, + "loss": 0.0012, + "step": 286300 + }, + { + "epoch": 1.8363034059897445, + "grad_norm": 0.30188626050949097, + "learning_rate": 2.0266990429791911e-07, + "loss": 0.0015, + "step": 286310 + }, + { + "epoch": 1.8363675428835307, + "grad_norm": 0.255096435546875, + "learning_rate": 2.025121963215859e-07, + "loss": 0.0011, + "step": 286320 + }, + { + "epoch": 1.8364316797773168, + "grad_norm": 0.0976017639040947, + "learning_rate": 2.0235454846120496e-07, + "loss": 0.0015, + "step": 286330 + }, + { + "epoch": 1.836495816671103, + "grad_norm": 0.007329453714191914, + "learning_rate": 2.0219696071875016e-07, + "loss": 0.0015, + "step": 286340 + }, + { + "epoch": 1.8365599535648889, + "grad_norm": 0.08448395878076553, + "learning_rate": 2.020394330961961e-07, + "loss": 0.001, + "step": 286350 + }, + { + "epoch": 1.836624090458675, + "grad_norm": 0.054196055978536606, + "learning_rate": 2.0188196559551621e-07, + "loss": 0.0025, + "step": 286360 + }, + { + "epoch": 1.836688227352461, + "grad_norm": 0.00047856580931693316, + "learning_rate": 2.0172455821868554e-07, + "loss": 0.0009, + "step": 286370 + }, + { + "epoch": 1.836752364246247, + "grad_norm": 0.02655697613954544, + "learning_rate": 2.0156721096767528e-07, + "loss": 0.0006, + "step": 286380 + }, + { + "epoch": 1.8368165011400333, + "grad_norm": 0.008494804613292217, + "learning_rate": 2.0140992384445668e-07, + "loss": 0.0009, + "step": 286390 + }, + { + "epoch": 1.8368806380338194, + "grad_norm": 0.06913774460554123, + "learning_rate": 2.0125269685100203e-07, + "loss": 0.001, + "step": 286400 + }, + { + "epoch": 1.8369447749276056, + "grad_norm": 0.053152233362197876, + "learning_rate": 2.0109552998928027e-07, + "loss": 0.0006, + "step": 286410 + }, + { + "epoch": 1.8370089118213917, + "grad_norm": 0.1499040573835373, + "learning_rate": 2.0093842326126156e-07, + "loss": 0.0016, + "step": 286420 + }, + { + "epoch": 1.8370730487151776, + "grad_norm": 0.09256197512149811, + "learning_rate": 2.0078137666891374e-07, + "loss": 0.0011, + "step": 286430 + }, + { + "epoch": 1.8371371856089638, + "grad_norm": 0.02435123547911644, + "learning_rate": 2.006243902142052e-07, + "loss": 0.0005, + "step": 286440 + }, + { + "epoch": 1.83720132250275, + "grad_norm": 0.0038517331704497337, + "learning_rate": 2.0046746389910388e-07, + "loss": 0.0006, + "step": 286450 + }, + { + "epoch": 1.8372654593965358, + "grad_norm": 0.10937990993261337, + "learning_rate": 2.0031059772557482e-07, + "loss": 0.0008, + "step": 286460 + }, + { + "epoch": 1.837329596290322, + "grad_norm": 0.038557618856430054, + "learning_rate": 2.0015379169558368e-07, + "loss": 0.0026, + "step": 286470 + }, + { + "epoch": 1.8373937331841081, + "grad_norm": 0.06308803707361221, + "learning_rate": 1.9999704581109614e-07, + "loss": 0.0012, + "step": 286480 + }, + { + "epoch": 1.8374578700778943, + "grad_norm": 0.052062951028347015, + "learning_rate": 1.9984036007407724e-07, + "loss": 0.0007, + "step": 286490 + }, + { + "epoch": 1.8375220069716804, + "grad_norm": 0.05477140098810196, + "learning_rate": 1.9968373448648826e-07, + "loss": 0.001, + "step": 286500 + }, + { + "epoch": 1.8375861438654666, + "grad_norm": 0.08055808395147324, + "learning_rate": 1.9952716905029313e-07, + "loss": 0.0016, + "step": 286510 + }, + { + "epoch": 1.8376502807592525, + "grad_norm": 0.2582389712333679, + "learning_rate": 1.9937066376745363e-07, + "loss": 0.001, + "step": 286520 + }, + { + "epoch": 1.8377144176530387, + "grad_norm": 0.09943609684705734, + "learning_rate": 1.992142186399304e-07, + "loss": 0.0013, + "step": 286530 + }, + { + "epoch": 1.8377785545468246, + "grad_norm": 0.002249883022159338, + "learning_rate": 1.990578336696841e-07, + "loss": 0.0005, + "step": 286540 + }, + { + "epoch": 1.8378426914406107, + "grad_norm": 0.08084165304899216, + "learning_rate": 1.989015088586743e-07, + "loss": 0.0014, + "step": 286550 + }, + { + "epoch": 1.837906828334397, + "grad_norm": 0.03168710693717003, + "learning_rate": 1.9874524420886e-07, + "loss": 0.001, + "step": 286560 + }, + { + "epoch": 1.837970965228183, + "grad_norm": 0.050417460501194, + "learning_rate": 1.9858903972219955e-07, + "loss": 0.0007, + "step": 286570 + }, + { + "epoch": 1.8380351021219692, + "grad_norm": 0.07067926228046417, + "learning_rate": 1.9843289540064926e-07, + "loss": 0.0006, + "step": 286580 + }, + { + "epoch": 1.8380992390157553, + "grad_norm": 0.034939754754304886, + "learning_rate": 1.9827681124616638e-07, + "loss": 0.0008, + "step": 286590 + }, + { + "epoch": 1.8381633759095415, + "grad_norm": 0.07707566022872925, + "learning_rate": 1.9812078726070716e-07, + "loss": 0.0008, + "step": 286600 + }, + { + "epoch": 1.8382275128033274, + "grad_norm": 0.08685056120157242, + "learning_rate": 1.979648234462267e-07, + "loss": 0.0007, + "step": 286610 + }, + { + "epoch": 1.8382916496971136, + "grad_norm": 0.044056981801986694, + "learning_rate": 1.97808919804679e-07, + "loss": 0.0008, + "step": 286620 + }, + { + "epoch": 1.8383557865908995, + "grad_norm": 0.08716120570898056, + "learning_rate": 1.9765307633801635e-07, + "loss": 0.0012, + "step": 286630 + }, + { + "epoch": 1.8384199234846856, + "grad_norm": 0.02711053565144539, + "learning_rate": 1.9749729304819386e-07, + "loss": 0.0021, + "step": 286640 + }, + { + "epoch": 1.8384840603784718, + "grad_norm": 0.07572808116674423, + "learning_rate": 1.973415699371628e-07, + "loss": 0.0009, + "step": 286650 + }, + { + "epoch": 1.838548197272258, + "grad_norm": 0.01647782139480114, + "learning_rate": 1.9718590700687323e-07, + "loss": 0.0018, + "step": 286660 + }, + { + "epoch": 1.838612334166044, + "grad_norm": 0.06854109466075897, + "learning_rate": 1.970303042592775e-07, + "loss": 0.0005, + "step": 286670 + }, + { + "epoch": 1.8386764710598302, + "grad_norm": 0.06197073310613632, + "learning_rate": 1.9687476169632458e-07, + "loss": 0.0004, + "step": 286680 + }, + { + "epoch": 1.8387406079536162, + "grad_norm": 0.02185492031276226, + "learning_rate": 1.9671927931996403e-07, + "loss": 0.001, + "step": 286690 + }, + { + "epoch": 1.8388047448474023, + "grad_norm": 0.05141449347138405, + "learning_rate": 1.965638571321432e-07, + "loss": 0.0019, + "step": 286700 + }, + { + "epoch": 1.8388688817411882, + "grad_norm": 0.052674420177936554, + "learning_rate": 1.9640849513481052e-07, + "loss": 0.0011, + "step": 286710 + }, + { + "epoch": 1.8389330186349744, + "grad_norm": 0.0528746098279953, + "learning_rate": 1.9625319332991223e-07, + "loss": 0.0018, + "step": 286720 + }, + { + "epoch": 1.8389971555287605, + "grad_norm": 0.18851709365844727, + "learning_rate": 1.9609795171939506e-07, + "loss": 0.0019, + "step": 286730 + }, + { + "epoch": 1.8390612924225467, + "grad_norm": 0.03728310018777847, + "learning_rate": 1.9594277030520303e-07, + "loss": 0.0011, + "step": 286740 + }, + { + "epoch": 1.8391254293163328, + "grad_norm": 0.12406738102436066, + "learning_rate": 1.9578764908928183e-07, + "loss": 0.0014, + "step": 286750 + }, + { + "epoch": 1.839189566210119, + "grad_norm": 0.09355689585208893, + "learning_rate": 1.9563258807357543e-07, + "loss": 0.001, + "step": 286760 + }, + { + "epoch": 1.8392537031039051, + "grad_norm": 0.06668778508901596, + "learning_rate": 1.9547758726002618e-07, + "loss": 0.0035, + "step": 286770 + }, + { + "epoch": 1.839317839997691, + "grad_norm": 0.05535772442817688, + "learning_rate": 1.9532264665057588e-07, + "loss": 0.003, + "step": 286780 + }, + { + "epoch": 1.8393819768914772, + "grad_norm": 0.0019860744941979647, + "learning_rate": 1.9516776624716738e-07, + "loss": 0.0008, + "step": 286790 + }, + { + "epoch": 1.8394461137852631, + "grad_norm": 0.07493186742067337, + "learning_rate": 1.9501294605174026e-07, + "loss": 0.0005, + "step": 286800 + }, + { + "epoch": 1.8395102506790493, + "grad_norm": 0.0036910579074174166, + "learning_rate": 1.9485818606623464e-07, + "loss": 0.0006, + "step": 286810 + }, + { + "epoch": 1.8395743875728354, + "grad_norm": 0.12099093198776245, + "learning_rate": 1.9470348629259118e-07, + "loss": 0.0007, + "step": 286820 + }, + { + "epoch": 1.8396385244666216, + "grad_norm": 0.06820711493492126, + "learning_rate": 1.9454884673274666e-07, + "loss": 0.0007, + "step": 286830 + }, + { + "epoch": 1.8397026613604077, + "grad_norm": 0.05245767533779144, + "learning_rate": 1.9439426738864008e-07, + "loss": 0.0033, + "step": 286840 + }, + { + "epoch": 1.8397667982541939, + "grad_norm": 0.03281093016266823, + "learning_rate": 1.942397482622066e-07, + "loss": 0.0009, + "step": 286850 + }, + { + "epoch": 1.8398309351479798, + "grad_norm": 0.006717719603329897, + "learning_rate": 1.940852893553846e-07, + "loss": 0.0007, + "step": 286860 + }, + { + "epoch": 1.839895072041766, + "grad_norm": 0.07139433175325394, + "learning_rate": 1.9393089067010872e-07, + "loss": 0.0008, + "step": 286870 + }, + { + "epoch": 1.839959208935552, + "grad_norm": 0.10973839461803436, + "learning_rate": 1.9377655220831348e-07, + "loss": 0.0008, + "step": 286880 + }, + { + "epoch": 1.840023345829338, + "grad_norm": 0.027526378631591797, + "learning_rate": 1.936222739719329e-07, + "loss": 0.0011, + "step": 286890 + }, + { + "epoch": 1.8400874827231242, + "grad_norm": 0.17372477054595947, + "learning_rate": 1.9346805596290042e-07, + "loss": 0.001, + "step": 286900 + }, + { + "epoch": 1.8401516196169103, + "grad_norm": 0.04875880479812622, + "learning_rate": 1.9331389818314838e-07, + "loss": 0.0009, + "step": 286910 + }, + { + "epoch": 1.8402157565106965, + "grad_norm": 0.11518640071153641, + "learning_rate": 1.9315980063460805e-07, + "loss": 0.0019, + "step": 286920 + }, + { + "epoch": 1.8402798934044826, + "grad_norm": 0.2383868396282196, + "learning_rate": 1.9300576331921116e-07, + "loss": 0.0012, + "step": 286930 + }, + { + "epoch": 1.8403440302982688, + "grad_norm": 0.03028559498488903, + "learning_rate": 1.9285178623888788e-07, + "loss": 0.0022, + "step": 286940 + }, + { + "epoch": 1.8404081671920547, + "grad_norm": 0.1163666620850563, + "learning_rate": 1.9269786939556722e-07, + "loss": 0.0004, + "step": 286950 + }, + { + "epoch": 1.8404723040858408, + "grad_norm": 0.019930845126509666, + "learning_rate": 1.9254401279117763e-07, + "loss": 0.001, + "step": 286960 + }, + { + "epoch": 1.8405364409796268, + "grad_norm": 0.0351133793592453, + "learning_rate": 1.9239021642764754e-07, + "loss": 0.0007, + "step": 286970 + }, + { + "epoch": 1.840600577873413, + "grad_norm": 0.08575202524662018, + "learning_rate": 1.9223648030690433e-07, + "loss": 0.001, + "step": 286980 + }, + { + "epoch": 1.840664714767199, + "grad_norm": 0.01145631168037653, + "learning_rate": 1.9208280443087369e-07, + "loss": 0.0008, + "step": 286990 + }, + { + "epoch": 1.8407288516609852, + "grad_norm": 0.03613721579313278, + "learning_rate": 1.9192918880148125e-07, + "loss": 0.0007, + "step": 287000 + }, + { + "epoch": 1.8407929885547714, + "grad_norm": 0.03389430791139603, + "learning_rate": 1.9177563342065332e-07, + "loss": 0.0021, + "step": 287010 + }, + { + "epoch": 1.8408571254485575, + "grad_norm": 0.0285571850836277, + "learning_rate": 1.9162213829031273e-07, + "loss": 0.001, + "step": 287020 + }, + { + "epoch": 1.8409212623423437, + "grad_norm": 0.0828038901090622, + "learning_rate": 1.9146870341238356e-07, + "loss": 0.0008, + "step": 287030 + }, + { + "epoch": 1.8409853992361296, + "grad_norm": 0.1025981530547142, + "learning_rate": 1.91315328788787e-07, + "loss": 0.0009, + "step": 287040 + }, + { + "epoch": 1.8410495361299157, + "grad_norm": 0.043404772877693176, + "learning_rate": 1.9116201442144712e-07, + "loss": 0.001, + "step": 287050 + }, + { + "epoch": 1.8411136730237017, + "grad_norm": 0.09349111467599869, + "learning_rate": 1.91008760312284e-07, + "loss": 0.0007, + "step": 287060 + }, + { + "epoch": 1.8411778099174878, + "grad_norm": 0.15410931408405304, + "learning_rate": 1.9085556646321724e-07, + "loss": 0.0018, + "step": 287070 + }, + { + "epoch": 1.841241946811274, + "grad_norm": 0.005862515885382891, + "learning_rate": 1.9070243287616753e-07, + "loss": 0.0013, + "step": 287080 + }, + { + "epoch": 1.84130608370506, + "grad_norm": 0.06634867936372757, + "learning_rate": 1.905493595530544e-07, + "loss": 0.0008, + "step": 287090 + }, + { + "epoch": 1.8413702205988463, + "grad_norm": 0.035512588918209076, + "learning_rate": 1.9039634649579418e-07, + "loss": 0.0006, + "step": 287100 + }, + { + "epoch": 1.8414343574926324, + "grad_norm": 0.057515282183885574, + "learning_rate": 1.902433937063053e-07, + "loss": 0.0008, + "step": 287110 + }, + { + "epoch": 1.8414984943864183, + "grad_norm": 0.11442890763282776, + "learning_rate": 1.9009050118650397e-07, + "loss": 0.0008, + "step": 287120 + }, + { + "epoch": 1.8415626312802045, + "grad_norm": 0.02416716329753399, + "learning_rate": 1.89937668938307e-07, + "loss": 0.001, + "step": 287130 + }, + { + "epoch": 1.8416267681739904, + "grad_norm": 0.20283135771751404, + "learning_rate": 1.8978489696362846e-07, + "loss": 0.0056, + "step": 287140 + }, + { + "epoch": 1.8416909050677766, + "grad_norm": 0.011368321254849434, + "learning_rate": 1.8963218526438233e-07, + "loss": 0.001, + "step": 287150 + }, + { + "epoch": 1.8417550419615627, + "grad_norm": 0.07025032490491867, + "learning_rate": 1.894795338424832e-07, + "loss": 0.0003, + "step": 287160 + }, + { + "epoch": 1.8418191788553488, + "grad_norm": 0.04045814648270607, + "learning_rate": 1.8932694269984397e-07, + "loss": 0.0015, + "step": 287170 + }, + { + "epoch": 1.841883315749135, + "grad_norm": 0.017666665837168694, + "learning_rate": 1.8917441183837593e-07, + "loss": 0.0019, + "step": 287180 + }, + { + "epoch": 1.8419474526429211, + "grad_norm": 0.0366133488714695, + "learning_rate": 1.8902194125999029e-07, + "loss": 0.0008, + "step": 287190 + }, + { + "epoch": 1.8420115895367073, + "grad_norm": 0.055339265614748, + "learning_rate": 1.8886953096659833e-07, + "loss": 0.0011, + "step": 287200 + }, + { + "epoch": 1.8420757264304932, + "grad_norm": 0.13355812430381775, + "learning_rate": 1.8871718096011015e-07, + "loss": 0.0019, + "step": 287210 + }, + { + "epoch": 1.8421398633242794, + "grad_norm": 0.0653081163764, + "learning_rate": 1.8856489124243372e-07, + "loss": 0.0006, + "step": 287220 + }, + { + "epoch": 1.8422040002180653, + "grad_norm": 0.015454231761395931, + "learning_rate": 1.8841266181547803e-07, + "loss": 0.001, + "step": 287230 + }, + { + "epoch": 1.8422681371118514, + "grad_norm": 0.029873233288526535, + "learning_rate": 1.88260492681151e-07, + "loss": 0.0005, + "step": 287240 + }, + { + "epoch": 1.8423322740056376, + "grad_norm": 0.058013807982206345, + "learning_rate": 1.8810838384135777e-07, + "loss": 0.0017, + "step": 287250 + }, + { + "epoch": 1.8423964108994237, + "grad_norm": 0.22602404654026031, + "learning_rate": 1.8795633529800627e-07, + "loss": 0.0015, + "step": 287260 + }, + { + "epoch": 1.84246054779321, + "grad_norm": 0.03169405832886696, + "learning_rate": 1.8780434705300054e-07, + "loss": 0.0013, + "step": 287270 + }, + { + "epoch": 1.842524684686996, + "grad_norm": 0.06978941708803177, + "learning_rate": 1.8765241910824573e-07, + "loss": 0.0005, + "step": 287280 + }, + { + "epoch": 1.842588821580782, + "grad_norm": 0.12041551619768143, + "learning_rate": 1.8750055146564582e-07, + "loss": 0.0012, + "step": 287290 + }, + { + "epoch": 1.8426529584745681, + "grad_norm": 0.15691913664340973, + "learning_rate": 1.873487441271038e-07, + "loss": 0.0017, + "step": 287300 + }, + { + "epoch": 1.8427170953683543, + "grad_norm": 0.06942026317119598, + "learning_rate": 1.8719699709452032e-07, + "loss": 0.0009, + "step": 287310 + }, + { + "epoch": 1.8427812322621402, + "grad_norm": 0.05978382006287575, + "learning_rate": 1.870453103697989e-07, + "loss": 0.0012, + "step": 287320 + }, + { + "epoch": 1.8428453691559263, + "grad_norm": 0.08361706137657166, + "learning_rate": 1.8689368395483964e-07, + "loss": 0.0006, + "step": 287330 + }, + { + "epoch": 1.8429095060497125, + "grad_norm": 0.1487368941307068, + "learning_rate": 1.8674211785154162e-07, + "loss": 0.0015, + "step": 287340 + }, + { + "epoch": 1.8429736429434986, + "grad_norm": 0.02252854034304619, + "learning_rate": 1.865906120618055e-07, + "loss": 0.001, + "step": 287350 + }, + { + "epoch": 1.8430377798372848, + "grad_norm": 0.00402287021279335, + "learning_rate": 1.8643916658752925e-07, + "loss": 0.0007, + "step": 287360 + }, + { + "epoch": 1.843101916731071, + "grad_norm": 0.05641627311706543, + "learning_rate": 1.8628778143061078e-07, + "loss": 0.0015, + "step": 287370 + }, + { + "epoch": 1.8431660536248569, + "grad_norm": 0.004220783710479736, + "learning_rate": 1.8613645659294522e-07, + "loss": 0.0003, + "step": 287380 + }, + { + "epoch": 1.843230190518643, + "grad_norm": 0.08366972208023071, + "learning_rate": 1.859851920764316e-07, + "loss": 0.0012, + "step": 287390 + }, + { + "epoch": 1.843294327412429, + "grad_norm": 0.007999827153980732, + "learning_rate": 1.8583398788296403e-07, + "loss": 0.0008, + "step": 287400 + }, + { + "epoch": 1.843358464306215, + "grad_norm": 0.08857898414134979, + "learning_rate": 1.8568284401443648e-07, + "loss": 0.0013, + "step": 287410 + }, + { + "epoch": 1.8434226012000012, + "grad_norm": 0.041703127324581146, + "learning_rate": 1.8553176047274357e-07, + "loss": 0.0009, + "step": 287420 + }, + { + "epoch": 1.8434867380937874, + "grad_norm": 0.07411111146211624, + "learning_rate": 1.8538073725977935e-07, + "loss": 0.0006, + "step": 287430 + }, + { + "epoch": 1.8435508749875735, + "grad_norm": 0.13202106952667236, + "learning_rate": 1.8522977437743505e-07, + "loss": 0.0013, + "step": 287440 + }, + { + "epoch": 1.8436150118813597, + "grad_norm": 0.03775406628847122, + "learning_rate": 1.850788718276031e-07, + "loss": 0.0009, + "step": 287450 + }, + { + "epoch": 1.8436791487751458, + "grad_norm": 0.07211952656507492, + "learning_rate": 1.8492802961217305e-07, + "loss": 0.0007, + "step": 287460 + }, + { + "epoch": 1.8437432856689318, + "grad_norm": 0.002661585807800293, + "learning_rate": 1.8477724773303728e-07, + "loss": 0.0013, + "step": 287470 + }, + { + "epoch": 1.843807422562718, + "grad_norm": 0.22028471529483795, + "learning_rate": 1.846265261920832e-07, + "loss": 0.0016, + "step": 287480 + }, + { + "epoch": 1.8438715594565038, + "grad_norm": 0.052171215415000916, + "learning_rate": 1.8447586499120042e-07, + "loss": 0.0012, + "step": 287490 + }, + { + "epoch": 1.84393569635029, + "grad_norm": 0.1524595320224762, + "learning_rate": 1.843252641322768e-07, + "loss": 0.001, + "step": 287500 + }, + { + "epoch": 1.8439998332440761, + "grad_norm": 0.018931686878204346, + "learning_rate": 1.8417472361719923e-07, + "loss": 0.001, + "step": 287510 + }, + { + "epoch": 1.8440639701378623, + "grad_norm": 0.005724246148020029, + "learning_rate": 1.8402424344785452e-07, + "loss": 0.0014, + "step": 287520 + }, + { + "epoch": 1.8441281070316484, + "grad_norm": 0.05640290305018425, + "learning_rate": 1.8387382362612727e-07, + "loss": 0.0011, + "step": 287530 + }, + { + "epoch": 1.8441922439254346, + "grad_norm": 0.19624315202236176, + "learning_rate": 1.8372346415390374e-07, + "loss": 0.0009, + "step": 287540 + }, + { + "epoch": 1.8442563808192205, + "grad_norm": 0.0690305233001709, + "learning_rate": 1.8357316503306688e-07, + "loss": 0.0006, + "step": 287550 + }, + { + "epoch": 1.8443205177130066, + "grad_norm": 0.04752447083592415, + "learning_rate": 1.8342292626550074e-07, + "loss": 0.0006, + "step": 287560 + }, + { + "epoch": 1.8443846546067926, + "grad_norm": 0.061734091490507126, + "learning_rate": 1.8327274785308713e-07, + "loss": 0.0013, + "step": 287570 + }, + { + "epoch": 1.8444487915005787, + "grad_norm": 0.03917500004172325, + "learning_rate": 1.8312262979770955e-07, + "loss": 0.0017, + "step": 287580 + }, + { + "epoch": 1.8445129283943649, + "grad_norm": 0.10373364388942719, + "learning_rate": 1.8297257210124708e-07, + "loss": 0.0018, + "step": 287590 + }, + { + "epoch": 1.844577065288151, + "grad_norm": 0.05342405289411545, + "learning_rate": 1.8282257476558153e-07, + "loss": 0.0013, + "step": 287600 + }, + { + "epoch": 1.8446412021819372, + "grad_norm": 0.03532358258962631, + "learning_rate": 1.8267263779259138e-07, + "loss": 0.0013, + "step": 287610 + }, + { + "epoch": 1.8447053390757233, + "grad_norm": 0.04092554748058319, + "learning_rate": 1.8252276118415624e-07, + "loss": 0.0009, + "step": 287620 + }, + { + "epoch": 1.8447694759695095, + "grad_norm": 0.037707142531871796, + "learning_rate": 1.8237294494215408e-07, + "loss": 0.0013, + "step": 287630 + }, + { + "epoch": 1.8448336128632954, + "grad_norm": 0.049918580800294876, + "learning_rate": 1.822231890684617e-07, + "loss": 0.0013, + "step": 287640 + }, + { + "epoch": 1.8448977497570815, + "grad_norm": 0.02753879502415657, + "learning_rate": 1.8207349356495595e-07, + "loss": 0.0005, + "step": 287650 + }, + { + "epoch": 1.8449618866508675, + "grad_norm": 0.22618646919727325, + "learning_rate": 1.8192385843351313e-07, + "loss": 0.0006, + "step": 287660 + }, + { + "epoch": 1.8450260235446536, + "grad_norm": 0.08298692107200623, + "learning_rate": 1.8177428367600725e-07, + "loss": 0.0008, + "step": 287670 + }, + { + "epoch": 1.8450901604384398, + "grad_norm": 0.17711129784584045, + "learning_rate": 1.8162476929431295e-07, + "loss": 0.0018, + "step": 287680 + }, + { + "epoch": 1.845154297332226, + "grad_norm": 0.014852141961455345, + "learning_rate": 1.8147531529030483e-07, + "loss": 0.0015, + "step": 287690 + }, + { + "epoch": 1.845218434226012, + "grad_norm": 0.07084935158491135, + "learning_rate": 1.8132592166585416e-07, + "loss": 0.001, + "step": 287700 + }, + { + "epoch": 1.8452825711197982, + "grad_norm": 0.03970756009221077, + "learning_rate": 1.8117658842283392e-07, + "loss": 0.0012, + "step": 287710 + }, + { + "epoch": 1.8453467080135844, + "grad_norm": 0.14913849532604218, + "learning_rate": 1.8102731556311425e-07, + "loss": 0.0016, + "step": 287720 + }, + { + "epoch": 1.8454108449073703, + "grad_norm": 0.05938960239291191, + "learning_rate": 1.8087810308856703e-07, + "loss": 0.0014, + "step": 287730 + }, + { + "epoch": 1.8454749818011564, + "grad_norm": 0.03920167312026024, + "learning_rate": 1.8072895100106124e-07, + "loss": 0.0011, + "step": 287740 + }, + { + "epoch": 1.8455391186949424, + "grad_norm": 0.0677153617143631, + "learning_rate": 1.8057985930246603e-07, + "loss": 0.0018, + "step": 287750 + }, + { + "epoch": 1.8456032555887285, + "grad_norm": 0.362377405166626, + "learning_rate": 1.804308279946493e-07, + "loss": 0.0008, + "step": 287760 + }, + { + "epoch": 1.8456673924825147, + "grad_norm": 0.044777531176805496, + "learning_rate": 1.80281857079479e-07, + "loss": 0.0005, + "step": 287770 + }, + { + "epoch": 1.8457315293763008, + "grad_norm": 0.03310674801468849, + "learning_rate": 1.8013294655882142e-07, + "loss": 0.0006, + "step": 287780 + }, + { + "epoch": 1.845795666270087, + "grad_norm": 0.010308466851711273, + "learning_rate": 1.7998409643454228e-07, + "loss": 0.0006, + "step": 287790 + }, + { + "epoch": 1.845859803163873, + "grad_norm": 0.07554610818624496, + "learning_rate": 1.7983530670850846e-07, + "loss": 0.0027, + "step": 287800 + }, + { + "epoch": 1.845923940057659, + "grad_norm": 0.0661851018667221, + "learning_rate": 1.7968657738258232e-07, + "loss": 0.0011, + "step": 287810 + }, + { + "epoch": 1.8459880769514452, + "grad_norm": 0.006231280043721199, + "learning_rate": 1.7953790845862907e-07, + "loss": 0.0008, + "step": 287820 + }, + { + "epoch": 1.846052213845231, + "grad_norm": 0.23022891581058502, + "learning_rate": 1.7938929993850996e-07, + "loss": 0.0013, + "step": 287830 + }, + { + "epoch": 1.8461163507390173, + "grad_norm": 0.020045816898345947, + "learning_rate": 1.792407518240885e-07, + "loss": 0.0006, + "step": 287840 + }, + { + "epoch": 1.8461804876328034, + "grad_norm": 0.1408000886440277, + "learning_rate": 1.7909226411722603e-07, + "loss": 0.0022, + "step": 287850 + }, + { + "epoch": 1.8462446245265896, + "grad_norm": 0.007024731952697039, + "learning_rate": 1.7894383681978321e-07, + "loss": 0.0011, + "step": 287860 + }, + { + "epoch": 1.8463087614203757, + "grad_norm": 0.045289646834135056, + "learning_rate": 1.7879546993361918e-07, + "loss": 0.0007, + "step": 287870 + }, + { + "epoch": 1.8463728983141618, + "grad_norm": 0.03839179128408432, + "learning_rate": 1.7864716346059352e-07, + "loss": 0.0008, + "step": 287880 + }, + { + "epoch": 1.846437035207948, + "grad_norm": 0.02492174133658409, + "learning_rate": 1.784989174025653e-07, + "loss": 0.001, + "step": 287890 + }, + { + "epoch": 1.846501172101734, + "grad_norm": 0.0650896206498146, + "learning_rate": 1.7835073176139083e-07, + "loss": 0.0015, + "step": 287900 + }, + { + "epoch": 1.84656530899552, + "grad_norm": 0.10479025542736053, + "learning_rate": 1.7820260653892752e-07, + "loss": 0.0017, + "step": 287910 + }, + { + "epoch": 1.846629445889306, + "grad_norm": 0.2658521831035614, + "learning_rate": 1.7805454173703218e-07, + "loss": 0.001, + "step": 287920 + }, + { + "epoch": 1.8466935827830921, + "grad_norm": 0.12911981344223022, + "learning_rate": 1.7790653735756002e-07, + "loss": 0.0011, + "step": 287930 + }, + { + "epoch": 1.8467577196768783, + "grad_norm": 0.11474470794200897, + "learning_rate": 1.77758593402364e-07, + "loss": 0.0005, + "step": 287940 + }, + { + "epoch": 1.8468218565706644, + "grad_norm": 0.051907800137996674, + "learning_rate": 1.7761070987329988e-07, + "loss": 0.0005, + "step": 287950 + }, + { + "epoch": 1.8468859934644506, + "grad_norm": 0.06339803338050842, + "learning_rate": 1.7746288677222058e-07, + "loss": 0.0017, + "step": 287960 + }, + { + "epoch": 1.8469501303582367, + "grad_norm": 0.05739164352416992, + "learning_rate": 1.773151241009774e-07, + "loss": 0.0019, + "step": 287970 + }, + { + "epoch": 1.8470142672520227, + "grad_norm": 0.048748426139354706, + "learning_rate": 1.7716742186142222e-07, + "loss": 0.001, + "step": 287980 + }, + { + "epoch": 1.8470784041458088, + "grad_norm": 0.03448588401079178, + "learning_rate": 1.7701978005540633e-07, + "loss": 0.0013, + "step": 287990 + }, + { + "epoch": 1.847142541039595, + "grad_norm": 0.03690283000469208, + "learning_rate": 1.768721986847799e-07, + "loss": 0.0012, + "step": 288000 + }, + { + "epoch": 1.847206677933381, + "grad_norm": 0.03804676979780197, + "learning_rate": 1.7672467775139145e-07, + "loss": 0.001, + "step": 288010 + }, + { + "epoch": 1.847270814827167, + "grad_norm": 0.06943704932928085, + "learning_rate": 1.7657721725708953e-07, + "loss": 0.0021, + "step": 288020 + }, + { + "epoch": 1.8473349517209532, + "grad_norm": 0.08496490865945816, + "learning_rate": 1.7642981720372264e-07, + "loss": 0.0012, + "step": 288030 + }, + { + "epoch": 1.8473990886147393, + "grad_norm": 0.03908595070242882, + "learning_rate": 1.7628247759313765e-07, + "loss": 0.0007, + "step": 288040 + }, + { + "epoch": 1.8474632255085255, + "grad_norm": 0.011440551839768887, + "learning_rate": 1.7613519842718086e-07, + "loss": 0.0015, + "step": 288050 + }, + { + "epoch": 1.8475273624023116, + "grad_norm": 0.10329517722129822, + "learning_rate": 1.7598797970769688e-07, + "loss": 0.0015, + "step": 288060 + }, + { + "epoch": 1.8475914992960976, + "grad_norm": 0.030152389779686928, + "learning_rate": 1.7584082143653204e-07, + "loss": 0.0008, + "step": 288070 + }, + { + "epoch": 1.8476556361898837, + "grad_norm": 0.05683228000998497, + "learning_rate": 1.7569372361552927e-07, + "loss": 0.0009, + "step": 288080 + }, + { + "epoch": 1.8477197730836696, + "grad_norm": 0.15313345193862915, + "learning_rate": 1.755466862465316e-07, + "loss": 0.0008, + "step": 288090 + }, + { + "epoch": 1.8477839099774558, + "grad_norm": 0.10386233776807785, + "learning_rate": 1.7539970933138195e-07, + "loss": 0.0015, + "step": 288100 + }, + { + "epoch": 1.847848046871242, + "grad_norm": 0.011520123109221458, + "learning_rate": 1.7525279287192277e-07, + "loss": 0.0005, + "step": 288110 + }, + { + "epoch": 1.847912183765028, + "grad_norm": 0.08517508208751678, + "learning_rate": 1.7510593686999367e-07, + "loss": 0.0024, + "step": 288120 + }, + { + "epoch": 1.8479763206588142, + "grad_norm": 0.011454598978161812, + "learning_rate": 1.7495914132743597e-07, + "loss": 0.0011, + "step": 288130 + }, + { + "epoch": 1.8480404575526004, + "grad_norm": 0.1312156468629837, + "learning_rate": 1.7481240624608764e-07, + "loss": 0.001, + "step": 288140 + }, + { + "epoch": 1.8481045944463865, + "grad_norm": 0.036829717457294464, + "learning_rate": 1.7466573162778944e-07, + "loss": 0.0005, + "step": 288150 + }, + { + "epoch": 1.8481687313401725, + "grad_norm": 0.07705144584178925, + "learning_rate": 1.7451911747437766e-07, + "loss": 0.0009, + "step": 288160 + }, + { + "epoch": 1.8482328682339586, + "grad_norm": 0.06838127970695496, + "learning_rate": 1.743725637876903e-07, + "loss": 0.0009, + "step": 288170 + }, + { + "epoch": 1.8482970051277445, + "grad_norm": 0.029356516897678375, + "learning_rate": 1.742260705695631e-07, + "loss": 0.0013, + "step": 288180 + }, + { + "epoch": 1.8483611420215307, + "grad_norm": 0.08610998094081879, + "learning_rate": 1.740796378218329e-07, + "loss": 0.0019, + "step": 288190 + }, + { + "epoch": 1.8484252789153168, + "grad_norm": 0.07729338854551315, + "learning_rate": 1.7393326554633382e-07, + "loss": 0.0009, + "step": 288200 + }, + { + "epoch": 1.848489415809103, + "grad_norm": 0.13719235360622406, + "learning_rate": 1.7378695374489883e-07, + "loss": 0.0006, + "step": 288210 + }, + { + "epoch": 1.8485535527028891, + "grad_norm": 0.06897895038127899, + "learning_rate": 1.736407024193637e-07, + "loss": 0.001, + "step": 288220 + }, + { + "epoch": 1.8486176895966753, + "grad_norm": 0.06548663973808289, + "learning_rate": 1.7349451157156026e-07, + "loss": 0.0004, + "step": 288230 + }, + { + "epoch": 1.8486818264904612, + "grad_norm": 0.10674738883972168, + "learning_rate": 1.733483812033193e-07, + "loss": 0.001, + "step": 288240 + }, + { + "epoch": 1.8487459633842473, + "grad_norm": 0.03931476175785065, + "learning_rate": 1.732023113164727e-07, + "loss": 0.0008, + "step": 288250 + }, + { + "epoch": 1.8488101002780333, + "grad_norm": 0.06040503457188606, + "learning_rate": 1.730563019128506e-07, + "loss": 0.0007, + "step": 288260 + }, + { + "epoch": 1.8488742371718194, + "grad_norm": 0.0705578550696373, + "learning_rate": 1.7291035299428328e-07, + "loss": 0.0008, + "step": 288270 + }, + { + "epoch": 1.8489383740656056, + "grad_norm": 0.08032902330160141, + "learning_rate": 1.7276446456259922e-07, + "loss": 0.001, + "step": 288280 + }, + { + "epoch": 1.8490025109593917, + "grad_norm": 0.03601422905921936, + "learning_rate": 1.7261863661962585e-07, + "loss": 0.0006, + "step": 288290 + }, + { + "epoch": 1.8490666478531779, + "grad_norm": 0.030967695638537407, + "learning_rate": 1.7247286916719174e-07, + "loss": 0.001, + "step": 288300 + }, + { + "epoch": 1.849130784746964, + "grad_norm": 0.051623016595840454, + "learning_rate": 1.723271622071221e-07, + "loss": 0.0009, + "step": 288310 + }, + { + "epoch": 1.8491949216407502, + "grad_norm": 0.09495478868484497, + "learning_rate": 1.7218151574124376e-07, + "loss": 0.0006, + "step": 288320 + }, + { + "epoch": 1.849259058534536, + "grad_norm": 0.03560183197259903, + "learning_rate": 1.7203592977138083e-07, + "loss": 0.0009, + "step": 288330 + }, + { + "epoch": 1.8493231954283222, + "grad_norm": 0.025246890261769295, + "learning_rate": 1.718904042993591e-07, + "loss": 0.0007, + "step": 288340 + }, + { + "epoch": 1.8493873323221082, + "grad_norm": 0.0935717299580574, + "learning_rate": 1.7174493932700098e-07, + "loss": 0.0013, + "step": 288350 + }, + { + "epoch": 1.8494514692158943, + "grad_norm": 0.06764940172433853, + "learning_rate": 1.715995348561289e-07, + "loss": 0.0012, + "step": 288360 + }, + { + "epoch": 1.8495156061096805, + "grad_norm": 0.16031967103481293, + "learning_rate": 1.7145419088856585e-07, + "loss": 0.0006, + "step": 288370 + }, + { + "epoch": 1.8495797430034666, + "grad_norm": 0.042350515723228455, + "learning_rate": 1.713089074261326e-07, + "loss": 0.0008, + "step": 288380 + }, + { + "epoch": 1.8496438798972528, + "grad_norm": 0.18568123877048492, + "learning_rate": 1.7116368447064991e-07, + "loss": 0.0009, + "step": 288390 + }, + { + "epoch": 1.849708016791039, + "grad_norm": 0.15407635271549225, + "learning_rate": 1.7101852202393688e-07, + "loss": 0.0007, + "step": 288400 + }, + { + "epoch": 1.8497721536848248, + "grad_norm": 0.06498847901821136, + "learning_rate": 1.7087342008781372e-07, + "loss": 0.0014, + "step": 288410 + }, + { + "epoch": 1.849836290578611, + "grad_norm": 0.11050275713205338, + "learning_rate": 1.7072837866409785e-07, + "loss": 0.001, + "step": 288420 + }, + { + "epoch": 1.8499004274723971, + "grad_norm": 0.07677308470010757, + "learning_rate": 1.7058339775460675e-07, + "loss": 0.0017, + "step": 288430 + }, + { + "epoch": 1.849964564366183, + "grad_norm": 0.07862634211778641, + "learning_rate": 1.704384773611567e-07, + "loss": 0.001, + "step": 288440 + }, + { + "epoch": 1.8500287012599692, + "grad_norm": 0.024905717000365257, + "learning_rate": 1.7029361748556462e-07, + "loss": 0.0027, + "step": 288450 + }, + { + "epoch": 1.8500928381537554, + "grad_norm": 0.39429962635040283, + "learning_rate": 1.7014881812964511e-07, + "loss": 0.0046, + "step": 288460 + }, + { + "epoch": 1.8501569750475415, + "grad_norm": 0.12610678374767303, + "learning_rate": 1.7000407929521289e-07, + "loss": 0.0012, + "step": 288470 + }, + { + "epoch": 1.8502211119413277, + "grad_norm": 0.1045580506324768, + "learning_rate": 1.6985940098408093e-07, + "loss": 0.0014, + "step": 288480 + }, + { + "epoch": 1.8502852488351138, + "grad_norm": 0.0926932767033577, + "learning_rate": 1.6971478319806334e-07, + "loss": 0.0017, + "step": 288490 + }, + { + "epoch": 1.8503493857288997, + "grad_norm": 0.02598356083035469, + "learning_rate": 1.69570225938972e-07, + "loss": 0.0013, + "step": 288500 + }, + { + "epoch": 1.8504135226226859, + "grad_norm": 0.1517276167869568, + "learning_rate": 1.6942572920861712e-07, + "loss": 0.0012, + "step": 288510 + }, + { + "epoch": 1.8504776595164718, + "grad_norm": 0.18101486563682556, + "learning_rate": 1.6928129300881057e-07, + "loss": 0.002, + "step": 288520 + }, + { + "epoch": 1.850541796410258, + "grad_norm": 0.11176485568284988, + "learning_rate": 1.6913691734136206e-07, + "loss": 0.0014, + "step": 288530 + }, + { + "epoch": 1.850605933304044, + "grad_norm": 0.21926063299179077, + "learning_rate": 1.6899260220808068e-07, + "loss": 0.0011, + "step": 288540 + }, + { + "epoch": 1.8506700701978303, + "grad_norm": 0.01746945083141327, + "learning_rate": 1.6884834761077385e-07, + "loss": 0.0007, + "step": 288550 + }, + { + "epoch": 1.8507342070916164, + "grad_norm": 0.010603145696222782, + "learning_rate": 1.6870415355125068e-07, + "loss": 0.0009, + "step": 288560 + }, + { + "epoch": 1.8507983439854025, + "grad_norm": 0.01538256648927927, + "learning_rate": 1.68560020031317e-07, + "loss": 0.0017, + "step": 288570 + }, + { + "epoch": 1.8508624808791887, + "grad_norm": 0.1104760617017746, + "learning_rate": 1.6841594705277964e-07, + "loss": 0.0006, + "step": 288580 + }, + { + "epoch": 1.8509266177729746, + "grad_norm": 0.002728406572714448, + "learning_rate": 1.6827193461744275e-07, + "loss": 0.0013, + "step": 288590 + }, + { + "epoch": 1.8509907546667608, + "grad_norm": 0.14359810948371887, + "learning_rate": 1.681279827271126e-07, + "loss": 0.0011, + "step": 288600 + }, + { + "epoch": 1.8510548915605467, + "grad_norm": 0.13374969363212585, + "learning_rate": 1.6798409138359172e-07, + "loss": 0.0008, + "step": 288610 + }, + { + "epoch": 1.8511190284543328, + "grad_norm": 0.09471452981233597, + "learning_rate": 1.6784026058868418e-07, + "loss": 0.0005, + "step": 288620 + }, + { + "epoch": 1.851183165348119, + "grad_norm": 0.0014831902226433158, + "learning_rate": 1.6769649034419078e-07, + "loss": 0.0012, + "step": 288630 + }, + { + "epoch": 1.8512473022419051, + "grad_norm": 0.15719948709011078, + "learning_rate": 1.6755278065191395e-07, + "loss": 0.0015, + "step": 288640 + }, + { + "epoch": 1.8513114391356913, + "grad_norm": 0.07202229648828506, + "learning_rate": 1.67409131513655e-07, + "loss": 0.0008, + "step": 288650 + }, + { + "epoch": 1.8513755760294774, + "grad_norm": 0.1010279506444931, + "learning_rate": 1.672655429312131e-07, + "loss": 0.0007, + "step": 288660 + }, + { + "epoch": 1.8514397129232634, + "grad_norm": 0.0597020722925663, + "learning_rate": 1.671220149063879e-07, + "loss": 0.0006, + "step": 288670 + }, + { + "epoch": 1.8515038498170495, + "grad_norm": 0.01989404484629631, + "learning_rate": 1.6697854744097853e-07, + "loss": 0.001, + "step": 288680 + }, + { + "epoch": 1.8515679867108354, + "grad_norm": 0.14670448005199432, + "learning_rate": 1.668351405367813e-07, + "loss": 0.0018, + "step": 288690 + }, + { + "epoch": 1.8516321236046216, + "grad_norm": 0.038981057703495026, + "learning_rate": 1.6669179419559366e-07, + "loss": 0.0006, + "step": 288700 + }, + { + "epoch": 1.8516962604984077, + "grad_norm": 0.14221884310245514, + "learning_rate": 1.665485084192131e-07, + "loss": 0.0011, + "step": 288710 + }, + { + "epoch": 1.851760397392194, + "grad_norm": 0.06356054544448853, + "learning_rate": 1.6640528320943372e-07, + "loss": 0.0017, + "step": 288720 + }, + { + "epoch": 1.85182453428598, + "grad_norm": 0.05507383123040199, + "learning_rate": 1.6626211856805075e-07, + "loss": 0.0008, + "step": 288730 + }, + { + "epoch": 1.8518886711797662, + "grad_norm": 0.08880084753036499, + "learning_rate": 1.6611901449685775e-07, + "loss": 0.0007, + "step": 288740 + }, + { + "epoch": 1.8519528080735523, + "grad_norm": 0.002199749927967787, + "learning_rate": 1.659759709976483e-07, + "loss": 0.0018, + "step": 288750 + }, + { + "epoch": 1.8520169449673383, + "grad_norm": 0.13226568698883057, + "learning_rate": 1.6583298807221537e-07, + "loss": 0.0009, + "step": 288760 + }, + { + "epoch": 1.8520810818611244, + "grad_norm": 0.029226161539554596, + "learning_rate": 1.6569006572234869e-07, + "loss": 0.0005, + "step": 288770 + }, + { + "epoch": 1.8521452187549103, + "grad_norm": 0.03023863211274147, + "learning_rate": 1.6554720394984124e-07, + "loss": 0.0009, + "step": 288780 + }, + { + "epoch": 1.8522093556486965, + "grad_norm": 0.07122538983821869, + "learning_rate": 1.6540440275648274e-07, + "loss": 0.0008, + "step": 288790 + }, + { + "epoch": 1.8522734925424826, + "grad_norm": 0.055547356605529785, + "learning_rate": 1.6526166214406226e-07, + "loss": 0.0014, + "step": 288800 + }, + { + "epoch": 1.8523376294362688, + "grad_norm": 0.03576844185590744, + "learning_rate": 1.651189821143684e-07, + "loss": 0.0012, + "step": 288810 + }, + { + "epoch": 1.852401766330055, + "grad_norm": 0.07118990272283554, + "learning_rate": 1.6497636266918858e-07, + "loss": 0.0017, + "step": 288820 + }, + { + "epoch": 1.852465903223841, + "grad_norm": 0.15735577046871185, + "learning_rate": 1.6483380381031145e-07, + "loss": 0.0011, + "step": 288830 + }, + { + "epoch": 1.852530040117627, + "grad_norm": 0.0425572507083416, + "learning_rate": 1.6469130553952219e-07, + "loss": 0.0011, + "step": 288840 + }, + { + "epoch": 1.8525941770114132, + "grad_norm": 0.08285803347826004, + "learning_rate": 1.645488678586066e-07, + "loss": 0.0006, + "step": 288850 + }, + { + "epoch": 1.8526583139051993, + "grad_norm": 0.0034415412228554487, + "learning_rate": 1.6440649076934878e-07, + "loss": 0.001, + "step": 288860 + }, + { + "epoch": 1.8527224507989852, + "grad_norm": 0.12120576947927475, + "learning_rate": 1.6426417427353403e-07, + "loss": 0.0012, + "step": 288870 + }, + { + "epoch": 1.8527865876927714, + "grad_norm": 0.00897813867777586, + "learning_rate": 1.6412191837294534e-07, + "loss": 0.0015, + "step": 288880 + }, + { + "epoch": 1.8528507245865575, + "grad_norm": 0.06584618985652924, + "learning_rate": 1.6397972306936516e-07, + "loss": 0.0015, + "step": 288890 + }, + { + "epoch": 1.8529148614803437, + "grad_norm": 0.02014937251806259, + "learning_rate": 1.6383758836457487e-07, + "loss": 0.0006, + "step": 288900 + }, + { + "epoch": 1.8529789983741298, + "grad_norm": 0.02061321958899498, + "learning_rate": 1.636955142603569e-07, + "loss": 0.0021, + "step": 288910 + }, + { + "epoch": 1.853043135267916, + "grad_norm": 0.4016263484954834, + "learning_rate": 1.6355350075848986e-07, + "loss": 0.004, + "step": 288920 + }, + { + "epoch": 1.853107272161702, + "grad_norm": 0.05717543885111809, + "learning_rate": 1.6341154786075396e-07, + "loss": 0.0005, + "step": 288930 + }, + { + "epoch": 1.853171409055488, + "grad_norm": 0.03883775696158409, + "learning_rate": 1.6326965556892836e-07, + "loss": 0.0014, + "step": 288940 + }, + { + "epoch": 1.853235545949274, + "grad_norm": 0.06243137642741203, + "learning_rate": 1.631278238847911e-07, + "loss": 0.001, + "step": 288950 + }, + { + "epoch": 1.8532996828430601, + "grad_norm": 0.12922696769237518, + "learning_rate": 1.629860528101185e-07, + "loss": 0.0009, + "step": 288960 + }, + { + "epoch": 1.8533638197368463, + "grad_norm": 0.03439302369952202, + "learning_rate": 1.6284434234668746e-07, + "loss": 0.0007, + "step": 288970 + }, + { + "epoch": 1.8534279566306324, + "grad_norm": 0.04470491781830788, + "learning_rate": 1.6270269249627435e-07, + "loss": 0.0011, + "step": 288980 + }, + { + "epoch": 1.8534920935244186, + "grad_norm": 0.030634136870503426, + "learning_rate": 1.6256110326065332e-07, + "loss": 0.0013, + "step": 288990 + }, + { + "epoch": 1.8535562304182047, + "grad_norm": 0.015092063695192337, + "learning_rate": 1.6241957464159907e-07, + "loss": 0.0005, + "step": 289000 + }, + { + "epoch": 1.8536203673119909, + "grad_norm": 0.09808158874511719, + "learning_rate": 1.6227810664088462e-07, + "loss": 0.0018, + "step": 289010 + }, + { + "epoch": 1.8536845042057768, + "grad_norm": 0.07334356009960175, + "learning_rate": 1.6213669926028352e-07, + "loss": 0.0012, + "step": 289020 + }, + { + "epoch": 1.853748641099563, + "grad_norm": 0.03621971979737282, + "learning_rate": 1.6199535250156717e-07, + "loss": 0.0011, + "step": 289030 + }, + { + "epoch": 1.8538127779933489, + "grad_norm": 0.03564497083425522, + "learning_rate": 1.618540663665058e-07, + "loss": 0.0009, + "step": 289040 + }, + { + "epoch": 1.853876914887135, + "grad_norm": 0.0414310023188591, + "learning_rate": 1.6171284085687133e-07, + "loss": 0.0014, + "step": 289050 + }, + { + "epoch": 1.8539410517809212, + "grad_norm": 0.041223831474781036, + "learning_rate": 1.6157167597443345e-07, + "loss": 0.0004, + "step": 289060 + }, + { + "epoch": 1.8540051886747073, + "grad_norm": 0.0021375659853219986, + "learning_rate": 1.6143057172095965e-07, + "loss": 0.0016, + "step": 289070 + }, + { + "epoch": 1.8540693255684935, + "grad_norm": 0.017594551667571068, + "learning_rate": 1.6128952809821852e-07, + "loss": 0.0017, + "step": 289080 + }, + { + "epoch": 1.8541334624622796, + "grad_norm": 0.16080540418624878, + "learning_rate": 1.6114854510797862e-07, + "loss": 0.0015, + "step": 289090 + }, + { + "epoch": 1.8541975993560655, + "grad_norm": 0.024991154670715332, + "learning_rate": 1.6100762275200576e-07, + "loss": 0.0017, + "step": 289100 + }, + { + "epoch": 1.8542617362498517, + "grad_norm": 0.10999748855829239, + "learning_rate": 1.6086676103206577e-07, + "loss": 0.0007, + "step": 289110 + }, + { + "epoch": 1.8543258731436376, + "grad_norm": 0.11018364131450653, + "learning_rate": 1.607259599499228e-07, + "loss": 0.0006, + "step": 289120 + }, + { + "epoch": 1.8543900100374238, + "grad_norm": 0.0660175308585167, + "learning_rate": 1.6058521950734262e-07, + "loss": 0.001, + "step": 289130 + }, + { + "epoch": 1.85445414693121, + "grad_norm": 0.06319452077150345, + "learning_rate": 1.6044453970608886e-07, + "loss": 0.0007, + "step": 289140 + }, + { + "epoch": 1.854518283824996, + "grad_norm": 0.19534561038017273, + "learning_rate": 1.6030392054792344e-07, + "loss": 0.002, + "step": 289150 + }, + { + "epoch": 1.8545824207187822, + "grad_norm": 0.03305432200431824, + "learning_rate": 1.6016336203460825e-07, + "loss": 0.0008, + "step": 289160 + }, + { + "epoch": 1.8546465576125684, + "grad_norm": 0.06634438782930374, + "learning_rate": 1.600228641679058e-07, + "loss": 0.0009, + "step": 289170 + }, + { + "epoch": 1.8547106945063545, + "grad_norm": 0.01888250932097435, + "learning_rate": 1.5988242694957577e-07, + "loss": 0.0015, + "step": 289180 + }, + { + "epoch": 1.8547748314001404, + "grad_norm": 0.009038791060447693, + "learning_rate": 1.597420503813779e-07, + "loss": 0.0007, + "step": 289190 + }, + { + "epoch": 1.8548389682939266, + "grad_norm": 0.13109901547431946, + "learning_rate": 1.5960173446507187e-07, + "loss": 0.0039, + "step": 289200 + }, + { + "epoch": 1.8549031051877125, + "grad_norm": 0.04096861928701401, + "learning_rate": 1.5946147920241516e-07, + "loss": 0.0007, + "step": 289210 + }, + { + "epoch": 1.8549672420814987, + "grad_norm": 0.07823286950588226, + "learning_rate": 1.5932128459516582e-07, + "loss": 0.0009, + "step": 289220 + }, + { + "epoch": 1.8550313789752848, + "grad_norm": 0.044161245226860046, + "learning_rate": 1.5918115064507966e-07, + "loss": 0.0006, + "step": 289230 + }, + { + "epoch": 1.855095515869071, + "grad_norm": 0.1879640519618988, + "learning_rate": 1.5904107735391415e-07, + "loss": 0.0007, + "step": 289240 + }, + { + "epoch": 1.855159652762857, + "grad_norm": 0.056915298104286194, + "learning_rate": 1.589010647234235e-07, + "loss": 0.0017, + "step": 289250 + }, + { + "epoch": 1.8552237896566433, + "grad_norm": 0.18126241862773895, + "learning_rate": 1.5876111275536231e-07, + "loss": 0.0013, + "step": 289260 + }, + { + "epoch": 1.8552879265504294, + "grad_norm": 0.10514125972986221, + "learning_rate": 1.586212214514843e-07, + "loss": 0.0004, + "step": 289270 + }, + { + "epoch": 1.8553520634442153, + "grad_norm": 0.05540881305932999, + "learning_rate": 1.5848139081354242e-07, + "loss": 0.0026, + "step": 289280 + }, + { + "epoch": 1.8554162003380015, + "grad_norm": 0.08097107708454132, + "learning_rate": 1.583416208432892e-07, + "loss": 0.0014, + "step": 289290 + }, + { + "epoch": 1.8554803372317874, + "grad_norm": 0.07919275015592575, + "learning_rate": 1.58201911542476e-07, + "loss": 0.0018, + "step": 289300 + }, + { + "epoch": 1.8555444741255736, + "grad_norm": 0.14636743068695068, + "learning_rate": 1.58062262912852e-07, + "loss": 0.0009, + "step": 289310 + }, + { + "epoch": 1.8556086110193597, + "grad_norm": 0.1234036535024643, + "learning_rate": 1.5792267495616963e-07, + "loss": 0.0023, + "step": 289320 + }, + { + "epoch": 1.8556727479131458, + "grad_norm": 0.08552830666303635, + "learning_rate": 1.5778314767417645e-07, + "loss": 0.0009, + "step": 289330 + }, + { + "epoch": 1.855736884806932, + "grad_norm": 0.0010238487739115953, + "learning_rate": 1.5764368106862048e-07, + "loss": 0.0015, + "step": 289340 + }, + { + "epoch": 1.8558010217007181, + "grad_norm": 0.19790436327457428, + "learning_rate": 1.5750427514125034e-07, + "loss": 0.0008, + "step": 289350 + }, + { + "epoch": 1.855865158594504, + "grad_norm": 0.03808882459998131, + "learning_rate": 1.5736492989381292e-07, + "loss": 0.0007, + "step": 289360 + }, + { + "epoch": 1.8559292954882902, + "grad_norm": 0.00809218268841505, + "learning_rate": 1.5722564532805352e-07, + "loss": 0.0008, + "step": 289370 + }, + { + "epoch": 1.8559934323820761, + "grad_norm": 0.02840617671608925, + "learning_rate": 1.570864214457174e-07, + "loss": 0.001, + "step": 289380 + }, + { + "epoch": 1.8560575692758623, + "grad_norm": 0.007927249185740948, + "learning_rate": 1.569472582485504e-07, + "loss": 0.0014, + "step": 289390 + }, + { + "epoch": 1.8561217061696484, + "grad_norm": 0.03396786376833916, + "learning_rate": 1.56808155738295e-07, + "loss": 0.0008, + "step": 289400 + }, + { + "epoch": 1.8561858430634346, + "grad_norm": 0.007886813022196293, + "learning_rate": 1.5666911391669537e-07, + "loss": 0.0016, + "step": 289410 + }, + { + "epoch": 1.8562499799572207, + "grad_norm": 0.06851857155561447, + "learning_rate": 1.5653013278549234e-07, + "loss": 0.0011, + "step": 289420 + }, + { + "epoch": 1.856314116851007, + "grad_norm": 0.26010021567344666, + "learning_rate": 1.5639121234642895e-07, + "loss": 0.0009, + "step": 289430 + }, + { + "epoch": 1.856378253744793, + "grad_norm": 0.03365808352828026, + "learning_rate": 1.562523526012455e-07, + "loss": 0.0008, + "step": 289440 + }, + { + "epoch": 1.856442390638579, + "grad_norm": 0.0676041916012764, + "learning_rate": 1.5611355355168167e-07, + "loss": 0.0014, + "step": 289450 + }, + { + "epoch": 1.8565065275323651, + "grad_norm": 0.10150793194770813, + "learning_rate": 1.559748151994761e-07, + "loss": 0.0012, + "step": 289460 + }, + { + "epoch": 1.856570664426151, + "grad_norm": 0.01773030310869217, + "learning_rate": 1.5583613754636906e-07, + "loss": 0.0013, + "step": 289470 + }, + { + "epoch": 1.8566348013199372, + "grad_norm": 0.3465256094932556, + "learning_rate": 1.5569752059409748e-07, + "loss": 0.0016, + "step": 289480 + }, + { + "epoch": 1.8566989382137233, + "grad_norm": 0.10367559641599655, + "learning_rate": 1.5555896434439722e-07, + "loss": 0.0012, + "step": 289490 + }, + { + "epoch": 1.8567630751075095, + "grad_norm": 0.2588544487953186, + "learning_rate": 1.5542046879900573e-07, + "loss": 0.0021, + "step": 289500 + }, + { + "epoch": 1.8568272120012956, + "grad_norm": 0.08816307038068771, + "learning_rate": 1.5528203395965835e-07, + "loss": 0.0004, + "step": 289510 + }, + { + "epoch": 1.8568913488950818, + "grad_norm": 0.01976894959807396, + "learning_rate": 1.5514365982808975e-07, + "loss": 0.001, + "step": 289520 + }, + { + "epoch": 1.8569554857888677, + "grad_norm": 0.01618194580078125, + "learning_rate": 1.5500534640603304e-07, + "loss": 0.0007, + "step": 289530 + }, + { + "epoch": 1.8570196226826539, + "grad_norm": 0.09402556717395782, + "learning_rate": 1.5486709369522236e-07, + "loss": 0.0012, + "step": 289540 + }, + { + "epoch": 1.85708375957644, + "grad_norm": 0.06722892820835114, + "learning_rate": 1.5472890169738909e-07, + "loss": 0.0009, + "step": 289550 + }, + { + "epoch": 1.857147896470226, + "grad_norm": 0.24180275201797485, + "learning_rate": 1.545907704142663e-07, + "loss": 0.0009, + "step": 289560 + }, + { + "epoch": 1.857212033364012, + "grad_norm": 0.14629188179969788, + "learning_rate": 1.5445269984758427e-07, + "loss": 0.0015, + "step": 289570 + }, + { + "epoch": 1.8572761702577982, + "grad_norm": 0.03688056394457817, + "learning_rate": 1.5431468999907218e-07, + "loss": 0.0013, + "step": 289580 + }, + { + "epoch": 1.8573403071515844, + "grad_norm": 0.06282052397727966, + "learning_rate": 1.5417674087046087e-07, + "loss": 0.0029, + "step": 289590 + }, + { + "epoch": 1.8574044440453705, + "grad_norm": 0.07084057480096817, + "learning_rate": 1.540388524634784e-07, + "loss": 0.0009, + "step": 289600 + }, + { + "epoch": 1.8574685809391567, + "grad_norm": 0.02749285101890564, + "learning_rate": 1.539010247798517e-07, + "loss": 0.0003, + "step": 289610 + }, + { + "epoch": 1.8575327178329426, + "grad_norm": 0.13757991790771484, + "learning_rate": 1.5376325782130885e-07, + "loss": 0.0014, + "step": 289620 + }, + { + "epoch": 1.8575968547267288, + "grad_norm": 0.00561791704967618, + "learning_rate": 1.5362555158957626e-07, + "loss": 0.0017, + "step": 289630 + }, + { + "epoch": 1.8576609916205147, + "grad_norm": 0.1288556158542633, + "learning_rate": 1.534879060863792e-07, + "loss": 0.0012, + "step": 289640 + }, + { + "epoch": 1.8577251285143008, + "grad_norm": 0.07304224371910095, + "learning_rate": 1.5335032131344185e-07, + "loss": 0.0024, + "step": 289650 + }, + { + "epoch": 1.857789265408087, + "grad_norm": 0.018483763560652733, + "learning_rate": 1.532127972724895e-07, + "loss": 0.0009, + "step": 289660 + }, + { + "epoch": 1.8578534023018731, + "grad_norm": 0.033974286168813705, + "learning_rate": 1.5307533396524466e-07, + "loss": 0.0012, + "step": 289670 + }, + { + "epoch": 1.8579175391956593, + "grad_norm": 0.017961300909519196, + "learning_rate": 1.5293793139342984e-07, + "loss": 0.001, + "step": 289680 + }, + { + "epoch": 1.8579816760894454, + "grad_norm": 0.08534421771764755, + "learning_rate": 1.5280058955876642e-07, + "loss": 0.0025, + "step": 289690 + }, + { + "epoch": 1.8580458129832316, + "grad_norm": 0.05251190811395645, + "learning_rate": 1.5266330846297638e-07, + "loss": 0.0022, + "step": 289700 + }, + { + "epoch": 1.8581099498770175, + "grad_norm": 0.03260777145624161, + "learning_rate": 1.5252608810777946e-07, + "loss": 0.0004, + "step": 289710 + }, + { + "epoch": 1.8581740867708036, + "grad_norm": 0.08294403553009033, + "learning_rate": 1.5238892849489483e-07, + "loss": 0.001, + "step": 289720 + }, + { + "epoch": 1.8582382236645896, + "grad_norm": 0.0010562815004959702, + "learning_rate": 1.5225182962604112e-07, + "loss": 0.0015, + "step": 289730 + }, + { + "epoch": 1.8583023605583757, + "grad_norm": 0.05841523036360741, + "learning_rate": 1.521147915029375e-07, + "loss": 0.0009, + "step": 289740 + }, + { + "epoch": 1.8583664974521619, + "grad_norm": 0.024304818361997604, + "learning_rate": 1.5197781412729984e-07, + "loss": 0.0022, + "step": 289750 + }, + { + "epoch": 1.858430634345948, + "grad_norm": 0.17089597880840302, + "learning_rate": 1.518408975008445e-07, + "loss": 0.001, + "step": 289760 + }, + { + "epoch": 1.8584947712397342, + "grad_norm": 0.008853795938193798, + "learning_rate": 1.5170404162528796e-07, + "loss": 0.0008, + "step": 289770 + }, + { + "epoch": 1.8585589081335203, + "grad_norm": 0.032498959451913834, + "learning_rate": 1.5156724650234545e-07, + "loss": 0.0006, + "step": 289780 + }, + { + "epoch": 1.8586230450273062, + "grad_norm": 0.029728278517723083, + "learning_rate": 1.5143051213373007e-07, + "loss": 0.0009, + "step": 289790 + }, + { + "epoch": 1.8586871819210924, + "grad_norm": 0.08156143873929977, + "learning_rate": 1.5129383852115486e-07, + "loss": 0.0011, + "step": 289800 + }, + { + "epoch": 1.8587513188148783, + "grad_norm": 0.016715293750166893, + "learning_rate": 1.5115722566633406e-07, + "loss": 0.0009, + "step": 289810 + }, + { + "epoch": 1.8588154557086645, + "grad_norm": 0.05936663597822189, + "learning_rate": 1.510206735709785e-07, + "loss": 0.0019, + "step": 289820 + }, + { + "epoch": 1.8588795926024506, + "grad_norm": 0.02317112125456333, + "learning_rate": 1.5088418223679956e-07, + "loss": 0.0007, + "step": 289830 + }, + { + "epoch": 1.8589437294962368, + "grad_norm": 0.0055950540117919445, + "learning_rate": 1.5074775166550647e-07, + "loss": 0.0012, + "step": 289840 + }, + { + "epoch": 1.859007866390023, + "grad_norm": 0.21792720258235931, + "learning_rate": 1.5061138185881063e-07, + "loss": 0.0015, + "step": 289850 + }, + { + "epoch": 1.859072003283809, + "grad_norm": 0.13058669865131378, + "learning_rate": 1.5047507281842012e-07, + "loss": 0.0011, + "step": 289860 + }, + { + "epoch": 1.8591361401775952, + "grad_norm": 0.012549787759780884, + "learning_rate": 1.5033882454604244e-07, + "loss": 0.0013, + "step": 289870 + }, + { + "epoch": 1.8592002770713811, + "grad_norm": 0.2569255232810974, + "learning_rate": 1.5020263704338513e-07, + "loss": 0.0017, + "step": 289880 + }, + { + "epoch": 1.8592644139651673, + "grad_norm": 0.09576437622308731, + "learning_rate": 1.5006651031215513e-07, + "loss": 0.0011, + "step": 289890 + }, + { + "epoch": 1.8593285508589532, + "grad_norm": 0.058203332126140594, + "learning_rate": 1.4993044435405836e-07, + "loss": 0.001, + "step": 289900 + }, + { + "epoch": 1.8593926877527394, + "grad_norm": 0.1404581069946289, + "learning_rate": 1.497944391707984e-07, + "loss": 0.0009, + "step": 289910 + }, + { + "epoch": 1.8594568246465255, + "grad_norm": 0.002524998504668474, + "learning_rate": 1.4965849476408113e-07, + "loss": 0.0012, + "step": 289920 + }, + { + "epoch": 1.8595209615403117, + "grad_norm": 0.1650613397359848, + "learning_rate": 1.4952261113560963e-07, + "loss": 0.0011, + "step": 289930 + }, + { + "epoch": 1.8595850984340978, + "grad_norm": 0.049357227981090546, + "learning_rate": 1.4938678828708642e-07, + "loss": 0.0005, + "step": 289940 + }, + { + "epoch": 1.859649235327884, + "grad_norm": 0.12375776469707489, + "learning_rate": 1.4925102622021293e-07, + "loss": 0.0021, + "step": 289950 + }, + { + "epoch": 1.8597133722216699, + "grad_norm": 0.11767815053462982, + "learning_rate": 1.4911532493669113e-07, + "loss": 0.0012, + "step": 289960 + }, + { + "epoch": 1.859777509115456, + "grad_norm": 0.14198528230190277, + "learning_rate": 1.489796844382213e-07, + "loss": 0.0021, + "step": 289970 + }, + { + "epoch": 1.8598416460092422, + "grad_norm": 0.02816985361278057, + "learning_rate": 1.4884410472650269e-07, + "loss": 0.0004, + "step": 289980 + }, + { + "epoch": 1.859905782903028, + "grad_norm": 0.1383359581232071, + "learning_rate": 1.4870858580323445e-07, + "loss": 0.0008, + "step": 289990 + }, + { + "epoch": 1.8599699197968143, + "grad_norm": 0.10833131521940231, + "learning_rate": 1.4857312767011521e-07, + "loss": 0.0009, + "step": 290000 + }, + { + "epoch": 1.8600340566906004, + "grad_norm": 0.13313689827919006, + "learning_rate": 1.48437730328842e-07, + "loss": 0.0011, + "step": 290010 + }, + { + "epoch": 1.8600981935843866, + "grad_norm": 0.06083110347390175, + "learning_rate": 1.4830239378111123e-07, + "loss": 0.0007, + "step": 290020 + }, + { + "epoch": 1.8601623304781727, + "grad_norm": 0.0641220286488533, + "learning_rate": 1.4816711802861816e-07, + "loss": 0.0013, + "step": 290030 + }, + { + "epoch": 1.8602264673719588, + "grad_norm": 0.00814772117882967, + "learning_rate": 1.4803190307305982e-07, + "loss": 0.0004, + "step": 290040 + }, + { + "epoch": 1.8602906042657448, + "grad_norm": 0.039491720497608185, + "learning_rate": 1.478967489161287e-07, + "loss": 0.0011, + "step": 290050 + }, + { + "epoch": 1.860354741159531, + "grad_norm": 0.0898330882191658, + "learning_rate": 1.4776165555951904e-07, + "loss": 0.0011, + "step": 290060 + }, + { + "epoch": 1.8604188780533168, + "grad_norm": 0.04195317625999451, + "learning_rate": 1.4762662300492392e-07, + "loss": 0.0006, + "step": 290070 + }, + { + "epoch": 1.860483014947103, + "grad_norm": 0.08919844031333923, + "learning_rate": 1.474916512540353e-07, + "loss": 0.0006, + "step": 290080 + }, + { + "epoch": 1.8605471518408891, + "grad_norm": 0.08594311773777008, + "learning_rate": 1.4735674030854408e-07, + "loss": 0.0013, + "step": 290090 + }, + { + "epoch": 1.8606112887346753, + "grad_norm": 0.020756598562002182, + "learning_rate": 1.472218901701411e-07, + "loss": 0.0015, + "step": 290100 + }, + { + "epoch": 1.8606754256284614, + "grad_norm": 0.04540959745645523, + "learning_rate": 1.4708710084051613e-07, + "loss": 0.0006, + "step": 290110 + }, + { + "epoch": 1.8607395625222476, + "grad_norm": 0.014379561878740788, + "learning_rate": 1.4695237232135785e-07, + "loss": 0.0005, + "step": 290120 + }, + { + "epoch": 1.8608036994160337, + "grad_norm": 0.07023920118808746, + "learning_rate": 1.468177046143554e-07, + "loss": 0.0008, + "step": 290130 + }, + { + "epoch": 1.8608678363098197, + "grad_norm": 0.1338580846786499, + "learning_rate": 1.4668309772119471e-07, + "loss": 0.0012, + "step": 290140 + }, + { + "epoch": 1.8609319732036058, + "grad_norm": 0.08372864127159119, + "learning_rate": 1.4654855164356386e-07, + "loss": 0.0032, + "step": 290150 + }, + { + "epoch": 1.8609961100973917, + "grad_norm": 0.02373339794576168, + "learning_rate": 1.464140663831487e-07, + "loss": 0.001, + "step": 290160 + }, + { + "epoch": 1.861060246991178, + "grad_norm": 0.2806231677532196, + "learning_rate": 1.46279641941634e-07, + "loss": 0.001, + "step": 290170 + }, + { + "epoch": 1.861124383884964, + "grad_norm": 0.04377453774213791, + "learning_rate": 1.4614527832070347e-07, + "loss": 0.0006, + "step": 290180 + }, + { + "epoch": 1.8611885207787502, + "grad_norm": 0.27308493852615356, + "learning_rate": 1.4601097552204235e-07, + "loss": 0.0011, + "step": 290190 + }, + { + "epoch": 1.8612526576725363, + "grad_norm": 0.09190023690462112, + "learning_rate": 1.4587673354733267e-07, + "loss": 0.0014, + "step": 290200 + }, + { + "epoch": 1.8613167945663225, + "grad_norm": 0.1294102668762207, + "learning_rate": 1.4574255239825642e-07, + "loss": 0.0017, + "step": 290210 + }, + { + "epoch": 1.8613809314601084, + "grad_norm": 0.021928202360868454, + "learning_rate": 1.4560843207649557e-07, + "loss": 0.0013, + "step": 290220 + }, + { + "epoch": 1.8614450683538946, + "grad_norm": 0.09823820739984512, + "learning_rate": 1.4547437258373098e-07, + "loss": 0.0009, + "step": 290230 + }, + { + "epoch": 1.8615092052476805, + "grad_norm": 0.08892396092414856, + "learning_rate": 1.4534037392164136e-07, + "loss": 0.0005, + "step": 290240 + }, + { + "epoch": 1.8615733421414666, + "grad_norm": 0.055825598537921906, + "learning_rate": 1.4520643609190588e-07, + "loss": 0.0018, + "step": 290250 + }, + { + "epoch": 1.8616374790352528, + "grad_norm": 0.016030259430408478, + "learning_rate": 1.4507255909620431e-07, + "loss": 0.0008, + "step": 290260 + }, + { + "epoch": 1.861701615929039, + "grad_norm": 0.1098199263215065, + "learning_rate": 1.4493874293621312e-07, + "loss": 0.0022, + "step": 290270 + }, + { + "epoch": 1.861765752822825, + "grad_norm": 0.011548800393939018, + "learning_rate": 1.4480498761360984e-07, + "loss": 0.0009, + "step": 290280 + }, + { + "epoch": 1.8618298897166112, + "grad_norm": 0.16268345713615417, + "learning_rate": 1.4467129313006868e-07, + "loss": 0.0017, + "step": 290290 + }, + { + "epoch": 1.8618940266103974, + "grad_norm": 0.09902417659759521, + "learning_rate": 1.4453765948726717e-07, + "loss": 0.0011, + "step": 290300 + }, + { + "epoch": 1.8619581635041833, + "grad_norm": 0.03196365013718605, + "learning_rate": 1.4440408668687844e-07, + "loss": 0.0012, + "step": 290310 + }, + { + "epoch": 1.8620223003979695, + "grad_norm": 0.11622941493988037, + "learning_rate": 1.4427057473057727e-07, + "loss": 0.0008, + "step": 290320 + }, + { + "epoch": 1.8620864372917554, + "grad_norm": 0.06088544428348541, + "learning_rate": 1.4413712362003562e-07, + "loss": 0.001, + "step": 290330 + }, + { + "epoch": 1.8621505741855415, + "grad_norm": 0.11155184358358383, + "learning_rate": 1.440037333569272e-07, + "loss": 0.001, + "step": 290340 + }, + { + "epoch": 1.8622147110793277, + "grad_norm": 0.03502899408340454, + "learning_rate": 1.4387040394292175e-07, + "loss": 0.001, + "step": 290350 + }, + { + "epoch": 1.8622788479731138, + "grad_norm": 0.006140367593616247, + "learning_rate": 1.4373713537969125e-07, + "loss": 0.001, + "step": 290360 + }, + { + "epoch": 1.8623429848669, + "grad_norm": 0.2765370309352875, + "learning_rate": 1.436039276689044e-07, + "loss": 0.0011, + "step": 290370 + }, + { + "epoch": 1.8624071217606861, + "grad_norm": 0.06028538942337036, + "learning_rate": 1.434707808122321e-07, + "loss": 0.0009, + "step": 290380 + }, + { + "epoch": 1.862471258654472, + "grad_norm": 0.17979082465171814, + "learning_rate": 1.433376948113413e-07, + "loss": 0.0019, + "step": 290390 + }, + { + "epoch": 1.8625353955482582, + "grad_norm": 0.13455578684806824, + "learning_rate": 1.4320466966790015e-07, + "loss": 0.0009, + "step": 290400 + }, + { + "epoch": 1.8625995324420443, + "grad_norm": 0.01863623596727848, + "learning_rate": 1.4307170538357563e-07, + "loss": 0.0014, + "step": 290410 + }, + { + "epoch": 1.8626636693358303, + "grad_norm": 0.10759945958852768, + "learning_rate": 1.4293880196003363e-07, + "loss": 0.0009, + "step": 290420 + }, + { + "epoch": 1.8627278062296164, + "grad_norm": 0.0366380400955677, + "learning_rate": 1.4280595939894005e-07, + "loss": 0.0006, + "step": 290430 + }, + { + "epoch": 1.8627919431234026, + "grad_norm": 0.08533468842506409, + "learning_rate": 1.4267317770195966e-07, + "loss": 0.0018, + "step": 290440 + }, + { + "epoch": 1.8628560800171887, + "grad_norm": 0.04634246230125427, + "learning_rate": 1.4254045687075502e-07, + "loss": 0.0039, + "step": 290450 + }, + { + "epoch": 1.8629202169109749, + "grad_norm": 0.02340097166597843, + "learning_rate": 1.4240779690699035e-07, + "loss": 0.0011, + "step": 290460 + }, + { + "epoch": 1.862984353804761, + "grad_norm": 0.017465023323893547, + "learning_rate": 1.4227519781232767e-07, + "loss": 0.001, + "step": 290470 + }, + { + "epoch": 1.863048490698547, + "grad_norm": 0.06610596925020218, + "learning_rate": 1.4214265958842787e-07, + "loss": 0.0008, + "step": 290480 + }, + { + "epoch": 1.863112627592333, + "grad_norm": 0.0007582298712804914, + "learning_rate": 1.4201018223695295e-07, + "loss": 0.0022, + "step": 290490 + }, + { + "epoch": 1.863176764486119, + "grad_norm": 0.09911002218723297, + "learning_rate": 1.418777657595627e-07, + "loss": 0.0018, + "step": 290500 + }, + { + "epoch": 1.8632409013799052, + "grad_norm": 0.0666009783744812, + "learning_rate": 1.4174541015791633e-07, + "loss": 0.0007, + "step": 290510 + }, + { + "epoch": 1.8633050382736913, + "grad_norm": 0.05044683441519737, + "learning_rate": 1.4161311543367084e-07, + "loss": 0.001, + "step": 290520 + }, + { + "epoch": 1.8633691751674775, + "grad_norm": 0.09418749809265137, + "learning_rate": 1.414808815884866e-07, + "loss": 0.0011, + "step": 290530 + }, + { + "epoch": 1.8634333120612636, + "grad_norm": 0.032017726451158524, + "learning_rate": 1.413487086240184e-07, + "loss": 0.0007, + "step": 290540 + }, + { + "epoch": 1.8634974489550498, + "grad_norm": 0.04044421762228012, + "learning_rate": 1.4121659654192377e-07, + "loss": 0.0011, + "step": 290550 + }, + { + "epoch": 1.863561585848836, + "grad_norm": 0.04872051253914833, + "learning_rate": 1.4108454534385696e-07, + "loss": 0.0011, + "step": 290560 + }, + { + "epoch": 1.8636257227426218, + "grad_norm": 0.12192614376544952, + "learning_rate": 1.4095255503147443e-07, + "loss": 0.0007, + "step": 290570 + }, + { + "epoch": 1.863689859636408, + "grad_norm": 0.09276101738214493, + "learning_rate": 1.4082062560642872e-07, + "loss": 0.0012, + "step": 290580 + }, + { + "epoch": 1.863753996530194, + "grad_norm": 0.0316673219203949, + "learning_rate": 1.4068875707037355e-07, + "loss": 0.0015, + "step": 290590 + }, + { + "epoch": 1.86381813342398, + "grad_norm": 0.04883475974202156, + "learning_rate": 1.4055694942496035e-07, + "loss": 0.0009, + "step": 290600 + }, + { + "epoch": 1.8638822703177662, + "grad_norm": 0.014902455732226372, + "learning_rate": 1.4042520267184224e-07, + "loss": 0.0008, + "step": 290610 + }, + { + "epoch": 1.8639464072115524, + "grad_norm": 0.028056534007191658, + "learning_rate": 1.4029351681266955e-07, + "loss": 0.0024, + "step": 290620 + }, + { + "epoch": 1.8640105441053385, + "grad_norm": 0.046454910188913345, + "learning_rate": 1.4016189184909156e-07, + "loss": 0.0009, + "step": 290630 + }, + { + "epoch": 1.8640746809991247, + "grad_norm": 0.06369485706090927, + "learning_rate": 1.400303277827586e-07, + "loss": 0.0011, + "step": 290640 + }, + { + "epoch": 1.8641388178929106, + "grad_norm": 0.01218261756002903, + "learning_rate": 1.3989882461531933e-07, + "loss": 0.0016, + "step": 290650 + }, + { + "epoch": 1.8642029547866967, + "grad_norm": 0.021232986822724342, + "learning_rate": 1.3976738234842136e-07, + "loss": 0.0008, + "step": 290660 + }, + { + "epoch": 1.8642670916804827, + "grad_norm": 0.08904554694890976, + "learning_rate": 1.396360009837111e-07, + "loss": 0.0007, + "step": 290670 + }, + { + "epoch": 1.8643312285742688, + "grad_norm": 0.038141753524541855, + "learning_rate": 1.3950468052283562e-07, + "loss": 0.0006, + "step": 290680 + }, + { + "epoch": 1.864395365468055, + "grad_norm": 0.04452548548579216, + "learning_rate": 1.3937342096744077e-07, + "loss": 0.0008, + "step": 290690 + }, + { + "epoch": 1.864459502361841, + "grad_norm": 0.03853532299399376, + "learning_rate": 1.3924222231917028e-07, + "loss": 0.0011, + "step": 290700 + }, + { + "epoch": 1.8645236392556273, + "grad_norm": 0.07612650841474533, + "learning_rate": 1.3911108457966837e-07, + "loss": 0.0012, + "step": 290710 + }, + { + "epoch": 1.8645877761494134, + "grad_norm": 0.03762242570519447, + "learning_rate": 1.3898000775057928e-07, + "loss": 0.0016, + "step": 290720 + }, + { + "epoch": 1.8646519130431995, + "grad_norm": 0.017636556178331375, + "learning_rate": 1.388489918335445e-07, + "loss": 0.0006, + "step": 290730 + }, + { + "epoch": 1.8647160499369855, + "grad_norm": 0.050190914422273636, + "learning_rate": 1.3871803683020546e-07, + "loss": 0.0007, + "step": 290740 + }, + { + "epoch": 1.8647801868307716, + "grad_norm": 0.0019875033758580685, + "learning_rate": 1.3858714274220474e-07, + "loss": 0.0004, + "step": 290750 + }, + { + "epoch": 1.8648443237245576, + "grad_norm": 0.15975508093833923, + "learning_rate": 1.3845630957118107e-07, + "loss": 0.0014, + "step": 290760 + }, + { + "epoch": 1.8649084606183437, + "grad_norm": 0.04271375387907028, + "learning_rate": 1.3832553731877474e-07, + "loss": 0.0009, + "step": 290770 + }, + { + "epoch": 1.8649725975121298, + "grad_norm": 0.043441157788038254, + "learning_rate": 1.3819482598662281e-07, + "loss": 0.001, + "step": 290780 + }, + { + "epoch": 1.865036734405916, + "grad_norm": 0.0461297333240509, + "learning_rate": 1.3806417557636564e-07, + "loss": 0.0011, + "step": 290790 + }, + { + "epoch": 1.8651008712997021, + "grad_norm": 0.00915750116109848, + "learning_rate": 1.379335860896386e-07, + "loss": 0.0016, + "step": 290800 + }, + { + "epoch": 1.8651650081934883, + "grad_norm": 0.06905308365821838, + "learning_rate": 1.378030575280792e-07, + "loss": 0.0015, + "step": 290810 + }, + { + "epoch": 1.8652291450872744, + "grad_norm": 0.04653427377343178, + "learning_rate": 1.3767258989332122e-07, + "loss": 0.0007, + "step": 290820 + }, + { + "epoch": 1.8652932819810604, + "grad_norm": 0.11974623054265976, + "learning_rate": 1.3754218318700164e-07, + "loss": 0.0008, + "step": 290830 + }, + { + "epoch": 1.8653574188748465, + "grad_norm": 0.060955844819545746, + "learning_rate": 1.374118374107536e-07, + "loss": 0.0016, + "step": 290840 + }, + { + "epoch": 1.8654215557686324, + "grad_norm": 0.1308441460132599, + "learning_rate": 1.3728155256621078e-07, + "loss": 0.0014, + "step": 290850 + }, + { + "epoch": 1.8654856926624186, + "grad_norm": 0.2624158561229706, + "learning_rate": 1.3715132865500468e-07, + "loss": 0.0039, + "step": 290860 + }, + { + "epoch": 1.8655498295562047, + "grad_norm": 0.06166985630989075, + "learning_rate": 1.3702116567876788e-07, + "loss": 0.0012, + "step": 290870 + }, + { + "epoch": 1.865613966449991, + "grad_norm": 0.06867585331201553, + "learning_rate": 1.3689106363913186e-07, + "loss": 0.0012, + "step": 290880 + }, + { + "epoch": 1.865678103343777, + "grad_norm": 0.232400581240654, + "learning_rate": 1.3676102253772583e-07, + "loss": 0.0016, + "step": 290890 + }, + { + "epoch": 1.8657422402375632, + "grad_norm": 0.0076700723730027676, + "learning_rate": 1.3663104237618075e-07, + "loss": 0.0011, + "step": 290900 + }, + { + "epoch": 1.8658063771313491, + "grad_norm": 0.01567370444536209, + "learning_rate": 1.365011231561242e-07, + "loss": 0.0013, + "step": 290910 + }, + { + "epoch": 1.8658705140251353, + "grad_norm": 0.09104197472333908, + "learning_rate": 1.3637126487918428e-07, + "loss": 0.0009, + "step": 290920 + }, + { + "epoch": 1.8659346509189212, + "grad_norm": 0.03871174529194832, + "learning_rate": 1.3624146754698808e-07, + "loss": 0.0009, + "step": 290930 + }, + { + "epoch": 1.8659987878127073, + "grad_norm": 0.2143871784210205, + "learning_rate": 1.3611173116116316e-07, + "loss": 0.0012, + "step": 290940 + }, + { + "epoch": 1.8660629247064935, + "grad_norm": 0.004860973916947842, + "learning_rate": 1.3598205572333378e-07, + "loss": 0.002, + "step": 290950 + }, + { + "epoch": 1.8661270616002796, + "grad_norm": 0.039312057197093964, + "learning_rate": 1.3585244123512587e-07, + "loss": 0.0013, + "step": 290960 + }, + { + "epoch": 1.8661911984940658, + "grad_norm": 0.040601156651973724, + "learning_rate": 1.357228876981631e-07, + "loss": 0.0006, + "step": 290970 + }, + { + "epoch": 1.866255335387852, + "grad_norm": 0.06531794369220734, + "learning_rate": 1.3559339511406865e-07, + "loss": 0.0008, + "step": 290980 + }, + { + "epoch": 1.866319472281638, + "grad_norm": 0.07218613475561142, + "learning_rate": 1.354639634844662e-07, + "loss": 0.0007, + "step": 290990 + }, + { + "epoch": 1.866383609175424, + "grad_norm": 0.03746568039059639, + "learning_rate": 1.3533459281097673e-07, + "loss": 0.0003, + "step": 291000 + }, + { + "epoch": 1.8664477460692102, + "grad_norm": 0.05084928497672081, + "learning_rate": 1.3520528309522108e-07, + "loss": 0.0006, + "step": 291010 + }, + { + "epoch": 1.866511882962996, + "grad_norm": 0.14735764265060425, + "learning_rate": 1.3507603433882022e-07, + "loss": 0.0015, + "step": 291020 + }, + { + "epoch": 1.8665760198567822, + "grad_norm": 0.03813185170292854, + "learning_rate": 1.3494684654339397e-07, + "loss": 0.0011, + "step": 291030 + }, + { + "epoch": 1.8666401567505684, + "grad_norm": 0.2427544891834259, + "learning_rate": 1.348177197105599e-07, + "loss": 0.0018, + "step": 291040 + }, + { + "epoch": 1.8667042936443545, + "grad_norm": 0.027156217023730278, + "learning_rate": 1.346886538419373e-07, + "loss": 0.0003, + "step": 291050 + }, + { + "epoch": 1.8667684305381407, + "grad_norm": 0.0016352023230865598, + "learning_rate": 1.3455964893914376e-07, + "loss": 0.0005, + "step": 291060 + }, + { + "epoch": 1.8668325674319268, + "grad_norm": 0.3087085783481598, + "learning_rate": 1.3443070500379464e-07, + "loss": 0.0027, + "step": 291070 + }, + { + "epoch": 1.8668967043257128, + "grad_norm": 0.19220387935638428, + "learning_rate": 1.343018220375053e-07, + "loss": 0.0016, + "step": 291080 + }, + { + "epoch": 1.866960841219499, + "grad_norm": 0.06612735986709595, + "learning_rate": 1.3417300004189226e-07, + "loss": 0.0005, + "step": 291090 + }, + { + "epoch": 1.867024978113285, + "grad_norm": 0.019334839656949043, + "learning_rate": 1.340442390185692e-07, + "loss": 0.0011, + "step": 291100 + }, + { + "epoch": 1.867089115007071, + "grad_norm": 0.049330078065395355, + "learning_rate": 1.3391553896914933e-07, + "loss": 0.0007, + "step": 291110 + }, + { + "epoch": 1.8671532519008571, + "grad_norm": 0.05242971330881119, + "learning_rate": 1.3378689989524573e-07, + "loss": 0.0004, + "step": 291120 + }, + { + "epoch": 1.8672173887946433, + "grad_norm": 0.18560273945331573, + "learning_rate": 1.336583217984694e-07, + "loss": 0.0011, + "step": 291130 + }, + { + "epoch": 1.8672815256884294, + "grad_norm": 0.03429925814270973, + "learning_rate": 1.3352980468043285e-07, + "loss": 0.0018, + "step": 291140 + }, + { + "epoch": 1.8673456625822156, + "grad_norm": 0.03484424576163292, + "learning_rate": 1.33401348542746e-07, + "loss": 0.0009, + "step": 291150 + }, + { + "epoch": 1.8674097994760017, + "grad_norm": 0.015022116713225842, + "learning_rate": 1.3327295338701806e-07, + "loss": 0.0011, + "step": 291160 + }, + { + "epoch": 1.8674739363697876, + "grad_norm": 0.1105656549334526, + "learning_rate": 1.3314461921485834e-07, + "loss": 0.0007, + "step": 291170 + }, + { + "epoch": 1.8675380732635738, + "grad_norm": 0.03682788088917732, + "learning_rate": 1.3301634602787494e-07, + "loss": 0.0019, + "step": 291180 + }, + { + "epoch": 1.8676022101573597, + "grad_norm": 0.0016587432473897934, + "learning_rate": 1.3288813382767496e-07, + "loss": 0.001, + "step": 291190 + }, + { + "epoch": 1.8676663470511459, + "grad_norm": 0.0696180984377861, + "learning_rate": 1.3275998261586486e-07, + "loss": 0.0011, + "step": 291200 + }, + { + "epoch": 1.867730483944932, + "grad_norm": 0.019244924187660217, + "learning_rate": 1.3263189239405116e-07, + "loss": 0.0005, + "step": 291210 + }, + { + "epoch": 1.8677946208387182, + "grad_norm": 0.02816018834710121, + "learning_rate": 1.3250386316383813e-07, + "loss": 0.0009, + "step": 291220 + }, + { + "epoch": 1.8678587577325043, + "grad_norm": 0.03528784587979317, + "learning_rate": 1.323758949268311e-07, + "loss": 0.0011, + "step": 291230 + }, + { + "epoch": 1.8679228946262905, + "grad_norm": 0.07470495998859406, + "learning_rate": 1.322479876846322e-07, + "loss": 0.0014, + "step": 291240 + }, + { + "epoch": 1.8679870315200766, + "grad_norm": 0.020319901406764984, + "learning_rate": 1.321201414388451e-07, + "loss": 0.0017, + "step": 291250 + }, + { + "epoch": 1.8680511684138625, + "grad_norm": 0.03774775192141533, + "learning_rate": 1.3199235619107187e-07, + "loss": 0.0008, + "step": 291260 + }, + { + "epoch": 1.8681153053076487, + "grad_norm": 0.05965312942862511, + "learning_rate": 1.3186463194291343e-07, + "loss": 0.0029, + "step": 291270 + }, + { + "epoch": 1.8681794422014346, + "grad_norm": 0.012127332389354706, + "learning_rate": 1.3173696869597018e-07, + "loss": 0.0014, + "step": 291280 + }, + { + "epoch": 1.8682435790952208, + "grad_norm": 0.05256694555282593, + "learning_rate": 1.3160936645184253e-07, + "loss": 0.0012, + "step": 291290 + }, + { + "epoch": 1.868307715989007, + "grad_norm": 0.04609822481870651, + "learning_rate": 1.314818252121286e-07, + "loss": 0.0013, + "step": 291300 + }, + { + "epoch": 1.868371852882793, + "grad_norm": 0.07213814556598663, + "learning_rate": 1.3135434497842658e-07, + "loss": 0.0008, + "step": 291310 + }, + { + "epoch": 1.8684359897765792, + "grad_norm": 0.017824608832597733, + "learning_rate": 1.3122692575233464e-07, + "loss": 0.0013, + "step": 291320 + }, + { + "epoch": 1.8685001266703654, + "grad_norm": 0.12118804454803467, + "learning_rate": 1.3109956753544872e-07, + "loss": 0.0006, + "step": 291330 + }, + { + "epoch": 1.8685642635641513, + "grad_norm": 0.11525218933820724, + "learning_rate": 1.3097227032936534e-07, + "loss": 0.0019, + "step": 291340 + }, + { + "epoch": 1.8686284004579374, + "grad_norm": 0.02752411551773548, + "learning_rate": 1.3084503413567874e-07, + "loss": 0.0009, + "step": 291350 + }, + { + "epoch": 1.8686925373517234, + "grad_norm": 0.007287966553121805, + "learning_rate": 1.3071785895598376e-07, + "loss": 0.0011, + "step": 291360 + }, + { + "epoch": 1.8687566742455095, + "grad_norm": 0.028855368494987488, + "learning_rate": 1.3059074479187472e-07, + "loss": 0.0017, + "step": 291370 + }, + { + "epoch": 1.8688208111392957, + "grad_norm": 0.09461076557636261, + "learning_rate": 1.304636916449431e-07, + "loss": 0.002, + "step": 291380 + }, + { + "epoch": 1.8688849480330818, + "grad_norm": 0.08592013269662857, + "learning_rate": 1.3033669951678153e-07, + "loss": 0.0013, + "step": 291390 + }, + { + "epoch": 1.868949084926868, + "grad_norm": 0.15071649849414825, + "learning_rate": 1.3020976840898203e-07, + "loss": 0.0009, + "step": 291400 + }, + { + "epoch": 1.869013221820654, + "grad_norm": 0.039613980799913406, + "learning_rate": 1.3008289832313448e-07, + "loss": 0.0008, + "step": 291410 + }, + { + "epoch": 1.8690773587144403, + "grad_norm": 0.041813723742961884, + "learning_rate": 1.299560892608287e-07, + "loss": 0.0016, + "step": 291420 + }, + { + "epoch": 1.8691414956082262, + "grad_norm": 0.01769079640507698, + "learning_rate": 1.2982934122365287e-07, + "loss": 0.0004, + "step": 291430 + }, + { + "epoch": 1.8692056325020123, + "grad_norm": 0.14418870210647583, + "learning_rate": 1.2970265421319684e-07, + "loss": 0.0013, + "step": 291440 + }, + { + "epoch": 1.8692697693957983, + "grad_norm": 0.08726982027292252, + "learning_rate": 1.295760282310471e-07, + "loss": 0.0013, + "step": 291450 + }, + { + "epoch": 1.8693339062895844, + "grad_norm": 0.03900137543678284, + "learning_rate": 1.2944946327879014e-07, + "loss": 0.0009, + "step": 291460 + }, + { + "epoch": 1.8693980431833706, + "grad_norm": 0.044279251247644424, + "learning_rate": 1.293229593580131e-07, + "loss": 0.0004, + "step": 291470 + }, + { + "epoch": 1.8694621800771567, + "grad_norm": 0.06156221777200699, + "learning_rate": 1.2919651647030017e-07, + "loss": 0.0007, + "step": 291480 + }, + { + "epoch": 1.8695263169709428, + "grad_norm": 0.01173117384314537, + "learning_rate": 1.2907013461723573e-07, + "loss": 0.001, + "step": 291490 + }, + { + "epoch": 1.869590453864729, + "grad_norm": 0.0110355569049716, + "learning_rate": 1.2894381380040344e-07, + "loss": 0.001, + "step": 291500 + }, + { + "epoch": 1.869654590758515, + "grad_norm": 0.11326061934232712, + "learning_rate": 1.2881755402138706e-07, + "loss": 0.001, + "step": 291510 + }, + { + "epoch": 1.869718727652301, + "grad_norm": 0.04712875187397003, + "learning_rate": 1.2869135528176812e-07, + "loss": 0.0009, + "step": 291520 + }, + { + "epoch": 1.8697828645460872, + "grad_norm": 0.34151312708854675, + "learning_rate": 1.2856521758312756e-07, + "loss": 0.0021, + "step": 291530 + }, + { + "epoch": 1.8698470014398731, + "grad_norm": 0.07753254473209381, + "learning_rate": 1.2843914092704633e-07, + "loss": 0.001, + "step": 291540 + }, + { + "epoch": 1.8699111383336593, + "grad_norm": 0.1782429963350296, + "learning_rate": 1.2831312531510488e-07, + "loss": 0.0013, + "step": 291550 + }, + { + "epoch": 1.8699752752274454, + "grad_norm": 0.08619780838489532, + "learning_rate": 1.2818717074888133e-07, + "loss": 0.0011, + "step": 291560 + }, + { + "epoch": 1.8700394121212316, + "grad_norm": 9.459776878356934, + "learning_rate": 1.2806127722995443e-07, + "loss": 0.0158, + "step": 291570 + }, + { + "epoch": 1.8701035490150177, + "grad_norm": 0.07566432654857635, + "learning_rate": 1.2793544475990127e-07, + "loss": 0.0016, + "step": 291580 + }, + { + "epoch": 1.870167685908804, + "grad_norm": 0.05713646113872528, + "learning_rate": 1.2780967334030002e-07, + "loss": 0.0006, + "step": 291590 + }, + { + "epoch": 1.8702318228025898, + "grad_norm": 0.049163222312927246, + "learning_rate": 1.27683962972725e-07, + "loss": 0.0035, + "step": 291600 + }, + { + "epoch": 1.870295959696376, + "grad_norm": 0.13116882741451263, + "learning_rate": 1.2755831365875159e-07, + "loss": 0.0006, + "step": 291610 + }, + { + "epoch": 1.870360096590162, + "grad_norm": 0.10171689838171005, + "learning_rate": 1.2743272539995576e-07, + "loss": 0.0006, + "step": 291620 + }, + { + "epoch": 1.870424233483948, + "grad_norm": 0.07907585054636002, + "learning_rate": 1.2730719819790959e-07, + "loss": 0.0023, + "step": 291630 + }, + { + "epoch": 1.8704883703777342, + "grad_norm": 0.07575205713510513, + "learning_rate": 1.2718173205418739e-07, + "loss": 0.0009, + "step": 291640 + }, + { + "epoch": 1.8705525072715203, + "grad_norm": 0.02001216448843479, + "learning_rate": 1.2705632697035953e-07, + "loss": 0.0009, + "step": 291650 + }, + { + "epoch": 1.8706166441653065, + "grad_norm": 0.02850428968667984, + "learning_rate": 1.2693098294799922e-07, + "loss": 0.0011, + "step": 291660 + }, + { + "epoch": 1.8706807810590926, + "grad_norm": 0.04260598495602608, + "learning_rate": 1.2680569998867686e-07, + "loss": 0.0019, + "step": 291670 + }, + { + "epoch": 1.8707449179528788, + "grad_norm": 0.033002011477947235, + "learning_rate": 1.266804780939612e-07, + "loss": 0.0006, + "step": 291680 + }, + { + "epoch": 1.8708090548466647, + "grad_norm": 0.022915298119187355, + "learning_rate": 1.2655531726542214e-07, + "loss": 0.0003, + "step": 291690 + }, + { + "epoch": 1.8708731917404509, + "grad_norm": 0.2629983425140381, + "learning_rate": 1.2643021750462837e-07, + "loss": 0.0016, + "step": 291700 + }, + { + "epoch": 1.8709373286342368, + "grad_norm": 0.11415312439203262, + "learning_rate": 1.2630517881314697e-07, + "loss": 0.0017, + "step": 291710 + }, + { + "epoch": 1.871001465528023, + "grad_norm": 0.057738933712244034, + "learning_rate": 1.2618020119254448e-07, + "loss": 0.0008, + "step": 291720 + }, + { + "epoch": 1.871065602421809, + "grad_norm": 0.00794194731861353, + "learning_rate": 1.2605528464438743e-07, + "loss": 0.001, + "step": 291730 + }, + { + "epoch": 1.8711297393155952, + "grad_norm": 0.09125705808401108, + "learning_rate": 1.2593042917024122e-07, + "loss": 0.001, + "step": 291740 + }, + { + "epoch": 1.8711938762093814, + "grad_norm": 0.06937754154205322, + "learning_rate": 1.2580563477167018e-07, + "loss": 0.0019, + "step": 291750 + }, + { + "epoch": 1.8712580131031675, + "grad_norm": 0.01868765987455845, + "learning_rate": 1.256809014502375e-07, + "loss": 0.0014, + "step": 291760 + }, + { + "epoch": 1.8713221499969535, + "grad_norm": 0.0665220245718956, + "learning_rate": 1.2555622920750743e-07, + "loss": 0.0009, + "step": 291770 + }, + { + "epoch": 1.8713862868907396, + "grad_norm": 0.13614019751548767, + "learning_rate": 1.2543161804504156e-07, + "loss": 0.0016, + "step": 291780 + }, + { + "epoch": 1.8714504237845255, + "grad_norm": 0.05044696480035782, + "learning_rate": 1.2530706796440085e-07, + "loss": 0.0007, + "step": 291790 + }, + { + "epoch": 1.8715145606783117, + "grad_norm": 0.10428164899349213, + "learning_rate": 1.2518257896714626e-07, + "loss": 0.001, + "step": 291800 + }, + { + "epoch": 1.8715786975720978, + "grad_norm": 0.06369077414274216, + "learning_rate": 1.2505815105483876e-07, + "loss": 0.0009, + "step": 291810 + }, + { + "epoch": 1.871642834465884, + "grad_norm": 0.3190913200378418, + "learning_rate": 1.2493378422903658e-07, + "loss": 0.0013, + "step": 291820 + }, + { + "epoch": 1.8717069713596701, + "grad_norm": 0.01415115687996149, + "learning_rate": 1.2480947849129788e-07, + "loss": 0.0015, + "step": 291830 + }, + { + "epoch": 1.8717711082534563, + "grad_norm": 0.09167692065238953, + "learning_rate": 1.2468523384318033e-07, + "loss": 0.0008, + "step": 291840 + }, + { + "epoch": 1.8718352451472424, + "grad_norm": 0.001591764623299241, + "learning_rate": 1.24561050286241e-07, + "loss": 0.0006, + "step": 291850 + }, + { + "epoch": 1.8718993820410283, + "grad_norm": 0.034697454422712326, + "learning_rate": 1.2443692782203697e-07, + "loss": 0.002, + "step": 291860 + }, + { + "epoch": 1.8719635189348145, + "grad_norm": 0.015665624290704727, + "learning_rate": 1.243128664521226e-07, + "loss": 0.0007, + "step": 291870 + }, + { + "epoch": 1.8720276558286004, + "grad_norm": 0.08221752196550369, + "learning_rate": 1.2418886617805214e-07, + "loss": 0.0015, + "step": 291880 + }, + { + "epoch": 1.8720917927223866, + "grad_norm": 0.2477082461118698, + "learning_rate": 1.240649270013805e-07, + "loss": 0.0016, + "step": 291890 + }, + { + "epoch": 1.8721559296161727, + "grad_norm": 0.09488316625356674, + "learning_rate": 1.2394104892365976e-07, + "loss": 0.0027, + "step": 291900 + }, + { + "epoch": 1.8722200665099589, + "grad_norm": 0.02428659237921238, + "learning_rate": 1.2381723194644258e-07, + "loss": 0.0011, + "step": 291910 + }, + { + "epoch": 1.872284203403745, + "grad_norm": 0.06166722625494003, + "learning_rate": 1.2369347607127991e-07, + "loss": 0.0009, + "step": 291920 + }, + { + "epoch": 1.8723483402975312, + "grad_norm": 0.005926318001002073, + "learning_rate": 1.2356978129972387e-07, + "loss": 0.001, + "step": 291930 + }, + { + "epoch": 1.872412477191317, + "grad_norm": 0.09185931086540222, + "learning_rate": 1.2344614763332374e-07, + "loss": 0.0015, + "step": 291940 + }, + { + "epoch": 1.8724766140851032, + "grad_norm": 0.059110403060913086, + "learning_rate": 1.2332257507362778e-07, + "loss": 0.0015, + "step": 291950 + }, + { + "epoch": 1.8725407509788894, + "grad_norm": 0.06216234341263771, + "learning_rate": 1.231990636221858e-07, + "loss": 0.0025, + "step": 291960 + }, + { + "epoch": 1.8726048878726753, + "grad_norm": 0.0533917061984539, + "learning_rate": 1.2307561328054441e-07, + "loss": 0.001, + "step": 291970 + }, + { + "epoch": 1.8726690247664615, + "grad_norm": 0.11403600871562958, + "learning_rate": 1.2295222405025176e-07, + "loss": 0.001, + "step": 291980 + }, + { + "epoch": 1.8727331616602476, + "grad_norm": 0.05329390987753868, + "learning_rate": 1.228288959328533e-07, + "loss": 0.001, + "step": 291990 + }, + { + "epoch": 1.8727972985540338, + "grad_norm": 0.008496430702507496, + "learning_rate": 1.227056289298939e-07, + "loss": 0.0015, + "step": 292000 + }, + { + "epoch": 1.87286143544782, + "grad_norm": 0.10404878854751587, + "learning_rate": 1.22582423042919e-07, + "loss": 0.0013, + "step": 292010 + }, + { + "epoch": 1.872925572341606, + "grad_norm": 0.018436910584568977, + "learning_rate": 1.2245927827347237e-07, + "loss": 0.0011, + "step": 292020 + }, + { + "epoch": 1.872989709235392, + "grad_norm": 0.014300175942480564, + "learning_rate": 1.223361946230961e-07, + "loss": 0.001, + "step": 292030 + }, + { + "epoch": 1.8730538461291781, + "grad_norm": 0.09425853192806244, + "learning_rate": 1.2221317209333395e-07, + "loss": 0.0006, + "step": 292040 + }, + { + "epoch": 1.873117983022964, + "grad_norm": 0.016019126400351524, + "learning_rate": 1.220902106857269e-07, + "loss": 0.0005, + "step": 292050 + }, + { + "epoch": 1.8731821199167502, + "grad_norm": 0.07877396792173386, + "learning_rate": 1.2196731040181598e-07, + "loss": 0.0012, + "step": 292060 + }, + { + "epoch": 1.8732462568105364, + "grad_norm": 0.2555936872959137, + "learning_rate": 1.218444712431399e-07, + "loss": 0.0014, + "step": 292070 + }, + { + "epoch": 1.8733103937043225, + "grad_norm": 0.07026442885398865, + "learning_rate": 1.2172169321123973e-07, + "loss": 0.0007, + "step": 292080 + }, + { + "epoch": 1.8733745305981087, + "grad_norm": 0.01781727746129036, + "learning_rate": 1.215989763076536e-07, + "loss": 0.0003, + "step": 292090 + }, + { + "epoch": 1.8734386674918948, + "grad_norm": 0.03243421018123627, + "learning_rate": 1.214763205339181e-07, + "loss": 0.0013, + "step": 292100 + }, + { + "epoch": 1.873502804385681, + "grad_norm": 0.19241560995578766, + "learning_rate": 1.2135372589157092e-07, + "loss": 0.0015, + "step": 292110 + }, + { + "epoch": 1.8735669412794669, + "grad_norm": 0.03661670535802841, + "learning_rate": 1.2123119238214854e-07, + "loss": 0.0029, + "step": 292120 + }, + { + "epoch": 1.873631078173253, + "grad_norm": 0.024726325646042824, + "learning_rate": 1.2110872000718643e-07, + "loss": 0.0006, + "step": 292130 + }, + { + "epoch": 1.873695215067039, + "grad_norm": 0.04537238925695419, + "learning_rate": 1.2098630876821894e-07, + "loss": 0.0023, + "step": 292140 + }, + { + "epoch": 1.873759351960825, + "grad_norm": 0.14653237164020538, + "learning_rate": 1.2086395866677925e-07, + "loss": 0.0006, + "step": 292150 + }, + { + "epoch": 1.8738234888546113, + "grad_norm": 0.03436683490872383, + "learning_rate": 1.207416697044017e-07, + "loss": 0.002, + "step": 292160 + }, + { + "epoch": 1.8738876257483974, + "grad_norm": 0.11190580576658249, + "learning_rate": 1.2061944188261787e-07, + "loss": 0.0009, + "step": 292170 + }, + { + "epoch": 1.8739517626421835, + "grad_norm": 0.013377520255744457, + "learning_rate": 1.2049727520295983e-07, + "loss": 0.0005, + "step": 292180 + }, + { + "epoch": 1.8740158995359697, + "grad_norm": 0.0461856946349144, + "learning_rate": 1.203751696669586e-07, + "loss": 0.0012, + "step": 292190 + }, + { + "epoch": 1.8740800364297556, + "grad_norm": 0.09250977635383606, + "learning_rate": 1.2025312527614353e-07, + "loss": 0.0007, + "step": 292200 + }, + { + "epoch": 1.8741441733235418, + "grad_norm": 0.04103543981909752, + "learning_rate": 1.2013114203204447e-07, + "loss": 0.0005, + "step": 292210 + }, + { + "epoch": 1.8742083102173277, + "grad_norm": 0.003423841670155525, + "learning_rate": 1.2000921993618962e-07, + "loss": 0.0014, + "step": 292220 + }, + { + "epoch": 1.8742724471111138, + "grad_norm": 0.0074290018528699875, + "learning_rate": 1.198873589901073e-07, + "loss": 0.0031, + "step": 292230 + }, + { + "epoch": 1.8743365840049, + "grad_norm": 0.016086000949144363, + "learning_rate": 1.1976555919532395e-07, + "loss": 0.001, + "step": 292240 + }, + { + "epoch": 1.8744007208986861, + "grad_norm": 0.037323419004678726, + "learning_rate": 1.1964382055336566e-07, + "loss": 0.0012, + "step": 292250 + }, + { + "epoch": 1.8744648577924723, + "grad_norm": 0.047648780047893524, + "learning_rate": 1.195221430657584e-07, + "loss": 0.0012, + "step": 292260 + }, + { + "epoch": 1.8745289946862584, + "grad_norm": 0.06828763335943222, + "learning_rate": 1.1940052673402703e-07, + "loss": 0.0016, + "step": 292270 + }, + { + "epoch": 1.8745931315800446, + "grad_norm": 0.019493915140628815, + "learning_rate": 1.192789715596948e-07, + "loss": 0.0007, + "step": 292280 + }, + { + "epoch": 1.8746572684738305, + "grad_norm": 0.10813542455434799, + "learning_rate": 1.1915747754428553e-07, + "loss": 0.0006, + "step": 292290 + }, + { + "epoch": 1.8747214053676167, + "grad_norm": 0.15127013623714447, + "learning_rate": 1.1903604468932128e-07, + "loss": 0.0081, + "step": 292300 + }, + { + "epoch": 1.8747855422614026, + "grad_norm": 0.23425142467021942, + "learning_rate": 1.1891467299632365e-07, + "loss": 0.0044, + "step": 292310 + }, + { + "epoch": 1.8748496791551887, + "grad_norm": 0.0018174074357375503, + "learning_rate": 1.1879336246681361e-07, + "loss": 0.0017, + "step": 292320 + }, + { + "epoch": 1.874913816048975, + "grad_norm": 0.06480545550584793, + "learning_rate": 1.1867211310231108e-07, + "loss": 0.0006, + "step": 292330 + }, + { + "epoch": 1.874977952942761, + "grad_norm": 0.13981083035469055, + "learning_rate": 1.1855092490433595e-07, + "loss": 0.0009, + "step": 292340 + }, + { + "epoch": 1.8750420898365472, + "grad_norm": 0.14205895364284515, + "learning_rate": 1.1842979787440645e-07, + "loss": 0.001, + "step": 292350 + }, + { + "epoch": 1.8751062267303333, + "grad_norm": 0.023094238713383675, + "learning_rate": 1.183087320140408e-07, + "loss": 0.0009, + "step": 292360 + }, + { + "epoch": 1.8751703636241195, + "grad_norm": 0.11857487261295319, + "learning_rate": 1.1818772732475447e-07, + "loss": 0.0016, + "step": 292370 + }, + { + "epoch": 1.8752345005179054, + "grad_norm": 0.0800149217247963, + "learning_rate": 1.1806678380806569e-07, + "loss": 0.0011, + "step": 292380 + }, + { + "epoch": 1.8752986374116916, + "grad_norm": 0.14757139980793, + "learning_rate": 1.1794590146548934e-07, + "loss": 0.0024, + "step": 292390 + }, + { + "epoch": 1.8753627743054775, + "grad_norm": 0.13186165690422058, + "learning_rate": 1.1782508029853979e-07, + "loss": 0.0009, + "step": 292400 + }, + { + "epoch": 1.8754269111992636, + "grad_norm": 0.010632836259901524, + "learning_rate": 1.177043203087308e-07, + "loss": 0.0006, + "step": 292410 + }, + { + "epoch": 1.8754910480930498, + "grad_norm": 0.07655154913663864, + "learning_rate": 1.1758362149757674e-07, + "loss": 0.0013, + "step": 292420 + }, + { + "epoch": 1.875555184986836, + "grad_norm": 0.060383353382349014, + "learning_rate": 1.1746298386658917e-07, + "loss": 0.0007, + "step": 292430 + }, + { + "epoch": 1.875619321880622, + "grad_norm": 0.3088397681713104, + "learning_rate": 1.173424074172791e-07, + "loss": 0.0011, + "step": 292440 + }, + { + "epoch": 1.8756834587744082, + "grad_norm": 0.09891100227832794, + "learning_rate": 1.172218921511592e-07, + "loss": 0.0023, + "step": 292450 + }, + { + "epoch": 1.8757475956681942, + "grad_norm": 0.07089928537607193, + "learning_rate": 1.1710143806973829e-07, + "loss": 0.0023, + "step": 292460 + }, + { + "epoch": 1.8758117325619803, + "grad_norm": 0.07698842138051987, + "learning_rate": 1.1698104517452624e-07, + "loss": 0.001, + "step": 292470 + }, + { + "epoch": 1.8758758694557662, + "grad_norm": 0.1335075944662094, + "learning_rate": 1.1686071346703076e-07, + "loss": 0.0014, + "step": 292480 + }, + { + "epoch": 1.8759400063495524, + "grad_norm": 0.07194697111845016, + "learning_rate": 1.167404429487612e-07, + "loss": 0.0018, + "step": 292490 + }, + { + "epoch": 1.8760041432433385, + "grad_norm": 0.030050817877054214, + "learning_rate": 1.166202336212241e-07, + "loss": 0.0008, + "step": 292500 + }, + { + "epoch": 1.8760682801371247, + "grad_norm": 0.052116841077804565, + "learning_rate": 1.1650008548592551e-07, + "loss": 0.0013, + "step": 292510 + }, + { + "epoch": 1.8761324170309108, + "grad_norm": 0.015215110033750534, + "learning_rate": 1.1637999854437032e-07, + "loss": 0.0006, + "step": 292520 + }, + { + "epoch": 1.876196553924697, + "grad_norm": 0.014021230861544609, + "learning_rate": 1.1625997279806456e-07, + "loss": 0.0014, + "step": 292530 + }, + { + "epoch": 1.8762606908184831, + "grad_norm": 0.048613812774419785, + "learning_rate": 1.1614000824851201e-07, + "loss": 0.0014, + "step": 292540 + }, + { + "epoch": 1.876324827712269, + "grad_norm": 0.06975982338190079, + "learning_rate": 1.1602010489721538e-07, + "loss": 0.0019, + "step": 292550 + }, + { + "epoch": 1.8763889646060552, + "grad_norm": 0.021208981052041054, + "learning_rate": 1.1590026274567678e-07, + "loss": 0.001, + "step": 292560 + }, + { + "epoch": 1.8764531014998411, + "grad_norm": 0.06615134328603745, + "learning_rate": 1.1578048179539891e-07, + "loss": 0.0011, + "step": 292570 + }, + { + "epoch": 1.8765172383936273, + "grad_norm": 0.11063316464424133, + "learning_rate": 1.1566076204788223e-07, + "loss": 0.0007, + "step": 292580 + }, + { + "epoch": 1.8765813752874134, + "grad_norm": 0.06497619301080704, + "learning_rate": 1.1554110350462722e-07, + "loss": 0.0016, + "step": 292590 + }, + { + "epoch": 1.8766455121811996, + "grad_norm": 0.023190032690763474, + "learning_rate": 1.1542150616713266e-07, + "loss": 0.0011, + "step": 292600 + }, + { + "epoch": 1.8767096490749857, + "grad_norm": 0.09450613707304001, + "learning_rate": 1.1530197003689736e-07, + "loss": 0.0009, + "step": 292610 + }, + { + "epoch": 1.8767737859687719, + "grad_norm": 0.0028610366862267256, + "learning_rate": 1.1518249511542012e-07, + "loss": 0.0015, + "step": 292620 + }, + { + "epoch": 1.8768379228625578, + "grad_norm": 0.08985843509435654, + "learning_rate": 1.1506308140419752e-07, + "loss": 0.0012, + "step": 292630 + }, + { + "epoch": 1.876902059756344, + "grad_norm": 0.429986834526062, + "learning_rate": 1.1494372890472506e-07, + "loss": 0.0013, + "step": 292640 + }, + { + "epoch": 1.87696619665013, + "grad_norm": 0.061802733689546585, + "learning_rate": 1.1482443761849926e-07, + "loss": 0.0006, + "step": 292650 + }, + { + "epoch": 1.877030333543916, + "grad_norm": 0.10510675609111786, + "learning_rate": 1.1470520754701453e-07, + "loss": 0.0016, + "step": 292660 + }, + { + "epoch": 1.8770944704377022, + "grad_norm": 0.006245863623917103, + "learning_rate": 1.1458603869176521e-07, + "loss": 0.0005, + "step": 292670 + }, + { + "epoch": 1.8771586073314883, + "grad_norm": 0.030436299741268158, + "learning_rate": 1.14466931054244e-07, + "loss": 0.0007, + "step": 292680 + }, + { + "epoch": 1.8772227442252745, + "grad_norm": 0.040905144065618515, + "learning_rate": 1.1434788463594415e-07, + "loss": 0.001, + "step": 292690 + }, + { + "epoch": 1.8772868811190606, + "grad_norm": 0.013966300524771214, + "learning_rate": 1.1422889943835725e-07, + "loss": 0.0007, + "step": 292700 + }, + { + "epoch": 1.8773510180128468, + "grad_norm": 0.11456148326396942, + "learning_rate": 1.141099754629732e-07, + "loss": 0.0007, + "step": 292710 + }, + { + "epoch": 1.8774151549066327, + "grad_norm": 0.020847106352448463, + "learning_rate": 1.139911127112836e-07, + "loss": 0.0006, + "step": 292720 + }, + { + "epoch": 1.8774792918004188, + "grad_norm": 0.1931428462266922, + "learning_rate": 1.1387231118477782e-07, + "loss": 0.0013, + "step": 292730 + }, + { + "epoch": 1.8775434286942048, + "grad_norm": 0.08069287240505219, + "learning_rate": 1.1375357088494354e-07, + "loss": 0.0012, + "step": 292740 + }, + { + "epoch": 1.877607565587991, + "grad_norm": 0.021612413227558136, + "learning_rate": 1.1363489181326848e-07, + "loss": 0.0006, + "step": 292750 + }, + { + "epoch": 1.877671702481777, + "grad_norm": 0.016337022185325623, + "learning_rate": 1.1351627397124143e-07, + "loss": 0.001, + "step": 292760 + }, + { + "epoch": 1.8777358393755632, + "grad_norm": 0.2647327780723572, + "learning_rate": 1.1339771736034788e-07, + "loss": 0.0013, + "step": 292770 + }, + { + "epoch": 1.8777999762693494, + "grad_norm": 0.046598881483078, + "learning_rate": 1.1327922198207275e-07, + "loss": 0.0015, + "step": 292780 + }, + { + "epoch": 1.8778641131631355, + "grad_norm": 0.05295758694410324, + "learning_rate": 1.1316078783790152e-07, + "loss": 0.0008, + "step": 292790 + }, + { + "epoch": 1.8779282500569217, + "grad_norm": 0.05909512937068939, + "learning_rate": 1.1304241492931855e-07, + "loss": 0.0006, + "step": 292800 + }, + { + "epoch": 1.8779923869507076, + "grad_norm": 0.06993626803159714, + "learning_rate": 1.1292410325780656e-07, + "loss": 0.0009, + "step": 292810 + }, + { + "epoch": 1.8780565238444937, + "grad_norm": 0.03092687390744686, + "learning_rate": 1.1280585282484824e-07, + "loss": 0.0021, + "step": 292820 + }, + { + "epoch": 1.8781206607382797, + "grad_norm": 0.20055820047855377, + "learning_rate": 1.1268766363192518e-07, + "loss": 0.0016, + "step": 292830 + }, + { + "epoch": 1.8781847976320658, + "grad_norm": 0.019928058609366417, + "learning_rate": 1.1256953568051898e-07, + "loss": 0.0007, + "step": 292840 + }, + { + "epoch": 1.878248934525852, + "grad_norm": 0.05944271385669708, + "learning_rate": 1.1245146897210957e-07, + "loss": 0.0011, + "step": 292850 + }, + { + "epoch": 1.878313071419638, + "grad_norm": 0.002654408337548375, + "learning_rate": 1.1233346350817575e-07, + "loss": 0.0015, + "step": 292860 + }, + { + "epoch": 1.8783772083134243, + "grad_norm": 0.04330388456583023, + "learning_rate": 1.1221551929019748e-07, + "loss": 0.0004, + "step": 292870 + }, + { + "epoch": 1.8784413452072104, + "grad_norm": 0.02954934909939766, + "learning_rate": 1.1209763631965187e-07, + "loss": 0.0015, + "step": 292880 + }, + { + "epoch": 1.8785054821009963, + "grad_norm": 0.007893679663538933, + "learning_rate": 1.119798145980161e-07, + "loss": 0.001, + "step": 292890 + }, + { + "epoch": 1.8785696189947825, + "grad_norm": 0.014690901152789593, + "learning_rate": 1.118620541267662e-07, + "loss": 0.0013, + "step": 292900 + }, + { + "epoch": 1.8786337558885684, + "grad_norm": 0.061036258935928345, + "learning_rate": 1.1174435490737879e-07, + "loss": 0.0013, + "step": 292910 + }, + { + "epoch": 1.8786978927823546, + "grad_norm": 0.049692727625370026, + "learning_rate": 1.116267169413282e-07, + "loss": 0.0016, + "step": 292920 + }, + { + "epoch": 1.8787620296761407, + "grad_norm": 0.046990640461444855, + "learning_rate": 1.115091402300883e-07, + "loss": 0.0004, + "step": 292930 + }, + { + "epoch": 1.8788261665699268, + "grad_norm": 0.13081590831279755, + "learning_rate": 1.1139162477513233e-07, + "loss": 0.0012, + "step": 292940 + }, + { + "epoch": 1.878890303463713, + "grad_norm": 0.039909522980451584, + "learning_rate": 1.112741705779341e-07, + "loss": 0.001, + "step": 292950 + }, + { + "epoch": 1.8789544403574991, + "grad_norm": 0.07774998992681503, + "learning_rate": 1.1115677763996357e-07, + "loss": 0.0013, + "step": 292960 + }, + { + "epoch": 1.8790185772512853, + "grad_norm": 0.04683222994208336, + "learning_rate": 1.1103944596269345e-07, + "loss": 0.0007, + "step": 292970 + }, + { + "epoch": 1.8790827141450712, + "grad_norm": 0.11634768545627594, + "learning_rate": 1.10922175547592e-07, + "loss": 0.0007, + "step": 292980 + }, + { + "epoch": 1.8791468510388574, + "grad_norm": 0.04241019859910011, + "learning_rate": 1.1080496639613081e-07, + "loss": 0.0005, + "step": 292990 + }, + { + "epoch": 1.8792109879326433, + "grad_norm": 0.02268884889781475, + "learning_rate": 1.1068781850977706e-07, + "loss": 0.0008, + "step": 293000 + }, + { + "epoch": 1.8792751248264294, + "grad_norm": 0.14664621651172638, + "learning_rate": 1.1057073188999956e-07, + "loss": 0.0011, + "step": 293010 + }, + { + "epoch": 1.8793392617202156, + "grad_norm": 0.031727563589811325, + "learning_rate": 1.1045370653826492e-07, + "loss": 0.0004, + "step": 293020 + }, + { + "epoch": 1.8794033986140017, + "grad_norm": 0.20842571556568146, + "learning_rate": 1.1033674245604031e-07, + "loss": 0.0017, + "step": 293030 + }, + { + "epoch": 1.879467535507788, + "grad_norm": 0.03360797464847565, + "learning_rate": 1.1021983964479122e-07, + "loss": 0.0013, + "step": 293040 + }, + { + "epoch": 1.879531672401574, + "grad_norm": 0.09648806601762772, + "learning_rate": 1.101029981059809e-07, + "loss": 0.001, + "step": 293050 + }, + { + "epoch": 1.87959580929536, + "grad_norm": 0.05264410376548767, + "learning_rate": 1.09986217841076e-07, + "loss": 0.0003, + "step": 293060 + }, + { + "epoch": 1.8796599461891461, + "grad_norm": 0.11550859361886978, + "learning_rate": 1.0986949885153809e-07, + "loss": 0.0004, + "step": 293070 + }, + { + "epoch": 1.8797240830829323, + "grad_norm": 0.03810092434287071, + "learning_rate": 1.09752841138831e-07, + "loss": 0.0013, + "step": 293080 + }, + { + "epoch": 1.8797882199767182, + "grad_norm": 0.11439960449934006, + "learning_rate": 1.0963624470441469e-07, + "loss": 0.0025, + "step": 293090 + }, + { + "epoch": 1.8798523568705043, + "grad_norm": 0.05327378585934639, + "learning_rate": 1.0951970954975188e-07, + "loss": 0.001, + "step": 293100 + }, + { + "epoch": 1.8799164937642905, + "grad_norm": 0.08586519211530685, + "learning_rate": 1.0940323567630251e-07, + "loss": 0.0008, + "step": 293110 + }, + { + "epoch": 1.8799806306580766, + "grad_norm": 0.05707564949989319, + "learning_rate": 1.092868230855254e-07, + "loss": 0.0012, + "step": 293120 + }, + { + "epoch": 1.8800447675518628, + "grad_norm": 0.2404910773038864, + "learning_rate": 1.0917047177887997e-07, + "loss": 0.0026, + "step": 293130 + }, + { + "epoch": 1.880108904445649, + "grad_norm": 0.026872379705309868, + "learning_rate": 1.090541817578239e-07, + "loss": 0.0009, + "step": 293140 + }, + { + "epoch": 1.8801730413394349, + "grad_norm": 0.037973977625370026, + "learning_rate": 1.0893795302381438e-07, + "loss": 0.0011, + "step": 293150 + }, + { + "epoch": 1.880237178233221, + "grad_norm": 0.09424316138029099, + "learning_rate": 1.0882178557830802e-07, + "loss": 0.0017, + "step": 293160 + }, + { + "epoch": 1.880301315127007, + "grad_norm": 0.08930182456970215, + "learning_rate": 1.0870567942276033e-07, + "loss": 0.0016, + "step": 293170 + }, + { + "epoch": 1.880365452020793, + "grad_norm": 0.2537005543708801, + "learning_rate": 1.0858963455862625e-07, + "loss": 0.0017, + "step": 293180 + }, + { + "epoch": 1.8804295889145792, + "grad_norm": 0.07175569981336594, + "learning_rate": 1.0847365098736018e-07, + "loss": 0.0011, + "step": 293190 + }, + { + "epoch": 1.8804937258083654, + "grad_norm": 0.06721962243318558, + "learning_rate": 1.0835772871041428e-07, + "loss": 0.0015, + "step": 293200 + }, + { + "epoch": 1.8805578627021515, + "grad_norm": 0.06867531687021255, + "learning_rate": 1.0824186772924295e-07, + "loss": 0.0009, + "step": 293210 + }, + { + "epoch": 1.8806219995959377, + "grad_norm": 0.01514175534248352, + "learning_rate": 1.0812606804529668e-07, + "loss": 0.0007, + "step": 293220 + }, + { + "epoch": 1.8806861364897238, + "grad_norm": 0.0059837414883077145, + "learning_rate": 1.0801032966002712e-07, + "loss": 0.0017, + "step": 293230 + }, + { + "epoch": 1.8807502733835098, + "grad_norm": 0.25319117307662964, + "learning_rate": 1.0789465257488418e-07, + "loss": 0.0017, + "step": 293240 + }, + { + "epoch": 1.880814410277296, + "grad_norm": 0.048505526036024094, + "learning_rate": 1.0777903679131785e-07, + "loss": 0.0004, + "step": 293250 + }, + { + "epoch": 1.8808785471710818, + "grad_norm": 0.002642439678311348, + "learning_rate": 1.0766348231077639e-07, + "loss": 0.0004, + "step": 293260 + }, + { + "epoch": 1.880942684064868, + "grad_norm": 0.003176505444571376, + "learning_rate": 1.075479891347081e-07, + "loss": 0.0004, + "step": 293270 + }, + { + "epoch": 1.8810068209586541, + "grad_norm": 0.0015448590274900198, + "learning_rate": 1.0743255726455959e-07, + "loss": 0.001, + "step": 293280 + }, + { + "epoch": 1.8810709578524403, + "grad_norm": 0.024752216413617134, + "learning_rate": 1.073171867017786e-07, + "loss": 0.0006, + "step": 293290 + }, + { + "epoch": 1.8811350947462264, + "grad_norm": 0.38314008712768555, + "learning_rate": 1.0720187744781008e-07, + "loss": 0.002, + "step": 293300 + }, + { + "epoch": 1.8811992316400126, + "grad_norm": 0.02777690440416336, + "learning_rate": 1.0708662950409787e-07, + "loss": 0.0005, + "step": 293310 + }, + { + "epoch": 1.8812633685337985, + "grad_norm": 0.13420148193836212, + "learning_rate": 1.0697144287208805e-07, + "loss": 0.0011, + "step": 293320 + }, + { + "epoch": 1.8813275054275846, + "grad_norm": 0.09384496510028839, + "learning_rate": 1.0685631755322279e-07, + "loss": 0.0008, + "step": 293330 + }, + { + "epoch": 1.8813916423213706, + "grad_norm": 0.20899826288223267, + "learning_rate": 1.0674125354894483e-07, + "loss": 0.0035, + "step": 293340 + }, + { + "epoch": 1.8814557792151567, + "grad_norm": 0.04801175370812416, + "learning_rate": 1.0662625086069579e-07, + "loss": 0.0006, + "step": 293350 + }, + { + "epoch": 1.8815199161089429, + "grad_norm": 0.003630199935287237, + "learning_rate": 1.0651130948991728e-07, + "loss": 0.0009, + "step": 293360 + }, + { + "epoch": 1.881584053002729, + "grad_norm": 0.09173569083213806, + "learning_rate": 1.0639642943804984e-07, + "loss": 0.0024, + "step": 293370 + }, + { + "epoch": 1.8816481898965152, + "grad_norm": 0.07374726980924606, + "learning_rate": 1.0628161070653175e-07, + "loss": 0.0005, + "step": 293380 + }, + { + "epoch": 1.8817123267903013, + "grad_norm": 0.05393248423933983, + "learning_rate": 1.0616685329680354e-07, + "loss": 0.0016, + "step": 293390 + }, + { + "epoch": 1.8817764636840875, + "grad_norm": 0.05182025954127312, + "learning_rate": 1.0605215721030126e-07, + "loss": 0.0009, + "step": 293400 + }, + { + "epoch": 1.8818406005778734, + "grad_norm": 0.08506152778863907, + "learning_rate": 1.0593752244846378e-07, + "loss": 0.0006, + "step": 293410 + }, + { + "epoch": 1.8819047374716595, + "grad_norm": 0.09758177399635315, + "learning_rate": 1.0582294901272661e-07, + "loss": 0.0012, + "step": 293420 + }, + { + "epoch": 1.8819688743654455, + "grad_norm": 0.03581222519278526, + "learning_rate": 1.0570843690452526e-07, + "loss": 0.0005, + "step": 293430 + }, + { + "epoch": 1.8820330112592316, + "grad_norm": 0.2432924509048462, + "learning_rate": 1.0559398612529581e-07, + "loss": 0.0011, + "step": 293440 + }, + { + "epoch": 1.8820971481530178, + "grad_norm": 0.15219354629516602, + "learning_rate": 1.0547959667647157e-07, + "loss": 0.0006, + "step": 293450 + }, + { + "epoch": 1.882161285046804, + "grad_norm": 0.017942845821380615, + "learning_rate": 1.0536526855948637e-07, + "loss": 0.0008, + "step": 293460 + }, + { + "epoch": 1.88222542194059, + "grad_norm": 0.045608971267938614, + "learning_rate": 1.0525100177577185e-07, + "loss": 0.0006, + "step": 293470 + }, + { + "epoch": 1.8822895588343762, + "grad_norm": 0.28920862078666687, + "learning_rate": 1.0513679632676077e-07, + "loss": 0.0021, + "step": 293480 + }, + { + "epoch": 1.8823536957281621, + "grad_norm": 0.05183442682027817, + "learning_rate": 1.0502265221388419e-07, + "loss": 0.0018, + "step": 293490 + }, + { + "epoch": 1.8824178326219483, + "grad_norm": 0.015167763456702232, + "learning_rate": 1.0490856943857153e-07, + "loss": 0.0017, + "step": 293500 + }, + { + "epoch": 1.8824819695157344, + "grad_norm": 0.18725934624671936, + "learning_rate": 1.047945480022533e-07, + "loss": 0.0009, + "step": 293510 + }, + { + "epoch": 1.8825461064095204, + "grad_norm": 0.13862395286560059, + "learning_rate": 1.0468058790635782e-07, + "loss": 0.0017, + "step": 293520 + }, + { + "epoch": 1.8826102433033065, + "grad_norm": 0.012817653827369213, + "learning_rate": 1.0456668915231338e-07, + "loss": 0.0004, + "step": 293530 + }, + { + "epoch": 1.8826743801970927, + "grad_norm": 0.10296519845724106, + "learning_rate": 1.0445285174154717e-07, + "loss": 0.0011, + "step": 293540 + }, + { + "epoch": 1.8827385170908788, + "grad_norm": 0.1026385948061943, + "learning_rate": 1.043390756754853e-07, + "loss": 0.0014, + "step": 293550 + }, + { + "epoch": 1.882802653984665, + "grad_norm": 0.01244757603853941, + "learning_rate": 1.0422536095555381e-07, + "loss": 0.0022, + "step": 293560 + }, + { + "epoch": 1.882866790878451, + "grad_norm": 0.0076684970408678055, + "learning_rate": 1.0411170758317768e-07, + "loss": 0.0016, + "step": 293570 + }, + { + "epoch": 1.882930927772237, + "grad_norm": 0.125825434923172, + "learning_rate": 1.0399811555978024e-07, + "loss": 0.001, + "step": 293580 + }, + { + "epoch": 1.8829950646660232, + "grad_norm": 0.02257315255701542, + "learning_rate": 1.0388458488678588e-07, + "loss": 0.0008, + "step": 293590 + }, + { + "epoch": 1.883059201559809, + "grad_norm": 0.03127681463956833, + "learning_rate": 1.0377111556561682e-07, + "loss": 0.0056, + "step": 293600 + }, + { + "epoch": 1.8831233384535953, + "grad_norm": 0.07549407333135605, + "learning_rate": 1.0365770759769522e-07, + "loss": 0.0017, + "step": 293610 + }, + { + "epoch": 1.8831874753473814, + "grad_norm": 0.08330096304416656, + "learning_rate": 1.0354436098444165e-07, + "loss": 0.0006, + "step": 293620 + }, + { + "epoch": 1.8832516122411675, + "grad_norm": 0.04965728893876076, + "learning_rate": 1.0343107572727663e-07, + "loss": 0.0004, + "step": 293630 + }, + { + "epoch": 1.8833157491349537, + "grad_norm": 0.1543658822774887, + "learning_rate": 1.033178518276201e-07, + "loss": 0.0015, + "step": 293640 + }, + { + "epoch": 1.8833798860287398, + "grad_norm": 0.013736913911998272, + "learning_rate": 1.0320468928689043e-07, + "loss": 0.0008, + "step": 293650 + }, + { + "epoch": 1.883444022922526, + "grad_norm": 0.06732528656721115, + "learning_rate": 1.0309158810650532e-07, + "loss": 0.0004, + "step": 293660 + }, + { + "epoch": 1.883508159816312, + "grad_norm": 0.02943151257932186, + "learning_rate": 1.0297854828788311e-07, + "loss": 0.0005, + "step": 293670 + }, + { + "epoch": 1.883572296710098, + "grad_norm": 0.008330841548740864, + "learning_rate": 1.0286556983243878e-07, + "loss": 0.0008, + "step": 293680 + }, + { + "epoch": 1.883636433603884, + "grad_norm": 0.02544136717915535, + "learning_rate": 1.0275265274158952e-07, + "loss": 0.001, + "step": 293690 + }, + { + "epoch": 1.8837005704976701, + "grad_norm": 0.13496889173984528, + "learning_rate": 1.0263979701674865e-07, + "loss": 0.0014, + "step": 293700 + }, + { + "epoch": 1.8837647073914563, + "grad_norm": 0.05403325334191322, + "learning_rate": 1.0252700265933169e-07, + "loss": 0.0009, + "step": 293710 + }, + { + "epoch": 1.8838288442852424, + "grad_norm": 0.05685772746801376, + "learning_rate": 1.024142696707514e-07, + "loss": 0.0008, + "step": 293720 + }, + { + "epoch": 1.8838929811790286, + "grad_norm": 0.010242694057524204, + "learning_rate": 1.0230159805242058e-07, + "loss": 0.0011, + "step": 293730 + }, + { + "epoch": 1.8839571180728147, + "grad_norm": 0.050826091319322586, + "learning_rate": 1.0218898780575138e-07, + "loss": 0.0006, + "step": 293740 + }, + { + "epoch": 1.8840212549666007, + "grad_norm": 0.10138603299856186, + "learning_rate": 1.0207643893215435e-07, + "loss": 0.0008, + "step": 293750 + }, + { + "epoch": 1.8840853918603868, + "grad_norm": 0.06282013654708862, + "learning_rate": 1.0196395143304006e-07, + "loss": 0.003, + "step": 293760 + }, + { + "epoch": 1.8841495287541727, + "grad_norm": 0.05487838387489319, + "learning_rate": 1.018515253098179e-07, + "loss": 0.0009, + "step": 293770 + }, + { + "epoch": 1.884213665647959, + "grad_norm": 0.10411953926086426, + "learning_rate": 1.0173916056389677e-07, + "loss": 0.0011, + "step": 293780 + }, + { + "epoch": 1.884277802541745, + "grad_norm": 0.167263925075531, + "learning_rate": 1.0162685719668497e-07, + "loss": 0.0021, + "step": 293790 + }, + { + "epoch": 1.8843419394355312, + "grad_norm": 0.03700994327664375, + "learning_rate": 1.0151461520958971e-07, + "loss": 0.0011, + "step": 293800 + }, + { + "epoch": 1.8844060763293173, + "grad_norm": 0.09405221045017242, + "learning_rate": 1.01402434604016e-07, + "loss": 0.0018, + "step": 293810 + }, + { + "epoch": 1.8844702132231035, + "grad_norm": 0.01762114278972149, + "learning_rate": 1.0129031538137213e-07, + "loss": 0.0007, + "step": 293820 + }, + { + "epoch": 1.8845343501168896, + "grad_norm": 0.16988490521907806, + "learning_rate": 1.0117825754306088e-07, + "loss": 0.002, + "step": 293830 + }, + { + "epoch": 1.8845984870106756, + "grad_norm": 0.08364865928888321, + "learning_rate": 1.0106626109048778e-07, + "loss": 0.001, + "step": 293840 + }, + { + "epoch": 1.8846626239044617, + "grad_norm": 0.0017609879141673446, + "learning_rate": 1.0095432602505506e-07, + "loss": 0.0005, + "step": 293850 + }, + { + "epoch": 1.8847267607982476, + "grad_norm": 0.0023259019944816828, + "learning_rate": 1.0084245234816603e-07, + "loss": 0.0009, + "step": 293860 + }, + { + "epoch": 1.8847908976920338, + "grad_norm": 0.013131055980920792, + "learning_rate": 1.007306400612229e-07, + "loss": 0.0006, + "step": 293870 + }, + { + "epoch": 1.88485503458582, + "grad_norm": 0.056270454078912735, + "learning_rate": 1.0061888916562568e-07, + "loss": 0.0012, + "step": 293880 + }, + { + "epoch": 1.884919171479606, + "grad_norm": 0.01898212917149067, + "learning_rate": 1.0050719966277544e-07, + "loss": 0.0007, + "step": 293890 + }, + { + "epoch": 1.8849833083733922, + "grad_norm": 0.15191014111042023, + "learning_rate": 1.003955715540722e-07, + "loss": 0.0015, + "step": 293900 + }, + { + "epoch": 1.8850474452671784, + "grad_norm": 0.0716463252902031, + "learning_rate": 1.0028400484091372e-07, + "loss": 0.0011, + "step": 293910 + }, + { + "epoch": 1.8851115821609645, + "grad_norm": 0.06062595546245575, + "learning_rate": 1.0017249952469776e-07, + "loss": 0.0016, + "step": 293920 + }, + { + "epoch": 1.8851757190547505, + "grad_norm": 0.09747257828712463, + "learning_rate": 1.0006105560682322e-07, + "loss": 0.0014, + "step": 293930 + }, + { + "epoch": 1.8852398559485366, + "grad_norm": 0.022606564685702324, + "learning_rate": 9.994967308868564e-08, + "loss": 0.0007, + "step": 293940 + }, + { + "epoch": 1.8853039928423225, + "grad_norm": 0.09071505814790726, + "learning_rate": 9.983835197168001e-08, + "loss": 0.0008, + "step": 293950 + }, + { + "epoch": 1.8853681297361087, + "grad_norm": 0.016011979430913925, + "learning_rate": 9.972709225720189e-08, + "loss": 0.0008, + "step": 293960 + }, + { + "epoch": 1.8854322666298948, + "grad_norm": 0.05072588101029396, + "learning_rate": 9.961589394664628e-08, + "loss": 0.0016, + "step": 293970 + }, + { + "epoch": 1.885496403523681, + "grad_norm": 0.0029339883476495743, + "learning_rate": 9.950475704140539e-08, + "loss": 0.0016, + "step": 293980 + }, + { + "epoch": 1.8855605404174671, + "grad_norm": 0.037333372980356216, + "learning_rate": 9.939368154287198e-08, + "loss": 0.0015, + "step": 293990 + }, + { + "epoch": 1.8856246773112533, + "grad_norm": 0.013276482932269573, + "learning_rate": 9.928266745243831e-08, + "loss": 0.0017, + "step": 294000 + }, + { + "epoch": 1.8856888142050392, + "grad_norm": 0.06680195778608322, + "learning_rate": 9.91717147714949e-08, + "loss": 0.0011, + "step": 294010 + }, + { + "epoch": 1.8857529510988253, + "grad_norm": 0.06643390655517578, + "learning_rate": 9.906082350143343e-08, + "loss": 0.001, + "step": 294020 + }, + { + "epoch": 1.8858170879926113, + "grad_norm": 0.08585356175899506, + "learning_rate": 9.894999364364166e-08, + "loss": 0.0011, + "step": 294030 + }, + { + "epoch": 1.8858812248863974, + "grad_norm": 0.045736897736787796, + "learning_rate": 9.883922519950961e-08, + "loss": 0.0007, + "step": 294040 + }, + { + "epoch": 1.8859453617801836, + "grad_norm": 0.06042035296559334, + "learning_rate": 9.872851817042451e-08, + "loss": 0.0006, + "step": 294050 + }, + { + "epoch": 1.8860094986739697, + "grad_norm": 0.0751265287399292, + "learning_rate": 9.861787255777411e-08, + "loss": 0.0008, + "step": 294060 + }, + { + "epoch": 1.8860736355677559, + "grad_norm": 0.04391670227050781, + "learning_rate": 9.850728836294455e-08, + "loss": 0.0008, + "step": 294070 + }, + { + "epoch": 1.886137772461542, + "grad_norm": 0.11632103472948074, + "learning_rate": 9.839676558732248e-08, + "loss": 0.0012, + "step": 294080 + }, + { + "epoch": 1.8862019093553282, + "grad_norm": 0.1448919028043747, + "learning_rate": 9.828630423229124e-08, + "loss": 0.002, + "step": 294090 + }, + { + "epoch": 1.886266046249114, + "grad_norm": 0.021598802879452705, + "learning_rate": 9.81759042992364e-08, + "loss": 0.0008, + "step": 294100 + }, + { + "epoch": 1.8863301831429002, + "grad_norm": 0.05400576442480087, + "learning_rate": 9.806556578954019e-08, + "loss": 0.0004, + "step": 294110 + }, + { + "epoch": 1.8863943200366862, + "grad_norm": 0.1336008459329605, + "learning_rate": 9.795528870458593e-08, + "loss": 0.0009, + "step": 294120 + }, + { + "epoch": 1.8864584569304723, + "grad_norm": 0.24765513837337494, + "learning_rate": 9.784507304575586e-08, + "loss": 0.0012, + "step": 294130 + }, + { + "epoch": 1.8865225938242585, + "grad_norm": 0.09768116474151611, + "learning_rate": 9.773491881442998e-08, + "loss": 0.0009, + "step": 294140 + }, + { + "epoch": 1.8865867307180446, + "grad_norm": 0.10387171059846878, + "learning_rate": 9.762482601198886e-08, + "loss": 0.0017, + "step": 294150 + }, + { + "epoch": 1.8866508676118308, + "grad_norm": 0.06271151453256607, + "learning_rate": 9.751479463981306e-08, + "loss": 0.0006, + "step": 294160 + }, + { + "epoch": 1.886715004505617, + "grad_norm": 0.15673549473285675, + "learning_rate": 9.740482469928036e-08, + "loss": 0.0009, + "step": 294170 + }, + { + "epoch": 1.8867791413994028, + "grad_norm": 0.14968831837177277, + "learning_rate": 9.729491619176912e-08, + "loss": 0.0018, + "step": 294180 + }, + { + "epoch": 1.886843278293189, + "grad_norm": 0.13565237820148468, + "learning_rate": 9.718506911865655e-08, + "loss": 0.0007, + "step": 294190 + }, + { + "epoch": 1.8869074151869751, + "grad_norm": 0.07846157252788544, + "learning_rate": 9.707528348131878e-08, + "loss": 0.0015, + "step": 294200 + }, + { + "epoch": 1.886971552080761, + "grad_norm": 0.09204721450805664, + "learning_rate": 9.69655592811325e-08, + "loss": 0.0004, + "step": 294210 + }, + { + "epoch": 1.8870356889745472, + "grad_norm": 0.003562049474567175, + "learning_rate": 9.685589651947102e-08, + "loss": 0.0012, + "step": 294220 + }, + { + "epoch": 1.8870998258683334, + "grad_norm": 0.059349425137043, + "learning_rate": 9.67462951977094e-08, + "loss": 0.001, + "step": 294230 + }, + { + "epoch": 1.8871639627621195, + "grad_norm": 0.08028772473335266, + "learning_rate": 9.66367553172215e-08, + "loss": 0.0009, + "step": 294240 + }, + { + "epoch": 1.8872280996559057, + "grad_norm": 0.0028649321757256985, + "learning_rate": 9.652727687937957e-08, + "loss": 0.0007, + "step": 294250 + }, + { + "epoch": 1.8872922365496918, + "grad_norm": 0.16632534563541412, + "learning_rate": 9.641785988555529e-08, + "loss": 0.0018, + "step": 294260 + }, + { + "epoch": 1.8873563734434777, + "grad_norm": 0.05998115986585617, + "learning_rate": 9.630850433711925e-08, + "loss": 0.0007, + "step": 294270 + }, + { + "epoch": 1.8874205103372639, + "grad_norm": 0.2951284945011139, + "learning_rate": 9.619921023544254e-08, + "loss": 0.0014, + "step": 294280 + }, + { + "epoch": 1.8874846472310498, + "grad_norm": 0.05850505083799362, + "learning_rate": 9.608997758189465e-08, + "loss": 0.0008, + "step": 294290 + }, + { + "epoch": 1.887548784124836, + "grad_norm": 0.005478013306856155, + "learning_rate": 9.598080637784335e-08, + "loss": 0.0005, + "step": 294300 + }, + { + "epoch": 1.887612921018622, + "grad_norm": 0.13967640697956085, + "learning_rate": 9.587169662465811e-08, + "loss": 0.001, + "step": 294310 + }, + { + "epoch": 1.8876770579124083, + "grad_norm": 0.06771519780158997, + "learning_rate": 9.576264832370508e-08, + "loss": 0.0007, + "step": 294320 + }, + { + "epoch": 1.8877411948061944, + "grad_norm": 0.17400945723056793, + "learning_rate": 9.565366147635147e-08, + "loss": 0.0008, + "step": 294330 + }, + { + "epoch": 1.8878053316999805, + "grad_norm": 0.05246680974960327, + "learning_rate": 9.554473608396175e-08, + "loss": 0.0007, + "step": 294340 + }, + { + "epoch": 1.8878694685937667, + "grad_norm": 0.016703316941857338, + "learning_rate": 9.543587214790261e-08, + "loss": 0.0017, + "step": 294350 + }, + { + "epoch": 1.8879336054875526, + "grad_norm": 0.014375735074281693, + "learning_rate": 9.532706966953686e-08, + "loss": 0.002, + "step": 294360 + }, + { + "epoch": 1.8879977423813388, + "grad_norm": 0.1126503273844719, + "learning_rate": 9.52183286502284e-08, + "loss": 0.001, + "step": 294370 + }, + { + "epoch": 1.8880618792751247, + "grad_norm": 0.0054759010672569275, + "learning_rate": 9.510964909133946e-08, + "loss": 0.0011, + "step": 294380 + }, + { + "epoch": 1.8881260161689108, + "grad_norm": 0.05257393419742584, + "learning_rate": 9.500103099423174e-08, + "loss": 0.0011, + "step": 294390 + }, + { + "epoch": 1.888190153062697, + "grad_norm": 0.05237250030040741, + "learning_rate": 9.489247436026749e-08, + "loss": 0.0037, + "step": 294400 + }, + { + "epoch": 1.8882542899564831, + "grad_norm": 0.13146939873695374, + "learning_rate": 9.478397919080506e-08, + "loss": 0.0014, + "step": 294410 + }, + { + "epoch": 1.8883184268502693, + "grad_norm": 0.01931230165064335, + "learning_rate": 9.467554548720615e-08, + "loss": 0.0009, + "step": 294420 + }, + { + "epoch": 1.8883825637440554, + "grad_norm": 0.1467883288860321, + "learning_rate": 9.456717325082798e-08, + "loss": 0.002, + "step": 294430 + }, + { + "epoch": 1.8884467006378414, + "grad_norm": 0.02824421413242817, + "learning_rate": 9.445886248302949e-08, + "loss": 0.0005, + "step": 294440 + }, + { + "epoch": 1.8885108375316275, + "grad_norm": 0.038735803216695786, + "learning_rate": 9.435061318516625e-08, + "loss": 0.0011, + "step": 294450 + }, + { + "epoch": 1.8885749744254134, + "grad_norm": 0.03149021416902542, + "learning_rate": 9.424242535859662e-08, + "loss": 0.0004, + "step": 294460 + }, + { + "epoch": 1.8886391113191996, + "grad_norm": 0.034077052026987076, + "learning_rate": 9.413429900467564e-08, + "loss": 0.0007, + "step": 294470 + }, + { + "epoch": 1.8887032482129857, + "grad_norm": 0.00893963873386383, + "learning_rate": 9.402623412475775e-08, + "loss": 0.0005, + "step": 294480 + }, + { + "epoch": 1.888767385106772, + "grad_norm": 0.010076693259179592, + "learning_rate": 9.391823072019746e-08, + "loss": 0.0003, + "step": 294490 + }, + { + "epoch": 1.888831522000558, + "grad_norm": 0.0885658785700798, + "learning_rate": 9.38102887923481e-08, + "loss": 0.0016, + "step": 294500 + }, + { + "epoch": 1.8888956588943442, + "grad_norm": 0.053332455456256866, + "learning_rate": 9.37024083425625e-08, + "loss": 0.001, + "step": 294510 + }, + { + "epoch": 1.8889597957881303, + "grad_norm": 0.07140041142702103, + "learning_rate": 9.359458937219179e-08, + "loss": 0.0025, + "step": 294520 + }, + { + "epoch": 1.8890239326819163, + "grad_norm": 0.06093136593699455, + "learning_rate": 9.348683188258712e-08, + "loss": 0.001, + "step": 294530 + }, + { + "epoch": 1.8890880695757024, + "grad_norm": 0.44862136244773865, + "learning_rate": 9.337913587509961e-08, + "loss": 0.0027, + "step": 294540 + }, + { + "epoch": 1.8891522064694883, + "grad_norm": 0.14340335130691528, + "learning_rate": 9.327150135107821e-08, + "loss": 0.0015, + "step": 294550 + }, + { + "epoch": 1.8892163433632745, + "grad_norm": 0.07659434527158737, + "learning_rate": 9.316392831187126e-08, + "loss": 0.0016, + "step": 294560 + }, + { + "epoch": 1.8892804802570606, + "grad_norm": 0.0966777428984642, + "learning_rate": 9.305641675882771e-08, + "loss": 0.0012, + "step": 294570 + }, + { + "epoch": 1.8893446171508468, + "grad_norm": 0.28905001282691956, + "learning_rate": 9.294896669329423e-08, + "loss": 0.0021, + "step": 294580 + }, + { + "epoch": 1.889408754044633, + "grad_norm": 0.12563137710094452, + "learning_rate": 9.284157811661698e-08, + "loss": 0.0015, + "step": 294590 + }, + { + "epoch": 1.889472890938419, + "grad_norm": 0.2730359137058258, + "learning_rate": 9.273425103014155e-08, + "loss": 0.0022, + "step": 294600 + }, + { + "epoch": 1.889537027832205, + "grad_norm": 0.07535937428474426, + "learning_rate": 9.262698543521353e-08, + "loss": 0.0008, + "step": 294610 + }, + { + "epoch": 1.8896011647259912, + "grad_norm": 0.35285791754722595, + "learning_rate": 9.251978133317629e-08, + "loss": 0.0023, + "step": 294620 + }, + { + "epoch": 1.8896653016197773, + "grad_norm": 0.08534836769104004, + "learning_rate": 9.241263872537376e-08, + "loss": 0.0007, + "step": 294630 + }, + { + "epoch": 1.8897294385135632, + "grad_norm": 0.08622516691684723, + "learning_rate": 9.230555761314819e-08, + "loss": 0.0018, + "step": 294640 + }, + { + "epoch": 1.8897935754073494, + "grad_norm": 0.06464492529630661, + "learning_rate": 9.219853799784129e-08, + "loss": 0.0008, + "step": 294650 + }, + { + "epoch": 1.8898577123011355, + "grad_norm": 0.0783517137169838, + "learning_rate": 9.209157988079475e-08, + "loss": 0.0018, + "step": 294660 + }, + { + "epoch": 1.8899218491949217, + "grad_norm": 0.057245105504989624, + "learning_rate": 9.198468326334809e-08, + "loss": 0.0009, + "step": 294670 + }, + { + "epoch": 1.8899859860887078, + "grad_norm": 0.05029996857047081, + "learning_rate": 9.187784814684076e-08, + "loss": 0.0009, + "step": 294680 + }, + { + "epoch": 1.890050122982494, + "grad_norm": 0.10758306831121445, + "learning_rate": 9.177107453261225e-08, + "loss": 0.0008, + "step": 294690 + }, + { + "epoch": 1.89011425987628, + "grad_norm": 0.2533423602581024, + "learning_rate": 9.166436242199983e-08, + "loss": 0.0023, + "step": 294700 + }, + { + "epoch": 1.890178396770066, + "grad_norm": 0.020279565826058388, + "learning_rate": 9.155771181634076e-08, + "loss": 0.0011, + "step": 294710 + }, + { + "epoch": 1.890242533663852, + "grad_norm": 0.1295294463634491, + "learning_rate": 9.145112271697176e-08, + "loss": 0.0007, + "step": 294720 + }, + { + "epoch": 1.8903066705576381, + "grad_norm": 0.03561263158917427, + "learning_rate": 9.134459512522842e-08, + "loss": 0.0015, + "step": 294730 + }, + { + "epoch": 1.8903708074514243, + "grad_norm": 0.016167763620615005, + "learning_rate": 9.123812904244522e-08, + "loss": 0.001, + "step": 294740 + }, + { + "epoch": 1.8904349443452104, + "grad_norm": 0.055394627153873444, + "learning_rate": 9.11317244699561e-08, + "loss": 0.0008, + "step": 294750 + }, + { + "epoch": 1.8904990812389966, + "grad_norm": 0.029049696400761604, + "learning_rate": 9.102538140909555e-08, + "loss": 0.0015, + "step": 294760 + }, + { + "epoch": 1.8905632181327827, + "grad_norm": 0.0370810441672802, + "learning_rate": 9.091909986119474e-08, + "loss": 0.0009, + "step": 294770 + }, + { + "epoch": 1.8906273550265689, + "grad_norm": 0.17660130560398102, + "learning_rate": 9.081287982758647e-08, + "loss": 0.0007, + "step": 294780 + }, + { + "epoch": 1.8906914919203548, + "grad_norm": 0.15603278577327728, + "learning_rate": 9.070672130960134e-08, + "loss": 0.0015, + "step": 294790 + }, + { + "epoch": 1.890755628814141, + "grad_norm": 0.0040861391462385654, + "learning_rate": 9.060062430856942e-08, + "loss": 0.0004, + "step": 294800 + }, + { + "epoch": 1.8908197657079269, + "grad_norm": 0.09123212099075317, + "learning_rate": 9.049458882582075e-08, + "loss": 0.0006, + "step": 294810 + }, + { + "epoch": 1.890883902601713, + "grad_norm": 0.04571864753961563, + "learning_rate": 9.038861486268313e-08, + "loss": 0.0011, + "step": 294820 + }, + { + "epoch": 1.8909480394954992, + "grad_norm": 0.192408949136734, + "learning_rate": 9.028270242048498e-08, + "loss": 0.0009, + "step": 294830 + }, + { + "epoch": 1.8910121763892853, + "grad_norm": 0.04279579594731331, + "learning_rate": 9.017685150055411e-08, + "loss": 0.0004, + "step": 294840 + }, + { + "epoch": 1.8910763132830715, + "grad_norm": 0.19916805624961853, + "learning_rate": 9.007106210421613e-08, + "loss": 0.0032, + "step": 294850 + }, + { + "epoch": 1.8911404501768576, + "grad_norm": 0.03907819092273712, + "learning_rate": 8.996533423279608e-08, + "loss": 0.001, + "step": 294860 + }, + { + "epoch": 1.8912045870706435, + "grad_norm": 0.03859139606356621, + "learning_rate": 8.985966788762013e-08, + "loss": 0.0008, + "step": 294870 + }, + { + "epoch": 1.8912687239644297, + "grad_norm": 0.05505898594856262, + "learning_rate": 8.975406307001222e-08, + "loss": 0.0008, + "step": 294880 + }, + { + "epoch": 1.8913328608582156, + "grad_norm": 0.0035464514512568712, + "learning_rate": 8.964851978129463e-08, + "loss": 0.0006, + "step": 294890 + }, + { + "epoch": 1.8913969977520018, + "grad_norm": 0.016381246969103813, + "learning_rate": 8.954303802279019e-08, + "loss": 0.0013, + "step": 294900 + }, + { + "epoch": 1.891461134645788, + "grad_norm": 0.04539335519075394, + "learning_rate": 8.943761779582116e-08, + "loss": 0.0006, + "step": 294910 + }, + { + "epoch": 1.891525271539574, + "grad_norm": 0.04665771871805191, + "learning_rate": 8.933225910170818e-08, + "loss": 0.0004, + "step": 294920 + }, + { + "epoch": 1.8915894084333602, + "grad_norm": 0.26177358627319336, + "learning_rate": 8.922696194177238e-08, + "loss": 0.001, + "step": 294930 + }, + { + "epoch": 1.8916535453271464, + "grad_norm": 0.005626978352665901, + "learning_rate": 8.912172631733162e-08, + "loss": 0.0004, + "step": 294940 + }, + { + "epoch": 1.8917176822209325, + "grad_norm": 0.3913898766040802, + "learning_rate": 8.90165522297054e-08, + "loss": 0.0024, + "step": 294950 + }, + { + "epoch": 1.8917818191147184, + "grad_norm": 0.045536499470472336, + "learning_rate": 8.89114396802121e-08, + "loss": 0.0016, + "step": 294960 + }, + { + "epoch": 1.8918459560085046, + "grad_norm": 0.051582399755716324, + "learning_rate": 8.880638867016844e-08, + "loss": 0.0006, + "step": 294970 + }, + { + "epoch": 1.8919100929022905, + "grad_norm": 0.1261383295059204, + "learning_rate": 8.870139920089005e-08, + "loss": 0.0009, + "step": 294980 + }, + { + "epoch": 1.8919742297960767, + "grad_norm": 0.07379081100225449, + "learning_rate": 8.859647127369364e-08, + "loss": 0.0011, + "step": 294990 + }, + { + "epoch": 1.8920383666898628, + "grad_norm": 0.06457892805337906, + "learning_rate": 8.849160488989317e-08, + "loss": 0.0009, + "step": 295000 + }, + { + "epoch": 1.892102503583649, + "grad_norm": 0.08591438084840775, + "learning_rate": 8.838680005080368e-08, + "loss": 0.003, + "step": 295010 + }, + { + "epoch": 1.892166640477435, + "grad_norm": 0.08735421299934387, + "learning_rate": 8.828205675773749e-08, + "loss": 0.001, + "step": 295020 + }, + { + "epoch": 1.8922307773712213, + "grad_norm": 0.037042561918497086, + "learning_rate": 8.81773750120074e-08, + "loss": 0.0011, + "step": 295030 + }, + { + "epoch": 1.8922949142650072, + "grad_norm": 0.03372732177376747, + "learning_rate": 8.807275481492572e-08, + "loss": 0.0013, + "step": 295040 + }, + { + "epoch": 1.8923590511587933, + "grad_norm": 0.03540458902716637, + "learning_rate": 8.79681961678025e-08, + "loss": 0.0009, + "step": 295050 + }, + { + "epoch": 1.8924231880525795, + "grad_norm": 0.006590025965124369, + "learning_rate": 8.786369907194836e-08, + "loss": 0.0005, + "step": 295060 + }, + { + "epoch": 1.8924873249463654, + "grad_norm": 0.11606483906507492, + "learning_rate": 8.775926352867281e-08, + "loss": 0.0016, + "step": 295070 + }, + { + "epoch": 1.8925514618401515, + "grad_norm": 0.04718036204576492, + "learning_rate": 8.765488953928425e-08, + "loss": 0.0008, + "step": 295080 + }, + { + "epoch": 1.8926155987339377, + "grad_norm": 0.0750744491815567, + "learning_rate": 8.755057710509108e-08, + "loss": 0.0011, + "step": 295090 + }, + { + "epoch": 1.8926797356277238, + "grad_norm": 0.0894516259431839, + "learning_rate": 8.744632622739946e-08, + "loss": 0.0014, + "step": 295100 + }, + { + "epoch": 1.89274387252151, + "grad_norm": 0.039059873670339584, + "learning_rate": 8.734213690751725e-08, + "loss": 0.0011, + "step": 295110 + }, + { + "epoch": 1.8928080094152961, + "grad_norm": 0.1326218545436859, + "learning_rate": 8.72380091467484e-08, + "loss": 0.0017, + "step": 295120 + }, + { + "epoch": 1.892872146309082, + "grad_norm": 0.07302480936050415, + "learning_rate": 8.713394294639799e-08, + "loss": 0.0009, + "step": 295130 + }, + { + "epoch": 1.8929362832028682, + "grad_norm": 0.08233803510665894, + "learning_rate": 8.702993830777162e-08, + "loss": 0.001, + "step": 295140 + }, + { + "epoch": 1.8930004200966541, + "grad_norm": 0.004406723193824291, + "learning_rate": 8.692599523217049e-08, + "loss": 0.0007, + "step": 295150 + }, + { + "epoch": 1.8930645569904403, + "grad_norm": 0.10290886461734772, + "learning_rate": 8.682211372089855e-08, + "loss": 0.0006, + "step": 295160 + }, + { + "epoch": 1.8931286938842264, + "grad_norm": 0.06705625355243683, + "learning_rate": 8.671829377525642e-08, + "loss": 0.0019, + "step": 295170 + }, + { + "epoch": 1.8931928307780126, + "grad_norm": 0.06751684844493866, + "learning_rate": 8.66145353965453e-08, + "loss": 0.0009, + "step": 295180 + }, + { + "epoch": 1.8932569676717987, + "grad_norm": 0.12947224080562592, + "learning_rate": 8.651083858606635e-08, + "loss": 0.0005, + "step": 295190 + }, + { + "epoch": 1.893321104565585, + "grad_norm": 0.03041689656674862, + "learning_rate": 8.640720334511799e-08, + "loss": 0.0009, + "step": 295200 + }, + { + "epoch": 1.893385241459371, + "grad_norm": 0.13694101572036743, + "learning_rate": 8.630362967499862e-08, + "loss": 0.0013, + "step": 295210 + }, + { + "epoch": 1.893449378353157, + "grad_norm": 0.17339839041233063, + "learning_rate": 8.620011757700719e-08, + "loss": 0.0016, + "step": 295220 + }, + { + "epoch": 1.8935135152469431, + "grad_norm": 0.06717420369386673, + "learning_rate": 8.60966670524399e-08, + "loss": 0.0015, + "step": 295230 + }, + { + "epoch": 1.893577652140729, + "grad_norm": 0.05104643851518631, + "learning_rate": 8.599327810259295e-08, + "loss": 0.0012, + "step": 295240 + }, + { + "epoch": 1.8936417890345152, + "grad_norm": 0.021760204806923866, + "learning_rate": 8.588995072876249e-08, + "loss": 0.0006, + "step": 295250 + }, + { + "epoch": 1.8937059259283013, + "grad_norm": 0.07978024333715439, + "learning_rate": 8.578668493224306e-08, + "loss": 0.0011, + "step": 295260 + }, + { + "epoch": 1.8937700628220875, + "grad_norm": 0.3168538808822632, + "learning_rate": 8.568348071432863e-08, + "loss": 0.0024, + "step": 295270 + }, + { + "epoch": 1.8938341997158736, + "grad_norm": 0.07114940881729126, + "learning_rate": 8.55803380763115e-08, + "loss": 0.0022, + "step": 295280 + }, + { + "epoch": 1.8938983366096598, + "grad_norm": 0.021724779158830643, + "learning_rate": 8.547725701948618e-08, + "loss": 0.0006, + "step": 295290 + }, + { + "epoch": 1.8939624735034457, + "grad_norm": 0.03300023078918457, + "learning_rate": 8.537423754514274e-08, + "loss": 0.001, + "step": 295300 + }, + { + "epoch": 1.8940266103972319, + "grad_norm": 0.07515677064657211, + "learning_rate": 8.527127965457293e-08, + "loss": 0.0012, + "step": 295310 + }, + { + "epoch": 1.8940907472910178, + "grad_norm": 0.05644484981894493, + "learning_rate": 8.516838334906574e-08, + "loss": 0.0012, + "step": 295320 + }, + { + "epoch": 1.894154884184804, + "grad_norm": 0.008623950183391571, + "learning_rate": 8.506554862991179e-08, + "loss": 0.0009, + "step": 295330 + }, + { + "epoch": 1.89421902107859, + "grad_norm": 0.04799478501081467, + "learning_rate": 8.496277549839893e-08, + "loss": 0.0014, + "step": 295340 + }, + { + "epoch": 1.8942831579723762, + "grad_norm": 0.050887249410152435, + "learning_rate": 8.486006395581559e-08, + "loss": 0.001, + "step": 295350 + }, + { + "epoch": 1.8943472948661624, + "grad_norm": 0.2912452518939972, + "learning_rate": 8.475741400344794e-08, + "loss": 0.0008, + "step": 295360 + }, + { + "epoch": 1.8944114317599485, + "grad_norm": 0.05949672311544418, + "learning_rate": 8.465482564258332e-08, + "loss": 0.0012, + "step": 295370 + }, + { + "epoch": 1.8944755686537347, + "grad_norm": 0.05052575096487999, + "learning_rate": 8.455229887450622e-08, + "loss": 0.0007, + "step": 295380 + }, + { + "epoch": 1.8945397055475206, + "grad_norm": 0.014839448034763336, + "learning_rate": 8.44498337005023e-08, + "loss": 0.0011, + "step": 295390 + }, + { + "epoch": 1.8946038424413068, + "grad_norm": 0.10445275902748108, + "learning_rate": 8.434743012185442e-08, + "loss": 0.0012, + "step": 295400 + }, + { + "epoch": 1.8946679793350927, + "grad_norm": 0.16053244471549988, + "learning_rate": 8.424508813984711e-08, + "loss": 0.0005, + "step": 295410 + }, + { + "epoch": 1.8947321162288788, + "grad_norm": 0.010030178353190422, + "learning_rate": 8.414280775576156e-08, + "loss": 0.0005, + "step": 295420 + }, + { + "epoch": 1.894796253122665, + "grad_norm": 0.18217097222805023, + "learning_rate": 8.404058897088008e-08, + "loss": 0.0014, + "step": 295430 + }, + { + "epoch": 1.8948603900164511, + "grad_norm": 0.16161665320396423, + "learning_rate": 8.393843178648331e-08, + "loss": 0.0012, + "step": 295440 + }, + { + "epoch": 1.8949245269102373, + "grad_norm": 0.0912969708442688, + "learning_rate": 8.383633620385134e-08, + "loss": 0.0021, + "step": 295450 + }, + { + "epoch": 1.8949886638040234, + "grad_norm": 0.0579666867852211, + "learning_rate": 8.373430222426427e-08, + "loss": 0.0013, + "step": 295460 + }, + { + "epoch": 1.8950528006978096, + "grad_norm": 0.19168339669704437, + "learning_rate": 8.363232984899938e-08, + "loss": 0.0007, + "step": 295470 + }, + { + "epoch": 1.8951169375915955, + "grad_norm": 0.11395134776830673, + "learning_rate": 8.353041907933512e-08, + "loss": 0.0011, + "step": 295480 + }, + { + "epoch": 1.8951810744853816, + "grad_norm": 0.05215844139456749, + "learning_rate": 8.342856991654879e-08, + "loss": 0.0008, + "step": 295490 + }, + { + "epoch": 1.8952452113791676, + "grad_norm": 0.012780345045030117, + "learning_rate": 8.332678236191605e-08, + "loss": 0.0007, + "step": 295500 + }, + { + "epoch": 1.8953093482729537, + "grad_norm": 0.00840887613594532, + "learning_rate": 8.322505641671252e-08, + "loss": 0.0011, + "step": 295510 + }, + { + "epoch": 1.8953734851667399, + "grad_norm": 0.011757910251617432, + "learning_rate": 8.312339208221331e-08, + "loss": 0.0008, + "step": 295520 + }, + { + "epoch": 1.895437622060526, + "grad_norm": 0.08257078379392624, + "learning_rate": 8.302178935969186e-08, + "loss": 0.0006, + "step": 295530 + }, + { + "epoch": 1.8955017589543122, + "grad_norm": 0.02417915128171444, + "learning_rate": 8.292024825042155e-08, + "loss": 0.0015, + "step": 295540 + }, + { + "epoch": 1.8955658958480983, + "grad_norm": 0.10538596659898758, + "learning_rate": 8.281876875567418e-08, + "loss": 0.0017, + "step": 295550 + }, + { + "epoch": 1.8956300327418842, + "grad_norm": 0.07173268496990204, + "learning_rate": 8.271735087672261e-08, + "loss": 0.0011, + "step": 295560 + }, + { + "epoch": 1.8956941696356704, + "grad_norm": 0.10954654961824417, + "learning_rate": 8.261599461483694e-08, + "loss": 0.001, + "step": 295570 + }, + { + "epoch": 1.8957583065294563, + "grad_norm": 0.040432389825582504, + "learning_rate": 8.25146999712867e-08, + "loss": 0.0014, + "step": 295580 + }, + { + "epoch": 1.8958224434232425, + "grad_norm": 0.04825109243392944, + "learning_rate": 8.241346694734198e-08, + "loss": 0.0011, + "step": 295590 + }, + { + "epoch": 1.8958865803170286, + "grad_norm": 0.138762965798378, + "learning_rate": 8.231229554427123e-08, + "loss": 0.0007, + "step": 295600 + }, + { + "epoch": 1.8959507172108148, + "grad_norm": 0.14435996115207672, + "learning_rate": 8.221118576334231e-08, + "loss": 0.0011, + "step": 295610 + }, + { + "epoch": 1.896014854104601, + "grad_norm": 0.031372375786304474, + "learning_rate": 8.211013760582142e-08, + "loss": 0.0027, + "step": 295620 + }, + { + "epoch": 1.896078990998387, + "grad_norm": 0.0296054445207119, + "learning_rate": 8.200915107297535e-08, + "loss": 0.0012, + "step": 295630 + }, + { + "epoch": 1.8961431278921732, + "grad_norm": 0.12075857073068619, + "learning_rate": 8.190822616606975e-08, + "loss": 0.0011, + "step": 295640 + }, + { + "epoch": 1.8962072647859591, + "grad_norm": 0.040819283574819565, + "learning_rate": 8.180736288636915e-08, + "loss": 0.0021, + "step": 295650 + }, + { + "epoch": 1.8962714016797453, + "grad_norm": 0.07715485244989395, + "learning_rate": 8.170656123513643e-08, + "loss": 0.0012, + "step": 295660 + }, + { + "epoch": 1.8963355385735312, + "grad_norm": 0.015074445866048336, + "learning_rate": 8.160582121363614e-08, + "loss": 0.0006, + "step": 295670 + }, + { + "epoch": 1.8963996754673174, + "grad_norm": 0.09128915518522263, + "learning_rate": 8.150514282312949e-08, + "loss": 0.001, + "step": 295680 + }, + { + "epoch": 1.8964638123611035, + "grad_norm": 0.10052846372127533, + "learning_rate": 8.140452606487937e-08, + "loss": 0.0011, + "step": 295690 + }, + { + "epoch": 1.8965279492548897, + "grad_norm": 0.017802821472287178, + "learning_rate": 8.130397094014475e-08, + "loss": 0.0011, + "step": 295700 + }, + { + "epoch": 1.8965920861486758, + "grad_norm": 0.0714261382818222, + "learning_rate": 8.120347745018798e-08, + "loss": 0.0012, + "step": 295710 + }, + { + "epoch": 1.896656223042462, + "grad_norm": 0.016490206122398376, + "learning_rate": 8.110304559626635e-08, + "loss": 0.0004, + "step": 295720 + }, + { + "epoch": 1.8967203599362479, + "grad_norm": 0.1430540829896927, + "learning_rate": 8.100267537963947e-08, + "loss": 0.0015, + "step": 295730 + }, + { + "epoch": 1.896784496830034, + "grad_norm": 0.1116696298122406, + "learning_rate": 8.090236680156404e-08, + "loss": 0.0007, + "step": 295740 + }, + { + "epoch": 1.8968486337238202, + "grad_norm": 0.018631864339113235, + "learning_rate": 8.0802119863298e-08, + "loss": 0.0009, + "step": 295750 + }, + { + "epoch": 1.896912770617606, + "grad_norm": 0.05354061350226402, + "learning_rate": 8.070193456609699e-08, + "loss": 0.0008, + "step": 295760 + }, + { + "epoch": 1.8969769075113923, + "grad_norm": 0.0025101928040385246, + "learning_rate": 8.060181091121667e-08, + "loss": 0.0008, + "step": 295770 + }, + { + "epoch": 1.8970410444051784, + "grad_norm": 0.09185576438903809, + "learning_rate": 8.050174889991103e-08, + "loss": 0.0017, + "step": 295780 + }, + { + "epoch": 1.8971051812989645, + "grad_norm": 0.0862610936164856, + "learning_rate": 8.040174853343464e-08, + "loss": 0.0005, + "step": 295790 + }, + { + "epoch": 1.8971693181927507, + "grad_norm": 0.024376560002565384, + "learning_rate": 8.030180981304036e-08, + "loss": 0.0013, + "step": 295800 + }, + { + "epoch": 1.8972334550865368, + "grad_norm": 0.05399715155363083, + "learning_rate": 8.020193273998055e-08, + "loss": 0.0012, + "step": 295810 + }, + { + "epoch": 1.8972975919803228, + "grad_norm": 0.029330408200621605, + "learning_rate": 8.010211731550643e-08, + "loss": 0.0015, + "step": 295820 + }, + { + "epoch": 1.897361728874109, + "grad_norm": 0.0124303437769413, + "learning_rate": 8.00023635408692e-08, + "loss": 0.0006, + "step": 295830 + }, + { + "epoch": 1.8974258657678948, + "grad_norm": 0.33522889018058777, + "learning_rate": 7.990267141731845e-08, + "loss": 0.0012, + "step": 295840 + }, + { + "epoch": 1.897490002661681, + "grad_norm": 0.04323439672589302, + "learning_rate": 7.980304094610314e-08, + "loss": 0.0007, + "step": 295850 + }, + { + "epoch": 1.8975541395554671, + "grad_norm": 0.007989304140210152, + "learning_rate": 7.970347212847285e-08, + "loss": 0.0009, + "step": 295860 + }, + { + "epoch": 1.8976182764492533, + "grad_norm": 0.06763707101345062, + "learning_rate": 7.960396496567436e-08, + "loss": 0.0012, + "step": 295870 + }, + { + "epoch": 1.8976824133430394, + "grad_norm": 0.045833926647901535, + "learning_rate": 7.9504519458955e-08, + "loss": 0.0012, + "step": 295880 + }, + { + "epoch": 1.8977465502368256, + "grad_norm": 0.06413446366786957, + "learning_rate": 7.940513560955986e-08, + "loss": 0.001, + "step": 295890 + }, + { + "epoch": 1.8978106871306117, + "grad_norm": 0.08513320982456207, + "learning_rate": 7.930581341873577e-08, + "loss": 0.0006, + "step": 295900 + }, + { + "epoch": 1.8978748240243977, + "grad_norm": 0.11804290115833282, + "learning_rate": 7.920655288772672e-08, + "loss": 0.0012, + "step": 295910 + }, + { + "epoch": 1.8979389609181838, + "grad_norm": 0.018304867669939995, + "learning_rate": 7.910735401777613e-08, + "loss": 0.0013, + "step": 295920 + }, + { + "epoch": 1.8980030978119697, + "grad_norm": 0.07030729949474335, + "learning_rate": 7.900821681012693e-08, + "loss": 0.0008, + "step": 295930 + }, + { + "epoch": 1.898067234705756, + "grad_norm": 0.024078192189335823, + "learning_rate": 7.8909141266022e-08, + "loss": 0.0004, + "step": 295940 + }, + { + "epoch": 1.898131371599542, + "grad_norm": 0.047128576785326004, + "learning_rate": 7.881012738670257e-08, + "loss": 0.001, + "step": 295950 + }, + { + "epoch": 1.8981955084933282, + "grad_norm": 0.06363033503293991, + "learning_rate": 7.871117517340987e-08, + "loss": 0.0009, + "step": 295960 + }, + { + "epoch": 1.8982596453871143, + "grad_norm": 0.3511105179786682, + "learning_rate": 7.861228462738235e-08, + "loss": 0.001, + "step": 295970 + }, + { + "epoch": 1.8983237822809005, + "grad_norm": 0.05861715227365494, + "learning_rate": 7.851345574986124e-08, + "loss": 0.002, + "step": 295980 + }, + { + "epoch": 1.8983879191746864, + "grad_norm": 0.017348650842905045, + "learning_rate": 7.841468854208334e-08, + "loss": 0.0005, + "step": 295990 + }, + { + "epoch": 1.8984520560684726, + "grad_norm": 0.03482039272785187, + "learning_rate": 7.831598300528653e-08, + "loss": 0.0011, + "step": 296000 + }, + { + "epoch": 1.8985161929622585, + "grad_norm": 0.005001869518309832, + "learning_rate": 7.821733914070816e-08, + "loss": 0.0009, + "step": 296010 + }, + { + "epoch": 1.8985803298560446, + "grad_norm": 0.14488859474658966, + "learning_rate": 7.811875694958448e-08, + "loss": 0.0025, + "step": 296020 + }, + { + "epoch": 1.8986444667498308, + "grad_norm": 0.09802429378032684, + "learning_rate": 7.802023643315005e-08, + "loss": 0.0012, + "step": 296030 + }, + { + "epoch": 1.898708603643617, + "grad_norm": 0.018383031710982323, + "learning_rate": 7.792177759263941e-08, + "loss": 0.001, + "step": 296040 + }, + { + "epoch": 1.898772740537403, + "grad_norm": 0.1427089124917984, + "learning_rate": 7.782338042928716e-08, + "loss": 0.0019, + "step": 296050 + }, + { + "epoch": 1.8988368774311892, + "grad_norm": 0.02857111766934395, + "learning_rate": 7.772504494432564e-08, + "loss": 0.0018, + "step": 296060 + }, + { + "epoch": 1.8989010143249754, + "grad_norm": 0.12065998464822769, + "learning_rate": 7.76267711389872e-08, + "loss": 0.0008, + "step": 296070 + }, + { + "epoch": 1.8989651512187613, + "grad_norm": 0.10027211904525757, + "learning_rate": 7.752855901450306e-08, + "loss": 0.0006, + "step": 296080 + }, + { + "epoch": 1.8990292881125475, + "grad_norm": 0.14700154960155487, + "learning_rate": 7.743040857210393e-08, + "loss": 0.0025, + "step": 296090 + }, + { + "epoch": 1.8990934250063334, + "grad_norm": 0.03631043806672096, + "learning_rate": 7.733231981302047e-08, + "loss": 0.001, + "step": 296100 + }, + { + "epoch": 1.8991575619001195, + "grad_norm": 0.05204037204384804, + "learning_rate": 7.72342927384806e-08, + "loss": 0.0016, + "step": 296110 + }, + { + "epoch": 1.8992216987939057, + "grad_norm": 0.02272013947367668, + "learning_rate": 7.713632734971388e-08, + "loss": 0.0006, + "step": 296120 + }, + { + "epoch": 1.8992858356876918, + "grad_norm": 0.06338615715503693, + "learning_rate": 7.703842364794711e-08, + "loss": 0.0014, + "step": 296130 + }, + { + "epoch": 1.899349972581478, + "grad_norm": 0.07160641252994537, + "learning_rate": 7.694058163440766e-08, + "loss": 0.001, + "step": 296140 + }, + { + "epoch": 1.8994141094752641, + "grad_norm": 0.03875984996557236, + "learning_rate": 7.684280131032062e-08, + "loss": 0.0007, + "step": 296150 + }, + { + "epoch": 1.89947824636905, + "grad_norm": 0.07607729732990265, + "learning_rate": 7.674508267691172e-08, + "loss": 0.0008, + "step": 296160 + }, + { + "epoch": 1.8995423832628362, + "grad_norm": 0.034141361713409424, + "learning_rate": 7.664742573540607e-08, + "loss": 0.0006, + "step": 296170 + }, + { + "epoch": 1.8996065201566223, + "grad_norm": 0.04295650124549866, + "learning_rate": 7.654983048702658e-08, + "loss": 0.0008, + "step": 296180 + }, + { + "epoch": 1.8996706570504083, + "grad_norm": 0.013392886146903038, + "learning_rate": 7.645229693299617e-08, + "loss": 0.0006, + "step": 296190 + }, + { + "epoch": 1.8997347939441944, + "grad_norm": 0.03318767994642258, + "learning_rate": 7.635482507453773e-08, + "loss": 0.0007, + "step": 296200 + }, + { + "epoch": 1.8997989308379806, + "grad_norm": 0.039401594549417496, + "learning_rate": 7.625741491287197e-08, + "loss": 0.0011, + "step": 296210 + }, + { + "epoch": 1.8998630677317667, + "grad_norm": 0.03686876967549324, + "learning_rate": 7.616006644922014e-08, + "loss": 0.0064, + "step": 296220 + }, + { + "epoch": 1.8999272046255529, + "grad_norm": 0.22167079150676727, + "learning_rate": 7.606277968480125e-08, + "loss": 0.0012, + "step": 296230 + }, + { + "epoch": 1.899991341519339, + "grad_norm": 0.036673545837402344, + "learning_rate": 7.596555462083488e-08, + "loss": 0.0008, + "step": 296240 + }, + { + "epoch": 1.900055478413125, + "grad_norm": 0.048822399228811264, + "learning_rate": 7.586839125853951e-08, + "loss": 0.0014, + "step": 296250 + }, + { + "epoch": 1.900119615306911, + "grad_norm": 0.23170608282089233, + "learning_rate": 7.577128959913193e-08, + "loss": 0.0014, + "step": 296260 + }, + { + "epoch": 1.900183752200697, + "grad_norm": 0.01074785366654396, + "learning_rate": 7.567424964382953e-08, + "loss": 0.0004, + "step": 296270 + }, + { + "epoch": 1.9002478890944832, + "grad_norm": 0.08027832210063934, + "learning_rate": 7.557727139384852e-08, + "loss": 0.0021, + "step": 296280 + }, + { + "epoch": 1.9003120259882693, + "grad_norm": 0.09156271070241928, + "learning_rate": 7.54803548504035e-08, + "loss": 0.0011, + "step": 296290 + }, + { + "epoch": 1.9003761628820555, + "grad_norm": 0.09900987148284912, + "learning_rate": 7.538350001470907e-08, + "loss": 0.0017, + "step": 296300 + }, + { + "epoch": 1.9004402997758416, + "grad_norm": 0.0034116050228476524, + "learning_rate": 7.528670688797868e-08, + "loss": 0.0009, + "step": 296310 + }, + { + "epoch": 1.9005044366696278, + "grad_norm": 0.04635313153266907, + "learning_rate": 7.518997547142637e-08, + "loss": 0.0006, + "step": 296320 + }, + { + "epoch": 1.900568573563414, + "grad_norm": 0.16556833684444427, + "learning_rate": 7.509330576626284e-08, + "loss": 0.0021, + "step": 296330 + }, + { + "epoch": 1.9006327104571998, + "grad_norm": 0.08494561910629272, + "learning_rate": 7.499669777369933e-08, + "loss": 0.0018, + "step": 296340 + }, + { + "epoch": 1.900696847350986, + "grad_norm": 0.21394364535808563, + "learning_rate": 7.490015149494823e-08, + "loss": 0.0033, + "step": 296350 + }, + { + "epoch": 1.900760984244772, + "grad_norm": 0.09006308764219284, + "learning_rate": 7.480366693121744e-08, + "loss": 0.0011, + "step": 296360 + }, + { + "epoch": 1.900825121138558, + "grad_norm": 0.044618889689445496, + "learning_rate": 7.47072440837171e-08, + "loss": 0.001, + "step": 296370 + }, + { + "epoch": 1.9008892580323442, + "grad_norm": 0.06901489198207855, + "learning_rate": 7.46108829536546e-08, + "loss": 0.0011, + "step": 296380 + }, + { + "epoch": 1.9009533949261304, + "grad_norm": 0.15160177648067474, + "learning_rate": 7.451458354223784e-08, + "loss": 0.0012, + "step": 296390 + }, + { + "epoch": 1.9010175318199165, + "grad_norm": 0.1087668314576149, + "learning_rate": 7.441834585067365e-08, + "loss": 0.0018, + "step": 296400 + }, + { + "epoch": 1.9010816687137027, + "grad_norm": 0.0020599712152034044, + "learning_rate": 7.432216988016771e-08, + "loss": 0.0008, + "step": 296410 + }, + { + "epoch": 1.9011458056074886, + "grad_norm": 0.07600867748260498, + "learning_rate": 7.422605563192575e-08, + "loss": 0.0005, + "step": 296420 + }, + { + "epoch": 1.9012099425012747, + "grad_norm": 0.03156861662864685, + "learning_rate": 7.41300031071518e-08, + "loss": 0.0021, + "step": 296430 + }, + { + "epoch": 1.9012740793950607, + "grad_norm": 0.05616842210292816, + "learning_rate": 7.403401230704876e-08, + "loss": 0.0009, + "step": 296440 + }, + { + "epoch": 1.9013382162888468, + "grad_norm": 0.036105792969465256, + "learning_rate": 7.39380832328207e-08, + "loss": 0.0005, + "step": 296450 + }, + { + "epoch": 1.901402353182633, + "grad_norm": 0.1208905577659607, + "learning_rate": 7.384221588566831e-08, + "loss": 0.0008, + "step": 296460 + }, + { + "epoch": 1.901466490076419, + "grad_norm": 0.10680118948221207, + "learning_rate": 7.374641026679396e-08, + "loss": 0.0006, + "step": 296470 + }, + { + "epoch": 1.9015306269702053, + "grad_norm": 0.1479857712984085, + "learning_rate": 7.365066637739837e-08, + "loss": 0.0013, + "step": 296480 + }, + { + "epoch": 1.9015947638639914, + "grad_norm": 0.13697151839733124, + "learning_rate": 7.355498421868001e-08, + "loss": 0.0011, + "step": 296490 + }, + { + "epoch": 1.9016589007577775, + "grad_norm": 0.1833760142326355, + "learning_rate": 7.345936379183904e-08, + "loss": 0.0009, + "step": 296500 + }, + { + "epoch": 1.9017230376515635, + "grad_norm": 0.012452002614736557, + "learning_rate": 7.336380509807284e-08, + "loss": 0.0004, + "step": 296510 + }, + { + "epoch": 1.9017871745453496, + "grad_norm": 0.06796858459711075, + "learning_rate": 7.326830813857933e-08, + "loss": 0.0015, + "step": 296520 + }, + { + "epoch": 1.9018513114391356, + "grad_norm": 0.04385437071323395, + "learning_rate": 7.317287291455478e-08, + "loss": 0.0012, + "step": 296530 + }, + { + "epoch": 1.9019154483329217, + "grad_norm": 0.05254632607102394, + "learning_rate": 7.3077499427196e-08, + "loss": 0.002, + "step": 296540 + }, + { + "epoch": 1.9019795852267078, + "grad_norm": 0.06706543266773224, + "learning_rate": 7.298218767769705e-08, + "loss": 0.0008, + "step": 296550 + }, + { + "epoch": 1.902043722120494, + "grad_norm": 0.024213319644331932, + "learning_rate": 7.288693766725253e-08, + "loss": 0.0011, + "step": 296560 + }, + { + "epoch": 1.9021078590142801, + "grad_norm": 0.06082941219210625, + "learning_rate": 7.279174939705536e-08, + "loss": 0.0021, + "step": 296570 + }, + { + "epoch": 1.9021719959080663, + "grad_norm": 0.10337240248918533, + "learning_rate": 7.26966228682996e-08, + "loss": 0.0013, + "step": 296580 + }, + { + "epoch": 1.9022361328018522, + "grad_norm": 0.2524295449256897, + "learning_rate": 7.260155808217706e-08, + "loss": 0.0011, + "step": 296590 + }, + { + "epoch": 1.9023002696956384, + "grad_norm": 0.05802197381854057, + "learning_rate": 7.250655503987792e-08, + "loss": 0.0007, + "step": 296600 + }, + { + "epoch": 1.9023644065894245, + "grad_norm": 0.06177850067615509, + "learning_rate": 7.241161374259343e-08, + "loss": 0.0011, + "step": 296610 + }, + { + "epoch": 1.9024285434832104, + "grad_norm": 0.1176663413643837, + "learning_rate": 7.23167341915132e-08, + "loss": 0.0006, + "step": 296620 + }, + { + "epoch": 1.9024926803769966, + "grad_norm": 0.12713593244552612, + "learning_rate": 7.222191638782572e-08, + "loss": 0.0011, + "step": 296630 + }, + { + "epoch": 1.9025568172707827, + "grad_norm": 0.006985232699662447, + "learning_rate": 7.212716033272005e-08, + "loss": 0.001, + "step": 296640 + }, + { + "epoch": 1.902620954164569, + "grad_norm": 0.12933291494846344, + "learning_rate": 7.203246602738245e-08, + "loss": 0.0012, + "step": 296650 + }, + { + "epoch": 1.902685091058355, + "grad_norm": 0.016291489824652672, + "learning_rate": 7.193783347300032e-08, + "loss": 0.0006, + "step": 296660 + }, + { + "epoch": 1.9027492279521412, + "grad_norm": 0.01926431432366371, + "learning_rate": 7.184326267075936e-08, + "loss": 0.001, + "step": 296670 + }, + { + "epoch": 1.9028133648459271, + "grad_norm": 0.024936731904745102, + "learning_rate": 7.174875362184363e-08, + "loss": 0.0013, + "step": 296680 + }, + { + "epoch": 1.9028775017397133, + "grad_norm": 0.0974646806716919, + "learning_rate": 7.165430632743886e-08, + "loss": 0.0021, + "step": 296690 + }, + { + "epoch": 1.9029416386334992, + "grad_norm": 0.08997154980897903, + "learning_rate": 7.155992078872742e-08, + "loss": 0.0003, + "step": 296700 + }, + { + "epoch": 1.9030057755272853, + "grad_norm": 0.08475396037101746, + "learning_rate": 7.146559700689337e-08, + "loss": 0.002, + "step": 296710 + }, + { + "epoch": 1.9030699124210715, + "grad_norm": 0.00956734549254179, + "learning_rate": 7.137133498311633e-08, + "loss": 0.002, + "step": 296720 + }, + { + "epoch": 1.9031340493148576, + "grad_norm": 0.04105890542268753, + "learning_rate": 7.127713471857977e-08, + "loss": 0.0011, + "step": 296730 + }, + { + "epoch": 1.9031981862086438, + "grad_norm": 0.04018397629261017, + "learning_rate": 7.118299621446334e-08, + "loss": 0.001, + "step": 296740 + }, + { + "epoch": 1.90326232310243, + "grad_norm": 0.19481207430362701, + "learning_rate": 7.108891947194662e-08, + "loss": 0.0012, + "step": 296750 + }, + { + "epoch": 1.903326459996216, + "grad_norm": 0.05149256810545921, + "learning_rate": 7.099490449220758e-08, + "loss": 0.001, + "step": 296760 + }, + { + "epoch": 1.903390596890002, + "grad_norm": 0.04618384689092636, + "learning_rate": 7.090095127642582e-08, + "loss": 0.0012, + "step": 296770 + }, + { + "epoch": 1.9034547337837882, + "grad_norm": 0.13499963283538818, + "learning_rate": 7.080705982577817e-08, + "loss": 0.0017, + "step": 296780 + }, + { + "epoch": 1.903518870677574, + "grad_norm": 0.07416088879108429, + "learning_rate": 7.071323014144039e-08, + "loss": 0.0014, + "step": 296790 + }, + { + "epoch": 1.9035830075713602, + "grad_norm": 0.041787635535001755, + "learning_rate": 7.061946222458871e-08, + "loss": 0.0014, + "step": 296800 + }, + { + "epoch": 1.9036471444651464, + "grad_norm": 0.13607001304626465, + "learning_rate": 7.052575607639833e-08, + "loss": 0.0012, + "step": 296810 + }, + { + "epoch": 1.9037112813589325, + "grad_norm": 0.0958867073059082, + "learning_rate": 7.043211169804332e-08, + "loss": 0.001, + "step": 296820 + }, + { + "epoch": 1.9037754182527187, + "grad_norm": 0.14104056358337402, + "learning_rate": 7.03385290906966e-08, + "loss": 0.0008, + "step": 296830 + }, + { + "epoch": 1.9038395551465048, + "grad_norm": 0.1130591556429863, + "learning_rate": 7.024500825553172e-08, + "loss": 0.0034, + "step": 296840 + }, + { + "epoch": 1.9039036920402908, + "grad_norm": 0.10772310197353363, + "learning_rate": 7.015154919371991e-08, + "loss": 0.0011, + "step": 296850 + }, + { + "epoch": 1.903967828934077, + "grad_norm": 0.13072291016578674, + "learning_rate": 7.00581519064325e-08, + "loss": 0.001, + "step": 296860 + }, + { + "epoch": 1.9040319658278628, + "grad_norm": 0.050700295716524124, + "learning_rate": 6.996481639483966e-08, + "loss": 0.0011, + "step": 296870 + }, + { + "epoch": 1.904096102721649, + "grad_norm": 0.026685891672968864, + "learning_rate": 6.987154266011154e-08, + "loss": 0.0012, + "step": 296880 + }, + { + "epoch": 1.9041602396154351, + "grad_norm": 0.007372669875621796, + "learning_rate": 6.977833070341667e-08, + "loss": 0.002, + "step": 296890 + }, + { + "epoch": 1.9042243765092213, + "grad_norm": 0.16585969924926758, + "learning_rate": 6.968518052592244e-08, + "loss": 0.0037, + "step": 296900 + }, + { + "epoch": 1.9042885134030074, + "grad_norm": 0.011847295798361301, + "learning_rate": 6.959209212879625e-08, + "loss": 0.0007, + "step": 296910 + }, + { + "epoch": 1.9043526502967936, + "grad_norm": 0.05392090231180191, + "learning_rate": 6.949906551320551e-08, + "loss": 0.0007, + "step": 296920 + }, + { + "epoch": 1.9044167871905797, + "grad_norm": 0.04062240198254585, + "learning_rate": 6.940610068031484e-08, + "loss": 0.0032, + "step": 296930 + }, + { + "epoch": 1.9044809240843656, + "grad_norm": 0.07179093360900879, + "learning_rate": 6.931319763128997e-08, + "loss": 0.0015, + "step": 296940 + }, + { + "epoch": 1.9045450609781518, + "grad_norm": 0.18170824646949768, + "learning_rate": 6.922035636729441e-08, + "loss": 0.0013, + "step": 296950 + }, + { + "epoch": 1.9046091978719377, + "grad_norm": 0.13805805146694183, + "learning_rate": 6.912757688949167e-08, + "loss": 0.001, + "step": 296960 + }, + { + "epoch": 1.9046733347657239, + "grad_norm": 0.08730573207139969, + "learning_rate": 6.903485919904474e-08, + "loss": 0.0014, + "step": 296970 + }, + { + "epoch": 1.90473747165951, + "grad_norm": 0.11466261744499207, + "learning_rate": 6.894220329711432e-08, + "loss": 0.001, + "step": 296980 + }, + { + "epoch": 1.9048016085532962, + "grad_norm": 0.056746866554021835, + "learning_rate": 6.884960918486339e-08, + "loss": 0.0007, + "step": 296990 + }, + { + "epoch": 1.9048657454470823, + "grad_norm": 0.06870730966329575, + "learning_rate": 6.875707686345045e-08, + "loss": 0.0007, + "step": 297000 + }, + { + "epoch": 1.9049298823408685, + "grad_norm": 0.12024735659360886, + "learning_rate": 6.866460633403571e-08, + "loss": 0.0007, + "step": 297010 + }, + { + "epoch": 1.9049940192346546, + "grad_norm": 0.0668744146823883, + "learning_rate": 6.857219759777767e-08, + "loss": 0.0011, + "step": 297020 + }, + { + "epoch": 1.9050581561284405, + "grad_norm": 0.15155884623527527, + "learning_rate": 6.847985065583485e-08, + "loss": 0.0012, + "step": 297030 + }, + { + "epoch": 1.9051222930222267, + "grad_norm": 0.0582219660282135, + "learning_rate": 6.83875655093641e-08, + "loss": 0.0005, + "step": 297040 + }, + { + "epoch": 1.9051864299160126, + "grad_norm": 0.08068463206291199, + "learning_rate": 6.829534215952116e-08, + "loss": 0.0008, + "step": 297050 + }, + { + "epoch": 1.9052505668097988, + "grad_norm": 0.08450084924697876, + "learning_rate": 6.820318060746234e-08, + "loss": 0.0014, + "step": 297060 + }, + { + "epoch": 1.905314703703585, + "grad_norm": 0.0036552983801811934, + "learning_rate": 6.811108085434282e-08, + "loss": 0.0004, + "step": 297070 + }, + { + "epoch": 1.905378840597371, + "grad_norm": 0.02461552433669567, + "learning_rate": 6.801904290131556e-08, + "loss": 0.0025, + "step": 297080 + }, + { + "epoch": 1.9054429774911572, + "grad_norm": 0.0689999908208847, + "learning_rate": 6.792706674953519e-08, + "loss": 0.0011, + "step": 297090 + }, + { + "epoch": 1.9055071143849434, + "grad_norm": 0.15311597287654877, + "learning_rate": 6.783515240015304e-08, + "loss": 0.0014, + "step": 297100 + }, + { + "epoch": 1.9055712512787293, + "grad_norm": 0.05924878641963005, + "learning_rate": 6.774329985432149e-08, + "loss": 0.0018, + "step": 297110 + }, + { + "epoch": 1.9056353881725154, + "grad_norm": 0.06728945672512054, + "learning_rate": 6.765150911319185e-08, + "loss": 0.001, + "step": 297120 + }, + { + "epoch": 1.9056995250663014, + "grad_norm": 0.04347103834152222, + "learning_rate": 6.755978017791321e-08, + "loss": 0.0008, + "step": 297130 + }, + { + "epoch": 1.9057636619600875, + "grad_norm": 0.07253609597682953, + "learning_rate": 6.74681130496363e-08, + "loss": 0.0008, + "step": 297140 + }, + { + "epoch": 1.9058277988538737, + "grad_norm": 0.035815589129924774, + "learning_rate": 6.73765077295091e-08, + "loss": 0.0009, + "step": 297150 + }, + { + "epoch": 1.9058919357476598, + "grad_norm": 0.11661411821842194, + "learning_rate": 6.728496421867902e-08, + "loss": 0.0009, + "step": 297160 + }, + { + "epoch": 1.905956072641446, + "grad_norm": 0.10119163990020752, + "learning_rate": 6.719348251829405e-08, + "loss": 0.0007, + "step": 297170 + }, + { + "epoch": 1.906020209535232, + "grad_norm": 0.14704670011997223, + "learning_rate": 6.710206262950047e-08, + "loss": 0.0006, + "step": 297180 + }, + { + "epoch": 1.9060843464290183, + "grad_norm": 0.03064277581870556, + "learning_rate": 6.701070455344294e-08, + "loss": 0.0008, + "step": 297190 + }, + { + "epoch": 1.9061484833228042, + "grad_norm": 0.01659799925982952, + "learning_rate": 6.69194082912672e-08, + "loss": 0.0006, + "step": 297200 + }, + { + "epoch": 1.9062126202165903, + "grad_norm": 0.1433970034122467, + "learning_rate": 6.682817384411677e-08, + "loss": 0.0008, + "step": 297210 + }, + { + "epoch": 1.9062767571103763, + "grad_norm": 0.028640659525990486, + "learning_rate": 6.673700121313464e-08, + "loss": 0.0008, + "step": 297220 + }, + { + "epoch": 1.9063408940041624, + "grad_norm": 0.060921695083379745, + "learning_rate": 6.664589039946434e-08, + "loss": 0.0007, + "step": 297230 + }, + { + "epoch": 1.9064050308979485, + "grad_norm": 0.08914317190647125, + "learning_rate": 6.655484140424661e-08, + "loss": 0.0007, + "step": 297240 + }, + { + "epoch": 1.9064691677917347, + "grad_norm": 0.03564678505063057, + "learning_rate": 6.646385422862223e-08, + "loss": 0.0004, + "step": 297250 + }, + { + "epoch": 1.9065333046855208, + "grad_norm": 0.05976053699851036, + "learning_rate": 6.637292887373248e-08, + "loss": 0.0008, + "step": 297260 + }, + { + "epoch": 1.906597441579307, + "grad_norm": 0.13643500208854675, + "learning_rate": 6.628206534071535e-08, + "loss": 0.0016, + "step": 297270 + }, + { + "epoch": 1.906661578473093, + "grad_norm": 0.040523845702409744, + "learning_rate": 6.619126363071049e-08, + "loss": 0.001, + "step": 297280 + }, + { + "epoch": 1.906725715366879, + "grad_norm": 0.05647537112236023, + "learning_rate": 6.61005237448542e-08, + "loss": 0.001, + "step": 297290 + }, + { + "epoch": 1.9067898522606652, + "grad_norm": 0.002782865660265088, + "learning_rate": 6.600984568428559e-08, + "loss": 0.0016, + "step": 297300 + }, + { + "epoch": 1.9068539891544511, + "grad_norm": 0.004738634917885065, + "learning_rate": 6.591922945013984e-08, + "loss": 0.0014, + "step": 297310 + }, + { + "epoch": 1.9069181260482373, + "grad_norm": 0.04857382923364639, + "learning_rate": 6.582867504355217e-08, + "loss": 0.0014, + "step": 297320 + }, + { + "epoch": 1.9069822629420234, + "grad_norm": 0.00765692163258791, + "learning_rate": 6.573818246565722e-08, + "loss": 0.0015, + "step": 297330 + }, + { + "epoch": 1.9070463998358096, + "grad_norm": 0.016255423426628113, + "learning_rate": 6.564775171758964e-08, + "loss": 0.0009, + "step": 297340 + }, + { + "epoch": 1.9071105367295957, + "grad_norm": 0.12796856462955475, + "learning_rate": 6.55573828004824e-08, + "loss": 0.0009, + "step": 297350 + }, + { + "epoch": 1.9071746736233819, + "grad_norm": 0.01191774196922779, + "learning_rate": 6.546707571546796e-08, + "loss": 0.0012, + "step": 297360 + }, + { + "epoch": 1.9072388105171678, + "grad_norm": 0.08716825395822525, + "learning_rate": 6.537683046367704e-08, + "loss": 0.0015, + "step": 297370 + }, + { + "epoch": 1.907302947410954, + "grad_norm": 0.18957430124282837, + "learning_rate": 6.528664704624155e-08, + "loss": 0.0012, + "step": 297380 + }, + { + "epoch": 1.90736708430474, + "grad_norm": 0.11020193248987198, + "learning_rate": 6.519652546429167e-08, + "loss": 0.0011, + "step": 297390 + }, + { + "epoch": 1.907431221198526, + "grad_norm": 0.02139066718518734, + "learning_rate": 6.51064657189554e-08, + "loss": 0.0008, + "step": 297400 + }, + { + "epoch": 1.9074953580923122, + "grad_norm": 0.0030964526813477278, + "learning_rate": 6.50164678113624e-08, + "loss": 0.0004, + "step": 297410 + }, + { + "epoch": 1.9075594949860983, + "grad_norm": 0.024820616468787193, + "learning_rate": 6.492653174263951e-08, + "loss": 0.002, + "step": 297420 + }, + { + "epoch": 1.9076236318798845, + "grad_norm": 0.04223772883415222, + "learning_rate": 6.483665751391477e-08, + "loss": 0.0019, + "step": 297430 + }, + { + "epoch": 1.9076877687736706, + "grad_norm": 0.0062083834782242775, + "learning_rate": 6.47468451263139e-08, + "loss": 0.0009, + "step": 297440 + }, + { + "epoch": 1.9077519056674568, + "grad_norm": 0.08388002961874008, + "learning_rate": 6.465709458096214e-08, + "loss": 0.0023, + "step": 297450 + }, + { + "epoch": 1.9078160425612427, + "grad_norm": 0.037969715893268585, + "learning_rate": 6.456740587898414e-08, + "loss": 0.0017, + "step": 297460 + }, + { + "epoch": 1.9078801794550289, + "grad_norm": 0.1530952900648117, + "learning_rate": 6.447777902150398e-08, + "loss": 0.0021, + "step": 297470 + }, + { + "epoch": 1.9079443163488148, + "grad_norm": 0.09825599193572998, + "learning_rate": 6.438821400964412e-08, + "loss": 0.0009, + "step": 297480 + }, + { + "epoch": 1.908008453242601, + "grad_norm": 0.09959319233894348, + "learning_rate": 6.429871084452755e-08, + "loss": 0.001, + "step": 297490 + }, + { + "epoch": 1.908072590136387, + "grad_norm": 0.1308680921792984, + "learning_rate": 6.420926952727613e-08, + "loss": 0.0016, + "step": 297500 + }, + { + "epoch": 1.9081367270301732, + "grad_norm": 0.07021002471446991, + "learning_rate": 6.411989005900954e-08, + "loss": 0.0008, + "step": 297510 + }, + { + "epoch": 1.9082008639239594, + "grad_norm": 0.0220838263630867, + "learning_rate": 6.403057244084854e-08, + "loss": 0.0012, + "step": 297520 + }, + { + "epoch": 1.9082650008177455, + "grad_norm": 0.10355224460363388, + "learning_rate": 6.394131667391224e-08, + "loss": 0.0007, + "step": 297530 + }, + { + "epoch": 1.9083291377115315, + "grad_norm": 0.011518482118844986, + "learning_rate": 6.385212275931862e-08, + "loss": 0.0009, + "step": 297540 + }, + { + "epoch": 1.9083932746053176, + "grad_norm": 0.1443081796169281, + "learning_rate": 6.376299069818626e-08, + "loss": 0.0006, + "step": 297550 + }, + { + "epoch": 1.9084574114991035, + "grad_norm": 0.07212929427623749, + "learning_rate": 6.36739204916309e-08, + "loss": 0.0009, + "step": 297560 + }, + { + "epoch": 1.9085215483928897, + "grad_norm": 0.06594572961330414, + "learning_rate": 6.358491214077e-08, + "loss": 0.0008, + "step": 297570 + }, + { + "epoch": 1.9085856852866758, + "grad_norm": 0.03384760767221451, + "learning_rate": 6.349596564671767e-08, + "loss": 0.0009, + "step": 297580 + }, + { + "epoch": 1.908649822180462, + "grad_norm": 0.030025290325284004, + "learning_rate": 6.340708101058913e-08, + "loss": 0.0014, + "step": 297590 + }, + { + "epoch": 1.9087139590742481, + "grad_norm": 0.08829595893621445, + "learning_rate": 6.331825823349791e-08, + "loss": 0.001, + "step": 297600 + }, + { + "epoch": 1.9087780959680343, + "grad_norm": 0.05397048965096474, + "learning_rate": 6.322949731655704e-08, + "loss": 0.0006, + "step": 297610 + }, + { + "epoch": 1.9088422328618204, + "grad_norm": 0.14397847652435303, + "learning_rate": 6.314079826087894e-08, + "loss": 0.001, + "step": 297620 + }, + { + "epoch": 1.9089063697556063, + "grad_norm": 0.2537747621536255, + "learning_rate": 6.305216106757494e-08, + "loss": 0.0019, + "step": 297630 + }, + { + "epoch": 1.9089705066493925, + "grad_norm": 0.09246334433555603, + "learning_rate": 6.296358573775585e-08, + "loss": 0.0009, + "step": 297640 + }, + { + "epoch": 1.9090346435431784, + "grad_norm": 0.006885716691613197, + "learning_rate": 6.287507227253131e-08, + "loss": 0.0003, + "step": 297650 + }, + { + "epoch": 1.9090987804369646, + "grad_norm": 0.06697414070367813, + "learning_rate": 6.278662067301044e-08, + "loss": 0.0009, + "step": 297660 + }, + { + "epoch": 1.9091629173307507, + "grad_norm": 0.10258012264966965, + "learning_rate": 6.269823094030181e-08, + "loss": 0.0009, + "step": 297670 + }, + { + "epoch": 1.9092270542245369, + "grad_norm": 0.08741557598114014, + "learning_rate": 6.26099030755134e-08, + "loss": 0.0012, + "step": 297680 + }, + { + "epoch": 1.909291191118323, + "grad_norm": 0.07768207788467407, + "learning_rate": 6.252163707975157e-08, + "loss": 0.0008, + "step": 297690 + }, + { + "epoch": 1.9093553280121092, + "grad_norm": 0.1813802868127823, + "learning_rate": 6.243343295412207e-08, + "loss": 0.0013, + "step": 297700 + }, + { + "epoch": 1.909419464905895, + "grad_norm": 0.07702813297510147, + "learning_rate": 6.234529069973071e-08, + "loss": 0.0014, + "step": 297710 + }, + { + "epoch": 1.9094836017996812, + "grad_norm": 0.1788177788257599, + "learning_rate": 6.225721031768162e-08, + "loss": 0.0014, + "step": 297720 + }, + { + "epoch": 1.9095477386934674, + "grad_norm": 0.03819502145051956, + "learning_rate": 6.216919180907888e-08, + "loss": 0.0005, + "step": 297730 + }, + { + "epoch": 1.9096118755872533, + "grad_norm": 0.07616864889860153, + "learning_rate": 6.208123517502441e-08, + "loss": 0.0008, + "step": 297740 + }, + { + "epoch": 1.9096760124810395, + "grad_norm": 0.09854179620742798, + "learning_rate": 6.199334041662175e-08, + "loss": 0.0008, + "step": 297750 + }, + { + "epoch": 1.9097401493748256, + "grad_norm": 0.1113155409693718, + "learning_rate": 6.190550753497171e-08, + "loss": 0.0013, + "step": 297760 + }, + { + "epoch": 1.9098042862686118, + "grad_norm": 0.03208629786968231, + "learning_rate": 6.181773653117506e-08, + "loss": 0.0004, + "step": 297770 + }, + { + "epoch": 1.909868423162398, + "grad_norm": 0.03522845730185509, + "learning_rate": 6.173002740633095e-08, + "loss": 0.0007, + "step": 297780 + }, + { + "epoch": 1.909932560056184, + "grad_norm": 0.1295401155948639, + "learning_rate": 6.164238016153901e-08, + "loss": 0.0005, + "step": 297790 + }, + { + "epoch": 1.90999669694997, + "grad_norm": 0.04841848090291023, + "learning_rate": 6.155479479789783e-08, + "loss": 0.0013, + "step": 297800 + }, + { + "epoch": 1.9100608338437561, + "grad_norm": 0.10158253461122513, + "learning_rate": 6.146727131650432e-08, + "loss": 0.0014, + "step": 297810 + }, + { + "epoch": 1.910124970737542, + "grad_norm": 0.024595726281404495, + "learning_rate": 6.137980971845536e-08, + "loss": 0.0016, + "step": 297820 + }, + { + "epoch": 1.9101891076313282, + "grad_norm": 0.044377777725458145, + "learning_rate": 6.129241000484676e-08, + "loss": 0.0013, + "step": 297830 + }, + { + "epoch": 1.9102532445251144, + "grad_norm": 0.13128231465816498, + "learning_rate": 6.120507217677429e-08, + "loss": 0.0011, + "step": 297840 + }, + { + "epoch": 1.9103173814189005, + "grad_norm": 0.029137341305613518, + "learning_rate": 6.111779623533154e-08, + "loss": 0.0007, + "step": 297850 + }, + { + "epoch": 1.9103815183126867, + "grad_norm": 0.043349724262952805, + "learning_rate": 6.103058218161262e-08, + "loss": 0.0014, + "step": 297860 + }, + { + "epoch": 1.9104456552064728, + "grad_norm": 0.0957290530204773, + "learning_rate": 6.094343001671055e-08, + "loss": 0.0012, + "step": 297870 + }, + { + "epoch": 1.910509792100259, + "grad_norm": 0.149446502327919, + "learning_rate": 6.085633974171778e-08, + "loss": 0.0019, + "step": 297880 + }, + { + "epoch": 1.9105739289940449, + "grad_norm": 0.032342396676540375, + "learning_rate": 6.076931135772402e-08, + "loss": 0.0014, + "step": 297890 + }, + { + "epoch": 1.910638065887831, + "grad_norm": 0.04412601888179779, + "learning_rate": 6.068234486582113e-08, + "loss": 0.0007, + "step": 297900 + }, + { + "epoch": 1.910702202781617, + "grad_norm": 0.04877545312047005, + "learning_rate": 6.059544026709885e-08, + "loss": 0.001, + "step": 297910 + }, + { + "epoch": 1.910766339675403, + "grad_norm": 0.08056735247373581, + "learning_rate": 6.050859756264571e-08, + "loss": 0.0007, + "step": 297920 + }, + { + "epoch": 1.9108304765691893, + "grad_norm": 0.043473027646541595, + "learning_rate": 6.042181675354975e-08, + "loss": 0.0007, + "step": 297930 + }, + { + "epoch": 1.9108946134629754, + "grad_norm": 0.011658878065645695, + "learning_rate": 6.033509784089897e-08, + "loss": 0.0006, + "step": 297940 + }, + { + "epoch": 1.9109587503567615, + "grad_norm": 0.030716655775904655, + "learning_rate": 6.024844082577919e-08, + "loss": 0.0021, + "step": 297950 + }, + { + "epoch": 1.9110228872505477, + "grad_norm": 0.002981361001729965, + "learning_rate": 6.01618457092773e-08, + "loss": 0.0014, + "step": 297960 + }, + { + "epoch": 1.9110870241443336, + "grad_norm": 0.008524656295776367, + "learning_rate": 6.007531249247744e-08, + "loss": 0.0023, + "step": 297970 + }, + { + "epoch": 1.9111511610381198, + "grad_norm": 0.03623645007610321, + "learning_rate": 5.998884117646542e-08, + "loss": 0.0014, + "step": 297980 + }, + { + "epoch": 1.9112152979319057, + "grad_norm": 0.13329999148845673, + "learning_rate": 5.990243176232313e-08, + "loss": 0.001, + "step": 297990 + }, + { + "epoch": 1.9112794348256918, + "grad_norm": 0.15252041816711426, + "learning_rate": 5.981608425113416e-08, + "loss": 0.0011, + "step": 298000 + }, + { + "epoch": 1.911343571719478, + "grad_norm": 0.07557599246501923, + "learning_rate": 5.972979864397988e-08, + "loss": 0.0007, + "step": 298010 + }, + { + "epoch": 1.9114077086132641, + "grad_norm": 0.032776590436697006, + "learning_rate": 5.964357494194273e-08, + "loss": 0.0012, + "step": 298020 + }, + { + "epoch": 1.9114718455070503, + "grad_norm": 0.16524513065814972, + "learning_rate": 5.9557413146102415e-08, + "loss": 0.0033, + "step": 298030 + }, + { + "epoch": 1.9115359824008364, + "grad_norm": 0.13968272507190704, + "learning_rate": 5.947131325753808e-08, + "loss": 0.0017, + "step": 298040 + }, + { + "epoch": 1.9116001192946226, + "grad_norm": 0.07123354822397232, + "learning_rate": 5.9385275277329404e-08, + "loss": 0.0015, + "step": 298050 + }, + { + "epoch": 1.9116642561884085, + "grad_norm": 0.17939843237400055, + "learning_rate": 5.929929920655386e-08, + "loss": 0.001, + "step": 298060 + }, + { + "epoch": 1.9117283930821947, + "grad_norm": 0.09192238003015518, + "learning_rate": 5.921338504629004e-08, + "loss": 0.0008, + "step": 298070 + }, + { + "epoch": 1.9117925299759806, + "grad_norm": 0.07744672894477844, + "learning_rate": 5.912753279761263e-08, + "loss": 0.0012, + "step": 298080 + }, + { + "epoch": 1.9118566668697667, + "grad_norm": 0.10017392039299011, + "learning_rate": 5.9041742461599105e-08, + "loss": 0.001, + "step": 298090 + }, + { + "epoch": 1.911920803763553, + "grad_norm": 0.0025806869380176067, + "learning_rate": 5.8956014039323604e-08, + "loss": 0.0007, + "step": 298100 + }, + { + "epoch": 1.911984940657339, + "grad_norm": 0.04378533735871315, + "learning_rate": 5.887034753186027e-08, + "loss": 0.0009, + "step": 298110 + }, + { + "epoch": 1.9120490775511252, + "grad_norm": 0.06870028376579285, + "learning_rate": 5.878474294028269e-08, + "loss": 0.001, + "step": 298120 + }, + { + "epoch": 1.9121132144449113, + "grad_norm": 0.08500008285045624, + "learning_rate": 5.8699200265664445e-08, + "loss": 0.0008, + "step": 298130 + }, + { + "epoch": 1.9121773513386973, + "grad_norm": 0.03247741609811783, + "learning_rate": 5.861371950907635e-08, + "loss": 0.0012, + "step": 298140 + }, + { + "epoch": 1.9122414882324834, + "grad_norm": 0.007751672528684139, + "learning_rate": 5.8528300671590324e-08, + "loss": 0.0008, + "step": 298150 + }, + { + "epoch": 1.9123056251262696, + "grad_norm": 0.054278917610645294, + "learning_rate": 5.844294375427551e-08, + "loss": 0.0006, + "step": 298160 + }, + { + "epoch": 1.9123697620200555, + "grad_norm": 0.056611429899930954, + "learning_rate": 5.835764875820271e-08, + "loss": 0.0015, + "step": 298170 + }, + { + "epoch": 1.9124338989138416, + "grad_norm": 0.1559065580368042, + "learning_rate": 5.827241568444053e-08, + "loss": 0.0011, + "step": 298180 + }, + { + "epoch": 1.9124980358076278, + "grad_norm": 0.006949694361537695, + "learning_rate": 5.818724453405644e-08, + "loss": 0.0008, + "step": 298190 + }, + { + "epoch": 1.912562172701414, + "grad_norm": 0.08792159706354141, + "learning_rate": 5.810213530811792e-08, + "loss": 0.0016, + "step": 298200 + }, + { + "epoch": 1.9126263095952, + "grad_norm": 0.13154152035713196, + "learning_rate": 5.8017088007691904e-08, + "loss": 0.0006, + "step": 298210 + }, + { + "epoch": 1.9126904464889862, + "grad_norm": 0.1045413538813591, + "learning_rate": 5.7932102633843633e-08, + "loss": 0.0017, + "step": 298220 + }, + { + "epoch": 1.9127545833827722, + "grad_norm": 0.15327368676662445, + "learning_rate": 5.7847179187637825e-08, + "loss": 0.0008, + "step": 298230 + }, + { + "epoch": 1.9128187202765583, + "grad_norm": 0.015088552609086037, + "learning_rate": 5.7762317670139734e-08, + "loss": 0.0006, + "step": 298240 + }, + { + "epoch": 1.9128828571703442, + "grad_norm": 0.08311817795038223, + "learning_rate": 5.767751808241129e-08, + "loss": 0.0011, + "step": 298250 + }, + { + "epoch": 1.9129469940641304, + "grad_norm": 0.02838069386780262, + "learning_rate": 5.759278042551664e-08, + "loss": 0.0004, + "step": 298260 + }, + { + "epoch": 1.9130111309579165, + "grad_norm": 0.045654039829969406, + "learning_rate": 5.7508104700515486e-08, + "loss": 0.0006, + "step": 298270 + }, + { + "epoch": 1.9130752678517027, + "grad_norm": 0.08993151038885117, + "learning_rate": 5.742349090847088e-08, + "loss": 0.0005, + "step": 298280 + }, + { + "epoch": 1.9131394047454888, + "grad_norm": 0.013555367477238178, + "learning_rate": 5.7338939050442524e-08, + "loss": 0.0007, + "step": 298290 + }, + { + "epoch": 1.913203541639275, + "grad_norm": 0.0012622076319530606, + "learning_rate": 5.725444912748956e-08, + "loss": 0.0007, + "step": 298300 + }, + { + "epoch": 1.9132676785330611, + "grad_norm": 0.07890114933252335, + "learning_rate": 5.717002114067005e-08, + "loss": 0.0006, + "step": 298310 + }, + { + "epoch": 1.913331815426847, + "grad_norm": 0.09559661149978638, + "learning_rate": 5.708565509104369e-08, + "loss": 0.0013, + "step": 298320 + }, + { + "epoch": 1.9133959523206332, + "grad_norm": 0.36704665422439575, + "learning_rate": 5.700135097966686e-08, + "loss": 0.0031, + "step": 298330 + }, + { + "epoch": 1.9134600892144191, + "grad_norm": 0.109199658036232, + "learning_rate": 5.691710880759538e-08, + "loss": 0.0008, + "step": 298340 + }, + { + "epoch": 1.9135242261082053, + "grad_norm": 0.0839865654706955, + "learning_rate": 5.683292857588507e-08, + "loss": 0.0013, + "step": 298350 + }, + { + "epoch": 1.9135883630019914, + "grad_norm": 0.13686271011829376, + "learning_rate": 5.674881028559121e-08, + "loss": 0.0008, + "step": 298360 + }, + { + "epoch": 1.9136524998957776, + "grad_norm": 0.17503884434700012, + "learning_rate": 5.666475393776793e-08, + "loss": 0.0012, + "step": 298370 + }, + { + "epoch": 1.9137166367895637, + "grad_norm": 0.023506227880716324, + "learning_rate": 5.658075953346776e-08, + "loss": 0.0012, + "step": 298380 + }, + { + "epoch": 1.9137807736833499, + "grad_norm": 0.007835052907466888, + "learning_rate": 5.649682707374371e-08, + "loss": 0.0024, + "step": 298390 + }, + { + "epoch": 1.9138449105771358, + "grad_norm": 0.017951978370547295, + "learning_rate": 5.6412956559647734e-08, + "loss": 0.0008, + "step": 298400 + }, + { + "epoch": 1.913909047470922, + "grad_norm": 0.013206799514591694, + "learning_rate": 5.632914799223066e-08, + "loss": 0.001, + "step": 298410 + }, + { + "epoch": 1.9139731843647079, + "grad_norm": 0.09789872169494629, + "learning_rate": 5.6245401372542193e-08, + "loss": 0.0016, + "step": 298420 + }, + { + "epoch": 1.914037321258494, + "grad_norm": 0.08239579200744629, + "learning_rate": 5.616171670163262e-08, + "loss": 0.0008, + "step": 298430 + }, + { + "epoch": 1.9141014581522802, + "grad_norm": 0.21788769960403442, + "learning_rate": 5.607809398054998e-08, + "loss": 0.0007, + "step": 298440 + }, + { + "epoch": 1.9141655950460663, + "grad_norm": 0.05247426778078079, + "learning_rate": 5.599453321034176e-08, + "loss": 0.002, + "step": 298450 + }, + { + "epoch": 1.9142297319398525, + "grad_norm": 0.1064145565032959, + "learning_rate": 5.5911034392055474e-08, + "loss": 0.0013, + "step": 298460 + }, + { + "epoch": 1.9142938688336386, + "grad_norm": 0.05088728293776512, + "learning_rate": 5.58275975267375e-08, + "loss": 0.0008, + "step": 298470 + }, + { + "epoch": 1.9143580057274248, + "grad_norm": 0.04615416377782822, + "learning_rate": 5.574422261543366e-08, + "loss": 0.0025, + "step": 298480 + }, + { + "epoch": 1.9144221426212107, + "grad_norm": 0.1170719638466835, + "learning_rate": 5.566090965918814e-08, + "loss": 0.0013, + "step": 298490 + }, + { + "epoch": 1.9144862795149968, + "grad_norm": 0.1538655310869217, + "learning_rate": 5.557765865904452e-08, + "loss": 0.0007, + "step": 298500 + }, + { + "epoch": 1.9145504164087828, + "grad_norm": 0.02719215862452984, + "learning_rate": 5.5494469616046984e-08, + "loss": 0.0006, + "step": 298510 + }, + { + "epoch": 1.914614553302569, + "grad_norm": 0.008994230069220066, + "learning_rate": 5.541134253123748e-08, + "loss": 0.0013, + "step": 298520 + }, + { + "epoch": 1.914678690196355, + "grad_norm": 0.013056914322078228, + "learning_rate": 5.532827740565794e-08, + "loss": 0.0009, + "step": 298530 + }, + { + "epoch": 1.9147428270901412, + "grad_norm": 0.07613728940486908, + "learning_rate": 5.524527424034865e-08, + "loss": 0.0009, + "step": 298540 + }, + { + "epoch": 1.9148069639839274, + "grad_norm": 0.019116273149847984, + "learning_rate": 5.516233303635044e-08, + "loss": 0.0008, + "step": 298550 + }, + { + "epoch": 1.9148711008777135, + "grad_norm": 0.12364184856414795, + "learning_rate": 5.5079453794701924e-08, + "loss": 0.0012, + "step": 298560 + }, + { + "epoch": 1.9149352377714997, + "grad_norm": 0.05676203966140747, + "learning_rate": 5.4996636516441715e-08, + "loss": 0.0006, + "step": 298570 + }, + { + "epoch": 1.9149993746652856, + "grad_norm": 0.09833601117134094, + "learning_rate": 5.491388120260843e-08, + "loss": 0.0007, + "step": 298580 + }, + { + "epoch": 1.9150635115590717, + "grad_norm": 0.0450495220720768, + "learning_rate": 5.483118785423791e-08, + "loss": 0.001, + "step": 298590 + }, + { + "epoch": 1.9151276484528577, + "grad_norm": 0.050660811364650726, + "learning_rate": 5.47485564723671e-08, + "loss": 0.0007, + "step": 298600 + }, + { + "epoch": 1.9151917853466438, + "grad_norm": 0.245155930519104, + "learning_rate": 5.4665987058030724e-08, + "loss": 0.0009, + "step": 298610 + }, + { + "epoch": 1.91525592224043, + "grad_norm": 0.07262758165597916, + "learning_rate": 5.458347961226462e-08, + "loss": 0.0008, + "step": 298620 + }, + { + "epoch": 1.915320059134216, + "grad_norm": 0.005807352717965841, + "learning_rate": 5.450103413610186e-08, + "loss": 0.0008, + "step": 298630 + }, + { + "epoch": 1.9153841960280023, + "grad_norm": 0.0009603967191651464, + "learning_rate": 5.441865063057494e-08, + "loss": 0.0007, + "step": 298640 + }, + { + "epoch": 1.9154483329217884, + "grad_norm": 0.06040569022297859, + "learning_rate": 5.433632909671749e-08, + "loss": 0.0012, + "step": 298650 + }, + { + "epoch": 1.9155124698155743, + "grad_norm": 0.039267648011446, + "learning_rate": 5.425406953556034e-08, + "loss": 0.0012, + "step": 298660 + }, + { + "epoch": 1.9155766067093605, + "grad_norm": 0.09607995301485062, + "learning_rate": 5.4171871948134335e-08, + "loss": 0.002, + "step": 298670 + }, + { + "epoch": 1.9156407436031464, + "grad_norm": 0.08390876650810242, + "learning_rate": 5.408973633546921e-08, + "loss": 0.0011, + "step": 298680 + }, + { + "epoch": 1.9157048804969325, + "grad_norm": 0.056424278765916824, + "learning_rate": 5.40076626985947e-08, + "loss": 0.0008, + "step": 298690 + }, + { + "epoch": 1.9157690173907187, + "grad_norm": 0.048826076090335846, + "learning_rate": 5.392565103853942e-08, + "loss": 0.0007, + "step": 298700 + }, + { + "epoch": 1.9158331542845048, + "grad_norm": 0.03236357867717743, + "learning_rate": 5.3843701356330326e-08, + "loss": 0.0046, + "step": 298710 + }, + { + "epoch": 1.915897291178291, + "grad_norm": 0.0056189983151853085, + "learning_rate": 5.3761813652994374e-08, + "loss": 0.0007, + "step": 298720 + }, + { + "epoch": 1.9159614280720771, + "grad_norm": 0.0127511415630579, + "learning_rate": 5.367998792955798e-08, + "loss": 0.0006, + "step": 298730 + }, + { + "epoch": 1.9160255649658633, + "grad_norm": 0.12237073481082916, + "learning_rate": 5.359822418704641e-08, + "loss": 0.0006, + "step": 298740 + }, + { + "epoch": 1.9160897018596492, + "grad_norm": 0.06909266859292984, + "learning_rate": 5.351652242648442e-08, + "loss": 0.0011, + "step": 298750 + }, + { + "epoch": 1.9161538387534354, + "grad_norm": 0.07474600523710251, + "learning_rate": 5.3434882648895626e-08, + "loss": 0.0012, + "step": 298760 + }, + { + "epoch": 1.9162179756472213, + "grad_norm": 0.005450695753097534, + "learning_rate": 5.33533048553031e-08, + "loss": 0.0027, + "step": 298770 + }, + { + "epoch": 1.9162821125410074, + "grad_norm": 0.03122709132730961, + "learning_rate": 5.327178904672881e-08, + "loss": 0.001, + "step": 298780 + }, + { + "epoch": 1.9163462494347936, + "grad_norm": 0.0031229574233293533, + "learning_rate": 5.319033522419414e-08, + "loss": 0.001, + "step": 298790 + }, + { + "epoch": 1.9164103863285797, + "grad_norm": 0.035058699548244476, + "learning_rate": 5.3108943388720527e-08, + "loss": 0.0004, + "step": 298800 + }, + { + "epoch": 1.9164745232223659, + "grad_norm": 0.012346319854259491, + "learning_rate": 5.3027613541327126e-08, + "loss": 0.0009, + "step": 298810 + }, + { + "epoch": 1.916538660116152, + "grad_norm": 0.155277818441391, + "learning_rate": 5.294634568303314e-08, + "loss": 0.0016, + "step": 298820 + }, + { + "epoch": 1.916602797009938, + "grad_norm": 0.14972133934497833, + "learning_rate": 5.286513981485719e-08, + "loss": 0.001, + "step": 298830 + }, + { + "epoch": 1.9166669339037241, + "grad_norm": 0.011021796613931656, + "learning_rate": 5.2783995937816244e-08, + "loss": 0.0007, + "step": 298840 + }, + { + "epoch": 1.9167310707975103, + "grad_norm": 0.007839307188987732, + "learning_rate": 5.270291405292838e-08, + "loss": 0.0012, + "step": 298850 + }, + { + "epoch": 1.9167952076912962, + "grad_norm": 0.027409091591835022, + "learning_rate": 5.262189416120833e-08, + "loss": 0.0011, + "step": 298860 + }, + { + "epoch": 1.9168593445850823, + "grad_norm": 0.048824433237314224, + "learning_rate": 5.254093626367196e-08, + "loss": 0.001, + "step": 298870 + }, + { + "epoch": 1.9169234814788685, + "grad_norm": 0.10896562039852142, + "learning_rate": 5.24600403613329e-08, + "loss": 0.0011, + "step": 298880 + }, + { + "epoch": 1.9169876183726546, + "grad_norm": 0.14132238924503326, + "learning_rate": 5.2379206455206446e-08, + "loss": 0.0007, + "step": 298890 + }, + { + "epoch": 1.9170517552664408, + "grad_norm": 0.08344261348247528, + "learning_rate": 5.229843454630401e-08, + "loss": 0.0007, + "step": 298900 + }, + { + "epoch": 1.917115892160227, + "grad_norm": 0.08146130293607712, + "learning_rate": 5.221772463563868e-08, + "loss": 0.0034, + "step": 298910 + }, + { + "epoch": 1.9171800290540129, + "grad_norm": 0.14258337020874023, + "learning_rate": 5.213707672422075e-08, + "loss": 0.0009, + "step": 298920 + }, + { + "epoch": 1.917244165947799, + "grad_norm": 0.04950059950351715, + "learning_rate": 5.2056490813061636e-08, + "loss": 0.0003, + "step": 298930 + }, + { + "epoch": 1.917308302841585, + "grad_norm": 0.0863884910941124, + "learning_rate": 5.197596690317108e-08, + "loss": 0.0015, + "step": 298940 + }, + { + "epoch": 1.917372439735371, + "grad_norm": 0.12216462194919586, + "learning_rate": 5.189550499555773e-08, + "loss": 0.0009, + "step": 298950 + }, + { + "epoch": 1.9174365766291572, + "grad_norm": 0.014564010314643383, + "learning_rate": 5.181510509123022e-08, + "loss": 0.0007, + "step": 298960 + }, + { + "epoch": 1.9175007135229434, + "grad_norm": 0.02649235725402832, + "learning_rate": 5.173476719119608e-08, + "loss": 0.0009, + "step": 298970 + }, + { + "epoch": 1.9175648504167295, + "grad_norm": 0.14758247137069702, + "learning_rate": 5.165449129646172e-08, + "loss": 0.0008, + "step": 298980 + }, + { + "epoch": 1.9176289873105157, + "grad_norm": 0.12455189228057861, + "learning_rate": 5.157427740803245e-08, + "loss": 0.0009, + "step": 298990 + }, + { + "epoch": 1.9176931242043018, + "grad_norm": 0.08838582783937454, + "learning_rate": 5.1494125526914687e-08, + "loss": 0.0012, + "step": 299000 + }, + { + "epoch": 1.9177572610980878, + "grad_norm": 0.08339110761880875, + "learning_rate": 5.141403565411207e-08, + "loss": 0.0009, + "step": 299010 + }, + { + "epoch": 1.917821397991874, + "grad_norm": 0.058458272367715836, + "learning_rate": 5.1334007790628805e-08, + "loss": 0.0013, + "step": 299020 + }, + { + "epoch": 1.9178855348856598, + "grad_norm": 0.09558631479740143, + "learning_rate": 5.12540419374663e-08, + "loss": 0.001, + "step": 299030 + }, + { + "epoch": 1.917949671779446, + "grad_norm": 0.08977165818214417, + "learning_rate": 5.1174138095627654e-08, + "loss": 0.0009, + "step": 299040 + }, + { + "epoch": 1.9180138086732321, + "grad_norm": 0.08206156641244888, + "learning_rate": 5.109429626611428e-08, + "loss": 0.0009, + "step": 299050 + }, + { + "epoch": 1.9180779455670183, + "grad_norm": 0.04454299435019493, + "learning_rate": 5.10145164499265e-08, + "loss": 0.0014, + "step": 299060 + }, + { + "epoch": 1.9181420824608044, + "grad_norm": 0.09718791395425797, + "learning_rate": 5.0934798648062944e-08, + "loss": 0.0012, + "step": 299070 + }, + { + "epoch": 1.9182062193545906, + "grad_norm": 0.13956280052661896, + "learning_rate": 5.0855142861523934e-08, + "loss": 0.0013, + "step": 299080 + }, + { + "epoch": 1.9182703562483765, + "grad_norm": 0.03992290794849396, + "learning_rate": 5.0775549091307554e-08, + "loss": 0.0006, + "step": 299090 + }, + { + "epoch": 1.9183344931421626, + "grad_norm": 0.13538269698619843, + "learning_rate": 5.0696017338409675e-08, + "loss": 0.0011, + "step": 299100 + }, + { + "epoch": 1.9183986300359486, + "grad_norm": 0.03277270868420601, + "learning_rate": 5.0616547603828394e-08, + "loss": 0.0017, + "step": 299110 + }, + { + "epoch": 1.9184627669297347, + "grad_norm": 0.049767736345529556, + "learning_rate": 5.053713988855957e-08, + "loss": 0.001, + "step": 299120 + }, + { + "epoch": 1.9185269038235209, + "grad_norm": 0.12353898584842682, + "learning_rate": 5.045779419359742e-08, + "loss": 0.0013, + "step": 299130 + }, + { + "epoch": 1.918591040717307, + "grad_norm": 0.015955159440636635, + "learning_rate": 5.0378510519935584e-08, + "loss": 0.0018, + "step": 299140 + }, + { + "epoch": 1.9186551776110932, + "grad_norm": 0.0956311970949173, + "learning_rate": 5.029928886856939e-08, + "loss": 0.0008, + "step": 299150 + }, + { + "epoch": 1.9187193145048793, + "grad_norm": 0.04158175364136696, + "learning_rate": 5.02201292404908e-08, + "loss": 0.0005, + "step": 299160 + }, + { + "epoch": 1.9187834513986655, + "grad_norm": 0.05196625366806984, + "learning_rate": 5.014103163669126e-08, + "loss": 0.0006, + "step": 299170 + }, + { + "epoch": 1.9188475882924514, + "grad_norm": 0.048744142055511475, + "learning_rate": 5.0061996058161644e-08, + "loss": 0.0008, + "step": 299180 + }, + { + "epoch": 1.9189117251862375, + "grad_norm": 0.04593722149729729, + "learning_rate": 4.998302250589338e-08, + "loss": 0.0006, + "step": 299190 + }, + { + "epoch": 1.9189758620800235, + "grad_norm": 0.012638093903660774, + "learning_rate": 4.9904110980875664e-08, + "loss": 0.0007, + "step": 299200 + }, + { + "epoch": 1.9190399989738096, + "grad_norm": 0.0911950096487999, + "learning_rate": 4.982526148409716e-08, + "loss": 0.0006, + "step": 299210 + }, + { + "epoch": 1.9191041358675958, + "grad_norm": 0.005118612200021744, + "learning_rate": 4.974647401654542e-08, + "loss": 0.0017, + "step": 299220 + }, + { + "epoch": 1.919168272761382, + "grad_norm": 0.23245181143283844, + "learning_rate": 4.966774857920909e-08, + "loss": 0.0017, + "step": 299230 + }, + { + "epoch": 1.919232409655168, + "grad_norm": 0.05644499883055687, + "learning_rate": 4.958908517307348e-08, + "loss": 0.0009, + "step": 299240 + }, + { + "epoch": 1.9192965465489542, + "grad_norm": 0.05527875944972038, + "learning_rate": 4.951048379912449e-08, + "loss": 0.0018, + "step": 299250 + }, + { + "epoch": 1.9193606834427401, + "grad_norm": 0.055565234273672104, + "learning_rate": 4.9431944458347426e-08, + "loss": 0.0006, + "step": 299260 + }, + { + "epoch": 1.9194248203365263, + "grad_norm": 0.18801401555538177, + "learning_rate": 4.935346715172595e-08, + "loss": 0.0015, + "step": 299270 + }, + { + "epoch": 1.9194889572303124, + "grad_norm": 0.0394013486802578, + "learning_rate": 4.927505188024373e-08, + "loss": 0.0035, + "step": 299280 + }, + { + "epoch": 1.9195530941240984, + "grad_norm": 0.07867538183927536, + "learning_rate": 4.9196698644883303e-08, + "loss": 0.0011, + "step": 299290 + }, + { + "epoch": 1.9196172310178845, + "grad_norm": 0.027638576924800873, + "learning_rate": 4.911840744662666e-08, + "loss": 0.001, + "step": 299300 + }, + { + "epoch": 1.9196813679116707, + "grad_norm": 0.008723137900233269, + "learning_rate": 4.9040178286455245e-08, + "loss": 0.0013, + "step": 299310 + }, + { + "epoch": 1.9197455048054568, + "grad_norm": 0.046434395015239716, + "learning_rate": 4.896201116534827e-08, + "loss": 0.0009, + "step": 299320 + }, + { + "epoch": 1.919809641699243, + "grad_norm": 0.27379703521728516, + "learning_rate": 4.88839060842855e-08, + "loss": 0.0008, + "step": 299330 + }, + { + "epoch": 1.919873778593029, + "grad_norm": 0.03799886628985405, + "learning_rate": 4.880586304424617e-08, + "loss": 0.0012, + "step": 299340 + }, + { + "epoch": 1.919937915486815, + "grad_norm": 0.036054909229278564, + "learning_rate": 4.872788204620782e-08, + "loss": 0.0017, + "step": 299350 + }, + { + "epoch": 1.9200020523806012, + "grad_norm": 0.0491146519780159, + "learning_rate": 4.8649963091148e-08, + "loss": 0.001, + "step": 299360 + }, + { + "epoch": 1.920066189274387, + "grad_norm": 0.042426567524671555, + "learning_rate": 4.85721061800426e-08, + "loss": 0.0007, + "step": 299370 + }, + { + "epoch": 1.9201303261681733, + "grad_norm": 0.0619833804666996, + "learning_rate": 4.84943113138675e-08, + "loss": 0.0011, + "step": 299380 + }, + { + "epoch": 1.9201944630619594, + "grad_norm": 0.049811821430921555, + "learning_rate": 4.8416578493597485e-08, + "loss": 0.0019, + "step": 299390 + }, + { + "epoch": 1.9202585999557455, + "grad_norm": 0.1061360165476799, + "learning_rate": 4.833890772020622e-08, + "loss": 0.0029, + "step": 299400 + }, + { + "epoch": 1.9203227368495317, + "grad_norm": 0.07087648659944534, + "learning_rate": 4.826129899466792e-08, + "loss": 0.0005, + "step": 299410 + }, + { + "epoch": 1.9203868737433178, + "grad_norm": 0.04247698560357094, + "learning_rate": 4.818375231795458e-08, + "loss": 0.0006, + "step": 299420 + }, + { + "epoch": 1.920451010637104, + "grad_norm": 0.13066044449806213, + "learning_rate": 4.81062676910371e-08, + "loss": 0.0013, + "step": 299430 + }, + { + "epoch": 1.92051514753089, + "grad_norm": 0.08296853303909302, + "learning_rate": 4.802884511488748e-08, + "loss": 0.0006, + "step": 299440 + }, + { + "epoch": 1.920579284424676, + "grad_norm": 0.03349597379565239, + "learning_rate": 4.795148459047605e-08, + "loss": 0.0006, + "step": 299450 + }, + { + "epoch": 1.920643421318462, + "grad_norm": 0.04592498391866684, + "learning_rate": 4.787418611877093e-08, + "loss": 0.0004, + "step": 299460 + }, + { + "epoch": 1.9207075582122481, + "grad_norm": 0.045830707997083664, + "learning_rate": 4.779694970074189e-08, + "loss": 0.001, + "step": 299470 + }, + { + "epoch": 1.9207716951060343, + "grad_norm": 0.0611383356153965, + "learning_rate": 4.77197753373565e-08, + "loss": 0.0015, + "step": 299480 + }, + { + "epoch": 1.9208358319998204, + "grad_norm": 0.03120235539972782, + "learning_rate": 4.76426630295812e-08, + "loss": 0.0011, + "step": 299490 + }, + { + "epoch": 1.9208999688936066, + "grad_norm": 0.0034458893351256847, + "learning_rate": 4.756561277838301e-08, + "loss": 0.0014, + "step": 299500 + }, + { + "epoch": 1.9209641057873927, + "grad_norm": 0.0789545401930809, + "learning_rate": 4.748862458472725e-08, + "loss": 0.0008, + "step": 299510 + }, + { + "epoch": 1.9210282426811787, + "grad_norm": 0.03234486281871796, + "learning_rate": 4.741169844957816e-08, + "loss": 0.0013, + "step": 299520 + }, + { + "epoch": 1.9210923795749648, + "grad_norm": 0.059302132576704025, + "learning_rate": 4.733483437390052e-08, + "loss": 0.0012, + "step": 299530 + }, + { + "epoch": 1.9211565164687507, + "grad_norm": 0.13612000644207, + "learning_rate": 4.7258032358656893e-08, + "loss": 0.0009, + "step": 299540 + }, + { + "epoch": 1.921220653362537, + "grad_norm": 0.07007851451635361, + "learning_rate": 4.7181292404809286e-08, + "loss": 0.0011, + "step": 299550 + }, + { + "epoch": 1.921284790256323, + "grad_norm": 0.117681585252285, + "learning_rate": 4.710461451332027e-08, + "loss": 0.0009, + "step": 299560 + }, + { + "epoch": 1.9213489271501092, + "grad_norm": 0.05283145606517792, + "learning_rate": 4.7027998685150176e-08, + "loss": 0.0012, + "step": 299570 + }, + { + "epoch": 1.9214130640438953, + "grad_norm": 0.11339683830738068, + "learning_rate": 4.695144492125936e-08, + "loss": 0.001, + "step": 299580 + }, + { + "epoch": 1.9214772009376815, + "grad_norm": 0.07208563387393951, + "learning_rate": 4.687495322260649e-08, + "loss": 0.0012, + "step": 299590 + }, + { + "epoch": 1.9215413378314676, + "grad_norm": 0.16996242105960846, + "learning_rate": 4.6798523590150246e-08, + "loss": 0.0017, + "step": 299600 + }, + { + "epoch": 1.9216054747252536, + "grad_norm": 0.03294508531689644, + "learning_rate": 4.6722156024848754e-08, + "loss": 0.0007, + "step": 299610 + }, + { + "epoch": 1.9216696116190397, + "grad_norm": 0.007039350923150778, + "learning_rate": 4.664585052765902e-08, + "loss": 0.0018, + "step": 299620 + }, + { + "epoch": 1.9217337485128256, + "grad_norm": 0.10350693017244339, + "learning_rate": 4.6569607099536393e-08, + "loss": 0.0009, + "step": 299630 + }, + { + "epoch": 1.9217978854066118, + "grad_norm": 0.056322213262319565, + "learning_rate": 4.649342574143678e-08, + "loss": 0.0007, + "step": 299640 + }, + { + "epoch": 1.921862022300398, + "grad_norm": 0.009310414083302021, + "learning_rate": 4.641730645431497e-08, + "loss": 0.0018, + "step": 299650 + }, + { + "epoch": 1.921926159194184, + "grad_norm": 0.07358931750059128, + "learning_rate": 4.634124923912464e-08, + "loss": 0.0008, + "step": 299660 + }, + { + "epoch": 1.9219902960879702, + "grad_norm": 0.0843188539147377, + "learning_rate": 4.6265254096818367e-08, + "loss": 0.0035, + "step": 299670 + }, + { + "epoch": 1.9220544329817564, + "grad_norm": 0.023625105619430542, + "learning_rate": 4.618932102834928e-08, + "loss": 0.0014, + "step": 299680 + }, + { + "epoch": 1.9221185698755423, + "grad_norm": 0.11312787979841232, + "learning_rate": 4.611345003466827e-08, + "loss": 0.0009, + "step": 299690 + }, + { + "epoch": 1.9221827067693285, + "grad_norm": 0.0016061868518590927, + "learning_rate": 4.603764111672626e-08, + "loss": 0.0004, + "step": 299700 + }, + { + "epoch": 1.9222468436631146, + "grad_norm": 0.08886443078517914, + "learning_rate": 4.596189427547304e-08, + "loss": 0.0011, + "step": 299710 + }, + { + "epoch": 1.9223109805569005, + "grad_norm": 0.0731070414185524, + "learning_rate": 4.588620951185785e-08, + "loss": 0.0008, + "step": 299720 + }, + { + "epoch": 1.9223751174506867, + "grad_norm": 0.2453460395336151, + "learning_rate": 4.581058682682937e-08, + "loss": 0.0023, + "step": 299730 + }, + { + "epoch": 1.9224392543444728, + "grad_norm": 0.006312183570116758, + "learning_rate": 4.573502622133463e-08, + "loss": 0.0017, + "step": 299740 + }, + { + "epoch": 1.922503391238259, + "grad_norm": 0.1199653148651123, + "learning_rate": 4.565952769632065e-08, + "loss": 0.0008, + "step": 299750 + }, + { + "epoch": 1.9225675281320451, + "grad_norm": 0.10376963764429092, + "learning_rate": 4.558409125273444e-08, + "loss": 0.0023, + "step": 299760 + }, + { + "epoch": 1.9226316650258313, + "grad_norm": 0.2373136430978775, + "learning_rate": 4.55087168915197e-08, + "loss": 0.0043, + "step": 299770 + }, + { + "epoch": 1.9226958019196172, + "grad_norm": 0.10240045189857483, + "learning_rate": 4.543340461362178e-08, + "loss": 0.0007, + "step": 299780 + }, + { + "epoch": 1.9227599388134033, + "grad_norm": 0.07114317268133163, + "learning_rate": 4.535815441998437e-08, + "loss": 0.0009, + "step": 299790 + }, + { + "epoch": 1.9228240757071893, + "grad_norm": 0.13522303104400635, + "learning_rate": 4.5282966311550045e-08, + "loss": 0.0007, + "step": 299800 + }, + { + "epoch": 1.9228882126009754, + "grad_norm": 0.05266407132148743, + "learning_rate": 4.520784028926195e-08, + "loss": 0.0011, + "step": 299810 + }, + { + "epoch": 1.9229523494947616, + "grad_norm": 0.037769682705402374, + "learning_rate": 4.5132776354059884e-08, + "loss": 0.0006, + "step": 299820 + }, + { + "epoch": 1.9230164863885477, + "grad_norm": 0.04296983778476715, + "learning_rate": 4.505777450688586e-08, + "loss": 0.0006, + "step": 299830 + }, + { + "epoch": 1.9230806232823339, + "grad_norm": 0.12407558411359787, + "learning_rate": 4.498283474867915e-08, + "loss": 0.0011, + "step": 299840 + }, + { + "epoch": 1.92314476017612, + "grad_norm": 0.08229169994592667, + "learning_rate": 4.490795708037843e-08, + "loss": 0.0006, + "step": 299850 + }, + { + "epoch": 1.9232088970699062, + "grad_norm": 0.010448667220771313, + "learning_rate": 4.4833141502922395e-08, + "loss": 0.0007, + "step": 299860 + }, + { + "epoch": 1.923273033963692, + "grad_norm": 0.0727403461933136, + "learning_rate": 4.4758388017248636e-08, + "loss": 0.0008, + "step": 299870 + }, + { + "epoch": 1.9233371708574782, + "grad_norm": 0.09879482537508011, + "learning_rate": 4.468369662429417e-08, + "loss": 0.0011, + "step": 299880 + }, + { + "epoch": 1.9234013077512642, + "grad_norm": 0.15423615276813507, + "learning_rate": 4.460906732499437e-08, + "loss": 0.0029, + "step": 299890 + }, + { + "epoch": 1.9234654446450503, + "grad_norm": 0.2108854353427887, + "learning_rate": 4.453450012028404e-08, + "loss": 0.0024, + "step": 299900 + }, + { + "epoch": 1.9235295815388365, + "grad_norm": 0.08908912539482117, + "learning_rate": 4.445999501109799e-08, + "loss": 0.0013, + "step": 299910 + }, + { + "epoch": 1.9235937184326226, + "grad_norm": 0.13990257680416107, + "learning_rate": 4.438555199837047e-08, + "loss": 0.0009, + "step": 299920 + }, + { + "epoch": 1.9236578553264088, + "grad_norm": 0.05871908366680145, + "learning_rate": 4.4311171083032956e-08, + "loss": 0.0004, + "step": 299930 + }, + { + "epoch": 1.923721992220195, + "grad_norm": 0.05720856785774231, + "learning_rate": 4.423685226601915e-08, + "loss": 0.0008, + "step": 299940 + }, + { + "epoch": 1.9237861291139808, + "grad_norm": 0.025936510413885117, + "learning_rate": 4.416259554825886e-08, + "loss": 0.001, + "step": 299950 + }, + { + "epoch": 1.923850266007767, + "grad_norm": 0.06441635638475418, + "learning_rate": 4.408840093068301e-08, + "loss": 0.0007, + "step": 299960 + }, + { + "epoch": 1.923914402901553, + "grad_norm": 0.17235547304153442, + "learning_rate": 4.4014268414221404e-08, + "loss": 0.0021, + "step": 299970 + }, + { + "epoch": 1.923978539795339, + "grad_norm": 0.07071554660797119, + "learning_rate": 4.3940197999803315e-08, + "loss": 0.0009, + "step": 299980 + }, + { + "epoch": 1.9240426766891252, + "grad_norm": 0.01034556794911623, + "learning_rate": 4.386618968835688e-08, + "loss": 0.0009, + "step": 299990 + }, + { + "epoch": 1.9241068135829114, + "grad_norm": 0.08722493052482605, + "learning_rate": 4.379224348080913e-08, + "loss": 0.0008, + "step": 300000 + }, + { + "epoch": 1.9241709504766975, + "grad_norm": 0.07715963572263718, + "learning_rate": 4.3718359378085994e-08, + "loss": 0.0007, + "step": 300010 + }, + { + "epoch": 1.9242350873704837, + "grad_norm": 0.08677951246500015, + "learning_rate": 4.364453738111451e-08, + "loss": 0.0006, + "step": 300020 + }, + { + "epoch": 1.9242992242642698, + "grad_norm": 0.05860080569982529, + "learning_rate": 4.357077749081895e-08, + "loss": 0.0009, + "step": 300030 + }, + { + "epoch": 1.9243633611580557, + "grad_norm": 0.05761454999446869, + "learning_rate": 4.3497079708124114e-08, + "loss": 0.0011, + "step": 300040 + }, + { + "epoch": 1.9244274980518419, + "grad_norm": 0.20905107259750366, + "learning_rate": 4.342344403395316e-08, + "loss": 0.0008, + "step": 300050 + }, + { + "epoch": 1.9244916349456278, + "grad_norm": 0.01208533812314272, + "learning_rate": 4.3349870469229226e-08, + "loss": 0.0012, + "step": 300060 + }, + { + "epoch": 1.924555771839414, + "grad_norm": 0.05330813676118851, + "learning_rate": 4.327635901487326e-08, + "loss": 0.0006, + "step": 300070 + }, + { + "epoch": 1.9246199087332, + "grad_norm": 0.0022559245117008686, + "learning_rate": 4.320290967180729e-08, + "loss": 0.0013, + "step": 300080 + }, + { + "epoch": 1.9246840456269863, + "grad_norm": 0.06547002494335175, + "learning_rate": 4.31295224409517e-08, + "loss": 0.0022, + "step": 300090 + }, + { + "epoch": 1.9247481825207724, + "grad_norm": 0.10205364972352982, + "learning_rate": 4.305619732322519e-08, + "loss": 0.0013, + "step": 300100 + }, + { + "epoch": 1.9248123194145585, + "grad_norm": 0.14018011093139648, + "learning_rate": 4.298293431954759e-08, + "loss": 0.0009, + "step": 300110 + }, + { + "epoch": 1.9248764563083447, + "grad_norm": 0.0348082073032856, + "learning_rate": 4.290973343083649e-08, + "loss": 0.0005, + "step": 300120 + }, + { + "epoch": 1.9249405932021306, + "grad_norm": 0.029344813898205757, + "learning_rate": 4.283659465800949e-08, + "loss": 0.0007, + "step": 300130 + }, + { + "epoch": 1.9250047300959168, + "grad_norm": 0.1087023913860321, + "learning_rate": 4.276351800198253e-08, + "loss": 0.0007, + "step": 300140 + }, + { + "epoch": 1.9250688669897027, + "grad_norm": 0.01807575672864914, + "learning_rate": 4.269050346367154e-08, + "loss": 0.001, + "step": 300150 + }, + { + "epoch": 1.9251330038834888, + "grad_norm": 0.0954509899020195, + "learning_rate": 4.261755104399135e-08, + "loss": 0.0011, + "step": 300160 + }, + { + "epoch": 1.925197140777275, + "grad_norm": 0.019217127934098244, + "learning_rate": 4.254466074385677e-08, + "loss": 0.0019, + "step": 300170 + }, + { + "epoch": 1.9252612776710611, + "grad_norm": 0.03665965422987938, + "learning_rate": 4.2471832564180414e-08, + "loss": 0.0008, + "step": 300180 + }, + { + "epoch": 1.9253254145648473, + "grad_norm": 0.04427497088909149, + "learning_rate": 4.2399066505874886e-08, + "loss": 0.0017, + "step": 300190 + }, + { + "epoch": 1.9253895514586334, + "grad_norm": 0.003385307500138879, + "learning_rate": 4.232636256985167e-08, + "loss": 0.0005, + "step": 300200 + }, + { + "epoch": 1.9254536883524194, + "grad_norm": 0.08548618108034134, + "learning_rate": 4.225372075702339e-08, + "loss": 0.0012, + "step": 300210 + }, + { + "epoch": 1.9255178252462055, + "grad_norm": 0.07868991792201996, + "learning_rate": 4.2181141068298736e-08, + "loss": 0.0013, + "step": 300220 + }, + { + "epoch": 1.9255819621399914, + "grad_norm": 0.07562734186649323, + "learning_rate": 4.210862350458755e-08, + "loss": 0.0019, + "step": 300230 + }, + { + "epoch": 1.9256460990337776, + "grad_norm": 0.08704736828804016, + "learning_rate": 4.2036168066798554e-08, + "loss": 0.0018, + "step": 300240 + }, + { + "epoch": 1.9257102359275637, + "grad_norm": 0.09126976877450943, + "learning_rate": 4.196377475584046e-08, + "loss": 0.0009, + "step": 300250 + }, + { + "epoch": 1.92577437282135, + "grad_norm": 0.10753437131643295, + "learning_rate": 4.189144357261921e-08, + "loss": 0.0016, + "step": 300260 + }, + { + "epoch": 1.925838509715136, + "grad_norm": 0.11447235196828842, + "learning_rate": 4.181917451804185e-08, + "loss": 0.0008, + "step": 300270 + }, + { + "epoch": 1.9259026466089222, + "grad_norm": 0.08692572265863419, + "learning_rate": 4.174696759301322e-08, + "loss": 0.0006, + "step": 300280 + }, + { + "epoch": 1.9259667835027083, + "grad_norm": 0.02848992496728897, + "learning_rate": 4.167482279843926e-08, + "loss": 0.0008, + "step": 300290 + }, + { + "epoch": 1.9260309203964943, + "grad_norm": 0.012743757106363773, + "learning_rate": 4.1602740135223697e-08, + "loss": 0.0011, + "step": 300300 + }, + { + "epoch": 1.9260950572902804, + "grad_norm": 0.04856209456920624, + "learning_rate": 4.153071960426913e-08, + "loss": 0.0008, + "step": 300310 + }, + { + "epoch": 1.9261591941840663, + "grad_norm": 0.05277502164244652, + "learning_rate": 4.1458761206478185e-08, + "loss": 0.0005, + "step": 300320 + }, + { + "epoch": 1.9262233310778525, + "grad_norm": 0.10218650102615356, + "learning_rate": 4.1386864942753456e-08, + "loss": 0.0009, + "step": 300330 + }, + { + "epoch": 1.9262874679716386, + "grad_norm": 0.03546750545501709, + "learning_rate": 4.13150308139948e-08, + "loss": 0.0006, + "step": 300340 + }, + { + "epoch": 1.9263516048654248, + "grad_norm": 0.07997418940067291, + "learning_rate": 4.124325882110203e-08, + "loss": 0.0005, + "step": 300350 + }, + { + "epoch": 1.926415741759211, + "grad_norm": 0.11265015602111816, + "learning_rate": 4.1171548964976106e-08, + "loss": 0.0017, + "step": 300360 + }, + { + "epoch": 1.926479878652997, + "grad_norm": 0.013606592081487179, + "learning_rate": 4.109990124651464e-08, + "loss": 0.001, + "step": 300370 + }, + { + "epoch": 1.926544015546783, + "grad_norm": 0.02300187200307846, + "learning_rate": 4.102831566661525e-08, + "loss": 0.0012, + "step": 300380 + }, + { + "epoch": 1.9266081524405692, + "grad_norm": 0.02995084412395954, + "learning_rate": 4.095679222617499e-08, + "loss": 0.0009, + "step": 300390 + }, + { + "epoch": 1.926672289334355, + "grad_norm": 0.060416627675294876, + "learning_rate": 4.0885330926090924e-08, + "loss": 0.0004, + "step": 300400 + }, + { + "epoch": 1.9267364262281412, + "grad_norm": 0.04969345033168793, + "learning_rate": 4.0813931767257345e-08, + "loss": 0.0008, + "step": 300410 + }, + { + "epoch": 1.9268005631219274, + "grad_norm": 0.07248201966285706, + "learning_rate": 4.074259475056963e-08, + "loss": 0.0013, + "step": 300420 + }, + { + "epoch": 1.9268647000157135, + "grad_norm": 0.05075995996594429, + "learning_rate": 4.0671319876921524e-08, + "loss": 0.0009, + "step": 300430 + }, + { + "epoch": 1.9269288369094997, + "grad_norm": 0.02230651304125786, + "learning_rate": 4.060010714720619e-08, + "loss": 0.0006, + "step": 300440 + }, + { + "epoch": 1.9269929738032858, + "grad_norm": 0.11635108292102814, + "learning_rate": 4.052895656231626e-08, + "loss": 0.0009, + "step": 300450 + }, + { + "epoch": 1.927057110697072, + "grad_norm": 0.04682289436459541, + "learning_rate": 4.045786812314268e-08, + "loss": 0.0016, + "step": 300460 + }, + { + "epoch": 1.927121247590858, + "grad_norm": 0.221276193857193, + "learning_rate": 4.0386841830576396e-08, + "loss": 0.0007, + "step": 300470 + }, + { + "epoch": 1.927185384484644, + "grad_norm": 0.3462485074996948, + "learning_rate": 4.031587768550782e-08, + "loss": 0.0021, + "step": 300480 + }, + { + "epoch": 1.92724952137843, + "grad_norm": 0.02788991667330265, + "learning_rate": 4.0244975688825685e-08, + "loss": 0.0004, + "step": 300490 + }, + { + "epoch": 1.9273136582722161, + "grad_norm": 0.05282348394393921, + "learning_rate": 4.017413584141871e-08, + "loss": 0.0019, + "step": 300500 + }, + { + "epoch": 1.9273777951660023, + "grad_norm": 0.10648701339960098, + "learning_rate": 4.0103358144174544e-08, + "loss": 0.0009, + "step": 300510 + }, + { + "epoch": 1.9274419320597884, + "grad_norm": 0.07527918368577957, + "learning_rate": 4.003264259798023e-08, + "loss": 0.0022, + "step": 300520 + }, + { + "epoch": 1.9275060689535746, + "grad_norm": 0.018936704844236374, + "learning_rate": 3.996198920372174e-08, + "loss": 0.0015, + "step": 300530 + }, + { + "epoch": 1.9275702058473607, + "grad_norm": 0.09216234087944031, + "learning_rate": 3.989139796228447e-08, + "loss": 0.0016, + "step": 300540 + }, + { + "epoch": 1.9276343427411469, + "grad_norm": 0.02676011249423027, + "learning_rate": 3.982086887455272e-08, + "loss": 0.0007, + "step": 300550 + }, + { + "epoch": 1.9276984796349328, + "grad_norm": 0.14437265694141388, + "learning_rate": 3.9750401941410775e-08, + "loss": 0.0011, + "step": 300560 + }, + { + "epoch": 1.927762616528719, + "grad_norm": 0.008645369671285152, + "learning_rate": 3.967999716374127e-08, + "loss": 0.0003, + "step": 300570 + }, + { + "epoch": 1.9278267534225049, + "grad_norm": 0.028107842430472374, + "learning_rate": 3.9609654542425716e-08, + "loss": 0.0008, + "step": 300580 + }, + { + "epoch": 1.927890890316291, + "grad_norm": 0.022598829120397568, + "learning_rate": 3.953937407834729e-08, + "loss": 0.001, + "step": 300590 + }, + { + "epoch": 1.9279550272100772, + "grad_norm": 0.0549064464867115, + "learning_rate": 3.9469155772385304e-08, + "loss": 0.0004, + "step": 300600 + }, + { + "epoch": 1.9280191641038633, + "grad_norm": 0.07589433342218399, + "learning_rate": 3.9398999625420154e-08, + "loss": 0.001, + "step": 300610 + }, + { + "epoch": 1.9280833009976495, + "grad_norm": 0.09359247982501984, + "learning_rate": 3.932890563833058e-08, + "loss": 0.0008, + "step": 300620 + }, + { + "epoch": 1.9281474378914356, + "grad_norm": 0.10599889606237411, + "learning_rate": 3.9258873811995334e-08, + "loss": 0.001, + "step": 300630 + }, + { + "epoch": 1.9282115747852215, + "grad_norm": 0.08819228410720825, + "learning_rate": 3.918890414729204e-08, + "loss": 0.0009, + "step": 300640 + }, + { + "epoch": 1.9282757116790077, + "grad_norm": 0.08009287714958191, + "learning_rate": 3.9118996645096666e-08, + "loss": 0.0008, + "step": 300650 + }, + { + "epoch": 1.9283398485727936, + "grad_norm": 0.11573512852191925, + "learning_rate": 3.904915130628628e-08, + "loss": 0.0012, + "step": 300660 + }, + { + "epoch": 1.9284039854665798, + "grad_norm": 0.0738484263420105, + "learning_rate": 3.8979368131735196e-08, + "loss": 0.0011, + "step": 300670 + }, + { + "epoch": 1.928468122360366, + "grad_norm": 0.007015854585915804, + "learning_rate": 3.890964712231826e-08, + "loss": 0.0012, + "step": 300680 + }, + { + "epoch": 1.928532259254152, + "grad_norm": 0.19372880458831787, + "learning_rate": 3.883998827890923e-08, + "loss": 0.0018, + "step": 300690 + }, + { + "epoch": 1.9285963961479382, + "grad_norm": 0.024199509993195534, + "learning_rate": 3.877039160238072e-08, + "loss": 0.0006, + "step": 300700 + }, + { + "epoch": 1.9286605330417244, + "grad_norm": 0.30805060267448425, + "learning_rate": 3.870085709360538e-08, + "loss": 0.0013, + "step": 300710 + }, + { + "epoch": 1.9287246699355105, + "grad_norm": 0.01284112874418497, + "learning_rate": 3.863138475345363e-08, + "loss": 0.0019, + "step": 300720 + }, + { + "epoch": 1.9287888068292964, + "grad_norm": 0.057999007403850555, + "learning_rate": 3.8561974582796423e-08, + "loss": 0.0007, + "step": 300730 + }, + { + "epoch": 1.9288529437230826, + "grad_norm": 0.3170008659362793, + "learning_rate": 3.849262658250363e-08, + "loss": 0.0021, + "step": 300740 + }, + { + "epoch": 1.9289170806168685, + "grad_norm": 0.024755142629146576, + "learning_rate": 3.842334075344401e-08, + "loss": 0.002, + "step": 300750 + }, + { + "epoch": 1.9289812175106547, + "grad_norm": 0.12757644057273865, + "learning_rate": 3.8354117096486285e-08, + "loss": 0.001, + "step": 300760 + }, + { + "epoch": 1.9290453544044408, + "grad_norm": 0.04172433167695999, + "learning_rate": 3.828495561249757e-08, + "loss": 0.0009, + "step": 300770 + }, + { + "epoch": 1.929109491298227, + "grad_norm": 0.03098372370004654, + "learning_rate": 3.8215856302343815e-08, + "loss": 0.0012, + "step": 300780 + }, + { + "epoch": 1.929173628192013, + "grad_norm": 0.04408879205584526, + "learning_rate": 3.8146819166892115e-08, + "loss": 0.0012, + "step": 300790 + }, + { + "epoch": 1.9292377650857992, + "grad_norm": 0.039453618228435516, + "learning_rate": 3.8077844207006774e-08, + "loss": 0.0008, + "step": 300800 + }, + { + "epoch": 1.9293019019795852, + "grad_norm": 0.14602237939834595, + "learning_rate": 3.800893142355211e-08, + "loss": 0.0009, + "step": 300810 + }, + { + "epoch": 1.9293660388733713, + "grad_norm": 0.09760798513889313, + "learning_rate": 3.794008081739187e-08, + "loss": 0.0009, + "step": 300820 + }, + { + "epoch": 1.9294301757671575, + "grad_norm": 0.004577153827995062, + "learning_rate": 3.787129238938925e-08, + "loss": 0.002, + "step": 300830 + }, + { + "epoch": 1.9294943126609434, + "grad_norm": 0.0346844345331192, + "learning_rate": 3.7802566140405225e-08, + "loss": 0.0011, + "step": 300840 + }, + { + "epoch": 1.9295584495547295, + "grad_norm": 0.2503458857536316, + "learning_rate": 3.773390207130134e-08, + "loss": 0.0011, + "step": 300850 + }, + { + "epoch": 1.9296225864485157, + "grad_norm": 0.006880940869450569, + "learning_rate": 3.7665300182938566e-08, + "loss": 0.0011, + "step": 300860 + }, + { + "epoch": 1.9296867233423018, + "grad_norm": 0.06815887242555618, + "learning_rate": 3.759676047617622e-08, + "loss": 0.0012, + "step": 300870 + }, + { + "epoch": 1.929750860236088, + "grad_norm": 0.2355438768863678, + "learning_rate": 3.75282829518725e-08, + "loss": 0.0018, + "step": 300880 + }, + { + "epoch": 1.9298149971298741, + "grad_norm": 0.14180922508239746, + "learning_rate": 3.745986761088616e-08, + "loss": 0.0011, + "step": 300890 + }, + { + "epoch": 1.92987913402366, + "grad_norm": 0.01800578273832798, + "learning_rate": 3.73915144540743e-08, + "loss": 0.0007, + "step": 300900 + }, + { + "epoch": 1.9299432709174462, + "grad_norm": 0.0636366456747055, + "learning_rate": 3.7323223482294e-08, + "loss": 0.0032, + "step": 300910 + }, + { + "epoch": 1.9300074078112321, + "grad_norm": 0.04221118241548538, + "learning_rate": 3.7254994696399596e-08, + "loss": 0.0007, + "step": 300920 + }, + { + "epoch": 1.9300715447050183, + "grad_norm": 0.03675359487533569, + "learning_rate": 3.7186828097247607e-08, + "loss": 0.0009, + "step": 300930 + }, + { + "epoch": 1.9301356815988044, + "grad_norm": 0.02007216587662697, + "learning_rate": 3.711872368569125e-08, + "loss": 0.0011, + "step": 300940 + }, + { + "epoch": 1.9301998184925906, + "grad_norm": 0.12596172094345093, + "learning_rate": 3.705068146258428e-08, + "loss": 0.0023, + "step": 300950 + }, + { + "epoch": 1.9302639553863767, + "grad_norm": 0.047864366322755814, + "learning_rate": 3.6982701428779356e-08, + "loss": 0.0008, + "step": 300960 + }, + { + "epoch": 1.9303280922801629, + "grad_norm": 0.012640939094126225, + "learning_rate": 3.6914783585128014e-08, + "loss": 0.001, + "step": 300970 + }, + { + "epoch": 1.930392229173949, + "grad_norm": 0.0019188645528629422, + "learning_rate": 3.684692793248179e-08, + "loss": 0.001, + "step": 300980 + }, + { + "epoch": 1.930456366067735, + "grad_norm": 0.13600589334964752, + "learning_rate": 3.677913447169057e-08, + "loss": 0.0008, + "step": 300990 + }, + { + "epoch": 1.9305205029615211, + "grad_norm": 0.041920121759176254, + "learning_rate": 3.671140320360367e-08, + "loss": 0.0012, + "step": 301000 + }, + { + "epoch": 1.930584639855307, + "grad_norm": 0.050655558705329895, + "learning_rate": 3.6643734129070406e-08, + "loss": 0.0009, + "step": 301010 + }, + { + "epoch": 1.9306487767490932, + "grad_norm": 0.12476855516433716, + "learning_rate": 3.6576127248938444e-08, + "loss": 0.001, + "step": 301020 + }, + { + "epoch": 1.9307129136428793, + "grad_norm": 0.1970900446176529, + "learning_rate": 3.650858256405432e-08, + "loss": 0.0009, + "step": 301030 + }, + { + "epoch": 1.9307770505366655, + "grad_norm": 0.009478774853050709, + "learning_rate": 3.644110007526569e-08, + "loss": 0.0012, + "step": 301040 + }, + { + "epoch": 1.9308411874304516, + "grad_norm": 0.14857624471187592, + "learning_rate": 3.637367978341688e-08, + "loss": 0.0007, + "step": 301050 + }, + { + "epoch": 1.9309053243242378, + "grad_norm": 0.0902068167924881, + "learning_rate": 3.630632168935389e-08, + "loss": 0.0009, + "step": 301060 + }, + { + "epoch": 1.9309694612180237, + "grad_norm": 0.15382826328277588, + "learning_rate": 3.6239025793919914e-08, + "loss": 0.0008, + "step": 301070 + }, + { + "epoch": 1.9310335981118099, + "grad_norm": 0.0057074157521128654, + "learning_rate": 3.617179209795873e-08, + "loss": 0.0008, + "step": 301080 + }, + { + "epoch": 1.9310977350055958, + "grad_norm": 0.027745436877012253, + "learning_rate": 3.610462060231246e-08, + "loss": 0.0012, + "step": 301090 + }, + { + "epoch": 1.931161871899382, + "grad_norm": 0.09244703501462936, + "learning_rate": 3.603751130782263e-08, + "loss": 0.0012, + "step": 301100 + }, + { + "epoch": 1.931226008793168, + "grad_norm": 0.12194220721721649, + "learning_rate": 3.597046421533079e-08, + "loss": 0.0006, + "step": 301110 + }, + { + "epoch": 1.9312901456869542, + "grad_norm": 0.07963887602090836, + "learning_rate": 3.590347932567684e-08, + "loss": 0.0008, + "step": 301120 + }, + { + "epoch": 1.9313542825807404, + "grad_norm": 0.1852940022945404, + "learning_rate": 3.5836556639700096e-08, + "loss": 0.0016, + "step": 301130 + }, + { + "epoch": 1.9314184194745265, + "grad_norm": 0.0025362635497003794, + "learning_rate": 3.5769696158238775e-08, + "loss": 0.0006, + "step": 301140 + }, + { + "epoch": 1.9314825563683127, + "grad_norm": 0.1968517005443573, + "learning_rate": 3.57028978821311e-08, + "loss": 0.0007, + "step": 301150 + }, + { + "epoch": 1.9315466932620986, + "grad_norm": 0.11529047787189484, + "learning_rate": 3.5636161812214186e-08, + "loss": 0.0012, + "step": 301160 + }, + { + "epoch": 1.9316108301558848, + "grad_norm": 0.022898223251104355, + "learning_rate": 3.5569487949324576e-08, + "loss": 0.0097, + "step": 301170 + }, + { + "epoch": 1.9316749670496707, + "grad_norm": 0.07718861103057861, + "learning_rate": 3.5502876294296605e-08, + "loss": 0.0006, + "step": 301180 + }, + { + "epoch": 1.9317391039434568, + "grad_norm": 0.06445500254631042, + "learning_rate": 3.543632684796572e-08, + "loss": 0.0009, + "step": 301190 + }, + { + "epoch": 1.931803240837243, + "grad_norm": 0.045879118144512177, + "learning_rate": 3.5369839611166244e-08, + "loss": 0.0008, + "step": 301200 + }, + { + "epoch": 1.9318673777310291, + "grad_norm": 0.12509427964687347, + "learning_rate": 3.530341458473085e-08, + "loss": 0.0009, + "step": 301210 + }, + { + "epoch": 1.9319315146248153, + "grad_norm": 0.041130181401968, + "learning_rate": 3.5237051769491104e-08, + "loss": 0.0008, + "step": 301220 + }, + { + "epoch": 1.9319956515186014, + "grad_norm": 0.09322613477706909, + "learning_rate": 3.517075116628021e-08, + "loss": 0.0006, + "step": 301230 + }, + { + "epoch": 1.9320597884123873, + "grad_norm": 0.15340624749660492, + "learning_rate": 3.510451277592752e-08, + "loss": 0.0011, + "step": 301240 + }, + { + "epoch": 1.9321239253061735, + "grad_norm": 0.005170913878828287, + "learning_rate": 3.503833659926403e-08, + "loss": 0.0013, + "step": 301250 + }, + { + "epoch": 1.9321880621999596, + "grad_norm": 0.04025174677371979, + "learning_rate": 3.4972222637118524e-08, + "loss": 0.0026, + "step": 301260 + }, + { + "epoch": 1.9322521990937456, + "grad_norm": 0.061025556176900864, + "learning_rate": 3.490617089031978e-08, + "loss": 0.005, + "step": 301270 + }, + { + "epoch": 1.9323163359875317, + "grad_norm": 0.04001593962311745, + "learning_rate": 3.484018135969491e-08, + "loss": 0.0011, + "step": 301280 + }, + { + "epoch": 1.9323804728813179, + "grad_norm": 0.11239849030971527, + "learning_rate": 3.477425404607104e-08, + "loss": 0.001, + "step": 301290 + }, + { + "epoch": 1.932444609775104, + "grad_norm": 0.023942064493894577, + "learning_rate": 3.4708388950274174e-08, + "loss": 0.0005, + "step": 301300 + }, + { + "epoch": 1.9325087466688902, + "grad_norm": 0.08422784507274628, + "learning_rate": 3.464258607313031e-08, + "loss": 0.0019, + "step": 301310 + }, + { + "epoch": 1.9325728835626763, + "grad_norm": 0.10249890387058258, + "learning_rate": 3.457684541546325e-08, + "loss": 0.0011, + "step": 301320 + }, + { + "epoch": 1.9326370204564622, + "grad_norm": 0.07031677663326263, + "learning_rate": 3.451116697809731e-08, + "loss": 0.0007, + "step": 301330 + }, + { + "epoch": 1.9327011573502484, + "grad_norm": 0.0049631018191576, + "learning_rate": 3.444555076185463e-08, + "loss": 0.0012, + "step": 301340 + }, + { + "epoch": 1.9327652942440343, + "grad_norm": 0.0954102948307991, + "learning_rate": 3.437999676755843e-08, + "loss": 0.0017, + "step": 301350 + }, + { + "epoch": 1.9328294311378205, + "grad_norm": 0.06614626944065094, + "learning_rate": 3.431450499602973e-08, + "loss": 0.0006, + "step": 301360 + }, + { + "epoch": 1.9328935680316066, + "grad_norm": 0.02178000658750534, + "learning_rate": 3.424907544808953e-08, + "loss": 0.001, + "step": 301370 + }, + { + "epoch": 1.9329577049253928, + "grad_norm": 0.024998584762215614, + "learning_rate": 3.418370812455718e-08, + "loss": 0.0009, + "step": 301380 + }, + { + "epoch": 1.933021841819179, + "grad_norm": 0.03699403628706932, + "learning_rate": 3.4118403026251464e-08, + "loss": 0.0008, + "step": 301390 + }, + { + "epoch": 1.933085978712965, + "grad_norm": 0.039904557168483734, + "learning_rate": 3.405316015399174e-08, + "loss": 0.0025, + "step": 301400 + }, + { + "epoch": 1.9331501156067512, + "grad_norm": 0.05057976767420769, + "learning_rate": 3.398797950859511e-08, + "loss": 0.0009, + "step": 301410 + }, + { + "epoch": 1.9332142525005371, + "grad_norm": 0.003374518360942602, + "learning_rate": 3.392286109087817e-08, + "loss": 0.001, + "step": 301420 + }, + { + "epoch": 1.9332783893943233, + "grad_norm": 0.07602231949567795, + "learning_rate": 3.385780490165691e-08, + "loss": 0.0019, + "step": 301430 + }, + { + "epoch": 1.9333425262881092, + "grad_norm": 0.08708921074867249, + "learning_rate": 3.379281094174625e-08, + "loss": 0.0007, + "step": 301440 + }, + { + "epoch": 1.9334066631818954, + "grad_norm": 0.02393578365445137, + "learning_rate": 3.372787921196108e-08, + "loss": 0.0006, + "step": 301450 + }, + { + "epoch": 1.9334708000756815, + "grad_norm": 0.12386545538902283, + "learning_rate": 3.36630097131152e-08, + "loss": 0.0006, + "step": 301460 + }, + { + "epoch": 1.9335349369694677, + "grad_norm": 0.07013312727212906, + "learning_rate": 3.35982024460213e-08, + "loss": 0.0007, + "step": 301470 + }, + { + "epoch": 1.9335990738632538, + "grad_norm": 0.040222447365522385, + "learning_rate": 3.35334574114915e-08, + "loss": 0.0005, + "step": 301480 + }, + { + "epoch": 1.93366321075704, + "grad_norm": 0.030433671548962593, + "learning_rate": 3.346877461033626e-08, + "loss": 0.0017, + "step": 301490 + }, + { + "epoch": 1.9337273476508259, + "grad_norm": 0.16435152292251587, + "learning_rate": 3.340415404336716e-08, + "loss": 0.0015, + "step": 301500 + }, + { + "epoch": 1.933791484544612, + "grad_norm": 0.056320659816265106, + "learning_rate": 3.333959571139356e-08, + "loss": 0.0012, + "step": 301510 + }, + { + "epoch": 1.933855621438398, + "grad_norm": 0.18155111372470856, + "learning_rate": 3.327509961522479e-08, + "loss": 0.0015, + "step": 301520 + }, + { + "epoch": 1.933919758332184, + "grad_norm": 0.026386840268969536, + "learning_rate": 3.3210665755668004e-08, + "loss": 0.0021, + "step": 301530 + }, + { + "epoch": 1.9339838952259703, + "grad_norm": 0.024649258702993393, + "learning_rate": 3.3146294133531984e-08, + "loss": 0.0008, + "step": 301540 + }, + { + "epoch": 1.9340480321197564, + "grad_norm": 0.0012394103687256575, + "learning_rate": 3.308198474962221e-08, + "loss": 0.0015, + "step": 301550 + }, + { + "epoch": 1.9341121690135425, + "grad_norm": 0.0007696148240938783, + "learning_rate": 3.301773760474525e-08, + "loss": 0.0009, + "step": 301560 + }, + { + "epoch": 1.9341763059073287, + "grad_norm": 0.058741334825754166, + "learning_rate": 3.295355269970546e-08, + "loss": 0.0019, + "step": 301570 + }, + { + "epoch": 1.9342404428011148, + "grad_norm": 0.20647259056568146, + "learning_rate": 3.288943003530831e-08, + "loss": 0.001, + "step": 301580 + }, + { + "epoch": 1.9343045796949008, + "grad_norm": 0.03462643548846245, + "learning_rate": 3.282536961235594e-08, + "loss": 0.0007, + "step": 301590 + }, + { + "epoch": 1.934368716588687, + "grad_norm": 0.0032162105198949575, + "learning_rate": 3.276137143165159e-08, + "loss": 0.0011, + "step": 301600 + }, + { + "epoch": 1.9344328534824728, + "grad_norm": 0.059601809829473495, + "learning_rate": 3.26974354939974e-08, + "loss": 0.0008, + "step": 301610 + }, + { + "epoch": 1.934496990376259, + "grad_norm": 0.012852217070758343, + "learning_rate": 3.2633561800194945e-08, + "loss": 0.0011, + "step": 301620 + }, + { + "epoch": 1.9345611272700451, + "grad_norm": 0.13055984675884247, + "learning_rate": 3.256975035104304e-08, + "loss": 0.0006, + "step": 301630 + }, + { + "epoch": 1.9346252641638313, + "grad_norm": 0.14420166611671448, + "learning_rate": 3.250600114734326e-08, + "loss": 0.0011, + "step": 301640 + }, + { + "epoch": 1.9346894010576174, + "grad_norm": 0.25341367721557617, + "learning_rate": 3.24423141898933e-08, + "loss": 0.0013, + "step": 301650 + }, + { + "epoch": 1.9347535379514036, + "grad_norm": 0.02456929162144661, + "learning_rate": 3.237868947949141e-08, + "loss": 0.0008, + "step": 301660 + }, + { + "epoch": 1.9348176748451897, + "grad_norm": 0.10025903582572937, + "learning_rate": 3.231512701693418e-08, + "loss": 0.0005, + "step": 301670 + }, + { + "epoch": 1.9348818117389757, + "grad_norm": 0.04823238030076027, + "learning_rate": 3.22516268030193e-08, + "loss": 0.0007, + "step": 301680 + }, + { + "epoch": 1.9349459486327618, + "grad_norm": 0.011798826046288013, + "learning_rate": 3.2188188838542246e-08, + "loss": 0.0012, + "step": 301690 + }, + { + "epoch": 1.9350100855265477, + "grad_norm": 0.04304208979010582, + "learning_rate": 3.212481312429738e-08, + "loss": 0.0008, + "step": 301700 + }, + { + "epoch": 1.935074222420334, + "grad_norm": 0.0785306766629219, + "learning_rate": 3.2061499661079075e-08, + "loss": 0.0016, + "step": 301710 + }, + { + "epoch": 1.93513835931412, + "grad_norm": 0.012932732701301575, + "learning_rate": 3.199824844968058e-08, + "loss": 0.0005, + "step": 301720 + }, + { + "epoch": 1.9352024962079062, + "grad_norm": 0.0498775988817215, + "learning_rate": 3.193505949089459e-08, + "loss": 0.0009, + "step": 301730 + }, + { + "epoch": 1.9352666331016923, + "grad_norm": 0.20959006249904633, + "learning_rate": 3.187193278551326e-08, + "loss": 0.0014, + "step": 301740 + }, + { + "epoch": 1.9353307699954785, + "grad_norm": 0.057461485266685486, + "learning_rate": 3.1808868334327056e-08, + "loss": 0.0006, + "step": 301750 + }, + { + "epoch": 1.9353949068892644, + "grad_norm": 0.052913159132003784, + "learning_rate": 3.1745866138126466e-08, + "loss": 0.0019, + "step": 301760 + }, + { + "epoch": 1.9354590437830506, + "grad_norm": 0.12710267305374146, + "learning_rate": 3.168292619770086e-08, + "loss": 0.0013, + "step": 301770 + }, + { + "epoch": 1.9355231806768365, + "grad_norm": 0.1411878615617752, + "learning_rate": 3.1620048513838485e-08, + "loss": 0.0011, + "step": 301780 + }, + { + "epoch": 1.9355873175706226, + "grad_norm": 0.14976029098033905, + "learning_rate": 3.155723308732872e-08, + "loss": 0.0024, + "step": 301790 + }, + { + "epoch": 1.9356514544644088, + "grad_norm": 0.10860023647546768, + "learning_rate": 3.1494479918957044e-08, + "loss": 0.0011, + "step": 301800 + }, + { + "epoch": 1.935715591358195, + "grad_norm": 0.09502911567687988, + "learning_rate": 3.143178900951061e-08, + "loss": 0.0012, + "step": 301810 + }, + { + "epoch": 1.935779728251981, + "grad_norm": 0.05954446643590927, + "learning_rate": 3.136916035977489e-08, + "loss": 0.0013, + "step": 301820 + }, + { + "epoch": 1.9358438651457672, + "grad_norm": 0.05650576576590538, + "learning_rate": 3.1306593970534814e-08, + "loss": 0.0008, + "step": 301830 + }, + { + "epoch": 1.9359080020395534, + "grad_norm": 0.13886955380439758, + "learning_rate": 3.124408984257421e-08, + "loss": 0.0015, + "step": 301840 + }, + { + "epoch": 1.9359721389333393, + "grad_norm": 0.0654849037528038, + "learning_rate": 3.1181647976676334e-08, + "loss": 0.0012, + "step": 301850 + }, + { + "epoch": 1.9360362758271255, + "grad_norm": 0.02531741000711918, + "learning_rate": 3.1119268373623336e-08, + "loss": 0.0008, + "step": 301860 + }, + { + "epoch": 1.9361004127209114, + "grad_norm": 0.03234170749783516, + "learning_rate": 3.105695103419682e-08, + "loss": 0.0007, + "step": 301870 + }, + { + "epoch": 1.9361645496146975, + "grad_norm": 0.07435993105173111, + "learning_rate": 3.099469595917837e-08, + "loss": 0.0007, + "step": 301880 + }, + { + "epoch": 1.9362286865084837, + "grad_norm": 0.06573089957237244, + "learning_rate": 3.093250314934737e-08, + "loss": 0.0011, + "step": 301890 + }, + { + "epoch": 1.9362928234022698, + "grad_norm": 0.0297558456659317, + "learning_rate": 3.087037260548376e-08, + "loss": 0.0005, + "step": 301900 + }, + { + "epoch": 1.936356960296056, + "grad_norm": 0.03819414973258972, + "learning_rate": 3.080830432836579e-08, + "loss": 0.0006, + "step": 301910 + }, + { + "epoch": 1.9364210971898421, + "grad_norm": 0.07690879702568054, + "learning_rate": 3.074629831877119e-08, + "loss": 0.0007, + "step": 301920 + }, + { + "epoch": 1.936485234083628, + "grad_norm": 0.06591645628213882, + "learning_rate": 3.0684354577476536e-08, + "loss": 0.001, + "step": 301930 + }, + { + "epoch": 1.9365493709774142, + "grad_norm": 0.08025170862674713, + "learning_rate": 3.062247310525845e-08, + "loss": 0.0009, + "step": 301940 + }, + { + "epoch": 1.9366135078712001, + "grad_norm": 0.044352661818265915, + "learning_rate": 3.056065390289298e-08, + "loss": 0.0008, + "step": 301950 + }, + { + "epoch": 1.9366776447649863, + "grad_norm": 0.02093079872429371, + "learning_rate": 3.049889697115393e-08, + "loss": 0.0016, + "step": 301960 + }, + { + "epoch": 1.9367417816587724, + "grad_norm": 0.0008874760824255645, + "learning_rate": 3.043720231081515e-08, + "loss": 0.0017, + "step": 301970 + }, + { + "epoch": 1.9368059185525586, + "grad_norm": 0.03476732224225998, + "learning_rate": 3.037556992264934e-08, + "loss": 0.0008, + "step": 301980 + }, + { + "epoch": 1.9368700554463447, + "grad_norm": 0.027935517951846123, + "learning_rate": 3.0313999807430324e-08, + "loss": 0.0014, + "step": 301990 + }, + { + "epoch": 1.9369341923401309, + "grad_norm": 0.06478878855705261, + "learning_rate": 3.0252491965928057e-08, + "loss": 0.0009, + "step": 302000 + }, + { + "epoch": 1.936998329233917, + "grad_norm": 0.0036114610265940428, + "learning_rate": 3.019104639891468e-08, + "loss": 0.0009, + "step": 302010 + }, + { + "epoch": 1.937062466127703, + "grad_norm": 0.007749971468001604, + "learning_rate": 3.012966310715848e-08, + "loss": 0.0007, + "step": 302020 + }, + { + "epoch": 1.937126603021489, + "grad_norm": 0.0017212193924933672, + "learning_rate": 3.0068342091429945e-08, + "loss": 0.0007, + "step": 302030 + }, + { + "epoch": 1.937190739915275, + "grad_norm": 0.02803710475564003, + "learning_rate": 3.00070833524968e-08, + "loss": 0.0013, + "step": 302040 + }, + { + "epoch": 1.9372548768090612, + "grad_norm": 0.11851103603839874, + "learning_rate": 2.994588689112676e-08, + "loss": 0.0007, + "step": 302050 + }, + { + "epoch": 1.9373190137028473, + "grad_norm": 0.10424335300922394, + "learning_rate": 2.988475270808755e-08, + "loss": 0.0014, + "step": 302060 + }, + { + "epoch": 1.9373831505966335, + "grad_norm": 0.16494068503379822, + "learning_rate": 2.982368080414411e-08, + "loss": 0.0007, + "step": 302070 + }, + { + "epoch": 1.9374472874904196, + "grad_norm": 0.032014600932598114, + "learning_rate": 2.976267118006193e-08, + "loss": 0.0011, + "step": 302080 + }, + { + "epoch": 1.9375114243842058, + "grad_norm": 0.06066601350903511, + "learning_rate": 2.970172383660541e-08, + "loss": 0.0009, + "step": 302090 + }, + { + "epoch": 1.937575561277992, + "grad_norm": 0.0030617748852819204, + "learning_rate": 2.964083877453894e-08, + "loss": 0.0005, + "step": 302100 + }, + { + "epoch": 1.9376396981717778, + "grad_norm": 0.1492859423160553, + "learning_rate": 2.9580015994625234e-08, + "loss": 0.0009, + "step": 302110 + }, + { + "epoch": 1.937703835065564, + "grad_norm": 0.10125358402729034, + "learning_rate": 2.951925549762591e-08, + "loss": 0.0025, + "step": 302120 + }, + { + "epoch": 1.93776797195935, + "grad_norm": 0.11417628824710846, + "learning_rate": 2.9458557284302027e-08, + "loss": 0.0013, + "step": 302130 + }, + { + "epoch": 1.937832108853136, + "grad_norm": 0.041539266705513, + "learning_rate": 2.9397921355415747e-08, + "loss": 0.0053, + "step": 302140 + }, + { + "epoch": 1.9378962457469222, + "grad_norm": 0.05827486515045166, + "learning_rate": 2.9337347711725917e-08, + "loss": 0.0009, + "step": 302150 + }, + { + "epoch": 1.9379603826407084, + "grad_norm": 0.30581918358802795, + "learning_rate": 2.9276836353991366e-08, + "loss": 0.002, + "step": 302160 + }, + { + "epoch": 1.9380245195344945, + "grad_norm": 0.02726643532514572, + "learning_rate": 2.921638728297038e-08, + "loss": 0.0011, + "step": 302170 + }, + { + "epoch": 1.9380886564282807, + "grad_norm": 0.025319676846265793, + "learning_rate": 2.915600049942069e-08, + "loss": 0.0004, + "step": 302180 + }, + { + "epoch": 1.9381527933220666, + "grad_norm": 0.0578426830470562, + "learning_rate": 2.9095676004098905e-08, + "loss": 0.0005, + "step": 302190 + }, + { + "epoch": 1.9382169302158527, + "grad_norm": 0.007352834101766348, + "learning_rate": 2.903541379776109e-08, + "loss": 0.001, + "step": 302200 + }, + { + "epoch": 1.9382810671096387, + "grad_norm": 0.020987290889024734, + "learning_rate": 2.8975213881162202e-08, + "loss": 0.0009, + "step": 302210 + }, + { + "epoch": 1.9383452040034248, + "grad_norm": 0.14074473083019257, + "learning_rate": 2.8915076255056628e-08, + "loss": 0.0008, + "step": 302220 + }, + { + "epoch": 1.938409340897211, + "grad_norm": 0.0046698665246367455, + "learning_rate": 2.8855000920198217e-08, + "loss": 0.0014, + "step": 302230 + }, + { + "epoch": 1.938473477790997, + "grad_norm": 0.08994993567466736, + "learning_rate": 2.8794987877338588e-08, + "loss": 0.0011, + "step": 302240 + }, + { + "epoch": 1.9385376146847833, + "grad_norm": 0.03455111384391785, + "learning_rate": 2.873503712723158e-08, + "loss": 0.0012, + "step": 302250 + }, + { + "epoch": 1.9386017515785694, + "grad_norm": 0.014766179956495762, + "learning_rate": 2.867514867062715e-08, + "loss": 0.0009, + "step": 302260 + }, + { + "epoch": 1.9386658884723555, + "grad_norm": 0.006934038363397121, + "learning_rate": 2.861532250827581e-08, + "loss": 0.0006, + "step": 302270 + }, + { + "epoch": 1.9387300253661415, + "grad_norm": 0.027285199612379074, + "learning_rate": 2.8555558640927516e-08, + "loss": 0.0008, + "step": 302280 + }, + { + "epoch": 1.9387941622599276, + "grad_norm": 0.07449705898761749, + "learning_rate": 2.8495857069331668e-08, + "loss": 0.0005, + "step": 302290 + }, + { + "epoch": 1.9388582991537135, + "grad_norm": 0.16933131217956543, + "learning_rate": 2.8436217794235442e-08, + "loss": 0.0011, + "step": 302300 + }, + { + "epoch": 1.9389224360474997, + "grad_norm": 0.10641875863075256, + "learning_rate": 2.837664081638658e-08, + "loss": 0.0012, + "step": 302310 + }, + { + "epoch": 1.9389865729412858, + "grad_norm": 0.13166961073875427, + "learning_rate": 2.8317126136531148e-08, + "loss": 0.0023, + "step": 302320 + }, + { + "epoch": 1.939050709835072, + "grad_norm": 0.10253006219863892, + "learning_rate": 2.825767375541577e-08, + "loss": 0.001, + "step": 302330 + }, + { + "epoch": 1.9391148467288581, + "grad_norm": 0.06676961481571198, + "learning_rate": 2.8198283673784855e-08, + "loss": 0.0006, + "step": 302340 + }, + { + "epoch": 1.9391789836226443, + "grad_norm": 0.020916448906064034, + "learning_rate": 2.8138955892382804e-08, + "loss": 0.0009, + "step": 302350 + }, + { + "epoch": 1.9392431205164302, + "grad_norm": 0.03495674952864647, + "learning_rate": 2.807969041195291e-08, + "loss": 0.0009, + "step": 302360 + }, + { + "epoch": 1.9393072574102164, + "grad_norm": 0.10187196731567383, + "learning_rate": 2.8020487233237916e-08, + "loss": 0.0017, + "step": 302370 + }, + { + "epoch": 1.9393713943040025, + "grad_norm": 0.08674407750368118, + "learning_rate": 2.796134635698e-08, + "loss": 0.0004, + "step": 302380 + }, + { + "epoch": 1.9394355311977884, + "grad_norm": 0.01654968410730362, + "learning_rate": 2.7902267783919136e-08, + "loss": 0.0011, + "step": 302390 + }, + { + "epoch": 1.9394996680915746, + "grad_norm": 0.42116132378578186, + "learning_rate": 2.7843251514796943e-08, + "loss": 0.0007, + "step": 302400 + }, + { + "epoch": 1.9395638049853607, + "grad_norm": 0.17139796912670135, + "learning_rate": 2.778429755035228e-08, + "loss": 0.0013, + "step": 302410 + }, + { + "epoch": 1.9396279418791469, + "grad_norm": 0.032461978495121, + "learning_rate": 2.772540589132344e-08, + "loss": 0.0008, + "step": 302420 + }, + { + "epoch": 1.939692078772933, + "grad_norm": 0.051237910985946655, + "learning_rate": 2.7666576538449285e-08, + "loss": 0.0009, + "step": 302430 + }, + { + "epoch": 1.9397562156667192, + "grad_norm": 0.09664763510227203, + "learning_rate": 2.7607809492466997e-08, + "loss": 0.0006, + "step": 302440 + }, + { + "epoch": 1.9398203525605051, + "grad_norm": 0.0451032929122448, + "learning_rate": 2.75491047541121e-08, + "loss": 0.0006, + "step": 302450 + }, + { + "epoch": 1.9398844894542913, + "grad_norm": 0.04460052028298378, + "learning_rate": 2.749046232412067e-08, + "loss": 0.003, + "step": 302460 + }, + { + "epoch": 1.9399486263480772, + "grad_norm": 0.0648808553814888, + "learning_rate": 2.7431882203227678e-08, + "loss": 0.0008, + "step": 302470 + }, + { + "epoch": 1.9400127632418633, + "grad_norm": 0.09854970127344131, + "learning_rate": 2.737336439216698e-08, + "loss": 0.0006, + "step": 302480 + }, + { + "epoch": 1.9400769001356495, + "grad_norm": 0.08210846781730652, + "learning_rate": 2.7314908891671875e-08, + "loss": 0.0008, + "step": 302490 + }, + { + "epoch": 1.9401410370294356, + "grad_norm": 0.04816485196352005, + "learning_rate": 2.725651570247456e-08, + "loss": 0.0005, + "step": 302500 + }, + { + "epoch": 1.9402051739232218, + "grad_norm": 0.05821401998400688, + "learning_rate": 2.7198184825307782e-08, + "loss": 0.0013, + "step": 302510 + }, + { + "epoch": 1.940269310817008, + "grad_norm": 0.07066737115383148, + "learning_rate": 2.713991626090151e-08, + "loss": 0.0012, + "step": 302520 + }, + { + "epoch": 1.940333447710794, + "grad_norm": 0.048833467066287994, + "learning_rate": 2.708171000998572e-08, + "loss": 0.001, + "step": 302530 + }, + { + "epoch": 1.94039758460458, + "grad_norm": 0.19674324989318848, + "learning_rate": 2.7023566073290374e-08, + "loss": 0.0008, + "step": 302540 + }, + { + "epoch": 1.9404617214983662, + "grad_norm": 0.15980391204357147, + "learning_rate": 2.6965484451544343e-08, + "loss": 0.0011, + "step": 302550 + }, + { + "epoch": 1.940525858392152, + "grad_norm": 0.029844066128134727, + "learning_rate": 2.690746514547482e-08, + "loss": 0.0005, + "step": 302560 + }, + { + "epoch": 1.9405899952859382, + "grad_norm": 0.04482239857316017, + "learning_rate": 2.6849508155808446e-08, + "loss": 0.0017, + "step": 302570 + }, + { + "epoch": 1.9406541321797244, + "grad_norm": 0.04891893267631531, + "learning_rate": 2.6791613483272416e-08, + "loss": 0.0007, + "step": 302580 + }, + { + "epoch": 1.9407182690735105, + "grad_norm": 0.07550440728664398, + "learning_rate": 2.6733781128591707e-08, + "loss": 0.0019, + "step": 302590 + }, + { + "epoch": 1.9407824059672967, + "grad_norm": 0.07568196952342987, + "learning_rate": 2.667601109249074e-08, + "loss": 0.0011, + "step": 302600 + }, + { + "epoch": 1.9408465428610828, + "grad_norm": 0.05623389407992363, + "learning_rate": 2.6618303375694487e-08, + "loss": 0.0011, + "step": 302610 + }, + { + "epoch": 1.9409106797548688, + "grad_norm": 0.07222924381494522, + "learning_rate": 2.6560657978924598e-08, + "loss": 0.0013, + "step": 302620 + }, + { + "epoch": 1.940974816648655, + "grad_norm": 0.05307445302605629, + "learning_rate": 2.650307490290438e-08, + "loss": 0.0005, + "step": 302630 + }, + { + "epoch": 1.9410389535424408, + "grad_norm": 0.1193118616938591, + "learning_rate": 2.644555414835548e-08, + "loss": 0.0013, + "step": 302640 + }, + { + "epoch": 1.941103090436227, + "grad_norm": 0.01188894547522068, + "learning_rate": 2.6388095715997875e-08, + "loss": 0.001, + "step": 302650 + }, + { + "epoch": 1.9411672273300131, + "grad_norm": 0.0663500651717186, + "learning_rate": 2.63306996065521e-08, + "loss": 0.0022, + "step": 302660 + }, + { + "epoch": 1.9412313642237993, + "grad_norm": 0.10533638298511505, + "learning_rate": 2.6273365820737028e-08, + "loss": 0.0005, + "step": 302670 + }, + { + "epoch": 1.9412955011175854, + "grad_norm": 0.061424218118190765, + "learning_rate": 2.6216094359272083e-08, + "loss": 0.0007, + "step": 302680 + }, + { + "epoch": 1.9413596380113716, + "grad_norm": 0.058211881667375565, + "learning_rate": 2.615888522287391e-08, + "loss": 0.0022, + "step": 302690 + }, + { + "epoch": 1.9414237749051577, + "grad_norm": 0.38756659626960754, + "learning_rate": 2.6101738412259158e-08, + "loss": 0.0033, + "step": 302700 + }, + { + "epoch": 1.9414879117989436, + "grad_norm": 0.10443129390478134, + "learning_rate": 2.6044653928144483e-08, + "loss": 0.0023, + "step": 302710 + }, + { + "epoch": 1.9415520486927298, + "grad_norm": 0.0632278099656105, + "learning_rate": 2.598763177124597e-08, + "loss": 0.0009, + "step": 302720 + }, + { + "epoch": 1.9416161855865157, + "grad_norm": 0.15798984467983246, + "learning_rate": 2.5930671942276386e-08, + "loss": 0.0013, + "step": 302730 + }, + { + "epoch": 1.9416803224803019, + "grad_norm": 0.11811117082834244, + "learning_rate": 2.5873774441950718e-08, + "loss": 0.0019, + "step": 302740 + }, + { + "epoch": 1.941744459374088, + "grad_norm": 0.11421682685613632, + "learning_rate": 2.581693927098172e-08, + "loss": 0.0007, + "step": 302750 + }, + { + "epoch": 1.9418085962678742, + "grad_norm": 0.11522186547517776, + "learning_rate": 2.576016643008161e-08, + "loss": 0.0011, + "step": 302760 + }, + { + "epoch": 1.9418727331616603, + "grad_norm": 0.12734942138195038, + "learning_rate": 2.570345591996093e-08, + "loss": 0.0007, + "step": 302770 + }, + { + "epoch": 1.9419368700554465, + "grad_norm": 0.16883651912212372, + "learning_rate": 2.564680774133188e-08, + "loss": 0.0006, + "step": 302780 + }, + { + "epoch": 1.9420010069492324, + "grad_norm": 0.26879411935806274, + "learning_rate": 2.5590221894903346e-08, + "loss": 0.001, + "step": 302790 + }, + { + "epoch": 1.9420651438430185, + "grad_norm": 0.027143623679876328, + "learning_rate": 2.5533698381384196e-08, + "loss": 0.0009, + "step": 302800 + }, + { + "epoch": 1.9421292807368047, + "grad_norm": 0.21365833282470703, + "learning_rate": 2.5477237201482764e-08, + "loss": 0.0012, + "step": 302810 + }, + { + "epoch": 1.9421934176305906, + "grad_norm": 0.1140991821885109, + "learning_rate": 2.5420838355907364e-08, + "loss": 0.001, + "step": 302820 + }, + { + "epoch": 1.9422575545243768, + "grad_norm": 0.09537651389837265, + "learning_rate": 2.5364501845363543e-08, + "loss": 0.0012, + "step": 302830 + }, + { + "epoch": 1.942321691418163, + "grad_norm": 0.002741380361840129, + "learning_rate": 2.5308227670558517e-08, + "loss": 0.0013, + "step": 302840 + }, + { + "epoch": 1.942385828311949, + "grad_norm": 0.029889706522226334, + "learning_rate": 2.5252015832196164e-08, + "loss": 0.0009, + "step": 302850 + }, + { + "epoch": 1.9424499652057352, + "grad_norm": 0.011436429806053638, + "learning_rate": 2.519586633098148e-08, + "loss": 0.0024, + "step": 302860 + }, + { + "epoch": 1.9425141020995214, + "grad_norm": 0.09918776899576187, + "learning_rate": 2.513977916761834e-08, + "loss": 0.001, + "step": 302870 + }, + { + "epoch": 1.9425782389933073, + "grad_norm": 0.03670860826969147, + "learning_rate": 2.5083754342808963e-08, + "loss": 0.0009, + "step": 302880 + }, + { + "epoch": 1.9426423758870934, + "grad_norm": 0.14740601181983948, + "learning_rate": 2.5027791857255567e-08, + "loss": 0.0017, + "step": 302890 + }, + { + "epoch": 1.9427065127808794, + "grad_norm": 0.0429193489253521, + "learning_rate": 2.4971891711659813e-08, + "loss": 0.0009, + "step": 302900 + }, + { + "epoch": 1.9427706496746655, + "grad_norm": 0.0554153174161911, + "learning_rate": 2.4916053906722247e-08, + "loss": 0.0004, + "step": 302910 + }, + { + "epoch": 1.9428347865684517, + "grad_norm": 0.22062762081623077, + "learning_rate": 2.4860278443141206e-08, + "loss": 0.0007, + "step": 302920 + }, + { + "epoch": 1.9428989234622378, + "grad_norm": 0.02430759370326996, + "learning_rate": 2.4804565321617235e-08, + "loss": 0.0011, + "step": 302930 + }, + { + "epoch": 1.942963060356024, + "grad_norm": 0.059388384222984314, + "learning_rate": 2.474891454284756e-08, + "loss": 0.0015, + "step": 302940 + }, + { + "epoch": 1.94302719724981, + "grad_norm": 0.020595358684659004, + "learning_rate": 2.4693326107529945e-08, + "loss": 0.001, + "step": 302950 + }, + { + "epoch": 1.9430913341435962, + "grad_norm": 0.10308534651994705, + "learning_rate": 2.4637800016360512e-08, + "loss": 0.0016, + "step": 302960 + }, + { + "epoch": 1.9431554710373822, + "grad_norm": 0.1445896029472351, + "learning_rate": 2.4582336270035367e-08, + "loss": 0.0012, + "step": 302970 + }, + { + "epoch": 1.9432196079311683, + "grad_norm": 0.1349262297153473, + "learning_rate": 2.4526934869249507e-08, + "loss": 0.001, + "step": 302980 + }, + { + "epoch": 1.9432837448249543, + "grad_norm": 0.024352222681045532, + "learning_rate": 2.4471595814696825e-08, + "loss": 0.0012, + "step": 302990 + }, + { + "epoch": 1.9433478817187404, + "grad_norm": 0.06732741743326187, + "learning_rate": 2.4416319107071206e-08, + "loss": 0.0015, + "step": 303000 + }, + { + "epoch": 1.9434120186125265, + "grad_norm": 0.1403336524963379, + "learning_rate": 2.436110474706488e-08, + "loss": 0.0015, + "step": 303010 + }, + { + "epoch": 1.9434761555063127, + "grad_norm": 0.005384480115026236, + "learning_rate": 2.430595273537062e-08, + "loss": 0.0008, + "step": 303020 + }, + { + "epoch": 1.9435402924000988, + "grad_norm": 0.032813455909490585, + "learning_rate": 2.4250863072678434e-08, + "loss": 0.0015, + "step": 303030 + }, + { + "epoch": 1.943604429293885, + "grad_norm": 0.08630481362342834, + "learning_rate": 2.419583575967832e-08, + "loss": 0.0015, + "step": 303040 + }, + { + "epoch": 1.943668566187671, + "grad_norm": 0.045145533978939056, + "learning_rate": 2.41408707970614e-08, + "loss": 0.0012, + "step": 303050 + }, + { + "epoch": 1.943732703081457, + "grad_norm": 0.04118989035487175, + "learning_rate": 2.408596818551545e-08, + "loss": 0.0013, + "step": 303060 + }, + { + "epoch": 1.943796839975243, + "grad_norm": 0.10800248384475708, + "learning_rate": 2.403112792572826e-08, + "loss": 0.0012, + "step": 303070 + }, + { + "epoch": 1.9438609768690291, + "grad_norm": 0.06161636859178543, + "learning_rate": 2.3976350018387608e-08, + "loss": 0.0016, + "step": 303080 + }, + { + "epoch": 1.9439251137628153, + "grad_norm": 0.06434313952922821, + "learning_rate": 2.392163446417961e-08, + "loss": 0.0005, + "step": 303090 + }, + { + "epoch": 1.9439892506566014, + "grad_norm": 0.028090180829167366, + "learning_rate": 2.3866981263789835e-08, + "loss": 0.0007, + "step": 303100 + }, + { + "epoch": 1.9440533875503876, + "grad_norm": 0.01200695801526308, + "learning_rate": 2.381239041790273e-08, + "loss": 0.0012, + "step": 303110 + }, + { + "epoch": 1.9441175244441737, + "grad_norm": 0.17337866127490997, + "learning_rate": 2.3757861927203308e-08, + "loss": 0.0009, + "step": 303120 + }, + { + "epoch": 1.9441816613379599, + "grad_norm": 0.0266607403755188, + "learning_rate": 2.3703395792374352e-08, + "loss": 0.0008, + "step": 303130 + }, + { + "epoch": 1.9442457982317458, + "grad_norm": 0.11554598808288574, + "learning_rate": 2.3648992014098092e-08, + "loss": 0.0011, + "step": 303140 + }, + { + "epoch": 1.944309935125532, + "grad_norm": 0.059785395860672, + "learning_rate": 2.3594650593056767e-08, + "loss": 0.0012, + "step": 303150 + }, + { + "epoch": 1.944374072019318, + "grad_norm": 0.027033396065235138, + "learning_rate": 2.354037152993094e-08, + "loss": 0.0005, + "step": 303160 + }, + { + "epoch": 1.944438208913104, + "grad_norm": 0.04057193920016289, + "learning_rate": 2.348615482540062e-08, + "loss": 0.0023, + "step": 303170 + }, + { + "epoch": 1.9445023458068902, + "grad_norm": 0.05944840610027313, + "learning_rate": 2.3432000480145822e-08, + "loss": 0.0007, + "step": 303180 + }, + { + "epoch": 1.9445664827006763, + "grad_norm": 0.026654450222849846, + "learning_rate": 2.3377908494844337e-08, + "loss": 0.0016, + "step": 303190 + }, + { + "epoch": 1.9446306195944625, + "grad_norm": 0.05117101967334747, + "learning_rate": 2.3323878870175064e-08, + "loss": 0.0006, + "step": 303200 + }, + { + "epoch": 1.9446947564882486, + "grad_norm": 0.07699143141508102, + "learning_rate": 2.3269911606813577e-08, + "loss": 0.0009, + "step": 303210 + }, + { + "epoch": 1.9447588933820348, + "grad_norm": 0.07043943554162979, + "learning_rate": 2.321600670543711e-08, + "loss": 0.0011, + "step": 303220 + }, + { + "epoch": 1.9448230302758207, + "grad_norm": 0.11004768311977386, + "learning_rate": 2.3162164166721235e-08, + "loss": 0.0009, + "step": 303230 + }, + { + "epoch": 1.9448871671696069, + "grad_norm": 0.02211976796388626, + "learning_rate": 2.310838399134041e-08, + "loss": 0.0015, + "step": 303240 + }, + { + "epoch": 1.9449513040633928, + "grad_norm": 0.11930811405181885, + "learning_rate": 2.3054666179968544e-08, + "loss": 0.0007, + "step": 303250 + }, + { + "epoch": 1.945015440957179, + "grad_norm": 0.0007218879763968289, + "learning_rate": 2.3001010733277873e-08, + "loss": 0.0005, + "step": 303260 + }, + { + "epoch": 1.945079577850965, + "grad_norm": 0.020383605733513832, + "learning_rate": 2.2947417651942305e-08, + "loss": 0.0007, + "step": 303270 + }, + { + "epoch": 1.9451437147447512, + "grad_norm": 0.03223626688122749, + "learning_rate": 2.2893886936632413e-08, + "loss": 0.0028, + "step": 303280 + }, + { + "epoch": 1.9452078516385374, + "grad_norm": 0.08068500459194183, + "learning_rate": 2.284041858801933e-08, + "loss": 0.0011, + "step": 303290 + }, + { + "epoch": 1.9452719885323235, + "grad_norm": 0.06491301208734512, + "learning_rate": 2.278701260677252e-08, + "loss": 0.0007, + "step": 303300 + }, + { + "epoch": 1.9453361254261095, + "grad_norm": 0.07721704989671707, + "learning_rate": 2.2733668993561998e-08, + "loss": 0.0011, + "step": 303310 + }, + { + "epoch": 1.9454002623198956, + "grad_norm": 0.03412219136953354, + "learning_rate": 2.268038774905612e-08, + "loss": 0.0021, + "step": 303320 + }, + { + "epoch": 1.9454643992136815, + "grad_norm": 0.17845505475997925, + "learning_rate": 2.262716887392158e-08, + "loss": 0.0007, + "step": 303330 + }, + { + "epoch": 1.9455285361074677, + "grad_norm": 0.040481481701135635, + "learning_rate": 2.2574012368825616e-08, + "loss": 0.0011, + "step": 303340 + }, + { + "epoch": 1.9455926730012538, + "grad_norm": 0.10173839330673218, + "learning_rate": 2.2520918234435473e-08, + "loss": 0.0014, + "step": 303350 + }, + { + "epoch": 1.94565680989504, + "grad_norm": 0.003593247616663575, + "learning_rate": 2.2467886471415068e-08, + "loss": 0.0018, + "step": 303360 + }, + { + "epoch": 1.9457209467888261, + "grad_norm": 0.017439965158700943, + "learning_rate": 2.241491708042942e-08, + "loss": 0.0011, + "step": 303370 + }, + { + "epoch": 1.9457850836826123, + "grad_norm": 0.12718206644058228, + "learning_rate": 2.2362010062142448e-08, + "loss": 0.0011, + "step": 303380 + }, + { + "epoch": 1.9458492205763984, + "grad_norm": 0.0425318107008934, + "learning_rate": 2.230916541721695e-08, + "loss": 0.0003, + "step": 303390 + }, + { + "epoch": 1.9459133574701843, + "grad_norm": 0.03548678010702133, + "learning_rate": 2.2256383146315175e-08, + "loss": 0.0006, + "step": 303400 + }, + { + "epoch": 1.9459774943639705, + "grad_norm": 0.0281289741396904, + "learning_rate": 2.2203663250098263e-08, + "loss": 0.0015, + "step": 303410 + }, + { + "epoch": 1.9460416312577564, + "grad_norm": 0.00972016528248787, + "learning_rate": 2.2151005729226794e-08, + "loss": 0.0009, + "step": 303420 + }, + { + "epoch": 1.9461057681515426, + "grad_norm": 0.3040933907032013, + "learning_rate": 2.2098410584361352e-08, + "loss": 0.0013, + "step": 303430 + }, + { + "epoch": 1.9461699050453287, + "grad_norm": 0.00990355759859085, + "learning_rate": 2.20458778161603e-08, + "loss": 0.001, + "step": 303440 + }, + { + "epoch": 1.9462340419391149, + "grad_norm": 0.06964608281850815, + "learning_rate": 2.1993407425282e-08, + "loss": 0.0013, + "step": 303450 + }, + { + "epoch": 1.946298178832901, + "grad_norm": 0.013352076523005962, + "learning_rate": 2.1940999412384258e-08, + "loss": 0.0007, + "step": 303460 + }, + { + "epoch": 1.9463623157266872, + "grad_norm": 0.09254525601863861, + "learning_rate": 2.1888653778123214e-08, + "loss": 0.001, + "step": 303470 + }, + { + "epoch": 1.946426452620473, + "grad_norm": 0.04405030235648155, + "learning_rate": 2.183637052315557e-08, + "loss": 0.0009, + "step": 303480 + }, + { + "epoch": 1.9464905895142592, + "grad_norm": 0.014495084062218666, + "learning_rate": 2.17841496481358e-08, + "loss": 0.0021, + "step": 303490 + }, + { + "epoch": 1.9465547264080452, + "grad_norm": 0.02775007300078869, + "learning_rate": 2.173199115371838e-08, + "loss": 0.0008, + "step": 303500 + }, + { + "epoch": 1.9466188633018313, + "grad_norm": 0.06201065331697464, + "learning_rate": 2.167989504055723e-08, + "loss": 0.0005, + "step": 303510 + }, + { + "epoch": 1.9466830001956175, + "grad_norm": 0.032466646283864975, + "learning_rate": 2.1627861309305164e-08, + "loss": 0.0016, + "step": 303520 + }, + { + "epoch": 1.9467471370894036, + "grad_norm": 0.10472922772169113, + "learning_rate": 2.1575889960613327e-08, + "loss": 0.0011, + "step": 303530 + }, + { + "epoch": 1.9468112739831898, + "grad_norm": 0.21284520626068115, + "learning_rate": 2.1523980995133976e-08, + "loss": 0.0008, + "step": 303540 + }, + { + "epoch": 1.946875410876976, + "grad_norm": 0.06618592888116837, + "learning_rate": 2.1472134413517698e-08, + "loss": 0.0013, + "step": 303550 + }, + { + "epoch": 1.946939547770762, + "grad_norm": 0.050356101244688034, + "learning_rate": 2.1420350216412866e-08, + "loss": 0.0008, + "step": 303560 + }, + { + "epoch": 1.947003684664548, + "grad_norm": 0.03412504494190216, + "learning_rate": 2.1368628404469517e-08, + "loss": 0.0012, + "step": 303570 + }, + { + "epoch": 1.9470678215583341, + "grad_norm": 0.08310101926326752, + "learning_rate": 2.1316968978335463e-08, + "loss": 0.0012, + "step": 303580 + }, + { + "epoch": 1.94713195845212, + "grad_norm": 0.006315671838819981, + "learning_rate": 2.1265371938657965e-08, + "loss": 0.0009, + "step": 303590 + }, + { + "epoch": 1.9471960953459062, + "grad_norm": 0.14996036887168884, + "learning_rate": 2.121383728608373e-08, + "loss": 0.0008, + "step": 303600 + }, + { + "epoch": 1.9472602322396924, + "grad_norm": 0.03868402913212776, + "learning_rate": 2.1162365021258345e-08, + "loss": 0.0015, + "step": 303610 + }, + { + "epoch": 1.9473243691334785, + "grad_norm": 0.02724253199994564, + "learning_rate": 2.111095514482686e-08, + "loss": 0.0007, + "step": 303620 + }, + { + "epoch": 1.9473885060272647, + "grad_norm": 0.08939662575721741, + "learning_rate": 2.10596076574332e-08, + "loss": 0.0008, + "step": 303630 + }, + { + "epoch": 1.9474526429210508, + "grad_norm": 0.037689194083213806, + "learning_rate": 2.1008322559721296e-08, + "loss": 0.0004, + "step": 303640 + }, + { + "epoch": 1.947516779814837, + "grad_norm": 0.023243412375450134, + "learning_rate": 2.0957099852333407e-08, + "loss": 0.0008, + "step": 303650 + }, + { + "epoch": 1.9475809167086229, + "grad_norm": 0.024731189012527466, + "learning_rate": 2.0905939535911802e-08, + "loss": 0.0008, + "step": 303660 + }, + { + "epoch": 1.947645053602409, + "grad_norm": 0.014677757397294044, + "learning_rate": 2.0854841611097078e-08, + "loss": 0.0007, + "step": 303670 + }, + { + "epoch": 1.947709190496195, + "grad_norm": 0.32674118876457214, + "learning_rate": 2.0803806078529275e-08, + "loss": 0.0023, + "step": 303680 + }, + { + "epoch": 1.947773327389981, + "grad_norm": 0.1138594001531601, + "learning_rate": 2.0752832938849e-08, + "loss": 0.001, + "step": 303690 + }, + { + "epoch": 1.9478374642837673, + "grad_norm": 0.0010594233172014356, + "learning_rate": 2.0701922192694067e-08, + "loss": 0.0012, + "step": 303700 + }, + { + "epoch": 1.9479016011775534, + "grad_norm": 0.010231339372694492, + "learning_rate": 2.0651073840702862e-08, + "loss": 0.001, + "step": 303710 + }, + { + "epoch": 1.9479657380713395, + "grad_norm": 0.10267619788646698, + "learning_rate": 2.0600287883512093e-08, + "loss": 0.0006, + "step": 303720 + }, + { + "epoch": 1.9480298749651257, + "grad_norm": 0.08820979297161102, + "learning_rate": 2.054956432175903e-08, + "loss": 0.0004, + "step": 303730 + }, + { + "epoch": 1.9480940118589116, + "grad_norm": 0.10496065020561218, + "learning_rate": 2.0498903156078165e-08, + "loss": 0.0005, + "step": 303740 + }, + { + "epoch": 1.9481581487526978, + "grad_norm": 0.07082303613424301, + "learning_rate": 2.04483043871051e-08, + "loss": 0.0012, + "step": 303750 + }, + { + "epoch": 1.9482222856464837, + "grad_norm": 0.03306499868631363, + "learning_rate": 2.0397768015473218e-08, + "loss": 0.0009, + "step": 303760 + }, + { + "epoch": 1.9482864225402698, + "grad_norm": 0.0331336110830307, + "learning_rate": 2.0347294041816456e-08, + "loss": 0.0005, + "step": 303770 + }, + { + "epoch": 1.948350559434056, + "grad_norm": 0.10384833067655563, + "learning_rate": 2.0296882466767086e-08, + "loss": 0.0015, + "step": 303780 + }, + { + "epoch": 1.9484146963278421, + "grad_norm": 0.07057362049818039, + "learning_rate": 2.0246533290956826e-08, + "loss": 0.0013, + "step": 303790 + }, + { + "epoch": 1.9484788332216283, + "grad_norm": 0.05630839988589287, + "learning_rate": 2.019624651501628e-08, + "loss": 0.001, + "step": 303800 + }, + { + "epoch": 1.9485429701154144, + "grad_norm": 0.055216625332832336, + "learning_rate": 2.0146022139575506e-08, + "loss": 0.0006, + "step": 303810 + }, + { + "epoch": 1.9486071070092006, + "grad_norm": 0.05376419052481651, + "learning_rate": 2.0095860165264547e-08, + "loss": 0.0017, + "step": 303820 + }, + { + "epoch": 1.9486712439029865, + "grad_norm": 0.02756691165268421, + "learning_rate": 2.0045760592711238e-08, + "loss": 0.0009, + "step": 303830 + }, + { + "epoch": 1.9487353807967727, + "grad_norm": 0.056577298790216446, + "learning_rate": 1.9995723422543968e-08, + "loss": 0.0008, + "step": 303840 + }, + { + "epoch": 1.9487995176905586, + "grad_norm": 0.0758282020688057, + "learning_rate": 1.994574865538945e-08, + "loss": 0.0013, + "step": 303850 + }, + { + "epoch": 1.9488636545843447, + "grad_norm": 0.02690417319536209, + "learning_rate": 1.989583629187386e-08, + "loss": 0.0011, + "step": 303860 + }, + { + "epoch": 1.9489277914781309, + "grad_norm": 0.012038026005029678, + "learning_rate": 1.9845986332622248e-08, + "loss": 0.0015, + "step": 303870 + }, + { + "epoch": 1.948991928371917, + "grad_norm": 0.06118635833263397, + "learning_rate": 1.9796198778260222e-08, + "loss": 0.0006, + "step": 303880 + }, + { + "epoch": 1.9490560652657032, + "grad_norm": 0.10096149891614914, + "learning_rate": 1.9746473629410624e-08, + "loss": 0.0007, + "step": 303890 + }, + { + "epoch": 1.9491202021594893, + "grad_norm": 0.13723134994506836, + "learning_rate": 1.9696810886697392e-08, + "loss": 0.0019, + "step": 303900 + }, + { + "epoch": 1.9491843390532753, + "grad_norm": 0.05517492815852165, + "learning_rate": 1.9647210550742256e-08, + "loss": 0.0006, + "step": 303910 + }, + { + "epoch": 1.9492484759470614, + "grad_norm": 0.03260452672839165, + "learning_rate": 1.9597672622167497e-08, + "loss": 0.0012, + "step": 303920 + }, + { + "epoch": 1.9493126128408476, + "grad_norm": 0.05614691600203514, + "learning_rate": 1.954819710159317e-08, + "loss": 0.0007, + "step": 303930 + }, + { + "epoch": 1.9493767497346335, + "grad_norm": 0.03515557199716568, + "learning_rate": 1.949878398963878e-08, + "loss": 0.0006, + "step": 303940 + }, + { + "epoch": 1.9494408866284196, + "grad_norm": 0.040092095732688904, + "learning_rate": 1.9449433286924946e-08, + "loss": 0.0027, + "step": 303950 + }, + { + "epoch": 1.9495050235222058, + "grad_norm": 0.03886708989739418, + "learning_rate": 1.9400144994068948e-08, + "loss": 0.0009, + "step": 303960 + }, + { + "epoch": 1.949569160415992, + "grad_norm": 0.13820737600326538, + "learning_rate": 1.9350919111688626e-08, + "loss": 0.0005, + "step": 303970 + }, + { + "epoch": 1.949633297309778, + "grad_norm": 0.004038135055452585, + "learning_rate": 1.930175564040071e-08, + "loss": 0.0008, + "step": 303980 + }, + { + "epoch": 1.9496974342035642, + "grad_norm": 0.006674329750239849, + "learning_rate": 1.9252654580821374e-08, + "loss": 0.001, + "step": 303990 + }, + { + "epoch": 1.9497615710973502, + "grad_norm": 0.01439402624964714, + "learning_rate": 1.9203615933566787e-08, + "loss": 0.0009, + "step": 304000 + }, + { + "epoch": 1.9498257079911363, + "grad_norm": 0.004403538070619106, + "learning_rate": 1.91546396992498e-08, + "loss": 0.0009, + "step": 304010 + }, + { + "epoch": 1.9498898448849222, + "grad_norm": 0.05232447758316994, + "learning_rate": 1.9105725878485472e-08, + "loss": 0.0019, + "step": 304020 + }, + { + "epoch": 1.9499539817787084, + "grad_norm": 0.05275091156363487, + "learning_rate": 1.9056874471885535e-08, + "loss": 0.0007, + "step": 304030 + }, + { + "epoch": 1.9500181186724945, + "grad_norm": 0.03211916610598564, + "learning_rate": 1.900808548006339e-08, + "loss": 0.0009, + "step": 304040 + }, + { + "epoch": 1.9500822555662807, + "grad_norm": 0.07366874814033508, + "learning_rate": 1.8959358903629653e-08, + "loss": 0.0013, + "step": 304050 + }, + { + "epoch": 1.9501463924600668, + "grad_norm": 0.0299956277012825, + "learning_rate": 1.8910694743194958e-08, + "loss": 0.0003, + "step": 304060 + }, + { + "epoch": 1.950210529353853, + "grad_norm": 0.034520067274570465, + "learning_rate": 1.8862092999369364e-08, + "loss": 0.0007, + "step": 304070 + }, + { + "epoch": 1.9502746662476391, + "grad_norm": 0.1465241014957428, + "learning_rate": 1.8813553672761274e-08, + "loss": 0.0008, + "step": 304080 + }, + { + "epoch": 1.950338803141425, + "grad_norm": 0.15555109083652496, + "learning_rate": 1.876507676397965e-08, + "loss": 0.001, + "step": 304090 + }, + { + "epoch": 1.9504029400352112, + "grad_norm": 0.05211598053574562, + "learning_rate": 1.8716662273631224e-08, + "loss": 0.0012, + "step": 304100 + }, + { + "epoch": 1.9504670769289971, + "grad_norm": 0.03298133611679077, + "learning_rate": 1.8668310202323847e-08, + "loss": 0.0009, + "step": 304110 + }, + { + "epoch": 1.9505312138227833, + "grad_norm": 0.009433920495212078, + "learning_rate": 1.8620020550662588e-08, + "loss": 0.0022, + "step": 304120 + }, + { + "epoch": 1.9505953507165694, + "grad_norm": 0.1759236752986908, + "learning_rate": 1.857179331925196e-08, + "loss": 0.0014, + "step": 304130 + }, + { + "epoch": 1.9506594876103556, + "grad_norm": 0.09502455592155457, + "learning_rate": 1.85236285086976e-08, + "loss": 0.0007, + "step": 304140 + }, + { + "epoch": 1.9507236245041417, + "grad_norm": 0.10114434361457825, + "learning_rate": 1.8475526119601794e-08, + "loss": 0.0013, + "step": 304150 + }, + { + "epoch": 1.9507877613979279, + "grad_norm": 0.10108424723148346, + "learning_rate": 1.8427486152568507e-08, + "loss": 0.0012, + "step": 304160 + }, + { + "epoch": 1.9508518982917138, + "grad_norm": 0.13766522705554962, + "learning_rate": 1.837950860819837e-08, + "loss": 0.0011, + "step": 304170 + }, + { + "epoch": 1.9509160351855, + "grad_norm": 0.10113713890314102, + "learning_rate": 1.833159348709368e-08, + "loss": 0.0008, + "step": 304180 + }, + { + "epoch": 1.9509801720792859, + "grad_norm": 0.09011459350585938, + "learning_rate": 1.828374078985451e-08, + "loss": 0.0007, + "step": 304190 + }, + { + "epoch": 1.951044308973072, + "grad_norm": 0.028076674789190292, + "learning_rate": 1.823595051708038e-08, + "loss": 0.0009, + "step": 304200 + }, + { + "epoch": 1.9511084458668582, + "grad_norm": 0.11939436197280884, + "learning_rate": 1.818822266937026e-08, + "loss": 0.001, + "step": 304210 + }, + { + "epoch": 1.9511725827606443, + "grad_norm": 0.08247743546962738, + "learning_rate": 1.8140557247322554e-08, + "loss": 0.0005, + "step": 304220 + }, + { + "epoch": 1.9512367196544305, + "grad_norm": 0.043421365320682526, + "learning_rate": 1.8092954251533455e-08, + "loss": 0.0007, + "step": 304230 + }, + { + "epoch": 1.9513008565482166, + "grad_norm": 0.18763089179992676, + "learning_rate": 1.804541368260082e-08, + "loss": 0.0012, + "step": 304240 + }, + { + "epoch": 1.9513649934420028, + "grad_norm": 0.14211469888687134, + "learning_rate": 1.7997935541119172e-08, + "loss": 0.0009, + "step": 304250 + }, + { + "epoch": 1.9514291303357887, + "grad_norm": 0.0641964003443718, + "learning_rate": 1.7950519827684142e-08, + "loss": 0.001, + "step": 304260 + }, + { + "epoch": 1.9514932672295748, + "grad_norm": 0.091981440782547, + "learning_rate": 1.7903166542889705e-08, + "loss": 0.0025, + "step": 304270 + }, + { + "epoch": 1.9515574041233608, + "grad_norm": 0.032483477145433426, + "learning_rate": 1.785587568732927e-08, + "loss": 0.0009, + "step": 304280 + }, + { + "epoch": 1.951621541017147, + "grad_norm": 0.3191652297973633, + "learning_rate": 1.7808647261595148e-08, + "loss": 0.001, + "step": 304290 + }, + { + "epoch": 1.951685677910933, + "grad_norm": 0.04146173223853111, + "learning_rate": 1.7761481266279634e-08, + "loss": 0.0007, + "step": 304300 + }, + { + "epoch": 1.9517498148047192, + "grad_norm": 0.003173446049913764, + "learning_rate": 1.7714377701973374e-08, + "loss": 0.0009, + "step": 304310 + }, + { + "epoch": 1.9518139516985054, + "grad_norm": 0.07733031362295151, + "learning_rate": 1.7667336569267e-08, + "loss": 0.0008, + "step": 304320 + }, + { + "epoch": 1.9518780885922915, + "grad_norm": 0.06869202107191086, + "learning_rate": 1.7620357868749495e-08, + "loss": 0.0009, + "step": 304330 + }, + { + "epoch": 1.9519422254860774, + "grad_norm": 0.12415417283773422, + "learning_rate": 1.7573441601009822e-08, + "loss": 0.0015, + "step": 304340 + }, + { + "epoch": 1.9520063623798636, + "grad_norm": 0.06551288068294525, + "learning_rate": 1.7526587766635848e-08, + "loss": 0.0012, + "step": 304350 + }, + { + "epoch": 1.9520704992736497, + "grad_norm": 0.09301673620939255, + "learning_rate": 1.747979636621433e-08, + "loss": 0.0006, + "step": 304360 + }, + { + "epoch": 1.9521346361674357, + "grad_norm": 0.0703938826918602, + "learning_rate": 1.743306740033257e-08, + "loss": 0.0011, + "step": 304370 + }, + { + "epoch": 1.9521987730612218, + "grad_norm": 0.285512775182724, + "learning_rate": 1.7386400869575104e-08, + "loss": 0.0022, + "step": 304380 + }, + { + "epoch": 1.952262909955008, + "grad_norm": 0.025058260187506676, + "learning_rate": 1.733979677452702e-08, + "loss": 0.0008, + "step": 304390 + }, + { + "epoch": 1.952327046848794, + "grad_norm": 0.04474668204784393, + "learning_rate": 1.7293255115772288e-08, + "loss": 0.0005, + "step": 304400 + }, + { + "epoch": 1.9523911837425802, + "grad_norm": 0.03960119187831879, + "learning_rate": 1.724677589389434e-08, + "loss": 0.0035, + "step": 304410 + }, + { + "epoch": 1.9524553206363664, + "grad_norm": 0.0017269984818995, + "learning_rate": 1.7200359109475485e-08, + "loss": 0.0006, + "step": 304420 + }, + { + "epoch": 1.9525194575301523, + "grad_norm": 0.03660395368933678, + "learning_rate": 1.715400476309692e-08, + "loss": 0.0017, + "step": 304430 + }, + { + "epoch": 1.9525835944239385, + "grad_norm": 0.042580440640449524, + "learning_rate": 1.7107712855339853e-08, + "loss": 0.0009, + "step": 304440 + }, + { + "epoch": 1.9526477313177244, + "grad_norm": 0.10549118369817734, + "learning_rate": 1.7061483386784928e-08, + "loss": 0.0005, + "step": 304450 + }, + { + "epoch": 1.9527118682115105, + "grad_norm": 0.18237760663032532, + "learning_rate": 1.701531635801057e-08, + "loss": 0.0017, + "step": 304460 + }, + { + "epoch": 1.9527760051052967, + "grad_norm": 0.01771414466202259, + "learning_rate": 1.6969211769595206e-08, + "loss": 0.0022, + "step": 304470 + }, + { + "epoch": 1.9528401419990828, + "grad_norm": 0.10704022645950317, + "learning_rate": 1.6923169622117264e-08, + "loss": 0.0008, + "step": 304480 + }, + { + "epoch": 1.952904278892869, + "grad_norm": 0.11638756841421127, + "learning_rate": 1.6877189916153503e-08, + "loss": 0.0016, + "step": 304490 + }, + { + "epoch": 1.9529684157866551, + "grad_norm": 0.037044230848550797, + "learning_rate": 1.6831272652279573e-08, + "loss": 0.0009, + "step": 304500 + }, + { + "epoch": 1.9530325526804413, + "grad_norm": 0.043244726955890656, + "learning_rate": 1.6785417831071682e-08, + "loss": 0.0006, + "step": 304510 + }, + { + "epoch": 1.9530966895742272, + "grad_norm": 0.14400893449783325, + "learning_rate": 1.6739625453103258e-08, + "loss": 0.0015, + "step": 304520 + }, + { + "epoch": 1.9531608264680134, + "grad_norm": 0.02449709363281727, + "learning_rate": 1.66938955189494e-08, + "loss": 0.0006, + "step": 304530 + }, + { + "epoch": 1.9532249633617993, + "grad_norm": 0.05143209546804428, + "learning_rate": 1.6648228029182424e-08, + "loss": 0.0009, + "step": 304540 + }, + { + "epoch": 1.9532891002555854, + "grad_norm": 0.01187801267951727, + "learning_rate": 1.660262298437465e-08, + "loss": 0.0007, + "step": 304550 + }, + { + "epoch": 1.9533532371493716, + "grad_norm": 0.14507299661636353, + "learning_rate": 1.6557080385097845e-08, + "loss": 0.0011, + "step": 304560 + }, + { + "epoch": 1.9534173740431577, + "grad_norm": 0.20421335101127625, + "learning_rate": 1.651160023192211e-08, + "loss": 0.0005, + "step": 304570 + }, + { + "epoch": 1.9534815109369439, + "grad_norm": 0.020057285204529762, + "learning_rate": 1.646618252541754e-08, + "loss": 0.0058, + "step": 304580 + }, + { + "epoch": 1.95354564783073, + "grad_norm": 0.12206927686929703, + "learning_rate": 1.6420827266153683e-08, + "loss": 0.0009, + "step": 304590 + }, + { + "epoch": 1.953609784724516, + "grad_norm": 0.1060272827744484, + "learning_rate": 1.6375534454698417e-08, + "loss": 0.0011, + "step": 304600 + }, + { + "epoch": 1.9536739216183021, + "grad_norm": 0.04592036083340645, + "learning_rate": 1.633030409161962e-08, + "loss": 0.0015, + "step": 304610 + }, + { + "epoch": 1.953738058512088, + "grad_norm": 0.10995520651340485, + "learning_rate": 1.6285136177483505e-08, + "loss": 0.0007, + "step": 304620 + }, + { + "epoch": 1.9538021954058742, + "grad_norm": 0.31133806705474854, + "learning_rate": 1.624003071285685e-08, + "loss": 0.0016, + "step": 304630 + }, + { + "epoch": 1.9538663322996603, + "grad_norm": 0.19678683578968048, + "learning_rate": 1.619498769830474e-08, + "loss": 0.0012, + "step": 304640 + }, + { + "epoch": 1.9539304691934465, + "grad_norm": 0.03897224739193916, + "learning_rate": 1.6150007134390632e-08, + "loss": 0.0019, + "step": 304650 + }, + { + "epoch": 1.9539946060872326, + "grad_norm": 0.05141619220376015, + "learning_rate": 1.6105089021679067e-08, + "loss": 0.0021, + "step": 304660 + }, + { + "epoch": 1.9540587429810188, + "grad_norm": 0.024906743317842484, + "learning_rate": 1.6060233360732924e-08, + "loss": 0.0011, + "step": 304670 + }, + { + "epoch": 1.954122879874805, + "grad_norm": 0.05543055385351181, + "learning_rate": 1.6015440152113425e-08, + "loss": 0.0007, + "step": 304680 + }, + { + "epoch": 1.9541870167685909, + "grad_norm": 0.010509229265153408, + "learning_rate": 1.59707093963829e-08, + "loss": 0.0009, + "step": 304690 + }, + { + "epoch": 1.954251153662377, + "grad_norm": 0.15427754819393158, + "learning_rate": 1.5926041094101452e-08, + "loss": 0.0021, + "step": 304700 + }, + { + "epoch": 1.954315290556163, + "grad_norm": 0.1171812117099762, + "learning_rate": 1.5881435245828636e-08, + "loss": 0.0015, + "step": 304710 + }, + { + "epoch": 1.954379427449949, + "grad_norm": 0.03251752257347107, + "learning_rate": 1.583689185212345e-08, + "loss": 0.0012, + "step": 304720 + }, + { + "epoch": 1.9544435643437352, + "grad_norm": 0.05555243790149689, + "learning_rate": 1.5792410913544332e-08, + "loss": 0.0012, + "step": 304730 + }, + { + "epoch": 1.9545077012375214, + "grad_norm": 0.3689599931240082, + "learning_rate": 1.5747992430648617e-08, + "loss": 0.0018, + "step": 304740 + }, + { + "epoch": 1.9545718381313075, + "grad_norm": 0.044546645134687424, + "learning_rate": 1.570363640399253e-08, + "loss": 0.0012, + "step": 304750 + }, + { + "epoch": 1.9546359750250937, + "grad_norm": 0.025621255859732628, + "learning_rate": 1.565934283413173e-08, + "loss": 0.001, + "step": 304760 + }, + { + "epoch": 1.9547001119188796, + "grad_norm": 0.18231767416000366, + "learning_rate": 1.5615111721621888e-08, + "loss": 0.0012, + "step": 304770 + }, + { + "epoch": 1.9547642488126658, + "grad_norm": 0.04967685416340828, + "learning_rate": 1.5570943067017564e-08, + "loss": 0.0013, + "step": 304780 + }, + { + "epoch": 1.954828385706452, + "grad_norm": 0.026820089668035507, + "learning_rate": 1.5526836870871086e-08, + "loss": 0.0014, + "step": 304790 + }, + { + "epoch": 1.9548925226002378, + "grad_norm": 0.11177382618188858, + "learning_rate": 1.548279313373591e-08, + "loss": 0.0011, + "step": 304800 + }, + { + "epoch": 1.954956659494024, + "grad_norm": 0.03823992982506752, + "learning_rate": 1.543881185616325e-08, + "loss": 0.0006, + "step": 304810 + }, + { + "epoch": 1.9550207963878101, + "grad_norm": 0.2680332362651825, + "learning_rate": 1.5394893038704895e-08, + "loss": 0.0015, + "step": 304820 + }, + { + "epoch": 1.9550849332815963, + "grad_norm": 0.0011533283395692706, + "learning_rate": 1.535103668191096e-08, + "loss": 0.0009, + "step": 304830 + }, + { + "epoch": 1.9551490701753824, + "grad_norm": 0.07149229943752289, + "learning_rate": 1.5307242786331556e-08, + "loss": 0.0006, + "step": 304840 + }, + { + "epoch": 1.9552132070691686, + "grad_norm": 0.07902127504348755, + "learning_rate": 1.5263511352514026e-08, + "loss": 0.0011, + "step": 304850 + }, + { + "epoch": 1.9552773439629545, + "grad_norm": 0.065912626683712, + "learning_rate": 1.5219842381007932e-08, + "loss": 0.001, + "step": 304860 + }, + { + "epoch": 1.9553414808567406, + "grad_norm": 0.13002678751945496, + "learning_rate": 1.5176235872359502e-08, + "loss": 0.0019, + "step": 304870 + }, + { + "epoch": 1.9554056177505266, + "grad_norm": 0.061404649168252945, + "learning_rate": 1.513269182711552e-08, + "loss": 0.0014, + "step": 304880 + }, + { + "epoch": 1.9554697546443127, + "grad_norm": 0.027881687507033348, + "learning_rate": 1.5089210245821105e-08, + "loss": 0.0034, + "step": 304890 + }, + { + "epoch": 1.9555338915380989, + "grad_norm": 0.02497880719602108, + "learning_rate": 1.5045791129021935e-08, + "loss": 0.0008, + "step": 304900 + }, + { + "epoch": 1.955598028431885, + "grad_norm": 0.009090565145015717, + "learning_rate": 1.5002434477262018e-08, + "loss": 0.001, + "step": 304910 + }, + { + "epoch": 1.9556621653256712, + "grad_norm": 0.07809104025363922, + "learning_rate": 1.4959140291083696e-08, + "loss": 0.0012, + "step": 304920 + }, + { + "epoch": 1.9557263022194573, + "grad_norm": 0.11369671672582626, + "learning_rate": 1.4915908571030424e-08, + "loss": 0.0009, + "step": 304930 + }, + { + "epoch": 1.9557904391132435, + "grad_norm": 0.12540864944458008, + "learning_rate": 1.4872739317643992e-08, + "loss": 0.0014, + "step": 304940 + }, + { + "epoch": 1.9558545760070294, + "grad_norm": 0.0958738848567009, + "learning_rate": 1.4829632531464523e-08, + "loss": 0.0004, + "step": 304950 + }, + { + "epoch": 1.9559187129008155, + "grad_norm": 0.07722891122102737, + "learning_rate": 1.4786588213032694e-08, + "loss": 0.0009, + "step": 304960 + }, + { + "epoch": 1.9559828497946015, + "grad_norm": 0.06726028025150299, + "learning_rate": 1.474360636288752e-08, + "loss": 0.0026, + "step": 304970 + }, + { + "epoch": 1.9560469866883876, + "grad_norm": 0.03700147196650505, + "learning_rate": 1.4700686981568014e-08, + "loss": 0.0008, + "step": 304980 + }, + { + "epoch": 1.9561111235821738, + "grad_norm": 0.14430172741413116, + "learning_rate": 1.4657830069612078e-08, + "loss": 0.0015, + "step": 304990 + }, + { + "epoch": 1.95617526047596, + "grad_norm": 0.09119134396314621, + "learning_rate": 1.4615035627556507e-08, + "loss": 0.0011, + "step": 305000 + }, + { + "epoch": 1.95617526047596, + "eval_loss": 0.0019545629620552063, + "eval_runtime": 3.3198, + "eval_samples_per_second": 60.244, + "eval_steps_per_second": 15.061, + "step": 305000 + }, + { + "epoch": 1.956239397369746, + "grad_norm": 0.010408638045191765, + "learning_rate": 1.4572303655936981e-08, + "loss": 0.0013, + "step": 305010 + }, + { + "epoch": 1.9563035342635322, + "grad_norm": 0.05148407071828842, + "learning_rate": 1.452963415528974e-08, + "loss": 0.0008, + "step": 305020 + }, + { + "epoch": 1.9563676711573181, + "grad_norm": 0.025471650063991547, + "learning_rate": 1.4487027126149356e-08, + "loss": 0.001, + "step": 305030 + }, + { + "epoch": 1.9564318080511043, + "grad_norm": 0.14106789231300354, + "learning_rate": 1.4444482569049845e-08, + "loss": 0.0006, + "step": 305040 + }, + { + "epoch": 1.9564959449448902, + "grad_norm": 0.08223976194858551, + "learning_rate": 1.4402000484524115e-08, + "loss": 0.0017, + "step": 305050 + }, + { + "epoch": 1.9565600818386764, + "grad_norm": 0.06003883108496666, + "learning_rate": 1.4359580873103962e-08, + "loss": 0.0009, + "step": 305060 + }, + { + "epoch": 1.9566242187324625, + "grad_norm": 0.02777407504618168, + "learning_rate": 1.4317223735321739e-08, + "loss": 0.0011, + "step": 305070 + }, + { + "epoch": 1.9566883556262487, + "grad_norm": 0.09857097268104553, + "learning_rate": 1.4274929071708133e-08, + "loss": 0.0009, + "step": 305080 + }, + { + "epoch": 1.9567524925200348, + "grad_norm": 0.11872630566358566, + "learning_rate": 1.4232696882792718e-08, + "loss": 0.0009, + "step": 305090 + }, + { + "epoch": 1.956816629413821, + "grad_norm": 0.049037232995033264, + "learning_rate": 1.4190527169105073e-08, + "loss": 0.002, + "step": 305100 + }, + { + "epoch": 1.956880766307607, + "grad_norm": 0.05172473192214966, + "learning_rate": 1.4148419931173107e-08, + "loss": 0.0015, + "step": 305110 + }, + { + "epoch": 1.956944903201393, + "grad_norm": 0.11926617473363876, + "learning_rate": 1.4106375169524733e-08, + "loss": 0.0013, + "step": 305120 + }, + { + "epoch": 1.9570090400951792, + "grad_norm": 0.0758260115981102, + "learning_rate": 1.406439288468675e-08, + "loss": 0.0009, + "step": 305130 + }, + { + "epoch": 1.957073176988965, + "grad_norm": 0.023133208975195885, + "learning_rate": 1.402247307718596e-08, + "loss": 0.0003, + "step": 305140 + }, + { + "epoch": 1.9571373138827513, + "grad_norm": 0.1966695487499237, + "learning_rate": 1.3980615747546389e-08, + "loss": 0.0015, + "step": 305150 + }, + { + "epoch": 1.9572014507765374, + "grad_norm": 0.05596424639225006, + "learning_rate": 1.3938820896293171e-08, + "loss": 0.0006, + "step": 305160 + }, + { + "epoch": 1.9572655876703235, + "grad_norm": 0.029670780524611473, + "learning_rate": 1.389708852395033e-08, + "loss": 0.001, + "step": 305170 + }, + { + "epoch": 1.9573297245641097, + "grad_norm": 0.034810930490493774, + "learning_rate": 1.3855418631040785e-08, + "loss": 0.0008, + "step": 305180 + }, + { + "epoch": 1.9573938614578958, + "grad_norm": 0.16377195715904236, + "learning_rate": 1.3813811218085781e-08, + "loss": 0.0017, + "step": 305190 + }, + { + "epoch": 1.957457998351682, + "grad_norm": 0.11208045482635498, + "learning_rate": 1.3772266285607128e-08, + "loss": 0.0011, + "step": 305200 + }, + { + "epoch": 1.957522135245468, + "grad_norm": 0.031090980395674706, + "learning_rate": 1.3730783834126072e-08, + "loss": 0.0006, + "step": 305210 + }, + { + "epoch": 1.957586272139254, + "grad_norm": 0.04496197775006294, + "learning_rate": 1.3689363864162198e-08, + "loss": 0.0011, + "step": 305220 + }, + { + "epoch": 1.95765040903304, + "grad_norm": 0.06327894330024719, + "learning_rate": 1.3648006376233425e-08, + "loss": 0.0006, + "step": 305230 + }, + { + "epoch": 1.9577145459268261, + "grad_norm": 0.023319434374570847, + "learning_rate": 1.3606711370859338e-08, + "loss": 0.0008, + "step": 305240 + }, + { + "epoch": 1.9577786828206123, + "grad_norm": 0.14921852946281433, + "learning_rate": 1.3565478848556746e-08, + "loss": 0.0012, + "step": 305250 + }, + { + "epoch": 1.9578428197143984, + "grad_norm": 0.08323876559734344, + "learning_rate": 1.3524308809843013e-08, + "loss": 0.0011, + "step": 305260 + }, + { + "epoch": 1.9579069566081846, + "grad_norm": 0.08927945047616959, + "learning_rate": 1.3483201255232726e-08, + "loss": 0.003, + "step": 305270 + }, + { + "epoch": 1.9579710935019707, + "grad_norm": 0.043267734348773956, + "learning_rate": 1.3442156185242139e-08, + "loss": 0.0006, + "step": 305280 + }, + { + "epoch": 1.9580352303957567, + "grad_norm": 0.019233791157603264, + "learning_rate": 1.3401173600384731e-08, + "loss": 0.0008, + "step": 305290 + }, + { + "epoch": 1.9580993672895428, + "grad_norm": 0.10895214974880219, + "learning_rate": 1.3360253501175091e-08, + "loss": 0.0013, + "step": 305300 + }, + { + "epoch": 1.9581635041833287, + "grad_norm": 0.2412586361169815, + "learning_rate": 1.3319395888124476e-08, + "loss": 0.0009, + "step": 305310 + }, + { + "epoch": 1.9582276410771149, + "grad_norm": 0.20381537079811096, + "learning_rate": 1.3278600761746364e-08, + "loss": 0.0007, + "step": 305320 + }, + { + "epoch": 1.958291777970901, + "grad_norm": 0.037070974707603455, + "learning_rate": 1.3237868122551457e-08, + "loss": 0.0008, + "step": 305330 + }, + { + "epoch": 1.9583559148646872, + "grad_norm": 0.15732403099536896, + "learning_rate": 1.3197197971049902e-08, + "loss": 0.0012, + "step": 305340 + }, + { + "epoch": 1.9584200517584733, + "grad_norm": 0.022160615772008896, + "learning_rate": 1.3156590307751293e-08, + "loss": 0.0011, + "step": 305350 + }, + { + "epoch": 1.9584841886522595, + "grad_norm": 0.013370877131819725, + "learning_rate": 1.3116045133165222e-08, + "loss": 0.0015, + "step": 305360 + }, + { + "epoch": 1.9585483255460456, + "grad_norm": 0.00940181128680706, + "learning_rate": 1.3075562447798506e-08, + "loss": 0.0006, + "step": 305370 + }, + { + "epoch": 1.9586124624398316, + "grad_norm": 0.07063985615968704, + "learning_rate": 1.3035142252159627e-08, + "loss": 0.001, + "step": 305380 + }, + { + "epoch": 1.9586765993336177, + "grad_norm": 0.17283165454864502, + "learning_rate": 1.299478454675429e-08, + "loss": 0.0012, + "step": 305390 + }, + { + "epoch": 1.9587407362274036, + "grad_norm": 0.05347483977675438, + "learning_rate": 1.2954489332088206e-08, + "loss": 0.0014, + "step": 305400 + }, + { + "epoch": 1.9588048731211898, + "grad_norm": 0.10970115661621094, + "learning_rate": 1.2914256608667076e-08, + "loss": 0.0032, + "step": 305410 + }, + { + "epoch": 1.958869010014976, + "grad_norm": 0.02583213895559311, + "learning_rate": 1.2874086376993833e-08, + "loss": 0.0008, + "step": 305420 + }, + { + "epoch": 1.958933146908762, + "grad_norm": 0.17533306777477264, + "learning_rate": 1.2833978637573075e-08, + "loss": 0.0015, + "step": 305430 + }, + { + "epoch": 1.9589972838025482, + "grad_norm": 0.06510546803474426, + "learning_rate": 1.2793933390907176e-08, + "loss": 0.0012, + "step": 305440 + }, + { + "epoch": 1.9590614206963344, + "grad_norm": 0.045575156807899475, + "learning_rate": 1.2753950637496849e-08, + "loss": 0.0028, + "step": 305450 + }, + { + "epoch": 1.9591255575901203, + "grad_norm": 0.02801566943526268, + "learning_rate": 1.2714030377843911e-08, + "loss": 0.0009, + "step": 305460 + }, + { + "epoch": 1.9591896944839065, + "grad_norm": 0.055962517857551575, + "learning_rate": 1.2674172612449077e-08, + "loss": 0.0009, + "step": 305470 + }, + { + "epoch": 1.9592538313776926, + "grad_norm": 0.12682779133319855, + "learning_rate": 1.2634377341810832e-08, + "loss": 0.0012, + "step": 305480 + }, + { + "epoch": 1.9593179682714785, + "grad_norm": 0.0686289519071579, + "learning_rate": 1.2594644566428227e-08, + "loss": 0.002, + "step": 305490 + }, + { + "epoch": 1.9593821051652647, + "grad_norm": 0.026192547753453255, + "learning_rate": 1.2554974286799748e-08, + "loss": 0.0016, + "step": 305500 + }, + { + "epoch": 1.9594462420590508, + "grad_norm": 0.002885625697672367, + "learning_rate": 1.251536650342111e-08, + "loss": 0.0009, + "step": 305510 + }, + { + "epoch": 1.959510378952837, + "grad_norm": 0.18336519598960876, + "learning_rate": 1.2475821216789696e-08, + "loss": 0.0009, + "step": 305520 + }, + { + "epoch": 1.9595745158466231, + "grad_norm": 0.11568191647529602, + "learning_rate": 1.2436338427401218e-08, + "loss": 0.0011, + "step": 305530 + }, + { + "epoch": 1.9596386527404093, + "grad_norm": 0.01961652934551239, + "learning_rate": 1.2396918135749724e-08, + "loss": 0.0009, + "step": 305540 + }, + { + "epoch": 1.9597027896341952, + "grad_norm": 0.04887218400835991, + "learning_rate": 1.2357560342329267e-08, + "loss": 0.0008, + "step": 305550 + }, + { + "epoch": 1.9597669265279813, + "grad_norm": 0.07078026235103607, + "learning_rate": 1.2318265047633338e-08, + "loss": 0.0011, + "step": 305560 + }, + { + "epoch": 1.9598310634217673, + "grad_norm": 0.07141014188528061, + "learning_rate": 1.2279032252153766e-08, + "loss": 0.0007, + "step": 305570 + }, + { + "epoch": 1.9598952003155534, + "grad_norm": 0.0644538402557373, + "learning_rate": 1.2239861956382936e-08, + "loss": 0.0009, + "step": 305580 + }, + { + "epoch": 1.9599593372093396, + "grad_norm": 0.0744011253118515, + "learning_rate": 1.2200754160811568e-08, + "loss": 0.0009, + "step": 305590 + }, + { + "epoch": 1.9600234741031257, + "grad_norm": 0.14834116399288177, + "learning_rate": 1.2161708865929268e-08, + "loss": 0.0016, + "step": 305600 + }, + { + "epoch": 1.9600876109969119, + "grad_norm": 0.06007117033004761, + "learning_rate": 1.2122726072225089e-08, + "loss": 0.002, + "step": 305610 + }, + { + "epoch": 1.960151747890698, + "grad_norm": 0.07412609457969666, + "learning_rate": 1.208380578018864e-08, + "loss": 0.0005, + "step": 305620 + }, + { + "epoch": 1.9602158847844842, + "grad_norm": 0.04260360077023506, + "learning_rate": 1.2044947990306199e-08, + "loss": 0.0006, + "step": 305630 + }, + { + "epoch": 1.96028002167827, + "grad_norm": 0.11977134644985199, + "learning_rate": 1.2006152703066265e-08, + "loss": 0.0011, + "step": 305640 + }, + { + "epoch": 1.9603441585720562, + "grad_norm": 0.1177653968334198, + "learning_rate": 1.196741991895345e-08, + "loss": 0.0022, + "step": 305650 + }, + { + "epoch": 1.9604082954658422, + "grad_norm": 0.016976799815893173, + "learning_rate": 1.1928749638454029e-08, + "loss": 0.0016, + "step": 305660 + }, + { + "epoch": 1.9604724323596283, + "grad_norm": 0.009372200816869736, + "learning_rate": 1.1890141862052062e-08, + "loss": 0.0011, + "step": 305670 + }, + { + "epoch": 1.9605365692534145, + "grad_norm": 0.07452521473169327, + "learning_rate": 1.1851596590231607e-08, + "loss": 0.0008, + "step": 305680 + }, + { + "epoch": 1.9606007061472006, + "grad_norm": 0.03782688453793526, + "learning_rate": 1.1813113823475609e-08, + "loss": 0.0006, + "step": 305690 + }, + { + "epoch": 1.9606648430409868, + "grad_norm": 0.10139846056699753, + "learning_rate": 1.177469356226646e-08, + "loss": 0.0011, + "step": 305700 + }, + { + "epoch": 1.960728979934773, + "grad_norm": 0.15534450113773346, + "learning_rate": 1.1736335807085442e-08, + "loss": 0.002, + "step": 305710 + }, + { + "epoch": 1.9607931168285588, + "grad_norm": 0.027061453089118004, + "learning_rate": 1.1698040558413281e-08, + "loss": 0.0008, + "step": 305720 + }, + { + "epoch": 1.960857253722345, + "grad_norm": 0.0902622640132904, + "learning_rate": 1.1659807816729595e-08, + "loss": 0.0024, + "step": 305730 + }, + { + "epoch": 1.960921390616131, + "grad_norm": 0.21433427929878235, + "learning_rate": 1.1621637582514e-08, + "loss": 0.001, + "step": 305740 + }, + { + "epoch": 1.960985527509917, + "grad_norm": 0.045051656663417816, + "learning_rate": 1.1583529856243891e-08, + "loss": 0.0007, + "step": 305750 + }, + { + "epoch": 1.9610496644037032, + "grad_norm": 0.08645248413085938, + "learning_rate": 1.154548463839722e-08, + "loss": 0.001, + "step": 305760 + }, + { + "epoch": 1.9611138012974894, + "grad_norm": 0.02531195990741253, + "learning_rate": 1.1507501929451381e-08, + "loss": 0.0012, + "step": 305770 + }, + { + "epoch": 1.9611779381912755, + "grad_norm": 0.20479781925678253, + "learning_rate": 1.146958172988155e-08, + "loss": 0.0014, + "step": 305780 + }, + { + "epoch": 1.9612420750850617, + "grad_norm": 0.10863394290208817, + "learning_rate": 1.1431724040162907e-08, + "loss": 0.0016, + "step": 305790 + }, + { + "epoch": 1.9613062119788478, + "grad_norm": 0.44899410009384155, + "learning_rate": 1.1393928860770064e-08, + "loss": 0.0013, + "step": 305800 + }, + { + "epoch": 1.9613703488726337, + "grad_norm": 0.029866235330700874, + "learning_rate": 1.1356196192177094e-08, + "loss": 0.0011, + "step": 305810 + }, + { + "epoch": 1.9614344857664199, + "grad_norm": 0.0236195120960474, + "learning_rate": 1.1318526034855837e-08, + "loss": 0.0007, + "step": 305820 + }, + { + "epoch": 1.9614986226602058, + "grad_norm": 0.058961015194654465, + "learning_rate": 1.1280918389278694e-08, + "loss": 0.0012, + "step": 305830 + }, + { + "epoch": 1.961562759553992, + "grad_norm": 0.06082117184996605, + "learning_rate": 1.1243373255917512e-08, + "loss": 0.0008, + "step": 305840 + }, + { + "epoch": 1.961626896447778, + "grad_norm": 0.00631625484675169, + "learning_rate": 1.1205890635241912e-08, + "loss": 0.0015, + "step": 305850 + }, + { + "epoch": 1.9616910333415642, + "grad_norm": 0.05238247662782669, + "learning_rate": 1.1168470527722075e-08, + "loss": 0.0012, + "step": 305860 + }, + { + "epoch": 1.9617551702353504, + "grad_norm": 0.0238849688321352, + "learning_rate": 1.1131112933826516e-08, + "loss": 0.0015, + "step": 305870 + }, + { + "epoch": 1.9618193071291365, + "grad_norm": 0.13209733366966248, + "learning_rate": 1.1093817854023748e-08, + "loss": 0.0011, + "step": 305880 + }, + { + "epoch": 1.9618834440229225, + "grad_norm": 0.08128155767917633, + "learning_rate": 1.1056585288781174e-08, + "loss": 0.0018, + "step": 305890 + }, + { + "epoch": 1.9619475809167086, + "grad_norm": 0.10184819251298904, + "learning_rate": 1.101941523856509e-08, + "loss": 0.0008, + "step": 305900 + }, + { + "epoch": 1.9620117178104948, + "grad_norm": 0.012238494120538235, + "learning_rate": 1.0982307703841232e-08, + "loss": 0.0008, + "step": 305910 + }, + { + "epoch": 1.9620758547042807, + "grad_norm": 0.1712855100631714, + "learning_rate": 1.0945262685074787e-08, + "loss": 0.001, + "step": 305920 + }, + { + "epoch": 1.9621399915980668, + "grad_norm": 0.03880096226930618, + "learning_rate": 1.090828018272927e-08, + "loss": 0.0005, + "step": 305930 + }, + { + "epoch": 1.962204128491853, + "grad_norm": 0.18040379881858826, + "learning_rate": 1.0871360197269309e-08, + "loss": 0.0007, + "step": 305940 + }, + { + "epoch": 1.9622682653856391, + "grad_norm": 0.09596449881792068, + "learning_rate": 1.0834502729156205e-08, + "loss": 0.0009, + "step": 305950 + }, + { + "epoch": 1.9623324022794253, + "grad_norm": 0.10705173760652542, + "learning_rate": 1.0797707778852917e-08, + "loss": 0.0007, + "step": 305960 + }, + { + "epoch": 1.9623965391732114, + "grad_norm": 0.05151883885264397, + "learning_rate": 1.0760975346820191e-08, + "loss": 0.0015, + "step": 305970 + }, + { + "epoch": 1.9624606760669974, + "grad_norm": 0.0651080310344696, + "learning_rate": 1.0724305433518212e-08, + "loss": 0.0015, + "step": 305980 + }, + { + "epoch": 1.9625248129607835, + "grad_norm": 0.041869163513183594, + "learning_rate": 1.0687698039406058e-08, + "loss": 0.0012, + "step": 305990 + }, + { + "epoch": 1.9625889498545694, + "grad_norm": 0.014452272094786167, + "learning_rate": 1.0651153164943362e-08, + "loss": 0.0009, + "step": 306000 + }, + { + "epoch": 1.9626530867483556, + "grad_norm": 0.07938862591981888, + "learning_rate": 1.0614670810587535e-08, + "loss": 0.0008, + "step": 306010 + }, + { + "epoch": 1.9627172236421417, + "grad_norm": 0.005171359051018953, + "learning_rate": 1.0578250976795434e-08, + "loss": 0.001, + "step": 306020 + }, + { + "epoch": 1.9627813605359279, + "grad_norm": 0.06828926503658295, + "learning_rate": 1.0541893664023361e-08, + "loss": 0.0008, + "step": 306030 + }, + { + "epoch": 1.962845497429714, + "grad_norm": 0.06769850850105286, + "learning_rate": 1.0505598872728174e-08, + "loss": 0.001, + "step": 306040 + }, + { + "epoch": 1.9629096343235002, + "grad_norm": 0.03014775551855564, + "learning_rate": 1.0469366603363396e-08, + "loss": 0.0009, + "step": 306050 + }, + { + "epoch": 1.9629737712172863, + "grad_norm": 0.16810640692710876, + "learning_rate": 1.043319685638311e-08, + "loss": 0.0008, + "step": 306060 + }, + { + "epoch": 1.9630379081110723, + "grad_norm": 0.1911521703004837, + "learning_rate": 1.0397089632240843e-08, + "loss": 0.0008, + "step": 306070 + }, + { + "epoch": 1.9631020450048584, + "grad_norm": 0.16602012515068054, + "learning_rate": 1.0361044931389564e-08, + "loss": 0.0012, + "step": 306080 + }, + { + "epoch": 1.9631661818986443, + "grad_norm": 0.059331078082323074, + "learning_rate": 1.0325062754280024e-08, + "loss": 0.0014, + "step": 306090 + }, + { + "epoch": 1.9632303187924305, + "grad_norm": 0.40836986899375916, + "learning_rate": 1.028914310136353e-08, + "loss": 0.0061, + "step": 306100 + }, + { + "epoch": 1.9632944556862166, + "grad_norm": 0.08073293417692184, + "learning_rate": 1.0253285973090277e-08, + "loss": 0.0005, + "step": 306110 + }, + { + "epoch": 1.9633585925800028, + "grad_norm": 0.02992640808224678, + "learning_rate": 1.0217491369909905e-08, + "loss": 0.0008, + "step": 306120 + }, + { + "epoch": 1.963422729473789, + "grad_norm": 0.004306135233491659, + "learning_rate": 1.0181759292269833e-08, + "loss": 0.0011, + "step": 306130 + }, + { + "epoch": 1.963486866367575, + "grad_norm": 0.06630755960941315, + "learning_rate": 1.0146089740618592e-08, + "loss": 0.0003, + "step": 306140 + }, + { + "epoch": 1.963551003261361, + "grad_norm": 0.15983599424362183, + "learning_rate": 1.0110482715403603e-08, + "loss": 0.0014, + "step": 306150 + }, + { + "epoch": 1.9636151401551472, + "grad_norm": 0.1168171614408493, + "learning_rate": 1.0074938217070062e-08, + "loss": 0.0008, + "step": 306160 + }, + { + "epoch": 1.963679277048933, + "grad_norm": 0.10949229449033737, + "learning_rate": 1.0039456246063727e-08, + "loss": 0.0012, + "step": 306170 + }, + { + "epoch": 1.9637434139427192, + "grad_norm": 0.0038095058407634497, + "learning_rate": 1.0004036802829242e-08, + "loss": 0.0005, + "step": 306180 + }, + { + "epoch": 1.9638075508365054, + "grad_norm": 0.059697799384593964, + "learning_rate": 9.968679887810695e-09, + "loss": 0.001, + "step": 306190 + }, + { + "epoch": 1.9638716877302915, + "grad_norm": 0.09330522269010544, + "learning_rate": 9.933385501451064e-09, + "loss": 0.0003, + "step": 306200 + }, + { + "epoch": 1.9639358246240777, + "grad_norm": 0.06144918128848076, + "learning_rate": 9.898153644191666e-09, + "loss": 0.0009, + "step": 306210 + }, + { + "epoch": 1.9639999615178638, + "grad_norm": 0.03653734177350998, + "learning_rate": 9.862984316475476e-09, + "loss": 0.0012, + "step": 306220 + }, + { + "epoch": 1.96406409841165, + "grad_norm": 0.25971123576164246, + "learning_rate": 9.827877518742145e-09, + "loss": 0.001, + "step": 306230 + }, + { + "epoch": 1.964128235305436, + "grad_norm": 0.040889885276556015, + "learning_rate": 9.792833251432431e-09, + "loss": 0.0022, + "step": 306240 + }, + { + "epoch": 1.964192372199222, + "grad_norm": 0.1783040314912796, + "learning_rate": 9.757851514984318e-09, + "loss": 0.0014, + "step": 306250 + }, + { + "epoch": 1.964256509093008, + "grad_norm": 0.039647918194532394, + "learning_rate": 9.722932309837451e-09, + "loss": 0.0014, + "step": 306260 + }, + { + "epoch": 1.9643206459867941, + "grad_norm": 0.257514089345932, + "learning_rate": 9.688075636428152e-09, + "loss": 0.0012, + "step": 306270 + }, + { + "epoch": 1.9643847828805803, + "grad_norm": 0.17515437304973602, + "learning_rate": 9.653281495194399e-09, + "loss": 0.0021, + "step": 306280 + }, + { + "epoch": 1.9644489197743664, + "grad_norm": 0.06372150033712387, + "learning_rate": 9.618549886570849e-09, + "loss": 0.0015, + "step": 306290 + }, + { + "epoch": 1.9645130566681526, + "grad_norm": 0.011198348365724087, + "learning_rate": 9.583880810993818e-09, + "loss": 0.0007, + "step": 306300 + }, + { + "epoch": 1.9645771935619387, + "grad_norm": 0.08975838124752045, + "learning_rate": 9.549274268897402e-09, + "loss": 0.0008, + "step": 306310 + }, + { + "epoch": 1.9646413304557246, + "grad_norm": 0.04905629903078079, + "learning_rate": 9.514730260715144e-09, + "loss": 0.0004, + "step": 306320 + }, + { + "epoch": 1.9647054673495108, + "grad_norm": 0.04664885625243187, + "learning_rate": 9.480248786880031e-09, + "loss": 0.0013, + "step": 306330 + }, + { + "epoch": 1.964769604243297, + "grad_norm": 0.02026456594467163, + "learning_rate": 9.44582984782394e-09, + "loss": 0.0008, + "step": 306340 + }, + { + "epoch": 1.9648337411370829, + "grad_norm": 0.043228600174188614, + "learning_rate": 9.411473443978747e-09, + "loss": 0.0008, + "step": 306350 + }, + { + "epoch": 1.964897878030869, + "grad_norm": 0.057951316237449646, + "learning_rate": 9.377179575773554e-09, + "loss": 0.0005, + "step": 306360 + }, + { + "epoch": 1.9649620149246552, + "grad_norm": 0.0890708640217781, + "learning_rate": 9.342948243639683e-09, + "loss": 0.0007, + "step": 306370 + }, + { + "epoch": 1.9650261518184413, + "grad_norm": 0.004393084440380335, + "learning_rate": 9.308779448005678e-09, + "loss": 0.0012, + "step": 306380 + }, + { + "epoch": 1.9650902887122275, + "grad_norm": 0.018562108278274536, + "learning_rate": 9.274673189298977e-09, + "loss": 0.0008, + "step": 306390 + }, + { + "epoch": 1.9651544256060136, + "grad_norm": 0.004979966674000025, + "learning_rate": 9.240629467947571e-09, + "loss": 0.0005, + "step": 306400 + }, + { + "epoch": 1.9652185624997995, + "grad_norm": 0.05927908048033714, + "learning_rate": 9.206648284377784e-09, + "loss": 0.0015, + "step": 306410 + }, + { + "epoch": 1.9652826993935857, + "grad_norm": 0.009664489887654781, + "learning_rate": 9.172729639015943e-09, + "loss": 0.0015, + "step": 306420 + }, + { + "epoch": 1.9653468362873716, + "grad_norm": 0.054331354796886444, + "learning_rate": 9.138873532286707e-09, + "loss": 0.002, + "step": 306430 + }, + { + "epoch": 1.9654109731811578, + "grad_norm": 0.08084974437952042, + "learning_rate": 9.105079964613627e-09, + "loss": 0.0011, + "step": 306440 + }, + { + "epoch": 1.965475110074944, + "grad_norm": 0.13514891266822815, + "learning_rate": 9.071348936421364e-09, + "loss": 0.0012, + "step": 306450 + }, + { + "epoch": 1.96553924696873, + "grad_norm": 0.08561267703771591, + "learning_rate": 9.037680448132358e-09, + "loss": 0.0018, + "step": 306460 + }, + { + "epoch": 1.9656033838625162, + "grad_norm": 0.017613446339964867, + "learning_rate": 9.004074500167937e-09, + "loss": 0.0015, + "step": 306470 + }, + { + "epoch": 1.9656675207563024, + "grad_norm": 0.04687608405947685, + "learning_rate": 8.970531092948875e-09, + "loss": 0.0013, + "step": 306480 + }, + { + "epoch": 1.9657316576500885, + "grad_norm": 0.18052297830581665, + "learning_rate": 8.93705022689706e-09, + "loss": 0.0009, + "step": 306490 + }, + { + "epoch": 1.9657957945438744, + "grad_norm": 0.0018816557712852955, + "learning_rate": 8.903631902430487e-09, + "loss": 0.0008, + "step": 306500 + }, + { + "epoch": 1.9658599314376606, + "grad_norm": 0.0032184049487113953, + "learning_rate": 8.870276119968823e-09, + "loss": 0.0014, + "step": 306510 + }, + { + "epoch": 1.9659240683314465, + "grad_norm": 0.040409523993730545, + "learning_rate": 8.836982879928957e-09, + "loss": 0.0006, + "step": 306520 + }, + { + "epoch": 1.9659882052252327, + "grad_norm": 0.0843072310090065, + "learning_rate": 8.803752182729441e-09, + "loss": 0.0007, + "step": 306530 + }, + { + "epoch": 1.9660523421190188, + "grad_norm": 0.07782573997974396, + "learning_rate": 8.770584028786055e-09, + "loss": 0.0006, + "step": 306540 + }, + { + "epoch": 1.966116479012805, + "grad_norm": 0.28871554136276245, + "learning_rate": 8.737478418514578e-09, + "loss": 0.002, + "step": 306550 + }, + { + "epoch": 1.966180615906591, + "grad_norm": 0.3268186151981354, + "learning_rate": 8.704435352329121e-09, + "loss": 0.0016, + "step": 306560 + }, + { + "epoch": 1.9662447528003772, + "grad_norm": 0.03365792706608772, + "learning_rate": 8.671454830644909e-09, + "loss": 0.0008, + "step": 306570 + }, + { + "epoch": 1.9663088896941632, + "grad_norm": 0.13226476311683655, + "learning_rate": 8.63853685387439e-09, + "loss": 0.0012, + "step": 306580 + }, + { + "epoch": 1.9663730265879493, + "grad_norm": 0.07006511837244034, + "learning_rate": 8.605681422430012e-09, + "loss": 0.001, + "step": 306590 + }, + { + "epoch": 1.9664371634817353, + "grad_norm": 0.2049380987882614, + "learning_rate": 8.572888536724223e-09, + "loss": 0.0014, + "step": 306600 + }, + { + "epoch": 1.9665013003755214, + "grad_norm": 0.1545920968055725, + "learning_rate": 8.540158197167247e-09, + "loss": 0.001, + "step": 306610 + }, + { + "epoch": 1.9665654372693075, + "grad_norm": 0.06900747120380402, + "learning_rate": 8.507490404169317e-09, + "loss": 0.0006, + "step": 306620 + }, + { + "epoch": 1.9666295741630937, + "grad_norm": 0.06036344915628433, + "learning_rate": 8.474885158140101e-09, + "loss": 0.0008, + "step": 306630 + }, + { + "epoch": 1.9666937110568798, + "grad_norm": 0.0792551040649414, + "learning_rate": 8.442342459487607e-09, + "loss": 0.001, + "step": 306640 + }, + { + "epoch": 1.966757847950666, + "grad_norm": 0.017666907981038094, + "learning_rate": 8.409862308620398e-09, + "loss": 0.0008, + "step": 306650 + }, + { + "epoch": 1.9668219848444521, + "grad_norm": 0.08386830240488052, + "learning_rate": 8.377444705944815e-09, + "loss": 0.0023, + "step": 306660 + }, + { + "epoch": 1.966886121738238, + "grad_norm": 0.005592831410467625, + "learning_rate": 8.345089651867199e-09, + "loss": 0.0025, + "step": 306670 + }, + { + "epoch": 1.9669502586320242, + "grad_norm": 0.07596705853939056, + "learning_rate": 8.312797146793339e-09, + "loss": 0.0015, + "step": 306680 + }, + { + "epoch": 1.9670143955258101, + "grad_norm": 0.03802109137177467, + "learning_rate": 8.280567191127354e-09, + "loss": 0.0007, + "step": 306690 + }, + { + "epoch": 1.9670785324195963, + "grad_norm": 0.03789745643734932, + "learning_rate": 8.248399785273919e-09, + "loss": 0.0005, + "step": 306700 + }, + { + "epoch": 1.9671426693133824, + "grad_norm": 0.08203060179948807, + "learning_rate": 8.216294929634939e-09, + "loss": 0.0013, + "step": 306710 + }, + { + "epoch": 1.9672068062071686, + "grad_norm": 0.025898775085806847, + "learning_rate": 8.184252624613975e-09, + "loss": 0.0016, + "step": 306720 + }, + { + "epoch": 1.9672709431009547, + "grad_norm": 0.16674844920635223, + "learning_rate": 8.152272870611266e-09, + "loss": 0.0009, + "step": 306730 + }, + { + "epoch": 1.9673350799947409, + "grad_norm": 0.08714725822210312, + "learning_rate": 8.120355668028712e-09, + "loss": 0.0009, + "step": 306740 + }, + { + "epoch": 1.967399216888527, + "grad_norm": 0.10741503536701202, + "learning_rate": 8.088501017265438e-09, + "loss": 0.0016, + "step": 306750 + }, + { + "epoch": 1.967463353782313, + "grad_norm": 0.044649600982666016, + "learning_rate": 8.056708918721123e-09, + "loss": 0.0012, + "step": 306760 + }, + { + "epoch": 1.9675274906760991, + "grad_norm": 0.1709849089384079, + "learning_rate": 8.024979372793784e-09, + "loss": 0.0012, + "step": 306770 + }, + { + "epoch": 1.967591627569885, + "grad_norm": 0.13717105984687805, + "learning_rate": 7.993312379881435e-09, + "loss": 0.0024, + "step": 306780 + }, + { + "epoch": 1.9676557644636712, + "grad_norm": 0.03835640475153923, + "learning_rate": 7.961707940380425e-09, + "loss": 0.0019, + "step": 306790 + }, + { + "epoch": 1.9677199013574573, + "grad_norm": 0.06769486516714096, + "learning_rate": 7.930166054687104e-09, + "loss": 0.0005, + "step": 306800 + }, + { + "epoch": 1.9677840382512435, + "grad_norm": 0.12234804034233093, + "learning_rate": 7.898686723196159e-09, + "loss": 0.001, + "step": 306810 + }, + { + "epoch": 1.9678481751450296, + "grad_norm": 0.09259574115276337, + "learning_rate": 7.867269946302825e-09, + "loss": 0.0011, + "step": 306820 + }, + { + "epoch": 1.9679123120388158, + "grad_norm": 0.03439909592270851, + "learning_rate": 7.835915724400678e-09, + "loss": 0.0007, + "step": 306830 + }, + { + "epoch": 1.9679764489326017, + "grad_norm": 0.10112472623586655, + "learning_rate": 7.804624057881626e-09, + "loss": 0.0005, + "step": 306840 + }, + { + "epoch": 1.9680405858263879, + "grad_norm": 0.05775924399495125, + "learning_rate": 7.773394947139246e-09, + "loss": 0.0011, + "step": 306850 + }, + { + "epoch": 1.9681047227201738, + "grad_norm": 0.03136734664440155, + "learning_rate": 7.742228392563778e-09, + "loss": 0.0017, + "step": 306860 + }, + { + "epoch": 1.96816885961396, + "grad_norm": 0.004767545498907566, + "learning_rate": 7.711124394546021e-09, + "loss": 0.0004, + "step": 306870 + }, + { + "epoch": 1.968232996507746, + "grad_norm": 0.060319263488054276, + "learning_rate": 7.680082953475665e-09, + "loss": 0.0015, + "step": 306880 + }, + { + "epoch": 1.9682971334015322, + "grad_norm": 0.016754992306232452, + "learning_rate": 7.649104069742396e-09, + "loss": 0.0009, + "step": 306890 + }, + { + "epoch": 1.9683612702953184, + "grad_norm": 0.07807540893554688, + "learning_rate": 7.61818774373313e-09, + "loss": 0.0014, + "step": 306900 + }, + { + "epoch": 1.9684254071891045, + "grad_norm": 0.12074722349643707, + "learning_rate": 7.587333975836442e-09, + "loss": 0.0018, + "step": 306910 + }, + { + "epoch": 1.9684895440828907, + "grad_norm": 0.15460114181041718, + "learning_rate": 7.556542766438691e-09, + "loss": 0.0026, + "step": 306920 + }, + { + "epoch": 1.9685536809766766, + "grad_norm": 0.0063071777112782, + "learning_rate": 7.525814115925124e-09, + "loss": 0.0009, + "step": 306930 + }, + { + "epoch": 1.9686178178704627, + "grad_norm": 0.06452561914920807, + "learning_rate": 7.495148024680988e-09, + "loss": 0.0008, + "step": 306940 + }, + { + "epoch": 1.9686819547642487, + "grad_norm": 0.1113513708114624, + "learning_rate": 7.464544493090975e-09, + "loss": 0.0016, + "step": 306950 + }, + { + "epoch": 1.9687460916580348, + "grad_norm": 0.047639150172472, + "learning_rate": 7.434003521538668e-09, + "loss": 0.0014, + "step": 306960 + }, + { + "epoch": 1.968810228551821, + "grad_norm": 0.005297117866575718, + "learning_rate": 7.403525110405985e-09, + "loss": 0.0006, + "step": 306970 + }, + { + "epoch": 1.9688743654456071, + "grad_norm": 0.04109714552760124, + "learning_rate": 7.373109260075395e-09, + "loss": 0.0015, + "step": 306980 + }, + { + "epoch": 1.9689385023393933, + "grad_norm": 0.06713901460170746, + "learning_rate": 7.342755970927707e-09, + "loss": 0.0022, + "step": 306990 + }, + { + "epoch": 1.9690026392331794, + "grad_norm": 0.11273383349180222, + "learning_rate": 7.3124652433437246e-09, + "loss": 0.0015, + "step": 307000 + }, + { + "epoch": 1.9690667761269653, + "grad_norm": 0.001334617380052805, + "learning_rate": 7.282237077702592e-09, + "loss": 0.0009, + "step": 307010 + }, + { + "epoch": 1.9691309130207515, + "grad_norm": 0.07082372158765793, + "learning_rate": 7.252071474383449e-09, + "loss": 0.0009, + "step": 307020 + }, + { + "epoch": 1.9691950499145376, + "grad_norm": 0.03211138769984245, + "learning_rate": 7.2219684337643255e-09, + "loss": 0.0005, + "step": 307030 + }, + { + "epoch": 1.9692591868083236, + "grad_norm": 0.03824929520487785, + "learning_rate": 7.1919279562221446e-09, + "loss": 0.0009, + "step": 307040 + }, + { + "epoch": 1.9693233237021097, + "grad_norm": 0.03742806240916252, + "learning_rate": 7.16195004213327e-09, + "loss": 0.0007, + "step": 307050 + }, + { + "epoch": 1.9693874605958959, + "grad_norm": 0.046398505568504333, + "learning_rate": 7.132034691873513e-09, + "loss": 0.001, + "step": 307060 + }, + { + "epoch": 1.969451597489682, + "grad_norm": 0.027226777747273445, + "learning_rate": 7.102181905817573e-09, + "loss": 0.0011, + "step": 307070 + }, + { + "epoch": 1.9695157343834682, + "grad_norm": 0.0399356409907341, + "learning_rate": 7.072391684339597e-09, + "loss": 0.0011, + "step": 307080 + }, + { + "epoch": 1.9695798712772543, + "grad_norm": 0.07177360355854034, + "learning_rate": 7.042664027813173e-09, + "loss": 0.0008, + "step": 307090 + }, + { + "epoch": 1.9696440081710402, + "grad_norm": 0.029129868373274803, + "learning_rate": 7.012998936610782e-09, + "loss": 0.0013, + "step": 307100 + }, + { + "epoch": 1.9697081450648264, + "grad_norm": 0.017454329878091812, + "learning_rate": 6.983396411103793e-09, + "loss": 0.0008, + "step": 307110 + }, + { + "epoch": 1.9697722819586123, + "grad_norm": 0.07953023165464401, + "learning_rate": 6.953856451663021e-09, + "loss": 0.0004, + "step": 307120 + }, + { + "epoch": 1.9698364188523985, + "grad_norm": 0.037367139011621475, + "learning_rate": 6.924379058658725e-09, + "loss": 0.0011, + "step": 307130 + }, + { + "epoch": 1.9699005557461846, + "grad_norm": 0.05491355061531067, + "learning_rate": 6.8949642324611654e-09, + "loss": 0.0005, + "step": 307140 + }, + { + "epoch": 1.9699646926399708, + "grad_norm": 0.12753793597221375, + "learning_rate": 6.865611973437825e-09, + "loss": 0.0012, + "step": 307150 + }, + { + "epoch": 1.970028829533757, + "grad_norm": 0.00912963505834341, + "learning_rate": 6.836322281956742e-09, + "loss": 0.0007, + "step": 307160 + }, + { + "epoch": 1.970092966427543, + "grad_norm": 0.1551181524991989, + "learning_rate": 6.807095158385402e-09, + "loss": 0.0011, + "step": 307170 + }, + { + "epoch": 1.9701571033213292, + "grad_norm": 0.05082277953624725, + "learning_rate": 6.777930603089622e-09, + "loss": 0.001, + "step": 307180 + }, + { + "epoch": 1.9702212402151151, + "grad_norm": 0.03281303867697716, + "learning_rate": 6.7488286164352215e-09, + "loss": 0.0007, + "step": 307190 + }, + { + "epoch": 1.9702853771089013, + "grad_norm": 0.015306927263736725, + "learning_rate": 6.719789198786353e-09, + "loss": 0.0005, + "step": 307200 + }, + { + "epoch": 1.9703495140026872, + "grad_norm": 0.0468122772872448, + "learning_rate": 6.69081235050717e-09, + "loss": 0.0013, + "step": 307210 + }, + { + "epoch": 1.9704136508964734, + "grad_norm": 0.017557993531227112, + "learning_rate": 6.661898071960715e-09, + "loss": 0.0006, + "step": 307220 + }, + { + "epoch": 1.9704777877902595, + "grad_norm": 0.07849603146314621, + "learning_rate": 6.633046363509477e-09, + "loss": 0.0009, + "step": 307230 + }, + { + "epoch": 1.9705419246840457, + "grad_norm": 0.10207756608724594, + "learning_rate": 6.604257225514832e-09, + "loss": 0.0014, + "step": 307240 + }, + { + "epoch": 1.9706060615778318, + "grad_norm": 0.040534086525440216, + "learning_rate": 6.575530658337603e-09, + "loss": 0.0008, + "step": 307250 + }, + { + "epoch": 1.970670198471618, + "grad_norm": 0.2911793291568756, + "learning_rate": 6.5468666623380584e-09, + "loss": 0.0012, + "step": 307260 + }, + { + "epoch": 1.9707343353654039, + "grad_norm": 0.1544741541147232, + "learning_rate": 6.518265237874244e-09, + "loss": 0.001, + "step": 307270 + }, + { + "epoch": 1.97079847225919, + "grad_norm": 0.022588972002267838, + "learning_rate": 6.489726385305872e-09, + "loss": 0.0008, + "step": 307280 + }, + { + "epoch": 1.970862609152976, + "grad_norm": 0.2511366903781891, + "learning_rate": 6.4612501049898804e-09, + "loss": 0.001, + "step": 307290 + }, + { + "epoch": 1.970926746046762, + "grad_norm": 0.016783207654953003, + "learning_rate": 6.432836397283759e-09, + "loss": 0.0006, + "step": 307300 + }, + { + "epoch": 1.9709908829405482, + "grad_norm": 0.025259699672460556, + "learning_rate": 6.404485262542226e-09, + "loss": 0.0007, + "step": 307310 + }, + { + "epoch": 1.9710550198343344, + "grad_norm": 0.11235236376523972, + "learning_rate": 6.376196701122217e-09, + "loss": 0.0007, + "step": 307320 + }, + { + "epoch": 1.9711191567281205, + "grad_norm": 0.08961598575115204, + "learning_rate": 6.347970713376783e-09, + "loss": 0.0017, + "step": 307330 + }, + { + "epoch": 1.9711832936219067, + "grad_norm": 0.1996752917766571, + "learning_rate": 6.319807299660085e-09, + "loss": 0.0012, + "step": 307340 + }, + { + "epoch": 1.9712474305156928, + "grad_norm": 0.0376928374171257, + "learning_rate": 6.2917064603257304e-09, + "loss": 0.0009, + "step": 307350 + }, + { + "epoch": 1.9713115674094788, + "grad_norm": 0.17062856256961823, + "learning_rate": 6.263668195724548e-09, + "loss": 0.0013, + "step": 307360 + }, + { + "epoch": 1.971375704303265, + "grad_norm": 0.04219835624098778, + "learning_rate": 6.235692506209034e-09, + "loss": 0.0005, + "step": 307370 + }, + { + "epoch": 1.9714398411970508, + "grad_norm": 0.06409318745136261, + "learning_rate": 6.207779392129465e-09, + "loss": 0.0007, + "step": 307380 + }, + { + "epoch": 1.971503978090837, + "grad_norm": 0.043306782841682434, + "learning_rate": 6.179928853835005e-09, + "loss": 0.0006, + "step": 307390 + }, + { + "epoch": 1.9715681149846231, + "grad_norm": 0.045628488063812256, + "learning_rate": 6.152140891674818e-09, + "loss": 0.0005, + "step": 307400 + }, + { + "epoch": 1.9716322518784093, + "grad_norm": 0.09256916493177414, + "learning_rate": 6.124415505998071e-09, + "loss": 0.0021, + "step": 307410 + }, + { + "epoch": 1.9716963887721954, + "grad_norm": 0.08107291162014008, + "learning_rate": 6.096752697150598e-09, + "loss": 0.0011, + "step": 307420 + }, + { + "epoch": 1.9717605256659816, + "grad_norm": 0.016026349738240242, + "learning_rate": 6.069152465480455e-09, + "loss": 0.0013, + "step": 307430 + }, + { + "epoch": 1.9718246625597675, + "grad_norm": 0.045945633202791214, + "learning_rate": 6.041614811332919e-09, + "loss": 0.0005, + "step": 307440 + }, + { + "epoch": 1.9718887994535537, + "grad_norm": 0.06947251409292221, + "learning_rate": 6.014139735053271e-09, + "loss": 0.0007, + "step": 307450 + }, + { + "epoch": 1.9719529363473398, + "grad_norm": 0.08861232548952103, + "learning_rate": 5.986727236985124e-09, + "loss": 0.0004, + "step": 307460 + }, + { + "epoch": 1.9720170732411257, + "grad_norm": 0.11720695346593857, + "learning_rate": 5.959377317473203e-09, + "loss": 0.0006, + "step": 307470 + }, + { + "epoch": 1.9720812101349119, + "grad_norm": 0.04580852761864662, + "learning_rate": 5.932089976858901e-09, + "loss": 0.0009, + "step": 307480 + }, + { + "epoch": 1.972145347028698, + "grad_norm": 0.07148537784814835, + "learning_rate": 5.904865215484723e-09, + "loss": 0.0006, + "step": 307490 + }, + { + "epoch": 1.9722094839224842, + "grad_norm": 0.018794462084770203, + "learning_rate": 5.877703033692062e-09, + "loss": 0.0004, + "step": 307500 + }, + { + "epoch": 1.9722736208162703, + "grad_norm": 0.04394660145044327, + "learning_rate": 5.8506034318206454e-09, + "loss": 0.0014, + "step": 307510 + }, + { + "epoch": 1.9723377577100565, + "grad_norm": 0.10869495570659637, + "learning_rate": 5.823566410210757e-09, + "loss": 0.0012, + "step": 307520 + }, + { + "epoch": 1.9724018946038424, + "grad_norm": 0.03450937941670418, + "learning_rate": 5.7965919692004605e-09, + "loss": 0.0012, + "step": 307530 + }, + { + "epoch": 1.9724660314976286, + "grad_norm": 0.020507413893938065, + "learning_rate": 5.769680109128373e-09, + "loss": 0.0003, + "step": 307540 + }, + { + "epoch": 1.9725301683914145, + "grad_norm": 0.05193580687046051, + "learning_rate": 5.742830830332002e-09, + "loss": 0.0007, + "step": 307550 + }, + { + "epoch": 1.9725943052852006, + "grad_norm": 0.017926448956131935, + "learning_rate": 5.7160441331466364e-09, + "loss": 0.0005, + "step": 307560 + }, + { + "epoch": 1.9726584421789868, + "grad_norm": 0.09575698524713516, + "learning_rate": 5.689320017908673e-09, + "loss": 0.0014, + "step": 307570 + }, + { + "epoch": 1.972722579072773, + "grad_norm": 0.06409459561109543, + "learning_rate": 5.662658484953398e-09, + "loss": 0.0007, + "step": 307580 + }, + { + "epoch": 1.972786715966559, + "grad_norm": 0.1508537083864212, + "learning_rate": 5.6360595346138805e-09, + "loss": 0.0013, + "step": 307590 + }, + { + "epoch": 1.9728508528603452, + "grad_norm": 0.20504428446292877, + "learning_rate": 5.6095231672242955e-09, + "loss": 0.0011, + "step": 307600 + }, + { + "epoch": 1.9729149897541314, + "grad_norm": 0.20593594014644623, + "learning_rate": 5.583049383116601e-09, + "loss": 0.0013, + "step": 307610 + }, + { + "epoch": 1.9729791266479173, + "grad_norm": 0.15778371691703796, + "learning_rate": 5.556638182623309e-09, + "loss": 0.0012, + "step": 307620 + }, + { + "epoch": 1.9730432635417035, + "grad_norm": 0.018617259338498116, + "learning_rate": 5.530289566074154e-09, + "loss": 0.0008, + "step": 307630 + }, + { + "epoch": 1.9731074004354894, + "grad_norm": 0.08489036560058594, + "learning_rate": 5.504003533799984e-09, + "loss": 0.0027, + "step": 307640 + }, + { + "epoch": 1.9731715373292755, + "grad_norm": 0.17733611166477203, + "learning_rate": 5.477780086130535e-09, + "loss": 0.0017, + "step": 307650 + }, + { + "epoch": 1.9732356742230617, + "grad_norm": 0.10488373041152954, + "learning_rate": 5.451619223393878e-09, + "loss": 0.0009, + "step": 307660 + }, + { + "epoch": 1.9732998111168478, + "grad_norm": 0.0796809196472168, + "learning_rate": 5.425520945918083e-09, + "loss": 0.001, + "step": 307670 + }, + { + "epoch": 1.973363948010634, + "grad_norm": 0.07839123159646988, + "learning_rate": 5.3994852540301125e-09, + "loss": 0.0011, + "step": 307680 + }, + { + "epoch": 1.9734280849044201, + "grad_norm": 0.09871714562177658, + "learning_rate": 5.373512148055815e-09, + "loss": 0.0004, + "step": 307690 + }, + { + "epoch": 1.973492221798206, + "grad_norm": 0.0920652449131012, + "learning_rate": 5.347601628321042e-09, + "loss": 0.0007, + "step": 307700 + }, + { + "epoch": 1.9735563586919922, + "grad_norm": 0.15558601915836334, + "learning_rate": 5.321753695150533e-09, + "loss": 0.0017, + "step": 307710 + }, + { + "epoch": 1.9736204955857781, + "grad_norm": 0.027167458087205887, + "learning_rate": 5.2959683488684746e-09, + "loss": 0.0041, + "step": 307720 + }, + { + "epoch": 1.9736846324795643, + "grad_norm": 0.03996816277503967, + "learning_rate": 5.270245589797385e-09, + "loss": 0.0011, + "step": 307730 + }, + { + "epoch": 1.9737487693733504, + "grad_norm": 0.03641613945364952, + "learning_rate": 5.244585418259785e-09, + "loss": 0.0014, + "step": 307740 + }, + { + "epoch": 1.9738129062671366, + "grad_norm": 0.1493997573852539, + "learning_rate": 5.218987834577083e-09, + "loss": 0.0005, + "step": 307750 + }, + { + "epoch": 1.9738770431609227, + "grad_norm": 0.11646231263875961, + "learning_rate": 5.193452839070134e-09, + "loss": 0.0003, + "step": 307760 + }, + { + "epoch": 1.9739411800547089, + "grad_norm": 0.024753261357545853, + "learning_rate": 5.167980432059238e-09, + "loss": 0.0013, + "step": 307770 + }, + { + "epoch": 1.974005316948495, + "grad_norm": 0.09728194773197174, + "learning_rate": 5.142570613863029e-09, + "loss": 0.0012, + "step": 307780 + }, + { + "epoch": 1.974069453842281, + "grad_norm": 0.06630918383598328, + "learning_rate": 5.117223384800141e-09, + "loss": 0.0013, + "step": 307790 + }, + { + "epoch": 1.974133590736067, + "grad_norm": 0.042899925261735916, + "learning_rate": 5.0919387451886516e-09, + "loss": 0.0013, + "step": 307800 + }, + { + "epoch": 1.974197727629853, + "grad_norm": 0.029828933998942375, + "learning_rate": 5.0667166953444205e-09, + "loss": 0.0009, + "step": 307810 + }, + { + "epoch": 1.9742618645236392, + "grad_norm": 0.021324610337615013, + "learning_rate": 5.041557235584416e-09, + "loss": 0.0008, + "step": 307820 + }, + { + "epoch": 1.9743260014174253, + "grad_norm": 0.041082773357629776, + "learning_rate": 5.016460366223385e-09, + "loss": 0.0018, + "step": 307830 + }, + { + "epoch": 1.9743901383112115, + "grad_norm": 0.12822555005550385, + "learning_rate": 4.991426087576079e-09, + "loss": 0.0023, + "step": 307840 + }, + { + "epoch": 1.9744542752049976, + "grad_norm": 0.026461130008101463, + "learning_rate": 4.9664543999561335e-09, + "loss": 0.0014, + "step": 307850 + }, + { + "epoch": 1.9745184120987838, + "grad_norm": 0.011752900667488575, + "learning_rate": 4.941545303676076e-09, + "loss": 0.0014, + "step": 307860 + }, + { + "epoch": 1.9745825489925697, + "grad_norm": 0.037690773606300354, + "learning_rate": 4.9166987990489915e-09, + "loss": 0.0029, + "step": 307870 + }, + { + "epoch": 1.9746466858863558, + "grad_norm": 0.11344976723194122, + "learning_rate": 4.891914886385185e-09, + "loss": 0.0015, + "step": 307880 + }, + { + "epoch": 1.974710822780142, + "grad_norm": 0.16902725398540497, + "learning_rate": 4.86719356599552e-09, + "loss": 0.0006, + "step": 307890 + }, + { + "epoch": 1.974774959673928, + "grad_norm": 0.07160399109125137, + "learning_rate": 4.842534838190305e-09, + "loss": 0.0009, + "step": 307900 + }, + { + "epoch": 1.974839096567714, + "grad_norm": 0.1412993222475052, + "learning_rate": 4.817938703277625e-09, + "loss": 0.0012, + "step": 307910 + }, + { + "epoch": 1.9749032334615002, + "grad_norm": 0.014993314631283283, + "learning_rate": 4.793405161566678e-09, + "loss": 0.0025, + "step": 307920 + }, + { + "epoch": 1.9749673703552864, + "grad_norm": 0.07830905169248581, + "learning_rate": 4.7689342133638846e-09, + "loss": 0.0004, + "step": 307930 + }, + { + "epoch": 1.9750315072490725, + "grad_norm": 0.08769722282886505, + "learning_rate": 4.744525858976778e-09, + "loss": 0.0006, + "step": 307940 + }, + { + "epoch": 1.9750956441428587, + "grad_norm": 0.167959064245224, + "learning_rate": 4.720180098710669e-09, + "loss": 0.0013, + "step": 307950 + }, + { + "epoch": 1.9751597810366446, + "grad_norm": 0.08166962116956711, + "learning_rate": 4.695896932870314e-09, + "loss": 0.0008, + "step": 307960 + }, + { + "epoch": 1.9752239179304307, + "grad_norm": 0.11420666426420212, + "learning_rate": 4.671676361761024e-09, + "loss": 0.001, + "step": 307970 + }, + { + "epoch": 1.9752880548242167, + "grad_norm": 0.1623649150133133, + "learning_rate": 4.647518385685889e-09, + "loss": 0.0008, + "step": 307980 + }, + { + "epoch": 1.9753521917180028, + "grad_norm": 0.022991027683019638, + "learning_rate": 4.623423004946892e-09, + "loss": 0.0011, + "step": 307990 + }, + { + "epoch": 1.975416328611789, + "grad_norm": 0.07424769550561905, + "learning_rate": 4.599390219847122e-09, + "loss": 0.0008, + "step": 308000 + }, + { + "epoch": 1.975480465505575, + "grad_norm": 0.052277661859989166, + "learning_rate": 4.575420030686895e-09, + "loss": 0.001, + "step": 308010 + }, + { + "epoch": 1.9755446023993612, + "grad_norm": 0.016164889559149742, + "learning_rate": 4.551512437767081e-09, + "loss": 0.0014, + "step": 308020 + }, + { + "epoch": 1.9756087392931474, + "grad_norm": 0.12415362149477005, + "learning_rate": 4.52766744138633e-09, + "loss": 0.0014, + "step": 308030 + }, + { + "epoch": 1.9756728761869335, + "grad_norm": 0.1140543669462204, + "learning_rate": 4.503885041844403e-09, + "loss": 0.0007, + "step": 308040 + }, + { + "epoch": 1.9757370130807195, + "grad_norm": 0.11638864874839783, + "learning_rate": 4.480165239439393e-09, + "loss": 0.0007, + "step": 308050 + }, + { + "epoch": 1.9758011499745056, + "grad_norm": 0.13007675111293793, + "learning_rate": 4.456508034467733e-09, + "loss": 0.001, + "step": 308060 + }, + { + "epoch": 1.9758652868682915, + "grad_norm": 0.05380553379654884, + "learning_rate": 4.432913427226959e-09, + "loss": 0.0004, + "step": 308070 + }, + { + "epoch": 1.9759294237620777, + "grad_norm": 0.019417930394411087, + "learning_rate": 4.409381418011283e-09, + "loss": 0.0007, + "step": 308080 + }, + { + "epoch": 1.9759935606558638, + "grad_norm": 0.06386806815862656, + "learning_rate": 4.385912007116577e-09, + "loss": 0.0013, + "step": 308090 + }, + { + "epoch": 1.97605769754965, + "grad_norm": 0.042818184942007065, + "learning_rate": 4.362505194837052e-09, + "loss": 0.0005, + "step": 308100 + }, + { + "epoch": 1.9761218344434361, + "grad_norm": 0.04058590158820152, + "learning_rate": 4.3391609814652516e-09, + "loss": 0.0007, + "step": 308110 + }, + { + "epoch": 1.9761859713372223, + "grad_norm": 0.07583338022232056, + "learning_rate": 4.315879367294274e-09, + "loss": 0.0005, + "step": 308120 + }, + { + "epoch": 1.9762501082310082, + "grad_norm": 0.11366616189479828, + "learning_rate": 4.292660352615552e-09, + "loss": 0.0008, + "step": 308130 + }, + { + "epoch": 1.9763142451247944, + "grad_norm": 0.015062171965837479, + "learning_rate": 4.2695039377205206e-09, + "loss": 0.0007, + "step": 308140 + }, + { + "epoch": 1.9763783820185803, + "grad_norm": 0.059372857213020325, + "learning_rate": 4.246410122898392e-09, + "loss": 0.0008, + "step": 308150 + }, + { + "epoch": 1.9764425189123664, + "grad_norm": 0.009204063564538956, + "learning_rate": 4.2233789084400456e-09, + "loss": 0.0004, + "step": 308160 + }, + { + "epoch": 1.9765066558061526, + "grad_norm": 0.36856114864349365, + "learning_rate": 4.2004102946324735e-09, + "loss": 0.0027, + "step": 308170 + }, + { + "epoch": 1.9765707926999387, + "grad_norm": 0.0946122258901596, + "learning_rate": 4.177504281764333e-09, + "loss": 0.0009, + "step": 308180 + }, + { + "epoch": 1.9766349295937249, + "grad_norm": 0.09484247863292694, + "learning_rate": 4.154660870122618e-09, + "loss": 0.0011, + "step": 308190 + }, + { + "epoch": 1.976699066487511, + "grad_norm": 0.04668445140123367, + "learning_rate": 4.1318800599932095e-09, + "loss": 0.0011, + "step": 308200 + }, + { + "epoch": 1.9767632033812972, + "grad_norm": 0.0832228809595108, + "learning_rate": 4.109161851662546e-09, + "loss": 0.0005, + "step": 308210 + }, + { + "epoch": 1.9768273402750831, + "grad_norm": 0.06130097806453705, + "learning_rate": 4.086506245413735e-09, + "loss": 0.0017, + "step": 308220 + }, + { + "epoch": 1.9768914771688693, + "grad_norm": 0.08028879016637802, + "learning_rate": 4.063913241532103e-09, + "loss": 0.0015, + "step": 308230 + }, + { + "epoch": 1.9769556140626552, + "grad_norm": 0.032631564885377884, + "learning_rate": 4.041382840300201e-09, + "loss": 0.0009, + "step": 308240 + }, + { + "epoch": 1.9770197509564413, + "grad_norm": 0.37207770347595215, + "learning_rate": 4.018915042000027e-09, + "loss": 0.0013, + "step": 308250 + }, + { + "epoch": 1.9770838878502275, + "grad_norm": 0.12970571219921112, + "learning_rate": 3.996509846913577e-09, + "loss": 0.0015, + "step": 308260 + }, + { + "epoch": 1.9771480247440136, + "grad_norm": 0.1854458898305893, + "learning_rate": 3.974167255321182e-09, + "loss": 0.0011, + "step": 308270 + }, + { + "epoch": 1.9772121616377998, + "grad_norm": 0.14242278039455414, + "learning_rate": 3.951887267503174e-09, + "loss": 0.0018, + "step": 308280 + }, + { + "epoch": 1.977276298531586, + "grad_norm": 0.10432162880897522, + "learning_rate": 3.929669883738774e-09, + "loss": 0.001, + "step": 308290 + }, + { + "epoch": 1.977340435425372, + "grad_norm": 0.1106819212436676, + "learning_rate": 3.9075151043066475e-09, + "loss": 0.0011, + "step": 308300 + }, + { + "epoch": 1.977404572319158, + "grad_norm": 0.08355239033699036, + "learning_rate": 3.8854229294832405e-09, + "loss": 0.0015, + "step": 308310 + }, + { + "epoch": 1.9774687092129442, + "grad_norm": 0.03320559114217758, + "learning_rate": 3.8633933595466636e-09, + "loss": 0.0011, + "step": 308320 + }, + { + "epoch": 1.97753284610673, + "grad_norm": 0.043652139604091644, + "learning_rate": 3.841426394772252e-09, + "loss": 0.001, + "step": 308330 + }, + { + "epoch": 1.9775969830005162, + "grad_norm": 0.08986610174179077, + "learning_rate": 3.8195220354353416e-09, + "loss": 0.0019, + "step": 308340 + }, + { + "epoch": 1.9776611198943024, + "grad_norm": 0.034796494990587234, + "learning_rate": 3.797680281810712e-09, + "loss": 0.0011, + "step": 308350 + }, + { + "epoch": 1.9777252567880885, + "grad_norm": 0.0075569432228803635, + "learning_rate": 3.775901134172033e-09, + "loss": 0.0015, + "step": 308360 + }, + { + "epoch": 1.9777893936818747, + "grad_norm": 0.19257009029388428, + "learning_rate": 3.754184592791866e-09, + "loss": 0.0012, + "step": 308370 + }, + { + "epoch": 1.9778535305756608, + "grad_norm": 0.02256534807384014, + "learning_rate": 3.732530657942768e-09, + "loss": 0.0008, + "step": 308380 + }, + { + "epoch": 1.9779176674694467, + "grad_norm": 0.03438534960150719, + "learning_rate": 3.710939329895635e-09, + "loss": 0.0004, + "step": 308390 + }, + { + "epoch": 1.977981804363233, + "grad_norm": 0.042067401111125946, + "learning_rate": 3.6894106089213623e-09, + "loss": 0.0025, + "step": 308400 + }, + { + "epoch": 1.9780459412570188, + "grad_norm": 0.11088132113218307, + "learning_rate": 3.6679444952891773e-09, + "loss": 0.0016, + "step": 308410 + }, + { + "epoch": 1.978110078150805, + "grad_norm": 0.12521320581436157, + "learning_rate": 3.646540989268865e-09, + "loss": 0.0013, + "step": 308420 + }, + { + "epoch": 1.9781742150445911, + "grad_norm": 0.01596071943640709, + "learning_rate": 3.6252000911279894e-09, + "loss": 0.0004, + "step": 308430 + }, + { + "epoch": 1.9782383519383773, + "grad_norm": 0.001190666574984789, + "learning_rate": 3.6039218011341137e-09, + "loss": 0.0011, + "step": 308440 + }, + { + "epoch": 1.9783024888321634, + "grad_norm": 0.06425302475690842, + "learning_rate": 3.582706119554247e-09, + "loss": 0.0014, + "step": 308450 + }, + { + "epoch": 1.9783666257259496, + "grad_norm": 0.04505036026239395, + "learning_rate": 3.561553046654287e-09, + "loss": 0.001, + "step": 308460 + }, + { + "epoch": 1.9784307626197357, + "grad_norm": 0.042695268988609314, + "learning_rate": 3.5404625826984674e-09, + "loss": 0.0009, + "step": 308470 + }, + { + "epoch": 1.9784948995135216, + "grad_norm": 0.02438264898955822, + "learning_rate": 3.519434727951576e-09, + "loss": 0.0009, + "step": 308480 + }, + { + "epoch": 1.9785590364073078, + "grad_norm": 0.02695961482822895, + "learning_rate": 3.4984694826772912e-09, + "loss": 0.0019, + "step": 308490 + }, + { + "epoch": 1.9786231733010937, + "grad_norm": 0.0018918344285339117, + "learning_rate": 3.4775668471381807e-09, + "loss": 0.0006, + "step": 308500 + }, + { + "epoch": 1.9786873101948799, + "grad_norm": 0.11057069152593613, + "learning_rate": 3.4567268215957015e-09, + "loss": 0.001, + "step": 308510 + }, + { + "epoch": 1.978751447088666, + "grad_norm": 0.00726739689707756, + "learning_rate": 3.435949406311867e-09, + "loss": 0.0008, + "step": 308520 + }, + { + "epoch": 1.9788155839824522, + "grad_norm": 0.05602853000164032, + "learning_rate": 3.415234601546469e-09, + "loss": 0.0024, + "step": 308530 + }, + { + "epoch": 1.9788797208762383, + "grad_norm": 0.054609403014183044, + "learning_rate": 3.394582407558744e-09, + "loss": 0.0009, + "step": 308540 + }, + { + "epoch": 1.9789438577700245, + "grad_norm": 0.1321811079978943, + "learning_rate": 3.3739928246084854e-09, + "loss": 0.0009, + "step": 308550 + }, + { + "epoch": 1.9790079946638104, + "grad_norm": 0.09992507100105286, + "learning_rate": 3.353465852952709e-09, + "loss": 0.0008, + "step": 308560 + }, + { + "epoch": 1.9790721315575965, + "grad_norm": 0.06722811609506607, + "learning_rate": 3.3330014928495415e-09, + "loss": 0.0009, + "step": 308570 + }, + { + "epoch": 1.9791362684513827, + "grad_norm": 0.0741615891456604, + "learning_rate": 3.3125997445548895e-09, + "loss": 0.0006, + "step": 308580 + }, + { + "epoch": 1.9792004053451686, + "grad_norm": 0.12428124248981476, + "learning_rate": 3.2922606083241048e-09, + "loss": 0.0017, + "step": 308590 + }, + { + "epoch": 1.9792645422389548, + "grad_norm": 0.056774888187646866, + "learning_rate": 3.2719840844125384e-09, + "loss": 0.0009, + "step": 308600 + }, + { + "epoch": 1.979328679132741, + "grad_norm": 0.03519848361611366, + "learning_rate": 3.251770173073876e-09, + "loss": 0.0016, + "step": 308610 + }, + { + "epoch": 1.979392816026527, + "grad_norm": 0.03318639099597931, + "learning_rate": 3.2316188745618037e-09, + "loss": 0.001, + "step": 308620 + }, + { + "epoch": 1.9794569529203132, + "grad_norm": 0.019073570147156715, + "learning_rate": 3.2115301891288975e-09, + "loss": 0.0006, + "step": 308630 + }, + { + "epoch": 1.9795210898140994, + "grad_norm": 0.00515739107504487, + "learning_rate": 3.1915041170266225e-09, + "loss": 0.0008, + "step": 308640 + }, + { + "epoch": 1.9795852267078853, + "grad_norm": 0.03800279274582863, + "learning_rate": 3.1715406585058893e-09, + "loss": 0.001, + "step": 308650 + }, + { + "epoch": 1.9796493636016714, + "grad_norm": 0.075951486825943, + "learning_rate": 3.1516398138170533e-09, + "loss": 0.0008, + "step": 308660 + }, + { + "epoch": 1.9797135004954574, + "grad_norm": 0.024031635373830795, + "learning_rate": 3.1318015832088043e-09, + "loss": 0.0008, + "step": 308670 + }, + { + "epoch": 1.9797776373892435, + "grad_norm": 0.10507842153310776, + "learning_rate": 3.112025966930943e-09, + "loss": 0.0012, + "step": 308680 + }, + { + "epoch": 1.9798417742830297, + "grad_norm": 0.04669702425599098, + "learning_rate": 3.092312965230493e-09, + "loss": 0.0015, + "step": 308690 + }, + { + "epoch": 1.9799059111768158, + "grad_norm": 0.11659438163042068, + "learning_rate": 3.0726625783544796e-09, + "loss": 0.0007, + "step": 308700 + }, + { + "epoch": 1.979970048070602, + "grad_norm": 0.11450156569480896, + "learning_rate": 3.053074806548817e-09, + "loss": 0.0008, + "step": 308710 + }, + { + "epoch": 1.980034184964388, + "grad_norm": 0.10217200964689255, + "learning_rate": 3.0335496500599748e-09, + "loss": 0.0005, + "step": 308720 + }, + { + "epoch": 1.9800983218581742, + "grad_norm": 0.12170190364122391, + "learning_rate": 3.014087109131647e-09, + "loss": 0.0007, + "step": 308730 + }, + { + "epoch": 1.9801624587519602, + "grad_norm": 0.1131337434053421, + "learning_rate": 2.9946871840086376e-09, + "loss": 0.001, + "step": 308740 + }, + { + "epoch": 1.9802265956457463, + "grad_norm": 0.048643749207258224, + "learning_rate": 2.9753498749329757e-09, + "loss": 0.0012, + "step": 308750 + }, + { + "epoch": 1.9802907325395323, + "grad_norm": 0.08117853105068207, + "learning_rate": 2.9560751821477997e-09, + "loss": 0.0003, + "step": 308760 + }, + { + "epoch": 1.9803548694333184, + "grad_norm": 0.0409931018948555, + "learning_rate": 2.936863105894583e-09, + "loss": 0.0015, + "step": 308770 + }, + { + "epoch": 1.9804190063271045, + "grad_norm": 0.040586985647678375, + "learning_rate": 2.917713646413134e-09, + "loss": 0.0007, + "step": 308780 + }, + { + "epoch": 1.9804831432208907, + "grad_norm": 0.0733502060174942, + "learning_rate": 2.8986268039443712e-09, + "loss": 0.0009, + "step": 308790 + }, + { + "epoch": 1.9805472801146768, + "grad_norm": 0.004943209234625101, + "learning_rate": 2.8796025787275474e-09, + "loss": 0.0006, + "step": 308800 + }, + { + "epoch": 1.980611417008463, + "grad_norm": 0.14331646263599396, + "learning_rate": 2.86064097100025e-09, + "loss": 0.0014, + "step": 308810 + }, + { + "epoch": 1.980675553902249, + "grad_norm": 0.07906894385814667, + "learning_rate": 2.8417419810011783e-09, + "loss": 0.0011, + "step": 308820 + }, + { + "epoch": 1.980739690796035, + "grad_norm": 0.11326241493225098, + "learning_rate": 2.822905608965698e-09, + "loss": 0.0014, + "step": 308830 + }, + { + "epoch": 1.980803827689821, + "grad_norm": 0.16474296152591705, + "learning_rate": 2.8041318551308426e-09, + "loss": 0.0022, + "step": 308840 + }, + { + "epoch": 1.9808679645836071, + "grad_norm": 0.0755569115281105, + "learning_rate": 2.7854207197319794e-09, + "loss": 0.0036, + "step": 308850 + }, + { + "epoch": 1.9809321014773933, + "grad_norm": 0.1894502192735672, + "learning_rate": 2.7667722030028098e-09, + "loss": 0.0007, + "step": 308860 + }, + { + "epoch": 1.9809962383711794, + "grad_norm": 0.08864570409059525, + "learning_rate": 2.7481863051775913e-09, + "loss": 0.0007, + "step": 308870 + }, + { + "epoch": 1.9810603752649656, + "grad_norm": 0.16819244623184204, + "learning_rate": 2.7296630264894706e-09, + "loss": 0.0014, + "step": 308880 + }, + { + "epoch": 1.9811245121587517, + "grad_norm": 0.005292494315654039, + "learning_rate": 2.7112023671699297e-09, + "loss": 0.0008, + "step": 308890 + }, + { + "epoch": 1.9811886490525379, + "grad_norm": 0.016621023416519165, + "learning_rate": 2.692804327450449e-09, + "loss": 0.0027, + "step": 308900 + }, + { + "epoch": 1.9812527859463238, + "grad_norm": 0.12444616109132767, + "learning_rate": 2.6744689075614005e-09, + "loss": 0.001, + "step": 308910 + }, + { + "epoch": 1.98131692284011, + "grad_norm": 0.10248826444149017, + "learning_rate": 2.6561961077331556e-09, + "loss": 0.001, + "step": 308920 + }, + { + "epoch": 1.9813810597338959, + "grad_norm": 0.0755588635802269, + "learning_rate": 2.6379859281944196e-09, + "loss": 0.0009, + "step": 308930 + }, + { + "epoch": 1.981445196627682, + "grad_norm": 0.08173400908708572, + "learning_rate": 2.619838369172789e-09, + "loss": 0.0009, + "step": 308940 + }, + { + "epoch": 1.9815093335214682, + "grad_norm": 0.07572423666715622, + "learning_rate": 2.601753430896414e-09, + "loss": 0.0007, + "step": 308950 + }, + { + "epoch": 1.9815734704152543, + "grad_norm": 0.04430937021970749, + "learning_rate": 2.5837311135917807e-09, + "loss": 0.0012, + "step": 308960 + }, + { + "epoch": 1.9816376073090405, + "grad_norm": 0.41289564967155457, + "learning_rate": 2.5657714174848192e-09, + "loss": 0.0013, + "step": 308970 + }, + { + "epoch": 1.9817017442028266, + "grad_norm": 0.08378173410892487, + "learning_rate": 2.5478743427997943e-09, + "loss": 0.0009, + "step": 308980 + }, + { + "epoch": 1.9817658810966126, + "grad_norm": 0.05246363952755928, + "learning_rate": 2.5300398897620816e-09, + "loss": 0.0012, + "step": 308990 + }, + { + "epoch": 1.9818300179903987, + "grad_norm": 0.06257522851228714, + "learning_rate": 2.5122680585942803e-09, + "loss": 0.0009, + "step": 309000 + }, + { + "epoch": 1.9818941548841849, + "grad_norm": 0.0537213571369648, + "learning_rate": 2.494558849519546e-09, + "loss": 0.0007, + "step": 309010 + }, + { + "epoch": 1.9819582917779708, + "grad_norm": 0.24839678406715393, + "learning_rate": 2.476912262759368e-09, + "loss": 0.0026, + "step": 309020 + }, + { + "epoch": 1.982022428671757, + "grad_norm": 0.06641262769699097, + "learning_rate": 2.459328298535235e-09, + "loss": 0.0013, + "step": 309030 + }, + { + "epoch": 1.982086565565543, + "grad_norm": 0.15526942908763885, + "learning_rate": 2.4418069570675272e-09, + "loss": 0.0017, + "step": 309040 + }, + { + "epoch": 1.9821507024593292, + "grad_norm": 0.09031543880701065, + "learning_rate": 2.4243482385755133e-09, + "loss": 0.0014, + "step": 309050 + }, + { + "epoch": 1.9822148393531154, + "grad_norm": 0.10646952688694, + "learning_rate": 2.406952143278463e-09, + "loss": 0.0008, + "step": 309060 + }, + { + "epoch": 1.9822789762469015, + "grad_norm": 0.04314703121781349, + "learning_rate": 2.3896186713939783e-09, + "loss": 0.0005, + "step": 309070 + }, + { + "epoch": 1.9823431131406875, + "grad_norm": 0.03664984926581383, + "learning_rate": 2.3723478231391097e-09, + "loss": 0.0011, + "step": 309080 + }, + { + "epoch": 1.9824072500344736, + "grad_norm": 0.002345103770494461, + "learning_rate": 2.3551395987303494e-09, + "loss": 0.0007, + "step": 309090 + }, + { + "epoch": 1.9824713869282595, + "grad_norm": 0.07222457975149155, + "learning_rate": 2.3379939983836363e-09, + "loss": 0.0006, + "step": 309100 + }, + { + "epoch": 1.9825355238220457, + "grad_norm": 0.06955096870660782, + "learning_rate": 2.3209110223137987e-09, + "loss": 0.0008, + "step": 309110 + }, + { + "epoch": 1.9825996607158318, + "grad_norm": 0.016046542674303055, + "learning_rate": 2.3038906707345545e-09, + "loss": 0.0015, + "step": 309120 + }, + { + "epoch": 1.982663797609618, + "grad_norm": 0.07722265273332596, + "learning_rate": 2.286932943859621e-09, + "loss": 0.0041, + "step": 309130 + }, + { + "epoch": 1.9827279345034041, + "grad_norm": 0.11098852753639221, + "learning_rate": 2.2700378419004963e-09, + "loss": 0.0023, + "step": 309140 + }, + { + "epoch": 1.9827920713971903, + "grad_norm": 0.10734561830759048, + "learning_rate": 2.2532053650703433e-09, + "loss": 0.0016, + "step": 309150 + }, + { + "epoch": 1.9828562082909764, + "grad_norm": 0.07452143728733063, + "learning_rate": 2.236435513578994e-09, + "loss": 0.0004, + "step": 309160 + }, + { + "epoch": 1.9829203451847623, + "grad_norm": 0.11671552807092667, + "learning_rate": 2.2197282876373904e-09, + "loss": 0.0011, + "step": 309170 + }, + { + "epoch": 1.9829844820785485, + "grad_norm": 0.04381822794675827, + "learning_rate": 2.2030836874542548e-09, + "loss": 0.0014, + "step": 309180 + }, + { + "epoch": 1.9830486189723344, + "grad_norm": 0.047901857644319534, + "learning_rate": 2.1865017132377543e-09, + "loss": 0.0014, + "step": 309190 + }, + { + "epoch": 1.9831127558661206, + "grad_norm": 0.08380786329507828, + "learning_rate": 2.169982365197165e-09, + "loss": 0.0018, + "step": 309200 + }, + { + "epoch": 1.9831768927599067, + "grad_norm": 0.06376513838768005, + "learning_rate": 2.1535256435378793e-09, + "loss": 0.0021, + "step": 309210 + }, + { + "epoch": 1.9832410296536929, + "grad_norm": 0.2591180205345154, + "learning_rate": 2.1371315484675082e-09, + "loss": 0.0011, + "step": 309220 + }, + { + "epoch": 1.983305166547479, + "grad_norm": 0.07580866664648056, + "learning_rate": 2.120800080190333e-09, + "loss": 0.0006, + "step": 309230 + }, + { + "epoch": 1.9833693034412652, + "grad_norm": 0.1985291689634323, + "learning_rate": 2.1045312389111893e-09, + "loss": 0.0012, + "step": 309240 + }, + { + "epoch": 1.983433440335051, + "grad_norm": 0.042491424828767776, + "learning_rate": 2.0883250248349142e-09, + "loss": 0.0008, + "step": 309250 + }, + { + "epoch": 1.9834975772288372, + "grad_norm": 0.05674993619322777, + "learning_rate": 2.0721814381635676e-09, + "loss": 0.0015, + "step": 309260 + }, + { + "epoch": 1.9835617141226232, + "grad_norm": 0.02484288439154625, + "learning_rate": 2.0561004790997653e-09, + "loss": 0.0017, + "step": 309270 + }, + { + "epoch": 1.9836258510164093, + "grad_norm": 0.041392479091882706, + "learning_rate": 2.0400821478450126e-09, + "loss": 0.0011, + "step": 309280 + }, + { + "epoch": 1.9836899879101955, + "grad_norm": 0.03724001348018646, + "learning_rate": 2.0241264445997057e-09, + "loss": 0.0019, + "step": 309290 + }, + { + "epoch": 1.9837541248039816, + "grad_norm": 0.01675868220627308, + "learning_rate": 2.0082333695642385e-09, + "loss": 0.0008, + "step": 309300 + }, + { + "epoch": 1.9838182616977678, + "grad_norm": 0.08815675973892212, + "learning_rate": 1.992402922937897e-09, + "loss": 0.0011, + "step": 309310 + }, + { + "epoch": 1.983882398591554, + "grad_norm": 0.08469753712415695, + "learning_rate": 1.976635104918856e-09, + "loss": 0.0008, + "step": 309320 + }, + { + "epoch": 1.98394653548534, + "grad_norm": 0.12119343876838684, + "learning_rate": 1.9609299157041796e-09, + "loss": 0.0008, + "step": 309330 + }, + { + "epoch": 1.984010672379126, + "grad_norm": 0.07586673647165298, + "learning_rate": 1.9452873554914875e-09, + "loss": 0.0015, + "step": 309340 + }, + { + "epoch": 1.9840748092729121, + "grad_norm": 0.06572098284959793, + "learning_rate": 1.929707424476179e-09, + "loss": 0.001, + "step": 309350 + }, + { + "epoch": 1.984138946166698, + "grad_norm": 0.10405579209327698, + "learning_rate": 1.914190122853654e-09, + "loss": 0.0007, + "step": 309360 + }, + { + "epoch": 1.9842030830604842, + "grad_norm": 0.009541662409901619, + "learning_rate": 1.898735450818201e-09, + "loss": 0.0011, + "step": 309370 + }, + { + "epoch": 1.9842672199542704, + "grad_norm": 0.013885400258004665, + "learning_rate": 1.883343408564109e-09, + "loss": 0.001, + "step": 309380 + }, + { + "epoch": 1.9843313568480565, + "grad_norm": 0.27208417654037476, + "learning_rate": 1.8680139962834466e-09, + "loss": 0.0012, + "step": 309390 + }, + { + "epoch": 1.9843954937418427, + "grad_norm": 0.04294242709875107, + "learning_rate": 1.8527472141688375e-09, + "loss": 0.0005, + "step": 309400 + }, + { + "epoch": 1.9844596306356288, + "grad_norm": 0.05179812014102936, + "learning_rate": 1.8375430624117952e-09, + "loss": 0.0008, + "step": 309410 + }, + { + "epoch": 1.9845237675294147, + "grad_norm": 0.03251425921916962, + "learning_rate": 1.822401541202168e-09, + "loss": 0.0007, + "step": 309420 + }, + { + "epoch": 1.9845879044232009, + "grad_norm": 0.05451219528913498, + "learning_rate": 1.8073226507298036e-09, + "loss": 0.0012, + "step": 309430 + }, + { + "epoch": 1.984652041316987, + "grad_norm": 0.14095070958137512, + "learning_rate": 1.7923063911839956e-09, + "loss": 0.001, + "step": 309440 + }, + { + "epoch": 1.984716178210773, + "grad_norm": 0.10173560678958893, + "learning_rate": 1.7773527627529263e-09, + "loss": 0.0009, + "step": 309450 + }, + { + "epoch": 1.984780315104559, + "grad_norm": 0.036518894135951996, + "learning_rate": 1.7624617656236686e-09, + "loss": 0.0009, + "step": 309460 + }, + { + "epoch": 1.9848444519983452, + "grad_norm": 0.12577344477176666, + "learning_rate": 1.74763339998274e-09, + "loss": 0.001, + "step": 309470 + }, + { + "epoch": 1.9849085888921314, + "grad_norm": 0.014238179661333561, + "learning_rate": 1.7328676660166578e-09, + "loss": 0.0005, + "step": 309480 + }, + { + "epoch": 1.9849727257859175, + "grad_norm": 0.047518134117126465, + "learning_rate": 1.7181645639097188e-09, + "loss": 0.0009, + "step": 309490 + }, + { + "epoch": 1.9850368626797037, + "grad_norm": 0.15631861984729767, + "learning_rate": 1.7035240938462204e-09, + "loss": 0.0016, + "step": 309500 + }, + { + "epoch": 1.9851009995734896, + "grad_norm": 0.013858428224921227, + "learning_rate": 1.6889462560099046e-09, + "loss": 0.0004, + "step": 309510 + }, + { + "epoch": 1.9851651364672758, + "grad_norm": 0.045731060206890106, + "learning_rate": 1.6744310505834027e-09, + "loss": 0.0013, + "step": 309520 + }, + { + "epoch": 1.9852292733610617, + "grad_norm": 0.01558227464556694, + "learning_rate": 1.6599784777482365e-09, + "loss": 0.001, + "step": 309530 + }, + { + "epoch": 1.9852934102548478, + "grad_norm": 0.2027255892753601, + "learning_rate": 1.645588537685927e-09, + "loss": 0.0012, + "step": 309540 + }, + { + "epoch": 1.985357547148634, + "grad_norm": 0.06667507439851761, + "learning_rate": 1.631261230576886e-09, + "loss": 0.0011, + "step": 309550 + }, + { + "epoch": 1.9854216840424201, + "grad_norm": 0.05854310467839241, + "learning_rate": 1.6169965566004141e-09, + "loss": 0.0007, + "step": 309560 + }, + { + "epoch": 1.9854858209362063, + "grad_norm": 0.09671935439109802, + "learning_rate": 1.6027945159352575e-09, + "loss": 0.0007, + "step": 309570 + }, + { + "epoch": 1.9855499578299924, + "grad_norm": 0.08241838961839676, + "learning_rate": 1.5886551087590518e-09, + "loss": 0.0018, + "step": 309580 + }, + { + "epoch": 1.9856140947237786, + "grad_norm": 0.10554327070713043, + "learning_rate": 1.5745783352494327e-09, + "loss": 0.0006, + "step": 309590 + }, + { + "epoch": 1.9856782316175645, + "grad_norm": 0.05703651160001755, + "learning_rate": 1.5605641955829254e-09, + "loss": 0.0003, + "step": 309600 + }, + { + "epoch": 1.9857423685113507, + "grad_norm": 0.10870242118835449, + "learning_rate": 1.5466126899349454e-09, + "loss": 0.0012, + "step": 309610 + }, + { + "epoch": 1.9858065054051366, + "grad_norm": 0.11203661561012268, + "learning_rate": 1.5327238184797977e-09, + "loss": 0.0009, + "step": 309620 + }, + { + "epoch": 1.9858706422989227, + "grad_norm": 0.11214514821767807, + "learning_rate": 1.5188975813923424e-09, + "loss": 0.001, + "step": 309630 + }, + { + "epoch": 1.9859347791927089, + "grad_norm": 0.04769856482744217, + "learning_rate": 1.5051339788452192e-09, + "loss": 0.0005, + "step": 309640 + }, + { + "epoch": 1.985998916086495, + "grad_norm": 0.10414264351129532, + "learning_rate": 1.4914330110110675e-09, + "loss": 0.0021, + "step": 309650 + }, + { + "epoch": 1.9860630529802812, + "grad_norm": 0.059631455689668655, + "learning_rate": 1.4777946780619724e-09, + "loss": 0.0006, + "step": 309660 + }, + { + "epoch": 1.9861271898740673, + "grad_norm": 0.09064489603042603, + "learning_rate": 1.4642189801683527e-09, + "loss": 0.0005, + "step": 309670 + }, + { + "epoch": 1.9861913267678533, + "grad_norm": 0.3767741620540619, + "learning_rate": 1.4507059175006277e-09, + "loss": 0.0023, + "step": 309680 + }, + { + "epoch": 1.9862554636616394, + "grad_norm": 0.05923156067728996, + "learning_rate": 1.4372554902275514e-09, + "loss": 0.0007, + "step": 309690 + }, + { + "epoch": 1.9863196005554253, + "grad_norm": 0.07710826396942139, + "learning_rate": 1.423867698518433e-09, + "loss": 0.0012, + "step": 309700 + }, + { + "epoch": 1.9863837374492115, + "grad_norm": 0.08025619387626648, + "learning_rate": 1.4105425425403607e-09, + "loss": 0.0008, + "step": 309710 + }, + { + "epoch": 1.9864478743429976, + "grad_norm": 0.007851624861359596, + "learning_rate": 1.3972800224609784e-09, + "loss": 0.0007, + "step": 309720 + }, + { + "epoch": 1.9865120112367838, + "grad_norm": 0.14910300076007843, + "learning_rate": 1.3840801384462643e-09, + "loss": 0.0006, + "step": 309730 + }, + { + "epoch": 1.98657614813057, + "grad_norm": 0.05662422627210617, + "learning_rate": 1.3709428906610868e-09, + "loss": 0.0011, + "step": 309740 + }, + { + "epoch": 1.986640285024356, + "grad_norm": 0.025845753028988838, + "learning_rate": 1.3578682792703136e-09, + "loss": 0.001, + "step": 309750 + }, + { + "epoch": 1.9867044219181422, + "grad_norm": 0.011817049235105515, + "learning_rate": 1.344856304438258e-09, + "loss": 0.0008, + "step": 309760 + }, + { + "epoch": 1.9867685588119282, + "grad_norm": 0.05935809388756752, + "learning_rate": 1.3319069663275674e-09, + "loss": 0.0006, + "step": 309770 + }, + { + "epoch": 1.9868326957057143, + "grad_norm": 0.06123574078083038, + "learning_rate": 1.3190202651008899e-09, + "loss": 0.0012, + "step": 309780 + }, + { + "epoch": 1.9868968325995002, + "grad_norm": 0.0641397163271904, + "learning_rate": 1.3061962009192075e-09, + "loss": 0.002, + "step": 309790 + }, + { + "epoch": 1.9869609694932864, + "grad_norm": 0.05723587051033974, + "learning_rate": 1.2934347739429476e-09, + "loss": 0.0002, + "step": 309800 + }, + { + "epoch": 1.9870251063870725, + "grad_norm": 0.004579669795930386, + "learning_rate": 1.2807359843330925e-09, + "loss": 0.0007, + "step": 309810 + }, + { + "epoch": 1.9870892432808587, + "grad_norm": 0.022011714056134224, + "learning_rate": 1.268099832247849e-09, + "loss": 0.0008, + "step": 309820 + }, + { + "epoch": 1.9871533801746448, + "grad_norm": 0.10788783431053162, + "learning_rate": 1.2555263178454235e-09, + "loss": 0.001, + "step": 309830 + }, + { + "epoch": 1.987217517068431, + "grad_norm": 0.061985645443201065, + "learning_rate": 1.2430154412845785e-09, + "loss": 0.002, + "step": 309840 + }, + { + "epoch": 1.9872816539622171, + "grad_norm": 0.04927527531981468, + "learning_rate": 1.2305672027207448e-09, + "loss": 0.0013, + "step": 309850 + }, + { + "epoch": 1.987345790856003, + "grad_norm": 0.21452468633651733, + "learning_rate": 1.2181816023110193e-09, + "loss": 0.0012, + "step": 309860 + }, + { + "epoch": 1.9874099277497892, + "grad_norm": 0.012229650281369686, + "learning_rate": 1.2058586402097227e-09, + "loss": 0.0018, + "step": 309870 + }, + { + "epoch": 1.9874740646435751, + "grad_norm": 0.026618018746376038, + "learning_rate": 1.1935983165711762e-09, + "loss": 0.0026, + "step": 309880 + }, + { + "epoch": 1.9875382015373613, + "grad_norm": 0.030633946880698204, + "learning_rate": 1.1814006315502558e-09, + "loss": 0.0005, + "step": 309890 + }, + { + "epoch": 1.9876023384311474, + "grad_norm": 0.005378763657063246, + "learning_rate": 1.1692655852985069e-09, + "loss": 0.0009, + "step": 309900 + }, + { + "epoch": 1.9876664753249336, + "grad_norm": 0.022457418963313103, + "learning_rate": 1.1571931779685852e-09, + "loss": 0.0021, + "step": 309910 + }, + { + "epoch": 1.9877306122187197, + "grad_norm": 0.024466663599014282, + "learning_rate": 1.1451834097114811e-09, + "loss": 0.0005, + "step": 309920 + }, + { + "epoch": 1.9877947491125059, + "grad_norm": 0.06743562966585159, + "learning_rate": 1.133236280678185e-09, + "loss": 0.0009, + "step": 309930 + }, + { + "epoch": 1.9878588860062918, + "grad_norm": 0.047757603228092194, + "learning_rate": 1.1213517910180217e-09, + "loss": 0.0009, + "step": 309940 + }, + { + "epoch": 1.987923022900078, + "grad_norm": 0.0661270022392273, + "learning_rate": 1.1095299408797611e-09, + "loss": 0.0021, + "step": 309950 + }, + { + "epoch": 1.9879871597938639, + "grad_norm": 0.011862175539135933, + "learning_rate": 1.097770730411618e-09, + "loss": 0.0007, + "step": 309960 + }, + { + "epoch": 1.98805129668765, + "grad_norm": 0.023457394912838936, + "learning_rate": 1.0860741597612524e-09, + "loss": 0.0005, + "step": 309970 + }, + { + "epoch": 1.9881154335814362, + "grad_norm": 0.06609214842319489, + "learning_rate": 1.0744402290752132e-09, + "loss": 0.0029, + "step": 309980 + }, + { + "epoch": 1.9881795704752223, + "grad_norm": 0.10954923182725906, + "learning_rate": 1.0628689384989399e-09, + "loss": 0.0006, + "step": 309990 + }, + { + "epoch": 1.9882437073690085, + "grad_norm": 0.09938246011734009, + "learning_rate": 1.0513602881773166e-09, + "loss": 0.0012, + "step": 310000 + }, + { + "epoch": 1.9883078442627946, + "grad_norm": 0.056122008711099625, + "learning_rate": 1.0399142782552273e-09, + "loss": 0.0007, + "step": 310010 + }, + { + "epoch": 1.9883719811565808, + "grad_norm": 0.06735320389270782, + "learning_rate": 1.028530908875891e-09, + "loss": 0.0004, + "step": 310020 + }, + { + "epoch": 1.9884361180503667, + "grad_norm": 0.03025159053504467, + "learning_rate": 1.017210180181416e-09, + "loss": 0.0007, + "step": 310030 + }, + { + "epoch": 1.9885002549441528, + "grad_norm": 0.26144954562187195, + "learning_rate": 1.005952092313911e-09, + "loss": 0.0016, + "step": 310040 + }, + { + "epoch": 1.9885643918379388, + "grad_norm": 0.05625905096530914, + "learning_rate": 9.947566454149293e-10, + "loss": 0.0017, + "step": 310050 + }, + { + "epoch": 1.988628528731725, + "grad_norm": 0.05733387544751167, + "learning_rate": 9.836238396243592e-10, + "loss": 0.0008, + "step": 310060 + }, + { + "epoch": 1.988692665625511, + "grad_norm": 0.08595871925354004, + "learning_rate": 9.725536750815334e-10, + "loss": 0.0017, + "step": 310070 + }, + { + "epoch": 1.9887568025192972, + "grad_norm": 0.019021539017558098, + "learning_rate": 9.6154615192523e-10, + "loss": 0.0003, + "step": 310080 + }, + { + "epoch": 1.9888209394130834, + "grad_norm": 0.08416017144918442, + "learning_rate": 9.50601270294227e-10, + "loss": 0.0018, + "step": 310090 + }, + { + "epoch": 1.9888850763068695, + "grad_norm": 0.04324590787291527, + "learning_rate": 9.397190303239712e-10, + "loss": 0.0005, + "step": 310100 + }, + { + "epoch": 1.9889492132006554, + "grad_norm": 0.03690469264984131, + "learning_rate": 9.288994321526857e-10, + "loss": 0.0008, + "step": 310110 + }, + { + "epoch": 1.9890133500944416, + "grad_norm": 0.014062805101275444, + "learning_rate": 9.181424759141522e-10, + "loss": 0.001, + "step": 310120 + }, + { + "epoch": 1.9890774869882277, + "grad_norm": 0.059747498482465744, + "learning_rate": 9.074481617449282e-10, + "loss": 0.0016, + "step": 310130 + }, + { + "epoch": 1.9891416238820137, + "grad_norm": 0.02084960974752903, + "learning_rate": 8.968164897776855e-10, + "loss": 0.0048, + "step": 310140 + }, + { + "epoch": 1.9892057607757998, + "grad_norm": 0.07610044628381729, + "learning_rate": 8.862474601462057e-10, + "loss": 0.0016, + "step": 310150 + }, + { + "epoch": 1.989269897669586, + "grad_norm": 0.21877041459083557, + "learning_rate": 8.757410729831606e-10, + "loss": 0.0024, + "step": 310160 + }, + { + "epoch": 1.989334034563372, + "grad_norm": 0.010066531598567963, + "learning_rate": 8.652973284195565e-10, + "loss": 0.001, + "step": 310170 + }, + { + "epoch": 1.9893981714571582, + "grad_norm": 0.04835638031363487, + "learning_rate": 8.549162265869548e-10, + "loss": 0.001, + "step": 310180 + }, + { + "epoch": 1.9894623083509444, + "grad_norm": 0.03072960674762726, + "learning_rate": 8.445977676146965e-10, + "loss": 0.0015, + "step": 310190 + }, + { + "epoch": 1.9895264452447303, + "grad_norm": 0.02436680905520916, + "learning_rate": 8.343419516326779e-10, + "loss": 0.001, + "step": 310200 + }, + { + "epoch": 1.9895905821385165, + "grad_norm": 0.028769398108124733, + "learning_rate": 8.241487787691293e-10, + "loss": 0.0008, + "step": 310210 + }, + { + "epoch": 1.9896547190323024, + "grad_norm": 0.01909705065190792, + "learning_rate": 8.140182491517268e-10, + "loss": 0.0013, + "step": 310220 + }, + { + "epoch": 1.9897188559260885, + "grad_norm": 0.025863518938422203, + "learning_rate": 8.039503629075907e-10, + "loss": 0.001, + "step": 310230 + }, + { + "epoch": 1.9897829928198747, + "grad_norm": 0.0008688007947057486, + "learning_rate": 7.939451201632864e-10, + "loss": 0.0016, + "step": 310240 + }, + { + "epoch": 1.9898471297136608, + "grad_norm": 0.5137998461723328, + "learning_rate": 7.84002521043159e-10, + "loss": 0.0014, + "step": 310250 + }, + { + "epoch": 1.989911266607447, + "grad_norm": 0.10145371407270432, + "learning_rate": 7.741225656726636e-10, + "loss": 0.0011, + "step": 310260 + }, + { + "epoch": 1.9899754035012331, + "grad_norm": 0.013956460170447826, + "learning_rate": 7.643052541750351e-10, + "loss": 0.0007, + "step": 310270 + }, + { + "epoch": 1.9900395403950193, + "grad_norm": 0.05402513965964317, + "learning_rate": 7.545505866740632e-10, + "loss": 0.0014, + "step": 310280 + }, + { + "epoch": 1.9901036772888052, + "grad_norm": 0.030353525653481483, + "learning_rate": 7.448585632913175e-10, + "loss": 0.001, + "step": 310290 + }, + { + "epoch": 1.9901678141825914, + "grad_norm": 0.10931088775396347, + "learning_rate": 7.352291841478121e-10, + "loss": 0.002, + "step": 310300 + }, + { + "epoch": 1.9902319510763773, + "grad_norm": 0.016313182190060616, + "learning_rate": 7.256624493656717e-10, + "loss": 0.001, + "step": 310310 + }, + { + "epoch": 1.9902960879701634, + "grad_norm": 0.12528997659683228, + "learning_rate": 7.161583590631349e-10, + "loss": 0.0013, + "step": 310320 + }, + { + "epoch": 1.9903602248639496, + "grad_norm": 0.002206750214099884, + "learning_rate": 7.06716913360661e-10, + "loss": 0.0009, + "step": 310330 + }, + { + "epoch": 1.9904243617577357, + "grad_norm": 0.08335158228874207, + "learning_rate": 6.973381123759338e-10, + "loss": 0.0019, + "step": 310340 + }, + { + "epoch": 1.9904884986515219, + "grad_norm": 0.11780422180891037, + "learning_rate": 6.880219562260815e-10, + "loss": 0.0005, + "step": 310350 + }, + { + "epoch": 1.990552635545308, + "grad_norm": 0.0865696370601654, + "learning_rate": 6.787684450282328e-10, + "loss": 0.0009, + "step": 310360 + }, + { + "epoch": 1.990616772439094, + "grad_norm": 0.022653456777334213, + "learning_rate": 6.695775788989611e-10, + "loss": 0.0017, + "step": 310370 + }, + { + "epoch": 1.99068090933288, + "grad_norm": 0.03010975569486618, + "learning_rate": 6.604493579526195e-10, + "loss": 0.0009, + "step": 310380 + }, + { + "epoch": 1.990745046226666, + "grad_norm": 0.07456130534410477, + "learning_rate": 6.513837823035607e-10, + "loss": 0.0007, + "step": 310390 + }, + { + "epoch": 1.9908091831204522, + "grad_norm": 0.3172712028026581, + "learning_rate": 6.423808520655828e-10, + "loss": 0.001, + "step": 310400 + }, + { + "epoch": 1.9908733200142383, + "grad_norm": 0.049600999802351, + "learning_rate": 6.334405673519284e-10, + "loss": 0.0012, + "step": 310410 + }, + { + "epoch": 1.9909374569080245, + "grad_norm": 0.03408721834421158, + "learning_rate": 6.24562928274175e-10, + "loss": 0.0014, + "step": 310420 + }, + { + "epoch": 1.9910015938018106, + "grad_norm": 0.13581745326519012, + "learning_rate": 6.157479349433448e-10, + "loss": 0.0005, + "step": 310430 + }, + { + "epoch": 1.9910657306955968, + "grad_norm": 0.030747568234801292, + "learning_rate": 6.069955874704603e-10, + "loss": 0.0005, + "step": 310440 + }, + { + "epoch": 1.991129867589383, + "grad_norm": 0.08988082408905029, + "learning_rate": 5.983058859648783e-10, + "loss": 0.0012, + "step": 310450 + }, + { + "epoch": 1.9911940044831689, + "grad_norm": 0.07408042997121811, + "learning_rate": 5.896788305359558e-10, + "loss": 0.0011, + "step": 310460 + }, + { + "epoch": 1.991258141376955, + "grad_norm": 0.08488886058330536, + "learning_rate": 5.811144212908293e-10, + "loss": 0.0017, + "step": 310470 + }, + { + "epoch": 1.991322278270741, + "grad_norm": 0.017424480989575386, + "learning_rate": 5.726126583377456e-10, + "loss": 0.0006, + "step": 310480 + }, + { + "epoch": 1.991386415164527, + "grad_norm": 0.004001277964562178, + "learning_rate": 5.641735417827309e-10, + "loss": 0.001, + "step": 310490 + }, + { + "epoch": 1.9914505520583132, + "grad_norm": 0.027292709797620773, + "learning_rate": 5.557970717312566e-10, + "loss": 0.0009, + "step": 310500 + }, + { + "epoch": 1.9915146889520994, + "grad_norm": 0.10521198809146881, + "learning_rate": 5.474832482893489e-10, + "loss": 0.0005, + "step": 310510 + }, + { + "epoch": 1.9915788258458855, + "grad_norm": 0.11118293553590775, + "learning_rate": 5.392320715602584e-10, + "loss": 0.0022, + "step": 310520 + }, + { + "epoch": 1.9916429627396717, + "grad_norm": 0.01775343529880047, + "learning_rate": 5.310435416472359e-10, + "loss": 0.0015, + "step": 310530 + }, + { + "epoch": 1.9917070996334576, + "grad_norm": 0.05329453945159912, + "learning_rate": 5.229176586540874e-10, + "loss": 0.0007, + "step": 310540 + }, + { + "epoch": 1.9917712365272437, + "grad_norm": 0.08876674622297287, + "learning_rate": 5.148544226812879e-10, + "loss": 0.0009, + "step": 310550 + }, + { + "epoch": 1.99183537342103, + "grad_norm": 0.055429551750421524, + "learning_rate": 5.06853833830423e-10, + "loss": 0.0008, + "step": 310560 + }, + { + "epoch": 1.9918995103148158, + "grad_norm": 0.12451827526092529, + "learning_rate": 4.989158922019677e-10, + "loss": 0.0009, + "step": 310570 + }, + { + "epoch": 1.991963647208602, + "grad_norm": 0.13710607588291168, + "learning_rate": 4.91040597895287e-10, + "loss": 0.0008, + "step": 310580 + }, + { + "epoch": 1.9920277841023881, + "grad_norm": 0.0039566149935126305, + "learning_rate": 4.832279510086358e-10, + "loss": 0.0018, + "step": 310590 + }, + { + "epoch": 1.9920919209961743, + "grad_norm": 0.11361566185951233, + "learning_rate": 4.754779516402685e-10, + "loss": 0.0011, + "step": 310600 + }, + { + "epoch": 1.9921560578899604, + "grad_norm": 0.3491664230823517, + "learning_rate": 4.677905998878852e-10, + "loss": 0.006, + "step": 310610 + }, + { + "epoch": 1.9922201947837466, + "grad_norm": 0.014886301942169666, + "learning_rate": 4.6016589584640945e-10, + "loss": 0.0007, + "step": 310620 + }, + { + "epoch": 1.9922843316775325, + "grad_norm": 0.09547198563814163, + "learning_rate": 4.5260383961243105e-10, + "loss": 0.001, + "step": 310630 + }, + { + "epoch": 1.9923484685713186, + "grad_norm": 0.07460545003414154, + "learning_rate": 4.451044312808739e-10, + "loss": 0.0011, + "step": 310640 + }, + { + "epoch": 1.9924126054651046, + "grad_norm": 0.1180550679564476, + "learning_rate": 4.3766767094499673e-10, + "loss": 0.001, + "step": 310650 + }, + { + "epoch": 1.9924767423588907, + "grad_norm": 0.09302978962659836, + "learning_rate": 4.3029355869805836e-10, + "loss": 0.0013, + "step": 310660 + }, + { + "epoch": 1.9925408792526769, + "grad_norm": 0.06479623913764954, + "learning_rate": 4.229820946327623e-10, + "loss": 0.0011, + "step": 310670 + }, + { + "epoch": 1.992605016146463, + "grad_norm": 0.027378110215067863, + "learning_rate": 4.1573327884070203e-10, + "loss": 0.0008, + "step": 310680 + }, + { + "epoch": 1.9926691530402492, + "grad_norm": 0.01163564994931221, + "learning_rate": 4.085471114129158e-10, + "loss": 0.0008, + "step": 310690 + }, + { + "epoch": 1.9927332899340353, + "grad_norm": 0.07593715935945511, + "learning_rate": 4.014235924387766e-10, + "loss": 0.0008, + "step": 310700 + }, + { + "epoch": 1.9927974268278215, + "grad_norm": 0.06961977481842041, + "learning_rate": 3.943627220082125e-10, + "loss": 0.0013, + "step": 310710 + }, + { + "epoch": 1.9928615637216074, + "grad_norm": 0.04441741853952408, + "learning_rate": 3.873645002094861e-10, + "loss": 0.0012, + "step": 310720 + }, + { + "epoch": 1.9929257006153935, + "grad_norm": 0.05210668221116066, + "learning_rate": 3.804289271297501e-10, + "loss": 0.001, + "step": 310730 + }, + { + "epoch": 1.9929898375091795, + "grad_norm": 0.05263422802090645, + "learning_rate": 3.73556002856712e-10, + "loss": 0.0011, + "step": 310740 + }, + { + "epoch": 1.9930539744029656, + "grad_norm": 0.1447191685438156, + "learning_rate": 3.6674572747641413e-10, + "loss": 0.001, + "step": 310750 + }, + { + "epoch": 1.9931181112967518, + "grad_norm": 0.06727171689271927, + "learning_rate": 3.599981010737885e-10, + "loss": 0.0014, + "step": 310760 + }, + { + "epoch": 1.993182248190538, + "grad_norm": 0.05539167672395706, + "learning_rate": 3.5331312373376723e-10, + "loss": 0.001, + "step": 310770 + }, + { + "epoch": 1.993246385084324, + "grad_norm": 0.058031290769577026, + "learning_rate": 3.4669079553961706e-10, + "loss": 0.0012, + "step": 310780 + }, + { + "epoch": 1.9933105219781102, + "grad_norm": 0.0378909595310688, + "learning_rate": 3.401311165751597e-10, + "loss": 0.001, + "step": 310790 + }, + { + "epoch": 1.9933746588718961, + "grad_norm": 0.09321986138820648, + "learning_rate": 3.3363408692144163e-10, + "loss": 0.0005, + "step": 310800 + }, + { + "epoch": 1.9934387957656823, + "grad_norm": 0.07231327146291733, + "learning_rate": 3.2719970666061915e-10, + "loss": 0.002, + "step": 310810 + }, + { + "epoch": 1.9935029326594682, + "grad_norm": 0.05760103464126587, + "learning_rate": 3.208279758737387e-10, + "loss": 0.0009, + "step": 310820 + }, + { + "epoch": 1.9935670695532544, + "grad_norm": 0.12361236661672592, + "learning_rate": 3.1451889463962605e-10, + "loss": 0.0018, + "step": 310830 + }, + { + "epoch": 1.9936312064470405, + "grad_norm": 0.006827784236520529, + "learning_rate": 3.082724630376621e-10, + "loss": 0.0014, + "step": 310840 + }, + { + "epoch": 1.9936953433408267, + "grad_norm": 0.2531537413597107, + "learning_rate": 3.0208868114667277e-10, + "loss": 0.0007, + "step": 310850 + }, + { + "epoch": 1.9937594802346128, + "grad_norm": 0.0022021911572664976, + "learning_rate": 2.959675490432634e-10, + "loss": 0.0005, + "step": 310860 + }, + { + "epoch": 1.993823617128399, + "grad_norm": 0.0008656713180243969, + "learning_rate": 2.8990906680514964e-10, + "loss": 0.001, + "step": 310870 + }, + { + "epoch": 1.993887754022185, + "grad_norm": 0.09890484809875488, + "learning_rate": 2.839132345072715e-10, + "loss": 0.0023, + "step": 310880 + }, + { + "epoch": 1.993951890915971, + "grad_norm": 0.10886561870574951, + "learning_rate": 2.7798005222567923e-10, + "loss": 0.0009, + "step": 310890 + }, + { + "epoch": 1.9940160278097572, + "grad_norm": 0.04242394119501114, + "learning_rate": 2.721095200336477e-10, + "loss": 0.001, + "step": 310900 + }, + { + "epoch": 1.994080164703543, + "grad_norm": 0.06635317206382751, + "learning_rate": 2.6630163800556163e-10, + "loss": 0.001, + "step": 310910 + }, + { + "epoch": 1.9941443015973292, + "grad_norm": 0.10035008192062378, + "learning_rate": 2.605564062141408e-10, + "loss": 0.0007, + "step": 310920 + }, + { + "epoch": 1.9942084384911154, + "grad_norm": 0.3456178307533264, + "learning_rate": 2.5487382473154965e-10, + "loss": 0.0026, + "step": 310930 + }, + { + "epoch": 1.9942725753849015, + "grad_norm": 0.025355815887451172, + "learning_rate": 2.4925389362828734e-10, + "loss": 0.0008, + "step": 310940 + }, + { + "epoch": 1.9943367122786877, + "grad_norm": 0.176799014210701, + "learning_rate": 2.43696612974853e-10, + "loss": 0.0015, + "step": 310950 + }, + { + "epoch": 1.9944008491724738, + "grad_norm": 0.07181256264448166, + "learning_rate": 2.3820198284119076e-10, + "loss": 0.0006, + "step": 310960 + }, + { + "epoch": 1.9944649860662598, + "grad_norm": 0.033693600445985794, + "learning_rate": 2.3277000329668952e-10, + "loss": 0.0005, + "step": 310970 + }, + { + "epoch": 1.994529122960046, + "grad_norm": 0.07449740916490555, + "learning_rate": 2.2740067440851778e-10, + "loss": 0.0016, + "step": 310980 + }, + { + "epoch": 1.994593259853832, + "grad_norm": 0.014070970937609673, + "learning_rate": 2.2209399624439908e-10, + "loss": 0.001, + "step": 310990 + }, + { + "epoch": 1.994657396747618, + "grad_norm": 0.07527777552604675, + "learning_rate": 2.168499688703918e-10, + "loss": 0.0009, + "step": 311000 + }, + { + "epoch": 1.9947215336414041, + "grad_norm": 0.060144905000925064, + "learning_rate": 2.1166859235255412e-10, + "loss": 0.0017, + "step": 311010 + }, + { + "epoch": 1.9947856705351903, + "grad_norm": 0.06478618830442429, + "learning_rate": 2.0654986675638922e-10, + "loss": 0.0005, + "step": 311020 + }, + { + "epoch": 1.9948498074289764, + "grad_norm": 0.114219069480896, + "learning_rate": 2.0149379214517982e-10, + "loss": 0.0014, + "step": 311030 + }, + { + "epoch": 1.9949139443227626, + "grad_norm": 0.05162247642874718, + "learning_rate": 1.9650036858220867e-10, + "loss": 0.0024, + "step": 311040 + }, + { + "epoch": 1.9949780812165487, + "grad_norm": 0.2268955409526825, + "learning_rate": 1.9156959613075843e-10, + "loss": 0.0011, + "step": 311050 + }, + { + "epoch": 1.9950422181103347, + "grad_norm": 0.093012735247612, + "learning_rate": 1.867014748518914e-10, + "loss": 0.0007, + "step": 311060 + }, + { + "epoch": 1.9951063550041208, + "grad_norm": 0.06731496751308441, + "learning_rate": 1.8189600480778003e-10, + "loss": 0.0008, + "step": 311070 + }, + { + "epoch": 1.9951704918979067, + "grad_norm": 0.1012783870100975, + "learning_rate": 1.7715318605726617e-10, + "loss": 0.001, + "step": 311080 + }, + { + "epoch": 1.9952346287916929, + "grad_norm": 0.07619098573923111, + "learning_rate": 1.7247301866030186e-10, + "loss": 0.0012, + "step": 311090 + }, + { + "epoch": 1.995298765685479, + "grad_norm": 0.09405265748500824, + "learning_rate": 1.678555026751738e-10, + "loss": 0.0008, + "step": 311100 + }, + { + "epoch": 1.9953629025792652, + "grad_norm": 0.07160182297229767, + "learning_rate": 1.6330063816072384e-10, + "loss": 0.0008, + "step": 311110 + }, + { + "epoch": 1.9954270394730513, + "grad_norm": 0.12579509615898132, + "learning_rate": 1.588084251735733e-10, + "loss": 0.0009, + "step": 311120 + }, + { + "epoch": 1.9954911763668375, + "grad_norm": 0.00943692959845066, + "learning_rate": 1.543788637692334e-10, + "loss": 0.0008, + "step": 311130 + }, + { + "epoch": 1.9955553132606236, + "grad_norm": 0.05778767913579941, + "learning_rate": 1.5001195400432544e-10, + "loss": 0.001, + "step": 311140 + }, + { + "epoch": 1.9956194501544096, + "grad_norm": 0.019559109583497047, + "learning_rate": 1.457076959332504e-10, + "loss": 0.0007, + "step": 311150 + }, + { + "epoch": 1.9956835870481957, + "grad_norm": 0.0378846637904644, + "learning_rate": 1.414660896098541e-10, + "loss": 0.0016, + "step": 311160 + }, + { + "epoch": 1.9957477239419816, + "grad_norm": 0.03069428727030754, + "learning_rate": 1.372871350868721e-10, + "loss": 0.0038, + "step": 311170 + }, + { + "epoch": 1.9958118608357678, + "grad_norm": 0.05695541203022003, + "learning_rate": 1.3317083241704e-10, + "loss": 0.0006, + "step": 311180 + }, + { + "epoch": 1.995875997729554, + "grad_norm": 0.08141503483057022, + "learning_rate": 1.2911718165198317e-10, + "loss": 0.0007, + "step": 311190 + }, + { + "epoch": 1.99594013462334, + "grad_norm": 0.08701863139867783, + "learning_rate": 1.2512618284221678e-10, + "loss": 0.002, + "step": 311200 + }, + { + "epoch": 1.9960042715171262, + "grad_norm": 0.02023044228553772, + "learning_rate": 1.2119783603825596e-10, + "loss": 0.0008, + "step": 311210 + }, + { + "epoch": 1.9960684084109124, + "grad_norm": 0.11604054272174835, + "learning_rate": 1.1733214128950565e-10, + "loss": 0.0011, + "step": 311220 + }, + { + "epoch": 1.9961325453046983, + "grad_norm": 0.028305407613515854, + "learning_rate": 1.1352909864315031e-10, + "loss": 0.0007, + "step": 311230 + }, + { + "epoch": 1.9961966821984845, + "grad_norm": 0.04502418637275696, + "learning_rate": 1.0978870814803977e-10, + "loss": 0.0007, + "step": 311240 + }, + { + "epoch": 1.9962608190922704, + "grad_norm": 0.002515362109988928, + "learning_rate": 1.0611096985024827e-10, + "loss": 0.0008, + "step": 311250 + }, + { + "epoch": 1.9963249559860565, + "grad_norm": 0.0246027410030365, + "learning_rate": 1.024958837969603e-10, + "loss": 0.0009, + "step": 311260 + }, + { + "epoch": 1.9963890928798427, + "grad_norm": 0.005193647928535938, + "learning_rate": 9.894345003202966e-11, + "loss": 0.0003, + "step": 311270 + }, + { + "epoch": 1.9964532297736288, + "grad_norm": 0.06985964626073837, + "learning_rate": 9.54536686015306e-11, + "loss": 0.0005, + "step": 311280 + }, + { + "epoch": 1.996517366667415, + "grad_norm": 0.06013587862253189, + "learning_rate": 9.202653954765161e-11, + "loss": 0.0011, + "step": 311290 + }, + { + "epoch": 1.9965815035612011, + "grad_norm": 0.04733504727482796, + "learning_rate": 8.86620629142465e-11, + "loss": 0.0007, + "step": 311300 + }, + { + "epoch": 1.9966456404549873, + "grad_norm": 0.04047602415084839, + "learning_rate": 8.536023874350374e-11, + "loss": 0.0008, + "step": 311310 + }, + { + "epoch": 1.9967097773487732, + "grad_norm": 0.03270931541919708, + "learning_rate": 8.212106707650158e-11, + "loss": 0.0005, + "step": 311320 + }, + { + "epoch": 1.9967739142425593, + "grad_norm": 0.01887642592191696, + "learning_rate": 7.894454795376317e-11, + "loss": 0.0009, + "step": 311330 + }, + { + "epoch": 1.9968380511363453, + "grad_norm": 0.3225075602531433, + "learning_rate": 7.583068141525652e-11, + "loss": 0.0013, + "step": 311340 + }, + { + "epoch": 1.9969021880301314, + "grad_norm": 0.011128585785627365, + "learning_rate": 7.277946749983944e-11, + "loss": 0.001, + "step": 311350 + }, + { + "epoch": 1.9969663249239176, + "grad_norm": 0.03264245018362999, + "learning_rate": 6.979090624636975e-11, + "loss": 0.0008, + "step": 311360 + }, + { + "epoch": 1.9970304618177037, + "grad_norm": 0.0062835942953825, + "learning_rate": 6.68649976914848e-11, + "loss": 0.0009, + "step": 311370 + }, + { + "epoch": 1.9970945987114899, + "grad_norm": 0.006522227544337511, + "learning_rate": 6.400174187237707e-11, + "loss": 0.0025, + "step": 311380 + }, + { + "epoch": 1.997158735605276, + "grad_norm": 0.06396842002868652, + "learning_rate": 6.120113882457368e-11, + "loss": 0.0007, + "step": 311390 + }, + { + "epoch": 1.9972228724990622, + "grad_norm": 0.08676396310329437, + "learning_rate": 5.846318858304667e-11, + "loss": 0.0018, + "step": 311400 + }, + { + "epoch": 1.997287009392848, + "grad_norm": 0.1269548535346985, + "learning_rate": 5.578789118276806e-11, + "loss": 0.0014, + "step": 311410 + }, + { + "epoch": 1.9973511462866342, + "grad_norm": 0.0786188468337059, + "learning_rate": 5.3175246657044546e-11, + "loss": 0.0012, + "step": 311420 + }, + { + "epoch": 1.9974152831804202, + "grad_norm": 0.11513978242874146, + "learning_rate": 5.062525503807258e-11, + "loss": 0.0017, + "step": 311430 + }, + { + "epoch": 1.9974794200742063, + "grad_norm": 0.0169609896838665, + "learning_rate": 4.813791635804865e-11, + "loss": 0.0009, + "step": 311440 + }, + { + "epoch": 1.9975435569679925, + "grad_norm": 0.09304054081439972, + "learning_rate": 4.57132306486141e-11, + "loss": 0.0023, + "step": 311450 + }, + { + "epoch": 1.9976076938617786, + "grad_norm": 0.007657730020582676, + "learning_rate": 4.335119793974496e-11, + "loss": 0.0009, + "step": 311460 + }, + { + "epoch": 1.9976718307555648, + "grad_norm": 0.04164130985736847, + "learning_rate": 4.105181826086213e-11, + "loss": 0.0014, + "step": 311470 + }, + { + "epoch": 1.997735967649351, + "grad_norm": 0.07427259534597397, + "learning_rate": 3.8815091641386526e-11, + "loss": 0.001, + "step": 311480 + }, + { + "epoch": 1.9978001045431368, + "grad_norm": 0.010587507858872414, + "learning_rate": 3.664101810851861e-11, + "loss": 0.0012, + "step": 311490 + }, + { + "epoch": 1.997864241436923, + "grad_norm": 0.0805448368191719, + "learning_rate": 3.4529597690569074e-11, + "loss": 0.0012, + "step": 311500 + }, + { + "epoch": 1.997928378330709, + "grad_norm": 0.11400971561670303, + "learning_rate": 3.248083041307304e-11, + "loss": 0.0014, + "step": 311510 + }, + { + "epoch": 1.997992515224495, + "grad_norm": 0.04872676730155945, + "learning_rate": 3.049471630212075e-11, + "loss": 0.0006, + "step": 311520 + }, + { + "epoch": 1.9980566521182812, + "grad_norm": 0.024912770837545395, + "learning_rate": 2.857125538213712e-11, + "loss": 0.0023, + "step": 311530 + }, + { + "epoch": 1.9981207890120674, + "grad_norm": 0.07626421749591827, + "learning_rate": 2.6710447677547046e-11, + "loss": 0.0008, + "step": 311540 + }, + { + "epoch": 1.9981849259058535, + "grad_norm": 0.04140548035502434, + "learning_rate": 2.491229321222033e-11, + "loss": 0.0007, + "step": 311550 + }, + { + "epoch": 1.9982490627996397, + "grad_norm": 0.036363955587148666, + "learning_rate": 2.3176792007806314e-11, + "loss": 0.0006, + "step": 311560 + }, + { + "epoch": 1.9983131996934258, + "grad_norm": 0.04990491643548012, + "learning_rate": 2.1503944086509465e-11, + "loss": 0.0006, + "step": 311570 + }, + { + "epoch": 1.9983773365872117, + "grad_norm": 0.07418892532587051, + "learning_rate": 1.9893749469424017e-11, + "loss": 0.0062, + "step": 311580 + }, + { + "epoch": 1.9984414734809979, + "grad_norm": 0.001999436877667904, + "learning_rate": 1.8346208175978874e-11, + "loss": 0.0016, + "step": 311590 + }, + { + "epoch": 1.9985056103747838, + "grad_norm": 0.09791887551546097, + "learning_rate": 1.686132022671316e-11, + "loss": 0.0015, + "step": 311600 + }, + { + "epoch": 1.99856974726857, + "grad_norm": 0.09272914379835129, + "learning_rate": 1.543908563939045e-11, + "loss": 0.0011, + "step": 311610 + }, + { + "epoch": 1.998633884162356, + "grad_norm": 0.05816177278757095, + "learning_rate": 1.4079504431774304e-11, + "loss": 0.0008, + "step": 311620 + }, + { + "epoch": 1.9986980210561422, + "grad_norm": 0.032101716846227646, + "learning_rate": 1.2782576621073183e-11, + "loss": 0.0012, + "step": 311630 + }, + { + "epoch": 1.9987621579499284, + "grad_norm": 0.011775681748986244, + "learning_rate": 1.1548302223940433e-11, + "loss": 0.0019, + "step": 311640 + }, + { + "epoch": 1.9988262948437145, + "grad_norm": 0.060431741178035736, + "learning_rate": 1.0376681255919174e-11, + "loss": 0.0008, + "step": 311650 + }, + { + "epoch": 1.9988904317375005, + "grad_norm": 0.07768959552049637, + "learning_rate": 9.267713730887196e-12, + "loss": 0.0008, + "step": 311660 + }, + { + "epoch": 1.9989545686312866, + "grad_norm": 0.09777266532182693, + "learning_rate": 8.221399663277396e-12, + "loss": 0.0029, + "step": 311670 + }, + { + "epoch": 1.9990187055250728, + "grad_norm": 0.04257415980100632, + "learning_rate": 7.237739065857341e-12, + "loss": 0.0011, + "step": 311680 + }, + { + "epoch": 1.9990828424188587, + "grad_norm": 0.026949284598231316, + "learning_rate": 6.316731951394594e-12, + "loss": 0.0012, + "step": 311690 + }, + { + "epoch": 1.9991469793126448, + "grad_norm": 0.004648712929338217, + "learning_rate": 5.458378330991387e-12, + "loss": 0.0007, + "step": 311700 + }, + { + "epoch": 1.999211116206431, + "grad_norm": 0.025176560506224632, + "learning_rate": 4.662678216305061e-12, + "loss": 0.0011, + "step": 311710 + }, + { + "epoch": 1.9992752531002171, + "grad_norm": 0.06456632167100906, + "learning_rate": 3.929631615662288e-12, + "loss": 0.0025, + "step": 311720 + }, + { + "epoch": 1.9993393899940033, + "grad_norm": 0.030822809785604477, + "learning_rate": 3.2592385401652992e-12, + "loss": 0.0008, + "step": 311730 + }, + { + "epoch": 1.9994035268877894, + "grad_norm": 0.0362224243581295, + "learning_rate": 2.6514989964754323e-12, + "loss": 0.0013, + "step": 311740 + }, + { + "epoch": 1.9994676637815754, + "grad_norm": 0.02068529650568962, + "learning_rate": 2.1064129934744713e-12, + "loss": 0.0014, + "step": 311750 + }, + { + "epoch": 1.9995318006753615, + "grad_norm": 0.1268768012523651, + "learning_rate": 1.6239805372686435e-12, + "loss": 0.0009, + "step": 311760 + }, + { + "epoch": 1.9995959375691474, + "grad_norm": 0.04929465427994728, + "learning_rate": 1.2042016345192864e-12, + "loss": 0.0022, + "step": 311770 + }, + { + "epoch": 1.9996600744629336, + "grad_norm": 0.0022703553549945354, + "learning_rate": 8.470762902224039e-13, + "loss": 0.0011, + "step": 311780 + }, + { + "epoch": 1.9997242113567197, + "grad_norm": 0.09381227195262909, + "learning_rate": 5.526045082637765e-13, + "loss": 0.0014, + "step": 311790 + }, + { + "epoch": 1.9997883482505059, + "grad_norm": 0.06136627495288849, + "learning_rate": 3.2078629308429643e-13, + "loss": 0.0008, + "step": 311800 + }, + { + "epoch": 1.999852485144292, + "grad_norm": 0.06326216459274292, + "learning_rate": 1.5162164690440962e-13, + "loss": 0.0007, + "step": 311810 + }, + { + "epoch": 1.9999166220380782, + "grad_norm": 0.06475763022899628, + "learning_rate": 4.5110573054785166e-14, + "loss": 0.0007, + "step": 311820 + }, + { + "epoch": 1.9999807589318643, + "grad_norm": 0.20405817031860352, + "learning_rate": 1.2530715354230894e-15, + "loss": 0.0024, + "step": 311830 + }, + { + "epoch": 1.9999935863106213, + "step": 311832, + "total_flos": 1.0432719577686213e+19, + "train_loss": 0.004895635656955134, + "train_runtime": 187864.5289, + "train_samples_per_second": 13.279, + "train_steps_per_second": 1.66 + } + ], + "logging_steps": 10, + "max_steps": 311832, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 62000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0432719577686213e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}