{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 34236, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 2.1784449710473837, "learning_rate": 4.999999736861907e-05, "loss": 1.3118, "step": 5 }, { "epoch": 0.0, "grad_norm": 1.4897392967760945, "learning_rate": 4.999998947447681e-05, "loss": 1.2024, "step": 10 }, { "epoch": 0.0, "grad_norm": 1.5311854018449251, "learning_rate": 4.9999976317574895e-05, "loss": 1.1376, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.3982158801348301, "learning_rate": 4.999995789791609e-05, "loss": 1.074, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.3336193278462263, "learning_rate": 4.9999934215504286e-05, "loss": 1.0115, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.3752137315104604, "learning_rate": 4.999990527034445e-05, "loss": 1.0576, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.4094207690165101, "learning_rate": 4.999987106244268e-05, "loss": 1.1654, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.5199782713779533, "learning_rate": 4.999983159180619e-05, "loss": 0.9992, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.4566344917839809, "learning_rate": 4.999978685844327e-05, "loss": 1.074, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.564520831809233, "learning_rate": 4.999973686236335e-05, "loss": 1.021, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.468453331106892, "learning_rate": 4.9999681603576945e-05, "loss": 1.0803, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.2428443229463324, "learning_rate": 4.999962108209569e-05, "loss": 1.0231, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.23791908522262, "learning_rate": 4.9999555297932345e-05, "loss": 0.9771, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.2773848935764596, "learning_rate": 4.9999484251100726e-05, "loss": 1.1095, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.3782292477996736, "learning_rate": 4.999940794161581e-05, "loss": 1.0616, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.2952820131263536, "learning_rate": 4.999932636949365e-05, "loss": 1.0632, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.4248329224299214, "learning_rate": 4.999923953475143e-05, "loss": 1.0455, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.2220927446888543, "learning_rate": 4.999914743740742e-05, "loss": 1.0553, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.2941886202299266, "learning_rate": 4.9999050077481014e-05, "loss": 1.0822, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.3552724672192507, "learning_rate": 4.99989474549927e-05, "loss": 0.9869, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.2325989312913683, "learning_rate": 4.9998839569964086e-05, "loss": 0.9796, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.4295829729154879, "learning_rate": 4.999872642241788e-05, "loss": 1.0544, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.1865757592832702, "learning_rate": 4.9998608012377904e-05, "loss": 1.0361, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.2022374665421445, "learning_rate": 4.9998484339869085e-05, "loss": 1.0009, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.2840172912471361, "learning_rate": 4.999835540491745e-05, "loss": 0.9948, "step": 125 }, { "epoch": 0.02, "grad_norm": 1.3908185779332902, "learning_rate": 4.999822120755015e-05, "loss": 1.031, "step": 130 }, { "epoch": 0.02, "grad_norm": 1.1783544616928932, "learning_rate": 4.9998081747795434e-05, "loss": 1.0133, "step": 135 }, { "epoch": 0.02, "grad_norm": 1.1582571514646922, "learning_rate": 4.999793702568265e-05, "loss": 0.9739, "step": 140 }, { "epoch": 0.02, "grad_norm": 1.230808365640957, "learning_rate": 4.999778704124228e-05, "loss": 1.0328, "step": 145 }, { "epoch": 0.02, "grad_norm": 1.2306674242430162, "learning_rate": 4.999763179450588e-05, "loss": 1.0518, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.089913942169356, "learning_rate": 4.999747128550615e-05, "loss": 0.9716, "step": 155 }, { "epoch": 0.02, "grad_norm": 1.2127600303253867, "learning_rate": 4.999730551427686e-05, "loss": 1.0216, "step": 160 }, { "epoch": 0.02, "grad_norm": 1.2223076887165993, "learning_rate": 4.9997134480852916e-05, "loss": 1.0557, "step": 165 }, { "epoch": 0.02, "grad_norm": 1.1650424198208296, "learning_rate": 4.999695818527032e-05, "loss": 1.0516, "step": 170 }, { "epoch": 0.02, "grad_norm": 1.9089784717853382, "learning_rate": 4.999677662756618e-05, "loss": 1.0407, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.1857033366836702, "learning_rate": 4.999658980777873e-05, "loss": 1.026, "step": 180 }, { "epoch": 0.02, "grad_norm": 1.1175077900584947, "learning_rate": 4.9996397725947276e-05, "loss": 1.0058, "step": 185 }, { "epoch": 0.02, "grad_norm": 1.2839942707097831, "learning_rate": 4.999620038211227e-05, "loss": 1.0985, "step": 190 }, { "epoch": 0.02, "grad_norm": 1.198310511273475, "learning_rate": 4.9995997776315256e-05, "loss": 1.0306, "step": 195 }, { "epoch": 0.02, "grad_norm": 1.2046663657921814, "learning_rate": 4.9995789908598876e-05, "loss": 1.0083, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.0217207631158867, "learning_rate": 4.999557677900689e-05, "loss": 1.0148, "step": 205 }, { "epoch": 0.02, "grad_norm": 1.186093973343761, "learning_rate": 4.999535838758417e-05, "loss": 1.0204, "step": 210 }, { "epoch": 0.03, "grad_norm": 1.1090086047402792, "learning_rate": 4.999513473437668e-05, "loss": 1.0237, "step": 215 }, { "epoch": 0.03, "grad_norm": 1.1141811398253623, "learning_rate": 4.9994905819431516e-05, "loss": 1.0204, "step": 220 }, { "epoch": 0.03, "grad_norm": 1.3182671403308446, "learning_rate": 4.999467164279684e-05, "loss": 1.0291, "step": 225 }, { "epoch": 0.03, "grad_norm": 1.386651938972161, "learning_rate": 4.999443220452198e-05, "loss": 1.0474, "step": 230 }, { "epoch": 0.03, "grad_norm": 1.846959544880204, "learning_rate": 4.999418750465732e-05, "loss": 0.9256, "step": 235 }, { "epoch": 0.03, "grad_norm": 1.4501773981803823, "learning_rate": 4.999393754325438e-05, "loss": 0.9958, "step": 240 }, { "epoch": 0.03, "grad_norm": 1.1216837830840287, "learning_rate": 4.9993682320365783e-05, "loss": 0.9963, "step": 245 }, { "epoch": 0.03, "grad_norm": 1.072695041562247, "learning_rate": 4.999342183604525e-05, "loss": 0.9837, "step": 250 }, { "epoch": 0.03, "grad_norm": 1.2854505754455945, "learning_rate": 4.9993156090347604e-05, "loss": 0.9735, "step": 255 }, { "epoch": 0.03, "grad_norm": 1.1201609223612168, "learning_rate": 4.999288508332881e-05, "loss": 1.0047, "step": 260 }, { "epoch": 0.03, "grad_norm": 1.1067023825942017, "learning_rate": 4.99926088150459e-05, "loss": 1.0462, "step": 265 }, { "epoch": 0.03, "grad_norm": 1.488134858528184, "learning_rate": 4.999232728555705e-05, "loss": 0.9916, "step": 270 }, { "epoch": 0.03, "grad_norm": 1.1046296845118166, "learning_rate": 4.999204049492151e-05, "loss": 0.9228, "step": 275 }, { "epoch": 0.03, "grad_norm": 1.1179757334585674, "learning_rate": 4.999174844319965e-05, "loss": 1.0438, "step": 280 }, { "epoch": 0.03, "grad_norm": 1.2583407669600082, "learning_rate": 4.9991451130452964e-05, "loss": 0.9587, "step": 285 }, { "epoch": 0.03, "grad_norm": 1.130825505538917, "learning_rate": 4.999114855674403e-05, "loss": 0.9418, "step": 290 }, { "epoch": 0.03, "grad_norm": 1.1276805384744066, "learning_rate": 4.999084072213654e-05, "loss": 0.9831, "step": 295 }, { "epoch": 0.04, "grad_norm": 1.0895304959445886, "learning_rate": 4.9990527626695306e-05, "loss": 0.9919, "step": 300 }, { "epoch": 0.04, "grad_norm": 1.3340398201279144, "learning_rate": 4.9990209270486235e-05, "loss": 1.0481, "step": 305 }, { "epoch": 0.04, "grad_norm": 1.2873488720246915, "learning_rate": 4.998988565357633e-05, "loss": 0.9639, "step": 310 }, { "epoch": 0.04, "grad_norm": 1.170230482992979, "learning_rate": 4.9989556776033744e-05, "loss": 1.0525, "step": 315 }, { "epoch": 0.04, "grad_norm": 1.2297751507027324, "learning_rate": 4.998922263792768e-05, "loss": 1.0187, "step": 320 }, { "epoch": 0.04, "grad_norm": 1.2801466011614338, "learning_rate": 4.99888832393285e-05, "loss": 0.9391, "step": 325 }, { "epoch": 0.04, "grad_norm": 1.2169368970409447, "learning_rate": 4.9988538580307634e-05, "loss": 1.0129, "step": 330 }, { "epoch": 0.04, "grad_norm": 1.0959208225420087, "learning_rate": 4.9988188660937644e-05, "loss": 1.0192, "step": 335 }, { "epoch": 0.04, "grad_norm": 1.2706316396214556, "learning_rate": 4.99878334812922e-05, "loss": 0.9624, "step": 340 }, { "epoch": 0.04, "grad_norm": 1.1380901250678794, "learning_rate": 4.9987473041446056e-05, "loss": 1.0094, "step": 345 }, { "epoch": 0.04, "grad_norm": 1.172421811889701, "learning_rate": 4.998710734147509e-05, "loss": 1.0305, "step": 350 }, { "epoch": 0.04, "grad_norm": 1.0737481983310084, "learning_rate": 4.99867363814563e-05, "loss": 0.9378, "step": 355 }, { "epoch": 0.04, "grad_norm": 1.2035643705297934, "learning_rate": 4.998636016146777e-05, "loss": 1.0645, "step": 360 }, { "epoch": 0.04, "grad_norm": 1.1937551163588551, "learning_rate": 4.998597868158869e-05, "loss": 0.9719, "step": 365 }, { "epoch": 0.04, "grad_norm": 1.1826140175264577, "learning_rate": 4.998559194189937e-05, "loss": 1.0146, "step": 370 }, { "epoch": 0.04, "grad_norm": 1.1042650143382684, "learning_rate": 4.998519994248123e-05, "loss": 1.0107, "step": 375 }, { "epoch": 0.04, "grad_norm": 1.2029194221209942, "learning_rate": 4.998480268341678e-05, "loss": 0.9828, "step": 380 }, { "epoch": 0.04, "grad_norm": 1.1072587809035566, "learning_rate": 4.9984400164789645e-05, "loss": 1.236, "step": 385 }, { "epoch": 0.05, "grad_norm": 1.0953844697196076, "learning_rate": 4.998399238668457e-05, "loss": 0.9292, "step": 390 }, { "epoch": 0.05, "grad_norm": 1.1716065545175716, "learning_rate": 4.99835793491874e-05, "loss": 1.0484, "step": 395 }, { "epoch": 0.05, "grad_norm": 1.0359817423985835, "learning_rate": 4.998316105238507e-05, "loss": 1.0815, "step": 400 }, { "epoch": 0.05, "grad_norm": 1.1579234526074056, "learning_rate": 4.998273749636564e-05, "loss": 1.0899, "step": 405 }, { "epoch": 0.05, "grad_norm": 1.2668886554383545, "learning_rate": 4.998230868121828e-05, "loss": 1.0541, "step": 410 }, { "epoch": 0.05, "grad_norm": 1.222528950010076, "learning_rate": 4.9981874607033255e-05, "loss": 1.0427, "step": 415 }, { "epoch": 0.05, "grad_norm": 1.048250239183132, "learning_rate": 4.998143527390194e-05, "loss": 0.9596, "step": 420 }, { "epoch": 0.05, "grad_norm": 1.120426201376995, "learning_rate": 4.998099068191682e-05, "loss": 0.9472, "step": 425 }, { "epoch": 0.05, "grad_norm": 1.0439744398964554, "learning_rate": 4.9980540831171486e-05, "loss": 1.0614, "step": 430 }, { "epoch": 0.05, "grad_norm": 1.1171357434251363, "learning_rate": 4.998008572176064e-05, "loss": 0.9669, "step": 435 }, { "epoch": 0.05, "grad_norm": 1.126080016712785, "learning_rate": 4.997962535378009e-05, "loss": 1.0044, "step": 440 }, { "epoch": 0.05, "grad_norm": 1.1291910970101615, "learning_rate": 4.997915972732674e-05, "loss": 1.0066, "step": 445 }, { "epoch": 0.05, "grad_norm": 1.0485393220686727, "learning_rate": 4.9978688842498614e-05, "loss": 1.0298, "step": 450 }, { "epoch": 0.05, "grad_norm": 1.1159726165725172, "learning_rate": 4.9978212699394835e-05, "loss": 0.9646, "step": 455 }, { "epoch": 0.05, "grad_norm": 1.1801373513766116, "learning_rate": 4.997773129811564e-05, "loss": 1.0419, "step": 460 }, { "epoch": 0.05, "grad_norm": 1.328022904217585, "learning_rate": 4.997724463876235e-05, "loss": 0.995, "step": 465 }, { "epoch": 0.05, "grad_norm": 1.553359191538908, "learning_rate": 4.997675272143745e-05, "loss": 1.0236, "step": 470 }, { "epoch": 0.06, "grad_norm": 1.0669585138225524, "learning_rate": 4.997625554624447e-05, "loss": 0.9219, "step": 475 }, { "epoch": 0.06, "grad_norm": 1.2514148117721813, "learning_rate": 4.997575311328807e-05, "loss": 1.0618, "step": 480 }, { "epoch": 0.06, "grad_norm": 1.0413844004800614, "learning_rate": 4.997524542267402e-05, "loss": 1.0033, "step": 485 }, { "epoch": 0.06, "grad_norm": 1.360363412276393, "learning_rate": 4.9974732474509194e-05, "loss": 1.0419, "step": 490 }, { "epoch": 0.06, "grad_norm": 1.1644390899221422, "learning_rate": 4.997421426890158e-05, "loss": 1.0206, "step": 495 }, { "epoch": 0.06, "grad_norm": 1.1011003708276013, "learning_rate": 4.997369080596025e-05, "loss": 0.9966, "step": 500 }, { "epoch": 0.06, "grad_norm": 1.18785372402119, "learning_rate": 4.997316208579541e-05, "loss": 1.043, "step": 505 }, { "epoch": 0.06, "grad_norm": 1.1598755032261203, "learning_rate": 4.9972628108518364e-05, "loss": 0.9685, "step": 510 }, { "epoch": 0.06, "grad_norm": 1.3077285598491475, "learning_rate": 4.997208887424152e-05, "loss": 0.9949, "step": 515 }, { "epoch": 0.06, "grad_norm": 1.1489137561146099, "learning_rate": 4.997154438307837e-05, "loss": 1.0071, "step": 520 }, { "epoch": 0.06, "grad_norm": 1.0181246713287804, "learning_rate": 4.9970994635143566e-05, "loss": 1.113, "step": 525 }, { "epoch": 0.06, "grad_norm": 1.2011072033512882, "learning_rate": 4.9970439630552826e-05, "loss": 1.0618, "step": 530 }, { "epoch": 0.06, "grad_norm": 1.133566905118304, "learning_rate": 4.9969879369422964e-05, "loss": 0.9615, "step": 535 }, { "epoch": 0.06, "grad_norm": 1.2008484845480965, "learning_rate": 4.996931385187195e-05, "loss": 0.9305, "step": 540 }, { "epoch": 0.06, "grad_norm": 1.2055167711396377, "learning_rate": 4.9968743078018815e-05, "loss": 1.019, "step": 545 }, { "epoch": 0.06, "grad_norm": 1.2349141913988522, "learning_rate": 4.9968167047983715e-05, "loss": 1.0175, "step": 550 }, { "epoch": 0.06, "grad_norm": 1.16231748566863, "learning_rate": 4.996758576188791e-05, "loss": 1.0421, "step": 555 }, { "epoch": 0.07, "grad_norm": 1.134522538690211, "learning_rate": 4.996699921985378e-05, "loss": 1.0446, "step": 560 }, { "epoch": 0.07, "grad_norm": 1.0781459655801693, "learning_rate": 4.996640742200478e-05, "loss": 0.9381, "step": 565 }, { "epoch": 0.07, "grad_norm": 1.2137309722711722, "learning_rate": 4.9965810368465494e-05, "loss": 0.9967, "step": 570 }, { "epoch": 0.07, "grad_norm": 1.1529053128390343, "learning_rate": 4.996520805936161e-05, "loss": 0.9657, "step": 575 }, { "epoch": 0.07, "grad_norm": 1.0460006580720695, "learning_rate": 4.996460049481992e-05, "loss": 0.9482, "step": 580 }, { "epoch": 0.07, "grad_norm": 1.238588161142535, "learning_rate": 4.996398767496833e-05, "loss": 1.0501, "step": 585 }, { "epoch": 0.07, "grad_norm": 1.2095871167802945, "learning_rate": 4.996336959993584e-05, "loss": 0.9922, "step": 590 }, { "epoch": 0.07, "grad_norm": 1.5591839757597055, "learning_rate": 4.9962746269852555e-05, "loss": 1.0473, "step": 595 }, { "epoch": 0.07, "grad_norm": 1.4018217429088933, "learning_rate": 4.99621176848497e-05, "loss": 1.0325, "step": 600 }, { "epoch": 0.07, "grad_norm": 1.2274248412044058, "learning_rate": 4.996148384505959e-05, "loss": 0.9981, "step": 605 }, { "epoch": 0.07, "grad_norm": 1.1372531103499968, "learning_rate": 4.9960844750615666e-05, "loss": 0.9938, "step": 610 }, { "epoch": 0.07, "grad_norm": 1.1484648375757214, "learning_rate": 4.9960200401652456e-05, "loss": 0.9234, "step": 615 }, { "epoch": 0.07, "grad_norm": 1.1663957851216613, "learning_rate": 4.995955079830561e-05, "loss": 1.0454, "step": 620 }, { "epoch": 0.07, "grad_norm": 1.067926862773165, "learning_rate": 4.995889594071187e-05, "loss": 0.9921, "step": 625 }, { "epoch": 0.07, "grad_norm": 1.1268531360199512, "learning_rate": 4.995823582900909e-05, "loss": 0.8797, "step": 630 }, { "epoch": 0.07, "grad_norm": 1.038312565787539, "learning_rate": 4.995757046333623e-05, "loss": 1.1061, "step": 635 }, { "epoch": 0.07, "grad_norm": 1.120749068173633, "learning_rate": 4.995689984383337e-05, "loss": 1.0332, "step": 640 }, { "epoch": 0.08, "grad_norm": 1.1747213257891365, "learning_rate": 4.995622397064167e-05, "loss": 1.051, "step": 645 }, { "epoch": 0.08, "grad_norm": 1.4146223383616165, "learning_rate": 4.99555428439034e-05, "loss": 1.0477, "step": 650 }, { "epoch": 0.08, "grad_norm": 1.2217985044940483, "learning_rate": 4.9954856463761955e-05, "loss": 0.9759, "step": 655 }, { "epoch": 0.08, "grad_norm": 1.1476396571931295, "learning_rate": 4.9954164830361826e-05, "loss": 1.0341, "step": 660 }, { "epoch": 0.08, "grad_norm": 1.1468045837629024, "learning_rate": 4.9953467943848615e-05, "loss": 1.022, "step": 665 }, { "epoch": 0.08, "grad_norm": 1.1506220646964473, "learning_rate": 4.995276580436901e-05, "loss": 0.944, "step": 670 }, { "epoch": 0.08, "grad_norm": 1.2193283081896507, "learning_rate": 4.9952058412070827e-05, "loss": 1.0019, "step": 675 }, { "epoch": 0.08, "grad_norm": 1.6278010973094974, "learning_rate": 4.9951345767102975e-05, "loss": 0.969, "step": 680 }, { "epoch": 0.08, "grad_norm": 1.1542453383131253, "learning_rate": 4.995062786961547e-05, "loss": 0.9639, "step": 685 }, { "epoch": 0.08, "grad_norm": 1.109667105016642, "learning_rate": 4.9949904719759456e-05, "loss": 0.984, "step": 690 }, { "epoch": 0.08, "grad_norm": 1.2279651658287232, "learning_rate": 4.994917631768714e-05, "loss": 1.0782, "step": 695 }, { "epoch": 0.08, "grad_norm": 1.2697221410971238, "learning_rate": 4.9948442663551875e-05, "loss": 1.0715, "step": 700 }, { "epoch": 0.08, "grad_norm": 1.0556788249673268, "learning_rate": 4.994770375750809e-05, "loss": 0.9582, "step": 705 }, { "epoch": 0.08, "grad_norm": 1.212747883871362, "learning_rate": 4.9946959599711344e-05, "loss": 1.0071, "step": 710 }, { "epoch": 0.08, "grad_norm": 1.0986630962573964, "learning_rate": 4.994621019031829e-05, "loss": 1.0043, "step": 715 }, { "epoch": 0.08, "grad_norm": 1.2618896084074627, "learning_rate": 4.9945455529486674e-05, "loss": 0.9495, "step": 720 }, { "epoch": 0.08, "grad_norm": 1.0323216327931164, "learning_rate": 4.994469561737536e-05, "loss": 0.9683, "step": 725 }, { "epoch": 0.09, "grad_norm": 1.0364089610975664, "learning_rate": 4.994393045414434e-05, "loss": 0.9673, "step": 730 }, { "epoch": 0.09, "grad_norm": 1.0669230855911755, "learning_rate": 4.994316003995466e-05, "loss": 0.9519, "step": 735 }, { "epoch": 0.09, "grad_norm": 1.11233721465925, "learning_rate": 4.9942384374968524e-05, "loss": 1.1105, "step": 740 }, { "epoch": 0.09, "grad_norm": 1.1407959492109718, "learning_rate": 4.9941603459349204e-05, "loss": 1.0423, "step": 745 }, { "epoch": 0.09, "grad_norm": 1.125422017552011, "learning_rate": 4.99408172932611e-05, "loss": 0.9786, "step": 750 }, { "epoch": 0.09, "grad_norm": 1.158106022415828, "learning_rate": 4.994002587686969e-05, "loss": 0.9698, "step": 755 }, { "epoch": 0.09, "grad_norm": 1.0939940993936046, "learning_rate": 4.9939229210341595e-05, "loss": 0.9609, "step": 760 }, { "epoch": 0.09, "grad_norm": 1.0622610266073254, "learning_rate": 4.993842729384452e-05, "loss": 1.0002, "step": 765 }, { "epoch": 0.09, "grad_norm": 1.1272829857252467, "learning_rate": 4.993762012754727e-05, "loss": 0.9943, "step": 770 }, { "epoch": 0.09, "grad_norm": 1.1726597080252255, "learning_rate": 4.993680771161976e-05, "loss": 0.9914, "step": 775 }, { "epoch": 0.09, "grad_norm": 1.1885807225605083, "learning_rate": 4.9935990046233013e-05, "loss": 1.04, "step": 780 }, { "epoch": 0.09, "grad_norm": 1.0252635112929571, "learning_rate": 4.9935167131559164e-05, "loss": 0.9601, "step": 785 }, { "epoch": 0.09, "grad_norm": 1.070768351996017, "learning_rate": 4.993433896777144e-05, "loss": 0.936, "step": 790 }, { "epoch": 0.09, "grad_norm": 0.979351135295802, "learning_rate": 4.9933505555044176e-05, "loss": 0.9474, "step": 795 }, { "epoch": 0.09, "grad_norm": 1.0923553433235567, "learning_rate": 4.9932666893552814e-05, "loss": 0.9527, "step": 800 }, { "epoch": 0.09, "grad_norm": 1.063621339559813, "learning_rate": 4.993182298347391e-05, "loss": 0.8715, "step": 805 }, { "epoch": 0.09, "grad_norm": 1.0645635491907166, "learning_rate": 4.9930973824985106e-05, "loss": 1.0387, "step": 810 }, { "epoch": 0.1, "grad_norm": 1.4101887938562314, "learning_rate": 4.993011941826515e-05, "loss": 1.0784, "step": 815 }, { "epoch": 0.1, "grad_norm": 1.1403174474482154, "learning_rate": 4.992925976349393e-05, "loss": 0.9465, "step": 820 }, { "epoch": 0.1, "grad_norm": 1.105964146430222, "learning_rate": 4.99283948608524e-05, "loss": 1.004, "step": 825 }, { "epoch": 0.1, "grad_norm": 1.0143092043613382, "learning_rate": 4.992752471052262e-05, "loss": 1.0008, "step": 830 }, { "epoch": 0.1, "grad_norm": 1.0563260851517375, "learning_rate": 4.992664931268778e-05, "loss": 0.9547, "step": 835 }, { "epoch": 0.1, "grad_norm": 1.1321506056379347, "learning_rate": 4.992576866753215e-05, "loss": 0.9449, "step": 840 }, { "epoch": 0.1, "grad_norm": 1.0550778724384844, "learning_rate": 4.992488277524112e-05, "loss": 0.9706, "step": 845 }, { "epoch": 0.1, "grad_norm": 1.1627731111385733, "learning_rate": 4.992399163600119e-05, "loss": 1.0122, "step": 850 }, { "epoch": 0.1, "grad_norm": 1.084268912268043, "learning_rate": 4.9923095249999934e-05, "loss": 0.9857, "step": 855 }, { "epoch": 0.1, "grad_norm": 1.0544899608238885, "learning_rate": 4.992219361742606e-05, "loss": 1.0404, "step": 860 }, { "epoch": 0.1, "grad_norm": 1.1289084827453804, "learning_rate": 4.992128673846937e-05, "loss": 0.9843, "step": 865 }, { "epoch": 0.1, "grad_norm": 1.1240470360783996, "learning_rate": 4.992037461332079e-05, "loss": 0.9472, "step": 870 }, { "epoch": 0.1, "grad_norm": 1.1645030232116904, "learning_rate": 4.9919457242172294e-05, "loss": 1.052, "step": 875 }, { "epoch": 0.1, "grad_norm": 1.0237899559040131, "learning_rate": 4.991853462521703e-05, "loss": 0.9998, "step": 880 }, { "epoch": 0.1, "grad_norm": 1.1368450264431524, "learning_rate": 4.99176067626492e-05, "loss": 0.9078, "step": 885 }, { "epoch": 0.1, "grad_norm": 1.2101786105315597, "learning_rate": 4.991667365466415e-05, "loss": 0.8937, "step": 890 }, { "epoch": 0.1, "grad_norm": 0.9993742814448794, "learning_rate": 4.991573530145829e-05, "loss": 0.9655, "step": 895 }, { "epoch": 0.11, "grad_norm": 1.114674160413653, "learning_rate": 4.9914791703229156e-05, "loss": 0.9457, "step": 900 }, { "epoch": 0.11, "grad_norm": 1.0911638319797456, "learning_rate": 4.9913842860175386e-05, "loss": 1.0958, "step": 905 }, { "epoch": 0.11, "grad_norm": 1.0083153225279762, "learning_rate": 4.9912888772496726e-05, "loss": 0.9445, "step": 910 }, { "epoch": 0.11, "grad_norm": 1.1273808617795962, "learning_rate": 4.9911929440394024e-05, "loss": 1.0286, "step": 915 }, { "epoch": 0.11, "grad_norm": 1.0090396888417874, "learning_rate": 4.991096486406922e-05, "loss": 0.9329, "step": 920 }, { "epoch": 0.11, "grad_norm": 1.0316018606241877, "learning_rate": 4.9909995043725367e-05, "loss": 1.0193, "step": 925 }, { "epoch": 0.11, "grad_norm": 1.0927576922289184, "learning_rate": 4.990901997956663e-05, "loss": 0.9225, "step": 930 }, { "epoch": 0.11, "grad_norm": 1.1525322010853176, "learning_rate": 4.990803967179827e-05, "loss": 1.0172, "step": 935 }, { "epoch": 0.11, "grad_norm": 1.2385687190577728, "learning_rate": 4.990705412062665e-05, "loss": 0.9959, "step": 940 }, { "epoch": 0.11, "grad_norm": 1.2865888272666233, "learning_rate": 4.9906063326259234e-05, "loss": 0.9272, "step": 945 }, { "epoch": 0.11, "grad_norm": 1.042732327178097, "learning_rate": 4.99050672889046e-05, "loss": 0.9197, "step": 950 }, { "epoch": 0.11, "grad_norm": 1.2125210575633774, "learning_rate": 4.9904066008772425e-05, "loss": 1.0497, "step": 955 }, { "epoch": 0.11, "grad_norm": 1.0184920052524684, "learning_rate": 4.990305948607349e-05, "loss": 0.9117, "step": 960 }, { "epoch": 0.11, "grad_norm": 1.023643684348291, "learning_rate": 4.9902047721019665e-05, "loss": 0.9475, "step": 965 }, { "epoch": 0.11, "grad_norm": 1.2246601761468199, "learning_rate": 4.990103071382396e-05, "loss": 1.0029, "step": 970 }, { "epoch": 0.11, "grad_norm": 1.1585569716875517, "learning_rate": 4.990000846470045e-05, "loss": 0.9906, "step": 975 }, { "epoch": 0.11, "grad_norm": 1.1231322075423755, "learning_rate": 4.989898097386433e-05, "loss": 0.9785, "step": 980 }, { "epoch": 0.12, "grad_norm": 1.025858498315423, "learning_rate": 4.98979482415319e-05, "loss": 0.937, "step": 985 }, { "epoch": 0.12, "grad_norm": 1.150967904443838, "learning_rate": 4.989691026792056e-05, "loss": 0.9761, "step": 990 }, { "epoch": 0.12, "grad_norm": 1.1880689515129315, "learning_rate": 4.9895867053248815e-05, "loss": 0.9922, "step": 995 }, { "epoch": 0.12, "grad_norm": 1.066619733682898, "learning_rate": 4.989481859773628e-05, "loss": 1.0385, "step": 1000 }, { "epoch": 0.12, "grad_norm": 1.1150814501276582, "learning_rate": 4.9893764901603656e-05, "loss": 1.0158, "step": 1005 }, { "epoch": 0.12, "grad_norm": 1.0835925240891797, "learning_rate": 4.989270596507276e-05, "loss": 0.9909, "step": 1010 }, { "epoch": 0.12, "grad_norm": 1.1249097643403791, "learning_rate": 4.9891641788366504e-05, "loss": 0.9925, "step": 1015 }, { "epoch": 0.12, "grad_norm": 1.1070476901019226, "learning_rate": 4.989057237170892e-05, "loss": 0.9266, "step": 1020 }, { "epoch": 0.12, "grad_norm": 1.0295190981562214, "learning_rate": 4.988949771532512e-05, "loss": 0.8754, "step": 1025 }, { "epoch": 0.12, "grad_norm": 1.034001470133943, "learning_rate": 4.988841781944134e-05, "loss": 1.0265, "step": 1030 }, { "epoch": 0.12, "grad_norm": 1.259459341174265, "learning_rate": 4.9887332684284904e-05, "loss": 0.9695, "step": 1035 }, { "epoch": 0.12, "grad_norm": 1.0540381986986065, "learning_rate": 4.988624231008424e-05, "loss": 0.9158, "step": 1040 }, { "epoch": 0.12, "grad_norm": 0.9986687320766073, "learning_rate": 4.9885146697068895e-05, "loss": 0.9557, "step": 1045 }, { "epoch": 0.12, "grad_norm": 1.0571885372322036, "learning_rate": 4.98840458454695e-05, "loss": 0.909, "step": 1050 }, { "epoch": 0.12, "grad_norm": 1.3303470190955011, "learning_rate": 4.9882939755517796e-05, "loss": 0.929, "step": 1055 }, { "epoch": 0.12, "grad_norm": 1.4428684503209415, "learning_rate": 4.988182842744662e-05, "loss": 1.0033, "step": 1060 }, { "epoch": 0.12, "grad_norm": 1.001470683681023, "learning_rate": 4.988071186148994e-05, "loss": 0.8846, "step": 1065 }, { "epoch": 0.13, "grad_norm": 1.5029225541016935, "learning_rate": 4.987959005788278e-05, "loss": 1.0021, "step": 1070 }, { "epoch": 0.13, "grad_norm": 1.2026572394839545, "learning_rate": 4.9878463016861294e-05, "loss": 1.0561, "step": 1075 }, { "epoch": 0.13, "grad_norm": 1.113398975046172, "learning_rate": 4.987733073866275e-05, "loss": 0.9779, "step": 1080 }, { "epoch": 0.13, "grad_norm": 1.1850171783475074, "learning_rate": 4.98761932235255e-05, "loss": 0.9879, "step": 1085 }, { "epoch": 0.13, "grad_norm": 1.0889902195011716, "learning_rate": 4.9875050471688995e-05, "loss": 0.9488, "step": 1090 }, { "epoch": 0.13, "grad_norm": 1.063107523673824, "learning_rate": 4.9873902483393804e-05, "loss": 0.9347, "step": 1095 }, { "epoch": 0.13, "grad_norm": 0.9517739473173996, "learning_rate": 4.987274925888159e-05, "loss": 0.9502, "step": 1100 }, { "epoch": 0.13, "grad_norm": 1.0496780085904533, "learning_rate": 4.9871590798395114e-05, "loss": 1.0158, "step": 1105 }, { "epoch": 0.13, "grad_norm": 0.9734234365963514, "learning_rate": 4.9870427102178245e-05, "loss": 0.9325, "step": 1110 }, { "epoch": 0.13, "grad_norm": 0.9700777836460857, "learning_rate": 4.9869258170475964e-05, "loss": 0.9613, "step": 1115 }, { "epoch": 0.13, "grad_norm": 1.0981548573974478, "learning_rate": 4.9868084003534324e-05, "loss": 0.8924, "step": 1120 }, { "epoch": 0.13, "grad_norm": 1.071610990717182, "learning_rate": 4.9866904601600515e-05, "loss": 0.997, "step": 1125 }, { "epoch": 0.13, "grad_norm": 1.049751015377668, "learning_rate": 4.9865719964922815e-05, "loss": 1.0079, "step": 1130 }, { "epoch": 0.13, "grad_norm": 1.294098878584965, "learning_rate": 4.986453009375058e-05, "loss": 0.9611, "step": 1135 }, { "epoch": 0.13, "grad_norm": 1.0787102628401606, "learning_rate": 4.986333498833432e-05, "loss": 0.9487, "step": 1140 }, { "epoch": 0.13, "grad_norm": 1.1185282217701418, "learning_rate": 4.9862134648925606e-05, "loss": 1.0095, "step": 1145 }, { "epoch": 0.13, "grad_norm": 1.008511461853383, "learning_rate": 4.986092907577711e-05, "loss": 0.9096, "step": 1150 }, { "epoch": 0.13, "grad_norm": 1.0146369090716436, "learning_rate": 4.985971826914263e-05, "loss": 0.8906, "step": 1155 }, { "epoch": 0.14, "grad_norm": 1.054839433777709, "learning_rate": 4.9858502229277056e-05, "loss": 0.9606, "step": 1160 }, { "epoch": 0.14, "grad_norm": 0.975493229451669, "learning_rate": 4.9857280956436366e-05, "loss": 0.9303, "step": 1165 }, { "epoch": 0.14, "grad_norm": 1.1929396164965356, "learning_rate": 4.9856054450877657e-05, "loss": 0.9367, "step": 1170 }, { "epoch": 0.14, "grad_norm": 0.9674508126117796, "learning_rate": 4.985482271285912e-05, "loss": 0.9015, "step": 1175 }, { "epoch": 0.14, "grad_norm": 1.0340864603119082, "learning_rate": 4.985358574264005e-05, "loss": 0.9937, "step": 1180 }, { "epoch": 0.14, "grad_norm": 1.1041655369749979, "learning_rate": 4.985234354048085e-05, "loss": 1.0129, "step": 1185 }, { "epoch": 0.14, "grad_norm": 1.1472324175635489, "learning_rate": 4.9851096106643003e-05, "loss": 0.9997, "step": 1190 }, { "epoch": 0.14, "grad_norm": 1.3921415331775695, "learning_rate": 4.984984344138911e-05, "loss": 0.8841, "step": 1195 }, { "epoch": 0.14, "grad_norm": 1.077043943816124, "learning_rate": 4.984858554498287e-05, "loss": 0.9375, "step": 1200 }, { "epoch": 0.14, "grad_norm": 1.0989436703484008, "learning_rate": 4.984732241768909e-05, "loss": 0.9864, "step": 1205 }, { "epoch": 0.14, "grad_norm": 1.166230423857481, "learning_rate": 4.984605405977367e-05, "loss": 0.994, "step": 1210 }, { "epoch": 0.14, "grad_norm": 1.2378507527837355, "learning_rate": 4.984478047150361e-05, "loss": 1.0142, "step": 1215 }, { "epoch": 0.14, "grad_norm": 1.1291435371399459, "learning_rate": 4.9843501653147005e-05, "loss": 1.0168, "step": 1220 }, { "epoch": 0.14, "grad_norm": 1.136596067818321, "learning_rate": 4.984221760497307e-05, "loss": 0.9722, "step": 1225 }, { "epoch": 0.14, "grad_norm": 1.030073369122721, "learning_rate": 4.9840928327252124e-05, "loss": 0.8917, "step": 1230 }, { "epoch": 0.14, "grad_norm": 1.0295089073150256, "learning_rate": 4.983963382025555e-05, "loss": 0.982, "step": 1235 }, { "epoch": 0.14, "grad_norm": 1.0028893637323986, "learning_rate": 4.983833408425586e-05, "loss": 0.9686, "step": 1240 }, { "epoch": 0.15, "grad_norm": 1.0108242129137919, "learning_rate": 4.9837029119526666e-05, "loss": 0.9802, "step": 1245 }, { "epoch": 0.15, "grad_norm": 1.342055014654345, "learning_rate": 4.983571892634268e-05, "loss": 0.9146, "step": 1250 }, { "epoch": 0.15, "grad_norm": 0.9861838898768059, "learning_rate": 4.98344035049797e-05, "loss": 1.0945, "step": 1255 }, { "epoch": 0.15, "grad_norm": 1.0987386544632918, "learning_rate": 4.9833082855714655e-05, "loss": 0.9169, "step": 1260 }, { "epoch": 0.15, "grad_norm": 1.0240510509527712, "learning_rate": 4.9831756978825533e-05, "loss": 0.9588, "step": 1265 }, { "epoch": 0.15, "grad_norm": 1.026861888806448, "learning_rate": 4.983042587459146e-05, "loss": 0.9212, "step": 1270 }, { "epoch": 0.15, "grad_norm": 1.0405422029154836, "learning_rate": 4.982908954329265e-05, "loss": 0.9657, "step": 1275 }, { "epoch": 0.15, "grad_norm": 1.0734083412242856, "learning_rate": 4.982774798521041e-05, "loss": 0.8937, "step": 1280 }, { "epoch": 0.15, "grad_norm": 1.028849733659568, "learning_rate": 4.982640120062714e-05, "loss": 0.976, "step": 1285 }, { "epoch": 0.15, "grad_norm": 3.5639846221607407, "learning_rate": 4.9825049189826364e-05, "loss": 0.9879, "step": 1290 }, { "epoch": 0.15, "grad_norm": 1.2306434216545792, "learning_rate": 4.98236919530927e-05, "loss": 1.0149, "step": 1295 }, { "epoch": 0.15, "grad_norm": 0.998198552589239, "learning_rate": 4.982232949071185e-05, "loss": 1.0374, "step": 1300 }, { "epoch": 0.15, "grad_norm": 1.1037963291634068, "learning_rate": 4.982096180297063e-05, "loss": 0.9608, "step": 1305 }, { "epoch": 0.15, "grad_norm": 1.2580344419394551, "learning_rate": 4.9819588890156954e-05, "loss": 0.9281, "step": 1310 }, { "epoch": 0.15, "grad_norm": 0.9813615836076114, "learning_rate": 4.9818210752559836e-05, "loss": 1.0075, "step": 1315 }, { "epoch": 0.15, "grad_norm": 1.0726165269232262, "learning_rate": 4.9816827390469376e-05, "loss": 0.9427, "step": 1320 }, { "epoch": 0.15, "grad_norm": 1.1526945180857884, "learning_rate": 4.981543880417681e-05, "loss": 0.923, "step": 1325 }, { "epoch": 0.16, "grad_norm": 1.0887297803255238, "learning_rate": 4.981404499397442e-05, "loss": 0.9718, "step": 1330 }, { "epoch": 0.16, "grad_norm": 1.0129906993925826, "learning_rate": 4.981264596015565e-05, "loss": 0.9261, "step": 1335 }, { "epoch": 0.16, "grad_norm": 0.9977442889932219, "learning_rate": 4.9811241703014985e-05, "loss": 0.9647, "step": 1340 }, { "epoch": 0.16, "grad_norm": 1.1924499850171741, "learning_rate": 4.980983222284805e-05, "loss": 0.9626, "step": 1345 }, { "epoch": 0.16, "grad_norm": 0.9338597201109845, "learning_rate": 4.980841751995154e-05, "loss": 0.9113, "step": 1350 }, { "epoch": 0.16, "grad_norm": 0.9878861354812847, "learning_rate": 4.980699759462329e-05, "loss": 0.9838, "step": 1355 }, { "epoch": 0.16, "grad_norm": 2.072420646942375, "learning_rate": 4.98055724471622e-05, "loss": 0.9378, "step": 1360 }, { "epoch": 0.16, "grad_norm": 1.0383692996758382, "learning_rate": 4.9804142077868255e-05, "loss": 1.0072, "step": 1365 }, { "epoch": 0.16, "grad_norm": 0.964575336840462, "learning_rate": 4.9802706487042596e-05, "loss": 0.9501, "step": 1370 }, { "epoch": 0.16, "grad_norm": 1.0977595340605062, "learning_rate": 4.980126567498741e-05, "loss": 0.9698, "step": 1375 }, { "epoch": 0.16, "grad_norm": 43.58973051023571, "learning_rate": 4.979981964200601e-05, "loss": 1.0252, "step": 1380 }, { "epoch": 0.16, "grad_norm": 0.9996807116400318, "learning_rate": 4.979836838840279e-05, "loss": 1.0884, "step": 1385 }, { "epoch": 0.16, "grad_norm": 1.2340067790524, "learning_rate": 4.9796911914483266e-05, "loss": 0.9428, "step": 1390 }, { "epoch": 0.16, "grad_norm": 1.2437280129441672, "learning_rate": 4.979545022055405e-05, "loss": 0.8784, "step": 1395 }, { "epoch": 0.16, "grad_norm": 1.2031921231745022, "learning_rate": 4.979398330692282e-05, "loss": 0.9594, "step": 1400 }, { "epoch": 0.16, "grad_norm": 1.0385706074058578, "learning_rate": 4.979251117389839e-05, "loss": 0.9923, "step": 1405 }, { "epoch": 0.16, "grad_norm": 1.070391407596474, "learning_rate": 4.979103382179067e-05, "loss": 0.8948, "step": 1410 }, { "epoch": 0.17, "grad_norm": 1.313389868119059, "learning_rate": 4.9789551250910635e-05, "loss": 1.0072, "step": 1415 }, { "epoch": 0.17, "grad_norm": 1.0879150187440534, "learning_rate": 4.97880634615704e-05, "loss": 1.0532, "step": 1420 }, { "epoch": 0.17, "grad_norm": 0.9194567966273833, "learning_rate": 4.978657045408315e-05, "loss": 0.895, "step": 1425 }, { "epoch": 0.17, "grad_norm": 1.0708849906186182, "learning_rate": 4.978507222876318e-05, "loss": 0.9765, "step": 1430 }, { "epoch": 0.17, "grad_norm": 1.0456229104852912, "learning_rate": 4.9783568785925884e-05, "loss": 1.0524, "step": 1435 }, { "epoch": 0.17, "grad_norm": 1.224747882054799, "learning_rate": 4.9782060125887754e-05, "loss": 0.9801, "step": 1440 }, { "epoch": 0.17, "grad_norm": 1.0562635138369583, "learning_rate": 4.978054624896638e-05, "loss": 0.9685, "step": 1445 }, { "epoch": 0.17, "grad_norm": 1.0770522445683612, "learning_rate": 4.9779027155480446e-05, "loss": 0.9567, "step": 1450 }, { "epoch": 0.17, "grad_norm": 1.0610209246694893, "learning_rate": 4.9777502845749734e-05, "loss": 1.0977, "step": 1455 }, { "epoch": 0.17, "grad_norm": 1.0007879688504147, "learning_rate": 4.977597332009514e-05, "loss": 1.0614, "step": 1460 }, { "epoch": 0.17, "grad_norm": 1.1286945667650947, "learning_rate": 4.977443857883862e-05, "loss": 0.9462, "step": 1465 }, { "epoch": 0.17, "grad_norm": 1.4756372869464596, "learning_rate": 4.977289862230328e-05, "loss": 0.8638, "step": 1470 }, { "epoch": 0.17, "grad_norm": 1.1007600667866084, "learning_rate": 4.977135345081328e-05, "loss": 1.0591, "step": 1475 }, { "epoch": 0.17, "grad_norm": 1.0628226523719524, "learning_rate": 4.976980306469391e-05, "loss": 0.9044, "step": 1480 }, { "epoch": 0.17, "grad_norm": 0.9976004764454622, "learning_rate": 4.976824746427153e-05, "loss": 0.9865, "step": 1485 }, { "epoch": 0.17, "grad_norm": 1.1673629310802622, "learning_rate": 4.976668664987361e-05, "loss": 0.9455, "step": 1490 }, { "epoch": 0.17, "grad_norm": 1.019773002400684, "learning_rate": 4.9765120621828723e-05, "loss": 0.8515, "step": 1495 }, { "epoch": 0.18, "grad_norm": 0.9929101518268212, "learning_rate": 4.976354938046653e-05, "loss": 0.966, "step": 1500 }, { "epoch": 0.18, "grad_norm": 1.1659344987660303, "learning_rate": 4.97619729261178e-05, "loss": 0.8927, "step": 1505 }, { "epoch": 0.18, "grad_norm": 0.9800973379786984, "learning_rate": 4.976039125911438e-05, "loss": 0.9127, "step": 1510 }, { "epoch": 0.18, "grad_norm": 1.0293538916496288, "learning_rate": 4.975880437978925e-05, "loss": 0.8911, "step": 1515 }, { "epoch": 0.18, "grad_norm": 0.9786726951695742, "learning_rate": 4.975721228847644e-05, "loss": 0.9666, "step": 1520 }, { "epoch": 0.18, "grad_norm": 1.0950897672726212, "learning_rate": 4.9755614985511125e-05, "loss": 0.9644, "step": 1525 }, { "epoch": 0.18, "grad_norm": 1.1564485380567946, "learning_rate": 4.975401247122954e-05, "loss": 0.9354, "step": 1530 }, { "epoch": 0.18, "grad_norm": 1.211567054566017, "learning_rate": 4.975240474596903e-05, "loss": 0.9813, "step": 1535 }, { "epoch": 0.18, "grad_norm": 1.0353263153354124, "learning_rate": 4.9750791810068037e-05, "loss": 0.9243, "step": 1540 }, { "epoch": 0.18, "grad_norm": 1.3556542000792884, "learning_rate": 4.974917366386611e-05, "loss": 1.0324, "step": 1545 }, { "epoch": 0.18, "grad_norm": 0.9965640833003246, "learning_rate": 4.974755030770388e-05, "loss": 1.0008, "step": 1550 }, { "epoch": 0.18, "grad_norm": 1.060616779983905, "learning_rate": 4.974592174192308e-05, "loss": 0.9107, "step": 1555 }, { "epoch": 0.18, "grad_norm": 1.3131016948246124, "learning_rate": 4.974428796686655e-05, "loss": 0.903, "step": 1560 }, { "epoch": 0.18, "grad_norm": 1.0018412867219073, "learning_rate": 4.9742648982878206e-05, "loss": 0.9254, "step": 1565 }, { "epoch": 0.18, "grad_norm": 0.9838746414950001, "learning_rate": 4.974100479030307e-05, "loss": 0.941, "step": 1570 }, { "epoch": 0.18, "grad_norm": 0.9921677375009069, "learning_rate": 4.973935538948726e-05, "loss": 0.8576, "step": 1575 }, { "epoch": 0.18, "grad_norm": 32.02600340596563, "learning_rate": 4.9737700780778006e-05, "loss": 0.8981, "step": 1580 }, { "epoch": 0.19, "grad_norm": 1.0362957531909271, "learning_rate": 4.973604096452361e-05, "loss": 0.9904, "step": 1585 }, { "epoch": 0.19, "grad_norm": 1.0872834022022002, "learning_rate": 4.973437594107349e-05, "loss": 1.019, "step": 1590 }, { "epoch": 0.19, "grad_norm": 2.671364234323081, "learning_rate": 4.9732705710778136e-05, "loss": 0.9141, "step": 1595 }, { "epoch": 0.19, "grad_norm": 1.0535153676037712, "learning_rate": 4.973103027398916e-05, "loss": 1.051, "step": 1600 }, { "epoch": 0.19, "grad_norm": 1.0544706958645702, "learning_rate": 4.972934963105925e-05, "loss": 1.0177, "step": 1605 }, { "epoch": 0.19, "grad_norm": 1.1073221246994636, "learning_rate": 4.9727663782342216e-05, "loss": 0.9306, "step": 1610 }, { "epoch": 0.19, "grad_norm": 1.056906858082247, "learning_rate": 4.9725972728192925e-05, "loss": 1.0191, "step": 1615 }, { "epoch": 0.19, "grad_norm": 1.0980206496507552, "learning_rate": 4.9724276468967376e-05, "loss": 1.0461, "step": 1620 }, { "epoch": 0.19, "grad_norm": 0.9742009813970189, "learning_rate": 4.972257500502264e-05, "loss": 0.9959, "step": 1625 }, { "epoch": 0.19, "grad_norm": 1.1132119064493218, "learning_rate": 4.972086833671691e-05, "loss": 0.9713, "step": 1630 }, { "epoch": 0.19, "grad_norm": 0.9180554607431861, "learning_rate": 4.971915646440944e-05, "loss": 0.8513, "step": 1635 }, { "epoch": 0.19, "grad_norm": 1.2669533011597012, "learning_rate": 4.97174393884606e-05, "loss": 1.2273, "step": 1640 }, { "epoch": 0.19, "grad_norm": 0.9604477177346731, "learning_rate": 4.971571710923186e-05, "loss": 0.9914, "step": 1645 }, { "epoch": 0.19, "grad_norm": 1.0739417974073544, "learning_rate": 4.9713989627085774e-05, "loss": 0.964, "step": 1650 }, { "epoch": 0.19, "grad_norm": 1.02137214500824, "learning_rate": 4.9712256942385995e-05, "loss": 1.0252, "step": 1655 }, { "epoch": 0.19, "grad_norm": 1.0150842290472988, "learning_rate": 4.971051905549726e-05, "loss": 0.9787, "step": 1660 }, { "epoch": 0.19, "grad_norm": 0.9700700274417345, "learning_rate": 4.9708775966785434e-05, "loss": 0.9451, "step": 1665 }, { "epoch": 0.2, "grad_norm": 3.4431579105019137, "learning_rate": 4.9707027676617444e-05, "loss": 0.9389, "step": 1670 }, { "epoch": 0.2, "grad_norm": 1.0036855479909967, "learning_rate": 4.9705274185361325e-05, "loss": 0.8942, "step": 1675 }, { "epoch": 0.2, "grad_norm": 0.9898182747639732, "learning_rate": 4.97035154933862e-05, "loss": 0.9761, "step": 1680 }, { "epoch": 0.2, "grad_norm": 1.004767069567346, "learning_rate": 4.9701751601062296e-05, "loss": 0.8977, "step": 1685 }, { "epoch": 0.2, "grad_norm": 1.0788304384765617, "learning_rate": 4.9699982508760934e-05, "loss": 0.9322, "step": 1690 }, { "epoch": 0.2, "grad_norm": 1.3660645241937865, "learning_rate": 4.969820821685452e-05, "loss": 0.9696, "step": 1695 }, { "epoch": 0.2, "grad_norm": 1.0565937040002569, "learning_rate": 4.969642872571656e-05, "loss": 1.0409, "step": 1700 }, { "epoch": 0.2, "grad_norm": 1.0440023633812632, "learning_rate": 4.9694644035721674e-05, "loss": 0.891, "step": 1705 }, { "epoch": 0.2, "grad_norm": 1.0819964185769326, "learning_rate": 4.969285414724554e-05, "loss": 0.9289, "step": 1710 }, { "epoch": 0.2, "grad_norm": 1.1084576623601878, "learning_rate": 4.9691059060664944e-05, "loss": 0.9735, "step": 1715 }, { "epoch": 0.2, "grad_norm": 1.0785946241396942, "learning_rate": 4.968925877635778e-05, "loss": 0.9522, "step": 1720 }, { "epoch": 0.2, "grad_norm": 1.0403368593941278, "learning_rate": 4.9687453294703037e-05, "loss": 0.9727, "step": 1725 }, { "epoch": 0.2, "grad_norm": 1.0205488654226593, "learning_rate": 4.968564261608077e-05, "loss": 0.8656, "step": 1730 }, { "epoch": 0.2, "grad_norm": 1.6384143210213529, "learning_rate": 4.968382674087215e-05, "loss": 0.9569, "step": 1735 }, { "epoch": 0.2, "grad_norm": 1.7801143627607843, "learning_rate": 4.968200566945944e-05, "loss": 0.9265, "step": 1740 }, { "epoch": 0.2, "grad_norm": 1.1349526474514298, "learning_rate": 4.9680179402226e-05, "loss": 1.0261, "step": 1745 }, { "epoch": 0.2, "grad_norm": 1.144111915916139, "learning_rate": 4.967834793955627e-05, "loss": 0.9004, "step": 1750 }, { "epoch": 0.21, "grad_norm": 0.9958760712213459, "learning_rate": 4.9676511281835795e-05, "loss": 0.9929, "step": 1755 }, { "epoch": 0.21, "grad_norm": 1.100461026765821, "learning_rate": 4.96746694294512e-05, "loss": 0.9216, "step": 1760 }, { "epoch": 0.21, "grad_norm": 0.9914756050641614, "learning_rate": 4.967282238279024e-05, "loss": 0.9949, "step": 1765 }, { "epoch": 0.21, "grad_norm": 1.1073400438840872, "learning_rate": 4.967097014224171e-05, "loss": 0.994, "step": 1770 }, { "epoch": 0.21, "grad_norm": 1.2147150611970752, "learning_rate": 4.9669112708195555e-05, "loss": 0.9717, "step": 1775 }, { "epoch": 0.21, "grad_norm": 0.9268310524106047, "learning_rate": 4.966725008104276e-05, "loss": 0.9517, "step": 1780 }, { "epoch": 0.21, "grad_norm": 0.9372467545249018, "learning_rate": 4.966538226117543e-05, "loss": 0.939, "step": 1785 }, { "epoch": 0.21, "grad_norm": 1.0761847075054334, "learning_rate": 4.966350924898677e-05, "loss": 0.9365, "step": 1790 }, { "epoch": 0.21, "grad_norm": 0.9995303594538358, "learning_rate": 4.966163104487107e-05, "loss": 0.9657, "step": 1795 }, { "epoch": 0.21, "grad_norm": 1.2779518847116105, "learning_rate": 4.96597476492237e-05, "loss": 0.9172, "step": 1800 }, { "epoch": 0.21, "grad_norm": 1.3880637551350963, "learning_rate": 4.965785906244114e-05, "loss": 0.9239, "step": 1805 }, { "epoch": 0.21, "grad_norm": 0.9177267522535165, "learning_rate": 4.965596528492096e-05, "loss": 0.8447, "step": 1810 }, { "epoch": 0.21, "grad_norm": 0.9420407257960208, "learning_rate": 4.965406631706182e-05, "loss": 0.8569, "step": 1815 }, { "epoch": 0.21, "grad_norm": 1.0271277300742228, "learning_rate": 4.965216215926347e-05, "loss": 0.8694, "step": 1820 }, { "epoch": 0.21, "grad_norm": 1.0650930656875617, "learning_rate": 4.9650252811926755e-05, "loss": 0.8789, "step": 1825 }, { "epoch": 0.21, "grad_norm": 1.098138271404505, "learning_rate": 4.964833827545362e-05, "loss": 1.0128, "step": 1830 }, { "epoch": 0.21, "grad_norm": 1.8880726674631114, "learning_rate": 4.9646418550247086e-05, "loss": 0.9438, "step": 1835 }, { "epoch": 0.21, "grad_norm": 1.37137181096444, "learning_rate": 4.9644493636711275e-05, "loss": 0.9536, "step": 1840 }, { "epoch": 0.22, "grad_norm": 1.1349121207619406, "learning_rate": 4.9642563535251405e-05, "loss": 0.8945, "step": 1845 }, { "epoch": 0.22, "grad_norm": 1.0199324476720972, "learning_rate": 4.9640628246273776e-05, "loss": 0.8518, "step": 1850 }, { "epoch": 0.22, "grad_norm": 6.0796366441547445, "learning_rate": 4.96386877701858e-05, "loss": 0.9347, "step": 1855 }, { "epoch": 0.22, "grad_norm": 1.0265880593292798, "learning_rate": 4.9636742107395965e-05, "loss": 1.0344, "step": 1860 }, { "epoch": 0.22, "grad_norm": 1.1115498681795417, "learning_rate": 4.963479125831384e-05, "loss": 0.998, "step": 1865 }, { "epoch": 0.22, "grad_norm": 0.9983932174222161, "learning_rate": 4.963283522335011e-05, "loss": 1.0467, "step": 1870 }, { "epoch": 0.22, "grad_norm": 1.0069162672897751, "learning_rate": 4.963087400291654e-05, "loss": 0.9391, "step": 1875 }, { "epoch": 0.22, "grad_norm": 0.9449861144085916, "learning_rate": 4.9628907597425986e-05, "loss": 0.9381, "step": 1880 }, { "epoch": 0.22, "grad_norm": 1.0023292009166762, "learning_rate": 4.9626936007292394e-05, "loss": 0.8568, "step": 1885 }, { "epoch": 0.22, "grad_norm": 0.9288546742514908, "learning_rate": 4.962495923293081e-05, "loss": 0.9373, "step": 1890 }, { "epoch": 0.22, "grad_norm": 1.1206214411358377, "learning_rate": 4.962297727475736e-05, "loss": 1.012, "step": 1895 }, { "epoch": 0.22, "grad_norm": 1.102011598389579, "learning_rate": 4.9620990133189274e-05, "loss": 0.94, "step": 1900 }, { "epoch": 0.22, "grad_norm": 2.102609050951811, "learning_rate": 4.9618997808644864e-05, "loss": 0.9391, "step": 1905 }, { "epoch": 0.22, "grad_norm": 1.0654246307959698, "learning_rate": 4.961700030154353e-05, "loss": 0.9658, "step": 1910 }, { "epoch": 0.22, "grad_norm": 1.1005104987219085, "learning_rate": 4.961499761230576e-05, "loss": 0.9468, "step": 1915 }, { "epoch": 0.22, "grad_norm": 1.0256121111487757, "learning_rate": 4.9612989741353164e-05, "loss": 1.0237, "step": 1920 }, { "epoch": 0.22, "grad_norm": 1.0468017919184678, "learning_rate": 4.9610976689108415e-05, "loss": 0.9045, "step": 1925 }, { "epoch": 0.23, "grad_norm": 0.9989011872789753, "learning_rate": 4.9608958455995265e-05, "loss": 1.0025, "step": 1930 }, { "epoch": 0.23, "grad_norm": 1.0599468757943245, "learning_rate": 4.9606935042438584e-05, "loss": 0.9578, "step": 1935 }, { "epoch": 0.23, "grad_norm": 0.994261797120377, "learning_rate": 4.960490644886433e-05, "loss": 0.9877, "step": 1940 }, { "epoch": 0.23, "grad_norm": 0.999539882035706, "learning_rate": 4.9602872675699516e-05, "loss": 0.982, "step": 1945 }, { "epoch": 0.23, "grad_norm": 1.0461348478709427, "learning_rate": 4.960083372337231e-05, "loss": 0.9658, "step": 1950 }, { "epoch": 0.23, "grad_norm": 1.1214366738174446, "learning_rate": 4.95987895923119e-05, "loss": 0.9433, "step": 1955 }, { "epoch": 0.23, "grad_norm": 0.9896133914145191, "learning_rate": 4.959674028294862e-05, "loss": 1.0174, "step": 1960 }, { "epoch": 0.23, "grad_norm": 1.0654497666503353, "learning_rate": 4.959468579571386e-05, "loss": 0.9731, "step": 1965 }, { "epoch": 0.23, "grad_norm": 0.9682466593387831, "learning_rate": 4.9592626131040105e-05, "loss": 0.9353, "step": 1970 }, { "epoch": 0.23, "grad_norm": 1.0461286941244134, "learning_rate": 4.959056128936095e-05, "loss": 0.9267, "step": 1975 }, { "epoch": 0.23, "grad_norm": 1.217230793575659, "learning_rate": 4.958849127111105e-05, "loss": 0.9779, "step": 1980 }, { "epoch": 0.23, "grad_norm": 1.008087374556498, "learning_rate": 4.958641607672619e-05, "loss": 1.0081, "step": 1985 }, { "epoch": 0.23, "grad_norm": 1.0930387058212025, "learning_rate": 4.9584335706643194e-05, "loss": 0.9487, "step": 1990 }, { "epoch": 0.23, "grad_norm": 0.9531729301746176, "learning_rate": 4.958225016130002e-05, "loss": 0.9946, "step": 1995 }, { "epoch": 0.23, "grad_norm": 0.9619034951743967, "learning_rate": 4.9580159441135685e-05, "loss": 0.9251, "step": 2000 }, { "epoch": 0.23, "grad_norm": 0.9904024793521159, "learning_rate": 4.957806354659031e-05, "loss": 0.9031, "step": 2005 }, { "epoch": 0.23, "grad_norm": 1.0499680746726634, "learning_rate": 4.9575962478105115e-05, "loss": 0.9011, "step": 2010 }, { "epoch": 0.24, "grad_norm": 0.9945935916217389, "learning_rate": 4.9573856236122383e-05, "loss": 0.875, "step": 2015 }, { "epoch": 0.24, "grad_norm": 0.9743685311839844, "learning_rate": 4.9571744821085494e-05, "loss": 0.9096, "step": 2020 }, { "epoch": 0.24, "grad_norm": 1.0497111678386803, "learning_rate": 4.956962823343895e-05, "loss": 0.8738, "step": 2025 }, { "epoch": 0.24, "grad_norm": 1.0932387309732174, "learning_rate": 4.956750647362829e-05, "loss": 0.8564, "step": 2030 }, { "epoch": 0.24, "grad_norm": 1.025830754286173, "learning_rate": 4.956537954210018e-05, "loss": 0.9633, "step": 2035 }, { "epoch": 0.24, "grad_norm": 1.000027276228554, "learning_rate": 4.9563247439302344e-05, "loss": 0.8779, "step": 2040 }, { "epoch": 0.24, "grad_norm": 0.9625342626885018, "learning_rate": 4.956111016568363e-05, "loss": 0.9348, "step": 2045 }, { "epoch": 0.24, "grad_norm": 0.9763805549131147, "learning_rate": 4.955896772169395e-05, "loss": 0.9944, "step": 2050 }, { "epoch": 0.24, "grad_norm": 0.9910882147777211, "learning_rate": 4.955682010778432e-05, "loss": 0.9646, "step": 2055 }, { "epoch": 0.24, "grad_norm": 1.0309272557339653, "learning_rate": 4.9554667324406814e-05, "loss": 1.0429, "step": 2060 }, { "epoch": 0.24, "grad_norm": 1.075680518698965, "learning_rate": 4.9552509372014636e-05, "loss": 0.8867, "step": 2065 }, { "epoch": 0.24, "grad_norm": 1.0732233966703328, "learning_rate": 4.955034625106204e-05, "loss": 0.8747, "step": 2070 }, { "epoch": 0.24, "grad_norm": 1.0703269000267739, "learning_rate": 4.954817796200441e-05, "loss": 0.9328, "step": 2075 }, { "epoch": 0.24, "grad_norm": 1.1682895759835192, "learning_rate": 4.954600450529817e-05, "loss": 0.9911, "step": 2080 }, { "epoch": 0.24, "grad_norm": 1.0605161246329127, "learning_rate": 4.954382588140086e-05, "loss": 1.0787, "step": 2085 }, { "epoch": 0.24, "grad_norm": 1.0746688585425606, "learning_rate": 4.9541642090771115e-05, "loss": 0.9707, "step": 2090 }, { "epoch": 0.24, "grad_norm": 1.1166360666019006, "learning_rate": 4.953945313386864e-05, "loss": 0.9907, "step": 2095 }, { "epoch": 0.25, "grad_norm": 1.101683961882563, "learning_rate": 4.953725901115422e-05, "loss": 0.953, "step": 2100 }, { "epoch": 0.25, "grad_norm": 1.056657955523902, "learning_rate": 4.953505972308976e-05, "loss": 0.9325, "step": 2105 }, { "epoch": 0.25, "grad_norm": 0.999828227759176, "learning_rate": 4.9532855270138226e-05, "loss": 0.9419, "step": 2110 }, { "epoch": 0.25, "grad_norm": 0.9860568170600005, "learning_rate": 4.953064565276368e-05, "loss": 0.9516, "step": 2115 }, { "epoch": 0.25, "grad_norm": 0.9898163712376775, "learning_rate": 4.9528430871431264e-05, "loss": 0.9693, "step": 2120 }, { "epoch": 0.25, "grad_norm": 1.0773087335738685, "learning_rate": 4.952621092660722e-05, "loss": 0.9071, "step": 2125 }, { "epoch": 0.25, "grad_norm": 1.0436769253979625, "learning_rate": 4.952398581875886e-05, "loss": 1.004, "step": 2130 }, { "epoch": 0.25, "grad_norm": 0.9757969393954983, "learning_rate": 4.95217555483546e-05, "loss": 0.9222, "step": 2135 }, { "epoch": 0.25, "grad_norm": 1.0186056430216128, "learning_rate": 4.9519520115863937e-05, "loss": 0.9371, "step": 2140 }, { "epoch": 0.25, "grad_norm": 1.1110737550470071, "learning_rate": 4.9517279521757446e-05, "loss": 1.0049, "step": 2145 }, { "epoch": 0.25, "grad_norm": 1.103422546430909, "learning_rate": 4.9515033766506805e-05, "loss": 0.7888, "step": 2150 }, { "epoch": 0.25, "grad_norm": 0.929146499878184, "learning_rate": 4.951278285058476e-05, "loss": 0.9972, "step": 2155 }, { "epoch": 0.25, "grad_norm": 0.9297137090132164, "learning_rate": 4.951052677446515e-05, "loss": 0.9387, "step": 2160 }, { "epoch": 0.25, "grad_norm": 1.0057194407826662, "learning_rate": 4.950826553862292e-05, "loss": 0.9708, "step": 2165 }, { "epoch": 0.25, "grad_norm": 0.9909416049658488, "learning_rate": 4.950599914353406e-05, "loss": 0.9934, "step": 2170 }, { "epoch": 0.25, "grad_norm": 1.0313929731620215, "learning_rate": 4.950372758967569e-05, "loss": 0.9364, "step": 2175 }, { "epoch": 0.25, "grad_norm": 0.9842558788029845, "learning_rate": 4.950145087752598e-05, "loss": 1.0124, "step": 2180 }, { "epoch": 0.26, "grad_norm": 0.9856973064084651, "learning_rate": 4.949916900756422e-05, "loss": 0.9041, "step": 2185 }, { "epoch": 0.26, "grad_norm": 1.0166774424926104, "learning_rate": 4.949688198027075e-05, "loss": 0.9168, "step": 2190 }, { "epoch": 0.26, "grad_norm": 1.0145061405636069, "learning_rate": 4.949458979612702e-05, "loss": 0.9105, "step": 2195 }, { "epoch": 0.26, "grad_norm": 0.9488445041446063, "learning_rate": 4.9492292455615565e-05, "loss": 0.9902, "step": 2200 }, { "epoch": 0.26, "grad_norm": 1.1884658052352395, "learning_rate": 4.9489989959219986e-05, "loss": 0.9966, "step": 2205 }, { "epoch": 0.26, "grad_norm": 1.0515928059240325, "learning_rate": 4.9487682307424985e-05, "loss": 1.0284, "step": 2210 }, { "epoch": 0.26, "grad_norm": 0.9705709426071323, "learning_rate": 4.9485369500716365e-05, "loss": 0.9415, "step": 2215 }, { "epoch": 0.26, "grad_norm": 1.4392749705612937, "learning_rate": 4.948305153958098e-05, "loss": 0.9081, "step": 2220 }, { "epoch": 0.26, "grad_norm": 1.0914357539028203, "learning_rate": 4.948072842450678e-05, "loss": 1.0203, "step": 2225 }, { "epoch": 0.26, "grad_norm": 0.9517975268601816, "learning_rate": 4.9478400155982815e-05, "loss": 0.9065, "step": 2230 }, { "epoch": 0.26, "grad_norm": 0.9857692625038519, "learning_rate": 4.947606673449922e-05, "loss": 0.9281, "step": 2235 }, { "epoch": 0.26, "grad_norm": 1.1965974391031806, "learning_rate": 4.9473728160547184e-05, "loss": 0.9814, "step": 2240 }, { "epoch": 0.26, "grad_norm": 0.931918323998934, "learning_rate": 4.947138443461901e-05, "loss": 0.9632, "step": 2245 }, { "epoch": 0.26, "grad_norm": 1.2424534680701529, "learning_rate": 4.9469035557208074e-05, "loss": 0.8815, "step": 2250 }, { "epoch": 0.26, "grad_norm": 1.0804964338036775, "learning_rate": 4.946668152880884e-05, "loss": 1.0169, "step": 2255 }, { "epoch": 0.26, "grad_norm": 1.0050442196221274, "learning_rate": 4.9464322349916866e-05, "loss": 0.9392, "step": 2260 }, { "epoch": 0.26, "grad_norm": 1.0672729123381384, "learning_rate": 4.946195802102878e-05, "loss": 0.9886, "step": 2265 }, { "epoch": 0.27, "grad_norm": 1.1050264427087864, "learning_rate": 4.9459588542642276e-05, "loss": 0.8766, "step": 2270 }, { "epoch": 0.27, "grad_norm": 1.0385687804876034, "learning_rate": 4.9457213915256186e-05, "loss": 0.879, "step": 2275 }, { "epoch": 0.27, "grad_norm": 1.0480805729433877, "learning_rate": 4.9454834139370373e-05, "loss": 0.9988, "step": 2280 }, { "epoch": 0.27, "grad_norm": 1.2752768724008028, "learning_rate": 4.9452449215485814e-05, "loss": 0.8835, "step": 2285 }, { "epoch": 0.27, "grad_norm": 0.9903481586303367, "learning_rate": 4.9450059144104554e-05, "loss": 0.9346, "step": 2290 }, { "epoch": 0.27, "grad_norm": 1.4711399334225232, "learning_rate": 4.944766392572974e-05, "loss": 0.8156, "step": 2295 }, { "epoch": 0.27, "grad_norm": 0.9916844541286457, "learning_rate": 4.9445263560865574e-05, "loss": 0.8965, "step": 2300 }, { "epoch": 0.27, "grad_norm": 1.0361221929417161, "learning_rate": 4.944285805001737e-05, "loss": 1.0175, "step": 2305 }, { "epoch": 0.27, "grad_norm": 1.0799656489302991, "learning_rate": 4.9440447393691505e-05, "loss": 0.9811, "step": 2310 }, { "epoch": 0.27, "grad_norm": 1.1561924038632971, "learning_rate": 4.943803159239546e-05, "loss": 0.9925, "step": 2315 }, { "epoch": 0.27, "grad_norm": 1.0217733016841795, "learning_rate": 4.943561064663777e-05, "loss": 1.0075, "step": 2320 }, { "epoch": 0.27, "grad_norm": 0.9296018093283421, "learning_rate": 4.943318455692808e-05, "loss": 0.9273, "step": 2325 }, { "epoch": 0.27, "grad_norm": 0.947181231161687, "learning_rate": 4.943075332377711e-05, "loss": 1.004, "step": 2330 }, { "epoch": 0.27, "grad_norm": 0.9775957360955153, "learning_rate": 4.942831694769665e-05, "loss": 0.9509, "step": 2335 }, { "epoch": 0.27, "grad_norm": 1.0304155163978852, "learning_rate": 4.942587542919959e-05, "loss": 1.0683, "step": 2340 }, { "epoch": 0.27, "grad_norm": 1.028559900699405, "learning_rate": 4.9423428768799895e-05, "loss": 0.9023, "step": 2345 }, { "epoch": 0.27, "grad_norm": 0.9355822387484952, "learning_rate": 4.942097696701261e-05, "loss": 1.0111, "step": 2350 }, { "epoch": 0.28, "grad_norm": 1.0343094695838864, "learning_rate": 4.941852002435387e-05, "loss": 0.9462, "step": 2355 }, { "epoch": 0.28, "grad_norm": 0.9490484407437914, "learning_rate": 4.941605794134087e-05, "loss": 0.8872, "step": 2360 }, { "epoch": 0.28, "grad_norm": 0.9733407846727499, "learning_rate": 4.9413590718491927e-05, "loss": 0.9536, "step": 2365 }, { "epoch": 0.28, "grad_norm": 1.0885703766584243, "learning_rate": 4.94111183563264e-05, "loss": 0.8668, "step": 2370 }, { "epoch": 0.28, "grad_norm": 1.0291312285654912, "learning_rate": 4.9408640855364766e-05, "loss": 0.9678, "step": 2375 }, { "epoch": 0.28, "grad_norm": 1.0465668480489048, "learning_rate": 4.940615821612854e-05, "loss": 0.8852, "step": 2380 }, { "epoch": 0.28, "grad_norm": 0.9576448277741111, "learning_rate": 4.940367043914037e-05, "loss": 0.8695, "step": 2385 }, { "epoch": 0.28, "grad_norm": 0.9521806529133873, "learning_rate": 4.940117752492394e-05, "loss": 0.9553, "step": 2390 }, { "epoch": 0.28, "grad_norm": 0.9965331170260615, "learning_rate": 4.9398679474004036e-05, "loss": 0.9443, "step": 2395 }, { "epoch": 0.28, "grad_norm": 0.9929617469278481, "learning_rate": 4.939617628690654e-05, "loss": 0.9898, "step": 2400 }, { "epoch": 0.28, "grad_norm": 1.022046218347804, "learning_rate": 4.939366796415838e-05, "loss": 0.9837, "step": 2405 }, { "epoch": 0.28, "grad_norm": 0.9161635143646696, "learning_rate": 4.939115450628759e-05, "loss": 0.8949, "step": 2410 }, { "epoch": 0.28, "grad_norm": 1.001534067477496, "learning_rate": 4.9388635913823276e-05, "loss": 0.9111, "step": 2415 }, { "epoch": 0.28, "grad_norm": 1.1066495535991843, "learning_rate": 4.9386112187295645e-05, "loss": 0.8868, "step": 2420 }, { "epoch": 0.28, "grad_norm": 0.9884705917124958, "learning_rate": 4.938358332723595e-05, "loss": 0.975, "step": 2425 }, { "epoch": 0.28, "grad_norm": 1.072196623371505, "learning_rate": 4.9381049334176556e-05, "loss": 0.9471, "step": 2430 }, { "epoch": 0.28, "grad_norm": 1.0872776991462638, "learning_rate": 4.937851020865088e-05, "loss": 1.0266, "step": 2435 }, { "epoch": 0.29, "grad_norm": 1.0518911165357627, "learning_rate": 4.937596595119345e-05, "loss": 0.9534, "step": 2440 }, { "epoch": 0.29, "grad_norm": 1.0892624142061076, "learning_rate": 4.9373416562339835e-05, "loss": 0.9071, "step": 2445 }, { "epoch": 0.29, "grad_norm": 1.232676411555252, "learning_rate": 4.9370862042626745e-05, "loss": 0.9999, "step": 2450 }, { "epoch": 0.29, "grad_norm": 1.0878922768218178, "learning_rate": 4.936830239259189e-05, "loss": 0.9959, "step": 2455 }, { "epoch": 0.29, "grad_norm": 1.2065797810654832, "learning_rate": 4.936573761277414e-05, "loss": 0.9064, "step": 2460 }, { "epoch": 0.29, "grad_norm": 1.0430843034159922, "learning_rate": 4.936316770371339e-05, "loss": 0.9976, "step": 2465 }, { "epoch": 0.29, "grad_norm": 1.0791384462173006, "learning_rate": 4.936059266595064e-05, "loss": 0.915, "step": 2470 }, { "epoch": 0.29, "grad_norm": 0.9768206190883695, "learning_rate": 4.935801250002795e-05, "loss": 0.9264, "step": 2475 }, { "epoch": 0.29, "grad_norm": 0.9290093480571731, "learning_rate": 4.935542720648849e-05, "loss": 0.9211, "step": 2480 }, { "epoch": 0.29, "grad_norm": 0.9576971219770666, "learning_rate": 4.9352836785876475e-05, "loss": 1.0057, "step": 2485 }, { "epoch": 0.29, "grad_norm": 0.9383325318591361, "learning_rate": 4.9350241238737225e-05, "loss": 0.961, "step": 2490 }, { "epoch": 0.29, "grad_norm": 1.0544191874502755, "learning_rate": 4.934764056561713e-05, "loss": 0.9107, "step": 2495 }, { "epoch": 0.29, "grad_norm": 2.09184960623287, "learning_rate": 4.934503476706366e-05, "loss": 0.8734, "step": 2500 }, { "epoch": 0.29, "grad_norm": 1.626087550208894, "learning_rate": 4.9342423843625354e-05, "loss": 0.8435, "step": 2505 }, { "epoch": 0.29, "grad_norm": 0.9188646760831191, "learning_rate": 4.933980779585183e-05, "loss": 0.8747, "step": 2510 }, { "epoch": 0.29, "grad_norm": 0.9750772856183894, "learning_rate": 4.933718662429383e-05, "loss": 0.8752, "step": 2515 }, { "epoch": 0.29, "grad_norm": 0.9119480198871687, "learning_rate": 4.9334560329503105e-05, "loss": 1.3787, "step": 2520 }, { "epoch": 0.3, "grad_norm": 1.544445095992932, "learning_rate": 4.933192891203253e-05, "loss": 0.945, "step": 2525 }, { "epoch": 0.3, "grad_norm": 1.081224532231963, "learning_rate": 4.9329292372436044e-05, "loss": 0.9645, "step": 2530 }, { "epoch": 0.3, "grad_norm": 0.920134814037843, "learning_rate": 4.932665071126866e-05, "loss": 0.9326, "step": 2535 }, { "epoch": 0.3, "grad_norm": 1.0525978932870466, "learning_rate": 4.932400392908649e-05, "loss": 0.9884, "step": 2540 }, { "epoch": 0.3, "grad_norm": 1.1325733499830895, "learning_rate": 4.93213520264467e-05, "loss": 0.9399, "step": 2545 }, { "epoch": 0.3, "grad_norm": 0.9859079362445832, "learning_rate": 4.9318695003907545e-05, "loss": 0.9954, "step": 2550 }, { "epoch": 0.3, "grad_norm": 1.0165748185591408, "learning_rate": 4.931603286202835e-05, "loss": 0.869, "step": 2555 }, { "epoch": 0.3, "grad_norm": 0.9232422414048324, "learning_rate": 4.931336560136953e-05, "loss": 0.9703, "step": 2560 }, { "epoch": 0.3, "grad_norm": 0.9815186857455945, "learning_rate": 4.931069322249258e-05, "loss": 0.9752, "step": 2565 }, { "epoch": 0.3, "grad_norm": 0.9246440863691903, "learning_rate": 4.930801572596005e-05, "loss": 1.0154, "step": 2570 }, { "epoch": 0.3, "grad_norm": 1.0722734879138376, "learning_rate": 4.930533311233557e-05, "loss": 0.9113, "step": 2575 }, { "epoch": 0.3, "grad_norm": 0.9414323488721418, "learning_rate": 4.930264538218389e-05, "loss": 0.8482, "step": 2580 }, { "epoch": 0.3, "grad_norm": 0.9559687725252016, "learning_rate": 4.929995253607078e-05, "loss": 0.8771, "step": 2585 }, { "epoch": 0.3, "grad_norm": 0.9884286336038881, "learning_rate": 4.929725457456312e-05, "loss": 0.9949, "step": 2590 }, { "epoch": 0.3, "grad_norm": 0.9826661070910763, "learning_rate": 4.929455149822887e-05, "loss": 0.93, "step": 2595 }, { "epoch": 0.3, "grad_norm": 1.0180709164209465, "learning_rate": 4.929184330763704e-05, "loss": 0.9295, "step": 2600 }, { "epoch": 0.3, "grad_norm": 1.0084177859809078, "learning_rate": 4.9289130003357744e-05, "loss": 1.0116, "step": 2605 }, { "epoch": 0.3, "grad_norm": 0.949883707096381, "learning_rate": 4.928641158596215e-05, "loss": 0.8502, "step": 2610 }, { "epoch": 0.31, "grad_norm": 1.1068546345812, "learning_rate": 4.928368805602252e-05, "loss": 1.0092, "step": 2615 }, { "epoch": 0.31, "grad_norm": 0.954898595831912, "learning_rate": 4.9280959414112184e-05, "loss": 0.9578, "step": 2620 }, { "epoch": 0.31, "grad_norm": 0.8662476745851277, "learning_rate": 4.927822566080556e-05, "loss": 0.849, "step": 2625 }, { "epoch": 0.31, "grad_norm": 0.8849945901540008, "learning_rate": 4.927548679667811e-05, "loss": 0.9345, "step": 2630 }, { "epoch": 0.31, "grad_norm": 1.0043542735829336, "learning_rate": 4.9272742822306404e-05, "loss": 0.8519, "step": 2635 }, { "epoch": 0.31, "grad_norm": 1.0100590956998168, "learning_rate": 4.9269993738268084e-05, "loss": 0.9497, "step": 2640 }, { "epoch": 0.31, "grad_norm": 1.082575179811648, "learning_rate": 4.9267239545141855e-05, "loss": 0.9621, "step": 2645 }, { "epoch": 0.31, "grad_norm": 1.0870659005442584, "learning_rate": 4.926448024350751e-05, "loss": 0.9921, "step": 2650 }, { "epoch": 0.31, "grad_norm": 1.118395019353692, "learning_rate": 4.92617158339459e-05, "loss": 0.9255, "step": 2655 }, { "epoch": 0.31, "grad_norm": 1.0147065011669647, "learning_rate": 4.9258946317038967e-05, "loss": 0.9186, "step": 2660 }, { "epoch": 0.31, "grad_norm": 0.939720111574292, "learning_rate": 4.9256171693369724e-05, "loss": 0.8523, "step": 2665 }, { "epoch": 0.31, "grad_norm": 1.0430188436900594, "learning_rate": 4.925339196352226e-05, "loss": 0.9459, "step": 2670 }, { "epoch": 0.31, "grad_norm": 0.9607539094185692, "learning_rate": 4.925060712808174e-05, "loss": 0.7983, "step": 2675 }, { "epoch": 0.31, "grad_norm": 1.0311514516159483, "learning_rate": 4.924781718763438e-05, "loss": 0.9423, "step": 2680 }, { "epoch": 0.31, "grad_norm": 0.9630614717407954, "learning_rate": 4.9245022142767526e-05, "loss": 0.9417, "step": 2685 }, { "epoch": 0.31, "grad_norm": 1.0202763718220864, "learning_rate": 4.924222199406954e-05, "loss": 0.8712, "step": 2690 }, { "epoch": 0.31, "grad_norm": 1.0158369048337754, "learning_rate": 4.923941674212989e-05, "loss": 0.8647, "step": 2695 }, { "epoch": 0.32, "grad_norm": 0.9795044626481724, "learning_rate": 4.92366063875391e-05, "loss": 0.9445, "step": 2700 }, { "epoch": 0.32, "grad_norm": 0.9642520944769797, "learning_rate": 4.9233790930888805e-05, "loss": 0.9583, "step": 2705 }, { "epoch": 0.32, "grad_norm": 0.9322141000631244, "learning_rate": 4.923097037277166e-05, "loss": 0.8844, "step": 2710 }, { "epoch": 0.32, "grad_norm": 0.9296945733326111, "learning_rate": 4.9228144713781435e-05, "loss": 0.869, "step": 2715 }, { "epoch": 0.32, "grad_norm": 0.9674562115919055, "learning_rate": 4.9225313954512965e-05, "loss": 0.9717, "step": 2720 }, { "epoch": 0.32, "grad_norm": 1.1958624855560076, "learning_rate": 4.922247809556215e-05, "loss": 0.9778, "step": 2725 }, { "epoch": 0.32, "grad_norm": 0.8583557213963918, "learning_rate": 4.9219637137525963e-05, "loss": 0.9669, "step": 2730 }, { "epoch": 0.32, "grad_norm": 1.1631100570882231, "learning_rate": 4.921679108100246e-05, "loss": 0.8419, "step": 2735 }, { "epoch": 0.32, "grad_norm": 1.084321427743312, "learning_rate": 4.921393992659077e-05, "loss": 1.0262, "step": 2740 }, { "epoch": 0.32, "grad_norm": 1.362756035361668, "learning_rate": 4.921108367489108e-05, "loss": 0.9827, "step": 2745 }, { "epoch": 0.32, "grad_norm": 0.9256269790194894, "learning_rate": 4.920822232650467e-05, "loss": 0.9296, "step": 2750 }, { "epoch": 0.32, "grad_norm": 1.1396124516737005, "learning_rate": 4.9205355882033885e-05, "loss": 0.944, "step": 2755 }, { "epoch": 0.32, "grad_norm": 0.9678768380588751, "learning_rate": 4.9202484342082135e-05, "loss": 0.9527, "step": 2760 }, { "epoch": 0.32, "grad_norm": 0.9661086011155546, "learning_rate": 4.919960770725391e-05, "loss": 0.8902, "step": 2765 }, { "epoch": 0.32, "grad_norm": 1.3251674666991091, "learning_rate": 4.919672597815478e-05, "loss": 0.999, "step": 2770 }, { "epoch": 0.32, "grad_norm": 0.9726361573108259, "learning_rate": 4.919383915539136e-05, "loss": 0.9536, "step": 2775 }, { "epoch": 0.32, "grad_norm": 0.90487447873456, "learning_rate": 4.919094723957138e-05, "loss": 0.9986, "step": 2780 }, { "epoch": 0.33, "grad_norm": 0.9626231464935668, "learning_rate": 4.9188050231303604e-05, "loss": 0.9595, "step": 2785 }, { "epoch": 0.33, "grad_norm": 0.9374856877133149, "learning_rate": 4.918514813119789e-05, "loss": 0.916, "step": 2790 }, { "epoch": 0.33, "grad_norm": 0.9561303093764307, "learning_rate": 4.918224093986516e-05, "loss": 0.8875, "step": 2795 }, { "epoch": 0.33, "grad_norm": 1.4089821387226624, "learning_rate": 4.917932865791739e-05, "loss": 0.967, "step": 2800 }, { "epoch": 0.33, "grad_norm": 0.9469865846642878, "learning_rate": 4.917641128596767e-05, "loss": 0.9069, "step": 2805 }, { "epoch": 0.33, "grad_norm": 0.9949370084932259, "learning_rate": 4.917348882463013e-05, "loss": 1.0126, "step": 2810 }, { "epoch": 0.33, "grad_norm": 1.200759951729825, "learning_rate": 4.9170561274519976e-05, "loss": 0.9478, "step": 2815 }, { "epoch": 0.33, "grad_norm": 0.9928901061210181, "learning_rate": 4.916762863625349e-05, "loss": 0.8878, "step": 2820 }, { "epoch": 0.33, "grad_norm": 0.9582105762009233, "learning_rate": 4.916469091044803e-05, "loss": 0.9834, "step": 2825 }, { "epoch": 0.33, "grad_norm": 1.1540783618074437, "learning_rate": 4.916174809772199e-05, "loss": 0.8523, "step": 2830 }, { "epoch": 0.33, "grad_norm": 1.0531775113786088, "learning_rate": 4.9158800198694896e-05, "loss": 0.9445, "step": 2835 }, { "epoch": 0.33, "grad_norm": 1.073871804610859, "learning_rate": 4.9155847213987305e-05, "loss": 0.8917, "step": 2840 }, { "epoch": 0.33, "grad_norm": 1.1677878364422891, "learning_rate": 4.9152889144220835e-05, "loss": 0.8572, "step": 2845 }, { "epoch": 0.33, "grad_norm": 0.9954259885389227, "learning_rate": 4.9149925990018204e-05, "loss": 0.9245, "step": 2850 }, { "epoch": 0.33, "grad_norm": 1.1752106055774592, "learning_rate": 4.914695775200318e-05, "loss": 0.9651, "step": 2855 }, { "epoch": 0.33, "grad_norm": 1.1768655767032141, "learning_rate": 4.914398443080062e-05, "loss": 1.0641, "step": 2860 }, { "epoch": 0.33, "grad_norm": 0.9555298034078296, "learning_rate": 4.9141006027036417e-05, "loss": 0.9058, "step": 2865 }, { "epoch": 0.34, "grad_norm": 0.9715196417521234, "learning_rate": 4.913802254133758e-05, "loss": 0.9832, "step": 2870 }, { "epoch": 0.34, "grad_norm": 0.9869689106405393, "learning_rate": 4.913503397433215e-05, "loss": 0.9757, "step": 2875 }, { "epoch": 0.34, "grad_norm": 1.0222448994283775, "learning_rate": 4.913204032664926e-05, "loss": 0.94, "step": 2880 }, { "epoch": 0.34, "grad_norm": 1.3685013944103575, "learning_rate": 4.91290415989191e-05, "loss": 1.015, "step": 2885 }, { "epoch": 0.34, "grad_norm": 1.0819621741081182, "learning_rate": 4.9126037791772936e-05, "loss": 0.9726, "step": 2890 }, { "epoch": 0.34, "grad_norm": 0.9188569988661278, "learning_rate": 4.9123028905843096e-05, "loss": 0.8222, "step": 2895 }, { "epoch": 0.34, "grad_norm": 0.9847904938147446, "learning_rate": 4.912001494176298e-05, "loss": 0.9818, "step": 2900 }, { "epoch": 0.34, "grad_norm": 0.9722499551191345, "learning_rate": 4.911699590016707e-05, "loss": 0.9852, "step": 2905 }, { "epoch": 0.34, "grad_norm": 0.9794396170425321, "learning_rate": 4.911397178169089e-05, "loss": 0.8712, "step": 2910 }, { "epoch": 0.34, "grad_norm": 0.9277676450289588, "learning_rate": 4.9110942586971063e-05, "loss": 0.8835, "step": 2915 }, { "epoch": 0.34, "grad_norm": 0.9652657650925404, "learning_rate": 4.910790831664527e-05, "loss": 0.9498, "step": 2920 }, { "epoch": 0.34, "grad_norm": 1.0296350726273003, "learning_rate": 4.9104868971352234e-05, "loss": 0.9344, "step": 2925 }, { "epoch": 0.34, "grad_norm": 1.151515006554475, "learning_rate": 4.9101824551731794e-05, "loss": 0.8435, "step": 2930 }, { "epoch": 0.34, "grad_norm": 1.0499957918181868, "learning_rate": 4.909877505842482e-05, "loss": 0.9727, "step": 2935 }, { "epoch": 0.34, "grad_norm": 1.083112909131835, "learning_rate": 4.909572049207326e-05, "loss": 0.8516, "step": 2940 }, { "epoch": 0.34, "grad_norm": 1.075195671744196, "learning_rate": 4.9092660853320136e-05, "loss": 0.9314, "step": 2945 }, { "epoch": 0.34, "grad_norm": 0.9238139779363551, "learning_rate": 4.908959614280953e-05, "loss": 0.9429, "step": 2950 }, { "epoch": 0.35, "grad_norm": 1.0908803571726293, "learning_rate": 4.908652636118661e-05, "loss": 0.8782, "step": 2955 }, { "epoch": 0.35, "grad_norm": 0.9479817659551019, "learning_rate": 4.908345150909758e-05, "loss": 0.9838, "step": 2960 }, { "epoch": 0.35, "grad_norm": 0.9766407253343226, "learning_rate": 4.9080371587189733e-05, "loss": 0.8407, "step": 2965 }, { "epoch": 0.35, "grad_norm": 1.011893092811412, "learning_rate": 4.9077286596111424e-05, "loss": 0.8957, "step": 2970 }, { "epoch": 0.35, "grad_norm": 0.9792082715502184, "learning_rate": 4.9074196536512086e-05, "loss": 1.1728, "step": 2975 }, { "epoch": 0.35, "grad_norm": 0.9113771488229973, "learning_rate": 4.90711014090422e-05, "loss": 0.9546, "step": 2980 }, { "epoch": 0.35, "grad_norm": 1.0896247441785774, "learning_rate": 4.906800121435332e-05, "loss": 0.9244, "step": 2985 }, { "epoch": 0.35, "grad_norm": 1.08651788115613, "learning_rate": 4.9064895953098086e-05, "loss": 0.9743, "step": 2990 }, { "epoch": 0.35, "grad_norm": 0.9835695542501453, "learning_rate": 4.9061785625930166e-05, "loss": 0.8719, "step": 2995 }, { "epoch": 0.35, "grad_norm": 1.0188446261250015, "learning_rate": 4.9058670233504334e-05, "loss": 0.8452, "step": 3000 }, { "epoch": 0.35, "grad_norm": 0.8559846064638893, "learning_rate": 4.9055549776476396e-05, "loss": 0.871, "step": 3005 }, { "epoch": 0.35, "grad_norm": 0.9977916618614925, "learning_rate": 4.905242425550326e-05, "loss": 0.8807, "step": 3010 }, { "epoch": 0.35, "grad_norm": 0.9105788014199488, "learning_rate": 4.904929367124286e-05, "loss": 0.8189, "step": 3015 }, { "epoch": 0.35, "grad_norm": 1.0117513405476202, "learning_rate": 4.904615802435423e-05, "loss": 0.9234, "step": 3020 }, { "epoch": 0.35, "grad_norm": 1.0312616631987472, "learning_rate": 4.904301731549746e-05, "loss": 0.8749, "step": 3025 }, { "epoch": 0.35, "grad_norm": 1.0374045678241164, "learning_rate": 4.9039871545333695e-05, "loss": 0.9879, "step": 3030 }, { "epoch": 0.35, "grad_norm": 0.9251833838287623, "learning_rate": 4.903672071452515e-05, "loss": 0.8905, "step": 3035 }, { "epoch": 0.36, "grad_norm": 0.9808020454280866, "learning_rate": 4.9033564823735115e-05, "loss": 0.9036, "step": 3040 }, { "epoch": 0.36, "grad_norm": 0.8634500576183282, "learning_rate": 4.903040387362794e-05, "loss": 0.937, "step": 3045 }, { "epoch": 0.36, "grad_norm": 1.0030832358770914, "learning_rate": 4.902723786486903e-05, "loss": 0.9796, "step": 3050 }, { "epoch": 0.36, "grad_norm": 1.07901100788446, "learning_rate": 4.9024066798124856e-05, "loss": 0.8604, "step": 3055 }, { "epoch": 0.36, "grad_norm": 1.027171957887921, "learning_rate": 4.902089067406298e-05, "loss": 0.8031, "step": 3060 }, { "epoch": 0.36, "grad_norm": 0.9953708063939386, "learning_rate": 4.9017709493352e-05, "loss": 0.9195, "step": 3065 }, { "epoch": 0.36, "grad_norm": 1.158676130786448, "learning_rate": 4.901452325666159e-05, "loss": 0.9301, "step": 3070 }, { "epoch": 0.36, "grad_norm": 1.0047225791013188, "learning_rate": 4.9011331964662474e-05, "loss": 0.9188, "step": 3075 }, { "epoch": 0.36, "grad_norm": 1.017150424414422, "learning_rate": 4.9008135618026475e-05, "loss": 0.9713, "step": 3080 }, { "epoch": 0.36, "grad_norm": 1.0631614941745693, "learning_rate": 4.900493421742644e-05, "loss": 0.8969, "step": 3085 }, { "epoch": 0.36, "grad_norm": 1.1453441720230393, "learning_rate": 4.9001727763536304e-05, "loss": 0.9167, "step": 3090 }, { "epoch": 0.36, "grad_norm": 0.962536915609097, "learning_rate": 4.8998516257031056e-05, "loss": 0.912, "step": 3095 }, { "epoch": 0.36, "grad_norm": 0.9384833307069022, "learning_rate": 4.899529969858675e-05, "loss": 0.8796, "step": 3100 }, { "epoch": 0.36, "grad_norm": 0.9738294990297953, "learning_rate": 4.899207808888051e-05, "loss": 0.8597, "step": 3105 }, { "epoch": 0.36, "grad_norm": 0.9731458004211785, "learning_rate": 4.898885142859052e-05, "loss": 0.8853, "step": 3110 }, { "epoch": 0.36, "grad_norm": 0.9671123438525385, "learning_rate": 4.8985619718396025e-05, "loss": 0.9604, "step": 3115 }, { "epoch": 0.36, "grad_norm": 1.1739490131600088, "learning_rate": 4.8982382958977334e-05, "loss": 0.9938, "step": 3120 }, { "epoch": 0.37, "grad_norm": 0.9570476646533199, "learning_rate": 4.897914115101581e-05, "loss": 0.9317, "step": 3125 }, { "epoch": 0.37, "grad_norm": 0.9875332516891884, "learning_rate": 4.89758942951939e-05, "loss": 0.9771, "step": 3130 }, { "epoch": 0.37, "grad_norm": 1.0100905293852893, "learning_rate": 4.897264239219509e-05, "loss": 1.0268, "step": 3135 }, { "epoch": 0.37, "grad_norm": 1.0275794718432414, "learning_rate": 4.896938544270394e-05, "loss": 0.9127, "step": 3140 }, { "epoch": 0.37, "grad_norm": 1.0412756847375793, "learning_rate": 4.8966123447406084e-05, "loss": 0.8887, "step": 3145 }, { "epoch": 0.37, "grad_norm": 0.987992081513018, "learning_rate": 4.89628564069882e-05, "loss": 0.9161, "step": 3150 }, { "epoch": 0.37, "grad_norm": 0.8437278171894443, "learning_rate": 4.895958432213803e-05, "loss": 0.8649, "step": 3155 }, { "epoch": 0.37, "grad_norm": 1.1196960218204823, "learning_rate": 4.8956307193544396e-05, "loss": 1.0025, "step": 3160 }, { "epoch": 0.37, "grad_norm": 1.0905613402252354, "learning_rate": 4.895302502189715e-05, "loss": 0.9304, "step": 3165 }, { "epoch": 0.37, "grad_norm": 0.9790350423791192, "learning_rate": 4.894973780788722e-05, "loss": 0.9091, "step": 3170 }, { "epoch": 0.37, "grad_norm": 0.951724969988976, "learning_rate": 4.894644555220662e-05, "loss": 0.9091, "step": 3175 }, { "epoch": 0.37, "grad_norm": 0.8808331608565764, "learning_rate": 4.89431482555484e-05, "loss": 0.8943, "step": 3180 }, { "epoch": 0.37, "grad_norm": 1.0398444619224043, "learning_rate": 4.893984591860666e-05, "loss": 0.9017, "step": 3185 }, { "epoch": 0.37, "grad_norm": 1.1761590418893446, "learning_rate": 4.8936538542076596e-05, "loss": 0.914, "step": 3190 }, { "epoch": 0.37, "grad_norm": 1.1598503289608948, "learning_rate": 4.893322612665442e-05, "loss": 0.899, "step": 3195 }, { "epoch": 0.37, "grad_norm": 1.1788779225336012, "learning_rate": 4.892990867303746e-05, "loss": 0.849, "step": 3200 }, { "epoch": 0.37, "grad_norm": 1.09993036242583, "learning_rate": 4.892658618192406e-05, "loss": 1.0156, "step": 3205 }, { "epoch": 0.38, "grad_norm": 0.9245927037778428, "learning_rate": 4.8923258654013625e-05, "loss": 0.7657, "step": 3210 }, { "epoch": 0.38, "grad_norm": 1.1620986771023685, "learning_rate": 4.891992609000666e-05, "loss": 0.9115, "step": 3215 }, { "epoch": 0.38, "grad_norm": 0.960263948912723, "learning_rate": 4.89165884906047e-05, "loss": 0.8494, "step": 3220 }, { "epoch": 0.38, "grad_norm": 2.068493760356761, "learning_rate": 4.8913245856510325e-05, "loss": 0.8763, "step": 3225 }, { "epoch": 0.38, "grad_norm": 1.323625170395326, "learning_rate": 4.8909898188427214e-05, "loss": 0.8659, "step": 3230 }, { "epoch": 0.38, "grad_norm": 1.0375389700327822, "learning_rate": 4.8906545487060086e-05, "loss": 0.8975, "step": 3235 }, { "epoch": 0.38, "grad_norm": 1.3120119678153748, "learning_rate": 4.890318775311471e-05, "loss": 0.9708, "step": 3240 }, { "epoch": 0.38, "grad_norm": 1.0162533650075878, "learning_rate": 4.8899824987297925e-05, "loss": 0.9788, "step": 3245 }, { "epoch": 0.38, "grad_norm": 1.0115847758646506, "learning_rate": 4.8896457190317635e-05, "loss": 0.9105, "step": 3250 }, { "epoch": 0.38, "grad_norm": 0.9620865967537976, "learning_rate": 4.889308436288279e-05, "loss": 0.9193, "step": 3255 }, { "epoch": 0.38, "grad_norm": 0.895107993395294, "learning_rate": 4.8889706505703407e-05, "loss": 0.865, "step": 3260 }, { "epoch": 0.38, "grad_norm": 0.9613138314308484, "learning_rate": 4.8886323619490566e-05, "loss": 0.936, "step": 3265 }, { "epoch": 0.38, "grad_norm": 0.8906143795908595, "learning_rate": 4.888293570495639e-05, "loss": 0.8952, "step": 3270 }, { "epoch": 0.38, "grad_norm": 0.9234029374816892, "learning_rate": 4.887954276281409e-05, "loss": 0.962, "step": 3275 }, { "epoch": 0.38, "grad_norm": 0.9402627059485674, "learning_rate": 4.887614479377789e-05, "loss": 0.9338, "step": 3280 }, { "epoch": 0.38, "grad_norm": 0.9667260247135531, "learning_rate": 4.88727417985631e-05, "loss": 0.89, "step": 3285 }, { "epoch": 0.38, "grad_norm": 0.8707815571078674, "learning_rate": 4.886933377788612e-05, "loss": 0.8951, "step": 3290 }, { "epoch": 0.38, "grad_norm": 0.9174998711832358, "learning_rate": 4.886592073246434e-05, "loss": 0.8957, "step": 3295 }, { "epoch": 0.39, "grad_norm": 0.8704654666554288, "learning_rate": 4.886250266301625e-05, "loss": 0.9659, "step": 3300 }, { "epoch": 0.39, "grad_norm": 0.923399218666802, "learning_rate": 4.885907957026139e-05, "loss": 0.9264, "step": 3305 }, { "epoch": 0.39, "grad_norm": 0.9726739535206362, "learning_rate": 4.885565145492036e-05, "loss": 0.9814, "step": 3310 }, { "epoch": 0.39, "grad_norm": 0.9081310006502049, "learning_rate": 4.885221831771482e-05, "loss": 0.8522, "step": 3315 }, { "epoch": 0.39, "grad_norm": 1.0783138772420493, "learning_rate": 4.884878015936747e-05, "loss": 0.9699, "step": 3320 }, { "epoch": 0.39, "grad_norm": 0.9963254199382147, "learning_rate": 4.884533698060208e-05, "loss": 1.0089, "step": 3325 }, { "epoch": 0.39, "grad_norm": 1.2552027299542712, "learning_rate": 4.884188878214347e-05, "loss": 0.8451, "step": 3330 }, { "epoch": 0.39, "grad_norm": 0.9285096007241638, "learning_rate": 4.883843556471754e-05, "loss": 0.8354, "step": 3335 }, { "epoch": 0.39, "grad_norm": 0.8832948838268647, "learning_rate": 4.8834977329051225e-05, "loss": 0.9583, "step": 3340 }, { "epoch": 0.39, "grad_norm": 0.8645112372948729, "learning_rate": 4.8831514075872495e-05, "loss": 0.934, "step": 3345 }, { "epoch": 0.39, "grad_norm": 1.0339029443434846, "learning_rate": 4.882804580591043e-05, "loss": 0.9797, "step": 3350 }, { "epoch": 0.39, "grad_norm": 1.0538097924029293, "learning_rate": 4.882457251989513e-05, "loss": 0.9737, "step": 3355 }, { "epoch": 0.39, "grad_norm": 1.1217270662925884, "learning_rate": 4.882109421855775e-05, "loss": 0.9868, "step": 3360 }, { "epoch": 0.39, "grad_norm": 0.9589544546099569, "learning_rate": 4.881761090263051e-05, "loss": 0.995, "step": 3365 }, { "epoch": 0.39, "grad_norm": 1.045924708142707, "learning_rate": 4.8814122572846686e-05, "loss": 0.8722, "step": 3370 }, { "epoch": 0.39, "grad_norm": 1.042472714033629, "learning_rate": 4.881062922994061e-05, "loss": 0.9242, "step": 3375 }, { "epoch": 0.39, "grad_norm": 0.8493291573005518, "learning_rate": 4.880713087464767e-05, "loss": 0.8903, "step": 3380 }, { "epoch": 0.4, "grad_norm": 0.9657697879521427, "learning_rate": 4.88036275077043e-05, "loss": 0.9369, "step": 3385 }, { "epoch": 0.4, "grad_norm": 0.9737366223994887, "learning_rate": 4.8800119129848e-05, "loss": 1.0918, "step": 3390 }, { "epoch": 0.4, "grad_norm": 0.9672779862477917, "learning_rate": 4.879660574181733e-05, "loss": 0.9547, "step": 3395 }, { "epoch": 0.4, "grad_norm": 0.9281581147091152, "learning_rate": 4.879308734435186e-05, "loss": 0.8319, "step": 3400 }, { "epoch": 0.4, "grad_norm": 0.9629420634129155, "learning_rate": 4.878956393819229e-05, "loss": 0.896, "step": 3405 }, { "epoch": 0.4, "grad_norm": 0.9143774121504513, "learning_rate": 4.878603552408032e-05, "loss": 0.9587, "step": 3410 }, { "epoch": 0.4, "grad_norm": 0.921423590452113, "learning_rate": 4.8782502102758705e-05, "loss": 0.9646, "step": 3415 }, { "epoch": 0.4, "grad_norm": 0.9414297550730459, "learning_rate": 4.8778963674971276e-05, "loss": 0.9735, "step": 3420 }, { "epoch": 0.4, "grad_norm": 1.035230794090253, "learning_rate": 4.877542024146292e-05, "loss": 0.8706, "step": 3425 }, { "epoch": 0.4, "grad_norm": 0.9719924579135688, "learning_rate": 4.877187180297955e-05, "loss": 0.9009, "step": 3430 }, { "epoch": 0.4, "grad_norm": 0.9060580029938262, "learning_rate": 4.876831836026816e-05, "loss": 0.9422, "step": 3435 }, { "epoch": 0.4, "grad_norm": 1.0369044673459387, "learning_rate": 4.876475991407679e-05, "loss": 0.942, "step": 3440 }, { "epoch": 0.4, "grad_norm": 1.1387762914430315, "learning_rate": 4.876119646515452e-05, "loss": 0.8789, "step": 3445 }, { "epoch": 0.4, "grad_norm": 1.037051145404901, "learning_rate": 4.875762801425149e-05, "loss": 0.9871, "step": 3450 }, { "epoch": 0.4, "grad_norm": 1.4205212192870267, "learning_rate": 4.875405456211891e-05, "loss": 0.9601, "step": 3455 }, { "epoch": 0.4, "grad_norm": 0.9113096528951976, "learning_rate": 4.875047610950903e-05, "loss": 0.8599, "step": 3460 }, { "epoch": 0.4, "grad_norm": 0.938545710586039, "learning_rate": 4.874689265717514e-05, "loss": 0.8487, "step": 3465 }, { "epoch": 0.41, "grad_norm": 0.9578703197917051, "learning_rate": 4.874330420587159e-05, "loss": 0.9043, "step": 3470 }, { "epoch": 0.41, "grad_norm": 0.8878961559275605, "learning_rate": 4.8739710756353806e-05, "loss": 0.8568, "step": 3475 }, { "epoch": 0.41, "grad_norm": 1.0774414056263133, "learning_rate": 4.873611230937824e-05, "loss": 0.9369, "step": 3480 }, { "epoch": 0.41, "grad_norm": 0.848770373189615, "learning_rate": 4.8732508865702383e-05, "loss": 0.9162, "step": 3485 }, { "epoch": 0.41, "grad_norm": 1.1187796611034349, "learning_rate": 4.872890042608482e-05, "loss": 0.9479, "step": 3490 }, { "epoch": 0.41, "grad_norm": 0.9799458708066113, "learning_rate": 4.8725286991285166e-05, "loss": 0.918, "step": 3495 }, { "epoch": 0.41, "grad_norm": 0.9758562791483246, "learning_rate": 4.872166856206407e-05, "loss": 0.8546, "step": 3500 }, { "epoch": 0.41, "grad_norm": 0.9417872835004778, "learning_rate": 4.871804513918327e-05, "loss": 0.8721, "step": 3505 }, { "epoch": 0.41, "grad_norm": 0.9778738679536334, "learning_rate": 4.871441672340551e-05, "loss": 1.0115, "step": 3510 }, { "epoch": 0.41, "grad_norm": 0.9285846263492525, "learning_rate": 4.8710783315494634e-05, "loss": 0.8275, "step": 3515 }, { "epoch": 0.41, "grad_norm": 1.014052487657275, "learning_rate": 4.8707144916215495e-05, "loss": 0.9113, "step": 3520 }, { "epoch": 0.41, "grad_norm": 0.9787743356118391, "learning_rate": 4.870350152633403e-05, "loss": 0.8502, "step": 3525 }, { "epoch": 0.41, "grad_norm": 1.2207431321000475, "learning_rate": 4.86998531466172e-05, "loss": 0.9187, "step": 3530 }, { "epoch": 0.41, "grad_norm": 1.1431518242301262, "learning_rate": 4.869619977783302e-05, "loss": 0.8886, "step": 3535 }, { "epoch": 0.41, "grad_norm": 0.9623796712042414, "learning_rate": 4.869254142075057e-05, "loss": 0.9358, "step": 3540 }, { "epoch": 0.41, "grad_norm": 0.9647785006447487, "learning_rate": 4.868887807613999e-05, "loss": 0.8993, "step": 3545 }, { "epoch": 0.41, "grad_norm": 0.992140586359378, "learning_rate": 4.868520974477242e-05, "loss": 0.9473, "step": 3550 }, { "epoch": 0.42, "grad_norm": 1.1316075323489232, "learning_rate": 4.8681536427420106e-05, "loss": 0.8848, "step": 3555 }, { "epoch": 0.42, "grad_norm": 1.002368008144341, "learning_rate": 4.8677858124856304e-05, "loss": 0.9134, "step": 3560 }, { "epoch": 0.42, "grad_norm": 0.9906151117851623, "learning_rate": 4.867417483785534e-05, "loss": 0.9978, "step": 3565 }, { "epoch": 0.42, "grad_norm": 0.9840590733033028, "learning_rate": 4.867048656719261e-05, "loss": 0.7565, "step": 3570 }, { "epoch": 0.42, "grad_norm": 0.9736154818193262, "learning_rate": 4.866679331364449e-05, "loss": 0.9821, "step": 3575 }, { "epoch": 0.42, "grad_norm": 0.9857477854974904, "learning_rate": 4.8663095077988476e-05, "loss": 0.8801, "step": 3580 }, { "epoch": 0.42, "grad_norm": 0.9848764944902861, "learning_rate": 4.865939186100308e-05, "loss": 0.9331, "step": 3585 }, { "epoch": 0.42, "grad_norm": 0.868983713213606, "learning_rate": 4.8655683663467866e-05, "loss": 0.9015, "step": 3590 }, { "epoch": 0.42, "grad_norm": 0.9097275723858047, "learning_rate": 4.865197048616345e-05, "loss": 0.9275, "step": 3595 }, { "epoch": 0.42, "grad_norm": 0.9232703938802409, "learning_rate": 4.8648252329871494e-05, "loss": 0.9712, "step": 3600 }, { "epoch": 0.42, "grad_norm": 1.0454061209137948, "learning_rate": 4.864452919537471e-05, "loss": 0.8645, "step": 3605 }, { "epoch": 0.42, "grad_norm": 0.9503720222629888, "learning_rate": 4.8640801083456855e-05, "loss": 0.9811, "step": 3610 }, { "epoch": 0.42, "grad_norm": 1.002101452538871, "learning_rate": 4.863706799490273e-05, "loss": 0.97, "step": 3615 }, { "epoch": 0.42, "grad_norm": 1.0758078400103814, "learning_rate": 4.8633329930498205e-05, "loss": 0.9431, "step": 3620 }, { "epoch": 0.42, "grad_norm": 1.1113795208235766, "learning_rate": 4.862958689103017e-05, "loss": 0.9679, "step": 3625 }, { "epoch": 0.42, "grad_norm": 1.3153025648107326, "learning_rate": 4.862583887728658e-05, "loss": 0.8778, "step": 3630 }, { "epoch": 0.42, "grad_norm": 1.0315773528641883, "learning_rate": 4.862208589005642e-05, "loss": 0.9315, "step": 3635 }, { "epoch": 0.43, "grad_norm": 1.2002600160762993, "learning_rate": 4.861832793012975e-05, "loss": 0.9482, "step": 3640 }, { "epoch": 0.43, "grad_norm": 1.015632637034598, "learning_rate": 4.861456499829764e-05, "loss": 0.9335, "step": 3645 }, { "epoch": 0.43, "grad_norm": 0.7865503244245898, "learning_rate": 4.8610797095352244e-05, "loss": 0.9218, "step": 3650 }, { "epoch": 0.43, "grad_norm": 0.93604473107611, "learning_rate": 4.8607024222086736e-05, "loss": 0.8848, "step": 3655 }, { "epoch": 0.43, "grad_norm": 0.9335809782245598, "learning_rate": 4.8603246379295345e-05, "loss": 0.8826, "step": 3660 }, { "epoch": 0.43, "grad_norm": 0.8557833747486344, "learning_rate": 4.8599463567773354e-05, "loss": 0.8693, "step": 3665 }, { "epoch": 0.43, "grad_norm": 1.2620023598165786, "learning_rate": 4.859567578831708e-05, "loss": 0.9407, "step": 3670 }, { "epoch": 0.43, "grad_norm": 0.9554266700919634, "learning_rate": 4.8591883041723886e-05, "loss": 0.8954, "step": 3675 }, { "epoch": 0.43, "grad_norm": 0.9436274921228281, "learning_rate": 4.8588085328792185e-05, "loss": 0.9836, "step": 3680 }, { "epoch": 0.43, "grad_norm": 1.1258174269715784, "learning_rate": 4.858428265032145e-05, "loss": 0.9217, "step": 3685 }, { "epoch": 0.43, "grad_norm": 0.9403918675581807, "learning_rate": 4.858047500711217e-05, "loss": 0.9027, "step": 3690 }, { "epoch": 0.43, "grad_norm": 0.9414728823176005, "learning_rate": 4.857666239996589e-05, "loss": 0.9704, "step": 3695 }, { "epoch": 0.43, "grad_norm": 0.8516341972896481, "learning_rate": 4.8572844829685215e-05, "loss": 0.9258, "step": 3700 }, { "epoch": 0.43, "grad_norm": 0.9603444658951118, "learning_rate": 4.856902229707378e-05, "loss": 0.864, "step": 3705 }, { "epoch": 0.43, "grad_norm": 0.8887753326166056, "learning_rate": 4.8565194802936274e-05, "loss": 0.8712, "step": 3710 }, { "epoch": 0.43, "grad_norm": 0.9899098582615348, "learning_rate": 4.8561362348078406e-05, "loss": 0.9132, "step": 3715 }, { "epoch": 0.43, "grad_norm": 1.0404052953709504, "learning_rate": 4.8557524933306976e-05, "loss": 0.8879, "step": 3720 }, { "epoch": 0.44, "grad_norm": 1.15102975947701, "learning_rate": 4.8553682559429774e-05, "loss": 1.0017, "step": 3725 }, { "epoch": 0.44, "grad_norm": 0.9237196737475427, "learning_rate": 4.854983522725568e-05, "loss": 0.9869, "step": 3730 }, { "epoch": 0.44, "grad_norm": 0.8964349023778845, "learning_rate": 4.8545982937594576e-05, "loss": 0.9358, "step": 3735 }, { "epoch": 0.44, "grad_norm": 0.9484986614369278, "learning_rate": 4.854212569125742e-05, "loss": 0.904, "step": 3740 }, { "epoch": 0.44, "grad_norm": 1.0694168360652927, "learning_rate": 4.853826348905621e-05, "loss": 0.9153, "step": 3745 }, { "epoch": 0.44, "grad_norm": 0.9569871125779592, "learning_rate": 4.853439633180398e-05, "loss": 0.9858, "step": 3750 }, { "epoch": 0.44, "grad_norm": 1.1165116866806482, "learning_rate": 4.85305242203148e-05, "loss": 0.9694, "step": 3755 }, { "epoch": 0.44, "grad_norm": 1.0063975761554997, "learning_rate": 4.852664715540378e-05, "loss": 0.9284, "step": 3760 }, { "epoch": 0.44, "grad_norm": 0.9712906839689703, "learning_rate": 4.8522765137887104e-05, "loss": 0.993, "step": 3765 }, { "epoch": 0.44, "grad_norm": 0.9378860687864021, "learning_rate": 4.851887816858196e-05, "loss": 0.8751, "step": 3770 }, { "epoch": 0.44, "grad_norm": 1.0293388298169437, "learning_rate": 4.851498624830661e-05, "loss": 0.9277, "step": 3775 }, { "epoch": 0.44, "grad_norm": 0.964803385452915, "learning_rate": 4.851108937788034e-05, "loss": 0.9359, "step": 3780 }, { "epoch": 0.44, "grad_norm": 0.9552239954787749, "learning_rate": 4.8507187558123466e-05, "loss": 0.9062, "step": 3785 }, { "epoch": 0.44, "grad_norm": 0.9825883172444517, "learning_rate": 4.850328078985739e-05, "loss": 0.8924, "step": 3790 }, { "epoch": 0.44, "grad_norm": 1.4606665057540582, "learning_rate": 4.84993690739045e-05, "loss": 0.9281, "step": 3795 }, { "epoch": 0.44, "grad_norm": 0.8861706171878336, "learning_rate": 4.849545241108827e-05, "loss": 0.8769, "step": 3800 }, { "epoch": 0.44, "grad_norm": 1.0164064103564012, "learning_rate": 4.8491530802233195e-05, "loss": 0.9701, "step": 3805 }, { "epoch": 0.45, "grad_norm": 1.0018902462532118, "learning_rate": 4.848760424816482e-05, "loss": 0.9851, "step": 3810 }, { "epoch": 0.45, "grad_norm": 1.0023656745784852, "learning_rate": 4.848367274970972e-05, "loss": 0.8762, "step": 3815 }, { "epoch": 0.45, "grad_norm": 0.9686707426613198, "learning_rate": 4.8479736307695514e-05, "loss": 0.9294, "step": 3820 }, { "epoch": 0.45, "grad_norm": 0.9814802177232217, "learning_rate": 4.8475794922950855e-05, "loss": 0.9429, "step": 3825 }, { "epoch": 0.45, "grad_norm": 0.9630873007715853, "learning_rate": 4.8471848596305466e-05, "loss": 1.0083, "step": 3830 }, { "epoch": 0.45, "grad_norm": 0.8831676045344481, "learning_rate": 4.846789732859009e-05, "loss": 0.8681, "step": 3835 }, { "epoch": 0.45, "grad_norm": 0.9343165035221371, "learning_rate": 4.84639411206365e-05, "loss": 0.9011, "step": 3840 }, { "epoch": 0.45, "grad_norm": 0.8607232413760687, "learning_rate": 4.845997997327751e-05, "loss": 0.9791, "step": 3845 }, { "epoch": 0.45, "grad_norm": 0.9253966585760002, "learning_rate": 4.8456013887347007e-05, "loss": 1.0433, "step": 3850 }, { "epoch": 0.45, "grad_norm": 1.0537063093602257, "learning_rate": 4.8452042863679867e-05, "loss": 0.9764, "step": 3855 }, { "epoch": 0.45, "grad_norm": 1.0574085332855863, "learning_rate": 4.844806690311206e-05, "loss": 0.911, "step": 3860 }, { "epoch": 0.45, "grad_norm": 0.9166911162048151, "learning_rate": 4.844408600648055e-05, "loss": 0.9462, "step": 3865 }, { "epoch": 0.45, "grad_norm": 0.9639905473307447, "learning_rate": 4.8440100174623354e-05, "loss": 0.9236, "step": 3870 }, { "epoch": 0.45, "grad_norm": 1.185921639828869, "learning_rate": 4.843610940837954e-05, "loss": 0.9108, "step": 3875 }, { "epoch": 0.45, "grad_norm": 0.9631774554939261, "learning_rate": 4.843211370858921e-05, "loss": 0.8965, "step": 3880 }, { "epoch": 0.45, "grad_norm": 1.1035487993577413, "learning_rate": 4.842811307609349e-05, "loss": 0.9307, "step": 3885 }, { "epoch": 0.45, "grad_norm": 0.9889554691312329, "learning_rate": 4.8424107511734564e-05, "loss": 0.881, "step": 3890 }, { "epoch": 0.46, "grad_norm": 32.859193072098826, "learning_rate": 4.8420097016355636e-05, "loss": 1.0766, "step": 3895 }, { "epoch": 0.46, "grad_norm": 0.9531821021610071, "learning_rate": 4.841608159080097e-05, "loss": 0.8961, "step": 3900 }, { "epoch": 0.46, "grad_norm": 1.1455809758226858, "learning_rate": 4.841206123591584e-05, "loss": 0.9413, "step": 3905 }, { "epoch": 0.46, "grad_norm": 0.9362217231198446, "learning_rate": 4.8408035952546585e-05, "loss": 0.944, "step": 3910 }, { "epoch": 0.46, "grad_norm": 0.8591659987270633, "learning_rate": 4.840400574154056e-05, "loss": 0.9388, "step": 3915 }, { "epoch": 0.46, "grad_norm": 0.998473689677426, "learning_rate": 4.839997060374617e-05, "loss": 0.9462, "step": 3920 }, { "epoch": 0.46, "grad_norm": 0.9057666617989911, "learning_rate": 4.8395930540012856e-05, "loss": 0.9196, "step": 3925 }, { "epoch": 0.46, "grad_norm": 0.9816403390756506, "learning_rate": 4.83918855511911e-05, "loss": 0.8476, "step": 3930 }, { "epoch": 0.46, "grad_norm": 0.9620236884016036, "learning_rate": 4.83878356381324e-05, "loss": 0.9618, "step": 3935 }, { "epoch": 0.46, "grad_norm": 0.9579678045099858, "learning_rate": 4.8383780801689316e-05, "loss": 0.8265, "step": 3940 }, { "epoch": 0.46, "grad_norm": 1.0364290194181867, "learning_rate": 4.837972104271543e-05, "loss": 0.9419, "step": 3945 }, { "epoch": 0.46, "grad_norm": 0.9267244060802147, "learning_rate": 4.837565636206536e-05, "loss": 0.9367, "step": 3950 }, { "epoch": 0.46, "grad_norm": 1.0341605920182162, "learning_rate": 4.837158676059478e-05, "loss": 0.8827, "step": 3955 }, { "epoch": 0.46, "grad_norm": 0.9497141268959044, "learning_rate": 4.8367512239160356e-05, "loss": 0.9438, "step": 3960 }, { "epoch": 0.46, "grad_norm": 0.9076414951706951, "learning_rate": 4.8363432798619837e-05, "loss": 0.9458, "step": 3965 }, { "epoch": 0.46, "grad_norm": 0.9072736861592536, "learning_rate": 4.835934843983199e-05, "loss": 0.8802, "step": 3970 }, { "epoch": 0.46, "grad_norm": 1.2747160746450632, "learning_rate": 4.835525916365661e-05, "loss": 0.9166, "step": 3975 }, { "epoch": 0.47, "grad_norm": 1.1995159217567877, "learning_rate": 4.835116497095452e-05, "loss": 1.0289, "step": 3980 }, { "epoch": 0.47, "grad_norm": 1.0060549362142395, "learning_rate": 4.834706586258762e-05, "loss": 0.9123, "step": 3985 }, { "epoch": 0.47, "grad_norm": 1.1419137891389273, "learning_rate": 4.834296183941878e-05, "loss": 0.9299, "step": 3990 }, { "epoch": 0.47, "grad_norm": 1.0463373724405032, "learning_rate": 4.833885290231196e-05, "loss": 0.95, "step": 3995 }, { "epoch": 0.47, "grad_norm": 0.9563839536977482, "learning_rate": 4.833473905213214e-05, "loss": 0.8681, "step": 4000 }, { "epoch": 0.47, "grad_norm": 0.7736234244855686, "learning_rate": 4.833062028974531e-05, "loss": 0.9081, "step": 4005 }, { "epoch": 0.47, "grad_norm": 1.0449183588325297, "learning_rate": 4.832649661601853e-05, "loss": 0.896, "step": 4010 }, { "epoch": 0.47, "grad_norm": 1.050794809840981, "learning_rate": 4.8322368031819866e-05, "loss": 0.9457, "step": 4015 }, { "epoch": 0.47, "grad_norm": 0.8857422867920175, "learning_rate": 4.8318234538018436e-05, "loss": 0.8709, "step": 4020 }, { "epoch": 0.47, "grad_norm": 0.9857144285928388, "learning_rate": 4.831409613548437e-05, "loss": 0.9058, "step": 4025 }, { "epoch": 0.47, "grad_norm": 1.089070194398636, "learning_rate": 4.830995282508886e-05, "loss": 0.9154, "step": 4030 }, { "epoch": 0.47, "grad_norm": 1.100980387334043, "learning_rate": 4.8305804607704105e-05, "loss": 0.9042, "step": 4035 }, { "epoch": 0.47, "grad_norm": 1.1858151008226716, "learning_rate": 4.830165148420337e-05, "loss": 0.927, "step": 4040 }, { "epoch": 0.47, "grad_norm": 1.0286130018827226, "learning_rate": 4.8297493455460895e-05, "loss": 0.954, "step": 4045 }, { "epoch": 0.47, "grad_norm": 1.0712085938152927, "learning_rate": 4.829333052235202e-05, "loss": 0.8351, "step": 4050 }, { "epoch": 0.47, "grad_norm": 0.9593806141720698, "learning_rate": 4.828916268575308e-05, "loss": 0.9098, "step": 4055 }, { "epoch": 0.47, "grad_norm": 0.8772398047170469, "learning_rate": 4.828498994654143e-05, "loss": 0.9087, "step": 4060 }, { "epoch": 0.47, "grad_norm": 0.9115033207309853, "learning_rate": 4.8280812305595494e-05, "loss": 0.8425, "step": 4065 }, { "epoch": 0.48, "grad_norm": 1.038099343316853, "learning_rate": 4.827662976379469e-05, "loss": 0.923, "step": 4070 }, { "epoch": 0.48, "grad_norm": 0.9044012139299706, "learning_rate": 4.8272442322019514e-05, "loss": 0.9471, "step": 4075 }, { "epoch": 0.48, "grad_norm": 0.9995296892135088, "learning_rate": 4.826824998115145e-05, "loss": 0.8989, "step": 4080 }, { "epoch": 0.48, "grad_norm": 0.9668511561268096, "learning_rate": 4.826405274207303e-05, "loss": 0.9637, "step": 4085 }, { "epoch": 0.48, "grad_norm": 0.9369342435071403, "learning_rate": 4.8259850605667824e-05, "loss": 0.9217, "step": 4090 }, { "epoch": 0.48, "grad_norm": 0.8873437758274902, "learning_rate": 4.825564357282041e-05, "loss": 0.9025, "step": 4095 }, { "epoch": 0.48, "grad_norm": 1.093349441361928, "learning_rate": 4.8251431644416434e-05, "loss": 0.8621, "step": 4100 }, { "epoch": 0.48, "grad_norm": 0.905289402143519, "learning_rate": 4.824721482134253e-05, "loss": 0.8614, "step": 4105 }, { "epoch": 0.48, "grad_norm": 1.3170466529933793, "learning_rate": 4.82429931044864e-05, "loss": 0.9297, "step": 4110 }, { "epoch": 0.48, "grad_norm": 0.9723323292609203, "learning_rate": 4.8238766494736756e-05, "loss": 0.8245, "step": 4115 }, { "epoch": 0.48, "grad_norm": 1.1199257421067867, "learning_rate": 4.8234534992983324e-05, "loss": 0.8882, "step": 4120 }, { "epoch": 0.48, "grad_norm": 1.2417072458316452, "learning_rate": 4.823029860011692e-05, "loss": 0.9395, "step": 4125 }, { "epoch": 0.48, "grad_norm": 0.909759254924053, "learning_rate": 4.8226057317029305e-05, "loss": 0.9428, "step": 4130 }, { "epoch": 0.48, "grad_norm": 0.9896126573154411, "learning_rate": 4.822181114461334e-05, "loss": 0.9567, "step": 4135 }, { "epoch": 0.48, "grad_norm": 1.10468734439763, "learning_rate": 4.821756008376289e-05, "loss": 0.8813, "step": 4140 }, { "epoch": 0.48, "grad_norm": 1.0242063783201032, "learning_rate": 4.821330413537283e-05, "loss": 0.9361, "step": 4145 }, { "epoch": 0.48, "grad_norm": 1.0106293242206477, "learning_rate": 4.82090433003391e-05, "loss": 0.9634, "step": 4150 }, { "epoch": 0.49, "grad_norm": 1.013843054008336, "learning_rate": 4.820477757955864e-05, "loss": 0.9401, "step": 4155 }, { "epoch": 0.49, "grad_norm": 1.0293826642364385, "learning_rate": 4.820050697392943e-05, "loss": 0.9863, "step": 4160 }, { "epoch": 0.49, "grad_norm": 0.887095006123414, "learning_rate": 4.819623148435048e-05, "loss": 0.9072, "step": 4165 }, { "epoch": 0.49, "grad_norm": 0.8721552648061809, "learning_rate": 4.8191951111721824e-05, "loss": 0.8849, "step": 4170 }, { "epoch": 0.49, "grad_norm": 1.5979987854315105, "learning_rate": 4.818766585694452e-05, "loss": 0.999, "step": 4175 }, { "epoch": 0.49, "grad_norm": 0.9340425058718178, "learning_rate": 4.818337572092067e-05, "loss": 0.8784, "step": 4180 }, { "epoch": 0.49, "grad_norm": 0.9430409666393136, "learning_rate": 4.8179080704553386e-05, "loss": 0.8564, "step": 4185 }, { "epoch": 0.49, "grad_norm": 0.9335305379224227, "learning_rate": 4.817478080874681e-05, "loss": 0.9266, "step": 4190 }, { "epoch": 0.49, "grad_norm": 0.8789693682753087, "learning_rate": 4.817047603440613e-05, "loss": 0.931, "step": 4195 }, { "epoch": 0.49, "grad_norm": 0.9770230379694931, "learning_rate": 4.816616638243753e-05, "loss": 0.8626, "step": 4200 }, { "epoch": 0.49, "grad_norm": 0.974435575411324, "learning_rate": 4.816185185374824e-05, "loss": 0.9195, "step": 4205 }, { "epoch": 0.49, "grad_norm": 1.0296248792216185, "learning_rate": 4.8157532449246524e-05, "loss": 0.8555, "step": 4210 }, { "epoch": 0.49, "grad_norm": 0.9829890377233504, "learning_rate": 4.8153208169841646e-05, "loss": 0.8907, "step": 4215 }, { "epoch": 0.49, "grad_norm": 1.0199595843789433, "learning_rate": 4.814887901644392e-05, "loss": 0.9138, "step": 4220 }, { "epoch": 0.49, "grad_norm": 0.9120296230173331, "learning_rate": 4.814454498996468e-05, "loss": 0.9015, "step": 4225 }, { "epoch": 0.49, "grad_norm": 0.9967938476619388, "learning_rate": 4.8140206091316284e-05, "loss": 0.9581, "step": 4230 }, { "epoch": 0.49, "grad_norm": 0.9148132145145298, "learning_rate": 4.813586232141212e-05, "loss": 0.9081, "step": 4235 }, { "epoch": 0.5, "grad_norm": 0.8808678011367258, "learning_rate": 4.813151368116658e-05, "loss": 0.9289, "step": 4240 }, { "epoch": 0.5, "grad_norm": 0.8910130972642033, "learning_rate": 4.8127160171495115e-05, "loss": 0.9786, "step": 4245 }, { "epoch": 0.5, "grad_norm": 0.9160860753616116, "learning_rate": 4.8122801793314174e-05, "loss": 0.8792, "step": 4250 }, { "epoch": 0.5, "grad_norm": 1.2075292463237108, "learning_rate": 4.8118438547541254e-05, "loss": 0.9563, "step": 4255 }, { "epoch": 0.5, "grad_norm": 1.103142596488639, "learning_rate": 4.811407043509486e-05, "loss": 0.9339, "step": 4260 }, { "epoch": 0.5, "grad_norm": 1.0403493518530094, "learning_rate": 4.810969745689451e-05, "loss": 0.8931, "step": 4265 }, { "epoch": 0.5, "grad_norm": 1.4496385194282688, "learning_rate": 4.8105319613860776e-05, "loss": 0.8533, "step": 4270 }, { "epoch": 0.5, "grad_norm": 1.0841434371550498, "learning_rate": 4.810093690691524e-05, "loss": 0.9292, "step": 4275 }, { "epoch": 0.5, "grad_norm": 0.9015486947572188, "learning_rate": 4.80965493369805e-05, "loss": 0.8138, "step": 4280 }, { "epoch": 0.5, "grad_norm": 1.0521073120390954, "learning_rate": 4.809215690498019e-05, "loss": 0.8947, "step": 4285 }, { "epoch": 0.5, "grad_norm": 0.8972151213895544, "learning_rate": 4.808775961183897e-05, "loss": 0.8401, "step": 4290 }, { "epoch": 0.5, "grad_norm": 1.0161410560430715, "learning_rate": 4.8083357458482505e-05, "loss": 0.8835, "step": 4295 }, { "epoch": 0.5, "grad_norm": 1.0165586013998356, "learning_rate": 4.8078950445837494e-05, "loss": 0.9276, "step": 4300 }, { "epoch": 0.5, "grad_norm": 0.9651747648409673, "learning_rate": 4.807453857483167e-05, "loss": 0.7909, "step": 4305 }, { "epoch": 0.5, "grad_norm": 0.964488308075261, "learning_rate": 4.8070121846393766e-05, "loss": 0.8243, "step": 4310 }, { "epoch": 0.5, "grad_norm": 0.8766598782448897, "learning_rate": 4.806570026145356e-05, "loss": 0.9263, "step": 4315 }, { "epoch": 0.5, "grad_norm": 0.8604352610375053, "learning_rate": 4.8061273820941845e-05, "loss": 0.8852, "step": 4320 }, { "epoch": 0.51, "grad_norm": 1.0340822406917236, "learning_rate": 4.8056842525790415e-05, "loss": 0.7946, "step": 4325 }, { "epoch": 0.51, "grad_norm": 0.9541468500778845, "learning_rate": 4.805240637693212e-05, "loss": 0.8285, "step": 4330 }, { "epoch": 0.51, "grad_norm": 0.8857830983714304, "learning_rate": 4.8047965375300806e-05, "loss": 0.899, "step": 4335 }, { "epoch": 0.51, "grad_norm": 0.9166475002622385, "learning_rate": 4.804351952183136e-05, "loss": 0.8427, "step": 4340 }, { "epoch": 0.51, "grad_norm": 0.9182539569609515, "learning_rate": 4.803906881745968e-05, "loss": 0.8439, "step": 4345 }, { "epoch": 0.51, "grad_norm": 1.0236257744641755, "learning_rate": 4.803461326312267e-05, "loss": 0.9148, "step": 4350 }, { "epoch": 0.51, "grad_norm": 0.880806259464865, "learning_rate": 4.8030152859758284e-05, "loss": 0.8697, "step": 4355 }, { "epoch": 0.51, "grad_norm": 0.903662387966118, "learning_rate": 4.8025687608305494e-05, "loss": 0.9598, "step": 4360 }, { "epoch": 0.51, "grad_norm": 1.1530261481497712, "learning_rate": 4.802121750970425e-05, "loss": 0.8588, "step": 4365 }, { "epoch": 0.51, "grad_norm": 1.0126756005036486, "learning_rate": 4.8016742564895595e-05, "loss": 0.9082, "step": 4370 }, { "epoch": 0.51, "grad_norm": 1.0059117894191174, "learning_rate": 4.801226277482153e-05, "loss": 0.9754, "step": 4375 }, { "epoch": 0.51, "grad_norm": 0.9557827124825298, "learning_rate": 4.8007778140425084e-05, "loss": 0.9483, "step": 4380 }, { "epoch": 0.51, "grad_norm": 0.994421892397985, "learning_rate": 4.800328866265035e-05, "loss": 0.9208, "step": 4385 }, { "epoch": 0.51, "grad_norm": 0.8896209300235811, "learning_rate": 4.7998794342442385e-05, "loss": 0.8685, "step": 4390 }, { "epoch": 0.51, "grad_norm": 0.9706044832348802, "learning_rate": 4.79942951807473e-05, "loss": 0.951, "step": 4395 }, { "epoch": 0.51, "grad_norm": 0.9421807432329478, "learning_rate": 4.798979117851222e-05, "loss": 0.9295, "step": 4400 }, { "epoch": 0.51, "grad_norm": 0.9521430855453626, "learning_rate": 4.7985282336685277e-05, "loss": 0.8864, "step": 4405 }, { "epoch": 0.52, "grad_norm": 1.154205685917419, "learning_rate": 4.798076865621564e-05, "loss": 0.8945, "step": 4410 }, { "epoch": 0.52, "grad_norm": 0.92249520650557, "learning_rate": 4.797625013805348e-05, "loss": 0.9049, "step": 4415 }, { "epoch": 0.52, "grad_norm": 0.9648365961624972, "learning_rate": 4.7971726783149985e-05, "loss": 0.9322, "step": 4420 }, { "epoch": 0.52, "grad_norm": 0.9710002033355778, "learning_rate": 4.7967198592457376e-05, "loss": 0.853, "step": 4425 }, { "epoch": 0.52, "grad_norm": 0.9733922923210124, "learning_rate": 4.796266556692889e-05, "loss": 0.9371, "step": 4430 }, { "epoch": 0.52, "grad_norm": 0.8548254886728215, "learning_rate": 4.7958127707518765e-05, "loss": 0.8413, "step": 4435 }, { "epoch": 0.52, "grad_norm": 0.9504784552774678, "learning_rate": 4.7953585015182266e-05, "loss": 0.9082, "step": 4440 }, { "epoch": 0.52, "grad_norm": 0.8843052821219075, "learning_rate": 4.79490374908757e-05, "loss": 0.9312, "step": 4445 }, { "epoch": 0.52, "grad_norm": 0.9917184205598669, "learning_rate": 4.794448513555634e-05, "loss": 0.93, "step": 4450 }, { "epoch": 0.52, "grad_norm": 0.8953364964928886, "learning_rate": 4.793992795018253e-05, "loss": 0.8632, "step": 4455 }, { "epoch": 0.52, "grad_norm": 0.9121544104994831, "learning_rate": 4.793536593571358e-05, "loss": 0.8949, "step": 4460 }, { "epoch": 0.52, "grad_norm": 1.0186207045283104, "learning_rate": 4.7930799093109855e-05, "loss": 0.9303, "step": 4465 }, { "epoch": 0.52, "grad_norm": 0.9036844654904278, "learning_rate": 4.792622742333273e-05, "loss": 0.9547, "step": 4470 }, { "epoch": 0.52, "grad_norm": 0.9890914415279327, "learning_rate": 4.792165092734458e-05, "loss": 0.8666, "step": 4475 }, { "epoch": 0.52, "grad_norm": 1.0815879001314463, "learning_rate": 4.79170696061088e-05, "loss": 0.9001, "step": 4480 }, { "epoch": 0.52, "grad_norm": 1.0436150477506996, "learning_rate": 4.791248346058983e-05, "loss": 0.9092, "step": 4485 }, { "epoch": 0.52, "grad_norm": 0.9496671598384474, "learning_rate": 4.790789249175307e-05, "loss": 0.9871, "step": 4490 }, { "epoch": 0.53, "grad_norm": 0.9440921396640157, "learning_rate": 4.7903296700564994e-05, "loss": 0.8873, "step": 4495 }, { "epoch": 0.53, "grad_norm": 0.9228445487502488, "learning_rate": 4.789869608799304e-05, "loss": 0.9328, "step": 4500 }, { "epoch": 0.53, "grad_norm": 0.935063192093136, "learning_rate": 4.7894090655005705e-05, "loss": 0.9226, "step": 4505 }, { "epoch": 0.53, "grad_norm": 0.9415307777230048, "learning_rate": 4.7889480402572474e-05, "loss": 0.9135, "step": 4510 }, { "epoch": 0.53, "grad_norm": 1.0131784606673566, "learning_rate": 4.788486533166384e-05, "loss": 0.8674, "step": 4515 }, { "epoch": 0.53, "grad_norm": 0.9766246881415149, "learning_rate": 4.788024544325135e-05, "loss": 0.8623, "step": 4520 }, { "epoch": 0.53, "grad_norm": 0.9175019038199792, "learning_rate": 4.787562073830753e-05, "loss": 0.9839, "step": 4525 }, { "epoch": 0.53, "grad_norm": 0.9077423233786132, "learning_rate": 4.787099121780591e-05, "loss": 0.9163, "step": 4530 }, { "epoch": 0.53, "grad_norm": 1.7487036207781055, "learning_rate": 4.7866356882721074e-05, "loss": 0.8959, "step": 4535 }, { "epoch": 0.53, "grad_norm": 0.929568282711205, "learning_rate": 4.786171773402859e-05, "loss": 0.8877, "step": 4540 }, { "epoch": 0.53, "grad_norm": 1.007750144587037, "learning_rate": 4.785707377270505e-05, "loss": 1.0033, "step": 4545 }, { "epoch": 0.53, "grad_norm": 0.921980687483201, "learning_rate": 4.785242499972805e-05, "loss": 0.844, "step": 4550 }, { "epoch": 0.53, "grad_norm": 1.0735634987598275, "learning_rate": 4.784777141607622e-05, "loss": 0.9172, "step": 4555 }, { "epoch": 0.53, "grad_norm": 0.8729592164633109, "learning_rate": 4.784311302272917e-05, "loss": 0.847, "step": 4560 }, { "epoch": 0.53, "grad_norm": 1.0716544675804034, "learning_rate": 4.783844982066755e-05, "loss": 0.9323, "step": 4565 }, { "epoch": 0.53, "grad_norm": 0.8876233822448721, "learning_rate": 4.783378181087301e-05, "loss": 0.8679, "step": 4570 }, { "epoch": 0.53, "grad_norm": 0.9759926093199263, "learning_rate": 4.782910899432822e-05, "loss": 0.816, "step": 4575 }, { "epoch": 0.54, "grad_norm": 0.912771700478242, "learning_rate": 4.782443137201685e-05, "loss": 0.8517, "step": 4580 }, { "epoch": 0.54, "grad_norm": 0.9077966050043738, "learning_rate": 4.781974894492361e-05, "loss": 0.8785, "step": 4585 }, { "epoch": 0.54, "grad_norm": 0.9772236585209746, "learning_rate": 4.781506171403416e-05, "loss": 0.9017, "step": 4590 }, { "epoch": 0.54, "grad_norm": 0.8587431172774337, "learning_rate": 4.7810369680335244e-05, "loss": 0.9044, "step": 4595 }, { "epoch": 0.54, "grad_norm": 1.051849428066615, "learning_rate": 4.780567284481457e-05, "loss": 0.8639, "step": 4600 }, { "epoch": 0.54, "grad_norm": 1.029216872114914, "learning_rate": 4.780097120846088e-05, "loss": 0.9181, "step": 4605 }, { "epoch": 0.54, "grad_norm": 0.9255108873033635, "learning_rate": 4.77962647722639e-05, "loss": 0.8954, "step": 4610 }, { "epoch": 0.54, "grad_norm": 1.252840975415005, "learning_rate": 4.779155353721441e-05, "loss": 0.8463, "step": 4615 }, { "epoch": 0.54, "grad_norm": 0.8421827663973196, "learning_rate": 4.778683750430415e-05, "loss": 0.9074, "step": 4620 }, { "epoch": 0.54, "grad_norm": 0.9593702747479365, "learning_rate": 4.7782116674525914e-05, "loss": 0.8486, "step": 4625 }, { "epoch": 0.54, "grad_norm": 0.9407666945429404, "learning_rate": 4.7777391048873465e-05, "loss": 0.9361, "step": 4630 }, { "epoch": 0.54, "grad_norm": 0.9990179060227713, "learning_rate": 4.777266062834162e-05, "loss": 0.9305, "step": 4635 }, { "epoch": 0.54, "grad_norm": 0.9161714977758391, "learning_rate": 4.7767925413926166e-05, "loss": 1.0146, "step": 4640 }, { "epoch": 0.54, "grad_norm": 0.9159481848966394, "learning_rate": 4.776318540662392e-05, "loss": 0.9511, "step": 4645 }, { "epoch": 0.54, "grad_norm": 0.6947797501387168, "learning_rate": 4.7758440607432706e-05, "loss": 0.8602, "step": 4650 }, { "epoch": 0.54, "grad_norm": 0.9121919980966544, "learning_rate": 4.775369101735135e-05, "loss": 0.9218, "step": 4655 }, { "epoch": 0.54, "grad_norm": 0.9284442379877902, "learning_rate": 4.7748936637379694e-05, "loss": 0.9223, "step": 4660 }, { "epoch": 0.55, "grad_norm": 0.9426241791268728, "learning_rate": 4.774417746851858e-05, "loss": 0.8775, "step": 4665 }, { "epoch": 0.55, "grad_norm": 0.9048623332101565, "learning_rate": 4.773941351176987e-05, "loss": 0.9031, "step": 4670 }, { "epoch": 0.55, "grad_norm": 0.9501726490455027, "learning_rate": 4.773464476813642e-05, "loss": 0.9741, "step": 4675 }, { "epoch": 0.55, "grad_norm": 0.9451190277500853, "learning_rate": 4.77298712386221e-05, "loss": 0.8654, "step": 4680 }, { "epoch": 0.55, "grad_norm": 0.8322069942575794, "learning_rate": 4.7725092924231794e-05, "loss": 0.8728, "step": 4685 }, { "epoch": 0.55, "grad_norm": 0.985024297194413, "learning_rate": 4.772030982597138e-05, "loss": 0.9418, "step": 4690 }, { "epoch": 0.55, "grad_norm": 1.1569374921834679, "learning_rate": 4.7715521944847755e-05, "loss": 0.9027, "step": 4695 }, { "epoch": 0.55, "grad_norm": 0.935350101482852, "learning_rate": 4.7710729281868824e-05, "loss": 0.9695, "step": 4700 }, { "epoch": 0.55, "grad_norm": 0.9342995362666469, "learning_rate": 4.770593183804348e-05, "loss": 1.0218, "step": 4705 }, { "epoch": 0.55, "grad_norm": 0.9751550781951213, "learning_rate": 4.770112961438164e-05, "loss": 0.9267, "step": 4710 }, { "epoch": 0.55, "grad_norm": 0.9928310477183011, "learning_rate": 4.769632261189423e-05, "loss": 0.9102, "step": 4715 }, { "epoch": 0.55, "grad_norm": 0.9004968422207804, "learning_rate": 4.769151083159315e-05, "loss": 0.8548, "step": 4720 }, { "epoch": 0.55, "grad_norm": 0.8602921643959474, "learning_rate": 4.768669427449137e-05, "loss": 0.8487, "step": 4725 }, { "epoch": 0.55, "grad_norm": 0.9374211564514093, "learning_rate": 4.76818729416028e-05, "loss": 0.8868, "step": 4730 }, { "epoch": 0.55, "grad_norm": 0.9086038133032283, "learning_rate": 4.767704683394238e-05, "loss": 0.9158, "step": 4735 }, { "epoch": 0.55, "grad_norm": 0.9777676541258469, "learning_rate": 4.7672215952526064e-05, "loss": 0.8774, "step": 4740 }, { "epoch": 0.55, "grad_norm": 0.9131377351677685, "learning_rate": 4.76673802983708e-05, "loss": 0.8715, "step": 4745 }, { "epoch": 0.55, "grad_norm": 1.2818015794324125, "learning_rate": 4.766253987249454e-05, "loss": 0.9264, "step": 4750 }, { "epoch": 0.56, "grad_norm": 0.8879275727418001, "learning_rate": 4.765769467591625e-05, "loss": 0.8928, "step": 4755 }, { "epoch": 0.56, "grad_norm": 0.8730931288349222, "learning_rate": 4.76528447096559e-05, "loss": 0.9204, "step": 4760 }, { "epoch": 0.56, "grad_norm": 0.9797363287638522, "learning_rate": 4.7647989974734445e-05, "loss": 1.0487, "step": 4765 }, { "epoch": 0.56, "grad_norm": 0.9139472801202914, "learning_rate": 4.764313047217386e-05, "loss": 0.8991, "step": 4770 }, { "epoch": 0.56, "grad_norm": 0.8151305238101016, "learning_rate": 4.763826620299714e-05, "loss": 0.8295, "step": 4775 }, { "epoch": 0.56, "grad_norm": 0.9020269228062616, "learning_rate": 4.763339716822824e-05, "loss": 0.8683, "step": 4780 }, { "epoch": 0.56, "grad_norm": 0.9680056504033638, "learning_rate": 4.7628523368892155e-05, "loss": 0.8972, "step": 4785 }, { "epoch": 0.56, "grad_norm": 1.006194949570781, "learning_rate": 4.7623644806014865e-05, "loss": 0.9575, "step": 4790 }, { "epoch": 0.56, "grad_norm": 0.9761220891959065, "learning_rate": 4.761876148062337e-05, "loss": 1.0198, "step": 4795 }, { "epoch": 0.56, "grad_norm": 0.9089820236597586, "learning_rate": 4.761387339374565e-05, "loss": 0.8853, "step": 4800 }, { "epoch": 0.56, "grad_norm": 0.8835758111217743, "learning_rate": 4.7608980546410705e-05, "loss": 0.95, "step": 4805 }, { "epoch": 0.56, "grad_norm": 0.9362541763841585, "learning_rate": 4.760408293964852e-05, "loss": 0.9253, "step": 4810 }, { "epoch": 0.56, "grad_norm": 1.1263601048794918, "learning_rate": 4.759918057449011e-05, "loss": 0.8761, "step": 4815 }, { "epoch": 0.56, "grad_norm": 0.9118611897844395, "learning_rate": 4.7594273451967455e-05, "loss": 0.897, "step": 4820 }, { "epoch": 0.56, "grad_norm": 0.9483346946621739, "learning_rate": 4.7589361573113573e-05, "loss": 0.8852, "step": 4825 }, { "epoch": 0.56, "grad_norm": 1.0592288762146533, "learning_rate": 4.758444493896246e-05, "loss": 0.8483, "step": 4830 }, { "epoch": 0.56, "grad_norm": 0.9621172975665744, "learning_rate": 4.757952355054911e-05, "loss": 0.9341, "step": 4835 }, { "epoch": 0.57, "grad_norm": 0.9914306692414427, "learning_rate": 4.7574597408909535e-05, "loss": 0.9061, "step": 4840 }, { "epoch": 0.57, "grad_norm": 0.9106002064523849, "learning_rate": 4.7569666515080735e-05, "loss": 0.8839, "step": 4845 }, { "epoch": 0.57, "grad_norm": 1.1394912907978014, "learning_rate": 4.756473087010073e-05, "loss": 0.8607, "step": 4850 }, { "epoch": 0.57, "grad_norm": 0.9325749129810041, "learning_rate": 4.7559790475008505e-05, "loss": 0.8703, "step": 4855 }, { "epoch": 0.57, "grad_norm": 0.9514193475932677, "learning_rate": 4.7554845330844066e-05, "loss": 1.0, "step": 4860 }, { "epoch": 0.57, "grad_norm": 0.9472240188171506, "learning_rate": 4.754989543864844e-05, "loss": 0.9111, "step": 4865 }, { "epoch": 0.57, "grad_norm": 0.8495427060955298, "learning_rate": 4.7544940799463606e-05, "loss": 0.9019, "step": 4870 }, { "epoch": 0.57, "grad_norm": 0.9074393459645913, "learning_rate": 4.753998141433258e-05, "loss": 0.8653, "step": 4875 }, { "epoch": 0.57, "grad_norm": 0.9589345459468107, "learning_rate": 4.753501728429936e-05, "loss": 0.9159, "step": 4880 }, { "epoch": 0.57, "grad_norm": 1.031657021745517, "learning_rate": 4.7530048410408956e-05, "loss": 0.9071, "step": 4885 }, { "epoch": 0.57, "grad_norm": 0.8088513800368576, "learning_rate": 4.752507479370735e-05, "loss": 0.9179, "step": 4890 }, { "epoch": 0.57, "grad_norm": 0.997198203419029, "learning_rate": 4.752009643524156e-05, "loss": 0.8878, "step": 4895 }, { "epoch": 0.57, "grad_norm": 1.01836760046325, "learning_rate": 4.751511333605957e-05, "loss": 0.848, "step": 4900 }, { "epoch": 0.57, "grad_norm": 1.0784323710187127, "learning_rate": 4.7510125497210386e-05, "loss": 1.0026, "step": 4905 }, { "epoch": 0.57, "grad_norm": 0.8034495194620102, "learning_rate": 4.7505132919743985e-05, "loss": 0.8813, "step": 4910 }, { "epoch": 0.57, "grad_norm": 0.9489501992658349, "learning_rate": 4.750013560471137e-05, "loss": 0.8547, "step": 4915 }, { "epoch": 0.57, "grad_norm": 0.8420252740468569, "learning_rate": 4.749513355316452e-05, "loss": 0.8621, "step": 4920 }, { "epoch": 0.58, "grad_norm": 0.9326413415392528, "learning_rate": 4.749012676615642e-05, "loss": 0.9095, "step": 4925 }, { "epoch": 0.58, "grad_norm": 1.0359317637757601, "learning_rate": 4.7485115244741054e-05, "loss": 0.9154, "step": 4930 }, { "epoch": 0.58, "grad_norm": 1.0627702479440768, "learning_rate": 4.748009898997341e-05, "loss": 0.8251, "step": 4935 }, { "epoch": 0.58, "grad_norm": 0.9363955667423713, "learning_rate": 4.747507800290944e-05, "loss": 0.8728, "step": 4940 }, { "epoch": 0.58, "grad_norm": 0.8783367661148899, "learning_rate": 4.7470052284606126e-05, "loss": 0.8952, "step": 4945 }, { "epoch": 0.58, "grad_norm": 0.8945666331789611, "learning_rate": 4.746502183612144e-05, "loss": 0.8836, "step": 4950 }, { "epoch": 0.58, "grad_norm": 1.0774006235838147, "learning_rate": 4.745998665851433e-05, "loss": 0.902, "step": 4955 }, { "epoch": 0.58, "grad_norm": 0.9349831367340914, "learning_rate": 4.745494675284477e-05, "loss": 0.9048, "step": 4960 }, { "epoch": 0.58, "grad_norm": 0.9343428599788585, "learning_rate": 4.744990212017371e-05, "loss": 0.896, "step": 4965 }, { "epoch": 0.58, "grad_norm": 1.0767793624774202, "learning_rate": 4.744485276156307e-05, "loss": 0.9706, "step": 4970 }, { "epoch": 0.58, "grad_norm": 1.022127402159067, "learning_rate": 4.743979867807584e-05, "loss": 0.9905, "step": 4975 }, { "epoch": 0.58, "grad_norm": 1.0514513769724285, "learning_rate": 4.7434739870775926e-05, "loss": 1.0015, "step": 4980 }, { "epoch": 0.58, "grad_norm": 0.8905449188591552, "learning_rate": 4.7429676340728265e-05, "loss": 0.952, "step": 4985 }, { "epoch": 0.58, "grad_norm": 0.998220478427225, "learning_rate": 4.742460808899878e-05, "loss": 0.9352, "step": 4990 }, { "epoch": 0.58, "grad_norm": 1.0630463119634113, "learning_rate": 4.74195351166544e-05, "loss": 0.8896, "step": 4995 }, { "epoch": 0.58, "grad_norm": 0.9953669765411227, "learning_rate": 4.7414457424763036e-05, "loss": 0.8861, "step": 5000 }, { "epoch": 0.58, "grad_norm": 1.174672433352205, "learning_rate": 4.7409375014393596e-05, "loss": 0.8438, "step": 5005 }, { "epoch": 0.59, "grad_norm": 1.1098446664691177, "learning_rate": 4.740428788661598e-05, "loss": 0.9717, "step": 5010 }, { "epoch": 0.59, "grad_norm": 1.0471451752218335, "learning_rate": 4.739919604250108e-05, "loss": 0.8306, "step": 5015 }, { "epoch": 0.59, "grad_norm": 0.950781312544515, "learning_rate": 4.739409948312078e-05, "loss": 0.8309, "step": 5020 }, { "epoch": 0.59, "grad_norm": 1.0125908425171948, "learning_rate": 4.7388998209547966e-05, "loss": 0.9146, "step": 5025 }, { "epoch": 0.59, "grad_norm": 0.8133748718770355, "learning_rate": 4.7383892222856507e-05, "loss": 0.789, "step": 5030 }, { "epoch": 0.59, "grad_norm": 0.9024164148250481, "learning_rate": 4.737878152412126e-05, "loss": 0.9415, "step": 5035 }, { "epoch": 0.59, "grad_norm": 0.9055245550422416, "learning_rate": 4.737366611441809e-05, "loss": 0.9178, "step": 5040 }, { "epoch": 0.59, "grad_norm": 1.01711755576948, "learning_rate": 4.736854599482384e-05, "loss": 0.91, "step": 5045 }, { "epoch": 0.59, "grad_norm": 0.9376320461704247, "learning_rate": 4.736342116641635e-05, "loss": 0.9189, "step": 5050 }, { "epoch": 0.59, "grad_norm": 0.9350308591729168, "learning_rate": 4.735829163027445e-05, "loss": 0.8618, "step": 5055 }, { "epoch": 0.59, "grad_norm": 0.8757662379212904, "learning_rate": 4.7353157387477954e-05, "loss": 0.9307, "step": 5060 }, { "epoch": 0.59, "grad_norm": 0.9366293955471897, "learning_rate": 4.734801843910769e-05, "loss": 0.9464, "step": 5065 }, { "epoch": 0.59, "grad_norm": 0.9084759807917344, "learning_rate": 4.734287478624545e-05, "loss": 0.8634, "step": 5070 }, { "epoch": 0.59, "grad_norm": 1.0675134198899723, "learning_rate": 4.733772642997402e-05, "loss": 0.8878, "step": 5075 }, { "epoch": 0.59, "grad_norm": 0.9687610193245658, "learning_rate": 4.73325733713772e-05, "loss": 0.9911, "step": 5080 }, { "epoch": 0.59, "grad_norm": 0.9667590688322949, "learning_rate": 4.732741561153974e-05, "loss": 0.9058, "step": 5085 }, { "epoch": 0.59, "grad_norm": 0.9219803235931052, "learning_rate": 4.732225315154743e-05, "loss": 0.8915, "step": 5090 }, { "epoch": 0.6, "grad_norm": 0.9623469186831466, "learning_rate": 4.7317085992487e-05, "loss": 0.9821, "step": 5095 }, { "epoch": 0.6, "grad_norm": 0.9352372212174855, "learning_rate": 4.73119141354462e-05, "loss": 0.8408, "step": 5100 }, { "epoch": 0.6, "grad_norm": 0.8491799245817572, "learning_rate": 4.730673758151376e-05, "loss": 0.8709, "step": 5105 }, { "epoch": 0.6, "grad_norm": 0.9280866922102348, "learning_rate": 4.73015563317794e-05, "loss": 0.8633, "step": 5110 }, { "epoch": 0.6, "grad_norm": 0.8808344319929562, "learning_rate": 4.729637038733382e-05, "loss": 0.9068, "step": 5115 }, { "epoch": 0.6, "grad_norm": 0.9420756531187319, "learning_rate": 4.729117974926873e-05, "loss": 0.9112, "step": 5120 }, { "epoch": 0.6, "grad_norm": 0.9110611714148382, "learning_rate": 4.7285984418676796e-05, "loss": 0.9096, "step": 5125 }, { "epoch": 0.6, "grad_norm": 1.018586607557034, "learning_rate": 4.72807843966517e-05, "loss": 0.9809, "step": 5130 }, { "epoch": 0.6, "grad_norm": 0.9931577086155187, "learning_rate": 4.7275579684288104e-05, "loss": 0.8507, "step": 5135 }, { "epoch": 0.6, "grad_norm": 1.0013243282244393, "learning_rate": 4.727037028268164e-05, "loss": 0.9356, "step": 5140 }, { "epoch": 0.6, "grad_norm": 0.9245783918713356, "learning_rate": 4.726515619292896e-05, "loss": 0.8777, "step": 5145 }, { "epoch": 0.6, "grad_norm": 0.9083869557073138, "learning_rate": 4.725993741612767e-05, "loss": 0.8558, "step": 5150 }, { "epoch": 0.6, "grad_norm": 0.9495953702450738, "learning_rate": 4.7254713953376384e-05, "loss": 0.9251, "step": 5155 }, { "epoch": 0.6, "grad_norm": 1.0355595589739228, "learning_rate": 4.72494858057747e-05, "loss": 0.9259, "step": 5160 }, { "epoch": 0.6, "grad_norm": 0.9838685115091735, "learning_rate": 4.7244252974423185e-05, "loss": 0.9668, "step": 5165 }, { "epoch": 0.6, "grad_norm": 1.0456732532342636, "learning_rate": 4.7239015460423414e-05, "loss": 0.8755, "step": 5170 }, { "epoch": 0.6, "grad_norm": 0.8736368841801216, "learning_rate": 4.723377326487794e-05, "loss": 0.846, "step": 5175 }, { "epoch": 0.61, "grad_norm": 1.0678483944355903, "learning_rate": 4.722852638889029e-05, "loss": 0.9019, "step": 5180 }, { "epoch": 0.61, "grad_norm": 1.031726819181275, "learning_rate": 4.7223274833565e-05, "loss": 0.8859, "step": 5185 }, { "epoch": 0.61, "grad_norm": 0.8978785546916876, "learning_rate": 4.721801860000757e-05, "loss": 1.0386, "step": 5190 }, { "epoch": 0.61, "grad_norm": 0.9677446475986918, "learning_rate": 4.721275768932449e-05, "loss": 0.8884, "step": 5195 }, { "epoch": 0.61, "grad_norm": 1.0338790914888178, "learning_rate": 4.720749210262324e-05, "loss": 0.9328, "step": 5200 }, { "epoch": 0.61, "grad_norm": 0.8701627091871788, "learning_rate": 4.7202221841012275e-05, "loss": 0.919, "step": 5205 }, { "epoch": 0.61, "grad_norm": 0.8883313644836678, "learning_rate": 4.7196946905601055e-05, "loss": 0.8951, "step": 5210 }, { "epoch": 0.61, "grad_norm": 1.0195922728084343, "learning_rate": 4.7191667297499994e-05, "loss": 0.8481, "step": 5215 }, { "epoch": 0.61, "grad_norm": 0.8879610077544813, "learning_rate": 4.7186383017820516e-05, "loss": 0.869, "step": 5220 }, { "epoch": 0.61, "grad_norm": 0.9838993840626952, "learning_rate": 4.718109406767502e-05, "loss": 0.9522, "step": 5225 }, { "epoch": 0.61, "grad_norm": 1.0250758583408783, "learning_rate": 4.7175800448176865e-05, "loss": 0.9395, "step": 5230 }, { "epoch": 0.61, "grad_norm": 0.8760290609510584, "learning_rate": 4.7170502160440424e-05, "loss": 0.8169, "step": 5235 }, { "epoch": 0.61, "grad_norm": 0.9721790601477689, "learning_rate": 4.7165199205581053e-05, "loss": 0.8023, "step": 5240 }, { "epoch": 0.61, "grad_norm": 0.9638278814892691, "learning_rate": 4.715989158471507e-05, "loss": 0.922, "step": 5245 }, { "epoch": 0.61, "grad_norm": 0.899404320245155, "learning_rate": 4.715457929895978e-05, "loss": 0.9962, "step": 5250 }, { "epoch": 0.61, "grad_norm": 1.04560341680004, "learning_rate": 4.714926234943348e-05, "loss": 0.9187, "step": 5255 }, { "epoch": 0.61, "grad_norm": 1.0588051794471998, "learning_rate": 4.714394073725544e-05, "loss": 0.836, "step": 5260 }, { "epoch": 0.62, "grad_norm": 1.0045174364708946, "learning_rate": 4.7138614463545924e-05, "loss": 0.8425, "step": 5265 }, { "epoch": 0.62, "grad_norm": 1.0476991024630613, "learning_rate": 4.7133283529426165e-05, "loss": 0.9722, "step": 5270 }, { "epoch": 0.62, "grad_norm": 1.0333987022084203, "learning_rate": 4.712794793601838e-05, "loss": 0.8874, "step": 5275 }, { "epoch": 0.62, "grad_norm": 1.068849767404288, "learning_rate": 4.712260768444576e-05, "loss": 1.0005, "step": 5280 }, { "epoch": 0.62, "grad_norm": 0.895126643617668, "learning_rate": 4.7117262775832496e-05, "loss": 0.8608, "step": 5285 }, { "epoch": 0.62, "grad_norm": 1.0016050519456048, "learning_rate": 4.711191321130374e-05, "loss": 0.9331, "step": 5290 }, { "epoch": 0.62, "grad_norm": 0.9054625464875337, "learning_rate": 4.7106558991985634e-05, "loss": 0.8671, "step": 5295 }, { "epoch": 0.62, "grad_norm": 0.8700732350470702, "learning_rate": 4.710120011900529e-05, "loss": 0.886, "step": 5300 }, { "epoch": 0.62, "grad_norm": 0.8504473513118219, "learning_rate": 4.7095836593490815e-05, "loss": 0.7917, "step": 5305 }, { "epoch": 0.62, "grad_norm": 0.9123168341760761, "learning_rate": 4.709046841657129e-05, "loss": 0.8171, "step": 5310 }, { "epoch": 0.62, "grad_norm": 1.2653412760434275, "learning_rate": 4.708509558937676e-05, "loss": 0.9549, "step": 5315 }, { "epoch": 0.62, "grad_norm": 0.9261057097853282, "learning_rate": 4.707971811303827e-05, "loss": 0.9926, "step": 5320 }, { "epoch": 0.62, "grad_norm": 0.9694522719348073, "learning_rate": 4.707433598868784e-05, "loss": 0.9287, "step": 5325 }, { "epoch": 0.62, "grad_norm": 0.9941257419055902, "learning_rate": 4.706894921745845e-05, "loss": 0.9856, "step": 5330 }, { "epoch": 0.62, "grad_norm": 1.0869813931558723, "learning_rate": 4.706355780048408e-05, "loss": 0.9224, "step": 5335 }, { "epoch": 0.62, "grad_norm": 0.8492471857242857, "learning_rate": 4.705816173889969e-05, "loss": 0.9393, "step": 5340 }, { "epoch": 0.62, "grad_norm": 0.9840307181844872, "learning_rate": 4.705276103384119e-05, "loss": 0.8486, "step": 5345 }, { "epoch": 0.63, "grad_norm": 0.9955482658021549, "learning_rate": 4.7047355686445485e-05, "loss": 0.898, "step": 5350 }, { "epoch": 0.63, "grad_norm": 0.900983276408742, "learning_rate": 4.7041945697850466e-05, "loss": 0.8927, "step": 5355 }, { "epoch": 0.63, "grad_norm": 0.9271025297172, "learning_rate": 4.7036531069194996e-05, "loss": 0.8635, "step": 5360 }, { "epoch": 0.63, "grad_norm": 0.9285434480936893, "learning_rate": 4.70311118016189e-05, "loss": 0.8862, "step": 5365 }, { "epoch": 0.63, "grad_norm": 0.9325358849171663, "learning_rate": 4.7025687896262994e-05, "loss": 0.9087, "step": 5370 }, { "epoch": 0.63, "grad_norm": 0.9153431473739005, "learning_rate": 4.702025935426907e-05, "loss": 0.9348, "step": 5375 }, { "epoch": 0.63, "grad_norm": 0.943839638144225, "learning_rate": 4.7014826176779896e-05, "loss": 0.9862, "step": 5380 }, { "epoch": 0.63, "grad_norm": 0.9471473629703717, "learning_rate": 4.7009388364939203e-05, "loss": 0.7952, "step": 5385 }, { "epoch": 0.63, "grad_norm": 0.8740480776148996, "learning_rate": 4.7003945919891715e-05, "loss": 0.9124, "step": 5390 }, { "epoch": 0.63, "grad_norm": 0.9255818551861128, "learning_rate": 4.699849884278312e-05, "loss": 0.8174, "step": 5395 }, { "epoch": 0.63, "grad_norm": 0.9610244958701634, "learning_rate": 4.699304713476009e-05, "loss": 0.8935, "step": 5400 }, { "epoch": 0.63, "grad_norm": 0.9652297207776812, "learning_rate": 4.698759079697026e-05, "loss": 0.8923, "step": 5405 }, { "epoch": 0.63, "grad_norm": 0.9383503284105464, "learning_rate": 4.698212983056225e-05, "loss": 0.834, "step": 5410 }, { "epoch": 0.63, "grad_norm": 0.8760602065533816, "learning_rate": 4.697666423668565e-05, "loss": 0.8266, "step": 5415 }, { "epoch": 0.63, "grad_norm": 0.9210700781335122, "learning_rate": 4.6971194016491025e-05, "loss": 0.9142, "step": 5420 }, { "epoch": 0.63, "grad_norm": 1.0983602314379115, "learning_rate": 4.696571917112991e-05, "loss": 0.841, "step": 5425 }, { "epoch": 0.63, "grad_norm": 0.8851036441317692, "learning_rate": 4.696023970175483e-05, "loss": 0.8705, "step": 5430 }, { "epoch": 0.64, "grad_norm": 0.9511684237588188, "learning_rate": 4.695475560951925e-05, "loss": 0.8897, "step": 5435 }, { "epoch": 0.64, "grad_norm": 0.885053507235492, "learning_rate": 4.694926689557764e-05, "loss": 0.8006, "step": 5440 }, { "epoch": 0.64, "grad_norm": 0.9849892194518868, "learning_rate": 4.6943773561085435e-05, "loss": 0.9323, "step": 5445 }, { "epoch": 0.64, "grad_norm": 0.910899058845763, "learning_rate": 4.6938275607199033e-05, "loss": 0.8622, "step": 5450 }, { "epoch": 0.64, "grad_norm": 0.9777009686317241, "learning_rate": 4.6932773035075816e-05, "loss": 0.9072, "step": 5455 }, { "epoch": 0.64, "grad_norm": 0.8716996270901086, "learning_rate": 4.6927265845874136e-05, "loss": 0.873, "step": 5460 }, { "epoch": 0.64, "grad_norm": 0.9909763148258036, "learning_rate": 4.69217540407533e-05, "loss": 1.0057, "step": 5465 }, { "epoch": 0.64, "grad_norm": 0.9720374658668953, "learning_rate": 4.6916237620873617e-05, "loss": 0.8676, "step": 5470 }, { "epoch": 0.64, "grad_norm": 0.8910963485197985, "learning_rate": 4.6910716587396344e-05, "loss": 0.8518, "step": 5475 }, { "epoch": 0.64, "grad_norm": 0.9569929361626156, "learning_rate": 4.690519094148371e-05, "loss": 0.9391, "step": 5480 }, { "epoch": 0.64, "grad_norm": 1.0511680915658494, "learning_rate": 4.6899660684298936e-05, "loss": 0.8968, "step": 5485 }, { "epoch": 0.64, "grad_norm": 1.0801385418230076, "learning_rate": 4.689412581700618e-05, "loss": 0.9384, "step": 5490 }, { "epoch": 0.64, "grad_norm": 0.8927714251323101, "learning_rate": 4.688858634077061e-05, "loss": 0.898, "step": 5495 }, { "epoch": 0.64, "grad_norm": 1.1309981861996445, "learning_rate": 4.688304225675834e-05, "loss": 0.9083, "step": 5500 }, { "epoch": 0.64, "grad_norm": 0.9220005244397378, "learning_rate": 4.687749356613643e-05, "loss": 0.9628, "step": 5505 }, { "epoch": 0.64, "grad_norm": 0.8450685429645087, "learning_rate": 4.6871940270072976e-05, "loss": 0.8543, "step": 5510 }, { "epoch": 0.64, "grad_norm": 0.8743105335085044, "learning_rate": 4.686638236973698e-05, "loss": 0.9632, "step": 5515 }, { "epoch": 0.64, "grad_norm": 0.8788870715691519, "learning_rate": 4.6860819866298456e-05, "loss": 0.8929, "step": 5520 }, { "epoch": 0.65, "grad_norm": 0.8715849901477508, "learning_rate": 4.6855252760928356e-05, "loss": 0.9212, "step": 5525 }, { "epoch": 0.65, "grad_norm": 0.8434996407999087, "learning_rate": 4.6849681054798615e-05, "loss": 0.9428, "step": 5530 }, { "epoch": 0.65, "grad_norm": 0.8768448790280514, "learning_rate": 4.6844104749082144e-05, "loss": 0.9217, "step": 5535 }, { "epoch": 0.65, "grad_norm": 0.8608543898621194, "learning_rate": 4.68385238449528e-05, "loss": 0.9223, "step": 5540 }, { "epoch": 0.65, "grad_norm": 0.8828757282033372, "learning_rate": 4.683293834358544e-05, "loss": 0.9527, "step": 5545 }, { "epoch": 0.65, "grad_norm": 0.9530063799041077, "learning_rate": 4.6827348246155855e-05, "loss": 0.9122, "step": 5550 }, { "epoch": 0.65, "grad_norm": 0.9231426689889262, "learning_rate": 4.682175355384082e-05, "loss": 0.9776, "step": 5555 }, { "epoch": 0.65, "grad_norm": 62.51705595956571, "learning_rate": 4.6816154267818084e-05, "loss": 0.9379, "step": 5560 }, { "epoch": 0.65, "grad_norm": 1.1773638956250652, "learning_rate": 4.6810550389266356e-05, "loss": 0.9749, "step": 5565 }, { "epoch": 0.65, "grad_norm": 0.9525301740186433, "learning_rate": 4.680494191936531e-05, "loss": 0.8828, "step": 5570 }, { "epoch": 0.65, "grad_norm": 0.9835525247928325, "learning_rate": 4.679932885929558e-05, "loss": 0.8223, "step": 5575 }, { "epoch": 0.65, "grad_norm": 0.9308323914965436, "learning_rate": 4.679371121023877e-05, "loss": 0.794, "step": 5580 }, { "epoch": 0.65, "grad_norm": 1.0642197192722602, "learning_rate": 4.678808897337747e-05, "loss": 0.8937, "step": 5585 }, { "epoch": 0.65, "grad_norm": 0.863722934745398, "learning_rate": 4.678246214989522e-05, "loss": 0.9496, "step": 5590 }, { "epoch": 0.65, "grad_norm": 0.8946901888023797, "learning_rate": 4.6776830740976505e-05, "loss": 0.8944, "step": 5595 }, { "epoch": 0.65, "grad_norm": 0.9255928319484048, "learning_rate": 4.6771194747806826e-05, "loss": 0.9524, "step": 5600 }, { "epoch": 0.65, "grad_norm": 0.9496776135844713, "learning_rate": 4.676555417157259e-05, "loss": 0.9024, "step": 5605 }, { "epoch": 0.66, "grad_norm": 6.826806279167785, "learning_rate": 4.67599090134612e-05, "loss": 0.9267, "step": 5610 }, { "epoch": 0.66, "grad_norm": 0.9945564753833892, "learning_rate": 4.675425927466104e-05, "loss": 0.897, "step": 5615 }, { "epoch": 0.66, "grad_norm": 1.1326767874904067, "learning_rate": 4.674860495636143e-05, "loss": 1.0306, "step": 5620 }, { "epoch": 0.66, "grad_norm": 1.192659378890715, "learning_rate": 4.6742946059752654e-05, "loss": 0.8268, "step": 5625 }, { "epoch": 0.66, "grad_norm": 0.865472885386012, "learning_rate": 4.673728258602599e-05, "loss": 0.9125, "step": 5630 }, { "epoch": 0.66, "grad_norm": 0.8859023700638049, "learning_rate": 4.673161453637363e-05, "loss": 0.7883, "step": 5635 }, { "epoch": 0.66, "grad_norm": 0.9828220946204883, "learning_rate": 4.672594191198878e-05, "loss": 0.7975, "step": 5640 }, { "epoch": 0.66, "grad_norm": 1.0218056914595839, "learning_rate": 4.672026471406558e-05, "loss": 0.8704, "step": 5645 }, { "epoch": 0.66, "grad_norm": 0.9962228419117912, "learning_rate": 4.671458294379914e-05, "loss": 0.8443, "step": 5650 }, { "epoch": 0.66, "grad_norm": 0.916842238976931, "learning_rate": 4.670889660238553e-05, "loss": 0.9196, "step": 5655 }, { "epoch": 0.66, "grad_norm": 0.896973578985922, "learning_rate": 4.670320569102179e-05, "loss": 0.9462, "step": 5660 }, { "epoch": 0.66, "grad_norm": 0.869017141870717, "learning_rate": 4.669751021090592e-05, "loss": 0.8841, "step": 5665 }, { "epoch": 0.66, "grad_norm": 0.94360156088277, "learning_rate": 4.6691810163236855e-05, "loss": 0.8789, "step": 5670 }, { "epoch": 0.66, "grad_norm": 0.9073554986575458, "learning_rate": 4.668610554921454e-05, "loss": 0.8325, "step": 5675 }, { "epoch": 0.66, "grad_norm": 0.9257045965587248, "learning_rate": 4.668039637003984e-05, "loss": 0.8651, "step": 5680 }, { "epoch": 0.66, "grad_norm": 0.8828285742161036, "learning_rate": 4.66746826269146e-05, "loss": 0.8829, "step": 5685 }, { "epoch": 0.66, "grad_norm": 1.1408332564085044, "learning_rate": 4.666896432104164e-05, "loss": 0.9076, "step": 5690 }, { "epoch": 0.67, "grad_norm": 0.8766537384116836, "learning_rate": 4.66632414536247e-05, "loss": 0.9158, "step": 5695 }, { "epoch": 0.67, "grad_norm": 0.8636114269587559, "learning_rate": 4.665751402586852e-05, "loss": 0.844, "step": 5700 }, { "epoch": 0.67, "grad_norm": 1.1465206874810836, "learning_rate": 4.665178203897877e-05, "loss": 0.9747, "step": 5705 }, { "epoch": 0.67, "grad_norm": 0.9935397619995754, "learning_rate": 4.6646045494162115e-05, "loss": 0.9081, "step": 5710 }, { "epoch": 0.67, "grad_norm": 1.0416302205511918, "learning_rate": 4.664030439262613e-05, "loss": 0.9922, "step": 5715 }, { "epoch": 0.67, "grad_norm": 0.9765975302035652, "learning_rate": 4.663455873557939e-05, "loss": 0.8822, "step": 5720 }, { "epoch": 0.67, "grad_norm": 0.9113509057044953, "learning_rate": 4.6628808524231413e-05, "loss": 0.9486, "step": 5725 }, { "epoch": 0.67, "grad_norm": 1.0315351882329007, "learning_rate": 4.6623053759792696e-05, "loss": 0.9191, "step": 5730 }, { "epoch": 0.67, "grad_norm": 0.8948879757766348, "learning_rate": 4.661729444347465e-05, "loss": 0.8486, "step": 5735 }, { "epoch": 0.67, "grad_norm": 0.8766710372397554, "learning_rate": 4.661153057648969e-05, "loss": 0.8894, "step": 5740 }, { "epoch": 0.67, "grad_norm": 0.82706258378984, "learning_rate": 4.660576216005116e-05, "loss": 0.8505, "step": 5745 }, { "epoch": 0.67, "grad_norm": 0.930694026529663, "learning_rate": 4.6599989195373385e-05, "loss": 0.8854, "step": 5750 }, { "epoch": 0.67, "grad_norm": 1.1264148491246995, "learning_rate": 4.659421168367162e-05, "loss": 0.8439, "step": 5755 }, { "epoch": 0.67, "grad_norm": 0.9425710713723578, "learning_rate": 4.6588429626162095e-05, "loss": 0.9722, "step": 5760 }, { "epoch": 0.67, "grad_norm": 0.9296863266965247, "learning_rate": 4.6582643024062e-05, "loss": 0.9878, "step": 5765 }, { "epoch": 0.67, "grad_norm": 0.9717643081597708, "learning_rate": 4.6576851878589475e-05, "loss": 0.9861, "step": 5770 }, { "epoch": 0.67, "grad_norm": 1.0128093772015851, "learning_rate": 4.657105619096361e-05, "loss": 0.848, "step": 5775 }, { "epoch": 0.68, "grad_norm": 0.8858755147245149, "learning_rate": 4.656525596240446e-05, "loss": 0.9132, "step": 5780 }, { "epoch": 0.68, "grad_norm": 1.0529894716410615, "learning_rate": 4.655945119413304e-05, "loss": 0.9049, "step": 5785 }, { "epoch": 0.68, "grad_norm": 0.9423077431089073, "learning_rate": 4.6553641887371305e-05, "loss": 0.8939, "step": 5790 }, { "epoch": 0.68, "grad_norm": 0.9289900362286485, "learning_rate": 4.6547828043342186e-05, "loss": 0.8498, "step": 5795 }, { "epoch": 0.68, "grad_norm": 0.8434000431799341, "learning_rate": 4.654200966326955e-05, "loss": 0.8566, "step": 5800 }, { "epoch": 0.68, "grad_norm": 0.9383065891406847, "learning_rate": 4.653618674837823e-05, "loss": 0.875, "step": 5805 }, { "epoch": 0.68, "grad_norm": 1.0008310804902216, "learning_rate": 4.6530359299894006e-05, "loss": 0.8826, "step": 5810 }, { "epoch": 0.68, "grad_norm": 0.888970604341223, "learning_rate": 4.652452731904362e-05, "loss": 0.9341, "step": 5815 }, { "epoch": 0.68, "grad_norm": 0.9564059476156773, "learning_rate": 4.651869080705477e-05, "loss": 0.8431, "step": 5820 }, { "epoch": 0.68, "grad_norm": 0.91077967654913, "learning_rate": 4.65128497651561e-05, "loss": 0.867, "step": 5825 }, { "epoch": 0.68, "grad_norm": 1.2406196675947683, "learning_rate": 4.650700419457721e-05, "loss": 0.9075, "step": 5830 }, { "epoch": 0.68, "grad_norm": 0.8850204148321628, "learning_rate": 4.6501154096548646e-05, "loss": 0.8777, "step": 5835 }, { "epoch": 0.68, "grad_norm": 0.8883847420156631, "learning_rate": 4.6495299472301925e-05, "loss": 0.8567, "step": 5840 }, { "epoch": 0.68, "grad_norm": 0.9476475233766783, "learning_rate": 4.64894403230695e-05, "loss": 0.841, "step": 5845 }, { "epoch": 0.68, "grad_norm": 0.9487764754969552, "learning_rate": 4.6483576650084795e-05, "loss": 0.9376, "step": 5850 }, { "epoch": 0.68, "grad_norm": 0.9165964621495063, "learning_rate": 4.647770845458216e-05, "loss": 0.9088, "step": 5855 }, { "epoch": 0.68, "grad_norm": 0.9974674944880536, "learning_rate": 4.647183573779692e-05, "loss": 0.8475, "step": 5860 }, { "epoch": 0.69, "grad_norm": 0.8980672638517232, "learning_rate": 4.646595850096534e-05, "loss": 0.8466, "step": 5865 }, { "epoch": 0.69, "grad_norm": 0.8751879975488112, "learning_rate": 4.6460076745324646e-05, "loss": 0.9086, "step": 5870 }, { "epoch": 0.69, "grad_norm": 0.9058933220665744, "learning_rate": 4.6454190472113e-05, "loss": 0.9135, "step": 5875 }, { "epoch": 0.69, "grad_norm": 0.9606409834423856, "learning_rate": 4.644829968256953e-05, "loss": 0.943, "step": 5880 }, { "epoch": 0.69, "grad_norm": 1.2415993852198044, "learning_rate": 4.644240437793431e-05, "loss": 0.8554, "step": 5885 }, { "epoch": 0.69, "grad_norm": 0.9317260883498478, "learning_rate": 4.6436504559448354e-05, "loss": 0.9151, "step": 5890 }, { "epoch": 0.69, "grad_norm": 1.2617341544231906, "learning_rate": 4.643060022835365e-05, "loss": 1.0054, "step": 5895 }, { "epoch": 0.69, "grad_norm": 1.0409449577750085, "learning_rate": 4.642469138589311e-05, "loss": 0.8763, "step": 5900 }, { "epoch": 0.69, "grad_norm": 0.8450256402510777, "learning_rate": 4.641877803331061e-05, "loss": 0.8358, "step": 5905 }, { "epoch": 0.69, "grad_norm": 0.9507390060939424, "learning_rate": 4.641286017185098e-05, "loss": 0.9093, "step": 5910 }, { "epoch": 0.69, "grad_norm": 0.8747175940849005, "learning_rate": 4.6406937802759984e-05, "loss": 0.91, "step": 5915 }, { "epoch": 0.69, "grad_norm": 0.9070793302110012, "learning_rate": 4.640101092728434e-05, "loss": 0.8121, "step": 5920 }, { "epoch": 0.69, "grad_norm": 0.949786764506466, "learning_rate": 4.6395079546671725e-05, "loss": 0.9231, "step": 5925 }, { "epoch": 0.69, "grad_norm": 1.1435484446328472, "learning_rate": 4.6389143662170755e-05, "loss": 0.9155, "step": 5930 }, { "epoch": 0.69, "grad_norm": 0.9058030053546251, "learning_rate": 4.6383203275030996e-05, "loss": 0.9596, "step": 5935 }, { "epoch": 0.69, "grad_norm": 0.9352875952310603, "learning_rate": 4.6377258386502956e-05, "loss": 1.0381, "step": 5940 }, { "epoch": 0.69, "grad_norm": 0.9408422310720795, "learning_rate": 4.6371308997838107e-05, "loss": 0.9698, "step": 5945 }, { "epoch": 0.7, "grad_norm": 1.066883861343165, "learning_rate": 4.6365355110288845e-05, "loss": 0.918, "step": 5950 }, { "epoch": 0.7, "grad_norm": 0.881656248780669, "learning_rate": 4.635939672510853e-05, "loss": 0.9496, "step": 5955 }, { "epoch": 0.7, "grad_norm": 0.906309309915082, "learning_rate": 4.635343384355147e-05, "loss": 0.8641, "step": 5960 }, { "epoch": 0.7, "grad_norm": 0.9486582240345701, "learning_rate": 4.6347466466872916e-05, "loss": 0.9471, "step": 5965 }, { "epoch": 0.7, "grad_norm": 0.9448056393967973, "learning_rate": 4.634149459632905e-05, "loss": 0.8951, "step": 5970 }, { "epoch": 0.7, "grad_norm": 0.8680934354031703, "learning_rate": 4.633551823317702e-05, "loss": 0.8387, "step": 5975 }, { "epoch": 0.7, "grad_norm": 1.0298181055442204, "learning_rate": 4.6329537378674914e-05, "loss": 0.8783, "step": 5980 }, { "epoch": 0.7, "grad_norm": 1.1161018549931019, "learning_rate": 4.6323552034081773e-05, "loss": 0.8897, "step": 5985 }, { "epoch": 0.7, "grad_norm": 0.8837510703914684, "learning_rate": 4.631756220065756e-05, "loss": 0.9294, "step": 5990 }, { "epoch": 0.7, "grad_norm": 0.8860897108704975, "learning_rate": 4.6311567879663206e-05, "loss": 0.867, "step": 5995 }, { "epoch": 0.7, "grad_norm": 0.9029700041572566, "learning_rate": 4.6305569072360576e-05, "loss": 0.8449, "step": 6000 }, { "epoch": 0.7, "grad_norm": 0.9428846895181254, "learning_rate": 4.629956578001248e-05, "loss": 0.9825, "step": 6005 }, { "epoch": 0.7, "grad_norm": 0.9153696985371471, "learning_rate": 4.6293558003882676e-05, "loss": 0.804, "step": 6010 }, { "epoch": 0.7, "grad_norm": 0.8585207867675193, "learning_rate": 4.6287545745235874e-05, "loss": 0.878, "step": 6015 }, { "epoch": 0.7, "grad_norm": 1.21832783563486, "learning_rate": 4.62815290053377e-05, "loss": 0.9089, "step": 6020 }, { "epoch": 0.7, "grad_norm": 0.9377488059388995, "learning_rate": 4.627550778545474e-05, "loss": 1.0253, "step": 6025 }, { "epoch": 0.7, "grad_norm": 0.9992394738506685, "learning_rate": 4.626948208685454e-05, "loss": 0.8695, "step": 6030 }, { "epoch": 0.71, "grad_norm": 0.8940893498337988, "learning_rate": 4.6263451910805564e-05, "loss": 0.9124, "step": 6035 }, { "epoch": 0.71, "grad_norm": 0.9109467535072565, "learning_rate": 4.6257417258577226e-05, "loss": 0.8536, "step": 6040 }, { "epoch": 0.71, "grad_norm": 0.87354951406432, "learning_rate": 4.625137813143989e-05, "loss": 0.8777, "step": 6045 }, { "epoch": 0.71, "grad_norm": 0.9320667565405548, "learning_rate": 4.624533453066485e-05, "loss": 0.9779, "step": 6050 }, { "epoch": 0.71, "grad_norm": 0.9184223567517243, "learning_rate": 4.623928645752434e-05, "loss": 0.9153, "step": 6055 }, { "epoch": 0.71, "grad_norm": 0.9068618601226535, "learning_rate": 4.6233233913291564e-05, "loss": 0.9139, "step": 6060 }, { "epoch": 0.71, "grad_norm": 0.8831840608811291, "learning_rate": 4.622717689924062e-05, "loss": 0.8134, "step": 6065 }, { "epoch": 0.71, "grad_norm": 0.9251633590782754, "learning_rate": 4.6221115416646594e-05, "loss": 0.9339, "step": 6070 }, { "epoch": 0.71, "grad_norm": 0.9243066351426512, "learning_rate": 4.621504946678548e-05, "loss": 0.8668, "step": 6075 }, { "epoch": 0.71, "grad_norm": 0.8835072500508083, "learning_rate": 4.620897905093423e-05, "loss": 0.9419, "step": 6080 }, { "epoch": 0.71, "grad_norm": 1.1830639824657618, "learning_rate": 4.6202904170370725e-05, "loss": 0.92, "step": 6085 }, { "epoch": 0.71, "grad_norm": 1.0377641678174525, "learning_rate": 4.61968248263738e-05, "loss": 0.8622, "step": 6090 }, { "epoch": 0.71, "grad_norm": 0.8221855416618498, "learning_rate": 4.619074102022321e-05, "loss": 0.879, "step": 6095 }, { "epoch": 0.71, "grad_norm": 1.1236408060018133, "learning_rate": 4.6184652753199654e-05, "loss": 0.8666, "step": 6100 }, { "epoch": 0.71, "grad_norm": 1.0546708431225267, "learning_rate": 4.6178560026584796e-05, "loss": 0.9434, "step": 6105 }, { "epoch": 0.71, "grad_norm": 1.1096496852030064, "learning_rate": 4.617246284166121e-05, "loss": 0.975, "step": 6110 }, { "epoch": 0.71, "grad_norm": 0.9842139713889004, "learning_rate": 4.616636119971241e-05, "loss": 0.8923, "step": 6115 }, { "epoch": 0.72, "grad_norm": 1.1695387595408357, "learning_rate": 4.6160255102022865e-05, "loss": 0.8549, "step": 6120 }, { "epoch": 0.72, "grad_norm": 0.9674107209626502, "learning_rate": 4.6154144549877966e-05, "loss": 0.8989, "step": 6125 }, { "epoch": 0.72, "grad_norm": 1.1585208366143323, "learning_rate": 4.614802954456405e-05, "loss": 0.8816, "step": 6130 }, { "epoch": 0.72, "grad_norm": 0.9408702981592619, "learning_rate": 4.61419100873684e-05, "loss": 0.8636, "step": 6135 }, { "epoch": 0.72, "grad_norm": 0.8983370202177836, "learning_rate": 4.613578617957921e-05, "loss": 0.8763, "step": 6140 }, { "epoch": 0.72, "grad_norm": 1.097332295548343, "learning_rate": 4.6129657822485637e-05, "loss": 0.8812, "step": 6145 }, { "epoch": 0.72, "grad_norm": 0.8769493817593639, "learning_rate": 4.6123525017377757e-05, "loss": 0.8373, "step": 6150 }, { "epoch": 0.72, "grad_norm": 0.9922450337690748, "learning_rate": 4.611738776554659e-05, "loss": 0.8961, "step": 6155 }, { "epoch": 0.72, "grad_norm": 0.8890048430127167, "learning_rate": 4.611124606828411e-05, "loss": 0.8995, "step": 6160 }, { "epoch": 0.72, "grad_norm": 0.8768629434253079, "learning_rate": 4.610509992688319e-05, "loss": 0.8798, "step": 6165 }, { "epoch": 0.72, "grad_norm": 1.0440007922113836, "learning_rate": 4.6098949342637655e-05, "loss": 0.8847, "step": 6170 }, { "epoch": 0.72, "grad_norm": 1.0012655568302622, "learning_rate": 4.6092794316842266e-05, "loss": 0.9017, "step": 6175 }, { "epoch": 0.72, "grad_norm": 0.9532168138198784, "learning_rate": 4.6086634850792743e-05, "loss": 0.8492, "step": 6180 }, { "epoch": 0.72, "grad_norm": 0.8829541753778574, "learning_rate": 4.608047094578569e-05, "loss": 0.8476, "step": 6185 }, { "epoch": 0.72, "grad_norm": 0.8695714693195097, "learning_rate": 4.60743026031187e-05, "loss": 0.8765, "step": 6190 }, { "epoch": 0.72, "grad_norm": 0.9128963621393668, "learning_rate": 4.606812982409025e-05, "loss": 0.9509, "step": 6195 }, { "epoch": 0.72, "grad_norm": 0.8859489839261778, "learning_rate": 4.606195260999979e-05, "loss": 0.8819, "step": 6200 }, { "epoch": 0.72, "grad_norm": 0.9390059600209083, "learning_rate": 4.6055770962147684e-05, "loss": 0.9154, "step": 6205 }, { "epoch": 0.73, "grad_norm": 0.9243235446324867, "learning_rate": 4.604958488183523e-05, "loss": 0.8912, "step": 6210 }, { "epoch": 0.73, "grad_norm": 0.9729508229223045, "learning_rate": 4.6043394370364665e-05, "loss": 0.8596, "step": 6215 }, { "epoch": 0.73, "grad_norm": 0.8484031459132618, "learning_rate": 4.603719942903916e-05, "loss": 0.9308, "step": 6220 }, { "epoch": 0.73, "grad_norm": 0.9665400919268459, "learning_rate": 4.603100005916281e-05, "loss": 0.9341, "step": 6225 }, { "epoch": 0.73, "grad_norm": 0.8432324874952188, "learning_rate": 4.6024796262040656e-05, "loss": 0.9389, "step": 6230 }, { "epoch": 0.73, "grad_norm": 1.0460759951319094, "learning_rate": 4.601858803897865e-05, "loss": 0.9347, "step": 6235 }, { "epoch": 0.73, "grad_norm": 0.816847428520859, "learning_rate": 4.6012375391283704e-05, "loss": 0.8765, "step": 6240 }, { "epoch": 0.73, "grad_norm": 1.035963830805095, "learning_rate": 4.6006158320263623e-05, "loss": 0.903, "step": 6245 }, { "epoch": 0.73, "grad_norm": 0.9084377151112936, "learning_rate": 4.5999936827227174e-05, "loss": 0.9154, "step": 6250 }, { "epoch": 0.73, "grad_norm": 0.9251730841669199, "learning_rate": 4.5993710913484065e-05, "loss": 0.9767, "step": 6255 }, { "epoch": 0.73, "grad_norm": 0.9461081359999874, "learning_rate": 4.5987480580344886e-05, "loss": 0.8985, "step": 6260 }, { "epoch": 0.73, "grad_norm": 0.9366044223690984, "learning_rate": 4.598124582912121e-05, "loss": 0.9089, "step": 6265 }, { "epoch": 0.73, "grad_norm": 0.8806746000216982, "learning_rate": 4.5975006661125515e-05, "loss": 0.8571, "step": 6270 }, { "epoch": 0.73, "grad_norm": 0.8890877537842857, "learning_rate": 4.59687630776712e-05, "loss": 0.8431, "step": 6275 }, { "epoch": 0.73, "grad_norm": 0.875535656849131, "learning_rate": 4.596251508007261e-05, "loss": 0.8789, "step": 6280 }, { "epoch": 0.73, "grad_norm": 0.8622018079377948, "learning_rate": 4.5956262669645026e-05, "loss": 0.8664, "step": 6285 }, { "epoch": 0.73, "grad_norm": 0.8807128797583802, "learning_rate": 4.595000584770462e-05, "loss": 0.9086, "step": 6290 }, { "epoch": 0.74, "grad_norm": 0.8197993515527898, "learning_rate": 4.594374461556854e-05, "loss": 0.8242, "step": 6295 }, { "epoch": 0.74, "grad_norm": 0.9044114149932014, "learning_rate": 4.593747897455484e-05, "loss": 0.84, "step": 6300 }, { "epoch": 0.74, "grad_norm": 0.844962428245253, "learning_rate": 4.593120892598249e-05, "loss": 0.9456, "step": 6305 }, { "epoch": 0.74, "grad_norm": 0.9007442506130924, "learning_rate": 4.592493447117141e-05, "loss": 0.8419, "step": 6310 }, { "epoch": 0.74, "grad_norm": 0.9295044964217902, "learning_rate": 4.591865561144244e-05, "loss": 0.9004, "step": 6315 }, { "epoch": 0.74, "grad_norm": 0.9626186549600156, "learning_rate": 4.5912372348117336e-05, "loss": 0.8287, "step": 6320 }, { "epoch": 0.74, "grad_norm": 0.9007240148017548, "learning_rate": 4.59060846825188e-05, "loss": 0.9189, "step": 6325 }, { "epoch": 0.74, "grad_norm": 0.9268166833165955, "learning_rate": 4.589979261597045e-05, "loss": 0.8387, "step": 6330 }, { "epoch": 0.74, "grad_norm": 1.2251278176367635, "learning_rate": 4.5893496149796834e-05, "loss": 0.882, "step": 6335 }, { "epoch": 0.74, "grad_norm": 0.8273194127826791, "learning_rate": 4.588719528532342e-05, "loss": 0.7887, "step": 6340 }, { "epoch": 0.74, "grad_norm": 1.000549915201785, "learning_rate": 4.58808900238766e-05, "loss": 0.985, "step": 6345 }, { "epoch": 0.74, "grad_norm": 0.9529477105878277, "learning_rate": 4.5874580366783706e-05, "loss": 0.88, "step": 6350 }, { "epoch": 0.74, "grad_norm": 0.8475429884907233, "learning_rate": 4.586826631537299e-05, "loss": 0.8077, "step": 6355 }, { "epoch": 0.74, "grad_norm": 0.8431966618407057, "learning_rate": 4.586194787097362e-05, "loss": 0.8776, "step": 6360 }, { "epoch": 0.74, "grad_norm": 0.8818010759884471, "learning_rate": 4.58556250349157e-05, "loss": 0.8218, "step": 6365 }, { "epoch": 0.74, "grad_norm": 0.9719406565621362, "learning_rate": 4.584929780853023e-05, "loss": 0.9744, "step": 6370 }, { "epoch": 0.74, "grad_norm": 0.8169917993462518, "learning_rate": 4.5842966193149196e-05, "loss": 0.8829, "step": 6375 }, { "epoch": 0.75, "grad_norm": 0.8964425215861298, "learning_rate": 4.583663019010545e-05, "loss": 0.9191, "step": 6380 }, { "epoch": 0.75, "grad_norm": 0.9222675607785201, "learning_rate": 4.5830289800732775e-05, "loss": 0.8921, "step": 6385 }, { "epoch": 0.75, "grad_norm": 0.9528886187326482, "learning_rate": 4.582394502636591e-05, "loss": 0.8363, "step": 6390 }, { "epoch": 0.75, "grad_norm": 0.8767551135711331, "learning_rate": 4.581759586834049e-05, "loss": 0.9017, "step": 6395 }, { "epoch": 0.75, "grad_norm": 0.8796793033406571, "learning_rate": 4.5811242327993066e-05, "loss": 0.9381, "step": 6400 }, { "epoch": 0.75, "grad_norm": 0.869895599396756, "learning_rate": 4.5804884406661143e-05, "loss": 0.961, "step": 6405 }, { "epoch": 0.75, "grad_norm": 0.8944114677302881, "learning_rate": 4.579852210568313e-05, "loss": 0.9182, "step": 6410 }, { "epoch": 0.75, "grad_norm": 1.0548227460991775, "learning_rate": 4.579215542639834e-05, "loss": 0.9077, "step": 6415 }, { "epoch": 0.75, "grad_norm": 0.9862335460732246, "learning_rate": 4.5785784370147036e-05, "loss": 0.8815, "step": 6420 }, { "epoch": 0.75, "grad_norm": 0.9832163639675155, "learning_rate": 4.5779408938270396e-05, "loss": 1.0411, "step": 6425 }, { "epoch": 0.75, "grad_norm": 0.9217683195009689, "learning_rate": 4.577302913211051e-05, "loss": 0.8684, "step": 6430 }, { "epoch": 0.75, "grad_norm": 4.093573529908637, "learning_rate": 4.5766644953010406e-05, "loss": 0.9057, "step": 6435 }, { "epoch": 0.75, "grad_norm": 0.9141308948145531, "learning_rate": 4.5760256402314e-05, "loss": 0.9413, "step": 6440 }, { "epoch": 0.75, "grad_norm": 0.8536925340397831, "learning_rate": 4.575386348136617e-05, "loss": 0.9157, "step": 6445 }, { "epoch": 0.75, "grad_norm": 1.012324444749008, "learning_rate": 4.574746619151267e-05, "loss": 0.8648, "step": 6450 }, { "epoch": 0.75, "grad_norm": 0.8389768064145428, "learning_rate": 4.5741064534100206e-05, "loss": 0.8937, "step": 6455 }, { "epoch": 0.75, "grad_norm": 0.7900443792939897, "learning_rate": 4.573465851047641e-05, "loss": 0.909, "step": 6460 }, { "epoch": 0.76, "grad_norm": 0.8103292562631329, "learning_rate": 4.57282481219898e-05, "loss": 0.9096, "step": 6465 }, { "epoch": 0.76, "grad_norm": 0.8431917074010248, "learning_rate": 4.572183336998983e-05, "loss": 0.8685, "step": 6470 }, { "epoch": 0.76, "grad_norm": 1.0345249342212555, "learning_rate": 4.571541425582688e-05, "loss": 0.8977, "step": 6475 }, { "epoch": 0.76, "grad_norm": 0.8997773810571221, "learning_rate": 4.570899078085223e-05, "loss": 0.9831, "step": 6480 }, { "epoch": 0.76, "grad_norm": 1.133684431280131, "learning_rate": 4.5702562946418096e-05, "loss": 0.9852, "step": 6485 }, { "epoch": 0.76, "grad_norm": 0.8515355959743727, "learning_rate": 4.5696130753877605e-05, "loss": 0.8296, "step": 6490 }, { "epoch": 0.76, "grad_norm": 0.881822819644578, "learning_rate": 4.568969420458481e-05, "loss": 0.8074, "step": 6495 }, { "epoch": 0.76, "grad_norm": 0.9981784732119469, "learning_rate": 4.568325329989466e-05, "loss": 0.9014, "step": 6500 }, { "epoch": 0.76, "grad_norm": 0.9708027233798908, "learning_rate": 4.567680804116302e-05, "loss": 0.9014, "step": 6505 }, { "epoch": 0.76, "grad_norm": 0.9602401756058483, "learning_rate": 4.567035842974672e-05, "loss": 0.9401, "step": 6510 }, { "epoch": 0.76, "grad_norm": 0.8881231641217178, "learning_rate": 4.566390446700343e-05, "loss": 0.9916, "step": 6515 }, { "epoch": 0.76, "grad_norm": 1.012553516599443, "learning_rate": 4.565744615429181e-05, "loss": 0.8627, "step": 6520 }, { "epoch": 0.76, "grad_norm": 0.9668416999748121, "learning_rate": 4.565098349297139e-05, "loss": 0.9217, "step": 6525 }, { "epoch": 0.76, "grad_norm": 1.1165722314674247, "learning_rate": 4.564451648440262e-05, "loss": 0.8758, "step": 6530 }, { "epoch": 0.76, "grad_norm": 1.0443461673657293, "learning_rate": 4.5638045129946884e-05, "loss": 0.8174, "step": 6535 }, { "epoch": 0.76, "grad_norm": 1.0005526034038386, "learning_rate": 4.563156943096647e-05, "loss": 0.8884, "step": 6540 }, { "epoch": 0.76, "grad_norm": 0.8738596141005056, "learning_rate": 4.5625089388824574e-05, "loss": 0.7904, "step": 6545 }, { "epoch": 0.77, "grad_norm": 0.8205869197196017, "learning_rate": 4.5618605004885304e-05, "loss": 0.822, "step": 6550 }, { "epoch": 0.77, "grad_norm": 0.993592490390536, "learning_rate": 4.561211628051372e-05, "loss": 0.8492, "step": 6555 }, { "epoch": 0.77, "grad_norm": 1.0526138718755598, "learning_rate": 4.560562321707574e-05, "loss": 0.8476, "step": 6560 }, { "epoch": 0.77, "grad_norm": 1.0509429220233961, "learning_rate": 4.559912581593823e-05, "loss": 0.8446, "step": 6565 }, { "epoch": 0.77, "grad_norm": 0.9261331908819719, "learning_rate": 4.559262407846896e-05, "loss": 0.8301, "step": 6570 }, { "epoch": 0.77, "grad_norm": 0.8656335970010844, "learning_rate": 4.5586118006036624e-05, "loss": 0.8711, "step": 6575 }, { "epoch": 0.77, "grad_norm": 1.0044467466466536, "learning_rate": 4.55796076000108e-05, "loss": 0.9174, "step": 6580 }, { "epoch": 0.77, "grad_norm": 0.8599510579625476, "learning_rate": 4.557309286176202e-05, "loss": 0.9533, "step": 6585 }, { "epoch": 0.77, "grad_norm": 0.9141088247845843, "learning_rate": 4.556657379266168e-05, "loss": 0.9352, "step": 6590 }, { "epoch": 0.77, "grad_norm": 0.8477613746736062, "learning_rate": 4.556005039408212e-05, "loss": 0.8875, "step": 6595 }, { "epoch": 0.77, "grad_norm": 0.867459635396512, "learning_rate": 4.5553522667396596e-05, "loss": 0.8872, "step": 6600 }, { "epoch": 0.77, "grad_norm": 0.9365938449215916, "learning_rate": 4.5546990613979254e-05, "loss": 0.941, "step": 6605 }, { "epoch": 0.77, "grad_norm": 0.9362390934799447, "learning_rate": 4.5540454235205155e-05, "loss": 0.9718, "step": 6610 }, { "epoch": 0.77, "grad_norm": 0.8240309388980643, "learning_rate": 4.553391353245028e-05, "loss": 0.8478, "step": 6615 }, { "epoch": 0.77, "grad_norm": 0.9416357888311052, "learning_rate": 4.552736850709152e-05, "loss": 0.8707, "step": 6620 }, { "epoch": 0.77, "grad_norm": 0.9642286942572582, "learning_rate": 4.5520819160506663e-05, "loss": 0.8754, "step": 6625 }, { "epoch": 0.77, "grad_norm": 0.9399405270703323, "learning_rate": 4.551426549407442e-05, "loss": 0.9623, "step": 6630 }, { "epoch": 0.78, "grad_norm": 0.920686265338966, "learning_rate": 4.55077075091744e-05, "loss": 0.8966, "step": 6635 }, { "epoch": 0.78, "grad_norm": 0.8855940071389286, "learning_rate": 4.5501145207187146e-05, "loss": 1.0745, "step": 6640 }, { "epoch": 0.78, "grad_norm": 0.8723032412287673, "learning_rate": 4.5494578589494067e-05, "loss": 0.8585, "step": 6645 }, { "epoch": 0.78, "grad_norm": 0.8200793561196714, "learning_rate": 4.548800765747752e-05, "loss": 0.9757, "step": 6650 }, { "epoch": 0.78, "grad_norm": 0.8927281867615093, "learning_rate": 4.548143241252075e-05, "loss": 0.9107, "step": 6655 }, { "epoch": 0.78, "grad_norm": 0.9292826126834808, "learning_rate": 4.547485285600791e-05, "loss": 0.8477, "step": 6660 }, { "epoch": 0.78, "grad_norm": 0.9861904227565962, "learning_rate": 4.546826898932408e-05, "loss": 0.8929, "step": 6665 }, { "epoch": 0.78, "grad_norm": 0.9585159916124865, "learning_rate": 4.546168081385522e-05, "loss": 0.8093, "step": 6670 }, { "epoch": 0.78, "grad_norm": 0.9577613091747206, "learning_rate": 4.545508833098822e-05, "loss": 0.977, "step": 6675 }, { "epoch": 0.78, "grad_norm": 0.9192788328338644, "learning_rate": 4.544849154211085e-05, "loss": 0.8832, "step": 6680 }, { "epoch": 0.78, "grad_norm": 0.987998066896299, "learning_rate": 4.5441890448611826e-05, "loss": 0.7768, "step": 6685 }, { "epoch": 0.78, "grad_norm": 1.1912815115222373, "learning_rate": 4.543528505188073e-05, "loss": 0.8507, "step": 6690 }, { "epoch": 0.78, "grad_norm": 0.9393612929571797, "learning_rate": 4.542867535330807e-05, "loss": 0.8959, "step": 6695 }, { "epoch": 0.78, "grad_norm": 0.9845511721012468, "learning_rate": 4.542206135428526e-05, "loss": 0.8748, "step": 6700 }, { "epoch": 0.78, "grad_norm": 0.8903440465894508, "learning_rate": 4.541544305620462e-05, "loss": 0.9035, "step": 6705 }, { "epoch": 0.78, "grad_norm": 0.9548946271504166, "learning_rate": 4.5408820460459354e-05, "loss": 0.9212, "step": 6710 }, { "epoch": 0.78, "grad_norm": 0.9030897660349191, "learning_rate": 4.540219356844362e-05, "loss": 0.8025, "step": 6715 }, { "epoch": 0.79, "grad_norm": 0.8424287819579624, "learning_rate": 4.5395562381552414e-05, "loss": 0.7828, "step": 6720 }, { "epoch": 0.79, "grad_norm": 0.8832524513732642, "learning_rate": 4.5388926901181696e-05, "loss": 0.9107, "step": 6725 }, { "epoch": 0.79, "grad_norm": 0.7678228777709971, "learning_rate": 4.538228712872828e-05, "loss": 0.8686, "step": 6730 }, { "epoch": 0.79, "grad_norm": 0.9443007432414015, "learning_rate": 4.5375643065589926e-05, "loss": 0.8986, "step": 6735 }, { "epoch": 0.79, "grad_norm": 0.8166760834680121, "learning_rate": 4.5368994713165274e-05, "loss": 0.8385, "step": 6740 }, { "epoch": 0.79, "grad_norm": 1.0589750260582995, "learning_rate": 4.536234207285387e-05, "loss": 0.872, "step": 6745 }, { "epoch": 0.79, "grad_norm": 0.9670038000292133, "learning_rate": 4.535568514605617e-05, "loss": 0.7944, "step": 6750 }, { "epoch": 0.79, "grad_norm": 0.9249180768610487, "learning_rate": 4.534902393417352e-05, "loss": 0.9245, "step": 6755 }, { "epoch": 0.79, "grad_norm": 0.9667722628382301, "learning_rate": 4.534235843860818e-05, "loss": 0.9252, "step": 6760 }, { "epoch": 0.79, "grad_norm": 0.9510002135775978, "learning_rate": 4.5335688660763295e-05, "loss": 0.8906, "step": 6765 }, { "epoch": 0.79, "grad_norm": 0.9595636576431867, "learning_rate": 4.532901460204295e-05, "loss": 0.885, "step": 6770 }, { "epoch": 0.79, "grad_norm": 0.8287853742632969, "learning_rate": 4.5322336263852074e-05, "loss": 0.8593, "step": 6775 }, { "epoch": 0.79, "grad_norm": 0.9499119473730897, "learning_rate": 4.5315653647596535e-05, "loss": 0.8761, "step": 6780 }, { "epoch": 0.79, "grad_norm": 0.906177355917264, "learning_rate": 4.530896675468311e-05, "loss": 0.8467, "step": 6785 }, { "epoch": 0.79, "grad_norm": 0.9552272331103624, "learning_rate": 4.530227558651944e-05, "loss": 0.9359, "step": 6790 }, { "epoch": 0.79, "grad_norm": 0.9837491128468989, "learning_rate": 4.52955801445141e-05, "loss": 0.898, "step": 6795 }, { "epoch": 0.79, "grad_norm": 1.0734456804213968, "learning_rate": 4.528888043007654e-05, "loss": 0.8603, "step": 6800 }, { "epoch": 0.8, "grad_norm": 0.8189829717059002, "learning_rate": 4.528217644461713e-05, "loss": 0.8911, "step": 6805 }, { "epoch": 0.8, "grad_norm": 1.0087337719968905, "learning_rate": 4.527546818954712e-05, "loss": 0.973, "step": 6810 }, { "epoch": 0.8, "grad_norm": 0.9127711731357061, "learning_rate": 4.526875566627867e-05, "loss": 0.8793, "step": 6815 }, { "epoch": 0.8, "grad_norm": 0.8172810337144737, "learning_rate": 4.5262038876224845e-05, "loss": 0.8434, "step": 6820 }, { "epoch": 0.8, "grad_norm": 1.0137957503406376, "learning_rate": 4.525531782079959e-05, "loss": 0.9221, "step": 6825 }, { "epoch": 0.8, "grad_norm": 0.8665790946319983, "learning_rate": 4.5248592501417766e-05, "loss": 0.9085, "step": 6830 }, { "epoch": 0.8, "grad_norm": 0.7968224632190943, "learning_rate": 4.524186291949511e-05, "loss": 0.818, "step": 6835 }, { "epoch": 0.8, "grad_norm": 0.8617178046624142, "learning_rate": 4.523512907644828e-05, "loss": 0.8725, "step": 6840 }, { "epoch": 0.8, "grad_norm": 0.8616638385384007, "learning_rate": 4.522839097369482e-05, "loss": 0.8988, "step": 6845 }, { "epoch": 0.8, "grad_norm": 0.9129405302298863, "learning_rate": 4.522164861265317e-05, "loss": 0.8525, "step": 6850 }, { "epoch": 0.8, "grad_norm": 0.993762251834204, "learning_rate": 4.521490199474266e-05, "loss": 0.9097, "step": 6855 }, { "epoch": 0.8, "grad_norm": 0.8627959591614259, "learning_rate": 4.5208151121383546e-05, "loss": 0.9018, "step": 6860 }, { "epoch": 0.8, "grad_norm": 0.8772948923706035, "learning_rate": 4.520139599399693e-05, "loss": 0.8907, "step": 6865 }, { "epoch": 0.8, "grad_norm": 0.9281018046141196, "learning_rate": 4.519463661400485e-05, "loss": 0.8834, "step": 6870 }, { "epoch": 0.8, "grad_norm": 1.0879450505596755, "learning_rate": 4.518787298283023e-05, "loss": 0.8029, "step": 6875 }, { "epoch": 0.8, "grad_norm": 0.964585574353663, "learning_rate": 4.518110510189688e-05, "loss": 0.833, "step": 6880 }, { "epoch": 0.8, "grad_norm": 1.0101205874621093, "learning_rate": 4.517433297262951e-05, "loss": 0.9045, "step": 6885 }, { "epoch": 0.81, "grad_norm": 0.9226063392721335, "learning_rate": 4.516755659645372e-05, "loss": 0.872, "step": 6890 }, { "epoch": 0.81, "grad_norm": 0.8649322144361529, "learning_rate": 4.5160775974796024e-05, "loss": 0.8576, "step": 6895 }, { "epoch": 0.81, "grad_norm": 0.8894395684373384, "learning_rate": 4.5153991109083793e-05, "loss": 0.9085, "step": 6900 }, { "epoch": 0.81, "grad_norm": 1.0986583258385774, "learning_rate": 4.514720200074532e-05, "loss": 0.8539, "step": 6905 }, { "epoch": 0.81, "grad_norm": 0.821887475104247, "learning_rate": 4.5140408651209786e-05, "loss": 0.8554, "step": 6910 }, { "epoch": 0.81, "grad_norm": 0.8520470635574051, "learning_rate": 4.513361106190727e-05, "loss": 0.8537, "step": 6915 }, { "epoch": 0.81, "grad_norm": 0.9575081196572843, "learning_rate": 4.512680923426872e-05, "loss": 0.8286, "step": 6920 }, { "epoch": 0.81, "grad_norm": 0.8687385291363767, "learning_rate": 4.512000316972601e-05, "loss": 0.8684, "step": 6925 }, { "epoch": 0.81, "grad_norm": 0.8291128300019188, "learning_rate": 4.5113192869711867e-05, "loss": 0.866, "step": 6930 }, { "epoch": 0.81, "grad_norm": 0.9040886126399529, "learning_rate": 4.510637833565995e-05, "loss": 0.892, "step": 6935 }, { "epoch": 0.81, "grad_norm": 0.9257901574732298, "learning_rate": 4.509955956900477e-05, "loss": 0.8653, "step": 6940 }, { "epoch": 0.81, "grad_norm": 0.8725862551744366, "learning_rate": 4.509273657118176e-05, "loss": 0.8321, "step": 6945 }, { "epoch": 0.81, "grad_norm": 0.9536244255736042, "learning_rate": 4.508590934362724e-05, "loss": 0.8459, "step": 6950 }, { "epoch": 0.81, "grad_norm": 0.9536386407407413, "learning_rate": 4.50790778877784e-05, "loss": 0.9171, "step": 6955 }, { "epoch": 0.81, "grad_norm": 1.2092264578524776, "learning_rate": 4.5072242205073344e-05, "loss": 0.9571, "step": 6960 }, { "epoch": 0.81, "grad_norm": 0.9257429332429866, "learning_rate": 4.5065402296951045e-05, "loss": 0.8044, "step": 6965 }, { "epoch": 0.81, "grad_norm": 0.937915435848398, "learning_rate": 4.505855816485137e-05, "loss": 0.8464, "step": 6970 }, { "epoch": 0.81, "grad_norm": 0.8423602784480106, "learning_rate": 4.50517098102151e-05, "loss": 0.8331, "step": 6975 }, { "epoch": 0.82, "grad_norm": 0.9221368086856745, "learning_rate": 4.5044857234483875e-05, "loss": 0.8868, "step": 6980 }, { "epoch": 0.82, "grad_norm": 0.8778849955736138, "learning_rate": 4.5038000439100225e-05, "loss": 0.8345, "step": 6985 }, { "epoch": 0.82, "grad_norm": 0.851903570309572, "learning_rate": 4.5031139425507594e-05, "loss": 0.9198, "step": 6990 }, { "epoch": 0.82, "grad_norm": 0.849994639818449, "learning_rate": 4.502427419515029e-05, "loss": 0.8434, "step": 6995 }, { "epoch": 0.82, "grad_norm": 0.9802392034633585, "learning_rate": 4.501740474947351e-05, "loss": 0.7763, "step": 7000 }, { "epoch": 0.82, "grad_norm": 0.9829221380810969, "learning_rate": 4.501053108992336e-05, "loss": 0.8766, "step": 7005 }, { "epoch": 0.82, "grad_norm": 1.0364018301876563, "learning_rate": 4.50036532179468e-05, "loss": 0.947, "step": 7010 }, { "epoch": 0.82, "grad_norm": 0.894787156679939, "learning_rate": 4.49967711349917e-05, "loss": 0.8863, "step": 7015 }, { "epoch": 0.82, "grad_norm": 0.8963814443599233, "learning_rate": 4.498988484250681e-05, "loss": 0.8614, "step": 7020 }, { "epoch": 0.82, "grad_norm": 0.9396352911688199, "learning_rate": 4.498299434194177e-05, "loss": 0.9108, "step": 7025 }, { "epoch": 0.82, "grad_norm": 1.1286919734957201, "learning_rate": 4.49760996347471e-05, "loss": 0.843, "step": 7030 }, { "epoch": 0.82, "grad_norm": 0.8928371685122503, "learning_rate": 4.496920072237421e-05, "loss": 0.8339, "step": 7035 }, { "epoch": 0.82, "grad_norm": 0.8248669470936781, "learning_rate": 4.496229760627539e-05, "loss": 0.9174, "step": 7040 }, { "epoch": 0.82, "grad_norm": 0.8887586721124235, "learning_rate": 4.495539028790382e-05, "loss": 0.8729, "step": 7045 }, { "epoch": 0.82, "grad_norm": 0.8319933863371728, "learning_rate": 4.494847876871357e-05, "loss": 0.857, "step": 7050 }, { "epoch": 0.82, "grad_norm": 0.8338766329165384, "learning_rate": 4.494156305015957e-05, "loss": 0.7854, "step": 7055 }, { "epoch": 0.82, "grad_norm": 1.0364781233200655, "learning_rate": 4.493464313369767e-05, "loss": 0.8082, "step": 7060 }, { "epoch": 0.83, "grad_norm": 0.9106671310578824, "learning_rate": 4.4927719020784575e-05, "loss": 0.9232, "step": 7065 }, { "epoch": 0.83, "grad_norm": 0.9719011131736461, "learning_rate": 4.492079071287789e-05, "loss": 0.8632, "step": 7070 }, { "epoch": 0.83, "grad_norm": 0.8383111219368286, "learning_rate": 4.491385821143608e-05, "loss": 0.8703, "step": 7075 }, { "epoch": 0.83, "grad_norm": 0.9990005730927887, "learning_rate": 4.490692151791853e-05, "loss": 0.8291, "step": 7080 }, { "epoch": 0.83, "grad_norm": 0.8941197465996347, "learning_rate": 4.489998063378547e-05, "loss": 0.8375, "step": 7085 }, { "epoch": 0.83, "grad_norm": 0.9295576392869906, "learning_rate": 4.489303556049804e-05, "loss": 0.8976, "step": 7090 }, { "epoch": 0.83, "grad_norm": 1.0573081770403099, "learning_rate": 4.488608629951825e-05, "loss": 0.9799, "step": 7095 }, { "epoch": 0.83, "grad_norm": 0.9466852685840387, "learning_rate": 4.487913285230898e-05, "loss": 0.9285, "step": 7100 }, { "epoch": 0.83, "grad_norm": 0.8787468187624394, "learning_rate": 4.4872175220334024e-05, "loss": 0.8556, "step": 7105 }, { "epoch": 0.83, "grad_norm": 0.9993758420568105, "learning_rate": 4.4865213405058014e-05, "loss": 0.9783, "step": 7110 }, { "epoch": 0.83, "grad_norm": 0.9959396538377233, "learning_rate": 4.4858247407946496e-05, "loss": 0.8926, "step": 7115 }, { "epoch": 0.83, "grad_norm": 0.8170150853699522, "learning_rate": 4.4851277230465894e-05, "loss": 0.9501, "step": 7120 }, { "epoch": 0.83, "grad_norm": 0.9171553468384273, "learning_rate": 4.4844302874083486e-05, "loss": 0.9088, "step": 7125 }, { "epoch": 0.83, "grad_norm": 0.7953580046271129, "learning_rate": 4.4837324340267464e-05, "loss": 0.8915, "step": 7130 }, { "epoch": 0.83, "grad_norm": 0.8540800082337093, "learning_rate": 4.483034163048686e-05, "loss": 0.7784, "step": 7135 }, { "epoch": 0.83, "grad_norm": 0.8279915498440072, "learning_rate": 4.4823354746211635e-05, "loss": 0.8532, "step": 7140 }, { "epoch": 0.83, "grad_norm": 0.9172370024750921, "learning_rate": 4.4816363688912576e-05, "loss": 0.8394, "step": 7145 }, { "epoch": 0.84, "grad_norm": 0.8689893295016209, "learning_rate": 4.4809368460061396e-05, "loss": 0.926, "step": 7150 }, { "epoch": 0.84, "grad_norm": 0.98822179973456, "learning_rate": 4.480236906113066e-05, "loss": 0.8924, "step": 7155 }, { "epoch": 0.84, "grad_norm": 0.91796060807761, "learning_rate": 4.4795365493593785e-05, "loss": 0.7828, "step": 7160 }, { "epoch": 0.84, "grad_norm": 0.9321675939799655, "learning_rate": 4.478835775892514e-05, "loss": 0.817, "step": 7165 }, { "epoch": 0.84, "grad_norm": 0.84580744467292, "learning_rate": 4.47813458585999e-05, "loss": 0.9439, "step": 7170 }, { "epoch": 0.84, "grad_norm": 1.076125532310432, "learning_rate": 4.477432979409414e-05, "loss": 0.8454, "step": 7175 }, { "epoch": 0.84, "grad_norm": 0.8852067963073116, "learning_rate": 4.476730956688483e-05, "loss": 0.8063, "step": 7180 }, { "epoch": 0.84, "grad_norm": 0.7138234884476036, "learning_rate": 4.47602851784498e-05, "loss": 0.8239, "step": 7185 }, { "epoch": 0.84, "grad_norm": 0.8755414679994341, "learning_rate": 4.4753256630267746e-05, "loss": 0.9468, "step": 7190 }, { "epoch": 0.84, "grad_norm": 0.8677059920952861, "learning_rate": 4.4746223923818264e-05, "loss": 0.9976, "step": 7195 }, { "epoch": 0.84, "grad_norm": 0.7956263078427499, "learning_rate": 4.47391870605818e-05, "loss": 0.8508, "step": 7200 }, { "epoch": 0.84, "grad_norm": 0.965560728475822, "learning_rate": 4.473214604203969e-05, "loss": 0.8297, "step": 7205 }, { "epoch": 0.84, "grad_norm": 0.863538707034915, "learning_rate": 4.4725100869674155e-05, "loss": 0.933, "step": 7210 }, { "epoch": 0.84, "grad_norm": 0.9302430461321485, "learning_rate": 4.4718051544968266e-05, "loss": 0.8753, "step": 7215 }, { "epoch": 0.84, "grad_norm": 0.8806079690272463, "learning_rate": 4.4710998069405975e-05, "loss": 0.8898, "step": 7220 }, { "epoch": 0.84, "grad_norm": 1.0550357765235905, "learning_rate": 4.470394044447212e-05, "loss": 0.9357, "step": 7225 }, { "epoch": 0.84, "grad_norm": 0.8581816349941516, "learning_rate": 4.469687867165241e-05, "loss": 0.8513, "step": 7230 }, { "epoch": 0.85, "grad_norm": 0.9317271016868308, "learning_rate": 4.468981275243342e-05, "loss": 0.9328, "step": 7235 }, { "epoch": 0.85, "grad_norm": 0.9039812407732404, "learning_rate": 4.468274268830259e-05, "loss": 0.8282, "step": 7240 }, { "epoch": 0.85, "grad_norm": 0.9215722972687789, "learning_rate": 4.467566848074825e-05, "loss": 0.8609, "step": 7245 }, { "epoch": 0.85, "grad_norm": 0.926232072579969, "learning_rate": 4.4668590131259594e-05, "loss": 0.8559, "step": 7250 }, { "epoch": 0.85, "grad_norm": 0.8941784287881752, "learning_rate": 4.4661507641326685e-05, "loss": 0.8695, "step": 7255 }, { "epoch": 0.85, "grad_norm": 0.9449107847537849, "learning_rate": 4.4654421012440476e-05, "loss": 0.8083, "step": 7260 }, { "epoch": 0.85, "grad_norm": 0.8335736185175803, "learning_rate": 4.464733024609275e-05, "loss": 0.9429, "step": 7265 }, { "epoch": 0.85, "grad_norm": 1.0711296982828185, "learning_rate": 4.464023534377622e-05, "loss": 0.9065, "step": 7270 }, { "epoch": 0.85, "grad_norm": 0.9088398525717711, "learning_rate": 4.46331363069844e-05, "loss": 0.8549, "step": 7275 }, { "epoch": 0.85, "grad_norm": 0.9450818322946247, "learning_rate": 4.462603313721174e-05, "loss": 0.9091, "step": 7280 }, { "epoch": 0.85, "grad_norm": 0.8363897736840603, "learning_rate": 4.461892583595353e-05, "loss": 0.8449, "step": 7285 }, { "epoch": 0.85, "grad_norm": 0.8339214324087547, "learning_rate": 4.461181440470592e-05, "loss": 0.8904, "step": 7290 }, { "epoch": 0.85, "grad_norm": 0.906352616047358, "learning_rate": 4.460469884496595e-05, "loss": 0.8437, "step": 7295 }, { "epoch": 0.85, "grad_norm": 0.8389865386863039, "learning_rate": 4.4597579158231505e-05, "loss": 0.8968, "step": 7300 }, { "epoch": 0.85, "grad_norm": 0.8889636566786414, "learning_rate": 4.4590455346001375e-05, "loss": 0.8225, "step": 7305 }, { "epoch": 0.85, "grad_norm": 0.8750682110510425, "learning_rate": 4.458332740977519e-05, "loss": 0.9207, "step": 7310 }, { "epoch": 0.85, "grad_norm": 0.9855576560554214, "learning_rate": 4.4576195351053444e-05, "loss": 0.8669, "step": 7315 }, { "epoch": 0.86, "grad_norm": 0.993406261887127, "learning_rate": 4.4569059171337514e-05, "loss": 0.8795, "step": 7320 }, { "epoch": 0.86, "grad_norm": 0.8275597830126532, "learning_rate": 4.4561918872129646e-05, "loss": 0.7501, "step": 7325 }, { "epoch": 0.86, "grad_norm": 0.852750929980776, "learning_rate": 4.4554774454932944e-05, "loss": 0.8946, "step": 7330 }, { "epoch": 0.86, "grad_norm": 0.9669793933891, "learning_rate": 4.454762592125139e-05, "loss": 0.8546, "step": 7335 }, { "epoch": 0.86, "grad_norm": 1.4807142343419046, "learning_rate": 4.454047327258982e-05, "loss": 0.9621, "step": 7340 }, { "epoch": 0.86, "grad_norm": 0.8685218519116757, "learning_rate": 4.4533316510453936e-05, "loss": 0.8769, "step": 7345 }, { "epoch": 0.86, "grad_norm": 0.8838099072632095, "learning_rate": 4.452615563635032e-05, "loss": 0.8063, "step": 7350 }, { "epoch": 0.86, "grad_norm": 0.8384367798190346, "learning_rate": 4.451899065178641e-05, "loss": 0.8492, "step": 7355 }, { "epoch": 0.86, "grad_norm": 1.0879312532034804, "learning_rate": 4.4511821558270505e-05, "loss": 0.8746, "step": 7360 }, { "epoch": 0.86, "grad_norm": 0.8753858386142155, "learning_rate": 4.4504648357311774e-05, "loss": 0.8384, "step": 7365 }, { "epoch": 0.86, "grad_norm": 0.8975403180372352, "learning_rate": 4.449747105042026e-05, "loss": 0.8702, "step": 7370 }, { "epoch": 0.86, "grad_norm": 0.8824452326774359, "learning_rate": 4.449028963910685e-05, "loss": 0.8306, "step": 7375 }, { "epoch": 0.86, "grad_norm": 0.9338620618287319, "learning_rate": 4.4483104124883324e-05, "loss": 0.8762, "step": 7380 }, { "epoch": 0.86, "grad_norm": 0.8917119009385744, "learning_rate": 4.447591450926229e-05, "loss": 0.8558, "step": 7385 }, { "epoch": 0.86, "grad_norm": 0.8499078539903893, "learning_rate": 4.4468720793757236e-05, "loss": 0.8949, "step": 7390 }, { "epoch": 0.86, "grad_norm": 0.9651890648776259, "learning_rate": 4.446152297988252e-05, "loss": 0.908, "step": 7395 }, { "epoch": 0.86, "grad_norm": 0.9031133732989215, "learning_rate": 4.445432106915337e-05, "loss": 0.8266, "step": 7400 }, { "epoch": 0.87, "grad_norm": 0.8490767265607513, "learning_rate": 4.444711506308584e-05, "loss": 0.8585, "step": 7405 }, { "epoch": 0.87, "grad_norm": 0.9354401790724719, "learning_rate": 4.4439904963196894e-05, "loss": 0.8587, "step": 7410 }, { "epoch": 0.87, "grad_norm": 0.8416776015267864, "learning_rate": 4.443269077100431e-05, "loss": 0.8578, "step": 7415 }, { "epoch": 0.87, "grad_norm": 0.9068181958709155, "learning_rate": 4.4425472488026774e-05, "loss": 0.8348, "step": 7420 }, { "epoch": 0.87, "grad_norm": 0.8552300375235387, "learning_rate": 4.44182501157838e-05, "loss": 0.8834, "step": 7425 }, { "epoch": 0.87, "grad_norm": 1.1822478721503025, "learning_rate": 4.4411023655795766e-05, "loss": 0.9594, "step": 7430 }, { "epoch": 0.87, "grad_norm": 0.9337057371607408, "learning_rate": 4.440379310958392e-05, "loss": 0.952, "step": 7435 }, { "epoch": 0.87, "grad_norm": 0.8881575925688209, "learning_rate": 4.439655847867038e-05, "loss": 0.887, "step": 7440 }, { "epoch": 0.87, "grad_norm": 0.8060294523314736, "learning_rate": 4.4389319764578095e-05, "loss": 0.7746, "step": 7445 }, { "epoch": 0.87, "grad_norm": 0.8504355014812155, "learning_rate": 4.43820769688309e-05, "loss": 0.8395, "step": 7450 }, { "epoch": 0.87, "grad_norm": 0.8735277695511526, "learning_rate": 4.4374830092953487e-05, "loss": 0.9266, "step": 7455 }, { "epoch": 0.87, "grad_norm": 0.9524538225370099, "learning_rate": 4.436757913847138e-05, "loss": 0.8189, "step": 7460 }, { "epoch": 0.87, "grad_norm": 0.9359354689109824, "learning_rate": 4.436032410691099e-05, "loss": 0.9346, "step": 7465 }, { "epoch": 0.87, "grad_norm": 0.847021754575933, "learning_rate": 4.4353064999799583e-05, "loss": 0.8342, "step": 7470 }, { "epoch": 0.87, "grad_norm": 1.1582344510460938, "learning_rate": 4.4345801818665266e-05, "loss": 0.9424, "step": 7475 }, { "epoch": 0.87, "grad_norm": 0.9179071600486363, "learning_rate": 4.4338534565037026e-05, "loss": 0.8427, "step": 7480 }, { "epoch": 0.87, "grad_norm": 0.8281592959524418, "learning_rate": 4.433126324044469e-05, "loss": 0.7833, "step": 7485 }, { "epoch": 0.88, "grad_norm": 0.9500621494862264, "learning_rate": 4.4323987846418946e-05, "loss": 0.9238, "step": 7490 }, { "epoch": 0.88, "grad_norm": 0.9553499030283522, "learning_rate": 4.431670838449134e-05, "loss": 0.8584, "step": 7495 }, { "epoch": 0.88, "grad_norm": 0.9655582350564316, "learning_rate": 4.4309424856194285e-05, "loss": 0.8825, "step": 7500 }, { "epoch": 0.88, "grad_norm": 0.8487596128740992, "learning_rate": 4.430213726306103e-05, "loss": 0.9031, "step": 7505 }, { "epoch": 0.88, "grad_norm": 0.9083099187740944, "learning_rate": 4.429484560662569e-05, "loss": 0.9605, "step": 7510 }, { "epoch": 0.88, "grad_norm": 0.9626390301041929, "learning_rate": 4.428754988842324e-05, "loss": 0.8377, "step": 7515 }, { "epoch": 0.88, "grad_norm": 0.8429245794037871, "learning_rate": 4.42802501099895e-05, "loss": 0.9388, "step": 7520 }, { "epoch": 0.88, "grad_norm": 0.8515202658200619, "learning_rate": 4.427294627286116e-05, "loss": 0.906, "step": 7525 }, { "epoch": 0.88, "grad_norm": 0.8423680410003936, "learning_rate": 4.426563837857574e-05, "loss": 0.8997, "step": 7530 }, { "epoch": 0.88, "grad_norm": 0.8455909980356767, "learning_rate": 4.425832642867164e-05, "loss": 0.8226, "step": 7535 }, { "epoch": 0.88, "grad_norm": 1.0586815596505421, "learning_rate": 4.42510104246881e-05, "loss": 0.8735, "step": 7540 }, { "epoch": 0.88, "grad_norm": 0.7989108896023916, "learning_rate": 4.4243690368165206e-05, "loss": 0.7778, "step": 7545 }, { "epoch": 0.88, "grad_norm": 0.9034705300027844, "learning_rate": 4.423636626064391e-05, "loss": 0.8303, "step": 7550 }, { "epoch": 0.88, "grad_norm": 0.8738189650258167, "learning_rate": 4.4229038103666035e-05, "loss": 0.8344, "step": 7555 }, { "epoch": 0.88, "grad_norm": 0.9029087738200049, "learning_rate": 4.4221705898774196e-05, "loss": 0.8146, "step": 7560 }, { "epoch": 0.88, "grad_norm": 0.802702294189799, "learning_rate": 4.421436964751193e-05, "loss": 0.9148, "step": 7565 }, { "epoch": 0.88, "grad_norm": 0.9196903884412462, "learning_rate": 4.420702935142358e-05, "loss": 0.8627, "step": 7570 }, { "epoch": 0.89, "grad_norm": 0.9192381209596086, "learning_rate": 4.419968501205436e-05, "loss": 0.7788, "step": 7575 }, { "epoch": 0.89, "grad_norm": 0.875387380395048, "learning_rate": 4.4192336630950335e-05, "loss": 0.8942, "step": 7580 }, { "epoch": 0.89, "grad_norm": 0.8370622170085728, "learning_rate": 4.418498420965841e-05, "loss": 0.8686, "step": 7585 }, { "epoch": 0.89, "grad_norm": 0.976379331606312, "learning_rate": 4.417762774972634e-05, "loss": 0.905, "step": 7590 }, { "epoch": 0.89, "grad_norm": 0.8351051453852988, "learning_rate": 4.417026725270275e-05, "loss": 0.8752, "step": 7595 }, { "epoch": 0.89, "grad_norm": 0.8640799042921292, "learning_rate": 4.41629027201371e-05, "loss": 0.8371, "step": 7600 }, { "epoch": 0.89, "grad_norm": 0.918241552380792, "learning_rate": 4.415553415357969e-05, "loss": 0.8618, "step": 7605 }, { "epoch": 0.89, "grad_norm": 0.8356063467697566, "learning_rate": 4.414816155458169e-05, "loss": 0.8498, "step": 7610 }, { "epoch": 0.89, "grad_norm": 0.8696933172220548, "learning_rate": 4.4140784924695113e-05, "loss": 0.9145, "step": 7615 }, { "epoch": 0.89, "grad_norm": 0.8086781616429023, "learning_rate": 4.413340426547281e-05, "loss": 0.9038, "step": 7620 }, { "epoch": 0.89, "grad_norm": 0.8995082272653837, "learning_rate": 4.412601957846848e-05, "loss": 0.9295, "step": 7625 }, { "epoch": 0.89, "grad_norm": 0.8417526696960922, "learning_rate": 4.411863086523669e-05, "loss": 0.7611, "step": 7630 }, { "epoch": 0.89, "grad_norm": 0.8964104192476045, "learning_rate": 4.4111238127332846e-05, "loss": 0.7623, "step": 7635 }, { "epoch": 0.89, "grad_norm": 0.8097264311749593, "learning_rate": 4.410384136631317e-05, "loss": 0.7836, "step": 7640 }, { "epoch": 0.89, "grad_norm": 0.8553596703542771, "learning_rate": 4.409644058373479e-05, "loss": 0.81, "step": 7645 }, { "epoch": 0.89, "grad_norm": 0.9262993151525223, "learning_rate": 4.4089035781155624e-05, "loss": 0.9227, "step": 7650 }, { "epoch": 0.89, "grad_norm": 0.8414048889190111, "learning_rate": 4.408162696013447e-05, "loss": 0.9514, "step": 7655 }, { "epoch": 0.89, "grad_norm": 0.9945279706822402, "learning_rate": 4.407421412223097e-05, "loss": 0.9017, "step": 7660 }, { "epoch": 0.9, "grad_norm": 0.8764404857676572, "learning_rate": 4.406679726900559e-05, "loss": 0.8853, "step": 7665 }, { "epoch": 0.9, "grad_norm": 0.9700127757280103, "learning_rate": 4.4059376402019667e-05, "loss": 0.9016, "step": 7670 }, { "epoch": 0.9, "grad_norm": 0.9122234958658786, "learning_rate": 4.405195152283536e-05, "loss": 0.7754, "step": 7675 }, { "epoch": 0.9, "grad_norm": 0.8377198184403536, "learning_rate": 4.4044522633015696e-05, "loss": 0.8706, "step": 7680 }, { "epoch": 0.9, "grad_norm": 0.9509993766392825, "learning_rate": 4.403708973412453e-05, "loss": 0.9026, "step": 7685 }, { "epoch": 0.9, "grad_norm": 0.9211180166402848, "learning_rate": 4.402965282772655e-05, "loss": 0.8235, "step": 7690 }, { "epoch": 0.9, "grad_norm": 0.8863883737860377, "learning_rate": 4.4022211915387324e-05, "loss": 0.8747, "step": 7695 }, { "epoch": 0.9, "grad_norm": 0.9679860825554182, "learning_rate": 4.4014766998673235e-05, "loss": 0.8785, "step": 7700 }, { "epoch": 0.9, "grad_norm": 1.0920781342037216, "learning_rate": 4.400731807915151e-05, "loss": 0.8382, "step": 7705 }, { "epoch": 0.9, "grad_norm": 1.0631999306433024, "learning_rate": 4.399986515839024e-05, "loss": 0.941, "step": 7710 }, { "epoch": 0.9, "grad_norm": 0.9738722037707661, "learning_rate": 4.399240823795832e-05, "loss": 0.9302, "step": 7715 }, { "epoch": 0.9, "grad_norm": 1.0510927259912366, "learning_rate": 4.398494731942553e-05, "loss": 0.8237, "step": 7720 }, { "epoch": 0.9, "grad_norm": 0.9407111049629848, "learning_rate": 4.397748240436246e-05, "loss": 0.9747, "step": 7725 }, { "epoch": 0.9, "grad_norm": 0.7885052263345511, "learning_rate": 4.3970013494340554e-05, "loss": 0.8429, "step": 7730 }, { "epoch": 0.9, "grad_norm": 0.9167767667033337, "learning_rate": 4.39625405909321e-05, "loss": 0.8546, "step": 7735 }, { "epoch": 0.9, "grad_norm": 0.8899655500112281, "learning_rate": 4.395506369571022e-05, "loss": 0.8293, "step": 7740 }, { "epoch": 0.9, "grad_norm": 0.9261869430632528, "learning_rate": 4.394758281024888e-05, "loss": 0.9227, "step": 7745 }, { "epoch": 0.91, "grad_norm": 7.604827012798249, "learning_rate": 4.3940097936122885e-05, "loss": 0.8483, "step": 7750 }, { "epoch": 0.91, "grad_norm": 0.9843785975297291, "learning_rate": 4.393260907490787e-05, "loss": 0.8694, "step": 7755 }, { "epoch": 0.91, "grad_norm": 0.8153554723451127, "learning_rate": 4.392511622818034e-05, "loss": 0.8588, "step": 7760 }, { "epoch": 0.91, "grad_norm": 0.8989001263785075, "learning_rate": 4.39176193975176e-05, "loss": 0.9211, "step": 7765 }, { "epoch": 0.91, "grad_norm": 0.8703437261717202, "learning_rate": 4.391011858449782e-05, "loss": 0.8237, "step": 7770 }, { "epoch": 0.91, "grad_norm": 0.8786086110597998, "learning_rate": 4.390261379069999e-05, "loss": 0.8792, "step": 7775 }, { "epoch": 0.91, "grad_norm": 0.8811646614688738, "learning_rate": 4.389510501770395e-05, "loss": 0.8552, "step": 7780 }, { "epoch": 0.91, "grad_norm": 0.9546398739304482, "learning_rate": 4.388759226709038e-05, "loss": 0.8962, "step": 7785 }, { "epoch": 0.91, "grad_norm": 2.2749201624682613, "learning_rate": 4.38800755404408e-05, "loss": 0.773, "step": 7790 }, { "epoch": 0.91, "grad_norm": 0.9685431429058441, "learning_rate": 4.3872554839337556e-05, "loss": 1.0673, "step": 7795 }, { "epoch": 0.91, "grad_norm": 0.9578475078534364, "learning_rate": 4.386503016536382e-05, "loss": 0.8263, "step": 7800 }, { "epoch": 0.91, "grad_norm": 0.8811396651535431, "learning_rate": 4.385750152010362e-05, "loss": 0.7402, "step": 7805 }, { "epoch": 0.91, "grad_norm": 0.9070274254113131, "learning_rate": 4.384996890514183e-05, "loss": 0.9536, "step": 7810 }, { "epoch": 0.91, "grad_norm": 0.8982033015188957, "learning_rate": 4.3842432322064125e-05, "loss": 0.8949, "step": 7815 }, { "epoch": 0.91, "grad_norm": 0.8615567369881546, "learning_rate": 4.383489177245705e-05, "loss": 0.8619, "step": 7820 }, { "epoch": 0.91, "grad_norm": 0.8434363800544157, "learning_rate": 4.382734725790796e-05, "loss": 0.8753, "step": 7825 }, { "epoch": 0.91, "grad_norm": 1.1535678646719105, "learning_rate": 4.381979878000506e-05, "loss": 0.941, "step": 7830 }, { "epoch": 0.92, "grad_norm": 0.9130487069363806, "learning_rate": 4.381224634033738e-05, "loss": 0.8545, "step": 7835 }, { "epoch": 0.92, "grad_norm": 0.9812289780991902, "learning_rate": 4.380468994049478e-05, "loss": 0.9435, "step": 7840 }, { "epoch": 0.92, "grad_norm": 0.8292687856191864, "learning_rate": 4.379712958206798e-05, "loss": 0.8656, "step": 7845 }, { "epoch": 0.92, "grad_norm": 1.0232887613760528, "learning_rate": 4.37895652666485e-05, "loss": 0.9256, "step": 7850 }, { "epoch": 0.92, "grad_norm": 0.8886137181697363, "learning_rate": 4.378199699582871e-05, "loss": 0.8973, "step": 7855 }, { "epoch": 0.92, "grad_norm": 0.9205648031929387, "learning_rate": 4.377442477120182e-05, "loss": 0.9087, "step": 7860 }, { "epoch": 0.92, "grad_norm": 0.8785491269263982, "learning_rate": 4.3766848594361854e-05, "loss": 0.8481, "step": 7865 }, { "epoch": 0.92, "grad_norm": 0.9416529397448189, "learning_rate": 4.3759268466903675e-05, "loss": 0.8888, "step": 7870 }, { "epoch": 0.92, "grad_norm": 0.8939233801619114, "learning_rate": 4.3751684390422984e-05, "loss": 0.9231, "step": 7875 }, { "epoch": 0.92, "grad_norm": 0.975428929451796, "learning_rate": 4.37440963665163e-05, "loss": 0.8259, "step": 7880 }, { "epoch": 0.92, "grad_norm": 1.037004013318425, "learning_rate": 4.3736504396781e-05, "loss": 0.868, "step": 7885 }, { "epoch": 0.92, "grad_norm": 0.9111686711396364, "learning_rate": 4.372890848281525e-05, "loss": 0.8528, "step": 7890 }, { "epoch": 0.92, "grad_norm": 0.8140355105059677, "learning_rate": 4.3721308626218085e-05, "loss": 0.8923, "step": 7895 }, { "epoch": 0.92, "grad_norm": 0.9158453273251607, "learning_rate": 4.3713704828589346e-05, "loss": 0.8618, "step": 7900 }, { "epoch": 0.92, "grad_norm": 0.9220709512441392, "learning_rate": 4.370609709152972e-05, "loss": 0.8994, "step": 7905 }, { "epoch": 0.92, "grad_norm": 0.9473518294180306, "learning_rate": 4.369848541664071e-05, "loss": 0.878, "step": 7910 }, { "epoch": 0.92, "grad_norm": 0.8482481449234285, "learning_rate": 4.369086980552466e-05, "loss": 0.8266, "step": 7915 }, { "epoch": 0.93, "grad_norm": 0.993146400834012, "learning_rate": 4.368325025978472e-05, "loss": 0.9275, "step": 7920 }, { "epoch": 0.93, "grad_norm": 0.887792878713791, "learning_rate": 4.36756267810249e-05, "loss": 0.7967, "step": 7925 }, { "epoch": 0.93, "grad_norm": 0.8569225427737016, "learning_rate": 4.366799937085002e-05, "loss": 0.7997, "step": 7930 }, { "epoch": 0.93, "grad_norm": 0.830616619635648, "learning_rate": 4.366036803086573e-05, "loss": 0.8733, "step": 7935 }, { "epoch": 0.93, "grad_norm": 1.0218893664993307, "learning_rate": 4.365273276267849e-05, "loss": 0.7861, "step": 7940 }, { "epoch": 0.93, "grad_norm": 0.8766447865684245, "learning_rate": 4.3645093567895626e-05, "loss": 1.2592, "step": 7945 }, { "epoch": 0.93, "grad_norm": 0.7944329052758466, "learning_rate": 4.363745044812526e-05, "loss": 0.7989, "step": 7950 }, { "epoch": 0.93, "grad_norm": 0.8536075854356092, "learning_rate": 4.362980340497635e-05, "loss": 0.8326, "step": 7955 }, { "epoch": 0.93, "grad_norm": 0.7784297862170187, "learning_rate": 4.3622152440058674e-05, "loss": 0.7916, "step": 7960 }, { "epoch": 0.93, "grad_norm": 0.8848112222364299, "learning_rate": 4.361449755498285e-05, "loss": 0.8864, "step": 7965 }, { "epoch": 0.93, "grad_norm": 0.8524859394050325, "learning_rate": 4.36068387513603e-05, "loss": 0.8436, "step": 7970 }, { "epoch": 0.93, "grad_norm": 0.8654517155959764, "learning_rate": 4.359917603080329e-05, "loss": 0.8255, "step": 7975 }, { "epoch": 0.93, "grad_norm": 0.9139644605327134, "learning_rate": 4.35915093949249e-05, "loss": 0.9451, "step": 7980 }, { "epoch": 0.93, "grad_norm": 0.902218266135654, "learning_rate": 4.358383884533903e-05, "loss": 0.946, "step": 7985 }, { "epoch": 0.93, "grad_norm": 0.91475779195718, "learning_rate": 4.3576164383660436e-05, "loss": 0.9333, "step": 7990 }, { "epoch": 0.93, "grad_norm": 0.905054290621748, "learning_rate": 4.356848601150464e-05, "loss": 0.8524, "step": 7995 }, { "epoch": 0.93, "grad_norm": 0.8029074963074335, "learning_rate": 4.3560803730488044e-05, "loss": 0.8801, "step": 8000 }, { "epoch": 0.94, "grad_norm": 0.831531752281927, "learning_rate": 4.355311754222784e-05, "loss": 0.8057, "step": 8005 }, { "epoch": 0.94, "grad_norm": 0.8249539916397222, "learning_rate": 4.354542744834204e-05, "loss": 0.8134, "step": 8010 }, { "epoch": 0.94, "grad_norm": 0.856510386137243, "learning_rate": 4.353773345044951e-05, "loss": 0.8655, "step": 8015 }, { "epoch": 0.94, "grad_norm": 1.0670913060531149, "learning_rate": 4.353003555016991e-05, "loss": 0.9247, "step": 8020 }, { "epoch": 0.94, "grad_norm": 0.9756111892275695, "learning_rate": 4.3522333749123716e-05, "loss": 0.9578, "step": 8025 }, { "epoch": 0.94, "grad_norm": 0.8848304769493224, "learning_rate": 4.3514628048932255e-05, "loss": 0.8264, "step": 8030 }, { "epoch": 0.94, "grad_norm": 0.8748368542147439, "learning_rate": 4.350691845121765e-05, "loss": 0.9501, "step": 8035 }, { "epoch": 0.94, "grad_norm": 0.8974351797393816, "learning_rate": 4.349920495760285e-05, "loss": 0.8997, "step": 8040 }, { "epoch": 0.94, "grad_norm": 1.2611093166185807, "learning_rate": 4.349148756971163e-05, "loss": 0.8797, "step": 8045 }, { "epoch": 0.94, "grad_norm": 1.079256272851702, "learning_rate": 4.3483766289168584e-05, "loss": 0.8593, "step": 8050 }, { "epoch": 0.94, "grad_norm": 0.8340106284949618, "learning_rate": 4.347604111759911e-05, "loss": 0.8293, "step": 8055 }, { "epoch": 0.94, "grad_norm": 0.7998464253661705, "learning_rate": 4.346831205662944e-05, "loss": 0.8679, "step": 8060 }, { "epoch": 0.94, "grad_norm": 0.9589135364911668, "learning_rate": 4.3460579107886645e-05, "loss": 0.929, "step": 8065 }, { "epoch": 0.94, "grad_norm": 1.3926542476798163, "learning_rate": 4.345284227299857e-05, "loss": 0.9015, "step": 8070 }, { "epoch": 0.94, "grad_norm": 1.0190252572650633, "learning_rate": 4.3445101553593905e-05, "loss": 0.9291, "step": 8075 }, { "epoch": 0.94, "grad_norm": 0.8653309228283771, "learning_rate": 4.3437356951302146e-05, "loss": 0.8695, "step": 8080 }, { "epoch": 0.94, "grad_norm": 0.7860877367668022, "learning_rate": 4.342960846775363e-05, "loss": 0.8471, "step": 8085 }, { "epoch": 0.95, "grad_norm": 0.8785542500215634, "learning_rate": 4.342185610457947e-05, "loss": 0.8249, "step": 8090 }, { "epoch": 0.95, "grad_norm": 1.0210060558161365, "learning_rate": 4.3414099863411636e-05, "loss": 0.8873, "step": 8095 }, { "epoch": 0.95, "grad_norm": 0.9000806358746681, "learning_rate": 4.340633974588289e-05, "loss": 0.7748, "step": 8100 }, { "epoch": 0.95, "grad_norm": 0.8143824814684562, "learning_rate": 4.3398575753626836e-05, "loss": 0.8731, "step": 8105 }, { "epoch": 0.95, "grad_norm": 0.8370597099782764, "learning_rate": 4.339080788827785e-05, "loss": 0.8549, "step": 8110 }, { "epoch": 0.95, "grad_norm": 0.8872156968522008, "learning_rate": 4.338303615147117e-05, "loss": 0.8865, "step": 8115 }, { "epoch": 0.95, "grad_norm": 0.9910013496879678, "learning_rate": 4.337526054484281e-05, "loss": 0.8821, "step": 8120 }, { "epoch": 0.95, "grad_norm": 0.8928308931971602, "learning_rate": 4.336748107002964e-05, "loss": 0.9145, "step": 8125 }, { "epoch": 0.95, "grad_norm": 0.9530466754362821, "learning_rate": 4.335969772866929e-05, "loss": 0.875, "step": 8130 }, { "epoch": 0.95, "grad_norm": 0.9007972002630497, "learning_rate": 4.335191052240026e-05, "loss": 0.9073, "step": 8135 }, { "epoch": 0.95, "grad_norm": 0.8315730303923892, "learning_rate": 4.334411945286183e-05, "loss": 0.9835, "step": 8140 }, { "epoch": 0.95, "grad_norm": 0.9965317077573741, "learning_rate": 4.3336324521694104e-05, "loss": 0.8098, "step": 8145 }, { "epoch": 0.95, "grad_norm": 0.8585706172071598, "learning_rate": 4.3328525730538e-05, "loss": 0.9347, "step": 8150 }, { "epoch": 0.95, "grad_norm": 0.8972964846110897, "learning_rate": 4.332072308103523e-05, "loss": 0.8488, "step": 8155 }, { "epoch": 0.95, "grad_norm": 0.8364032291090727, "learning_rate": 4.3312916574828346e-05, "loss": 0.821, "step": 8160 }, { "epoch": 0.95, "grad_norm": 0.9005046384498866, "learning_rate": 4.33051062135607e-05, "loss": 1.0117, "step": 8165 }, { "epoch": 0.95, "grad_norm": 0.8845071675603398, "learning_rate": 4.329729199887644e-05, "loss": 0.8134, "step": 8170 }, { "epoch": 0.96, "grad_norm": 0.7576112709340472, "learning_rate": 4.328947393242056e-05, "loss": 0.807, "step": 8175 }, { "epoch": 0.96, "grad_norm": 0.8334840626733158, "learning_rate": 4.328165201583884e-05, "loss": 0.8181, "step": 8180 }, { "epoch": 0.96, "grad_norm": 0.8981205993340662, "learning_rate": 4.327382625077787e-05, "loss": 0.938, "step": 8185 }, { "epoch": 0.96, "grad_norm": 0.8008175152417177, "learning_rate": 4.3265996638885054e-05, "loss": 0.8813, "step": 8190 }, { "epoch": 0.96, "grad_norm": 0.9899264131841488, "learning_rate": 4.325816318180861e-05, "loss": 0.9119, "step": 8195 }, { "epoch": 0.96, "grad_norm": 0.986877206037316, "learning_rate": 4.325032588119757e-05, "loss": 0.8776, "step": 8200 }, { "epoch": 0.96, "grad_norm": 0.830813507739178, "learning_rate": 4.324248473870176e-05, "loss": 0.8436, "step": 8205 }, { "epoch": 0.96, "grad_norm": 0.9872635561319187, "learning_rate": 4.323463975597182e-05, "loss": 0.9274, "step": 8210 }, { "epoch": 0.96, "grad_norm": 0.9530456433579224, "learning_rate": 4.322679093465921e-05, "loss": 0.9176, "step": 8215 }, { "epoch": 0.96, "grad_norm": 0.9718604669934869, "learning_rate": 4.3218938276416176e-05, "loss": 0.9145, "step": 8220 }, { "epoch": 0.96, "grad_norm": 0.8867516134392972, "learning_rate": 4.32110817828958e-05, "loss": 0.8624, "step": 8225 }, { "epoch": 0.96, "grad_norm": 0.858527411325421, "learning_rate": 4.320322145575194e-05, "loss": 0.8577, "step": 8230 }, { "epoch": 0.96, "grad_norm": 0.8929595856361262, "learning_rate": 4.319535729663929e-05, "loss": 0.8205, "step": 8235 }, { "epoch": 0.96, "grad_norm": 0.8736540470074058, "learning_rate": 4.3187489307213344e-05, "loss": 0.8287, "step": 8240 }, { "epoch": 0.96, "grad_norm": 0.8279054855564584, "learning_rate": 4.317961748913038e-05, "loss": 0.8817, "step": 8245 }, { "epoch": 0.96, "grad_norm": 1.060861338637028, "learning_rate": 4.3171741844047506e-05, "loss": 0.8359, "step": 8250 }, { "epoch": 0.96, "grad_norm": 0.9268994828497623, "learning_rate": 4.316386237362262e-05, "loss": 0.8926, "step": 8255 }, { "epoch": 0.97, "grad_norm": 0.9288009933447495, "learning_rate": 4.315597907951444e-05, "loss": 0.8847, "step": 8260 }, { "epoch": 0.97, "grad_norm": 0.9456603140696496, "learning_rate": 4.314809196338249e-05, "loss": 0.9044, "step": 8265 }, { "epoch": 0.97, "grad_norm": 0.9409157423481455, "learning_rate": 4.3140201026887076e-05, "loss": 0.9362, "step": 8270 }, { "epoch": 0.97, "grad_norm": 1.0359051877183574, "learning_rate": 4.3132306271689325e-05, "loss": 0.918, "step": 8275 }, { "epoch": 0.97, "grad_norm": 0.8632678728285839, "learning_rate": 4.3124407699451174e-05, "loss": 0.9411, "step": 8280 }, { "epoch": 0.97, "grad_norm": 0.9006400051978153, "learning_rate": 4.3116505311835356e-05, "loss": 0.9037, "step": 8285 }, { "epoch": 0.97, "grad_norm": 0.9222179607791998, "learning_rate": 4.3108599110505395e-05, "loss": 0.8709, "step": 8290 }, { "epoch": 0.97, "grad_norm": 0.9194497962870436, "learning_rate": 4.310068909712564e-05, "loss": 0.8563, "step": 8295 }, { "epoch": 0.97, "grad_norm": 0.8560794466219108, "learning_rate": 4.309277527336122e-05, "loss": 0.8541, "step": 8300 }, { "epoch": 0.97, "grad_norm": 0.9134584307097554, "learning_rate": 4.3084857640878085e-05, "loss": 0.937, "step": 8305 }, { "epoch": 0.97, "grad_norm": 0.8631699793571805, "learning_rate": 4.307693620134298e-05, "loss": 0.8839, "step": 8310 }, { "epoch": 0.97, "grad_norm": 0.7938715044186743, "learning_rate": 4.306901095642345e-05, "loss": 0.7466, "step": 8315 }, { "epoch": 0.97, "grad_norm": 0.908922549759451, "learning_rate": 4.306108190778785e-05, "loss": 0.8871, "step": 8320 }, { "epoch": 0.97, "grad_norm": 0.9613961768025776, "learning_rate": 4.305314905710531e-05, "loss": 0.8913, "step": 8325 }, { "epoch": 0.97, "grad_norm": 0.8218487721111957, "learning_rate": 4.304521240604579e-05, "loss": 1.0229, "step": 8330 }, { "epoch": 0.97, "grad_norm": 0.9226161424710482, "learning_rate": 4.3037271956280034e-05, "loss": 0.7477, "step": 8335 }, { "epoch": 0.97, "grad_norm": 1.059244778426306, "learning_rate": 4.3029327709479606e-05, "loss": 0.8783, "step": 8340 }, { "epoch": 0.97, "grad_norm": 0.990054633319157, "learning_rate": 4.3021379667316825e-05, "loss": 0.8801, "step": 8345 }, { "epoch": 0.98, "grad_norm": 0.8454553073110684, "learning_rate": 4.301342783146486e-05, "loss": 0.9038, "step": 8350 }, { "epoch": 0.98, "grad_norm": 1.0051622442985124, "learning_rate": 4.300547220359764e-05, "loss": 0.8841, "step": 8355 }, { "epoch": 0.98, "grad_norm": 1.0767182244496156, "learning_rate": 4.2997512785389924e-05, "loss": 0.865, "step": 8360 }, { "epoch": 0.98, "grad_norm": 0.9068668624822273, "learning_rate": 4.2989549578517235e-05, "loss": 0.9434, "step": 8365 }, { "epoch": 0.98, "grad_norm": 0.9601550402458588, "learning_rate": 4.298158258465592e-05, "loss": 0.8319, "step": 8370 }, { "epoch": 0.98, "grad_norm": 1.1524707054789232, "learning_rate": 4.297361180548313e-05, "loss": 0.9629, "step": 8375 }, { "epoch": 0.98, "grad_norm": 0.86720359093305, "learning_rate": 4.296563724267677e-05, "loss": 0.8412, "step": 8380 }, { "epoch": 0.98, "grad_norm": 1.0625063481591932, "learning_rate": 4.295765889791558e-05, "loss": 0.8373, "step": 8385 }, { "epoch": 0.98, "grad_norm": 0.804245606488221, "learning_rate": 4.2949676772879096e-05, "loss": 0.8035, "step": 8390 }, { "epoch": 0.98, "grad_norm": 0.9797327685527931, "learning_rate": 4.294169086924763e-05, "loss": 0.9291, "step": 8395 }, { "epoch": 0.98, "grad_norm": 0.9440667048256564, "learning_rate": 4.293370118870229e-05, "loss": 0.8395, "step": 8400 }, { "epoch": 0.98, "grad_norm": 0.8492610007357806, "learning_rate": 4.2925707732925e-05, "loss": 0.8207, "step": 8405 }, { "epoch": 0.98, "grad_norm": 0.891723107223008, "learning_rate": 4.2917710503598456e-05, "loss": 0.8967, "step": 8410 }, { "epoch": 0.98, "grad_norm": 1.0062080236250128, "learning_rate": 4.290970950240617e-05, "loss": 0.9174, "step": 8415 }, { "epoch": 0.98, "grad_norm": 0.9200574284219023, "learning_rate": 4.290170473103243e-05, "loss": 0.9054, "step": 8420 }, { "epoch": 0.98, "grad_norm": 0.868072689426447, "learning_rate": 4.289369619116233e-05, "loss": 0.8697, "step": 8425 }, { "epoch": 0.98, "grad_norm": 1.0437361215521352, "learning_rate": 4.288568388448174e-05, "loss": 0.8899, "step": 8430 }, { "epoch": 0.99, "grad_norm": 0.8937510612281192, "learning_rate": 4.2877667812677346e-05, "loss": 0.8259, "step": 8435 }, { "epoch": 0.99, "grad_norm": 0.9579781125129077, "learning_rate": 4.286964797743661e-05, "loss": 0.8266, "step": 8440 }, { "epoch": 0.99, "grad_norm": 0.9845783571596185, "learning_rate": 4.286162438044779e-05, "loss": 0.9319, "step": 8445 }, { "epoch": 0.99, "grad_norm": 1.0235380371029066, "learning_rate": 4.285359702339994e-05, "loss": 0.9022, "step": 8450 }, { "epoch": 0.99, "grad_norm": 0.8718725903336981, "learning_rate": 4.2845565907982897e-05, "loss": 0.8037, "step": 8455 }, { "epoch": 0.99, "grad_norm": 0.8978180694702957, "learning_rate": 4.2837531035887305e-05, "loss": 0.8338, "step": 8460 }, { "epoch": 0.99, "grad_norm": 0.8214721327663154, "learning_rate": 4.282949240880458e-05, "loss": 0.788, "step": 8465 }, { "epoch": 0.99, "grad_norm": 0.8217165450763345, "learning_rate": 4.282145002842695e-05, "loss": 0.8033, "step": 8470 }, { "epoch": 0.99, "grad_norm": 0.8353144923625703, "learning_rate": 4.281340389644739e-05, "loss": 0.8433, "step": 8475 }, { "epoch": 0.99, "grad_norm": 0.9516764231486345, "learning_rate": 4.280535401455973e-05, "loss": 0.8923, "step": 8480 }, { "epoch": 0.99, "grad_norm": 0.8359667390068116, "learning_rate": 4.279730038445854e-05, "loss": 0.9234, "step": 8485 }, { "epoch": 0.99, "grad_norm": 0.9398406561365003, "learning_rate": 4.27892430078392e-05, "loss": 0.8771, "step": 8490 }, { "epoch": 0.99, "grad_norm": 1.0075501297873979, "learning_rate": 4.278118188639786e-05, "loss": 0.8377, "step": 8495 }, { "epoch": 0.99, "grad_norm": 0.8543631271581719, "learning_rate": 4.2773117021831466e-05, "loss": 0.8342, "step": 8500 }, { "epoch": 0.99, "grad_norm": 0.9249497184350577, "learning_rate": 4.276504841583778e-05, "loss": 0.7878, "step": 8505 }, { "epoch": 0.99, "grad_norm": 0.9304974218438378, "learning_rate": 4.2756976070115305e-05, "loss": 0.8765, "step": 8510 }, { "epoch": 0.99, "grad_norm": 0.9237755546258369, "learning_rate": 4.274889998636338e-05, "loss": 0.9027, "step": 8515 }, { "epoch": 1.0, "grad_norm": 0.8725459720640374, "learning_rate": 4.274082016628208e-05, "loss": 0.8785, "step": 8520 }, { "epoch": 1.0, "grad_norm": 0.865420587506024, "learning_rate": 4.27327366115723e-05, "loss": 0.8557, "step": 8525 }, { "epoch": 1.0, "grad_norm": 0.8530479992793641, "learning_rate": 4.272464932393572e-05, "loss": 0.9093, "step": 8530 }, { "epoch": 1.0, "grad_norm": 0.8523136600293201, "learning_rate": 4.271655830507478e-05, "loss": 0.8154, "step": 8535 }, { "epoch": 1.0, "grad_norm": 0.9319590427323424, "learning_rate": 4.2708463556692745e-05, "loss": 0.981, "step": 8540 }, { "epoch": 1.0, "grad_norm": 0.9393820145931006, "learning_rate": 4.270036508049364e-05, "loss": 0.9124, "step": 8545 }, { "epoch": 1.0, "grad_norm": 0.8400295085421112, "learning_rate": 4.269226287818228e-05, "loss": 0.9175, "step": 8550 }, { "epoch": 1.0, "grad_norm": 0.8476841916976854, "learning_rate": 4.268415695146425e-05, "loss": 0.8423, "step": 8555 }, { "epoch": 1.0, "grad_norm": 0.9642996969885996, "learning_rate": 4.267604730204594e-05, "loss": 0.8078, "step": 8560 }, { "epoch": 1.0, "grad_norm": 0.9277646533991848, "learning_rate": 4.266793393163452e-05, "loss": 0.6013, "step": 8565 }, { "epoch": 1.0, "grad_norm": 1.0706887324499244, "learning_rate": 4.265981684193794e-05, "loss": 0.6044, "step": 8570 }, { "epoch": 1.0, "grad_norm": 0.8273192611142373, "learning_rate": 4.265169603466493e-05, "loss": 0.6383, "step": 8575 }, { "epoch": 1.0, "grad_norm": 0.8205728595780964, "learning_rate": 4.2643571511525006e-05, "loss": 0.6131, "step": 8580 }, { "epoch": 1.0, "grad_norm": 0.865032770485709, "learning_rate": 4.263544327422846e-05, "loss": 0.5971, "step": 8585 }, { "epoch": 1.0, "grad_norm": 0.9061162295993133, "learning_rate": 4.2627311324486377e-05, "loss": 0.7053, "step": 8590 }, { "epoch": 1.0, "grad_norm": 0.8434590515527532, "learning_rate": 4.261917566401061e-05, "loss": 0.6075, "step": 8595 }, { "epoch": 1.0, "grad_norm": 0.9029329507315093, "learning_rate": 4.261103629451381e-05, "loss": 0.5954, "step": 8600 }, { "epoch": 1.01, "grad_norm": 0.8740159041699627, "learning_rate": 4.26028932177094e-05, "loss": 0.6419, "step": 8605 }, { "epoch": 1.01, "grad_norm": 0.9079979043161515, "learning_rate": 4.259474643531157e-05, "loss": 0.6654, "step": 8610 }, { "epoch": 1.01, "grad_norm": 0.894369058539577, "learning_rate": 4.258659594903532e-05, "loss": 0.6042, "step": 8615 }, { "epoch": 1.01, "grad_norm": 0.8826620387477946, "learning_rate": 4.25784417605964e-05, "loss": 0.6314, "step": 8620 }, { "epoch": 1.01, "grad_norm": 1.0169609325560889, "learning_rate": 4.2570283871711355e-05, "loss": 0.5917, "step": 8625 }, { "epoch": 1.01, "grad_norm": 1.1388351557100063, "learning_rate": 4.256212228409752e-05, "loss": 0.6313, "step": 8630 }, { "epoch": 1.01, "grad_norm": 1.0177762666025187, "learning_rate": 4.255395699947297e-05, "loss": 0.6291, "step": 8635 }, { "epoch": 1.01, "grad_norm": 0.8301895445486615, "learning_rate": 4.25457880195566e-05, "loss": 0.6603, "step": 8640 }, { "epoch": 1.01, "grad_norm": 0.938875998709611, "learning_rate": 4.253761534606806e-05, "loss": 0.5926, "step": 8645 }, { "epoch": 1.01, "grad_norm": 0.8747794937997334, "learning_rate": 4.2529438980727784e-05, "loss": 0.5593, "step": 8650 }, { "epoch": 1.01, "grad_norm": 0.9189268262888526, "learning_rate": 4.252125892525698e-05, "loss": 0.6634, "step": 8655 }, { "epoch": 1.01, "grad_norm": 1.0710265091378568, "learning_rate": 4.251307518137765e-05, "loss": 0.6206, "step": 8660 }, { "epoch": 1.01, "grad_norm": 0.8851240901140472, "learning_rate": 4.250488775081254e-05, "loss": 0.6182, "step": 8665 }, { "epoch": 1.01, "grad_norm": 0.9909197448091167, "learning_rate": 4.2496696635285185e-05, "loss": 0.6392, "step": 8670 }, { "epoch": 1.01, "grad_norm": 0.7932300897557871, "learning_rate": 4.248850183651992e-05, "loss": 0.6653, "step": 8675 }, { "epoch": 1.01, "grad_norm": 0.938322781180203, "learning_rate": 4.248030335624183e-05, "loss": 0.7057, "step": 8680 }, { "epoch": 1.01, "grad_norm": 0.8229756810065206, "learning_rate": 4.247210119617679e-05, "loss": 0.5989, "step": 8685 }, { "epoch": 1.02, "grad_norm": 0.8822519759321903, "learning_rate": 4.246389535805141e-05, "loss": 0.6396, "step": 8690 }, { "epoch": 1.02, "grad_norm": 0.8691081495744949, "learning_rate": 4.245568584359313e-05, "loss": 0.6192, "step": 8695 }, { "epoch": 1.02, "grad_norm": 0.8599436694083061, "learning_rate": 4.244747265453014e-05, "loss": 0.6125, "step": 8700 }, { "epoch": 1.02, "grad_norm": 1.0259563736331991, "learning_rate": 4.2439255792591384e-05, "loss": 0.5917, "step": 8705 }, { "epoch": 1.02, "grad_norm": 0.9156046669276074, "learning_rate": 4.2431035259506626e-05, "loss": 0.5313, "step": 8710 }, { "epoch": 1.02, "grad_norm": 0.8705457318769528, "learning_rate": 4.242281105700634e-05, "loss": 0.569, "step": 8715 }, { "epoch": 1.02, "grad_norm": 0.9337200241141412, "learning_rate": 4.241458318682183e-05, "loss": 0.5953, "step": 8720 }, { "epoch": 1.02, "grad_norm": 0.9166056678744614, "learning_rate": 4.240635165068515e-05, "loss": 0.5987, "step": 8725 }, { "epoch": 1.02, "grad_norm": 0.9914530288041218, "learning_rate": 4.23981164503291e-05, "loss": 0.5812, "step": 8730 }, { "epoch": 1.02, "grad_norm": 0.8363380284298336, "learning_rate": 4.23898775874873e-05, "loss": 0.6235, "step": 8735 }, { "epoch": 1.02, "grad_norm": 0.8469659350724175, "learning_rate": 4.238163506389412e-05, "loss": 0.5904, "step": 8740 }, { "epoch": 1.02, "grad_norm": 1.0554835077197375, "learning_rate": 4.237338888128468e-05, "loss": 0.566, "step": 8745 }, { "epoch": 1.02, "grad_norm": 0.9815381006067133, "learning_rate": 4.23651390413949e-05, "loss": 0.5417, "step": 8750 }, { "epoch": 1.02, "grad_norm": 0.8777158051005103, "learning_rate": 4.2356885545961456e-05, "loss": 0.6009, "step": 8755 }, { "epoch": 1.02, "grad_norm": 0.9689041668086507, "learning_rate": 4.234862839672178e-05, "loss": 0.6056, "step": 8760 }, { "epoch": 1.02, "grad_norm": 0.8421991341881123, "learning_rate": 4.234036759541411e-05, "loss": 0.5487, "step": 8765 }, { "epoch": 1.02, "grad_norm": 0.8918366158803973, "learning_rate": 4.233210314377742e-05, "loss": 0.5595, "step": 8770 }, { "epoch": 1.03, "grad_norm": 1.1488261033917695, "learning_rate": 4.232383504355147e-05, "loss": 0.5579, "step": 8775 }, { "epoch": 1.03, "grad_norm": 0.8756084810754023, "learning_rate": 4.2315563296476775e-05, "loss": 0.5558, "step": 8780 }, { "epoch": 1.03, "grad_norm": 1.0034840453434597, "learning_rate": 4.230728790429462e-05, "loss": 0.6144, "step": 8785 }, { "epoch": 1.03, "grad_norm": 1.057321079114524, "learning_rate": 4.229900886874708e-05, "loss": 0.5941, "step": 8790 }, { "epoch": 1.03, "grad_norm": 0.8978693265667533, "learning_rate": 4.2290726191576954e-05, "loss": 0.5667, "step": 8795 }, { "epoch": 1.03, "grad_norm": 0.8279063536947038, "learning_rate": 4.2282439874527854e-05, "loss": 0.5823, "step": 8800 }, { "epoch": 1.03, "grad_norm": 0.8383008864951491, "learning_rate": 4.227414991934413e-05, "loss": 0.6906, "step": 8805 }, { "epoch": 1.03, "grad_norm": 0.7895799159407739, "learning_rate": 4.22658563277709e-05, "loss": 0.5837, "step": 8810 }, { "epoch": 1.03, "grad_norm": 0.7682059172445806, "learning_rate": 4.225755910155407e-05, "loss": 0.5899, "step": 8815 }, { "epoch": 1.03, "grad_norm": 1.3960673280094005, "learning_rate": 4.224925824244025e-05, "loss": 0.6074, "step": 8820 }, { "epoch": 1.03, "grad_norm": 0.9385953158411825, "learning_rate": 4.22409537521769e-05, "loss": 0.5001, "step": 8825 }, { "epoch": 1.03, "grad_norm": 1.0166282802883184, "learning_rate": 4.223264563251219e-05, "loss": 0.5527, "step": 8830 }, { "epoch": 1.03, "grad_norm": 0.9022081026016874, "learning_rate": 4.222433388519505e-05, "loss": 0.5848, "step": 8835 }, { "epoch": 1.03, "grad_norm": 0.8869404967605953, "learning_rate": 4.221601851197523e-05, "loss": 0.6249, "step": 8840 }, { "epoch": 1.03, "grad_norm": 0.8681790209968652, "learning_rate": 4.220769951460315e-05, "loss": 0.5946, "step": 8845 }, { "epoch": 1.03, "grad_norm": 0.8831074405654262, "learning_rate": 4.219937689483009e-05, "loss": 0.6251, "step": 8850 }, { "epoch": 1.03, "grad_norm": 0.8372940849502932, "learning_rate": 4.219105065440802e-05, "loss": 0.6001, "step": 8855 }, { "epoch": 1.04, "grad_norm": 0.9066301464572489, "learning_rate": 4.218272079508972e-05, "loss": 0.7102, "step": 8860 }, { "epoch": 1.04, "grad_norm": 1.1079515746735977, "learning_rate": 4.217438731862871e-05, "loss": 0.6173, "step": 8865 }, { "epoch": 1.04, "grad_norm": 0.9234636985975533, "learning_rate": 4.2166050226779254e-05, "loss": 0.6437, "step": 8870 }, { "epoch": 1.04, "grad_norm": 1.0418391095017363, "learning_rate": 4.2157709521296415e-05, "loss": 0.6029, "step": 8875 }, { "epoch": 1.04, "grad_norm": 0.8635321632060303, "learning_rate": 4.2149365203936e-05, "loss": 0.6495, "step": 8880 }, { "epoch": 1.04, "grad_norm": 0.804516687821039, "learning_rate": 4.2141017276454574e-05, "loss": 0.5862, "step": 8885 }, { "epoch": 1.04, "grad_norm": 0.9365539590127896, "learning_rate": 4.213266574060946e-05, "loss": 0.6732, "step": 8890 }, { "epoch": 1.04, "grad_norm": 0.9018465454943395, "learning_rate": 4.212431059815874e-05, "loss": 0.568, "step": 8895 }, { "epoch": 1.04, "grad_norm": 0.8982739941406391, "learning_rate": 4.2115951850861266e-05, "loss": 0.6291, "step": 8900 }, { "epoch": 1.04, "grad_norm": 1.1873884308517382, "learning_rate": 4.210758950047664e-05, "loss": 0.6416, "step": 8905 }, { "epoch": 1.04, "grad_norm": 0.8430312157736384, "learning_rate": 4.2099223548765224e-05, "loss": 0.6515, "step": 8910 }, { "epoch": 1.04, "grad_norm": 1.0910697163500291, "learning_rate": 4.209085399748814e-05, "loss": 0.5151, "step": 8915 }, { "epoch": 1.04, "grad_norm": 0.8671703016367165, "learning_rate": 4.2082480848407256e-05, "loss": 0.5851, "step": 8920 }, { "epoch": 1.04, "grad_norm": 0.9542144961145261, "learning_rate": 4.207410410328523e-05, "loss": 0.5596, "step": 8925 }, { "epoch": 1.04, "grad_norm": 1.1689561120146947, "learning_rate": 4.206572376388543e-05, "loss": 0.6648, "step": 8930 }, { "epoch": 1.04, "grad_norm": 0.8376895251130306, "learning_rate": 4.205733983197201e-05, "loss": 0.5707, "step": 8935 }, { "epoch": 1.04, "grad_norm": 0.7675738075828713, "learning_rate": 4.204895230930989e-05, "loss": 0.63, "step": 8940 }, { "epoch": 1.05, "grad_norm": 0.937283923962815, "learning_rate": 4.204056119766473e-05, "loss": 0.6893, "step": 8945 }, { "epoch": 1.05, "grad_norm": 0.9141561528968974, "learning_rate": 4.2032166498802924e-05, "loss": 0.6336, "step": 8950 }, { "epoch": 1.05, "grad_norm": 0.9018641662825158, "learning_rate": 4.202376821449167e-05, "loss": 0.6107, "step": 8955 }, { "epoch": 1.05, "grad_norm": 9.240946444757233, "learning_rate": 4.2015366346498876e-05, "loss": 0.5445, "step": 8960 }, { "epoch": 1.05, "grad_norm": 0.8463706600609651, "learning_rate": 4.200696089659324e-05, "loss": 0.5881, "step": 8965 }, { "epoch": 1.05, "grad_norm": 0.8810863042510195, "learning_rate": 4.1998551866544176e-05, "loss": 0.5918, "step": 8970 }, { "epoch": 1.05, "grad_norm": 0.8878701771813601, "learning_rate": 4.1990139258121896e-05, "loss": 0.5974, "step": 8975 }, { "epoch": 1.05, "grad_norm": 0.97833524704983, "learning_rate": 4.198172307309733e-05, "loss": 0.6416, "step": 8980 }, { "epoch": 1.05, "grad_norm": 0.9418290733556817, "learning_rate": 4.197330331324217e-05, "loss": 0.5702, "step": 8985 }, { "epoch": 1.05, "grad_norm": 0.8526099423601223, "learning_rate": 4.196487998032888e-05, "loss": 0.6007, "step": 8990 }, { "epoch": 1.05, "grad_norm": 0.8234540809248805, "learning_rate": 4.195645307613064e-05, "loss": 0.6498, "step": 8995 }, { "epoch": 1.05, "grad_norm": 0.8638309536728099, "learning_rate": 4.194802260242141e-05, "loss": 0.5696, "step": 9000 }, { "epoch": 1.05, "grad_norm": 0.8702117797112877, "learning_rate": 4.1939588560975895e-05, "loss": 0.5776, "step": 9005 }, { "epoch": 1.05, "grad_norm": 0.8827369710038147, "learning_rate": 4.1931150953569545e-05, "loss": 0.6699, "step": 9010 }, { "epoch": 1.05, "grad_norm": 0.8918531483354363, "learning_rate": 4.1922709781978576e-05, "loss": 0.6426, "step": 9015 }, { "epoch": 1.05, "grad_norm": 0.8137736136300915, "learning_rate": 4.191426504797992e-05, "loss": 0.5947, "step": 9020 }, { "epoch": 1.05, "grad_norm": 1.0294362932822392, "learning_rate": 4.1905816753351305e-05, "loss": 0.5357, "step": 9025 }, { "epoch": 1.06, "grad_norm": 0.8762244052287632, "learning_rate": 4.189736489987117e-05, "loss": 0.6312, "step": 9030 }, { "epoch": 1.06, "grad_norm": 0.8386457480979533, "learning_rate": 4.188890948931873e-05, "loss": 0.5845, "step": 9035 }, { "epoch": 1.06, "grad_norm": 0.8535662324621395, "learning_rate": 4.1880450523473934e-05, "loss": 0.6403, "step": 9040 }, { "epoch": 1.06, "grad_norm": 0.8490395229400508, "learning_rate": 4.187198800411748e-05, "loss": 0.6169, "step": 9045 }, { "epoch": 1.06, "grad_norm": 0.8176561930436175, "learning_rate": 4.1863521933030814e-05, "loss": 0.6224, "step": 9050 }, { "epoch": 1.06, "grad_norm": 0.8769277862052794, "learning_rate": 4.1855052311996143e-05, "loss": 0.6115, "step": 9055 }, { "epoch": 1.06, "grad_norm": 0.9966852282859848, "learning_rate": 4.18465791427964e-05, "loss": 0.6389, "step": 9060 }, { "epoch": 1.06, "grad_norm": 0.8727974424722675, "learning_rate": 4.1838102427215284e-05, "loss": 0.5951, "step": 9065 }, { "epoch": 1.06, "grad_norm": 0.9241233613092971, "learning_rate": 4.182962216703723e-05, "loss": 0.607, "step": 9070 }, { "epoch": 1.06, "grad_norm": 0.9418839716192525, "learning_rate": 4.182113836404742e-05, "loss": 0.6727, "step": 9075 }, { "epoch": 1.06, "grad_norm": 0.9842083814523354, "learning_rate": 4.181265102003178e-05, "loss": 0.6622, "step": 9080 }, { "epoch": 1.06, "grad_norm": 0.8108174508816137, "learning_rate": 4.1804160136776996e-05, "loss": 0.6423, "step": 9085 }, { "epoch": 1.06, "grad_norm": 0.8227960799903279, "learning_rate": 4.1795665716070474e-05, "loss": 0.636, "step": 9090 }, { "epoch": 1.06, "grad_norm": 0.8200827037169234, "learning_rate": 4.1787167759700396e-05, "loss": 0.5591, "step": 9095 }, { "epoch": 1.06, "grad_norm": 0.9142772491569335, "learning_rate": 4.177866626945565e-05, "loss": 0.5679, "step": 9100 }, { "epoch": 1.06, "grad_norm": 0.944257216292202, "learning_rate": 4.17701612471259e-05, "loss": 0.6504, "step": 9105 }, { "epoch": 1.06, "grad_norm": 0.8774004676351174, "learning_rate": 4.176165269450155e-05, "loss": 0.6472, "step": 9110 }, { "epoch": 1.06, "grad_norm": 0.9121387145443576, "learning_rate": 4.175314061337372e-05, "loss": 0.5886, "step": 9115 }, { "epoch": 1.07, "grad_norm": 0.8289642292304378, "learning_rate": 4.174462500553431e-05, "loss": 0.5988, "step": 9120 }, { "epoch": 1.07, "grad_norm": 1.1576133661813495, "learning_rate": 4.173610587277594e-05, "loss": 0.6673, "step": 9125 }, { "epoch": 1.07, "grad_norm": 0.8408511980336993, "learning_rate": 4.172758321689197e-05, "loss": 0.5441, "step": 9130 }, { "epoch": 1.07, "grad_norm": 0.8962694132061438, "learning_rate": 4.1719057039676515e-05, "loss": 0.6548, "step": 9135 }, { "epoch": 1.07, "grad_norm": 0.9271376576225812, "learning_rate": 4.171052734292442e-05, "loss": 0.5554, "step": 9140 }, { "epoch": 1.07, "grad_norm": 0.9028916892519055, "learning_rate": 4.1701994128431274e-05, "loss": 0.6887, "step": 9145 }, { "epoch": 1.07, "grad_norm": 0.8868096108743126, "learning_rate": 4.169345739799342e-05, "loss": 0.5993, "step": 9150 }, { "epoch": 1.07, "grad_norm": 1.0278611265858315, "learning_rate": 4.168491715340792e-05, "loss": 0.5831, "step": 9155 }, { "epoch": 1.07, "grad_norm": 0.928107869559645, "learning_rate": 4.167637339647258e-05, "loss": 0.6587, "step": 9160 }, { "epoch": 1.07, "grad_norm": 0.8726913467580857, "learning_rate": 4.166782612898596e-05, "loss": 0.5525, "step": 9165 }, { "epoch": 1.07, "grad_norm": 0.909095473410213, "learning_rate": 4.1659275352747346e-05, "loss": 0.6084, "step": 9170 }, { "epoch": 1.07, "grad_norm": 0.8915410194765684, "learning_rate": 4.165072106955676e-05, "loss": 0.5754, "step": 9175 }, { "epoch": 1.07, "grad_norm": 1.0573317389146333, "learning_rate": 4.1642163281214984e-05, "loss": 0.5563, "step": 9180 }, { "epoch": 1.07, "grad_norm": 0.9594329532730426, "learning_rate": 4.163360198952351e-05, "loss": 0.5826, "step": 9185 }, { "epoch": 1.07, "grad_norm": 0.914275718877347, "learning_rate": 4.1625037196284575e-05, "loss": 0.6283, "step": 9190 }, { "epoch": 1.07, "grad_norm": 0.9293038106149837, "learning_rate": 4.1616468903301166e-05, "loss": 0.5936, "step": 9195 }, { "epoch": 1.07, "grad_norm": 0.9802699630248849, "learning_rate": 4.1607897112377e-05, "loss": 0.647, "step": 9200 }, { "epoch": 1.08, "grad_norm": 0.9616383156693995, "learning_rate": 4.159932182531653e-05, "loss": 0.6315, "step": 9205 }, { "epoch": 1.08, "grad_norm": 0.8562848646733909, "learning_rate": 4.159074304392493e-05, "loss": 0.6008, "step": 9210 }, { "epoch": 1.08, "grad_norm": 0.8766904592822528, "learning_rate": 4.1582160770008135e-05, "loss": 0.6655, "step": 9215 }, { "epoch": 1.08, "grad_norm": 0.8803698710118834, "learning_rate": 4.15735750053728e-05, "loss": 0.6639, "step": 9220 }, { "epoch": 1.08, "grad_norm": 0.8872627668714872, "learning_rate": 4.156498575182633e-05, "loss": 0.6478, "step": 9225 }, { "epoch": 1.08, "grad_norm": 0.9215102380152673, "learning_rate": 4.155639301117683e-05, "loss": 0.6187, "step": 9230 }, { "epoch": 1.08, "grad_norm": 0.8941727018771481, "learning_rate": 4.1547796785233184e-05, "loss": 0.6569, "step": 9235 }, { "epoch": 1.08, "grad_norm": 1.8455878025031203, "learning_rate": 4.153919707580497e-05, "loss": 0.5984, "step": 9240 }, { "epoch": 1.08, "grad_norm": 1.3255126938280823, "learning_rate": 4.153059388470253e-05, "loss": 0.601, "step": 9245 }, { "epoch": 1.08, "grad_norm": 0.9585304710125648, "learning_rate": 4.152198721373691e-05, "loss": 0.5188, "step": 9250 }, { "epoch": 1.08, "grad_norm": 0.928525307708516, "learning_rate": 4.151337706471993e-05, "loss": 0.6191, "step": 9255 }, { "epoch": 1.08, "grad_norm": 1.0062232763206145, "learning_rate": 4.1504763439464086e-05, "loss": 0.572, "step": 9260 }, { "epoch": 1.08, "grad_norm": 1.0251831560539522, "learning_rate": 4.149614633978266e-05, "loss": 0.5353, "step": 9265 }, { "epoch": 1.08, "grad_norm": 1.0153972607329083, "learning_rate": 4.1487525767489635e-05, "loss": 0.5528, "step": 9270 }, { "epoch": 1.08, "grad_norm": 1.0575186158980403, "learning_rate": 4.147890172439973e-05, "loss": 0.6301, "step": 9275 }, { "epoch": 1.08, "grad_norm": 0.9252518054199487, "learning_rate": 4.1470274212328396e-05, "loss": 0.6703, "step": 9280 }, { "epoch": 1.08, "grad_norm": 0.9186694597928409, "learning_rate": 4.146164323309182e-05, "loss": 0.6048, "step": 9285 }, { "epoch": 1.09, "grad_norm": 0.9385692278078165, "learning_rate": 4.14530087885069e-05, "loss": 0.6208, "step": 9290 }, { "epoch": 1.09, "grad_norm": 0.8684047001595284, "learning_rate": 4.144437088039129e-05, "loss": 0.5817, "step": 9295 }, { "epoch": 1.09, "grad_norm": 0.9773036752922679, "learning_rate": 4.143572951056336e-05, "loss": 0.6249, "step": 9300 }, { "epoch": 1.09, "grad_norm": 0.8527892871727347, "learning_rate": 4.14270846808422e-05, "loss": 0.5567, "step": 9305 }, { "epoch": 1.09, "grad_norm": 1.0247760836400146, "learning_rate": 4.141843639304765e-05, "loss": 0.6064, "step": 9310 }, { "epoch": 1.09, "grad_norm": 0.8344753696442245, "learning_rate": 4.140978464900025e-05, "loss": 0.5822, "step": 9315 }, { "epoch": 1.09, "grad_norm": 0.9618762819662215, "learning_rate": 4.140112945052129e-05, "loss": 0.5971, "step": 9320 }, { "epoch": 1.09, "grad_norm": 0.9571653438072899, "learning_rate": 4.1392470799432784e-05, "loss": 0.5972, "step": 9325 }, { "epoch": 1.09, "grad_norm": 0.8194742217294482, "learning_rate": 4.138380869755747e-05, "loss": 0.6727, "step": 9330 }, { "epoch": 1.09, "grad_norm": 0.8311485080490008, "learning_rate": 4.137514314671879e-05, "loss": 0.6181, "step": 9335 }, { "epoch": 1.09, "grad_norm": 0.9556187434309861, "learning_rate": 4.1366474148740964e-05, "loss": 0.61, "step": 9340 }, { "epoch": 1.09, "grad_norm": 0.8833476520157852, "learning_rate": 4.1357801705448885e-05, "loss": 0.8532, "step": 9345 }, { "epoch": 1.09, "grad_norm": 0.9063824733888173, "learning_rate": 4.134912581866821e-05, "loss": 0.6533, "step": 9350 }, { "epoch": 1.09, "grad_norm": 0.8287359909143839, "learning_rate": 4.134044649022528e-05, "loss": 0.5994, "step": 9355 }, { "epoch": 1.09, "grad_norm": 1.022126103378075, "learning_rate": 4.13317637219472e-05, "loss": 0.5835, "step": 9360 }, { "epoch": 1.09, "grad_norm": 4.530502518214563, "learning_rate": 4.1323077515661794e-05, "loss": 0.6326, "step": 9365 }, { "epoch": 1.09, "grad_norm": 0.8019002699371386, "learning_rate": 4.131438787319759e-05, "loss": 0.6269, "step": 9370 }, { "epoch": 1.1, "grad_norm": 0.9288439870395792, "learning_rate": 4.130569479638383e-05, "loss": 0.6165, "step": 9375 }, { "epoch": 1.1, "grad_norm": 0.8976834896314357, "learning_rate": 4.1296998287050524e-05, "loss": 0.6369, "step": 9380 }, { "epoch": 1.1, "grad_norm": 0.8527835679808553, "learning_rate": 4.128829834702838e-05, "loss": 0.6508, "step": 9385 }, { "epoch": 1.1, "grad_norm": 0.8422118491219953, "learning_rate": 4.1279594978148796e-05, "loss": 0.5052, "step": 9390 }, { "epoch": 1.1, "grad_norm": 0.9007388888373373, "learning_rate": 4.127088818224395e-05, "loss": 0.6108, "step": 9395 }, { "epoch": 1.1, "grad_norm": 1.0024797564172927, "learning_rate": 4.1262177961146706e-05, "loss": 0.6461, "step": 9400 }, { "epoch": 1.1, "grad_norm": 1.1111025670090133, "learning_rate": 4.125346431669065e-05, "loss": 0.6582, "step": 9405 }, { "epoch": 1.1, "grad_norm": 0.829441850559287, "learning_rate": 4.1244747250710114e-05, "loss": 0.6998, "step": 9410 }, { "epoch": 1.1, "grad_norm": 0.9620225771555422, "learning_rate": 4.123602676504011e-05, "loss": 0.6588, "step": 9415 }, { "epoch": 1.1, "grad_norm": 4.758821533454097, "learning_rate": 4.12273028615164e-05, "loss": 0.6214, "step": 9420 }, { "epoch": 1.1, "grad_norm": 0.9005743815041396, "learning_rate": 4.1218575541975465e-05, "loss": 0.6235, "step": 9425 }, { "epoch": 1.1, "grad_norm": 0.9638660573830756, "learning_rate": 4.1209844808254485e-05, "loss": 0.6536, "step": 9430 }, { "epoch": 1.1, "grad_norm": 0.9096082315522549, "learning_rate": 4.120111066219138e-05, "loss": 0.6032, "step": 9435 }, { "epoch": 1.1, "grad_norm": 0.9228745839604217, "learning_rate": 4.119237310562477e-05, "loss": 0.5797, "step": 9440 }, { "epoch": 1.1, "grad_norm": 0.8215471996763309, "learning_rate": 4.1183632140394014e-05, "loss": 0.5996, "step": 9445 }, { "epoch": 1.1, "grad_norm": 1.0194203421439125, "learning_rate": 4.1174887768339164e-05, "loss": 0.621, "step": 9450 }, { "epoch": 1.1, "grad_norm": 0.8182899096337711, "learning_rate": 4.1166139991301014e-05, "loss": 0.6232, "step": 9455 }, { "epoch": 1.11, "grad_norm": 0.8334896884227421, "learning_rate": 4.115738881112105e-05, "loss": 0.5143, "step": 9460 }, { "epoch": 1.11, "grad_norm": 0.983983227144839, "learning_rate": 4.1148634229641495e-05, "loss": 0.6021, "step": 9465 }, { "epoch": 1.11, "grad_norm": 0.8348851332798832, "learning_rate": 4.113987624870528e-05, "loss": 0.6391, "step": 9470 }, { "epoch": 1.11, "grad_norm": 0.9105741906663594, "learning_rate": 4.1131114870156054e-05, "loss": 0.6757, "step": 9475 }, { "epoch": 1.11, "grad_norm": 0.8280699053178365, "learning_rate": 4.1122350095838174e-05, "loss": 0.5671, "step": 9480 }, { "epoch": 1.11, "grad_norm": 0.857219718278141, "learning_rate": 4.1113581927596724e-05, "loss": 0.6313, "step": 9485 }, { "epoch": 1.11, "grad_norm": 0.9016560303307284, "learning_rate": 4.110481036727748e-05, "loss": 0.5797, "step": 9490 }, { "epoch": 1.11, "grad_norm": 0.8285068014702185, "learning_rate": 4.1096035416726966e-05, "loss": 0.606, "step": 9495 }, { "epoch": 1.11, "grad_norm": 0.8810691364569447, "learning_rate": 4.108725707779238e-05, "loss": 0.6305, "step": 9500 }, { "epoch": 1.11, "grad_norm": 0.8538779516110756, "learning_rate": 4.1078475352321675e-05, "loss": 0.6377, "step": 9505 }, { "epoch": 1.11, "grad_norm": 0.932471400423201, "learning_rate": 4.1069690242163484e-05, "loss": 0.5733, "step": 9510 }, { "epoch": 1.11, "grad_norm": 0.9081992259361201, "learning_rate": 4.1060901749167175e-05, "loss": 0.6013, "step": 9515 }, { "epoch": 1.11, "grad_norm": 0.8750173420278271, "learning_rate": 4.1052109875182804e-05, "loss": 0.6356, "step": 9520 }, { "epoch": 1.11, "grad_norm": 1.2485698544680992, "learning_rate": 4.1043314622061165e-05, "loss": 0.6439, "step": 9525 }, { "epoch": 1.11, "grad_norm": 0.9083552100444398, "learning_rate": 4.103451599165374e-05, "loss": 0.6134, "step": 9530 }, { "epoch": 1.11, "grad_norm": 0.9421113651564124, "learning_rate": 4.102571398581274e-05, "loss": 0.6521, "step": 9535 }, { "epoch": 1.11, "grad_norm": 1.2557029200993437, "learning_rate": 4.101690860639108e-05, "loss": 0.5907, "step": 9540 }, { "epoch": 1.12, "grad_norm": 0.9665697226056439, "learning_rate": 4.100809985524238e-05, "loss": 0.6242, "step": 9545 }, { "epoch": 1.12, "grad_norm": 0.9617080770593286, "learning_rate": 4.099928773422098e-05, "loss": 0.562, "step": 9550 }, { "epoch": 1.12, "grad_norm": 0.9148415802633895, "learning_rate": 4.099047224518192e-05, "loss": 0.6158, "step": 9555 }, { "epoch": 1.12, "grad_norm": 1.0141716049378553, "learning_rate": 4.0981653389980954e-05, "loss": 0.653, "step": 9560 }, { "epoch": 1.12, "grad_norm": 0.9622663943050648, "learning_rate": 4.097283117047455e-05, "loss": 0.588, "step": 9565 }, { "epoch": 1.12, "grad_norm": 0.9922851758966393, "learning_rate": 4.096400558851986e-05, "loss": 0.5943, "step": 9570 }, { "epoch": 1.12, "grad_norm": 1.0328846038388495, "learning_rate": 4.0955176645974774e-05, "loss": 0.593, "step": 9575 }, { "epoch": 1.12, "grad_norm": 0.8832446055522517, "learning_rate": 4.0946344344697874e-05, "loss": 0.5124, "step": 9580 }, { "epoch": 1.12, "grad_norm": 0.8088913061832707, "learning_rate": 4.0937508686548455e-05, "loss": 0.6461, "step": 9585 }, { "epoch": 1.12, "grad_norm": 0.9261208965877685, "learning_rate": 4.092866967338651e-05, "loss": 0.5735, "step": 9590 }, { "epoch": 1.12, "grad_norm": 1.0427701686523054, "learning_rate": 4.091982730707275e-05, "loss": 0.5435, "step": 9595 }, { "epoch": 1.12, "grad_norm": 0.7612729987856766, "learning_rate": 4.091098158946859e-05, "loss": 0.5958, "step": 9600 }, { "epoch": 1.12, "grad_norm": 0.8885194063013234, "learning_rate": 4.090213252243612e-05, "loss": 0.6246, "step": 9605 }, { "epoch": 1.12, "grad_norm": 0.8562718735726935, "learning_rate": 4.08932801078382e-05, "loss": 0.6134, "step": 9610 }, { "epoch": 1.12, "grad_norm": 0.839932526258277, "learning_rate": 4.088442434753833e-05, "loss": 0.5738, "step": 9615 }, { "epoch": 1.12, "grad_norm": 13.248673901344128, "learning_rate": 4.087556524340075e-05, "loss": 0.6389, "step": 9620 }, { "epoch": 1.12, "grad_norm": 0.9464387608328463, "learning_rate": 4.086670279729038e-05, "loss": 0.5768, "step": 9625 }, { "epoch": 1.13, "grad_norm": 0.7600374099784892, "learning_rate": 4.085783701107288e-05, "loss": 0.6513, "step": 9630 }, { "epoch": 1.13, "grad_norm": 1.068162854657603, "learning_rate": 4.084896788661458e-05, "loss": 0.623, "step": 9635 }, { "epoch": 1.13, "grad_norm": 0.8999060240232348, "learning_rate": 4.084009542578251e-05, "loss": 0.5755, "step": 9640 }, { "epoch": 1.13, "grad_norm": 1.0708600562994768, "learning_rate": 4.083121963044444e-05, "loss": 0.6702, "step": 9645 }, { "epoch": 1.13, "grad_norm": 0.7671671241956116, "learning_rate": 4.08223405024688e-05, "loss": 0.5828, "step": 9650 }, { "epoch": 1.13, "grad_norm": 0.9056359957694474, "learning_rate": 4.081345804372475e-05, "loss": 0.589, "step": 9655 }, { "epoch": 1.13, "grad_norm": 0.9139667473825068, "learning_rate": 4.080457225608214e-05, "loss": 0.5492, "step": 9660 }, { "epoch": 1.13, "grad_norm": 0.9227650272336652, "learning_rate": 4.079568314141152e-05, "loss": 0.5915, "step": 9665 }, { "epoch": 1.13, "grad_norm": 0.9151880775080945, "learning_rate": 4.078679070158413e-05, "loss": 0.5721, "step": 9670 }, { "epoch": 1.13, "grad_norm": 0.8697274735123237, "learning_rate": 4.077789493847194e-05, "loss": 0.6507, "step": 9675 }, { "epoch": 1.13, "grad_norm": 0.9371052089851482, "learning_rate": 4.07689958539476e-05, "loss": 0.678, "step": 9680 }, { "epoch": 1.13, "grad_norm": 1.044862678211282, "learning_rate": 4.076009344988444e-05, "loss": 0.6084, "step": 9685 }, { "epoch": 1.13, "grad_norm": 0.8312392235480726, "learning_rate": 4.075118772815654e-05, "loss": 0.6451, "step": 9690 }, { "epoch": 1.13, "grad_norm": 0.9860466179648407, "learning_rate": 4.074227869063861e-05, "loss": 0.5837, "step": 9695 }, { "epoch": 1.13, "grad_norm": 0.7249402738180497, "learning_rate": 4.073336633920613e-05, "loss": 0.6021, "step": 9700 }, { "epoch": 1.13, "grad_norm": 0.8472237106522102, "learning_rate": 4.072445067573523e-05, "loss": 0.5836, "step": 9705 }, { "epoch": 1.13, "grad_norm": 0.8751154208796319, "learning_rate": 4.0715531702102735e-05, "loss": 0.6976, "step": 9710 }, { "epoch": 1.14, "grad_norm": 0.9176567732963984, "learning_rate": 4.070660942018622e-05, "loss": 0.6185, "step": 9715 }, { "epoch": 1.14, "grad_norm": 0.8201043298299713, "learning_rate": 4.0697683831863877e-05, "loss": 0.589, "step": 9720 }, { "epoch": 1.14, "grad_norm": 0.8525492515407452, "learning_rate": 4.068875493901467e-05, "loss": 0.5547, "step": 9725 }, { "epoch": 1.14, "grad_norm": 0.9868701328291947, "learning_rate": 4.0679822743518206e-05, "loss": 0.5843, "step": 9730 }, { "epoch": 1.14, "grad_norm": 0.926300738253284, "learning_rate": 4.06708872472548e-05, "loss": 0.5719, "step": 9735 }, { "epoch": 1.14, "grad_norm": 0.8928032034763357, "learning_rate": 4.066194845210549e-05, "loss": 0.5763, "step": 9740 }, { "epoch": 1.14, "grad_norm": 0.9702162928445587, "learning_rate": 4.065300635995197e-05, "loss": 0.5954, "step": 9745 }, { "epoch": 1.14, "grad_norm": 1.081141134487754, "learning_rate": 4.064406097267665e-05, "loss": 0.5845, "step": 9750 }, { "epoch": 1.14, "grad_norm": 0.904794270196421, "learning_rate": 4.0635112292162615e-05, "loss": 0.5764, "step": 9755 }, { "epoch": 1.14, "grad_norm": 0.9085037644608931, "learning_rate": 4.062616032029367e-05, "loss": 0.6563, "step": 9760 }, { "epoch": 1.14, "grad_norm": 0.8150858058827991, "learning_rate": 4.06172050589543e-05, "loss": 0.671, "step": 9765 }, { "epoch": 1.14, "grad_norm": 0.8996870804957054, "learning_rate": 4.060824651002967e-05, "loss": 0.6075, "step": 9770 }, { "epoch": 1.14, "grad_norm": 0.8213482296231155, "learning_rate": 4.059928467540566e-05, "loss": 0.5613, "step": 9775 }, { "epoch": 1.14, "grad_norm": 0.9006394954800155, "learning_rate": 4.059031955696882e-05, "loss": 0.6062, "step": 9780 }, { "epoch": 1.14, "grad_norm": 0.8907361452518608, "learning_rate": 4.058135115660641e-05, "loss": 0.5952, "step": 9785 }, { "epoch": 1.14, "grad_norm": 0.849663574160607, "learning_rate": 4.057237947620636e-05, "loss": 0.6055, "step": 9790 }, { "epoch": 1.14, "grad_norm": 0.8471132332767213, "learning_rate": 4.0563404517657324e-05, "loss": 0.6151, "step": 9795 }, { "epoch": 1.14, "grad_norm": 0.9101727179278354, "learning_rate": 4.05544262828486e-05, "loss": 0.6292, "step": 9800 }, { "epoch": 1.15, "grad_norm": 0.7981621928808994, "learning_rate": 4.054544477367022e-05, "loss": 0.5964, "step": 9805 }, { "epoch": 1.15, "grad_norm": 0.9594345392819724, "learning_rate": 4.053645999201287e-05, "loss": 0.6154, "step": 9810 }, { "epoch": 1.15, "grad_norm": 1.189914031503265, "learning_rate": 4.052747193976795e-05, "loss": 0.5879, "step": 9815 }, { "epoch": 1.15, "grad_norm": 1.0107693337040382, "learning_rate": 4.051848061882755e-05, "loss": 0.6197, "step": 9820 }, { "epoch": 1.15, "grad_norm": 0.8918712702288432, "learning_rate": 4.050948603108441e-05, "loss": 0.611, "step": 9825 }, { "epoch": 1.15, "grad_norm": 0.9043076750860054, "learning_rate": 4.050048817843201e-05, "loss": 0.572, "step": 9830 }, { "epoch": 1.15, "grad_norm": 0.8321686514270472, "learning_rate": 4.049148706276447e-05, "loss": 0.639, "step": 9835 }, { "epoch": 1.15, "grad_norm": 0.9551571579166243, "learning_rate": 4.048248268597664e-05, "loss": 0.5846, "step": 9840 }, { "epoch": 1.15, "grad_norm": 0.8420750522382193, "learning_rate": 4.047347504996403e-05, "loss": 0.6793, "step": 9845 }, { "epoch": 1.15, "grad_norm": 0.8413534977306244, "learning_rate": 4.046446415662283e-05, "loss": 0.5947, "step": 9850 }, { "epoch": 1.15, "grad_norm": 0.9801330995809016, "learning_rate": 4.0455450007849945e-05, "loss": 0.6505, "step": 9855 }, { "epoch": 1.15, "grad_norm": 1.047270367782501, "learning_rate": 4.0446432605542926e-05, "loss": 0.6357, "step": 9860 }, { "epoch": 1.15, "grad_norm": 0.9162854258787997, "learning_rate": 4.0437411951600056e-05, "loss": 0.6805, "step": 9865 }, { "epoch": 1.15, "grad_norm": 0.8031869014755787, "learning_rate": 4.042838804792026e-05, "loss": 0.6598, "step": 9870 }, { "epoch": 1.15, "grad_norm": 0.9469068665864222, "learning_rate": 4.041936089640317e-05, "loss": 0.6192, "step": 9875 }, { "epoch": 1.15, "grad_norm": 0.9110327010793229, "learning_rate": 4.041033049894909e-05, "loss": 0.5754, "step": 9880 }, { "epoch": 1.15, "grad_norm": 0.8798608223896566, "learning_rate": 4.0401296857459026e-05, "loss": 0.6593, "step": 9885 }, { "epoch": 1.16, "grad_norm": 0.8952185728910895, "learning_rate": 4.039225997383464e-05, "loss": 0.6579, "step": 9890 }, { "epoch": 1.16, "grad_norm": 0.8969629395565243, "learning_rate": 4.038321984997829e-05, "loss": 0.6985, "step": 9895 }, { "epoch": 1.16, "grad_norm": 0.8302735968430788, "learning_rate": 4.037417648779304e-05, "loss": 0.5187, "step": 9900 }, { "epoch": 1.16, "grad_norm": 1.1303999185258016, "learning_rate": 4.0365129889182584e-05, "loss": 0.6846, "step": 9905 }, { "epoch": 1.16, "grad_norm": 0.9772078663141259, "learning_rate": 4.0356080056051345e-05, "loss": 0.6823, "step": 9910 }, { "epoch": 1.16, "grad_norm": 0.9535922750694439, "learning_rate": 4.034702699030439e-05, "loss": 0.6919, "step": 9915 }, { "epoch": 1.16, "grad_norm": 0.9398525330317363, "learning_rate": 4.03379706938475e-05, "loss": 0.5385, "step": 9920 }, { "epoch": 1.16, "grad_norm": 0.9215923704026867, "learning_rate": 4.0328911168587125e-05, "loss": 0.6222, "step": 9925 }, { "epoch": 1.16, "grad_norm": 0.8069367191223502, "learning_rate": 4.031984841643037e-05, "loss": 0.6438, "step": 9930 }, { "epoch": 1.16, "grad_norm": 0.903549102288383, "learning_rate": 4.0310782439285054e-05, "loss": 0.6276, "step": 9935 }, { "epoch": 1.16, "grad_norm": 0.8979396615731928, "learning_rate": 4.030171323905965e-05, "loss": 0.5848, "step": 9940 }, { "epoch": 1.16, "grad_norm": 0.8753038102570072, "learning_rate": 4.029264081766333e-05, "loss": 0.5724, "step": 9945 }, { "epoch": 1.16, "grad_norm": 1.0475434911700867, "learning_rate": 4.0283565177005924e-05, "loss": 0.6489, "step": 9950 }, { "epoch": 1.16, "grad_norm": 0.848606080971198, "learning_rate": 4.027448631899796e-05, "loss": 0.6598, "step": 9955 }, { "epoch": 1.16, "grad_norm": 0.8882713828216475, "learning_rate": 4.026540424555062e-05, "loss": 0.5622, "step": 9960 }, { "epoch": 1.16, "grad_norm": 2.2082566947531976, "learning_rate": 4.025631895857579e-05, "loss": 0.6491, "step": 9965 }, { "epoch": 1.16, "grad_norm": 1.0495436080584797, "learning_rate": 4.0247230459986004e-05, "loss": 0.6334, "step": 9970 }, { "epoch": 1.17, "grad_norm": 0.9350201595609539, "learning_rate": 4.02381387516945e-05, "loss": 0.6555, "step": 9975 }, { "epoch": 1.17, "grad_norm": 0.8879965573607457, "learning_rate": 4.0229043835615155e-05, "loss": 0.588, "step": 9980 }, { "epoch": 1.17, "grad_norm": 0.8918351647989582, "learning_rate": 4.0219945713662576e-05, "loss": 0.613, "step": 9985 }, { "epoch": 1.17, "grad_norm": 0.7840567649434786, "learning_rate": 4.021084438775199e-05, "loss": 0.6332, "step": 9990 }, { "epoch": 1.17, "grad_norm": 1.0335261984915007, "learning_rate": 4.0201739859799325e-05, "loss": 0.5649, "step": 9995 }, { "epoch": 1.17, "grad_norm": 0.8861762512251794, "learning_rate": 4.0192632131721185e-05, "loss": 0.5838, "step": 10000 }, { "epoch": 1.17, "grad_norm": 0.7712705175084428, "learning_rate": 4.018352120543485e-05, "loss": 0.5988, "step": 10005 }, { "epoch": 1.17, "grad_norm": 1.0029455847776978, "learning_rate": 4.017440708285824e-05, "loss": 0.5617, "step": 10010 }, { "epoch": 1.17, "grad_norm": 1.0963691107536784, "learning_rate": 4.0165289765909996e-05, "loss": 0.6162, "step": 10015 }, { "epoch": 1.17, "grad_norm": 0.9950886489154548, "learning_rate": 4.01561692565094e-05, "loss": 0.6229, "step": 10020 }, { "epoch": 1.17, "grad_norm": 0.9440204288452254, "learning_rate": 4.0147045556576415e-05, "loss": 0.6334, "step": 10025 }, { "epoch": 1.17, "grad_norm": 0.9964489671959494, "learning_rate": 4.013791866803167e-05, "loss": 0.6313, "step": 10030 }, { "epoch": 1.17, "grad_norm": 0.9249501924597987, "learning_rate": 4.0128788592796484e-05, "loss": 0.6486, "step": 10035 }, { "epoch": 1.17, "grad_norm": 0.9213352087336827, "learning_rate": 4.0119655332792826e-05, "loss": 0.6733, "step": 10040 }, { "epoch": 1.17, "grad_norm": 0.9568225596954095, "learning_rate": 4.011051888994335e-05, "loss": 0.6222, "step": 10045 }, { "epoch": 1.17, "grad_norm": 0.8569036072099062, "learning_rate": 4.010137926617136e-05, "loss": 0.6248, "step": 10050 }, { "epoch": 1.17, "grad_norm": 0.9383601287942281, "learning_rate": 4.009223646340084e-05, "loss": 0.6061, "step": 10055 }, { "epoch": 1.18, "grad_norm": 0.903270509340819, "learning_rate": 4.008309048355646e-05, "loss": 0.6312, "step": 10060 }, { "epoch": 1.18, "grad_norm": 0.9724861070811751, "learning_rate": 4.0073941328563546e-05, "loss": 0.6176, "step": 10065 }, { "epoch": 1.18, "grad_norm": 0.9085347640631056, "learning_rate": 4.006478900034808e-05, "loss": 0.6114, "step": 10070 }, { "epoch": 1.18, "grad_norm": 0.8590173109493656, "learning_rate": 4.005563350083673e-05, "loss": 0.5889, "step": 10075 }, { "epoch": 1.18, "grad_norm": 0.8568817119257549, "learning_rate": 4.004647483195682e-05, "loss": 0.6356, "step": 10080 }, { "epoch": 1.18, "grad_norm": 0.8751475523043187, "learning_rate": 4.0037312995636336e-05, "loss": 0.6386, "step": 10085 }, { "epoch": 1.18, "grad_norm": 0.8681531696645705, "learning_rate": 4.002814799380397e-05, "loss": 0.612, "step": 10090 }, { "epoch": 1.18, "grad_norm": 1.0569052005853135, "learning_rate": 4.001897982838902e-05, "loss": 0.6351, "step": 10095 }, { "epoch": 1.18, "grad_norm": 0.9015183523209024, "learning_rate": 4.00098085013215e-05, "loss": 0.596, "step": 10100 }, { "epoch": 1.18, "grad_norm": 0.8442722392585974, "learning_rate": 4.0000634014532054e-05, "loss": 0.5995, "step": 10105 }, { "epoch": 1.18, "grad_norm": 0.8850762982309546, "learning_rate": 3.999145636995203e-05, "loss": 0.6084, "step": 10110 }, { "epoch": 1.18, "grad_norm": 1.0571424102878644, "learning_rate": 3.998227556951341e-05, "loss": 0.6794, "step": 10115 }, { "epoch": 1.18, "grad_norm": 0.8628784934756678, "learning_rate": 3.997309161514883e-05, "loss": 0.6222, "step": 10120 }, { "epoch": 1.18, "grad_norm": 0.9588455777465492, "learning_rate": 3.996390450879163e-05, "loss": 0.5995, "step": 10125 }, { "epoch": 1.18, "grad_norm": 0.8900907773977574, "learning_rate": 3.995471425237578e-05, "loss": 0.5684, "step": 10130 }, { "epoch": 1.18, "grad_norm": 0.8591476511074881, "learning_rate": 3.9945520847835935e-05, "loss": 0.576, "step": 10135 }, { "epoch": 1.18, "grad_norm": 0.7951726521016427, "learning_rate": 3.9936324297107396e-05, "loss": 0.5558, "step": 10140 }, { "epoch": 1.19, "grad_norm": 1.0588470665638625, "learning_rate": 3.9927124602126135e-05, "loss": 0.5434, "step": 10145 }, { "epoch": 1.19, "grad_norm": 1.2723282099950253, "learning_rate": 3.991792176482879e-05, "loss": 0.582, "step": 10150 }, { "epoch": 1.19, "grad_norm": 1.075406036860975, "learning_rate": 3.990871578715264e-05, "loss": 0.662, "step": 10155 }, { "epoch": 1.19, "grad_norm": 0.8328623678809383, "learning_rate": 3.989950667103566e-05, "loss": 0.6482, "step": 10160 }, { "epoch": 1.19, "grad_norm": 0.847580232745473, "learning_rate": 3.989029441841645e-05, "loss": 0.5367, "step": 10165 }, { "epoch": 1.19, "grad_norm": 0.8589010882682073, "learning_rate": 3.9881079031234295e-05, "loss": 0.5862, "step": 10170 }, { "epoch": 1.19, "grad_norm": 0.8610734142118764, "learning_rate": 3.987186051142911e-05, "loss": 0.6158, "step": 10175 }, { "epoch": 1.19, "grad_norm": 0.86627951786458, "learning_rate": 3.9862638860941524e-05, "loss": 0.6112, "step": 10180 }, { "epoch": 1.19, "grad_norm": 1.0667526656804673, "learning_rate": 3.985341408171277e-05, "loss": 0.6354, "step": 10185 }, { "epoch": 1.19, "grad_norm": 0.8711287886263307, "learning_rate": 3.984418617568475e-05, "loss": 0.6355, "step": 10190 }, { "epoch": 1.19, "grad_norm": 0.9881251887237487, "learning_rate": 3.983495514480007e-05, "loss": 0.622, "step": 10195 }, { "epoch": 1.19, "grad_norm": 0.8792489533541943, "learning_rate": 3.982572099100192e-05, "loss": 0.5141, "step": 10200 }, { "epoch": 1.19, "grad_norm": 0.9152498687376003, "learning_rate": 3.981648371623421e-05, "loss": 0.6207, "step": 10205 }, { "epoch": 1.19, "grad_norm": 0.7989876504398636, "learning_rate": 3.980724332244147e-05, "loss": 0.5269, "step": 10210 }, { "epoch": 1.19, "grad_norm": 0.8797185298604385, "learning_rate": 3.9797999811568916e-05, "loss": 0.5778, "step": 10215 }, { "epoch": 1.19, "grad_norm": 0.9223928804353906, "learning_rate": 3.978875318556238e-05, "loss": 0.6445, "step": 10220 }, { "epoch": 1.19, "grad_norm": 0.9364308011543939, "learning_rate": 3.97795034463684e-05, "loss": 0.6594, "step": 10225 }, { "epoch": 1.2, "grad_norm": 0.9220385812390476, "learning_rate": 3.977025059593413e-05, "loss": 0.6346, "step": 10230 }, { "epoch": 1.2, "grad_norm": 0.9701765029649853, "learning_rate": 3.976099463620738e-05, "loss": 0.6367, "step": 10235 }, { "epoch": 1.2, "grad_norm": 0.8867570256512409, "learning_rate": 3.975173556913665e-05, "loss": 0.6055, "step": 10240 }, { "epoch": 1.2, "grad_norm": 0.8125509656428566, "learning_rate": 3.974247339667106e-05, "loss": 0.6225, "step": 10245 }, { "epoch": 1.2, "grad_norm": 1.0892926497453927, "learning_rate": 3.973320812076039e-05, "loss": 0.5704, "step": 10250 }, { "epoch": 1.2, "grad_norm": 1.026750775890507, "learning_rate": 3.972393974335508e-05, "loss": 0.6553, "step": 10255 }, { "epoch": 1.2, "grad_norm": 1.1427438785669062, "learning_rate": 3.971466826640622e-05, "loss": 0.6834, "step": 10260 }, { "epoch": 1.2, "grad_norm": 0.8839448364089717, "learning_rate": 3.9705393691865566e-05, "loss": 0.5522, "step": 10265 }, { "epoch": 1.2, "grad_norm": 0.8791945443161713, "learning_rate": 3.969611602168549e-05, "loss": 0.5669, "step": 10270 }, { "epoch": 1.2, "grad_norm": 0.9515027738904531, "learning_rate": 3.968683525781906e-05, "loss": 0.6297, "step": 10275 }, { "epoch": 1.2, "grad_norm": 1.4876496267656212, "learning_rate": 3.967755140221996e-05, "loss": 0.589, "step": 10280 }, { "epoch": 1.2, "grad_norm": 1.1247622916072126, "learning_rate": 3.966826445684255e-05, "loss": 0.633, "step": 10285 }, { "epoch": 1.2, "grad_norm": 0.8302846402144587, "learning_rate": 3.965897442364182e-05, "loss": 0.6365, "step": 10290 }, { "epoch": 1.2, "grad_norm": 0.9908935689677275, "learning_rate": 3.964968130457341e-05, "loss": 0.6323, "step": 10295 }, { "epoch": 1.2, "grad_norm": 0.9340391113359591, "learning_rate": 3.964038510159364e-05, "loss": 0.6151, "step": 10300 }, { "epoch": 1.2, "grad_norm": 1.2718292438791907, "learning_rate": 3.963108581665945e-05, "loss": 0.6114, "step": 10305 }, { "epoch": 1.2, "grad_norm": 0.7785590263663952, "learning_rate": 3.962178345172844e-05, "loss": 0.5637, "step": 10310 }, { "epoch": 1.21, "grad_norm": 0.9645013541347448, "learning_rate": 3.961247800875885e-05, "loss": 0.5801, "step": 10315 }, { "epoch": 1.21, "grad_norm": 0.8583297905620315, "learning_rate": 3.9603169489709567e-05, "loss": 0.6214, "step": 10320 }, { "epoch": 1.21, "grad_norm": 0.9282947415255546, "learning_rate": 3.959385789654014e-05, "loss": 0.6309, "step": 10325 }, { "epoch": 1.21, "grad_norm": 0.9368910532197497, "learning_rate": 3.9584543231210766e-05, "loss": 0.582, "step": 10330 }, { "epoch": 1.21, "grad_norm": 0.8091883054913099, "learning_rate": 3.9575225495682266e-05, "loss": 0.6053, "step": 10335 }, { "epoch": 1.21, "grad_norm": 0.9250748071469435, "learning_rate": 3.956590469191612e-05, "loss": 0.6578, "step": 10340 }, { "epoch": 1.21, "grad_norm": 0.8699557441268959, "learning_rate": 3.955658082187447e-05, "loss": 0.6516, "step": 10345 }, { "epoch": 1.21, "grad_norm": 0.8144798873280884, "learning_rate": 3.954725388752006e-05, "loss": 0.5899, "step": 10350 }, { "epoch": 1.21, "grad_norm": 0.8413361061870538, "learning_rate": 3.953792389081635e-05, "loss": 0.5566, "step": 10355 }, { "epoch": 1.21, "grad_norm": 1.002226599985344, "learning_rate": 3.9528590833727366e-05, "loss": 0.5435, "step": 10360 }, { "epoch": 1.21, "grad_norm": 1.1095065513935778, "learning_rate": 3.951925471821781e-05, "loss": 0.5638, "step": 10365 }, { "epoch": 1.21, "grad_norm": 0.9947145888090904, "learning_rate": 3.950991554625307e-05, "loss": 0.59, "step": 10370 }, { "epoch": 1.21, "grad_norm": 1.1471816219045725, "learning_rate": 3.9500573319799104e-05, "loss": 0.6571, "step": 10375 }, { "epoch": 1.21, "grad_norm": 1.2056447335181821, "learning_rate": 3.949122804082257e-05, "loss": 0.6385, "step": 10380 }, { "epoch": 1.21, "grad_norm": 0.9236499290260043, "learning_rate": 3.9481879711290735e-05, "loss": 0.5761, "step": 10385 }, { "epoch": 1.21, "grad_norm": 0.9871575598379568, "learning_rate": 3.9472528333171535e-05, "loss": 0.652, "step": 10390 }, { "epoch": 1.21, "grad_norm": 0.788541974001009, "learning_rate": 3.94631739084335e-05, "loss": 0.5995, "step": 10395 }, { "epoch": 1.22, "grad_norm": 1.1523334655894832, "learning_rate": 3.9453816439045876e-05, "loss": 0.5945, "step": 10400 }, { "epoch": 1.22, "grad_norm": 1.063804037800662, "learning_rate": 3.944445592697848e-05, "loss": 0.6398, "step": 10405 }, { "epoch": 1.22, "grad_norm": 0.9686012869197632, "learning_rate": 3.943509237420181e-05, "loss": 0.6056, "step": 10410 }, { "epoch": 1.22, "grad_norm": 0.9297495694452663, "learning_rate": 3.9425725782686984e-05, "loss": 0.6175, "step": 10415 }, { "epoch": 1.22, "grad_norm": 1.1319803102042276, "learning_rate": 3.9416356154405775e-05, "loss": 0.6641, "step": 10420 }, { "epoch": 1.22, "grad_norm": 0.9009336255386489, "learning_rate": 3.940698349133059e-05, "loss": 0.666, "step": 10425 }, { "epoch": 1.22, "grad_norm": 0.8854415142953846, "learning_rate": 3.939760779543447e-05, "loss": 0.6813, "step": 10430 }, { "epoch": 1.22, "grad_norm": 1.0041367685322249, "learning_rate": 3.938822906869108e-05, "loss": 0.6322, "step": 10435 }, { "epoch": 1.22, "grad_norm": 1.1876986370837992, "learning_rate": 3.937884731307477e-05, "loss": 0.6451, "step": 10440 }, { "epoch": 1.22, "grad_norm": 0.8686221873500196, "learning_rate": 3.936946253056047e-05, "loss": 0.571, "step": 10445 }, { "epoch": 1.22, "grad_norm": 0.8786593514516138, "learning_rate": 3.936007472312381e-05, "loss": 0.6693, "step": 10450 }, { "epoch": 1.22, "grad_norm": 0.8636998162821604, "learning_rate": 3.935068389274098e-05, "loss": 0.5903, "step": 10455 }, { "epoch": 1.22, "grad_norm": 0.9589885303494541, "learning_rate": 3.9341290041388884e-05, "loss": 0.5917, "step": 10460 }, { "epoch": 1.22, "grad_norm": 1.0097558423849307, "learning_rate": 3.9331893171045e-05, "loss": 0.5752, "step": 10465 }, { "epoch": 1.22, "grad_norm": 0.9040003496213147, "learning_rate": 3.932249328368748e-05, "loss": 0.6171, "step": 10470 }, { "epoch": 1.22, "grad_norm": 0.9117511057238641, "learning_rate": 3.93130903812951e-05, "loss": 0.5481, "step": 10475 }, { "epoch": 1.22, "grad_norm": 0.9177250458951397, "learning_rate": 3.9303684465847254e-05, "loss": 0.6379, "step": 10480 }, { "epoch": 1.23, "grad_norm": 0.8738506459598745, "learning_rate": 3.929427553932402e-05, "loss": 0.6947, "step": 10485 }, { "epoch": 1.23, "grad_norm": 1.2560711170573873, "learning_rate": 3.9284863603706036e-05, "loss": 0.6615, "step": 10490 }, { "epoch": 1.23, "grad_norm": 0.9556202554831419, "learning_rate": 3.927544866097464e-05, "loss": 0.6716, "step": 10495 }, { "epoch": 1.23, "grad_norm": 1.0206437800131474, "learning_rate": 3.9266030713111755e-05, "loss": 0.5995, "step": 10500 }, { "epoch": 1.23, "grad_norm": 0.9605446928738075, "learning_rate": 3.925660976209997e-05, "loss": 0.6041, "step": 10505 }, { "epoch": 1.23, "grad_norm": 0.9548800749168715, "learning_rate": 3.924718580992251e-05, "loss": 0.5636, "step": 10510 }, { "epoch": 1.23, "grad_norm": 0.9932209464059008, "learning_rate": 3.923775885856319e-05, "loss": 0.6875, "step": 10515 }, { "epoch": 1.23, "grad_norm": 0.9048080570269783, "learning_rate": 3.922832891000649e-05, "loss": 0.6603, "step": 10520 }, { "epoch": 1.23, "grad_norm": 1.0231057550763838, "learning_rate": 3.921889596623751e-05, "loss": 0.5725, "step": 10525 }, { "epoch": 1.23, "grad_norm": 0.9322082392496664, "learning_rate": 3.9209460029242e-05, "loss": 0.5851, "step": 10530 }, { "epoch": 1.23, "grad_norm": 0.9579507146406717, "learning_rate": 3.920002110100629e-05, "loss": 0.7321, "step": 10535 }, { "epoch": 1.23, "grad_norm": 0.8083429849855337, "learning_rate": 3.9190579183517406e-05, "loss": 0.636, "step": 10540 }, { "epoch": 1.23, "grad_norm": 0.8156973969412518, "learning_rate": 3.9181134278762964e-05, "loss": 0.5845, "step": 10545 }, { "epoch": 1.23, "grad_norm": 0.879778222064731, "learning_rate": 3.91716863887312e-05, "loss": 0.6019, "step": 10550 }, { "epoch": 1.23, "grad_norm": 0.8923596962786368, "learning_rate": 3.916223551541101e-05, "loss": 0.5634, "step": 10555 }, { "epoch": 1.23, "grad_norm": 0.9155326327793609, "learning_rate": 3.915278166079189e-05, "loss": 0.6537, "step": 10560 }, { "epoch": 1.23, "grad_norm": 1.0492039727253646, "learning_rate": 3.914332482686399e-05, "loss": 0.6017, "step": 10565 }, { "epoch": 1.23, "grad_norm": 0.8001464314523077, "learning_rate": 3.913386501561806e-05, "loss": 0.6189, "step": 10570 }, { "epoch": 1.24, "grad_norm": 0.8678962272468027, "learning_rate": 3.9124402229045495e-05, "loss": 0.5916, "step": 10575 }, { "epoch": 1.24, "grad_norm": 0.7826536308145431, "learning_rate": 3.911493646913831e-05, "loss": 0.5379, "step": 10580 }, { "epoch": 1.24, "grad_norm": 0.9353426230920516, "learning_rate": 3.910546773788914e-05, "loss": 0.673, "step": 10585 }, { "epoch": 1.24, "grad_norm": 0.7802279526029655, "learning_rate": 3.909599603729126e-05, "loss": 0.5673, "step": 10590 }, { "epoch": 1.24, "grad_norm": 0.803304245282809, "learning_rate": 3.9086521369338556e-05, "loss": 0.5585, "step": 10595 }, { "epoch": 1.24, "grad_norm": 0.8199001979000007, "learning_rate": 3.9077043736025556e-05, "loss": 0.539, "step": 10600 }, { "epoch": 1.24, "grad_norm": 0.7984079430611349, "learning_rate": 3.9067563139347386e-05, "loss": 0.6036, "step": 10605 }, { "epoch": 1.24, "grad_norm": 0.8371379939357879, "learning_rate": 3.905807958129982e-05, "loss": 0.6438, "step": 10610 }, { "epoch": 1.24, "grad_norm": 1.0017476169312525, "learning_rate": 3.9048593063879246e-05, "loss": 0.6572, "step": 10615 }, { "epoch": 1.24, "grad_norm": 0.8923742455765313, "learning_rate": 3.903910358908267e-05, "loss": 0.583, "step": 10620 }, { "epoch": 1.24, "grad_norm": 0.827146707829598, "learning_rate": 3.9029611158907745e-05, "loss": 0.6236, "step": 10625 }, { "epoch": 1.24, "grad_norm": 0.8940512986771159, "learning_rate": 3.902011577535269e-05, "loss": 0.638, "step": 10630 }, { "epoch": 1.24, "grad_norm": 0.8542815063650978, "learning_rate": 3.901061744041643e-05, "loss": 0.6362, "step": 10635 }, { "epoch": 1.24, "grad_norm": 0.7677566757686113, "learning_rate": 3.900111615609842e-05, "loss": 0.6269, "step": 10640 }, { "epoch": 1.24, "grad_norm": 0.8962289926396972, "learning_rate": 3.899161192439881e-05, "loss": 0.5695, "step": 10645 }, { "epoch": 1.24, "grad_norm": 0.8544061172511264, "learning_rate": 3.8982104747318316e-05, "loss": 0.6068, "step": 10650 }, { "epoch": 1.24, "grad_norm": 1.5333065249275686, "learning_rate": 3.897259462685832e-05, "loss": 0.5868, "step": 10655 }, { "epoch": 1.25, "grad_norm": 0.8123447402985269, "learning_rate": 3.896308156502079e-05, "loss": 0.5808, "step": 10660 }, { "epoch": 1.25, "grad_norm": 0.7606482859001378, "learning_rate": 3.895356556380833e-05, "loss": 0.6026, "step": 10665 }, { "epoch": 1.25, "grad_norm": 0.8720049492312372, "learning_rate": 3.894404662522416e-05, "loss": 0.6163, "step": 10670 }, { "epoch": 1.25, "grad_norm": 1.0972196656470814, "learning_rate": 3.893452475127211e-05, "loss": 0.6978, "step": 10675 }, { "epoch": 1.25, "grad_norm": 0.8357015413947352, "learning_rate": 3.892499994395663e-05, "loss": 0.6325, "step": 10680 }, { "epoch": 1.25, "grad_norm": 0.8376635428915669, "learning_rate": 3.891547220528281e-05, "loss": 0.5576, "step": 10685 }, { "epoch": 1.25, "grad_norm": 0.9301395880749401, "learning_rate": 3.890594153725632e-05, "loss": 0.6492, "step": 10690 }, { "epoch": 1.25, "grad_norm": 0.8991462800998806, "learning_rate": 3.8896407941883475e-05, "loss": 0.5933, "step": 10695 }, { "epoch": 1.25, "grad_norm": 1.135268620694561, "learning_rate": 3.888687142117119e-05, "loss": 0.6041, "step": 10700 }, { "epoch": 1.25, "grad_norm": 0.7950192411597671, "learning_rate": 3.8877331977127016e-05, "loss": 0.549, "step": 10705 }, { "epoch": 1.25, "grad_norm": 0.9886058872581209, "learning_rate": 3.886778961175909e-05, "loss": 0.6432, "step": 10710 }, { "epoch": 1.25, "grad_norm": 0.9020636491058205, "learning_rate": 3.885824432707619e-05, "loss": 0.5564, "step": 10715 }, { "epoch": 1.25, "grad_norm": 0.9417537441195247, "learning_rate": 3.884869612508769e-05, "loss": 0.594, "step": 10720 }, { "epoch": 1.25, "grad_norm": 0.8395106968537629, "learning_rate": 3.8839145007803595e-05, "loss": 0.5911, "step": 10725 }, { "epoch": 1.25, "grad_norm": 0.9534682639358851, "learning_rate": 3.882959097723452e-05, "loss": 0.6634, "step": 10730 }, { "epoch": 1.25, "grad_norm": 0.8537875219359679, "learning_rate": 3.882003403539167e-05, "loss": 0.5958, "step": 10735 }, { "epoch": 1.25, "grad_norm": 0.9305107670073952, "learning_rate": 3.881047418428689e-05, "loss": 0.6929, "step": 10740 }, { "epoch": 1.26, "grad_norm": 0.9670989101416944, "learning_rate": 3.880091142593264e-05, "loss": 0.5983, "step": 10745 }, { "epoch": 1.26, "grad_norm": 0.8448567402070611, "learning_rate": 3.8791345762341964e-05, "loss": 0.6476, "step": 10750 }, { "epoch": 1.26, "grad_norm": 0.8342757687344932, "learning_rate": 3.878177719552854e-05, "loss": 0.5962, "step": 10755 }, { "epoch": 1.26, "grad_norm": 0.9821085080167582, "learning_rate": 3.877220572750666e-05, "loss": 0.6087, "step": 10760 }, { "epoch": 1.26, "grad_norm": 0.8143332953793175, "learning_rate": 3.8762631360291216e-05, "loss": 0.6719, "step": 10765 }, { "epoch": 1.26, "grad_norm": 0.8939371738689622, "learning_rate": 3.87530540958977e-05, "loss": 0.6668, "step": 10770 }, { "epoch": 1.26, "grad_norm": 0.9984507961597239, "learning_rate": 3.8743473936342237e-05, "loss": 0.7245, "step": 10775 }, { "epoch": 1.26, "grad_norm": 1.0366333910297614, "learning_rate": 3.8733890883641546e-05, "loss": 0.5783, "step": 10780 }, { "epoch": 1.26, "grad_norm": 0.9395341069698825, "learning_rate": 3.872430493981296e-05, "loss": 0.654, "step": 10785 }, { "epoch": 1.26, "grad_norm": 0.8717797602352988, "learning_rate": 3.871471610687443e-05, "loss": 0.593, "step": 10790 }, { "epoch": 1.26, "grad_norm": 0.8405064538772097, "learning_rate": 3.8705124386844496e-05, "loss": 0.6237, "step": 10795 }, { "epoch": 1.26, "grad_norm": 1.011690964651043, "learning_rate": 3.869552978174232e-05, "loss": 0.5654, "step": 10800 }, { "epoch": 1.26, "grad_norm": 0.8274264177970064, "learning_rate": 3.8685932293587667e-05, "loss": 0.735, "step": 10805 }, { "epoch": 1.26, "grad_norm": 0.7492990356826792, "learning_rate": 3.867633192440091e-05, "loss": 0.6513, "step": 10810 }, { "epoch": 1.26, "grad_norm": 0.9556706424766879, "learning_rate": 3.8666728676203014e-05, "loss": 0.6411, "step": 10815 }, { "epoch": 1.26, "grad_norm": 0.9250297568023449, "learning_rate": 3.865712255101558e-05, "loss": 0.5837, "step": 10820 }, { "epoch": 1.26, "grad_norm": 0.9267620841402138, "learning_rate": 3.864751355086079e-05, "loss": 0.7109, "step": 10825 }, { "epoch": 1.27, "grad_norm": 0.8955447348850617, "learning_rate": 3.8637901677761444e-05, "loss": 0.6102, "step": 10830 }, { "epoch": 1.27, "grad_norm": 0.9267094887891539, "learning_rate": 3.8628286933740944e-05, "loss": 0.6228, "step": 10835 }, { "epoch": 1.27, "grad_norm": 1.042260881163461, "learning_rate": 3.8618669320823275e-05, "loss": 0.6555, "step": 10840 }, { "epoch": 1.27, "grad_norm": 0.7754079249431357, "learning_rate": 3.860904884103307e-05, "loss": 0.5808, "step": 10845 }, { "epoch": 1.27, "grad_norm": 0.7762541189855575, "learning_rate": 3.8599425496395524e-05, "loss": 0.6904, "step": 10850 }, { "epoch": 1.27, "grad_norm": 0.8438497344552657, "learning_rate": 3.8589799288936455e-05, "loss": 0.6961, "step": 10855 }, { "epoch": 1.27, "grad_norm": 0.8486304572198412, "learning_rate": 3.8580170220682296e-05, "loss": 0.6287, "step": 10860 }, { "epoch": 1.27, "grad_norm": 0.9356315348099948, "learning_rate": 3.857053829366004e-05, "loss": 0.6161, "step": 10865 }, { "epoch": 1.27, "grad_norm": 0.8007096208982735, "learning_rate": 3.856090350989734e-05, "loss": 0.5484, "step": 10870 }, { "epoch": 1.27, "grad_norm": 0.9925025701344553, "learning_rate": 3.8551265871422396e-05, "loss": 0.6214, "step": 10875 }, { "epoch": 1.27, "grad_norm": 0.9825450554788376, "learning_rate": 3.854162538026403e-05, "loss": 0.6641, "step": 10880 }, { "epoch": 1.27, "grad_norm": 0.9996087481642618, "learning_rate": 3.853198203845168e-05, "loss": 0.6026, "step": 10885 }, { "epoch": 1.27, "grad_norm": 0.9274024570030142, "learning_rate": 3.8522335848015354e-05, "loss": 0.5601, "step": 10890 }, { "epoch": 1.27, "grad_norm": 0.8811789207679005, "learning_rate": 3.85126868109857e-05, "loss": 0.5875, "step": 10895 }, { "epoch": 1.27, "grad_norm": 0.9871747760850451, "learning_rate": 3.8503034929393925e-05, "loss": 0.5681, "step": 10900 }, { "epoch": 1.27, "grad_norm": 1.0026799254752736, "learning_rate": 3.8493380205271853e-05, "loss": 0.6066, "step": 10905 }, { "epoch": 1.27, "grad_norm": 0.9995022492168373, "learning_rate": 3.848372264065191e-05, "loss": 0.6323, "step": 10910 }, { "epoch": 1.28, "grad_norm": 0.8492706133660896, "learning_rate": 3.84740622375671e-05, "loss": 0.569, "step": 10915 }, { "epoch": 1.28, "grad_norm": 0.8581076132316923, "learning_rate": 3.846439899805105e-05, "loss": 0.6762, "step": 10920 }, { "epoch": 1.28, "grad_norm": 0.9502228297935466, "learning_rate": 3.845473292413798e-05, "loss": 0.5787, "step": 10925 }, { "epoch": 1.28, "grad_norm": 0.8637341736551217, "learning_rate": 3.844506401786269e-05, "loss": 0.5803, "step": 10930 }, { "epoch": 1.28, "grad_norm": 0.8524150662452137, "learning_rate": 3.843539228126058e-05, "loss": 0.5958, "step": 10935 }, { "epoch": 1.28, "grad_norm": 0.8270053635687636, "learning_rate": 3.842571771636768e-05, "loss": 0.6677, "step": 10940 }, { "epoch": 1.28, "grad_norm": 1.0086486412209947, "learning_rate": 3.841604032522055e-05, "loss": 0.6658, "step": 10945 }, { "epoch": 1.28, "grad_norm": 0.9352842090904804, "learning_rate": 3.840636010985641e-05, "loss": 0.5932, "step": 10950 }, { "epoch": 1.28, "grad_norm": 1.0806432789409284, "learning_rate": 3.8396677072313034e-05, "loss": 0.6332, "step": 10955 }, { "epoch": 1.28, "grad_norm": 0.7999354005122973, "learning_rate": 3.8386991214628797e-05, "loss": 0.5391, "step": 10960 }, { "epoch": 1.28, "grad_norm": 0.8218652169517799, "learning_rate": 3.837730253884269e-05, "loss": 0.5802, "step": 10965 }, { "epoch": 1.28, "grad_norm": 0.9829707526277693, "learning_rate": 3.836761104699427e-05, "loss": 0.6717, "step": 10970 }, { "epoch": 1.28, "grad_norm": 0.999222528590918, "learning_rate": 3.83579167411237e-05, "loss": 0.5802, "step": 10975 }, { "epoch": 1.28, "grad_norm": 0.860445495174055, "learning_rate": 3.834821962327173e-05, "loss": 0.6537, "step": 10980 }, { "epoch": 1.28, "grad_norm": 0.8125634256790278, "learning_rate": 3.8338519695479716e-05, "loss": 0.6635, "step": 10985 }, { "epoch": 1.28, "grad_norm": 0.9595748660746783, "learning_rate": 3.8328816959789575e-05, "loss": 0.6035, "step": 10990 }, { "epoch": 1.28, "grad_norm": 0.9214771561884445, "learning_rate": 3.831911141824385e-05, "loss": 0.5591, "step": 10995 }, { "epoch": 1.29, "grad_norm": 0.857980786523769, "learning_rate": 3.830940307288566e-05, "loss": 0.5479, "step": 11000 }, { "epoch": 1.29, "grad_norm": 0.7538894567415371, "learning_rate": 3.829969192575871e-05, "loss": 0.5774, "step": 11005 }, { "epoch": 1.29, "grad_norm": 0.8062194948061725, "learning_rate": 3.828997797890729e-05, "loss": 0.6194, "step": 11010 }, { "epoch": 1.29, "grad_norm": 0.9548394974181096, "learning_rate": 3.8280261234376287e-05, "loss": 0.6614, "step": 11015 }, { "epoch": 1.29, "grad_norm": 0.8897333022950585, "learning_rate": 3.82705416942112e-05, "loss": 0.6274, "step": 11020 }, { "epoch": 1.29, "grad_norm": 0.8977298770015277, "learning_rate": 3.8260819360458066e-05, "loss": 0.6162, "step": 11025 }, { "epoch": 1.29, "grad_norm": 0.9066455645379055, "learning_rate": 3.8251094235163554e-05, "loss": 0.6113, "step": 11030 }, { "epoch": 1.29, "grad_norm": 0.9580061450012253, "learning_rate": 3.824136632037491e-05, "loss": 0.5868, "step": 11035 }, { "epoch": 1.29, "grad_norm": 0.9456190707715768, "learning_rate": 3.8231635618139936e-05, "loss": 0.6277, "step": 11040 }, { "epoch": 1.29, "grad_norm": 0.8788374367071842, "learning_rate": 3.822190213050708e-05, "loss": 0.667, "step": 11045 }, { "epoch": 1.29, "grad_norm": 0.8557056891283095, "learning_rate": 3.821216585952532e-05, "loss": 0.631, "step": 11050 }, { "epoch": 1.29, "grad_norm": 0.8657693530524959, "learning_rate": 3.820242680724425e-05, "loss": 0.6108, "step": 11055 }, { "epoch": 1.29, "grad_norm": 0.9362997076936507, "learning_rate": 3.8192684975714044e-05, "loss": 0.6881, "step": 11060 }, { "epoch": 1.29, "grad_norm": 0.9086232257851192, "learning_rate": 3.8182940366985453e-05, "loss": 0.575, "step": 11065 }, { "epoch": 1.29, "grad_norm": 0.9798969018535946, "learning_rate": 3.817319298310984e-05, "loss": 0.5777, "step": 11070 }, { "epoch": 1.29, "grad_norm": 0.9204227526266643, "learning_rate": 3.81634428261391e-05, "loss": 0.6161, "step": 11075 }, { "epoch": 1.29, "grad_norm": 0.9031456475026092, "learning_rate": 3.815368989812577e-05, "loss": 0.5984, "step": 11080 }, { "epoch": 1.3, "grad_norm": 0.8332886586657042, "learning_rate": 3.814393420112293e-05, "loss": 0.5782, "step": 11085 }, { "epoch": 1.3, "grad_norm": 0.948327368803431, "learning_rate": 3.813417573718425e-05, "loss": 0.6042, "step": 11090 }, { "epoch": 1.3, "grad_norm": 0.9302673907811143, "learning_rate": 3.8124414508364e-05, "loss": 0.6084, "step": 11095 }, { "epoch": 1.3, "grad_norm": 1.0149846346708356, "learning_rate": 3.811465051671703e-05, "loss": 0.7347, "step": 11100 }, { "epoch": 1.3, "grad_norm": 0.8853276720694164, "learning_rate": 3.810488376429875e-05, "loss": 0.6183, "step": 11105 }, { "epoch": 1.3, "grad_norm": 0.8997255542636341, "learning_rate": 3.809511425316516e-05, "loss": 0.5893, "step": 11110 }, { "epoch": 1.3, "grad_norm": 1.2493794398722728, "learning_rate": 3.8085341985372847e-05, "loss": 0.638, "step": 11115 }, { "epoch": 1.3, "grad_norm": 0.9648031344636927, "learning_rate": 3.807556696297898e-05, "loss": 0.5742, "step": 11120 }, { "epoch": 1.3, "grad_norm": 0.7729946615076482, "learning_rate": 3.80657891880413e-05, "loss": 0.6159, "step": 11125 }, { "epoch": 1.3, "grad_norm": 0.8102769410124832, "learning_rate": 3.805600866261814e-05, "loss": 0.5689, "step": 11130 }, { "epoch": 1.3, "grad_norm": 0.9167695446361862, "learning_rate": 3.804622538876839e-05, "loss": 0.5909, "step": 11135 }, { "epoch": 1.3, "grad_norm": 0.885746496310935, "learning_rate": 3.803643936855154e-05, "loss": 0.6393, "step": 11140 }, { "epoch": 1.3, "grad_norm": 0.8345147567446465, "learning_rate": 3.8026650604027634e-05, "loss": 0.6152, "step": 11145 }, { "epoch": 1.3, "grad_norm": 0.8324694114308352, "learning_rate": 3.8016859097257346e-05, "loss": 0.5446, "step": 11150 }, { "epoch": 1.3, "grad_norm": 0.8321645576957094, "learning_rate": 3.8007064850301856e-05, "loss": 0.6058, "step": 11155 }, { "epoch": 1.3, "grad_norm": 0.8846630905388356, "learning_rate": 3.7997267865222966e-05, "loss": 0.5836, "step": 11160 }, { "epoch": 1.3, "grad_norm": 0.8318205805952699, "learning_rate": 3.7987468144083036e-05, "loss": 0.5852, "step": 11165 }, { "epoch": 1.31, "grad_norm": 0.8646265924602047, "learning_rate": 3.7977665688945034e-05, "loss": 0.6084, "step": 11170 }, { "epoch": 1.31, "grad_norm": 1.0135804808549047, "learning_rate": 3.7967860501872466e-05, "loss": 0.6305, "step": 11175 }, { "epoch": 1.31, "grad_norm": 1.0572248931514374, "learning_rate": 3.795805258492942e-05, "loss": 0.5791, "step": 11180 }, { "epoch": 1.31, "grad_norm": 0.8241309161367708, "learning_rate": 3.794824194018057e-05, "loss": 0.6332, "step": 11185 }, { "epoch": 1.31, "grad_norm": 1.0207797991250498, "learning_rate": 3.793842856969115e-05, "loss": 0.5828, "step": 11190 }, { "epoch": 1.31, "grad_norm": 0.8703248082964679, "learning_rate": 3.7928612475526994e-05, "loss": 0.5701, "step": 11195 }, { "epoch": 1.31, "grad_norm": 0.9909917811865638, "learning_rate": 3.791879365975449e-05, "loss": 0.6104, "step": 11200 }, { "epoch": 1.31, "grad_norm": 0.9724696243898826, "learning_rate": 3.79089721244406e-05, "loss": 0.7083, "step": 11205 }, { "epoch": 1.31, "grad_norm": 0.8660167582718417, "learning_rate": 3.7899147871652854e-05, "loss": 0.6314, "step": 11210 }, { "epoch": 1.31, "grad_norm": 0.8612859220817484, "learning_rate": 3.788932090345936e-05, "loss": 0.6172, "step": 11215 }, { "epoch": 1.31, "grad_norm": 0.8656672706644697, "learning_rate": 3.78794912219288e-05, "loss": 0.6021, "step": 11220 }, { "epoch": 1.31, "grad_norm": 0.8465723235668507, "learning_rate": 3.786965882913043e-05, "loss": 0.5641, "step": 11225 }, { "epoch": 1.31, "grad_norm": 0.9442867577106012, "learning_rate": 3.785982372713407e-05, "loss": 0.6582, "step": 11230 }, { "epoch": 1.31, "grad_norm": 0.8928565402328836, "learning_rate": 3.7849985918010104e-05, "loss": 0.566, "step": 11235 }, { "epoch": 1.31, "grad_norm": 0.9336485202547289, "learning_rate": 3.7840145403829504e-05, "loss": 0.5899, "step": 11240 }, { "epoch": 1.31, "grad_norm": 0.8785527733013121, "learning_rate": 3.783030218666379e-05, "loss": 0.6675, "step": 11245 }, { "epoch": 1.31, "grad_norm": 0.9097826967379423, "learning_rate": 3.782045626858508e-05, "loss": 0.6363, "step": 11250 }, { "epoch": 1.31, "grad_norm": 0.8987518233115371, "learning_rate": 3.781060765166603e-05, "loss": 0.5632, "step": 11255 }, { "epoch": 1.32, "grad_norm": 0.7868921879200586, "learning_rate": 3.780075633797987e-05, "loss": 0.5834, "step": 11260 }, { "epoch": 1.32, "grad_norm": 1.0211945683796622, "learning_rate": 3.7790902329600425e-05, "loss": 0.5981, "step": 11265 }, { "epoch": 1.32, "grad_norm": 0.8419824706101989, "learning_rate": 3.7781045628602055e-05, "loss": 0.6016, "step": 11270 }, { "epoch": 1.32, "grad_norm": 0.9685983278017648, "learning_rate": 3.7771186237059684e-05, "loss": 0.7299, "step": 11275 }, { "epoch": 1.32, "grad_norm": 0.8817880606433147, "learning_rate": 3.776132415704885e-05, "loss": 0.5684, "step": 11280 }, { "epoch": 1.32, "grad_norm": 0.8953681497446253, "learning_rate": 3.7751459390645606e-05, "loss": 0.6323, "step": 11285 }, { "epoch": 1.32, "grad_norm": 0.92791057615013, "learning_rate": 3.774159193992659e-05, "loss": 0.6532, "step": 11290 }, { "epoch": 1.32, "grad_norm": 0.7796992976893938, "learning_rate": 3.773172180696899e-05, "loss": 0.5357, "step": 11295 }, { "epoch": 1.32, "grad_norm": 1.0095280570162695, "learning_rate": 3.77218489938506e-05, "loss": 0.6254, "step": 11300 }, { "epoch": 1.32, "grad_norm": 0.8728540668103351, "learning_rate": 3.7711973502649734e-05, "loss": 0.5855, "step": 11305 }, { "epoch": 1.32, "grad_norm": 0.8208463354991954, "learning_rate": 3.770209533544528e-05, "loss": 0.6229, "step": 11310 }, { "epoch": 1.32, "grad_norm": 0.913216067875161, "learning_rate": 3.769221449431672e-05, "loss": 0.5345, "step": 11315 }, { "epoch": 1.32, "grad_norm": 0.8384641919175935, "learning_rate": 3.768233098134405e-05, "loss": 0.6026, "step": 11320 }, { "epoch": 1.32, "grad_norm": 1.1479452158066363, "learning_rate": 3.767244479860786e-05, "loss": 0.6281, "step": 11325 }, { "epoch": 1.32, "grad_norm": 0.8902637630830832, "learning_rate": 3.7662555948189306e-05, "loss": 0.6356, "step": 11330 }, { "epoch": 1.32, "grad_norm": 0.9125532587337507, "learning_rate": 3.765266443217007e-05, "loss": 0.5757, "step": 11335 }, { "epoch": 1.32, "grad_norm": 0.9114758609594183, "learning_rate": 3.7642770252632445e-05, "loss": 0.6894, "step": 11340 }, { "epoch": 1.33, "grad_norm": 0.9808822816853947, "learning_rate": 3.7632873411659255e-05, "loss": 0.6391, "step": 11345 }, { "epoch": 1.33, "grad_norm": 0.8492305095279656, "learning_rate": 3.7622973911333876e-05, "loss": 0.6428, "step": 11350 }, { "epoch": 1.33, "grad_norm": 0.9750796246164326, "learning_rate": 3.761307175374026e-05, "loss": 0.5861, "step": 11355 }, { "epoch": 1.33, "grad_norm": 0.8629630109832057, "learning_rate": 3.760316694096293e-05, "loss": 0.6064, "step": 11360 }, { "epoch": 1.33, "grad_norm": 1.3550431161513656, "learning_rate": 3.7593259475086935e-05, "loss": 0.6905, "step": 11365 }, { "epoch": 1.33, "grad_norm": 0.9469540128688306, "learning_rate": 3.7583349358197904e-05, "loss": 0.6067, "step": 11370 }, { "epoch": 1.33, "grad_norm": 0.8695447191434387, "learning_rate": 3.7573436592382026e-05, "loss": 0.6105, "step": 11375 }, { "epoch": 1.33, "grad_norm": 0.9728299524926329, "learning_rate": 3.7563521179726044e-05, "loss": 0.5891, "step": 11380 }, { "epoch": 1.33, "grad_norm": 0.9792883660167887, "learning_rate": 3.755360312231726e-05, "loss": 0.6005, "step": 11385 }, { "epoch": 1.33, "grad_norm": 0.9105457541277419, "learning_rate": 3.75436824222435e-05, "loss": 0.6469, "step": 11390 }, { "epoch": 1.33, "grad_norm": 0.7922867610863673, "learning_rate": 3.753375908159321e-05, "loss": 0.6414, "step": 11395 }, { "epoch": 1.33, "grad_norm": 1.111971860056968, "learning_rate": 3.752383310245534e-05, "loss": 0.6063, "step": 11400 }, { "epoch": 1.33, "grad_norm": 0.9304507725403305, "learning_rate": 3.7513904486919415e-05, "loss": 0.6509, "step": 11405 }, { "epoch": 1.33, "grad_norm": 0.8469496890401091, "learning_rate": 3.7503973237075514e-05, "loss": 0.6037, "step": 11410 }, { "epoch": 1.33, "grad_norm": 0.9445851104822968, "learning_rate": 3.749403935501427e-05, "loss": 0.644, "step": 11415 }, { "epoch": 1.33, "grad_norm": 0.8120529458493533, "learning_rate": 3.748410284282686e-05, "loss": 0.7007, "step": 11420 }, { "epoch": 1.33, "grad_norm": 0.8413557511647501, "learning_rate": 3.7474163702605034e-05, "loss": 0.6032, "step": 11425 }, { "epoch": 1.34, "grad_norm": 0.8352067275623862, "learning_rate": 3.7464221936441094e-05, "loss": 0.5946, "step": 11430 }, { "epoch": 1.34, "grad_norm": 1.0183922714166689, "learning_rate": 3.745427754642787e-05, "loss": 0.6426, "step": 11435 }, { "epoch": 1.34, "grad_norm": 0.902175681401637, "learning_rate": 3.7444330534658755e-05, "loss": 0.573, "step": 11440 }, { "epoch": 1.34, "grad_norm": 0.9848095718516258, "learning_rate": 3.743438090322772e-05, "loss": 0.556, "step": 11445 }, { "epoch": 1.34, "grad_norm": 0.9348176258943024, "learning_rate": 3.742442865422925e-05, "loss": 0.6395, "step": 11450 }, { "epoch": 1.34, "grad_norm": 0.8557819391771571, "learning_rate": 3.741447378975841e-05, "loss": 0.5686, "step": 11455 }, { "epoch": 1.34, "grad_norm": 0.9832075277554719, "learning_rate": 3.740451631191079e-05, "loss": 0.6982, "step": 11460 }, { "epoch": 1.34, "grad_norm": 0.848202394046599, "learning_rate": 3.739455622278255e-05, "loss": 0.5942, "step": 11465 }, { "epoch": 1.34, "grad_norm": 0.8952627283268336, "learning_rate": 3.7384593524470396e-05, "loss": 0.581, "step": 11470 }, { "epoch": 1.34, "grad_norm": 0.8229072416704125, "learning_rate": 3.7374628219071576e-05, "loss": 0.5735, "step": 11475 }, { "epoch": 1.34, "grad_norm": 0.886607252396971, "learning_rate": 3.7364660308683894e-05, "loss": 0.6148, "step": 11480 }, { "epoch": 1.34, "grad_norm": 0.8676237723911505, "learning_rate": 3.735468979540569e-05, "loss": 0.6434, "step": 11485 }, { "epoch": 1.34, "grad_norm": 0.848116892329449, "learning_rate": 3.7344716681335887e-05, "loss": 0.6368, "step": 11490 }, { "epoch": 1.34, "grad_norm": 0.9443951647533891, "learning_rate": 3.73347409685739e-05, "loss": 0.6804, "step": 11495 }, { "epoch": 1.34, "grad_norm": 0.8141488078395794, "learning_rate": 3.732476265921974e-05, "loss": 0.6425, "step": 11500 }, { "epoch": 1.34, "grad_norm": 0.9608420311948448, "learning_rate": 3.731478175537393e-05, "loss": 0.6219, "step": 11505 }, { "epoch": 1.34, "grad_norm": 0.8621877078630583, "learning_rate": 3.730479825913758e-05, "loss": 0.6431, "step": 11510 }, { "epoch": 1.35, "grad_norm": 0.826002432148367, "learning_rate": 3.7294812172612284e-05, "loss": 0.6545, "step": 11515 }, { "epoch": 1.35, "grad_norm": 0.846160540751185, "learning_rate": 3.728482349790025e-05, "loss": 0.5828, "step": 11520 }, { "epoch": 1.35, "grad_norm": 1.0051060833180396, "learning_rate": 3.7274832237104185e-05, "loss": 0.615, "step": 11525 }, { "epoch": 1.35, "grad_norm": 0.8096020240629066, "learning_rate": 3.726483839232736e-05, "loss": 0.6059, "step": 11530 }, { "epoch": 1.35, "grad_norm": 0.8649721951013966, "learning_rate": 3.725484196567358e-05, "loss": 0.6594, "step": 11535 }, { "epoch": 1.35, "grad_norm": 0.8220253964395031, "learning_rate": 3.7244842959247196e-05, "loss": 0.6212, "step": 11540 }, { "epoch": 1.35, "grad_norm": 0.9547267956323815, "learning_rate": 3.7234841375153105e-05, "loss": 0.5733, "step": 11545 }, { "epoch": 1.35, "grad_norm": 1.021393747095624, "learning_rate": 3.7224837215496746e-05, "loss": 0.7078, "step": 11550 }, { "epoch": 1.35, "grad_norm": 0.9635282878003277, "learning_rate": 3.7214830482384095e-05, "loss": 0.6321, "step": 11555 }, { "epoch": 1.35, "grad_norm": 0.9400741543897787, "learning_rate": 3.720482117792169e-05, "loss": 0.5791, "step": 11560 }, { "epoch": 1.35, "grad_norm": 0.8244965274555158, "learning_rate": 3.719480930421657e-05, "loss": 0.6157, "step": 11565 }, { "epoch": 1.35, "grad_norm": 1.0930063113874906, "learning_rate": 3.7184794863376354e-05, "loss": 0.5673, "step": 11570 }, { "epoch": 1.35, "grad_norm": 1.0914630542998986, "learning_rate": 3.717477785750919e-05, "loss": 0.6337, "step": 11575 }, { "epoch": 1.35, "grad_norm": 0.892354492440155, "learning_rate": 3.716475828872375e-05, "loss": 0.5459, "step": 11580 }, { "epoch": 1.35, "grad_norm": 0.8611819198520095, "learning_rate": 3.7154736159129265e-05, "loss": 0.5331, "step": 11585 }, { "epoch": 1.35, "grad_norm": 0.8068972894930494, "learning_rate": 3.714471147083551e-05, "loss": 0.6029, "step": 11590 }, { "epoch": 1.35, "grad_norm": 0.8843196201964665, "learning_rate": 3.7134684225952766e-05, "loss": 0.6172, "step": 11595 }, { "epoch": 1.36, "grad_norm": 0.9012176991567481, "learning_rate": 3.712465442659188e-05, "loss": 0.5888, "step": 11600 }, { "epoch": 1.36, "grad_norm": 1.419713424001114, "learning_rate": 3.711462207486424e-05, "loss": 0.6504, "step": 11605 }, { "epoch": 1.36, "grad_norm": 0.9966863116420186, "learning_rate": 3.710458717288176e-05, "loss": 0.6061, "step": 11610 }, { "epoch": 1.36, "grad_norm": 0.8515444217587302, "learning_rate": 3.7094549722756866e-05, "loss": 0.6732, "step": 11615 }, { "epoch": 1.36, "grad_norm": 0.9083326272134167, "learning_rate": 3.708450972660258e-05, "loss": 0.6058, "step": 11620 }, { "epoch": 1.36, "grad_norm": 1.0100396309954474, "learning_rate": 3.7074467186532405e-05, "loss": 0.6137, "step": 11625 }, { "epoch": 1.36, "grad_norm": 0.8813512492341774, "learning_rate": 3.706442210466041e-05, "loss": 1.3633, "step": 11630 }, { "epoch": 1.36, "grad_norm": 0.8262883146845641, "learning_rate": 3.7054374483101194e-05, "loss": 0.6067, "step": 11635 }, { "epoch": 1.36, "grad_norm": 0.8063940645429819, "learning_rate": 3.7044324323969875e-05, "loss": 0.6256, "step": 11640 }, { "epoch": 1.36, "grad_norm": 0.8502441867849484, "learning_rate": 3.7034271629382125e-05, "loss": 0.6471, "step": 11645 }, { "epoch": 1.36, "grad_norm": 0.9558059639172778, "learning_rate": 3.702421640145414e-05, "loss": 0.6065, "step": 11650 }, { "epoch": 1.36, "grad_norm": 0.8964591128487243, "learning_rate": 3.7014158642302645e-05, "loss": 0.6249, "step": 11655 }, { "epoch": 1.36, "grad_norm": 0.9410443055703813, "learning_rate": 3.700409835404491e-05, "loss": 0.6858, "step": 11660 }, { "epoch": 1.36, "grad_norm": 0.8359924847753424, "learning_rate": 3.699403553879873e-05, "loss": 0.6574, "step": 11665 }, { "epoch": 1.36, "grad_norm": 0.8506670110351549, "learning_rate": 3.6983970198682435e-05, "loss": 0.6281, "step": 11670 }, { "epoch": 1.36, "grad_norm": 0.864007163969048, "learning_rate": 3.6973902335814886e-05, "loss": 0.6158, "step": 11675 }, { "epoch": 1.36, "grad_norm": 0.9014929669141521, "learning_rate": 3.696383195231546e-05, "loss": 0.6167, "step": 11680 }, { "epoch": 1.37, "grad_norm": 0.8483195813673494, "learning_rate": 3.695375905030409e-05, "loss": 0.6211, "step": 11685 }, { "epoch": 1.37, "grad_norm": 0.7976878432407867, "learning_rate": 3.694368363190123e-05, "loss": 0.6019, "step": 11690 }, { "epoch": 1.37, "grad_norm": 0.946242048073552, "learning_rate": 3.6933605699227855e-05, "loss": 0.6295, "step": 11695 }, { "epoch": 1.37, "grad_norm": 0.8219336558297399, "learning_rate": 3.692352525440548e-05, "loss": 0.6224, "step": 11700 }, { "epoch": 1.37, "grad_norm": 0.8923286866315998, "learning_rate": 3.6913442299556135e-05, "loss": 0.6115, "step": 11705 }, { "epoch": 1.37, "grad_norm": 0.8435919575200298, "learning_rate": 3.690335683680239e-05, "loss": 0.6032, "step": 11710 }, { "epoch": 1.37, "grad_norm": 1.0458727498746518, "learning_rate": 3.689326886826735e-05, "loss": 0.5331, "step": 11715 }, { "epoch": 1.37, "grad_norm": 0.8457603664996752, "learning_rate": 3.688317839607463e-05, "loss": 0.6043, "step": 11720 }, { "epoch": 1.37, "grad_norm": 0.9141727510462546, "learning_rate": 3.6873085422348385e-05, "loss": 0.602, "step": 11725 }, { "epoch": 1.37, "grad_norm": 0.9706587815811856, "learning_rate": 3.686298994921329e-05, "loss": 0.5999, "step": 11730 }, { "epoch": 1.37, "grad_norm": 0.8392066019989034, "learning_rate": 3.685289197879454e-05, "loss": 0.6388, "step": 11735 }, { "epoch": 1.37, "grad_norm": 0.9963421838539251, "learning_rate": 3.684279151321788e-05, "loss": 0.5961, "step": 11740 }, { "epoch": 1.37, "grad_norm": 0.8536922835244994, "learning_rate": 3.683268855460955e-05, "loss": 0.6118, "step": 11745 }, { "epoch": 1.37, "grad_norm": 0.7901944889689158, "learning_rate": 3.682258310509633e-05, "loss": 0.6362, "step": 11750 }, { "epoch": 1.37, "grad_norm": 0.9220205609995663, "learning_rate": 3.6812475166805526e-05, "loss": 0.6294, "step": 11755 }, { "epoch": 1.37, "grad_norm": 0.8741271739185323, "learning_rate": 3.680236474186497e-05, "loss": 0.6089, "step": 11760 }, { "epoch": 1.37, "grad_norm": 0.9070119288566725, "learning_rate": 3.6792251832403e-05, "loss": 0.664, "step": 11765 }, { "epoch": 1.38, "grad_norm": 0.8534567714319213, "learning_rate": 3.67821364405485e-05, "loss": 0.6307, "step": 11770 }, { "epoch": 1.38, "grad_norm": 0.8874654347954261, "learning_rate": 3.6772018568430864e-05, "loss": 0.6057, "step": 11775 }, { "epoch": 1.38, "grad_norm": 0.9480911478991431, "learning_rate": 3.676189821818e-05, "loss": 0.5985, "step": 11780 }, { "epoch": 1.38, "grad_norm": 0.8174653350358779, "learning_rate": 3.675177539192636e-05, "loss": 0.5718, "step": 11785 }, { "epoch": 1.38, "grad_norm": 0.7888549229450207, "learning_rate": 3.674165009180091e-05, "loss": 0.5888, "step": 11790 }, { "epoch": 1.38, "grad_norm": 0.9299848532603948, "learning_rate": 3.673152231993512e-05, "loss": 0.6406, "step": 11795 }, { "epoch": 1.38, "grad_norm": 0.8579765079823473, "learning_rate": 3.672139207846099e-05, "loss": 0.599, "step": 11800 }, { "epoch": 1.38, "grad_norm": 0.9839779105297691, "learning_rate": 3.671125936951105e-05, "loss": 0.6279, "step": 11805 }, { "epoch": 1.38, "grad_norm": 0.8922445801315554, "learning_rate": 3.670112419521833e-05, "loss": 0.5724, "step": 11810 }, { "epoch": 1.38, "grad_norm": 0.9473545153433485, "learning_rate": 3.669098655771641e-05, "loss": 0.6988, "step": 11815 }, { "epoch": 1.38, "grad_norm": 0.9594339530288059, "learning_rate": 3.668084645913935e-05, "loss": 0.6917, "step": 11820 }, { "epoch": 1.38, "grad_norm": 0.752388039245179, "learning_rate": 3.667070390162176e-05, "loss": 0.5212, "step": 11825 }, { "epoch": 1.38, "grad_norm": 0.945652079162958, "learning_rate": 3.666055888729874e-05, "loss": 0.5946, "step": 11830 }, { "epoch": 1.38, "grad_norm": 0.8740843589928866, "learning_rate": 3.665041141830594e-05, "loss": 0.6667, "step": 11835 }, { "epoch": 1.38, "grad_norm": 0.8873866105458915, "learning_rate": 3.66402614967795e-05, "loss": 0.6325, "step": 11840 }, { "epoch": 1.38, "grad_norm": 0.9989105772306284, "learning_rate": 3.6630109124856077e-05, "loss": 0.6198, "step": 11845 }, { "epoch": 1.38, "grad_norm": 0.9745653244874164, "learning_rate": 3.6619954304672865e-05, "loss": 0.7203, "step": 11850 }, { "epoch": 1.39, "grad_norm": 0.8955360500291366, "learning_rate": 3.660979703836755e-05, "loss": 0.6057, "step": 11855 }, { "epoch": 1.39, "grad_norm": 1.0822483631246398, "learning_rate": 3.6599637328078346e-05, "loss": 0.6273, "step": 11860 }, { "epoch": 1.39, "grad_norm": 0.8862916401341311, "learning_rate": 3.658947517594398e-05, "loss": 0.6566, "step": 11865 }, { "epoch": 1.39, "grad_norm": 0.7865539902489639, "learning_rate": 3.657931058410369e-05, "loss": 0.6186, "step": 11870 }, { "epoch": 1.39, "grad_norm": 0.7606360957566586, "learning_rate": 3.656914355469724e-05, "loss": 0.5791, "step": 11875 }, { "epoch": 1.39, "grad_norm": 1.0426033911520904, "learning_rate": 3.655897408986487e-05, "loss": 0.6493, "step": 11880 }, { "epoch": 1.39, "grad_norm": 3.3394917940746476, "learning_rate": 3.6548802191747385e-05, "loss": 0.6398, "step": 11885 }, { "epoch": 1.39, "grad_norm": 0.8190767899160933, "learning_rate": 3.6538627862486064e-05, "loss": 0.7146, "step": 11890 }, { "epoch": 1.39, "grad_norm": 0.8416893474339635, "learning_rate": 3.65284511042227e-05, "loss": 0.6684, "step": 11895 }, { "epoch": 1.39, "grad_norm": 0.8594889718555592, "learning_rate": 3.6518271919099635e-05, "loss": 0.6567, "step": 11900 }, { "epoch": 1.39, "grad_norm": 0.8430123737106848, "learning_rate": 3.6508090309259674e-05, "loss": 0.6225, "step": 11905 }, { "epoch": 1.39, "grad_norm": 0.8647792785300874, "learning_rate": 3.649790627684616e-05, "loss": 0.5574, "step": 11910 }, { "epoch": 1.39, "grad_norm": 0.9818801422694434, "learning_rate": 3.6487719824002926e-05, "loss": 0.58, "step": 11915 }, { "epoch": 1.39, "grad_norm": 0.9276693398318523, "learning_rate": 3.647753095287434e-05, "loss": 0.613, "step": 11920 }, { "epoch": 1.39, "grad_norm": 0.9167940833193173, "learning_rate": 3.646733966560527e-05, "loss": 0.6399, "step": 11925 }, { "epoch": 1.39, "grad_norm": 0.8515885891556684, "learning_rate": 3.6457145964341064e-05, "loss": 0.636, "step": 11930 }, { "epoch": 1.39, "grad_norm": 1.1826619378287282, "learning_rate": 3.6446949851227636e-05, "loss": 0.6507, "step": 11935 }, { "epoch": 1.4, "grad_norm": 0.8151826147835061, "learning_rate": 3.6436751328411346e-05, "loss": 0.6634, "step": 11940 }, { "epoch": 1.4, "grad_norm": 0.8933784656186476, "learning_rate": 3.642655039803911e-05, "loss": 0.5745, "step": 11945 }, { "epoch": 1.4, "grad_norm": 18.275452782991643, "learning_rate": 3.6416347062258325e-05, "loss": 0.6362, "step": 11950 }, { "epoch": 1.4, "grad_norm": 0.9538021653732811, "learning_rate": 3.640614132321689e-05, "loss": 0.6611, "step": 11955 }, { "epoch": 1.4, "grad_norm": 1.267132202895838, "learning_rate": 3.639593318306323e-05, "loss": 0.6567, "step": 11960 }, { "epoch": 1.4, "grad_norm": 0.9172124975874861, "learning_rate": 3.638572264394626e-05, "loss": 0.6367, "step": 11965 }, { "epoch": 1.4, "grad_norm": 0.8664333095838165, "learning_rate": 3.637550970801543e-05, "loss": 0.5852, "step": 11970 }, { "epoch": 1.4, "grad_norm": 0.9156970694456673, "learning_rate": 3.636529437742063e-05, "loss": 0.6654, "step": 11975 }, { "epoch": 1.4, "grad_norm": 0.8622107729700692, "learning_rate": 3.635507665431231e-05, "loss": 0.614, "step": 11980 }, { "epoch": 1.4, "grad_norm": 0.799167859462027, "learning_rate": 3.6344856540841424e-05, "loss": 0.6015, "step": 11985 }, { "epoch": 1.4, "grad_norm": 0.9432145201955199, "learning_rate": 3.633463403915939e-05, "loss": 0.5937, "step": 11990 }, { "epoch": 1.4, "grad_norm": 0.8942013630608001, "learning_rate": 3.632440915141816e-05, "loss": 0.7099, "step": 11995 }, { "epoch": 1.4, "grad_norm": 0.8970834872690424, "learning_rate": 3.6314181879770195e-05, "loss": 0.6072, "step": 12000 }, { "epoch": 1.4, "grad_norm": 0.8670091301488518, "learning_rate": 3.630395222636842e-05, "loss": 0.6866, "step": 12005 }, { "epoch": 1.4, "grad_norm": 0.9950770407163555, "learning_rate": 3.629372019336629e-05, "loss": 0.688, "step": 12010 }, { "epoch": 1.4, "grad_norm": 1.0092870679045602, "learning_rate": 3.628348578291776e-05, "loss": 0.6212, "step": 12015 }, { "epoch": 1.4, "grad_norm": 0.8970458337270661, "learning_rate": 3.6273248997177284e-05, "loss": 0.5511, "step": 12020 }, { "epoch": 1.4, "grad_norm": 0.9267648123052946, "learning_rate": 3.626300983829981e-05, "loss": 0.641, "step": 12025 }, { "epoch": 1.41, "grad_norm": 0.9212166095013694, "learning_rate": 3.625276830844078e-05, "loss": 0.6525, "step": 12030 }, { "epoch": 1.41, "grad_norm": 0.8567839607925971, "learning_rate": 3.624252440975615e-05, "loss": 0.6262, "step": 12035 }, { "epoch": 1.41, "grad_norm": 0.8906713133012847, "learning_rate": 3.623227814440237e-05, "loss": 0.6247, "step": 12040 }, { "epoch": 1.41, "grad_norm": 1.1224718084593563, "learning_rate": 3.622202951453637e-05, "loss": 0.5882, "step": 12045 }, { "epoch": 1.41, "grad_norm": 1.220008869058968, "learning_rate": 3.621177852231563e-05, "loss": 0.6168, "step": 12050 }, { "epoch": 1.41, "grad_norm": 0.9706766043476424, "learning_rate": 3.620152516989805e-05, "loss": 0.617, "step": 12055 }, { "epoch": 1.41, "grad_norm": 0.8153129469677494, "learning_rate": 3.619126945944209e-05, "loss": 0.6571, "step": 12060 }, { "epoch": 1.41, "grad_norm": 1.0215444864061816, "learning_rate": 3.618101139310669e-05, "loss": 0.6171, "step": 12065 }, { "epoch": 1.41, "grad_norm": 0.8934896089793263, "learning_rate": 3.617075097305126e-05, "loss": 0.5959, "step": 12070 }, { "epoch": 1.41, "grad_norm": 0.9843694449696093, "learning_rate": 3.6160488201435745e-05, "loss": 0.5846, "step": 12075 }, { "epoch": 1.41, "grad_norm": 0.8697273081394036, "learning_rate": 3.6150223080420555e-05, "loss": 0.5674, "step": 12080 }, { "epoch": 1.41, "grad_norm": 0.800288367189229, "learning_rate": 3.6139955612166615e-05, "loss": 0.5886, "step": 12085 }, { "epoch": 1.41, "grad_norm": 0.9040934975175596, "learning_rate": 3.612968579883532e-05, "loss": 0.642, "step": 12090 }, { "epoch": 1.41, "grad_norm": 0.9528880927170442, "learning_rate": 3.611941364258859e-05, "loss": 0.616, "step": 12095 }, { "epoch": 1.41, "grad_norm": 0.9271861118502893, "learning_rate": 3.610913914558881e-05, "loss": 0.6094, "step": 12100 }, { "epoch": 1.41, "grad_norm": 0.8450485138889783, "learning_rate": 3.609886230999886e-05, "loss": 0.6819, "step": 12105 }, { "epoch": 1.41, "grad_norm": 0.9521220678078501, "learning_rate": 3.6088583137982154e-05, "loss": 0.5995, "step": 12110 }, { "epoch": 1.42, "grad_norm": 0.8283449125046546, "learning_rate": 3.607830163170254e-05, "loss": 0.5865, "step": 12115 }, { "epoch": 1.42, "grad_norm": 0.8336540444103293, "learning_rate": 3.6068017793324385e-05, "loss": 0.6064, "step": 12120 }, { "epoch": 1.42, "grad_norm": 0.8838534548040841, "learning_rate": 3.605773162501255e-05, "loss": 0.6232, "step": 12125 }, { "epoch": 1.42, "grad_norm": 0.8218728945185004, "learning_rate": 3.6047443128932374e-05, "loss": 0.5761, "step": 12130 }, { "epoch": 1.42, "grad_norm": 0.9236400315218273, "learning_rate": 3.6037152307249706e-05, "loss": 0.5891, "step": 12135 }, { "epoch": 1.42, "grad_norm": 0.881988398219993, "learning_rate": 3.6026859162130854e-05, "loss": 0.6041, "step": 12140 }, { "epoch": 1.42, "grad_norm": 0.8190175918841568, "learning_rate": 3.601656369574265e-05, "loss": 0.5823, "step": 12145 }, { "epoch": 1.42, "grad_norm": 0.9116519229688698, "learning_rate": 3.600626591025239e-05, "loss": 0.62, "step": 12150 }, { "epoch": 1.42, "grad_norm": 1.9053885643530948, "learning_rate": 3.599596580782787e-05, "loss": 0.7042, "step": 12155 }, { "epoch": 1.42, "grad_norm": 0.854121065594848, "learning_rate": 3.5985663390637354e-05, "loss": 0.5904, "step": 12160 }, { "epoch": 1.42, "grad_norm": 0.939889025268169, "learning_rate": 3.5975358660849637e-05, "loss": 0.5553, "step": 12165 }, { "epoch": 1.42, "grad_norm": 0.9841472953902778, "learning_rate": 3.596505162063394e-05, "loss": 0.6373, "step": 12170 }, { "epoch": 1.42, "grad_norm": 1.0028632154873853, "learning_rate": 3.5954742272160026e-05, "loss": 0.651, "step": 12175 }, { "epoch": 1.42, "grad_norm": 0.953966824591413, "learning_rate": 3.594443061759811e-05, "loss": 0.5832, "step": 12180 }, { "epoch": 1.42, "grad_norm": 0.8970291999675264, "learning_rate": 3.593411665911891e-05, "loss": 0.6377, "step": 12185 }, { "epoch": 1.42, "grad_norm": 1.2858605745665024, "learning_rate": 3.592380039889362e-05, "loss": 0.6246, "step": 12190 }, { "epoch": 1.42, "grad_norm": 0.7986784002870713, "learning_rate": 3.591348183909391e-05, "loss": 0.6582, "step": 12195 }, { "epoch": 1.43, "grad_norm": 0.9071050087626734, "learning_rate": 3.590316098189197e-05, "loss": 0.6737, "step": 12200 }, { "epoch": 1.43, "grad_norm": 0.9132510708771986, "learning_rate": 3.5892837829460426e-05, "loss": 0.5919, "step": 12205 }, { "epoch": 1.43, "grad_norm": 0.9433480879893589, "learning_rate": 3.588251238397242e-05, "loss": 0.5984, "step": 12210 }, { "epoch": 1.43, "grad_norm": 0.8425425497415544, "learning_rate": 3.587218464760157e-05, "loss": 0.674, "step": 12215 }, { "epoch": 1.43, "grad_norm": 0.8560379066177962, "learning_rate": 3.5861854622521956e-05, "loss": 0.6225, "step": 12220 }, { "epoch": 1.43, "grad_norm": 1.0216349670614215, "learning_rate": 3.585152231090817e-05, "loss": 0.5898, "step": 12225 }, { "epoch": 1.43, "grad_norm": 0.941910058752599, "learning_rate": 3.584118771493527e-05, "loss": 0.6371, "step": 12230 }, { "epoch": 1.43, "grad_norm": 0.9450546824846788, "learning_rate": 3.583085083677879e-05, "loss": 0.67, "step": 12235 }, { "epoch": 1.43, "grad_norm": 0.9430983134358434, "learning_rate": 3.582051167861477e-05, "loss": 0.6077, "step": 12240 }, { "epoch": 1.43, "grad_norm": 1.2647311219912982, "learning_rate": 3.581017024261968e-05, "loss": 0.5574, "step": 12245 }, { "epoch": 1.43, "grad_norm": 0.8800855810701044, "learning_rate": 3.579982653097053e-05, "loss": 0.5983, "step": 12250 }, { "epoch": 1.43, "grad_norm": 0.8531335602475937, "learning_rate": 3.578948054584476e-05, "loss": 0.6076, "step": 12255 }, { "epoch": 1.43, "grad_norm": 0.9688059857971794, "learning_rate": 3.577913228942033e-05, "loss": 0.5814, "step": 12260 }, { "epoch": 1.43, "grad_norm": 0.8878522123624325, "learning_rate": 3.5768781763875634e-05, "loss": 0.6511, "step": 12265 }, { "epoch": 1.43, "grad_norm": 0.8859337872833457, "learning_rate": 3.575842897138957e-05, "loss": 0.6106, "step": 12270 }, { "epoch": 1.43, "grad_norm": 0.8392555265533592, "learning_rate": 3.5748073914141515e-05, "loss": 0.6318, "step": 12275 }, { "epoch": 1.43, "grad_norm": 1.0179826817227207, "learning_rate": 3.573771659431132e-05, "loss": 0.6587, "step": 12280 }, { "epoch": 1.44, "grad_norm": 0.8250855610755873, "learning_rate": 3.57273570140793e-05, "loss": 0.6277, "step": 12285 }, { "epoch": 1.44, "grad_norm": 1.028761145323544, "learning_rate": 3.571699517562627e-05, "loss": 0.5805, "step": 12290 }, { "epoch": 1.44, "grad_norm": 0.8470798159598402, "learning_rate": 3.57066310811335e-05, "loss": 0.6357, "step": 12295 }, { "epoch": 1.44, "grad_norm": 0.9002010615128283, "learning_rate": 3.569626473278271e-05, "loss": 0.5889, "step": 12300 }, { "epoch": 1.44, "grad_norm": 0.9351117962530887, "learning_rate": 3.568589613275618e-05, "loss": 0.6457, "step": 12305 }, { "epoch": 1.44, "grad_norm": 0.9952238962551381, "learning_rate": 3.567552528323656e-05, "loss": 0.5789, "step": 12310 }, { "epoch": 1.44, "grad_norm": 0.8771019021548889, "learning_rate": 3.566515218640705e-05, "loss": 0.5995, "step": 12315 }, { "epoch": 1.44, "grad_norm": 0.8035809097945872, "learning_rate": 3.5654776844451296e-05, "loss": 0.5988, "step": 12320 }, { "epoch": 1.44, "grad_norm": 0.8893650027132582, "learning_rate": 3.56443992595534e-05, "loss": 0.5957, "step": 12325 }, { "epoch": 1.44, "grad_norm": 0.9225381879454705, "learning_rate": 3.5634019433897964e-05, "loss": 0.6473, "step": 12330 }, { "epoch": 1.44, "grad_norm": 0.9015631227736667, "learning_rate": 3.562363736967004e-05, "loss": 0.6037, "step": 12335 }, { "epoch": 1.44, "grad_norm": 0.8318607677704034, "learning_rate": 3.561325306905517e-05, "loss": 0.6522, "step": 12340 }, { "epoch": 1.44, "grad_norm": 0.8994217373668357, "learning_rate": 3.5602866534239365e-05, "loss": 0.5811, "step": 12345 }, { "epoch": 1.44, "grad_norm": 0.8591196579865831, "learning_rate": 3.559247776740908e-05, "loss": 0.5433, "step": 12350 }, { "epoch": 1.44, "grad_norm": 0.865730815127164, "learning_rate": 3.558208677075127e-05, "loss": 0.5847, "step": 12355 }, { "epoch": 1.44, "grad_norm": 0.9072983135377769, "learning_rate": 3.5571693546453356e-05, "loss": 0.5503, "step": 12360 }, { "epoch": 1.44, "grad_norm": 0.8976836103305895, "learning_rate": 3.556129809670321e-05, "loss": 0.6447, "step": 12365 }, { "epoch": 1.45, "grad_norm": 0.9092058787096782, "learning_rate": 3.555090042368918e-05, "loss": 0.5973, "step": 12370 }, { "epoch": 1.45, "grad_norm": 0.8266958082415768, "learning_rate": 3.5540500529600096e-05, "loss": 0.6323, "step": 12375 }, { "epoch": 1.45, "grad_norm": 0.8145995325127587, "learning_rate": 3.553009841662524e-05, "loss": 0.599, "step": 12380 }, { "epoch": 1.45, "grad_norm": 0.7600467175839406, "learning_rate": 3.551969408695436e-05, "loss": 0.6181, "step": 12385 }, { "epoch": 1.45, "grad_norm": 1.2119218629692157, "learning_rate": 3.550928754277769e-05, "loss": 0.6287, "step": 12390 }, { "epoch": 1.45, "grad_norm": 0.8219758616713628, "learning_rate": 3.5498878786285906e-05, "loss": 0.5444, "step": 12395 }, { "epoch": 1.45, "grad_norm": 0.9195114586262254, "learning_rate": 3.5488467819670165e-05, "loss": 0.5227, "step": 12400 }, { "epoch": 1.45, "grad_norm": 0.8354182979948097, "learning_rate": 3.5478054645122074e-05, "loss": 0.6294, "step": 12405 }, { "epoch": 1.45, "grad_norm": 0.939150400634756, "learning_rate": 3.546763926483373e-05, "loss": 0.6228, "step": 12410 }, { "epoch": 1.45, "grad_norm": 0.8771575023674802, "learning_rate": 3.5457221680997665e-05, "loss": 0.6274, "step": 12415 }, { "epoch": 1.45, "grad_norm": 0.9647783644348853, "learning_rate": 3.5446801895806904e-05, "loss": 0.6751, "step": 12420 }, { "epoch": 1.45, "grad_norm": 1.054634469694846, "learning_rate": 3.543637991145491e-05, "loss": 0.6449, "step": 12425 }, { "epoch": 1.45, "grad_norm": 0.8879257717479248, "learning_rate": 3.542595573013563e-05, "loss": 0.5864, "step": 12430 }, { "epoch": 1.45, "grad_norm": 0.8623663216663834, "learning_rate": 3.541552935404346e-05, "loss": 0.6031, "step": 12435 }, { "epoch": 1.45, "grad_norm": 1.0150487304074556, "learning_rate": 3.540510078537325e-05, "loss": 0.6144, "step": 12440 }, { "epoch": 1.45, "grad_norm": 0.9425391365070878, "learning_rate": 3.539467002632034e-05, "loss": 0.6682, "step": 12445 }, { "epoch": 1.45, "grad_norm": 0.8622312713916199, "learning_rate": 3.538423707908051e-05, "loss": 0.6847, "step": 12450 }, { "epoch": 1.46, "grad_norm": 0.8801524966858489, "learning_rate": 3.5373801945849985e-05, "loss": 0.6163, "step": 12455 }, { "epoch": 1.46, "grad_norm": 0.8299444575804841, "learning_rate": 3.53633646288255e-05, "loss": 0.5875, "step": 12460 }, { "epoch": 1.46, "grad_norm": 0.8759494556959604, "learning_rate": 3.53529251302042e-05, "loss": 0.5593, "step": 12465 }, { "epoch": 1.46, "grad_norm": 0.8985264861266203, "learning_rate": 3.534248345218371e-05, "loss": 0.6427, "step": 12470 }, { "epoch": 1.46, "grad_norm": 0.8519847751204643, "learning_rate": 3.533203959696212e-05, "loss": 0.7022, "step": 12475 }, { "epoch": 1.46, "grad_norm": 0.9825898764091011, "learning_rate": 3.532159356673797e-05, "loss": 0.5919, "step": 12480 }, { "epoch": 1.46, "grad_norm": 0.8653221102185527, "learning_rate": 3.531114536371026e-05, "loss": 0.6803, "step": 12485 }, { "epoch": 1.46, "grad_norm": 0.9239947369685201, "learning_rate": 3.530069499007843e-05, "loss": 0.6154, "step": 12490 }, { "epoch": 1.46, "grad_norm": 0.9062899279415221, "learning_rate": 3.529024244804242e-05, "loss": 0.65, "step": 12495 }, { "epoch": 1.46, "grad_norm": 0.9527406919374568, "learning_rate": 3.527978773980257e-05, "loss": 0.6002, "step": 12500 }, { "epoch": 1.46, "grad_norm": 1.0399162325622036, "learning_rate": 3.5269330867559734e-05, "loss": 0.672, "step": 12505 }, { "epoch": 1.46, "grad_norm": 0.8442645622694827, "learning_rate": 3.525887183351517e-05, "loss": 0.535, "step": 12510 }, { "epoch": 1.46, "grad_norm": 0.938052602974706, "learning_rate": 3.524841063987063e-05, "loss": 0.5834, "step": 12515 }, { "epoch": 1.46, "grad_norm": 0.8828262617775489, "learning_rate": 3.52379472888283e-05, "loss": 0.5582, "step": 12520 }, { "epoch": 1.46, "grad_norm": 0.7867389542354907, "learning_rate": 3.522748178259082e-05, "loss": 0.6142, "step": 12525 }, { "epoch": 1.46, "grad_norm": 0.9845643637186133, "learning_rate": 3.5217014123361306e-05, "loss": 0.6257, "step": 12530 }, { "epoch": 1.46, "grad_norm": 0.9202685943685677, "learning_rate": 3.5206544313343274e-05, "loss": 0.5742, "step": 12535 }, { "epoch": 1.47, "grad_norm": 1.039346776792003, "learning_rate": 3.519607235474077e-05, "loss": 0.6271, "step": 12540 }, { "epoch": 1.47, "grad_norm": 0.8564988677121582, "learning_rate": 3.518559824975822e-05, "loss": 0.6047, "step": 12545 }, { "epoch": 1.47, "grad_norm": 1.015991476780951, "learning_rate": 3.517512200060055e-05, "loss": 0.7109, "step": 12550 }, { "epoch": 1.47, "grad_norm": 0.8922363318376536, "learning_rate": 3.5164643609473114e-05, "loss": 0.5472, "step": 12555 }, { "epoch": 1.47, "grad_norm": 0.8912317480108893, "learning_rate": 3.515416307858173e-05, "loss": 0.5998, "step": 12560 }, { "epoch": 1.47, "grad_norm": 1.0199078616697879, "learning_rate": 3.5143680410132654e-05, "loss": 0.6205, "step": 12565 }, { "epoch": 1.47, "grad_norm": 0.8246101578623768, "learning_rate": 3.513319560633259e-05, "loss": 0.6132, "step": 12570 }, { "epoch": 1.47, "grad_norm": 0.916942633653224, "learning_rate": 3.5122708669388715e-05, "loss": 0.6402, "step": 12575 }, { "epoch": 1.47, "grad_norm": 0.831173170164292, "learning_rate": 3.511221960150862e-05, "loss": 0.6278, "step": 12580 }, { "epoch": 1.47, "grad_norm": 1.0541321235758667, "learning_rate": 3.5101728404900376e-05, "loss": 0.6283, "step": 12585 }, { "epoch": 1.47, "grad_norm": 0.8331126187727365, "learning_rate": 3.50912350817725e-05, "loss": 0.5667, "step": 12590 }, { "epoch": 1.47, "grad_norm": 1.071895814551917, "learning_rate": 3.508073963433392e-05, "loss": 0.6305, "step": 12595 }, { "epoch": 1.47, "grad_norm": 0.901334249363068, "learning_rate": 3.507024206479406e-05, "loss": 0.6235, "step": 12600 }, { "epoch": 1.47, "grad_norm": 0.9740922386563966, "learning_rate": 3.505974237536276e-05, "loss": 0.608, "step": 12605 }, { "epoch": 1.47, "grad_norm": 0.8258422734556367, "learning_rate": 3.5049240568250314e-05, "loss": 0.5743, "step": 12610 }, { "epoch": 1.47, "grad_norm": 0.8115276989495022, "learning_rate": 3.503873664566746e-05, "loss": 0.5889, "step": 12615 }, { "epoch": 1.47, "grad_norm": 0.8906443008710788, "learning_rate": 3.502823060982539e-05, "loss": 0.6311, "step": 12620 }, { "epoch": 1.48, "grad_norm": 0.9575861203827718, "learning_rate": 3.501772246293573e-05, "loss": 0.6597, "step": 12625 }, { "epoch": 1.48, "grad_norm": 0.9253573690672728, "learning_rate": 3.500721220721056e-05, "loss": 0.6666, "step": 12630 }, { "epoch": 1.48, "grad_norm": 0.956590163086714, "learning_rate": 3.499669984486239e-05, "loss": 0.6701, "step": 12635 }, { "epoch": 1.48, "grad_norm": 0.8584458801517105, "learning_rate": 3.49861853781042e-05, "loss": 0.7377, "step": 12640 }, { "epoch": 1.48, "grad_norm": 1.3546094304017708, "learning_rate": 3.4975668809149375e-05, "loss": 0.6653, "step": 12645 }, { "epoch": 1.48, "grad_norm": 0.9173080389352878, "learning_rate": 3.496515014021177e-05, "loss": 0.6029, "step": 12650 }, { "epoch": 1.48, "grad_norm": 0.7717861026546176, "learning_rate": 3.4954629373505675e-05, "loss": 0.5707, "step": 12655 }, { "epoch": 1.48, "grad_norm": 0.8941973636443095, "learning_rate": 3.494410651124583e-05, "loss": 0.6098, "step": 12660 }, { "epoch": 1.48, "grad_norm": 0.9301219672662374, "learning_rate": 3.4933581555647395e-05, "loss": 0.6382, "step": 12665 }, { "epoch": 1.48, "grad_norm": 0.9602475719939857, "learning_rate": 3.492305450892599e-05, "loss": 0.7317, "step": 12670 }, { "epoch": 1.48, "grad_norm": 0.8754327759681167, "learning_rate": 3.491252537329767e-05, "loss": 0.5679, "step": 12675 }, { "epoch": 1.48, "grad_norm": 0.8332519375768591, "learning_rate": 3.490199415097892e-05, "loss": 0.6359, "step": 12680 }, { "epoch": 1.48, "grad_norm": 1.0021545185332463, "learning_rate": 3.4891460844186686e-05, "loss": 0.6257, "step": 12685 }, { "epoch": 1.48, "grad_norm": 0.6708521115211317, "learning_rate": 3.488092545513833e-05, "loss": 0.5801, "step": 12690 }, { "epoch": 1.48, "grad_norm": 0.9043971625533642, "learning_rate": 3.487038798605166e-05, "loss": 0.5934, "step": 12695 }, { "epoch": 1.48, "grad_norm": 0.9707006135387458, "learning_rate": 3.4859848439144935e-05, "loss": 0.6332, "step": 12700 }, { "epoch": 1.48, "grad_norm": 0.8671700202992485, "learning_rate": 3.4849306816636826e-05, "loss": 0.6898, "step": 12705 }, { "epoch": 1.48, "grad_norm": 0.7868368116852693, "learning_rate": 3.4838763120746466e-05, "loss": 0.5487, "step": 12710 }, { "epoch": 1.49, "grad_norm": 0.9776706028230453, "learning_rate": 3.4828217353693406e-05, "loss": 0.6495, "step": 12715 }, { "epoch": 1.49, "grad_norm": 0.8105488864437738, "learning_rate": 3.4817669517697646e-05, "loss": 0.6229, "step": 12720 }, { "epoch": 1.49, "grad_norm": 0.8240676399476085, "learning_rate": 3.4807119614979604e-05, "loss": 0.5324, "step": 12725 }, { "epoch": 1.49, "grad_norm": 0.7860136215179412, "learning_rate": 3.479656764776017e-05, "loss": 0.6396, "step": 12730 }, { "epoch": 1.49, "grad_norm": 0.8547156450939054, "learning_rate": 3.4786013618260615e-05, "loss": 0.5825, "step": 12735 }, { "epoch": 1.49, "grad_norm": 0.8445157630854596, "learning_rate": 3.4775457528702694e-05, "loss": 0.6123, "step": 12740 }, { "epoch": 1.49, "grad_norm": 1.0213647765797331, "learning_rate": 3.476489938130855e-05, "loss": 0.61, "step": 12745 }, { "epoch": 1.49, "grad_norm": 0.8997373148241865, "learning_rate": 3.4754339178300815e-05, "loss": 0.5706, "step": 12750 }, { "epoch": 1.49, "grad_norm": 0.8349194461333793, "learning_rate": 3.4743776921902507e-05, "loss": 0.5866, "step": 12755 }, { "epoch": 1.49, "grad_norm": 1.0234969645557912, "learning_rate": 3.473321261433709e-05, "loss": 0.6119, "step": 12760 }, { "epoch": 1.49, "grad_norm": 1.0384233399407776, "learning_rate": 3.472264625782846e-05, "loss": 0.6104, "step": 12765 }, { "epoch": 1.49, "grad_norm": 0.9031259021096777, "learning_rate": 3.471207785460094e-05, "loss": 0.6553, "step": 12770 }, { "epoch": 1.49, "grad_norm": 0.9519468975360182, "learning_rate": 3.4701507406879316e-05, "loss": 0.6565, "step": 12775 }, { "epoch": 1.49, "grad_norm": 0.8750176568537622, "learning_rate": 3.4690934916888754e-05, "loss": 0.5794, "step": 12780 }, { "epoch": 1.49, "grad_norm": 0.9418170666749259, "learning_rate": 3.468036038685487e-05, "loss": 0.5943, "step": 12785 }, { "epoch": 1.49, "grad_norm": 0.9165266091499794, "learning_rate": 3.4669783819003744e-05, "loss": 0.6765, "step": 12790 }, { "epoch": 1.49, "grad_norm": 1.0415731301685915, "learning_rate": 3.465920521556181e-05, "loss": 0.5855, "step": 12795 }, { "epoch": 1.5, "grad_norm": 0.918325395317344, "learning_rate": 3.4648624578756015e-05, "loss": 0.6199, "step": 12800 }, { "epoch": 1.5, "grad_norm": 0.8910389581495544, "learning_rate": 3.4638041910813676e-05, "loss": 0.6228, "step": 12805 }, { "epoch": 1.5, "grad_norm": 0.8550177516201493, "learning_rate": 3.462745721396256e-05, "loss": 0.5384, "step": 12810 }, { "epoch": 1.5, "grad_norm": 0.8784205415771655, "learning_rate": 3.461687049043084e-05, "loss": 0.8027, "step": 12815 }, { "epoch": 1.5, "grad_norm": 0.873308321376839, "learning_rate": 3.460628174244717e-05, "loss": 0.5331, "step": 12820 }, { "epoch": 1.5, "grad_norm": 1.1242614668282056, "learning_rate": 3.459569097224054e-05, "loss": 0.6078, "step": 12825 }, { "epoch": 1.5, "grad_norm": 0.8942596638254138, "learning_rate": 3.4585098182040455e-05, "loss": 0.5706, "step": 12830 }, { "epoch": 1.5, "grad_norm": 0.8723261387454745, "learning_rate": 3.457450337407681e-05, "loss": 0.6101, "step": 12835 }, { "epoch": 1.5, "grad_norm": 0.8875896833951963, "learning_rate": 3.4563906550579906e-05, "loss": 0.6873, "step": 12840 }, { "epoch": 1.5, "grad_norm": 0.9253061229666167, "learning_rate": 3.4553307713780495e-05, "loss": 0.6052, "step": 12845 }, { "epoch": 1.5, "grad_norm": 0.9715378499755188, "learning_rate": 3.4542706865909734e-05, "loss": 0.6208, "step": 12850 }, { "epoch": 1.5, "grad_norm": 0.8408746971103221, "learning_rate": 3.4532104009199224e-05, "loss": 0.5636, "step": 12855 }, { "epoch": 1.5, "grad_norm": 0.9006259480128637, "learning_rate": 3.452149914588096e-05, "loss": 0.6393, "step": 12860 }, { "epoch": 1.5, "grad_norm": 0.8293184348962447, "learning_rate": 3.4510892278187406e-05, "loss": 0.676, "step": 12865 }, { "epoch": 1.5, "grad_norm": 0.8807055889725863, "learning_rate": 3.45002834083514e-05, "loss": 0.6504, "step": 12870 }, { "epoch": 1.5, "grad_norm": 1.0176297993998487, "learning_rate": 3.448967253860621e-05, "loss": 0.6295, "step": 12875 }, { "epoch": 1.5, "grad_norm": 0.8009997493787064, "learning_rate": 3.4479059671185554e-05, "loss": 0.6158, "step": 12880 }, { "epoch": 1.51, "grad_norm": 0.8560279805457361, "learning_rate": 3.446844480832354e-05, "loss": 0.66, "step": 12885 }, { "epoch": 1.51, "grad_norm": 0.967252817282039, "learning_rate": 3.445782795225472e-05, "loss": 0.6953, "step": 12890 }, { "epoch": 1.51, "grad_norm": 1.1664043700480964, "learning_rate": 3.444720910521404e-05, "loss": 0.6085, "step": 12895 }, { "epoch": 1.51, "grad_norm": 0.9277619311992812, "learning_rate": 3.4436588269436887e-05, "loss": 0.6182, "step": 12900 }, { "epoch": 1.51, "grad_norm": 0.8382700027314575, "learning_rate": 3.442596544715906e-05, "loss": 0.5645, "step": 12905 }, { "epoch": 1.51, "grad_norm": 0.7908477207082969, "learning_rate": 3.441534064061676e-05, "loss": 0.5883, "step": 12910 }, { "epoch": 1.51, "grad_norm": 0.8838387862921007, "learning_rate": 3.440471385204664e-05, "loss": 0.6224, "step": 12915 }, { "epoch": 1.51, "grad_norm": 1.1434419980146526, "learning_rate": 3.439408508368574e-05, "loss": 0.5898, "step": 12920 }, { "epoch": 1.51, "grad_norm": 1.0128506331169786, "learning_rate": 3.4383454337771526e-05, "loss": 0.6528, "step": 12925 }, { "epoch": 1.51, "grad_norm": 0.9618887083113228, "learning_rate": 3.437282161654188e-05, "loss": 0.6676, "step": 12930 }, { "epoch": 1.51, "grad_norm": 1.2541173464635784, "learning_rate": 3.43621869222351e-05, "loss": 0.6553, "step": 12935 }, { "epoch": 1.51, "grad_norm": 0.9366037361828637, "learning_rate": 3.435155025708992e-05, "loss": 0.6763, "step": 12940 }, { "epoch": 1.51, "grad_norm": 0.943652775707637, "learning_rate": 3.4340911623345436e-05, "loss": 0.701, "step": 12945 }, { "epoch": 1.51, "grad_norm": 0.9108439836269759, "learning_rate": 3.433027102324122e-05, "loss": 0.6674, "step": 12950 }, { "epoch": 1.51, "grad_norm": 0.9898470852280427, "learning_rate": 3.4319628459017216e-05, "loss": 0.6382, "step": 12955 }, { "epoch": 1.51, "grad_norm": 0.9070685163297803, "learning_rate": 3.430898393291381e-05, "loss": 0.592, "step": 12960 }, { "epoch": 1.51, "grad_norm": 0.8759130030387231, "learning_rate": 3.4298337447171764e-05, "loss": 0.6295, "step": 12965 }, { "epoch": 1.52, "grad_norm": 0.7773532856823042, "learning_rate": 3.428768900403229e-05, "loss": 0.5401, "step": 12970 }, { "epoch": 1.52, "grad_norm": 0.8728781018623748, "learning_rate": 3.4277038605736986e-05, "loss": 0.7166, "step": 12975 }, { "epoch": 1.52, "grad_norm": 0.813151309139467, "learning_rate": 3.426638625452789e-05, "loss": 0.5715, "step": 12980 }, { "epoch": 1.52, "grad_norm": 1.0215831652624239, "learning_rate": 3.425573195264742e-05, "loss": 0.555, "step": 12985 }, { "epoch": 1.52, "grad_norm": 0.8614008378157113, "learning_rate": 3.4245075702338415e-05, "loss": 0.695, "step": 12990 }, { "epoch": 1.52, "grad_norm": 0.809144076015466, "learning_rate": 3.423441750584414e-05, "loss": 0.6254, "step": 12995 }, { "epoch": 1.52, "grad_norm": 0.8917639776273031, "learning_rate": 3.422375736540824e-05, "loss": 0.605, "step": 13000 }, { "epoch": 1.52, "grad_norm": 0.8995589927620352, "learning_rate": 3.4213095283274807e-05, "loss": 0.5897, "step": 13005 }, { "epoch": 1.52, "grad_norm": 0.8696231166601598, "learning_rate": 3.42024312616883e-05, "loss": 0.6283, "step": 13010 }, { "epoch": 1.52, "grad_norm": 1.0423269869481833, "learning_rate": 3.419176530289363e-05, "loss": 0.5981, "step": 13015 }, { "epoch": 1.52, "grad_norm": 0.8222009021178757, "learning_rate": 3.418109740913608e-05, "loss": 0.6892, "step": 13020 }, { "epoch": 1.52, "grad_norm": 0.8031873658234491, "learning_rate": 3.417042758266135e-05, "loss": 0.5313, "step": 13025 }, { "epoch": 1.52, "grad_norm": 0.7809903810806502, "learning_rate": 3.415975582571555e-05, "loss": 0.6371, "step": 13030 }, { "epoch": 1.52, "grad_norm": 0.8633127502310625, "learning_rate": 3.414908214054521e-05, "loss": 0.5565, "step": 13035 }, { "epoch": 1.52, "grad_norm": 0.9184847844091931, "learning_rate": 3.413840652939723e-05, "loss": 0.6381, "step": 13040 }, { "epoch": 1.52, "grad_norm": 1.1198374845791288, "learning_rate": 3.412772899451896e-05, "loss": 0.6213, "step": 13045 }, { "epoch": 1.52, "grad_norm": 0.8190319915825256, "learning_rate": 3.411704953815813e-05, "loss": 0.5723, "step": 13050 }, { "epoch": 1.53, "grad_norm": 0.7995468855318312, "learning_rate": 3.410636816256286e-05, "loss": 0.625, "step": 13055 }, { "epoch": 1.53, "grad_norm": 0.9771296752971816, "learning_rate": 3.4095684869981715e-05, "loss": 0.5751, "step": 13060 }, { "epoch": 1.53, "grad_norm": 0.8837268572872942, "learning_rate": 3.408499966266362e-05, "loss": 0.6585, "step": 13065 }, { "epoch": 1.53, "grad_norm": 0.84276977968608, "learning_rate": 3.4074312542857933e-05, "loss": 0.581, "step": 13070 }, { "epoch": 1.53, "grad_norm": 0.9201266801650666, "learning_rate": 3.40636235128144e-05, "loss": 0.5781, "step": 13075 }, { "epoch": 1.53, "grad_norm": 0.9520401434470717, "learning_rate": 3.405293257478319e-05, "loss": 0.6208, "step": 13080 }, { "epoch": 1.53, "grad_norm": 0.9102371922663296, "learning_rate": 3.4042239731014835e-05, "loss": 0.5991, "step": 13085 }, { "epoch": 1.53, "grad_norm": 0.9093704492946941, "learning_rate": 3.40315449837603e-05, "loss": 0.6248, "step": 13090 }, { "epoch": 1.53, "grad_norm": 0.8625920756576104, "learning_rate": 3.4020848335270944e-05, "loss": 0.5599, "step": 13095 }, { "epoch": 1.53, "grad_norm": 0.9254248842897794, "learning_rate": 3.401014978779852e-05, "loss": 0.6417, "step": 13100 }, { "epoch": 1.53, "grad_norm": 0.8380331488480008, "learning_rate": 3.399944934359518e-05, "loss": 0.6532, "step": 13105 }, { "epoch": 1.53, "grad_norm": 0.946114144453383, "learning_rate": 3.3988747004913486e-05, "loss": 0.5843, "step": 13110 }, { "epoch": 1.53, "grad_norm": 0.9839002491888842, "learning_rate": 3.3978042774006395e-05, "loss": 0.594, "step": 13115 }, { "epoch": 1.53, "grad_norm": 0.9291678575353318, "learning_rate": 3.396733665312725e-05, "loss": 0.588, "step": 13120 }, { "epoch": 1.53, "grad_norm": 0.8915272873294703, "learning_rate": 3.3956628644529815e-05, "loss": 0.5793, "step": 13125 }, { "epoch": 1.53, "grad_norm": 0.8440532059398612, "learning_rate": 3.394591875046823e-05, "loss": 0.6231, "step": 13130 }, { "epoch": 1.53, "grad_norm": 0.961022969741923, "learning_rate": 3.393520697319704e-05, "loss": 0.6165, "step": 13135 }, { "epoch": 1.54, "grad_norm": 0.823843073127469, "learning_rate": 3.392449331497117e-05, "loss": 0.6481, "step": 13140 }, { "epoch": 1.54, "grad_norm": 1.0192337202739583, "learning_rate": 3.3913777778045994e-05, "loss": 0.6099, "step": 13145 }, { "epoch": 1.54, "grad_norm": 0.9025322981358163, "learning_rate": 3.3903060364677216e-05, "loss": 0.5629, "step": 13150 }, { "epoch": 1.54, "grad_norm": 0.8069845317325498, "learning_rate": 3.3892341077120974e-05, "loss": 0.5806, "step": 13155 }, { "epoch": 1.54, "grad_norm": 0.8124687021347363, "learning_rate": 3.388161991763379e-05, "loss": 0.6194, "step": 13160 }, { "epoch": 1.54, "grad_norm": 0.8875143177244339, "learning_rate": 3.387089688847258e-05, "loss": 0.6241, "step": 13165 }, { "epoch": 1.54, "grad_norm": 0.8359666089663408, "learning_rate": 3.386017199189465e-05, "loss": 0.6022, "step": 13170 }, { "epoch": 1.54, "grad_norm": 0.858812621309702, "learning_rate": 3.384944523015771e-05, "loss": 0.5816, "step": 13175 }, { "epoch": 1.54, "grad_norm": 0.8896546849269568, "learning_rate": 3.383871660551985e-05, "loss": 0.5934, "step": 13180 }, { "epoch": 1.54, "grad_norm": 0.8647229392788754, "learning_rate": 3.3827986120239556e-05, "loss": 0.6686, "step": 13185 }, { "epoch": 1.54, "grad_norm": 0.793345125802617, "learning_rate": 3.3817253776575714e-05, "loss": 0.5707, "step": 13190 }, { "epoch": 1.54, "grad_norm": 0.9480051849601898, "learning_rate": 3.38065195767876e-05, "loss": 0.6255, "step": 13195 }, { "epoch": 1.54, "grad_norm": 0.9191963606043647, "learning_rate": 3.379578352313485e-05, "loss": 0.59, "step": 13200 }, { "epoch": 1.54, "grad_norm": 0.9472888560047468, "learning_rate": 3.378504561787755e-05, "loss": 0.6342, "step": 13205 }, { "epoch": 1.54, "grad_norm": 0.8332083714559967, "learning_rate": 3.377430586327612e-05, "loss": 0.6058, "step": 13210 }, { "epoch": 1.54, "grad_norm": 0.968968442777554, "learning_rate": 3.3763564261591385e-05, "loss": 0.6411, "step": 13215 }, { "epoch": 1.54, "grad_norm": 1.4511436808150782, "learning_rate": 3.375282081508459e-05, "loss": 0.5874, "step": 13220 }, { "epoch": 1.55, "grad_norm": 0.9868514432409948, "learning_rate": 3.3742075526017315e-05, "loss": 0.6434, "step": 13225 }, { "epoch": 1.55, "grad_norm": 1.2549177302774823, "learning_rate": 3.373132839665159e-05, "loss": 0.5901, "step": 13230 }, { "epoch": 1.55, "grad_norm": 0.896072051615042, "learning_rate": 3.3720579429249755e-05, "loss": 0.5308, "step": 13235 }, { "epoch": 1.55, "grad_norm": 0.986331797112231, "learning_rate": 3.3709828626074615e-05, "loss": 0.6494, "step": 13240 }, { "epoch": 1.55, "grad_norm": 0.88956079434755, "learning_rate": 3.3699075989389305e-05, "loss": 0.6446, "step": 13245 }, { "epoch": 1.55, "grad_norm": 0.8432055006500154, "learning_rate": 3.368832152145738e-05, "loss": 0.6167, "step": 13250 }, { "epoch": 1.55, "grad_norm": 0.9007854163305742, "learning_rate": 3.367756522454277e-05, "loss": 0.6881, "step": 13255 }, { "epoch": 1.55, "grad_norm": 0.9950203619990622, "learning_rate": 3.3666807100909786e-05, "loss": 0.7618, "step": 13260 }, { "epoch": 1.55, "grad_norm": 0.8381708520680272, "learning_rate": 3.365604715282312e-05, "loss": 0.5727, "step": 13265 }, { "epoch": 1.55, "grad_norm": 0.9640117070742609, "learning_rate": 3.3645285382547855e-05, "loss": 0.5688, "step": 13270 }, { "epoch": 1.55, "grad_norm": 0.8211205577874293, "learning_rate": 3.363452179234946e-05, "loss": 0.5974, "step": 13275 }, { "epoch": 1.55, "grad_norm": 0.8357480242027399, "learning_rate": 3.362375638449378e-05, "loss": 0.5995, "step": 13280 }, { "epoch": 1.55, "grad_norm": 0.9715230295484729, "learning_rate": 3.361298916124704e-05, "loss": 0.5996, "step": 13285 }, { "epoch": 1.55, "grad_norm": 0.7898587202706426, "learning_rate": 3.3602220124875875e-05, "loss": 0.6117, "step": 13290 }, { "epoch": 1.55, "grad_norm": 0.8818336958131766, "learning_rate": 3.359144927764727e-05, "loss": 0.6414, "step": 13295 }, { "epoch": 1.55, "grad_norm": 0.8275981280284921, "learning_rate": 3.358067662182859e-05, "loss": 0.6321, "step": 13300 }, { "epoch": 1.55, "grad_norm": 0.8827928716298319, "learning_rate": 3.3569902159687595e-05, "loss": 0.6043, "step": 13305 }, { "epoch": 1.56, "grad_norm": 0.9311754224703687, "learning_rate": 3.355912589349243e-05, "loss": 0.677, "step": 13310 }, { "epoch": 1.56, "grad_norm": 0.7913626804480359, "learning_rate": 3.35483478255116e-05, "loss": 0.5952, "step": 13315 }, { "epoch": 1.56, "grad_norm": 0.9396380034284258, "learning_rate": 3.353756795801402e-05, "loss": 0.5573, "step": 13320 }, { "epoch": 1.56, "grad_norm": 0.9239927191443369, "learning_rate": 3.3526786293268957e-05, "loss": 0.5941, "step": 13325 }, { "epoch": 1.56, "grad_norm": 0.9794809409295886, "learning_rate": 3.3516002833546056e-05, "loss": 0.6008, "step": 13330 }, { "epoch": 1.56, "grad_norm": 0.9225548682353489, "learning_rate": 3.3505217581115355e-05, "loss": 0.6832, "step": 13335 }, { "epoch": 1.56, "grad_norm": 0.9882205802588672, "learning_rate": 3.3494430538247253e-05, "loss": 0.6587, "step": 13340 }, { "epoch": 1.56, "grad_norm": 0.9719619059808527, "learning_rate": 3.3483641707212566e-05, "loss": 0.7756, "step": 13345 }, { "epoch": 1.56, "grad_norm": 1.1892106557209925, "learning_rate": 3.347285109028241e-05, "loss": 0.6402, "step": 13350 }, { "epoch": 1.56, "grad_norm": 0.9873130543692576, "learning_rate": 3.346205868972836e-05, "loss": 0.6866, "step": 13355 }, { "epoch": 1.56, "grad_norm": 1.0336494547972308, "learning_rate": 3.345126450782232e-05, "loss": 0.6036, "step": 13360 }, { "epoch": 1.56, "grad_norm": 0.8457080948817928, "learning_rate": 3.344046854683656e-05, "loss": 0.5957, "step": 13365 }, { "epoch": 1.56, "grad_norm": 1.1027643454746756, "learning_rate": 3.342967080904377e-05, "loss": 0.5772, "step": 13370 }, { "epoch": 1.56, "grad_norm": 0.8562088252487238, "learning_rate": 3.3418871296716977e-05, "loss": 0.6005, "step": 13375 }, { "epoch": 1.56, "grad_norm": 0.9466605641458102, "learning_rate": 3.340807001212958e-05, "loss": 0.6073, "step": 13380 }, { "epoch": 1.56, "grad_norm": 0.8965644460808967, "learning_rate": 3.339726695755537e-05, "loss": 0.6204, "step": 13385 }, { "epoch": 1.56, "grad_norm": 1.1096286163849778, "learning_rate": 3.338646213526852e-05, "loss": 0.635, "step": 13390 }, { "epoch": 1.57, "grad_norm": 0.8754853667874716, "learning_rate": 3.337565554754353e-05, "loss": 0.6358, "step": 13395 }, { "epoch": 1.57, "grad_norm": 0.9110482614717333, "learning_rate": 3.3364847196655305e-05, "loss": 0.6214, "step": 13400 }, { "epoch": 1.57, "grad_norm": 0.7926845633276647, "learning_rate": 3.335403708487914e-05, "loss": 0.6243, "step": 13405 }, { "epoch": 1.57, "grad_norm": 0.9078940820337732, "learning_rate": 3.334322521449066e-05, "loss": 0.6026, "step": 13410 }, { "epoch": 1.57, "grad_norm": 0.9372460308302087, "learning_rate": 3.333241158776587e-05, "loss": 0.6094, "step": 13415 }, { "epoch": 1.57, "grad_norm": 0.9564483086346293, "learning_rate": 3.332159620698116e-05, "loss": 0.7157, "step": 13420 }, { "epoch": 1.57, "grad_norm": 0.9229741855777024, "learning_rate": 3.331077907441328e-05, "loss": 0.564, "step": 13425 }, { "epoch": 1.57, "grad_norm": 0.9769203270681543, "learning_rate": 3.3299960192339356e-05, "loss": 0.6285, "step": 13430 }, { "epoch": 1.57, "grad_norm": 1.015604600610834, "learning_rate": 3.328913956303687e-05, "loss": 0.6421, "step": 13435 }, { "epoch": 1.57, "grad_norm": 0.9225261875128974, "learning_rate": 3.3278317188783684e-05, "loss": 0.6393, "step": 13440 }, { "epoch": 1.57, "grad_norm": 1.5001861035157056, "learning_rate": 3.3267493071858e-05, "loss": 0.6498, "step": 13445 }, { "epoch": 1.57, "grad_norm": 0.8612643013746522, "learning_rate": 3.325666721453843e-05, "loss": 0.6828, "step": 13450 }, { "epoch": 1.57, "grad_norm": 1.0309589730596793, "learning_rate": 3.3245839619103916e-05, "loss": 0.6204, "step": 13455 }, { "epoch": 1.57, "grad_norm": 0.8201016813229378, "learning_rate": 3.323501028783379e-05, "loss": 0.6237, "step": 13460 }, { "epoch": 1.57, "grad_norm": 1.13957645634541, "learning_rate": 3.322417922300775e-05, "loss": 0.631, "step": 13465 }, { "epoch": 1.57, "grad_norm": 0.8453239478004159, "learning_rate": 3.321334642690582e-05, "loss": 0.6454, "step": 13470 }, { "epoch": 1.57, "grad_norm": 0.9482490037378013, "learning_rate": 3.320251190180845e-05, "loss": 0.6024, "step": 13475 }, { "epoch": 1.57, "grad_norm": 0.8549568743141279, "learning_rate": 3.319167564999639e-05, "loss": 0.6375, "step": 13480 }, { "epoch": 1.58, "grad_norm": 0.9089922574390278, "learning_rate": 3.318083767375081e-05, "loss": 0.6104, "step": 13485 }, { "epoch": 1.58, "grad_norm": 0.9671969586531175, "learning_rate": 3.3169997975353195e-05, "loss": 0.5837, "step": 13490 }, { "epoch": 1.58, "grad_norm": 0.9829591111528554, "learning_rate": 3.315915655708543e-05, "loss": 0.599, "step": 13495 }, { "epoch": 1.58, "grad_norm": 0.9878969304179483, "learning_rate": 3.314831342122974e-05, "loss": 0.6393, "step": 13500 }, { "epoch": 1.58, "grad_norm": 0.8643449190320925, "learning_rate": 3.313746857006873e-05, "loss": 0.63, "step": 13505 }, { "epoch": 1.58, "grad_norm": 0.9417188349940933, "learning_rate": 3.312662200588534e-05, "loss": 0.6288, "step": 13510 }, { "epoch": 1.58, "grad_norm": 1.130525033006468, "learning_rate": 3.3115773730962896e-05, "loss": 0.5918, "step": 13515 }, { "epoch": 1.58, "grad_norm": 0.9404647424053684, "learning_rate": 3.310492374758507e-05, "loss": 0.6873, "step": 13520 }, { "epoch": 1.58, "grad_norm": 0.9975615755136265, "learning_rate": 3.3094072058035894e-05, "loss": 0.6591, "step": 13525 }, { "epoch": 1.58, "grad_norm": 0.8774444779428315, "learning_rate": 3.3083218664599755e-05, "loss": 0.6845, "step": 13530 }, { "epoch": 1.58, "grad_norm": 0.9092239097967811, "learning_rate": 3.307236356956144e-05, "loss": 0.62, "step": 13535 }, { "epoch": 1.58, "grad_norm": 0.8609363482888034, "learning_rate": 3.306150677520602e-05, "loss": 0.604, "step": 13540 }, { "epoch": 1.58, "grad_norm": 0.8140700714246218, "learning_rate": 3.3050648283818985e-05, "loss": 0.5633, "step": 13545 }, { "epoch": 1.58, "grad_norm": 1.0347541611333066, "learning_rate": 3.303978809768615e-05, "loss": 0.5718, "step": 13550 }, { "epoch": 1.58, "grad_norm": 1.0125114628229028, "learning_rate": 3.3028926219093714e-05, "loss": 0.578, "step": 13555 }, { "epoch": 1.58, "grad_norm": 0.8975432257995927, "learning_rate": 3.30180626503282e-05, "loss": 0.5827, "step": 13560 }, { "epoch": 1.58, "grad_norm": 0.843827212748594, "learning_rate": 3.30071973936765e-05, "loss": 0.6177, "step": 13565 }, { "epoch": 1.59, "grad_norm": 0.784180156204481, "learning_rate": 3.299633045142589e-05, "loss": 0.5826, "step": 13570 }, { "epoch": 1.59, "grad_norm": 0.9275241980706935, "learning_rate": 3.298546182586396e-05, "loss": 0.6339, "step": 13575 }, { "epoch": 1.59, "grad_norm": 0.9040153722117333, "learning_rate": 3.297459151927866e-05, "loss": 0.6973, "step": 13580 }, { "epoch": 1.59, "grad_norm": 0.9661519331134533, "learning_rate": 3.296371953395831e-05, "loss": 0.6597, "step": 13585 }, { "epoch": 1.59, "grad_norm": 0.910274082636293, "learning_rate": 3.295284587219159e-05, "loss": 0.6241, "step": 13590 }, { "epoch": 1.59, "grad_norm": 0.8992370130049528, "learning_rate": 3.29419705362675e-05, "loss": 0.6826, "step": 13595 }, { "epoch": 1.59, "grad_norm": 0.845200766365333, "learning_rate": 3.293109352847542e-05, "loss": 0.5987, "step": 13600 }, { "epoch": 1.59, "grad_norm": 1.0041001619269916, "learning_rate": 3.292021485110508e-05, "loss": 0.5825, "step": 13605 }, { "epoch": 1.59, "grad_norm": 0.8766907421245957, "learning_rate": 3.290933450644654e-05, "loss": 0.6311, "step": 13610 }, { "epoch": 1.59, "grad_norm": 0.8960473453380331, "learning_rate": 3.289845249679025e-05, "loss": 0.6706, "step": 13615 }, { "epoch": 1.59, "grad_norm": 0.9549203402504854, "learning_rate": 3.2887568824426965e-05, "loss": 0.6411, "step": 13620 }, { "epoch": 1.59, "grad_norm": 0.8567389877383453, "learning_rate": 3.287668349164783e-05, "loss": 0.5632, "step": 13625 }, { "epoch": 1.59, "grad_norm": 0.8035215975175422, "learning_rate": 3.28657965007443e-05, "loss": 0.6211, "step": 13630 }, { "epoch": 1.59, "grad_norm": 0.8128657085497275, "learning_rate": 3.285490785400822e-05, "loss": 0.6213, "step": 13635 }, { "epoch": 1.59, "grad_norm": 0.8270049212790569, "learning_rate": 3.284401755373176e-05, "loss": 0.5984, "step": 13640 }, { "epoch": 1.59, "grad_norm": 1.0343263940447744, "learning_rate": 3.283312560220743e-05, "loss": 0.6213, "step": 13645 }, { "epoch": 1.59, "grad_norm": 0.8168778107678732, "learning_rate": 3.2822232001728116e-05, "loss": 0.592, "step": 13650 }, { "epoch": 1.6, "grad_norm": 0.8367169041139833, "learning_rate": 3.2811336754587027e-05, "loss": 0.6354, "step": 13655 }, { "epoch": 1.6, "grad_norm": 0.877453803181334, "learning_rate": 3.280043986307773e-05, "loss": 0.5926, "step": 13660 }, { "epoch": 1.6, "grad_norm": 0.9567992539893181, "learning_rate": 3.278954132949413e-05, "loss": 0.6709, "step": 13665 }, { "epoch": 1.6, "grad_norm": 0.9440890984372244, "learning_rate": 3.2778641156130483e-05, "loss": 0.6311, "step": 13670 }, { "epoch": 1.6, "grad_norm": 0.8550152848081506, "learning_rate": 3.27677393452814e-05, "loss": 0.5263, "step": 13675 }, { "epoch": 1.6, "grad_norm": 1.0679190497601727, "learning_rate": 3.275683589924181e-05, "loss": 0.6034, "step": 13680 }, { "epoch": 1.6, "grad_norm": 0.9056776230882122, "learning_rate": 3.274593082030702e-05, "loss": 0.5819, "step": 13685 }, { "epoch": 1.6, "grad_norm": 0.8613565987259736, "learning_rate": 3.273502411077265e-05, "loss": 0.6402, "step": 13690 }, { "epoch": 1.6, "grad_norm": 1.4131119352517771, "learning_rate": 3.272411577293468e-05, "loss": 0.5346, "step": 13695 }, { "epoch": 1.6, "grad_norm": 0.841056332330446, "learning_rate": 3.271320580908943e-05, "loss": 0.5775, "step": 13700 }, { "epoch": 1.6, "grad_norm": 0.9991608484383396, "learning_rate": 3.270229422153356e-05, "loss": 0.6306, "step": 13705 }, { "epoch": 1.6, "grad_norm": 0.8252817791685058, "learning_rate": 3.269138101256408e-05, "loss": 0.602, "step": 13710 }, { "epoch": 1.6, "grad_norm": 0.8557066814007601, "learning_rate": 3.268046618447833e-05, "loss": 0.5983, "step": 13715 }, { "epoch": 1.6, "grad_norm": 1.342185329347312, "learning_rate": 3.2669549739573994e-05, "loss": 0.6566, "step": 13720 }, { "epoch": 1.6, "grad_norm": 0.9293684467045353, "learning_rate": 3.26586316801491e-05, "loss": 0.551, "step": 13725 }, { "epoch": 1.6, "grad_norm": 0.9881476411545929, "learning_rate": 3.264771200850201e-05, "loss": 0.611, "step": 13730 }, { "epoch": 1.6, "grad_norm": 0.892419704640757, "learning_rate": 3.2636790726931446e-05, "loss": 0.557, "step": 13735 }, { "epoch": 1.61, "grad_norm": 0.9388799630899802, "learning_rate": 3.262586783773642e-05, "loss": 0.6487, "step": 13740 }, { "epoch": 1.61, "grad_norm": 0.8817809748299225, "learning_rate": 3.261494334321635e-05, "loss": 0.6186, "step": 13745 }, { "epoch": 1.61, "grad_norm": 0.8421088804070335, "learning_rate": 3.260401724567094e-05, "loss": 0.63, "step": 13750 }, { "epoch": 1.61, "grad_norm": 0.992382878836625, "learning_rate": 3.2593089547400234e-05, "loss": 0.6044, "step": 13755 }, { "epoch": 1.61, "grad_norm": 0.8855943216062995, "learning_rate": 3.2582160250704654e-05, "loss": 0.6103, "step": 13760 }, { "epoch": 1.61, "grad_norm": 0.8640879874274411, "learning_rate": 3.2571229357884915e-05, "loss": 0.6113, "step": 13765 }, { "epoch": 1.61, "grad_norm": 0.8737568180192352, "learning_rate": 3.256029687124209e-05, "loss": 0.6214, "step": 13770 }, { "epoch": 1.61, "grad_norm": 0.8365702078982379, "learning_rate": 3.254936279307757e-05, "loss": 0.6057, "step": 13775 }, { "epoch": 1.61, "grad_norm": 1.1079162458987608, "learning_rate": 3.253842712569312e-05, "loss": 0.6181, "step": 13780 }, { "epoch": 1.61, "grad_norm": 0.8169920397637487, "learning_rate": 3.252748987139078e-05, "loss": 0.6243, "step": 13785 }, { "epoch": 1.61, "grad_norm": 0.9998227717889913, "learning_rate": 3.251655103247298e-05, "loss": 0.6202, "step": 13790 }, { "epoch": 1.61, "grad_norm": 0.8792378919698017, "learning_rate": 3.250561061124245e-05, "loss": 0.642, "step": 13795 }, { "epoch": 1.61, "grad_norm": 0.8700829569105526, "learning_rate": 3.249466861000227e-05, "loss": 0.5613, "step": 13800 }, { "epoch": 1.61, "grad_norm": 0.8586101363117356, "learning_rate": 3.248372503105584e-05, "loss": 0.5551, "step": 13805 }, { "epoch": 1.61, "grad_norm": 0.8853985117030423, "learning_rate": 3.247277987670689e-05, "loss": 0.6033, "step": 13810 }, { "epoch": 1.61, "grad_norm": 0.8683083160803056, "learning_rate": 3.2461833149259516e-05, "loss": 0.5892, "step": 13815 }, { "epoch": 1.61, "grad_norm": 0.8949843862183975, "learning_rate": 3.2450884851018096e-05, "loss": 0.6292, "step": 13820 }, { "epoch": 1.62, "grad_norm": 0.9356885992030335, "learning_rate": 3.243993498428736e-05, "loss": 0.6513, "step": 13825 }, { "epoch": 1.62, "grad_norm": 0.8497374441324735, "learning_rate": 3.2428983551372374e-05, "loss": 0.6926, "step": 13830 }, { "epoch": 1.62, "grad_norm": 0.9157864165593556, "learning_rate": 3.241803055457855e-05, "loss": 0.6408, "step": 13835 }, { "epoch": 1.62, "grad_norm": 1.001275078546633, "learning_rate": 3.240707599621158e-05, "loss": 0.5921, "step": 13840 }, { "epoch": 1.62, "grad_norm": 0.9215544155311559, "learning_rate": 3.2396119878577516e-05, "loss": 0.6493, "step": 13845 }, { "epoch": 1.62, "grad_norm": 1.074733105041847, "learning_rate": 3.2385162203982765e-05, "loss": 0.6362, "step": 13850 }, { "epoch": 1.62, "grad_norm": 0.9492895870678364, "learning_rate": 3.237420297473399e-05, "loss": 0.5948, "step": 13855 }, { "epoch": 1.62, "grad_norm": 0.8169730446674744, "learning_rate": 3.236324219313826e-05, "loss": 0.6016, "step": 13860 }, { "epoch": 1.62, "grad_norm": 0.8708981236440343, "learning_rate": 3.2352279861502914e-05, "loss": 0.6146, "step": 13865 }, { "epoch": 1.62, "grad_norm": 0.8690444518168777, "learning_rate": 3.234131598213565e-05, "loss": 0.6295, "step": 13870 }, { "epoch": 1.62, "grad_norm": 0.8449880813128703, "learning_rate": 3.2330350557344455e-05, "loss": 0.6827, "step": 13875 }, { "epoch": 1.62, "grad_norm": 0.9335719888247228, "learning_rate": 3.231938358943769e-05, "loss": 0.6576, "step": 13880 }, { "epoch": 1.62, "grad_norm": 0.9016478062448239, "learning_rate": 3.2308415080724013e-05, "loss": 0.6144, "step": 13885 }, { "epoch": 1.62, "grad_norm": 0.9194336026418829, "learning_rate": 3.22974450335124e-05, "loss": 0.5881, "step": 13890 }, { "epoch": 1.62, "grad_norm": 0.878014195494255, "learning_rate": 3.228647345011217e-05, "loss": 0.6076, "step": 13895 }, { "epoch": 1.62, "grad_norm": 0.8918343693158489, "learning_rate": 3.227550033283296e-05, "loss": 0.6161, "step": 13900 }, { "epoch": 1.62, "grad_norm": 0.912385493336373, "learning_rate": 3.226452568398471e-05, "loss": 0.5138, "step": 13905 }, { "epoch": 1.63, "grad_norm": 0.8770431703359535, "learning_rate": 3.2253549505877715e-05, "loss": 0.6793, "step": 13910 }, { "epoch": 1.63, "grad_norm": 0.9705169824665427, "learning_rate": 3.2242571800822566e-05, "loss": 0.6704, "step": 13915 }, { "epoch": 1.63, "grad_norm": 0.9245329539519085, "learning_rate": 3.2231592571130196e-05, "loss": 0.6402, "step": 13920 }, { "epoch": 1.63, "grad_norm": 0.8747549649322257, "learning_rate": 3.2220611819111825e-05, "loss": 0.6515, "step": 13925 }, { "epoch": 1.63, "grad_norm": 0.955157692671812, "learning_rate": 3.220962954707904e-05, "loss": 0.5855, "step": 13930 }, { "epoch": 1.63, "grad_norm": 0.8090221885265747, "learning_rate": 3.2198645757343715e-05, "loss": 0.6126, "step": 13935 }, { "epoch": 1.63, "grad_norm": 0.8299315608941946, "learning_rate": 3.218766045221805e-05, "loss": 0.6128, "step": 13940 }, { "epoch": 1.63, "grad_norm": 0.873059829444481, "learning_rate": 3.217667363401456e-05, "loss": 0.6347, "step": 13945 }, { "epoch": 1.63, "grad_norm": 0.9092923994287389, "learning_rate": 3.216568530504611e-05, "loss": 0.6872, "step": 13950 }, { "epoch": 1.63, "grad_norm": 0.9338280320274359, "learning_rate": 3.215469546762584e-05, "loss": 0.6335, "step": 13955 }, { "epoch": 1.63, "grad_norm": 0.9477258003267339, "learning_rate": 3.214370412406723e-05, "loss": 0.6522, "step": 13960 }, { "epoch": 1.63, "grad_norm": 0.7675122204643133, "learning_rate": 3.213271127668407e-05, "loss": 0.5975, "step": 13965 }, { "epoch": 1.63, "grad_norm": 0.9519957274619302, "learning_rate": 3.2121716927790476e-05, "loss": 0.5986, "step": 13970 }, { "epoch": 1.63, "grad_norm": 0.8993298996098514, "learning_rate": 3.211072107970086e-05, "loss": 0.6427, "step": 13975 }, { "epoch": 1.63, "grad_norm": 0.9692459849507432, "learning_rate": 3.209972373472998e-05, "loss": 0.615, "step": 13980 }, { "epoch": 1.63, "grad_norm": 0.9036038616248692, "learning_rate": 3.208872489519288e-05, "loss": 0.6085, "step": 13985 }, { "epoch": 1.63, "grad_norm": 0.9205927195955137, "learning_rate": 3.2077724563404946e-05, "loss": 0.6168, "step": 13990 }, { "epoch": 1.64, "grad_norm": 0.9840479366969124, "learning_rate": 3.2066722741681845e-05, "loss": 0.5396, "step": 13995 }, { "epoch": 1.64, "grad_norm": 0.8869098712612347, "learning_rate": 3.205571943233958e-05, "loss": 0.6423, "step": 14000 }, { "epoch": 1.64, "grad_norm": 0.8351403542328356, "learning_rate": 3.204471463769446e-05, "loss": 0.6563, "step": 14005 }, { "epoch": 1.64, "grad_norm": 0.8635962295641016, "learning_rate": 3.203370836006313e-05, "loss": 0.6148, "step": 14010 }, { "epoch": 1.64, "grad_norm": 0.8716443794972157, "learning_rate": 3.202270060176251e-05, "loss": 0.6199, "step": 14015 }, { "epoch": 1.64, "grad_norm": 0.7892797204684883, "learning_rate": 3.2011691365109844e-05, "loss": 0.5494, "step": 14020 }, { "epoch": 1.64, "grad_norm": 0.9138700219680662, "learning_rate": 3.200068065242271e-05, "loss": 0.6126, "step": 14025 }, { "epoch": 1.64, "grad_norm": 0.9477985254868797, "learning_rate": 3.198966846601896e-05, "loss": 0.6185, "step": 14030 }, { "epoch": 1.64, "grad_norm": 0.8768411577126559, "learning_rate": 3.197865480821678e-05, "loss": 0.5963, "step": 14035 }, { "epoch": 1.64, "grad_norm": 0.9538878594814884, "learning_rate": 3.196763968133466e-05, "loss": 0.5955, "step": 14040 }, { "epoch": 1.64, "grad_norm": 0.8485820899508701, "learning_rate": 3.195662308769142e-05, "loss": 0.5846, "step": 14045 }, { "epoch": 1.64, "grad_norm": 0.7415826065087051, "learning_rate": 3.1945605029606134e-05, "loss": 0.6067, "step": 14050 }, { "epoch": 1.64, "grad_norm": 0.9631237404986648, "learning_rate": 3.193458550939823e-05, "loss": 0.6148, "step": 14055 }, { "epoch": 1.64, "grad_norm": 0.9579848931236165, "learning_rate": 3.192356452938745e-05, "loss": 0.6245, "step": 14060 }, { "epoch": 1.64, "grad_norm": 0.8494664317962217, "learning_rate": 3.19125420918938e-05, "loss": 0.6314, "step": 14065 }, { "epoch": 1.64, "grad_norm": 0.9093549111531063, "learning_rate": 3.190151819923765e-05, "loss": 0.6169, "step": 14070 }, { "epoch": 1.64, "grad_norm": 0.8360978546924288, "learning_rate": 3.1890492853739606e-05, "loss": 0.6328, "step": 14075 }, { "epoch": 1.65, "grad_norm": 0.9358108501322423, "learning_rate": 3.187946605772065e-05, "loss": 0.6187, "step": 14080 }, { "epoch": 1.65, "grad_norm": 0.8165968857856247, "learning_rate": 3.1868437813502026e-05, "loss": 0.651, "step": 14085 }, { "epoch": 1.65, "grad_norm": 0.8804855307176693, "learning_rate": 3.1857408123405293e-05, "loss": 0.6552, "step": 14090 }, { "epoch": 1.65, "grad_norm": 0.9219179133563361, "learning_rate": 3.184637698975232e-05, "loss": 0.6027, "step": 14095 }, { "epoch": 1.65, "grad_norm": 0.934007970502787, "learning_rate": 3.1835344414865276e-05, "loss": 0.5353, "step": 14100 }, { "epoch": 1.65, "grad_norm": 1.006284219952787, "learning_rate": 3.182431040106664e-05, "loss": 0.5708, "step": 14105 }, { "epoch": 1.65, "grad_norm": 0.884477928120637, "learning_rate": 3.181327495067917e-05, "loss": 0.6436, "step": 14110 }, { "epoch": 1.65, "grad_norm": 0.9372121306421645, "learning_rate": 3.1802238066025956e-05, "loss": 0.6088, "step": 14115 }, { "epoch": 1.65, "grad_norm": 0.8290587024866012, "learning_rate": 3.1791199749430376e-05, "loss": 0.615, "step": 14120 }, { "epoch": 1.65, "grad_norm": 0.9088374548664306, "learning_rate": 3.178016000321612e-05, "loss": 0.628, "step": 14125 }, { "epoch": 1.65, "grad_norm": 0.8984696982000056, "learning_rate": 3.1769118829707156e-05, "loss": 0.5717, "step": 14130 }, { "epoch": 1.65, "grad_norm": 0.8520665306169647, "learning_rate": 3.175807623122777e-05, "loss": 0.6085, "step": 14135 }, { "epoch": 1.65, "grad_norm": 1.1054824102563734, "learning_rate": 3.1747032210102545e-05, "loss": 0.6174, "step": 14140 }, { "epoch": 1.65, "grad_norm": 0.8392065814313736, "learning_rate": 3.173598676865637e-05, "loss": 0.5699, "step": 14145 }, { "epoch": 1.65, "grad_norm": 0.954578556851808, "learning_rate": 3.172493990921442e-05, "loss": 0.6186, "step": 14150 }, { "epoch": 1.65, "grad_norm": 0.8854617704570861, "learning_rate": 3.1713891634102176e-05, "loss": 0.6094, "step": 14155 }, { "epoch": 1.65, "grad_norm": 0.9433608289412669, "learning_rate": 3.170284194564542e-05, "loss": 0.6131, "step": 14160 }, { "epoch": 1.65, "grad_norm": 1.6056532075501486, "learning_rate": 3.1691790846170213e-05, "loss": 0.5993, "step": 14165 }, { "epoch": 1.66, "grad_norm": 1.0071999129562839, "learning_rate": 3.168073833800295e-05, "loss": 0.5685, "step": 14170 }, { "epoch": 1.66, "grad_norm": 0.8690295486427226, "learning_rate": 3.1669684423470275e-05, "loss": 0.5967, "step": 14175 }, { "epoch": 1.66, "grad_norm": 0.9742190459209951, "learning_rate": 3.165862910489917e-05, "loss": 0.6288, "step": 14180 }, { "epoch": 1.66, "grad_norm": 0.9068261467518752, "learning_rate": 3.164757238461689e-05, "loss": 0.6178, "step": 14185 }, { "epoch": 1.66, "grad_norm": 0.8753917139112746, "learning_rate": 3.1636514264950984e-05, "loss": 0.5892, "step": 14190 }, { "epoch": 1.66, "grad_norm": 0.8217207675729438, "learning_rate": 3.162545474822931e-05, "loss": 0.6106, "step": 14195 }, { "epoch": 1.66, "grad_norm": 0.9925949137916559, "learning_rate": 3.161439383678002e-05, "loss": 0.5905, "step": 14200 }, { "epoch": 1.66, "grad_norm": 0.8529060034107014, "learning_rate": 3.160333153293153e-05, "loss": 0.5647, "step": 14205 }, { "epoch": 1.66, "grad_norm": 127.03419464098543, "learning_rate": 3.15922678390126e-05, "loss": 0.7069, "step": 14210 }, { "epoch": 1.66, "grad_norm": 0.9821132542654768, "learning_rate": 3.158120275735221e-05, "loss": 0.6089, "step": 14215 }, { "epoch": 1.66, "grad_norm": 0.9239417418913255, "learning_rate": 3.157013629027972e-05, "loss": 0.6066, "step": 14220 }, { "epoch": 1.66, "grad_norm": 0.934742165654382, "learning_rate": 3.155906844012472e-05, "loss": 0.6238, "step": 14225 }, { "epoch": 1.66, "grad_norm": 0.810709193067882, "learning_rate": 3.15479992092171e-05, "loss": 0.6407, "step": 14230 }, { "epoch": 1.66, "grad_norm": 0.897326263443791, "learning_rate": 3.153692859988706e-05, "loss": 0.5537, "step": 14235 }, { "epoch": 1.66, "grad_norm": 0.9700000544479225, "learning_rate": 3.152585661446508e-05, "loss": 0.6122, "step": 14240 }, { "epoch": 1.66, "grad_norm": 0.8670501614316243, "learning_rate": 3.151478325528192e-05, "loss": 0.6191, "step": 14245 }, { "epoch": 1.66, "grad_norm": 0.7940522098542911, "learning_rate": 3.150370852466864e-05, "loss": 0.6351, "step": 14250 }, { "epoch": 1.67, "grad_norm": 0.8885844509213725, "learning_rate": 3.149263242495659e-05, "loss": 0.668, "step": 14255 }, { "epoch": 1.67, "grad_norm": 0.8031221834830546, "learning_rate": 3.148155495847742e-05, "loss": 0.5816, "step": 14260 }, { "epoch": 1.67, "grad_norm": 0.7719612473851699, "learning_rate": 3.147047612756302e-05, "loss": 0.5369, "step": 14265 }, { "epoch": 1.67, "grad_norm": 0.8490951241560902, "learning_rate": 3.1459395934545625e-05, "loss": 0.5952, "step": 14270 }, { "epoch": 1.67, "grad_norm": 0.9392766212076059, "learning_rate": 3.144831438175773e-05, "loss": 0.6449, "step": 14275 }, { "epoch": 1.67, "grad_norm": 0.8148533343490466, "learning_rate": 3.1437231471532104e-05, "loss": 0.6107, "step": 14280 }, { "epoch": 1.67, "grad_norm": 0.8364526118435871, "learning_rate": 3.142614720620182e-05, "loss": 0.6211, "step": 14285 }, { "epoch": 1.67, "grad_norm": 1.1214809028260668, "learning_rate": 3.141506158810024e-05, "loss": 0.5685, "step": 14290 }, { "epoch": 1.67, "grad_norm": 0.7782368941113667, "learning_rate": 3.1403974619561e-05, "loss": 0.6161, "step": 14295 }, { "epoch": 1.67, "grad_norm": 1.0546549739796423, "learning_rate": 3.1392886302918014e-05, "loss": 0.6173, "step": 14300 }, { "epoch": 1.67, "grad_norm": 0.9426567326441045, "learning_rate": 3.13817966405055e-05, "loss": 0.504, "step": 14305 }, { "epoch": 1.67, "grad_norm": 0.7970881502192838, "learning_rate": 3.137070563465796e-05, "loss": 0.5922, "step": 14310 }, { "epoch": 1.67, "grad_norm": 0.8719743927258898, "learning_rate": 3.1359613287710135e-05, "loss": 0.5776, "step": 14315 }, { "epoch": 1.67, "grad_norm": 20.38565106905542, "learning_rate": 3.1348519601997095e-05, "loss": 0.6133, "step": 14320 }, { "epoch": 1.67, "grad_norm": 0.8797446176958362, "learning_rate": 3.133742457985418e-05, "loss": 0.5924, "step": 14325 }, { "epoch": 1.67, "grad_norm": 0.9586506553579648, "learning_rate": 3.132632822361701e-05, "loss": 0.6395, "step": 14330 }, { "epoch": 1.67, "grad_norm": 0.9052415787453856, "learning_rate": 3.1315230535621484e-05, "loss": 0.617, "step": 14335 }, { "epoch": 1.68, "grad_norm": 1.1641799510743467, "learning_rate": 3.1304131518203775e-05, "loss": 0.5867, "step": 14340 }, { "epoch": 1.68, "grad_norm": 0.9839868657007339, "learning_rate": 3.129303117370035e-05, "loss": 0.5656, "step": 14345 }, { "epoch": 1.68, "grad_norm": 0.7945105450408615, "learning_rate": 3.128192950444794e-05, "loss": 0.5594, "step": 14350 }, { "epoch": 1.68, "grad_norm": 1.0125987081997354, "learning_rate": 3.127082651278357e-05, "loss": 0.5053, "step": 14355 }, { "epoch": 1.68, "grad_norm": 0.8633232910847957, "learning_rate": 3.1259722201044525e-05, "loss": 0.5713, "step": 14360 }, { "epoch": 1.68, "grad_norm": 0.9323865904612294, "learning_rate": 3.1248616571568395e-05, "loss": 0.6163, "step": 14365 }, { "epoch": 1.68, "grad_norm": 0.8918135421630677, "learning_rate": 3.123750962669301e-05, "loss": 0.6129, "step": 14370 }, { "epoch": 1.68, "grad_norm": 0.9172412216819962, "learning_rate": 3.122640136875653e-05, "loss": 0.6566, "step": 14375 }, { "epoch": 1.68, "grad_norm": 0.8561319207567425, "learning_rate": 3.121529180009733e-05, "loss": 0.6253, "step": 14380 }, { "epoch": 1.68, "grad_norm": 1.1558106866902784, "learning_rate": 3.120418092305411e-05, "loss": 0.6558, "step": 14385 }, { "epoch": 1.68, "grad_norm": 0.8920430044707267, "learning_rate": 3.119306873996581e-05, "loss": 0.5897, "step": 14390 }, { "epoch": 1.68, "grad_norm": 0.8333270672819031, "learning_rate": 3.118195525317166e-05, "loss": 0.5438, "step": 14395 }, { "epoch": 1.68, "grad_norm": 0.9584031754893765, "learning_rate": 3.117084046501119e-05, "loss": 0.6511, "step": 14400 }, { "epoch": 1.68, "grad_norm": 0.9936939750252267, "learning_rate": 3.1159724377824154e-05, "loss": 0.6499, "step": 14405 }, { "epoch": 1.68, "grad_norm": 0.8987091203563597, "learning_rate": 3.1148606993950614e-05, "loss": 0.5655, "step": 14410 }, { "epoch": 1.68, "grad_norm": 0.9136561564369415, "learning_rate": 3.113748831573089e-05, "loss": 0.5939, "step": 14415 }, { "epoch": 1.68, "grad_norm": 0.8973206809682199, "learning_rate": 3.11263683455056e-05, "loss": 0.5344, "step": 14420 }, { "epoch": 1.69, "grad_norm": 0.8867024741552035, "learning_rate": 3.1115247085615595e-05, "loss": 0.6523, "step": 14425 }, { "epoch": 1.69, "grad_norm": 0.854185379361725, "learning_rate": 3.110412453840201e-05, "loss": 0.6022, "step": 14430 }, { "epoch": 1.69, "grad_norm": 0.7683978766451767, "learning_rate": 3.109300070620629e-05, "loss": 0.6511, "step": 14435 }, { "epoch": 1.69, "grad_norm": 0.8675161607514147, "learning_rate": 3.108187559137009e-05, "loss": 0.6076, "step": 14440 }, { "epoch": 1.69, "grad_norm": 0.7799821783691376, "learning_rate": 3.107074919623536e-05, "loss": 0.557, "step": 14445 }, { "epoch": 1.69, "grad_norm": 0.7907027951675817, "learning_rate": 3.105962152314435e-05, "loss": 0.6054, "step": 14450 }, { "epoch": 1.69, "grad_norm": 1.02855111704502, "learning_rate": 3.104849257443953e-05, "loss": 0.6666, "step": 14455 }, { "epoch": 1.69, "grad_norm": 0.8173704925195387, "learning_rate": 3.1037362352463655e-05, "loss": 0.618, "step": 14460 }, { "epoch": 1.69, "grad_norm": 0.8963121320619791, "learning_rate": 3.102623085955977e-05, "loss": 0.7052, "step": 14465 }, { "epoch": 1.69, "grad_norm": 0.8587133538749553, "learning_rate": 3.101509809807117e-05, "loss": 0.6273, "step": 14470 }, { "epoch": 1.69, "grad_norm": 0.9192409451677703, "learning_rate": 3.10039640703414e-05, "loss": 0.6083, "step": 14475 }, { "epoch": 1.69, "grad_norm": 0.8778016404178711, "learning_rate": 3.0992828778714315e-05, "loss": 0.5685, "step": 14480 }, { "epoch": 1.69, "grad_norm": 0.7979767553828838, "learning_rate": 3.0981692225533994e-05, "loss": 0.6405, "step": 14485 }, { "epoch": 1.69, "grad_norm": 0.8646106636423382, "learning_rate": 3.0970554413144805e-05, "loss": 0.6231, "step": 14490 }, { "epoch": 1.69, "grad_norm": 0.8900011275383782, "learning_rate": 3.095941534389136e-05, "loss": 0.6105, "step": 14495 }, { "epoch": 1.69, "grad_norm": 0.8290283065600704, "learning_rate": 3.0948275020118565e-05, "loss": 0.5956, "step": 14500 }, { "epoch": 1.69, "grad_norm": 0.8486155537079948, "learning_rate": 3.0937133444171585e-05, "loss": 0.6209, "step": 14505 }, { "epoch": 1.7, "grad_norm": 0.8209584881424392, "learning_rate": 3.0925990618395805e-05, "loss": 0.6764, "step": 14510 }, { "epoch": 1.7, "grad_norm": 0.804981920476081, "learning_rate": 3.091484654513694e-05, "loss": 0.6399, "step": 14515 }, { "epoch": 1.7, "grad_norm": 0.9099640077272081, "learning_rate": 3.090370122674091e-05, "loss": 0.6057, "step": 14520 }, { "epoch": 1.7, "grad_norm": 1.2539892671479949, "learning_rate": 3.089255466555393e-05, "loss": 0.5685, "step": 14525 }, { "epoch": 1.7, "grad_norm": 0.8395571770173283, "learning_rate": 3.0881406863922474e-05, "loss": 0.568, "step": 14530 }, { "epoch": 1.7, "grad_norm": 0.8635041366718217, "learning_rate": 3.0870257824193263e-05, "loss": 0.5814, "step": 14535 }, { "epoch": 1.7, "grad_norm": 0.8986187790897435, "learning_rate": 3.085910754871329e-05, "loss": 0.6478, "step": 14540 }, { "epoch": 1.7, "grad_norm": 0.9041831731647924, "learning_rate": 3.0847956039829806e-05, "loss": 0.5643, "step": 14545 }, { "epoch": 1.7, "grad_norm": 0.8986964562669946, "learning_rate": 3.083680329989032e-05, "loss": 0.6201, "step": 14550 }, { "epoch": 1.7, "grad_norm": 0.7310458108687056, "learning_rate": 3.08256493312426e-05, "loss": 0.5765, "step": 14555 }, { "epoch": 1.7, "grad_norm": 0.8885560066656397, "learning_rate": 3.081449413623467e-05, "loss": 0.6367, "step": 14560 }, { "epoch": 1.7, "grad_norm": 1.2048526748579056, "learning_rate": 3.080333771721482e-05, "loss": 0.6058, "step": 14565 }, { "epoch": 1.7, "grad_norm": 0.90271581557056, "learning_rate": 3.0792180076531594e-05, "loss": 0.6091, "step": 14570 }, { "epoch": 1.7, "grad_norm": 0.811745238749785, "learning_rate": 3.078102121653378e-05, "loss": 0.6198, "step": 14575 }, { "epoch": 1.7, "grad_norm": 0.9053030017971876, "learning_rate": 3.076986113957044e-05, "loss": 0.5239, "step": 14580 }, { "epoch": 1.7, "grad_norm": 0.9801263041516508, "learning_rate": 3.075869984799091e-05, "loss": 0.572, "step": 14585 }, { "epoch": 1.7, "grad_norm": 0.7736077071552256, "learning_rate": 3.074753734414473e-05, "loss": 0.5858, "step": 14590 }, { "epoch": 1.71, "grad_norm": 0.8937696389272364, "learning_rate": 3.073637363038173e-05, "loss": 0.5987, "step": 14595 }, { "epoch": 1.71, "grad_norm": 0.9102794810097287, "learning_rate": 3.0725208709051995e-05, "loss": 0.6691, "step": 14600 }, { "epoch": 1.71, "grad_norm": 0.9004238567524465, "learning_rate": 3.071404258250586e-05, "loss": 0.6766, "step": 14605 }, { "epoch": 1.71, "grad_norm": 0.8813464989942501, "learning_rate": 3.070287525309391e-05, "loss": 0.5912, "step": 14610 }, { "epoch": 1.71, "grad_norm": 1.0110013115712124, "learning_rate": 3.069170672316697e-05, "loss": 0.6944, "step": 14615 }, { "epoch": 1.71, "grad_norm": 0.9278201852419052, "learning_rate": 3.068053699507616e-05, "loss": 0.5681, "step": 14620 }, { "epoch": 1.71, "grad_norm": 0.7648483584981083, "learning_rate": 3.066936607117279e-05, "loss": 0.5743, "step": 14625 }, { "epoch": 1.71, "grad_norm": 0.9574087744313496, "learning_rate": 3.0658193953808496e-05, "loss": 0.6013, "step": 14630 }, { "epoch": 1.71, "grad_norm": 0.8353107589488827, "learning_rate": 3.06470206453351e-05, "loss": 0.5471, "step": 14635 }, { "epoch": 1.71, "grad_norm": 0.8872461468913105, "learning_rate": 3.0635846148104695e-05, "loss": 0.6327, "step": 14640 }, { "epoch": 1.71, "grad_norm": 0.9061540783556211, "learning_rate": 3.062467046446965e-05, "loss": 0.5713, "step": 14645 }, { "epoch": 1.71, "grad_norm": 0.8305816397556461, "learning_rate": 3.061349359678255e-05, "loss": 0.631, "step": 14650 }, { "epoch": 1.71, "grad_norm": 0.8974785947761166, "learning_rate": 3.060231554739625e-05, "loss": 0.7026, "step": 14655 }, { "epoch": 1.71, "grad_norm": 0.8756909288269374, "learning_rate": 3.059113631866384e-05, "loss": 0.6555, "step": 14660 }, { "epoch": 1.71, "grad_norm": 0.872034223424026, "learning_rate": 3.0579955912938676e-05, "loss": 0.5679, "step": 14665 }, { "epoch": 1.71, "grad_norm": 0.8355138767572048, "learning_rate": 3.056877433257434e-05, "loss": 0.587, "step": 14670 }, { "epoch": 1.71, "grad_norm": 0.9117033754783374, "learning_rate": 3.055759157992467e-05, "loss": 0.5837, "step": 14675 }, { "epoch": 1.72, "grad_norm": 0.8530265605140811, "learning_rate": 3.0546407657343756e-05, "loss": 0.6483, "step": 14680 }, { "epoch": 1.72, "grad_norm": 0.7763231446002526, "learning_rate": 3.053522256718594e-05, "loss": 0.6325, "step": 14685 }, { "epoch": 1.72, "grad_norm": 0.9020648029260706, "learning_rate": 3.0524036311805784e-05, "loss": 0.6497, "step": 14690 }, { "epoch": 1.72, "grad_norm": 0.970187214484655, "learning_rate": 3.0512848893558127e-05, "loss": 0.5897, "step": 14695 }, { "epoch": 1.72, "grad_norm": 0.8484531605228595, "learning_rate": 3.050166031479803e-05, "loss": 0.5927, "step": 14700 }, { "epoch": 1.72, "grad_norm": 0.8161308322549383, "learning_rate": 3.04904705778808e-05, "loss": 0.5992, "step": 14705 }, { "epoch": 1.72, "grad_norm": 0.8543303417382723, "learning_rate": 3.047927968516201e-05, "loss": 0.5202, "step": 14710 }, { "epoch": 1.72, "grad_norm": 0.9863281093765126, "learning_rate": 3.046808763899745e-05, "loss": 0.5968, "step": 14715 }, { "epoch": 1.72, "grad_norm": 0.9075513079802398, "learning_rate": 3.0456894441743162e-05, "loss": 0.635, "step": 14720 }, { "epoch": 1.72, "grad_norm": 0.8781686159947228, "learning_rate": 3.0445700095755437e-05, "loss": 0.6409, "step": 14725 }, { "epoch": 1.72, "grad_norm": 0.7942874690493922, "learning_rate": 3.043450460339079e-05, "loss": 0.6029, "step": 14730 }, { "epoch": 1.72, "grad_norm": 0.9401660525442465, "learning_rate": 3.0423307967006016e-05, "loss": 0.6639, "step": 14735 }, { "epoch": 1.72, "grad_norm": 0.7951913814377338, "learning_rate": 3.0412110188958086e-05, "loss": 0.6135, "step": 14740 }, { "epoch": 1.72, "grad_norm": 1.168511259090768, "learning_rate": 3.0400911271604278e-05, "loss": 0.5948, "step": 14745 }, { "epoch": 1.72, "grad_norm": 0.9279348721137287, "learning_rate": 3.0389711217302076e-05, "loss": 0.623, "step": 14750 }, { "epoch": 1.72, "grad_norm": 1.01043980625006, "learning_rate": 3.0378510028409202e-05, "loss": 0.6818, "step": 14755 }, { "epoch": 1.72, "grad_norm": 0.8172186808017409, "learning_rate": 3.0367307707283626e-05, "loss": 0.5827, "step": 14760 }, { "epoch": 1.73, "grad_norm": 0.9067086546627713, "learning_rate": 3.0356104256283564e-05, "loss": 0.6682, "step": 14765 }, { "epoch": 1.73, "grad_norm": 0.8827044081760728, "learning_rate": 3.0344899677767445e-05, "loss": 0.5882, "step": 14770 }, { "epoch": 1.73, "grad_norm": 0.8591130134602417, "learning_rate": 3.0333693974093952e-05, "loss": 0.5362, "step": 14775 }, { "epoch": 1.73, "grad_norm": 0.8970185058699236, "learning_rate": 3.0322487147622014e-05, "loss": 0.5956, "step": 14780 }, { "epoch": 1.73, "grad_norm": 0.9620011834401389, "learning_rate": 3.031127920071078e-05, "loss": 0.6752, "step": 14785 }, { "epoch": 1.73, "grad_norm": 0.8208474687559303, "learning_rate": 3.030007013571963e-05, "loss": 0.6174, "step": 14790 }, { "epoch": 1.73, "grad_norm": 1.0667917692702664, "learning_rate": 3.028885995500821e-05, "loss": 0.6889, "step": 14795 }, { "epoch": 1.73, "grad_norm": 0.807708090439241, "learning_rate": 3.0277648660936364e-05, "loss": 0.5148, "step": 14800 }, { "epoch": 1.73, "grad_norm": 1.0448599728959822, "learning_rate": 3.0266436255864195e-05, "loss": 0.62, "step": 14805 }, { "epoch": 1.73, "grad_norm": 0.7976554034356785, "learning_rate": 3.0255222742152018e-05, "loss": 0.6313, "step": 14810 }, { "epoch": 1.73, "grad_norm": 0.8699695422624385, "learning_rate": 3.0244008122160415e-05, "loss": 0.6253, "step": 14815 }, { "epoch": 1.73, "grad_norm": 0.7994744383268445, "learning_rate": 3.023279239825017e-05, "loss": 0.6261, "step": 14820 }, { "epoch": 1.73, "grad_norm": 0.9674425525867826, "learning_rate": 3.0221575572782314e-05, "loss": 0.5977, "step": 14825 }, { "epoch": 1.73, "grad_norm": 0.9428129798667975, "learning_rate": 3.02103576481181e-05, "loss": 0.5971, "step": 14830 }, { "epoch": 1.73, "grad_norm": 0.8735977519404388, "learning_rate": 3.0199138626619027e-05, "loss": 0.6318, "step": 14835 }, { "epoch": 1.73, "grad_norm": 0.8862499366700649, "learning_rate": 3.018791851064681e-05, "loss": 0.6627, "step": 14840 }, { "epoch": 1.73, "grad_norm": 0.8454794192143442, "learning_rate": 3.0176697302563405e-05, "loss": 0.6552, "step": 14845 }, { "epoch": 1.74, "grad_norm": 0.93641355543239, "learning_rate": 3.0165475004730993e-05, "loss": 0.5273, "step": 14850 }, { "epoch": 1.74, "grad_norm": 0.9657752810686301, "learning_rate": 3.015425161951198e-05, "loss": 0.6083, "step": 14855 }, { "epoch": 1.74, "grad_norm": 0.8769095588710879, "learning_rate": 3.0143027149269014e-05, "loss": 0.6323, "step": 14860 }, { "epoch": 1.74, "grad_norm": 1.022561444880132, "learning_rate": 3.0131801596364966e-05, "loss": 0.6103, "step": 14865 }, { "epoch": 1.74, "grad_norm": 0.7843705192053287, "learning_rate": 3.012057496316292e-05, "loss": 0.6647, "step": 14870 }, { "epoch": 1.74, "grad_norm": 0.9139901418397574, "learning_rate": 3.0109347252026205e-05, "loss": 0.6511, "step": 14875 }, { "epoch": 1.74, "grad_norm": 0.859650142540925, "learning_rate": 3.009811846531838e-05, "loss": 0.6503, "step": 14880 }, { "epoch": 1.74, "grad_norm": 1.0254925119533578, "learning_rate": 3.0086888605403208e-05, "loss": 0.6025, "step": 14885 }, { "epoch": 1.74, "grad_norm": 0.9402171720272638, "learning_rate": 3.0075657674644703e-05, "loss": 0.7411, "step": 14890 }, { "epoch": 1.74, "grad_norm": 0.971803849491066, "learning_rate": 3.0064425675407092e-05, "loss": 0.5835, "step": 14895 }, { "epoch": 1.74, "grad_norm": 0.8461273985912147, "learning_rate": 3.005319261005483e-05, "loss": 0.8373, "step": 14900 }, { "epoch": 1.74, "grad_norm": 0.9259937114555273, "learning_rate": 3.004195848095258e-05, "loss": 0.6089, "step": 14905 }, { "epoch": 1.74, "grad_norm": 0.8604636731952273, "learning_rate": 3.0030723290465267e-05, "loss": 0.5828, "step": 14910 }, { "epoch": 1.74, "grad_norm": 0.9519233288910779, "learning_rate": 3.0019487040958004e-05, "loss": 0.6379, "step": 14915 }, { "epoch": 1.74, "grad_norm": 0.833454807449613, "learning_rate": 3.0008249734796134e-05, "loss": 0.6281, "step": 14920 }, { "epoch": 1.74, "grad_norm": 0.9901037613201923, "learning_rate": 2.999701137434524e-05, "loss": 0.5581, "step": 14925 }, { "epoch": 1.74, "grad_norm": 0.884230922647901, "learning_rate": 2.9985771961971104e-05, "loss": 0.6369, "step": 14930 }, { "epoch": 1.74, "grad_norm": 0.9078545673245122, "learning_rate": 2.9974531500039748e-05, "loss": 0.636, "step": 14935 }, { "epoch": 1.75, "grad_norm": 0.9149208943264309, "learning_rate": 2.99632899909174e-05, "loss": 0.5886, "step": 14940 }, { "epoch": 1.75, "grad_norm": 0.8108307065097886, "learning_rate": 2.995204743697052e-05, "loss": 0.6109, "step": 14945 }, { "epoch": 1.75, "grad_norm": 0.8663201113907295, "learning_rate": 2.9940803840565784e-05, "loss": 0.6138, "step": 14950 }, { "epoch": 1.75, "grad_norm": 0.8509709944392145, "learning_rate": 2.9929559204070078e-05, "loss": 0.5897, "step": 14955 }, { "epoch": 1.75, "grad_norm": 0.9060482229291699, "learning_rate": 2.9918313529850533e-05, "loss": 0.6479, "step": 14960 }, { "epoch": 1.75, "grad_norm": 0.8532734201153297, "learning_rate": 2.9907066820274466e-05, "loss": 0.6352, "step": 14965 }, { "epoch": 1.75, "grad_norm": 0.8456501479967143, "learning_rate": 2.9895819077709436e-05, "loss": 0.5928, "step": 14970 }, { "epoch": 1.75, "grad_norm": 0.9064687851107315, "learning_rate": 2.98845703045232e-05, "loss": 0.6689, "step": 14975 }, { "epoch": 1.75, "grad_norm": 0.8652678076261537, "learning_rate": 2.9873320503083756e-05, "loss": 0.5986, "step": 14980 }, { "epoch": 1.75, "grad_norm": 0.8401529460240302, "learning_rate": 2.9862069675759296e-05, "loss": 0.545, "step": 14985 }, { "epoch": 1.75, "grad_norm": 0.9102955944265461, "learning_rate": 2.9850817824918237e-05, "loss": 0.595, "step": 14990 }, { "epoch": 1.75, "grad_norm": 0.9749114740477626, "learning_rate": 2.9839564952929223e-05, "loss": 0.5803, "step": 14995 }, { "epoch": 1.75, "grad_norm": 0.8789190175440106, "learning_rate": 2.982831106216108e-05, "loss": 0.6571, "step": 15000 }, { "epoch": 1.75, "grad_norm": 0.8593762315789426, "learning_rate": 2.9817056154982888e-05, "loss": 0.6226, "step": 15005 }, { "epoch": 1.75, "grad_norm": 1.0075139965241253, "learning_rate": 2.980580023376392e-05, "loss": 0.5852, "step": 15010 }, { "epoch": 1.75, "grad_norm": 0.9118240855892054, "learning_rate": 2.9794543300873657e-05, "loss": 0.5793, "step": 15015 }, { "epoch": 1.75, "grad_norm": 0.9197131815881998, "learning_rate": 2.9783285358681806e-05, "loss": 0.5663, "step": 15020 }, { "epoch": 1.76, "grad_norm": 0.8890433750502471, "learning_rate": 2.9772026409558285e-05, "loss": 0.684, "step": 15025 }, { "epoch": 1.76, "grad_norm": 2.4943136624401174, "learning_rate": 2.9760766455873222e-05, "loss": 0.6545, "step": 15030 }, { "epoch": 1.76, "grad_norm": 0.9147116041387249, "learning_rate": 2.9749505499996943e-05, "loss": 0.5876, "step": 15035 }, { "epoch": 1.76, "grad_norm": 0.7459538100279501, "learning_rate": 2.9738243544300015e-05, "loss": 0.5966, "step": 15040 }, { "epoch": 1.76, "grad_norm": 0.8956785128391742, "learning_rate": 2.972698059115318e-05, "loss": 0.5829, "step": 15045 }, { "epoch": 1.76, "grad_norm": 0.7281108168341436, "learning_rate": 2.971571664292742e-05, "loss": 0.5968, "step": 15050 }, { "epoch": 1.76, "grad_norm": 0.9531359022710515, "learning_rate": 2.9704451701993906e-05, "loss": 0.5784, "step": 15055 }, { "epoch": 1.76, "grad_norm": 0.8752772059586545, "learning_rate": 2.9693185770724035e-05, "loss": 0.6822, "step": 15060 }, { "epoch": 1.76, "grad_norm": 0.875999587143, "learning_rate": 2.9681918851489398e-05, "loss": 0.6772, "step": 15065 }, { "epoch": 1.76, "grad_norm": 0.902186723360979, "learning_rate": 2.96706509466618e-05, "loss": 0.6529, "step": 15070 }, { "epoch": 1.76, "grad_norm": 0.9551614373463759, "learning_rate": 2.965938205861325e-05, "loss": 1.2168, "step": 15075 }, { "epoch": 1.76, "grad_norm": 0.9487659564948596, "learning_rate": 2.9648112189715977e-05, "loss": 0.569, "step": 15080 }, { "epoch": 1.76, "grad_norm": 1.0512244187765623, "learning_rate": 2.9636841342342392e-05, "loss": 0.6442, "step": 15085 }, { "epoch": 1.76, "grad_norm": 0.9570967230910677, "learning_rate": 2.9625569518865138e-05, "loss": 0.5953, "step": 15090 }, { "epoch": 1.76, "grad_norm": 1.0480209215855774, "learning_rate": 2.9614296721657042e-05, "loss": 0.564, "step": 15095 }, { "epoch": 1.76, "grad_norm": 0.9607319310527488, "learning_rate": 2.960302295309116e-05, "loss": 0.6626, "step": 15100 }, { "epoch": 1.76, "grad_norm": 0.9133064612445343, "learning_rate": 2.959174821554072e-05, "loss": 0.5827, "step": 15105 }, { "epoch": 1.77, "grad_norm": 1.1524396340438572, "learning_rate": 2.9580472511379197e-05, "loss": 0.6612, "step": 15110 }, { "epoch": 1.77, "grad_norm": 0.8550727310609498, "learning_rate": 2.9569195842980218e-05, "loss": 0.6398, "step": 15115 }, { "epoch": 1.77, "grad_norm": 0.8893716966803409, "learning_rate": 2.9557918212717657e-05, "loss": 0.5985, "step": 15120 }, { "epoch": 1.77, "grad_norm": 0.8542210506636788, "learning_rate": 2.9546639622965566e-05, "loss": 0.5318, "step": 15125 }, { "epoch": 1.77, "grad_norm": 95.57073773407024, "learning_rate": 2.9535360076098205e-05, "loss": 0.7631, "step": 15130 }, { "epoch": 1.77, "grad_norm": 0.9104315726838731, "learning_rate": 2.9524079574490047e-05, "loss": 0.625, "step": 15135 }, { "epoch": 1.77, "grad_norm": 0.9690479458585121, "learning_rate": 2.951279812051575e-05, "loss": 0.5394, "step": 15140 }, { "epoch": 1.77, "grad_norm": 0.9971123983145045, "learning_rate": 2.9501515716550178e-05, "loss": 0.5625, "step": 15145 }, { "epoch": 1.77, "grad_norm": 0.9058075794904135, "learning_rate": 2.949023236496839e-05, "loss": 0.615, "step": 15150 }, { "epoch": 1.77, "grad_norm": 0.805836797371851, "learning_rate": 2.9478948068145655e-05, "loss": 0.6353, "step": 15155 }, { "epoch": 1.77, "grad_norm": 0.9032182189079547, "learning_rate": 2.946766282845744e-05, "loss": 0.5637, "step": 15160 }, { "epoch": 1.77, "grad_norm": 25.33904420349589, "learning_rate": 2.9456376648279393e-05, "loss": 0.6757, "step": 15165 }, { "epoch": 1.77, "grad_norm": 0.8391892317600869, "learning_rate": 2.9445089529987384e-05, "loss": 0.6036, "step": 15170 }, { "epoch": 1.77, "grad_norm": 0.8872383767708902, "learning_rate": 2.9433801475957475e-05, "loss": 0.7093, "step": 15175 }, { "epoch": 1.77, "grad_norm": 0.821819208772899, "learning_rate": 2.94225124885659e-05, "loss": 0.6905, "step": 15180 }, { "epoch": 1.77, "grad_norm": 0.8132078498239131, "learning_rate": 2.9411222570189123e-05, "loss": 0.6638, "step": 15185 }, { "epoch": 1.77, "grad_norm": 0.841537340818313, "learning_rate": 2.9399931723203796e-05, "loss": 0.5923, "step": 15190 }, { "epoch": 1.78, "grad_norm": 0.9559097040451198, "learning_rate": 2.9388639949986744e-05, "loss": 0.5672, "step": 15195 }, { "epoch": 1.78, "grad_norm": 0.9214939182203384, "learning_rate": 2.937734725291501e-05, "loss": 0.5893, "step": 15200 }, { "epoch": 1.78, "grad_norm": 0.972882928842492, "learning_rate": 2.9366053634365835e-05, "loss": 0.6262, "step": 15205 }, { "epoch": 1.78, "grad_norm": 0.7715616907361613, "learning_rate": 2.9354759096716634e-05, "loss": 0.58, "step": 15210 }, { "epoch": 1.78, "grad_norm": 0.8662036895757589, "learning_rate": 2.934346364234503e-05, "loss": 0.5611, "step": 15215 }, { "epoch": 1.78, "grad_norm": 0.8300598715474246, "learning_rate": 2.9332167273628823e-05, "loss": 0.6309, "step": 15220 }, { "epoch": 1.78, "grad_norm": 0.9357369214526228, "learning_rate": 2.932086999294604e-05, "loss": 0.6437, "step": 15225 }, { "epoch": 1.78, "grad_norm": 0.815579467164919, "learning_rate": 2.9309571802674847e-05, "loss": 0.5935, "step": 15230 }, { "epoch": 1.78, "grad_norm": 0.9643868137658049, "learning_rate": 2.9298272705193652e-05, "loss": 0.6416, "step": 15235 }, { "epoch": 1.78, "grad_norm": 0.85492742793463, "learning_rate": 2.9286972702881034e-05, "loss": 0.6264, "step": 15240 }, { "epoch": 1.78, "grad_norm": 0.9076639692940736, "learning_rate": 2.927567179811575e-05, "loss": 0.5829, "step": 15245 }, { "epoch": 1.78, "grad_norm": 0.8909180365387361, "learning_rate": 2.926436999327677e-05, "loss": 0.5887, "step": 15250 }, { "epoch": 1.78, "grad_norm": 0.8726015115225713, "learning_rate": 2.9253067290743235e-05, "loss": 0.6311, "step": 15255 }, { "epoch": 1.78, "grad_norm": 0.9277518081708441, "learning_rate": 2.9241763692894486e-05, "loss": 0.6007, "step": 15260 }, { "epoch": 1.78, "grad_norm": 0.9861246412172564, "learning_rate": 2.9230459202110046e-05, "loss": 0.5774, "step": 15265 }, { "epoch": 1.78, "grad_norm": 0.886301681983085, "learning_rate": 2.9219153820769628e-05, "loss": 0.5534, "step": 15270 }, { "epoch": 1.78, "grad_norm": 0.9689578852269723, "learning_rate": 2.9207847551253142e-05, "loss": 0.5831, "step": 15275 }, { "epoch": 1.79, "grad_norm": 0.8994382022070937, "learning_rate": 2.919654039594066e-05, "loss": 0.5879, "step": 15280 }, { "epoch": 1.79, "grad_norm": 0.8963503428487793, "learning_rate": 2.9185232357212473e-05, "loss": 0.6201, "step": 15285 }, { "epoch": 1.79, "grad_norm": 0.8463400957560143, "learning_rate": 2.9173923437449036e-05, "loss": 0.6057, "step": 15290 }, { "epoch": 1.79, "grad_norm": 0.8430091402173308, "learning_rate": 2.9162613639030993e-05, "loss": 0.6042, "step": 15295 }, { "epoch": 1.79, "grad_norm": 0.924570201297349, "learning_rate": 2.9151302964339172e-05, "loss": 0.6009, "step": 15300 }, { "epoch": 1.79, "grad_norm": 0.9357602735634031, "learning_rate": 2.9139991415754597e-05, "loss": 0.4952, "step": 15305 }, { "epoch": 1.79, "grad_norm": 0.8584845844509773, "learning_rate": 2.912867899565846e-05, "loss": 0.6335, "step": 15310 }, { "epoch": 1.79, "grad_norm": 0.9003463725769718, "learning_rate": 2.9117365706432144e-05, "loss": 0.5629, "step": 15315 }, { "epoch": 1.79, "grad_norm": 0.9045460629709449, "learning_rate": 2.9106051550457224e-05, "loss": 0.5758, "step": 15320 }, { "epoch": 1.79, "grad_norm": 0.9652131903777093, "learning_rate": 2.909473653011544e-05, "loss": 0.5894, "step": 15325 }, { "epoch": 1.79, "grad_norm": 0.8606927582561902, "learning_rate": 2.908342064778873e-05, "loss": 0.577, "step": 15330 }, { "epoch": 1.79, "grad_norm": 0.8914239692430463, "learning_rate": 2.9072103905859183e-05, "loss": 0.642, "step": 15335 }, { "epoch": 1.79, "grad_norm": 0.8290004897092969, "learning_rate": 2.9060786306709126e-05, "loss": 0.6131, "step": 15340 }, { "epoch": 1.79, "grad_norm": 0.8234281004776719, "learning_rate": 2.9049467852721007e-05, "loss": 0.637, "step": 15345 }, { "epoch": 1.79, "grad_norm": 0.9733989681776443, "learning_rate": 2.9038148546277488e-05, "loss": 0.5919, "step": 15350 }, { "epoch": 1.79, "grad_norm": 0.7939712851349656, "learning_rate": 2.9026828389761406e-05, "loss": 0.5485, "step": 15355 }, { "epoch": 1.79, "grad_norm": 0.8090157164040375, "learning_rate": 2.9015507385555763e-05, "loss": 0.6575, "step": 15360 }, { "epoch": 1.8, "grad_norm": 0.8220637441077542, "learning_rate": 2.9004185536043755e-05, "loss": 0.6048, "step": 15365 }, { "epoch": 1.8, "grad_norm": 0.8912668114056833, "learning_rate": 2.8992862843608746e-05, "loss": 0.5851, "step": 15370 }, { "epoch": 1.8, "grad_norm": 0.8414273747273939, "learning_rate": 2.898153931063428e-05, "loss": 0.6565, "step": 15375 }, { "epoch": 1.8, "grad_norm": 0.9075142797543228, "learning_rate": 2.8970214939504087e-05, "loss": 0.6455, "step": 15380 }, { "epoch": 1.8, "grad_norm": 1.5635072354235717, "learning_rate": 2.8958889732602064e-05, "loss": 0.638, "step": 15385 }, { "epoch": 1.8, "grad_norm": 0.867668645195686, "learning_rate": 2.894756369231228e-05, "loss": 0.603, "step": 15390 }, { "epoch": 1.8, "grad_norm": 0.9022789682232436, "learning_rate": 2.8936236821018988e-05, "loss": 0.6011, "step": 15395 }, { "epoch": 1.8, "grad_norm": 0.9483143692107806, "learning_rate": 2.8924909121106612e-05, "loss": 0.601, "step": 15400 }, { "epoch": 1.8, "grad_norm": 0.8606154189427925, "learning_rate": 2.891358059495976e-05, "loss": 0.6186, "step": 15405 }, { "epoch": 1.8, "grad_norm": 0.8792379084804409, "learning_rate": 2.890225124496319e-05, "loss": 0.5889, "step": 15410 }, { "epoch": 1.8, "grad_norm": 0.678016259254467, "learning_rate": 2.889092107350186e-05, "loss": 0.6146, "step": 15415 }, { "epoch": 1.8, "grad_norm": 0.9207736389941988, "learning_rate": 2.887959008296089e-05, "loss": 0.6457, "step": 15420 }, { "epoch": 1.8, "grad_norm": 0.9090070245499225, "learning_rate": 2.886825827572557e-05, "loss": 0.6205, "step": 15425 }, { "epoch": 1.8, "grad_norm": 0.925285019670458, "learning_rate": 2.8856925654181356e-05, "loss": 0.6799, "step": 15430 }, { "epoch": 1.8, "grad_norm": 1.02916783656782, "learning_rate": 2.8845592220713898e-05, "loss": 0.6014, "step": 15435 }, { "epoch": 1.8, "grad_norm": 0.8528647209907759, "learning_rate": 2.883425797770899e-05, "loss": 0.7026, "step": 15440 }, { "epoch": 1.8, "grad_norm": 0.8842330714333839, "learning_rate": 2.8822922927552616e-05, "loss": 0.6199, "step": 15445 }, { "epoch": 1.81, "grad_norm": 0.786792938782078, "learning_rate": 2.881158707263092e-05, "loss": 0.5768, "step": 15450 }, { "epoch": 1.81, "grad_norm": 1.0406915896839435, "learning_rate": 2.880025041533022e-05, "loss": 0.7346, "step": 15455 }, { "epoch": 1.81, "grad_norm": 0.9759327538869409, "learning_rate": 2.8788912958037006e-05, "loss": 0.5937, "step": 15460 }, { "epoch": 1.81, "grad_norm": 0.9879829680698227, "learning_rate": 2.8777574703137915e-05, "loss": 0.6215, "step": 15465 }, { "epoch": 1.81, "grad_norm": 0.8361553037833519, "learning_rate": 2.876623565301979e-05, "loss": 0.5848, "step": 15470 }, { "epoch": 1.81, "grad_norm": 0.9417289077100779, "learning_rate": 2.8754895810069603e-05, "loss": 0.6114, "step": 15475 }, { "epoch": 1.81, "grad_norm": 0.7976990879482714, "learning_rate": 2.874355517667451e-05, "loss": 0.6002, "step": 15480 }, { "epoch": 1.81, "grad_norm": 0.890926456435577, "learning_rate": 2.8732213755221855e-05, "loss": 0.6446, "step": 15485 }, { "epoch": 1.81, "grad_norm": 0.9451491166608306, "learning_rate": 2.8720871548099095e-05, "loss": 0.583, "step": 15490 }, { "epoch": 1.81, "grad_norm": 0.9099321630529936, "learning_rate": 2.8709528557693906e-05, "loss": 0.6244, "step": 15495 }, { "epoch": 1.81, "grad_norm": 0.8616978463790455, "learning_rate": 2.8698184786394094e-05, "loss": 0.644, "step": 15500 }, { "epoch": 1.81, "grad_norm": 0.8587283905001046, "learning_rate": 2.8686840236587646e-05, "loss": 0.5416, "step": 15505 }, { "epoch": 1.81, "grad_norm": 0.946700712832544, "learning_rate": 2.8675494910662713e-05, "loss": 0.5252, "step": 15510 }, { "epoch": 1.81, "grad_norm": 1.1241769763091565, "learning_rate": 2.8664148811007596e-05, "loss": 0.6402, "step": 15515 }, { "epoch": 1.81, "grad_norm": 0.8107623762806399, "learning_rate": 2.8652801940010782e-05, "loss": 0.6053, "step": 15520 }, { "epoch": 1.81, "grad_norm": 0.8811204274624614, "learning_rate": 2.8641454300060887e-05, "loss": 0.6016, "step": 15525 }, { "epoch": 1.81, "grad_norm": 0.7812222171445361, "learning_rate": 2.863010589354672e-05, "loss": 0.6214, "step": 15530 }, { "epoch": 1.82, "grad_norm": 0.802755397149214, "learning_rate": 2.8618756722857244e-05, "loss": 0.6961, "step": 15535 }, { "epoch": 1.82, "grad_norm": 1.0153862895503316, "learning_rate": 2.8607406790381564e-05, "loss": 0.6024, "step": 15540 }, { "epoch": 1.82, "grad_norm": 0.8864798088264312, "learning_rate": 2.8596056098508968e-05, "loss": 0.6065, "step": 15545 }, { "epoch": 1.82, "grad_norm": 0.8431454967113972, "learning_rate": 2.8584704649628895e-05, "loss": 0.5828, "step": 15550 }, { "epoch": 1.82, "grad_norm": 0.8992707917978078, "learning_rate": 2.857335244613095e-05, "loss": 0.5465, "step": 15555 }, { "epoch": 1.82, "grad_norm": 0.9566196774029382, "learning_rate": 2.856199949040488e-05, "loss": 0.639, "step": 15560 }, { "epoch": 1.82, "grad_norm": 0.8930180690240621, "learning_rate": 2.85506457848406e-05, "loss": 0.6598, "step": 15565 }, { "epoch": 1.82, "grad_norm": 0.9516695105041987, "learning_rate": 2.8539291331828198e-05, "loss": 0.6314, "step": 15570 }, { "epoch": 1.82, "grad_norm": 0.9142514895966403, "learning_rate": 2.8527936133757893e-05, "loss": 0.6409, "step": 15575 }, { "epoch": 1.82, "grad_norm": 1.2595048858553646, "learning_rate": 2.8516580193020075e-05, "loss": 0.6823, "step": 15580 }, { "epoch": 1.82, "grad_norm": 0.9038634882816586, "learning_rate": 2.8505223512005297e-05, "loss": 0.6373, "step": 15585 }, { "epoch": 1.82, "grad_norm": 0.8395131364030178, "learning_rate": 2.8493866093104248e-05, "loss": 0.6452, "step": 15590 }, { "epoch": 1.82, "grad_norm": 0.9002844445332441, "learning_rate": 2.8482507938707794e-05, "loss": 0.6476, "step": 15595 }, { "epoch": 1.82, "grad_norm": 0.9488186000848398, "learning_rate": 2.8471149051206936e-05, "loss": 0.6223, "step": 15600 }, { "epoch": 1.82, "grad_norm": 0.9073022444458917, "learning_rate": 2.845978943299284e-05, "loss": 0.5655, "step": 15605 }, { "epoch": 1.82, "grad_norm": 0.9069415011986827, "learning_rate": 2.8448429086456825e-05, "loss": 0.5351, "step": 15610 }, { "epoch": 1.82, "grad_norm": 0.873628949718507, "learning_rate": 2.8437068013990377e-05, "loss": 0.6071, "step": 15615 }, { "epoch": 1.82, "grad_norm": 0.9604049892919877, "learning_rate": 2.84257062179851e-05, "loss": 0.6448, "step": 15620 }, { "epoch": 1.83, "grad_norm": 0.8308200298302506, "learning_rate": 2.8414343700832778e-05, "loss": 0.5532, "step": 15625 }, { "epoch": 1.83, "grad_norm": 0.9027708754985672, "learning_rate": 2.8402980464925344e-05, "loss": 0.6373, "step": 15630 }, { "epoch": 1.83, "grad_norm": 0.8919052142974081, "learning_rate": 2.839161651265489e-05, "loss": 0.5416, "step": 15635 }, { "epoch": 1.83, "grad_norm": 0.8321245606009727, "learning_rate": 2.8380251846413613e-05, "loss": 0.6215, "step": 15640 }, { "epoch": 1.83, "grad_norm": 0.8564624880366666, "learning_rate": 2.836888646859392e-05, "loss": 0.5784, "step": 15645 }, { "epoch": 1.83, "grad_norm": 1.0011875896902096, "learning_rate": 2.835752038158835e-05, "loss": 0.6294, "step": 15650 }, { "epoch": 1.83, "grad_norm": 0.8151643297421427, "learning_rate": 2.8346153587789553e-05, "loss": 0.6102, "step": 15655 }, { "epoch": 1.83, "grad_norm": 0.865754298984823, "learning_rate": 2.833478608959038e-05, "loss": 0.6642, "step": 15660 }, { "epoch": 1.83, "grad_norm": 0.854560387439589, "learning_rate": 2.832341788938381e-05, "loss": 0.5829, "step": 15665 }, { "epoch": 1.83, "grad_norm": 0.8804218472430021, "learning_rate": 2.8312048989562955e-05, "loss": 0.6074, "step": 15670 }, { "epoch": 1.83, "grad_norm": 0.8545618799163368, "learning_rate": 2.8300679392521096e-05, "loss": 0.5895, "step": 15675 }, { "epoch": 1.83, "grad_norm": 0.9652038609049712, "learning_rate": 2.828930910065165e-05, "loss": 0.6483, "step": 15680 }, { "epoch": 1.83, "grad_norm": 0.785036131334889, "learning_rate": 2.8277938116348184e-05, "loss": 0.5727, "step": 15685 }, { "epoch": 1.83, "grad_norm": 0.7781872319644279, "learning_rate": 2.8266566442004406e-05, "loss": 0.5165, "step": 15690 }, { "epoch": 1.83, "grad_norm": 0.9088117541912111, "learning_rate": 2.825519408001418e-05, "loss": 0.6232, "step": 15695 }, { "epoch": 1.83, "grad_norm": 0.9001654774268263, "learning_rate": 2.82438210327715e-05, "loss": 0.5938, "step": 15700 }, { "epoch": 1.83, "grad_norm": 0.9664343477830007, "learning_rate": 2.8232447302670513e-05, "loss": 0.6753, "step": 15705 }, { "epoch": 1.84, "grad_norm": 0.8339831249463702, "learning_rate": 2.8221072892105505e-05, "loss": 0.6622, "step": 15710 }, { "epoch": 1.84, "grad_norm": 1.0084503753988086, "learning_rate": 2.820969780347092e-05, "loss": 0.578, "step": 15715 }, { "epoch": 1.84, "grad_norm": 1.003284467156913, "learning_rate": 2.8198322039161322e-05, "loss": 0.6289, "step": 15720 }, { "epoch": 1.84, "grad_norm": 0.8883027528047214, "learning_rate": 2.8186945601571428e-05, "loss": 0.6119, "step": 15725 }, { "epoch": 1.84, "grad_norm": 0.9284869809696922, "learning_rate": 2.8175568493096115e-05, "loss": 0.6524, "step": 15730 }, { "epoch": 1.84, "grad_norm": 0.9161117199431074, "learning_rate": 2.816419071613036e-05, "loss": 0.672, "step": 15735 }, { "epoch": 1.84, "grad_norm": 0.8726369520586122, "learning_rate": 2.815281227306932e-05, "loss": 0.5792, "step": 15740 }, { "epoch": 1.84, "grad_norm": 0.9594375541580534, "learning_rate": 2.8141433166308263e-05, "loss": 0.5308, "step": 15745 }, { "epoch": 1.84, "grad_norm": 0.9782893364223625, "learning_rate": 2.813005339824263e-05, "loss": 0.6294, "step": 15750 }, { "epoch": 1.84, "grad_norm": 0.9043232377410992, "learning_rate": 2.8118672971267963e-05, "loss": 0.6249, "step": 15755 }, { "epoch": 1.84, "grad_norm": 0.8909021373088903, "learning_rate": 2.810729188777996e-05, "loss": 0.6219, "step": 15760 }, { "epoch": 1.84, "grad_norm": 0.8638747358728026, "learning_rate": 2.809591015017448e-05, "loss": 0.6145, "step": 15765 }, { "epoch": 1.84, "grad_norm": 2.2259314996671353, "learning_rate": 2.808452776084748e-05, "loss": 0.5233, "step": 15770 }, { "epoch": 1.84, "grad_norm": 1.00790397103229, "learning_rate": 2.807314472219507e-05, "loss": 0.5996, "step": 15775 }, { "epoch": 1.84, "grad_norm": 0.7995895086216097, "learning_rate": 2.8061761036613514e-05, "loss": 0.5967, "step": 15780 }, { "epoch": 1.84, "grad_norm": 0.8298635074958586, "learning_rate": 2.8050376706499183e-05, "loss": 0.6003, "step": 15785 }, { "epoch": 1.84, "grad_norm": 0.8977249628470072, "learning_rate": 2.80389917342486e-05, "loss": 0.6264, "step": 15790 }, { "epoch": 1.85, "grad_norm": 1.0262435288309792, "learning_rate": 2.802760612225843e-05, "loss": 0.6015, "step": 15795 }, { "epoch": 1.85, "grad_norm": 0.7437627141196989, "learning_rate": 2.8016219872925453e-05, "loss": 0.5647, "step": 15800 }, { "epoch": 1.85, "grad_norm": 0.8625797903378296, "learning_rate": 2.8004832988646594e-05, "loss": 0.5896, "step": 15805 }, { "epoch": 1.85, "grad_norm": 0.8563876446849167, "learning_rate": 2.7993445471818926e-05, "loss": 0.5638, "step": 15810 }, { "epoch": 1.85, "grad_norm": 0.9110849079384351, "learning_rate": 2.798205732483963e-05, "loss": 0.5721, "step": 15815 }, { "epoch": 1.85, "grad_norm": 0.8706606739055289, "learning_rate": 2.7970668550106017e-05, "loss": 0.6834, "step": 15820 }, { "epoch": 1.85, "grad_norm": 0.8706579403217899, "learning_rate": 2.7959279150015567e-05, "loss": 0.5853, "step": 15825 }, { "epoch": 1.85, "grad_norm": 0.7752437116512222, "learning_rate": 2.794788912696586e-05, "loss": 0.5763, "step": 15830 }, { "epoch": 1.85, "grad_norm": 1.1087155870782206, "learning_rate": 2.7936498483354605e-05, "loss": 0.5552, "step": 15835 }, { "epoch": 1.85, "grad_norm": 0.8706300922872348, "learning_rate": 2.792510722157966e-05, "loss": 0.5767, "step": 15840 }, { "epoch": 1.85, "grad_norm": 0.8492687112401328, "learning_rate": 2.791371534403901e-05, "loss": 0.6507, "step": 15845 }, { "epoch": 1.85, "grad_norm": 0.9121793610697639, "learning_rate": 2.7902322853130757e-05, "loss": 0.5322, "step": 15850 }, { "epoch": 1.85, "grad_norm": 0.9189718096351744, "learning_rate": 2.7890929751253142e-05, "loss": 0.6173, "step": 15855 }, { "epoch": 1.85, "grad_norm": 0.8821655189226002, "learning_rate": 2.7879536040804533e-05, "loss": 0.6, "step": 15860 }, { "epoch": 1.85, "grad_norm": 1.0864360265978323, "learning_rate": 2.7868141724183423e-05, "loss": 0.5779, "step": 15865 }, { "epoch": 1.85, "grad_norm": 0.9574193815415722, "learning_rate": 2.7856746803788435e-05, "loss": 0.6123, "step": 15870 }, { "epoch": 1.85, "grad_norm": 0.8910269556806888, "learning_rate": 2.7845351282018324e-05, "loss": 0.5723, "step": 15875 }, { "epoch": 1.86, "grad_norm": 0.9529094205532579, "learning_rate": 2.7833955161271968e-05, "loss": 0.6605, "step": 15880 }, { "epoch": 1.86, "grad_norm": 0.8758479065335699, "learning_rate": 2.7822558443948354e-05, "loss": 0.6688, "step": 15885 }, { "epoch": 1.86, "grad_norm": 0.9096254482813643, "learning_rate": 2.7811161132446624e-05, "loss": 0.6436, "step": 15890 }, { "epoch": 1.86, "grad_norm": 0.8443034074249516, "learning_rate": 2.7799763229166038e-05, "loss": 0.5973, "step": 15895 }, { "epoch": 1.86, "grad_norm": 0.8205982941896848, "learning_rate": 2.778836473650595e-05, "loss": 0.5606, "step": 15900 }, { "epoch": 1.86, "grad_norm": 0.9547737518306739, "learning_rate": 2.7776965656865888e-05, "loss": 0.5735, "step": 15905 }, { "epoch": 1.86, "grad_norm": 1.1558645525388844, "learning_rate": 2.7765565992645465e-05, "loss": 0.6548, "step": 15910 }, { "epoch": 1.86, "grad_norm": 0.8363521802205757, "learning_rate": 2.7754165746244433e-05, "loss": 0.6069, "step": 15915 }, { "epoch": 1.86, "grad_norm": 0.7795771399962282, "learning_rate": 2.7742764920062657e-05, "loss": 0.5617, "step": 15920 }, { "epoch": 1.86, "grad_norm": 0.8557510305810598, "learning_rate": 2.7731363516500138e-05, "loss": 0.642, "step": 15925 }, { "epoch": 1.86, "grad_norm": 0.8543614234786983, "learning_rate": 2.771996153795699e-05, "loss": 0.5914, "step": 15930 }, { "epoch": 1.86, "grad_norm": 0.876360395363473, "learning_rate": 2.7708558986833437e-05, "loss": 0.572, "step": 15935 }, { "epoch": 1.86, "grad_norm": 0.891020462313241, "learning_rate": 2.7697155865529856e-05, "loss": 0.662, "step": 15940 }, { "epoch": 1.86, "grad_norm": 0.9042436366135987, "learning_rate": 2.768575217644671e-05, "loss": 0.6147, "step": 15945 }, { "epoch": 1.86, "grad_norm": 0.9571926136739703, "learning_rate": 2.76743479219846e-05, "loss": 0.625, "step": 15950 }, { "epoch": 1.86, "grad_norm": 0.8429153692879391, "learning_rate": 2.7662943104544232e-05, "loss": 0.6348, "step": 15955 }, { "epoch": 1.86, "grad_norm": 0.8835499717886393, "learning_rate": 2.765153772652645e-05, "loss": 0.5903, "step": 15960 }, { "epoch": 1.87, "grad_norm": 0.9325576787556494, "learning_rate": 2.7640131790332207e-05, "loss": 0.6216, "step": 15965 }, { "epoch": 1.87, "grad_norm": 0.8862124946222595, "learning_rate": 2.7628725298362555e-05, "loss": 0.5942, "step": 15970 }, { "epoch": 1.87, "grad_norm": 0.8818170829648544, "learning_rate": 2.7617318253018702e-05, "loss": 0.5894, "step": 15975 }, { "epoch": 1.87, "grad_norm": 1.1057024370006303, "learning_rate": 2.760591065670194e-05, "loss": 0.6122, "step": 15980 }, { "epoch": 1.87, "grad_norm": 0.886506665632321, "learning_rate": 2.7594502511813685e-05, "loss": 0.6534, "step": 15985 }, { "epoch": 1.87, "grad_norm": 0.8152950589213906, "learning_rate": 2.7583093820755472e-05, "loss": 0.6085, "step": 15990 }, { "epoch": 1.87, "grad_norm": 0.945828116042296, "learning_rate": 2.7571684585928958e-05, "loss": 0.5736, "step": 15995 }, { "epoch": 1.87, "grad_norm": 1.098688746584009, "learning_rate": 2.7560274809735898e-05, "loss": 0.6811, "step": 16000 }, { "epoch": 1.87, "grad_norm": 0.8332466340426692, "learning_rate": 2.754886449457817e-05, "loss": 0.6163, "step": 16005 }, { "epoch": 1.87, "grad_norm": 0.7996120088231948, "learning_rate": 2.7537453642857773e-05, "loss": 0.5831, "step": 16010 }, { "epoch": 1.87, "grad_norm": 0.8819697375827106, "learning_rate": 2.7526042256976798e-05, "loss": 0.6018, "step": 16015 }, { "epoch": 1.87, "grad_norm": 0.8417995282798617, "learning_rate": 2.7514630339337473e-05, "loss": 0.5892, "step": 16020 }, { "epoch": 1.87, "grad_norm": 0.8875331153906554, "learning_rate": 2.7503217892342116e-05, "loss": 0.5747, "step": 16025 }, { "epoch": 1.87, "grad_norm": 0.8057349187041387, "learning_rate": 2.749180491839317e-05, "loss": 0.5617, "step": 16030 }, { "epoch": 1.87, "grad_norm": 0.8020825363525532, "learning_rate": 2.7480391419893193e-05, "loss": 0.5508, "step": 16035 }, { "epoch": 1.87, "grad_norm": 0.9500815451598762, "learning_rate": 2.7468977399244837e-05, "loss": 0.5503, "step": 16040 }, { "epoch": 1.87, "grad_norm": 0.8965576506239832, "learning_rate": 2.745756285885088e-05, "loss": 0.5632, "step": 16045 }, { "epoch": 1.88, "grad_norm": 0.819190099085965, "learning_rate": 2.744614780111419e-05, "loss": 0.5793, "step": 16050 }, { "epoch": 1.88, "grad_norm": 0.8656872801588841, "learning_rate": 2.743473222843777e-05, "loss": 0.6403, "step": 16055 }, { "epoch": 1.88, "grad_norm": 0.9275576130610358, "learning_rate": 2.7423316143224716e-05, "loss": 0.7171, "step": 16060 }, { "epoch": 1.88, "grad_norm": 0.8611193076102043, "learning_rate": 2.7411899547878222e-05, "loss": 0.6863, "step": 16065 }, { "epoch": 1.88, "grad_norm": 0.9699922676618187, "learning_rate": 2.7400482444801612e-05, "loss": 0.6487, "step": 16070 }, { "epoch": 1.88, "grad_norm": 0.9271339272446701, "learning_rate": 2.7389064836398303e-05, "loss": 0.6285, "step": 16075 }, { "epoch": 1.88, "grad_norm": 0.9044821352559754, "learning_rate": 2.7377646725071827e-05, "loss": 0.5975, "step": 16080 }, { "epoch": 1.88, "grad_norm": 0.8673870922871352, "learning_rate": 2.73662281132258e-05, "loss": 0.51, "step": 16085 }, { "epoch": 1.88, "grad_norm": 0.8562655756504537, "learning_rate": 2.735480900326397e-05, "loss": 0.5777, "step": 16090 }, { "epoch": 1.88, "grad_norm": 1.0837876640952775, "learning_rate": 2.7343389397590184e-05, "loss": 0.5815, "step": 16095 }, { "epoch": 1.88, "grad_norm": 0.9740839357332823, "learning_rate": 2.7331969298608378e-05, "loss": 0.6277, "step": 16100 }, { "epoch": 1.88, "grad_norm": 0.9668206650183705, "learning_rate": 2.732054870872261e-05, "loss": 0.5675, "step": 16105 }, { "epoch": 1.88, "grad_norm": 0.9321700443566489, "learning_rate": 2.7309127630337033e-05, "loss": 0.6736, "step": 16110 }, { "epoch": 1.88, "grad_norm": 0.850400796749632, "learning_rate": 2.7297706065855898e-05, "loss": 0.59, "step": 16115 }, { "epoch": 1.88, "grad_norm": 0.88337331368329, "learning_rate": 2.7286284017683567e-05, "loss": 0.6245, "step": 16120 }, { "epoch": 1.88, "grad_norm": 0.9228023934065946, "learning_rate": 2.727486148822451e-05, "loss": 0.6126, "step": 16125 }, { "epoch": 1.88, "grad_norm": 0.9297848327807844, "learning_rate": 2.7263438479883275e-05, "loss": 0.6041, "step": 16130 }, { "epoch": 1.89, "grad_norm": 0.8897542897390996, "learning_rate": 2.7252014995064522e-05, "loss": 0.6164, "step": 16135 }, { "epoch": 1.89, "grad_norm": 0.8354749432178362, "learning_rate": 2.724059103617304e-05, "loss": 0.6245, "step": 16140 }, { "epoch": 1.89, "grad_norm": 0.9522027141721513, "learning_rate": 2.7229166605613664e-05, "loss": 0.6494, "step": 16145 }, { "epoch": 1.89, "grad_norm": 0.8991223295241942, "learning_rate": 2.7217741705791373e-05, "loss": 0.6171, "step": 16150 }, { "epoch": 1.89, "grad_norm": 0.8626663455817438, "learning_rate": 2.720631633911122e-05, "loss": 0.5996, "step": 16155 }, { "epoch": 1.89, "grad_norm": 0.89915405906176, "learning_rate": 2.7194890507978366e-05, "loss": 0.5941, "step": 16160 }, { "epoch": 1.89, "grad_norm": 0.9764651439984724, "learning_rate": 2.7183464214798065e-05, "loss": 0.5903, "step": 16165 }, { "epoch": 1.89, "grad_norm": 0.8043282982413243, "learning_rate": 2.717203746197568e-05, "loss": 0.5275, "step": 16170 }, { "epoch": 1.89, "grad_norm": 0.8424033337242354, "learning_rate": 2.7160610251916658e-05, "loss": 0.6002, "step": 16175 }, { "epoch": 1.89, "grad_norm": 0.8392593205409246, "learning_rate": 2.714918258702654e-05, "loss": 0.5489, "step": 16180 }, { "epoch": 1.89, "grad_norm": 0.8555816970175268, "learning_rate": 2.7137754469710974e-05, "loss": 0.5691, "step": 16185 }, { "epoch": 1.89, "grad_norm": 0.9432108375139995, "learning_rate": 2.712632590237571e-05, "loss": 0.6494, "step": 16190 }, { "epoch": 1.89, "grad_norm": 0.9629151410031676, "learning_rate": 2.7114896887426565e-05, "loss": 0.6422, "step": 16195 }, { "epoch": 1.89, "grad_norm": 0.8760125629876795, "learning_rate": 2.7103467427269462e-05, "loss": 0.6639, "step": 16200 }, { "epoch": 1.89, "grad_norm": 1.0141312487457848, "learning_rate": 2.709203752431045e-05, "loss": 0.559, "step": 16205 }, { "epoch": 1.89, "grad_norm": 0.9715762700049224, "learning_rate": 2.7080607180955615e-05, "loss": 0.6759, "step": 16210 }, { "epoch": 1.89, "grad_norm": 0.9885706554451825, "learning_rate": 2.7069176399611167e-05, "loss": 0.636, "step": 16215 }, { "epoch": 1.9, "grad_norm": 0.9214957989058412, "learning_rate": 2.7057745182683424e-05, "loss": 0.6248, "step": 16220 }, { "epoch": 1.9, "grad_norm": 0.8412140512772429, "learning_rate": 2.7046313532578767e-05, "loss": 0.6241, "step": 16225 }, { "epoch": 1.9, "grad_norm": 0.8698811885760802, "learning_rate": 2.703488145170367e-05, "loss": 0.5535, "step": 16230 }, { "epoch": 1.9, "grad_norm": 0.8648144351536741, "learning_rate": 2.7023448942464715e-05, "loss": 0.6362, "step": 16235 }, { "epoch": 1.9, "grad_norm": 0.8774876929432863, "learning_rate": 2.7012016007268558e-05, "loss": 0.5914, "step": 16240 }, { "epoch": 1.9, "grad_norm": 0.887143315732413, "learning_rate": 2.700058264852196e-05, "loss": 0.6272, "step": 16245 }, { "epoch": 1.9, "grad_norm": 0.935063005758012, "learning_rate": 2.6989148868631752e-05, "loss": 0.629, "step": 16250 }, { "epoch": 1.9, "grad_norm": 1.0821960174018377, "learning_rate": 2.6977714670004883e-05, "loss": 0.6524, "step": 16255 }, { "epoch": 1.9, "grad_norm": 0.8373458304438235, "learning_rate": 2.6966280055048348e-05, "loss": 0.6123, "step": 16260 }, { "epoch": 1.9, "grad_norm": 0.9265005171783958, "learning_rate": 2.6954845026169274e-05, "loss": 0.669, "step": 16265 }, { "epoch": 1.9, "grad_norm": 0.9390398529339612, "learning_rate": 2.6943409585774842e-05, "loss": 0.6138, "step": 16270 }, { "epoch": 1.9, "grad_norm": 0.8827613329609988, "learning_rate": 2.6931973736272332e-05, "loss": 0.6328, "step": 16275 }, { "epoch": 1.9, "grad_norm": 0.9182019884321903, "learning_rate": 2.6920537480069115e-05, "loss": 0.6652, "step": 16280 }, { "epoch": 1.9, "grad_norm": 0.9856380537138498, "learning_rate": 2.690910081957264e-05, "loss": 0.6202, "step": 16285 }, { "epoch": 1.9, "grad_norm": 0.8983374203247493, "learning_rate": 2.689766375719045e-05, "loss": 0.5754, "step": 16290 }, { "epoch": 1.9, "grad_norm": 0.8701257426198524, "learning_rate": 2.688622629533015e-05, "loss": 0.6083, "step": 16295 }, { "epoch": 1.9, "grad_norm": 0.8944257789411685, "learning_rate": 2.6874788436399468e-05, "loss": 0.6473, "step": 16300 }, { "epoch": 1.91, "grad_norm": 0.7986303831329686, "learning_rate": 2.686335018280618e-05, "loss": 0.6428, "step": 16305 }, { "epoch": 1.91, "grad_norm": 0.8097106579939339, "learning_rate": 2.6851911536958153e-05, "loss": 0.6428, "step": 16310 }, { "epoch": 1.91, "grad_norm": 0.830689177166533, "learning_rate": 2.6840472501263353e-05, "loss": 0.6745, "step": 16315 }, { "epoch": 1.91, "grad_norm": 0.8298203205638941, "learning_rate": 2.6829033078129818e-05, "loss": 0.5736, "step": 16320 }, { "epoch": 1.91, "grad_norm": 0.9171280769701649, "learning_rate": 2.6817593269965656e-05, "loss": 0.6021, "step": 16325 }, { "epoch": 1.91, "grad_norm": 0.8516406706788282, "learning_rate": 2.680615307917907e-05, "loss": 0.6042, "step": 16330 }, { "epoch": 1.91, "grad_norm": 0.9653169827711248, "learning_rate": 2.6794712508178344e-05, "loss": 0.6341, "step": 16335 }, { "epoch": 1.91, "grad_norm": 1.056215567765017, "learning_rate": 2.6783271559371836e-05, "loss": 0.5396, "step": 16340 }, { "epoch": 1.91, "grad_norm": 0.8384006411542563, "learning_rate": 2.6771830235167977e-05, "loss": 0.607, "step": 16345 }, { "epoch": 1.91, "grad_norm": 1.3336453249967208, "learning_rate": 2.6760388537975307e-05, "loss": 0.6252, "step": 16350 }, { "epoch": 1.91, "grad_norm": 0.8887856382899293, "learning_rate": 2.6748946470202402e-05, "loss": 0.6445, "step": 16355 }, { "epoch": 1.91, "grad_norm": 0.8663139256509812, "learning_rate": 2.673750403425795e-05, "loss": 0.5686, "step": 16360 }, { "epoch": 1.91, "grad_norm": 0.8246043116283, "learning_rate": 2.672606123255069e-05, "loss": 0.5149, "step": 16365 }, { "epoch": 1.91, "grad_norm": 0.8532783920532206, "learning_rate": 2.671461806748947e-05, "loss": 0.6382, "step": 16370 }, { "epoch": 1.91, "grad_norm": 0.9817389534989502, "learning_rate": 2.6703174541483177e-05, "loss": 0.6358, "step": 16375 }, { "epoch": 1.91, "grad_norm": 0.8444300865838213, "learning_rate": 2.6691730656940806e-05, "loss": 0.5604, "step": 16380 }, { "epoch": 1.91, "grad_norm": 0.9282467998731498, "learning_rate": 2.6680286416271422e-05, "loss": 0.5599, "step": 16385 }, { "epoch": 1.91, "grad_norm": 0.8245567772369279, "learning_rate": 2.6668841821884126e-05, "loss": 0.5736, "step": 16390 }, { "epoch": 1.92, "grad_norm": 0.9371113506022917, "learning_rate": 2.6657396876188162e-05, "loss": 0.613, "step": 16395 }, { "epoch": 1.92, "grad_norm": 0.8300184279600211, "learning_rate": 2.6645951581592786e-05, "loss": 0.5858, "step": 16400 }, { "epoch": 1.92, "grad_norm": 0.9271838086884295, "learning_rate": 2.663450594050737e-05, "loss": 0.5869, "step": 16405 }, { "epoch": 1.92, "grad_norm": 0.9884078459708783, "learning_rate": 2.6623059955341323e-05, "loss": 0.6117, "step": 16410 }, { "epoch": 1.92, "grad_norm": 0.9399718188479353, "learning_rate": 2.6611613628504156e-05, "loss": 0.6711, "step": 16415 }, { "epoch": 1.92, "grad_norm": 0.8235430722960222, "learning_rate": 2.660016696240544e-05, "loss": 0.6408, "step": 16420 }, { "epoch": 1.92, "grad_norm": 0.8540579428831879, "learning_rate": 2.658871995945481e-05, "loss": 0.6056, "step": 16425 }, { "epoch": 1.92, "grad_norm": 0.846066542908944, "learning_rate": 2.6577272622061993e-05, "loss": 0.5666, "step": 16430 }, { "epoch": 1.92, "grad_norm": 0.8734639685391002, "learning_rate": 2.656582495263677e-05, "loss": 0.6051, "step": 16435 }, { "epoch": 1.92, "grad_norm": 1.0118201204888633, "learning_rate": 2.6554376953588993e-05, "loss": 0.6603, "step": 16440 }, { "epoch": 1.92, "grad_norm": 0.948139380146183, "learning_rate": 2.654292862732858e-05, "loss": 0.5653, "step": 16445 }, { "epoch": 1.92, "grad_norm": 1.046031784317703, "learning_rate": 2.6531479976265534e-05, "loss": 0.6146, "step": 16450 }, { "epoch": 1.92, "grad_norm": 0.7937036579389388, "learning_rate": 2.6520031002809908e-05, "loss": 0.5019, "step": 16455 }, { "epoch": 1.92, "grad_norm": 0.8335148953635896, "learning_rate": 2.6508581709371828e-05, "loss": 0.5851, "step": 16460 }, { "epoch": 1.92, "grad_norm": 0.7570566263529973, "learning_rate": 2.6497132098361506e-05, "loss": 0.5716, "step": 16465 }, { "epoch": 1.92, "grad_norm": 0.9469409523832679, "learning_rate": 2.648568217218919e-05, "loss": 0.6053, "step": 16470 }, { "epoch": 1.92, "grad_norm": 0.8349934403837173, "learning_rate": 2.6474231933265214e-05, "loss": 0.5199, "step": 16475 }, { "epoch": 1.93, "grad_norm": 0.9583480579930664, "learning_rate": 2.646278138399997e-05, "loss": 0.6359, "step": 16480 }, { "epoch": 1.93, "grad_norm": 0.9462513146533498, "learning_rate": 2.645133052680393e-05, "loss": 0.7017, "step": 16485 }, { "epoch": 1.93, "grad_norm": 0.8753439140524353, "learning_rate": 2.6439879364087612e-05, "loss": 0.6299, "step": 16490 }, { "epoch": 1.93, "grad_norm": 0.8437288100362328, "learning_rate": 2.6428427898261595e-05, "loss": 0.5901, "step": 16495 }, { "epoch": 1.93, "grad_norm": 0.9124240236022785, "learning_rate": 2.6416976131736553e-05, "loss": 0.6147, "step": 16500 }, { "epoch": 1.93, "grad_norm": 1.0390665817586586, "learning_rate": 2.6405524066923188e-05, "loss": 0.6224, "step": 16505 }, { "epoch": 1.93, "grad_norm": 1.0416717994843472, "learning_rate": 2.6394071706232286e-05, "loss": 0.6315, "step": 16510 }, { "epoch": 1.93, "grad_norm": 0.7920638661302623, "learning_rate": 2.6382619052074687e-05, "loss": 0.5539, "step": 16515 }, { "epoch": 1.93, "grad_norm": 0.9084293560134566, "learning_rate": 2.637116610686129e-05, "loss": 0.5834, "step": 16520 }, { "epoch": 1.93, "grad_norm": 0.8316448292525853, "learning_rate": 2.6359712873003066e-05, "loss": 0.6082, "step": 16525 }, { "epoch": 1.93, "grad_norm": 0.9500532616079839, "learning_rate": 2.6348259352911047e-05, "loss": 0.5725, "step": 16530 }, { "epoch": 1.93, "grad_norm": 0.8984691047263826, "learning_rate": 2.6336805548996306e-05, "loss": 0.6271, "step": 16535 }, { "epoch": 1.93, "grad_norm": 0.8282147108429584, "learning_rate": 2.632535146366999e-05, "loss": 0.5914, "step": 16540 }, { "epoch": 1.93, "grad_norm": 0.8506475092981782, "learning_rate": 2.631389709934331e-05, "loss": 0.5297, "step": 16545 }, { "epoch": 1.93, "grad_norm": 0.8163470172035223, "learning_rate": 2.6302442458427535e-05, "loss": 0.6161, "step": 16550 }, { "epoch": 1.93, "grad_norm": 0.8843141338832471, "learning_rate": 2.6290987543333967e-05, "loss": 0.6149, "step": 16555 }, { "epoch": 1.93, "grad_norm": 0.8110236544239569, "learning_rate": 2.6279532356474008e-05, "loss": 0.5899, "step": 16560 }, { "epoch": 1.94, "grad_norm": 0.89710013290311, "learning_rate": 2.6268076900259083e-05, "loss": 0.6152, "step": 16565 }, { "epoch": 1.94, "grad_norm": 0.8453458929522863, "learning_rate": 2.6256621177100687e-05, "loss": 0.5538, "step": 16570 }, { "epoch": 1.94, "grad_norm": 0.9995764988508382, "learning_rate": 2.624516518941037e-05, "loss": 0.5506, "step": 16575 }, { "epoch": 1.94, "grad_norm": 0.7985082591789547, "learning_rate": 2.6233708939599738e-05, "loss": 0.6672, "step": 16580 }, { "epoch": 1.94, "grad_norm": 0.7951410022637205, "learning_rate": 2.622225243008045e-05, "loss": 0.5468, "step": 16585 }, { "epoch": 1.94, "grad_norm": 0.925844545735007, "learning_rate": 2.621079566326422e-05, "loss": 0.5619, "step": 16590 }, { "epoch": 1.94, "grad_norm": 0.8973457390987843, "learning_rate": 2.6199338641562825e-05, "loss": 0.5496, "step": 16595 }, { "epoch": 1.94, "grad_norm": 0.9555595312502254, "learning_rate": 2.6187881367388078e-05, "loss": 0.572, "step": 16600 }, { "epoch": 1.94, "grad_norm": 0.858256456733164, "learning_rate": 2.6176423843151865e-05, "loss": 0.5845, "step": 16605 }, { "epoch": 1.94, "grad_norm": 1.0504556295738423, "learning_rate": 2.6164966071266104e-05, "loss": 0.6216, "step": 16610 }, { "epoch": 1.94, "grad_norm": 1.0106378262720044, "learning_rate": 2.6153508054142795e-05, "loss": 0.561, "step": 16615 }, { "epoch": 1.94, "grad_norm": 0.8001979265884642, "learning_rate": 2.6142049794193947e-05, "loss": 0.5893, "step": 16620 }, { "epoch": 1.94, "grad_norm": 0.8868633271089033, "learning_rate": 2.6130591293831652e-05, "loss": 0.6672, "step": 16625 }, { "epoch": 1.94, "grad_norm": 0.8552408210797487, "learning_rate": 2.6119132555468055e-05, "loss": 0.6252, "step": 16630 }, { "epoch": 1.94, "grad_norm": 1.0055482549177566, "learning_rate": 2.610767358151533e-05, "loss": 0.5638, "step": 16635 }, { "epoch": 1.94, "grad_norm": 0.8464189486452749, "learning_rate": 2.6096214374385707e-05, "loss": 0.6157, "step": 16640 }, { "epoch": 1.94, "grad_norm": 0.8884619050402282, "learning_rate": 2.6084754936491484e-05, "loss": 0.5963, "step": 16645 }, { "epoch": 1.95, "grad_norm": 0.8305666401989756, "learning_rate": 2.6073295270244984e-05, "loss": 0.6318, "step": 16650 }, { "epoch": 1.95, "grad_norm": 0.8718791739176128, "learning_rate": 2.606183537805858e-05, "loss": 0.5727, "step": 16655 }, { "epoch": 1.95, "grad_norm": 0.8404766048331345, "learning_rate": 2.6050375262344707e-05, "loss": 0.5444, "step": 16660 }, { "epoch": 1.95, "grad_norm": 0.804592121241699, "learning_rate": 2.6038914925515846e-05, "loss": 0.5891, "step": 16665 }, { "epoch": 1.95, "grad_norm": 0.8035335923587422, "learning_rate": 2.6027454369984498e-05, "loss": 0.5794, "step": 16670 }, { "epoch": 1.95, "grad_norm": 0.9228027019338096, "learning_rate": 2.6015993598163257e-05, "loss": 0.5544, "step": 16675 }, { "epoch": 1.95, "grad_norm": 0.8365424636355522, "learning_rate": 2.6004532612464715e-05, "loss": 0.6053, "step": 16680 }, { "epoch": 1.95, "grad_norm": 0.8551052746039735, "learning_rate": 2.5993071415301536e-05, "loss": 0.5737, "step": 16685 }, { "epoch": 1.95, "grad_norm": 0.9170677809172477, "learning_rate": 2.5981610009086417e-05, "loss": 0.5848, "step": 16690 }, { "epoch": 1.95, "grad_norm": 0.8602008361428335, "learning_rate": 2.5970148396232115e-05, "loss": 0.651, "step": 16695 }, { "epoch": 1.95, "grad_norm": 0.9653960906626363, "learning_rate": 2.5958686579151413e-05, "loss": 0.5969, "step": 16700 }, { "epoch": 1.95, "grad_norm": 0.8434143686757325, "learning_rate": 2.594722456025714e-05, "loss": 0.6052, "step": 16705 }, { "epoch": 1.95, "grad_norm": 0.8422190248696347, "learning_rate": 2.593576234196218e-05, "loss": 0.5969, "step": 16710 }, { "epoch": 1.95, "grad_norm": 0.9962820126693613, "learning_rate": 2.592429992667944e-05, "loss": 0.6164, "step": 16715 }, { "epoch": 1.95, "grad_norm": 0.7985716172985903, "learning_rate": 2.5912837316821888e-05, "loss": 0.5559, "step": 16720 }, { "epoch": 1.95, "grad_norm": 0.817605352178141, "learning_rate": 2.5901374514802513e-05, "loss": 0.5903, "step": 16725 }, { "epoch": 1.95, "grad_norm": 0.9248492532734429, "learning_rate": 2.5889911523034365e-05, "loss": 0.6226, "step": 16730 }, { "epoch": 1.96, "grad_norm": 1.2289085615072273, "learning_rate": 2.5878448343930516e-05, "loss": 0.609, "step": 16735 }, { "epoch": 1.96, "grad_norm": 0.7378848106477746, "learning_rate": 2.586698497990409e-05, "loss": 0.563, "step": 16740 }, { "epoch": 1.96, "grad_norm": 1.0005531804580519, "learning_rate": 2.585552143336825e-05, "loss": 0.5924, "step": 16745 }, { "epoch": 1.96, "grad_norm": 0.768968006608705, "learning_rate": 2.584405770673618e-05, "loss": 0.6359, "step": 16750 }, { "epoch": 1.96, "grad_norm": 0.8862142709284415, "learning_rate": 2.583259380242113e-05, "loss": 0.5704, "step": 16755 }, { "epoch": 1.96, "grad_norm": 0.9621465603691413, "learning_rate": 2.5821129722836357e-05, "loss": 0.6572, "step": 16760 }, { "epoch": 1.96, "grad_norm": 0.9006019843685333, "learning_rate": 2.5809665470395172e-05, "loss": 0.6479, "step": 16765 }, { "epoch": 1.96, "grad_norm": 0.9092680938267377, "learning_rate": 2.5798201047510928e-05, "loss": 0.5876, "step": 16770 }, { "epoch": 1.96, "grad_norm": 0.9391622878245258, "learning_rate": 2.5786736456597e-05, "loss": 0.5706, "step": 16775 }, { "epoch": 1.96, "grad_norm": 0.9431639716438284, "learning_rate": 2.5775271700066816e-05, "loss": 0.6805, "step": 16780 }, { "epoch": 1.96, "grad_norm": 0.9030138272808294, "learning_rate": 2.576380678033381e-05, "loss": 0.6415, "step": 16785 }, { "epoch": 1.96, "grad_norm": 0.7992925270642898, "learning_rate": 2.575234169981148e-05, "loss": 0.5662, "step": 16790 }, { "epoch": 1.96, "grad_norm": 1.120085046764829, "learning_rate": 2.5740876460913345e-05, "loss": 0.6798, "step": 16795 }, { "epoch": 1.96, "grad_norm": 0.9397953257168703, "learning_rate": 2.572941106605294e-05, "loss": 0.5723, "step": 16800 }, { "epoch": 1.96, "grad_norm": 0.8598085551464412, "learning_rate": 2.5717945517643877e-05, "loss": 0.6011, "step": 16805 }, { "epoch": 1.96, "grad_norm": 0.868106234903999, "learning_rate": 2.570647981809976e-05, "loss": 0.6699, "step": 16810 }, { "epoch": 1.96, "grad_norm": 0.8118746870352042, "learning_rate": 2.5695013969834242e-05, "loss": 0.5899, "step": 16815 }, { "epoch": 1.97, "grad_norm": 0.8880877589467654, "learning_rate": 2.5683547975260997e-05, "loss": 0.6217, "step": 16820 }, { "epoch": 1.97, "grad_norm": 0.915873503949781, "learning_rate": 2.5672081836793748e-05, "loss": 0.5892, "step": 16825 }, { "epoch": 1.97, "grad_norm": 0.856335943551882, "learning_rate": 2.5660615556846236e-05, "loss": 0.5516, "step": 16830 }, { "epoch": 1.97, "grad_norm": 0.8286314251356766, "learning_rate": 2.564914913783222e-05, "loss": 0.519, "step": 16835 }, { "epoch": 1.97, "grad_norm": 0.7498305594885852, "learning_rate": 2.563768258216552e-05, "loss": 0.5765, "step": 16840 }, { "epoch": 1.97, "grad_norm": 0.8991917670335136, "learning_rate": 2.5626215892259953e-05, "loss": 0.6489, "step": 16845 }, { "epoch": 1.97, "grad_norm": 0.8736694743046873, "learning_rate": 2.5614749070529382e-05, "loss": 0.5791, "step": 16850 }, { "epoch": 1.97, "grad_norm": 0.904213413843636, "learning_rate": 2.5603282119387685e-05, "loss": 0.5843, "step": 16855 }, { "epoch": 1.97, "grad_norm": 0.7930718957465704, "learning_rate": 2.5591815041248796e-05, "loss": 0.6205, "step": 16860 }, { "epoch": 1.97, "grad_norm": 0.8974345724699936, "learning_rate": 2.558034783852663e-05, "loss": 0.5887, "step": 16865 }, { "epoch": 1.97, "grad_norm": 0.9841381922253502, "learning_rate": 2.5568880513635162e-05, "loss": 0.6442, "step": 16870 }, { "epoch": 1.97, "grad_norm": 0.9042253499119333, "learning_rate": 2.55574130689884e-05, "loss": 0.655, "step": 16875 }, { "epoch": 1.97, "grad_norm": 0.7442416514466498, "learning_rate": 2.554594550700034e-05, "loss": 0.5884, "step": 16880 }, { "epoch": 1.97, "grad_norm": 1.2975522908602373, "learning_rate": 2.5534477830085036e-05, "loss": 0.5685, "step": 16885 }, { "epoch": 1.97, "grad_norm": 0.8543090316384669, "learning_rate": 2.552301004065654e-05, "loss": 0.6303, "step": 16890 }, { "epoch": 1.97, "grad_norm": 0.9595412894206504, "learning_rate": 2.551154214112897e-05, "loss": 0.5736, "step": 16895 }, { "epoch": 1.97, "grad_norm": 0.9529149076817772, "learning_rate": 2.5500074133916406e-05, "loss": 0.6283, "step": 16900 }, { "epoch": 1.98, "grad_norm": 0.8358242303560207, "learning_rate": 2.5488606021433005e-05, "loss": 0.6515, "step": 16905 }, { "epoch": 1.98, "grad_norm": 0.8982360038691455, "learning_rate": 2.5477137806092923e-05, "loss": 0.5438, "step": 16910 }, { "epoch": 1.98, "grad_norm": 0.7951213507262371, "learning_rate": 2.546566949031032e-05, "loss": 0.5642, "step": 16915 }, { "epoch": 1.98, "grad_norm": 0.8782406187411251, "learning_rate": 2.5454201076499422e-05, "loss": 0.6379, "step": 16920 }, { "epoch": 1.98, "grad_norm": 0.9153368453589131, "learning_rate": 2.544273256707444e-05, "loss": 0.6252, "step": 16925 }, { "epoch": 1.98, "grad_norm": 0.8148545242138204, "learning_rate": 2.5431263964449614e-05, "loss": 0.563, "step": 16930 }, { "epoch": 1.98, "grad_norm": 0.7197345301449499, "learning_rate": 2.5419795271039203e-05, "loss": 0.6045, "step": 16935 }, { "epoch": 1.98, "grad_norm": 0.8393789454018323, "learning_rate": 2.5408326489257496e-05, "loss": 0.6014, "step": 16940 }, { "epoch": 1.98, "grad_norm": 0.7903400215076338, "learning_rate": 2.5396857621518783e-05, "loss": 0.6736, "step": 16945 }, { "epoch": 1.98, "grad_norm": 0.8782324369881968, "learning_rate": 2.5385388670237374e-05, "loss": 0.6072, "step": 16950 }, { "epoch": 1.98, "grad_norm": 0.7465079656224758, "learning_rate": 2.5373919637827627e-05, "loss": 0.5816, "step": 16955 }, { "epoch": 1.98, "grad_norm": 0.8933501622674589, "learning_rate": 2.5362450526703874e-05, "loss": 0.6506, "step": 16960 }, { "epoch": 1.98, "grad_norm": 0.7706031489737, "learning_rate": 2.535098133928049e-05, "loss": 0.6377, "step": 16965 }, { "epoch": 1.98, "grad_norm": 0.852046615275629, "learning_rate": 2.533951207797185e-05, "loss": 0.5703, "step": 16970 }, { "epoch": 1.98, "grad_norm": 0.8156620801240052, "learning_rate": 2.532804274519237e-05, "loss": 0.6108, "step": 16975 }, { "epoch": 1.98, "grad_norm": 1.0297750440049749, "learning_rate": 2.5316573343356454e-05, "loss": 0.6874, "step": 16980 }, { "epoch": 1.98, "grad_norm": 0.8602869864432551, "learning_rate": 2.5305103874878533e-05, "loss": 0.6578, "step": 16985 }, { "epoch": 1.99, "grad_norm": 1.0371792763361753, "learning_rate": 2.529363434217305e-05, "loss": 0.6033, "step": 16990 }, { "epoch": 1.99, "grad_norm": 0.9238445910411287, "learning_rate": 2.5282164747654463e-05, "loss": 0.6279, "step": 16995 }, { "epoch": 1.99, "grad_norm": 0.9401134678293528, "learning_rate": 2.5270695093737235e-05, "loss": 0.6026, "step": 17000 }, { "epoch": 1.99, "grad_norm": 0.9249105829062576, "learning_rate": 2.5259225382835862e-05, "loss": 0.5666, "step": 17005 }, { "epoch": 1.99, "grad_norm": 1.0329416712213435, "learning_rate": 2.5247755617364828e-05, "loss": 0.5789, "step": 17010 }, { "epoch": 1.99, "grad_norm": 0.8996205439380367, "learning_rate": 2.523628579973864e-05, "loss": 0.6134, "step": 17015 }, { "epoch": 1.99, "grad_norm": 0.996982043749138, "learning_rate": 2.5224815932371815e-05, "loss": 0.5876, "step": 17020 }, { "epoch": 1.99, "grad_norm": 0.9602185979829699, "learning_rate": 2.521334601767889e-05, "loss": 0.5928, "step": 17025 }, { "epoch": 1.99, "grad_norm": 0.9207581679596757, "learning_rate": 2.5201876058074386e-05, "loss": 0.6419, "step": 17030 }, { "epoch": 1.99, "grad_norm": 0.9976838175883397, "learning_rate": 2.5190406055972854e-05, "loss": 0.5844, "step": 17035 }, { "epoch": 1.99, "grad_norm": 0.908672282345896, "learning_rate": 2.5178936013788862e-05, "loss": 0.6398, "step": 17040 }, { "epoch": 1.99, "grad_norm": 0.9513677094943923, "learning_rate": 2.5167465933936956e-05, "loss": 0.6254, "step": 17045 }, { "epoch": 1.99, "grad_norm": 0.8867481863888266, "learning_rate": 2.5155995818831725e-05, "loss": 0.6178, "step": 17050 }, { "epoch": 1.99, "grad_norm": 0.9079319261230471, "learning_rate": 2.514452567088773e-05, "loss": 0.6834, "step": 17055 }, { "epoch": 1.99, "grad_norm": 0.9006898502150343, "learning_rate": 2.5133055492519574e-05, "loss": 0.623, "step": 17060 }, { "epoch": 1.99, "grad_norm": 0.8621736262313544, "learning_rate": 2.512158528614184e-05, "loss": 0.5758, "step": 17065 }, { "epoch": 1.99, "grad_norm": 0.9459559248324776, "learning_rate": 2.5110115054169136e-05, "loss": 0.6371, "step": 17070 }, { "epoch": 1.99, "grad_norm": 1.0604462835247377, "learning_rate": 2.5098644799016053e-05, "loss": 0.578, "step": 17075 }, { "epoch": 2.0, "grad_norm": 0.9134108857072495, "learning_rate": 2.5087174523097206e-05, "loss": 0.5858, "step": 17080 }, { "epoch": 2.0, "grad_norm": 1.0205067462570563, "learning_rate": 2.5075704228827213e-05, "loss": 0.5686, "step": 17085 }, { "epoch": 2.0, "grad_norm": 0.9497294364292642, "learning_rate": 2.506423391862069e-05, "loss": 0.6471, "step": 17090 }, { "epoch": 2.0, "grad_norm": 0.8423440191690329, "learning_rate": 2.505276359489225e-05, "loss": 0.5846, "step": 17095 }, { "epoch": 2.0, "grad_norm": 0.904922017254225, "learning_rate": 2.5041293260056514e-05, "loss": 0.5598, "step": 17100 }, { "epoch": 2.0, "grad_norm": 0.9168715776753865, "learning_rate": 2.5029822916528124e-05, "loss": 0.6542, "step": 17105 }, { "epoch": 2.0, "grad_norm": 0.8420788907513983, "learning_rate": 2.501835256672169e-05, "loss": 0.5946, "step": 17110 }, { "epoch": 2.0, "grad_norm": 0.8939470086470097, "learning_rate": 2.5006882213051853e-05, "loss": 0.6605, "step": 17115 }, { "epoch": 2.0, "grad_norm": 0.8333788677494608, "learning_rate": 2.4995411857933236e-05, "loss": 0.5478, "step": 17120 }, { "epoch": 2.0, "grad_norm": 0.9603733468073307, "learning_rate": 2.4983941503780474e-05, "loss": 0.3922, "step": 17125 }, { "epoch": 2.0, "grad_norm": 0.8859297788558389, "learning_rate": 2.497247115300819e-05, "loss": 0.3093, "step": 17130 }, { "epoch": 2.0, "grad_norm": 1.0192122631091693, "learning_rate": 2.4961000808031028e-05, "loss": 0.3381, "step": 17135 }, { "epoch": 2.0, "grad_norm": 0.9176969557641856, "learning_rate": 2.4949530471263593e-05, "loss": 0.3637, "step": 17140 }, { "epoch": 2.0, "grad_norm": 1.0327964684954671, "learning_rate": 2.4938060145120523e-05, "loss": 0.3189, "step": 17145 }, { "epoch": 2.0, "grad_norm": 0.9816091433877076, "learning_rate": 2.4926589832016438e-05, "loss": 0.3442, "step": 17150 }, { "epoch": 2.0, "grad_norm": 0.9140664639954762, "learning_rate": 2.4915119534365968e-05, "loss": 0.3332, "step": 17155 }, { "epoch": 2.0, "grad_norm": 0.9379566439341127, "learning_rate": 2.4903649254583732e-05, "loss": 0.3005, "step": 17160 }, { "epoch": 2.01, "grad_norm": 0.9916249328406576, "learning_rate": 2.4892178995084327e-05, "loss": 0.3448, "step": 17165 }, { "epoch": 2.01, "grad_norm": 0.8194002451321004, "learning_rate": 2.4880708758282376e-05, "loss": 0.2776, "step": 17170 }, { "epoch": 2.01, "grad_norm": 0.8248128527151911, "learning_rate": 2.4869238546592478e-05, "loss": 0.3432, "step": 17175 }, { "epoch": 2.01, "grad_norm": 1.0940180123900358, "learning_rate": 2.485776836242923e-05, "loss": 0.3571, "step": 17180 }, { "epoch": 2.01, "grad_norm": 0.8077867248896436, "learning_rate": 2.484629820820725e-05, "loss": 0.3184, "step": 17185 }, { "epoch": 2.01, "grad_norm": 0.8799185092935281, "learning_rate": 2.48348280863411e-05, "loss": 0.3134, "step": 17190 }, { "epoch": 2.01, "grad_norm": 0.8492333707808328, "learning_rate": 2.482335799924536e-05, "loss": 0.2756, "step": 17195 }, { "epoch": 2.01, "grad_norm": 0.9653531459324595, "learning_rate": 2.481188794933462e-05, "loss": 0.3286, "step": 17200 }, { "epoch": 2.01, "grad_norm": 0.9910112047087751, "learning_rate": 2.4800417939023436e-05, "loss": 0.3609, "step": 17205 }, { "epoch": 2.01, "grad_norm": 1.060884259039683, "learning_rate": 2.4788947970726362e-05, "loss": 0.3572, "step": 17210 }, { "epoch": 2.01, "grad_norm": 0.8864800880756659, "learning_rate": 2.477747804685795e-05, "loss": 0.2775, "step": 17215 }, { "epoch": 2.01, "grad_norm": 0.949222088740071, "learning_rate": 2.476600816983274e-05, "loss": 0.2796, "step": 17220 }, { "epoch": 2.01, "grad_norm": 0.9699748257748605, "learning_rate": 2.4754538342065262e-05, "loss": 0.3025, "step": 17225 }, { "epoch": 2.01, "grad_norm": 0.8788443815813005, "learning_rate": 2.4743068565970044e-05, "loss": 0.3457, "step": 17230 }, { "epoch": 2.01, "grad_norm": 1.1169666507318705, "learning_rate": 2.4731598843961572e-05, "loss": 0.3445, "step": 17235 }, { "epoch": 2.01, "grad_norm": 0.917127682219297, "learning_rate": 2.472012917845436e-05, "loss": 0.3034, "step": 17240 }, { "epoch": 2.01, "grad_norm": 0.953704266404627, "learning_rate": 2.4708659571862887e-05, "loss": 0.2491, "step": 17245 }, { "epoch": 2.02, "grad_norm": 1.0856311027224432, "learning_rate": 2.4697190026601623e-05, "loss": 0.3461, "step": 17250 }, { "epoch": 2.02, "grad_norm": 0.8834116242457338, "learning_rate": 2.4685720545085042e-05, "loss": 0.3397, "step": 17255 }, { "epoch": 2.02, "grad_norm": 0.9189022126420227, "learning_rate": 2.4674251129727566e-05, "loss": 0.309, "step": 17260 }, { "epoch": 2.02, "grad_norm": 1.0504422218122913, "learning_rate": 2.4662781782943638e-05, "loss": 0.336, "step": 17265 }, { "epoch": 2.02, "grad_norm": 0.8912952056533301, "learning_rate": 2.4651312507147684e-05, "loss": 0.315, "step": 17270 }, { "epoch": 2.02, "grad_norm": 0.878345353339824, "learning_rate": 2.4639843304754104e-05, "loss": 0.3055, "step": 17275 }, { "epoch": 2.02, "grad_norm": 0.9684809126947463, "learning_rate": 2.4628374178177272e-05, "loss": 0.343, "step": 17280 }, { "epoch": 2.02, "grad_norm": 0.9746897312903375, "learning_rate": 2.4616905129831572e-05, "loss": 0.2882, "step": 17285 }, { "epoch": 2.02, "grad_norm": 0.8404782475832167, "learning_rate": 2.4605436162131347e-05, "loss": 0.2955, "step": 17290 }, { "epoch": 2.02, "grad_norm": 0.8506945892178218, "learning_rate": 2.4593967277490946e-05, "loss": 0.4619, "step": 17295 }, { "epoch": 2.02, "grad_norm": 0.9541116771277358, "learning_rate": 2.4582498478324698e-05, "loss": 0.3051, "step": 17300 }, { "epoch": 2.02, "grad_norm": 0.8236177620344743, "learning_rate": 2.457102976704689e-05, "loss": 0.3451, "step": 17305 }, { "epoch": 2.02, "grad_norm": 0.9213990587629239, "learning_rate": 2.45595611460718e-05, "loss": 0.3607, "step": 17310 }, { "epoch": 2.02, "grad_norm": 0.8723357222497063, "learning_rate": 2.454809261781371e-05, "loss": 0.2929, "step": 17315 }, { "epoch": 2.02, "grad_norm": 0.9480733391434952, "learning_rate": 2.4536624184686847e-05, "loss": 0.3125, "step": 17320 }, { "epoch": 2.02, "grad_norm": 1.5675813618845456, "learning_rate": 2.4525155849105465e-05, "loss": 0.3024, "step": 17325 }, { "epoch": 2.02, "grad_norm": 0.9627573940108838, "learning_rate": 2.4513687613483734e-05, "loss": 0.3457, "step": 17330 }, { "epoch": 2.03, "grad_norm": 1.0243274837634995, "learning_rate": 2.4502219480235862e-05, "loss": 0.3384, "step": 17335 }, { "epoch": 2.03, "grad_norm": 0.8480022057754414, "learning_rate": 2.4490751451776002e-05, "loss": 0.3139, "step": 17340 }, { "epoch": 2.03, "grad_norm": 0.9903741283030525, "learning_rate": 2.4479283530518302e-05, "loss": 0.2594, "step": 17345 }, { "epoch": 2.03, "grad_norm": 1.0199372197175123, "learning_rate": 2.4467815718876863e-05, "loss": 0.3362, "step": 17350 }, { "epoch": 2.03, "grad_norm": 0.8618474272851386, "learning_rate": 2.4456348019265794e-05, "loss": 0.2858, "step": 17355 }, { "epoch": 2.03, "grad_norm": 0.9735975817160154, "learning_rate": 2.444488043409916e-05, "loss": 0.3503, "step": 17360 }, { "epoch": 2.03, "grad_norm": 0.8898212656121688, "learning_rate": 2.4433412965791014e-05, "loss": 0.3476, "step": 17365 }, { "epoch": 2.03, "grad_norm": 1.020482655923461, "learning_rate": 2.442194561675538e-05, "loss": 0.3517, "step": 17370 }, { "epoch": 2.03, "grad_norm": 0.8976920607162705, "learning_rate": 2.4410478389406236e-05, "loss": 0.3012, "step": 17375 }, { "epoch": 2.03, "grad_norm": 0.7375671295520114, "learning_rate": 2.4399011286157564e-05, "loss": 0.2654, "step": 17380 }, { "epoch": 2.03, "grad_norm": 0.8872065117202679, "learning_rate": 2.438754430942332e-05, "loss": 0.2868, "step": 17385 }, { "epoch": 2.03, "grad_norm": 0.9658662036254859, "learning_rate": 2.4376077461617407e-05, "loss": 0.2665, "step": 17390 }, { "epoch": 2.03, "grad_norm": 0.9271367226076082, "learning_rate": 2.4364610745153736e-05, "loss": 0.2678, "step": 17395 }, { "epoch": 2.03, "grad_norm": 0.8490318071965303, "learning_rate": 2.4353144162446153e-05, "loss": 0.2665, "step": 17400 }, { "epoch": 2.03, "grad_norm": 0.858582147563123, "learning_rate": 2.4341677715908494e-05, "loss": 0.3059, "step": 17405 }, { "epoch": 2.03, "grad_norm": 1.0230979388841956, "learning_rate": 2.4330211407954574e-05, "loss": 0.3552, "step": 17410 }, { "epoch": 2.03, "grad_norm": 0.988131308725531, "learning_rate": 2.4318745240998178e-05, "loss": 0.3, "step": 17415 }, { "epoch": 2.04, "grad_norm": 1.0460092365578237, "learning_rate": 2.430727921745304e-05, "loss": 0.3038, "step": 17420 }, { "epoch": 2.04, "grad_norm": 0.8903441366020896, "learning_rate": 2.4295813339732875e-05, "loss": 0.3011, "step": 17425 }, { "epoch": 2.04, "grad_norm": 0.9269108066605277, "learning_rate": 2.4284347610251384e-05, "loss": 0.303, "step": 17430 }, { "epoch": 2.04, "grad_norm": 0.8970700240366455, "learning_rate": 2.4272882031422215e-05, "loss": 0.324, "step": 17435 }, { "epoch": 2.04, "grad_norm": 0.8670441823301787, "learning_rate": 2.4261416605659005e-05, "loss": 0.3332, "step": 17440 }, { "epoch": 2.04, "grad_norm": 0.947934789143799, "learning_rate": 2.4249951335375322e-05, "loss": 0.3078, "step": 17445 }, { "epoch": 2.04, "grad_norm": 0.7845898370122005, "learning_rate": 2.423848622298475e-05, "loss": 0.3079, "step": 17450 }, { "epoch": 2.04, "grad_norm": 0.9041180105863424, "learning_rate": 2.4227021270900796e-05, "loss": 0.3561, "step": 17455 }, { "epoch": 2.04, "grad_norm": 0.9969265199985096, "learning_rate": 2.4215556481536962e-05, "loss": 0.3632, "step": 17460 }, { "epoch": 2.04, "grad_norm": 0.9843266685052504, "learning_rate": 2.4204091857306715e-05, "loss": 0.3265, "step": 17465 }, { "epoch": 2.04, "grad_norm": 1.0200931355251979, "learning_rate": 2.4192627400623467e-05, "loss": 0.2828, "step": 17470 }, { "epoch": 2.04, "grad_norm": 0.9250327050669283, "learning_rate": 2.41811631139006e-05, "loss": 0.3227, "step": 17475 }, { "epoch": 2.04, "grad_norm": 0.967646323264111, "learning_rate": 2.4169698999551483e-05, "loss": 0.3231, "step": 17480 }, { "epoch": 2.04, "grad_norm": 0.8174524454148893, "learning_rate": 2.4158235059989426e-05, "loss": 0.2788, "step": 17485 }, { "epoch": 2.04, "grad_norm": 0.8596024109022202, "learning_rate": 2.4146771297627697e-05, "loss": 0.3339, "step": 17490 }, { "epoch": 2.04, "grad_norm": 0.789800962791306, "learning_rate": 2.4135307714879546e-05, "loss": 0.4363, "step": 17495 }, { "epoch": 2.04, "grad_norm": 0.9778444661411706, "learning_rate": 2.4123844314158186e-05, "loss": 0.3007, "step": 17500 }, { "epoch": 2.05, "grad_norm": 0.8572374376686605, "learning_rate": 2.4112381097876772e-05, "loss": 0.2994, "step": 17505 }, { "epoch": 2.05, "grad_norm": 0.9458634889415379, "learning_rate": 2.4100918068448442e-05, "loss": 0.305, "step": 17510 }, { "epoch": 2.05, "grad_norm": 1.2084205887146289, "learning_rate": 2.408945522828627e-05, "loss": 0.2995, "step": 17515 }, { "epoch": 2.05, "grad_norm": 0.8589052641322348, "learning_rate": 2.4077992579803306e-05, "loss": 0.2545, "step": 17520 }, { "epoch": 2.05, "grad_norm": 1.0193983154870474, "learning_rate": 2.4066530125412565e-05, "loss": 0.2631, "step": 17525 }, { "epoch": 2.05, "grad_norm": 0.8330716395593406, "learning_rate": 2.4055067867527014e-05, "loss": 0.3248, "step": 17530 }, { "epoch": 2.05, "grad_norm": 0.8001572156382677, "learning_rate": 2.404360580855958e-05, "loss": 0.3142, "step": 17535 }, { "epoch": 2.05, "grad_norm": 0.9512966692156783, "learning_rate": 2.403214395092313e-05, "loss": 0.2876, "step": 17540 }, { "epoch": 2.05, "grad_norm": 0.8008460898710926, "learning_rate": 2.4020682297030524e-05, "loss": 0.3022, "step": 17545 }, { "epoch": 2.05, "grad_norm": 1.0463965729666374, "learning_rate": 2.4009220849294544e-05, "loss": 0.317, "step": 17550 }, { "epoch": 2.05, "grad_norm": 0.9652821189327888, "learning_rate": 2.3997759610127966e-05, "loss": 0.3073, "step": 17555 }, { "epoch": 2.05, "grad_norm": 0.8204171538106345, "learning_rate": 2.3986298581943482e-05, "loss": 0.294, "step": 17560 }, { "epoch": 2.05, "grad_norm": 0.8837246568376214, "learning_rate": 2.3974837767153767e-05, "loss": 0.3237, "step": 17565 }, { "epoch": 2.05, "grad_norm": 1.0141254398224684, "learning_rate": 2.3963377168171435e-05, "loss": 0.2863, "step": 17570 }, { "epoch": 2.05, "grad_norm": 1.0463994619453385, "learning_rate": 2.3951916787409065e-05, "loss": 0.3563, "step": 17575 }, { "epoch": 2.05, "grad_norm": 0.91538276156957, "learning_rate": 2.3940456627279202e-05, "loss": 0.2804, "step": 17580 }, { "epoch": 2.05, "grad_norm": 0.8199897389856885, "learning_rate": 2.3928996690194314e-05, "loss": 0.3123, "step": 17585 }, { "epoch": 2.06, "grad_norm": 0.9019888526692975, "learning_rate": 2.3917536978566835e-05, "loss": 0.323, "step": 17590 }, { "epoch": 2.06, "grad_norm": 0.9566856084089564, "learning_rate": 2.390607749480917e-05, "loss": 0.3045, "step": 17595 }, { "epoch": 2.06, "grad_norm": 0.933611932594224, "learning_rate": 2.3894618241333638e-05, "loss": 0.3521, "step": 17600 }, { "epoch": 2.06, "grad_norm": 0.7277770884507857, "learning_rate": 2.388315922055256e-05, "loss": 0.2754, "step": 17605 }, { "epoch": 2.06, "grad_norm": 0.739979901483226, "learning_rate": 2.3871700434878154e-05, "loss": 0.2781, "step": 17610 }, { "epoch": 2.06, "grad_norm": 0.9622292982299171, "learning_rate": 2.3860241886722632e-05, "loss": 0.3037, "step": 17615 }, { "epoch": 2.06, "grad_norm": 0.8778439527538725, "learning_rate": 2.3848783578498124e-05, "loss": 0.2683, "step": 17620 }, { "epoch": 2.06, "grad_norm": 0.9834064417215478, "learning_rate": 2.383732551261674e-05, "loss": 0.2865, "step": 17625 }, { "epoch": 2.06, "grad_norm": 0.8127924211541777, "learning_rate": 2.3825867691490513e-05, "loss": 0.295, "step": 17630 }, { "epoch": 2.06, "grad_norm": 1.0956783553251446, "learning_rate": 2.3814410117531433e-05, "loss": 0.3838, "step": 17635 }, { "epoch": 2.06, "grad_norm": 1.0508621690876265, "learning_rate": 2.3802952793151438e-05, "loss": 0.3265, "step": 17640 }, { "epoch": 2.06, "grad_norm": 0.9314797582825766, "learning_rate": 2.3791495720762426e-05, "loss": 0.2913, "step": 17645 }, { "epoch": 2.06, "grad_norm": 1.0664927404042088, "learning_rate": 2.3780038902776226e-05, "loss": 0.322, "step": 17650 }, { "epoch": 2.06, "grad_norm": 0.990039974345198, "learning_rate": 2.376858234160461e-05, "loss": 0.3088, "step": 17655 }, { "epoch": 2.06, "grad_norm": 0.9445338526591907, "learning_rate": 2.3757126039659314e-05, "loss": 0.3368, "step": 17660 }, { "epoch": 2.06, "grad_norm": 0.8944551492459483, "learning_rate": 2.3745669999352e-05, "loss": 0.3041, "step": 17665 }, { "epoch": 2.06, "grad_norm": 0.7361238350213304, "learning_rate": 2.3734214223094288e-05, "loss": 0.2971, "step": 17670 }, { "epoch": 2.07, "grad_norm": 0.9845168746919798, "learning_rate": 2.3722758713297756e-05, "loss": 0.2899, "step": 17675 }, { "epoch": 2.07, "grad_norm": 0.9908751426023606, "learning_rate": 2.3711303472373882e-05, "loss": 0.3371, "step": 17680 }, { "epoch": 2.07, "grad_norm": 1.0223196264062282, "learning_rate": 2.3699848502734126e-05, "loss": 0.3635, "step": 17685 }, { "epoch": 2.07, "grad_norm": 0.8192788140094526, "learning_rate": 2.3688393806789876e-05, "loss": 0.3069, "step": 17690 }, { "epoch": 2.07, "grad_norm": 0.9177860908789507, "learning_rate": 2.367693938695248e-05, "loss": 0.3248, "step": 17695 }, { "epoch": 2.07, "grad_norm": 0.9126884935041745, "learning_rate": 2.36654852456332e-05, "loss": 0.2971, "step": 17700 }, { "epoch": 2.07, "grad_norm": 1.8346638059648912, "learning_rate": 2.3654031385243248e-05, "loss": 0.4914, "step": 17705 }, { "epoch": 2.07, "grad_norm": 0.7579956220568959, "learning_rate": 2.3642577808193793e-05, "loss": 0.2594, "step": 17710 }, { "epoch": 2.07, "grad_norm": 0.9826270141906627, "learning_rate": 2.3631124516895924e-05, "loss": 0.3118, "step": 17715 }, { "epoch": 2.07, "grad_norm": 1.0299733042950374, "learning_rate": 2.361967151376069e-05, "loss": 0.3255, "step": 17720 }, { "epoch": 2.07, "grad_norm": 1.070308244402529, "learning_rate": 2.3608218801199054e-05, "loss": 0.3384, "step": 17725 }, { "epoch": 2.07, "grad_norm": 0.8309321333151022, "learning_rate": 2.3596766381621944e-05, "loss": 0.336, "step": 17730 }, { "epoch": 2.07, "grad_norm": 1.0075054481170602, "learning_rate": 2.3585314257440202e-05, "loss": 0.3402, "step": 17735 }, { "epoch": 2.07, "grad_norm": 0.8817988330814701, "learning_rate": 2.3573862431064636e-05, "loss": 0.271, "step": 17740 }, { "epoch": 2.07, "grad_norm": 0.9206174617142147, "learning_rate": 2.3562410904905973e-05, "loss": 0.3056, "step": 17745 }, { "epoch": 2.07, "grad_norm": 0.7865613476564897, "learning_rate": 2.3550959681374858e-05, "loss": 0.256, "step": 17750 }, { "epoch": 2.07, "grad_norm": 0.8522575872914615, "learning_rate": 2.353950876288191e-05, "loss": 0.349, "step": 17755 }, { "epoch": 2.08, "grad_norm": 0.7993515698440024, "learning_rate": 2.3528058151837674e-05, "loss": 0.3321, "step": 17760 }, { "epoch": 2.08, "grad_norm": 0.9547298977592046, "learning_rate": 2.3516607850652604e-05, "loss": 0.2937, "step": 17765 }, { "epoch": 2.08, "grad_norm": 0.9414497661479948, "learning_rate": 2.3505157861737137e-05, "loss": 0.3422, "step": 17770 }, { "epoch": 2.08, "grad_norm": 0.860544991148803, "learning_rate": 2.3493708187501592e-05, "loss": 0.3225, "step": 17775 }, { "epoch": 2.08, "grad_norm": 0.7513022922227058, "learning_rate": 2.3482258830356243e-05, "loss": 0.3321, "step": 17780 }, { "epoch": 2.08, "grad_norm": 0.7909143198209917, "learning_rate": 2.3470809792711312e-05, "loss": 0.258, "step": 17785 }, { "epoch": 2.08, "grad_norm": 1.1973799650308607, "learning_rate": 2.345936107697695e-05, "loss": 0.3477, "step": 17790 }, { "epoch": 2.08, "grad_norm": 0.7830698848979976, "learning_rate": 2.3447912685563208e-05, "loss": 0.327, "step": 17795 }, { "epoch": 2.08, "grad_norm": 0.8341165224803347, "learning_rate": 2.3436464620880104e-05, "loss": 0.2748, "step": 17800 }, { "epoch": 2.08, "grad_norm": 0.949280199241921, "learning_rate": 2.342501688533757e-05, "loss": 0.3201, "step": 17805 }, { "epoch": 2.08, "grad_norm": 0.8998789013407463, "learning_rate": 2.3413569481345487e-05, "loss": 0.3298, "step": 17810 }, { "epoch": 2.08, "grad_norm": 0.9546963615119591, "learning_rate": 2.3402122411313654e-05, "loss": 0.2971, "step": 17815 }, { "epoch": 2.08, "grad_norm": 1.1113313676138408, "learning_rate": 2.3390675677651777e-05, "loss": 0.3424, "step": 17820 }, { "epoch": 2.08, "grad_norm": 0.97477551525791, "learning_rate": 2.3379229282769536e-05, "loss": 0.3156, "step": 17825 }, { "epoch": 2.08, "grad_norm": 0.9457693468387023, "learning_rate": 2.33677832290765e-05, "loss": 0.2806, "step": 17830 }, { "epoch": 2.08, "grad_norm": 0.825824585582643, "learning_rate": 2.3356337518982193e-05, "loss": 0.2881, "step": 17835 }, { "epoch": 2.08, "grad_norm": 0.8299506431675037, "learning_rate": 2.3344892154896063e-05, "loss": 0.3074, "step": 17840 }, { "epoch": 2.08, "grad_norm": 0.8890205053188829, "learning_rate": 2.333344713922747e-05, "loss": 0.373, "step": 17845 }, { "epoch": 2.09, "grad_norm": 0.7929268480744854, "learning_rate": 2.3322002474385708e-05, "loss": 0.2844, "step": 17850 }, { "epoch": 2.09, "grad_norm": 0.8673610739122468, "learning_rate": 2.3310558162780002e-05, "loss": 0.3196, "step": 17855 }, { "epoch": 2.09, "grad_norm": 0.6794440522669515, "learning_rate": 2.329911420681951e-05, "loss": 0.3136, "step": 17860 }, { "epoch": 2.09, "grad_norm": 0.8922434111330548, "learning_rate": 2.328767060891328e-05, "loss": 0.3092, "step": 17865 }, { "epoch": 2.09, "grad_norm": 0.9063661460113771, "learning_rate": 2.327622737147032e-05, "loss": 0.3014, "step": 17870 }, { "epoch": 2.09, "grad_norm": 1.2526418271553343, "learning_rate": 2.3264784496899567e-05, "loss": 0.3411, "step": 17875 }, { "epoch": 2.09, "grad_norm": 1.0397573969835696, "learning_rate": 2.3253341987609844e-05, "loss": 0.3094, "step": 17880 }, { "epoch": 2.09, "grad_norm": 1.0375345942566607, "learning_rate": 2.324189984600994e-05, "loss": 0.3142, "step": 17885 }, { "epoch": 2.09, "grad_norm": 0.9354433773364085, "learning_rate": 2.323045807450853e-05, "loss": 0.3201, "step": 17890 }, { "epoch": 2.09, "grad_norm": 1.0440716763013076, "learning_rate": 2.3219016675514226e-05, "loss": 0.3009, "step": 17895 }, { "epoch": 2.09, "grad_norm": 0.8227562198699478, "learning_rate": 2.3207575651435567e-05, "loss": 0.2786, "step": 17900 }, { "epoch": 2.09, "grad_norm": 0.8470677011857068, "learning_rate": 2.3196135004681015e-05, "loss": 0.3022, "step": 17905 }, { "epoch": 2.09, "grad_norm": 1.0751179292977704, "learning_rate": 2.3184694737658942e-05, "loss": 0.3175, "step": 17910 }, { "epoch": 2.09, "grad_norm": 1.117780919164468, "learning_rate": 2.3173254852777635e-05, "loss": 0.3277, "step": 17915 }, { "epoch": 2.09, "grad_norm": 0.9172643757132686, "learning_rate": 2.3161815352445314e-05, "loss": 0.311, "step": 17920 }, { "epoch": 2.09, "grad_norm": 0.944647364858098, "learning_rate": 2.3150376239070122e-05, "loss": 0.3401, "step": 17925 }, { "epoch": 2.09, "grad_norm": 0.9739125334211022, "learning_rate": 2.3138937515060113e-05, "loss": 0.3075, "step": 17930 }, { "epoch": 2.1, "grad_norm": 0.7872825078347776, "learning_rate": 2.3127499182823238e-05, "loss": 0.3423, "step": 17935 }, { "epoch": 2.1, "grad_norm": 0.8438321710149619, "learning_rate": 2.311606124476741e-05, "loss": 0.3619, "step": 17940 }, { "epoch": 2.1, "grad_norm": 0.9577101179855763, "learning_rate": 2.3104623703300416e-05, "loss": 0.3245, "step": 17945 }, { "epoch": 2.1, "grad_norm": 0.8867860968444758, "learning_rate": 2.3093186560829984e-05, "loss": 0.3493, "step": 17950 }, { "epoch": 2.1, "grad_norm": 0.7615531967554799, "learning_rate": 2.308174981976377e-05, "loss": 0.3213, "step": 17955 }, { "epoch": 2.1, "grad_norm": 0.8700634825323005, "learning_rate": 2.30703134825093e-05, "loss": 0.2801, "step": 17960 }, { "epoch": 2.1, "grad_norm": 1.1121489729524707, "learning_rate": 2.3058877551474056e-05, "loss": 0.3429, "step": 17965 }, { "epoch": 2.1, "grad_norm": 1.015920965890387, "learning_rate": 2.3047442029065424e-05, "loss": 0.2998, "step": 17970 }, { "epoch": 2.1, "grad_norm": 0.9317820690993767, "learning_rate": 2.303600691769069e-05, "loss": 0.3194, "step": 17975 }, { "epoch": 2.1, "grad_norm": 0.9308927772521824, "learning_rate": 2.3024572219757087e-05, "loss": 0.3121, "step": 17980 }, { "epoch": 2.1, "grad_norm": 0.8715719095838016, "learning_rate": 2.3013137937671713e-05, "loss": 0.2792, "step": 17985 }, { "epoch": 2.1, "grad_norm": 0.6480854795825243, "learning_rate": 2.3001704073841616e-05, "loss": 0.2478, "step": 17990 }, { "epoch": 2.1, "grad_norm": 0.8524422127329091, "learning_rate": 2.2990270630673743e-05, "loss": 0.3308, "step": 17995 }, { "epoch": 2.1, "grad_norm": 0.8209643624248277, "learning_rate": 2.2978837610574964e-05, "loss": 0.2808, "step": 18000 }, { "epoch": 2.1, "grad_norm": 1.0015748101581854, "learning_rate": 2.2967405015952033e-05, "loss": 0.2942, "step": 18005 }, { "epoch": 2.1, "grad_norm": 0.9178753838370047, "learning_rate": 2.2955972849211636e-05, "loss": 0.3218, "step": 18010 }, { "epoch": 2.1, "grad_norm": 0.880660627565955, "learning_rate": 2.294454111276036e-05, "loss": 0.2732, "step": 18015 }, { "epoch": 2.11, "grad_norm": 0.9259840538083625, "learning_rate": 2.293310980900472e-05, "loss": 0.3087, "step": 18020 }, { "epoch": 2.11, "grad_norm": 1.003457708044618, "learning_rate": 2.2921678940351117e-05, "loss": 0.3571, "step": 18025 }, { "epoch": 2.11, "grad_norm": 1.0543284321793844, "learning_rate": 2.2910248509205858e-05, "loss": 0.2857, "step": 18030 }, { "epoch": 2.11, "grad_norm": 0.829469513748819, "learning_rate": 2.289881851797518e-05, "loss": 0.3608, "step": 18035 }, { "epoch": 2.11, "grad_norm": 0.9760987994680816, "learning_rate": 2.2887388969065215e-05, "loss": 0.3032, "step": 18040 }, { "epoch": 2.11, "grad_norm": 0.9954090334760022, "learning_rate": 2.2875959864882002e-05, "loss": 0.2673, "step": 18045 }, { "epoch": 2.11, "grad_norm": 0.9537178743048064, "learning_rate": 2.286453120783149e-05, "loss": 0.2574, "step": 18050 }, { "epoch": 2.11, "grad_norm": 1.0105368157624715, "learning_rate": 2.285310300031953e-05, "loss": 0.3446, "step": 18055 }, { "epoch": 2.11, "grad_norm": 0.9028534388400108, "learning_rate": 2.2841675244751864e-05, "loss": 0.2917, "step": 18060 }, { "epoch": 2.11, "grad_norm": 1.1006166058146305, "learning_rate": 2.2830247943534173e-05, "loss": 0.3239, "step": 18065 }, { "epoch": 2.11, "grad_norm": 0.8314069673933822, "learning_rate": 2.2818821099072026e-05, "loss": 0.3083, "step": 18070 }, { "epoch": 2.11, "grad_norm": 0.8732818465923707, "learning_rate": 2.280739471377088e-05, "loss": 0.2459, "step": 18075 }, { "epoch": 2.11, "grad_norm": 0.9263975825910838, "learning_rate": 2.279596879003611e-05, "loss": 0.2691, "step": 18080 }, { "epoch": 2.11, "grad_norm": 0.8929642614822747, "learning_rate": 2.2784543330273e-05, "loss": 0.3194, "step": 18085 }, { "epoch": 2.11, "grad_norm": 0.9267118318702681, "learning_rate": 2.2773118336886724e-05, "loss": 0.3024, "step": 18090 }, { "epoch": 2.11, "grad_norm": 0.8654807620027696, "learning_rate": 2.2761693812282374e-05, "loss": 0.275, "step": 18095 }, { "epoch": 2.11, "grad_norm": 1.1141103936568186, "learning_rate": 2.2750269758864908e-05, "loss": 0.2969, "step": 18100 }, { "epoch": 2.12, "grad_norm": 0.9331348976820988, "learning_rate": 2.273884617903923e-05, "loss": 0.357, "step": 18105 }, { "epoch": 2.12, "grad_norm": 0.8641249789300807, "learning_rate": 2.2727423075210114e-05, "loss": 0.3441, "step": 18110 }, { "epoch": 2.12, "grad_norm": 1.0425715696452151, "learning_rate": 2.271600044978225e-05, "loss": 0.3697, "step": 18115 }, { "epoch": 2.12, "grad_norm": 0.8700057932394241, "learning_rate": 2.2704578305160224e-05, "loss": 0.2618, "step": 18120 }, { "epoch": 2.12, "grad_norm": 0.8765256429701753, "learning_rate": 2.2693156643748494e-05, "loss": 0.2987, "step": 18125 }, { "epoch": 2.12, "grad_norm": 0.8818888911087939, "learning_rate": 2.2681735467951455e-05, "loss": 0.3049, "step": 18130 }, { "epoch": 2.12, "grad_norm": 0.9990160192901573, "learning_rate": 2.267031478017339e-05, "loss": 0.315, "step": 18135 }, { "epoch": 2.12, "grad_norm": 0.9316250696122101, "learning_rate": 2.2658894582818473e-05, "loss": 0.3028, "step": 18140 }, { "epoch": 2.12, "grad_norm": 0.9582781812524059, "learning_rate": 2.264747487829076e-05, "loss": 0.2614, "step": 18145 }, { "epoch": 2.12, "grad_norm": 0.8207848899410607, "learning_rate": 2.2636055668994223e-05, "loss": 0.2931, "step": 18150 }, { "epoch": 2.12, "grad_norm": 0.9568272704311052, "learning_rate": 2.262463695733274e-05, "loss": 0.3393, "step": 18155 }, { "epoch": 2.12, "grad_norm": 1.0587143343536545, "learning_rate": 2.261321874571005e-05, "loss": 0.2952, "step": 18160 }, { "epoch": 2.12, "grad_norm": 0.9705452970853029, "learning_rate": 2.2601801036529828e-05, "loss": 0.3477, "step": 18165 }, { "epoch": 2.12, "grad_norm": 0.9392396884471369, "learning_rate": 2.2590383832195607e-05, "loss": 0.3223, "step": 18170 }, { "epoch": 2.12, "grad_norm": 0.9669233535316395, "learning_rate": 2.2578967135110817e-05, "loss": 0.3629, "step": 18175 }, { "epoch": 2.12, "grad_norm": 0.9447899098335893, "learning_rate": 2.2567550947678812e-05, "loss": 0.301, "step": 18180 }, { "epoch": 2.12, "grad_norm": 0.9715206016803206, "learning_rate": 2.2556135272302815e-05, "loss": 0.3335, "step": 18185 }, { "epoch": 2.13, "grad_norm": 0.8898709982860302, "learning_rate": 2.2544720111385946e-05, "loss": 0.3133, "step": 18190 }, { "epoch": 2.13, "grad_norm": 1.0451956513969018, "learning_rate": 2.2533305467331205e-05, "loss": 0.3485, "step": 18195 }, { "epoch": 2.13, "grad_norm": 0.7884296102556158, "learning_rate": 2.2521891342541504e-05, "loss": 0.3105, "step": 18200 }, { "epoch": 2.13, "grad_norm": 0.9206978124265363, "learning_rate": 2.2510477739419628e-05, "loss": 0.3507, "step": 18205 }, { "epoch": 2.13, "grad_norm": 0.9290153798588782, "learning_rate": 2.2499064660368276e-05, "loss": 0.333, "step": 18210 }, { "epoch": 2.13, "grad_norm": 0.9108268388135041, "learning_rate": 2.248765210779e-05, "loss": 0.2871, "step": 18215 }, { "epoch": 2.13, "grad_norm": 0.8729223519674997, "learning_rate": 2.247624008408728e-05, "loss": 0.2964, "step": 18220 }, { "epoch": 2.13, "grad_norm": 0.870354251454955, "learning_rate": 2.246482859166245e-05, "loss": 0.2991, "step": 18225 }, { "epoch": 2.13, "grad_norm": 1.078234666437474, "learning_rate": 2.2453417632917764e-05, "loss": 0.3253, "step": 18230 }, { "epoch": 2.13, "grad_norm": 1.145530000137898, "learning_rate": 2.2442007210255345e-05, "loss": 0.327, "step": 18235 }, { "epoch": 2.13, "grad_norm": 0.8315235260130985, "learning_rate": 2.2430597326077195e-05, "loss": 0.3206, "step": 18240 }, { "epoch": 2.13, "grad_norm": 0.8934828764310307, "learning_rate": 2.241918798278522e-05, "loss": 0.3156, "step": 18245 }, { "epoch": 2.13, "grad_norm": 0.9226141806632508, "learning_rate": 2.2407779182781214e-05, "loss": 0.3273, "step": 18250 }, { "epoch": 2.13, "grad_norm": 0.6704774582282024, "learning_rate": 2.239637092846684e-05, "loss": 0.2567, "step": 18255 }, { "epoch": 2.13, "grad_norm": 1.153973848334559, "learning_rate": 2.2384963222243667e-05, "loss": 0.3028, "step": 18260 }, { "epoch": 2.13, "grad_norm": 0.749385826493055, "learning_rate": 2.2373556066513118e-05, "loss": 0.2319, "step": 18265 }, { "epoch": 2.13, "grad_norm": 0.935174964858879, "learning_rate": 2.2362149463676536e-05, "loss": 0.2981, "step": 18270 }, { "epoch": 2.14, "grad_norm": 0.9733176057387588, "learning_rate": 2.2350743416135114e-05, "loss": 0.3924, "step": 18275 }, { "epoch": 2.14, "grad_norm": 0.8524327605791269, "learning_rate": 2.2339337926289965e-05, "loss": 0.349, "step": 18280 }, { "epoch": 2.14, "grad_norm": 0.8025760346566144, "learning_rate": 2.232793299654205e-05, "loss": 0.298, "step": 18285 }, { "epoch": 2.14, "grad_norm": 1.0213137232814853, "learning_rate": 2.231652862929222e-05, "loss": 0.3126, "step": 18290 }, { "epoch": 2.14, "grad_norm": 0.8798503065456813, "learning_rate": 2.2305124826941227e-05, "loss": 0.3112, "step": 18295 }, { "epoch": 2.14, "grad_norm": 0.8838245903461434, "learning_rate": 2.2293721591889687e-05, "loss": 0.2912, "step": 18300 }, { "epoch": 2.14, "grad_norm": 0.9019523235492393, "learning_rate": 2.228231892653811e-05, "loss": 0.3018, "step": 18305 }, { "epoch": 2.14, "grad_norm": 0.8957271836172304, "learning_rate": 2.2270916833286854e-05, "loss": 0.3394, "step": 18310 }, { "epoch": 2.14, "grad_norm": 0.8526951451315468, "learning_rate": 2.22595153145362e-05, "loss": 0.2867, "step": 18315 }, { "epoch": 2.14, "grad_norm": 1.0688657507311814, "learning_rate": 2.2248114372686267e-05, "loss": 0.3185, "step": 18320 }, { "epoch": 2.14, "grad_norm": 0.9883563035033653, "learning_rate": 2.2236714010137085e-05, "loss": 0.3437, "step": 18325 }, { "epoch": 2.14, "grad_norm": 1.0003111808483434, "learning_rate": 2.2225314229288563e-05, "loss": 0.3597, "step": 18330 }, { "epoch": 2.14, "grad_norm": 0.9178490395686039, "learning_rate": 2.2213915032540457e-05, "loss": 0.2907, "step": 18335 }, { "epoch": 2.14, "grad_norm": 0.7771970475602619, "learning_rate": 2.220251642229241e-05, "loss": 0.2581, "step": 18340 }, { "epoch": 2.14, "grad_norm": 1.1254601013105048, "learning_rate": 2.2191118400943968e-05, "loss": 0.284, "step": 18345 }, { "epoch": 2.14, "grad_norm": 0.8648829345674997, "learning_rate": 2.217972097089453e-05, "loss": 0.3172, "step": 18350 }, { "epoch": 2.14, "grad_norm": 1.0761924695941634, "learning_rate": 2.216832413454336e-05, "loss": 0.2933, "step": 18355 }, { "epoch": 2.15, "grad_norm": 1.010747136086127, "learning_rate": 2.215692789428962e-05, "loss": 0.3212, "step": 18360 }, { "epoch": 2.15, "grad_norm": 0.8228467981530794, "learning_rate": 2.214553225253234e-05, "loss": 0.3518, "step": 18365 }, { "epoch": 2.15, "grad_norm": 0.9307398095949934, "learning_rate": 2.2134137211670423e-05, "loss": 0.3428, "step": 18370 }, { "epoch": 2.15, "grad_norm": 0.8408019872828656, "learning_rate": 2.212274277410265e-05, "loss": 0.3365, "step": 18375 }, { "epoch": 2.15, "grad_norm": 1.1117320976727552, "learning_rate": 2.2111348942227654e-05, "loss": 0.3178, "step": 18380 }, { "epoch": 2.15, "grad_norm": 0.897556555638027, "learning_rate": 2.209995571844397e-05, "loss": 0.3081, "step": 18385 }, { "epoch": 2.15, "grad_norm": 0.8564390697391289, "learning_rate": 2.208856310514997e-05, "loss": 0.3313, "step": 18390 }, { "epoch": 2.15, "grad_norm": 0.9180043817157929, "learning_rate": 2.2077171104743952e-05, "loss": 0.3905, "step": 18395 }, { "epoch": 2.15, "grad_norm": 0.8611305380974957, "learning_rate": 2.206577971962403e-05, "loss": 0.3271, "step": 18400 }, { "epoch": 2.15, "grad_norm": 1.0356129986553817, "learning_rate": 2.2054388952188205e-05, "loss": 0.3212, "step": 18405 }, { "epoch": 2.15, "grad_norm": 1.0433556352168651, "learning_rate": 2.2042998804834362e-05, "loss": 0.3268, "step": 18410 }, { "epoch": 2.15, "grad_norm": 0.8957551962509526, "learning_rate": 2.2031609279960248e-05, "loss": 0.3076, "step": 18415 }, { "epoch": 2.15, "grad_norm": 0.9469706031649314, "learning_rate": 2.202022037996348e-05, "loss": 0.3488, "step": 18420 }, { "epoch": 2.15, "grad_norm": 1.069691306867801, "learning_rate": 2.2008832107241523e-05, "loss": 0.2991, "step": 18425 }, { "epoch": 2.15, "grad_norm": 0.8680007043591736, "learning_rate": 2.1997444464191745e-05, "loss": 0.3185, "step": 18430 }, { "epoch": 2.15, "grad_norm": 0.9524391239303843, "learning_rate": 2.1986057453211357e-05, "loss": 0.3189, "step": 18435 }, { "epoch": 2.15, "grad_norm": 1.1634891773684417, "learning_rate": 2.1974671076697443e-05, "loss": 0.3099, "step": 18440 }, { "epoch": 2.16, "grad_norm": 1.112284340124006, "learning_rate": 2.196328533704697e-05, "loss": 0.3404, "step": 18445 }, { "epoch": 2.16, "grad_norm": 0.7276850845865299, "learning_rate": 2.1951900236656732e-05, "loss": 0.3269, "step": 18450 }, { "epoch": 2.16, "grad_norm": 1.0083017044753924, "learning_rate": 2.1940515777923423e-05, "loss": 0.327, "step": 18455 }, { "epoch": 2.16, "grad_norm": 0.8643501837061109, "learning_rate": 2.1929131963243595e-05, "loss": 0.3166, "step": 18460 }, { "epoch": 2.16, "grad_norm": 1.008850448068876, "learning_rate": 2.1917748795013652e-05, "loss": 0.3069, "step": 18465 }, { "epoch": 2.16, "grad_norm": 0.936178350374074, "learning_rate": 2.190636627562988e-05, "loss": 0.2842, "step": 18470 }, { "epoch": 2.16, "grad_norm": 1.0486685031021898, "learning_rate": 2.1894984407488407e-05, "loss": 0.3338, "step": 18475 }, { "epoch": 2.16, "grad_norm": 3.3593178941955615, "learning_rate": 2.1883603192985242e-05, "loss": 0.3639, "step": 18480 }, { "epoch": 2.16, "grad_norm": 0.8611971490316048, "learning_rate": 2.1872222634516247e-05, "loss": 0.3481, "step": 18485 }, { "epoch": 2.16, "grad_norm": 0.973783465887101, "learning_rate": 2.1860842734477157e-05, "loss": 0.3446, "step": 18490 }, { "epoch": 2.16, "grad_norm": 0.9059663310557655, "learning_rate": 2.1849463495263546e-05, "loss": 0.3494, "step": 18495 }, { "epoch": 2.16, "grad_norm": 0.7233760658678366, "learning_rate": 2.1838084919270878e-05, "loss": 0.3107, "step": 18500 }, { "epoch": 2.16, "grad_norm": 0.9666266908941069, "learning_rate": 2.1826707008894447e-05, "loss": 0.2824, "step": 18505 }, { "epoch": 2.16, "grad_norm": 1.2927638142707638, "learning_rate": 2.181532976652944e-05, "loss": 0.26, "step": 18510 }, { "epoch": 2.16, "grad_norm": 1.112396121685976, "learning_rate": 2.1803953194570876e-05, "loss": 0.3153, "step": 18515 }, { "epoch": 2.16, "grad_norm": 0.9197697793642021, "learning_rate": 2.1792577295413636e-05, "loss": 0.3381, "step": 18520 }, { "epoch": 2.16, "grad_norm": 0.8808141042044956, "learning_rate": 2.1781202071452473e-05, "loss": 0.3361, "step": 18525 }, { "epoch": 2.16, "grad_norm": 0.8501400473105637, "learning_rate": 2.1769827525081992e-05, "loss": 0.3362, "step": 18530 }, { "epoch": 2.17, "grad_norm": 1.0049353574789925, "learning_rate": 2.1758453658696652e-05, "loss": 0.378, "step": 18535 }, { "epoch": 2.17, "grad_norm": 0.9190250097282301, "learning_rate": 2.1747080474690778e-05, "loss": 0.2885, "step": 18540 }, { "epoch": 2.17, "grad_norm": 1.017124404057533, "learning_rate": 2.1735707975458536e-05, "loss": 0.3525, "step": 18545 }, { "epoch": 2.17, "grad_norm": 0.85261659111868, "learning_rate": 2.1724336163393953e-05, "loss": 0.3307, "step": 18550 }, { "epoch": 2.17, "grad_norm": 0.8491688998189677, "learning_rate": 2.171296504089092e-05, "loss": 0.3086, "step": 18555 }, { "epoch": 2.17, "grad_norm": 0.8852672571434125, "learning_rate": 2.1701594610343186e-05, "loss": 0.2915, "step": 18560 }, { "epoch": 2.17, "grad_norm": 0.8785399334865858, "learning_rate": 2.1690224874144334e-05, "loss": 0.3112, "step": 18565 }, { "epoch": 2.17, "grad_norm": 0.9189243416216638, "learning_rate": 2.1678855834687808e-05, "loss": 0.3608, "step": 18570 }, { "epoch": 2.17, "grad_norm": 0.8851670260650392, "learning_rate": 2.1667487494366924e-05, "loss": 0.3172, "step": 18575 }, { "epoch": 2.17, "grad_norm": 0.9444106717230838, "learning_rate": 2.1656119855574823e-05, "loss": 0.2757, "step": 18580 }, { "epoch": 2.17, "grad_norm": 1.0490894692485158, "learning_rate": 2.1644752920704534e-05, "loss": 0.301, "step": 18585 }, { "epoch": 2.17, "grad_norm": 0.9044117705653503, "learning_rate": 2.1633386692148887e-05, "loss": 0.271, "step": 18590 }, { "epoch": 2.17, "grad_norm": 1.0132114129186218, "learning_rate": 2.1622021172300615e-05, "loss": 0.3379, "step": 18595 }, { "epoch": 2.17, "grad_norm": 0.8262285607743437, "learning_rate": 2.1610656363552263e-05, "loss": 0.2998, "step": 18600 }, { "epoch": 2.17, "grad_norm": 0.886276912329097, "learning_rate": 2.159929226829625e-05, "loss": 0.3211, "step": 18605 }, { "epoch": 2.17, "grad_norm": 0.9520381706715765, "learning_rate": 2.1587928888924846e-05, "loss": 0.3424, "step": 18610 }, { "epoch": 2.17, "grad_norm": 0.8813676335843862, "learning_rate": 2.1576566227830148e-05, "loss": 0.3187, "step": 18615 }, { "epoch": 2.18, "grad_norm": 0.8842688610429247, "learning_rate": 2.1565204287404112e-05, "loss": 0.2931, "step": 18620 }, { "epoch": 2.18, "grad_norm": 0.8635441066615837, "learning_rate": 2.155384307003856e-05, "loss": 0.309, "step": 18625 }, { "epoch": 2.18, "grad_norm": 1.1814391282253924, "learning_rate": 2.1542482578125143e-05, "loss": 0.3379, "step": 18630 }, { "epoch": 2.18, "grad_norm": 0.98510832562715, "learning_rate": 2.1531122814055354e-05, "loss": 0.3187, "step": 18635 }, { "epoch": 2.18, "grad_norm": 0.8463320101883766, "learning_rate": 2.151976378022055e-05, "loss": 0.3154, "step": 18640 }, { "epoch": 2.18, "grad_norm": 0.9945810369310236, "learning_rate": 2.1508405479011924e-05, "loss": 0.2917, "step": 18645 }, { "epoch": 2.18, "grad_norm": 0.9354568415984246, "learning_rate": 2.149704791282052e-05, "loss": 0.3189, "step": 18650 }, { "epoch": 2.18, "grad_norm": 1.079737917001233, "learning_rate": 2.1485691084037227e-05, "loss": 0.2811, "step": 18655 }, { "epoch": 2.18, "grad_norm": 0.934457210198989, "learning_rate": 2.1474334995052773e-05, "loss": 0.3033, "step": 18660 }, { "epoch": 2.18, "grad_norm": 0.983614608893491, "learning_rate": 2.1462979648257723e-05, "loss": 0.3364, "step": 18665 }, { "epoch": 2.18, "grad_norm": 0.8757070321472686, "learning_rate": 2.145162504604251e-05, "loss": 0.321, "step": 18670 }, { "epoch": 2.18, "grad_norm": 0.7639473189930548, "learning_rate": 2.1440271190797403e-05, "loss": 0.2831, "step": 18675 }, { "epoch": 2.18, "grad_norm": 1.0222393347235514, "learning_rate": 2.1428918084912502e-05, "loss": 0.3378, "step": 18680 }, { "epoch": 2.18, "grad_norm": 0.7759554606294536, "learning_rate": 2.141756573077774e-05, "loss": 0.2837, "step": 18685 }, { "epoch": 2.18, "grad_norm": 1.2215849766691051, "learning_rate": 2.140621413078292e-05, "loss": 0.3186, "step": 18690 }, { "epoch": 2.18, "grad_norm": 1.1188905490077006, "learning_rate": 2.1394863287317665e-05, "loss": 0.3247, "step": 18695 }, { "epoch": 2.18, "grad_norm": 0.8869815403849061, "learning_rate": 2.1383513202771463e-05, "loss": 0.3212, "step": 18700 }, { "epoch": 2.19, "grad_norm": 1.0014657774041709, "learning_rate": 2.1372163879533605e-05, "loss": 0.3036, "step": 18705 }, { "epoch": 2.19, "grad_norm": 0.8399256289840166, "learning_rate": 2.1360815319993254e-05, "loss": 0.2979, "step": 18710 }, { "epoch": 2.19, "grad_norm": 0.9587365053171065, "learning_rate": 2.1349467526539396e-05, "loss": 0.3174, "step": 18715 }, { "epoch": 2.19, "grad_norm": 0.9773621666350173, "learning_rate": 2.1338120501560862e-05, "loss": 0.3673, "step": 18720 }, { "epoch": 2.19, "grad_norm": 0.9699215562631516, "learning_rate": 2.132677424744633e-05, "loss": 0.3199, "step": 18725 }, { "epoch": 2.19, "grad_norm": 1.1659867432406463, "learning_rate": 2.131542876658429e-05, "loss": 0.2846, "step": 18730 }, { "epoch": 2.19, "grad_norm": 1.2586784513783118, "learning_rate": 2.1304084061363086e-05, "loss": 0.2963, "step": 18735 }, { "epoch": 2.19, "grad_norm": 0.9883578232800853, "learning_rate": 2.1292740134170906e-05, "loss": 0.2978, "step": 18740 }, { "epoch": 2.19, "grad_norm": 0.8348168014760463, "learning_rate": 2.1281396987395758e-05, "loss": 0.3214, "step": 18745 }, { "epoch": 2.19, "grad_norm": 0.9235468088555026, "learning_rate": 2.1270054623425505e-05, "loss": 0.2982, "step": 18750 }, { "epoch": 2.19, "grad_norm": 0.874670486891471, "learning_rate": 2.1258713044647814e-05, "loss": 0.275, "step": 18755 }, { "epoch": 2.19, "grad_norm": 1.0817822379405924, "learning_rate": 2.124737225345022e-05, "loss": 0.2546, "step": 18760 }, { "epoch": 2.19, "grad_norm": 0.8760625455528133, "learning_rate": 2.123603225222007e-05, "loss": 0.3082, "step": 18765 }, { "epoch": 2.19, "grad_norm": 0.8080183160380161, "learning_rate": 2.1224693043344578e-05, "loss": 0.3184, "step": 18770 }, { "epoch": 2.19, "grad_norm": 1.0758553886981022, "learning_rate": 2.121335462921073e-05, "loss": 0.2751, "step": 18775 }, { "epoch": 2.19, "grad_norm": 0.8908569631670024, "learning_rate": 2.1202017012205397e-05, "loss": 0.3136, "step": 18780 }, { "epoch": 2.19, "grad_norm": 0.9068004507143597, "learning_rate": 2.1190680194715265e-05, "loss": 0.3649, "step": 18785 }, { "epoch": 2.2, "grad_norm": 0.8845124608014854, "learning_rate": 2.117934417912686e-05, "loss": 0.2974, "step": 18790 }, { "epoch": 2.2, "grad_norm": 1.035245114722013, "learning_rate": 2.1168008967826534e-05, "loss": 0.3506, "step": 18795 }, { "epoch": 2.2, "grad_norm": 0.8648860971381006, "learning_rate": 2.1156674563200446e-05, "loss": 0.3232, "step": 18800 }, { "epoch": 2.2, "grad_norm": 0.8722927111964861, "learning_rate": 2.1145340967634628e-05, "loss": 0.3233, "step": 18805 }, { "epoch": 2.2, "grad_norm": 1.1044457948745663, "learning_rate": 2.1134008183514906e-05, "loss": 0.2843, "step": 18810 }, { "epoch": 2.2, "grad_norm": 0.8937792684462321, "learning_rate": 2.112267621322696e-05, "loss": 0.3102, "step": 18815 }, { "epoch": 2.2, "grad_norm": 0.855978502051007, "learning_rate": 2.1111345059156295e-05, "loss": 0.2817, "step": 18820 }, { "epoch": 2.2, "grad_norm": 0.9737418379836537, "learning_rate": 2.1100014723688226e-05, "loss": 0.3009, "step": 18825 }, { "epoch": 2.2, "grad_norm": 0.8629444786204769, "learning_rate": 2.1088685209207898e-05, "loss": 0.2945, "step": 18830 }, { "epoch": 2.2, "grad_norm": 1.4690368546840122, "learning_rate": 2.107735651810031e-05, "loss": 0.4054, "step": 18835 }, { "epoch": 2.2, "grad_norm": 0.8851498431032873, "learning_rate": 2.1066028652750272e-05, "loss": 0.324, "step": 18840 }, { "epoch": 2.2, "grad_norm": 0.8824187781320911, "learning_rate": 2.1054701615542407e-05, "loss": 0.311, "step": 18845 }, { "epoch": 2.2, "grad_norm": 0.8342603278944576, "learning_rate": 2.104337540886117e-05, "loss": 0.3074, "step": 18850 }, { "epoch": 2.2, "grad_norm": 1.07601561771372, "learning_rate": 2.1032050035090865e-05, "loss": 0.337, "step": 18855 }, { "epoch": 2.2, "grad_norm": 0.8537132451686101, "learning_rate": 2.1020725496615585e-05, "loss": 0.2634, "step": 18860 }, { "epoch": 2.2, "grad_norm": 0.8515139235900822, "learning_rate": 2.1009401795819277e-05, "loss": 0.2713, "step": 18865 }, { "epoch": 2.2, "grad_norm": 0.9345747288714233, "learning_rate": 2.0998078935085686e-05, "loss": 0.3411, "step": 18870 }, { "epoch": 2.21, "grad_norm": 0.9128851451492647, "learning_rate": 2.0986756916798403e-05, "loss": 0.3187, "step": 18875 }, { "epoch": 2.21, "grad_norm": 0.9787631223096794, "learning_rate": 2.0975435743340822e-05, "loss": 0.31, "step": 18880 }, { "epoch": 2.21, "grad_norm": 1.0338863888769156, "learning_rate": 2.096411541709618e-05, "loss": 0.2932, "step": 18885 }, { "epoch": 2.21, "grad_norm": 0.9340066213116687, "learning_rate": 2.0952795940447522e-05, "loss": 0.3045, "step": 18890 }, { "epoch": 2.21, "grad_norm": 0.8842521690733544, "learning_rate": 2.09414773157777e-05, "loss": 0.2818, "step": 18895 }, { "epoch": 2.21, "grad_norm": 0.9653041989853287, "learning_rate": 2.093015954546942e-05, "loss": 0.2922, "step": 18900 }, { "epoch": 2.21, "grad_norm": 0.9981339006893417, "learning_rate": 2.0918842631905195e-05, "loss": 0.3978, "step": 18905 }, { "epoch": 2.21, "grad_norm": 0.9597187235787491, "learning_rate": 2.0907526577467345e-05, "loss": 0.2983, "step": 18910 }, { "epoch": 2.21, "grad_norm": 0.964709160695622, "learning_rate": 2.089621138453801e-05, "loss": 0.3161, "step": 18915 }, { "epoch": 2.21, "grad_norm": 1.1290528259316996, "learning_rate": 2.0884897055499173e-05, "loss": 0.3303, "step": 18920 }, { "epoch": 2.21, "grad_norm": 0.8965214514996026, "learning_rate": 2.0873583592732602e-05, "loss": 0.2818, "step": 18925 }, { "epoch": 2.21, "grad_norm": 0.9468307837590071, "learning_rate": 2.0862270998619908e-05, "loss": 0.3145, "step": 18930 }, { "epoch": 2.21, "grad_norm": 0.9813663049968601, "learning_rate": 2.0850959275542527e-05, "loss": 0.3266, "step": 18935 }, { "epoch": 2.21, "grad_norm": 0.9818995648649408, "learning_rate": 2.0839648425881668e-05, "loss": 0.3798, "step": 18940 }, { "epoch": 2.21, "grad_norm": 1.2844426306133414, "learning_rate": 2.0828338452018396e-05, "loss": 0.3421, "step": 18945 }, { "epoch": 2.21, "grad_norm": 0.8618822871814008, "learning_rate": 2.0817029356333574e-05, "loss": 0.3069, "step": 18950 }, { "epoch": 2.21, "grad_norm": 0.9598016210865685, "learning_rate": 2.0805721141207894e-05, "loss": 0.2491, "step": 18955 }, { "epoch": 2.22, "grad_norm": 0.8545341745506732, "learning_rate": 2.0794413809021853e-05, "loss": 0.2721, "step": 18960 }, { "epoch": 2.22, "grad_norm": 0.9669650056308706, "learning_rate": 2.078310736215575e-05, "loss": 0.3128, "step": 18965 }, { "epoch": 2.22, "grad_norm": 0.9885806359417516, "learning_rate": 2.0771801802989724e-05, "loss": 0.2915, "step": 18970 }, { "epoch": 2.22, "grad_norm": 1.0368117798149952, "learning_rate": 2.07604971339037e-05, "loss": 0.3407, "step": 18975 }, { "epoch": 2.22, "grad_norm": 0.9066354449033249, "learning_rate": 2.0749193357277452e-05, "loss": 0.3776, "step": 18980 }, { "epoch": 2.22, "grad_norm": 0.992363809705979, "learning_rate": 2.0737890475490516e-05, "loss": 0.343, "step": 18985 }, { "epoch": 2.22, "grad_norm": 1.019680567735476, "learning_rate": 2.0726588490922288e-05, "loss": 0.3354, "step": 18990 }, { "epoch": 2.22, "grad_norm": 0.8962852655172553, "learning_rate": 2.071528740595194e-05, "loss": 0.3085, "step": 18995 }, { "epoch": 2.22, "grad_norm": 0.9242524408274967, "learning_rate": 2.070398722295848e-05, "loss": 0.2694, "step": 19000 }, { "epoch": 2.22, "grad_norm": 1.0338168520135511, "learning_rate": 2.0692687944320716e-05, "loss": 0.3348, "step": 19005 }, { "epoch": 2.22, "grad_norm": 0.9408727091633743, "learning_rate": 2.0681389572417244e-05, "loss": 0.3465, "step": 19010 }, { "epoch": 2.22, "grad_norm": 1.0213095798049854, "learning_rate": 2.067009210962651e-05, "loss": 0.2908, "step": 19015 }, { "epoch": 2.22, "grad_norm": 0.99087844897229, "learning_rate": 2.0658795558326743e-05, "loss": 0.2452, "step": 19020 }, { "epoch": 2.22, "grad_norm": 1.0056084115234374, "learning_rate": 2.064749992089598e-05, "loss": 0.2724, "step": 19025 }, { "epoch": 2.22, "grad_norm": 1.0505525269988698, "learning_rate": 2.0636205199712088e-05, "loss": 0.3437, "step": 19030 }, { "epoch": 2.22, "grad_norm": 1.0061621842171282, "learning_rate": 2.062491139715271e-05, "loss": 0.328, "step": 19035 }, { "epoch": 2.22, "grad_norm": 1.097324635588219, "learning_rate": 2.0613618515595302e-05, "loss": 0.3263, "step": 19040 }, { "epoch": 2.23, "grad_norm": 0.9248932166460767, "learning_rate": 2.060232655741714e-05, "loss": 0.287, "step": 19045 }, { "epoch": 2.23, "grad_norm": 1.2223474490151356, "learning_rate": 2.059103552499532e-05, "loss": 0.3625, "step": 19050 }, { "epoch": 2.23, "grad_norm": 0.9914663311262445, "learning_rate": 2.0579745420706698e-05, "loss": 0.337, "step": 19055 }, { "epoch": 2.23, "grad_norm": 1.0915530577272503, "learning_rate": 2.0568456246927957e-05, "loss": 0.3098, "step": 19060 }, { "epoch": 2.23, "grad_norm": 1.0191701121067873, "learning_rate": 2.0557168006035604e-05, "loss": 0.3384, "step": 19065 }, { "epoch": 2.23, "grad_norm": 1.0424222717286833, "learning_rate": 2.0545880700405924e-05, "loss": 0.276, "step": 19070 }, { "epoch": 2.23, "grad_norm": 0.88243624258224, "learning_rate": 2.0534594332415022e-05, "loss": 0.3213, "step": 19075 }, { "epoch": 2.23, "grad_norm": 0.9466989063921294, "learning_rate": 2.0523308904438775e-05, "loss": 0.2971, "step": 19080 }, { "epoch": 2.23, "grad_norm": 0.8594658974255374, "learning_rate": 2.0512024418852902e-05, "loss": 0.2777, "step": 19085 }, { "epoch": 2.23, "grad_norm": 1.0495013322238314, "learning_rate": 2.0500740878032894e-05, "loss": 0.3512, "step": 19090 }, { "epoch": 2.23, "grad_norm": 0.9080577979051785, "learning_rate": 2.048945828435406e-05, "loss": 0.331, "step": 19095 }, { "epoch": 2.23, "grad_norm": 0.8811052628893118, "learning_rate": 2.0478176640191517e-05, "loss": 0.2599, "step": 19100 }, { "epoch": 2.23, "grad_norm": 1.0627314793894536, "learning_rate": 2.046689594792015e-05, "loss": 0.3088, "step": 19105 }, { "epoch": 2.23, "grad_norm": 1.3021815605428533, "learning_rate": 2.0455616209914668e-05, "loss": 0.336, "step": 19110 }, { "epoch": 2.23, "grad_norm": 0.9436131936328238, "learning_rate": 2.044433742854958e-05, "loss": 0.3173, "step": 19115 }, { "epoch": 2.23, "grad_norm": 0.8156132958112147, "learning_rate": 2.0433059606199187e-05, "loss": 0.3251, "step": 19120 }, { "epoch": 2.23, "grad_norm": 1.1041518148813803, "learning_rate": 2.0421782745237574e-05, "loss": 0.312, "step": 19125 }, { "epoch": 2.24, "grad_norm": 0.8949361717279436, "learning_rate": 2.0410506848038653e-05, "loss": 0.3462, "step": 19130 }, { "epoch": 2.24, "grad_norm": 0.9772182318208312, "learning_rate": 2.0399231916976118e-05, "loss": 0.3461, "step": 19135 }, { "epoch": 2.24, "grad_norm": 0.9071619935038618, "learning_rate": 2.038795795442345e-05, "loss": 0.3412, "step": 19140 }, { "epoch": 2.24, "grad_norm": 0.8126851347200398, "learning_rate": 2.037668496275396e-05, "loss": 0.2758, "step": 19145 }, { "epoch": 2.24, "grad_norm": 0.8636289614269506, "learning_rate": 2.0365412944340705e-05, "loss": 0.3192, "step": 19150 }, { "epoch": 2.24, "grad_norm": 1.0520663722279355, "learning_rate": 2.0354141901556564e-05, "loss": 0.2935, "step": 19155 }, { "epoch": 2.24, "grad_norm": 0.8565307240481904, "learning_rate": 2.034287183677422e-05, "loss": 0.3315, "step": 19160 }, { "epoch": 2.24, "grad_norm": 0.9643274687575706, "learning_rate": 2.0331602752366145e-05, "loss": 0.2867, "step": 19165 }, { "epoch": 2.24, "grad_norm": 1.1266583053840489, "learning_rate": 2.0320334650704594e-05, "loss": 0.35, "step": 19170 }, { "epoch": 2.24, "grad_norm": 0.9972158709128763, "learning_rate": 2.0309067534161607e-05, "loss": 0.3005, "step": 19175 }, { "epoch": 2.24, "grad_norm": 0.962264043008785, "learning_rate": 2.0297801405109045e-05, "loss": 0.3173, "step": 19180 }, { "epoch": 2.24, "grad_norm": 0.8794429143721346, "learning_rate": 2.0286536265918545e-05, "loss": 0.3136, "step": 19185 }, { "epoch": 2.24, "grad_norm": 0.8807659876711478, "learning_rate": 2.027527211896153e-05, "loss": 0.3009, "step": 19190 }, { "epoch": 2.24, "grad_norm": 1.0474060369169909, "learning_rate": 2.0264008966609235e-05, "loss": 0.3033, "step": 19195 }, { "epoch": 2.24, "grad_norm": 0.9802655467410194, "learning_rate": 2.025274681123266e-05, "loss": 0.2801, "step": 19200 }, { "epoch": 2.24, "grad_norm": 0.8487097465263694, "learning_rate": 2.0241485655202595e-05, "loss": 0.2609, "step": 19205 }, { "epoch": 2.24, "grad_norm": 85.35932775830166, "learning_rate": 2.023022550088965e-05, "loss": 0.5221, "step": 19210 }, { "epoch": 2.25, "grad_norm": 0.8750779760450615, "learning_rate": 2.021896635066421e-05, "loss": 0.3384, "step": 19215 }, { "epoch": 2.25, "grad_norm": 1.0004307742160832, "learning_rate": 2.0207708206896432e-05, "loss": 0.2956, "step": 19220 }, { "epoch": 2.25, "grad_norm": 0.8804472066511255, "learning_rate": 2.0196451071956267e-05, "loss": 0.2984, "step": 19225 }, { "epoch": 2.25, "grad_norm": 0.8400089124620665, "learning_rate": 2.018519494821347e-05, "loss": 0.2818, "step": 19230 }, { "epoch": 2.25, "grad_norm": 0.9278831009910146, "learning_rate": 2.017393983803757e-05, "loss": 0.3385, "step": 19235 }, { "epoch": 2.25, "grad_norm": 0.8982325636562568, "learning_rate": 2.0162685743797895e-05, "loss": 0.2913, "step": 19240 }, { "epoch": 2.25, "grad_norm": 1.243173813072387, "learning_rate": 2.0151432667863533e-05, "loss": 0.3507, "step": 19245 }, { "epoch": 2.25, "grad_norm": 0.8428910331745797, "learning_rate": 2.014018061260339e-05, "loss": 0.2919, "step": 19250 }, { "epoch": 2.25, "grad_norm": 1.0254555271890058, "learning_rate": 2.0128929580386124e-05, "loss": 0.3088, "step": 19255 }, { "epoch": 2.25, "grad_norm": 1.0208843920868895, "learning_rate": 2.011767957358021e-05, "loss": 0.2895, "step": 19260 }, { "epoch": 2.25, "grad_norm": 0.856232668905099, "learning_rate": 2.01064305945539e-05, "loss": 0.3078, "step": 19265 }, { "epoch": 2.25, "grad_norm": 0.9353336958152558, "learning_rate": 2.0095182645675192e-05, "loss": 0.4259, "step": 19270 }, { "epoch": 2.25, "grad_norm": 1.0303151759896785, "learning_rate": 2.0083935729311917e-05, "loss": 0.3164, "step": 19275 }, { "epoch": 2.25, "grad_norm": 0.9514570444388372, "learning_rate": 2.007268984783167e-05, "loss": 0.302, "step": 19280 }, { "epoch": 2.25, "grad_norm": 0.8084884408234481, "learning_rate": 2.0061445003601835e-05, "loss": 0.3265, "step": 19285 }, { "epoch": 2.25, "grad_norm": 0.9002291361118132, "learning_rate": 2.0050201198989543e-05, "loss": 0.2801, "step": 19290 }, { "epoch": 2.25, "grad_norm": 1.092576421543211, "learning_rate": 2.0038958436361746e-05, "loss": 0.3198, "step": 19295 }, { "epoch": 2.25, "grad_norm": 0.9471667202195733, "learning_rate": 2.002771671808517e-05, "loss": 0.3527, "step": 19300 }, { "epoch": 2.26, "grad_norm": 0.9247703460511966, "learning_rate": 2.0016476046526305e-05, "loss": 0.2766, "step": 19305 }, { "epoch": 2.26, "grad_norm": 0.9019095311137985, "learning_rate": 2.0005236424051444e-05, "loss": 0.309, "step": 19310 }, { "epoch": 2.26, "grad_norm": 0.8688007671477888, "learning_rate": 1.999399785302663e-05, "loss": 0.2816, "step": 19315 }, { "epoch": 2.26, "grad_norm": 0.8514171760719479, "learning_rate": 1.9982760335817703e-05, "loss": 0.3171, "step": 19320 }, { "epoch": 2.26, "grad_norm": 0.9165854555566252, "learning_rate": 1.9971523874790278e-05, "loss": 0.3974, "step": 19325 }, { "epoch": 2.26, "grad_norm": 1.0556633992010753, "learning_rate": 1.9960288472309756e-05, "loss": 0.2982, "step": 19330 }, { "epoch": 2.26, "grad_norm": 0.8758221087250375, "learning_rate": 1.9949054130741307e-05, "loss": 0.2897, "step": 19335 }, { "epoch": 2.26, "grad_norm": 1.0588190353192888, "learning_rate": 1.993782085244986e-05, "loss": 0.3258, "step": 19340 }, { "epoch": 2.26, "grad_norm": 1.0266069382429226, "learning_rate": 1.9926588639800154e-05, "loss": 0.3194, "step": 19345 }, { "epoch": 2.26, "grad_norm": 1.082861249746999, "learning_rate": 1.991535749515668e-05, "loss": 0.3441, "step": 19350 }, { "epoch": 2.26, "grad_norm": 0.8490691732837352, "learning_rate": 1.990412742088372e-05, "loss": 0.344, "step": 19355 }, { "epoch": 2.26, "grad_norm": 1.03528974974153, "learning_rate": 1.9892898419345312e-05, "loss": 0.328, "step": 19360 }, { "epoch": 2.26, "grad_norm": 0.8596815033383882, "learning_rate": 1.988167049290529e-05, "loss": 0.3007, "step": 19365 }, { "epoch": 2.26, "grad_norm": 0.9555655197639726, "learning_rate": 1.9870443643927233e-05, "loss": 0.3161, "step": 19370 }, { "epoch": 2.26, "grad_norm": 1.1913956772242842, "learning_rate": 1.9859217874774528e-05, "loss": 0.3618, "step": 19375 }, { "epoch": 2.26, "grad_norm": 0.8704937448994219, "learning_rate": 1.9847993187810316e-05, "loss": 0.2792, "step": 19380 }, { "epoch": 2.26, "grad_norm": 0.8960534221328859, "learning_rate": 1.9836769585397492e-05, "loss": 0.3149, "step": 19385 }, { "epoch": 2.27, "grad_norm": 0.8544079730529959, "learning_rate": 1.982554706989875e-05, "loss": 0.2909, "step": 19390 }, { "epoch": 2.27, "grad_norm": 0.7973887460843961, "learning_rate": 1.981432564367657e-05, "loss": 0.3271, "step": 19395 }, { "epoch": 2.27, "grad_norm": 0.878333975664547, "learning_rate": 1.9803105309093145e-05, "loss": 0.2919, "step": 19400 }, { "epoch": 2.27, "grad_norm": 1.1221867278169215, "learning_rate": 1.9791886068510503e-05, "loss": 0.3255, "step": 19405 }, { "epoch": 2.27, "grad_norm": 0.9822828299529107, "learning_rate": 1.978066792429039e-05, "loss": 0.3309, "step": 19410 }, { "epoch": 2.27, "grad_norm": 0.9738605039542356, "learning_rate": 1.9769450878794356e-05, "loss": 0.3105, "step": 19415 }, { "epoch": 2.27, "grad_norm": 0.9346212942476916, "learning_rate": 1.9758234934383694e-05, "loss": 0.3294, "step": 19420 }, { "epoch": 2.27, "grad_norm": 0.8317241589287779, "learning_rate": 1.9747020093419495e-05, "loss": 0.2977, "step": 19425 }, { "epoch": 2.27, "grad_norm": 0.9284375730977665, "learning_rate": 1.9735806358262588e-05, "loss": 0.2905, "step": 19430 }, { "epoch": 2.27, "grad_norm": 0.9742098549584641, "learning_rate": 1.9724593731273577e-05, "loss": 0.3668, "step": 19435 }, { "epoch": 2.27, "grad_norm": 1.050345894412471, "learning_rate": 1.971338221481285e-05, "loss": 0.2973, "step": 19440 }, { "epoch": 2.27, "grad_norm": 0.8305778127957896, "learning_rate": 1.9702171811240543e-05, "loss": 0.2391, "step": 19445 }, { "epoch": 2.27, "grad_norm": 0.8919978112261086, "learning_rate": 1.9690962522916574e-05, "loss": 0.3464, "step": 19450 }, { "epoch": 2.27, "grad_norm": 0.9360553419905666, "learning_rate": 1.967975435220059e-05, "loss": 0.3149, "step": 19455 }, { "epoch": 2.27, "grad_norm": 1.0851103178228876, "learning_rate": 1.9668547301452055e-05, "loss": 0.3382, "step": 19460 }, { "epoch": 2.27, "grad_norm": 0.8640620889860231, "learning_rate": 1.9657341373030153e-05, "loss": 0.2878, "step": 19465 }, { "epoch": 2.27, "grad_norm": 1.0539855484445155, "learning_rate": 1.9646136569293852e-05, "loss": 0.2915, "step": 19470 }, { "epoch": 2.28, "grad_norm": 0.884082682077803, "learning_rate": 1.96349328926019e-05, "loss": 0.2854, "step": 19475 }, { "epoch": 2.28, "grad_norm": 0.7959663538683446, "learning_rate": 1.9623730345312763e-05, "loss": 0.3067, "step": 19480 }, { "epoch": 2.28, "grad_norm": 0.8980690506314459, "learning_rate": 1.96125289297847e-05, "loss": 0.3378, "step": 19485 }, { "epoch": 2.28, "grad_norm": 1.069052419234371, "learning_rate": 1.960132864837574e-05, "loss": 0.3395, "step": 19490 }, { "epoch": 2.28, "grad_norm": 0.7989861579540782, "learning_rate": 1.959012950344365e-05, "loss": 0.2348, "step": 19495 }, { "epoch": 2.28, "grad_norm": 0.9175251853040606, "learning_rate": 1.957893149734596e-05, "loss": 0.3043, "step": 19500 }, { "epoch": 2.28, "grad_norm": 0.8844698023071313, "learning_rate": 1.9567734632439974e-05, "loss": 0.255, "step": 19505 }, { "epoch": 2.28, "grad_norm": 0.967259586317296, "learning_rate": 1.9556538911082755e-05, "loss": 0.3042, "step": 19510 }, { "epoch": 2.28, "grad_norm": 0.9339863995744397, "learning_rate": 1.9545344335631114e-05, "loss": 0.3416, "step": 19515 }, { "epoch": 2.28, "grad_norm": 0.8276360288505716, "learning_rate": 1.9534150908441633e-05, "loss": 0.2616, "step": 19520 }, { "epoch": 2.28, "grad_norm": 0.9926911385271557, "learning_rate": 1.9522958631870632e-05, "loss": 0.3257, "step": 19525 }, { "epoch": 2.28, "grad_norm": 0.9526508330226306, "learning_rate": 1.9511767508274214e-05, "loss": 0.2965, "step": 19530 }, { "epoch": 2.28, "grad_norm": 1.2362134828162996, "learning_rate": 1.950057754000822e-05, "loss": 0.3141, "step": 19535 }, { "epoch": 2.28, "grad_norm": 0.857833732272555, "learning_rate": 1.9489388729428264e-05, "loss": 0.3232, "step": 19540 }, { "epoch": 2.28, "grad_norm": 0.9793739174366688, "learning_rate": 1.9478201078889713e-05, "loss": 0.3348, "step": 19545 }, { "epoch": 2.28, "grad_norm": 0.7979001385092815, "learning_rate": 1.9467014590747662e-05, "loss": 0.3015, "step": 19550 }, { "epoch": 2.28, "grad_norm": 0.8553307227067145, "learning_rate": 1.9455829267357e-05, "loss": 0.2654, "step": 19555 }, { "epoch": 2.29, "grad_norm": 1.1191995705594253, "learning_rate": 1.9444645111072352e-05, "loss": 0.369, "step": 19560 }, { "epoch": 2.29, "grad_norm": 0.9193459249953835, "learning_rate": 1.9433462124248108e-05, "loss": 0.3283, "step": 19565 }, { "epoch": 2.29, "grad_norm": 0.836878614391158, "learning_rate": 1.9422280309238384e-05, "loss": 0.3285, "step": 19570 }, { "epoch": 2.29, "grad_norm": 1.1016396311345666, "learning_rate": 1.9411099668397085e-05, "loss": 0.3217, "step": 19575 }, { "epoch": 2.29, "grad_norm": 1.0582528640245685, "learning_rate": 1.939992020407785e-05, "loss": 0.3323, "step": 19580 }, { "epoch": 2.29, "grad_norm": 0.9747675360002149, "learning_rate": 1.9388741918634067e-05, "loss": 0.3395, "step": 19585 }, { "epoch": 2.29, "grad_norm": 1.0523604250854264, "learning_rate": 1.93775648144189e-05, "loss": 0.3129, "step": 19590 }, { "epoch": 2.29, "grad_norm": 0.8831891685961533, "learning_rate": 1.9366388893785228e-05, "loss": 0.3044, "step": 19595 }, { "epoch": 2.29, "grad_norm": 0.983763036952166, "learning_rate": 1.9355214159085702e-05, "loss": 0.3027, "step": 19600 }, { "epoch": 2.29, "grad_norm": 0.8033630850470433, "learning_rate": 1.934404061267273e-05, "loss": 0.2847, "step": 19605 }, { "epoch": 2.29, "grad_norm": 0.9486630028808671, "learning_rate": 1.9332868256898447e-05, "loss": 0.2703, "step": 19610 }, { "epoch": 2.29, "grad_norm": 0.9180339767802568, "learning_rate": 1.932169709411477e-05, "loss": 0.3165, "step": 19615 }, { "epoch": 2.29, "grad_norm": 0.9960696458027846, "learning_rate": 1.931052712667332e-05, "loss": 0.2882, "step": 19620 }, { "epoch": 2.29, "grad_norm": 0.9388677311214171, "learning_rate": 1.9299358356925517e-05, "loss": 0.317, "step": 19625 }, { "epoch": 2.29, "grad_norm": 0.8693859957602147, "learning_rate": 1.928819078722249e-05, "loss": 0.3413, "step": 19630 }, { "epoch": 2.29, "grad_norm": 0.9481992395450557, "learning_rate": 1.927702441991514e-05, "loss": 0.3446, "step": 19635 }, { "epoch": 2.29, "grad_norm": 0.9603315436364415, "learning_rate": 1.9265859257354084e-05, "loss": 0.3323, "step": 19640 }, { "epoch": 2.3, "grad_norm": 1.0232614614090032, "learning_rate": 1.925469530188973e-05, "loss": 0.3099, "step": 19645 }, { "epoch": 2.3, "grad_norm": 0.871830739955941, "learning_rate": 1.924353255587219e-05, "loss": 0.3054, "step": 19650 }, { "epoch": 2.3, "grad_norm": 0.8832566520955866, "learning_rate": 1.9232371021651348e-05, "loss": 0.322, "step": 19655 }, { "epoch": 2.3, "grad_norm": 0.9049802543319408, "learning_rate": 1.922121070157683e-05, "loss": 0.3193, "step": 19660 }, { "epoch": 2.3, "grad_norm": 0.9497730070594884, "learning_rate": 1.921005159799798e-05, "loss": 0.359, "step": 19665 }, { "epoch": 2.3, "grad_norm": 0.7390709369195414, "learning_rate": 1.9198893713263915e-05, "loss": 0.3002, "step": 19670 }, { "epoch": 2.3, "grad_norm": 0.9233609742716121, "learning_rate": 1.9187737049723498e-05, "loss": 0.3276, "step": 19675 }, { "epoch": 2.3, "grad_norm": 0.9162390278085042, "learning_rate": 1.9176581609725307e-05, "loss": 0.2535, "step": 19680 }, { "epoch": 2.3, "grad_norm": 0.9043474635288291, "learning_rate": 1.9165427395617704e-05, "loss": 0.3366, "step": 19685 }, { "epoch": 2.3, "grad_norm": 1.041571717313108, "learning_rate": 1.9154274409748742e-05, "loss": 0.3126, "step": 19690 }, { "epoch": 2.3, "grad_norm": 1.0242304000187896, "learning_rate": 1.914312265446624e-05, "loss": 0.2696, "step": 19695 }, { "epoch": 2.3, "grad_norm": 0.9500210492750076, "learning_rate": 1.9131972132117776e-05, "loss": 0.3057, "step": 19700 }, { "epoch": 2.3, "grad_norm": 0.9595355388664257, "learning_rate": 1.912082284505065e-05, "loss": 0.3239, "step": 19705 }, { "epoch": 2.3, "grad_norm": 0.9843058650941644, "learning_rate": 1.9109674795611898e-05, "loss": 0.295, "step": 19710 }, { "epoch": 2.3, "grad_norm": 0.9497760161615868, "learning_rate": 1.9098527986148294e-05, "loss": 0.3141, "step": 19715 }, { "epoch": 2.3, "grad_norm": 0.9319852658392956, "learning_rate": 1.908738241900638e-05, "loss": 0.3126, "step": 19720 }, { "epoch": 2.3, "grad_norm": 0.9517394251860755, "learning_rate": 1.9076238096532387e-05, "loss": 0.3052, "step": 19725 }, { "epoch": 2.31, "grad_norm": 0.8776288579568556, "learning_rate": 1.9065095021072338e-05, "loss": 0.3147, "step": 19730 }, { "epoch": 2.31, "grad_norm": 0.9956225821780308, "learning_rate": 1.9053953194971947e-05, "loss": 0.275, "step": 19735 }, { "epoch": 2.31, "grad_norm": 0.9430343748420174, "learning_rate": 1.9042812620576693e-05, "loss": 0.3057, "step": 19740 }, { "epoch": 2.31, "grad_norm": 0.92481447819531, "learning_rate": 1.9031673300231782e-05, "loss": 0.3181, "step": 19745 }, { "epoch": 2.31, "grad_norm": 0.8562383790311145, "learning_rate": 1.902053523628216e-05, "loss": 0.3391, "step": 19750 }, { "epoch": 2.31, "grad_norm": 0.8178204357095703, "learning_rate": 1.900939843107251e-05, "loss": 0.2893, "step": 19755 }, { "epoch": 2.31, "grad_norm": 1.0002941132167138, "learning_rate": 1.899826288694724e-05, "loss": 0.3398, "step": 19760 }, { "epoch": 2.31, "grad_norm": 0.9376293134034597, "learning_rate": 1.8987128606250496e-05, "loss": 0.3366, "step": 19765 }, { "epoch": 2.31, "grad_norm": 1.9937934518182798, "learning_rate": 1.8975995591326168e-05, "loss": 0.3415, "step": 19770 }, { "epoch": 2.31, "grad_norm": 0.9731590454269231, "learning_rate": 1.8964863844517873e-05, "loss": 0.3192, "step": 19775 }, { "epoch": 2.31, "grad_norm": 0.9642478291439845, "learning_rate": 1.895373336816895e-05, "loss": 0.3168, "step": 19780 }, { "epoch": 2.31, "grad_norm": 1.0842544573081947, "learning_rate": 1.8942604164622484e-05, "loss": 0.2827, "step": 19785 }, { "epoch": 2.31, "grad_norm": 0.8549012298612148, "learning_rate": 1.8931476236221298e-05, "loss": 0.3364, "step": 19790 }, { "epoch": 2.31, "grad_norm": 1.1556458673996979, "learning_rate": 1.8920349585307925e-05, "loss": 0.2749, "step": 19795 }, { "epoch": 2.31, "grad_norm": 0.7874059307206615, "learning_rate": 1.8909224214224662e-05, "loss": 0.3078, "step": 19800 }, { "epoch": 2.31, "grad_norm": 0.862194654250148, "learning_rate": 1.8898100125313494e-05, "loss": 0.2583, "step": 19805 }, { "epoch": 2.31, "grad_norm": 0.8802550167148678, "learning_rate": 1.8886977320916163e-05, "loss": 0.3166, "step": 19810 }, { "epoch": 2.32, "grad_norm": 0.807661318717498, "learning_rate": 1.887585580337414e-05, "loss": 0.2529, "step": 19815 }, { "epoch": 2.32, "grad_norm": 0.9838482239265266, "learning_rate": 1.8864735575028623e-05, "loss": 0.3496, "step": 19820 }, { "epoch": 2.32, "grad_norm": 0.9378745616535644, "learning_rate": 1.885361663822054e-05, "loss": 0.2921, "step": 19825 }, { "epoch": 2.32, "grad_norm": 0.9683712557290653, "learning_rate": 1.884249899529053e-05, "loss": 0.3071, "step": 19830 }, { "epoch": 2.32, "grad_norm": 0.8797188305993103, "learning_rate": 1.883138264857898e-05, "loss": 0.3515, "step": 19835 }, { "epoch": 2.32, "grad_norm": 0.923157112158475, "learning_rate": 1.8820267600425996e-05, "loss": 0.3242, "step": 19840 }, { "epoch": 2.32, "grad_norm": 1.0043636550225985, "learning_rate": 1.8809153853171426e-05, "loss": 0.3603, "step": 19845 }, { "epoch": 2.32, "grad_norm": 0.884430837449619, "learning_rate": 1.8798041409154807e-05, "loss": 0.3015, "step": 19850 }, { "epoch": 2.32, "grad_norm": 0.8884575530989697, "learning_rate": 1.878693027071544e-05, "loss": 0.2682, "step": 19855 }, { "epoch": 2.32, "grad_norm": 0.8892073683106652, "learning_rate": 1.8775820440192332e-05, "loss": 0.2965, "step": 19860 }, { "epoch": 2.32, "grad_norm": 0.9757747820687703, "learning_rate": 1.8764711919924215e-05, "loss": 0.347, "step": 19865 }, { "epoch": 2.32, "grad_norm": 1.1857940083995246, "learning_rate": 1.8753604712249565e-05, "loss": 0.3276, "step": 19870 }, { "epoch": 2.32, "grad_norm": 0.780213776198026, "learning_rate": 1.874249881950655e-05, "loss": 0.2825, "step": 19875 }, { "epoch": 2.32, "grad_norm": 0.8247014195197875, "learning_rate": 1.873139424403307e-05, "loss": 0.3045, "step": 19880 }, { "epoch": 2.32, "grad_norm": 0.9065511886311877, "learning_rate": 1.8720290988166773e-05, "loss": 0.3149, "step": 19885 }, { "epoch": 2.32, "grad_norm": 0.9543313082655942, "learning_rate": 1.8709189054244996e-05, "loss": 0.3119, "step": 19890 }, { "epoch": 2.32, "grad_norm": 0.9439106028514123, "learning_rate": 1.8698088444604833e-05, "loss": 0.3012, "step": 19895 }, { "epoch": 2.33, "grad_norm": 1.2122520388016078, "learning_rate": 1.8686989161583053e-05, "loss": 0.3253, "step": 19900 }, { "epoch": 2.33, "grad_norm": 0.9243484416547644, "learning_rate": 1.8675891207516185e-05, "loss": 0.3067, "step": 19905 }, { "epoch": 2.33, "grad_norm": 0.8879016294564582, "learning_rate": 1.866479458474046e-05, "loss": 0.3367, "step": 19910 }, { "epoch": 2.33, "grad_norm": 0.9467859236463569, "learning_rate": 1.8653699295591843e-05, "loss": 0.3168, "step": 19915 }, { "epoch": 2.33, "grad_norm": 0.8253396517583389, "learning_rate": 1.8642605342405998e-05, "loss": 0.3012, "step": 19920 }, { "epoch": 2.33, "grad_norm": 0.897623523671182, "learning_rate": 1.863151272751831e-05, "loss": 0.2899, "step": 19925 }, { "epoch": 2.33, "grad_norm": 0.9663078010236645, "learning_rate": 1.8620421453263915e-05, "loss": 0.3093, "step": 19930 }, { "epoch": 2.33, "grad_norm": 1.1058123989036994, "learning_rate": 1.8609331521977623e-05, "loss": 0.2874, "step": 19935 }, { "epoch": 2.33, "grad_norm": 0.979926423581943, "learning_rate": 1.8598242935994e-05, "loss": 0.3074, "step": 19940 }, { "epoch": 2.33, "grad_norm": 0.9119635970877846, "learning_rate": 1.8587155697647283e-05, "loss": 0.2864, "step": 19945 }, { "epoch": 2.33, "grad_norm": 1.0677179479240284, "learning_rate": 1.8576069809271475e-05, "loss": 0.3143, "step": 19950 }, { "epoch": 2.33, "grad_norm": 1.0704260422345384, "learning_rate": 1.8564985273200253e-05, "loss": 0.2875, "step": 19955 }, { "epoch": 2.33, "grad_norm": 1.0827876965857768, "learning_rate": 1.855390209176704e-05, "loss": 0.3595, "step": 19960 }, { "epoch": 2.33, "grad_norm": 1.7358631958751243, "learning_rate": 1.8542820267304966e-05, "loss": 0.2646, "step": 19965 }, { "epoch": 2.33, "grad_norm": 0.9378841438888215, "learning_rate": 1.853173980214686e-05, "loss": 0.3298, "step": 19970 }, { "epoch": 2.33, "grad_norm": 0.9520672581606249, "learning_rate": 1.8520660698625275e-05, "loss": 0.3034, "step": 19975 }, { "epoch": 2.33, "grad_norm": 0.9944129246574176, "learning_rate": 1.8509582959072486e-05, "loss": 0.3381, "step": 19980 }, { "epoch": 2.33, "grad_norm": 0.9070642683200693, "learning_rate": 1.849850658582048e-05, "loss": 0.2888, "step": 19985 }, { "epoch": 2.34, "grad_norm": 1.0002980590119825, "learning_rate": 1.8487431581200933e-05, "loss": 0.3067, "step": 19990 }, { "epoch": 2.34, "grad_norm": 0.9572080732898964, "learning_rate": 1.8476357947545252e-05, "loss": 0.2876, "step": 19995 }, { "epoch": 2.34, "grad_norm": 1.17871576249345, "learning_rate": 1.8465285687184562e-05, "loss": 0.3532, "step": 20000 }, { "epoch": 2.34, "grad_norm": 1.0592207583832656, "learning_rate": 1.845421480244969e-05, "loss": 0.352, "step": 20005 }, { "epoch": 2.34, "grad_norm": 0.9706162294261611, "learning_rate": 1.8443145295671165e-05, "loss": 0.3119, "step": 20010 }, { "epoch": 2.34, "grad_norm": 0.806236961695897, "learning_rate": 1.8432077169179235e-05, "loss": 0.2668, "step": 20015 }, { "epoch": 2.34, "grad_norm": 0.8750205284897073, "learning_rate": 1.8421010425303865e-05, "loss": 0.3402, "step": 20020 }, { "epoch": 2.34, "grad_norm": 0.9711022131903482, "learning_rate": 1.8409945066374706e-05, "loss": 0.3262, "step": 20025 }, { "epoch": 2.34, "grad_norm": 0.8227846744115173, "learning_rate": 1.839888109472115e-05, "loss": 0.2799, "step": 20030 }, { "epoch": 2.34, "grad_norm": 0.9430191483338972, "learning_rate": 1.838781851267227e-05, "loss": 0.3193, "step": 20035 }, { "epoch": 2.34, "grad_norm": 0.8561815774260501, "learning_rate": 1.8376757322556847e-05, "loss": 0.2757, "step": 20040 }, { "epoch": 2.34, "grad_norm": 1.1202063880113722, "learning_rate": 1.8365697526703384e-05, "loss": 0.3899, "step": 20045 }, { "epoch": 2.34, "grad_norm": 0.9679879927630963, "learning_rate": 1.8354639127440097e-05, "loss": 0.337, "step": 20050 }, { "epoch": 2.34, "grad_norm": 0.8964955923152649, "learning_rate": 1.8343582127094884e-05, "loss": 0.3162, "step": 20055 }, { "epoch": 2.34, "grad_norm": 0.8628627585749146, "learning_rate": 1.8332526527995347e-05, "loss": 0.3038, "step": 20060 }, { "epoch": 2.34, "grad_norm": 0.7741338316261027, "learning_rate": 1.8321472332468825e-05, "loss": 0.306, "step": 20065 }, { "epoch": 2.34, "grad_norm": 0.8581152877946427, "learning_rate": 1.8310419542842327e-05, "loss": 0.351, "step": 20070 }, { "epoch": 2.35, "grad_norm": 1.259252654960738, "learning_rate": 1.8299368161442588e-05, "loss": 0.3686, "step": 20075 }, { "epoch": 2.35, "grad_norm": 0.9003352820308149, "learning_rate": 1.828831819059605e-05, "loss": 0.2956, "step": 20080 }, { "epoch": 2.35, "grad_norm": 0.898095328432729, "learning_rate": 1.827726963262883e-05, "loss": 0.2842, "step": 20085 }, { "epoch": 2.35, "grad_norm": 0.8104156384289827, "learning_rate": 1.8266222489866775e-05, "loss": 0.319, "step": 20090 }, { "epoch": 2.35, "grad_norm": 0.9225468555985851, "learning_rate": 1.8255176764635414e-05, "loss": 0.2555, "step": 20095 }, { "epoch": 2.35, "grad_norm": 0.9545276851350571, "learning_rate": 1.824413245926e-05, "loss": 0.2826, "step": 20100 }, { "epoch": 2.35, "grad_norm": 0.9444292032530351, "learning_rate": 1.823308957606548e-05, "loss": 0.2617, "step": 20105 }, { "epoch": 2.35, "grad_norm": 0.9305023289632784, "learning_rate": 1.8222048117376477e-05, "loss": 0.3235, "step": 20110 }, { "epoch": 2.35, "grad_norm": 0.925720308203352, "learning_rate": 1.821100808551735e-05, "loss": 0.3112, "step": 20115 }, { "epoch": 2.35, "grad_norm": 1.0307969690814232, "learning_rate": 1.8199969482812124e-05, "loss": 0.3259, "step": 20120 }, { "epoch": 2.35, "grad_norm": 0.8948437195312565, "learning_rate": 1.8188932311584565e-05, "loss": 0.2991, "step": 20125 }, { "epoch": 2.35, "grad_norm": 0.9424291443738005, "learning_rate": 1.817789657415809e-05, "loss": 0.296, "step": 20130 }, { "epoch": 2.35, "grad_norm": 0.9862261356785185, "learning_rate": 1.816686227285585e-05, "loss": 0.3486, "step": 20135 }, { "epoch": 2.35, "grad_norm": 0.759140054054554, "learning_rate": 1.815582941000068e-05, "loss": 0.2994, "step": 20140 }, { "epoch": 2.35, "grad_norm": 0.9512712707924539, "learning_rate": 1.814479798791511e-05, "loss": 0.2484, "step": 20145 }, { "epoch": 2.35, "grad_norm": 0.9188747932844091, "learning_rate": 1.813376800892138e-05, "loss": 0.3367, "step": 20150 }, { "epoch": 2.35, "grad_norm": 0.9277092399760558, "learning_rate": 1.8122739475341394e-05, "loss": 0.3215, "step": 20155 }, { "epoch": 2.36, "grad_norm": 0.9746324153220186, "learning_rate": 1.811171238949679e-05, "loss": 0.36, "step": 20160 }, { "epoch": 2.36, "grad_norm": 0.9308516314055012, "learning_rate": 1.8100686753708883e-05, "loss": 0.2864, "step": 20165 }, { "epoch": 2.36, "grad_norm": 0.8672692702521405, "learning_rate": 1.808966257029868e-05, "loss": 0.3292, "step": 20170 }, { "epoch": 2.36, "grad_norm": 1.0259830739178795, "learning_rate": 1.80786398415869e-05, "loss": 0.3078, "step": 20175 }, { "epoch": 2.36, "grad_norm": 0.8356647581383919, "learning_rate": 1.8067618569893934e-05, "loss": 0.3183, "step": 20180 }, { "epoch": 2.36, "grad_norm": 0.9135833041312057, "learning_rate": 1.8056598757539868e-05, "loss": 0.3449, "step": 20185 }, { "epoch": 2.36, "grad_norm": 1.036834507167456, "learning_rate": 1.8045580406844498e-05, "loss": 0.3243, "step": 20190 }, { "epoch": 2.36, "grad_norm": 0.9362926626029245, "learning_rate": 1.8034563520127307e-05, "loss": 0.3352, "step": 20195 }, { "epoch": 2.36, "grad_norm": 0.8872973538161422, "learning_rate": 1.802354809970745e-05, "loss": 0.3348, "step": 20200 }, { "epoch": 2.36, "grad_norm": 0.928004858577566, "learning_rate": 1.801253414790379e-05, "loss": 0.2836, "step": 20205 }, { "epoch": 2.36, "grad_norm": 0.8778173207054601, "learning_rate": 1.800152166703489e-05, "loss": 0.2704, "step": 20210 }, { "epoch": 2.36, "grad_norm": 0.9205221847500501, "learning_rate": 1.7990510659418992e-05, "loss": 0.2771, "step": 20215 }, { "epoch": 2.36, "grad_norm": 0.9048422029957066, "learning_rate": 1.7979501127374025e-05, "loss": 0.3259, "step": 20220 }, { "epoch": 2.36, "grad_norm": 0.771507383270431, "learning_rate": 1.7968493073217603e-05, "loss": 0.312, "step": 20225 }, { "epoch": 2.36, "grad_norm": 0.9524902863447129, "learning_rate": 1.7957486499267044e-05, "loss": 0.3293, "step": 20230 }, { "epoch": 2.36, "grad_norm": 1.0341427448944533, "learning_rate": 1.794648140783935e-05, "loss": 0.3011, "step": 20235 }, { "epoch": 2.36, "grad_norm": 0.8530488169788732, "learning_rate": 1.7935477801251198e-05, "loss": 0.2806, "step": 20240 }, { "epoch": 2.37, "grad_norm": 0.8850242125129613, "learning_rate": 1.792447568181898e-05, "loss": 0.2993, "step": 20245 }, { "epoch": 2.37, "grad_norm": 1.1175102612321604, "learning_rate": 1.7913475051858744e-05, "loss": 0.3698, "step": 20250 }, { "epoch": 2.37, "grad_norm": 0.930189418267071, "learning_rate": 1.7902475913686233e-05, "loss": 0.3069, "step": 20255 }, { "epoch": 2.37, "grad_norm": 1.043525356659347, "learning_rate": 1.7891478269616895e-05, "loss": 0.3052, "step": 20260 }, { "epoch": 2.37, "grad_norm": 1.0077921462624668, "learning_rate": 1.7880482121965846e-05, "loss": 0.3247, "step": 20265 }, { "epoch": 2.37, "grad_norm": 0.99922877145686, "learning_rate": 1.786948747304788e-05, "loss": 0.2995, "step": 20270 }, { "epoch": 2.37, "grad_norm": 0.9261530706677262, "learning_rate": 1.785849432517749e-05, "loss": 0.3458, "step": 20275 }, { "epoch": 2.37, "grad_norm": 0.928385961082353, "learning_rate": 1.7847502680668853e-05, "loss": 0.2871, "step": 20280 }, { "epoch": 2.37, "grad_norm": 0.9707647471317726, "learning_rate": 1.783651254183582e-05, "loss": 0.3758, "step": 20285 }, { "epoch": 2.37, "grad_norm": 0.9544664370722635, "learning_rate": 1.7825523910991948e-05, "loss": 0.3155, "step": 20290 }, { "epoch": 2.37, "grad_norm": 1.0412653462424266, "learning_rate": 1.7814536790450437e-05, "loss": 0.3145, "step": 20295 }, { "epoch": 2.37, "grad_norm": 0.8931060674317187, "learning_rate": 1.780355118252419e-05, "loss": 0.3357, "step": 20300 }, { "epoch": 2.37, "grad_norm": 0.9414105831261608, "learning_rate": 1.779256708952581e-05, "loss": 0.3354, "step": 20305 }, { "epoch": 2.37, "grad_norm": 0.875494951333337, "learning_rate": 1.7781584513767552e-05, "loss": 0.2973, "step": 20310 }, { "epoch": 2.37, "grad_norm": 0.901421317662818, "learning_rate": 1.7770603457561374e-05, "loss": 0.285, "step": 20315 }, { "epoch": 2.37, "grad_norm": 1.0382276614992962, "learning_rate": 1.7759623923218887e-05, "loss": 0.274, "step": 20320 }, { "epoch": 2.37, "grad_norm": 0.9712509780882456, "learning_rate": 1.7748645913051408e-05, "loss": 0.3247, "step": 20325 }, { "epoch": 2.38, "grad_norm": 0.9923133228320421, "learning_rate": 1.7737669429369925e-05, "loss": 0.3054, "step": 20330 }, { "epoch": 2.38, "grad_norm": 1.0475887270092925, "learning_rate": 1.77266944744851e-05, "loss": 0.334, "step": 20335 }, { "epoch": 2.38, "grad_norm": 0.9223358575970703, "learning_rate": 1.771572105070727e-05, "loss": 0.3115, "step": 20340 }, { "epoch": 2.38, "grad_norm": 0.9961148002246296, "learning_rate": 1.7704749160346465e-05, "loss": 0.3091, "step": 20345 }, { "epoch": 2.38, "grad_norm": 1.1185843876837405, "learning_rate": 1.7693778805712376e-05, "loss": 0.3235, "step": 20350 }, { "epoch": 2.38, "grad_norm": 0.9868665116080396, "learning_rate": 1.7682809989114373e-05, "loss": 0.3396, "step": 20355 }, { "epoch": 2.38, "grad_norm": 0.9719937218883374, "learning_rate": 1.767184271286153e-05, "loss": 0.2979, "step": 20360 }, { "epoch": 2.38, "grad_norm": 0.8678706609228284, "learning_rate": 1.7660876979262552e-05, "loss": 0.31, "step": 20365 }, { "epoch": 2.38, "grad_norm": 0.9225186965328284, "learning_rate": 1.7649912790625843e-05, "loss": 0.2641, "step": 20370 }, { "epoch": 2.38, "grad_norm": 1.0249936988213915, "learning_rate": 1.7638950149259485e-05, "loss": 0.336, "step": 20375 }, { "epoch": 2.38, "grad_norm": 0.7492566501317238, "learning_rate": 1.7627989057471227e-05, "loss": 0.2871, "step": 20380 }, { "epoch": 2.38, "grad_norm": 0.9753784196827595, "learning_rate": 1.7617029517568502e-05, "loss": 0.3094, "step": 20385 }, { "epoch": 2.38, "grad_norm": 0.9774791800120136, "learning_rate": 1.7606071531858386e-05, "loss": 0.304, "step": 20390 }, { "epoch": 2.38, "grad_norm": 0.854841108571231, "learning_rate": 1.7595115102647672e-05, "loss": 0.3253, "step": 20395 }, { "epoch": 2.38, "grad_norm": 0.973166471733524, "learning_rate": 1.7584160232242784e-05, "loss": 0.3098, "step": 20400 }, { "epoch": 2.38, "grad_norm": 0.9790106172918394, "learning_rate": 1.7573206922949864e-05, "loss": 0.3505, "step": 20405 }, { "epoch": 2.38, "grad_norm": 1.0426348851970602, "learning_rate": 1.7562255177074677e-05, "loss": 0.305, "step": 20410 }, { "epoch": 2.39, "grad_norm": 0.9537398421609526, "learning_rate": 1.7551304996922675e-05, "loss": 0.3112, "step": 20415 }, { "epoch": 2.39, "grad_norm": 1.4043081186463895, "learning_rate": 1.7540356384798996e-05, "loss": 0.3744, "step": 20420 }, { "epoch": 2.39, "grad_norm": 0.8768677469798624, "learning_rate": 1.7529409343008445e-05, "loss": 0.3183, "step": 20425 }, { "epoch": 2.39, "grad_norm": 0.9103740863512297, "learning_rate": 1.7518463873855486e-05, "loss": 0.3253, "step": 20430 }, { "epoch": 2.39, "grad_norm": 0.9925106643874901, "learning_rate": 1.750751997964424e-05, "loss": 0.3322, "step": 20435 }, { "epoch": 2.39, "grad_norm": 0.9560424441306414, "learning_rate": 1.749657766267852e-05, "loss": 0.3543, "step": 20440 }, { "epoch": 2.39, "grad_norm": 0.8437797542296586, "learning_rate": 1.7485636925261806e-05, "loss": 0.3068, "step": 20445 }, { "epoch": 2.39, "grad_norm": 1.09168729894445, "learning_rate": 1.7474697769697222e-05, "loss": 0.3427, "step": 20450 }, { "epoch": 2.39, "grad_norm": 0.8503990414237921, "learning_rate": 1.7463760198287602e-05, "loss": 0.3394, "step": 20455 }, { "epoch": 2.39, "grad_norm": 0.8572862986765358, "learning_rate": 1.745282421333539e-05, "loss": 0.2855, "step": 20460 }, { "epoch": 2.39, "grad_norm": 0.951134552703443, "learning_rate": 1.7441889817142737e-05, "loss": 0.327, "step": 20465 }, { "epoch": 2.39, "grad_norm": 0.9700613902614589, "learning_rate": 1.7430957012011444e-05, "loss": 0.3073, "step": 20470 }, { "epoch": 2.39, "grad_norm": 0.9610966526333646, "learning_rate": 1.7420025800243e-05, "loss": 0.288, "step": 20475 }, { "epoch": 2.39, "grad_norm": 0.9680948412201371, "learning_rate": 1.7409096184138517e-05, "loss": 0.3166, "step": 20480 }, { "epoch": 2.39, "grad_norm": 0.8133368103559565, "learning_rate": 1.7398168165998798e-05, "loss": 0.322, "step": 20485 }, { "epoch": 2.39, "grad_norm": 0.9689577117767062, "learning_rate": 1.738724174812431e-05, "loss": 0.3244, "step": 20490 }, { "epoch": 2.39, "grad_norm": 1.0866970303455001, "learning_rate": 1.7376316932815174e-05, "loss": 0.3001, "step": 20495 }, { "epoch": 2.4, "grad_norm": 0.8133781800096883, "learning_rate": 1.7365393722371194e-05, "loss": 0.3048, "step": 20500 }, { "epoch": 2.4, "grad_norm": 0.8121343138556024, "learning_rate": 1.735447211909179e-05, "loss": 0.325, "step": 20505 }, { "epoch": 2.4, "grad_norm": 0.9108360070294981, "learning_rate": 1.73435521252761e-05, "loss": 0.3447, "step": 20510 }, { "epoch": 2.4, "grad_norm": 0.9053299857111836, "learning_rate": 1.7332633743222887e-05, "loss": 0.2491, "step": 20515 }, { "epoch": 2.4, "grad_norm": 0.9232985741818027, "learning_rate": 1.732171697523059e-05, "loss": 0.2873, "step": 20520 }, { "epoch": 2.4, "grad_norm": 1.0195852979904876, "learning_rate": 1.73108018235973e-05, "loss": 0.2974, "step": 20525 }, { "epoch": 2.4, "grad_norm": 1.087397788177389, "learning_rate": 1.7299888290620762e-05, "loss": 0.3484, "step": 20530 }, { "epoch": 2.4, "grad_norm": 0.9307975027803674, "learning_rate": 1.72889763785984e-05, "loss": 0.305, "step": 20535 }, { "epoch": 2.4, "grad_norm": 1.1617616823165626, "learning_rate": 1.7278066089827282e-05, "loss": 0.3136, "step": 20540 }, { "epoch": 2.4, "grad_norm": 1.3359904433851528, "learning_rate": 1.7267157426604148e-05, "loss": 0.3886, "step": 20545 }, { "epoch": 2.4, "grad_norm": 1.0804653042748416, "learning_rate": 1.725625039122536e-05, "loss": 0.313, "step": 20550 }, { "epoch": 2.4, "grad_norm": 0.9179991876521328, "learning_rate": 1.7245344985986987e-05, "loss": 0.3091, "step": 20555 }, { "epoch": 2.4, "grad_norm": 0.9345260839164207, "learning_rate": 1.7234441213184726e-05, "loss": 0.2925, "step": 20560 }, { "epoch": 2.4, "grad_norm": 0.9544697820682926, "learning_rate": 1.722353907511393e-05, "loss": 0.2916, "step": 20565 }, { "epoch": 2.4, "grad_norm": 1.136804425881183, "learning_rate": 1.7212638574069623e-05, "loss": 0.3242, "step": 20570 }, { "epoch": 2.4, "grad_norm": 0.9068613494483474, "learning_rate": 1.7201739712346464e-05, "loss": 0.2891, "step": 20575 }, { "epoch": 2.4, "grad_norm": 0.8867231952479596, "learning_rate": 1.719084249223878e-05, "loss": 0.3496, "step": 20580 }, { "epoch": 2.41, "grad_norm": 0.9129609215482772, "learning_rate": 1.7179946916040553e-05, "loss": 0.2766, "step": 20585 }, { "epoch": 2.41, "grad_norm": 0.7625365671053158, "learning_rate": 1.7169052986045413e-05, "loss": 0.2599, "step": 20590 }, { "epoch": 2.41, "grad_norm": 0.8844413078907383, "learning_rate": 1.7158160704546656e-05, "loss": 0.2618, "step": 20595 }, { "epoch": 2.41, "grad_norm": 0.8351459548422423, "learning_rate": 1.7147270073837203e-05, "loss": 0.2811, "step": 20600 }, { "epoch": 2.41, "grad_norm": 0.9604317844847353, "learning_rate": 1.7136381096209664e-05, "loss": 0.346, "step": 20605 }, { "epoch": 2.41, "grad_norm": 1.020036289302891, "learning_rate": 1.7125493773956265e-05, "loss": 0.2953, "step": 20610 }, { "epoch": 2.41, "grad_norm": 0.9252697031509793, "learning_rate": 1.7114608109368924e-05, "loss": 0.3301, "step": 20615 }, { "epoch": 2.41, "grad_norm": 0.8607711306172855, "learning_rate": 1.7103724104739167e-05, "loss": 0.3075, "step": 20620 }, { "epoch": 2.41, "grad_norm": 0.933881258289097, "learning_rate": 1.7092841762358203e-05, "loss": 0.2776, "step": 20625 }, { "epoch": 2.41, "grad_norm": 0.9013882227731405, "learning_rate": 1.7081961084516866e-05, "loss": 0.2868, "step": 20630 }, { "epoch": 2.41, "grad_norm": 0.9918354175891291, "learning_rate": 1.707108207350567e-05, "loss": 0.3237, "step": 20635 }, { "epoch": 2.41, "grad_norm": 1.019497834107873, "learning_rate": 1.7060204731614755e-05, "loss": 0.3267, "step": 20640 }, { "epoch": 2.41, "grad_norm": 0.9166277589657522, "learning_rate": 1.7049329061133907e-05, "loss": 0.3176, "step": 20645 }, { "epoch": 2.41, "grad_norm": 0.8899707757904882, "learning_rate": 1.7038455064352572e-05, "loss": 0.3125, "step": 20650 }, { "epoch": 2.41, "grad_norm": 0.8585113793155914, "learning_rate": 1.7027582743559843e-05, "loss": 0.3147, "step": 20655 }, { "epoch": 2.41, "grad_norm": 0.9388739924671904, "learning_rate": 1.7016712101044456e-05, "loss": 0.3305, "step": 20660 }, { "epoch": 2.41, "grad_norm": 0.9882207033822796, "learning_rate": 1.7005843139094807e-05, "loss": 0.3064, "step": 20665 }, { "epoch": 2.42, "grad_norm": 1.0144948178408904, "learning_rate": 1.69949758599989e-05, "loss": 0.3115, "step": 20670 }, { "epoch": 2.42, "grad_norm": 0.8036509267305074, "learning_rate": 1.698411026604444e-05, "loss": 0.2843, "step": 20675 }, { "epoch": 2.42, "grad_norm": 0.8517237684574767, "learning_rate": 1.6973246359518724e-05, "loss": 0.294, "step": 20680 }, { "epoch": 2.42, "grad_norm": 0.9234673618868314, "learning_rate": 1.6962384142708736e-05, "loss": 0.3341, "step": 20685 }, { "epoch": 2.42, "grad_norm": 0.8854195602713585, "learning_rate": 1.6951523617901084e-05, "loss": 0.3327, "step": 20690 }, { "epoch": 2.42, "grad_norm": 0.8830809344350237, "learning_rate": 1.6940664787382007e-05, "loss": 0.3364, "step": 20695 }, { "epoch": 2.42, "grad_norm": 0.9080807803233512, "learning_rate": 1.6929807653437412e-05, "loss": 0.3209, "step": 20700 }, { "epoch": 2.42, "grad_norm": 0.934693526725521, "learning_rate": 1.6918952218352846e-05, "loss": 0.3175, "step": 20705 }, { "epoch": 2.42, "grad_norm": 0.9434819447256206, "learning_rate": 1.6908098484413493e-05, "loss": 0.2822, "step": 20710 }, { "epoch": 2.42, "grad_norm": 0.8651975521694826, "learning_rate": 1.689724645390416e-05, "loss": 0.337, "step": 20715 }, { "epoch": 2.42, "grad_norm": 1.0821995155315314, "learning_rate": 1.688639612910933e-05, "loss": 0.285, "step": 20720 }, { "epoch": 2.42, "grad_norm": 0.9046286616752537, "learning_rate": 1.68755475123131e-05, "loss": 0.3192, "step": 20725 }, { "epoch": 2.42, "grad_norm": 0.9587134528252275, "learning_rate": 1.686470060579922e-05, "loss": 0.3072, "step": 20730 }, { "epoch": 2.42, "grad_norm": 0.9697384738696976, "learning_rate": 1.6853855411851087e-05, "loss": 0.3531, "step": 20735 }, { "epoch": 2.42, "grad_norm": 0.890594451563599, "learning_rate": 1.6843011932751717e-05, "loss": 0.3155, "step": 20740 }, { "epoch": 2.42, "grad_norm": 0.9752737066820358, "learning_rate": 1.6832170170783776e-05, "loss": 0.3066, "step": 20745 }, { "epoch": 2.42, "grad_norm": 0.9038019834976873, "learning_rate": 1.6821330128229573e-05, "loss": 0.2846, "step": 20750 }, { "epoch": 2.42, "grad_norm": 0.9688024861410557, "learning_rate": 1.681049180737105e-05, "loss": 0.2701, "step": 20755 }, { "epoch": 2.43, "grad_norm": 0.8357071365403306, "learning_rate": 1.679965521048979e-05, "loss": 0.2982, "step": 20760 }, { "epoch": 2.43, "grad_norm": 0.9845445436958881, "learning_rate": 1.6788820339866995e-05, "loss": 0.3069, "step": 20765 }, { "epoch": 2.43, "grad_norm": 0.822498152565524, "learning_rate": 1.677798719778354e-05, "loss": 0.2701, "step": 20770 }, { "epoch": 2.43, "grad_norm": 0.881184037213802, "learning_rate": 1.6767155786519895e-05, "loss": 0.3361, "step": 20775 }, { "epoch": 2.43, "grad_norm": 1.0960580204089654, "learning_rate": 1.675632610835621e-05, "loss": 0.3533, "step": 20780 }, { "epoch": 2.43, "grad_norm": 0.881113603312044, "learning_rate": 1.674549816557222e-05, "loss": 0.2994, "step": 20785 }, { "epoch": 2.43, "grad_norm": 0.8609811422833646, "learning_rate": 1.6734671960447333e-05, "loss": 0.3276, "step": 20790 }, { "epoch": 2.43, "grad_norm": 0.853432243819674, "learning_rate": 1.6723847495260573e-05, "loss": 0.3173, "step": 20795 }, { "epoch": 2.43, "grad_norm": 0.8940328699733874, "learning_rate": 1.6713024772290614e-05, "loss": 0.3047, "step": 20800 }, { "epoch": 2.43, "grad_norm": 1.0294205464621227, "learning_rate": 1.670220379381575e-05, "loss": 0.3541, "step": 20805 }, { "epoch": 2.43, "grad_norm": 0.9774537385969103, "learning_rate": 1.66913845621139e-05, "loss": 0.3288, "step": 20810 }, { "epoch": 2.43, "grad_norm": 0.9703909878845425, "learning_rate": 1.6680567079462625e-05, "loss": 0.3471, "step": 20815 }, { "epoch": 2.43, "grad_norm": 1.1556860907985436, "learning_rate": 1.6669751348139134e-05, "loss": 0.3527, "step": 20820 }, { "epoch": 2.43, "grad_norm": 0.9276049684644704, "learning_rate": 1.6658937370420236e-05, "loss": 0.3755, "step": 20825 }, { "epoch": 2.43, "grad_norm": 0.8648750472734364, "learning_rate": 1.6648125148582406e-05, "loss": 0.3026, "step": 20830 }, { "epoch": 2.43, "grad_norm": 0.8573433900894533, "learning_rate": 1.6637314684901713e-05, "loss": 0.3244, "step": 20835 }, { "epoch": 2.43, "grad_norm": 0.9443938285571626, "learning_rate": 1.6626505981653867e-05, "loss": 0.3236, "step": 20840 }, { "epoch": 2.44, "grad_norm": 0.9657790650448367, "learning_rate": 1.6615699041114225e-05, "loss": 0.3456, "step": 20845 }, { "epoch": 2.44, "grad_norm": 0.9753422846569205, "learning_rate": 1.6604893865557773e-05, "loss": 0.3278, "step": 20850 }, { "epoch": 2.44, "grad_norm": 1.2491490489624901, "learning_rate": 1.6594090457259092e-05, "loss": 0.3047, "step": 20855 }, { "epoch": 2.44, "grad_norm": 0.9474631513214848, "learning_rate": 1.658328881849242e-05, "loss": 0.3028, "step": 20860 }, { "epoch": 2.44, "grad_norm": 0.8847222692210336, "learning_rate": 1.6572488951531617e-05, "loss": 0.2928, "step": 20865 }, { "epoch": 2.44, "grad_norm": 0.7130706051034976, "learning_rate": 1.6561690858650164e-05, "loss": 0.2608, "step": 20870 }, { "epoch": 2.44, "grad_norm": 1.061097368136985, "learning_rate": 1.6550894542121186e-05, "loss": 0.3109, "step": 20875 }, { "epoch": 2.44, "grad_norm": 1.0742504512238875, "learning_rate": 1.6540100004217402e-05, "loss": 0.3064, "step": 20880 }, { "epoch": 2.44, "grad_norm": 0.9336147598275403, "learning_rate": 1.6529307247211187e-05, "loss": 0.2522, "step": 20885 }, { "epoch": 2.44, "grad_norm": 0.9896140210309206, "learning_rate": 1.6518516273374522e-05, "loss": 0.3639, "step": 20890 }, { "epoch": 2.44, "grad_norm": 0.8502843148498878, "learning_rate": 1.6507727084979026e-05, "loss": 0.3006, "step": 20895 }, { "epoch": 2.44, "grad_norm": 1.0080279193669577, "learning_rate": 1.649693968429594e-05, "loss": 0.3241, "step": 20900 }, { "epoch": 2.44, "grad_norm": 1.6558955517805702, "learning_rate": 1.6486154073596117e-05, "loss": 0.3661, "step": 20905 }, { "epoch": 2.44, "grad_norm": 1.048229699332212, "learning_rate": 1.6475370255150037e-05, "loss": 0.2927, "step": 20910 }, { "epoch": 2.44, "grad_norm": 0.9849249594950062, "learning_rate": 1.646458823122782e-05, "loss": 0.3282, "step": 20915 }, { "epoch": 2.44, "grad_norm": 0.7842460666991516, "learning_rate": 1.6453808004099192e-05, "loss": 0.3099, "step": 20920 }, { "epoch": 2.44, "grad_norm": 0.7856172373810463, "learning_rate": 1.644302957603349e-05, "loss": 0.2576, "step": 20925 }, { "epoch": 2.45, "grad_norm": 0.9245349575648175, "learning_rate": 1.6432252949299698e-05, "loss": 0.3118, "step": 20930 }, { "epoch": 2.45, "grad_norm": 0.8560391212522578, "learning_rate": 1.6421478126166406e-05, "loss": 0.2792, "step": 20935 }, { "epoch": 2.45, "grad_norm": 1.049366249690819, "learning_rate": 1.641070510890183e-05, "loss": 0.3484, "step": 20940 }, { "epoch": 2.45, "grad_norm": 0.8710677198901201, "learning_rate": 1.6399933899773805e-05, "loss": 0.2494, "step": 20945 }, { "epoch": 2.45, "grad_norm": 0.9297356372925153, "learning_rate": 1.6389164501049774e-05, "loss": 0.3285, "step": 20950 }, { "epoch": 2.45, "grad_norm": 0.9408150799930833, "learning_rate": 1.6378396914996812e-05, "loss": 0.3072, "step": 20955 }, { "epoch": 2.45, "grad_norm": 0.9654945901133778, "learning_rate": 1.6367631143881608e-05, "loss": 0.2907, "step": 20960 }, { "epoch": 2.45, "grad_norm": 0.916107020986903, "learning_rate": 1.6356867189970475e-05, "loss": 0.3095, "step": 20965 }, { "epoch": 2.45, "grad_norm": 0.8770344997279952, "learning_rate": 1.634610505552934e-05, "loss": 0.3104, "step": 20970 }, { "epoch": 2.45, "grad_norm": 1.0910499935207085, "learning_rate": 1.6335344742823737e-05, "loss": 0.3505, "step": 20975 }, { "epoch": 2.45, "grad_norm": 0.8453726345725098, "learning_rate": 1.6324586254118823e-05, "loss": 0.264, "step": 20980 }, { "epoch": 2.45, "grad_norm": 0.9731022217076088, "learning_rate": 1.6313829591679377e-05, "loss": 0.3277, "step": 20985 }, { "epoch": 2.45, "grad_norm": 0.8964074944333037, "learning_rate": 1.63030747577698e-05, "loss": 0.3091, "step": 20990 }, { "epoch": 2.45, "grad_norm": 0.9797503908378691, "learning_rate": 1.629232175465407e-05, "loss": 0.3036, "step": 20995 }, { "epoch": 2.45, "grad_norm": 1.1043463387354728, "learning_rate": 1.6281570584595833e-05, "loss": 0.3116, "step": 21000 }, { "epoch": 2.45, "grad_norm": 0.8566110963898634, "learning_rate": 1.6270821249858305e-05, "loss": 0.341, "step": 21005 }, { "epoch": 2.45, "grad_norm": 0.8645923223264744, "learning_rate": 1.6260073752704343e-05, "loss": 0.2899, "step": 21010 }, { "epoch": 2.46, "grad_norm": 0.9319372269066225, "learning_rate": 1.6249328095396415e-05, "loss": 0.3885, "step": 21015 }, { "epoch": 2.46, "grad_norm": 0.8348021237681238, "learning_rate": 1.6238584280196578e-05, "loss": 0.3243, "step": 21020 }, { "epoch": 2.46, "grad_norm": 1.2796334645012506, "learning_rate": 1.6227842309366525e-05, "loss": 0.3697, "step": 21025 }, { "epoch": 2.46, "grad_norm": 0.8753334106917359, "learning_rate": 1.6217102185167555e-05, "loss": 0.3334, "step": 21030 }, { "epoch": 2.46, "grad_norm": 1.0364566555468364, "learning_rate": 1.6206363909860573e-05, "loss": 0.3071, "step": 21035 }, { "epoch": 2.46, "grad_norm": 0.9702234932821351, "learning_rate": 1.619562748570611e-05, "loss": 0.3092, "step": 21040 }, { "epoch": 2.46, "grad_norm": 0.9433252560829067, "learning_rate": 1.618489291496427e-05, "loss": 0.2943, "step": 21045 }, { "epoch": 2.46, "grad_norm": 0.8497851155983013, "learning_rate": 1.617416019989482e-05, "loss": 0.3227, "step": 21050 }, { "epoch": 2.46, "grad_norm": 0.7951703507666193, "learning_rate": 1.6163429342757098e-05, "loss": 0.2961, "step": 21055 }, { "epoch": 2.46, "grad_norm": 0.9139584647556377, "learning_rate": 1.6152700345810063e-05, "loss": 0.3552, "step": 21060 }, { "epoch": 2.46, "grad_norm": 0.998207735343552, "learning_rate": 1.614197321131228e-05, "loss": 0.3026, "step": 21065 }, { "epoch": 2.46, "grad_norm": 0.894406830485271, "learning_rate": 1.6131247941521916e-05, "loss": 0.312, "step": 21070 }, { "epoch": 2.46, "grad_norm": 1.007765064582181, "learning_rate": 1.6120524538696757e-05, "loss": 0.3256, "step": 21075 }, { "epoch": 2.46, "grad_norm": 0.931676126096992, "learning_rate": 1.6109803005094203e-05, "loss": 0.3272, "step": 21080 }, { "epoch": 2.46, "grad_norm": 0.9227146381814008, "learning_rate": 1.6099083342971243e-05, "loss": 0.3193, "step": 21085 }, { "epoch": 2.46, "grad_norm": 1.3632590369105373, "learning_rate": 1.608836555458447e-05, "loss": 0.3132, "step": 21090 }, { "epoch": 2.46, "grad_norm": 1.228437500653841, "learning_rate": 1.6077649642190098e-05, "loss": 0.2778, "step": 21095 }, { "epoch": 2.47, "grad_norm": 1.255178901707754, "learning_rate": 1.6066935608043932e-05, "loss": 0.3171, "step": 21100 }, { "epoch": 2.47, "grad_norm": 1.094881540919093, "learning_rate": 1.6056223454401396e-05, "loss": 0.3329, "step": 21105 }, { "epoch": 2.47, "grad_norm": 0.8917136376999013, "learning_rate": 1.6045513183517518e-05, "loss": 0.3105, "step": 21110 }, { "epoch": 2.47, "grad_norm": 0.8138921819723233, "learning_rate": 1.6034804797646903e-05, "loss": 0.327, "step": 21115 }, { "epoch": 2.47, "grad_norm": 0.9964847694209485, "learning_rate": 1.602409829904379e-05, "loss": 0.3354, "step": 21120 }, { "epoch": 2.47, "grad_norm": 0.8926927176072407, "learning_rate": 1.6013393689962e-05, "loss": 0.2865, "step": 21125 }, { "epoch": 2.47, "grad_norm": 0.8478548005242057, "learning_rate": 1.600269097265499e-05, "loss": 0.2932, "step": 21130 }, { "epoch": 2.47, "grad_norm": 1.004624752413318, "learning_rate": 1.5991990149375763e-05, "loss": 0.2294, "step": 21135 }, { "epoch": 2.47, "grad_norm": 0.9891482440374775, "learning_rate": 1.5981291222376965e-05, "loss": 0.3007, "step": 21140 }, { "epoch": 2.47, "grad_norm": 0.9190798932379494, "learning_rate": 1.5970594193910842e-05, "loss": 0.304, "step": 21145 }, { "epoch": 2.47, "grad_norm": 1.058838624775201, "learning_rate": 1.5959899066229218e-05, "loss": 0.3432, "step": 21150 }, { "epoch": 2.47, "grad_norm": 0.8837833311307045, "learning_rate": 1.594920584158354e-05, "loss": 0.3194, "step": 21155 }, { "epoch": 2.47, "grad_norm": 0.9768447308248345, "learning_rate": 1.5938514522224836e-05, "loss": 0.2978, "step": 21160 }, { "epoch": 2.47, "grad_norm": 0.9130036586167442, "learning_rate": 1.5927825110403747e-05, "loss": 0.2978, "step": 21165 }, { "epoch": 2.47, "grad_norm": 1.0443342817975974, "learning_rate": 1.5917137608370496e-05, "loss": 0.3234, "step": 21170 }, { "epoch": 2.47, "grad_norm": 0.9358383767714993, "learning_rate": 1.5906452018374926e-05, "loss": 0.3153, "step": 21175 }, { "epoch": 2.47, "grad_norm": 0.9779753493468136, "learning_rate": 1.5895768342666464e-05, "loss": 0.3321, "step": 21180 }, { "epoch": 2.48, "grad_norm": 0.9531950154804638, "learning_rate": 1.5885086583494126e-05, "loss": 0.3092, "step": 21185 }, { "epoch": 2.48, "grad_norm": 0.9815328359454177, "learning_rate": 1.5874406743106536e-05, "loss": 0.3882, "step": 21190 }, { "epoch": 2.48, "grad_norm": 1.0536605715420615, "learning_rate": 1.5863728823751923e-05, "loss": 0.2998, "step": 21195 }, { "epoch": 2.48, "grad_norm": 0.9306718431105918, "learning_rate": 1.5853052827678096e-05, "loss": 0.2839, "step": 21200 }, { "epoch": 2.48, "grad_norm": 0.8420643194261997, "learning_rate": 1.5842378757132457e-05, "loss": 0.3093, "step": 21205 }, { "epoch": 2.48, "grad_norm": 1.011613633405876, "learning_rate": 1.583170661436202e-05, "loss": 0.2913, "step": 21210 }, { "epoch": 2.48, "grad_norm": 0.9154677363296995, "learning_rate": 1.5821036401613372e-05, "loss": 0.3225, "step": 21215 }, { "epoch": 2.48, "grad_norm": 1.0061547615463968, "learning_rate": 1.5810368121132707e-05, "loss": 0.3198, "step": 21220 }, { "epoch": 2.48, "grad_norm": 0.8333003271567757, "learning_rate": 1.5799701775165827e-05, "loss": 0.357, "step": 21225 }, { "epoch": 2.48, "grad_norm": 0.8620641799749553, "learning_rate": 1.5789037365958086e-05, "loss": 0.2813, "step": 21230 }, { "epoch": 2.48, "grad_norm": 0.9178676620739203, "learning_rate": 1.5778374895754462e-05, "loss": 0.3116, "step": 21235 }, { "epoch": 2.48, "grad_norm": 0.9689875611703933, "learning_rate": 1.576771436679952e-05, "loss": 0.3332, "step": 21240 }, { "epoch": 2.48, "grad_norm": 0.9341003327637706, "learning_rate": 1.5757055781337413e-05, "loss": 0.3163, "step": 21245 }, { "epoch": 2.48, "grad_norm": 0.9037989852739657, "learning_rate": 1.574639914161189e-05, "loss": 0.3519, "step": 21250 }, { "epoch": 2.48, "grad_norm": 0.8911504656732542, "learning_rate": 1.5735744449866265e-05, "loss": 0.3826, "step": 21255 }, { "epoch": 2.48, "grad_norm": 0.8536462281017833, "learning_rate": 1.5725091708343488e-05, "loss": 0.3162, "step": 21260 }, { "epoch": 2.48, "grad_norm": 0.8938295547254937, "learning_rate": 1.5714440919286046e-05, "loss": 0.3062, "step": 21265 }, { "epoch": 2.49, "grad_norm": 0.9444821201941503, "learning_rate": 1.570379208493607e-05, "loss": 0.2876, "step": 21270 }, { "epoch": 2.49, "grad_norm": 0.8476946156698107, "learning_rate": 1.5693145207535232e-05, "loss": 0.2514, "step": 21275 }, { "epoch": 2.49, "grad_norm": 1.1291391211192547, "learning_rate": 1.5682500289324812e-05, "loss": 0.2881, "step": 21280 }, { "epoch": 2.49, "grad_norm": 0.9809673886703361, "learning_rate": 1.5671857332545685e-05, "loss": 0.3014, "step": 21285 }, { "epoch": 2.49, "grad_norm": 0.8915515000075215, "learning_rate": 1.56612163394383e-05, "loss": 0.3185, "step": 21290 }, { "epoch": 2.49, "grad_norm": 0.9313460242350698, "learning_rate": 1.565057731224271e-05, "loss": 0.3252, "step": 21295 }, { "epoch": 2.49, "grad_norm": 0.7874113080840245, "learning_rate": 1.5639940253198516e-05, "loss": 0.296, "step": 21300 }, { "epoch": 2.49, "grad_norm": 0.8563869005385844, "learning_rate": 1.5629305164544946e-05, "loss": 0.2947, "step": 21305 }, { "epoch": 2.49, "grad_norm": 0.8183212889462009, "learning_rate": 1.5618672048520795e-05, "loss": 0.4039, "step": 21310 }, { "epoch": 2.49, "grad_norm": 0.9447373465993163, "learning_rate": 1.5608040907364448e-05, "loss": 0.3016, "step": 21315 }, { "epoch": 2.49, "grad_norm": 1.217664155674163, "learning_rate": 1.559741174331388e-05, "loss": 0.3035, "step": 21320 }, { "epoch": 2.49, "grad_norm": 0.9640455232239884, "learning_rate": 1.5586784558606622e-05, "loss": 0.347, "step": 21325 }, { "epoch": 2.49, "grad_norm": 0.995528036862888, "learning_rate": 1.5576159355479812e-05, "loss": 0.2933, "step": 21330 }, { "epoch": 2.49, "grad_norm": 0.8930067045422301, "learning_rate": 1.556553613617018e-05, "loss": 0.2754, "step": 21335 }, { "epoch": 2.49, "grad_norm": 0.9209789298288272, "learning_rate": 1.5554914902914015e-05, "loss": 0.3289, "step": 21340 }, { "epoch": 2.49, "grad_norm": 0.8976493568693282, "learning_rate": 1.5544295657947203e-05, "loss": 0.2918, "step": 21345 }, { "epoch": 2.49, "grad_norm": 1.0422036247979658, "learning_rate": 1.5533678403505195e-05, "loss": 0.3133, "step": 21350 }, { "epoch": 2.5, "grad_norm": 1.0630563397839716, "learning_rate": 1.5523063141823042e-05, "loss": 0.3355, "step": 21355 }, { "epoch": 2.5, "grad_norm": 0.841155753226513, "learning_rate": 1.551244987513537e-05, "loss": 0.2827, "step": 21360 }, { "epoch": 2.5, "grad_norm": 0.8899685364821089, "learning_rate": 1.5501838605676393e-05, "loss": 0.2858, "step": 21365 }, { "epoch": 2.5, "grad_norm": 0.9183498263743104, "learning_rate": 1.5491229335679866e-05, "loss": 0.2877, "step": 21370 }, { "epoch": 2.5, "grad_norm": 0.8818492997732088, "learning_rate": 1.5480622067379176e-05, "loss": 0.2936, "step": 21375 }, { "epoch": 2.5, "grad_norm": 1.098851486905534, "learning_rate": 1.5470016803007244e-05, "loss": 0.2858, "step": 21380 }, { "epoch": 2.5, "grad_norm": 1.001947808749853, "learning_rate": 1.5459413544796608e-05, "loss": 0.3084, "step": 21385 }, { "epoch": 2.5, "grad_norm": 0.8044809446530183, "learning_rate": 1.5448812294979366e-05, "loss": 0.3297, "step": 21390 }, { "epoch": 2.5, "grad_norm": 1.1354642131570167, "learning_rate": 1.5438213055787175e-05, "loss": 0.3572, "step": 21395 }, { "epoch": 2.5, "grad_norm": 0.9555562354743699, "learning_rate": 1.5427615829451293e-05, "loss": 0.3258, "step": 21400 }, { "epoch": 2.5, "grad_norm": 0.9041572980961791, "learning_rate": 1.5417020618202554e-05, "loss": 0.2744, "step": 21405 }, { "epoch": 2.5, "grad_norm": 0.92828666985902, "learning_rate": 1.540642742427136e-05, "loss": 0.3579, "step": 21410 }, { "epoch": 2.5, "grad_norm": 0.9714841916203468, "learning_rate": 1.5395836249887673e-05, "loss": 0.2795, "step": 21415 }, { "epoch": 2.5, "grad_norm": 0.9729645569409078, "learning_rate": 1.538524709728106e-05, "loss": 0.2857, "step": 21420 }, { "epoch": 2.5, "grad_norm": 0.9648888031954621, "learning_rate": 1.5374659968680656e-05, "loss": 0.3358, "step": 21425 }, { "epoch": 2.5, "grad_norm": 1.0237146928994092, "learning_rate": 1.5364074866315144e-05, "loss": 0.3014, "step": 21430 }, { "epoch": 2.5, "grad_norm": 1.0018081448364091, "learning_rate": 1.5353491792412817e-05, "loss": 0.3353, "step": 21435 }, { "epoch": 2.5, "grad_norm": 0.8784137250141396, "learning_rate": 1.5342910749201512e-05, "loss": 0.3098, "step": 21440 }, { "epoch": 2.51, "grad_norm": 0.8431315728926133, "learning_rate": 1.5332331738908643e-05, "loss": 0.2958, "step": 21445 }, { "epoch": 2.51, "grad_norm": 0.8431432780814623, "learning_rate": 1.5321754763761213e-05, "loss": 0.2835, "step": 21450 }, { "epoch": 2.51, "grad_norm": 0.9303355836360612, "learning_rate": 1.5311179825985787e-05, "loss": 0.3325, "step": 21455 }, { "epoch": 2.51, "grad_norm": 1.0197415995910337, "learning_rate": 1.5300606927808506e-05, "loss": 0.2789, "step": 21460 }, { "epoch": 2.51, "grad_norm": 0.8085242134872769, "learning_rate": 1.5290036071455055e-05, "loss": 0.3242, "step": 21465 }, { "epoch": 2.51, "grad_norm": 0.8451584232445656, "learning_rate": 1.5279467259150724e-05, "loss": 0.2614, "step": 21470 }, { "epoch": 2.51, "grad_norm": 1.2059538937613414, "learning_rate": 1.5268900493120358e-05, "loss": 0.2983, "step": 21475 }, { "epoch": 2.51, "grad_norm": 1.1868231319031395, "learning_rate": 1.5258335775588378e-05, "loss": 0.3298, "step": 21480 }, { "epoch": 2.51, "grad_norm": 0.9366140089236894, "learning_rate": 1.524777310877875e-05, "loss": 0.3165, "step": 21485 }, { "epoch": 2.51, "grad_norm": 0.8517976116392272, "learning_rate": 1.5237212494915042e-05, "loss": 0.328, "step": 21490 }, { "epoch": 2.51, "grad_norm": 0.8999219767268843, "learning_rate": 1.5226653936220364e-05, "loss": 0.3095, "step": 21495 }, { "epoch": 2.51, "grad_norm": 0.9716401143297234, "learning_rate": 1.5216097434917407e-05, "loss": 0.3172, "step": 21500 }, { "epoch": 2.51, "grad_norm": 0.9967004859855729, "learning_rate": 1.5205542993228434e-05, "loss": 0.3873, "step": 21505 }, { "epoch": 2.51, "grad_norm": 0.8556761493451421, "learning_rate": 1.5194990613375253e-05, "loss": 0.2538, "step": 21510 }, { "epoch": 2.51, "grad_norm": 0.8934369242786062, "learning_rate": 1.518444029757925e-05, "loss": 0.2548, "step": 21515 }, { "epoch": 2.51, "grad_norm": 0.7622862399215884, "learning_rate": 1.5173892048061383e-05, "loss": 0.2745, "step": 21520 }, { "epoch": 2.51, "grad_norm": 0.9950398043817247, "learning_rate": 1.5163345867042167e-05, "loss": 0.3396, "step": 21525 }, { "epoch": 2.52, "grad_norm": 0.9985447911690505, "learning_rate": 1.5152801756741691e-05, "loss": 0.3238, "step": 21530 }, { "epoch": 2.52, "grad_norm": 0.9748434500915857, "learning_rate": 1.5142259719379587e-05, "loss": 0.3188, "step": 21535 }, { "epoch": 2.52, "grad_norm": 1.0306792962321856, "learning_rate": 1.5131719757175076e-05, "loss": 0.3002, "step": 21540 }, { "epoch": 2.52, "grad_norm": 1.0281294746838727, "learning_rate": 1.5121181872346918e-05, "loss": 0.282, "step": 21545 }, { "epoch": 2.52, "grad_norm": 0.9616497316132993, "learning_rate": 1.5110646067113466e-05, "loss": 0.3332, "step": 21550 }, { "epoch": 2.52, "grad_norm": 0.8279731514348087, "learning_rate": 1.5100112343692604e-05, "loss": 0.3131, "step": 21555 }, { "epoch": 2.52, "grad_norm": 1.45107633871601, "learning_rate": 1.5089580704301787e-05, "loss": 0.308, "step": 21560 }, { "epoch": 2.52, "grad_norm": 0.9087351292104445, "learning_rate": 1.5079051151158044e-05, "loss": 0.2575, "step": 21565 }, { "epoch": 2.52, "grad_norm": 0.8484494290989507, "learning_rate": 1.5068523686477958e-05, "loss": 0.3036, "step": 21570 }, { "epoch": 2.52, "grad_norm": 0.899554809311599, "learning_rate": 1.5057998312477678e-05, "loss": 0.3071, "step": 21575 }, { "epoch": 2.52, "grad_norm": 0.8961586307425095, "learning_rate": 1.5047475031372878e-05, "loss": 0.308, "step": 21580 }, { "epoch": 2.52, "grad_norm": 0.9209952729958419, "learning_rate": 1.5036953845378838e-05, "loss": 0.2782, "step": 21585 }, { "epoch": 2.52, "grad_norm": 1.0869939772725805, "learning_rate": 1.5026434756710384e-05, "loss": 0.3754, "step": 21590 }, { "epoch": 2.52, "grad_norm": 0.9656953909379147, "learning_rate": 1.5015917767581878e-05, "loss": 0.3399, "step": 21595 }, { "epoch": 2.52, "grad_norm": 0.9976930024795567, "learning_rate": 1.5005402880207273e-05, "loss": 0.2385, "step": 21600 }, { "epoch": 2.52, "grad_norm": 0.8720208845712702, "learning_rate": 1.4994890096800052e-05, "loss": 0.3139, "step": 21605 }, { "epoch": 2.52, "grad_norm": 0.9752751614726635, "learning_rate": 1.498437941957326e-05, "loss": 0.3337, "step": 21610 }, { "epoch": 2.53, "grad_norm": 1.1258240022550678, "learning_rate": 1.4973870850739516e-05, "loss": 0.347, "step": 21615 }, { "epoch": 2.53, "grad_norm": 1.0253648923424215, "learning_rate": 1.496336439251099e-05, "loss": 0.3298, "step": 21620 }, { "epoch": 2.53, "grad_norm": 0.9862069058778715, "learning_rate": 1.4952860047099387e-05, "loss": 0.2947, "step": 21625 }, { "epoch": 2.53, "grad_norm": 0.8725384443656609, "learning_rate": 1.4942357816715981e-05, "loss": 0.2954, "step": 21630 }, { "epoch": 2.53, "grad_norm": 1.4489002970792106, "learning_rate": 1.4931857703571616e-05, "loss": 0.34, "step": 21635 }, { "epoch": 2.53, "grad_norm": 0.9150676855976392, "learning_rate": 1.4921359709876658e-05, "loss": 0.2853, "step": 21640 }, { "epoch": 2.53, "grad_norm": 0.8415966628341504, "learning_rate": 1.4910863837841068e-05, "loss": 0.3725, "step": 21645 }, { "epoch": 2.53, "grad_norm": 0.9018462955910234, "learning_rate": 1.4900370089674313e-05, "loss": 0.306, "step": 21650 }, { "epoch": 2.53, "grad_norm": 0.865625382019111, "learning_rate": 1.4889878467585449e-05, "loss": 0.2874, "step": 21655 }, { "epoch": 2.53, "grad_norm": 0.8711490168610587, "learning_rate": 1.4879388973783065e-05, "loss": 0.3086, "step": 21660 }, { "epoch": 2.53, "grad_norm": 0.8603117155347523, "learning_rate": 1.4868901610475322e-05, "loss": 0.33, "step": 21665 }, { "epoch": 2.53, "grad_norm": 0.9271282845041959, "learning_rate": 1.4858416379869917e-05, "loss": 0.3376, "step": 21670 }, { "epoch": 2.53, "grad_norm": 0.8589838925297786, "learning_rate": 1.4847933284174086e-05, "loss": 0.27, "step": 21675 }, { "epoch": 2.53, "grad_norm": 0.9557897100628673, "learning_rate": 1.4837452325594642e-05, "loss": 0.3324, "step": 21680 }, { "epoch": 2.53, "grad_norm": 1.0404946909050719, "learning_rate": 1.4826973506337937e-05, "loss": 0.338, "step": 21685 }, { "epoch": 2.53, "grad_norm": 0.9056108852906005, "learning_rate": 1.4816496828609878e-05, "loss": 0.3039, "step": 21690 }, { "epoch": 2.53, "grad_norm": 0.8622947791761709, "learning_rate": 1.4806022294615896e-05, "loss": 0.3446, "step": 21695 }, { "epoch": 2.54, "grad_norm": 1.0779904081132172, "learning_rate": 1.4795549906561005e-05, "loss": 0.3913, "step": 21700 }, { "epoch": 2.54, "grad_norm": 0.8984380911638086, "learning_rate": 1.4785079666649754e-05, "loss": 0.3157, "step": 21705 }, { "epoch": 2.54, "grad_norm": 1.0543290010377133, "learning_rate": 1.4774611577086226e-05, "loss": 0.3345, "step": 21710 }, { "epoch": 2.54, "grad_norm": 0.9341774053294449, "learning_rate": 1.4764145640074083e-05, "loss": 0.3099, "step": 21715 }, { "epoch": 2.54, "grad_norm": 0.9704998914701681, "learning_rate": 1.4753681857816499e-05, "loss": 0.3215, "step": 21720 }, { "epoch": 2.54, "grad_norm": 0.9637876248542727, "learning_rate": 1.4743220232516203e-05, "loss": 0.2813, "step": 21725 }, { "epoch": 2.54, "grad_norm": 0.9637558940049057, "learning_rate": 1.4732760766375491e-05, "loss": 0.3042, "step": 21730 }, { "epoch": 2.54, "grad_norm": 0.8846325978183377, "learning_rate": 1.472230346159619e-05, "loss": 0.2785, "step": 21735 }, { "epoch": 2.54, "grad_norm": 0.9453883799357359, "learning_rate": 1.4711848320379673e-05, "loss": 0.331, "step": 21740 }, { "epoch": 2.54, "grad_norm": 1.004157022319753, "learning_rate": 1.4701395344926843e-05, "loss": 0.2834, "step": 21745 }, { "epoch": 2.54, "grad_norm": 0.9172977530267649, "learning_rate": 1.4690944537438179e-05, "loss": 0.2968, "step": 21750 }, { "epoch": 2.54, "grad_norm": 0.8867169088187941, "learning_rate": 1.4680495900113667e-05, "loss": 0.3188, "step": 21755 }, { "epoch": 2.54, "grad_norm": 1.0909762921342887, "learning_rate": 1.467004943515287e-05, "loss": 0.3144, "step": 21760 }, { "epoch": 2.54, "grad_norm": 0.8780589425033255, "learning_rate": 1.4659605144754868e-05, "loss": 0.3529, "step": 21765 }, { "epoch": 2.54, "grad_norm": 0.9217936574749216, "learning_rate": 1.4649163031118302e-05, "loss": 0.3199, "step": 21770 }, { "epoch": 2.54, "grad_norm": 0.9703598843043927, "learning_rate": 1.4638723096441337e-05, "loss": 0.3449, "step": 21775 }, { "epoch": 2.54, "grad_norm": 0.9290620496605048, "learning_rate": 1.46282853429217e-05, "loss": 0.2795, "step": 21780 }, { "epoch": 2.55, "grad_norm": 0.9201341096369683, "learning_rate": 1.4617849772756642e-05, "loss": 0.3291, "step": 21785 }, { "epoch": 2.55, "grad_norm": 0.9117022379286077, "learning_rate": 1.4607416388142952e-05, "loss": 0.3016, "step": 21790 }, { "epoch": 2.55, "grad_norm": 1.204010209721409, "learning_rate": 1.4596985191276973e-05, "loss": 0.3136, "step": 21795 }, { "epoch": 2.55, "grad_norm": 0.8260889622901302, "learning_rate": 1.4586556184354583e-05, "loss": 0.301, "step": 21800 }, { "epoch": 2.55, "grad_norm": 0.9335207222344308, "learning_rate": 1.4576129369571207e-05, "loss": 0.2673, "step": 21805 }, { "epoch": 2.55, "grad_norm": 0.9442132899585794, "learning_rate": 1.4565704749121784e-05, "loss": 0.2842, "step": 21810 }, { "epoch": 2.55, "grad_norm": 0.858087870080883, "learning_rate": 1.4555282325200802e-05, "loss": 0.3121, "step": 21815 }, { "epoch": 2.55, "grad_norm": 0.8908228275451331, "learning_rate": 1.4544862100002298e-05, "loss": 0.3074, "step": 21820 }, { "epoch": 2.55, "grad_norm": 1.0055442115253612, "learning_rate": 1.4534444075719839e-05, "loss": 0.3184, "step": 21825 }, { "epoch": 2.55, "grad_norm": 0.9926824519971305, "learning_rate": 1.452402825454654e-05, "loss": 0.301, "step": 21830 }, { "epoch": 2.55, "grad_norm": 0.8926720005926836, "learning_rate": 1.4513614638675017e-05, "loss": 0.3001, "step": 21835 }, { "epoch": 2.55, "grad_norm": 0.8576112056427009, "learning_rate": 1.4503203230297462e-05, "loss": 0.3164, "step": 21840 }, { "epoch": 2.55, "grad_norm": 1.2227561495462187, "learning_rate": 1.4492794031605577e-05, "loss": 0.3562, "step": 21845 }, { "epoch": 2.55, "grad_norm": 0.8762471013867944, "learning_rate": 1.4482387044790607e-05, "loss": 0.3234, "step": 21850 }, { "epoch": 2.55, "grad_norm": 1.1274170474211664, "learning_rate": 1.4471982272043344e-05, "loss": 0.349, "step": 21855 }, { "epoch": 2.55, "grad_norm": 0.9190755223805522, "learning_rate": 1.4461579715554086e-05, "loss": 0.3633, "step": 21860 }, { "epoch": 2.55, "grad_norm": 0.9431066321046293, "learning_rate": 1.4451179377512685e-05, "loss": 0.3077, "step": 21865 }, { "epoch": 2.56, "grad_norm": 0.9766465029319518, "learning_rate": 1.4440781260108521e-05, "loss": 0.3168, "step": 21870 }, { "epoch": 2.56, "grad_norm": 0.980798157247656, "learning_rate": 1.4430385365530525e-05, "loss": 0.3461, "step": 21875 }, { "epoch": 2.56, "grad_norm": 0.9537709701778024, "learning_rate": 1.441999169596712e-05, "loss": 0.3398, "step": 21880 }, { "epoch": 2.56, "grad_norm": 1.0998677352492243, "learning_rate": 1.4409600253606279e-05, "loss": 0.2806, "step": 21885 }, { "epoch": 2.56, "grad_norm": 1.0949361133837958, "learning_rate": 1.4399211040635519e-05, "loss": 0.2775, "step": 21890 }, { "epoch": 2.56, "grad_norm": 0.9590641548640746, "learning_rate": 1.4388824059241877e-05, "loss": 0.2966, "step": 21895 }, { "epoch": 2.56, "grad_norm": 1.0177205487811838, "learning_rate": 1.437843931161193e-05, "loss": 0.2879, "step": 21900 }, { "epoch": 2.56, "grad_norm": 0.9492964748487662, "learning_rate": 1.4368056799931761e-05, "loss": 0.2671, "step": 21905 }, { "epoch": 2.56, "grad_norm": 0.8265834262847414, "learning_rate": 1.4357676526387011e-05, "loss": 0.3135, "step": 21910 }, { "epoch": 2.56, "grad_norm": 0.7950916529130719, "learning_rate": 1.4347298493162823e-05, "loss": 0.3439, "step": 21915 }, { "epoch": 2.56, "grad_norm": 0.8420633802320864, "learning_rate": 1.4336922702443886e-05, "loss": 0.356, "step": 21920 }, { "epoch": 2.56, "grad_norm": 0.8702770802355497, "learning_rate": 1.4326549156414425e-05, "loss": 0.3261, "step": 21925 }, { "epoch": 2.56, "grad_norm": 0.9036489919601084, "learning_rate": 1.4316177857258162e-05, "loss": 0.2752, "step": 21930 }, { "epoch": 2.56, "grad_norm": 0.8985751600578105, "learning_rate": 1.4305808807158371e-05, "loss": 0.3287, "step": 21935 }, { "epoch": 2.56, "grad_norm": 0.8925960711606314, "learning_rate": 1.4295442008297855e-05, "loss": 0.3474, "step": 21940 }, { "epoch": 2.56, "grad_norm": 0.8487347851618743, "learning_rate": 1.428507746285892e-05, "loss": 0.3373, "step": 21945 }, { "epoch": 2.56, "grad_norm": 0.8411604135176117, "learning_rate": 1.4274715173023424e-05, "loss": 0.2984, "step": 21950 }, { "epoch": 2.57, "grad_norm": 0.8968618547731546, "learning_rate": 1.426435514097272e-05, "loss": 0.2991, "step": 21955 }, { "epoch": 2.57, "grad_norm": 0.9819905059089123, "learning_rate": 1.4253997368887717e-05, "loss": 0.3157, "step": 21960 }, { "epoch": 2.57, "grad_norm": 0.9888725866185373, "learning_rate": 1.4243641858948828e-05, "loss": 0.3046, "step": 21965 }, { "epoch": 2.57, "grad_norm": 0.926040934764479, "learning_rate": 1.423328861333601e-05, "loss": 0.2801, "step": 21970 }, { "epoch": 2.57, "grad_norm": 1.0373093783367293, "learning_rate": 1.4222937634228723e-05, "loss": 0.3542, "step": 21975 }, { "epoch": 2.57, "grad_norm": 0.7467546127030646, "learning_rate": 1.4212588923805939e-05, "loss": 0.3083, "step": 21980 }, { "epoch": 2.57, "grad_norm": 0.9288160630406694, "learning_rate": 1.4202242484246187e-05, "loss": 0.2745, "step": 21985 }, { "epoch": 2.57, "grad_norm": 1.0316764878541007, "learning_rate": 1.4191898317727493e-05, "loss": 0.3658, "step": 21990 }, { "epoch": 2.57, "grad_norm": 0.7459196824500384, "learning_rate": 1.418155642642743e-05, "loss": 0.3142, "step": 21995 }, { "epoch": 2.57, "grad_norm": 0.9044159052033965, "learning_rate": 1.4171216812523053e-05, "loss": 0.2959, "step": 22000 }, { "epoch": 2.57, "grad_norm": 0.9539881445206426, "learning_rate": 1.4160879478190974e-05, "loss": 0.3374, "step": 22005 }, { "epoch": 2.57, "grad_norm": 0.9373581899158129, "learning_rate": 1.4150544425607298e-05, "loss": 0.3233, "step": 22010 }, { "epoch": 2.57, "grad_norm": 0.8690571068705678, "learning_rate": 1.4140211656947664e-05, "loss": 0.287, "step": 22015 }, { "epoch": 2.57, "grad_norm": 0.9674882461493011, "learning_rate": 1.4129881174387245e-05, "loss": 0.3475, "step": 22020 }, { "epoch": 2.57, "grad_norm": 0.8364071193852061, "learning_rate": 1.4119552980100686e-05, "loss": 0.2963, "step": 22025 }, { "epoch": 2.57, "grad_norm": 1.2259107520284953, "learning_rate": 1.4109227076262204e-05, "loss": 0.3318, "step": 22030 }, { "epoch": 2.57, "grad_norm": 0.8894802608889425, "learning_rate": 1.409890346504551e-05, "loss": 0.3229, "step": 22035 }, { "epoch": 2.58, "grad_norm": 1.0790495184870377, "learning_rate": 1.4088582148623821e-05, "loss": 0.3631, "step": 22040 }, { "epoch": 2.58, "grad_norm": 0.8795077450911526, "learning_rate": 1.4078263129169881e-05, "loss": 0.3219, "step": 22045 }, { "epoch": 2.58, "grad_norm": 0.8788546073802213, "learning_rate": 1.4067946408855953e-05, "loss": 0.3304, "step": 22050 }, { "epoch": 2.58, "grad_norm": 1.2314295934309314, "learning_rate": 1.405763198985382e-05, "loss": 0.3338, "step": 22055 }, { "epoch": 2.58, "grad_norm": 0.940266360497376, "learning_rate": 1.4047319874334775e-05, "loss": 0.3321, "step": 22060 }, { "epoch": 2.58, "grad_norm": 0.950986158184618, "learning_rate": 1.403701006446963e-05, "loss": 0.2729, "step": 22065 }, { "epoch": 2.58, "grad_norm": 0.8544533092020391, "learning_rate": 1.4026702562428706e-05, "loss": 0.3221, "step": 22070 }, { "epoch": 2.58, "grad_norm": 0.9276787665533494, "learning_rate": 1.4016397370381828e-05, "loss": 0.3282, "step": 22075 }, { "epoch": 2.58, "grad_norm": 0.9299458928615699, "learning_rate": 1.4006094490498357e-05, "loss": 0.3091, "step": 22080 }, { "epoch": 2.58, "grad_norm": 1.0240973407710294, "learning_rate": 1.3995793924947154e-05, "loss": 0.2674, "step": 22085 }, { "epoch": 2.58, "grad_norm": 0.9795739138584069, "learning_rate": 1.3985495675896606e-05, "loss": 0.3, "step": 22090 }, { "epoch": 2.58, "grad_norm": 0.9756987880235342, "learning_rate": 1.3975199745514587e-05, "loss": 0.3562, "step": 22095 }, { "epoch": 2.58, "grad_norm": 0.9266357004842157, "learning_rate": 1.3964906135968502e-05, "loss": 0.3221, "step": 22100 }, { "epoch": 2.58, "grad_norm": 0.9880875179234331, "learning_rate": 1.3954614849425278e-05, "loss": 0.2753, "step": 22105 }, { "epoch": 2.58, "grad_norm": 0.9715887432818779, "learning_rate": 1.3944325888051332e-05, "loss": 0.341, "step": 22110 }, { "epoch": 2.58, "grad_norm": 0.9319262744596718, "learning_rate": 1.3934039254012581e-05, "loss": 0.2815, "step": 22115 }, { "epoch": 2.58, "grad_norm": 0.9355321231137924, "learning_rate": 1.3923754949474482e-05, "loss": 0.2836, "step": 22120 }, { "epoch": 2.58, "grad_norm": 0.8493254490597868, "learning_rate": 1.391347297660199e-05, "loss": 0.3268, "step": 22125 }, { "epoch": 2.59, "grad_norm": 0.9593297755050466, "learning_rate": 1.3903193337559567e-05, "loss": 0.3339, "step": 22130 }, { "epoch": 2.59, "grad_norm": 0.9337554509211765, "learning_rate": 1.3892916034511194e-05, "loss": 0.3134, "step": 22135 }, { "epoch": 2.59, "grad_norm": 1.0314420833235727, "learning_rate": 1.3882641069620339e-05, "loss": 0.3131, "step": 22140 }, { "epoch": 2.59, "grad_norm": 0.8467455956369683, "learning_rate": 1.3872368445049988e-05, "loss": 0.3113, "step": 22145 }, { "epoch": 2.59, "grad_norm": 0.9612854707358428, "learning_rate": 1.3862098162962634e-05, "loss": 0.361, "step": 22150 }, { "epoch": 2.59, "grad_norm": 1.037164286377405, "learning_rate": 1.385183022552029e-05, "loss": 0.3351, "step": 22155 }, { "epoch": 2.59, "grad_norm": 0.8871833782297859, "learning_rate": 1.3841564634884468e-05, "loss": 0.2969, "step": 22160 }, { "epoch": 2.59, "grad_norm": 1.138127781359419, "learning_rate": 1.3831301393216162e-05, "loss": 0.327, "step": 22165 }, { "epoch": 2.59, "grad_norm": 1.0491251261188774, "learning_rate": 1.3821040502675917e-05, "loss": 0.2956, "step": 22170 }, { "epoch": 2.59, "grad_norm": 0.910138684742929, "learning_rate": 1.3810781965423733e-05, "loss": 0.2876, "step": 22175 }, { "epoch": 2.59, "grad_norm": 0.916874112830565, "learning_rate": 1.3800525783619153e-05, "loss": 0.3252, "step": 22180 }, { "epoch": 2.59, "grad_norm": 0.9871472402334044, "learning_rate": 1.3790271959421219e-05, "loss": 0.2914, "step": 22185 }, { "epoch": 2.59, "grad_norm": 0.9667719324447465, "learning_rate": 1.3780020494988446e-05, "loss": 0.3316, "step": 22190 }, { "epoch": 2.59, "grad_norm": 0.9021139700593398, "learning_rate": 1.376977139247889e-05, "loss": 0.2942, "step": 22195 }, { "epoch": 2.59, "grad_norm": 0.9812378086512034, "learning_rate": 1.3759524654050099e-05, "loss": 0.3077, "step": 22200 }, { "epoch": 2.59, "grad_norm": 1.0369509005832003, "learning_rate": 1.3749280281859115e-05, "loss": 0.3216, "step": 22205 }, { "epoch": 2.59, "grad_norm": 0.9880425173466699, "learning_rate": 1.3739038278062469e-05, "loss": 0.2905, "step": 22210 }, { "epoch": 2.6, "grad_norm": 1.0030333932721018, "learning_rate": 1.3728798644816226e-05, "loss": 0.2905, "step": 22215 }, { "epoch": 2.6, "grad_norm": 0.9145963169819733, "learning_rate": 1.371856138427593e-05, "loss": 0.3243, "step": 22220 }, { "epoch": 2.6, "grad_norm": 0.9094212256059082, "learning_rate": 1.3708326498596635e-05, "loss": 0.3262, "step": 22225 }, { "epoch": 2.6, "grad_norm": 1.7211366808369744, "learning_rate": 1.3698093989932904e-05, "loss": 0.2928, "step": 22230 }, { "epoch": 2.6, "grad_norm": 0.9290274270175071, "learning_rate": 1.368786386043877e-05, "loss": 0.3348, "step": 22235 }, { "epoch": 2.6, "grad_norm": 0.8849402483176418, "learning_rate": 1.3677636112267783e-05, "loss": 0.3255, "step": 22240 }, { "epoch": 2.6, "grad_norm": 0.8907632563006805, "learning_rate": 1.3667410747572993e-05, "loss": 0.3148, "step": 22245 }, { "epoch": 2.6, "grad_norm": 1.2672466276436878, "learning_rate": 1.3657187768506947e-05, "loss": 0.3489, "step": 22250 }, { "epoch": 2.6, "grad_norm": 0.9445377996551351, "learning_rate": 1.3646967177221703e-05, "loss": 0.3111, "step": 22255 }, { "epoch": 2.6, "grad_norm": 0.9370018840528048, "learning_rate": 1.363674897586878e-05, "loss": 0.3457, "step": 22260 }, { "epoch": 2.6, "grad_norm": 0.8729678721048351, "learning_rate": 1.3626533166599234e-05, "loss": 0.2879, "step": 22265 }, { "epoch": 2.6, "grad_norm": 0.9129632297947332, "learning_rate": 1.3616319751563583e-05, "loss": 0.345, "step": 22270 }, { "epoch": 2.6, "grad_norm": 0.9279111883711738, "learning_rate": 1.3606108732911882e-05, "loss": 0.3475, "step": 22275 }, { "epoch": 2.6, "grad_norm": 0.9085686116761412, "learning_rate": 1.3595900112793628e-05, "loss": 0.326, "step": 22280 }, { "epoch": 2.6, "grad_norm": 0.8487967564167486, "learning_rate": 1.3585693893357859e-05, "loss": 0.3286, "step": 22285 }, { "epoch": 2.6, "grad_norm": 0.8462189507118796, "learning_rate": 1.357549007675309e-05, "loss": 0.2757, "step": 22290 }, { "epoch": 2.6, "grad_norm": 0.7874921812807293, "learning_rate": 1.356528866512734e-05, "loss": 0.3138, "step": 22295 }, { "epoch": 2.61, "grad_norm": 0.8288795215023179, "learning_rate": 1.35550896606281e-05, "loss": 0.332, "step": 22300 }, { "epoch": 2.61, "grad_norm": 0.9836124785910392, "learning_rate": 1.3544893065402367e-05, "loss": 0.3086, "step": 22305 }, { "epoch": 2.61, "grad_norm": 1.0011073229720042, "learning_rate": 1.3534698881596634e-05, "loss": 0.2994, "step": 22310 }, { "epoch": 2.61, "grad_norm": 1.1202544185226706, "learning_rate": 1.3524507111356882e-05, "loss": 0.2973, "step": 22315 }, { "epoch": 2.61, "grad_norm": 0.9927803989554137, "learning_rate": 1.3514317756828587e-05, "loss": 0.2989, "step": 22320 }, { "epoch": 2.61, "grad_norm": 0.8851821084364495, "learning_rate": 1.350413082015673e-05, "loss": 0.3116, "step": 22325 }, { "epoch": 2.61, "grad_norm": 0.9357914636791029, "learning_rate": 1.349394630348574e-05, "loss": 0.3163, "step": 22330 }, { "epoch": 2.61, "grad_norm": 0.9357242386931341, "learning_rate": 1.348376420895959e-05, "loss": 0.3144, "step": 22335 }, { "epoch": 2.61, "grad_norm": 1.055088450514771, "learning_rate": 1.3473584538721695e-05, "loss": 0.3318, "step": 22340 }, { "epoch": 2.61, "grad_norm": 0.8079073834693146, "learning_rate": 1.3463407294915003e-05, "loss": 0.2924, "step": 22345 }, { "epoch": 2.61, "grad_norm": 0.9241905447655252, "learning_rate": 1.345323247968191e-05, "loss": 0.3139, "step": 22350 }, { "epoch": 2.61, "grad_norm": 1.200051772921454, "learning_rate": 1.3443060095164334e-05, "loss": 0.2796, "step": 22355 }, { "epoch": 2.61, "grad_norm": 0.94352949674795, "learning_rate": 1.3432890143503662e-05, "loss": 0.3038, "step": 22360 }, { "epoch": 2.61, "grad_norm": 0.8614922011973818, "learning_rate": 1.3422722626840791e-05, "loss": 0.2795, "step": 22365 }, { "epoch": 2.61, "grad_norm": 0.9335999127995972, "learning_rate": 1.3412557547316078e-05, "loss": 0.282, "step": 22370 }, { "epoch": 2.61, "grad_norm": 1.1108151826143688, "learning_rate": 1.3402394907069371e-05, "loss": 0.2669, "step": 22375 }, { "epoch": 2.61, "grad_norm": 0.8977587632381109, "learning_rate": 1.3392234708240014e-05, "loss": 0.2529, "step": 22380 }, { "epoch": 2.62, "grad_norm": 0.9005057039331144, "learning_rate": 1.338207695296685e-05, "loss": 0.3057, "step": 22385 }, { "epoch": 2.62, "grad_norm": 0.7995389290467421, "learning_rate": 1.3371921643388177e-05, "loss": 0.3341, "step": 22390 }, { "epoch": 2.62, "grad_norm": 0.8662523631006348, "learning_rate": 1.3361768781641815e-05, "loss": 0.2716, "step": 22395 }, { "epoch": 2.62, "grad_norm": 0.9834719270831563, "learning_rate": 1.3351618369865033e-05, "loss": 0.3302, "step": 22400 }, { "epoch": 2.62, "grad_norm": 0.9287366305473104, "learning_rate": 1.3341470410194589e-05, "loss": 0.2872, "step": 22405 }, { "epoch": 2.62, "grad_norm": 0.9187165831535372, "learning_rate": 1.3331324904766745e-05, "loss": 0.3323, "step": 22410 }, { "epoch": 2.62, "grad_norm": 1.0300046433533967, "learning_rate": 1.332118185571725e-05, "loss": 0.2982, "step": 22415 }, { "epoch": 2.62, "grad_norm": 1.0061265493377358, "learning_rate": 1.3311041265181295e-05, "loss": 0.3156, "step": 22420 }, { "epoch": 2.62, "grad_norm": 0.9755586053544507, "learning_rate": 1.33009031352936e-05, "loss": 0.3387, "step": 22425 }, { "epoch": 2.62, "grad_norm": 0.9528265438692267, "learning_rate": 1.3290767468188347e-05, "loss": 0.2738, "step": 22430 }, { "epoch": 2.62, "grad_norm": 0.9918111684227027, "learning_rate": 1.328063426599919e-05, "loss": 0.3154, "step": 22435 }, { "epoch": 2.62, "grad_norm": 1.0567538575954991, "learning_rate": 1.3270503530859285e-05, "loss": 0.3355, "step": 22440 }, { "epoch": 2.62, "grad_norm": 0.994060291909992, "learning_rate": 1.3260375264901246e-05, "loss": 0.302, "step": 22445 }, { "epoch": 2.62, "grad_norm": 1.3885612477963847, "learning_rate": 1.3250249470257181e-05, "loss": 0.3221, "step": 22450 }, { "epoch": 2.62, "grad_norm": 1.3426906968977006, "learning_rate": 1.3240126149058685e-05, "loss": 0.3113, "step": 22455 }, { "epoch": 2.62, "grad_norm": 0.902323349376137, "learning_rate": 1.3230005303436827e-05, "loss": 0.3226, "step": 22460 }, { "epoch": 2.62, "grad_norm": 0.8948102539833652, "learning_rate": 1.321988693552214e-05, "loss": 0.3024, "step": 22465 }, { "epoch": 2.63, "grad_norm": 0.8720083584700585, "learning_rate": 1.320977104744464e-05, "loss": 0.3368, "step": 22470 }, { "epoch": 2.63, "grad_norm": 0.8176955567209032, "learning_rate": 1.3199657641333834e-05, "loss": 0.3016, "step": 22475 }, { "epoch": 2.63, "grad_norm": 0.9915903249964074, "learning_rate": 1.3189546719318707e-05, "loss": 0.2988, "step": 22480 }, { "epoch": 2.63, "grad_norm": 0.9763553630051854, "learning_rate": 1.3179438283527718e-05, "loss": 0.3508, "step": 22485 }, { "epoch": 2.63, "grad_norm": 0.9728259857812372, "learning_rate": 1.3169332336088777e-05, "loss": 0.3427, "step": 22490 }, { "epoch": 2.63, "grad_norm": 0.9537774092069546, "learning_rate": 1.3159228879129315e-05, "loss": 0.3316, "step": 22495 }, { "epoch": 2.63, "grad_norm": 0.9063923724301263, "learning_rate": 1.3149127914776196e-05, "loss": 0.3076, "step": 22500 }, { "epoch": 2.63, "grad_norm": 0.8530138379409117, "learning_rate": 1.3139029445155787e-05, "loss": 0.2639, "step": 22505 }, { "epoch": 2.63, "grad_norm": 0.8501351601698401, "learning_rate": 1.3128933472393933e-05, "loss": 0.2753, "step": 22510 }, { "epoch": 2.63, "grad_norm": 0.8754644724040899, "learning_rate": 1.3118839998615923e-05, "loss": 0.2801, "step": 22515 }, { "epoch": 2.63, "grad_norm": 1.0782942094429984, "learning_rate": 1.3108749025946548e-05, "loss": 0.3156, "step": 22520 }, { "epoch": 2.63, "grad_norm": 0.977207959497981, "learning_rate": 1.309866055651007e-05, "loss": 0.3556, "step": 22525 }, { "epoch": 2.63, "grad_norm": 0.9827450199941207, "learning_rate": 1.3088574592430203e-05, "loss": 0.2921, "step": 22530 }, { "epoch": 2.63, "grad_norm": 0.9917675416297038, "learning_rate": 1.3078491135830162e-05, "loss": 0.3382, "step": 22535 }, { "epoch": 2.63, "grad_norm": 1.0068197277109445, "learning_rate": 1.3068410188832608e-05, "loss": 0.3286, "step": 22540 }, { "epoch": 2.63, "grad_norm": 0.8704928072284427, "learning_rate": 1.3058331753559688e-05, "loss": 0.3166, "step": 22545 }, { "epoch": 2.63, "grad_norm": 1.0238253220711975, "learning_rate": 1.3048255832133021e-05, "loss": 0.3175, "step": 22550 }, { "epoch": 2.64, "grad_norm": 0.9657460608900847, "learning_rate": 1.3038182426673708e-05, "loss": 0.3299, "step": 22555 }, { "epoch": 2.64, "grad_norm": 0.91784610433293, "learning_rate": 1.3028111539302284e-05, "loss": 0.3142, "step": 22560 }, { "epoch": 2.64, "grad_norm": 0.9897676336839573, "learning_rate": 1.3018043172138795e-05, "loss": 0.3354, "step": 22565 }, { "epoch": 2.64, "grad_norm": 0.9026601028583013, "learning_rate": 1.3007977327302718e-05, "loss": 0.3476, "step": 22570 }, { "epoch": 2.64, "grad_norm": 1.1044215284632963, "learning_rate": 1.2997914006913025e-05, "loss": 0.3696, "step": 22575 }, { "epoch": 2.64, "grad_norm": 0.8579009026236615, "learning_rate": 1.298785321308817e-05, "loss": 0.274, "step": 22580 }, { "epoch": 2.64, "grad_norm": 0.9641122322953601, "learning_rate": 1.2977794947946027e-05, "loss": 0.288, "step": 22585 }, { "epoch": 2.64, "grad_norm": 0.8277263305868358, "learning_rate": 1.296773921360398e-05, "loss": 0.2994, "step": 22590 }, { "epoch": 2.64, "grad_norm": 1.1993870908859585, "learning_rate": 1.2957686012178877e-05, "loss": 0.2986, "step": 22595 }, { "epoch": 2.64, "grad_norm": 1.0301932819527575, "learning_rate": 1.2947635345786998e-05, "loss": 0.2961, "step": 22600 }, { "epoch": 2.64, "grad_norm": 0.9601215288273234, "learning_rate": 1.293758721654414e-05, "loss": 0.296, "step": 22605 }, { "epoch": 2.64, "grad_norm": 1.1635434021530018, "learning_rate": 1.292754162656551e-05, "loss": 0.2782, "step": 22610 }, { "epoch": 2.64, "grad_norm": 0.8829817937768688, "learning_rate": 1.291749857796583e-05, "loss": 0.3161, "step": 22615 }, { "epoch": 2.64, "grad_norm": 1.1350742722734048, "learning_rate": 1.290745807285926e-05, "loss": 0.3428, "step": 22620 }, { "epoch": 2.64, "grad_norm": 0.9924071622341578, "learning_rate": 1.2897420113359445e-05, "loss": 0.3302, "step": 22625 }, { "epoch": 2.64, "grad_norm": 1.008556091355803, "learning_rate": 1.288738470157947e-05, "loss": 0.3827, "step": 22630 }, { "epoch": 2.64, "grad_norm": 0.8654524650235262, "learning_rate": 1.2877351839631884e-05, "loss": 0.3023, "step": 22635 }, { "epoch": 2.65, "grad_norm": 0.8770868541537176, "learning_rate": 1.286732152962872e-05, "loss": 0.3003, "step": 22640 }, { "epoch": 2.65, "grad_norm": 0.9385612017530551, "learning_rate": 1.2857293773681462e-05, "loss": 0.293, "step": 22645 }, { "epoch": 2.65, "grad_norm": 1.0058701076610725, "learning_rate": 1.2847268573901067e-05, "loss": 0.3275, "step": 22650 }, { "epoch": 2.65, "grad_norm": 1.1351550407047633, "learning_rate": 1.2837245932397928e-05, "loss": 0.2854, "step": 22655 }, { "epoch": 2.65, "grad_norm": 0.8812545562750342, "learning_rate": 1.2827225851281935e-05, "loss": 0.3026, "step": 22660 }, { "epoch": 2.65, "grad_norm": 0.9334285091298161, "learning_rate": 1.2817208332662398e-05, "loss": 0.3087, "step": 22665 }, { "epoch": 2.65, "grad_norm": 0.8975354225181005, "learning_rate": 1.280719337864812e-05, "loss": 0.3279, "step": 22670 }, { "epoch": 2.65, "grad_norm": 0.8866553139546327, "learning_rate": 1.2797180991347365e-05, "loss": 0.3334, "step": 22675 }, { "epoch": 2.65, "grad_norm": 1.2602813394651082, "learning_rate": 1.2787171172867826e-05, "loss": 0.2196, "step": 22680 }, { "epoch": 2.65, "grad_norm": 0.8192613215676635, "learning_rate": 1.2777163925316682e-05, "loss": 0.2976, "step": 22685 }, { "epoch": 2.65, "grad_norm": 1.0084069949842545, "learning_rate": 1.2767159250800575e-05, "loss": 0.3489, "step": 22690 }, { "epoch": 2.65, "grad_norm": 1.2190776303772917, "learning_rate": 1.2757157151425585e-05, "loss": 0.359, "step": 22695 }, { "epoch": 2.65, "grad_norm": 0.9386472738505037, "learning_rate": 1.2747157629297246e-05, "loss": 0.3185, "step": 22700 }, { "epoch": 2.65, "grad_norm": 0.9288912079926299, "learning_rate": 1.2737160686520578e-05, "loss": 0.3335, "step": 22705 }, { "epoch": 2.65, "grad_norm": 1.0490074872445914, "learning_rate": 1.2727166325200029e-05, "loss": 0.2788, "step": 22710 }, { "epoch": 2.65, "grad_norm": 0.956614048346336, "learning_rate": 1.2717174547439531e-05, "loss": 0.323, "step": 22715 }, { "epoch": 2.65, "grad_norm": 0.8576283768806858, "learning_rate": 1.2707185355342463e-05, "loss": 0.3005, "step": 22720 }, { "epoch": 2.66, "grad_norm": 1.0152148478707892, "learning_rate": 1.2697198751011641e-05, "loss": 0.3248, "step": 22725 }, { "epoch": 2.66, "grad_norm": 1.069992861691313, "learning_rate": 1.2687214736549347e-05, "loss": 0.314, "step": 22730 }, { "epoch": 2.66, "grad_norm": 0.9161206915867551, "learning_rate": 1.2677233314057325e-05, "loss": 0.3414, "step": 22735 }, { "epoch": 2.66, "grad_norm": 0.8880615504729791, "learning_rate": 1.2667254485636768e-05, "loss": 0.3206, "step": 22740 }, { "epoch": 2.66, "grad_norm": 1.0020043974106367, "learning_rate": 1.2657278253388339e-05, "loss": 0.3089, "step": 22745 }, { "epoch": 2.66, "grad_norm": 1.041960170687636, "learning_rate": 1.2647304619412115e-05, "loss": 0.387, "step": 22750 }, { "epoch": 2.66, "grad_norm": 0.9159069360908418, "learning_rate": 1.263733358580767e-05, "loss": 0.2891, "step": 22755 }, { "epoch": 2.66, "grad_norm": 1.0165755903192042, "learning_rate": 1.2627365154673996e-05, "loss": 0.2849, "step": 22760 }, { "epoch": 2.66, "grad_norm": 0.8613151940899465, "learning_rate": 1.2617399328109569e-05, "loss": 0.2735, "step": 22765 }, { "epoch": 2.66, "grad_norm": 0.9403422949616127, "learning_rate": 1.2607436108212278e-05, "loss": 0.34, "step": 22770 }, { "epoch": 2.66, "grad_norm": 0.9925258914315268, "learning_rate": 1.25974754970795e-05, "loss": 0.3683, "step": 22775 }, { "epoch": 2.66, "grad_norm": 1.0820569641316524, "learning_rate": 1.2587517496808043e-05, "loss": 0.3219, "step": 22780 }, { "epoch": 2.66, "grad_norm": 0.9830223344350332, "learning_rate": 1.2577562109494176e-05, "loss": 0.2828, "step": 22785 }, { "epoch": 2.66, "grad_norm": 0.8993939195910761, "learning_rate": 1.2567609337233616e-05, "loss": 0.2778, "step": 22790 }, { "epoch": 2.66, "grad_norm": 0.8928369391557056, "learning_rate": 1.2557659182121521e-05, "loss": 0.3312, "step": 22795 }, { "epoch": 2.66, "grad_norm": 0.9871003396337353, "learning_rate": 1.2547711646252488e-05, "loss": 0.2489, "step": 22800 }, { "epoch": 2.66, "grad_norm": 0.9881226477898498, "learning_rate": 1.253776673172059e-05, "loss": 0.3138, "step": 22805 }, { "epoch": 2.67, "grad_norm": 0.9751302778861034, "learning_rate": 1.252782444061934e-05, "loss": 0.2824, "step": 22810 }, { "epoch": 2.67, "grad_norm": 0.8227718195045954, "learning_rate": 1.25178847750417e-05, "loss": 0.3184, "step": 22815 }, { "epoch": 2.67, "grad_norm": 0.9784556220522435, "learning_rate": 1.250794773708005e-05, "loss": 0.2949, "step": 22820 }, { "epoch": 2.67, "grad_norm": 0.8974417702940808, "learning_rate": 1.2498013328826266e-05, "loss": 0.3271, "step": 22825 }, { "epoch": 2.67, "grad_norm": 0.9215540502532096, "learning_rate": 1.2488081552371625e-05, "loss": 0.3078, "step": 22830 }, { "epoch": 2.67, "grad_norm": 0.9041214809423772, "learning_rate": 1.2478152409806887e-05, "loss": 0.3268, "step": 22835 }, { "epoch": 2.67, "grad_norm": 1.0712076396392887, "learning_rate": 1.2468225903222222e-05, "loss": 0.3354, "step": 22840 }, { "epoch": 2.67, "grad_norm": 0.7961951267326319, "learning_rate": 1.2458302034707273e-05, "loss": 0.2792, "step": 22845 }, { "epoch": 2.67, "grad_norm": 0.9036218869960682, "learning_rate": 1.244838080635112e-05, "loss": 0.2816, "step": 22850 }, { "epoch": 2.67, "grad_norm": 0.9248018611702412, "learning_rate": 1.2438462220242294e-05, "loss": 0.2807, "step": 22855 }, { "epoch": 2.67, "grad_norm": 0.8496000027457463, "learning_rate": 1.2428546278468753e-05, "loss": 0.327, "step": 22860 }, { "epoch": 2.67, "grad_norm": 1.0322936271653458, "learning_rate": 1.2418632983117895e-05, "loss": 0.3154, "step": 22865 }, { "epoch": 2.67, "grad_norm": 1.0752425689502767, "learning_rate": 1.2408722336276582e-05, "loss": 0.3314, "step": 22870 }, { "epoch": 2.67, "grad_norm": 0.9154246028702314, "learning_rate": 1.2398814340031111e-05, "loss": 0.2862, "step": 22875 }, { "epoch": 2.67, "grad_norm": 0.8775205498737138, "learning_rate": 1.238890899646722e-05, "loss": 0.3083, "step": 22880 }, { "epoch": 2.67, "grad_norm": 0.9402785406703711, "learning_rate": 1.237900630767009e-05, "loss": 0.2985, "step": 22885 }, { "epoch": 2.67, "grad_norm": 0.9048407571226267, "learning_rate": 1.2369106275724338e-05, "loss": 0.2926, "step": 22890 }, { "epoch": 2.67, "grad_norm": 1.106763611185044, "learning_rate": 1.2359208902714015e-05, "loss": 0.3305, "step": 22895 }, { "epoch": 2.68, "grad_norm": 0.8170659558529766, "learning_rate": 1.2349314190722628e-05, "loss": 0.2429, "step": 22900 }, { "epoch": 2.68, "grad_norm": 0.9095155861723698, "learning_rate": 1.2339422141833127e-05, "loss": 0.3375, "step": 22905 }, { "epoch": 2.68, "grad_norm": 0.9850619858537725, "learning_rate": 1.2329532758127876e-05, "loss": 0.305, "step": 22910 }, { "epoch": 2.68, "grad_norm": 0.9414565556280247, "learning_rate": 1.2319646041688699e-05, "loss": 0.3297, "step": 22915 }, { "epoch": 2.68, "grad_norm": 0.9239046040538416, "learning_rate": 1.230976199459687e-05, "loss": 0.3156, "step": 22920 }, { "epoch": 2.68, "grad_norm": 0.881956410083199, "learning_rate": 1.2299880618933054e-05, "loss": 0.2611, "step": 22925 }, { "epoch": 2.68, "grad_norm": 0.9519661667281967, "learning_rate": 1.2290001916777417e-05, "loss": 0.3146, "step": 22930 }, { "epoch": 2.68, "grad_norm": 0.9628756422793493, "learning_rate": 1.22801258902095e-05, "loss": 0.3105, "step": 22935 }, { "epoch": 2.68, "grad_norm": 1.14828743599253, "learning_rate": 1.2270252541308321e-05, "loss": 0.2648, "step": 22940 }, { "epoch": 2.68, "grad_norm": 0.8678421024765048, "learning_rate": 1.2260381872152326e-05, "loss": 0.3029, "step": 22945 }, { "epoch": 2.68, "grad_norm": 0.9573770585311471, "learning_rate": 1.2250513884819403e-05, "loss": 0.3216, "step": 22950 }, { "epoch": 2.68, "grad_norm": 0.9844073693907955, "learning_rate": 1.224064858138686e-05, "loss": 0.3145, "step": 22955 }, { "epoch": 2.68, "grad_norm": 1.0625608962780768, "learning_rate": 1.2230785963931433e-05, "loss": 0.3331, "step": 22960 }, { "epoch": 2.68, "grad_norm": 0.9365157892447141, "learning_rate": 1.2220926034529315e-05, "loss": 0.297, "step": 22965 }, { "epoch": 2.68, "grad_norm": 0.8385356007864888, "learning_rate": 1.2211068795256129e-05, "loss": 0.2705, "step": 22970 }, { "epoch": 2.68, "grad_norm": 0.8311017205813936, "learning_rate": 1.2201214248186931e-05, "loss": 0.2961, "step": 22975 }, { "epoch": 2.68, "grad_norm": 1.0809393138060064, "learning_rate": 1.2191362395396192e-05, "loss": 0.3141, "step": 22980 }, { "epoch": 2.69, "grad_norm": 0.9303754817227148, "learning_rate": 1.2181513238957848e-05, "loss": 0.3245, "step": 22985 }, { "epoch": 2.69, "grad_norm": 0.920008901625138, "learning_rate": 1.2171666780945232e-05, "loss": 0.2564, "step": 22990 }, { "epoch": 2.69, "grad_norm": 1.0481922293324, "learning_rate": 1.2161823023431133e-05, "loss": 0.3241, "step": 22995 }, { "epoch": 2.69, "grad_norm": 0.8904455686199985, "learning_rate": 1.2151981968487775e-05, "loss": 0.2721, "step": 23000 }, { "epoch": 2.69, "grad_norm": 0.9426783715872329, "learning_rate": 1.2142143618186786e-05, "loss": 0.2923, "step": 23005 }, { "epoch": 2.69, "grad_norm": 1.00934636607528, "learning_rate": 1.213230797459925e-05, "loss": 0.308, "step": 23010 }, { "epoch": 2.69, "grad_norm": 0.9328454098011226, "learning_rate": 1.2122475039795672e-05, "loss": 0.3043, "step": 23015 }, { "epoch": 2.69, "grad_norm": 0.9282216598172034, "learning_rate": 1.2112644815845999e-05, "loss": 0.2925, "step": 23020 }, { "epoch": 2.69, "grad_norm": 0.8891395519608644, "learning_rate": 1.2102817304819589e-05, "loss": 0.3171, "step": 23025 }, { "epoch": 2.69, "grad_norm": 1.0581740837403106, "learning_rate": 1.2092992508785223e-05, "loss": 0.3388, "step": 23030 }, { "epoch": 2.69, "grad_norm": 0.9850361554562754, "learning_rate": 1.208317042981113e-05, "loss": 0.2748, "step": 23035 }, { "epoch": 2.69, "grad_norm": 0.9352614588879432, "learning_rate": 1.207335106996497e-05, "loss": 0.8495, "step": 23040 }, { "epoch": 2.69, "grad_norm": 0.9244927194442741, "learning_rate": 1.2063534431313819e-05, "loss": 0.2738, "step": 23045 }, { "epoch": 2.69, "grad_norm": 1.0403690905006984, "learning_rate": 1.2053720515924174e-05, "loss": 0.3679, "step": 23050 }, { "epoch": 2.69, "grad_norm": 0.9372004716169946, "learning_rate": 1.2043909325861979e-05, "loss": 0.4102, "step": 23055 }, { "epoch": 2.69, "grad_norm": 0.8419795794585395, "learning_rate": 1.2034100863192576e-05, "loss": 0.2657, "step": 23060 }, { "epoch": 2.69, "grad_norm": 0.8940436812343538, "learning_rate": 1.2024295129980756e-05, "loss": 0.2525, "step": 23065 }, { "epoch": 2.7, "grad_norm": 0.9682780694913872, "learning_rate": 1.2014492128290744e-05, "loss": 0.3663, "step": 23070 }, { "epoch": 2.7, "grad_norm": 0.9289494309975811, "learning_rate": 1.2004691860186148e-05, "loss": 0.2787, "step": 23075 }, { "epoch": 2.7, "grad_norm": 0.8755275997308232, "learning_rate": 1.1994894327730042e-05, "loss": 0.2513, "step": 23080 }, { "epoch": 2.7, "grad_norm": 1.036489585957659, "learning_rate": 1.1985099532984917e-05, "loss": 0.3112, "step": 23085 }, { "epoch": 2.7, "grad_norm": 0.9363246013821599, "learning_rate": 1.197530747801266e-05, "loss": 0.3112, "step": 23090 }, { "epoch": 2.7, "grad_norm": 0.9965596474671534, "learning_rate": 1.196551816487462e-05, "loss": 0.3401, "step": 23095 }, { "epoch": 2.7, "grad_norm": 0.8801080314559394, "learning_rate": 1.1955731595631533e-05, "loss": 0.3098, "step": 23100 }, { "epoch": 2.7, "grad_norm": 0.7800694148980456, "learning_rate": 1.1945947772343585e-05, "loss": 0.3022, "step": 23105 }, { "epoch": 2.7, "grad_norm": 1.0400503352529242, "learning_rate": 1.193616669707037e-05, "loss": 0.3802, "step": 23110 }, { "epoch": 2.7, "grad_norm": 0.8515452029840317, "learning_rate": 1.1926388371870913e-05, "loss": 0.3027, "step": 23115 }, { "epoch": 2.7, "grad_norm": 0.8999896412599249, "learning_rate": 1.1916612798803655e-05, "loss": 0.332, "step": 23120 }, { "epoch": 2.7, "grad_norm": 1.47350156682016, "learning_rate": 1.1906839979926438e-05, "loss": 0.317, "step": 23125 }, { "epoch": 2.7, "grad_norm": 0.9591569056072188, "learning_rate": 1.1897069917296555e-05, "loss": 0.2909, "step": 23130 }, { "epoch": 2.7, "grad_norm": 0.8642377846151067, "learning_rate": 1.1887302612970704e-05, "loss": 0.2794, "step": 23135 }, { "epoch": 2.7, "grad_norm": 1.1177579897166943, "learning_rate": 1.187753806900502e-05, "loss": 0.3409, "step": 23140 }, { "epoch": 2.7, "grad_norm": 0.8855697457554093, "learning_rate": 1.1867776287455021e-05, "loss": 0.2737, "step": 23145 }, { "epoch": 2.7, "grad_norm": 0.8677257562352807, "learning_rate": 1.1858017270375679e-05, "loss": 0.6087, "step": 23150 }, { "epoch": 2.71, "grad_norm": 0.8720100939063528, "learning_rate": 1.1848261019821355e-05, "loss": 0.2826, "step": 23155 }, { "epoch": 2.71, "grad_norm": 1.0674767780478314, "learning_rate": 1.183850753784585e-05, "loss": 0.3158, "step": 23160 }, { "epoch": 2.71, "grad_norm": 0.9337730019343822, "learning_rate": 1.1828756826502382e-05, "loss": 0.3378, "step": 23165 }, { "epoch": 2.71, "grad_norm": 1.0981605244517039, "learning_rate": 1.181900888784356e-05, "loss": 0.3013, "step": 23170 }, { "epoch": 2.71, "grad_norm": 1.1357751781323333, "learning_rate": 1.1809263723921438e-05, "loss": 0.3377, "step": 23175 }, { "epoch": 2.71, "grad_norm": 0.9350243569688483, "learning_rate": 1.1799521336787483e-05, "loss": 0.3216, "step": 23180 }, { "epoch": 2.71, "grad_norm": 0.7818555007876645, "learning_rate": 1.178978172849256e-05, "loss": 0.2675, "step": 23185 }, { "epoch": 2.71, "grad_norm": 0.9635830398252786, "learning_rate": 1.178004490108695e-05, "loss": 0.296, "step": 23190 }, { "epoch": 2.71, "grad_norm": 0.9675959787159427, "learning_rate": 1.1770310856620368e-05, "loss": 0.3001, "step": 23195 }, { "epoch": 2.71, "grad_norm": 0.9657993732349076, "learning_rate": 1.1760579597141927e-05, "loss": 0.2896, "step": 23200 }, { "epoch": 2.71, "grad_norm": 0.9554835133283328, "learning_rate": 1.1750851124700166e-05, "loss": 0.3138, "step": 23205 }, { "epoch": 2.71, "grad_norm": 0.8974886250895949, "learning_rate": 1.1741125441343034e-05, "loss": 0.3078, "step": 23210 }, { "epoch": 2.71, "grad_norm": 0.969468758922195, "learning_rate": 1.1731402549117885e-05, "loss": 0.2867, "step": 23215 }, { "epoch": 2.71, "grad_norm": 92.14569291240348, "learning_rate": 1.1721682450071476e-05, "loss": 0.3363, "step": 23220 }, { "epoch": 2.71, "grad_norm": 1.1018434913945603, "learning_rate": 1.1711965146250001e-05, "loss": 0.3024, "step": 23225 }, { "epoch": 2.71, "grad_norm": 0.8225866825145253, "learning_rate": 1.1702250639699053e-05, "loss": 0.2941, "step": 23230 }, { "epoch": 2.71, "grad_norm": 0.8124022526661175, "learning_rate": 1.1692538932463648e-05, "loss": 0.2454, "step": 23235 }, { "epoch": 2.72, "grad_norm": 0.9436323813880034, "learning_rate": 1.1682830026588181e-05, "loss": 0.2853, "step": 23240 }, { "epoch": 2.72, "grad_norm": 0.9318640069811314, "learning_rate": 1.167312392411649e-05, "loss": 0.407, "step": 23245 }, { "epoch": 2.72, "grad_norm": 1.0172614894097036, "learning_rate": 1.1663420627091822e-05, "loss": 0.2768, "step": 23250 }, { "epoch": 2.72, "grad_norm": 1.097797890608603, "learning_rate": 1.1653720137556814e-05, "loss": 0.3432, "step": 23255 }, { "epoch": 2.72, "grad_norm": 0.9135469122716455, "learning_rate": 1.1644022457553506e-05, "loss": 0.309, "step": 23260 }, { "epoch": 2.72, "grad_norm": 1.0021623665089956, "learning_rate": 1.1634327589123373e-05, "loss": 0.3867, "step": 23265 }, { "epoch": 2.72, "grad_norm": 0.8967069354500952, "learning_rate": 1.1624635534307288e-05, "loss": 0.2773, "step": 23270 }, { "epoch": 2.72, "grad_norm": 0.7630547449795746, "learning_rate": 1.1614946295145529e-05, "loss": 0.2874, "step": 23275 }, { "epoch": 2.72, "grad_norm": 0.8757736405807965, "learning_rate": 1.160525987367779e-05, "loss": 0.3174, "step": 23280 }, { "epoch": 2.72, "grad_norm": 0.7304558363140028, "learning_rate": 1.1595576271943157e-05, "loss": 0.2611, "step": 23285 }, { "epoch": 2.72, "grad_norm": 0.9214452185153639, "learning_rate": 1.158589549198012e-05, "loss": 0.285, "step": 23290 }, { "epoch": 2.72, "grad_norm": 0.9717582566065859, "learning_rate": 1.1576217535826594e-05, "loss": 0.2624, "step": 23295 }, { "epoch": 2.72, "grad_norm": 0.9928811557681508, "learning_rate": 1.156654240551989e-05, "loss": 0.3046, "step": 23300 }, { "epoch": 2.72, "grad_norm": 0.966256974647924, "learning_rate": 1.1556870103096731e-05, "loss": 0.3205, "step": 23305 }, { "epoch": 2.72, "grad_norm": 0.9360870584149181, "learning_rate": 1.1547200630593224e-05, "loss": 0.3169, "step": 23310 }, { "epoch": 2.72, "grad_norm": 0.9291298702386991, "learning_rate": 1.153753399004491e-05, "loss": 0.2691, "step": 23315 }, { "epoch": 2.72, "grad_norm": 0.8677978034457244, "learning_rate": 1.15278701834867e-05, "loss": 0.3149, "step": 23320 }, { "epoch": 2.73, "grad_norm": 0.9178350772908358, "learning_rate": 1.1518209212952943e-05, "loss": 0.333, "step": 23325 }, { "epoch": 2.73, "grad_norm": 0.8582811803734776, "learning_rate": 1.1508551080477362e-05, "loss": 0.2709, "step": 23330 }, { "epoch": 2.73, "grad_norm": 1.0674644550282384, "learning_rate": 1.1498895788093097e-05, "loss": 0.2977, "step": 23335 }, { "epoch": 2.73, "grad_norm": 0.8162154138435762, "learning_rate": 1.1489243337832695e-05, "loss": 0.302, "step": 23340 }, { "epoch": 2.73, "grad_norm": 1.0016102446118273, "learning_rate": 1.14795937317281e-05, "loss": 0.3436, "step": 23345 }, { "epoch": 2.73, "grad_norm": 0.8282365695876247, "learning_rate": 1.1469946971810652e-05, "loss": 0.3037, "step": 23350 }, { "epoch": 2.73, "grad_norm": 1.0606606541997659, "learning_rate": 1.1460303060111083e-05, "loss": 0.2995, "step": 23355 }, { "epoch": 2.73, "grad_norm": 1.0060869548756155, "learning_rate": 1.1450661998659548e-05, "loss": 0.2688, "step": 23360 }, { "epoch": 2.73, "grad_norm": 0.8571141652928844, "learning_rate": 1.1441023789485589e-05, "loss": 0.2797, "step": 23365 }, { "epoch": 2.73, "grad_norm": 0.932008462361764, "learning_rate": 1.143138843461815e-05, "loss": 0.2731, "step": 23370 }, { "epoch": 2.73, "grad_norm": 0.8478332005755546, "learning_rate": 1.1421755936085587e-05, "loss": 0.2918, "step": 23375 }, { "epoch": 2.73, "grad_norm": 0.8891455074469505, "learning_rate": 1.1412126295915632e-05, "loss": 0.3147, "step": 23380 }, { "epoch": 2.73, "grad_norm": 0.8864718154402954, "learning_rate": 1.1402499516135412e-05, "loss": 0.2855, "step": 23385 }, { "epoch": 2.73, "grad_norm": 0.9832832938254445, "learning_rate": 1.1392875598771477e-05, "loss": 0.3033, "step": 23390 }, { "epoch": 2.73, "grad_norm": 1.0168586417932541, "learning_rate": 1.138325454584977e-05, "loss": 0.3291, "step": 23395 }, { "epoch": 2.73, "grad_norm": 1.0824853837646562, "learning_rate": 1.137363635939561e-05, "loss": 0.295, "step": 23400 }, { "epoch": 2.73, "grad_norm": 0.7687830909532848, "learning_rate": 1.1364021041433725e-05, "loss": 0.2661, "step": 23405 }, { "epoch": 2.74, "grad_norm": 0.983860772846003, "learning_rate": 1.135440859398826e-05, "loss": 0.2712, "step": 23410 }, { "epoch": 2.74, "grad_norm": 0.9377613189200062, "learning_rate": 1.1344799019082713e-05, "loss": 0.3005, "step": 23415 }, { "epoch": 2.74, "grad_norm": 0.9877198462161915, "learning_rate": 1.133519231874002e-05, "loss": 0.3292, "step": 23420 }, { "epoch": 2.74, "grad_norm": 0.9978212050040649, "learning_rate": 1.132558849498247e-05, "loss": 0.307, "step": 23425 }, { "epoch": 2.74, "grad_norm": 1.005941789063786, "learning_rate": 1.1315987549831782e-05, "loss": 0.3214, "step": 23430 }, { "epoch": 2.74, "grad_norm": 1.1876485332302549, "learning_rate": 1.1306389485309052e-05, "loss": 0.2902, "step": 23435 }, { "epoch": 2.74, "grad_norm": 1.0957393105014936, "learning_rate": 1.1296794303434785e-05, "loss": 0.2857, "step": 23440 }, { "epoch": 2.74, "grad_norm": 0.8808072967665173, "learning_rate": 1.1287202006228858e-05, "loss": 0.3033, "step": 23445 }, { "epoch": 2.74, "grad_norm": 1.0399370331087272, "learning_rate": 1.1277612595710538e-05, "loss": 0.3018, "step": 23450 }, { "epoch": 2.74, "grad_norm": 0.9565855267443694, "learning_rate": 1.1268026073898508e-05, "loss": 0.3007, "step": 23455 }, { "epoch": 2.74, "grad_norm": 0.9695752108566751, "learning_rate": 1.1258442442810833e-05, "loss": 0.3246, "step": 23460 }, { "epoch": 2.74, "grad_norm": 0.8858621291842715, "learning_rate": 1.1248861704464971e-05, "loss": 0.3326, "step": 23465 }, { "epoch": 2.74, "grad_norm": 0.9662613576891774, "learning_rate": 1.1239283860877756e-05, "loss": 0.3875, "step": 23470 }, { "epoch": 2.74, "grad_norm": 0.9665851921586993, "learning_rate": 1.122970891406543e-05, "loss": 0.3164, "step": 23475 }, { "epoch": 2.74, "grad_norm": 0.8920046177154548, "learning_rate": 1.1220136866043629e-05, "loss": 0.3219, "step": 23480 }, { "epoch": 2.74, "grad_norm": 0.8301284085478287, "learning_rate": 1.121056771882735e-05, "loss": 0.2779, "step": 23485 }, { "epoch": 2.74, "grad_norm": 1.0708710290620955, "learning_rate": 1.1201001474431022e-05, "loss": 0.3004, "step": 23490 }, { "epoch": 2.75, "grad_norm": 0.9160839116538275, "learning_rate": 1.1191438134868414e-05, "loss": 0.2972, "step": 23495 }, { "epoch": 2.75, "grad_norm": 0.8986245165879151, "learning_rate": 1.1181877702152725e-05, "loss": 0.3049, "step": 23500 }, { "epoch": 2.75, "grad_norm": 0.8913374620198132, "learning_rate": 1.1172320178296522e-05, "loss": 0.3168, "step": 23505 }, { "epoch": 2.75, "grad_norm": 0.8395060466096411, "learning_rate": 1.1162765565311775e-05, "loss": 0.2649, "step": 23510 }, { "epoch": 2.75, "grad_norm": 0.9027930251491513, "learning_rate": 1.1153213865209821e-05, "loss": 0.3267, "step": 23515 }, { "epoch": 2.75, "grad_norm": 0.8577130979311428, "learning_rate": 1.1143665080001382e-05, "loss": 0.2876, "step": 23520 }, { "epoch": 2.75, "grad_norm": 1.069821600651597, "learning_rate": 1.1134119211696589e-05, "loss": 0.351, "step": 23525 }, { "epoch": 2.75, "grad_norm": 0.8846252990133306, "learning_rate": 1.1124576262304944e-05, "loss": 0.2868, "step": 23530 }, { "epoch": 2.75, "grad_norm": 0.9389204534337867, "learning_rate": 1.1115036233835349e-05, "loss": 0.323, "step": 23535 }, { "epoch": 2.75, "grad_norm": 0.905324547311633, "learning_rate": 1.1105499128296062e-05, "loss": 0.2857, "step": 23540 }, { "epoch": 2.75, "grad_norm": 1.0434526042134222, "learning_rate": 1.1095964947694761e-05, "loss": 0.2864, "step": 23545 }, { "epoch": 2.75, "grad_norm": 1.1948811192937119, "learning_rate": 1.1086433694038476e-05, "loss": 0.3767, "step": 23550 }, { "epoch": 2.75, "grad_norm": 0.9855581532827002, "learning_rate": 1.107690536933364e-05, "loss": 0.2921, "step": 23555 }, { "epoch": 2.75, "grad_norm": 0.9798418335090091, "learning_rate": 1.1067379975586077e-05, "loss": 0.2997, "step": 23560 }, { "epoch": 2.75, "grad_norm": 0.903539605706739, "learning_rate": 1.1057857514800959e-05, "loss": 0.3094, "step": 23565 }, { "epoch": 2.75, "grad_norm": 0.9374344894487049, "learning_rate": 1.1048337988982883e-05, "loss": 0.3198, "step": 23570 }, { "epoch": 2.75, "grad_norm": 1.0006717022993645, "learning_rate": 1.1038821400135807e-05, "loss": 0.2578, "step": 23575 }, { "epoch": 2.75, "grad_norm": 0.9603612301139439, "learning_rate": 1.102930775026306e-05, "loss": 0.3162, "step": 23580 }, { "epoch": 2.76, "grad_norm": 0.8797527273472097, "learning_rate": 1.1019797041367386e-05, "loss": 0.3177, "step": 23585 }, { "epoch": 2.76, "grad_norm": 0.9466543099457245, "learning_rate": 1.1010289275450864e-05, "loss": 0.2613, "step": 23590 }, { "epoch": 2.76, "grad_norm": 1.0794091578765985, "learning_rate": 1.1000784454514992e-05, "loss": 0.3242, "step": 23595 }, { "epoch": 2.76, "grad_norm": 1.0583407393853788, "learning_rate": 1.0991282580560633e-05, "loss": 0.3046, "step": 23600 }, { "epoch": 2.76, "grad_norm": 0.9470761468887988, "learning_rate": 1.0981783655588038e-05, "loss": 0.3098, "step": 23605 }, { "epoch": 2.76, "grad_norm": 1.0091157325675195, "learning_rate": 1.0972287681596824e-05, "loss": 0.2901, "step": 23610 }, { "epoch": 2.76, "grad_norm": 0.9282651434372654, "learning_rate": 1.0962794660585984e-05, "loss": 0.2749, "step": 23615 }, { "epoch": 2.76, "grad_norm": 1.069238462542684, "learning_rate": 1.0953304594553906e-05, "loss": 0.2848, "step": 23620 }, { "epoch": 2.76, "grad_norm": 0.9208592207905248, "learning_rate": 1.094381748549835e-05, "loss": 0.3125, "step": 23625 }, { "epoch": 2.76, "grad_norm": 0.9759889117580864, "learning_rate": 1.0934333335416458e-05, "loss": 0.321, "step": 23630 }, { "epoch": 2.76, "grad_norm": 0.8520053159356505, "learning_rate": 1.092485214630473e-05, "loss": 0.3363, "step": 23635 }, { "epoch": 2.76, "grad_norm": 1.0416556282099745, "learning_rate": 1.0915373920159067e-05, "loss": 0.3111, "step": 23640 }, { "epoch": 2.76, "grad_norm": 1.0005953944544534, "learning_rate": 1.0905898658974719e-05, "loss": 0.3018, "step": 23645 }, { "epoch": 2.76, "grad_norm": 0.9879947021385429, "learning_rate": 1.089642636474634e-05, "loss": 0.2662, "step": 23650 }, { "epoch": 2.76, "grad_norm": 0.9098697306046369, "learning_rate": 1.0886957039467954e-05, "loss": 0.2973, "step": 23655 }, { "epoch": 2.76, "grad_norm": 1.1403501140169232, "learning_rate": 1.0877490685132935e-05, "loss": 0.2864, "step": 23660 }, { "epoch": 2.76, "grad_norm": 1.0968395990976476, "learning_rate": 1.0868027303734057e-05, "loss": 0.298, "step": 23665 }, { "epoch": 2.77, "grad_norm": 0.8532057197963661, "learning_rate": 1.0858566897263475e-05, "loss": 0.3434, "step": 23670 }, { "epoch": 2.77, "grad_norm": 0.9417573683852283, "learning_rate": 1.0849109467712681e-05, "loss": 0.3072, "step": 23675 }, { "epoch": 2.77, "grad_norm": 0.8550553077331028, "learning_rate": 1.0839655017072583e-05, "loss": 0.2988, "step": 23680 }, { "epoch": 2.77, "grad_norm": 0.9972584699976879, "learning_rate": 1.0830203547333422e-05, "loss": 0.2996, "step": 23685 }, { "epoch": 2.77, "grad_norm": 1.0017408881083572, "learning_rate": 1.0820755060484844e-05, "loss": 0.3076, "step": 23690 }, { "epoch": 2.77, "grad_norm": 0.9377687191213354, "learning_rate": 1.081130955851585e-05, "loss": 0.3408, "step": 23695 }, { "epoch": 2.77, "grad_norm": 0.8865272909002163, "learning_rate": 1.0801867043414828e-05, "loss": 0.2973, "step": 23700 }, { "epoch": 2.77, "grad_norm": 0.9055830495038591, "learning_rate": 1.0792427517169512e-05, "loss": 0.3146, "step": 23705 }, { "epoch": 2.77, "grad_norm": 1.2083013782768683, "learning_rate": 1.0782990981767033e-05, "loss": 0.3924, "step": 23710 }, { "epoch": 2.77, "grad_norm": 1.0168900333033841, "learning_rate": 1.0773557439193865e-05, "loss": 0.3041, "step": 23715 }, { "epoch": 2.77, "grad_norm": 0.9154854672836121, "learning_rate": 1.0764126891435877e-05, "loss": 0.2904, "step": 23720 }, { "epoch": 2.77, "grad_norm": 0.9268696915625491, "learning_rate": 1.0754699340478311e-05, "loss": 0.2855, "step": 23725 }, { "epoch": 2.77, "grad_norm": 0.9365293492548428, "learning_rate": 1.074527478830574e-05, "loss": 0.2945, "step": 23730 }, { "epoch": 2.77, "grad_norm": 1.0241618296670367, "learning_rate": 1.073585323690214e-05, "loss": 0.3134, "step": 23735 }, { "epoch": 2.77, "grad_norm": 0.8317413520640192, "learning_rate": 1.0726434688250864e-05, "loss": 0.3482, "step": 23740 }, { "epoch": 2.77, "grad_norm": 1.0491610838366687, "learning_rate": 1.071701914433459e-05, "loss": 0.2888, "step": 23745 }, { "epoch": 2.77, "grad_norm": 1.1826379442010508, "learning_rate": 1.0707606607135407e-05, "loss": 0.2835, "step": 23750 }, { "epoch": 2.78, "grad_norm": 1.0022085838235593, "learning_rate": 1.0698197078634733e-05, "loss": 0.3022, "step": 23755 }, { "epoch": 2.78, "grad_norm": 1.1295260043233306, "learning_rate": 1.0688790560813388e-05, "loss": 0.2994, "step": 23760 }, { "epoch": 2.78, "grad_norm": 0.8590583871989996, "learning_rate": 1.0679387055651538e-05, "loss": 0.3298, "step": 23765 }, { "epoch": 2.78, "grad_norm": 0.7896318522487846, "learning_rate": 1.0669986565128732e-05, "loss": 0.274, "step": 23770 }, { "epoch": 2.78, "grad_norm": 0.9470255666600508, "learning_rate": 1.0660589091223855e-05, "loss": 0.2834, "step": 23775 }, { "epoch": 2.78, "grad_norm": 0.806691952222429, "learning_rate": 1.0651194635915172e-05, "loss": 0.2869, "step": 23780 }, { "epoch": 2.78, "grad_norm": 0.9920244881950887, "learning_rate": 1.064180320118032e-05, "loss": 0.3048, "step": 23785 }, { "epoch": 2.78, "grad_norm": 0.9811043245735639, "learning_rate": 1.0632414788996294e-05, "loss": 0.3593, "step": 23790 }, { "epoch": 2.78, "grad_norm": 0.981571241604834, "learning_rate": 1.0623029401339466e-05, "loss": 0.2827, "step": 23795 }, { "epoch": 2.78, "grad_norm": 0.908638091882898, "learning_rate": 1.0613647040185535e-05, "loss": 0.2473, "step": 23800 }, { "epoch": 2.78, "grad_norm": 0.8804751881187239, "learning_rate": 1.0604267707509608e-05, "loss": 0.2593, "step": 23805 }, { "epoch": 2.78, "grad_norm": 0.9621528614821562, "learning_rate": 1.0594891405286112e-05, "loss": 0.2765, "step": 23810 }, { "epoch": 2.78, "grad_norm": 0.8760314269025647, "learning_rate": 1.058551813548887e-05, "loss": 0.3164, "step": 23815 }, { "epoch": 2.78, "grad_norm": 1.0026624709941225, "learning_rate": 1.0576147900091058e-05, "loss": 0.2776, "step": 23820 }, { "epoch": 2.78, "grad_norm": 0.8610740877250057, "learning_rate": 1.0566780701065193e-05, "loss": 0.2928, "step": 23825 }, { "epoch": 2.78, "grad_norm": 0.9211160043873191, "learning_rate": 1.055741654038318e-05, "loss": 0.2913, "step": 23830 }, { "epoch": 2.78, "grad_norm": 0.8762788904048749, "learning_rate": 1.0548055420016276e-05, "loss": 0.3495, "step": 23835 }, { "epoch": 2.79, "grad_norm": 0.9220130035727188, "learning_rate": 1.0538697341935091e-05, "loss": 0.2629, "step": 23840 }, { "epoch": 2.79, "grad_norm": 0.8588414662232006, "learning_rate": 1.0529342308109586e-05, "loss": 0.3281, "step": 23845 }, { "epoch": 2.79, "grad_norm": 0.9718662344894322, "learning_rate": 1.0519990320509104e-05, "loss": 0.2874, "step": 23850 }, { "epoch": 2.79, "grad_norm": 0.9142675264104025, "learning_rate": 1.0510641381102337e-05, "loss": 0.2962, "step": 23855 }, { "epoch": 2.79, "grad_norm": 0.8873390971475601, "learning_rate": 1.0501295491857335e-05, "loss": 0.3124, "step": 23860 }, { "epoch": 2.79, "grad_norm": 1.073030921493654, "learning_rate": 1.0491952654741514e-05, "loss": 0.2933, "step": 23865 }, { "epoch": 2.79, "grad_norm": 1.0891083550397802, "learning_rate": 1.0482612871721628e-05, "loss": 0.3159, "step": 23870 }, { "epoch": 2.79, "grad_norm": 0.9627610015886772, "learning_rate": 1.0473276144763793e-05, "loss": 0.3195, "step": 23875 }, { "epoch": 2.79, "grad_norm": 0.9846751290510684, "learning_rate": 1.0463942475833496e-05, "loss": 0.296, "step": 23880 }, { "epoch": 2.79, "grad_norm": 1.0213771648613001, "learning_rate": 1.0454611866895572e-05, "loss": 0.3272, "step": 23885 }, { "epoch": 2.79, "grad_norm": 0.878120819137557, "learning_rate": 1.044528431991422e-05, "loss": 0.323, "step": 23890 }, { "epoch": 2.79, "grad_norm": 1.0376013751257847, "learning_rate": 1.0435959836852967e-05, "loss": 0.3301, "step": 23895 }, { "epoch": 2.79, "grad_norm": 0.6877638430729498, "learning_rate": 1.0426638419674739e-05, "loss": 0.2612, "step": 23900 }, { "epoch": 2.79, "grad_norm": 1.014633177373045, "learning_rate": 1.0417320070341765e-05, "loss": 0.3062, "step": 23905 }, { "epoch": 2.79, "grad_norm": 0.8993083758190988, "learning_rate": 1.040800479081568e-05, "loss": 0.3274, "step": 23910 }, { "epoch": 2.79, "grad_norm": 0.9128952190570753, "learning_rate": 1.0398692583057429e-05, "loss": 0.3708, "step": 23915 }, { "epoch": 2.79, "grad_norm": 0.8357886771472559, "learning_rate": 1.0389383449027335e-05, "loss": 0.3347, "step": 23920 }, { "epoch": 2.8, "grad_norm": 1.3085852767781703, "learning_rate": 1.038007739068507e-05, "loss": 0.2539, "step": 23925 }, { "epoch": 2.8, "grad_norm": 0.868166987701541, "learning_rate": 1.0370774409989655e-05, "loss": 0.3505, "step": 23930 }, { "epoch": 2.8, "grad_norm": 0.8720868415654188, "learning_rate": 1.036147450889948e-05, "loss": 0.3045, "step": 23935 }, { "epoch": 2.8, "grad_norm": 0.9692423511286279, "learning_rate": 1.0352177689372256e-05, "loss": 0.3012, "step": 23940 }, { "epoch": 2.8, "grad_norm": 1.1488121193790917, "learning_rate": 1.0342883953365051e-05, "loss": 0.3919, "step": 23945 }, { "epoch": 2.8, "grad_norm": 0.9253449880918742, "learning_rate": 1.033359330283431e-05, "loss": 0.2679, "step": 23950 }, { "epoch": 2.8, "grad_norm": 0.8402356983312381, "learning_rate": 1.0324305739735807e-05, "loss": 0.2856, "step": 23955 }, { "epoch": 2.8, "grad_norm": 1.0368873730232566, "learning_rate": 1.0315021266024683e-05, "loss": 0.3022, "step": 23960 }, { "epoch": 2.8, "grad_norm": 1.1496190175764356, "learning_rate": 1.0305739883655395e-05, "loss": 0.3141, "step": 23965 }, { "epoch": 2.8, "grad_norm": 0.8834647969007838, "learning_rate": 1.0296461594581791e-05, "loss": 0.2887, "step": 23970 }, { "epoch": 2.8, "grad_norm": 0.9370827068333535, "learning_rate": 1.0287186400757032e-05, "loss": 0.2785, "step": 23975 }, { "epoch": 2.8, "grad_norm": 1.0303960318240577, "learning_rate": 1.0277914304133661e-05, "loss": 0.3355, "step": 23980 }, { "epoch": 2.8, "grad_norm": 0.9915618216413891, "learning_rate": 1.0268645306663532e-05, "loss": 0.2747, "step": 23985 }, { "epoch": 2.8, "grad_norm": 1.3257351002283646, "learning_rate": 1.0259379410297875e-05, "loss": 0.3253, "step": 23990 }, { "epoch": 2.8, "grad_norm": 0.8704719409336908, "learning_rate": 1.025011661698726e-05, "loss": 0.2971, "step": 23995 }, { "epoch": 2.8, "grad_norm": 16.970800750921004, "learning_rate": 1.0240856928681611e-05, "loss": 0.4302, "step": 24000 }, { "epoch": 2.8, "grad_norm": 0.9618174684253242, "learning_rate": 1.0231600347330179e-05, "loss": 0.3103, "step": 24005 }, { "epoch": 2.81, "grad_norm": 0.93887331601894, "learning_rate": 1.022234687488156e-05, "loss": 0.2672, "step": 24010 }, { "epoch": 2.81, "grad_norm": 0.9079812314562753, "learning_rate": 1.021309651328372e-05, "loss": 0.2884, "step": 24015 }, { "epoch": 2.81, "grad_norm": 0.811229992269716, "learning_rate": 1.0203849264483955e-05, "loss": 0.3001, "step": 24020 }, { "epoch": 2.81, "grad_norm": 0.8988081282422115, "learning_rate": 1.019460513042891e-05, "loss": 0.3309, "step": 24025 }, { "epoch": 2.81, "grad_norm": 0.8694364127794912, "learning_rate": 1.0185364113064577e-05, "loss": 0.2498, "step": 24030 }, { "epoch": 2.81, "grad_norm": 1.064006727748688, "learning_rate": 1.0176126214336282e-05, "loss": 0.2985, "step": 24035 }, { "epoch": 2.81, "grad_norm": 0.9328393222840907, "learning_rate": 1.016689143618869e-05, "loss": 0.3444, "step": 24040 }, { "epoch": 2.81, "grad_norm": 0.9201184699376819, "learning_rate": 1.0157659780565823e-05, "loss": 0.3114, "step": 24045 }, { "epoch": 2.81, "grad_norm": 0.831694478570635, "learning_rate": 1.0148431249411056e-05, "loss": 0.2788, "step": 24050 }, { "epoch": 2.81, "grad_norm": 1.0546481758656725, "learning_rate": 1.013920584466707e-05, "loss": 0.3586, "step": 24055 }, { "epoch": 2.81, "grad_norm": 0.9665089853766448, "learning_rate": 1.0129983568275919e-05, "loss": 0.2626, "step": 24060 }, { "epoch": 2.81, "grad_norm": 0.9440834510312459, "learning_rate": 1.0120764422179e-05, "loss": 0.3136, "step": 24065 }, { "epoch": 2.81, "grad_norm": 0.894586536893201, "learning_rate": 1.0111548408317017e-05, "loss": 0.2692, "step": 24070 }, { "epoch": 2.81, "grad_norm": 1.095624332163829, "learning_rate": 1.0102335528630061e-05, "loss": 0.2727, "step": 24075 }, { "epoch": 2.81, "grad_norm": 0.9391056339302459, "learning_rate": 1.0093125785057517e-05, "loss": 0.2903, "step": 24080 }, { "epoch": 2.81, "grad_norm": 1.066638189111323, "learning_rate": 1.0083919179538146e-05, "loss": 0.3365, "step": 24085 }, { "epoch": 2.81, "grad_norm": 0.9294220742852979, "learning_rate": 1.0074715714010031e-05, "loss": 0.3135, "step": 24090 }, { "epoch": 2.82, "grad_norm": 0.9222853252288646, "learning_rate": 1.0065515390410607e-05, "loss": 0.2619, "step": 24095 }, { "epoch": 2.82, "grad_norm": 0.9088450074899105, "learning_rate": 1.0056318210676633e-05, "loss": 0.2775, "step": 24100 }, { "epoch": 2.82, "grad_norm": 0.9113917583432781, "learning_rate": 1.0047124176744198e-05, "loss": 0.3094, "step": 24105 }, { "epoch": 2.82, "grad_norm": 0.9123154466025912, "learning_rate": 1.0037933290548757e-05, "loss": 0.3239, "step": 24110 }, { "epoch": 2.82, "grad_norm": 0.8819793087263148, "learning_rate": 1.0028745554025082e-05, "loss": 0.3033, "step": 24115 }, { "epoch": 2.82, "grad_norm": 1.04487368529544, "learning_rate": 1.0019560969107302e-05, "loss": 0.3469, "step": 24120 }, { "epoch": 2.82, "grad_norm": 0.9133161019310184, "learning_rate": 1.0010379537728845e-05, "loss": 0.2764, "step": 24125 }, { "epoch": 2.82, "grad_norm": 0.9286874522791484, "learning_rate": 1.000120126182252e-05, "loss": 0.3253, "step": 24130 }, { "epoch": 2.82, "grad_norm": 0.9415924465774013, "learning_rate": 9.992026143320434e-06, "loss": 0.3031, "step": 24135 }, { "epoch": 2.82, "grad_norm": 0.8500447394001744, "learning_rate": 9.982854184154053e-06, "loss": 0.3126, "step": 24140 }, { "epoch": 2.82, "grad_norm": 1.0655944706528835, "learning_rate": 9.973685386254174e-06, "loss": 0.2806, "step": 24145 }, { "epoch": 2.82, "grad_norm": 0.9758574861517874, "learning_rate": 9.964519751550916e-06, "loss": 0.2787, "step": 24150 }, { "epoch": 2.82, "grad_norm": 0.8918688811302502, "learning_rate": 9.955357281973743e-06, "loss": 0.2993, "step": 24155 }, { "epoch": 2.82, "grad_norm": 0.888293877938757, "learning_rate": 9.946197979451454e-06, "loss": 0.2925, "step": 24160 }, { "epoch": 2.82, "grad_norm": 0.843654989719251, "learning_rate": 9.937041845912188e-06, "loss": 0.3359, "step": 24165 }, { "epoch": 2.82, "grad_norm": 1.1291216942967641, "learning_rate": 9.927888883283395e-06, "loss": 0.298, "step": 24170 }, { "epoch": 2.82, "grad_norm": 0.9005704624947608, "learning_rate": 9.918739093491863e-06, "loss": 0.3058, "step": 24175 }, { "epoch": 2.83, "grad_norm": 0.8744905349602763, "learning_rate": 9.909592478463725e-06, "loss": 0.2782, "step": 24180 }, { "epoch": 2.83, "grad_norm": 0.9765592927756971, "learning_rate": 9.900449040124444e-06, "loss": 0.3324, "step": 24185 }, { "epoch": 2.83, "grad_norm": 0.8608439155318706, "learning_rate": 9.891308780398813e-06, "loss": 0.2496, "step": 24190 }, { "epoch": 2.83, "grad_norm": 0.9443155856131693, "learning_rate": 9.88217170121094e-06, "loss": 0.3242, "step": 24195 }, { "epoch": 2.83, "grad_norm": 0.9430139713644371, "learning_rate": 9.87303780448429e-06, "loss": 0.2959, "step": 24200 }, { "epoch": 2.83, "grad_norm": 0.964402836844983, "learning_rate": 9.863907092141624e-06, "loss": 0.2831, "step": 24205 }, { "epoch": 2.83, "grad_norm": 0.9884365774866519, "learning_rate": 9.854779566105068e-06, "loss": 0.3134, "step": 24210 }, { "epoch": 2.83, "grad_norm": 1.0053095844951974, "learning_rate": 9.845655228296067e-06, "loss": 0.3005, "step": 24215 }, { "epoch": 2.83, "grad_norm": 0.948097139787577, "learning_rate": 9.836534080635371e-06, "loss": 0.3142, "step": 24220 }, { "epoch": 2.83, "grad_norm": 0.892488647252319, "learning_rate": 9.827416125043085e-06, "loss": 0.3265, "step": 24225 }, { "epoch": 2.83, "grad_norm": 0.893053947844135, "learning_rate": 9.818301363438647e-06, "loss": 0.3282, "step": 24230 }, { "epoch": 2.83, "grad_norm": 0.9258964399894913, "learning_rate": 9.809189797740792e-06, "loss": 0.296, "step": 24235 }, { "epoch": 2.83, "grad_norm": 0.8800263191295711, "learning_rate": 9.800081429867614e-06, "loss": 0.3035, "step": 24240 }, { "epoch": 2.83, "grad_norm": 1.1475290789393873, "learning_rate": 9.790976261736504e-06, "loss": 0.3253, "step": 24245 }, { "epoch": 2.83, "grad_norm": 0.8461825763490542, "learning_rate": 9.781874295264204e-06, "loss": 0.2804, "step": 24250 }, { "epoch": 2.83, "grad_norm": 0.9579451385308144, "learning_rate": 9.772775532366774e-06, "loss": 0.3204, "step": 24255 }, { "epoch": 2.83, "grad_norm": 0.893914078122257, "learning_rate": 9.763679974959608e-06, "loss": 0.3487, "step": 24260 }, { "epoch": 2.84, "grad_norm": 0.8925869635932685, "learning_rate": 9.754587624957406e-06, "loss": 0.315, "step": 24265 }, { "epoch": 2.84, "grad_norm": 0.8943505956822083, "learning_rate": 9.745498484274192e-06, "loss": 0.3058, "step": 24270 }, { "epoch": 2.84, "grad_norm": 0.9838052552361592, "learning_rate": 9.73641255482334e-06, "loss": 0.3012, "step": 24275 }, { "epoch": 2.84, "grad_norm": 0.9122863458162808, "learning_rate": 9.727329838517529e-06, "loss": 0.335, "step": 24280 }, { "epoch": 2.84, "grad_norm": 0.9941288036306175, "learning_rate": 9.718250337268778e-06, "loss": 0.2565, "step": 24285 }, { "epoch": 2.84, "grad_norm": 0.944530994635024, "learning_rate": 9.709174052988393e-06, "loss": 0.3127, "step": 24290 }, { "epoch": 2.84, "grad_norm": 0.8257866544916291, "learning_rate": 9.700100987587052e-06, "loss": 0.2639, "step": 24295 }, { "epoch": 2.84, "grad_norm": 0.9174961357745193, "learning_rate": 9.691031142974707e-06, "loss": 0.3362, "step": 24300 }, { "epoch": 2.84, "grad_norm": 0.9575370788479027, "learning_rate": 9.681964521060669e-06, "loss": 0.2745, "step": 24305 }, { "epoch": 2.84, "grad_norm": 0.8954608120326626, "learning_rate": 9.672901123753561e-06, "loss": 0.3191, "step": 24310 }, { "epoch": 2.84, "grad_norm": 0.96033961140247, "learning_rate": 9.663840952961312e-06, "loss": 0.2855, "step": 24315 }, { "epoch": 2.84, "grad_norm": 0.9656117809561124, "learning_rate": 9.654784010591184e-06, "loss": 0.2707, "step": 24320 }, { "epoch": 2.84, "grad_norm": 0.9336011373036461, "learning_rate": 9.645730298549771e-06, "loss": 0.2401, "step": 24325 }, { "epoch": 2.84, "grad_norm": 0.9060333854482696, "learning_rate": 9.636679818742963e-06, "loss": 0.3044, "step": 24330 }, { "epoch": 2.84, "grad_norm": 1.0578848328033112, "learning_rate": 9.627632573075977e-06, "loss": 0.3122, "step": 24335 }, { "epoch": 2.84, "grad_norm": 0.8086007794739632, "learning_rate": 9.618588563453357e-06, "loss": 0.3296, "step": 24340 }, { "epoch": 2.84, "grad_norm": 1.1310610018705838, "learning_rate": 9.609547791778964e-06, "loss": 0.3143, "step": 24345 }, { "epoch": 2.84, "grad_norm": 0.9452698888828003, "learning_rate": 9.600510259955975e-06, "loss": 0.3578, "step": 24350 }, { "epoch": 2.85, "grad_norm": 0.998464299457308, "learning_rate": 9.591475969886893e-06, "loss": 0.3508, "step": 24355 }, { "epoch": 2.85, "grad_norm": 1.0542119406339037, "learning_rate": 9.58244492347352e-06, "loss": 0.3273, "step": 24360 }, { "epoch": 2.85, "grad_norm": 0.8454439583324652, "learning_rate": 9.573417122616981e-06, "loss": 0.2897, "step": 24365 }, { "epoch": 2.85, "grad_norm": 1.0502961480556605, "learning_rate": 9.56439256921773e-06, "loss": 0.3027, "step": 24370 }, { "epoch": 2.85, "grad_norm": 1.548830058672662, "learning_rate": 9.555371265175533e-06, "loss": 0.3567, "step": 24375 }, { "epoch": 2.85, "grad_norm": 0.9410034588960128, "learning_rate": 9.546353212389472e-06, "loss": 0.3176, "step": 24380 }, { "epoch": 2.85, "grad_norm": 0.8484823982551155, "learning_rate": 9.537338412757927e-06, "loss": 0.3084, "step": 24385 }, { "epoch": 2.85, "grad_norm": 0.9862240061867517, "learning_rate": 9.528326868178616e-06, "loss": 0.3441, "step": 24390 }, { "epoch": 2.85, "grad_norm": 1.0163199165368089, "learning_rate": 9.519318580548572e-06, "loss": 0.2759, "step": 24395 }, { "epoch": 2.85, "grad_norm": 0.8309326565634324, "learning_rate": 9.510313551764128e-06, "loss": 0.2636, "step": 24400 }, { "epoch": 2.85, "grad_norm": 0.9829718270033582, "learning_rate": 9.501311783720923e-06, "loss": 0.2574, "step": 24405 }, { "epoch": 2.85, "grad_norm": 0.8814662162771878, "learning_rate": 9.492313278313936e-06, "loss": 0.2722, "step": 24410 }, { "epoch": 2.85, "grad_norm": 0.9291428404439623, "learning_rate": 9.483318037437447e-06, "loss": 0.2412, "step": 24415 }, { "epoch": 2.85, "grad_norm": 0.8990467108792146, "learning_rate": 9.474326062985044e-06, "loss": 0.2678, "step": 24420 }, { "epoch": 2.85, "grad_norm": 0.8854606173102194, "learning_rate": 9.465337356849646e-06, "loss": 0.2733, "step": 24425 }, { "epoch": 2.85, "grad_norm": 0.9367297271360102, "learning_rate": 9.456351920923459e-06, "loss": 0.3471, "step": 24430 }, { "epoch": 2.85, "grad_norm": 1.1542124346690832, "learning_rate": 9.447369757098002e-06, "loss": 0.3439, "step": 24435 }, { "epoch": 2.86, "grad_norm": 0.8136588297985716, "learning_rate": 9.438390867264121e-06, "loss": 0.3132, "step": 24440 }, { "epoch": 2.86, "grad_norm": 0.9276599989330045, "learning_rate": 9.429415253311973e-06, "loss": 0.3015, "step": 24445 }, { "epoch": 2.86, "grad_norm": 0.9264355796206835, "learning_rate": 9.420442917131023e-06, "loss": 0.3267, "step": 24450 }, { "epoch": 2.86, "grad_norm": 0.9778534344340757, "learning_rate": 9.411473860610024e-06, "loss": 0.352, "step": 24455 }, { "epoch": 2.86, "grad_norm": 0.9749908719170531, "learning_rate": 9.402508085637074e-06, "loss": 0.3242, "step": 24460 }, { "epoch": 2.86, "grad_norm": 1.0216443893015401, "learning_rate": 9.393545594099545e-06, "loss": 0.2599, "step": 24465 }, { "epoch": 2.86, "grad_norm": 1.073380628884854, "learning_rate": 9.384586387884154e-06, "loss": 0.3333, "step": 24470 }, { "epoch": 2.86, "grad_norm": 1.004031629808907, "learning_rate": 9.37563046887689e-06, "loss": 0.2896, "step": 24475 }, { "epoch": 2.86, "grad_norm": 1.049920041637581, "learning_rate": 9.366677838963078e-06, "loss": 0.2889, "step": 24480 }, { "epoch": 2.86, "grad_norm": 0.8857842529992311, "learning_rate": 9.357728500027335e-06, "loss": 0.2873, "step": 24485 }, { "epoch": 2.86, "grad_norm": 1.1858521971637972, "learning_rate": 9.348782453953605e-06, "loss": 0.2461, "step": 24490 }, { "epoch": 2.86, "grad_norm": 1.300941750191885, "learning_rate": 9.339839702625117e-06, "loss": 0.3624, "step": 24495 }, { "epoch": 2.86, "grad_norm": 0.9108355469535715, "learning_rate": 9.330900247924396e-06, "loss": 0.3194, "step": 24500 }, { "epoch": 2.86, "grad_norm": 1.0251410109567916, "learning_rate": 9.321964091733309e-06, "loss": 0.3045, "step": 24505 }, { "epoch": 2.86, "grad_norm": 0.873628049375326, "learning_rate": 9.313031235933006e-06, "loss": 0.2968, "step": 24510 }, { "epoch": 2.86, "grad_norm": 0.9425558829599604, "learning_rate": 9.304101682403948e-06, "loss": 0.2931, "step": 24515 }, { "epoch": 2.86, "grad_norm": 0.8856538997826833, "learning_rate": 9.295175433025908e-06, "loss": 0.3086, "step": 24520 }, { "epoch": 2.87, "grad_norm": 0.951910918674955, "learning_rate": 9.286252489677944e-06, "loss": 0.3129, "step": 24525 }, { "epoch": 2.87, "grad_norm": 0.895392300327379, "learning_rate": 9.277332854238424e-06, "loss": 0.2881, "step": 24530 }, { "epoch": 2.87, "grad_norm": 0.921277631091775, "learning_rate": 9.26841652858503e-06, "loss": 0.339, "step": 24535 }, { "epoch": 2.87, "grad_norm": 0.9034321573935192, "learning_rate": 9.259503514594755e-06, "loss": 0.3136, "step": 24540 }, { "epoch": 2.87, "grad_norm": 1.0666000335582098, "learning_rate": 9.250593814143862e-06, "loss": 0.3038, "step": 24545 }, { "epoch": 2.87, "grad_norm": 0.9381343135718994, "learning_rate": 9.241687429107942e-06, "loss": 0.3199, "step": 24550 }, { "epoch": 2.87, "grad_norm": 0.9415290930121821, "learning_rate": 9.2327843613619e-06, "loss": 0.2776, "step": 24555 }, { "epoch": 2.87, "grad_norm": 0.8276495863971298, "learning_rate": 9.223884612779895e-06, "loss": 0.3221, "step": 24560 }, { "epoch": 2.87, "grad_norm": 0.9501980539342294, "learning_rate": 9.214988185235443e-06, "loss": 0.2676, "step": 24565 }, { "epoch": 2.87, "grad_norm": 1.015005000412691, "learning_rate": 9.206095080601319e-06, "loss": 0.3148, "step": 24570 }, { "epoch": 2.87, "grad_norm": 0.8803260434759336, "learning_rate": 9.197205300749616e-06, "loss": 0.273, "step": 24575 }, { "epoch": 2.87, "grad_norm": 0.9683203169687521, "learning_rate": 9.188318847551734e-06, "loss": 0.2614, "step": 24580 }, { "epoch": 2.87, "grad_norm": 1.0098958851834654, "learning_rate": 9.179435722878366e-06, "loss": 0.288, "step": 24585 }, { "epoch": 2.87, "grad_norm": 0.9664564904216176, "learning_rate": 9.1705559285995e-06, "loss": 0.313, "step": 24590 }, { "epoch": 2.87, "grad_norm": 0.847107745954699, "learning_rate": 9.161679466584414e-06, "loss": 0.2922, "step": 24595 }, { "epoch": 2.87, "grad_norm": 0.8335466475318957, "learning_rate": 9.152806338701702e-06, "loss": 0.2948, "step": 24600 }, { "epoch": 2.87, "grad_norm": 0.8642420220658538, "learning_rate": 9.143936546819257e-06, "loss": 0.3179, "step": 24605 }, { "epoch": 2.88, "grad_norm": 0.857499274055507, "learning_rate": 9.135070092804268e-06, "loss": 0.289, "step": 24610 }, { "epoch": 2.88, "grad_norm": 0.8468770120674177, "learning_rate": 9.126206978523202e-06, "loss": 0.2744, "step": 24615 }, { "epoch": 2.88, "grad_norm": 1.1222186531946703, "learning_rate": 9.117347205841842e-06, "loss": 0.3313, "step": 24620 }, { "epoch": 2.88, "grad_norm": 1.0387109032163027, "learning_rate": 9.108490776625272e-06, "loss": 0.33, "step": 24625 }, { "epoch": 2.88, "grad_norm": 1.052204403017723, "learning_rate": 9.099637692737847e-06, "loss": 0.3172, "step": 24630 }, { "epoch": 2.88, "grad_norm": 0.9204754344447226, "learning_rate": 9.090787956043254e-06, "loss": 0.2953, "step": 24635 }, { "epoch": 2.88, "grad_norm": 0.9777513645130192, "learning_rate": 9.081941568404431e-06, "loss": 0.3142, "step": 24640 }, { "epoch": 2.88, "grad_norm": 0.8169307014216313, "learning_rate": 9.073098531683652e-06, "loss": 0.2662, "step": 24645 }, { "epoch": 2.88, "grad_norm": 0.9508802286425354, "learning_rate": 9.064258847742465e-06, "loss": 0.3173, "step": 24650 }, { "epoch": 2.88, "grad_norm": 0.9882138493539279, "learning_rate": 9.055422518441723e-06, "loss": 0.3045, "step": 24655 }, { "epoch": 2.88, "grad_norm": 1.0014893014288486, "learning_rate": 9.04658954564156e-06, "loss": 0.3147, "step": 24660 }, { "epoch": 2.88, "grad_norm": 0.8920019982657743, "learning_rate": 9.037759931201401e-06, "loss": 0.2803, "step": 24665 }, { "epoch": 2.88, "grad_norm": 0.9169261574206906, "learning_rate": 9.028933676979975e-06, "loss": 0.3046, "step": 24670 }, { "epoch": 2.88, "grad_norm": 0.9619180282976926, "learning_rate": 9.020110784835309e-06, "loss": 0.3194, "step": 24675 }, { "epoch": 2.88, "grad_norm": 1.0412445137432709, "learning_rate": 9.011291256624722e-06, "loss": 0.3042, "step": 24680 }, { "epoch": 2.88, "grad_norm": 0.9475581134203468, "learning_rate": 9.002475094204793e-06, "loss": 0.3198, "step": 24685 }, { "epoch": 2.88, "grad_norm": 0.9363921415624746, "learning_rate": 8.993662299431438e-06, "loss": 0.3045, "step": 24690 }, { "epoch": 2.89, "grad_norm": 0.9427388755947488, "learning_rate": 8.984852874159828e-06, "loss": 0.3174, "step": 24695 }, { "epoch": 2.89, "grad_norm": 0.9072574323751448, "learning_rate": 8.976046820244444e-06, "loss": 0.3243, "step": 24700 }, { "epoch": 2.89, "grad_norm": 1.1344893055326293, "learning_rate": 8.967244139539064e-06, "loss": 0.3097, "step": 24705 }, { "epoch": 2.89, "grad_norm": 0.9083667456910753, "learning_rate": 8.958444833896722e-06, "loss": 0.329, "step": 24710 }, { "epoch": 2.89, "grad_norm": 1.0562803420074898, "learning_rate": 8.94964890516978e-06, "loss": 0.2935, "step": 24715 }, { "epoch": 2.89, "grad_norm": 1.012198085776275, "learning_rate": 8.940856355209879e-06, "loss": 0.3238, "step": 24720 }, { "epoch": 2.89, "grad_norm": 0.9521868464235179, "learning_rate": 8.932067185867923e-06, "loss": 0.3065, "step": 24725 }, { "epoch": 2.89, "grad_norm": 1.049505578202553, "learning_rate": 8.923281398994142e-06, "loss": 0.3147, "step": 24730 }, { "epoch": 2.89, "grad_norm": 0.9006298487894506, "learning_rate": 8.914498996438023e-06, "loss": 0.3121, "step": 24735 }, { "epoch": 2.89, "grad_norm": 0.9531838808650365, "learning_rate": 8.905719980048357e-06, "loss": 0.3185, "step": 24740 }, { "epoch": 2.89, "grad_norm": 0.8874956465339281, "learning_rate": 8.896944351673223e-06, "loss": 0.3331, "step": 24745 }, { "epoch": 2.89, "grad_norm": 0.9455213941980077, "learning_rate": 8.888172113159989e-06, "loss": 0.3334, "step": 24750 }, { "epoch": 2.89, "grad_norm": 0.9489368419238016, "learning_rate": 8.879403266355295e-06, "loss": 0.2781, "step": 24755 }, { "epoch": 2.89, "grad_norm": 0.884408675047062, "learning_rate": 8.870637813105067e-06, "loss": 0.3338, "step": 24760 }, { "epoch": 2.89, "grad_norm": 0.7607836167263174, "learning_rate": 8.861875755254536e-06, "loss": 0.2626, "step": 24765 }, { "epoch": 2.89, "grad_norm": 0.8606595833042103, "learning_rate": 8.8531170946482e-06, "loss": 0.3219, "step": 24770 }, { "epoch": 2.89, "grad_norm": 1.0075728878169883, "learning_rate": 8.844361833129863e-06, "loss": 0.2816, "step": 24775 }, { "epoch": 2.9, "grad_norm": 1.0042239632571994, "learning_rate": 8.835609972542583e-06, "loss": 0.2671, "step": 24780 }, { "epoch": 2.9, "grad_norm": 0.9931029966640522, "learning_rate": 8.826861514728729e-06, "loss": 0.2486, "step": 24785 }, { "epoch": 2.9, "grad_norm": 0.9048359932877287, "learning_rate": 8.818116461529932e-06, "loss": 0.2602, "step": 24790 }, { "epoch": 2.9, "grad_norm": 0.9576046174799073, "learning_rate": 8.809374814787124e-06, "loss": 0.3446, "step": 24795 }, { "epoch": 2.9, "grad_norm": 0.9434411265982885, "learning_rate": 8.800636576340521e-06, "loss": 0.3335, "step": 24800 }, { "epoch": 2.9, "grad_norm": 0.8994936482141401, "learning_rate": 8.791901748029596e-06, "loss": 0.2641, "step": 24805 }, { "epoch": 2.9, "grad_norm": 1.045448196175751, "learning_rate": 8.783170331693132e-06, "loss": 0.337, "step": 24810 }, { "epoch": 2.9, "grad_norm": 0.9759772445120798, "learning_rate": 8.774442329169194e-06, "loss": 0.3026, "step": 24815 }, { "epoch": 2.9, "grad_norm": 0.9642300159610239, "learning_rate": 8.765717742295104e-06, "loss": 0.2677, "step": 24820 }, { "epoch": 2.9, "grad_norm": 0.9209249174773894, "learning_rate": 8.756996572907475e-06, "loss": 0.2849, "step": 24825 }, { "epoch": 2.9, "grad_norm": 1.0521239083363665, "learning_rate": 8.748278822842212e-06, "loss": 0.3004, "step": 24830 }, { "epoch": 2.9, "grad_norm": 1.1630370278283324, "learning_rate": 8.73956449393449e-06, "loss": 0.3435, "step": 24835 }, { "epoch": 2.9, "grad_norm": 0.8847554266545826, "learning_rate": 8.730853588018772e-06, "loss": 0.3388, "step": 24840 }, { "epoch": 2.9, "grad_norm": 0.9065749128496903, "learning_rate": 8.722146106928797e-06, "loss": 0.2851, "step": 24845 }, { "epoch": 2.9, "grad_norm": 1.056609312345644, "learning_rate": 8.713442052497569e-06, "loss": 0.2904, "step": 24850 }, { "epoch": 2.9, "grad_norm": 1.0676033295107987, "learning_rate": 8.704741426557397e-06, "loss": 0.3076, "step": 24855 }, { "epoch": 2.9, "grad_norm": 0.8007208284557547, "learning_rate": 8.69604423093984e-06, "loss": 0.2564, "step": 24860 }, { "epoch": 2.91, "grad_norm": 0.8789258588724301, "learning_rate": 8.687350467475753e-06, "loss": 0.338, "step": 24865 }, { "epoch": 2.91, "grad_norm": 0.9267851243235534, "learning_rate": 8.678660137995277e-06, "loss": 0.3078, "step": 24870 }, { "epoch": 2.91, "grad_norm": 1.6057379325578072, "learning_rate": 8.669973244327795e-06, "loss": 0.3259, "step": 24875 }, { "epoch": 2.91, "grad_norm": 0.9370285568380858, "learning_rate": 8.661289788302001e-06, "loss": 0.295, "step": 24880 }, { "epoch": 2.91, "grad_norm": 0.8671113771512887, "learning_rate": 8.652609771745862e-06, "loss": 0.3139, "step": 24885 }, { "epoch": 2.91, "grad_norm": 0.9005948265731976, "learning_rate": 8.643933196486606e-06, "loss": 0.2872, "step": 24890 }, { "epoch": 2.91, "grad_norm": 0.9835958307674249, "learning_rate": 8.63526006435073e-06, "loss": 0.3104, "step": 24895 }, { "epoch": 2.91, "grad_norm": 0.9932819649744413, "learning_rate": 8.62659037716403e-06, "loss": 0.2988, "step": 24900 }, { "epoch": 2.91, "grad_norm": 0.8756829091910165, "learning_rate": 8.617924136751565e-06, "loss": 0.271, "step": 24905 }, { "epoch": 2.91, "grad_norm": 0.9841391651223605, "learning_rate": 8.609261344937672e-06, "loss": 0.345, "step": 24910 }, { "epoch": 2.91, "grad_norm": 0.9218290008804002, "learning_rate": 8.600602003545965e-06, "loss": 0.3074, "step": 24915 }, { "epoch": 2.91, "grad_norm": 1.0064452276414906, "learning_rate": 8.591946114399318e-06, "loss": 0.2912, "step": 24920 }, { "epoch": 2.91, "grad_norm": 0.9811380341919945, "learning_rate": 8.583293679319879e-06, "loss": 0.3443, "step": 24925 }, { "epoch": 2.91, "grad_norm": 0.8557771398955576, "learning_rate": 8.574644700129087e-06, "loss": 0.2676, "step": 24930 }, { "epoch": 2.91, "grad_norm": 1.091720722784849, "learning_rate": 8.565999178647635e-06, "loss": 0.2928, "step": 24935 }, { "epoch": 2.91, "grad_norm": 1.0502109626464502, "learning_rate": 8.557357116695513e-06, "loss": 0.2733, "step": 24940 }, { "epoch": 2.91, "grad_norm": 0.9918073036821434, "learning_rate": 8.548718516091941e-06, "loss": 0.3308, "step": 24945 }, { "epoch": 2.92, "grad_norm": 1.0696325538985048, "learning_rate": 8.540083378655459e-06, "loss": 0.3054, "step": 24950 }, { "epoch": 2.92, "grad_norm": 0.9801403717362307, "learning_rate": 8.531451706203834e-06, "loss": 0.285, "step": 24955 }, { "epoch": 2.92, "grad_norm": 0.9264429454334463, "learning_rate": 8.522823500554136e-06, "loss": 0.2651, "step": 24960 }, { "epoch": 2.92, "grad_norm": 0.9832433898782803, "learning_rate": 8.514198763522685e-06, "loss": 0.3028, "step": 24965 }, { "epoch": 2.92, "grad_norm": 0.9220175379039554, "learning_rate": 8.505577496925076e-06, "loss": 0.3085, "step": 24970 }, { "epoch": 2.92, "grad_norm": 0.9703688560850237, "learning_rate": 8.496959702576187e-06, "loss": 0.2635, "step": 24975 }, { "epoch": 2.92, "grad_norm": 0.9698795623140767, "learning_rate": 8.488345382290153e-06, "loss": 0.2775, "step": 24980 }, { "epoch": 2.92, "grad_norm": 1.002558600075366, "learning_rate": 8.479734537880376e-06, "loss": 0.2977, "step": 24985 }, { "epoch": 2.92, "grad_norm": 0.9086077302224278, "learning_rate": 8.47112717115952e-06, "loss": 0.2845, "step": 24990 }, { "epoch": 2.92, "grad_norm": 0.9543291941718527, "learning_rate": 8.46252328393953e-06, "loss": 0.2794, "step": 24995 }, { "epoch": 2.92, "grad_norm": 0.9165797481909358, "learning_rate": 8.45392287803162e-06, "loss": 0.2867, "step": 25000 }, { "epoch": 2.92, "grad_norm": 0.9548700714444911, "learning_rate": 8.445325955246263e-06, "loss": 0.3252, "step": 25005 }, { "epoch": 2.92, "grad_norm": 0.9443692603011027, "learning_rate": 8.436732517393209e-06, "loss": 0.3121, "step": 25010 }, { "epoch": 2.92, "grad_norm": 1.029144686390705, "learning_rate": 8.428142566281457e-06, "loss": 0.3065, "step": 25015 }, { "epoch": 2.92, "grad_norm": 1.111544576800941, "learning_rate": 8.419556103719279e-06, "loss": 0.2902, "step": 25020 }, { "epoch": 2.92, "grad_norm": 0.9725286974420745, "learning_rate": 8.410973131514219e-06, "loss": 0.3221, "step": 25025 }, { "epoch": 2.92, "grad_norm": 0.8972052985056075, "learning_rate": 8.40239365147309e-06, "loss": 0.3449, "step": 25030 }, { "epoch": 2.92, "grad_norm": 1.1226855913493445, "learning_rate": 8.393817665401948e-06, "loss": 0.3322, "step": 25035 }, { "epoch": 2.93, "grad_norm": 1.070646222368591, "learning_rate": 8.385245175106137e-06, "loss": 0.3184, "step": 25040 }, { "epoch": 2.93, "grad_norm": 1.077157184125533, "learning_rate": 8.376676182390264e-06, "loss": 0.3291, "step": 25045 }, { "epoch": 2.93, "grad_norm": 0.9450512489402122, "learning_rate": 8.368110689058173e-06, "loss": 0.2896, "step": 25050 }, { "epoch": 2.93, "grad_norm": 0.8858532441525451, "learning_rate": 8.359548696913005e-06, "loss": 0.3069, "step": 25055 }, { "epoch": 2.93, "grad_norm": 0.8031783898826441, "learning_rate": 8.350990207757139e-06, "loss": 0.2939, "step": 25060 }, { "epoch": 2.93, "grad_norm": 0.8884756988335079, "learning_rate": 8.342435223392232e-06, "loss": 0.2905, "step": 25065 }, { "epoch": 2.93, "grad_norm": 0.9668259502950691, "learning_rate": 8.333883745619195e-06, "loss": 0.3151, "step": 25070 }, { "epoch": 2.93, "grad_norm": 0.9268049264399959, "learning_rate": 8.325335776238205e-06, "loss": 0.3137, "step": 25075 }, { "epoch": 2.93, "grad_norm": 1.0110813515722838, "learning_rate": 8.316791317048711e-06, "loss": 0.3085, "step": 25080 }, { "epoch": 2.93, "grad_norm": 1.1284587518167715, "learning_rate": 8.308250369849399e-06, "loss": 0.3173, "step": 25085 }, { "epoch": 2.93, "grad_norm": 0.8554973317775217, "learning_rate": 8.299712936438219e-06, "loss": 0.2982, "step": 25090 }, { "epoch": 2.93, "grad_norm": 0.8601800805064899, "learning_rate": 8.291179018612403e-06, "loss": 0.2881, "step": 25095 }, { "epoch": 2.93, "grad_norm": 1.0123035345647424, "learning_rate": 8.282648618168423e-06, "loss": 0.328, "step": 25100 }, { "epoch": 2.93, "grad_norm": 0.8490959403173556, "learning_rate": 8.274121736902032e-06, "loss": 0.3249, "step": 25105 }, { "epoch": 2.93, "grad_norm": 0.9562105723442911, "learning_rate": 8.265598376608211e-06, "loss": 0.2747, "step": 25110 }, { "epoch": 2.93, "grad_norm": 1.5227998599867598, "learning_rate": 8.257078539081228e-06, "loss": 0.3261, "step": 25115 }, { "epoch": 2.93, "grad_norm": 0.9725892292393016, "learning_rate": 8.24856222611459e-06, "loss": 0.2482, "step": 25120 }, { "epoch": 2.94, "grad_norm": 0.7829322934508481, "learning_rate": 8.240049439501076e-06, "loss": 0.2532, "step": 25125 }, { "epoch": 2.94, "grad_norm": 1.0435525207066698, "learning_rate": 8.23154018103271e-06, "loss": 0.3127, "step": 25130 }, { "epoch": 2.94, "grad_norm": 0.9068990636688932, "learning_rate": 8.223034452500783e-06, "loss": 0.3466, "step": 25135 }, { "epoch": 2.94, "grad_norm": 0.9018229599526959, "learning_rate": 8.21453225569584e-06, "loss": 0.2839, "step": 25140 }, { "epoch": 2.94, "grad_norm": 1.2041744910979784, "learning_rate": 8.206033592407692e-06, "loss": 0.2774, "step": 25145 }, { "epoch": 2.94, "grad_norm": 0.7509338575046008, "learning_rate": 8.197538464425388e-06, "loss": 0.329, "step": 25150 }, { "epoch": 2.94, "grad_norm": 0.9817463508991848, "learning_rate": 8.189046873537237e-06, "loss": 0.317, "step": 25155 }, { "epoch": 2.94, "grad_norm": 0.863682598043019, "learning_rate": 8.180558821530809e-06, "loss": 0.2984, "step": 25160 }, { "epoch": 2.94, "grad_norm": 0.9799862878965853, "learning_rate": 8.172074310192932e-06, "loss": 0.2505, "step": 25165 }, { "epoch": 2.94, "grad_norm": 0.934625093552519, "learning_rate": 8.163593341309683e-06, "loss": 0.3764, "step": 25170 }, { "epoch": 2.94, "grad_norm": 1.2261049081646873, "learning_rate": 8.155115916666406e-06, "loss": 0.3337, "step": 25175 }, { "epoch": 2.94, "grad_norm": 1.1071113235931016, "learning_rate": 8.146642038047676e-06, "loss": 0.3307, "step": 25180 }, { "epoch": 2.94, "grad_norm": 0.9405602993412395, "learning_rate": 8.138171707237327e-06, "loss": 0.3154, "step": 25185 }, { "epoch": 2.94, "grad_norm": 0.895897066437274, "learning_rate": 8.129704926018458e-06, "loss": 0.2916, "step": 25190 }, { "epoch": 2.94, "grad_norm": 0.9193982511243515, "learning_rate": 8.121241696173427e-06, "loss": 0.2758, "step": 25195 }, { "epoch": 2.94, "grad_norm": 0.8691080857432063, "learning_rate": 8.112782019483813e-06, "loss": 0.3183, "step": 25200 }, { "epoch": 2.94, "grad_norm": 0.8533154443780022, "learning_rate": 8.104325897730478e-06, "loss": 0.3113, "step": 25205 }, { "epoch": 2.95, "grad_norm": 0.9698906157091091, "learning_rate": 8.09587333269353e-06, "loss": 0.3093, "step": 25210 }, { "epoch": 2.95, "grad_norm": 0.9345616452731546, "learning_rate": 8.087424326152307e-06, "loss": 0.304, "step": 25215 }, { "epoch": 2.95, "grad_norm": 0.948408823700671, "learning_rate": 8.078978879885427e-06, "loss": 0.2897, "step": 25220 }, { "epoch": 2.95, "grad_norm": 1.0329578825174837, "learning_rate": 8.070536995670733e-06, "loss": 0.2946, "step": 25225 }, { "epoch": 2.95, "grad_norm": 0.9031067363888123, "learning_rate": 8.062098675285336e-06, "loss": 0.2691, "step": 25230 }, { "epoch": 2.95, "grad_norm": 1.5348889285378948, "learning_rate": 8.05366392050559e-06, "loss": 0.3223, "step": 25235 }, { "epoch": 2.95, "grad_norm": 1.0753213492704967, "learning_rate": 8.04523273310711e-06, "loss": 0.3055, "step": 25240 }, { "epoch": 2.95, "grad_norm": 0.941987526197896, "learning_rate": 8.036805114864736e-06, "loss": 0.2797, "step": 25245 }, { "epoch": 2.95, "grad_norm": 0.9025154619081572, "learning_rate": 8.02838106755257e-06, "loss": 0.3306, "step": 25250 }, { "epoch": 2.95, "grad_norm": 0.9794634482544485, "learning_rate": 8.019960592943964e-06, "loss": 0.2781, "step": 25255 }, { "epoch": 2.95, "grad_norm": 0.9164668418013237, "learning_rate": 8.011543692811518e-06, "loss": 0.2989, "step": 25260 }, { "epoch": 2.95, "grad_norm": 1.056848921657759, "learning_rate": 8.003130368927084e-06, "loss": 0.3397, "step": 25265 }, { "epoch": 2.95, "grad_norm": 0.8543138805055576, "learning_rate": 7.994720623061742e-06, "loss": 0.3336, "step": 25270 }, { "epoch": 2.95, "grad_norm": 0.866552380301082, "learning_rate": 7.986314456985844e-06, "loss": 0.3498, "step": 25275 }, { "epoch": 2.95, "grad_norm": 0.8957177408395095, "learning_rate": 7.97791187246896e-06, "loss": 0.3068, "step": 25280 }, { "epoch": 2.95, "grad_norm": 0.9403020253776941, "learning_rate": 7.969512871279936e-06, "loss": 0.2793, "step": 25285 }, { "epoch": 2.95, "grad_norm": 0.9637639700025175, "learning_rate": 7.96111745518685e-06, "loss": 0.3043, "step": 25290 }, { "epoch": 2.96, "grad_norm": 0.9485857804683302, "learning_rate": 7.952725625957017e-06, "loss": 0.278, "step": 25295 }, { "epoch": 2.96, "grad_norm": 0.910895684876032, "learning_rate": 7.944337385357003e-06, "loss": 0.2929, "step": 25300 }, { "epoch": 2.96, "grad_norm": 1.258688954227687, "learning_rate": 7.93595273515263e-06, "loss": 0.2719, "step": 25305 }, { "epoch": 2.96, "grad_norm": 1.0280424248620488, "learning_rate": 7.927571677108959e-06, "loss": 0.3092, "step": 25310 }, { "epoch": 2.96, "grad_norm": 0.8918193652901086, "learning_rate": 7.91919421299028e-06, "loss": 0.3171, "step": 25315 }, { "epoch": 2.96, "grad_norm": 0.9830915256539035, "learning_rate": 7.910820344560133e-06, "loss": 0.3601, "step": 25320 }, { "epoch": 2.96, "grad_norm": 0.9556672171145505, "learning_rate": 7.902450073581311e-06, "loss": 0.3134, "step": 25325 }, { "epoch": 2.96, "grad_norm": 0.954231053543131, "learning_rate": 7.894083401815842e-06, "loss": 0.3439, "step": 25330 }, { "epoch": 2.96, "grad_norm": 1.0639659962908548, "learning_rate": 7.88572033102501e-06, "loss": 0.3284, "step": 25335 }, { "epoch": 2.96, "grad_norm": 0.9731087610442579, "learning_rate": 7.87736086296931e-06, "loss": 0.2972, "step": 25340 }, { "epoch": 2.96, "grad_norm": 1.0207396307907552, "learning_rate": 7.869004999408514e-06, "loss": 0.3531, "step": 25345 }, { "epoch": 2.96, "grad_norm": 0.8670107072061473, "learning_rate": 7.860652742101604e-06, "loss": 0.2896, "step": 25350 }, { "epoch": 2.96, "grad_norm": 0.9303874843214798, "learning_rate": 7.852304092806825e-06, "loss": 0.2729, "step": 25355 }, { "epoch": 2.96, "grad_norm": 0.9174337877363012, "learning_rate": 7.843959053281663e-06, "loss": 0.302, "step": 25360 }, { "epoch": 2.96, "grad_norm": 0.8070936212042978, "learning_rate": 7.835617625282823e-06, "loss": 0.3262, "step": 25365 }, { "epoch": 2.96, "grad_norm": 0.9095039615039022, "learning_rate": 7.827279810566266e-06, "loss": 0.3218, "step": 25370 }, { "epoch": 2.96, "grad_norm": 0.9894243417439872, "learning_rate": 7.818945610887199e-06, "loss": 0.3248, "step": 25375 }, { "epoch": 2.97, "grad_norm": 51.67964912522946, "learning_rate": 7.810615028000045e-06, "loss": 0.3222, "step": 25380 }, { "epoch": 2.97, "grad_norm": 1.0860220630808157, "learning_rate": 7.802288063658492e-06, "loss": 0.321, "step": 25385 }, { "epoch": 2.97, "grad_norm": 0.9146857739040198, "learning_rate": 7.793964719615438e-06, "loss": 0.3003, "step": 25390 }, { "epoch": 2.97, "grad_norm": 0.8986280899323342, "learning_rate": 7.785644997623043e-06, "loss": 0.302, "step": 25395 }, { "epoch": 2.97, "grad_norm": 0.8016509258170548, "learning_rate": 7.777328899432695e-06, "loss": 0.2454, "step": 25400 }, { "epoch": 2.97, "grad_norm": 0.933538865578769, "learning_rate": 7.769016426795028e-06, "loss": 0.2979, "step": 25405 }, { "epoch": 2.97, "grad_norm": 0.9563963607873767, "learning_rate": 7.760707581459894e-06, "loss": 0.2882, "step": 25410 }, { "epoch": 2.97, "grad_norm": 1.320537311757936, "learning_rate": 7.75240236517639e-06, "loss": 0.2718, "step": 25415 }, { "epoch": 2.97, "grad_norm": 0.8874759288697339, "learning_rate": 7.744100779692853e-06, "loss": 0.3079, "step": 25420 }, { "epoch": 2.97, "grad_norm": 0.9549955471653746, "learning_rate": 7.735802826756856e-06, "loss": 0.3094, "step": 25425 }, { "epoch": 2.97, "grad_norm": 0.9110601784095469, "learning_rate": 7.727508508115214e-06, "loss": 0.319, "step": 25430 }, { "epoch": 2.97, "grad_norm": 0.9028471801879644, "learning_rate": 7.71921782551395e-06, "loss": 0.2791, "step": 25435 }, { "epoch": 2.97, "grad_norm": 0.9734956441893885, "learning_rate": 7.710930780698356e-06, "loss": 0.3037, "step": 25440 }, { "epoch": 2.97, "grad_norm": 0.9617939557045352, "learning_rate": 7.702647375412927e-06, "loss": 0.3197, "step": 25445 }, { "epoch": 2.97, "grad_norm": 0.9257312035679582, "learning_rate": 7.694367611401416e-06, "loss": 0.2735, "step": 25450 }, { "epoch": 2.97, "grad_norm": 0.914998502597473, "learning_rate": 7.686091490406802e-06, "loss": 0.3045, "step": 25455 }, { "epoch": 2.97, "grad_norm": 0.989967742539965, "learning_rate": 7.677819014171286e-06, "loss": 0.2977, "step": 25460 }, { "epoch": 2.98, "grad_norm": 0.9827260944602146, "learning_rate": 7.669550184436313e-06, "loss": 0.272, "step": 25465 }, { "epoch": 2.98, "grad_norm": 0.9007182908011555, "learning_rate": 7.661285002942572e-06, "loss": 0.305, "step": 25470 }, { "epoch": 2.98, "grad_norm": 0.8976312410532772, "learning_rate": 7.65302347142996e-06, "loss": 0.2709, "step": 25475 }, { "epoch": 2.98, "grad_norm": 1.1873652781078856, "learning_rate": 7.644765591637607e-06, "loss": 0.3129, "step": 25480 }, { "epoch": 2.98, "grad_norm": 0.8611721232147667, "learning_rate": 7.63651136530389e-06, "loss": 0.253, "step": 25485 }, { "epoch": 2.98, "grad_norm": 0.9757808787533969, "learning_rate": 7.628260794166412e-06, "loss": 0.3511, "step": 25490 }, { "epoch": 2.98, "grad_norm": 1.8856369973388363, "learning_rate": 7.620013879962004e-06, "loss": 0.3206, "step": 25495 }, { "epoch": 2.98, "grad_norm": 1.0326700916031482, "learning_rate": 7.611770624426737e-06, "loss": 0.28, "step": 25500 }, { "epoch": 2.98, "grad_norm": 0.8523192368453756, "learning_rate": 7.603531029295893e-06, "loss": 0.2911, "step": 25505 }, { "epoch": 2.98, "grad_norm": 0.9816393367238673, "learning_rate": 7.5952950963039865e-06, "loss": 0.2839, "step": 25510 }, { "epoch": 2.98, "grad_norm": 0.9831184260138018, "learning_rate": 7.5870628271847765e-06, "loss": 0.3339, "step": 25515 }, { "epoch": 2.98, "grad_norm": 0.8898209461236166, "learning_rate": 7.578834223671238e-06, "loss": 0.2744, "step": 25520 }, { "epoch": 2.98, "grad_norm": 0.970413732195831, "learning_rate": 7.570609287495592e-06, "loss": 0.2895, "step": 25525 }, { "epoch": 2.98, "grad_norm": 0.9230828492351917, "learning_rate": 7.562388020389252e-06, "loss": 0.2742, "step": 25530 }, { "epoch": 2.98, "grad_norm": 1.0587845345117999, "learning_rate": 7.554170424082893e-06, "loss": 0.3381, "step": 25535 }, { "epoch": 2.98, "grad_norm": 0.9560823806000868, "learning_rate": 7.545956500306409e-06, "loss": 0.2926, "step": 25540 }, { "epoch": 2.98, "grad_norm": 0.9977047617581358, "learning_rate": 7.537746250788913e-06, "loss": 0.2996, "step": 25545 }, { "epoch": 2.99, "grad_norm": 0.8579617788187831, "learning_rate": 7.5295396772587376e-06, "loss": 0.3436, "step": 25550 }, { "epoch": 2.99, "grad_norm": 0.9515549313546731, "learning_rate": 7.521336781443464e-06, "loss": 0.277, "step": 25555 }, { "epoch": 2.99, "grad_norm": 0.9289571590436669, "learning_rate": 7.5131375650698835e-06, "loss": 0.3019, "step": 25560 }, { "epoch": 2.99, "grad_norm": 1.0714930756891181, "learning_rate": 7.5049420298640175e-06, "loss": 0.2972, "step": 25565 }, { "epoch": 2.99, "grad_norm": 1.0679839569446519, "learning_rate": 7.496750177551126e-06, "loss": 0.2693, "step": 25570 }, { "epoch": 2.99, "grad_norm": 0.7943835500224294, "learning_rate": 7.488562009855663e-06, "loss": 0.328, "step": 25575 }, { "epoch": 2.99, "grad_norm": 0.8810813658279644, "learning_rate": 7.480377528501323e-06, "loss": 0.287, "step": 25580 }, { "epoch": 2.99, "grad_norm": 0.9873446119957718, "learning_rate": 7.472196735211029e-06, "loss": 0.3034, "step": 25585 }, { "epoch": 2.99, "grad_norm": 1.1409084100736497, "learning_rate": 7.4640196317069225e-06, "loss": 0.32, "step": 25590 }, { "epoch": 2.99, "grad_norm": 0.9064147784656964, "learning_rate": 7.455846219710383e-06, "loss": 0.3348, "step": 25595 }, { "epoch": 2.99, "grad_norm": 0.8767706195156121, "learning_rate": 7.4476765009419775e-06, "loss": 0.3092, "step": 25600 }, { "epoch": 2.99, "grad_norm": 1.007118586183292, "learning_rate": 7.439510477121536e-06, "loss": 0.3057, "step": 25605 }, { "epoch": 2.99, "grad_norm": 1.0466972292582857, "learning_rate": 7.431348149968076e-06, "loss": 0.3463, "step": 25610 }, { "epoch": 2.99, "grad_norm": 0.9055093635662006, "learning_rate": 7.423189521199867e-06, "loss": 0.3069, "step": 25615 }, { "epoch": 2.99, "grad_norm": 0.8815073452368876, "learning_rate": 7.415034592534373e-06, "loss": 0.2835, "step": 25620 }, { "epoch": 2.99, "grad_norm": 0.9796541721478743, "learning_rate": 7.4068833656883e-06, "loss": 0.334, "step": 25625 }, { "epoch": 2.99, "grad_norm": 0.939487768224349, "learning_rate": 7.3987358423775634e-06, "loss": 0.2473, "step": 25630 }, { "epoch": 3.0, "grad_norm": 1.0164116096828941, "learning_rate": 7.390592024317311e-06, "loss": 0.2894, "step": 25635 }, { "epoch": 3.0, "grad_norm": 0.9088123317314842, "learning_rate": 7.382451913221894e-06, "loss": 0.2943, "step": 25640 }, { "epoch": 3.0, "grad_norm": 0.9855048029428187, "learning_rate": 7.374315510804886e-06, "loss": 0.2654, "step": 25645 }, { "epoch": 3.0, "grad_norm": 0.8004988873093533, "learning_rate": 7.366182818779088e-06, "loss": 0.2896, "step": 25650 }, { "epoch": 3.0, "grad_norm": 0.9663828356806119, "learning_rate": 7.358053838856521e-06, "loss": 0.2732, "step": 25655 }, { "epoch": 3.0, "grad_norm": 0.9749653199155557, "learning_rate": 7.3499285727484184e-06, "loss": 0.3009, "step": 25660 }, { "epoch": 3.0, "grad_norm": 0.9486304246992414, "learning_rate": 7.341807022165245e-06, "loss": 0.2978, "step": 25665 }, { "epoch": 3.0, "grad_norm": 0.8988789510433882, "learning_rate": 7.333689188816658e-06, "loss": 0.2961, "step": 25670 }, { "epoch": 3.0, "grad_norm": 0.8739588118307898, "learning_rate": 7.325575074411545e-06, "loss": 0.3114, "step": 25675 }, { "epoch": 3.0, "grad_norm": 0.6666462630897451, "learning_rate": 7.317464680658015e-06, "loss": 0.1854, "step": 25680 }, { "epoch": 3.0, "grad_norm": 0.5792332061408854, "learning_rate": 7.3093580092634004e-06, "loss": 0.128, "step": 25685 }, { "epoch": 3.0, "grad_norm": 0.5800027182233656, "learning_rate": 7.301255061934226e-06, "loss": 0.1165, "step": 25690 }, { "epoch": 3.0, "grad_norm": 0.590470133985006, "learning_rate": 7.2931558403762535e-06, "loss": 0.1099, "step": 25695 }, { "epoch": 3.0, "grad_norm": 0.6843244980344247, "learning_rate": 7.2850603462944616e-06, "loss": 0.1189, "step": 25700 }, { "epoch": 3.0, "grad_norm": 0.6698734669524816, "learning_rate": 7.27696858139302e-06, "loss": 0.1001, "step": 25705 }, { "epoch": 3.0, "grad_norm": 0.82789249783902, "learning_rate": 7.268880547375351e-06, "loss": 0.1393, "step": 25710 }, { "epoch": 3.0, "grad_norm": 0.7624113790559901, "learning_rate": 7.2607962459440485e-06, "loss": 0.1061, "step": 25715 }, { "epoch": 3.01, "grad_norm": 0.7789374637318257, "learning_rate": 7.2527156788009556e-06, "loss": 0.143, "step": 25720 }, { "epoch": 3.01, "grad_norm": 0.6067680235070263, "learning_rate": 7.244638847647112e-06, "loss": 0.0931, "step": 25725 }, { "epoch": 3.01, "grad_norm": 0.7527601937177296, "learning_rate": 7.236565754182783e-06, "loss": 0.1137, "step": 25730 }, { "epoch": 3.01, "grad_norm": 0.7040234194527107, "learning_rate": 7.228496400107434e-06, "loss": 0.1121, "step": 25735 }, { "epoch": 3.01, "grad_norm": 0.6230299647399241, "learning_rate": 7.220430787119742e-06, "loss": 0.1043, "step": 25740 }, { "epoch": 3.01, "grad_norm": 0.8032028417597421, "learning_rate": 7.212368916917609e-06, "loss": 0.1133, "step": 25745 }, { "epoch": 3.01, "grad_norm": 0.7110139189186594, "learning_rate": 7.204310791198141e-06, "loss": 0.1294, "step": 25750 }, { "epoch": 3.01, "grad_norm": 0.6508090702228945, "learning_rate": 7.196256411657667e-06, "loss": 0.1155, "step": 25755 }, { "epoch": 3.01, "grad_norm": 0.7348275068098351, "learning_rate": 7.188205779991705e-06, "loss": 0.1276, "step": 25760 }, { "epoch": 3.01, "grad_norm": 0.8073421940574722, "learning_rate": 7.1801588978950036e-06, "loss": 0.1155, "step": 25765 }, { "epoch": 3.01, "grad_norm": 0.7497618927437621, "learning_rate": 7.172115767061524e-06, "loss": 0.0996, "step": 25770 }, { "epoch": 3.01, "grad_norm": 0.7059470049647905, "learning_rate": 7.164076389184413e-06, "loss": 0.1235, "step": 25775 }, { "epoch": 3.01, "grad_norm": 0.9038235295073999, "learning_rate": 7.156040765956057e-06, "loss": 0.0955, "step": 25780 }, { "epoch": 3.01, "grad_norm": 0.6955374781103616, "learning_rate": 7.148008899068029e-06, "loss": 0.0978, "step": 25785 }, { "epoch": 3.01, "grad_norm": 0.7957312573531673, "learning_rate": 7.139980790211123e-06, "loss": 0.1081, "step": 25790 }, { "epoch": 3.01, "grad_norm": 0.7466190161407813, "learning_rate": 7.1319564410753465e-06, "loss": 0.2046, "step": 25795 }, { "epoch": 3.01, "grad_norm": 0.6904227609716052, "learning_rate": 7.123935853349911e-06, "loss": 0.0943, "step": 25800 }, { "epoch": 3.01, "grad_norm": 1.0197191763284226, "learning_rate": 7.115919028723228e-06, "loss": 0.116, "step": 25805 }, { "epoch": 3.02, "grad_norm": 0.8762461118157909, "learning_rate": 7.1079059688829164e-06, "loss": 0.1139, "step": 25810 }, { "epoch": 3.02, "grad_norm": 0.8896254201440681, "learning_rate": 7.099896675515819e-06, "loss": 0.1199, "step": 25815 }, { "epoch": 3.02, "grad_norm": 0.6035578091906693, "learning_rate": 7.091891150307972e-06, "loss": 0.0891, "step": 25820 }, { "epoch": 3.02, "grad_norm": 0.61801022746837, "learning_rate": 7.083889394944629e-06, "loss": 0.1069, "step": 25825 }, { "epoch": 3.02, "grad_norm": 0.6852288254533534, "learning_rate": 7.0758914111102335e-06, "loss": 0.1203, "step": 25830 }, { "epoch": 3.02, "grad_norm": 0.698277667934384, "learning_rate": 7.067897200488457e-06, "loss": 0.1002, "step": 25835 }, { "epoch": 3.02, "grad_norm": 0.7585407421949465, "learning_rate": 7.059906764762148e-06, "loss": 0.0979, "step": 25840 }, { "epoch": 3.02, "grad_norm": 0.7002183196178202, "learning_rate": 7.051920105613388e-06, "loss": 0.1006, "step": 25845 }, { "epoch": 3.02, "grad_norm": 1.0840698577683, "learning_rate": 7.043937224723457e-06, "loss": 0.1257, "step": 25850 }, { "epoch": 3.02, "grad_norm": 0.8124452490345206, "learning_rate": 7.035958123772821e-06, "loss": 0.1269, "step": 25855 }, { "epoch": 3.02, "grad_norm": 0.7366707511174803, "learning_rate": 7.027982804441171e-06, "loss": 0.1141, "step": 25860 }, { "epoch": 3.02, "grad_norm": 0.6306136980050606, "learning_rate": 7.020011268407406e-06, "loss": 0.107, "step": 25865 }, { "epoch": 3.02, "grad_norm": 0.6173998104994289, "learning_rate": 7.012043517349598e-06, "loss": 0.0934, "step": 25870 }, { "epoch": 3.02, "grad_norm": 0.7062152809136335, "learning_rate": 7.004079552945062e-06, "loss": 0.1198, "step": 25875 }, { "epoch": 3.02, "grad_norm": 0.659383273148436, "learning_rate": 6.9961193768702745e-06, "loss": 0.1191, "step": 25880 }, { "epoch": 3.02, "grad_norm": 0.682355103218542, "learning_rate": 6.98816299080095e-06, "loss": 0.0819, "step": 25885 }, { "epoch": 3.02, "grad_norm": 0.6453553331248967, "learning_rate": 6.980210396411987e-06, "loss": 0.1212, "step": 25890 }, { "epoch": 3.03, "grad_norm": 0.7318894613121328, "learning_rate": 6.9722615953774996e-06, "loss": 0.125, "step": 25895 }, { "epoch": 3.03, "grad_norm": 0.7625115410481241, "learning_rate": 6.9643165893707865e-06, "loss": 0.1159, "step": 25900 }, { "epoch": 3.03, "grad_norm": 0.6356848020563247, "learning_rate": 6.956375380064345e-06, "loss": 0.0985, "step": 25905 }, { "epoch": 3.03, "grad_norm": 0.7048958348134202, "learning_rate": 6.948437969129892e-06, "loss": 0.1266, "step": 25910 }, { "epoch": 3.03, "grad_norm": 0.5732019712783071, "learning_rate": 6.940504358238337e-06, "loss": 0.1083, "step": 25915 }, { "epoch": 3.03, "grad_norm": 0.6783886104219875, "learning_rate": 6.932574549059789e-06, "loss": 0.1216, "step": 25920 }, { "epoch": 3.03, "grad_norm": 0.659643945342329, "learning_rate": 6.924648543263551e-06, "loss": 0.1338, "step": 25925 }, { "epoch": 3.03, "grad_norm": 0.7032557462918197, "learning_rate": 6.916726342518137e-06, "loss": 0.1154, "step": 25930 }, { "epoch": 3.03, "grad_norm": 0.5495182729776876, "learning_rate": 6.908807948491241e-06, "loss": 0.1047, "step": 25935 }, { "epoch": 3.03, "grad_norm": 0.5985940204560098, "learning_rate": 6.900893362849778e-06, "loss": 0.099, "step": 25940 }, { "epoch": 3.03, "grad_norm": 0.720134198341354, "learning_rate": 6.892982587259852e-06, "loss": 0.1105, "step": 25945 }, { "epoch": 3.03, "grad_norm": 0.7530826576061642, "learning_rate": 6.885075623386758e-06, "loss": 0.1093, "step": 25950 }, { "epoch": 3.03, "grad_norm": 0.7880013058248595, "learning_rate": 6.877172472894996e-06, "loss": 0.1138, "step": 25955 }, { "epoch": 3.03, "grad_norm": 0.6986562461350952, "learning_rate": 6.869273137448271e-06, "loss": 0.1155, "step": 25960 }, { "epoch": 3.03, "grad_norm": 0.8489108713953327, "learning_rate": 6.861377618709466e-06, "loss": 0.125, "step": 25965 }, { "epoch": 3.03, "grad_norm": 0.6991534866152611, "learning_rate": 6.853485918340669e-06, "loss": 0.1211, "step": 25970 }, { "epoch": 3.03, "grad_norm": 0.6760740065478332, "learning_rate": 6.845598038003168e-06, "loss": 0.1316, "step": 25975 }, { "epoch": 3.04, "grad_norm": 0.5424280130641633, "learning_rate": 6.837713979357446e-06, "loss": 0.0901, "step": 25980 }, { "epoch": 3.04, "grad_norm": 0.6310513669801356, "learning_rate": 6.82983374406318e-06, "loss": 0.1274, "step": 25985 }, { "epoch": 3.04, "grad_norm": 0.6472820295166074, "learning_rate": 6.821957333779249e-06, "loss": 0.1243, "step": 25990 }, { "epoch": 3.04, "grad_norm": 0.7395819056318942, "learning_rate": 6.814084750163705e-06, "loss": 0.1116, "step": 25995 }, { "epoch": 3.04, "grad_norm": 0.8112187535618616, "learning_rate": 6.806215994873827e-06, "loss": 0.106, "step": 26000 }, { "epoch": 3.04, "grad_norm": 0.7712546623283802, "learning_rate": 6.79835106956605e-06, "loss": 0.1054, "step": 26005 }, { "epoch": 3.04, "grad_norm": 0.6585336437449546, "learning_rate": 6.790489975896033e-06, "loss": 0.103, "step": 26010 }, { "epoch": 3.04, "grad_norm": 0.5712067309109954, "learning_rate": 6.78263271551863e-06, "loss": 0.1163, "step": 26015 }, { "epoch": 3.04, "grad_norm": 0.6953360387018156, "learning_rate": 6.774779290087857e-06, "loss": 0.1126, "step": 26020 }, { "epoch": 3.04, "grad_norm": 0.6022364608823308, "learning_rate": 6.76692970125695e-06, "loss": 0.0821, "step": 26025 }, { "epoch": 3.04, "grad_norm": 0.6276423851743334, "learning_rate": 6.7590839506783385e-06, "loss": 0.0966, "step": 26030 }, { "epoch": 3.04, "grad_norm": 0.7558626237999606, "learning_rate": 6.751242040003628e-06, "loss": 0.1168, "step": 26035 }, { "epoch": 3.04, "grad_norm": 0.6427284842209443, "learning_rate": 6.743403970883616e-06, "loss": 0.0957, "step": 26040 }, { "epoch": 3.04, "grad_norm": 0.6563102159911584, "learning_rate": 6.735569744968303e-06, "loss": 0.1188, "step": 26045 }, { "epoch": 3.04, "grad_norm": 0.8075500551366093, "learning_rate": 6.727739363906876e-06, "loss": 0.1106, "step": 26050 }, { "epoch": 3.04, "grad_norm": 0.7586503712845535, "learning_rate": 6.719912829347716e-06, "loss": 0.1291, "step": 26055 }, { "epoch": 3.04, "grad_norm": 0.6329569960480992, "learning_rate": 6.712090142938396e-06, "loss": 0.1174, "step": 26060 }, { "epoch": 3.05, "grad_norm": 0.9599491186613833, "learning_rate": 6.704271306325663e-06, "loss": 0.1111, "step": 26065 }, { "epoch": 3.05, "grad_norm": 0.5789830062540582, "learning_rate": 6.696456321155459e-06, "loss": 0.1105, "step": 26070 }, { "epoch": 3.05, "grad_norm": 0.6688107528963738, "learning_rate": 6.688645189072929e-06, "loss": 0.1107, "step": 26075 }, { "epoch": 3.05, "grad_norm": 0.638058702836562, "learning_rate": 6.6808379117223925e-06, "loss": 0.11, "step": 26080 }, { "epoch": 3.05, "grad_norm": 0.6873110907466587, "learning_rate": 6.673034490747379e-06, "loss": 0.0971, "step": 26085 }, { "epoch": 3.05, "grad_norm": 0.8192393902004805, "learning_rate": 6.66523492779057e-06, "loss": 0.1141, "step": 26090 }, { "epoch": 3.05, "grad_norm": 0.7217670932676725, "learning_rate": 6.657439224493872e-06, "loss": 0.0989, "step": 26095 }, { "epoch": 3.05, "grad_norm": 0.628813249978428, "learning_rate": 6.649647382498345e-06, "loss": 0.0972, "step": 26100 }, { "epoch": 3.05, "grad_norm": 0.7656671164556205, "learning_rate": 6.64185940344427e-06, "loss": 0.1156, "step": 26105 }, { "epoch": 3.05, "grad_norm": 0.6608351175345727, "learning_rate": 6.634075288971084e-06, "loss": 0.0999, "step": 26110 }, { "epoch": 3.05, "grad_norm": 0.6887990486861082, "learning_rate": 6.626295040717431e-06, "loss": 0.0992, "step": 26115 }, { "epoch": 3.05, "grad_norm": 0.6267976043256926, "learning_rate": 6.618518660321135e-06, "loss": 0.1098, "step": 26120 }, { "epoch": 3.05, "grad_norm": 0.5821058528785312, "learning_rate": 6.610746149419214e-06, "loss": 0.0863, "step": 26125 }, { "epoch": 3.05, "grad_norm": 0.6636853664323545, "learning_rate": 6.602977509647854e-06, "loss": 0.0909, "step": 26130 }, { "epoch": 3.05, "grad_norm": 0.7223944871968184, "learning_rate": 6.5952127426424286e-06, "loss": 0.1053, "step": 26135 }, { "epoch": 3.05, "grad_norm": 0.6640745565999715, "learning_rate": 6.587451850037507e-06, "loss": 0.0902, "step": 26140 }, { "epoch": 3.05, "grad_norm": 0.6031629585219299, "learning_rate": 6.579694833466843e-06, "loss": 0.1099, "step": 26145 }, { "epoch": 3.06, "grad_norm": 0.6204414585622152, "learning_rate": 6.571941694563369e-06, "loss": 0.1052, "step": 26150 }, { "epoch": 3.06, "grad_norm": 0.7827397136665145, "learning_rate": 6.564192434959205e-06, "loss": 0.1257, "step": 26155 }, { "epoch": 3.06, "grad_norm": 0.7955731670164823, "learning_rate": 6.55644705628565e-06, "loss": 0.0989, "step": 26160 }, { "epoch": 3.06, "grad_norm": 0.7194559130686795, "learning_rate": 6.548705560173174e-06, "loss": 0.1093, "step": 26165 }, { "epoch": 3.06, "grad_norm": 0.7427976229280938, "learning_rate": 6.540967948251456e-06, "loss": 0.1082, "step": 26170 }, { "epoch": 3.06, "grad_norm": 0.8049930168062688, "learning_rate": 6.533234222149348e-06, "loss": 0.1005, "step": 26175 }, { "epoch": 3.06, "grad_norm": 0.7869616922831549, "learning_rate": 6.525504383494865e-06, "loss": 0.0946, "step": 26180 }, { "epoch": 3.06, "grad_norm": 0.7245779254144922, "learning_rate": 6.517778433915231e-06, "loss": 0.0875, "step": 26185 }, { "epoch": 3.06, "grad_norm": 0.7195324852015156, "learning_rate": 6.510056375036841e-06, "loss": 0.0974, "step": 26190 }, { "epoch": 3.06, "grad_norm": 0.6596443541433088, "learning_rate": 6.502338208485259e-06, "loss": 0.0969, "step": 26195 }, { "epoch": 3.06, "grad_norm": 0.6714694117818513, "learning_rate": 6.494623935885255e-06, "loss": 0.112, "step": 26200 }, { "epoch": 3.06, "grad_norm": 0.6951530175989664, "learning_rate": 6.486913558860744e-06, "loss": 0.1081, "step": 26205 }, { "epoch": 3.06, "grad_norm": 0.918517832724242, "learning_rate": 6.479207079034855e-06, "loss": 0.1826, "step": 26210 }, { "epoch": 3.06, "grad_norm": 0.7481462899702817, "learning_rate": 6.471504498029876e-06, "loss": 0.1378, "step": 26215 }, { "epoch": 3.06, "grad_norm": 0.7781801200232565, "learning_rate": 6.463805817467286e-06, "loss": 0.1028, "step": 26220 }, { "epoch": 3.06, "grad_norm": 0.6305727540011729, "learning_rate": 6.4561110389677465e-06, "loss": 0.1043, "step": 26225 }, { "epoch": 3.06, "grad_norm": 0.7414077335804938, "learning_rate": 6.448420164151076e-06, "loss": 0.1062, "step": 26230 }, { "epoch": 3.07, "grad_norm": 0.779076066844843, "learning_rate": 6.440733194636281e-06, "loss": 0.1178, "step": 26235 }, { "epoch": 3.07, "grad_norm": 0.5959009473978787, "learning_rate": 6.433050132041554e-06, "loss": 0.1063, "step": 26240 }, { "epoch": 3.07, "grad_norm": 0.7411986353085473, "learning_rate": 6.425370977984271e-06, "loss": 0.0967, "step": 26245 }, { "epoch": 3.07, "grad_norm": 0.78144453500627, "learning_rate": 6.4176957340809556e-06, "loss": 0.114, "step": 26250 }, { "epoch": 3.07, "grad_norm": 0.6617298682824334, "learning_rate": 6.410024401947337e-06, "loss": 0.0826, "step": 26255 }, { "epoch": 3.07, "grad_norm": 0.7841786828968641, "learning_rate": 6.402356983198319e-06, "loss": 0.1096, "step": 26260 }, { "epoch": 3.07, "grad_norm": 0.6780428882784549, "learning_rate": 6.394693479447958e-06, "loss": 0.118, "step": 26265 }, { "epoch": 3.07, "grad_norm": 0.6340536895837628, "learning_rate": 6.387033892309513e-06, "loss": 0.0876, "step": 26270 }, { "epoch": 3.07, "grad_norm": 0.584421856402789, "learning_rate": 6.379378223395396e-06, "loss": 0.0953, "step": 26275 }, { "epoch": 3.07, "grad_norm": 0.6735257359327141, "learning_rate": 6.3717264743172134e-06, "loss": 0.0973, "step": 26280 }, { "epoch": 3.07, "grad_norm": 0.6344762737763165, "learning_rate": 6.36407864668574e-06, "loss": 0.093, "step": 26285 }, { "epoch": 3.07, "grad_norm": 0.6012282402288402, "learning_rate": 6.356434742110926e-06, "loss": 0.1055, "step": 26290 }, { "epoch": 3.07, "grad_norm": 0.6984866310526668, "learning_rate": 6.348794762201887e-06, "loss": 0.1159, "step": 26295 }, { "epoch": 3.07, "grad_norm": 0.5663254297759245, "learning_rate": 6.341158708566916e-06, "loss": 0.0892, "step": 26300 }, { "epoch": 3.07, "grad_norm": 0.7066668094270536, "learning_rate": 6.333526582813487e-06, "loss": 0.1051, "step": 26305 }, { "epoch": 3.07, "grad_norm": 0.7820246602332909, "learning_rate": 6.32589838654824e-06, "loss": 0.0796, "step": 26310 }, { "epoch": 3.07, "grad_norm": 0.7779764016634094, "learning_rate": 6.318274121376999e-06, "loss": 0.1333, "step": 26315 }, { "epoch": 3.08, "grad_norm": 0.7856533319848613, "learning_rate": 6.310653788904741e-06, "loss": 0.1228, "step": 26320 }, { "epoch": 3.08, "grad_norm": 0.6777718896520394, "learning_rate": 6.303037390735634e-06, "loss": 0.1007, "step": 26325 }, { "epoch": 3.08, "grad_norm": 0.6724219744148024, "learning_rate": 6.295424928472998e-06, "loss": 0.1039, "step": 26330 }, { "epoch": 3.08, "grad_norm": 0.5944257338427684, "learning_rate": 6.287816403719346e-06, "loss": 0.1141, "step": 26335 }, { "epoch": 3.08, "grad_norm": 0.7244686837654882, "learning_rate": 6.2802118180763565e-06, "loss": 0.111, "step": 26340 }, { "epoch": 3.08, "grad_norm": 0.7376764639433125, "learning_rate": 6.27261117314486e-06, "loss": 0.1163, "step": 26345 }, { "epoch": 3.08, "grad_norm": 0.6425688286861146, "learning_rate": 6.265014470524877e-06, "loss": 0.1198, "step": 26350 }, { "epoch": 3.08, "grad_norm": 0.7828557117103871, "learning_rate": 6.257421711815606e-06, "loss": 0.1126, "step": 26355 }, { "epoch": 3.08, "grad_norm": 0.7648900480180448, "learning_rate": 6.249832898615382e-06, "loss": 0.1046, "step": 26360 }, { "epoch": 3.08, "grad_norm": 0.6229532976416061, "learning_rate": 6.242248032521747e-06, "loss": 0.1104, "step": 26365 }, { "epoch": 3.08, "grad_norm": 0.8228338217744782, "learning_rate": 6.234667115131382e-06, "loss": 0.1291, "step": 26370 }, { "epoch": 3.08, "grad_norm": 0.648720665762594, "learning_rate": 6.227090148040151e-06, "loss": 0.1043, "step": 26375 }, { "epoch": 3.08, "grad_norm": 0.6109161005446335, "learning_rate": 6.219517132843092e-06, "loss": 0.1329, "step": 26380 }, { "epoch": 3.08, "grad_norm": 0.7832889781037061, "learning_rate": 6.211948071134405e-06, "loss": 0.0952, "step": 26385 }, { "epoch": 3.08, "grad_norm": 0.6406279830428531, "learning_rate": 6.204382964507455e-06, "loss": 0.1041, "step": 26390 }, { "epoch": 3.08, "grad_norm": 0.6500546169680852, "learning_rate": 6.196821814554765e-06, "loss": 0.101, "step": 26395 }, { "epoch": 3.08, "grad_norm": 0.6953458208336976, "learning_rate": 6.1892646228680436e-06, "loss": 0.0916, "step": 26400 }, { "epoch": 3.09, "grad_norm": 0.7197682220683861, "learning_rate": 6.181711391038161e-06, "loss": 0.1235, "step": 26405 }, { "epoch": 3.09, "grad_norm": 0.7472525102366198, "learning_rate": 6.174162120655158e-06, "loss": 0.1221, "step": 26410 }, { "epoch": 3.09, "grad_norm": 0.7543055334480568, "learning_rate": 6.166616813308221e-06, "loss": 0.1254, "step": 26415 }, { "epoch": 3.09, "grad_norm": 0.778530498108572, "learning_rate": 6.1590754705857286e-06, "loss": 0.1029, "step": 26420 }, { "epoch": 3.09, "grad_norm": 0.7668497198221197, "learning_rate": 6.151538094075202e-06, "loss": 0.1249, "step": 26425 }, { "epoch": 3.09, "grad_norm": 0.68060243855134, "learning_rate": 6.1440046853633415e-06, "loss": 0.1124, "step": 26430 }, { "epoch": 3.09, "grad_norm": 0.8991230533280472, "learning_rate": 6.1364752460360165e-06, "loss": 0.1351, "step": 26435 }, { "epoch": 3.09, "grad_norm": 0.7823041953362571, "learning_rate": 6.128949777678239e-06, "loss": 0.118, "step": 26440 }, { "epoch": 3.09, "grad_norm": 0.6946285467655421, "learning_rate": 6.121428281874206e-06, "loss": 0.1049, "step": 26445 }, { "epoch": 3.09, "grad_norm": 0.7437546497377329, "learning_rate": 6.1139107602072724e-06, "loss": 0.1092, "step": 26450 }, { "epoch": 3.09, "grad_norm": 0.6219489966003742, "learning_rate": 6.1063972142599594e-06, "loss": 0.1205, "step": 26455 }, { "epoch": 3.09, "grad_norm": 0.7207845145834177, "learning_rate": 6.098887645613943e-06, "loss": 0.1259, "step": 26460 }, { "epoch": 3.09, "grad_norm": 0.5872623675405833, "learning_rate": 6.0913820558500565e-06, "loss": 0.0922, "step": 26465 }, { "epoch": 3.09, "grad_norm": 0.6650476410227526, "learning_rate": 6.083880446548315e-06, "loss": 0.0989, "step": 26470 }, { "epoch": 3.09, "grad_norm": 0.6471236933645951, "learning_rate": 6.0763828192878815e-06, "loss": 0.0925, "step": 26475 }, { "epoch": 3.09, "grad_norm": 0.6463297381498803, "learning_rate": 6.068889175647099e-06, "loss": 0.1048, "step": 26480 }, { "epoch": 3.09, "grad_norm": 0.8025955386858256, "learning_rate": 6.061399517203437e-06, "loss": 0.0917, "step": 26485 }, { "epoch": 3.09, "grad_norm": 0.7742384055578915, "learning_rate": 6.0539138455335654e-06, "loss": 0.1196, "step": 26490 }, { "epoch": 3.1, "grad_norm": 0.7724864261958942, "learning_rate": 6.046432162213281e-06, "loss": 0.1106, "step": 26495 }, { "epoch": 3.1, "grad_norm": 0.6099700977648117, "learning_rate": 6.038954468817562e-06, "loss": 0.1056, "step": 26500 }, { "epoch": 3.1, "grad_norm": 0.9475686445717705, "learning_rate": 6.03148076692055e-06, "loss": 0.1262, "step": 26505 }, { "epoch": 3.1, "grad_norm": 0.666432364610513, "learning_rate": 6.0240110580955245e-06, "loss": 0.1014, "step": 26510 }, { "epoch": 3.1, "grad_norm": 0.7301857662848863, "learning_rate": 6.016545343914942e-06, "loss": 0.0851, "step": 26515 }, { "epoch": 3.1, "grad_norm": 0.6728897174201404, "learning_rate": 6.0090836259504235e-06, "loss": 0.0995, "step": 26520 }, { "epoch": 3.1, "grad_norm": 0.6330719176786265, "learning_rate": 6.001625905772728e-06, "loss": 0.1067, "step": 26525 }, { "epoch": 3.1, "grad_norm": 0.6607892889945931, "learning_rate": 5.99417218495178e-06, "loss": 0.0975, "step": 26530 }, { "epoch": 3.1, "grad_norm": 0.6760077819146996, "learning_rate": 5.986722465056671e-06, "loss": 0.1105, "step": 26535 }, { "epoch": 3.1, "grad_norm": 0.5823624055652249, "learning_rate": 5.979276747655646e-06, "loss": 0.0791, "step": 26540 }, { "epoch": 3.1, "grad_norm": 0.7732439781230626, "learning_rate": 5.971835034316106e-06, "loss": 0.0891, "step": 26545 }, { "epoch": 3.1, "grad_norm": 0.8518185811550463, "learning_rate": 5.9643973266046145e-06, "loss": 0.0991, "step": 26550 }, { "epoch": 3.1, "grad_norm": 0.889079466197909, "learning_rate": 5.956963626086887e-06, "loss": 0.1106, "step": 26555 }, { "epoch": 3.1, "grad_norm": 0.7358821376254031, "learning_rate": 5.949533934327781e-06, "loss": 0.087, "step": 26560 }, { "epoch": 3.1, "grad_norm": 0.6852286418982134, "learning_rate": 5.942108252891332e-06, "loss": 0.1004, "step": 26565 }, { "epoch": 3.1, "grad_norm": 0.7288681796790348, "learning_rate": 5.934686583340726e-06, "loss": 0.1144, "step": 26570 }, { "epoch": 3.1, "grad_norm": 0.7534896634630351, "learning_rate": 5.927268927238311e-06, "loss": 0.13, "step": 26575 }, { "epoch": 3.11, "grad_norm": 0.6724731061119408, "learning_rate": 5.919855286145565e-06, "loss": 0.1146, "step": 26580 }, { "epoch": 3.11, "grad_norm": 0.5433158235176958, "learning_rate": 5.91244566162315e-06, "loss": 0.0996, "step": 26585 }, { "epoch": 3.11, "grad_norm": 0.7941017448479711, "learning_rate": 5.905040055230857e-06, "loss": 0.109, "step": 26590 }, { "epoch": 3.11, "grad_norm": 0.7311546127556436, "learning_rate": 5.897638468527653e-06, "loss": 0.1388, "step": 26595 }, { "epoch": 3.11, "grad_norm": 0.7554797565473562, "learning_rate": 5.89024090307165e-06, "loss": 0.1292, "step": 26600 }, { "epoch": 3.11, "grad_norm": 0.6747894223440208, "learning_rate": 5.882847360420104e-06, "loss": 0.1232, "step": 26605 }, { "epoch": 3.11, "grad_norm": 0.6522699389091695, "learning_rate": 5.875457842129442e-06, "loss": 0.1097, "step": 26610 }, { "epoch": 3.11, "grad_norm": 0.608592157775863, "learning_rate": 5.8680723497552385e-06, "loss": 0.1014, "step": 26615 }, { "epoch": 3.11, "grad_norm": 0.7015928263690588, "learning_rate": 5.8606908848522075e-06, "loss": 0.13, "step": 26620 }, { "epoch": 3.11, "grad_norm": 0.9573701161443582, "learning_rate": 5.853313448974223e-06, "loss": 0.116, "step": 26625 }, { "epoch": 3.11, "grad_norm": 0.638785191618915, "learning_rate": 5.8459400436743194e-06, "loss": 0.1058, "step": 26630 }, { "epoch": 3.11, "grad_norm": 0.6470159394474275, "learning_rate": 5.83857067050467e-06, "loss": 0.1163, "step": 26635 }, { "epoch": 3.11, "grad_norm": 0.717140696406229, "learning_rate": 5.831205331016612e-06, "loss": 0.1044, "step": 26640 }, { "epoch": 3.11, "grad_norm": 0.6791795053845087, "learning_rate": 5.82384402676063e-06, "loss": 0.1302, "step": 26645 }, { "epoch": 3.11, "grad_norm": 0.7550541743212152, "learning_rate": 5.816486759286346e-06, "loss": 0.0997, "step": 26650 }, { "epoch": 3.11, "grad_norm": 0.6621251421228375, "learning_rate": 5.809133530142541e-06, "loss": 0.1239, "step": 26655 }, { "epoch": 3.11, "grad_norm": 0.6896644489241749, "learning_rate": 5.8017843408771505e-06, "loss": 0.0954, "step": 26660 }, { "epoch": 3.12, "grad_norm": 0.7413372527055853, "learning_rate": 5.794439193037254e-06, "loss": 0.0983, "step": 26665 }, { "epoch": 3.12, "grad_norm": 0.8539530848760365, "learning_rate": 5.787098088169093e-06, "loss": 0.0882, "step": 26670 }, { "epoch": 3.12, "grad_norm": 0.675353532582595, "learning_rate": 5.779761027818032e-06, "loss": 0.0891, "step": 26675 }, { "epoch": 3.12, "grad_norm": 0.7077296112373209, "learning_rate": 5.772428013528603e-06, "loss": 0.0894, "step": 26680 }, { "epoch": 3.12, "grad_norm": 0.6178922942418108, "learning_rate": 5.765099046844491e-06, "loss": 0.086, "step": 26685 }, { "epoch": 3.12, "grad_norm": 0.6442620754744803, "learning_rate": 5.7577741293085155e-06, "loss": 0.0868, "step": 26690 }, { "epoch": 3.12, "grad_norm": 0.9776893155592145, "learning_rate": 5.750453262462638e-06, "loss": 0.1255, "step": 26695 }, { "epoch": 3.12, "grad_norm": 0.7794271144464824, "learning_rate": 5.743136447847988e-06, "loss": 0.1163, "step": 26700 }, { "epoch": 3.12, "grad_norm": 0.6175694692614657, "learning_rate": 5.735823687004829e-06, "loss": 0.0804, "step": 26705 }, { "epoch": 3.12, "grad_norm": 0.6939106951586549, "learning_rate": 5.728514981472574e-06, "loss": 0.1122, "step": 26710 }, { "epoch": 3.12, "grad_norm": 0.6671432513878683, "learning_rate": 5.721210332789789e-06, "loss": 0.1415, "step": 26715 }, { "epoch": 3.12, "grad_norm": 0.7205003142859444, "learning_rate": 5.713909742494175e-06, "loss": 0.0876, "step": 26720 }, { "epoch": 3.12, "grad_norm": 0.6369795426666236, "learning_rate": 5.706613212122572e-06, "loss": 0.0915, "step": 26725 }, { "epoch": 3.12, "grad_norm": 0.725033362553751, "learning_rate": 5.699320743210984e-06, "loss": 0.1124, "step": 26730 }, { "epoch": 3.12, "grad_norm": 0.7360469854393888, "learning_rate": 5.692032337294553e-06, "loss": 0.0984, "step": 26735 }, { "epoch": 3.12, "grad_norm": 0.8521217209399783, "learning_rate": 5.684747995907572e-06, "loss": 0.1223, "step": 26740 }, { "epoch": 3.12, "grad_norm": 0.6698252790546645, "learning_rate": 5.677467720583454e-06, "loss": 0.0991, "step": 26745 }, { "epoch": 3.13, "grad_norm": 0.7834146283020887, "learning_rate": 5.670191512854794e-06, "loss": 0.1075, "step": 26750 }, { "epoch": 3.13, "grad_norm": 0.7869042545390015, "learning_rate": 5.66291937425329e-06, "loss": 0.1018, "step": 26755 }, { "epoch": 3.13, "grad_norm": 0.6237457533117062, "learning_rate": 5.655651306309817e-06, "loss": 0.1058, "step": 26760 }, { "epoch": 3.13, "grad_norm": 0.7257566097352954, "learning_rate": 5.64838731055437e-06, "loss": 0.1152, "step": 26765 }, { "epoch": 3.13, "grad_norm": 0.686365040716021, "learning_rate": 5.641127388516099e-06, "loss": 0.0947, "step": 26770 }, { "epoch": 3.13, "grad_norm": 0.8668466677381959, "learning_rate": 5.633871541723295e-06, "loss": 0.1146, "step": 26775 }, { "epoch": 3.13, "grad_norm": 0.8206974661503591, "learning_rate": 5.626619771703401e-06, "loss": 0.1099, "step": 26780 }, { "epoch": 3.13, "grad_norm": 0.6511487152495449, "learning_rate": 5.619372079982977e-06, "loss": 0.1099, "step": 26785 }, { "epoch": 3.13, "grad_norm": 0.7182798907040946, "learning_rate": 5.612128468087733e-06, "loss": 0.1012, "step": 26790 }, { "epoch": 3.13, "grad_norm": 0.6051376427094062, "learning_rate": 5.6048889375425325e-06, "loss": 0.0854, "step": 26795 }, { "epoch": 3.13, "grad_norm": 0.7423645166512383, "learning_rate": 5.597653489871374e-06, "loss": 0.1057, "step": 26800 }, { "epoch": 3.13, "grad_norm": 0.7291798969335211, "learning_rate": 5.590422126597392e-06, "loss": 0.1016, "step": 26805 }, { "epoch": 3.13, "grad_norm": 0.7614474823627243, "learning_rate": 5.583194849242876e-06, "loss": 0.0896, "step": 26810 }, { "epoch": 3.13, "grad_norm": 0.8200355087782505, "learning_rate": 5.575971659329232e-06, "loss": 0.0968, "step": 26815 }, { "epoch": 3.13, "grad_norm": 0.7595931128737672, "learning_rate": 5.5687525583770135e-06, "loss": 0.1163, "step": 26820 }, { "epoch": 3.13, "grad_norm": 0.6852212704750229, "learning_rate": 5.56153754790592e-06, "loss": 0.1037, "step": 26825 }, { "epoch": 3.13, "grad_norm": 0.9401996968414855, "learning_rate": 5.5543266294347975e-06, "loss": 0.099, "step": 26830 }, { "epoch": 3.14, "grad_norm": 0.7019136730149339, "learning_rate": 5.547119804481602e-06, "loss": 0.1147, "step": 26835 }, { "epoch": 3.14, "grad_norm": 0.8750223399812699, "learning_rate": 5.539917074563458e-06, "loss": 0.1165, "step": 26840 }, { "epoch": 3.14, "grad_norm": 0.7394121315107945, "learning_rate": 5.532718441196619e-06, "loss": 0.0969, "step": 26845 }, { "epoch": 3.14, "grad_norm": 0.6738828238811119, "learning_rate": 5.525523905896457e-06, "loss": 0.1151, "step": 26850 }, { "epoch": 3.14, "grad_norm": 0.9018949667894144, "learning_rate": 5.518333470177514e-06, "loss": 0.1119, "step": 26855 }, { "epoch": 3.14, "grad_norm": 0.8344878140872373, "learning_rate": 5.51114713555344e-06, "loss": 0.113, "step": 26860 }, { "epoch": 3.14, "grad_norm": 0.6732499314882177, "learning_rate": 5.503964903537037e-06, "loss": 0.0905, "step": 26865 }, { "epoch": 3.14, "grad_norm": 0.7580865065964432, "learning_rate": 5.496786775640239e-06, "loss": 0.1009, "step": 26870 }, { "epoch": 3.14, "grad_norm": 0.5485585342289443, "learning_rate": 5.489612753374129e-06, "loss": 0.0816, "step": 26875 }, { "epoch": 3.14, "grad_norm": 0.6132192215312308, "learning_rate": 5.482442838248902e-06, "loss": 0.1045, "step": 26880 }, { "epoch": 3.14, "grad_norm": 0.6994951119398936, "learning_rate": 5.4752770317738986e-06, "loss": 0.1101, "step": 26885 }, { "epoch": 3.14, "grad_norm": 0.7652090036615278, "learning_rate": 5.4681153354575965e-06, "loss": 0.1112, "step": 26890 }, { "epoch": 3.14, "grad_norm": 0.7267449347499928, "learning_rate": 5.460957750807613e-06, "loss": 0.1203, "step": 26895 }, { "epoch": 3.14, "grad_norm": 0.6964480910368923, "learning_rate": 5.453804279330699e-06, "loss": 0.0961, "step": 26900 }, { "epoch": 3.14, "grad_norm": 0.6601239058338471, "learning_rate": 5.446654922532723e-06, "loss": 0.1176, "step": 26905 }, { "epoch": 3.14, "grad_norm": 0.5339339899569211, "learning_rate": 5.439509681918703e-06, "loss": 0.0828, "step": 26910 }, { "epoch": 3.14, "grad_norm": 0.7847670398120649, "learning_rate": 5.432368558992798e-06, "loss": 0.1142, "step": 26915 }, { "epoch": 3.15, "grad_norm": 0.6715354267693744, "learning_rate": 5.425231555258275e-06, "loss": 0.096, "step": 26920 }, { "epoch": 3.15, "grad_norm": 0.6980572843695022, "learning_rate": 5.418098672217559e-06, "loss": 0.0914, "step": 26925 }, { "epoch": 3.15, "grad_norm": 0.6421570779050814, "learning_rate": 5.410969911372185e-06, "loss": 0.0976, "step": 26930 }, { "epoch": 3.15, "grad_norm": 0.6982240700427049, "learning_rate": 5.403845274222838e-06, "loss": 0.1152, "step": 26935 }, { "epoch": 3.15, "grad_norm": 0.8689530704203923, "learning_rate": 5.396724762269331e-06, "loss": 0.125, "step": 26940 }, { "epoch": 3.15, "grad_norm": 0.7526612680374339, "learning_rate": 5.389608377010608e-06, "loss": 0.0912, "step": 26945 }, { "epoch": 3.15, "grad_norm": 0.7315746698970573, "learning_rate": 5.382496119944741e-06, "loss": 0.0819, "step": 26950 }, { "epoch": 3.15, "grad_norm": 0.7457490956056376, "learning_rate": 5.375387992568926e-06, "loss": 0.1192, "step": 26955 }, { "epoch": 3.15, "grad_norm": 0.6396251754410884, "learning_rate": 5.368283996379503e-06, "loss": 0.1259, "step": 26960 }, { "epoch": 3.15, "grad_norm": 0.8173946270816257, "learning_rate": 5.361184132871938e-06, "loss": 0.1138, "step": 26965 }, { "epoch": 3.15, "grad_norm": 0.7156755356941285, "learning_rate": 5.354088403540839e-06, "loss": 0.1077, "step": 26970 }, { "epoch": 3.15, "grad_norm": 0.586096916571345, "learning_rate": 5.346996809879909e-06, "loss": 0.0875, "step": 26975 }, { "epoch": 3.15, "grad_norm": 0.6724035558098741, "learning_rate": 5.339909353382025e-06, "loss": 0.0946, "step": 26980 }, { "epoch": 3.15, "grad_norm": 0.748002460811208, "learning_rate": 5.332826035539151e-06, "loss": 0.1245, "step": 26985 }, { "epoch": 3.15, "grad_norm": 0.7630746176250409, "learning_rate": 5.325746857842409e-06, "loss": 0.1296, "step": 26990 }, { "epoch": 3.15, "grad_norm": 0.9542456042326904, "learning_rate": 5.318671821782045e-06, "loss": 0.105, "step": 26995 }, { "epoch": 3.15, "grad_norm": 0.6647030524377335, "learning_rate": 5.3116009288474184e-06, "loss": 0.104, "step": 27000 }, { "epoch": 3.16, "grad_norm": 0.6342066903485182, "learning_rate": 5.3045341805270295e-06, "loss": 0.1013, "step": 27005 }, { "epoch": 3.16, "grad_norm": 0.623039292805288, "learning_rate": 5.297471578308514e-06, "loss": 0.0856, "step": 27010 }, { "epoch": 3.16, "grad_norm": 0.5249978434384504, "learning_rate": 5.2904131236786035e-06, "loss": 0.0962, "step": 27015 }, { "epoch": 3.16, "grad_norm": 0.6838930500034113, "learning_rate": 5.283358818123194e-06, "loss": 0.1043, "step": 27020 }, { "epoch": 3.16, "grad_norm": 0.6466364545351454, "learning_rate": 5.276308663127275e-06, "loss": 0.097, "step": 27025 }, { "epoch": 3.16, "grad_norm": 0.8560642212933232, "learning_rate": 5.2692626601749896e-06, "loss": 0.0941, "step": 27030 }, { "epoch": 3.16, "grad_norm": 0.7567922570859831, "learning_rate": 5.262220810749588e-06, "loss": 0.1198, "step": 27035 }, { "epoch": 3.16, "grad_norm": 0.5690760130912845, "learning_rate": 5.255183116333468e-06, "loss": 0.1139, "step": 27040 }, { "epoch": 3.16, "grad_norm": 0.7396968849244785, "learning_rate": 5.2481495784081266e-06, "loss": 0.0928, "step": 27045 }, { "epoch": 3.16, "grad_norm": 0.8090046760330366, "learning_rate": 5.241120198454191e-06, "loss": 0.1196, "step": 27050 }, { "epoch": 3.16, "grad_norm": 0.7455128271891508, "learning_rate": 5.234094977951426e-06, "loss": 0.111, "step": 27055 }, { "epoch": 3.16, "grad_norm": 0.790042469115102, "learning_rate": 5.2270739183787135e-06, "loss": 0.1078, "step": 27060 }, { "epoch": 3.16, "grad_norm": 0.6974573320143602, "learning_rate": 5.22005702121407e-06, "loss": 0.1046, "step": 27065 }, { "epoch": 3.16, "grad_norm": 0.786268622734536, "learning_rate": 5.213044287934607e-06, "loss": 0.115, "step": 27070 }, { "epoch": 3.16, "grad_norm": 0.670783263341755, "learning_rate": 5.206035720016597e-06, "loss": 0.0853, "step": 27075 }, { "epoch": 3.16, "grad_norm": 0.773883624157871, "learning_rate": 5.199031318935401e-06, "loss": 0.1154, "step": 27080 }, { "epoch": 3.16, "grad_norm": 0.6617728403963338, "learning_rate": 5.192031086165527e-06, "loss": 0.1146, "step": 27085 }, { "epoch": 3.17, "grad_norm": 0.7619043365471327, "learning_rate": 5.185035023180601e-06, "loss": 0.1106, "step": 27090 }, { "epoch": 3.17, "grad_norm": 0.5899245509212457, "learning_rate": 5.178043131453356e-06, "loss": 0.1113, "step": 27095 }, { "epoch": 3.17, "grad_norm": 0.6256910143154276, "learning_rate": 5.171055412455666e-06, "loss": 0.1054, "step": 27100 }, { "epoch": 3.17, "grad_norm": 0.7309779170390247, "learning_rate": 5.1640718676585266e-06, "loss": 0.1239, "step": 27105 }, { "epoch": 3.17, "grad_norm": 0.7315159673523742, "learning_rate": 5.157092498532037e-06, "loss": 0.0994, "step": 27110 }, { "epoch": 3.17, "grad_norm": 0.6441877490629239, "learning_rate": 5.150117306545421e-06, "loss": 0.097, "step": 27115 }, { "epoch": 3.17, "grad_norm": 0.8021058180272631, "learning_rate": 5.143146293167042e-06, "loss": 0.1179, "step": 27120 }, { "epoch": 3.17, "grad_norm": 0.8419191250545054, "learning_rate": 5.136179459864365e-06, "loss": 0.1086, "step": 27125 }, { "epoch": 3.17, "grad_norm": 0.6973020320305778, "learning_rate": 5.129216808103982e-06, "loss": 0.1042, "step": 27130 }, { "epoch": 3.17, "grad_norm": 0.8417271559701516, "learning_rate": 5.122258339351615e-06, "loss": 0.1116, "step": 27135 }, { "epoch": 3.17, "grad_norm": 0.8639719780933128, "learning_rate": 5.115304055072076e-06, "loss": 0.1234, "step": 27140 }, { "epoch": 3.17, "grad_norm": 0.7724986819807443, "learning_rate": 5.108353956729334e-06, "loss": 0.1058, "step": 27145 }, { "epoch": 3.17, "grad_norm": 0.7591267969376034, "learning_rate": 5.101408045786437e-06, "loss": 0.1192, "step": 27150 }, { "epoch": 3.17, "grad_norm": 0.905319124257068, "learning_rate": 5.094466323705585e-06, "loss": 0.113, "step": 27155 }, { "epoch": 3.17, "grad_norm": 0.730739666838174, "learning_rate": 5.087528791948087e-06, "loss": 0.1413, "step": 27160 }, { "epoch": 3.17, "grad_norm": 0.7460351126458739, "learning_rate": 5.0805954519743545e-06, "loss": 0.1128, "step": 27165 }, { "epoch": 3.17, "grad_norm": 0.8485064216318304, "learning_rate": 5.073666305243932e-06, "loss": 0.1419, "step": 27170 }, { "epoch": 3.18, "grad_norm": 0.7356958177883444, "learning_rate": 5.066741353215485e-06, "loss": 0.1172, "step": 27175 }, { "epoch": 3.18, "grad_norm": 0.7050655369783162, "learning_rate": 5.059820597346784e-06, "loss": 0.1218, "step": 27180 }, { "epoch": 3.18, "grad_norm": 0.6576658350405158, "learning_rate": 5.0529040390947104e-06, "loss": 0.1034, "step": 27185 }, { "epoch": 3.18, "grad_norm": 0.7458185831059889, "learning_rate": 5.045991679915282e-06, "loss": 0.0967, "step": 27190 }, { "epoch": 3.18, "grad_norm": 0.7324996519904005, "learning_rate": 5.039083521263621e-06, "loss": 0.0875, "step": 27195 }, { "epoch": 3.18, "grad_norm": 0.648054663546603, "learning_rate": 5.03217956459397e-06, "loss": 0.1005, "step": 27200 }, { "epoch": 3.18, "grad_norm": 1.4949461433351363, "learning_rate": 5.025279811359685e-06, "loss": 0.1248, "step": 27205 }, { "epoch": 3.18, "grad_norm": 0.8264578900501282, "learning_rate": 5.018384263013234e-06, "loss": 0.0889, "step": 27210 }, { "epoch": 3.18, "grad_norm": 0.7877416501333684, "learning_rate": 5.011492921006197e-06, "loss": 0.129, "step": 27215 }, { "epoch": 3.18, "grad_norm": 0.6481943119871718, "learning_rate": 5.004605786789276e-06, "loss": 0.0917, "step": 27220 }, { "epoch": 3.18, "grad_norm": 0.8508648860382823, "learning_rate": 4.9977228618122905e-06, "loss": 0.1144, "step": 27225 }, { "epoch": 3.18, "grad_norm": 0.7705185755863887, "learning_rate": 4.990844147524168e-06, "loss": 0.1145, "step": 27230 }, { "epoch": 3.18, "grad_norm": 0.7134886485696199, "learning_rate": 4.983969645372941e-06, "loss": 0.1056, "step": 27235 }, { "epoch": 3.18, "grad_norm": 0.7419276364071465, "learning_rate": 4.977099356805776e-06, "loss": 0.12, "step": 27240 }, { "epoch": 3.18, "grad_norm": 0.6391129925150747, "learning_rate": 4.9702332832689296e-06, "loss": 0.1058, "step": 27245 }, { "epoch": 3.18, "grad_norm": 0.6182474754521315, "learning_rate": 4.963371426207792e-06, "loss": 0.1131, "step": 27250 }, { "epoch": 3.18, "grad_norm": 0.747832014556228, "learning_rate": 4.956513787066844e-06, "loss": 0.1122, "step": 27255 }, { "epoch": 3.18, "grad_norm": 0.8018221106196859, "learning_rate": 4.949660367289699e-06, "loss": 0.0928, "step": 27260 }, { "epoch": 3.19, "grad_norm": 0.6773045912995305, "learning_rate": 4.9428111683190724e-06, "loss": 0.0964, "step": 27265 }, { "epoch": 3.19, "grad_norm": 0.7642994700303932, "learning_rate": 4.9359661915968e-06, "loss": 0.1308, "step": 27270 }, { "epoch": 3.19, "grad_norm": 0.8148137456643133, "learning_rate": 4.9291254385638115e-06, "loss": 0.1162, "step": 27275 }, { "epoch": 3.19, "grad_norm": 0.6189231669299485, "learning_rate": 4.9222889106601555e-06, "loss": 0.1088, "step": 27280 }, { "epoch": 3.19, "grad_norm": 0.6594079789690681, "learning_rate": 4.915456609324992e-06, "loss": 0.1121, "step": 27285 }, { "epoch": 3.19, "grad_norm": 0.649369583688146, "learning_rate": 4.908628535996598e-06, "loss": 0.0915, "step": 27290 }, { "epoch": 3.19, "grad_norm": 0.947305512989576, "learning_rate": 4.901804692112355e-06, "loss": 0.1085, "step": 27295 }, { "epoch": 3.19, "grad_norm": 0.6132927610783767, "learning_rate": 4.894985079108755e-06, "loss": 0.0797, "step": 27300 }, { "epoch": 3.19, "grad_norm": 0.7964776300632762, "learning_rate": 4.888169698421394e-06, "loss": 0.1067, "step": 27305 }, { "epoch": 3.19, "grad_norm": 0.7696520997622317, "learning_rate": 4.881358551484977e-06, "loss": 0.1306, "step": 27310 }, { "epoch": 3.19, "grad_norm": 0.7592114319955823, "learning_rate": 4.874551639733324e-06, "loss": 0.1057, "step": 27315 }, { "epoch": 3.19, "grad_norm": 0.7532520958374271, "learning_rate": 4.86774896459937e-06, "loss": 0.1221, "step": 27320 }, { "epoch": 3.19, "grad_norm": 0.593577090320849, "learning_rate": 4.860950527515137e-06, "loss": 0.0982, "step": 27325 }, { "epoch": 3.19, "grad_norm": 0.6969294665036798, "learning_rate": 4.854156329911771e-06, "loss": 0.1239, "step": 27330 }, { "epoch": 3.19, "grad_norm": 0.7178381604216698, "learning_rate": 4.847366373219533e-06, "loss": 0.1006, "step": 27335 }, { "epoch": 3.19, "grad_norm": 0.7814375234050084, "learning_rate": 4.840580658867763e-06, "loss": 0.0829, "step": 27340 }, { "epoch": 3.19, "grad_norm": 0.667215134841802, "learning_rate": 4.833799188284938e-06, "loss": 0.0946, "step": 27345 }, { "epoch": 3.2, "grad_norm": 0.6603972568708967, "learning_rate": 4.827021962898618e-06, "loss": 0.0892, "step": 27350 }, { "epoch": 3.2, "grad_norm": 0.6877643207029196, "learning_rate": 4.820248984135484e-06, "loss": 0.1139, "step": 27355 }, { "epoch": 3.2, "grad_norm": 0.7350312212434802, "learning_rate": 4.813480253421321e-06, "loss": 0.1018, "step": 27360 }, { "epoch": 3.2, "grad_norm": 0.6445812729815801, "learning_rate": 4.806715772181017e-06, "loss": 0.1136, "step": 27365 }, { "epoch": 3.2, "grad_norm": 0.657302089898427, "learning_rate": 4.799955541838571e-06, "loss": 0.0912, "step": 27370 }, { "epoch": 3.2, "grad_norm": 0.7606474669808397, "learning_rate": 4.793199563817077e-06, "loss": 0.1072, "step": 27375 }, { "epoch": 3.2, "grad_norm": 0.6174379246275021, "learning_rate": 4.786447839538735e-06, "loss": 0.0736, "step": 27380 }, { "epoch": 3.2, "grad_norm": 0.8589451756160479, "learning_rate": 4.779700370424859e-06, "loss": 0.1285, "step": 27385 }, { "epoch": 3.2, "grad_norm": 0.7252507841928352, "learning_rate": 4.772957157895863e-06, "loss": 0.092, "step": 27390 }, { "epoch": 3.2, "grad_norm": 0.669912688672878, "learning_rate": 4.766218203371259e-06, "loss": 0.0736, "step": 27395 }, { "epoch": 3.2, "grad_norm": 0.6631870987168231, "learning_rate": 4.759483508269669e-06, "loss": 0.081, "step": 27400 }, { "epoch": 3.2, "grad_norm": 0.586173927746635, "learning_rate": 4.7527530740088255e-06, "loss": 0.1007, "step": 27405 }, { "epoch": 3.2, "grad_norm": 0.8162239229999849, "learning_rate": 4.746026902005543e-06, "loss": 0.0804, "step": 27410 }, { "epoch": 3.2, "grad_norm": 0.8529158843599705, "learning_rate": 4.7393049936757615e-06, "loss": 0.1179, "step": 27415 }, { "epoch": 3.2, "grad_norm": 0.6451636090274976, "learning_rate": 4.7325873504345e-06, "loss": 0.0918, "step": 27420 }, { "epoch": 3.2, "grad_norm": 0.6442376937288511, "learning_rate": 4.725873973695901e-06, "loss": 0.114, "step": 27425 }, { "epoch": 3.2, "grad_norm": 0.7265220580977397, "learning_rate": 4.719164864873202e-06, "loss": 0.1152, "step": 27430 }, { "epoch": 3.21, "grad_norm": 0.8066954147334778, "learning_rate": 4.712460025378745e-06, "loss": 0.1083, "step": 27435 }, { "epoch": 3.21, "grad_norm": 0.8562634903931027, "learning_rate": 4.705759456623962e-06, "loss": 0.1008, "step": 27440 }, { "epoch": 3.21, "grad_norm": 0.6539493485674843, "learning_rate": 4.69906316001939e-06, "loss": 0.1035, "step": 27445 }, { "epoch": 3.21, "grad_norm": 0.6934335428071814, "learning_rate": 4.692371136974671e-06, "loss": 0.0889, "step": 27450 }, { "epoch": 3.21, "grad_norm": 0.8194032096332868, "learning_rate": 4.685683388898549e-06, "loss": 0.105, "step": 27455 }, { "epoch": 3.21, "grad_norm": 0.8242818311171684, "learning_rate": 4.67899991719887e-06, "loss": 0.0952, "step": 27460 }, { "epoch": 3.21, "grad_norm": 0.68509853483034, "learning_rate": 4.672320723282567e-06, "loss": 0.1006, "step": 27465 }, { "epoch": 3.21, "grad_norm": 0.646136344114196, "learning_rate": 4.665645808555685e-06, "loss": 0.0946, "step": 27470 }, { "epoch": 3.21, "grad_norm": 0.9226283805520535, "learning_rate": 4.658975174423355e-06, "loss": 0.1278, "step": 27475 }, { "epoch": 3.21, "grad_norm": 0.6413821967411327, "learning_rate": 4.652308822289822e-06, "loss": 0.0929, "step": 27480 }, { "epoch": 3.21, "grad_norm": 0.9290877306279111, "learning_rate": 4.645646753558427e-06, "loss": 0.0905, "step": 27485 }, { "epoch": 3.21, "grad_norm": 0.6284747443256513, "learning_rate": 4.638988969631597e-06, "loss": 0.0745, "step": 27490 }, { "epoch": 3.21, "grad_norm": 0.582407361883294, "learning_rate": 4.632335471910867e-06, "loss": 0.0865, "step": 27495 }, { "epoch": 3.21, "grad_norm": 0.7124413487525606, "learning_rate": 4.625686261796877e-06, "loss": 0.103, "step": 27500 }, { "epoch": 3.21, "grad_norm": 0.8082164147629005, "learning_rate": 4.619041340689342e-06, "loss": 0.0911, "step": 27505 }, { "epoch": 3.21, "grad_norm": 0.9446964085254289, "learning_rate": 4.6124007099871e-06, "loss": 0.132, "step": 27510 }, { "epoch": 3.21, "grad_norm": 0.8165742292714081, "learning_rate": 4.605764371088059e-06, "loss": 0.0943, "step": 27515 }, { "epoch": 3.22, "grad_norm": 0.6491200237377126, "learning_rate": 4.599132325389246e-06, "loss": 0.1182, "step": 27520 }, { "epoch": 3.22, "grad_norm": 0.702653031198587, "learning_rate": 4.5925045742867795e-06, "loss": 0.0969, "step": 27525 }, { "epoch": 3.22, "grad_norm": 0.7558146522110792, "learning_rate": 4.58588111917587e-06, "loss": 0.1003, "step": 27530 }, { "epoch": 3.22, "grad_norm": 0.6964839434435839, "learning_rate": 4.5792619614508205e-06, "loss": 0.1387, "step": 27535 }, { "epoch": 3.22, "grad_norm": 0.6307877078793908, "learning_rate": 4.572647102505029e-06, "loss": 0.0984, "step": 27540 }, { "epoch": 3.22, "grad_norm": 0.6002784628232989, "learning_rate": 4.566036543730998e-06, "loss": 0.1157, "step": 27545 }, { "epoch": 3.22, "grad_norm": 0.5956414211799476, "learning_rate": 4.559430286520316e-06, "loss": 0.1011, "step": 27550 }, { "epoch": 3.22, "grad_norm": 0.8924788513454035, "learning_rate": 4.552828332263681e-06, "loss": 0.1026, "step": 27555 }, { "epoch": 3.22, "grad_norm": 0.7150669248264977, "learning_rate": 4.546230682350858e-06, "loss": 0.0952, "step": 27560 }, { "epoch": 3.22, "grad_norm": 0.6112310103405998, "learning_rate": 4.5396373381707325e-06, "loss": 0.1015, "step": 27565 }, { "epoch": 3.22, "grad_norm": 0.6999709408215883, "learning_rate": 4.533048301111261e-06, "loss": 0.1045, "step": 27570 }, { "epoch": 3.22, "grad_norm": 0.6177048309235855, "learning_rate": 4.526463572559514e-06, "loss": 0.0955, "step": 27575 }, { "epoch": 3.22, "grad_norm": 0.7049165061116903, "learning_rate": 4.5198831539016495e-06, "loss": 0.1115, "step": 27580 }, { "epoch": 3.22, "grad_norm": 0.8279660873808702, "learning_rate": 4.513307046522902e-06, "loss": 0.1382, "step": 27585 }, { "epoch": 3.22, "grad_norm": 0.729804069750821, "learning_rate": 4.50673525180762e-06, "loss": 0.0993, "step": 27590 }, { "epoch": 3.22, "grad_norm": 0.702232755288587, "learning_rate": 4.50016777113923e-06, "loss": 0.108, "step": 27595 }, { "epoch": 3.22, "grad_norm": 0.6043014054154779, "learning_rate": 4.493604605900265e-06, "loss": 0.1242, "step": 27600 }, { "epoch": 3.23, "grad_norm": 0.9120160607162382, "learning_rate": 4.487045757472333e-06, "loss": 0.1235, "step": 27605 }, { "epoch": 3.23, "grad_norm": 0.7529335024242906, "learning_rate": 4.4804912272361335e-06, "loss": 0.0977, "step": 27610 }, { "epoch": 3.23, "grad_norm": 0.7767946440583706, "learning_rate": 4.473941016571473e-06, "loss": 0.1156, "step": 27615 }, { "epoch": 3.23, "grad_norm": 0.720918558122027, "learning_rate": 4.467395126857235e-06, "loss": 0.1189, "step": 27620 }, { "epoch": 3.23, "grad_norm": 0.7390866302083426, "learning_rate": 4.460853559471409e-06, "loss": 0.1005, "step": 27625 }, { "epoch": 3.23, "grad_norm": 0.692631963637318, "learning_rate": 4.4543163157910465e-06, "loss": 0.1001, "step": 27630 }, { "epoch": 3.23, "grad_norm": 0.8751217070481909, "learning_rate": 4.44778339719232e-06, "loss": 0.1019, "step": 27635 }, { "epoch": 3.23, "grad_norm": 0.676708125136999, "learning_rate": 4.441254805050468e-06, "loss": 0.0994, "step": 27640 }, { "epoch": 3.23, "grad_norm": 0.8743354861161313, "learning_rate": 4.434730540739826e-06, "loss": 0.1397, "step": 27645 }, { "epoch": 3.23, "grad_norm": 0.8309006449711872, "learning_rate": 4.428210605633834e-06, "loss": 0.0949, "step": 27650 }, { "epoch": 3.23, "grad_norm": 0.8199344137038984, "learning_rate": 4.421695001104992e-06, "loss": 0.1037, "step": 27655 }, { "epoch": 3.23, "grad_norm": 0.9577009355195544, "learning_rate": 4.4151837285249046e-06, "loss": 0.1262, "step": 27660 }, { "epoch": 3.23, "grad_norm": 0.6913118427731747, "learning_rate": 4.408676789264277e-06, "loss": 0.0916, "step": 27665 }, { "epoch": 3.23, "grad_norm": 0.5849149667509002, "learning_rate": 4.402174184692876e-06, "loss": 0.1063, "step": 27670 }, { "epoch": 3.23, "grad_norm": 0.6410479779252873, "learning_rate": 4.395675916179562e-06, "loss": 0.1253, "step": 27675 }, { "epoch": 3.23, "grad_norm": 0.7934768652877257, "learning_rate": 4.389181985092297e-06, "loss": 0.1021, "step": 27680 }, { "epoch": 3.23, "grad_norm": 0.7515742225417876, "learning_rate": 4.382692392798121e-06, "loss": 0.1055, "step": 27685 }, { "epoch": 3.24, "grad_norm": 0.8122932922837836, "learning_rate": 4.3762071406631605e-06, "loss": 0.1194, "step": 27690 }, { "epoch": 3.24, "grad_norm": 0.6175770177498836, "learning_rate": 4.369726230052637e-06, "loss": 0.0922, "step": 27695 }, { "epoch": 3.24, "grad_norm": 0.72023934967413, "learning_rate": 4.3632496623308425e-06, "loss": 0.1074, "step": 27700 }, { "epoch": 3.24, "grad_norm": 0.5931993310959411, "learning_rate": 4.3567774388611564e-06, "loss": 0.1234, "step": 27705 }, { "epoch": 3.24, "grad_norm": 0.8489762207115195, "learning_rate": 4.350309561006052e-06, "loss": 0.128, "step": 27710 }, { "epoch": 3.24, "grad_norm": 0.6640435514765165, "learning_rate": 4.343846030127094e-06, "loss": 0.1126, "step": 27715 }, { "epoch": 3.24, "grad_norm": 0.764850100630166, "learning_rate": 4.337386847584921e-06, "loss": 0.1036, "step": 27720 }, { "epoch": 3.24, "grad_norm": 0.7684789331928018, "learning_rate": 4.33093201473925e-06, "loss": 0.1259, "step": 27725 }, { "epoch": 3.24, "grad_norm": 0.644490431888392, "learning_rate": 4.324481532948904e-06, "loss": 0.0929, "step": 27730 }, { "epoch": 3.24, "grad_norm": 0.6882481887072169, "learning_rate": 4.318035403571763e-06, "loss": 0.1002, "step": 27735 }, { "epoch": 3.24, "grad_norm": 0.8834753509239305, "learning_rate": 4.3115936279648165e-06, "loss": 0.1199, "step": 27740 }, { "epoch": 3.24, "grad_norm": 0.6479994512780162, "learning_rate": 4.305156207484115e-06, "loss": 0.0928, "step": 27745 }, { "epoch": 3.24, "grad_norm": 0.7513403539592943, "learning_rate": 4.298723143484809e-06, "loss": 0.1028, "step": 27750 }, { "epoch": 3.24, "grad_norm": 0.7625194523488714, "learning_rate": 4.292294437321126e-06, "loss": 0.1298, "step": 27755 }, { "epoch": 3.24, "grad_norm": 1.0784639149206594, "learning_rate": 4.28587009034638e-06, "loss": 0.1114, "step": 27760 }, { "epoch": 3.24, "grad_norm": 0.748527229005044, "learning_rate": 4.27945010391296e-06, "loss": 0.0996, "step": 27765 }, { "epoch": 3.24, "grad_norm": 0.6424390744174807, "learning_rate": 4.273034479372334e-06, "loss": 0.1007, "step": 27770 }, { "epoch": 3.25, "grad_norm": 0.6343905500050343, "learning_rate": 4.266623218075061e-06, "loss": 0.1134, "step": 27775 }, { "epoch": 3.25, "grad_norm": 0.71423066113107, "learning_rate": 4.260216321370783e-06, "loss": 0.1068, "step": 27780 }, { "epoch": 3.25, "grad_norm": 0.7915180587399318, "learning_rate": 4.253813790608216e-06, "loss": 0.1215, "step": 27785 }, { "epoch": 3.25, "grad_norm": 0.7141301407900196, "learning_rate": 4.247415627135168e-06, "loss": 0.1154, "step": 27790 }, { "epoch": 3.25, "grad_norm": 0.7255746806542855, "learning_rate": 4.2410218322985115e-06, "loss": 0.0934, "step": 27795 }, { "epoch": 3.25, "grad_norm": 0.6306130921518569, "learning_rate": 4.2346324074442e-06, "loss": 0.096, "step": 27800 }, { "epoch": 3.25, "grad_norm": 0.6052389191725739, "learning_rate": 4.228247353917283e-06, "loss": 0.0877, "step": 27805 }, { "epoch": 3.25, "grad_norm": 0.7184656926850397, "learning_rate": 4.221866673061889e-06, "loss": 0.1185, "step": 27810 }, { "epoch": 3.25, "grad_norm": 0.7437902535805804, "learning_rate": 4.215490366221201e-06, "loss": 0.0917, "step": 27815 }, { "epoch": 3.25, "grad_norm": 0.5551879590931311, "learning_rate": 4.209118434737508e-06, "loss": 0.1443, "step": 27820 }, { "epoch": 3.25, "grad_norm": 0.6216844687697246, "learning_rate": 4.202750879952167e-06, "loss": 0.1009, "step": 27825 }, { "epoch": 3.25, "grad_norm": 0.6760739931594562, "learning_rate": 4.196387703205621e-06, "loss": 0.1115, "step": 27830 }, { "epoch": 3.25, "grad_norm": 0.6945406611582664, "learning_rate": 4.190028905837382e-06, "loss": 0.097, "step": 27835 }, { "epoch": 3.25, "grad_norm": 0.639885643623021, "learning_rate": 4.183674489186032e-06, "loss": 0.0884, "step": 27840 }, { "epoch": 3.25, "grad_norm": 0.742319783732264, "learning_rate": 4.177324454589252e-06, "loss": 0.1095, "step": 27845 }, { "epoch": 3.25, "grad_norm": 0.7420810909770709, "learning_rate": 4.1709788033837905e-06, "loss": 0.1096, "step": 27850 }, { "epoch": 3.25, "grad_norm": 0.9133393405230974, "learning_rate": 4.164637536905472e-06, "loss": 0.1309, "step": 27855 }, { "epoch": 3.26, "grad_norm": 0.5642246154062726, "learning_rate": 4.1583006564892084e-06, "loss": 0.0685, "step": 27860 }, { "epoch": 3.26, "grad_norm": 0.7951090429763442, "learning_rate": 4.1519681634689725e-06, "loss": 0.1087, "step": 27865 }, { "epoch": 3.26, "grad_norm": 0.6162920868713485, "learning_rate": 4.14564005917781e-06, "loss": 0.1056, "step": 27870 }, { "epoch": 3.26, "grad_norm": 0.7408872322545546, "learning_rate": 4.139316344947866e-06, "loss": 0.1178, "step": 27875 }, { "epoch": 3.26, "grad_norm": 0.8148778293857757, "learning_rate": 4.132997022110352e-06, "loss": 0.1163, "step": 27880 }, { "epoch": 3.26, "grad_norm": 0.6475694152389248, "learning_rate": 4.1266820919955375e-06, "loss": 0.1058, "step": 27885 }, { "epoch": 3.26, "grad_norm": 0.8219073220725632, "learning_rate": 4.120371555932786e-06, "loss": 0.084, "step": 27890 }, { "epoch": 3.26, "grad_norm": 0.661840268827891, "learning_rate": 4.114065415250545e-06, "loss": 0.1113, "step": 27895 }, { "epoch": 3.26, "grad_norm": 0.8958507627503823, "learning_rate": 4.107763671276305e-06, "loss": 0.1501, "step": 27900 }, { "epoch": 3.26, "grad_norm": 0.6915926013371538, "learning_rate": 4.101466325336661e-06, "loss": 0.0841, "step": 27905 }, { "epoch": 3.26, "grad_norm": 0.6496701901388301, "learning_rate": 4.0951733787572635e-06, "loss": 0.0987, "step": 27910 }, { "epoch": 3.26, "grad_norm": 0.8739538308905007, "learning_rate": 4.088884832862844e-06, "loss": 0.1324, "step": 27915 }, { "epoch": 3.26, "grad_norm": 0.8178434259171943, "learning_rate": 4.082600688977209e-06, "loss": 0.1168, "step": 27920 }, { "epoch": 3.26, "grad_norm": 0.8809897556690398, "learning_rate": 4.0763209484232425e-06, "loss": 0.1277, "step": 27925 }, { "epoch": 3.26, "grad_norm": 0.742955650754623, "learning_rate": 4.070045612522891e-06, "loss": 0.091, "step": 27930 }, { "epoch": 3.26, "grad_norm": 0.7630920473057111, "learning_rate": 4.063774682597174e-06, "loss": 0.0967, "step": 27935 }, { "epoch": 3.26, "grad_norm": 0.689831566834404, "learning_rate": 4.057508159966189e-06, "loss": 0.1049, "step": 27940 }, { "epoch": 3.26, "grad_norm": 0.8296147421207558, "learning_rate": 4.051246045949106e-06, "loss": 0.1253, "step": 27945 }, { "epoch": 3.27, "grad_norm": 0.67284792786023, "learning_rate": 4.044988341864175e-06, "loss": 0.1007, "step": 27950 }, { "epoch": 3.27, "grad_norm": 0.8211630670759348, "learning_rate": 4.038735049028694e-06, "loss": 0.1109, "step": 27955 }, { "epoch": 3.27, "grad_norm": 0.7889608098422833, "learning_rate": 4.032486168759059e-06, "loss": 0.1215, "step": 27960 }, { "epoch": 3.27, "grad_norm": 0.6847364483082103, "learning_rate": 4.026241702370712e-06, "loss": 0.0921, "step": 27965 }, { "epoch": 3.27, "grad_norm": 0.7284210583509032, "learning_rate": 4.020001651178182e-06, "loss": 0.112, "step": 27970 }, { "epoch": 3.27, "grad_norm": 0.8095645977967392, "learning_rate": 4.013766016495077e-06, "loss": 0.1221, "step": 27975 }, { "epoch": 3.27, "grad_norm": 0.7618551184614817, "learning_rate": 4.00753479963405e-06, "loss": 0.1274, "step": 27980 }, { "epoch": 3.27, "grad_norm": 0.7932976114361806, "learning_rate": 4.001308001906839e-06, "loss": 0.1248, "step": 27985 }, { "epoch": 3.27, "grad_norm": 0.7605060396957194, "learning_rate": 3.995085624624262e-06, "loss": 0.1027, "step": 27990 }, { "epoch": 3.27, "grad_norm": 0.7354698511348152, "learning_rate": 3.98886766909618e-06, "loss": 0.0931, "step": 27995 }, { "epoch": 3.27, "grad_norm": 0.70975155276137, "learning_rate": 3.982654136631547e-06, "loss": 0.1035, "step": 28000 }, { "epoch": 3.27, "grad_norm": 0.7110277812308022, "learning_rate": 3.976445028538367e-06, "loss": 0.0913, "step": 28005 }, { "epoch": 3.27, "grad_norm": 0.758231518178429, "learning_rate": 3.970240346123732e-06, "loss": 0.1016, "step": 28010 }, { "epoch": 3.27, "grad_norm": 0.6528700462826965, "learning_rate": 3.964040090693785e-06, "loss": 0.1034, "step": 28015 }, { "epoch": 3.27, "grad_norm": 1.109410882650879, "learning_rate": 3.9578442635537586e-06, "loss": 0.1029, "step": 28020 }, { "epoch": 3.27, "grad_norm": 0.6821703378736803, "learning_rate": 3.951652866007926e-06, "loss": 0.0851, "step": 28025 }, { "epoch": 3.27, "grad_norm": 0.6878920530103287, "learning_rate": 3.945465899359643e-06, "loss": 0.0851, "step": 28030 }, { "epoch": 3.28, "grad_norm": 0.6855964259262746, "learning_rate": 3.939283364911331e-06, "loss": 0.0847, "step": 28035 }, { "epoch": 3.28, "grad_norm": 0.7523315156156533, "learning_rate": 3.933105263964479e-06, "loss": 0.0862, "step": 28040 }, { "epoch": 3.28, "grad_norm": 0.8325013214330024, "learning_rate": 3.92693159781965e-06, "loss": 0.0942, "step": 28045 }, { "epoch": 3.28, "grad_norm": 0.7470833577632429, "learning_rate": 3.920762367776451e-06, "loss": 0.0894, "step": 28050 }, { "epoch": 3.28, "grad_norm": 0.6105296101043943, "learning_rate": 3.914597575133577e-06, "loss": 0.0938, "step": 28055 }, { "epoch": 3.28, "grad_norm": 0.9751688959200584, "learning_rate": 3.9084372211887875e-06, "loss": 0.1404, "step": 28060 }, { "epoch": 3.28, "grad_norm": 0.7088108020323782, "learning_rate": 3.9022813072388895e-06, "loss": 0.1071, "step": 28065 }, { "epoch": 3.28, "grad_norm": 0.7931428650114524, "learning_rate": 3.896129834579778e-06, "loss": 0.1217, "step": 28070 }, { "epoch": 3.28, "grad_norm": 0.6318974791436598, "learning_rate": 3.889982804506395e-06, "loss": 0.1038, "step": 28075 }, { "epoch": 3.28, "grad_norm": 0.7251789213608979, "learning_rate": 3.883840218312757e-06, "loss": 0.0805, "step": 28080 }, { "epoch": 3.28, "grad_norm": 0.6574597475711454, "learning_rate": 3.877702077291939e-06, "loss": 0.0894, "step": 28085 }, { "epoch": 3.28, "grad_norm": 0.8217694309033453, "learning_rate": 3.871568382736099e-06, "loss": 0.1343, "step": 28090 }, { "epoch": 3.28, "grad_norm": 0.6296160237840762, "learning_rate": 3.86543913593643e-06, "loss": 0.1017, "step": 28095 }, { "epoch": 3.28, "grad_norm": 0.9733008740232575, "learning_rate": 3.859314338183201e-06, "loss": 0.1015, "step": 28100 }, { "epoch": 3.28, "grad_norm": 0.8336248904635234, "learning_rate": 3.853193990765749e-06, "loss": 0.1226, "step": 28105 }, { "epoch": 3.28, "grad_norm": 0.8732939090618191, "learning_rate": 3.8470780949724755e-06, "loss": 0.1345, "step": 28110 }, { "epoch": 3.28, "grad_norm": 0.7173714317798201, "learning_rate": 3.840966652090841e-06, "loss": 0.1023, "step": 28115 }, { "epoch": 3.29, "grad_norm": 0.7318872194025108, "learning_rate": 3.834859663407361e-06, "loss": 0.1151, "step": 28120 }, { "epoch": 3.29, "grad_norm": 0.7035034621776279, "learning_rate": 3.828757130207633e-06, "loss": 0.0881, "step": 28125 }, { "epoch": 3.29, "grad_norm": 0.7824267856465141, "learning_rate": 3.8226590537762865e-06, "loss": 0.104, "step": 28130 }, { "epoch": 3.29, "grad_norm": 0.6988643708450567, "learning_rate": 3.816565435397043e-06, "loss": 0.1138, "step": 28135 }, { "epoch": 3.29, "grad_norm": 0.7216208500758892, "learning_rate": 3.810476276352676e-06, "loss": 0.0869, "step": 28140 }, { "epoch": 3.29, "grad_norm": 0.853636524374595, "learning_rate": 3.804391577925004e-06, "loss": 0.1302, "step": 28145 }, { "epoch": 3.29, "grad_norm": 0.8574143648342926, "learning_rate": 3.798311341394928e-06, "loss": 0.0974, "step": 28150 }, { "epoch": 3.29, "grad_norm": 0.6646070219220201, "learning_rate": 3.7922355680424043e-06, "loss": 0.0931, "step": 28155 }, { "epoch": 3.29, "grad_norm": 0.7554673561955634, "learning_rate": 3.786164259146438e-06, "loss": 0.1055, "step": 28160 }, { "epoch": 3.29, "grad_norm": 0.5975903415718572, "learning_rate": 3.7800974159851176e-06, "loss": 0.1076, "step": 28165 }, { "epoch": 3.29, "grad_norm": 0.7166386805167545, "learning_rate": 3.7740350398355577e-06, "loss": 0.1191, "step": 28170 }, { "epoch": 3.29, "grad_norm": 0.7235675483820421, "learning_rate": 3.7679771319739654e-06, "loss": 0.0917, "step": 28175 }, { "epoch": 3.29, "grad_norm": 0.6122845853775039, "learning_rate": 3.76192369367559e-06, "loss": 0.1167, "step": 28180 }, { "epoch": 3.29, "grad_norm": 0.8014150079940554, "learning_rate": 3.7558747262147478e-06, "loss": 0.107, "step": 28185 }, { "epoch": 3.29, "grad_norm": 0.715272868162659, "learning_rate": 3.749830230864809e-06, "loss": 0.0876, "step": 28190 }, { "epoch": 3.29, "grad_norm": 0.726670229347444, "learning_rate": 3.743790208898193e-06, "loss": 0.1069, "step": 28195 }, { "epoch": 3.29, "grad_norm": 0.9572182692267391, "learning_rate": 3.7377546615863956e-06, "loss": 0.1199, "step": 28200 }, { "epoch": 3.3, "grad_norm": 0.759306046843035, "learning_rate": 3.731723590199962e-06, "loss": 0.0988, "step": 28205 }, { "epoch": 3.3, "grad_norm": 0.7367485391109551, "learning_rate": 3.7256969960085015e-06, "loss": 0.0974, "step": 28210 }, { "epoch": 3.3, "grad_norm": 0.6373062728382746, "learning_rate": 3.719674880280663e-06, "loss": 0.0823, "step": 28215 }, { "epoch": 3.3, "grad_norm": 0.6748088665471876, "learning_rate": 3.71365724428418e-06, "loss": 0.137, "step": 28220 }, { "epoch": 3.3, "grad_norm": 0.7394675782194076, "learning_rate": 3.707644089285814e-06, "loss": 0.116, "step": 28225 }, { "epoch": 3.3, "grad_norm": 0.7095203871798705, "learning_rate": 3.7016354165514023e-06, "loss": 0.0966, "step": 28230 }, { "epoch": 3.3, "grad_norm": 0.6416884365890312, "learning_rate": 3.6956312273458386e-06, "loss": 0.0925, "step": 28235 }, { "epoch": 3.3, "grad_norm": 0.9913771035590639, "learning_rate": 3.6896315229330568e-06, "loss": 0.0859, "step": 28240 }, { "epoch": 3.3, "grad_norm": 0.7768635680095392, "learning_rate": 3.6836363045760656e-06, "loss": 0.0956, "step": 28245 }, { "epoch": 3.3, "grad_norm": 0.6909818548395155, "learning_rate": 3.6776455735369255e-06, "loss": 0.0925, "step": 28250 }, { "epoch": 3.3, "grad_norm": 0.7961110282872308, "learning_rate": 3.671659331076743e-06, "loss": 0.1227, "step": 28255 }, { "epoch": 3.3, "grad_norm": 0.6560022020241033, "learning_rate": 3.665677578455676e-06, "loss": 0.0912, "step": 28260 }, { "epoch": 3.3, "grad_norm": 0.6939255407942331, "learning_rate": 3.659700316932954e-06, "loss": 0.1064, "step": 28265 }, { "epoch": 3.3, "grad_norm": 0.6773516481523861, "learning_rate": 3.6537275477668553e-06, "loss": 0.0826, "step": 28270 }, { "epoch": 3.3, "grad_norm": 0.6924984935034643, "learning_rate": 3.647759272214707e-06, "loss": 0.0986, "step": 28275 }, { "epoch": 3.3, "grad_norm": 0.633980236253523, "learning_rate": 3.6417954915329013e-06, "loss": 0.1136, "step": 28280 }, { "epoch": 3.3, "grad_norm": 0.7523943599154672, "learning_rate": 3.6358362069768613e-06, "loss": 0.1082, "step": 28285 }, { "epoch": 3.31, "grad_norm": 0.625307175686647, "learning_rate": 3.6298814198010956e-06, "loss": 0.1086, "step": 28290 }, { "epoch": 3.31, "grad_norm": 0.6645045567046022, "learning_rate": 3.6239311312591367e-06, "loss": 0.0922, "step": 28295 }, { "epoch": 3.31, "grad_norm": 0.6528998271115086, "learning_rate": 3.617985342603586e-06, "loss": 0.1064, "step": 28300 }, { "epoch": 3.31, "grad_norm": 0.6265223743295981, "learning_rate": 3.6120440550861e-06, "loss": 0.0786, "step": 28305 }, { "epoch": 3.31, "grad_norm": 0.6686967521700447, "learning_rate": 3.6061072699573735e-06, "loss": 0.1055, "step": 28310 }, { "epoch": 3.31, "grad_norm": 1.0386554672662853, "learning_rate": 3.6001749884671672e-06, "loss": 0.103, "step": 28315 }, { "epoch": 3.31, "grad_norm": 0.6439533182825401, "learning_rate": 3.5942472118642905e-06, "loss": 0.1174, "step": 28320 }, { "epoch": 3.31, "grad_norm": 0.7497856409960862, "learning_rate": 3.5883239413966015e-06, "loss": 0.1023, "step": 28325 }, { "epoch": 3.31, "grad_norm": 0.6791858942497785, "learning_rate": 3.582405178310999e-06, "loss": 0.0966, "step": 28330 }, { "epoch": 3.31, "grad_norm": 0.5653042669623298, "learning_rate": 3.5764909238534566e-06, "loss": 0.0856, "step": 28335 }, { "epoch": 3.31, "grad_norm": 0.7874656910578491, "learning_rate": 3.5705811792689835e-06, "loss": 0.1108, "step": 28340 }, { "epoch": 3.31, "grad_norm": 0.7339945506210517, "learning_rate": 3.564675945801646e-06, "loss": 0.1097, "step": 28345 }, { "epoch": 3.31, "grad_norm": 0.7367749406803198, "learning_rate": 3.5587752246945577e-06, "loss": 0.106, "step": 28350 }, { "epoch": 3.31, "grad_norm": 0.6448960373103251, "learning_rate": 3.5528790171898806e-06, "loss": 0.1022, "step": 28355 }, { "epoch": 3.31, "grad_norm": 0.7017291207586156, "learning_rate": 3.546987324528822e-06, "loss": 0.105, "step": 28360 }, { "epoch": 3.31, "grad_norm": 0.6891959880441897, "learning_rate": 3.541100147951651e-06, "loss": 0.0844, "step": 28365 }, { "epoch": 3.31, "grad_norm": 0.8467842946050073, "learning_rate": 3.5352174886976786e-06, "loss": 0.12, "step": 28370 }, { "epoch": 3.32, "grad_norm": 0.7064643668473771, "learning_rate": 3.52933934800527e-06, "loss": 0.0957, "step": 28375 }, { "epoch": 3.32, "grad_norm": 0.8760332734549043, "learning_rate": 3.52346572711183e-06, "loss": 0.1171, "step": 28380 }, { "epoch": 3.32, "grad_norm": 0.7109978048818296, "learning_rate": 3.517596627253822e-06, "loss": 0.1295, "step": 28385 }, { "epoch": 3.32, "grad_norm": 0.7434335016570166, "learning_rate": 3.5117320496667465e-06, "loss": 0.1049, "step": 28390 }, { "epoch": 3.32, "grad_norm": 0.7395214464760852, "learning_rate": 3.5058719955851667e-06, "loss": 0.1171, "step": 28395 }, { "epoch": 3.32, "grad_norm": 46.628418668526784, "learning_rate": 3.500016466242678e-06, "loss": 0.1812, "step": 28400 }, { "epoch": 3.32, "grad_norm": 0.829512945499508, "learning_rate": 3.4941654628719317e-06, "loss": 0.1094, "step": 28405 }, { "epoch": 3.32, "grad_norm": 0.8133826051556866, "learning_rate": 3.488318986704628e-06, "loss": 0.1262, "step": 28410 }, { "epoch": 3.32, "grad_norm": 0.7473918140585176, "learning_rate": 3.4824770389715155e-06, "loss": 0.1072, "step": 28415 }, { "epoch": 3.32, "grad_norm": 0.9698470878744179, "learning_rate": 3.4766396209023807e-06, "loss": 0.1339, "step": 28420 }, { "epoch": 3.32, "grad_norm": 0.9618886651414976, "learning_rate": 3.4708067337260564e-06, "loss": 0.1022, "step": 28425 }, { "epoch": 3.32, "grad_norm": 0.703730759439064, "learning_rate": 3.4649783786704304e-06, "loss": 0.0988, "step": 28430 }, { "epoch": 3.32, "grad_norm": 0.9484169541283957, "learning_rate": 3.459154556962435e-06, "loss": 0.1103, "step": 28435 }, { "epoch": 3.32, "grad_norm": 0.6923825211374073, "learning_rate": 3.4533352698280424e-06, "loss": 0.0841, "step": 28440 }, { "epoch": 3.32, "grad_norm": 1.151036733018734, "learning_rate": 3.447520518492281e-06, "loss": 0.0932, "step": 28445 }, { "epoch": 3.32, "grad_norm": 0.7057042108508013, "learning_rate": 3.4417103041792103e-06, "loss": 0.0882, "step": 28450 }, { "epoch": 3.32, "grad_norm": 0.7327663201075238, "learning_rate": 3.4359046281119357e-06, "loss": 0.0912, "step": 28455 }, { "epoch": 3.33, "grad_norm": 0.7802611355170154, "learning_rate": 3.4301034915126206e-06, "loss": 0.1109, "step": 28460 }, { "epoch": 3.33, "grad_norm": 0.6617619079776523, "learning_rate": 3.4243068956024692e-06, "loss": 0.117, "step": 28465 }, { "epoch": 3.33, "grad_norm": 0.6453268036726225, "learning_rate": 3.418514841601714e-06, "loss": 0.0936, "step": 28470 }, { "epoch": 3.33, "grad_norm": 0.6348505617563418, "learning_rate": 3.412727330729648e-06, "loss": 0.1211, "step": 28475 }, { "epoch": 3.33, "grad_norm": 0.7756873705446905, "learning_rate": 3.406944364204609e-06, "loss": 0.0945, "step": 28480 }, { "epoch": 3.33, "grad_norm": 0.6345505585427157, "learning_rate": 3.401165943243964e-06, "loss": 0.0979, "step": 28485 }, { "epoch": 3.33, "grad_norm": 0.8463425298075826, "learning_rate": 3.395392069064138e-06, "loss": 0.1534, "step": 28490 }, { "epoch": 3.33, "grad_norm": 0.7317160188867926, "learning_rate": 3.38962274288058e-06, "loss": 0.1046, "step": 28495 }, { "epoch": 3.33, "grad_norm": 0.7495061458301996, "learning_rate": 3.3838579659078052e-06, "loss": 0.1253, "step": 28500 }, { "epoch": 3.33, "grad_norm": 0.7484124777400966, "learning_rate": 3.378097739359354e-06, "loss": 0.1281, "step": 28505 }, { "epoch": 3.33, "grad_norm": 0.7896845924974772, "learning_rate": 3.372342064447817e-06, "loss": 0.103, "step": 28510 }, { "epoch": 3.33, "grad_norm": 0.6806777040521867, "learning_rate": 3.3665909423848287e-06, "loss": 0.1045, "step": 28515 }, { "epoch": 3.33, "grad_norm": 0.9745860705600712, "learning_rate": 3.360844374381056e-06, "loss": 0.1173, "step": 28520 }, { "epoch": 3.33, "grad_norm": 0.722399156603803, "learning_rate": 3.3551023616462034e-06, "loss": 0.1054, "step": 28525 }, { "epoch": 3.33, "grad_norm": 0.6534491395575144, "learning_rate": 3.3493649053890326e-06, "loss": 0.0869, "step": 28530 }, { "epoch": 3.33, "grad_norm": 0.6696850162519702, "learning_rate": 3.343632006817346e-06, "loss": 0.0852, "step": 28535 }, { "epoch": 3.33, "grad_norm": 0.6424707775122042, "learning_rate": 3.3379036671379623e-06, "loss": 0.0988, "step": 28540 }, { "epoch": 3.34, "grad_norm": 0.6005995174874295, "learning_rate": 3.3321798875567662e-06, "loss": 0.0962, "step": 28545 }, { "epoch": 3.34, "grad_norm": 0.799612994702888, "learning_rate": 3.3264606692786774e-06, "loss": 0.1165, "step": 28550 }, { "epoch": 3.34, "grad_norm": 0.7275850439751429, "learning_rate": 3.320746013507642e-06, "loss": 0.1024, "step": 28555 }, { "epoch": 3.34, "grad_norm": 0.6859576681219415, "learning_rate": 3.3150359214466657e-06, "loss": 0.1111, "step": 28560 }, { "epoch": 3.34, "grad_norm": 0.7557470833944618, "learning_rate": 3.3093303942977687e-06, "loss": 0.0927, "step": 28565 }, { "epoch": 3.34, "grad_norm": 0.7975180596742816, "learning_rate": 3.3036294332620317e-06, "loss": 0.1026, "step": 28570 }, { "epoch": 3.34, "grad_norm": 0.7370316545547761, "learning_rate": 3.2979330395395662e-06, "loss": 0.0918, "step": 28575 }, { "epoch": 3.34, "grad_norm": 0.6524360551785489, "learning_rate": 3.2922412143295308e-06, "loss": 0.1163, "step": 28580 }, { "epoch": 3.34, "grad_norm": 0.605049105321238, "learning_rate": 3.2865539588301046e-06, "loss": 0.1485, "step": 28585 }, { "epoch": 3.34, "grad_norm": 0.8167607212334527, "learning_rate": 3.2808712742385143e-06, "loss": 0.0952, "step": 28590 }, { "epoch": 3.34, "grad_norm": 0.6721525492901864, "learning_rate": 3.2751931617510223e-06, "loss": 0.0904, "step": 28595 }, { "epoch": 3.34, "grad_norm": 0.8248765600577694, "learning_rate": 3.269519622562939e-06, "loss": 0.1235, "step": 28600 }, { "epoch": 3.34, "grad_norm": 0.5608091027475581, "learning_rate": 3.263850657868606e-06, "loss": 0.0811, "step": 28605 }, { "epoch": 3.34, "grad_norm": 0.7019272348132117, "learning_rate": 3.2581862688613852e-06, "loss": 0.0965, "step": 28610 }, { "epoch": 3.34, "grad_norm": 0.8745104263853927, "learning_rate": 3.2525264567337067e-06, "loss": 0.1065, "step": 28615 }, { "epoch": 3.34, "grad_norm": 0.6045150586551867, "learning_rate": 3.246871222677006e-06, "loss": 0.1209, "step": 28620 }, { "epoch": 3.34, "grad_norm": 0.5933104381777957, "learning_rate": 3.2412205678817754e-06, "loss": 0.0892, "step": 28625 }, { "epoch": 3.35, "grad_norm": 0.9128926987384887, "learning_rate": 3.2355744935375408e-06, "loss": 0.1221, "step": 28630 }, { "epoch": 3.35, "grad_norm": 0.6301476160604672, "learning_rate": 3.229933000832852e-06, "loss": 0.1034, "step": 28635 }, { "epoch": 3.35, "grad_norm": 0.8105424576133822, "learning_rate": 3.2242960909553034e-06, "loss": 0.1232, "step": 28640 }, { "epoch": 3.35, "grad_norm": 0.9823096751582189, "learning_rate": 3.218663765091534e-06, "loss": 0.1121, "step": 28645 }, { "epoch": 3.35, "grad_norm": 0.6567971919147917, "learning_rate": 3.213036024427196e-06, "loss": 0.104, "step": 28650 }, { "epoch": 3.35, "grad_norm": 0.6363796489399048, "learning_rate": 3.207412870146995e-06, "loss": 0.0938, "step": 28655 }, { "epoch": 3.35, "grad_norm": 0.6292393997725017, "learning_rate": 3.2017943034346547e-06, "loss": 0.1189, "step": 28660 }, { "epoch": 3.35, "grad_norm": 0.8254562079331257, "learning_rate": 3.1961803254729473e-06, "loss": 0.1196, "step": 28665 }, { "epoch": 3.35, "grad_norm": 0.6406829868749524, "learning_rate": 3.1905709374436766e-06, "loss": 0.0983, "step": 28670 }, { "epoch": 3.35, "grad_norm": 0.677321685074616, "learning_rate": 3.1849661405276786e-06, "loss": 0.1029, "step": 28675 }, { "epoch": 3.35, "grad_norm": 0.6561117157146823, "learning_rate": 3.179365935904821e-06, "loss": 0.11, "step": 28680 }, { "epoch": 3.35, "grad_norm": 0.7956179799967459, "learning_rate": 3.173770324753997e-06, "loss": 0.0945, "step": 28685 }, { "epoch": 3.35, "grad_norm": 0.5988166043308951, "learning_rate": 3.1681793082531467e-06, "loss": 0.0838, "step": 28690 }, { "epoch": 3.35, "grad_norm": 0.5788063393337889, "learning_rate": 3.162592887579241e-06, "loss": 0.0913, "step": 28695 }, { "epoch": 3.35, "grad_norm": 0.5552989395705786, "learning_rate": 3.1570110639082828e-06, "loss": 0.108, "step": 28700 }, { "epoch": 3.35, "grad_norm": 0.7528522604751364, "learning_rate": 3.1514338384152907e-06, "loss": 0.1099, "step": 28705 }, { "epoch": 3.35, "grad_norm": 0.775242964088523, "learning_rate": 3.1458612122743452e-06, "loss": 0.1105, "step": 28710 }, { "epoch": 3.35, "grad_norm": 0.7445331715729363, "learning_rate": 3.14029318665853e-06, "loss": 0.105, "step": 28715 }, { "epoch": 3.36, "grad_norm": 0.8185360665545963, "learning_rate": 3.134729762739977e-06, "loss": 0.132, "step": 28720 }, { "epoch": 3.36, "grad_norm": 0.5629552780660295, "learning_rate": 3.129170941689849e-06, "loss": 0.0957, "step": 28725 }, { "epoch": 3.36, "grad_norm": 1.278820647104455, "learning_rate": 3.12361672467833e-06, "loss": 0.1104, "step": 28730 }, { "epoch": 3.36, "grad_norm": 0.7438259220057238, "learning_rate": 3.1180671128746424e-06, "loss": 0.1046, "step": 28735 }, { "epoch": 3.36, "grad_norm": 0.7834522284533213, "learning_rate": 3.1125221074470412e-06, "loss": 0.0937, "step": 28740 }, { "epoch": 3.36, "grad_norm": 0.7097981764499439, "learning_rate": 3.106981709562809e-06, "loss": 0.1082, "step": 28745 }, { "epoch": 3.36, "grad_norm": 0.6641091932527232, "learning_rate": 3.1014459203882547e-06, "loss": 0.0942, "step": 28750 }, { "epoch": 3.36, "grad_norm": 0.7569603173961842, "learning_rate": 3.095914741088715e-06, "loss": 0.1162, "step": 28755 }, { "epoch": 3.36, "grad_norm": 0.6410382781028555, "learning_rate": 3.0903881728285676e-06, "loss": 0.0954, "step": 28760 }, { "epoch": 3.36, "grad_norm": 0.6319997471040962, "learning_rate": 3.0848662167712093e-06, "loss": 0.1101, "step": 28765 }, { "epoch": 3.36, "grad_norm": 0.6311186394777405, "learning_rate": 3.0793488740790772e-06, "loss": 0.0963, "step": 28770 }, { "epoch": 3.36, "grad_norm": 0.6514567768433571, "learning_rate": 3.0738361459136193e-06, "loss": 0.075, "step": 28775 }, { "epoch": 3.36, "grad_norm": 0.864989351619348, "learning_rate": 3.0683280334353303e-06, "loss": 0.0934, "step": 28780 }, { "epoch": 3.36, "grad_norm": 0.7283768040490727, "learning_rate": 3.0628245378037213e-06, "loss": 0.1051, "step": 28785 }, { "epoch": 3.36, "grad_norm": 0.7430287648124071, "learning_rate": 3.0573256601773347e-06, "loss": 0.115, "step": 28790 }, { "epoch": 3.36, "grad_norm": 0.8345508745104409, "learning_rate": 3.0518314017137507e-06, "loss": 0.1105, "step": 28795 }, { "epoch": 3.36, "grad_norm": 0.9637332157980986, "learning_rate": 3.046341763569557e-06, "loss": 0.1283, "step": 28800 }, { "epoch": 3.37, "grad_norm": 0.7005262897299117, "learning_rate": 3.0408567469003833e-06, "loss": 0.0845, "step": 28805 }, { "epoch": 3.37, "grad_norm": 0.7161662199125126, "learning_rate": 3.0353763528608912e-06, "loss": 0.1013, "step": 28810 }, { "epoch": 3.37, "grad_norm": 0.7400550628093231, "learning_rate": 3.029900582604753e-06, "loss": 0.1053, "step": 28815 }, { "epoch": 3.37, "grad_norm": 0.8866578364353414, "learning_rate": 3.024429437284673e-06, "loss": 0.1204, "step": 28820 }, { "epoch": 3.37, "grad_norm": 0.7800555378250028, "learning_rate": 3.01896291805239e-06, "loss": 0.0977, "step": 28825 }, { "epoch": 3.37, "grad_norm": 0.6619530551831401, "learning_rate": 3.0135010260586597e-06, "loss": 0.1142, "step": 28830 }, { "epoch": 3.37, "grad_norm": 0.5808083178837569, "learning_rate": 3.008043762453272e-06, "loss": 0.0902, "step": 28835 }, { "epoch": 3.37, "grad_norm": 0.7287917742169926, "learning_rate": 3.0025911283850407e-06, "loss": 0.0971, "step": 28840 }, { "epoch": 3.37, "grad_norm": 0.8441276554314261, "learning_rate": 2.997143125001797e-06, "loss": 0.1166, "step": 28845 }, { "epoch": 3.37, "grad_norm": 0.8818960550511538, "learning_rate": 2.9916997534504e-06, "loss": 0.0992, "step": 28850 }, { "epoch": 3.37, "grad_norm": 0.8366504717913369, "learning_rate": 2.98626101487674e-06, "loss": 0.1344, "step": 28855 }, { "epoch": 3.37, "grad_norm": 0.8771492738864335, "learning_rate": 2.980826910425727e-06, "loss": 0.1195, "step": 28860 }, { "epoch": 3.37, "grad_norm": 0.7389778485725227, "learning_rate": 2.9753974412413064e-06, "loss": 0.0978, "step": 28865 }, { "epoch": 3.37, "grad_norm": 0.7682188432319635, "learning_rate": 2.969972608466423e-06, "loss": 0.1097, "step": 28870 }, { "epoch": 3.37, "grad_norm": 0.6878324031177027, "learning_rate": 2.964552413243074e-06, "loss": 0.0935, "step": 28875 }, { "epoch": 3.37, "grad_norm": 0.7648388262874948, "learning_rate": 2.9591368567122552e-06, "loss": 0.1329, "step": 28880 }, { "epoch": 3.37, "grad_norm": 0.7243896712392304, "learning_rate": 2.9537259400140098e-06, "loss": 0.0986, "step": 28885 }, { "epoch": 3.38, "grad_norm": 0.8949618611385405, "learning_rate": 2.948319664287383e-06, "loss": 0.115, "step": 28890 }, { "epoch": 3.38, "grad_norm": 0.6975126638492564, "learning_rate": 2.9429180306704577e-06, "loss": 0.1144, "step": 28895 }, { "epoch": 3.38, "grad_norm": 0.563270986491268, "learning_rate": 2.937521040300331e-06, "loss": 0.0917, "step": 28900 }, { "epoch": 3.38, "grad_norm": 0.684725148392325, "learning_rate": 2.932128694313133e-06, "loss": 0.1057, "step": 28905 }, { "epoch": 3.38, "grad_norm": 0.8012474732674006, "learning_rate": 2.926740993844004e-06, "loss": 0.1391, "step": 28910 }, { "epoch": 3.38, "grad_norm": 0.7043712683169141, "learning_rate": 2.9213579400271084e-06, "loss": 0.0895, "step": 28915 }, { "epoch": 3.38, "grad_norm": 0.8109038973939088, "learning_rate": 2.915979533995636e-06, "loss": 0.1369, "step": 28920 }, { "epoch": 3.38, "grad_norm": 0.6234423968702772, "learning_rate": 2.9106057768818e-06, "loss": 0.0923, "step": 28925 }, { "epoch": 3.38, "grad_norm": 0.6418320256885169, "learning_rate": 2.9052366698168325e-06, "loss": 0.0971, "step": 28930 }, { "epoch": 3.38, "grad_norm": 0.8288007080766526, "learning_rate": 2.8998722139309933e-06, "loss": 0.1039, "step": 28935 }, { "epoch": 3.38, "grad_norm": 0.7728739172187259, "learning_rate": 2.89451241035355e-06, "loss": 0.0915, "step": 28940 }, { "epoch": 3.38, "grad_norm": 0.6284288967122575, "learning_rate": 2.889157260212791e-06, "loss": 0.1303, "step": 28945 }, { "epoch": 3.38, "grad_norm": 0.6832618380112833, "learning_rate": 2.8838067646360406e-06, "loss": 0.0998, "step": 28950 }, { "epoch": 3.38, "grad_norm": 0.7799895755027861, "learning_rate": 2.8784609247496345e-06, "loss": 0.1046, "step": 28955 }, { "epoch": 3.38, "grad_norm": 0.5902164516015337, "learning_rate": 2.87311974167892e-06, "loss": 0.0735, "step": 28960 }, { "epoch": 3.38, "grad_norm": 0.799705596574271, "learning_rate": 2.867783216548278e-06, "loss": 0.1094, "step": 28965 }, { "epoch": 3.38, "grad_norm": 0.841515992106704, "learning_rate": 2.8624513504811016e-06, "loss": 0.1146, "step": 28970 }, { "epoch": 3.39, "grad_norm": 0.5646634517917594, "learning_rate": 2.85712414459981e-06, "loss": 0.1143, "step": 28975 }, { "epoch": 3.39, "grad_norm": 1.1905755291134354, "learning_rate": 2.85180160002583e-06, "loss": 0.1095, "step": 28980 }, { "epoch": 3.39, "grad_norm": 0.7485076791360326, "learning_rate": 2.846483717879611e-06, "loss": 0.1291, "step": 28985 }, { "epoch": 3.39, "grad_norm": 0.7274379661646979, "learning_rate": 2.8411704992806235e-06, "loss": 0.1035, "step": 28990 }, { "epoch": 3.39, "grad_norm": 0.6748241732764865, "learning_rate": 2.8358619453473595e-06, "loss": 0.11, "step": 28995 }, { "epoch": 3.39, "grad_norm": 0.7836737650211666, "learning_rate": 2.8305580571973217e-06, "loss": 0.1087, "step": 29000 }, { "epoch": 3.39, "grad_norm": 0.6059980465913074, "learning_rate": 2.8252588359470423e-06, "loss": 0.1126, "step": 29005 }, { "epoch": 3.39, "grad_norm": 0.8912021457147173, "learning_rate": 2.8199642827120565e-06, "loss": 0.11, "step": 29010 }, { "epoch": 3.39, "grad_norm": 0.7152104094778989, "learning_rate": 2.8146743986069173e-06, "loss": 0.1047, "step": 29015 }, { "epoch": 3.39, "grad_norm": 0.7644796147224163, "learning_rate": 2.8093891847452093e-06, "loss": 0.1094, "step": 29020 }, { "epoch": 3.39, "grad_norm": 0.7180270545351612, "learning_rate": 2.804108642239525e-06, "loss": 0.0986, "step": 29025 }, { "epoch": 3.39, "grad_norm": 0.6626397345100121, "learning_rate": 2.7988327722014677e-06, "loss": 0.1025, "step": 29030 }, { "epoch": 3.39, "grad_norm": 0.6646506326763864, "learning_rate": 2.7935615757416657e-06, "loss": 0.0965, "step": 29035 }, { "epoch": 3.39, "grad_norm": 0.7728865547213659, "learning_rate": 2.7882950539697695e-06, "loss": 0.0853, "step": 29040 }, { "epoch": 3.39, "grad_norm": 0.6209600407248309, "learning_rate": 2.783033207994426e-06, "loss": 0.0893, "step": 29045 }, { "epoch": 3.39, "grad_norm": 0.7401401218026368, "learning_rate": 2.777776038923316e-06, "loss": 0.0974, "step": 29050 }, { "epoch": 3.39, "grad_norm": 0.6757958408727438, "learning_rate": 2.7725235478631208e-06, "loss": 0.0932, "step": 29055 }, { "epoch": 3.4, "grad_norm": 0.8297523385247889, "learning_rate": 2.767275735919553e-06, "loss": 0.1313, "step": 29060 }, { "epoch": 3.4, "grad_norm": 0.7640702041099631, "learning_rate": 2.762032604197326e-06, "loss": 0.1029, "step": 29065 }, { "epoch": 3.4, "grad_norm": 0.721447562342801, "learning_rate": 2.7567941538001796e-06, "loss": 0.1044, "step": 29070 }, { "epoch": 3.4, "grad_norm": 0.6158025124774261, "learning_rate": 2.7515603858308646e-06, "loss": 0.0855, "step": 29075 }, { "epoch": 3.4, "grad_norm": 0.8049845982201027, "learning_rate": 2.746331301391131e-06, "loss": 0.114, "step": 29080 }, { "epoch": 3.4, "grad_norm": 0.7012783328623803, "learning_rate": 2.7411069015817637e-06, "loss": 0.1054, "step": 29085 }, { "epoch": 3.4, "grad_norm": 0.7509113185769443, "learning_rate": 2.7358871875025533e-06, "loss": 0.136, "step": 29090 }, { "epoch": 3.4, "grad_norm": 0.629958538376477, "learning_rate": 2.7306721602523122e-06, "loss": 0.0869, "step": 29095 }, { "epoch": 3.4, "grad_norm": 0.7068033603025061, "learning_rate": 2.7254618209288428e-06, "loss": 0.081, "step": 29100 }, { "epoch": 3.4, "grad_norm": 0.5573870894706169, "learning_rate": 2.7202561706289902e-06, "loss": 0.0771, "step": 29105 }, { "epoch": 3.4, "grad_norm": 0.6748362995080456, "learning_rate": 2.7150552104485887e-06, "loss": 0.1073, "step": 29110 }, { "epoch": 3.4, "grad_norm": 0.7418188309902358, "learning_rate": 2.709858941482496e-06, "loss": 0.1463, "step": 29115 }, { "epoch": 3.4, "grad_norm": 0.6778664681816945, "learning_rate": 2.7046673648245924e-06, "loss": 0.1142, "step": 29120 }, { "epoch": 3.4, "grad_norm": 0.6360195370630303, "learning_rate": 2.69948048156774e-06, "loss": 0.0961, "step": 29125 }, { "epoch": 3.4, "grad_norm": 0.8104118949415474, "learning_rate": 2.6942982928038456e-06, "loss": 0.0832, "step": 29130 }, { "epoch": 3.4, "grad_norm": 0.6628470983511452, "learning_rate": 2.6891207996238147e-06, "loss": 0.1023, "step": 29135 }, { "epoch": 3.4, "grad_norm": 0.6188057428595316, "learning_rate": 2.6839480031175563e-06, "loss": 0.0923, "step": 29140 }, { "epoch": 3.41, "grad_norm": 0.6751220481125374, "learning_rate": 2.678779904374007e-06, "loss": 0.0875, "step": 29145 }, { "epoch": 3.41, "grad_norm": 0.7905414691782526, "learning_rate": 2.6736165044810968e-06, "loss": 0.1414, "step": 29150 }, { "epoch": 3.41, "grad_norm": 0.5906334431066138, "learning_rate": 2.6684578045257756e-06, "loss": 0.1158, "step": 29155 }, { "epoch": 3.41, "grad_norm": 0.9755394176692816, "learning_rate": 2.663303805594011e-06, "loss": 0.1081, "step": 29160 }, { "epoch": 3.41, "grad_norm": 0.7795644246251584, "learning_rate": 2.6581545087707745e-06, "loss": 0.0934, "step": 29165 }, { "epoch": 3.41, "grad_norm": 0.7290049032443088, "learning_rate": 2.6530099151400424e-06, "loss": 0.1041, "step": 29170 }, { "epoch": 3.41, "grad_norm": 0.9267338127754627, "learning_rate": 2.6478700257848015e-06, "loss": 0.0951, "step": 29175 }, { "epoch": 3.41, "grad_norm": 0.762222417964181, "learning_rate": 2.642734841787059e-06, "loss": 0.1197, "step": 29180 }, { "epoch": 3.41, "grad_norm": 0.8189430335745673, "learning_rate": 2.6376043642278194e-06, "loss": 0.1311, "step": 29185 }, { "epoch": 3.41, "grad_norm": 0.7403126685475722, "learning_rate": 2.632478594187113e-06, "loss": 0.0748, "step": 29190 }, { "epoch": 3.41, "grad_norm": 0.6530098855201154, "learning_rate": 2.6273575327439543e-06, "loss": 0.0957, "step": 29195 }, { "epoch": 3.41, "grad_norm": 0.6657590050695665, "learning_rate": 2.622241180976384e-06, "loss": 0.1083, "step": 29200 }, { "epoch": 3.41, "grad_norm": 0.828161588253386, "learning_rate": 2.6171295399614603e-06, "loss": 0.0944, "step": 29205 }, { "epoch": 3.41, "grad_norm": 0.6644647074876299, "learning_rate": 2.612022610775219e-06, "loss": 0.1031, "step": 29210 }, { "epoch": 3.41, "grad_norm": 0.7970219720616535, "learning_rate": 2.6069203944927344e-06, "loss": 0.0947, "step": 29215 }, { "epoch": 3.41, "grad_norm": 0.7416239119729081, "learning_rate": 2.6018228921880716e-06, "loss": 0.1097, "step": 29220 }, { "epoch": 3.41, "grad_norm": 0.6856097941178884, "learning_rate": 2.5967301049343057e-06, "loss": 0.101, "step": 29225 }, { "epoch": 3.42, "grad_norm": 0.6421768561224025, "learning_rate": 2.591642033803526e-06, "loss": 0.1171, "step": 29230 }, { "epoch": 3.42, "grad_norm": 0.7121098570384933, "learning_rate": 2.5865586798668295e-06, "loss": 0.094, "step": 29235 }, { "epoch": 3.42, "grad_norm": 0.6344778805389947, "learning_rate": 2.581480044194309e-06, "loss": 0.0722, "step": 29240 }, { "epoch": 3.42, "grad_norm": 0.8119722711457465, "learning_rate": 2.576406127855066e-06, "loss": 0.1026, "step": 29245 }, { "epoch": 3.42, "grad_norm": 0.7264250483786848, "learning_rate": 2.5713369319172203e-06, "loss": 0.0967, "step": 29250 }, { "epoch": 3.42, "grad_norm": 0.7592911933932857, "learning_rate": 2.566272457447888e-06, "loss": 0.0968, "step": 29255 }, { "epoch": 3.42, "grad_norm": 0.7267884548447344, "learning_rate": 2.5612127055131994e-06, "loss": 0.1136, "step": 29260 }, { "epoch": 3.42, "grad_norm": 0.6628811294532958, "learning_rate": 2.5561576771782753e-06, "loss": 0.0923, "step": 29265 }, { "epoch": 3.42, "grad_norm": 0.9113579569054251, "learning_rate": 2.551107373507264e-06, "loss": 0.098, "step": 29270 }, { "epoch": 3.42, "grad_norm": 0.740175511306563, "learning_rate": 2.5460617955632927e-06, "loss": 0.1172, "step": 29275 }, { "epoch": 3.42, "grad_norm": 0.6891898154441953, "learning_rate": 2.5410209444085202e-06, "loss": 0.1019, "step": 29280 }, { "epoch": 3.42, "grad_norm": 0.7693895953309321, "learning_rate": 2.535984821104098e-06, "loss": 0.1377, "step": 29285 }, { "epoch": 3.42, "grad_norm": 0.5597343743842762, "learning_rate": 2.5309534267101755e-06, "loss": 0.1272, "step": 29290 }, { "epoch": 3.42, "grad_norm": 0.8711712768550239, "learning_rate": 2.5259267622859182e-06, "loss": 0.0971, "step": 29295 }, { "epoch": 3.42, "grad_norm": 0.6343153191358625, "learning_rate": 2.5209048288894947e-06, "loss": 0.1164, "step": 29300 }, { "epoch": 3.42, "grad_norm": 0.6573692866518238, "learning_rate": 2.515887627578073e-06, "loss": 0.1283, "step": 29305 }, { "epoch": 3.42, "grad_norm": 0.7636692752731608, "learning_rate": 2.5108751594078215e-06, "loss": 0.118, "step": 29310 }, { "epoch": 3.43, "grad_norm": 0.6975976270822801, "learning_rate": 2.505867425433919e-06, "loss": 0.1004, "step": 29315 }, { "epoch": 3.43, "grad_norm": 0.7305641733284647, "learning_rate": 2.5008644267105475e-06, "loss": 0.1408, "step": 29320 }, { "epoch": 3.43, "grad_norm": 0.749871081140077, "learning_rate": 2.4958661642908916e-06, "loss": 0.1075, "step": 29325 }, { "epoch": 3.43, "grad_norm": 0.6998715082856394, "learning_rate": 2.4908726392271405e-06, "loss": 0.1105, "step": 29330 }, { "epoch": 3.43, "grad_norm": 0.7163083322383815, "learning_rate": 2.485883852570481e-06, "loss": 0.1093, "step": 29335 }, { "epoch": 3.43, "grad_norm": 0.6692625496174391, "learning_rate": 2.4808998053711007e-06, "loss": 0.1036, "step": 29340 }, { "epoch": 3.43, "grad_norm": 0.6574895050472488, "learning_rate": 2.475920498678194e-06, "loss": 0.0909, "step": 29345 }, { "epoch": 3.43, "grad_norm": 0.7502253731469911, "learning_rate": 2.4709459335399644e-06, "loss": 0.1232, "step": 29350 }, { "epoch": 3.43, "grad_norm": 0.6703918768763688, "learning_rate": 2.4659761110036067e-06, "loss": 0.084, "step": 29355 }, { "epoch": 3.43, "grad_norm": 0.697285688473299, "learning_rate": 2.461011032115315e-06, "loss": 0.0834, "step": 29360 }, { "epoch": 3.43, "grad_norm": 0.792383437010644, "learning_rate": 2.4560506979203037e-06, "loss": 0.0916, "step": 29365 }, { "epoch": 3.43, "grad_norm": 0.7460563190654858, "learning_rate": 2.4510951094627587e-06, "loss": 0.1296, "step": 29370 }, { "epoch": 3.43, "grad_norm": 0.8121431572883597, "learning_rate": 2.4461442677858966e-06, "loss": 0.1557, "step": 29375 }, { "epoch": 3.43, "grad_norm": 0.8172526067249357, "learning_rate": 2.441198173931911e-06, "loss": 0.1044, "step": 29380 }, { "epoch": 3.43, "grad_norm": 0.7242549091305366, "learning_rate": 2.4362568289420103e-06, "loss": 0.1015, "step": 29385 }, { "epoch": 3.43, "grad_norm": 0.962713573667299, "learning_rate": 2.431320233856402e-06, "loss": 0.1008, "step": 29390 }, { "epoch": 3.43, "grad_norm": 0.627634598651306, "learning_rate": 2.426388389714293e-06, "loss": 0.0898, "step": 29395 }, { "epoch": 3.43, "grad_norm": 0.7161684131859233, "learning_rate": 2.421461297553887e-06, "loss": 0.1115, "step": 29400 }, { "epoch": 3.44, "grad_norm": 0.6172124523067426, "learning_rate": 2.4165389584123786e-06, "loss": 0.1103, "step": 29405 }, { "epoch": 3.44, "grad_norm": 0.7832300611814855, "learning_rate": 2.4116213733259813e-06, "loss": 0.1029, "step": 29410 }, { "epoch": 3.44, "grad_norm": 0.6723344207683406, "learning_rate": 2.406708543329897e-06, "loss": 0.1063, "step": 29415 }, { "epoch": 3.44, "grad_norm": 0.669013782229076, "learning_rate": 2.401800469458326e-06, "loss": 0.1289, "step": 29420 }, { "epoch": 3.44, "grad_norm": 0.9427092079307101, "learning_rate": 2.396897152744476e-06, "loss": 0.1314, "step": 29425 }, { "epoch": 3.44, "grad_norm": 0.886625223129027, "learning_rate": 2.3919985942205347e-06, "loss": 0.1271, "step": 29430 }, { "epoch": 3.44, "grad_norm": 0.7342224755835731, "learning_rate": 2.3871047949177134e-06, "loss": 0.1081, "step": 29435 }, { "epoch": 3.44, "grad_norm": 0.6425947907024737, "learning_rate": 2.3822157558661936e-06, "loss": 0.1078, "step": 29440 }, { "epoch": 3.44, "grad_norm": 0.742880312146628, "learning_rate": 2.377331478095182e-06, "loss": 0.1013, "step": 29445 }, { "epoch": 3.44, "grad_norm": 0.7553991079315303, "learning_rate": 2.37245196263286e-06, "loss": 0.0997, "step": 29450 }, { "epoch": 3.44, "grad_norm": 0.7379032736592557, "learning_rate": 2.3675772105064246e-06, "loss": 0.1003, "step": 29455 }, { "epoch": 3.44, "grad_norm": 0.7791272004835175, "learning_rate": 2.3627072227420555e-06, "loss": 0.1044, "step": 29460 }, { "epoch": 3.44, "grad_norm": 0.7825039822147692, "learning_rate": 2.357842000364946e-06, "loss": 0.1085, "step": 29465 }, { "epoch": 3.44, "grad_norm": 0.7846339138558028, "learning_rate": 2.3529815443992716e-06, "loss": 0.1428, "step": 29470 }, { "epoch": 3.44, "grad_norm": 0.9272745116826714, "learning_rate": 2.3481258558681993e-06, "loss": 0.1009, "step": 29475 }, { "epoch": 3.44, "grad_norm": 0.6641186732339446, "learning_rate": 2.3432749357939154e-06, "loss": 0.1272, "step": 29480 }, { "epoch": 3.44, "grad_norm": 0.6713945118911927, "learning_rate": 2.3384287851975828e-06, "loss": 0.1016, "step": 29485 }, { "epoch": 3.45, "grad_norm": 0.6932855509169203, "learning_rate": 2.3335874050993662e-06, "loss": 0.0812, "step": 29490 }, { "epoch": 3.45, "grad_norm": 0.6408136630860655, "learning_rate": 2.328750796518439e-06, "loss": 0.1055, "step": 29495 }, { "epoch": 3.45, "grad_norm": 0.6771248487102963, "learning_rate": 2.3239189604729456e-06, "loss": 0.1063, "step": 29500 }, { "epoch": 3.45, "grad_norm": 0.7345077934824498, "learning_rate": 2.3190918979800384e-06, "loss": 0.1136, "step": 29505 }, { "epoch": 3.45, "grad_norm": 0.6127066578573924, "learning_rate": 2.3142696100558666e-06, "loss": 0.1164, "step": 29510 }, { "epoch": 3.45, "grad_norm": 0.6199968957723145, "learning_rate": 2.3094520977155724e-06, "loss": 0.0906, "step": 29515 }, { "epoch": 3.45, "grad_norm": 0.6928113091544492, "learning_rate": 2.3046393619732988e-06, "loss": 0.1117, "step": 29520 }, { "epoch": 3.45, "grad_norm": 0.7205811573603301, "learning_rate": 2.2998314038421647e-06, "loss": 0.0875, "step": 29525 }, { "epoch": 3.45, "grad_norm": 0.8272049674631475, "learning_rate": 2.295028224334311e-06, "loss": 0.1228, "step": 29530 }, { "epoch": 3.45, "grad_norm": 0.6431489298814284, "learning_rate": 2.2902298244608404e-06, "loss": 0.1092, "step": 29535 }, { "epoch": 3.45, "grad_norm": 0.8655063556552018, "learning_rate": 2.2854362052318805e-06, "loss": 0.1011, "step": 29540 }, { "epoch": 3.45, "grad_norm": 0.5948747205967501, "learning_rate": 2.28064736765653e-06, "loss": 0.0963, "step": 29545 }, { "epoch": 3.45, "grad_norm": 0.7627693110926421, "learning_rate": 2.2758633127428902e-06, "loss": 0.1307, "step": 29550 }, { "epoch": 3.45, "grad_norm": 0.8005795290374716, "learning_rate": 2.271084041498056e-06, "loss": 0.1125, "step": 29555 }, { "epoch": 3.45, "grad_norm": 0.7562957756354073, "learning_rate": 2.2663095549281198e-06, "loss": 0.0894, "step": 29560 }, { "epoch": 3.45, "grad_norm": 0.7367859080830766, "learning_rate": 2.2615398540381582e-06, "loss": 0.1403, "step": 29565 }, { "epoch": 3.45, "grad_norm": 0.7087112407472577, "learning_rate": 2.256774939832235e-06, "loss": 0.0939, "step": 29570 }, { "epoch": 3.46, "grad_norm": 0.8423252212658027, "learning_rate": 2.2520148133134188e-06, "loss": 0.1172, "step": 29575 }, { "epoch": 3.46, "grad_norm": 0.753557258574696, "learning_rate": 2.2472594754837677e-06, "loss": 0.1306, "step": 29580 }, { "epoch": 3.46, "grad_norm": 0.7164448453210157, "learning_rate": 2.2425089273443308e-06, "loss": 0.1154, "step": 29585 }, { "epoch": 3.46, "grad_norm": 0.681736563524342, "learning_rate": 2.2377631698951517e-06, "loss": 0.1043, "step": 29590 }, { "epoch": 3.46, "grad_norm": 0.8388401309111846, "learning_rate": 2.23302220413526e-06, "loss": 0.0977, "step": 29595 }, { "epoch": 3.46, "grad_norm": 0.7027937256903803, "learning_rate": 2.228286031062671e-06, "loss": 0.1101, "step": 29600 }, { "epoch": 3.46, "grad_norm": 0.7960660650406883, "learning_rate": 2.223554651674403e-06, "loss": 0.0855, "step": 29605 }, { "epoch": 3.46, "grad_norm": 0.7798220483638502, "learning_rate": 2.218828066966466e-06, "loss": 0.1124, "step": 29610 }, { "epoch": 3.46, "grad_norm": 0.7520684507516269, "learning_rate": 2.21410627793385e-06, "loss": 0.1127, "step": 29615 }, { "epoch": 3.46, "grad_norm": 0.7982058092565384, "learning_rate": 2.2093892855705427e-06, "loss": 0.0905, "step": 29620 }, { "epoch": 3.46, "grad_norm": 0.7402156315502857, "learning_rate": 2.2046770908695225e-06, "loss": 0.0936, "step": 29625 }, { "epoch": 3.46, "grad_norm": 0.7553460326717047, "learning_rate": 2.199969694822751e-06, "loss": 0.1177, "step": 29630 }, { "epoch": 3.46, "grad_norm": 0.7223576707840773, "learning_rate": 2.1952670984211916e-06, "loss": 0.0923, "step": 29635 }, { "epoch": 3.46, "grad_norm": 0.9972742622300764, "learning_rate": 2.1905693026547824e-06, "loss": 0.1251, "step": 29640 }, { "epoch": 3.46, "grad_norm": 0.6135068448064825, "learning_rate": 2.1858763085124606e-06, "loss": 0.0995, "step": 29645 }, { "epoch": 3.46, "grad_norm": 0.8142846116876807, "learning_rate": 2.1811881169821525e-06, "loss": 0.1065, "step": 29650 }, { "epoch": 3.46, "grad_norm": 0.6410774447950901, "learning_rate": 2.176504729050771e-06, "loss": 0.1286, "step": 29655 }, { "epoch": 3.47, "grad_norm": 0.6528806528996806, "learning_rate": 2.1718261457042198e-06, "loss": 0.1027, "step": 29660 }, { "epoch": 3.47, "grad_norm": 0.9261994985098858, "learning_rate": 2.1671523679273913e-06, "loss": 0.0942, "step": 29665 }, { "epoch": 3.47, "grad_norm": 0.873180430554436, "learning_rate": 2.162483396704157e-06, "loss": 0.0828, "step": 29670 }, { "epoch": 3.47, "grad_norm": 0.7506590214474583, "learning_rate": 2.1578192330173884e-06, "loss": 0.1154, "step": 29675 }, { "epoch": 3.47, "grad_norm": 0.5714440020364331, "learning_rate": 2.153159877848945e-06, "loss": 0.0952, "step": 29680 }, { "epoch": 3.47, "grad_norm": 0.6545279794856892, "learning_rate": 2.148505332179662e-06, "loss": 0.1016, "step": 29685 }, { "epoch": 3.47, "grad_norm": 0.6759975362827372, "learning_rate": 2.143855596989375e-06, "loss": 0.1018, "step": 29690 }, { "epoch": 3.47, "grad_norm": 0.8175998225899073, "learning_rate": 2.139210673256906e-06, "loss": 0.0752, "step": 29695 }, { "epoch": 3.47, "grad_norm": 0.781744949817547, "learning_rate": 2.1345705619600505e-06, "loss": 0.1122, "step": 29700 }, { "epoch": 3.47, "grad_norm": 0.6541858555385428, "learning_rate": 2.129935264075608e-06, "loss": 0.1195, "step": 29705 }, { "epoch": 3.47, "grad_norm": 0.8150222126809816, "learning_rate": 2.1253047805793512e-06, "loss": 0.1123, "step": 29710 }, { "epoch": 3.47, "grad_norm": 0.5947498118902488, "learning_rate": 2.1206791124460475e-06, "loss": 0.1115, "step": 29715 }, { "epoch": 3.47, "grad_norm": 0.7376340811720784, "learning_rate": 2.1160582606494504e-06, "loss": 0.1145, "step": 29720 }, { "epoch": 3.47, "grad_norm": 0.6914320295135671, "learning_rate": 2.1114422261623024e-06, "loss": 0.0882, "step": 29725 }, { "epoch": 3.47, "grad_norm": 0.8912485210262087, "learning_rate": 2.1068310099563197e-06, "loss": 0.1109, "step": 29730 }, { "epoch": 3.47, "grad_norm": 0.7760332751416525, "learning_rate": 2.10222461300221e-06, "loss": 0.1077, "step": 29735 }, { "epoch": 3.47, "grad_norm": 0.7159138146194893, "learning_rate": 2.0976230362696717e-06, "loss": 0.1135, "step": 29740 }, { "epoch": 3.48, "grad_norm": 0.8504758711423386, "learning_rate": 2.0930262807273838e-06, "loss": 0.0995, "step": 29745 }, { "epoch": 3.48, "grad_norm": 0.6697499728590433, "learning_rate": 2.0884343473430187e-06, "loss": 0.1078, "step": 29750 }, { "epoch": 3.48, "grad_norm": 0.8488219778616212, "learning_rate": 2.083847237083214e-06, "loss": 0.1043, "step": 29755 }, { "epoch": 3.48, "grad_norm": 0.6269326423906878, "learning_rate": 2.0792649509136176e-06, "loss": 0.1143, "step": 29760 }, { "epoch": 3.48, "grad_norm": 0.5635992543444682, "learning_rate": 2.074687489798835e-06, "loss": 0.0879, "step": 29765 }, { "epoch": 3.48, "grad_norm": 0.6632833663663561, "learning_rate": 2.070114854702479e-06, "loss": 0.1017, "step": 29770 }, { "epoch": 3.48, "grad_norm": 0.8563487899051208, "learning_rate": 2.065547046587135e-06, "loss": 0.112, "step": 29775 }, { "epoch": 3.48, "grad_norm": 1.9769101649494136, "learning_rate": 2.060984066414373e-06, "loss": 0.1344, "step": 29780 }, { "epoch": 3.48, "grad_norm": 0.6512013725430853, "learning_rate": 2.0564259151447495e-06, "loss": 0.1033, "step": 29785 }, { "epoch": 3.48, "grad_norm": 0.7692582266715774, "learning_rate": 2.051872593737808e-06, "loss": 0.0995, "step": 29790 }, { "epoch": 3.48, "grad_norm": 0.80599170768257, "learning_rate": 2.0473241031520595e-06, "loss": 0.1028, "step": 29795 }, { "epoch": 3.48, "grad_norm": 0.6177769690351818, "learning_rate": 2.042780444345019e-06, "loss": 0.089, "step": 29800 }, { "epoch": 3.48, "grad_norm": 0.6704388565558964, "learning_rate": 2.038241618273165e-06, "loss": 0.1069, "step": 29805 }, { "epoch": 3.48, "grad_norm": 0.7145042613746281, "learning_rate": 2.033707625891973e-06, "loss": 0.1002, "step": 29810 }, { "epoch": 3.48, "grad_norm": 0.7255153904857279, "learning_rate": 2.029178468155896e-06, "loss": 0.1113, "step": 29815 }, { "epoch": 3.48, "grad_norm": 0.6002068071953629, "learning_rate": 2.024654146018373e-06, "loss": 0.1069, "step": 29820 }, { "epoch": 3.48, "grad_norm": 0.6618288339805396, "learning_rate": 2.020134660431819e-06, "loss": 0.0954, "step": 29825 }, { "epoch": 3.49, "grad_norm": 0.7243744673563712, "learning_rate": 2.015620012347627e-06, "loss": 0.1057, "step": 29830 }, { "epoch": 3.49, "grad_norm": 0.6837794056184231, "learning_rate": 2.0111102027161795e-06, "loss": 0.128, "step": 29835 }, { "epoch": 3.49, "grad_norm": 0.7147791612819262, "learning_rate": 2.006605232486841e-06, "loss": 0.112, "step": 29840 }, { "epoch": 3.49, "grad_norm": 0.6344571573075815, "learning_rate": 2.0021051026079625e-06, "loss": 0.1153, "step": 29845 }, { "epoch": 3.49, "grad_norm": 0.8699061730027201, "learning_rate": 1.9976098140268547e-06, "loss": 0.1049, "step": 29850 }, { "epoch": 3.49, "grad_norm": 0.8577047826517152, "learning_rate": 1.993119367689833e-06, "loss": 0.1252, "step": 29855 }, { "epoch": 3.49, "grad_norm": 0.8373719155975242, "learning_rate": 1.9886337645421748e-06, "loss": 0.0908, "step": 29860 }, { "epoch": 3.49, "grad_norm": 0.81009924448923, "learning_rate": 1.9841530055281527e-06, "loss": 0.1044, "step": 29865 }, { "epoch": 3.49, "grad_norm": 0.7653155678713628, "learning_rate": 1.979677091591017e-06, "loss": 0.1066, "step": 29870 }, { "epoch": 3.49, "grad_norm": 0.8845095186235438, "learning_rate": 1.9752060236729847e-06, "loss": 0.1106, "step": 29875 }, { "epoch": 3.49, "grad_norm": 0.6219240422414486, "learning_rate": 1.9707398027152657e-06, "loss": 0.0809, "step": 29880 }, { "epoch": 3.49, "grad_norm": 0.6679866263327529, "learning_rate": 1.96627842965805e-06, "loss": 0.1104, "step": 29885 }, { "epoch": 3.49, "grad_norm": 0.7040350693426078, "learning_rate": 1.961821905440503e-06, "loss": 0.113, "step": 29890 }, { "epoch": 3.49, "grad_norm": 0.5605961104647257, "learning_rate": 1.9573702310007685e-06, "loss": 0.0878, "step": 29895 }, { "epoch": 3.49, "grad_norm": 0.741879989123289, "learning_rate": 1.9529234072759666e-06, "loss": 0.1033, "step": 29900 }, { "epoch": 3.49, "grad_norm": 0.7520960961749575, "learning_rate": 1.948481435202204e-06, "loss": 0.1207, "step": 29905 }, { "epoch": 3.49, "grad_norm": 0.6861962547627196, "learning_rate": 1.944044315714563e-06, "loss": 0.0865, "step": 29910 }, { "epoch": 3.5, "grad_norm": 0.8830763845265285, "learning_rate": 1.9396120497471055e-06, "loss": 0.1127, "step": 29915 }, { "epoch": 3.5, "grad_norm": 0.7228421517754152, "learning_rate": 1.9351846382328647e-06, "loss": 0.1066, "step": 29920 }, { "epoch": 3.5, "grad_norm": 0.7289905994994464, "learning_rate": 1.9307620821038624e-06, "loss": 0.1057, "step": 29925 }, { "epoch": 3.5, "grad_norm": 0.6549743339898696, "learning_rate": 1.926344382291084e-06, "loss": 0.1148, "step": 29930 }, { "epoch": 3.5, "grad_norm": 0.6763036977054995, "learning_rate": 1.9219315397245112e-06, "loss": 0.0913, "step": 29935 }, { "epoch": 3.5, "grad_norm": 0.729354571440642, "learning_rate": 1.917523555333092e-06, "loss": 0.0936, "step": 29940 }, { "epoch": 3.5, "grad_norm": 0.7220866756089067, "learning_rate": 1.9131204300447485e-06, "loss": 0.0981, "step": 29945 }, { "epoch": 3.5, "grad_norm": 0.641621699347034, "learning_rate": 1.908722164786389e-06, "loss": 0.0905, "step": 29950 }, { "epoch": 3.5, "grad_norm": 0.829459591491851, "learning_rate": 1.904328760483895e-06, "loss": 0.1013, "step": 29955 }, { "epoch": 3.5, "grad_norm": 0.8684300945979784, "learning_rate": 1.899940218062124e-06, "loss": 0.1315, "step": 29960 }, { "epoch": 3.5, "grad_norm": 0.7534157393863317, "learning_rate": 1.8955565384449009e-06, "loss": 0.1049, "step": 29965 }, { "epoch": 3.5, "grad_norm": 0.9170822053165137, "learning_rate": 1.891177722555046e-06, "loss": 0.1291, "step": 29970 }, { "epoch": 3.5, "grad_norm": 0.7201914176973393, "learning_rate": 1.8868037713143438e-06, "loss": 0.0883, "step": 29975 }, { "epoch": 3.5, "grad_norm": 0.5993128329341708, "learning_rate": 1.8824346856435554e-06, "loss": 0.0814, "step": 29980 }, { "epoch": 3.5, "grad_norm": 0.6684869037525144, "learning_rate": 1.8780704664624254e-06, "loss": 0.1095, "step": 29985 }, { "epoch": 3.5, "grad_norm": 0.6591917817028597, "learning_rate": 1.8737111146896636e-06, "loss": 0.1021, "step": 29990 }, { "epoch": 3.5, "grad_norm": 0.8337658355263574, "learning_rate": 1.8693566312429523e-06, "loss": 0.1213, "step": 29995 }, { "epoch": 3.51, "grad_norm": 0.7030770025677868, "learning_rate": 1.8650070170389638e-06, "loss": 0.0988, "step": 30000 }, { "epoch": 3.51, "grad_norm": 0.6724310530097579, "learning_rate": 1.8606622729933327e-06, "loss": 0.0939, "step": 30005 }, { "epoch": 3.51, "grad_norm": 0.601669461990693, "learning_rate": 1.8563224000206797e-06, "loss": 0.121, "step": 30010 }, { "epoch": 3.51, "grad_norm": 0.6694623566725991, "learning_rate": 1.851987399034588e-06, "loss": 0.1158, "step": 30015 }, { "epoch": 3.51, "grad_norm": 0.7579723088424573, "learning_rate": 1.8476572709476252e-06, "loss": 0.1136, "step": 30020 }, { "epoch": 3.51, "grad_norm": 0.7066572789129003, "learning_rate": 1.843332016671323e-06, "loss": 0.1164, "step": 30025 }, { "epoch": 3.51, "grad_norm": 0.6848351202689783, "learning_rate": 1.8390116371161975e-06, "loss": 0.1333, "step": 30030 }, { "epoch": 3.51, "grad_norm": 0.6090232475076448, "learning_rate": 1.8346961331917268e-06, "loss": 0.0904, "step": 30035 }, { "epoch": 3.51, "grad_norm": 0.8349273496802704, "learning_rate": 1.8303855058063785e-06, "loss": 0.1, "step": 30040 }, { "epoch": 3.51, "grad_norm": 0.5877918752062457, "learning_rate": 1.8260797558675795e-06, "loss": 0.0795, "step": 30045 }, { "epoch": 3.51, "grad_norm": 0.6970295666938696, "learning_rate": 1.8217788842817385e-06, "loss": 0.1142, "step": 30050 }, { "epoch": 3.51, "grad_norm": 0.6864493678625406, "learning_rate": 1.8174828919542365e-06, "loss": 0.0817, "step": 30055 }, { "epoch": 3.51, "grad_norm": 0.7567386165997011, "learning_rate": 1.8131917797894137e-06, "loss": 0.0951, "step": 30060 }, { "epoch": 3.51, "grad_norm": 0.7009500549872775, "learning_rate": 1.808905548690601e-06, "loss": 0.1221, "step": 30065 }, { "epoch": 3.51, "grad_norm": 0.6867681321454739, "learning_rate": 1.8046241995600955e-06, "loss": 0.0993, "step": 30070 }, { "epoch": 3.51, "grad_norm": 0.8387130800187012, "learning_rate": 1.800347733299168e-06, "loss": 0.1073, "step": 30075 }, { "epoch": 3.51, "grad_norm": 0.7515601317918266, "learning_rate": 1.7960761508080598e-06, "loss": 0.1131, "step": 30080 }, { "epoch": 3.52, "grad_norm": 1.007349960605761, "learning_rate": 1.791809452985979e-06, "loss": 0.1212, "step": 30085 }, { "epoch": 3.52, "grad_norm": 0.5772775531085205, "learning_rate": 1.7875476407311104e-06, "loss": 0.1129, "step": 30090 }, { "epoch": 3.52, "grad_norm": 0.8278259993212991, "learning_rate": 1.7832907149406107e-06, "loss": 0.1233, "step": 30095 }, { "epoch": 3.52, "grad_norm": 0.69576482445141, "learning_rate": 1.7790386765106136e-06, "loss": 0.122, "step": 30100 }, { "epoch": 3.52, "grad_norm": 0.764394656390215, "learning_rate": 1.7747915263362087e-06, "loss": 0.1031, "step": 30105 }, { "epoch": 3.52, "grad_norm": 0.7131820499632343, "learning_rate": 1.7705492653114692e-06, "loss": 0.127, "step": 30110 }, { "epoch": 3.52, "grad_norm": 0.7916639532196048, "learning_rate": 1.7663118943294366e-06, "loss": 0.0958, "step": 30115 }, { "epoch": 3.52, "grad_norm": 0.6555928940259859, "learning_rate": 1.7620794142821252e-06, "loss": 0.0971, "step": 30120 }, { "epoch": 3.52, "grad_norm": 0.7408316574870435, "learning_rate": 1.7578518260605136e-06, "loss": 0.1075, "step": 30125 }, { "epoch": 3.52, "grad_norm": 0.7308757022576423, "learning_rate": 1.753629130554546e-06, "loss": 0.111, "step": 30130 }, { "epoch": 3.52, "grad_norm": 0.6917739341510492, "learning_rate": 1.74941132865315e-06, "loss": 0.09, "step": 30135 }, { "epoch": 3.52, "grad_norm": 0.6396529175396755, "learning_rate": 1.745198421244218e-06, "loss": 0.0849, "step": 30140 }, { "epoch": 3.52, "grad_norm": 0.7181317820142172, "learning_rate": 1.740990409214613e-06, "loss": 0.1148, "step": 30145 }, { "epoch": 3.52, "grad_norm": 0.7552779098443219, "learning_rate": 1.736787293450165e-06, "loss": 0.1099, "step": 30150 }, { "epoch": 3.52, "grad_norm": 0.6042706706691103, "learning_rate": 1.732589074835672e-06, "loss": 0.106, "step": 30155 }, { "epoch": 3.52, "grad_norm": 0.7693145189900836, "learning_rate": 1.7283957542548967e-06, "loss": 0.1032, "step": 30160 }, { "epoch": 3.52, "grad_norm": 0.5757751422567693, "learning_rate": 1.7242073325905855e-06, "loss": 0.0904, "step": 30165 }, { "epoch": 3.52, "grad_norm": 0.7345532378064857, "learning_rate": 1.720023810724447e-06, "loss": 0.0947, "step": 30170 }, { "epoch": 3.53, "grad_norm": 0.672432548764249, "learning_rate": 1.7158451895371492e-06, "loss": 0.1209, "step": 30175 }, { "epoch": 3.53, "grad_norm": 0.78512066957067, "learning_rate": 1.711671469908338e-06, "loss": 0.1142, "step": 30180 }, { "epoch": 3.53, "grad_norm": 0.753435965669234, "learning_rate": 1.7075026527166276e-06, "loss": 0.1242, "step": 30185 }, { "epoch": 3.53, "grad_norm": 0.6875550180403542, "learning_rate": 1.7033387388395939e-06, "loss": 0.1147, "step": 30190 }, { "epoch": 3.53, "grad_norm": 0.6858820948959818, "learning_rate": 1.699179729153791e-06, "loss": 0.1085, "step": 30195 }, { "epoch": 3.53, "grad_norm": 0.8546619948360874, "learning_rate": 1.695025624534724e-06, "loss": 0.1319, "step": 30200 }, { "epoch": 3.53, "grad_norm": 0.7589788955019657, "learning_rate": 1.6908764258568798e-06, "loss": 0.0927, "step": 30205 }, { "epoch": 3.53, "grad_norm": 0.7109319974877252, "learning_rate": 1.6867321339937097e-06, "loss": 0.0883, "step": 30210 }, { "epoch": 3.53, "grad_norm": 0.7844257336172835, "learning_rate": 1.6825927498176352e-06, "loss": 0.1026, "step": 30215 }, { "epoch": 3.53, "grad_norm": 0.7270447619891282, "learning_rate": 1.6784582742000344e-06, "loss": 0.1074, "step": 30220 }, { "epoch": 3.53, "grad_norm": 0.6296880993887636, "learning_rate": 1.6743287080112551e-06, "loss": 0.0765, "step": 30225 }, { "epoch": 3.53, "grad_norm": 0.7570993723475119, "learning_rate": 1.6702040521206164e-06, "loss": 0.111, "step": 30230 }, { "epoch": 3.53, "grad_norm": 0.6729880671579386, "learning_rate": 1.666084307396401e-06, "loss": 0.1044, "step": 30235 }, { "epoch": 3.53, "grad_norm": 0.7117127035916897, "learning_rate": 1.6619694747058655e-06, "loss": 0.1186, "step": 30240 }, { "epoch": 3.53, "grad_norm": 0.8451615844004793, "learning_rate": 1.6578595549152143e-06, "loss": 0.1144, "step": 30245 }, { "epoch": 3.53, "grad_norm": 0.6799578625344473, "learning_rate": 1.6537545488896383e-06, "loss": 0.1051, "step": 30250 }, { "epoch": 3.53, "grad_norm": 0.7528467138154912, "learning_rate": 1.6496544574932742e-06, "loss": 0.1656, "step": 30255 }, { "epoch": 3.54, "grad_norm": 0.632563571183305, "learning_rate": 1.6455592815892429e-06, "loss": 0.0849, "step": 30260 }, { "epoch": 3.54, "grad_norm": 0.7278141213811052, "learning_rate": 1.6414690220396212e-06, "loss": 0.0943, "step": 30265 }, { "epoch": 3.54, "grad_norm": 0.6980445332990993, "learning_rate": 1.6373836797054454e-06, "loss": 0.0979, "step": 30270 }, { "epoch": 3.54, "grad_norm": 0.7086109283789179, "learning_rate": 1.6333032554467247e-06, "loss": 0.1256, "step": 30275 }, { "epoch": 3.54, "grad_norm": 0.6240411305109839, "learning_rate": 1.629227750122439e-06, "loss": 0.0865, "step": 30280 }, { "epoch": 3.54, "grad_norm": 0.7338511459381485, "learning_rate": 1.6251571645905155e-06, "loss": 0.104, "step": 30285 }, { "epoch": 3.54, "grad_norm": 0.5954368010784094, "learning_rate": 1.6210914997078608e-06, "loss": 0.0997, "step": 30290 }, { "epoch": 3.54, "grad_norm": 0.7219874291456158, "learning_rate": 1.6170307563303345e-06, "loss": 0.1178, "step": 30295 }, { "epoch": 3.54, "grad_norm": 0.663865683842347, "learning_rate": 1.6129749353127666e-06, "loss": 0.1089, "step": 30300 }, { "epoch": 3.54, "grad_norm": 0.6370785721988429, "learning_rate": 1.6089240375089548e-06, "loss": 0.1013, "step": 30305 }, { "epoch": 3.54, "grad_norm": 0.6977146089209333, "learning_rate": 1.604878063771656e-06, "loss": 0.0833, "step": 30310 }, { "epoch": 3.54, "grad_norm": 0.728816747907338, "learning_rate": 1.600837014952586e-06, "loss": 0.135, "step": 30315 }, { "epoch": 3.54, "grad_norm": 0.6861784533880604, "learning_rate": 1.5968008919024252e-06, "loss": 0.1003, "step": 30320 }, { "epoch": 3.54, "grad_norm": 0.7388362126691856, "learning_rate": 1.592769695470822e-06, "loss": 0.1037, "step": 30325 }, { "epoch": 3.54, "grad_norm": 0.6320539051673353, "learning_rate": 1.5887434265063867e-06, "loss": 0.1011, "step": 30330 }, { "epoch": 3.54, "grad_norm": 0.7347120604634856, "learning_rate": 1.5847220858566963e-06, "loss": 0.106, "step": 30335 }, { "epoch": 3.54, "grad_norm": 0.7565692857938016, "learning_rate": 1.5807056743682768e-06, "loss": 0.0995, "step": 30340 }, { "epoch": 3.55, "grad_norm": 0.7274958811806206, "learning_rate": 1.5766941928866268e-06, "loss": 0.1176, "step": 30345 }, { "epoch": 3.55, "grad_norm": 0.7476399689038891, "learning_rate": 1.5726876422562097e-06, "loss": 0.099, "step": 30350 }, { "epoch": 3.55, "grad_norm": 0.7032509082437083, "learning_rate": 1.5686860233204426e-06, "loss": 0.1046, "step": 30355 }, { "epoch": 3.55, "grad_norm": 0.7185373977541134, "learning_rate": 1.5646893369217098e-06, "loss": 0.1116, "step": 30360 }, { "epoch": 3.55, "grad_norm": 0.8439230053912808, "learning_rate": 1.5606975839013493e-06, "loss": 0.1347, "step": 30365 }, { "epoch": 3.55, "grad_norm": 0.8656226453840884, "learning_rate": 1.5567107650996755e-06, "loss": 0.1486, "step": 30370 }, { "epoch": 3.55, "grad_norm": 0.7463729970352376, "learning_rate": 1.55272888135595e-06, "loss": 0.0896, "step": 30375 }, { "epoch": 3.55, "grad_norm": 0.8349109123694637, "learning_rate": 1.5487519335084077e-06, "loss": 0.1267, "step": 30380 }, { "epoch": 3.55, "grad_norm": 0.6105206625714796, "learning_rate": 1.5447799223942322e-06, "loss": 0.1269, "step": 30385 }, { "epoch": 3.55, "grad_norm": 0.7396446408486453, "learning_rate": 1.5408128488495738e-06, "loss": 0.1163, "step": 30390 }, { "epoch": 3.55, "grad_norm": 0.6189592696357049, "learning_rate": 1.5368507137095395e-06, "loss": 0.103, "step": 30395 }, { "epoch": 3.55, "grad_norm": 0.8032044614354412, "learning_rate": 1.5328935178082093e-06, "loss": 0.0941, "step": 30400 }, { "epoch": 3.55, "grad_norm": 0.695375524556541, "learning_rate": 1.5289412619786087e-06, "loss": 0.1015, "step": 30405 }, { "epoch": 3.55, "grad_norm": 0.7058744521661271, "learning_rate": 1.5249939470527303e-06, "loss": 0.1198, "step": 30410 }, { "epoch": 3.55, "grad_norm": 0.7093769818338022, "learning_rate": 1.521051573861526e-06, "loss": 0.1108, "step": 30415 }, { "epoch": 3.55, "grad_norm": 0.6873656152446026, "learning_rate": 1.5171141432349017e-06, "loss": 0.1198, "step": 30420 }, { "epoch": 3.55, "grad_norm": 0.8086205495645065, "learning_rate": 1.5131816560017332e-06, "loss": 0.1017, "step": 30425 }, { "epoch": 3.56, "grad_norm": 0.622626190627697, "learning_rate": 1.5092541129898496e-06, "loss": 0.093, "step": 30430 }, { "epoch": 3.56, "grad_norm": 0.8382484078300015, "learning_rate": 1.5053315150260372e-06, "loss": 0.1166, "step": 30435 }, { "epoch": 3.56, "grad_norm": 0.7707883733747339, "learning_rate": 1.5014138629360464e-06, "loss": 0.1226, "step": 30440 }, { "epoch": 3.56, "grad_norm": 1.1690520415423127, "learning_rate": 1.4975011575445868e-06, "loss": 0.1357, "step": 30445 }, { "epoch": 3.56, "grad_norm": 0.7405758833093162, "learning_rate": 1.4935933996753193e-06, "loss": 0.1058, "step": 30450 }, { "epoch": 3.56, "grad_norm": 0.7929422386184488, "learning_rate": 1.4896905901508634e-06, "loss": 0.0947, "step": 30455 }, { "epoch": 3.56, "grad_norm": 0.5315884543493301, "learning_rate": 1.4857927297928093e-06, "loss": 0.0993, "step": 30460 }, { "epoch": 3.56, "grad_norm": 0.6672226288493984, "learning_rate": 1.481899819421695e-06, "loss": 0.1018, "step": 30465 }, { "epoch": 3.56, "grad_norm": 0.7490504824315871, "learning_rate": 1.4780118598570176e-06, "loss": 0.0798, "step": 30470 }, { "epoch": 3.56, "grad_norm": 0.8520425237413395, "learning_rate": 1.4741288519172392e-06, "loss": 0.1076, "step": 30475 }, { "epoch": 3.56, "grad_norm": 0.9415960741560183, "learning_rate": 1.4702507964197698e-06, "loss": 0.1146, "step": 30480 }, { "epoch": 3.56, "grad_norm": 0.854722336985771, "learning_rate": 1.4663776941809758e-06, "loss": 0.1306, "step": 30485 }, { "epoch": 3.56, "grad_norm": 0.6425322403902617, "learning_rate": 1.4625095460161886e-06, "loss": 0.085, "step": 30490 }, { "epoch": 3.56, "grad_norm": 0.6880677266207403, "learning_rate": 1.4586463527396953e-06, "loss": 0.0874, "step": 30495 }, { "epoch": 3.56, "grad_norm": 0.760682069787075, "learning_rate": 1.454788115164743e-06, "loss": 0.1349, "step": 30500 }, { "epoch": 3.56, "grad_norm": 0.7350827242572235, "learning_rate": 1.4509348341035234e-06, "loss": 0.1473, "step": 30505 }, { "epoch": 3.56, "grad_norm": 0.5100135971975714, "learning_rate": 1.4470865103671988e-06, "loss": 0.0787, "step": 30510 }, { "epoch": 3.57, "grad_norm": 0.6221786234969559, "learning_rate": 1.4432431447658768e-06, "loss": 0.1011, "step": 30515 }, { "epoch": 3.57, "grad_norm": 0.7495511918024548, "learning_rate": 1.4394047381086296e-06, "loss": 0.1126, "step": 30520 }, { "epoch": 3.57, "grad_norm": 0.7170330828502082, "learning_rate": 1.4355712912034747e-06, "loss": 0.0987, "step": 30525 }, { "epoch": 3.57, "grad_norm": 0.8918494796035314, "learning_rate": 1.4317428048574028e-06, "loss": 0.1231, "step": 30530 }, { "epoch": 3.57, "grad_norm": 0.6702582202625147, "learning_rate": 1.4279192798763442e-06, "loss": 0.1157, "step": 30535 }, { "epoch": 3.57, "grad_norm": 0.7101520738050748, "learning_rate": 1.4241007170651966e-06, "loss": 0.1013, "step": 30540 }, { "epoch": 3.57, "grad_norm": 0.749895158624273, "learning_rate": 1.4202871172277976e-06, "loss": 0.1, "step": 30545 }, { "epoch": 3.57, "grad_norm": 0.7012710684574495, "learning_rate": 1.4164784811669634e-06, "loss": 0.0837, "step": 30550 }, { "epoch": 3.57, "grad_norm": 0.7544510955121172, "learning_rate": 1.4126748096844389e-06, "loss": 0.1127, "step": 30555 }, { "epoch": 3.57, "grad_norm": 0.799453078811553, "learning_rate": 1.4088761035809418e-06, "loss": 0.1139, "step": 30560 }, { "epoch": 3.57, "grad_norm": 0.671300088548873, "learning_rate": 1.4050823636561405e-06, "loss": 0.0998, "step": 30565 }, { "epoch": 3.57, "grad_norm": 0.6814785351247556, "learning_rate": 1.4012935907086604e-06, "loss": 0.1028, "step": 30570 }, { "epoch": 3.57, "grad_norm": 0.6033503820126753, "learning_rate": 1.3975097855360719e-06, "loss": 0.1141, "step": 30575 }, { "epoch": 3.57, "grad_norm": 0.8607250181611813, "learning_rate": 1.3937309489349098e-06, "loss": 0.1136, "step": 30580 }, { "epoch": 3.57, "grad_norm": 0.7260714249231374, "learning_rate": 1.3899570817006518e-06, "loss": 0.0724, "step": 30585 }, { "epoch": 3.57, "grad_norm": 1.0134082384505552, "learning_rate": 1.3861881846277457e-06, "loss": 0.1057, "step": 30590 }, { "epoch": 3.57, "grad_norm": 0.6903353111328807, "learning_rate": 1.3824242585095765e-06, "loss": 0.0904, "step": 30595 }, { "epoch": 3.58, "grad_norm": 0.6479093169972459, "learning_rate": 1.3786653041384934e-06, "loss": 0.1253, "step": 30600 }, { "epoch": 3.58, "grad_norm": 0.706618052323915, "learning_rate": 1.374911322305794e-06, "loss": 0.0928, "step": 30605 }, { "epoch": 3.58, "grad_norm": 0.7730806081105609, "learning_rate": 1.3711623138017382e-06, "loss": 0.1093, "step": 30610 }, { "epoch": 3.58, "grad_norm": 0.8110782556336296, "learning_rate": 1.3674182794155221e-06, "loss": 0.1241, "step": 30615 }, { "epoch": 3.58, "grad_norm": 0.7101880388993977, "learning_rate": 1.3636792199353072e-06, "loss": 0.134, "step": 30620 }, { "epoch": 3.58, "grad_norm": 0.7609645809319204, "learning_rate": 1.3599451361482023e-06, "loss": 0.0994, "step": 30625 }, { "epoch": 3.58, "grad_norm": 0.7251122545405657, "learning_rate": 1.3562160288402765e-06, "loss": 0.104, "step": 30630 }, { "epoch": 3.58, "grad_norm": 0.701560935746696, "learning_rate": 1.352491898796543e-06, "loss": 0.1069, "step": 30635 }, { "epoch": 3.58, "grad_norm": 0.6962299398585544, "learning_rate": 1.348772746800972e-06, "loss": 0.0824, "step": 30640 }, { "epoch": 3.58, "grad_norm": 0.6115203717305909, "learning_rate": 1.3450585736364845e-06, "loss": 0.1069, "step": 30645 }, { "epoch": 3.58, "grad_norm": 0.7932417908654995, "learning_rate": 1.341349380084947e-06, "loss": 0.0944, "step": 30650 }, { "epoch": 3.58, "grad_norm": 0.7533833079263604, "learning_rate": 1.3376451669271843e-06, "loss": 0.1146, "step": 30655 }, { "epoch": 3.58, "grad_norm": 0.8226205256475028, "learning_rate": 1.3339459349429817e-06, "loss": 0.0796, "step": 30660 }, { "epoch": 3.58, "grad_norm": 0.9158840146963627, "learning_rate": 1.3302516849110575e-06, "loss": 0.1128, "step": 30665 }, { "epoch": 3.58, "grad_norm": 0.9103873327014819, "learning_rate": 1.3265624176090896e-06, "loss": 0.1097, "step": 30670 }, { "epoch": 3.58, "grad_norm": 0.9007267389169074, "learning_rate": 1.3228781338137154e-06, "loss": 0.1209, "step": 30675 }, { "epoch": 3.58, "grad_norm": 0.6968928320298946, "learning_rate": 1.3191988343005057e-06, "loss": 0.1139, "step": 30680 }, { "epoch": 3.59, "grad_norm": 0.7790285046756766, "learning_rate": 1.3155245198439998e-06, "loss": 0.1107, "step": 30685 }, { "epoch": 3.59, "grad_norm": 0.8161993660905291, "learning_rate": 1.3118551912176702e-06, "loss": 0.1258, "step": 30690 }, { "epoch": 3.59, "grad_norm": 0.7584225985260754, "learning_rate": 1.3081908491939548e-06, "loss": 0.0953, "step": 30695 }, { "epoch": 3.59, "grad_norm": 0.6214847642514225, "learning_rate": 1.3045314945442365e-06, "loss": 0.1364, "step": 30700 }, { "epoch": 3.59, "grad_norm": 0.7803490102127154, "learning_rate": 1.3008771280388493e-06, "loss": 0.0973, "step": 30705 }, { "epoch": 3.59, "grad_norm": 0.8423252221511995, "learning_rate": 1.2972277504470747e-06, "loss": 0.1052, "step": 30710 }, { "epoch": 3.59, "grad_norm": 0.8632699019500358, "learning_rate": 1.2935833625371402e-06, "loss": 0.1539, "step": 30715 }, { "epoch": 3.59, "grad_norm": 0.6369688793359027, "learning_rate": 1.289943965076229e-06, "loss": 0.1059, "step": 30720 }, { "epoch": 3.59, "grad_norm": 0.7977223320859471, "learning_rate": 1.286309558830473e-06, "loss": 0.1083, "step": 30725 }, { "epoch": 3.59, "grad_norm": 0.7072128610398377, "learning_rate": 1.2826801445649572e-06, "loss": 0.0926, "step": 30730 }, { "epoch": 3.59, "grad_norm": 0.8339533938867757, "learning_rate": 1.2790557230437067e-06, "loss": 0.1155, "step": 30735 }, { "epoch": 3.59, "grad_norm": 0.7905407541306227, "learning_rate": 1.2754362950297056e-06, "loss": 0.1047, "step": 30740 }, { "epoch": 3.59, "grad_norm": 1.0314729179383137, "learning_rate": 1.271821861284872e-06, "loss": 0.1384, "step": 30745 }, { "epoch": 3.59, "grad_norm": 0.7513239790318796, "learning_rate": 1.2682124225700864e-06, "loss": 0.1019, "step": 30750 }, { "epoch": 3.59, "grad_norm": 0.8367626738826848, "learning_rate": 1.2646079796451799e-06, "loss": 0.1216, "step": 30755 }, { "epoch": 3.59, "grad_norm": 0.7439746037629938, "learning_rate": 1.2610085332689148e-06, "loss": 0.081, "step": 30760 }, { "epoch": 3.59, "grad_norm": 0.6648709530113841, "learning_rate": 1.2574140841990183e-06, "loss": 0.0994, "step": 30765 }, { "epoch": 3.6, "grad_norm": 0.6738018534951443, "learning_rate": 1.2538246331921633e-06, "loss": 0.0976, "step": 30770 }, { "epoch": 3.6, "grad_norm": 0.7370853713720266, "learning_rate": 1.250240181003956e-06, "loss": 0.1048, "step": 30775 }, { "epoch": 3.6, "grad_norm": 0.7794566942866542, "learning_rate": 1.2466607283889735e-06, "loss": 0.1225, "step": 30780 }, { "epoch": 3.6, "grad_norm": 0.7887026521849617, "learning_rate": 1.2430862761007162e-06, "loss": 0.1136, "step": 30785 }, { "epoch": 3.6, "grad_norm": 0.8797796784147511, "learning_rate": 1.239516824891651e-06, "loss": 0.1018, "step": 30790 }, { "epoch": 3.6, "grad_norm": 0.7385919099057635, "learning_rate": 1.235952375513183e-06, "loss": 0.1094, "step": 30795 }, { "epoch": 3.6, "grad_norm": 0.7626768551121854, "learning_rate": 1.23239292871567e-06, "loss": 0.0979, "step": 30800 }, { "epoch": 3.6, "grad_norm": 0.7017793073840625, "learning_rate": 1.228838485248407e-06, "loss": 0.0754, "step": 30805 }, { "epoch": 3.6, "grad_norm": 0.6892827610660984, "learning_rate": 1.2252890458596456e-06, "loss": 0.1122, "step": 30810 }, { "epoch": 3.6, "grad_norm": 0.7292032464870041, "learning_rate": 1.2217446112965742e-06, "loss": 0.1065, "step": 30815 }, { "epoch": 3.6, "grad_norm": 0.8284638103765318, "learning_rate": 1.2182051823053403e-06, "loss": 0.1117, "step": 30820 }, { "epoch": 3.6, "grad_norm": 0.852184232673036, "learning_rate": 1.2146707596310287e-06, "loss": 0.1252, "step": 30825 }, { "epoch": 3.6, "grad_norm": 0.8685632630499958, "learning_rate": 1.2111413440176689e-06, "loss": 0.1355, "step": 30830 }, { "epoch": 3.6, "grad_norm": 0.740040692250554, "learning_rate": 1.2076169362082417e-06, "loss": 0.1038, "step": 30835 }, { "epoch": 3.6, "grad_norm": 0.6154452377136472, "learning_rate": 1.204097536944679e-06, "loss": 0.0991, "step": 30840 }, { "epoch": 3.6, "grad_norm": 0.8404543237894199, "learning_rate": 1.2005831469678375e-06, "loss": 0.1006, "step": 30845 }, { "epoch": 3.6, "grad_norm": 0.6151444357096342, "learning_rate": 1.197073767017548e-06, "loss": 0.12, "step": 30850 }, { "epoch": 3.6, "grad_norm": 0.7415547058952063, "learning_rate": 1.1935693978325586e-06, "loss": 0.0957, "step": 30855 }, { "epoch": 3.61, "grad_norm": 0.7183084712073893, "learning_rate": 1.1900700401505815e-06, "loss": 0.1165, "step": 30860 }, { "epoch": 3.61, "grad_norm": 0.6758753993057471, "learning_rate": 1.1865756947082668e-06, "loss": 0.0821, "step": 30865 }, { "epoch": 3.61, "grad_norm": 0.788278272907019, "learning_rate": 1.1830863622412174e-06, "loss": 0.1298, "step": 30870 }, { "epoch": 3.61, "grad_norm": 0.6406636438402642, "learning_rate": 1.1796020434839684e-06, "loss": 0.1086, "step": 30875 }, { "epoch": 3.61, "grad_norm": 0.7689486724984816, "learning_rate": 1.176122739170002e-06, "loss": 0.0959, "step": 30880 }, { "epoch": 3.61, "grad_norm": 0.8118827275917105, "learning_rate": 1.1726484500317519e-06, "loss": 0.1128, "step": 30885 }, { "epoch": 3.61, "grad_norm": 0.6521569657206522, "learning_rate": 1.1691791768005944e-06, "loss": 0.0924, "step": 30890 }, { "epoch": 3.61, "grad_norm": 0.7462813556491096, "learning_rate": 1.165714920206848e-06, "loss": 0.1038, "step": 30895 }, { "epoch": 3.61, "grad_norm": 0.7121505231635527, "learning_rate": 1.1622556809797709e-06, "loss": 0.1019, "step": 30900 }, { "epoch": 3.61, "grad_norm": 0.6794821530992015, "learning_rate": 1.158801459847575e-06, "loss": 0.1053, "step": 30905 }, { "epoch": 3.61, "grad_norm": 0.5513109753605802, "learning_rate": 1.155352257537401e-06, "loss": 0.0969, "step": 30910 }, { "epoch": 3.61, "grad_norm": 0.752400586844115, "learning_rate": 1.1519080747753485e-06, "loss": 0.0939, "step": 30915 }, { "epoch": 3.61, "grad_norm": 0.7123151205222714, "learning_rate": 1.1484689122864572e-06, "loss": 0.0925, "step": 30920 }, { "epoch": 3.61, "grad_norm": 0.7062978269609498, "learning_rate": 1.145034770794698e-06, "loss": 0.1241, "step": 30925 }, { "epoch": 3.61, "grad_norm": 0.7806735515865889, "learning_rate": 1.1416056510230006e-06, "loss": 0.103, "step": 30930 }, { "epoch": 3.61, "grad_norm": 0.8264858524728509, "learning_rate": 1.1381815536932294e-06, "loss": 0.1148, "step": 30935 }, { "epoch": 3.61, "grad_norm": 0.6920881588928678, "learning_rate": 1.1347624795261913e-06, "loss": 0.1, "step": 30940 }, { "epoch": 3.62, "grad_norm": 0.7409309185540295, "learning_rate": 1.1313484292416355e-06, "loss": 0.1208, "step": 30945 }, { "epoch": 3.62, "grad_norm": 0.6981410730097878, "learning_rate": 1.127939403558259e-06, "loss": 0.095, "step": 30950 }, { "epoch": 3.62, "grad_norm": 0.706329616500167, "learning_rate": 1.124535403193694e-06, "loss": 0.1085, "step": 30955 }, { "epoch": 3.62, "grad_norm": 0.5929923159217567, "learning_rate": 1.121136428864522e-06, "loss": 0.0813, "step": 30960 }, { "epoch": 3.62, "grad_norm": 0.8325646783131384, "learning_rate": 1.117742481286263e-06, "loss": 0.0972, "step": 30965 }, { "epoch": 3.62, "grad_norm": 0.6234694109559572, "learning_rate": 1.1143535611733756e-06, "loss": 0.1117, "step": 30970 }, { "epoch": 3.62, "grad_norm": 0.7597598893184861, "learning_rate": 1.1109696692392618e-06, "loss": 0.1132, "step": 30975 }, { "epoch": 3.62, "grad_norm": 0.776733663886879, "learning_rate": 1.107590806196268e-06, "loss": 0.0935, "step": 30980 }, { "epoch": 3.62, "grad_norm": 0.7362651475659323, "learning_rate": 1.1042169727556784e-06, "loss": 0.1042, "step": 30985 }, { "epoch": 3.62, "grad_norm": 0.7399682046729068, "learning_rate": 1.1008481696277273e-06, "loss": 0.1365, "step": 30990 }, { "epoch": 3.62, "grad_norm": 0.6785790904653346, "learning_rate": 1.0974843975215755e-06, "loss": 0.0971, "step": 30995 }, { "epoch": 3.62, "grad_norm": 0.6813358393585447, "learning_rate": 1.0941256571453395e-06, "loss": 0.1009, "step": 31000 }, { "epoch": 3.62, "grad_norm": 0.7985176997914822, "learning_rate": 1.0907719492060598e-06, "loss": 0.1002, "step": 31005 }, { "epoch": 3.62, "grad_norm": 0.7796320693181589, "learning_rate": 1.0874232744097324e-06, "loss": 0.1017, "step": 31010 }, { "epoch": 3.62, "grad_norm": 0.7565294050199212, "learning_rate": 1.0840796334612908e-06, "loss": 0.0981, "step": 31015 }, { "epoch": 3.62, "grad_norm": 0.9640351906112309, "learning_rate": 1.0807410270645995e-06, "loss": 0.1374, "step": 31020 }, { "epoch": 3.62, "grad_norm": 0.7208860018110056, "learning_rate": 1.0774074559224745e-06, "loss": 0.1014, "step": 31025 }, { "epoch": 3.63, "grad_norm": 0.7140557847340524, "learning_rate": 1.0740789207366681e-06, "loss": 0.0931, "step": 31030 }, { "epoch": 3.63, "grad_norm": 0.8788224816679496, "learning_rate": 1.0707554222078753e-06, "loss": 0.0992, "step": 31035 }, { "epoch": 3.63, "grad_norm": 0.7157146461960749, "learning_rate": 1.0674369610357226e-06, "loss": 0.0951, "step": 31040 }, { "epoch": 3.63, "grad_norm": 0.7104996632753755, "learning_rate": 1.0641235379187765e-06, "loss": 0.0758, "step": 31045 }, { "epoch": 3.63, "grad_norm": 0.7603019890925156, "learning_rate": 1.0608151535545536e-06, "loss": 0.129, "step": 31050 }, { "epoch": 3.63, "grad_norm": 0.8831543879602979, "learning_rate": 1.0575118086394998e-06, "loss": 0.1026, "step": 31055 }, { "epoch": 3.63, "grad_norm": 0.6477463640915135, "learning_rate": 1.0542135038690115e-06, "loss": 0.0931, "step": 31060 }, { "epoch": 3.63, "grad_norm": 0.6321805796937292, "learning_rate": 1.0509202399374057e-06, "loss": 0.1031, "step": 31065 }, { "epoch": 3.63, "grad_norm": 0.7702906780146669, "learning_rate": 1.0476320175379584e-06, "loss": 0.1059, "step": 31070 }, { "epoch": 3.63, "grad_norm": 0.5245198782706144, "learning_rate": 1.0443488373628657e-06, "loss": 0.0924, "step": 31075 }, { "epoch": 3.63, "grad_norm": 0.7050140212425805, "learning_rate": 1.0410707001032778e-06, "loss": 0.0896, "step": 31080 }, { "epoch": 3.63, "grad_norm": 0.9136205760240936, "learning_rate": 1.0377976064492784e-06, "loss": 0.1036, "step": 31085 }, { "epoch": 3.63, "grad_norm": 0.753894979494184, "learning_rate": 1.0345295570898833e-06, "loss": 0.1029, "step": 31090 }, { "epoch": 3.63, "grad_norm": 0.880739181097662, "learning_rate": 1.0312665527130505e-06, "loss": 0.1206, "step": 31095 }, { "epoch": 3.63, "grad_norm": 0.6196072565620092, "learning_rate": 1.0280085940056834e-06, "loss": 0.1209, "step": 31100 }, { "epoch": 3.63, "grad_norm": 0.7404682146724354, "learning_rate": 1.0247556816536108e-06, "loss": 0.121, "step": 31105 }, { "epoch": 3.63, "grad_norm": 0.674463419867539, "learning_rate": 1.0215078163416019e-06, "loss": 0.1032, "step": 31110 }, { "epoch": 3.64, "grad_norm": 0.7085558959853813, "learning_rate": 1.0182649987533737e-06, "loss": 0.1305, "step": 31115 }, { "epoch": 3.64, "grad_norm": 0.7633816213228541, "learning_rate": 1.0150272295715662e-06, "loss": 0.1187, "step": 31120 }, { "epoch": 3.64, "grad_norm": 0.8208019223665899, "learning_rate": 1.0117945094777703e-06, "loss": 0.0906, "step": 31125 }, { "epoch": 3.64, "grad_norm": 0.5707036933825403, "learning_rate": 1.0085668391525056e-06, "loss": 0.0876, "step": 31130 }, { "epoch": 3.64, "grad_norm": 0.8106112387888617, "learning_rate": 1.005344219275231e-06, "loss": 0.1268, "step": 31135 }, { "epoch": 3.64, "grad_norm": 0.6529543234238515, "learning_rate": 1.0021266505243375e-06, "loss": 0.1105, "step": 31140 }, { "epoch": 3.64, "grad_norm": 0.7479011668431952, "learning_rate": 9.989141335771585e-07, "loss": 0.0945, "step": 31145 }, { "epoch": 3.64, "grad_norm": 0.7200662064724209, "learning_rate": 9.957066691099665e-07, "loss": 0.1242, "step": 31150 }, { "epoch": 3.64, "grad_norm": 0.7598052155566435, "learning_rate": 9.925042577979632e-07, "loss": 0.1062, "step": 31155 }, { "epoch": 3.64, "grad_norm": 0.7696118458374727, "learning_rate": 9.893069003152898e-07, "loss": 0.1092, "step": 31160 }, { "epoch": 3.64, "grad_norm": 0.651026528529035, "learning_rate": 9.86114597335025e-07, "loss": 0.1112, "step": 31165 }, { "epoch": 3.64, "grad_norm": 0.6929494746884063, "learning_rate": 9.829273495291808e-07, "loss": 0.108, "step": 31170 }, { "epoch": 3.64, "grad_norm": 0.6084206607832694, "learning_rate": 9.797451575687067e-07, "loss": 0.0921, "step": 31175 }, { "epoch": 3.64, "grad_norm": 0.8976307820162673, "learning_rate": 9.765680221234836e-07, "loss": 0.1069, "step": 31180 }, { "epoch": 3.64, "grad_norm": 0.709830016301572, "learning_rate": 9.733959438623374e-07, "loss": 0.1144, "step": 31185 }, { "epoch": 3.64, "grad_norm": 0.7192244443408814, "learning_rate": 9.702289234530204e-07, "loss": 0.1064, "step": 31190 }, { "epoch": 3.64, "grad_norm": 0.7429228989070715, "learning_rate": 9.670669615622297e-07, "loss": 0.0999, "step": 31195 }, { "epoch": 3.65, "grad_norm": 0.930482859875252, "learning_rate": 9.639100588555855e-07, "loss": 0.1034, "step": 31200 }, { "epoch": 3.65, "grad_norm": 0.7764541793833177, "learning_rate": 9.60758215997648e-07, "loss": 0.1011, "step": 31205 }, { "epoch": 3.65, "grad_norm": 0.740458995237549, "learning_rate": 9.576114336519115e-07, "loss": 0.0846, "step": 31210 }, { "epoch": 3.65, "grad_norm": 0.7112168281038594, "learning_rate": 9.544697124808127e-07, "loss": 0.0967, "step": 31215 }, { "epoch": 3.65, "grad_norm": 0.7646635169320589, "learning_rate": 9.51333053145717e-07, "loss": 0.1114, "step": 31220 }, { "epoch": 3.65, "grad_norm": 0.8011725676063679, "learning_rate": 9.482014563069214e-07, "loss": 0.0872, "step": 31225 }, { "epoch": 3.65, "grad_norm": 0.741812880068649, "learning_rate": 9.450749226236594e-07, "loss": 0.1101, "step": 31230 }, { "epoch": 3.65, "grad_norm": 0.6852096825791646, "learning_rate": 9.419534527540991e-07, "loss": 0.0986, "step": 31235 }, { "epoch": 3.65, "grad_norm": 0.7256183959011522, "learning_rate": 9.388370473553398e-07, "loss": 0.0835, "step": 31240 }, { "epoch": 3.65, "grad_norm": 0.837669727743677, "learning_rate": 9.357257070834264e-07, "loss": 0.1152, "step": 31245 }, { "epoch": 3.65, "grad_norm": 0.4832255391006528, "learning_rate": 9.326194325933179e-07, "loss": 0.1037, "step": 31250 }, { "epoch": 3.65, "grad_norm": 0.5585812776062311, "learning_rate": 9.29518224538925e-07, "loss": 0.0975, "step": 31255 }, { "epoch": 3.65, "grad_norm": 0.5704524566223363, "learning_rate": 9.264220835730808e-07, "loss": 0.1043, "step": 31260 }, { "epoch": 3.65, "grad_norm": 0.6979544046940148, "learning_rate": 9.233310103475612e-07, "loss": 0.1074, "step": 31265 }, { "epoch": 3.65, "grad_norm": 0.6515405559149345, "learning_rate": 9.202450055130623e-07, "loss": 0.099, "step": 31270 }, { "epoch": 3.65, "grad_norm": 0.7728655197938917, "learning_rate": 9.17164069719223e-07, "loss": 0.0949, "step": 31275 }, { "epoch": 3.65, "grad_norm": 0.6407515661204616, "learning_rate": 9.140882036146103e-07, "loss": 0.0668, "step": 31280 }, { "epoch": 3.66, "grad_norm": 0.7749618913566683, "learning_rate": 9.110174078467315e-07, "loss": 0.1059, "step": 31285 }, { "epoch": 3.66, "grad_norm": 0.7454509444142788, "learning_rate": 9.079516830620166e-07, "loss": 0.1173, "step": 31290 }, { "epoch": 3.66, "grad_norm": 0.7000106844419353, "learning_rate": 9.048910299058383e-07, "loss": 0.1052, "step": 31295 }, { "epoch": 3.66, "grad_norm": 0.7239052084244525, "learning_rate": 9.018354490224923e-07, "loss": 0.1181, "step": 31300 }, { "epoch": 3.66, "grad_norm": 0.7089461885977787, "learning_rate": 8.987849410552085e-07, "loss": 0.1136, "step": 31305 }, { "epoch": 3.66, "grad_norm": 0.8107318611294542, "learning_rate": 8.95739506646151e-07, "loss": 0.1004, "step": 31310 }, { "epoch": 3.66, "grad_norm": 0.6556299897288846, "learning_rate": 8.926991464364209e-07, "loss": 0.087, "step": 31315 }, { "epoch": 3.66, "grad_norm": 0.6825216426856706, "learning_rate": 8.896638610660396e-07, "loss": 0.0992, "step": 31320 }, { "epoch": 3.66, "grad_norm": 0.6728781173135941, "learning_rate": 8.866336511739709e-07, "loss": 0.1033, "step": 31325 }, { "epoch": 3.66, "grad_norm": 0.7835423788270992, "learning_rate": 8.836085173981046e-07, "loss": 0.1101, "step": 31330 }, { "epoch": 3.66, "grad_norm": 0.5644290505735234, "learning_rate": 8.805884603752618e-07, "loss": 0.1095, "step": 31335 }, { "epoch": 3.66, "grad_norm": 0.6575360225949405, "learning_rate": 8.775734807412006e-07, "loss": 0.0892, "step": 31340 }, { "epoch": 3.66, "grad_norm": 0.6739638964744041, "learning_rate": 8.745635791305967e-07, "loss": 0.0839, "step": 31345 }, { "epoch": 3.66, "grad_norm": 0.5604147421155383, "learning_rate": 8.715587561770739e-07, "loss": 0.0873, "step": 31350 }, { "epoch": 3.66, "grad_norm": 0.5821479040472021, "learning_rate": 8.685590125131732e-07, "loss": 0.093, "step": 31355 }, { "epoch": 3.66, "grad_norm": 0.7177866912022105, "learning_rate": 8.655643487703813e-07, "loss": 0.117, "step": 31360 }, { "epoch": 3.66, "grad_norm": 0.515857735156103, "learning_rate": 8.625747655790994e-07, "loss": 0.104, "step": 31365 }, { "epoch": 3.67, "grad_norm": 0.7684893506185952, "learning_rate": 8.595902635686631e-07, "loss": 0.0877, "step": 31370 }, { "epoch": 3.67, "grad_norm": 0.6116212074920049, "learning_rate": 8.566108433673448e-07, "loss": 0.0924, "step": 31375 }, { "epoch": 3.67, "grad_norm": 0.6616138923328854, "learning_rate": 8.536365056023454e-07, "loss": 0.113, "step": 31380 }, { "epoch": 3.67, "grad_norm": 0.7857435117126611, "learning_rate": 8.506672508997948e-07, "loss": 0.1145, "step": 31385 }, { "epoch": 3.67, "grad_norm": 0.8801971501001228, "learning_rate": 8.477030798847485e-07, "loss": 0.0844, "step": 31390 }, { "epoch": 3.67, "grad_norm": 0.6597655440928749, "learning_rate": 8.44743993181199e-07, "loss": 0.1184, "step": 31395 }, { "epoch": 3.67, "grad_norm": 0.7267353538771502, "learning_rate": 8.417899914120591e-07, "loss": 0.0907, "step": 31400 }, { "epoch": 3.67, "grad_norm": 0.8222255970681178, "learning_rate": 8.388410751991843e-07, "loss": 0.0977, "step": 31405 }, { "epoch": 3.67, "grad_norm": 0.6174506661697344, "learning_rate": 8.358972451633501e-07, "loss": 0.1041, "step": 31410 }, { "epoch": 3.67, "grad_norm": 0.6644366043972187, "learning_rate": 8.329585019242609e-07, "loss": 0.0994, "step": 31415 }, { "epoch": 3.67, "grad_norm": 0.8194103486338397, "learning_rate": 8.300248461005521e-07, "loss": 0.1171, "step": 31420 }, { "epoch": 3.67, "grad_norm": 0.6706162633012751, "learning_rate": 8.270962783097968e-07, "loss": 0.0892, "step": 31425 }, { "epoch": 3.67, "grad_norm": 0.748968476989497, "learning_rate": 8.241727991684823e-07, "loss": 0.1021, "step": 31430 }, { "epoch": 3.67, "grad_norm": 0.6647751647930565, "learning_rate": 8.212544092920354e-07, "loss": 0.1087, "step": 31435 }, { "epoch": 3.67, "grad_norm": 0.7662992417188828, "learning_rate": 8.18341109294804e-07, "loss": 0.1357, "step": 31440 }, { "epoch": 3.67, "grad_norm": 0.721200236585321, "learning_rate": 8.154328997900668e-07, "loss": 0.0932, "step": 31445 }, { "epoch": 3.67, "grad_norm": 0.7380514291941953, "learning_rate": 8.125297813900396e-07, "loss": 0.0895, "step": 31450 }, { "epoch": 3.68, "grad_norm": 0.7081270897732465, "learning_rate": 8.096317547058557e-07, "loss": 0.1009, "step": 31455 }, { "epoch": 3.68, "grad_norm": 0.8165014134913714, "learning_rate": 8.067388203475801e-07, "loss": 0.0856, "step": 31460 }, { "epoch": 3.68, "grad_norm": 0.8569513419803297, "learning_rate": 8.038509789242005e-07, "loss": 0.1236, "step": 31465 }, { "epoch": 3.68, "grad_norm": 0.8692520356919332, "learning_rate": 8.009682310436445e-07, "loss": 0.1087, "step": 31470 }, { "epoch": 3.68, "grad_norm": 0.6750348123895584, "learning_rate": 7.980905773127572e-07, "loss": 0.0991, "step": 31475 }, { "epoch": 3.68, "grad_norm": 0.8982958265958888, "learning_rate": 7.952180183373209e-07, "loss": 0.0977, "step": 31480 }, { "epoch": 3.68, "grad_norm": 0.6588766502930193, "learning_rate": 7.923505547220322e-07, "loss": 0.0861, "step": 31485 }, { "epoch": 3.68, "grad_norm": 0.5565799674616073, "learning_rate": 7.894881870705251e-07, "loss": 0.092, "step": 31490 }, { "epoch": 3.68, "grad_norm": 0.7884827987896651, "learning_rate": 7.866309159853591e-07, "loss": 0.1012, "step": 31495 }, { "epoch": 3.68, "grad_norm": 0.6243882843330959, "learning_rate": 7.83778742068017e-07, "loss": 0.0924, "step": 31500 }, { "epoch": 3.68, "grad_norm": 0.7287092352528951, "learning_rate": 7.809316659189131e-07, "loss": 0.1041, "step": 31505 }, { "epoch": 3.68, "grad_norm": 0.741149182688481, "learning_rate": 7.780896881373872e-07, "loss": 0.102, "step": 31510 }, { "epoch": 3.68, "grad_norm": 0.6576854112107415, "learning_rate": 7.752528093217026e-07, "loss": 0.0977, "step": 31515 }, { "epoch": 3.68, "grad_norm": 0.8299192735893729, "learning_rate": 7.724210300690538e-07, "loss": 0.1001, "step": 31520 }, { "epoch": 3.68, "grad_norm": 0.7424029451038306, "learning_rate": 7.695943509755609e-07, "loss": 0.1225, "step": 31525 }, { "epoch": 3.68, "grad_norm": 0.7973278921384976, "learning_rate": 7.667727726362705e-07, "loss": 0.1031, "step": 31530 }, { "epoch": 3.68, "grad_norm": 0.7818614369903614, "learning_rate": 7.639562956451491e-07, "loss": 0.1352, "step": 31535 }, { "epoch": 3.69, "grad_norm": 0.6567330468296246, "learning_rate": 7.611449205950999e-07, "loss": 0.0998, "step": 31540 }, { "epoch": 3.69, "grad_norm": 0.6997190924182812, "learning_rate": 7.583386480779442e-07, "loss": 0.0955, "step": 31545 }, { "epoch": 3.69, "grad_norm": 0.7549684899034701, "learning_rate": 7.555374786844344e-07, "loss": 0.1239, "step": 31550 }, { "epoch": 3.69, "grad_norm": 0.7230958954969856, "learning_rate": 7.527414130042403e-07, "loss": 0.12, "step": 31555 }, { "epoch": 3.69, "grad_norm": 0.6872266124706731, "learning_rate": 7.49950451625972e-07, "loss": 0.1011, "step": 31560 }, { "epoch": 3.69, "grad_norm": 0.8932201897522624, "learning_rate": 7.471645951371481e-07, "loss": 0.0991, "step": 31565 }, { "epoch": 3.69, "grad_norm": 0.7296096098265074, "learning_rate": 7.443838441242218e-07, "loss": 0.1152, "step": 31570 }, { "epoch": 3.69, "grad_norm": 0.7188277105664748, "learning_rate": 7.416081991725748e-07, "loss": 0.0987, "step": 31575 }, { "epoch": 3.69, "grad_norm": 0.7514901226980588, "learning_rate": 7.388376608665037e-07, "loss": 0.0982, "step": 31580 }, { "epoch": 3.69, "grad_norm": 0.6221105917830667, "learning_rate": 7.36072229789242e-07, "loss": 0.0934, "step": 31585 }, { "epoch": 3.69, "grad_norm": 0.7605546615709045, "learning_rate": 7.333119065229377e-07, "loss": 0.0985, "step": 31590 }, { "epoch": 3.69, "grad_norm": 0.7088352797716291, "learning_rate": 7.305566916486678e-07, "loss": 0.1231, "step": 31595 }, { "epoch": 3.69, "grad_norm": 0.6991949017852305, "learning_rate": 7.27806585746435e-07, "loss": 0.0995, "step": 31600 }, { "epoch": 3.69, "grad_norm": 0.9342096300297633, "learning_rate": 7.250615893951651e-07, "loss": 0.1018, "step": 31605 }, { "epoch": 3.69, "grad_norm": 0.7993146714026558, "learning_rate": 7.22321703172707e-07, "loss": 0.1312, "step": 31610 }, { "epoch": 3.69, "grad_norm": 0.9210544337818863, "learning_rate": 7.195869276558381e-07, "loss": 0.1313, "step": 31615 }, { "epoch": 3.69, "grad_norm": 0.7288024236976424, "learning_rate": 7.168572634202592e-07, "loss": 0.0798, "step": 31620 }, { "epoch": 3.69, "grad_norm": 0.7668314603172326, "learning_rate": 7.141327110405882e-07, "loss": 0.0948, "step": 31625 }, { "epoch": 3.7, "grad_norm": 0.6733367562177196, "learning_rate": 7.11413271090372e-07, "loss": 0.0868, "step": 31630 }, { "epoch": 3.7, "grad_norm": 0.794689203530448, "learning_rate": 7.08698944142086e-07, "loss": 0.0939, "step": 31635 }, { "epoch": 3.7, "grad_norm": 1.002349815481805, "learning_rate": 7.059897307671176e-07, "loss": 0.0908, "step": 31640 }, { "epoch": 3.7, "grad_norm": 0.7176324922817938, "learning_rate": 7.032856315357938e-07, "loss": 0.0952, "step": 31645 }, { "epoch": 3.7, "grad_norm": 0.967835312412662, "learning_rate": 7.005866470173484e-07, "loss": 0.1021, "step": 31650 }, { "epoch": 3.7, "grad_norm": 0.6344779328448761, "learning_rate": 6.978927777799488e-07, "loss": 0.1005, "step": 31655 }, { "epoch": 3.7, "grad_norm": 0.7548874723433963, "learning_rate": 6.952040243906804e-07, "loss": 0.0823, "step": 31660 }, { "epoch": 3.7, "grad_norm": 0.8022687325241348, "learning_rate": 6.925203874155601e-07, "loss": 0.1017, "step": 31665 }, { "epoch": 3.7, "grad_norm": 0.6464930726035433, "learning_rate": 6.898418674195106e-07, "loss": 0.0902, "step": 31670 }, { "epoch": 3.7, "grad_norm": 0.6920613426884838, "learning_rate": 6.871684649663979e-07, "loss": 0.0837, "step": 31675 }, { "epoch": 3.7, "grad_norm": 0.6195165995257804, "learning_rate": 6.845001806189994e-07, "loss": 0.1149, "step": 31680 }, { "epoch": 3.7, "grad_norm": 0.7221992648829098, "learning_rate": 6.818370149390157e-07, "loss": 0.1098, "step": 31685 }, { "epoch": 3.7, "grad_norm": 0.6597585306554329, "learning_rate": 6.791789684870681e-07, "loss": 0.0942, "step": 31690 }, { "epoch": 3.7, "grad_norm": 0.6231016938905666, "learning_rate": 6.765260418227115e-07, "loss": 0.0784, "step": 31695 }, { "epoch": 3.7, "grad_norm": 0.9862723356322458, "learning_rate": 6.738782355044049e-07, "loss": 0.1291, "step": 31700 }, { "epoch": 3.7, "grad_norm": 0.7602967534371786, "learning_rate": 6.71235550089544e-07, "loss": 0.0976, "step": 31705 }, { "epoch": 3.7, "grad_norm": 0.7055854913640962, "learning_rate": 6.685979861344422e-07, "loss": 0.1187, "step": 31710 }, { "epoch": 3.71, "grad_norm": 1.402533773406938, "learning_rate": 6.659655441943357e-07, "loss": 0.1497, "step": 31715 }, { "epoch": 3.71, "grad_norm": 0.7086175003531568, "learning_rate": 6.633382248233788e-07, "loss": 0.0856, "step": 31720 }, { "epoch": 3.71, "grad_norm": 0.7558513054407868, "learning_rate": 6.607160285746511e-07, "loss": 0.1102, "step": 31725 }, { "epoch": 3.71, "grad_norm": 0.8424433960132696, "learning_rate": 6.580989560001472e-07, "loss": 0.1075, "step": 31730 }, { "epoch": 3.71, "grad_norm": 0.71752907496591, "learning_rate": 6.554870076507985e-07, "loss": 0.0685, "step": 31735 }, { "epoch": 3.71, "grad_norm": 0.7754307549127908, "learning_rate": 6.528801840764348e-07, "loss": 0.1, "step": 31740 }, { "epoch": 3.71, "grad_norm": 0.6130146678101475, "learning_rate": 6.50278485825831e-07, "loss": 0.0978, "step": 31745 }, { "epoch": 3.71, "grad_norm": 0.7311569641789891, "learning_rate": 6.476819134466627e-07, "loss": 0.1075, "step": 31750 }, { "epoch": 3.71, "grad_norm": 0.8774468706453505, "learning_rate": 6.450904674855457e-07, "loss": 0.1079, "step": 31755 }, { "epoch": 3.71, "grad_norm": 0.6682794645991317, "learning_rate": 6.425041484880017e-07, "loss": 0.1039, "step": 31760 }, { "epoch": 3.71, "grad_norm": 0.599137427065413, "learning_rate": 6.399229569984733e-07, "loss": 0.0907, "step": 31765 }, { "epoch": 3.71, "grad_norm": 0.7520552828935392, "learning_rate": 6.37346893560331e-07, "loss": 0.0927, "step": 31770 }, { "epoch": 3.71, "grad_norm": 0.897157424920915, "learning_rate": 6.347759587158664e-07, "loss": 0.1275, "step": 31775 }, { "epoch": 3.71, "grad_norm": 0.7211398917763695, "learning_rate": 6.322101530062851e-07, "loss": 0.0667, "step": 31780 }, { "epoch": 3.71, "grad_norm": 0.7601362803643443, "learning_rate": 6.296494769717192e-07, "loss": 0.1082, "step": 31785 }, { "epoch": 3.71, "grad_norm": 0.7497018239373299, "learning_rate": 6.27093931151218e-07, "loss": 0.0946, "step": 31790 }, { "epoch": 3.71, "grad_norm": 0.788625277471891, "learning_rate": 6.245435160827456e-07, "loss": 0.1182, "step": 31795 }, { "epoch": 3.72, "grad_norm": 0.7166816562564375, "learning_rate": 6.21998232303192e-07, "loss": 0.1055, "step": 31800 }, { "epoch": 3.72, "grad_norm": 0.7289494593735245, "learning_rate": 6.194580803483729e-07, "loss": 0.1234, "step": 31805 }, { "epoch": 3.72, "grad_norm": 0.6962758016708448, "learning_rate": 6.16923060753008e-07, "loss": 0.0874, "step": 31810 }, { "epoch": 3.72, "grad_norm": 0.7105918346150539, "learning_rate": 6.143931740507508e-07, "loss": 0.1231, "step": 31815 }, { "epoch": 3.72, "grad_norm": 0.6369738757187089, "learning_rate": 6.118684207741671e-07, "loss": 0.0857, "step": 31820 }, { "epoch": 3.72, "grad_norm": 0.7475843133510652, "learning_rate": 6.093488014547454e-07, "loss": 0.095, "step": 31825 }, { "epoch": 3.72, "grad_norm": 0.8053850669258545, "learning_rate": 6.068343166228924e-07, "loss": 0.1068, "step": 31830 }, { "epoch": 3.72, "grad_norm": 0.675416035558085, "learning_rate": 6.043249668079287e-07, "loss": 0.1035, "step": 31835 }, { "epoch": 3.72, "grad_norm": 0.6617614436126136, "learning_rate": 6.018207525381043e-07, "loss": 0.1003, "step": 31840 }, { "epoch": 3.72, "grad_norm": 0.8191196748517496, "learning_rate": 5.993216743405805e-07, "loss": 0.0966, "step": 31845 }, { "epoch": 3.72, "grad_norm": 0.778793782037446, "learning_rate": 5.968277327414396e-07, "loss": 0.1214, "step": 31850 }, { "epoch": 3.72, "grad_norm": 0.9787277871929254, "learning_rate": 5.943389282656836e-07, "loss": 0.1245, "step": 31855 }, { "epoch": 3.72, "grad_norm": 0.5940157034284285, "learning_rate": 5.918552614372269e-07, "loss": 0.0912, "step": 31860 }, { "epoch": 3.72, "grad_norm": 0.7904920113539996, "learning_rate": 5.893767327789123e-07, "loss": 0.1017, "step": 31865 }, { "epoch": 3.72, "grad_norm": 0.770788055170638, "learning_rate": 5.869033428124949e-07, "loss": 0.1005, "step": 31870 }, { "epoch": 3.72, "grad_norm": 0.7748995685414579, "learning_rate": 5.844350920586494e-07, "loss": 0.1096, "step": 31875 }, { "epoch": 3.72, "grad_norm": 0.7501230595308362, "learning_rate": 5.81971981036969e-07, "loss": 0.1078, "step": 31880 }, { "epoch": 3.73, "grad_norm": 0.729095357781294, "learning_rate": 5.795140102659635e-07, "loss": 0.0934, "step": 31885 }, { "epoch": 3.73, "grad_norm": 0.6928355964505368, "learning_rate": 5.770611802630582e-07, "loss": 0.1274, "step": 31890 }, { "epoch": 3.73, "grad_norm": 0.6009133560066421, "learning_rate": 5.746134915446038e-07, "loss": 0.0884, "step": 31895 }, { "epoch": 3.73, "grad_norm": 0.766552774339302, "learning_rate": 5.72170944625866e-07, "loss": 0.1207, "step": 31900 }, { "epoch": 3.73, "grad_norm": 0.7177780762936741, "learning_rate": 5.697335400210224e-07, "loss": 0.1001, "step": 31905 }, { "epoch": 3.73, "grad_norm": 0.7226168733491731, "learning_rate": 5.673012782431736e-07, "loss": 0.1013, "step": 31910 }, { "epoch": 3.73, "grad_norm": 0.6662601906310749, "learning_rate": 5.648741598043378e-07, "loss": 0.0877, "step": 31915 }, { "epoch": 3.73, "grad_norm": 0.8466086073922562, "learning_rate": 5.624521852154452e-07, "loss": 0.121, "step": 31920 }, { "epoch": 3.73, "grad_norm": 0.7274178543233253, "learning_rate": 5.600353549863518e-07, "loss": 0.101, "step": 31925 }, { "epoch": 3.73, "grad_norm": 0.8168205245693064, "learning_rate": 5.576236696258174e-07, "loss": 0.1153, "step": 31930 }, { "epoch": 3.73, "grad_norm": 0.7560861537779024, "learning_rate": 5.55217129641536e-07, "loss": 0.1237, "step": 31935 }, { "epoch": 3.73, "grad_norm": 0.6426173752321725, "learning_rate": 5.528157355401048e-07, "loss": 0.1, "step": 31940 }, { "epoch": 3.73, "grad_norm": 0.7950875421713026, "learning_rate": 5.504194878270447e-07, "loss": 0.1065, "step": 31945 }, { "epoch": 3.73, "grad_norm": 0.7621534245877817, "learning_rate": 5.480283870067909e-07, "loss": 0.0896, "step": 31950 }, { "epoch": 3.73, "grad_norm": 0.8779140943706185, "learning_rate": 5.456424335826938e-07, "loss": 0.1027, "step": 31955 }, { "epoch": 3.73, "grad_norm": 0.7790190175684206, "learning_rate": 5.43261628057018e-07, "loss": 0.098, "step": 31960 }, { "epoch": 3.73, "grad_norm": 0.5752335770154818, "learning_rate": 5.408859709309543e-07, "loss": 0.1155, "step": 31965 }, { "epoch": 3.74, "grad_norm": 0.7171955695246557, "learning_rate": 5.38515462704603e-07, "loss": 0.0917, "step": 31970 }, { "epoch": 3.74, "grad_norm": 0.6756969120921441, "learning_rate": 5.361501038769757e-07, "loss": 0.0935, "step": 31975 }, { "epoch": 3.74, "grad_norm": 0.696641107065719, "learning_rate": 5.337898949460074e-07, "loss": 0.0882, "step": 31980 }, { "epoch": 3.74, "grad_norm": 0.7664429656739975, "learning_rate": 5.314348364085536e-07, "loss": 0.0939, "step": 31985 }, { "epoch": 3.74, "grad_norm": 0.7360982228554642, "learning_rate": 5.290849287603677e-07, "loss": 0.1476, "step": 31990 }, { "epoch": 3.74, "grad_norm": 0.7287834259358721, "learning_rate": 5.267401724961374e-07, "loss": 0.1105, "step": 31995 }, { "epoch": 3.74, "grad_norm": 0.7474724704349867, "learning_rate": 5.244005681094566e-07, "loss": 0.102, "step": 32000 }, { "epoch": 3.74, "grad_norm": 0.5805564856057654, "learning_rate": 5.220661160928341e-07, "loss": 0.0928, "step": 32005 }, { "epoch": 3.74, "grad_norm": 0.6680307774440886, "learning_rate": 5.197368169376965e-07, "loss": 0.0959, "step": 32010 }, { "epoch": 3.74, "grad_norm": 0.6567285003555864, "learning_rate": 5.174126711343929e-07, "loss": 0.0723, "step": 32015 }, { "epoch": 3.74, "grad_norm": 0.75056860111149, "learning_rate": 5.150936791721738e-07, "loss": 0.0803, "step": 32020 }, { "epoch": 3.74, "grad_norm": 0.6198175822490691, "learning_rate": 5.127798415392126e-07, "loss": 0.0927, "step": 32025 }, { "epoch": 3.74, "grad_norm": 0.68919247441596, "learning_rate": 5.104711587225946e-07, "loss": 0.0897, "step": 32030 }, { "epoch": 3.74, "grad_norm": 0.6058948482392801, "learning_rate": 5.081676312083227e-07, "loss": 0.1101, "step": 32035 }, { "epoch": 3.74, "grad_norm": 0.7077953911942385, "learning_rate": 5.058692594813174e-07, "loss": 0.096, "step": 32040 }, { "epoch": 3.74, "grad_norm": 0.6550449890185702, "learning_rate": 5.035760440254055e-07, "loss": 0.0891, "step": 32045 }, { "epoch": 3.74, "grad_norm": 0.6897821749729027, "learning_rate": 5.01287985323337e-07, "loss": 0.1231, "step": 32050 }, { "epoch": 3.75, "grad_norm": 0.6312566097100962, "learning_rate": 4.990050838567656e-07, "loss": 0.0998, "step": 32055 }, { "epoch": 3.75, "grad_norm": 0.7908750295842191, "learning_rate": 4.967273401062706e-07, "loss": 0.0943, "step": 32060 }, { "epoch": 3.75, "grad_norm": 0.7165356925541896, "learning_rate": 4.944547545513434e-07, "loss": 0.1101, "step": 32065 }, { "epoch": 3.75, "grad_norm": 0.6855069235440959, "learning_rate": 4.921873276703793e-07, "loss": 0.1121, "step": 32070 }, { "epoch": 3.75, "grad_norm": 0.6598458109315708, "learning_rate": 4.899250599407018e-07, "loss": 0.0992, "step": 32075 }, { "epoch": 3.75, "grad_norm": 0.8019533892863002, "learning_rate": 4.876679518385441e-07, "loss": 0.1243, "step": 32080 }, { "epoch": 3.75, "grad_norm": 0.5490616878274747, "learning_rate": 4.854160038390454e-07, "loss": 0.0902, "step": 32085 }, { "epoch": 3.75, "grad_norm": 0.7606295165897898, "learning_rate": 4.831692164162627e-07, "loss": 0.1196, "step": 32090 }, { "epoch": 3.75, "grad_norm": 0.6654441845897996, "learning_rate": 4.809275900431731e-07, "loss": 0.1134, "step": 32095 }, { "epoch": 3.75, "grad_norm": 0.7055330103260731, "learning_rate": 4.786911251916632e-07, "loss": 0.1089, "step": 32100 }, { "epoch": 3.75, "grad_norm": 0.6216197575243764, "learning_rate": 4.764598223325284e-07, "loss": 0.0952, "step": 32105 }, { "epoch": 3.75, "grad_norm": 0.7659172735607368, "learning_rate": 4.742336819354848e-07, "loss": 0.1004, "step": 32110 }, { "epoch": 3.75, "grad_norm": 0.6941872468872129, "learning_rate": 4.720127044691547e-07, "loss": 0.1041, "step": 32115 }, { "epoch": 3.75, "grad_norm": 0.6535215367952291, "learning_rate": 4.697968904010808e-07, "loss": 0.0922, "step": 32120 }, { "epoch": 3.75, "grad_norm": 0.6553835309409535, "learning_rate": 4.6758624019771214e-07, "loss": 0.0904, "step": 32125 }, { "epoch": 3.75, "grad_norm": 0.6325256204357212, "learning_rate": 4.6538075432441265e-07, "loss": 0.1047, "step": 32130 }, { "epoch": 3.75, "grad_norm": 0.8063852630655081, "learning_rate": 4.631804332454665e-07, "loss": 0.0986, "step": 32135 }, { "epoch": 3.76, "grad_norm": 0.7026017866628933, "learning_rate": 4.609852774240586e-07, "loss": 0.1181, "step": 32140 }, { "epoch": 3.76, "grad_norm": 0.9341510778916376, "learning_rate": 4.587952873222945e-07, "loss": 0.1094, "step": 32145 }, { "epoch": 3.76, "grad_norm": 0.8396831801570894, "learning_rate": 4.5661046340118863e-07, "loss": 0.1388, "step": 32150 }, { "epoch": 3.76, "grad_norm": 0.7897132713379599, "learning_rate": 4.544308061206731e-07, "loss": 0.1176, "step": 32155 }, { "epoch": 3.76, "grad_norm": 0.8918041132470432, "learning_rate": 4.5225631593958096e-07, "loss": 0.127, "step": 32160 }, { "epoch": 3.76, "grad_norm": 0.8481475239619473, "learning_rate": 4.5008699331567094e-07, "loss": 0.1175, "step": 32165 }, { "epoch": 3.76, "grad_norm": 1.037197264147563, "learning_rate": 4.479228387056084e-07, "loss": 0.1244, "step": 32170 }, { "epoch": 3.76, "grad_norm": 0.6201156288530035, "learning_rate": 4.4576385256497043e-07, "loss": 0.0869, "step": 32175 }, { "epoch": 3.76, "grad_norm": 0.5389787897735214, "learning_rate": 4.4361003534824363e-07, "loss": 0.1034, "step": 32180 }, { "epoch": 3.76, "grad_norm": 0.6958170324794998, "learning_rate": 4.414613875088347e-07, "loss": 0.0853, "step": 32185 }, { "epoch": 3.76, "grad_norm": 0.7305058019977781, "learning_rate": 4.3931790949904574e-07, "loss": 0.0859, "step": 32190 }, { "epoch": 3.76, "grad_norm": 0.8097256084435239, "learning_rate": 4.371796017701102e-07, "loss": 0.1063, "step": 32195 }, { "epoch": 3.76, "grad_norm": 0.8237693426801642, "learning_rate": 4.350464647721625e-07, "loss": 0.1093, "step": 32200 }, { "epoch": 3.76, "grad_norm": 0.8031535617941814, "learning_rate": 4.3291849895425175e-07, "loss": 0.1064, "step": 32205 }, { "epoch": 3.76, "grad_norm": 0.5219197508824569, "learning_rate": 4.307957047643335e-07, "loss": 0.093, "step": 32210 }, { "epoch": 3.76, "grad_norm": 0.878516332890269, "learning_rate": 4.286780826492809e-07, "loss": 0.1161, "step": 32215 }, { "epoch": 3.76, "grad_norm": 0.6921775188321084, "learning_rate": 4.265656330548734e-07, "loss": 0.0914, "step": 32220 }, { "epoch": 3.77, "grad_norm": 0.6746299030826209, "learning_rate": 4.244583564258053e-07, "loss": 0.0735, "step": 32225 }, { "epoch": 3.77, "grad_norm": 0.696905826905185, "learning_rate": 4.223562532056802e-07, "loss": 0.1361, "step": 32230 }, { "epoch": 3.77, "grad_norm": 0.689137537666753, "learning_rate": 4.202593238370106e-07, "loss": 0.1075, "step": 32235 }, { "epoch": 3.77, "grad_norm": 0.7473092294054602, "learning_rate": 4.1816756876122407e-07, "loss": 0.1089, "step": 32240 }, { "epoch": 3.77, "grad_norm": 0.729034784265019, "learning_rate": 4.1608098841866005e-07, "loss": 0.0915, "step": 32245 }, { "epoch": 3.77, "grad_norm": 0.7484673214344195, "learning_rate": 4.1399958324856436e-07, "loss": 0.1093, "step": 32250 }, { "epoch": 3.77, "grad_norm": 0.6085416620023498, "learning_rate": 4.1192335368908654e-07, "loss": 0.1186, "step": 32255 }, { "epoch": 3.77, "grad_norm": 0.7167815382190764, "learning_rate": 4.098523001773047e-07, "loss": 0.0945, "step": 32260 }, { "epoch": 3.77, "grad_norm": 0.5231131116256913, "learning_rate": 4.077864231491896e-07, "loss": 0.0925, "step": 32265 }, { "epoch": 3.77, "grad_norm": 0.677433455664288, "learning_rate": 4.057257230396377e-07, "loss": 0.1055, "step": 32270 }, { "epoch": 3.77, "grad_norm": 0.6689239647252212, "learning_rate": 4.036702002824438e-07, "loss": 0.113, "step": 32275 }, { "epoch": 3.77, "grad_norm": 0.7964204649183307, "learning_rate": 4.016198553103173e-07, "loss": 0.1325, "step": 32280 }, { "epoch": 3.77, "grad_norm": 0.66506983266089, "learning_rate": 3.9957468855487676e-07, "loss": 0.0958, "step": 32285 }, { "epoch": 3.77, "grad_norm": 0.7439628542562258, "learning_rate": 3.975347004466501e-07, "loss": 0.1228, "step": 32290 }, { "epoch": 3.77, "grad_norm": 0.628035276525356, "learning_rate": 3.954998914150826e-07, "loss": 0.1026, "step": 32295 }, { "epoch": 3.77, "grad_norm": 0.8237452201719537, "learning_rate": 3.9347026188851224e-07, "loss": 0.0874, "step": 32300 }, { "epoch": 3.77, "grad_norm": 0.8599742567001131, "learning_rate": 3.9144581229420554e-07, "loss": 0.1077, "step": 32305 }, { "epoch": 3.77, "grad_norm": 0.6495778697538434, "learning_rate": 3.8942654305832994e-07, "loss": 0.0862, "step": 32310 }, { "epoch": 3.78, "grad_norm": 0.8468017153386671, "learning_rate": 3.8741245460595933e-07, "loss": 0.1273, "step": 32315 }, { "epoch": 3.78, "grad_norm": 0.7284267593418271, "learning_rate": 3.854035473610823e-07, "loss": 0.106, "step": 32320 }, { "epoch": 3.78, "grad_norm": 0.7357282763768429, "learning_rate": 3.833998217465912e-07, "loss": 0.1109, "step": 32325 }, { "epoch": 3.78, "grad_norm": 0.6074583455708537, "learning_rate": 3.814012781842985e-07, "loss": 0.11, "step": 32330 }, { "epoch": 3.78, "grad_norm": 0.6939829432836436, "learning_rate": 3.794079170949122e-07, "loss": 0.1032, "step": 32335 }, { "epoch": 3.78, "grad_norm": 0.7779167688886567, "learning_rate": 3.774197388980605e-07, "loss": 0.1013, "step": 32340 }, { "epoch": 3.78, "grad_norm": 0.7098688463691589, "learning_rate": 3.754367440122725e-07, "loss": 0.1277, "step": 32345 }, { "epoch": 3.78, "grad_norm": 0.7668796203291967, "learning_rate": 3.7345893285498643e-07, "loss": 0.0975, "step": 32350 }, { "epoch": 3.78, "grad_norm": 0.7766788071318416, "learning_rate": 3.714863058425583e-07, "loss": 0.1094, "step": 32355 }, { "epoch": 3.78, "grad_norm": 0.5571161753711943, "learning_rate": 3.6951886339024464e-07, "loss": 0.0916, "step": 32360 }, { "epoch": 3.78, "grad_norm": 0.6805847714632857, "learning_rate": 3.675566059122143e-07, "loss": 0.1712, "step": 32365 }, { "epoch": 3.78, "grad_norm": 0.7480091049755091, "learning_rate": 3.655995338215396e-07, "loss": 0.084, "step": 32370 }, { "epoch": 3.78, "grad_norm": 0.7666541441240888, "learning_rate": 3.636476475302075e-07, "loss": 0.0922, "step": 32375 }, { "epoch": 3.78, "grad_norm": 0.8174045648653325, "learning_rate": 3.61700947449109e-07, "loss": 0.1427, "step": 32380 }, { "epoch": 3.78, "grad_norm": 0.8975254307081054, "learning_rate": 3.597594339880439e-07, "loss": 0.0932, "step": 32385 }, { "epoch": 3.78, "grad_norm": 0.7851768562536167, "learning_rate": 3.5782310755572424e-07, "loss": 0.0993, "step": 32390 }, { "epoch": 3.78, "grad_norm": 0.6238493419521051, "learning_rate": 3.5589196855976557e-07, "loss": 0.1218, "step": 32395 }, { "epoch": 3.79, "grad_norm": 0.6440162629752348, "learning_rate": 3.5396601740669e-07, "loss": 0.1035, "step": 32400 }, { "epoch": 3.79, "grad_norm": 0.6845897160311911, "learning_rate": 3.520452545019343e-07, "loss": 0.1258, "step": 32405 }, { "epoch": 3.79, "grad_norm": 0.7304107013891382, "learning_rate": 3.5012968024983886e-07, "loss": 0.1149, "step": 32410 }, { "epoch": 3.79, "grad_norm": 0.7161864661783787, "learning_rate": 3.482192950536506e-07, "loss": 0.0788, "step": 32415 }, { "epoch": 3.79, "grad_norm": 0.641249480587915, "learning_rate": 3.463140993155256e-07, "loss": 0.1133, "step": 32420 }, { "epoch": 3.79, "grad_norm": 0.6667119593137242, "learning_rate": 3.4441409343652645e-07, "loss": 0.0917, "step": 32425 }, { "epoch": 3.79, "grad_norm": 0.6802432431372306, "learning_rate": 3.4251927781662753e-07, "loss": 0.1215, "step": 32430 }, { "epoch": 3.79, "grad_norm": 0.6121738068271202, "learning_rate": 3.406296528547071e-07, "loss": 0.1124, "step": 32435 }, { "epoch": 3.79, "grad_norm": 0.7707344658469334, "learning_rate": 3.3874521894854683e-07, "loss": 0.1031, "step": 32440 }, { "epoch": 3.79, "grad_norm": 0.7543138390164132, "learning_rate": 3.368659764948462e-07, "loss": 0.0887, "step": 32445 }, { "epoch": 3.79, "grad_norm": 0.6678721222443602, "learning_rate": 3.3499192588919704e-07, "loss": 0.0853, "step": 32450 }, { "epoch": 3.79, "grad_norm": 0.6952611342488365, "learning_rate": 3.331230675261143e-07, "loss": 0.1365, "step": 32455 }, { "epoch": 3.79, "grad_norm": 0.6713618547812098, "learning_rate": 3.3125940179901117e-07, "loss": 0.0874, "step": 32460 }, { "epoch": 3.79, "grad_norm": 0.6875983823508983, "learning_rate": 3.2940092910020427e-07, "loss": 0.1038, "step": 32465 }, { "epoch": 3.79, "grad_norm": 0.6112247853868717, "learning_rate": 3.275476498209251e-07, "loss": 0.0925, "step": 32470 }, { "epoch": 3.79, "grad_norm": 0.7269089250932869, "learning_rate": 3.256995643513089e-07, "loss": 0.1063, "step": 32475 }, { "epoch": 3.79, "grad_norm": 0.7565875614682014, "learning_rate": 3.2385667308039157e-07, "loss": 0.1121, "step": 32480 }, { "epoch": 3.8, "grad_norm": 0.6802430124661524, "learning_rate": 3.2201897639612953e-07, "loss": 0.1107, "step": 32485 }, { "epoch": 3.8, "grad_norm": 0.638711612266897, "learning_rate": 3.2018647468537166e-07, "loss": 0.1049, "step": 32490 }, { "epoch": 3.8, "grad_norm": 0.709719443730476, "learning_rate": 3.183591683338788e-07, "loss": 0.0968, "step": 32495 }, { "epoch": 3.8, "grad_norm": 0.7325362835943887, "learning_rate": 3.165370577263183e-07, "loss": 0.1145, "step": 32500 }, { "epoch": 3.8, "grad_norm": 0.6702909787447334, "learning_rate": 3.147201432462665e-07, "loss": 0.1119, "step": 32505 }, { "epoch": 3.8, "grad_norm": 0.7473171509810815, "learning_rate": 3.12908425276201e-07, "loss": 0.1135, "step": 32510 }, { "epoch": 3.8, "grad_norm": 0.6613029331545832, "learning_rate": 3.1110190419750553e-07, "loss": 0.0883, "step": 32515 }, { "epoch": 3.8, "grad_norm": 0.6391911362908526, "learning_rate": 3.093005803904758e-07, "loss": 0.11, "step": 32520 }, { "epoch": 3.8, "grad_norm": 0.790014312046305, "learning_rate": 3.0750445423430306e-07, "loss": 0.1226, "step": 32525 }, { "epoch": 3.8, "grad_norm": 0.7103440016776329, "learning_rate": 3.0571352610709867e-07, "loss": 0.1102, "step": 32530 }, { "epoch": 3.8, "grad_norm": 0.8647429918507457, "learning_rate": 3.0392779638586665e-07, "loss": 0.1196, "step": 32535 }, { "epoch": 3.8, "grad_norm": 0.6848698310642378, "learning_rate": 3.0214726544652297e-07, "loss": 0.0824, "step": 32540 }, { "epoch": 3.8, "grad_norm": 0.9288260149324713, "learning_rate": 3.0037193366388717e-07, "loss": 0.1169, "step": 32545 }, { "epoch": 3.8, "grad_norm": 0.625657519376986, "learning_rate": 2.986018014116854e-07, "loss": 0.0858, "step": 32550 }, { "epoch": 3.8, "grad_norm": 0.7673256417289757, "learning_rate": 2.9683686906255e-07, "loss": 0.1239, "step": 32555 }, { "epoch": 3.8, "grad_norm": 0.9064260430582168, "learning_rate": 2.950771369880173e-07, "loss": 0.1157, "step": 32560 }, { "epoch": 3.8, "grad_norm": 0.7211034657064757, "learning_rate": 2.933226055585298e-07, "loss": 0.1021, "step": 32565 }, { "epoch": 3.81, "grad_norm": 0.902949273593796, "learning_rate": 2.9157327514343357e-07, "loss": 0.0935, "step": 32570 }, { "epoch": 3.81, "grad_norm": 0.557179738459025, "learning_rate": 2.8982914611098145e-07, "loss": 0.1153, "step": 32575 }, { "epoch": 3.81, "grad_norm": 0.7169991489878155, "learning_rate": 2.8809021882833245e-07, "loss": 0.0974, "step": 32580 }, { "epoch": 3.81, "grad_norm": 0.8178308296262314, "learning_rate": 2.863564936615465e-07, "loss": 0.0984, "step": 32585 }, { "epoch": 3.81, "grad_norm": 0.754432967088495, "learning_rate": 2.8462797097559277e-07, "loss": 0.1055, "step": 32590 }, { "epoch": 3.81, "grad_norm": 0.7991279121977617, "learning_rate": 2.829046511343414e-07, "loss": 0.1178, "step": 32595 }, { "epoch": 3.81, "grad_norm": 0.6101320785201979, "learning_rate": 2.8118653450057156e-07, "loss": 0.0894, "step": 32600 }, { "epoch": 3.81, "grad_norm": 0.6675669973010184, "learning_rate": 2.794736214359661e-07, "loss": 0.083, "step": 32605 }, { "epoch": 3.81, "grad_norm": 0.6948785367932933, "learning_rate": 2.7776591230110616e-07, "loss": 0.1013, "step": 32610 }, { "epoch": 3.81, "grad_norm": 1.1985140712048135, "learning_rate": 2.7606340745548463e-07, "loss": 0.1315, "step": 32615 }, { "epoch": 3.81, "grad_norm": 0.664158543894892, "learning_rate": 2.743661072574955e-07, "loss": 0.0871, "step": 32620 }, { "epoch": 3.81, "grad_norm": 0.5929488520729995, "learning_rate": 2.7267401206444445e-07, "loss": 0.1048, "step": 32625 }, { "epoch": 3.81, "grad_norm": 0.7556457158394289, "learning_rate": 2.709871222325272e-07, "loss": 0.1284, "step": 32630 }, { "epoch": 3.81, "grad_norm": 0.5821284737285009, "learning_rate": 2.6930543811685417e-07, "loss": 0.0976, "step": 32635 }, { "epoch": 3.81, "grad_norm": 0.7919901912087645, "learning_rate": 2.676289600714421e-07, "loss": 0.0996, "step": 32640 }, { "epoch": 3.81, "grad_norm": 0.7236007923326127, "learning_rate": 2.6595768844920043e-07, "loss": 0.122, "step": 32645 }, { "epoch": 3.81, "grad_norm": 0.8350896875559715, "learning_rate": 2.6429162360195604e-07, "loss": 0.1068, "step": 32650 }, { "epoch": 3.82, "grad_norm": 0.7259744368333533, "learning_rate": 2.6263076588042557e-07, "loss": 0.1211, "step": 32655 }, { "epoch": 3.82, "grad_norm": 0.7469267122403975, "learning_rate": 2.609751156342405e-07, "loss": 0.1173, "step": 32660 }, { "epoch": 3.82, "grad_norm": 0.6845252081357407, "learning_rate": 2.593246732119359e-07, "loss": 0.1084, "step": 32665 }, { "epoch": 3.82, "grad_norm": 0.5837892758380301, "learning_rate": 2.5767943896094214e-07, "loss": 0.102, "step": 32670 }, { "epoch": 3.82, "grad_norm": 0.6398196226972974, "learning_rate": 2.5603941322760447e-07, "loss": 0.1148, "step": 32675 }, { "epoch": 3.82, "grad_norm": 0.6639789541658405, "learning_rate": 2.544045963571551e-07, "loss": 0.0907, "step": 32680 }, { "epoch": 3.82, "grad_norm": 0.6943677216364776, "learning_rate": 2.5277498869374915e-07, "loss": 0.1023, "step": 32685 }, { "epoch": 3.82, "grad_norm": 0.6715262922582725, "learning_rate": 2.5115059058043175e-07, "loss": 0.0813, "step": 32690 }, { "epoch": 3.82, "grad_norm": 0.7556016083695952, "learning_rate": 2.4953140235915996e-07, "loss": 0.0926, "step": 32695 }, { "epoch": 3.82, "grad_norm": 0.685707078716426, "learning_rate": 2.47917424370786e-07, "loss": 0.0894, "step": 32700 }, { "epoch": 3.82, "grad_norm": 0.6267829794940367, "learning_rate": 2.4630865695507156e-07, "loss": 0.0989, "step": 32705 }, { "epoch": 3.82, "grad_norm": 0.7271071170557324, "learning_rate": 2.447051004506762e-07, "loss": 0.1027, "step": 32710 }, { "epoch": 3.82, "grad_norm": 0.73891620426844, "learning_rate": 2.43106755195166e-07, "loss": 0.0942, "step": 32715 }, { "epoch": 3.82, "grad_norm": 0.7630890231537586, "learning_rate": 2.4151362152501077e-07, "loss": 0.1151, "step": 32720 }, { "epoch": 3.82, "grad_norm": 0.6415328811103522, "learning_rate": 2.399256997755811e-07, "loss": 0.1045, "step": 32725 }, { "epoch": 3.82, "grad_norm": 0.6457011037497107, "learning_rate": 2.3834299028115125e-07, "loss": 0.1107, "step": 32730 }, { "epoch": 3.82, "grad_norm": 0.7563047492549106, "learning_rate": 2.3676549337490195e-07, "loss": 0.11, "step": 32735 }, { "epoch": 3.83, "grad_norm": 0.6943494751661062, "learning_rate": 2.3519320938890644e-07, "loss": 0.0843, "step": 32740 }, { "epoch": 3.83, "grad_norm": 0.7397622650658459, "learning_rate": 2.3362613865414994e-07, "loss": 0.1445, "step": 32745 }, { "epoch": 3.83, "grad_norm": 0.7719907620459707, "learning_rate": 2.3206428150051584e-07, "loss": 0.1243, "step": 32750 }, { "epoch": 3.83, "grad_norm": 0.6483475507050217, "learning_rate": 2.305076382567911e-07, "loss": 0.0964, "step": 32755 }, { "epoch": 3.83, "grad_norm": 0.9620139163266311, "learning_rate": 2.2895620925066918e-07, "loss": 0.0997, "step": 32760 }, { "epoch": 3.83, "grad_norm": 0.703196161486108, "learning_rate": 2.2740999480874158e-07, "loss": 0.0931, "step": 32765 }, { "epoch": 3.83, "grad_norm": 0.6959990049701891, "learning_rate": 2.2586899525650074e-07, "loss": 0.1041, "step": 32770 }, { "epoch": 3.83, "grad_norm": 0.7188801905720508, "learning_rate": 2.243332109183399e-07, "loss": 0.1149, "step": 32775 }, { "epoch": 3.83, "grad_norm": 0.6900215615479066, "learning_rate": 2.2280264211756162e-07, "loss": 0.1108, "step": 32780 }, { "epoch": 3.83, "grad_norm": 0.8582597865770255, "learning_rate": 2.2127728917636925e-07, "loss": 0.1074, "step": 32785 }, { "epoch": 3.83, "grad_norm": 0.7723227820547632, "learning_rate": 2.197571524158615e-07, "loss": 0.0841, "step": 32790 }, { "epoch": 3.83, "grad_norm": 0.7325119649035282, "learning_rate": 2.1824223215604345e-07, "loss": 0.0934, "step": 32795 }, { "epoch": 3.83, "grad_norm": 0.6585334310683003, "learning_rate": 2.1673252871582394e-07, "loss": 0.0915, "step": 32800 }, { "epoch": 3.83, "grad_norm": 0.6829520818192908, "learning_rate": 2.152280424130071e-07, "loss": 0.1232, "step": 32805 }, { "epoch": 3.83, "grad_norm": 0.8933620592736912, "learning_rate": 2.1372877356430898e-07, "loss": 0.139, "step": 32810 }, { "epoch": 3.83, "grad_norm": 0.670699260948708, "learning_rate": 2.1223472248533828e-07, "loss": 0.1057, "step": 32815 }, { "epoch": 3.83, "grad_norm": 0.7181288777438867, "learning_rate": 2.1074588949060447e-07, "loss": 0.1071, "step": 32820 }, { "epoch": 3.84, "grad_norm": 0.8206283163313677, "learning_rate": 2.0926227489352913e-07, "loss": 0.1011, "step": 32825 }, { "epoch": 3.84, "grad_norm": 0.7898996390125568, "learning_rate": 2.0778387900642627e-07, "loss": 0.1171, "step": 32830 }, { "epoch": 3.84, "grad_norm": 0.7397473588449304, "learning_rate": 2.0631070214051086e-07, "loss": 0.0908, "step": 32835 }, { "epoch": 3.84, "grad_norm": 0.6019278201804931, "learning_rate": 2.0484274460590702e-07, "loss": 0.0967, "step": 32840 }, { "epoch": 3.84, "grad_norm": 0.7294917106704344, "learning_rate": 2.0338000671163148e-07, "loss": 0.0931, "step": 32845 }, { "epoch": 3.84, "grad_norm": 0.735366965535307, "learning_rate": 2.0192248876560738e-07, "loss": 0.1015, "step": 32850 }, { "epoch": 3.84, "grad_norm": 0.695523670843052, "learning_rate": 2.0047019107465592e-07, "loss": 0.0972, "step": 32855 }, { "epoch": 3.84, "grad_norm": 0.717702936640991, "learning_rate": 1.990231139445048e-07, "loss": 0.1001, "step": 32860 }, { "epoch": 3.84, "grad_norm": 0.6931280547082822, "learning_rate": 1.9758125767977698e-07, "loss": 0.12, "step": 32865 }, { "epoch": 3.84, "grad_norm": 0.920092984059423, "learning_rate": 1.9614462258399636e-07, "loss": 0.0989, "step": 32870 }, { "epoch": 3.84, "grad_norm": 0.7544529122596434, "learning_rate": 1.9471320895959044e-07, "loss": 0.0813, "step": 32875 }, { "epoch": 3.84, "grad_norm": 0.6353310379459537, "learning_rate": 1.9328701710788766e-07, "loss": 0.1283, "step": 32880 }, { "epoch": 3.84, "grad_norm": 0.6008702900711697, "learning_rate": 1.918660473291145e-07, "loss": 0.097, "step": 32885 }, { "epoch": 3.84, "grad_norm": 0.8212571569984326, "learning_rate": 1.9045029992240394e-07, "loss": 0.109, "step": 32890 }, { "epoch": 3.84, "grad_norm": 0.6573232738310341, "learning_rate": 1.890397751857814e-07, "loss": 0.1119, "step": 32895 }, { "epoch": 3.84, "grad_norm": 0.7396375184239369, "learning_rate": 1.8763447341618166e-07, "loss": 0.0988, "step": 32900 }, { "epoch": 3.84, "grad_norm": 0.6706843706478215, "learning_rate": 1.8623439490943196e-07, "loss": 0.0977, "step": 32905 }, { "epoch": 3.85, "grad_norm": 0.6518495048785922, "learning_rate": 1.8483953996026315e-07, "loss": 0.0894, "step": 32910 }, { "epoch": 3.85, "grad_norm": 0.853311570942987, "learning_rate": 1.8344990886230707e-07, "loss": 0.1016, "step": 32915 }, { "epoch": 3.85, "grad_norm": 0.6822120818833445, "learning_rate": 1.8206550190809913e-07, "loss": 0.0849, "step": 32920 }, { "epoch": 3.85, "grad_norm": 0.808763867838551, "learning_rate": 1.8068631938906455e-07, "loss": 0.1065, "step": 32925 }, { "epoch": 3.85, "grad_norm": 0.6777801634803904, "learning_rate": 1.793123615955461e-07, "loss": 0.121, "step": 32930 }, { "epoch": 3.85, "grad_norm": 0.5216664008856158, "learning_rate": 1.7794362881676795e-07, "loss": 0.1038, "step": 32935 }, { "epoch": 3.85, "grad_norm": 0.733465640691882, "learning_rate": 1.7658012134086355e-07, "loss": 0.1186, "step": 32940 }, { "epoch": 3.85, "grad_norm": 0.6690813153046264, "learning_rate": 1.7522183945486715e-07, "loss": 0.0959, "step": 32945 }, { "epoch": 3.85, "grad_norm": 0.6527260330200685, "learning_rate": 1.7386878344471113e-07, "loss": 0.0969, "step": 32950 }, { "epoch": 3.85, "grad_norm": 0.7548421301919747, "learning_rate": 1.725209535952288e-07, "loss": 0.1092, "step": 32955 }, { "epoch": 3.85, "grad_norm": 0.7434471422746521, "learning_rate": 1.7117835019014873e-07, "loss": 0.1014, "step": 32960 }, { "epoch": 3.85, "grad_norm": 0.7781774749965156, "learning_rate": 1.698409735121087e-07, "loss": 0.1114, "step": 32965 }, { "epoch": 3.85, "grad_norm": 0.8529976801774993, "learning_rate": 1.6850882384263633e-07, "loss": 0.13, "step": 32970 }, { "epoch": 3.85, "grad_norm": 0.6729666114275548, "learning_rate": 1.6718190146216561e-07, "loss": 0.1044, "step": 32975 }, { "epoch": 3.85, "grad_norm": 0.820076386360636, "learning_rate": 1.6586020665002588e-07, "loss": 0.1169, "step": 32980 }, { "epoch": 3.85, "grad_norm": 0.5692615674621903, "learning_rate": 1.6454373968444736e-07, "loss": 0.1005, "step": 32985 }, { "epoch": 3.85, "grad_norm": 0.7497492165580306, "learning_rate": 1.6323250084256115e-07, "loss": 0.112, "step": 32990 }, { "epoch": 3.86, "grad_norm": 0.6672835548870172, "learning_rate": 1.619264904003992e-07, "loss": 0.1029, "step": 32995 }, { "epoch": 3.86, "grad_norm": 1.0023162133682435, "learning_rate": 1.6062570863288608e-07, "loss": 0.0922, "step": 33000 }, { "epoch": 3.86, "grad_norm": 0.7626385054677094, "learning_rate": 1.593301558138527e-07, "loss": 0.124, "step": 33005 }, { "epoch": 3.86, "grad_norm": 0.5707166728478943, "learning_rate": 1.5803983221602257e-07, "loss": 0.0954, "step": 33010 }, { "epoch": 3.86, "grad_norm": 0.6640522393751017, "learning_rate": 1.5675473811102848e-07, "loss": 0.117, "step": 33015 }, { "epoch": 3.86, "grad_norm": 0.7930275495634006, "learning_rate": 1.5547487376939007e-07, "loss": 0.1109, "step": 33020 }, { "epoch": 3.86, "grad_norm": 0.6476419683914998, "learning_rate": 1.542002394605363e-07, "loss": 0.1042, "step": 33025 }, { "epoch": 3.86, "grad_norm": 1.2257779445973738, "learning_rate": 1.529308354527914e-07, "loss": 0.1506, "step": 33030 }, { "epoch": 3.86, "grad_norm": 0.8766695120797283, "learning_rate": 1.51666662013375e-07, "loss": 0.1324, "step": 33035 }, { "epoch": 3.86, "grad_norm": 0.7746484476213903, "learning_rate": 1.5040771940841024e-07, "loss": 0.1222, "step": 33040 }, { "epoch": 3.86, "grad_norm": 0.7162793865115763, "learning_rate": 1.4915400790292133e-07, "loss": 0.1192, "step": 33045 }, { "epoch": 3.86, "grad_norm": 0.7436189803687565, "learning_rate": 1.4790552776081935e-07, "loss": 0.1252, "step": 33050 }, { "epoch": 3.86, "grad_norm": 0.512078692715849, "learning_rate": 1.4666227924493014e-07, "loss": 0.0928, "step": 33055 }, { "epoch": 3.86, "grad_norm": 0.6412273309909928, "learning_rate": 1.454242626169694e-07, "loss": 0.0957, "step": 33060 }, { "epoch": 3.86, "grad_norm": 0.7453721356989709, "learning_rate": 1.4419147813754808e-07, "loss": 0.0928, "step": 33065 }, { "epoch": 3.86, "grad_norm": 0.6044719042014551, "learning_rate": 1.4296392606618359e-07, "loss": 0.0829, "step": 33070 }, { "epoch": 3.86, "grad_norm": 0.6579268397350826, "learning_rate": 1.4174160666128865e-07, "loss": 0.0967, "step": 33075 }, { "epoch": 3.86, "grad_norm": 0.6337095541512262, "learning_rate": 1.4052452018017415e-07, "loss": 0.0898, "step": 33080 }, { "epoch": 3.87, "grad_norm": 0.6854527863497736, "learning_rate": 1.3931266687904897e-07, "loss": 0.1107, "step": 33085 }, { "epoch": 3.87, "grad_norm": 0.8285005601543393, "learning_rate": 1.381060470130202e-07, "loss": 0.0973, "step": 33090 }, { "epoch": 3.87, "grad_norm": 0.7835418171330808, "learning_rate": 1.3690466083609577e-07, "loss": 0.1163, "step": 33095 }, { "epoch": 3.87, "grad_norm": 0.7789627426880127, "learning_rate": 1.3570850860118166e-07, "loss": 0.1078, "step": 33100 }, { "epoch": 3.87, "grad_norm": 0.8177357433154049, "learning_rate": 1.3451759056007373e-07, "loss": 0.1, "step": 33105 }, { "epoch": 3.87, "grad_norm": 0.6553833711563116, "learning_rate": 1.3333190696347697e-07, "loss": 0.1216, "step": 33110 }, { "epoch": 3.87, "grad_norm": 0.8986849612898306, "learning_rate": 1.3215145806099172e-07, "loss": 0.0992, "step": 33115 }, { "epoch": 3.87, "grad_norm": 0.5771019353697012, "learning_rate": 1.3097624410111364e-07, "loss": 0.1003, "step": 33120 }, { "epoch": 3.87, "grad_norm": 0.6467013077509892, "learning_rate": 1.2980626533123652e-07, "loss": 0.0971, "step": 33125 }, { "epoch": 3.87, "grad_norm": 0.6947407302025335, "learning_rate": 1.28641521997655e-07, "loss": 0.1084, "step": 33130 }, { "epoch": 3.87, "grad_norm": 0.6018842177853012, "learning_rate": 1.2748201434555628e-07, "loss": 0.0741, "step": 33135 }, { "epoch": 3.87, "grad_norm": 0.7536956818890865, "learning_rate": 1.2632774261903403e-07, "loss": 0.1032, "step": 33140 }, { "epoch": 3.87, "grad_norm": 0.7462745606244382, "learning_rate": 1.2517870706106883e-07, "loss": 0.089, "step": 33145 }, { "epoch": 3.87, "grad_norm": 0.6993265220807361, "learning_rate": 1.2403490791354776e-07, "loss": 0.1259, "step": 33150 }, { "epoch": 3.87, "grad_norm": 0.7986671695766824, "learning_rate": 1.2289634541725325e-07, "loss": 0.1147, "step": 33155 }, { "epoch": 3.87, "grad_norm": 0.8410296199753822, "learning_rate": 1.21763019811863e-07, "loss": 0.1169, "step": 33160 }, { "epoch": 3.87, "grad_norm": 0.8784415726983966, "learning_rate": 1.2063493133595837e-07, "loss": 0.1129, "step": 33165 }, { "epoch": 3.88, "grad_norm": 0.7451154941953951, "learning_rate": 1.19512080227005e-07, "loss": 0.1062, "step": 33170 }, { "epoch": 3.88, "grad_norm": 0.7734901166213242, "learning_rate": 1.1839446672138043e-07, "loss": 0.1009, "step": 33175 }, { "epoch": 3.88, "grad_norm": 0.7809789727997014, "learning_rate": 1.1728209105435484e-07, "loss": 0.1001, "step": 33180 }, { "epoch": 3.88, "grad_norm": 0.6281136783223772, "learning_rate": 1.1617495346009367e-07, "loss": 0.1, "step": 33185 }, { "epoch": 3.88, "grad_norm": 0.7723672394865146, "learning_rate": 1.1507305417166048e-07, "loss": 0.0941, "step": 33190 }, { "epoch": 3.88, "grad_norm": 0.7205157090910749, "learning_rate": 1.1397639342101696e-07, "loss": 0.114, "step": 33195 }, { "epoch": 3.88, "grad_norm": 0.6665552215993625, "learning_rate": 1.1288497143902288e-07, "loss": 0.0916, "step": 33200 }, { "epoch": 3.88, "grad_norm": 0.8077242572567152, "learning_rate": 1.1179878845543056e-07, "loss": 0.1105, "step": 33205 }, { "epoch": 3.88, "grad_norm": 0.725589908230325, "learning_rate": 1.10717844698896e-07, "loss": 0.1069, "step": 33210 }, { "epoch": 3.88, "grad_norm": 0.6961444627918473, "learning_rate": 1.0964214039696774e-07, "loss": 0.1029, "step": 33215 }, { "epoch": 3.88, "grad_norm": 0.7568406385243643, "learning_rate": 1.0857167577609517e-07, "loss": 0.0901, "step": 33220 }, { "epoch": 3.88, "grad_norm": 0.9051158833564148, "learning_rate": 1.0750645106162028e-07, "loss": 0.1322, "step": 33225 }, { "epoch": 3.88, "grad_norm": 0.8357552113691215, "learning_rate": 1.0644646647778312e-07, "loss": 0.1192, "step": 33230 }, { "epoch": 3.88, "grad_norm": 0.8965697828998737, "learning_rate": 1.0539172224772465e-07, "loss": 0.1306, "step": 33235 }, { "epoch": 3.88, "grad_norm": 0.8219282601721526, "learning_rate": 1.043422185934756e-07, "loss": 0.0919, "step": 33240 }, { "epoch": 3.88, "grad_norm": 0.8386350469888751, "learning_rate": 1.0329795573596756e-07, "loss": 0.1242, "step": 33245 }, { "epoch": 3.88, "grad_norm": 0.6588818579812464, "learning_rate": 1.0225893389503304e-07, "loss": 0.0925, "step": 33250 }, { "epoch": 3.89, "grad_norm": 0.6571316830726425, "learning_rate": 1.0122515328939709e-07, "loss": 0.0848, "step": 33255 }, { "epoch": 3.89, "grad_norm": 0.7358825850984076, "learning_rate": 1.0019661413667725e-07, "loss": 0.0879, "step": 33260 }, { "epoch": 3.89, "grad_norm": 0.6644260693294591, "learning_rate": 9.917331665339202e-08, "loss": 0.0966, "step": 33265 }, { "epoch": 3.89, "grad_norm": 0.6325696920490801, "learning_rate": 9.815526105495799e-08, "loss": 0.0727, "step": 33270 }, { "epoch": 3.89, "grad_norm": 0.7947258279183854, "learning_rate": 9.714244755568702e-08, "loss": 0.1053, "step": 33275 }, { "epoch": 3.89, "grad_norm": 0.9447115517050975, "learning_rate": 9.613487636878915e-08, "loss": 0.1182, "step": 33280 }, { "epoch": 3.89, "grad_norm": 0.6856283036419851, "learning_rate": 9.513254770636137e-08, "loss": 0.086, "step": 33285 }, { "epoch": 3.89, "grad_norm": 0.5332081866270109, "learning_rate": 9.413546177941269e-08, "loss": 0.0799, "step": 33290 }, { "epoch": 3.89, "grad_norm": 0.5446089743953006, "learning_rate": 9.314361879783351e-08, "loss": 0.0836, "step": 33295 }, { "epoch": 3.89, "grad_norm": 0.7627020285780585, "learning_rate": 9.215701897042072e-08, "loss": 0.1232, "step": 33300 }, { "epoch": 3.89, "grad_norm": 2.257405302459278, "learning_rate": 9.117566250486098e-08, "loss": 0.0944, "step": 33305 }, { "epoch": 3.89, "grad_norm": 0.7979373104809461, "learning_rate": 9.019954960774457e-08, "loss": 0.1111, "step": 33310 }, { "epoch": 3.89, "grad_norm": 0.6369170698961653, "learning_rate": 8.92286804845488e-08, "loss": 0.1126, "step": 33315 }, { "epoch": 3.89, "grad_norm": 0.6965647196067069, "learning_rate": 8.82630553396574e-08, "loss": 0.1002, "step": 33320 }, { "epoch": 3.89, "grad_norm": 0.7328705376593999, "learning_rate": 8.730267437633832e-08, "loss": 0.1044, "step": 33325 }, { "epoch": 3.89, "grad_norm": 0.7695604713234119, "learning_rate": 8.634753779676596e-08, "loss": 0.1126, "step": 33330 }, { "epoch": 3.89, "grad_norm": 0.7168317718667033, "learning_rate": 8.539764580200726e-08, "loss": 0.1076, "step": 33335 }, { "epoch": 3.9, "grad_norm": 0.6860980668540384, "learning_rate": 8.445299859201894e-08, "loss": 0.0887, "step": 33340 }, { "epoch": 3.9, "grad_norm": 0.7178454389519474, "learning_rate": 8.351359636566414e-08, "loss": 0.1001, "step": 33345 }, { "epoch": 3.9, "grad_norm": 0.6627330711064285, "learning_rate": 8.257943932069856e-08, "loss": 0.1023, "step": 33350 }, { "epoch": 3.9, "grad_norm": 0.6314070593844423, "learning_rate": 8.165052765376491e-08, "loss": 0.0941, "step": 33355 }, { "epoch": 3.9, "grad_norm": 1.2514996973935248, "learning_rate": 8.07268615604151e-08, "loss": 0.1229, "step": 33360 }, { "epoch": 3.9, "grad_norm": 0.8448608013988977, "learning_rate": 7.980844123508524e-08, "loss": 0.1244, "step": 33365 }, { "epoch": 3.9, "grad_norm": 0.5392703455846917, "learning_rate": 7.88952668711207e-08, "loss": 0.1034, "step": 33370 }, { "epoch": 3.9, "grad_norm": 0.5959426998468517, "learning_rate": 7.798733866074548e-08, "loss": 0.0821, "step": 33375 }, { "epoch": 3.9, "grad_norm": 1.0519091864583134, "learning_rate": 7.708465679509003e-08, "loss": 0.1207, "step": 33380 }, { "epoch": 3.9, "grad_norm": 0.6156119003200068, "learning_rate": 7.61872214641829e-08, "loss": 0.0956, "step": 33385 }, { "epoch": 3.9, "grad_norm": 0.8278157176081044, "learning_rate": 7.529503285693962e-08, "loss": 0.0971, "step": 33390 }, { "epoch": 3.9, "grad_norm": 0.8911182785867745, "learning_rate": 7.440809116117664e-08, "loss": 0.1542, "step": 33395 }, { "epoch": 3.9, "grad_norm": 0.8142086442637851, "learning_rate": 7.352639656360294e-08, "loss": 0.1412, "step": 33400 }, { "epoch": 3.9, "grad_norm": 3.9884926239661724, "learning_rate": 7.26499492498256e-08, "loss": 0.1485, "step": 33405 }, { "epoch": 3.9, "grad_norm": 0.6543922954498517, "learning_rate": 7.177874940434703e-08, "loss": 0.1086, "step": 33410 }, { "epoch": 3.9, "grad_norm": 0.9101625988160381, "learning_rate": 7.091279721055943e-08, "loss": 0.1162, "step": 33415 }, { "epoch": 3.9, "grad_norm": 0.7435678301335883, "learning_rate": 7.005209285076141e-08, "loss": 0.1384, "step": 33420 }, { "epoch": 3.91, "grad_norm": 0.6626040543619331, "learning_rate": 6.919663650613862e-08, "loss": 0.0875, "step": 33425 }, { "epoch": 3.91, "grad_norm": 0.7360348438866782, "learning_rate": 6.834642835676919e-08, "loss": 0.0923, "step": 33430 }, { "epoch": 3.91, "grad_norm": 0.722797405398677, "learning_rate": 6.750146858163498e-08, "loss": 0.0983, "step": 33435 }, { "epoch": 3.91, "grad_norm": 0.7893377507024466, "learning_rate": 6.666175735860758e-08, "loss": 0.1026, "step": 33440 }, { "epoch": 3.91, "grad_norm": 0.7249850498666216, "learning_rate": 6.582729486445393e-08, "loss": 0.1107, "step": 33445 }, { "epoch": 3.91, "grad_norm": 0.8121679770889151, "learning_rate": 6.49980812748363e-08, "loss": 0.1138, "step": 33450 }, { "epoch": 3.91, "grad_norm": 0.6601798958973863, "learning_rate": 6.417411676431784e-08, "loss": 0.0777, "step": 33455 }, { "epoch": 3.91, "grad_norm": 0.607472502744155, "learning_rate": 6.335540150634589e-08, "loss": 0.0795, "step": 33460 }, { "epoch": 3.91, "grad_norm": 0.6922592549521205, "learning_rate": 6.254193567327427e-08, "loss": 0.1412, "step": 33465 }, { "epoch": 3.91, "grad_norm": 0.6649560521868016, "learning_rate": 6.173371943634099e-08, "loss": 0.1155, "step": 33470 }, { "epoch": 3.91, "grad_norm": 0.6744963317035093, "learning_rate": 6.093075296568774e-08, "loss": 0.0819, "step": 33475 }, { "epoch": 3.91, "grad_norm": 0.861428374146387, "learning_rate": 6.013303643034319e-08, "loss": 0.0977, "step": 33480 }, { "epoch": 3.91, "grad_norm": 0.8426083832818959, "learning_rate": 5.934056999823967e-08, "loss": 0.0984, "step": 33485 }, { "epoch": 3.91, "grad_norm": 1.3346263487238814, "learning_rate": 5.8553353836196535e-08, "loss": 0.0991, "step": 33490 }, { "epoch": 3.91, "grad_norm": 0.6448161012973965, "learning_rate": 5.777138810993399e-08, "loss": 0.1077, "step": 33495 }, { "epoch": 3.91, "grad_norm": 0.6667483839701805, "learning_rate": 5.699467298405925e-08, "loss": 0.0831, "step": 33500 }, { "epoch": 3.91, "grad_norm": 0.8138717103727736, "learning_rate": 5.622320862208319e-08, "loss": 0.0894, "step": 33505 }, { "epoch": 3.92, "grad_norm": 0.6845526280905971, "learning_rate": 5.5456995186406455e-08, "loss": 0.1004, "step": 33510 }, { "epoch": 3.92, "grad_norm": 0.7395078166880183, "learning_rate": 5.469603283832225e-08, "loss": 0.0987, "step": 33515 }, { "epoch": 3.92, "grad_norm": 0.6214036595764711, "learning_rate": 5.3940321738024656e-08, "loss": 0.0847, "step": 33520 }, { "epoch": 3.92, "grad_norm": 0.6401316669431774, "learning_rate": 5.3189862044597526e-08, "loss": 0.0926, "step": 33525 }, { "epoch": 3.92, "grad_norm": 0.6217032885242302, "learning_rate": 5.2444653916020045e-08, "loss": 0.095, "step": 33530 }, { "epoch": 3.92, "grad_norm": 0.7248023953012112, "learning_rate": 5.170469750916396e-08, "loss": 0.1178, "step": 33535 }, { "epoch": 3.92, "grad_norm": 0.8369934764451854, "learning_rate": 5.096999297980465e-08, "loss": 0.0985, "step": 33540 }, { "epoch": 3.92, "grad_norm": 0.6431345849655774, "learning_rate": 5.0240540482596187e-08, "loss": 0.1211, "step": 33545 }, { "epoch": 3.92, "grad_norm": 1.026422262899042, "learning_rate": 4.9516340171104626e-08, "loss": 0.1182, "step": 33550 }, { "epoch": 3.92, "grad_norm": 0.625611866226964, "learning_rate": 4.879739219777746e-08, "loss": 0.0848, "step": 33555 }, { "epoch": 3.92, "grad_norm": 0.6293556494239031, "learning_rate": 4.808369671396029e-08, "loss": 0.081, "step": 33560 }, { "epoch": 3.92, "grad_norm": 0.7686242653350159, "learning_rate": 4.737525386989405e-08, "loss": 0.1037, "step": 33565 }, { "epoch": 3.92, "grad_norm": 0.7330377126756894, "learning_rate": 4.667206381471223e-08, "loss": 0.0987, "step": 33570 }, { "epoch": 3.92, "grad_norm": 0.6797606005008716, "learning_rate": 4.597412669644641e-08, "loss": 0.1156, "step": 33575 }, { "epoch": 3.92, "grad_norm": 0.7835310593315485, "learning_rate": 4.5281442662017945e-08, "loss": 0.1243, "step": 33580 }, { "epoch": 3.92, "grad_norm": 0.8678003874081279, "learning_rate": 4.459401185724355e-08, "loss": 0.1289, "step": 33585 }, { "epoch": 3.92, "grad_norm": 0.6305765323751037, "learning_rate": 4.3911834426838e-08, "loss": 0.0855, "step": 33590 }, { "epoch": 3.93, "grad_norm": 0.663003353273071, "learning_rate": 4.323491051440309e-08, "loss": 0.1084, "step": 33595 }, { "epoch": 3.93, "grad_norm": 0.7415541684729337, "learning_rate": 4.256324026243874e-08, "loss": 0.1085, "step": 33600 }, { "epoch": 3.93, "grad_norm": 0.7920274582707633, "learning_rate": 4.1896823812340166e-08, "loss": 0.1192, "step": 33605 }, { "epoch": 3.93, "grad_norm": 0.7914292127569258, "learning_rate": 4.1235661304395154e-08, "loss": 0.0941, "step": 33610 }, { "epoch": 3.93, "grad_norm": 0.6800069258927826, "learning_rate": 4.057975287778404e-08, "loss": 0.0962, "step": 33615 }, { "epoch": 3.93, "grad_norm": 0.662411293180192, "learning_rate": 3.9929098670582476e-08, "loss": 0.1008, "step": 33620 }, { "epoch": 3.93, "grad_norm": 0.5597369403391514, "learning_rate": 3.9283698819758684e-08, "loss": 0.1129, "step": 33625 }, { "epoch": 3.93, "grad_norm": 0.7887612776867691, "learning_rate": 3.8643553461178985e-08, "loss": 0.1054, "step": 33630 }, { "epoch": 3.93, "grad_norm": 0.6394196479168663, "learning_rate": 3.8008662729599464e-08, "loss": 0.0704, "step": 33635 }, { "epoch": 3.93, "grad_norm": 0.6397274259824062, "learning_rate": 3.737902675867155e-08, "loss": 0.0939, "step": 33640 }, { "epoch": 3.93, "grad_norm": 0.7653282387371841, "learning_rate": 3.6754645680942e-08, "loss": 0.0771, "step": 33645 }, { "epoch": 3.93, "grad_norm": 0.8695060810047941, "learning_rate": 3.613551962784456e-08, "loss": 0.1264, "step": 33650 }, { "epoch": 3.93, "grad_norm": 0.7124541455814115, "learning_rate": 3.552164872971664e-08, "loss": 0.1311, "step": 33655 }, { "epoch": 3.93, "grad_norm": 0.7483825871614354, "learning_rate": 3.491303311577987e-08, "loss": 0.1237, "step": 33660 }, { "epoch": 3.93, "grad_norm": 0.7314402274775395, "learning_rate": 3.430967291415954e-08, "loss": 0.1047, "step": 33665 }, { "epoch": 3.93, "grad_norm": 0.784191002070497, "learning_rate": 3.3711568251865166e-08, "loss": 0.1036, "step": 33670 }, { "epoch": 3.93, "grad_norm": 0.7002832022753681, "learning_rate": 3.311871925480714e-08, "loss": 0.089, "step": 33675 }, { "epoch": 3.94, "grad_norm": 0.7157808569863947, "learning_rate": 3.253112604778286e-08, "loss": 0.0944, "step": 33680 }, { "epoch": 3.94, "grad_norm": 0.7690302242905498, "learning_rate": 3.194878875449059e-08, "loss": 0.1001, "step": 33685 }, { "epoch": 3.94, "grad_norm": 0.7414198306593133, "learning_rate": 3.1371707497512836e-08, "loss": 0.1064, "step": 33690 }, { "epoch": 3.94, "grad_norm": 0.7107395518265439, "learning_rate": 3.079988239833853e-08, "loss": 0.1095, "step": 33695 }, { "epoch": 3.94, "grad_norm": 0.7577010613976696, "learning_rate": 3.0233313577338055e-08, "loss": 0.0867, "step": 33700 }, { "epoch": 3.94, "grad_norm": 0.9477436716739325, "learning_rate": 2.967200115378266e-08, "loss": 0.1254, "step": 33705 }, { "epoch": 3.94, "grad_norm": 0.7070504141193993, "learning_rate": 2.911594524583061e-08, "loss": 0.1116, "step": 33710 }, { "epoch": 3.94, "grad_norm": 0.7098825236377333, "learning_rate": 2.856514597054383e-08, "loss": 0.1208, "step": 33715 }, { "epoch": 3.94, "grad_norm": 0.737516373092627, "learning_rate": 2.801960344386567e-08, "loss": 0.1021, "step": 33720 }, { "epoch": 3.94, "grad_norm": 0.7687391928011947, "learning_rate": 2.747931778064039e-08, "loss": 0.0962, "step": 33725 }, { "epoch": 3.94, "grad_norm": 0.6901272284902701, "learning_rate": 2.6944289094602005e-08, "loss": 0.119, "step": 33730 }, { "epoch": 3.94, "grad_norm": 0.6559284554910177, "learning_rate": 2.641451749838264e-08, "loss": 0.1188, "step": 33735 }, { "epoch": 3.94, "grad_norm": 0.6720122791796105, "learning_rate": 2.58900031035042e-08, "loss": 0.1079, "step": 33740 }, { "epoch": 3.94, "grad_norm": 0.6682742709753189, "learning_rate": 2.5370746020381142e-08, "loss": 0.0857, "step": 33745 }, { "epoch": 3.94, "grad_norm": 0.7208439681675699, "learning_rate": 2.4856746358326023e-08, "loss": 0.1074, "step": 33750 }, { "epoch": 3.94, "grad_norm": 0.815113495890879, "learning_rate": 2.4348004225535627e-08, "loss": 0.1263, "step": 33755 }, { "epoch": 3.94, "grad_norm": 0.6423649019025683, "learning_rate": 2.3844519729107616e-08, "loss": 0.0894, "step": 33760 }, { "epoch": 3.94, "grad_norm": 0.7536765704891965, "learning_rate": 2.3346292975032212e-08, "loss": 0.1248, "step": 33765 }, { "epoch": 3.95, "grad_norm": 0.7319238420791185, "learning_rate": 2.2853324068192184e-08, "loss": 0.1095, "step": 33770 }, { "epoch": 3.95, "grad_norm": 0.5632626053157793, "learning_rate": 2.2365613112360074e-08, "loss": 0.0788, "step": 33775 }, { "epoch": 3.95, "grad_norm": 0.6740306505306219, "learning_rate": 2.1883160210203756e-08, "loss": 0.1018, "step": 33780 }, { "epoch": 3.95, "grad_norm": 0.7522175455704624, "learning_rate": 2.1405965463289212e-08, "loss": 0.1205, "step": 33785 }, { "epoch": 3.95, "grad_norm": 0.784204697416689, "learning_rate": 2.0934028972063867e-08, "loss": 0.1062, "step": 33790 }, { "epoch": 3.95, "grad_norm": 0.6753449597633256, "learning_rate": 2.0467350835881583e-08, "loss": 0.1027, "step": 33795 }, { "epoch": 3.95, "grad_norm": 0.6617045140518762, "learning_rate": 2.000593115298044e-08, "loss": 0.0929, "step": 33800 }, { "epoch": 3.95, "grad_norm": 0.6234472419640817, "learning_rate": 1.9549770020491076e-08, "loss": 0.0814, "step": 33805 }, { "epoch": 3.95, "grad_norm": 0.8697063755447425, "learning_rate": 1.9098867534447785e-08, "loss": 0.1052, "step": 33810 }, { "epoch": 3.95, "grad_norm": 0.7352054725543583, "learning_rate": 1.865322378976353e-08, "loss": 0.1034, "step": 33815 }, { "epoch": 3.95, "grad_norm": 0.6781023912643896, "learning_rate": 1.821283888025216e-08, "loss": 0.0973, "step": 33820 }, { "epoch": 3.95, "grad_norm": 0.9892892763916536, "learning_rate": 1.7777712898622845e-08, "loss": 0.1115, "step": 33825 }, { "epoch": 3.95, "grad_norm": 0.6928806081834832, "learning_rate": 1.7347845936468987e-08, "loss": 0.0986, "step": 33830 }, { "epoch": 3.95, "grad_norm": 0.7609546731271315, "learning_rate": 1.692323808428764e-08, "loss": 0.118, "step": 33835 }, { "epoch": 3.95, "grad_norm": 0.7410730759366381, "learning_rate": 1.6503889431460085e-08, "loss": 0.1541, "step": 33840 }, { "epoch": 3.95, "grad_norm": 0.7218414487697394, "learning_rate": 1.608980006626015e-08, "loss": 0.1066, "step": 33845 }, { "epoch": 3.95, "grad_norm": 0.8822990443595471, "learning_rate": 1.568097007586533e-08, "loss": 0.1025, "step": 33850 }, { "epoch": 3.96, "grad_norm": 0.8202464436402083, "learning_rate": 1.527739954633456e-08, "loss": 0.0982, "step": 33855 }, { "epoch": 3.96, "grad_norm": 0.7192033692752401, "learning_rate": 1.4879088562624877e-08, "loss": 0.1053, "step": 33860 }, { "epoch": 3.96, "grad_norm": 0.705012222162766, "learning_rate": 1.4486037208583102e-08, "loss": 0.1185, "step": 33865 }, { "epoch": 3.96, "grad_norm": 0.7513719561032955, "learning_rate": 1.4098245566951385e-08, "loss": 0.1005, "step": 33870 }, { "epoch": 3.96, "grad_norm": 0.8403034307299907, "learning_rate": 1.371571371936442e-08, "loss": 0.1173, "step": 33875 }, { "epoch": 3.96, "grad_norm": 0.8237935847068727, "learning_rate": 1.3338441746346687e-08, "loss": 0.1038, "step": 33880 }, { "epoch": 3.96, "grad_norm": 0.6510598233606743, "learning_rate": 1.2966429727323537e-08, "loss": 0.0766, "step": 33885 }, { "epoch": 3.96, "grad_norm": 0.7207365751696249, "learning_rate": 1.2599677740604554e-08, "loss": 0.1144, "step": 33890 }, { "epoch": 3.96, "grad_norm": 0.6489953454447048, "learning_rate": 1.2238185863391871e-08, "loss": 0.127, "step": 33895 }, { "epoch": 3.96, "grad_norm": 0.6161927058919678, "learning_rate": 1.1881954171785725e-08, "loss": 0.0904, "step": 33900 }, { "epoch": 3.96, "grad_norm": 0.6284960074667723, "learning_rate": 1.1530982740778906e-08, "loss": 0.093, "step": 33905 }, { "epoch": 3.96, "grad_norm": 0.7343434090453264, "learning_rate": 1.1185271644251206e-08, "loss": 0.1004, "step": 33910 }, { "epoch": 3.96, "grad_norm": 0.6523001321962878, "learning_rate": 1.0844820954980516e-08, "loss": 0.0959, "step": 33915 }, { "epoch": 3.96, "grad_norm": 0.683588832166793, "learning_rate": 1.0509630744634513e-08, "loss": 0.099, "step": 33920 }, { "epoch": 3.96, "grad_norm": 0.7001910580164512, "learning_rate": 1.0179701083770643e-08, "loss": 0.0876, "step": 33925 }, { "epoch": 3.96, "grad_norm": 0.65851734533734, "learning_rate": 9.85503204185001e-09, "loss": 0.075, "step": 33930 }, { "epoch": 3.96, "grad_norm": 0.7039173929392916, "learning_rate": 9.535623687212391e-09, "loss": 0.0772, "step": 33935 }, { "epoch": 3.97, "grad_norm": 0.7712174304185561, "learning_rate": 9.221476087101222e-09, "loss": 0.1143, "step": 33940 }, { "epoch": 3.97, "grad_norm": 0.7134411916766169, "learning_rate": 8.91258930764416e-09, "loss": 0.089, "step": 33945 }, { "epoch": 3.97, "grad_norm": 0.6945707367005447, "learning_rate": 8.608963413866966e-09, "loss": 0.1038, "step": 33950 }, { "epoch": 3.97, "grad_norm": 0.6810045447227157, "learning_rate": 8.310598469687958e-09, "loss": 0.0833, "step": 33955 }, { "epoch": 3.97, "grad_norm": 1.1836911040275282, "learning_rate": 8.017494537909676e-09, "loss": 0.1162, "step": 33960 }, { "epoch": 3.97, "grad_norm": 0.7143943769664722, "learning_rate": 7.72965168024109e-09, "loss": 0.0786, "step": 33965 }, { "epoch": 3.97, "grad_norm": 0.7195874144744634, "learning_rate": 7.447069957272624e-09, "loss": 0.0971, "step": 33970 }, { "epoch": 3.97, "grad_norm": 0.6224476831249508, "learning_rate": 7.1697494284900245e-09, "loss": 0.0873, "step": 33975 }, { "epoch": 3.97, "grad_norm": 0.6470236046022421, "learning_rate": 6.897690152271596e-09, "loss": 0.0964, "step": 33980 }, { "epoch": 3.97, "grad_norm": 0.604727612901551, "learning_rate": 6.630892185890969e-09, "loss": 0.0898, "step": 33985 }, { "epoch": 3.97, "grad_norm": 1.0460214148557467, "learning_rate": 6.369355585511549e-09, "loss": 0.1192, "step": 33990 }, { "epoch": 3.97, "grad_norm": 0.7126613373807706, "learning_rate": 6.113080406189298e-09, "loss": 0.111, "step": 33995 }, { "epoch": 3.97, "grad_norm": 0.7555909151058204, "learning_rate": 5.862066701872726e-09, "loss": 0.1019, "step": 34000 }, { "epoch": 3.97, "grad_norm": 0.7451304178457472, "learning_rate": 5.616314525402899e-09, "loss": 0.1142, "step": 34005 }, { "epoch": 3.97, "grad_norm": 0.6690123178253058, "learning_rate": 5.375823928510659e-09, "loss": 0.0772, "step": 34010 }, { "epoch": 3.97, "grad_norm": 0.6222457397145524, "learning_rate": 5.140594961827727e-09, "loss": 0.1098, "step": 34015 }, { "epoch": 3.97, "grad_norm": 0.7833964491785993, "learning_rate": 4.910627674867274e-09, "loss": 0.1043, "step": 34020 }, { "epoch": 3.98, "grad_norm": 0.6505869625534229, "learning_rate": 4.685922116040575e-09, "loss": 0.0757, "step": 34025 }, { "epoch": 3.98, "grad_norm": 0.8965448975155454, "learning_rate": 4.466478332651458e-09, "loss": 0.1063, "step": 34030 }, { "epoch": 3.98, "grad_norm": 0.7700486995481837, "learning_rate": 4.252296370893527e-09, "loss": 0.1088, "step": 34035 }, { "epoch": 3.98, "grad_norm": 0.7656239378842655, "learning_rate": 4.043376275855715e-09, "loss": 0.104, "step": 34040 }, { "epoch": 3.98, "grad_norm": 0.545221838353672, "learning_rate": 3.839718091519506e-09, "loss": 0.079, "step": 34045 }, { "epoch": 3.98, "grad_norm": 0.5706935403414807, "learning_rate": 3.641321860756164e-09, "loss": 0.0998, "step": 34050 }, { "epoch": 3.98, "grad_norm": 0.6710423199561348, "learning_rate": 3.4481876253267263e-09, "loss": 0.102, "step": 34055 }, { "epoch": 3.98, "grad_norm": 0.673991634828044, "learning_rate": 3.2603154258931122e-09, "loss": 0.1019, "step": 34060 }, { "epoch": 3.98, "grad_norm": 1.172901444146483, "learning_rate": 3.0777053019986903e-09, "loss": 0.1553, "step": 34065 }, { "epoch": 3.98, "grad_norm": 0.723033328228414, "learning_rate": 2.900357292090483e-09, "loss": 0.1211, "step": 34070 }, { "epoch": 3.98, "grad_norm": 0.8122987070651587, "learning_rate": 2.7282714334997405e-09, "loss": 0.0984, "step": 34075 }, { "epoch": 3.98, "grad_norm": 0.7597744751992892, "learning_rate": 2.5614477624530397e-09, "loss": 0.0995, "step": 34080 }, { "epoch": 3.98, "grad_norm": 0.6960762295450328, "learning_rate": 2.3998863140639594e-09, "loss": 0.1072, "step": 34085 }, { "epoch": 3.98, "grad_norm": 0.7525631690355659, "learning_rate": 2.2435871223497328e-09, "loss": 0.1076, "step": 34090 }, { "epoch": 3.98, "grad_norm": 0.7563564478825053, "learning_rate": 2.092550220209044e-09, "loss": 0.1279, "step": 34095 }, { "epoch": 3.98, "grad_norm": 0.9037530226918882, "learning_rate": 1.9467756394386804e-09, "loss": 0.1157, "step": 34100 }, { "epoch": 3.98, "grad_norm": 0.7520334279010105, "learning_rate": 1.8062634107224308e-09, "loss": 0.1144, "step": 34105 }, { "epoch": 3.99, "grad_norm": 0.7719614821902274, "learning_rate": 1.671013563644963e-09, "loss": 0.097, "step": 34110 }, { "epoch": 3.99, "grad_norm": 0.6740552218025032, "learning_rate": 1.5410261266723958e-09, "loss": 0.1128, "step": 34115 }, { "epoch": 3.99, "grad_norm": 0.7173180353504346, "learning_rate": 1.4163011271689508e-09, "loss": 0.0998, "step": 34120 }, { "epoch": 3.99, "grad_norm": 0.7838965630235181, "learning_rate": 1.2968385913941784e-09, "loss": 0.1062, "step": 34125 }, { "epoch": 3.99, "grad_norm": 0.9710404510554173, "learning_rate": 1.1826385444946297e-09, "loss": 0.1151, "step": 34130 }, { "epoch": 3.99, "grad_norm": 0.7472071809720408, "learning_rate": 1.0737010105094091e-09, "loss": 0.0923, "step": 34135 }, { "epoch": 3.99, "grad_norm": 0.6630474952740282, "learning_rate": 9.700260123729488e-10, "loss": 0.1119, "step": 34140 }, { "epoch": 3.99, "grad_norm": 0.755434665642974, "learning_rate": 8.716135719094576e-10, "loss": 0.1038, "step": 34145 }, { "epoch": 3.99, "grad_norm": 0.6704277119315952, "learning_rate": 7.784637098329217e-10, "loss": 0.0867, "step": 34150 }, { "epoch": 3.99, "grad_norm": 1.039383158977122, "learning_rate": 6.905764457554309e-10, "loss": 0.096, "step": 34155 }, { "epoch": 3.99, "grad_norm": 0.8253035942519326, "learning_rate": 6.079517981788519e-10, "loss": 0.1033, "step": 34160 }, { "epoch": 3.99, "grad_norm": 0.6901537550325304, "learning_rate": 5.305897844920527e-10, "loss": 0.0944, "step": 34165 }, { "epoch": 3.99, "grad_norm": 0.6065534322052565, "learning_rate": 4.584904209847807e-10, "loss": 0.0915, "step": 34170 }, { "epoch": 3.99, "grad_norm": 0.725501993545981, "learning_rate": 3.9165372283378467e-10, "loss": 0.1113, "step": 34175 }, { "epoch": 3.99, "grad_norm": 0.6751466793515113, "learning_rate": 3.300797041083659e-10, "loss": 0.0942, "step": 34180 }, { "epoch": 3.99, "grad_norm": 0.8102656387388302, "learning_rate": 2.737683777731537e-10, "loss": 0.1247, "step": 34185 }, { "epoch": 3.99, "grad_norm": 0.7669960411364124, "learning_rate": 2.227197556770033e-10, "loss": 0.0901, "step": 34190 }, { "epoch": 4.0, "grad_norm": 0.6535970616670373, "learning_rate": 1.7693384856964923e-10, "loss": 0.0972, "step": 34195 }, { "epoch": 4.0, "grad_norm": 0.7958982069060069, "learning_rate": 1.3641066608782728e-10, "loss": 0.1079, "step": 34200 }, { "epoch": 4.0, "grad_norm": 0.6170226473389493, "learning_rate": 1.0115021676360137e-10, "loss": 0.0906, "step": 34205 }, { "epoch": 4.0, "grad_norm": 0.7297000805729437, "learning_rate": 7.115250801881246e-11, "loss": 0.0991, "step": 34210 }, { "epoch": 4.0, "grad_norm": 0.6705272964389867, "learning_rate": 4.641754617062954e-11, "loss": 0.0744, "step": 34215 }, { "epoch": 4.0, "grad_norm": 0.7694244317658472, "learning_rate": 2.6945336423223054e-11, "loss": 0.133, "step": 34220 }, { "epoch": 4.0, "grad_norm": 0.7535135737427812, "learning_rate": 1.2735882873315952e-11, "loss": 0.142, "step": 34225 }, { "epoch": 4.0, "grad_norm": 0.731573104772695, "learning_rate": 3.7891885185104004e-12, "loss": 0.1157, "step": 34230 }, { "epoch": 4.0, "grad_norm": 0.7739159439442312, "learning_rate": 1.0525523785887004e-13, "loss": 0.112, "step": 34235 }, { "epoch": 4.0, "step": 34236, "total_flos": 3463540144865280.0, "train_loss": 0.4881485020316365, "train_runtime": 142779.1917, "train_samples_per_second": 0.959, "train_steps_per_second": 0.24 } ], "logging_steps": 5, "max_steps": 34236, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 8900, "total_flos": 3463540144865280.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }