{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998796485738356, "eval_steps": 500, "global_step": 6231, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016046856821920005, "grad_norm": 9.7310152053833, "learning_rate": 1.0000000000000002e-06, "loss": 44.3516, "step": 1 }, { "epoch": 0.0003209371364384001, "grad_norm": 8.799237251281738, "learning_rate": 2.0000000000000003e-06, "loss": 44.4609, "step": 2 }, { "epoch": 0.0004814057046576002, "grad_norm": 9.908658981323242, "learning_rate": 3e-06, "loss": 44.4609, "step": 3 }, { "epoch": 0.0006418742728768002, "grad_norm": 8.212166786193848, "learning_rate": 4.000000000000001e-06, "loss": 44.5312, "step": 4 }, { "epoch": 0.0008023428410960003, "grad_norm": 11.298144340515137, "learning_rate": 5e-06, "loss": 44.3906, "step": 5 }, { "epoch": 0.0009628114093152004, "grad_norm": 8.43470287322998, "learning_rate": 6e-06, "loss": 44.3594, "step": 6 }, { "epoch": 0.0011232799775344005, "grad_norm": 8.777496337890625, "learning_rate": 7.000000000000001e-06, "loss": 44.5, "step": 7 }, { "epoch": 0.0012837485457536004, "grad_norm": 10.484176635742188, "learning_rate": 8.000000000000001e-06, "loss": 44.5312, "step": 8 }, { "epoch": 0.0014442171139728006, "grad_norm": 8.143372535705566, "learning_rate": 9e-06, "loss": 44.5234, "step": 9 }, { "epoch": 0.0016046856821920006, "grad_norm": 9.605515480041504, "learning_rate": 1e-05, "loss": 44.4531, "step": 10 }, { "epoch": 0.0017651542504112008, "grad_norm": 9.896804809570312, "learning_rate": 1.1000000000000001e-05, "loss": 44.5547, "step": 11 }, { "epoch": 0.0019256228186304008, "grad_norm": 9.673745155334473, "learning_rate": 1.2e-05, "loss": 44.3047, "step": 12 }, { "epoch": 0.002086091386849601, "grad_norm": 11.46737003326416, "learning_rate": 1.3000000000000001e-05, "loss": 44.2891, "step": 13 }, { "epoch": 0.002246559955068801, "grad_norm": 8.89222526550293, "learning_rate": 1.4000000000000001e-05, "loss": 44.4219, "step": 14 }, { "epoch": 0.002407028523288001, "grad_norm": 9.469602584838867, "learning_rate": 1.5e-05, "loss": 44.5, "step": 15 }, { "epoch": 0.002567497091507201, "grad_norm": 9.707489013671875, "learning_rate": 1.6000000000000003e-05, "loss": 44.3906, "step": 16 }, { "epoch": 0.0027279656597264013, "grad_norm": 9.139500617980957, "learning_rate": 1.7000000000000003e-05, "loss": 44.4141, "step": 17 }, { "epoch": 0.0028884342279456013, "grad_norm": 9.822836875915527, "learning_rate": 1.8e-05, "loss": 44.5, "step": 18 }, { "epoch": 0.0030489027961648012, "grad_norm": 9.545944213867188, "learning_rate": 1.9e-05, "loss": 44.4531, "step": 19 }, { "epoch": 0.003209371364384001, "grad_norm": 11.085175514221191, "learning_rate": 2e-05, "loss": 44.4219, "step": 20 }, { "epoch": 0.003369839932603201, "grad_norm": 9.366348266601562, "learning_rate": 2.1e-05, "loss": 44.3359, "step": 21 }, { "epoch": 0.0035303085008224016, "grad_norm": 9.157999038696289, "learning_rate": 2.2000000000000003e-05, "loss": 44.3125, "step": 22 }, { "epoch": 0.0036907770690416015, "grad_norm": 10.825629234313965, "learning_rate": 2.3000000000000003e-05, "loss": 44.3203, "step": 23 }, { "epoch": 0.0038512456372608015, "grad_norm": 9.991546630859375, "learning_rate": 2.4e-05, "loss": 44.3281, "step": 24 }, { "epoch": 0.0040117142054800015, "grad_norm": 9.910871505737305, "learning_rate": 2.5e-05, "loss": 44.3594, "step": 25 }, { "epoch": 0.004172182773699202, "grad_norm": 9.889579772949219, "learning_rate": 2.6000000000000002e-05, "loss": 44.3125, "step": 26 }, { "epoch": 0.0043326513419184014, "grad_norm": 10.513590812683105, "learning_rate": 2.7000000000000002e-05, "loss": 44.3516, "step": 27 }, { "epoch": 0.004493119910137602, "grad_norm": 8.475963592529297, "learning_rate": 2.8000000000000003e-05, "loss": 44.2578, "step": 28 }, { "epoch": 0.004653588478356802, "grad_norm": 9.803268432617188, "learning_rate": 2.9e-05, "loss": 44.4609, "step": 29 }, { "epoch": 0.004814057046576002, "grad_norm": 12.621723175048828, "learning_rate": 3e-05, "loss": 44.1719, "step": 30 }, { "epoch": 0.004974525614795202, "grad_norm": 11.39841079711914, "learning_rate": 3.1e-05, "loss": 44.2344, "step": 31 }, { "epoch": 0.005134994183014402, "grad_norm": 9.363529205322266, "learning_rate": 3.2000000000000005e-05, "loss": 44.1406, "step": 32 }, { "epoch": 0.005295462751233602, "grad_norm": 11.102368354797363, "learning_rate": 3.3e-05, "loss": 44.4531, "step": 33 }, { "epoch": 0.0054559313194528026, "grad_norm": 10.808040618896484, "learning_rate": 3.4000000000000007e-05, "loss": 44.1953, "step": 34 }, { "epoch": 0.005616399887672002, "grad_norm": 9.610026359558105, "learning_rate": 3.5e-05, "loss": 44.1094, "step": 35 }, { "epoch": 0.0057768684558912025, "grad_norm": 11.276328086853027, "learning_rate": 3.6e-05, "loss": 44.1641, "step": 36 }, { "epoch": 0.005937337024110402, "grad_norm": 9.280231475830078, "learning_rate": 3.7e-05, "loss": 44.1719, "step": 37 }, { "epoch": 0.0060978055923296024, "grad_norm": 10.414131164550781, "learning_rate": 3.8e-05, "loss": 44.25, "step": 38 }, { "epoch": 0.006258274160548803, "grad_norm": 10.355223655700684, "learning_rate": 3.9000000000000006e-05, "loss": 44.1875, "step": 39 }, { "epoch": 0.006418742728768002, "grad_norm": 10.263233184814453, "learning_rate": 4e-05, "loss": 44.2031, "step": 40 }, { "epoch": 0.006579211296987203, "grad_norm": 11.302879333496094, "learning_rate": 4.1e-05, "loss": 44.0781, "step": 41 }, { "epoch": 0.006739679865206402, "grad_norm": 9.988799095153809, "learning_rate": 4.2e-05, "loss": 44.1094, "step": 42 }, { "epoch": 0.006900148433425603, "grad_norm": 9.3355073928833, "learning_rate": 4.3e-05, "loss": 44.1016, "step": 43 }, { "epoch": 0.007060617001644803, "grad_norm": 10.810892105102539, "learning_rate": 4.4000000000000006e-05, "loss": 44.0781, "step": 44 }, { "epoch": 0.007221085569864003, "grad_norm": 11.13605785369873, "learning_rate": 4.5e-05, "loss": 43.9922, "step": 45 }, { "epoch": 0.007381554138083203, "grad_norm": 9.779239654541016, "learning_rate": 4.600000000000001e-05, "loss": 44.0, "step": 46 }, { "epoch": 0.007542022706302403, "grad_norm": 11.68825626373291, "learning_rate": 4.7e-05, "loss": 43.8125, "step": 47 }, { "epoch": 0.007702491274521603, "grad_norm": 9.578836441040039, "learning_rate": 4.8e-05, "loss": 43.9844, "step": 48 }, { "epoch": 0.007862959842740803, "grad_norm": 8.959626197814941, "learning_rate": 4.9e-05, "loss": 43.9375, "step": 49 }, { "epoch": 0.008023428410960003, "grad_norm": 9.231488227844238, "learning_rate": 5e-05, "loss": 44.0391, "step": 50 }, { "epoch": 0.008183896979179203, "grad_norm": 8.391412734985352, "learning_rate": 5.1000000000000006e-05, "loss": 44.1406, "step": 51 }, { "epoch": 0.008344365547398404, "grad_norm": 11.8761625289917, "learning_rate": 5.2000000000000004e-05, "loss": 43.9375, "step": 52 }, { "epoch": 0.008504834115617603, "grad_norm": 9.099921226501465, "learning_rate": 5.300000000000001e-05, "loss": 43.9688, "step": 53 }, { "epoch": 0.008665302683836803, "grad_norm": 12.023362159729004, "learning_rate": 5.4000000000000005e-05, "loss": 43.6953, "step": 54 }, { "epoch": 0.008825771252056004, "grad_norm": 9.342513084411621, "learning_rate": 5.500000000000001e-05, "loss": 43.8594, "step": 55 }, { "epoch": 0.008986239820275204, "grad_norm": 10.633225440979004, "learning_rate": 5.6000000000000006e-05, "loss": 43.6953, "step": 56 }, { "epoch": 0.009146708388494403, "grad_norm": 10.679011344909668, "learning_rate": 5.6999999999999996e-05, "loss": 43.7891, "step": 57 }, { "epoch": 0.009307176956713605, "grad_norm": 11.335249900817871, "learning_rate": 5.8e-05, "loss": 43.4531, "step": 58 }, { "epoch": 0.009467645524932804, "grad_norm": 9.723751068115234, "learning_rate": 5.9e-05, "loss": 43.8359, "step": 59 }, { "epoch": 0.009628114093152004, "grad_norm": 10.07986068725586, "learning_rate": 6e-05, "loss": 43.6719, "step": 60 }, { "epoch": 0.009788582661371203, "grad_norm": 10.535473823547363, "learning_rate": 6.1e-05, "loss": 43.7266, "step": 61 }, { "epoch": 0.009949051229590404, "grad_norm": 12.007545471191406, "learning_rate": 6.2e-05, "loss": 43.875, "step": 62 }, { "epoch": 0.010109519797809604, "grad_norm": 9.52319049835205, "learning_rate": 6.3e-05, "loss": 43.4922, "step": 63 }, { "epoch": 0.010269988366028803, "grad_norm": 11.315747261047363, "learning_rate": 6.400000000000001e-05, "loss": 43.375, "step": 64 }, { "epoch": 0.010430456934248005, "grad_norm": 9.576462745666504, "learning_rate": 6.500000000000001e-05, "loss": 43.5391, "step": 65 }, { "epoch": 0.010590925502467204, "grad_norm": 9.320272445678711, "learning_rate": 6.6e-05, "loss": 43.5469, "step": 66 }, { "epoch": 0.010751394070686404, "grad_norm": 11.282541275024414, "learning_rate": 6.7e-05, "loss": 43.5312, "step": 67 }, { "epoch": 0.010911862638905605, "grad_norm": 10.026611328125, "learning_rate": 6.800000000000001e-05, "loss": 43.3438, "step": 68 }, { "epoch": 0.011072331207124805, "grad_norm": 10.210331916809082, "learning_rate": 6.9e-05, "loss": 43.2734, "step": 69 }, { "epoch": 0.011232799775344004, "grad_norm": 11.038369178771973, "learning_rate": 7e-05, "loss": 43.3594, "step": 70 }, { "epoch": 0.011393268343563204, "grad_norm": 10.174579620361328, "learning_rate": 7.1e-05, "loss": 43.2969, "step": 71 }, { "epoch": 0.011553736911782405, "grad_norm": 10.813322067260742, "learning_rate": 7.2e-05, "loss": 43.1797, "step": 72 }, { "epoch": 0.011714205480001605, "grad_norm": 11.024142265319824, "learning_rate": 7.3e-05, "loss": 43.1719, "step": 73 }, { "epoch": 0.011874674048220804, "grad_norm": 10.714055061340332, "learning_rate": 7.4e-05, "loss": 43.0625, "step": 74 }, { "epoch": 0.012035142616440005, "grad_norm": 11.363909721374512, "learning_rate": 7.500000000000001e-05, "loss": 42.8984, "step": 75 }, { "epoch": 0.012195611184659205, "grad_norm": 10.05684757232666, "learning_rate": 7.6e-05, "loss": 43.1562, "step": 76 }, { "epoch": 0.012356079752878404, "grad_norm": 11.590270042419434, "learning_rate": 7.7e-05, "loss": 43.1875, "step": 77 }, { "epoch": 0.012516548321097606, "grad_norm": 10.419472694396973, "learning_rate": 7.800000000000001e-05, "loss": 43.0703, "step": 78 }, { "epoch": 0.012677016889316805, "grad_norm": 10.023338317871094, "learning_rate": 7.900000000000001e-05, "loss": 42.9922, "step": 79 }, { "epoch": 0.012837485457536005, "grad_norm": 10.85767650604248, "learning_rate": 8e-05, "loss": 42.8359, "step": 80 }, { "epoch": 0.012997954025755206, "grad_norm": 11.6627197265625, "learning_rate": 8.1e-05, "loss": 42.9219, "step": 81 }, { "epoch": 0.013158422593974406, "grad_norm": 11.558862686157227, "learning_rate": 8.2e-05, "loss": 42.8828, "step": 82 }, { "epoch": 0.013318891162193605, "grad_norm": 10.572707176208496, "learning_rate": 8.3e-05, "loss": 42.9141, "step": 83 }, { "epoch": 0.013479359730412805, "grad_norm": 11.022262573242188, "learning_rate": 8.4e-05, "loss": 42.7422, "step": 84 }, { "epoch": 0.013639828298632006, "grad_norm": 11.714750289916992, "learning_rate": 8.5e-05, "loss": 42.6328, "step": 85 }, { "epoch": 0.013800296866851205, "grad_norm": 10.94667911529541, "learning_rate": 8.6e-05, "loss": 42.6953, "step": 86 }, { "epoch": 0.013960765435070405, "grad_norm": 10.495635986328125, "learning_rate": 8.7e-05, "loss": 42.7891, "step": 87 }, { "epoch": 0.014121234003289606, "grad_norm": 10.921380043029785, "learning_rate": 8.800000000000001e-05, "loss": 42.7188, "step": 88 }, { "epoch": 0.014281702571508806, "grad_norm": 11.405807495117188, "learning_rate": 8.900000000000001e-05, "loss": 42.6094, "step": 89 }, { "epoch": 0.014442171139728005, "grad_norm": 11.794400215148926, "learning_rate": 9e-05, "loss": 42.625, "step": 90 }, { "epoch": 0.014602639707947207, "grad_norm": 10.982904434204102, "learning_rate": 9.1e-05, "loss": 42.6328, "step": 91 }, { "epoch": 0.014763108276166406, "grad_norm": 11.19552230834961, "learning_rate": 9.200000000000001e-05, "loss": 42.5, "step": 92 }, { "epoch": 0.014923576844385606, "grad_norm": 10.717450141906738, "learning_rate": 9.300000000000001e-05, "loss": 42.6328, "step": 93 }, { "epoch": 0.015084045412604805, "grad_norm": 11.066262245178223, "learning_rate": 9.4e-05, "loss": 42.5078, "step": 94 }, { "epoch": 0.015244513980824007, "grad_norm": 11.868555068969727, "learning_rate": 9.5e-05, "loss": 42.3984, "step": 95 }, { "epoch": 0.015404982549043206, "grad_norm": 10.57606029510498, "learning_rate": 9.6e-05, "loss": 42.5469, "step": 96 }, { "epoch": 0.015565451117262406, "grad_norm": 11.487339973449707, "learning_rate": 9.7e-05, "loss": 42.4766, "step": 97 }, { "epoch": 0.015725919685481607, "grad_norm": 11.72191333770752, "learning_rate": 9.8e-05, "loss": 42.5938, "step": 98 }, { "epoch": 0.015886388253700805, "grad_norm": 11.557637214660645, "learning_rate": 9.900000000000001e-05, "loss": 42.4531, "step": 99 }, { "epoch": 0.016046856821920006, "grad_norm": 10.748390197753906, "learning_rate": 0.0001, "loss": 42.6797, "step": 100 }, { "epoch": 0.016207325390139207, "grad_norm": 10.827314376831055, "learning_rate": 9.999999343587094e-05, "loss": 42.6875, "step": 101 }, { "epoch": 0.016367793958358405, "grad_norm": 11.961786270141602, "learning_rate": 9.999997374348542e-05, "loss": 42.5312, "step": 102 }, { "epoch": 0.016528262526577606, "grad_norm": 11.416112899780273, "learning_rate": 9.999994092284869e-05, "loss": 42.3203, "step": 103 }, { "epoch": 0.016688731094796808, "grad_norm": 12.198850631713867, "learning_rate": 9.999989497396927e-05, "loss": 42.5078, "step": 104 }, { "epoch": 0.016849199663016005, "grad_norm": 11.978021621704102, "learning_rate": 9.99998358968593e-05, "loss": 42.2031, "step": 105 }, { "epoch": 0.017009668231235207, "grad_norm": 11.614749908447266, "learning_rate": 9.999976369153428e-05, "loss": 42.2188, "step": 106 }, { "epoch": 0.017170136799454408, "grad_norm": 11.96450424194336, "learning_rate": 9.999967835801314e-05, "loss": 42.1328, "step": 107 }, { "epoch": 0.017330605367673606, "grad_norm": 11.762166976928711, "learning_rate": 9.999957989631829e-05, "loss": 42.3359, "step": 108 }, { "epoch": 0.017491073935892807, "grad_norm": 12.41796588897705, "learning_rate": 9.999946830647562e-05, "loss": 42.0391, "step": 109 }, { "epoch": 0.01765154250411201, "grad_norm": 12.420923233032227, "learning_rate": 9.99993435885144e-05, "loss": 42.1484, "step": 110 }, { "epoch": 0.017812011072331206, "grad_norm": 11.765009880065918, "learning_rate": 9.999920574246738e-05, "loss": 42.1797, "step": 111 }, { "epoch": 0.017972479640550407, "grad_norm": 11.856393814086914, "learning_rate": 9.999905476837072e-05, "loss": 41.9844, "step": 112 }, { "epoch": 0.01813294820876961, "grad_norm": 11.730620384216309, "learning_rate": 9.999889066626413e-05, "loss": 42.0234, "step": 113 }, { "epoch": 0.018293416776988806, "grad_norm": 11.955710411071777, "learning_rate": 9.999871343619066e-05, "loss": 42.0, "step": 114 }, { "epoch": 0.018453885345208008, "grad_norm": 11.675226211547852, "learning_rate": 9.999852307819683e-05, "loss": 42.0312, "step": 115 }, { "epoch": 0.01861435391342721, "grad_norm": 12.273092269897461, "learning_rate": 9.999831959233266e-05, "loss": 42.0078, "step": 116 }, { "epoch": 0.018774822481646407, "grad_norm": 12.01719856262207, "learning_rate": 9.999810297865155e-05, "loss": 41.75, "step": 117 }, { "epoch": 0.018935291049865608, "grad_norm": 12.260398864746094, "learning_rate": 9.999787323721038e-05, "loss": 41.7656, "step": 118 }, { "epoch": 0.01909575961808481, "grad_norm": 12.107258796691895, "learning_rate": 9.999763036806947e-05, "loss": 41.7812, "step": 119 }, { "epoch": 0.019256228186304007, "grad_norm": 12.962327003479004, "learning_rate": 9.999737437129259e-05, "loss": 41.7578, "step": 120 }, { "epoch": 0.01941669675452321, "grad_norm": 11.940987586975098, "learning_rate": 9.999710524694697e-05, "loss": 41.7656, "step": 121 }, { "epoch": 0.019577165322742406, "grad_norm": 12.447089195251465, "learning_rate": 9.999682299510324e-05, "loss": 41.7031, "step": 122 }, { "epoch": 0.019737633890961608, "grad_norm": 12.21422004699707, "learning_rate": 9.999652761583555e-05, "loss": 41.6641, "step": 123 }, { "epoch": 0.01989810245918081, "grad_norm": 12.085108757019043, "learning_rate": 9.999621910922143e-05, "loss": 41.6953, "step": 124 }, { "epoch": 0.020058571027400007, "grad_norm": 11.883008003234863, "learning_rate": 9.99958974753419e-05, "loss": 41.6484, "step": 125 }, { "epoch": 0.020219039595619208, "grad_norm": 12.33384895324707, "learning_rate": 9.999556271428139e-05, "loss": 41.5781, "step": 126 }, { "epoch": 0.02037950816383841, "grad_norm": 12.227206230163574, "learning_rate": 9.99952148261278e-05, "loss": 41.5703, "step": 127 }, { "epoch": 0.020539976732057607, "grad_norm": 12.04488468170166, "learning_rate": 9.999485381097249e-05, "loss": 41.5625, "step": 128 }, { "epoch": 0.020700445300276808, "grad_norm": 12.285660743713379, "learning_rate": 9.999447966891023e-05, "loss": 41.5312, "step": 129 }, { "epoch": 0.02086091386849601, "grad_norm": 12.135247230529785, "learning_rate": 9.999409240003927e-05, "loss": 41.5312, "step": 130 }, { "epoch": 0.021021382436715207, "grad_norm": 12.203625679016113, "learning_rate": 9.99936920044613e-05, "loss": 41.4922, "step": 131 }, { "epoch": 0.02118185100493441, "grad_norm": 11.848212242126465, "learning_rate": 9.999327848228144e-05, "loss": 41.5078, "step": 132 }, { "epoch": 0.02134231957315361, "grad_norm": 11.939364433288574, "learning_rate": 9.999285183360825e-05, "loss": 41.4375, "step": 133 }, { "epoch": 0.021502788141372808, "grad_norm": 11.90745735168457, "learning_rate": 9.999241205855378e-05, "loss": 41.375, "step": 134 }, { "epoch": 0.02166325670959201, "grad_norm": 12.255194664001465, "learning_rate": 9.99919591572335e-05, "loss": 41.3203, "step": 135 }, { "epoch": 0.02182372527781121, "grad_norm": 12.113066673278809, "learning_rate": 9.99914931297663e-05, "loss": 41.3984, "step": 136 }, { "epoch": 0.021984193846030408, "grad_norm": 12.025561332702637, "learning_rate": 9.999101397627455e-05, "loss": 41.2734, "step": 137 }, { "epoch": 0.02214466241424961, "grad_norm": 12.38244915008545, "learning_rate": 9.999052169688408e-05, "loss": 41.3203, "step": 138 }, { "epoch": 0.02230513098246881, "grad_norm": 12.467464447021484, "learning_rate": 9.999001629172414e-05, "loss": 41.2734, "step": 139 }, { "epoch": 0.02246559955068801, "grad_norm": 11.854839324951172, "learning_rate": 9.998949776092741e-05, "loss": 41.3594, "step": 140 }, { "epoch": 0.02262606811890721, "grad_norm": 12.4625883102417, "learning_rate": 9.998896610463004e-05, "loss": 41.125, "step": 141 }, { "epoch": 0.022786536687126407, "grad_norm": 12.083056449890137, "learning_rate": 9.998842132297166e-05, "loss": 41.2266, "step": 142 }, { "epoch": 0.02294700525534561, "grad_norm": 12.166191101074219, "learning_rate": 9.998786341609527e-05, "loss": 41.2344, "step": 143 }, { "epoch": 0.02310747382356481, "grad_norm": 12.008769035339355, "learning_rate": 9.998729238414739e-05, "loss": 41.4688, "step": 144 }, { "epoch": 0.023267942391784008, "grad_norm": 12.104840278625488, "learning_rate": 9.998670822727793e-05, "loss": 41.1172, "step": 145 }, { "epoch": 0.02342841096000321, "grad_norm": 12.455314636230469, "learning_rate": 9.998611094564026e-05, "loss": 41.125, "step": 146 }, { "epoch": 0.02358887952822241, "grad_norm": 12.325359344482422, "learning_rate": 9.998550053939124e-05, "loss": 41.3125, "step": 147 }, { "epoch": 0.023749348096441608, "grad_norm": 12.488564491271973, "learning_rate": 9.998487700869113e-05, "loss": 41.2578, "step": 148 }, { "epoch": 0.02390981666466081, "grad_norm": 12.002960205078125, "learning_rate": 9.998424035370361e-05, "loss": 41.2031, "step": 149 }, { "epoch": 0.02407028523288001, "grad_norm": 12.198607444763184, "learning_rate": 9.998359057459591e-05, "loss": 41.3047, "step": 150 }, { "epoch": 0.02423075380109921, "grad_norm": 13.47744083404541, "learning_rate": 9.998292767153858e-05, "loss": 41.3438, "step": 151 }, { "epoch": 0.02439122236931841, "grad_norm": 12.323281288146973, "learning_rate": 9.998225164470572e-05, "loss": 41.0859, "step": 152 }, { "epoch": 0.02455169093753761, "grad_norm": 12.209098815917969, "learning_rate": 9.99815624942748e-05, "loss": 41.1797, "step": 153 }, { "epoch": 0.02471215950575681, "grad_norm": 11.961400985717773, "learning_rate": 9.998086022042677e-05, "loss": 41.1094, "step": 154 }, { "epoch": 0.02487262807397601, "grad_norm": 11.984774589538574, "learning_rate": 9.998014482334601e-05, "loss": 41.125, "step": 155 }, { "epoch": 0.02503309664219521, "grad_norm": 12.07571029663086, "learning_rate": 9.997941630322041e-05, "loss": 41.0469, "step": 156 }, { "epoch": 0.02519356521041441, "grad_norm": 12.088325500488281, "learning_rate": 9.99786746602412e-05, "loss": 41.0625, "step": 157 }, { "epoch": 0.02535403377863361, "grad_norm": 12.23095989227295, "learning_rate": 9.997791989460314e-05, "loss": 40.9766, "step": 158 }, { "epoch": 0.025514502346852812, "grad_norm": 12.230219841003418, "learning_rate": 9.997715200650441e-05, "loss": 40.8281, "step": 159 }, { "epoch": 0.02567497091507201, "grad_norm": 11.825447082519531, "learning_rate": 9.997637099614661e-05, "loss": 40.9453, "step": 160 }, { "epoch": 0.02583543948329121, "grad_norm": 12.706337928771973, "learning_rate": 9.99755768637348e-05, "loss": 40.7812, "step": 161 }, { "epoch": 0.025995908051510412, "grad_norm": 12.137880325317383, "learning_rate": 9.997476960947751e-05, "loss": 40.7891, "step": 162 }, { "epoch": 0.02615637661972961, "grad_norm": 12.087493896484375, "learning_rate": 9.99739492335867e-05, "loss": 40.7031, "step": 163 }, { "epoch": 0.02631684518794881, "grad_norm": 12.216191291809082, "learning_rate": 9.997311573627774e-05, "loss": 40.7891, "step": 164 }, { "epoch": 0.02647731375616801, "grad_norm": 12.771894454956055, "learning_rate": 9.997226911776953e-05, "loss": 40.7969, "step": 165 }, { "epoch": 0.02663778232438721, "grad_norm": 12.323055267333984, "learning_rate": 9.997140937828432e-05, "loss": 40.6719, "step": 166 }, { "epoch": 0.02679825089260641, "grad_norm": 12.11103343963623, "learning_rate": 9.997053651804786e-05, "loss": 40.6641, "step": 167 }, { "epoch": 0.02695871946082561, "grad_norm": 12.816680908203125, "learning_rate": 9.996965053728934e-05, "loss": 40.5469, "step": 168 }, { "epoch": 0.02711918802904481, "grad_norm": 12.268040657043457, "learning_rate": 9.996875143624139e-05, "loss": 40.5391, "step": 169 }, { "epoch": 0.027279656597264012, "grad_norm": 11.930557250976562, "learning_rate": 9.996783921514005e-05, "loss": 40.5391, "step": 170 }, { "epoch": 0.02744012516548321, "grad_norm": 12.445047378540039, "learning_rate": 9.996691387422488e-05, "loss": 40.5469, "step": 171 }, { "epoch": 0.02760059373370241, "grad_norm": 12.424190521240234, "learning_rate": 9.99659754137388e-05, "loss": 40.4375, "step": 172 }, { "epoch": 0.027761062301921612, "grad_norm": 12.275189399719238, "learning_rate": 9.996502383392828e-05, "loss": 40.6172, "step": 173 }, { "epoch": 0.02792153087014081, "grad_norm": 12.424285888671875, "learning_rate": 9.99640591350431e-05, "loss": 40.5312, "step": 174 }, { "epoch": 0.02808199943836001, "grad_norm": 12.179903984069824, "learning_rate": 9.996308131733659e-05, "loss": 40.5, "step": 175 }, { "epoch": 0.028242468006579213, "grad_norm": 12.617328643798828, "learning_rate": 9.99620903810655e-05, "loss": 40.4922, "step": 176 }, { "epoch": 0.02840293657479841, "grad_norm": 12.943615913391113, "learning_rate": 9.996108632649001e-05, "loss": 40.3984, "step": 177 }, { "epoch": 0.02856340514301761, "grad_norm": 12.386348724365234, "learning_rate": 9.996006915387373e-05, "loss": 40.3906, "step": 178 }, { "epoch": 0.028723873711236813, "grad_norm": 14.624496459960938, "learning_rate": 9.995903886348377e-05, "loss": 40.4531, "step": 179 }, { "epoch": 0.02888434227945601, "grad_norm": 11.932466506958008, "learning_rate": 9.995799545559062e-05, "loss": 40.3672, "step": 180 }, { "epoch": 0.029044810847675212, "grad_norm": 12.341383934020996, "learning_rate": 9.995693893046824e-05, "loss": 40.2812, "step": 181 }, { "epoch": 0.029205279415894413, "grad_norm": 12.181395530700684, "learning_rate": 9.995586928839405e-05, "loss": 40.2344, "step": 182 }, { "epoch": 0.02936574798411361, "grad_norm": 12.180512428283691, "learning_rate": 9.995478652964889e-05, "loss": 40.2812, "step": 183 }, { "epoch": 0.029526216552332812, "grad_norm": 12.276628494262695, "learning_rate": 9.995369065451707e-05, "loss": 40.2578, "step": 184 }, { "epoch": 0.02968668512055201, "grad_norm": 12.220592498779297, "learning_rate": 9.995258166328632e-05, "loss": 40.2344, "step": 185 }, { "epoch": 0.02984715368877121, "grad_norm": 12.081377983093262, "learning_rate": 9.995145955624783e-05, "loss": 40.1875, "step": 186 }, { "epoch": 0.030007622256990413, "grad_norm": 12.067660331726074, "learning_rate": 9.995032433369622e-05, "loss": 40.3125, "step": 187 }, { "epoch": 0.03016809082520961, "grad_norm": 12.062532424926758, "learning_rate": 9.994917599592954e-05, "loss": 40.1094, "step": 188 }, { "epoch": 0.030328559393428812, "grad_norm": 11.940292358398438, "learning_rate": 9.994801454324935e-05, "loss": 40.1875, "step": 189 }, { "epoch": 0.030489027961648013, "grad_norm": 12.156018257141113, "learning_rate": 9.994683997596055e-05, "loss": 40.1562, "step": 190 }, { "epoch": 0.03064949652986721, "grad_norm": 12.178804397583008, "learning_rate": 9.99456522943716e-05, "loss": 40.0938, "step": 191 }, { "epoch": 0.030809965098086412, "grad_norm": 12.56262493133545, "learning_rate": 9.99444514987943e-05, "loss": 40.0703, "step": 192 }, { "epoch": 0.030970433666305613, "grad_norm": 13.586669921875, "learning_rate": 9.994323758954395e-05, "loss": 39.9609, "step": 193 }, { "epoch": 0.03113090223452481, "grad_norm": 12.66300106048584, "learning_rate": 9.994201056693928e-05, "loss": 39.9609, "step": 194 }, { "epoch": 0.03129137080274401, "grad_norm": 11.896414756774902, "learning_rate": 9.994077043130246e-05, "loss": 40.1641, "step": 195 }, { "epoch": 0.031451839370963214, "grad_norm": 12.261089324951172, "learning_rate": 9.993951718295913e-05, "loss": 39.9609, "step": 196 }, { "epoch": 0.03161230793918241, "grad_norm": 12.255698204040527, "learning_rate": 9.993825082223831e-05, "loss": 39.9453, "step": 197 }, { "epoch": 0.03177277650740161, "grad_norm": 12.250739097595215, "learning_rate": 9.993697134947254e-05, "loss": 39.9453, "step": 198 }, { "epoch": 0.031933245075620814, "grad_norm": 12.271764755249023, "learning_rate": 9.993567876499774e-05, "loss": 40.0703, "step": 199 }, { "epoch": 0.03209371364384001, "grad_norm": 13.30996322631836, "learning_rate": 9.99343730691533e-05, "loss": 40.2734, "step": 200 }, { "epoch": 0.03225418221205921, "grad_norm": 11.971368789672852, "learning_rate": 9.993305426228205e-05, "loss": 40.0781, "step": 201 }, { "epoch": 0.032414650780278415, "grad_norm": 12.369047164916992, "learning_rate": 9.99317223447303e-05, "loss": 40.1797, "step": 202 }, { "epoch": 0.03257511934849761, "grad_norm": 12.820901870727539, "learning_rate": 9.99303773168477e-05, "loss": 39.9844, "step": 203 }, { "epoch": 0.03273558791671681, "grad_norm": 12.016521453857422, "learning_rate": 9.992901917898744e-05, "loss": 40.0859, "step": 204 }, { "epoch": 0.032896056484936015, "grad_norm": 12.361124038696289, "learning_rate": 9.992764793150616e-05, "loss": 39.9141, "step": 205 }, { "epoch": 0.03305652505315521, "grad_norm": 11.987215995788574, "learning_rate": 9.992626357476383e-05, "loss": 39.8125, "step": 206 }, { "epoch": 0.03321699362137441, "grad_norm": 12.450790405273438, "learning_rate": 9.992486610912397e-05, "loss": 39.7969, "step": 207 }, { "epoch": 0.033377462189593615, "grad_norm": 12.21453857421875, "learning_rate": 9.992345553495351e-05, "loss": 39.8125, "step": 208 }, { "epoch": 0.03353793075781281, "grad_norm": 12.111603736877441, "learning_rate": 9.992203185262279e-05, "loss": 39.75, "step": 209 }, { "epoch": 0.03369839932603201, "grad_norm": 12.336906433105469, "learning_rate": 9.992059506250566e-05, "loss": 39.6484, "step": 210 }, { "epoch": 0.033858867894251216, "grad_norm": 12.3292236328125, "learning_rate": 9.991914516497934e-05, "loss": 39.75, "step": 211 }, { "epoch": 0.03401933646247041, "grad_norm": 12.328758239746094, "learning_rate": 9.991768216042454e-05, "loss": 39.6094, "step": 212 }, { "epoch": 0.03417980503068961, "grad_norm": 12.409753799438477, "learning_rate": 9.991620604922538e-05, "loss": 39.5781, "step": 213 }, { "epoch": 0.034340273598908816, "grad_norm": 12.301977157592773, "learning_rate": 9.991471683176945e-05, "loss": 39.625, "step": 214 }, { "epoch": 0.034500742167128014, "grad_norm": 12.003325462341309, "learning_rate": 9.991321450844775e-05, "loss": 39.6562, "step": 215 }, { "epoch": 0.03466121073534721, "grad_norm": 12.550724983215332, "learning_rate": 9.991169907965475e-05, "loss": 39.4219, "step": 216 }, { "epoch": 0.034821679303566416, "grad_norm": 12.16519832611084, "learning_rate": 9.991017054578834e-05, "loss": 39.4844, "step": 217 }, { "epoch": 0.034982147871785614, "grad_norm": 12.036355018615723, "learning_rate": 9.990862890724988e-05, "loss": 39.6094, "step": 218 }, { "epoch": 0.03514261644000481, "grad_norm": 12.186607360839844, "learning_rate": 9.990707416444412e-05, "loss": 39.4297, "step": 219 }, { "epoch": 0.03530308500822402, "grad_norm": 12.143532752990723, "learning_rate": 9.99055063177793e-05, "loss": 39.4062, "step": 220 }, { "epoch": 0.035463553576443214, "grad_norm": 12.357522010803223, "learning_rate": 9.990392536766709e-05, "loss": 39.3516, "step": 221 }, { "epoch": 0.03562402214466241, "grad_norm": 12.422952651977539, "learning_rate": 9.990233131452258e-05, "loss": 39.3594, "step": 222 }, { "epoch": 0.03578449071288162, "grad_norm": 12.0408296585083, "learning_rate": 9.99007241587643e-05, "loss": 39.4219, "step": 223 }, { "epoch": 0.035944959281100815, "grad_norm": 12.240140914916992, "learning_rate": 9.989910390081426e-05, "loss": 39.4141, "step": 224 }, { "epoch": 0.03610542784932001, "grad_norm": 12.124786376953125, "learning_rate": 9.989747054109788e-05, "loss": 39.3125, "step": 225 }, { "epoch": 0.03626589641753922, "grad_norm": 12.029878616333008, "learning_rate": 9.989582408004398e-05, "loss": 39.3672, "step": 226 }, { "epoch": 0.036426364985758415, "grad_norm": 12.077906608581543, "learning_rate": 9.989416451808494e-05, "loss": 39.2812, "step": 227 }, { "epoch": 0.03658683355397761, "grad_norm": 12.269673347473145, "learning_rate": 9.989249185565642e-05, "loss": 39.2031, "step": 228 }, { "epoch": 0.03674730212219682, "grad_norm": 12.673739433288574, "learning_rate": 9.989080609319767e-05, "loss": 39.1562, "step": 229 }, { "epoch": 0.036907770690416015, "grad_norm": 12.285554885864258, "learning_rate": 9.988910723115126e-05, "loss": 39.1562, "step": 230 }, { "epoch": 0.03706823925863521, "grad_norm": 12.095409393310547, "learning_rate": 9.98873952699633e-05, "loss": 39.1406, "step": 231 }, { "epoch": 0.03722870782685442, "grad_norm": 12.385478973388672, "learning_rate": 9.988567021008326e-05, "loss": 39.1562, "step": 232 }, { "epoch": 0.037389176395073616, "grad_norm": 12.277629852294922, "learning_rate": 9.98839320519641e-05, "loss": 39.125, "step": 233 }, { "epoch": 0.037549644963292814, "grad_norm": 12.178648948669434, "learning_rate": 9.988218079606218e-05, "loss": 39.1484, "step": 234 }, { "epoch": 0.03771011353151202, "grad_norm": 12.326722145080566, "learning_rate": 9.988041644283733e-05, "loss": 39.0391, "step": 235 }, { "epoch": 0.037870582099731216, "grad_norm": 12.300928115844727, "learning_rate": 9.98786389927528e-05, "loss": 38.9922, "step": 236 }, { "epoch": 0.038031050667950414, "grad_norm": 12.512060165405273, "learning_rate": 9.987684844627532e-05, "loss": 38.9062, "step": 237 }, { "epoch": 0.03819151923616962, "grad_norm": 12.108129501342773, "learning_rate": 9.987504480387497e-05, "loss": 39.0547, "step": 238 }, { "epoch": 0.03835198780438882, "grad_norm": 12.195442199707031, "learning_rate": 9.987322806602535e-05, "loss": 38.9922, "step": 239 }, { "epoch": 0.038512456372608014, "grad_norm": 12.245491981506348, "learning_rate": 9.987139823320349e-05, "loss": 38.9531, "step": 240 }, { "epoch": 0.03867292494082721, "grad_norm": 11.947349548339844, "learning_rate": 9.986955530588981e-05, "loss": 39.0469, "step": 241 }, { "epoch": 0.03883339350904642, "grad_norm": 12.064835548400879, "learning_rate": 9.986769928456822e-05, "loss": 38.9141, "step": 242 }, { "epoch": 0.038993862077265615, "grad_norm": 12.284024238586426, "learning_rate": 9.986583016972603e-05, "loss": 38.9766, "step": 243 }, { "epoch": 0.03915433064548481, "grad_norm": 12.050925254821777, "learning_rate": 9.986394796185404e-05, "loss": 39.0156, "step": 244 }, { "epoch": 0.03931479921370402, "grad_norm": 12.264935493469238, "learning_rate": 9.98620526614464e-05, "loss": 38.8984, "step": 245 }, { "epoch": 0.039475267781923215, "grad_norm": 12.461281776428223, "learning_rate": 9.986014426900077e-05, "loss": 38.8984, "step": 246 }, { "epoch": 0.03963573635014241, "grad_norm": 12.705689430236816, "learning_rate": 9.985822278501824e-05, "loss": 38.9922, "step": 247 }, { "epoch": 0.03979620491836162, "grad_norm": 12.2362699508667, "learning_rate": 9.985628821000332e-05, "loss": 38.8516, "step": 248 }, { "epoch": 0.039956673486580815, "grad_norm": 12.692111015319824, "learning_rate": 9.985434054446396e-05, "loss": 39.1406, "step": 249 }, { "epoch": 0.04011714205480001, "grad_norm": 12.109597206115723, "learning_rate": 9.985237978891155e-05, "loss": 39.0625, "step": 250 }, { "epoch": 0.04027761062301922, "grad_norm": 12.269950866699219, "learning_rate": 9.985040594386091e-05, "loss": 39.1016, "step": 251 }, { "epoch": 0.040438079191238416, "grad_norm": 12.311545372009277, "learning_rate": 9.98484190098303e-05, "loss": 39.0703, "step": 252 }, { "epoch": 0.040598547759457614, "grad_norm": 12.42984676361084, "learning_rate": 9.984641898734145e-05, "loss": 38.7734, "step": 253 }, { "epoch": 0.04075901632767682, "grad_norm": 12.3108549118042, "learning_rate": 9.984440587691945e-05, "loss": 39.0234, "step": 254 }, { "epoch": 0.040919484895896016, "grad_norm": 12.24936580657959, "learning_rate": 9.984237967909292e-05, "loss": 38.8594, "step": 255 }, { "epoch": 0.041079953464115214, "grad_norm": 12.44944953918457, "learning_rate": 9.984034039439383e-05, "loss": 38.8984, "step": 256 }, { "epoch": 0.04124042203233442, "grad_norm": 12.14439868927002, "learning_rate": 9.983828802335763e-05, "loss": 38.8281, "step": 257 }, { "epoch": 0.041400890600553616, "grad_norm": 12.275596618652344, "learning_rate": 9.983622256652323e-05, "loss": 38.6562, "step": 258 }, { "epoch": 0.041561359168772814, "grad_norm": 12.045271873474121, "learning_rate": 9.983414402443292e-05, "loss": 38.7422, "step": 259 }, { "epoch": 0.04172182773699202, "grad_norm": 12.013596534729004, "learning_rate": 9.983205239763247e-05, "loss": 38.6797, "step": 260 }, { "epoch": 0.04188229630521122, "grad_norm": 12.335979461669922, "learning_rate": 9.982994768667105e-05, "loss": 38.6562, "step": 261 }, { "epoch": 0.042042764873430415, "grad_norm": 12.278074264526367, "learning_rate": 9.982782989210129e-05, "loss": 38.5859, "step": 262 }, { "epoch": 0.04220323344164962, "grad_norm": 12.718676567077637, "learning_rate": 9.982569901447926e-05, "loss": 38.6094, "step": 263 }, { "epoch": 0.04236370200986882, "grad_norm": 12.164244651794434, "learning_rate": 9.982355505436445e-05, "loss": 38.5703, "step": 264 }, { "epoch": 0.042524170578088015, "grad_norm": 12.08256721496582, "learning_rate": 9.982139801231978e-05, "loss": 38.4766, "step": 265 }, { "epoch": 0.04268463914630722, "grad_norm": 12.150715827941895, "learning_rate": 9.981922788891161e-05, "loss": 38.5234, "step": 266 }, { "epoch": 0.04284510771452642, "grad_norm": 12.216110229492188, "learning_rate": 9.981704468470976e-05, "loss": 38.5547, "step": 267 }, { "epoch": 0.043005576282745615, "grad_norm": 11.940841674804688, "learning_rate": 9.981484840028746e-05, "loss": 38.5703, "step": 268 }, { "epoch": 0.04316604485096482, "grad_norm": 12.22832202911377, "learning_rate": 9.981263903622136e-05, "loss": 38.6328, "step": 269 }, { "epoch": 0.04332651341918402, "grad_norm": 11.964570999145508, "learning_rate": 9.981041659309156e-05, "loss": 38.4375, "step": 270 }, { "epoch": 0.043486981987403216, "grad_norm": 11.83170223236084, "learning_rate": 9.980818107148162e-05, "loss": 38.4219, "step": 271 }, { "epoch": 0.04364745055562242, "grad_norm": 12.109923362731934, "learning_rate": 9.980593247197851e-05, "loss": 38.375, "step": 272 }, { "epoch": 0.04380791912384162, "grad_norm": 12.267512321472168, "learning_rate": 9.980367079517262e-05, "loss": 38.2656, "step": 273 }, { "epoch": 0.043968387692060816, "grad_norm": 12.157785415649414, "learning_rate": 9.980139604165776e-05, "loss": 38.3594, "step": 274 }, { "epoch": 0.04412885626028002, "grad_norm": 12.201786994934082, "learning_rate": 9.979910821203124e-05, "loss": 38.2656, "step": 275 }, { "epoch": 0.04428932482849922, "grad_norm": 12.505314826965332, "learning_rate": 9.979680730689378e-05, "loss": 38.1641, "step": 276 }, { "epoch": 0.044449793396718416, "grad_norm": 12.196447372436523, "learning_rate": 9.979449332684946e-05, "loss": 38.3281, "step": 277 }, { "epoch": 0.04461026196493762, "grad_norm": 12.141575813293457, "learning_rate": 9.979216627250592e-05, "loss": 38.2422, "step": 278 }, { "epoch": 0.04477073053315682, "grad_norm": 12.141407012939453, "learning_rate": 9.97898261444741e-05, "loss": 38.25, "step": 279 }, { "epoch": 0.04493119910137602, "grad_norm": 12.23816967010498, "learning_rate": 9.978747294336847e-05, "loss": 38.1953, "step": 280 }, { "epoch": 0.04509166766959522, "grad_norm": 12.190579414367676, "learning_rate": 9.978510666980689e-05, "loss": 38.1875, "step": 281 }, { "epoch": 0.04525213623781442, "grad_norm": 12.048676490783691, "learning_rate": 9.978272732441065e-05, "loss": 38.1484, "step": 282 }, { "epoch": 0.04541260480603362, "grad_norm": 12.122055053710938, "learning_rate": 9.978033490780452e-05, "loss": 38.1016, "step": 283 }, { "epoch": 0.045573073374252815, "grad_norm": 12.098814010620117, "learning_rate": 9.977792942061662e-05, "loss": 38.2109, "step": 284 }, { "epoch": 0.04573354194247202, "grad_norm": 12.261090278625488, "learning_rate": 9.977551086347858e-05, "loss": 38.0078, "step": 285 }, { "epoch": 0.04589401051069122, "grad_norm": 12.467159271240234, "learning_rate": 9.977307923702541e-05, "loss": 38.0547, "step": 286 }, { "epoch": 0.046054479078910415, "grad_norm": 12.137351989746094, "learning_rate": 9.977063454189558e-05, "loss": 38.1719, "step": 287 }, { "epoch": 0.04621494764712962, "grad_norm": 12.070253372192383, "learning_rate": 9.976817677873099e-05, "loss": 38.1016, "step": 288 }, { "epoch": 0.04637541621534882, "grad_norm": 12.278700828552246, "learning_rate": 9.976570594817693e-05, "loss": 38.0234, "step": 289 }, { "epoch": 0.046535884783568016, "grad_norm": 12.410167694091797, "learning_rate": 9.976322205088217e-05, "loss": 37.9844, "step": 290 }, { "epoch": 0.04669635335178722, "grad_norm": 12.41714859008789, "learning_rate": 9.976072508749893e-05, "loss": 37.9375, "step": 291 }, { "epoch": 0.04685682192000642, "grad_norm": 12.250529289245605, "learning_rate": 9.975821505868278e-05, "loss": 37.9922, "step": 292 }, { "epoch": 0.047017290488225616, "grad_norm": 11.931462287902832, "learning_rate": 9.975569196509278e-05, "loss": 38.1016, "step": 293 }, { "epoch": 0.04717775905644482, "grad_norm": 12.381219863891602, "learning_rate": 9.975315580739142e-05, "loss": 37.8594, "step": 294 }, { "epoch": 0.04733822762466402, "grad_norm": 12.172926902770996, "learning_rate": 9.975060658624457e-05, "loss": 37.8203, "step": 295 }, { "epoch": 0.047498696192883216, "grad_norm": 12.125041961669922, "learning_rate": 9.974804430232163e-05, "loss": 37.8984, "step": 296 }, { "epoch": 0.04765916476110242, "grad_norm": 12.101672172546387, "learning_rate": 9.97454689562953e-05, "loss": 37.7812, "step": 297 }, { "epoch": 0.04781963332932162, "grad_norm": 12.259007453918457, "learning_rate": 9.974288054884182e-05, "loss": 37.8047, "step": 298 }, { "epoch": 0.04798010189754082, "grad_norm": 12.032666206359863, "learning_rate": 9.974027908064079e-05, "loss": 37.9531, "step": 299 }, { "epoch": 0.04814057046576002, "grad_norm": 12.182025909423828, "learning_rate": 9.973766455237527e-05, "loss": 38.0781, "step": 300 }, { "epoch": 0.04830103903397922, "grad_norm": 12.617944717407227, "learning_rate": 9.973503696473176e-05, "loss": 37.9531, "step": 301 }, { "epoch": 0.04846150760219842, "grad_norm": 12.09209156036377, "learning_rate": 9.973239631840016e-05, "loss": 37.9531, "step": 302 }, { "epoch": 0.04862197617041762, "grad_norm": 12.376977920532227, "learning_rate": 9.972974261407382e-05, "loss": 37.8828, "step": 303 }, { "epoch": 0.04878244473863682, "grad_norm": 12.08849048614502, "learning_rate": 9.972707585244951e-05, "loss": 37.8516, "step": 304 }, { "epoch": 0.04894291330685602, "grad_norm": 12.217604637145996, "learning_rate": 9.972439603422741e-05, "loss": 38.0156, "step": 305 }, { "epoch": 0.04910338187507522, "grad_norm": 12.269989967346191, "learning_rate": 9.972170316011117e-05, "loss": 38.0234, "step": 306 }, { "epoch": 0.04926385044329442, "grad_norm": 12.354762077331543, "learning_rate": 9.971899723080782e-05, "loss": 37.6875, "step": 307 }, { "epoch": 0.04942431901151362, "grad_norm": 12.085607528686523, "learning_rate": 9.971627824702787e-05, "loss": 37.7656, "step": 308 }, { "epoch": 0.04958478757973282, "grad_norm": 12.133856773376465, "learning_rate": 9.97135462094852e-05, "loss": 37.5938, "step": 309 }, { "epoch": 0.04974525614795202, "grad_norm": 12.396000862121582, "learning_rate": 9.971080111889719e-05, "loss": 37.6406, "step": 310 }, { "epoch": 0.04990572471617122, "grad_norm": 12.418793678283691, "learning_rate": 9.970804297598458e-05, "loss": 37.5781, "step": 311 }, { "epoch": 0.05006619328439042, "grad_norm": 12.342123985290527, "learning_rate": 9.970527178147155e-05, "loss": 37.6484, "step": 312 }, { "epoch": 0.05022666185260962, "grad_norm": 12.228877067565918, "learning_rate": 9.970248753608574e-05, "loss": 37.4844, "step": 313 }, { "epoch": 0.05038713042082882, "grad_norm": 12.125776290893555, "learning_rate": 9.969969024055819e-05, "loss": 37.6016, "step": 314 }, { "epoch": 0.05054759898904802, "grad_norm": 12.096100807189941, "learning_rate": 9.969687989562335e-05, "loss": 37.5, "step": 315 }, { "epoch": 0.05070806755726722, "grad_norm": 12.056300163269043, "learning_rate": 9.969405650201917e-05, "loss": 37.5156, "step": 316 }, { "epoch": 0.05086853612548642, "grad_norm": 12.201271057128906, "learning_rate": 9.969122006048693e-05, "loss": 37.4688, "step": 317 }, { "epoch": 0.051029004693705624, "grad_norm": 11.991839408874512, "learning_rate": 9.968837057177139e-05, "loss": 37.4922, "step": 318 }, { "epoch": 0.05118947326192482, "grad_norm": 11.905840873718262, "learning_rate": 9.968550803662074e-05, "loss": 37.6406, "step": 319 }, { "epoch": 0.05134994183014402, "grad_norm": 12.317198753356934, "learning_rate": 9.968263245578658e-05, "loss": 37.4766, "step": 320 }, { "epoch": 0.051510410398363224, "grad_norm": 12.171464920043945, "learning_rate": 9.96797438300239e-05, "loss": 37.375, "step": 321 }, { "epoch": 0.05167087896658242, "grad_norm": 12.124490737915039, "learning_rate": 9.967684216009121e-05, "loss": 37.4219, "step": 322 }, { "epoch": 0.05183134753480162, "grad_norm": 12.037687301635742, "learning_rate": 9.967392744675035e-05, "loss": 37.3828, "step": 323 }, { "epoch": 0.051991816103020824, "grad_norm": 12.203550338745117, "learning_rate": 9.967099969076664e-05, "loss": 37.4219, "step": 324 }, { "epoch": 0.05215228467124002, "grad_norm": 12.12485408782959, "learning_rate": 9.96680588929088e-05, "loss": 37.3906, "step": 325 }, { "epoch": 0.05231275323945922, "grad_norm": 12.114737510681152, "learning_rate": 9.966510505394897e-05, "loss": 37.3516, "step": 326 }, { "epoch": 0.05247322180767842, "grad_norm": 11.952505111694336, "learning_rate": 9.966213817466275e-05, "loss": 37.4688, "step": 327 }, { "epoch": 0.05263369037589762, "grad_norm": 12.040366172790527, "learning_rate": 9.965915825582913e-05, "loss": 37.2969, "step": 328 }, { "epoch": 0.05279415894411682, "grad_norm": 12.19294548034668, "learning_rate": 9.96561652982305e-05, "loss": 37.2344, "step": 329 }, { "epoch": 0.05295462751233602, "grad_norm": 12.623602867126465, "learning_rate": 9.965315930265275e-05, "loss": 37.3281, "step": 330 }, { "epoch": 0.05311509608055522, "grad_norm": 12.188882827758789, "learning_rate": 9.965014026988514e-05, "loss": 37.2969, "step": 331 }, { "epoch": 0.05327556464877442, "grad_norm": 12.38530158996582, "learning_rate": 9.964710820072035e-05, "loss": 37.1875, "step": 332 }, { "epoch": 0.05343603321699362, "grad_norm": 12.03764533996582, "learning_rate": 9.964406309595451e-05, "loss": 37.2422, "step": 333 }, { "epoch": 0.05359650178521282, "grad_norm": 12.107439994812012, "learning_rate": 9.964100495638715e-05, "loss": 37.0938, "step": 334 }, { "epoch": 0.05375697035343202, "grad_norm": 12.24302864074707, "learning_rate": 9.963793378282123e-05, "loss": 37.1641, "step": 335 }, { "epoch": 0.05391743892165122, "grad_norm": 12.162142753601074, "learning_rate": 9.963484957606312e-05, "loss": 37.0703, "step": 336 }, { "epoch": 0.05407790748987042, "grad_norm": 12.547365188598633, "learning_rate": 9.963175233692265e-05, "loss": 37.0312, "step": 337 }, { "epoch": 0.05423837605808962, "grad_norm": 12.028240203857422, "learning_rate": 9.962864206621305e-05, "loss": 37.1172, "step": 338 }, { "epoch": 0.05439884462630882, "grad_norm": 12.21715259552002, "learning_rate": 9.962551876475095e-05, "loss": 36.9922, "step": 339 }, { "epoch": 0.054559313194528024, "grad_norm": 12.090536117553711, "learning_rate": 9.962238243335642e-05, "loss": 37.0625, "step": 340 }, { "epoch": 0.05471978176274722, "grad_norm": 12.282057762145996, "learning_rate": 9.961923307285295e-05, "loss": 36.9844, "step": 341 }, { "epoch": 0.05488025033096642, "grad_norm": 12.227474212646484, "learning_rate": 9.961607068406749e-05, "loss": 37.0859, "step": 342 }, { "epoch": 0.055040718899185624, "grad_norm": 11.90120792388916, "learning_rate": 9.961289526783031e-05, "loss": 37.1328, "step": 343 }, { "epoch": 0.05520118746740482, "grad_norm": 12.309969902038574, "learning_rate": 9.960970682497522e-05, "loss": 37.0469, "step": 344 }, { "epoch": 0.05536165603562402, "grad_norm": 12.618575096130371, "learning_rate": 9.960650535633936e-05, "loss": 36.9609, "step": 345 }, { "epoch": 0.055522124603843225, "grad_norm": 12.398002624511719, "learning_rate": 9.960329086276335e-05, "loss": 36.9375, "step": 346 }, { "epoch": 0.05568259317206242, "grad_norm": 12.18591594696045, "learning_rate": 9.960006334509116e-05, "loss": 37.0156, "step": 347 }, { "epoch": 0.05584306174028162, "grad_norm": 12.619271278381348, "learning_rate": 9.959682280417027e-05, "loss": 36.9609, "step": 348 }, { "epoch": 0.056003530308500825, "grad_norm": 12.6240234375, "learning_rate": 9.959356924085151e-05, "loss": 37.1406, "step": 349 }, { "epoch": 0.05616399887672002, "grad_norm": 12.366124153137207, "learning_rate": 9.959030265598917e-05, "loss": 37.1562, "step": 350 }, { "epoch": 0.05632446744493922, "grad_norm": 12.158038139343262, "learning_rate": 9.958702305044092e-05, "loss": 37.3672, "step": 351 }, { "epoch": 0.056484936013158425, "grad_norm": 12.324678421020508, "learning_rate": 9.958373042506788e-05, "loss": 37.0781, "step": 352 }, { "epoch": 0.05664540458137762, "grad_norm": 12.360443115234375, "learning_rate": 9.95804247807346e-05, "loss": 37.0234, "step": 353 }, { "epoch": 0.05680587314959682, "grad_norm": 12.2138090133667, "learning_rate": 9.957710611830898e-05, "loss": 36.7969, "step": 354 }, { "epoch": 0.056966341717816026, "grad_norm": 12.162678718566895, "learning_rate": 9.957377443866241e-05, "loss": 36.9297, "step": 355 }, { "epoch": 0.05712681028603522, "grad_norm": 12.021390914916992, "learning_rate": 9.957042974266968e-05, "loss": 36.9453, "step": 356 }, { "epoch": 0.05728727885425442, "grad_norm": 12.327353477478027, "learning_rate": 9.956707203120899e-05, "loss": 36.8516, "step": 357 }, { "epoch": 0.057447747422473626, "grad_norm": 12.155403137207031, "learning_rate": 9.956370130516196e-05, "loss": 36.9531, "step": 358 }, { "epoch": 0.057608215990692824, "grad_norm": 11.969284057617188, "learning_rate": 9.95603175654136e-05, "loss": 36.8203, "step": 359 }, { "epoch": 0.05776868455891202, "grad_norm": 12.27929401397705, "learning_rate": 9.955692081285238e-05, "loss": 36.7812, "step": 360 }, { "epoch": 0.057929153127131226, "grad_norm": 12.3348388671875, "learning_rate": 9.955351104837018e-05, "loss": 36.7656, "step": 361 }, { "epoch": 0.058089621695350424, "grad_norm": 12.38863468170166, "learning_rate": 9.955008827286228e-05, "loss": 36.6094, "step": 362 }, { "epoch": 0.05825009026356962, "grad_norm": 12.541666984558105, "learning_rate": 9.954665248722736e-05, "loss": 36.6875, "step": 363 }, { "epoch": 0.05841055883178883, "grad_norm": 12.12757682800293, "learning_rate": 9.954320369236756e-05, "loss": 36.7578, "step": 364 }, { "epoch": 0.058571027400008024, "grad_norm": 12.206369400024414, "learning_rate": 9.95397418891884e-05, "loss": 36.6328, "step": 365 }, { "epoch": 0.05873149596822722, "grad_norm": 12.412717819213867, "learning_rate": 9.953626707859884e-05, "loss": 36.6719, "step": 366 }, { "epoch": 0.05889196453644643, "grad_norm": 12.231123924255371, "learning_rate": 9.953277926151123e-05, "loss": 36.6328, "step": 367 }, { "epoch": 0.059052433104665625, "grad_norm": 12.366118431091309, "learning_rate": 9.952927843884138e-05, "loss": 36.5859, "step": 368 }, { "epoch": 0.05921290167288482, "grad_norm": 12.029590606689453, "learning_rate": 9.952576461150846e-05, "loss": 36.5859, "step": 369 }, { "epoch": 0.05937337024110402, "grad_norm": 12.256035804748535, "learning_rate": 9.952223778043507e-05, "loss": 36.5859, "step": 370 }, { "epoch": 0.059533838809323225, "grad_norm": 12.106677055358887, "learning_rate": 9.951869794654724e-05, "loss": 36.5781, "step": 371 }, { "epoch": 0.05969430737754242, "grad_norm": 12.249667167663574, "learning_rate": 9.951514511077443e-05, "loss": 36.5469, "step": 372 }, { "epoch": 0.05985477594576162, "grad_norm": 11.892324447631836, "learning_rate": 9.951157927404945e-05, "loss": 36.5312, "step": 373 }, { "epoch": 0.060015244513980825, "grad_norm": 12.277322769165039, "learning_rate": 9.95080004373086e-05, "loss": 36.5234, "step": 374 }, { "epoch": 0.06017571308220002, "grad_norm": 12.357880592346191, "learning_rate": 9.950440860149158e-05, "loss": 36.3906, "step": 375 }, { "epoch": 0.06033618165041922, "grad_norm": 11.917757034301758, "learning_rate": 9.950080376754141e-05, "loss": 36.6016, "step": 376 }, { "epoch": 0.060496650218638426, "grad_norm": 12.03747844696045, "learning_rate": 9.949718593640462e-05, "loss": 36.4297, "step": 377 }, { "epoch": 0.060657118786857624, "grad_norm": 12.218464851379395, "learning_rate": 9.949355510903116e-05, "loss": 36.3281, "step": 378 }, { "epoch": 0.06081758735507682, "grad_norm": 12.337445259094238, "learning_rate": 9.948991128637433e-05, "loss": 36.5, "step": 379 }, { "epoch": 0.060978055923296026, "grad_norm": 12.734203338623047, "learning_rate": 9.948625446939086e-05, "loss": 36.3516, "step": 380 }, { "epoch": 0.061138524491515224, "grad_norm": 12.509225845336914, "learning_rate": 9.948258465904093e-05, "loss": 36.3594, "step": 381 }, { "epoch": 0.06129899305973442, "grad_norm": 12.046271324157715, "learning_rate": 9.94789018562881e-05, "loss": 36.3281, "step": 382 }, { "epoch": 0.06145946162795363, "grad_norm": 12.501344680786133, "learning_rate": 9.947520606209933e-05, "loss": 36.3438, "step": 383 }, { "epoch": 0.061619930196172824, "grad_norm": 12.364696502685547, "learning_rate": 9.947149727744501e-05, "loss": 36.4375, "step": 384 }, { "epoch": 0.06178039876439202, "grad_norm": 12.215829849243164, "learning_rate": 9.946777550329895e-05, "loss": 36.2734, "step": 385 }, { "epoch": 0.06194086733261123, "grad_norm": 12.43471908569336, "learning_rate": 9.946404074063836e-05, "loss": 36.3359, "step": 386 }, { "epoch": 0.062101335900830425, "grad_norm": 12.36361026763916, "learning_rate": 9.946029299044384e-05, "loss": 36.2266, "step": 387 }, { "epoch": 0.06226180446904962, "grad_norm": 12.348123550415039, "learning_rate": 9.945653225369943e-05, "loss": 36.4141, "step": 388 }, { "epoch": 0.06242227303726883, "grad_norm": 12.101619720458984, "learning_rate": 9.945275853139257e-05, "loss": 36.2578, "step": 389 }, { "epoch": 0.06258274160548802, "grad_norm": 12.602407455444336, "learning_rate": 9.94489718245141e-05, "loss": 36.0781, "step": 390 }, { "epoch": 0.06274321017370722, "grad_norm": 11.95312786102295, "learning_rate": 9.944517213405829e-05, "loss": 36.2109, "step": 391 }, { "epoch": 0.06290367874192643, "grad_norm": 12.23216438293457, "learning_rate": 9.94413594610228e-05, "loss": 36.25, "step": 392 }, { "epoch": 0.06306414731014562, "grad_norm": 12.176434516906738, "learning_rate": 9.94375338064087e-05, "loss": 36.2578, "step": 393 }, { "epoch": 0.06322461587836482, "grad_norm": 12.355364799499512, "learning_rate": 9.943369517122047e-05, "loss": 36.1641, "step": 394 }, { "epoch": 0.06338508444658403, "grad_norm": 12.766522407531738, "learning_rate": 9.942984355646601e-05, "loss": 36.0781, "step": 395 }, { "epoch": 0.06354555301480322, "grad_norm": 12.137805938720703, "learning_rate": 9.942597896315664e-05, "loss": 36.3125, "step": 396 }, { "epoch": 0.06370602158302242, "grad_norm": 12.293081283569336, "learning_rate": 9.942210139230704e-05, "loss": 36.2109, "step": 397 }, { "epoch": 0.06386649015124163, "grad_norm": 12.604049682617188, "learning_rate": 9.94182108449353e-05, "loss": 36.2188, "step": 398 }, { "epoch": 0.06402695871946082, "grad_norm": 12.311972618103027, "learning_rate": 9.9414307322063e-05, "loss": 36.3516, "step": 399 }, { "epoch": 0.06418742728768002, "grad_norm": 12.530227661132812, "learning_rate": 9.941039082471506e-05, "loss": 36.6016, "step": 400 }, { "epoch": 0.06434789585589923, "grad_norm": 12.469507217407227, "learning_rate": 9.940646135391978e-05, "loss": 36.2812, "step": 401 }, { "epoch": 0.06450836442411842, "grad_norm": 12.82365608215332, "learning_rate": 9.940251891070893e-05, "loss": 36.5078, "step": 402 }, { "epoch": 0.06466883299233762, "grad_norm": 12.25727653503418, "learning_rate": 9.939856349611762e-05, "loss": 36.2812, "step": 403 }, { "epoch": 0.06482930156055683, "grad_norm": 12.158031463623047, "learning_rate": 9.939459511118446e-05, "loss": 36.3125, "step": 404 }, { "epoch": 0.06498977012877602, "grad_norm": 12.65229320526123, "learning_rate": 9.939061375695137e-05, "loss": 36.2031, "step": 405 }, { "epoch": 0.06515023869699522, "grad_norm": 12.177111625671387, "learning_rate": 9.938661943446375e-05, "loss": 36.0859, "step": 406 }, { "epoch": 0.06531070726521443, "grad_norm": 12.38880729675293, "learning_rate": 9.938261214477034e-05, "loss": 36.1406, "step": 407 }, { "epoch": 0.06547117583343362, "grad_norm": 12.24881649017334, "learning_rate": 9.937859188892331e-05, "loss": 35.9844, "step": 408 }, { "epoch": 0.06563164440165282, "grad_norm": 12.230077743530273, "learning_rate": 9.937455866797825e-05, "loss": 36.0312, "step": 409 }, { "epoch": 0.06579211296987203, "grad_norm": 12.195962905883789, "learning_rate": 9.937051248299416e-05, "loss": 35.8672, "step": 410 }, { "epoch": 0.06595258153809122, "grad_norm": 12.746848106384277, "learning_rate": 9.936645333503341e-05, "loss": 35.9609, "step": 411 }, { "epoch": 0.06611305010631043, "grad_norm": 12.269394874572754, "learning_rate": 9.936238122516179e-05, "loss": 36.1172, "step": 412 }, { "epoch": 0.06627351867452963, "grad_norm": 12.390003204345703, "learning_rate": 9.935829615444849e-05, "loss": 35.9609, "step": 413 }, { "epoch": 0.06643398724274882, "grad_norm": 12.163464546203613, "learning_rate": 9.935419812396612e-05, "loss": 36.0391, "step": 414 }, { "epoch": 0.06659445581096803, "grad_norm": 12.367769241333008, "learning_rate": 9.935008713479068e-05, "loss": 35.8594, "step": 415 }, { "epoch": 0.06675492437918723, "grad_norm": 12.199562072753906, "learning_rate": 9.934596318800157e-05, "loss": 35.9219, "step": 416 }, { "epoch": 0.06691539294740642, "grad_norm": 12.359149932861328, "learning_rate": 9.934182628468158e-05, "loss": 35.8828, "step": 417 }, { "epoch": 0.06707586151562563, "grad_norm": 12.234763145446777, "learning_rate": 9.933767642591693e-05, "loss": 35.875, "step": 418 }, { "epoch": 0.06723633008384483, "grad_norm": 12.091044425964355, "learning_rate": 9.933351361279724e-05, "loss": 35.9766, "step": 419 }, { "epoch": 0.06739679865206402, "grad_norm": 12.161142349243164, "learning_rate": 9.93293378464155e-05, "loss": 35.8281, "step": 420 }, { "epoch": 0.06755726722028323, "grad_norm": 12.21980094909668, "learning_rate": 9.932514912786812e-05, "loss": 35.8281, "step": 421 }, { "epoch": 0.06771773578850243, "grad_norm": 11.930070877075195, "learning_rate": 9.932094745825492e-05, "loss": 35.8906, "step": 422 }, { "epoch": 0.06787820435672162, "grad_norm": 12.06513500213623, "learning_rate": 9.931673283867911e-05, "loss": 35.8359, "step": 423 }, { "epoch": 0.06803867292494083, "grad_norm": 12.261039733886719, "learning_rate": 9.931250527024731e-05, "loss": 35.7891, "step": 424 }, { "epoch": 0.06819914149316003, "grad_norm": 12.579341888427734, "learning_rate": 9.930826475406953e-05, "loss": 35.7266, "step": 425 }, { "epoch": 0.06835961006137922, "grad_norm": 12.304201126098633, "learning_rate": 9.930401129125917e-05, "loss": 35.7422, "step": 426 }, { "epoch": 0.06852007862959843, "grad_norm": 12.166975975036621, "learning_rate": 9.929974488293306e-05, "loss": 35.6719, "step": 427 }, { "epoch": 0.06868054719781763, "grad_norm": 12.154206275939941, "learning_rate": 9.929546553021139e-05, "loss": 35.6953, "step": 428 }, { "epoch": 0.06884101576603682, "grad_norm": 12.573437690734863, "learning_rate": 9.929117323421778e-05, "loss": 35.6484, "step": 429 }, { "epoch": 0.06900148433425603, "grad_norm": 12.141873359680176, "learning_rate": 9.928686799607923e-05, "loss": 35.5547, "step": 430 }, { "epoch": 0.06916195290247523, "grad_norm": 12.165328979492188, "learning_rate": 9.928254981692613e-05, "loss": 35.6562, "step": 431 }, { "epoch": 0.06932242147069442, "grad_norm": 12.017364501953125, "learning_rate": 9.927821869789234e-05, "loss": 35.6797, "step": 432 }, { "epoch": 0.06948289003891363, "grad_norm": 12.129446029663086, "learning_rate": 9.927387464011502e-05, "loss": 35.6641, "step": 433 }, { "epoch": 0.06964335860713283, "grad_norm": 12.105347633361816, "learning_rate": 9.926951764473475e-05, "loss": 35.6641, "step": 434 }, { "epoch": 0.06980382717535202, "grad_norm": 12.194924354553223, "learning_rate": 9.926514771289556e-05, "loss": 35.5859, "step": 435 }, { "epoch": 0.06996429574357123, "grad_norm": 12.420768737792969, "learning_rate": 9.926076484574485e-05, "loss": 35.5625, "step": 436 }, { "epoch": 0.07012476431179043, "grad_norm": 12.219247817993164, "learning_rate": 9.925636904443336e-05, "loss": 35.5781, "step": 437 }, { "epoch": 0.07028523288000962, "grad_norm": 12.361431121826172, "learning_rate": 9.925196031011532e-05, "loss": 35.6094, "step": 438 }, { "epoch": 0.07044570144822883, "grad_norm": 12.298192977905273, "learning_rate": 9.924753864394829e-05, "loss": 35.5078, "step": 439 }, { "epoch": 0.07060617001644803, "grad_norm": 12.398717880249023, "learning_rate": 9.924310404709325e-05, "loss": 35.3906, "step": 440 }, { "epoch": 0.07076663858466722, "grad_norm": 12.084007263183594, "learning_rate": 9.923865652071456e-05, "loss": 35.5, "step": 441 }, { "epoch": 0.07092710715288643, "grad_norm": 11.971172332763672, "learning_rate": 9.923419606598e-05, "loss": 35.6406, "step": 442 }, { "epoch": 0.07108757572110563, "grad_norm": 12.156078338623047, "learning_rate": 9.922972268406073e-05, "loss": 35.5234, "step": 443 }, { "epoch": 0.07124804428932482, "grad_norm": 12.32149600982666, "learning_rate": 9.922523637613129e-05, "loss": 35.5078, "step": 444 }, { "epoch": 0.07140851285754403, "grad_norm": 12.044044494628906, "learning_rate": 9.922073714336963e-05, "loss": 35.5625, "step": 445 }, { "epoch": 0.07156898142576323, "grad_norm": 12.317621231079102, "learning_rate": 9.921622498695712e-05, "loss": 35.4844, "step": 446 }, { "epoch": 0.07172944999398242, "grad_norm": 12.219581604003906, "learning_rate": 9.921169990807846e-05, "loss": 35.6094, "step": 447 }, { "epoch": 0.07188991856220163, "grad_norm": 12.420787811279297, "learning_rate": 9.92071619079218e-05, "loss": 35.6562, "step": 448 }, { "epoch": 0.07205038713042083, "grad_norm": 12.08934497833252, "learning_rate": 9.920261098767864e-05, "loss": 35.4219, "step": 449 }, { "epoch": 0.07221085569864003, "grad_norm": 12.444972038269043, "learning_rate": 9.919804714854392e-05, "loss": 36.0, "step": 450 }, { "epoch": 0.07237132426685923, "grad_norm": 12.514429092407227, "learning_rate": 9.919347039171593e-05, "loss": 35.8438, "step": 451 }, { "epoch": 0.07253179283507843, "grad_norm": 12.370487213134766, "learning_rate": 9.918888071839636e-05, "loss": 35.7266, "step": 452 }, { "epoch": 0.07269226140329763, "grad_norm": 12.159078598022461, "learning_rate": 9.918427812979031e-05, "loss": 35.5547, "step": 453 }, { "epoch": 0.07285272997151683, "grad_norm": 12.154401779174805, "learning_rate": 9.917966262710625e-05, "loss": 35.5469, "step": 454 }, { "epoch": 0.07301319853973604, "grad_norm": 12.296919822692871, "learning_rate": 9.917503421155606e-05, "loss": 35.4922, "step": 455 }, { "epoch": 0.07317366710795523, "grad_norm": 12.467832565307617, "learning_rate": 9.9170392884355e-05, "loss": 35.6719, "step": 456 }, { "epoch": 0.07333413567617443, "grad_norm": 12.33187198638916, "learning_rate": 9.916573864672171e-05, "loss": 35.5078, "step": 457 }, { "epoch": 0.07349460424439364, "grad_norm": 12.115418434143066, "learning_rate": 9.916107149987824e-05, "loss": 35.5078, "step": 458 }, { "epoch": 0.07365507281261283, "grad_norm": 12.259269714355469, "learning_rate": 9.915639144505002e-05, "loss": 35.4688, "step": 459 }, { "epoch": 0.07381554138083203, "grad_norm": 12.4638090133667, "learning_rate": 9.915169848346588e-05, "loss": 35.3125, "step": 460 }, { "epoch": 0.07397600994905124, "grad_norm": 12.091376304626465, "learning_rate": 9.914699261635801e-05, "loss": 35.5547, "step": 461 }, { "epoch": 0.07413647851727043, "grad_norm": 12.563615798950195, "learning_rate": 9.9142273844962e-05, "loss": 35.4844, "step": 462 }, { "epoch": 0.07429694708548963, "grad_norm": 12.313272476196289, "learning_rate": 9.913754217051685e-05, "loss": 35.4062, "step": 463 }, { "epoch": 0.07445741565370884, "grad_norm": 12.137652397155762, "learning_rate": 9.913279759426492e-05, "loss": 35.4922, "step": 464 }, { "epoch": 0.07461788422192803, "grad_norm": 12.172255516052246, "learning_rate": 9.912804011745201e-05, "loss": 35.4297, "step": 465 }, { "epoch": 0.07477835279014723, "grad_norm": 12.195493698120117, "learning_rate": 9.912326974132722e-05, "loss": 35.3438, "step": 466 }, { "epoch": 0.07493882135836644, "grad_norm": 12.169876098632812, "learning_rate": 9.911848646714311e-05, "loss": 35.2812, "step": 467 }, { "epoch": 0.07509928992658563, "grad_norm": 12.396075248718262, "learning_rate": 9.911369029615558e-05, "loss": 35.2344, "step": 468 }, { "epoch": 0.07525975849480483, "grad_norm": 12.177727699279785, "learning_rate": 9.910888122962397e-05, "loss": 35.1641, "step": 469 }, { "epoch": 0.07542022706302404, "grad_norm": 12.209211349487305, "learning_rate": 9.910405926881095e-05, "loss": 35.2734, "step": 470 }, { "epoch": 0.07558069563124323, "grad_norm": 12.529163360595703, "learning_rate": 9.90992244149826e-05, "loss": 35.1484, "step": 471 }, { "epoch": 0.07574116419946243, "grad_norm": 12.11865234375, "learning_rate": 9.909437666940839e-05, "loss": 35.4844, "step": 472 }, { "epoch": 0.07590163276768164, "grad_norm": 12.152861595153809, "learning_rate": 9.908951603336117e-05, "loss": 35.125, "step": 473 }, { "epoch": 0.07606210133590083, "grad_norm": 12.433799743652344, "learning_rate": 9.908464250811716e-05, "loss": 35.0469, "step": 474 }, { "epoch": 0.07622256990412003, "grad_norm": 12.59202766418457, "learning_rate": 9.9079756094956e-05, "loss": 35.0859, "step": 475 }, { "epoch": 0.07638303847233924, "grad_norm": 12.149296760559082, "learning_rate": 9.907485679516068e-05, "loss": 35.1719, "step": 476 }, { "epoch": 0.07654350704055843, "grad_norm": 12.246817588806152, "learning_rate": 9.906994461001759e-05, "loss": 35.1016, "step": 477 }, { "epoch": 0.07670397560877763, "grad_norm": 12.447131156921387, "learning_rate": 9.906501954081649e-05, "loss": 35.1484, "step": 478 }, { "epoch": 0.07686444417699682, "grad_norm": 12.15877628326416, "learning_rate": 9.906008158885052e-05, "loss": 35.0703, "step": 479 }, { "epoch": 0.07702491274521603, "grad_norm": 12.149840354919434, "learning_rate": 9.905513075541625e-05, "loss": 35.0703, "step": 480 }, { "epoch": 0.07718538131343523, "grad_norm": 11.93680477142334, "learning_rate": 9.905016704181358e-05, "loss": 35.2812, "step": 481 }, { "epoch": 0.07734584988165442, "grad_norm": 12.220955848693848, "learning_rate": 9.90451904493458e-05, "loss": 35.2109, "step": 482 }, { "epoch": 0.07750631844987363, "grad_norm": 12.348457336425781, "learning_rate": 9.904020097931959e-05, "loss": 35.0469, "step": 483 }, { "epoch": 0.07766678701809283, "grad_norm": 12.213186264038086, "learning_rate": 9.9035198633045e-05, "loss": 35.0703, "step": 484 }, { "epoch": 0.07782725558631202, "grad_norm": 12.057159423828125, "learning_rate": 9.903018341183552e-05, "loss": 35.2734, "step": 485 }, { "epoch": 0.07798772415453123, "grad_norm": 12.094229698181152, "learning_rate": 9.902515531700793e-05, "loss": 35.1172, "step": 486 }, { "epoch": 0.07814819272275043, "grad_norm": 12.033550262451172, "learning_rate": 9.902011434988242e-05, "loss": 35.2031, "step": 487 }, { "epoch": 0.07830866129096963, "grad_norm": 12.021283149719238, "learning_rate": 9.901506051178264e-05, "loss": 35.0859, "step": 488 }, { "epoch": 0.07846912985918883, "grad_norm": 12.039957046508789, "learning_rate": 9.900999380403548e-05, "loss": 35.2109, "step": 489 }, { "epoch": 0.07862959842740803, "grad_norm": 12.530855178833008, "learning_rate": 9.900491422797131e-05, "loss": 35.0469, "step": 490 }, { "epoch": 0.07879006699562723, "grad_norm": 11.987561225891113, "learning_rate": 9.899982178492383e-05, "loss": 35.2656, "step": 491 }, { "epoch": 0.07895053556384643, "grad_norm": 12.387149810791016, "learning_rate": 9.89947164762302e-05, "loss": 35.0781, "step": 492 }, { "epoch": 0.07911100413206563, "grad_norm": 12.41811752319336, "learning_rate": 9.898959830323082e-05, "loss": 35.0156, "step": 493 }, { "epoch": 0.07927147270028483, "grad_norm": 12.28010082244873, "learning_rate": 9.898446726726958e-05, "loss": 35.0469, "step": 494 }, { "epoch": 0.07943194126850403, "grad_norm": 12.169075965881348, "learning_rate": 9.897932336969371e-05, "loss": 35.1328, "step": 495 }, { "epoch": 0.07959240983672324, "grad_norm": 12.544301986694336, "learning_rate": 9.897416661185381e-05, "loss": 35.2031, "step": 496 }, { "epoch": 0.07975287840494243, "grad_norm": 12.022409439086914, "learning_rate": 9.896899699510388e-05, "loss": 35.0391, "step": 497 }, { "epoch": 0.07991334697316163, "grad_norm": 12.147648811340332, "learning_rate": 9.896381452080128e-05, "loss": 35.0391, "step": 498 }, { "epoch": 0.08007381554138084, "grad_norm": 12.409008026123047, "learning_rate": 9.895861919030672e-05, "loss": 35.3594, "step": 499 }, { "epoch": 0.08023428410960003, "grad_norm": 12.492522239685059, "learning_rate": 9.895341100498435e-05, "loss": 35.6094, "step": 500 }, { "epoch": 0.08039475267781923, "grad_norm": 12.676169395446777, "learning_rate": 9.894818996620161e-05, "loss": 35.6875, "step": 501 }, { "epoch": 0.08055522124603844, "grad_norm": 12.468733787536621, "learning_rate": 9.894295607532942e-05, "loss": 35.1953, "step": 502 }, { "epoch": 0.08071568981425763, "grad_norm": 12.312492370605469, "learning_rate": 9.893770933374198e-05, "loss": 35.1797, "step": 503 }, { "epoch": 0.08087615838247683, "grad_norm": 11.92618465423584, "learning_rate": 9.893244974281692e-05, "loss": 35.2266, "step": 504 }, { "epoch": 0.08103662695069604, "grad_norm": 12.261231422424316, "learning_rate": 9.89271773039352e-05, "loss": 34.9609, "step": 505 }, { "epoch": 0.08119709551891523, "grad_norm": 12.08036994934082, "learning_rate": 9.892189201848121e-05, "loss": 35.2188, "step": 506 }, { "epoch": 0.08135756408713443, "grad_norm": 12.09913444519043, "learning_rate": 9.891659388784267e-05, "loss": 35.1797, "step": 507 }, { "epoch": 0.08151803265535364, "grad_norm": 12.171730995178223, "learning_rate": 9.891128291341068e-05, "loss": 34.9766, "step": 508 }, { "epoch": 0.08167850122357283, "grad_norm": 12.164820671081543, "learning_rate": 9.890595909657972e-05, "loss": 34.9062, "step": 509 }, { "epoch": 0.08183896979179203, "grad_norm": 12.035665512084961, "learning_rate": 9.890062243874763e-05, "loss": 35.0156, "step": 510 }, { "epoch": 0.08199943836001124, "grad_norm": 12.602537155151367, "learning_rate": 9.889527294131563e-05, "loss": 34.8906, "step": 511 }, { "epoch": 0.08215990692823043, "grad_norm": 12.344341278076172, "learning_rate": 9.888991060568833e-05, "loss": 34.8672, "step": 512 }, { "epoch": 0.08232037549644963, "grad_norm": 12.184473991394043, "learning_rate": 9.88845354332737e-05, "loss": 35.0312, "step": 513 }, { "epoch": 0.08248084406466884, "grad_norm": 12.55515193939209, "learning_rate": 9.887914742548304e-05, "loss": 34.9219, "step": 514 }, { "epoch": 0.08264131263288803, "grad_norm": 12.182931900024414, "learning_rate": 9.887374658373105e-05, "loss": 34.9219, "step": 515 }, { "epoch": 0.08280178120110723, "grad_norm": 12.410592079162598, "learning_rate": 9.886833290943585e-05, "loss": 34.9141, "step": 516 }, { "epoch": 0.08296224976932644, "grad_norm": 12.317026138305664, "learning_rate": 9.886290640401885e-05, "loss": 34.8594, "step": 517 }, { "epoch": 0.08312271833754563, "grad_norm": 12.168461799621582, "learning_rate": 9.885746706890485e-05, "loss": 34.9297, "step": 518 }, { "epoch": 0.08328318690576483, "grad_norm": 12.341252326965332, "learning_rate": 9.885201490552206e-05, "loss": 34.9531, "step": 519 }, { "epoch": 0.08344365547398404, "grad_norm": 12.287508010864258, "learning_rate": 9.8846549915302e-05, "loss": 34.7734, "step": 520 }, { "epoch": 0.08360412404220323, "grad_norm": 12.04936695098877, "learning_rate": 9.88410720996796e-05, "loss": 35.1719, "step": 521 }, { "epoch": 0.08376459261042243, "grad_norm": 12.097387313842773, "learning_rate": 9.883558146009317e-05, "loss": 34.8828, "step": 522 }, { "epoch": 0.08392506117864164, "grad_norm": 12.176275253295898, "learning_rate": 9.88300779979843e-05, "loss": 34.8984, "step": 523 }, { "epoch": 0.08408552974686083, "grad_norm": 12.18079662322998, "learning_rate": 9.882456171479806e-05, "loss": 34.7969, "step": 524 }, { "epoch": 0.08424599831508003, "grad_norm": 12.45856761932373, "learning_rate": 9.88190326119828e-05, "loss": 34.9062, "step": 525 }, { "epoch": 0.08440646688329924, "grad_norm": 12.28471565246582, "learning_rate": 9.881349069099031e-05, "loss": 34.8047, "step": 526 }, { "epoch": 0.08456693545151843, "grad_norm": 12.21817398071289, "learning_rate": 9.880793595327566e-05, "loss": 34.8359, "step": 527 }, { "epoch": 0.08472740401973763, "grad_norm": 12.276748657226562, "learning_rate": 9.880236840029735e-05, "loss": 34.7656, "step": 528 }, { "epoch": 0.08488787258795684, "grad_norm": 12.295353889465332, "learning_rate": 9.879678803351725e-05, "loss": 34.7734, "step": 529 }, { "epoch": 0.08504834115617603, "grad_norm": 12.102806091308594, "learning_rate": 9.879119485440052e-05, "loss": 34.9062, "step": 530 }, { "epoch": 0.08520880972439523, "grad_norm": 12.017329216003418, "learning_rate": 9.878558886441576e-05, "loss": 34.9141, "step": 531 }, { "epoch": 0.08536927829261444, "grad_norm": 12.897024154663086, "learning_rate": 9.877997006503492e-05, "loss": 34.6562, "step": 532 }, { "epoch": 0.08552974686083363, "grad_norm": 12.535922050476074, "learning_rate": 9.877433845773328e-05, "loss": 34.7031, "step": 533 }, { "epoch": 0.08569021542905284, "grad_norm": 12.05150032043457, "learning_rate": 9.87686940439895e-05, "loss": 34.9297, "step": 534 }, { "epoch": 0.08585068399727204, "grad_norm": 12.20790958404541, "learning_rate": 9.876303682528565e-05, "loss": 34.8047, "step": 535 }, { "epoch": 0.08601115256549123, "grad_norm": 12.061967849731445, "learning_rate": 9.875736680310707e-05, "loss": 34.8438, "step": 536 }, { "epoch": 0.08617162113371044, "grad_norm": 12.17227554321289, "learning_rate": 9.875168397894253e-05, "loss": 34.8906, "step": 537 }, { "epoch": 0.08633208970192964, "grad_norm": 12.538081169128418, "learning_rate": 9.874598835428412e-05, "loss": 34.7891, "step": 538 }, { "epoch": 0.08649255827014883, "grad_norm": 12.091554641723633, "learning_rate": 9.874027993062735e-05, "loss": 34.7266, "step": 539 }, { "epoch": 0.08665302683836804, "grad_norm": 12.468729972839355, "learning_rate": 9.873455870947104e-05, "loss": 34.7422, "step": 540 }, { "epoch": 0.08681349540658724, "grad_norm": 12.20627498626709, "learning_rate": 9.872882469231739e-05, "loss": 34.8359, "step": 541 }, { "epoch": 0.08697396397480643, "grad_norm": 12.198914527893066, "learning_rate": 9.872307788067192e-05, "loss": 34.7188, "step": 542 }, { "epoch": 0.08713443254302564, "grad_norm": 12.22457504272461, "learning_rate": 9.871731827604357e-05, "loss": 34.8984, "step": 543 }, { "epoch": 0.08729490111124484, "grad_norm": 12.708807945251465, "learning_rate": 9.87115458799446e-05, "loss": 34.6641, "step": 544 }, { "epoch": 0.08745536967946403, "grad_norm": 12.174970626831055, "learning_rate": 9.870576069389066e-05, "loss": 34.8438, "step": 545 }, { "epoch": 0.08761583824768324, "grad_norm": 12.235054969787598, "learning_rate": 9.869996271940072e-05, "loss": 34.8594, "step": 546 }, { "epoch": 0.08777630681590244, "grad_norm": 12.654104232788086, "learning_rate": 9.869415195799712e-05, "loss": 34.9297, "step": 547 }, { "epoch": 0.08793677538412163, "grad_norm": 12.552769660949707, "learning_rate": 9.868832841120558e-05, "loss": 34.7422, "step": 548 }, { "epoch": 0.08809724395234084, "grad_norm": 12.017119407653809, "learning_rate": 9.868249208055515e-05, "loss": 34.8828, "step": 549 }, { "epoch": 0.08825771252056004, "grad_norm": 12.209840774536133, "learning_rate": 9.867664296757826e-05, "loss": 35.3125, "step": 550 }, { "epoch": 0.08841818108877923, "grad_norm": 12.497334480285645, "learning_rate": 9.867078107381067e-05, "loss": 35.0859, "step": 551 }, { "epoch": 0.08857864965699844, "grad_norm": 12.699776649475098, "learning_rate": 9.866490640079152e-05, "loss": 34.8281, "step": 552 }, { "epoch": 0.08873911822521764, "grad_norm": 12.140986442565918, "learning_rate": 9.865901895006327e-05, "loss": 34.8125, "step": 553 }, { "epoch": 0.08889958679343683, "grad_norm": 12.189597129821777, "learning_rate": 9.86531187231718e-05, "loss": 35.0312, "step": 554 }, { "epoch": 0.08906005536165604, "grad_norm": 12.243202209472656, "learning_rate": 9.864720572166627e-05, "loss": 34.9453, "step": 555 }, { "epoch": 0.08922052392987524, "grad_norm": 12.526098251342773, "learning_rate": 9.864127994709924e-05, "loss": 34.7734, "step": 556 }, { "epoch": 0.08938099249809443, "grad_norm": 12.413402557373047, "learning_rate": 9.863534140102661e-05, "loss": 34.6328, "step": 557 }, { "epoch": 0.08954146106631364, "grad_norm": 12.270722389221191, "learning_rate": 9.862939008500765e-05, "loss": 34.7734, "step": 558 }, { "epoch": 0.08970192963453284, "grad_norm": 12.025915145874023, "learning_rate": 9.862342600060494e-05, "loss": 34.6953, "step": 559 }, { "epoch": 0.08986239820275203, "grad_norm": 12.03973388671875, "learning_rate": 9.861744914938447e-05, "loss": 34.6562, "step": 560 }, { "epoch": 0.09002286677097124, "grad_norm": 12.09620475769043, "learning_rate": 9.861145953291553e-05, "loss": 34.5703, "step": 561 }, { "epoch": 0.09018333533919044, "grad_norm": 12.38181209564209, "learning_rate": 9.860545715277078e-05, "loss": 34.6562, "step": 562 }, { "epoch": 0.09034380390740963, "grad_norm": 11.915289878845215, "learning_rate": 9.85994420105263e-05, "loss": 34.7578, "step": 563 }, { "epoch": 0.09050427247562884, "grad_norm": 12.051196098327637, "learning_rate": 9.859341410776136e-05, "loss": 34.6328, "step": 564 }, { "epoch": 0.09066474104384803, "grad_norm": 12.146503448486328, "learning_rate": 9.858737344605873e-05, "loss": 34.7344, "step": 565 }, { "epoch": 0.09082520961206723, "grad_norm": 11.932822227478027, "learning_rate": 9.858132002700447e-05, "loss": 34.7656, "step": 566 }, { "epoch": 0.09098567818028644, "grad_norm": 12.16544246673584, "learning_rate": 9.857525385218802e-05, "loss": 34.5625, "step": 567 }, { "epoch": 0.09114614674850563, "grad_norm": 12.405448913574219, "learning_rate": 9.85691749232021e-05, "loss": 34.5, "step": 568 }, { "epoch": 0.09130661531672483, "grad_norm": 12.151448249816895, "learning_rate": 9.856308324164288e-05, "loss": 34.6328, "step": 569 }, { "epoch": 0.09146708388494404, "grad_norm": 12.304437637329102, "learning_rate": 9.855697880910976e-05, "loss": 34.7266, "step": 570 }, { "epoch": 0.09162755245316323, "grad_norm": 12.095673561096191, "learning_rate": 9.855086162720559e-05, "loss": 34.5938, "step": 571 }, { "epoch": 0.09178802102138243, "grad_norm": 12.207725524902344, "learning_rate": 9.854473169753653e-05, "loss": 34.5312, "step": 572 }, { "epoch": 0.09194848958960164, "grad_norm": 12.089463233947754, "learning_rate": 9.853858902171209e-05, "loss": 34.5391, "step": 573 }, { "epoch": 0.09210895815782083, "grad_norm": 12.341694831848145, "learning_rate": 9.85324336013451e-05, "loss": 34.4453, "step": 574 }, { "epoch": 0.09226942672604004, "grad_norm": 12.247749328613281, "learning_rate": 9.852626543805177e-05, "loss": 34.5078, "step": 575 }, { "epoch": 0.09242989529425924, "grad_norm": 12.044300079345703, "learning_rate": 9.852008453345163e-05, "loss": 34.6406, "step": 576 }, { "epoch": 0.09259036386247843, "grad_norm": 12.149894714355469, "learning_rate": 9.851389088916761e-05, "loss": 34.6172, "step": 577 }, { "epoch": 0.09275083243069764, "grad_norm": 12.153769493103027, "learning_rate": 9.85076845068259e-05, "loss": 34.5234, "step": 578 }, { "epoch": 0.09291130099891684, "grad_norm": 12.534831047058105, "learning_rate": 9.850146538805612e-05, "loss": 34.4766, "step": 579 }, { "epoch": 0.09307176956713603, "grad_norm": 12.208760261535645, "learning_rate": 9.849523353449117e-05, "loss": 34.5391, "step": 580 }, { "epoch": 0.09323223813535524, "grad_norm": 12.328667640686035, "learning_rate": 9.848898894776733e-05, "loss": 34.4375, "step": 581 }, { "epoch": 0.09339270670357444, "grad_norm": 12.170989990234375, "learning_rate": 9.848273162952419e-05, "loss": 34.5703, "step": 582 }, { "epoch": 0.09355317527179363, "grad_norm": 12.367341995239258, "learning_rate": 9.847646158140471e-05, "loss": 34.4375, "step": 583 }, { "epoch": 0.09371364384001284, "grad_norm": 12.076321601867676, "learning_rate": 9.84701788050552e-05, "loss": 34.5156, "step": 584 }, { "epoch": 0.09387411240823204, "grad_norm": 12.086761474609375, "learning_rate": 9.84638833021253e-05, "loss": 34.6016, "step": 585 }, { "epoch": 0.09403458097645123, "grad_norm": 12.402017593383789, "learning_rate": 9.845757507426797e-05, "loss": 34.5078, "step": 586 }, { "epoch": 0.09419504954467044, "grad_norm": 12.41186809539795, "learning_rate": 9.845125412313955e-05, "loss": 34.5312, "step": 587 }, { "epoch": 0.09435551811288964, "grad_norm": 12.10059642791748, "learning_rate": 9.844492045039968e-05, "loss": 34.6328, "step": 588 }, { "epoch": 0.09451598668110883, "grad_norm": 12.087583541870117, "learning_rate": 9.84385740577114e-05, "loss": 34.4922, "step": 589 }, { "epoch": 0.09467645524932804, "grad_norm": 12.034614562988281, "learning_rate": 9.8432214946741e-05, "loss": 34.6562, "step": 590 }, { "epoch": 0.09483692381754724, "grad_norm": 12.167352676391602, "learning_rate": 9.84258431191582e-05, "loss": 34.6016, "step": 591 }, { "epoch": 0.09499739238576643, "grad_norm": 12.198674201965332, "learning_rate": 9.841945857663601e-05, "loss": 34.4688, "step": 592 }, { "epoch": 0.09515786095398564, "grad_norm": 12.177042961120605, "learning_rate": 9.841306132085079e-05, "loss": 34.5547, "step": 593 }, { "epoch": 0.09531832952220484, "grad_norm": 12.534533500671387, "learning_rate": 9.840665135348221e-05, "loss": 34.4844, "step": 594 }, { "epoch": 0.09547879809042403, "grad_norm": 12.698091506958008, "learning_rate": 9.840022867621335e-05, "loss": 34.4531, "step": 595 }, { "epoch": 0.09563926665864324, "grad_norm": 12.146955490112305, "learning_rate": 9.839379329073055e-05, "loss": 34.4922, "step": 596 }, { "epoch": 0.09579973522686244, "grad_norm": 12.454896926879883, "learning_rate": 9.838734519872352e-05, "loss": 34.4609, "step": 597 }, { "epoch": 0.09596020379508163, "grad_norm": 12.154399871826172, "learning_rate": 9.838088440188533e-05, "loss": 34.5859, "step": 598 }, { "epoch": 0.09612067236330084, "grad_norm": 12.256073951721191, "learning_rate": 9.837441090191233e-05, "loss": 34.7812, "step": 599 }, { "epoch": 0.09628114093152004, "grad_norm": 12.712606430053711, "learning_rate": 9.836792470050424e-05, "loss": 34.7969, "step": 600 }, { "epoch": 0.09644160949973923, "grad_norm": 12.490970611572266, "learning_rate": 9.836142579936413e-05, "loss": 34.9062, "step": 601 }, { "epoch": 0.09660207806795844, "grad_norm": 12.462943077087402, "learning_rate": 9.835491420019836e-05, "loss": 34.7188, "step": 602 }, { "epoch": 0.09676254663617764, "grad_norm": 12.356083869934082, "learning_rate": 9.834838990471667e-05, "loss": 34.875, "step": 603 }, { "epoch": 0.09692301520439683, "grad_norm": 12.270198822021484, "learning_rate": 9.83418529146321e-05, "loss": 34.7266, "step": 604 }, { "epoch": 0.09708348377261604, "grad_norm": 12.058670043945312, "learning_rate": 9.833530323166104e-05, "loss": 34.5469, "step": 605 }, { "epoch": 0.09724395234083524, "grad_norm": 12.59762954711914, "learning_rate": 9.83287408575232e-05, "loss": 34.6953, "step": 606 }, { "epoch": 0.09740442090905443, "grad_norm": 12.317266464233398, "learning_rate": 9.832216579394164e-05, "loss": 34.5156, "step": 607 }, { "epoch": 0.09756488947727364, "grad_norm": 12.178384780883789, "learning_rate": 9.831557804264275e-05, "loss": 34.5938, "step": 608 }, { "epoch": 0.09772535804549284, "grad_norm": 12.16208267211914, "learning_rate": 9.830897760535624e-05, "loss": 34.6094, "step": 609 }, { "epoch": 0.09788582661371203, "grad_norm": 12.441542625427246, "learning_rate": 9.830236448381514e-05, "loss": 34.6953, "step": 610 }, { "epoch": 0.09804629518193124, "grad_norm": 12.237664222717285, "learning_rate": 9.829573867975584e-05, "loss": 34.5703, "step": 611 }, { "epoch": 0.09820676375015044, "grad_norm": 12.246814727783203, "learning_rate": 9.828910019491804e-05, "loss": 34.5469, "step": 612 }, { "epoch": 0.09836723231836964, "grad_norm": 11.903926849365234, "learning_rate": 9.828244903104477e-05, "loss": 34.5234, "step": 613 }, { "epoch": 0.09852770088658884, "grad_norm": 12.153425216674805, "learning_rate": 9.827578518988241e-05, "loss": 34.6094, "step": 614 }, { "epoch": 0.09868816945480804, "grad_norm": 12.030207633972168, "learning_rate": 9.826910867318065e-05, "loss": 34.4766, "step": 615 }, { "epoch": 0.09884863802302724, "grad_norm": 12.453965187072754, "learning_rate": 9.826241948269248e-05, "loss": 34.5547, "step": 616 }, { "epoch": 0.09900910659124644, "grad_norm": 12.30009937286377, "learning_rate": 9.825571762017429e-05, "loss": 34.4062, "step": 617 }, { "epoch": 0.09916957515946564, "grad_norm": 12.278019905090332, "learning_rate": 9.824900308738572e-05, "loss": 34.3828, "step": 618 }, { "epoch": 0.09933004372768484, "grad_norm": 12.14680290222168, "learning_rate": 9.824227588608981e-05, "loss": 34.4766, "step": 619 }, { "epoch": 0.09949051229590404, "grad_norm": 12.82495403289795, "learning_rate": 9.823553601805286e-05, "loss": 34.375, "step": 620 }, { "epoch": 0.09965098086412325, "grad_norm": 12.047040939331055, "learning_rate": 9.822878348504452e-05, "loss": 34.4297, "step": 621 }, { "epoch": 0.09981144943234244, "grad_norm": 12.175782203674316, "learning_rate": 9.822201828883779e-05, "loss": 34.4844, "step": 622 }, { "epoch": 0.09997191800056164, "grad_norm": 12.024333953857422, "learning_rate": 9.821524043120898e-05, "loss": 34.4453, "step": 623 }, { "epoch": 0.10013238656878085, "grad_norm": 12.26768970489502, "learning_rate": 9.82084499139377e-05, "loss": 34.3516, "step": 624 }, { "epoch": 0.10029285513700004, "grad_norm": 12.13764762878418, "learning_rate": 9.820164673880691e-05, "loss": 34.5, "step": 625 }, { "epoch": 0.10045332370521924, "grad_norm": 12.17335319519043, "learning_rate": 9.819483090760288e-05, "loss": 34.3594, "step": 626 }, { "epoch": 0.10061379227343845, "grad_norm": 12.6514253616333, "learning_rate": 9.818800242211523e-05, "loss": 34.375, "step": 627 }, { "epoch": 0.10077426084165764, "grad_norm": 12.086881637573242, "learning_rate": 9.818116128413686e-05, "loss": 34.4375, "step": 628 }, { "epoch": 0.10093472940987684, "grad_norm": 12.017769813537598, "learning_rate": 9.817430749546403e-05, "loss": 34.4375, "step": 629 }, { "epoch": 0.10109519797809605, "grad_norm": 12.038982391357422, "learning_rate": 9.81674410578963e-05, "loss": 34.4922, "step": 630 }, { "epoch": 0.10125566654631524, "grad_norm": 12.209297180175781, "learning_rate": 9.816056197323654e-05, "loss": 34.375, "step": 631 }, { "epoch": 0.10141613511453444, "grad_norm": 11.956053733825684, "learning_rate": 9.815367024329098e-05, "loss": 34.3594, "step": 632 }, { "epoch": 0.10157660368275365, "grad_norm": 12.142565727233887, "learning_rate": 9.814676586986916e-05, "loss": 34.25, "step": 633 }, { "epoch": 0.10173707225097284, "grad_norm": 12.295663833618164, "learning_rate": 9.813984885478391e-05, "loss": 34.25, "step": 634 }, { "epoch": 0.10189754081919204, "grad_norm": 12.281755447387695, "learning_rate": 9.81329191998514e-05, "loss": 34.4453, "step": 635 }, { "epoch": 0.10205800938741125, "grad_norm": 12.091012001037598, "learning_rate": 9.812597690689109e-05, "loss": 34.2969, "step": 636 }, { "epoch": 0.10221847795563044, "grad_norm": 12.0304594039917, "learning_rate": 9.811902197772582e-05, "loss": 34.3984, "step": 637 }, { "epoch": 0.10237894652384964, "grad_norm": 12.518059730529785, "learning_rate": 9.811205441418171e-05, "loss": 34.1328, "step": 638 }, { "epoch": 0.10253941509206885, "grad_norm": 12.164511680603027, "learning_rate": 9.810507421808819e-05, "loss": 34.3906, "step": 639 }, { "epoch": 0.10269988366028804, "grad_norm": 12.197732925415039, "learning_rate": 9.8098081391278e-05, "loss": 34.3203, "step": 640 }, { "epoch": 0.10286035222850724, "grad_norm": 12.277789115905762, "learning_rate": 9.809107593558724e-05, "loss": 34.3828, "step": 641 }, { "epoch": 0.10302082079672645, "grad_norm": 12.311266899108887, "learning_rate": 9.808405785285531e-05, "loss": 34.2812, "step": 642 }, { "epoch": 0.10318128936494564, "grad_norm": 12.181012153625488, "learning_rate": 9.807702714492486e-05, "loss": 34.2578, "step": 643 }, { "epoch": 0.10334175793316484, "grad_norm": 12.024508476257324, "learning_rate": 9.806998381364195e-05, "loss": 34.3984, "step": 644 }, { "epoch": 0.10350222650138405, "grad_norm": 12.232510566711426, "learning_rate": 9.806292786085591e-05, "loss": 34.4062, "step": 645 }, { "epoch": 0.10366269506960324, "grad_norm": 12.079463958740234, "learning_rate": 9.805585928841939e-05, "loss": 34.3047, "step": 646 }, { "epoch": 0.10382316363782244, "grad_norm": 12.233291625976562, "learning_rate": 9.80487780981883e-05, "loss": 34.4453, "step": 647 }, { "epoch": 0.10398363220604165, "grad_norm": 12.085198402404785, "learning_rate": 9.8041684292022e-05, "loss": 34.3516, "step": 648 }, { "epoch": 0.10414410077426084, "grad_norm": 12.490997314453125, "learning_rate": 9.803457787178304e-05, "loss": 34.2969, "step": 649 }, { "epoch": 0.10430456934248004, "grad_norm": 12.208477020263672, "learning_rate": 9.802745883933728e-05, "loss": 34.8906, "step": 650 }, { "epoch": 0.10446503791069923, "grad_norm": 12.280533790588379, "learning_rate": 9.802032719655396e-05, "loss": 34.9297, "step": 651 }, { "epoch": 0.10462550647891844, "grad_norm": 12.095348358154297, "learning_rate": 9.801318294530563e-05, "loss": 34.4922, "step": 652 }, { "epoch": 0.10478597504713764, "grad_norm": 12.394257545471191, "learning_rate": 9.800602608746808e-05, "loss": 34.2969, "step": 653 }, { "epoch": 0.10494644361535684, "grad_norm": 12.165133476257324, "learning_rate": 9.799885662492046e-05, "loss": 34.5781, "step": 654 }, { "epoch": 0.10510691218357604, "grad_norm": 12.432930946350098, "learning_rate": 9.799167455954523e-05, "loss": 34.4922, "step": 655 }, { "epoch": 0.10526738075179524, "grad_norm": 12.190054893493652, "learning_rate": 9.798447989322814e-05, "loss": 34.4375, "step": 656 }, { "epoch": 0.10542784932001444, "grad_norm": 12.170463562011719, "learning_rate": 9.797727262785828e-05, "loss": 34.3594, "step": 657 }, { "epoch": 0.10558831788823364, "grad_norm": 12.483928680419922, "learning_rate": 9.797005276532801e-05, "loss": 34.3672, "step": 658 }, { "epoch": 0.10574878645645285, "grad_norm": 12.162064552307129, "learning_rate": 9.7962820307533e-05, "loss": 34.3594, "step": 659 }, { "epoch": 0.10590925502467204, "grad_norm": 12.298524856567383, "learning_rate": 9.795557525637226e-05, "loss": 34.2266, "step": 660 }, { "epoch": 0.10606972359289124, "grad_norm": 12.021821975708008, "learning_rate": 9.794831761374809e-05, "loss": 34.3281, "step": 661 }, { "epoch": 0.10623019216111045, "grad_norm": 12.241772651672363, "learning_rate": 9.794104738156611e-05, "loss": 34.3516, "step": 662 }, { "epoch": 0.10639066072932964, "grad_norm": 12.024081230163574, "learning_rate": 9.793376456173518e-05, "loss": 34.3125, "step": 663 }, { "epoch": 0.10655112929754884, "grad_norm": 12.464444160461426, "learning_rate": 9.792646915616757e-05, "loss": 34.1953, "step": 664 }, { "epoch": 0.10671159786576805, "grad_norm": 12.39520263671875, "learning_rate": 9.791916116677874e-05, "loss": 34.2266, "step": 665 }, { "epoch": 0.10687206643398724, "grad_norm": 11.981856346130371, "learning_rate": 9.791184059548759e-05, "loss": 34.3906, "step": 666 }, { "epoch": 0.10703253500220644, "grad_norm": 12.152158737182617, "learning_rate": 9.790450744421618e-05, "loss": 34.2109, "step": 667 }, { "epoch": 0.10719300357042565, "grad_norm": 12.101911544799805, "learning_rate": 9.789716171488996e-05, "loss": 34.3359, "step": 668 }, { "epoch": 0.10735347213864484, "grad_norm": 12.02397632598877, "learning_rate": 9.788980340943768e-05, "loss": 34.25, "step": 669 }, { "epoch": 0.10751394070686404, "grad_norm": 12.127471923828125, "learning_rate": 9.788243252979136e-05, "loss": 34.2812, "step": 670 }, { "epoch": 0.10767440927508325, "grad_norm": 12.607544898986816, "learning_rate": 9.787504907788632e-05, "loss": 34.1875, "step": 671 }, { "epoch": 0.10783487784330244, "grad_norm": 12.372306823730469, "learning_rate": 9.786765305566123e-05, "loss": 34.1172, "step": 672 }, { "epoch": 0.10799534641152164, "grad_norm": 12.17798900604248, "learning_rate": 9.786024446505802e-05, "loss": 34.2344, "step": 673 }, { "epoch": 0.10815581497974085, "grad_norm": 12.298652648925781, "learning_rate": 9.785282330802191e-05, "loss": 34.1953, "step": 674 }, { "epoch": 0.10831628354796004, "grad_norm": 12.146167755126953, "learning_rate": 9.784538958650144e-05, "loss": 34.1719, "step": 675 }, { "epoch": 0.10847675211617924, "grad_norm": 12.268320083618164, "learning_rate": 9.783794330244847e-05, "loss": 34.0781, "step": 676 }, { "epoch": 0.10863722068439845, "grad_norm": 12.026270866394043, "learning_rate": 9.783048445781813e-05, "loss": 34.2734, "step": 677 }, { "epoch": 0.10879768925261764, "grad_norm": 11.969409942626953, "learning_rate": 9.782301305456881e-05, "loss": 34.2578, "step": 678 }, { "epoch": 0.10895815782083684, "grad_norm": 12.332582473754883, "learning_rate": 9.78155290946623e-05, "loss": 34.0938, "step": 679 }, { "epoch": 0.10911862638905605, "grad_norm": 12.338920593261719, "learning_rate": 9.780803258006358e-05, "loss": 34.0938, "step": 680 }, { "epoch": 0.10927909495727524, "grad_norm": 12.366517066955566, "learning_rate": 9.7800523512741e-05, "loss": 34.1094, "step": 681 }, { "epoch": 0.10943956352549444, "grad_norm": 12.146489143371582, "learning_rate": 9.779300189466616e-05, "loss": 34.1406, "step": 682 }, { "epoch": 0.10960003209371365, "grad_norm": 12.267520904541016, "learning_rate": 9.778546772781401e-05, "loss": 34.2266, "step": 683 }, { "epoch": 0.10976050066193284, "grad_norm": 12.017552375793457, "learning_rate": 9.777792101416272e-05, "loss": 34.2109, "step": 684 }, { "epoch": 0.10992096923015204, "grad_norm": 12.152997970581055, "learning_rate": 9.777036175569382e-05, "loss": 34.1797, "step": 685 }, { "epoch": 0.11008143779837125, "grad_norm": 12.0151948928833, "learning_rate": 9.776278995439209e-05, "loss": 34.1797, "step": 686 }, { "epoch": 0.11024190636659044, "grad_norm": 12.161999702453613, "learning_rate": 9.775520561224564e-05, "loss": 34.1016, "step": 687 }, { "epoch": 0.11040237493480964, "grad_norm": 12.07791805267334, "learning_rate": 9.774760873124584e-05, "loss": 34.1562, "step": 688 }, { "epoch": 0.11056284350302885, "grad_norm": 12.404033660888672, "learning_rate": 9.773999931338738e-05, "loss": 34.0469, "step": 689 }, { "epoch": 0.11072331207124804, "grad_norm": 12.031968116760254, "learning_rate": 9.77323773606682e-05, "loss": 34.2812, "step": 690 }, { "epoch": 0.11088378063946724, "grad_norm": 12.150376319885254, "learning_rate": 9.772474287508958e-05, "loss": 34.25, "step": 691 }, { "epoch": 0.11104424920768645, "grad_norm": 12.14076042175293, "learning_rate": 9.771709585865607e-05, "loss": 34.125, "step": 692 }, { "epoch": 0.11120471777590564, "grad_norm": 12.281757354736328, "learning_rate": 9.770943631337552e-05, "loss": 33.9922, "step": 693 }, { "epoch": 0.11136518634412484, "grad_norm": 12.526540756225586, "learning_rate": 9.770176424125903e-05, "loss": 33.9766, "step": 694 }, { "epoch": 0.11152565491234405, "grad_norm": 12.47968578338623, "learning_rate": 9.769407964432105e-05, "loss": 34.2109, "step": 695 }, { "epoch": 0.11168612348056324, "grad_norm": 12.281343460083008, "learning_rate": 9.768638252457927e-05, "loss": 34.0938, "step": 696 }, { "epoch": 0.11184659204878244, "grad_norm": 12.301072120666504, "learning_rate": 9.767867288405469e-05, "loss": 34.1094, "step": 697 }, { "epoch": 0.11200706061700165, "grad_norm": 12.041645050048828, "learning_rate": 9.767095072477159e-05, "loss": 34.3203, "step": 698 }, { "epoch": 0.11216752918522084, "grad_norm": 12.606226921081543, "learning_rate": 9.766321604875754e-05, "loss": 34.0938, "step": 699 }, { "epoch": 0.11232799775344005, "grad_norm": 12.666418075561523, "learning_rate": 9.765546885804341e-05, "loss": 34.4219, "step": 700 }, { "epoch": 0.11248846632165925, "grad_norm": 17.695735931396484, "learning_rate": 9.764770915466332e-05, "loss": 34.3984, "step": 701 }, { "epoch": 0.11264893488987844, "grad_norm": 12.522146224975586, "learning_rate": 9.763993694065472e-05, "loss": 34.3438, "step": 702 }, { "epoch": 0.11280940345809765, "grad_norm": 12.757789611816406, "learning_rate": 9.76321522180583e-05, "loss": 34.3438, "step": 703 }, { "epoch": 0.11296987202631685, "grad_norm": 12.32081127166748, "learning_rate": 9.762435498891806e-05, "loss": 34.0703, "step": 704 }, { "epoch": 0.11313034059453604, "grad_norm": 12.282418251037598, "learning_rate": 9.761654525528132e-05, "loss": 34.1641, "step": 705 }, { "epoch": 0.11329080916275525, "grad_norm": 12.438352584838867, "learning_rate": 9.76087230191986e-05, "loss": 34.2578, "step": 706 }, { "epoch": 0.11345127773097445, "grad_norm": 12.108648300170898, "learning_rate": 9.760088828272374e-05, "loss": 34.0938, "step": 707 }, { "epoch": 0.11361174629919364, "grad_norm": 12.16850757598877, "learning_rate": 9.759304104791391e-05, "loss": 34.125, "step": 708 }, { "epoch": 0.11377221486741285, "grad_norm": 12.165984153747559, "learning_rate": 9.758518131682949e-05, "loss": 34.1953, "step": 709 }, { "epoch": 0.11393268343563205, "grad_norm": 11.896326065063477, "learning_rate": 9.757730909153419e-05, "loss": 34.0547, "step": 710 }, { "epoch": 0.11409315200385124, "grad_norm": 12.292864799499512, "learning_rate": 9.756942437409497e-05, "loss": 34.1172, "step": 711 }, { "epoch": 0.11425362057207045, "grad_norm": 12.180912971496582, "learning_rate": 9.756152716658209e-05, "loss": 33.9609, "step": 712 }, { "epoch": 0.11441408914028965, "grad_norm": 11.919355392456055, "learning_rate": 9.755361747106906e-05, "loss": 34.0859, "step": 713 }, { "epoch": 0.11457455770850884, "grad_norm": 12.15156364440918, "learning_rate": 9.754569528963273e-05, "loss": 34.1172, "step": 714 }, { "epoch": 0.11473502627672805, "grad_norm": 12.205577850341797, "learning_rate": 9.753776062435315e-05, "loss": 34.0156, "step": 715 }, { "epoch": 0.11489549484494725, "grad_norm": 12.092063903808594, "learning_rate": 9.75298134773137e-05, "loss": 34.0625, "step": 716 }, { "epoch": 0.11505596341316644, "grad_norm": 12.043376922607422, "learning_rate": 9.752185385060104e-05, "loss": 34.0469, "step": 717 }, { "epoch": 0.11521643198138565, "grad_norm": 12.176663398742676, "learning_rate": 9.751388174630507e-05, "loss": 34.0703, "step": 718 }, { "epoch": 0.11537690054960485, "grad_norm": 12.175376892089844, "learning_rate": 9.750589716651898e-05, "loss": 33.9062, "step": 719 }, { "epoch": 0.11553736911782404, "grad_norm": 12.19766616821289, "learning_rate": 9.749790011333928e-05, "loss": 34.1094, "step": 720 }, { "epoch": 0.11569783768604325, "grad_norm": 12.311942100524902, "learning_rate": 9.748989058886569e-05, "loss": 33.8906, "step": 721 }, { "epoch": 0.11585830625426245, "grad_norm": 12.142897605895996, "learning_rate": 9.748186859520123e-05, "loss": 33.9688, "step": 722 }, { "epoch": 0.11601877482248164, "grad_norm": 12.29298210144043, "learning_rate": 9.747383413445222e-05, "loss": 33.9531, "step": 723 }, { "epoch": 0.11617924339070085, "grad_norm": 12.35881233215332, "learning_rate": 9.746578720872819e-05, "loss": 33.8438, "step": 724 }, { "epoch": 0.11633971195892005, "grad_norm": 12.482568740844727, "learning_rate": 9.7457727820142e-05, "loss": 33.8828, "step": 725 }, { "epoch": 0.11650018052713924, "grad_norm": 12.026962280273438, "learning_rate": 9.74496559708098e-05, "loss": 33.9453, "step": 726 }, { "epoch": 0.11666064909535845, "grad_norm": 12.276592254638672, "learning_rate": 9.744157166285092e-05, "loss": 33.9688, "step": 727 }, { "epoch": 0.11682111766357765, "grad_norm": 12.211012840270996, "learning_rate": 9.743347489838806e-05, "loss": 33.9062, "step": 728 }, { "epoch": 0.11698158623179684, "grad_norm": 12.354799270629883, "learning_rate": 9.74253656795471e-05, "loss": 33.9219, "step": 729 }, { "epoch": 0.11714205480001605, "grad_norm": 12.29271411895752, "learning_rate": 9.74172440084573e-05, "loss": 33.8984, "step": 730 }, { "epoch": 0.11730252336823525, "grad_norm": 12.286246299743652, "learning_rate": 9.740910988725109e-05, "loss": 33.9375, "step": 731 }, { "epoch": 0.11746299193645444, "grad_norm": 12.276592254638672, "learning_rate": 9.74009633180642e-05, "loss": 33.8516, "step": 732 }, { "epoch": 0.11762346050467365, "grad_norm": 12.202718734741211, "learning_rate": 9.739280430303568e-05, "loss": 33.9062, "step": 733 }, { "epoch": 0.11778392907289285, "grad_norm": 12.16313648223877, "learning_rate": 9.738463284430775e-05, "loss": 34.125, "step": 734 }, { "epoch": 0.11794439764111204, "grad_norm": 12.529966354370117, "learning_rate": 9.737644894402598e-05, "loss": 33.8984, "step": 735 }, { "epoch": 0.11810486620933125, "grad_norm": 12.107869148254395, "learning_rate": 9.736825260433917e-05, "loss": 33.9766, "step": 736 }, { "epoch": 0.11826533477755044, "grad_norm": 12.285296440124512, "learning_rate": 9.73600438273994e-05, "loss": 33.9453, "step": 737 }, { "epoch": 0.11842580334576965, "grad_norm": 12.038984298706055, "learning_rate": 9.7351822615362e-05, "loss": 34.0391, "step": 738 }, { "epoch": 0.11858627191398885, "grad_norm": 12.338826179504395, "learning_rate": 9.734358897038557e-05, "loss": 33.8516, "step": 739 }, { "epoch": 0.11874674048220804, "grad_norm": 12.027115821838379, "learning_rate": 9.733534289463198e-05, "loss": 34.0234, "step": 740 }, { "epoch": 0.11890720905042725, "grad_norm": 12.538253784179688, "learning_rate": 9.732708439026638e-05, "loss": 33.8906, "step": 741 }, { "epoch": 0.11906767761864645, "grad_norm": 12.147754669189453, "learning_rate": 9.731881345945715e-05, "loss": 33.9688, "step": 742 }, { "epoch": 0.11922814618686564, "grad_norm": 12.156073570251465, "learning_rate": 9.731053010437594e-05, "loss": 34.0, "step": 743 }, { "epoch": 0.11938861475508485, "grad_norm": 12.015466690063477, "learning_rate": 9.73022343271977e-05, "loss": 33.8828, "step": 744 }, { "epoch": 0.11954908332330405, "grad_norm": 12.78547191619873, "learning_rate": 9.729392613010058e-05, "loss": 33.9219, "step": 745 }, { "epoch": 0.11970955189152324, "grad_norm": 12.317170143127441, "learning_rate": 9.728560551526605e-05, "loss": 34.0625, "step": 746 }, { "epoch": 0.11987002045974245, "grad_norm": 12.229652404785156, "learning_rate": 9.727727248487877e-05, "loss": 33.9766, "step": 747 }, { "epoch": 0.12003048902796165, "grad_norm": 12.343064308166504, "learning_rate": 9.726892704112676e-05, "loss": 33.9531, "step": 748 }, { "epoch": 0.12019095759618084, "grad_norm": 12.049503326416016, "learning_rate": 9.726056918620121e-05, "loss": 34.25, "step": 749 }, { "epoch": 0.12035142616440005, "grad_norm": 12.473838806152344, "learning_rate": 9.725219892229661e-05, "loss": 34.3203, "step": 750 }, { "epoch": 0.12051189473261925, "grad_norm": 12.237913131713867, "learning_rate": 9.72438162516107e-05, "loss": 34.3438, "step": 751 }, { "epoch": 0.12067236330083844, "grad_norm": 12.47913646697998, "learning_rate": 9.723542117634447e-05, "loss": 33.9688, "step": 752 }, { "epoch": 0.12083283186905765, "grad_norm": 11.96550464630127, "learning_rate": 9.722701369870218e-05, "loss": 34.0859, "step": 753 }, { "epoch": 0.12099330043727685, "grad_norm": 12.193903923034668, "learning_rate": 9.721859382089133e-05, "loss": 34.0703, "step": 754 }, { "epoch": 0.12115376900549604, "grad_norm": 12.464092254638672, "learning_rate": 9.721016154512271e-05, "loss": 34.1562, "step": 755 }, { "epoch": 0.12131423757371525, "grad_norm": 12.03750991821289, "learning_rate": 9.720171687361033e-05, "loss": 34.2031, "step": 756 }, { "epoch": 0.12147470614193445, "grad_norm": 12.321125984191895, "learning_rate": 9.719325980857146e-05, "loss": 33.9766, "step": 757 }, { "epoch": 0.12163517471015364, "grad_norm": 12.023637771606445, "learning_rate": 9.718479035222663e-05, "loss": 33.9844, "step": 758 }, { "epoch": 0.12179564327837285, "grad_norm": 12.032689094543457, "learning_rate": 9.717630850679963e-05, "loss": 33.9453, "step": 759 }, { "epoch": 0.12195611184659205, "grad_norm": 12.272231101989746, "learning_rate": 9.716781427451751e-05, "loss": 33.9297, "step": 760 }, { "epoch": 0.12211658041481124, "grad_norm": 12.086605072021484, "learning_rate": 9.715930765761054e-05, "loss": 33.9297, "step": 761 }, { "epoch": 0.12227704898303045, "grad_norm": 12.302791595458984, "learning_rate": 9.715078865831227e-05, "loss": 33.9609, "step": 762 }, { "epoch": 0.12243751755124965, "grad_norm": 12.724241256713867, "learning_rate": 9.714225727885948e-05, "loss": 33.875, "step": 763 }, { "epoch": 0.12259798611946884, "grad_norm": 11.901150703430176, "learning_rate": 9.713371352149224e-05, "loss": 33.9531, "step": 764 }, { "epoch": 0.12275845468768805, "grad_norm": 12.594470977783203, "learning_rate": 9.712515738845381e-05, "loss": 33.8594, "step": 765 }, { "epoch": 0.12291892325590725, "grad_norm": 12.152751922607422, "learning_rate": 9.711658888199076e-05, "loss": 33.8125, "step": 766 }, { "epoch": 0.12307939182412644, "grad_norm": 12.278380393981934, "learning_rate": 9.710800800435287e-05, "loss": 33.8359, "step": 767 }, { "epoch": 0.12323986039234565, "grad_norm": 12.667648315429688, "learning_rate": 9.709941475779316e-05, "loss": 33.9219, "step": 768 }, { "epoch": 0.12340032896056485, "grad_norm": 12.269062995910645, "learning_rate": 9.709080914456797e-05, "loss": 33.8047, "step": 769 }, { "epoch": 0.12356079752878404, "grad_norm": 12.147608757019043, "learning_rate": 9.708219116693678e-05, "loss": 33.9453, "step": 770 }, { "epoch": 0.12372126609700325, "grad_norm": 12.272743225097656, "learning_rate": 9.707356082716239e-05, "loss": 33.7812, "step": 771 }, { "epoch": 0.12388173466522245, "grad_norm": 12.239176750183105, "learning_rate": 9.706491812751082e-05, "loss": 33.7734, "step": 772 }, { "epoch": 0.12404220323344164, "grad_norm": 12.536361694335938, "learning_rate": 9.705626307025137e-05, "loss": 33.75, "step": 773 }, { "epoch": 0.12420267180166085, "grad_norm": 12.285197257995605, "learning_rate": 9.704759565765653e-05, "loss": 33.9062, "step": 774 }, { "epoch": 0.12436314036988005, "grad_norm": 12.033427238464355, "learning_rate": 9.703891589200207e-05, "loss": 33.9688, "step": 775 }, { "epoch": 0.12452360893809925, "grad_norm": 12.240666389465332, "learning_rate": 9.703022377556699e-05, "loss": 33.7969, "step": 776 }, { "epoch": 0.12468407750631845, "grad_norm": 12.09111213684082, "learning_rate": 9.702151931063352e-05, "loss": 33.7891, "step": 777 }, { "epoch": 0.12484454607453765, "grad_norm": 12.084823608398438, "learning_rate": 9.701280249948719e-05, "loss": 33.7656, "step": 778 }, { "epoch": 0.12500501464275685, "grad_norm": 12.728083610534668, "learning_rate": 9.70040733444167e-05, "loss": 33.6328, "step": 779 }, { "epoch": 0.12516548321097604, "grad_norm": 12.148091316223145, "learning_rate": 9.699533184771404e-05, "loss": 33.7266, "step": 780 }, { "epoch": 0.12532595177919525, "grad_norm": 12.212233543395996, "learning_rate": 9.69865780116744e-05, "loss": 33.75, "step": 781 }, { "epoch": 0.12548642034741445, "grad_norm": 12.408559799194336, "learning_rate": 9.697781183859624e-05, "loss": 33.6953, "step": 782 }, { "epoch": 0.12564688891563364, "grad_norm": 11.906750679016113, "learning_rate": 9.696903333078128e-05, "loss": 33.7812, "step": 783 }, { "epoch": 0.12580735748385286, "grad_norm": 12.02591609954834, "learning_rate": 9.696024249053441e-05, "loss": 33.7188, "step": 784 }, { "epoch": 0.12596782605207205, "grad_norm": 12.027726173400879, "learning_rate": 9.695143932016381e-05, "loss": 33.7031, "step": 785 }, { "epoch": 0.12612829462029124, "grad_norm": 12.345088005065918, "learning_rate": 9.694262382198091e-05, "loss": 33.6719, "step": 786 }, { "epoch": 0.12628876318851046, "grad_norm": 12.46387767791748, "learning_rate": 9.693379599830034e-05, "loss": 33.6641, "step": 787 }, { "epoch": 0.12644923175672965, "grad_norm": 12.846700668334961, "learning_rate": 9.692495585143995e-05, "loss": 33.6484, "step": 788 }, { "epoch": 0.12660970032494884, "grad_norm": 12.51674747467041, "learning_rate": 9.691610338372089e-05, "loss": 33.6094, "step": 789 }, { "epoch": 0.12677016889316806, "grad_norm": 12.228646278381348, "learning_rate": 9.690723859746749e-05, "loss": 33.7812, "step": 790 }, { "epoch": 0.12693063746138725, "grad_norm": 12.390236854553223, "learning_rate": 9.689836149500736e-05, "loss": 33.6016, "step": 791 }, { "epoch": 0.12709110602960644, "grad_norm": 12.154253005981445, "learning_rate": 9.688947207867128e-05, "loss": 33.7969, "step": 792 }, { "epoch": 0.12725157459782566, "grad_norm": 12.272354125976562, "learning_rate": 9.688057035079334e-05, "loss": 33.625, "step": 793 }, { "epoch": 0.12741204316604485, "grad_norm": 12.133902549743652, "learning_rate": 9.687165631371078e-05, "loss": 33.625, "step": 794 }, { "epoch": 0.12757251173426404, "grad_norm": 12.219783782958984, "learning_rate": 9.686272996976415e-05, "loss": 33.7344, "step": 795 }, { "epoch": 0.12773298030248326, "grad_norm": 12.097436904907227, "learning_rate": 9.685379132129719e-05, "loss": 33.8438, "step": 796 }, { "epoch": 0.12789344887070245, "grad_norm": 12.223419189453125, "learning_rate": 9.684484037065686e-05, "loss": 33.8672, "step": 797 }, { "epoch": 0.12805391743892164, "grad_norm": 12.366413116455078, "learning_rate": 9.68358771201934e-05, "loss": 33.7031, "step": 798 }, { "epoch": 0.12821438600714086, "grad_norm": 12.23433780670166, "learning_rate": 9.682690157226023e-05, "loss": 33.9688, "step": 799 }, { "epoch": 0.12837485457536005, "grad_norm": 12.21444320678711, "learning_rate": 9.6817913729214e-05, "loss": 34.1875, "step": 800 }, { "epoch": 0.12853532314357924, "grad_norm": 12.694849967956543, "learning_rate": 9.680891359341464e-05, "loss": 33.8828, "step": 801 }, { "epoch": 0.12869579171179846, "grad_norm": 12.250027656555176, "learning_rate": 9.679990116722524e-05, "loss": 33.8125, "step": 802 }, { "epoch": 0.12885626028001765, "grad_norm": 12.125271797180176, "learning_rate": 9.679087645301216e-05, "loss": 34.1016, "step": 803 }, { "epoch": 0.12901672884823684, "grad_norm": 12.384320259094238, "learning_rate": 9.678183945314498e-05, "loss": 33.9844, "step": 804 }, { "epoch": 0.12917719741645606, "grad_norm": 11.780651092529297, "learning_rate": 9.67727901699965e-05, "loss": 34.0859, "step": 805 }, { "epoch": 0.12933766598467525, "grad_norm": 12.253490447998047, "learning_rate": 9.676372860594275e-05, "loss": 33.6953, "step": 806 }, { "epoch": 0.12949813455289444, "grad_norm": 12.104471206665039, "learning_rate": 9.675465476336296e-05, "loss": 33.7656, "step": 807 }, { "epoch": 0.12965860312111366, "grad_norm": 12.037130355834961, "learning_rate": 9.674556864463965e-05, "loss": 33.8516, "step": 808 }, { "epoch": 0.12981907168933285, "grad_norm": 12.029041290283203, "learning_rate": 9.673647025215847e-05, "loss": 33.7734, "step": 809 }, { "epoch": 0.12997954025755204, "grad_norm": 12.026715278625488, "learning_rate": 9.672735958830836e-05, "loss": 33.75, "step": 810 }, { "epoch": 0.13014000882577126, "grad_norm": 12.09045696258545, "learning_rate": 9.671823665548146e-05, "loss": 33.625, "step": 811 }, { "epoch": 0.13030047739399045, "grad_norm": 12.147566795349121, "learning_rate": 9.670910145607313e-05, "loss": 33.75, "step": 812 }, { "epoch": 0.13046094596220964, "grad_norm": 12.04011344909668, "learning_rate": 9.669995399248198e-05, "loss": 33.6406, "step": 813 }, { "epoch": 0.13062141453042886, "grad_norm": 12.351555824279785, "learning_rate": 9.669079426710979e-05, "loss": 33.7656, "step": 814 }, { "epoch": 0.13078188309864805, "grad_norm": 12.40310001373291, "learning_rate": 9.668162228236159e-05, "loss": 33.7422, "step": 815 }, { "epoch": 0.13094235166686724, "grad_norm": 12.34679889678955, "learning_rate": 9.667243804064562e-05, "loss": 33.5938, "step": 816 }, { "epoch": 0.13110282023508646, "grad_norm": 12.265654563903809, "learning_rate": 9.666324154437336e-05, "loss": 33.6562, "step": 817 }, { "epoch": 0.13126328880330565, "grad_norm": 12.309660911560059, "learning_rate": 9.665403279595948e-05, "loss": 33.7422, "step": 818 }, { "epoch": 0.13142375737152484, "grad_norm": 12.14634895324707, "learning_rate": 9.664481179782187e-05, "loss": 33.6797, "step": 819 }, { "epoch": 0.13158422593974406, "grad_norm": 12.029441833496094, "learning_rate": 9.663557855238165e-05, "loss": 33.6562, "step": 820 }, { "epoch": 0.13174469450796325, "grad_norm": 11.970999717712402, "learning_rate": 9.662633306206314e-05, "loss": 33.7188, "step": 821 }, { "epoch": 0.13190516307618244, "grad_norm": 12.14890193939209, "learning_rate": 9.66170753292939e-05, "loss": 33.6719, "step": 822 }, { "epoch": 0.13206563164440166, "grad_norm": 12.149620056152344, "learning_rate": 9.660780535650465e-05, "loss": 33.6797, "step": 823 }, { "epoch": 0.13222610021262085, "grad_norm": 12.030505180358887, "learning_rate": 9.65985231461294e-05, "loss": 33.625, "step": 824 }, { "epoch": 0.13238656878084004, "grad_norm": 12.026296615600586, "learning_rate": 9.658922870060534e-05, "loss": 33.6875, "step": 825 }, { "epoch": 0.13254703734905926, "grad_norm": 12.026628494262695, "learning_rate": 9.657992202237285e-05, "loss": 33.6953, "step": 826 }, { "epoch": 0.13270750591727845, "grad_norm": 12.02844524383545, "learning_rate": 9.657060311387554e-05, "loss": 33.7109, "step": 827 }, { "epoch": 0.13286797448549764, "grad_norm": 12.088480949401855, "learning_rate": 9.656127197756022e-05, "loss": 33.6172, "step": 828 }, { "epoch": 0.13302844305371686, "grad_norm": 12.245230674743652, "learning_rate": 9.655192861587695e-05, "loss": 33.7188, "step": 829 }, { "epoch": 0.13318891162193605, "grad_norm": 12.40030574798584, "learning_rate": 9.654257303127893e-05, "loss": 33.6172, "step": 830 }, { "epoch": 0.13334938019015524, "grad_norm": 12.27630615234375, "learning_rate": 9.653320522622266e-05, "loss": 33.6641, "step": 831 }, { "epoch": 0.13350984875837446, "grad_norm": 11.97527027130127, "learning_rate": 9.652382520316776e-05, "loss": 33.7812, "step": 832 }, { "epoch": 0.13367031732659365, "grad_norm": 12.135701179504395, "learning_rate": 9.651443296457712e-05, "loss": 33.7031, "step": 833 }, { "epoch": 0.13383078589481284, "grad_norm": 12.492874145507812, "learning_rate": 9.650502851291681e-05, "loss": 33.6328, "step": 834 }, { "epoch": 0.13399125446303206, "grad_norm": 12.334928512573242, "learning_rate": 9.64956118506561e-05, "loss": 33.5547, "step": 835 }, { "epoch": 0.13415172303125125, "grad_norm": 12.142913818359375, "learning_rate": 9.648618298026748e-05, "loss": 33.7266, "step": 836 }, { "epoch": 0.13431219159947044, "grad_norm": 12.141510009765625, "learning_rate": 9.647674190422667e-05, "loss": 33.625, "step": 837 }, { "epoch": 0.13447266016768966, "grad_norm": 12.259313583374023, "learning_rate": 9.646728862501253e-05, "loss": 33.5781, "step": 838 }, { "epoch": 0.13463312873590885, "grad_norm": 12.413002014160156, "learning_rate": 9.645782314510719e-05, "loss": 33.6953, "step": 839 }, { "epoch": 0.13479359730412804, "grad_norm": 12.28516960144043, "learning_rate": 9.644834546699592e-05, "loss": 33.6406, "step": 840 }, { "epoch": 0.13495406587234726, "grad_norm": 12.027729988098145, "learning_rate": 9.643885559316727e-05, "loss": 33.7578, "step": 841 }, { "epoch": 0.13511453444056645, "grad_norm": 12.154401779174805, "learning_rate": 9.642935352611292e-05, "loss": 33.7344, "step": 842 }, { "epoch": 0.13527500300878564, "grad_norm": 12.260881423950195, "learning_rate": 9.641983926832781e-05, "loss": 33.625, "step": 843 }, { "epoch": 0.13543547157700486, "grad_norm": 12.340879440307617, "learning_rate": 9.641031282231001e-05, "loss": 33.6328, "step": 844 }, { "epoch": 0.13559594014522405, "grad_norm": 12.21296215057373, "learning_rate": 9.640077419056089e-05, "loss": 33.7109, "step": 845 }, { "epoch": 0.13575640871344324, "grad_norm": 12.151961326599121, "learning_rate": 9.639122337558491e-05, "loss": 33.6406, "step": 846 }, { "epoch": 0.13591687728166246, "grad_norm": 12.017525672912598, "learning_rate": 9.638166037988981e-05, "loss": 33.6953, "step": 847 }, { "epoch": 0.13607734584988165, "grad_norm": 12.09818172454834, "learning_rate": 9.637208520598649e-05, "loss": 33.7422, "step": 848 }, { "epoch": 0.13623781441810084, "grad_norm": 12.229907989501953, "learning_rate": 9.636249785638905e-05, "loss": 33.8438, "step": 849 }, { "epoch": 0.13639828298632006, "grad_norm": 12.21794605255127, "learning_rate": 9.63528983336148e-05, "loss": 35.0469, "step": 850 }, { "epoch": 0.13655875155453925, "grad_norm": 12.388143539428711, "learning_rate": 9.634328664018425e-05, "loss": 34.1094, "step": 851 }, { "epoch": 0.13671922012275844, "grad_norm": 12.457525253295898, "learning_rate": 9.63336627786211e-05, "loss": 34.0, "step": 852 }, { "epoch": 0.13687968869097766, "grad_norm": 12.24722671508789, "learning_rate": 9.632402675145223e-05, "loss": 33.7422, "step": 853 }, { "epoch": 0.13704015725919685, "grad_norm": 12.302533149719238, "learning_rate": 9.631437856120773e-05, "loss": 33.9141, "step": 854 }, { "epoch": 0.13720062582741605, "grad_norm": 12.292370796203613, "learning_rate": 9.630471821042087e-05, "loss": 33.7891, "step": 855 }, { "epoch": 0.13736109439563526, "grad_norm": 12.6324462890625, "learning_rate": 9.629504570162811e-05, "loss": 33.7266, "step": 856 }, { "epoch": 0.13752156296385445, "grad_norm": 12.217673301696777, "learning_rate": 9.628536103736914e-05, "loss": 33.7188, "step": 857 }, { "epoch": 0.13768203153207365, "grad_norm": 12.040963172912598, "learning_rate": 9.62756642201868e-05, "loss": 33.8438, "step": 858 }, { "epoch": 0.13784250010029286, "grad_norm": 12.147037506103516, "learning_rate": 9.626595525262714e-05, "loss": 33.7578, "step": 859 }, { "epoch": 0.13800296866851205, "grad_norm": 12.233925819396973, "learning_rate": 9.62562341372394e-05, "loss": 33.7266, "step": 860 }, { "epoch": 0.13816343723673125, "grad_norm": 12.238898277282715, "learning_rate": 9.6246500876576e-05, "loss": 33.8047, "step": 861 }, { "epoch": 0.13832390580495046, "grad_norm": 12.329315185546875, "learning_rate": 9.623675547319257e-05, "loss": 33.6484, "step": 862 }, { "epoch": 0.13848437437316966, "grad_norm": 11.8487548828125, "learning_rate": 9.622699792964789e-05, "loss": 33.8594, "step": 863 }, { "epoch": 0.13864484294138885, "grad_norm": 12.220367431640625, "learning_rate": 9.621722824850396e-05, "loss": 33.5391, "step": 864 }, { "epoch": 0.13880531150960806, "grad_norm": 12.668402671813965, "learning_rate": 9.620744643232597e-05, "loss": 33.625, "step": 865 }, { "epoch": 0.13896578007782726, "grad_norm": 11.958166122436523, "learning_rate": 9.619765248368229e-05, "loss": 33.625, "step": 866 }, { "epoch": 0.13912624864604645, "grad_norm": 12.451037406921387, "learning_rate": 9.618784640514444e-05, "loss": 33.7422, "step": 867 }, { "epoch": 0.13928671721426567, "grad_norm": 12.313945770263672, "learning_rate": 9.617802819928716e-05, "loss": 33.6875, "step": 868 }, { "epoch": 0.13944718578248486, "grad_norm": 12.029467582702637, "learning_rate": 9.61681978686884e-05, "loss": 33.625, "step": 869 }, { "epoch": 0.13960765435070405, "grad_norm": 12.143996238708496, "learning_rate": 9.615835541592924e-05, "loss": 33.7031, "step": 870 }, { "epoch": 0.13976812291892327, "grad_norm": 12.109487533569336, "learning_rate": 9.614850084359395e-05, "loss": 33.6484, "step": 871 }, { "epoch": 0.13992859148714246, "grad_norm": 12.087468147277832, "learning_rate": 9.613863415427002e-05, "loss": 33.6016, "step": 872 }, { "epoch": 0.14008906005536165, "grad_norm": 12.534379959106445, "learning_rate": 9.612875535054809e-05, "loss": 33.5703, "step": 873 }, { "epoch": 0.14024952862358087, "grad_norm": 12.147172927856445, "learning_rate": 9.6118864435022e-05, "loss": 33.5234, "step": 874 }, { "epoch": 0.14040999719180006, "grad_norm": 12.120466232299805, "learning_rate": 9.610896141028874e-05, "loss": 33.6016, "step": 875 }, { "epoch": 0.14057046576001925, "grad_norm": 12.1178617477417, "learning_rate": 9.609904627894852e-05, "loss": 33.5703, "step": 876 }, { "epoch": 0.14073093432823847, "grad_norm": 12.085833549499512, "learning_rate": 9.60891190436047e-05, "loss": 33.5391, "step": 877 }, { "epoch": 0.14089140289645766, "grad_norm": 12.098105430603027, "learning_rate": 9.607917970686382e-05, "loss": 33.5312, "step": 878 }, { "epoch": 0.14105187146467685, "grad_norm": 12.07713794708252, "learning_rate": 9.606922827133562e-05, "loss": 33.5, "step": 879 }, { "epoch": 0.14121234003289607, "grad_norm": 12.475556373596191, "learning_rate": 9.605926473963298e-05, "loss": 33.4688, "step": 880 }, { "epoch": 0.14137280860111526, "grad_norm": 11.96829891204834, "learning_rate": 9.604928911437198e-05, "loss": 33.5234, "step": 881 }, { "epoch": 0.14153327716933445, "grad_norm": 12.14249038696289, "learning_rate": 9.60393013981719e-05, "loss": 33.4766, "step": 882 }, { "epoch": 0.14169374573755367, "grad_norm": 12.093583106994629, "learning_rate": 9.602930159365512e-05, "loss": 33.5391, "step": 883 }, { "epoch": 0.14185421430577286, "grad_norm": 12.200350761413574, "learning_rate": 9.601928970344727e-05, "loss": 33.4297, "step": 884 }, { "epoch": 0.14201468287399205, "grad_norm": 12.281271934509277, "learning_rate": 9.600926573017713e-05, "loss": 33.5859, "step": 885 }, { "epoch": 0.14217515144221127, "grad_norm": 12.201940536499023, "learning_rate": 9.599922967647662e-05, "loss": 33.4297, "step": 886 }, { "epoch": 0.14233562001043046, "grad_norm": 12.082409858703613, "learning_rate": 9.598918154498088e-05, "loss": 33.4531, "step": 887 }, { "epoch": 0.14249608857864965, "grad_norm": 12.014155387878418, "learning_rate": 9.597912133832818e-05, "loss": 33.5781, "step": 888 }, { "epoch": 0.14265655714686887, "grad_norm": 12.140546798706055, "learning_rate": 9.596904905916e-05, "loss": 33.5156, "step": 889 }, { "epoch": 0.14281702571508806, "grad_norm": 12.209859848022461, "learning_rate": 9.595896471012096e-05, "loss": 33.4766, "step": 890 }, { "epoch": 0.14297749428330725, "grad_norm": 11.94814395904541, "learning_rate": 9.594886829385886e-05, "loss": 33.5312, "step": 891 }, { "epoch": 0.14313796285152647, "grad_norm": 12.170619010925293, "learning_rate": 9.593875981302467e-05, "loss": 33.5781, "step": 892 }, { "epoch": 0.14329843141974566, "grad_norm": 12.143488883972168, "learning_rate": 9.592863927027251e-05, "loss": 33.4922, "step": 893 }, { "epoch": 0.14345889998796485, "grad_norm": 12.0193452835083, "learning_rate": 9.591850666825971e-05, "loss": 33.5156, "step": 894 }, { "epoch": 0.14361936855618407, "grad_norm": 12.264323234558105, "learning_rate": 9.590836200964671e-05, "loss": 33.4062, "step": 895 }, { "epoch": 0.14377983712440326, "grad_norm": 12.265458106994629, "learning_rate": 9.589820529709716e-05, "loss": 33.4219, "step": 896 }, { "epoch": 0.14394030569262245, "grad_norm": 12.391097068786621, "learning_rate": 9.588803653327784e-05, "loss": 33.3828, "step": 897 }, { "epoch": 0.14410077426084167, "grad_norm": 12.140542030334473, "learning_rate": 9.587785572085876e-05, "loss": 33.5, "step": 898 }, { "epoch": 0.14426124282906086, "grad_norm": 12.098193168640137, "learning_rate": 9.586766286251299e-05, "loss": 33.7031, "step": 899 }, { "epoch": 0.14442171139728005, "grad_norm": 12.085846900939941, "learning_rate": 9.585745796091686e-05, "loss": 33.6641, "step": 900 }, { "epoch": 0.14458217996549927, "grad_norm": 12.32812786102295, "learning_rate": 9.584724101874978e-05, "loss": 34.0234, "step": 901 }, { "epoch": 0.14474264853371846, "grad_norm": 12.366793632507324, "learning_rate": 9.58370120386944e-05, "loss": 33.6719, "step": 902 }, { "epoch": 0.14490311710193765, "grad_norm": 12.592416763305664, "learning_rate": 9.582677102343649e-05, "loss": 33.6484, "step": 903 }, { "epoch": 0.14506358567015687, "grad_norm": 12.239705085754395, "learning_rate": 9.581651797566498e-05, "loss": 33.5703, "step": 904 }, { "epoch": 0.14522405423837606, "grad_norm": 12.383085250854492, "learning_rate": 9.580625289807195e-05, "loss": 33.75, "step": 905 }, { "epoch": 0.14538452280659525, "grad_norm": 12.218204498291016, "learning_rate": 9.579597579335267e-05, "loss": 33.5703, "step": 906 }, { "epoch": 0.14554499137481447, "grad_norm": 12.028315544128418, "learning_rate": 9.578568666420552e-05, "loss": 33.5156, "step": 907 }, { "epoch": 0.14570545994303366, "grad_norm": 12.092467308044434, "learning_rate": 9.577538551333211e-05, "loss": 33.6719, "step": 908 }, { "epoch": 0.14586592851125285, "grad_norm": 12.284672737121582, "learning_rate": 9.576507234343712e-05, "loss": 33.4141, "step": 909 }, { "epoch": 0.14602639707947207, "grad_norm": 12.213695526123047, "learning_rate": 9.575474715722848e-05, "loss": 33.4844, "step": 910 }, { "epoch": 0.14618686564769126, "grad_norm": 12.638843536376953, "learning_rate": 9.574440995741716e-05, "loss": 33.3359, "step": 911 }, { "epoch": 0.14634733421591045, "grad_norm": 12.537108421325684, "learning_rate": 9.57340607467174e-05, "loss": 33.5391, "step": 912 }, { "epoch": 0.14650780278412967, "grad_norm": 12.168804168701172, "learning_rate": 9.572369952784653e-05, "loss": 33.5625, "step": 913 }, { "epoch": 0.14666827135234886, "grad_norm": 12.105915069580078, "learning_rate": 9.571332630352504e-05, "loss": 33.5312, "step": 914 }, { "epoch": 0.14682873992056805, "grad_norm": 12.15595817565918, "learning_rate": 9.570294107647657e-05, "loss": 33.4219, "step": 915 }, { "epoch": 0.14698920848878727, "grad_norm": 12.290095329284668, "learning_rate": 9.569254384942793e-05, "loss": 33.4453, "step": 916 }, { "epoch": 0.14714967705700646, "grad_norm": 12.227568626403809, "learning_rate": 9.568213462510907e-05, "loss": 33.4609, "step": 917 }, { "epoch": 0.14731014562522565, "grad_norm": 12.084335327148438, "learning_rate": 9.567171340625307e-05, "loss": 33.4453, "step": 918 }, { "epoch": 0.14747061419344487, "grad_norm": 12.158865928649902, "learning_rate": 9.566128019559621e-05, "loss": 33.4062, "step": 919 }, { "epoch": 0.14763108276166406, "grad_norm": 12.73717212677002, "learning_rate": 9.565083499587788e-05, "loss": 33.3516, "step": 920 }, { "epoch": 0.14779155132988325, "grad_norm": 12.1446533203125, "learning_rate": 9.564037780984061e-05, "loss": 33.4375, "step": 921 }, { "epoch": 0.14795201989810247, "grad_norm": 12.100737571716309, "learning_rate": 9.562990864023009e-05, "loss": 33.4531, "step": 922 }, { "epoch": 0.14811248846632166, "grad_norm": 11.932141304016113, "learning_rate": 9.561942748979517e-05, "loss": 33.5547, "step": 923 }, { "epoch": 0.14827295703454085, "grad_norm": 12.160917282104492, "learning_rate": 9.560893436128785e-05, "loss": 33.4141, "step": 924 }, { "epoch": 0.14843342560276007, "grad_norm": 12.15184497833252, "learning_rate": 9.559842925746324e-05, "loss": 33.4062, "step": 925 }, { "epoch": 0.14859389417097926, "grad_norm": 11.83060073852539, "learning_rate": 9.55879121810796e-05, "loss": 33.5156, "step": 926 }, { "epoch": 0.14875436273919845, "grad_norm": 12.023212432861328, "learning_rate": 9.55773831348984e-05, "loss": 33.4609, "step": 927 }, { "epoch": 0.14891483130741767, "grad_norm": 12.232771873474121, "learning_rate": 9.556684212168415e-05, "loss": 33.5234, "step": 928 }, { "epoch": 0.14907529987563686, "grad_norm": 12.072418212890625, "learning_rate": 9.555628914420458e-05, "loss": 33.3984, "step": 929 }, { "epoch": 0.14923576844385605, "grad_norm": 11.948058128356934, "learning_rate": 9.554572420523051e-05, "loss": 33.3984, "step": 930 }, { "epoch": 0.14939623701207527, "grad_norm": 12.141063690185547, "learning_rate": 9.553514730753596e-05, "loss": 33.4531, "step": 931 }, { "epoch": 0.14955670558029446, "grad_norm": 12.33297061920166, "learning_rate": 9.552455845389802e-05, "loss": 33.3281, "step": 932 }, { "epoch": 0.14971717414851365, "grad_norm": 12.142127990722656, "learning_rate": 9.551395764709696e-05, "loss": 33.3906, "step": 933 }, { "epoch": 0.14987764271673287, "grad_norm": 12.094014167785645, "learning_rate": 9.550334488991621e-05, "loss": 33.6172, "step": 934 }, { "epoch": 0.15003811128495206, "grad_norm": 12.13930892944336, "learning_rate": 9.549272018514228e-05, "loss": 33.4844, "step": 935 }, { "epoch": 0.15019857985317125, "grad_norm": 12.777738571166992, "learning_rate": 9.548208353556486e-05, "loss": 33.3438, "step": 936 }, { "epoch": 0.15035904842139047, "grad_norm": 12.152709007263184, "learning_rate": 9.547143494397678e-05, "loss": 33.5, "step": 937 }, { "epoch": 0.15051951698960966, "grad_norm": 11.953795433044434, "learning_rate": 9.546077441317397e-05, "loss": 33.5234, "step": 938 }, { "epoch": 0.15067998555782885, "grad_norm": 11.950197219848633, "learning_rate": 9.545010194595549e-05, "loss": 33.5078, "step": 939 }, { "epoch": 0.15084045412604807, "grad_norm": 12.267656326293945, "learning_rate": 9.54394175451236e-05, "loss": 33.3828, "step": 940 }, { "epoch": 0.15100092269426726, "grad_norm": 12.266139030456543, "learning_rate": 9.542872121348362e-05, "loss": 33.3516, "step": 941 }, { "epoch": 0.15116139126248646, "grad_norm": 12.147031784057617, "learning_rate": 9.541801295384408e-05, "loss": 33.3828, "step": 942 }, { "epoch": 0.15132185983070567, "grad_norm": 12.27250862121582, "learning_rate": 9.540729276901654e-05, "loss": 33.3828, "step": 943 }, { "epoch": 0.15148232839892486, "grad_norm": 12.198196411132812, "learning_rate": 9.53965606618158e-05, "loss": 33.3828, "step": 944 }, { "epoch": 0.15164279696714406, "grad_norm": 12.159756660461426, "learning_rate": 9.538581663505967e-05, "loss": 33.4375, "step": 945 }, { "epoch": 0.15180326553536327, "grad_norm": 12.60629653930664, "learning_rate": 9.537506069156923e-05, "loss": 33.3828, "step": 946 }, { "epoch": 0.15196373410358247, "grad_norm": 11.895346641540527, "learning_rate": 9.536429283416857e-05, "loss": 33.5156, "step": 947 }, { "epoch": 0.15212420267180166, "grad_norm": 12.417686462402344, "learning_rate": 9.535351306568497e-05, "loss": 33.5703, "step": 948 }, { "epoch": 0.15228467124002087, "grad_norm": 12.095378875732422, "learning_rate": 9.534272138894882e-05, "loss": 33.5547, "step": 949 }, { "epoch": 0.15244513980824007, "grad_norm": 12.31610107421875, "learning_rate": 9.533191780679363e-05, "loss": 33.7656, "step": 950 }, { "epoch": 0.15260560837645926, "grad_norm": 12.440524101257324, "learning_rate": 9.532110232205607e-05, "loss": 33.8516, "step": 951 }, { "epoch": 0.15276607694467847, "grad_norm": 12.60257625579834, "learning_rate": 9.531027493757587e-05, "loss": 33.5, "step": 952 }, { "epoch": 0.15292654551289767, "grad_norm": 12.436591148376465, "learning_rate": 9.529943565619596e-05, "loss": 33.5547, "step": 953 }, { "epoch": 0.15308701408111686, "grad_norm": 12.178030014038086, "learning_rate": 9.528858448076235e-05, "loss": 33.4219, "step": 954 }, { "epoch": 0.15324748264933605, "grad_norm": 12.42248821258545, "learning_rate": 9.527772141412415e-05, "loss": 33.3281, "step": 955 }, { "epoch": 0.15340795121755527, "grad_norm": 12.160523414611816, "learning_rate": 9.526684645913367e-05, "loss": 33.3594, "step": 956 }, { "epoch": 0.15356841978577446, "grad_norm": 12.15877628326416, "learning_rate": 9.525595961864625e-05, "loss": 33.6875, "step": 957 }, { "epoch": 0.15372888835399365, "grad_norm": 12.427752494812012, "learning_rate": 9.524506089552042e-05, "loss": 33.3906, "step": 958 }, { "epoch": 0.15388935692221287, "grad_norm": 12.21751594543457, "learning_rate": 9.523415029261779e-05, "loss": 33.3359, "step": 959 }, { "epoch": 0.15404982549043206, "grad_norm": 12.210027694702148, "learning_rate": 9.522322781280315e-05, "loss": 33.3359, "step": 960 }, { "epoch": 0.15421029405865125, "grad_norm": 11.958441734313965, "learning_rate": 9.52122934589443e-05, "loss": 33.3594, "step": 961 }, { "epoch": 0.15437076262687047, "grad_norm": 12.209763526916504, "learning_rate": 9.520134723391225e-05, "loss": 33.25, "step": 962 }, { "epoch": 0.15453123119508966, "grad_norm": 12.54742431640625, "learning_rate": 9.519038914058112e-05, "loss": 33.2031, "step": 963 }, { "epoch": 0.15469169976330885, "grad_norm": 12.478660583496094, "learning_rate": 9.517941918182806e-05, "loss": 33.2812, "step": 964 }, { "epoch": 0.15485216833152807, "grad_norm": 12.265108108520508, "learning_rate": 9.516843736053347e-05, "loss": 33.3516, "step": 965 }, { "epoch": 0.15501263689974726, "grad_norm": 12.272890090942383, "learning_rate": 9.515744367958074e-05, "loss": 33.2656, "step": 966 }, { "epoch": 0.15517310546796645, "grad_norm": 12.01298999786377, "learning_rate": 9.514643814185647e-05, "loss": 33.3594, "step": 967 }, { "epoch": 0.15533357403618567, "grad_norm": 12.375133514404297, "learning_rate": 9.51354207502503e-05, "loss": 33.2812, "step": 968 }, { "epoch": 0.15549404260440486, "grad_norm": 12.64398193359375, "learning_rate": 9.512439150765502e-05, "loss": 33.2188, "step": 969 }, { "epoch": 0.15565451117262405, "grad_norm": 12.087769508361816, "learning_rate": 9.511335041696654e-05, "loss": 33.3281, "step": 970 }, { "epoch": 0.15581497974084327, "grad_norm": 12.222114562988281, "learning_rate": 9.510229748108383e-05, "loss": 33.2266, "step": 971 }, { "epoch": 0.15597544830906246, "grad_norm": 12.416135787963867, "learning_rate": 9.509123270290907e-05, "loss": 33.3281, "step": 972 }, { "epoch": 0.15613591687728165, "grad_norm": 12.160148620605469, "learning_rate": 9.508015608534741e-05, "loss": 33.2031, "step": 973 }, { "epoch": 0.15629638544550087, "grad_norm": 12.004653930664062, "learning_rate": 9.506906763130723e-05, "loss": 33.375, "step": 974 }, { "epoch": 0.15645685401372006, "grad_norm": 12.010258674621582, "learning_rate": 9.505796734369996e-05, "loss": 33.2422, "step": 975 }, { "epoch": 0.15661732258193925, "grad_norm": 12.022955894470215, "learning_rate": 9.504685522544016e-05, "loss": 33.2812, "step": 976 }, { "epoch": 0.15677779115015847, "grad_norm": 12.113801956176758, "learning_rate": 9.503573127944546e-05, "loss": 33.3047, "step": 977 }, { "epoch": 0.15693825971837766, "grad_norm": 12.08180046081543, "learning_rate": 9.502459550863666e-05, "loss": 33.3281, "step": 978 }, { "epoch": 0.15709872828659685, "grad_norm": 12.525431632995605, "learning_rate": 9.501344791593757e-05, "loss": 33.2031, "step": 979 }, { "epoch": 0.15725919685481607, "grad_norm": 12.209314346313477, "learning_rate": 9.500228850427521e-05, "loss": 33.2578, "step": 980 }, { "epoch": 0.15741966542303526, "grad_norm": 12.081807136535645, "learning_rate": 9.499111727657963e-05, "loss": 33.2734, "step": 981 }, { "epoch": 0.15758013399125445, "grad_norm": 11.952651023864746, "learning_rate": 9.4979934235784e-05, "loss": 33.3594, "step": 982 }, { "epoch": 0.15774060255947367, "grad_norm": 12.201787948608398, "learning_rate": 9.49687393848246e-05, "loss": 33.2578, "step": 983 }, { "epoch": 0.15790107112769286, "grad_norm": 12.140170097351074, "learning_rate": 9.495753272664083e-05, "loss": 33.2344, "step": 984 }, { "epoch": 0.15806153969591205, "grad_norm": 12.145255088806152, "learning_rate": 9.494631426417516e-05, "loss": 33.2031, "step": 985 }, { "epoch": 0.15822200826413127, "grad_norm": 12.265219688415527, "learning_rate": 9.493508400037313e-05, "loss": 33.2656, "step": 986 }, { "epoch": 0.15838247683235046, "grad_norm": 12.271042823791504, "learning_rate": 9.492384193818346e-05, "loss": 33.2266, "step": 987 }, { "epoch": 0.15854294540056965, "grad_norm": 12.350101470947266, "learning_rate": 9.491258808055791e-05, "loss": 33.2188, "step": 988 }, { "epoch": 0.15870341396878887, "grad_norm": 12.27942180633545, "learning_rate": 9.490132243045136e-05, "loss": 33.25, "step": 989 }, { "epoch": 0.15886388253700806, "grad_norm": 12.34074592590332, "learning_rate": 9.489004499082175e-05, "loss": 33.1875, "step": 990 }, { "epoch": 0.15902435110522725, "grad_norm": 12.196820259094238, "learning_rate": 9.487875576463015e-05, "loss": 33.2344, "step": 991 }, { "epoch": 0.15918481967344647, "grad_norm": 12.266101837158203, "learning_rate": 9.486745475484076e-05, "loss": 33.1797, "step": 992 }, { "epoch": 0.15934528824166566, "grad_norm": 12.403106689453125, "learning_rate": 9.485614196442077e-05, "loss": 33.2109, "step": 993 }, { "epoch": 0.15950575680988485, "grad_norm": 12.581741333007812, "learning_rate": 9.484481739634057e-05, "loss": 33.1875, "step": 994 }, { "epoch": 0.15966622537810407, "grad_norm": 12.150084495544434, "learning_rate": 9.483348105357357e-05, "loss": 33.375, "step": 995 }, { "epoch": 0.15982669394632326, "grad_norm": 12.404050827026367, "learning_rate": 9.482213293909632e-05, "loss": 33.2578, "step": 996 }, { "epoch": 0.15998716251454245, "grad_norm": 12.042458534240723, "learning_rate": 9.481077305588841e-05, "loss": 33.4141, "step": 997 }, { "epoch": 0.16014763108276167, "grad_norm": 12.454623222351074, "learning_rate": 9.479940140693258e-05, "loss": 33.2891, "step": 998 }, { "epoch": 0.16030809965098086, "grad_norm": 12.520160675048828, "learning_rate": 9.478801799521461e-05, "loss": 33.4922, "step": 999 }, { "epoch": 0.16046856821920005, "grad_norm": 12.289641380310059, "learning_rate": 9.477662282372341e-05, "loss": 33.6094, "step": 1000 }, { "epoch": 0.16062903678741927, "grad_norm": 12.300127983093262, "learning_rate": 9.476521589545095e-05, "loss": 33.2656, "step": 1001 }, { "epoch": 0.16078950535563846, "grad_norm": 12.007270812988281, "learning_rate": 9.475379721339226e-05, "loss": 33.6641, "step": 1002 }, { "epoch": 0.16094997392385765, "grad_norm": 12.173056602478027, "learning_rate": 9.47423667805455e-05, "loss": 33.5547, "step": 1003 }, { "epoch": 0.16111044249207687, "grad_norm": 12.027740478515625, "learning_rate": 9.473092459991193e-05, "loss": 33.4531, "step": 1004 }, { "epoch": 0.16127091106029606, "grad_norm": 12.285428047180176, "learning_rate": 9.471947067449585e-05, "loss": 33.2734, "step": 1005 }, { "epoch": 0.16143137962851525, "grad_norm": 12.016648292541504, "learning_rate": 9.470800500730466e-05, "loss": 33.3281, "step": 1006 }, { "epoch": 0.16159184819673447, "grad_norm": 12.029805183410645, "learning_rate": 9.469652760134886e-05, "loss": 33.5234, "step": 1007 }, { "epoch": 0.16175231676495366, "grad_norm": 11.956128120422363, "learning_rate": 9.468503845964198e-05, "loss": 33.2891, "step": 1008 }, { "epoch": 0.16191278533317285, "grad_norm": 12.207318305969238, "learning_rate": 9.46735375852007e-05, "loss": 33.3281, "step": 1009 }, { "epoch": 0.16207325390139207, "grad_norm": 12.441898345947266, "learning_rate": 9.466202498104475e-05, "loss": 33.3516, "step": 1010 }, { "epoch": 0.16223372246961126, "grad_norm": 12.415881156921387, "learning_rate": 9.465050065019694e-05, "loss": 33.2891, "step": 1011 }, { "epoch": 0.16239419103783045, "grad_norm": 12.214960098266602, "learning_rate": 9.463896459568314e-05, "loss": 33.2578, "step": 1012 }, { "epoch": 0.16255465960604967, "grad_norm": 12.02489948272705, "learning_rate": 9.462741682053231e-05, "loss": 33.2266, "step": 1013 }, { "epoch": 0.16271512817426886, "grad_norm": 12.154875755310059, "learning_rate": 9.461585732777651e-05, "loss": 33.2969, "step": 1014 }, { "epoch": 0.16287559674248805, "grad_norm": 12.014429092407227, "learning_rate": 9.460428612045086e-05, "loss": 33.3203, "step": 1015 }, { "epoch": 0.16303606531070727, "grad_norm": 12.14484691619873, "learning_rate": 9.459270320159359e-05, "loss": 33.2266, "step": 1016 }, { "epoch": 0.16319653387892646, "grad_norm": 11.891423225402832, "learning_rate": 9.458110857424589e-05, "loss": 33.2734, "step": 1017 }, { "epoch": 0.16335700244714565, "grad_norm": 12.289481163024902, "learning_rate": 9.456950224145218e-05, "loss": 33.2812, "step": 1018 }, { "epoch": 0.16351747101536487, "grad_norm": 12.617005348205566, "learning_rate": 9.455788420625983e-05, "loss": 33.2578, "step": 1019 }, { "epoch": 0.16367793958358406, "grad_norm": 12.0852689743042, "learning_rate": 9.454625447171936e-05, "loss": 33.25, "step": 1020 }, { "epoch": 0.16383840815180326, "grad_norm": 12.082281112670898, "learning_rate": 9.453461304088432e-05, "loss": 33.1953, "step": 1021 }, { "epoch": 0.16399887672002247, "grad_norm": 12.202752113342285, "learning_rate": 9.452295991681135e-05, "loss": 33.2188, "step": 1022 }, { "epoch": 0.16415934528824166, "grad_norm": 12.204641342163086, "learning_rate": 9.451129510256014e-05, "loss": 33.1484, "step": 1023 }, { "epoch": 0.16431981385646086, "grad_norm": 12.183605194091797, "learning_rate": 9.44996186011935e-05, "loss": 33.2656, "step": 1024 }, { "epoch": 0.16448028242468007, "grad_norm": 12.344829559326172, "learning_rate": 9.448793041577721e-05, "loss": 33.2031, "step": 1025 }, { "epoch": 0.16464075099289927, "grad_norm": 12.409613609313965, "learning_rate": 9.447623054938027e-05, "loss": 33.1289, "step": 1026 }, { "epoch": 0.16480121956111846, "grad_norm": 12.014729499816895, "learning_rate": 9.446451900507456e-05, "loss": 33.168, "step": 1027 }, { "epoch": 0.16496168812933767, "grad_norm": 12.079889297485352, "learning_rate": 9.44527957859352e-05, "loss": 33.1758, "step": 1028 }, { "epoch": 0.16512215669755687, "grad_norm": 12.079207420349121, "learning_rate": 9.444106089504026e-05, "loss": 33.1836, "step": 1029 }, { "epoch": 0.16528262526577606, "grad_norm": 12.397550582885742, "learning_rate": 9.442931433547091e-05, "loss": 33.1367, "step": 1030 }, { "epoch": 0.16544309383399528, "grad_norm": 12.039215087890625, "learning_rate": 9.44175561103114e-05, "loss": 33.3047, "step": 1031 }, { "epoch": 0.16560356240221447, "grad_norm": 12.136113166809082, "learning_rate": 9.440578622264904e-05, "loss": 33.1133, "step": 1032 }, { "epoch": 0.16576403097043366, "grad_norm": 12.13729476928711, "learning_rate": 9.439400467557418e-05, "loss": 33.0977, "step": 1033 }, { "epoch": 0.16592449953865288, "grad_norm": 12.399459838867188, "learning_rate": 9.438221147218025e-05, "loss": 33.0391, "step": 1034 }, { "epoch": 0.16608496810687207, "grad_norm": 12.194896697998047, "learning_rate": 9.437040661556372e-05, "loss": 33.1523, "step": 1035 }, { "epoch": 0.16624543667509126, "grad_norm": 12.203405380249023, "learning_rate": 9.435859010882416e-05, "loss": 33.1289, "step": 1036 }, { "epoch": 0.16640590524331048, "grad_norm": 12.208578109741211, "learning_rate": 9.434676195506414e-05, "loss": 33.1562, "step": 1037 }, { "epoch": 0.16656637381152967, "grad_norm": 12.006962776184082, "learning_rate": 9.433492215738934e-05, "loss": 33.0938, "step": 1038 }, { "epoch": 0.16672684237974886, "grad_norm": 12.409685134887695, "learning_rate": 9.43230707189085e-05, "loss": 33.0312, "step": 1039 }, { "epoch": 0.16688731094796808, "grad_norm": 12.408037185668945, "learning_rate": 9.431120764273336e-05, "loss": 33.1875, "step": 1040 }, { "epoch": 0.16704777951618727, "grad_norm": 12.390191078186035, "learning_rate": 9.429933293197875e-05, "loss": 33.1172, "step": 1041 }, { "epoch": 0.16720824808440646, "grad_norm": 12.138550758361816, "learning_rate": 9.428744658976258e-05, "loss": 33.0625, "step": 1042 }, { "epoch": 0.16736871665262568, "grad_norm": 12.10119915008545, "learning_rate": 9.427554861920577e-05, "loss": 33.1406, "step": 1043 }, { "epoch": 0.16752918522084487, "grad_norm": 12.480338096618652, "learning_rate": 9.426363902343232e-05, "loss": 33.0781, "step": 1044 }, { "epoch": 0.16768965378906406, "grad_norm": 12.015308380126953, "learning_rate": 9.425171780556927e-05, "loss": 33.1562, "step": 1045 }, { "epoch": 0.16785012235728328, "grad_norm": 12.464158058166504, "learning_rate": 9.423978496874673e-05, "loss": 33.1484, "step": 1046 }, { "epoch": 0.16801059092550247, "grad_norm": 12.083477973937988, "learning_rate": 9.422784051609783e-05, "loss": 33.1875, "step": 1047 }, { "epoch": 0.16817105949372166, "grad_norm": 12.146838188171387, "learning_rate": 9.42158844507588e-05, "loss": 33.1484, "step": 1048 }, { "epoch": 0.16833152806194088, "grad_norm": 12.712502479553223, "learning_rate": 9.420391677586884e-05, "loss": 33.1719, "step": 1049 }, { "epoch": 0.16849199663016007, "grad_norm": 12.534636497497559, "learning_rate": 9.419193749457028e-05, "loss": 33.3438, "step": 1050 }, { "epoch": 0.16865246519837926, "grad_norm": 12.066561698913574, "learning_rate": 9.417994661000842e-05, "loss": 33.3594, "step": 1051 }, { "epoch": 0.16881293376659848, "grad_norm": 12.35615062713623, "learning_rate": 9.41679441253317e-05, "loss": 33.6094, "step": 1052 }, { "epoch": 0.16897340233481767, "grad_norm": 12.474895477294922, "learning_rate": 9.415593004369152e-05, "loss": 33.4297, "step": 1053 }, { "epoch": 0.16913387090303686, "grad_norm": 12.436800956726074, "learning_rate": 9.414390436824239e-05, "loss": 33.1953, "step": 1054 }, { "epoch": 0.16929433947125608, "grad_norm": 12.498964309692383, "learning_rate": 9.41318671021418e-05, "loss": 33.2109, "step": 1055 }, { "epoch": 0.16945480803947527, "grad_norm": 12.090023040771484, "learning_rate": 9.411981824855033e-05, "loss": 33.1719, "step": 1056 }, { "epoch": 0.16961527660769446, "grad_norm": 12.298830032348633, "learning_rate": 9.410775781063159e-05, "loss": 33.1484, "step": 1057 }, { "epoch": 0.16977574517591368, "grad_norm": 12.273396492004395, "learning_rate": 9.409568579155223e-05, "loss": 33.2266, "step": 1058 }, { "epoch": 0.16993621374413287, "grad_norm": 12.351298332214355, "learning_rate": 9.408360219448194e-05, "loss": 33.1641, "step": 1059 }, { "epoch": 0.17009668231235206, "grad_norm": 12.3953218460083, "learning_rate": 9.407150702259347e-05, "loss": 33.1875, "step": 1060 }, { "epoch": 0.17025715088057128, "grad_norm": 12.152956008911133, "learning_rate": 9.405940027906256e-05, "loss": 33.1719, "step": 1061 }, { "epoch": 0.17041761944879047, "grad_norm": 12.147673606872559, "learning_rate": 9.404728196706805e-05, "loss": 33.1719, "step": 1062 }, { "epoch": 0.17057808801700966, "grad_norm": 12.08627986907959, "learning_rate": 9.403515208979175e-05, "loss": 33.1484, "step": 1063 }, { "epoch": 0.17073855658522888, "grad_norm": 12.026674270629883, "learning_rate": 9.402301065041857e-05, "loss": 33.1641, "step": 1064 }, { "epoch": 0.17089902515344807, "grad_norm": 12.219449996948242, "learning_rate": 9.401085765213642e-05, "loss": 33.1797, "step": 1065 }, { "epoch": 0.17105949372166726, "grad_norm": 12.072858810424805, "learning_rate": 9.399869309813627e-05, "loss": 33.1797, "step": 1066 }, { "epoch": 0.17121996228988648, "grad_norm": 12.38693904876709, "learning_rate": 9.398651699161207e-05, "loss": 33.1562, "step": 1067 }, { "epoch": 0.17138043085810567, "grad_norm": 12.10025405883789, "learning_rate": 9.39743293357609e-05, "loss": 33.2109, "step": 1068 }, { "epoch": 0.17154089942632486, "grad_norm": 12.228199005126953, "learning_rate": 9.396213013378276e-05, "loss": 33.2422, "step": 1069 }, { "epoch": 0.17170136799454408, "grad_norm": 12.162321090698242, "learning_rate": 9.394991938888076e-05, "loss": 33.0938, "step": 1070 }, { "epoch": 0.17186183656276327, "grad_norm": 12.077207565307617, "learning_rate": 9.393769710426099e-05, "loss": 33.1484, "step": 1071 }, { "epoch": 0.17202230513098246, "grad_norm": 12.277790069580078, "learning_rate": 9.392546328313265e-05, "loss": 33.1094, "step": 1072 }, { "epoch": 0.17218277369920168, "grad_norm": 12.085344314575195, "learning_rate": 9.391321792870787e-05, "loss": 33.1016, "step": 1073 }, { "epoch": 0.17234324226742087, "grad_norm": 12.071640968322754, "learning_rate": 9.390096104420187e-05, "loss": 33.0977, "step": 1074 }, { "epoch": 0.17250371083564006, "grad_norm": 12.075268745422363, "learning_rate": 9.388869263283286e-05, "loss": 33.1758, "step": 1075 }, { "epoch": 0.17266417940385928, "grad_norm": 12.400370597839355, "learning_rate": 9.387641269782213e-05, "loss": 33.0586, "step": 1076 }, { "epoch": 0.17282464797207847, "grad_norm": 11.888729095458984, "learning_rate": 9.386412124239394e-05, "loss": 33.1133, "step": 1077 }, { "epoch": 0.17298511654029766, "grad_norm": 12.080443382263184, "learning_rate": 9.385181826977562e-05, "loss": 33.1445, "step": 1078 }, { "epoch": 0.17314558510851688, "grad_norm": 12.144816398620605, "learning_rate": 9.383950378319748e-05, "loss": 33.0312, "step": 1079 }, { "epoch": 0.17330605367673607, "grad_norm": 12.338086128234863, "learning_rate": 9.38271777858929e-05, "loss": 33.0508, "step": 1080 }, { "epoch": 0.17346652224495526, "grad_norm": 12.20231819152832, "learning_rate": 9.381484028109822e-05, "loss": 33.0117, "step": 1081 }, { "epoch": 0.17362699081317448, "grad_norm": 12.082432746887207, "learning_rate": 9.380249127205284e-05, "loss": 33.1094, "step": 1082 }, { "epoch": 0.17378745938139367, "grad_norm": 12.342620849609375, "learning_rate": 9.379013076199925e-05, "loss": 33.082, "step": 1083 }, { "epoch": 0.17394792794961286, "grad_norm": 12.398025512695312, "learning_rate": 9.37777587541828e-05, "loss": 33.0078, "step": 1084 }, { "epoch": 0.17410839651783208, "grad_norm": 12.223939895629883, "learning_rate": 9.3765375251852e-05, "loss": 33.0352, "step": 1085 }, { "epoch": 0.17426886508605127, "grad_norm": 12.200745582580566, "learning_rate": 9.375298025825832e-05, "loss": 33.0078, "step": 1086 }, { "epoch": 0.17442933365427046, "grad_norm": 11.89215087890625, "learning_rate": 9.374057377665624e-05, "loss": 33.1992, "step": 1087 }, { "epoch": 0.17458980222248968, "grad_norm": 12.278791427612305, "learning_rate": 9.372815581030327e-05, "loss": 33.0703, "step": 1088 }, { "epoch": 0.17475027079070887, "grad_norm": 12.335151672363281, "learning_rate": 9.371572636245995e-05, "loss": 33.0391, "step": 1089 }, { "epoch": 0.17491073935892806, "grad_norm": 12.1411771774292, "learning_rate": 9.37032854363898e-05, "loss": 33.0234, "step": 1090 }, { "epoch": 0.17507120792714728, "grad_norm": 12.08110523223877, "learning_rate": 9.36908330353594e-05, "loss": 33.1172, "step": 1091 }, { "epoch": 0.17523167649536647, "grad_norm": 12.151885032653809, "learning_rate": 9.367836916263829e-05, "loss": 33.0859, "step": 1092 }, { "epoch": 0.17539214506358566, "grad_norm": 12.212114334106445, "learning_rate": 9.366589382149907e-05, "loss": 33.0898, "step": 1093 }, { "epoch": 0.17555261363180488, "grad_norm": 12.901446342468262, "learning_rate": 9.365340701521733e-05, "loss": 32.8672, "step": 1094 }, { "epoch": 0.17571308220002407, "grad_norm": 12.51463794708252, "learning_rate": 9.364090874707163e-05, "loss": 32.9219, "step": 1095 }, { "epoch": 0.17587355076824326, "grad_norm": 12.097221374511719, "learning_rate": 9.362839902034364e-05, "loss": 33.1016, "step": 1096 }, { "epoch": 0.17603401933646248, "grad_norm": 12.39077377319336, "learning_rate": 9.361587783831794e-05, "loss": 32.9922, "step": 1097 }, { "epoch": 0.17619448790468167, "grad_norm": 12.26990032196045, "learning_rate": 9.360334520428216e-05, "loss": 33.0352, "step": 1098 }, { "epoch": 0.17635495647290086, "grad_norm": 12.288627624511719, "learning_rate": 9.359080112152694e-05, "loss": 33.2031, "step": 1099 }, { "epoch": 0.17651542504112008, "grad_norm": 12.294098854064941, "learning_rate": 9.357824559334593e-05, "loss": 33.7344, "step": 1100 }, { "epoch": 0.17667589360933927, "grad_norm": 12.133959770202637, "learning_rate": 9.356567862303575e-05, "loss": 33.3359, "step": 1101 }, { "epoch": 0.17683636217755846, "grad_norm": 12.054420471191406, "learning_rate": 9.355310021389608e-05, "loss": 33.3125, "step": 1102 }, { "epoch": 0.17699683074577768, "grad_norm": 12.11069393157959, "learning_rate": 9.354051036922953e-05, "loss": 33.2734, "step": 1103 }, { "epoch": 0.17715729931399687, "grad_norm": 12.04633903503418, "learning_rate": 9.35279090923418e-05, "loss": 33.3125, "step": 1104 }, { "epoch": 0.17731776788221607, "grad_norm": 12.09249210357666, "learning_rate": 9.35152963865415e-05, "loss": 33.1328, "step": 1105 }, { "epoch": 0.17747823645043528, "grad_norm": 12.233891487121582, "learning_rate": 9.350267225514032e-05, "loss": 33.0938, "step": 1106 }, { "epoch": 0.17763870501865447, "grad_norm": 12.28703784942627, "learning_rate": 9.349003670145291e-05, "loss": 32.9844, "step": 1107 }, { "epoch": 0.17779917358687367, "grad_norm": 12.09511947631836, "learning_rate": 9.347738972879692e-05, "loss": 33.0547, "step": 1108 }, { "epoch": 0.17795964215509288, "grad_norm": 11.955655097961426, "learning_rate": 9.346473134049303e-05, "loss": 33.0234, "step": 1109 }, { "epoch": 0.17812011072331208, "grad_norm": 12.344415664672852, "learning_rate": 9.345206153986485e-05, "loss": 33.1172, "step": 1110 }, { "epoch": 0.17828057929153127, "grad_norm": 12.177855491638184, "learning_rate": 9.343938033023905e-05, "loss": 32.9531, "step": 1111 }, { "epoch": 0.17844104785975048, "grad_norm": 12.214128494262695, "learning_rate": 9.342668771494528e-05, "loss": 32.9688, "step": 1112 }, { "epoch": 0.17860151642796968, "grad_norm": 12.268990516662598, "learning_rate": 9.341398369731617e-05, "loss": 33.0391, "step": 1113 }, { "epoch": 0.17876198499618887, "grad_norm": 12.384572982788086, "learning_rate": 9.340126828068733e-05, "loss": 32.9922, "step": 1114 }, { "epoch": 0.17892245356440808, "grad_norm": 12.388864517211914, "learning_rate": 9.338854146839744e-05, "loss": 32.9609, "step": 1115 }, { "epoch": 0.17908292213262728, "grad_norm": 12.136252403259277, "learning_rate": 9.337580326378809e-05, "loss": 33.0547, "step": 1116 }, { "epoch": 0.17924339070084647, "grad_norm": 12.29074478149414, "learning_rate": 9.336305367020387e-05, "loss": 32.9922, "step": 1117 }, { "epoch": 0.17940385926906569, "grad_norm": 12.429705619812012, "learning_rate": 9.33502926909924e-05, "loss": 32.9375, "step": 1118 }, { "epoch": 0.17956432783728488, "grad_norm": 12.168283462524414, "learning_rate": 9.333752032950426e-05, "loss": 33.1562, "step": 1119 }, { "epoch": 0.17972479640550407, "grad_norm": 12.160048484802246, "learning_rate": 9.332473658909303e-05, "loss": 33.0078, "step": 1120 }, { "epoch": 0.17988526497372329, "grad_norm": 12.145769119262695, "learning_rate": 9.331194147311528e-05, "loss": 33.0078, "step": 1121 }, { "epoch": 0.18004573354194248, "grad_norm": 12.290964126586914, "learning_rate": 9.329913498493054e-05, "loss": 32.9922, "step": 1122 }, { "epoch": 0.18020620211016167, "grad_norm": 12.236030578613281, "learning_rate": 9.328631712790139e-05, "loss": 32.9922, "step": 1123 }, { "epoch": 0.18036667067838089, "grad_norm": 12.27035903930664, "learning_rate": 9.327348790539332e-05, "loss": 33.0234, "step": 1124 }, { "epoch": 0.18052713924660008, "grad_norm": 12.349613189697266, "learning_rate": 9.326064732077483e-05, "loss": 32.9414, "step": 1125 }, { "epoch": 0.18068760781481927, "grad_norm": 12.146008491516113, "learning_rate": 9.324779537741744e-05, "loss": 32.9922, "step": 1126 }, { "epoch": 0.18084807638303846, "grad_norm": 11.956833839416504, "learning_rate": 9.32349320786956e-05, "loss": 33.0469, "step": 1127 }, { "epoch": 0.18100854495125768, "grad_norm": 12.165189743041992, "learning_rate": 9.322205742798677e-05, "loss": 33.0664, "step": 1128 }, { "epoch": 0.18116901351947687, "grad_norm": 12.469655990600586, "learning_rate": 9.320917142867141e-05, "loss": 32.9766, "step": 1129 }, { "epoch": 0.18132948208769606, "grad_norm": 12.098530769348145, "learning_rate": 9.319627408413288e-05, "loss": 33.0156, "step": 1130 }, { "epoch": 0.18148995065591528, "grad_norm": 12.018035888671875, "learning_rate": 9.318336539775762e-05, "loss": 32.9688, "step": 1131 }, { "epoch": 0.18165041922413447, "grad_norm": 12.010557174682617, "learning_rate": 9.317044537293497e-05, "loss": 32.9805, "step": 1132 }, { "epoch": 0.18181088779235366, "grad_norm": 12.295463562011719, "learning_rate": 9.31575140130573e-05, "loss": 32.9688, "step": 1133 }, { "epoch": 0.18197135636057288, "grad_norm": 12.078481674194336, "learning_rate": 9.314457132151995e-05, "loss": 33.0469, "step": 1134 }, { "epoch": 0.18213182492879207, "grad_norm": 12.221768379211426, "learning_rate": 9.313161730172117e-05, "loss": 32.9414, "step": 1135 }, { "epoch": 0.18229229349701126, "grad_norm": 12.525656700134277, "learning_rate": 9.311865195706227e-05, "loss": 32.832, "step": 1136 }, { "epoch": 0.18245276206523048, "grad_norm": 12.353414535522461, "learning_rate": 9.31056752909475e-05, "loss": 32.9922, "step": 1137 }, { "epoch": 0.18261323063344967, "grad_norm": 12.413508415222168, "learning_rate": 9.309268730678406e-05, "loss": 32.9414, "step": 1138 }, { "epoch": 0.18277369920166886, "grad_norm": 12.203007698059082, "learning_rate": 9.307968800798216e-05, "loss": 32.9766, "step": 1139 }, { "epoch": 0.18293416776988808, "grad_norm": 12.647937774658203, "learning_rate": 9.306667739795496e-05, "loss": 32.8945, "step": 1140 }, { "epoch": 0.18309463633810727, "grad_norm": 12.00545597076416, "learning_rate": 9.305365548011858e-05, "loss": 32.9414, "step": 1141 }, { "epoch": 0.18325510490632646, "grad_norm": 12.286664962768555, "learning_rate": 9.304062225789214e-05, "loss": 32.9492, "step": 1142 }, { "epoch": 0.18341557347454568, "grad_norm": 12.203886032104492, "learning_rate": 9.302757773469768e-05, "loss": 32.9258, "step": 1143 }, { "epoch": 0.18357604204276487, "grad_norm": 12.076444625854492, "learning_rate": 9.301452191396028e-05, "loss": 32.8945, "step": 1144 }, { "epoch": 0.18373651061098406, "grad_norm": 12.451517105102539, "learning_rate": 9.30014547991079e-05, "loss": 32.8945, "step": 1145 }, { "epoch": 0.18389697917920328, "grad_norm": 12.206002235412598, "learning_rate": 9.298837639357157e-05, "loss": 33.0195, "step": 1146 }, { "epoch": 0.18405744774742247, "grad_norm": 12.200182914733887, "learning_rate": 9.297528670078515e-05, "loss": 32.8828, "step": 1147 }, { "epoch": 0.18421791631564166, "grad_norm": 12.020113945007324, "learning_rate": 9.296218572418558e-05, "loss": 33.0469, "step": 1148 }, { "epoch": 0.18437838488386088, "grad_norm": 12.26274299621582, "learning_rate": 9.294907346721271e-05, "loss": 33.1367, "step": 1149 }, { "epoch": 0.18453885345208007, "grad_norm": 12.313446998596191, "learning_rate": 9.293594993330937e-05, "loss": 33.2695, "step": 1150 }, { "epoch": 0.18469932202029926, "grad_norm": 12.200316429138184, "learning_rate": 9.292281512592134e-05, "loss": 33.4805, "step": 1151 }, { "epoch": 0.18485979058851848, "grad_norm": 12.192322731018066, "learning_rate": 9.290966904849736e-05, "loss": 32.9688, "step": 1152 }, { "epoch": 0.18502025915673767, "grad_norm": 12.300168991088867, "learning_rate": 9.289651170448912e-05, "loss": 33.0859, "step": 1153 }, { "epoch": 0.18518072772495686, "grad_norm": 12.168892860412598, "learning_rate": 9.288334309735131e-05, "loss": 33.3203, "step": 1154 }, { "epoch": 0.18534119629317608, "grad_norm": 12.09194564819336, "learning_rate": 9.287016323054152e-05, "loss": 32.9844, "step": 1155 }, { "epoch": 0.18550166486139527, "grad_norm": 12.092218399047852, "learning_rate": 9.285697210752033e-05, "loss": 33.1328, "step": 1156 }, { "epoch": 0.18566213342961446, "grad_norm": 12.35184383392334, "learning_rate": 9.284376973175128e-05, "loss": 32.9844, "step": 1157 }, { "epoch": 0.18582260199783368, "grad_norm": 12.148576736450195, "learning_rate": 9.283055610670084e-05, "loss": 32.9688, "step": 1158 }, { "epoch": 0.18598307056605287, "grad_norm": 11.831072807312012, "learning_rate": 9.281733123583846e-05, "loss": 33.1445, "step": 1159 }, { "epoch": 0.18614353913427206, "grad_norm": 12.150705337524414, "learning_rate": 9.280409512263652e-05, "loss": 32.9922, "step": 1160 }, { "epoch": 0.18630400770249128, "grad_norm": 11.904607772827148, "learning_rate": 9.279084777057036e-05, "loss": 33.0234, "step": 1161 }, { "epoch": 0.18646447627071047, "grad_norm": 12.276108741760254, "learning_rate": 9.277758918311829e-05, "loss": 32.9375, "step": 1162 }, { "epoch": 0.18662494483892966, "grad_norm": 12.01564884185791, "learning_rate": 9.276431936376156e-05, "loss": 32.9609, "step": 1163 }, { "epoch": 0.18678541340714888, "grad_norm": 12.168350219726562, "learning_rate": 9.275103831598432e-05, "loss": 32.9844, "step": 1164 }, { "epoch": 0.18694588197536807, "grad_norm": 12.206188201904297, "learning_rate": 9.273774604327374e-05, "loss": 32.9375, "step": 1165 }, { "epoch": 0.18710635054358726, "grad_norm": 12.318970680236816, "learning_rate": 9.272444254911991e-05, "loss": 32.9766, "step": 1166 }, { "epoch": 0.18726681911180648, "grad_norm": 12.21146011352539, "learning_rate": 9.271112783701585e-05, "loss": 32.9766, "step": 1167 }, { "epoch": 0.18742728768002567, "grad_norm": 12.074122428894043, "learning_rate": 9.269780191045756e-05, "loss": 32.9453, "step": 1168 }, { "epoch": 0.18758775624824486, "grad_norm": 12.271350860595703, "learning_rate": 9.268446477294395e-05, "loss": 32.8594, "step": 1169 }, { "epoch": 0.18774822481646408, "grad_norm": 12.101672172546387, "learning_rate": 9.267111642797687e-05, "loss": 32.8984, "step": 1170 }, { "epoch": 0.18790869338468327, "grad_norm": 12.20921802520752, "learning_rate": 9.265775687906117e-05, "loss": 32.9141, "step": 1171 }, { "epoch": 0.18806916195290246, "grad_norm": 12.147126197814941, "learning_rate": 9.264438612970456e-05, "loss": 33.0078, "step": 1172 }, { "epoch": 0.18822963052112168, "grad_norm": 11.961902618408203, "learning_rate": 9.263100418341777e-05, "loss": 33.0312, "step": 1173 }, { "epoch": 0.18839009908934087, "grad_norm": 12.080116271972656, "learning_rate": 9.261761104371443e-05, "loss": 32.8828, "step": 1174 }, { "epoch": 0.18855056765756006, "grad_norm": 12.344924926757812, "learning_rate": 9.260420671411108e-05, "loss": 32.8828, "step": 1175 }, { "epoch": 0.18871103622577928, "grad_norm": 12.018938064575195, "learning_rate": 9.259079119812726e-05, "loss": 32.9062, "step": 1176 }, { "epoch": 0.18887150479399847, "grad_norm": 12.078723907470703, "learning_rate": 9.25773644992854e-05, "loss": 32.8594, "step": 1177 }, { "epoch": 0.18903197336221766, "grad_norm": 12.210217475891113, "learning_rate": 9.256392662111088e-05, "loss": 32.8984, "step": 1178 }, { "epoch": 0.18919244193043688, "grad_norm": 12.39870548248291, "learning_rate": 9.255047756713205e-05, "loss": 32.8906, "step": 1179 }, { "epoch": 0.18935291049865607, "grad_norm": 12.219487190246582, "learning_rate": 9.253701734088014e-05, "loss": 32.9141, "step": 1180 }, { "epoch": 0.18951337906687526, "grad_norm": 12.267534255981445, "learning_rate": 9.252354594588932e-05, "loss": 32.8672, "step": 1181 }, { "epoch": 0.18967384763509448, "grad_norm": 12.152219772338867, "learning_rate": 9.251006338569675e-05, "loss": 32.9609, "step": 1182 }, { "epoch": 0.18983431620331367, "grad_norm": 12.27643871307373, "learning_rate": 9.249656966384244e-05, "loss": 32.8203, "step": 1183 }, { "epoch": 0.18999478477153287, "grad_norm": 12.200353622436523, "learning_rate": 9.248306478386939e-05, "loss": 32.9297, "step": 1184 }, { "epoch": 0.19015525333975208, "grad_norm": 12.019641876220703, "learning_rate": 9.246954874932353e-05, "loss": 33.0, "step": 1185 }, { "epoch": 0.19031572190797127, "grad_norm": 12.523127555847168, "learning_rate": 9.245602156375366e-05, "loss": 32.875, "step": 1186 }, { "epoch": 0.19047619047619047, "grad_norm": 12.007797241210938, "learning_rate": 9.244248323071157e-05, "loss": 32.8594, "step": 1187 }, { "epoch": 0.19063665904440968, "grad_norm": 12.143168449401855, "learning_rate": 9.242893375375195e-05, "loss": 32.875, "step": 1188 }, { "epoch": 0.19079712761262888, "grad_norm": 12.268779754638672, "learning_rate": 9.241537313643243e-05, "loss": 32.8359, "step": 1189 }, { "epoch": 0.19095759618084807, "grad_norm": 12.153072357177734, "learning_rate": 9.240180138231355e-05, "loss": 32.9609, "step": 1190 }, { "epoch": 0.19111806474906728, "grad_norm": 12.077916145324707, "learning_rate": 9.238821849495878e-05, "loss": 32.8906, "step": 1191 }, { "epoch": 0.19127853331728648, "grad_norm": 12.302989959716797, "learning_rate": 9.23746244779345e-05, "loss": 32.9453, "step": 1192 }, { "epoch": 0.19143900188550567, "grad_norm": 12.52847671508789, "learning_rate": 9.236101933481004e-05, "loss": 32.7891, "step": 1193 }, { "epoch": 0.19159947045372488, "grad_norm": 12.139325141906738, "learning_rate": 9.234740306915763e-05, "loss": 32.8281, "step": 1194 }, { "epoch": 0.19175993902194408, "grad_norm": 12.257619857788086, "learning_rate": 9.233377568455244e-05, "loss": 32.8438, "step": 1195 }, { "epoch": 0.19192040759016327, "grad_norm": 12.411484718322754, "learning_rate": 9.232013718457254e-05, "loss": 32.9688, "step": 1196 }, { "epoch": 0.19208087615838249, "grad_norm": 12.075109481811523, "learning_rate": 9.230648757279891e-05, "loss": 33.0391, "step": 1197 }, { "epoch": 0.19224134472660168, "grad_norm": 12.335755348205566, "learning_rate": 9.229282685281548e-05, "loss": 32.9609, "step": 1198 }, { "epoch": 0.19240181329482087, "grad_norm": 12.397574424743652, "learning_rate": 9.227915502820908e-05, "loss": 33.0781, "step": 1199 }, { "epoch": 0.19256228186304009, "grad_norm": 12.303254127502441, "learning_rate": 9.226547210256944e-05, "loss": 33.1797, "step": 1200 }, { "epoch": 0.19272275043125928, "grad_norm": 12.45410442352295, "learning_rate": 9.225177807948922e-05, "loss": 33.0547, "step": 1201 }, { "epoch": 0.19288321899947847, "grad_norm": 12.254301071166992, "learning_rate": 9.223807296256402e-05, "loss": 33.1641, "step": 1202 }, { "epoch": 0.19304368756769769, "grad_norm": 12.10006046295166, "learning_rate": 9.22243567553923e-05, "loss": 33.0312, "step": 1203 }, { "epoch": 0.19320415613591688, "grad_norm": 12.089755058288574, "learning_rate": 9.221062946157546e-05, "loss": 33.0078, "step": 1204 }, { "epoch": 0.19336462470413607, "grad_norm": 12.227227210998535, "learning_rate": 9.219689108471781e-05, "loss": 33.0547, "step": 1205 }, { "epoch": 0.1935250932723553, "grad_norm": 12.414423942565918, "learning_rate": 9.218314162842658e-05, "loss": 32.9688, "step": 1206 }, { "epoch": 0.19368556184057448, "grad_norm": 12.23129940032959, "learning_rate": 9.21693810963119e-05, "loss": 32.8906, "step": 1207 }, { "epoch": 0.19384603040879367, "grad_norm": 12.08535099029541, "learning_rate": 9.215560949198679e-05, "loss": 32.8594, "step": 1208 }, { "epoch": 0.1940064989770129, "grad_norm": 12.270577430725098, "learning_rate": 9.214182681906722e-05, "loss": 32.9922, "step": 1209 }, { "epoch": 0.19416696754523208, "grad_norm": 12.409749031066895, "learning_rate": 9.212803308117201e-05, "loss": 33.0, "step": 1210 }, { "epoch": 0.19432743611345127, "grad_norm": 12.020496368408203, "learning_rate": 9.211422828192293e-05, "loss": 32.9609, "step": 1211 }, { "epoch": 0.1944879046816705, "grad_norm": 11.958410263061523, "learning_rate": 9.210041242494462e-05, "loss": 32.9922, "step": 1212 }, { "epoch": 0.19464837324988968, "grad_norm": 12.40658950805664, "learning_rate": 9.208658551386466e-05, "loss": 33.0156, "step": 1213 }, { "epoch": 0.19480884181810887, "grad_norm": 12.281318664550781, "learning_rate": 9.207274755231354e-05, "loss": 32.875, "step": 1214 }, { "epoch": 0.1949693103863281, "grad_norm": 12.080079078674316, "learning_rate": 9.205889854392457e-05, "loss": 32.9688, "step": 1215 }, { "epoch": 0.19512977895454728, "grad_norm": 12.168057441711426, "learning_rate": 9.204503849233408e-05, "loss": 32.9297, "step": 1216 }, { "epoch": 0.19529024752276647, "grad_norm": 11.961030960083008, "learning_rate": 9.203116740118119e-05, "loss": 32.9531, "step": 1217 }, { "epoch": 0.1954507160909857, "grad_norm": 12.08236312866211, "learning_rate": 9.201728527410798e-05, "loss": 32.8906, "step": 1218 }, { "epoch": 0.19561118465920488, "grad_norm": 11.955870628356934, "learning_rate": 9.200339211475941e-05, "loss": 33.0078, "step": 1219 }, { "epoch": 0.19577165322742407, "grad_norm": 12.151692390441895, "learning_rate": 9.198948792678334e-05, "loss": 32.8984, "step": 1220 }, { "epoch": 0.1959321217956433, "grad_norm": 11.947708129882812, "learning_rate": 9.197557271383053e-05, "loss": 32.9297, "step": 1221 }, { "epoch": 0.19609259036386248, "grad_norm": 12.137404441833496, "learning_rate": 9.196164647955463e-05, "loss": 32.8672, "step": 1222 }, { "epoch": 0.19625305893208167, "grad_norm": 12.269968032836914, "learning_rate": 9.194770922761219e-05, "loss": 32.8672, "step": 1223 }, { "epoch": 0.1964135275003009, "grad_norm": 12.074886322021484, "learning_rate": 9.193376096166264e-05, "loss": 32.8984, "step": 1224 }, { "epoch": 0.19657399606852008, "grad_norm": 12.643418312072754, "learning_rate": 9.191980168536829e-05, "loss": 32.8984, "step": 1225 }, { "epoch": 0.19673446463673927, "grad_norm": 12.339845657348633, "learning_rate": 9.19058314023944e-05, "loss": 32.8828, "step": 1226 }, { "epoch": 0.1968949332049585, "grad_norm": 12.081893920898438, "learning_rate": 9.189185011640902e-05, "loss": 32.8672, "step": 1227 }, { "epoch": 0.19705540177317768, "grad_norm": 12.28332233428955, "learning_rate": 9.187785783108323e-05, "loss": 32.9219, "step": 1228 }, { "epoch": 0.19721587034139687, "grad_norm": 12.20278263092041, "learning_rate": 9.186385455009083e-05, "loss": 32.8438, "step": 1229 }, { "epoch": 0.1973763389096161, "grad_norm": 12.070813179016113, "learning_rate": 9.184984027710867e-05, "loss": 32.8594, "step": 1230 }, { "epoch": 0.19753680747783528, "grad_norm": 12.114298820495605, "learning_rate": 9.183581501581637e-05, "loss": 33.0078, "step": 1231 }, { "epoch": 0.19769727604605447, "grad_norm": 12.405579566955566, "learning_rate": 9.182177876989646e-05, "loss": 32.8984, "step": 1232 }, { "epoch": 0.1978577446142737, "grad_norm": 12.135832786560059, "learning_rate": 9.180773154303441e-05, "loss": 32.8516, "step": 1233 }, { "epoch": 0.19801821318249288, "grad_norm": 12.332612037658691, "learning_rate": 9.179367333891851e-05, "loss": 32.8594, "step": 1234 }, { "epoch": 0.19817868175071207, "grad_norm": 12.1524658203125, "learning_rate": 9.177960416123996e-05, "loss": 32.9219, "step": 1235 }, { "epoch": 0.1983391503189313, "grad_norm": 12.459787368774414, "learning_rate": 9.176552401369283e-05, "loss": 32.875, "step": 1236 }, { "epoch": 0.19849961888715048, "grad_norm": 12.524718284606934, "learning_rate": 9.175143289997408e-05, "loss": 32.8203, "step": 1237 }, { "epoch": 0.19866008745536967, "grad_norm": 12.068811416625977, "learning_rate": 9.173733082378353e-05, "loss": 32.9219, "step": 1238 }, { "epoch": 0.1988205560235889, "grad_norm": 12.266541481018066, "learning_rate": 9.172321778882394e-05, "loss": 33.0156, "step": 1239 }, { "epoch": 0.19898102459180808, "grad_norm": 12.147832870483398, "learning_rate": 9.170909379880084e-05, "loss": 32.8672, "step": 1240 }, { "epoch": 0.19914149316002727, "grad_norm": 12.270184516906738, "learning_rate": 9.169495885742274e-05, "loss": 32.9062, "step": 1241 }, { "epoch": 0.1993019617282465, "grad_norm": 12.038176536560059, "learning_rate": 9.168081296840096e-05, "loss": 33.0938, "step": 1242 }, { "epoch": 0.19946243029646568, "grad_norm": 11.961238861083984, "learning_rate": 9.166665613544976e-05, "loss": 33.0078, "step": 1243 }, { "epoch": 0.19962289886468487, "grad_norm": 12.338180541992188, "learning_rate": 9.165248836228617e-05, "loss": 32.8906, "step": 1244 }, { "epoch": 0.1997833674329041, "grad_norm": 12.143040657043457, "learning_rate": 9.163830965263019e-05, "loss": 33.0391, "step": 1245 }, { "epoch": 0.19994383600112328, "grad_norm": 12.273344039916992, "learning_rate": 9.162412001020466e-05, "loss": 32.8984, "step": 1246 }, { "epoch": 0.20010430456934247, "grad_norm": 11.89305305480957, "learning_rate": 9.160991943873527e-05, "loss": 33.0312, "step": 1247 }, { "epoch": 0.2002647731375617, "grad_norm": 12.92479133605957, "learning_rate": 9.159570794195059e-05, "loss": 32.9297, "step": 1248 }, { "epoch": 0.20042524170578088, "grad_norm": 12.646844863891602, "learning_rate": 9.158148552358208e-05, "loss": 32.8984, "step": 1249 }, { "epoch": 0.20058571027400007, "grad_norm": 12.607234954833984, "learning_rate": 9.156725218736405e-05, "loss": 33.1094, "step": 1250 }, { "epoch": 0.2007461788422193, "grad_norm": 12.378408432006836, "learning_rate": 9.155300793703368e-05, "loss": 33.2344, "step": 1251 }, { "epoch": 0.20090664741043848, "grad_norm": 12.188116073608398, "learning_rate": 9.153875277633099e-05, "loss": 33.1875, "step": 1252 }, { "epoch": 0.20106711597865767, "grad_norm": 12.236346244812012, "learning_rate": 9.152448670899891e-05, "loss": 32.8906, "step": 1253 }, { "epoch": 0.2012275845468769, "grad_norm": 12.278716087341309, "learning_rate": 9.151020973878321e-05, "loss": 33.0625, "step": 1254 }, { "epoch": 0.20138805311509608, "grad_norm": 12.422780990600586, "learning_rate": 9.149592186943253e-05, "loss": 32.9922, "step": 1255 }, { "epoch": 0.20154852168331527, "grad_norm": 11.95702838897705, "learning_rate": 9.148162310469837e-05, "loss": 33.1406, "step": 1256 }, { "epoch": 0.2017089902515345, "grad_norm": 12.268261909484863, "learning_rate": 9.146731344833506e-05, "loss": 32.9141, "step": 1257 }, { "epoch": 0.20186945881975368, "grad_norm": 12.275854110717773, "learning_rate": 9.145299290409983e-05, "loss": 32.9531, "step": 1258 }, { "epoch": 0.20202992738797287, "grad_norm": 11.821500778198242, "learning_rate": 9.143866147575277e-05, "loss": 33.0547, "step": 1259 }, { "epoch": 0.2021903959561921, "grad_norm": 11.884370803833008, "learning_rate": 9.142431916705681e-05, "loss": 32.9844, "step": 1260 }, { "epoch": 0.20235086452441128, "grad_norm": 12.266637802124023, "learning_rate": 9.140996598177774e-05, "loss": 32.9141, "step": 1261 }, { "epoch": 0.20251133309263047, "grad_norm": 12.432637214660645, "learning_rate": 9.13956019236842e-05, "loss": 32.9141, "step": 1262 }, { "epoch": 0.2026718016608497, "grad_norm": 12.208271980285645, "learning_rate": 9.138122699654767e-05, "loss": 32.8359, "step": 1263 }, { "epoch": 0.20283227022906888, "grad_norm": 12.084965705871582, "learning_rate": 9.136684120414255e-05, "loss": 33.0078, "step": 1264 }, { "epoch": 0.20299273879728807, "grad_norm": 12.08887004852295, "learning_rate": 9.135244455024602e-05, "loss": 32.9844, "step": 1265 }, { "epoch": 0.2031532073655073, "grad_norm": 12.013785362243652, "learning_rate": 9.133803703863815e-05, "loss": 32.875, "step": 1266 }, { "epoch": 0.20331367593372648, "grad_norm": 12.149818420410156, "learning_rate": 9.132361867310184e-05, "loss": 32.8438, "step": 1267 }, { "epoch": 0.20347414450194568, "grad_norm": 12.387660026550293, "learning_rate": 9.130918945742286e-05, "loss": 32.8516, "step": 1268 }, { "epoch": 0.2036346130701649, "grad_norm": 12.095032691955566, "learning_rate": 9.129474939538983e-05, "loss": 32.9453, "step": 1269 }, { "epoch": 0.20379508163838408, "grad_norm": 12.155768394470215, "learning_rate": 9.128029849079418e-05, "loss": 32.9141, "step": 1270 }, { "epoch": 0.20395555020660328, "grad_norm": 12.340645790100098, "learning_rate": 9.126583674743022e-05, "loss": 32.8047, "step": 1271 }, { "epoch": 0.2041160187748225, "grad_norm": 12.533561706542969, "learning_rate": 9.125136416909512e-05, "loss": 32.7969, "step": 1272 }, { "epoch": 0.20427648734304168, "grad_norm": 12.141677856445312, "learning_rate": 9.123688075958886e-05, "loss": 32.9219, "step": 1273 }, { "epoch": 0.20443695591126088, "grad_norm": 12.39115047454834, "learning_rate": 9.122238652271428e-05, "loss": 32.8438, "step": 1274 }, { "epoch": 0.2045974244794801, "grad_norm": 12.06014347076416, "learning_rate": 9.120788146227708e-05, "loss": 33.0, "step": 1275 }, { "epoch": 0.20475789304769929, "grad_norm": 12.025286674499512, "learning_rate": 9.119336558208574e-05, "loss": 32.875, "step": 1276 }, { "epoch": 0.20491836161591848, "grad_norm": 12.142241477966309, "learning_rate": 9.117883888595167e-05, "loss": 32.8359, "step": 1277 }, { "epoch": 0.2050788301841377, "grad_norm": 11.947781562805176, "learning_rate": 9.116430137768905e-05, "loss": 32.875, "step": 1278 }, { "epoch": 0.20523929875235689, "grad_norm": 12.207921981811523, "learning_rate": 9.114975306111492e-05, "loss": 32.875, "step": 1279 }, { "epoch": 0.20539976732057608, "grad_norm": 12.209257125854492, "learning_rate": 9.113519394004917e-05, "loss": 32.9062, "step": 1280 }, { "epoch": 0.2055602358887953, "grad_norm": 12.395835876464844, "learning_rate": 9.112062401831454e-05, "loss": 32.875, "step": 1281 }, { "epoch": 0.20572070445701449, "grad_norm": 12.147117614746094, "learning_rate": 9.110604329973655e-05, "loss": 32.8594, "step": 1282 }, { "epoch": 0.20588117302523368, "grad_norm": 12.594613075256348, "learning_rate": 9.10914517881436e-05, "loss": 32.8359, "step": 1283 }, { "epoch": 0.2060416415934529, "grad_norm": 11.962821006774902, "learning_rate": 9.10768494873669e-05, "loss": 33.0469, "step": 1284 }, { "epoch": 0.2062021101616721, "grad_norm": 12.58713436126709, "learning_rate": 9.106223640124055e-05, "loss": 32.8438, "step": 1285 }, { "epoch": 0.20636257872989128, "grad_norm": 12.260599136352539, "learning_rate": 9.104761253360137e-05, "loss": 32.8047, "step": 1286 }, { "epoch": 0.2065230472981105, "grad_norm": 12.214639663696289, "learning_rate": 9.103297788828913e-05, "loss": 32.8438, "step": 1287 }, { "epoch": 0.2066835158663297, "grad_norm": 12.472445487976074, "learning_rate": 9.101833246914636e-05, "loss": 32.7891, "step": 1288 }, { "epoch": 0.20684398443454888, "grad_norm": 12.13967514038086, "learning_rate": 9.100367628001845e-05, "loss": 32.9453, "step": 1289 }, { "epoch": 0.2070044530027681, "grad_norm": 12.21585464477539, "learning_rate": 9.098900932475358e-05, "loss": 32.8281, "step": 1290 }, { "epoch": 0.2071649215709873, "grad_norm": 12.341490745544434, "learning_rate": 9.09743316072028e-05, "loss": 32.8516, "step": 1291 }, { "epoch": 0.20732539013920648, "grad_norm": 12.219271659851074, "learning_rate": 9.095964313121996e-05, "loss": 32.8984, "step": 1292 }, { "epoch": 0.2074858587074257, "grad_norm": 12.262969017028809, "learning_rate": 9.094494390066175e-05, "loss": 32.8594, "step": 1293 }, { "epoch": 0.2076463272756449, "grad_norm": 12.15459156036377, "learning_rate": 9.093023391938767e-05, "loss": 33.0625, "step": 1294 }, { "epoch": 0.20780679584386408, "grad_norm": 12.267287254333496, "learning_rate": 9.091551319126004e-05, "loss": 32.8125, "step": 1295 }, { "epoch": 0.2079672644120833, "grad_norm": 12.0936861038208, "learning_rate": 9.090078172014403e-05, "loss": 33.0, "step": 1296 }, { "epoch": 0.2081277329803025, "grad_norm": 12.326549530029297, "learning_rate": 9.08860395099076e-05, "loss": 32.8438, "step": 1297 }, { "epoch": 0.20828820154852168, "grad_norm": 12.684407234191895, "learning_rate": 9.087128656442154e-05, "loss": 32.8203, "step": 1298 }, { "epoch": 0.20844867011674087, "grad_norm": 12.278642654418945, "learning_rate": 9.085652288755946e-05, "loss": 32.9922, "step": 1299 }, { "epoch": 0.2086091386849601, "grad_norm": 12.168437957763672, "learning_rate": 9.084174848319779e-05, "loss": 33.2266, "step": 1300 }, { "epoch": 0.20876960725317928, "grad_norm": 12.049073219299316, "learning_rate": 9.082696335521576e-05, "loss": 33.1797, "step": 1301 }, { "epoch": 0.20893007582139847, "grad_norm": 12.324045181274414, "learning_rate": 9.081216750749546e-05, "loss": 33.0625, "step": 1302 }, { "epoch": 0.2090905443896177, "grad_norm": 12.318975448608398, "learning_rate": 9.079736094392176e-05, "loss": 32.9219, "step": 1303 }, { "epoch": 0.20925101295783688, "grad_norm": 12.20948314666748, "learning_rate": 9.078254366838229e-05, "loss": 33.125, "step": 1304 }, { "epoch": 0.20941148152605607, "grad_norm": 12.140623092651367, "learning_rate": 9.076771568476763e-05, "loss": 33.0312, "step": 1305 }, { "epoch": 0.2095719500942753, "grad_norm": 12.494233131408691, "learning_rate": 9.075287699697103e-05, "loss": 32.9453, "step": 1306 }, { "epoch": 0.20973241866249448, "grad_norm": 12.49509048461914, "learning_rate": 9.073802760888866e-05, "loss": 32.9766, "step": 1307 }, { "epoch": 0.20989288723071367, "grad_norm": 12.275577545166016, "learning_rate": 9.072316752441942e-05, "loss": 33.0781, "step": 1308 }, { "epoch": 0.2100533557989329, "grad_norm": 12.441141128540039, "learning_rate": 9.070829674746506e-05, "loss": 32.8828, "step": 1309 }, { "epoch": 0.21021382436715208, "grad_norm": 12.267849922180176, "learning_rate": 9.069341528193013e-05, "loss": 32.9453, "step": 1310 }, { "epoch": 0.21037429293537127, "grad_norm": 12.022933959960938, "learning_rate": 9.067852313172198e-05, "loss": 32.9219, "step": 1311 }, { "epoch": 0.2105347615035905, "grad_norm": 11.901705741882324, "learning_rate": 9.066362030075077e-05, "loss": 33.1719, "step": 1312 }, { "epoch": 0.21069523007180968, "grad_norm": 12.102359771728516, "learning_rate": 9.064870679292948e-05, "loss": 32.9688, "step": 1313 }, { "epoch": 0.21085569864002887, "grad_norm": 11.979387283325195, "learning_rate": 9.063378261217386e-05, "loss": 33.0312, "step": 1314 }, { "epoch": 0.2110161672082481, "grad_norm": 12.138246536254883, "learning_rate": 9.061884776240248e-05, "loss": 32.8828, "step": 1315 }, { "epoch": 0.21117663577646728, "grad_norm": 12.150689125061035, "learning_rate": 9.060390224753673e-05, "loss": 32.8516, "step": 1316 }, { "epoch": 0.21133710434468647, "grad_norm": 12.08694076538086, "learning_rate": 9.058894607150076e-05, "loss": 32.9219, "step": 1317 }, { "epoch": 0.2114975729129057, "grad_norm": 12.227446556091309, "learning_rate": 9.057397923822154e-05, "loss": 32.8281, "step": 1318 }, { "epoch": 0.21165804148112488, "grad_norm": 12.204444885253906, "learning_rate": 9.055900175162886e-05, "loss": 32.8906, "step": 1319 }, { "epoch": 0.21181851004934407, "grad_norm": 12.0230712890625, "learning_rate": 9.054401361565528e-05, "loss": 32.8594, "step": 1320 }, { "epoch": 0.2119789786175633, "grad_norm": 12.150514602661133, "learning_rate": 9.052901483423612e-05, "loss": 32.8672, "step": 1321 }, { "epoch": 0.21213944718578248, "grad_norm": 12.465692520141602, "learning_rate": 9.05140054113096e-05, "loss": 32.7578, "step": 1322 }, { "epoch": 0.21229991575400167, "grad_norm": 12.352755546569824, "learning_rate": 9.049898535081666e-05, "loss": 32.9688, "step": 1323 }, { "epoch": 0.2124603843222209, "grad_norm": 12.084716796875, "learning_rate": 9.0483954656701e-05, "loss": 32.9375, "step": 1324 }, { "epoch": 0.21262085289044008, "grad_norm": 12.14003849029541, "learning_rate": 9.046891333290921e-05, "loss": 32.8047, "step": 1325 }, { "epoch": 0.21278132145865927, "grad_norm": 12.204856872558594, "learning_rate": 9.045386138339058e-05, "loss": 32.8438, "step": 1326 }, { "epoch": 0.2129417900268785, "grad_norm": 12.141925811767578, "learning_rate": 9.043879881209724e-05, "loss": 32.8125, "step": 1327 }, { "epoch": 0.21310225859509768, "grad_norm": 12.220921516418457, "learning_rate": 9.042372562298411e-05, "loss": 32.9531, "step": 1328 }, { "epoch": 0.21326272716331687, "grad_norm": 12.071066856384277, "learning_rate": 9.040864182000888e-05, "loss": 32.8125, "step": 1329 }, { "epoch": 0.2134231957315361, "grad_norm": 12.516263961791992, "learning_rate": 9.039354740713199e-05, "loss": 32.7812, "step": 1330 }, { "epoch": 0.21358366429975528, "grad_norm": 12.333529472351074, "learning_rate": 9.037844238831677e-05, "loss": 32.875, "step": 1331 }, { "epoch": 0.21374413286797447, "grad_norm": 12.246232986450195, "learning_rate": 9.036332676752923e-05, "loss": 32.8594, "step": 1332 }, { "epoch": 0.2139046014361937, "grad_norm": 12.138320922851562, "learning_rate": 9.034820054873823e-05, "loss": 32.9922, "step": 1333 }, { "epoch": 0.21406507000441288, "grad_norm": 12.079879760742188, "learning_rate": 9.033306373591537e-05, "loss": 32.9062, "step": 1334 }, { "epoch": 0.21422553857263207, "grad_norm": 12.260420799255371, "learning_rate": 9.031791633303505e-05, "loss": 32.8438, "step": 1335 }, { "epoch": 0.2143860071408513, "grad_norm": 12.077990531921387, "learning_rate": 9.030275834407446e-05, "loss": 32.8594, "step": 1336 }, { "epoch": 0.21454647570907048, "grad_norm": 12.021785736083984, "learning_rate": 9.028758977301355e-05, "loss": 32.8672, "step": 1337 }, { "epoch": 0.21470694427728967, "grad_norm": 11.97958755493164, "learning_rate": 9.027241062383507e-05, "loss": 32.9375, "step": 1338 }, { "epoch": 0.2148674128455089, "grad_norm": 12.019061088562012, "learning_rate": 9.025722090052453e-05, "loss": 32.8438, "step": 1339 }, { "epoch": 0.21502788141372808, "grad_norm": 12.027741432189941, "learning_rate": 9.024202060707023e-05, "loss": 32.9375, "step": 1340 }, { "epoch": 0.21518834998194727, "grad_norm": 12.144603729248047, "learning_rate": 9.022680974746321e-05, "loss": 32.8594, "step": 1341 }, { "epoch": 0.2153488185501665, "grad_norm": 12.017986297607422, "learning_rate": 9.021158832569734e-05, "loss": 33.0859, "step": 1342 }, { "epoch": 0.21550928711838568, "grad_norm": 12.262337684631348, "learning_rate": 9.019635634576925e-05, "loss": 32.8125, "step": 1343 }, { "epoch": 0.21566975568660487, "grad_norm": 12.138935089111328, "learning_rate": 9.018111381167827e-05, "loss": 32.875, "step": 1344 }, { "epoch": 0.2158302242548241, "grad_norm": 12.42293643951416, "learning_rate": 9.01658607274266e-05, "loss": 32.9688, "step": 1345 }, { "epoch": 0.21599069282304328, "grad_norm": 12.291671752929688, "learning_rate": 9.015059709701918e-05, "loss": 32.9219, "step": 1346 }, { "epoch": 0.21615116139126248, "grad_norm": 12.263324737548828, "learning_rate": 9.013532292446368e-05, "loss": 32.7812, "step": 1347 }, { "epoch": 0.2163116299594817, "grad_norm": 12.13644790649414, "learning_rate": 9.012003821377055e-05, "loss": 32.8594, "step": 1348 }, { "epoch": 0.21647209852770088, "grad_norm": 12.133214950561523, "learning_rate": 9.010474296895309e-05, "loss": 32.9219, "step": 1349 }, { "epoch": 0.21663256709592008, "grad_norm": 12.368852615356445, "learning_rate": 9.008943719402722e-05, "loss": 33.2734, "step": 1350 }, { "epoch": 0.2167930356641393, "grad_norm": 12.330281257629395, "learning_rate": 9.007412089301175e-05, "loss": 33.3359, "step": 1351 }, { "epoch": 0.21695350423235849, "grad_norm": 12.439889907836914, "learning_rate": 9.005879406992819e-05, "loss": 33.1328, "step": 1352 }, { "epoch": 0.21711397280057768, "grad_norm": 12.23691177368164, "learning_rate": 9.004345672880083e-05, "loss": 33.0625, "step": 1353 }, { "epoch": 0.2172744413687969, "grad_norm": 12.311563491821289, "learning_rate": 9.002810887365675e-05, "loss": 32.9688, "step": 1354 }, { "epoch": 0.21743490993701609, "grad_norm": 12.084696769714355, "learning_rate": 9.001275050852573e-05, "loss": 32.9766, "step": 1355 }, { "epoch": 0.21759537850523528, "grad_norm": 12.217792510986328, "learning_rate": 8.999738163744034e-05, "loss": 32.9531, "step": 1356 }, { "epoch": 0.2177558470734545, "grad_norm": 12.23078727722168, "learning_rate": 8.998200226443593e-05, "loss": 32.9766, "step": 1357 }, { "epoch": 0.21791631564167369, "grad_norm": 12.091564178466797, "learning_rate": 8.996661239355058e-05, "loss": 33.0391, "step": 1358 }, { "epoch": 0.21807678420989288, "grad_norm": 12.01706600189209, "learning_rate": 8.995121202882513e-05, "loss": 33.0547, "step": 1359 }, { "epoch": 0.2182372527781121, "grad_norm": 12.153360366821289, "learning_rate": 8.993580117430318e-05, "loss": 32.8906, "step": 1360 }, { "epoch": 0.21839772134633129, "grad_norm": 12.33328628540039, "learning_rate": 8.992037983403108e-05, "loss": 32.8984, "step": 1361 }, { "epoch": 0.21855818991455048, "grad_norm": 11.897679328918457, "learning_rate": 8.990494801205795e-05, "loss": 32.8984, "step": 1362 }, { "epoch": 0.2187186584827697, "grad_norm": 12.080750465393066, "learning_rate": 8.988950571243562e-05, "loss": 32.9453, "step": 1363 }, { "epoch": 0.2188791270509889, "grad_norm": 12.217530250549316, "learning_rate": 8.987405293921875e-05, "loss": 32.8828, "step": 1364 }, { "epoch": 0.21903959561920808, "grad_norm": 12.157771110534668, "learning_rate": 8.985858969646465e-05, "loss": 32.875, "step": 1365 }, { "epoch": 0.2192000641874273, "grad_norm": 12.147436141967773, "learning_rate": 8.984311598823346e-05, "loss": 32.8984, "step": 1366 }, { "epoch": 0.2193605327556465, "grad_norm": 12.159769058227539, "learning_rate": 8.982763181858801e-05, "loss": 32.8984, "step": 1367 }, { "epoch": 0.21952100132386568, "grad_norm": 12.20595645904541, "learning_rate": 8.981213719159394e-05, "loss": 32.8516, "step": 1368 }, { "epoch": 0.2196814698920849, "grad_norm": 12.011938095092773, "learning_rate": 8.979663211131958e-05, "loss": 32.8594, "step": 1369 }, { "epoch": 0.2198419384603041, "grad_norm": 12.433873176574707, "learning_rate": 8.978111658183601e-05, "loss": 32.875, "step": 1370 }, { "epoch": 0.22000240702852328, "grad_norm": 12.138720512390137, "learning_rate": 8.976559060721708e-05, "loss": 32.8359, "step": 1371 }, { "epoch": 0.2201628755967425, "grad_norm": 12.414124488830566, "learning_rate": 8.975005419153938e-05, "loss": 32.8594, "step": 1372 }, { "epoch": 0.2203233441649617, "grad_norm": 12.07902717590332, "learning_rate": 8.973450733888221e-05, "loss": 32.9531, "step": 1373 }, { "epoch": 0.22048381273318088, "grad_norm": 12.080512046813965, "learning_rate": 8.971895005332766e-05, "loss": 32.875, "step": 1374 }, { "epoch": 0.2206442813014001, "grad_norm": 12.078123092651367, "learning_rate": 8.970338233896049e-05, "loss": 32.9141, "step": 1375 }, { "epoch": 0.2208047498696193, "grad_norm": 12.077031135559082, "learning_rate": 8.968780419986829e-05, "loss": 32.8047, "step": 1376 }, { "epoch": 0.22096521843783848, "grad_norm": 12.341989517211914, "learning_rate": 8.96722156401413e-05, "loss": 32.8047, "step": 1377 }, { "epoch": 0.2211256870060577, "grad_norm": 12.129439353942871, "learning_rate": 8.965661666387254e-05, "loss": 32.8281, "step": 1378 }, { "epoch": 0.2212861555742769, "grad_norm": 12.145071029663086, "learning_rate": 8.964100727515776e-05, "loss": 32.8281, "step": 1379 }, { "epoch": 0.22144662414249608, "grad_norm": 12.07311725616455, "learning_rate": 8.962538747809546e-05, "loss": 32.8203, "step": 1380 }, { "epoch": 0.2216070927107153, "grad_norm": 12.583333969116211, "learning_rate": 8.960975727678683e-05, "loss": 32.7891, "step": 1381 }, { "epoch": 0.2217675612789345, "grad_norm": 12.270670890808105, "learning_rate": 8.959411667533581e-05, "loss": 32.8203, "step": 1382 }, { "epoch": 0.22192802984715368, "grad_norm": 12.142889976501465, "learning_rate": 8.95784656778491e-05, "loss": 32.8359, "step": 1383 }, { "epoch": 0.2220884984153729, "grad_norm": 12.272384643554688, "learning_rate": 8.956280428843611e-05, "loss": 32.7812, "step": 1384 }, { "epoch": 0.2222489669835921, "grad_norm": 12.384993553161621, "learning_rate": 8.954713251120894e-05, "loss": 32.8203, "step": 1385 }, { "epoch": 0.22240943555181128, "grad_norm": 12.081121444702148, "learning_rate": 8.953145035028249e-05, "loss": 32.9453, "step": 1386 }, { "epoch": 0.2225699041200305, "grad_norm": 12.136312484741211, "learning_rate": 8.951575780977432e-05, "loss": 32.9062, "step": 1387 }, { "epoch": 0.2227303726882497, "grad_norm": 12.641393661499023, "learning_rate": 8.950005489380479e-05, "loss": 32.8047, "step": 1388 }, { "epoch": 0.22289084125646888, "grad_norm": 12.341940879821777, "learning_rate": 8.948434160649689e-05, "loss": 32.9219, "step": 1389 }, { "epoch": 0.2230513098246881, "grad_norm": 12.08092975616455, "learning_rate": 8.946861795197639e-05, "loss": 32.8984, "step": 1390 }, { "epoch": 0.2232117783929073, "grad_norm": 12.384552001953125, "learning_rate": 8.94528839343718e-05, "loss": 32.7656, "step": 1391 }, { "epoch": 0.22337224696112648, "grad_norm": 12.146490097045898, "learning_rate": 8.94371395578143e-05, "loss": 32.8203, "step": 1392 }, { "epoch": 0.2235327155293457, "grad_norm": 12.20400619506836, "learning_rate": 8.942138482643783e-05, "loss": 32.9453, "step": 1393 }, { "epoch": 0.2236931840975649, "grad_norm": 12.575555801391602, "learning_rate": 8.940561974437903e-05, "loss": 32.7734, "step": 1394 }, { "epoch": 0.22385365266578408, "grad_norm": 12.152567863464355, "learning_rate": 8.938984431577724e-05, "loss": 32.9297, "step": 1395 }, { "epoch": 0.2240141212340033, "grad_norm": 12.206945419311523, "learning_rate": 8.937405854477458e-05, "loss": 32.8906, "step": 1396 }, { "epoch": 0.2241745898022225, "grad_norm": 12.345471382141113, "learning_rate": 8.93582624355158e-05, "loss": 32.9062, "step": 1397 }, { "epoch": 0.22433505837044168, "grad_norm": 12.151382446289062, "learning_rate": 8.934245599214845e-05, "loss": 33.0, "step": 1398 }, { "epoch": 0.2244955269386609, "grad_norm": 12.370957374572754, "learning_rate": 8.932663921882272e-05, "loss": 33.1719, "step": 1399 }, { "epoch": 0.2246559955068801, "grad_norm": 12.34723949432373, "learning_rate": 8.931081211969155e-05, "loss": 33.1719, "step": 1400 }, { "epoch": 0.22481646407509928, "grad_norm": 12.405576705932617, "learning_rate": 8.929497469891057e-05, "loss": 33.1094, "step": 1401 }, { "epoch": 0.2249769326433185, "grad_norm": 12.072848320007324, "learning_rate": 8.927912696063818e-05, "loss": 33.2305, "step": 1402 }, { "epoch": 0.2251374012115377, "grad_norm": 12.099752426147461, "learning_rate": 8.926326890903541e-05, "loss": 33.0156, "step": 1403 }, { "epoch": 0.22529786977975688, "grad_norm": 12.172395706176758, "learning_rate": 8.924740054826603e-05, "loss": 32.9141, "step": 1404 }, { "epoch": 0.2254583383479761, "grad_norm": 12.75675106048584, "learning_rate": 8.923152188249654e-05, "loss": 32.9297, "step": 1405 }, { "epoch": 0.2256188069161953, "grad_norm": 12.349474906921387, "learning_rate": 8.92156329158961e-05, "loss": 32.8828, "step": 1406 }, { "epoch": 0.22577927548441448, "grad_norm": 12.161763191223145, "learning_rate": 8.919973365263662e-05, "loss": 32.9375, "step": 1407 }, { "epoch": 0.2259397440526337, "grad_norm": 12.233555793762207, "learning_rate": 8.918382409689267e-05, "loss": 32.8594, "step": 1408 }, { "epoch": 0.2261002126208529, "grad_norm": 12.148098945617676, "learning_rate": 8.916790425284159e-05, "loss": 32.9219, "step": 1409 }, { "epoch": 0.22626068118907208, "grad_norm": 12.291463851928711, "learning_rate": 8.915197412466331e-05, "loss": 32.9219, "step": 1410 }, { "epoch": 0.2264211497572913, "grad_norm": 12.368185997009277, "learning_rate": 8.913603371654058e-05, "loss": 32.8438, "step": 1411 }, { "epoch": 0.2265816183255105, "grad_norm": 12.466485977172852, "learning_rate": 8.912008303265878e-05, "loss": 32.9844, "step": 1412 }, { "epoch": 0.22674208689372968, "grad_norm": 12.151595115661621, "learning_rate": 8.9104122077206e-05, "loss": 32.8672, "step": 1413 }, { "epoch": 0.2269025554619489, "grad_norm": 12.278592109680176, "learning_rate": 8.908815085437303e-05, "loss": 32.9375, "step": 1414 }, { "epoch": 0.2270630240301681, "grad_norm": 12.017824172973633, "learning_rate": 8.907216936835336e-05, "loss": 32.875, "step": 1415 }, { "epoch": 0.22722349259838728, "grad_norm": 12.259954452514648, "learning_rate": 8.905617762334317e-05, "loss": 32.9609, "step": 1416 }, { "epoch": 0.2273839611666065, "grad_norm": 12.1551513671875, "learning_rate": 8.904017562354133e-05, "loss": 32.8047, "step": 1417 }, { "epoch": 0.2275444297348257, "grad_norm": 12.021613121032715, "learning_rate": 8.902416337314943e-05, "loss": 32.832, "step": 1418 }, { "epoch": 0.22770489830304488, "grad_norm": 12.352150917053223, "learning_rate": 8.90081408763717e-05, "loss": 33.0312, "step": 1419 }, { "epoch": 0.2278653668712641, "grad_norm": 12.306416511535645, "learning_rate": 8.89921081374151e-05, "loss": 32.8984, "step": 1420 }, { "epoch": 0.2280258354394833, "grad_norm": 12.528759002685547, "learning_rate": 8.897606516048928e-05, "loss": 32.875, "step": 1421 }, { "epoch": 0.22818630400770248, "grad_norm": 12.139875411987305, "learning_rate": 8.896001194980654e-05, "loss": 32.9922, "step": 1422 }, { "epoch": 0.2283467725759217, "grad_norm": 12.076135635375977, "learning_rate": 8.894394850958194e-05, "loss": 32.8672, "step": 1423 }, { "epoch": 0.2285072411441409, "grad_norm": 12.32767105102539, "learning_rate": 8.892787484403314e-05, "loss": 32.7812, "step": 1424 }, { "epoch": 0.22866770971236008, "grad_norm": 11.887190818786621, "learning_rate": 8.891179095738053e-05, "loss": 32.8984, "step": 1425 }, { "epoch": 0.2288281782805793, "grad_norm": 12.283519744873047, "learning_rate": 8.889569685384719e-05, "loss": 32.8281, "step": 1426 }, { "epoch": 0.2289886468487985, "grad_norm": 12.454032897949219, "learning_rate": 8.887959253765885e-05, "loss": 32.7812, "step": 1427 }, { "epoch": 0.22914911541701768, "grad_norm": 12.285383224487305, "learning_rate": 8.886347801304398e-05, "loss": 32.8125, "step": 1428 }, { "epoch": 0.2293095839852369, "grad_norm": 12.64743423461914, "learning_rate": 8.884735328423365e-05, "loss": 32.7734, "step": 1429 }, { "epoch": 0.2294700525534561, "grad_norm": 12.393592834472656, "learning_rate": 8.88312183554617e-05, "loss": 32.7969, "step": 1430 }, { "epoch": 0.22963052112167529, "grad_norm": 12.345138549804688, "learning_rate": 8.881507323096457e-05, "loss": 32.9141, "step": 1431 }, { "epoch": 0.2297909896898945, "grad_norm": 12.133811950683594, "learning_rate": 8.879891791498138e-05, "loss": 32.8516, "step": 1432 }, { "epoch": 0.2299514582581137, "grad_norm": 12.266765594482422, "learning_rate": 8.8782752411754e-05, "loss": 32.8047, "step": 1433 }, { "epoch": 0.23011192682633289, "grad_norm": 12.270825386047363, "learning_rate": 8.876657672552691e-05, "loss": 32.8203, "step": 1434 }, { "epoch": 0.2302723953945521, "grad_norm": 12.332040786743164, "learning_rate": 8.875039086054729e-05, "loss": 32.7969, "step": 1435 }, { "epoch": 0.2304328639627713, "grad_norm": 12.076224327087402, "learning_rate": 8.873419482106497e-05, "loss": 32.8359, "step": 1436 }, { "epoch": 0.23059333253099049, "grad_norm": 12.318559646606445, "learning_rate": 8.871798861133247e-05, "loss": 32.75, "step": 1437 }, { "epoch": 0.2307538010992097, "grad_norm": 12.200715065002441, "learning_rate": 8.870177223560499e-05, "loss": 32.8516, "step": 1438 }, { "epoch": 0.2309142696674289, "grad_norm": 12.136573791503906, "learning_rate": 8.868554569814036e-05, "loss": 32.9062, "step": 1439 }, { "epoch": 0.2310747382356481, "grad_norm": 12.295567512512207, "learning_rate": 8.866930900319914e-05, "loss": 32.9219, "step": 1440 }, { "epoch": 0.2312352068038673, "grad_norm": 12.0796537399292, "learning_rate": 8.865306215504448e-05, "loss": 32.8984, "step": 1441 }, { "epoch": 0.2313956753720865, "grad_norm": 12.516879081726074, "learning_rate": 8.863680515794225e-05, "loss": 32.7969, "step": 1442 }, { "epoch": 0.2315561439403057, "grad_norm": 12.328428268432617, "learning_rate": 8.862053801616099e-05, "loss": 32.7969, "step": 1443 }, { "epoch": 0.2317166125085249, "grad_norm": 12.261853218078613, "learning_rate": 8.860426073397188e-05, "loss": 32.75, "step": 1444 }, { "epoch": 0.2318770810767441, "grad_norm": 11.947609901428223, "learning_rate": 8.858797331564874e-05, "loss": 32.9531, "step": 1445 }, { "epoch": 0.2320375496449633, "grad_norm": 12.389449119567871, "learning_rate": 8.85716757654681e-05, "loss": 32.8906, "step": 1446 }, { "epoch": 0.2321980182131825, "grad_norm": 12.28200912475586, "learning_rate": 8.855536808770912e-05, "loss": 32.9375, "step": 1447 }, { "epoch": 0.2323584867814017, "grad_norm": 12.20185661315918, "learning_rate": 8.853905028665364e-05, "loss": 32.9531, "step": 1448 }, { "epoch": 0.2325189553496209, "grad_norm": 11.966291427612305, "learning_rate": 8.852272236658615e-05, "loss": 33.0781, "step": 1449 }, { "epoch": 0.2326794239178401, "grad_norm": 12.371480941772461, "learning_rate": 8.850638433179376e-05, "loss": 33.2266, "step": 1450 }, { "epoch": 0.2328398924860593, "grad_norm": 12.825140953063965, "learning_rate": 8.84900361865663e-05, "loss": 33.2656, "step": 1451 }, { "epoch": 0.2330003610542785, "grad_norm": 12.285270690917969, "learning_rate": 8.847367793519623e-05, "loss": 33.0703, "step": 1452 }, { "epoch": 0.2331608296224977, "grad_norm": 12.456119537353516, "learning_rate": 8.845730958197862e-05, "loss": 33.1562, "step": 1453 }, { "epoch": 0.2333212981907169, "grad_norm": 12.289176940917969, "learning_rate": 8.844093113121125e-05, "loss": 33.0312, "step": 1454 }, { "epoch": 0.2334817667589361, "grad_norm": 12.289371490478516, "learning_rate": 8.842454258719455e-05, "loss": 33.1328, "step": 1455 }, { "epoch": 0.2336422353271553, "grad_norm": 12.034566879272461, "learning_rate": 8.840814395423155e-05, "loss": 33.0156, "step": 1456 }, { "epoch": 0.2338027038953745, "grad_norm": 12.092621803283691, "learning_rate": 8.839173523662798e-05, "loss": 32.9688, "step": 1457 }, { "epoch": 0.2339631724635937, "grad_norm": 12.039896011352539, "learning_rate": 8.837531643869218e-05, "loss": 32.9922, "step": 1458 }, { "epoch": 0.2341236410318129, "grad_norm": 12.225656509399414, "learning_rate": 8.835888756473516e-05, "loss": 32.8984, "step": 1459 }, { "epoch": 0.2342841096000321, "grad_norm": 12.149333000183105, "learning_rate": 8.834244861907059e-05, "loss": 32.9531, "step": 1460 }, { "epoch": 0.2344445781682513, "grad_norm": 12.226985931396484, "learning_rate": 8.832599960601475e-05, "loss": 32.9375, "step": 1461 }, { "epoch": 0.2346050467364705, "grad_norm": 12.215836524963379, "learning_rate": 8.830954052988655e-05, "loss": 32.8594, "step": 1462 }, { "epoch": 0.2347655153046897, "grad_norm": 12.228897094726562, "learning_rate": 8.829307139500763e-05, "loss": 32.9219, "step": 1463 }, { "epoch": 0.2349259838729089, "grad_norm": 12.165102005004883, "learning_rate": 8.827659220570214e-05, "loss": 32.875, "step": 1464 }, { "epoch": 0.2350864524411281, "grad_norm": 12.156187057495117, "learning_rate": 8.826010296629701e-05, "loss": 32.8828, "step": 1465 }, { "epoch": 0.2352469210093473, "grad_norm": 12.332021713256836, "learning_rate": 8.82436036811217e-05, "loss": 32.9609, "step": 1466 }, { "epoch": 0.2354073895775665, "grad_norm": 12.089898109436035, "learning_rate": 8.822709435450834e-05, "loss": 32.8984, "step": 1467 }, { "epoch": 0.2355678581457857, "grad_norm": 12.147605895996094, "learning_rate": 8.821057499079174e-05, "loss": 32.8672, "step": 1468 }, { "epoch": 0.2357283267140049, "grad_norm": 12.02421760559082, "learning_rate": 8.819404559430928e-05, "loss": 32.9297, "step": 1469 }, { "epoch": 0.2358887952822241, "grad_norm": 12.345791816711426, "learning_rate": 8.817750616940101e-05, "loss": 32.7812, "step": 1470 }, { "epoch": 0.2360492638504433, "grad_norm": 12.436837196350098, "learning_rate": 8.816095672040961e-05, "loss": 32.9297, "step": 1471 }, { "epoch": 0.2362097324186625, "grad_norm": 12.684324264526367, "learning_rate": 8.81443972516804e-05, "loss": 32.8594, "step": 1472 }, { "epoch": 0.2363702009868817, "grad_norm": 12.096505165100098, "learning_rate": 8.81278277675613e-05, "loss": 32.875, "step": 1473 }, { "epoch": 0.23653066955510088, "grad_norm": 12.337701797485352, "learning_rate": 8.811124827240289e-05, "loss": 32.7969, "step": 1474 }, { "epoch": 0.2366911381233201, "grad_norm": 11.9680814743042, "learning_rate": 8.809465877055835e-05, "loss": 32.8828, "step": 1475 }, { "epoch": 0.2368516066915393, "grad_norm": 12.200042724609375, "learning_rate": 8.807805926638354e-05, "loss": 32.832, "step": 1476 }, { "epoch": 0.23701207525975848, "grad_norm": 12.14283275604248, "learning_rate": 8.806144976423689e-05, "loss": 32.8438, "step": 1477 }, { "epoch": 0.2371725438279777, "grad_norm": 12.271925926208496, "learning_rate": 8.804483026847947e-05, "loss": 32.8984, "step": 1478 }, { "epoch": 0.2373330123961969, "grad_norm": 11.965862274169922, "learning_rate": 8.802820078347499e-05, "loss": 33.0234, "step": 1479 }, { "epoch": 0.23749348096441608, "grad_norm": 12.069267272949219, "learning_rate": 8.801156131358977e-05, "loss": 32.832, "step": 1480 }, { "epoch": 0.2376539495326353, "grad_norm": 12.075664520263672, "learning_rate": 8.799491186319277e-05, "loss": 32.8359, "step": 1481 }, { "epoch": 0.2378144181008545, "grad_norm": 12.396795272827148, "learning_rate": 8.797825243665554e-05, "loss": 32.8242, "step": 1482 }, { "epoch": 0.23797488666907368, "grad_norm": 12.132454872131348, "learning_rate": 8.796158303835225e-05, "loss": 32.8281, "step": 1483 }, { "epoch": 0.2381353552372929, "grad_norm": 12.14719295501709, "learning_rate": 8.794490367265974e-05, "loss": 32.8477, "step": 1484 }, { "epoch": 0.2382958238055121, "grad_norm": 12.20322036743164, "learning_rate": 8.792821434395741e-05, "loss": 32.8242, "step": 1485 }, { "epoch": 0.23845629237373128, "grad_norm": 12.017762184143066, "learning_rate": 8.791151505662729e-05, "loss": 32.8555, "step": 1486 }, { "epoch": 0.2386167609419505, "grad_norm": 12.53168773651123, "learning_rate": 8.789480581505404e-05, "loss": 32.8125, "step": 1487 }, { "epoch": 0.2387772295101697, "grad_norm": 12.142306327819824, "learning_rate": 8.787808662362493e-05, "loss": 33.0391, "step": 1488 }, { "epoch": 0.23893769807838888, "grad_norm": 12.139273643493652, "learning_rate": 8.786135748672984e-05, "loss": 32.875, "step": 1489 }, { "epoch": 0.2390981666466081, "grad_norm": 12.138469696044922, "learning_rate": 8.784461840876123e-05, "loss": 32.8359, "step": 1490 }, { "epoch": 0.2392586352148273, "grad_norm": 12.269777297973633, "learning_rate": 8.782786939411423e-05, "loss": 32.8516, "step": 1491 }, { "epoch": 0.23941910378304648, "grad_norm": 12.708763122558594, "learning_rate": 8.781111044718656e-05, "loss": 32.7266, "step": 1492 }, { "epoch": 0.2395795723512657, "grad_norm": 11.962242126464844, "learning_rate": 8.779434157237847e-05, "loss": 32.9297, "step": 1493 }, { "epoch": 0.2397400409194849, "grad_norm": 12.079781532287598, "learning_rate": 8.777756277409295e-05, "loss": 32.8867, "step": 1494 }, { "epoch": 0.23990050948770408, "grad_norm": 12.203561782836914, "learning_rate": 8.776077405673548e-05, "loss": 32.9219, "step": 1495 }, { "epoch": 0.2400609780559233, "grad_norm": 12.2855863571167, "learning_rate": 8.774397542471424e-05, "loss": 32.8633, "step": 1496 }, { "epoch": 0.2402214466241425, "grad_norm": 12.076820373535156, "learning_rate": 8.772716688243992e-05, "loss": 33.0, "step": 1497 }, { "epoch": 0.24038191519236168, "grad_norm": 12.285438537597656, "learning_rate": 8.771034843432588e-05, "loss": 32.9648, "step": 1498 }, { "epoch": 0.2405423837605809, "grad_norm": 12.457834243774414, "learning_rate": 8.769352008478808e-05, "loss": 32.9492, "step": 1499 }, { "epoch": 0.2407028523288001, "grad_norm": 12.1118745803833, "learning_rate": 8.767668183824502e-05, "loss": 33.3594, "step": 1500 }, { "epoch": 0.24086332089701928, "grad_norm": 12.187365531921387, "learning_rate": 8.765983369911785e-05, "loss": 33.2656, "step": 1501 }, { "epoch": 0.2410237894652385, "grad_norm": 12.717020988464355, "learning_rate": 8.764297567183031e-05, "loss": 33.0117, "step": 1502 }, { "epoch": 0.2411842580334577, "grad_norm": 12.215934753417969, "learning_rate": 8.762610776080872e-05, "loss": 33.0352, "step": 1503 }, { "epoch": 0.24134472660167688, "grad_norm": 12.21189022064209, "learning_rate": 8.760922997048201e-05, "loss": 32.9219, "step": 1504 }, { "epoch": 0.2415051951698961, "grad_norm": 12.31835651397705, "learning_rate": 8.75923423052817e-05, "loss": 32.8867, "step": 1505 }, { "epoch": 0.2416656637381153, "grad_norm": 12.166045188903809, "learning_rate": 8.757544476964192e-05, "loss": 32.8477, "step": 1506 }, { "epoch": 0.24182613230633448, "grad_norm": 12.086834907531738, "learning_rate": 8.755853736799935e-05, "loss": 32.8828, "step": 1507 }, { "epoch": 0.2419866008745537, "grad_norm": 12.154613494873047, "learning_rate": 8.75416201047933e-05, "loss": 32.8359, "step": 1508 }, { "epoch": 0.2421470694427729, "grad_norm": 12.0834379196167, "learning_rate": 8.752469298446564e-05, "loss": 32.9141, "step": 1509 }, { "epoch": 0.24230753801099209, "grad_norm": 12.016217231750488, "learning_rate": 8.750775601146086e-05, "loss": 32.9258, "step": 1510 }, { "epoch": 0.2424680065792113, "grad_norm": 12.270318031311035, "learning_rate": 8.749080919022599e-05, "loss": 33.0078, "step": 1511 }, { "epoch": 0.2426284751474305, "grad_norm": 12.023747444152832, "learning_rate": 8.747385252521073e-05, "loss": 33.0273, "step": 1512 }, { "epoch": 0.24278894371564969, "grad_norm": 12.145262718200684, "learning_rate": 8.745688602086724e-05, "loss": 32.8828, "step": 1513 }, { "epoch": 0.2429494122838689, "grad_norm": 12.021357536315918, "learning_rate": 8.743990968165036e-05, "loss": 32.8086, "step": 1514 }, { "epoch": 0.2431098808520881, "grad_norm": 12.273113250732422, "learning_rate": 8.742292351201751e-05, "loss": 32.8203, "step": 1515 }, { "epoch": 0.24327034942030729, "grad_norm": 12.087359428405762, "learning_rate": 8.740592751642866e-05, "loss": 32.8359, "step": 1516 }, { "epoch": 0.2434308179885265, "grad_norm": 12.404559135437012, "learning_rate": 8.738892169934633e-05, "loss": 32.9531, "step": 1517 }, { "epoch": 0.2435912865567457, "grad_norm": 11.9540376663208, "learning_rate": 8.737190606523568e-05, "loss": 32.9375, "step": 1518 }, { "epoch": 0.2437517551249649, "grad_norm": 12.553484916687012, "learning_rate": 8.735488061856446e-05, "loss": 32.8164, "step": 1519 }, { "epoch": 0.2439122236931841, "grad_norm": 12.425048828125, "learning_rate": 8.733784536380288e-05, "loss": 32.7891, "step": 1520 }, { "epoch": 0.2440726922614033, "grad_norm": 12.216239929199219, "learning_rate": 8.732080030542386e-05, "loss": 32.7812, "step": 1521 }, { "epoch": 0.2442331608296225, "grad_norm": 12.290180206298828, "learning_rate": 8.730374544790282e-05, "loss": 32.7734, "step": 1522 }, { "epoch": 0.2443936293978417, "grad_norm": 11.95776081085205, "learning_rate": 8.728668079571778e-05, "loss": 32.875, "step": 1523 }, { "epoch": 0.2445540979660609, "grad_norm": 12.208032608032227, "learning_rate": 8.726960635334932e-05, "loss": 32.8438, "step": 1524 }, { "epoch": 0.2447145665342801, "grad_norm": 12.076950073242188, "learning_rate": 8.725252212528059e-05, "loss": 32.832, "step": 1525 }, { "epoch": 0.2448750351024993, "grad_norm": 11.913230895996094, "learning_rate": 8.723542811599732e-05, "loss": 32.9258, "step": 1526 }, { "epoch": 0.2450355036707185, "grad_norm": 12.263748168945312, "learning_rate": 8.721832432998778e-05, "loss": 32.7773, "step": 1527 }, { "epoch": 0.2451959722389377, "grad_norm": 12.353625297546387, "learning_rate": 8.720121077174285e-05, "loss": 32.7539, "step": 1528 }, { "epoch": 0.2453564408071569, "grad_norm": 12.036454200744629, "learning_rate": 8.718408744575598e-05, "loss": 32.8789, "step": 1529 }, { "epoch": 0.2455169093753761, "grad_norm": 12.150959968566895, "learning_rate": 8.716695435652311e-05, "loss": 32.7734, "step": 1530 }, { "epoch": 0.2456773779435953, "grad_norm": 12.198078155517578, "learning_rate": 8.714981150854281e-05, "loss": 32.7578, "step": 1531 }, { "epoch": 0.2458378465118145, "grad_norm": 12.131881713867188, "learning_rate": 8.713265890631619e-05, "loss": 32.7266, "step": 1532 }, { "epoch": 0.2459983150800337, "grad_norm": 12.13191032409668, "learning_rate": 8.711549655434692e-05, "loss": 32.7109, "step": 1533 }, { "epoch": 0.2461587836482529, "grad_norm": 12.068419456481934, "learning_rate": 8.709832445714127e-05, "loss": 32.7734, "step": 1534 }, { "epoch": 0.2463192522164721, "grad_norm": 12.154287338256836, "learning_rate": 8.708114261920802e-05, "loss": 32.7422, "step": 1535 }, { "epoch": 0.2464797207846913, "grad_norm": 12.393065452575684, "learning_rate": 8.706395104505849e-05, "loss": 32.8203, "step": 1536 }, { "epoch": 0.2466401893529105, "grad_norm": 12.652451515197754, "learning_rate": 8.704674973920662e-05, "loss": 32.75, "step": 1537 }, { "epoch": 0.2468006579211297, "grad_norm": 12.4011869430542, "learning_rate": 8.702953870616886e-05, "loss": 32.7773, "step": 1538 }, { "epoch": 0.2469611264893489, "grad_norm": 12.26321029663086, "learning_rate": 8.701231795046426e-05, "loss": 32.7344, "step": 1539 }, { "epoch": 0.2471215950575681, "grad_norm": 12.016030311584473, "learning_rate": 8.699508747661433e-05, "loss": 32.875, "step": 1540 }, { "epoch": 0.2472820636257873, "grad_norm": 12.223402976989746, "learning_rate": 8.697784728914324e-05, "loss": 32.7578, "step": 1541 }, { "epoch": 0.2474425321940065, "grad_norm": 12.401176452636719, "learning_rate": 8.696059739257766e-05, "loss": 32.7773, "step": 1542 }, { "epoch": 0.2476030007622257, "grad_norm": 12.153773307800293, "learning_rate": 8.694333779144679e-05, "loss": 32.832, "step": 1543 }, { "epoch": 0.2477634693304449, "grad_norm": 12.33544921875, "learning_rate": 8.692606849028242e-05, "loss": 32.8086, "step": 1544 }, { "epoch": 0.2479239378986641, "grad_norm": 12.323482513427734, "learning_rate": 8.690878949361885e-05, "loss": 32.793, "step": 1545 }, { "epoch": 0.2480844064668833, "grad_norm": 12.268255233764648, "learning_rate": 8.689150080599294e-05, "loss": 32.793, "step": 1546 }, { "epoch": 0.2482448750351025, "grad_norm": 12.302160263061523, "learning_rate": 8.687420243194411e-05, "loss": 32.9141, "step": 1547 }, { "epoch": 0.2484053436033217, "grad_norm": 12.111080169677734, "learning_rate": 8.685689437601433e-05, "loss": 32.9531, "step": 1548 }, { "epoch": 0.2485658121715409, "grad_norm": 12.30471134185791, "learning_rate": 8.683957664274805e-05, "loss": 32.9844, "step": 1549 }, { "epoch": 0.2487262807397601, "grad_norm": 12.242919921875, "learning_rate": 8.682224923669234e-05, "loss": 33.3594, "step": 1550 }, { "epoch": 0.2488867493079793, "grad_norm": 12.300583839416504, "learning_rate": 8.680491216239674e-05, "loss": 32.9219, "step": 1551 }, { "epoch": 0.2490472178761985, "grad_norm": 12.035911560058594, "learning_rate": 8.678756542441338e-05, "loss": 32.8672, "step": 1552 }, { "epoch": 0.2492076864444177, "grad_norm": 12.381172180175781, "learning_rate": 8.67702090272969e-05, "loss": 32.8984, "step": 1553 }, { "epoch": 0.2493681550126369, "grad_norm": 12.15617847442627, "learning_rate": 8.675284297560452e-05, "loss": 32.8438, "step": 1554 }, { "epoch": 0.2495286235808561, "grad_norm": 11.890564918518066, "learning_rate": 8.673546727389592e-05, "loss": 32.875, "step": 1555 }, { "epoch": 0.2496890921490753, "grad_norm": 12.362042427062988, "learning_rate": 8.671808192673336e-05, "loss": 32.8672, "step": 1556 }, { "epoch": 0.2498495607172945, "grad_norm": 12.283479690551758, "learning_rate": 8.670068693868166e-05, "loss": 32.9219, "step": 1557 }, { "epoch": 0.2500100292855137, "grad_norm": 12.288480758666992, "learning_rate": 8.668328231430808e-05, "loss": 32.8984, "step": 1558 }, { "epoch": 0.2501704978537329, "grad_norm": 12.219414710998535, "learning_rate": 8.66658680581825e-05, "loss": 32.7812, "step": 1559 }, { "epoch": 0.2503309664219521, "grad_norm": 12.594189643859863, "learning_rate": 8.664844417487731e-05, "loss": 32.8672, "step": 1560 }, { "epoch": 0.2504914349901713, "grad_norm": 12.14850902557373, "learning_rate": 8.663101066896741e-05, "loss": 32.8203, "step": 1561 }, { "epoch": 0.2506519035583905, "grad_norm": 11.947615623474121, "learning_rate": 8.661356754503021e-05, "loss": 32.9375, "step": 1562 }, { "epoch": 0.2508123721266097, "grad_norm": 12.098788261413574, "learning_rate": 8.659611480764569e-05, "loss": 32.8125, "step": 1563 }, { "epoch": 0.2509728406948289, "grad_norm": 12.146905899047852, "learning_rate": 8.657865246139631e-05, "loss": 32.875, "step": 1564 }, { "epoch": 0.2511333092630481, "grad_norm": 12.195253372192383, "learning_rate": 8.656118051086708e-05, "loss": 32.875, "step": 1565 }, { "epoch": 0.2512937778312673, "grad_norm": 12.287575721740723, "learning_rate": 8.654369896064553e-05, "loss": 32.8359, "step": 1566 }, { "epoch": 0.2514542463994865, "grad_norm": 12.150550842285156, "learning_rate": 8.652620781532172e-05, "loss": 32.8203, "step": 1567 }, { "epoch": 0.2516147149677057, "grad_norm": 12.167280197143555, "learning_rate": 8.650870707948819e-05, "loss": 32.9219, "step": 1568 }, { "epoch": 0.2517751835359249, "grad_norm": 12.174259185791016, "learning_rate": 8.649119675774005e-05, "loss": 32.8906, "step": 1569 }, { "epoch": 0.2519356521041441, "grad_norm": 12.27365779876709, "learning_rate": 8.647367685467487e-05, "loss": 32.75, "step": 1570 }, { "epoch": 0.2520961206723633, "grad_norm": 12.269755363464355, "learning_rate": 8.645614737489278e-05, "loss": 32.875, "step": 1571 }, { "epoch": 0.2522565892405825, "grad_norm": 12.205912590026855, "learning_rate": 8.64386083229964e-05, "loss": 32.9062, "step": 1572 }, { "epoch": 0.2524170578088017, "grad_norm": 12.022317886352539, "learning_rate": 8.642105970359091e-05, "loss": 32.8516, "step": 1573 }, { "epoch": 0.2525775263770209, "grad_norm": 12.727015495300293, "learning_rate": 8.640350152128394e-05, "loss": 32.7891, "step": 1574 }, { "epoch": 0.2527379949452401, "grad_norm": 12.014094352722168, "learning_rate": 8.638593378068565e-05, "loss": 32.7891, "step": 1575 }, { "epoch": 0.2528984635134593, "grad_norm": 11.885509490966797, "learning_rate": 8.636835648640874e-05, "loss": 32.7891, "step": 1576 }, { "epoch": 0.2530589320816785, "grad_norm": 12.463735580444336, "learning_rate": 8.635076964306836e-05, "loss": 32.6875, "step": 1577 }, { "epoch": 0.2532194006498977, "grad_norm": 12.331717491149902, "learning_rate": 8.633317325528223e-05, "loss": 32.7422, "step": 1578 }, { "epoch": 0.2533798692181169, "grad_norm": 12.031414031982422, "learning_rate": 8.631556732767055e-05, "loss": 32.7578, "step": 1579 }, { "epoch": 0.2535403377863361, "grad_norm": 12.125776290893555, "learning_rate": 8.629795186485601e-05, "loss": 32.8438, "step": 1580 }, { "epoch": 0.2537008063545553, "grad_norm": 12.413663864135742, "learning_rate": 8.628032687146381e-05, "loss": 32.7812, "step": 1581 }, { "epoch": 0.2538612749227745, "grad_norm": 12.076170921325684, "learning_rate": 8.626269235212169e-05, "loss": 32.7812, "step": 1582 }, { "epoch": 0.2540217434909937, "grad_norm": 12.145233154296875, "learning_rate": 8.624504831145983e-05, "loss": 32.7891, "step": 1583 }, { "epoch": 0.2541822120592129, "grad_norm": 12.391176223754883, "learning_rate": 8.622739475411096e-05, "loss": 32.8047, "step": 1584 }, { "epoch": 0.2543426806274321, "grad_norm": 12.337126731872559, "learning_rate": 8.620973168471027e-05, "loss": 32.7656, "step": 1585 }, { "epoch": 0.2545031491956513, "grad_norm": 12.398188591003418, "learning_rate": 8.619205910789549e-05, "loss": 32.7656, "step": 1586 }, { "epoch": 0.2546636177638705, "grad_norm": 12.386624336242676, "learning_rate": 8.61743770283068e-05, "loss": 32.7578, "step": 1587 }, { "epoch": 0.2548240863320897, "grad_norm": 12.456376075744629, "learning_rate": 8.615668545058692e-05, "loss": 32.7344, "step": 1588 }, { "epoch": 0.2549845549003089, "grad_norm": 12.392383575439453, "learning_rate": 8.613898437938102e-05, "loss": 32.7344, "step": 1589 }, { "epoch": 0.2551450234685281, "grad_norm": 12.075759887695312, "learning_rate": 8.612127381933682e-05, "loss": 32.7656, "step": 1590 }, { "epoch": 0.2553054920367473, "grad_norm": 11.923885345458984, "learning_rate": 8.610355377510444e-05, "loss": 32.9609, "step": 1591 }, { "epoch": 0.2554659606049665, "grad_norm": 12.397507667541504, "learning_rate": 8.60858242513366e-05, "loss": 32.7891, "step": 1592 }, { "epoch": 0.2556264291731857, "grad_norm": 12.016196250915527, "learning_rate": 8.606808525268843e-05, "loss": 32.9375, "step": 1593 }, { "epoch": 0.2557868977414049, "grad_norm": 12.007681846618652, "learning_rate": 8.605033678381757e-05, "loss": 32.7578, "step": 1594 }, { "epoch": 0.2559473663096241, "grad_norm": 12.158896446228027, "learning_rate": 8.603257884938415e-05, "loss": 32.8203, "step": 1595 }, { "epoch": 0.2561078348778433, "grad_norm": 12.273011207580566, "learning_rate": 8.60148114540508e-05, "loss": 32.875, "step": 1596 }, { "epoch": 0.2562683034460625, "grad_norm": 12.392343521118164, "learning_rate": 8.59970346024826e-05, "loss": 32.8047, "step": 1597 }, { "epoch": 0.2564287720142817, "grad_norm": 12.166397094726562, "learning_rate": 8.597924829934716e-05, "loss": 32.9297, "step": 1598 }, { "epoch": 0.2565892405825009, "grad_norm": 12.192665100097656, "learning_rate": 8.596145254931451e-05, "loss": 32.8906, "step": 1599 }, { "epoch": 0.2567497091507201, "grad_norm": 12.09815788269043, "learning_rate": 8.594364735705723e-05, "loss": 33.1484, "step": 1600 }, { "epoch": 0.2569101777189393, "grad_norm": 12.279725074768066, "learning_rate": 8.59258327272503e-05, "loss": 33.1797, "step": 1601 }, { "epoch": 0.2570706462871585, "grad_norm": 12.22189998626709, "learning_rate": 8.590800866457125e-05, "loss": 32.9805, "step": 1602 }, { "epoch": 0.2572311148553777, "grad_norm": 12.157562255859375, "learning_rate": 8.589017517370006e-05, "loss": 32.9141, "step": 1603 }, { "epoch": 0.2573915834235969, "grad_norm": 11.966273307800293, "learning_rate": 8.587233225931917e-05, "loss": 32.9648, "step": 1604 }, { "epoch": 0.2575520519918161, "grad_norm": 12.22354507446289, "learning_rate": 8.585447992611351e-05, "loss": 32.9531, "step": 1605 }, { "epoch": 0.2577125205600353, "grad_norm": 12.483003616333008, "learning_rate": 8.583661817877048e-05, "loss": 32.9453, "step": 1606 }, { "epoch": 0.2578729891282545, "grad_norm": 12.02143669128418, "learning_rate": 8.581874702197996e-05, "loss": 32.8242, "step": 1607 }, { "epoch": 0.2580334576964737, "grad_norm": 12.30821418762207, "learning_rate": 8.580086646043431e-05, "loss": 32.7617, "step": 1608 }, { "epoch": 0.2581939262646929, "grad_norm": 12.234659194946289, "learning_rate": 8.578297649882829e-05, "loss": 32.8359, "step": 1609 }, { "epoch": 0.2583543948329121, "grad_norm": 11.966185569763184, "learning_rate": 8.576507714185923e-05, "loss": 32.7734, "step": 1610 }, { "epoch": 0.2585148634011313, "grad_norm": 12.146775245666504, "learning_rate": 8.574716839422686e-05, "loss": 32.832, "step": 1611 }, { "epoch": 0.2586753319693505, "grad_norm": 12.342456817626953, "learning_rate": 8.572925026063339e-05, "loss": 32.75, "step": 1612 }, { "epoch": 0.2588358005375697, "grad_norm": 12.13851547241211, "learning_rate": 8.57113227457835e-05, "loss": 32.7773, "step": 1613 }, { "epoch": 0.2589962691057889, "grad_norm": 12.031702995300293, "learning_rate": 8.569338585438434e-05, "loss": 32.9609, "step": 1614 }, { "epoch": 0.2591567376740081, "grad_norm": 12.210882186889648, "learning_rate": 8.56754395911455e-05, "loss": 32.9883, "step": 1615 }, { "epoch": 0.2593172062422273, "grad_norm": 12.075369834899902, "learning_rate": 8.565748396077904e-05, "loss": 32.7891, "step": 1616 }, { "epoch": 0.2594776748104465, "grad_norm": 12.019018173217773, "learning_rate": 8.563951896799951e-05, "loss": 32.7812, "step": 1617 }, { "epoch": 0.2596381433786657, "grad_norm": 12.151320457458496, "learning_rate": 8.562154461752385e-05, "loss": 32.8398, "step": 1618 }, { "epoch": 0.2597986119468849, "grad_norm": 12.397635459899902, "learning_rate": 8.560356091407154e-05, "loss": 32.7578, "step": 1619 }, { "epoch": 0.2599590805151041, "grad_norm": 12.261595726013184, "learning_rate": 8.558556786236445e-05, "loss": 32.707, "step": 1620 }, { "epoch": 0.2601195490833233, "grad_norm": 12.1434907913208, "learning_rate": 8.556756546712693e-05, "loss": 32.7266, "step": 1621 }, { "epoch": 0.2602800176515425, "grad_norm": 12.204574584960938, "learning_rate": 8.554955373308578e-05, "loss": 32.8164, "step": 1622 }, { "epoch": 0.2604404862197617, "grad_norm": 12.298073768615723, "learning_rate": 8.553153266497027e-05, "loss": 32.8828, "step": 1623 }, { "epoch": 0.2606009547879809, "grad_norm": 12.069507598876953, "learning_rate": 8.551350226751209e-05, "loss": 32.6406, "step": 1624 }, { "epoch": 0.2607614233562001, "grad_norm": 12.135066986083984, "learning_rate": 8.549546254544541e-05, "loss": 32.6602, "step": 1625 }, { "epoch": 0.2609218919244193, "grad_norm": 12.530217170715332, "learning_rate": 8.54774135035068e-05, "loss": 32.7188, "step": 1626 }, { "epoch": 0.2610823604926385, "grad_norm": 12.686484336853027, "learning_rate": 8.545935514643535e-05, "loss": 32.6758, "step": 1627 }, { "epoch": 0.2612428290608577, "grad_norm": 12.592630386352539, "learning_rate": 8.544128747897252e-05, "loss": 32.6641, "step": 1628 }, { "epoch": 0.2614032976290769, "grad_norm": 12.263590812683105, "learning_rate": 8.542321050586228e-05, "loss": 32.6484, "step": 1629 }, { "epoch": 0.2615637661972961, "grad_norm": 12.213068962097168, "learning_rate": 8.540512423185099e-05, "loss": 32.7734, "step": 1630 }, { "epoch": 0.2617242347655153, "grad_norm": 12.913739204406738, "learning_rate": 8.538702866168749e-05, "loss": 32.6016, "step": 1631 }, { "epoch": 0.2618847033337345, "grad_norm": 12.396862983703613, "learning_rate": 8.536892380012304e-05, "loss": 32.7188, "step": 1632 }, { "epoch": 0.2620451719019537, "grad_norm": 12.018280982971191, "learning_rate": 8.535080965191134e-05, "loss": 32.8438, "step": 1633 }, { "epoch": 0.2622056404701729, "grad_norm": 12.149358749389648, "learning_rate": 8.533268622180855e-05, "loss": 32.6875, "step": 1634 }, { "epoch": 0.2623661090383921, "grad_norm": 11.97989559173584, "learning_rate": 8.531455351457322e-05, "loss": 32.8828, "step": 1635 }, { "epoch": 0.2625265776066113, "grad_norm": 12.338279724121094, "learning_rate": 8.529641153496644e-05, "loss": 32.7344, "step": 1636 }, { "epoch": 0.2626870461748305, "grad_norm": 12.387608528137207, "learning_rate": 8.527826028775156e-05, "loss": 32.625, "step": 1637 }, { "epoch": 0.2628475147430497, "grad_norm": 12.38588809967041, "learning_rate": 8.526009977769455e-05, "loss": 32.6484, "step": 1638 }, { "epoch": 0.2630079833112689, "grad_norm": 12.06928539276123, "learning_rate": 8.524193000956367e-05, "loss": 32.6719, "step": 1639 }, { "epoch": 0.2631684518794881, "grad_norm": 12.679421424865723, "learning_rate": 8.522375098812971e-05, "loss": 32.6016, "step": 1640 }, { "epoch": 0.2633289204477073, "grad_norm": 12.139123916625977, "learning_rate": 8.520556271816583e-05, "loss": 32.6797, "step": 1641 }, { "epoch": 0.2634893890159265, "grad_norm": 12.136763572692871, "learning_rate": 8.518736520444764e-05, "loss": 32.625, "step": 1642 }, { "epoch": 0.2636498575841457, "grad_norm": 12.068551063537598, "learning_rate": 8.516915845175315e-05, "loss": 32.7266, "step": 1643 }, { "epoch": 0.2638103261523649, "grad_norm": 12.399215698242188, "learning_rate": 8.515094246486286e-05, "loss": 32.6484, "step": 1644 }, { "epoch": 0.2639707947205841, "grad_norm": 13.027161598205566, "learning_rate": 8.513271724855961e-05, "loss": 32.5469, "step": 1645 }, { "epoch": 0.2641312632888033, "grad_norm": 12.081975936889648, "learning_rate": 8.511448280762875e-05, "loss": 32.8203, "step": 1646 }, { "epoch": 0.2642917318570225, "grad_norm": 12.280073165893555, "learning_rate": 8.509623914685797e-05, "loss": 32.7578, "step": 1647 }, { "epoch": 0.2644522004252417, "grad_norm": 12.973363876342773, "learning_rate": 8.507798627103744e-05, "loss": 32.6172, "step": 1648 }, { "epoch": 0.2646126689934609, "grad_norm": 12.29483699798584, "learning_rate": 8.505972418495975e-05, "loss": 32.9141, "step": 1649 }, { "epoch": 0.2647731375616801, "grad_norm": 12.26298999786377, "learning_rate": 8.504145289341984e-05, "loss": 33.0312, "step": 1650 }, { "epoch": 0.2649336061298993, "grad_norm": 12.194525718688965, "learning_rate": 8.502317240121515e-05, "loss": 32.9609, "step": 1651 }, { "epoch": 0.2650940746981185, "grad_norm": 12.310232162475586, "learning_rate": 8.500488271314549e-05, "loss": 32.9062, "step": 1652 }, { "epoch": 0.2652545432663377, "grad_norm": 12.15299129486084, "learning_rate": 8.498658383401308e-05, "loss": 32.8672, "step": 1653 }, { "epoch": 0.2654150118345569, "grad_norm": 12.15855598449707, "learning_rate": 8.496827576862261e-05, "loss": 32.7969, "step": 1654 }, { "epoch": 0.2655754804027761, "grad_norm": 12.151863098144531, "learning_rate": 8.494995852178109e-05, "loss": 32.7969, "step": 1655 }, { "epoch": 0.2657359489709953, "grad_norm": 12.14870548248291, "learning_rate": 8.493163209829802e-05, "loss": 32.8359, "step": 1656 }, { "epoch": 0.2658964175392145, "grad_norm": 11.960965156555176, "learning_rate": 8.491329650298527e-05, "loss": 32.8125, "step": 1657 }, { "epoch": 0.2660568861074337, "grad_norm": 12.619915962219238, "learning_rate": 8.489495174065714e-05, "loss": 32.75, "step": 1658 }, { "epoch": 0.2662173546756529, "grad_norm": 11.952417373657227, "learning_rate": 8.48765978161303e-05, "loss": 32.9141, "step": 1659 }, { "epoch": 0.2663778232438721, "grad_norm": 12.086203575134277, "learning_rate": 8.48582347342239e-05, "loss": 32.7344, "step": 1660 }, { "epoch": 0.2665382918120913, "grad_norm": 12.089826583862305, "learning_rate": 8.483986249975939e-05, "loss": 32.75, "step": 1661 }, { "epoch": 0.2666987603803105, "grad_norm": 12.17044734954834, "learning_rate": 8.482148111756071e-05, "loss": 32.7891, "step": 1662 }, { "epoch": 0.2668592289485297, "grad_norm": 12.081165313720703, "learning_rate": 8.480309059245417e-05, "loss": 32.7656, "step": 1663 }, { "epoch": 0.2670196975167489, "grad_norm": 12.080260276794434, "learning_rate": 8.478469092926849e-05, "loss": 32.7891, "step": 1664 }, { "epoch": 0.2671801660849681, "grad_norm": 12.219499588012695, "learning_rate": 8.476628213283474e-05, "loss": 32.6953, "step": 1665 }, { "epoch": 0.2673406346531873, "grad_norm": 12.014325141906738, "learning_rate": 8.474786420798646e-05, "loss": 32.6953, "step": 1666 }, { "epoch": 0.2675011032214065, "grad_norm": 12.146161079406738, "learning_rate": 8.472943715955955e-05, "loss": 32.7344, "step": 1667 }, { "epoch": 0.2676615717896257, "grad_norm": 12.338777542114258, "learning_rate": 8.471100099239232e-05, "loss": 32.7422, "step": 1668 }, { "epoch": 0.2678220403578449, "grad_norm": 12.273233413696289, "learning_rate": 8.469255571132545e-05, "loss": 32.7422, "step": 1669 }, { "epoch": 0.2679825089260641, "grad_norm": 12.160501480102539, "learning_rate": 8.467410132120203e-05, "loss": 32.75, "step": 1670 }, { "epoch": 0.2681429774942833, "grad_norm": 11.924298286437988, "learning_rate": 8.465563782686756e-05, "loss": 32.9062, "step": 1671 }, { "epoch": 0.2683034460625025, "grad_norm": 12.139840126037598, "learning_rate": 8.463716523316989e-05, "loss": 32.6953, "step": 1672 }, { "epoch": 0.2684639146307217, "grad_norm": 12.42048454284668, "learning_rate": 8.461868354495928e-05, "loss": 32.7031, "step": 1673 }, { "epoch": 0.2686243831989409, "grad_norm": 11.951004028320312, "learning_rate": 8.460019276708837e-05, "loss": 32.75, "step": 1674 }, { "epoch": 0.2687848517671601, "grad_norm": 12.33829116821289, "learning_rate": 8.458169290441223e-05, "loss": 32.6406, "step": 1675 }, { "epoch": 0.2689453203353793, "grad_norm": 12.070027351379395, "learning_rate": 8.456318396178824e-05, "loss": 32.7188, "step": 1676 }, { "epoch": 0.2691057889035985, "grad_norm": 12.239757537841797, "learning_rate": 8.454466594407622e-05, "loss": 33.2422, "step": 1677 }, { "epoch": 0.2692662574718177, "grad_norm": 12.266468048095703, "learning_rate": 8.452613885613836e-05, "loss": 32.6875, "step": 1678 }, { "epoch": 0.2694267260400369, "grad_norm": 12.202836990356445, "learning_rate": 8.450760270283921e-05, "loss": 32.7031, "step": 1679 }, { "epoch": 0.2695871946082561, "grad_norm": 12.521763801574707, "learning_rate": 8.448905748904576e-05, "loss": 32.5938, "step": 1680 }, { "epoch": 0.2697476631764753, "grad_norm": 12.215171813964844, "learning_rate": 8.447050321962729e-05, "loss": 32.6641, "step": 1681 }, { "epoch": 0.2699081317446945, "grad_norm": 12.271849632263184, "learning_rate": 8.445193989945553e-05, "loss": 32.6875, "step": 1682 }, { "epoch": 0.2700686003129137, "grad_norm": 12.26722240447998, "learning_rate": 8.443336753340454e-05, "loss": 32.6562, "step": 1683 }, { "epoch": 0.2702290688811329, "grad_norm": 12.274375915527344, "learning_rate": 8.44147861263508e-05, "loss": 32.6953, "step": 1684 }, { "epoch": 0.2703895374493521, "grad_norm": 12.215197563171387, "learning_rate": 8.439619568317314e-05, "loss": 32.6641, "step": 1685 }, { "epoch": 0.2705500060175713, "grad_norm": 12.262452125549316, "learning_rate": 8.437759620875275e-05, "loss": 32.6406, "step": 1686 }, { "epoch": 0.2707104745857905, "grad_norm": 12.269820213317871, "learning_rate": 8.43589877079732e-05, "loss": 32.625, "step": 1687 }, { "epoch": 0.2708709431540097, "grad_norm": 12.52116584777832, "learning_rate": 8.434037018572044e-05, "loss": 32.6406, "step": 1688 }, { "epoch": 0.2710314117222289, "grad_norm": 12.086153030395508, "learning_rate": 8.43217436468828e-05, "loss": 32.7969, "step": 1689 }, { "epoch": 0.2711918802904481, "grad_norm": 11.953587532043457, "learning_rate": 8.430310809635093e-05, "loss": 32.8047, "step": 1690 }, { "epoch": 0.2713523488586673, "grad_norm": 12.144169807434082, "learning_rate": 8.428446353901789e-05, "loss": 32.5938, "step": 1691 }, { "epoch": 0.2715128174268865, "grad_norm": 12.46260929107666, "learning_rate": 8.426580997977911e-05, "loss": 32.7188, "step": 1692 }, { "epoch": 0.2716732859951057, "grad_norm": 12.217458724975586, "learning_rate": 8.424714742353232e-05, "loss": 32.75, "step": 1693 }, { "epoch": 0.2718337545633249, "grad_norm": 12.069623947143555, "learning_rate": 8.42284758751777e-05, "loss": 32.7344, "step": 1694 }, { "epoch": 0.2719942231315441, "grad_norm": 12.07145881652832, "learning_rate": 8.420979533961775e-05, "loss": 32.6875, "step": 1695 }, { "epoch": 0.2721546916997633, "grad_norm": 12.201951026916504, "learning_rate": 8.419110582175729e-05, "loss": 32.6562, "step": 1696 }, { "epoch": 0.2723151602679825, "grad_norm": 12.011662483215332, "learning_rate": 8.417240732650356e-05, "loss": 32.7109, "step": 1697 }, { "epoch": 0.2724756288362017, "grad_norm": 12.262080192565918, "learning_rate": 8.41536998587661e-05, "loss": 32.6484, "step": 1698 }, { "epoch": 0.2726360974044209, "grad_norm": 12.334620475769043, "learning_rate": 8.413498342345692e-05, "loss": 32.7734, "step": 1699 }, { "epoch": 0.2727965659726401, "grad_norm": 12.135205268859863, "learning_rate": 8.411625802549022e-05, "loss": 32.8516, "step": 1700 }, { "epoch": 0.2729570345408593, "grad_norm": 12.54869270324707, "learning_rate": 8.40975236697827e-05, "loss": 32.8906, "step": 1701 }, { "epoch": 0.2731175031090785, "grad_norm": 12.092065811157227, "learning_rate": 8.40787803612533e-05, "loss": 32.8828, "step": 1702 }, { "epoch": 0.2732779716772977, "grad_norm": 12.101240158081055, "learning_rate": 8.406002810482338e-05, "loss": 32.8438, "step": 1703 }, { "epoch": 0.2734384402455169, "grad_norm": 12.226303100585938, "learning_rate": 8.404126690541662e-05, "loss": 32.8047, "step": 1704 }, { "epoch": 0.2735989088137361, "grad_norm": 12.038634300231934, "learning_rate": 8.402249676795907e-05, "loss": 32.7812, "step": 1705 }, { "epoch": 0.2737593773819553, "grad_norm": 12.352331161499023, "learning_rate": 8.400371769737912e-05, "loss": 32.7578, "step": 1706 }, { "epoch": 0.2739198459501745, "grad_norm": 12.579036712646484, "learning_rate": 8.39849296986075e-05, "loss": 32.7578, "step": 1707 }, { "epoch": 0.2740803145183937, "grad_norm": 12.084753036499023, "learning_rate": 8.396613277657725e-05, "loss": 32.7891, "step": 1708 }, { "epoch": 0.2742407830866129, "grad_norm": 12.025375366210938, "learning_rate": 8.394732693622382e-05, "loss": 32.75, "step": 1709 }, { "epoch": 0.2744012516548321, "grad_norm": 12.091814041137695, "learning_rate": 8.392851218248497e-05, "loss": 32.8203, "step": 1710 }, { "epoch": 0.2745617202230513, "grad_norm": 12.213984489440918, "learning_rate": 8.390968852030078e-05, "loss": 32.8281, "step": 1711 }, { "epoch": 0.2747221887912705, "grad_norm": 12.216316223144531, "learning_rate": 8.389085595461369e-05, "loss": 32.7344, "step": 1712 }, { "epoch": 0.2748826573594897, "grad_norm": 12.078030586242676, "learning_rate": 8.38720144903685e-05, "loss": 32.7734, "step": 1713 }, { "epoch": 0.2750431259277089, "grad_norm": 12.152382850646973, "learning_rate": 8.385316413251228e-05, "loss": 32.7266, "step": 1714 }, { "epoch": 0.2752035944959281, "grad_norm": 12.298269271850586, "learning_rate": 8.383430488599451e-05, "loss": 32.6875, "step": 1715 }, { "epoch": 0.2753640630641473, "grad_norm": 12.012724876403809, "learning_rate": 8.381543675576698e-05, "loss": 32.7656, "step": 1716 }, { "epoch": 0.2755245316323665, "grad_norm": 12.39163589477539, "learning_rate": 8.379655974678375e-05, "loss": 32.6875, "step": 1717 }, { "epoch": 0.27568500020058573, "grad_norm": 12.301249504089355, "learning_rate": 8.377767386400132e-05, "loss": 32.8281, "step": 1718 }, { "epoch": 0.2758454687688049, "grad_norm": 12.22695541381836, "learning_rate": 8.375877911237847e-05, "loss": 32.7109, "step": 1719 }, { "epoch": 0.2760059373370241, "grad_norm": 12.01559829711914, "learning_rate": 8.373987549687625e-05, "loss": 32.7109, "step": 1720 }, { "epoch": 0.27616640590524333, "grad_norm": 12.143108367919922, "learning_rate": 8.372096302245813e-05, "loss": 32.6797, "step": 1721 }, { "epoch": 0.2763268744734625, "grad_norm": 12.210331916809082, "learning_rate": 8.370204169408986e-05, "loss": 32.6641, "step": 1722 }, { "epoch": 0.2764873430416817, "grad_norm": 12.215949058532715, "learning_rate": 8.36831115167395e-05, "loss": 32.6406, "step": 1723 }, { "epoch": 0.27664781160990093, "grad_norm": 12.269742965698242, "learning_rate": 8.36641724953775e-05, "loss": 32.6328, "step": 1724 }, { "epoch": 0.2768082801781201, "grad_norm": 12.07189655303955, "learning_rate": 8.364522463497654e-05, "loss": 32.6953, "step": 1725 }, { "epoch": 0.2769687487463393, "grad_norm": 12.098681449890137, "learning_rate": 8.362626794051171e-05, "loss": 32.7344, "step": 1726 }, { "epoch": 0.27712921731455853, "grad_norm": 12.326811790466309, "learning_rate": 8.360730241696036e-05, "loss": 32.6641, "step": 1727 }, { "epoch": 0.2772896858827777, "grad_norm": 12.322792053222656, "learning_rate": 8.358832806930215e-05, "loss": 32.5938, "step": 1728 }, { "epoch": 0.2774501544509969, "grad_norm": 12.206414222717285, "learning_rate": 8.356934490251911e-05, "loss": 32.6484, "step": 1729 }, { "epoch": 0.27761062301921613, "grad_norm": 12.037450790405273, "learning_rate": 8.355035292159556e-05, "loss": 32.7266, "step": 1730 }, { "epoch": 0.2777710915874353, "grad_norm": 12.390081405639648, "learning_rate": 8.353135213151813e-05, "loss": 32.6719, "step": 1731 }, { "epoch": 0.2779315601556545, "grad_norm": 12.081255912780762, "learning_rate": 8.351234253727576e-05, "loss": 32.75, "step": 1732 }, { "epoch": 0.27809202872387373, "grad_norm": 12.19651985168457, "learning_rate": 8.349332414385971e-05, "loss": 32.6094, "step": 1733 }, { "epoch": 0.2782524972920929, "grad_norm": 12.205437660217285, "learning_rate": 8.347429695626354e-05, "loss": 32.75, "step": 1734 }, { "epoch": 0.2784129658603121, "grad_norm": 12.39555835723877, "learning_rate": 8.345526097948313e-05, "loss": 32.5938, "step": 1735 }, { "epoch": 0.27857343442853133, "grad_norm": 12.464709281921387, "learning_rate": 8.343621621851668e-05, "loss": 32.6484, "step": 1736 }, { "epoch": 0.2787339029967505, "grad_norm": 11.821876525878906, "learning_rate": 8.341716267836465e-05, "loss": 33.3828, "step": 1737 }, { "epoch": 0.2788943715649697, "grad_norm": 12.26815414428711, "learning_rate": 8.339810036402988e-05, "loss": 32.6797, "step": 1738 }, { "epoch": 0.27905484013318893, "grad_norm": 12.013328552246094, "learning_rate": 8.337902928051743e-05, "loss": 32.6484, "step": 1739 }, { "epoch": 0.2792153087014081, "grad_norm": 12.139246940612793, "learning_rate": 8.335994943283472e-05, "loss": 32.6797, "step": 1740 }, { "epoch": 0.2793757772696273, "grad_norm": 12.138810157775879, "learning_rate": 8.334086082599145e-05, "loss": 32.6719, "step": 1741 }, { "epoch": 0.27953624583784653, "grad_norm": 12.400823593139648, "learning_rate": 8.33217634649996e-05, "loss": 32.6719, "step": 1742 }, { "epoch": 0.2796967144060657, "grad_norm": 12.20035457611084, "learning_rate": 8.330265735487352e-05, "loss": 32.6719, "step": 1743 }, { "epoch": 0.2798571829742849, "grad_norm": 12.076383590698242, "learning_rate": 8.328354250062977e-05, "loss": 32.6953, "step": 1744 }, { "epoch": 0.28001765154250413, "grad_norm": 12.085222244262695, "learning_rate": 8.326441890728727e-05, "loss": 32.7188, "step": 1745 }, { "epoch": 0.2801781201107233, "grad_norm": 12.13740348815918, "learning_rate": 8.324528657986718e-05, "loss": 32.6641, "step": 1746 }, { "epoch": 0.2803385886789425, "grad_norm": 12.338993072509766, "learning_rate": 8.322614552339302e-05, "loss": 32.7344, "step": 1747 }, { "epoch": 0.28049905724716173, "grad_norm": 12.441941261291504, "learning_rate": 8.320699574289054e-05, "loss": 32.9297, "step": 1748 }, { "epoch": 0.2806595258153809, "grad_norm": 12.389745712280273, "learning_rate": 8.318783724338781e-05, "loss": 32.8281, "step": 1749 }, { "epoch": 0.2808199943836001, "grad_norm": 12.104756355285645, "learning_rate": 8.316867002991518e-05, "loss": 33.1328, "step": 1750 }, { "epoch": 0.28098046295181933, "grad_norm": 12.289093971252441, "learning_rate": 8.314949410750529e-05, "loss": 32.9141, "step": 1751 }, { "epoch": 0.2811409315200385, "grad_norm": 12.243664741516113, "learning_rate": 8.313030948119309e-05, "loss": 32.8359, "step": 1752 }, { "epoch": 0.2813014000882577, "grad_norm": 12.098668098449707, "learning_rate": 8.311111615601577e-05, "loss": 32.9297, "step": 1753 }, { "epoch": 0.28146186865647693, "grad_norm": 12.444111824035645, "learning_rate": 8.309191413701285e-05, "loss": 32.8047, "step": 1754 }, { "epoch": 0.2816223372246961, "grad_norm": 12.29434871673584, "learning_rate": 8.307270342922609e-05, "loss": 32.7422, "step": 1755 }, { "epoch": 0.2817828057929153, "grad_norm": 12.615156173706055, "learning_rate": 8.305348403769956e-05, "loss": 32.8359, "step": 1756 }, { "epoch": 0.28194327436113453, "grad_norm": 12.347224235534668, "learning_rate": 8.303425596747961e-05, "loss": 32.7344, "step": 1757 }, { "epoch": 0.2821037429293537, "grad_norm": 12.335906028747559, "learning_rate": 8.301501922361485e-05, "loss": 32.7969, "step": 1758 }, { "epoch": 0.2822642114975729, "grad_norm": 12.14656925201416, "learning_rate": 8.29957738111562e-05, "loss": 32.7188, "step": 1759 }, { "epoch": 0.28242468006579213, "grad_norm": 12.493324279785156, "learning_rate": 8.297651973515684e-05, "loss": 32.7031, "step": 1760 }, { "epoch": 0.2825851486340113, "grad_norm": 12.073770523071289, "learning_rate": 8.295725700067217e-05, "loss": 32.7188, "step": 1761 }, { "epoch": 0.2827456172022305, "grad_norm": 12.22189712524414, "learning_rate": 8.293798561275994e-05, "loss": 32.7344, "step": 1762 }, { "epoch": 0.28290608577044973, "grad_norm": 12.02864933013916, "learning_rate": 8.291870557648016e-05, "loss": 32.7812, "step": 1763 }, { "epoch": 0.2830665543386689, "grad_norm": 11.952363014221191, "learning_rate": 8.289941689689509e-05, "loss": 32.7266, "step": 1764 }, { "epoch": 0.2832270229068881, "grad_norm": 12.098366737365723, "learning_rate": 8.288011957906925e-05, "loss": 32.6797, "step": 1765 }, { "epoch": 0.28338749147510733, "grad_norm": 12.145894050598145, "learning_rate": 8.286081362806946e-05, "loss": 32.7969, "step": 1766 }, { "epoch": 0.2835479600433265, "grad_norm": 12.273152351379395, "learning_rate": 8.284149904896478e-05, "loss": 32.6875, "step": 1767 }, { "epoch": 0.2837084286115457, "grad_norm": 12.278546333312988, "learning_rate": 8.282217584682655e-05, "loss": 32.6484, "step": 1768 }, { "epoch": 0.28386889717976493, "grad_norm": 12.019906044006348, "learning_rate": 8.280284402672835e-05, "loss": 32.8203, "step": 1769 }, { "epoch": 0.2840293657479841, "grad_norm": 12.502687454223633, "learning_rate": 8.278350359374609e-05, "loss": 32.6641, "step": 1770 }, { "epoch": 0.2841898343162033, "grad_norm": 12.139315605163574, "learning_rate": 8.276415455295786e-05, "loss": 32.6719, "step": 1771 }, { "epoch": 0.28435030288442253, "grad_norm": 11.887699127197266, "learning_rate": 8.274479690944404e-05, "loss": 32.7422, "step": 1772 }, { "epoch": 0.2845107714526417, "grad_norm": 11.95772933959961, "learning_rate": 8.272543066828728e-05, "loss": 32.7031, "step": 1773 }, { "epoch": 0.2846712400208609, "grad_norm": 12.209641456604004, "learning_rate": 8.27060558345725e-05, "loss": 32.7188, "step": 1774 }, { "epoch": 0.28483170858908013, "grad_norm": 12.51308536529541, "learning_rate": 8.268667241338681e-05, "loss": 32.6719, "step": 1775 }, { "epoch": 0.2849921771572993, "grad_norm": 12.018391609191895, "learning_rate": 8.266728040981966e-05, "loss": 32.6484, "step": 1776 }, { "epoch": 0.2851526457255185, "grad_norm": 12.452055931091309, "learning_rate": 8.264787982896272e-05, "loss": 32.6406, "step": 1777 }, { "epoch": 0.28531311429373774, "grad_norm": 12.329203605651855, "learning_rate": 8.262847067590989e-05, "loss": 32.6328, "step": 1778 }, { "epoch": 0.2854735828619569, "grad_norm": 12.072844505310059, "learning_rate": 8.260905295575731e-05, "loss": 32.7031, "step": 1779 }, { "epoch": 0.2856340514301761, "grad_norm": 12.03592300415039, "learning_rate": 8.258962667360345e-05, "loss": 32.7969, "step": 1780 }, { "epoch": 0.28579451999839534, "grad_norm": 12.265746116638184, "learning_rate": 8.257019183454896e-05, "loss": 32.6562, "step": 1781 }, { "epoch": 0.2859549885666145, "grad_norm": 12.074728012084961, "learning_rate": 8.25507484436967e-05, "loss": 32.6484, "step": 1782 }, { "epoch": 0.2861154571348337, "grad_norm": 12.083868980407715, "learning_rate": 8.25312965061519e-05, "loss": 32.6797, "step": 1783 }, { "epoch": 0.28627592570305294, "grad_norm": 12.138958930969238, "learning_rate": 8.251183602702193e-05, "loss": 32.6719, "step": 1784 }, { "epoch": 0.2864363942712721, "grad_norm": 12.514199256896973, "learning_rate": 8.249236701141642e-05, "loss": 32.6172, "step": 1785 }, { "epoch": 0.2865968628394913, "grad_norm": 12.391266822814941, "learning_rate": 8.247288946444729e-05, "loss": 32.7109, "step": 1786 }, { "epoch": 0.28675733140771054, "grad_norm": 12.071511268615723, "learning_rate": 8.245340339122863e-05, "loss": 32.6875, "step": 1787 }, { "epoch": 0.2869177999759297, "grad_norm": 12.206296920776367, "learning_rate": 8.243390879687683e-05, "loss": 32.6172, "step": 1788 }, { "epoch": 0.2870782685441489, "grad_norm": 12.57287311553955, "learning_rate": 8.241440568651047e-05, "loss": 32.6016, "step": 1789 }, { "epoch": 0.28723873711236814, "grad_norm": 12.27324390411377, "learning_rate": 8.23948940652504e-05, "loss": 32.6875, "step": 1790 }, { "epoch": 0.2873992056805873, "grad_norm": 12.134624481201172, "learning_rate": 8.237537393821969e-05, "loss": 32.6719, "step": 1791 }, { "epoch": 0.2875596742488065, "grad_norm": 12.400323867797852, "learning_rate": 8.235584531054363e-05, "loss": 32.5781, "step": 1792 }, { "epoch": 0.28772014281702574, "grad_norm": 12.141136169433594, "learning_rate": 8.233630818734979e-05, "loss": 32.7266, "step": 1793 }, { "epoch": 0.2878806113852449, "grad_norm": 12.140402793884277, "learning_rate": 8.23167625737679e-05, "loss": 32.7344, "step": 1794 }, { "epoch": 0.2880410799534641, "grad_norm": 12.522016525268555, "learning_rate": 8.229720847493002e-05, "loss": 32.6094, "step": 1795 }, { "epoch": 0.28820154852168334, "grad_norm": 12.38587474822998, "learning_rate": 8.227764589597029e-05, "loss": 32.5859, "step": 1796 }, { "epoch": 0.2883620170899025, "grad_norm": 12.284767150878906, "learning_rate": 8.225807484202521e-05, "loss": 32.6719, "step": 1797 }, { "epoch": 0.2885224856581217, "grad_norm": 12.212782859802246, "learning_rate": 8.223849531823347e-05, "loss": 32.7109, "step": 1798 }, { "epoch": 0.28868295422634094, "grad_norm": 12.401185989379883, "learning_rate": 8.221890732973594e-05, "loss": 32.6797, "step": 1799 }, { "epoch": 0.2888434227945601, "grad_norm": 11.970154762268066, "learning_rate": 8.219931088167575e-05, "loss": 32.9922, "step": 1800 }, { "epoch": 0.2890038913627793, "grad_norm": 12.023969650268555, "learning_rate": 8.217970597919827e-05, "loss": 32.9297, "step": 1801 }, { "epoch": 0.28916435993099854, "grad_norm": 12.309053421020508, "learning_rate": 8.216009262745101e-05, "loss": 32.7812, "step": 1802 }, { "epoch": 0.2893248284992177, "grad_norm": 12.181326866149902, "learning_rate": 8.214047083158381e-05, "loss": 32.8672, "step": 1803 }, { "epoch": 0.2894852970674369, "grad_norm": 12.23659896850586, "learning_rate": 8.212084059674865e-05, "loss": 32.7109, "step": 1804 }, { "epoch": 0.28964576563565614, "grad_norm": 12.590693473815918, "learning_rate": 8.210120192809974e-05, "loss": 32.7734, "step": 1805 }, { "epoch": 0.2898062342038753, "grad_norm": 12.153250694274902, "learning_rate": 8.20815548307935e-05, "loss": 32.7188, "step": 1806 }, { "epoch": 0.2899667027720945, "grad_norm": 12.274160385131836, "learning_rate": 8.20618993099886e-05, "loss": 32.7188, "step": 1807 }, { "epoch": 0.29012717134031374, "grad_norm": 12.14792251586914, "learning_rate": 8.204223537084587e-05, "loss": 32.7344, "step": 1808 }, { "epoch": 0.2902876399085329, "grad_norm": 12.21224594116211, "learning_rate": 8.20225630185284e-05, "loss": 32.7969, "step": 1809 }, { "epoch": 0.2904481084767521, "grad_norm": 12.660111427307129, "learning_rate": 8.200288225820145e-05, "loss": 32.7266, "step": 1810 }, { "epoch": 0.29060857704497134, "grad_norm": 12.535223007202148, "learning_rate": 8.19831930950325e-05, "loss": 32.7734, "step": 1811 }, { "epoch": 0.2907690456131905, "grad_norm": 12.219816207885742, "learning_rate": 8.196349553419124e-05, "loss": 32.7578, "step": 1812 }, { "epoch": 0.2909295141814097, "grad_norm": 12.401179313659668, "learning_rate": 8.194378958084957e-05, "loss": 32.7422, "step": 1813 }, { "epoch": 0.29108998274962894, "grad_norm": 11.984326362609863, "learning_rate": 8.192407524018158e-05, "loss": 32.75, "step": 1814 }, { "epoch": 0.2912504513178481, "grad_norm": 12.079487800598145, "learning_rate": 8.190435251736356e-05, "loss": 32.6953, "step": 1815 }, { "epoch": 0.2914109198860673, "grad_norm": 12.01998233795166, "learning_rate": 8.188462141757403e-05, "loss": 32.7031, "step": 1816 }, { "epoch": 0.29157138845428654, "grad_norm": 12.298757553100586, "learning_rate": 8.186488194599368e-05, "loss": 32.6406, "step": 1817 }, { "epoch": 0.2917318570225057, "grad_norm": 12.134869575500488, "learning_rate": 8.18451341078054e-05, "loss": 32.7344, "step": 1818 }, { "epoch": 0.2918923255907249, "grad_norm": 12.339306831359863, "learning_rate": 8.182537790819432e-05, "loss": 32.6328, "step": 1819 }, { "epoch": 0.29205279415894414, "grad_norm": 12.07043170928955, "learning_rate": 8.180561335234766e-05, "loss": 32.6719, "step": 1820 }, { "epoch": 0.2922132627271633, "grad_norm": 12.158679008483887, "learning_rate": 8.178584044545496e-05, "loss": 32.6172, "step": 1821 }, { "epoch": 0.2923737312953825, "grad_norm": 12.077170372009277, "learning_rate": 8.17660591927079e-05, "loss": 32.6484, "step": 1822 }, { "epoch": 0.29253419986360174, "grad_norm": 12.205626487731934, "learning_rate": 8.174626959930032e-05, "loss": 32.6484, "step": 1823 }, { "epoch": 0.2926946684318209, "grad_norm": 12.260476112365723, "learning_rate": 8.172647167042828e-05, "loss": 32.6016, "step": 1824 }, { "epoch": 0.2928551370000401, "grad_norm": 12.137556076049805, "learning_rate": 8.170666541129005e-05, "loss": 32.6406, "step": 1825 }, { "epoch": 0.29301560556825934, "grad_norm": 12.141395568847656, "learning_rate": 8.168685082708603e-05, "loss": 32.6484, "step": 1826 }, { "epoch": 0.2931760741364785, "grad_norm": 12.144025802612305, "learning_rate": 8.166702792301888e-05, "loss": 32.6328, "step": 1827 }, { "epoch": 0.2933365427046977, "grad_norm": 12.537698745727539, "learning_rate": 8.164719670429336e-05, "loss": 32.625, "step": 1828 }, { "epoch": 0.29349701127291694, "grad_norm": 11.95993423461914, "learning_rate": 8.162735717611648e-05, "loss": 32.7344, "step": 1829 }, { "epoch": 0.2936574798411361, "grad_norm": 12.385944366455078, "learning_rate": 8.160750934369742e-05, "loss": 32.5547, "step": 1830 }, { "epoch": 0.2938179484093553, "grad_norm": 12.194622039794922, "learning_rate": 8.15876532122475e-05, "loss": 32.6484, "step": 1831 }, { "epoch": 0.29397841697757454, "grad_norm": 12.259896278381348, "learning_rate": 8.156778878698029e-05, "loss": 32.6562, "step": 1832 }, { "epoch": 0.2941388855457937, "grad_norm": 12.204856872558594, "learning_rate": 8.154791607311145e-05, "loss": 32.6797, "step": 1833 }, { "epoch": 0.2942993541140129, "grad_norm": 12.07161808013916, "learning_rate": 8.15280350758589e-05, "loss": 32.6328, "step": 1834 }, { "epoch": 0.29445982268223214, "grad_norm": 12.468602180480957, "learning_rate": 8.150814580044268e-05, "loss": 32.6094, "step": 1835 }, { "epoch": 0.2946202912504513, "grad_norm": 12.027383804321289, "learning_rate": 8.1488248252085e-05, "loss": 32.7734, "step": 1836 }, { "epoch": 0.2947807598186705, "grad_norm": 12.39437198638916, "learning_rate": 8.146834243601032e-05, "loss": 32.6172, "step": 1837 }, { "epoch": 0.29494122838688974, "grad_norm": 12.136211395263672, "learning_rate": 8.144842835744515e-05, "loss": 32.6641, "step": 1838 }, { "epoch": 0.2951016969551089, "grad_norm": 12.009175300598145, "learning_rate": 8.142850602161826e-05, "loss": 32.7188, "step": 1839 }, { "epoch": 0.2952621655233281, "grad_norm": 11.95237922668457, "learning_rate": 8.140857543376058e-05, "loss": 32.6875, "step": 1840 }, { "epoch": 0.29542263409154734, "grad_norm": 11.944247245788574, "learning_rate": 8.138863659910516e-05, "loss": 32.6797, "step": 1841 }, { "epoch": 0.2955831026597665, "grad_norm": 12.386866569519043, "learning_rate": 8.136868952288727e-05, "loss": 32.6328, "step": 1842 }, { "epoch": 0.2957435712279857, "grad_norm": 12.13771915435791, "learning_rate": 8.13487342103443e-05, "loss": 32.8281, "step": 1843 }, { "epoch": 0.29590403979620494, "grad_norm": 12.708600997924805, "learning_rate": 8.13287706667158e-05, "loss": 32.6406, "step": 1844 }, { "epoch": 0.2960645083644241, "grad_norm": 12.137191772460938, "learning_rate": 8.130879889724354e-05, "loss": 32.7344, "step": 1845 }, { "epoch": 0.2962249769326433, "grad_norm": 12.45838737487793, "learning_rate": 8.128881890717139e-05, "loss": 32.6328, "step": 1846 }, { "epoch": 0.29638544550086254, "grad_norm": 12.207077980041504, "learning_rate": 8.126883070174541e-05, "loss": 32.7656, "step": 1847 }, { "epoch": 0.2965459140690817, "grad_norm": 12.211467742919922, "learning_rate": 8.124883428621381e-05, "loss": 32.8594, "step": 1848 }, { "epoch": 0.2967063826373009, "grad_norm": 12.216129302978516, "learning_rate": 8.122882966582693e-05, "loss": 32.8047, "step": 1849 }, { "epoch": 0.29686685120552014, "grad_norm": 12.2948637008667, "learning_rate": 8.12088168458373e-05, "loss": 33.1094, "step": 1850 }, { "epoch": 0.2970273197737393, "grad_norm": 12.29720401763916, "learning_rate": 8.118879583149956e-05, "loss": 32.8594, "step": 1851 }, { "epoch": 0.2971877883419585, "grad_norm": 12.479796409606934, "learning_rate": 8.116876662807059e-05, "loss": 32.7656, "step": 1852 }, { "epoch": 0.29734825691017774, "grad_norm": 12.109203338623047, "learning_rate": 8.114872924080932e-05, "loss": 32.875, "step": 1853 }, { "epoch": 0.2975087254783969, "grad_norm": 11.957758903503418, "learning_rate": 8.112868367497689e-05, "loss": 32.7656, "step": 1854 }, { "epoch": 0.2976691940466161, "grad_norm": 12.082207679748535, "learning_rate": 8.110862993583655e-05, "loss": 32.7344, "step": 1855 }, { "epoch": 0.29782966261483534, "grad_norm": 12.028236389160156, "learning_rate": 8.108856802865372e-05, "loss": 32.6953, "step": 1856 }, { "epoch": 0.2979901311830545, "grad_norm": 12.022505760192871, "learning_rate": 8.106849795869596e-05, "loss": 32.7578, "step": 1857 }, { "epoch": 0.2981505997512737, "grad_norm": 12.156734466552734, "learning_rate": 8.104841973123297e-05, "loss": 32.7578, "step": 1858 }, { "epoch": 0.29831106831949294, "grad_norm": 12.724309921264648, "learning_rate": 8.102833335153657e-05, "loss": 32.6641, "step": 1859 }, { "epoch": 0.2984715368877121, "grad_norm": 12.214303016662598, "learning_rate": 8.10082388248808e-05, "loss": 32.7266, "step": 1860 }, { "epoch": 0.2986320054559313, "grad_norm": 12.090171813964844, "learning_rate": 8.098813615654172e-05, "loss": 32.7578, "step": 1861 }, { "epoch": 0.29879247402415055, "grad_norm": 12.221662521362305, "learning_rate": 8.096802535179763e-05, "loss": 32.7891, "step": 1862 }, { "epoch": 0.2989529425923697, "grad_norm": 12.14253044128418, "learning_rate": 8.094790641592892e-05, "loss": 32.6875, "step": 1863 }, { "epoch": 0.2991134111605889, "grad_norm": 12.34546184539795, "learning_rate": 8.092777935421812e-05, "loss": 32.7578, "step": 1864 }, { "epoch": 0.29927387972880815, "grad_norm": 12.136091232299805, "learning_rate": 8.090764417194988e-05, "loss": 32.6875, "step": 1865 }, { "epoch": 0.2994343482970273, "grad_norm": 11.954840660095215, "learning_rate": 8.088750087441101e-05, "loss": 32.7578, "step": 1866 }, { "epoch": 0.2995948168652465, "grad_norm": 12.32511043548584, "learning_rate": 8.086734946689045e-05, "loss": 32.7656, "step": 1867 }, { "epoch": 0.29975528543346575, "grad_norm": 12.362078666687012, "learning_rate": 8.084718995467923e-05, "loss": 32.7109, "step": 1868 }, { "epoch": 0.2999157540016849, "grad_norm": 12.47738265991211, "learning_rate": 8.082702234307058e-05, "loss": 32.6094, "step": 1869 }, { "epoch": 0.3000762225699041, "grad_norm": 12.148173332214355, "learning_rate": 8.080684663735976e-05, "loss": 32.7422, "step": 1870 }, { "epoch": 0.30023669113812335, "grad_norm": 12.217541694641113, "learning_rate": 8.078666284284425e-05, "loss": 32.6406, "step": 1871 }, { "epoch": 0.3003971597063425, "grad_norm": 12.009260177612305, "learning_rate": 8.076647096482357e-05, "loss": 32.7031, "step": 1872 }, { "epoch": 0.30055762827456173, "grad_norm": 11.956409454345703, "learning_rate": 8.074627100859943e-05, "loss": 32.7266, "step": 1873 }, { "epoch": 0.30071809684278095, "grad_norm": 12.071775436401367, "learning_rate": 8.072606297947565e-05, "loss": 32.6953, "step": 1874 }, { "epoch": 0.3008785654110001, "grad_norm": 12.081216812133789, "learning_rate": 8.07058468827581e-05, "loss": 32.6875, "step": 1875 }, { "epoch": 0.30103903397921933, "grad_norm": 12.20622444152832, "learning_rate": 8.068562272375489e-05, "loss": 32.6562, "step": 1876 }, { "epoch": 0.30119950254743855, "grad_norm": 12.262903213500977, "learning_rate": 8.066539050777613e-05, "loss": 32.7031, "step": 1877 }, { "epoch": 0.3013599711156577, "grad_norm": 12.022239685058594, "learning_rate": 8.064515024013412e-05, "loss": 32.7656, "step": 1878 }, { "epoch": 0.30152043968387693, "grad_norm": 12.020618438720703, "learning_rate": 8.062490192614325e-05, "loss": 32.6875, "step": 1879 }, { "epoch": 0.30168090825209615, "grad_norm": 12.213376998901367, "learning_rate": 8.060464557112e-05, "loss": 32.6641, "step": 1880 }, { "epoch": 0.3018413768203153, "grad_norm": 12.407100677490234, "learning_rate": 8.0584381180383e-05, "loss": 32.6484, "step": 1881 }, { "epoch": 0.30200184538853453, "grad_norm": 12.143608093261719, "learning_rate": 8.056410875925297e-05, "loss": 32.7266, "step": 1882 }, { "epoch": 0.30216231395675375, "grad_norm": 12.489471435546875, "learning_rate": 8.054382831305274e-05, "loss": 32.6484, "step": 1883 }, { "epoch": 0.3023227825249729, "grad_norm": 12.070215225219727, "learning_rate": 8.052353984710727e-05, "loss": 32.6328, "step": 1884 }, { "epoch": 0.30248325109319213, "grad_norm": 12.136857032775879, "learning_rate": 8.050324336674356e-05, "loss": 32.6797, "step": 1885 }, { "epoch": 0.30264371966141135, "grad_norm": 12.528462409973145, "learning_rate": 8.048293887729079e-05, "loss": 32.6719, "step": 1886 }, { "epoch": 0.3028041882296305, "grad_norm": 12.294614791870117, "learning_rate": 8.046262638408021e-05, "loss": 32.7266, "step": 1887 }, { "epoch": 0.30296465679784973, "grad_norm": 12.07193374633789, "learning_rate": 8.044230589244516e-05, "loss": 32.625, "step": 1888 }, { "epoch": 0.30312512536606895, "grad_norm": 12.201951026916504, "learning_rate": 8.04219774077211e-05, "loss": 32.6562, "step": 1889 }, { "epoch": 0.3032855939342881, "grad_norm": 12.147361755371094, "learning_rate": 8.040164093524561e-05, "loss": 32.6875, "step": 1890 }, { "epoch": 0.30344606250250733, "grad_norm": 11.961504936218262, "learning_rate": 8.038129648035828e-05, "loss": 32.7188, "step": 1891 }, { "epoch": 0.30360653107072655, "grad_norm": 12.332993507385254, "learning_rate": 8.03609440484009e-05, "loss": 32.75, "step": 1892 }, { "epoch": 0.3037669996389457, "grad_norm": 12.519119262695312, "learning_rate": 8.034058364471729e-05, "loss": 32.5703, "step": 1893 }, { "epoch": 0.30392746820716493, "grad_norm": 12.346817970275879, "learning_rate": 8.03202152746534e-05, "loss": 32.6719, "step": 1894 }, { "epoch": 0.30408793677538415, "grad_norm": 12.071335792541504, "learning_rate": 8.029983894355724e-05, "loss": 32.7656, "step": 1895 }, { "epoch": 0.3042484053436033, "grad_norm": 12.386723518371582, "learning_rate": 8.027945465677894e-05, "loss": 32.5703, "step": 1896 }, { "epoch": 0.30440887391182253, "grad_norm": 11.888148307800293, "learning_rate": 8.025906241967067e-05, "loss": 32.7422, "step": 1897 }, { "epoch": 0.30456934248004175, "grad_norm": 11.95980167388916, "learning_rate": 8.023866223758676e-05, "loss": 33.0078, "step": 1898 }, { "epoch": 0.3047298110482609, "grad_norm": 12.147912979125977, "learning_rate": 8.021825411588355e-05, "loss": 32.8047, "step": 1899 }, { "epoch": 0.30489027961648013, "grad_norm": 12.148622512817383, "learning_rate": 8.019783805991953e-05, "loss": 32.7891, "step": 1900 }, { "epoch": 0.30505074818469935, "grad_norm": 12.113358497619629, "learning_rate": 8.017741407505525e-05, "loss": 32.9531, "step": 1901 }, { "epoch": 0.3052112167529185, "grad_norm": 12.288043975830078, "learning_rate": 8.015698216665333e-05, "loss": 32.8281, "step": 1902 }, { "epoch": 0.30537168532113773, "grad_norm": 12.363632202148438, "learning_rate": 8.013654234007845e-05, "loss": 32.7891, "step": 1903 }, { "epoch": 0.30553215388935695, "grad_norm": 12.29064655303955, "learning_rate": 8.011609460069745e-05, "loss": 32.8672, "step": 1904 }, { "epoch": 0.3056926224575761, "grad_norm": 12.492523193359375, "learning_rate": 8.009563895387913e-05, "loss": 32.7734, "step": 1905 }, { "epoch": 0.30585309102579533, "grad_norm": 11.949992179870605, "learning_rate": 8.007517540499448e-05, "loss": 32.7422, "step": 1906 }, { "epoch": 0.3060135595940145, "grad_norm": 12.266684532165527, "learning_rate": 8.005470395941648e-05, "loss": 32.8125, "step": 1907 }, { "epoch": 0.3061740281622337, "grad_norm": 12.480427742004395, "learning_rate": 8.003422462252025e-05, "loss": 32.7188, "step": 1908 }, { "epoch": 0.30633449673045293, "grad_norm": 12.020862579345703, "learning_rate": 8.001373739968294e-05, "loss": 32.7344, "step": 1909 }, { "epoch": 0.3064949652986721, "grad_norm": 12.153523445129395, "learning_rate": 7.999324229628375e-05, "loss": 32.75, "step": 1910 }, { "epoch": 0.3066554338668913, "grad_norm": 12.079179763793945, "learning_rate": 7.997273931770403e-05, "loss": 32.7266, "step": 1911 }, { "epoch": 0.30681590243511053, "grad_norm": 12.014328002929688, "learning_rate": 7.995222846932713e-05, "loss": 32.7812, "step": 1912 }, { "epoch": 0.3069763710033297, "grad_norm": 12.26723861694336, "learning_rate": 7.993170975653847e-05, "loss": 32.7266, "step": 1913 }, { "epoch": 0.3071368395715489, "grad_norm": 12.2319974899292, "learning_rate": 7.991118318472557e-05, "loss": 32.6172, "step": 1914 }, { "epoch": 0.30729730813976813, "grad_norm": 12.140921592712402, "learning_rate": 7.989064875927797e-05, "loss": 32.6797, "step": 1915 }, { "epoch": 0.3074577767079873, "grad_norm": 11.955341339111328, "learning_rate": 7.987010648558731e-05, "loss": 32.7266, "step": 1916 }, { "epoch": 0.3076182452762065, "grad_norm": 12.209967613220215, "learning_rate": 7.984955636904726e-05, "loss": 32.6953, "step": 1917 }, { "epoch": 0.30777871384442573, "grad_norm": 12.21218490600586, "learning_rate": 7.982899841505359e-05, "loss": 32.7188, "step": 1918 }, { "epoch": 0.3079391824126449, "grad_norm": 12.214058876037598, "learning_rate": 7.980843262900409e-05, "loss": 32.6406, "step": 1919 }, { "epoch": 0.3080996509808641, "grad_norm": 12.392106056213379, "learning_rate": 7.978785901629862e-05, "loss": 32.7031, "step": 1920 }, { "epoch": 0.30826011954908333, "grad_norm": 12.475071907043457, "learning_rate": 7.976727758233908e-05, "loss": 32.6328, "step": 1921 }, { "epoch": 0.3084205881173025, "grad_norm": 12.022991180419922, "learning_rate": 7.974668833252945e-05, "loss": 32.6328, "step": 1922 }, { "epoch": 0.3085810566855217, "grad_norm": 12.203106880187988, "learning_rate": 7.972609127227575e-05, "loss": 32.6484, "step": 1923 }, { "epoch": 0.30874152525374093, "grad_norm": 12.213427543640137, "learning_rate": 7.970548640698606e-05, "loss": 32.625, "step": 1924 }, { "epoch": 0.3089019938219601, "grad_norm": 12.266040802001953, "learning_rate": 7.968487374207048e-05, "loss": 32.625, "step": 1925 }, { "epoch": 0.3090624623901793, "grad_norm": 12.338322639465332, "learning_rate": 7.96642532829412e-05, "loss": 32.6328, "step": 1926 }, { "epoch": 0.30922293095839853, "grad_norm": 12.147542953491211, "learning_rate": 7.96436250350124e-05, "loss": 32.6797, "step": 1927 }, { "epoch": 0.3093833995266177, "grad_norm": 11.89585018157959, "learning_rate": 7.962298900370038e-05, "loss": 32.7266, "step": 1928 }, { "epoch": 0.3095438680948369, "grad_norm": 12.091941833496094, "learning_rate": 7.960234519442341e-05, "loss": 32.6484, "step": 1929 }, { "epoch": 0.30970433666305613, "grad_norm": 12.209465980529785, "learning_rate": 7.958169361260187e-05, "loss": 32.5938, "step": 1930 }, { "epoch": 0.3098648052312753, "grad_norm": 12.349709510803223, "learning_rate": 7.95610342636581e-05, "loss": 32.6953, "step": 1931 }, { "epoch": 0.3100252737994945, "grad_norm": 12.471278190612793, "learning_rate": 7.954036715301655e-05, "loss": 32.6719, "step": 1932 }, { "epoch": 0.31018574236771373, "grad_norm": 12.314888954162598, "learning_rate": 7.95196922861037e-05, "loss": 32.7656, "step": 1933 }, { "epoch": 0.3103462109359329, "grad_norm": 12.523877143859863, "learning_rate": 7.949900966834802e-05, "loss": 32.6719, "step": 1934 }, { "epoch": 0.3105066795041521, "grad_norm": 12.266151428222656, "learning_rate": 7.947831930518006e-05, "loss": 32.6406, "step": 1935 }, { "epoch": 0.31066714807237134, "grad_norm": 12.265656471252441, "learning_rate": 7.94576212020324e-05, "loss": 32.6172, "step": 1936 }, { "epoch": 0.3108276166405905, "grad_norm": 11.89580249786377, "learning_rate": 7.943691536433961e-05, "loss": 32.7656, "step": 1937 }, { "epoch": 0.3109880852088097, "grad_norm": 12.458654403686523, "learning_rate": 7.941620179753834e-05, "loss": 32.6641, "step": 1938 }, { "epoch": 0.31114855377702894, "grad_norm": 12.13284683227539, "learning_rate": 7.939548050706726e-05, "loss": 32.6562, "step": 1939 }, { "epoch": 0.3113090223452481, "grad_norm": 12.2626953125, "learning_rate": 7.937475149836706e-05, "loss": 32.6641, "step": 1940 }, { "epoch": 0.3114694909134673, "grad_norm": 12.142154693603516, "learning_rate": 7.935401477688043e-05, "loss": 32.6719, "step": 1941 }, { "epoch": 0.31162995948168654, "grad_norm": 12.353893280029297, "learning_rate": 7.933327034805211e-05, "loss": 32.6797, "step": 1942 }, { "epoch": 0.3117904280499057, "grad_norm": 12.009218215942383, "learning_rate": 7.931251821732891e-05, "loss": 32.6953, "step": 1943 }, { "epoch": 0.3119508966181249, "grad_norm": 12.38861083984375, "learning_rate": 7.929175839015956e-05, "loss": 32.5938, "step": 1944 }, { "epoch": 0.31211136518634414, "grad_norm": 11.953731536865234, "learning_rate": 7.927099087199492e-05, "loss": 32.6875, "step": 1945 }, { "epoch": 0.3122718337545633, "grad_norm": 12.130005836486816, "learning_rate": 7.925021566828777e-05, "loss": 32.6094, "step": 1946 }, { "epoch": 0.3124323023227825, "grad_norm": 12.275365829467773, "learning_rate": 7.922943278449297e-05, "loss": 32.625, "step": 1947 }, { "epoch": 0.31259277089100174, "grad_norm": 12.071160316467285, "learning_rate": 7.92086422260674e-05, "loss": 32.6953, "step": 1948 }, { "epoch": 0.3127532394592209, "grad_norm": 12.02635383605957, "learning_rate": 7.918784399846993e-05, "loss": 32.9453, "step": 1949 }, { "epoch": 0.3129137080274401, "grad_norm": 12.172542572021484, "learning_rate": 7.916703810716144e-05, "loss": 33.0625, "step": 1950 }, { "epoch": 0.31307417659565934, "grad_norm": 12.5894136428833, "learning_rate": 7.914622455760482e-05, "loss": 32.7891, "step": 1951 }, { "epoch": 0.3132346451638785, "grad_norm": 12.66975212097168, "learning_rate": 7.9125403355265e-05, "loss": 32.8125, "step": 1952 }, { "epoch": 0.3133951137320977, "grad_norm": 12.295248031616211, "learning_rate": 7.91045745056089e-05, "loss": 32.7812, "step": 1953 }, { "epoch": 0.31355558230031694, "grad_norm": 12.23631763458252, "learning_rate": 7.908373801410546e-05, "loss": 32.8047, "step": 1954 }, { "epoch": 0.3137160508685361, "grad_norm": 12.024572372436523, "learning_rate": 7.906289388622561e-05, "loss": 32.7734, "step": 1955 }, { "epoch": 0.3138765194367553, "grad_norm": 12.405295372009277, "learning_rate": 7.90420421274423e-05, "loss": 32.75, "step": 1956 }, { "epoch": 0.31403698800497454, "grad_norm": 12.400248527526855, "learning_rate": 7.902118274323043e-05, "loss": 32.9453, "step": 1957 }, { "epoch": 0.3141974565731937, "grad_norm": 12.139070510864258, "learning_rate": 7.900031573906701e-05, "loss": 32.7734, "step": 1958 }, { "epoch": 0.3143579251414129, "grad_norm": 12.208471298217773, "learning_rate": 7.897944112043096e-05, "loss": 32.8047, "step": 1959 }, { "epoch": 0.31451839370963214, "grad_norm": 12.159788131713867, "learning_rate": 7.895855889280321e-05, "loss": 32.7812, "step": 1960 }, { "epoch": 0.3146788622778513, "grad_norm": 12.041024208068848, "learning_rate": 7.893766906166674e-05, "loss": 32.75, "step": 1961 }, { "epoch": 0.3148393308460705, "grad_norm": 12.082608222961426, "learning_rate": 7.891677163250647e-05, "loss": 32.7891, "step": 1962 }, { "epoch": 0.31499979941428974, "grad_norm": 12.016382217407227, "learning_rate": 7.889586661080934e-05, "loss": 32.6875, "step": 1963 }, { "epoch": 0.3151602679825089, "grad_norm": 12.08211898803711, "learning_rate": 7.887495400206428e-05, "loss": 32.7109, "step": 1964 }, { "epoch": 0.3153207365507281, "grad_norm": 12.098043441772461, "learning_rate": 7.885403381176223e-05, "loss": 32.6875, "step": 1965 }, { "epoch": 0.31548120511894734, "grad_norm": 12.266938209533691, "learning_rate": 7.883310604539609e-05, "loss": 32.75, "step": 1966 }, { "epoch": 0.3156416736871665, "grad_norm": 12.333539962768555, "learning_rate": 7.881217070846074e-05, "loss": 32.6953, "step": 1967 }, { "epoch": 0.3158021422553857, "grad_norm": 12.347479820251465, "learning_rate": 7.879122780645312e-05, "loss": 32.6641, "step": 1968 }, { "epoch": 0.31596261082360494, "grad_norm": 12.14940357208252, "learning_rate": 7.877027734487205e-05, "loss": 32.6641, "step": 1969 }, { "epoch": 0.3161230793918241, "grad_norm": 12.655757904052734, "learning_rate": 7.874931932921842e-05, "loss": 32.6094, "step": 1970 }, { "epoch": 0.3162835479600433, "grad_norm": 12.074747085571289, "learning_rate": 7.87283537649951e-05, "loss": 32.6797, "step": 1971 }, { "epoch": 0.31644401652826254, "grad_norm": 12.268176078796387, "learning_rate": 7.870738065770688e-05, "loss": 32.6953, "step": 1972 }, { "epoch": 0.3166044850964817, "grad_norm": 12.079723358154297, "learning_rate": 7.868640001286058e-05, "loss": 32.7344, "step": 1973 }, { "epoch": 0.3167649536647009, "grad_norm": 12.139418601989746, "learning_rate": 7.866541183596497e-05, "loss": 32.6562, "step": 1974 }, { "epoch": 0.31692542223292014, "grad_norm": 12.401458740234375, "learning_rate": 7.864441613253085e-05, "loss": 32.6719, "step": 1975 }, { "epoch": 0.3170858908011393, "grad_norm": 12.140576362609863, "learning_rate": 7.862341290807092e-05, "loss": 32.6641, "step": 1976 }, { "epoch": 0.3172463593693585, "grad_norm": 12.21422004699707, "learning_rate": 7.86024021680999e-05, "loss": 32.6406, "step": 1977 }, { "epoch": 0.31740682793757774, "grad_norm": 12.072349548339844, "learning_rate": 7.858138391813453e-05, "loss": 32.6406, "step": 1978 }, { "epoch": 0.3175672965057969, "grad_norm": 12.778155326843262, "learning_rate": 7.856035816369341e-05, "loss": 32.5625, "step": 1979 }, { "epoch": 0.3177277650740161, "grad_norm": 12.075343132019043, "learning_rate": 7.853932491029721e-05, "loss": 32.6641, "step": 1980 }, { "epoch": 0.31788823364223534, "grad_norm": 12.207056045532227, "learning_rate": 7.85182841634685e-05, "loss": 32.6562, "step": 1981 }, { "epoch": 0.3180487022104545, "grad_norm": 12.204526901245117, "learning_rate": 7.849723592873186e-05, "loss": 32.6328, "step": 1982 }, { "epoch": 0.3182091707786737, "grad_norm": 12.19892692565918, "learning_rate": 7.847618021161382e-05, "loss": 32.6172, "step": 1983 }, { "epoch": 0.31836963934689294, "grad_norm": 12.142447471618652, "learning_rate": 7.84551170176429e-05, "loss": 32.6797, "step": 1984 }, { "epoch": 0.3185301079151121, "grad_norm": 12.392160415649414, "learning_rate": 7.843404635234952e-05, "loss": 32.6562, "step": 1985 }, { "epoch": 0.3186905764833313, "grad_norm": 12.382942199707031, "learning_rate": 7.841296822126614e-05, "loss": 32.5938, "step": 1986 }, { "epoch": 0.31885104505155054, "grad_norm": 12.323341369628906, "learning_rate": 7.839188262992712e-05, "loss": 32.6562, "step": 1987 }, { "epoch": 0.3190115136197697, "grad_norm": 12.08413028717041, "learning_rate": 7.83707895838688e-05, "loss": 32.7656, "step": 1988 }, { "epoch": 0.3191719821879889, "grad_norm": 12.30366039276123, "learning_rate": 7.834968908862949e-05, "loss": 32.6719, "step": 1989 }, { "epoch": 0.31933245075620814, "grad_norm": 12.101174354553223, "learning_rate": 7.832858114974946e-05, "loss": 32.7031, "step": 1990 }, { "epoch": 0.3194929193244273, "grad_norm": 12.340744018554688, "learning_rate": 7.830746577277089e-05, "loss": 32.6484, "step": 1991 }, { "epoch": 0.3196533878926465, "grad_norm": 12.199697494506836, "learning_rate": 7.828634296323796e-05, "loss": 32.6172, "step": 1992 }, { "epoch": 0.31981385646086574, "grad_norm": 12.32124137878418, "learning_rate": 7.826521272669678e-05, "loss": 32.6016, "step": 1993 }, { "epoch": 0.3199743250290849, "grad_norm": 12.3399658203125, "learning_rate": 7.82440750686954e-05, "loss": 32.6016, "step": 1994 }, { "epoch": 0.3201347935973041, "grad_norm": 12.5255765914917, "learning_rate": 7.822292999478387e-05, "loss": 32.6172, "step": 1995 }, { "epoch": 0.32029526216552334, "grad_norm": 12.013086318969727, "learning_rate": 7.820177751051412e-05, "loss": 32.7031, "step": 1996 }, { "epoch": 0.3204557307337425, "grad_norm": 12.16789436340332, "learning_rate": 7.818061762144005e-05, "loss": 32.8906, "step": 1997 }, { "epoch": 0.3206161993019617, "grad_norm": 12.096217155456543, "learning_rate": 7.815945033311753e-05, "loss": 32.7344, "step": 1998 }, { "epoch": 0.32077666787018094, "grad_norm": 12.13414192199707, "learning_rate": 7.813827565110432e-05, "loss": 32.7188, "step": 1999 }, { "epoch": 0.3209371364384001, "grad_norm": 12.184038162231445, "learning_rate": 7.81170935809602e-05, "loss": 33.0625, "step": 2000 }, { "epoch": 0.3210976050066193, "grad_norm": 12.13788890838623, "learning_rate": 7.809590412824682e-05, "loss": 32.9844, "step": 2001 }, { "epoch": 0.32125807357483854, "grad_norm": 12.304027557373047, "learning_rate": 7.80747072985278e-05, "loss": 32.9609, "step": 2002 }, { "epoch": 0.3214185421430577, "grad_norm": 12.174406051635742, "learning_rate": 7.805350309736866e-05, "loss": 32.8906, "step": 2003 }, { "epoch": 0.3215790107112769, "grad_norm": 12.23380184173584, "learning_rate": 7.803229153033691e-05, "loss": 32.6953, "step": 2004 }, { "epoch": 0.32173947927949614, "grad_norm": 12.407094955444336, "learning_rate": 7.801107260300198e-05, "loss": 32.7578, "step": 2005 }, { "epoch": 0.3218999478477153, "grad_norm": 12.500799179077148, "learning_rate": 7.79898463209352e-05, "loss": 32.8594, "step": 2006 }, { "epoch": 0.3220604164159345, "grad_norm": 12.167032241821289, "learning_rate": 7.796861268970985e-05, "loss": 32.6641, "step": 2007 }, { "epoch": 0.32222088498415374, "grad_norm": 12.39539623260498, "learning_rate": 7.794737171490116e-05, "loss": 32.8047, "step": 2008 }, { "epoch": 0.3223813535523729, "grad_norm": 12.146407127380371, "learning_rate": 7.792612340208625e-05, "loss": 32.75, "step": 2009 }, { "epoch": 0.3225418221205921, "grad_norm": 12.092911720275879, "learning_rate": 7.79048677568442e-05, "loss": 32.6562, "step": 2010 }, { "epoch": 0.32270229068881134, "grad_norm": 12.078593254089355, "learning_rate": 7.788360478475601e-05, "loss": 32.7656, "step": 2011 }, { "epoch": 0.3228627592570305, "grad_norm": 12.0133638381958, "learning_rate": 7.786233449140457e-05, "loss": 32.8203, "step": 2012 }, { "epoch": 0.3230232278252497, "grad_norm": 12.035638809204102, "learning_rate": 7.784105688237475e-05, "loss": 32.6875, "step": 2013 }, { "epoch": 0.32318369639346894, "grad_norm": 12.198573112487793, "learning_rate": 7.781977196325327e-05, "loss": 32.6641, "step": 2014 }, { "epoch": 0.3233441649616881, "grad_norm": 12.305093765258789, "learning_rate": 7.779847973962884e-05, "loss": 32.7109, "step": 2015 }, { "epoch": 0.3235046335299073, "grad_norm": 12.226032257080078, "learning_rate": 7.777718021709205e-05, "loss": 32.6953, "step": 2016 }, { "epoch": 0.32366510209812654, "grad_norm": 12.152571678161621, "learning_rate": 7.77558734012354e-05, "loss": 32.6797, "step": 2017 }, { "epoch": 0.3238255706663457, "grad_norm": 12.410238265991211, "learning_rate": 7.773455929765333e-05, "loss": 32.6406, "step": 2018 }, { "epoch": 0.3239860392345649, "grad_norm": 12.074629783630371, "learning_rate": 7.771323791194219e-05, "loss": 32.6484, "step": 2019 }, { "epoch": 0.32414650780278415, "grad_norm": 12.146078109741211, "learning_rate": 7.76919092497002e-05, "loss": 32.6719, "step": 2020 }, { "epoch": 0.3243069763710033, "grad_norm": 12.012941360473633, "learning_rate": 7.767057331652756e-05, "loss": 32.7109, "step": 2021 }, { "epoch": 0.3244674449392225, "grad_norm": 12.150534629821777, "learning_rate": 7.764923011802632e-05, "loss": 32.6172, "step": 2022 }, { "epoch": 0.32462791350744175, "grad_norm": 12.214831352233887, "learning_rate": 7.762787965980045e-05, "loss": 32.5938, "step": 2023 }, { "epoch": 0.3247883820756609, "grad_norm": 12.340642929077148, "learning_rate": 7.760652194745588e-05, "loss": 32.6406, "step": 2024 }, { "epoch": 0.3249488506438801, "grad_norm": 12.1421480178833, "learning_rate": 7.758515698660037e-05, "loss": 32.625, "step": 2025 }, { "epoch": 0.32510931921209935, "grad_norm": 12.335497856140137, "learning_rate": 7.756378478284361e-05, "loss": 32.6172, "step": 2026 }, { "epoch": 0.3252697877803185, "grad_norm": 12.134269714355469, "learning_rate": 7.75424053417972e-05, "loss": 32.6016, "step": 2027 }, { "epoch": 0.3254302563485377, "grad_norm": 12.516704559326172, "learning_rate": 7.752101866907466e-05, "loss": 32.5625, "step": 2028 }, { "epoch": 0.32559072491675695, "grad_norm": 12.139418601989746, "learning_rate": 7.749962477029135e-05, "loss": 32.6562, "step": 2029 }, { "epoch": 0.3257511934849761, "grad_norm": 12.193859100341797, "learning_rate": 7.747822365106458e-05, "loss": 32.6016, "step": 2030 }, { "epoch": 0.32591166205319533, "grad_norm": 12.278441429138184, "learning_rate": 7.745681531701357e-05, "loss": 32.6094, "step": 2031 }, { "epoch": 0.32607213062141455, "grad_norm": 12.26615047454834, "learning_rate": 7.743539977375934e-05, "loss": 32.6797, "step": 2032 }, { "epoch": 0.3262325991896337, "grad_norm": 12.280115127563477, "learning_rate": 7.74139770269249e-05, "loss": 32.6562, "step": 2033 }, { "epoch": 0.32639306775785293, "grad_norm": 12.14997673034668, "learning_rate": 7.739254708213512e-05, "loss": 32.6094, "step": 2034 }, { "epoch": 0.32655353632607215, "grad_norm": 12.087248802185059, "learning_rate": 7.737110994501674e-05, "loss": 32.7109, "step": 2035 }, { "epoch": 0.3267140048942913, "grad_norm": 12.027435302734375, "learning_rate": 7.734966562119843e-05, "loss": 32.75, "step": 2036 }, { "epoch": 0.32687447346251053, "grad_norm": 12.207540512084961, "learning_rate": 7.73282141163107e-05, "loss": 32.6875, "step": 2037 }, { "epoch": 0.32703494203072975, "grad_norm": 12.396987915039062, "learning_rate": 7.730675543598598e-05, "loss": 32.6328, "step": 2038 }, { "epoch": 0.3271954105989489, "grad_norm": 12.004434585571289, "learning_rate": 7.728528958585856e-05, "loss": 32.6406, "step": 2039 }, { "epoch": 0.32735587916716813, "grad_norm": 12.205455780029297, "learning_rate": 7.726381657156463e-05, "loss": 32.6016, "step": 2040 }, { "epoch": 0.32751634773538735, "grad_norm": 12.068554878234863, "learning_rate": 7.724233639874227e-05, "loss": 32.6094, "step": 2041 }, { "epoch": 0.3276768163036065, "grad_norm": 12.399735450744629, "learning_rate": 7.72208490730314e-05, "loss": 32.5781, "step": 2042 }, { "epoch": 0.32783728487182573, "grad_norm": 12.139638900756836, "learning_rate": 7.719935460007388e-05, "loss": 32.6484, "step": 2043 }, { "epoch": 0.32799775344004495, "grad_norm": 12.132695198059082, "learning_rate": 7.717785298551337e-05, "loss": 32.6641, "step": 2044 }, { "epoch": 0.3281582220082641, "grad_norm": 12.322469711303711, "learning_rate": 7.715634423499545e-05, "loss": 32.5547, "step": 2045 }, { "epoch": 0.32831869057648333, "grad_norm": 12.545904159545898, "learning_rate": 7.713482835416759e-05, "loss": 32.6641, "step": 2046 }, { "epoch": 0.32847915914470255, "grad_norm": 12.02036190032959, "learning_rate": 7.711330534867909e-05, "loss": 32.8359, "step": 2047 }, { "epoch": 0.3286396277129217, "grad_norm": 12.149779319763184, "learning_rate": 7.709177522418117e-05, "loss": 32.8203, "step": 2048 }, { "epoch": 0.32880009628114093, "grad_norm": 12.096420288085938, "learning_rate": 7.707023798632686e-05, "loss": 32.9766, "step": 2049 }, { "epoch": 0.32896056484936015, "grad_norm": 12.109066009521484, "learning_rate": 7.704869364077111e-05, "loss": 33.0781, "step": 2050 }, { "epoch": 0.3291210334175793, "grad_norm": 12.16655445098877, "learning_rate": 7.702714219317068e-05, "loss": 32.7969, "step": 2051 }, { "epoch": 0.32928150198579853, "grad_norm": 12.415210723876953, "learning_rate": 7.700558364918429e-05, "loss": 32.7734, "step": 2052 }, { "epoch": 0.32944197055401775, "grad_norm": 12.435466766357422, "learning_rate": 7.698401801447241e-05, "loss": 32.8828, "step": 2053 }, { "epoch": 0.3296024391222369, "grad_norm": 12.419699668884277, "learning_rate": 7.696244529469742e-05, "loss": 32.7031, "step": 2054 }, { "epoch": 0.32976290769045613, "grad_norm": 12.350029945373535, "learning_rate": 7.694086549552364e-05, "loss": 32.6875, "step": 2055 }, { "epoch": 0.32992337625867535, "grad_norm": 12.15404224395752, "learning_rate": 7.691927862261707e-05, "loss": 32.7812, "step": 2056 }, { "epoch": 0.3300838448268945, "grad_norm": 12.276366233825684, "learning_rate": 7.689768468164575e-05, "loss": 32.7656, "step": 2057 }, { "epoch": 0.33024431339511373, "grad_norm": 12.017070770263672, "learning_rate": 7.687608367827945e-05, "loss": 32.7109, "step": 2058 }, { "epoch": 0.33040478196333295, "grad_norm": 12.074450492858887, "learning_rate": 7.685447561818986e-05, "loss": 32.7422, "step": 2059 }, { "epoch": 0.3305652505315521, "grad_norm": 12.267860412597656, "learning_rate": 7.683286050705052e-05, "loss": 32.6953, "step": 2060 }, { "epoch": 0.33072571909977133, "grad_norm": 12.22042179107666, "learning_rate": 7.681123835053676e-05, "loss": 32.6562, "step": 2061 }, { "epoch": 0.33088618766799055, "grad_norm": 11.950933456420898, "learning_rate": 7.678960915432584e-05, "loss": 32.7109, "step": 2062 }, { "epoch": 0.3310466562362097, "grad_norm": 12.015518188476562, "learning_rate": 7.676797292409684e-05, "loss": 32.7812, "step": 2063 }, { "epoch": 0.33120712480442893, "grad_norm": 12.099163055419922, "learning_rate": 7.674632966553066e-05, "loss": 32.7734, "step": 2064 }, { "epoch": 0.33136759337264815, "grad_norm": 12.078598976135254, "learning_rate": 7.672467938431006e-05, "loss": 32.7188, "step": 2065 }, { "epoch": 0.3315280619408673, "grad_norm": 12.265091896057129, "learning_rate": 7.670302208611966e-05, "loss": 32.6484, "step": 2066 }, { "epoch": 0.33168853050908653, "grad_norm": 12.022340774536133, "learning_rate": 7.668135777664594e-05, "loss": 32.7344, "step": 2067 }, { "epoch": 0.33184899907730575, "grad_norm": 12.2685546875, "learning_rate": 7.665968646157716e-05, "loss": 32.5859, "step": 2068 }, { "epoch": 0.3320094676455249, "grad_norm": 12.012083053588867, "learning_rate": 7.663800814660343e-05, "loss": 32.7812, "step": 2069 }, { "epoch": 0.33216993621374413, "grad_norm": 12.209261894226074, "learning_rate": 7.661632283741678e-05, "loss": 32.7188, "step": 2070 }, { "epoch": 0.33233040478196335, "grad_norm": 12.076820373535156, "learning_rate": 7.659463053971096e-05, "loss": 32.7344, "step": 2071 }, { "epoch": 0.3324908733501825, "grad_norm": 12.28513240814209, "learning_rate": 7.657293125918165e-05, "loss": 32.625, "step": 2072 }, { "epoch": 0.33265134191840173, "grad_norm": 12.211836814880371, "learning_rate": 7.655122500152631e-05, "loss": 32.5703, "step": 2073 }, { "epoch": 0.33281181048662095, "grad_norm": 12.012642860412598, "learning_rate": 7.652951177244425e-05, "loss": 32.6953, "step": 2074 }, { "epoch": 0.3329722790548401, "grad_norm": 12.089885711669922, "learning_rate": 7.650779157763661e-05, "loss": 32.8125, "step": 2075 }, { "epoch": 0.33313274762305933, "grad_norm": 12.155937194824219, "learning_rate": 7.648606442280635e-05, "loss": 32.75, "step": 2076 }, { "epoch": 0.33329321619127855, "grad_norm": 12.389664649963379, "learning_rate": 7.646433031365827e-05, "loss": 32.6172, "step": 2077 }, { "epoch": 0.3334536847594977, "grad_norm": 12.262886047363281, "learning_rate": 7.6442589255899e-05, "loss": 32.5859, "step": 2078 }, { "epoch": 0.33361415332771693, "grad_norm": 12.401394844055176, "learning_rate": 7.642084125523696e-05, "loss": 32.625, "step": 2079 }, { "epoch": 0.33377462189593615, "grad_norm": 12.079198837280273, "learning_rate": 7.639908631738241e-05, "loss": 32.6484, "step": 2080 }, { "epoch": 0.3339350904641553, "grad_norm": 11.949688911437988, "learning_rate": 7.637732444804748e-05, "loss": 32.7109, "step": 2081 }, { "epoch": 0.33409555903237453, "grad_norm": 12.324986457824707, "learning_rate": 7.635555565294606e-05, "loss": 32.6016, "step": 2082 }, { "epoch": 0.33425602760059375, "grad_norm": 12.385835647583008, "learning_rate": 7.633377993779386e-05, "loss": 32.5234, "step": 2083 }, { "epoch": 0.3344164961688129, "grad_norm": 12.261664390563965, "learning_rate": 7.631199730830844e-05, "loss": 32.6016, "step": 2084 }, { "epoch": 0.33457696473703213, "grad_norm": 12.207337379455566, "learning_rate": 7.629020777020916e-05, "loss": 32.6016, "step": 2085 }, { "epoch": 0.33473743330525135, "grad_norm": 12.642672538757324, "learning_rate": 7.626841132921721e-05, "loss": 32.6016, "step": 2086 }, { "epoch": 0.3348979018734705, "grad_norm": 12.134981155395508, "learning_rate": 7.624660799105555e-05, "loss": 32.625, "step": 2087 }, { "epoch": 0.33505837044168973, "grad_norm": 12.266752243041992, "learning_rate": 7.622479776144895e-05, "loss": 32.6328, "step": 2088 }, { "epoch": 0.33521883900990895, "grad_norm": 12.257709503173828, "learning_rate": 7.620298064612408e-05, "loss": 32.6406, "step": 2089 }, { "epoch": 0.3353793075781281, "grad_norm": 12.196718215942383, "learning_rate": 7.618115665080934e-05, "loss": 32.625, "step": 2090 }, { "epoch": 0.33553977614634733, "grad_norm": 12.011063575744629, "learning_rate": 7.61593257812349e-05, "loss": 32.6484, "step": 2091 }, { "epoch": 0.33570024471456655, "grad_norm": 12.015287399291992, "learning_rate": 7.613748804313284e-05, "loss": 32.6953, "step": 2092 }, { "epoch": 0.3358607132827857, "grad_norm": 12.211474418640137, "learning_rate": 7.611564344223698e-05, "loss": 32.6641, "step": 2093 }, { "epoch": 0.33602118185100494, "grad_norm": 12.464223861694336, "learning_rate": 7.609379198428293e-05, "loss": 32.6328, "step": 2094 }, { "epoch": 0.33618165041922415, "grad_norm": 12.27436351776123, "learning_rate": 7.607193367500813e-05, "loss": 32.6406, "step": 2095 }, { "epoch": 0.3363421189874433, "grad_norm": 12.392304420471191, "learning_rate": 7.605006852015184e-05, "loss": 32.5938, "step": 2096 }, { "epoch": 0.33650258755566254, "grad_norm": 12.5142183303833, "learning_rate": 7.602819652545503e-05, "loss": 32.6094, "step": 2097 }, { "epoch": 0.33666305612388175, "grad_norm": 12.083832740783691, "learning_rate": 7.600631769666058e-05, "loss": 32.8203, "step": 2098 }, { "epoch": 0.3368235246921009, "grad_norm": 12.180719375610352, "learning_rate": 7.598443203951309e-05, "loss": 32.9531, "step": 2099 }, { "epoch": 0.33698399326032014, "grad_norm": 12.317481994628906, "learning_rate": 7.596253955975895e-05, "loss": 33.0391, "step": 2100 }, { "epoch": 0.33714446182853935, "grad_norm": 12.109567642211914, "learning_rate": 7.594064026314638e-05, "loss": 33.0, "step": 2101 }, { "epoch": 0.3373049303967585, "grad_norm": 12.038192749023438, "learning_rate": 7.591873415542538e-05, "loss": 32.7891, "step": 2102 }, { "epoch": 0.33746539896497774, "grad_norm": 12.03904914855957, "learning_rate": 7.589682124234772e-05, "loss": 32.9922, "step": 2103 }, { "epoch": 0.33762586753319695, "grad_norm": 12.292123794555664, "learning_rate": 7.587490152966694e-05, "loss": 32.7266, "step": 2104 }, { "epoch": 0.3377863361014161, "grad_norm": 12.102675437927246, "learning_rate": 7.585297502313846e-05, "loss": 32.8438, "step": 2105 }, { "epoch": 0.33794680466963534, "grad_norm": 12.339762687683105, "learning_rate": 7.583104172851936e-05, "loss": 32.7344, "step": 2106 }, { "epoch": 0.33810727323785456, "grad_norm": 12.21908187866211, "learning_rate": 7.580910165156858e-05, "loss": 32.6875, "step": 2107 }, { "epoch": 0.3382677418060737, "grad_norm": 12.22819995880127, "learning_rate": 7.578715479804683e-05, "loss": 32.7578, "step": 2108 }, { "epoch": 0.33842821037429294, "grad_norm": 12.104158401489258, "learning_rate": 7.576520117371656e-05, "loss": 32.6797, "step": 2109 }, { "epoch": 0.33858867894251216, "grad_norm": 12.277253150939941, "learning_rate": 7.574324078434206e-05, "loss": 32.7266, "step": 2110 }, { "epoch": 0.3387491475107313, "grad_norm": 12.088164329528809, "learning_rate": 7.572127363568933e-05, "loss": 32.7891, "step": 2111 }, { "epoch": 0.33890961607895054, "grad_norm": 11.953847885131836, "learning_rate": 7.56992997335262e-05, "loss": 32.6328, "step": 2112 }, { "epoch": 0.33907008464716976, "grad_norm": 12.07366943359375, "learning_rate": 7.567731908362225e-05, "loss": 32.7578, "step": 2113 }, { "epoch": 0.3392305532153889, "grad_norm": 12.410852432250977, "learning_rate": 7.565533169174885e-05, "loss": 32.6172, "step": 2114 }, { "epoch": 0.33939102178360814, "grad_norm": 12.26280403137207, "learning_rate": 7.563333756367908e-05, "loss": 32.6406, "step": 2115 }, { "epoch": 0.33955149035182736, "grad_norm": 12.334229469299316, "learning_rate": 7.561133670518785e-05, "loss": 32.6719, "step": 2116 }, { "epoch": 0.3397119589200465, "grad_norm": 12.023713111877441, "learning_rate": 7.558932912205183e-05, "loss": 32.6797, "step": 2117 }, { "epoch": 0.33987242748826574, "grad_norm": 12.396377563476562, "learning_rate": 7.556731482004944e-05, "loss": 32.6406, "step": 2118 }, { "epoch": 0.34003289605648496, "grad_norm": 12.279824256896973, "learning_rate": 7.554529380496087e-05, "loss": 32.625, "step": 2119 }, { "epoch": 0.3401933646247041, "grad_norm": 12.1394681930542, "learning_rate": 7.552326608256806e-05, "loss": 32.6016, "step": 2120 }, { "epoch": 0.34035383319292334, "grad_norm": 12.133904457092285, "learning_rate": 7.550123165865472e-05, "loss": 32.6484, "step": 2121 }, { "epoch": 0.34051430176114256, "grad_norm": 12.20091438293457, "learning_rate": 7.547919053900637e-05, "loss": 32.6328, "step": 2122 }, { "epoch": 0.3406747703293617, "grad_norm": 12.023364067077637, "learning_rate": 7.545714272941017e-05, "loss": 32.7109, "step": 2123 }, { "epoch": 0.34083523889758094, "grad_norm": 12.170853614807129, "learning_rate": 7.543508823565515e-05, "loss": 32.6719, "step": 2124 }, { "epoch": 0.34099570746580016, "grad_norm": 12.040548324584961, "learning_rate": 7.541302706353204e-05, "loss": 32.6406, "step": 2125 }, { "epoch": 0.3411561760340193, "grad_norm": 12.263276100158691, "learning_rate": 7.539095921883335e-05, "loss": 32.6875, "step": 2126 }, { "epoch": 0.34131664460223854, "grad_norm": 12.182713508605957, "learning_rate": 7.536888470735329e-05, "loss": 32.6406, "step": 2127 }, { "epoch": 0.34147711317045776, "grad_norm": 12.401986122131348, "learning_rate": 7.534680353488787e-05, "loss": 32.6172, "step": 2128 }, { "epoch": 0.3416375817386769, "grad_norm": 12.196983337402344, "learning_rate": 7.532471570723489e-05, "loss": 32.5938, "step": 2129 }, { "epoch": 0.34179805030689614, "grad_norm": 12.263548851013184, "learning_rate": 7.530262123019378e-05, "loss": 32.5938, "step": 2130 }, { "epoch": 0.34195851887511536, "grad_norm": 12.198760032653809, "learning_rate": 7.52805201095658e-05, "loss": 32.5469, "step": 2131 }, { "epoch": 0.3421189874433345, "grad_norm": 12.588075637817383, "learning_rate": 7.525841235115394e-05, "loss": 32.5938, "step": 2132 }, { "epoch": 0.34227945601155374, "grad_norm": 11.948966026306152, "learning_rate": 7.523629796076294e-05, "loss": 32.6328, "step": 2133 }, { "epoch": 0.34243992457977296, "grad_norm": 12.260299682617188, "learning_rate": 7.521417694419923e-05, "loss": 32.6172, "step": 2134 }, { "epoch": 0.3426003931479921, "grad_norm": 12.198942184448242, "learning_rate": 7.519204930727104e-05, "loss": 32.6562, "step": 2135 }, { "epoch": 0.34276086171621134, "grad_norm": 12.389754295349121, "learning_rate": 7.516991505578834e-05, "loss": 32.6172, "step": 2136 }, { "epoch": 0.34292133028443056, "grad_norm": 12.074220657348633, "learning_rate": 7.514777419556278e-05, "loss": 32.6406, "step": 2137 }, { "epoch": 0.3430817988526497, "grad_norm": 12.329785346984863, "learning_rate": 7.512562673240778e-05, "loss": 32.5703, "step": 2138 }, { "epoch": 0.34324226742086894, "grad_norm": 12.01423454284668, "learning_rate": 7.51034726721385e-05, "loss": 32.7109, "step": 2139 }, { "epoch": 0.34340273598908816, "grad_norm": 12.444132804870605, "learning_rate": 7.508131202057185e-05, "loss": 32.5469, "step": 2140 }, { "epoch": 0.3435632045573073, "grad_norm": 12.262584686279297, "learning_rate": 7.505914478352641e-05, "loss": 32.5625, "step": 2141 }, { "epoch": 0.34372367312552654, "grad_norm": 12.391871452331543, "learning_rate": 7.50369709668225e-05, "loss": 32.6016, "step": 2142 }, { "epoch": 0.34388414169374576, "grad_norm": 12.400235176086426, "learning_rate": 7.501479057628227e-05, "loss": 32.6719, "step": 2143 }, { "epoch": 0.3440446102619649, "grad_norm": 12.455357551574707, "learning_rate": 7.499260361772947e-05, "loss": 32.6562, "step": 2144 }, { "epoch": 0.34420507883018414, "grad_norm": 12.198214530944824, "learning_rate": 7.49704100969896e-05, "loss": 32.625, "step": 2145 }, { "epoch": 0.34436554739840336, "grad_norm": 12.15054702758789, "learning_rate": 7.494821001988996e-05, "loss": 32.7422, "step": 2146 }, { "epoch": 0.3445260159666225, "grad_norm": 12.054112434387207, "learning_rate": 7.492600339225948e-05, "loss": 32.9141, "step": 2147 }, { "epoch": 0.34468648453484174, "grad_norm": 12.426119804382324, "learning_rate": 7.490379021992885e-05, "loss": 32.6641, "step": 2148 }, { "epoch": 0.34484695310306096, "grad_norm": 12.226665496826172, "learning_rate": 7.48815705087305e-05, "loss": 32.7656, "step": 2149 }, { "epoch": 0.3450074216712801, "grad_norm": 12.191387176513672, "learning_rate": 7.485934426449852e-05, "loss": 33.0078, "step": 2150 }, { "epoch": 0.34516789023949934, "grad_norm": 12.278726577758789, "learning_rate": 7.483711149306875e-05, "loss": 33.2266, "step": 2151 }, { "epoch": 0.34532835880771856, "grad_norm": 12.061503410339355, "learning_rate": 7.481487220027878e-05, "loss": 33.0469, "step": 2152 }, { "epoch": 0.3454888273759377, "grad_norm": 12.026934623718262, "learning_rate": 7.479262639196782e-05, "loss": 32.9531, "step": 2153 }, { "epoch": 0.34564929594415694, "grad_norm": 11.892868995666504, "learning_rate": 7.477037407397689e-05, "loss": 32.7578, "step": 2154 }, { "epoch": 0.34580976451237616, "grad_norm": 11.900732040405273, "learning_rate": 7.474811525214864e-05, "loss": 32.8203, "step": 2155 }, { "epoch": 0.3459702330805953, "grad_norm": 12.340561866760254, "learning_rate": 7.472584993232748e-05, "loss": 32.7656, "step": 2156 }, { "epoch": 0.34613070164881454, "grad_norm": 12.168110847473145, "learning_rate": 7.47035781203595e-05, "loss": 32.7109, "step": 2157 }, { "epoch": 0.34629117021703376, "grad_norm": 12.224162101745605, "learning_rate": 7.46812998220925e-05, "loss": 32.7422, "step": 2158 }, { "epoch": 0.3464516387852529, "grad_norm": 12.236382484436035, "learning_rate": 7.4659015043376e-05, "loss": 32.6094, "step": 2159 }, { "epoch": 0.34661210735347214, "grad_norm": 12.097586631774902, "learning_rate": 7.463672379006119e-05, "loss": 32.6875, "step": 2160 }, { "epoch": 0.34677257592169136, "grad_norm": 12.2130126953125, "learning_rate": 7.461442606800098e-05, "loss": 32.7969, "step": 2161 }, { "epoch": 0.3469330444899105, "grad_norm": 12.167923927307129, "learning_rate": 7.459212188304998e-05, "loss": 32.6484, "step": 2162 }, { "epoch": 0.34709351305812974, "grad_norm": 12.389789581298828, "learning_rate": 7.456981124106449e-05, "loss": 32.6953, "step": 2163 }, { "epoch": 0.34725398162634896, "grad_norm": 12.283058166503906, "learning_rate": 7.45474941479025e-05, "loss": 32.6953, "step": 2164 }, { "epoch": 0.3474144501945681, "grad_norm": 12.077667236328125, "learning_rate": 7.45251706094237e-05, "loss": 32.7266, "step": 2165 }, { "epoch": 0.34757491876278734, "grad_norm": 12.079412460327148, "learning_rate": 7.450284063148951e-05, "loss": 32.7109, "step": 2166 }, { "epoch": 0.34773538733100656, "grad_norm": 12.273990631103516, "learning_rate": 7.448050421996296e-05, "loss": 32.7891, "step": 2167 }, { "epoch": 0.3478958558992257, "grad_norm": 11.903678894042969, "learning_rate": 7.445816138070883e-05, "loss": 32.7578, "step": 2168 }, { "epoch": 0.34805632446744494, "grad_norm": 12.153571128845215, "learning_rate": 7.443581211959357e-05, "loss": 32.6406, "step": 2169 }, { "epoch": 0.34821679303566416, "grad_norm": 12.029052734375, "learning_rate": 7.441345644248532e-05, "loss": 32.8438, "step": 2170 }, { "epoch": 0.3483772616038833, "grad_norm": 12.403664588928223, "learning_rate": 7.439109435525389e-05, "loss": 32.5625, "step": 2171 }, { "epoch": 0.34853773017210254, "grad_norm": 12.072144508361816, "learning_rate": 7.43687258637708e-05, "loss": 32.5859, "step": 2172 }, { "epoch": 0.34869819874032176, "grad_norm": 12.14384651184082, "learning_rate": 7.434635097390925e-05, "loss": 32.7031, "step": 2173 }, { "epoch": 0.3488586673085409, "grad_norm": 12.325647354125977, "learning_rate": 7.432396969154406e-05, "loss": 32.625, "step": 2174 }, { "epoch": 0.34901913587676014, "grad_norm": 12.292840003967285, "learning_rate": 7.430158202255181e-05, "loss": 32.7188, "step": 2175 }, { "epoch": 0.34917960444497936, "grad_norm": 12.353443145751953, "learning_rate": 7.427918797281071e-05, "loss": 32.8203, "step": 2176 }, { "epoch": 0.3493400730131985, "grad_norm": 12.138967514038086, "learning_rate": 7.425678754820067e-05, "loss": 32.625, "step": 2177 }, { "epoch": 0.34950054158141775, "grad_norm": 12.149334907531738, "learning_rate": 7.423438075460326e-05, "loss": 32.6406, "step": 2178 }, { "epoch": 0.34966101014963696, "grad_norm": 12.46255111694336, "learning_rate": 7.421196759790172e-05, "loss": 32.5703, "step": 2179 }, { "epoch": 0.3498214787178561, "grad_norm": 12.396422386169434, "learning_rate": 7.418954808398095e-05, "loss": 32.5625, "step": 2180 }, { "epoch": 0.34998194728607535, "grad_norm": 12.162446022033691, "learning_rate": 7.416712221872755e-05, "loss": 32.625, "step": 2181 }, { "epoch": 0.35014241585429456, "grad_norm": 12.066083908081055, "learning_rate": 7.414469000802974e-05, "loss": 32.5938, "step": 2182 }, { "epoch": 0.3503028844225137, "grad_norm": 12.258280754089355, "learning_rate": 7.412225145777749e-05, "loss": 32.5703, "step": 2183 }, { "epoch": 0.35046335299073295, "grad_norm": 12.44849681854248, "learning_rate": 7.409980657386237e-05, "loss": 32.5391, "step": 2184 }, { "epoch": 0.35062382155895216, "grad_norm": 12.092304229736328, "learning_rate": 7.407735536217757e-05, "loss": 32.6406, "step": 2185 }, { "epoch": 0.3507842901271713, "grad_norm": 12.073243141174316, "learning_rate": 7.405489782861806e-05, "loss": 32.6797, "step": 2186 }, { "epoch": 0.35094475869539055, "grad_norm": 12.283534049987793, "learning_rate": 7.403243397908037e-05, "loss": 32.6328, "step": 2187 }, { "epoch": 0.35110522726360976, "grad_norm": 12.394241333007812, "learning_rate": 7.400996381946275e-05, "loss": 32.6016, "step": 2188 }, { "epoch": 0.35126569583182893, "grad_norm": 12.134099960327148, "learning_rate": 7.398748735566503e-05, "loss": 32.6562, "step": 2189 }, { "epoch": 0.35142616440004815, "grad_norm": 12.330605506896973, "learning_rate": 7.396500459358882e-05, "loss": 32.5938, "step": 2190 }, { "epoch": 0.35158663296826737, "grad_norm": 12.319780349731445, "learning_rate": 7.394251553913726e-05, "loss": 32.5703, "step": 2191 }, { "epoch": 0.35174710153648653, "grad_norm": 12.138465881347656, "learning_rate": 7.39200201982152e-05, "loss": 32.6016, "step": 2192 }, { "epoch": 0.35190757010470575, "grad_norm": 12.133731842041016, "learning_rate": 7.389751857672913e-05, "loss": 32.5859, "step": 2193 }, { "epoch": 0.35206803867292497, "grad_norm": 12.141603469848633, "learning_rate": 7.387501068058723e-05, "loss": 32.7031, "step": 2194 }, { "epoch": 0.35222850724114413, "grad_norm": 12.459044456481934, "learning_rate": 7.385249651569923e-05, "loss": 32.7031, "step": 2195 }, { "epoch": 0.35238897580936335, "grad_norm": 12.209897994995117, "learning_rate": 7.38299760879766e-05, "loss": 32.6172, "step": 2196 }, { "epoch": 0.35254944437758257, "grad_norm": 12.158629417419434, "learning_rate": 7.380744940333241e-05, "loss": 32.6406, "step": 2197 }, { "epoch": 0.35270991294580173, "grad_norm": 12.457180976867676, "learning_rate": 7.378491646768139e-05, "loss": 32.6016, "step": 2198 }, { "epoch": 0.35287038151402095, "grad_norm": 12.293951988220215, "learning_rate": 7.37623772869399e-05, "loss": 32.6953, "step": 2199 }, { "epoch": 0.35303085008224017, "grad_norm": 12.086517333984375, "learning_rate": 7.373983186702595e-05, "loss": 32.8281, "step": 2200 }, { "epoch": 0.35319131865045933, "grad_norm": 12.204671859741211, "learning_rate": 7.371728021385918e-05, "loss": 32.9688, "step": 2201 }, { "epoch": 0.35335178721867855, "grad_norm": 12.455398559570312, "learning_rate": 7.369472233336087e-05, "loss": 32.8203, "step": 2202 }, { "epoch": 0.35351225578689777, "grad_norm": 12.155475616455078, "learning_rate": 7.367215823145391e-05, "loss": 32.7578, "step": 2203 }, { "epoch": 0.35367272435511693, "grad_norm": 12.029694557189941, "learning_rate": 7.364958791406288e-05, "loss": 32.7578, "step": 2204 }, { "epoch": 0.35383319292333615, "grad_norm": 12.355827331542969, "learning_rate": 7.362701138711393e-05, "loss": 32.7344, "step": 2205 }, { "epoch": 0.35399366149155537, "grad_norm": 12.020431518554688, "learning_rate": 7.360442865653489e-05, "loss": 32.7734, "step": 2206 }, { "epoch": 0.35415413005977453, "grad_norm": 12.08755874633789, "learning_rate": 7.358183972825519e-05, "loss": 32.7188, "step": 2207 }, { "epoch": 0.35431459862799375, "grad_norm": 12.07912826538086, "learning_rate": 7.355924460820591e-05, "loss": 32.7578, "step": 2208 }, { "epoch": 0.35447506719621297, "grad_norm": 12.273898124694824, "learning_rate": 7.353664330231972e-05, "loss": 32.8281, "step": 2209 }, { "epoch": 0.35463553576443213, "grad_norm": 12.13957691192627, "learning_rate": 7.351403581653095e-05, "loss": 32.6562, "step": 2210 }, { "epoch": 0.35479600433265135, "grad_norm": 11.961817741394043, "learning_rate": 7.349142215677553e-05, "loss": 32.6484, "step": 2211 }, { "epoch": 0.35495647290087057, "grad_norm": 12.417837142944336, "learning_rate": 7.3468802328991e-05, "loss": 32.5703, "step": 2212 }, { "epoch": 0.35511694146908973, "grad_norm": 12.402538299560547, "learning_rate": 7.34461763391166e-05, "loss": 32.6484, "step": 2213 }, { "epoch": 0.35527741003730895, "grad_norm": 12.270025253295898, "learning_rate": 7.342354419309307e-05, "loss": 32.6562, "step": 2214 }, { "epoch": 0.35543787860552817, "grad_norm": 11.899141311645508, "learning_rate": 7.340090589686284e-05, "loss": 32.7969, "step": 2215 }, { "epoch": 0.35559834717374733, "grad_norm": 12.276846885681152, "learning_rate": 7.337826145636993e-05, "loss": 32.7344, "step": 2216 }, { "epoch": 0.35575881574196655, "grad_norm": 12.26407527923584, "learning_rate": 7.335561087756e-05, "loss": 32.6797, "step": 2217 }, { "epoch": 0.35591928431018577, "grad_norm": 12.195710182189941, "learning_rate": 7.33329541663803e-05, "loss": 32.6797, "step": 2218 }, { "epoch": 0.35607975287840493, "grad_norm": 12.397942543029785, "learning_rate": 7.331029132877966e-05, "loss": 32.5547, "step": 2219 }, { "epoch": 0.35624022144662415, "grad_norm": 12.390276908874512, "learning_rate": 7.32876223707086e-05, "loss": 32.5703, "step": 2220 }, { "epoch": 0.35640069001484337, "grad_norm": 12.540118217468262, "learning_rate": 7.326494729811919e-05, "loss": 32.6094, "step": 2221 }, { "epoch": 0.35656115858306253, "grad_norm": 12.07880687713623, "learning_rate": 7.324226611696508e-05, "loss": 32.6953, "step": 2222 }, { "epoch": 0.35672162715128175, "grad_norm": 11.96279239654541, "learning_rate": 7.321957883320156e-05, "loss": 32.7266, "step": 2223 }, { "epoch": 0.35688209571950097, "grad_norm": 12.013376235961914, "learning_rate": 7.319688545278556e-05, "loss": 32.7578, "step": 2224 }, { "epoch": 0.35704256428772013, "grad_norm": 12.533300399780273, "learning_rate": 7.317418598167556e-05, "loss": 32.5703, "step": 2225 }, { "epoch": 0.35720303285593935, "grad_norm": 12.201725959777832, "learning_rate": 7.315148042583164e-05, "loss": 32.6953, "step": 2226 }, { "epoch": 0.35736350142415857, "grad_norm": 12.139558792114258, "learning_rate": 7.312876879121546e-05, "loss": 32.6953, "step": 2227 }, { "epoch": 0.35752396999237773, "grad_norm": 12.142987251281738, "learning_rate": 7.310605108379036e-05, "loss": 32.6875, "step": 2228 }, { "epoch": 0.35768443856059695, "grad_norm": 11.961786270141602, "learning_rate": 7.308332730952117e-05, "loss": 32.7344, "step": 2229 }, { "epoch": 0.35784490712881617, "grad_norm": 12.206927299499512, "learning_rate": 7.306059747437439e-05, "loss": 32.6875, "step": 2230 }, { "epoch": 0.35800537569703533, "grad_norm": 12.073750495910645, "learning_rate": 7.303786158431809e-05, "loss": 32.7188, "step": 2231 }, { "epoch": 0.35816584426525455, "grad_norm": 12.405261039733887, "learning_rate": 7.30151196453219e-05, "loss": 32.5625, "step": 2232 }, { "epoch": 0.35832631283347377, "grad_norm": 12.21024227142334, "learning_rate": 7.299237166335707e-05, "loss": 32.625, "step": 2233 }, { "epoch": 0.35848678140169293, "grad_norm": 12.010218620300293, "learning_rate": 7.29696176443964e-05, "loss": 32.6328, "step": 2234 }, { "epoch": 0.35864724996991215, "grad_norm": 12.138545036315918, "learning_rate": 7.294685759441436e-05, "loss": 32.6562, "step": 2235 }, { "epoch": 0.35880771853813137, "grad_norm": 12.138992309570312, "learning_rate": 7.29240915193869e-05, "loss": 32.6328, "step": 2236 }, { "epoch": 0.35896818710635053, "grad_norm": 11.958240509033203, "learning_rate": 7.290131942529161e-05, "loss": 32.8281, "step": 2237 }, { "epoch": 0.35912865567456975, "grad_norm": 12.591509819030762, "learning_rate": 7.287854131810768e-05, "loss": 32.6094, "step": 2238 }, { "epoch": 0.35928912424278897, "grad_norm": 12.38581371307373, "learning_rate": 7.285575720381578e-05, "loss": 32.5547, "step": 2239 }, { "epoch": 0.35944959281100813, "grad_norm": 12.06935977935791, "learning_rate": 7.283296708839829e-05, "loss": 32.5859, "step": 2240 }, { "epoch": 0.35961006137922735, "grad_norm": 12.399974822998047, "learning_rate": 7.281017097783908e-05, "loss": 32.5469, "step": 2241 }, { "epoch": 0.35977052994744657, "grad_norm": 12.202542304992676, "learning_rate": 7.27873688781236e-05, "loss": 32.6562, "step": 2242 }, { "epoch": 0.35993099851566573, "grad_norm": 12.83583927154541, "learning_rate": 7.276456079523888e-05, "loss": 32.5234, "step": 2243 }, { "epoch": 0.36009146708388495, "grad_norm": 12.457746505737305, "learning_rate": 7.274174673517357e-05, "loss": 32.6328, "step": 2244 }, { "epoch": 0.36025193565210417, "grad_norm": 12.383074760437012, "learning_rate": 7.271892670391782e-05, "loss": 32.5312, "step": 2245 }, { "epoch": 0.36041240422032333, "grad_norm": 12.990309715270996, "learning_rate": 7.269610070746336e-05, "loss": 32.6953, "step": 2246 }, { "epoch": 0.36057287278854255, "grad_norm": 12.404083251953125, "learning_rate": 7.267326875180351e-05, "loss": 32.7031, "step": 2247 }, { "epoch": 0.36073334135676177, "grad_norm": 12.270939826965332, "learning_rate": 7.265043084293318e-05, "loss": 32.6328, "step": 2248 }, { "epoch": 0.36089380992498094, "grad_norm": 12.314179420471191, "learning_rate": 7.262758698684877e-05, "loss": 32.7734, "step": 2249 }, { "epoch": 0.36105427849320015, "grad_norm": 12.538858413696289, "learning_rate": 7.26047371895483e-05, "loss": 33.1484, "step": 2250 }, { "epoch": 0.3612147470614193, "grad_norm": 12.23190975189209, "learning_rate": 7.258188145703134e-05, "loss": 32.7734, "step": 2251 }, { "epoch": 0.36137521562963854, "grad_norm": 12.114513397216797, "learning_rate": 7.255901979529895e-05, "loss": 32.7969, "step": 2252 }, { "epoch": 0.36153568419785775, "grad_norm": 12.230073928833008, "learning_rate": 7.253615221035388e-05, "loss": 32.7578, "step": 2253 }, { "epoch": 0.3616961527660769, "grad_norm": 12.099953651428223, "learning_rate": 7.251327870820033e-05, "loss": 32.6875, "step": 2254 }, { "epoch": 0.36185662133429614, "grad_norm": 12.292953491210938, "learning_rate": 7.249039929484409e-05, "loss": 32.7578, "step": 2255 }, { "epoch": 0.36201708990251535, "grad_norm": 12.609991073608398, "learning_rate": 7.246751397629248e-05, "loss": 32.6406, "step": 2256 }, { "epoch": 0.3621775584707345, "grad_norm": 12.221269607543945, "learning_rate": 7.244462275855441e-05, "loss": 32.7266, "step": 2257 }, { "epoch": 0.36233802703895374, "grad_norm": 11.957390785217285, "learning_rate": 7.242172564764031e-05, "loss": 32.8359, "step": 2258 }, { "epoch": 0.36249849560717295, "grad_norm": 12.262577056884766, "learning_rate": 7.239882264956215e-05, "loss": 32.7266, "step": 2259 }, { "epoch": 0.3626589641753921, "grad_norm": 12.284759521484375, "learning_rate": 7.237591377033349e-05, "loss": 32.875, "step": 2260 }, { "epoch": 0.36281943274361134, "grad_norm": 12.013185501098633, "learning_rate": 7.235299901596937e-05, "loss": 32.6562, "step": 2261 }, { "epoch": 0.36297990131183056, "grad_norm": 12.204174041748047, "learning_rate": 7.23300783924864e-05, "loss": 32.7031, "step": 2262 }, { "epoch": 0.3631403698800497, "grad_norm": 11.887612342834473, "learning_rate": 7.230715190590278e-05, "loss": 32.7578, "step": 2263 }, { "epoch": 0.36330083844826894, "grad_norm": 12.282400131225586, "learning_rate": 7.228421956223818e-05, "loss": 32.6641, "step": 2264 }, { "epoch": 0.36346130701648816, "grad_norm": 12.260375022888184, "learning_rate": 7.226128136751385e-05, "loss": 32.7188, "step": 2265 }, { "epoch": 0.3636217755847073, "grad_norm": 12.080647468566895, "learning_rate": 7.223833732775252e-05, "loss": 32.6953, "step": 2266 }, { "epoch": 0.36378224415292654, "grad_norm": 12.334369659423828, "learning_rate": 7.221538744897855e-05, "loss": 32.5859, "step": 2267 }, { "epoch": 0.36394271272114576, "grad_norm": 12.261180877685547, "learning_rate": 7.219243173721774e-05, "loss": 32.6797, "step": 2268 }, { "epoch": 0.3641031812893649, "grad_norm": 12.197396278381348, "learning_rate": 7.216947019849748e-05, "loss": 32.6641, "step": 2269 }, { "epoch": 0.36426364985758414, "grad_norm": 12.076576232910156, "learning_rate": 7.214650283884666e-05, "loss": 32.6484, "step": 2270 }, { "epoch": 0.36442411842580336, "grad_norm": 12.518392562866211, "learning_rate": 7.212352966429571e-05, "loss": 32.6328, "step": 2271 }, { "epoch": 0.3645845869940225, "grad_norm": 12.258557319641113, "learning_rate": 7.21005506808766e-05, "loss": 32.6094, "step": 2272 }, { "epoch": 0.36474505556224174, "grad_norm": 12.144733428955078, "learning_rate": 7.20775658946228e-05, "loss": 32.7422, "step": 2273 }, { "epoch": 0.36490552413046096, "grad_norm": 12.262289047241211, "learning_rate": 7.20545753115693e-05, "loss": 32.6406, "step": 2274 }, { "epoch": 0.3650659926986801, "grad_norm": 12.011289596557617, "learning_rate": 7.203157893775263e-05, "loss": 32.6953, "step": 2275 }, { "epoch": 0.36522646126689934, "grad_norm": 12.273407936096191, "learning_rate": 7.200857677921085e-05, "loss": 32.5938, "step": 2276 }, { "epoch": 0.36538692983511856, "grad_norm": 12.07402515411377, "learning_rate": 7.198556884198354e-05, "loss": 32.6016, "step": 2277 }, { "epoch": 0.3655473984033377, "grad_norm": 12.211715698242188, "learning_rate": 7.196255513211175e-05, "loss": 32.6562, "step": 2278 }, { "epoch": 0.36570786697155694, "grad_norm": 12.391084671020508, "learning_rate": 7.193953565563808e-05, "loss": 32.5156, "step": 2279 }, { "epoch": 0.36586833553977616, "grad_norm": 12.201627731323242, "learning_rate": 7.191651041860667e-05, "loss": 32.6797, "step": 2280 }, { "epoch": 0.3660288041079953, "grad_norm": 12.141018867492676, "learning_rate": 7.189347942706311e-05, "loss": 32.6641, "step": 2281 }, { "epoch": 0.36618927267621454, "grad_norm": 12.266423225402832, "learning_rate": 7.187044268705459e-05, "loss": 32.6016, "step": 2282 }, { "epoch": 0.36634974124443376, "grad_norm": 12.211565017700195, "learning_rate": 7.184740020462968e-05, "loss": 32.6641, "step": 2283 }, { "epoch": 0.3665102098126529, "grad_norm": 12.211867332458496, "learning_rate": 7.18243519858386e-05, "loss": 32.6641, "step": 2284 }, { "epoch": 0.36667067838087214, "grad_norm": 12.536839485168457, "learning_rate": 7.180129803673299e-05, "loss": 32.6562, "step": 2285 }, { "epoch": 0.36683114694909136, "grad_norm": 12.25590991973877, "learning_rate": 7.1778238363366e-05, "loss": 32.5781, "step": 2286 }, { "epoch": 0.3669916155173105, "grad_norm": 12.449060440063477, "learning_rate": 7.175517297179228e-05, "loss": 32.6016, "step": 2287 }, { "epoch": 0.36715208408552974, "grad_norm": 12.332308769226074, "learning_rate": 7.173210186806804e-05, "loss": 32.6719, "step": 2288 }, { "epoch": 0.36731255265374896, "grad_norm": 12.133978843688965, "learning_rate": 7.170902505825094e-05, "loss": 32.6641, "step": 2289 }, { "epoch": 0.3674730212219681, "grad_norm": 12.25345516204834, "learning_rate": 7.168594254840012e-05, "loss": 32.6094, "step": 2290 }, { "epoch": 0.36763348979018734, "grad_norm": 12.132880210876465, "learning_rate": 7.166285434457628e-05, "loss": 32.5547, "step": 2291 }, { "epoch": 0.36779395835840656, "grad_norm": 12.200227737426758, "learning_rate": 7.163976045284155e-05, "loss": 32.6719, "step": 2292 }, { "epoch": 0.3679544269266257, "grad_norm": 12.077431678771973, "learning_rate": 7.161666087925957e-05, "loss": 32.6094, "step": 2293 }, { "epoch": 0.36811489549484494, "grad_norm": 12.203001022338867, "learning_rate": 7.159355562989553e-05, "loss": 32.5859, "step": 2294 }, { "epoch": 0.36827536406306416, "grad_norm": 12.27511978149414, "learning_rate": 7.157044471081603e-05, "loss": 32.6172, "step": 2295 }, { "epoch": 0.3684358326312833, "grad_norm": 12.32475757598877, "learning_rate": 7.154732812808919e-05, "loss": 32.6797, "step": 2296 }, { "epoch": 0.36859630119950254, "grad_norm": 12.262060165405273, "learning_rate": 7.152420588778463e-05, "loss": 32.6172, "step": 2297 }, { "epoch": 0.36875676976772176, "grad_norm": 12.072861671447754, "learning_rate": 7.150107799597345e-05, "loss": 32.7344, "step": 2298 }, { "epoch": 0.3689172383359409, "grad_norm": 12.15463638305664, "learning_rate": 7.147794445872822e-05, "loss": 32.6953, "step": 2299 }, { "epoch": 0.36907770690416014, "grad_norm": 12.186306953430176, "learning_rate": 7.145480528212298e-05, "loss": 33.0938, "step": 2300 }, { "epoch": 0.36923817547237936, "grad_norm": 12.71121883392334, "learning_rate": 7.143166047223332e-05, "loss": 32.9297, "step": 2301 }, { "epoch": 0.3693986440405985, "grad_norm": 12.161323547363281, "learning_rate": 7.140851003513623e-05, "loss": 32.8125, "step": 2302 }, { "epoch": 0.36955911260881774, "grad_norm": 12.716721534729004, "learning_rate": 7.138535397691021e-05, "loss": 32.7891, "step": 2303 }, { "epoch": 0.36971958117703696, "grad_norm": 12.474403381347656, "learning_rate": 7.136219230363523e-05, "loss": 32.7812, "step": 2304 }, { "epoch": 0.3698800497452561, "grad_norm": 12.017943382263184, "learning_rate": 7.133902502139276e-05, "loss": 32.75, "step": 2305 }, { "epoch": 0.37004051831347534, "grad_norm": 12.400033950805664, "learning_rate": 7.131585213626569e-05, "loss": 32.6797, "step": 2306 }, { "epoch": 0.37020098688169456, "grad_norm": 11.947123527526855, "learning_rate": 7.129267365433844e-05, "loss": 32.6797, "step": 2307 }, { "epoch": 0.3703614554499137, "grad_norm": 12.07521915435791, "learning_rate": 7.126948958169686e-05, "loss": 32.7422, "step": 2308 }, { "epoch": 0.37052192401813294, "grad_norm": 12.402665138244629, "learning_rate": 7.124629992442829e-05, "loss": 32.7109, "step": 2309 }, { "epoch": 0.37068239258635216, "grad_norm": 12.13227653503418, "learning_rate": 7.12231046886215e-05, "loss": 32.7266, "step": 2310 }, { "epoch": 0.3708428611545713, "grad_norm": 12.962347030639648, "learning_rate": 7.119990388036677e-05, "loss": 32.6406, "step": 2311 }, { "epoch": 0.37100332972279054, "grad_norm": 12.212499618530273, "learning_rate": 7.117669750575584e-05, "loss": 32.6406, "step": 2312 }, { "epoch": 0.37116379829100976, "grad_norm": 12.079351425170898, "learning_rate": 7.115348557088184e-05, "loss": 32.625, "step": 2313 }, { "epoch": 0.3713242668592289, "grad_norm": 11.95525074005127, "learning_rate": 7.11302680818395e-05, "loss": 32.7109, "step": 2314 }, { "epoch": 0.37148473542744814, "grad_norm": 12.240345001220703, "learning_rate": 7.110704504472485e-05, "loss": 32.6406, "step": 2315 }, { "epoch": 0.37164520399566736, "grad_norm": 12.340970993041992, "learning_rate": 7.108381646563548e-05, "loss": 32.6016, "step": 2316 }, { "epoch": 0.3718056725638865, "grad_norm": 11.883662223815918, "learning_rate": 7.106058235067041e-05, "loss": 32.75, "step": 2317 }, { "epoch": 0.37196614113210574, "grad_norm": 12.244269371032715, "learning_rate": 7.10373427059301e-05, "loss": 32.5859, "step": 2318 }, { "epoch": 0.37212660970032496, "grad_norm": 12.209083557128906, "learning_rate": 7.101409753751647e-05, "loss": 32.625, "step": 2319 }, { "epoch": 0.3722870782685441, "grad_norm": 12.202171325683594, "learning_rate": 7.09908468515329e-05, "loss": 32.6094, "step": 2320 }, { "epoch": 0.37244754683676334, "grad_norm": 12.396146774291992, "learning_rate": 7.09675906540842e-05, "loss": 32.6016, "step": 2321 }, { "epoch": 0.37260801540498256, "grad_norm": 12.132735252380371, "learning_rate": 7.094432895127664e-05, "loss": 32.6406, "step": 2322 }, { "epoch": 0.3727684839732017, "grad_norm": 12.029590606689453, "learning_rate": 7.092106174921793e-05, "loss": 32.7188, "step": 2323 }, { "epoch": 0.37292895254142094, "grad_norm": 12.40823745727539, "learning_rate": 7.089778905401724e-05, "loss": 32.6406, "step": 2324 }, { "epoch": 0.37308942110964016, "grad_norm": 12.452139854431152, "learning_rate": 7.087451087178516e-05, "loss": 32.5547, "step": 2325 }, { "epoch": 0.3732498896778593, "grad_norm": 12.506433486938477, "learning_rate": 7.085122720863373e-05, "loss": 32.5312, "step": 2326 }, { "epoch": 0.37341035824607854, "grad_norm": 11.948781967163086, "learning_rate": 7.082793807067643e-05, "loss": 32.6562, "step": 2327 }, { "epoch": 0.37357082681429776, "grad_norm": 12.135283470153809, "learning_rate": 7.080464346402818e-05, "loss": 32.6406, "step": 2328 }, { "epoch": 0.3737312953825169, "grad_norm": 12.079876899719238, "learning_rate": 7.07813433948053e-05, "loss": 32.7578, "step": 2329 }, { "epoch": 0.37389176395073614, "grad_norm": 12.268832206726074, "learning_rate": 7.075803786912563e-05, "loss": 32.6797, "step": 2330 }, { "epoch": 0.37405223251895536, "grad_norm": 12.213279724121094, "learning_rate": 7.073472689310836e-05, "loss": 32.6094, "step": 2331 }, { "epoch": 0.3742127010871745, "grad_norm": 12.266838073730469, "learning_rate": 7.071141047287415e-05, "loss": 32.6094, "step": 2332 }, { "epoch": 0.37437316965539374, "grad_norm": 12.648015975952148, "learning_rate": 7.068808861454506e-05, "loss": 32.5938, "step": 2333 }, { "epoch": 0.37453363822361296, "grad_norm": 12.07181453704834, "learning_rate": 7.066476132424463e-05, "loss": 32.6875, "step": 2334 }, { "epoch": 0.3746941067918321, "grad_norm": 12.201128959655762, "learning_rate": 7.064142860809775e-05, "loss": 32.6719, "step": 2335 }, { "epoch": 0.37485457536005135, "grad_norm": 12.1343994140625, "learning_rate": 7.061809047223083e-05, "loss": 32.6016, "step": 2336 }, { "epoch": 0.37501504392827056, "grad_norm": 12.400093078613281, "learning_rate": 7.05947469227716e-05, "loss": 32.5938, "step": 2337 }, { "epoch": 0.3751755124964897, "grad_norm": 12.649468421936035, "learning_rate": 7.057139796584929e-05, "loss": 32.5781, "step": 2338 }, { "epoch": 0.37533598106470895, "grad_norm": 12.293877601623535, "learning_rate": 7.054804360759452e-05, "loss": 32.6328, "step": 2339 }, { "epoch": 0.37549644963292816, "grad_norm": 12.324207305908203, "learning_rate": 7.052468385413934e-05, "loss": 32.5781, "step": 2340 }, { "epoch": 0.3756569182011473, "grad_norm": 12.574324607849121, "learning_rate": 7.050131871161719e-05, "loss": 32.5781, "step": 2341 }, { "epoch": 0.37581738676936655, "grad_norm": 11.951045036315918, "learning_rate": 7.047794818616295e-05, "loss": 32.7969, "step": 2342 }, { "epoch": 0.37597785533758576, "grad_norm": 12.197578430175781, "learning_rate": 7.045457228391293e-05, "loss": 32.6484, "step": 2343 }, { "epoch": 0.3761383239058049, "grad_norm": 12.266343116760254, "learning_rate": 7.043119101100479e-05, "loss": 32.75, "step": 2344 }, { "epoch": 0.37629879247402415, "grad_norm": 12.266961097717285, "learning_rate": 7.040780437357764e-05, "loss": 32.6172, "step": 2345 }, { "epoch": 0.37645926104224336, "grad_norm": 12.137420654296875, "learning_rate": 7.038441237777201e-05, "loss": 32.5938, "step": 2346 }, { "epoch": 0.37661972961046253, "grad_norm": 12.445749282836914, "learning_rate": 7.036101502972982e-05, "loss": 32.5312, "step": 2347 }, { "epoch": 0.37678019817868175, "grad_norm": 12.139123916625977, "learning_rate": 7.033761233559442e-05, "loss": 32.6953, "step": 2348 }, { "epoch": 0.37694066674690097, "grad_norm": 12.108936309814453, "learning_rate": 7.03142043015105e-05, "loss": 33.0391, "step": 2349 }, { "epoch": 0.37710113531512013, "grad_norm": 12.039668083190918, "learning_rate": 7.029079093362422e-05, "loss": 32.8672, "step": 2350 }, { "epoch": 0.37726160388333935, "grad_norm": 11.981073379516602, "learning_rate": 7.026737223808313e-05, "loss": 32.8828, "step": 2351 }, { "epoch": 0.37742207245155857, "grad_norm": 11.967604637145996, "learning_rate": 7.024394822103613e-05, "loss": 32.9609, "step": 2352 }, { "epoch": 0.37758254101977773, "grad_norm": 12.038151741027832, "learning_rate": 7.022051888863355e-05, "loss": 32.9453, "step": 2353 }, { "epoch": 0.37774300958799695, "grad_norm": 12.49194049835205, "learning_rate": 7.019708424702715e-05, "loss": 32.6719, "step": 2354 }, { "epoch": 0.37790347815621617, "grad_norm": 12.284921646118164, "learning_rate": 7.017364430237005e-05, "loss": 32.7266, "step": 2355 }, { "epoch": 0.37806394672443533, "grad_norm": 12.27648639678955, "learning_rate": 7.015019906081671e-05, "loss": 32.6484, "step": 2356 }, { "epoch": 0.37822441529265455, "grad_norm": 12.021838188171387, "learning_rate": 7.012674852852309e-05, "loss": 32.7422, "step": 2357 }, { "epoch": 0.37838488386087377, "grad_norm": 12.2848539352417, "learning_rate": 7.010329271164646e-05, "loss": 32.7422, "step": 2358 }, { "epoch": 0.37854535242909293, "grad_norm": 11.952521324157715, "learning_rate": 7.00798316163455e-05, "loss": 32.7188, "step": 2359 }, { "epoch": 0.37870582099731215, "grad_norm": 12.140868186950684, "learning_rate": 7.005636524878026e-05, "loss": 32.6875, "step": 2360 }, { "epoch": 0.37886628956553137, "grad_norm": 12.427902221679688, "learning_rate": 7.003289361511224e-05, "loss": 32.7188, "step": 2361 }, { "epoch": 0.37902675813375053, "grad_norm": 12.077795028686523, "learning_rate": 7.000941672150423e-05, "loss": 32.6719, "step": 2362 }, { "epoch": 0.37918722670196975, "grad_norm": 12.139556884765625, "learning_rate": 6.998593457412045e-05, "loss": 32.6641, "step": 2363 }, { "epoch": 0.37934769527018897, "grad_norm": 12.01377010345459, "learning_rate": 6.996244717912651e-05, "loss": 32.6484, "step": 2364 }, { "epoch": 0.37950816383840813, "grad_norm": 12.141175270080566, "learning_rate": 6.993895454268938e-05, "loss": 32.7266, "step": 2365 }, { "epoch": 0.37966863240662735, "grad_norm": 12.20991325378418, "learning_rate": 6.991545667097738e-05, "loss": 32.6875, "step": 2366 }, { "epoch": 0.37982910097484657, "grad_norm": 12.20610523223877, "learning_rate": 6.989195357016026e-05, "loss": 32.7266, "step": 2367 }, { "epoch": 0.37998956954306573, "grad_norm": 12.527850151062012, "learning_rate": 6.986844524640912e-05, "loss": 32.6719, "step": 2368 }, { "epoch": 0.38015003811128495, "grad_norm": 12.133920669555664, "learning_rate": 6.98449317058964e-05, "loss": 32.625, "step": 2369 }, { "epoch": 0.38031050667950417, "grad_norm": 12.273444175720215, "learning_rate": 6.982141295479595e-05, "loss": 32.6797, "step": 2370 }, { "epoch": 0.38047097524772333, "grad_norm": 11.98233413696289, "learning_rate": 6.979788899928298e-05, "loss": 32.7422, "step": 2371 }, { "epoch": 0.38063144381594255, "grad_norm": 12.391613006591797, "learning_rate": 6.977435984553408e-05, "loss": 32.7109, "step": 2372 }, { "epoch": 0.38079191238416177, "grad_norm": 12.16110897064209, "learning_rate": 6.975082549972713e-05, "loss": 32.6484, "step": 2373 }, { "epoch": 0.38095238095238093, "grad_norm": 12.20345401763916, "learning_rate": 6.972728596804149e-05, "loss": 32.6719, "step": 2374 }, { "epoch": 0.38111284952060015, "grad_norm": 12.075139999389648, "learning_rate": 6.970374125665779e-05, "loss": 32.5938, "step": 2375 }, { "epoch": 0.38127331808881937, "grad_norm": 12.320767402648926, "learning_rate": 6.968019137175805e-05, "loss": 32.5859, "step": 2376 }, { "epoch": 0.38143378665703853, "grad_norm": 12.32455825805664, "learning_rate": 6.965663631952563e-05, "loss": 32.6172, "step": 2377 }, { "epoch": 0.38159425522525775, "grad_norm": 12.291312217712402, "learning_rate": 6.963307610614533e-05, "loss": 32.6172, "step": 2378 }, { "epoch": 0.38175472379347697, "grad_norm": 12.20787239074707, "learning_rate": 6.96095107378032e-05, "loss": 32.5938, "step": 2379 }, { "epoch": 0.38191519236169613, "grad_norm": 12.140585899353027, "learning_rate": 6.958594022068665e-05, "loss": 32.5781, "step": 2380 }, { "epoch": 0.38207566092991535, "grad_norm": 12.114192962646484, "learning_rate": 6.956236456098455e-05, "loss": 32.7109, "step": 2381 }, { "epoch": 0.38223612949813457, "grad_norm": 12.499593734741211, "learning_rate": 6.9538783764887e-05, "loss": 32.6172, "step": 2382 }, { "epoch": 0.38239659806635373, "grad_norm": 12.01889705657959, "learning_rate": 6.95151978385855e-05, "loss": 32.6875, "step": 2383 }, { "epoch": 0.38255706663457295, "grad_norm": 12.079890251159668, "learning_rate": 6.949160678827288e-05, "loss": 32.6562, "step": 2384 }, { "epoch": 0.38271753520279217, "grad_norm": 12.011130332946777, "learning_rate": 6.946801062014339e-05, "loss": 32.7031, "step": 2385 }, { "epoch": 0.38287800377101133, "grad_norm": 11.946682929992676, "learning_rate": 6.94444093403925e-05, "loss": 32.6406, "step": 2386 }, { "epoch": 0.38303847233923055, "grad_norm": 12.200482368469238, "learning_rate": 6.942080295521708e-05, "loss": 32.625, "step": 2387 }, { "epoch": 0.38319894090744977, "grad_norm": 12.5205659866333, "learning_rate": 6.939719147081539e-05, "loss": 32.5547, "step": 2388 }, { "epoch": 0.38335940947566893, "grad_norm": 12.642642974853516, "learning_rate": 6.937357489338695e-05, "loss": 32.5703, "step": 2389 }, { "epoch": 0.38351987804388815, "grad_norm": 12.25538444519043, "learning_rate": 6.934995322913267e-05, "loss": 32.5547, "step": 2390 }, { "epoch": 0.38368034661210737, "grad_norm": 12.267136573791504, "learning_rate": 6.932632648425476e-05, "loss": 32.6562, "step": 2391 }, { "epoch": 0.38384081518032653, "grad_norm": 11.955707550048828, "learning_rate": 6.930269466495678e-05, "loss": 32.7344, "step": 2392 }, { "epoch": 0.38400128374854575, "grad_norm": 12.076354026794434, "learning_rate": 6.927905777744364e-05, "loss": 32.6562, "step": 2393 }, { "epoch": 0.38416175231676497, "grad_norm": 12.131132125854492, "learning_rate": 6.925541582792155e-05, "loss": 32.6875, "step": 2394 }, { "epoch": 0.38432222088498413, "grad_norm": 12.325695991516113, "learning_rate": 6.923176882259805e-05, "loss": 32.7109, "step": 2395 }, { "epoch": 0.38448268945320335, "grad_norm": 12.527390480041504, "learning_rate": 6.920811676768205e-05, "loss": 32.625, "step": 2396 }, { "epoch": 0.38464315802142257, "grad_norm": 12.513842582702637, "learning_rate": 6.918445966938374e-05, "loss": 32.5781, "step": 2397 }, { "epoch": 0.38480362658964173, "grad_norm": 12.270431518554688, "learning_rate": 6.916079753391465e-05, "loss": 32.625, "step": 2398 }, { "epoch": 0.38496409515786095, "grad_norm": 12.550390243530273, "learning_rate": 6.913713036748762e-05, "loss": 32.6406, "step": 2399 }, { "epoch": 0.38512456372608017, "grad_norm": 12.288671493530273, "learning_rate": 6.911345817631685e-05, "loss": 32.9219, "step": 2400 }, { "epoch": 0.38528503229429933, "grad_norm": 12.230406761169434, "learning_rate": 6.90897809666178e-05, "loss": 33.0391, "step": 2401 }, { "epoch": 0.38544550086251855, "grad_norm": 12.704160690307617, "learning_rate": 6.90660987446073e-05, "loss": 32.7109, "step": 2402 }, { "epoch": 0.38560596943073777, "grad_norm": 12.434389114379883, "learning_rate": 6.904241151650351e-05, "loss": 32.8438, "step": 2403 }, { "epoch": 0.38576643799895693, "grad_norm": 12.42084789276123, "learning_rate": 6.90187192885258e-05, "loss": 32.625, "step": 2404 }, { "epoch": 0.38592690656717615, "grad_norm": 12.140170097351074, "learning_rate": 6.899502206689498e-05, "loss": 32.6641, "step": 2405 }, { "epoch": 0.38608737513539537, "grad_norm": 12.08987808227539, "learning_rate": 6.897131985783308e-05, "loss": 32.6953, "step": 2406 }, { "epoch": 0.38624784370361454, "grad_norm": 12.088188171386719, "learning_rate": 6.89476126675635e-05, "loss": 32.7578, "step": 2407 }, { "epoch": 0.38640831227183375, "grad_norm": 12.572986602783203, "learning_rate": 6.89239005023109e-05, "loss": 32.7344, "step": 2408 }, { "epoch": 0.386568780840053, "grad_norm": 12.094995498657227, "learning_rate": 6.890018336830129e-05, "loss": 32.8594, "step": 2409 }, { "epoch": 0.38672924940827214, "grad_norm": 12.217108726501465, "learning_rate": 6.887646127176195e-05, "loss": 32.75, "step": 2410 }, { "epoch": 0.38688971797649135, "grad_norm": 12.149100303649902, "learning_rate": 6.885273421892147e-05, "loss": 32.7188, "step": 2411 }, { "epoch": 0.3870501865447106, "grad_norm": 12.452677726745605, "learning_rate": 6.882900221600978e-05, "loss": 32.6328, "step": 2412 }, { "epoch": 0.38721065511292974, "grad_norm": 12.197305679321289, "learning_rate": 6.880526526925803e-05, "loss": 32.6484, "step": 2413 }, { "epoch": 0.38737112368114895, "grad_norm": 11.94866943359375, "learning_rate": 6.878152338489877e-05, "loss": 32.6797, "step": 2414 }, { "epoch": 0.3875315922493682, "grad_norm": 12.327415466308594, "learning_rate": 6.875777656916574e-05, "loss": 32.6641, "step": 2415 }, { "epoch": 0.38769206081758734, "grad_norm": 12.168451309204102, "learning_rate": 6.873402482829404e-05, "loss": 32.6172, "step": 2416 }, { "epoch": 0.38785252938580655, "grad_norm": 12.028331756591797, "learning_rate": 6.871026816852006e-05, "loss": 32.6641, "step": 2417 }, { "epoch": 0.3880129979540258, "grad_norm": 12.209275245666504, "learning_rate": 6.868650659608146e-05, "loss": 32.7266, "step": 2418 }, { "epoch": 0.38817346652224494, "grad_norm": 12.22249984741211, "learning_rate": 6.866274011721724e-05, "loss": 32.6406, "step": 2419 }, { "epoch": 0.38833393509046416, "grad_norm": 12.027021408081055, "learning_rate": 6.86389687381676e-05, "loss": 32.6875, "step": 2420 }, { "epoch": 0.3884944036586834, "grad_norm": 12.415223121643066, "learning_rate": 6.86151924651741e-05, "loss": 32.6719, "step": 2421 }, { "epoch": 0.38865487222690254, "grad_norm": 12.233210563659668, "learning_rate": 6.859141130447956e-05, "loss": 32.7344, "step": 2422 }, { "epoch": 0.38881534079512176, "grad_norm": 12.411916732788086, "learning_rate": 6.856762526232807e-05, "loss": 32.6094, "step": 2423 }, { "epoch": 0.388975809363341, "grad_norm": 12.514443397521973, "learning_rate": 6.854383434496502e-05, "loss": 32.5938, "step": 2424 }, { "epoch": 0.38913627793156014, "grad_norm": 12.193929672241211, "learning_rate": 6.852003855863709e-05, "loss": 32.6406, "step": 2425 }, { "epoch": 0.38929674649977936, "grad_norm": 12.008530616760254, "learning_rate": 6.849623790959223e-05, "loss": 32.6406, "step": 2426 }, { "epoch": 0.3894572150679986, "grad_norm": 12.271463394165039, "learning_rate": 6.847243240407963e-05, "loss": 32.5859, "step": 2427 }, { "epoch": 0.38961768363621774, "grad_norm": 12.525593757629395, "learning_rate": 6.844862204834982e-05, "loss": 32.6016, "step": 2428 }, { "epoch": 0.38977815220443696, "grad_norm": 11.946995735168457, "learning_rate": 6.842480684865456e-05, "loss": 32.6562, "step": 2429 }, { "epoch": 0.3899386207726562, "grad_norm": 12.272738456726074, "learning_rate": 6.840098681124688e-05, "loss": 32.6562, "step": 2430 }, { "epoch": 0.39009908934087534, "grad_norm": 12.198683738708496, "learning_rate": 6.837716194238107e-05, "loss": 32.7031, "step": 2431 }, { "epoch": 0.39025955790909456, "grad_norm": 12.132387161254883, "learning_rate": 6.835333224831278e-05, "loss": 32.625, "step": 2432 }, { "epoch": 0.3904200264773138, "grad_norm": 12.069718360900879, "learning_rate": 6.832949773529878e-05, "loss": 32.6328, "step": 2433 }, { "epoch": 0.39058049504553294, "grad_norm": 12.323370933532715, "learning_rate": 6.830565840959725e-05, "loss": 32.6875, "step": 2434 }, { "epoch": 0.39074096361375216, "grad_norm": 12.519057273864746, "learning_rate": 6.828181427746752e-05, "loss": 32.5625, "step": 2435 }, { "epoch": 0.3909014321819714, "grad_norm": 12.01806640625, "learning_rate": 6.825796534517024e-05, "loss": 32.6562, "step": 2436 }, { "epoch": 0.39106190075019054, "grad_norm": 12.02474308013916, "learning_rate": 6.823411161896732e-05, "loss": 32.6719, "step": 2437 }, { "epoch": 0.39122236931840976, "grad_norm": 12.3280611038208, "learning_rate": 6.821025310512192e-05, "loss": 32.6406, "step": 2438 }, { "epoch": 0.391382837886629, "grad_norm": 12.275179862976074, "learning_rate": 6.818638980989843e-05, "loss": 32.6875, "step": 2439 }, { "epoch": 0.39154330645484814, "grad_norm": 12.156569480895996, "learning_rate": 6.816252173956254e-05, "loss": 32.7031, "step": 2440 }, { "epoch": 0.39170377502306736, "grad_norm": 12.099746704101562, "learning_rate": 6.813864890038116e-05, "loss": 32.7812, "step": 2441 }, { "epoch": 0.3918642435912866, "grad_norm": 12.266070365905762, "learning_rate": 6.81147712986225e-05, "loss": 32.6094, "step": 2442 }, { "epoch": 0.39202471215950574, "grad_norm": 12.258678436279297, "learning_rate": 6.809088894055594e-05, "loss": 32.6172, "step": 2443 }, { "epoch": 0.39218518072772496, "grad_norm": 12.263093948364258, "learning_rate": 6.806700183245218e-05, "loss": 32.6094, "step": 2444 }, { "epoch": 0.3923456492959442, "grad_norm": 12.025383949279785, "learning_rate": 6.804310998058314e-05, "loss": 32.7734, "step": 2445 }, { "epoch": 0.39250611786416334, "grad_norm": 12.28370189666748, "learning_rate": 6.8019213391222e-05, "loss": 32.8047, "step": 2446 }, { "epoch": 0.39266658643238256, "grad_norm": 12.082561492919922, "learning_rate": 6.799531207064315e-05, "loss": 32.75, "step": 2447 }, { "epoch": 0.3928270550006018, "grad_norm": 12.45088005065918, "learning_rate": 6.797140602512225e-05, "loss": 32.6484, "step": 2448 }, { "epoch": 0.39298752356882094, "grad_norm": 12.139820098876953, "learning_rate": 6.794749526093619e-05, "loss": 32.8438, "step": 2449 }, { "epoch": 0.39314799213704016, "grad_norm": 12.040200233459473, "learning_rate": 6.792357978436312e-05, "loss": 33.0781, "step": 2450 }, { "epoch": 0.3933084607052594, "grad_norm": 12.583925247192383, "learning_rate": 6.789965960168239e-05, "loss": 33.0156, "step": 2451 }, { "epoch": 0.39346892927347854, "grad_norm": 12.665193557739258, "learning_rate": 6.787573471917463e-05, "loss": 32.8984, "step": 2452 }, { "epoch": 0.39362939784169776, "grad_norm": 12.477397918701172, "learning_rate": 6.785180514312169e-05, "loss": 32.8125, "step": 2453 }, { "epoch": 0.393789866409917, "grad_norm": 12.243133544921875, "learning_rate": 6.78278708798066e-05, "loss": 32.7188, "step": 2454 }, { "epoch": 0.39395033497813614, "grad_norm": 12.483259201049805, "learning_rate": 6.780393193551368e-05, "loss": 32.8594, "step": 2455 }, { "epoch": 0.39411080354635536, "grad_norm": 12.152865409851074, "learning_rate": 6.777998831652849e-05, "loss": 32.8281, "step": 2456 }, { "epoch": 0.3942712721145746, "grad_norm": 12.211280822753906, "learning_rate": 6.775604002913775e-05, "loss": 32.6484, "step": 2457 }, { "epoch": 0.39443174068279374, "grad_norm": 12.289308547973633, "learning_rate": 6.773208707962947e-05, "loss": 33.0391, "step": 2458 }, { "epoch": 0.39459220925101296, "grad_norm": 12.077164649963379, "learning_rate": 6.770812947429286e-05, "loss": 32.75, "step": 2459 }, { "epoch": 0.3947526778192322, "grad_norm": 12.21951675415039, "learning_rate": 6.768416721941835e-05, "loss": 32.7188, "step": 2460 }, { "epoch": 0.39491314638745134, "grad_norm": 12.216842651367188, "learning_rate": 6.766020032129757e-05, "loss": 32.6328, "step": 2461 }, { "epoch": 0.39507361495567056, "grad_norm": 12.034399032592773, "learning_rate": 6.763622878622345e-05, "loss": 32.7812, "step": 2462 }, { "epoch": 0.3952340835238898, "grad_norm": 12.153594017028809, "learning_rate": 6.761225262049003e-05, "loss": 32.6641, "step": 2463 }, { "epoch": 0.39539455209210894, "grad_norm": 12.014570236206055, "learning_rate": 6.758827183039262e-05, "loss": 32.6719, "step": 2464 }, { "epoch": 0.39555502066032816, "grad_norm": 11.962729454040527, "learning_rate": 6.756428642222775e-05, "loss": 32.7188, "step": 2465 }, { "epoch": 0.3957154892285474, "grad_norm": 12.458135604858398, "learning_rate": 6.754029640229315e-05, "loss": 32.6797, "step": 2466 }, { "epoch": 0.39587595779676654, "grad_norm": 12.016695976257324, "learning_rate": 6.751630177688778e-05, "loss": 32.6484, "step": 2467 }, { "epoch": 0.39603642636498576, "grad_norm": 12.084885597229004, "learning_rate": 6.749230255231179e-05, "loss": 32.7109, "step": 2468 }, { "epoch": 0.396196894933205, "grad_norm": 12.024014472961426, "learning_rate": 6.74682987348665e-05, "loss": 32.7344, "step": 2469 }, { "epoch": 0.39635736350142414, "grad_norm": 12.073131561279297, "learning_rate": 6.744429033085452e-05, "loss": 32.6328, "step": 2470 }, { "epoch": 0.39651783206964336, "grad_norm": 12.395962715148926, "learning_rate": 6.74202773465796e-05, "loss": 32.6406, "step": 2471 }, { "epoch": 0.3966783006378626, "grad_norm": 12.406055450439453, "learning_rate": 6.739625978834672e-05, "loss": 32.6875, "step": 2472 }, { "epoch": 0.39683876920608174, "grad_norm": 12.071504592895508, "learning_rate": 6.737223766246206e-05, "loss": 32.6875, "step": 2473 }, { "epoch": 0.39699923777430096, "grad_norm": 12.33737564086914, "learning_rate": 6.734821097523298e-05, "loss": 32.6641, "step": 2474 }, { "epoch": 0.3971597063425202, "grad_norm": 12.21285629272461, "learning_rate": 6.732417973296805e-05, "loss": 32.6094, "step": 2475 }, { "epoch": 0.39732017491073934, "grad_norm": 12.397217750549316, "learning_rate": 6.730014394197705e-05, "loss": 32.5703, "step": 2476 }, { "epoch": 0.39748064347895856, "grad_norm": 12.62290096282959, "learning_rate": 6.727610360857095e-05, "loss": 32.5938, "step": 2477 }, { "epoch": 0.3976411120471778, "grad_norm": 12.088768005371094, "learning_rate": 6.725205873906187e-05, "loss": 32.7266, "step": 2478 }, { "epoch": 0.39780158061539694, "grad_norm": 12.358561515808105, "learning_rate": 6.72280093397632e-05, "loss": 32.6016, "step": 2479 }, { "epoch": 0.39796204918361616, "grad_norm": 12.027924537658691, "learning_rate": 6.720395541698943e-05, "loss": 32.6641, "step": 2480 }, { "epoch": 0.3981225177518354, "grad_norm": 12.129958152770996, "learning_rate": 6.71798969770563e-05, "loss": 32.5781, "step": 2481 }, { "epoch": 0.39828298632005454, "grad_norm": 12.208832740783691, "learning_rate": 6.715583402628073e-05, "loss": 32.6562, "step": 2482 }, { "epoch": 0.39844345488827376, "grad_norm": 12.267292976379395, "learning_rate": 6.71317665709808e-05, "loss": 32.6797, "step": 2483 }, { "epoch": 0.398603923456493, "grad_norm": 12.256439208984375, "learning_rate": 6.710769461747578e-05, "loss": 32.5625, "step": 2484 }, { "epoch": 0.39876439202471214, "grad_norm": 12.212366104125977, "learning_rate": 6.708361817208615e-05, "loss": 32.6094, "step": 2485 }, { "epoch": 0.39892486059293136, "grad_norm": 12.208013534545898, "learning_rate": 6.705953724113352e-05, "loss": 32.6172, "step": 2486 }, { "epoch": 0.3990853291611506, "grad_norm": 12.136621475219727, "learning_rate": 6.703545183094072e-05, "loss": 32.6641, "step": 2487 }, { "epoch": 0.39924579772936974, "grad_norm": 12.269691467285156, "learning_rate": 6.70113619478317e-05, "loss": 32.6016, "step": 2488 }, { "epoch": 0.39940626629758896, "grad_norm": 12.20022201538086, "learning_rate": 6.69872675981317e-05, "loss": 32.5938, "step": 2489 }, { "epoch": 0.3995667348658082, "grad_norm": 12.128239631652832, "learning_rate": 6.696316878816699e-05, "loss": 32.625, "step": 2490 }, { "epoch": 0.39972720343402734, "grad_norm": 12.514575004577637, "learning_rate": 6.693906552426511e-05, "loss": 32.6406, "step": 2491 }, { "epoch": 0.39988767200224656, "grad_norm": 12.266548156738281, "learning_rate": 6.691495781275473e-05, "loss": 32.6328, "step": 2492 }, { "epoch": 0.4000481405704658, "grad_norm": 12.454617500305176, "learning_rate": 6.68908456599657e-05, "loss": 32.5781, "step": 2493 }, { "epoch": 0.40020860913868495, "grad_norm": 12.074214935302734, "learning_rate": 6.686672907222901e-05, "loss": 32.6875, "step": 2494 }, { "epoch": 0.40036907770690416, "grad_norm": 12.242494583129883, "learning_rate": 6.684260805587686e-05, "loss": 32.7656, "step": 2495 }, { "epoch": 0.4005295462751234, "grad_norm": 12.32192611694336, "learning_rate": 6.681848261724258e-05, "loss": 32.6719, "step": 2496 }, { "epoch": 0.40069001484334255, "grad_norm": 12.58973217010498, "learning_rate": 6.679435276266069e-05, "loss": 32.5859, "step": 2497 }, { "epoch": 0.40085048341156176, "grad_norm": 12.451330184936523, "learning_rate": 6.67702184984668e-05, "loss": 32.5625, "step": 2498 }, { "epoch": 0.401010951979781, "grad_norm": 12.354839324951172, "learning_rate": 6.674607983099777e-05, "loss": 32.7109, "step": 2499 }, { "epoch": 0.40117142054800015, "grad_norm": 12.262072563171387, "learning_rate": 6.672193676659156e-05, "loss": 33.0469, "step": 2500 }, { "epoch": 0.40133188911621936, "grad_norm": 12.176898002624512, "learning_rate": 6.66977893115873e-05, "loss": 32.9297, "step": 2501 }, { "epoch": 0.4014923576844386, "grad_norm": 12.63403034210205, "learning_rate": 6.667363747232524e-05, "loss": 32.7188, "step": 2502 }, { "epoch": 0.40165282625265775, "grad_norm": 12.9168062210083, "learning_rate": 6.664948125514688e-05, "loss": 32.7578, "step": 2503 }, { "epoch": 0.40181329482087697, "grad_norm": 12.409080505371094, "learning_rate": 6.662532066639474e-05, "loss": 32.7656, "step": 2504 }, { "epoch": 0.4019737633890962, "grad_norm": 12.155218124389648, "learning_rate": 6.660115571241256e-05, "loss": 32.8359, "step": 2505 }, { "epoch": 0.40213423195731535, "grad_norm": 12.895380973815918, "learning_rate": 6.657698639954524e-05, "loss": 32.6328, "step": 2506 }, { "epoch": 0.40229470052553457, "grad_norm": 12.023021697998047, "learning_rate": 6.655281273413879e-05, "loss": 32.8047, "step": 2507 }, { "epoch": 0.4024551690937538, "grad_norm": 12.01630687713623, "learning_rate": 6.652863472254034e-05, "loss": 32.7891, "step": 2508 }, { "epoch": 0.40261563766197295, "grad_norm": 12.201449394226074, "learning_rate": 6.650445237109825e-05, "loss": 32.7578, "step": 2509 }, { "epoch": 0.40277610623019217, "grad_norm": 12.27462100982666, "learning_rate": 6.648026568616191e-05, "loss": 32.7188, "step": 2510 }, { "epoch": 0.4029365747984114, "grad_norm": 12.015669822692871, "learning_rate": 6.645607467408193e-05, "loss": 32.6328, "step": 2511 }, { "epoch": 0.40309704336663055, "grad_norm": 12.146994590759277, "learning_rate": 6.643187934121002e-05, "loss": 32.6875, "step": 2512 }, { "epoch": 0.40325751193484977, "grad_norm": 12.326753616333008, "learning_rate": 6.640767969389904e-05, "loss": 32.6641, "step": 2513 }, { "epoch": 0.403417980503069, "grad_norm": 12.145975112915039, "learning_rate": 6.638347573850296e-05, "loss": 32.6953, "step": 2514 }, { "epoch": 0.40357844907128815, "grad_norm": 12.57834243774414, "learning_rate": 6.635926748137691e-05, "loss": 32.6875, "step": 2515 }, { "epoch": 0.40373891763950737, "grad_norm": 12.091126441955566, "learning_rate": 6.633505492887711e-05, "loss": 32.6719, "step": 2516 }, { "epoch": 0.4038993862077266, "grad_norm": 12.013092994689941, "learning_rate": 6.631083808736097e-05, "loss": 32.6406, "step": 2517 }, { "epoch": 0.40405985477594575, "grad_norm": 12.192049026489258, "learning_rate": 6.628661696318696e-05, "loss": 32.6875, "step": 2518 }, { "epoch": 0.40422032334416497, "grad_norm": 12.333298683166504, "learning_rate": 6.62623915627147e-05, "loss": 32.6797, "step": 2519 }, { "epoch": 0.4043807919123842, "grad_norm": 12.083244323730469, "learning_rate": 6.623816189230497e-05, "loss": 32.6875, "step": 2520 }, { "epoch": 0.40454126048060335, "grad_norm": 12.522696495056152, "learning_rate": 6.62139279583196e-05, "loss": 32.7656, "step": 2521 }, { "epoch": 0.40470172904882257, "grad_norm": 12.198995590209961, "learning_rate": 6.61896897671216e-05, "loss": 32.6406, "step": 2522 }, { "epoch": 0.4048621976170418, "grad_norm": 12.287930488586426, "learning_rate": 6.616544732507507e-05, "loss": 32.7031, "step": 2523 }, { "epoch": 0.40502266618526095, "grad_norm": 12.0750732421875, "learning_rate": 6.614120063854524e-05, "loss": 32.6172, "step": 2524 }, { "epoch": 0.40518313475348017, "grad_norm": 12.134249687194824, "learning_rate": 6.611694971389839e-05, "loss": 32.6094, "step": 2525 }, { "epoch": 0.4053436033216994, "grad_norm": 12.207923889160156, "learning_rate": 6.609269455750205e-05, "loss": 32.6641, "step": 2526 }, { "epoch": 0.40550407188991855, "grad_norm": 12.14388370513916, "learning_rate": 6.606843517572474e-05, "loss": 32.6172, "step": 2527 }, { "epoch": 0.40566454045813777, "grad_norm": 12.203959465026855, "learning_rate": 6.604417157493611e-05, "loss": 32.6406, "step": 2528 }, { "epoch": 0.405825009026357, "grad_norm": 12.262211799621582, "learning_rate": 6.601990376150696e-05, "loss": 32.5859, "step": 2529 }, { "epoch": 0.40598547759457615, "grad_norm": 12.598244667053223, "learning_rate": 6.599563174180917e-05, "loss": 32.6172, "step": 2530 }, { "epoch": 0.40614594616279537, "grad_norm": 12.509696960449219, "learning_rate": 6.597135552221572e-05, "loss": 32.6016, "step": 2531 }, { "epoch": 0.4063064147310146, "grad_norm": 12.260539054870605, "learning_rate": 6.594707510910073e-05, "loss": 32.5938, "step": 2532 }, { "epoch": 0.40646688329923375, "grad_norm": 12.008384704589844, "learning_rate": 6.592279050883933e-05, "loss": 32.7266, "step": 2533 }, { "epoch": 0.40662735186745297, "grad_norm": 12.318510055541992, "learning_rate": 6.589850172780787e-05, "loss": 32.6016, "step": 2534 }, { "epoch": 0.4067878204356722, "grad_norm": 12.269887924194336, "learning_rate": 6.587420877238369e-05, "loss": 32.6328, "step": 2535 }, { "epoch": 0.40694828900389135, "grad_norm": 12.267616271972656, "learning_rate": 6.584991164894531e-05, "loss": 32.625, "step": 2536 }, { "epoch": 0.40710875757211057, "grad_norm": 12.07096004486084, "learning_rate": 6.582561036387229e-05, "loss": 32.6562, "step": 2537 }, { "epoch": 0.4072692261403298, "grad_norm": 12.281391143798828, "learning_rate": 6.58013049235453e-05, "loss": 32.625, "step": 2538 }, { "epoch": 0.40742969470854895, "grad_norm": 12.13537311553955, "learning_rate": 6.577699533434611e-05, "loss": 32.6094, "step": 2539 }, { "epoch": 0.40759016327676817, "grad_norm": 12.20139217376709, "learning_rate": 6.575268160265758e-05, "loss": 32.6094, "step": 2540 }, { "epoch": 0.4077506318449874, "grad_norm": 12.260272979736328, "learning_rate": 6.572836373486363e-05, "loss": 32.5938, "step": 2541 }, { "epoch": 0.40791110041320655, "grad_norm": 12.454469680786133, "learning_rate": 6.570404173734929e-05, "loss": 32.5859, "step": 2542 }, { "epoch": 0.40807156898142577, "grad_norm": 12.068942070007324, "learning_rate": 6.567971561650067e-05, "loss": 32.6328, "step": 2543 }, { "epoch": 0.408232037549645, "grad_norm": 12.132258415222168, "learning_rate": 6.565538537870498e-05, "loss": 32.6328, "step": 2544 }, { "epoch": 0.40839250611786415, "grad_norm": 12.644466400146484, "learning_rate": 6.563105103035046e-05, "loss": 32.6484, "step": 2545 }, { "epoch": 0.40855297468608337, "grad_norm": 12.336236000061035, "learning_rate": 6.560671257782647e-05, "loss": 32.6875, "step": 2546 }, { "epoch": 0.4087134432543026, "grad_norm": 12.137396812438965, "learning_rate": 6.558237002752346e-05, "loss": 32.6797, "step": 2547 }, { "epoch": 0.40887391182252175, "grad_norm": 12.288535118103027, "learning_rate": 6.555802338583293e-05, "loss": 32.7422, "step": 2548 }, { "epoch": 0.40903438039074097, "grad_norm": 11.955560684204102, "learning_rate": 6.553367265914743e-05, "loss": 32.7969, "step": 2549 }, { "epoch": 0.4091948489589602, "grad_norm": 12.263067245483398, "learning_rate": 6.550931785386066e-05, "loss": 33.0625, "step": 2550 }, { "epoch": 0.40935531752717935, "grad_norm": 12.79189395904541, "learning_rate": 6.548495897636732e-05, "loss": 33.2109, "step": 2551 }, { "epoch": 0.40951578609539857, "grad_norm": 11.967326164245605, "learning_rate": 6.546059603306318e-05, "loss": 32.8047, "step": 2552 }, { "epoch": 0.4096762546636178, "grad_norm": 12.361310958862305, "learning_rate": 6.543622903034515e-05, "loss": 32.7891, "step": 2553 }, { "epoch": 0.40983672323183695, "grad_norm": 12.492563247680664, "learning_rate": 6.54118579746111e-05, "loss": 32.7812, "step": 2554 }, { "epoch": 0.40999719180005617, "grad_norm": 12.09607982635498, "learning_rate": 6.538748287226007e-05, "loss": 32.7109, "step": 2555 }, { "epoch": 0.4101576603682754, "grad_norm": 12.156268119812012, "learning_rate": 6.536310372969209e-05, "loss": 32.7344, "step": 2556 }, { "epoch": 0.41031812893649455, "grad_norm": 12.29469108581543, "learning_rate": 6.533872055330827e-05, "loss": 32.6484, "step": 2557 }, { "epoch": 0.41047859750471377, "grad_norm": 12.099759101867676, "learning_rate": 6.531433334951079e-05, "loss": 32.8047, "step": 2558 }, { "epoch": 0.410639066072933, "grad_norm": 12.209949493408203, "learning_rate": 6.528994212470287e-05, "loss": 32.7188, "step": 2559 }, { "epoch": 0.41079953464115215, "grad_norm": 12.289643287658691, "learning_rate": 6.526554688528881e-05, "loss": 32.7578, "step": 2560 }, { "epoch": 0.41096000320937137, "grad_norm": 12.214194297790527, "learning_rate": 6.524114763767394e-05, "loss": 32.6406, "step": 2561 }, { "epoch": 0.4111204717775906, "grad_norm": 11.889647483825684, "learning_rate": 6.521674438826466e-05, "loss": 32.6562, "step": 2562 }, { "epoch": 0.41128094034580975, "grad_norm": 12.220385551452637, "learning_rate": 6.519233714346841e-05, "loss": 32.6641, "step": 2563 }, { "epoch": 0.41144140891402897, "grad_norm": 12.080402374267578, "learning_rate": 6.516792590969368e-05, "loss": 32.6875, "step": 2564 }, { "epoch": 0.4116018774822482, "grad_norm": 12.212374687194824, "learning_rate": 6.514351069334999e-05, "loss": 32.7578, "step": 2565 }, { "epoch": 0.41176234605046735, "grad_norm": 12.23002815246582, "learning_rate": 6.511909150084798e-05, "loss": 32.7109, "step": 2566 }, { "epoch": 0.4119228146186866, "grad_norm": 12.358318328857422, "learning_rate": 6.509466833859922e-05, "loss": 32.5938, "step": 2567 }, { "epoch": 0.4120832831869058, "grad_norm": 12.264657974243164, "learning_rate": 6.507024121301644e-05, "loss": 32.7109, "step": 2568 }, { "epoch": 0.41224375175512495, "grad_norm": 12.15548038482666, "learning_rate": 6.504581013051329e-05, "loss": 32.6406, "step": 2569 }, { "epoch": 0.4124042203233442, "grad_norm": 11.946799278259277, "learning_rate": 6.502137509750457e-05, "loss": 32.7266, "step": 2570 }, { "epoch": 0.4125646888915634, "grad_norm": 12.613266944885254, "learning_rate": 6.499693612040603e-05, "loss": 32.7891, "step": 2571 }, { "epoch": 0.41272515745978255, "grad_norm": 12.145811080932617, "learning_rate": 6.497249320563452e-05, "loss": 32.6797, "step": 2572 }, { "epoch": 0.4128856260280018, "grad_norm": 12.210052490234375, "learning_rate": 6.49480463596079e-05, "loss": 32.6172, "step": 2573 }, { "epoch": 0.413046094596221, "grad_norm": 12.133501052856445, "learning_rate": 6.492359558874505e-05, "loss": 32.6797, "step": 2574 }, { "epoch": 0.41320656316444015, "grad_norm": 12.445826530456543, "learning_rate": 6.489914089946587e-05, "loss": 32.5781, "step": 2575 }, { "epoch": 0.4133670317326594, "grad_norm": 12.076622009277344, "learning_rate": 6.487468229819136e-05, "loss": 32.6875, "step": 2576 }, { "epoch": 0.4135275003008786, "grad_norm": 11.964786529541016, "learning_rate": 6.485021979134344e-05, "loss": 32.6953, "step": 2577 }, { "epoch": 0.41368796886909776, "grad_norm": 12.207820892333984, "learning_rate": 6.482575338534516e-05, "loss": 32.6172, "step": 2578 }, { "epoch": 0.413848437437317, "grad_norm": 12.008310317993164, "learning_rate": 6.480128308662053e-05, "loss": 32.6172, "step": 2579 }, { "epoch": 0.4140089060055362, "grad_norm": 12.459409713745117, "learning_rate": 6.477680890159458e-05, "loss": 32.5312, "step": 2580 }, { "epoch": 0.41416937457375536, "grad_norm": 12.139142036437988, "learning_rate": 6.47523308366934e-05, "loss": 32.6016, "step": 2581 }, { "epoch": 0.4143298431419746, "grad_norm": 11.951282501220703, "learning_rate": 6.472784889834404e-05, "loss": 32.7812, "step": 2582 }, { "epoch": 0.4144903117101938, "grad_norm": 12.082317352294922, "learning_rate": 6.470336309297467e-05, "loss": 32.6875, "step": 2583 }, { "epoch": 0.41465078027841296, "grad_norm": 12.194815635681152, "learning_rate": 6.467887342701437e-05, "loss": 32.5625, "step": 2584 }, { "epoch": 0.4148112488466322, "grad_norm": 12.268580436706543, "learning_rate": 6.465437990689327e-05, "loss": 32.6328, "step": 2585 }, { "epoch": 0.4149717174148514, "grad_norm": 12.193889617919922, "learning_rate": 6.46298825390425e-05, "loss": 32.5625, "step": 2586 }, { "epoch": 0.41513218598307056, "grad_norm": 12.009765625, "learning_rate": 6.460538132989427e-05, "loss": 32.6094, "step": 2587 }, { "epoch": 0.4152926545512898, "grad_norm": 12.199915885925293, "learning_rate": 6.45808762858817e-05, "loss": 32.6562, "step": 2588 }, { "epoch": 0.415453123119509, "grad_norm": 12.195830345153809, "learning_rate": 6.455636741343894e-05, "loss": 32.5898, "step": 2589 }, { "epoch": 0.41561359168772816, "grad_norm": 12.008638381958008, "learning_rate": 6.453185471900123e-05, "loss": 32.6484, "step": 2590 }, { "epoch": 0.4157740602559474, "grad_norm": 12.260751724243164, "learning_rate": 6.450733820900472e-05, "loss": 32.6094, "step": 2591 }, { "epoch": 0.4159345288241666, "grad_norm": 12.142149925231934, "learning_rate": 6.448281788988657e-05, "loss": 32.5859, "step": 2592 }, { "epoch": 0.41609499739238576, "grad_norm": 12.320630073547363, "learning_rate": 6.4458293768085e-05, "loss": 32.5781, "step": 2593 }, { "epoch": 0.416255465960605, "grad_norm": 12.279218673706055, "learning_rate": 6.443376585003915e-05, "loss": 32.6172, "step": 2594 }, { "epoch": 0.4164159345288242, "grad_norm": 12.259406089782715, "learning_rate": 6.440923414218924e-05, "loss": 32.6172, "step": 2595 }, { "epoch": 0.41657640309704336, "grad_norm": 12.228931427001953, "learning_rate": 6.438469865097639e-05, "loss": 32.8359, "step": 2596 }, { "epoch": 0.4167368716652626, "grad_norm": 12.25940990447998, "learning_rate": 6.436015938284281e-05, "loss": 32.5938, "step": 2597 }, { "epoch": 0.41689734023348174, "grad_norm": 12.46329116821289, "learning_rate": 6.433561634423165e-05, "loss": 32.6875, "step": 2598 }, { "epoch": 0.41705780880170096, "grad_norm": 12.087678909301758, "learning_rate": 6.431106954158702e-05, "loss": 32.7422, "step": 2599 }, { "epoch": 0.4172182773699202, "grad_norm": 12.028881072998047, "learning_rate": 6.428651898135411e-05, "loss": 32.9219, "step": 2600 }, { "epoch": 0.41737874593813934, "grad_norm": 12.131860733032227, "learning_rate": 6.4261964669979e-05, "loss": 33.1484, "step": 2601 }, { "epoch": 0.41753921450635856, "grad_norm": 12.115181922912598, "learning_rate": 6.423740661390882e-05, "loss": 32.8672, "step": 2602 }, { "epoch": 0.4176996830745778, "grad_norm": 12.170747756958008, "learning_rate": 6.421284481959164e-05, "loss": 32.7578, "step": 2603 }, { "epoch": 0.41786015164279694, "grad_norm": 12.09249496459961, "learning_rate": 6.418827929347656e-05, "loss": 32.7656, "step": 2604 }, { "epoch": 0.41802062021101616, "grad_norm": 11.898598670959473, "learning_rate": 6.41637100420136e-05, "loss": 32.8906, "step": 2605 }, { "epoch": 0.4181810887792354, "grad_norm": 12.152441024780273, "learning_rate": 6.413913707165381e-05, "loss": 32.7266, "step": 2606 }, { "epoch": 0.41834155734745454, "grad_norm": 12.094085693359375, "learning_rate": 6.411456038884919e-05, "loss": 32.7109, "step": 2607 }, { "epoch": 0.41850202591567376, "grad_norm": 12.27522087097168, "learning_rate": 6.408998000005274e-05, "loss": 32.6641, "step": 2608 }, { "epoch": 0.418662494483893, "grad_norm": 12.157388687133789, "learning_rate": 6.406539591171836e-05, "loss": 32.7422, "step": 2609 }, { "epoch": 0.41882296305211214, "grad_norm": 12.352498054504395, "learning_rate": 6.404080813030102e-05, "loss": 32.7578, "step": 2610 }, { "epoch": 0.41898343162033136, "grad_norm": 12.214208602905273, "learning_rate": 6.401621666225661e-05, "loss": 32.7969, "step": 2611 }, { "epoch": 0.4191439001885506, "grad_norm": 12.265632629394531, "learning_rate": 6.399162151404199e-05, "loss": 32.6406, "step": 2612 }, { "epoch": 0.41930436875676974, "grad_norm": 12.667216300964355, "learning_rate": 6.396702269211498e-05, "loss": 32.6562, "step": 2613 }, { "epoch": 0.41946483732498896, "grad_norm": 12.075662612915039, "learning_rate": 6.394242020293438e-05, "loss": 32.6328, "step": 2614 }, { "epoch": 0.4196253058932082, "grad_norm": 12.233247756958008, "learning_rate": 6.391781405295997e-05, "loss": 32.6719, "step": 2615 }, { "epoch": 0.41978577446142734, "grad_norm": 12.27579402923584, "learning_rate": 6.389320424865242e-05, "loss": 32.6953, "step": 2616 }, { "epoch": 0.41994624302964656, "grad_norm": 12.39751148223877, "learning_rate": 6.386859079647342e-05, "loss": 32.6172, "step": 2617 }, { "epoch": 0.4201067115978658, "grad_norm": 12.26053524017334, "learning_rate": 6.384397370288565e-05, "loss": 32.6641, "step": 2618 }, { "epoch": 0.42026718016608494, "grad_norm": 12.022659301757812, "learning_rate": 6.381935297435265e-05, "loss": 32.7188, "step": 2619 }, { "epoch": 0.42042764873430416, "grad_norm": 12.394726753234863, "learning_rate": 6.379472861733897e-05, "loss": 32.6328, "step": 2620 }, { "epoch": 0.4205881173025234, "grad_norm": 12.006644248962402, "learning_rate": 6.377010063831015e-05, "loss": 32.6172, "step": 2621 }, { "epoch": 0.42074858587074254, "grad_norm": 12.260340690612793, "learning_rate": 6.37454690437326e-05, "loss": 32.6016, "step": 2622 }, { "epoch": 0.42090905443896176, "grad_norm": 11.959997177124023, "learning_rate": 6.372083384007372e-05, "loss": 32.7266, "step": 2623 }, { "epoch": 0.421069523007181, "grad_norm": 12.137308120727539, "learning_rate": 6.369619503380189e-05, "loss": 32.7188, "step": 2624 }, { "epoch": 0.42122999157540014, "grad_norm": 12.144571304321289, "learning_rate": 6.367155263138635e-05, "loss": 32.6562, "step": 2625 }, { "epoch": 0.42139046014361936, "grad_norm": 12.209083557128906, "learning_rate": 6.364690663929738e-05, "loss": 32.6016, "step": 2626 }, { "epoch": 0.4215509287118386, "grad_norm": 12.269458770751953, "learning_rate": 6.362225706400616e-05, "loss": 32.5859, "step": 2627 }, { "epoch": 0.42171139728005774, "grad_norm": 11.94817066192627, "learning_rate": 6.359760391198477e-05, "loss": 32.625, "step": 2628 }, { "epoch": 0.42187186584827696, "grad_norm": 11.886964797973633, "learning_rate": 6.357294718970627e-05, "loss": 32.6562, "step": 2629 }, { "epoch": 0.4220323344164962, "grad_norm": 12.26916217803955, "learning_rate": 6.354828690364472e-05, "loss": 32.7188, "step": 2630 }, { "epoch": 0.42219280298471534, "grad_norm": 12.207706451416016, "learning_rate": 6.3523623060275e-05, "loss": 32.6562, "step": 2631 }, { "epoch": 0.42235327155293456, "grad_norm": 12.096175193786621, "learning_rate": 6.349895566607298e-05, "loss": 32.6562, "step": 2632 }, { "epoch": 0.4225137401211538, "grad_norm": 12.13098430633545, "learning_rate": 6.347428472751547e-05, "loss": 32.5938, "step": 2633 }, { "epoch": 0.42267420868937294, "grad_norm": 12.139631271362305, "learning_rate": 6.34496102510802e-05, "loss": 32.6875, "step": 2634 }, { "epoch": 0.42283467725759216, "grad_norm": 12.26697063446045, "learning_rate": 6.342493224324581e-05, "loss": 32.7188, "step": 2635 }, { "epoch": 0.4229951458258114, "grad_norm": 12.074134826660156, "learning_rate": 6.34002507104919e-05, "loss": 32.6172, "step": 2636 }, { "epoch": 0.42315561439403054, "grad_norm": 12.452791213989258, "learning_rate": 6.337556565929899e-05, "loss": 32.6328, "step": 2637 }, { "epoch": 0.42331608296224976, "grad_norm": 12.781867027282715, "learning_rate": 6.33508770961485e-05, "loss": 32.5781, "step": 2638 }, { "epoch": 0.423476551530469, "grad_norm": 12.51957893371582, "learning_rate": 6.332618502752277e-05, "loss": 32.5547, "step": 2639 }, { "epoch": 0.42363702009868814, "grad_norm": 12.383210182189941, "learning_rate": 6.330148945990511e-05, "loss": 32.6406, "step": 2640 }, { "epoch": 0.42379748866690736, "grad_norm": 12.322052955627441, "learning_rate": 6.327679039977969e-05, "loss": 32.5938, "step": 2641 }, { "epoch": 0.4239579572351266, "grad_norm": 12.3211669921875, "learning_rate": 6.325208785363166e-05, "loss": 32.5234, "step": 2642 }, { "epoch": 0.42411842580334574, "grad_norm": 12.076340675354004, "learning_rate": 6.3227381827947e-05, "loss": 32.7109, "step": 2643 }, { "epoch": 0.42427889437156496, "grad_norm": 12.139331817626953, "learning_rate": 6.320267232921268e-05, "loss": 32.7031, "step": 2644 }, { "epoch": 0.4244393629397842, "grad_norm": 12.385150909423828, "learning_rate": 6.317795936391655e-05, "loss": 32.5508, "step": 2645 }, { "epoch": 0.42459983150800334, "grad_norm": 12.201382637023926, "learning_rate": 6.315324293854737e-05, "loss": 32.625, "step": 2646 }, { "epoch": 0.42476030007622256, "grad_norm": 12.081537246704102, "learning_rate": 6.312852305959482e-05, "loss": 32.7422, "step": 2647 }, { "epoch": 0.4249207686444418, "grad_norm": 12.209724426269531, "learning_rate": 6.310379973354947e-05, "loss": 32.6172, "step": 2648 }, { "epoch": 0.42508123721266095, "grad_norm": 12.343032836914062, "learning_rate": 6.30790729669028e-05, "loss": 32.6562, "step": 2649 }, { "epoch": 0.42524170578088016, "grad_norm": 11.987664222717285, "learning_rate": 6.305434276614721e-05, "loss": 32.9609, "step": 2650 }, { "epoch": 0.4254021743490994, "grad_norm": 12.168196678161621, "learning_rate": 6.302960913777598e-05, "loss": 33.1797, "step": 2651 }, { "epoch": 0.42556264291731855, "grad_norm": 12.235143661499023, "learning_rate": 6.300487208828331e-05, "loss": 32.8125, "step": 2652 }, { "epoch": 0.42572311148553776, "grad_norm": 11.955587387084961, "learning_rate": 6.298013162416427e-05, "loss": 32.8125, "step": 2653 }, { "epoch": 0.425883580053757, "grad_norm": 12.245844841003418, "learning_rate": 6.295538775191486e-05, "loss": 32.8438, "step": 2654 }, { "epoch": 0.42604404862197615, "grad_norm": 12.144214630126953, "learning_rate": 6.293064047803195e-05, "loss": 32.6875, "step": 2655 }, { "epoch": 0.42620451719019536, "grad_norm": 12.398801803588867, "learning_rate": 6.290588980901332e-05, "loss": 32.6641, "step": 2656 }, { "epoch": 0.4263649857584146, "grad_norm": 12.281562805175781, "learning_rate": 6.288113575135762e-05, "loss": 32.7578, "step": 2657 }, { "epoch": 0.42652545432663375, "grad_norm": 12.149870872497559, "learning_rate": 6.285637831156442e-05, "loss": 32.7578, "step": 2658 }, { "epoch": 0.42668592289485296, "grad_norm": 12.26806640625, "learning_rate": 6.283161749613415e-05, "loss": 32.7656, "step": 2659 }, { "epoch": 0.4268463914630722, "grad_norm": 12.291868209838867, "learning_rate": 6.280685331156814e-05, "loss": 32.6172, "step": 2660 }, { "epoch": 0.42700686003129135, "grad_norm": 12.410451889038086, "learning_rate": 6.278208576436861e-05, "loss": 32.9219, "step": 2661 }, { "epoch": 0.42716732859951057, "grad_norm": 12.19542121887207, "learning_rate": 6.275731486103865e-05, "loss": 32.7422, "step": 2662 }, { "epoch": 0.4273277971677298, "grad_norm": 12.266242980957031, "learning_rate": 6.273254060808222e-05, "loss": 32.7656, "step": 2663 }, { "epoch": 0.42748826573594895, "grad_norm": 12.527993202209473, "learning_rate": 6.27077630120042e-05, "loss": 32.625, "step": 2664 }, { "epoch": 0.42764873430416817, "grad_norm": 12.013361930847168, "learning_rate": 6.268298207931031e-05, "loss": 32.6953, "step": 2665 }, { "epoch": 0.4278092028723874, "grad_norm": 12.225245475769043, "learning_rate": 6.265819781650715e-05, "loss": 32.7266, "step": 2666 }, { "epoch": 0.42796967144060655, "grad_norm": 12.082259178161621, "learning_rate": 6.263341023010223e-05, "loss": 32.625, "step": 2667 }, { "epoch": 0.42813014000882577, "grad_norm": 12.203978538513184, "learning_rate": 6.260861932660391e-05, "loss": 32.6875, "step": 2668 }, { "epoch": 0.428290608577045, "grad_norm": 12.088157653808594, "learning_rate": 6.258382511252139e-05, "loss": 32.6484, "step": 2669 }, { "epoch": 0.42845107714526415, "grad_norm": 12.272374153137207, "learning_rate": 6.255902759436476e-05, "loss": 32.6094, "step": 2670 }, { "epoch": 0.42861154571348337, "grad_norm": 12.198324203491211, "learning_rate": 6.253422677864501e-05, "loss": 32.6953, "step": 2671 }, { "epoch": 0.4287720142817026, "grad_norm": 12.147428512573242, "learning_rate": 6.250942267187398e-05, "loss": 32.6797, "step": 2672 }, { "epoch": 0.42893248284992175, "grad_norm": 12.017227172851562, "learning_rate": 6.248461528056432e-05, "loss": 32.6406, "step": 2673 }, { "epoch": 0.42909295141814097, "grad_norm": 12.22763442993164, "learning_rate": 6.245980461122963e-05, "loss": 32.5703, "step": 2674 }, { "epoch": 0.4292534199863602, "grad_norm": 12.285539627075195, "learning_rate": 6.243499067038431e-05, "loss": 32.5781, "step": 2675 }, { "epoch": 0.42941388855457935, "grad_norm": 12.417498588562012, "learning_rate": 6.241017346454363e-05, "loss": 32.7266, "step": 2676 }, { "epoch": 0.42957435712279857, "grad_norm": 12.150111198425293, "learning_rate": 6.238535300022374e-05, "loss": 32.7031, "step": 2677 }, { "epoch": 0.4297348256910178, "grad_norm": 12.006800651550293, "learning_rate": 6.236052928394163e-05, "loss": 32.625, "step": 2678 }, { "epoch": 0.42989529425923695, "grad_norm": 12.200780868530273, "learning_rate": 6.233570232221513e-05, "loss": 32.6641, "step": 2679 }, { "epoch": 0.43005576282745617, "grad_norm": 12.142677307128906, "learning_rate": 6.231087212156292e-05, "loss": 32.5938, "step": 2680 }, { "epoch": 0.4302162313956754, "grad_norm": 12.206140518188477, "learning_rate": 6.22860386885046e-05, "loss": 32.6094, "step": 2681 }, { "epoch": 0.43037669996389455, "grad_norm": 12.333410263061523, "learning_rate": 6.22612020295605e-05, "loss": 32.5859, "step": 2682 }, { "epoch": 0.43053716853211377, "grad_norm": 12.258347511291504, "learning_rate": 6.223636215125188e-05, "loss": 32.5547, "step": 2683 }, { "epoch": 0.430697637100333, "grad_norm": 12.332045555114746, "learning_rate": 6.221151906010085e-05, "loss": 32.6641, "step": 2684 }, { "epoch": 0.43085810566855215, "grad_norm": 12.257067680358887, "learning_rate": 6.218667276263034e-05, "loss": 32.6797, "step": 2685 }, { "epoch": 0.43101857423677137, "grad_norm": 12.323877334594727, "learning_rate": 6.21618232653641e-05, "loss": 32.5859, "step": 2686 }, { "epoch": 0.4311790428049906, "grad_norm": 12.330878257751465, "learning_rate": 6.213697057482676e-05, "loss": 32.6094, "step": 2687 }, { "epoch": 0.43133951137320975, "grad_norm": 12.26251220703125, "learning_rate": 6.211211469754376e-05, "loss": 32.5352, "step": 2688 }, { "epoch": 0.43149997994142897, "grad_norm": 12.16896915435791, "learning_rate": 6.208725564004138e-05, "loss": 32.7422, "step": 2689 }, { "epoch": 0.4316604485096482, "grad_norm": 12.210183143615723, "learning_rate": 6.206239340884674e-05, "loss": 32.6484, "step": 2690 }, { "epoch": 0.43182091707786735, "grad_norm": 12.261420249938965, "learning_rate": 6.203752801048785e-05, "loss": 32.6328, "step": 2691 }, { "epoch": 0.43198138564608657, "grad_norm": 12.32235050201416, "learning_rate": 6.201265945149343e-05, "loss": 32.6094, "step": 2692 }, { "epoch": 0.4321418542143058, "grad_norm": 12.40017032623291, "learning_rate": 6.198778773839311e-05, "loss": 32.6875, "step": 2693 }, { "epoch": 0.43230232278252495, "grad_norm": 11.944177627563477, "learning_rate": 6.196291287771738e-05, "loss": 32.6562, "step": 2694 }, { "epoch": 0.43246279135074417, "grad_norm": 12.266621589660645, "learning_rate": 6.193803487599745e-05, "loss": 32.6953, "step": 2695 }, { "epoch": 0.4326232599189634, "grad_norm": 12.213273048400879, "learning_rate": 6.191315373976547e-05, "loss": 32.6719, "step": 2696 }, { "epoch": 0.43278372848718255, "grad_norm": 12.277902603149414, "learning_rate": 6.188826947555433e-05, "loss": 32.7266, "step": 2697 }, { "epoch": 0.43294419705540177, "grad_norm": 12.135564804077148, "learning_rate": 6.186338208989777e-05, "loss": 32.7891, "step": 2698 }, { "epoch": 0.433104665623621, "grad_norm": 12.054149627685547, "learning_rate": 6.183849158933037e-05, "loss": 32.9844, "step": 2699 }, { "epoch": 0.43326513419184015, "grad_norm": 12.414159774780273, "learning_rate": 6.181359798038747e-05, "loss": 33.1328, "step": 2700 }, { "epoch": 0.43342560276005937, "grad_norm": 12.054771423339844, "learning_rate": 6.178870126960532e-05, "loss": 32.9609, "step": 2701 }, { "epoch": 0.4335860713282786, "grad_norm": 12.354795455932617, "learning_rate": 6.176380146352089e-05, "loss": 32.8438, "step": 2702 }, { "epoch": 0.43374653989649775, "grad_norm": 12.090492248535156, "learning_rate": 6.173889856867201e-05, "loss": 32.7812, "step": 2703 }, { "epoch": 0.43390700846471697, "grad_norm": 11.93397045135498, "learning_rate": 6.171399259159732e-05, "loss": 32.7422, "step": 2704 }, { "epoch": 0.4340674770329362, "grad_norm": 12.102800369262695, "learning_rate": 6.168908353883625e-05, "loss": 32.7969, "step": 2705 }, { "epoch": 0.43422794560115535, "grad_norm": 12.716232299804688, "learning_rate": 6.166417141692906e-05, "loss": 32.7969, "step": 2706 }, { "epoch": 0.43438841416937457, "grad_norm": 12.153694152832031, "learning_rate": 6.163925623241678e-05, "loss": 32.6875, "step": 2707 }, { "epoch": 0.4345488827375938, "grad_norm": 12.531085968017578, "learning_rate": 6.16143379918413e-05, "loss": 32.625, "step": 2708 }, { "epoch": 0.43470935130581295, "grad_norm": 12.218779563903809, "learning_rate": 6.158941670174529e-05, "loss": 32.7422, "step": 2709 }, { "epoch": 0.43486981987403217, "grad_norm": 12.215499877929688, "learning_rate": 6.156449236867218e-05, "loss": 32.6797, "step": 2710 }, { "epoch": 0.4350302884422514, "grad_norm": 12.272798538208008, "learning_rate": 6.153956499916625e-05, "loss": 32.7031, "step": 2711 }, { "epoch": 0.43519075701047055, "grad_norm": 12.217477798461914, "learning_rate": 6.151463459977255e-05, "loss": 32.6797, "step": 2712 }, { "epoch": 0.43535122557868977, "grad_norm": 12.21098804473877, "learning_rate": 6.148970117703694e-05, "loss": 32.7422, "step": 2713 }, { "epoch": 0.435511694146909, "grad_norm": 12.14852237701416, "learning_rate": 6.146476473750606e-05, "loss": 32.5938, "step": 2714 }, { "epoch": 0.43567216271512815, "grad_norm": 12.269075393676758, "learning_rate": 6.143982528772738e-05, "loss": 32.7344, "step": 2715 }, { "epoch": 0.43583263128334737, "grad_norm": 12.022967338562012, "learning_rate": 6.14148828342491e-05, "loss": 32.6953, "step": 2716 }, { "epoch": 0.4359930998515666, "grad_norm": 12.523646354675293, "learning_rate": 6.138993738362023e-05, "loss": 32.7344, "step": 2717 }, { "epoch": 0.43615356841978575, "grad_norm": 12.014289855957031, "learning_rate": 6.136498894239062e-05, "loss": 32.6953, "step": 2718 }, { "epoch": 0.43631403698800497, "grad_norm": 12.262746810913086, "learning_rate": 6.134003751711082e-05, "loss": 32.5859, "step": 2719 }, { "epoch": 0.4364745055562242, "grad_norm": 12.022299766540527, "learning_rate": 6.131508311433222e-05, "loss": 32.7422, "step": 2720 }, { "epoch": 0.43663497412444335, "grad_norm": 12.26945686340332, "learning_rate": 6.129012574060699e-05, "loss": 32.6797, "step": 2721 }, { "epoch": 0.43679544269266257, "grad_norm": 12.274199485778809, "learning_rate": 6.126516540248806e-05, "loss": 32.6172, "step": 2722 }, { "epoch": 0.4369559112608818, "grad_norm": 12.331488609313965, "learning_rate": 6.124020210652911e-05, "loss": 32.6484, "step": 2723 }, { "epoch": 0.43711637982910095, "grad_norm": 12.27806568145752, "learning_rate": 6.121523585928467e-05, "loss": 32.7539, "step": 2724 }, { "epoch": 0.4372768483973202, "grad_norm": 12.013643264770508, "learning_rate": 6.119026666731002e-05, "loss": 32.6484, "step": 2725 }, { "epoch": 0.4374373169655394, "grad_norm": 12.08949089050293, "learning_rate": 6.116529453716117e-05, "loss": 32.6016, "step": 2726 }, { "epoch": 0.43759778553375855, "grad_norm": 12.214343070983887, "learning_rate": 6.114031947539494e-05, "loss": 32.6094, "step": 2727 }, { "epoch": 0.4377582541019778, "grad_norm": 12.341020584106445, "learning_rate": 6.11153414885689e-05, "loss": 32.6094, "step": 2728 }, { "epoch": 0.437918722670197, "grad_norm": 12.664484977722168, "learning_rate": 6.109036058324142e-05, "loss": 32.5859, "step": 2729 }, { "epoch": 0.43807919123841615, "grad_norm": 12.198543548583984, "learning_rate": 6.10653767659716e-05, "loss": 32.6094, "step": 2730 }, { "epoch": 0.4382396598066354, "grad_norm": 12.517091751098633, "learning_rate": 6.104039004331932e-05, "loss": 32.5234, "step": 2731 }, { "epoch": 0.4384001283748546, "grad_norm": 12.282010078430176, "learning_rate": 6.101540042184525e-05, "loss": 32.6875, "step": 2732 }, { "epoch": 0.43856059694307375, "grad_norm": 12.32210922241211, "learning_rate": 6.0990407908110755e-05, "loss": 32.5391, "step": 2733 }, { "epoch": 0.438721065511293, "grad_norm": 12.074358940124512, "learning_rate": 6.0965412508678024e-05, "loss": 32.6094, "step": 2734 }, { "epoch": 0.4388815340795122, "grad_norm": 12.269257545471191, "learning_rate": 6.0940414230109966e-05, "loss": 32.6016, "step": 2735 }, { "epoch": 0.43904200264773136, "grad_norm": 12.178513526916504, "learning_rate": 6.091541307897026e-05, "loss": 32.75, "step": 2736 }, { "epoch": 0.4392024712159506, "grad_norm": 12.1309814453125, "learning_rate": 6.0890409061823336e-05, "loss": 32.5703, "step": 2737 }, { "epoch": 0.4393629397841698, "grad_norm": 12.20628833770752, "learning_rate": 6.08654021852344e-05, "loss": 32.5938, "step": 2738 }, { "epoch": 0.43952340835238896, "grad_norm": 12.137373924255371, "learning_rate": 6.0840392455769367e-05, "loss": 32.6914, "step": 2739 }, { "epoch": 0.4396838769206082, "grad_norm": 12.580931663513184, "learning_rate": 6.0815379879994905e-05, "loss": 32.5391, "step": 2740 }, { "epoch": 0.4398443454888274, "grad_norm": 12.329150199890137, "learning_rate": 6.079036446447847e-05, "loss": 32.6484, "step": 2741 }, { "epoch": 0.44000481405704656, "grad_norm": 12.139830589294434, "learning_rate": 6.076534621578822e-05, "loss": 32.6641, "step": 2742 }, { "epoch": 0.4401652826252658, "grad_norm": 12.074950218200684, "learning_rate": 6.0740325140493106e-05, "loss": 32.6562, "step": 2743 }, { "epoch": 0.440325751193485, "grad_norm": 12.197844505310059, "learning_rate": 6.071530124516276e-05, "loss": 32.5781, "step": 2744 }, { "epoch": 0.44048621976170416, "grad_norm": 12.26257038116455, "learning_rate": 6.0690274536367594e-05, "loss": 32.7031, "step": 2745 }, { "epoch": 0.4406466883299234, "grad_norm": 12.265152931213379, "learning_rate": 6.0665245020678764e-05, "loss": 32.6562, "step": 2746 }, { "epoch": 0.4408071568981426, "grad_norm": 12.072123527526855, "learning_rate": 6.064021270466811e-05, "loss": 32.6562, "step": 2747 }, { "epoch": 0.44096762546636176, "grad_norm": 12.296649932861328, "learning_rate": 6.061517759490829e-05, "loss": 32.8594, "step": 2748 }, { "epoch": 0.441128094034581, "grad_norm": 12.285767555236816, "learning_rate": 6.059013969797264e-05, "loss": 32.8359, "step": 2749 }, { "epoch": 0.4412885626028002, "grad_norm": 12.60598087310791, "learning_rate": 6.056509902043522e-05, "loss": 32.8438, "step": 2750 }, { "epoch": 0.44144903117101936, "grad_norm": 12.249711990356445, "learning_rate": 6.054005556887087e-05, "loss": 32.918, "step": 2751 }, { "epoch": 0.4416094997392386, "grad_norm": 12.298042297363281, "learning_rate": 6.05150093498551e-05, "loss": 32.8203, "step": 2752 }, { "epoch": 0.4417699683074578, "grad_norm": 12.15419864654541, "learning_rate": 6.0489960369964194e-05, "loss": 32.9141, "step": 2753 }, { "epoch": 0.44193043687567696, "grad_norm": 12.09304428100586, "learning_rate": 6.046490863577512e-05, "loss": 32.7266, "step": 2754 }, { "epoch": 0.4420909054438962, "grad_norm": 12.366775512695312, "learning_rate": 6.043985415386561e-05, "loss": 32.7656, "step": 2755 }, { "epoch": 0.4422513740121154, "grad_norm": 12.265339851379395, "learning_rate": 6.041479693081411e-05, "loss": 32.6719, "step": 2756 }, { "epoch": 0.44241184258033456, "grad_norm": 12.400389671325684, "learning_rate": 6.038973697319973e-05, "loss": 32.6641, "step": 2757 }, { "epoch": 0.4425723111485538, "grad_norm": 12.151317596435547, "learning_rate": 6.0364674287602384e-05, "loss": 32.6875, "step": 2758 }, { "epoch": 0.442732779716773, "grad_norm": 12.081716537475586, "learning_rate": 6.033960888060264e-05, "loss": 32.7812, "step": 2759 }, { "epoch": 0.44289324828499216, "grad_norm": 12.20032024383545, "learning_rate": 6.03145407587818e-05, "loss": 32.6484, "step": 2760 }, { "epoch": 0.4430537168532114, "grad_norm": 12.147274017333984, "learning_rate": 6.028946992872188e-05, "loss": 32.6875, "step": 2761 }, { "epoch": 0.4432141854214306, "grad_norm": 12.141515731811523, "learning_rate": 6.026439639700564e-05, "loss": 32.6328, "step": 2762 }, { "epoch": 0.44337465398964976, "grad_norm": 12.02443790435791, "learning_rate": 6.023932017021646e-05, "loss": 32.875, "step": 2763 }, { "epoch": 0.443535122557869, "grad_norm": 12.278945922851562, "learning_rate": 6.021424125493851e-05, "loss": 32.6719, "step": 2764 }, { "epoch": 0.4436955911260882, "grad_norm": 12.26962661743164, "learning_rate": 6.0189159657756656e-05, "loss": 32.6875, "step": 2765 }, { "epoch": 0.44385605969430736, "grad_norm": 12.271950721740723, "learning_rate": 6.016407538525642e-05, "loss": 32.6328, "step": 2766 }, { "epoch": 0.4440165282625266, "grad_norm": 12.084898948669434, "learning_rate": 6.013898844402408e-05, "loss": 32.6953, "step": 2767 }, { "epoch": 0.4441769968307458, "grad_norm": 12.095361709594727, "learning_rate": 6.01138988406466e-05, "loss": 32.6172, "step": 2768 }, { "epoch": 0.44433746539896496, "grad_norm": 12.14903450012207, "learning_rate": 6.0088806581711596e-05, "loss": 32.6172, "step": 2769 }, { "epoch": 0.4444979339671842, "grad_norm": 11.950077056884766, "learning_rate": 6.006371167380747e-05, "loss": 32.6719, "step": 2770 }, { "epoch": 0.4446584025354034, "grad_norm": 12.023716926574707, "learning_rate": 6.003861412352323e-05, "loss": 32.6016, "step": 2771 }, { "epoch": 0.44481887110362256, "grad_norm": 12.328051567077637, "learning_rate": 6.001351393744864e-05, "loss": 32.5547, "step": 2772 }, { "epoch": 0.4449793396718418, "grad_norm": 12.02194881439209, "learning_rate": 5.9988411122174135e-05, "loss": 32.6484, "step": 2773 }, { "epoch": 0.445139808240061, "grad_norm": 11.955652236938477, "learning_rate": 5.996330568429085e-05, "loss": 32.6328, "step": 2774 }, { "epoch": 0.44530027680828016, "grad_norm": 12.135664939880371, "learning_rate": 5.993819763039057e-05, "loss": 32.6875, "step": 2775 }, { "epoch": 0.4454607453764994, "grad_norm": 12.14427375793457, "learning_rate": 5.99130869670658e-05, "loss": 32.7031, "step": 2776 }, { "epoch": 0.4456212139447186, "grad_norm": 12.202877044677734, "learning_rate": 5.988797370090974e-05, "loss": 32.625, "step": 2777 }, { "epoch": 0.44578168251293776, "grad_norm": 12.328996658325195, "learning_rate": 5.986285783851626e-05, "loss": 32.5781, "step": 2778 }, { "epoch": 0.445942151081157, "grad_norm": 12.582887649536133, "learning_rate": 5.9837739386479895e-05, "loss": 32.6172, "step": 2779 }, { "epoch": 0.4461026196493762, "grad_norm": 11.954724311828613, "learning_rate": 5.98126183513959e-05, "loss": 32.7656, "step": 2780 }, { "epoch": 0.44626308821759536, "grad_norm": 12.267264366149902, "learning_rate": 5.978749473986016e-05, "loss": 32.5938, "step": 2781 }, { "epoch": 0.4464235567858146, "grad_norm": 12.13973617553711, "learning_rate": 5.976236855846926e-05, "loss": 32.6953, "step": 2782 }, { "epoch": 0.4465840253540338, "grad_norm": 12.136712074279785, "learning_rate": 5.9737239813820475e-05, "loss": 32.6328, "step": 2783 }, { "epoch": 0.44674449392225296, "grad_norm": 12.259288787841797, "learning_rate": 5.9712108512511734e-05, "loss": 32.6172, "step": 2784 }, { "epoch": 0.4469049624904722, "grad_norm": 12.133926391601562, "learning_rate": 5.9686974661141635e-05, "loss": 32.6484, "step": 2785 }, { "epoch": 0.4470654310586914, "grad_norm": 12.072129249572754, "learning_rate": 5.9661838266309465e-05, "loss": 32.6406, "step": 2786 }, { "epoch": 0.44722589962691056, "grad_norm": 12.144696235656738, "learning_rate": 5.963669933461513e-05, "loss": 32.6562, "step": 2787 }, { "epoch": 0.4473863681951298, "grad_norm": 12.397523880004883, "learning_rate": 5.9611557872659285e-05, "loss": 32.5859, "step": 2788 }, { "epoch": 0.447546836763349, "grad_norm": 12.32801342010498, "learning_rate": 5.958641388704318e-05, "loss": 32.6797, "step": 2789 }, { "epoch": 0.44770730533156816, "grad_norm": 12.134345054626465, "learning_rate": 5.9561267384368743e-05, "loss": 32.6484, "step": 2790 }, { "epoch": 0.4478677738997874, "grad_norm": 12.469914436340332, "learning_rate": 5.953611837123857e-05, "loss": 32.6172, "step": 2791 }, { "epoch": 0.4480282424680066, "grad_norm": 12.10496711730957, "learning_rate": 5.951096685425594e-05, "loss": 32.8047, "step": 2792 }, { "epoch": 0.44818871103622576, "grad_norm": 12.003597259521484, "learning_rate": 5.9485812840024744e-05, "loss": 32.625, "step": 2793 }, { "epoch": 0.448349179604445, "grad_norm": 11.887685775756836, "learning_rate": 5.946065633514954e-05, "loss": 32.7891, "step": 2794 }, { "epoch": 0.4485096481726642, "grad_norm": 12.465096473693848, "learning_rate": 5.943549734623557e-05, "loss": 32.6094, "step": 2795 }, { "epoch": 0.44867011674088336, "grad_norm": 12.075212478637695, "learning_rate": 5.941033587988871e-05, "loss": 32.7188, "step": 2796 }, { "epoch": 0.4488305853091026, "grad_norm": 12.13528823852539, "learning_rate": 5.9385171942715465e-05, "loss": 32.7344, "step": 2797 }, { "epoch": 0.4489910538773218, "grad_norm": 12.324274063110352, "learning_rate": 5.936000554132303e-05, "loss": 32.5547, "step": 2798 }, { "epoch": 0.44915152244554096, "grad_norm": 12.46161937713623, "learning_rate": 5.93348366823192e-05, "loss": 32.7578, "step": 2799 }, { "epoch": 0.4493119910137602, "grad_norm": 12.18732738494873, "learning_rate": 5.9309665372312475e-05, "loss": 33.0781, "step": 2800 }, { "epoch": 0.4494724595819794, "grad_norm": 11.979188919067383, "learning_rate": 5.928449161791192e-05, "loss": 32.9062, "step": 2801 }, { "epoch": 0.44963292815019856, "grad_norm": 12.157078742980957, "learning_rate": 5.9259315425727316e-05, "loss": 32.7969, "step": 2802 }, { "epoch": 0.4497933967184178, "grad_norm": 12.363584518432617, "learning_rate": 5.9234136802369064e-05, "loss": 32.8828, "step": 2803 }, { "epoch": 0.449953865286637, "grad_norm": 12.162184715270996, "learning_rate": 5.920895575444816e-05, "loss": 32.9141, "step": 2804 }, { "epoch": 0.45011433385485616, "grad_norm": 12.4072847366333, "learning_rate": 5.91837722885763e-05, "loss": 32.7188, "step": 2805 }, { "epoch": 0.4502748024230754, "grad_norm": 12.219629287719727, "learning_rate": 5.915858641136576e-05, "loss": 32.6953, "step": 2806 }, { "epoch": 0.4504352709912946, "grad_norm": 12.345932006835938, "learning_rate": 5.9133398129429494e-05, "loss": 32.6641, "step": 2807 }, { "epoch": 0.45059573955951376, "grad_norm": 12.353378295898438, "learning_rate": 5.910820744938105e-05, "loss": 32.7031, "step": 2808 }, { "epoch": 0.450756208127733, "grad_norm": 12.079246520996094, "learning_rate": 5.908301437783465e-05, "loss": 32.7969, "step": 2809 }, { "epoch": 0.4509166766959522, "grad_norm": 11.94432544708252, "learning_rate": 5.905781892140508e-05, "loss": 32.8438, "step": 2810 }, { "epoch": 0.45107714526417136, "grad_norm": 12.072352409362793, "learning_rate": 5.903262108670782e-05, "loss": 32.6875, "step": 2811 }, { "epoch": 0.4512376138323906, "grad_norm": 12.28055191040039, "learning_rate": 5.900742088035892e-05, "loss": 32.6562, "step": 2812 }, { "epoch": 0.4513980824006098, "grad_norm": 12.20411491394043, "learning_rate": 5.8982218308975103e-05, "loss": 32.6406, "step": 2813 }, { "epoch": 0.45155855096882896, "grad_norm": 12.079483032226562, "learning_rate": 5.895701337917366e-05, "loss": 32.6953, "step": 2814 }, { "epoch": 0.4517190195370482, "grad_norm": 12.26444149017334, "learning_rate": 5.893180609757255e-05, "loss": 32.7266, "step": 2815 }, { "epoch": 0.4518794881052674, "grad_norm": 12.143106460571289, "learning_rate": 5.890659647079032e-05, "loss": 32.6289, "step": 2816 }, { "epoch": 0.45203995667348656, "grad_norm": 12.2095947265625, "learning_rate": 5.8881384505446126e-05, "loss": 32.6328, "step": 2817 }, { "epoch": 0.4522004252417058, "grad_norm": 12.272095680236816, "learning_rate": 5.8856170208159755e-05, "loss": 32.6328, "step": 2818 }, { "epoch": 0.452360893809925, "grad_norm": 12.080020904541016, "learning_rate": 5.883095358555162e-05, "loss": 32.6406, "step": 2819 }, { "epoch": 0.45252136237814417, "grad_norm": 12.456220626831055, "learning_rate": 5.8805734644242714e-05, "loss": 32.5469, "step": 2820 }, { "epoch": 0.4526818309463634, "grad_norm": 12.143473625183105, "learning_rate": 5.878051339085465e-05, "loss": 32.6562, "step": 2821 }, { "epoch": 0.4528422995145826, "grad_norm": 12.213464736938477, "learning_rate": 5.8755289832009666e-05, "loss": 32.6484, "step": 2822 }, { "epoch": 0.45300276808280177, "grad_norm": 12.075161933898926, "learning_rate": 5.873006397433058e-05, "loss": 32.6406, "step": 2823 }, { "epoch": 0.453163236651021, "grad_norm": 12.337160110473633, "learning_rate": 5.870483582444082e-05, "loss": 32.6172, "step": 2824 }, { "epoch": 0.4533237052192402, "grad_norm": 12.581392288208008, "learning_rate": 5.867960538896442e-05, "loss": 32.5938, "step": 2825 }, { "epoch": 0.45348417378745937, "grad_norm": 12.008828163146973, "learning_rate": 5.8654372674526016e-05, "loss": 32.6484, "step": 2826 }, { "epoch": 0.4536446423556786, "grad_norm": 12.216151237487793, "learning_rate": 5.862913768775086e-05, "loss": 32.6016, "step": 2827 }, { "epoch": 0.4538051109238978, "grad_norm": 12.719042778015137, "learning_rate": 5.8603900435264736e-05, "loss": 32.5391, "step": 2828 }, { "epoch": 0.45396557949211697, "grad_norm": 12.521378517150879, "learning_rate": 5.85786609236941e-05, "loss": 32.6562, "step": 2829 }, { "epoch": 0.4541260480603362, "grad_norm": 12.332757949829102, "learning_rate": 5.8553419159665954e-05, "loss": 32.6406, "step": 2830 }, { "epoch": 0.4542865166285554, "grad_norm": 12.02782917022705, "learning_rate": 5.8528175149807905e-05, "loss": 32.7578, "step": 2831 }, { "epoch": 0.45444698519677457, "grad_norm": 12.216097831726074, "learning_rate": 5.850292890074818e-05, "loss": 32.7188, "step": 2832 }, { "epoch": 0.4546074537649938, "grad_norm": 12.199295043945312, "learning_rate": 5.8477680419115544e-05, "loss": 32.6328, "step": 2833 }, { "epoch": 0.454767922333213, "grad_norm": 12.143446922302246, "learning_rate": 5.845242971153935e-05, "loss": 32.6484, "step": 2834 }, { "epoch": 0.45492839090143217, "grad_norm": 12.20250129699707, "learning_rate": 5.842717678464957e-05, "loss": 32.6406, "step": 2835 }, { "epoch": 0.4550888594696514, "grad_norm": 12.070145606994629, "learning_rate": 5.840192164507675e-05, "loss": 32.5859, "step": 2836 }, { "epoch": 0.4552493280378706, "grad_norm": 12.070695877075195, "learning_rate": 5.837666429945201e-05, "loss": 32.6289, "step": 2837 }, { "epoch": 0.45540979660608977, "grad_norm": 12.200575828552246, "learning_rate": 5.8351404754407024e-05, "loss": 32.6016, "step": 2838 }, { "epoch": 0.455570265174309, "grad_norm": 11.895792961120605, "learning_rate": 5.8326143016574105e-05, "loss": 32.8516, "step": 2839 }, { "epoch": 0.4557307337425282, "grad_norm": 12.131301879882812, "learning_rate": 5.8300879092586094e-05, "loss": 32.6328, "step": 2840 }, { "epoch": 0.45589120231074737, "grad_norm": 11.887489318847656, "learning_rate": 5.827561298907639e-05, "loss": 32.7109, "step": 2841 }, { "epoch": 0.4560516708789666, "grad_norm": 12.638008117675781, "learning_rate": 5.825034471267903e-05, "loss": 32.5391, "step": 2842 }, { "epoch": 0.4562121394471858, "grad_norm": 12.261319160461426, "learning_rate": 5.8225074270028555e-05, "loss": 32.6641, "step": 2843 }, { "epoch": 0.45637260801540497, "grad_norm": 12.223725318908691, "learning_rate": 5.819980166776011e-05, "loss": 32.6094, "step": 2844 }, { "epoch": 0.4565330765836242, "grad_norm": 12.147931098937988, "learning_rate": 5.817452691250942e-05, "loss": 32.6562, "step": 2845 }, { "epoch": 0.4566935451518434, "grad_norm": 12.07388687133789, "learning_rate": 5.8149250010912724e-05, "loss": 32.6562, "step": 2846 }, { "epoch": 0.45685401372006257, "grad_norm": 12.534236907958984, "learning_rate": 5.812397096960688e-05, "loss": 32.5859, "step": 2847 }, { "epoch": 0.4570144822882818, "grad_norm": 12.148574829101562, "learning_rate": 5.8098689795229256e-05, "loss": 32.8672, "step": 2848 }, { "epoch": 0.457174950856501, "grad_norm": 12.03702449798584, "learning_rate": 5.807340649441785e-05, "loss": 32.8047, "step": 2849 }, { "epoch": 0.45733541942472017, "grad_norm": 12.74423885345459, "learning_rate": 5.804812107381114e-05, "loss": 33.1094, "step": 2850 }, { "epoch": 0.4574958879929394, "grad_norm": 12.706012725830078, "learning_rate": 5.8022833540048203e-05, "loss": 32.8672, "step": 2851 }, { "epoch": 0.4576563565611586, "grad_norm": 12.175758361816406, "learning_rate": 5.799754389976868e-05, "loss": 32.8672, "step": 2852 }, { "epoch": 0.45781682512937777, "grad_norm": 12.551979064941406, "learning_rate": 5.7972252159612736e-05, "loss": 32.8672, "step": 2853 }, { "epoch": 0.457977293697597, "grad_norm": 12.15969467163086, "learning_rate": 5.794695832622111e-05, "loss": 32.7031, "step": 2854 }, { "epoch": 0.4581377622658162, "grad_norm": 12.353763580322266, "learning_rate": 5.792166240623506e-05, "loss": 32.7109, "step": 2855 }, { "epoch": 0.45829823083403537, "grad_norm": 12.281805038452148, "learning_rate": 5.789636440629644e-05, "loss": 32.8906, "step": 2856 }, { "epoch": 0.4584586994022546, "grad_norm": 12.165481567382812, "learning_rate": 5.787106433304762e-05, "loss": 32.7578, "step": 2857 }, { "epoch": 0.4586191679704738, "grad_norm": 12.034923553466797, "learning_rate": 5.784576219313149e-05, "loss": 32.7266, "step": 2858 }, { "epoch": 0.45877963653869297, "grad_norm": 12.357725143432617, "learning_rate": 5.782045799319155e-05, "loss": 32.6641, "step": 2859 }, { "epoch": 0.4589401051069122, "grad_norm": 12.026298522949219, "learning_rate": 5.779515173987178e-05, "loss": 32.6953, "step": 2860 }, { "epoch": 0.4591005736751314, "grad_norm": 12.143233299255371, "learning_rate": 5.7769843439816716e-05, "loss": 32.7109, "step": 2861 }, { "epoch": 0.45926104224335057, "grad_norm": 12.273409843444824, "learning_rate": 5.774453309967145e-05, "loss": 32.7109, "step": 2862 }, { "epoch": 0.4594215108115698, "grad_norm": 12.089183807373047, "learning_rate": 5.771922072608158e-05, "loss": 32.6328, "step": 2863 }, { "epoch": 0.459581979379789, "grad_norm": 12.017435073852539, "learning_rate": 5.769390632569326e-05, "loss": 32.625, "step": 2864 }, { "epoch": 0.45974244794800817, "grad_norm": 12.233134269714355, "learning_rate": 5.766858990515317e-05, "loss": 32.6484, "step": 2865 }, { "epoch": 0.4599029165162274, "grad_norm": 76.27388763427734, "learning_rate": 5.764327147110853e-05, "loss": 32.7891, "step": 2866 }, { "epoch": 0.4600633850844466, "grad_norm": 12.269061088562012, "learning_rate": 5.761795103020706e-05, "loss": 32.6016, "step": 2867 }, { "epoch": 0.46022385365266577, "grad_norm": 12.517923355102539, "learning_rate": 5.7592628589097054e-05, "loss": 32.5703, "step": 2868 }, { "epoch": 0.460384322220885, "grad_norm": 11.955879211425781, "learning_rate": 5.756730415442727e-05, "loss": 32.7188, "step": 2869 }, { "epoch": 0.4605447907891042, "grad_norm": 12.291924476623535, "learning_rate": 5.754197773284704e-05, "loss": 32.5781, "step": 2870 }, { "epoch": 0.46070525935732337, "grad_norm": 12.266890525817871, "learning_rate": 5.75166493310062e-05, "loss": 32.6172, "step": 2871 }, { "epoch": 0.4608657279255426, "grad_norm": 12.13759708404541, "learning_rate": 5.74913189555551e-05, "loss": 32.6016, "step": 2872 }, { "epoch": 0.4610261964937618, "grad_norm": 12.478960990905762, "learning_rate": 5.7465986613144616e-05, "loss": 32.8438, "step": 2873 }, { "epoch": 0.46118666506198097, "grad_norm": 11.957503318786621, "learning_rate": 5.744065231042616e-05, "loss": 32.75, "step": 2874 }, { "epoch": 0.4613471336302002, "grad_norm": 12.138556480407715, "learning_rate": 5.7415316054051594e-05, "loss": 32.5938, "step": 2875 }, { "epoch": 0.4615076021984194, "grad_norm": 12.392292976379395, "learning_rate": 5.738997785067336e-05, "loss": 32.625, "step": 2876 }, { "epoch": 0.46166807076663857, "grad_norm": 12.01107120513916, "learning_rate": 5.73646377069444e-05, "loss": 32.7734, "step": 2877 }, { "epoch": 0.4618285393348578, "grad_norm": 11.970091819763184, "learning_rate": 5.733929562951812e-05, "loss": 32.6875, "step": 2878 }, { "epoch": 0.461989007903077, "grad_norm": 12.138362884521484, "learning_rate": 5.73139516250485e-05, "loss": 32.6016, "step": 2879 }, { "epoch": 0.4621494764712962, "grad_norm": 12.112935066223145, "learning_rate": 5.728860570018999e-05, "loss": 32.8281, "step": 2880 }, { "epoch": 0.4623099450395154, "grad_norm": 11.964418411254883, "learning_rate": 5.7263257861597516e-05, "loss": 32.6992, "step": 2881 }, { "epoch": 0.4624704136077346, "grad_norm": 12.396100997924805, "learning_rate": 5.7237908115926574e-05, "loss": 32.6055, "step": 2882 }, { "epoch": 0.4626308821759538, "grad_norm": 12.011941909790039, "learning_rate": 5.7212556469833104e-05, "loss": 32.6328, "step": 2883 }, { "epoch": 0.462791350744173, "grad_norm": 12.257014274597168, "learning_rate": 5.718720292997356e-05, "loss": 32.6094, "step": 2884 }, { "epoch": 0.4629518193123922, "grad_norm": 12.21619987487793, "learning_rate": 5.7161847503004906e-05, "loss": 32.6562, "step": 2885 }, { "epoch": 0.4631122878806114, "grad_norm": 12.263697624206543, "learning_rate": 5.7136490195584614e-05, "loss": 32.6797, "step": 2886 }, { "epoch": 0.4632727564488306, "grad_norm": 12.334196090698242, "learning_rate": 5.71111310143706e-05, "loss": 32.5781, "step": 2887 }, { "epoch": 0.4634332250170498, "grad_norm": 12.021784782409668, "learning_rate": 5.708576996602129e-05, "loss": 32.7109, "step": 2888 }, { "epoch": 0.463593693585269, "grad_norm": 12.404345512390137, "learning_rate": 5.706040705719566e-05, "loss": 32.5469, "step": 2889 }, { "epoch": 0.4637541621534882, "grad_norm": 12.20490837097168, "learning_rate": 5.70350422945531e-05, "loss": 32.7305, "step": 2890 }, { "epoch": 0.4639146307217074, "grad_norm": 12.321582794189453, "learning_rate": 5.7009675684753505e-05, "loss": 32.5703, "step": 2891 }, { "epoch": 0.4640750992899266, "grad_norm": 12.138092994689941, "learning_rate": 5.698430723445728e-05, "loss": 32.5938, "step": 2892 }, { "epoch": 0.4642355678581458, "grad_norm": 12.382683753967285, "learning_rate": 5.695893695032528e-05, "loss": 32.5312, "step": 2893 }, { "epoch": 0.464396036426365, "grad_norm": 12.260589599609375, "learning_rate": 5.693356483901887e-05, "loss": 32.5625, "step": 2894 }, { "epoch": 0.4645565049945842, "grad_norm": 12.266215324401855, "learning_rate": 5.6908190907199864e-05, "loss": 32.6484, "step": 2895 }, { "epoch": 0.4647169735628034, "grad_norm": 12.26541805267334, "learning_rate": 5.68828151615306e-05, "loss": 32.6016, "step": 2896 }, { "epoch": 0.4648774421310226, "grad_norm": 12.273659706115723, "learning_rate": 5.685743760867385e-05, "loss": 32.6406, "step": 2897 }, { "epoch": 0.4650379106992418, "grad_norm": 12.324599266052246, "learning_rate": 5.683205825529288e-05, "loss": 32.5938, "step": 2898 }, { "epoch": 0.465198379267461, "grad_norm": 12.289294242858887, "learning_rate": 5.680667710805141e-05, "loss": 32.8984, "step": 2899 }, { "epoch": 0.4653588478356802, "grad_norm": 12.178863525390625, "learning_rate": 5.678129417361366e-05, "loss": 33.0234, "step": 2900 }, { "epoch": 0.4655193164038994, "grad_norm": 12.180769920349121, "learning_rate": 5.675590945864431e-05, "loss": 33.0156, "step": 2901 }, { "epoch": 0.4656797849721186, "grad_norm": 12.099671363830566, "learning_rate": 5.673052296980848e-05, "loss": 33.1133, "step": 2902 }, { "epoch": 0.4658402535403378, "grad_norm": 12.157477378845215, "learning_rate": 5.670513471377178e-05, "loss": 32.7578, "step": 2903 }, { "epoch": 0.466000722108557, "grad_norm": 13.088361740112305, "learning_rate": 5.667974469720032e-05, "loss": 32.8086, "step": 2904 }, { "epoch": 0.4661611906767762, "grad_norm": 12.35209846496582, "learning_rate": 5.665435292676058e-05, "loss": 32.7031, "step": 2905 }, { "epoch": 0.4663216592449954, "grad_norm": 12.143635749816895, "learning_rate": 5.662895940911959e-05, "loss": 32.7188, "step": 2906 }, { "epoch": 0.4664821278132146, "grad_norm": 12.685863494873047, "learning_rate": 5.660356415094479e-05, "loss": 32.7266, "step": 2907 }, { "epoch": 0.4666425963814338, "grad_norm": 12.079643249511719, "learning_rate": 5.657816715890408e-05, "loss": 32.6875, "step": 2908 }, { "epoch": 0.466803064949653, "grad_norm": 12.302250862121582, "learning_rate": 5.6552768439665845e-05, "loss": 32.75, "step": 2909 }, { "epoch": 0.4669635335178722, "grad_norm": 12.082573890686035, "learning_rate": 5.652736799989891e-05, "loss": 32.7031, "step": 2910 }, { "epoch": 0.4671240020860914, "grad_norm": 12.338663101196289, "learning_rate": 5.650196584627252e-05, "loss": 32.7734, "step": 2911 }, { "epoch": 0.4672844706543106, "grad_norm": 12.338050842285156, "learning_rate": 5.6476561985456414e-05, "loss": 32.6406, "step": 2912 }, { "epoch": 0.4674449392225298, "grad_norm": 12.143746376037598, "learning_rate": 5.6451156424120746e-05, "loss": 32.7891, "step": 2913 }, { "epoch": 0.467605407790749, "grad_norm": 12.00725269317627, "learning_rate": 5.6425749168936146e-05, "loss": 32.7812, "step": 2914 }, { "epoch": 0.4677658763589682, "grad_norm": 12.07937240600586, "learning_rate": 5.640034022657367e-05, "loss": 32.6719, "step": 2915 }, { "epoch": 0.4679263449271874, "grad_norm": 12.141589164733887, "learning_rate": 5.6374929603704804e-05, "loss": 32.6094, "step": 2916 }, { "epoch": 0.4680868134954066, "grad_norm": 12.091111183166504, "learning_rate": 5.634951730700153e-05, "loss": 32.6211, "step": 2917 }, { "epoch": 0.4682472820636258, "grad_norm": 12.143571853637695, "learning_rate": 5.6324103343136205e-05, "loss": 32.6094, "step": 2918 }, { "epoch": 0.468407750631845, "grad_norm": 12.078853607177734, "learning_rate": 5.629868771878163e-05, "loss": 32.7188, "step": 2919 }, { "epoch": 0.4685682192000642, "grad_norm": 12.391018867492676, "learning_rate": 5.62732704406111e-05, "loss": 32.6016, "step": 2920 }, { "epoch": 0.4687286877682834, "grad_norm": 12.011299133300781, "learning_rate": 5.624785151529832e-05, "loss": 32.6328, "step": 2921 }, { "epoch": 0.4688891563365026, "grad_norm": 12.131105422973633, "learning_rate": 5.6222430949517355e-05, "loss": 32.6016, "step": 2922 }, { "epoch": 0.4690496249047218, "grad_norm": 12.203667640686035, "learning_rate": 5.619700874994281e-05, "loss": 32.6641, "step": 2923 }, { "epoch": 0.469210093472941, "grad_norm": 12.592941284179688, "learning_rate": 5.6171584923249655e-05, "loss": 32.5469, "step": 2924 }, { "epoch": 0.4693705620411602, "grad_norm": 12.266257286071777, "learning_rate": 5.614615947611329e-05, "loss": 32.7109, "step": 2925 }, { "epoch": 0.4695310306093794, "grad_norm": 12.292795181274414, "learning_rate": 5.6120732415209554e-05, "loss": 32.6172, "step": 2926 }, { "epoch": 0.4696914991775986, "grad_norm": 12.277864456176758, "learning_rate": 5.6095303747214735e-05, "loss": 32.6328, "step": 2927 }, { "epoch": 0.4698519677458178, "grad_norm": 12.329256057739258, "learning_rate": 5.606987347880549e-05, "loss": 32.6094, "step": 2928 }, { "epoch": 0.470012436314037, "grad_norm": 12.396240234375, "learning_rate": 5.6044441616658905e-05, "loss": 32.5703, "step": 2929 }, { "epoch": 0.4701729048822562, "grad_norm": 12.014894485473633, "learning_rate": 5.6019008167452534e-05, "loss": 32.6641, "step": 2930 }, { "epoch": 0.4703333734504754, "grad_norm": 12.444461822509766, "learning_rate": 5.599357313786431e-05, "loss": 32.5078, "step": 2931 }, { "epoch": 0.4704938420186946, "grad_norm": 12.093761444091797, "learning_rate": 5.596813653457256e-05, "loss": 32.7656, "step": 2932 }, { "epoch": 0.4706543105869138, "grad_norm": 12.07478141784668, "learning_rate": 5.594269836425609e-05, "loss": 32.625, "step": 2933 }, { "epoch": 0.470814779155133, "grad_norm": 12.199615478515625, "learning_rate": 5.591725863359404e-05, "loss": 32.6328, "step": 2934 }, { "epoch": 0.4709752477233522, "grad_norm": 12.194808959960938, "learning_rate": 5.5891817349266005e-05, "loss": 32.6328, "step": 2935 }, { "epoch": 0.4711357162915714, "grad_norm": 12.317960739135742, "learning_rate": 5.5866374517952e-05, "loss": 32.5391, "step": 2936 }, { "epoch": 0.4712961848597906, "grad_norm": 12.335968971252441, "learning_rate": 5.5840930146332414e-05, "loss": 32.7578, "step": 2937 }, { "epoch": 0.4714566534280098, "grad_norm": 12.453960418701172, "learning_rate": 5.581548424108806e-05, "loss": 32.5469, "step": 2938 }, { "epoch": 0.471617121996229, "grad_norm": 12.349529266357422, "learning_rate": 5.5790036808900125e-05, "loss": 32.6094, "step": 2939 }, { "epoch": 0.4717775905644482, "grad_norm": 12.64113998413086, "learning_rate": 5.5764587856450235e-05, "loss": 32.6797, "step": 2940 }, { "epoch": 0.4719380591326674, "grad_norm": 12.386606216430664, "learning_rate": 5.5739137390420393e-05, "loss": 32.7188, "step": 2941 }, { "epoch": 0.4720985277008866, "grad_norm": 12.446318626403809, "learning_rate": 5.5713685417493e-05, "loss": 32.5859, "step": 2942 }, { "epoch": 0.4722589962691058, "grad_norm": 12.138110160827637, "learning_rate": 5.568823194435088e-05, "loss": 32.6406, "step": 2943 }, { "epoch": 0.472419464837325, "grad_norm": 12.411866188049316, "learning_rate": 5.56627769776772e-05, "loss": 32.6406, "step": 2944 }, { "epoch": 0.47257993340554416, "grad_norm": 12.144911766052246, "learning_rate": 5.5637320524155566e-05, "loss": 32.7109, "step": 2945 }, { "epoch": 0.4727404019737634, "grad_norm": 12.266763687133789, "learning_rate": 5.561186259046995e-05, "loss": 32.7656, "step": 2946 }, { "epoch": 0.4729008705419826, "grad_norm": 12.343271255493164, "learning_rate": 5.5586403183304715e-05, "loss": 32.7109, "step": 2947 }, { "epoch": 0.47306133911020176, "grad_norm": 12.194454193115234, "learning_rate": 5.556094230934462e-05, "loss": 32.6172, "step": 2948 }, { "epoch": 0.473221807678421, "grad_norm": 12.142669677734375, "learning_rate": 5.553547997527479e-05, "loss": 32.7422, "step": 2949 }, { "epoch": 0.4733822762466402, "grad_norm": 12.221343040466309, "learning_rate": 5.551001618778078e-05, "loss": 33.1484, "step": 2950 }, { "epoch": 0.47354274481485936, "grad_norm": 12.16130256652832, "learning_rate": 5.548455095354846e-05, "loss": 32.7969, "step": 2951 }, { "epoch": 0.4737032133830786, "grad_norm": 12.277838706970215, "learning_rate": 5.5459084279264115e-05, "loss": 33.0312, "step": 2952 }, { "epoch": 0.4738636819512978, "grad_norm": 12.163341522216797, "learning_rate": 5.543361617161443e-05, "loss": 32.9062, "step": 2953 }, { "epoch": 0.47402415051951696, "grad_norm": 12.2735595703125, "learning_rate": 5.540814663728643e-05, "loss": 32.7891, "step": 2954 }, { "epoch": 0.4741846190877362, "grad_norm": 12.035889625549316, "learning_rate": 5.538267568296752e-05, "loss": 32.7188, "step": 2955 }, { "epoch": 0.4743450876559554, "grad_norm": 12.665221214294434, "learning_rate": 5.535720331534549e-05, "loss": 32.6562, "step": 2956 }, { "epoch": 0.47450555622417456, "grad_norm": 12.085351943969727, "learning_rate": 5.533172954110851e-05, "loss": 32.6406, "step": 2957 }, { "epoch": 0.4746660247923938, "grad_norm": 12.683778762817383, "learning_rate": 5.5306254366945085e-05, "loss": 32.7148, "step": 2958 }, { "epoch": 0.474826493360613, "grad_norm": 12.269927024841309, "learning_rate": 5.5280777799544104e-05, "loss": 32.7188, "step": 2959 }, { "epoch": 0.47498696192883216, "grad_norm": 12.133574485778809, "learning_rate": 5.525529984559485e-05, "loss": 32.6367, "step": 2960 }, { "epoch": 0.4751474304970514, "grad_norm": 12.13895034790039, "learning_rate": 5.522982051178694e-05, "loss": 32.8672, "step": 2961 }, { "epoch": 0.4753078990652706, "grad_norm": 12.145508766174316, "learning_rate": 5.520433980481037e-05, "loss": 32.7344, "step": 2962 }, { "epoch": 0.47546836763348976, "grad_norm": 12.142200469970703, "learning_rate": 5.517885773135545e-05, "loss": 32.8008, "step": 2963 }, { "epoch": 0.475628836201709, "grad_norm": 11.889168739318848, "learning_rate": 5.5153374298112916e-05, "loss": 32.9062, "step": 2964 }, { "epoch": 0.4757893047699282, "grad_norm": 12.290804862976074, "learning_rate": 5.512788951177381e-05, "loss": 32.6719, "step": 2965 }, { "epoch": 0.47594977333814736, "grad_norm": 12.26119613647461, "learning_rate": 5.5102403379029556e-05, "loss": 32.6484, "step": 2966 }, { "epoch": 0.4761102419063666, "grad_norm": 12.392732620239258, "learning_rate": 5.5076915906571925e-05, "loss": 32.7812, "step": 2967 }, { "epoch": 0.4762707104745858, "grad_norm": 12.07211971282959, "learning_rate": 5.505142710109306e-05, "loss": 32.6719, "step": 2968 }, { "epoch": 0.47643117904280496, "grad_norm": 12.335126876831055, "learning_rate": 5.50259369692854e-05, "loss": 32.6172, "step": 2969 }, { "epoch": 0.4765916476110242, "grad_norm": 12.082139015197754, "learning_rate": 5.5000445517841784e-05, "loss": 32.625, "step": 2970 }, { "epoch": 0.4767521161792434, "grad_norm": 12.332731246948242, "learning_rate": 5.497495275345538e-05, "loss": 32.6406, "step": 2971 }, { "epoch": 0.47691258474746256, "grad_norm": 12.07453441619873, "learning_rate": 5.494945868281969e-05, "loss": 32.6953, "step": 2972 }, { "epoch": 0.4770730533156818, "grad_norm": 12.204142570495605, "learning_rate": 5.492396331262857e-05, "loss": 32.5938, "step": 2973 }, { "epoch": 0.477233521883901, "grad_norm": 12.072687149047852, "learning_rate": 5.489846664957623e-05, "loss": 32.5898, "step": 2974 }, { "epoch": 0.47739399045212016, "grad_norm": 12.207780838012695, "learning_rate": 5.48729687003572e-05, "loss": 32.6172, "step": 2975 }, { "epoch": 0.4775544590203394, "grad_norm": 12.261139869689941, "learning_rate": 5.484746947166633e-05, "loss": 32.582, "step": 2976 }, { "epoch": 0.4777149275885586, "grad_norm": 12.079598426818848, "learning_rate": 5.482196897019885e-05, "loss": 32.6797, "step": 2977 }, { "epoch": 0.47787539615677777, "grad_norm": 12.137077331542969, "learning_rate": 5.4796467202650314e-05, "loss": 32.625, "step": 2978 }, { "epoch": 0.478035864724997, "grad_norm": 12.26504898071289, "learning_rate": 5.477096417571659e-05, "loss": 32.6406, "step": 2979 }, { "epoch": 0.4781963332932162, "grad_norm": 12.145750045776367, "learning_rate": 5.474545989609386e-05, "loss": 32.6641, "step": 2980 }, { "epoch": 0.47835680186143537, "grad_norm": 12.220768928527832, "learning_rate": 5.47199543704787e-05, "loss": 32.6523, "step": 2981 }, { "epoch": 0.4785172704296546, "grad_norm": 12.143381118774414, "learning_rate": 5.469444760556794e-05, "loss": 32.625, "step": 2982 }, { "epoch": 0.4786777389978738, "grad_norm": 12.142395973205566, "learning_rate": 5.4668939608058764e-05, "loss": 32.6797, "step": 2983 }, { "epoch": 0.47883820756609297, "grad_norm": 12.482154846191406, "learning_rate": 5.464343038464872e-05, "loss": 32.6406, "step": 2984 }, { "epoch": 0.4789986761343122, "grad_norm": 11.883025169372559, "learning_rate": 5.4617919942035624e-05, "loss": 32.6953, "step": 2985 }, { "epoch": 0.4791591447025314, "grad_norm": 12.202802658081055, "learning_rate": 5.459240828691761e-05, "loss": 32.6172, "step": 2986 }, { "epoch": 0.47931961327075057, "grad_norm": 12.44970417022705, "learning_rate": 5.456689542599317e-05, "loss": 32.6328, "step": 2987 }, { "epoch": 0.4794800818389698, "grad_norm": 12.136819839477539, "learning_rate": 5.45413813659611e-05, "loss": 32.6562, "step": 2988 }, { "epoch": 0.479640550407189, "grad_norm": 12.13233470916748, "learning_rate": 5.4515866113520465e-05, "loss": 32.6016, "step": 2989 }, { "epoch": 0.47980101897540817, "grad_norm": 12.274288177490234, "learning_rate": 5.449034967537072e-05, "loss": 32.6953, "step": 2990 }, { "epoch": 0.4799614875436274, "grad_norm": 12.52149486541748, "learning_rate": 5.446483205821159e-05, "loss": 32.6406, "step": 2991 }, { "epoch": 0.4801219561118466, "grad_norm": 12.269797325134277, "learning_rate": 5.44393132687431e-05, "loss": 32.6719, "step": 2992 }, { "epoch": 0.48028242468006577, "grad_norm": 12.005627632141113, "learning_rate": 5.4413793313665596e-05, "loss": 32.6797, "step": 2993 }, { "epoch": 0.480442893248285, "grad_norm": 12.212211608886719, "learning_rate": 5.4388272199679736e-05, "loss": 32.6172, "step": 2994 }, { "epoch": 0.4806033618165042, "grad_norm": 12.205368995666504, "learning_rate": 5.436274993348647e-05, "loss": 32.6641, "step": 2995 }, { "epoch": 0.48076383038472337, "grad_norm": 12.32695198059082, "learning_rate": 5.433722652178703e-05, "loss": 32.5547, "step": 2996 }, { "epoch": 0.4809242989529426, "grad_norm": 12.012035369873047, "learning_rate": 5.431170197128304e-05, "loss": 32.6562, "step": 2997 }, { "epoch": 0.4810847675211618, "grad_norm": 12.286272048950195, "learning_rate": 5.4286176288676306e-05, "loss": 32.7695, "step": 2998 }, { "epoch": 0.48124523608938097, "grad_norm": 12.411584854125977, "learning_rate": 5.426064948066898e-05, "loss": 32.875, "step": 2999 }, { "epoch": 0.4814057046576002, "grad_norm": 12.017148971557617, "learning_rate": 5.423512155396354e-05, "loss": 32.8281, "step": 3000 }, { "epoch": 0.4815661732258194, "grad_norm": 12.05942440032959, "learning_rate": 5.420959251526272e-05, "loss": 33.0703, "step": 3001 }, { "epoch": 0.48172664179403857, "grad_norm": 12.290811538696289, "learning_rate": 5.418406237126956e-05, "loss": 32.8125, "step": 3002 }, { "epoch": 0.4818871103622578, "grad_norm": 12.219350814819336, "learning_rate": 5.415853112868736e-05, "loss": 32.8359, "step": 3003 }, { "epoch": 0.482047578930477, "grad_norm": 12.084529876708984, "learning_rate": 5.413299879421979e-05, "loss": 32.7656, "step": 3004 }, { "epoch": 0.48220804749869617, "grad_norm": 12.301545143127441, "learning_rate": 5.4107465374570696e-05, "loss": 32.7891, "step": 3005 }, { "epoch": 0.4823685160669154, "grad_norm": 12.289926528930664, "learning_rate": 5.4081930876444286e-05, "loss": 32.8359, "step": 3006 }, { "epoch": 0.4825289846351346, "grad_norm": 12.155714988708496, "learning_rate": 5.405639530654504e-05, "loss": 32.8203, "step": 3007 }, { "epoch": 0.48268945320335377, "grad_norm": 12.019086837768555, "learning_rate": 5.403085867157769e-05, "loss": 32.75, "step": 3008 }, { "epoch": 0.482849921771573, "grad_norm": 12.222868919372559, "learning_rate": 5.4005320978247286e-05, "loss": 32.6875, "step": 3009 }, { "epoch": 0.4830103903397922, "grad_norm": 11.955860137939453, "learning_rate": 5.397978223325912e-05, "loss": 32.7188, "step": 3010 }, { "epoch": 0.48317085890801137, "grad_norm": 12.291885375976562, "learning_rate": 5.395424244331878e-05, "loss": 32.6953, "step": 3011 }, { "epoch": 0.4833313274762306, "grad_norm": 12.160765647888184, "learning_rate": 5.392870161513214e-05, "loss": 32.7188, "step": 3012 }, { "epoch": 0.4834917960444498, "grad_norm": 12.230212211608887, "learning_rate": 5.39031597554053e-05, "loss": 32.6992, "step": 3013 }, { "epoch": 0.48365226461266897, "grad_norm": 12.014546394348145, "learning_rate": 5.3877616870844695e-05, "loss": 32.7031, "step": 3014 }, { "epoch": 0.4838127331808882, "grad_norm": 11.951945304870605, "learning_rate": 5.385207296815699e-05, "loss": 32.6367, "step": 3015 }, { "epoch": 0.4839732017491074, "grad_norm": 12.351214408874512, "learning_rate": 5.3826528054049105e-05, "loss": 32.6641, "step": 3016 }, { "epoch": 0.48413367031732657, "grad_norm": 12.278130531311035, "learning_rate": 5.3800982135228274e-05, "loss": 32.6953, "step": 3017 }, { "epoch": 0.4842941388855458, "grad_norm": 11.950722694396973, "learning_rate": 5.377543521840194e-05, "loss": 32.6797, "step": 3018 }, { "epoch": 0.484454607453765, "grad_norm": 12.204793930053711, "learning_rate": 5.3749887310277835e-05, "loss": 32.6797, "step": 3019 }, { "epoch": 0.48461507602198417, "grad_norm": 12.271443367004395, "learning_rate": 5.372433841756396e-05, "loss": 32.6836, "step": 3020 }, { "epoch": 0.4847755445902034, "grad_norm": 12.228618621826172, "learning_rate": 5.369878854696857e-05, "loss": 32.6914, "step": 3021 }, { "epoch": 0.4849360131584226, "grad_norm": 12.26641845703125, "learning_rate": 5.367323770520016e-05, "loss": 32.5859, "step": 3022 }, { "epoch": 0.48509648172664177, "grad_norm": 12.339254379272461, "learning_rate": 5.3647685898967495e-05, "loss": 32.6445, "step": 3023 }, { "epoch": 0.485256950294861, "grad_norm": 12.006611824035645, "learning_rate": 5.362213313497958e-05, "loss": 32.6289, "step": 3024 }, { "epoch": 0.4854174188630802, "grad_norm": 11.886746406555176, "learning_rate": 5.359657941994569e-05, "loss": 32.7422, "step": 3025 }, { "epoch": 0.48557788743129937, "grad_norm": 12.080343246459961, "learning_rate": 5.357102476057534e-05, "loss": 32.6016, "step": 3026 }, { "epoch": 0.4857383559995186, "grad_norm": 12.268681526184082, "learning_rate": 5.35454691635783e-05, "loss": 32.5938, "step": 3027 }, { "epoch": 0.4858988245677378, "grad_norm": 12.008288383483887, "learning_rate": 5.351991263566456e-05, "loss": 32.6914, "step": 3028 }, { "epoch": 0.48605929313595697, "grad_norm": 12.133630752563477, "learning_rate": 5.349435518354439e-05, "loss": 32.6406, "step": 3029 }, { "epoch": 0.4862197617041762, "grad_norm": 12.209254264831543, "learning_rate": 5.3468796813928265e-05, "loss": 32.6562, "step": 3030 }, { "epoch": 0.4863802302723954, "grad_norm": 12.014397621154785, "learning_rate": 5.3443237533526956e-05, "loss": 32.6641, "step": 3031 }, { "epoch": 0.48654069884061457, "grad_norm": 12.069313049316406, "learning_rate": 5.341767734905142e-05, "loss": 32.6406, "step": 3032 }, { "epoch": 0.4867011674088338, "grad_norm": 12.202764511108398, "learning_rate": 5.339211626721288e-05, "loss": 32.6172, "step": 3033 }, { "epoch": 0.486861635977053, "grad_norm": 12.074394226074219, "learning_rate": 5.336655429472277e-05, "loss": 32.5938, "step": 3034 }, { "epoch": 0.48702210454527217, "grad_norm": 12.085576057434082, "learning_rate": 5.3340991438292786e-05, "loss": 32.6172, "step": 3035 }, { "epoch": 0.4871825731134914, "grad_norm": 12.263340950012207, "learning_rate": 5.331542770463483e-05, "loss": 32.5859, "step": 3036 }, { "epoch": 0.4873430416817106, "grad_norm": 12.080513954162598, "learning_rate": 5.328986310046108e-05, "loss": 32.6484, "step": 3037 }, { "epoch": 0.4875035102499298, "grad_norm": 12.150522232055664, "learning_rate": 5.3264297632483875e-05, "loss": 32.7109, "step": 3038 }, { "epoch": 0.487663978818149, "grad_norm": 12.072787284851074, "learning_rate": 5.3238731307415836e-05, "loss": 32.6797, "step": 3039 }, { "epoch": 0.4878244473863682, "grad_norm": 12.466885566711426, "learning_rate": 5.3213164131969775e-05, "loss": 32.5703, "step": 3040 }, { "epoch": 0.4879849159545874, "grad_norm": 12.397015571594238, "learning_rate": 5.318759611285876e-05, "loss": 32.5938, "step": 3041 }, { "epoch": 0.4881453845228066, "grad_norm": 12.198786735534668, "learning_rate": 5.316202725679604e-05, "loss": 32.6719, "step": 3042 }, { "epoch": 0.4883058530910258, "grad_norm": 12.515266418457031, "learning_rate": 5.3136457570495126e-05, "loss": 32.5859, "step": 3043 }, { "epoch": 0.488466321659245, "grad_norm": 11.948287010192871, "learning_rate": 5.3110887060669715e-05, "loss": 32.7656, "step": 3044 }, { "epoch": 0.4886267902274642, "grad_norm": 12.129264831542969, "learning_rate": 5.308531573403375e-05, "loss": 32.6094, "step": 3045 }, { "epoch": 0.4887872587956834, "grad_norm": 12.746966361999512, "learning_rate": 5.3059743597301336e-05, "loss": 32.6797, "step": 3046 }, { "epoch": 0.4889477273639026, "grad_norm": 12.021860122680664, "learning_rate": 5.3034170657186856e-05, "loss": 32.7422, "step": 3047 }, { "epoch": 0.4891081959321218, "grad_norm": 12.07812213897705, "learning_rate": 5.300859692040486e-05, "loss": 32.7656, "step": 3048 }, { "epoch": 0.489268664500341, "grad_norm": 12.962091445922852, "learning_rate": 5.298302239367012e-05, "loss": 32.8984, "step": 3049 }, { "epoch": 0.4894291330685602, "grad_norm": 13.033432960510254, "learning_rate": 5.295744708369762e-05, "loss": 33.0078, "step": 3050 }, { "epoch": 0.4895896016367794, "grad_norm": 12.287068367004395, "learning_rate": 5.2931870997202556e-05, "loss": 32.8047, "step": 3051 }, { "epoch": 0.4897500702049986, "grad_norm": 12.539616584777832, "learning_rate": 5.29062941409003e-05, "loss": 32.8281, "step": 3052 }, { "epoch": 0.4899105387732178, "grad_norm": 12.091720581054688, "learning_rate": 5.2880716521506436e-05, "loss": 32.7656, "step": 3053 }, { "epoch": 0.490071007341437, "grad_norm": 12.16191291809082, "learning_rate": 5.285513814573678e-05, "loss": 32.7969, "step": 3054 }, { "epoch": 0.4902314759096562, "grad_norm": 12.524524688720703, "learning_rate": 5.282955902030732e-05, "loss": 32.6992, "step": 3055 }, { "epoch": 0.4903919444778754, "grad_norm": 12.089607238769531, "learning_rate": 5.2803979151934225e-05, "loss": 32.7734, "step": 3056 }, { "epoch": 0.4905524130460946, "grad_norm": 12.21644115447998, "learning_rate": 5.2778398547333884e-05, "loss": 32.7344, "step": 3057 }, { "epoch": 0.4907128816143138, "grad_norm": 12.157405853271484, "learning_rate": 5.275281721322288e-05, "loss": 32.6719, "step": 3058 }, { "epoch": 0.490873350182533, "grad_norm": 12.392108917236328, "learning_rate": 5.272723515631798e-05, "loss": 32.6562, "step": 3059 }, { "epoch": 0.4910338187507522, "grad_norm": 12.27414321899414, "learning_rate": 5.2701652383336117e-05, "loss": 32.6641, "step": 3060 }, { "epoch": 0.4911942873189714, "grad_norm": 12.214574813842773, "learning_rate": 5.2676068900994477e-05, "loss": 32.6953, "step": 3061 }, { "epoch": 0.4913547558871906, "grad_norm": 12.281960487365723, "learning_rate": 5.2650484716010364e-05, "loss": 32.668, "step": 3062 }, { "epoch": 0.4915152244554098, "grad_norm": 12.0757474899292, "learning_rate": 5.262489983510129e-05, "loss": 32.6875, "step": 3063 }, { "epoch": 0.491675693023629, "grad_norm": 12.209561347961426, "learning_rate": 5.2599314264984975e-05, "loss": 32.6484, "step": 3064 }, { "epoch": 0.4918361615918482, "grad_norm": 12.148008346557617, "learning_rate": 5.257372801237929e-05, "loss": 32.668, "step": 3065 }, { "epoch": 0.4919966301600674, "grad_norm": 12.201635360717773, "learning_rate": 5.2548141084002275e-05, "loss": 32.6172, "step": 3066 }, { "epoch": 0.4921570987282866, "grad_norm": 11.947481155395508, "learning_rate": 5.252255348657219e-05, "loss": 32.6992, "step": 3067 }, { "epoch": 0.4923175672965058, "grad_norm": 12.208451271057129, "learning_rate": 5.249696522680744e-05, "loss": 32.6016, "step": 3068 }, { "epoch": 0.492478035864725, "grad_norm": 11.945496559143066, "learning_rate": 5.247137631142661e-05, "loss": 32.6758, "step": 3069 }, { "epoch": 0.4926385044329442, "grad_norm": 12.074607849121094, "learning_rate": 5.244578674714844e-05, "loss": 32.625, "step": 3070 }, { "epoch": 0.4927989730011634, "grad_norm": 12.148959159851074, "learning_rate": 5.2420196540691894e-05, "loss": 32.6406, "step": 3071 }, { "epoch": 0.4929594415693826, "grad_norm": 12.319865226745605, "learning_rate": 5.2394605698776034e-05, "loss": 32.6406, "step": 3072 }, { "epoch": 0.4931199101376018, "grad_norm": 12.5279541015625, "learning_rate": 5.236901422812014e-05, "loss": 32.6562, "step": 3073 }, { "epoch": 0.493280378705821, "grad_norm": 12.202109336853027, "learning_rate": 5.234342213544363e-05, "loss": 32.6094, "step": 3074 }, { "epoch": 0.4934408472740402, "grad_norm": 12.589198112487793, "learning_rate": 5.2317829427466136e-05, "loss": 32.5234, "step": 3075 }, { "epoch": 0.4936013158422594, "grad_norm": 12.009249687194824, "learning_rate": 5.229223611090737e-05, "loss": 32.707, "step": 3076 }, { "epoch": 0.4937617844104786, "grad_norm": 13.081812858581543, "learning_rate": 5.226664219248723e-05, "loss": 32.4844, "step": 3077 }, { "epoch": 0.4939222529786978, "grad_norm": 12.012848854064941, "learning_rate": 5.2241047678925834e-05, "loss": 32.6406, "step": 3078 }, { "epoch": 0.494082721546917, "grad_norm": 12.27226448059082, "learning_rate": 5.221545257694339e-05, "loss": 32.6406, "step": 3079 }, { "epoch": 0.4942431901151362, "grad_norm": 12.455731391906738, "learning_rate": 5.218985689326027e-05, "loss": 32.6562, "step": 3080 }, { "epoch": 0.4944036586833554, "grad_norm": 12.073785781860352, "learning_rate": 5.216426063459703e-05, "loss": 32.6953, "step": 3081 }, { "epoch": 0.4945641272515746, "grad_norm": 12.202544212341309, "learning_rate": 5.2138663807674336e-05, "loss": 32.7031, "step": 3082 }, { "epoch": 0.4947245958197938, "grad_norm": 12.026436805725098, "learning_rate": 5.211306641921303e-05, "loss": 32.6562, "step": 3083 }, { "epoch": 0.494885064388013, "grad_norm": 12.270567893981934, "learning_rate": 5.208746847593409e-05, "loss": 32.6016, "step": 3084 }, { "epoch": 0.4950455329562322, "grad_norm": 12.13742733001709, "learning_rate": 5.206186998455865e-05, "loss": 32.5938, "step": 3085 }, { "epoch": 0.4952060015244514, "grad_norm": 12.1309175491333, "learning_rate": 5.203627095180801e-05, "loss": 32.5859, "step": 3086 }, { "epoch": 0.4953664700926706, "grad_norm": 12.072673797607422, "learning_rate": 5.201067138440352e-05, "loss": 32.6328, "step": 3087 }, { "epoch": 0.4955269386608898, "grad_norm": 12.387251853942871, "learning_rate": 5.198507128906678e-05, "loss": 32.5625, "step": 3088 }, { "epoch": 0.495687407229109, "grad_norm": 12.132598876953125, "learning_rate": 5.195947067251948e-05, "loss": 32.6328, "step": 3089 }, { "epoch": 0.4958478757973282, "grad_norm": 12.098024368286133, "learning_rate": 5.1933869541483426e-05, "loss": 32.7266, "step": 3090 }, { "epoch": 0.4960083443655474, "grad_norm": 12.173127174377441, "learning_rate": 5.1908267902680615e-05, "loss": 32.7969, "step": 3091 }, { "epoch": 0.4961688129337666, "grad_norm": 12.265299797058105, "learning_rate": 5.188266576283315e-05, "loss": 32.6016, "step": 3092 }, { "epoch": 0.4963292815019858, "grad_norm": 12.19953727722168, "learning_rate": 5.185706312866321e-05, "loss": 32.6172, "step": 3093 }, { "epoch": 0.496489750070205, "grad_norm": 12.26952838897705, "learning_rate": 5.18314600068932e-05, "loss": 32.6484, "step": 3094 }, { "epoch": 0.4966502186384242, "grad_norm": 12.270471572875977, "learning_rate": 5.18058564042456e-05, "loss": 32.6719, "step": 3095 }, { "epoch": 0.4968106872066434, "grad_norm": 12.274210929870605, "learning_rate": 5.1780252327443013e-05, "loss": 32.6602, "step": 3096 }, { "epoch": 0.4969711557748626, "grad_norm": 12.4740571975708, "learning_rate": 5.175464778320818e-05, "loss": 32.6875, "step": 3097 }, { "epoch": 0.4971316243430818, "grad_norm": 12.289067268371582, "learning_rate": 5.172904277826397e-05, "loss": 32.8047, "step": 3098 }, { "epoch": 0.497292092911301, "grad_norm": 12.449114799499512, "learning_rate": 5.170343731933337e-05, "loss": 32.6484, "step": 3099 }, { "epoch": 0.4974525614795202, "grad_norm": 12.874300956726074, "learning_rate": 5.1677831413139446e-05, "loss": 32.8125, "step": 3100 }, { "epoch": 0.4976130300477394, "grad_norm": 12.151012420654297, "learning_rate": 5.165222506640547e-05, "loss": 32.8438, "step": 3101 }, { "epoch": 0.4977734986159586, "grad_norm": 12.223907470703125, "learning_rate": 5.1626618285854735e-05, "loss": 32.875, "step": 3102 }, { "epoch": 0.4979339671841778, "grad_norm": 12.292607307434082, "learning_rate": 5.160101107821071e-05, "loss": 32.9688, "step": 3103 }, { "epoch": 0.498094435752397, "grad_norm": 12.346973419189453, "learning_rate": 5.157540345019696e-05, "loss": 32.7734, "step": 3104 }, { "epoch": 0.4982549043206162, "grad_norm": 12.342065811157227, "learning_rate": 5.1549795408537135e-05, "loss": 32.625, "step": 3105 }, { "epoch": 0.4984153728888354, "grad_norm": 12.145355224609375, "learning_rate": 5.152418695995502e-05, "loss": 32.7422, "step": 3106 }, { "epoch": 0.4985758414570546, "grad_norm": 12.217306137084961, "learning_rate": 5.149857811117451e-05, "loss": 32.7969, "step": 3107 }, { "epoch": 0.4987363100252738, "grad_norm": 12.083992004394531, "learning_rate": 5.14729688689196e-05, "loss": 32.918, "step": 3108 }, { "epoch": 0.498896778593493, "grad_norm": 12.298340797424316, "learning_rate": 5.1447359239914384e-05, "loss": 32.6641, "step": 3109 }, { "epoch": 0.4990572471617122, "grad_norm": 12.157227516174316, "learning_rate": 5.1421749230883045e-05, "loss": 32.7109, "step": 3110 }, { "epoch": 0.4992177157299314, "grad_norm": 12.52773380279541, "learning_rate": 5.139613884854989e-05, "loss": 32.6641, "step": 3111 }, { "epoch": 0.4993781842981506, "grad_norm": 12.225043296813965, "learning_rate": 5.1370528099639313e-05, "loss": 32.6406, "step": 3112 }, { "epoch": 0.4995386528663698, "grad_norm": 12.010137557983398, "learning_rate": 5.1344916990875805e-05, "loss": 32.6875, "step": 3113 }, { "epoch": 0.499699121434589, "grad_norm": 12.411381721496582, "learning_rate": 5.131930552898393e-05, "loss": 32.7031, "step": 3114 }, { "epoch": 0.4998595900028082, "grad_norm": 12.412568092346191, "learning_rate": 5.1293693720688395e-05, "loss": 32.5781, "step": 3115 }, { "epoch": 0.5000200585710274, "grad_norm": 12.574954986572266, "learning_rate": 5.1268081572713974e-05, "loss": 32.7188, "step": 3116 }, { "epoch": 0.5001805271392465, "grad_norm": 12.39391803741455, "learning_rate": 5.1242469091785484e-05, "loss": 32.5859, "step": 3117 }, { "epoch": 0.5003409957074658, "grad_norm": 12.077997207641602, "learning_rate": 5.12168562846279e-05, "loss": 32.6016, "step": 3118 }, { "epoch": 0.500501464275685, "grad_norm": 12.019993782043457, "learning_rate": 5.119124315796626e-05, "loss": 32.7344, "step": 3119 }, { "epoch": 0.5006619328439041, "grad_norm": 12.20028018951416, "learning_rate": 5.116562971852565e-05, "loss": 32.5781, "step": 3120 }, { "epoch": 0.5008224014121234, "grad_norm": 12.006571769714355, "learning_rate": 5.114001597303128e-05, "loss": 32.6953, "step": 3121 }, { "epoch": 0.5009828699803426, "grad_norm": 12.336376190185547, "learning_rate": 5.1114401928208454e-05, "loss": 32.6797, "step": 3122 }, { "epoch": 0.5011433385485617, "grad_norm": 12.196489334106445, "learning_rate": 5.108878759078249e-05, "loss": 32.625, "step": 3123 }, { "epoch": 0.501303807116781, "grad_norm": 11.949687004089355, "learning_rate": 5.1063172967478836e-05, "loss": 32.6992, "step": 3124 }, { "epoch": 0.5014642756850002, "grad_norm": 12.342007637023926, "learning_rate": 5.103755806502299e-05, "loss": 32.5586, "step": 3125 }, { "epoch": 0.5016247442532193, "grad_norm": 12.07333755493164, "learning_rate": 5.1011942890140555e-05, "loss": 32.6719, "step": 3126 }, { "epoch": 0.5017852128214386, "grad_norm": 12.020318984985352, "learning_rate": 5.098632744955716e-05, "loss": 32.6406, "step": 3127 }, { "epoch": 0.5019456813896578, "grad_norm": 12.010516166687012, "learning_rate": 5.0960711749998545e-05, "loss": 32.6719, "step": 3128 }, { "epoch": 0.502106149957877, "grad_norm": 12.012649536132812, "learning_rate": 5.093509579819049e-05, "loss": 32.6875, "step": 3129 }, { "epoch": 0.5022666185260962, "grad_norm": 12.132214546203613, "learning_rate": 5.0909479600858854e-05, "loss": 32.6094, "step": 3130 }, { "epoch": 0.5024270870943154, "grad_norm": 12.391837120056152, "learning_rate": 5.088386316472955e-05, "loss": 32.6094, "step": 3131 }, { "epoch": 0.5025875556625345, "grad_norm": 12.083276748657227, "learning_rate": 5.085824649652857e-05, "loss": 32.7344, "step": 3132 }, { "epoch": 0.5027480242307538, "grad_norm": 12.013423919677734, "learning_rate": 5.083262960298198e-05, "loss": 32.7188, "step": 3133 }, { "epoch": 0.502908492798973, "grad_norm": 12.588258743286133, "learning_rate": 5.080701249081584e-05, "loss": 32.5781, "step": 3134 }, { "epoch": 0.5030689613671921, "grad_norm": 12.070131301879883, "learning_rate": 5.078139516675635e-05, "loss": 32.6406, "step": 3135 }, { "epoch": 0.5032294299354114, "grad_norm": 12.334019660949707, "learning_rate": 5.07557776375297e-05, "loss": 32.5859, "step": 3136 }, { "epoch": 0.5033898985036306, "grad_norm": 12.148571968078613, "learning_rate": 5.073015990986217e-05, "loss": 32.6562, "step": 3137 }, { "epoch": 0.5035503670718497, "grad_norm": 12.256649017333984, "learning_rate": 5.070454199048009e-05, "loss": 32.5938, "step": 3138 }, { "epoch": 0.503710835640069, "grad_norm": 12.26246452331543, "learning_rate": 5.067892388610984e-05, "loss": 32.5938, "step": 3139 }, { "epoch": 0.5038713042082882, "grad_norm": 12.257270812988281, "learning_rate": 5.065330560347782e-05, "loss": 32.6094, "step": 3140 }, { "epoch": 0.5040317727765073, "grad_norm": 12.143379211425781, "learning_rate": 5.062768714931049e-05, "loss": 32.5938, "step": 3141 }, { "epoch": 0.5041922413447266, "grad_norm": 12.59839916229248, "learning_rate": 5.060206853033439e-05, "loss": 32.6328, "step": 3142 }, { "epoch": 0.5043527099129458, "grad_norm": 12.254678726196289, "learning_rate": 5.0576449753276076e-05, "loss": 32.5859, "step": 3143 }, { "epoch": 0.504513178481165, "grad_norm": 12.386138916015625, "learning_rate": 5.055083082486213e-05, "loss": 32.5703, "step": 3144 }, { "epoch": 0.5046736470493842, "grad_norm": 12.143272399902344, "learning_rate": 5.052521175181921e-05, "loss": 32.6328, "step": 3145 }, { "epoch": 0.5048341156176034, "grad_norm": 12.09825611114502, "learning_rate": 5.0499592540873984e-05, "loss": 32.6719, "step": 3146 }, { "epoch": 0.5049945841858225, "grad_norm": 12.27558422088623, "learning_rate": 5.047397319875314e-05, "loss": 32.6406, "step": 3147 }, { "epoch": 0.5051550527540418, "grad_norm": 12.590792655944824, "learning_rate": 5.044835373218346e-05, "loss": 32.6484, "step": 3148 }, { "epoch": 0.505315521322261, "grad_norm": 12.171749114990234, "learning_rate": 5.04227341478917e-05, "loss": 32.7891, "step": 3149 }, { "epoch": 0.5054759898904801, "grad_norm": 12.239598274230957, "learning_rate": 5.03971144526047e-05, "loss": 33.0352, "step": 3150 }, { "epoch": 0.5056364584586994, "grad_norm": 12.68615436553955, "learning_rate": 5.037149465304926e-05, "loss": 32.7969, "step": 3151 }, { "epoch": 0.5057969270269186, "grad_norm": 12.047430038452148, "learning_rate": 5.034587475595227e-05, "loss": 32.7969, "step": 3152 }, { "epoch": 0.5059573955951377, "grad_norm": 12.408615112304688, "learning_rate": 5.032025476804062e-05, "loss": 32.6641, "step": 3153 }, { "epoch": 0.506117864163357, "grad_norm": 12.595785140991211, "learning_rate": 5.029463469604122e-05, "loss": 32.7109, "step": 3154 }, { "epoch": 0.5062783327315762, "grad_norm": 12.437198638916016, "learning_rate": 5.0269014546681015e-05, "loss": 32.8594, "step": 3155 }, { "epoch": 0.5064388012997953, "grad_norm": 12.21882438659668, "learning_rate": 5.0243394326686965e-05, "loss": 32.7422, "step": 3156 }, { "epoch": 0.5065992698680146, "grad_norm": 12.604170799255371, "learning_rate": 5.021777404278605e-05, "loss": 32.6328, "step": 3157 }, { "epoch": 0.5067597384362338, "grad_norm": 12.085110664367676, "learning_rate": 5.019215370170525e-05, "loss": 32.7227, "step": 3158 }, { "epoch": 0.506920207004453, "grad_norm": 12.418082237243652, "learning_rate": 5.0166533310171585e-05, "loss": 32.7188, "step": 3159 }, { "epoch": 0.5070806755726722, "grad_norm": 12.145774841308594, "learning_rate": 5.0140912874912074e-05, "loss": 32.7422, "step": 3160 }, { "epoch": 0.5072411441408914, "grad_norm": 12.090315818786621, "learning_rate": 5.0115292402653736e-05, "loss": 32.6562, "step": 3161 }, { "epoch": 0.5074016127091105, "grad_norm": 12.06881046295166, "learning_rate": 5.008967190012365e-05, "loss": 32.6797, "step": 3162 }, { "epoch": 0.5075620812773298, "grad_norm": 12.274384498596191, "learning_rate": 5.006405137404885e-05, "loss": 32.5898, "step": 3163 }, { "epoch": 0.507722549845549, "grad_norm": 12.077564239501953, "learning_rate": 5.0038430831156366e-05, "loss": 32.6719, "step": 3164 }, { "epoch": 0.5078830184137682, "grad_norm": 12.21032428741455, "learning_rate": 5.001281027817331e-05, "loss": 32.7109, "step": 3165 }, { "epoch": 0.5080434869819874, "grad_norm": 12.20373821258545, "learning_rate": 4.99871897218267e-05, "loss": 32.7656, "step": 3166 }, { "epoch": 0.5082039555502066, "grad_norm": 12.262704849243164, "learning_rate": 4.996156916884364e-05, "loss": 32.6016, "step": 3167 }, { "epoch": 0.5083644241184258, "grad_norm": 12.55046558380127, "learning_rate": 4.993594862595116e-05, "loss": 32.6328, "step": 3168 }, { "epoch": 0.508524892686645, "grad_norm": 12.39217758178711, "learning_rate": 4.991032809987635e-05, "loss": 32.7266, "step": 3169 }, { "epoch": 0.5086853612548642, "grad_norm": 12.205659866333008, "learning_rate": 4.988470759734626e-05, "loss": 32.6953, "step": 3170 }, { "epoch": 0.5088458298230834, "grad_norm": 11.95400333404541, "learning_rate": 4.985908712508794e-05, "loss": 32.6797, "step": 3171 }, { "epoch": 0.5090062983913026, "grad_norm": 12.215934753417969, "learning_rate": 4.9833466689828433e-05, "loss": 32.7891, "step": 3172 }, { "epoch": 0.5091667669595218, "grad_norm": 12.40870475769043, "learning_rate": 4.980784629829477e-05, "loss": 32.668, "step": 3173 }, { "epoch": 0.509327235527741, "grad_norm": 12.071495056152344, "learning_rate": 4.9782225957213964e-05, "loss": 32.6484, "step": 3174 }, { "epoch": 0.5094877040959602, "grad_norm": 12.460526466369629, "learning_rate": 4.975660567331304e-05, "loss": 32.6641, "step": 3175 }, { "epoch": 0.5096481726641794, "grad_norm": 12.270561218261719, "learning_rate": 4.973098545331899e-05, "loss": 32.6094, "step": 3176 }, { "epoch": 0.5098086412323986, "grad_norm": 12.130407333374023, "learning_rate": 4.970536530395879e-05, "loss": 32.6016, "step": 3177 }, { "epoch": 0.5099691098006178, "grad_norm": 12.326557159423828, "learning_rate": 4.9679745231959384e-05, "loss": 32.6367, "step": 3178 }, { "epoch": 0.510129578368837, "grad_norm": 12.21677303314209, "learning_rate": 4.965412524404774e-05, "loss": 32.7422, "step": 3179 }, { "epoch": 0.5102900469370562, "grad_norm": 12.071439743041992, "learning_rate": 4.962850534695075e-05, "loss": 32.7188, "step": 3180 }, { "epoch": 0.5104505155052754, "grad_norm": 12.288900375366211, "learning_rate": 4.960288554739532e-05, "loss": 32.6016, "step": 3181 }, { "epoch": 0.5106109840734946, "grad_norm": 12.069438934326172, "learning_rate": 4.9577265852108296e-05, "loss": 32.6016, "step": 3182 }, { "epoch": 0.5107714526417138, "grad_norm": 12.076736450195312, "learning_rate": 4.9551646267816545e-05, "loss": 32.6953, "step": 3183 }, { "epoch": 0.510931921209933, "grad_norm": 12.14231014251709, "learning_rate": 4.952602680124687e-05, "loss": 32.5938, "step": 3184 }, { "epoch": 0.5110923897781522, "grad_norm": 12.145395278930664, "learning_rate": 4.950040745912604e-05, "loss": 32.6406, "step": 3185 }, { "epoch": 0.5112528583463714, "grad_norm": 12.01669692993164, "learning_rate": 4.9474788248180806e-05, "loss": 32.6562, "step": 3186 }, { "epoch": 0.5114133269145906, "grad_norm": 12.144721984863281, "learning_rate": 4.944916917513788e-05, "loss": 32.6953, "step": 3187 }, { "epoch": 0.5115737954828098, "grad_norm": 12.464412689208984, "learning_rate": 4.9423550246723935e-05, "loss": 32.6172, "step": 3188 }, { "epoch": 0.511734264051029, "grad_norm": 12.140079498291016, "learning_rate": 4.9397931469665615e-05, "loss": 32.6289, "step": 3189 }, { "epoch": 0.5118947326192482, "grad_norm": 12.400510787963867, "learning_rate": 4.937231285068952e-05, "loss": 32.5859, "step": 3190 }, { "epoch": 0.5120552011874674, "grad_norm": 12.197160720825195, "learning_rate": 4.9346694396522194e-05, "loss": 32.6094, "step": 3191 }, { "epoch": 0.5122156697556866, "grad_norm": 12.141490936279297, "learning_rate": 4.932107611389017e-05, "loss": 32.6562, "step": 3192 }, { "epoch": 0.5123761383239058, "grad_norm": 12.393607139587402, "learning_rate": 4.929545800951991e-05, "loss": 32.5469, "step": 3193 }, { "epoch": 0.512536606892125, "grad_norm": 12.205057144165039, "learning_rate": 4.9269840090137834e-05, "loss": 32.6641, "step": 3194 }, { "epoch": 0.5126970754603442, "grad_norm": 12.387913703918457, "learning_rate": 4.924422236247032e-05, "loss": 32.5938, "step": 3195 }, { "epoch": 0.5128575440285634, "grad_norm": 12.068414688110352, "learning_rate": 4.921860483324368e-05, "loss": 32.6172, "step": 3196 }, { "epoch": 0.5130180125967826, "grad_norm": 12.271855354309082, "learning_rate": 4.9192987509184165e-05, "loss": 32.7031, "step": 3197 }, { "epoch": 0.5131784811650018, "grad_norm": 12.256938934326172, "learning_rate": 4.9167370397018035e-05, "loss": 32.5938, "step": 3198 }, { "epoch": 0.513338949733221, "grad_norm": 11.960258483886719, "learning_rate": 4.914175350347143e-05, "loss": 32.8828, "step": 3199 }, { "epoch": 0.5134994183014402, "grad_norm": 21.230365753173828, "learning_rate": 4.9116136835270466e-05, "loss": 33.0195, "step": 3200 }, { "epoch": 0.5136598868696594, "grad_norm": 12.061826705932617, "learning_rate": 4.909052039914116e-05, "loss": 33.1797, "step": 3201 }, { "epoch": 0.5138203554378786, "grad_norm": 12.227727890014648, "learning_rate": 4.906490420180952e-05, "loss": 32.7422, "step": 3202 }, { "epoch": 0.5139808240060978, "grad_norm": 12.593948364257812, "learning_rate": 4.9039288250001466e-05, "loss": 32.7812, "step": 3203 }, { "epoch": 0.514141292574317, "grad_norm": 12.044966697692871, "learning_rate": 4.9013672550442854e-05, "loss": 32.8203, "step": 3204 }, { "epoch": 0.5143017611425362, "grad_norm": 12.417328834533691, "learning_rate": 4.898805710985945e-05, "loss": 32.8203, "step": 3205 }, { "epoch": 0.5144622297107554, "grad_norm": 12.206353187561035, "learning_rate": 4.896244193497701e-05, "loss": 32.6797, "step": 3206 }, { "epoch": 0.5146226982789746, "grad_norm": 12.306853294372559, "learning_rate": 4.893682703252118e-05, "loss": 32.75, "step": 3207 }, { "epoch": 0.5147831668471938, "grad_norm": 12.614336967468262, "learning_rate": 4.891121240921753e-05, "loss": 32.8125, "step": 3208 }, { "epoch": 0.514943635415413, "grad_norm": 11.883138656616211, "learning_rate": 4.8885598071791564e-05, "loss": 32.7734, "step": 3209 }, { "epoch": 0.5151041039836322, "grad_norm": 12.277851104736328, "learning_rate": 4.885998402696872e-05, "loss": 32.75, "step": 3210 }, { "epoch": 0.5152645725518514, "grad_norm": 12.13554859161377, "learning_rate": 4.883437028147435e-05, "loss": 32.6797, "step": 3211 }, { "epoch": 0.5154250411200706, "grad_norm": 12.14285659790039, "learning_rate": 4.880875684203375e-05, "loss": 32.6641, "step": 3212 }, { "epoch": 0.5155855096882898, "grad_norm": 12.144835472106934, "learning_rate": 4.8783143715372106e-05, "loss": 32.7031, "step": 3213 }, { "epoch": 0.515745978256509, "grad_norm": 12.215812683105469, "learning_rate": 4.875753090821453e-05, "loss": 32.6445, "step": 3214 }, { "epoch": 0.5159064468247282, "grad_norm": 12.529195785522461, "learning_rate": 4.873191842728603e-05, "loss": 32.7148, "step": 3215 }, { "epoch": 0.5160669153929474, "grad_norm": 12.142955780029297, "learning_rate": 4.8706306279311596e-05, "loss": 32.6484, "step": 3216 }, { "epoch": 0.5162273839611666, "grad_norm": 12.22248363494873, "learning_rate": 4.868069447101607e-05, "loss": 32.6562, "step": 3217 }, { "epoch": 0.5163878525293858, "grad_norm": 12.276505470275879, "learning_rate": 4.8655083009124206e-05, "loss": 32.5625, "step": 3218 }, { "epoch": 0.516548321097605, "grad_norm": 12.491209983825684, "learning_rate": 4.8629471900360705e-05, "loss": 32.6328, "step": 3219 }, { "epoch": 0.5167087896658242, "grad_norm": 12.336277961730957, "learning_rate": 4.8603861151450125e-05, "loss": 32.6328, "step": 3220 }, { "epoch": 0.5168692582340434, "grad_norm": 12.34106159210205, "learning_rate": 4.857825076911696e-05, "loss": 32.5391, "step": 3221 }, { "epoch": 0.5170297268022626, "grad_norm": 12.096207618713379, "learning_rate": 4.855264076008563e-05, "loss": 32.7344, "step": 3222 }, { "epoch": 0.5171901953704818, "grad_norm": 12.403036117553711, "learning_rate": 4.852703113108041e-05, "loss": 32.625, "step": 3223 }, { "epoch": 0.517350663938701, "grad_norm": 12.329678535461426, "learning_rate": 4.85014218888255e-05, "loss": 32.6406, "step": 3224 }, { "epoch": 0.5175111325069202, "grad_norm": 12.194215774536133, "learning_rate": 4.847581304004498e-05, "loss": 32.6016, "step": 3225 }, { "epoch": 0.5176716010751394, "grad_norm": 12.460897445678711, "learning_rate": 4.8450204591462877e-05, "loss": 32.7109, "step": 3226 }, { "epoch": 0.5178320696433586, "grad_norm": 12.25877571105957, "learning_rate": 4.8424596549803055e-05, "loss": 32.6328, "step": 3227 }, { "epoch": 0.5179925382115778, "grad_norm": 12.2019624710083, "learning_rate": 4.839898892178929e-05, "loss": 32.625, "step": 3228 }, { "epoch": 0.518153006779797, "grad_norm": 12.52374267578125, "learning_rate": 4.837338171414526e-05, "loss": 32.6641, "step": 3229 }, { "epoch": 0.5183134753480162, "grad_norm": 12.51473331451416, "learning_rate": 4.834777493359454e-05, "loss": 32.5859, "step": 3230 }, { "epoch": 0.5184739439162354, "grad_norm": 12.193907737731934, "learning_rate": 4.832216858686056e-05, "loss": 32.6523, "step": 3231 }, { "epoch": 0.5186344124844546, "grad_norm": 12.008614540100098, "learning_rate": 4.8296562680666654e-05, "loss": 32.625, "step": 3232 }, { "epoch": 0.5187948810526738, "grad_norm": 12.13473129272461, "learning_rate": 4.827095722173605e-05, "loss": 32.6055, "step": 3233 }, { "epoch": 0.518955349620893, "grad_norm": 12.394466400146484, "learning_rate": 4.824535221679184e-05, "loss": 32.5469, "step": 3234 }, { "epoch": 0.5191158181891122, "grad_norm": 12.02106761932373, "learning_rate": 4.8219747672557e-05, "loss": 32.6602, "step": 3235 }, { "epoch": 0.5192762867573314, "grad_norm": 12.011252403259277, "learning_rate": 4.8194143595754414e-05, "loss": 32.6562, "step": 3236 }, { "epoch": 0.5194367553255506, "grad_norm": 12.268196105957031, "learning_rate": 4.8168539993106806e-05, "loss": 32.6016, "step": 3237 }, { "epoch": 0.5195972238937698, "grad_norm": 12.284062385559082, "learning_rate": 4.8142936871336804e-05, "loss": 32.6641, "step": 3238 }, { "epoch": 0.519757692461989, "grad_norm": 12.323627471923828, "learning_rate": 4.811733423716687e-05, "loss": 32.6875, "step": 3239 }, { "epoch": 0.5199181610302082, "grad_norm": 12.2608642578125, "learning_rate": 4.809173209731938e-05, "loss": 32.5703, "step": 3240 }, { "epoch": 0.5200786295984274, "grad_norm": 12.129973411560059, "learning_rate": 4.806613045851657e-05, "loss": 32.6094, "step": 3241 }, { "epoch": 0.5202390981666466, "grad_norm": 12.326172828674316, "learning_rate": 4.804052932748052e-05, "loss": 32.5938, "step": 3242 }, { "epoch": 0.5203995667348658, "grad_norm": 12.318625450134277, "learning_rate": 4.8014928710933235e-05, "loss": 32.5391, "step": 3243 }, { "epoch": 0.520560035303085, "grad_norm": 12.14233112335205, "learning_rate": 4.798932861559651e-05, "loss": 32.6328, "step": 3244 }, { "epoch": 0.5207205038713042, "grad_norm": 12.088244438171387, "learning_rate": 4.7963729048192016e-05, "loss": 32.8047, "step": 3245 }, { "epoch": 0.5208809724395234, "grad_norm": 12.197029113769531, "learning_rate": 4.793813001544136e-05, "loss": 32.5781, "step": 3246 }, { "epoch": 0.5210414410077426, "grad_norm": 12.071378707885742, "learning_rate": 4.791253152406592e-05, "loss": 32.5859, "step": 3247 }, { "epoch": 0.5212019095759618, "grad_norm": 12.19015884399414, "learning_rate": 4.788693358078698e-05, "loss": 32.6328, "step": 3248 }, { "epoch": 0.521362378144181, "grad_norm": 12.415900230407715, "learning_rate": 4.786133619232568e-05, "loss": 32.8242, "step": 3249 }, { "epoch": 0.5215228467124002, "grad_norm": 16.865476608276367, "learning_rate": 4.7835739365402987e-05, "loss": 33.0742, "step": 3250 }, { "epoch": 0.5216833152806194, "grad_norm": 12.213895797729492, "learning_rate": 4.781014310673974e-05, "loss": 32.7578, "step": 3251 }, { "epoch": 0.5218437838488386, "grad_norm": 12.445226669311523, "learning_rate": 4.7784547423056614e-05, "loss": 32.8047, "step": 3252 }, { "epoch": 0.5220042524170578, "grad_norm": 11.953008651733398, "learning_rate": 4.775895232107417e-05, "loss": 32.8438, "step": 3253 }, { "epoch": 0.522164720985277, "grad_norm": 12.290484428405762, "learning_rate": 4.773335780751278e-05, "loss": 32.8828, "step": 3254 }, { "epoch": 0.5223251895534962, "grad_norm": 12.49337387084961, "learning_rate": 4.770776388909266e-05, "loss": 32.7969, "step": 3255 }, { "epoch": 0.5224856581217154, "grad_norm": 12.414667129516602, "learning_rate": 4.768217057253388e-05, "loss": 32.7109, "step": 3256 }, { "epoch": 0.5226461266899346, "grad_norm": 12.347360610961914, "learning_rate": 4.7656577864556376e-05, "loss": 32.707, "step": 3257 }, { "epoch": 0.5228065952581538, "grad_norm": 12.080144882202148, "learning_rate": 4.7630985771879865e-05, "loss": 32.6484, "step": 3258 }, { "epoch": 0.522967063826373, "grad_norm": 12.270941734313965, "learning_rate": 4.760539430122398e-05, "loss": 32.7266, "step": 3259 }, { "epoch": 0.5231275323945922, "grad_norm": 12.27366828918457, "learning_rate": 4.7579803459308125e-05, "loss": 32.6797, "step": 3260 }, { "epoch": 0.5232880009628114, "grad_norm": 12.136326789855957, "learning_rate": 4.755421325285157e-05, "loss": 32.6914, "step": 3261 }, { "epoch": 0.5234484695310306, "grad_norm": 12.20160961151123, "learning_rate": 4.75286236885734e-05, "loss": 32.7422, "step": 3262 }, { "epoch": 0.5236089380992498, "grad_norm": 11.943547248840332, "learning_rate": 4.750303477319257e-05, "loss": 32.6953, "step": 3263 }, { "epoch": 0.523769406667469, "grad_norm": 12.140074729919434, "learning_rate": 4.7477446513427814e-05, "loss": 32.6484, "step": 3264 }, { "epoch": 0.5239298752356882, "grad_norm": 12.163237571716309, "learning_rate": 4.745185891599773e-05, "loss": 32.6953, "step": 3265 }, { "epoch": 0.5240903438039074, "grad_norm": 12.149867057800293, "learning_rate": 4.742627198762073e-05, "loss": 32.6328, "step": 3266 }, { "epoch": 0.5242508123721266, "grad_norm": 12.144353866577148, "learning_rate": 4.740068573501504e-05, "loss": 32.6406, "step": 3267 }, { "epoch": 0.5244112809403458, "grad_norm": 12.227357864379883, "learning_rate": 4.7375100164898714e-05, "loss": 32.6641, "step": 3268 }, { "epoch": 0.524571749508565, "grad_norm": 12.269204139709473, "learning_rate": 4.734951528398965e-05, "loss": 32.6172, "step": 3269 }, { "epoch": 0.5247322180767842, "grad_norm": 12.548699378967285, "learning_rate": 4.7323931099005535e-05, "loss": 32.5859, "step": 3270 }, { "epoch": 0.5248926866450034, "grad_norm": 12.133208274841309, "learning_rate": 4.729834761666389e-05, "loss": 32.5859, "step": 3271 }, { "epoch": 0.5250531552132226, "grad_norm": 12.266404151916504, "learning_rate": 4.727276484368203e-05, "loss": 32.5938, "step": 3272 }, { "epoch": 0.5252136237814418, "grad_norm": 12.076913833618164, "learning_rate": 4.7247182786777125e-05, "loss": 32.6797, "step": 3273 }, { "epoch": 0.525374092349661, "grad_norm": 12.070122718811035, "learning_rate": 4.722160145266612e-05, "loss": 32.6641, "step": 3274 }, { "epoch": 0.5255345609178802, "grad_norm": 12.030816078186035, "learning_rate": 4.719602084806579e-05, "loss": 32.7344, "step": 3275 }, { "epoch": 0.5256950294860994, "grad_norm": 12.134939193725586, "learning_rate": 4.717044097969268e-05, "loss": 32.6328, "step": 3276 }, { "epoch": 0.5258554980543186, "grad_norm": 12.137334823608398, "learning_rate": 4.714486185426322e-05, "loss": 32.5859, "step": 3277 }, { "epoch": 0.5260159666225378, "grad_norm": 16.831050872802734, "learning_rate": 4.711928347849357e-05, "loss": 32.6953, "step": 3278 }, { "epoch": 0.526176435190757, "grad_norm": 12.538810729980469, "learning_rate": 4.709370585909972e-05, "loss": 32.6094, "step": 3279 }, { "epoch": 0.5263369037589762, "grad_norm": 12.005887985229492, "learning_rate": 4.706812900279746e-05, "loss": 32.6719, "step": 3280 }, { "epoch": 0.5264973723271954, "grad_norm": 12.030732154846191, "learning_rate": 4.704255291630239e-05, "loss": 32.7266, "step": 3281 }, { "epoch": 0.5266578408954146, "grad_norm": 12.197125434875488, "learning_rate": 4.7016977606329885e-05, "loss": 32.5781, "step": 3282 }, { "epoch": 0.5268183094636338, "grad_norm": 12.00639820098877, "learning_rate": 4.699140307959515e-05, "loss": 32.6328, "step": 3283 }, { "epoch": 0.526978778031853, "grad_norm": 12.297978401184082, "learning_rate": 4.6965829342813156e-05, "loss": 32.6953, "step": 3284 }, { "epoch": 0.5271392466000722, "grad_norm": 12.005636215209961, "learning_rate": 4.6940256402698675e-05, "loss": 32.5938, "step": 3285 }, { "epoch": 0.5272997151682914, "grad_norm": 12.12985897064209, "learning_rate": 4.6914684265966254e-05, "loss": 32.5703, "step": 3286 }, { "epoch": 0.5274601837365106, "grad_norm": 12.209407806396484, "learning_rate": 4.688911293933029e-05, "loss": 32.6094, "step": 3287 }, { "epoch": 0.5276206523047298, "grad_norm": 12.453469276428223, "learning_rate": 4.686354242950488e-05, "loss": 32.6094, "step": 3288 }, { "epoch": 0.527781120872949, "grad_norm": 12.471870422363281, "learning_rate": 4.6837972743203965e-05, "loss": 32.6953, "step": 3289 }, { "epoch": 0.5279415894411682, "grad_norm": 12.135765075683594, "learning_rate": 4.6812403887141266e-05, "loss": 32.6406, "step": 3290 }, { "epoch": 0.5281020580093874, "grad_norm": 12.327762603759766, "learning_rate": 4.678683586803024e-05, "loss": 32.5547, "step": 3291 }, { "epoch": 0.5282625265776066, "grad_norm": 12.383560180664062, "learning_rate": 4.676126869258418e-05, "loss": 32.5742, "step": 3292 }, { "epoch": 0.5284229951458258, "grad_norm": 12.196882247924805, "learning_rate": 4.6735702367516136e-05, "loss": 32.6172, "step": 3293 }, { "epoch": 0.528583463714045, "grad_norm": 12.15123176574707, "learning_rate": 4.671013689953894e-05, "loss": 32.8047, "step": 3294 }, { "epoch": 0.5287439322822642, "grad_norm": 12.200591087341309, "learning_rate": 4.668457229536517e-05, "loss": 32.6562, "step": 3295 }, { "epoch": 0.5289044008504834, "grad_norm": 12.19261360168457, "learning_rate": 4.6659008561707226e-05, "loss": 32.6719, "step": 3296 }, { "epoch": 0.5290648694187026, "grad_norm": 12.200567245483398, "learning_rate": 4.663344570527724e-05, "loss": 32.6641, "step": 3297 }, { "epoch": 0.5292253379869218, "grad_norm": 12.143678665161133, "learning_rate": 4.6607883732787134e-05, "loss": 32.8203, "step": 3298 }, { "epoch": 0.529385806555141, "grad_norm": 12.725349426269531, "learning_rate": 4.658232265094858e-05, "loss": 32.6406, "step": 3299 }, { "epoch": 0.5295462751233602, "grad_norm": 12.161802291870117, "learning_rate": 4.655676246647304e-05, "loss": 32.8438, "step": 3300 }, { "epoch": 0.5297067436915794, "grad_norm": 12.45149040222168, "learning_rate": 4.653120318607173e-05, "loss": 33.0469, "step": 3301 }, { "epoch": 0.5298672122597986, "grad_norm": 12.24856948852539, "learning_rate": 4.650564481645563e-05, "loss": 32.8594, "step": 3302 }, { "epoch": 0.5300276808280178, "grad_norm": 12.027243614196777, "learning_rate": 4.648008736433546e-05, "loss": 32.7578, "step": 3303 }, { "epoch": 0.530188149396237, "grad_norm": 12.694765090942383, "learning_rate": 4.645453083642173e-05, "loss": 32.7031, "step": 3304 }, { "epoch": 0.5303486179644562, "grad_norm": 12.341439247131348, "learning_rate": 4.642897523942467e-05, "loss": 32.7109, "step": 3305 }, { "epoch": 0.5305090865326754, "grad_norm": 12.16045093536377, "learning_rate": 4.640342058005432e-05, "loss": 32.75, "step": 3306 }, { "epoch": 0.5306695551008946, "grad_norm": 12.149210929870605, "learning_rate": 4.637786686502043e-05, "loss": 32.7656, "step": 3307 }, { "epoch": 0.5308300236691138, "grad_norm": 12.27546215057373, "learning_rate": 4.635231410103252e-05, "loss": 32.6406, "step": 3308 }, { "epoch": 0.530990492237333, "grad_norm": 12.150023460388184, "learning_rate": 4.6326762294799844e-05, "loss": 32.7344, "step": 3309 }, { "epoch": 0.5311509608055522, "grad_norm": 12.622673034667969, "learning_rate": 4.630121145303143e-05, "loss": 32.6797, "step": 3310 }, { "epoch": 0.5313114293737714, "grad_norm": 12.893417358398438, "learning_rate": 4.6275661582436044e-05, "loss": 32.5547, "step": 3311 }, { "epoch": 0.5314718979419906, "grad_norm": 12.402213096618652, "learning_rate": 4.625011268972217e-05, "loss": 32.7109, "step": 3312 }, { "epoch": 0.5316323665102098, "grad_norm": 12.647138595581055, "learning_rate": 4.6224564781598086e-05, "loss": 32.5703, "step": 3313 }, { "epoch": 0.531792835078429, "grad_norm": 12.449739456176758, "learning_rate": 4.619901786477175e-05, "loss": 32.625, "step": 3314 }, { "epoch": 0.5319533036466482, "grad_norm": 12.197332382202148, "learning_rate": 4.61734719459509e-05, "loss": 32.6406, "step": 3315 }, { "epoch": 0.5321137722148674, "grad_norm": 12.276908874511719, "learning_rate": 4.6147927031843024e-05, "loss": 32.625, "step": 3316 }, { "epoch": 0.5322742407830866, "grad_norm": 12.203433990478516, "learning_rate": 4.612238312915532e-05, "loss": 32.6875, "step": 3317 }, { "epoch": 0.5324347093513058, "grad_norm": 12.076157569885254, "learning_rate": 4.609684024459471e-05, "loss": 32.6797, "step": 3318 }, { "epoch": 0.532595177919525, "grad_norm": 12.075846672058105, "learning_rate": 4.607129838486787e-05, "loss": 32.6719, "step": 3319 }, { "epoch": 0.5327556464877442, "grad_norm": 12.013787269592285, "learning_rate": 4.604575755668122e-05, "loss": 32.6328, "step": 3320 }, { "epoch": 0.5329161150559634, "grad_norm": 12.077878952026367, "learning_rate": 4.602021776674089e-05, "loss": 32.5781, "step": 3321 }, { "epoch": 0.5330765836241826, "grad_norm": 12.27701473236084, "learning_rate": 4.5994679021752725e-05, "loss": 32.6406, "step": 3322 }, { "epoch": 0.5332370521924018, "grad_norm": 12.079507827758789, "learning_rate": 4.5969141328422306e-05, "loss": 32.6641, "step": 3323 }, { "epoch": 0.533397520760621, "grad_norm": 12.200389862060547, "learning_rate": 4.594360469345496e-05, "loss": 32.6016, "step": 3324 }, { "epoch": 0.5335579893288402, "grad_norm": 12.195257186889648, "learning_rate": 4.5918069123555726e-05, "loss": 32.6172, "step": 3325 }, { "epoch": 0.5337184578970594, "grad_norm": 12.267351150512695, "learning_rate": 4.5892534625429316e-05, "loss": 32.6172, "step": 3326 }, { "epoch": 0.5338789264652786, "grad_norm": 12.1334810256958, "learning_rate": 4.586700120578023e-05, "loss": 32.6406, "step": 3327 }, { "epoch": 0.5340393950334978, "grad_norm": 12.270418167114258, "learning_rate": 4.584146887131264e-05, "loss": 32.5664, "step": 3328 }, { "epoch": 0.534199863601717, "grad_norm": 12.143588066101074, "learning_rate": 4.581593762873045e-05, "loss": 32.6328, "step": 3329 }, { "epoch": 0.5343603321699362, "grad_norm": 12.516153335571289, "learning_rate": 4.5790407484737285e-05, "loss": 32.5703, "step": 3330 }, { "epoch": 0.5345208007381554, "grad_norm": 12.323019981384277, "learning_rate": 4.576487844603647e-05, "loss": 32.6406, "step": 3331 }, { "epoch": 0.5346812693063746, "grad_norm": 12.081475257873535, "learning_rate": 4.573935051933103e-05, "loss": 32.6484, "step": 3332 }, { "epoch": 0.5348417378745938, "grad_norm": 12.278904914855957, "learning_rate": 4.5713823711323706e-05, "loss": 32.6328, "step": 3333 }, { "epoch": 0.535002206442813, "grad_norm": 12.068388938903809, "learning_rate": 4.568829802871697e-05, "loss": 32.5938, "step": 3334 }, { "epoch": 0.5351626750110322, "grad_norm": 12.341952323913574, "learning_rate": 4.566277347821296e-05, "loss": 32.6719, "step": 3335 }, { "epoch": 0.5353231435792514, "grad_norm": 12.07898235321045, "learning_rate": 4.563725006651355e-05, "loss": 32.6875, "step": 3336 }, { "epoch": 0.5354836121474706, "grad_norm": 12.36776351928711, "learning_rate": 4.561172780032029e-05, "loss": 32.8047, "step": 3337 }, { "epoch": 0.5356440807156898, "grad_norm": 12.336898803710938, "learning_rate": 4.558620668633442e-05, "loss": 32.7188, "step": 3338 }, { "epoch": 0.535804549283909, "grad_norm": 12.13687801361084, "learning_rate": 4.556068673125691e-05, "loss": 32.7031, "step": 3339 }, { "epoch": 0.5359650178521282, "grad_norm": 12.197481155395508, "learning_rate": 4.553516794178842e-05, "loss": 32.5547, "step": 3340 }, { "epoch": 0.5361254864203474, "grad_norm": 12.192831993103027, "learning_rate": 4.5509650324629286e-05, "loss": 32.5508, "step": 3341 }, { "epoch": 0.5362859549885666, "grad_norm": 12.330071449279785, "learning_rate": 4.548413388647955e-05, "loss": 32.6094, "step": 3342 }, { "epoch": 0.5364464235567858, "grad_norm": 12.504595756530762, "learning_rate": 4.545861863403892e-05, "loss": 32.5625, "step": 3343 }, { "epoch": 0.536606892125005, "grad_norm": 12.265117645263672, "learning_rate": 4.543310457400683e-05, "loss": 32.5781, "step": 3344 }, { "epoch": 0.5367673606932242, "grad_norm": 12.384075164794922, "learning_rate": 4.5407591713082395e-05, "loss": 32.5391, "step": 3345 }, { "epoch": 0.5369278292614434, "grad_norm": 12.019896507263184, "learning_rate": 4.538208005796438e-05, "loss": 32.8203, "step": 3346 }, { "epoch": 0.5370882978296626, "grad_norm": 12.330560684204102, "learning_rate": 4.535656961535128e-05, "loss": 32.6172, "step": 3347 }, { "epoch": 0.5372487663978818, "grad_norm": 12.266079902648926, "learning_rate": 4.533106039194123e-05, "loss": 32.6172, "step": 3348 }, { "epoch": 0.537409234966101, "grad_norm": 12.149341583251953, "learning_rate": 4.5305552394432084e-05, "loss": 32.7812, "step": 3349 }, { "epoch": 0.5375697035343202, "grad_norm": 12.583693504333496, "learning_rate": 4.528004562952132e-05, "loss": 32.9336, "step": 3350 }, { "epoch": 0.5377301721025394, "grad_norm": 12.16651439666748, "learning_rate": 4.525454010390615e-05, "loss": 32.7891, "step": 3351 }, { "epoch": 0.5378906406707586, "grad_norm": 12.238677978515625, "learning_rate": 4.5229035824283424e-05, "loss": 32.7969, "step": 3352 }, { "epoch": 0.5380511092389778, "grad_norm": 12.47253131866455, "learning_rate": 4.520353279734969e-05, "loss": 32.7031, "step": 3353 }, { "epoch": 0.538211577807197, "grad_norm": 12.253324508666992, "learning_rate": 4.517803102980115e-05, "loss": 32.8281, "step": 3354 }, { "epoch": 0.5383720463754162, "grad_norm": 12.598233222961426, "learning_rate": 4.515253052833368e-05, "loss": 32.7344, "step": 3355 }, { "epoch": 0.5385325149436354, "grad_norm": 12.42313003540039, "learning_rate": 4.512703129964281e-05, "loss": 32.6953, "step": 3356 }, { "epoch": 0.5386929835118546, "grad_norm": 12.202323913574219, "learning_rate": 4.510153335042378e-05, "loss": 32.6797, "step": 3357 }, { "epoch": 0.5388534520800738, "grad_norm": 12.277214050292969, "learning_rate": 4.507603668737143e-05, "loss": 32.7188, "step": 3358 }, { "epoch": 0.539013920648293, "grad_norm": 12.398941993713379, "learning_rate": 4.5050541317180314e-05, "loss": 32.6406, "step": 3359 }, { "epoch": 0.5391743892165122, "grad_norm": 12.397350311279297, "learning_rate": 4.5025047246544625e-05, "loss": 32.7031, "step": 3360 }, { "epoch": 0.5393348577847314, "grad_norm": 12.214591979980469, "learning_rate": 4.4999554482158234e-05, "loss": 32.7305, "step": 3361 }, { "epoch": 0.5394953263529506, "grad_norm": 12.391152381896973, "learning_rate": 4.4974063030714606e-05, "loss": 32.6016, "step": 3362 }, { "epoch": 0.5396557949211698, "grad_norm": 12.262369155883789, "learning_rate": 4.4948572898906955e-05, "loss": 32.5781, "step": 3363 }, { "epoch": 0.539816263489389, "grad_norm": 12.204087257385254, "learning_rate": 4.492308409342808e-05, "loss": 32.8125, "step": 3364 }, { "epoch": 0.5399767320576082, "grad_norm": 12.075645446777344, "learning_rate": 4.489759662097046e-05, "loss": 32.6523, "step": 3365 }, { "epoch": 0.5401372006258274, "grad_norm": 12.074418067932129, "learning_rate": 4.487211048822621e-05, "loss": 32.6328, "step": 3366 }, { "epoch": 0.5402976691940466, "grad_norm": 12.274591445922852, "learning_rate": 4.48466257018871e-05, "loss": 32.7344, "step": 3367 }, { "epoch": 0.5404581377622658, "grad_norm": 12.07669448852539, "learning_rate": 4.482114226864456e-05, "loss": 32.6875, "step": 3368 }, { "epoch": 0.540618606330485, "grad_norm": 12.277713775634766, "learning_rate": 4.479566019518965e-05, "loss": 32.6562, "step": 3369 }, { "epoch": 0.5407790748987042, "grad_norm": 12.06838321685791, "learning_rate": 4.477017948821305e-05, "loss": 32.6406, "step": 3370 }, { "epoch": 0.5409395434669234, "grad_norm": 12.139763832092285, "learning_rate": 4.474470015440514e-05, "loss": 32.6758, "step": 3371 }, { "epoch": 0.5411000120351426, "grad_norm": 12.32642650604248, "learning_rate": 4.47192222004559e-05, "loss": 32.6016, "step": 3372 }, { "epoch": 0.5412604806033618, "grad_norm": 12.079667091369629, "learning_rate": 4.4693745633054934e-05, "loss": 32.625, "step": 3373 }, { "epoch": 0.541420949171581, "grad_norm": 12.287654876708984, "learning_rate": 4.4668270458891516e-05, "loss": 32.6328, "step": 3374 }, { "epoch": 0.5415814177398002, "grad_norm": 12.145505905151367, "learning_rate": 4.4642796684654524e-05, "loss": 32.6641, "step": 3375 }, { "epoch": 0.5417418863080194, "grad_norm": 12.209080696105957, "learning_rate": 4.461732431703249e-05, "loss": 32.5977, "step": 3376 }, { "epoch": 0.5419023548762386, "grad_norm": 12.073022842407227, "learning_rate": 4.459185336271358e-05, "loss": 32.6328, "step": 3377 }, { "epoch": 0.5420628234444578, "grad_norm": 12.274564743041992, "learning_rate": 4.456638382838558e-05, "loss": 32.6797, "step": 3378 }, { "epoch": 0.542223292012677, "grad_norm": 12.074118614196777, "learning_rate": 4.454091572073589e-05, "loss": 32.6875, "step": 3379 }, { "epoch": 0.5423837605808962, "grad_norm": 12.209443092346191, "learning_rate": 4.451544904645155e-05, "loss": 32.6016, "step": 3380 }, { "epoch": 0.5425442291491154, "grad_norm": 12.200252532958984, "learning_rate": 4.4489983812219225e-05, "loss": 32.5781, "step": 3381 }, { "epoch": 0.5427046977173346, "grad_norm": 12.334961891174316, "learning_rate": 4.4464520024725206e-05, "loss": 32.6484, "step": 3382 }, { "epoch": 0.5428651662855538, "grad_norm": 12.320162773132324, "learning_rate": 4.4439057690655387e-05, "loss": 32.5938, "step": 3383 }, { "epoch": 0.543025634853773, "grad_norm": 12.009743690490723, "learning_rate": 4.4413596816695304e-05, "loss": 32.6875, "step": 3384 }, { "epoch": 0.5431861034219923, "grad_norm": 12.07384204864502, "learning_rate": 4.438813740953007e-05, "loss": 32.6641, "step": 3385 }, { "epoch": 0.5433465719902114, "grad_norm": 12.1348295211792, "learning_rate": 4.4362679475844445e-05, "loss": 32.625, "step": 3386 }, { "epoch": 0.5435070405584306, "grad_norm": 12.130666732788086, "learning_rate": 4.433722302232281e-05, "loss": 32.6172, "step": 3387 }, { "epoch": 0.5436675091266499, "grad_norm": 12.262802124023438, "learning_rate": 4.4311768055649134e-05, "loss": 32.6719, "step": 3388 }, { "epoch": 0.543827977694869, "grad_norm": 12.073479652404785, "learning_rate": 4.428631458250701e-05, "loss": 32.6406, "step": 3389 }, { "epoch": 0.5439884462630882, "grad_norm": 11.891199111938477, "learning_rate": 4.426086260957961e-05, "loss": 32.6641, "step": 3390 }, { "epoch": 0.5441489148313075, "grad_norm": 12.383193016052246, "learning_rate": 4.4235412143549777e-05, "loss": 32.6133, "step": 3391 }, { "epoch": 0.5443093833995266, "grad_norm": 12.134709358215332, "learning_rate": 4.4209963191099887e-05, "loss": 32.625, "step": 3392 }, { "epoch": 0.5444698519677458, "grad_norm": 12.203280448913574, "learning_rate": 4.4184515758911954e-05, "loss": 32.6875, "step": 3393 }, { "epoch": 0.544630320535965, "grad_norm": 12.258545875549316, "learning_rate": 4.4159069853667584e-05, "loss": 32.6719, "step": 3394 }, { "epoch": 0.5447907891041842, "grad_norm": 12.26595401763916, "learning_rate": 4.4133625482048e-05, "loss": 32.7383, "step": 3395 }, { "epoch": 0.5449512576724034, "grad_norm": 12.446629524230957, "learning_rate": 4.410818265073401e-05, "loss": 32.6328, "step": 3396 }, { "epoch": 0.5451117262406227, "grad_norm": 12.324897766113281, "learning_rate": 4.408274136640598e-05, "loss": 32.625, "step": 3397 }, { "epoch": 0.5452721948088418, "grad_norm": 12.663689613342285, "learning_rate": 4.405730163574394e-05, "loss": 32.7109, "step": 3398 }, { "epoch": 0.545432663377061, "grad_norm": 12.391568183898926, "learning_rate": 4.4031863465427454e-05, "loss": 32.6641, "step": 3399 }, { "epoch": 0.5455931319452803, "grad_norm": 12.26512336730957, "learning_rate": 4.400642686213571e-05, "loss": 33.0, "step": 3400 }, { "epoch": 0.5457536005134994, "grad_norm": 12.379902839660645, "learning_rate": 4.398099183254748e-05, "loss": 33.1562, "step": 3401 }, { "epoch": 0.5459140690817186, "grad_norm": 12.081219673156738, "learning_rate": 4.3955558383341106e-05, "loss": 32.8633, "step": 3402 }, { "epoch": 0.5460745376499379, "grad_norm": 12.282188415527344, "learning_rate": 4.393012652119452e-05, "loss": 32.7266, "step": 3403 }, { "epoch": 0.546235006218157, "grad_norm": 12.293980598449707, "learning_rate": 4.390469625278527e-05, "loss": 32.7891, "step": 3404 }, { "epoch": 0.5463954747863762, "grad_norm": 12.015298843383789, "learning_rate": 4.3879267584790444e-05, "loss": 32.7422, "step": 3405 }, { "epoch": 0.5465559433545955, "grad_norm": 12.26699447631836, "learning_rate": 4.385384052388672e-05, "loss": 32.7422, "step": 3406 }, { "epoch": 0.5467164119228146, "grad_norm": 11.949263572692871, "learning_rate": 4.382841507675034e-05, "loss": 32.7109, "step": 3407 }, { "epoch": 0.5468768804910338, "grad_norm": 12.079423904418945, "learning_rate": 4.380299125005721e-05, "loss": 32.75, "step": 3408 }, { "epoch": 0.547037349059253, "grad_norm": 12.147594451904297, "learning_rate": 4.377756905048265e-05, "loss": 32.8281, "step": 3409 }, { "epoch": 0.5471978176274722, "grad_norm": 12.400127410888672, "learning_rate": 4.37521484847017e-05, "loss": 32.7812, "step": 3410 }, { "epoch": 0.5473582861956914, "grad_norm": 12.080330848693848, "learning_rate": 4.37267295593889e-05, "loss": 32.6875, "step": 3411 }, { "epoch": 0.5475187547639107, "grad_norm": 12.26734733581543, "learning_rate": 4.3701312281218375e-05, "loss": 32.6172, "step": 3412 }, { "epoch": 0.5476792233321298, "grad_norm": 12.21045207977295, "learning_rate": 4.3675896656863814e-05, "loss": 32.7969, "step": 3413 }, { "epoch": 0.547839691900349, "grad_norm": 12.279391288757324, "learning_rate": 4.3650482692998484e-05, "loss": 32.6719, "step": 3414 }, { "epoch": 0.5480001604685683, "grad_norm": 12.211039543151855, "learning_rate": 4.36250703962952e-05, "loss": 32.6875, "step": 3415 }, { "epoch": 0.5481606290367874, "grad_norm": 12.140236854553223, "learning_rate": 4.359965977342635e-05, "loss": 32.6953, "step": 3416 }, { "epoch": 0.5483210976050066, "grad_norm": 12.262643814086914, "learning_rate": 4.357425083106386e-05, "loss": 32.6328, "step": 3417 }, { "epoch": 0.5484815661732259, "grad_norm": 12.282512664794922, "learning_rate": 4.354884357587926e-05, "loss": 32.6602, "step": 3418 }, { "epoch": 0.548642034741445, "grad_norm": 12.017369270324707, "learning_rate": 4.3523438014543605e-05, "loss": 32.625, "step": 3419 }, { "epoch": 0.5488025033096642, "grad_norm": 12.934313774108887, "learning_rate": 4.34980341537275e-05, "loss": 32.6484, "step": 3420 }, { "epoch": 0.5489629718778835, "grad_norm": 12.007207870483398, "learning_rate": 4.347263200010111e-05, "loss": 32.5938, "step": 3421 }, { "epoch": 0.5491234404461026, "grad_norm": 12.135698318481445, "learning_rate": 4.3447231560334166e-05, "loss": 32.6406, "step": 3422 }, { "epoch": 0.5492839090143218, "grad_norm": 12.459182739257812, "learning_rate": 4.342183284109593e-05, "loss": 32.5508, "step": 3423 }, { "epoch": 0.549444377582541, "grad_norm": 12.197611808776855, "learning_rate": 4.339643584905523e-05, "loss": 32.6562, "step": 3424 }, { "epoch": 0.5496048461507602, "grad_norm": 12.129707336425781, "learning_rate": 4.337104059088043e-05, "loss": 32.6562, "step": 3425 }, { "epoch": 0.5497653147189794, "grad_norm": 12.211163520812988, "learning_rate": 4.334564707323944e-05, "loss": 32.5469, "step": 3426 }, { "epoch": 0.5499257832871987, "grad_norm": 12.023109436035156, "learning_rate": 4.332025530279969e-05, "loss": 32.6797, "step": 3427 }, { "epoch": 0.5500862518554178, "grad_norm": 12.07055377960205, "learning_rate": 4.329486528622822e-05, "loss": 32.6719, "step": 3428 }, { "epoch": 0.550246720423637, "grad_norm": 12.142109870910645, "learning_rate": 4.326947703019153e-05, "loss": 32.6719, "step": 3429 }, { "epoch": 0.5504071889918563, "grad_norm": 12.006824493408203, "learning_rate": 4.3244090541355705e-05, "loss": 32.6406, "step": 3430 }, { "epoch": 0.5505676575600754, "grad_norm": 12.078473091125488, "learning_rate": 4.321870582638636e-05, "loss": 32.625, "step": 3431 }, { "epoch": 0.5507281261282946, "grad_norm": 12.454936981201172, "learning_rate": 4.319332289194861e-05, "loss": 32.5625, "step": 3432 }, { "epoch": 0.5508885946965139, "grad_norm": 12.320721626281738, "learning_rate": 4.3167941744707135e-05, "loss": 32.625, "step": 3433 }, { "epoch": 0.551049063264733, "grad_norm": 12.390233039855957, "learning_rate": 4.314256239132616e-05, "loss": 32.5703, "step": 3434 }, { "epoch": 0.5512095318329522, "grad_norm": 12.266776084899902, "learning_rate": 4.311718483846941e-05, "loss": 32.6016, "step": 3435 }, { "epoch": 0.5513700004011715, "grad_norm": 12.006987571716309, "learning_rate": 4.309180909280015e-05, "loss": 32.625, "step": 3436 }, { "epoch": 0.5515304689693906, "grad_norm": 12.260805130004883, "learning_rate": 4.306643516098114e-05, "loss": 32.5547, "step": 3437 }, { "epoch": 0.5516909375376098, "grad_norm": 12.069096565246582, "learning_rate": 4.304106304967473e-05, "loss": 32.6172, "step": 3438 }, { "epoch": 0.5518514061058291, "grad_norm": 12.259800910949707, "learning_rate": 4.301569276554273e-05, "loss": 32.5469, "step": 3439 }, { "epoch": 0.5520118746740482, "grad_norm": 12.029793739318848, "learning_rate": 4.29903243152465e-05, "loss": 32.6406, "step": 3440 }, { "epoch": 0.5521723432422674, "grad_norm": 12.324989318847656, "learning_rate": 4.296495770544689e-05, "loss": 32.6094, "step": 3441 }, { "epoch": 0.5523328118104867, "grad_norm": 12.19960880279541, "learning_rate": 4.2939592942804336e-05, "loss": 32.5898, "step": 3442 }, { "epoch": 0.5524932803787058, "grad_norm": 12.206941604614258, "learning_rate": 4.291423003397871e-05, "loss": 32.6641, "step": 3443 }, { "epoch": 0.552653748946925, "grad_norm": 12.327692031860352, "learning_rate": 4.288886898562942e-05, "loss": 32.6719, "step": 3444 }, { "epoch": 0.5528142175151443, "grad_norm": 12.285576820373535, "learning_rate": 4.286350980441541e-05, "loss": 32.7656, "step": 3445 }, { "epoch": 0.5529746860833634, "grad_norm": 12.258216857910156, "learning_rate": 4.2838152496995106e-05, "loss": 32.5391, "step": 3446 }, { "epoch": 0.5531351546515826, "grad_norm": 12.516213417053223, "learning_rate": 4.2812797070026446e-05, "loss": 32.5625, "step": 3447 }, { "epoch": 0.5532956232198019, "grad_norm": 12.482551574707031, "learning_rate": 4.278744353016691e-05, "loss": 32.6484, "step": 3448 }, { "epoch": 0.553456091788021, "grad_norm": 11.945255279541016, "learning_rate": 4.276209188407344e-05, "loss": 32.75, "step": 3449 }, { "epoch": 0.5536165603562402, "grad_norm": 12.254231452941895, "learning_rate": 4.273674213840249e-05, "loss": 32.7031, "step": 3450 }, { "epoch": 0.5537770289244595, "grad_norm": 12.067684173583984, "learning_rate": 4.271139429981002e-05, "loss": 32.9609, "step": 3451 }, { "epoch": 0.5539374974926786, "grad_norm": 12.240365028381348, "learning_rate": 4.2686048374951496e-05, "loss": 32.875, "step": 3452 }, { "epoch": 0.5540979660608978, "grad_norm": 12.691115379333496, "learning_rate": 4.266070437048188e-05, "loss": 32.9609, "step": 3453 }, { "epoch": 0.5542584346291171, "grad_norm": 12.098764419555664, "learning_rate": 4.26353622930556e-05, "loss": 32.8281, "step": 3454 }, { "epoch": 0.5544189031973362, "grad_norm": 12.159570693969727, "learning_rate": 4.261002214932666e-05, "loss": 32.7227, "step": 3455 }, { "epoch": 0.5545793717655554, "grad_norm": 12.085740089416504, "learning_rate": 4.258468394594843e-05, "loss": 32.7188, "step": 3456 }, { "epoch": 0.5547398403337747, "grad_norm": 12.142539978027344, "learning_rate": 4.2559347689573865e-05, "loss": 32.8477, "step": 3457 }, { "epoch": 0.5549003089019938, "grad_norm": 12.404834747314453, "learning_rate": 4.2534013386855395e-05, "loss": 32.625, "step": 3458 }, { "epoch": 0.555060777470213, "grad_norm": 12.020076751708984, "learning_rate": 4.2508681044444906e-05, "loss": 32.6719, "step": 3459 }, { "epoch": 0.5552212460384323, "grad_norm": 12.27392864227295, "learning_rate": 4.24833506689938e-05, "loss": 32.6953, "step": 3460 }, { "epoch": 0.5553817146066514, "grad_norm": 12.138651847839355, "learning_rate": 4.2458022267152964e-05, "loss": 32.7031, "step": 3461 }, { "epoch": 0.5555421831748706, "grad_norm": 12.14605712890625, "learning_rate": 4.243269584557274e-05, "loss": 32.6953, "step": 3462 }, { "epoch": 0.5557026517430899, "grad_norm": 12.08152961730957, "learning_rate": 4.2407371410902964e-05, "loss": 32.7422, "step": 3463 }, { "epoch": 0.555863120311309, "grad_norm": 12.076705932617188, "learning_rate": 4.238204896979293e-05, "loss": 32.6406, "step": 3464 }, { "epoch": 0.5560235888795282, "grad_norm": 12.135537147521973, "learning_rate": 4.235672852889147e-05, "loss": 32.7344, "step": 3465 }, { "epoch": 0.5561840574477475, "grad_norm": 12.157004356384277, "learning_rate": 4.233141009484683e-05, "loss": 32.7617, "step": 3466 }, { "epoch": 0.5563445260159666, "grad_norm": 11.949310302734375, "learning_rate": 4.230609367430676e-05, "loss": 32.7695, "step": 3467 }, { "epoch": 0.5565049945841858, "grad_norm": 12.147756576538086, "learning_rate": 4.228077927391845e-05, "loss": 32.6172, "step": 3468 }, { "epoch": 0.5566654631524051, "grad_norm": 11.945765495300293, "learning_rate": 4.2255466900328575e-05, "loss": 32.7109, "step": 3469 }, { "epoch": 0.5568259317206242, "grad_norm": 11.944405555725098, "learning_rate": 4.2230156560183296e-05, "loss": 32.7031, "step": 3470 }, { "epoch": 0.5569864002888434, "grad_norm": 12.39172649383545, "learning_rate": 4.220484826012824e-05, "loss": 32.6172, "step": 3471 }, { "epoch": 0.5571468688570627, "grad_norm": 12.20994758605957, "learning_rate": 4.217954200680846e-05, "loss": 32.6055, "step": 3472 }, { "epoch": 0.5573073374252818, "grad_norm": 12.074739456176758, "learning_rate": 4.215423780686851e-05, "loss": 32.6016, "step": 3473 }, { "epoch": 0.557467805993501, "grad_norm": 11.952178001403809, "learning_rate": 4.2128935666952386e-05, "loss": 32.7695, "step": 3474 }, { "epoch": 0.5576282745617203, "grad_norm": 12.004789352416992, "learning_rate": 4.2103635593703564e-05, "loss": 32.6719, "step": 3475 }, { "epoch": 0.5577887431299394, "grad_norm": 11.95121955871582, "learning_rate": 4.207833759376494e-05, "loss": 32.6797, "step": 3476 }, { "epoch": 0.5579492116981586, "grad_norm": 12.202054977416992, "learning_rate": 4.20530416737789e-05, "loss": 32.5781, "step": 3477 }, { "epoch": 0.5581096802663779, "grad_norm": 12.269648551940918, "learning_rate": 4.202774784038728e-05, "loss": 32.6797, "step": 3478 }, { "epoch": 0.558270148834597, "grad_norm": 12.276613235473633, "learning_rate": 4.2002456100231336e-05, "loss": 32.625, "step": 3479 }, { "epoch": 0.5584306174028162, "grad_norm": 12.198887825012207, "learning_rate": 4.19771664599518e-05, "loss": 32.6094, "step": 3480 }, { "epoch": 0.5585910859710355, "grad_norm": 12.202631950378418, "learning_rate": 4.195187892618887e-05, "loss": 32.6484, "step": 3481 }, { "epoch": 0.5587515545392546, "grad_norm": 12.327127456665039, "learning_rate": 4.192659350558217e-05, "loss": 32.6328, "step": 3482 }, { "epoch": 0.5589120231074738, "grad_norm": 12.141912460327148, "learning_rate": 4.190131020477075e-05, "loss": 32.7188, "step": 3483 }, { "epoch": 0.5590724916756931, "grad_norm": 12.075028419494629, "learning_rate": 4.1876029030393124e-05, "loss": 32.7812, "step": 3484 }, { "epoch": 0.5592329602439122, "grad_norm": 12.140687942504883, "learning_rate": 4.185074998908729e-05, "loss": 32.6797, "step": 3485 }, { "epoch": 0.5593934288121314, "grad_norm": 12.196562767028809, "learning_rate": 4.182547308749059e-05, "loss": 32.5938, "step": 3486 }, { "epoch": 0.5595538973803507, "grad_norm": 12.275703430175781, "learning_rate": 4.180019833223989e-05, "loss": 32.6406, "step": 3487 }, { "epoch": 0.5597143659485698, "grad_norm": 12.133760452270508, "learning_rate": 4.177492572997145e-05, "loss": 32.625, "step": 3488 }, { "epoch": 0.559874834516789, "grad_norm": 12.199546813964844, "learning_rate": 4.174965528732098e-05, "loss": 32.7031, "step": 3489 }, { "epoch": 0.5600353030850083, "grad_norm": 12.260801315307617, "learning_rate": 4.172438701092362e-05, "loss": 32.6484, "step": 3490 }, { "epoch": 0.5601957716532274, "grad_norm": 12.195131301879883, "learning_rate": 4.169912090741393e-05, "loss": 32.6406, "step": 3491 }, { "epoch": 0.5603562402214466, "grad_norm": 12.193771362304688, "learning_rate": 4.1673856983425906e-05, "loss": 32.6328, "step": 3492 }, { "epoch": 0.5605167087896659, "grad_norm": 11.890006065368652, "learning_rate": 4.164859524559299e-05, "loss": 32.7578, "step": 3493 }, { "epoch": 0.560677177357885, "grad_norm": 12.203766822814941, "learning_rate": 4.1623335700548005e-05, "loss": 32.6484, "step": 3494 }, { "epoch": 0.5608376459261042, "grad_norm": 12.268318176269531, "learning_rate": 4.159807835492326e-05, "loss": 32.5625, "step": 3495 }, { "epoch": 0.5609981144943235, "grad_norm": 12.267171859741211, "learning_rate": 4.1572823215350435e-05, "loss": 32.5977, "step": 3496 }, { "epoch": 0.5611585830625426, "grad_norm": 12.458049774169922, "learning_rate": 4.154757028846067e-05, "loss": 32.7109, "step": 3497 }, { "epoch": 0.5613190516307618, "grad_norm": 12.215971946716309, "learning_rate": 4.1522319580884474e-05, "loss": 32.7539, "step": 3498 }, { "epoch": 0.5614795201989811, "grad_norm": 12.352740287780762, "learning_rate": 4.1497071099251824e-05, "loss": 32.7109, "step": 3499 }, { "epoch": 0.5616399887672002, "grad_norm": 12.14428424835205, "learning_rate": 4.1471824850192086e-05, "loss": 32.7422, "step": 3500 }, { "epoch": 0.5618004573354194, "grad_norm": 12.21371841430664, "learning_rate": 4.1446580840334044e-05, "loss": 32.8359, "step": 3501 }, { "epoch": 0.5619609259036387, "grad_norm": 12.431864738464355, "learning_rate": 4.142133907630593e-05, "loss": 32.9453, "step": 3502 }, { "epoch": 0.5621213944718578, "grad_norm": 12.01589584350586, "learning_rate": 4.139609956473529e-05, "loss": 32.75, "step": 3503 }, { "epoch": 0.562281863040077, "grad_norm": 12.090046882629395, "learning_rate": 4.1370862312249165e-05, "loss": 32.7969, "step": 3504 }, { "epoch": 0.5624423316082963, "grad_norm": 12.15223217010498, "learning_rate": 4.1345627325473995e-05, "loss": 32.7656, "step": 3505 }, { "epoch": 0.5626028001765154, "grad_norm": 12.166690826416016, "learning_rate": 4.13203946110356e-05, "loss": 32.9531, "step": 3506 }, { "epoch": 0.5627632687447346, "grad_norm": 12.151240348815918, "learning_rate": 4.1295164175559185e-05, "loss": 32.7812, "step": 3507 }, { "epoch": 0.5629237373129539, "grad_norm": 12.079483985900879, "learning_rate": 4.126993602566943e-05, "loss": 32.7188, "step": 3508 }, { "epoch": 0.563084205881173, "grad_norm": 12.27736759185791, "learning_rate": 4.124471016799034e-05, "loss": 32.625, "step": 3509 }, { "epoch": 0.5632446744493922, "grad_norm": 12.142521858215332, "learning_rate": 4.121948660914535e-05, "loss": 32.8438, "step": 3510 }, { "epoch": 0.5634051430176115, "grad_norm": 12.141510009765625, "learning_rate": 4.119426535575729e-05, "loss": 32.7461, "step": 3511 }, { "epoch": 0.5635656115858306, "grad_norm": 12.196615219116211, "learning_rate": 4.116904641444839e-05, "loss": 32.7422, "step": 3512 }, { "epoch": 0.5637260801540498, "grad_norm": 12.016562461853027, "learning_rate": 4.114382979184025e-05, "loss": 32.6797, "step": 3513 }, { "epoch": 0.5638865487222691, "grad_norm": 12.074959754943848, "learning_rate": 4.11186154945539e-05, "loss": 32.7109, "step": 3514 }, { "epoch": 0.5640470172904882, "grad_norm": 12.260991096496582, "learning_rate": 4.109340352920971e-05, "loss": 32.6484, "step": 3515 }, { "epoch": 0.5642074858587074, "grad_norm": 12.74951457977295, "learning_rate": 4.106819390242747e-05, "loss": 32.6562, "step": 3516 }, { "epoch": 0.5643679544269267, "grad_norm": 12.142577171325684, "learning_rate": 4.104298662082634e-05, "loss": 32.6406, "step": 3517 }, { "epoch": 0.5645284229951458, "grad_norm": 12.265242576599121, "learning_rate": 4.10177816910249e-05, "loss": 32.6797, "step": 3518 }, { "epoch": 0.564688891563365, "grad_norm": 12.223005294799805, "learning_rate": 4.099257911964109e-05, "loss": 32.7031, "step": 3519 }, { "epoch": 0.5648493601315843, "grad_norm": 12.266968727111816, "learning_rate": 4.09673789132922e-05, "loss": 32.6328, "step": 3520 }, { "epoch": 0.5650098286998034, "grad_norm": 12.134150505065918, "learning_rate": 4.0942181078594924e-05, "loss": 32.6953, "step": 3521 }, { "epoch": 0.5651702972680226, "grad_norm": 12.147275924682617, "learning_rate": 4.091698562216536e-05, "loss": 32.6094, "step": 3522 }, { "epoch": 0.5653307658362419, "grad_norm": 12.207071304321289, "learning_rate": 4.089179255061895e-05, "loss": 32.6172, "step": 3523 }, { "epoch": 0.565491234404461, "grad_norm": 12.139519691467285, "learning_rate": 4.086660187057052e-05, "loss": 32.5898, "step": 3524 }, { "epoch": 0.5656517029726802, "grad_norm": 12.339984893798828, "learning_rate": 4.084141358863424e-05, "loss": 32.7031, "step": 3525 }, { "epoch": 0.5658121715408995, "grad_norm": 12.40249252319336, "learning_rate": 4.081622771142373e-05, "loss": 32.6016, "step": 3526 }, { "epoch": 0.5659726401091186, "grad_norm": 12.138495445251465, "learning_rate": 4.079104424555185e-05, "loss": 32.5859, "step": 3527 }, { "epoch": 0.5661331086773378, "grad_norm": 12.132462501525879, "learning_rate": 4.0765863197630955e-05, "loss": 32.6641, "step": 3528 }, { "epoch": 0.5662935772455571, "grad_norm": 12.394408226013184, "learning_rate": 4.074068457427269e-05, "loss": 32.5625, "step": 3529 }, { "epoch": 0.5664540458137762, "grad_norm": 12.267147064208984, "learning_rate": 4.0715508382088094e-05, "loss": 32.5938, "step": 3530 }, { "epoch": 0.5666145143819954, "grad_norm": 12.140938758850098, "learning_rate": 4.0690334627687544e-05, "loss": 32.582, "step": 3531 }, { "epoch": 0.5667749829502147, "grad_norm": 12.261794090270996, "learning_rate": 4.06651633176808e-05, "loss": 32.6562, "step": 3532 }, { "epoch": 0.5669354515184338, "grad_norm": 12.074222564697266, "learning_rate": 4.063999445867698e-05, "loss": 32.6328, "step": 3533 }, { "epoch": 0.567095920086653, "grad_norm": 12.134568214416504, "learning_rate": 4.0614828057284546e-05, "loss": 32.625, "step": 3534 }, { "epoch": 0.5672563886548723, "grad_norm": 12.142013549804688, "learning_rate": 4.0589664120111295e-05, "loss": 32.5938, "step": 3535 }, { "epoch": 0.5674168572230914, "grad_norm": 12.01126480102539, "learning_rate": 4.056450265376442e-05, "loss": 32.6914, "step": 3536 }, { "epoch": 0.5675773257913106, "grad_norm": 12.332916259765625, "learning_rate": 4.053934366485047e-05, "loss": 32.6406, "step": 3537 }, { "epoch": 0.5677377943595299, "grad_norm": 12.271631240844727, "learning_rate": 4.0514187159975274e-05, "loss": 32.6445, "step": 3538 }, { "epoch": 0.567898262927749, "grad_norm": 11.942605018615723, "learning_rate": 4.0489033145744075e-05, "loss": 32.7422, "step": 3539 }, { "epoch": 0.5680587314959682, "grad_norm": 12.259197235107422, "learning_rate": 4.046388162876143e-05, "loss": 32.543, "step": 3540 }, { "epoch": 0.5682192000641875, "grad_norm": 12.13784408569336, "learning_rate": 4.043873261563126e-05, "loss": 32.7266, "step": 3541 }, { "epoch": 0.5683796686324066, "grad_norm": 12.136022567749023, "learning_rate": 4.041358611295683e-05, "loss": 32.6172, "step": 3542 }, { "epoch": 0.5685401372006258, "grad_norm": 12.295812606811523, "learning_rate": 4.038844212734072e-05, "loss": 32.7109, "step": 3543 }, { "epoch": 0.5687006057688451, "grad_norm": 11.946793556213379, "learning_rate": 4.0363300665384875e-05, "loss": 32.7422, "step": 3544 }, { "epoch": 0.5688610743370642, "grad_norm": 12.200444221496582, "learning_rate": 4.0338161733690546e-05, "loss": 32.6523, "step": 3545 }, { "epoch": 0.5690215429052834, "grad_norm": 12.525634765625, "learning_rate": 4.031302533885837e-05, "loss": 32.6562, "step": 3546 }, { "epoch": 0.5691820114735027, "grad_norm": 12.19977855682373, "learning_rate": 4.028789148748827e-05, "loss": 32.6406, "step": 3547 }, { "epoch": 0.5693424800417218, "grad_norm": 12.168387413024902, "learning_rate": 4.026276018617953e-05, "loss": 32.7812, "step": 3548 }, { "epoch": 0.569502948609941, "grad_norm": 12.268152236938477, "learning_rate": 4.023763144153076e-05, "loss": 32.7656, "step": 3549 }, { "epoch": 0.5696634171781603, "grad_norm": 12.309306144714355, "learning_rate": 4.0212505260139866e-05, "loss": 32.8516, "step": 3550 }, { "epoch": 0.5698238857463794, "grad_norm": 12.307623863220215, "learning_rate": 4.018738164860411e-05, "loss": 33.0703, "step": 3551 }, { "epoch": 0.5699843543145986, "grad_norm": 12.294570922851562, "learning_rate": 4.016226061352011e-05, "loss": 32.7969, "step": 3552 }, { "epoch": 0.5701448228828179, "grad_norm": 12.10714340209961, "learning_rate": 4.0137142161483756e-05, "loss": 32.8594, "step": 3553 }, { "epoch": 0.570305291451037, "grad_norm": 12.157452583312988, "learning_rate": 4.011202629909027e-05, "loss": 32.9219, "step": 3554 }, { "epoch": 0.5704657600192562, "grad_norm": 12.146232604980469, "learning_rate": 4.0086913032934206e-05, "loss": 32.7188, "step": 3555 }, { "epoch": 0.5706262285874755, "grad_norm": 12.34429931640625, "learning_rate": 4.006180236960945e-05, "loss": 32.7344, "step": 3556 }, { "epoch": 0.5707866971556946, "grad_norm": 12.141654014587402, "learning_rate": 4.003669431570917e-05, "loss": 32.6875, "step": 3557 }, { "epoch": 0.5709471657239138, "grad_norm": 12.08870792388916, "learning_rate": 4.0011588877825856e-05, "loss": 32.7422, "step": 3558 }, { "epoch": 0.5711076342921331, "grad_norm": 12.083084106445312, "learning_rate": 3.998648606255135e-05, "loss": 32.6406, "step": 3559 }, { "epoch": 0.5712681028603522, "grad_norm": 12.22290325164795, "learning_rate": 3.996138587647677e-05, "loss": 32.7617, "step": 3560 }, { "epoch": 0.5714285714285714, "grad_norm": 12.160221099853516, "learning_rate": 3.993628832619255e-05, "loss": 32.9219, "step": 3561 }, { "epoch": 0.5715890399967907, "grad_norm": 11.955832481384277, "learning_rate": 3.9911193418288416e-05, "loss": 32.7344, "step": 3562 }, { "epoch": 0.5717495085650098, "grad_norm": 12.158224105834961, "learning_rate": 3.988610115935343e-05, "loss": 32.7266, "step": 3563 }, { "epoch": 0.571909977133229, "grad_norm": 12.072916984558105, "learning_rate": 3.9861011555975926e-05, "loss": 32.6797, "step": 3564 }, { "epoch": 0.5720704457014483, "grad_norm": 12.077191352844238, "learning_rate": 3.983592461474359e-05, "loss": 32.7188, "step": 3565 }, { "epoch": 0.5722309142696674, "grad_norm": 12.278800964355469, "learning_rate": 3.981084034224336e-05, "loss": 32.7734, "step": 3566 }, { "epoch": 0.5723913828378866, "grad_norm": 12.266292572021484, "learning_rate": 3.9785758745061496e-05, "loss": 32.6953, "step": 3567 }, { "epoch": 0.5725518514061059, "grad_norm": 12.333598136901855, "learning_rate": 3.9760679829783544e-05, "loss": 32.6797, "step": 3568 }, { "epoch": 0.572712319974325, "grad_norm": 12.013089179992676, "learning_rate": 3.9735603602994374e-05, "loss": 32.8438, "step": 3569 }, { "epoch": 0.5728727885425442, "grad_norm": 12.20529556274414, "learning_rate": 3.971053007127812e-05, "loss": 32.7891, "step": 3570 }, { "epoch": 0.5730332571107635, "grad_norm": 12.925578117370605, "learning_rate": 3.9685459241218205e-05, "loss": 32.6484, "step": 3571 }, { "epoch": 0.5731937256789826, "grad_norm": 12.067206382751465, "learning_rate": 3.9660391119397364e-05, "loss": 32.7031, "step": 3572 }, { "epoch": 0.5733541942472018, "grad_norm": 12.408670425415039, "learning_rate": 3.963532571239764e-05, "loss": 32.75, "step": 3573 }, { "epoch": 0.5735146628154211, "grad_norm": 11.94632625579834, "learning_rate": 3.961026302680027e-05, "loss": 32.6562, "step": 3574 }, { "epoch": 0.5736751313836402, "grad_norm": 12.211403846740723, "learning_rate": 3.958520306918591e-05, "loss": 32.6562, "step": 3575 }, { "epoch": 0.5738355999518594, "grad_norm": 11.941865921020508, "learning_rate": 3.95601458461344e-05, "loss": 32.7266, "step": 3576 }, { "epoch": 0.5739960685200787, "grad_norm": 12.141462326049805, "learning_rate": 3.953509136422489e-05, "loss": 32.6406, "step": 3577 }, { "epoch": 0.5741565370882978, "grad_norm": 12.39400863647461, "learning_rate": 3.951003963003582e-05, "loss": 32.5547, "step": 3578 }, { "epoch": 0.574317005656517, "grad_norm": 12.351432800292969, "learning_rate": 3.948499065014491e-05, "loss": 32.7109, "step": 3579 }, { "epoch": 0.5744774742247363, "grad_norm": 12.084924697875977, "learning_rate": 3.9459944431129143e-05, "loss": 32.6641, "step": 3580 }, { "epoch": 0.5746379427929554, "grad_norm": 12.263713836669922, "learning_rate": 3.943490097956478e-05, "loss": 32.5469, "step": 3581 }, { "epoch": 0.5747984113611746, "grad_norm": 12.272241592407227, "learning_rate": 3.9409860302027366e-05, "loss": 32.5742, "step": 3582 }, { "epoch": 0.5749588799293939, "grad_norm": 12.08363151550293, "learning_rate": 3.938482240509171e-05, "loss": 32.625, "step": 3583 }, { "epoch": 0.575119348497613, "grad_norm": 12.58022689819336, "learning_rate": 3.935978729533189e-05, "loss": 32.6172, "step": 3584 }, { "epoch": 0.5752798170658322, "grad_norm": 12.344145774841309, "learning_rate": 3.933475497932126e-05, "loss": 32.625, "step": 3585 }, { "epoch": 0.5754402856340515, "grad_norm": 12.463418006896973, "learning_rate": 3.9309725463632424e-05, "loss": 32.5703, "step": 3586 }, { "epoch": 0.5756007542022706, "grad_norm": 12.270010948181152, "learning_rate": 3.928469875483726e-05, "loss": 32.5781, "step": 3587 }, { "epoch": 0.5757612227704898, "grad_norm": 12.074677467346191, "learning_rate": 3.9259674859506905e-05, "loss": 32.6328, "step": 3588 }, { "epoch": 0.5759216913387091, "grad_norm": 12.150169372558594, "learning_rate": 3.9234653784211783e-05, "loss": 32.6484, "step": 3589 }, { "epoch": 0.5760821599069282, "grad_norm": 12.389191627502441, "learning_rate": 3.9209635535521545e-05, "loss": 32.5938, "step": 3590 }, { "epoch": 0.5762426284751474, "grad_norm": 12.007847785949707, "learning_rate": 3.918462012000511e-05, "loss": 32.625, "step": 3591 }, { "epoch": 0.5764030970433667, "grad_norm": 12.148822784423828, "learning_rate": 3.9159607544230645e-05, "loss": 32.7344, "step": 3592 }, { "epoch": 0.5765635656115858, "grad_norm": 12.326192855834961, "learning_rate": 3.913459781476561e-05, "loss": 32.6094, "step": 3593 }, { "epoch": 0.576724034179805, "grad_norm": 12.052337646484375, "learning_rate": 3.9109590938176655e-05, "loss": 32.8086, "step": 3594 }, { "epoch": 0.5768845027480243, "grad_norm": 12.453706741333008, "learning_rate": 3.908458692102974e-05, "loss": 32.6484, "step": 3595 }, { "epoch": 0.5770449713162434, "grad_norm": 12.763252258300781, "learning_rate": 3.905958576989005e-05, "loss": 32.6016, "step": 3596 }, { "epoch": 0.5772054398844626, "grad_norm": 12.199947357177734, "learning_rate": 3.9034587491321995e-05, "loss": 32.6328, "step": 3597 }, { "epoch": 0.5773659084526819, "grad_norm": 12.084548950195312, "learning_rate": 3.9009592091889256e-05, "loss": 32.8828, "step": 3598 }, { "epoch": 0.577526377020901, "grad_norm": 12.607904434204102, "learning_rate": 3.898459957815477e-05, "loss": 32.8047, "step": 3599 }, { "epoch": 0.5776868455891202, "grad_norm": 12.243553161621094, "learning_rate": 3.895960995668069e-05, "loss": 33.0859, "step": 3600 }, { "epoch": 0.5778473141573395, "grad_norm": 12.239008903503418, "learning_rate": 3.8934623234028415e-05, "loss": 32.8125, "step": 3601 }, { "epoch": 0.5780077827255586, "grad_norm": 12.369253158569336, "learning_rate": 3.8909639416758585e-05, "loss": 32.8125, "step": 3602 }, { "epoch": 0.5781682512937778, "grad_norm": 12.036633491516113, "learning_rate": 3.88846585114311e-05, "loss": 32.8281, "step": 3603 }, { "epoch": 0.5783287198619971, "grad_norm": 12.083532333374023, "learning_rate": 3.8859680524605074e-05, "loss": 32.7188, "step": 3604 }, { "epoch": 0.5784891884302162, "grad_norm": 12.355076789855957, "learning_rate": 3.883470546283884e-05, "loss": 32.7656, "step": 3605 }, { "epoch": 0.5786496569984354, "grad_norm": 12.205049514770508, "learning_rate": 3.880973333268998e-05, "loss": 32.625, "step": 3606 }, { "epoch": 0.5788101255666547, "grad_norm": 12.08525276184082, "learning_rate": 3.878476414071532e-05, "loss": 32.6484, "step": 3607 }, { "epoch": 0.5789705941348738, "grad_norm": 11.882889747619629, "learning_rate": 3.87597978934709e-05, "loss": 32.7266, "step": 3608 }, { "epoch": 0.579131062703093, "grad_norm": 12.009331703186035, "learning_rate": 3.873483459751197e-05, "loss": 32.7656, "step": 3609 }, { "epoch": 0.5792915312713123, "grad_norm": 12.07844066619873, "learning_rate": 3.870987425939303e-05, "loss": 32.6875, "step": 3610 }, { "epoch": 0.5794519998395314, "grad_norm": 12.077715873718262, "learning_rate": 3.868491688566779e-05, "loss": 32.6719, "step": 3611 }, { "epoch": 0.5796124684077506, "grad_norm": 12.208990097045898, "learning_rate": 3.865996248288919e-05, "loss": 32.5938, "step": 3612 }, { "epoch": 0.5797729369759699, "grad_norm": 11.953262329101562, "learning_rate": 3.8635011057609396e-05, "loss": 32.6641, "step": 3613 }, { "epoch": 0.579933405544189, "grad_norm": 12.138404846191406, "learning_rate": 3.861006261637978e-05, "loss": 32.6484, "step": 3614 }, { "epoch": 0.5800938741124082, "grad_norm": 12.136821746826172, "learning_rate": 3.8585117165750915e-05, "loss": 32.6172, "step": 3615 }, { "epoch": 0.5802543426806275, "grad_norm": 12.330986022949219, "learning_rate": 3.856017471227263e-05, "loss": 32.5547, "step": 3616 }, { "epoch": 0.5804148112488466, "grad_norm": 12.008398056030273, "learning_rate": 3.853523526249394e-05, "loss": 32.7188, "step": 3617 }, { "epoch": 0.5805752798170658, "grad_norm": 12.21334171295166, "learning_rate": 3.851029882296307e-05, "loss": 32.6172, "step": 3618 }, { "epoch": 0.5807357483852851, "grad_norm": 12.263895988464355, "learning_rate": 3.848536540022745e-05, "loss": 32.6484, "step": 3619 }, { "epoch": 0.5808962169535042, "grad_norm": 12.744641304016113, "learning_rate": 3.8460435000833776e-05, "loss": 32.5703, "step": 3620 }, { "epoch": 0.5810566855217234, "grad_norm": 12.07136344909668, "learning_rate": 3.8435507631327836e-05, "loss": 32.6172, "step": 3621 }, { "epoch": 0.5812171540899427, "grad_norm": 12.150655746459961, "learning_rate": 3.841058329825473e-05, "loss": 32.7031, "step": 3622 }, { "epoch": 0.5813776226581618, "grad_norm": 12.149602890014648, "learning_rate": 3.8385662008158706e-05, "loss": 32.6562, "step": 3623 }, { "epoch": 0.581538091226381, "grad_norm": 12.257755279541016, "learning_rate": 3.836074376758323e-05, "loss": 32.6016, "step": 3624 }, { "epoch": 0.5816985597946003, "grad_norm": 12.207391738891602, "learning_rate": 3.833582858307097e-05, "loss": 32.6484, "step": 3625 }, { "epoch": 0.5818590283628194, "grad_norm": 12.074431419372559, "learning_rate": 3.831091646116377e-05, "loss": 32.7188, "step": 3626 }, { "epoch": 0.5820194969310386, "grad_norm": 12.271665573120117, "learning_rate": 3.82860074084027e-05, "loss": 32.6562, "step": 3627 }, { "epoch": 0.5821799654992579, "grad_norm": 12.335921287536621, "learning_rate": 3.8261101431328e-05, "loss": 32.6797, "step": 3628 }, { "epoch": 0.582340434067477, "grad_norm": 12.135940551757812, "learning_rate": 3.823619853647912e-05, "loss": 32.6406, "step": 3629 }, { "epoch": 0.5825009026356962, "grad_norm": 12.327780723571777, "learning_rate": 3.821129873039469e-05, "loss": 32.6328, "step": 3630 }, { "epoch": 0.5826613712039155, "grad_norm": 12.207282066345215, "learning_rate": 3.818640201961253e-05, "loss": 32.6016, "step": 3631 }, { "epoch": 0.5828218397721346, "grad_norm": 12.069193840026855, "learning_rate": 3.816150841066965e-05, "loss": 32.5859, "step": 3632 }, { "epoch": 0.5829823083403538, "grad_norm": 12.081086158752441, "learning_rate": 3.813661791010225e-05, "loss": 32.7188, "step": 3633 }, { "epoch": 0.5831427769085731, "grad_norm": 12.076106071472168, "learning_rate": 3.811173052444569e-05, "loss": 32.6875, "step": 3634 }, { "epoch": 0.5833032454767922, "grad_norm": 12.33249568939209, "learning_rate": 3.8086846260234544e-05, "loss": 32.6094, "step": 3635 }, { "epoch": 0.5834637140450114, "grad_norm": 12.391646385192871, "learning_rate": 3.8061965124002554e-05, "loss": 32.5859, "step": 3636 }, { "epoch": 0.5836241826132307, "grad_norm": 12.264320373535156, "learning_rate": 3.803708712228264e-05, "loss": 32.6328, "step": 3637 }, { "epoch": 0.5837846511814498, "grad_norm": 12.077710151672363, "learning_rate": 3.801221226160689e-05, "loss": 32.75, "step": 3638 }, { "epoch": 0.583945119749669, "grad_norm": 12.146330833435059, "learning_rate": 3.7987340548506584e-05, "loss": 32.7969, "step": 3639 }, { "epoch": 0.5841055883178883, "grad_norm": 12.258939743041992, "learning_rate": 3.7962471989512165e-05, "loss": 32.6094, "step": 3640 }, { "epoch": 0.5842660568861074, "grad_norm": 12.020719528198242, "learning_rate": 3.793760659115325e-05, "loss": 32.7188, "step": 3641 }, { "epoch": 0.5844265254543266, "grad_norm": 12.082084655761719, "learning_rate": 3.791274435995863e-05, "loss": 32.7266, "step": 3642 }, { "epoch": 0.5845869940225459, "grad_norm": 12.59066104888916, "learning_rate": 3.788788530245625e-05, "loss": 32.5703, "step": 3643 }, { "epoch": 0.584747462590765, "grad_norm": 12.318979263305664, "learning_rate": 3.786302942517327e-05, "loss": 32.6406, "step": 3644 }, { "epoch": 0.5849079311589842, "grad_norm": 12.23369312286377, "learning_rate": 3.783817673463591e-05, "loss": 32.7266, "step": 3645 }, { "epoch": 0.5850683997272035, "grad_norm": 12.34404182434082, "learning_rate": 3.7813327237369674e-05, "loss": 32.6953, "step": 3646 }, { "epoch": 0.5852288682954226, "grad_norm": 12.259613990783691, "learning_rate": 3.778848093989916e-05, "loss": 32.7422, "step": 3647 }, { "epoch": 0.5853893368636418, "grad_norm": 12.60306167602539, "learning_rate": 3.776363784874813e-05, "loss": 32.7109, "step": 3648 }, { "epoch": 0.5855498054318611, "grad_norm": 12.586189270019531, "learning_rate": 3.773879797043952e-05, "loss": 32.8594, "step": 3649 }, { "epoch": 0.5857102740000802, "grad_norm": 12.09480094909668, "learning_rate": 3.771396131149543e-05, "loss": 32.9297, "step": 3650 }, { "epoch": 0.5858707425682994, "grad_norm": 12.182134628295898, "learning_rate": 3.768912787843708e-05, "loss": 32.9609, "step": 3651 }, { "epoch": 0.5860312111365187, "grad_norm": 12.100342750549316, "learning_rate": 3.7664297677784885e-05, "loss": 32.9453, "step": 3652 }, { "epoch": 0.5861916797047378, "grad_norm": 12.093555450439453, "learning_rate": 3.763947071605837e-05, "loss": 32.7422, "step": 3653 }, { "epoch": 0.586352148272957, "grad_norm": 12.232358932495117, "learning_rate": 3.7614646999776254e-05, "loss": 32.7422, "step": 3654 }, { "epoch": 0.5865126168411763, "grad_norm": 12.021773338317871, "learning_rate": 3.758982653545637e-05, "loss": 32.8281, "step": 3655 }, { "epoch": 0.5866730854093954, "grad_norm": 12.623337745666504, "learning_rate": 3.7565009329615704e-05, "loss": 32.7422, "step": 3656 }, { "epoch": 0.5868335539776146, "grad_norm": 12.024174690246582, "learning_rate": 3.7540195388770386e-05, "loss": 32.8047, "step": 3657 }, { "epoch": 0.5869940225458339, "grad_norm": 12.211112022399902, "learning_rate": 3.7515384719435694e-05, "loss": 32.7031, "step": 3658 }, { "epoch": 0.587154491114053, "grad_norm": 12.086769104003906, "learning_rate": 3.749057732812604e-05, "loss": 32.7188, "step": 3659 }, { "epoch": 0.5873149596822722, "grad_norm": 12.275131225585938, "learning_rate": 3.7465773221355e-05, "loss": 32.7031, "step": 3660 }, { "epoch": 0.5874754282504915, "grad_norm": 12.144760131835938, "learning_rate": 3.7440972405635255e-05, "loss": 32.6641, "step": 3661 }, { "epoch": 0.5876358968187106, "grad_norm": 12.146224975585938, "learning_rate": 3.7416174887478626e-05, "loss": 32.6328, "step": 3662 }, { "epoch": 0.5877963653869298, "grad_norm": 12.281168937683105, "learning_rate": 3.739138067339611e-05, "loss": 32.6719, "step": 3663 }, { "epoch": 0.5879568339551491, "grad_norm": 12.009185791015625, "learning_rate": 3.736658976989776e-05, "loss": 32.7109, "step": 3664 }, { "epoch": 0.5881173025233682, "grad_norm": 12.198758125305176, "learning_rate": 3.734180218349285e-05, "loss": 32.625, "step": 3665 }, { "epoch": 0.5882777710915874, "grad_norm": 12.287699699401855, "learning_rate": 3.731701792068969e-05, "loss": 32.6484, "step": 3666 }, { "epoch": 0.5884382396598067, "grad_norm": 12.146845817565918, "learning_rate": 3.729223698799582e-05, "loss": 32.6641, "step": 3667 }, { "epoch": 0.5885987082280258, "grad_norm": 12.165651321411133, "learning_rate": 3.7267459391917804e-05, "loss": 32.75, "step": 3668 }, { "epoch": 0.588759176796245, "grad_norm": 12.272422790527344, "learning_rate": 3.7242685138961366e-05, "loss": 32.5547, "step": 3669 }, { "epoch": 0.5889196453644643, "grad_norm": 12.274888038635254, "learning_rate": 3.72179142356314e-05, "loss": 32.5859, "step": 3670 }, { "epoch": 0.5890801139326834, "grad_norm": 12.5413236618042, "learning_rate": 3.719314668843187e-05, "loss": 32.6797, "step": 3671 }, { "epoch": 0.5892405825009026, "grad_norm": 12.398616790771484, "learning_rate": 3.716838250386585e-05, "loss": 32.5703, "step": 3672 }, { "epoch": 0.5894010510691219, "grad_norm": 12.071931838989258, "learning_rate": 3.714362168843558e-05, "loss": 32.6797, "step": 3673 }, { "epoch": 0.589561519637341, "grad_norm": 12.284093856811523, "learning_rate": 3.7118864248642384e-05, "loss": 32.7656, "step": 3674 }, { "epoch": 0.5897219882055602, "grad_norm": 12.397127151489258, "learning_rate": 3.7094110190986694e-05, "loss": 32.625, "step": 3675 }, { "epoch": 0.5898824567737795, "grad_norm": 11.944280624389648, "learning_rate": 3.706935952196805e-05, "loss": 32.6406, "step": 3676 }, { "epoch": 0.5900429253419986, "grad_norm": 12.257983207702637, "learning_rate": 3.704461224808514e-05, "loss": 32.6562, "step": 3677 }, { "epoch": 0.5902033939102178, "grad_norm": 12.329955101013184, "learning_rate": 3.7019868375835736e-05, "loss": 32.5703, "step": 3678 }, { "epoch": 0.5903638624784371, "grad_norm": 12.006725311279297, "learning_rate": 3.6995127911716715e-05, "loss": 32.6875, "step": 3679 }, { "epoch": 0.5905243310466562, "grad_norm": 12.514588356018066, "learning_rate": 3.697039086222404e-05, "loss": 32.6484, "step": 3680 }, { "epoch": 0.5906847996148754, "grad_norm": 12.258294105529785, "learning_rate": 3.694565723385282e-05, "loss": 32.5938, "step": 3681 }, { "epoch": 0.5908452681830947, "grad_norm": 12.200634956359863, "learning_rate": 3.692092703309721e-05, "loss": 32.6719, "step": 3682 }, { "epoch": 0.5910057367513138, "grad_norm": 12.267491340637207, "learning_rate": 3.6896200266450545e-05, "loss": 32.5938, "step": 3683 }, { "epoch": 0.591166205319533, "grad_norm": 12.445457458496094, "learning_rate": 3.68714769404052e-05, "loss": 32.6641, "step": 3684 }, { "epoch": 0.5913266738877523, "grad_norm": 12.167256355285645, "learning_rate": 3.6846757061452644e-05, "loss": 32.7188, "step": 3685 }, { "epoch": 0.5914871424559714, "grad_norm": 12.274937629699707, "learning_rate": 3.6822040636083454e-05, "loss": 32.5625, "step": 3686 }, { "epoch": 0.5916476110241906, "grad_norm": 12.138152122497559, "learning_rate": 3.679732767078733e-05, "loss": 32.6484, "step": 3687 }, { "epoch": 0.5918080795924099, "grad_norm": 12.023834228515625, "learning_rate": 3.677261817205301e-05, "loss": 32.7031, "step": 3688 }, { "epoch": 0.591968548160629, "grad_norm": 12.016057968139648, "learning_rate": 3.674791214636836e-05, "loss": 32.668, "step": 3689 }, { "epoch": 0.5921290167288482, "grad_norm": 12.260727882385254, "learning_rate": 3.67232096002203e-05, "loss": 32.5859, "step": 3690 }, { "epoch": 0.5922894852970675, "grad_norm": 12.576273918151855, "learning_rate": 3.669851054009491e-05, "loss": 32.7422, "step": 3691 }, { "epoch": 0.5924499538652866, "grad_norm": 12.140472412109375, "learning_rate": 3.667381497247724e-05, "loss": 32.5859, "step": 3692 }, { "epoch": 0.5926104224335058, "grad_norm": 12.262439727783203, "learning_rate": 3.6649122903851526e-05, "loss": 32.6016, "step": 3693 }, { "epoch": 0.5927708910017251, "grad_norm": 12.149698257446289, "learning_rate": 3.662443434070103e-05, "loss": 32.7188, "step": 3694 }, { "epoch": 0.5929313595699442, "grad_norm": 12.067748069763184, "learning_rate": 3.659974928950811e-05, "loss": 32.6016, "step": 3695 }, { "epoch": 0.5930918281381634, "grad_norm": 12.584467887878418, "learning_rate": 3.6575067756754195e-05, "loss": 32.6172, "step": 3696 }, { "epoch": 0.5932522967063827, "grad_norm": 12.523456573486328, "learning_rate": 3.655038974891981e-05, "loss": 32.7031, "step": 3697 }, { "epoch": 0.5934127652746018, "grad_norm": 12.325772285461426, "learning_rate": 3.652571527248453e-05, "loss": 32.7344, "step": 3698 }, { "epoch": 0.593573233842821, "grad_norm": 11.95582389831543, "learning_rate": 3.650104433392703e-05, "loss": 32.8594, "step": 3699 }, { "epoch": 0.5937337024110403, "grad_norm": 12.280241012573242, "learning_rate": 3.6476376939725e-05, "loss": 32.9062, "step": 3700 }, { "epoch": 0.5938941709792595, "grad_norm": 12.179352760314941, "learning_rate": 3.645171309635528e-05, "loss": 33.0469, "step": 3701 }, { "epoch": 0.5940546395474786, "grad_norm": 12.030317306518555, "learning_rate": 3.6427052810293724e-05, "loss": 33.0078, "step": 3702 }, { "epoch": 0.5942151081156979, "grad_norm": 12.40307331085205, "learning_rate": 3.640239608801525e-05, "loss": 32.7891, "step": 3703 }, { "epoch": 0.594375576683917, "grad_norm": 12.303383827209473, "learning_rate": 3.6377742935993874e-05, "loss": 32.7734, "step": 3704 }, { "epoch": 0.5945360452521362, "grad_norm": 12.15244197845459, "learning_rate": 3.635309336070264e-05, "loss": 32.8672, "step": 3705 }, { "epoch": 0.5946965138203555, "grad_norm": 12.08311653137207, "learning_rate": 3.632844736861365e-05, "loss": 32.7422, "step": 3706 }, { "epoch": 0.5948569823885747, "grad_norm": 12.016911506652832, "learning_rate": 3.630380496619813e-05, "loss": 32.6484, "step": 3707 }, { "epoch": 0.5950174509567938, "grad_norm": 11.950003623962402, "learning_rate": 3.627916615992629e-05, "loss": 32.8047, "step": 3708 }, { "epoch": 0.5951779195250131, "grad_norm": 12.19992446899414, "learning_rate": 3.625453095626742e-05, "loss": 32.6094, "step": 3709 }, { "epoch": 0.5953383880932323, "grad_norm": 12.203136444091797, "learning_rate": 3.622989936168986e-05, "loss": 32.7266, "step": 3710 }, { "epoch": 0.5954988566614514, "grad_norm": 12.076252937316895, "learning_rate": 3.620527138266102e-05, "loss": 32.6953, "step": 3711 }, { "epoch": 0.5956593252296707, "grad_norm": 12.082510948181152, "learning_rate": 3.6180647025647364e-05, "loss": 32.7266, "step": 3712 }, { "epoch": 0.5958197937978899, "grad_norm": 12.138703346252441, "learning_rate": 3.615602629711435e-05, "loss": 32.6641, "step": 3713 }, { "epoch": 0.595980262366109, "grad_norm": 12.457901000976562, "learning_rate": 3.613140920352659e-05, "loss": 32.5664, "step": 3714 }, { "epoch": 0.5961407309343283, "grad_norm": 12.071717262268066, "learning_rate": 3.610679575134761e-05, "loss": 32.625, "step": 3715 }, { "epoch": 0.5963011995025475, "grad_norm": 12.1304292678833, "learning_rate": 3.608218594704005e-05, "loss": 32.7188, "step": 3716 }, { "epoch": 0.5964616680707666, "grad_norm": 12.137445449829102, "learning_rate": 3.605757979706563e-05, "loss": 32.6484, "step": 3717 }, { "epoch": 0.5966221366389859, "grad_norm": 12.199212074279785, "learning_rate": 3.603297730788503e-05, "loss": 32.6562, "step": 3718 }, { "epoch": 0.596782605207205, "grad_norm": 12.007987022399902, "learning_rate": 3.600837848595801e-05, "loss": 32.6484, "step": 3719 }, { "epoch": 0.5969430737754242, "grad_norm": 12.27281665802002, "learning_rate": 3.5983783337743396e-05, "loss": 32.7344, "step": 3720 }, { "epoch": 0.5971035423436435, "grad_norm": 12.009159088134766, "learning_rate": 3.595919186969898e-05, "loss": 32.6406, "step": 3721 }, { "epoch": 0.5972640109118627, "grad_norm": 12.071283340454102, "learning_rate": 3.5934604088281654e-05, "loss": 32.6562, "step": 3722 }, { "epoch": 0.5974244794800818, "grad_norm": 12.139006614685059, "learning_rate": 3.5910019999947275e-05, "loss": 32.6094, "step": 3723 }, { "epoch": 0.5975849480483011, "grad_norm": 12.210177421569824, "learning_rate": 3.5885439611150806e-05, "loss": 32.6953, "step": 3724 }, { "epoch": 0.5977454166165203, "grad_norm": 12.326787948608398, "learning_rate": 3.586086292834619e-05, "loss": 32.6094, "step": 3725 }, { "epoch": 0.5979058851847394, "grad_norm": 12.536192893981934, "learning_rate": 3.5836289957986414e-05, "loss": 32.7422, "step": 3726 }, { "epoch": 0.5980663537529587, "grad_norm": 12.129606246948242, "learning_rate": 3.581172070652346e-05, "loss": 32.6094, "step": 3727 }, { "epoch": 0.5982268223211779, "grad_norm": 12.143683433532715, "learning_rate": 3.5787155180408374e-05, "loss": 32.6484, "step": 3728 }, { "epoch": 0.598387290889397, "grad_norm": 12.0191011428833, "learning_rate": 3.576259338609119e-05, "loss": 32.7188, "step": 3729 }, { "epoch": 0.5985477594576163, "grad_norm": 12.282296180725098, "learning_rate": 3.573803533002101e-05, "loss": 32.6719, "step": 3730 }, { "epoch": 0.5987082280258355, "grad_norm": 12.070307731628418, "learning_rate": 3.57134810186459e-05, "loss": 32.5938, "step": 3731 }, { "epoch": 0.5988686965940546, "grad_norm": 12.012173652648926, "learning_rate": 3.568893045841298e-05, "loss": 32.5859, "step": 3732 }, { "epoch": 0.5990291651622739, "grad_norm": 12.522652626037598, "learning_rate": 3.5664383655768365e-05, "loss": 32.5859, "step": 3733 }, { "epoch": 0.599189633730493, "grad_norm": 12.19826602935791, "learning_rate": 3.56398406171572e-05, "loss": 32.6484, "step": 3734 }, { "epoch": 0.5993501022987122, "grad_norm": 12.134735107421875, "learning_rate": 3.561530134902361e-05, "loss": 32.7031, "step": 3735 }, { "epoch": 0.5995105708669315, "grad_norm": 12.711235046386719, "learning_rate": 3.559076585781078e-05, "loss": 32.6562, "step": 3736 }, { "epoch": 0.5996710394351507, "grad_norm": 12.390074729919434, "learning_rate": 3.556623414996084e-05, "loss": 32.6016, "step": 3737 }, { "epoch": 0.5998315080033698, "grad_norm": 12.2576322555542, "learning_rate": 3.554170623191503e-05, "loss": 32.6094, "step": 3738 }, { "epoch": 0.5999919765715891, "grad_norm": 12.067850112915039, "learning_rate": 3.551718211011343e-05, "loss": 32.5703, "step": 3739 }, { "epoch": 0.6001524451398083, "grad_norm": 12.07667350769043, "learning_rate": 3.5492661790995294e-05, "loss": 32.7109, "step": 3740 }, { "epoch": 0.6003129137080274, "grad_norm": 12.0771484375, "learning_rate": 3.546814528099878e-05, "loss": 32.6719, "step": 3741 }, { "epoch": 0.6004733822762467, "grad_norm": 12.140548706054688, "learning_rate": 3.544363258656106e-05, "loss": 32.6172, "step": 3742 }, { "epoch": 0.6006338508444659, "grad_norm": 12.071049690246582, "learning_rate": 3.541912371411832e-05, "loss": 32.625, "step": 3743 }, { "epoch": 0.600794319412685, "grad_norm": 12.199115753173828, "learning_rate": 3.5394618670105746e-05, "loss": 32.5859, "step": 3744 }, { "epoch": 0.6009547879809043, "grad_norm": 12.195233345031738, "learning_rate": 3.53701174609575e-05, "loss": 32.582, "step": 3745 }, { "epoch": 0.6011152565491235, "grad_norm": 12.520365715026855, "learning_rate": 3.5345620093106746e-05, "loss": 32.7109, "step": 3746 }, { "epoch": 0.6012757251173426, "grad_norm": 12.01130485534668, "learning_rate": 3.532112657298564e-05, "loss": 32.7148, "step": 3747 }, { "epoch": 0.6014361936855619, "grad_norm": 12.747610092163086, "learning_rate": 3.5296636907025325e-05, "loss": 32.6953, "step": 3748 }, { "epoch": 0.6015966622537811, "grad_norm": 12.051742553710938, "learning_rate": 3.527215110165595e-05, "loss": 33.0938, "step": 3749 }, { "epoch": 0.6017571308220002, "grad_norm": 12.021291732788086, "learning_rate": 3.524766916330663e-05, "loss": 32.9297, "step": 3750 }, { "epoch": 0.6019175993902195, "grad_norm": 12.304274559020996, "learning_rate": 3.522319109840544e-05, "loss": 33.0078, "step": 3751 }, { "epoch": 0.6020780679584387, "grad_norm": 12.170859336853027, "learning_rate": 3.5198716913379496e-05, "loss": 32.9297, "step": 3752 }, { "epoch": 0.6022385365266578, "grad_norm": 12.235950469970703, "learning_rate": 3.517424661465485e-05, "loss": 32.8047, "step": 3753 }, { "epoch": 0.6023990050948771, "grad_norm": 12.416960716247559, "learning_rate": 3.5149780208656564e-05, "loss": 32.6484, "step": 3754 }, { "epoch": 0.6025594736630963, "grad_norm": 12.015339851379395, "learning_rate": 3.512531770180866e-05, "loss": 32.793, "step": 3755 }, { "epoch": 0.6027199422313154, "grad_norm": 12.351983070373535, "learning_rate": 3.510085910053413e-05, "loss": 32.7422, "step": 3756 }, { "epoch": 0.6028804107995347, "grad_norm": 12.688265800476074, "learning_rate": 3.507640441125496e-05, "loss": 32.6172, "step": 3757 }, { "epoch": 0.6030408793677539, "grad_norm": 12.228882789611816, "learning_rate": 3.505195364039211e-05, "loss": 32.832, "step": 3758 }, { "epoch": 0.603201347935973, "grad_norm": 12.147198677062988, "learning_rate": 3.502750679436548e-05, "loss": 32.8047, "step": 3759 }, { "epoch": 0.6033618165041923, "grad_norm": 12.211877822875977, "learning_rate": 3.500306387959398e-05, "loss": 32.6484, "step": 3760 }, { "epoch": 0.6035222850724115, "grad_norm": 12.13784122467041, "learning_rate": 3.497862490249546e-05, "loss": 32.6953, "step": 3761 }, { "epoch": 0.6036827536406306, "grad_norm": 12.275322914123535, "learning_rate": 3.495418986948673e-05, "loss": 32.7031, "step": 3762 }, { "epoch": 0.6038432222088499, "grad_norm": 11.947810173034668, "learning_rate": 3.492975878698358e-05, "loss": 32.7656, "step": 3763 }, { "epoch": 0.6040036907770691, "grad_norm": 12.33576488494873, "learning_rate": 3.4905331661400775e-05, "loss": 32.7344, "step": 3764 }, { "epoch": 0.6041641593452882, "grad_norm": 12.33642864227295, "learning_rate": 3.488090849915203e-05, "loss": 32.5859, "step": 3765 }, { "epoch": 0.6043246279135075, "grad_norm": 12.268169403076172, "learning_rate": 3.485648930665001e-05, "loss": 32.625, "step": 3766 }, { "epoch": 0.6044850964817267, "grad_norm": 12.206457138061523, "learning_rate": 3.483207409030633e-05, "loss": 32.6875, "step": 3767 }, { "epoch": 0.6046455650499458, "grad_norm": 12.271087646484375, "learning_rate": 3.48076628565316e-05, "loss": 32.7344, "step": 3768 }, { "epoch": 0.6048060336181651, "grad_norm": 12.338930130004883, "learning_rate": 3.478325561173535e-05, "loss": 32.5859, "step": 3769 }, { "epoch": 0.6049665021863843, "grad_norm": 12.205745697021484, "learning_rate": 3.475885236232607e-05, "loss": 32.6406, "step": 3770 }, { "epoch": 0.6051269707546034, "grad_norm": 12.2034912109375, "learning_rate": 3.4734453114711194e-05, "loss": 32.6172, "step": 3771 }, { "epoch": 0.6052874393228227, "grad_norm": 12.142410278320312, "learning_rate": 3.471005787529714e-05, "loss": 32.6484, "step": 3772 }, { "epoch": 0.6054479078910419, "grad_norm": 12.13345718383789, "learning_rate": 3.468566665048924e-05, "loss": 32.6797, "step": 3773 }, { "epoch": 0.605608376459261, "grad_norm": 12.260210990905762, "learning_rate": 3.466127944669175e-05, "loss": 32.6094, "step": 3774 }, { "epoch": 0.6057688450274803, "grad_norm": 12.269150733947754, "learning_rate": 3.463689627030794e-05, "loss": 32.5938, "step": 3775 }, { "epoch": 0.6059293135956995, "grad_norm": 12.638394355773926, "learning_rate": 3.461251712773994e-05, "loss": 32.5938, "step": 3776 }, { "epoch": 0.6060897821639186, "grad_norm": 12.13276195526123, "learning_rate": 3.45881420253889e-05, "loss": 32.6328, "step": 3777 }, { "epoch": 0.6062502507321379, "grad_norm": 12.413818359375, "learning_rate": 3.456377096965487e-05, "loss": 32.6016, "step": 3778 }, { "epoch": 0.6064107193003571, "grad_norm": 12.020486831665039, "learning_rate": 3.453940396693683e-05, "loss": 32.7031, "step": 3779 }, { "epoch": 0.6065711878685762, "grad_norm": 12.402395248413086, "learning_rate": 3.4515041023632696e-05, "loss": 32.5938, "step": 3780 }, { "epoch": 0.6067316564367955, "grad_norm": 11.948982238769531, "learning_rate": 3.449068214613935e-05, "loss": 32.6562, "step": 3781 }, { "epoch": 0.6068921250050147, "grad_norm": 12.517374038696289, "learning_rate": 3.446632734085257e-05, "loss": 32.5625, "step": 3782 }, { "epoch": 0.6070525935732338, "grad_norm": 12.519057273864746, "learning_rate": 3.444197661416709e-05, "loss": 32.5312, "step": 3783 }, { "epoch": 0.6072130621414531, "grad_norm": 12.013790130615234, "learning_rate": 3.441762997247654e-05, "loss": 32.6641, "step": 3784 }, { "epoch": 0.6073735307096723, "grad_norm": 12.4563627243042, "learning_rate": 3.4393287422173546e-05, "loss": 32.5312, "step": 3785 }, { "epoch": 0.6075339992778914, "grad_norm": 12.14511489868164, "learning_rate": 3.436894896964956e-05, "loss": 32.6953, "step": 3786 }, { "epoch": 0.6076944678461107, "grad_norm": 12.009445190429688, "learning_rate": 3.434461462129504e-05, "loss": 32.6562, "step": 3787 }, { "epoch": 0.6078549364143299, "grad_norm": 12.09693431854248, "learning_rate": 3.4320284383499344e-05, "loss": 32.7891, "step": 3788 }, { "epoch": 0.608015404982549, "grad_norm": 12.215110778808594, "learning_rate": 3.429595826265073e-05, "loss": 32.75, "step": 3789 }, { "epoch": 0.6081758735507683, "grad_norm": 12.515267372131348, "learning_rate": 3.4271636265136374e-05, "loss": 32.6172, "step": 3790 }, { "epoch": 0.6083363421189875, "grad_norm": 12.320855140686035, "learning_rate": 3.424731839734243e-05, "loss": 32.5547, "step": 3791 }, { "epoch": 0.6084968106872066, "grad_norm": 12.077014923095703, "learning_rate": 3.422300466565389e-05, "loss": 32.6719, "step": 3792 }, { "epoch": 0.6086572792554259, "grad_norm": 12.26582145690918, "learning_rate": 3.4198695076454706e-05, "loss": 32.6484, "step": 3793 }, { "epoch": 0.6088177478236451, "grad_norm": 11.953093528747559, "learning_rate": 3.417438963612771e-05, "loss": 32.7188, "step": 3794 }, { "epoch": 0.6089782163918642, "grad_norm": 12.520390510559082, "learning_rate": 3.41500883510547e-05, "loss": 32.6328, "step": 3795 }, { "epoch": 0.6091386849600835, "grad_norm": 12.4527006149292, "learning_rate": 3.412579122761631e-05, "loss": 32.5938, "step": 3796 }, { "epoch": 0.6092991535283027, "grad_norm": 12.088016510009766, "learning_rate": 3.4101498272192154e-05, "loss": 32.875, "step": 3797 }, { "epoch": 0.6094596220965218, "grad_norm": 12.29342269897461, "learning_rate": 3.407720949116069e-05, "loss": 32.8047, "step": 3798 }, { "epoch": 0.6096200906647411, "grad_norm": 12.105511665344238, "learning_rate": 3.40529248908993e-05, "loss": 32.9453, "step": 3799 }, { "epoch": 0.6097805592329603, "grad_norm": 12.298863410949707, "learning_rate": 3.402864447778428e-05, "loss": 33.0586, "step": 3800 }, { "epoch": 0.6099410278011794, "grad_norm": 12.47733211517334, "learning_rate": 3.400436825819083e-05, "loss": 33.125, "step": 3801 }, { "epoch": 0.6101014963693987, "grad_norm": 12.178444862365723, "learning_rate": 3.3980096238493056e-05, "loss": 32.9844, "step": 3802 }, { "epoch": 0.6102619649376179, "grad_norm": 12.277992248535156, "learning_rate": 3.395582842506391e-05, "loss": 32.8438, "step": 3803 }, { "epoch": 0.610422433505837, "grad_norm": 12.209622383117676, "learning_rate": 3.393156482427527e-05, "loss": 32.6875, "step": 3804 }, { "epoch": 0.6105829020740563, "grad_norm": 12.2752685546875, "learning_rate": 3.390730544249795e-05, "loss": 32.7734, "step": 3805 }, { "epoch": 0.6107433706422755, "grad_norm": 12.207980155944824, "learning_rate": 3.3883050286101604e-05, "loss": 32.7578, "step": 3806 }, { "epoch": 0.6109038392104946, "grad_norm": 12.214301109313965, "learning_rate": 3.385879936145478e-05, "loss": 32.6875, "step": 3807 }, { "epoch": 0.6110643077787139, "grad_norm": 12.083067893981934, "learning_rate": 3.383455267492493e-05, "loss": 32.7031, "step": 3808 }, { "epoch": 0.6112247763469331, "grad_norm": 12.014479637145996, "learning_rate": 3.381031023287842e-05, "loss": 32.7891, "step": 3809 }, { "epoch": 0.6113852449151522, "grad_norm": 12.084776878356934, "learning_rate": 3.3786072041680406e-05, "loss": 32.8125, "step": 3810 }, { "epoch": 0.6115457134833714, "grad_norm": 11.953312873840332, "learning_rate": 3.3761838107695045e-05, "loss": 32.707, "step": 3811 }, { "epoch": 0.6117061820515907, "grad_norm": 12.015544891357422, "learning_rate": 3.373760843728531e-05, "loss": 32.7109, "step": 3812 }, { "epoch": 0.6118666506198098, "grad_norm": 12.216556549072266, "learning_rate": 3.371338303681306e-05, "loss": 32.8047, "step": 3813 }, { "epoch": 0.612027119188029, "grad_norm": 12.213202476501465, "learning_rate": 3.3689161912639047e-05, "loss": 32.7344, "step": 3814 }, { "epoch": 0.6121875877562483, "grad_norm": 12.206872940063477, "learning_rate": 3.36649450711229e-05, "loss": 32.6953, "step": 3815 }, { "epoch": 0.6123480563244674, "grad_norm": 12.00633430480957, "learning_rate": 3.364073251862311e-05, "loss": 32.6875, "step": 3816 }, { "epoch": 0.6125085248926866, "grad_norm": 12.085654258728027, "learning_rate": 3.361652426149704e-05, "loss": 32.6875, "step": 3817 }, { "epoch": 0.6126689934609059, "grad_norm": 12.011541366577148, "learning_rate": 3.3592320306100965e-05, "loss": 32.6406, "step": 3818 }, { "epoch": 0.612829462029125, "grad_norm": 12.071128845214844, "learning_rate": 3.356812065878998e-05, "loss": 32.7422, "step": 3819 }, { "epoch": 0.6129899305973442, "grad_norm": 12.008995056152344, "learning_rate": 3.354392532591808e-05, "loss": 32.625, "step": 3820 }, { "epoch": 0.6131503991655635, "grad_norm": 12.205297470092773, "learning_rate": 3.351973431383811e-05, "loss": 32.6953, "step": 3821 }, { "epoch": 0.6133108677337826, "grad_norm": 12.400455474853516, "learning_rate": 3.349554762890178e-05, "loss": 32.6172, "step": 3822 }, { "epoch": 0.6134713363020018, "grad_norm": 12.142451286315918, "learning_rate": 3.347136527745968e-05, "loss": 32.6328, "step": 3823 }, { "epoch": 0.6136318048702211, "grad_norm": 12.073012351989746, "learning_rate": 3.344718726586123e-05, "loss": 32.7891, "step": 3824 }, { "epoch": 0.6137922734384402, "grad_norm": 12.006672859191895, "learning_rate": 3.342301360045477e-05, "loss": 32.6016, "step": 3825 }, { "epoch": 0.6139527420066594, "grad_norm": 11.825037002563477, "learning_rate": 3.339884428758745e-05, "loss": 32.7891, "step": 3826 }, { "epoch": 0.6141132105748787, "grad_norm": 12.14023208618164, "learning_rate": 3.337467933360527e-05, "loss": 32.6719, "step": 3827 }, { "epoch": 0.6142736791430978, "grad_norm": 12.198405265808105, "learning_rate": 3.3350518744853135e-05, "loss": 32.6406, "step": 3828 }, { "epoch": 0.614434147711317, "grad_norm": 12.338452339172363, "learning_rate": 3.3326362527674756e-05, "loss": 32.6328, "step": 3829 }, { "epoch": 0.6145946162795363, "grad_norm": 12.2776517868042, "learning_rate": 3.3302210688412714e-05, "loss": 32.6328, "step": 3830 }, { "epoch": 0.6147550848477554, "grad_norm": 12.215742111206055, "learning_rate": 3.327806323340844e-05, "loss": 32.7266, "step": 3831 }, { "epoch": 0.6149155534159746, "grad_norm": 12.13964557647705, "learning_rate": 3.325392016900225e-05, "loss": 32.6406, "step": 3832 }, { "epoch": 0.6150760219841939, "grad_norm": 12.199101448059082, "learning_rate": 3.322978150153321e-05, "loss": 32.6641, "step": 3833 }, { "epoch": 0.615236490552413, "grad_norm": 12.198150634765625, "learning_rate": 3.320564723733933e-05, "loss": 32.6953, "step": 3834 }, { "epoch": 0.6153969591206322, "grad_norm": 11.95039176940918, "learning_rate": 3.3181517382757425e-05, "loss": 32.7344, "step": 3835 }, { "epoch": 0.6155574276888515, "grad_norm": 12.197611808776855, "learning_rate": 3.315739194412315e-05, "loss": 32.6484, "step": 3836 }, { "epoch": 0.6157178962570706, "grad_norm": 12.207688331604004, "learning_rate": 3.313327092777099e-05, "loss": 32.6875, "step": 3837 }, { "epoch": 0.6158783648252898, "grad_norm": 12.019599914550781, "learning_rate": 3.3109154340034315e-05, "loss": 32.6758, "step": 3838 }, { "epoch": 0.6160388333935091, "grad_norm": 12.194872856140137, "learning_rate": 3.308504218724528e-05, "loss": 32.5859, "step": 3839 }, { "epoch": 0.6161993019617282, "grad_norm": 12.137645721435547, "learning_rate": 3.30609344757349e-05, "loss": 32.6719, "step": 3840 }, { "epoch": 0.6163597705299474, "grad_norm": 12.010112762451172, "learning_rate": 3.3036831211833006e-05, "loss": 32.6016, "step": 3841 }, { "epoch": 0.6165202390981667, "grad_norm": 12.467964172363281, "learning_rate": 3.301273240186831e-05, "loss": 32.6328, "step": 3842 }, { "epoch": 0.6166807076663858, "grad_norm": 12.266769409179688, "learning_rate": 3.29886380521683e-05, "loss": 32.6484, "step": 3843 }, { "epoch": 0.616841176234605, "grad_norm": 12.696147918701172, "learning_rate": 3.296454816905931e-05, "loss": 32.5781, "step": 3844 }, { "epoch": 0.6170016448028243, "grad_norm": 12.075810432434082, "learning_rate": 3.294046275886651e-05, "loss": 32.6953, "step": 3845 }, { "epoch": 0.6171621133710434, "grad_norm": 12.271076202392578, "learning_rate": 3.291638182791388e-05, "loss": 32.6484, "step": 3846 }, { "epoch": 0.6173225819392626, "grad_norm": 12.257250785827637, "learning_rate": 3.289230538252422e-05, "loss": 32.6641, "step": 3847 }, { "epoch": 0.6174830505074819, "grad_norm": 12.452792167663574, "learning_rate": 3.2868233429019215e-05, "loss": 32.6562, "step": 3848 }, { "epoch": 0.617643519075701, "grad_norm": 12.154302597045898, "learning_rate": 3.2844165973719285e-05, "loss": 32.8359, "step": 3849 }, { "epoch": 0.6178039876439202, "grad_norm": 12.221757888793945, "learning_rate": 3.282010302294371e-05, "loss": 32.8438, "step": 3850 }, { "epoch": 0.6179644562121395, "grad_norm": 12.23040771484375, "learning_rate": 3.279604458301058e-05, "loss": 32.7656, "step": 3851 }, { "epoch": 0.6181249247803586, "grad_norm": 12.420522689819336, "learning_rate": 3.2771990660236815e-05, "loss": 32.8359, "step": 3852 }, { "epoch": 0.6182853933485778, "grad_norm": 12.142989158630371, "learning_rate": 3.274794126093813e-05, "loss": 32.7422, "step": 3853 }, { "epoch": 0.6184458619167971, "grad_norm": 12.537836074829102, "learning_rate": 3.272389639142905e-05, "loss": 32.7422, "step": 3854 }, { "epoch": 0.6186063304850162, "grad_norm": 12.345328330993652, "learning_rate": 3.269985605802294e-05, "loss": 32.8359, "step": 3855 }, { "epoch": 0.6187667990532354, "grad_norm": 11.95272445678711, "learning_rate": 3.2675820267031966e-05, "loss": 32.7812, "step": 3856 }, { "epoch": 0.6189272676214547, "grad_norm": 12.275110244750977, "learning_rate": 3.2651789024767035e-05, "loss": 32.8047, "step": 3857 }, { "epoch": 0.6190877361896738, "grad_norm": 12.534021377563477, "learning_rate": 3.2627762337537956e-05, "loss": 32.6641, "step": 3858 }, { "epoch": 0.619248204757893, "grad_norm": 11.947364807128906, "learning_rate": 3.2603740211653297e-05, "loss": 32.7031, "step": 3859 }, { "epoch": 0.6194086733261123, "grad_norm": 12.160625457763672, "learning_rate": 3.257972265342042e-05, "loss": 33.0469, "step": 3860 }, { "epoch": 0.6195691418943314, "grad_norm": 12.076281547546387, "learning_rate": 3.255570966914549e-05, "loss": 32.6484, "step": 3861 }, { "epoch": 0.6197296104625506, "grad_norm": 12.211888313293457, "learning_rate": 3.2531701265133505e-05, "loss": 32.7461, "step": 3862 }, { "epoch": 0.6198900790307699, "grad_norm": 12.388444900512695, "learning_rate": 3.250769744768823e-05, "loss": 32.6094, "step": 3863 }, { "epoch": 0.620050547598989, "grad_norm": 12.208839416503906, "learning_rate": 3.248369822311222e-05, "loss": 32.7109, "step": 3864 }, { "epoch": 0.6202110161672082, "grad_norm": 12.138554573059082, "learning_rate": 3.245970359770684e-05, "loss": 32.6875, "step": 3865 }, { "epoch": 0.6203714847354275, "grad_norm": 12.08031940460205, "learning_rate": 3.243571357777225e-05, "loss": 32.7344, "step": 3866 }, { "epoch": 0.6205319533036466, "grad_norm": 12.34432601928711, "learning_rate": 3.2411728169607394e-05, "loss": 32.7539, "step": 3867 }, { "epoch": 0.6206924218718658, "grad_norm": 12.404481887817383, "learning_rate": 3.238774737951e-05, "loss": 32.6328, "step": 3868 }, { "epoch": 0.6208528904400851, "grad_norm": 12.078803062438965, "learning_rate": 3.236377121377657e-05, "loss": 32.6602, "step": 3869 }, { "epoch": 0.6210133590083042, "grad_norm": 12.01256275177002, "learning_rate": 3.233979967870244e-05, "loss": 32.6328, "step": 3870 }, { "epoch": 0.6211738275765234, "grad_norm": 12.00841999053955, "learning_rate": 3.231583278058167e-05, "loss": 32.6875, "step": 3871 }, { "epoch": 0.6213342961447427, "grad_norm": 12.263066291809082, "learning_rate": 3.2291870525707156e-05, "loss": 32.625, "step": 3872 }, { "epoch": 0.6214947647129618, "grad_norm": 12.013908386230469, "learning_rate": 3.2267912920370547e-05, "loss": 32.5938, "step": 3873 }, { "epoch": 0.621655233281181, "grad_norm": 12.016862869262695, "learning_rate": 3.224395997086226e-05, "loss": 32.6953, "step": 3874 }, { "epoch": 0.6218157018494003, "grad_norm": 12.270346641540527, "learning_rate": 3.222001168347153e-05, "loss": 32.5625, "step": 3875 }, { "epoch": 0.6219761704176194, "grad_norm": 12.008161544799805, "learning_rate": 3.219606806448633e-05, "loss": 32.6797, "step": 3876 }, { "epoch": 0.6221366389858386, "grad_norm": 12.074244499206543, "learning_rate": 3.2172129120193415e-05, "loss": 32.7422, "step": 3877 }, { "epoch": 0.6222971075540579, "grad_norm": 12.204928398132324, "learning_rate": 3.2148194856878325e-05, "loss": 32.6562, "step": 3878 }, { "epoch": 0.622457576122277, "grad_norm": 11.944314002990723, "learning_rate": 3.212426528082538e-05, "loss": 32.6406, "step": 3879 }, { "epoch": 0.6226180446904962, "grad_norm": 12.068490982055664, "learning_rate": 3.2100340398317614e-05, "loss": 32.6328, "step": 3880 }, { "epoch": 0.6227785132587155, "grad_norm": 11.941576957702637, "learning_rate": 3.2076420215636894e-05, "loss": 32.6484, "step": 3881 }, { "epoch": 0.6229389818269346, "grad_norm": 12.077844619750977, "learning_rate": 3.205250473906383e-05, "loss": 32.5938, "step": 3882 }, { "epoch": 0.6230994503951538, "grad_norm": 12.152231216430664, "learning_rate": 3.202859397487777e-05, "loss": 32.6797, "step": 3883 }, { "epoch": 0.6232599189633731, "grad_norm": 12.206893920898438, "learning_rate": 3.200468792935687e-05, "loss": 32.6406, "step": 3884 }, { "epoch": 0.6234203875315922, "grad_norm": 12.078174591064453, "learning_rate": 3.1980786608778014e-05, "loss": 32.6445, "step": 3885 }, { "epoch": 0.6235808560998114, "grad_norm": 12.20309829711914, "learning_rate": 3.195689001941686e-05, "loss": 32.5781, "step": 3886 }, { "epoch": 0.6237413246680307, "grad_norm": 12.265830993652344, "learning_rate": 3.193299816754782e-05, "loss": 32.6719, "step": 3887 }, { "epoch": 0.6239017932362498, "grad_norm": 12.398613929748535, "learning_rate": 3.190911105944407e-05, "loss": 32.5703, "step": 3888 }, { "epoch": 0.624062261804469, "grad_norm": 12.073345184326172, "learning_rate": 3.188522870137751e-05, "loss": 32.7578, "step": 3889 }, { "epoch": 0.6242227303726883, "grad_norm": 12.534049034118652, "learning_rate": 3.186135109961883e-05, "loss": 32.6328, "step": 3890 }, { "epoch": 0.6243831989409074, "grad_norm": 12.13752555847168, "learning_rate": 3.183747826043747e-05, "loss": 32.6406, "step": 3891 }, { "epoch": 0.6245436675091266, "grad_norm": 12.264070510864258, "learning_rate": 3.181361019010159e-05, "loss": 32.6484, "step": 3892 }, { "epoch": 0.6247041360773459, "grad_norm": 12.09269905090332, "learning_rate": 3.17897468948781e-05, "loss": 32.75, "step": 3893 }, { "epoch": 0.624864604645565, "grad_norm": 12.389420509338379, "learning_rate": 3.176588838103269e-05, "loss": 32.5938, "step": 3894 }, { "epoch": 0.6250250732137842, "grad_norm": 12.895341873168945, "learning_rate": 3.174203465482977e-05, "loss": 32.5703, "step": 3895 }, { "epoch": 0.6251855417820035, "grad_norm": 12.133003234863281, "learning_rate": 3.1718185722532504e-05, "loss": 32.625, "step": 3896 }, { "epoch": 0.6253460103502226, "grad_norm": 12.138745307922363, "learning_rate": 3.169434159040277e-05, "loss": 32.6484, "step": 3897 }, { "epoch": 0.6255064789184418, "grad_norm": 12.139738082885742, "learning_rate": 3.167050226470122e-05, "loss": 32.6797, "step": 3898 }, { "epoch": 0.6256669474866611, "grad_norm": 12.282882690429688, "learning_rate": 3.1646667751687245e-05, "loss": 32.8203, "step": 3899 }, { "epoch": 0.6258274160548802, "grad_norm": 12.306949615478516, "learning_rate": 3.162283805761893e-05, "loss": 32.8164, "step": 3900 }, { "epoch": 0.6259878846230994, "grad_norm": 12.219400405883789, "learning_rate": 3.159901318875314e-05, "loss": 32.7734, "step": 3901 }, { "epoch": 0.6261483531913187, "grad_norm": 12.3494291305542, "learning_rate": 3.157519315134545e-05, "loss": 32.7188, "step": 3902 }, { "epoch": 0.6263088217595378, "grad_norm": 12.290868759155273, "learning_rate": 3.1551377951650205e-05, "loss": 32.8125, "step": 3903 }, { "epoch": 0.626469290327757, "grad_norm": 12.226056098937988, "learning_rate": 3.152756759592037e-05, "loss": 32.7734, "step": 3904 }, { "epoch": 0.6266297588959763, "grad_norm": 12.156743049621582, "learning_rate": 3.150376209040778e-05, "loss": 32.8203, "step": 3905 }, { "epoch": 0.6267902274641954, "grad_norm": 11.949551582336426, "learning_rate": 3.147996144136291e-05, "loss": 32.7031, "step": 3906 }, { "epoch": 0.6269506960324146, "grad_norm": 12.2138090133667, "learning_rate": 3.145616565503498e-05, "loss": 32.6641, "step": 3907 }, { "epoch": 0.6271111646006339, "grad_norm": 11.957330703735352, "learning_rate": 3.1432374737671944e-05, "loss": 32.8438, "step": 3908 }, { "epoch": 0.627271633168853, "grad_norm": 12.138328552246094, "learning_rate": 3.140858869552046e-05, "loss": 32.6641, "step": 3909 }, { "epoch": 0.6274321017370722, "grad_norm": 12.019499778747559, "learning_rate": 3.138480753482591e-05, "loss": 32.8438, "step": 3910 }, { "epoch": 0.6275925703052915, "grad_norm": 12.197311401367188, "learning_rate": 3.136103126183241e-05, "loss": 32.6016, "step": 3911 }, { "epoch": 0.6277530388735106, "grad_norm": 12.082196235656738, "learning_rate": 3.133725988278276e-05, "loss": 32.7344, "step": 3912 }, { "epoch": 0.6279135074417298, "grad_norm": 12.259875297546387, "learning_rate": 3.131349340391853e-05, "loss": 32.6406, "step": 3913 }, { "epoch": 0.6280739760099491, "grad_norm": 12.322641372680664, "learning_rate": 3.128973183147994e-05, "loss": 32.6484, "step": 3914 }, { "epoch": 0.6282344445781682, "grad_norm": 12.139511108398438, "learning_rate": 3.126597517170598e-05, "loss": 32.7969, "step": 3915 }, { "epoch": 0.6283949131463874, "grad_norm": 12.272000312805176, "learning_rate": 3.124222343083429e-05, "loss": 32.7812, "step": 3916 }, { "epoch": 0.6285553817146067, "grad_norm": 12.40396499633789, "learning_rate": 3.121847661510126e-05, "loss": 32.8125, "step": 3917 }, { "epoch": 0.6287158502828258, "grad_norm": 12.140229225158691, "learning_rate": 3.119473473074197e-05, "loss": 32.625, "step": 3918 }, { "epoch": 0.628876318851045, "grad_norm": 12.149857521057129, "learning_rate": 3.117099778399023e-05, "loss": 32.7266, "step": 3919 }, { "epoch": 0.6290367874192643, "grad_norm": 12.135348320007324, "learning_rate": 3.114726578107853e-05, "loss": 32.625, "step": 3920 }, { "epoch": 0.6291972559874834, "grad_norm": 12.073982238769531, "learning_rate": 3.112353872823806e-05, "loss": 32.6172, "step": 3921 }, { "epoch": 0.6293577245557026, "grad_norm": 12.010427474975586, "learning_rate": 3.109981663169871e-05, "loss": 32.6797, "step": 3922 }, { "epoch": 0.6295181931239219, "grad_norm": 12.26507568359375, "learning_rate": 3.1076099497689105e-05, "loss": 32.5703, "step": 3923 }, { "epoch": 0.629678661692141, "grad_norm": 12.008885383605957, "learning_rate": 3.1052387332436504e-05, "loss": 32.6875, "step": 3924 }, { "epoch": 0.6298391302603602, "grad_norm": 12.521852493286133, "learning_rate": 3.102868014216691e-05, "loss": 32.6016, "step": 3925 }, { "epoch": 0.6299995988285795, "grad_norm": 12.200695991516113, "learning_rate": 3.100497793310504e-05, "loss": 32.625, "step": 3926 }, { "epoch": 0.6301600673967986, "grad_norm": 12.0697603225708, "learning_rate": 3.0981280711474214e-05, "loss": 32.6992, "step": 3927 }, { "epoch": 0.6303205359650178, "grad_norm": 12.070611000061035, "learning_rate": 3.0957588483496513e-05, "loss": 32.6719, "step": 3928 }, { "epoch": 0.6304810045332371, "grad_norm": 12.152384757995605, "learning_rate": 3.093390125539269e-05, "loss": 32.6406, "step": 3929 }, { "epoch": 0.6306414731014562, "grad_norm": 12.08122444152832, "learning_rate": 3.091021903338221e-05, "loss": 32.6953, "step": 3930 }, { "epoch": 0.6308019416696754, "grad_norm": 12.195094108581543, "learning_rate": 3.0886541823683165e-05, "loss": 32.6172, "step": 3931 }, { "epoch": 0.6309624102378947, "grad_norm": 12.193861961364746, "learning_rate": 3.086286963251239e-05, "loss": 32.6016, "step": 3932 }, { "epoch": 0.6311228788061138, "grad_norm": 12.263833045959473, "learning_rate": 3.083920246608537e-05, "loss": 32.6133, "step": 3933 }, { "epoch": 0.631283347374333, "grad_norm": 12.14999771118164, "learning_rate": 3.081554033061628e-05, "loss": 32.6797, "step": 3934 }, { "epoch": 0.6314438159425523, "grad_norm": 12.19926929473877, "learning_rate": 3.0791883232317955e-05, "loss": 32.6484, "step": 3935 }, { "epoch": 0.6316042845107714, "grad_norm": 12.266716003417969, "learning_rate": 3.076823117740195e-05, "loss": 32.6172, "step": 3936 }, { "epoch": 0.6317647530789906, "grad_norm": 12.455559730529785, "learning_rate": 3.074458417207846e-05, "loss": 32.5234, "step": 3937 }, { "epoch": 0.6319252216472099, "grad_norm": 12.263051986694336, "learning_rate": 3.072094222255638e-05, "loss": 32.6211, "step": 3938 }, { "epoch": 0.632085690215429, "grad_norm": 12.513256072998047, "learning_rate": 3.0697305335043234e-05, "loss": 32.625, "step": 3939 }, { "epoch": 0.6322461587836482, "grad_norm": 12.194822311401367, "learning_rate": 3.067367351574526e-05, "loss": 32.6172, "step": 3940 }, { "epoch": 0.6324066273518675, "grad_norm": 12.072813987731934, "learning_rate": 3.0650046770867344e-05, "loss": 32.6562, "step": 3941 }, { "epoch": 0.6325670959200866, "grad_norm": 12.198344230651855, "learning_rate": 3.062642510661306e-05, "loss": 32.6016, "step": 3942 }, { "epoch": 0.6327275644883058, "grad_norm": 12.393199920654297, "learning_rate": 3.0602808529184626e-05, "loss": 32.6172, "step": 3943 }, { "epoch": 0.6328880330565251, "grad_norm": 12.451923370361328, "learning_rate": 3.057919704478293e-05, "loss": 32.625, "step": 3944 }, { "epoch": 0.6330485016247442, "grad_norm": 12.271222114562988, "learning_rate": 3.0555590659607516e-05, "loss": 32.6641, "step": 3945 }, { "epoch": 0.6332089701929634, "grad_norm": 12.075728416442871, "learning_rate": 3.053198937985662e-05, "loss": 32.6914, "step": 3946 }, { "epoch": 0.6333694387611827, "grad_norm": 12.350379943847656, "learning_rate": 3.0508393211727115e-05, "loss": 32.7031, "step": 3947 }, { "epoch": 0.6335299073294018, "grad_norm": 12.367754936218262, "learning_rate": 3.0484802161414517e-05, "loss": 32.9453, "step": 3948 }, { "epoch": 0.633690375897621, "grad_norm": 12.807127952575684, "learning_rate": 3.046121623511301e-05, "loss": 32.8125, "step": 3949 }, { "epoch": 0.6338508444658403, "grad_norm": 12.032925605773926, "learning_rate": 3.0437635439015476e-05, "loss": 33.0469, "step": 3950 }, { "epoch": 0.6340113130340594, "grad_norm": 12.50973129272461, "learning_rate": 3.0414059779313353e-05, "loss": 32.9141, "step": 3951 }, { "epoch": 0.6341717816022786, "grad_norm": 12.620070457458496, "learning_rate": 3.039048926219683e-05, "loss": 32.9297, "step": 3952 }, { "epoch": 0.6343322501704979, "grad_norm": 12.029611587524414, "learning_rate": 3.036692389385468e-05, "loss": 32.8672, "step": 3953 }, { "epoch": 0.634492718738717, "grad_norm": 12.437466621398926, "learning_rate": 3.0343363680474368e-05, "loss": 32.9531, "step": 3954 }, { "epoch": 0.6346531873069362, "grad_norm": 12.28226089477539, "learning_rate": 3.0319808628241964e-05, "loss": 32.7734, "step": 3955 }, { "epoch": 0.6348136558751555, "grad_norm": 12.278997421264648, "learning_rate": 3.029625874334222e-05, "loss": 32.6797, "step": 3956 }, { "epoch": 0.6349741244433746, "grad_norm": 12.215486526489258, "learning_rate": 3.0272714031958517e-05, "loss": 32.8906, "step": 3957 }, { "epoch": 0.6351345930115938, "grad_norm": 12.468183517456055, "learning_rate": 3.024917450027287e-05, "loss": 32.6172, "step": 3958 }, { "epoch": 0.6352950615798131, "grad_norm": 12.273743629455566, "learning_rate": 3.0225640154465928e-05, "loss": 32.7422, "step": 3959 }, { "epoch": 0.6354555301480322, "grad_norm": 12.414531707763672, "learning_rate": 3.0202111000717014e-05, "loss": 32.7188, "step": 3960 }, { "epoch": 0.6356159987162514, "grad_norm": 12.077595710754395, "learning_rate": 3.0178587045204048e-05, "loss": 32.6875, "step": 3961 }, { "epoch": 0.6357764672844707, "grad_norm": 12.274703979492188, "learning_rate": 3.0155068294103617e-05, "loss": 32.6875, "step": 3962 }, { "epoch": 0.6359369358526898, "grad_norm": 12.337233543395996, "learning_rate": 3.0131554753590907e-05, "loss": 32.6875, "step": 3963 }, { "epoch": 0.636097404420909, "grad_norm": 12.274480819702148, "learning_rate": 3.0108046429839754e-05, "loss": 32.6641, "step": 3964 }, { "epoch": 0.6362578729891283, "grad_norm": 12.262864112854004, "learning_rate": 3.008454332902263e-05, "loss": 32.6328, "step": 3965 }, { "epoch": 0.6364183415573474, "grad_norm": 12.137571334838867, "learning_rate": 3.0061045457310644e-05, "loss": 32.6875, "step": 3966 }, { "epoch": 0.6365788101255666, "grad_norm": 12.337268829345703, "learning_rate": 3.0037552820873504e-05, "loss": 32.6641, "step": 3967 }, { "epoch": 0.6367392786937859, "grad_norm": 12.143854141235352, "learning_rate": 3.0014065425879562e-05, "loss": 32.6094, "step": 3968 }, { "epoch": 0.636899747262005, "grad_norm": 12.075421333312988, "learning_rate": 2.9990583278495777e-05, "loss": 32.6836, "step": 3969 }, { "epoch": 0.6370602158302242, "grad_norm": 12.386360168457031, "learning_rate": 2.9967106384887766e-05, "loss": 32.5859, "step": 3970 }, { "epoch": 0.6372206843984435, "grad_norm": 12.091395378112793, "learning_rate": 2.994363475121974e-05, "loss": 32.8203, "step": 3971 }, { "epoch": 0.6373811529666626, "grad_norm": 12.009478569030762, "learning_rate": 2.9920168383654508e-05, "loss": 32.6562, "step": 3972 }, { "epoch": 0.6375416215348818, "grad_norm": 12.136879920959473, "learning_rate": 2.9896707288353543e-05, "loss": 32.6172, "step": 3973 }, { "epoch": 0.6377020901031011, "grad_norm": 12.07259750366211, "learning_rate": 2.9873251471476936e-05, "loss": 32.6719, "step": 3974 }, { "epoch": 0.6378625586713202, "grad_norm": 12.008752822875977, "learning_rate": 2.9849800939183305e-05, "loss": 32.6172, "step": 3975 }, { "epoch": 0.6380230272395394, "grad_norm": 12.009711265563965, "learning_rate": 2.9826355697629976e-05, "loss": 32.6328, "step": 3976 }, { "epoch": 0.6381834958077587, "grad_norm": 12.142989158630371, "learning_rate": 2.980291575297286e-05, "loss": 32.6172, "step": 3977 }, { "epoch": 0.6383439643759778, "grad_norm": 12.139960289001465, "learning_rate": 2.977948111136646e-05, "loss": 32.6172, "step": 3978 }, { "epoch": 0.638504432944197, "grad_norm": 12.26916217803955, "learning_rate": 2.975605177896389e-05, "loss": 32.6094, "step": 3979 }, { "epoch": 0.6386649015124163, "grad_norm": 12.138413429260254, "learning_rate": 2.9732627761916888e-05, "loss": 32.6328, "step": 3980 }, { "epoch": 0.6388253700806354, "grad_norm": 12.14905834197998, "learning_rate": 2.9709209066375787e-05, "loss": 32.6875, "step": 3981 }, { "epoch": 0.6389858386488546, "grad_norm": 12.339629173278809, "learning_rate": 2.96857956984895e-05, "loss": 32.6094, "step": 3982 }, { "epoch": 0.6391463072170739, "grad_norm": 12.14476490020752, "learning_rate": 2.966238766440559e-05, "loss": 32.6094, "step": 3983 }, { "epoch": 0.639306775785293, "grad_norm": 12.399520874023438, "learning_rate": 2.9638984970270178e-05, "loss": 32.6484, "step": 3984 }, { "epoch": 0.6394672443535122, "grad_norm": 11.963911056518555, "learning_rate": 2.9615587622228004e-05, "loss": 32.7969, "step": 3985 }, { "epoch": 0.6396277129217315, "grad_norm": 12.578731536865234, "learning_rate": 2.959219562642238e-05, "loss": 32.5703, "step": 3986 }, { "epoch": 0.6397881814899506, "grad_norm": 12.134474754333496, "learning_rate": 2.9568808988995245e-05, "loss": 32.6797, "step": 3987 }, { "epoch": 0.6399486500581698, "grad_norm": 12.326050758361816, "learning_rate": 2.954542771608709e-05, "loss": 32.6562, "step": 3988 }, { "epoch": 0.6401091186263891, "grad_norm": 12.195590019226074, "learning_rate": 2.9522051813837048e-05, "loss": 32.6328, "step": 3989 }, { "epoch": 0.6402695871946082, "grad_norm": 12.259130477905273, "learning_rate": 2.9498681288382817e-05, "loss": 32.5781, "step": 3990 }, { "epoch": 0.6404300557628274, "grad_norm": 12.20203685760498, "learning_rate": 2.9475316145860675e-05, "loss": 32.6953, "step": 3991 }, { "epoch": 0.6405905243310467, "grad_norm": 12.144842147827148, "learning_rate": 2.9451956392405477e-05, "loss": 32.7188, "step": 3992 }, { "epoch": 0.6407509928992658, "grad_norm": 12.647875785827637, "learning_rate": 2.9428602034150715e-05, "loss": 32.5625, "step": 3993 }, { "epoch": 0.640911461467485, "grad_norm": 12.078291893005371, "learning_rate": 2.940525307722841e-05, "loss": 32.6992, "step": 3994 }, { "epoch": 0.6410719300357043, "grad_norm": 12.588744163513184, "learning_rate": 2.9381909527769193e-05, "loss": 32.5859, "step": 3995 }, { "epoch": 0.6412323986039234, "grad_norm": 12.738235473632812, "learning_rate": 2.9358571391902244e-05, "loss": 32.7422, "step": 3996 }, { "epoch": 0.6413928671721426, "grad_norm": 12.39195442199707, "learning_rate": 2.93352386757554e-05, "loss": 32.6406, "step": 3997 }, { "epoch": 0.6415533357403619, "grad_norm": 12.33816146850586, "learning_rate": 2.931191138545495e-05, "loss": 32.7109, "step": 3998 }, { "epoch": 0.641713804308581, "grad_norm": 12.357681274414062, "learning_rate": 2.9288589527125865e-05, "loss": 32.7344, "step": 3999 }, { "epoch": 0.6418742728768002, "grad_norm": 12.729585647583008, "learning_rate": 2.9265273106891643e-05, "loss": 32.9531, "step": 4000 }, { "epoch": 0.6420347414450195, "grad_norm": 12.24566650390625, "learning_rate": 2.9241962130874378e-05, "loss": 33.0469, "step": 4001 }, { "epoch": 0.6421952100132386, "grad_norm": 12.091102600097656, "learning_rate": 2.9218656605194695e-05, "loss": 32.7188, "step": 4002 }, { "epoch": 0.6423556785814578, "grad_norm": 12.360910415649414, "learning_rate": 2.9195356535971842e-05, "loss": 32.8047, "step": 4003 }, { "epoch": 0.6425161471496771, "grad_norm": 12.550888061523438, "learning_rate": 2.917206192932358e-05, "loss": 32.7812, "step": 4004 }, { "epoch": 0.6426766157178962, "grad_norm": 12.147579193115234, "learning_rate": 2.9148772791366285e-05, "loss": 32.9297, "step": 4005 }, { "epoch": 0.6428370842861154, "grad_norm": 12.148853302001953, "learning_rate": 2.9125489128214856e-05, "loss": 32.6875, "step": 4006 }, { "epoch": 0.6429975528543347, "grad_norm": 12.019498825073242, "learning_rate": 2.910221094598278e-05, "loss": 32.6797, "step": 4007 }, { "epoch": 0.6431580214225538, "grad_norm": 12.213434219360352, "learning_rate": 2.9078938250782062e-05, "loss": 32.6719, "step": 4008 }, { "epoch": 0.643318489990773, "grad_norm": 12.411824226379395, "learning_rate": 2.9055671048723383e-05, "loss": 32.8594, "step": 4009 }, { "epoch": 0.6434789585589923, "grad_norm": 12.018806457519531, "learning_rate": 2.9032409345915823e-05, "loss": 32.8164, "step": 4010 }, { "epoch": 0.6436394271272114, "grad_norm": 12.208616256713867, "learning_rate": 2.9009153148467127e-05, "loss": 32.6484, "step": 4011 }, { "epoch": 0.6437998956954306, "grad_norm": 12.012358665466309, "learning_rate": 2.8985902462483555e-05, "loss": 32.7344, "step": 4012 }, { "epoch": 0.6439603642636499, "grad_norm": 12.073837280273438, "learning_rate": 2.896265729406993e-05, "loss": 32.7344, "step": 4013 }, { "epoch": 0.644120832831869, "grad_norm": 12.154541969299316, "learning_rate": 2.8939417649329592e-05, "loss": 32.6797, "step": 4014 }, { "epoch": 0.6442813014000882, "grad_norm": 12.324271202087402, "learning_rate": 2.891618353436452e-05, "loss": 32.6406, "step": 4015 }, { "epoch": 0.6444417699683075, "grad_norm": 12.072504043579102, "learning_rate": 2.8892954955275152e-05, "loss": 32.6523, "step": 4016 }, { "epoch": 0.6446022385365267, "grad_norm": 12.08008861541748, "learning_rate": 2.886973191816051e-05, "loss": 32.6406, "step": 4017 }, { "epoch": 0.6447627071047458, "grad_norm": 12.21681022644043, "learning_rate": 2.884651442911815e-05, "loss": 32.6406, "step": 4018 }, { "epoch": 0.6449231756729651, "grad_norm": 12.138184547424316, "learning_rate": 2.8823302494244176e-05, "loss": 32.7344, "step": 4019 }, { "epoch": 0.6450836442411843, "grad_norm": 12.539312362670898, "learning_rate": 2.8800096119633234e-05, "loss": 32.6016, "step": 4020 }, { "epoch": 0.6452441128094034, "grad_norm": 11.883883476257324, "learning_rate": 2.877689531137851e-05, "loss": 32.6484, "step": 4021 }, { "epoch": 0.6454045813776227, "grad_norm": 12.602266311645508, "learning_rate": 2.8753700075571733e-05, "loss": 32.6641, "step": 4022 }, { "epoch": 0.6455650499458419, "grad_norm": 12.324625015258789, "learning_rate": 2.8730510418303148e-05, "loss": 32.5781, "step": 4023 }, { "epoch": 0.645725518514061, "grad_norm": 12.075206756591797, "learning_rate": 2.8707326345661572e-05, "loss": 32.6016, "step": 4024 }, { "epoch": 0.6458859870822803, "grad_norm": 12.133759498596191, "learning_rate": 2.8684147863734323e-05, "loss": 32.625, "step": 4025 }, { "epoch": 0.6460464556504995, "grad_norm": 11.94691276550293, "learning_rate": 2.866097497860726e-05, "loss": 32.6328, "step": 4026 }, { "epoch": 0.6462069242187186, "grad_norm": 12.264500617980957, "learning_rate": 2.863780769636478e-05, "loss": 32.6875, "step": 4027 }, { "epoch": 0.6463673927869379, "grad_norm": 12.459335327148438, "learning_rate": 2.861464602308979e-05, "loss": 32.6484, "step": 4028 }, { "epoch": 0.646527861355157, "grad_norm": 12.260421752929688, "learning_rate": 2.859148996486377e-05, "loss": 32.5859, "step": 4029 }, { "epoch": 0.6466883299233762, "grad_norm": 12.84316635131836, "learning_rate": 2.856833952776668e-05, "loss": 32.5469, "step": 4030 }, { "epoch": 0.6468487984915955, "grad_norm": 12.327214241027832, "learning_rate": 2.854519471787701e-05, "loss": 32.6484, "step": 4031 }, { "epoch": 0.6470092670598147, "grad_norm": 12.749334335327148, "learning_rate": 2.8522055541271793e-05, "loss": 32.5859, "step": 4032 }, { "epoch": 0.6471697356280338, "grad_norm": 12.073629379272461, "learning_rate": 2.849892200402658e-05, "loss": 32.6562, "step": 4033 }, { "epoch": 0.6473302041962531, "grad_norm": 12.142985343933105, "learning_rate": 2.8475794112215375e-05, "loss": 32.6016, "step": 4034 }, { "epoch": 0.6474906727644723, "grad_norm": 12.209076881408691, "learning_rate": 2.845267187191082e-05, "loss": 32.6016, "step": 4035 }, { "epoch": 0.6476511413326914, "grad_norm": 12.134262084960938, "learning_rate": 2.8429555289183984e-05, "loss": 32.5547, "step": 4036 }, { "epoch": 0.6478116099009107, "grad_norm": 12.662829399108887, "learning_rate": 2.840644437010448e-05, "loss": 32.6172, "step": 4037 }, { "epoch": 0.6479720784691299, "grad_norm": 12.263533592224121, "learning_rate": 2.8383339120740426e-05, "loss": 32.5859, "step": 4038 }, { "epoch": 0.648132547037349, "grad_norm": 12.006806373596191, "learning_rate": 2.836023954715847e-05, "loss": 32.6641, "step": 4039 }, { "epoch": 0.6482930156055683, "grad_norm": 12.582355499267578, "learning_rate": 2.8337145655423737e-05, "loss": 32.625, "step": 4040 }, { "epoch": 0.6484534841737875, "grad_norm": 12.02590274810791, "learning_rate": 2.8314057451599867e-05, "loss": 32.7344, "step": 4041 }, { "epoch": 0.6486139527420066, "grad_norm": 12.578415870666504, "learning_rate": 2.829097494174906e-05, "loss": 32.5312, "step": 4042 }, { "epoch": 0.6487744213102259, "grad_norm": 12.386530876159668, "learning_rate": 2.8267898131931953e-05, "loss": 32.6094, "step": 4043 }, { "epoch": 0.648934889878445, "grad_norm": 12.387877464294434, "learning_rate": 2.8244827028207734e-05, "loss": 32.6094, "step": 4044 }, { "epoch": 0.6490953584466642, "grad_norm": 12.449673652648926, "learning_rate": 2.8221761636634037e-05, "loss": 32.6328, "step": 4045 }, { "epoch": 0.6492558270148835, "grad_norm": 12.323515892028809, "learning_rate": 2.8198701963267044e-05, "loss": 32.6641, "step": 4046 }, { "epoch": 0.6494162955831027, "grad_norm": 12.015003204345703, "learning_rate": 2.8175648014161405e-05, "loss": 32.7344, "step": 4047 }, { "epoch": 0.6495767641513218, "grad_norm": 12.427068710327148, "learning_rate": 2.8152599795370317e-05, "loss": 32.7109, "step": 4048 }, { "epoch": 0.6497372327195411, "grad_norm": 12.281229972839355, "learning_rate": 2.812955731294543e-05, "loss": 32.6836, "step": 4049 }, { "epoch": 0.6498977012877603, "grad_norm": 12.059517860412598, "learning_rate": 2.810652057293689e-05, "loss": 33.0547, "step": 4050 }, { "epoch": 0.6500581698559794, "grad_norm": 12.487459182739258, "learning_rate": 2.8083489581393345e-05, "loss": 33.5, "step": 4051 }, { "epoch": 0.6502186384241987, "grad_norm": 12.093109130859375, "learning_rate": 2.8060464344361938e-05, "loss": 32.8047, "step": 4052 }, { "epoch": 0.6503791069924179, "grad_norm": 12.09921932220459, "learning_rate": 2.8037444867888275e-05, "loss": 32.7891, "step": 4053 }, { "epoch": 0.650539575560637, "grad_norm": 12.214859962463379, "learning_rate": 2.8014431158016485e-05, "loss": 32.7031, "step": 4054 }, { "epoch": 0.6507000441288563, "grad_norm": 12.490270614624023, "learning_rate": 2.7991423220789137e-05, "loss": 32.7422, "step": 4055 }, { "epoch": 0.6508605126970755, "grad_norm": 12.157557487487793, "learning_rate": 2.7968421062247386e-05, "loss": 32.6953, "step": 4056 }, { "epoch": 0.6510209812652946, "grad_norm": 12.537428855895996, "learning_rate": 2.7945424688430728e-05, "loss": 32.6875, "step": 4057 }, { "epoch": 0.6511814498335139, "grad_norm": 12.008828163146973, "learning_rate": 2.7922434105377233e-05, "loss": 32.6484, "step": 4058 }, { "epoch": 0.651341918401733, "grad_norm": 12.198979377746582, "learning_rate": 2.789944931912342e-05, "loss": 32.8125, "step": 4059 }, { "epoch": 0.6515023869699522, "grad_norm": 12.27562427520752, "learning_rate": 2.7876470335704308e-05, "loss": 32.7891, "step": 4060 }, { "epoch": 0.6516628555381715, "grad_norm": 12.075055122375488, "learning_rate": 2.785349716115334e-05, "loss": 32.7422, "step": 4061 }, { "epoch": 0.6518233241063907, "grad_norm": 12.087946891784668, "learning_rate": 2.7830529801502524e-05, "loss": 32.7031, "step": 4062 }, { "epoch": 0.6519837926746098, "grad_norm": 12.078763961791992, "learning_rate": 2.780756826278226e-05, "loss": 32.7266, "step": 4063 }, { "epoch": 0.6521442612428291, "grad_norm": 12.272186279296875, "learning_rate": 2.778461255102146e-05, "loss": 32.6406, "step": 4064 }, { "epoch": 0.6523047298110483, "grad_norm": 12.142549514770508, "learning_rate": 2.776166267224748e-05, "loss": 32.6875, "step": 4065 }, { "epoch": 0.6524651983792674, "grad_norm": 12.198375701904297, "learning_rate": 2.773871863248617e-05, "loss": 32.6953, "step": 4066 }, { "epoch": 0.6526256669474867, "grad_norm": 12.134257316589355, "learning_rate": 2.7715780437761828e-05, "loss": 32.6797, "step": 4067 }, { "epoch": 0.6527861355157059, "grad_norm": 12.2067289352417, "learning_rate": 2.769284809409723e-05, "loss": 32.6328, "step": 4068 }, { "epoch": 0.652946604083925, "grad_norm": 11.943167686462402, "learning_rate": 2.7669921607513604e-05, "loss": 32.6562, "step": 4069 }, { "epoch": 0.6531070726521443, "grad_norm": 12.130722999572754, "learning_rate": 2.7647000984030658e-05, "loss": 32.6875, "step": 4070 }, { "epoch": 0.6532675412203635, "grad_norm": 12.262761116027832, "learning_rate": 2.7624086229666536e-05, "loss": 32.625, "step": 4071 }, { "epoch": 0.6534280097885826, "grad_norm": 12.496589660644531, "learning_rate": 2.7601177350437862e-05, "loss": 32.5781, "step": 4072 }, { "epoch": 0.6535884783568019, "grad_norm": 12.271647453308105, "learning_rate": 2.757827435235971e-05, "loss": 32.7266, "step": 4073 }, { "epoch": 0.6537489469250211, "grad_norm": 12.072928428649902, "learning_rate": 2.75553772414456e-05, "loss": 32.6094, "step": 4074 }, { "epoch": 0.6539094154932402, "grad_norm": 11.945816040039062, "learning_rate": 2.7532486023707517e-05, "loss": 32.6406, "step": 4075 }, { "epoch": 0.6540698840614595, "grad_norm": 12.745147705078125, "learning_rate": 2.7509600705155913e-05, "loss": 32.5859, "step": 4076 }, { "epoch": 0.6542303526296787, "grad_norm": 12.458059310913086, "learning_rate": 2.7486721291799665e-05, "loss": 32.5859, "step": 4077 }, { "epoch": 0.6543908211978978, "grad_norm": 12.135242462158203, "learning_rate": 2.7463847789646118e-05, "loss": 32.5781, "step": 4078 }, { "epoch": 0.6545512897661171, "grad_norm": 12.013628959655762, "learning_rate": 2.744098020470105e-05, "loss": 32.6719, "step": 4079 }, { "epoch": 0.6547117583343363, "grad_norm": 12.075305938720703, "learning_rate": 2.7418118542968706e-05, "loss": 32.6133, "step": 4080 }, { "epoch": 0.6548722269025554, "grad_norm": 12.210162162780762, "learning_rate": 2.739526281045171e-05, "loss": 32.6016, "step": 4081 }, { "epoch": 0.6550326954707747, "grad_norm": 12.194884300231934, "learning_rate": 2.7372413013151238e-05, "loss": 32.5703, "step": 4082 }, { "epoch": 0.6551931640389939, "grad_norm": 12.139066696166992, "learning_rate": 2.7349569157066836e-05, "loss": 32.6016, "step": 4083 }, { "epoch": 0.655353632607213, "grad_norm": 12.520946502685547, "learning_rate": 2.732673124819649e-05, "loss": 32.6797, "step": 4084 }, { "epoch": 0.6555141011754323, "grad_norm": 12.015254020690918, "learning_rate": 2.730389929253666e-05, "loss": 32.6953, "step": 4085 }, { "epoch": 0.6556745697436515, "grad_norm": 12.400850296020508, "learning_rate": 2.7281073296082205e-05, "loss": 32.6328, "step": 4086 }, { "epoch": 0.6558350383118706, "grad_norm": 12.079707145690918, "learning_rate": 2.725825326482645e-05, "loss": 32.7109, "step": 4087 }, { "epoch": 0.6559955068800899, "grad_norm": 12.460386276245117, "learning_rate": 2.7235439204761104e-05, "loss": 32.5703, "step": 4088 }, { "epoch": 0.6561559754483091, "grad_norm": 12.133429527282715, "learning_rate": 2.7212631121876402e-05, "loss": 32.6719, "step": 4089 }, { "epoch": 0.6563164440165282, "grad_norm": 12.328436851501465, "learning_rate": 2.7189829022160924e-05, "loss": 32.6797, "step": 4090 }, { "epoch": 0.6564769125847475, "grad_norm": 12.196781158447266, "learning_rate": 2.7167032911601703e-05, "loss": 32.625, "step": 4091 }, { "epoch": 0.6566373811529667, "grad_norm": 12.67672061920166, "learning_rate": 2.7144242796184226e-05, "loss": 32.625, "step": 4092 }, { "epoch": 0.6567978497211858, "grad_norm": 13.044561386108398, "learning_rate": 2.7121458681892354e-05, "loss": 32.5781, "step": 4093 }, { "epoch": 0.6569583182894051, "grad_norm": 12.067301750183105, "learning_rate": 2.709868057470838e-05, "loss": 32.6016, "step": 4094 }, { "epoch": 0.6571187868576243, "grad_norm": 12.198732376098633, "learning_rate": 2.70759084806131e-05, "loss": 32.6094, "step": 4095 }, { "epoch": 0.6572792554258434, "grad_norm": 12.010433197021484, "learning_rate": 2.7053142405585652e-05, "loss": 32.7188, "step": 4096 }, { "epoch": 0.6574397239940627, "grad_norm": 12.196890830993652, "learning_rate": 2.70303823556036e-05, "loss": 32.625, "step": 4097 }, { "epoch": 0.6576001925622819, "grad_norm": 12.146248817443848, "learning_rate": 2.7007628336642954e-05, "loss": 32.75, "step": 4098 }, { "epoch": 0.657760661130501, "grad_norm": 12.258325576782227, "learning_rate": 2.698488035467812e-05, "loss": 32.6641, "step": 4099 }, { "epoch": 0.6579211296987203, "grad_norm": 12.380645751953125, "learning_rate": 2.6962138415681925e-05, "loss": 32.8281, "step": 4100 }, { "epoch": 0.6580815982669395, "grad_norm": 12.055535316467285, "learning_rate": 2.693940252562561e-05, "loss": 33.0156, "step": 4101 }, { "epoch": 0.6582420668351586, "grad_norm": 12.225872039794922, "learning_rate": 2.691667269047882e-05, "loss": 32.7734, "step": 4102 }, { "epoch": 0.6584025354033779, "grad_norm": 12.014854431152344, "learning_rate": 2.6893948916209662e-05, "loss": 32.7617, "step": 4103 }, { "epoch": 0.6585630039715971, "grad_norm": 12.614897727966309, "learning_rate": 2.6871231208784554e-05, "loss": 32.6953, "step": 4104 }, { "epoch": 0.6587234725398162, "grad_norm": 12.0923433303833, "learning_rate": 2.684851957416839e-05, "loss": 32.7734, "step": 4105 }, { "epoch": 0.6588839411080355, "grad_norm": 12.077861785888672, "learning_rate": 2.6825814018324458e-05, "loss": 32.7578, "step": 4106 }, { "epoch": 0.6590444096762547, "grad_norm": 12.302831649780273, "learning_rate": 2.6803114547214447e-05, "loss": 32.7109, "step": 4107 }, { "epoch": 0.6592048782444738, "grad_norm": 12.006303787231445, "learning_rate": 2.6780421166798432e-05, "loss": 32.6719, "step": 4108 }, { "epoch": 0.6593653468126931, "grad_norm": 12.140019416809082, "learning_rate": 2.675773388303493e-05, "loss": 32.6562, "step": 4109 }, { "epoch": 0.6595258153809123, "grad_norm": 12.745742797851562, "learning_rate": 2.6735052701880824e-05, "loss": 32.6875, "step": 4110 }, { "epoch": 0.6596862839491314, "grad_norm": 12.281559944152832, "learning_rate": 2.6712377629291397e-05, "loss": 32.7188, "step": 4111 }, { "epoch": 0.6598467525173507, "grad_norm": 11.897666931152344, "learning_rate": 2.6689708671220337e-05, "loss": 32.8359, "step": 4112 }, { "epoch": 0.6600072210855699, "grad_norm": 12.145583152770996, "learning_rate": 2.6667045833619713e-05, "loss": 32.7734, "step": 4113 }, { "epoch": 0.660167689653789, "grad_norm": 12.017313957214355, "learning_rate": 2.6644389122440007e-05, "loss": 32.7031, "step": 4114 }, { "epoch": 0.6603281582220083, "grad_norm": 12.19927978515625, "learning_rate": 2.6621738543630076e-05, "loss": 32.6328, "step": 4115 }, { "epoch": 0.6604886267902275, "grad_norm": 12.198382377624512, "learning_rate": 2.659909410313718e-05, "loss": 32.6094, "step": 4116 }, { "epoch": 0.6606490953584466, "grad_norm": 12.199853897094727, "learning_rate": 2.6576455806906946e-05, "loss": 32.6406, "step": 4117 }, { "epoch": 0.6608095639266659, "grad_norm": 12.022005081176758, "learning_rate": 2.6553823660883416e-05, "loss": 32.7188, "step": 4118 }, { "epoch": 0.6609700324948851, "grad_norm": 12.023829460144043, "learning_rate": 2.6531197671009e-05, "loss": 32.6875, "step": 4119 }, { "epoch": 0.6611305010631042, "grad_norm": 12.009711265563965, "learning_rate": 2.650857784322449e-05, "loss": 32.6172, "step": 4120 }, { "epoch": 0.6612909696313235, "grad_norm": 12.46228313446045, "learning_rate": 2.648596418346907e-05, "loss": 32.6172, "step": 4121 }, { "epoch": 0.6614514381995427, "grad_norm": 12.015971183776855, "learning_rate": 2.6463356697680274e-05, "loss": 32.6328, "step": 4122 }, { "epoch": 0.6616119067677618, "grad_norm": 12.398712158203125, "learning_rate": 2.6440755391794093e-05, "loss": 32.6406, "step": 4123 }, { "epoch": 0.6617723753359811, "grad_norm": 12.389585494995117, "learning_rate": 2.6418160271744803e-05, "loss": 32.5938, "step": 4124 }, { "epoch": 0.6619328439042003, "grad_norm": 12.08614730834961, "learning_rate": 2.639557134346511e-05, "loss": 32.7031, "step": 4125 }, { "epoch": 0.6620933124724194, "grad_norm": 12.136610984802246, "learning_rate": 2.6372988612886075e-05, "loss": 32.6172, "step": 4126 }, { "epoch": 0.6622537810406387, "grad_norm": 12.26846981048584, "learning_rate": 2.6350412085937148e-05, "loss": 32.625, "step": 4127 }, { "epoch": 0.6624142496088579, "grad_norm": 12.263998031616211, "learning_rate": 2.632784176854609e-05, "loss": 32.625, "step": 4128 }, { "epoch": 0.662574718177077, "grad_norm": 12.1339693069458, "learning_rate": 2.630527766663914e-05, "loss": 32.6484, "step": 4129 }, { "epoch": 0.6627351867452963, "grad_norm": 11.948101997375488, "learning_rate": 2.628271978614082e-05, "loss": 32.6875, "step": 4130 }, { "epoch": 0.6628956553135155, "grad_norm": 12.331572532653809, "learning_rate": 2.626016813297405e-05, "loss": 32.6875, "step": 4131 }, { "epoch": 0.6630561238817346, "grad_norm": 12.198923110961914, "learning_rate": 2.6237622713060102e-05, "loss": 32.5781, "step": 4132 }, { "epoch": 0.6632165924499539, "grad_norm": 12.070260047912598, "learning_rate": 2.6215083532318617e-05, "loss": 32.5938, "step": 4133 }, { "epoch": 0.6633770610181731, "grad_norm": 12.077723503112793, "learning_rate": 2.6192550596667596e-05, "loss": 32.7578, "step": 4134 }, { "epoch": 0.6635375295863922, "grad_norm": 12.259897232055664, "learning_rate": 2.61700239120234e-05, "loss": 32.5625, "step": 4135 }, { "epoch": 0.6636979981546115, "grad_norm": 12.529989242553711, "learning_rate": 2.6147503484300772e-05, "loss": 32.7031, "step": 4136 }, { "epoch": 0.6638584667228307, "grad_norm": 12.265751838684082, "learning_rate": 2.6124989319412785e-05, "loss": 32.6172, "step": 4137 }, { "epoch": 0.6640189352910498, "grad_norm": 12.40380573272705, "learning_rate": 2.6102481423270862e-05, "loss": 32.6016, "step": 4138 }, { "epoch": 0.6641794038592691, "grad_norm": 12.070868492126465, "learning_rate": 2.6079979801784827e-05, "loss": 32.6797, "step": 4139 }, { "epoch": 0.6643398724274883, "grad_norm": 12.151668548583984, "learning_rate": 2.6057484460862768e-05, "loss": 32.7109, "step": 4140 }, { "epoch": 0.6645003409957074, "grad_norm": 12.019810676574707, "learning_rate": 2.603499540641119e-05, "loss": 32.6875, "step": 4141 }, { "epoch": 0.6646608095639267, "grad_norm": 12.068881034851074, "learning_rate": 2.6012512644334963e-05, "loss": 32.6641, "step": 4142 }, { "epoch": 0.6648212781321459, "grad_norm": 12.465666770935059, "learning_rate": 2.599003618053727e-05, "loss": 32.6094, "step": 4143 }, { "epoch": 0.664981746700365, "grad_norm": 12.13809585571289, "learning_rate": 2.5967566020919636e-05, "loss": 32.7031, "step": 4144 }, { "epoch": 0.6651422152685843, "grad_norm": 12.140106201171875, "learning_rate": 2.594510217138195e-05, "loss": 32.7031, "step": 4145 }, { "epoch": 0.6653026838368035, "grad_norm": 12.35220718383789, "learning_rate": 2.5922644637822435e-05, "loss": 32.6562, "step": 4146 }, { "epoch": 0.6654631524050226, "grad_norm": 12.139582633972168, "learning_rate": 2.590019342613765e-05, "loss": 32.6953, "step": 4147 }, { "epoch": 0.6656236209732419, "grad_norm": 12.0170316696167, "learning_rate": 2.5877748542222513e-05, "loss": 32.8516, "step": 4148 }, { "epoch": 0.6657840895414611, "grad_norm": 12.144831657409668, "learning_rate": 2.585530999197024e-05, "loss": 32.7891, "step": 4149 }, { "epoch": 0.6659445581096802, "grad_norm": 12.206899642944336, "learning_rate": 2.583287778127247e-05, "loss": 32.9922, "step": 4150 }, { "epoch": 0.6661050266778995, "grad_norm": 12.089517593383789, "learning_rate": 2.5810451916019067e-05, "loss": 33.0469, "step": 4151 }, { "epoch": 0.6662654952461187, "grad_norm": 12.558218002319336, "learning_rate": 2.5788032402098306e-05, "loss": 32.8516, "step": 4152 }, { "epoch": 0.6664259638143378, "grad_norm": 12.486832618713379, "learning_rate": 2.5765619245396756e-05, "loss": 32.9297, "step": 4153 }, { "epoch": 0.6665864323825571, "grad_norm": 12.523715019226074, "learning_rate": 2.5743212451799338e-05, "loss": 32.7969, "step": 4154 }, { "epoch": 0.6667469009507763, "grad_norm": 12.090957641601562, "learning_rate": 2.5720812027189282e-05, "loss": 32.7969, "step": 4155 }, { "epoch": 0.6669073695189954, "grad_norm": 12.296150207519531, "learning_rate": 2.5698417977448195e-05, "loss": 32.8281, "step": 4156 }, { "epoch": 0.6670678380872147, "grad_norm": 12.20587158203125, "learning_rate": 2.5676030308455947e-05, "loss": 32.6484, "step": 4157 }, { "epoch": 0.6672283066554339, "grad_norm": 12.074535369873047, "learning_rate": 2.565364902609077e-05, "loss": 32.7852, "step": 4158 }, { "epoch": 0.667388775223653, "grad_norm": 12.142082214355469, "learning_rate": 2.56312741362292e-05, "loss": 32.7031, "step": 4159 }, { "epoch": 0.6675492437918723, "grad_norm": 12.07654094696045, "learning_rate": 2.5608905644746116e-05, "loss": 32.6328, "step": 4160 }, { "epoch": 0.6677097123600915, "grad_norm": 12.205073356628418, "learning_rate": 2.5586543557514698e-05, "loss": 32.6875, "step": 4161 }, { "epoch": 0.6678701809283106, "grad_norm": 12.406306266784668, "learning_rate": 2.556418788040644e-05, "loss": 32.6484, "step": 4162 }, { "epoch": 0.6680306494965299, "grad_norm": 12.137660026550293, "learning_rate": 2.5541838619291182e-05, "loss": 32.75, "step": 4163 }, { "epoch": 0.6681911180647491, "grad_norm": 12.081547737121582, "learning_rate": 2.551949578003705e-05, "loss": 32.75, "step": 4164 }, { "epoch": 0.6683515866329682, "grad_norm": 12.673971176147461, "learning_rate": 2.5497159368510505e-05, "loss": 32.7109, "step": 4165 }, { "epoch": 0.6685120552011875, "grad_norm": 12.270833969116211, "learning_rate": 2.54748293905763e-05, "loss": 32.6484, "step": 4166 }, { "epoch": 0.6686725237694067, "grad_norm": 12.027782440185547, "learning_rate": 2.5452505852097514e-05, "loss": 32.7109, "step": 4167 }, { "epoch": 0.6688329923376258, "grad_norm": 12.073129653930664, "learning_rate": 2.5430188758935523e-05, "loss": 32.6406, "step": 4168 }, { "epoch": 0.6689934609058451, "grad_norm": 12.136759757995605, "learning_rate": 2.540787811695002e-05, "loss": 32.625, "step": 4169 }, { "epoch": 0.6691539294740643, "grad_norm": 11.882854461669922, "learning_rate": 2.5385573931999018e-05, "loss": 32.7344, "step": 4170 }, { "epoch": 0.6693143980422834, "grad_norm": 12.269510269165039, "learning_rate": 2.5363276209938813e-05, "loss": 32.625, "step": 4171 }, { "epoch": 0.6694748666105027, "grad_norm": 12.396723747253418, "learning_rate": 2.5340984956624004e-05, "loss": 32.6719, "step": 4172 }, { "epoch": 0.6696353351787219, "grad_norm": 11.95376205444336, "learning_rate": 2.5318700177907502e-05, "loss": 32.6719, "step": 4173 }, { "epoch": 0.669795803746941, "grad_norm": 12.207656860351562, "learning_rate": 2.529642187964053e-05, "loss": 32.6719, "step": 4174 }, { "epoch": 0.6699562723151603, "grad_norm": 12.264245986938477, "learning_rate": 2.527415006767253e-05, "loss": 32.5938, "step": 4175 }, { "epoch": 0.6701167408833795, "grad_norm": 12.139596939086914, "learning_rate": 2.5251884747851373e-05, "loss": 32.6641, "step": 4176 }, { "epoch": 0.6702772094515986, "grad_norm": 12.208203315734863, "learning_rate": 2.5229625926023125e-05, "loss": 32.7109, "step": 4177 }, { "epoch": 0.6704376780198179, "grad_norm": 12.011556625366211, "learning_rate": 2.520737360803219e-05, "loss": 32.6953, "step": 4178 }, { "epoch": 0.6705981465880371, "grad_norm": 12.387537002563477, "learning_rate": 2.5185127799721242e-05, "loss": 32.6094, "step": 4179 }, { "epoch": 0.6707586151562562, "grad_norm": 12.335148811340332, "learning_rate": 2.5162888506931258e-05, "loss": 32.6094, "step": 4180 }, { "epoch": 0.6709190837244755, "grad_norm": 12.202770233154297, "learning_rate": 2.5140655735501502e-05, "loss": 32.5703, "step": 4181 }, { "epoch": 0.6710795522926947, "grad_norm": 12.198208808898926, "learning_rate": 2.5118429491269524e-05, "loss": 32.75, "step": 4182 }, { "epoch": 0.6712400208609138, "grad_norm": 12.204449653625488, "learning_rate": 2.5096209780071144e-05, "loss": 32.6172, "step": 4183 }, { "epoch": 0.6714004894291331, "grad_norm": 23.12446403503418, "learning_rate": 2.5073996607740525e-05, "loss": 33.2109, "step": 4184 }, { "epoch": 0.6715609579973523, "grad_norm": 12.141374588012695, "learning_rate": 2.5051789980110042e-05, "loss": 32.6875, "step": 4185 }, { "epoch": 0.6717214265655714, "grad_norm": 12.136737823486328, "learning_rate": 2.5029589903010415e-05, "loss": 32.6094, "step": 4186 }, { "epoch": 0.6718818951337907, "grad_norm": 12.082152366638184, "learning_rate": 2.5007396382270566e-05, "loss": 32.6406, "step": 4187 }, { "epoch": 0.6720423637020099, "grad_norm": 12.144502639770508, "learning_rate": 2.4985209423717753e-05, "loss": 32.6328, "step": 4188 }, { "epoch": 0.672202832270229, "grad_norm": 12.533626556396484, "learning_rate": 2.4963029033177492e-05, "loss": 32.5625, "step": 4189 }, { "epoch": 0.6723633008384483, "grad_norm": 12.329277038574219, "learning_rate": 2.4940855216473612e-05, "loss": 32.6016, "step": 4190 }, { "epoch": 0.6725237694066675, "grad_norm": 12.389668464660645, "learning_rate": 2.4918687979428168e-05, "loss": 32.7344, "step": 4191 }, { "epoch": 0.6726842379748866, "grad_norm": 12.027177810668945, "learning_rate": 2.4896527327861502e-05, "loss": 32.7422, "step": 4192 }, { "epoch": 0.6728447065431059, "grad_norm": 12.14041519165039, "learning_rate": 2.4874373267592233e-05, "loss": 32.6484, "step": 4193 }, { "epoch": 0.6730051751113251, "grad_norm": 12.395249366760254, "learning_rate": 2.485222580443724e-05, "loss": 32.6562, "step": 4194 }, { "epoch": 0.6731656436795442, "grad_norm": 12.130033493041992, "learning_rate": 2.4830084944211673e-05, "loss": 32.6094, "step": 4195 }, { "epoch": 0.6733261122477635, "grad_norm": 12.198225021362305, "learning_rate": 2.4807950692728942e-05, "loss": 32.7109, "step": 4196 }, { "epoch": 0.6734865808159827, "grad_norm": 12.266044616699219, "learning_rate": 2.478582305580076e-05, "loss": 32.7422, "step": 4197 }, { "epoch": 0.6736470493842018, "grad_norm": 12.456989288330078, "learning_rate": 2.476370203923708e-05, "loss": 32.6562, "step": 4198 }, { "epoch": 0.6738075179524211, "grad_norm": 12.33561897277832, "learning_rate": 2.474158764884607e-05, "loss": 32.6719, "step": 4199 }, { "epoch": 0.6739679865206403, "grad_norm": 12.251907348632812, "learning_rate": 2.471947989043421e-05, "loss": 32.8672, "step": 4200 }, { "epoch": 0.6741284550888594, "grad_norm": 12.098337173461914, "learning_rate": 2.469737876980624e-05, "loss": 32.9531, "step": 4201 }, { "epoch": 0.6742889236570787, "grad_norm": 12.2852783203125, "learning_rate": 2.4675284292765117e-05, "loss": 32.8203, "step": 4202 }, { "epoch": 0.6744493922252979, "grad_norm": 12.30484676361084, "learning_rate": 2.4653196465112116e-05, "loss": 32.8594, "step": 4203 }, { "epoch": 0.674609860793517, "grad_norm": 12.210169792175293, "learning_rate": 2.463111529264672e-05, "loss": 32.6797, "step": 4204 }, { "epoch": 0.6747703293617363, "grad_norm": 12.348319053649902, "learning_rate": 2.460904078116667e-05, "loss": 32.7969, "step": 4205 }, { "epoch": 0.6749307979299555, "grad_norm": 12.143460273742676, "learning_rate": 2.458697293646797e-05, "loss": 32.6875, "step": 4206 }, { "epoch": 0.6750912664981746, "grad_norm": 12.155064582824707, "learning_rate": 2.4564911764344862e-05, "loss": 32.7656, "step": 4207 }, { "epoch": 0.6752517350663939, "grad_norm": 12.149377822875977, "learning_rate": 2.4542857270589843e-05, "loss": 32.7969, "step": 4208 }, { "epoch": 0.6754122036346131, "grad_norm": 12.215167045593262, "learning_rate": 2.4520809460993655e-05, "loss": 32.7734, "step": 4209 }, { "epoch": 0.6755726722028322, "grad_norm": 12.32113265991211, "learning_rate": 2.4498768341345278e-05, "loss": 32.5781, "step": 4210 }, { "epoch": 0.6757331407710515, "grad_norm": 12.269556999206543, "learning_rate": 2.4476733917431955e-05, "loss": 32.6641, "step": 4211 }, { "epoch": 0.6758936093392707, "grad_norm": 12.146018981933594, "learning_rate": 2.445470619503915e-05, "loss": 32.75, "step": 4212 }, { "epoch": 0.6760540779074898, "grad_norm": 12.210101127624512, "learning_rate": 2.4432685179950577e-05, "loss": 32.8203, "step": 4213 }, { "epoch": 0.6762145464757091, "grad_norm": 12.138687133789062, "learning_rate": 2.4410670877948184e-05, "loss": 32.6875, "step": 4214 }, { "epoch": 0.6763750150439283, "grad_norm": 12.07640266418457, "learning_rate": 2.4388663294812163e-05, "loss": 32.7188, "step": 4215 }, { "epoch": 0.6765354836121474, "grad_norm": 12.005656242370605, "learning_rate": 2.4366662436320926e-05, "loss": 32.6406, "step": 4216 }, { "epoch": 0.6766959521803667, "grad_norm": 12.141788482666016, "learning_rate": 2.4344668308251156e-05, "loss": 32.6641, "step": 4217 }, { "epoch": 0.6768564207485859, "grad_norm": 12.15169620513916, "learning_rate": 2.4322680916377738e-05, "loss": 32.7031, "step": 4218 }, { "epoch": 0.677016889316805, "grad_norm": 12.139430046081543, "learning_rate": 2.4300700266473793e-05, "loss": 32.6953, "step": 4219 }, { "epoch": 0.6771773578850243, "grad_norm": 12.144211769104004, "learning_rate": 2.4278726364310666e-05, "loss": 32.8047, "step": 4220 }, { "epoch": 0.6773378264532435, "grad_norm": 12.076138496398926, "learning_rate": 2.425675921565797e-05, "loss": 32.6289, "step": 4221 }, { "epoch": 0.6774982950214626, "grad_norm": 12.004192352294922, "learning_rate": 2.4234798826283444e-05, "loss": 32.6641, "step": 4222 }, { "epoch": 0.6776587635896819, "grad_norm": 12.614598274230957, "learning_rate": 2.4212845201953183e-05, "loss": 32.5391, "step": 4223 }, { "epoch": 0.6778192321579011, "grad_norm": 12.007430076599121, "learning_rate": 2.4190898348431425e-05, "loss": 32.6328, "step": 4224 }, { "epoch": 0.6779797007261202, "grad_norm": 12.283247947692871, "learning_rate": 2.4168958271480646e-05, "loss": 32.75, "step": 4225 }, { "epoch": 0.6781401692943395, "grad_norm": 12.009638786315918, "learning_rate": 2.4147024976861553e-05, "loss": 32.5938, "step": 4226 }, { "epoch": 0.6783006378625587, "grad_norm": 12.075713157653809, "learning_rate": 2.4125098470333064e-05, "loss": 32.6953, "step": 4227 }, { "epoch": 0.6784611064307778, "grad_norm": 12.017111778259277, "learning_rate": 2.4103178757652306e-05, "loss": 32.6797, "step": 4228 }, { "epoch": 0.6786215749989971, "grad_norm": 12.14151382446289, "learning_rate": 2.4081265844574648e-05, "loss": 32.5859, "step": 4229 }, { "epoch": 0.6787820435672163, "grad_norm": 12.0708589553833, "learning_rate": 2.4059359736853622e-05, "loss": 32.7188, "step": 4230 }, { "epoch": 0.6789425121354354, "grad_norm": 12.40091609954834, "learning_rate": 2.4037460440241057e-05, "loss": 32.6016, "step": 4231 }, { "epoch": 0.6791029807036547, "grad_norm": 12.258136749267578, "learning_rate": 2.401556796048692e-05, "loss": 32.5938, "step": 4232 }, { "epoch": 0.6792634492718739, "grad_norm": 12.327390670776367, "learning_rate": 2.3993682303339437e-05, "loss": 32.625, "step": 4233 }, { "epoch": 0.679423917840093, "grad_norm": 11.880888938903809, "learning_rate": 2.397180347454498e-05, "loss": 32.7188, "step": 4234 }, { "epoch": 0.6795843864083123, "grad_norm": 12.007790565490723, "learning_rate": 2.3949931479848187e-05, "loss": 32.625, "step": 4235 }, { "epoch": 0.6797448549765315, "grad_norm": 12.594905853271484, "learning_rate": 2.3928066324991867e-05, "loss": 32.5547, "step": 4236 }, { "epoch": 0.6799053235447506, "grad_norm": 11.965787887573242, "learning_rate": 2.3906208015717073e-05, "loss": 32.8516, "step": 4237 }, { "epoch": 0.6800657921129699, "grad_norm": 12.198866844177246, "learning_rate": 2.388435655776303e-05, "loss": 32.6641, "step": 4238 }, { "epoch": 0.6802262606811891, "grad_norm": 12.7990083694458, "learning_rate": 2.386251195686716e-05, "loss": 32.6094, "step": 4239 }, { "epoch": 0.6803867292494082, "grad_norm": 12.007333755493164, "learning_rate": 2.384067421876511e-05, "loss": 32.6016, "step": 4240 }, { "epoch": 0.6805471978176275, "grad_norm": 12.136483192443848, "learning_rate": 2.3818843349190685e-05, "loss": 32.5859, "step": 4241 }, { "epoch": 0.6807076663858467, "grad_norm": 12.148444175720215, "learning_rate": 2.3797019353875926e-05, "loss": 32.6797, "step": 4242 }, { "epoch": 0.6808681349540658, "grad_norm": 12.071565628051758, "learning_rate": 2.3775202238551035e-05, "loss": 32.6016, "step": 4243 }, { "epoch": 0.6810286035222851, "grad_norm": 12.585593223571777, "learning_rate": 2.3753392008944465e-05, "loss": 32.6406, "step": 4244 }, { "epoch": 0.6811890720905043, "grad_norm": 12.267797470092773, "learning_rate": 2.3731588670782818e-05, "loss": 32.7031, "step": 4245 }, { "epoch": 0.6813495406587234, "grad_norm": 12.262954711914062, "learning_rate": 2.370979222979085e-05, "loss": 32.625, "step": 4246 }, { "epoch": 0.6815100092269427, "grad_norm": 12.466755867004395, "learning_rate": 2.3688002691691573e-05, "loss": 32.5781, "step": 4247 }, { "epoch": 0.6816704777951619, "grad_norm": 12.21820068359375, "learning_rate": 2.3666220062206163e-05, "loss": 32.7812, "step": 4248 }, { "epoch": 0.681830946363381, "grad_norm": 12.11717700958252, "learning_rate": 2.3644444347053953e-05, "loss": 32.8828, "step": 4249 }, { "epoch": 0.6819914149316003, "grad_norm": 12.339943885803223, "learning_rate": 2.362267555195252e-05, "loss": 32.7578, "step": 4250 }, { "epoch": 0.6821518834998195, "grad_norm": 41.86260986328125, "learning_rate": 2.3600913682617586e-05, "loss": 33.1836, "step": 4251 }, { "epoch": 0.6823123520680386, "grad_norm": 12.185928344726562, "learning_rate": 2.3579158744763058e-05, "loss": 32.8711, "step": 4252 }, { "epoch": 0.6824728206362579, "grad_norm": 12.09005355834961, "learning_rate": 2.3557410744101015e-05, "loss": 32.8594, "step": 4253 }, { "epoch": 0.6826332892044771, "grad_norm": 11.969498634338379, "learning_rate": 2.3535669686341738e-05, "loss": 32.8203, "step": 4254 }, { "epoch": 0.6827937577726962, "grad_norm": 12.021535873413086, "learning_rate": 2.351393557719366e-05, "loss": 32.7734, "step": 4255 }, { "epoch": 0.6829542263409155, "grad_norm": 12.415603637695312, "learning_rate": 2.3492208422363398e-05, "loss": 32.7656, "step": 4256 }, { "epoch": 0.6831146949091347, "grad_norm": 12.20348072052002, "learning_rate": 2.347048822755576e-05, "loss": 32.6797, "step": 4257 }, { "epoch": 0.6832751634773538, "grad_norm": 12.08539867401123, "learning_rate": 2.34487749984737e-05, "loss": 32.7344, "step": 4258 }, { "epoch": 0.6834356320455731, "grad_norm": 12.78592586517334, "learning_rate": 2.3427068740818364e-05, "loss": 32.6484, "step": 4259 }, { "epoch": 0.6835961006137923, "grad_norm": 12.025421142578125, "learning_rate": 2.3405369460289057e-05, "loss": 32.8672, "step": 4260 }, { "epoch": 0.6837565691820114, "grad_norm": 12.084014892578125, "learning_rate": 2.3383677162583246e-05, "loss": 32.6562, "step": 4261 }, { "epoch": 0.6839170377502307, "grad_norm": 12.143267631530762, "learning_rate": 2.3361991853396587e-05, "loss": 32.7031, "step": 4262 }, { "epoch": 0.6840775063184499, "grad_norm": 12.074917793273926, "learning_rate": 2.3340313538422852e-05, "loss": 32.6797, "step": 4263 }, { "epoch": 0.684237974886669, "grad_norm": 12.66495418548584, "learning_rate": 2.3318642223354066e-05, "loss": 32.7031, "step": 4264 }, { "epoch": 0.6843984434548883, "grad_norm": 12.13904094696045, "learning_rate": 2.329697791388033e-05, "loss": 32.6484, "step": 4265 }, { "epoch": 0.6845589120231075, "grad_norm": 12.136653900146484, "learning_rate": 2.327532061568994e-05, "loss": 32.6484, "step": 4266 }, { "epoch": 0.6847193805913266, "grad_norm": 12.53044605255127, "learning_rate": 2.3253670334469345e-05, "loss": 32.6328, "step": 4267 }, { "epoch": 0.6848798491595459, "grad_norm": 12.070975303649902, "learning_rate": 2.3232027075903186e-05, "loss": 32.6719, "step": 4268 }, { "epoch": 0.6850403177277651, "grad_norm": 12.008939743041992, "learning_rate": 2.3210390845674157e-05, "loss": 32.7578, "step": 4269 }, { "epoch": 0.6852007862959842, "grad_norm": 12.263137817382812, "learning_rate": 2.318876164946324e-05, "loss": 32.6094, "step": 4270 }, { "epoch": 0.6853612548642035, "grad_norm": 12.269872665405273, "learning_rate": 2.3167139492949496e-05, "loss": 32.7148, "step": 4271 }, { "epoch": 0.6855217234324227, "grad_norm": 12.008399963378906, "learning_rate": 2.3145524381810146e-05, "loss": 32.6406, "step": 4272 }, { "epoch": 0.6856821920006418, "grad_norm": 12.008207321166992, "learning_rate": 2.3123916321720563e-05, "loss": 32.6641, "step": 4273 }, { "epoch": 0.6858426605688611, "grad_norm": 12.01516056060791, "learning_rate": 2.310231531835427e-05, "loss": 32.7266, "step": 4274 }, { "epoch": 0.6860031291370803, "grad_norm": 12.206995010375977, "learning_rate": 2.3080721377382934e-05, "loss": 32.625, "step": 4275 }, { "epoch": 0.6861635977052994, "grad_norm": 12.083436012268066, "learning_rate": 2.3059134504476388e-05, "loss": 32.6328, "step": 4276 }, { "epoch": 0.6863240662735187, "grad_norm": 12.081819534301758, "learning_rate": 2.3037554705302555e-05, "loss": 32.7266, "step": 4277 }, { "epoch": 0.6864845348417379, "grad_norm": 12.137274742126465, "learning_rate": 2.301598198552759e-05, "loss": 32.6797, "step": 4278 }, { "epoch": 0.686645003409957, "grad_norm": 12.140634536743164, "learning_rate": 2.2994416350815712e-05, "loss": 32.6562, "step": 4279 }, { "epoch": 0.6868054719781763, "grad_norm": 12.387679100036621, "learning_rate": 2.297285780682933e-05, "loss": 32.6094, "step": 4280 }, { "epoch": 0.6869659405463955, "grad_norm": 12.070839881896973, "learning_rate": 2.295130635922892e-05, "loss": 32.6016, "step": 4281 }, { "epoch": 0.6871264091146146, "grad_norm": 12.454360961914062, "learning_rate": 2.2929762013673163e-05, "loss": 32.5938, "step": 4282 }, { "epoch": 0.6872868776828339, "grad_norm": 12.324591636657715, "learning_rate": 2.2908224775818836e-05, "loss": 32.5625, "step": 4283 }, { "epoch": 0.6874473462510531, "grad_norm": 12.397653579711914, "learning_rate": 2.2886694651320912e-05, "loss": 32.6172, "step": 4284 }, { "epoch": 0.6876078148192722, "grad_norm": 12.137603759765625, "learning_rate": 2.2865171645832417e-05, "loss": 32.6328, "step": 4285 }, { "epoch": 0.6877682833874915, "grad_norm": 11.88029956817627, "learning_rate": 2.2843655765004562e-05, "loss": 32.6719, "step": 4286 }, { "epoch": 0.6879287519557107, "grad_norm": 12.13762092590332, "learning_rate": 2.282214701448665e-05, "loss": 32.6641, "step": 4287 }, { "epoch": 0.6880892205239298, "grad_norm": 12.136213302612305, "learning_rate": 2.280064539992614e-05, "loss": 32.6328, "step": 4288 }, { "epoch": 0.6882496890921491, "grad_norm": 12.137385368347168, "learning_rate": 2.2779150926968605e-05, "loss": 32.7109, "step": 4289 }, { "epoch": 0.6884101576603683, "grad_norm": 12.786325454711914, "learning_rate": 2.2757663601257728e-05, "loss": 32.5703, "step": 4290 }, { "epoch": 0.6885706262285874, "grad_norm": 12.263197898864746, "learning_rate": 2.2736183428435364e-05, "loss": 32.5859, "step": 4291 }, { "epoch": 0.6887310947968067, "grad_norm": 12.533208847045898, "learning_rate": 2.271471041414146e-05, "loss": 32.5859, "step": 4292 }, { "epoch": 0.6888915633650259, "grad_norm": 12.25749397277832, "learning_rate": 2.2693244564014044e-05, "loss": 32.5938, "step": 4293 }, { "epoch": 0.689052031933245, "grad_norm": 12.321316719055176, "learning_rate": 2.2671785883689322e-05, "loss": 32.6406, "step": 4294 }, { "epoch": 0.6892125005014643, "grad_norm": 12.195843696594238, "learning_rate": 2.265033437880159e-05, "loss": 32.6172, "step": 4295 }, { "epoch": 0.6893729690696835, "grad_norm": 13.032407760620117, "learning_rate": 2.262889005498326e-05, "loss": 32.5703, "step": 4296 }, { "epoch": 0.6895334376379026, "grad_norm": 12.646841049194336, "learning_rate": 2.2607452917864887e-05, "loss": 32.668, "step": 4297 }, { "epoch": 0.6896939062061219, "grad_norm": 12.07453727722168, "learning_rate": 2.2586022973075106e-05, "loss": 32.6562, "step": 4298 }, { "epoch": 0.6898543747743411, "grad_norm": 12.33935832977295, "learning_rate": 2.2564600226240668e-05, "loss": 32.8438, "step": 4299 }, { "epoch": 0.6900148433425602, "grad_norm": 12.40762710571289, "learning_rate": 2.2543184682986444e-05, "loss": 32.8125, "step": 4300 }, { "epoch": 0.6901753119107795, "grad_norm": 12.121000289916992, "learning_rate": 2.2521776348935415e-05, "loss": 33.1719, "step": 4301 }, { "epoch": 0.6903357804789987, "grad_norm": 12.549849510192871, "learning_rate": 2.2500375229708654e-05, "loss": 32.8672, "step": 4302 }, { "epoch": 0.6904962490472178, "grad_norm": 12.48288631439209, "learning_rate": 2.247898133092535e-05, "loss": 32.8516, "step": 4303 }, { "epoch": 0.6906567176154371, "grad_norm": 12.077072143554688, "learning_rate": 2.2457594658202808e-05, "loss": 32.6953, "step": 4304 }, { "epoch": 0.6908171861836563, "grad_norm": 12.274537086486816, "learning_rate": 2.243621521715641e-05, "loss": 32.7031, "step": 4305 }, { "epoch": 0.6909776547518754, "grad_norm": 12.023672103881836, "learning_rate": 2.241484301339965e-05, "loss": 32.8281, "step": 4306 }, { "epoch": 0.6911381233200947, "grad_norm": 12.139181137084961, "learning_rate": 2.2393478052544135e-05, "loss": 32.7812, "step": 4307 }, { "epoch": 0.6912985918883139, "grad_norm": 12.268728256225586, "learning_rate": 2.237212034019956e-05, "loss": 32.7422, "step": 4308 }, { "epoch": 0.691459060456533, "grad_norm": 12.076737403869629, "learning_rate": 2.2350769881973705e-05, "loss": 32.7344, "step": 4309 }, { "epoch": 0.6916195290247523, "grad_norm": 12.267913818359375, "learning_rate": 2.2329426683472448e-05, "loss": 32.7188, "step": 4310 }, { "epoch": 0.6917799975929715, "grad_norm": 12.20002555847168, "learning_rate": 2.23080907502998e-05, "loss": 32.6641, "step": 4311 }, { "epoch": 0.6919404661611906, "grad_norm": 12.412720680236816, "learning_rate": 2.228676208805782e-05, "loss": 32.6875, "step": 4312 }, { "epoch": 0.6921009347294099, "grad_norm": 11.944411277770996, "learning_rate": 2.2265440702346667e-05, "loss": 32.6719, "step": 4313 }, { "epoch": 0.6922614032976291, "grad_norm": 12.010615348815918, "learning_rate": 2.2244126598764603e-05, "loss": 32.6406, "step": 4314 }, { "epoch": 0.6924218718658482, "grad_norm": 12.076505661010742, "learning_rate": 2.2222819782907954e-05, "loss": 32.7109, "step": 4315 }, { "epoch": 0.6925823404340675, "grad_norm": 12.017574310302734, "learning_rate": 2.220152026037116e-05, "loss": 32.6875, "step": 4316 }, { "epoch": 0.6927428090022867, "grad_norm": 12.26023006439209, "learning_rate": 2.2180228036746737e-05, "loss": 32.7422, "step": 4317 }, { "epoch": 0.6929032775705058, "grad_norm": 12.559688568115234, "learning_rate": 2.215894311762527e-05, "loss": 32.6914, "step": 4318 }, { "epoch": 0.6930637461387251, "grad_norm": 12.546003341674805, "learning_rate": 2.213766550859544e-05, "loss": 32.6016, "step": 4319 }, { "epoch": 0.6932242147069443, "grad_norm": 12.13880729675293, "learning_rate": 2.2116395215244008e-05, "loss": 32.6641, "step": 4320 }, { "epoch": 0.6933846832751634, "grad_norm": 12.138705253601074, "learning_rate": 2.209513224315581e-05, "loss": 32.6016, "step": 4321 }, { "epoch": 0.6935451518433827, "grad_norm": 12.070618629455566, "learning_rate": 2.207387659791376e-05, "loss": 32.6562, "step": 4322 }, { "epoch": 0.6937056204116019, "grad_norm": 12.275945663452148, "learning_rate": 2.205262828509886e-05, "loss": 32.6094, "step": 4323 }, { "epoch": 0.693866088979821, "grad_norm": 12.077530860900879, "learning_rate": 2.2031387310290146e-05, "loss": 32.6094, "step": 4324 }, { "epoch": 0.6940265575480403, "grad_norm": 12.020124435424805, "learning_rate": 2.2010153679064806e-05, "loss": 32.6328, "step": 4325 }, { "epoch": 0.6941870261162595, "grad_norm": 12.390021324157715, "learning_rate": 2.1988927396998023e-05, "loss": 32.6328, "step": 4326 }, { "epoch": 0.6943474946844786, "grad_norm": 12.135738372802734, "learning_rate": 2.1967708469663105e-05, "loss": 32.6172, "step": 4327 }, { "epoch": 0.6945079632526979, "grad_norm": 12.401812553405762, "learning_rate": 2.1946496902631363e-05, "loss": 32.6172, "step": 4328 }, { "epoch": 0.6946684318209171, "grad_norm": 12.348748207092285, "learning_rate": 2.1925292701472232e-05, "loss": 32.7109, "step": 4329 }, { "epoch": 0.6948289003891363, "grad_norm": 12.073836326599121, "learning_rate": 2.1904095871753183e-05, "loss": 32.6484, "step": 4330 }, { "epoch": 0.6949893689573555, "grad_norm": 12.137781143188477, "learning_rate": 2.18829064190398e-05, "loss": 32.5625, "step": 4331 }, { "epoch": 0.6951498375255747, "grad_norm": 12.196854591369629, "learning_rate": 2.186172434889568e-05, "loss": 32.6406, "step": 4332 }, { "epoch": 0.6953103060937939, "grad_norm": 12.26996898651123, "learning_rate": 2.1840549666882486e-05, "loss": 32.6172, "step": 4333 }, { "epoch": 0.6954707746620131, "grad_norm": 12.201902389526367, "learning_rate": 2.181938237855996e-05, "loss": 32.625, "step": 4334 }, { "epoch": 0.6956312432302323, "grad_norm": 12.537397384643555, "learning_rate": 2.1798222489485898e-05, "loss": 32.6094, "step": 4335 }, { "epoch": 0.6957917117984515, "grad_norm": 12.15080451965332, "learning_rate": 2.1777070005216137e-05, "loss": 32.7109, "step": 4336 }, { "epoch": 0.6959521803666707, "grad_norm": 12.197200775146484, "learning_rate": 2.17559249313046e-05, "loss": 32.5781, "step": 4337 }, { "epoch": 0.6961126489348899, "grad_norm": 12.259077072143555, "learning_rate": 2.1734787273303215e-05, "loss": 32.5469, "step": 4338 }, { "epoch": 0.696273117503109, "grad_norm": 12.265089988708496, "learning_rate": 2.1713657036762054e-05, "loss": 32.7109, "step": 4339 }, { "epoch": 0.6964335860713283, "grad_norm": 12.137735366821289, "learning_rate": 2.1692534227229123e-05, "loss": 32.6094, "step": 4340 }, { "epoch": 0.6965940546395475, "grad_norm": 12.195167541503906, "learning_rate": 2.1671418850250558e-05, "loss": 32.6094, "step": 4341 }, { "epoch": 0.6967545232077667, "grad_norm": 12.199703216552734, "learning_rate": 2.165031091137052e-05, "loss": 32.625, "step": 4342 }, { "epoch": 0.6969149917759859, "grad_norm": 12.391984939575195, "learning_rate": 2.1629210416131217e-05, "loss": 32.625, "step": 4343 }, { "epoch": 0.6970754603442051, "grad_norm": 12.131450653076172, "learning_rate": 2.1608117370072893e-05, "loss": 32.625, "step": 4344 }, { "epoch": 0.6972359289124243, "grad_norm": 12.466994285583496, "learning_rate": 2.1587031778733875e-05, "loss": 32.6406, "step": 4345 }, { "epoch": 0.6973963974806435, "grad_norm": 12.149807929992676, "learning_rate": 2.156595364765049e-05, "loss": 32.7266, "step": 4346 }, { "epoch": 0.6975568660488627, "grad_norm": 12.136420249938965, "learning_rate": 2.1544882982357118e-05, "loss": 32.7188, "step": 4347 }, { "epoch": 0.6977173346170819, "grad_norm": 12.261331558227539, "learning_rate": 2.1523819788386186e-05, "loss": 32.6328, "step": 4348 }, { "epoch": 0.6978778031853011, "grad_norm": 12.141218185424805, "learning_rate": 2.150276407126815e-05, "loss": 32.7422, "step": 4349 }, { "epoch": 0.6980382717535203, "grad_norm": 12.087318420410156, "learning_rate": 2.148171583653152e-05, "loss": 32.8125, "step": 4350 }, { "epoch": 0.6981987403217395, "grad_norm": 12.40267276763916, "learning_rate": 2.146067508970281e-05, "loss": 33.0234, "step": 4351 }, { "epoch": 0.6983592088899587, "grad_norm": 12.243218421936035, "learning_rate": 2.1439641836306602e-05, "loss": 32.9844, "step": 4352 }, { "epoch": 0.6985196774581779, "grad_norm": 12.223154067993164, "learning_rate": 2.141861608186549e-05, "loss": 32.7266, "step": 4353 }, { "epoch": 0.698680146026397, "grad_norm": 12.345086097717285, "learning_rate": 2.1397597831900102e-05, "loss": 32.7188, "step": 4354 }, { "epoch": 0.6988406145946163, "grad_norm": 12.283063888549805, "learning_rate": 2.13765870919291e-05, "loss": 32.7773, "step": 4355 }, { "epoch": 0.6990010831628355, "grad_norm": 12.204627990722656, "learning_rate": 2.1355583867469182e-05, "loss": 32.8203, "step": 4356 }, { "epoch": 0.6991615517310547, "grad_norm": 12.090182304382324, "learning_rate": 2.133458816403503e-05, "loss": 32.8203, "step": 4357 }, { "epoch": 0.6993220202992739, "grad_norm": 12.280292510986328, "learning_rate": 2.1313599987139433e-05, "loss": 32.7031, "step": 4358 }, { "epoch": 0.6994824888674931, "grad_norm": 12.338869094848633, "learning_rate": 2.129261934229313e-05, "loss": 32.7188, "step": 4359 }, { "epoch": 0.6996429574357123, "grad_norm": 12.021392822265625, "learning_rate": 2.1271646235004904e-05, "loss": 32.7578, "step": 4360 }, { "epoch": 0.6998034260039315, "grad_norm": 12.148527145385742, "learning_rate": 2.125068067078157e-05, "loss": 32.7578, "step": 4361 }, { "epoch": 0.6999638945721507, "grad_norm": 11.946821212768555, "learning_rate": 2.1229722655127954e-05, "loss": 32.6875, "step": 4362 }, { "epoch": 0.7001243631403699, "grad_norm": 12.354042053222656, "learning_rate": 2.1208772193546896e-05, "loss": 32.7734, "step": 4363 }, { "epoch": 0.7002848317085891, "grad_norm": 12.321860313415527, "learning_rate": 2.1187829291539264e-05, "loss": 32.625, "step": 4364 }, { "epoch": 0.7004453002768083, "grad_norm": 12.260095596313477, "learning_rate": 2.116689395460393e-05, "loss": 32.6719, "step": 4365 }, { "epoch": 0.7006057688450275, "grad_norm": 12.38679027557373, "learning_rate": 2.1145966188237776e-05, "loss": 32.8359, "step": 4366 }, { "epoch": 0.7007662374132467, "grad_norm": 12.395804405212402, "learning_rate": 2.112504599793572e-05, "loss": 32.6484, "step": 4367 }, { "epoch": 0.7009267059814659, "grad_norm": 12.143658638000488, "learning_rate": 2.1104133389190667e-05, "loss": 32.6797, "step": 4368 }, { "epoch": 0.701087174549685, "grad_norm": 12.32746696472168, "learning_rate": 2.1083228367493547e-05, "loss": 32.6719, "step": 4369 }, { "epoch": 0.7012476431179043, "grad_norm": 12.22032356262207, "learning_rate": 2.1062330938333275e-05, "loss": 32.5625, "step": 4370 }, { "epoch": 0.7014081116861235, "grad_norm": 12.144070625305176, "learning_rate": 2.1041441107196787e-05, "loss": 32.6172, "step": 4371 }, { "epoch": 0.7015685802543427, "grad_norm": 12.268239974975586, "learning_rate": 2.1020558879569046e-05, "loss": 32.6797, "step": 4372 }, { "epoch": 0.7017290488225619, "grad_norm": 12.197325706481934, "learning_rate": 2.099968426093299e-05, "loss": 32.6016, "step": 4373 }, { "epoch": 0.7018895173907811, "grad_norm": 12.07607650756836, "learning_rate": 2.097881725676956e-05, "loss": 32.6328, "step": 4374 }, { "epoch": 0.7020499859590003, "grad_norm": 12.021093368530273, "learning_rate": 2.0957957872557733e-05, "loss": 32.7266, "step": 4375 }, { "epoch": 0.7022104545272195, "grad_norm": 12.144546508789062, "learning_rate": 2.0937106113774407e-05, "loss": 32.6406, "step": 4376 }, { "epoch": 0.7023709230954387, "grad_norm": 12.135629653930664, "learning_rate": 2.0916261985894538e-05, "loss": 32.6172, "step": 4377 }, { "epoch": 0.7025313916636579, "grad_norm": 11.946937561035156, "learning_rate": 2.089542549439109e-05, "loss": 32.6641, "step": 4378 }, { "epoch": 0.7026918602318771, "grad_norm": 12.137514114379883, "learning_rate": 2.0874596644735002e-05, "loss": 32.6094, "step": 4379 }, { "epoch": 0.7028523288000963, "grad_norm": 12.142087936401367, "learning_rate": 2.085377544239519e-05, "loss": 32.7344, "step": 4380 }, { "epoch": 0.7030127973683155, "grad_norm": 12.077348709106445, "learning_rate": 2.0832961892838577e-05, "loss": 32.625, "step": 4381 }, { "epoch": 0.7031732659365347, "grad_norm": 12.198657989501953, "learning_rate": 2.081215600153008e-05, "loss": 32.6641, "step": 4382 }, { "epoch": 0.7033337345047539, "grad_norm": 12.386340141296387, "learning_rate": 2.0791357773932602e-05, "loss": 32.5703, "step": 4383 }, { "epoch": 0.7034942030729731, "grad_norm": 12.337193489074707, "learning_rate": 2.0770567215507032e-05, "loss": 32.625, "step": 4384 }, { "epoch": 0.7036546716411923, "grad_norm": 12.402425765991211, "learning_rate": 2.0749784331712226e-05, "loss": 32.7578, "step": 4385 }, { "epoch": 0.7038151402094115, "grad_norm": 12.146373748779297, "learning_rate": 2.0729009128005102e-05, "loss": 32.6875, "step": 4386 }, { "epoch": 0.7039756087776307, "grad_norm": 12.022364616394043, "learning_rate": 2.0708241609840444e-05, "loss": 32.7109, "step": 4387 }, { "epoch": 0.7041360773458499, "grad_norm": 11.946023941040039, "learning_rate": 2.0687481782671107e-05, "loss": 32.6953, "step": 4388 }, { "epoch": 0.7042965459140691, "grad_norm": 12.070279121398926, "learning_rate": 2.0666729651947896e-05, "loss": 32.5938, "step": 4389 }, { "epoch": 0.7044570144822883, "grad_norm": 12.076729774475098, "learning_rate": 2.064598522311959e-05, "loss": 32.6562, "step": 4390 }, { "epoch": 0.7046174830505075, "grad_norm": 12.013343811035156, "learning_rate": 2.0625248501632944e-05, "loss": 32.6719, "step": 4391 }, { "epoch": 0.7047779516187267, "grad_norm": 12.135379791259766, "learning_rate": 2.0604519492932732e-05, "loss": 32.6328, "step": 4392 }, { "epoch": 0.7049384201869459, "grad_norm": 12.33474063873291, "learning_rate": 2.0583798202461653e-05, "loss": 32.7031, "step": 4393 }, { "epoch": 0.7050988887551651, "grad_norm": 12.195886611938477, "learning_rate": 2.056308463566039e-05, "loss": 32.6094, "step": 4394 }, { "epoch": 0.7052593573233843, "grad_norm": 12.26854419708252, "learning_rate": 2.054237879796761e-05, "loss": 32.6406, "step": 4395 }, { "epoch": 0.7054198258916035, "grad_norm": 12.0671968460083, "learning_rate": 2.052168069481994e-05, "loss": 32.6406, "step": 4396 }, { "epoch": 0.7055802944598227, "grad_norm": 12.27090835571289, "learning_rate": 2.0500990331651988e-05, "loss": 32.6953, "step": 4397 }, { "epoch": 0.7057407630280419, "grad_norm": 12.076264381408691, "learning_rate": 2.0480307713896312e-05, "loss": 32.7383, "step": 4398 }, { "epoch": 0.7059012315962611, "grad_norm": 12.36329460144043, "learning_rate": 2.0459632846983457e-05, "loss": 32.9062, "step": 4399 }, { "epoch": 0.7060617001644803, "grad_norm": 12.183692932128906, "learning_rate": 2.0438965736341916e-05, "loss": 32.9375, "step": 4400 }, { "epoch": 0.7062221687326995, "grad_norm": 12.092583656311035, "learning_rate": 2.0418306387398156e-05, "loss": 32.8281, "step": 4401 }, { "epoch": 0.7063826373009187, "grad_norm": 12.234221458435059, "learning_rate": 2.03976548055766e-05, "loss": 32.875, "step": 4402 }, { "epoch": 0.7065431058691379, "grad_norm": 12.344701766967773, "learning_rate": 2.037701099629964e-05, "loss": 32.8125, "step": 4403 }, { "epoch": 0.7067035744373571, "grad_norm": 12.009429931640625, "learning_rate": 2.0356374964987597e-05, "loss": 32.7266, "step": 4404 }, { "epoch": 0.7068640430055763, "grad_norm": 12.400602340698242, "learning_rate": 2.0335746717058806e-05, "loss": 32.7656, "step": 4405 }, { "epoch": 0.7070245115737955, "grad_norm": 12.140562057495117, "learning_rate": 2.0315126257929517e-05, "loss": 32.75, "step": 4406 }, { "epoch": 0.7071849801420147, "grad_norm": 12.07802677154541, "learning_rate": 2.0294513593013942e-05, "loss": 32.625, "step": 4407 }, { "epoch": 0.7073454487102339, "grad_norm": 12.01053524017334, "learning_rate": 2.027390872772425e-05, "loss": 32.6641, "step": 4408 }, { "epoch": 0.7075059172784531, "grad_norm": 12.42641830444336, "learning_rate": 2.0253311667470553e-05, "loss": 32.7969, "step": 4409 }, { "epoch": 0.7076663858466723, "grad_norm": 12.266206741333008, "learning_rate": 2.0232722417660932e-05, "loss": 32.6172, "step": 4410 }, { "epoch": 0.7078268544148915, "grad_norm": 12.135363578796387, "learning_rate": 2.0212140983701393e-05, "loss": 32.7188, "step": 4411 }, { "epoch": 0.7079873229831107, "grad_norm": 12.139713287353516, "learning_rate": 2.0191567370995923e-05, "loss": 32.6016, "step": 4412 }, { "epoch": 0.7081477915513299, "grad_norm": 12.206252098083496, "learning_rate": 2.0171001584946418e-05, "loss": 32.7812, "step": 4413 }, { "epoch": 0.7083082601195491, "grad_norm": 12.015580177307129, "learning_rate": 2.015044363095275e-05, "loss": 32.75, "step": 4414 }, { "epoch": 0.7084687286877683, "grad_norm": 12.075815200805664, "learning_rate": 2.0129893514412707e-05, "loss": 32.6797, "step": 4415 }, { "epoch": 0.7086291972559875, "grad_norm": 12.076647758483887, "learning_rate": 2.0109351240722048e-05, "loss": 32.7031, "step": 4416 }, { "epoch": 0.7087896658242067, "grad_norm": 11.946710586547852, "learning_rate": 2.0088816815274453e-05, "loss": 32.7578, "step": 4417 }, { "epoch": 0.7089501343924259, "grad_norm": 12.477701187133789, "learning_rate": 2.0068290243461523e-05, "loss": 32.6562, "step": 4418 }, { "epoch": 0.7091106029606451, "grad_norm": 12.00495719909668, "learning_rate": 2.0047771530672872e-05, "loss": 32.625, "step": 4419 }, { "epoch": 0.7092710715288643, "grad_norm": 12.200692176818848, "learning_rate": 2.0027260682295963e-05, "loss": 32.6719, "step": 4420 }, { "epoch": 0.7094315400970835, "grad_norm": 12.218554496765137, "learning_rate": 2.0006757703716245e-05, "loss": 32.7656, "step": 4421 }, { "epoch": 0.7095920086653027, "grad_norm": 12.088764190673828, "learning_rate": 1.998626260031709e-05, "loss": 32.7734, "step": 4422 }, { "epoch": 0.7097524772335219, "grad_norm": 12.072993278503418, "learning_rate": 1.996577537747977e-05, "loss": 32.6172, "step": 4423 }, { "epoch": 0.7099129458017411, "grad_norm": 12.200554847717285, "learning_rate": 1.9945296040583523e-05, "loss": 32.6328, "step": 4424 }, { "epoch": 0.7100734143699603, "grad_norm": 12.155891418457031, "learning_rate": 1.9924824595005536e-05, "loss": 32.6562, "step": 4425 }, { "epoch": 0.7102338829381795, "grad_norm": 12.58115291595459, "learning_rate": 1.9904361046120878e-05, "loss": 32.6172, "step": 4426 }, { "epoch": 0.7103943515063987, "grad_norm": 12.200139045715332, "learning_rate": 1.988390539930257e-05, "loss": 32.7109, "step": 4427 }, { "epoch": 0.7105548200746179, "grad_norm": 12.268129348754883, "learning_rate": 1.9863457659921553e-05, "loss": 32.6562, "step": 4428 }, { "epoch": 0.7107152886428371, "grad_norm": 12.070233345031738, "learning_rate": 1.9843017833346683e-05, "loss": 32.6484, "step": 4429 }, { "epoch": 0.7108757572110563, "grad_norm": 12.550908088684082, "learning_rate": 1.9822585924944753e-05, "loss": 32.5938, "step": 4430 }, { "epoch": 0.7110362257792755, "grad_norm": 12.03311824798584, "learning_rate": 1.9802161940080467e-05, "loss": 32.8125, "step": 4431 }, { "epoch": 0.7111966943474947, "grad_norm": 12.081600189208984, "learning_rate": 1.9781745884116436e-05, "loss": 32.6328, "step": 4432 }, { "epoch": 0.7113571629157139, "grad_norm": 12.137929916381836, "learning_rate": 1.9761337762413263e-05, "loss": 32.7031, "step": 4433 }, { "epoch": 0.7115176314839331, "grad_norm": 12.07027816772461, "learning_rate": 1.974093758032935e-05, "loss": 32.5859, "step": 4434 }, { "epoch": 0.7116781000521523, "grad_norm": 12.011981964111328, "learning_rate": 1.9720545343221086e-05, "loss": 32.6016, "step": 4435 }, { "epoch": 0.7118385686203715, "grad_norm": 12.397140502929688, "learning_rate": 1.9700161056442777e-05, "loss": 32.6562, "step": 4436 }, { "epoch": 0.7119990371885907, "grad_norm": 11.942895889282227, "learning_rate": 1.9679784725346616e-05, "loss": 32.6328, "step": 4437 }, { "epoch": 0.7121595057568099, "grad_norm": 12.709551811218262, "learning_rate": 1.9659416355282705e-05, "loss": 32.5391, "step": 4438 }, { "epoch": 0.7123199743250291, "grad_norm": 11.945362091064453, "learning_rate": 1.9639055951599107e-05, "loss": 32.7656, "step": 4439 }, { "epoch": 0.7124804428932483, "grad_norm": 12.134064674377441, "learning_rate": 1.9618703519641724e-05, "loss": 32.6484, "step": 4440 }, { "epoch": 0.7126409114614675, "grad_norm": 12.016597747802734, "learning_rate": 1.9598359064754408e-05, "loss": 32.6875, "step": 4441 }, { "epoch": 0.7128013800296867, "grad_norm": 12.21990966796875, "learning_rate": 1.9578022592278893e-05, "loss": 32.6875, "step": 4442 }, { "epoch": 0.7129618485979059, "grad_norm": 12.154803276062012, "learning_rate": 1.9557694107554842e-05, "loss": 32.7812, "step": 4443 }, { "epoch": 0.7131223171661251, "grad_norm": 12.209243774414062, "learning_rate": 1.9537373615919797e-05, "loss": 32.6406, "step": 4444 }, { "epoch": 0.7132827857343443, "grad_norm": 12.071650505065918, "learning_rate": 1.951706112270922e-05, "loss": 32.6406, "step": 4445 }, { "epoch": 0.7134432543025635, "grad_norm": 12.198882102966309, "learning_rate": 1.9496756633256454e-05, "loss": 32.6328, "step": 4446 }, { "epoch": 0.7136037228707827, "grad_norm": 12.26561164855957, "learning_rate": 1.947646015289275e-05, "loss": 32.6172, "step": 4447 }, { "epoch": 0.7137641914390019, "grad_norm": 12.332622528076172, "learning_rate": 1.9456171686947267e-05, "loss": 32.5625, "step": 4448 }, { "epoch": 0.7139246600072211, "grad_norm": 12.055335998535156, "learning_rate": 1.9435891240747038e-05, "loss": 32.9531, "step": 4449 }, { "epoch": 0.7140851285754403, "grad_norm": 12.448436737060547, "learning_rate": 1.9415618819617015e-05, "loss": 33.0078, "step": 4450 }, { "epoch": 0.7142455971436595, "grad_norm": 12.032441139221191, "learning_rate": 1.9395354428879996e-05, "loss": 32.9609, "step": 4451 }, { "epoch": 0.7144060657118787, "grad_norm": 12.211808204650879, "learning_rate": 1.9375098073856758e-05, "loss": 32.75, "step": 4452 }, { "epoch": 0.7145665342800979, "grad_norm": 12.356968879699707, "learning_rate": 1.9354849759865878e-05, "loss": 32.75, "step": 4453 }, { "epoch": 0.7147270028483171, "grad_norm": 12.402192115783691, "learning_rate": 1.933460949222387e-05, "loss": 32.6406, "step": 4454 }, { "epoch": 0.7148874714165363, "grad_norm": 12.039358139038086, "learning_rate": 1.9314377276245115e-05, "loss": 32.75, "step": 4455 }, { "epoch": 0.7150479399847555, "grad_norm": 12.139947891235352, "learning_rate": 1.9294153117241896e-05, "loss": 32.8125, "step": 4456 }, { "epoch": 0.7152084085529747, "grad_norm": 12.094361305236816, "learning_rate": 1.9273937020524367e-05, "loss": 32.7734, "step": 4457 }, { "epoch": 0.7153688771211939, "grad_norm": 12.297158241271973, "learning_rate": 1.9253728991400577e-05, "loss": 32.7969, "step": 4458 }, { "epoch": 0.7155293456894131, "grad_norm": 12.144292831420898, "learning_rate": 1.923352903517644e-05, "loss": 32.75, "step": 4459 }, { "epoch": 0.7156898142576323, "grad_norm": 11.882333755493164, "learning_rate": 1.9213337157155768e-05, "loss": 32.7656, "step": 4460 }, { "epoch": 0.7158502828258515, "grad_norm": 12.331113815307617, "learning_rate": 1.919315336264025e-05, "loss": 32.6406, "step": 4461 }, { "epoch": 0.7160107513940707, "grad_norm": 12.069620132446289, "learning_rate": 1.9172977656929438e-05, "loss": 32.6406, "step": 4462 }, { "epoch": 0.7161712199622899, "grad_norm": 12.010082244873047, "learning_rate": 1.9152810045320767e-05, "loss": 32.625, "step": 4463 }, { "epoch": 0.7163316885305091, "grad_norm": 12.205680847167969, "learning_rate": 1.9132650533109564e-05, "loss": 32.6719, "step": 4464 }, { "epoch": 0.7164921570987283, "grad_norm": 12.144599914550781, "learning_rate": 1.911249912558898e-05, "loss": 32.6289, "step": 4465 }, { "epoch": 0.7166526256669475, "grad_norm": 12.139424324035645, "learning_rate": 1.909235582805012e-05, "loss": 32.6641, "step": 4466 }, { "epoch": 0.7168130942351667, "grad_norm": 12.14235782623291, "learning_rate": 1.9072220645781885e-05, "loss": 32.6641, "step": 4467 }, { "epoch": 0.7169735628033859, "grad_norm": 12.719788551330566, "learning_rate": 1.905209358407108e-05, "loss": 32.6406, "step": 4468 }, { "epoch": 0.7171340313716051, "grad_norm": 12.521056175231934, "learning_rate": 1.9031974648202387e-05, "loss": 32.6094, "step": 4469 }, { "epoch": 0.7172944999398243, "grad_norm": 12.34179401397705, "learning_rate": 1.90118638434583e-05, "loss": 32.6055, "step": 4470 }, { "epoch": 0.7174549685080435, "grad_norm": 12.272801399230957, "learning_rate": 1.8991761175119217e-05, "loss": 32.6719, "step": 4471 }, { "epoch": 0.7176154370762627, "grad_norm": 12.200560569763184, "learning_rate": 1.897166664846343e-05, "loss": 32.7578, "step": 4472 }, { "epoch": 0.7177759056444819, "grad_norm": 12.133136749267578, "learning_rate": 1.895158026876705e-05, "loss": 32.6328, "step": 4473 }, { "epoch": 0.7179363742127011, "grad_norm": 12.07683277130127, "learning_rate": 1.8931502041304056e-05, "loss": 32.7109, "step": 4474 }, { "epoch": 0.7180968427809203, "grad_norm": 12.072065353393555, "learning_rate": 1.8911431971346292e-05, "loss": 32.6445, "step": 4475 }, { "epoch": 0.7182573113491395, "grad_norm": 12.391812324523926, "learning_rate": 1.8891370064163465e-05, "loss": 32.6406, "step": 4476 }, { "epoch": 0.7184177799173587, "grad_norm": 12.138285636901855, "learning_rate": 1.8871316325023124e-05, "loss": 32.6328, "step": 4477 }, { "epoch": 0.7185782484855779, "grad_norm": 12.21198844909668, "learning_rate": 1.8851270759190685e-05, "loss": 32.6328, "step": 4478 }, { "epoch": 0.7187387170537971, "grad_norm": 12.196673393249512, "learning_rate": 1.8831233371929403e-05, "loss": 32.6641, "step": 4479 }, { "epoch": 0.7188991856220163, "grad_norm": 12.13336181640625, "learning_rate": 1.8811204168500428e-05, "loss": 32.625, "step": 4480 }, { "epoch": 0.7190596541902355, "grad_norm": 12.346602439880371, "learning_rate": 1.8791183154162728e-05, "loss": 32.6484, "step": 4481 }, { "epoch": 0.7192201227584547, "grad_norm": 12.385998725891113, "learning_rate": 1.87711703341731e-05, "loss": 32.7578, "step": 4482 }, { "epoch": 0.7193805913266739, "grad_norm": 12.011364936828613, "learning_rate": 1.8751165713786217e-05, "loss": 32.6797, "step": 4483 }, { "epoch": 0.7195410598948931, "grad_norm": 12.070847511291504, "learning_rate": 1.8731169298254602e-05, "loss": 32.6484, "step": 4484 }, { "epoch": 0.7197015284631123, "grad_norm": 12.136895179748535, "learning_rate": 1.8711181092828607e-05, "loss": 32.5938, "step": 4485 }, { "epoch": 0.7198619970313315, "grad_norm": 12.644181251525879, "learning_rate": 1.8691201102756454e-05, "loss": 32.5547, "step": 4486 }, { "epoch": 0.7200224655995507, "grad_norm": 12.012344360351562, "learning_rate": 1.8671229333284196e-05, "loss": 32.7109, "step": 4487 }, { "epoch": 0.7201829341677699, "grad_norm": 12.076114654541016, "learning_rate": 1.8651265789655714e-05, "loss": 32.6562, "step": 4488 }, { "epoch": 0.7203434027359891, "grad_norm": 12.259340286254883, "learning_rate": 1.863131047711274e-05, "loss": 32.5938, "step": 4489 }, { "epoch": 0.7205038713042083, "grad_norm": 12.080230712890625, "learning_rate": 1.8611363400894837e-05, "loss": 32.6562, "step": 4490 }, { "epoch": 0.7206643398724275, "grad_norm": 12.595839500427246, "learning_rate": 1.859142456623943e-05, "loss": 32.5859, "step": 4491 }, { "epoch": 0.7208248084406467, "grad_norm": 12.206145286560059, "learning_rate": 1.8571493978381744e-05, "loss": 32.668, "step": 4492 }, { "epoch": 0.7209852770088659, "grad_norm": 12.136310577392578, "learning_rate": 1.8551571642554865e-05, "loss": 32.6406, "step": 4493 }, { "epoch": 0.7211457455770851, "grad_norm": 12.14441967010498, "learning_rate": 1.853165756398971e-05, "loss": 32.6797, "step": 4494 }, { "epoch": 0.7213062141453043, "grad_norm": 12.14013957977295, "learning_rate": 1.8511751747915006e-05, "loss": 32.6406, "step": 4495 }, { "epoch": 0.7214666827135235, "grad_norm": 12.32667350769043, "learning_rate": 1.8491854199557346e-05, "loss": 32.6094, "step": 4496 }, { "epoch": 0.7216271512817427, "grad_norm": 12.387819290161133, "learning_rate": 1.8471964924141118e-05, "loss": 32.5391, "step": 4497 }, { "epoch": 0.7217876198499619, "grad_norm": 12.203372955322266, "learning_rate": 1.8452083926888563e-05, "loss": 32.7109, "step": 4498 }, { "epoch": 0.7219480884181811, "grad_norm": 12.383970260620117, "learning_rate": 1.8432211213019712e-05, "loss": 32.9375, "step": 4499 }, { "epoch": 0.7221085569864003, "grad_norm": 12.165568351745605, "learning_rate": 1.8412346787752495e-05, "loss": 32.9609, "step": 4500 }, { "epoch": 0.7222690255546195, "grad_norm": 12.366477012634277, "learning_rate": 1.8392490656302584e-05, "loss": 32.7812, "step": 4501 }, { "epoch": 0.7224294941228386, "grad_norm": 12.29000473022461, "learning_rate": 1.837264282388352e-05, "loss": 32.875, "step": 4502 }, { "epoch": 0.7225899626910579, "grad_norm": 12.203662872314453, "learning_rate": 1.835280329570665e-05, "loss": 32.7422, "step": 4503 }, { "epoch": 0.7227504312592771, "grad_norm": 12.49921989440918, "learning_rate": 1.8332972076981152e-05, "loss": 32.8125, "step": 4504 }, { "epoch": 0.7229108998274962, "grad_norm": 12.33557415008545, "learning_rate": 1.8313149172913973e-05, "loss": 32.8125, "step": 4505 }, { "epoch": 0.7230713683957155, "grad_norm": 12.275167465209961, "learning_rate": 1.8293334588709965e-05, "loss": 32.7656, "step": 4506 }, { "epoch": 0.7232318369639347, "grad_norm": 12.272835731506348, "learning_rate": 1.8273528329571728e-05, "loss": 32.7344, "step": 4507 }, { "epoch": 0.7233923055321538, "grad_norm": 12.577232360839844, "learning_rate": 1.8253730400699693e-05, "loss": 32.6172, "step": 4508 }, { "epoch": 0.7235527741003731, "grad_norm": 12.208182334899902, "learning_rate": 1.8233940807292117e-05, "loss": 32.7422, "step": 4509 }, { "epoch": 0.7237132426685923, "grad_norm": 12.073895454406738, "learning_rate": 1.8214159554545048e-05, "loss": 32.7109, "step": 4510 }, { "epoch": 0.7238737112368114, "grad_norm": 12.450322151184082, "learning_rate": 1.819438664765235e-05, "loss": 32.6328, "step": 4511 }, { "epoch": 0.7240341798050307, "grad_norm": 12.08234977722168, "learning_rate": 1.8174622091805697e-05, "loss": 32.6602, "step": 4512 }, { "epoch": 0.7241946483732499, "grad_norm": 12.666685104370117, "learning_rate": 1.8154865892194595e-05, "loss": 32.7344, "step": 4513 }, { "epoch": 0.724355116941469, "grad_norm": 12.009966850280762, "learning_rate": 1.8135118054006324e-05, "loss": 32.6484, "step": 4514 }, { "epoch": 0.7245155855096883, "grad_norm": 12.206504821777344, "learning_rate": 1.8115378582425973e-05, "loss": 32.6406, "step": 4515 }, { "epoch": 0.7246760540779075, "grad_norm": 12.402070999145508, "learning_rate": 1.809564748263646e-05, "loss": 32.7578, "step": 4516 }, { "epoch": 0.7248365226461266, "grad_norm": 12.133157730102539, "learning_rate": 1.8075924759818454e-05, "loss": 32.7344, "step": 4517 }, { "epoch": 0.7249969912143459, "grad_norm": 12.20137882232666, "learning_rate": 1.8056210419150445e-05, "loss": 32.7422, "step": 4518 }, { "epoch": 0.7251574597825651, "grad_norm": 12.205679893493652, "learning_rate": 1.8036504465808768e-05, "loss": 32.6641, "step": 4519 }, { "epoch": 0.7253179283507842, "grad_norm": 12.25782585144043, "learning_rate": 1.801680690496751e-05, "loss": 32.6016, "step": 4520 }, { "epoch": 0.7254783969190035, "grad_norm": 12.256885528564453, "learning_rate": 1.7997117741798557e-05, "loss": 32.5625, "step": 4521 }, { "epoch": 0.7256388654872227, "grad_norm": 12.075176239013672, "learning_rate": 1.7977436981471608e-05, "loss": 32.6875, "step": 4522 }, { "epoch": 0.7257993340554418, "grad_norm": 12.134714126586914, "learning_rate": 1.7957764629154133e-05, "loss": 32.6641, "step": 4523 }, { "epoch": 0.7259598026236611, "grad_norm": 12.20493221282959, "learning_rate": 1.7938100690011413e-05, "loss": 32.625, "step": 4524 }, { "epoch": 0.7261202711918803, "grad_norm": 12.332372665405273, "learning_rate": 1.791844516920651e-05, "loss": 32.6797, "step": 4525 }, { "epoch": 0.7262807397600994, "grad_norm": 12.070125579833984, "learning_rate": 1.7898798071900263e-05, "loss": 32.6484, "step": 4526 }, { "epoch": 0.7264412083283187, "grad_norm": 12.20492172241211, "learning_rate": 1.787915940325135e-05, "loss": 32.5859, "step": 4527 }, { "epoch": 0.7266016768965379, "grad_norm": 12.329840660095215, "learning_rate": 1.78595291684162e-05, "loss": 32.625, "step": 4528 }, { "epoch": 0.726762145464757, "grad_norm": 12.401717185974121, "learning_rate": 1.7839907372548998e-05, "loss": 32.5703, "step": 4529 }, { "epoch": 0.7269226140329763, "grad_norm": 12.274455070495605, "learning_rate": 1.782029402080176e-05, "loss": 32.6484, "step": 4530 }, { "epoch": 0.7270830826011955, "grad_norm": 12.333630561828613, "learning_rate": 1.7800689118324266e-05, "loss": 32.5703, "step": 4531 }, { "epoch": 0.7272435511694146, "grad_norm": 12.269452095031738, "learning_rate": 1.7781092670264066e-05, "loss": 32.6875, "step": 4532 }, { "epoch": 0.7274040197376339, "grad_norm": 12.261054992675781, "learning_rate": 1.776150468176654e-05, "loss": 32.5703, "step": 4533 }, { "epoch": 0.7275644883058531, "grad_norm": 12.009443283081055, "learning_rate": 1.7741925157974788e-05, "loss": 32.6797, "step": 4534 }, { "epoch": 0.7277249568740722, "grad_norm": 12.068188667297363, "learning_rate": 1.7722354104029716e-05, "loss": 32.6328, "step": 4535 }, { "epoch": 0.7278854254422915, "grad_norm": 12.19961929321289, "learning_rate": 1.770279152507e-05, "loss": 32.5938, "step": 4536 }, { "epoch": 0.7280458940105107, "grad_norm": 12.011438369750977, "learning_rate": 1.7683237426232097e-05, "loss": 32.6875, "step": 4537 }, { "epoch": 0.7282063625787298, "grad_norm": 12.010998725891113, "learning_rate": 1.7663691812650217e-05, "loss": 32.6562, "step": 4538 }, { "epoch": 0.7283668311469491, "grad_norm": 11.943591117858887, "learning_rate": 1.7644154689456376e-05, "loss": 32.6797, "step": 4539 }, { "epoch": 0.7285272997151683, "grad_norm": 12.20511245727539, "learning_rate": 1.7624626061780332e-05, "loss": 32.625, "step": 4540 }, { "epoch": 0.7286877682833874, "grad_norm": 12.031047821044922, "learning_rate": 1.760510593474962e-05, "loss": 32.8594, "step": 4541 }, { "epoch": 0.7288482368516067, "grad_norm": 12.263422012329102, "learning_rate": 1.758559431348955e-05, "loss": 32.5625, "step": 4542 }, { "epoch": 0.7290087054198259, "grad_norm": 12.641525268554688, "learning_rate": 1.7566091203123193e-05, "loss": 32.5547, "step": 4543 }, { "epoch": 0.729169173988045, "grad_norm": 12.960322380065918, "learning_rate": 1.7546596608771383e-05, "loss": 32.5625, "step": 4544 }, { "epoch": 0.7293296425562643, "grad_norm": 12.215771675109863, "learning_rate": 1.7527110535552726e-05, "loss": 32.6406, "step": 4545 }, { "epoch": 0.7294901111244835, "grad_norm": 12.215038299560547, "learning_rate": 1.7507632988583573e-05, "loss": 32.7656, "step": 4546 }, { "epoch": 0.7296505796927026, "grad_norm": 12.070384979248047, "learning_rate": 1.748816397297807e-05, "loss": 32.6406, "step": 4547 }, { "epoch": 0.7298110482609219, "grad_norm": 12.018436431884766, "learning_rate": 1.74687034938481e-05, "loss": 32.8203, "step": 4548 }, { "epoch": 0.7299715168291411, "grad_norm": 12.361044883728027, "learning_rate": 1.7449251556303293e-05, "loss": 32.8047, "step": 4549 }, { "epoch": 0.7301319853973602, "grad_norm": 12.390410423278809, "learning_rate": 1.7429808165451063e-05, "loss": 33.1172, "step": 4550 }, { "epoch": 0.7302924539655795, "grad_norm": 12.18814754486084, "learning_rate": 1.7410373326396572e-05, "loss": 33.0859, "step": 4551 }, { "epoch": 0.7304529225337987, "grad_norm": 12.091423034667969, "learning_rate": 1.7390947044242694e-05, "loss": 32.8906, "step": 4552 }, { "epoch": 0.7306133911020178, "grad_norm": 12.351814270019531, "learning_rate": 1.737152932409013e-05, "loss": 32.7344, "step": 4553 }, { "epoch": 0.7307738596702371, "grad_norm": 12.211512565612793, "learning_rate": 1.7352120171037295e-05, "loss": 32.8125, "step": 4554 }, { "epoch": 0.7309343282384563, "grad_norm": 12.020867347717285, "learning_rate": 1.7332719590180346e-05, "loss": 32.7344, "step": 4555 }, { "epoch": 0.7310947968066754, "grad_norm": 12.396899223327637, "learning_rate": 1.73133275866132e-05, "loss": 32.6641, "step": 4556 }, { "epoch": 0.7312552653748947, "grad_norm": 12.152924537658691, "learning_rate": 1.7293944165427528e-05, "loss": 32.7734, "step": 4557 }, { "epoch": 0.7314157339431139, "grad_norm": 12.156139373779297, "learning_rate": 1.7274569331712736e-05, "loss": 32.7969, "step": 4558 }, { "epoch": 0.731576202511333, "grad_norm": 12.4114990234375, "learning_rate": 1.7255203090555966e-05, "loss": 32.6719, "step": 4559 }, { "epoch": 0.7317366710795523, "grad_norm": 12.391785621643066, "learning_rate": 1.7235845447042148e-05, "loss": 32.7734, "step": 4560 }, { "epoch": 0.7318971396477715, "grad_norm": 12.14000129699707, "learning_rate": 1.721649640625391e-05, "loss": 32.7227, "step": 4561 }, { "epoch": 0.7320576082159906, "grad_norm": 12.410593032836914, "learning_rate": 1.719715597327164e-05, "loss": 32.5938, "step": 4562 }, { "epoch": 0.7322180767842099, "grad_norm": 12.149222373962402, "learning_rate": 1.7177824153173477e-05, "loss": 32.875, "step": 4563 }, { "epoch": 0.7323785453524291, "grad_norm": 12.015515327453613, "learning_rate": 1.7158500951035246e-05, "loss": 32.7266, "step": 4564 }, { "epoch": 0.7325390139206482, "grad_norm": 12.276789665222168, "learning_rate": 1.713918637193055e-05, "loss": 32.6172, "step": 4565 }, { "epoch": 0.7326994824888675, "grad_norm": 12.265860557556152, "learning_rate": 1.7119880420930757e-05, "loss": 32.6562, "step": 4566 }, { "epoch": 0.7328599510570867, "grad_norm": 12.52540111541748, "learning_rate": 1.7100583103104923e-05, "loss": 32.6797, "step": 4567 }, { "epoch": 0.7330204196253058, "grad_norm": 12.751749992370605, "learning_rate": 1.708129442351985e-05, "loss": 32.8047, "step": 4568 }, { "epoch": 0.7331808881935251, "grad_norm": 12.141637802124023, "learning_rate": 1.7062014387240067e-05, "loss": 32.7109, "step": 4569 }, { "epoch": 0.7333413567617443, "grad_norm": 12.204872131347656, "learning_rate": 1.704274299932785e-05, "loss": 32.6641, "step": 4570 }, { "epoch": 0.7335018253299634, "grad_norm": 12.262224197387695, "learning_rate": 1.702348026484319e-05, "loss": 32.6484, "step": 4571 }, { "epoch": 0.7336622938981827, "grad_norm": 12.1368408203125, "learning_rate": 1.7004226188843804e-05, "loss": 32.6406, "step": 4572 }, { "epoch": 0.7338227624664019, "grad_norm": 12.402066230773926, "learning_rate": 1.6984980776385134e-05, "loss": 32.6719, "step": 4573 }, { "epoch": 0.733983231034621, "grad_norm": 12.221872329711914, "learning_rate": 1.6965744032520386e-05, "loss": 32.6094, "step": 4574 }, { "epoch": 0.7341436996028403, "grad_norm": 12.008415222167969, "learning_rate": 1.694651596230046e-05, "loss": 32.6719, "step": 4575 }, { "epoch": 0.7343041681710595, "grad_norm": 12.398512840270996, "learning_rate": 1.6927296570773936e-05, "loss": 32.6562, "step": 4576 }, { "epoch": 0.7344646367392786, "grad_norm": 12.324417114257812, "learning_rate": 1.6908085862987176e-05, "loss": 32.5938, "step": 4577 }, { "epoch": 0.7346251053074979, "grad_norm": 12.199882507324219, "learning_rate": 1.6888883843984256e-05, "loss": 32.6875, "step": 4578 }, { "epoch": 0.7347855738757171, "grad_norm": 12.069725036621094, "learning_rate": 1.6869690518806923e-05, "loss": 32.625, "step": 4579 }, { "epoch": 0.7349460424439362, "grad_norm": 12.018672943115234, "learning_rate": 1.6850505892494718e-05, "loss": 32.8125, "step": 4580 }, { "epoch": 0.7351065110121555, "grad_norm": 12.134636878967285, "learning_rate": 1.6831329970084837e-05, "loss": 32.6484, "step": 4581 }, { "epoch": 0.7352669795803747, "grad_norm": 12.323751449584961, "learning_rate": 1.6812162756612205e-05, "loss": 32.6172, "step": 4582 }, { "epoch": 0.7354274481485938, "grad_norm": 12.075593948364258, "learning_rate": 1.6793004257109473e-05, "loss": 32.6523, "step": 4583 }, { "epoch": 0.7355879167168131, "grad_norm": 12.148301124572754, "learning_rate": 1.6773854476606987e-05, "loss": 32.6406, "step": 4584 }, { "epoch": 0.7357483852850323, "grad_norm": 11.947427749633789, "learning_rate": 1.6754713420132824e-05, "loss": 32.7031, "step": 4585 }, { "epoch": 0.7359088538532514, "grad_norm": 12.328134536743164, "learning_rate": 1.6735581092712744e-05, "loss": 32.5938, "step": 4586 }, { "epoch": 0.7360693224214707, "grad_norm": 12.083564758300781, "learning_rate": 1.6716457499370236e-05, "loss": 32.6641, "step": 4587 }, { "epoch": 0.7362297909896899, "grad_norm": 12.075163841247559, "learning_rate": 1.6697342645126495e-05, "loss": 32.6406, "step": 4588 }, { "epoch": 0.736390259557909, "grad_norm": 12.32316780090332, "learning_rate": 1.6678236535000406e-05, "loss": 32.5703, "step": 4589 }, { "epoch": 0.7365507281261283, "grad_norm": 12.263737678527832, "learning_rate": 1.6659139174008575e-05, "loss": 32.6016, "step": 4590 }, { "epoch": 0.7367111966943475, "grad_norm": 12.400476455688477, "learning_rate": 1.6640050567165306e-05, "loss": 32.5938, "step": 4591 }, { "epoch": 0.7368716652625666, "grad_norm": 12.136061668395996, "learning_rate": 1.6620970719482593e-05, "loss": 32.6875, "step": 4592 }, { "epoch": 0.7370321338307859, "grad_norm": 12.153630256652832, "learning_rate": 1.6601899635970126e-05, "loss": 32.7188, "step": 4593 }, { "epoch": 0.7371926023990051, "grad_norm": 12.27729320526123, "learning_rate": 1.658283732163534e-05, "loss": 32.6875, "step": 4594 }, { "epoch": 0.7373530709672242, "grad_norm": 12.14190673828125, "learning_rate": 1.6563783781483326e-05, "loss": 32.7266, "step": 4595 }, { "epoch": 0.7375135395354435, "grad_norm": 12.200196266174316, "learning_rate": 1.654473902051687e-05, "loss": 32.6172, "step": 4596 }, { "epoch": 0.7376740081036627, "grad_norm": 12.009177207946777, "learning_rate": 1.6525703043736467e-05, "loss": 32.6875, "step": 4597 }, { "epoch": 0.7378344766718818, "grad_norm": 12.4039306640625, "learning_rate": 1.6506675856140304e-05, "loss": 32.6719, "step": 4598 }, { "epoch": 0.7379949452401011, "grad_norm": 12.388569831848145, "learning_rate": 1.648765746272425e-05, "loss": 32.625, "step": 4599 }, { "epoch": 0.7381554138083203, "grad_norm": 12.032159805297852, "learning_rate": 1.646864786848188e-05, "loss": 32.9141, "step": 4600 }, { "epoch": 0.7383158823765394, "grad_norm": 12.103559494018555, "learning_rate": 1.6449647078404444e-05, "loss": 33.1875, "step": 4601 }, { "epoch": 0.7384763509447587, "grad_norm": 12.078863143920898, "learning_rate": 1.6430655097480897e-05, "loss": 32.6719, "step": 4602 }, { "epoch": 0.7386368195129779, "grad_norm": 12.433663368225098, "learning_rate": 1.6411671930697862e-05, "loss": 32.8398, "step": 4603 }, { "epoch": 0.738797288081197, "grad_norm": 12.135189056396484, "learning_rate": 1.639269758303966e-05, "loss": 32.6562, "step": 4604 }, { "epoch": 0.7389577566494163, "grad_norm": 12.144491195678711, "learning_rate": 1.63737320594883e-05, "loss": 32.6797, "step": 4605 }, { "epoch": 0.7391182252176355, "grad_norm": 12.07380199432373, "learning_rate": 1.6354775365023444e-05, "loss": 32.6875, "step": 4606 }, { "epoch": 0.7392786937858546, "grad_norm": 12.260950088500977, "learning_rate": 1.6335827504622498e-05, "loss": 32.625, "step": 4607 }, { "epoch": 0.7394391623540739, "grad_norm": 12.015227317810059, "learning_rate": 1.631688848326049e-05, "loss": 32.8281, "step": 4608 }, { "epoch": 0.7395996309222931, "grad_norm": 12.013606071472168, "learning_rate": 1.6297958305910144e-05, "loss": 32.8203, "step": 4609 }, { "epoch": 0.7397600994905122, "grad_norm": 12.148202896118164, "learning_rate": 1.627903697754189e-05, "loss": 32.7266, "step": 4610 }, { "epoch": 0.7399205680587315, "grad_norm": 12.397109031677246, "learning_rate": 1.626012450312377e-05, "loss": 32.6875, "step": 4611 }, { "epoch": 0.7400810366269507, "grad_norm": 12.465351104736328, "learning_rate": 1.6241220887621537e-05, "loss": 32.7812, "step": 4612 }, { "epoch": 0.7402415051951698, "grad_norm": 12.25633716583252, "learning_rate": 1.622232613599867e-05, "loss": 32.6328, "step": 4613 }, { "epoch": 0.7404019737633891, "grad_norm": 11.948629379272461, "learning_rate": 1.6203440253216245e-05, "loss": 32.7188, "step": 4614 }, { "epoch": 0.7405624423316083, "grad_norm": 12.139904975891113, "learning_rate": 1.6184563244233036e-05, "loss": 32.625, "step": 4615 }, { "epoch": 0.7407229108998274, "grad_norm": 12.137085914611816, "learning_rate": 1.616569511400549e-05, "loss": 32.6484, "step": 4616 }, { "epoch": 0.7408833794680467, "grad_norm": 12.775156021118164, "learning_rate": 1.6146835867487724e-05, "loss": 32.5391, "step": 4617 }, { "epoch": 0.7410438480362659, "grad_norm": 12.068997383117676, "learning_rate": 1.6127985509631523e-05, "loss": 32.6406, "step": 4618 }, { "epoch": 0.741204316604485, "grad_norm": 12.145155906677246, "learning_rate": 1.610914404538632e-05, "loss": 32.7344, "step": 4619 }, { "epoch": 0.7413647851727043, "grad_norm": 12.777229309082031, "learning_rate": 1.609031147969922e-05, "loss": 32.5469, "step": 4620 }, { "epoch": 0.7415252537409235, "grad_norm": 12.207139015197754, "learning_rate": 1.607148781751503e-05, "loss": 32.7266, "step": 4621 }, { "epoch": 0.7416857223091426, "grad_norm": 12.080289840698242, "learning_rate": 1.6052673063776186e-05, "loss": 32.7188, "step": 4622 }, { "epoch": 0.7418461908773619, "grad_norm": 12.19212532043457, "learning_rate": 1.603386722342276e-05, "loss": 32.5859, "step": 4623 }, { "epoch": 0.7420066594455811, "grad_norm": 12.022920608520508, "learning_rate": 1.601507030139252e-05, "loss": 32.7344, "step": 4624 }, { "epoch": 0.7421671280138002, "grad_norm": 12.012195587158203, "learning_rate": 1.599628230262089e-05, "loss": 32.7734, "step": 4625 }, { "epoch": 0.7423275965820195, "grad_norm": 12.260295867919922, "learning_rate": 1.5977503232040917e-05, "loss": 32.5938, "step": 4626 }, { "epoch": 0.7424880651502387, "grad_norm": 12.133206367492676, "learning_rate": 1.595873309458338e-05, "loss": 32.6484, "step": 4627 }, { "epoch": 0.7426485337184578, "grad_norm": 12.07229995727539, "learning_rate": 1.593997189517663e-05, "loss": 32.6016, "step": 4628 }, { "epoch": 0.7428090022866771, "grad_norm": 12.15496826171875, "learning_rate": 1.5921219638746713e-05, "loss": 32.7109, "step": 4629 }, { "epoch": 0.7429694708548963, "grad_norm": 12.328042984008789, "learning_rate": 1.5902476330217316e-05, "loss": 32.6016, "step": 4630 }, { "epoch": 0.7431299394231154, "grad_norm": 12.516448974609375, "learning_rate": 1.588374197450978e-05, "loss": 32.6094, "step": 4631 }, { "epoch": 0.7432904079913347, "grad_norm": 12.388081550598145, "learning_rate": 1.586501657654309e-05, "loss": 32.6406, "step": 4632 }, { "epoch": 0.7434508765595539, "grad_norm": 12.076770782470703, "learning_rate": 1.584630014123389e-05, "loss": 32.6328, "step": 4633 }, { "epoch": 0.743611345127773, "grad_norm": 12.007450103759766, "learning_rate": 1.582759267349646e-05, "loss": 32.6016, "step": 4634 }, { "epoch": 0.7437718136959923, "grad_norm": 12.242728233337402, "learning_rate": 1.5808894178242733e-05, "loss": 32.7383, "step": 4635 }, { "epoch": 0.7439322822642115, "grad_norm": 12.336281776428223, "learning_rate": 1.5790204660382272e-05, "loss": 32.5625, "step": 4636 }, { "epoch": 0.7440927508324306, "grad_norm": 12.347079277038574, "learning_rate": 1.5771524124822308e-05, "loss": 32.6797, "step": 4637 }, { "epoch": 0.7442532194006499, "grad_norm": 12.26287841796875, "learning_rate": 1.575285257646769e-05, "loss": 32.5859, "step": 4638 }, { "epoch": 0.7444136879688691, "grad_norm": 12.209053993225098, "learning_rate": 1.573419002022091e-05, "loss": 32.6797, "step": 4639 }, { "epoch": 0.7445741565370882, "grad_norm": 12.329981803894043, "learning_rate": 1.5715536460982107e-05, "loss": 32.6797, "step": 4640 }, { "epoch": 0.7447346251053075, "grad_norm": 12.137319564819336, "learning_rate": 1.5696891903649074e-05, "loss": 32.7031, "step": 4641 }, { "epoch": 0.7448950936735267, "grad_norm": 12.200759887695312, "learning_rate": 1.567825635311721e-05, "loss": 32.6484, "step": 4642 }, { "epoch": 0.7450555622417459, "grad_norm": 11.945943832397461, "learning_rate": 1.5659629814279563e-05, "loss": 32.6406, "step": 4643 }, { "epoch": 0.7452160308099651, "grad_norm": 12.137755393981934, "learning_rate": 1.5641012292026807e-05, "loss": 32.6562, "step": 4644 }, { "epoch": 0.7453764993781843, "grad_norm": 12.208436965942383, "learning_rate": 1.5622403791247265e-05, "loss": 32.6484, "step": 4645 }, { "epoch": 0.7455369679464035, "grad_norm": 12.274641990661621, "learning_rate": 1.560380431682687e-05, "loss": 32.6328, "step": 4646 }, { "epoch": 0.7456974365146227, "grad_norm": 12.197702407836914, "learning_rate": 1.5585213873649203e-05, "loss": 32.6797, "step": 4647 }, { "epoch": 0.7458579050828419, "grad_norm": 12.20352554321289, "learning_rate": 1.5566632466595467e-05, "loss": 32.7031, "step": 4648 }, { "epoch": 0.746018373651061, "grad_norm": 12.155393600463867, "learning_rate": 1.5548060100544486e-05, "loss": 32.8047, "step": 4649 }, { "epoch": 0.7461788422192803, "grad_norm": 12.153889656066895, "learning_rate": 1.552949678037272e-05, "loss": 32.8672, "step": 4650 }, { "epoch": 0.7463393107874995, "grad_norm": 12.303956031799316, "learning_rate": 1.5510942510954257e-05, "loss": 33.0234, "step": 4651 }, { "epoch": 0.7464997793557187, "grad_norm": 12.015432357788086, "learning_rate": 1.5492397297160787e-05, "loss": 32.8125, "step": 4652 }, { "epoch": 0.7466602479239379, "grad_norm": 12.297836303710938, "learning_rate": 1.5473861143861652e-05, "loss": 32.7969, "step": 4653 }, { "epoch": 0.7468207164921571, "grad_norm": 12.221158981323242, "learning_rate": 1.5455334055923775e-05, "loss": 32.8594, "step": 4654 }, { "epoch": 0.7469811850603763, "grad_norm": 12.143050193786621, "learning_rate": 1.543681603821176e-05, "loss": 32.7656, "step": 4655 }, { "epoch": 0.7471416536285955, "grad_norm": 12.214700698852539, "learning_rate": 1.5418307095587777e-05, "loss": 32.7344, "step": 4656 }, { "epoch": 0.7473021221968147, "grad_norm": 12.01083755493164, "learning_rate": 1.5399807232911638e-05, "loss": 32.7266, "step": 4657 }, { "epoch": 0.7474625907650339, "grad_norm": 12.342293739318848, "learning_rate": 1.5381316455040744e-05, "loss": 32.6562, "step": 4658 }, { "epoch": 0.7476230593332531, "grad_norm": 12.270655632019043, "learning_rate": 1.5362834766830136e-05, "loss": 32.7383, "step": 4659 }, { "epoch": 0.7477835279014723, "grad_norm": 12.706460952758789, "learning_rate": 1.5344362173132448e-05, "loss": 32.6172, "step": 4660 }, { "epoch": 0.7479439964696915, "grad_norm": 12.202423095703125, "learning_rate": 1.5325898678797967e-05, "loss": 32.7656, "step": 4661 }, { "epoch": 0.7481044650379107, "grad_norm": 12.134648323059082, "learning_rate": 1.5307444288674557e-05, "loss": 32.6484, "step": 4662 }, { "epoch": 0.7482649336061299, "grad_norm": 11.957030296325684, "learning_rate": 1.528899900760769e-05, "loss": 32.7891, "step": 4663 }, { "epoch": 0.748425402174349, "grad_norm": 12.073202133178711, "learning_rate": 1.5270562840440454e-05, "loss": 32.7109, "step": 4664 }, { "epoch": 0.7485858707425683, "grad_norm": 12.085883140563965, "learning_rate": 1.525213579201355e-05, "loss": 32.8359, "step": 4665 }, { "epoch": 0.7487463393107875, "grad_norm": 12.276567459106445, "learning_rate": 1.5233717867165277e-05, "loss": 32.6641, "step": 4666 }, { "epoch": 0.7489068078790067, "grad_norm": 12.196990013122559, "learning_rate": 1.5215309070731521e-05, "loss": 32.625, "step": 4667 }, { "epoch": 0.7490672764472259, "grad_norm": 12.075723648071289, "learning_rate": 1.5196909407545823e-05, "loss": 32.7031, "step": 4668 }, { "epoch": 0.7492277450154451, "grad_norm": 12.326360702514648, "learning_rate": 1.5178518882439297e-05, "loss": 32.6016, "step": 4669 }, { "epoch": 0.7493882135836643, "grad_norm": 12.200350761413574, "learning_rate": 1.5160137500240623e-05, "loss": 32.6484, "step": 4670 }, { "epoch": 0.7495486821518835, "grad_norm": 12.076242446899414, "learning_rate": 1.5141765265776125e-05, "loss": 32.6641, "step": 4671 }, { "epoch": 0.7497091507201027, "grad_norm": 12.0060396194458, "learning_rate": 1.5123402183869706e-05, "loss": 32.6719, "step": 4672 }, { "epoch": 0.7498696192883219, "grad_norm": 12.133743286132812, "learning_rate": 1.5105048259342869e-05, "loss": 32.6797, "step": 4673 }, { "epoch": 0.7500300878565411, "grad_norm": 12.271866798400879, "learning_rate": 1.5086703497014738e-05, "loss": 32.5625, "step": 4674 }, { "epoch": 0.7501905564247603, "grad_norm": 12.210887908935547, "learning_rate": 1.5068367901701991e-05, "loss": 32.7109, "step": 4675 }, { "epoch": 0.7503510249929795, "grad_norm": 12.211675643920898, "learning_rate": 1.5050041478218924e-05, "loss": 32.6484, "step": 4676 }, { "epoch": 0.7505114935611987, "grad_norm": 12.145259857177734, "learning_rate": 1.5031724231377408e-05, "loss": 32.6406, "step": 4677 }, { "epoch": 0.7506719621294179, "grad_norm": 12.208637237548828, "learning_rate": 1.5013416165986927e-05, "loss": 32.5781, "step": 4678 }, { "epoch": 0.750832430697637, "grad_norm": 11.945272445678711, "learning_rate": 1.4995117286854526e-05, "loss": 32.7656, "step": 4679 }, { "epoch": 0.7509928992658563, "grad_norm": 12.072966575622559, "learning_rate": 1.4976827598784866e-05, "loss": 32.5781, "step": 4680 }, { "epoch": 0.7511533678340755, "grad_norm": 12.148838996887207, "learning_rate": 1.4958547106580173e-05, "loss": 32.6406, "step": 4681 }, { "epoch": 0.7513138364022947, "grad_norm": 11.945777893066406, "learning_rate": 1.4940275815040272e-05, "loss": 32.6875, "step": 4682 }, { "epoch": 0.7514743049705139, "grad_norm": 12.212858200073242, "learning_rate": 1.4922013728962564e-05, "loss": 32.6562, "step": 4683 }, { "epoch": 0.7516347735387331, "grad_norm": 12.138895988464355, "learning_rate": 1.490376085314204e-05, "loss": 32.6484, "step": 4684 }, { "epoch": 0.7517952421069523, "grad_norm": 12.198585510253906, "learning_rate": 1.4885517192371267e-05, "loss": 32.6328, "step": 4685 }, { "epoch": 0.7519557106751715, "grad_norm": 12.071463584899902, "learning_rate": 1.4867282751440404e-05, "loss": 32.625, "step": 4686 }, { "epoch": 0.7521161792433907, "grad_norm": 12.005583763122559, "learning_rate": 1.4849057535137151e-05, "loss": 32.6094, "step": 4687 }, { "epoch": 0.7522766478116099, "grad_norm": 12.199132919311523, "learning_rate": 1.4830841548246848e-05, "loss": 32.5781, "step": 4688 }, { "epoch": 0.7524371163798291, "grad_norm": 12.010945320129395, "learning_rate": 1.4812634795552371e-05, "loss": 32.6484, "step": 4689 }, { "epoch": 0.7525975849480483, "grad_norm": 12.288716316223145, "learning_rate": 1.4794437281834172e-05, "loss": 32.6484, "step": 4690 }, { "epoch": 0.7527580535162675, "grad_norm": 12.0238037109375, "learning_rate": 1.4776249011870291e-05, "loss": 32.7656, "step": 4691 }, { "epoch": 0.7529185220844867, "grad_norm": 12.449838638305664, "learning_rate": 1.475806999043633e-05, "loss": 32.6172, "step": 4692 }, { "epoch": 0.7530789906527059, "grad_norm": 11.95398998260498, "learning_rate": 1.4739900222305463e-05, "loss": 32.7812, "step": 4693 }, { "epoch": 0.7532394592209251, "grad_norm": 12.40337085723877, "learning_rate": 1.4721739712248445e-05, "loss": 32.5781, "step": 4694 }, { "epoch": 0.7533999277891443, "grad_norm": 12.212316513061523, "learning_rate": 1.4703588465033585e-05, "loss": 32.6797, "step": 4695 }, { "epoch": 0.7535603963573635, "grad_norm": 12.259669303894043, "learning_rate": 1.4685446485426773e-05, "loss": 32.5859, "step": 4696 }, { "epoch": 0.7537208649255827, "grad_norm": 12.139063835144043, "learning_rate": 1.4667313778191466e-05, "loss": 32.6719, "step": 4697 }, { "epoch": 0.7538813334938019, "grad_norm": 12.679699897766113, "learning_rate": 1.4649190348088675e-05, "loss": 32.625, "step": 4698 }, { "epoch": 0.7540418020620211, "grad_norm": 12.171676635742188, "learning_rate": 1.463107619987698e-05, "loss": 32.8594, "step": 4699 }, { "epoch": 0.7542022706302403, "grad_norm": 12.158451080322266, "learning_rate": 1.4612971338312531e-05, "loss": 32.8789, "step": 4700 }, { "epoch": 0.7543627391984595, "grad_norm": 12.215143203735352, "learning_rate": 1.4594875768149013e-05, "loss": 32.9375, "step": 4701 }, { "epoch": 0.7545232077666787, "grad_norm": 12.42088508605957, "learning_rate": 1.4576789494137728e-05, "loss": 32.9062, "step": 4702 }, { "epoch": 0.7546836763348979, "grad_norm": 12.223814964294434, "learning_rate": 1.4558712521027479e-05, "loss": 32.7812, "step": 4703 }, { "epoch": 0.7548441449031171, "grad_norm": 12.160947799682617, "learning_rate": 1.4540644853564655e-05, "loss": 32.7773, "step": 4704 }, { "epoch": 0.7550046134713363, "grad_norm": 12.531890869140625, "learning_rate": 1.4522586496493213e-05, "loss": 32.7812, "step": 4705 }, { "epoch": 0.7551650820395555, "grad_norm": 12.079865455627441, "learning_rate": 1.4504537454554618e-05, "loss": 32.7031, "step": 4706 }, { "epoch": 0.7553255506077747, "grad_norm": 12.27364444732666, "learning_rate": 1.4486497732487909e-05, "loss": 32.7031, "step": 4707 }, { "epoch": 0.7554860191759939, "grad_norm": 12.733564376831055, "learning_rate": 1.4468467335029734e-05, "loss": 32.6562, "step": 4708 }, { "epoch": 0.7556464877442131, "grad_norm": 12.204866409301758, "learning_rate": 1.445044626691422e-05, "loss": 32.7266, "step": 4709 }, { "epoch": 0.7558069563124323, "grad_norm": 12.072696685791016, "learning_rate": 1.443243453287308e-05, "loss": 32.6641, "step": 4710 }, { "epoch": 0.7559674248806515, "grad_norm": 12.07732105255127, "learning_rate": 1.4414432137635564e-05, "loss": 32.7188, "step": 4711 }, { "epoch": 0.7561278934488707, "grad_norm": 12.07228946685791, "learning_rate": 1.439643908592847e-05, "loss": 32.6797, "step": 4712 }, { "epoch": 0.7562883620170899, "grad_norm": 12.32885456085205, "learning_rate": 1.4378455382476159e-05, "loss": 32.6484, "step": 4713 }, { "epoch": 0.7564488305853091, "grad_norm": 12.324308395385742, "learning_rate": 1.4360481032000495e-05, "loss": 32.6719, "step": 4714 }, { "epoch": 0.7566092991535283, "grad_norm": 12.387351036071777, "learning_rate": 1.4342516039220955e-05, "loss": 32.625, "step": 4715 }, { "epoch": 0.7567697677217475, "grad_norm": 12.194647789001465, "learning_rate": 1.432456040885452e-05, "loss": 32.6094, "step": 4716 }, { "epoch": 0.7569302362899667, "grad_norm": 12.069535255432129, "learning_rate": 1.4306614145615681e-05, "loss": 32.6484, "step": 4717 }, { "epoch": 0.7570907048581859, "grad_norm": 12.26668643951416, "learning_rate": 1.4288677254216515e-05, "loss": 32.6094, "step": 4718 }, { "epoch": 0.7572511734264051, "grad_norm": 12.14069938659668, "learning_rate": 1.427074973936663e-05, "loss": 32.5703, "step": 4719 }, { "epoch": 0.7574116419946243, "grad_norm": 12.268500328063965, "learning_rate": 1.4252831605773147e-05, "loss": 32.6172, "step": 4720 }, { "epoch": 0.7575721105628435, "grad_norm": 12.134005546569824, "learning_rate": 1.4234922858140776e-05, "loss": 32.7031, "step": 4721 }, { "epoch": 0.7577325791310627, "grad_norm": 11.94255542755127, "learning_rate": 1.4217023501171711e-05, "loss": 32.6719, "step": 4722 }, { "epoch": 0.7578930476992819, "grad_norm": 12.33300495147705, "learning_rate": 1.4199133539565706e-05, "loss": 32.6797, "step": 4723 }, { "epoch": 0.7580535162675011, "grad_norm": 12.067501068115234, "learning_rate": 1.4181252978020043e-05, "loss": 32.6719, "step": 4724 }, { "epoch": 0.7582139848357203, "grad_norm": 12.196372032165527, "learning_rate": 1.4163381821229527e-05, "loss": 32.6172, "step": 4725 }, { "epoch": 0.7583744534039395, "grad_norm": 12.071733474731445, "learning_rate": 1.4145520073886503e-05, "loss": 32.7109, "step": 4726 }, { "epoch": 0.7585349219721587, "grad_norm": 12.41407585144043, "learning_rate": 1.4127667740680844e-05, "loss": 32.6484, "step": 4727 }, { "epoch": 0.7586953905403779, "grad_norm": 12.2002534866333, "learning_rate": 1.4109824826299955e-05, "loss": 32.7031, "step": 4728 }, { "epoch": 0.7588558591085971, "grad_norm": 12.387828826904297, "learning_rate": 1.409199133542876e-05, "loss": 32.6328, "step": 4729 }, { "epoch": 0.7590163276768163, "grad_norm": 12.069112777709961, "learning_rate": 1.4074167272749716e-05, "loss": 32.6016, "step": 4730 }, { "epoch": 0.7591767962450355, "grad_norm": 12.074650764465332, "learning_rate": 1.4056352642942793e-05, "loss": 32.6875, "step": 4731 }, { "epoch": 0.7593372648132547, "grad_norm": 12.32242488861084, "learning_rate": 1.4038547450685502e-05, "loss": 32.5312, "step": 4732 }, { "epoch": 0.7594977333814739, "grad_norm": 12.138854026794434, "learning_rate": 1.4020751700652856e-05, "loss": 32.5859, "step": 4733 }, { "epoch": 0.7596582019496931, "grad_norm": 12.00742244720459, "learning_rate": 1.4002965397517392e-05, "loss": 32.7031, "step": 4734 }, { "epoch": 0.7598186705179123, "grad_norm": 12.200458526611328, "learning_rate": 1.3985188545949201e-05, "loss": 32.5547, "step": 4735 }, { "epoch": 0.7599791390861315, "grad_norm": 12.023611068725586, "learning_rate": 1.396742115061585e-05, "loss": 32.6875, "step": 4736 }, { "epoch": 0.7601396076543507, "grad_norm": 12.64834976196289, "learning_rate": 1.394966321618244e-05, "loss": 32.6094, "step": 4737 }, { "epoch": 0.7603000762225699, "grad_norm": 12.130545616149902, "learning_rate": 1.393191474731158e-05, "loss": 32.6172, "step": 4738 }, { "epoch": 0.7604605447907891, "grad_norm": 12.199201583862305, "learning_rate": 1.391417574866341e-05, "loss": 32.6016, "step": 4739 }, { "epoch": 0.7606210133590083, "grad_norm": 12.277935028076172, "learning_rate": 1.3896446224895559e-05, "loss": 32.6797, "step": 4740 }, { "epoch": 0.7607814819272275, "grad_norm": 12.538034439086914, "learning_rate": 1.3878726180663199e-05, "loss": 32.7031, "step": 4741 }, { "epoch": 0.7609419504954467, "grad_norm": 12.583928108215332, "learning_rate": 1.3861015620618978e-05, "loss": 32.5469, "step": 4742 }, { "epoch": 0.7611024190636659, "grad_norm": 12.269360542297363, "learning_rate": 1.384331454941309e-05, "loss": 32.6562, "step": 4743 }, { "epoch": 0.7612628876318851, "grad_norm": 12.333420753479004, "learning_rate": 1.3825622971693202e-05, "loss": 32.6562, "step": 4744 }, { "epoch": 0.7614233562001043, "grad_norm": 12.39629077911377, "learning_rate": 1.380794089210452e-05, "loss": 32.6562, "step": 4745 }, { "epoch": 0.7615838247683235, "grad_norm": 12.26362419128418, "learning_rate": 1.3790268315289739e-05, "loss": 32.625, "step": 4746 }, { "epoch": 0.7617442933365427, "grad_norm": 12.333793640136719, "learning_rate": 1.3772605245889053e-05, "loss": 32.5625, "step": 4747 }, { "epoch": 0.7619047619047619, "grad_norm": 12.096818923950195, "learning_rate": 1.3754951688540168e-05, "loss": 32.8438, "step": 4748 }, { "epoch": 0.7620652304729811, "grad_norm": 12.182770729064941, "learning_rate": 1.373730764787831e-05, "loss": 32.9688, "step": 4749 }, { "epoch": 0.7622256990412003, "grad_norm": 12.330718040466309, "learning_rate": 1.3719673128536187e-05, "loss": 33.1094, "step": 4750 }, { "epoch": 0.7623861676094195, "grad_norm": 12.035865783691406, "learning_rate": 1.3702048135144002e-05, "loss": 32.9844, "step": 4751 }, { "epoch": 0.7625466361776387, "grad_norm": 12.39798641204834, "learning_rate": 1.3684432672329478e-05, "loss": 32.6641, "step": 4752 }, { "epoch": 0.7627071047458579, "grad_norm": 12.455376625061035, "learning_rate": 1.3666826744717792e-05, "loss": 32.7969, "step": 4753 }, { "epoch": 0.7628675733140771, "grad_norm": 12.092351913452148, "learning_rate": 1.3649230356931653e-05, "loss": 32.7734, "step": 4754 }, { "epoch": 0.7630280418822963, "grad_norm": 12.484586715698242, "learning_rate": 1.363164351359128e-05, "loss": 32.6953, "step": 4755 }, { "epoch": 0.7631885104505155, "grad_norm": 12.211262702941895, "learning_rate": 1.3614066219314358e-05, "loss": 32.8203, "step": 4756 }, { "epoch": 0.7633489790187347, "grad_norm": 12.075260162353516, "learning_rate": 1.3596498478716075e-05, "loss": 32.75, "step": 4757 }, { "epoch": 0.7635094475869539, "grad_norm": 12.140669822692871, "learning_rate": 1.3578940296409098e-05, "loss": 32.7656, "step": 4758 }, { "epoch": 0.7636699161551731, "grad_norm": 12.457956314086914, "learning_rate": 1.35613916770036e-05, "loss": 32.5781, "step": 4759 }, { "epoch": 0.7638303847233923, "grad_norm": 11.946218490600586, "learning_rate": 1.354385262510724e-05, "loss": 32.7344, "step": 4760 }, { "epoch": 0.7639908532916115, "grad_norm": 12.083076477050781, "learning_rate": 1.3526323145325137e-05, "loss": 32.7344, "step": 4761 }, { "epoch": 0.7641513218598307, "grad_norm": 12.145844459533691, "learning_rate": 1.350880324225996e-05, "loss": 32.7344, "step": 4762 }, { "epoch": 0.7643117904280499, "grad_norm": 12.212014198303223, "learning_rate": 1.3491292920511806e-05, "loss": 32.7266, "step": 4763 }, { "epoch": 0.7644722589962691, "grad_norm": 12.202581405639648, "learning_rate": 1.3473792184678296e-05, "loss": 32.5938, "step": 4764 }, { "epoch": 0.7646327275644883, "grad_norm": 12.211045265197754, "learning_rate": 1.345630103935448e-05, "loss": 32.6875, "step": 4765 }, { "epoch": 0.7647931961327075, "grad_norm": 12.006744384765625, "learning_rate": 1.3438819489132937e-05, "loss": 32.6406, "step": 4766 }, { "epoch": 0.7649536647009267, "grad_norm": 12.341553688049316, "learning_rate": 1.3421347538603696e-05, "loss": 32.6328, "step": 4767 }, { "epoch": 0.7651141332691459, "grad_norm": 12.202773094177246, "learning_rate": 1.340388519235432e-05, "loss": 32.7188, "step": 4768 }, { "epoch": 0.7652746018373651, "grad_norm": 12.285533905029297, "learning_rate": 1.3386432454969794e-05, "loss": 32.6797, "step": 4769 }, { "epoch": 0.7654350704055843, "grad_norm": 12.2703857421875, "learning_rate": 1.3368989331032594e-05, "loss": 32.6719, "step": 4770 }, { "epoch": 0.7655955389738035, "grad_norm": 12.07373046875, "learning_rate": 1.3351555825122686e-05, "loss": 32.625, "step": 4771 }, { "epoch": 0.7657560075420227, "grad_norm": 12.20146656036377, "learning_rate": 1.3334131941817496e-05, "loss": 32.6094, "step": 4772 }, { "epoch": 0.7659164761102419, "grad_norm": 12.322728157043457, "learning_rate": 1.3316717685691926e-05, "loss": 32.6484, "step": 4773 }, { "epoch": 0.7660769446784611, "grad_norm": 12.392518997192383, "learning_rate": 1.3299313061318364e-05, "loss": 32.5859, "step": 4774 }, { "epoch": 0.7662374132466803, "grad_norm": 11.955184936523438, "learning_rate": 1.3281918073266642e-05, "loss": 32.7578, "step": 4775 }, { "epoch": 0.7663978818148995, "grad_norm": 12.552698135375977, "learning_rate": 1.326453272610409e-05, "loss": 32.6953, "step": 4776 }, { "epoch": 0.7665583503831187, "grad_norm": 12.135266304016113, "learning_rate": 1.324715702439549e-05, "loss": 32.6562, "step": 4777 }, { "epoch": 0.7667188189513379, "grad_norm": 12.070589065551758, "learning_rate": 1.3229790972703098e-05, "loss": 32.6172, "step": 4778 }, { "epoch": 0.7668792875195571, "grad_norm": 12.196195602416992, "learning_rate": 1.3212434575586635e-05, "loss": 32.5938, "step": 4779 }, { "epoch": 0.7670397560877763, "grad_norm": 12.268302917480469, "learning_rate": 1.3195087837603281e-05, "loss": 32.6953, "step": 4780 }, { "epoch": 0.7672002246559955, "grad_norm": 12.202554702758789, "learning_rate": 1.3177750763307672e-05, "loss": 32.5781, "step": 4781 }, { "epoch": 0.7673606932242147, "grad_norm": 12.009183883666992, "learning_rate": 1.3160423357251956e-05, "loss": 32.6367, "step": 4782 }, { "epoch": 0.7675211617924339, "grad_norm": 12.512388229370117, "learning_rate": 1.314310562398568e-05, "loss": 32.6094, "step": 4783 }, { "epoch": 0.7676816303606531, "grad_norm": 12.207971572875977, "learning_rate": 1.3125797568055886e-05, "loss": 32.5625, "step": 4784 }, { "epoch": 0.7678420989288723, "grad_norm": 12.204916954040527, "learning_rate": 1.3108499194007063e-05, "loss": 32.6016, "step": 4785 }, { "epoch": 0.7680025674970915, "grad_norm": 12.273655891418457, "learning_rate": 1.3091210506381168e-05, "loss": 32.5781, "step": 4786 }, { "epoch": 0.7681630360653107, "grad_norm": 12.138689994812012, "learning_rate": 1.3073931509717595e-05, "loss": 32.5859, "step": 4787 }, { "epoch": 0.7683235046335299, "grad_norm": 12.195060729980469, "learning_rate": 1.3056662208553223e-05, "loss": 32.6094, "step": 4788 }, { "epoch": 0.7684839732017491, "grad_norm": 12.81103229522705, "learning_rate": 1.3039402607422353e-05, "loss": 33.3125, "step": 4789 }, { "epoch": 0.7686444417699683, "grad_norm": 12.264261245727539, "learning_rate": 1.302215271085676e-05, "loss": 32.625, "step": 4790 }, { "epoch": 0.7688049103381875, "grad_norm": 12.082637786865234, "learning_rate": 1.3004912523385676e-05, "loss": 32.7422, "step": 4791 }, { "epoch": 0.7689653789064067, "grad_norm": 12.328145027160645, "learning_rate": 1.2987682049535759e-05, "loss": 32.6562, "step": 4792 }, { "epoch": 0.7691258474746259, "grad_norm": 12.391850471496582, "learning_rate": 1.2970461293831142e-05, "loss": 32.625, "step": 4793 }, { "epoch": 0.7692863160428451, "grad_norm": 12.0801362991333, "learning_rate": 1.2953250260793393e-05, "loss": 32.6484, "step": 4794 }, { "epoch": 0.7694467846110643, "grad_norm": 12.137322425842285, "learning_rate": 1.293604895494151e-05, "loss": 32.6484, "step": 4795 }, { "epoch": 0.7696072531792835, "grad_norm": 12.516097068786621, "learning_rate": 1.2918857380791988e-05, "loss": 32.5703, "step": 4796 }, { "epoch": 0.7697677217475027, "grad_norm": 12.196013450622559, "learning_rate": 1.2901675542858727e-05, "loss": 32.6953, "step": 4797 }, { "epoch": 0.7699281903157219, "grad_norm": 12.073221206665039, "learning_rate": 1.2884503445653068e-05, "loss": 32.6797, "step": 4798 }, { "epoch": 0.7700886588839411, "grad_norm": 12.317594528198242, "learning_rate": 1.2867341093683833e-05, "loss": 32.8984, "step": 4799 }, { "epoch": 0.7702491274521603, "grad_norm": 12.258868217468262, "learning_rate": 1.2850188491457215e-05, "loss": 33.1328, "step": 4800 }, { "epoch": 0.7704095960203795, "grad_norm": 12.394037246704102, "learning_rate": 1.2833045643476898e-05, "loss": 33.0078, "step": 4801 }, { "epoch": 0.7705700645885987, "grad_norm": 12.284249305725098, "learning_rate": 1.281591255424403e-05, "loss": 32.7969, "step": 4802 }, { "epoch": 0.7707305331568179, "grad_norm": 12.810078620910645, "learning_rate": 1.2798789228257141e-05, "loss": 32.8047, "step": 4803 }, { "epoch": 0.7708910017250371, "grad_norm": 12.520668983459473, "learning_rate": 1.2781675670012223e-05, "loss": 32.6719, "step": 4804 }, { "epoch": 0.7710514702932563, "grad_norm": 12.406957626342773, "learning_rate": 1.2764571884002697e-05, "loss": 32.7031, "step": 4805 }, { "epoch": 0.7712119388614755, "grad_norm": 12.20237922668457, "learning_rate": 1.2747477874719422e-05, "loss": 32.6406, "step": 4806 }, { "epoch": 0.7713724074296947, "grad_norm": 12.348761558532715, "learning_rate": 1.2730393646650695e-05, "loss": 32.8086, "step": 4807 }, { "epoch": 0.7715328759979139, "grad_norm": 12.516679763793945, "learning_rate": 1.2713319204282232e-05, "loss": 32.6328, "step": 4808 }, { "epoch": 0.7716933445661331, "grad_norm": 11.94929313659668, "learning_rate": 1.2696254552097181e-05, "loss": 32.75, "step": 4809 }, { "epoch": 0.7718538131343523, "grad_norm": 12.385716438293457, "learning_rate": 1.2679199694576144e-05, "loss": 32.5859, "step": 4810 }, { "epoch": 0.7720142817025715, "grad_norm": 12.473901748657227, "learning_rate": 1.2662154636197137e-05, "loss": 32.6328, "step": 4811 }, { "epoch": 0.7721747502707907, "grad_norm": 12.3233642578125, "learning_rate": 1.2645119381435572e-05, "loss": 32.5781, "step": 4812 }, { "epoch": 0.7723352188390099, "grad_norm": 12.1383056640625, "learning_rate": 1.2628093934764323e-05, "loss": 32.6406, "step": 4813 }, { "epoch": 0.7724956874072291, "grad_norm": 12.136890411376953, "learning_rate": 1.2611078300653684e-05, "loss": 32.7344, "step": 4814 }, { "epoch": 0.7726561559754483, "grad_norm": 12.016472816467285, "learning_rate": 1.259407248357135e-05, "loss": 32.7188, "step": 4815 }, { "epoch": 0.7728166245436675, "grad_norm": 12.13024616241455, "learning_rate": 1.2577076487982487e-05, "loss": 32.6172, "step": 4816 }, { "epoch": 0.7729770931118867, "grad_norm": 12.131903648376465, "learning_rate": 1.2560090318349638e-05, "loss": 32.625, "step": 4817 }, { "epoch": 0.773137561680106, "grad_norm": 12.43281364440918, "learning_rate": 1.254311397913277e-05, "loss": 32.7344, "step": 4818 }, { "epoch": 0.7732980302483251, "grad_norm": 12.714970588684082, "learning_rate": 1.2526147474789291e-05, "loss": 32.5859, "step": 4819 }, { "epoch": 0.7734584988165443, "grad_norm": 12.257752418518066, "learning_rate": 1.2509190809774007e-05, "loss": 32.7578, "step": 4820 }, { "epoch": 0.7736189673847635, "grad_norm": 12.411235809326172, "learning_rate": 1.2492243988539154e-05, "loss": 32.6562, "step": 4821 }, { "epoch": 0.7737794359529827, "grad_norm": 12.199987411499023, "learning_rate": 1.2475307015534354e-05, "loss": 32.6172, "step": 4822 }, { "epoch": 0.7739399045212019, "grad_norm": 12.195033073425293, "learning_rate": 1.2458379895206713e-05, "loss": 32.6094, "step": 4823 }, { "epoch": 0.7741003730894211, "grad_norm": 12.075008392333984, "learning_rate": 1.2441462632000661e-05, "loss": 32.6797, "step": 4824 }, { "epoch": 0.7742608416576403, "grad_norm": 12.007936477661133, "learning_rate": 1.2424555230358093e-05, "loss": 32.625, "step": 4825 }, { "epoch": 0.7744213102258595, "grad_norm": 12.136594772338867, "learning_rate": 1.2407657694718306e-05, "loss": 32.6641, "step": 4826 }, { "epoch": 0.7745817787940787, "grad_norm": 12.261701583862305, "learning_rate": 1.2390770029518007e-05, "loss": 32.625, "step": 4827 }, { "epoch": 0.7747422473622979, "grad_norm": 12.013284683227539, "learning_rate": 1.2373892239191288e-05, "loss": 32.6953, "step": 4828 }, { "epoch": 0.7749027159305171, "grad_norm": 11.94320297241211, "learning_rate": 1.2357024328169703e-05, "loss": 32.6484, "step": 4829 }, { "epoch": 0.7750631844987363, "grad_norm": 12.52331829071045, "learning_rate": 1.2340166300882155e-05, "loss": 32.6953, "step": 4830 }, { "epoch": 0.7752236530669555, "grad_norm": 12.076908111572266, "learning_rate": 1.2323318161754987e-05, "loss": 32.75, "step": 4831 }, { "epoch": 0.7753841216351747, "grad_norm": 12.203329086303711, "learning_rate": 1.2306479915211928e-05, "loss": 32.5781, "step": 4832 }, { "epoch": 0.775544590203394, "grad_norm": 12.133615493774414, "learning_rate": 1.2289651565674114e-05, "loss": 32.5938, "step": 4833 }, { "epoch": 0.7757050587716131, "grad_norm": 12.079315185546875, "learning_rate": 1.2272833117560078e-05, "loss": 32.6484, "step": 4834 }, { "epoch": 0.7758655273398323, "grad_norm": 12.00408935546875, "learning_rate": 1.2256024575285774e-05, "loss": 32.625, "step": 4835 }, { "epoch": 0.7760259959080515, "grad_norm": 12.334023475646973, "learning_rate": 1.2239225943264521e-05, "loss": 32.6797, "step": 4836 }, { "epoch": 0.7761864644762707, "grad_norm": 12.07532024383545, "learning_rate": 1.2222437225907068e-05, "loss": 32.8281, "step": 4837 }, { "epoch": 0.7763469330444899, "grad_norm": 12.468573570251465, "learning_rate": 1.2205658427621542e-05, "loss": 32.6797, "step": 4838 }, { "epoch": 0.7765074016127091, "grad_norm": 12.076621055603027, "learning_rate": 1.218888955281347e-05, "loss": 32.707, "step": 4839 }, { "epoch": 0.7766678701809283, "grad_norm": 12.087812423706055, "learning_rate": 1.217213060588578e-05, "loss": 32.7578, "step": 4840 }, { "epoch": 0.7768283387491475, "grad_norm": 12.213922500610352, "learning_rate": 1.2155381591238779e-05, "loss": 32.6914, "step": 4841 }, { "epoch": 0.7769888073173667, "grad_norm": 12.07425308227539, "learning_rate": 1.2138642513270166e-05, "loss": 32.6172, "step": 4842 }, { "epoch": 0.7771492758855859, "grad_norm": 12.651140213012695, "learning_rate": 1.2121913376375066e-05, "loss": 32.5703, "step": 4843 }, { "epoch": 0.7773097444538051, "grad_norm": 12.272921562194824, "learning_rate": 1.2105194184945955e-05, "loss": 32.7031, "step": 4844 }, { "epoch": 0.7774702130220243, "grad_norm": 12.322585105895996, "learning_rate": 1.2088484943372714e-05, "loss": 32.6094, "step": 4845 }, { "epoch": 0.7776306815902435, "grad_norm": 12.147456169128418, "learning_rate": 1.207178565604261e-05, "loss": 32.6328, "step": 4846 }, { "epoch": 0.7777911501584627, "grad_norm": 12.13424015045166, "learning_rate": 1.2055096327340282e-05, "loss": 32.6562, "step": 4847 }, { "epoch": 0.777951618726682, "grad_norm": 12.344743728637695, "learning_rate": 1.2038416961647752e-05, "loss": 32.6406, "step": 4848 }, { "epoch": 0.7781120872949011, "grad_norm": 12.141195297241211, "learning_rate": 1.2021747563344477e-05, "loss": 32.6797, "step": 4849 }, { "epoch": 0.7782725558631203, "grad_norm": 12.43612003326416, "learning_rate": 1.200508813680724e-05, "loss": 32.8906, "step": 4850 }, { "epoch": 0.7784330244313395, "grad_norm": 12.400596618652344, "learning_rate": 1.1988438686410236e-05, "loss": 32.8828, "step": 4851 }, { "epoch": 0.7785934929995587, "grad_norm": 12.40751838684082, "learning_rate": 1.197179921652502e-05, "loss": 32.9219, "step": 4852 }, { "epoch": 0.7787539615677779, "grad_norm": 12.224037170410156, "learning_rate": 1.1955169731520544e-05, "loss": 32.8125, "step": 4853 }, { "epoch": 0.7789144301359971, "grad_norm": 12.345669746398926, "learning_rate": 1.193855023576313e-05, "loss": 32.7734, "step": 4854 }, { "epoch": 0.7790748987042163, "grad_norm": 12.20728588104248, "learning_rate": 1.1921940733616472e-05, "loss": 32.7188, "step": 4855 }, { "epoch": 0.7792353672724355, "grad_norm": 11.94533634185791, "learning_rate": 1.1905341229441642e-05, "loss": 32.7578, "step": 4856 }, { "epoch": 0.7793958358406547, "grad_norm": 12.33561897277832, "learning_rate": 1.1888751727597114e-05, "loss": 32.7031, "step": 4857 }, { "epoch": 0.7795563044088739, "grad_norm": 12.145379066467285, "learning_rate": 1.1872172232438717e-05, "loss": 32.7344, "step": 4858 }, { "epoch": 0.7797167729770931, "grad_norm": 12.154006958007812, "learning_rate": 1.1855602748319617e-05, "loss": 32.8125, "step": 4859 }, { "epoch": 0.7798772415453123, "grad_norm": 12.017960548400879, "learning_rate": 1.1839043279590401e-05, "loss": 32.7578, "step": 4860 }, { "epoch": 0.7800377101135315, "grad_norm": 12.074902534484863, "learning_rate": 1.1822493830599002e-05, "loss": 32.7734, "step": 4861 }, { "epoch": 0.7801981786817507, "grad_norm": 12.200699806213379, "learning_rate": 1.1805954405690722e-05, "loss": 32.625, "step": 4862 }, { "epoch": 0.78035864724997, "grad_norm": 12.135663032531738, "learning_rate": 1.1789425009208266e-05, "loss": 32.6797, "step": 4863 }, { "epoch": 0.7805191158181891, "grad_norm": 12.140868186950684, "learning_rate": 1.1772905645491662e-05, "loss": 32.75, "step": 4864 }, { "epoch": 0.7806795843864083, "grad_norm": 12.256882667541504, "learning_rate": 1.175639631887831e-05, "loss": 32.6875, "step": 4865 }, { "epoch": 0.7808400529546276, "grad_norm": 12.455209732055664, "learning_rate": 1.1739897033703002e-05, "loss": 32.7109, "step": 4866 }, { "epoch": 0.7810005215228467, "grad_norm": 12.012426376342773, "learning_rate": 1.1723407794297858e-05, "loss": 32.7031, "step": 4867 }, { "epoch": 0.7811609900910659, "grad_norm": 11.881999969482422, "learning_rate": 1.170692860499239e-05, "loss": 32.6953, "step": 4868 }, { "epoch": 0.7813214586592852, "grad_norm": 12.074705123901367, "learning_rate": 1.169045947011344e-05, "loss": 32.7266, "step": 4869 }, { "epoch": 0.7814819272275043, "grad_norm": 12.010941505432129, "learning_rate": 1.1674000393985273e-05, "loss": 32.6719, "step": 4870 }, { "epoch": 0.7816423957957235, "grad_norm": 12.13786506652832, "learning_rate": 1.1657551380929427e-05, "loss": 32.6562, "step": 4871 }, { "epoch": 0.7818028643639428, "grad_norm": 12.267618179321289, "learning_rate": 1.1641112435264845e-05, "loss": 32.6719, "step": 4872 }, { "epoch": 0.7819633329321619, "grad_norm": 12.197000503540039, "learning_rate": 1.1624683561307837e-05, "loss": 32.6562, "step": 4873 }, { "epoch": 0.7821238015003811, "grad_norm": 12.389264106750488, "learning_rate": 1.160826476337204e-05, "loss": 32.6328, "step": 4874 }, { "epoch": 0.7822842700686004, "grad_norm": 12.993374824523926, "learning_rate": 1.159185604576845e-05, "loss": 32.6328, "step": 4875 }, { "epoch": 0.7824447386368195, "grad_norm": 12.259291648864746, "learning_rate": 1.1575457412805458e-05, "loss": 32.5703, "step": 4876 }, { "epoch": 0.7826052072050387, "grad_norm": 12.271653175354004, "learning_rate": 1.1559068868788748e-05, "loss": 32.6289, "step": 4877 }, { "epoch": 0.782765675773258, "grad_norm": 12.135534286499023, "learning_rate": 1.154269041802139e-05, "loss": 32.6562, "step": 4878 }, { "epoch": 0.7829261443414771, "grad_norm": 12.452408790588379, "learning_rate": 1.152632206480379e-05, "loss": 32.5586, "step": 4879 }, { "epoch": 0.7830866129096963, "grad_norm": 12.072678565979004, "learning_rate": 1.1509963813433705e-05, "loss": 32.6172, "step": 4880 }, { "epoch": 0.7832470814779156, "grad_norm": 12.542755126953125, "learning_rate": 1.1493615668206247e-05, "loss": 32.5234, "step": 4881 }, { "epoch": 0.7834075500461347, "grad_norm": 12.13345718383789, "learning_rate": 1.1477277633413869e-05, "loss": 32.625, "step": 4882 }, { "epoch": 0.7835680186143539, "grad_norm": 12.200021743774414, "learning_rate": 1.1460949713346369e-05, "loss": 32.6172, "step": 4883 }, { "epoch": 0.7837284871825732, "grad_norm": 12.146315574645996, "learning_rate": 1.1444631912290887e-05, "loss": 32.6953, "step": 4884 }, { "epoch": 0.7838889557507923, "grad_norm": 12.260954856872559, "learning_rate": 1.1428324234531918e-05, "loss": 32.5547, "step": 4885 }, { "epoch": 0.7840494243190115, "grad_norm": 12.073545455932617, "learning_rate": 1.1412026684351274e-05, "loss": 32.6016, "step": 4886 }, { "epoch": 0.7842098928872308, "grad_norm": 12.399402618408203, "learning_rate": 1.1395739266028138e-05, "loss": 32.5938, "step": 4887 }, { "epoch": 0.7843703614554499, "grad_norm": 12.39229679107666, "learning_rate": 1.1379461983839017e-05, "loss": 32.5312, "step": 4888 }, { "epoch": 0.7845308300236691, "grad_norm": 12.27211856842041, "learning_rate": 1.136319484205774e-05, "loss": 32.6055, "step": 4889 }, { "epoch": 0.7846912985918884, "grad_norm": 12.14094066619873, "learning_rate": 1.1346937844955519e-05, "loss": 32.6484, "step": 4890 }, { "epoch": 0.7848517671601075, "grad_norm": 12.19602108001709, "learning_rate": 1.1330690996800869e-05, "loss": 32.6016, "step": 4891 }, { "epoch": 0.7850122357283267, "grad_norm": 13.534175872802734, "learning_rate": 1.1314454301859634e-05, "loss": 32.4922, "step": 4892 }, { "epoch": 0.785172704296546, "grad_norm": 12.41919994354248, "learning_rate": 1.1298227764395026e-05, "loss": 32.6797, "step": 4893 }, { "epoch": 0.7853331728647651, "grad_norm": 12.141838073730469, "learning_rate": 1.128201138866754e-05, "loss": 32.7344, "step": 4894 }, { "epoch": 0.7854936414329843, "grad_norm": 12.33132553100586, "learning_rate": 1.1265805178935034e-05, "loss": 32.6172, "step": 4895 }, { "epoch": 0.7856541100012036, "grad_norm": 12.139928817749023, "learning_rate": 1.1249609139452715e-05, "loss": 32.6797, "step": 4896 }, { "epoch": 0.7858145785694227, "grad_norm": 12.398480415344238, "learning_rate": 1.1233423274473093e-05, "loss": 32.6328, "step": 4897 }, { "epoch": 0.7859750471376419, "grad_norm": 12.39870834350586, "learning_rate": 1.1217247588246005e-05, "loss": 32.6562, "step": 4898 }, { "epoch": 0.7861355157058612, "grad_norm": 12.455316543579102, "learning_rate": 1.1201082085018627e-05, "loss": 32.7344, "step": 4899 }, { "epoch": 0.7862959842740803, "grad_norm": 12.272859573364258, "learning_rate": 1.1184926769035458e-05, "loss": 32.7734, "step": 4900 }, { "epoch": 0.7864564528422995, "grad_norm": 12.358118057250977, "learning_rate": 1.1168781644538317e-05, "loss": 32.9844, "step": 4901 }, { "epoch": 0.7866169214105188, "grad_norm": 12.280301094055176, "learning_rate": 1.1152646715766352e-05, "loss": 32.7656, "step": 4902 }, { "epoch": 0.7867773899787379, "grad_norm": 12.150636672973633, "learning_rate": 1.1136521986956022e-05, "loss": 32.7891, "step": 4903 }, { "epoch": 0.7869378585469571, "grad_norm": 11.961244583129883, "learning_rate": 1.1120407462341142e-05, "loss": 32.8594, "step": 4904 }, { "epoch": 0.7870983271151764, "grad_norm": 12.2129487991333, "learning_rate": 1.1104303146152828e-05, "loss": 32.7344, "step": 4905 }, { "epoch": 0.7872587956833955, "grad_norm": 12.276178359985352, "learning_rate": 1.108820904261949e-05, "loss": 32.7422, "step": 4906 }, { "epoch": 0.7874192642516147, "grad_norm": 12.333955764770508, "learning_rate": 1.1072125155966884e-05, "loss": 32.7109, "step": 4907 }, { "epoch": 0.787579732819834, "grad_norm": 12.07317066192627, "learning_rate": 1.1056051490418078e-05, "loss": 32.6797, "step": 4908 }, { "epoch": 0.7877402013880531, "grad_norm": 12.207662582397461, "learning_rate": 1.1039988050193456e-05, "loss": 32.6953, "step": 4909 }, { "epoch": 0.7879006699562723, "grad_norm": 12.007636070251465, "learning_rate": 1.102393483951073e-05, "loss": 32.6094, "step": 4910 }, { "epoch": 0.7880611385244916, "grad_norm": 12.075111389160156, "learning_rate": 1.1007891862584906e-05, "loss": 32.7422, "step": 4911 }, { "epoch": 0.7882216070927107, "grad_norm": 12.262367248535156, "learning_rate": 1.0991859123628313e-05, "loss": 32.6562, "step": 4912 }, { "epoch": 0.7883820756609299, "grad_norm": 12.074309349060059, "learning_rate": 1.0975836626850583e-05, "loss": 32.7812, "step": 4913 }, { "epoch": 0.7885425442291492, "grad_norm": 12.52382755279541, "learning_rate": 1.0959824376458678e-05, "loss": 32.7344, "step": 4914 }, { "epoch": 0.7887030127973683, "grad_norm": 12.016456604003906, "learning_rate": 1.0943822376656843e-05, "loss": 32.7656, "step": 4915 }, { "epoch": 0.7888634813655875, "grad_norm": 12.3966064453125, "learning_rate": 1.092783063164664e-05, "loss": 32.6562, "step": 4916 }, { "epoch": 0.7890239499338068, "grad_norm": 12.786572456359863, "learning_rate": 1.0911849145626985e-05, "loss": 32.5859, "step": 4917 }, { "epoch": 0.7891844185020259, "grad_norm": 12.133132934570312, "learning_rate": 1.0895877922794017e-05, "loss": 32.6719, "step": 4918 }, { "epoch": 0.7893448870702451, "grad_norm": 12.449256896972656, "learning_rate": 1.087991696734123e-05, "loss": 32.5625, "step": 4919 }, { "epoch": 0.7895053556384644, "grad_norm": 12.345396041870117, "learning_rate": 1.0863966283459432e-05, "loss": 32.7422, "step": 4920 }, { "epoch": 0.7896658242066835, "grad_norm": 12.131916999816895, "learning_rate": 1.0848025875336697e-05, "loss": 32.6953, "step": 4921 }, { "epoch": 0.7898262927749027, "grad_norm": 12.19572925567627, "learning_rate": 1.0832095747158422e-05, "loss": 32.7188, "step": 4922 }, { "epoch": 0.789986761343122, "grad_norm": 12.265003204345703, "learning_rate": 1.0816175903107323e-05, "loss": 32.6328, "step": 4923 }, { "epoch": 0.7901472299113411, "grad_norm": 11.947881698608398, "learning_rate": 1.0800266347363386e-05, "loss": 32.7266, "step": 4924 }, { "epoch": 0.7903076984795603, "grad_norm": 12.136727333068848, "learning_rate": 1.0784367084103903e-05, "loss": 32.6094, "step": 4925 }, { "epoch": 0.7904681670477796, "grad_norm": 12.331693649291992, "learning_rate": 1.0768478117503467e-05, "loss": 32.5547, "step": 4926 }, { "epoch": 0.7906286356159987, "grad_norm": 12.075722694396973, "learning_rate": 1.0752599451733975e-05, "loss": 32.6328, "step": 4927 }, { "epoch": 0.7907891041842179, "grad_norm": 11.882923126220703, "learning_rate": 1.0736731090964603e-05, "loss": 32.6562, "step": 4928 }, { "epoch": 0.7909495727524372, "grad_norm": 12.5993013381958, "learning_rate": 1.0720873039361828e-05, "loss": 32.5703, "step": 4929 }, { "epoch": 0.7911100413206563, "grad_norm": 12.266982078552246, "learning_rate": 1.0705025301089433e-05, "loss": 32.5938, "step": 4930 }, { "epoch": 0.7912705098888755, "grad_norm": 12.394674301147461, "learning_rate": 1.0689187880308471e-05, "loss": 32.707, "step": 4931 }, { "epoch": 0.7914309784570948, "grad_norm": 12.142427444458008, "learning_rate": 1.06733607811773e-05, "loss": 32.6797, "step": 4932 }, { "epoch": 0.7915914470253139, "grad_norm": 12.006093978881836, "learning_rate": 1.0657544007851566e-05, "loss": 32.5938, "step": 4933 }, { "epoch": 0.7917519155935331, "grad_norm": 12.326864242553711, "learning_rate": 1.0641737564484206e-05, "loss": 32.5469, "step": 4934 }, { "epoch": 0.7919123841617524, "grad_norm": 12.60529899597168, "learning_rate": 1.0625941455225435e-05, "loss": 32.6641, "step": 4935 }, { "epoch": 0.7920728527299715, "grad_norm": 12.135295867919922, "learning_rate": 1.0610155684222756e-05, "loss": 32.6875, "step": 4936 }, { "epoch": 0.7922333212981907, "grad_norm": 12.195971488952637, "learning_rate": 1.0594380255620977e-05, "loss": 32.6406, "step": 4937 }, { "epoch": 0.79239378986641, "grad_norm": 12.38643741607666, "learning_rate": 1.0578615173562174e-05, "loss": 32.6562, "step": 4938 }, { "epoch": 0.7925542584346291, "grad_norm": 12.207046508789062, "learning_rate": 1.05628604421857e-05, "loss": 32.6094, "step": 4939 }, { "epoch": 0.7927147270028483, "grad_norm": 12.141169548034668, "learning_rate": 1.0547116065628216e-05, "loss": 32.5781, "step": 4940 }, { "epoch": 0.7928751955710676, "grad_norm": 12.459171295166016, "learning_rate": 1.0531382048023625e-05, "loss": 32.5703, "step": 4941 }, { "epoch": 0.7930356641392867, "grad_norm": 12.323895454406738, "learning_rate": 1.051565839350312e-05, "loss": 32.6875, "step": 4942 }, { "epoch": 0.7931961327075059, "grad_norm": 12.537787437438965, "learning_rate": 1.0499945106195225e-05, "loss": 32.5938, "step": 4943 }, { "epoch": 0.7933566012757252, "grad_norm": 12.335169792175293, "learning_rate": 1.0484242190225674e-05, "loss": 32.6406, "step": 4944 }, { "epoch": 0.7935170698439443, "grad_norm": 12.44986343383789, "learning_rate": 1.0468549649717519e-05, "loss": 32.6328, "step": 4945 }, { "epoch": 0.7936775384121635, "grad_norm": 12.332046508789062, "learning_rate": 1.0452867488791068e-05, "loss": 32.5469, "step": 4946 }, { "epoch": 0.7938380069803828, "grad_norm": 12.010063171386719, "learning_rate": 1.0437195711563912e-05, "loss": 32.7422, "step": 4947 }, { "epoch": 0.7939984755486019, "grad_norm": 12.292804718017578, "learning_rate": 1.042153432215091e-05, "loss": 32.7109, "step": 4948 }, { "epoch": 0.7941589441168211, "grad_norm": 12.176101684570312, "learning_rate": 1.04058833246642e-05, "loss": 32.8984, "step": 4949 }, { "epoch": 0.7943194126850404, "grad_norm": 12.230135917663574, "learning_rate": 1.0390242723213179e-05, "loss": 32.9688, "step": 4950 }, { "epoch": 0.7944798812532595, "grad_norm": 11.961569786071777, "learning_rate": 1.0374612521904548e-05, "loss": 33.1172, "step": 4951 }, { "epoch": 0.7946403498214787, "grad_norm": 12.016860961914062, "learning_rate": 1.0358992724842248e-05, "loss": 32.9375, "step": 4952 }, { "epoch": 0.794800818389698, "grad_norm": 12.349855422973633, "learning_rate": 1.0343383336127476e-05, "loss": 32.7891, "step": 4953 }, { "epoch": 0.7949612869579171, "grad_norm": 12.143523216247559, "learning_rate": 1.0327784359858721e-05, "loss": 32.7344, "step": 4954 }, { "epoch": 0.7951217555261363, "grad_norm": 12.22536849975586, "learning_rate": 1.0312195800131729e-05, "loss": 32.9766, "step": 4955 }, { "epoch": 0.7952822240943556, "grad_norm": 12.150156021118164, "learning_rate": 1.0296617661039503e-05, "loss": 32.8281, "step": 4956 }, { "epoch": 0.7954426926625747, "grad_norm": 12.072426795959473, "learning_rate": 1.028104994667235e-05, "loss": 32.75, "step": 4957 }, { "epoch": 0.7956031612307939, "grad_norm": 12.28160572052002, "learning_rate": 1.0265492661117792e-05, "loss": 32.6875, "step": 4958 }, { "epoch": 0.7957636297990132, "grad_norm": 12.336073875427246, "learning_rate": 1.0249945808460627e-05, "loss": 32.7344, "step": 4959 }, { "epoch": 0.7959240983672323, "grad_norm": 12.137829780578613, "learning_rate": 1.0234409392782923e-05, "loss": 32.6797, "step": 4960 }, { "epoch": 0.7960845669354515, "grad_norm": 12.527470588684082, "learning_rate": 1.0218883418164e-05, "loss": 32.6641, "step": 4961 }, { "epoch": 0.7962450355036708, "grad_norm": 12.516040802001953, "learning_rate": 1.0203367888680432e-05, "loss": 32.6406, "step": 4962 }, { "epoch": 0.7964055040718899, "grad_norm": 12.522917747497559, "learning_rate": 1.0187862808406062e-05, "loss": 32.5625, "step": 4963 }, { "epoch": 0.7965659726401091, "grad_norm": 12.402268409729004, "learning_rate": 1.0172368181411984e-05, "loss": 32.7422, "step": 4964 }, { "epoch": 0.7967264412083284, "grad_norm": 12.275967597961426, "learning_rate": 1.0156884011766549e-05, "loss": 32.7734, "step": 4965 }, { "epoch": 0.7968869097765475, "grad_norm": 12.074289321899414, "learning_rate": 1.014141030353536e-05, "loss": 32.6797, "step": 4966 }, { "epoch": 0.7970473783447667, "grad_norm": 12.389494895935059, "learning_rate": 1.0125947060781266e-05, "loss": 32.6953, "step": 4967 }, { "epoch": 0.797207846912986, "grad_norm": 12.259838104248047, "learning_rate": 1.0110494287564382e-05, "loss": 32.6641, "step": 4968 }, { "epoch": 0.7973683154812051, "grad_norm": 12.073405265808105, "learning_rate": 1.0095051987942072e-05, "loss": 32.7031, "step": 4969 }, { "epoch": 0.7975287840494243, "grad_norm": 12.205193519592285, "learning_rate": 1.0079620165968922e-05, "loss": 32.7734, "step": 4970 }, { "epoch": 0.7976892526176436, "grad_norm": 12.135305404663086, "learning_rate": 1.006419882569683e-05, "loss": 32.6094, "step": 4971 }, { "epoch": 0.7978497211858627, "grad_norm": 12.014839172363281, "learning_rate": 1.0048787971174877e-05, "loss": 32.7969, "step": 4972 }, { "epoch": 0.7980101897540819, "grad_norm": 12.448267936706543, "learning_rate": 1.003338760644943e-05, "loss": 32.6328, "step": 4973 }, { "epoch": 0.7981706583223012, "grad_norm": 11.946359634399414, "learning_rate": 1.0017997735564072e-05, "loss": 32.6719, "step": 4974 }, { "epoch": 0.7983311268905203, "grad_norm": 12.073624610900879, "learning_rate": 1.0002618362559662e-05, "loss": 32.7578, "step": 4975 }, { "epoch": 0.7984915954587395, "grad_norm": 11.945701599121094, "learning_rate": 9.987249491474282e-06, "loss": 32.6797, "step": 4976 }, { "epoch": 0.7986520640269588, "grad_norm": 11.89472484588623, "learning_rate": 9.971891126343257e-06, "loss": 33.3359, "step": 4977 }, { "epoch": 0.7988125325951779, "grad_norm": 12.324295043945312, "learning_rate": 9.956543271199165e-06, "loss": 32.5547, "step": 4978 }, { "epoch": 0.7989730011633971, "grad_norm": 12.210604667663574, "learning_rate": 9.941205930071818e-06, "loss": 32.7578, "step": 4979 }, { "epoch": 0.7991334697316164, "grad_norm": 12.346479415893555, "learning_rate": 9.925879106988262e-06, "loss": 32.6641, "step": 4980 }, { "epoch": 0.7992939382998355, "grad_norm": 11.878199577331543, "learning_rate": 9.910562805972795e-06, "loss": 32.6094, "step": 4981 }, { "epoch": 0.7994544068680547, "grad_norm": 12.839963912963867, "learning_rate": 9.895257031046935e-06, "loss": 32.5469, "step": 4982 }, { "epoch": 0.799614875436274, "grad_norm": 12.257609367370605, "learning_rate": 9.879961786229441e-06, "loss": 32.5938, "step": 4983 }, { "epoch": 0.7997753440044931, "grad_norm": 12.21324634552002, "learning_rate": 9.864677075536333e-06, "loss": 32.7031, "step": 4984 }, { "epoch": 0.7999358125727123, "grad_norm": 12.259987831115723, "learning_rate": 9.849402902980825e-06, "loss": 32.6094, "step": 4985 }, { "epoch": 0.8000962811409316, "grad_norm": 12.01060962677002, "learning_rate": 9.834139272573394e-06, "loss": 32.6953, "step": 4986 }, { "epoch": 0.8002567497091507, "grad_norm": 12.26358699798584, "learning_rate": 9.81888618832173e-06, "loss": 32.7266, "step": 4987 }, { "epoch": 0.8004172182773699, "grad_norm": 12.198774337768555, "learning_rate": 9.803643654230776e-06, "loss": 32.6523, "step": 4988 }, { "epoch": 0.8005776868455892, "grad_norm": 12.199049949645996, "learning_rate": 9.788411674302656e-06, "loss": 32.625, "step": 4989 }, { "epoch": 0.8007381554138083, "grad_norm": 12.202634811401367, "learning_rate": 9.77319025253679e-06, "loss": 32.5859, "step": 4990 }, { "epoch": 0.8008986239820275, "grad_norm": 12.198460578918457, "learning_rate": 9.757979392929783e-06, "loss": 32.6875, "step": 4991 }, { "epoch": 0.8010590925502468, "grad_norm": 11.882614135742188, "learning_rate": 9.742779099475475e-06, "loss": 32.7344, "step": 4992 }, { "epoch": 0.8012195611184659, "grad_norm": 12.071748733520508, "learning_rate": 9.727589376164936e-06, "loss": 32.625, "step": 4993 }, { "epoch": 0.8013800296866851, "grad_norm": 11.946598052978516, "learning_rate": 9.712410226986457e-06, "loss": 32.6797, "step": 4994 }, { "epoch": 0.8015404982549044, "grad_norm": 12.387822151184082, "learning_rate": 9.697241655925555e-06, "loss": 32.5859, "step": 4995 }, { "epoch": 0.8017009668231235, "grad_norm": 12.077462196350098, "learning_rate": 9.682083666964964e-06, "loss": 32.7266, "step": 4996 }, { "epoch": 0.8018614353913427, "grad_norm": 12.423226356506348, "learning_rate": 9.666936264084636e-06, "loss": 32.9141, "step": 4997 }, { "epoch": 0.802021903959562, "grad_norm": 12.072587966918945, "learning_rate": 9.651799451261773e-06, "loss": 32.6797, "step": 4998 }, { "epoch": 0.8021823725277811, "grad_norm": 12.13400936126709, "learning_rate": 9.636673232470777e-06, "loss": 32.6484, "step": 4999 }, { "epoch": 0.8023428410960003, "grad_norm": 12.213194847106934, "learning_rate": 9.62155761168324e-06, "loss": 32.8594, "step": 5000 }, { "epoch": 0.8025033096642196, "grad_norm": 12.15316104888916, "learning_rate": 9.606452592868015e-06, "loss": 32.8984, "step": 5001 }, { "epoch": 0.8026637782324387, "grad_norm": 12.412602424621582, "learning_rate": 9.591358179991145e-06, "loss": 32.8594, "step": 5002 }, { "epoch": 0.8028242468006579, "grad_norm": 12.278053283691406, "learning_rate": 9.576274377015892e-06, "loss": 32.7422, "step": 5003 }, { "epoch": 0.8029847153688772, "grad_norm": 12.083683013916016, "learning_rate": 9.561201187902757e-06, "loss": 32.8828, "step": 5004 }, { "epoch": 0.8031451839370963, "grad_norm": 12.024084091186523, "learning_rate": 9.546138616609424e-06, "loss": 32.7969, "step": 5005 }, { "epoch": 0.8033056525053155, "grad_norm": 12.208419799804688, "learning_rate": 9.531086667090799e-06, "loss": 32.8203, "step": 5006 }, { "epoch": 0.8034661210735348, "grad_norm": 12.015748977661133, "learning_rate": 9.516045343298996e-06, "loss": 32.7969, "step": 5007 }, { "epoch": 0.8036265896417539, "grad_norm": 12.264739990234375, "learning_rate": 9.501014649183353e-06, "loss": 32.6797, "step": 5008 }, { "epoch": 0.8037870582099731, "grad_norm": 12.329618453979492, "learning_rate": 9.4859945886904e-06, "loss": 32.7891, "step": 5009 }, { "epoch": 0.8039475267781924, "grad_norm": 12.074323654174805, "learning_rate": 9.470985165763879e-06, "loss": 32.7344, "step": 5010 }, { "epoch": 0.8041079953464115, "grad_norm": 12.400571823120117, "learning_rate": 9.455986384344744e-06, "loss": 32.6797, "step": 5011 }, { "epoch": 0.8042684639146307, "grad_norm": 12.068860054016113, "learning_rate": 9.440998248371152e-06, "loss": 32.6562, "step": 5012 }, { "epoch": 0.80442893248285, "grad_norm": 12.760482788085938, "learning_rate": 9.42602076177847e-06, "loss": 32.6328, "step": 5013 }, { "epoch": 0.8045894010510691, "grad_norm": 12.528762817382812, "learning_rate": 9.411053928499258e-06, "loss": 32.6484, "step": 5014 }, { "epoch": 0.8047498696192883, "grad_norm": 12.011455535888672, "learning_rate": 9.396097752463284e-06, "loss": 32.7266, "step": 5015 }, { "epoch": 0.8049103381875076, "grad_norm": 12.268157958984375, "learning_rate": 9.381152237597529e-06, "loss": 32.6406, "step": 5016 }, { "epoch": 0.8050708067557267, "grad_norm": 12.010969161987305, "learning_rate": 9.366217387826137e-06, "loss": 32.6094, "step": 5017 }, { "epoch": 0.8052312753239459, "grad_norm": 13.12826156616211, "learning_rate": 9.351293207070522e-06, "loss": 32.5938, "step": 5018 }, { "epoch": 0.8053917438921652, "grad_norm": 12.262035369873047, "learning_rate": 9.336379699249225e-06, "loss": 32.5625, "step": 5019 }, { "epoch": 0.8055522124603843, "grad_norm": 12.259577751159668, "learning_rate": 9.321476868278022e-06, "loss": 32.6719, "step": 5020 }, { "epoch": 0.8057126810286035, "grad_norm": 12.13845443725586, "learning_rate": 9.306584718069877e-06, "loss": 32.6797, "step": 5021 }, { "epoch": 0.8058731495968228, "grad_norm": 12.473783493041992, "learning_rate": 9.29170325253495e-06, "loss": 32.6211, "step": 5022 }, { "epoch": 0.8060336181650419, "grad_norm": 12.273788452148438, "learning_rate": 9.276832475580594e-06, "loss": 32.6484, "step": 5023 }, { "epoch": 0.8061940867332611, "grad_norm": 12.27209186553955, "learning_rate": 9.261972391111352e-06, "loss": 32.6328, "step": 5024 }, { "epoch": 0.8063545553014804, "grad_norm": 12.074080467224121, "learning_rate": 9.247123003028974e-06, "loss": 32.6172, "step": 5025 }, { "epoch": 0.8065150238696995, "grad_norm": 12.452459335327148, "learning_rate": 9.232284315232387e-06, "loss": 32.5938, "step": 5026 }, { "epoch": 0.8066754924379187, "grad_norm": 12.206292152404785, "learning_rate": 9.217456331617713e-06, "loss": 32.6562, "step": 5027 }, { "epoch": 0.806835961006138, "grad_norm": 12.385239601135254, "learning_rate": 9.202639056078266e-06, "loss": 32.5938, "step": 5028 }, { "epoch": 0.8069964295743571, "grad_norm": 12.007990837097168, "learning_rate": 9.187832492504544e-06, "loss": 32.6484, "step": 5029 }, { "epoch": 0.8071568981425763, "grad_norm": 12.133105278015137, "learning_rate": 9.173036644784228e-06, "loss": 32.6406, "step": 5030 }, { "epoch": 0.8073173667107956, "grad_norm": 12.011333465576172, "learning_rate": 9.158251516802207e-06, "loss": 32.7656, "step": 5031 }, { "epoch": 0.8074778352790147, "grad_norm": 12.133596420288086, "learning_rate": 9.14347711244054e-06, "loss": 32.5703, "step": 5032 }, { "epoch": 0.8076383038472339, "grad_norm": 12.013615608215332, "learning_rate": 9.128713435578462e-06, "loss": 32.6797, "step": 5033 }, { "epoch": 0.8077987724154532, "grad_norm": 12.070012092590332, "learning_rate": 9.1139604900924e-06, "loss": 32.6562, "step": 5034 }, { "epoch": 0.8079592409836723, "grad_norm": 12.151304244995117, "learning_rate": 9.099218279855981e-06, "loss": 32.6641, "step": 5035 }, { "epoch": 0.8081197095518915, "grad_norm": 12.393404006958008, "learning_rate": 9.084486808739956e-06, "loss": 32.6719, "step": 5036 }, { "epoch": 0.8082801781201108, "grad_norm": 12.139350891113281, "learning_rate": 9.069766080612335e-06, "loss": 32.625, "step": 5037 }, { "epoch": 0.8084406466883299, "grad_norm": 12.213770866394043, "learning_rate": 9.055056099338254e-06, "loss": 32.8359, "step": 5038 }, { "epoch": 0.8086011152565491, "grad_norm": 12.130375862121582, "learning_rate": 9.040356868780042e-06, "loss": 32.5625, "step": 5039 }, { "epoch": 0.8087615838247684, "grad_norm": 12.13305950164795, "learning_rate": 9.025668392797209e-06, "loss": 32.6719, "step": 5040 }, { "epoch": 0.8089220523929875, "grad_norm": 12.469697952270508, "learning_rate": 9.010990675246427e-06, "loss": 32.5781, "step": 5041 }, { "epoch": 0.8090825209612067, "grad_norm": 12.45341682434082, "learning_rate": 8.996323719981565e-06, "loss": 32.5625, "step": 5042 }, { "epoch": 0.809242989529426, "grad_norm": 12.018402099609375, "learning_rate": 8.98166753085365e-06, "loss": 32.7031, "step": 5043 }, { "epoch": 0.8094034580976451, "grad_norm": 12.203144073486328, "learning_rate": 8.967022111710872e-06, "loss": 32.6719, "step": 5044 }, { "epoch": 0.8095639266658643, "grad_norm": 12.069533348083496, "learning_rate": 8.952387466398632e-06, "loss": 32.6328, "step": 5045 }, { "epoch": 0.8097243952340836, "grad_norm": 12.143033981323242, "learning_rate": 8.937763598759469e-06, "loss": 32.6562, "step": 5046 }, { "epoch": 0.8098848638023027, "grad_norm": 12.080162048339844, "learning_rate": 8.923150512633111e-06, "loss": 32.6992, "step": 5047 }, { "epoch": 0.8100453323705219, "grad_norm": 12.28344440460205, "learning_rate": 8.90854821185642e-06, "loss": 32.7734, "step": 5048 }, { "epoch": 0.8102058009387412, "grad_norm": 12.204035758972168, "learning_rate": 8.893956700263472e-06, "loss": 32.8203, "step": 5049 }, { "epoch": 0.8103662695069603, "grad_norm": 12.3543119430542, "learning_rate": 8.879375981685467e-06, "loss": 33.2812, "step": 5050 }, { "epoch": 0.8105267380751795, "grad_norm": 12.171539306640625, "learning_rate": 8.864806059950825e-06, "loss": 32.9062, "step": 5051 }, { "epoch": 0.8106872066433988, "grad_norm": 12.229490280151367, "learning_rate": 8.850246938885081e-06, "loss": 32.7734, "step": 5052 }, { "epoch": 0.8108476752116179, "grad_norm": 12.269376754760742, "learning_rate": 8.835698622310961e-06, "loss": 32.6875, "step": 5053 }, { "epoch": 0.8110081437798371, "grad_norm": 12.07813549041748, "learning_rate": 8.821161114048342e-06, "loss": 32.7812, "step": 5054 }, { "epoch": 0.8111686123480564, "grad_norm": 12.343894958496094, "learning_rate": 8.806634417914267e-06, "loss": 32.8906, "step": 5055 }, { "epoch": 0.8113290809162755, "grad_norm": 12.401894569396973, "learning_rate": 8.792118537722938e-06, "loss": 32.6719, "step": 5056 }, { "epoch": 0.8114895494844947, "grad_norm": 12.468526840209961, "learning_rate": 8.777613477285723e-06, "loss": 32.7969, "step": 5057 }, { "epoch": 0.811650018052714, "grad_norm": 12.137207984924316, "learning_rate": 8.76311924041115e-06, "loss": 32.7109, "step": 5058 }, { "epoch": 0.8118104866209331, "grad_norm": 12.016363143920898, "learning_rate": 8.748635830904888e-06, "loss": 32.7734, "step": 5059 }, { "epoch": 0.8119709551891523, "grad_norm": 12.136998176574707, "learning_rate": 8.734163252569787e-06, "loss": 32.6875, "step": 5060 }, { "epoch": 0.8121314237573716, "grad_norm": 12.27785873413086, "learning_rate": 8.719701509205842e-06, "loss": 32.7812, "step": 5061 }, { "epoch": 0.8122918923255907, "grad_norm": 12.007258415222168, "learning_rate": 8.705250604610193e-06, "loss": 32.6875, "step": 5062 }, { "epoch": 0.8124523608938099, "grad_norm": 12.12918758392334, "learning_rate": 8.69081054257715e-06, "loss": 32.6406, "step": 5063 }, { "epoch": 0.8126128294620292, "grad_norm": 12.134171485900879, "learning_rate": 8.676381326898158e-06, "loss": 32.6172, "step": 5064 }, { "epoch": 0.8127732980302483, "grad_norm": 12.142511367797852, "learning_rate": 8.661962961361852e-06, "loss": 32.7109, "step": 5065 }, { "epoch": 0.8129337665984675, "grad_norm": 12.196202278137207, "learning_rate": 8.647555449753985e-06, "loss": 32.6719, "step": 5066 }, { "epoch": 0.8130942351666868, "grad_norm": 12.069443702697754, "learning_rate": 8.633158795857455e-06, "loss": 32.6016, "step": 5067 }, { "epoch": 0.8132547037349059, "grad_norm": 12.204365730285645, "learning_rate": 8.618773003452329e-06, "loss": 32.6172, "step": 5068 }, { "epoch": 0.8134151723031251, "grad_norm": 12.266414642333984, "learning_rate": 8.604398076315817e-06, "loss": 32.6562, "step": 5069 }, { "epoch": 0.8135756408713444, "grad_norm": 12.136323928833008, "learning_rate": 8.590034018222276e-06, "loss": 32.6328, "step": 5070 }, { "epoch": 0.8137361094395635, "grad_norm": 12.017325401306152, "learning_rate": 8.575680832943195e-06, "loss": 32.7578, "step": 5071 }, { "epoch": 0.8138965780077827, "grad_norm": 12.007261276245117, "learning_rate": 8.56133852424723e-06, "loss": 32.6719, "step": 5072 }, { "epoch": 0.814057046576002, "grad_norm": 12.202070236206055, "learning_rate": 8.547007095900178e-06, "loss": 32.7266, "step": 5073 }, { "epoch": 0.8142175151442211, "grad_norm": 12.196053504943848, "learning_rate": 8.53268655166496e-06, "loss": 32.625, "step": 5074 }, { "epoch": 0.8143779837124403, "grad_norm": 12.643031120300293, "learning_rate": 8.518376895301649e-06, "loss": 32.5547, "step": 5075 }, { "epoch": 0.8145384522806596, "grad_norm": 12.140189170837402, "learning_rate": 8.504078130567478e-06, "loss": 32.625, "step": 5076 }, { "epoch": 0.8146989208488787, "grad_norm": 12.389947891235352, "learning_rate": 8.489790261216784e-06, "loss": 32.5469, "step": 5077 }, { "epoch": 0.8148593894170979, "grad_norm": 12.015493392944336, "learning_rate": 8.475513291001092e-06, "loss": 32.6953, "step": 5078 }, { "epoch": 0.8150198579853172, "grad_norm": 12.202702522277832, "learning_rate": 8.461247223669017e-06, "loss": 32.6484, "step": 5079 }, { "epoch": 0.8151803265535363, "grad_norm": 12.13496208190918, "learning_rate": 8.446992062966336e-06, "loss": 32.6172, "step": 5080 }, { "epoch": 0.8153407951217555, "grad_norm": 12.075545310974121, "learning_rate": 8.432747812635956e-06, "loss": 32.6562, "step": 5081 }, { "epoch": 0.8155012636899748, "grad_norm": 12.204143524169922, "learning_rate": 8.418514476417932e-06, "loss": 32.6172, "step": 5082 }, { "epoch": 0.8156617322581939, "grad_norm": 12.130708694458008, "learning_rate": 8.404292058049423e-06, "loss": 32.5859, "step": 5083 }, { "epoch": 0.8158222008264131, "grad_norm": 12.073625564575195, "learning_rate": 8.39008056126474e-06, "loss": 32.6953, "step": 5084 }, { "epoch": 0.8159826693946324, "grad_norm": 12.260115623474121, "learning_rate": 8.37587998979535e-06, "loss": 32.7422, "step": 5085 }, { "epoch": 0.8161431379628515, "grad_norm": 12.323909759521484, "learning_rate": 8.36169034736981e-06, "loss": 32.6406, "step": 5086 }, { "epoch": 0.8163036065310707, "grad_norm": 12.261580467224121, "learning_rate": 8.347511637713834e-06, "loss": 32.5703, "step": 5087 }, { "epoch": 0.81646407509929, "grad_norm": 12.207331657409668, "learning_rate": 8.333343864550257e-06, "loss": 32.7422, "step": 5088 }, { "epoch": 0.8166245436675091, "grad_norm": 12.195080757141113, "learning_rate": 8.319187031599041e-06, "loss": 32.625, "step": 5089 }, { "epoch": 0.8167850122357283, "grad_norm": 12.204000473022461, "learning_rate": 8.30504114257727e-06, "loss": 32.5938, "step": 5090 }, { "epoch": 0.8169454808039476, "grad_norm": 12.903800010681152, "learning_rate": 8.290906201199162e-06, "loss": 32.6172, "step": 5091 }, { "epoch": 0.8171059493721667, "grad_norm": 12.536025047302246, "learning_rate": 8.27678221117607e-06, "loss": 32.6484, "step": 5092 }, { "epoch": 0.8172664179403859, "grad_norm": 12.26030158996582, "learning_rate": 8.262669176216465e-06, "loss": 32.5938, "step": 5093 }, { "epoch": 0.8174268865086052, "grad_norm": 12.283982276916504, "learning_rate": 8.248567100025938e-06, "loss": 32.6797, "step": 5094 }, { "epoch": 0.8175873550768243, "grad_norm": 12.010376930236816, "learning_rate": 8.234475986307189e-06, "loss": 32.7266, "step": 5095 }, { "epoch": 0.8177478236450435, "grad_norm": 12.20177173614502, "learning_rate": 8.22039583876006e-06, "loss": 32.6641, "step": 5096 }, { "epoch": 0.8179082922132628, "grad_norm": 12.279723167419434, "learning_rate": 8.206326661081493e-06, "loss": 32.7109, "step": 5097 }, { "epoch": 0.8180687607814819, "grad_norm": 12.14345645904541, "learning_rate": 8.19226845696559e-06, "loss": 32.6875, "step": 5098 }, { "epoch": 0.8182292293497011, "grad_norm": 12.158210754394531, "learning_rate": 8.178221230103539e-06, "loss": 32.7656, "step": 5099 }, { "epoch": 0.8183896979179204, "grad_norm": 12.31942367553711, "learning_rate": 8.164184984183643e-06, "loss": 33.1641, "step": 5100 }, { "epoch": 0.8185501664861395, "grad_norm": 12.464591979980469, "learning_rate": 8.150159722891338e-06, "loss": 33.0391, "step": 5101 }, { "epoch": 0.8187106350543587, "grad_norm": 12.423723220825195, "learning_rate": 8.136145449909166e-06, "loss": 32.7969, "step": 5102 }, { "epoch": 0.818871103622578, "grad_norm": 12.085265159606934, "learning_rate": 8.12214216891679e-06, "loss": 32.75, "step": 5103 }, { "epoch": 0.8190315721907971, "grad_norm": 11.947931289672852, "learning_rate": 8.108149883590983e-06, "loss": 32.8281, "step": 5104 }, { "epoch": 0.8191920407590163, "grad_norm": 12.267963409423828, "learning_rate": 8.09416859760561e-06, "loss": 32.6484, "step": 5105 }, { "epoch": 0.8193525093272356, "grad_norm": 12.145207405090332, "learning_rate": 8.080198314631725e-06, "loss": 32.7109, "step": 5106 }, { "epoch": 0.8195129778954547, "grad_norm": 12.550127983093262, "learning_rate": 8.066239038337386e-06, "loss": 32.7656, "step": 5107 }, { "epoch": 0.8196734464636739, "grad_norm": 12.077579498291016, "learning_rate": 8.052290772387827e-06, "loss": 32.6875, "step": 5108 }, { "epoch": 0.8198339150318932, "grad_norm": 12.075846672058105, "learning_rate": 8.03835352044538e-06, "loss": 32.7188, "step": 5109 }, { "epoch": 0.8199943836001123, "grad_norm": 12.074268341064453, "learning_rate": 8.024427286169484e-06, "loss": 32.6719, "step": 5110 }, { "epoch": 0.8201548521683315, "grad_norm": 12.513829231262207, "learning_rate": 8.010512073216664e-06, "loss": 32.6562, "step": 5111 }, { "epoch": 0.8203153207365508, "grad_norm": 12.57572078704834, "learning_rate": 7.996607885240598e-06, "loss": 32.6172, "step": 5112 }, { "epoch": 0.8204757893047699, "grad_norm": 12.073234558105469, "learning_rate": 7.982714725892032e-06, "loss": 32.7422, "step": 5113 }, { "epoch": 0.8206362578729891, "grad_norm": 12.405346870422363, "learning_rate": 7.96883259881882e-06, "loss": 32.6797, "step": 5114 }, { "epoch": 0.8207967264412084, "grad_norm": 12.207376480102539, "learning_rate": 7.954961507665932e-06, "loss": 32.7031, "step": 5115 }, { "epoch": 0.8209571950094275, "grad_norm": 12.134063720703125, "learning_rate": 7.941101456075428e-06, "loss": 32.6406, "step": 5116 }, { "epoch": 0.8211176635776467, "grad_norm": 12.468737602233887, "learning_rate": 7.927252447686472e-06, "loss": 32.625, "step": 5117 }, { "epoch": 0.821278132145866, "grad_norm": 12.074827194213867, "learning_rate": 7.91341448613534e-06, "loss": 32.6094, "step": 5118 }, { "epoch": 0.8214386007140851, "grad_norm": 12.07465648651123, "learning_rate": 7.89958757505539e-06, "loss": 32.6406, "step": 5119 }, { "epoch": 0.8215990692823043, "grad_norm": 12.404179573059082, "learning_rate": 7.885771718077094e-06, "loss": 32.6328, "step": 5120 }, { "epoch": 0.8217595378505236, "grad_norm": 12.141215324401855, "learning_rate": 7.871966918828011e-06, "loss": 32.6641, "step": 5121 }, { "epoch": 0.8219200064187427, "grad_norm": 12.069757461547852, "learning_rate": 7.858173180932799e-06, "loss": 32.6406, "step": 5122 }, { "epoch": 0.8220804749869619, "grad_norm": 12.205145835876465, "learning_rate": 7.844390508013216e-06, "loss": 32.7031, "step": 5123 }, { "epoch": 0.8222409435551812, "grad_norm": 12.005865097045898, "learning_rate": 7.830618903688108e-06, "loss": 32.6484, "step": 5124 }, { "epoch": 0.8224014121234003, "grad_norm": 12.13855266571045, "learning_rate": 7.816858371573416e-06, "loss": 32.625, "step": 5125 }, { "epoch": 0.8225618806916195, "grad_norm": 12.40318489074707, "learning_rate": 7.803108915282187e-06, "loss": 32.6172, "step": 5126 }, { "epoch": 0.8227223492598388, "grad_norm": 12.01549243927002, "learning_rate": 7.789370538424545e-06, "loss": 32.7422, "step": 5127 }, { "epoch": 0.8228828178280579, "grad_norm": 12.13609790802002, "learning_rate": 7.775643244607711e-06, "loss": 32.6172, "step": 5128 }, { "epoch": 0.8230432863962771, "grad_norm": 12.19927978515625, "learning_rate": 7.761927037436002e-06, "loss": 32.6328, "step": 5129 }, { "epoch": 0.8232037549644964, "grad_norm": 12.332908630371094, "learning_rate": 7.748221920510796e-06, "loss": 32.6172, "step": 5130 }, { "epoch": 0.8233642235327155, "grad_norm": 12.198209762573242, "learning_rate": 7.734527897430572e-06, "loss": 32.6172, "step": 5131 }, { "epoch": 0.8235246921009347, "grad_norm": 12.332956314086914, "learning_rate": 7.720844971790935e-06, "loss": 32.5859, "step": 5132 }, { "epoch": 0.823685160669154, "grad_norm": 12.2649507522583, "learning_rate": 7.707173147184527e-06, "loss": 32.5781, "step": 5133 }, { "epoch": 0.8238456292373731, "grad_norm": 12.134811401367188, "learning_rate": 7.693512427201099e-06, "loss": 32.6094, "step": 5134 }, { "epoch": 0.8240060978055923, "grad_norm": 12.527667999267578, "learning_rate": 7.679862815427475e-06, "loss": 32.7031, "step": 5135 }, { "epoch": 0.8241665663738116, "grad_norm": 12.389739036560059, "learning_rate": 7.666224315447567e-06, "loss": 32.6406, "step": 5136 }, { "epoch": 0.8243270349420307, "grad_norm": 12.206371307373047, "learning_rate": 7.652596930842376e-06, "loss": 32.6172, "step": 5137 }, { "epoch": 0.8244875035102499, "grad_norm": 12.073375701904297, "learning_rate": 7.63898066518996e-06, "loss": 32.6406, "step": 5138 }, { "epoch": 0.8246479720784692, "grad_norm": 12.077567100524902, "learning_rate": 7.625375522065503e-06, "loss": 32.625, "step": 5139 }, { "epoch": 0.8248084406466883, "grad_norm": 12.15147590637207, "learning_rate": 7.6117815050412326e-06, "loss": 32.7578, "step": 5140 }, { "epoch": 0.8249689092149075, "grad_norm": 12.277994155883789, "learning_rate": 7.598198617686469e-06, "loss": 32.6719, "step": 5141 }, { "epoch": 0.8251293777831268, "grad_norm": 12.258563041687012, "learning_rate": 7.584626863567584e-06, "loss": 32.5859, "step": 5142 }, { "epoch": 0.825289846351346, "grad_norm": 12.082659721374512, "learning_rate": 7.5710662462480654e-06, "loss": 32.7578, "step": 5143 }, { "epoch": 0.8254503149195651, "grad_norm": 12.597410202026367, "learning_rate": 7.557516769288436e-06, "loss": 32.625, "step": 5144 }, { "epoch": 0.8256107834877844, "grad_norm": 12.535743713378906, "learning_rate": 7.543978436246351e-06, "loss": 32.5703, "step": 5145 }, { "epoch": 0.8257712520560035, "grad_norm": 12.523865699768066, "learning_rate": 7.530451250676485e-06, "loss": 32.5469, "step": 5146 }, { "epoch": 0.8259317206242227, "grad_norm": 12.40578842163086, "learning_rate": 7.516935216130611e-06, "loss": 32.6484, "step": 5147 }, { "epoch": 0.826092189192442, "grad_norm": 12.13887882232666, "learning_rate": 7.503430336157568e-06, "loss": 32.75, "step": 5148 }, { "epoch": 0.8262526577606611, "grad_norm": 12.394464492797852, "learning_rate": 7.489936614303267e-06, "loss": 32.7969, "step": 5149 }, { "epoch": 0.8264131263288803, "grad_norm": 12.18700122833252, "learning_rate": 7.476454054110682e-06, "loss": 33.0938, "step": 5150 }, { "epoch": 0.8265735948970996, "grad_norm": 12.247552871704102, "learning_rate": 7.4629826591198774e-06, "loss": 32.8906, "step": 5151 }, { "epoch": 0.8267340634653187, "grad_norm": 12.110363006591797, "learning_rate": 7.449522432867945e-06, "loss": 33.0234, "step": 5152 }, { "epoch": 0.8268945320335379, "grad_norm": 12.531576156616211, "learning_rate": 7.436073378889119e-06, "loss": 32.8359, "step": 5153 }, { "epoch": 0.8270550006017572, "grad_norm": 12.10218620300293, "learning_rate": 7.422635500714614e-06, "loss": 32.8203, "step": 5154 }, { "epoch": 0.8272154691699763, "grad_norm": 12.15427303314209, "learning_rate": 7.4092088018727575e-06, "loss": 32.8047, "step": 5155 }, { "epoch": 0.8273759377381955, "grad_norm": 12.16258716583252, "learning_rate": 7.39579328588893e-06, "loss": 32.8984, "step": 5156 }, { "epoch": 0.8275364063064148, "grad_norm": 12.140871047973633, "learning_rate": 7.382388956285591e-06, "loss": 32.8086, "step": 5157 }, { "epoch": 0.827696874874634, "grad_norm": 12.328597068786621, "learning_rate": 7.368995816582225e-06, "loss": 32.6953, "step": 5158 }, { "epoch": 0.8278573434428531, "grad_norm": 12.021851539611816, "learning_rate": 7.355613870295436e-06, "loss": 32.7656, "step": 5159 }, { "epoch": 0.8280178120110724, "grad_norm": 12.012492179870605, "learning_rate": 7.342243120938841e-06, "loss": 32.7266, "step": 5160 }, { "epoch": 0.8281782805792915, "grad_norm": 12.402474403381348, "learning_rate": 7.328883572023132e-06, "loss": 32.6953, "step": 5161 }, { "epoch": 0.8283387491475107, "grad_norm": 12.136042594909668, "learning_rate": 7.315535227056064e-06, "loss": 32.6328, "step": 5162 }, { "epoch": 0.82849921771573, "grad_norm": 12.091439247131348, "learning_rate": 7.30219808954245e-06, "loss": 32.7266, "step": 5163 }, { "epoch": 0.8286596862839491, "grad_norm": 12.1425199508667, "learning_rate": 7.288872162984156e-06, "loss": 32.6641, "step": 5164 }, { "epoch": 0.8288201548521683, "grad_norm": 12.067241668701172, "learning_rate": 7.275557450880099e-06, "loss": 32.6484, "step": 5165 }, { "epoch": 0.8289806234203876, "grad_norm": 12.143729209899902, "learning_rate": 7.262253956726267e-06, "loss": 32.7266, "step": 5166 }, { "epoch": 0.8291410919886067, "grad_norm": 12.075417518615723, "learning_rate": 7.248961684015693e-06, "loss": 32.7031, "step": 5167 }, { "epoch": 0.8293015605568259, "grad_norm": 12.334870338439941, "learning_rate": 7.235680636238462e-06, "loss": 32.7031, "step": 5168 }, { "epoch": 0.8294620291250452, "grad_norm": 12.205035209655762, "learning_rate": 7.222410816881714e-06, "loss": 32.75, "step": 5169 }, { "epoch": 0.8296224976932643, "grad_norm": 11.94874095916748, "learning_rate": 7.2091522294296445e-06, "loss": 32.75, "step": 5170 }, { "epoch": 0.8297829662614835, "grad_norm": 12.005627632141113, "learning_rate": 7.1959048773634904e-06, "loss": 32.6641, "step": 5171 }, { "epoch": 0.8299434348297028, "grad_norm": 12.259973526000977, "learning_rate": 7.182668764161543e-06, "loss": 32.5547, "step": 5172 }, { "epoch": 0.830103903397922, "grad_norm": 11.953303337097168, "learning_rate": 7.16944389329916e-06, "loss": 32.7422, "step": 5173 }, { "epoch": 0.8302643719661411, "grad_norm": 12.132166862487793, "learning_rate": 7.156230268248726e-06, "loss": 32.5859, "step": 5174 }, { "epoch": 0.8304248405343604, "grad_norm": 12.007699012756348, "learning_rate": 7.14302789247967e-06, "loss": 32.625, "step": 5175 }, { "epoch": 0.8305853091025795, "grad_norm": 12.282673835754395, "learning_rate": 7.129836769458498e-06, "loss": 32.7188, "step": 5176 }, { "epoch": 0.8307457776707987, "grad_norm": 12.203088760375977, "learning_rate": 7.116656902648711e-06, "loss": 32.6172, "step": 5177 }, { "epoch": 0.830906246239018, "grad_norm": 12.200841903686523, "learning_rate": 7.10348829551088e-06, "loss": 32.6328, "step": 5178 }, { "epoch": 0.8310667148072372, "grad_norm": 12.081168174743652, "learning_rate": 7.0903309515026495e-06, "loss": 32.7656, "step": 5179 }, { "epoch": 0.8312271833754563, "grad_norm": 12.209744453430176, "learning_rate": 7.077184874078663e-06, "loss": 32.5781, "step": 5180 }, { "epoch": 0.8313876519436756, "grad_norm": 12.584746360778809, "learning_rate": 7.064050066690631e-06, "loss": 32.5781, "step": 5181 }, { "epoch": 0.8315481205118948, "grad_norm": 12.20318603515625, "learning_rate": 7.050926532787294e-06, "loss": 32.6094, "step": 5182 }, { "epoch": 0.8317085890801139, "grad_norm": 12.143953323364258, "learning_rate": 7.037814275814431e-06, "loss": 32.7266, "step": 5183 }, { "epoch": 0.8318690576483332, "grad_norm": 12.194633483886719, "learning_rate": 7.024713299214863e-06, "loss": 32.6406, "step": 5184 }, { "epoch": 0.8320295262165524, "grad_norm": 12.014087677001953, "learning_rate": 7.011623606428447e-06, "loss": 32.7812, "step": 5185 }, { "epoch": 0.8321899947847715, "grad_norm": 12.264366149902344, "learning_rate": 6.998545200892093e-06, "loss": 32.6016, "step": 5186 }, { "epoch": 0.8323504633529908, "grad_norm": 12.201702117919922, "learning_rate": 6.9854780860397265e-06, "loss": 32.625, "step": 5187 }, { "epoch": 0.83251093192121, "grad_norm": 12.071552276611328, "learning_rate": 6.972422265302331e-06, "loss": 32.5938, "step": 5188 }, { "epoch": 0.8326714004894291, "grad_norm": 12.07764720916748, "learning_rate": 6.959377742107886e-06, "loss": 32.7109, "step": 5189 }, { "epoch": 0.8328318690576484, "grad_norm": 12.155411720275879, "learning_rate": 6.946344519881437e-06, "loss": 32.8047, "step": 5190 }, { "epoch": 0.8329923376258676, "grad_norm": 12.39944076538086, "learning_rate": 6.93332260204505e-06, "loss": 32.6641, "step": 5191 }, { "epoch": 0.8331528061940867, "grad_norm": 12.078264236450195, "learning_rate": 6.920311992017847e-06, "loss": 32.7422, "step": 5192 }, { "epoch": 0.833313274762306, "grad_norm": 12.005922317504883, "learning_rate": 6.907312693215945e-06, "loss": 32.6172, "step": 5193 }, { "epoch": 0.8334737433305252, "grad_norm": 12.272933959960938, "learning_rate": 6.8943247090525075e-06, "loss": 32.5938, "step": 5194 }, { "epoch": 0.8336342118987443, "grad_norm": 12.471732139587402, "learning_rate": 6.88134804293773e-06, "loss": 32.6641, "step": 5195 }, { "epoch": 0.8337946804669635, "grad_norm": 12.00943660736084, "learning_rate": 6.8683826982788355e-06, "loss": 32.6562, "step": 5196 }, { "epoch": 0.8339551490351828, "grad_norm": 12.517510414123535, "learning_rate": 6.855428678480064e-06, "loss": 32.6094, "step": 5197 }, { "epoch": 0.8341156176034019, "grad_norm": 12.386425018310547, "learning_rate": 6.8424859869426984e-06, "loss": 32.7109, "step": 5198 }, { "epoch": 0.8342760861716211, "grad_norm": 12.139289855957031, "learning_rate": 6.829554627065021e-06, "loss": 32.7344, "step": 5199 }, { "epoch": 0.8344365547398404, "grad_norm": 12.158077239990234, "learning_rate": 6.816634602242395e-06, "loss": 32.9375, "step": 5200 }, { "epoch": 0.8345970233080595, "grad_norm": 12.2460298538208, "learning_rate": 6.803725915867132e-06, "loss": 32.8828, "step": 5201 }, { "epoch": 0.8347574918762787, "grad_norm": 12.14768123626709, "learning_rate": 6.790828571328611e-06, "loss": 32.7656, "step": 5202 }, { "epoch": 0.834917960444498, "grad_norm": 12.206535339355469, "learning_rate": 6.77794257201323e-06, "loss": 32.75, "step": 5203 }, { "epoch": 0.8350784290127171, "grad_norm": 12.328048706054688, "learning_rate": 6.765067921304407e-06, "loss": 32.6953, "step": 5204 }, { "epoch": 0.8352388975809363, "grad_norm": 11.949281692504883, "learning_rate": 6.752204622582558e-06, "loss": 32.7656, "step": 5205 }, { "epoch": 0.8353993661491556, "grad_norm": 12.433597564697266, "learning_rate": 6.7393526792251636e-06, "loss": 32.875, "step": 5206 }, { "epoch": 0.8355598347173747, "grad_norm": 12.271953582763672, "learning_rate": 6.726512094606685e-06, "loss": 32.7422, "step": 5207 }, { "epoch": 0.8357203032855939, "grad_norm": 12.00966739654541, "learning_rate": 6.7136828720986144e-06, "loss": 32.7031, "step": 5208 }, { "epoch": 0.8358807718538132, "grad_norm": 12.272756576538086, "learning_rate": 6.700865015069457e-06, "loss": 32.75, "step": 5209 }, { "epoch": 0.8360412404220323, "grad_norm": 12.203350067138672, "learning_rate": 6.688058526884728e-06, "loss": 32.6797, "step": 5210 }, { "epoch": 0.8362017089902515, "grad_norm": 12.011788368225098, "learning_rate": 6.675263410906979e-06, "loss": 32.7578, "step": 5211 }, { "epoch": 0.8363621775584708, "grad_norm": 12.417097091674805, "learning_rate": 6.662479670495753e-06, "loss": 32.6953, "step": 5212 }, { "epoch": 0.8365226461266899, "grad_norm": 12.275212287902832, "learning_rate": 6.649707309007613e-06, "loss": 32.6641, "step": 5213 }, { "epoch": 0.8366831146949091, "grad_norm": 12.403146743774414, "learning_rate": 6.6369463297961396e-06, "loss": 32.7031, "step": 5214 }, { "epoch": 0.8368435832631284, "grad_norm": 12.274017333984375, "learning_rate": 6.624196736211924e-06, "loss": 32.6172, "step": 5215 }, { "epoch": 0.8370040518313475, "grad_norm": 12.132453918457031, "learning_rate": 6.6114585316025654e-06, "loss": 32.5859, "step": 5216 }, { "epoch": 0.8371645203995667, "grad_norm": 12.327010154724121, "learning_rate": 6.598731719312667e-06, "loss": 32.625, "step": 5217 }, { "epoch": 0.837324988967786, "grad_norm": 12.541733741760254, "learning_rate": 6.5860163026838505e-06, "loss": 32.6328, "step": 5218 }, { "epoch": 0.8374854575360051, "grad_norm": 12.141403198242188, "learning_rate": 6.573312285054728e-06, "loss": 32.6484, "step": 5219 }, { "epoch": 0.8376459261042243, "grad_norm": 12.266034126281738, "learning_rate": 6.560619669760954e-06, "loss": 32.6953, "step": 5220 }, { "epoch": 0.8378063946724436, "grad_norm": 12.005029678344727, "learning_rate": 6.547938460135156e-06, "loss": 32.5977, "step": 5221 }, { "epoch": 0.8379668632406627, "grad_norm": 12.072518348693848, "learning_rate": 6.535268659506982e-06, "loss": 32.7266, "step": 5222 }, { "epoch": 0.8381273318088819, "grad_norm": 12.25861644744873, "learning_rate": 6.522610271203089e-06, "loss": 32.6094, "step": 5223 }, { "epoch": 0.8382878003771012, "grad_norm": 12.32783317565918, "learning_rate": 6.509963298547106e-06, "loss": 32.6406, "step": 5224 }, { "epoch": 0.8384482689453203, "grad_norm": 12.072807312011719, "learning_rate": 6.497327744859683e-06, "loss": 32.7266, "step": 5225 }, { "epoch": 0.8386087375135395, "grad_norm": 12.136024475097656, "learning_rate": 6.484703613458509e-06, "loss": 32.6797, "step": 5226 }, { "epoch": 0.8387692060817588, "grad_norm": 11.883723258972168, "learning_rate": 6.472090907658218e-06, "loss": 32.6406, "step": 5227 }, { "epoch": 0.8389296746499779, "grad_norm": 12.196209907531738, "learning_rate": 6.459489630770477e-06, "loss": 32.6484, "step": 5228 }, { "epoch": 0.8390901432181971, "grad_norm": 12.195802688598633, "learning_rate": 6.4468997861039385e-06, "loss": 32.625, "step": 5229 }, { "epoch": 0.8392506117864164, "grad_norm": 12.478919982910156, "learning_rate": 6.434321376964253e-06, "loss": 32.5938, "step": 5230 }, { "epoch": 0.8394110803546355, "grad_norm": 12.6363525390625, "learning_rate": 6.421754406654079e-06, "loss": 32.5469, "step": 5231 }, { "epoch": 0.8395715489228547, "grad_norm": 12.077566146850586, "learning_rate": 6.409198878473055e-06, "loss": 32.6953, "step": 5232 }, { "epoch": 0.839732017491074, "grad_norm": 12.46275806427002, "learning_rate": 6.396654795717838e-06, "loss": 32.6484, "step": 5233 }, { "epoch": 0.8398924860592931, "grad_norm": 12.215614318847656, "learning_rate": 6.3841221616820655e-06, "loss": 32.6562, "step": 5234 }, { "epoch": 0.8400529546275123, "grad_norm": 12.261098861694336, "learning_rate": 6.371600979656378e-06, "loss": 32.6016, "step": 5235 }, { "epoch": 0.8402134231957316, "grad_norm": 12.134326934814453, "learning_rate": 6.3590912529283784e-06, "loss": 32.6797, "step": 5236 }, { "epoch": 0.8403738917639507, "grad_norm": 12.072735786437988, "learning_rate": 6.346592984782701e-06, "loss": 32.7188, "step": 5237 }, { "epoch": 0.8405343603321699, "grad_norm": 12.20710563659668, "learning_rate": 6.334106178500948e-06, "loss": 32.5859, "step": 5238 }, { "epoch": 0.8406948289003892, "grad_norm": 12.006156921386719, "learning_rate": 6.321630837361714e-06, "loss": 32.6094, "step": 5239 }, { "epoch": 0.8408552974686083, "grad_norm": 12.26357364654541, "learning_rate": 6.3091669646406115e-06, "loss": 32.6797, "step": 5240 }, { "epoch": 0.8410157660368275, "grad_norm": 12.131288528442383, "learning_rate": 6.2967145636102065e-06, "loss": 32.6094, "step": 5241 }, { "epoch": 0.8411762346050468, "grad_norm": 12.07032299041748, "learning_rate": 6.284273637540067e-06, "loss": 32.6406, "step": 5242 }, { "epoch": 0.8413367031732659, "grad_norm": 12.270100593566895, "learning_rate": 6.27184418969674e-06, "loss": 32.6797, "step": 5243 }, { "epoch": 0.8414971717414851, "grad_norm": 12.420252799987793, "learning_rate": 6.259426223343778e-06, "loss": 32.7578, "step": 5244 }, { "epoch": 0.8416576403097044, "grad_norm": 12.4606351852417, "learning_rate": 6.2470197417417e-06, "loss": 32.6719, "step": 5245 }, { "epoch": 0.8418181088779235, "grad_norm": 12.135124206542969, "learning_rate": 6.2346247481479995e-06, "loss": 32.625, "step": 5246 }, { "epoch": 0.8419785774461427, "grad_norm": 12.389192581176758, "learning_rate": 6.222241245817206e-06, "loss": 32.5703, "step": 5247 }, { "epoch": 0.842139046014362, "grad_norm": 12.333765029907227, "learning_rate": 6.209869238000771e-06, "loss": 32.6875, "step": 5248 }, { "epoch": 0.8422995145825811, "grad_norm": 12.590027809143066, "learning_rate": 6.197508727947154e-06, "loss": 32.6016, "step": 5249 }, { "epoch": 0.8424599831508003, "grad_norm": 12.488832473754883, "learning_rate": 6.185159718901801e-06, "loss": 32.8906, "step": 5250 }, { "epoch": 0.8426204517190196, "grad_norm": 12.790060997009277, "learning_rate": 6.172822214107127e-06, "loss": 33.0781, "step": 5251 }, { "epoch": 0.8427809202872387, "grad_norm": 12.081092834472656, "learning_rate": 6.160496216802519e-06, "loss": 32.75, "step": 5252 }, { "epoch": 0.8429413888554579, "grad_norm": 12.343573570251465, "learning_rate": 6.1481817302243785e-06, "loss": 32.8125, "step": 5253 }, { "epoch": 0.8431018574236772, "grad_norm": 11.958976745605469, "learning_rate": 6.13587875760605e-06, "loss": 32.8203, "step": 5254 }, { "epoch": 0.8432623259918963, "grad_norm": 12.211312294006348, "learning_rate": 6.123587302177869e-06, "loss": 32.8281, "step": 5255 }, { "epoch": 0.8434227945601155, "grad_norm": 12.272767066955566, "learning_rate": 6.11130736716714e-06, "loss": 32.75, "step": 5256 }, { "epoch": 0.8435832631283348, "grad_norm": 12.265377044677734, "learning_rate": 6.099038955798142e-06, "loss": 32.6953, "step": 5257 }, { "epoch": 0.8437437316965539, "grad_norm": 12.145928382873535, "learning_rate": 6.086782071292141e-06, "loss": 32.7422, "step": 5258 }, { "epoch": 0.8439042002647731, "grad_norm": 12.331000328063965, "learning_rate": 6.074536716867362e-06, "loss": 32.75, "step": 5259 }, { "epoch": 0.8440646688329924, "grad_norm": 12.219331741333008, "learning_rate": 6.062302895739008e-06, "loss": 32.7422, "step": 5260 }, { "epoch": 0.8442251374012115, "grad_norm": 12.072206497192383, "learning_rate": 6.050080611119258e-06, "loss": 32.7422, "step": 5261 }, { "epoch": 0.8443856059694307, "grad_norm": 12.078920364379883, "learning_rate": 6.037869866217255e-06, "loss": 32.8047, "step": 5262 }, { "epoch": 0.84454607453765, "grad_norm": 12.271448135375977, "learning_rate": 6.025670664239119e-06, "loss": 32.6562, "step": 5263 }, { "epoch": 0.8447065431058691, "grad_norm": 12.072481155395508, "learning_rate": 6.013483008387932e-06, "loss": 32.7188, "step": 5264 }, { "epoch": 0.8448670116740883, "grad_norm": 12.138742446899414, "learning_rate": 6.001306901863746e-06, "loss": 32.7109, "step": 5265 }, { "epoch": 0.8450274802423076, "grad_norm": 12.070754051208496, "learning_rate": 5.98914234786358e-06, "loss": 32.7031, "step": 5266 }, { "epoch": 0.8451879488105267, "grad_norm": 12.135205268859863, "learning_rate": 5.9769893495814355e-06, "loss": 32.6719, "step": 5267 }, { "epoch": 0.8453484173787459, "grad_norm": 12.016374588012695, "learning_rate": 5.9648479102082545e-06, "loss": 32.7031, "step": 5268 }, { "epoch": 0.8455088859469652, "grad_norm": 12.601594924926758, "learning_rate": 5.952718032931964e-06, "loss": 32.5938, "step": 5269 }, { "epoch": 0.8456693545151843, "grad_norm": 12.130316734313965, "learning_rate": 5.940599720937445e-06, "loss": 32.625, "step": 5270 }, { "epoch": 0.8458298230834035, "grad_norm": 12.20259952545166, "learning_rate": 5.9284929774065465e-06, "loss": 32.625, "step": 5271 }, { "epoch": 0.8459902916516228, "grad_norm": 12.132428169250488, "learning_rate": 5.916397805518059e-06, "loss": 32.625, "step": 5272 }, { "epoch": 0.8461507602198419, "grad_norm": 12.260586738586426, "learning_rate": 5.904314208447776e-06, "loss": 32.6172, "step": 5273 }, { "epoch": 0.8463112287880611, "grad_norm": 12.265722274780273, "learning_rate": 5.892242189368418e-06, "loss": 32.7656, "step": 5274 }, { "epoch": 0.8464716973562804, "grad_norm": 12.135530471801758, "learning_rate": 5.880181751449676e-06, "loss": 32.6562, "step": 5275 }, { "epoch": 0.8466321659244995, "grad_norm": 12.447591781616211, "learning_rate": 5.868132897858214e-06, "loss": 32.6562, "step": 5276 }, { "epoch": 0.8467926344927187, "grad_norm": 12.201614379882812, "learning_rate": 5.856095631757624e-06, "loss": 32.5781, "step": 5277 }, { "epoch": 0.846953103060938, "grad_norm": 12.073102951049805, "learning_rate": 5.844069956308479e-06, "loss": 32.7422, "step": 5278 }, { "epoch": 0.8471135716291571, "grad_norm": 12.51307201385498, "learning_rate": 5.8320558746683076e-06, "loss": 32.6094, "step": 5279 }, { "epoch": 0.8472740401973763, "grad_norm": 12.457993507385254, "learning_rate": 5.8200533899915755e-06, "loss": 32.5391, "step": 5280 }, { "epoch": 0.8474345087655956, "grad_norm": 12.202840805053711, "learning_rate": 5.808062505429735e-06, "loss": 32.6406, "step": 5281 }, { "epoch": 0.8475949773338147, "grad_norm": 12.3897705078125, "learning_rate": 5.796083224131177e-06, "loss": 32.5781, "step": 5282 }, { "epoch": 0.8477554459020339, "grad_norm": 12.327783584594727, "learning_rate": 5.78411554924122e-06, "loss": 32.6953, "step": 5283 }, { "epoch": 0.8479159144702532, "grad_norm": 12.402963638305664, "learning_rate": 5.772159483902173e-06, "loss": 32.6328, "step": 5284 }, { "epoch": 0.8480763830384723, "grad_norm": 12.133103370666504, "learning_rate": 5.76021503125328e-06, "loss": 32.6719, "step": 5285 }, { "epoch": 0.8482368516066915, "grad_norm": 12.003220558166504, "learning_rate": 5.748282194430732e-06, "loss": 32.6641, "step": 5286 }, { "epoch": 0.8483973201749108, "grad_norm": 12.517950057983398, "learning_rate": 5.73636097656769e-06, "loss": 32.5703, "step": 5287 }, { "epoch": 0.8485577887431299, "grad_norm": 12.08144760131836, "learning_rate": 5.7244513807942435e-06, "loss": 32.6406, "step": 5288 }, { "epoch": 0.8487182573113491, "grad_norm": 12.198819160461426, "learning_rate": 5.712553410237436e-06, "loss": 32.6406, "step": 5289 }, { "epoch": 0.8488787258795684, "grad_norm": 12.401315689086914, "learning_rate": 5.7006670680212596e-06, "loss": 32.6328, "step": 5290 }, { "epoch": 0.8490391944477875, "grad_norm": 12.19920539855957, "learning_rate": 5.688792357266659e-06, "loss": 32.6172, "step": 5291 }, { "epoch": 0.8491996630160067, "grad_norm": 12.195904731750488, "learning_rate": 5.676929281091514e-06, "loss": 32.6016, "step": 5292 }, { "epoch": 0.849360131584226, "grad_norm": 12.324049949645996, "learning_rate": 5.66507784261065e-06, "loss": 32.6484, "step": 5293 }, { "epoch": 0.8495206001524451, "grad_norm": 12.335219383239746, "learning_rate": 5.653238044935865e-06, "loss": 32.5625, "step": 5294 }, { "epoch": 0.8496810687206643, "grad_norm": 12.713841438293457, "learning_rate": 5.641409891175853e-06, "loss": 32.8047, "step": 5295 }, { "epoch": 0.8498415372888836, "grad_norm": 12.19699478149414, "learning_rate": 5.629593384436288e-06, "loss": 32.6328, "step": 5296 }, { "epoch": 0.8500020058571027, "grad_norm": 12.709508895874023, "learning_rate": 5.617788527819762e-06, "loss": 32.5625, "step": 5297 }, { "epoch": 0.8501624744253219, "grad_norm": 12.007149696350098, "learning_rate": 5.605995324425828e-06, "loss": 32.6719, "step": 5298 }, { "epoch": 0.8503229429935412, "grad_norm": 11.973064422607422, "learning_rate": 5.59421377735096e-06, "loss": 33.0, "step": 5299 }, { "epoch": 0.8504834115617603, "grad_norm": 12.319746971130371, "learning_rate": 5.582443889688599e-06, "loss": 33.0859, "step": 5300 }, { "epoch": 0.8506438801299795, "grad_norm": 12.348373413085938, "learning_rate": 5.570685664529096e-06, "loss": 32.7656, "step": 5301 }, { "epoch": 0.8508043486981988, "grad_norm": 12.212596893310547, "learning_rate": 5.558939104959754e-06, "loss": 32.7969, "step": 5302 }, { "epoch": 0.8509648172664179, "grad_norm": 12.089398384094238, "learning_rate": 5.547204214064816e-06, "loss": 32.8125, "step": 5303 }, { "epoch": 0.8511252858346371, "grad_norm": 12.406484603881836, "learning_rate": 5.53548099492544e-06, "loss": 32.7031, "step": 5304 }, { "epoch": 0.8512857544028564, "grad_norm": 12.651280403137207, "learning_rate": 5.523769450619753e-06, "loss": 32.8281, "step": 5305 }, { "epoch": 0.8514462229710755, "grad_norm": 12.147356033325195, "learning_rate": 5.512069584222784e-06, "loss": 32.7812, "step": 5306 }, { "epoch": 0.8516066915392947, "grad_norm": 12.01753044128418, "learning_rate": 5.50038139880652e-06, "loss": 32.8359, "step": 5307 }, { "epoch": 0.851767160107514, "grad_norm": 12.07604694366455, "learning_rate": 5.488704897439867e-06, "loss": 32.6562, "step": 5308 }, { "epoch": 0.8519276286757331, "grad_norm": 12.269389152526855, "learning_rate": 5.477040083188667e-06, "loss": 32.6953, "step": 5309 }, { "epoch": 0.8520880972439523, "grad_norm": 12.0814790725708, "learning_rate": 5.465386959115693e-06, "loss": 32.7188, "step": 5310 }, { "epoch": 0.8522485658121716, "grad_norm": 12.26021671295166, "learning_rate": 5.453745528280657e-06, "loss": 32.6484, "step": 5311 }, { "epoch": 0.8524090343803907, "grad_norm": 12.391736030578613, "learning_rate": 5.442115793740182e-06, "loss": 32.6406, "step": 5312 }, { "epoch": 0.8525695029486099, "grad_norm": 12.19643783569336, "learning_rate": 5.430497758547831e-06, "loss": 32.6562, "step": 5313 }, { "epoch": 0.8527299715168292, "grad_norm": 12.06938362121582, "learning_rate": 5.418891425754108e-06, "loss": 32.6328, "step": 5314 }, { "epoch": 0.8528904400850483, "grad_norm": 12.006336212158203, "learning_rate": 5.4072967984064245e-06, "loss": 32.6484, "step": 5315 }, { "epoch": 0.8530509086532675, "grad_norm": 12.193666458129883, "learning_rate": 5.395713879549125e-06, "loss": 32.6094, "step": 5316 }, { "epoch": 0.8532113772214868, "grad_norm": 12.203320503234863, "learning_rate": 5.384142672223486e-06, "loss": 32.6719, "step": 5317 }, { "epoch": 0.8533718457897059, "grad_norm": 12.271514892578125, "learning_rate": 5.3725831794677054e-06, "loss": 32.625, "step": 5318 }, { "epoch": 0.8535323143579251, "grad_norm": 12.204299926757812, "learning_rate": 5.361035404316878e-06, "loss": 32.6953, "step": 5319 }, { "epoch": 0.8536927829261444, "grad_norm": 12.13640308380127, "learning_rate": 5.349499349803072e-06, "loss": 32.6484, "step": 5320 }, { "epoch": 0.8538532514943635, "grad_norm": 11.944845199584961, "learning_rate": 5.337975018955254e-06, "loss": 32.75, "step": 5321 }, { "epoch": 0.8540137200625827, "grad_norm": 12.397612571716309, "learning_rate": 5.326462414799299e-06, "loss": 32.5625, "step": 5322 }, { "epoch": 0.854174188630802, "grad_norm": 12.00784969329834, "learning_rate": 5.314961540358027e-06, "loss": 32.6719, "step": 5323 }, { "epoch": 0.8543346571990211, "grad_norm": 12.004042625427246, "learning_rate": 5.30347239865116e-06, "loss": 32.6406, "step": 5324 }, { "epoch": 0.8544951257672403, "grad_norm": 11.949747085571289, "learning_rate": 5.291994992695348e-06, "loss": 32.6953, "step": 5325 }, { "epoch": 0.8546555943354596, "grad_norm": 12.077229499816895, "learning_rate": 5.2805293255041575e-06, "loss": 32.6094, "step": 5326 }, { "epoch": 0.8548160629036787, "grad_norm": 12.340152740478516, "learning_rate": 5.269075400088069e-06, "loss": 32.6562, "step": 5327 }, { "epoch": 0.8549765314718979, "grad_norm": 12.068575859069824, "learning_rate": 5.257633219454494e-06, "loss": 32.7031, "step": 5328 }, { "epoch": 0.8551370000401172, "grad_norm": 12.263256072998047, "learning_rate": 5.246202786607751e-06, "loss": 32.5781, "step": 5329 }, { "epoch": 0.8552974686083363, "grad_norm": 12.19384479522705, "learning_rate": 5.234784104549073e-06, "loss": 32.6719, "step": 5330 }, { "epoch": 0.8554579371765555, "grad_norm": 12.510965347290039, "learning_rate": 5.223377176276595e-06, "loss": 32.5938, "step": 5331 }, { "epoch": 0.8556184057447748, "grad_norm": 12.004613876342773, "learning_rate": 5.211982004785387e-06, "loss": 32.6797, "step": 5332 }, { "epoch": 0.8557788743129939, "grad_norm": 12.134442329406738, "learning_rate": 5.200598593067418e-06, "loss": 32.5703, "step": 5333 }, { "epoch": 0.8559393428812131, "grad_norm": 12.201457023620605, "learning_rate": 5.189226944111586e-06, "loss": 32.6172, "step": 5334 }, { "epoch": 0.8560998114494324, "grad_norm": 12.203203201293945, "learning_rate": 5.1778670609036905e-06, "loss": 32.6172, "step": 5335 }, { "epoch": 0.8562602800176515, "grad_norm": 11.9434175491333, "learning_rate": 5.166518946426435e-06, "loss": 32.6797, "step": 5336 }, { "epoch": 0.8564207485858707, "grad_norm": 12.130440711975098, "learning_rate": 5.155182603659442e-06, "loss": 32.5938, "step": 5337 }, { "epoch": 0.85658121715409, "grad_norm": 12.27375602722168, "learning_rate": 5.143858035579235e-06, "loss": 32.6484, "step": 5338 }, { "epoch": 0.8567416857223091, "grad_norm": 12.259048461914062, "learning_rate": 5.132545245159259e-06, "loss": 32.5625, "step": 5339 }, { "epoch": 0.8569021542905283, "grad_norm": 12.073587417602539, "learning_rate": 5.121244235369843e-06, "loss": 32.5938, "step": 5340 }, { "epoch": 0.8570626228587476, "grad_norm": 12.13487720489502, "learning_rate": 5.109955009178263e-06, "loss": 32.6328, "step": 5341 }, { "epoch": 0.8572230914269667, "grad_norm": 12.200797080993652, "learning_rate": 5.09867756954866e-06, "loss": 32.7578, "step": 5342 }, { "epoch": 0.8573835599951859, "grad_norm": 12.383764266967773, "learning_rate": 5.087411919442097e-06, "loss": 32.5547, "step": 5343 }, { "epoch": 0.8575440285634052, "grad_norm": 12.2601957321167, "learning_rate": 5.076158061816549e-06, "loss": 32.5703, "step": 5344 }, { "epoch": 0.8577044971316243, "grad_norm": 12.458828926086426, "learning_rate": 5.0649159996268795e-06, "loss": 32.5938, "step": 5345 }, { "epoch": 0.8578649656998435, "grad_norm": 12.21237850189209, "learning_rate": 5.053685735824853e-06, "loss": 32.6172, "step": 5346 }, { "epoch": 0.8580254342680628, "grad_norm": 12.224441528320312, "learning_rate": 5.04246727335917e-06, "loss": 32.7422, "step": 5347 }, { "epoch": 0.8581859028362819, "grad_norm": 12.513017654418945, "learning_rate": 5.031260615175398e-06, "loss": 32.6094, "step": 5348 }, { "epoch": 0.8583463714045011, "grad_norm": 12.027348518371582, "learning_rate": 5.020065764216009e-06, "loss": 32.7734, "step": 5349 }, { "epoch": 0.8585068399727204, "grad_norm": 12.21124267578125, "learning_rate": 5.008882723420388e-06, "loss": 32.9141, "step": 5350 }, { "epoch": 0.8586673085409395, "grad_norm": 12.546711921691895, "learning_rate": 4.997711495724805e-06, "loss": 32.8047, "step": 5351 }, { "epoch": 0.8588277771091587, "grad_norm": 12.012624740600586, "learning_rate": 4.98655208406244e-06, "loss": 32.8438, "step": 5352 }, { "epoch": 0.858988245677378, "grad_norm": 12.075226783752441, "learning_rate": 4.975404491363361e-06, "loss": 32.7031, "step": 5353 }, { "epoch": 0.8591487142455971, "grad_norm": 12.072149276733398, "learning_rate": 4.964268720554543e-06, "loss": 32.8281, "step": 5354 }, { "epoch": 0.8593091828138163, "grad_norm": 12.082338333129883, "learning_rate": 4.953144774559853e-06, "loss": 32.8516, "step": 5355 }, { "epoch": 0.8594696513820356, "grad_norm": 12.330948829650879, "learning_rate": 4.942032656300044e-06, "loss": 32.8359, "step": 5356 }, { "epoch": 0.8596301199502547, "grad_norm": 12.276060104370117, "learning_rate": 4.930932368692776e-06, "loss": 32.7422, "step": 5357 }, { "epoch": 0.8597905885184739, "grad_norm": 12.201828956604004, "learning_rate": 4.9198439146526e-06, "loss": 32.6953, "step": 5358 }, { "epoch": 0.8599510570866932, "grad_norm": 12.327198028564453, "learning_rate": 4.908767297090949e-06, "loss": 32.6641, "step": 5359 }, { "epoch": 0.8601115256549123, "grad_norm": 12.550463676452637, "learning_rate": 4.897702518916158e-06, "loss": 32.8594, "step": 5360 }, { "epoch": 0.8602719942231315, "grad_norm": 12.205208778381348, "learning_rate": 4.8866495830334645e-06, "loss": 32.6953, "step": 5361 }, { "epoch": 0.8604324627913508, "grad_norm": 12.01207447052002, "learning_rate": 4.87560849234498e-06, "loss": 32.7812, "step": 5362 }, { "epoch": 0.8605929313595699, "grad_norm": 12.526965141296387, "learning_rate": 4.864579249749701e-06, "loss": 32.625, "step": 5363 }, { "epoch": 0.8607533999277891, "grad_norm": 12.137333869934082, "learning_rate": 4.853561858143535e-06, "loss": 32.7422, "step": 5364 }, { "epoch": 0.8609138684960084, "grad_norm": 12.386415481567383, "learning_rate": 4.842556320419262e-06, "loss": 32.5859, "step": 5365 }, { "epoch": 0.8610743370642275, "grad_norm": 12.151424407958984, "learning_rate": 4.831562639466536e-06, "loss": 32.8438, "step": 5366 }, { "epoch": 0.8612348056324467, "grad_norm": 12.384058952331543, "learning_rate": 4.820580818171932e-06, "loss": 32.5938, "step": 5367 }, { "epoch": 0.861395274200666, "grad_norm": 12.136688232421875, "learning_rate": 4.809610859418895e-06, "loss": 32.6953, "step": 5368 }, { "epoch": 0.8615557427688851, "grad_norm": 12.19611644744873, "learning_rate": 4.798652766087747e-06, "loss": 32.6406, "step": 5369 }, { "epoch": 0.8617162113371043, "grad_norm": 12.136480331420898, "learning_rate": 4.787706541055703e-06, "loss": 32.6953, "step": 5370 }, { "epoch": 0.8618766799053236, "grad_norm": 12.197064399719238, "learning_rate": 4.7767721871968654e-06, "loss": 32.7031, "step": 5371 }, { "epoch": 0.8620371484735427, "grad_norm": 12.004734992980957, "learning_rate": 4.765849707382203e-06, "loss": 32.6172, "step": 5372 }, { "epoch": 0.8621976170417619, "grad_norm": 12.071492195129395, "learning_rate": 4.754939104479589e-06, "loss": 32.6875, "step": 5373 }, { "epoch": 0.8623580856099812, "grad_norm": 11.951360702514648, "learning_rate": 4.744040381353754e-06, "loss": 32.7891, "step": 5374 }, { "epoch": 0.8625185541782003, "grad_norm": 12.264890670776367, "learning_rate": 4.733153540866347e-06, "loss": 32.6016, "step": 5375 }, { "epoch": 0.8626790227464195, "grad_norm": 12.134909629821777, "learning_rate": 4.722278585875856e-06, "loss": 32.6055, "step": 5376 }, { "epoch": 0.8628394913146388, "grad_norm": 12.141223907470703, "learning_rate": 4.711415519237678e-06, "loss": 32.7344, "step": 5377 }, { "epoch": 0.8629999598828579, "grad_norm": 12.391044616699219, "learning_rate": 4.700564343804054e-06, "loss": 32.6016, "step": 5378 }, { "epoch": 0.8631604284510771, "grad_norm": 12.067712783813477, "learning_rate": 4.689725062424139e-06, "loss": 32.6094, "step": 5379 }, { "epoch": 0.8633208970192964, "grad_norm": 12.321499824523926, "learning_rate": 4.678897677943939e-06, "loss": 32.5625, "step": 5380 }, { "epoch": 0.8634813655875155, "grad_norm": 12.283369064331055, "learning_rate": 4.668082193206369e-06, "loss": 32.6328, "step": 5381 }, { "epoch": 0.8636418341557347, "grad_norm": 12.396905899047852, "learning_rate": 4.657278611051186e-06, "loss": 32.6016, "step": 5382 }, { "epoch": 0.863802302723954, "grad_norm": 12.38387680053711, "learning_rate": 4.646486934315031e-06, "loss": 32.5938, "step": 5383 }, { "epoch": 0.8639627712921731, "grad_norm": 12.1370849609375, "learning_rate": 4.635707165831432e-06, "loss": 32.6094, "step": 5384 }, { "epoch": 0.8641232398603923, "grad_norm": 12.132977485656738, "learning_rate": 4.624939308430776e-06, "loss": 32.6094, "step": 5385 }, { "epoch": 0.8642837084286116, "grad_norm": 12.01791763305664, "learning_rate": 4.614183364940328e-06, "loss": 32.7109, "step": 5386 }, { "epoch": 0.8644441769968307, "grad_norm": 12.319295883178711, "learning_rate": 4.603439338184207e-06, "loss": 32.6094, "step": 5387 }, { "epoch": 0.8646046455650499, "grad_norm": 12.259459495544434, "learning_rate": 4.5927072309834614e-06, "loss": 32.6797, "step": 5388 }, { "epoch": 0.8647651141332692, "grad_norm": 12.143077850341797, "learning_rate": 4.581987046155933e-06, "loss": 32.6641, "step": 5389 }, { "epoch": 0.8649255827014883, "grad_norm": 12.073153495788574, "learning_rate": 4.5712787865163755e-06, "loss": 32.6406, "step": 5390 }, { "epoch": 0.8650860512697075, "grad_norm": 12.274068832397461, "learning_rate": 4.560582454876411e-06, "loss": 32.6562, "step": 5391 }, { "epoch": 0.8652465198379268, "grad_norm": 12.454387664794922, "learning_rate": 4.549898054044521e-06, "loss": 32.582, "step": 5392 }, { "epoch": 0.8654069884061459, "grad_norm": 12.703583717346191, "learning_rate": 4.539225586826057e-06, "loss": 32.6328, "step": 5393 }, { "epoch": 0.8655674569743651, "grad_norm": 12.389662742614746, "learning_rate": 4.528565056023226e-06, "loss": 32.6484, "step": 5394 }, { "epoch": 0.8657279255425844, "grad_norm": 12.159344673156738, "learning_rate": 4.517916464435134e-06, "loss": 32.8281, "step": 5395 }, { "epoch": 0.8658883941108035, "grad_norm": 12.206911087036133, "learning_rate": 4.507279814857718e-06, "loss": 32.6406, "step": 5396 }, { "epoch": 0.8660488626790227, "grad_norm": 12.202664375305176, "learning_rate": 4.4966551100837915e-06, "loss": 32.6562, "step": 5397 }, { "epoch": 0.866209331247242, "grad_norm": 12.138538360595703, "learning_rate": 4.486042352903036e-06, "loss": 32.5547, "step": 5398 }, { "epoch": 0.8663697998154611, "grad_norm": 12.202064514160156, "learning_rate": 4.475441546101988e-06, "loss": 32.6562, "step": 5399 }, { "epoch": 0.8665302683836803, "grad_norm": 12.163764953613281, "learning_rate": 4.464852692464056e-06, "loss": 32.9922, "step": 5400 }, { "epoch": 0.8666907369518996, "grad_norm": 12.082488059997559, "learning_rate": 4.454275794769491e-06, "loss": 32.875, "step": 5401 }, { "epoch": 0.8668512055201187, "grad_norm": 12.093361854553223, "learning_rate": 4.44371085579543e-06, "loss": 32.9844, "step": 5402 }, { "epoch": 0.8670116740883379, "grad_norm": 12.076640129089355, "learning_rate": 4.433157878315852e-06, "loss": 32.7188, "step": 5403 }, { "epoch": 0.8671721426565572, "grad_norm": 11.949728012084961, "learning_rate": 4.422616865101609e-06, "loss": 32.75, "step": 5404 }, { "epoch": 0.8673326112247763, "grad_norm": 12.981569290161133, "learning_rate": 4.41208781892039e-06, "loss": 32.6875, "step": 5405 }, { "epoch": 0.8674930797929955, "grad_norm": 12.087662696838379, "learning_rate": 4.40157074253677e-06, "loss": 32.7891, "step": 5406 }, { "epoch": 0.8676535483612148, "grad_norm": 12.139659881591797, "learning_rate": 4.39106563871215e-06, "loss": 32.8203, "step": 5407 }, { "epoch": 0.8678140169294339, "grad_norm": 12.341267585754395, "learning_rate": 4.380572510204822e-06, "loss": 32.7266, "step": 5408 }, { "epoch": 0.8679744854976531, "grad_norm": 12.205276489257812, "learning_rate": 4.370091359769912e-06, "loss": 32.7422, "step": 5409 }, { "epoch": 0.8681349540658724, "grad_norm": 12.202878952026367, "learning_rate": 4.359622190159401e-06, "loss": 32.6953, "step": 5410 }, { "epoch": 0.8682954226340915, "grad_norm": 12.271549224853516, "learning_rate": 4.34916500412213e-06, "loss": 32.7578, "step": 5411 }, { "epoch": 0.8684558912023107, "grad_norm": 12.454569816589355, "learning_rate": 4.338719804403796e-06, "loss": 32.6562, "step": 5412 }, { "epoch": 0.86861635977053, "grad_norm": 12.260150909423828, "learning_rate": 4.328286593746928e-06, "loss": 32.6406, "step": 5413 }, { "epoch": 0.8687768283387491, "grad_norm": 12.075044631958008, "learning_rate": 4.3178653748909395e-06, "loss": 32.6562, "step": 5414 }, { "epoch": 0.8689372969069683, "grad_norm": 12.268991470336914, "learning_rate": 4.307456150572075e-06, "loss": 32.6875, "step": 5415 }, { "epoch": 0.8690977654751876, "grad_norm": 12.067054748535156, "learning_rate": 4.297058923523433e-06, "loss": 32.6172, "step": 5416 }, { "epoch": 0.8692582340434067, "grad_norm": 12.075996398925781, "learning_rate": 4.286673696474969e-06, "loss": 32.6719, "step": 5417 }, { "epoch": 0.8694187026116259, "grad_norm": 11.940357208251953, "learning_rate": 4.276300472153477e-06, "loss": 32.6562, "step": 5418 }, { "epoch": 0.8695791711798452, "grad_norm": 12.335524559020996, "learning_rate": 4.2659392532826025e-06, "loss": 32.6172, "step": 5419 }, { "epoch": 0.8697396397480643, "grad_norm": 12.698692321777344, "learning_rate": 4.255590042582841e-06, "loss": 32.5859, "step": 5420 }, { "epoch": 0.8699001083162835, "grad_norm": 12.135313987731934, "learning_rate": 4.24525284277153e-06, "loss": 32.5938, "step": 5421 }, { "epoch": 0.8700605768845028, "grad_norm": 12.528879165649414, "learning_rate": 4.234927656562876e-06, "loss": 32.625, "step": 5422 }, { "epoch": 0.8702210454527219, "grad_norm": 12.14037036895752, "learning_rate": 4.2246144866678995e-06, "loss": 32.7422, "step": 5423 }, { "epoch": 0.8703815140209411, "grad_norm": 12.274025917053223, "learning_rate": 4.21431333579449e-06, "loss": 32.6875, "step": 5424 }, { "epoch": 0.8705419825891604, "grad_norm": 12.324211120605469, "learning_rate": 4.204024206647356e-06, "loss": 32.6719, "step": 5425 }, { "epoch": 0.8707024511573795, "grad_norm": 12.207637786865234, "learning_rate": 4.193747101928069e-06, "loss": 32.6562, "step": 5426 }, { "epoch": 0.8708629197255987, "grad_norm": 12.318877220153809, "learning_rate": 4.1834820243350285e-06, "loss": 32.5312, "step": 5427 }, { "epoch": 0.871023388293818, "grad_norm": 12.194470405578613, "learning_rate": 4.173228976563514e-06, "loss": 32.6406, "step": 5428 }, { "epoch": 0.8711838568620371, "grad_norm": 12.144281387329102, "learning_rate": 4.162987961305598e-06, "loss": 32.6016, "step": 5429 }, { "epoch": 0.8713443254302563, "grad_norm": 12.208146095275879, "learning_rate": 4.152758981250221e-06, "loss": 32.6797, "step": 5430 }, { "epoch": 0.8715047939984756, "grad_norm": 12.006032943725586, "learning_rate": 4.142542039083159e-06, "loss": 32.6562, "step": 5431 }, { "epoch": 0.8716652625666947, "grad_norm": 12.19774341583252, "learning_rate": 4.132337137487019e-06, "loss": 32.625, "step": 5432 }, { "epoch": 0.8718257311349139, "grad_norm": 12.194622039794922, "learning_rate": 4.122144279141255e-06, "loss": 32.5625, "step": 5433 }, { "epoch": 0.8719861997031332, "grad_norm": 12.139382362365723, "learning_rate": 4.1119634667221574e-06, "loss": 32.6562, "step": 5434 }, { "epoch": 0.8721466682713523, "grad_norm": 12.136886596679688, "learning_rate": 4.101794702902839e-06, "loss": 32.6094, "step": 5435 }, { "epoch": 0.8723071368395715, "grad_norm": 12.136853218078613, "learning_rate": 4.091637990353297e-06, "loss": 32.6719, "step": 5436 }, { "epoch": 0.8724676054077908, "grad_norm": 12.005995750427246, "learning_rate": 4.0814933317402995e-06, "loss": 32.6406, "step": 5437 }, { "epoch": 0.8726280739760099, "grad_norm": 12.131671905517578, "learning_rate": 4.071360729727491e-06, "loss": 32.6562, "step": 5438 }, { "epoch": 0.8727885425442291, "grad_norm": 12.996047019958496, "learning_rate": 4.06124018697534e-06, "loss": 32.5469, "step": 5439 }, { "epoch": 0.8729490111124484, "grad_norm": 12.199821472167969, "learning_rate": 4.051131706141148e-06, "loss": 32.5781, "step": 5440 }, { "epoch": 0.8731094796806675, "grad_norm": 12.202059745788574, "learning_rate": 4.041035289879036e-06, "loss": 32.6172, "step": 5441 }, { "epoch": 0.8732699482488867, "grad_norm": 12.325651168823242, "learning_rate": 4.03095094084e-06, "loss": 32.5625, "step": 5442 }, { "epoch": 0.873430416817106, "grad_norm": 12.261187553405762, "learning_rate": 4.020878661671818e-06, "loss": 32.6406, "step": 5443 }, { "epoch": 0.8735908853853251, "grad_norm": 12.63852310180664, "learning_rate": 4.010818455019127e-06, "loss": 32.6094, "step": 5444 }, { "epoch": 0.8737513539535443, "grad_norm": 12.15053653717041, "learning_rate": 4.000770323523384e-06, "loss": 32.7891, "step": 5445 }, { "epoch": 0.8739118225217636, "grad_norm": 12.39509391784668, "learning_rate": 3.990734269822883e-06, "loss": 32.6875, "step": 5446 }, { "epoch": 0.8740722910899827, "grad_norm": 12.19644546508789, "learning_rate": 3.980710296552731e-06, "loss": 32.6328, "step": 5447 }, { "epoch": 0.8742327596582019, "grad_norm": 12.10240364074707, "learning_rate": 3.970698406344886e-06, "loss": 32.8828, "step": 5448 }, { "epoch": 0.8743932282264212, "grad_norm": 12.334670066833496, "learning_rate": 3.9606986018281165e-06, "loss": 32.6875, "step": 5449 }, { "epoch": 0.8745536967946403, "grad_norm": 12.111785888671875, "learning_rate": 3.950710885628023e-06, "loss": 33.0156, "step": 5450 }, { "epoch": 0.8747141653628595, "grad_norm": 12.120031356811523, "learning_rate": 3.9407352603670344e-06, "loss": 33.3125, "step": 5451 }, { "epoch": 0.8748746339310788, "grad_norm": 12.601340293884277, "learning_rate": 3.930771728664395e-06, "loss": 32.8047, "step": 5452 }, { "epoch": 0.875035102499298, "grad_norm": 12.46458625793457, "learning_rate": 3.920820293136191e-06, "loss": 32.7812, "step": 5453 }, { "epoch": 0.8751955710675171, "grad_norm": 12.081461906433105, "learning_rate": 3.910880956395302e-06, "loss": 32.7891, "step": 5454 }, { "epoch": 0.8753560396357364, "grad_norm": 12.401456832885742, "learning_rate": 3.9009537210514815e-06, "loss": 32.7266, "step": 5455 }, { "epoch": 0.8755165082039555, "grad_norm": 12.017793655395508, "learning_rate": 3.89103858971126e-06, "loss": 32.7578, "step": 5456 }, { "epoch": 0.8756769767721747, "grad_norm": 12.547713279724121, "learning_rate": 3.881135564978011e-06, "loss": 32.6797, "step": 5457 }, { "epoch": 0.875837445340394, "grad_norm": 12.961533546447754, "learning_rate": 3.871244649451916e-06, "loss": 32.5625, "step": 5458 }, { "epoch": 0.8759979139086131, "grad_norm": 12.13286018371582, "learning_rate": 3.861365845729997e-06, "loss": 32.6797, "step": 5459 }, { "epoch": 0.8761583824768323, "grad_norm": 12.12907886505127, "learning_rate": 3.851499156406058e-06, "loss": 32.625, "step": 5460 }, { "epoch": 0.8763188510450516, "grad_norm": 12.204082489013672, "learning_rate": 3.841644584070775e-06, "loss": 32.7734, "step": 5461 }, { "epoch": 0.8764793196132707, "grad_norm": 12.075582504272461, "learning_rate": 3.831802131311607e-06, "loss": 32.75, "step": 5462 }, { "epoch": 0.8766397881814899, "grad_norm": 12.391998291015625, "learning_rate": 3.821971800712837e-06, "loss": 32.6016, "step": 5463 }, { "epoch": 0.8768002567497092, "grad_norm": 12.144392967224121, "learning_rate": 3.812153594855572e-06, "loss": 32.6641, "step": 5464 }, { "epoch": 0.8769607253179283, "grad_norm": 12.081489562988281, "learning_rate": 3.8023475163177225e-06, "loss": 32.7109, "step": 5465 }, { "epoch": 0.8771211938861475, "grad_norm": 12.134576797485352, "learning_rate": 3.7925535676740287e-06, "loss": 32.6719, "step": 5466 }, { "epoch": 0.8772816624543668, "grad_norm": 12.13371753692627, "learning_rate": 3.782771751496039e-06, "loss": 32.6328, "step": 5467 }, { "epoch": 0.877442131022586, "grad_norm": 12.464675903320312, "learning_rate": 3.7730020703521106e-06, "loss": 32.6562, "step": 5468 }, { "epoch": 0.8776025995908051, "grad_norm": 12.508882522583008, "learning_rate": 3.7632445268074356e-06, "loss": 32.6328, "step": 5469 }, { "epoch": 0.8777630681590244, "grad_norm": 11.94017219543457, "learning_rate": 3.753499123423998e-06, "loss": 32.6484, "step": 5470 }, { "epoch": 0.8779235367272435, "grad_norm": 12.387022018432617, "learning_rate": 3.743765862760612e-06, "loss": 32.5859, "step": 5471 }, { "epoch": 0.8780840052954627, "grad_norm": 12.095831871032715, "learning_rate": 3.7340447473728723e-06, "loss": 32.7812, "step": 5472 }, { "epoch": 0.878244473863682, "grad_norm": 12.341204643249512, "learning_rate": 3.724335779813215e-06, "loss": 32.6523, "step": 5473 }, { "epoch": 0.8784049424319011, "grad_norm": 12.206171989440918, "learning_rate": 3.714638962630873e-06, "loss": 32.6875, "step": 5474 }, { "epoch": 0.8785654110001203, "grad_norm": 12.00990104675293, "learning_rate": 3.704954298371899e-06, "loss": 32.6875, "step": 5475 }, { "epoch": 0.8787258795683396, "grad_norm": 12.197357177734375, "learning_rate": 3.695281789579147e-06, "loss": 32.5859, "step": 5476 }, { "epoch": 0.8788863481365587, "grad_norm": 12.478188514709473, "learning_rate": 3.6856214387922807e-06, "loss": 32.6211, "step": 5477 }, { "epoch": 0.8790468167047779, "grad_norm": 12.38415813446045, "learning_rate": 3.6759732485477707e-06, "loss": 32.6797, "step": 5478 }, { "epoch": 0.8792072852729972, "grad_norm": 12.13443660736084, "learning_rate": 3.666337221378896e-06, "loss": 32.6172, "step": 5479 }, { "epoch": 0.8793677538412163, "grad_norm": 12.130938529968262, "learning_rate": 3.656713359815739e-06, "loss": 32.6367, "step": 5480 }, { "epoch": 0.8795282224094355, "grad_norm": 12.014104843139648, "learning_rate": 3.6471016663851943e-06, "loss": 32.6719, "step": 5481 }, { "epoch": 0.8796886909776548, "grad_norm": 12.263334274291992, "learning_rate": 3.637502143610949e-06, "loss": 32.5859, "step": 5482 }, { "epoch": 0.879849159545874, "grad_norm": 12.410409927368164, "learning_rate": 3.627914794013526e-06, "loss": 32.5547, "step": 5483 }, { "epoch": 0.8800096281140931, "grad_norm": 12.09997272491455, "learning_rate": 3.618339620110206e-06, "loss": 32.7656, "step": 5484 }, { "epoch": 0.8801700966823124, "grad_norm": 12.139982223510742, "learning_rate": 3.6087766244151054e-06, "loss": 32.5938, "step": 5485 }, { "epoch": 0.8803305652505315, "grad_norm": 12.07016658782959, "learning_rate": 3.599225809439133e-06, "loss": 32.6328, "step": 5486 }, { "epoch": 0.8804910338187507, "grad_norm": 12.331456184387207, "learning_rate": 3.5896871776899986e-06, "loss": 32.5625, "step": 5487 }, { "epoch": 0.88065150238697, "grad_norm": 12.258233070373535, "learning_rate": 3.5801607316722053e-06, "loss": 32.5391, "step": 5488 }, { "epoch": 0.8808119709551891, "grad_norm": 12.334860801696777, "learning_rate": 3.570646473887085e-06, "loss": 32.5859, "step": 5489 }, { "epoch": 0.8809724395234083, "grad_norm": 11.943977355957031, "learning_rate": 3.5611444068327393e-06, "loss": 32.668, "step": 5490 }, { "epoch": 0.8811329080916276, "grad_norm": 12.265750885009766, "learning_rate": 3.551654533004084e-06, "loss": 32.5547, "step": 5491 }, { "epoch": 0.8812933766598467, "grad_norm": 12.259103775024414, "learning_rate": 3.5421768548928314e-06, "loss": 32.6641, "step": 5492 }, { "epoch": 0.8814538452280659, "grad_norm": 12.957751274108887, "learning_rate": 3.5327113749874795e-06, "loss": 32.5625, "step": 5493 }, { "epoch": 0.8816143137962852, "grad_norm": 12.32695198059082, "learning_rate": 3.5232580957733465e-06, "loss": 32.5703, "step": 5494 }, { "epoch": 0.8817747823645044, "grad_norm": 12.207304000854492, "learning_rate": 3.513817019732518e-06, "loss": 32.6406, "step": 5495 }, { "epoch": 0.8819352509327235, "grad_norm": 12.389680862426758, "learning_rate": 3.5043881493439068e-06, "loss": 32.5781, "step": 5496 }, { "epoch": 0.8820957195009428, "grad_norm": 12.842192649841309, "learning_rate": 3.494971487083204e-06, "loss": 32.5234, "step": 5497 }, { "epoch": 0.882256188069162, "grad_norm": 12.00745677947998, "learning_rate": 3.4855670354228876e-06, "loss": 32.6172, "step": 5498 }, { "epoch": 0.8824166566373811, "grad_norm": 12.280672073364258, "learning_rate": 3.4761747968322432e-06, "loss": 32.75, "step": 5499 }, { "epoch": 0.8825771252056004, "grad_norm": 12.432411193847656, "learning_rate": 3.4667947737773543e-06, "loss": 33.0703, "step": 5500 }, { "epoch": 0.8827375937738196, "grad_norm": 12.302495002746582, "learning_rate": 3.457426968721067e-06, "loss": 32.9922, "step": 5501 }, { "epoch": 0.8828980623420387, "grad_norm": 12.146492004394531, "learning_rate": 3.4480713841230638e-06, "loss": 32.7734, "step": 5502 }, { "epoch": 0.883058530910258, "grad_norm": 12.478023529052734, "learning_rate": 3.4387280224397854e-06, "loss": 32.7188, "step": 5503 }, { "epoch": 0.8832189994784772, "grad_norm": 12.13792610168457, "learning_rate": 3.4293968861244754e-06, "loss": 32.7109, "step": 5504 }, { "epoch": 0.8833794680466963, "grad_norm": 12.265681266784668, "learning_rate": 3.4200779776271565e-06, "loss": 32.6719, "step": 5505 }, { "epoch": 0.8835399366149156, "grad_norm": 12.202179908752441, "learning_rate": 3.410771299394672e-06, "loss": 32.6484, "step": 5506 }, { "epoch": 0.8837004051831348, "grad_norm": 12.150239944458008, "learning_rate": 3.401476853870594e-06, "loss": 32.7344, "step": 5507 }, { "epoch": 0.8838608737513539, "grad_norm": 12.016687393188477, "learning_rate": 3.3921946434953555e-06, "loss": 32.7734, "step": 5508 }, { "epoch": 0.8840213423195732, "grad_norm": 12.007925033569336, "learning_rate": 3.3829246707061224e-06, "loss": 32.6641, "step": 5509 }, { "epoch": 0.8841818108877924, "grad_norm": 12.481383323669434, "learning_rate": 3.3736669379368703e-06, "loss": 32.7812, "step": 5510 }, { "epoch": 0.8843422794560115, "grad_norm": 12.266897201538086, "learning_rate": 3.3644214476183657e-06, "loss": 32.7109, "step": 5511 }, { "epoch": 0.8845027480242308, "grad_norm": 12.469744682312012, "learning_rate": 3.355188202178139e-06, "loss": 32.7812, "step": 5512 }, { "epoch": 0.88466321659245, "grad_norm": 12.511866569519043, "learning_rate": 3.3459672040405287e-06, "loss": 32.5625, "step": 5513 }, { "epoch": 0.8848236851606691, "grad_norm": 12.014120101928711, "learning_rate": 3.3367584556266427e-06, "loss": 32.75, "step": 5514 }, { "epoch": 0.8849841537288884, "grad_norm": 12.007064819335938, "learning_rate": 3.327561959354375e-06, "loss": 32.6875, "step": 5515 }, { "epoch": 0.8851446222971076, "grad_norm": 12.268616676330566, "learning_rate": 3.3183777176384157e-06, "loss": 32.6641, "step": 5516 }, { "epoch": 0.8853050908653267, "grad_norm": 12.071195602416992, "learning_rate": 3.3092057328902194e-06, "loss": 32.6719, "step": 5517 }, { "epoch": 0.885465559433546, "grad_norm": 12.003155708312988, "learning_rate": 3.300046007518037e-06, "loss": 32.668, "step": 5518 }, { "epoch": 0.8856260280017652, "grad_norm": 12.514074325561523, "learning_rate": 3.290898543926879e-06, "loss": 32.5547, "step": 5519 }, { "epoch": 0.8857864965699843, "grad_norm": 12.259195327758789, "learning_rate": 3.281763344518557e-06, "loss": 32.625, "step": 5520 }, { "epoch": 0.8859469651382036, "grad_norm": 12.139671325683594, "learning_rate": 3.2726404116916574e-06, "loss": 32.7031, "step": 5521 }, { "epoch": 0.8861074337064228, "grad_norm": 12.00920295715332, "learning_rate": 3.2635297478415427e-06, "loss": 32.7188, "step": 5522 }, { "epoch": 0.8862679022746419, "grad_norm": 12.509825706481934, "learning_rate": 3.2544313553603656e-06, "loss": 32.5938, "step": 5523 }, { "epoch": 0.8864283708428612, "grad_norm": 11.950386047363281, "learning_rate": 3.2453452366370372e-06, "loss": 32.7422, "step": 5524 }, { "epoch": 0.8865888394110804, "grad_norm": 11.881768226623535, "learning_rate": 3.2362713940572552e-06, "loss": 32.7344, "step": 5525 }, { "epoch": 0.8867493079792995, "grad_norm": 12.14560317993164, "learning_rate": 3.2272098300035026e-06, "loss": 32.7031, "step": 5526 }, { "epoch": 0.8869097765475188, "grad_norm": 12.072209358215332, "learning_rate": 3.2181605468550202e-06, "loss": 32.6953, "step": 5527 }, { "epoch": 0.887070245115738, "grad_norm": 12.641597747802734, "learning_rate": 3.209123546987841e-06, "loss": 32.6719, "step": 5528 }, { "epoch": 0.8872307136839571, "grad_norm": 12.208854675292969, "learning_rate": 3.200098832774756e-06, "loss": 32.625, "step": 5529 }, { "epoch": 0.8873911822521764, "grad_norm": 12.129405975341797, "learning_rate": 3.1910864065853695e-06, "loss": 32.6172, "step": 5530 }, { "epoch": 0.8875516508203956, "grad_norm": 12.132933616638184, "learning_rate": 3.182086270786e-06, "loss": 32.5703, "step": 5531 }, { "epoch": 0.8877121193886147, "grad_norm": 12.325074195861816, "learning_rate": 3.173098427739779e-06, "loss": 32.5469, "step": 5532 }, { "epoch": 0.887872587956834, "grad_norm": 12.0696439743042, "learning_rate": 3.1641228798066024e-06, "loss": 32.6406, "step": 5533 }, { "epoch": 0.8880330565250532, "grad_norm": 12.271424293518066, "learning_rate": 3.1551596293431405e-06, "loss": 32.625, "step": 5534 }, { "epoch": 0.8881935250932723, "grad_norm": 12.004884719848633, "learning_rate": 3.146208678702811e-06, "loss": 32.5938, "step": 5535 }, { "epoch": 0.8883539936614916, "grad_norm": 12.079663276672363, "learning_rate": 3.1372700302358503e-06, "loss": 32.75, "step": 5536 }, { "epoch": 0.8885144622297108, "grad_norm": 12.196430206298828, "learning_rate": 3.128343686289226e-06, "loss": 32.6328, "step": 5537 }, { "epoch": 0.8886749307979299, "grad_norm": 12.014028549194336, "learning_rate": 3.119429649206673e-06, "loss": 32.7188, "step": 5538 }, { "epoch": 0.8888353993661492, "grad_norm": 12.14764404296875, "learning_rate": 3.110527921328721e-06, "loss": 32.6875, "step": 5539 }, { "epoch": 0.8889958679343684, "grad_norm": 11.947507858276367, "learning_rate": 3.101638504992649e-06, "loss": 32.6797, "step": 5540 }, { "epoch": 0.8891563365025875, "grad_norm": 12.204683303833008, "learning_rate": 3.092761402532507e-06, "loss": 32.6406, "step": 5541 }, { "epoch": 0.8893168050708068, "grad_norm": 12.650052070617676, "learning_rate": 3.0838966162791137e-06, "loss": 32.5312, "step": 5542 }, { "epoch": 0.889477273639026, "grad_norm": 12.133056640625, "learning_rate": 3.075044148560052e-06, "loss": 32.6094, "step": 5543 }, { "epoch": 0.8896377422072451, "grad_norm": 12.136388778686523, "learning_rate": 3.066204001699674e-06, "loss": 32.5938, "step": 5544 }, { "epoch": 0.8897982107754644, "grad_norm": 12.405306816101074, "learning_rate": 3.057376178019089e-06, "loss": 32.6562, "step": 5545 }, { "epoch": 0.8899586793436836, "grad_norm": 12.026244163513184, "learning_rate": 3.048560679836182e-06, "loss": 32.7188, "step": 5546 }, { "epoch": 0.8901191479119027, "grad_norm": 12.160698890686035, "learning_rate": 3.0397575094655962e-06, "loss": 32.8906, "step": 5547 }, { "epoch": 0.890279616480122, "grad_norm": 12.204390525817871, "learning_rate": 3.0309666692187323e-06, "loss": 32.6016, "step": 5548 }, { "epoch": 0.8904400850483412, "grad_norm": 12.02844524383545, "learning_rate": 3.0221881614037495e-06, "loss": 32.8984, "step": 5549 }, { "epoch": 0.8906005536165603, "grad_norm": 12.163005828857422, "learning_rate": 3.0134219883256045e-06, "loss": 32.9219, "step": 5550 }, { "epoch": 0.8907610221847796, "grad_norm": 12.021745681762695, "learning_rate": 3.0046681522859667e-06, "loss": 33.0312, "step": 5551 }, { "epoch": 0.8909214907529988, "grad_norm": 12.244038581848145, "learning_rate": 2.995926655583303e-06, "loss": 32.8828, "step": 5552 }, { "epoch": 0.8910819593212179, "grad_norm": 12.211442947387695, "learning_rate": 2.9871975005128172e-06, "loss": 32.8281, "step": 5553 }, { "epoch": 0.8912424278894372, "grad_norm": 12.345383644104004, "learning_rate": 2.9784806893664863e-06, "loss": 32.7812, "step": 5554 }, { "epoch": 0.8914028964576564, "grad_norm": 12.268365859985352, "learning_rate": 2.969776224433024e-06, "loss": 32.7031, "step": 5555 }, { "epoch": 0.8915633650258755, "grad_norm": 12.527166366577148, "learning_rate": 2.961084107997941e-06, "loss": 32.7422, "step": 5556 }, { "epoch": 0.8917238335940948, "grad_norm": 12.264283180236816, "learning_rate": 2.952404342343479e-06, "loss": 32.6797, "step": 5557 }, { "epoch": 0.891884302162314, "grad_norm": 12.013495445251465, "learning_rate": 2.9437369297486415e-06, "loss": 32.9453, "step": 5558 }, { "epoch": 0.8920447707305331, "grad_norm": 12.44918155670166, "learning_rate": 2.9350818724891814e-06, "loss": 32.6172, "step": 5559 }, { "epoch": 0.8922052392987524, "grad_norm": 12.073376655578613, "learning_rate": 2.9264391728376252e-06, "loss": 32.7305, "step": 5560 }, { "epoch": 0.8923657078669716, "grad_norm": 12.198100090026855, "learning_rate": 2.9178088330632403e-06, "loss": 32.6719, "step": 5561 }, { "epoch": 0.8925261764351907, "grad_norm": 12.281156539916992, "learning_rate": 2.9091908554320478e-06, "loss": 32.7812, "step": 5562 }, { "epoch": 0.89268664500341, "grad_norm": 11.951423645019531, "learning_rate": 2.900585242206838e-06, "loss": 32.7344, "step": 5563 }, { "epoch": 0.8928471135716292, "grad_norm": 12.26102352142334, "learning_rate": 2.891991995647142e-06, "loss": 32.7031, "step": 5564 }, { "epoch": 0.8930075821398483, "grad_norm": 12.135679244995117, "learning_rate": 2.8834111180092548e-06, "loss": 32.6172, "step": 5565 }, { "epoch": 0.8931680507080676, "grad_norm": 11.946219444274902, "learning_rate": 2.8748426115462025e-06, "loss": 32.7031, "step": 5566 }, { "epoch": 0.8933285192762868, "grad_norm": 12.067241668701172, "learning_rate": 2.866286478507779e-06, "loss": 32.7188, "step": 5567 }, { "epoch": 0.8934889878445059, "grad_norm": 12.134489059448242, "learning_rate": 2.8577427211405215e-06, "loss": 32.6328, "step": 5568 }, { "epoch": 0.8936494564127252, "grad_norm": 12.004258155822754, "learning_rate": 2.849211341687741e-06, "loss": 32.6172, "step": 5569 }, { "epoch": 0.8938099249809444, "grad_norm": 11.944486618041992, "learning_rate": 2.840692342389467e-06, "loss": 32.7344, "step": 5570 }, { "epoch": 0.8939703935491635, "grad_norm": 12.466506958007812, "learning_rate": 2.8321857254824945e-06, "loss": 32.6875, "step": 5571 }, { "epoch": 0.8941308621173828, "grad_norm": 12.004838943481445, "learning_rate": 2.82369149320037e-06, "loss": 32.6562, "step": 5572 }, { "epoch": 0.894291330685602, "grad_norm": 12.130234718322754, "learning_rate": 2.815209647773376e-06, "loss": 32.5781, "step": 5573 }, { "epoch": 0.8944517992538211, "grad_norm": 12.195939064025879, "learning_rate": 2.806740191428553e-06, "loss": 32.6406, "step": 5574 }, { "epoch": 0.8946122678220404, "grad_norm": 12.132756233215332, "learning_rate": 2.798283126389684e-06, "loss": 32.625, "step": 5575 }, { "epoch": 0.8947727363902596, "grad_norm": 12.537755012512207, "learning_rate": 2.7898384548772914e-06, "loss": 32.6328, "step": 5576 }, { "epoch": 0.8949332049584787, "grad_norm": 12.067971229553223, "learning_rate": 2.7814061791086754e-06, "loss": 32.6016, "step": 5577 }, { "epoch": 0.895093673526698, "grad_norm": 12.148703575134277, "learning_rate": 2.7729863012978364e-06, "loss": 32.6875, "step": 5578 }, { "epoch": 0.8952541420949172, "grad_norm": 12.135470390319824, "learning_rate": 2.7645788236555448e-06, "loss": 32.6172, "step": 5579 }, { "epoch": 0.8954146106631363, "grad_norm": 12.133649826049805, "learning_rate": 2.7561837483893183e-06, "loss": 32.6172, "step": 5580 }, { "epoch": 0.8955750792313556, "grad_norm": 12.204904556274414, "learning_rate": 2.747801077703399e-06, "loss": 32.5781, "step": 5581 }, { "epoch": 0.8957355477995748, "grad_norm": 12.395099639892578, "learning_rate": 2.7394308137987934e-06, "loss": 32.5625, "step": 5582 }, { "epoch": 0.8958960163677939, "grad_norm": 12.772405624389648, "learning_rate": 2.731072958873243e-06, "loss": 32.5156, "step": 5583 }, { "epoch": 0.8960564849360132, "grad_norm": 12.007195472717285, "learning_rate": 2.7227275151212263e-06, "loss": 32.7109, "step": 5584 }, { "epoch": 0.8962169535042324, "grad_norm": 12.004260063171387, "learning_rate": 2.7143944847339677e-06, "loss": 32.6094, "step": 5585 }, { "epoch": 0.8963774220724515, "grad_norm": 12.582220077514648, "learning_rate": 2.706073869899428e-06, "loss": 32.5078, "step": 5586 }, { "epoch": 0.8965378906406708, "grad_norm": 12.270964622497559, "learning_rate": 2.6977656728023103e-06, "loss": 32.6406, "step": 5587 }, { "epoch": 0.89669835920889, "grad_norm": 12.071606636047363, "learning_rate": 2.689469895624064e-06, "loss": 32.6562, "step": 5588 }, { "epoch": 0.8968588277771091, "grad_norm": 11.949420928955078, "learning_rate": 2.6811865405428627e-06, "loss": 32.7148, "step": 5589 }, { "epoch": 0.8970192963453284, "grad_norm": 12.703448295593262, "learning_rate": 2.6729156097336284e-06, "loss": 32.5547, "step": 5590 }, { "epoch": 0.8971797649135476, "grad_norm": 12.3936128616333, "learning_rate": 2.6646571053680238e-06, "loss": 32.5234, "step": 5591 }, { "epoch": 0.8973402334817667, "grad_norm": 12.069656372070312, "learning_rate": 2.6564110296144417e-06, "loss": 32.6484, "step": 5592 }, { "epoch": 0.897500702049986, "grad_norm": 12.19926929473877, "learning_rate": 2.6481773846380174e-06, "loss": 32.5781, "step": 5593 }, { "epoch": 0.8976611706182052, "grad_norm": 12.199999809265137, "learning_rate": 2.6399561726006107e-06, "loss": 32.6328, "step": 5594 }, { "epoch": 0.8978216391864243, "grad_norm": 12.197477340698242, "learning_rate": 2.6317473956608384e-06, "loss": 32.5781, "step": 5595 }, { "epoch": 0.8979821077546436, "grad_norm": 12.145110130310059, "learning_rate": 2.6235510559740216e-06, "loss": 32.6562, "step": 5596 }, { "epoch": 0.8981425763228628, "grad_norm": 11.882786750793457, "learning_rate": 2.61536715569225e-06, "loss": 32.7344, "step": 5597 }, { "epoch": 0.8983030448910819, "grad_norm": 12.08181381225586, "learning_rate": 2.607195696964326e-06, "loss": 32.7031, "step": 5598 }, { "epoch": 0.8984635134593012, "grad_norm": 12.27312183380127, "learning_rate": 2.59903668193579e-06, "loss": 32.6641, "step": 5599 }, { "epoch": 0.8986239820275204, "grad_norm": 12.168632507324219, "learning_rate": 2.590890112748917e-06, "loss": 33.0391, "step": 5600 }, { "epoch": 0.8987844505957395, "grad_norm": 12.53368854522705, "learning_rate": 2.5827559915427124e-06, "loss": 32.8906, "step": 5601 }, { "epoch": 0.8989449191639588, "grad_norm": 12.074212074279785, "learning_rate": 2.5746343204528957e-06, "loss": 32.8828, "step": 5602 }, { "epoch": 0.899105387732178, "grad_norm": 12.156689643859863, "learning_rate": 2.5665251016119608e-06, "loss": 32.8203, "step": 5603 }, { "epoch": 0.8992658563003971, "grad_norm": 12.410901069641113, "learning_rate": 2.558428337149088e-06, "loss": 32.8359, "step": 5604 }, { "epoch": 0.8994263248686164, "grad_norm": 12.143449783325195, "learning_rate": 2.550344029190216e-06, "loss": 32.7422, "step": 5605 }, { "epoch": 0.8995867934368356, "grad_norm": 12.22761344909668, "learning_rate": 2.5422721798579963e-06, "loss": 32.8398, "step": 5606 }, { "epoch": 0.8997472620050547, "grad_norm": 12.070807456970215, "learning_rate": 2.5342127912718228e-06, "loss": 32.7344, "step": 5607 }, { "epoch": 0.899907730573274, "grad_norm": 12.20047664642334, "learning_rate": 2.526165865547797e-06, "loss": 32.6953, "step": 5608 }, { "epoch": 0.9000681991414932, "grad_norm": 12.074760437011719, "learning_rate": 2.5181314047987682e-06, "loss": 32.7422, "step": 5609 }, { "epoch": 0.9002286677097123, "grad_norm": 12.532421112060547, "learning_rate": 2.510109411134309e-06, "loss": 32.7812, "step": 5610 }, { "epoch": 0.9003891362779316, "grad_norm": 12.26502799987793, "learning_rate": 2.502099886660719e-06, "loss": 32.6875, "step": 5611 }, { "epoch": 0.9005496048461508, "grad_norm": 11.94317626953125, "learning_rate": 2.49410283348101e-06, "loss": 32.7188, "step": 5612 }, { "epoch": 0.9007100734143699, "grad_norm": 12.509424209594727, "learning_rate": 2.4861182536949467e-06, "loss": 32.5703, "step": 5613 }, { "epoch": 0.9008705419825892, "grad_norm": 12.070950508117676, "learning_rate": 2.4781461493989746e-06, "loss": 32.6406, "step": 5614 }, { "epoch": 0.9010310105508084, "grad_norm": 12.004932403564453, "learning_rate": 2.4701865226863018e-06, "loss": 32.6484, "step": 5615 }, { "epoch": 0.9011914791190275, "grad_norm": 12.069092750549316, "learning_rate": 2.4622393756468576e-06, "loss": 32.7031, "step": 5616 }, { "epoch": 0.9013519476872468, "grad_norm": 12.78956413269043, "learning_rate": 2.4543047103672835e-06, "loss": 32.6016, "step": 5617 }, { "epoch": 0.901512416255466, "grad_norm": 12.271025657653809, "learning_rate": 2.4463825289309406e-06, "loss": 32.7266, "step": 5618 }, { "epoch": 0.9016728848236851, "grad_norm": 11.941154479980469, "learning_rate": 2.4384728334179206e-06, "loss": 32.625, "step": 5619 }, { "epoch": 0.9018333533919044, "grad_norm": 11.954011917114258, "learning_rate": 2.430575625905035e-06, "loss": 32.7109, "step": 5620 }, { "epoch": 0.9019938219601236, "grad_norm": 12.071500778198242, "learning_rate": 2.4226909084658132e-06, "loss": 32.7109, "step": 5621 }, { "epoch": 0.9021542905283427, "grad_norm": 12.07249641418457, "learning_rate": 2.414818683170511e-06, "loss": 32.75, "step": 5622 }, { "epoch": 0.902314759096562, "grad_norm": 12.009892463684082, "learning_rate": 2.4069589520860913e-06, "loss": 32.6562, "step": 5623 }, { "epoch": 0.9024752276647812, "grad_norm": 12.266616821289062, "learning_rate": 2.399111717276259e-06, "loss": 32.6953, "step": 5624 }, { "epoch": 0.9026356962330003, "grad_norm": 12.004863739013672, "learning_rate": 2.391276980801416e-06, "loss": 32.6484, "step": 5625 }, { "epoch": 0.9027961648012196, "grad_norm": 12.137327194213867, "learning_rate": 2.3834547447186938e-06, "loss": 32.6094, "step": 5626 }, { "epoch": 0.9029566333694388, "grad_norm": 12.275786399841309, "learning_rate": 2.3756450110819395e-06, "loss": 32.7188, "step": 5627 }, { "epoch": 0.9031171019376579, "grad_norm": 12.0703763961792, "learning_rate": 2.367847781941712e-06, "loss": 32.6172, "step": 5628 }, { "epoch": 0.9032775705058772, "grad_norm": 12.38332748413086, "learning_rate": 2.360063059345291e-06, "loss": 32.5625, "step": 5629 }, { "epoch": 0.9034380390740964, "grad_norm": 11.94372844696045, "learning_rate": 2.35229084533668e-06, "loss": 32.6016, "step": 5630 }, { "epoch": 0.9035985076423155, "grad_norm": 12.466002464294434, "learning_rate": 2.344531141956596e-06, "loss": 32.543, "step": 5631 }, { "epoch": 0.9037589762105348, "grad_norm": 12.132869720458984, "learning_rate": 2.3367839512424604e-06, "loss": 32.625, "step": 5632 }, { "epoch": 0.903919444778754, "grad_norm": 11.945507049560547, "learning_rate": 2.329049275228412e-06, "loss": 32.6875, "step": 5633 }, { "epoch": 0.9040799133469731, "grad_norm": 12.196220397949219, "learning_rate": 2.3213271159453154e-06, "loss": 32.6562, "step": 5634 }, { "epoch": 0.9042403819151924, "grad_norm": 12.008248329162598, "learning_rate": 2.313617475420732e-06, "loss": 32.625, "step": 5635 }, { "epoch": 0.9044008504834116, "grad_norm": 12.011863708496094, "learning_rate": 2.3059203556789533e-06, "loss": 32.6797, "step": 5636 }, { "epoch": 0.9045613190516307, "grad_norm": 12.20702838897705, "learning_rate": 2.2982357587409685e-06, "loss": 32.7344, "step": 5637 }, { "epoch": 0.90472178761985, "grad_norm": 12.20164966583252, "learning_rate": 2.290563686624492e-06, "loss": 32.5859, "step": 5638 }, { "epoch": 0.9048822561880692, "grad_norm": 12.385222434997559, "learning_rate": 2.2829041413439334e-06, "loss": 32.5859, "step": 5639 }, { "epoch": 0.9050427247562883, "grad_norm": 12.199316024780273, "learning_rate": 2.2752571249104236e-06, "loss": 32.6328, "step": 5640 }, { "epoch": 0.9052031933245076, "grad_norm": 12.405594825744629, "learning_rate": 2.267622639331812e-06, "loss": 32.6016, "step": 5641 }, { "epoch": 0.9053636618927268, "grad_norm": 12.260981559753418, "learning_rate": 2.26000068661264e-06, "loss": 32.5469, "step": 5642 }, { "epoch": 0.9055241304609459, "grad_norm": 11.945152282714844, "learning_rate": 2.2523912687541616e-06, "loss": 32.6797, "step": 5643 }, { "epoch": 0.9056845990291652, "grad_norm": 11.943219184875488, "learning_rate": 2.244794387754362e-06, "loss": 32.6875, "step": 5644 }, { "epoch": 0.9058450675973844, "grad_norm": 12.643936157226562, "learning_rate": 2.237210045607907e-06, "loss": 32.6562, "step": 5645 }, { "epoch": 0.9060055361656035, "grad_norm": 12.326133728027344, "learning_rate": 2.2296382443061868e-06, "loss": 32.6641, "step": 5646 }, { "epoch": 0.9061660047338228, "grad_norm": 12.392936706542969, "learning_rate": 2.2220789858372838e-06, "loss": 32.7109, "step": 5647 }, { "epoch": 0.906326473302042, "grad_norm": 12.32333755493164, "learning_rate": 2.214532272186004e-06, "loss": 32.7031, "step": 5648 }, { "epoch": 0.9064869418702611, "grad_norm": 12.207945823669434, "learning_rate": 2.206998105333835e-06, "loss": 32.6602, "step": 5649 }, { "epoch": 0.9066474104384804, "grad_norm": 12.032108306884766, "learning_rate": 2.1994764872590055e-06, "loss": 33.0391, "step": 5650 }, { "epoch": 0.9068078790066996, "grad_norm": 12.332868576049805, "learning_rate": 2.1919674199364305e-06, "loss": 32.8203, "step": 5651 }, { "epoch": 0.9069683475749187, "grad_norm": 12.197039604187012, "learning_rate": 2.1844709053377157e-06, "loss": 32.6953, "step": 5652 }, { "epoch": 0.907128816143138, "grad_norm": 12.34123706817627, "learning_rate": 2.176986945431192e-06, "loss": 32.7578, "step": 5653 }, { "epoch": 0.9072892847113572, "grad_norm": 12.139888763427734, "learning_rate": 2.169515542181888e-06, "loss": 32.7109, "step": 5654 }, { "epoch": 0.9074497532795763, "grad_norm": 11.942821502685547, "learning_rate": 2.1620566975515344e-06, "loss": 32.7969, "step": 5655 }, { "epoch": 0.9076102218477956, "grad_norm": 12.144193649291992, "learning_rate": 2.154610413498548e-06, "loss": 32.7812, "step": 5656 }, { "epoch": 0.9077706904160148, "grad_norm": 12.271440505981445, "learning_rate": 2.147176691978092e-06, "loss": 32.7344, "step": 5657 }, { "epoch": 0.9079311589842339, "grad_norm": 12.06812572479248, "learning_rate": 2.139755534941984e-06, "loss": 32.625, "step": 5658 }, { "epoch": 0.9080916275524532, "grad_norm": 12.331586837768555, "learning_rate": 2.1323469443387644e-06, "loss": 32.7578, "step": 5659 }, { "epoch": 0.9082520961206724, "grad_norm": 12.203194618225098, "learning_rate": 2.1249509221136833e-06, "loss": 32.7383, "step": 5660 }, { "epoch": 0.9084125646888915, "grad_norm": 12.228944778442383, "learning_rate": 2.1175674702086532e-06, "loss": 32.75, "step": 5661 }, { "epoch": 0.9085730332571108, "grad_norm": 12.15214729309082, "learning_rate": 2.1101965905623243e-06, "loss": 32.7188, "step": 5662 }, { "epoch": 0.90873350182533, "grad_norm": 12.011547088623047, "learning_rate": 2.1028382851100426e-06, "loss": 32.6484, "step": 5663 }, { "epoch": 0.9088939703935491, "grad_norm": 12.274499893188477, "learning_rate": 2.095492555783829e-06, "loss": 32.6953, "step": 5664 }, { "epoch": 0.9090544389617684, "grad_norm": 12.338850975036621, "learning_rate": 2.0881594045124243e-06, "loss": 32.7109, "step": 5665 }, { "epoch": 0.9092149075299876, "grad_norm": 12.322059631347656, "learning_rate": 2.080838833221249e-06, "loss": 32.7188, "step": 5666 }, { "epoch": 0.9093753760982067, "grad_norm": 12.202940940856934, "learning_rate": 2.073530843832444e-06, "loss": 32.6953, "step": 5667 }, { "epoch": 0.909535844666426, "grad_norm": 12.384197235107422, "learning_rate": 2.0662354382648242e-06, "loss": 32.5469, "step": 5668 }, { "epoch": 0.9096963132346452, "grad_norm": 12.070460319519043, "learning_rate": 2.058952618433907e-06, "loss": 32.7109, "step": 5669 }, { "epoch": 0.9098567818028643, "grad_norm": 12.071595191955566, "learning_rate": 2.0516823862519017e-06, "loss": 32.6641, "step": 5670 }, { "epoch": 0.9100172503710836, "grad_norm": 12.201979637145996, "learning_rate": 2.0444247436277432e-06, "loss": 32.6484, "step": 5671 }, { "epoch": 0.9101777189393028, "grad_norm": 12.265835762023926, "learning_rate": 2.037179692467012e-06, "loss": 32.625, "step": 5672 }, { "epoch": 0.9103381875075219, "grad_norm": 11.941426277160645, "learning_rate": 2.0299472346720138e-06, "loss": 32.625, "step": 5673 }, { "epoch": 0.9104986560757412, "grad_norm": 12.016433715820312, "learning_rate": 2.022727372141736e-06, "loss": 32.8047, "step": 5674 }, { "epoch": 0.9106591246439604, "grad_norm": 12.14145278930664, "learning_rate": 2.0155201067718676e-06, "loss": 32.6328, "step": 5675 }, { "epoch": 0.9108195932121795, "grad_norm": 12.069326400756836, "learning_rate": 2.008325440454778e-06, "loss": 32.6328, "step": 5676 }, { "epoch": 0.9109800617803988, "grad_norm": 12.33401870727539, "learning_rate": 2.0011433750795505e-06, "loss": 32.6016, "step": 5677 }, { "epoch": 0.911140530348618, "grad_norm": 12.13537883758545, "learning_rate": 1.993973912531932e-06, "loss": 32.5938, "step": 5678 }, { "epoch": 0.9113009989168371, "grad_norm": 12.342120170593262, "learning_rate": 1.9868170546943845e-06, "loss": 32.6172, "step": 5679 }, { "epoch": 0.9114614674850564, "grad_norm": 12.544355392456055, "learning_rate": 1.9796728034460365e-06, "loss": 32.5781, "step": 5680 }, { "epoch": 0.9116219360532756, "grad_norm": 12.453733444213867, "learning_rate": 1.972541160662733e-06, "loss": 32.5703, "step": 5681 }, { "epoch": 0.9117824046214947, "grad_norm": 12.204298973083496, "learning_rate": 1.9654221282169815e-06, "loss": 32.6875, "step": 5682 }, { "epoch": 0.911942873189714, "grad_norm": 12.446730613708496, "learning_rate": 1.9583157079780036e-06, "loss": 32.5547, "step": 5683 }, { "epoch": 0.9121033417579332, "grad_norm": 12.275165557861328, "learning_rate": 1.9512219018116894e-06, "loss": 32.6406, "step": 5684 }, { "epoch": 0.9122638103261523, "grad_norm": 12.01745319366455, "learning_rate": 1.944140711580633e-06, "loss": 32.9062, "step": 5685 }, { "epoch": 0.9124242788943716, "grad_norm": 12.086050033569336, "learning_rate": 1.9370721391441026e-06, "loss": 32.6641, "step": 5686 }, { "epoch": 0.9125847474625908, "grad_norm": 12.01107406616211, "learning_rate": 1.930016186358058e-06, "loss": 32.7422, "step": 5687 }, { "epoch": 0.9127452160308099, "grad_norm": 11.942320823669434, "learning_rate": 1.9229728550751504e-06, "loss": 32.6328, "step": 5688 }, { "epoch": 0.9129056845990292, "grad_norm": 12.068849563598633, "learning_rate": 1.9159421471447116e-06, "loss": 32.6328, "step": 5689 }, { "epoch": 0.9130661531672484, "grad_norm": 12.261458396911621, "learning_rate": 1.9089240644127538e-06, "loss": 32.5781, "step": 5690 }, { "epoch": 0.9132266217354675, "grad_norm": 12.145110130310059, "learning_rate": 1.9019186087219977e-06, "loss": 32.6641, "step": 5691 }, { "epoch": 0.9133870903036868, "grad_norm": 12.00780963897705, "learning_rate": 1.8949257819118215e-06, "loss": 32.6406, "step": 5692 }, { "epoch": 0.913547558871906, "grad_norm": 12.207886695861816, "learning_rate": 1.887945585818296e-06, "loss": 32.6797, "step": 5693 }, { "epoch": 0.9137080274401251, "grad_norm": 12.27255916595459, "learning_rate": 1.8809780222741824e-06, "loss": 32.5859, "step": 5694 }, { "epoch": 0.9138684960083444, "grad_norm": 12.334278106689453, "learning_rate": 1.8740230931089231e-06, "loss": 32.7188, "step": 5695 }, { "epoch": 0.9140289645765636, "grad_norm": 12.463826179504395, "learning_rate": 1.8670808001486184e-06, "loss": 32.6914, "step": 5696 }, { "epoch": 0.9141894331447827, "grad_norm": 11.951067924499512, "learning_rate": 1.8601511452160992e-06, "loss": 32.7188, "step": 5697 }, { "epoch": 0.914349901713002, "grad_norm": 12.614909172058105, "learning_rate": 1.8532341301308431e-06, "loss": 32.7344, "step": 5698 }, { "epoch": 0.9145103702812212, "grad_norm": 12.77069091796875, "learning_rate": 1.8463297567090144e-06, "loss": 32.6172, "step": 5699 }, { "epoch": 0.9146708388494403, "grad_norm": 12.295363426208496, "learning_rate": 1.8394380267634626e-06, "loss": 32.875, "step": 5700 }, { "epoch": 0.9148313074176596, "grad_norm": 12.157917022705078, "learning_rate": 1.832558942103718e-06, "loss": 32.8906, "step": 5701 }, { "epoch": 0.9149917759858788, "grad_norm": 12.144659042358398, "learning_rate": 1.8256925045359807e-06, "loss": 32.8438, "step": 5702 }, { "epoch": 0.9151522445540979, "grad_norm": 12.083691596984863, "learning_rate": 1.8188387158631471e-06, "loss": 32.7891, "step": 5703 }, { "epoch": 0.9153127131223172, "grad_norm": 12.355478286743164, "learning_rate": 1.8119975778847776e-06, "loss": 32.7812, "step": 5704 }, { "epoch": 0.9154731816905364, "grad_norm": 12.133499145507812, "learning_rate": 1.8051690923971187e-06, "loss": 32.6406, "step": 5705 }, { "epoch": 0.9156336502587555, "grad_norm": 12.271224021911621, "learning_rate": 1.798353261193092e-06, "loss": 32.7344, "step": 5706 }, { "epoch": 0.9157941188269748, "grad_norm": 12.14118480682373, "learning_rate": 1.7915500860623102e-06, "loss": 32.7422, "step": 5707 }, { "epoch": 0.915954587395194, "grad_norm": 12.524659156799316, "learning_rate": 1.7847595687910279e-06, "loss": 32.7578, "step": 5708 }, { "epoch": 0.9161150559634131, "grad_norm": 12.071660041809082, "learning_rate": 1.7779817111622132e-06, "loss": 32.6719, "step": 5709 }, { "epoch": 0.9162755245316324, "grad_norm": 12.324116706848145, "learning_rate": 1.7712165149554816e-06, "loss": 32.7266, "step": 5710 }, { "epoch": 0.9164359930998516, "grad_norm": 12.324084281921387, "learning_rate": 1.7644639819471509e-06, "loss": 32.6641, "step": 5711 }, { "epoch": 0.9165964616680707, "grad_norm": 12.46398639678955, "learning_rate": 1.757724113910203e-06, "loss": 32.7656, "step": 5712 }, { "epoch": 0.91675693023629, "grad_norm": 12.19570255279541, "learning_rate": 1.7509969126142834e-06, "loss": 32.6484, "step": 5713 }, { "epoch": 0.9169173988045092, "grad_norm": 11.90107536315918, "learning_rate": 1.7442823798257236e-06, "loss": 32.8203, "step": 5714 }, { "epoch": 0.9170778673727283, "grad_norm": 12.575486183166504, "learning_rate": 1.7375805173075243e-06, "loss": 32.625, "step": 5715 }, { "epoch": 0.9172383359409476, "grad_norm": 12.011763572692871, "learning_rate": 1.7308913268193673e-06, "loss": 32.7148, "step": 5716 }, { "epoch": 0.9173988045091668, "grad_norm": 12.199719429016113, "learning_rate": 1.7242148101175916e-06, "loss": 32.6172, "step": 5717 }, { "epoch": 0.9175592730773859, "grad_norm": 12.27056884765625, "learning_rate": 1.7175509689552284e-06, "loss": 32.6172, "step": 5718 }, { "epoch": 0.9177197416456052, "grad_norm": 12.070796012878418, "learning_rate": 1.7108998050819725e-06, "loss": 32.5938, "step": 5719 }, { "epoch": 0.9178802102138244, "grad_norm": 11.947136878967285, "learning_rate": 1.7042613202441714e-06, "loss": 32.7031, "step": 5720 }, { "epoch": 0.9180406787820435, "grad_norm": 12.193659782409668, "learning_rate": 1.69763551618487e-06, "loss": 32.6953, "step": 5721 }, { "epoch": 0.9182011473502628, "grad_norm": 12.334674835205078, "learning_rate": 1.6910223946437765e-06, "loss": 32.6719, "step": 5722 }, { "epoch": 0.918361615918482, "grad_norm": 12.400934219360352, "learning_rate": 1.6844219573572517e-06, "loss": 32.5781, "step": 5723 }, { "epoch": 0.9185220844867011, "grad_norm": 12.006426811218262, "learning_rate": 1.6778342060583596e-06, "loss": 32.6172, "step": 5724 }, { "epoch": 0.9186825530549204, "grad_norm": 12.137330055236816, "learning_rate": 1.6712591424768054e-06, "loss": 32.8047, "step": 5725 }, { "epoch": 0.9188430216231396, "grad_norm": 12.20490837097168, "learning_rate": 1.6646967683389748e-06, "loss": 32.6562, "step": 5726 }, { "epoch": 0.9190034901913587, "grad_norm": 12.01102352142334, "learning_rate": 1.6581470853679116e-06, "loss": 32.6875, "step": 5727 }, { "epoch": 0.919163958759578, "grad_norm": 12.073590278625488, "learning_rate": 1.6516100952833402e-06, "loss": 32.6641, "step": 5728 }, { "epoch": 0.9193244273277972, "grad_norm": 12.263379096984863, "learning_rate": 1.6450857998016434e-06, "loss": 32.6094, "step": 5729 }, { "epoch": 0.9194848958960163, "grad_norm": 12.073848724365234, "learning_rate": 1.6385742006358783e-06, "loss": 32.6641, "step": 5730 }, { "epoch": 0.9196453644642356, "grad_norm": 12.390702247619629, "learning_rate": 1.632075299495761e-06, "loss": 32.6172, "step": 5731 }, { "epoch": 0.9198058330324548, "grad_norm": 12.015270233154297, "learning_rate": 1.6255890980876764e-06, "loss": 32.7031, "step": 5732 }, { "epoch": 0.9199663016006739, "grad_norm": 12.131973266601562, "learning_rate": 1.6191155981146789e-06, "loss": 32.6016, "step": 5733 }, { "epoch": 0.9201267701688932, "grad_norm": 12.075151443481445, "learning_rate": 1.6126548012764752e-06, "loss": 32.7422, "step": 5734 }, { "epoch": 0.9202872387371124, "grad_norm": 12.073570251464844, "learning_rate": 1.6062067092694533e-06, "loss": 32.6172, "step": 5735 }, { "epoch": 0.9204477073053315, "grad_norm": 12.335219383239746, "learning_rate": 1.5997713237866585e-06, "loss": 32.5703, "step": 5736 }, { "epoch": 0.9206081758735508, "grad_norm": 12.262679100036621, "learning_rate": 1.5933486465177837e-06, "loss": 32.5469, "step": 5737 }, { "epoch": 0.92076864444177, "grad_norm": 12.194812774658203, "learning_rate": 1.586938679149219e-06, "loss": 32.7109, "step": 5738 }, { "epoch": 0.9209291130099891, "grad_norm": 12.52621078491211, "learning_rate": 1.5805414233639959e-06, "loss": 32.625, "step": 5739 }, { "epoch": 0.9210895815782084, "grad_norm": 12.072819709777832, "learning_rate": 1.574156880841804e-06, "loss": 32.6172, "step": 5740 }, { "epoch": 0.9212500501464276, "grad_norm": 12.013450622558594, "learning_rate": 1.567785053259002e-06, "loss": 32.7422, "step": 5741 }, { "epoch": 0.9214105187146467, "grad_norm": 12.26632308959961, "learning_rate": 1.5614259422886191e-06, "loss": 32.6641, "step": 5742 }, { "epoch": 0.921570987282866, "grad_norm": 12.269989967346191, "learning_rate": 1.555079549600319e-06, "loss": 32.6406, "step": 5743 }, { "epoch": 0.9217314558510852, "grad_norm": 12.261603355407715, "learning_rate": 1.5487458768604579e-06, "loss": 32.668, "step": 5744 }, { "epoch": 0.9218919244193043, "grad_norm": 12.521581649780273, "learning_rate": 1.5424249257320333e-06, "loss": 32.5859, "step": 5745 }, { "epoch": 0.9220523929875236, "grad_norm": 12.291789054870605, "learning_rate": 1.5361166978747122e-06, "loss": 32.7031, "step": 5746 }, { "epoch": 0.9222128615557428, "grad_norm": 12.40322208404541, "learning_rate": 1.5298211949448027e-06, "loss": 32.7344, "step": 5747 }, { "epoch": 0.9223733301239619, "grad_norm": 12.155287742614746, "learning_rate": 1.5235384185952938e-06, "loss": 32.6953, "step": 5748 }, { "epoch": 0.9225337986921812, "grad_norm": 12.337743759155273, "learning_rate": 1.5172683704758272e-06, "loss": 32.7422, "step": 5749 }, { "epoch": 0.9226942672604004, "grad_norm": 12.148116111755371, "learning_rate": 1.511011052232686e-06, "loss": 32.7188, "step": 5750 }, { "epoch": 0.9228547358286195, "grad_norm": 12.029711723327637, "learning_rate": 1.5047664655088334e-06, "loss": 33.1094, "step": 5751 }, { "epoch": 0.9230152043968388, "grad_norm": 12.266265869140625, "learning_rate": 1.4985346119438804e-06, "loss": 32.75, "step": 5752 }, { "epoch": 0.923175672965058, "grad_norm": 12.46033000946045, "learning_rate": 1.4923154931740901e-06, "loss": 32.7891, "step": 5753 }, { "epoch": 0.9233361415332771, "grad_norm": 12.142770767211914, "learning_rate": 1.486109110832401e-06, "loss": 32.7656, "step": 5754 }, { "epoch": 0.9234966101014964, "grad_norm": 12.206141471862793, "learning_rate": 1.4799154665483706e-06, "loss": 32.75, "step": 5755 }, { "epoch": 0.9236570786697156, "grad_norm": 12.080303192138672, "learning_rate": 1.473734561948248e-06, "loss": 32.7344, "step": 5756 }, { "epoch": 0.9238175472379347, "grad_norm": 12.15700912475586, "learning_rate": 1.4675663986549125e-06, "loss": 32.7891, "step": 5757 }, { "epoch": 0.923978015806154, "grad_norm": 12.331537246704102, "learning_rate": 1.4614109782879248e-06, "loss": 32.7539, "step": 5758 }, { "epoch": 0.9241384843743732, "grad_norm": 12.08404541015625, "learning_rate": 1.4552683024634694e-06, "loss": 32.7969, "step": 5759 }, { "epoch": 0.9242989529425923, "grad_norm": 12.079365730285645, "learning_rate": 1.449138372794412e-06, "loss": 32.7578, "step": 5760 }, { "epoch": 0.9244594215108116, "grad_norm": 12.256782531738281, "learning_rate": 1.443021190890248e-06, "loss": 32.6094, "step": 5761 }, { "epoch": 0.9246198900790308, "grad_norm": 12.003899574279785, "learning_rate": 1.436916758357143e-06, "loss": 32.6641, "step": 5762 }, { "epoch": 0.92478035864725, "grad_norm": 12.272110939025879, "learning_rate": 1.4308250767979036e-06, "loss": 32.6172, "step": 5763 }, { "epoch": 0.9249408272154692, "grad_norm": 12.39626407623291, "learning_rate": 1.4247461478119894e-06, "loss": 32.6641, "step": 5764 }, { "epoch": 0.9251012957836884, "grad_norm": 12.068446159362793, "learning_rate": 1.4186799729955236e-06, "loss": 32.6797, "step": 5765 }, { "epoch": 0.9252617643519075, "grad_norm": 12.269655227661133, "learning_rate": 1.4126265539412765e-06, "loss": 32.7422, "step": 5766 }, { "epoch": 0.9254222329201268, "grad_norm": 12.144379615783691, "learning_rate": 1.4065858922386544e-06, "loss": 32.6719, "step": 5767 }, { "epoch": 0.925582701488346, "grad_norm": 12.270901679992676, "learning_rate": 1.4005579894737275e-06, "loss": 32.6797, "step": 5768 }, { "epoch": 0.9257431700565651, "grad_norm": 12.33306884765625, "learning_rate": 1.3945428472292133e-06, "loss": 32.7031, "step": 5769 }, { "epoch": 0.9259036386247844, "grad_norm": 12.19880485534668, "learning_rate": 1.388540467084476e-06, "loss": 32.6484, "step": 5770 }, { "epoch": 0.9260641071930036, "grad_norm": 12.147298812866211, "learning_rate": 1.3825508506155383e-06, "loss": 32.6641, "step": 5771 }, { "epoch": 0.9262245757612227, "grad_norm": 12.203832626342773, "learning_rate": 1.3765739993950643e-06, "loss": 32.6953, "step": 5772 }, { "epoch": 0.926385044329442, "grad_norm": 12.089390754699707, "learning_rate": 1.37060991499236e-06, "loss": 32.8203, "step": 5773 }, { "epoch": 0.9265455128976612, "grad_norm": 12.273941993713379, "learning_rate": 1.3646585989733895e-06, "loss": 32.6328, "step": 5774 }, { "epoch": 0.9267059814658803, "grad_norm": 12.271004676818848, "learning_rate": 1.3587200529007693e-06, "loss": 32.6328, "step": 5775 }, { "epoch": 0.9268664500340996, "grad_norm": 12.510207176208496, "learning_rate": 1.3527942783337411e-06, "loss": 32.6484, "step": 5776 }, { "epoch": 0.9270269186023188, "grad_norm": 12.388679504394531, "learning_rate": 1.3468812768282102e-06, "loss": 32.5938, "step": 5777 }, { "epoch": 0.927187387170538, "grad_norm": 12.205404281616211, "learning_rate": 1.3409810499367347e-06, "loss": 32.6719, "step": 5778 }, { "epoch": 0.9273478557387572, "grad_norm": 12.070989608764648, "learning_rate": 1.335093599208498e-06, "loss": 32.6953, "step": 5779 }, { "epoch": 0.9275083243069764, "grad_norm": 12.072861671447754, "learning_rate": 1.3292189261893406e-06, "loss": 32.6484, "step": 5780 }, { "epoch": 0.9276687928751955, "grad_norm": 12.070652961730957, "learning_rate": 1.3233570324217516e-06, "loss": 32.6484, "step": 5781 }, { "epoch": 0.9278292614434148, "grad_norm": 12.07417106628418, "learning_rate": 1.3175079194448548e-06, "loss": 32.6562, "step": 5782 }, { "epoch": 0.927989730011634, "grad_norm": 11.944246292114258, "learning_rate": 1.311671588794433e-06, "loss": 32.7188, "step": 5783 }, { "epoch": 0.9281501985798531, "grad_norm": 12.330978393554688, "learning_rate": 1.3058480420028884e-06, "loss": 32.6641, "step": 5784 }, { "epoch": 0.9283106671480724, "grad_norm": 12.13792610168457, "learning_rate": 1.3000372805992923e-06, "loss": 32.6484, "step": 5785 }, { "epoch": 0.9284711357162916, "grad_norm": 12.131269454956055, "learning_rate": 1.2942393061093461e-06, "loss": 32.6172, "step": 5786 }, { "epoch": 0.9286316042845107, "grad_norm": 12.194948196411133, "learning_rate": 1.288454120055399e-06, "loss": 32.6484, "step": 5787 }, { "epoch": 0.92879207285273, "grad_norm": 12.393329620361328, "learning_rate": 1.2826817239564358e-06, "loss": 32.6016, "step": 5788 }, { "epoch": 0.9289525414209492, "grad_norm": 12.470803260803223, "learning_rate": 1.2769221193280945e-06, "loss": 32.6562, "step": 5789 }, { "epoch": 0.9291130099891683, "grad_norm": 12.132285118103027, "learning_rate": 1.2711753076826206e-06, "loss": 32.625, "step": 5790 }, { "epoch": 0.9292734785573876, "grad_norm": 12.262391090393066, "learning_rate": 1.265441290528957e-06, "loss": 32.6797, "step": 5791 }, { "epoch": 0.9294339471256068, "grad_norm": 12.391533851623535, "learning_rate": 1.2597200693726441e-06, "loss": 32.625, "step": 5792 }, { "epoch": 0.929594415693826, "grad_norm": 12.203472137451172, "learning_rate": 1.2540116457158745e-06, "loss": 32.6484, "step": 5793 }, { "epoch": 0.9297548842620452, "grad_norm": 12.138772964477539, "learning_rate": 1.248316021057483e-06, "loss": 32.6406, "step": 5794 }, { "epoch": 0.9299153528302644, "grad_norm": 12.267977714538574, "learning_rate": 1.2426331968929395e-06, "loss": 32.5391, "step": 5795 }, { "epoch": 0.9300758213984835, "grad_norm": 12.258838653564453, "learning_rate": 1.2369631747143618e-06, "loss": 32.5938, "step": 5796 }, { "epoch": 0.9302362899667028, "grad_norm": 12.51925277709961, "learning_rate": 1.2313059560104979e-06, "loss": 32.6953, "step": 5797 }, { "epoch": 0.930396758534922, "grad_norm": 12.329487800598145, "learning_rate": 1.2256615422667262e-06, "loss": 32.6172, "step": 5798 }, { "epoch": 0.9305572271031411, "grad_norm": 12.151155471801758, "learning_rate": 1.220029934965089e-06, "loss": 32.7773, "step": 5799 }, { "epoch": 0.9307176956713604, "grad_norm": 12.425657272338867, "learning_rate": 1.214411135584248e-06, "loss": 32.9922, "step": 5800 }, { "epoch": 0.9308781642395796, "grad_norm": 12.217992782592773, "learning_rate": 1.2088051455994953e-06, "loss": 32.9609, "step": 5801 }, { "epoch": 0.9310386328077987, "grad_norm": 12.085238456726074, "learning_rate": 1.2032119664827757e-06, "loss": 32.8984, "step": 5802 }, { "epoch": 0.931199101376018, "grad_norm": 12.201797485351562, "learning_rate": 1.1976315997026534e-06, "loss": 32.6953, "step": 5803 }, { "epoch": 0.9313595699442372, "grad_norm": 12.394139289855957, "learning_rate": 1.1920640467243449e-06, "loss": 32.6719, "step": 5804 }, { "epoch": 0.9315200385124563, "grad_norm": 12.470996856689453, "learning_rate": 1.1865093090096979e-06, "loss": 32.8281, "step": 5805 }, { "epoch": 0.9316805070806756, "grad_norm": 11.949275970458984, "learning_rate": 1.1809673880171957e-06, "loss": 32.7969, "step": 5806 }, { "epoch": 0.9318409756488948, "grad_norm": 12.199719429016113, "learning_rate": 1.1754382852019463e-06, "loss": 32.6797, "step": 5807 }, { "epoch": 0.932001444217114, "grad_norm": 12.345144271850586, "learning_rate": 1.1699220020156997e-06, "loss": 32.7734, "step": 5808 }, { "epoch": 0.9321619127853332, "grad_norm": 11.951226234436035, "learning_rate": 1.164418539906842e-06, "loss": 32.7812, "step": 5809 }, { "epoch": 0.9323223813535524, "grad_norm": 12.257250785827637, "learning_rate": 1.1589279003203945e-06, "loss": 32.7422, "step": 5810 }, { "epoch": 0.9324828499217716, "grad_norm": 12.069300651550293, "learning_rate": 1.153450084697999e-06, "loss": 32.7812, "step": 5811 }, { "epoch": 0.9326433184899908, "grad_norm": 12.272465705871582, "learning_rate": 1.1479850944779436e-06, "loss": 32.7109, "step": 5812 }, { "epoch": 0.93280378705821, "grad_norm": 11.944366455078125, "learning_rate": 1.1425329310951527e-06, "loss": 32.7227, "step": 5813 }, { "epoch": 0.9329642556264292, "grad_norm": 12.009931564331055, "learning_rate": 1.1370935959811647e-06, "loss": 32.7656, "step": 5814 }, { "epoch": 0.9331247241946484, "grad_norm": 12.204251289367676, "learning_rate": 1.131667090564159e-06, "loss": 32.6641, "step": 5815 }, { "epoch": 0.9332851927628676, "grad_norm": 12.00698184967041, "learning_rate": 1.126253416268952e-06, "loss": 32.7266, "step": 5816 }, { "epoch": 0.9334456613310868, "grad_norm": 12.011619567871094, "learning_rate": 1.1208525745169728e-06, "loss": 32.6953, "step": 5817 }, { "epoch": 0.933606129899306, "grad_norm": 12.196700096130371, "learning_rate": 1.1154645667263152e-06, "loss": 32.6406, "step": 5818 }, { "epoch": 0.9337665984675252, "grad_norm": 12.073405265808105, "learning_rate": 1.1100893943116696e-06, "loss": 32.6875, "step": 5819 }, { "epoch": 0.9339270670357444, "grad_norm": 12.26512336730957, "learning_rate": 1.104727058684374e-06, "loss": 32.6328, "step": 5820 }, { "epoch": 0.9340875356039636, "grad_norm": 12.069141387939453, "learning_rate": 1.099377561252385e-06, "loss": 32.625, "step": 5821 }, { "epoch": 0.9342480041721828, "grad_norm": 12.205726623535156, "learning_rate": 1.0940409034203014e-06, "loss": 32.6484, "step": 5822 }, { "epoch": 0.934408472740402, "grad_norm": 12.26495361328125, "learning_rate": 1.0887170865893359e-06, "loss": 32.5391, "step": 5823 }, { "epoch": 0.9345689413086212, "grad_norm": 12.13497257232666, "learning_rate": 1.0834061121573425e-06, "loss": 32.6172, "step": 5824 }, { "epoch": 0.9347294098768404, "grad_norm": 11.941051483154297, "learning_rate": 1.0781079815187945e-06, "loss": 32.6172, "step": 5825 }, { "epoch": 0.9348898784450596, "grad_norm": 12.859663009643555, "learning_rate": 1.0728226960648013e-06, "loss": 32.6797, "step": 5826 }, { "epoch": 0.9350503470132788, "grad_norm": 12.459830284118652, "learning_rate": 1.067550257183092e-06, "loss": 32.6641, "step": 5827 }, { "epoch": 0.935210815581498, "grad_norm": 12.077292442321777, "learning_rate": 1.0622906662580257e-06, "loss": 32.6719, "step": 5828 }, { "epoch": 0.9353712841497172, "grad_norm": 12.577519416809082, "learning_rate": 1.057043924670592e-06, "loss": 32.5938, "step": 5829 }, { "epoch": 0.9355317527179364, "grad_norm": 12.25722599029541, "learning_rate": 1.051810033798395e-06, "loss": 32.6406, "step": 5830 }, { "epoch": 0.9356922212861556, "grad_norm": 12.39626407623291, "learning_rate": 1.0465889950156683e-06, "loss": 32.5898, "step": 5831 }, { "epoch": 0.9358526898543748, "grad_norm": 12.637357711791992, "learning_rate": 1.0413808096932876e-06, "loss": 32.5938, "step": 5832 }, { "epoch": 0.936013158422594, "grad_norm": 12.014605522155762, "learning_rate": 1.0361854791987314e-06, "loss": 32.7422, "step": 5833 }, { "epoch": 0.9361736269908132, "grad_norm": 12.011136054992676, "learning_rate": 1.0310030048961194e-06, "loss": 32.6641, "step": 5834 }, { "epoch": 0.9363340955590324, "grad_norm": 11.950687408447266, "learning_rate": 1.0258333881461856e-06, "loss": 32.7031, "step": 5835 }, { "epoch": 0.9364945641272516, "grad_norm": 12.688945770263672, "learning_rate": 1.0206766303062943e-06, "loss": 32.6406, "step": 5836 }, { "epoch": 0.9366550326954708, "grad_norm": 12.079919815063477, "learning_rate": 1.0155327327304232e-06, "loss": 32.75, "step": 5837 }, { "epoch": 0.93681550126369, "grad_norm": 12.083366394042969, "learning_rate": 1.0104016967691865e-06, "loss": 32.7266, "step": 5838 }, { "epoch": 0.9369759698319092, "grad_norm": 12.14232063293457, "learning_rate": 1.0052835237698121e-06, "loss": 32.6406, "step": 5839 }, { "epoch": 0.9371364384001284, "grad_norm": 12.349117279052734, "learning_rate": 1.0001782150761585e-06, "loss": 32.6797, "step": 5840 }, { "epoch": 0.9372969069683476, "grad_norm": 12.068077087402344, "learning_rate": 9.950857720286978e-07, "loss": 32.6719, "step": 5841 }, { "epoch": 0.9374573755365668, "grad_norm": 12.269177436828613, "learning_rate": 9.900061959645268e-07, "loss": 32.6094, "step": 5842 }, { "epoch": 0.937617844104786, "grad_norm": 12.068480491638184, "learning_rate": 9.849394882173734e-07, "loss": 32.6484, "step": 5843 }, { "epoch": 0.9377783126730052, "grad_norm": 12.071414947509766, "learning_rate": 9.79885650117568e-07, "loss": 32.6172, "step": 5844 }, { "epoch": 0.9379387812412244, "grad_norm": 12.07207202911377, "learning_rate": 9.748446829920766e-07, "loss": 32.6797, "step": 5845 }, { "epoch": 0.9380992498094436, "grad_norm": 12.086341857910156, "learning_rate": 9.69816588164485e-07, "loss": 32.7344, "step": 5846 }, { "epoch": 0.9382597183776628, "grad_norm": 12.146432876586914, "learning_rate": 9.648013669549927e-07, "loss": 32.7422, "step": 5847 }, { "epoch": 0.938420186945882, "grad_norm": 12.330382347106934, "learning_rate": 9.59799020680424e-07, "loss": 32.6719, "step": 5848 }, { "epoch": 0.9385806555141012, "grad_norm": 12.014341354370117, "learning_rate": 9.548095506542176e-07, "loss": 32.6406, "step": 5849 }, { "epoch": 0.9387411240823204, "grad_norm": 12.428731918334961, "learning_rate": 9.498329581864362e-07, "loss": 32.9844, "step": 5850 }, { "epoch": 0.9389015926505396, "grad_norm": 12.412810325622559, "learning_rate": 9.448692445837571e-07, "loss": 33.0625, "step": 5851 }, { "epoch": 0.9390620612187588, "grad_norm": 12.332201957702637, "learning_rate": 9.399184111494819e-07, "loss": 32.7031, "step": 5852 }, { "epoch": 0.939222529786978, "grad_norm": 12.534311294555664, "learning_rate": 9.349804591835265e-07, "loss": 32.7188, "step": 5853 }, { "epoch": 0.9393829983551972, "grad_norm": 12.082000732421875, "learning_rate": 9.30055389982426e-07, "loss": 32.7969, "step": 5854 }, { "epoch": 0.9395434669234164, "grad_norm": 12.479654312133789, "learning_rate": 9.251432048393294e-07, "loss": 32.8594, "step": 5855 }, { "epoch": 0.9397039354916356, "grad_norm": 12.076077461242676, "learning_rate": 9.202439050440049e-07, "loss": 32.75, "step": 5856 }, { "epoch": 0.9398644040598548, "grad_norm": 12.203107833862305, "learning_rate": 9.153574918828456e-07, "loss": 32.6719, "step": 5857 }, { "epoch": 0.940024872628074, "grad_norm": 12.399776458740234, "learning_rate": 9.104839666388421e-07, "loss": 32.7266, "step": 5858 }, { "epoch": 0.9401853411962932, "grad_norm": 12.197696685791016, "learning_rate": 9.056233305916151e-07, "loss": 32.7227, "step": 5859 }, { "epoch": 0.9403458097645124, "grad_norm": 12.453709602355957, "learning_rate": 9.007755850174104e-07, "loss": 32.6562, "step": 5860 }, { "epoch": 0.9405062783327316, "grad_norm": 12.203969955444336, "learning_rate": 8.9594073118906e-07, "loss": 32.7812, "step": 5861 }, { "epoch": 0.9406667469009508, "grad_norm": 12.011332511901855, "learning_rate": 8.91118770376037e-07, "loss": 32.6172, "step": 5862 }, { "epoch": 0.94082721546917, "grad_norm": 11.943930625915527, "learning_rate": 8.863097038444179e-07, "loss": 32.6797, "step": 5863 }, { "epoch": 0.9409876840373892, "grad_norm": 12.00964069366455, "learning_rate": 8.815135328569035e-07, "loss": 32.7969, "step": 5864 }, { "epoch": 0.9411481526056084, "grad_norm": 12.210097312927246, "learning_rate": 8.767302586727866e-07, "loss": 32.7422, "step": 5865 }, { "epoch": 0.9413086211738276, "grad_norm": 12.206707954406738, "learning_rate": 8.719598825479957e-07, "loss": 32.6797, "step": 5866 }, { "epoch": 0.9414690897420468, "grad_norm": 12.38769245147705, "learning_rate": 8.672024057350736e-07, "loss": 32.6953, "step": 5867 }, { "epoch": 0.941629558310266, "grad_norm": 12.637354850769043, "learning_rate": 8.624578294831598e-07, "loss": 32.5547, "step": 5868 }, { "epoch": 0.9417900268784852, "grad_norm": 13.066423416137695, "learning_rate": 8.577261550380133e-07, "loss": 32.5547, "step": 5869 }, { "epoch": 0.9419504954467044, "grad_norm": 12.262173652648926, "learning_rate": 8.530073836420072e-07, "loss": 32.6328, "step": 5870 }, { "epoch": 0.9421109640149236, "grad_norm": 12.210034370422363, "learning_rate": 8.483015165341335e-07, "loss": 32.6562, "step": 5871 }, { "epoch": 0.9422714325831428, "grad_norm": 12.142603874206543, "learning_rate": 8.436085549499817e-07, "loss": 32.6641, "step": 5872 }, { "epoch": 0.942431901151362, "grad_norm": 12.351738929748535, "learning_rate": 8.389285001217606e-07, "loss": 32.6719, "step": 5873 }, { "epoch": 0.9425923697195812, "grad_norm": 12.20052433013916, "learning_rate": 8.342613532782928e-07, "loss": 32.7812, "step": 5874 }, { "epoch": 0.9427528382878004, "grad_norm": 12.651515007019043, "learning_rate": 8.296071156450092e-07, "loss": 32.6094, "step": 5875 }, { "epoch": 0.9429133068560196, "grad_norm": 12.268145561218262, "learning_rate": 8.249657884439432e-07, "loss": 32.6953, "step": 5876 }, { "epoch": 0.9430737754242388, "grad_norm": 12.398040771484375, "learning_rate": 8.203373728937535e-07, "loss": 32.5625, "step": 5877 }, { "epoch": 0.943234243992458, "grad_norm": 12.004472732543945, "learning_rate": 8.157218702096958e-07, "loss": 32.625, "step": 5878 }, { "epoch": 0.9433947125606772, "grad_norm": 12.140478134155273, "learning_rate": 8.11119281603645e-07, "loss": 32.5664, "step": 5879 }, { "epoch": 0.9435551811288964, "grad_norm": 12.131814956665039, "learning_rate": 8.065296082840789e-07, "loss": 32.6641, "step": 5880 }, { "epoch": 0.9437156496971156, "grad_norm": 11.944648742675781, "learning_rate": 8.019528514560837e-07, "loss": 32.6875, "step": 5881 }, { "epoch": 0.9438761182653348, "grad_norm": 12.140066146850586, "learning_rate": 7.973890123213589e-07, "loss": 32.6797, "step": 5882 }, { "epoch": 0.944036586833554, "grad_norm": 12.199268341064453, "learning_rate": 7.928380920782075e-07, "loss": 32.6797, "step": 5883 }, { "epoch": 0.9441970554017732, "grad_norm": 12.004286766052246, "learning_rate": 7.883000919215399e-07, "loss": 32.6211, "step": 5884 }, { "epoch": 0.9443575239699924, "grad_norm": 12.208967208862305, "learning_rate": 7.837750130428867e-07, "loss": 32.7109, "step": 5885 }, { "epoch": 0.9445179925382116, "grad_norm": 12.133419036865234, "learning_rate": 7.792628566303639e-07, "loss": 32.6406, "step": 5886 }, { "epoch": 0.9446784611064307, "grad_norm": 12.574222564697266, "learning_rate": 7.747636238687184e-07, "loss": 32.5625, "step": 5887 }, { "epoch": 0.94483892967465, "grad_norm": 12.144013404846191, "learning_rate": 7.702773159392773e-07, "loss": 32.6094, "step": 5888 }, { "epoch": 0.9449993982428692, "grad_norm": 12.071012496948242, "learning_rate": 7.658039340200041e-07, "loss": 32.5938, "step": 5889 }, { "epoch": 0.9451598668110883, "grad_norm": 12.070427894592285, "learning_rate": 7.61343479285448e-07, "loss": 32.6406, "step": 5890 }, { "epoch": 0.9453203353793076, "grad_norm": 12.069098472595215, "learning_rate": 7.56895952906761e-07, "loss": 32.6172, "step": 5891 }, { "epoch": 0.9454808039475268, "grad_norm": 12.392767906188965, "learning_rate": 7.524613560517146e-07, "loss": 32.625, "step": 5892 }, { "epoch": 0.9456412725157459, "grad_norm": 11.949217796325684, "learning_rate": 7.480396898846831e-07, "loss": 32.6719, "step": 5893 }, { "epoch": 0.9458017410839652, "grad_norm": 12.394497871398926, "learning_rate": 7.436309555666376e-07, "loss": 32.6328, "step": 5894 }, { "epoch": 0.9459622096521844, "grad_norm": 12.147235870361328, "learning_rate": 7.392351542551635e-07, "loss": 32.6562, "step": 5895 }, { "epoch": 0.9461226782204035, "grad_norm": 12.277012825012207, "learning_rate": 7.348522871044372e-07, "loss": 32.6641, "step": 5896 }, { "epoch": 0.9462831467886228, "grad_norm": 12.07687759399414, "learning_rate": 7.304823552652495e-07, "loss": 32.6172, "step": 5897 }, { "epoch": 0.946443615356842, "grad_norm": 12.168679237365723, "learning_rate": 7.261253598849938e-07, "loss": 33.0, "step": 5898 }, { "epoch": 0.9466040839250611, "grad_norm": 12.66720962524414, "learning_rate": 7.217813021076658e-07, "loss": 32.6094, "step": 5899 }, { "epoch": 0.9467645524932804, "grad_norm": 12.275845527648926, "learning_rate": 7.174501830738645e-07, "loss": 32.8203, "step": 5900 }, { "epoch": 0.9469250210614996, "grad_norm": 12.602964401245117, "learning_rate": 7.131320039207856e-07, "loss": 32.8203, "step": 5901 }, { "epoch": 0.9470854896297187, "grad_norm": 12.397345542907715, "learning_rate": 7.088267657822389e-07, "loss": 32.8438, "step": 5902 }, { "epoch": 0.947245958197938, "grad_norm": 12.081352233886719, "learning_rate": 7.045344697886258e-07, "loss": 32.8281, "step": 5903 }, { "epoch": 0.9474064267661572, "grad_norm": 12.605810165405273, "learning_rate": 7.002551170669558e-07, "loss": 32.7422, "step": 5904 }, { "epoch": 0.9475668953343763, "grad_norm": 12.13602066040039, "learning_rate": 6.959887087408357e-07, "loss": 32.7578, "step": 5905 }, { "epoch": 0.9477273639025956, "grad_norm": 12.269021034240723, "learning_rate": 6.917352459304749e-07, "loss": 32.75, "step": 5906 }, { "epoch": 0.9478878324708148, "grad_norm": 12.533232688903809, "learning_rate": 6.874947297526913e-07, "loss": 32.7266, "step": 5907 }, { "epoch": 0.9480483010390339, "grad_norm": 12.330863952636719, "learning_rate": 6.832671613208886e-07, "loss": 32.6562, "step": 5908 }, { "epoch": 0.9482087696072532, "grad_norm": 12.334383010864258, "learning_rate": 6.790525417450844e-07, "loss": 32.6562, "step": 5909 }, { "epoch": 0.9483692381754724, "grad_norm": 12.196722030639648, "learning_rate": 6.74850872131888e-07, "loss": 32.6172, "step": 5910 }, { "epoch": 0.9485297067436915, "grad_norm": 12.348869323730469, "learning_rate": 6.70662153584517e-07, "loss": 32.7266, "step": 5911 }, { "epoch": 0.9486901753119108, "grad_norm": 12.199217796325684, "learning_rate": 6.664863872027694e-07, "loss": 32.6641, "step": 5912 }, { "epoch": 0.94885064388013, "grad_norm": 12.262954711914062, "learning_rate": 6.62323574083068e-07, "loss": 32.6172, "step": 5913 }, { "epoch": 0.9490111124483491, "grad_norm": 12.33072280883789, "learning_rate": 6.58173715318422e-07, "loss": 32.6719, "step": 5914 }, { "epoch": 0.9491715810165684, "grad_norm": 12.069051742553711, "learning_rate": 6.540368119984375e-07, "loss": 32.6875, "step": 5915 }, { "epoch": 0.9493320495847876, "grad_norm": 12.011467933654785, "learning_rate": 6.499128652093233e-07, "loss": 32.6172, "step": 5916 }, { "epoch": 0.9494925181530067, "grad_norm": 12.385876655578613, "learning_rate": 6.458018760338802e-07, "loss": 32.6016, "step": 5917 }, { "epoch": 0.949652986721226, "grad_norm": 12.19454574584961, "learning_rate": 6.417038455515112e-07, "loss": 32.6797, "step": 5918 }, { "epoch": 0.9498134552894452, "grad_norm": 12.268463134765625, "learning_rate": 6.376187748382168e-07, "loss": 32.6719, "step": 5919 }, { "epoch": 0.9499739238576643, "grad_norm": 12.1517915725708, "learning_rate": 6.335466649665999e-07, "loss": 32.7812, "step": 5920 }, { "epoch": 0.9501343924258836, "grad_norm": 12.26169490814209, "learning_rate": 6.294875170058445e-07, "loss": 32.6797, "step": 5921 }, { "epoch": 0.9502948609941028, "grad_norm": 12.212102890014648, "learning_rate": 6.254413320217478e-07, "loss": 32.7383, "step": 5922 }, { "epoch": 0.9504553295623219, "grad_norm": 12.195829391479492, "learning_rate": 6.214081110766989e-07, "loss": 32.6484, "step": 5923 }, { "epoch": 0.9506157981305412, "grad_norm": 12.133628845214844, "learning_rate": 6.17387855229673e-07, "loss": 32.6797, "step": 5924 }, { "epoch": 0.9507762666987604, "grad_norm": 12.26051139831543, "learning_rate": 6.133805655362534e-07, "loss": 32.5938, "step": 5925 }, { "epoch": 0.9509367352669795, "grad_norm": 12.447442054748535, "learning_rate": 6.093862430486208e-07, "loss": 32.6016, "step": 5926 }, { "epoch": 0.9510972038351988, "grad_norm": 12.200752258300781, "learning_rate": 6.054048888155416e-07, "loss": 32.6406, "step": 5927 }, { "epoch": 0.951257672403418, "grad_norm": 12.00772762298584, "learning_rate": 6.014365038823744e-07, "loss": 32.6875, "step": 5928 }, { "epoch": 0.9514181409716371, "grad_norm": 12.798343658447266, "learning_rate": 5.974810892910854e-07, "loss": 32.6094, "step": 5929 }, { "epoch": 0.9515786095398564, "grad_norm": 12.079446792602539, "learning_rate": 5.935386460802328e-07, "loss": 32.6406, "step": 5930 }, { "epoch": 0.9517390781080756, "grad_norm": 12.196473121643066, "learning_rate": 5.896091752849498e-07, "loss": 32.5859, "step": 5931 }, { "epoch": 0.9518995466762947, "grad_norm": 12.266804695129395, "learning_rate": 5.856926779369943e-07, "loss": 32.6328, "step": 5932 }, { "epoch": 0.952060015244514, "grad_norm": 12.59282398223877, "learning_rate": 5.817891550646937e-07, "loss": 32.625, "step": 5933 }, { "epoch": 0.9522204838127332, "grad_norm": 12.891364097595215, "learning_rate": 5.778986076929782e-07, "loss": 32.5156, "step": 5934 }, { "epoch": 0.9523809523809523, "grad_norm": 12.328367233276367, "learning_rate": 5.740210368433696e-07, "loss": 32.5469, "step": 5935 }, { "epoch": 0.9525414209491716, "grad_norm": 12.068310737609863, "learning_rate": 5.701564435339868e-07, "loss": 32.6641, "step": 5936 }, { "epoch": 0.9527018895173908, "grad_norm": 12.322671890258789, "learning_rate": 5.663048287795347e-07, "loss": 32.5781, "step": 5937 }, { "epoch": 0.9528623580856099, "grad_norm": 12.390118598937988, "learning_rate": 5.6246619359131e-07, "loss": 32.6328, "step": 5938 }, { "epoch": 0.9530228266538292, "grad_norm": 11.94189167022705, "learning_rate": 5.586405389772065e-07, "loss": 32.6094, "step": 5939 }, { "epoch": 0.9531832952220484, "grad_norm": 12.209131240844727, "learning_rate": 5.54827865941715e-07, "loss": 32.7266, "step": 5940 }, { "epoch": 0.9533437637902675, "grad_norm": 12.678705215454102, "learning_rate": 5.510281754859014e-07, "loss": 32.5938, "step": 5941 }, { "epoch": 0.9535042323584868, "grad_norm": 12.020135879516602, "learning_rate": 5.472414686074345e-07, "loss": 32.7812, "step": 5942 }, { "epoch": 0.953664700926706, "grad_norm": 12.202872276306152, "learning_rate": 5.434677463005744e-07, "loss": 32.6055, "step": 5943 }, { "epoch": 0.9538251694949251, "grad_norm": 12.265000343322754, "learning_rate": 5.397070095561673e-07, "loss": 32.5625, "step": 5944 }, { "epoch": 0.9539856380631444, "grad_norm": 12.01246166229248, "learning_rate": 5.359592593616459e-07, "loss": 32.7812, "step": 5945 }, { "epoch": 0.9541461066313636, "grad_norm": 12.339418411254883, "learning_rate": 5.322244967010503e-07, "loss": 32.6094, "step": 5946 }, { "epoch": 0.9543065751995827, "grad_norm": 12.458568572998047, "learning_rate": 5.28502722554991e-07, "loss": 32.6406, "step": 5947 }, { "epoch": 0.954467043767802, "grad_norm": 11.953486442565918, "learning_rate": 5.247939379006806e-07, "loss": 32.7891, "step": 5948 }, { "epoch": 0.9546275123360212, "grad_norm": 12.345520973205566, "learning_rate": 5.210981437119123e-07, "loss": 32.8516, "step": 5949 }, { "epoch": 0.9547879809042403, "grad_norm": 12.165484428405762, "learning_rate": 5.174153409590765e-07, "loss": 32.9297, "step": 5950 }, { "epoch": 0.9549484494724596, "grad_norm": 12.158950805664062, "learning_rate": 5.137455306091443e-07, "loss": 32.7969, "step": 5951 }, { "epoch": 0.9551089180406788, "grad_norm": 12.400060653686523, "learning_rate": 5.100887136256838e-07, "loss": 32.7969, "step": 5952 }, { "epoch": 0.9552693866088979, "grad_norm": 12.147334098815918, "learning_rate": 5.06444890968849e-07, "loss": 32.8359, "step": 5953 }, { "epoch": 0.9554298551771172, "grad_norm": 12.07523250579834, "learning_rate": 5.028140635953804e-07, "loss": 32.7266, "step": 5954 }, { "epoch": 0.9555903237453364, "grad_norm": 12.272650718688965, "learning_rate": 4.991962324586042e-07, "loss": 32.6797, "step": 5955 }, { "epoch": 0.9557507923135555, "grad_norm": 12.33088207244873, "learning_rate": 4.955913985084382e-07, "loss": 32.6602, "step": 5956 }, { "epoch": 0.9559112608817748, "grad_norm": 12.078944206237793, "learning_rate": 4.919995626913865e-07, "loss": 32.7812, "step": 5957 }, { "epoch": 0.956071729449994, "grad_norm": 12.141288757324219, "learning_rate": 4.884207259505392e-07, "loss": 32.6641, "step": 5958 }, { "epoch": 0.9562321980182131, "grad_norm": 12.144286155700684, "learning_rate": 4.848548892255722e-07, "loss": 32.7266, "step": 5959 }, { "epoch": 0.9563926665864324, "grad_norm": 12.266960144042969, "learning_rate": 4.813020534527535e-07, "loss": 32.7031, "step": 5960 }, { "epoch": 0.9565531351546516, "grad_norm": 12.263856887817383, "learning_rate": 4.777622195649367e-07, "loss": 32.7344, "step": 5961 }, { "epoch": 0.9567136037228707, "grad_norm": 12.068790435791016, "learning_rate": 4.742353884915507e-07, "loss": 32.6484, "step": 5962 }, { "epoch": 0.95687407229109, "grad_norm": 12.204641342163086, "learning_rate": 4.7072156115862684e-07, "loss": 32.7188, "step": 5963 }, { "epoch": 0.9570345408593092, "grad_norm": 12.195996284484863, "learning_rate": 4.672207384887661e-07, "loss": 32.6406, "step": 5964 }, { "epoch": 0.9571950094275283, "grad_norm": 12.130071640014648, "learning_rate": 4.6373292140116655e-07, "loss": 32.6016, "step": 5965 }, { "epoch": 0.9573554779957476, "grad_norm": 12.071198463439941, "learning_rate": 4.6025811081160663e-07, "loss": 32.7109, "step": 5966 }, { "epoch": 0.9575159465639668, "grad_norm": 12.343276977539062, "learning_rate": 4.5679630763245664e-07, "loss": 32.7266, "step": 5967 }, { "epoch": 0.9576764151321859, "grad_norm": 12.20062255859375, "learning_rate": 4.5334751277265053e-07, "loss": 32.6719, "step": 5968 }, { "epoch": 0.9578368837004052, "grad_norm": 12.131720542907715, "learning_rate": 4.4991172713773624e-07, "loss": 32.6094, "step": 5969 }, { "epoch": 0.9579973522686244, "grad_norm": 12.075218200683594, "learning_rate": 4.4648895162983096e-07, "loss": 32.6719, "step": 5970 }, { "epoch": 0.9581578208368435, "grad_norm": 12.007874488830566, "learning_rate": 4.430791871476214e-07, "loss": 32.6953, "step": 5971 }, { "epoch": 0.9583182894050628, "grad_norm": 12.273018836975098, "learning_rate": 4.3968243458640813e-07, "loss": 32.6562, "step": 5972 }, { "epoch": 0.958478757973282, "grad_norm": 12.195173263549805, "learning_rate": 4.362986948380554e-07, "loss": 32.5859, "step": 5973 }, { "epoch": 0.9586392265415011, "grad_norm": 12.010275840759277, "learning_rate": 4.329279687910137e-07, "loss": 32.6641, "step": 5974 }, { "epoch": 0.9587996951097204, "grad_norm": 12.065829277038574, "learning_rate": 4.2957025733031954e-07, "loss": 32.6094, "step": 5975 }, { "epoch": 0.9589601636779396, "grad_norm": 12.194184303283691, "learning_rate": 4.262255613375898e-07, "loss": 32.6172, "step": 5976 }, { "epoch": 0.9591206322461587, "grad_norm": 12.07168197631836, "learning_rate": 4.2289388169102774e-07, "loss": 32.6953, "step": 5977 }, { "epoch": 0.959281100814378, "grad_norm": 12.265273094177246, "learning_rate": 4.1957521926541674e-07, "loss": 32.6719, "step": 5978 }, { "epoch": 0.9594415693825972, "grad_norm": 11.956586837768555, "learning_rate": 4.1626957493212106e-07, "loss": 32.7109, "step": 5979 }, { "epoch": 0.9596020379508163, "grad_norm": 12.005005836486816, "learning_rate": 4.129769495590852e-07, "loss": 32.6562, "step": 5980 }, { "epoch": 0.9597625065190356, "grad_norm": 12.134175300598145, "learning_rate": 4.0969734401083426e-07, "loss": 32.5859, "step": 5981 }, { "epoch": 0.9599229750872548, "grad_norm": 12.20453929901123, "learning_rate": 4.064307591484906e-07, "loss": 32.7227, "step": 5982 }, { "epoch": 0.9600834436554739, "grad_norm": 12.392778396606445, "learning_rate": 4.0317719582973476e-07, "loss": 32.5938, "step": 5983 }, { "epoch": 0.9602439122236932, "grad_norm": 12.463165283203125, "learning_rate": 3.999366549088446e-07, "loss": 32.6875, "step": 5984 }, { "epoch": 0.9604043807919124, "grad_norm": 12.26997184753418, "learning_rate": 3.967091372366727e-07, "loss": 32.5703, "step": 5985 }, { "epoch": 0.9605648493601315, "grad_norm": 12.387866020202637, "learning_rate": 3.934946436606468e-07, "loss": 32.5781, "step": 5986 }, { "epoch": 0.9607253179283508, "grad_norm": 12.005791664123535, "learning_rate": 3.9029317502478626e-07, "loss": 32.6094, "step": 5987 }, { "epoch": 0.96088578649657, "grad_norm": 12.332795143127441, "learning_rate": 3.871047321696908e-07, "loss": 32.5469, "step": 5988 }, { "epoch": 0.9610462550647891, "grad_norm": 12.138230323791504, "learning_rate": 3.839293159325241e-07, "loss": 32.6016, "step": 5989 }, { "epoch": 0.9612067236330084, "grad_norm": 12.06948471069336, "learning_rate": 3.807669271470471e-07, "loss": 32.6719, "step": 5990 }, { "epoch": 0.9613671922012276, "grad_norm": 12.070877075195312, "learning_rate": 3.7761756664358995e-07, "loss": 32.6641, "step": 5991 }, { "epoch": 0.9615276607694467, "grad_norm": 12.020557403564453, "learning_rate": 3.744812352490579e-07, "loss": 32.8125, "step": 5992 }, { "epoch": 0.961688129337666, "grad_norm": 11.880146026611328, "learning_rate": 3.713579337869588e-07, "loss": 32.6797, "step": 5993 }, { "epoch": 0.9618485979058852, "grad_norm": 12.325376510620117, "learning_rate": 3.6824766307734795e-07, "loss": 32.6094, "step": 5994 }, { "epoch": 0.9620090664741043, "grad_norm": 12.401365280151367, "learning_rate": 3.6515042393688304e-07, "loss": 32.7109, "step": 5995 }, { "epoch": 0.9621695350423236, "grad_norm": 12.133733749389648, "learning_rate": 3.6206621717878584e-07, "loss": 32.5781, "step": 5996 }, { "epoch": 0.9623300036105428, "grad_norm": 12.398680686950684, "learning_rate": 3.5899504361286417e-07, "loss": 32.6875, "step": 5997 }, { "epoch": 0.9624904721787619, "grad_norm": 12.110053062438965, "learning_rate": 3.559369040454952e-07, "loss": 32.8906, "step": 5998 }, { "epoch": 0.9626509407469812, "grad_norm": 12.354613304138184, "learning_rate": 3.528917992796532e-07, "loss": 32.875, "step": 5999 }, { "epoch": 0.9628114093152004, "grad_norm": 12.038311958312988, "learning_rate": 3.4985973011485985e-07, "loss": 33.0938, "step": 6000 }, { "epoch": 0.9629718778834195, "grad_norm": 12.290118217468262, "learning_rate": 3.468406973472449e-07, "loss": 32.8906, "step": 6001 }, { "epoch": 0.9631323464516388, "grad_norm": 12.17325496673584, "learning_rate": 3.4383470176950205e-07, "loss": 32.9531, "step": 6002 }, { "epoch": 0.963292815019858, "grad_norm": 12.524568557739258, "learning_rate": 3.408417441708889e-07, "loss": 32.6641, "step": 6003 }, { "epoch": 0.9634532835880771, "grad_norm": 12.323915481567383, "learning_rate": 3.378618253372545e-07, "loss": 32.7109, "step": 6004 }, { "epoch": 0.9636137521562964, "grad_norm": 11.965531349182129, "learning_rate": 3.3489494605102865e-07, "loss": 32.8438, "step": 6005 }, { "epoch": 0.9637742207245156, "grad_norm": 12.263541221618652, "learning_rate": 3.3194110709120484e-07, "loss": 32.7344, "step": 6006 }, { "epoch": 0.9639346892927347, "grad_norm": 12.134095191955566, "learning_rate": 3.2900030923336266e-07, "loss": 32.7266, "step": 6007 }, { "epoch": 0.964095157860954, "grad_norm": 12.138884544372559, "learning_rate": 3.2607255324965116e-07, "loss": 32.6719, "step": 6008 }, { "epoch": 0.9642556264291732, "grad_norm": 11.953065872192383, "learning_rate": 3.2315783990879423e-07, "loss": 32.7656, "step": 6009 }, { "epoch": 0.9644160949973923, "grad_norm": 12.069402694702148, "learning_rate": 3.20256169976102e-07, "loss": 32.6719, "step": 6010 }, { "epoch": 0.9645765635656116, "grad_norm": 12.074536323547363, "learning_rate": 3.1736754421344275e-07, "loss": 32.6797, "step": 6011 }, { "epoch": 0.9647370321338308, "grad_norm": 11.943315505981445, "learning_rate": 3.144919633792709e-07, "loss": 32.6875, "step": 6012 }, { "epoch": 0.9648975007020499, "grad_norm": 12.070876121520996, "learning_rate": 3.1162942822861587e-07, "loss": 32.7266, "step": 6013 }, { "epoch": 0.9650579692702692, "grad_norm": 12.142959594726562, "learning_rate": 3.0877993951308193e-07, "loss": 32.7578, "step": 6014 }, { "epoch": 0.9652184378384884, "grad_norm": 12.08270263671875, "learning_rate": 3.0594349798084285e-07, "loss": 32.7109, "step": 6015 }, { "epoch": 0.9653789064067075, "grad_norm": 12.004218101501465, "learning_rate": 3.0312010437664737e-07, "loss": 32.6328, "step": 6016 }, { "epoch": 0.9655393749749268, "grad_norm": 12.074748039245605, "learning_rate": 3.003097594418247e-07, "loss": 32.6953, "step": 6017 }, { "epoch": 0.965699843543146, "grad_norm": 12.077108383178711, "learning_rate": 2.975124639142679e-07, "loss": 32.75, "step": 6018 }, { "epoch": 0.9658603121113651, "grad_norm": 12.334678649902344, "learning_rate": 2.947282185284561e-07, "loss": 32.6172, "step": 6019 }, { "epoch": 0.9660207806795844, "grad_norm": 12.137611389160156, "learning_rate": 2.919570240154268e-07, "loss": 32.7031, "step": 6020 }, { "epoch": 0.9661812492478036, "grad_norm": 12.009403228759766, "learning_rate": 2.8919888110280903e-07, "loss": 32.6562, "step": 6021 }, { "epoch": 0.9663417178160227, "grad_norm": 12.133614540100098, "learning_rate": 2.8645379051479016e-07, "loss": 32.6797, "step": 6022 }, { "epoch": 0.966502186384242, "grad_norm": 12.140872955322266, "learning_rate": 2.8372175297213257e-07, "loss": 32.6797, "step": 6023 }, { "epoch": 0.9666626549524612, "grad_norm": 12.19991683959961, "learning_rate": 2.8100276919217904e-07, "loss": 32.6719, "step": 6024 }, { "epoch": 0.9668231235206803, "grad_norm": 12.323201179504395, "learning_rate": 2.7829683988884193e-07, "loss": 32.5781, "step": 6025 }, { "epoch": 0.9669835920888996, "grad_norm": 12.13323974609375, "learning_rate": 2.756039657725917e-07, "loss": 32.6172, "step": 6026 }, { "epoch": 0.9671440606571188, "grad_norm": 12.397845268249512, "learning_rate": 2.7292414755049623e-07, "loss": 32.6406, "step": 6027 }, { "epoch": 0.9673045292253379, "grad_norm": 11.944228172302246, "learning_rate": 2.7025738592617587e-07, "loss": 32.7188, "step": 6028 }, { "epoch": 0.9674649977935572, "grad_norm": 12.135015487670898, "learning_rate": 2.6760368159983173e-07, "loss": 32.625, "step": 6029 }, { "epoch": 0.9676254663617764, "grad_norm": 12.00781536102295, "learning_rate": 2.6496303526823417e-07, "loss": 32.6875, "step": 6030 }, { "epoch": 0.9677859349299955, "grad_norm": 12.071649551391602, "learning_rate": 2.6233544762472307e-07, "loss": 32.625, "step": 6031 }, { "epoch": 0.9679464034982148, "grad_norm": 12.390023231506348, "learning_rate": 2.5972091935921317e-07, "loss": 32.6016, "step": 6032 }, { "epoch": 0.968106872066434, "grad_norm": 12.256755828857422, "learning_rate": 2.571194511581831e-07, "loss": 32.6875, "step": 6033 }, { "epoch": 0.9682673406346531, "grad_norm": 12.069930076599121, "learning_rate": 2.545310437046977e-07, "loss": 32.6641, "step": 6034 }, { "epoch": 0.9684278092028724, "grad_norm": 12.260436058044434, "learning_rate": 2.519556976783799e-07, "loss": 32.5781, "step": 6035 }, { "epoch": 0.9685882777710916, "grad_norm": 12.20202350616455, "learning_rate": 2.4939341375542213e-07, "loss": 32.6797, "step": 6036 }, { "epoch": 0.9687487463393107, "grad_norm": 12.20960807800293, "learning_rate": 2.468441926085918e-07, "loss": 32.6641, "step": 6037 }, { "epoch": 0.96890921490753, "grad_norm": 12.509750366210938, "learning_rate": 2.4430803490722577e-07, "loss": 32.5469, "step": 6038 }, { "epoch": 0.9690696834757492, "grad_norm": 12.204446792602539, "learning_rate": 2.417849413172246e-07, "loss": 32.6641, "step": 6039 }, { "epoch": 0.9692301520439683, "grad_norm": 12.260791778564453, "learning_rate": 2.3927491250107513e-07, "loss": 32.5781, "step": 6040 }, { "epoch": 0.9693906206121876, "grad_norm": 12.141207695007324, "learning_rate": 2.3677794911782237e-07, "loss": 32.7109, "step": 6041 }, { "epoch": 0.9695510891804068, "grad_norm": 12.015997886657715, "learning_rate": 2.3429405182308073e-07, "loss": 32.6797, "step": 6042 }, { "epoch": 0.9697115577486259, "grad_norm": 12.268774032592773, "learning_rate": 2.3182322126902855e-07, "loss": 32.6016, "step": 6043 }, { "epoch": 0.9698720263168452, "grad_norm": 12.640323638916016, "learning_rate": 2.2936545810442467e-07, "loss": 32.5625, "step": 6044 }, { "epoch": 0.9700324948850644, "grad_norm": 11.947772979736328, "learning_rate": 2.269207629745973e-07, "loss": 32.7422, "step": 6045 }, { "epoch": 0.9701929634532835, "grad_norm": 12.401349067687988, "learning_rate": 2.2448913652142746e-07, "loss": 32.7578, "step": 6046 }, { "epoch": 0.9703534320215028, "grad_norm": 12.136629104614258, "learning_rate": 2.2207057938338216e-07, "loss": 32.6094, "step": 6047 }, { "epoch": 0.970513900589722, "grad_norm": 12.527872085571289, "learning_rate": 2.196650921954868e-07, "loss": 32.6406, "step": 6048 }, { "epoch": 0.9706743691579411, "grad_norm": 12.198957443237305, "learning_rate": 2.1727267558934726e-07, "loss": 32.7188, "step": 6049 }, { "epoch": 0.9708348377261604, "grad_norm": 12.164648056030273, "learning_rate": 2.1489333019311663e-07, "loss": 32.8828, "step": 6050 }, { "epoch": 0.9709953062943796, "grad_norm": 12.229124069213867, "learning_rate": 2.125270566315396e-07, "loss": 32.9609, "step": 6051 }, { "epoch": 0.9711557748625987, "grad_norm": 12.277730941772461, "learning_rate": 2.1017385552590808e-07, "loss": 32.7578, "step": 6052 }, { "epoch": 0.971316243430818, "grad_norm": 12.095230102539062, "learning_rate": 2.0783372749409446e-07, "loss": 32.8516, "step": 6053 }, { "epoch": 0.9714767119990372, "grad_norm": 12.41497802734375, "learning_rate": 2.0550667315053506e-07, "loss": 32.7969, "step": 6054 }, { "epoch": 0.9716371805672563, "grad_norm": 12.268839836120605, "learning_rate": 2.0319269310623e-07, "loss": 32.7344, "step": 6055 }, { "epoch": 0.9717976491354756, "grad_norm": 12.343233108520508, "learning_rate": 2.008917879687544e-07, "loss": 32.7891, "step": 6056 }, { "epoch": 0.9719581177036948, "grad_norm": 12.266586303710938, "learning_rate": 1.9860395834224165e-07, "loss": 32.7891, "step": 6057 }, { "epoch": 0.9721185862719139, "grad_norm": 12.131184577941895, "learning_rate": 1.963292048274057e-07, "loss": 32.6641, "step": 6058 }, { "epoch": 0.9722790548401332, "grad_norm": 12.003844261169434, "learning_rate": 1.9406752802150209e-07, "loss": 32.6797, "step": 6059 }, { "epoch": 0.9724395234083524, "grad_norm": 12.011630058288574, "learning_rate": 1.9181892851838356e-07, "loss": 32.7188, "step": 6060 }, { "epoch": 0.9725999919765715, "grad_norm": 11.881682395935059, "learning_rate": 1.89583406908439e-07, "loss": 32.7188, "step": 6061 }, { "epoch": 0.9727604605447908, "grad_norm": 12.195618629455566, "learning_rate": 1.8736096377865442e-07, "loss": 32.7266, "step": 6062 }, { "epoch": 0.97292092911301, "grad_norm": 11.883008003234863, "learning_rate": 1.8515159971255193e-07, "loss": 32.7266, "step": 6063 }, { "epoch": 0.9730813976812291, "grad_norm": 12.077873229980469, "learning_rate": 1.8295531529024523e-07, "loss": 32.7109, "step": 6064 }, { "epoch": 0.9732418662494484, "grad_norm": 12.072311401367188, "learning_rate": 1.8077211108838977e-07, "loss": 32.7266, "step": 6065 }, { "epoch": 0.9734023348176676, "grad_norm": 12.207962036132812, "learning_rate": 1.786019876802325e-07, "loss": 32.5859, "step": 6066 }, { "epoch": 0.9735628033858867, "grad_norm": 12.38875961303711, "learning_rate": 1.764449456355566e-07, "loss": 32.7344, "step": 6067 }, { "epoch": 0.973723271954106, "grad_norm": 12.52641487121582, "learning_rate": 1.7430098552074224e-07, "loss": 32.5703, "step": 6068 }, { "epoch": 0.9738837405223252, "grad_norm": 11.885881423950195, "learning_rate": 1.7217010789871146e-07, "loss": 32.7422, "step": 6069 }, { "epoch": 0.9740442090905443, "grad_norm": 12.339937210083008, "learning_rate": 1.7005231332895556e-07, "loss": 32.6719, "step": 6070 }, { "epoch": 0.9742046776587636, "grad_norm": 12.13304328918457, "learning_rate": 1.6794760236753527e-07, "loss": 32.7266, "step": 6071 }, { "epoch": 0.9743651462269828, "grad_norm": 12.260263442993164, "learning_rate": 1.6585597556708076e-07, "loss": 32.6328, "step": 6072 }, { "epoch": 0.9745256147952019, "grad_norm": 12.265755653381348, "learning_rate": 1.63777433476775e-07, "loss": 32.6641, "step": 6073 }, { "epoch": 0.9746860833634212, "grad_norm": 12.200181007385254, "learning_rate": 1.617119766423647e-07, "loss": 32.7344, "step": 6074 }, { "epoch": 0.9748465519316404, "grad_norm": 11.943659782409668, "learning_rate": 1.596596056061772e-07, "loss": 32.7031, "step": 6075 }, { "epoch": 0.9750070204998595, "grad_norm": 12.265103340148926, "learning_rate": 1.57620320907087e-07, "loss": 32.7109, "step": 6076 }, { "epoch": 0.9751674890680788, "grad_norm": 12.081156730651855, "learning_rate": 1.555941230805491e-07, "loss": 32.7188, "step": 6077 }, { "epoch": 0.975327957636298, "grad_norm": 12.254765510559082, "learning_rate": 1.535810126585546e-07, "loss": 32.5781, "step": 6078 }, { "epoch": 0.9754884262045171, "grad_norm": 12.069008827209473, "learning_rate": 1.5158099016969186e-07, "loss": 32.6406, "step": 6079 }, { "epoch": 0.9756488947727364, "grad_norm": 12.132584571838379, "learning_rate": 1.4959405613909073e-07, "loss": 32.5938, "step": 6080 }, { "epoch": 0.9758093633409556, "grad_norm": 12.011120796203613, "learning_rate": 1.4762021108845615e-07, "loss": 32.6484, "step": 6081 }, { "epoch": 0.9759698319091747, "grad_norm": 12.133279800415039, "learning_rate": 1.4565945553604576e-07, "loss": 32.6094, "step": 6082 }, { "epoch": 0.976130300477394, "grad_norm": 12.005486488342285, "learning_rate": 1.437117899966811e-07, "loss": 32.6328, "step": 6083 }, { "epoch": 0.9762907690456132, "grad_norm": 12.071885108947754, "learning_rate": 1.4177721498176423e-07, "loss": 32.6484, "step": 6084 }, { "epoch": 0.9764512376138323, "grad_norm": 12.200470924377441, "learning_rate": 1.3985573099923876e-07, "loss": 32.6406, "step": 6085 }, { "epoch": 0.9766117061820516, "grad_norm": 12.41226577758789, "learning_rate": 1.3794733855361784e-07, "loss": 32.6719, "step": 6086 }, { "epoch": 0.9767721747502708, "grad_norm": 12.508879661560059, "learning_rate": 1.3605203814597844e-07, "loss": 32.5547, "step": 6087 }, { "epoch": 0.97693264331849, "grad_norm": 12.3993501663208, "learning_rate": 1.341698302739669e-07, "loss": 32.6875, "step": 6088 }, { "epoch": 0.9770931118867092, "grad_norm": 12.281229972839355, "learning_rate": 1.323007154317879e-07, "loss": 32.6094, "step": 6089 }, { "epoch": 0.9772535804549284, "grad_norm": 12.00964069366455, "learning_rate": 1.3044469411019332e-07, "loss": 32.6562, "step": 6090 }, { "epoch": 0.9774140490231475, "grad_norm": 12.70207405090332, "learning_rate": 1.2860176679651559e-07, "loss": 32.6094, "step": 6091 }, { "epoch": 0.9775745175913668, "grad_norm": 12.142716407775879, "learning_rate": 1.2677193397465092e-07, "loss": 32.6484, "step": 6092 }, { "epoch": 0.977734986159586, "grad_norm": 12.20131778717041, "learning_rate": 1.2495519612503725e-07, "loss": 32.6328, "step": 6093 }, { "epoch": 0.9778954547278051, "grad_norm": 12.145359992980957, "learning_rate": 1.23151553724693e-07, "loss": 32.6797, "step": 6094 }, { "epoch": 0.9780559232960244, "grad_norm": 12.39149284362793, "learning_rate": 1.2136100724719492e-07, "loss": 32.6484, "step": 6095 }, { "epoch": 0.9782163918642436, "grad_norm": 12.26777172088623, "learning_rate": 1.1958355716267244e-07, "loss": 32.6094, "step": 6096 }, { "epoch": 0.9783768604324627, "grad_norm": 12.07116413116455, "learning_rate": 1.1781920393782453e-07, "loss": 32.6172, "step": 6097 }, { "epoch": 0.978537329000682, "grad_norm": 12.009273529052734, "learning_rate": 1.1606794803590282e-07, "loss": 32.8125, "step": 6098 }, { "epoch": 0.9786977975689012, "grad_norm": 12.090391159057617, "learning_rate": 1.1432978991673948e-07, "loss": 32.7656, "step": 6099 }, { "epoch": 0.9788582661371203, "grad_norm": 12.418083190917969, "learning_rate": 1.1260473003670279e-07, "loss": 32.9141, "step": 6100 }, { "epoch": 0.9790187347053396, "grad_norm": 12.080338478088379, "learning_rate": 1.10892768848736e-07, "loss": 32.8672, "step": 6101 }, { "epoch": 0.9791792032735588, "grad_norm": 12.079752922058105, "learning_rate": 1.0919390680234065e-07, "loss": 32.7969, "step": 6102 }, { "epoch": 0.979339671841778, "grad_norm": 12.268104553222656, "learning_rate": 1.0750814434358214e-07, "loss": 32.7969, "step": 6103 }, { "epoch": 0.9795001404099972, "grad_norm": 12.008441925048828, "learning_rate": 1.0583548191507864e-07, "loss": 32.7344, "step": 6104 }, { "epoch": 0.9796606089782164, "grad_norm": 12.397246360778809, "learning_rate": 1.0417591995601772e-07, "loss": 32.6953, "step": 6105 }, { "epoch": 0.9798210775464355, "grad_norm": 12.330196380615234, "learning_rate": 1.0252945890213972e-07, "loss": 32.6875, "step": 6106 }, { "epoch": 0.9799815461146548, "grad_norm": 12.351333618164062, "learning_rate": 1.0089609918574328e-07, "loss": 32.8672, "step": 6107 }, { "epoch": 0.980142014682874, "grad_norm": 12.269609451293945, "learning_rate": 9.927584123569645e-08, "loss": 32.6875, "step": 6108 }, { "epoch": 0.9803024832510931, "grad_norm": 12.262701034545898, "learning_rate": 9.766868547742558e-08, "loss": 32.668, "step": 6109 }, { "epoch": 0.9804629518193124, "grad_norm": 12.145179748535156, "learning_rate": 9.60746323329098e-08, "loss": 32.8203, "step": 6110 }, { "epoch": 0.9806234203875316, "grad_norm": 12.13728141784668, "learning_rate": 9.44936822206921e-08, "loss": 32.6797, "step": 6111 }, { "epoch": 0.9807838889557507, "grad_norm": 12.327314376831055, "learning_rate": 9.29258355558793e-08, "loss": 32.6562, "step": 6112 }, { "epoch": 0.98094435752397, "grad_norm": 12.016058921813965, "learning_rate": 9.137109275012545e-08, "loss": 32.7109, "step": 6113 }, { "epoch": 0.9811048260921892, "grad_norm": 12.20680046081543, "learning_rate": 8.982945421165401e-08, "loss": 32.6719, "step": 6114 }, { "epoch": 0.9812652946604083, "grad_norm": 12.078361511230469, "learning_rate": 8.830092034525228e-08, "loss": 32.7031, "step": 6115 }, { "epoch": 0.9814257632286276, "grad_norm": 12.20052433013916, "learning_rate": 8.678549155224924e-08, "loss": 32.6641, "step": 6116 }, { "epoch": 0.9815862317968468, "grad_norm": 12.267253875732422, "learning_rate": 8.528316823055437e-08, "loss": 32.6953, "step": 6117 }, { "epoch": 0.981746700365066, "grad_norm": 12.324004173278809, "learning_rate": 8.379395077461882e-08, "loss": 32.625, "step": 6118 }, { "epoch": 0.9819071689332852, "grad_norm": 12.138651847839355, "learning_rate": 8.231783957546869e-08, "loss": 32.7109, "step": 6119 }, { "epoch": 0.9820676375015044, "grad_norm": 12.144237518310547, "learning_rate": 8.08548350206606e-08, "loss": 32.7188, "step": 6120 }, { "epoch": 0.9822281060697235, "grad_norm": 12.199996948242188, "learning_rate": 7.940493749434285e-08, "loss": 32.6328, "step": 6121 }, { "epoch": 0.9823885746379428, "grad_norm": 12.51646900177002, "learning_rate": 7.796814737721092e-08, "loss": 32.5781, "step": 6122 }, { "epoch": 0.982549043206162, "grad_norm": 12.195613861083984, "learning_rate": 7.654446504650192e-08, "loss": 32.5547, "step": 6123 }, { "epoch": 0.9827095117743812, "grad_norm": 12.518488883972168, "learning_rate": 7.513389087603906e-08, "loss": 32.5703, "step": 6124 }, { "epoch": 0.9828699803426004, "grad_norm": 12.082844734191895, "learning_rate": 7.373642523618162e-08, "loss": 32.6484, "step": 6125 }, { "epoch": 0.9830304489108196, "grad_norm": 12.067102432250977, "learning_rate": 7.235206849385834e-08, "loss": 32.6797, "step": 6126 }, { "epoch": 0.9831909174790388, "grad_norm": 12.473899841308594, "learning_rate": 7.098082101255066e-08, "loss": 32.75, "step": 6127 }, { "epoch": 0.983351386047258, "grad_norm": 13.005560874938965, "learning_rate": 6.962268315230391e-08, "loss": 32.625, "step": 6128 }, { "epoch": 0.9835118546154772, "grad_norm": 12.01072883605957, "learning_rate": 6.827765526971619e-08, "loss": 32.6719, "step": 6129 }, { "epoch": 0.9836723231836964, "grad_norm": 12.333182334899902, "learning_rate": 6.694573771794388e-08, "loss": 32.6953, "step": 6130 }, { "epoch": 0.9838327917519156, "grad_norm": 12.156560897827148, "learning_rate": 6.562693084670723e-08, "loss": 32.7031, "step": 6131 }, { "epoch": 0.9839932603201348, "grad_norm": 12.075634956359863, "learning_rate": 6.432123500226817e-08, "loss": 32.6641, "step": 6132 }, { "epoch": 0.984153728888354, "grad_norm": 12.197182655334473, "learning_rate": 6.30286505274691e-08, "loss": 32.6484, "step": 6133 }, { "epoch": 0.9843141974565732, "grad_norm": 12.197007179260254, "learning_rate": 6.174917776168854e-08, "loss": 32.6016, "step": 6134 }, { "epoch": 0.9844746660247924, "grad_norm": 12.19750690460205, "learning_rate": 6.048281704087999e-08, "loss": 32.5781, "step": 6135 }, { "epoch": 0.9846351345930116, "grad_norm": 11.946809768676758, "learning_rate": 5.9229568697538597e-08, "loss": 32.6484, "step": 6136 }, { "epoch": 0.9847956031612308, "grad_norm": 12.333342552185059, "learning_rate": 5.7989433060723355e-08, "loss": 32.5938, "step": 6137 }, { "epoch": 0.98495607172945, "grad_norm": 12.479798316955566, "learning_rate": 5.6762410456057124e-08, "loss": 32.7031, "step": 6138 }, { "epoch": 0.9851165402976692, "grad_norm": 12.265161514282227, "learning_rate": 5.554850120570998e-08, "loss": 32.6406, "step": 6139 }, { "epoch": 0.9852770088658884, "grad_norm": 12.459465980529785, "learning_rate": 5.4347705628410296e-08, "loss": 32.5859, "step": 6140 }, { "epoch": 0.9854374774341076, "grad_norm": 12.073525428771973, "learning_rate": 5.316002403944476e-08, "loss": 32.6484, "step": 6141 }, { "epoch": 0.9855979460023268, "grad_norm": 12.527277946472168, "learning_rate": 5.198545675065836e-08, "loss": 32.6562, "step": 6142 }, { "epoch": 0.985758414570546, "grad_norm": 12.07540225982666, "learning_rate": 5.08240040704544e-08, "loss": 32.7188, "step": 6143 }, { "epoch": 0.9859188831387652, "grad_norm": 12.391270637512207, "learning_rate": 4.967566630378895e-08, "loss": 32.6562, "step": 6144 }, { "epoch": 0.9860793517069844, "grad_norm": 12.211688041687012, "learning_rate": 4.854044375217082e-08, "loss": 32.625, "step": 6145 }, { "epoch": 0.9862398202752036, "grad_norm": 12.207318305969238, "learning_rate": 4.7418336713678233e-08, "loss": 32.6328, "step": 6146 }, { "epoch": 0.9864002888434228, "grad_norm": 12.417784690856934, "learning_rate": 4.6309345482931086e-08, "loss": 32.7578, "step": 6147 }, { "epoch": 0.986560757411642, "grad_norm": 12.329800605773926, "learning_rate": 4.5213470351107565e-08, "loss": 32.5859, "step": 6148 }, { "epoch": 0.9867212259798612, "grad_norm": 12.08922004699707, "learning_rate": 4.413071160596083e-08, "loss": 32.8516, "step": 6149 }, { "epoch": 0.9868816945480804, "grad_norm": 12.528409004211426, "learning_rate": 4.306106953176903e-08, "loss": 33.1172, "step": 6150 }, { "epoch": 0.9870421631162996, "grad_norm": 11.971783638000488, "learning_rate": 4.2004544409396386e-08, "loss": 33.3984, "step": 6151 }, { "epoch": 0.9872026316845188, "grad_norm": 12.32100772857666, "learning_rate": 4.096113651623767e-08, "loss": 32.7188, "step": 6152 }, { "epoch": 0.987363100252738, "grad_norm": 12.204387664794922, "learning_rate": 3.9930846126268187e-08, "loss": 32.7812, "step": 6153 }, { "epoch": 0.9875235688209572, "grad_norm": 12.668750762939453, "learning_rate": 3.8913673509999304e-08, "loss": 32.8359, "step": 6154 }, { "epoch": 0.9876840373891764, "grad_norm": 12.087092399597168, "learning_rate": 3.7909618934500736e-08, "loss": 32.7969, "step": 6155 }, { "epoch": 0.9878445059573956, "grad_norm": 12.269383430480957, "learning_rate": 3.691868266341158e-08, "loss": 32.7031, "step": 6156 }, { "epoch": 0.9880049745256148, "grad_norm": 12.074990272521973, "learning_rate": 3.594086495690707e-08, "loss": 32.7188, "step": 6157 }, { "epoch": 0.988165443093834, "grad_norm": 12.136786460876465, "learning_rate": 3.497616607173737e-08, "loss": 32.7188, "step": 6158 }, { "epoch": 0.9883259116620532, "grad_norm": 12.005302429199219, "learning_rate": 3.402458626119986e-08, "loss": 32.6719, "step": 6159 }, { "epoch": 0.9884863802302724, "grad_norm": 12.138545036315918, "learning_rate": 3.3086125775133594e-08, "loss": 32.6953, "step": 6160 }, { "epoch": 0.9886468487984916, "grad_norm": 12.009520530700684, "learning_rate": 3.2160784859958104e-08, "loss": 32.6953, "step": 6161 }, { "epoch": 0.9888073173667108, "grad_norm": 12.323981285095215, "learning_rate": 3.124856375862906e-08, "loss": 32.6484, "step": 6162 }, { "epoch": 0.98896778593493, "grad_norm": 12.40037727355957, "learning_rate": 3.034946271066597e-08, "loss": 32.6797, "step": 6163 }, { "epoch": 0.9891282545031492, "grad_norm": 12.20250415802002, "learning_rate": 2.9463481952141102e-08, "loss": 32.7344, "step": 6164 }, { "epoch": 0.9892887230713684, "grad_norm": 12.328492164611816, "learning_rate": 2.859062171568505e-08, "loss": 32.6328, "step": 6165 }, { "epoch": 0.9894491916395876, "grad_norm": 12.384186744689941, "learning_rate": 2.7730882230475598e-08, "loss": 32.6094, "step": 6166 }, { "epoch": 0.9896096602078068, "grad_norm": 12.342653274536133, "learning_rate": 2.688426372225439e-08, "loss": 32.75, "step": 6167 }, { "epoch": 0.989770128776026, "grad_norm": 12.005776405334473, "learning_rate": 2.605076641331028e-08, "loss": 32.7031, "step": 6168 }, { "epoch": 0.9899305973442452, "grad_norm": 12.135232925415039, "learning_rate": 2.5230390522495983e-08, "loss": 32.6875, "step": 6169 }, { "epoch": 0.9900910659124644, "grad_norm": 12.06960678100586, "learning_rate": 2.442313626520587e-08, "loss": 32.6641, "step": 6170 }, { "epoch": 0.9902515344806836, "grad_norm": 12.27203369140625, "learning_rate": 2.362900385340372e-08, "loss": 32.6719, "step": 6171 }, { "epoch": 0.9904120030489028, "grad_norm": 12.473520278930664, "learning_rate": 2.2847993495600518e-08, "loss": 32.6172, "step": 6172 }, { "epoch": 0.990572471617122, "grad_norm": 12.06896686553955, "learning_rate": 2.2080105396854454e-08, "loss": 32.6797, "step": 6173 }, { "epoch": 0.9907329401853412, "grad_norm": 12.198149681091309, "learning_rate": 2.132533975879869e-08, "loss": 32.6094, "step": 6174 }, { "epoch": 0.9908934087535604, "grad_norm": 12.013348579406738, "learning_rate": 2.058369677959693e-08, "loss": 32.6484, "step": 6175 }, { "epoch": 0.9910538773217796, "grad_norm": 11.88232421875, "learning_rate": 1.985517665398784e-08, "loss": 32.7031, "step": 6176 }, { "epoch": 0.9912143458899988, "grad_norm": 12.136037826538086, "learning_rate": 1.9139779573246197e-08, "loss": 32.6328, "step": 6177 }, { "epoch": 0.991374814458218, "grad_norm": 12.006553649902344, "learning_rate": 1.843750572522174e-08, "loss": 32.6641, "step": 6178 }, { "epoch": 0.9915352830264372, "grad_norm": 12.272917747497559, "learning_rate": 1.774835529429475e-08, "loss": 32.6562, "step": 6179 }, { "epoch": 0.9916957515946564, "grad_norm": 12.009355545043945, "learning_rate": 1.7072328461420484e-08, "loss": 32.7266, "step": 6180 }, { "epoch": 0.9918562201628756, "grad_norm": 12.19447135925293, "learning_rate": 1.6409425404095846e-08, "loss": 32.6797, "step": 6181 }, { "epoch": 0.9920166887310948, "grad_norm": 12.410117149353027, "learning_rate": 1.5759646296381603e-08, "loss": 32.6016, "step": 6182 }, { "epoch": 0.992177157299314, "grad_norm": 11.880857467651367, "learning_rate": 1.5122991308880176e-08, "loss": 32.6719, "step": 6183 }, { "epoch": 0.9923376258675332, "grad_norm": 12.141195297241211, "learning_rate": 1.4499460608757842e-08, "loss": 32.7188, "step": 6184 }, { "epoch": 0.9924980944357524, "grad_norm": 12.583425521850586, "learning_rate": 1.3889054359739196e-08, "loss": 32.5547, "step": 6185 }, { "epoch": 0.9926585630039716, "grad_norm": 12.258193969726562, "learning_rate": 1.3291772722079376e-08, "loss": 32.5859, "step": 6186 }, { "epoch": 0.9928190315721908, "grad_norm": 12.269168853759766, "learning_rate": 1.2707615852619593e-08, "loss": 32.6875, "step": 6187 }, { "epoch": 0.99297950014041, "grad_norm": 12.07051944732666, "learning_rate": 1.2136583904731602e-08, "loss": 32.7188, "step": 6188 }, { "epoch": 0.9931399687086292, "grad_norm": 12.274292945861816, "learning_rate": 1.1578677028351025e-08, "loss": 32.6719, "step": 6189 }, { "epoch": 0.9933004372768484, "grad_norm": 12.333338737487793, "learning_rate": 1.1033895369960689e-08, "loss": 32.6016, "step": 6190 }, { "epoch": 0.9934609058450676, "grad_norm": 12.133672714233398, "learning_rate": 1.0502239072601727e-08, "loss": 32.6484, "step": 6191 }, { "epoch": 0.9936213744132868, "grad_norm": 12.469490051269531, "learning_rate": 9.983708275879134e-09, "loss": 32.5938, "step": 6192 }, { "epoch": 0.993781842981506, "grad_norm": 12.204483032226562, "learning_rate": 9.478303115922904e-09, "loss": 32.625, "step": 6193 }, { "epoch": 0.9939423115497252, "grad_norm": 12.07431697845459, "learning_rate": 8.9860237254491e-09, "loss": 32.7031, "step": 6194 }, { "epoch": 0.9941027801179444, "grad_norm": 12.546222686767578, "learning_rate": 8.506870233709884e-09, "loss": 32.6484, "step": 6195 }, { "epoch": 0.9942632486861636, "grad_norm": 12.089861869812012, "learning_rate": 8.040842766515732e-09, "loss": 32.7656, "step": 6196 }, { "epoch": 0.9944237172543828, "grad_norm": 12.202198028564453, "learning_rate": 7.58794144621877e-09, "loss": 32.6016, "step": 6197 }, { "epoch": 0.994584185822602, "grad_norm": 12.708792686462402, "learning_rate": 7.148166391751643e-09, "loss": 32.5156, "step": 6198 }, { "epoch": 0.9947446543908212, "grad_norm": 12.266938209533691, "learning_rate": 6.721517718566439e-09, "loss": 32.6641, "step": 6199 }, { "epoch": 0.9949051229590404, "grad_norm": 12.163092613220215, "learning_rate": 6.307995538701317e-09, "loss": 32.8281, "step": 6200 }, { "epoch": 0.9950655915272596, "grad_norm": 12.266057968139648, "learning_rate": 5.907599960724985e-09, "loss": 32.7812, "step": 6201 }, { "epoch": 0.9952260600954788, "grad_norm": 12.269065856933594, "learning_rate": 5.520331089770015e-09, "loss": 32.7188, "step": 6202 }, { "epoch": 0.995386528663698, "grad_norm": 12.138554573059082, "learning_rate": 5.14618902751618e-09, "loss": 32.7266, "step": 6203 }, { "epoch": 0.9955469972319172, "grad_norm": 12.149367332458496, "learning_rate": 4.785173872201565e-09, "loss": 32.7891, "step": 6204 }, { "epoch": 0.9957074658001364, "grad_norm": 12.264166831970215, "learning_rate": 4.437285718617013e-09, "loss": 32.6719, "step": 6205 }, { "epoch": 0.9958679343683556, "grad_norm": 12.278693199157715, "learning_rate": 4.1025246581116725e-09, "loss": 32.7891, "step": 6206 }, { "epoch": 0.9960284029365748, "grad_norm": 12.264019012451172, "learning_rate": 3.7808907785708e-09, "loss": 32.6406, "step": 6207 }, { "epoch": 0.996188871504794, "grad_norm": 12.512829780578613, "learning_rate": 3.472384164454612e-09, "loss": 32.8047, "step": 6208 }, { "epoch": 0.9963493400730132, "grad_norm": 12.325884819030762, "learning_rate": 3.177004896753877e-09, "loss": 32.6484, "step": 6209 }, { "epoch": 0.9965098086412324, "grad_norm": 12.592985153198242, "learning_rate": 2.894753053039878e-09, "loss": 32.6406, "step": 6210 }, { "epoch": 0.9966702772094516, "grad_norm": 11.942554473876953, "learning_rate": 2.6256287074144514e-09, "loss": 32.6484, "step": 6211 }, { "epoch": 0.9968307457776708, "grad_norm": 12.193041801452637, "learning_rate": 2.3696319305377414e-09, "loss": 32.6406, "step": 6212 }, { "epoch": 0.99699121434589, "grad_norm": 12.007349967956543, "learning_rate": 2.1267627896282005e-09, "loss": 32.6875, "step": 6213 }, { "epoch": 0.9971516829141092, "grad_norm": 11.946442604064941, "learning_rate": 1.89702134845704e-09, "loss": 32.7734, "step": 6214 }, { "epoch": 0.9973121514823284, "grad_norm": 12.067755699157715, "learning_rate": 1.680407667342676e-09, "loss": 32.7109, "step": 6215 }, { "epoch": 0.9974726200505476, "grad_norm": 12.38700008392334, "learning_rate": 1.4769218031618349e-09, "loss": 32.5391, "step": 6216 }, { "epoch": 0.9976330886187668, "grad_norm": 12.381599426269531, "learning_rate": 1.2865638093439992e-09, "loss": 32.6172, "step": 6217 }, { "epoch": 0.997793557186986, "grad_norm": 12.004240989685059, "learning_rate": 1.1093337358658585e-09, "loss": 32.6016, "step": 6218 }, { "epoch": 0.9979540257552052, "grad_norm": 12.466684341430664, "learning_rate": 9.45231629273513e-10, "loss": 32.625, "step": 6219 }, { "epoch": 0.9981144943234244, "grad_norm": 12.143556594848633, "learning_rate": 7.942575326380653e-10, "loss": 32.6562, "step": 6220 }, { "epoch": 0.9982749628916436, "grad_norm": 12.071325302124023, "learning_rate": 6.564114856111304e-10, "loss": 32.7344, "step": 6221 }, { "epoch": 0.9984354314598628, "grad_norm": 12.138935089111328, "learning_rate": 5.316935243859789e-10, "loss": 32.8047, "step": 6222 }, { "epoch": 0.998595900028082, "grad_norm": 12.325695991516113, "learning_rate": 4.20103681708639e-10, "loss": 32.5938, "step": 6223 }, { "epoch": 0.9987563685963012, "grad_norm": 12.195591926574707, "learning_rate": 3.216419868723453e-10, "loss": 32.5938, "step": 6224 }, { "epoch": 0.9989168371645204, "grad_norm": 12.133679389953613, "learning_rate": 2.3630846573419183e-10, "loss": 32.6016, "step": 6225 }, { "epoch": 0.9990773057327396, "grad_norm": 12.008955001831055, "learning_rate": 1.6410314070403054e-10, "loss": 32.625, "step": 6226 }, { "epoch": 0.9992377743009588, "grad_norm": 12.204169273376465, "learning_rate": 1.0502603073336836e-10, "loss": 32.6797, "step": 6227 }, { "epoch": 0.999398242869178, "grad_norm": 12.070178031921387, "learning_rate": 5.907715133202096e-11, "loss": 32.6016, "step": 6228 }, { "epoch": 0.9995587114373972, "grad_norm": 12.140969276428223, "learning_rate": 2.6256514579214854e-11, "loss": 32.6406, "step": 6229 }, { "epoch": 0.9997191800056164, "grad_norm": 12.390560150146484, "learning_rate": 6.564129073627356e-12, "loss": 32.6172, "step": 6230 }, { "epoch": 0.9998796485738356, "grad_norm": 12.162409782409668, "learning_rate": 0.0, "loss": 32.75, "step": 6231 } ], "logging_steps": 1, "max_steps": 6231, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 239, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2614542345437184.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }