{ "best_metric": 0.23343217372894287, "best_model_checkpoint": "./TrOCR-runs-aug-finale-v2/checkpoint-8000", "epoch": 4.941321803582459, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012353304508956147, "grad_norm": 71.72766876220703, "learning_rate": 9.99987646695491e-07, "loss": 9.2153, "step": 2 }, { "epoch": 0.0024706609017912293, "grad_norm": 67.64663696289062, "learning_rate": 9.999752933909822e-07, "loss": 10.1131, "step": 4 }, { "epoch": 0.0037059913526868438, "grad_norm": 57.88589859008789, "learning_rate": 9.999505867819642e-07, "loss": 10.3751, "step": 6 }, { "epoch": 0.004941321803582459, "grad_norm": 71.45208740234375, "learning_rate": 9.999258801729461e-07, "loss": 8.7143, "step": 8 }, { "epoch": 0.006176652254478073, "grad_norm": 46.98135757446289, "learning_rate": 9.999011735639284e-07, "loss": 8.6301, "step": 10 }, { "epoch": 0.0074119827053736875, "grad_norm": 61.08885955810547, "learning_rate": 9.998764669549103e-07, "loss": 8.6332, "step": 12 }, { "epoch": 0.008647313156269302, "grad_norm": 56.84159851074219, "learning_rate": 9.998641136504016e-07, "loss": 8.5605, "step": 14 }, { "epoch": 0.009882643607164917, "grad_norm": 49.98945617675781, "learning_rate": 9.998394070413835e-07, "loss": 8.622, "step": 16 }, { "epoch": 0.011117974058060531, "grad_norm": 58.00190353393555, "learning_rate": 9.998147004323655e-07, "loss": 7.3658, "step": 18 }, { "epoch": 0.012353304508956145, "grad_norm": 47.51729202270508, "learning_rate": 9.997899938233477e-07, "loss": 7.5778, "step": 20 }, { "epoch": 0.013588634959851761, "grad_norm": 64.86264038085938, "learning_rate": 9.997652872143297e-07, "loss": 7.7128, "step": 22 }, { "epoch": 0.014823965410747375, "grad_norm": 61.19200134277344, "learning_rate": 9.99740580605312e-07, "loss": 6.867, "step": 24 }, { "epoch": 0.01605929586164299, "grad_norm": 43.85209655761719, "learning_rate": 
9.99728227300803e-07, "loss": 6.7129, "step": 26 }, { "epoch": 0.017294626312538603, "grad_norm": 50.005550384521484, "learning_rate": 9.99703520691785e-07, "loss": 6.6063, "step": 28 }, { "epoch": 0.01852995676343422, "grad_norm": 47.58658981323242, "learning_rate": 9.996788140827671e-07, "loss": 6.0632, "step": 30 }, { "epoch": 0.019765287214329835, "grad_norm": 33.09718704223633, "learning_rate": 9.996541074737491e-07, "loss": 5.3508, "step": 32 }, { "epoch": 0.021000617665225447, "grad_norm": 48.82681655883789, "learning_rate": 9.996294008647313e-07, "loss": 5.5117, "step": 34 }, { "epoch": 0.022235948116121063, "grad_norm": 47.9992561340332, "learning_rate": 9.996046942557133e-07, "loss": 5.225, "step": 36 }, { "epoch": 0.02347127856701668, "grad_norm": 39.85643768310547, "learning_rate": 9.995799876466955e-07, "loss": 5.3149, "step": 38 }, { "epoch": 0.02470660901791229, "grad_norm": 39.706642150878906, "learning_rate": 9.995552810376775e-07, "loss": 5.0132, "step": 40 }, { "epoch": 0.025941939468807906, "grad_norm": 40.663238525390625, "learning_rate": 9.995305744286595e-07, "loss": 4.6379, "step": 42 }, { "epoch": 0.027177269919703522, "grad_norm": 39.07304000854492, "learning_rate": 9.995058678196417e-07, "loss": 4.0933, "step": 44 }, { "epoch": 0.028412600370599134, "grad_norm": 48.008544921875, "learning_rate": 9.994811612106237e-07, "loss": 4.8225, "step": 46 }, { "epoch": 0.02964793082149475, "grad_norm": 37.83854675292969, "learning_rate": 9.99456454601606e-07, "loss": 3.9614, "step": 48 }, { "epoch": 0.030883261272390366, "grad_norm": 33.701416015625, "learning_rate": 9.99431747992588e-07, "loss": 3.7444, "step": 50 }, { "epoch": 0.03211859172328598, "grad_norm": 34.781402587890625, "learning_rate": 9.9940704138357e-07, "loss": 3.8451, "step": 52 }, { "epoch": 0.033353922174181594, "grad_norm": 33.60985565185547, "learning_rate": 9.993823347745521e-07, "loss": 3.6996, "step": 54 }, { "epoch": 0.034589252625077206, "grad_norm": 41.1012077331543, 
"learning_rate": 9.993576281655343e-07, "loss": 3.8867, "step": 56 }, { "epoch": 0.035824583075972825, "grad_norm": 31.453266143798828, "learning_rate": 9.993329215565163e-07, "loss": 3.3616, "step": 58 }, { "epoch": 0.03705991352686844, "grad_norm": 51.81782913208008, "learning_rate": 9.993082149474983e-07, "loss": 2.7517, "step": 60 }, { "epoch": 0.03829524397776405, "grad_norm": 31.347463607788086, "learning_rate": 9.992835083384805e-07, "loss": 3.196, "step": 62 }, { "epoch": 0.03953057442865967, "grad_norm": 44.23834991455078, "learning_rate": 9.992588017294627e-07, "loss": 2.5395, "step": 64 }, { "epoch": 0.04076590487955528, "grad_norm": 28.05289077758789, "learning_rate": 9.992340951204447e-07, "loss": 2.2963, "step": 66 }, { "epoch": 0.042001235330450894, "grad_norm": 49.94702911376953, "learning_rate": 9.992093885114267e-07, "loss": 2.845, "step": 68 }, { "epoch": 0.04323656578134651, "grad_norm": 26.696266174316406, "learning_rate": 9.99184681902409e-07, "loss": 2.2142, "step": 70 }, { "epoch": 0.044471896232242125, "grad_norm": 26.680503845214844, "learning_rate": 9.99159975293391e-07, "loss": 2.1783, "step": 72 }, { "epoch": 0.04570722668313774, "grad_norm": 24.9090576171875, "learning_rate": 9.991352686843731e-07, "loss": 2.0249, "step": 74 }, { "epoch": 0.04694255713403336, "grad_norm": 31.43658447265625, "learning_rate": 9.99110562075355e-07, "loss": 2.1626, "step": 76 }, { "epoch": 0.04817788758492897, "grad_norm": 30.58403778076172, "learning_rate": 9.99085855466337e-07, "loss": 2.2337, "step": 78 }, { "epoch": 0.04941321803582458, "grad_norm": 27.157451629638672, "learning_rate": 9.990611488573193e-07, "loss": 1.9184, "step": 80 }, { "epoch": 0.0506485484867202, "grad_norm": 40.74632263183594, "learning_rate": 9.990364422483015e-07, "loss": 2.4097, "step": 82 }, { "epoch": 0.05188387893761581, "grad_norm": 27.926969528198242, "learning_rate": 9.990117356392835e-07, "loss": 1.8846, "step": 84 }, { "epoch": 0.053119209388511425, "grad_norm": 
20.523977279663086, "learning_rate": 9.989870290302655e-07, "loss": 1.4784, "step": 86 }, { "epoch": 0.054354539839407044, "grad_norm": 24.449764251708984, "learning_rate": 9.989623224212477e-07, "loss": 1.3437, "step": 88 }, { "epoch": 0.055589870290302656, "grad_norm": 31.04705047607422, "learning_rate": 9.989376158122297e-07, "loss": 1.8884, "step": 90 }, { "epoch": 0.05682520074119827, "grad_norm": 26.518516540527344, "learning_rate": 9.989129092032119e-07, "loss": 1.4222, "step": 92 }, { "epoch": 0.05806053119209389, "grad_norm": 33.895477294921875, "learning_rate": 9.988882025941939e-07, "loss": 1.9298, "step": 94 }, { "epoch": 0.0592958616429895, "grad_norm": 57.44404983520508, "learning_rate": 9.988634959851759e-07, "loss": 1.7292, "step": 96 }, { "epoch": 0.06053119209388511, "grad_norm": 30.77213478088379, "learning_rate": 9.98838789376158e-07, "loss": 1.7928, "step": 98 }, { "epoch": 0.06176652254478073, "grad_norm": 31.70636558532715, "learning_rate": 9.988140827671403e-07, "loss": 1.2945, "step": 100 }, { "epoch": 0.06300185299567634, "grad_norm": 24.849475860595703, "learning_rate": 9.987893761581223e-07, "loss": 1.5619, "step": 102 }, { "epoch": 0.06423718344657196, "grad_norm": 25.598918914794922, "learning_rate": 9.987646695491043e-07, "loss": 1.7367, "step": 104 }, { "epoch": 0.06547251389746757, "grad_norm": 23.529155731201172, "learning_rate": 9.987399629400865e-07, "loss": 1.1101, "step": 106 }, { "epoch": 0.06670784434836319, "grad_norm": 19.445283889770508, "learning_rate": 9.987152563310685e-07, "loss": 1.048, "step": 108 }, { "epoch": 0.06794317479925881, "grad_norm": 21.385316848754883, "learning_rate": 9.986905497220507e-07, "loss": 1.5492, "step": 110 }, { "epoch": 0.06917850525015441, "grad_norm": 24.34470558166504, "learning_rate": 9.986658431130327e-07, "loss": 1.9437, "step": 112 }, { "epoch": 0.07041383570105003, "grad_norm": 25.90285873413086, "learning_rate": 9.986411365040149e-07, "loss": 1.5043, "step": 114 }, { "epoch": 
0.07164916615194565, "grad_norm": 168.87521362304688, "learning_rate": 9.986164298949969e-07, "loss": 0.9734, "step": 116 }, { "epoch": 0.07288449660284126, "grad_norm": 25.614904403686523, "learning_rate": 9.985917232859789e-07, "loss": 1.3394, "step": 118 }, { "epoch": 0.07411982705373688, "grad_norm": 23.98908042907715, "learning_rate": 9.98567016676961e-07, "loss": 1.3632, "step": 120 }, { "epoch": 0.0753551575046325, "grad_norm": 16.986907958984375, "learning_rate": 9.98542310067943e-07, "loss": 1.3794, "step": 122 }, { "epoch": 0.0765904879555281, "grad_norm": 17.485687255859375, "learning_rate": 9.985176034589253e-07, "loss": 1.1723, "step": 124 }, { "epoch": 0.07782581840642372, "grad_norm": 21.20154571533203, "learning_rate": 9.984928968499073e-07, "loss": 1.4665, "step": 126 }, { "epoch": 0.07906114885731934, "grad_norm": 24.31536865234375, "learning_rate": 9.984681902408893e-07, "loss": 0.9344, "step": 128 }, { "epoch": 0.08029647930821494, "grad_norm": 24.695209503173828, "learning_rate": 9.984434836318715e-07, "loss": 1.2382, "step": 130 }, { "epoch": 0.08153180975911056, "grad_norm": 17.45518684387207, "learning_rate": 9.984187770228537e-07, "loss": 0.8183, "step": 132 }, { "epoch": 0.08276714021000618, "grad_norm": 17.702117919921875, "learning_rate": 9.983940704138357e-07, "loss": 0.8411, "step": 134 }, { "epoch": 0.08400247066090179, "grad_norm": 22.185117721557617, "learning_rate": 9.983693638048177e-07, "loss": 1.3129, "step": 136 }, { "epoch": 0.0852378011117974, "grad_norm": 14.196030616760254, "learning_rate": 9.983446571957999e-07, "loss": 0.9097, "step": 138 }, { "epoch": 0.08647313156269303, "grad_norm": 33.055572509765625, "learning_rate": 9.983199505867818e-07, "loss": 1.3507, "step": 140 }, { "epoch": 0.08770846201358863, "grad_norm": 19.699203491210938, "learning_rate": 9.98295243977764e-07, "loss": 1.3357, "step": 142 }, { "epoch": 0.08894379246448425, "grad_norm": 20.9130916595459, "learning_rate": 9.98270537368746e-07, "loss": 
0.9426, "step": 144 }, { "epoch": 0.09017912291537987, "grad_norm": 19.27548599243164, "learning_rate": 9.982458307597283e-07, "loss": 1.0504, "step": 146 }, { "epoch": 0.09141445336627547, "grad_norm": 14.8910493850708, "learning_rate": 9.982211241507102e-07, "loss": 0.8537, "step": 148 }, { "epoch": 0.0926497838171711, "grad_norm": 21.517824172973633, "learning_rate": 9.981964175416924e-07, "loss": 1.2364, "step": 150 }, { "epoch": 0.09388511426806671, "grad_norm": 17.42777442932129, "learning_rate": 9.981717109326744e-07, "loss": 0.8757, "step": 152 }, { "epoch": 0.09512044471896232, "grad_norm": 19.806381225585938, "learning_rate": 9.981470043236564e-07, "loss": 1.5345, "step": 154 }, { "epoch": 0.09635577516985794, "grad_norm": 17.592008590698242, "learning_rate": 9.981222977146386e-07, "loss": 0.8935, "step": 156 }, { "epoch": 0.09759110562075356, "grad_norm": 14.699040412902832, "learning_rate": 9.980975911056208e-07, "loss": 0.7365, "step": 158 }, { "epoch": 0.09882643607164916, "grad_norm": 23.289772033691406, "learning_rate": 9.980728844966028e-07, "loss": 1.0525, "step": 160 }, { "epoch": 0.10006176652254478, "grad_norm": 24.338224411010742, "learning_rate": 9.980481778875848e-07, "loss": 1.2888, "step": 162 }, { "epoch": 0.1012970969734404, "grad_norm": 19.1699275970459, "learning_rate": 9.98023471278567e-07, "loss": 1.2957, "step": 164 }, { "epoch": 0.102532427424336, "grad_norm": 17.44431495666504, "learning_rate": 9.97998764669549e-07, "loss": 1.2426, "step": 166 }, { "epoch": 0.10376775787523163, "grad_norm": 12.378316879272461, "learning_rate": 9.979740580605312e-07, "loss": 0.8658, "step": 168 }, { "epoch": 0.10500308832612724, "grad_norm": 15.12086009979248, "learning_rate": 9.979493514515132e-07, "loss": 1.001, "step": 170 }, { "epoch": 0.10623841877702285, "grad_norm": 18.822998046875, "learning_rate": 9.979246448424952e-07, "loss": 0.951, "step": 172 }, { "epoch": 0.10747374922791847, "grad_norm": 51.462501525878906, "learning_rate": 
9.978999382334774e-07, "loss": 0.8261, "step": 174 }, { "epoch": 0.10870907967881409, "grad_norm": 19.76039695739746, "learning_rate": 9.978752316244594e-07, "loss": 0.7488, "step": 176 }, { "epoch": 0.1099444101297097, "grad_norm": 14.760765075683594, "learning_rate": 9.978505250154416e-07, "loss": 1.2227, "step": 178 }, { "epoch": 0.11117974058060531, "grad_norm": 18.886341094970703, "learning_rate": 9.978258184064236e-07, "loss": 1.5584, "step": 180 }, { "epoch": 0.11241507103150093, "grad_norm": 15.282675743103027, "learning_rate": 9.978011117974058e-07, "loss": 1.2986, "step": 182 }, { "epoch": 0.11365040148239654, "grad_norm": 13.883673667907715, "learning_rate": 9.977764051883878e-07, "loss": 1.0403, "step": 184 }, { "epoch": 0.11488573193329216, "grad_norm": 16.408676147460938, "learning_rate": 9.977516985793698e-07, "loss": 0.8108, "step": 186 }, { "epoch": 0.11612106238418778, "grad_norm": 18.788314819335938, "learning_rate": 9.97726991970352e-07, "loss": 0.9349, "step": 188 }, { "epoch": 0.11735639283508338, "grad_norm": 17.197912216186523, "learning_rate": 9.977022853613342e-07, "loss": 0.7876, "step": 190 }, { "epoch": 0.118591723285979, "grad_norm": 17.1162166595459, "learning_rate": 9.976775787523162e-07, "loss": 0.8989, "step": 192 }, { "epoch": 0.11982705373687462, "grad_norm": 14.513823509216309, "learning_rate": 9.976528721432982e-07, "loss": 0.7845, "step": 194 }, { "epoch": 0.12106238418777023, "grad_norm": 22.108673095703125, "learning_rate": 9.976281655342804e-07, "loss": 1.1763, "step": 196 }, { "epoch": 0.12229771463866584, "grad_norm": 21.103227615356445, "learning_rate": 9.976034589252624e-07, "loss": 0.9939, "step": 198 }, { "epoch": 0.12353304508956146, "grad_norm": 22.806676864624023, "learning_rate": 9.975787523162446e-07, "loss": 0.9029, "step": 200 }, { "epoch": 0.12476837554045707, "grad_norm": 20.006986618041992, "learning_rate": 9.975540457072266e-07, "loss": 1.2054, "step": 202 }, { "epoch": 0.1260037059913527, "grad_norm": 
12.667633056640625, "learning_rate": 9.975293390982086e-07, "loss": 0.6179, "step": 204 }, { "epoch": 0.1272390364422483, "grad_norm": 18.188520431518555, "learning_rate": 9.975046324891908e-07, "loss": 1.4037, "step": 206 }, { "epoch": 0.12847436689314393, "grad_norm": 18.446788787841797, "learning_rate": 9.97479925880173e-07, "loss": 0.8284, "step": 208 }, { "epoch": 0.12970969734403953, "grad_norm": 11.185157775878906, "learning_rate": 9.97455219271155e-07, "loss": 0.6419, "step": 210 }, { "epoch": 0.13094502779493514, "grad_norm": 18.047321319580078, "learning_rate": 9.97430512662137e-07, "loss": 0.9218, "step": 212 }, { "epoch": 0.13218035824583077, "grad_norm": 15.082748413085938, "learning_rate": 9.974058060531192e-07, "loss": 0.8288, "step": 214 }, { "epoch": 0.13341568869672638, "grad_norm": 14.941337585449219, "learning_rate": 9.973810994441012e-07, "loss": 0.7424, "step": 216 }, { "epoch": 0.13465101914762198, "grad_norm": 13.711079597473145, "learning_rate": 9.973563928350834e-07, "loss": 0.9976, "step": 218 }, { "epoch": 0.13588634959851761, "grad_norm": 17.407535552978516, "learning_rate": 9.973316862260654e-07, "loss": 1.3196, "step": 220 }, { "epoch": 0.13712168004941322, "grad_norm": 14.61522388458252, "learning_rate": 9.973069796170474e-07, "loss": 0.8617, "step": 222 }, { "epoch": 0.13835701050030882, "grad_norm": 14.372088432312012, "learning_rate": 9.972822730080296e-07, "loss": 0.7798, "step": 224 }, { "epoch": 0.13959234095120446, "grad_norm": 19.171306610107422, "learning_rate": 9.972575663990118e-07, "loss": 1.0585, "step": 226 }, { "epoch": 0.14082767140210006, "grad_norm": 13.912126541137695, "learning_rate": 9.972328597899938e-07, "loss": 0.8506, "step": 228 }, { "epoch": 0.14206300185299567, "grad_norm": 13.152045249938965, "learning_rate": 9.972081531809758e-07, "loss": 0.9543, "step": 230 }, { "epoch": 0.1432983323038913, "grad_norm": 17.657926559448242, "learning_rate": 9.97183446571958e-07, "loss": 0.6974, "step": 232 }, { "epoch": 
0.1445336627547869, "grad_norm": 16.033647537231445, "learning_rate": 9.971587399629402e-07, "loss": 0.6913, "step": 234 }, { "epoch": 0.1457689932056825, "grad_norm": 14.736217498779297, "learning_rate": 9.971340333539222e-07, "loss": 0.5499, "step": 236 }, { "epoch": 0.14700432365657815, "grad_norm": 12.394487380981445, "learning_rate": 9.971093267449042e-07, "loss": 0.5879, "step": 238 }, { "epoch": 0.14823965410747375, "grad_norm": 16.227188110351562, "learning_rate": 9.970846201358864e-07, "loss": 1.2846, "step": 240 }, { "epoch": 0.14947498455836936, "grad_norm": 19.60755729675293, "learning_rate": 9.970599135268684e-07, "loss": 0.431, "step": 242 }, { "epoch": 0.150710315009265, "grad_norm": 21.10592269897461, "learning_rate": 9.970352069178506e-07, "loss": 0.9966, "step": 244 }, { "epoch": 0.1519456454601606, "grad_norm": 10.145764350891113, "learning_rate": 9.970105003088326e-07, "loss": 0.6007, "step": 246 }, { "epoch": 0.1531809759110562, "grad_norm": 17.178129196166992, "learning_rate": 9.969857936998146e-07, "loss": 0.783, "step": 248 }, { "epoch": 0.15441630636195183, "grad_norm": 21.214557647705078, "learning_rate": 9.969610870907968e-07, "loss": 0.9852, "step": 250 }, { "epoch": 0.15565163681284744, "grad_norm": 20.845138549804688, "learning_rate": 9.969363804817788e-07, "loss": 1.1633, "step": 252 }, { "epoch": 0.15688696726374304, "grad_norm": 12.531917572021484, "learning_rate": 9.96911673872761e-07, "loss": 0.3497, "step": 254 }, { "epoch": 0.15812229771463868, "grad_norm": 16.30786895751953, "learning_rate": 9.96886967263743e-07, "loss": 0.5291, "step": 256 }, { "epoch": 0.15935762816553428, "grad_norm": 16.56238555908203, "learning_rate": 9.968622606547252e-07, "loss": 0.5913, "step": 258 }, { "epoch": 0.1605929586164299, "grad_norm": 16.48219108581543, "learning_rate": 9.968375540457072e-07, "loss": 0.9389, "step": 260 }, { "epoch": 0.16182828906732552, "grad_norm": 12.103386878967285, "learning_rate": 9.968128474366892e-07, "loss": 0.7109, 
"step": 262 }, { "epoch": 0.16306361951822113, "grad_norm": 17.113920211791992, "learning_rate": 9.967881408276714e-07, "loss": 0.9197, "step": 264 }, { "epoch": 0.16429894996911673, "grad_norm": 11.577539443969727, "learning_rate": 9.967634342186536e-07, "loss": 0.4145, "step": 266 }, { "epoch": 0.16553428042001236, "grad_norm": 14.596395492553711, "learning_rate": 9.967387276096356e-07, "loss": 0.2645, "step": 268 }, { "epoch": 0.16676961087090797, "grad_norm": 14.482308387756348, "learning_rate": 9.967140210006176e-07, "loss": 0.9736, "step": 270 }, { "epoch": 0.16800494132180357, "grad_norm": 12.60310173034668, "learning_rate": 9.966893143915998e-07, "loss": 0.7393, "step": 272 }, { "epoch": 0.1692402717726992, "grad_norm": 25.737974166870117, "learning_rate": 9.966646077825817e-07, "loss": 0.7075, "step": 274 }, { "epoch": 0.1704756022235948, "grad_norm": 16.092885971069336, "learning_rate": 9.96639901173564e-07, "loss": 0.6598, "step": 276 }, { "epoch": 0.17171093267449042, "grad_norm": 17.0423641204834, "learning_rate": 9.96615194564546e-07, "loss": 0.7944, "step": 278 }, { "epoch": 0.17294626312538605, "grad_norm": 16.882080078125, "learning_rate": 9.96590487955528e-07, "loss": 0.9018, "step": 280 }, { "epoch": 0.17418159357628166, "grad_norm": 11.822787284851074, "learning_rate": 9.965657813465101e-07, "loss": 0.4928, "step": 282 }, { "epoch": 0.17541692402717726, "grad_norm": 17.484827041625977, "learning_rate": 9.965410747374924e-07, "loss": 0.8534, "step": 284 }, { "epoch": 0.1766522544780729, "grad_norm": 10.80589485168457, "learning_rate": 9.965163681284743e-07, "loss": 0.653, "step": 286 }, { "epoch": 0.1778875849289685, "grad_norm": 10.111434936523438, "learning_rate": 9.964916615194563e-07, "loss": 0.5558, "step": 288 }, { "epoch": 0.1791229153798641, "grad_norm": 11.375866889953613, "learning_rate": 9.964669549104385e-07, "loss": 0.4523, "step": 290 }, { "epoch": 0.18035824583075974, "grad_norm": 27.320940017700195, "learning_rate": 
9.964422483014205e-07, "loss": 1.2617, "step": 292 }, { "epoch": 0.18159357628165534, "grad_norm": 19.13496971130371, "learning_rate": 9.964175416924027e-07, "loss": 1.0371, "step": 294 }, { "epoch": 0.18282890673255095, "grad_norm": 17.60503387451172, "learning_rate": 9.963928350833847e-07, "loss": 0.9687, "step": 296 }, { "epoch": 0.18406423718344658, "grad_norm": 17.020675659179688, "learning_rate": 9.963681284743667e-07, "loss": 0.5305, "step": 298 }, { "epoch": 0.1852995676343422, "grad_norm": 19.35376739501953, "learning_rate": 9.96343421865349e-07, "loss": 0.9451, "step": 300 }, { "epoch": 0.1865348980852378, "grad_norm": 17.563274383544922, "learning_rate": 9.963187152563311e-07, "loss": 0.8488, "step": 302 }, { "epoch": 0.18777022853613343, "grad_norm": 15.941134452819824, "learning_rate": 9.962940086473131e-07, "loss": 0.8172, "step": 304 }, { "epoch": 0.18900555898702903, "grad_norm": 15.170184135437012, "learning_rate": 9.962693020382951e-07, "loss": 0.6588, "step": 306 }, { "epoch": 0.19024088943792464, "grad_norm": 14.167213439941406, "learning_rate": 9.962445954292773e-07, "loss": 0.7326, "step": 308 }, { "epoch": 0.19147621988882027, "grad_norm": 17.4707088470459, "learning_rate": 9.962198888202593e-07, "loss": 0.9899, "step": 310 }, { "epoch": 0.19271155033971588, "grad_norm": 17.586883544921875, "learning_rate": 9.961951822112415e-07, "loss": 0.8041, "step": 312 }, { "epoch": 0.19394688079061148, "grad_norm": 13.271316528320312, "learning_rate": 9.961704756022235e-07, "loss": 0.4108, "step": 314 }, { "epoch": 0.19518221124150711, "grad_norm": 7.508425712585449, "learning_rate": 9.961457689932057e-07, "loss": 0.2587, "step": 316 }, { "epoch": 0.19641754169240272, "grad_norm": 26.1580810546875, "learning_rate": 9.961210623841877e-07, "loss": 1.7318, "step": 318 }, { "epoch": 0.19765287214329832, "grad_norm": 16.42892074584961, "learning_rate": 9.960963557751697e-07, "loss": 0.729, "step": 320 }, { "epoch": 0.19888820259419396, "grad_norm": 
21.161420822143555, "learning_rate": 9.96071649166152e-07, "loss": 1.1597, "step": 322 }, { "epoch": 0.20012353304508956, "grad_norm": 11.683164596557617, "learning_rate": 9.96046942557134e-07, "loss": 0.3326, "step": 324 }, { "epoch": 0.20135886349598517, "grad_norm": 14.820412635803223, "learning_rate": 9.960222359481161e-07, "loss": 0.5379, "step": 326 }, { "epoch": 0.2025941939468808, "grad_norm": 9.761212348937988, "learning_rate": 9.959975293390981e-07, "loss": 0.5222, "step": 328 }, { "epoch": 0.2038295243977764, "grad_norm": 11.775703430175781, "learning_rate": 9.959728227300803e-07, "loss": 0.555, "step": 330 }, { "epoch": 0.205064854848672, "grad_norm": 8.24718952178955, "learning_rate": 9.959481161210623e-07, "loss": 0.3504, "step": 332 }, { "epoch": 0.20630018529956765, "grad_norm": 8.57484245300293, "learning_rate": 9.959234095120445e-07, "loss": 0.2836, "step": 334 }, { "epoch": 0.20753551575046325, "grad_norm": 13.6355619430542, "learning_rate": 9.958987029030265e-07, "loss": 0.6494, "step": 336 }, { "epoch": 0.20877084620135886, "grad_norm": 12.57628345489502, "learning_rate": 9.958739962940085e-07, "loss": 0.6571, "step": 338 }, { "epoch": 0.2100061766522545, "grad_norm": 7.38503360748291, "learning_rate": 9.958492896849907e-07, "loss": 0.3936, "step": 340 }, { "epoch": 0.2112415071031501, "grad_norm": 11.209733009338379, "learning_rate": 9.958245830759727e-07, "loss": 0.501, "step": 342 }, { "epoch": 0.2124768375540457, "grad_norm": 22.492795944213867, "learning_rate": 9.95799876466955e-07, "loss": 1.0777, "step": 344 }, { "epoch": 0.21371216800494133, "grad_norm": 8.73935317993164, "learning_rate": 9.957751698579369e-07, "loss": 0.448, "step": 346 }, { "epoch": 0.21494749845583694, "grad_norm": 13.384405136108398, "learning_rate": 9.95750463248919e-07, "loss": 0.8644, "step": 348 }, { "epoch": 0.21618282890673254, "grad_norm": 22.039762496948242, "learning_rate": 9.95725756639901e-07, "loss": 0.7745, "step": 350 }, { "epoch": 0.21741815935762818, 
"grad_norm": 14.62644100189209, "learning_rate": 9.957010500308833e-07, "loss": 0.6445, "step": 352 }, { "epoch": 0.21865348980852378, "grad_norm": 11.361739158630371, "learning_rate": 9.956763434218653e-07, "loss": 0.4719, "step": 354 }, { "epoch": 0.2198888202594194, "grad_norm": 13.390713691711426, "learning_rate": 9.956516368128473e-07, "loss": 0.4385, "step": 356 }, { "epoch": 0.22112415071031502, "grad_norm": 12.377495765686035, "learning_rate": 9.956269302038295e-07, "loss": 0.5283, "step": 358 }, { "epoch": 0.22235948116121063, "grad_norm": 11.655351638793945, "learning_rate": 9.956022235948117e-07, "loss": 0.2497, "step": 360 }, { "epoch": 0.22359481161210623, "grad_norm": 16.40611457824707, "learning_rate": 9.955775169857937e-07, "loss": 0.5596, "step": 362 }, { "epoch": 0.22483014206300186, "grad_norm": 12.542157173156738, "learning_rate": 9.955528103767757e-07, "loss": 0.4112, "step": 364 }, { "epoch": 0.22606547251389747, "grad_norm": 12.771021842956543, "learning_rate": 9.955281037677579e-07, "loss": 0.4966, "step": 366 }, { "epoch": 0.22730080296479308, "grad_norm": 14.755356788635254, "learning_rate": 9.955033971587399e-07, "loss": 0.7261, "step": 368 }, { "epoch": 0.2285361334156887, "grad_norm": 10.18393325805664, "learning_rate": 9.95478690549722e-07, "loss": 0.3005, "step": 370 }, { "epoch": 0.2297714638665843, "grad_norm": 15.390872955322266, "learning_rate": 9.95453983940704e-07, "loss": 0.7446, "step": 372 }, { "epoch": 0.23100679431747992, "grad_norm": 19.43260955810547, "learning_rate": 9.95429277331686e-07, "loss": 0.7721, "step": 374 }, { "epoch": 0.23224212476837555, "grad_norm": 13.79875373840332, "learning_rate": 9.954045707226683e-07, "loss": 0.683, "step": 376 }, { "epoch": 0.23347745521927116, "grad_norm": 14.18612003326416, "learning_rate": 9.953798641136505e-07, "loss": 0.8194, "step": 378 }, { "epoch": 0.23471278567016676, "grad_norm": 16.63796043395996, "learning_rate": 9.953551575046325e-07, "loss": 0.5045, "step": 380 }, { 
"epoch": 0.2359481161210624, "grad_norm": 13.430312156677246, "learning_rate": 9.953304508956145e-07, "loss": 0.6202, "step": 382 }, { "epoch": 0.237183446571958, "grad_norm": 10.66471004486084, "learning_rate": 9.953057442865967e-07, "loss": 0.6735, "step": 384 }, { "epoch": 0.2384187770228536, "grad_norm": 16.518630981445312, "learning_rate": 9.952810376775787e-07, "loss": 0.4911, "step": 386 }, { "epoch": 0.23965410747374924, "grad_norm": 16.44118881225586, "learning_rate": 9.952563310685609e-07, "loss": 0.5488, "step": 388 }, { "epoch": 0.24088943792464484, "grad_norm": 14.693402290344238, "learning_rate": 9.952316244595429e-07, "loss": 0.8461, "step": 390 }, { "epoch": 0.24212476837554045, "grad_norm": 12.455424308776855, "learning_rate": 9.95206917850525e-07, "loss": 0.7984, "step": 392 }, { "epoch": 0.24336009882643608, "grad_norm": 16.71371841430664, "learning_rate": 9.95182211241507e-07, "loss": 0.669, "step": 394 }, { "epoch": 0.2445954292773317, "grad_norm": 35.789100646972656, "learning_rate": 9.95157504632489e-07, "loss": 0.9036, "step": 396 }, { "epoch": 0.2458307597282273, "grad_norm": 10.194372177124023, "learning_rate": 9.951327980234713e-07, "loss": 0.5287, "step": 398 }, { "epoch": 0.24706609017912293, "grad_norm": 15.933816909790039, "learning_rate": 9.951080914144533e-07, "loss": 0.4575, "step": 400 }, { "epoch": 0.24830142063001853, "grad_norm": 5.894368648529053, "learning_rate": 9.950833848054355e-07, "loss": 0.4456, "step": 402 }, { "epoch": 0.24953675108091414, "grad_norm": 17.090517044067383, "learning_rate": 9.950586781964175e-07, "loss": 0.2799, "step": 404 }, { "epoch": 0.25077208153180974, "grad_norm": 15.057173728942871, "learning_rate": 9.950339715873994e-07, "loss": 0.575, "step": 406 }, { "epoch": 0.2520074119827054, "grad_norm": 15.679609298706055, "learning_rate": 9.950092649783816e-07, "loss": 0.7558, "step": 408 }, { "epoch": 0.253242742433601, "grad_norm": 17.215869903564453, "learning_rate": 9.949845583693639e-07, "loss": 
1.1198, "step": 410 }, { "epoch": 0.2544780728844966, "grad_norm": 10.309164047241211, "learning_rate": 9.949598517603458e-07, "loss": 0.4055, "step": 412 }, { "epoch": 0.2557134033353922, "grad_norm": 15.062151908874512, "learning_rate": 9.949351451513278e-07, "loss": 0.7199, "step": 414 }, { "epoch": 0.25694873378628785, "grad_norm": 14.115408897399902, "learning_rate": 9.9491043854231e-07, "loss": 0.5529, "step": 416 }, { "epoch": 0.25818406423718343, "grad_norm": 14.018616676330566, "learning_rate": 9.94885731933292e-07, "loss": 1.2108, "step": 418 }, { "epoch": 0.25941939468807906, "grad_norm": 13.71825122833252, "learning_rate": 9.948610253242742e-07, "loss": 0.7183, "step": 420 }, { "epoch": 0.2606547251389747, "grad_norm": 13.472809791564941, "learning_rate": 9.948363187152562e-07, "loss": 0.637, "step": 422 }, { "epoch": 0.2618900555898703, "grad_norm": 15.608954429626465, "learning_rate": 9.948116121062384e-07, "loss": 0.7388, "step": 424 }, { "epoch": 0.2631253860407659, "grad_norm": 9.622334480285645, "learning_rate": 9.947869054972204e-07, "loss": 0.6346, "step": 426 }, { "epoch": 0.26436071649166154, "grad_norm": 21.81568145751953, "learning_rate": 9.947621988882026e-07, "loss": 1.2417, "step": 428 }, { "epoch": 0.2655960469425571, "grad_norm": 13.337302207946777, "learning_rate": 9.947374922791846e-07, "loss": 0.2575, "step": 430 }, { "epoch": 0.26683137739345275, "grad_norm": 21.577953338623047, "learning_rate": 9.947127856701666e-07, "loss": 1.2993, "step": 432 }, { "epoch": 0.2680667078443484, "grad_norm": 5.260891437530518, "learning_rate": 9.946880790611488e-07, "loss": 0.3742, "step": 434 }, { "epoch": 0.26930203829524396, "grad_norm": 11.256314277648926, "learning_rate": 9.94663372452131e-07, "loss": 0.2751, "step": 436 }, { "epoch": 0.2705373687461396, "grad_norm": 13.449044227600098, "learning_rate": 9.94638665843113e-07, "loss": 0.9115, "step": 438 }, { "epoch": 0.27177269919703523, "grad_norm": 17.206329345703125, "learning_rate": 
9.94613959234095e-07, "loss": 0.9115, "step": 440 }, { "epoch": 0.2730080296479308, "grad_norm": 15.699419975280762, "learning_rate": 9.945892526250772e-07, "loss": 0.5676, "step": 442 }, { "epoch": 0.27424336009882644, "grad_norm": 35.12668228149414, "learning_rate": 9.945645460160592e-07, "loss": 0.5921, "step": 444 }, { "epoch": 0.27547869054972207, "grad_norm": 19.437124252319336, "learning_rate": 9.945398394070414e-07, "loss": 0.8908, "step": 446 }, { "epoch": 0.27671402100061765, "grad_norm": 10.11853313446045, "learning_rate": 9.945151327980234e-07, "loss": 0.3397, "step": 448 }, { "epoch": 0.2779493514515133, "grad_norm": 9.424795150756836, "learning_rate": 9.944904261890054e-07, "loss": 0.4716, "step": 450 }, { "epoch": 0.2791846819024089, "grad_norm": 20.16798973083496, "learning_rate": 9.944657195799876e-07, "loss": 0.5683, "step": 452 }, { "epoch": 0.2804200123533045, "grad_norm": 16.144746780395508, "learning_rate": 9.944410129709696e-07, "loss": 0.6009, "step": 454 }, { "epoch": 0.2816553428042001, "grad_norm": 16.022663116455078, "learning_rate": 9.944163063619518e-07, "loss": 0.5731, "step": 456 }, { "epoch": 0.28289067325509576, "grad_norm": 10.294392585754395, "learning_rate": 9.943915997529338e-07, "loss": 0.2914, "step": 458 }, { "epoch": 0.28412600370599134, "grad_norm": 7.467857837677002, "learning_rate": 9.94366893143916e-07, "loss": 0.2406, "step": 460 }, { "epoch": 0.28536133415688697, "grad_norm": 14.024642944335938, "learning_rate": 9.94342186534898e-07, "loss": 0.5511, "step": 462 }, { "epoch": 0.2865966646077826, "grad_norm": 18.540225982666016, "learning_rate": 9.943174799258802e-07, "loss": 0.673, "step": 464 }, { "epoch": 0.2878319950586782, "grad_norm": 7.857827663421631, "learning_rate": 9.942927733168622e-07, "loss": 0.318, "step": 466 }, { "epoch": 0.2890673255095738, "grad_norm": 9.922952651977539, "learning_rate": 9.942680667078444e-07, "loss": 0.6641, "step": 468 }, { "epoch": 0.29030265596046945, "grad_norm": 
15.63807201385498, "learning_rate": 9.942433600988264e-07, "loss": 0.6554, "step": 470 }, { "epoch": 0.291537986411365, "grad_norm": 17.43051528930664, "learning_rate": 9.942186534898084e-07, "loss": 0.5039, "step": 472 }, { "epoch": 0.29277331686226066, "grad_norm": 11.257383346557617, "learning_rate": 9.941939468807906e-07, "loss": 0.424, "step": 474 }, { "epoch": 0.2940086473131563, "grad_norm": 15.08475399017334, "learning_rate": 9.941692402717726e-07, "loss": 0.6401, "step": 476 }, { "epoch": 0.29524397776405187, "grad_norm": 11.43052864074707, "learning_rate": 9.941445336627548e-07, "loss": 0.4775, "step": 478 }, { "epoch": 0.2964793082149475, "grad_norm": 9.09970474243164, "learning_rate": 9.941198270537368e-07, "loss": 0.309, "step": 480 }, { "epoch": 0.29771463866584313, "grad_norm": 12.336624145507812, "learning_rate": 9.940951204447188e-07, "loss": 0.5821, "step": 482 }, { "epoch": 0.2989499691167387, "grad_norm": 13.842415809631348, "learning_rate": 9.94070413835701e-07, "loss": 0.5675, "step": 484 }, { "epoch": 0.30018529956763434, "grad_norm": 14.6986665725708, "learning_rate": 9.940457072266832e-07, "loss": 0.7702, "step": 486 }, { "epoch": 0.30142063001853, "grad_norm": 13.009441375732422, "learning_rate": 9.940210006176652e-07, "loss": 0.5388, "step": 488 }, { "epoch": 0.30265596046942556, "grad_norm": 11.021795272827148, "learning_rate": 9.939962940086472e-07, "loss": 0.407, "step": 490 }, { "epoch": 0.3038912909203212, "grad_norm": 10.79912281036377, "learning_rate": 9.939715873996294e-07, "loss": 0.5867, "step": 492 }, { "epoch": 0.3051266213712168, "grad_norm": 11.231956481933594, "learning_rate": 9.939468807906114e-07, "loss": 0.3549, "step": 494 }, { "epoch": 0.3063619518221124, "grad_norm": 16.803054809570312, "learning_rate": 9.939221741815936e-07, "loss": 0.8485, "step": 496 }, { "epoch": 0.30759728227300803, "grad_norm": 14.21468734741211, "learning_rate": 9.938974675725756e-07, "loss": 0.6306, "step": 498 }, { "epoch": 
0.30883261272390367, "grad_norm": 9.012039184570312, "learning_rate": 9.938727609635576e-07, "loss": 0.5193, "step": 500 }, { "epoch": 0.30883261272390367, "eval_cer": 0.10004846638510005, "eval_loss": 0.5267010927200317, "eval_runtime": 63.7772, "eval_samples_per_second": 12.873, "eval_steps_per_second": 1.615, "step": 500 }, { "epoch": 0.31006794317479924, "grad_norm": 14.587767601013184, "learning_rate": 9.938480543545398e-07, "loss": 0.7727, "step": 502 }, { "epoch": 0.3113032736256949, "grad_norm": 8.637341499328613, "learning_rate": 9.93823347745522e-07, "loss": 0.3489, "step": 504 }, { "epoch": 0.3125386040765905, "grad_norm": 13.284430503845215, "learning_rate": 9.93798641136504e-07, "loss": 0.5621, "step": 506 }, { "epoch": 0.3137739345274861, "grad_norm": 8.449568748474121, "learning_rate": 9.93773934527486e-07, "loss": 0.3588, "step": 508 }, { "epoch": 0.3150092649783817, "grad_norm": 13.436771392822266, "learning_rate": 9.937492279184682e-07, "loss": 0.7739, "step": 510 }, { "epoch": 0.31624459542927735, "grad_norm": 6.883078575134277, "learning_rate": 9.937245213094504e-07, "loss": 0.3397, "step": 512 }, { "epoch": 0.31747992588017293, "grad_norm": 11.048806190490723, "learning_rate": 9.936998147004324e-07, "loss": 0.5928, "step": 514 }, { "epoch": 0.31871525633106856, "grad_norm": 11.2460298538208, "learning_rate": 9.936751080914144e-07, "loss": 0.6285, "step": 516 }, { "epoch": 0.3199505867819642, "grad_norm": 14.401810646057129, "learning_rate": 9.936504014823966e-07, "loss": 0.8767, "step": 518 }, { "epoch": 0.3211859172328598, "grad_norm": 9.32492446899414, "learning_rate": 9.936256948733786e-07, "loss": 0.5252, "step": 520 }, { "epoch": 0.3224212476837554, "grad_norm": 13.264812469482422, "learning_rate": 9.936009882643608e-07, "loss": 0.5916, "step": 522 }, { "epoch": 0.32365657813465104, "grad_norm": 10.455059051513672, "learning_rate": 9.935762816553428e-07, "loss": 0.4993, "step": 524 }, { "epoch": 0.3248919085855466, "grad_norm": 
11.208000183105469, "learning_rate": 9.935515750463248e-07, "loss": 0.421, "step": 526 }, { "epoch": 0.32612723903644225, "grad_norm": 13.890771865844727, "learning_rate": 9.93526868437307e-07, "loss": 0.4011, "step": 528 }, { "epoch": 0.3273625694873379, "grad_norm": 13.576091766357422, "learning_rate": 9.93502161828289e-07, "loss": 0.2686, "step": 530 }, { "epoch": 0.32859789993823346, "grad_norm": 21.836318969726562, "learning_rate": 9.934774552192712e-07, "loss": 0.4207, "step": 532 }, { "epoch": 0.3298332303891291, "grad_norm": 14.639293670654297, "learning_rate": 9.934527486102532e-07, "loss": 0.5026, "step": 534 }, { "epoch": 0.33106856084002473, "grad_norm": 15.862173080444336, "learning_rate": 9.934280420012354e-07, "loss": 0.6967, "step": 536 }, { "epoch": 0.3323038912909203, "grad_norm": 9.525956153869629, "learning_rate": 9.934033353922174e-07, "loss": 0.2852, "step": 538 }, { "epoch": 0.33353922174181594, "grad_norm": 14.413555145263672, "learning_rate": 9.933786287831993e-07, "loss": 0.476, "step": 540 }, { "epoch": 0.33477455219271157, "grad_norm": 9.307226181030273, "learning_rate": 9.933539221741815e-07, "loss": 0.6453, "step": 542 }, { "epoch": 0.33600988264360715, "grad_norm": 10.434736251831055, "learning_rate": 9.933292155651638e-07, "loss": 0.4417, "step": 544 }, { "epoch": 0.3372452130945028, "grad_norm": 13.389243125915527, "learning_rate": 9.933045089561457e-07, "loss": 0.6562, "step": 546 }, { "epoch": 0.3384805435453984, "grad_norm": 17.70511817932129, "learning_rate": 9.932798023471277e-07, "loss": 0.3946, "step": 548 }, { "epoch": 0.339715873996294, "grad_norm": 9.944230079650879, "learning_rate": 9.9325509573811e-07, "loss": 0.4497, "step": 550 }, { "epoch": 0.3409512044471896, "grad_norm": 16.116775512695312, "learning_rate": 9.93230389129092e-07, "loss": 1.1408, "step": 552 }, { "epoch": 0.34218653489808526, "grad_norm": 28.37726402282715, "learning_rate": 9.932056825200741e-07, "loss": 0.411, "step": 554 }, { "epoch": 
0.34342186534898084, "grad_norm": 14.809420585632324, "learning_rate": 9.931809759110561e-07, "loss": 0.452, "step": 556 }, { "epoch": 0.34465719579987647, "grad_norm": 12.30087947845459, "learning_rate": 9.931562693020381e-07, "loss": 0.4715, "step": 558 }, { "epoch": 0.3458925262507721, "grad_norm": 12.231100082397461, "learning_rate": 9.931315626930203e-07, "loss": 0.326, "step": 560 }, { "epoch": 0.3471278567016677, "grad_norm": 16.285499572753906, "learning_rate": 9.931068560840025e-07, "loss": 0.2247, "step": 562 }, { "epoch": 0.3483631871525633, "grad_norm": 15.419703483581543, "learning_rate": 9.930821494749845e-07, "loss": 0.6439, "step": 564 }, { "epoch": 0.34959851760345895, "grad_norm": 13.59210205078125, "learning_rate": 9.930574428659665e-07, "loss": 0.5629, "step": 566 }, { "epoch": 0.3508338480543545, "grad_norm": 28.939977645874023, "learning_rate": 9.930327362569487e-07, "loss": 0.5533, "step": 568 }, { "epoch": 0.35206917850525016, "grad_norm": 8.927345275878906, "learning_rate": 9.930080296479307e-07, "loss": 0.296, "step": 570 }, { "epoch": 0.3533045089561458, "grad_norm": 11.477635383605957, "learning_rate": 9.92983323038913e-07, "loss": 0.4404, "step": 572 }, { "epoch": 0.35453983940704137, "grad_norm": 10.93437385559082, "learning_rate": 9.92958616429895e-07, "loss": 0.3652, "step": 574 }, { "epoch": 0.355775169857937, "grad_norm": 16.103187561035156, "learning_rate": 9.92933909820877e-07, "loss": 0.5749, "step": 576 }, { "epoch": 0.35701050030883263, "grad_norm": 17.2227840423584, "learning_rate": 9.929092032118591e-07, "loss": 0.5804, "step": 578 }, { "epoch": 0.3582458307597282, "grad_norm": 12.40194034576416, "learning_rate": 9.928844966028413e-07, "loss": 0.6917, "step": 580 }, { "epoch": 0.35948116121062385, "grad_norm": 9.424482345581055, "learning_rate": 9.928597899938233e-07, "loss": 0.645, "step": 582 }, { "epoch": 0.3607164916615195, "grad_norm": 13.61655044555664, "learning_rate": 9.928350833848053e-07, "loss": 0.554, "step": 584 
}, { "epoch": 0.36195182211241506, "grad_norm": 7.836410999298096, "learning_rate": 9.928103767757875e-07, "loss": 0.4333, "step": 586 }, { "epoch": 0.3631871525633107, "grad_norm": 11.152990341186523, "learning_rate": 9.927856701667695e-07, "loss": 0.6489, "step": 588 }, { "epoch": 0.3644224830142063, "grad_norm": 5.921578884124756, "learning_rate": 9.927609635577517e-07, "loss": 0.1111, "step": 590 }, { "epoch": 0.3656578134651019, "grad_norm": 8.367027282714844, "learning_rate": 9.927362569487337e-07, "loss": 0.2289, "step": 592 }, { "epoch": 0.36689314391599753, "grad_norm": 6.848742961883545, "learning_rate": 9.92711550339716e-07, "loss": 0.3667, "step": 594 }, { "epoch": 0.36812847436689317, "grad_norm": 12.698480606079102, "learning_rate": 9.92686843730698e-07, "loss": 0.4587, "step": 596 }, { "epoch": 0.36936380481778874, "grad_norm": 7.496913909912109, "learning_rate": 9.926621371216801e-07, "loss": 0.3049, "step": 598 }, { "epoch": 0.3705991352686844, "grad_norm": 14.749767303466797, "learning_rate": 9.926374305126621e-07, "loss": 0.6105, "step": 600 }, { "epoch": 0.37183446571958, "grad_norm": 7.424029350280762, "learning_rate": 9.92612723903644e-07, "loss": 0.269, "step": 602 }, { "epoch": 0.3730697961704756, "grad_norm": 6.417357921600342, "learning_rate": 9.925880172946263e-07, "loss": 0.3811, "step": 604 }, { "epoch": 0.3743051266213712, "grad_norm": 15.438959121704102, "learning_rate": 9.925633106856083e-07, "loss": 0.4702, "step": 606 }, { "epoch": 0.37554045707226685, "grad_norm": 8.805246353149414, "learning_rate": 9.925386040765905e-07, "loss": 0.2176, "step": 608 }, { "epoch": 0.37677578752316243, "grad_norm": 7.553557395935059, "learning_rate": 9.925138974675725e-07, "loss": 0.1368, "step": 610 }, { "epoch": 0.37801111797405806, "grad_norm": 12.287881851196289, "learning_rate": 9.924891908585547e-07, "loss": 0.6506, "step": 612 }, { "epoch": 0.3792464484249537, "grad_norm": 15.295019149780273, "learning_rate": 9.924644842495367e-07, "loss": 
0.5125, "step": 614 }, { "epoch": 0.3804817788758493, "grad_norm": 7.991222858428955, "learning_rate": 9.924397776405187e-07, "loss": 0.3398, "step": 616 }, { "epoch": 0.3817171093267449, "grad_norm": 4.484683990478516, "learning_rate": 9.924150710315009e-07, "loss": 0.3043, "step": 618 }, { "epoch": 0.38295243977764054, "grad_norm": 7.510758876800537, "learning_rate": 9.923903644224829e-07, "loss": 0.1668, "step": 620 }, { "epoch": 0.3841877702285361, "grad_norm": 8.815669059753418, "learning_rate": 9.92365657813465e-07, "loss": 0.3833, "step": 622 }, { "epoch": 0.38542310067943175, "grad_norm": 14.810882568359375, "learning_rate": 9.92340951204447e-07, "loss": 0.4986, "step": 624 }, { "epoch": 0.3866584311303274, "grad_norm": 13.7406587600708, "learning_rate": 9.923162445954293e-07, "loss": 0.4217, "step": 626 }, { "epoch": 0.38789376158122296, "grad_norm": 5.838079929351807, "learning_rate": 9.922915379864113e-07, "loss": 0.0936, "step": 628 }, { "epoch": 0.3891290920321186, "grad_norm": 9.333106994628906, "learning_rate": 9.922668313773935e-07, "loss": 0.1566, "step": 630 }, { "epoch": 0.39036442248301423, "grad_norm": 15.752297401428223, "learning_rate": 9.922421247683755e-07, "loss": 0.6991, "step": 632 }, { "epoch": 0.3915997529339098, "grad_norm": 7.805171012878418, "learning_rate": 9.922174181593575e-07, "loss": 0.4128, "step": 634 }, { "epoch": 0.39283508338480544, "grad_norm": 8.296146392822266, "learning_rate": 9.921927115503397e-07, "loss": 0.2942, "step": 636 }, { "epoch": 0.39407041383570107, "grad_norm": 7.760396480560303, "learning_rate": 9.921680049413219e-07, "loss": 0.1749, "step": 638 }, { "epoch": 0.39530574428659665, "grad_norm": 11.017698287963867, "learning_rate": 9.921432983323039e-07, "loss": 0.5588, "step": 640 }, { "epoch": 0.3965410747374923, "grad_norm": 18.76515769958496, "learning_rate": 9.921185917232859e-07, "loss": 0.7983, "step": 642 }, { "epoch": 0.3977764051883879, "grad_norm": 8.274770736694336, "learning_rate": 
9.92093885114268e-07, "loss": 0.2422, "step": 644 }, { "epoch": 0.3990117356392835, "grad_norm": 11.841864585876465, "learning_rate": 9.9206917850525e-07, "loss": 0.4016, "step": 646 }, { "epoch": 0.4002470660901791, "grad_norm": 9.112067222595215, "learning_rate": 9.920444718962323e-07, "loss": 0.2527, "step": 648 }, { "epoch": 0.40148239654107476, "grad_norm": 12.512776374816895, "learning_rate": 9.920197652872143e-07, "loss": 0.3099, "step": 650 }, { "epoch": 0.40271772699197034, "grad_norm": 9.065421104431152, "learning_rate": 9.919950586781963e-07, "loss": 0.3614, "step": 652 }, { "epoch": 0.40395305744286597, "grad_norm": 9.075920104980469, "learning_rate": 9.919703520691785e-07, "loss": 0.4032, "step": 654 }, { "epoch": 0.4051883878937616, "grad_norm": 15.861579895019531, "learning_rate": 9.919456454601607e-07, "loss": 0.6085, "step": 656 }, { "epoch": 0.4064237183446572, "grad_norm": 5.774117946624756, "learning_rate": 9.919209388511427e-07, "loss": 0.2744, "step": 658 }, { "epoch": 0.4076590487955528, "grad_norm": 12.715323448181152, "learning_rate": 9.918962322421247e-07, "loss": 0.4403, "step": 660 }, { "epoch": 0.40889437924644845, "grad_norm": 15.304740905761719, "learning_rate": 9.918715256331069e-07, "loss": 0.5766, "step": 662 }, { "epoch": 0.410129709697344, "grad_norm": 13.599628448486328, "learning_rate": 9.918468190240889e-07, "loss": 0.1394, "step": 664 }, { "epoch": 0.41136504014823966, "grad_norm": 5.77752161026001, "learning_rate": 9.91822112415071e-07, "loss": 0.3516, "step": 666 }, { "epoch": 0.4126003705991353, "grad_norm": 16.13694190979004, "learning_rate": 9.91797405806053e-07, "loss": 0.6144, "step": 668 }, { "epoch": 0.41383570105003087, "grad_norm": 12.713834762573242, "learning_rate": 9.917726991970353e-07, "loss": 0.3965, "step": 670 }, { "epoch": 0.4150710315009265, "grad_norm": 17.18771743774414, "learning_rate": 9.917479925880173e-07, "loss": 0.7835, "step": 672 }, { "epoch": 0.41630636195182213, "grad_norm": 
11.747124671936035, "learning_rate": 9.917232859789992e-07, "loss": 0.425, "step": 674 }, { "epoch": 0.4175416924027177, "grad_norm": 10.122307777404785, "learning_rate": 9.916985793699815e-07, "loss": 0.432, "step": 676 }, { "epoch": 0.41877702285361335, "grad_norm": 12.387354850769043, "learning_rate": 9.916738727609634e-07, "loss": 0.3433, "step": 678 }, { "epoch": 0.420012353304509, "grad_norm": 10.521968841552734, "learning_rate": 9.916491661519456e-07, "loss": 0.2785, "step": 680 }, { "epoch": 0.42124768375540456, "grad_norm": 10.170622825622559, "learning_rate": 9.916244595429276e-07, "loss": 0.3655, "step": 682 }, { "epoch": 0.4224830142063002, "grad_norm": 13.403547286987305, "learning_rate": 9.915997529339096e-07, "loss": 0.596, "step": 684 }, { "epoch": 0.4237183446571958, "grad_norm": 11.802879333496094, "learning_rate": 9.915750463248918e-07, "loss": 0.826, "step": 686 }, { "epoch": 0.4249536751080914, "grad_norm": 12.558028221130371, "learning_rate": 9.91550339715874e-07, "loss": 0.3988, "step": 688 }, { "epoch": 0.42618900555898703, "grad_norm": 13.262487411499023, "learning_rate": 9.91525633106856e-07, "loss": 0.5189, "step": 690 }, { "epoch": 0.42742433600988267, "grad_norm": 14.45762825012207, "learning_rate": 9.91500926497838e-07, "loss": 0.7918, "step": 692 }, { "epoch": 0.42865966646077824, "grad_norm": 12.116777420043945, "learning_rate": 9.914762198888202e-07, "loss": 0.7829, "step": 694 }, { "epoch": 0.4298949969116739, "grad_norm": 12.325629234313965, "learning_rate": 9.914515132798022e-07, "loss": 0.3218, "step": 696 }, { "epoch": 0.4311303273625695, "grad_norm": 11.490076065063477, "learning_rate": 9.914268066707844e-07, "loss": 0.3159, "step": 698 }, { "epoch": 0.4323656578134651, "grad_norm": 18.443016052246094, "learning_rate": 9.914021000617664e-07, "loss": 0.8749, "step": 700 }, { "epoch": 0.4336009882643607, "grad_norm": 11.877280235290527, "learning_rate": 9.913773934527484e-07, "loss": 0.3942, "step": 702 }, { "epoch": 
0.43483631871525635, "grad_norm": 15.162976264953613, "learning_rate": 9.913526868437306e-07, "loss": 0.4006, "step": 704 }, { "epoch": 0.43607164916615193, "grad_norm": 8.129733085632324, "learning_rate": 9.913279802347128e-07, "loss": 0.2829, "step": 706 }, { "epoch": 0.43730697961704756, "grad_norm": 13.559920310974121, "learning_rate": 9.913032736256948e-07, "loss": 0.6198, "step": 708 }, { "epoch": 0.4385423100679432, "grad_norm": 13.482331275939941, "learning_rate": 9.912785670166768e-07, "loss": 0.3668, "step": 710 }, { "epoch": 0.4397776405188388, "grad_norm": 10.194774627685547, "learning_rate": 9.91253860407659e-07, "loss": 0.4888, "step": 712 }, { "epoch": 0.4410129709697344, "grad_norm": 26.723228454589844, "learning_rate": 9.912291537986412e-07, "loss": 0.6769, "step": 714 }, { "epoch": 0.44224830142063004, "grad_norm": 12.821329116821289, "learning_rate": 9.912044471896232e-07, "loss": 0.1647, "step": 716 }, { "epoch": 0.4434836318715256, "grad_norm": 11.265690803527832, "learning_rate": 9.911797405806052e-07, "loss": 0.3153, "step": 718 }, { "epoch": 0.44471896232242125, "grad_norm": 7.989147663116455, "learning_rate": 9.911550339715874e-07, "loss": 0.2247, "step": 720 }, { "epoch": 0.4459542927733169, "grad_norm": 9.570841789245605, "learning_rate": 9.911303273625694e-07, "loss": 0.3751, "step": 722 }, { "epoch": 0.44718962322421246, "grad_norm": 9.440723419189453, "learning_rate": 9.911056207535516e-07, "loss": 0.4723, "step": 724 }, { "epoch": 0.4484249536751081, "grad_norm": 22.459779739379883, "learning_rate": 9.910809141445336e-07, "loss": 0.6713, "step": 726 }, { "epoch": 0.44966028412600373, "grad_norm": 10.259244918823242, "learning_rate": 9.910562075355156e-07, "loss": 0.2992, "step": 728 }, { "epoch": 0.4508956145768993, "grad_norm": 11.765793800354004, "learning_rate": 9.910315009264978e-07, "loss": 0.5279, "step": 730 }, { "epoch": 0.45213094502779494, "grad_norm": 8.524374961853027, "learning_rate": 9.9100679431748e-07, "loss": 0.4982, 
"step": 732 }, { "epoch": 0.4533662754786906, "grad_norm": 15.224843978881836, "learning_rate": 9.90982087708462e-07, "loss": 0.4196, "step": 734 }, { "epoch": 0.45460160592958615, "grad_norm": 10.724671363830566, "learning_rate": 9.90957381099444e-07, "loss": 0.3002, "step": 736 }, { "epoch": 0.4558369363804818, "grad_norm": 10.389793395996094, "learning_rate": 9.909326744904262e-07, "loss": 0.3721, "step": 738 }, { "epoch": 0.4570722668313774, "grad_norm": 10.702335357666016, "learning_rate": 9.909079678814082e-07, "loss": 0.2983, "step": 740 }, { "epoch": 0.458307597282273, "grad_norm": 10.03173828125, "learning_rate": 9.908832612723904e-07, "loss": 0.321, "step": 742 }, { "epoch": 0.4595429277331686, "grad_norm": 11.119577407836914, "learning_rate": 9.908585546633724e-07, "loss": 0.2847, "step": 744 }, { "epoch": 0.46077825818406426, "grad_norm": 7.4072651863098145, "learning_rate": 9.908338480543546e-07, "loss": 0.2316, "step": 746 }, { "epoch": 0.46201358863495984, "grad_norm": 11.390522956848145, "learning_rate": 9.908091414453366e-07, "loss": 0.6083, "step": 748 }, { "epoch": 0.46324891908585547, "grad_norm": 11.556458473205566, "learning_rate": 9.907844348363186e-07, "loss": 0.4369, "step": 750 }, { "epoch": 0.4644842495367511, "grad_norm": 13.147842407226562, "learning_rate": 9.907597282273008e-07, "loss": 0.6258, "step": 752 }, { "epoch": 0.4657195799876467, "grad_norm": 17.201335906982422, "learning_rate": 9.907350216182828e-07, "loss": 0.5758, "step": 754 }, { "epoch": 0.4669549104385423, "grad_norm": 7.273223400115967, "learning_rate": 9.90710315009265e-07, "loss": 0.2683, "step": 756 }, { "epoch": 0.46819024088943795, "grad_norm": 11.533844947814941, "learning_rate": 9.90685608400247e-07, "loss": 0.465, "step": 758 }, { "epoch": 0.4694255713403335, "grad_norm": 13.881172180175781, "learning_rate": 9.90660901791229e-07, "loss": 0.411, "step": 760 }, { "epoch": 0.47066090179122916, "grad_norm": 18.294076919555664, "learning_rate": 
9.906361951822112e-07, "loss": 1.016, "step": 762 }, { "epoch": 0.4718962322421248, "grad_norm": 4.4896240234375, "learning_rate": 9.906114885731934e-07, "loss": 0.1791, "step": 764 }, { "epoch": 0.47313156269302037, "grad_norm": 7.812268257141113, "learning_rate": 9.905867819641754e-07, "loss": 0.3664, "step": 766 }, { "epoch": 0.474366893143916, "grad_norm": 10.678888320922852, "learning_rate": 9.905620753551574e-07, "loss": 0.5258, "step": 768 }, { "epoch": 0.47560222359481164, "grad_norm": 7.406228065490723, "learning_rate": 9.905373687461396e-07, "loss": 0.2568, "step": 770 }, { "epoch": 0.4768375540457072, "grad_norm": 10.585089683532715, "learning_rate": 9.905126621371216e-07, "loss": 0.5362, "step": 772 }, { "epoch": 0.47807288449660285, "grad_norm": 7.420055866241455, "learning_rate": 9.904879555281038e-07, "loss": 0.1812, "step": 774 }, { "epoch": 0.4793082149474985, "grad_norm": 9.4044828414917, "learning_rate": 9.904632489190858e-07, "loss": 0.3822, "step": 776 }, { "epoch": 0.48054354539839406, "grad_norm": 14.168746948242188, "learning_rate": 9.904385423100678e-07, "loss": 0.845, "step": 778 }, { "epoch": 0.4817788758492897, "grad_norm": 6.16392707824707, "learning_rate": 9.9041383570105e-07, "loss": 0.3748, "step": 780 }, { "epoch": 0.4830142063001853, "grad_norm": 7.534607887268066, "learning_rate": 9.903891290920322e-07, "loss": 0.287, "step": 782 }, { "epoch": 0.4842495367510809, "grad_norm": 8.907422065734863, "learning_rate": 9.903644224830142e-07, "loss": 0.3072, "step": 784 }, { "epoch": 0.48548486720197653, "grad_norm": 16.64270782470703, "learning_rate": 9.903397158739962e-07, "loss": 0.2625, "step": 786 }, { "epoch": 0.48672019765287217, "grad_norm": 12.010709762573242, "learning_rate": 9.903150092649784e-07, "loss": 1.0324, "step": 788 }, { "epoch": 0.48795552810376774, "grad_norm": 14.731252670288086, "learning_rate": 9.902903026559606e-07, "loss": 0.4046, "step": 790 }, { "epoch": 0.4891908585546634, "grad_norm": 19.054306030273438, 
"learning_rate": 9.902655960469426e-07, "loss": 0.247, "step": 792 }, { "epoch": 0.490426189005559, "grad_norm": 8.124000549316406, "learning_rate": 9.902408894379246e-07, "loss": 0.2706, "step": 794 }, { "epoch": 0.4916615194564546, "grad_norm": 10.127885818481445, "learning_rate": 9.902161828289068e-07, "loss": 0.3574, "step": 796 }, { "epoch": 0.4928968499073502, "grad_norm": 9.194710731506348, "learning_rate": 9.901914762198888e-07, "loss": 0.5131, "step": 798 }, { "epoch": 0.49413218035824585, "grad_norm": 10.387843132019043, "learning_rate": 9.90166769610871e-07, "loss": 0.3305, "step": 800 }, { "epoch": 0.49536751080914143, "grad_norm": 13.17009449005127, "learning_rate": 9.90142063001853e-07, "loss": 0.2494, "step": 802 }, { "epoch": 0.49660284126003706, "grad_norm": 8.86633586883545, "learning_rate": 9.90117356392835e-07, "loss": 0.2293, "step": 804 }, { "epoch": 0.4978381717109327, "grad_norm": 16.02943229675293, "learning_rate": 9.900926497838172e-07, "loss": 0.5462, "step": 806 }, { "epoch": 0.4990735021618283, "grad_norm": 12.347894668579102, "learning_rate": 9.900679431747991e-07, "loss": 0.4533, "step": 808 }, { "epoch": 0.5003088326127239, "grad_norm": 14.270822525024414, "learning_rate": 9.900432365657814e-07, "loss": 0.3671, "step": 810 }, { "epoch": 0.5015441630636195, "grad_norm": 9.294798851013184, "learning_rate": 9.900185299567633e-07, "loss": 0.4711, "step": 812 }, { "epoch": 0.5027794935145151, "grad_norm": 8.771409034729004, "learning_rate": 9.899938233477455e-07, "loss": 0.7586, "step": 814 }, { "epoch": 0.5040148239654108, "grad_norm": 10.456218719482422, "learning_rate": 9.899691167387275e-07, "loss": 0.2167, "step": 816 }, { "epoch": 0.5052501544163064, "grad_norm": 12.497394561767578, "learning_rate": 9.899444101297095e-07, "loss": 0.3167, "step": 818 }, { "epoch": 0.506485484867202, "grad_norm": 5.725962162017822, "learning_rate": 9.899197035206917e-07, "loss": 0.2681, "step": 820 }, { "epoch": 0.5077208153180975, "grad_norm": null, 
"learning_rate": 9.899073502161827e-07, "loss": 0.4588, "step": 822 }, { "epoch": 0.5089561457689932, "grad_norm": 9.149727821350098, "learning_rate": 9.89882643607165e-07, "loss": 0.2243, "step": 824 }, { "epoch": 0.5101914762198888, "grad_norm": 18.050565719604492, "learning_rate": 9.89857936998147e-07, "loss": 0.5518, "step": 826 }, { "epoch": 0.5114268066707844, "grad_norm": 10.027440071105957, "learning_rate": 9.89833230389129e-07, "loss": 0.3232, "step": 828 }, { "epoch": 0.5126621371216801, "grad_norm": 5.495999813079834, "learning_rate": 9.898085237801111e-07, "loss": 0.2467, "step": 830 }, { "epoch": 0.5138974675725757, "grad_norm": 27.85940170288086, "learning_rate": 9.897838171710933e-07, "loss": 0.7106, "step": 832 }, { "epoch": 0.5151327980234712, "grad_norm": 6.2650909423828125, "learning_rate": 9.897591105620753e-07, "loss": 0.3997, "step": 834 }, { "epoch": 0.5163681284743669, "grad_norm": 32.89210510253906, "learning_rate": 9.897344039530573e-07, "loss": 0.8103, "step": 836 }, { "epoch": 0.5176034589252625, "grad_norm": 16.348735809326172, "learning_rate": 9.897096973440395e-07, "loss": 0.7873, "step": 838 }, { "epoch": 0.5188387893761581, "grad_norm": 12.705388069152832, "learning_rate": 9.896849907350215e-07, "loss": 0.4139, "step": 840 }, { "epoch": 0.5200741198270538, "grad_norm": 10.026324272155762, "learning_rate": 9.896602841260037e-07, "loss": 0.2527, "step": 842 }, { "epoch": 0.5213094502779494, "grad_norm": 12.008644104003906, "learning_rate": 9.896355775169857e-07, "loss": 0.4628, "step": 844 }, { "epoch": 0.5225447807288449, "grad_norm": 2.128446102142334, "learning_rate": 9.896108709079677e-07, "loss": 0.3384, "step": 846 }, { "epoch": 0.5237801111797405, "grad_norm": 13.744701385498047, "learning_rate": 9.8958616429895e-07, "loss": 0.3143, "step": 848 }, { "epoch": 0.5250154416306362, "grad_norm": 12.430827140808105, "learning_rate": 9.895614576899321e-07, "loss": 0.4557, "step": 850 }, { "epoch": 0.5262507720815318, "grad_norm": 
11.058006286621094, "learning_rate": 9.895367510809141e-07, "loss": 0.4195, "step": 852 }, { "epoch": 0.5274861025324274, "grad_norm": 14.355245590209961, "learning_rate": 9.895120444718961e-07, "loss": 0.7626, "step": 854 }, { "epoch": 0.5287214329833231, "grad_norm": 13.31268310546875, "learning_rate": 9.894873378628783e-07, "loss": 0.4424, "step": 856 }, { "epoch": 0.5299567634342186, "grad_norm": 12.614999771118164, "learning_rate": 9.894626312538603e-07, "loss": 0.4948, "step": 858 }, { "epoch": 0.5311920938851142, "grad_norm": 6.873323917388916, "learning_rate": 9.894379246448425e-07, "loss": 0.2156, "step": 860 }, { "epoch": 0.5324274243360099, "grad_norm": 10.446084976196289, "learning_rate": 9.894132180358245e-07, "loss": 0.4206, "step": 862 }, { "epoch": 0.5336627547869055, "grad_norm": 13.160015106201172, "learning_rate": 9.893885114268065e-07, "loss": 0.3714, "step": 864 }, { "epoch": 0.5348980852378011, "grad_norm": 10.677501678466797, "learning_rate": 9.893638048177887e-07, "loss": 0.6782, "step": 866 }, { "epoch": 0.5361334156886968, "grad_norm": 9.042564392089844, "learning_rate": 9.89339098208771e-07, "loss": 0.5709, "step": 868 }, { "epoch": 0.5373687461395923, "grad_norm": 10.39052963256836, "learning_rate": 9.89314391599753e-07, "loss": 0.5679, "step": 870 }, { "epoch": 0.5386040765904879, "grad_norm": 7.668248176574707, "learning_rate": 9.89289684990735e-07, "loss": 0.2415, "step": 872 }, { "epoch": 0.5398394070413836, "grad_norm": 7.0617218017578125, "learning_rate": 9.89264978381717e-07, "loss": 0.3387, "step": 874 }, { "epoch": 0.5410747374922792, "grad_norm": 11.028111457824707, "learning_rate": 9.89240271772699e-07, "loss": 0.2792, "step": 876 }, { "epoch": 0.5423100679431748, "grad_norm": 8.035455703735352, "learning_rate": 9.892155651636813e-07, "loss": 0.1943, "step": 878 }, { "epoch": 0.5435453983940705, "grad_norm": 10.800418853759766, "learning_rate": 9.891908585546633e-07, "loss": 0.2887, "step": 880 }, { "epoch": 0.544780728844966, 
"grad_norm": 4.274187088012695, "learning_rate": 9.891661519456455e-07, "loss": 0.1064, "step": 882 }, { "epoch": 0.5460160592958616, "grad_norm": 4.651637077331543, "learning_rate": 9.891414453366275e-07, "loss": 0.1947, "step": 884 }, { "epoch": 0.5472513897467572, "grad_norm": 13.171686172485352, "learning_rate": 9.891167387276095e-07, "loss": 0.4472, "step": 886 }, { "epoch": 0.5484867201976529, "grad_norm": 6.317527770996094, "learning_rate": 9.890920321185917e-07, "loss": 0.3644, "step": 888 }, { "epoch": 0.5497220506485485, "grad_norm": 9.421491622924805, "learning_rate": 9.890673255095737e-07, "loss": 0.2027, "step": 890 }, { "epoch": 0.5509573810994441, "grad_norm": 10.154773712158203, "learning_rate": 9.890426189005559e-07, "loss": 0.1663, "step": 892 }, { "epoch": 0.5521927115503397, "grad_norm": 10.188980102539062, "learning_rate": 9.890179122915379e-07, "loss": 0.2798, "step": 894 }, { "epoch": 0.5534280420012353, "grad_norm": 12.523406028747559, "learning_rate": 9.8899320568252e-07, "loss": 0.4561, "step": 896 }, { "epoch": 0.5546633724521309, "grad_norm": 7.1613874435424805, "learning_rate": 9.88968499073502e-07, "loss": 0.2718, "step": 898 }, { "epoch": 0.5558987029030266, "grad_norm": 11.903841018676758, "learning_rate": 9.889437924644843e-07, "loss": 0.5052, "step": 900 }, { "epoch": 0.5571340333539222, "grad_norm": 10.26794719696045, "learning_rate": 9.889190858554663e-07, "loss": 0.2634, "step": 902 }, { "epoch": 0.5583693638048178, "grad_norm": 10.02596378326416, "learning_rate": 9.888943792464483e-07, "loss": 1.0964, "step": 904 }, { "epoch": 0.5596046942557134, "grad_norm": 21.510549545288086, "learning_rate": 9.888696726374305e-07, "loss": 0.5723, "step": 906 }, { "epoch": 0.560840024706609, "grad_norm": 12.176825523376465, "learning_rate": 9.888449660284127e-07, "loss": 0.4111, "step": 908 }, { "epoch": 0.5620753551575046, "grad_norm": 12.058060646057129, "learning_rate": 9.888202594193947e-07, "loss": 0.3237, "step": 910 }, { "epoch": 
0.5633106856084003, "grad_norm": 15.622177124023438, "learning_rate": 9.887955528103767e-07, "loss": 0.771, "step": 912 }, { "epoch": 0.5645460160592959, "grad_norm": 15.394332885742188, "learning_rate": 9.887708462013589e-07, "loss": 0.4409, "step": 914 }, { "epoch": 0.5657813465101915, "grad_norm": 11.156314849853516, "learning_rate": 9.887461395923409e-07, "loss": 0.6427, "step": 916 }, { "epoch": 0.567016676961087, "grad_norm": 22.630449295043945, "learning_rate": 9.88721432983323e-07, "loss": 0.7925, "step": 918 }, { "epoch": 0.5682520074119827, "grad_norm": 9.721307754516602, "learning_rate": 9.88696726374305e-07, "loss": 0.2727, "step": 920 }, { "epoch": 0.5694873378628783, "grad_norm": 8.149195671081543, "learning_rate": 9.88672019765287e-07, "loss": 0.4104, "step": 922 }, { "epoch": 0.5707226683137739, "grad_norm": 13.134840965270996, "learning_rate": 9.886473131562693e-07, "loss": 0.2651, "step": 924 }, { "epoch": 0.5719579987646696, "grad_norm": 9.156907081604004, "learning_rate": 9.886226065472515e-07, "loss": 0.1814, "step": 926 }, { "epoch": 0.5731933292155652, "grad_norm": 12.063240051269531, "learning_rate": 9.885978999382335e-07, "loss": 0.5385, "step": 928 }, { "epoch": 0.5744286596664607, "grad_norm": 6.595946788787842, "learning_rate": 9.885731933292155e-07, "loss": 0.2502, "step": 930 }, { "epoch": 0.5756639901173564, "grad_norm": 7.861462593078613, "learning_rate": 9.885484867201977e-07, "loss": 0.2783, "step": 932 }, { "epoch": 0.576899320568252, "grad_norm": 12.008648872375488, "learning_rate": 9.885237801111797e-07, "loss": 0.3949, "step": 934 }, { "epoch": 0.5781346510191476, "grad_norm": 24.49518585205078, "learning_rate": 9.884990735021619e-07, "loss": 0.6727, "step": 936 }, { "epoch": 0.5793699814700433, "grad_norm": 10.187993049621582, "learning_rate": 9.884743668931439e-07, "loss": 0.1829, "step": 938 }, { "epoch": 0.5806053119209389, "grad_norm": 11.938882827758789, "learning_rate": 9.884496602841258e-07, "loss": 0.3382, "step": 940 
}, { "epoch": 0.5818406423718344, "grad_norm": 11.929837226867676, "learning_rate": 9.88424953675108e-07, "loss": 0.243, "step": 942 }, { "epoch": 0.58307597282273, "grad_norm": 10.741907119750977, "learning_rate": 9.884002470660903e-07, "loss": 0.5253, "step": 944 }, { "epoch": 0.5843113032736257, "grad_norm": 24.082183837890625, "learning_rate": 9.883755404570722e-07, "loss": 0.5888, "step": 946 }, { "epoch": 0.5855466337245213, "grad_norm": 11.670113563537598, "learning_rate": 9.883508338480542e-07, "loss": 0.423, "step": 948 }, { "epoch": 0.586781964175417, "grad_norm": 6.894332408905029, "learning_rate": 9.883261272390364e-07, "loss": 0.3454, "step": 950 }, { "epoch": 0.5880172946263126, "grad_norm": 11.896034240722656, "learning_rate": 9.883014206300184e-07, "loss": 0.3578, "step": 952 }, { "epoch": 0.5892526250772081, "grad_norm": 41.695804595947266, "learning_rate": 9.882767140210006e-07, "loss": 0.8581, "step": 954 }, { "epoch": 0.5904879555281037, "grad_norm": 12.528753280639648, "learning_rate": 9.882520074119826e-07, "loss": 0.4792, "step": 956 }, { "epoch": 0.5917232859789994, "grad_norm": 20.830663681030273, "learning_rate": 9.882273008029648e-07, "loss": 0.8543, "step": 958 }, { "epoch": 0.592958616429895, "grad_norm": 7.014253616333008, "learning_rate": 9.882025941939468e-07, "loss": 0.2555, "step": 960 }, { "epoch": 0.5941939468807906, "grad_norm": 10.412546157836914, "learning_rate": 9.881778875849288e-07, "loss": 0.2216, "step": 962 }, { "epoch": 0.5954292773316863, "grad_norm": 13.913363456726074, "learning_rate": 9.88153180975911e-07, "loss": 0.4277, "step": 964 }, { "epoch": 0.5966646077825818, "grad_norm": 6.683900833129883, "learning_rate": 9.88128474366893e-07, "loss": 0.1447, "step": 966 }, { "epoch": 0.5978999382334774, "grad_norm": 12.473090171813965, "learning_rate": 9.881037677578752e-07, "loss": 0.296, "step": 968 }, { "epoch": 0.5991352686843731, "grad_norm": 5.740317344665527, "learning_rate": 9.880790611488572e-07, "loss": 0.1996, 
"step": 970 }, { "epoch": 0.6003705991352687, "grad_norm": 11.558337211608887, "learning_rate": 9.880543545398392e-07, "loss": 0.3426, "step": 972 }, { "epoch": 0.6016059295861643, "grad_norm": 10.134198188781738, "learning_rate": 9.880296479308214e-07, "loss": 0.3835, "step": 974 }, { "epoch": 0.60284126003706, "grad_norm": 8.636388778686523, "learning_rate": 9.880049413218036e-07, "loss": 0.1798, "step": 976 }, { "epoch": 0.6040765904879555, "grad_norm": 9.383220672607422, "learning_rate": 9.879802347127856e-07, "loss": 0.475, "step": 978 }, { "epoch": 0.6053119209388511, "grad_norm": 12.968520164489746, "learning_rate": 9.879555281037676e-07, "loss": 0.3935, "step": 980 }, { "epoch": 0.6065472513897467, "grad_norm": 8.868171691894531, "learning_rate": 9.879308214947498e-07, "loss": 0.2561, "step": 982 }, { "epoch": 0.6077825818406424, "grad_norm": 14.148285865783691, "learning_rate": 9.879061148857318e-07, "loss": 0.4673, "step": 984 }, { "epoch": 0.609017912291538, "grad_norm": 6.488974571228027, "learning_rate": 9.87881408276714e-07, "loss": 0.1029, "step": 986 }, { "epoch": 0.6102532427424336, "grad_norm": 7.15607213973999, "learning_rate": 9.87856701667696e-07, "loss": 0.2332, "step": 988 }, { "epoch": 0.6114885731933292, "grad_norm": 8.585821151733398, "learning_rate": 9.878319950586782e-07, "loss": 0.2319, "step": 990 }, { "epoch": 0.6127239036442248, "grad_norm": 7.2581467628479, "learning_rate": 9.878072884496602e-07, "loss": 0.1274, "step": 992 }, { "epoch": 0.6139592340951204, "grad_norm": 15.409602165222168, "learning_rate": 9.877825818406424e-07, "loss": 0.2159, "step": 994 }, { "epoch": 0.6151945645460161, "grad_norm": 7.143137454986572, "learning_rate": 9.877578752316244e-07, "loss": 0.1469, "step": 996 }, { "epoch": 0.6164298949969117, "grad_norm": 11.921899795532227, "learning_rate": 9.877331686226064e-07, "loss": 0.5047, "step": 998 }, { "epoch": 0.6176652254478073, "grad_norm": 13.633674621582031, "learning_rate": 9.877084620135886e-07, "loss": 
0.5482, "step": 1000 }, { "epoch": 0.6176652254478073, "eval_cer": 0.0781001177040781, "eval_loss": 0.38156017661094666, "eval_runtime": 64.8451, "eval_samples_per_second": 12.661, "eval_steps_per_second": 1.588, "step": 1000 }, { "epoch": 0.6189005558987029, "grad_norm": 8.908910751342773, "learning_rate": 9.876837554045708e-07, "loss": 0.5032, "step": 1002 }, { "epoch": 0.6201358863495985, "grad_norm": 10.353494644165039, "learning_rate": 9.876590487955528e-07, "loss": 0.5005, "step": 1004 }, { "epoch": 0.6213712168004941, "grad_norm": 16.235559463500977, "learning_rate": 9.876343421865348e-07, "loss": 0.3146, "step": 1006 }, { "epoch": 0.6226065472513898, "grad_norm": 7.180684566497803, "learning_rate": 9.87609635577517e-07, "loss": 0.2642, "step": 1008 }, { "epoch": 0.6238418777022854, "grad_norm": 6.8852009773254395, "learning_rate": 9.87584928968499e-07, "loss": 0.2983, "step": 1010 }, { "epoch": 0.625077208153181, "grad_norm": 7.567797660827637, "learning_rate": 9.875602223594812e-07, "loss": 0.1672, "step": 1012 }, { "epoch": 0.6263125386040765, "grad_norm": 13.79082202911377, "learning_rate": 9.875355157504632e-07, "loss": 0.5895, "step": 1014 }, { "epoch": 0.6275478690549722, "grad_norm": 10.479715347290039, "learning_rate": 9.875108091414452e-07, "loss": 0.2047, "step": 1016 }, { "epoch": 0.6287831995058678, "grad_norm": 9.827377319335938, "learning_rate": 9.874861025324274e-07, "loss": 0.2233, "step": 1018 }, { "epoch": 0.6300185299567634, "grad_norm": 7.837274074554443, "learning_rate": 9.874613959234094e-07, "loss": 0.1387, "step": 1020 }, { "epoch": 0.6312538604076591, "grad_norm": 10.190793991088867, "learning_rate": 9.874366893143916e-07, "loss": 0.172, "step": 1022 }, { "epoch": 0.6324891908585547, "grad_norm": 7.874151229858398, "learning_rate": 9.874119827053736e-07, "loss": 0.2302, "step": 1024 }, { "epoch": 0.6337245213094502, "grad_norm": 7.375782012939453, "learning_rate": 9.873872760963558e-07, "loss": 0.4176, "step": 1026 }, { "epoch": 
0.6349598517603459, "grad_norm": 8.656686782836914, "learning_rate": 9.873625694873378e-07, "loss": 0.1717, "step": 1028 }, { "epoch": 0.6361951822112415, "grad_norm": 10.012526512145996, "learning_rate": 9.8733786287832e-07, "loss": 0.1674, "step": 1030 }, { "epoch": 0.6374305126621371, "grad_norm": 10.031706809997559, "learning_rate": 9.87313156269302e-07, "loss": 0.256, "step": 1032 }, { "epoch": 0.6386658431130328, "grad_norm": 13.79273509979248, "learning_rate": 9.872884496602842e-07, "loss": 0.5814, "step": 1034 }, { "epoch": 0.6399011735639284, "grad_norm": 10.62157154083252, "learning_rate": 9.872637430512662e-07, "loss": 0.5543, "step": 1036 }, { "epoch": 0.6411365040148239, "grad_norm": 12.3402738571167, "learning_rate": 9.872390364422482e-07, "loss": 0.8595, "step": 1038 }, { "epoch": 0.6423718344657195, "grad_norm": 8.289229393005371, "learning_rate": 9.872143298332304e-07, "loss": 0.3725, "step": 1040 }, { "epoch": 0.6436071649166152, "grad_norm": 10.70551586151123, "learning_rate": 9.871896232242124e-07, "loss": 0.24, "step": 1042 }, { "epoch": 0.6448424953675108, "grad_norm": 8.135108947753906, "learning_rate": 9.871649166151946e-07, "loss": 0.3118, "step": 1044 }, { "epoch": 0.6460778258184064, "grad_norm": 12.714792251586914, "learning_rate": 9.871402100061766e-07, "loss": 0.248, "step": 1046 }, { "epoch": 0.6473131562693021, "grad_norm": 11.77755069732666, "learning_rate": 9.871155033971586e-07, "loss": 0.6518, "step": 1048 }, { "epoch": 0.6485484867201976, "grad_norm": 7.763424873352051, "learning_rate": 9.870907967881408e-07, "loss": 0.4195, "step": 1050 }, { "epoch": 0.6497838171710932, "grad_norm": 11.852334022521973, "learning_rate": 9.87066090179123e-07, "loss": 0.262, "step": 1052 }, { "epoch": 0.6510191476219889, "grad_norm": 9.841442108154297, "learning_rate": 9.87041383570105e-07, "loss": 0.4522, "step": 1054 }, { "epoch": 0.6522544780728845, "grad_norm": 10.79570198059082, "learning_rate": 9.87016676961087e-07, "loss": 0.3063, "step": 
1056 }, { "epoch": 0.6534898085237801, "grad_norm": 11.719614028930664, "learning_rate": 9.869919703520692e-07, "loss": 0.4145, "step": 1058 }, { "epoch": 0.6547251389746758, "grad_norm": 6.345350742340088, "learning_rate": 9.869672637430512e-07, "loss": 0.2238, "step": 1060 }, { "epoch": 0.6559604694255713, "grad_norm": 15.156318664550781, "learning_rate": 9.869425571340334e-07, "loss": 0.6402, "step": 1062 }, { "epoch": 0.6571957998764669, "grad_norm": 13.626304626464844, "learning_rate": 9.869178505250154e-07, "loss": 0.4881, "step": 1064 }, { "epoch": 0.6584311303273626, "grad_norm": 6.198817729949951, "learning_rate": 9.868931439159976e-07, "loss": 0.2955, "step": 1066 }, { "epoch": 0.6596664607782582, "grad_norm": 6.775591850280762, "learning_rate": 9.868684373069796e-07, "loss": 0.3939, "step": 1068 }, { "epoch": 0.6609017912291538, "grad_norm": 6.222322940826416, "learning_rate": 9.868437306979618e-07, "loss": 0.1635, "step": 1070 }, { "epoch": 0.6621371216800495, "grad_norm": 17.99842071533203, "learning_rate": 9.868190240889438e-07, "loss": 0.6231, "step": 1072 }, { "epoch": 0.663372452130945, "grad_norm": 12.086143493652344, "learning_rate": 9.867943174799257e-07, "loss": 0.6234, "step": 1074 }, { "epoch": 0.6646077825818406, "grad_norm": 9.73873519897461, "learning_rate": 9.86769610870908e-07, "loss": 0.2708, "step": 1076 }, { "epoch": 0.6658431130327362, "grad_norm": 8.554228782653809, "learning_rate": 9.867449042618902e-07, "loss": 0.3512, "step": 1078 }, { "epoch": 0.6670784434836319, "grad_norm": 23.13804817199707, "learning_rate": 9.867201976528721e-07, "loss": 0.7014, "step": 1080 }, { "epoch": 0.6683137739345275, "grad_norm": 11.794222831726074, "learning_rate": 9.866954910438541e-07, "loss": 0.2559, "step": 1082 }, { "epoch": 0.6695491043854231, "grad_norm": 7.187536716461182, "learning_rate": 9.866707844348363e-07, "loss": 0.3025, "step": 1084 }, { "epoch": 0.6707844348363187, "grad_norm": 16.57205581665039, "learning_rate": 
9.866460778258183e-07, "loss": 0.6813, "step": 1086 }, { "epoch": 0.6720197652872143, "grad_norm": 8.015583992004395, "learning_rate": 9.866213712168005e-07, "loss": 0.3801, "step": 1088 }, { "epoch": 0.6732550957381099, "grad_norm": 7.861381530761719, "learning_rate": 9.865966646077825e-07, "loss": 0.2516, "step": 1090 }, { "epoch": 0.6744904261890056, "grad_norm": 5.225973129272461, "learning_rate": 9.865719579987645e-07, "loss": 0.2268, "step": 1092 }, { "epoch": 0.6757257566399012, "grad_norm": 17.32479476928711, "learning_rate": 9.865472513897467e-07, "loss": 0.5741, "step": 1094 }, { "epoch": 0.6769610870907968, "grad_norm": 10.461854934692383, "learning_rate": 9.865225447807287e-07, "loss": 0.3484, "step": 1096 }, { "epoch": 0.6781964175416924, "grad_norm": 10.546394348144531, "learning_rate": 9.86497838171711e-07, "loss": 0.2642, "step": 1098 }, { "epoch": 0.679431747992588, "grad_norm": 8.273014068603516, "learning_rate": 9.86473131562693e-07, "loss": 0.2879, "step": 1100 }, { "epoch": 0.6806670784434836, "grad_norm": 11.363059997558594, "learning_rate": 9.864484249536751e-07, "loss": 0.2316, "step": 1102 }, { "epoch": 0.6819024088943793, "grad_norm": 16.404132843017578, "learning_rate": 9.864237183446571e-07, "loss": 0.3174, "step": 1104 }, { "epoch": 0.6831377393452749, "grad_norm": 14.019509315490723, "learning_rate": 9.863990117356391e-07, "loss": 0.7625, "step": 1106 }, { "epoch": 0.6843730697961705, "grad_norm": 14.049559593200684, "learning_rate": 9.863743051266213e-07, "loss": 0.5991, "step": 1108 }, { "epoch": 0.685608400247066, "grad_norm": 16.432628631591797, "learning_rate": 9.863495985176035e-07, "loss": 0.437, "step": 1110 }, { "epoch": 0.6868437306979617, "grad_norm": 34.229373931884766, "learning_rate": 9.863248919085855e-07, "loss": 0.4561, "step": 1112 }, { "epoch": 0.6880790611488573, "grad_norm": 10.534465789794922, "learning_rate": 9.863001852995675e-07, "loss": 0.3605, "step": 1114 }, { "epoch": 0.6893143915997529, "grad_norm": 
20.420713424682617, "learning_rate": 9.862754786905497e-07, "loss": 0.3864, "step": 1116 }, { "epoch": 0.6905497220506486, "grad_norm": 8.302571296691895, "learning_rate": 9.862507720815317e-07, "loss": 0.6486, "step": 1118 }, { "epoch": 0.6917850525015442, "grad_norm": 7.670549392700195, "learning_rate": 9.86226065472514e-07, "loss": 0.2528, "step": 1120 }, { "epoch": 0.6930203829524397, "grad_norm": 10.974410057067871, "learning_rate": 9.86201358863496e-07, "loss": 0.5605, "step": 1122 }, { "epoch": 0.6942557134033354, "grad_norm": 10.88280963897705, "learning_rate": 9.86176652254478e-07, "loss": 0.1838, "step": 1124 }, { "epoch": 0.695491043854231, "grad_norm": 6.624936103820801, "learning_rate": 9.861519456454601e-07, "loss": 0.0715, "step": 1126 }, { "epoch": 0.6967263743051266, "grad_norm": 4.356945991516113, "learning_rate": 9.861272390364423e-07, "loss": 0.1272, "step": 1128 }, { "epoch": 0.6979617047560223, "grad_norm": 14.520866394042969, "learning_rate": 9.861025324274243e-07, "loss": 0.486, "step": 1130 }, { "epoch": 0.6991970352069179, "grad_norm": 13.983577728271484, "learning_rate": 9.860778258184063e-07, "loss": 0.4789, "step": 1132 }, { "epoch": 0.7004323656578134, "grad_norm": 9.389881134033203, "learning_rate": 9.860531192093885e-07, "loss": 0.2232, "step": 1134 }, { "epoch": 0.701667696108709, "grad_norm": 12.153282165527344, "learning_rate": 9.860284126003705e-07, "loss": 0.348, "step": 1136 }, { "epoch": 0.7029030265596047, "grad_norm": 7.331610679626465, "learning_rate": 9.860037059913527e-07, "loss": 0.4616, "step": 1138 }, { "epoch": 0.7041383570105003, "grad_norm": 6.835014820098877, "learning_rate": 9.859789993823347e-07, "loss": 0.3029, "step": 1140 }, { "epoch": 0.705373687461396, "grad_norm": 8.396074295043945, "learning_rate": 9.859542927733167e-07, "loss": 0.133, "step": 1142 }, { "epoch": 0.7066090179122916, "grad_norm": 12.81733226776123, "learning_rate": 9.85929586164299e-07, "loss": 0.2211, "step": 1144 }, { "epoch": 
0.7078443483631871, "grad_norm": 10.581403732299805, "learning_rate": 9.85904879555281e-07, "loss": 0.4264, "step": 1146 }, { "epoch": 0.7090796788140827, "grad_norm": 11.790607452392578, "learning_rate": 9.85880172946263e-07, "loss": 0.2395, "step": 1148 }, { "epoch": 0.7103150092649784, "grad_norm": 17.299856185913086, "learning_rate": 9.85855466337245e-07, "loss": 0.6871, "step": 1150 }, { "epoch": 0.711550339715874, "grad_norm": 12.006670951843262, "learning_rate": 9.858307597282273e-07, "loss": 0.3474, "step": 1152 }, { "epoch": 0.7127856701667696, "grad_norm": 6.2609663009643555, "learning_rate": 9.858060531192093e-07, "loss": 0.1176, "step": 1154 }, { "epoch": 0.7140210006176653, "grad_norm": 12.932299613952637, "learning_rate": 9.857813465101915e-07, "loss": 0.2712, "step": 1156 }, { "epoch": 0.7152563310685608, "grad_norm": 5.787951946258545, "learning_rate": 9.857566399011735e-07, "loss": 0.1545, "step": 1158 }, { "epoch": 0.7164916615194564, "grad_norm": 10.314936637878418, "learning_rate": 9.857319332921557e-07, "loss": 0.4706, "step": 1160 }, { "epoch": 0.7177269919703521, "grad_norm": 9.448033332824707, "learning_rate": 9.857072266831377e-07, "loss": 0.4346, "step": 1162 }, { "epoch": 0.7189623224212477, "grad_norm": 11.779792785644531, "learning_rate": 9.856825200741199e-07, "loss": 0.1792, "step": 1164 }, { "epoch": 0.7201976528721433, "grad_norm": 8.687360763549805, "learning_rate": 9.856578134651019e-07, "loss": 0.2082, "step": 1166 }, { "epoch": 0.721432983323039, "grad_norm": 7.131823539733887, "learning_rate": 9.856331068560839e-07, "loss": 0.3074, "step": 1168 }, { "epoch": 0.7226683137739345, "grad_norm": 9.34787654876709, "learning_rate": 9.85608400247066e-07, "loss": 0.4275, "step": 1170 }, { "epoch": 0.7239036442248301, "grad_norm": 11.279828071594238, "learning_rate": 9.85583693638048e-07, "loss": 0.1642, "step": 1172 }, { "epoch": 0.7251389746757257, "grad_norm": 17.798864364624023, "learning_rate": 9.855589870290303e-07, "loss": 0.602, 
"step": 1174 }, { "epoch": 0.7263743051266214, "grad_norm": 7.146918773651123, "learning_rate": 9.855342804200123e-07, "loss": 0.083, "step": 1176 }, { "epoch": 0.727609635577517, "grad_norm": 15.954838752746582, "learning_rate": 9.855095738109945e-07, "loss": 0.6475, "step": 1178 }, { "epoch": 0.7288449660284126, "grad_norm": 9.449953079223633, "learning_rate": 9.854848672019765e-07, "loss": 0.5093, "step": 1180 }, { "epoch": 0.7300802964793082, "grad_norm": 25.32857894897461, "learning_rate": 9.854601605929585e-07, "loss": 0.6771, "step": 1182 }, { "epoch": 0.7313156269302038, "grad_norm": 9.687775611877441, "learning_rate": 9.854354539839407e-07, "loss": 0.5292, "step": 1184 }, { "epoch": 0.7325509573810994, "grad_norm": 7.6101884841918945, "learning_rate": 9.854107473749229e-07, "loss": 0.3482, "step": 1186 }, { "epoch": 0.7337862878319951, "grad_norm": 13.445752143859863, "learning_rate": 9.853860407659049e-07, "loss": 0.5759, "step": 1188 }, { "epoch": 0.7350216182828907, "grad_norm": 5.2540740966796875, "learning_rate": 9.853613341568869e-07, "loss": 0.0965, "step": 1190 }, { "epoch": 0.7362569487337863, "grad_norm": 8.142379760742188, "learning_rate": 9.85336627547869e-07, "loss": 0.2662, "step": 1192 }, { "epoch": 0.7374922791846819, "grad_norm": 8.563712120056152, "learning_rate": 9.85311920938851e-07, "loss": 0.2249, "step": 1194 }, { "epoch": 0.7387276096355775, "grad_norm": 9.249364852905273, "learning_rate": 9.852872143298333e-07, "loss": 0.1959, "step": 1196 }, { "epoch": 0.7399629400864731, "grad_norm": 10.021387100219727, "learning_rate": 9.852625077208153e-07, "loss": 0.2938, "step": 1198 }, { "epoch": 0.7411982705373688, "grad_norm": 4.6741557121276855, "learning_rate": 9.852378011117972e-07, "loss": 0.1567, "step": 1200 }, { "epoch": 0.7424336009882644, "grad_norm": 9.503564834594727, "learning_rate": 9.852130945027795e-07, "loss": 0.2681, "step": 1202 }, { "epoch": 0.74366893143916, "grad_norm": 2.63293194770813, "learning_rate": 
9.851883878937617e-07, "loss": 0.0652, "step": 1204 }, { "epoch": 0.7449042618900555, "grad_norm": 14.77422046661377, "learning_rate": 9.851636812847437e-07, "loss": 0.3335, "step": 1206 }, { "epoch": 0.7461395923409512, "grad_norm": 9.001810073852539, "learning_rate": 9.851389746757256e-07, "loss": 0.267, "step": 1208 }, { "epoch": 0.7473749227918468, "grad_norm": 13.490711212158203, "learning_rate": 9.851142680667079e-07, "loss": 0.4525, "step": 1210 }, { "epoch": 0.7486102532427424, "grad_norm": 6.008123397827148, "learning_rate": 9.850895614576898e-07, "loss": 0.2726, "step": 1212 }, { "epoch": 0.7498455836936381, "grad_norm": 10.868868827819824, "learning_rate": 9.85064854848672e-07, "loss": 0.3588, "step": 1214 }, { "epoch": 0.7510809141445337, "grad_norm": 14.597075462341309, "learning_rate": 9.85040148239654e-07, "loss": 0.6867, "step": 1216 }, { "epoch": 0.7523162445954292, "grad_norm": 15.076939582824707, "learning_rate": 9.85015441630636e-07, "loss": 0.675, "step": 1218 }, { "epoch": 0.7535515750463249, "grad_norm": 4.158604145050049, "learning_rate": 9.849907350216182e-07, "loss": 0.248, "step": 1220 }, { "epoch": 0.7547869054972205, "grad_norm": 12.403220176696777, "learning_rate": 9.849660284126004e-07, "loss": 0.2483, "step": 1222 }, { "epoch": 0.7560222359481161, "grad_norm": 8.262627601623535, "learning_rate": 9.849413218035824e-07, "loss": 0.2715, "step": 1224 }, { "epoch": 0.7572575663990118, "grad_norm": 7.378356456756592, "learning_rate": 9.849166151945644e-07, "loss": 0.3651, "step": 1226 }, { "epoch": 0.7584928968499074, "grad_norm": 11.720860481262207, "learning_rate": 9.848919085855466e-07, "loss": 0.4802, "step": 1228 }, { "epoch": 0.7597282273008029, "grad_norm": 14.722637176513672, "learning_rate": 9.848672019765286e-07, "loss": 0.4557, "step": 1230 }, { "epoch": 0.7609635577516985, "grad_norm": 5.724219799041748, "learning_rate": 9.848424953675108e-07, "loss": 0.1709, "step": 1232 }, { "epoch": 0.7621988882025942, "grad_norm": 
7.940279483795166, "learning_rate": 9.848177887584928e-07, "loss": 0.1774, "step": 1234 }, { "epoch": 0.7634342186534898, "grad_norm": 16.330867767333984, "learning_rate": 9.84793082149475e-07, "loss": 0.6017, "step": 1236 }, { "epoch": 0.7646695491043854, "grad_norm": 14.604491233825684, "learning_rate": 9.84768375540457e-07, "loss": 0.5485, "step": 1238 }, { "epoch": 0.7659048795552811, "grad_norm": 8.832310676574707, "learning_rate": 9.84743668931439e-07, "loss": 0.2013, "step": 1240 }, { "epoch": 0.7671402100061766, "grad_norm": 9.431452751159668, "learning_rate": 9.847189623224212e-07, "loss": 0.3362, "step": 1242 }, { "epoch": 0.7683755404570722, "grad_norm": 12.98123550415039, "learning_rate": 9.846942557134032e-07, "loss": 0.4926, "step": 1244 }, { "epoch": 0.7696108709079679, "grad_norm": 9.618399620056152, "learning_rate": 9.846695491043854e-07, "loss": 0.255, "step": 1246 }, { "epoch": 0.7708462013588635, "grad_norm": 11.266002655029297, "learning_rate": 9.846448424953674e-07, "loss": 0.2688, "step": 1248 }, { "epoch": 0.7720815318097591, "grad_norm": 14.786582946777344, "learning_rate": 9.846201358863494e-07, "loss": 0.9588, "step": 1250 }, { "epoch": 0.7733168622606548, "grad_norm": 10.453850746154785, "learning_rate": 9.845954292773316e-07, "loss": 0.6201, "step": 1252 }, { "epoch": 0.7745521927115503, "grad_norm": 8.108349800109863, "learning_rate": 9.845707226683138e-07, "loss": 0.216, "step": 1254 }, { "epoch": 0.7757875231624459, "grad_norm": 8.283027648925781, "learning_rate": 9.845460160592958e-07, "loss": 0.3563, "step": 1256 }, { "epoch": 0.7770228536133416, "grad_norm": 11.888049125671387, "learning_rate": 9.845213094502778e-07, "loss": 0.2007, "step": 1258 }, { "epoch": 0.7782581840642372, "grad_norm": 5.44971227645874, "learning_rate": 9.8449660284126e-07, "loss": 0.2419, "step": 1260 }, { "epoch": 0.7794935145151328, "grad_norm": 7.638753414154053, "learning_rate": 9.84471896232242e-07, "loss": 0.1916, "step": 1262 }, { "epoch": 
0.7807288449660285, "grad_norm": 5.566444396972656, "learning_rate": 9.844471896232242e-07, "loss": 0.222, "step": 1264 }, { "epoch": 0.781964175416924, "grad_norm": 4.4692583084106445, "learning_rate": 9.844224830142062e-07, "loss": 0.1132, "step": 1266 }, { "epoch": 0.7831995058678196, "grad_norm": 7.298047065734863, "learning_rate": 9.843977764051884e-07, "loss": 0.1074, "step": 1268 }, { "epoch": 0.7844348363187152, "grad_norm": 11.748432159423828, "learning_rate": 9.843730697961704e-07, "loss": 0.3781, "step": 1270 }, { "epoch": 0.7856701667696109, "grad_norm": 12.390447616577148, "learning_rate": 9.843483631871526e-07, "loss": 0.4015, "step": 1272 }, { "epoch": 0.7869054972205065, "grad_norm": 8.805943489074707, "learning_rate": 9.843236565781346e-07, "loss": 0.2388, "step": 1274 }, { "epoch": 0.7881408276714021, "grad_norm": 12.1315336227417, "learning_rate": 9.842989499691166e-07, "loss": 0.5365, "step": 1276 }, { "epoch": 0.7893761581222977, "grad_norm": 9.304017066955566, "learning_rate": 9.842742433600988e-07, "loss": 0.1808, "step": 1278 }, { "epoch": 0.7906114885731933, "grad_norm": 11.677606582641602, "learning_rate": 9.84249536751081e-07, "loss": 0.6204, "step": 1280 }, { "epoch": 0.7918468190240889, "grad_norm": 9.5737886428833, "learning_rate": 9.84224830142063e-07, "loss": 0.5264, "step": 1282 }, { "epoch": 0.7930821494749846, "grad_norm": 9.740925788879395, "learning_rate": 9.84200123533045e-07, "loss": 0.3419, "step": 1284 }, { "epoch": 0.7943174799258802, "grad_norm": 12.027420043945312, "learning_rate": 9.841754169240272e-07, "loss": 0.3023, "step": 1286 }, { "epoch": 0.7955528103767758, "grad_norm": 12.687722206115723, "learning_rate": 9.841507103150092e-07, "loss": 0.453, "step": 1288 }, { "epoch": 0.7967881408276714, "grad_norm": 11.913833618164062, "learning_rate": 9.841260037059914e-07, "loss": 0.3229, "step": 1290 }, { "epoch": 0.798023471278567, "grad_norm": 7.142927169799805, "learning_rate": 9.841012970969734e-07, "loss": 0.3403, 
"step": 1292 }, { "epoch": 0.7992588017294626, "grad_norm": 9.858790397644043, "learning_rate": 9.840765904879554e-07, "loss": 0.3855, "step": 1294 }, { "epoch": 0.8004941321803583, "grad_norm": 11.309351921081543, "learning_rate": 9.840518838789376e-07, "loss": 0.4167, "step": 1296 }, { "epoch": 0.8017294626312539, "grad_norm": 5.317216873168945, "learning_rate": 9.840271772699198e-07, "loss": 0.1795, "step": 1298 }, { "epoch": 0.8029647930821495, "grad_norm": 12.985337257385254, "learning_rate": 9.840024706609018e-07, "loss": 0.6079, "step": 1300 }, { "epoch": 0.804200123533045, "grad_norm": 11.02403450012207, "learning_rate": 9.839777640518838e-07, "loss": 0.3117, "step": 1302 }, { "epoch": 0.8054354539839407, "grad_norm": 15.345975875854492, "learning_rate": 9.83953057442866e-07, "loss": 0.2998, "step": 1304 }, { "epoch": 0.8066707844348363, "grad_norm": 12.688470840454102, "learning_rate": 9.83928350833848e-07, "loss": 0.3375, "step": 1306 }, { "epoch": 0.8079061148857319, "grad_norm": 9.370979309082031, "learning_rate": 9.839036442248302e-07, "loss": 0.3838, "step": 1308 }, { "epoch": 0.8091414453366276, "grad_norm": 9.066483497619629, "learning_rate": 9.838789376158122e-07, "loss": 0.2317, "step": 1310 }, { "epoch": 0.8103767757875232, "grad_norm": 9.484530448913574, "learning_rate": 9.838542310067944e-07, "loss": 0.3731, "step": 1312 }, { "epoch": 0.8116121062384187, "grad_norm": 9.709023475646973, "learning_rate": 9.838295243977764e-07, "loss": 0.4545, "step": 1314 }, { "epoch": 0.8128474366893144, "grad_norm": 16.16204261779785, "learning_rate": 9.838048177887584e-07, "loss": 0.5823, "step": 1316 }, { "epoch": 0.81408276714021, "grad_norm": 7.486817359924316, "learning_rate": 9.837801111797406e-07, "loss": 0.2866, "step": 1318 }, { "epoch": 0.8153180975911056, "grad_norm": 10.504172325134277, "learning_rate": 9.837554045707226e-07, "loss": 0.2637, "step": 1320 }, { "epoch": 0.8165534280420013, "grad_norm": 13.57312297821045, "learning_rate": 
9.837306979617048e-07, "loss": 0.5764, "step": 1322 }, { "epoch": 0.8177887584928969, "grad_norm": 9.14157485961914, "learning_rate": 9.837059913526868e-07, "loss": 0.2327, "step": 1324 }, { "epoch": 0.8190240889437924, "grad_norm": 5.015178203582764, "learning_rate": 9.836812847436688e-07, "loss": 0.1384, "step": 1326 }, { "epoch": 0.820259419394688, "grad_norm": 39.87788772583008, "learning_rate": 9.83656578134651e-07, "loss": 0.1645, "step": 1328 }, { "epoch": 0.8214947498455837, "grad_norm": 9.02287483215332, "learning_rate": 9.836318715256332e-07, "loss": 0.3168, "step": 1330 }, { "epoch": 0.8227300802964793, "grad_norm": 7.666600227355957, "learning_rate": 9.836071649166152e-07, "loss": 0.1028, "step": 1332 }, { "epoch": 0.823965410747375, "grad_norm": 3.6616268157958984, "learning_rate": 9.835824583075971e-07, "loss": 0.0883, "step": 1334 }, { "epoch": 0.8252007411982706, "grad_norm": 10.652318954467773, "learning_rate": 9.835577516985794e-07, "loss": 0.4641, "step": 1336 }, { "epoch": 0.8264360716491661, "grad_norm": 9.671696662902832, "learning_rate": 9.835330450895613e-07, "loss": 0.3535, "step": 1338 }, { "epoch": 0.8276714021000617, "grad_norm": 8.790804862976074, "learning_rate": 9.835083384805436e-07, "loss": 0.12, "step": 1340 }, { "epoch": 0.8289067325509574, "grad_norm": 9.460057258605957, "learning_rate": 9.834836318715255e-07, "loss": 0.2262, "step": 1342 }, { "epoch": 0.830142063001853, "grad_norm": 7.451642990112305, "learning_rate": 9.834589252625075e-07, "loss": 0.4422, "step": 1344 }, { "epoch": 0.8313773934527486, "grad_norm": 11.066305160522461, "learning_rate": 9.834342186534897e-07, "loss": 0.4064, "step": 1346 }, { "epoch": 0.8326127239036443, "grad_norm": 11.685218811035156, "learning_rate": 9.83409512044472e-07, "loss": 0.3294, "step": 1348 }, { "epoch": 0.8338480543545398, "grad_norm": 7.6997294425964355, "learning_rate": 9.83384805435454e-07, "loss": 0.1727, "step": 1350 }, { "epoch": 0.8350833848054354, "grad_norm": 
8.19925594329834, "learning_rate": 9.83360098826436e-07, "loss": 0.304, "step": 1352 }, { "epoch": 0.8363187152563311, "grad_norm": 4.209212303161621, "learning_rate": 9.833353922174181e-07, "loss": 0.3658, "step": 1354 }, { "epoch": 0.8375540457072267, "grad_norm": 7.934873580932617, "learning_rate": 9.833106856084003e-07, "loss": 0.2228, "step": 1356 }, { "epoch": 0.8387893761581223, "grad_norm": 12.827791213989258, "learning_rate": 9.832859789993823e-07, "loss": 0.4837, "step": 1358 }, { "epoch": 0.840024706609018, "grad_norm": 10.72163200378418, "learning_rate": 9.832612723903643e-07, "loss": 0.3174, "step": 1360 }, { "epoch": 0.8412600370599135, "grad_norm": 11.27523136138916, "learning_rate": 9.832365657813465e-07, "loss": 0.5041, "step": 1362 }, { "epoch": 0.8424953675108091, "grad_norm": 4.622845649719238, "learning_rate": 9.832118591723285e-07, "loss": 0.2374, "step": 1364 }, { "epoch": 0.8437306979617047, "grad_norm": 10.003692626953125, "learning_rate": 9.831871525633107e-07, "loss": 0.3945, "step": 1366 }, { "epoch": 0.8449660284126004, "grad_norm": 9.476014137268066, "learning_rate": 9.831624459542927e-07, "loss": 0.3816, "step": 1368 }, { "epoch": 0.846201358863496, "grad_norm": 14.773927688598633, "learning_rate": 9.831377393452747e-07, "loss": 0.4123, "step": 1370 }, { "epoch": 0.8474366893143916, "grad_norm": 8.008591651916504, "learning_rate": 9.83113032736257e-07, "loss": 0.187, "step": 1372 }, { "epoch": 0.8486720197652872, "grad_norm": 6.861488342285156, "learning_rate": 9.83088326127239e-07, "loss": 0.1382, "step": 1374 }, { "epoch": 0.8499073502161828, "grad_norm": 7.184529781341553, "learning_rate": 9.830636195182211e-07, "loss": 0.2279, "step": 1376 }, { "epoch": 0.8511426806670784, "grad_norm": 7.0378875732421875, "learning_rate": 9.830389129092031e-07, "loss": 0.2234, "step": 1378 }, { "epoch": 0.8523780111179741, "grad_norm": 7.122949123382568, "learning_rate": 9.830142063001853e-07, "loss": 0.1917, "step": 1380 }, { "epoch": 
0.8536133415688697, "grad_norm": 10.67326831817627, "learning_rate": 9.829894996911673e-07, "loss": 0.6557, "step": 1382 }, { "epoch": 0.8548486720197653, "grad_norm": 6.2014384269714355, "learning_rate": 9.829647930821493e-07, "loss": 0.2012, "step": 1384 }, { "epoch": 0.8560840024706609, "grad_norm": 7.1239142417907715, "learning_rate": 9.829400864731315e-07, "loss": 0.351, "step": 1386 }, { "epoch": 0.8573193329215565, "grad_norm": 3.2680413722991943, "learning_rate": 9.829153798641137e-07, "loss": 0.1229, "step": 1388 }, { "epoch": 0.8585546633724521, "grad_norm": 5.0420355796813965, "learning_rate": 9.828906732550957e-07, "loss": 0.1135, "step": 1390 }, { "epoch": 0.8597899938233478, "grad_norm": 5.801534652709961, "learning_rate": 9.828659666460777e-07, "loss": 0.296, "step": 1392 }, { "epoch": 0.8610253242742434, "grad_norm": 9.696951866149902, "learning_rate": 9.8284126003706e-07, "loss": 0.6055, "step": 1394 }, { "epoch": 0.862260654725139, "grad_norm": 4.352874279022217, "learning_rate": 9.82816553428042e-07, "loss": 0.1597, "step": 1396 }, { "epoch": 0.8634959851760345, "grad_norm": 5.4031877517700195, "learning_rate": 9.827918468190241e-07, "loss": 0.384, "step": 1398 }, { "epoch": 0.8647313156269302, "grad_norm": 8.546557426452637, "learning_rate": 9.82767140210006e-07, "loss": 0.5354, "step": 1400 }, { "epoch": 0.8659666460778258, "grad_norm": 6.968789100646973, "learning_rate": 9.82742433600988e-07, "loss": 0.1297, "step": 1402 }, { "epoch": 0.8672019765287214, "grad_norm": 11.15526008605957, "learning_rate": 9.827177269919703e-07, "loss": 0.3277, "step": 1404 }, { "epoch": 0.8684373069796171, "grad_norm": 11.398725509643555, "learning_rate": 9.826930203829525e-07, "loss": 0.5527, "step": 1406 }, { "epoch": 0.8696726374305127, "grad_norm": 11.861637115478516, "learning_rate": 9.826683137739345e-07, "loss": 0.3722, "step": 1408 }, { "epoch": 0.8709079678814082, "grad_norm": 7.168190002441406, "learning_rate": 9.826436071649165e-07, "loss": 0.6051, 
"step": 1410 }, { "epoch": 0.8721432983323039, "grad_norm": 4.506381988525391, "learning_rate": 9.826189005558987e-07, "loss": 0.3634, "step": 1412 }, { "epoch": 0.8733786287831995, "grad_norm": 14.544111251831055, "learning_rate": 9.825941939468807e-07, "loss": 0.4071, "step": 1414 }, { "epoch": 0.8746139592340951, "grad_norm": 9.729899406433105, "learning_rate": 9.82569487337863e-07, "loss": 0.1861, "step": 1416 }, { "epoch": 0.8758492896849908, "grad_norm": 7.823622226715088, "learning_rate": 9.825447807288449e-07, "loss": 0.3327, "step": 1418 }, { "epoch": 0.8770846201358864, "grad_norm": 11.25328254699707, "learning_rate": 9.825200741198269e-07, "loss": 0.3021, "step": 1420 }, { "epoch": 0.8783199505867819, "grad_norm": 6.100447177886963, "learning_rate": 9.82495367510809e-07, "loss": 0.1524, "step": 1422 }, { "epoch": 0.8795552810376775, "grad_norm": 12.928689956665039, "learning_rate": 9.824706609017913e-07, "loss": 0.6496, "step": 1424 }, { "epoch": 0.8807906114885732, "grad_norm": 11.798970222473145, "learning_rate": 9.824459542927733e-07, "loss": 0.3425, "step": 1426 }, { "epoch": 0.8820259419394688, "grad_norm": 8.553908348083496, "learning_rate": 9.824212476837553e-07, "loss": 0.2467, "step": 1428 }, { "epoch": 0.8832612723903644, "grad_norm": 5.1667704582214355, "learning_rate": 9.823965410747375e-07, "loss": 0.3932, "step": 1430 }, { "epoch": 0.8844966028412601, "grad_norm": 8.755138397216797, "learning_rate": 9.823718344657197e-07, "loss": 0.3173, "step": 1432 }, { "epoch": 0.8857319332921556, "grad_norm": 12.308649063110352, "learning_rate": 9.823471278567017e-07, "loss": 0.3864, "step": 1434 }, { "epoch": 0.8869672637430512, "grad_norm": 8.489375114440918, "learning_rate": 9.823224212476837e-07, "loss": 0.5445, "step": 1436 }, { "epoch": 0.8882025941939469, "grad_norm": 17.242874145507812, "learning_rate": 9.822977146386659e-07, "loss": 0.3736, "step": 1438 }, { "epoch": 0.8894379246448425, "grad_norm": 12.563277244567871, "learning_rate": 
9.822730080296479e-07, "loss": 0.3705, "step": 1440 }, { "epoch": 0.8906732550957381, "grad_norm": 9.356912612915039, "learning_rate": 9.8224830142063e-07, "loss": 0.2024, "step": 1442 }, { "epoch": 0.8919085855466338, "grad_norm": 5.5173492431640625, "learning_rate": 9.82223594811612e-07, "loss": 0.2107, "step": 1444 }, { "epoch": 0.8931439159975293, "grad_norm": 10.190670013427734, "learning_rate": 9.82198888202594e-07, "loss": 0.2556, "step": 1446 }, { "epoch": 0.8943792464484249, "grad_norm": 7.64862060546875, "learning_rate": 9.821741815935763e-07, "loss": 0.2, "step": 1448 }, { "epoch": 0.8956145768993206, "grad_norm": 14.747041702270508, "learning_rate": 9.821494749845583e-07, "loss": 0.7253, "step": 1450 }, { "epoch": 0.8968499073502162, "grad_norm": 8.428139686584473, "learning_rate": 9.821247683755405e-07, "loss": 0.1758, "step": 1452 }, { "epoch": 0.8980852378011118, "grad_norm": 6.676511287689209, "learning_rate": 9.821000617665225e-07, "loss": 0.1367, "step": 1454 }, { "epoch": 0.8993205682520075, "grad_norm": 5.130162715911865, "learning_rate": 9.820753551575047e-07, "loss": 0.1814, "step": 1456 }, { "epoch": 0.900555898702903, "grad_norm": 10.216890335083008, "learning_rate": 9.820506485484867e-07, "loss": 0.409, "step": 1458 }, { "epoch": 0.9017912291537986, "grad_norm": 5.724135398864746, "learning_rate": 9.820259419394687e-07, "loss": 0.2257, "step": 1460 }, { "epoch": 0.9030265596046942, "grad_norm": 5.549025058746338, "learning_rate": 9.820012353304509e-07, "loss": 0.7603, "step": 1462 }, { "epoch": 0.9042618900555899, "grad_norm": 3.3847670555114746, "learning_rate": 9.819765287214329e-07, "loss": 0.5244, "step": 1464 }, { "epoch": 0.9054972205064855, "grad_norm": 16.286794662475586, "learning_rate": 9.81951822112415e-07, "loss": 0.5047, "step": 1466 }, { "epoch": 0.9067325509573811, "grad_norm": 22.29071044921875, "learning_rate": 9.81927115503397e-07, "loss": 0.4663, "step": 1468 }, { "epoch": 0.9079678814082767, "grad_norm": 
8.590787887573242, "learning_rate": 9.819024088943793e-07, "loss": 0.3002, "step": 1470 }, { "epoch": 0.9092032118591723, "grad_norm": 11.419143676757812, "learning_rate": 9.818777022853612e-07, "loss": 0.4178, "step": 1472 }, { "epoch": 0.9104385423100679, "grad_norm": 5.945801258087158, "learning_rate": 9.818529956763435e-07, "loss": 0.3838, "step": 1474 }, { "epoch": 0.9116738727609636, "grad_norm": 4.276825904846191, "learning_rate": 9.818282890673254e-07, "loss": 0.1355, "step": 1476 }, { "epoch": 0.9129092032118592, "grad_norm": 5.829901695251465, "learning_rate": 9.818035824583074e-07, "loss": 0.0782, "step": 1478 }, { "epoch": 0.9141445336627548, "grad_norm": 8.855517387390137, "learning_rate": 9.817788758492896e-07, "loss": 0.2307, "step": 1480 }, { "epoch": 0.9153798641136504, "grad_norm": 7.790690898895264, "learning_rate": 9.817541692402718e-07, "loss": 0.2152, "step": 1482 }, { "epoch": 0.916615194564546, "grad_norm": 10.961869239807129, "learning_rate": 9.817294626312538e-07, "loss": 0.4511, "step": 1484 }, { "epoch": 0.9178505250154416, "grad_norm": 21.709983825683594, "learning_rate": 9.817047560222358e-07, "loss": 0.4257, "step": 1486 }, { "epoch": 0.9190858554663373, "grad_norm": 12.011651992797852, "learning_rate": 9.81680049413218e-07, "loss": 0.3156, "step": 1488 }, { "epoch": 0.9203211859172329, "grad_norm": 7.158661365509033, "learning_rate": 9.816553428042e-07, "loss": 0.1899, "step": 1490 }, { "epoch": 0.9215565163681285, "grad_norm": 6.94175910949707, "learning_rate": 9.816306361951822e-07, "loss": 0.3646, "step": 1492 }, { "epoch": 0.922791846819024, "grad_norm": 11.92798900604248, "learning_rate": 9.816059295861642e-07, "loss": 0.5961, "step": 1494 }, { "epoch": 0.9240271772699197, "grad_norm": 6.904292106628418, "learning_rate": 9.815812229771462e-07, "loss": 0.5612, "step": 1496 }, { "epoch": 0.9252625077208153, "grad_norm": 8.2186861038208, "learning_rate": 9.815565163681284e-07, "loss": 0.1501, "step": 1498 }, { "epoch": 
0.9264978381717109, "grad_norm": 11.312210083007812, "learning_rate": 9.815318097591106e-07, "loss": 0.2854, "step": 1500 }, { "epoch": 0.9264978381717109, "eval_cer": 0.0708993976320709, "eval_loss": 0.32916030287742615, "eval_runtime": 63.7444, "eval_samples_per_second": 12.88, "eval_steps_per_second": 1.616, "step": 1500 }, { "epoch": 0.9277331686226066, "grad_norm": 10.704874992370605, "learning_rate": 9.815071031500926e-07, "loss": 0.1852, "step": 1502 }, { "epoch": 0.9289684990735022, "grad_norm": 16.1801700592041, "learning_rate": 9.814823965410746e-07, "loss": 0.4257, "step": 1504 }, { "epoch": 0.9302038295243977, "grad_norm": 10.279834747314453, "learning_rate": 9.814576899320568e-07, "loss": 0.2825, "step": 1506 }, { "epoch": 0.9314391599752934, "grad_norm": 13.19426441192627, "learning_rate": 9.814329833230388e-07, "loss": 0.4558, "step": 1508 }, { "epoch": 0.932674490426189, "grad_norm": 8.698274612426758, "learning_rate": 9.81408276714021e-07, "loss": 0.2158, "step": 1510 }, { "epoch": 0.9339098208770846, "grad_norm": 10.00684928894043, "learning_rate": 9.81383570105003e-07, "loss": 0.5019, "step": 1512 }, { "epoch": 0.9351451513279803, "grad_norm": 11.47139835357666, "learning_rate": 9.813588634959852e-07, "loss": 0.1386, "step": 1514 }, { "epoch": 0.9363804817788759, "grad_norm": 9.424354553222656, "learning_rate": 9.813341568869672e-07, "loss": 0.2504, "step": 1516 }, { "epoch": 0.9376158122297714, "grad_norm": 7.461796760559082, "learning_rate": 9.813094502779492e-07, "loss": 0.2105, "step": 1518 }, { "epoch": 0.938851142680667, "grad_norm": 9.559112548828125, "learning_rate": 9.812847436689314e-07, "loss": 0.2124, "step": 1520 }, { "epoch": 0.9400864731315627, "grad_norm": 6.376932621002197, "learning_rate": 9.812600370599134e-07, "loss": 0.1057, "step": 1522 }, { "epoch": 0.9413218035824583, "grad_norm": 9.151801109313965, "learning_rate": 9.812353304508956e-07, "loss": 0.2869, "step": 1524 }, { "epoch": 0.942557134033354, "grad_norm": 
12.431580543518066, "learning_rate": 9.812106238418776e-07, "loss": 0.3818, "step": 1526 }, { "epoch": 0.9437924644842496, "grad_norm": 13.616825103759766, "learning_rate": 9.811859172328596e-07, "loss": 0.5548, "step": 1528 }, { "epoch": 0.9450277949351451, "grad_norm": 18.5328426361084, "learning_rate": 9.811612106238418e-07, "loss": 0.5813, "step": 1530 }, { "epoch": 0.9462631253860407, "grad_norm": 10.398816108703613, "learning_rate": 9.81136504014824e-07, "loss": 0.481, "step": 1532 }, { "epoch": 0.9474984558369364, "grad_norm": 6.251501560211182, "learning_rate": 9.81111797405806e-07, "loss": 0.2241, "step": 1534 }, { "epoch": 0.948733786287832, "grad_norm": 14.329692840576172, "learning_rate": 9.81087090796788e-07, "loss": 0.5433, "step": 1536 }, { "epoch": 0.9499691167387276, "grad_norm": 5.312857151031494, "learning_rate": 9.810623841877702e-07, "loss": 0.1463, "step": 1538 }, { "epoch": 0.9512044471896233, "grad_norm": 6.333440780639648, "learning_rate": 9.810376775787522e-07, "loss": 0.5114, "step": 1540 }, { "epoch": 0.9524397776405188, "grad_norm": 11.503284454345703, "learning_rate": 9.810129709697344e-07, "loss": 0.2695, "step": 1542 }, { "epoch": 0.9536751080914144, "grad_norm": 5.738722324371338, "learning_rate": 9.809882643607164e-07, "loss": 0.1603, "step": 1544 }, { "epoch": 0.9549104385423101, "grad_norm": 6.132312774658203, "learning_rate": 9.809635577516986e-07, "loss": 0.366, "step": 1546 }, { "epoch": 0.9561457689932057, "grad_norm": 6.2031402587890625, "learning_rate": 9.809388511426806e-07, "loss": 0.1349, "step": 1548 }, { "epoch": 0.9573810994441013, "grad_norm": 6.3127760887146, "learning_rate": 9.809141445336628e-07, "loss": 0.1535, "step": 1550 }, { "epoch": 0.958616429894997, "grad_norm": 8.198063850402832, "learning_rate": 9.808894379246448e-07, "loss": 0.2307, "step": 1552 }, { "epoch": 0.9598517603458925, "grad_norm": 6.54553747177124, "learning_rate": 9.808647313156268e-07, "loss": 0.261, "step": 1554 }, { "epoch": 
0.9610870907967881, "grad_norm": 16.921100616455078, "learning_rate": 9.80840024706609e-07, "loss": 0.4889, "step": 1556 }, { "epoch": 0.9623224212476837, "grad_norm": 2.307810068130493, "learning_rate": 9.808153180975912e-07, "loss": 0.1535, "step": 1558 }, { "epoch": 0.9635577516985794, "grad_norm": 9.86186408996582, "learning_rate": 9.807906114885732e-07, "loss": 0.4781, "step": 1560 }, { "epoch": 0.964793082149475, "grad_norm": 10.992141723632812, "learning_rate": 9.807659048795552e-07, "loss": 0.3287, "step": 1562 }, { "epoch": 0.9660284126003706, "grad_norm": 7.938678741455078, "learning_rate": 9.807411982705374e-07, "loss": 0.2767, "step": 1564 }, { "epoch": 0.9672637430512662, "grad_norm": 6.509798049926758, "learning_rate": 9.807164916615194e-07, "loss": 0.1812, "step": 1566 }, { "epoch": 0.9684990735021618, "grad_norm": 12.229863166809082, "learning_rate": 9.806917850525016e-07, "loss": 0.3144, "step": 1568 }, { "epoch": 0.9697344039530574, "grad_norm": 12.174707412719727, "learning_rate": 9.806670784434836e-07, "loss": 0.2088, "step": 1570 }, { "epoch": 0.9709697344039531, "grad_norm": 10.724156379699707, "learning_rate": 9.806423718344656e-07, "loss": 0.3693, "step": 1572 }, { "epoch": 0.9722050648548487, "grad_norm": 9.771634101867676, "learning_rate": 9.806176652254478e-07, "loss": 0.584, "step": 1574 }, { "epoch": 0.9734403953057443, "grad_norm": 4.813446998596191, "learning_rate": 9.8059295861643e-07, "loss": 0.2469, "step": 1576 }, { "epoch": 0.9746757257566399, "grad_norm": 10.717865943908691, "learning_rate": 9.80568252007412e-07, "loss": 0.3671, "step": 1578 }, { "epoch": 0.9759110562075355, "grad_norm": 11.018463134765625, "learning_rate": 9.80543545398394e-07, "loss": 0.261, "step": 1580 }, { "epoch": 0.9771463866584311, "grad_norm": 9.792383193969727, "learning_rate": 9.805188387893762e-07, "loss": 0.3245, "step": 1582 }, { "epoch": 0.9783817171093268, "grad_norm": 21.760038375854492, "learning_rate": 9.804941321803582e-07, "loss": 0.4656, 
"step": 1584 }, { "epoch": 0.9796170475602224, "grad_norm": 17.624727249145508, "learning_rate": 9.804694255713404e-07, "loss": 0.3727, "step": 1586 }, { "epoch": 0.980852378011118, "grad_norm": 3.2476603984832764, "learning_rate": 9.804447189623224e-07, "loss": 0.0781, "step": 1588 }, { "epoch": 0.9820877084620135, "grad_norm": 11.291727066040039, "learning_rate": 9.804200123533046e-07, "loss": 0.4597, "step": 1590 }, { "epoch": 0.9833230389129092, "grad_norm": 10.524027824401855, "learning_rate": 9.803953057442866e-07, "loss": 0.2872, "step": 1592 }, { "epoch": 0.9845583693638048, "grad_norm": 10.133498191833496, "learning_rate": 9.803705991352686e-07, "loss": 0.3145, "step": 1594 }, { "epoch": 0.9857936998147004, "grad_norm": 11.511552810668945, "learning_rate": 9.803458925262508e-07, "loss": 0.3463, "step": 1596 }, { "epoch": 0.9870290302655961, "grad_norm": 9.461629867553711, "learning_rate": 9.803211859172328e-07, "loss": 0.3976, "step": 1598 }, { "epoch": 0.9882643607164917, "grad_norm": 11.425546646118164, "learning_rate": 9.80296479308215e-07, "loss": 0.2202, "step": 1600 }, { "epoch": 0.9894996911673872, "grad_norm": 11.092352867126465, "learning_rate": 9.80271772699197e-07, "loss": 0.4355, "step": 1602 }, { "epoch": 0.9907350216182829, "grad_norm": 7.643012046813965, "learning_rate": 9.80247066090179e-07, "loss": 0.326, "step": 1604 }, { "epoch": 0.9919703520691785, "grad_norm": 5.958633899688721, "learning_rate": 9.802223594811611e-07, "loss": 0.2197, "step": 1606 }, { "epoch": 0.9932056825200741, "grad_norm": 22.608871459960938, "learning_rate": 9.801976528721434e-07, "loss": 0.3536, "step": 1608 }, { "epoch": 0.9944410129709698, "grad_norm": 5.725681304931641, "learning_rate": 9.801729462631253e-07, "loss": 0.1408, "step": 1610 }, { "epoch": 0.9956763434218654, "grad_norm": 6.983816146850586, "learning_rate": 9.801482396541073e-07, "loss": 0.1806, "step": 1612 }, { "epoch": 0.9969116738727609, "grad_norm": 9.130241394042969, "learning_rate": 
9.801235330450895e-07, "loss": 0.2517, "step": 1614 }, { "epoch": 0.9981470043236566, "grad_norm": 5.814324378967285, "learning_rate": 9.800988264360715e-07, "loss": 0.0933, "step": 1616 }, { "epoch": 0.9993823347745522, "grad_norm": 8.92272663116455, "learning_rate": 9.800741198270537e-07, "loss": 0.3376, "step": 1618 }, { "epoch": 1.0006176652254477, "grad_norm": 2.9017436504364014, "learning_rate": 9.800494132180357e-07, "loss": 0.0585, "step": 1620 }, { "epoch": 1.0018529956763433, "grad_norm": 13.568907737731934, "learning_rate": 9.800247066090177e-07, "loss": 0.3948, "step": 1622 }, { "epoch": 1.003088326127239, "grad_norm": 5.428772449493408, "learning_rate": 9.8e-07, "loss": 0.1262, "step": 1624 }, { "epoch": 1.0043236565781346, "grad_norm": 3.4260404109954834, "learning_rate": 9.799752933909821e-07, "loss": 0.0951, "step": 1626 }, { "epoch": 1.0055589870290302, "grad_norm": 7.736699104309082, "learning_rate": 9.799505867819641e-07, "loss": 0.1375, "step": 1628 }, { "epoch": 1.0067943174799259, "grad_norm": 4.6357903480529785, "learning_rate": 9.799258801729461e-07, "loss": 0.1155, "step": 1630 }, { "epoch": 1.0080296479308215, "grad_norm": 7.860255241394043, "learning_rate": 9.799011735639283e-07, "loss": 0.271, "step": 1632 }, { "epoch": 1.0092649783817171, "grad_norm": 8.635394096374512, "learning_rate": 9.798764669549105e-07, "loss": 0.3765, "step": 1634 }, { "epoch": 1.0105003088326128, "grad_norm": 8.527502059936523, "learning_rate": 9.798517603458925e-07, "loss": 0.0784, "step": 1636 }, { "epoch": 1.0117356392835084, "grad_norm": 10.55746841430664, "learning_rate": 9.798270537368745e-07, "loss": 0.3277, "step": 1638 }, { "epoch": 1.012970969734404, "grad_norm": 3.9183151721954346, "learning_rate": 9.798023471278567e-07, "loss": 0.1848, "step": 1640 }, { "epoch": 1.0142063001852997, "grad_norm": 11.409595489501953, "learning_rate": 9.797776405188387e-07, "loss": 0.334, "step": 1642 }, { "epoch": 1.015441630636195, "grad_norm": 6.719525337219238, 
"learning_rate": 9.79752933909821e-07, "loss": 0.2463, "step": 1644 }, { "epoch": 1.0166769610870907, "grad_norm": 8.414685249328613, "learning_rate": 9.79728227300803e-07, "loss": 0.3351, "step": 1646 }, { "epoch": 1.0179122915379863, "grad_norm": 8.872358322143555, "learning_rate": 9.79703520691785e-07, "loss": 0.1657, "step": 1648 }, { "epoch": 1.019147621988882, "grad_norm": 5.505781650543213, "learning_rate": 9.796788140827671e-07, "loss": 0.2088, "step": 1650 }, { "epoch": 1.0203829524397776, "grad_norm": 12.548725128173828, "learning_rate": 9.796541074737491e-07, "loss": 0.1937, "step": 1652 }, { "epoch": 1.0216182828906732, "grad_norm": 6.5797319412231445, "learning_rate": 9.796294008647313e-07, "loss": 0.6549, "step": 1654 }, { "epoch": 1.0228536133415689, "grad_norm": 5.937653064727783, "learning_rate": 9.796046942557133e-07, "loss": 0.2543, "step": 1656 }, { "epoch": 1.0240889437924645, "grad_norm": 7.8403096199035645, "learning_rate": 9.795799876466955e-07, "loss": 0.1687, "step": 1658 }, { "epoch": 1.0253242742433601, "grad_norm": 5.431320667266846, "learning_rate": 9.795552810376775e-07, "loss": 0.1251, "step": 1660 }, { "epoch": 1.0265596046942558, "grad_norm": 6.809297561645508, "learning_rate": 9.795305744286597e-07, "loss": 0.2036, "step": 1662 }, { "epoch": 1.0277949351451514, "grad_norm": 5.847197532653809, "learning_rate": 9.795058678196417e-07, "loss": 0.088, "step": 1664 }, { "epoch": 1.029030265596047, "grad_norm": 7.917027950286865, "learning_rate": 9.79481161210624e-07, "loss": 0.4963, "step": 1666 }, { "epoch": 1.0302655960469425, "grad_norm": 9.684175491333008, "learning_rate": 9.79456454601606e-07, "loss": 0.2443, "step": 1668 }, { "epoch": 1.031500926497838, "grad_norm": 14.359932899475098, "learning_rate": 9.79431747992588e-07, "loss": 0.387, "step": 1670 }, { "epoch": 1.0327362569487337, "grad_norm": 17.160797119140625, "learning_rate": 9.7940704138357e-07, "loss": 0.3574, "step": 1672 }, { "epoch": 1.0339715873996294, "grad_norm": 
5.783222198486328, "learning_rate": 9.79382334774552e-07, "loss": 0.1215, "step": 1674 }, { "epoch": 1.035206917850525, "grad_norm": 8.407276153564453, "learning_rate": 9.793576281655343e-07, "loss": 0.349, "step": 1676 }, { "epoch": 1.0364422483014206, "grad_norm": 5.227428913116455, "learning_rate": 9.793329215565163e-07, "loss": 0.0975, "step": 1678 }, { "epoch": 1.0376775787523163, "grad_norm": 13.486154556274414, "learning_rate": 9.793082149474983e-07, "loss": 0.498, "step": 1680 }, { "epoch": 1.0389129092032119, "grad_norm": 6.697354793548584, "learning_rate": 9.792835083384805e-07, "loss": 0.1742, "step": 1682 }, { "epoch": 1.0401482396541075, "grad_norm": 8.624005317687988, "learning_rate": 9.792588017294627e-07, "loss": 0.3382, "step": 1684 }, { "epoch": 1.0413835701050032, "grad_norm": 11.703718185424805, "learning_rate": 9.792340951204447e-07, "loss": 0.1946, "step": 1686 }, { "epoch": 1.0426189005558988, "grad_norm": 10.803505897521973, "learning_rate": 9.792093885114267e-07, "loss": 0.2591, "step": 1688 }, { "epoch": 1.0438542310067944, "grad_norm": 6.428384780883789, "learning_rate": 9.791846819024089e-07, "loss": 0.123, "step": 1690 }, { "epoch": 1.0450895614576898, "grad_norm": 13.74370288848877, "learning_rate": 9.791599752933909e-07, "loss": 0.3537, "step": 1692 }, { "epoch": 1.0463248919085855, "grad_norm": 4.71676778793335, "learning_rate": 9.79135268684373e-07, "loss": 0.1587, "step": 1694 }, { "epoch": 1.047560222359481, "grad_norm": 21.964792251586914, "learning_rate": 9.79110562075355e-07, "loss": 0.351, "step": 1696 }, { "epoch": 1.0487955528103767, "grad_norm": 6.2238664627075195, "learning_rate": 9.79085855466337e-07, "loss": 0.1324, "step": 1698 }, { "epoch": 1.0500308832612724, "grad_norm": 12.412793159484863, "learning_rate": 9.790611488573193e-07, "loss": 0.1354, "step": 1700 }, { "epoch": 1.051266213712168, "grad_norm": 2.51253604888916, "learning_rate": 9.790364422483015e-07, "loss": 0.2595, "step": 1702 }, { "epoch": 
1.0525015441630636, "grad_norm": 7.541430950164795, "learning_rate": 9.790117356392835e-07, "loss": 0.1068, "step": 1704 }, { "epoch": 1.0537368746139593, "grad_norm": 10.158905982971191, "learning_rate": 9.789870290302655e-07, "loss": 0.3996, "step": 1706 }, { "epoch": 1.054972205064855, "grad_norm": 4.805825710296631, "learning_rate": 9.789623224212477e-07, "loss": 0.4703, "step": 1708 }, { "epoch": 1.0562075355157505, "grad_norm": 5.316933631896973, "learning_rate": 9.789376158122299e-07, "loss": 0.1132, "step": 1710 }, { "epoch": 1.0574428659666462, "grad_norm": 7.513204097747803, "learning_rate": 9.789129092032119e-07, "loss": 0.2618, "step": 1712 }, { "epoch": 1.0586781964175418, "grad_norm": 10.501216888427734, "learning_rate": 9.788882025941939e-07, "loss": 0.3379, "step": 1714 }, { "epoch": 1.0599135268684372, "grad_norm": 9.296151161193848, "learning_rate": 9.78863495985176e-07, "loss": 0.2693, "step": 1716 }, { "epoch": 1.0611488573193328, "grad_norm": 4.719005584716797, "learning_rate": 9.78838789376158e-07, "loss": 0.2626, "step": 1718 }, { "epoch": 1.0623841877702285, "grad_norm": 13.646428108215332, "learning_rate": 9.788140827671403e-07, "loss": 0.2411, "step": 1720 }, { "epoch": 1.063619518221124, "grad_norm": 16.47609519958496, "learning_rate": 9.787893761581223e-07, "loss": 0.4989, "step": 1722 }, { "epoch": 1.0648548486720197, "grad_norm": 11.532161712646484, "learning_rate": 9.787646695491043e-07, "loss": 0.4061, "step": 1724 }, { "epoch": 1.0660901791229154, "grad_norm": 15.965110778808594, "learning_rate": 9.787399629400865e-07, "loss": 0.7882, "step": 1726 }, { "epoch": 1.067325509573811, "grad_norm": 8.780924797058105, "learning_rate": 9.787152563310685e-07, "loss": 0.3045, "step": 1728 }, { "epoch": 1.0685608400247066, "grad_norm": 5.434606552124023, "learning_rate": 9.786905497220507e-07, "loss": 0.183, "step": 1730 }, { "epoch": 1.0697961704756023, "grad_norm": 6.947041034698486, "learning_rate": 9.786658431130327e-07, "loss": 0.1402, 
"step": 1732 }, { "epoch": 1.071031500926498, "grad_norm": 6.533885955810547, "learning_rate": 9.786411365040149e-07, "loss": 0.1266, "step": 1734 }, { "epoch": 1.0722668313773935, "grad_norm": 8.696701049804688, "learning_rate": 9.786164298949969e-07, "loss": 0.3812, "step": 1736 }, { "epoch": 1.0735021618282892, "grad_norm": 9.287471771240234, "learning_rate": 9.785917232859788e-07, "loss": 0.2439, "step": 1738 }, { "epoch": 1.0747374922791846, "grad_norm": 7.611733436584473, "learning_rate": 9.78567016676961e-07, "loss": 0.2913, "step": 1740 }, { "epoch": 1.0759728227300802, "grad_norm": 4.5250749588012695, "learning_rate": 9.78542310067943e-07, "loss": 0.1982, "step": 1742 }, { "epoch": 1.0772081531809758, "grad_norm": 5.264224529266357, "learning_rate": 9.785176034589252e-07, "loss": 0.1123, "step": 1744 }, { "epoch": 1.0784434836318715, "grad_norm": 12.875831604003906, "learning_rate": 9.784928968499072e-07, "loss": 0.4355, "step": 1746 }, { "epoch": 1.0796788140827671, "grad_norm": 8.101836204528809, "learning_rate": 9.784681902408894e-07, "loss": 0.2104, "step": 1748 }, { "epoch": 1.0809141445336627, "grad_norm": 7.9239583015441895, "learning_rate": 9.784434836318714e-07, "loss": 0.1457, "step": 1750 }, { "epoch": 1.0821494749845584, "grad_norm": 3.995741844177246, "learning_rate": 9.784187770228536e-07, "loss": 0.2201, "step": 1752 }, { "epoch": 1.083384805435454, "grad_norm": 7.475259304046631, "learning_rate": 9.783940704138356e-07, "loss": 0.289, "step": 1754 }, { "epoch": 1.0846201358863496, "grad_norm": 9.598747253417969, "learning_rate": 9.783693638048176e-07, "loss": 0.4585, "step": 1756 }, { "epoch": 1.0858554663372453, "grad_norm": 5.883282661437988, "learning_rate": 9.783446571957998e-07, "loss": 0.0857, "step": 1758 }, { "epoch": 1.087090796788141, "grad_norm": 9.946076393127441, "learning_rate": 9.78319950586782e-07, "loss": 0.2397, "step": 1760 }, { "epoch": 1.0883261272390365, "grad_norm": 8.807205200195312, "learning_rate": 
9.78295243977764e-07, "loss": 0.2191, "step": 1762 }, { "epoch": 1.089561457689932, "grad_norm": 4.629156589508057, "learning_rate": 9.78270537368746e-07, "loss": 0.1721, "step": 1764 }, { "epoch": 1.0907967881408276, "grad_norm": 8.939301490783691, "learning_rate": 9.782458307597282e-07, "loss": 0.3922, "step": 1766 }, { "epoch": 1.0920321185917232, "grad_norm": 9.642497062683105, "learning_rate": 9.782211241507102e-07, "loss": 0.2212, "step": 1768 }, { "epoch": 1.0932674490426189, "grad_norm": 6.811967372894287, "learning_rate": 9.781964175416924e-07, "loss": 0.1849, "step": 1770 }, { "epoch": 1.0945027794935145, "grad_norm": 7.477466583251953, "learning_rate": 9.781717109326744e-07, "loss": 0.1627, "step": 1772 }, { "epoch": 1.0957381099444101, "grad_norm": 13.64073657989502, "learning_rate": 9.781470043236564e-07, "loss": 0.3249, "step": 1774 }, { "epoch": 1.0969734403953058, "grad_norm": 8.833832740783691, "learning_rate": 9.781222977146386e-07, "loss": 0.1885, "step": 1776 }, { "epoch": 1.0982087708462014, "grad_norm": 12.62389087677002, "learning_rate": 9.780975911056208e-07, "loss": 0.2334, "step": 1778 }, { "epoch": 1.099444101297097, "grad_norm": 8.996566772460938, "learning_rate": 9.780728844966028e-07, "loss": 0.4112, "step": 1780 }, { "epoch": 1.1006794317479927, "grad_norm": 11.589725494384766, "learning_rate": 9.780481778875848e-07, "loss": 0.3244, "step": 1782 }, { "epoch": 1.1019147621988883, "grad_norm": 8.69617748260498, "learning_rate": 9.78023471278567e-07, "loss": 0.1563, "step": 1784 }, { "epoch": 1.103150092649784, "grad_norm": 4.889014720916748, "learning_rate": 9.77998764669549e-07, "loss": 0.0759, "step": 1786 }, { "epoch": 1.1043854231006793, "grad_norm": 12.642362594604492, "learning_rate": 9.779740580605312e-07, "loss": 0.3022, "step": 1788 }, { "epoch": 1.105620753551575, "grad_norm": 4.534321308135986, "learning_rate": 9.779493514515132e-07, "loss": 0.3799, "step": 1790 }, { "epoch": 1.1068560840024706, "grad_norm": 
9.928446769714355, "learning_rate": 9.779246448424954e-07, "loss": 0.1208, "step": 1792 }, { "epoch": 1.1080914144533662, "grad_norm": 7.571257591247559, "learning_rate": 9.778999382334774e-07, "loss": 0.1829, "step": 1794 }, { "epoch": 1.1093267449042619, "grad_norm": 3.6540448665618896, "learning_rate": 9.778752316244596e-07, "loss": 0.1906, "step": 1796 }, { "epoch": 1.1105620753551575, "grad_norm": 8.51205825805664, "learning_rate": 9.778505250154416e-07, "loss": 0.0941, "step": 1798 }, { "epoch": 1.1117974058060531, "grad_norm": 11.153965950012207, "learning_rate": 9.778258184064236e-07, "loss": 0.2039, "step": 1800 }, { "epoch": 1.1130327362569488, "grad_norm": 13.023344039916992, "learning_rate": 9.778011117974058e-07, "loss": 0.3648, "step": 1802 }, { "epoch": 1.1142680667078444, "grad_norm": 10.956592559814453, "learning_rate": 9.777764051883878e-07, "loss": 0.4897, "step": 1804 }, { "epoch": 1.11550339715874, "grad_norm": 2.5070767402648926, "learning_rate": 9.7775169857937e-07, "loss": 0.103, "step": 1806 }, { "epoch": 1.1167387276096357, "grad_norm": 11.04659652709961, "learning_rate": 9.77726991970352e-07, "loss": 0.2563, "step": 1808 }, { "epoch": 1.1179740580605313, "grad_norm": 4.337741374969482, "learning_rate": 9.777022853613342e-07, "loss": 0.1035, "step": 1810 }, { "epoch": 1.1192093885114267, "grad_norm": 5.769378662109375, "learning_rate": 9.776775787523162e-07, "loss": 0.1055, "step": 1812 }, { "epoch": 1.1204447189623223, "grad_norm": 10.213554382324219, "learning_rate": 9.776528721432982e-07, "loss": 0.243, "step": 1814 }, { "epoch": 1.121680049413218, "grad_norm": 5.366856098175049, "learning_rate": 9.776281655342804e-07, "loss": 0.093, "step": 1816 }, { "epoch": 1.1229153798641136, "grad_norm": 8.528334617614746, "learning_rate": 9.776034589252624e-07, "loss": 0.2433, "step": 1818 }, { "epoch": 1.1241507103150092, "grad_norm": 5.685783386230469, "learning_rate": 9.775787523162446e-07, "loss": 0.072, "step": 1820 }, { "epoch": 
1.1253860407659049, "grad_norm": 17.133319854736328, "learning_rate": 9.775540457072266e-07, "loss": 0.251, "step": 1822 }, { "epoch": 1.1266213712168005, "grad_norm": 9.167200088500977, "learning_rate": 9.775293390982086e-07, "loss": 0.1824, "step": 1824 }, { "epoch": 1.1278567016676961, "grad_norm": 4.805238723754883, "learning_rate": 9.775046324891908e-07, "loss": 0.2169, "step": 1826 }, { "epoch": 1.1290920321185918, "grad_norm": 7.515695095062256, "learning_rate": 9.77479925880173e-07, "loss": 0.1781, "step": 1828 }, { "epoch": 1.1303273625694874, "grad_norm": 5.644960403442383, "learning_rate": 9.77455219271155e-07, "loss": 0.2704, "step": 1830 }, { "epoch": 1.131562693020383, "grad_norm": 11.84373664855957, "learning_rate": 9.77430512662137e-07, "loss": 0.1874, "step": 1832 }, { "epoch": 1.1327980234712784, "grad_norm": 10.761896133422852, "learning_rate": 9.774058060531192e-07, "loss": 0.1708, "step": 1834 }, { "epoch": 1.1340333539221743, "grad_norm": 15.2642183303833, "learning_rate": 9.773810994441014e-07, "loss": 0.3127, "step": 1836 }, { "epoch": 1.1352686843730697, "grad_norm": 15.994034767150879, "learning_rate": 9.773563928350834e-07, "loss": 0.201, "step": 1838 }, { "epoch": 1.1365040148239653, "grad_norm": 10.937432289123535, "learning_rate": 9.773316862260654e-07, "loss": 0.1588, "step": 1840 }, { "epoch": 1.137739345274861, "grad_norm": 7.250688076019287, "learning_rate": 9.773069796170476e-07, "loss": 0.2566, "step": 1842 }, { "epoch": 1.1389746757257566, "grad_norm": 8.077079772949219, "learning_rate": 9.772822730080296e-07, "loss": 0.1844, "step": 1844 }, { "epoch": 1.1402100061766522, "grad_norm": 5.437566757202148, "learning_rate": 9.772575663990118e-07, "loss": 0.2679, "step": 1846 }, { "epoch": 1.1414453366275479, "grad_norm": 8.265015602111816, "learning_rate": 9.772328597899938e-07, "loss": 0.1615, "step": 1848 }, { "epoch": 1.1426806670784435, "grad_norm": 17.30742645263672, "learning_rate": 9.772081531809758e-07, "loss": 0.2926, 
"step": 1850 }, { "epoch": 1.1439159975293391, "grad_norm": 9.343238830566406, "learning_rate": 9.77183446571958e-07, "loss": 0.2515, "step": 1852 }, { "epoch": 1.1451513279802348, "grad_norm": 12.023786544799805, "learning_rate": 9.771587399629402e-07, "loss": 0.4703, "step": 1854 }, { "epoch": 1.1463866584311304, "grad_norm": 6.720271110534668, "learning_rate": 9.771340333539222e-07, "loss": 0.1992, "step": 1856 }, { "epoch": 1.147621988882026, "grad_norm": 10.17690372467041, "learning_rate": 9.771093267449042e-07, "loss": 0.2589, "step": 1858 }, { "epoch": 1.1488573193329215, "grad_norm": 4.803305625915527, "learning_rate": 9.770846201358864e-07, "loss": 0.0768, "step": 1860 }, { "epoch": 1.150092649783817, "grad_norm": 6.125646591186523, "learning_rate": 9.770599135268684e-07, "loss": 0.0688, "step": 1862 }, { "epoch": 1.1513279802347127, "grad_norm": 6.599908351898193, "learning_rate": 9.770352069178506e-07, "loss": 0.1434, "step": 1864 }, { "epoch": 1.1525633106856084, "grad_norm": 9.768272399902344, "learning_rate": 9.770105003088326e-07, "loss": 0.3095, "step": 1866 }, { "epoch": 1.153798641136504, "grad_norm": 10.723702430725098, "learning_rate": 9.769857936998148e-07, "loss": 0.2755, "step": 1868 }, { "epoch": 1.1550339715873996, "grad_norm": 6.603543758392334, "learning_rate": 9.769610870907968e-07, "loss": 0.5084, "step": 1870 }, { "epoch": 1.1562693020382953, "grad_norm": 12.193613052368164, "learning_rate": 9.769363804817787e-07, "loss": 0.5618, "step": 1872 }, { "epoch": 1.1575046324891909, "grad_norm": 15.016202926635742, "learning_rate": 9.76911673872761e-07, "loss": 0.3068, "step": 1874 }, { "epoch": 1.1587399629400865, "grad_norm": 4.7892961502075195, "learning_rate": 9.76886967263743e-07, "loss": 0.1791, "step": 1876 }, { "epoch": 1.1599752933909822, "grad_norm": 7.924830913543701, "learning_rate": 9.768622606547251e-07, "loss": 0.3082, "step": 1878 }, { "epoch": 1.1612106238418778, "grad_norm": 16.563570022583008, "learning_rate": 
9.768375540457071e-07, "loss": 1.0196, "step": 1880 }, { "epoch": 1.1624459542927732, "grad_norm": 7.7738447189331055, "learning_rate": 9.768128474366891e-07, "loss": 0.1339, "step": 1882 }, { "epoch": 1.163681284743669, "grad_norm": 9.218232154846191, "learning_rate": 9.767881408276713e-07, "loss": 0.424, "step": 1884 }, { "epoch": 1.1649166151945645, "grad_norm": 6.621976375579834, "learning_rate": 9.767634342186535e-07, "loss": 0.2432, "step": 1886 }, { "epoch": 1.16615194564546, "grad_norm": 10.75043773651123, "learning_rate": 9.767387276096355e-07, "loss": 0.4051, "step": 1888 }, { "epoch": 1.1673872760963557, "grad_norm": 7.568307876586914, "learning_rate": 9.767140210006175e-07, "loss": 0.2215, "step": 1890 }, { "epoch": 1.1686226065472514, "grad_norm": 21.287479400634766, "learning_rate": 9.766893143915997e-07, "loss": 0.2431, "step": 1892 }, { "epoch": 1.169857936998147, "grad_norm": 6.453188896179199, "learning_rate": 9.766646077825817e-07, "loss": 0.6197, "step": 1894 }, { "epoch": 1.1710932674490426, "grad_norm": 8.693233489990234, "learning_rate": 9.76639901173564e-07, "loss": 0.2153, "step": 1896 }, { "epoch": 1.1723285978999383, "grad_norm": 7.888098239898682, "learning_rate": 9.76615194564546e-07, "loss": 0.1486, "step": 1898 }, { "epoch": 1.173563928350834, "grad_norm": 10.303062438964844, "learning_rate": 9.76590487955528e-07, "loss": 0.2657, "step": 1900 }, { "epoch": 1.1747992588017295, "grad_norm": 6.6308698654174805, "learning_rate": 9.765657813465101e-07, "loss": 0.3571, "step": 1902 }, { "epoch": 1.1760345892526252, "grad_norm": 9.76501178741455, "learning_rate": 9.765410747374923e-07, "loss": 0.3313, "step": 1904 }, { "epoch": 1.1772699197035208, "grad_norm": 4.353906154632568, "learning_rate": 9.765163681284743e-07, "loss": 0.2198, "step": 1906 }, { "epoch": 1.1785052501544162, "grad_norm": 9.070924758911133, "learning_rate": 9.764916615194563e-07, "loss": 0.3919, "step": 1908 }, { "epoch": 1.1797405806053118, "grad_norm": 
3.1099300384521484, "learning_rate": 9.764669549104385e-07, "loss": 0.2462, "step": 1910 }, { "epoch": 1.1809759110562075, "grad_norm": 7.368007183074951, "learning_rate": 9.764422483014207e-07, "loss": 0.2161, "step": 1912 }, { "epoch": 1.182211241507103, "grad_norm": 1.6279380321502686, "learning_rate": 9.764175416924027e-07, "loss": 0.0489, "step": 1914 }, { "epoch": 1.1834465719579987, "grad_norm": 5.614766597747803, "learning_rate": 9.763928350833847e-07, "loss": 0.264, "step": 1916 }, { "epoch": 1.1846819024088944, "grad_norm": 6.870380878448486, "learning_rate": 9.76368128474367e-07, "loss": 0.269, "step": 1918 }, { "epoch": 1.18591723285979, "grad_norm": 5.8874616622924805, "learning_rate": 9.76343421865349e-07, "loss": 0.4297, "step": 1920 }, { "epoch": 1.1871525633106856, "grad_norm": 11.392495155334473, "learning_rate": 9.763187152563311e-07, "loss": 0.4656, "step": 1922 }, { "epoch": 1.1883878937615813, "grad_norm": 2.3462657928466797, "learning_rate": 9.762940086473131e-07, "loss": 0.1796, "step": 1924 }, { "epoch": 1.189623224212477, "grad_norm": 8.74726676940918, "learning_rate": 9.76269302038295e-07, "loss": 0.5496, "step": 1926 }, { "epoch": 1.1908585546633725, "grad_norm": 7.455406188964844, "learning_rate": 9.762445954292773e-07, "loss": 0.1674, "step": 1928 }, { "epoch": 1.192093885114268, "grad_norm": 5.746286869049072, "learning_rate": 9.762198888202595e-07, "loss": 0.1622, "step": 1930 }, { "epoch": 1.1933292155651638, "grad_norm": 8.765424728393555, "learning_rate": 9.761951822112415e-07, "loss": 0.2441, "step": 1932 }, { "epoch": 1.1945645460160592, "grad_norm": 10.32790470123291, "learning_rate": 9.761704756022235e-07, "loss": 0.1804, "step": 1934 }, { "epoch": 1.1957998764669548, "grad_norm": 6.823236465454102, "learning_rate": 9.761457689932057e-07, "loss": 0.2803, "step": 1936 }, { "epoch": 1.1970352069178505, "grad_norm": 6.418849468231201, "learning_rate": 9.761210623841877e-07, "loss": 0.1259, "step": 1938 }, { "epoch": 
1.1982705373687461, "grad_norm": 4.334043025970459, "learning_rate": 9.7609635577517e-07, "loss": 0.1637, "step": 1940 }, { "epoch": 1.1995058678196417, "grad_norm": 7.887840270996094, "learning_rate": 9.76071649166152e-07, "loss": 0.2065, "step": 1942 }, { "epoch": 1.2007411982705374, "grad_norm": 9.43116569519043, "learning_rate": 9.760469425571339e-07, "loss": 0.3102, "step": 1944 }, { "epoch": 1.201976528721433, "grad_norm": 11.344623565673828, "learning_rate": 9.76022235948116e-07, "loss": 0.2903, "step": 1946 }, { "epoch": 1.2032118591723286, "grad_norm": 4.1701273918151855, "learning_rate": 9.75997529339098e-07, "loss": 0.2425, "step": 1948 }, { "epoch": 1.2044471896232243, "grad_norm": 7.252767086029053, "learning_rate": 9.759728227300803e-07, "loss": 0.3049, "step": 1950 }, { "epoch": 1.20568252007412, "grad_norm": 6.3402099609375, "learning_rate": 9.759481161210623e-07, "loss": 0.1167, "step": 1952 }, { "epoch": 1.2069178505250155, "grad_norm": 16.482440948486328, "learning_rate": 9.759234095120445e-07, "loss": 0.277, "step": 1954 }, { "epoch": 1.208153180975911, "grad_norm": 5.07111930847168, "learning_rate": 9.758987029030265e-07, "loss": 0.0947, "step": 1956 }, { "epoch": 1.2093885114268066, "grad_norm": 11.281218528747559, "learning_rate": 9.758739962940085e-07, "loss": 0.2534, "step": 1958 }, { "epoch": 1.2106238418777022, "grad_norm": 9.201102256774902, "learning_rate": 9.758492896849907e-07, "loss": 0.1617, "step": 1960 }, { "epoch": 1.2118591723285979, "grad_norm": 9.428502082824707, "learning_rate": 9.758245830759729e-07, "loss": 0.3671, "step": 1962 }, { "epoch": 1.2130945027794935, "grad_norm": 7.800783157348633, "learning_rate": 9.757998764669549e-07, "loss": 0.2479, "step": 1964 }, { "epoch": 1.2143298332303891, "grad_norm": 8.253400802612305, "learning_rate": 9.757751698579369e-07, "loss": 0.3372, "step": 1966 }, { "epoch": 1.2155651636812848, "grad_norm": 9.812432289123535, "learning_rate": 9.75750463248919e-07, "loss": 0.2344, "step": 1968 
}, { "epoch": 1.2168004941321804, "grad_norm": 5.164513111114502, "learning_rate": 9.75725756639901e-07, "loss": 0.1685, "step": 1970 }, { "epoch": 1.218035824583076, "grad_norm": 2.948543071746826, "learning_rate": 9.757010500308833e-07, "loss": 0.7233, "step": 1972 }, { "epoch": 1.2192711550339717, "grad_norm": 21.31548309326172, "learning_rate": 9.756763434218653e-07, "loss": 0.7934, "step": 1974 }, { "epoch": 1.2205064854848673, "grad_norm": 15.818662643432617, "learning_rate": 9.756516368128473e-07, "loss": 0.1571, "step": 1976 }, { "epoch": 1.2217418159357627, "grad_norm": 6.371011257171631, "learning_rate": 9.756269302038295e-07, "loss": 0.1428, "step": 1978 }, { "epoch": 1.2229771463866586, "grad_norm": 4.934885501861572, "learning_rate": 9.756022235948117e-07, "loss": 0.1694, "step": 1980 }, { "epoch": 1.224212476837554, "grad_norm": 11.341374397277832, "learning_rate": 9.755775169857937e-07, "loss": 0.1529, "step": 1982 }, { "epoch": 1.2254478072884496, "grad_norm": 16.292444229125977, "learning_rate": 9.755528103767757e-07, "loss": 0.3368, "step": 1984 }, { "epoch": 1.2266831377393452, "grad_norm": 8.694572448730469, "learning_rate": 9.755281037677579e-07, "loss": 0.3102, "step": 1986 }, { "epoch": 1.2279184681902409, "grad_norm": 8.309039115905762, "learning_rate": 9.7550339715874e-07, "loss": 0.1795, "step": 1988 }, { "epoch": 1.2291537986411365, "grad_norm": 8.629555702209473, "learning_rate": 9.75478690549722e-07, "loss": 0.2362, "step": 1990 }, { "epoch": 1.2303891290920321, "grad_norm": 6.354255199432373, "learning_rate": 9.75453983940704e-07, "loss": 0.2423, "step": 1992 }, { "epoch": 1.2316244595429278, "grad_norm": 9.799184799194336, "learning_rate": 9.754292773316863e-07, "loss": 0.4499, "step": 1994 }, { "epoch": 1.2328597899938234, "grad_norm": 2.281754732131958, "learning_rate": 9.754045707226683e-07, "loss": 0.0991, "step": 1996 }, { "epoch": 1.234095120444719, "grad_norm": 14.224867820739746, "learning_rate": 9.753798641136505e-07, "loss": 
0.4951, "step": 1998 }, { "epoch": 1.2353304508956147, "grad_norm": 7.022400856018066, "learning_rate": 9.753551575046325e-07, "loss": 0.227, "step": 2000 }, { "epoch": 1.2353304508956147, "eval_cer": 0.06342172678806342, "eval_loss": 0.30068209767341614, "eval_runtime": 63.9796, "eval_samples_per_second": 12.832, "eval_steps_per_second": 1.61, "step": 2000 }, { "epoch": 1.2365657813465103, "grad_norm": 5.096597194671631, "learning_rate": 9.753304508956144e-07, "loss": 0.0802, "step": 2002 }, { "epoch": 1.2378011117974057, "grad_norm": 14.31028938293457, "learning_rate": 9.753057442865967e-07, "loss": 0.5777, "step": 2004 }, { "epoch": 1.2390364422483013, "grad_norm": 9.062532424926758, "learning_rate": 9.752810376775786e-07, "loss": 0.1754, "step": 2006 }, { "epoch": 1.240271772699197, "grad_norm": 8.912712097167969, "learning_rate": 9.752563310685608e-07, "loss": 0.1805, "step": 2008 }, { "epoch": 1.2415071031500926, "grad_norm": 4.490794658660889, "learning_rate": 9.752316244595428e-07, "loss": 0.1521, "step": 2010 }, { "epoch": 1.2427424336009882, "grad_norm": 5.542106628417969, "learning_rate": 9.75206917850525e-07, "loss": 0.1101, "step": 2012 }, { "epoch": 1.2439777640518839, "grad_norm": 12.627464294433594, "learning_rate": 9.75182211241507e-07, "loss": 0.4147, "step": 2014 }, { "epoch": 1.2452130945027795, "grad_norm": 12.410797119140625, "learning_rate": 9.75157504632489e-07, "loss": 0.2556, "step": 2016 }, { "epoch": 1.2464484249536751, "grad_norm": 7.850796699523926, "learning_rate": 9.751327980234712e-07, "loss": 0.2435, "step": 2018 }, { "epoch": 1.2476837554045708, "grad_norm": 10.223228454589844, "learning_rate": 9.751080914144532e-07, "loss": 0.395, "step": 2020 }, { "epoch": 1.2489190858554664, "grad_norm": 9.490227699279785, "learning_rate": 9.750833848054354e-07, "loss": 0.4128, "step": 2022 }, { "epoch": 1.250154416306362, "grad_norm": 7.560307502746582, "learning_rate": 9.750586781964174e-07, "loss": 0.1982, "step": 2024 }, { "epoch": 
1.2513897467572574, "grad_norm": 5.034794807434082, "learning_rate": 9.750339715873996e-07, "loss": 0.3956, "step": 2026 }, { "epoch": 1.2526250772081533, "grad_norm": 11.776135444641113, "learning_rate": 9.750092649783816e-07, "loss": 0.3742, "step": 2028 }, { "epoch": 1.2538604076590487, "grad_norm": 9.451992988586426, "learning_rate": 9.749845583693638e-07, "loss": 0.1724, "step": 2030 }, { "epoch": 1.2550957381099443, "grad_norm": 1.7785922288894653, "learning_rate": 9.749598517603458e-07, "loss": 0.1584, "step": 2032 }, { "epoch": 1.25633106856084, "grad_norm": 9.33643627166748, "learning_rate": 9.749351451513278e-07, "loss": 0.134, "step": 2034 }, { "epoch": 1.2575663990117356, "grad_norm": 3.70857310295105, "learning_rate": 9.7491043854231e-07, "loss": 0.2205, "step": 2036 }, { "epoch": 1.2588017294626312, "grad_norm": 12.28228759765625, "learning_rate": 9.748857319332922e-07, "loss": 0.6507, "step": 2038 }, { "epoch": 1.2600370599135269, "grad_norm": 6.213970184326172, "learning_rate": 9.748610253242742e-07, "loss": 0.1758, "step": 2040 }, { "epoch": 1.2612723903644225, "grad_norm": 10.185943603515625, "learning_rate": 9.748363187152562e-07, "loss": 0.3257, "step": 2042 }, { "epoch": 1.2625077208153181, "grad_norm": 8.788241386413574, "learning_rate": 9.748116121062384e-07, "loss": 0.1682, "step": 2044 }, { "epoch": 1.2637430512662138, "grad_norm": 10.306446075439453, "learning_rate": 9.747869054972204e-07, "loss": 0.337, "step": 2046 }, { "epoch": 1.2649783817171092, "grad_norm": 7.1338582038879395, "learning_rate": 9.747621988882026e-07, "loss": 0.2147, "step": 2048 }, { "epoch": 1.266213712168005, "grad_norm": 7.377591609954834, "learning_rate": 9.747374922791846e-07, "loss": 0.2562, "step": 2050 }, { "epoch": 1.2674490426189005, "grad_norm": 11.769505500793457, "learning_rate": 9.747127856701666e-07, "loss": 0.469, "step": 2052 }, { "epoch": 1.268684373069796, "grad_norm": 8.844001770019531, "learning_rate": 9.746880790611488e-07, "loss": 0.1278, 
"step": 2054 }, { "epoch": 1.2699197035206917, "grad_norm": 8.652342796325684, "learning_rate": 9.74663372452131e-07, "loss": 0.2922, "step": 2056 }, { "epoch": 1.2711550339715874, "grad_norm": 13.059402465820312, "learning_rate": 9.74638665843113e-07, "loss": 0.43, "step": 2058 }, { "epoch": 1.272390364422483, "grad_norm": 7.482826232910156, "learning_rate": 9.74613959234095e-07, "loss": 0.2042, "step": 2060 }, { "epoch": 1.2736256948733786, "grad_norm": 9.917162895202637, "learning_rate": 9.745892526250772e-07, "loss": 0.3178, "step": 2062 }, { "epoch": 1.2748610253242743, "grad_norm": 9.39922046661377, "learning_rate": 9.745645460160592e-07, "loss": 0.2458, "step": 2064 }, { "epoch": 1.2760963557751699, "grad_norm": 7.787039279937744, "learning_rate": 9.745398394070414e-07, "loss": 0.1811, "step": 2066 }, { "epoch": 1.2773316862260655, "grad_norm": 11.146538734436035, "learning_rate": 9.745151327980234e-07, "loss": 0.264, "step": 2068 }, { "epoch": 1.2785670166769612, "grad_norm": 11.147686958312988, "learning_rate": 9.744904261890056e-07, "loss": 0.3667, "step": 2070 }, { "epoch": 1.2798023471278568, "grad_norm": 6.782652378082275, "learning_rate": 9.744657195799876e-07, "loss": 0.2102, "step": 2072 }, { "epoch": 1.2810376775787522, "grad_norm": 6.4533233642578125, "learning_rate": 9.744410129709698e-07, "loss": 0.3211, "step": 2074 }, { "epoch": 1.282273008029648, "grad_norm": 4.656149864196777, "learning_rate": 9.744163063619518e-07, "loss": 0.1634, "step": 2076 }, { "epoch": 1.2835083384805435, "grad_norm": 13.728606224060059, "learning_rate": 9.743915997529338e-07, "loss": 0.3109, "step": 2078 }, { "epoch": 1.284743668931439, "grad_norm": 1.1337441205978394, "learning_rate": 9.74366893143916e-07, "loss": 0.0997, "step": 2080 }, { "epoch": 1.2859789993823347, "grad_norm": 8.902132987976074, "learning_rate": 9.74342186534898e-07, "loss": 0.2332, "step": 2082 }, { "epoch": 1.2872143298332304, "grad_norm": 5.747141361236572, "learning_rate": 
9.743174799258802e-07, "loss": 0.1209, "step": 2084 }, { "epoch": 1.288449660284126, "grad_norm": 12.486663818359375, "learning_rate": 9.742927733168622e-07, "loss": 0.1757, "step": 2086 }, { "epoch": 1.2896849907350216, "grad_norm": 11.936094284057617, "learning_rate": 9.742680667078444e-07, "loss": 0.3653, "step": 2088 }, { "epoch": 1.2909203211859173, "grad_norm": 7.577267646789551, "learning_rate": 9.742433600988264e-07, "loss": 0.517, "step": 2090 }, { "epoch": 1.292155651636813, "grad_norm": 4.034299373626709, "learning_rate": 9.742186534898084e-07, "loss": 0.4312, "step": 2092 }, { "epoch": 1.2933909820877085, "grad_norm": 14.714553833007812, "learning_rate": 9.741939468807906e-07, "loss": 0.587, "step": 2094 }, { "epoch": 1.294626312538604, "grad_norm": 1.8326517343521118, "learning_rate": 9.741692402717726e-07, "loss": 0.1088, "step": 2096 }, { "epoch": 1.2958616429894998, "grad_norm": 5.480863571166992, "learning_rate": 9.741445336627548e-07, "loss": 0.5512, "step": 2098 }, { "epoch": 1.2970969734403952, "grad_norm": 6.798644542694092, "learning_rate": 9.741198270537368e-07, "loss": 0.322, "step": 2100 }, { "epoch": 1.2983323038912908, "grad_norm": 6.998974323272705, "learning_rate": 9.740951204447188e-07, "loss": 0.6194, "step": 2102 }, { "epoch": 1.2995676343421865, "grad_norm": 8.565950393676758, "learning_rate": 9.74070413835701e-07, "loss": 0.1694, "step": 2104 }, { "epoch": 1.300802964793082, "grad_norm": 5.71638298034668, "learning_rate": 9.740457072266832e-07, "loss": 0.1244, "step": 2106 }, { "epoch": 1.3020382952439777, "grad_norm": 11.918869018554688, "learning_rate": 9.740210006176652e-07, "loss": 0.4101, "step": 2108 }, { "epoch": 1.3032736256948734, "grad_norm": 4.172709941864014, "learning_rate": 9.739962940086472e-07, "loss": 0.124, "step": 2110 }, { "epoch": 1.304508956145769, "grad_norm": 5.089834690093994, "learning_rate": 9.739715873996294e-07, "loss": 0.1793, "step": 2112 }, { "epoch": 1.3057442865966646, "grad_norm": 
17.060588836669922, "learning_rate": 9.739468807906116e-07, "loss": 0.3715, "step": 2114 }, { "epoch": 1.3069796170475603, "grad_norm": 2.238166570663452, "learning_rate": 9.739221741815936e-07, "loss": 0.1889, "step": 2116 }, { "epoch": 1.308214947498456, "grad_norm": 5.620054721832275, "learning_rate": 9.738974675725756e-07, "loss": 0.1849, "step": 2118 }, { "epoch": 1.3094502779493515, "grad_norm": 6.505105972290039, "learning_rate": 9.738727609635578e-07, "loss": 0.1566, "step": 2120 }, { "epoch": 1.310685608400247, "grad_norm": 13.423113822937012, "learning_rate": 9.738480543545398e-07, "loss": 0.4712, "step": 2122 }, { "epoch": 1.3119209388511428, "grad_norm": 11.075716018676758, "learning_rate": 9.73823347745522e-07, "loss": 0.325, "step": 2124 }, { "epoch": 1.3131562693020382, "grad_norm": 6.743824005126953, "learning_rate": 9.73798641136504e-07, "loss": 0.1991, "step": 2126 }, { "epoch": 1.3143915997529338, "grad_norm": 15.096368789672852, "learning_rate": 9.73773934527486e-07, "loss": 0.3805, "step": 2128 }, { "epoch": 1.3156269302038295, "grad_norm": 5.482355117797852, "learning_rate": 9.737492279184682e-07, "loss": 0.0538, "step": 2130 }, { "epoch": 1.3168622606547251, "grad_norm": 6.522045135498047, "learning_rate": 9.737245213094504e-07, "loss": 0.1835, "step": 2132 }, { "epoch": 1.3180975911056207, "grad_norm": 13.759203910827637, "learning_rate": 9.736998147004324e-07, "loss": 0.358, "step": 2134 }, { "epoch": 1.3193329215565164, "grad_norm": 20.091110229492188, "learning_rate": 9.736751080914143e-07, "loss": 0.4363, "step": 2136 }, { "epoch": 1.320568252007412, "grad_norm": 8.511918067932129, "learning_rate": 9.736504014823966e-07, "loss": 0.4192, "step": 2138 }, { "epoch": 1.3218035824583076, "grad_norm": 12.464509010314941, "learning_rate": 9.736256948733785e-07, "loss": 0.1751, "step": 2140 }, { "epoch": 1.3230389129092033, "grad_norm": 6.884275436401367, "learning_rate": 9.736009882643607e-07, "loss": 0.1314, "step": 2142 }, { "epoch": 
1.3242742433600987, "grad_norm": 9.312164306640625, "learning_rate": 9.735762816553427e-07, "loss": 0.2614, "step": 2144 }, { "epoch": 1.3255095738109945, "grad_norm": 5.029036045074463, "learning_rate": 9.73551575046325e-07, "loss": 0.269, "step": 2146 }, { "epoch": 1.32674490426189, "grad_norm": 12.932140350341797, "learning_rate": 9.73526868437307e-07, "loss": 0.5907, "step": 2148 }, { "epoch": 1.3279802347127856, "grad_norm": 8.62439250946045, "learning_rate": 9.73502161828289e-07, "loss": 0.3013, "step": 2150 }, { "epoch": 1.3292155651636812, "grad_norm": 11.798541069030762, "learning_rate": 9.734774552192711e-07, "loss": 0.2379, "step": 2152 }, { "epoch": 1.3304508956145769, "grad_norm": 5.165622711181641, "learning_rate": 9.734527486102531e-07, "loss": 0.1397, "step": 2154 }, { "epoch": 1.3316862260654725, "grad_norm": 7.121552467346191, "learning_rate": 9.734280420012353e-07, "loss": 0.2171, "step": 2156 }, { "epoch": 1.3329215565163681, "grad_norm": 5.7693305015563965, "learning_rate": 9.734033353922173e-07, "loss": 0.2526, "step": 2158 }, { "epoch": 1.3341568869672638, "grad_norm": 7.942063808441162, "learning_rate": 9.733786287831993e-07, "loss": 0.2915, "step": 2160 }, { "epoch": 1.3353922174181594, "grad_norm": 5.81102991104126, "learning_rate": 9.733539221741815e-07, "loss": 0.2911, "step": 2162 }, { "epoch": 1.336627547869055, "grad_norm": 9.999027252197266, "learning_rate": 9.733292155651637e-07, "loss": 0.1628, "step": 2164 }, { "epoch": 1.3378628783199507, "grad_norm": 4.957307815551758, "learning_rate": 9.733045089561457e-07, "loss": 0.1483, "step": 2166 }, { "epoch": 1.3390982087708463, "grad_norm": 8.274043083190918, "learning_rate": 9.732798023471277e-07, "loss": 0.2061, "step": 2168 }, { "epoch": 1.3403335392217417, "grad_norm": 15.76842975616455, "learning_rate": 9.7325509573811e-07, "loss": 0.3306, "step": 2170 }, { "epoch": 1.3415688696726376, "grad_norm": 6.084107398986816, "learning_rate": 9.73230389129092e-07, "loss": 0.1633, "step": 
2172 }, { "epoch": 1.342804200123533, "grad_norm": 8.72464656829834, "learning_rate": 9.732056825200741e-07, "loss": 0.3023, "step": 2174 }, { "epoch": 1.3440395305744286, "grad_norm": 6.427175521850586, "learning_rate": 9.731809759110561e-07, "loss": 0.1239, "step": 2176 }, { "epoch": 1.3452748610253242, "grad_norm": 4.579740047454834, "learning_rate": 9.731562693020381e-07, "loss": 0.0761, "step": 2178 }, { "epoch": 1.3465101914762199, "grad_norm": 6.2341532707214355, "learning_rate": 9.731315626930203e-07, "loss": 0.1591, "step": 2180 }, { "epoch": 1.3477455219271155, "grad_norm": 6.92950963973999, "learning_rate": 9.731068560840025e-07, "loss": 0.2062, "step": 2182 }, { "epoch": 1.3489808523780111, "grad_norm": 2.7027244567871094, "learning_rate": 9.730821494749845e-07, "loss": 0.0915, "step": 2184 }, { "epoch": 1.3502161828289068, "grad_norm": 12.928427696228027, "learning_rate": 9.730574428659665e-07, "loss": 0.5085, "step": 2186 }, { "epoch": 1.3514515132798024, "grad_norm": 8.30628776550293, "learning_rate": 9.730327362569487e-07, "loss": 0.2828, "step": 2188 }, { "epoch": 1.352686843730698, "grad_norm": 8.011591911315918, "learning_rate": 9.73008029647931e-07, "loss": 0.1756, "step": 2190 }, { "epoch": 1.3539221741815934, "grad_norm": 9.748241424560547, "learning_rate": 9.72983323038913e-07, "loss": 0.1293, "step": 2192 }, { "epoch": 1.3551575046324893, "grad_norm": 6.213412284851074, "learning_rate": 9.72958616429895e-07, "loss": 0.1479, "step": 2194 }, { "epoch": 1.3563928350833847, "grad_norm": 4.936829090118408, "learning_rate": 9.729339098208771e-07, "loss": 0.1383, "step": 2196 }, { "epoch": 1.3576281655342803, "grad_norm": 11.37684440612793, "learning_rate": 9.72909203211859e-07, "loss": 0.4499, "step": 2198 }, { "epoch": 1.358863495985176, "grad_norm": 4.068573474884033, "learning_rate": 9.728844966028413e-07, "loss": 0.4542, "step": 2200 }, { "epoch": 1.3600988264360716, "grad_norm": 4.923436641693115, "learning_rate": 9.728597899938233e-07, 
"loss": 0.0976, "step": 2202 }, { "epoch": 1.3613341568869672, "grad_norm": 12.803295135498047, "learning_rate": 9.728350833848053e-07, "loss": 0.4511, "step": 2204 }, { "epoch": 1.3625694873378629, "grad_norm": 10.998076438903809, "learning_rate": 9.728103767757875e-07, "loss": 0.3346, "step": 2206 }, { "epoch": 1.3638048177887585, "grad_norm": 4.211141586303711, "learning_rate": 9.727856701667697e-07, "loss": 0.0568, "step": 2208 }, { "epoch": 1.3650401482396541, "grad_norm": 12.637617111206055, "learning_rate": 9.727609635577517e-07, "loss": 0.2858, "step": 2210 }, { "epoch": 1.3662754786905498, "grad_norm": 7.706092834472656, "learning_rate": 9.727362569487337e-07, "loss": 0.2897, "step": 2212 }, { "epoch": 1.3675108091414454, "grad_norm": 7.141176700592041, "learning_rate": 9.72711550339716e-07, "loss": 0.2194, "step": 2214 }, { "epoch": 1.368746139592341, "grad_norm": 8.382420539855957, "learning_rate": 9.726868437306979e-07, "loss": 0.1613, "step": 2216 }, { "epoch": 1.3699814700432364, "grad_norm": 5.232321739196777, "learning_rate": 9.7266213712168e-07, "loss": 0.224, "step": 2218 }, { "epoch": 1.3712168004941323, "grad_norm": 6.994580268859863, "learning_rate": 9.72637430512662e-07, "loss": 0.1419, "step": 2220 }, { "epoch": 1.3724521309450277, "grad_norm": 7.741380214691162, "learning_rate": 9.72612723903644e-07, "loss": 0.1573, "step": 2222 }, { "epoch": 1.3736874613959233, "grad_norm": 7.906628608703613, "learning_rate": 9.725880172946263e-07, "loss": 0.2112, "step": 2224 }, { "epoch": 1.374922791846819, "grad_norm": 5.651715278625488, "learning_rate": 9.725633106856083e-07, "loss": 0.3981, "step": 2226 }, { "epoch": 1.3761581222977146, "grad_norm": 6.669356346130371, "learning_rate": 9.725386040765905e-07, "loss": 0.2648, "step": 2228 }, { "epoch": 1.3773934527486102, "grad_norm": 10.835314750671387, "learning_rate": 9.725138974675725e-07, "loss": 0.2428, "step": 2230 }, { "epoch": 1.3786287831995059, "grad_norm": 10.223993301391602, "learning_rate": 
9.724891908585547e-07, "loss": 0.3715, "step": 2232 }, { "epoch": 1.3798641136504015, "grad_norm": 8.267576217651367, "learning_rate": 9.724644842495367e-07, "loss": 0.2411, "step": 2234 }, { "epoch": 1.3810994441012971, "grad_norm": 6.697554111480713, "learning_rate": 9.724397776405187e-07, "loss": 0.304, "step": 2236 }, { "epoch": 1.3823347745521928, "grad_norm": 11.678669929504395, "learning_rate": 9.724150710315009e-07, "loss": 0.5016, "step": 2238 }, { "epoch": 1.3835701050030882, "grad_norm": 10.778976440429688, "learning_rate": 9.72390364422483e-07, "loss": 0.4586, "step": 2240 }, { "epoch": 1.384805435453984, "grad_norm": 10.3942289352417, "learning_rate": 9.72365657813465e-07, "loss": 0.5641, "step": 2242 }, { "epoch": 1.3860407659048795, "grad_norm": 5.648514747619629, "learning_rate": 9.72340951204447e-07, "loss": 0.108, "step": 2244 }, { "epoch": 1.387276096355775, "grad_norm": 5.56667423248291, "learning_rate": 9.723162445954293e-07, "loss": 0.2673, "step": 2246 }, { "epoch": 1.3885114268066707, "grad_norm": 5.508030414581299, "learning_rate": 9.722915379864113e-07, "loss": 0.1326, "step": 2248 }, { "epoch": 1.3897467572575664, "grad_norm": 9.452683448791504, "learning_rate": 9.722668313773935e-07, "loss": 0.3066, "step": 2250 }, { "epoch": 1.390982087708462, "grad_norm": 12.026906967163086, "learning_rate": 9.722421247683755e-07, "loss": 0.3228, "step": 2252 }, { "epoch": 1.3922174181593576, "grad_norm": 7.087366580963135, "learning_rate": 9.722174181593575e-07, "loss": 0.3552, "step": 2254 }, { "epoch": 1.3934527486102533, "grad_norm": 9.267197608947754, "learning_rate": 9.721927115503397e-07, "loss": 0.3368, "step": 2256 }, { "epoch": 1.3946880790611489, "grad_norm": 8.853251457214355, "learning_rate": 9.721680049413219e-07, "loss": 0.3036, "step": 2258 }, { "epoch": 1.3959234095120445, "grad_norm": 7.71788215637207, "learning_rate": 9.721432983323039e-07, "loss": 0.1827, "step": 2260 }, { "epoch": 1.3971587399629402, "grad_norm": 8.646215438842773, 
"learning_rate": 9.721185917232859e-07, "loss": 0.2924, "step": 2262 }, { "epoch": 1.3983940704138358, "grad_norm": 10.460055351257324, "learning_rate": 9.72093885114268e-07, "loss": 0.2628, "step": 2264 }, { "epoch": 1.3996294008647312, "grad_norm": 4.580101013183594, "learning_rate": 9.720691785052503e-07, "loss": 0.1162, "step": 2266 }, { "epoch": 1.400864731315627, "grad_norm": 13.810263633728027, "learning_rate": 9.720444718962323e-07, "loss": 0.2543, "step": 2268 }, { "epoch": 1.4021000617665225, "grad_norm": 5.817704677581787, "learning_rate": 9.720197652872142e-07, "loss": 0.0636, "step": 2270 }, { "epoch": 1.403335392217418, "grad_norm": 6.196863174438477, "learning_rate": 9.719950586781965e-07, "loss": 0.1088, "step": 2272 }, { "epoch": 1.4045707226683137, "grad_norm": 10.995612144470215, "learning_rate": 9.719703520691784e-07, "loss": 0.3231, "step": 2274 }, { "epoch": 1.4058060531192094, "grad_norm": 7.985601902008057, "learning_rate": 9.719456454601607e-07, "loss": 0.1284, "step": 2276 }, { "epoch": 1.407041383570105, "grad_norm": 2.907884120941162, "learning_rate": 9.719209388511426e-07, "loss": 0.0902, "step": 2278 }, { "epoch": 1.4082767140210006, "grad_norm": 5.456057548522949, "learning_rate": 9.718962322421246e-07, "loss": 0.0732, "step": 2280 }, { "epoch": 1.4095120444718963, "grad_norm": 7.610612869262695, "learning_rate": 9.718715256331068e-07, "loss": 0.239, "step": 2282 }, { "epoch": 1.410747374922792, "grad_norm": 2.738907814025879, "learning_rate": 9.718468190240888e-07, "loss": 0.1525, "step": 2284 }, { "epoch": 1.4119827053736875, "grad_norm": 8.727079391479492, "learning_rate": 9.71822112415071e-07, "loss": 0.5213, "step": 2286 }, { "epoch": 1.413218035824583, "grad_norm": 9.318388938903809, "learning_rate": 9.71797405806053e-07, "loss": 0.2148, "step": 2288 }, { "epoch": 1.4144533662754788, "grad_norm": 8.702249526977539, "learning_rate": 9.717726991970352e-07, "loss": 0.282, "step": 2290 }, { "epoch": 1.4156886967263742, "grad_norm": 
10.791404724121094, "learning_rate": 9.717479925880172e-07, "loss": 0.5285, "step": 2292 }, { "epoch": 1.4169240271772698, "grad_norm": 10.714669227600098, "learning_rate": 9.717232859789992e-07, "loss": 0.3295, "step": 2294 }, { "epoch": 1.4181593576281655, "grad_norm": 6.846580505371094, "learning_rate": 9.716985793699814e-07, "loss": 0.3377, "step": 2296 }, { "epoch": 1.419394688079061, "grad_norm": 7.580219268798828, "learning_rate": 9.716738727609634e-07, "loss": 0.2101, "step": 2298 }, { "epoch": 1.4206300185299567, "grad_norm": 8.674392700195312, "learning_rate": 9.716491661519456e-07, "loss": 0.1756, "step": 2300 }, { "epoch": 1.4218653489808524, "grad_norm": 8.339240074157715, "learning_rate": 9.716244595429276e-07, "loss": 0.1256, "step": 2302 }, { "epoch": 1.423100679431748, "grad_norm": 21.646406173706055, "learning_rate": 9.715997529339098e-07, "loss": 0.3611, "step": 2304 }, { "epoch": 1.4243360098826436, "grad_norm": 5.328543663024902, "learning_rate": 9.715750463248918e-07, "loss": 0.163, "step": 2306 }, { "epoch": 1.4255713403335393, "grad_norm": 4.289745807647705, "learning_rate": 9.71550339715874e-07, "loss": 0.1183, "step": 2308 }, { "epoch": 1.426806670784435, "grad_norm": 8.393165588378906, "learning_rate": 9.71525633106856e-07, "loss": 0.3171, "step": 2310 }, { "epoch": 1.4280420012353305, "grad_norm": 12.640758514404297, "learning_rate": 9.71500926497838e-07, "loss": 0.2712, "step": 2312 }, { "epoch": 1.429277331686226, "grad_norm": 2.1922926902770996, "learning_rate": 9.714762198888202e-07, "loss": 0.1149, "step": 2314 }, { "epoch": 1.4305126621371218, "grad_norm": 4.934012413024902, "learning_rate": 9.714515132798024e-07, "loss": 0.1367, "step": 2316 }, { "epoch": 1.4317479925880172, "grad_norm": 11.2390775680542, "learning_rate": 9.714268066707844e-07, "loss": 0.3001, "step": 2318 }, { "epoch": 1.4329833230389128, "grad_norm": 22.152170181274414, "learning_rate": 9.714021000617664e-07, "loss": 0.5488, "step": 2320 }, { "epoch": 
1.4342186534898085, "grad_norm": 9.601851463317871, "learning_rate": 9.713773934527486e-07, "loss": 0.1267, "step": 2322 }, { "epoch": 1.4354539839407041, "grad_norm": 9.37967300415039, "learning_rate": 9.713526868437306e-07, "loss": 0.2215, "step": 2324 }, { "epoch": 1.4366893143915997, "grad_norm": 9.782364845275879, "learning_rate": 9.713279802347128e-07, "loss": 0.2989, "step": 2326 }, { "epoch": 1.4379246448424954, "grad_norm": 7.606380462646484, "learning_rate": 9.713032736256948e-07, "loss": 0.3534, "step": 2328 }, { "epoch": 1.439159975293391, "grad_norm": 5.37879753112793, "learning_rate": 9.712785670166768e-07, "loss": 0.1332, "step": 2330 }, { "epoch": 1.4403953057442866, "grad_norm": 5.49813175201416, "learning_rate": 9.71253860407659e-07, "loss": 0.0809, "step": 2332 }, { "epoch": 1.4416306361951823, "grad_norm": 14.297944068908691, "learning_rate": 9.712291537986412e-07, "loss": 0.2709, "step": 2334 }, { "epoch": 1.4428659666460777, "grad_norm": 6.800910949707031, "learning_rate": 9.712044471896232e-07, "loss": 0.254, "step": 2336 }, { "epoch": 1.4441012970969735, "grad_norm": 8.78974437713623, "learning_rate": 9.711797405806052e-07, "loss": 0.3752, "step": 2338 }, { "epoch": 1.445336627547869, "grad_norm": 13.776782035827637, "learning_rate": 9.711550339715874e-07, "loss": 0.1321, "step": 2340 }, { "epoch": 1.4465719579987646, "grad_norm": 10.756874084472656, "learning_rate": 9.711303273625694e-07, "loss": 0.4096, "step": 2342 }, { "epoch": 1.4478072884496602, "grad_norm": 13.819480895996094, "learning_rate": 9.711056207535516e-07, "loss": 0.431, "step": 2344 }, { "epoch": 1.4490426189005559, "grad_norm": 6.797832489013672, "learning_rate": 9.710809141445336e-07, "loss": 0.9287, "step": 2346 }, { "epoch": 1.4502779493514515, "grad_norm": 3.7874817848205566, "learning_rate": 9.710562075355158e-07, "loss": 0.0841, "step": 2348 }, { "epoch": 1.4515132798023471, "grad_norm": 4.468257904052734, "learning_rate": 9.710315009264978e-07, "loss": 0.2338, 
"step": 2350 }, { "epoch": 1.4527486102532428, "grad_norm": 11.773722648620605, "learning_rate": 9.7100679431748e-07, "loss": 0.2002, "step": 2352 }, { "epoch": 1.4539839407041384, "grad_norm": 5.630013942718506, "learning_rate": 9.70982087708462e-07, "loss": 0.1625, "step": 2354 }, { "epoch": 1.455219271155034, "grad_norm": 17.66055679321289, "learning_rate": 9.70957381099444e-07, "loss": 0.1828, "step": 2356 }, { "epoch": 1.4564546016059297, "grad_norm": 6.57562255859375, "learning_rate": 9.709326744904262e-07, "loss": 0.334, "step": 2358 }, { "epoch": 1.4576899320568253, "grad_norm": 7.02437162399292, "learning_rate": 9.709079678814082e-07, "loss": 0.148, "step": 2360 }, { "epoch": 1.4589252625077207, "grad_norm": 7.9235944747924805, "learning_rate": 9.708832612723904e-07, "loss": 0.1964, "step": 2362 }, { "epoch": 1.4601605929586166, "grad_norm": 9.236466407775879, "learning_rate": 9.708585546633724e-07, "loss": 0.4052, "step": 2364 }, { "epoch": 1.461395923409512, "grad_norm": 16.035268783569336, "learning_rate": 9.708338480543546e-07, "loss": 0.3763, "step": 2366 }, { "epoch": 1.4626312538604076, "grad_norm": 4.531691551208496, "learning_rate": 9.708091414453366e-07, "loss": 0.1105, "step": 2368 }, { "epoch": 1.4638665843113032, "grad_norm": 3.6176416873931885, "learning_rate": 9.707844348363186e-07, "loss": 0.111, "step": 2370 }, { "epoch": 1.4651019147621989, "grad_norm": 5.2465386390686035, "learning_rate": 9.707597282273008e-07, "loss": 0.2016, "step": 2372 }, { "epoch": 1.4663372452130945, "grad_norm": 10.782515525817871, "learning_rate": 9.707350216182828e-07, "loss": 0.2493, "step": 2374 }, { "epoch": 1.4675725756639901, "grad_norm": 7.413103103637695, "learning_rate": 9.70710315009265e-07, "loss": 0.3909, "step": 2376 }, { "epoch": 1.4688079061148858, "grad_norm": 3.988673448562622, "learning_rate": 9.70685608400247e-07, "loss": 0.1446, "step": 2378 }, { "epoch": 1.4700432365657814, "grad_norm": 12.651814460754395, "learning_rate": 
9.70660901791229e-07, "loss": 0.1697, "step": 2380 }, { "epoch": 1.471278567016677, "grad_norm": 12.499869346618652, "learning_rate": 9.706361951822112e-07, "loss": 0.6158, "step": 2382 }, { "epoch": 1.4725138974675724, "grad_norm": 3.798100471496582, "learning_rate": 9.706114885731934e-07, "loss": 0.0519, "step": 2384 }, { "epoch": 1.4737492279184683, "grad_norm": 12.24736213684082, "learning_rate": 9.705867819641754e-07, "loss": 0.4564, "step": 2386 }, { "epoch": 1.4749845583693637, "grad_norm": 8.273289680480957, "learning_rate": 9.705620753551574e-07, "loss": 0.2101, "step": 2388 }, { "epoch": 1.4762198888202593, "grad_norm": 13.244219779968262, "learning_rate": 9.705373687461396e-07, "loss": 0.3326, "step": 2390 }, { "epoch": 1.477455219271155, "grad_norm": 8.854063034057617, "learning_rate": 9.705126621371218e-07, "loss": 0.2706, "step": 2392 }, { "epoch": 1.4786905497220506, "grad_norm": 3.3893966674804688, "learning_rate": 9.704879555281038e-07, "loss": 0.3387, "step": 2394 }, { "epoch": 1.4799258801729462, "grad_norm": 5.095641136169434, "learning_rate": 9.704632489190858e-07, "loss": 0.1643, "step": 2396 }, { "epoch": 1.4811612106238419, "grad_norm": 11.171756744384766, "learning_rate": 9.70438542310068e-07, "loss": 0.358, "step": 2398 }, { "epoch": 1.4823965410747375, "grad_norm": 9.565918922424316, "learning_rate": 9.7041383570105e-07, "loss": 0.1664, "step": 2400 }, { "epoch": 1.4836318715256331, "grad_norm": 9.624619483947754, "learning_rate": 9.703891290920322e-07, "loss": 0.264, "step": 2402 }, { "epoch": 1.4848672019765288, "grad_norm": 8.59361457824707, "learning_rate": 9.703644224830141e-07, "loss": 0.1628, "step": 2404 }, { "epoch": 1.4861025324274244, "grad_norm": 5.1248908042907715, "learning_rate": 9.703397158739961e-07, "loss": 0.1217, "step": 2406 }, { "epoch": 1.48733786287832, "grad_norm": 4.317987442016602, "learning_rate": 9.703150092649783e-07, "loss": 0.1498, "step": 2408 }, { "epoch": 1.4885731933292154, "grad_norm": 
8.254712104797363, "learning_rate": 9.702903026559606e-07, "loss": 0.2179, "step": 2410 }, { "epoch": 1.4898085237801113, "grad_norm": 10.455377578735352, "learning_rate": 9.702655960469425e-07, "loss": 0.3689, "step": 2412 }, { "epoch": 1.4910438542310067, "grad_norm": 5.123176574707031, "learning_rate": 9.702408894379245e-07, "loss": 0.0577, "step": 2414 }, { "epoch": 1.4922791846819023, "grad_norm": 5.399490833282471, "learning_rate": 9.702161828289067e-07, "loss": 0.1754, "step": 2416 }, { "epoch": 1.493514515132798, "grad_norm": 13.461236953735352, "learning_rate": 9.701914762198887e-07, "loss": 0.3147, "step": 2418 }, { "epoch": 1.4947498455836936, "grad_norm": 8.150778770446777, "learning_rate": 9.70166769610871e-07, "loss": 0.248, "step": 2420 }, { "epoch": 1.4959851760345892, "grad_norm": 4.080265522003174, "learning_rate": 9.70142063001853e-07, "loss": 0.1453, "step": 2422 }, { "epoch": 1.4972205064854849, "grad_norm": 3.146939992904663, "learning_rate": 9.70117356392835e-07, "loss": 0.1321, "step": 2424 }, { "epoch": 1.4984558369363805, "grad_norm": 6.301631927490234, "learning_rate": 9.700926497838171e-07, "loss": 0.1693, "step": 2426 }, { "epoch": 1.4996911673872761, "grad_norm": 6.3111252784729, "learning_rate": 9.700679431747991e-07, "loss": 0.1635, "step": 2428 }, { "epoch": 1.5009264978381718, "grad_norm": 15.067344665527344, "learning_rate": 9.700432365657813e-07, "loss": 0.3314, "step": 2430 }, { "epoch": 1.5021618282890672, "grad_norm": 1.8311514854431152, "learning_rate": 9.700185299567633e-07, "loss": 0.0653, "step": 2432 }, { "epoch": 1.503397158739963, "grad_norm": 7.6088547706604, "learning_rate": 9.699938233477455e-07, "loss": 0.1523, "step": 2434 }, { "epoch": 1.5046324891908585, "grad_norm": 8.041434288024902, "learning_rate": 9.699691167387275e-07, "loss": 0.1643, "step": 2436 }, { "epoch": 1.5058678196417543, "grad_norm": 12.795093536376953, "learning_rate": 9.699444101297097e-07, "loss": 0.234, "step": 2438 }, { "epoch": 
1.5071031500926497, "grad_norm": 11.459220886230469, "learning_rate": 9.699197035206917e-07, "loss": 0.2833, "step": 2440 }, { "epoch": 1.5083384805435454, "grad_norm": 16.811609268188477, "learning_rate": 9.69894996911674e-07, "loss": 0.4443, "step": 2442 }, { "epoch": 1.509573810994441, "grad_norm": 20.4755916595459, "learning_rate": 9.69870290302656e-07, "loss": 0.6754, "step": 2444 }, { "epoch": 1.5108091414453366, "grad_norm": 7.823728561401367, "learning_rate": 9.69845583693638e-07, "loss": 0.3172, "step": 2446 }, { "epoch": 1.5120444718962323, "grad_norm": 5.190246105194092, "learning_rate": 9.698208770846201e-07, "loss": 0.0906, "step": 2448 }, { "epoch": 1.513279802347128, "grad_norm": 6.317382335662842, "learning_rate": 9.697961704756021e-07, "loss": 0.123, "step": 2450 }, { "epoch": 1.5145151327980235, "grad_norm": 8.796656608581543, "learning_rate": 9.697714638665843e-07, "loss": 0.1453, "step": 2452 }, { "epoch": 1.515750463248919, "grad_norm": 9.733014106750488, "learning_rate": 9.697467572575663e-07, "loss": 0.4373, "step": 2454 }, { "epoch": 1.5169857936998148, "grad_norm": 6.719393730163574, "learning_rate": 9.697220506485483e-07, "loss": 0.0775, "step": 2456 }, { "epoch": 1.5182211241507102, "grad_norm": 9.416003227233887, "learning_rate": 9.696973440395305e-07, "loss": 0.3621, "step": 2458 }, { "epoch": 1.519456454601606, "grad_norm": 15.300041198730469, "learning_rate": 9.696726374305127e-07, "loss": 0.1222, "step": 2460 }, { "epoch": 1.5206917850525015, "grad_norm": 3.6535158157348633, "learning_rate": 9.696479308214947e-07, "loss": 0.1689, "step": 2462 }, { "epoch": 1.5219271155033973, "grad_norm": 9.637825012207031, "learning_rate": 9.696232242124767e-07, "loss": 0.4965, "step": 2464 }, { "epoch": 1.5231624459542927, "grad_norm": 5.819857120513916, "learning_rate": 9.69598517603459e-07, "loss": 0.2567, "step": 2466 }, { "epoch": 1.5243977764051884, "grad_norm": 9.087779998779297, "learning_rate": 9.695738109944411e-07, "loss": 0.4787, "step": 
2468 }, { "epoch": 1.525633106856084, "grad_norm": 7.357247829437256, "learning_rate": 9.69549104385423e-07, "loss": 0.1658, "step": 2470 }, { "epoch": 1.5268684373069796, "grad_norm": 5.315907001495361, "learning_rate": 9.69524397776405e-07, "loss": 0.2453, "step": 2472 }, { "epoch": 1.5281037677578753, "grad_norm": 11.430032730102539, "learning_rate": 9.694996911673873e-07, "loss": 0.4058, "step": 2474 }, { "epoch": 1.529339098208771, "grad_norm": 14.165947914123535, "learning_rate": 9.694749845583693e-07, "loss": 0.5571, "step": 2476 }, { "epoch": 1.5305744286596665, "grad_norm": 5.145192623138428, "learning_rate": 9.694502779493515e-07, "loss": 0.0936, "step": 2478 }, { "epoch": 1.531809759110562, "grad_norm": 8.31199836730957, "learning_rate": 9.694255713403335e-07, "loss": 0.1432, "step": 2480 }, { "epoch": 1.5330450895614578, "grad_norm": 5.2109761238098145, "learning_rate": 9.694008647313155e-07, "loss": 0.1833, "step": 2482 }, { "epoch": 1.5342804200123532, "grad_norm": 6.503425121307373, "learning_rate": 9.693761581222977e-07, "loss": 0.2663, "step": 2484 }, { "epoch": 1.535515750463249, "grad_norm": 10.541895866394043, "learning_rate": 9.693514515132799e-07, "loss": 0.2428, "step": 2486 }, { "epoch": 1.5367510809141445, "grad_norm": 5.826534748077393, "learning_rate": 9.693267449042619e-07, "loss": 0.2543, "step": 2488 }, { "epoch": 1.53798641136504, "grad_norm": 9.81247329711914, "learning_rate": 9.693020382952439e-07, "loss": 0.1612, "step": 2490 }, { "epoch": 1.5392217418159357, "grad_norm": 4.890615463256836, "learning_rate": 9.69277331686226e-07, "loss": 0.1113, "step": 2492 }, { "epoch": 1.5404570722668314, "grad_norm": 5.156204700469971, "learning_rate": 9.69252625077208e-07, "loss": 0.0952, "step": 2494 }, { "epoch": 1.541692402717727, "grad_norm": 14.323589324951172, "learning_rate": 9.692279184681903e-07, "loss": 0.3541, "step": 2496 }, { "epoch": 1.5429277331686226, "grad_norm": 10.682315826416016, "learning_rate": 9.692032118591723e-07, 
"loss": 0.4803, "step": 2498 }, { "epoch": 1.5441630636195183, "grad_norm": 6.592191219329834, "learning_rate": 9.691785052501543e-07, "loss": 0.1264, "step": 2500 }, { "epoch": 1.5441630636195183, "eval_cer": 0.05767499826905768, "eval_loss": 0.2710132896900177, "eval_runtime": 64.447, "eval_samples_per_second": 12.739, "eval_steps_per_second": 1.598, "step": 2500 }, { "epoch": 1.5453983940704137, "grad_norm": 5.484565734863281, "learning_rate": 9.691537986411365e-07, "loss": 0.3923, "step": 2502 }, { "epoch": 1.5466337245213095, "grad_norm": 5.544233322143555, "learning_rate": 9.691290920321185e-07, "loss": 0.2481, "step": 2504 }, { "epoch": 1.547869054972205, "grad_norm": 4.86369514465332, "learning_rate": 9.691043854231007e-07, "loss": 0.0713, "step": 2506 }, { "epoch": 1.5491043854231008, "grad_norm": 8.790548324584961, "learning_rate": 9.690796788140827e-07, "loss": 0.1271, "step": 2508 }, { "epoch": 1.5503397158739962, "grad_norm": 7.755229949951172, "learning_rate": 9.690549722050649e-07, "loss": 0.0929, "step": 2510 }, { "epoch": 1.551575046324892, "grad_norm": 7.590632438659668, "learning_rate": 9.690302655960469e-07, "loss": 0.1595, "step": 2512 }, { "epoch": 1.5528103767757875, "grad_norm": 5.936056137084961, "learning_rate": 9.690055589870289e-07, "loss": 0.0945, "step": 2514 }, { "epoch": 1.5540457072266831, "grad_norm": 16.359254837036133, "learning_rate": 9.68980852378011e-07, "loss": 0.3209, "step": 2516 }, { "epoch": 1.5552810376775787, "grad_norm": 5.824875831604004, "learning_rate": 9.689561457689933e-07, "loss": 0.2347, "step": 2518 }, { "epoch": 1.5565163681284744, "grad_norm": 7.284151554107666, "learning_rate": 9.689314391599753e-07, "loss": 0.2407, "step": 2520 }, { "epoch": 1.55775169857937, "grad_norm": 5.092019557952881, "learning_rate": 9.689067325509573e-07, "loss": 0.1346, "step": 2522 }, { "epoch": 1.5589870290302656, "grad_norm": 6.3401103019714355, "learning_rate": 9.688820259419395e-07, "loss": 0.1456, "step": 2524 }, { "epoch": 
1.5602223594811613, "grad_norm": 10.053674697875977, "learning_rate": 9.688573193329215e-07, "loss": 0.1871, "step": 2526 }, { "epoch": 1.5614576899320567, "grad_norm": 10.972770690917969, "learning_rate": 9.688326127239037e-07, "loss": 0.4037, "step": 2528 }, { "epoch": 1.5626930203829525, "grad_norm": 14.707892417907715, "learning_rate": 9.688079061148857e-07, "loss": 0.2755, "step": 2530 }, { "epoch": 1.563928350833848, "grad_norm": 11.164092063903809, "learning_rate": 9.687831995058676e-07, "loss": 0.6674, "step": 2532 }, { "epoch": 1.5651636812847438, "grad_norm": 5.094479560852051, "learning_rate": 9.687584928968498e-07, "loss": 0.3088, "step": 2534 }, { "epoch": 1.5663990117356392, "grad_norm": 7.115458965301514, "learning_rate": 9.68733786287832e-07, "loss": 0.2369, "step": 2536 }, { "epoch": 1.5676343421865349, "grad_norm": 6.998493671417236, "learning_rate": 9.68709079678814e-07, "loss": 0.2031, "step": 2538 }, { "epoch": 1.5688696726374305, "grad_norm": 5.214633941650391, "learning_rate": 9.68684373069796e-07, "loss": 0.1306, "step": 2540 }, { "epoch": 1.5701050030883261, "grad_norm": 19.276643753051758, "learning_rate": 9.686596664607782e-07, "loss": 0.3746, "step": 2542 }, { "epoch": 1.5713403335392218, "grad_norm": 5.422847270965576, "learning_rate": 9.686349598517602e-07, "loss": 0.128, "step": 2544 }, { "epoch": 1.5725756639901174, "grad_norm": 13.017534255981445, "learning_rate": 9.686102532427424e-07, "loss": 0.2897, "step": 2546 }, { "epoch": 1.573810994441013, "grad_norm": 8.969852447509766, "learning_rate": 9.685855466337244e-07, "loss": 0.3433, "step": 2548 }, { "epoch": 1.5750463248919084, "grad_norm": 9.451289176940918, "learning_rate": 9.685608400247066e-07, "loss": 0.1719, "step": 2550 }, { "epoch": 1.5762816553428043, "grad_norm": 3.2544679641723633, "learning_rate": 9.685361334156886e-07, "loss": 0.1261, "step": 2552 }, { "epoch": 1.5775169857936997, "grad_norm": 9.674405097961426, "learning_rate": 9.685114268066708e-07, "loss": 0.1667, 
"step": 2554 }, { "epoch": 1.5787523162445956, "grad_norm": 5.516912460327148, "learning_rate": 9.684867201976528e-07, "loss": 0.1949, "step": 2556 }, { "epoch": 1.579987646695491, "grad_norm": 8.280144691467285, "learning_rate": 9.684620135886348e-07, "loss": 0.2366, "step": 2558 }, { "epoch": 1.5812229771463868, "grad_norm": 7.684884548187256, "learning_rate": 9.68437306979617e-07, "loss": 0.2017, "step": 2560 }, { "epoch": 1.5824583075972822, "grad_norm": 4.781105995178223, "learning_rate": 9.68412600370599e-07, "loss": 0.1192, "step": 2562 }, { "epoch": 1.5836936380481779, "grad_norm": 5.5213775634765625, "learning_rate": 9.683878937615812e-07, "loss": 0.3159, "step": 2564 }, { "epoch": 1.5849289684990735, "grad_norm": 8.281396865844727, "learning_rate": 9.683631871525632e-07, "loss": 0.1761, "step": 2566 }, { "epoch": 1.5861642989499691, "grad_norm": 7.370389938354492, "learning_rate": 9.683384805435454e-07, "loss": 0.211, "step": 2568 }, { "epoch": 1.5873996294008648, "grad_norm": 6.2375969886779785, "learning_rate": 9.683137739345274e-07, "loss": 0.0637, "step": 2570 }, { "epoch": 1.5886349598517604, "grad_norm": 14.429162979125977, "learning_rate": 9.682890673255096e-07, "loss": 0.1984, "step": 2572 }, { "epoch": 1.589870290302656, "grad_norm": 8.387210845947266, "learning_rate": 9.682643607164916e-07, "loss": 0.1758, "step": 2574 }, { "epoch": 1.5911056207535514, "grad_norm": 9.392293930053711, "learning_rate": 9.682396541074736e-07, "loss": 0.3025, "step": 2576 }, { "epoch": 1.5923409512044473, "grad_norm": 5.58435583114624, "learning_rate": 9.682149474984558e-07, "loss": 0.2047, "step": 2578 }, { "epoch": 1.5935762816553427, "grad_norm": 13.494682312011719, "learning_rate": 9.681902408894378e-07, "loss": 0.3401, "step": 2580 }, { "epoch": 1.5948116121062386, "grad_norm": 3.067329168319702, "learning_rate": 9.6816553428042e-07, "loss": 0.2168, "step": 2582 }, { "epoch": 1.596046942557134, "grad_norm": 16.480302810668945, "learning_rate": 
9.68140827671402e-07, "loss": 0.2536, "step": 2584 }, { "epoch": 1.5972822730080296, "grad_norm": 10.5988130569458, "learning_rate": 9.681161210623842e-07, "loss": 0.3501, "step": 2586 }, { "epoch": 1.5985176034589252, "grad_norm": 4.451106548309326, "learning_rate": 9.680914144533662e-07, "loss": 0.0695, "step": 2588 }, { "epoch": 1.5997529339098209, "grad_norm": 5.099520683288574, "learning_rate": 9.680667078443482e-07, "loss": 0.3586, "step": 2590 }, { "epoch": 1.6009882643607165, "grad_norm": 9.448123931884766, "learning_rate": 9.680420012353304e-07, "loss": 0.302, "step": 2592 }, { "epoch": 1.6022235948116121, "grad_norm": 10.873140335083008, "learning_rate": 9.680172946263126e-07, "loss": 0.3011, "step": 2594 }, { "epoch": 1.6034589252625078, "grad_norm": 7.4377970695495605, "learning_rate": 9.679925880172946e-07, "loss": 0.3387, "step": 2596 }, { "epoch": 1.6046942557134032, "grad_norm": 4.1673407554626465, "learning_rate": 9.679678814082766e-07, "loss": 0.1166, "step": 2598 }, { "epoch": 1.605929586164299, "grad_norm": 7.6363420486450195, "learning_rate": 9.679431747992588e-07, "loss": 0.1802, "step": 2600 }, { "epoch": 1.6071649166151944, "grad_norm": 9.105381965637207, "learning_rate": 9.679184681902408e-07, "loss": 0.3064, "step": 2602 }, { "epoch": 1.6084002470660903, "grad_norm": 1.650923728942871, "learning_rate": 9.67893761581223e-07, "loss": 0.0352, "step": 2604 }, { "epoch": 1.6096355775169857, "grad_norm": 5.488612174987793, "learning_rate": 9.67869054972205e-07, "loss": 0.3121, "step": 2606 }, { "epoch": 1.6108709079678816, "grad_norm": 4.6394243240356445, "learning_rate": 9.67844348363187e-07, "loss": 0.1066, "step": 2608 }, { "epoch": 1.612106238418777, "grad_norm": 8.823269844055176, "learning_rate": 9.678196417541692e-07, "loss": 0.22, "step": 2610 }, { "epoch": 1.6133415688696726, "grad_norm": 5.4332756996154785, "learning_rate": 9.677949351451514e-07, "loss": 0.0663, "step": 2612 }, { "epoch": 1.6145768993205682, "grad_norm": 
5.774885654449463, "learning_rate": 9.677702285361334e-07, "loss": 0.2096, "step": 2614 }, { "epoch": 1.6158122297714639, "grad_norm": 10.836307525634766, "learning_rate": 9.677455219271154e-07, "loss": 0.4468, "step": 2616 }, { "epoch": 1.6170475602223595, "grad_norm": 3.870434045791626, "learning_rate": 9.677208153180976e-07, "loss": 0.215, "step": 2618 }, { "epoch": 1.6182828906732551, "grad_norm": 3.4858765602111816, "learning_rate": 9.676961087090796e-07, "loss": 0.3059, "step": 2620 }, { "epoch": 1.6195182211241508, "grad_norm": 5.157665252685547, "learning_rate": 9.676714021000618e-07, "loss": 0.1433, "step": 2622 }, { "epoch": 1.6207535515750462, "grad_norm": 4.6807050704956055, "learning_rate": 9.676466954910438e-07, "loss": 0.1896, "step": 2624 }, { "epoch": 1.621988882025942, "grad_norm": 6.423030376434326, "learning_rate": 9.67621988882026e-07, "loss": 0.1363, "step": 2626 }, { "epoch": 1.6232242124768375, "grad_norm": 11.350923538208008, "learning_rate": 9.67597282273008e-07, "loss": 0.3692, "step": 2628 }, { "epoch": 1.6244595429277333, "grad_norm": 5.9003801345825195, "learning_rate": 9.675725756639902e-07, "loss": 0.1538, "step": 2630 }, { "epoch": 1.6256948733786287, "grad_norm": 8.398294448852539, "learning_rate": 9.675478690549722e-07, "loss": 0.3296, "step": 2632 }, { "epoch": 1.6269302038295244, "grad_norm": 7.285464763641357, "learning_rate": 9.675231624459542e-07, "loss": 0.2233, "step": 2634 }, { "epoch": 1.62816553428042, "grad_norm": 6.7286696434021, "learning_rate": 9.674984558369364e-07, "loss": 0.1235, "step": 2636 }, { "epoch": 1.6294008647313156, "grad_norm": 6.304098606109619, "learning_rate": 9.674737492279184e-07, "loss": 0.0989, "step": 2638 }, { "epoch": 1.6306361951822113, "grad_norm": 6.757815837860107, "learning_rate": 9.674490426189006e-07, "loss": 0.3025, "step": 2640 }, { "epoch": 1.631871525633107, "grad_norm": 8.652840614318848, "learning_rate": 9.674243360098826e-07, "loss": 0.2614, "step": 2642 }, { "epoch": 
1.6331068560840025, "grad_norm": 11.873394966125488, "learning_rate": 9.673996294008648e-07, "loss": 0.2873, "step": 2644 }, { "epoch": 1.634342186534898, "grad_norm": 6.853764057159424, "learning_rate": 9.673749227918468e-07, "loss": 0.1373, "step": 2646 }, { "epoch": 1.6355775169857938, "grad_norm": 14.222911834716797, "learning_rate": 9.673502161828288e-07, "loss": 0.5299, "step": 2648 }, { "epoch": 1.6368128474366892, "grad_norm": 6.07527494430542, "learning_rate": 9.67325509573811e-07, "loss": 0.18, "step": 2650 }, { "epoch": 1.638048177887585, "grad_norm": 4.529391288757324, "learning_rate": 9.67300802964793e-07, "loss": 0.1857, "step": 2652 }, { "epoch": 1.6392835083384805, "grad_norm": 10.154864311218262, "learning_rate": 9.672760963557752e-07, "loss": 0.275, "step": 2654 }, { "epoch": 1.6405188387893763, "grad_norm": 9.386743545532227, "learning_rate": 9.672513897467572e-07, "loss": 0.2654, "step": 2656 }, { "epoch": 1.6417541692402717, "grad_norm": 9.386473655700684, "learning_rate": 9.672266831377391e-07, "loss": 0.2285, "step": 2658 }, { "epoch": 1.6429894996911674, "grad_norm": 7.951175212860107, "learning_rate": 9.672019765287214e-07, "loss": 0.1862, "step": 2660 }, { "epoch": 1.644224830142063, "grad_norm": 6.723814010620117, "learning_rate": 9.671772699197036e-07, "loss": 0.1533, "step": 2662 }, { "epoch": 1.6454601605929586, "grad_norm": 6.557336330413818, "learning_rate": 9.671525633106856e-07, "loss": 0.0967, "step": 2664 }, { "epoch": 1.6466954910438543, "grad_norm": 11.83738899230957, "learning_rate": 9.671278567016675e-07, "loss": 0.2921, "step": 2666 }, { "epoch": 1.64793082149475, "grad_norm": 7.182523727416992, "learning_rate": 9.671031500926498e-07, "loss": 0.1515, "step": 2668 }, { "epoch": 1.6491661519456455, "grad_norm": 9.90876293182373, "learning_rate": 9.67078443483632e-07, "loss": 0.2815, "step": 2670 }, { "epoch": 1.650401482396541, "grad_norm": 8.89404010772705, "learning_rate": 9.67053736874614e-07, "loss": 0.27, "step": 2672 }, 
{ "epoch": 1.6516368128474368, "grad_norm": 7.986016273498535, "learning_rate": 9.67029030265596e-07, "loss": 0.2185, "step": 2674 }, { "epoch": 1.6528721432983322, "grad_norm": 8.338407516479492, "learning_rate": 9.670043236565781e-07, "loss": 0.2982, "step": 2676 }, { "epoch": 1.654107473749228, "grad_norm": 3.3331031799316406, "learning_rate": 9.669796170475601e-07, "loss": 0.0803, "step": 2678 }, { "epoch": 1.6553428042001235, "grad_norm": 14.814986228942871, "learning_rate": 9.669549104385423e-07, "loss": 0.4253, "step": 2680 }, { "epoch": 1.656578134651019, "grad_norm": 8.746072769165039, "learning_rate": 9.669302038295243e-07, "loss": 0.3311, "step": 2682 }, { "epoch": 1.6578134651019147, "grad_norm": 2.762324810028076, "learning_rate": 9.669054972205063e-07, "loss": 0.1601, "step": 2684 }, { "epoch": 1.6590487955528104, "grad_norm": 7.648646354675293, "learning_rate": 9.668807906114885e-07, "loss": 0.1514, "step": 2686 }, { "epoch": 1.660284126003706, "grad_norm": 5.25765323638916, "learning_rate": 9.668560840024707e-07, "loss": 0.0739, "step": 2688 }, { "epoch": 1.6615194564546016, "grad_norm": 4.40885591506958, "learning_rate": 9.668313773934527e-07, "loss": 0.0693, "step": 2690 }, { "epoch": 1.6627547869054973, "grad_norm": 4.504990100860596, "learning_rate": 9.668066707844347e-07, "loss": 0.1599, "step": 2692 }, { "epoch": 1.6639901173563927, "grad_norm": 22.886295318603516, "learning_rate": 9.66781964175417e-07, "loss": 0.6135, "step": 2694 }, { "epoch": 1.6652254478072885, "grad_norm": 7.471564292907715, "learning_rate": 9.66757257566399e-07, "loss": 0.2339, "step": 2696 }, { "epoch": 1.666460778258184, "grad_norm": 8.468391418457031, "learning_rate": 9.667325509573811e-07, "loss": 0.1651, "step": 2698 }, { "epoch": 1.6676961087090798, "grad_norm": 10.484896659851074, "learning_rate": 9.667078443483631e-07, "loss": 0.2467, "step": 2700 }, { "epoch": 1.6689314391599752, "grad_norm": 3.8585920333862305, "learning_rate": 9.666831377393451e-07, "loss": 
0.1713, "step": 2702 }, { "epoch": 1.670166769610871, "grad_norm": 13.67261791229248, "learning_rate": 9.666584311303273e-07, "loss": 0.3485, "step": 2704 }, { "epoch": 1.6714021000617665, "grad_norm": 8.407907485961914, "learning_rate": 9.666337245213095e-07, "loss": 0.2111, "step": 2706 }, { "epoch": 1.6726374305126621, "grad_norm": 7.529630661010742, "learning_rate": 9.666090179122915e-07, "loss": 0.2665, "step": 2708 }, { "epoch": 1.6738727609635577, "grad_norm": 5.5804829597473145, "learning_rate": 9.665843113032735e-07, "loss": 0.1172, "step": 2710 }, { "epoch": 1.6751080914144534, "grad_norm": 5.456620693206787, "learning_rate": 9.665596046942557e-07, "loss": 0.3568, "step": 2712 }, { "epoch": 1.676343421865349, "grad_norm": 7.157198905944824, "learning_rate": 9.665348980852377e-07, "loss": 0.2073, "step": 2714 }, { "epoch": 1.6775787523162446, "grad_norm": 11.354540824890137, "learning_rate": 9.6651019147622e-07, "loss": 0.2876, "step": 2716 }, { "epoch": 1.6788140827671403, "grad_norm": 9.579867362976074, "learning_rate": 9.66485484867202e-07, "loss": 0.2544, "step": 2718 }, { "epoch": 1.6800494132180357, "grad_norm": 13.930813789367676, "learning_rate": 9.664607782581841e-07, "loss": 0.4608, "step": 2720 }, { "epoch": 1.6812847436689315, "grad_norm": 4.833138942718506, "learning_rate": 9.664360716491661e-07, "loss": 0.2248, "step": 2722 }, { "epoch": 1.682520074119827, "grad_norm": 12.78492546081543, "learning_rate": 9.66411365040148e-07, "loss": 0.4629, "step": 2724 }, { "epoch": 1.6837554045707228, "grad_norm": 4.324697017669678, "learning_rate": 9.663866584311303e-07, "loss": 0.0761, "step": 2726 }, { "epoch": 1.6849907350216182, "grad_norm": 8.716373443603516, "learning_rate": 9.663619518221123e-07, "loss": 0.4461, "step": 2728 }, { "epoch": 1.6862260654725139, "grad_norm": 2.783438205718994, "learning_rate": 9.663372452130945e-07, "loss": 0.2715, "step": 2730 }, { "epoch": 1.6874613959234095, "grad_norm": 11.447611808776855, "learning_rate": 
9.663125386040765e-07, "loss": 0.3038, "step": 2732 }, { "epoch": 1.6886967263743051, "grad_norm": 6.6908345222473145, "learning_rate": 9.662878319950585e-07, "loss": 0.1262, "step": 2734 }, { "epoch": 1.6899320568252008, "grad_norm": 18.089059829711914, "learning_rate": 9.662631253860407e-07, "loss": 0.6904, "step": 2736 }, { "epoch": 1.6911673872760964, "grad_norm": 5.295236110687256, "learning_rate": 9.66238418777023e-07, "loss": 0.1388, "step": 2738 }, { "epoch": 1.692402717726992, "grad_norm": 8.566317558288574, "learning_rate": 9.66213712168005e-07, "loss": 0.2603, "step": 2740 }, { "epoch": 1.6936380481778874, "grad_norm": 2.5950050354003906, "learning_rate": 9.661890055589869e-07, "loss": 0.2537, "step": 2742 }, { "epoch": 1.6948733786287833, "grad_norm": 4.689173698425293, "learning_rate": 9.66164298949969e-07, "loss": 0.1622, "step": 2744 }, { "epoch": 1.6961087090796787, "grad_norm": 7.51813268661499, "learning_rate": 9.661395923409513e-07, "loss": 0.3293, "step": 2746 }, { "epoch": 1.6973440395305746, "grad_norm": 5.210186958312988, "learning_rate": 9.661148857319333e-07, "loss": 0.1179, "step": 2748 }, { "epoch": 1.69857936998147, "grad_norm": 4.013556957244873, "learning_rate": 9.660901791229153e-07, "loss": 0.1932, "step": 2750 }, { "epoch": 1.6998147004323658, "grad_norm": 8.616752624511719, "learning_rate": 9.660654725138975e-07, "loss": 0.2308, "step": 2752 }, { "epoch": 1.7010500308832612, "grad_norm": 4.936615467071533, "learning_rate": 9.660407659048795e-07, "loss": 0.3584, "step": 2754 }, { "epoch": 1.7022853613341569, "grad_norm": 7.35371732711792, "learning_rate": 9.660160592958617e-07, "loss": 0.1789, "step": 2756 }, { "epoch": 1.7035206917850525, "grad_norm": 4.98695182800293, "learning_rate": 9.659913526868437e-07, "loss": 0.139, "step": 2758 }, { "epoch": 1.7047560222359481, "grad_norm": 14.02633285522461, "learning_rate": 9.659666460778257e-07, "loss": 0.3306, "step": 2760 }, { "epoch": 1.7059913526868438, "grad_norm": 
7.360008239746094, "learning_rate": 9.659419394688079e-07, "loss": 0.1429, "step": 2762 }, { "epoch": 1.7072266831377394, "grad_norm": 7.573976516723633, "learning_rate": 9.6591723285979e-07, "loss": 0.2334, "step": 2764 }, { "epoch": 1.708462013588635, "grad_norm": 5.331390380859375, "learning_rate": 9.65892526250772e-07, "loss": 0.1469, "step": 2766 }, { "epoch": 1.7096973440395304, "grad_norm": 9.667875289916992, "learning_rate": 9.65867819641754e-07, "loss": 0.1648, "step": 2768 }, { "epoch": 1.7109326744904263, "grad_norm": 11.650323867797852, "learning_rate": 9.658431130327363e-07, "loss": 0.3246, "step": 2770 }, { "epoch": 1.7121680049413217, "grad_norm": 11.594045639038086, "learning_rate": 9.658184064237183e-07, "loss": 0.3462, "step": 2772 }, { "epoch": 1.7134033353922176, "grad_norm": 7.765203475952148, "learning_rate": 9.657936998147005e-07, "loss": 0.2511, "step": 2774 }, { "epoch": 1.714638665843113, "grad_norm": 8.165852546691895, "learning_rate": 9.657689932056825e-07, "loss": 0.2689, "step": 2776 }, { "epoch": 1.7158739962940086, "grad_norm": 8.573752403259277, "learning_rate": 9.657442865966645e-07, "loss": 0.4757, "step": 2778 }, { "epoch": 1.7171093267449042, "grad_norm": 12.333540916442871, "learning_rate": 9.657195799876467e-07, "loss": 0.3557, "step": 2780 }, { "epoch": 1.7183446571957999, "grad_norm": 12.10044002532959, "learning_rate": 9.656948733786287e-07, "loss": 0.2764, "step": 2782 }, { "epoch": 1.7195799876466955, "grad_norm": 6.601150035858154, "learning_rate": 9.656701667696109e-07, "loss": 0.2415, "step": 2784 }, { "epoch": 1.7208153180975911, "grad_norm": 8.24493408203125, "learning_rate": 9.656454601605929e-07, "loss": 0.205, "step": 2786 }, { "epoch": 1.7220506485484868, "grad_norm": 8.778180122375488, "learning_rate": 9.65620753551575e-07, "loss": 0.1861, "step": 2788 }, { "epoch": 1.7232859789993822, "grad_norm": 6.154422283172607, "learning_rate": 9.65596046942557e-07, "loss": 0.1611, "step": 2790 }, { "epoch": 
1.724521309450278, "grad_norm": 3.2241458892822266, "learning_rate": 9.65571340333539e-07, "loss": 0.2256, "step": 2792 }, { "epoch": 1.7257566399011735, "grad_norm": 5.957387447357178, "learning_rate": 9.655466337245213e-07, "loss": 0.1604, "step": 2794 }, { "epoch": 1.7269919703520693, "grad_norm": 8.567706108093262, "learning_rate": 9.655219271155035e-07, "loss": 0.2262, "step": 2796 }, { "epoch": 1.7282273008029647, "grad_norm": 6.084476947784424, "learning_rate": 9.654972205064855e-07, "loss": 0.1127, "step": 2798 }, { "epoch": 1.7294626312538606, "grad_norm": 10.429274559020996, "learning_rate": 9.654725138974674e-07, "loss": 0.2673, "step": 2800 }, { "epoch": 1.730697961704756, "grad_norm": 6.718810081481934, "learning_rate": 9.654478072884497e-07, "loss": 0.1745, "step": 2802 }, { "epoch": 1.7319332921556516, "grad_norm": 5.5132317543029785, "learning_rate": 9.654231006794316e-07, "loss": 0.3967, "step": 2804 }, { "epoch": 1.7331686226065472, "grad_norm": 7.907872676849365, "learning_rate": 9.653983940704138e-07, "loss": 0.211, "step": 2806 }, { "epoch": 1.7344039530574429, "grad_norm": 8.230831146240234, "learning_rate": 9.653736874613958e-07, "loss": 0.3068, "step": 2808 }, { "epoch": 1.7356392835083385, "grad_norm": 5.418038368225098, "learning_rate": 9.653489808523778e-07, "loss": 0.2096, "step": 2810 }, { "epoch": 1.7368746139592341, "grad_norm": 5.634387016296387, "learning_rate": 9.6532427424336e-07, "loss": 0.1899, "step": 2812 }, { "epoch": 1.7381099444101298, "grad_norm": 10.15645694732666, "learning_rate": 9.652995676343422e-07, "loss": 0.4486, "step": 2814 }, { "epoch": 1.7393452748610252, "grad_norm": 9.2191162109375, "learning_rate": 9.652748610253242e-07, "loss": 0.1249, "step": 2816 }, { "epoch": 1.740580605311921, "grad_norm": 7.705990314483643, "learning_rate": 9.652501544163062e-07, "loss": 0.2338, "step": 2818 }, { "epoch": 1.7418159357628165, "grad_norm": 4.364472389221191, "learning_rate": 9.652254478072884e-07, "loss": 0.0981, "step": 
2820 }, { "epoch": 1.7430512662137123, "grad_norm": 11.622819900512695, "learning_rate": 9.652007411982704e-07, "loss": 0.3623, "step": 2822 }, { "epoch": 1.7442865966646077, "grad_norm": 10.44304084777832, "learning_rate": 9.651760345892526e-07, "loss": 0.2111, "step": 2824 }, { "epoch": 1.7455219271155034, "grad_norm": 10.741951942443848, "learning_rate": 9.651513279802346e-07, "loss": 0.2237, "step": 2826 }, { "epoch": 1.746757257566399, "grad_norm": 5.135391712188721, "learning_rate": 9.651266213712168e-07, "loss": 0.1059, "step": 2828 }, { "epoch": 1.7479925880172946, "grad_norm": 5.723481178283691, "learning_rate": 9.651019147621988e-07, "loss": 0.1596, "step": 2830 }, { "epoch": 1.7492279184681903, "grad_norm": 9.954404830932617, "learning_rate": 9.65077208153181e-07, "loss": 0.2534, "step": 2832 }, { "epoch": 1.750463248919086, "grad_norm": 6.534234046936035, "learning_rate": 9.65052501544163e-07, "loss": 0.2008, "step": 2834 }, { "epoch": 1.7516985793699815, "grad_norm": 8.206174850463867, "learning_rate": 9.65027794935145e-07, "loss": 0.1169, "step": 2836 }, { "epoch": 1.752933909820877, "grad_norm": 3.5385146141052246, "learning_rate": 9.650030883261272e-07, "loss": 0.248, "step": 2838 }, { "epoch": 1.7541692402717728, "grad_norm": 5.931628704071045, "learning_rate": 9.649783817171094e-07, "loss": 0.1507, "step": 2840 }, { "epoch": 1.7554045707226682, "grad_norm": 5.760183334350586, "learning_rate": 9.649536751080914e-07, "loss": 0.1137, "step": 2842 }, { "epoch": 1.756639901173564, "grad_norm": 7.733980655670166, "learning_rate": 9.649289684990734e-07, "loss": 0.1765, "step": 2844 }, { "epoch": 1.7578752316244595, "grad_norm": 8.25438404083252, "learning_rate": 9.649042618900556e-07, "loss": 0.163, "step": 2846 }, { "epoch": 1.7591105620753553, "grad_norm": 4.755652904510498, "learning_rate": 9.648795552810376e-07, "loss": 0.1887, "step": 2848 }, { "epoch": 1.7603458925262507, "grad_norm": 12.638389587402344, "learning_rate": 9.648548486720198e-07, 
"loss": 0.2473, "step": 2850 }, { "epoch": 1.7615812229771464, "grad_norm": 7.96309757232666, "learning_rate": 9.648301420630018e-07, "loss": 0.1312, "step": 2852 }, { "epoch": 1.762816553428042, "grad_norm": 7.6788530349731445, "learning_rate": 9.648054354539838e-07, "loss": 0.1195, "step": 2854 }, { "epoch": 1.7640518838789376, "grad_norm": 10.32253646850586, "learning_rate": 9.64780728844966e-07, "loss": 0.2604, "step": 2856 }, { "epoch": 1.7652872143298333, "grad_norm": 6.504575252532959, "learning_rate": 9.64756022235948e-07, "loss": 0.2869, "step": 2858 }, { "epoch": 1.766522544780729, "grad_norm": 6.243370056152344, "learning_rate": 9.647313156269302e-07, "loss": 0.3697, "step": 2860 }, { "epoch": 1.7677578752316245, "grad_norm": 7.3240275382995605, "learning_rate": 9.647066090179122e-07, "loss": 0.117, "step": 2862 }, { "epoch": 1.76899320568252, "grad_norm": 8.208178520202637, "learning_rate": 9.646819024088944e-07, "loss": 0.1544, "step": 2864 }, { "epoch": 1.7702285361334158, "grad_norm": 10.630889892578125, "learning_rate": 9.646571957998764e-07, "loss": 0.2475, "step": 2866 }, { "epoch": 1.7714638665843112, "grad_norm": 29.675886154174805, "learning_rate": 9.646324891908584e-07, "loss": 0.2206, "step": 2868 }, { "epoch": 1.772699197035207, "grad_norm": 27.210987091064453, "learning_rate": 9.646077825818406e-07, "loss": 0.1193, "step": 2870 }, { "epoch": 1.7739345274861025, "grad_norm": 11.934255599975586, "learning_rate": 9.645830759728228e-07, "loss": 0.3786, "step": 2872 }, { "epoch": 1.775169857936998, "grad_norm": 3.819181442260742, "learning_rate": 9.645583693638048e-07, "loss": 0.2936, "step": 2874 }, { "epoch": 1.7764051883878937, "grad_norm": 11.919044494628906, "learning_rate": 9.645336627547868e-07, "loss": 0.2173, "step": 2876 }, { "epoch": 1.7776405188387894, "grad_norm": 6.315796852111816, "learning_rate": 9.64508956145769e-07, "loss": 0.1596, "step": 2878 }, { "epoch": 1.778875849289685, "grad_norm": 8.277098655700684, "learning_rate": 
9.64484249536751e-07, "loss": 0.1992, "step": 2880 }, { "epoch": 1.7801111797405806, "grad_norm": 5.623571872711182, "learning_rate": 9.644595429277332e-07, "loss": 0.1568, "step": 2882 }, { "epoch": 1.7813465101914763, "grad_norm": 9.068856239318848, "learning_rate": 9.644348363187152e-07, "loss": 0.2753, "step": 2884 }, { "epoch": 1.7825818406423717, "grad_norm": 6.582336902618408, "learning_rate": 9.644101297096972e-07, "loss": 0.1912, "step": 2886 }, { "epoch": 1.7838171710932675, "grad_norm": 9.066543579101562, "learning_rate": 9.643854231006794e-07, "loss": 0.2531, "step": 2888 }, { "epoch": 1.785052501544163, "grad_norm": 4.85512113571167, "learning_rate": 9.643607164916616e-07, "loss": 0.3895, "step": 2890 }, { "epoch": 1.7862878319950588, "grad_norm": 8.904130935668945, "learning_rate": 9.643360098826436e-07, "loss": 0.1642, "step": 2892 }, { "epoch": 1.7875231624459542, "grad_norm": 7.741667747497559, "learning_rate": 9.643113032736256e-07, "loss": 0.1786, "step": 2894 }, { "epoch": 1.78875849289685, "grad_norm": 4.89739990234375, "learning_rate": 9.642865966646078e-07, "loss": 0.3573, "step": 2896 }, { "epoch": 1.7899938233477455, "grad_norm": 5.692763328552246, "learning_rate": 9.642618900555898e-07, "loss": 0.1218, "step": 2898 }, { "epoch": 1.7912291537986411, "grad_norm": 5.96331787109375, "learning_rate": 9.64237183446572e-07, "loss": 0.395, "step": 2900 }, { "epoch": 1.7924644842495367, "grad_norm": 9.650392532348633, "learning_rate": 9.64212476837554e-07, "loss": 0.2477, "step": 2902 }, { "epoch": 1.7936998147004324, "grad_norm": 8.861757278442383, "learning_rate": 9.64187770228536e-07, "loss": 0.1838, "step": 2904 }, { "epoch": 1.794935145151328, "grad_norm": 5.535726547241211, "learning_rate": 9.641630636195182e-07, "loss": 0.2219, "step": 2906 }, { "epoch": 1.7961704756022236, "grad_norm": 5.020589351654053, "learning_rate": 9.641383570105004e-07, "loss": 0.0689, "step": 2908 }, { "epoch": 1.7974058060531193, "grad_norm": 4.491690158843994, 
"learning_rate": 9.641136504014824e-07, "loss": 0.1694, "step": 2910 }, { "epoch": 1.7986411365040147, "grad_norm": 2.8576266765594482, "learning_rate": 9.640889437924644e-07, "loss": 0.311, "step": 2912 }, { "epoch": 1.7998764669549105, "grad_norm": 4.538076400756836, "learning_rate": 9.640642371834466e-07, "loss": 0.2101, "step": 2914 }, { "epoch": 1.801111797405806, "grad_norm": 10.320256233215332, "learning_rate": 9.640395305744286e-07, "loss": 0.2004, "step": 2916 }, { "epoch": 1.8023471278567018, "grad_norm": 8.027030944824219, "learning_rate": 9.640148239654108e-07, "loss": 0.1576, "step": 2918 }, { "epoch": 1.8035824583075972, "grad_norm": 4.724896430969238, "learning_rate": 9.639901173563928e-07, "loss": 0.3432, "step": 2920 }, { "epoch": 1.8048177887584929, "grad_norm": 7.122416019439697, "learning_rate": 9.63965410747375e-07, "loss": 0.2046, "step": 2922 }, { "epoch": 1.8060531192093885, "grad_norm": 6.6743340492248535, "learning_rate": 9.63940704138357e-07, "loss": 0.1133, "step": 2924 }, { "epoch": 1.8072884496602841, "grad_norm": 5.963653564453125, "learning_rate": 9.63915997529339e-07, "loss": 0.245, "step": 2926 }, { "epoch": 1.8085237801111798, "grad_norm": 10.299002647399902, "learning_rate": 9.638912909203212e-07, "loss": 0.2784, "step": 2928 }, { "epoch": 1.8097591105620754, "grad_norm": 7.263350486755371, "learning_rate": 9.638665843113031e-07, "loss": 0.1097, "step": 2930 }, { "epoch": 1.810994441012971, "grad_norm": 6.656221389770508, "learning_rate": 9.638418777022854e-07, "loss": 0.2981, "step": 2932 }, { "epoch": 1.8122297714638664, "grad_norm": 5.335504055023193, "learning_rate": 9.638171710932673e-07, "loss": 0.1728, "step": 2934 }, { "epoch": 1.8134651019147623, "grad_norm": 5.270801067352295, "learning_rate": 9.637924644842496e-07, "loss": 0.1862, "step": 2936 }, { "epoch": 1.8147004323656577, "grad_norm": 8.92037582397461, "learning_rate": 9.637677578752315e-07, "loss": 0.3127, "step": 2938 }, { "epoch": 1.8159357628165536, 
"grad_norm": 4.050203800201416, "learning_rate": 9.637430512662137e-07, "loss": 0.1062, "step": 2940 }, { "epoch": 1.817171093267449, "grad_norm": 9.192275047302246, "learning_rate": 9.637183446571957e-07, "loss": 0.1347, "step": 2942 }, { "epoch": 1.8184064237183448, "grad_norm": 5.891340732574463, "learning_rate": 9.636936380481777e-07, "loss": 0.2175, "step": 2944 }, { "epoch": 1.8196417541692402, "grad_norm": 6.984597206115723, "learning_rate": 9.6366893143916e-07, "loss": 0.2369, "step": 2946 }, { "epoch": 1.8208770846201359, "grad_norm": 6.410945892333984, "learning_rate": 9.636442248301421e-07, "loss": 0.1297, "step": 2948 }, { "epoch": 1.8221124150710315, "grad_norm": 8.220746040344238, "learning_rate": 9.636195182211241e-07, "loss": 0.3782, "step": 2950 }, { "epoch": 1.8233477455219271, "grad_norm": 8.767640113830566, "learning_rate": 9.635948116121061e-07, "loss": 0.1976, "step": 2952 }, { "epoch": 1.8245830759728228, "grad_norm": 6.135279655456543, "learning_rate": 9.635701050030883e-07, "loss": 0.0957, "step": 2954 }, { "epoch": 1.8258184064237184, "grad_norm": 8.666990280151367, "learning_rate": 9.635453983940703e-07, "loss": 0.2347, "step": 2956 }, { "epoch": 1.827053736874614, "grad_norm": 6.924798488616943, "learning_rate": 9.635206917850525e-07, "loss": 0.2102, "step": 2958 }, { "epoch": 1.8282890673255094, "grad_norm": 8.37646484375, "learning_rate": 9.634959851760345e-07, "loss": 0.1541, "step": 2960 }, { "epoch": 1.8295243977764053, "grad_norm": 5.0588226318359375, "learning_rate": 9.634712785670165e-07, "loss": 0.2887, "step": 2962 }, { "epoch": 1.8307597282273007, "grad_norm": 4.289551258087158, "learning_rate": 9.634465719579987e-07, "loss": 0.0458, "step": 2964 }, { "epoch": 1.8319950586781966, "grad_norm": 10.607537269592285, "learning_rate": 9.63421865348981e-07, "loss": 0.285, "step": 2966 }, { "epoch": 1.833230389129092, "grad_norm": 5.3892316818237305, "learning_rate": 9.63397158739963e-07, "loss": 0.087, "step": 2968 }, { "epoch": 
1.8344657195799876, "grad_norm": 7.424550533294678, "learning_rate": 9.63372452130945e-07, "loss": 0.1156, "step": 2970 }, { "epoch": 1.8357010500308832, "grad_norm": 27.07206916809082, "learning_rate": 9.633477455219271e-07, "loss": 0.2135, "step": 2972 }, { "epoch": 1.8369363804817789, "grad_norm": 6.234593391418457, "learning_rate": 9.633230389129091e-07, "loss": 0.1677, "step": 2974 }, { "epoch": 1.8381717109326745, "grad_norm": 8.618860244750977, "learning_rate": 9.632983323038913e-07, "loss": 0.217, "step": 2976 }, { "epoch": 1.8394070413835701, "grad_norm": 4.401519298553467, "learning_rate": 9.632736256948733e-07, "loss": 0.3173, "step": 2978 }, { "epoch": 1.8406423718344658, "grad_norm": 12.750863075256348, "learning_rate": 9.632489190858553e-07, "loss": 0.372, "step": 2980 }, { "epoch": 1.8418777022853612, "grad_norm": 6.593647480010986, "learning_rate": 9.632242124768375e-07, "loss": 0.4692, "step": 2982 }, { "epoch": 1.843113032736257, "grad_norm": 6.004411220550537, "learning_rate": 9.631995058678197e-07, "loss": 0.1695, "step": 2984 }, { "epoch": 1.8443483631871525, "grad_norm": 7.797753810882568, "learning_rate": 9.631747992588017e-07, "loss": 0.1804, "step": 2986 }, { "epoch": 1.8455836936380483, "grad_norm": 7.686452388763428, "learning_rate": 9.631500926497837e-07, "loss": 0.2048, "step": 2988 }, { "epoch": 1.8468190240889437, "grad_norm": 7.532174110412598, "learning_rate": 9.63125386040766e-07, "loss": 0.3538, "step": 2990 }, { "epoch": 1.8480543545398396, "grad_norm": 10.035093307495117, "learning_rate": 9.63100679431748e-07, "loss": 0.436, "step": 2992 }, { "epoch": 1.849289684990735, "grad_norm": 3.1869845390319824, "learning_rate": 9.630759728227301e-07, "loss": 0.1798, "step": 2994 }, { "epoch": 1.8505250154416306, "grad_norm": 1.4785269498825073, "learning_rate": 9.63051266213712e-07, "loss": 0.1147, "step": 2996 }, { "epoch": 1.8517603458925262, "grad_norm": 10.402931213378906, "learning_rate": 9.630265596046943e-07, "loss": 0.2002, 
"step": 2998 }, { "epoch": 1.8529956763434219, "grad_norm": 9.134345054626465, "learning_rate": 9.630018529956763e-07, "loss": 0.3447, "step": 3000 }, { "epoch": 1.8529956763434219, "eval_cer": 0.057882711348057884, "eval_loss": 0.25854164361953735, "eval_runtime": 64.0468, "eval_samples_per_second": 12.819, "eval_steps_per_second": 1.608, "step": 3000 }, { "epoch": 1.8542310067943175, "grad_norm": 7.529721260070801, "learning_rate": 9.629771463866583e-07, "loss": 0.1476, "step": 3002 }, { "epoch": 1.8554663372452131, "grad_norm": 8.677084922790527, "learning_rate": 9.629524397776405e-07, "loss": 0.1136, "step": 3004 }, { "epoch": 1.8567016676961088, "grad_norm": 6.602086067199707, "learning_rate": 9.629277331686225e-07, "loss": 0.2449, "step": 3006 }, { "epoch": 1.8579369981470042, "grad_norm": 21.041324615478516, "learning_rate": 9.629030265596047e-07, "loss": 0.1597, "step": 3008 }, { "epoch": 1.8591723285979, "grad_norm": 6.306572914123535, "learning_rate": 9.628783199505867e-07, "loss": 0.1443, "step": 3010 }, { "epoch": 1.8604076590487955, "grad_norm": 6.7645673751831055, "learning_rate": 9.628536133415687e-07, "loss": 0.1281, "step": 3012 }, { "epoch": 1.8616429894996913, "grad_norm": 13.624529838562012, "learning_rate": 9.628289067325509e-07, "loss": 0.4624, "step": 3014 }, { "epoch": 1.8628783199505867, "grad_norm": 9.312023162841797, "learning_rate": 9.62804200123533e-07, "loss": 0.1843, "step": 3016 }, { "epoch": 1.8641136504014824, "grad_norm": 7.199138164520264, "learning_rate": 9.62779493514515e-07, "loss": 0.2533, "step": 3018 }, { "epoch": 1.865348980852378, "grad_norm": 5.096278190612793, "learning_rate": 9.62754786905497e-07, "loss": 0.1137, "step": 3020 }, { "epoch": 1.8665843113032736, "grad_norm": 8.457734107971191, "learning_rate": 9.627300802964793e-07, "loss": 0.6007, "step": 3022 }, { "epoch": 1.8678196417541693, "grad_norm": 9.331452369689941, "learning_rate": 9.627053736874613e-07, "loss": 0.2838, "step": 3024 }, { "epoch": 
1.869054972205065, "grad_norm": 10.463943481445312, "learning_rate": 9.626806670784435e-07, "loss": 0.2811, "step": 3026 }, { "epoch": 1.8702903026559605, "grad_norm": 4.039429187774658, "learning_rate": 9.626559604694255e-07, "loss": 0.2203, "step": 3028 }, { "epoch": 1.871525633106856, "grad_norm": 10.505375862121582, "learning_rate": 9.626312538604077e-07, "loss": 0.2897, "step": 3030 }, { "epoch": 1.8727609635577518, "grad_norm": 7.804625511169434, "learning_rate": 9.626065472513897e-07, "loss": 0.2012, "step": 3032 }, { "epoch": 1.8739962940086472, "grad_norm": 6.869112491607666, "learning_rate": 9.625818406423719e-07, "loss": 0.0918, "step": 3034 }, { "epoch": 1.875231624459543, "grad_norm": 7.053314208984375, "learning_rate": 9.625571340333539e-07, "loss": 0.238, "step": 3036 }, { "epoch": 1.8764669549104385, "grad_norm": 10.541196823120117, "learning_rate": 9.625324274243359e-07, "loss": 0.198, "step": 3038 }, { "epoch": 1.8777022853613343, "grad_norm": 6.87946891784668, "learning_rate": 9.62507720815318e-07, "loss": 0.1417, "step": 3040 }, { "epoch": 1.8789376158122297, "grad_norm": 4.354513645172119, "learning_rate": 9.624830142063003e-07, "loss": 0.0796, "step": 3042 }, { "epoch": 1.8801729462631254, "grad_norm": 10.45518970489502, "learning_rate": 9.624583075972823e-07, "loss": 0.3753, "step": 3044 }, { "epoch": 1.881408276714021, "grad_norm": 7.823563098907471, "learning_rate": 9.624336009882643e-07, "loss": 0.3057, "step": 3046 }, { "epoch": 1.8826436071649166, "grad_norm": 9.75716495513916, "learning_rate": 9.624088943792465e-07, "loss": 0.2177, "step": 3048 }, { "epoch": 1.8838789376158123, "grad_norm": 6.345613479614258, "learning_rate": 9.623841877702285e-07, "loss": 0.2277, "step": 3050 }, { "epoch": 1.885114268066708, "grad_norm": 8.871455192565918, "learning_rate": 9.623594811612107e-07, "loss": 0.1963, "step": 3052 }, { "epoch": 1.8863495985176035, "grad_norm": 8.130118370056152, "learning_rate": 9.623347745521927e-07, "loss": 0.1569, "step": 
3054 }, { "epoch": 1.887584928968499, "grad_norm": 8.82680892944336, "learning_rate": 9.623100679431747e-07, "loss": 0.3253, "step": 3056 }, { "epoch": 1.8888202594193948, "grad_norm": 7.36236572265625, "learning_rate": 9.622853613341569e-07, "loss": 0.1037, "step": 3058 }, { "epoch": 1.8900555898702902, "grad_norm": 9.913825035095215, "learning_rate": 9.622606547251388e-07, "loss": 0.4279, "step": 3060 }, { "epoch": 1.891290920321186, "grad_norm": 8.291047096252441, "learning_rate": 9.62235948116121e-07, "loss": 0.1441, "step": 3062 }, { "epoch": 1.8925262507720815, "grad_norm": 8.583385467529297, "learning_rate": 9.62211241507103e-07, "loss": 0.4431, "step": 3064 }, { "epoch": 1.893761581222977, "grad_norm": 3.160839557647705, "learning_rate": 9.621865348980853e-07, "loss": 0.2121, "step": 3066 }, { "epoch": 1.8949969116738727, "grad_norm": 7.888951778411865, "learning_rate": 9.621618282890672e-07, "loss": 0.2773, "step": 3068 }, { "epoch": 1.8962322421247684, "grad_norm": 5.41503381729126, "learning_rate": 9.621371216800495e-07, "loss": 0.1427, "step": 3070 }, { "epoch": 1.897467572575664, "grad_norm": 6.7920613288879395, "learning_rate": 9.621124150710314e-07, "loss": 0.1122, "step": 3072 }, { "epoch": 1.8987029030265596, "grad_norm": 7.2256269454956055, "learning_rate": 9.620877084620136e-07, "loss": 0.1879, "step": 3074 }, { "epoch": 1.8999382334774553, "grad_norm": 7.853900909423828, "learning_rate": 9.620630018529956e-07, "loss": 0.2372, "step": 3076 }, { "epoch": 1.9011735639283507, "grad_norm": 6.310141086578369, "learning_rate": 9.620382952439776e-07, "loss": 0.2368, "step": 3078 }, { "epoch": 1.9024088943792465, "grad_norm": 3.2934131622314453, "learning_rate": 9.620135886349598e-07, "loss": 0.0657, "step": 3080 }, { "epoch": 1.903644224830142, "grad_norm": 7.878880500793457, "learning_rate": 9.619888820259418e-07, "loss": 0.2271, "step": 3082 }, { "epoch": 1.9048795552810378, "grad_norm": 14.889912605285645, "learning_rate": 9.61964175416924e-07, 
"loss": 0.4214, "step": 3084 }, { "epoch": 1.9061148857319332, "grad_norm": 4.628279685974121, "learning_rate": 9.61939468807906e-07, "loss": 0.1789, "step": 3086 }, { "epoch": 1.907350216182829, "grad_norm": 9.112343788146973, "learning_rate": 9.61914762198888e-07, "loss": 0.1873, "step": 3088 }, { "epoch": 1.9085855466337245, "grad_norm": 4.681764125823975, "learning_rate": 9.618900555898702e-07, "loss": 0.0766, "step": 3090 }, { "epoch": 1.9098208770846201, "grad_norm": 6.584549427032471, "learning_rate": 9.618653489808524e-07, "loss": 0.1855, "step": 3092 }, { "epoch": 1.9110562075355157, "grad_norm": 8.21350383758545, "learning_rate": 9.618406423718344e-07, "loss": 0.2131, "step": 3094 }, { "epoch": 1.9122915379864114, "grad_norm": 8.973699569702148, "learning_rate": 9.618159357628164e-07, "loss": 0.2947, "step": 3096 }, { "epoch": 1.913526868437307, "grad_norm": 4.83086633682251, "learning_rate": 9.617912291537986e-07, "loss": 0.1141, "step": 3098 }, { "epoch": 1.9147621988882026, "grad_norm": 7.061661243438721, "learning_rate": 9.617665225447806e-07, "loss": 0.3556, "step": 3100 }, { "epoch": 1.9159975293390983, "grad_norm": 4.64771032333374, "learning_rate": 9.617418159357628e-07, "loss": 0.0658, "step": 3102 }, { "epoch": 1.9172328597899937, "grad_norm": 10.449604034423828, "learning_rate": 9.617171093267448e-07, "loss": 0.1693, "step": 3104 }, { "epoch": 1.9184681902408895, "grad_norm": 12.394244194030762, "learning_rate": 9.61692402717727e-07, "loss": 0.3446, "step": 3106 }, { "epoch": 1.919703520691785, "grad_norm": 9.057596206665039, "learning_rate": 9.61667696108709e-07, "loss": 0.1447, "step": 3108 }, { "epoch": 1.9209388511426808, "grad_norm": 6.3730058670043945, "learning_rate": 9.616429894996912e-07, "loss": 0.0775, "step": 3110 }, { "epoch": 1.9221741815935762, "grad_norm": 2.6644668579101562, "learning_rate": 9.616182828906732e-07, "loss": 0.0891, "step": 3112 }, { "epoch": 1.9234095120444719, "grad_norm": 4.459999084472656, "learning_rate": 
9.615935762816552e-07, "loss": 0.0919, "step": 3114 }, { "epoch": 1.9246448424953675, "grad_norm": 15.830841064453125, "learning_rate": 9.615688696726374e-07, "loss": 0.3924, "step": 3116 }, { "epoch": 1.9258801729462631, "grad_norm": 3.993715763092041, "learning_rate": 9.615441630636196e-07, "loss": 0.3164, "step": 3118 }, { "epoch": 1.9271155033971588, "grad_norm": 7.4245924949646, "learning_rate": 9.615194564546016e-07, "loss": 0.1546, "step": 3120 }, { "epoch": 1.9283508338480544, "grad_norm": 12.869503021240234, "learning_rate": 9.614947498455836e-07, "loss": 0.5025, "step": 3122 }, { "epoch": 1.92958616429895, "grad_norm": 8.431431770324707, "learning_rate": 9.614700432365658e-07, "loss": 0.3036, "step": 3124 }, { "epoch": 1.9308214947498454, "grad_norm": 5.307292938232422, "learning_rate": 9.614453366275478e-07, "loss": 0.1178, "step": 3126 }, { "epoch": 1.9320568252007413, "grad_norm": 5.9054789543151855, "learning_rate": 9.6142063001853e-07, "loss": 0.1321, "step": 3128 }, { "epoch": 1.9332921556516367, "grad_norm": 5.922335147857666, "learning_rate": 9.61395923409512e-07, "loss": 0.1192, "step": 3130 }, { "epoch": 1.9345274861025326, "grad_norm": 9.217927932739258, "learning_rate": 9.61371216800494e-07, "loss": 0.1491, "step": 3132 }, { "epoch": 1.935762816553428, "grad_norm": 5.643740653991699, "learning_rate": 9.613465101914762e-07, "loss": 0.176, "step": 3134 }, { "epoch": 1.9369981470043238, "grad_norm": 10.755738258361816, "learning_rate": 9.613218035824582e-07, "loss": 0.2609, "step": 3136 }, { "epoch": 1.9382334774552192, "grad_norm": 9.64962387084961, "learning_rate": 9.612970969734404e-07, "loss": 0.1357, "step": 3138 }, { "epoch": 1.9394688079061149, "grad_norm": 5.740861415863037, "learning_rate": 9.612723903644224e-07, "loss": 0.1856, "step": 3140 }, { "epoch": 1.9407041383570105, "grad_norm": 6.7962212562561035, "learning_rate": 9.612476837554046e-07, "loss": 0.2365, "step": 3142 }, { "epoch": 1.9419394688079061, "grad_norm": 
8.395347595214844, "learning_rate": 9.612229771463866e-07, "loss": 0.198, "step": 3144 }, { "epoch": 1.9431747992588018, "grad_norm": 13.320188522338867, "learning_rate": 9.611982705373686e-07, "loss": 0.4137, "step": 3146 }, { "epoch": 1.9444101297096974, "grad_norm": 8.031266212463379, "learning_rate": 9.611735639283508e-07, "loss": 0.2297, "step": 3148 }, { "epoch": 1.945645460160593, "grad_norm": 11.001839637756348, "learning_rate": 9.61148857319333e-07, "loss": 0.2362, "step": 3150 }, { "epoch": 1.9468807906114884, "grad_norm": 5.541065692901611, "learning_rate": 9.61124150710315e-07, "loss": 0.1397, "step": 3152 }, { "epoch": 1.9481161210623843, "grad_norm": 11.493008613586426, "learning_rate": 9.61099444101297e-07, "loss": 0.2882, "step": 3154 }, { "epoch": 1.9493514515132797, "grad_norm": 7.550553798675537, "learning_rate": 9.610747374922792e-07, "loss": 0.13, "step": 3156 }, { "epoch": 1.9505867819641756, "grad_norm": 2.574258804321289, "learning_rate": 9.610500308832612e-07, "loss": 0.0521, "step": 3158 }, { "epoch": 1.951822112415071, "grad_norm": 10.167543411254883, "learning_rate": 9.610253242742434e-07, "loss": 0.258, "step": 3160 }, { "epoch": 1.9530574428659666, "grad_norm": 5.440715789794922, "learning_rate": 9.610006176652254e-07, "loss": 0.116, "step": 3162 }, { "epoch": 1.9542927733168622, "grad_norm": 3.53387188911438, "learning_rate": 9.609759110562074e-07, "loss": 0.3773, "step": 3164 }, { "epoch": 1.9555281037677579, "grad_norm": 5.033820629119873, "learning_rate": 9.609512044471896e-07, "loss": 0.0784, "step": 3166 }, { "epoch": 1.9567634342186535, "grad_norm": 7.654403209686279, "learning_rate": 9.609264978381718e-07, "loss": 0.4109, "step": 3168 }, { "epoch": 1.9579987646695491, "grad_norm": 2.74133038520813, "learning_rate": 9.609017912291538e-07, "loss": 0.3455, "step": 3170 }, { "epoch": 1.9592340951204448, "grad_norm": 3.892502546310425, "learning_rate": 9.608770846201358e-07, "loss": 0.1295, "step": 3172 }, { "epoch": 
1.9604694255713402, "grad_norm": 9.546514511108398, "learning_rate": 9.60852378011118e-07, "loss": 0.4682, "step": 3174 }, { "epoch": 1.961704756022236, "grad_norm": 10.27074146270752, "learning_rate": 9.608276714021e-07, "loss": 0.2851, "step": 3176 }, { "epoch": 1.9629400864731315, "grad_norm": 7.954784870147705, "learning_rate": 9.608029647930822e-07, "loss": 0.2385, "step": 3178 }, { "epoch": 1.9641754169240273, "grad_norm": 7.934713363647461, "learning_rate": 9.607782581840642e-07, "loss": 0.1754, "step": 3180 }, { "epoch": 1.9654107473749227, "grad_norm": 5.090959548950195, "learning_rate": 9.607535515750462e-07, "loss": 0.2641, "step": 3182 }, { "epoch": 1.9666460778258186, "grad_norm": 9.647467613220215, "learning_rate": 9.607288449660284e-07, "loss": 0.1233, "step": 3184 }, { "epoch": 1.967881408276714, "grad_norm": 6.795328140258789, "learning_rate": 9.607041383570106e-07, "loss": 0.417, "step": 3186 }, { "epoch": 1.9691167387276096, "grad_norm": 5.320108413696289, "learning_rate": 9.606794317479926e-07, "loss": 0.3374, "step": 3188 }, { "epoch": 1.9703520691785052, "grad_norm": 7.71553373336792, "learning_rate": 9.606547251389746e-07, "loss": 0.1768, "step": 3190 }, { "epoch": 1.9715873996294009, "grad_norm": 5.287419319152832, "learning_rate": 9.606300185299568e-07, "loss": 0.1507, "step": 3192 }, { "epoch": 1.9728227300802965, "grad_norm": 5.874441146850586, "learning_rate": 9.606053119209388e-07, "loss": 0.2848, "step": 3194 }, { "epoch": 1.9740580605311921, "grad_norm": 3.5328967571258545, "learning_rate": 9.60580605311921e-07, "loss": 0.0572, "step": 3196 }, { "epoch": 1.9752933909820878, "grad_norm": 5.943166732788086, "learning_rate": 9.60555898702903e-07, "loss": 0.1382, "step": 3198 }, { "epoch": 1.9765287214329832, "grad_norm": 8.397371292114258, "learning_rate": 9.605311920938852e-07, "loss": 0.1855, "step": 3200 }, { "epoch": 1.977764051883879, "grad_norm": 12.237709999084473, "learning_rate": 9.605064854848671e-07, "loss": 0.3395, "step": 
3202 }, { "epoch": 1.9789993823347745, "grad_norm": 11.95200252532959, "learning_rate": 9.604817788758494e-07, "loss": 0.339, "step": 3204 }, { "epoch": 1.9802347127856703, "grad_norm": 8.401416778564453, "learning_rate": 9.604570722668313e-07, "loss": 0.167, "step": 3206 }, { "epoch": 1.9814700432365657, "grad_norm": 4.447693347930908, "learning_rate": 9.604323656578133e-07, "loss": 0.0918, "step": 3208 }, { "epoch": 1.9827053736874614, "grad_norm": 3.6788413524627686, "learning_rate": 9.604076590487955e-07, "loss": 0.0653, "step": 3210 }, { "epoch": 1.983940704138357, "grad_norm": 6.405369758605957, "learning_rate": 9.603829524397775e-07, "loss": 0.1671, "step": 3212 }, { "epoch": 1.9851760345892526, "grad_norm": 7.782864570617676, "learning_rate": 9.603582458307597e-07, "loss": 0.2869, "step": 3214 }, { "epoch": 1.9864113650401483, "grad_norm": 6.554023265838623, "learning_rate": 9.603335392217417e-07, "loss": 0.1415, "step": 3216 }, { "epoch": 1.987646695491044, "grad_norm": 2.3370466232299805, "learning_rate": 9.60308832612724e-07, "loss": 0.0587, "step": 3218 }, { "epoch": 1.9888820259419395, "grad_norm": 1.2395919561386108, "learning_rate": 9.60284126003706e-07, "loss": 0.075, "step": 3220 }, { "epoch": 1.990117356392835, "grad_norm": 11.273198127746582, "learning_rate": 9.60259419394688e-07, "loss": 0.3571, "step": 3222 }, { "epoch": 1.9913526868437308, "grad_norm": 12.579070091247559, "learning_rate": 9.602347127856701e-07, "loss": 0.2911, "step": 3224 }, { "epoch": 1.9925880172946262, "grad_norm": 9.07384204864502, "learning_rate": 9.602100061766523e-07, "loss": 0.1894, "step": 3226 }, { "epoch": 1.993823347745522, "grad_norm": 3.2934279441833496, "learning_rate": 9.601852995676343e-07, "loss": 0.2225, "step": 3228 }, { "epoch": 1.9950586781964175, "grad_norm": 6.49041223526001, "learning_rate": 9.601605929586163e-07, "loss": 0.2681, "step": 3230 }, { "epoch": 1.9962940086473133, "grad_norm": 7.311825275421143, "learning_rate": 9.601358863495985e-07, 
"loss": 0.3203, "step": 3232 }, { "epoch": 1.9975293390982087, "grad_norm": 15.853619575500488, "learning_rate": 9.601111797405805e-07, "loss": 0.2447, "step": 3234 }, { "epoch": 1.9987646695491044, "grad_norm": 7.202307224273682, "learning_rate": 9.600864731315627e-07, "loss": 0.1738, "step": 3236 }, { "epoch": 2.0, "grad_norm": 0.22392110526561737, "learning_rate": 9.600617665225447e-07, "loss": 0.2413, "step": 3238 }, { "epoch": 2.0012353304508954, "grad_norm": 4.495696067810059, "learning_rate": 9.600370599135267e-07, "loss": 0.2446, "step": 3240 }, { "epoch": 2.0024706609017913, "grad_norm": 3.801104784011841, "learning_rate": 9.60012353304509e-07, "loss": 0.1635, "step": 3242 }, { "epoch": 2.0037059913526867, "grad_norm": 7.113442420959473, "learning_rate": 9.599876466954911e-07, "loss": 0.1177, "step": 3244 }, { "epoch": 2.0049413218035825, "grad_norm": 7.07243537902832, "learning_rate": 9.599629400864731e-07, "loss": 0.3718, "step": 3246 }, { "epoch": 2.006176652254478, "grad_norm": 4.875885963439941, "learning_rate": 9.599382334774551e-07, "loss": 0.0901, "step": 3248 }, { "epoch": 2.007411982705374, "grad_norm": 7.956467628479004, "learning_rate": 9.599135268684373e-07, "loss": 0.2088, "step": 3250 }, { "epoch": 2.008647313156269, "grad_norm": 5.388776779174805, "learning_rate": 9.598888202594193e-07, "loss": 0.1749, "step": 3252 }, { "epoch": 2.009882643607165, "grad_norm": 6.17380952835083, "learning_rate": 9.598641136504015e-07, "loss": 0.1957, "step": 3254 }, { "epoch": 2.0111179740580605, "grad_norm": 4.05656623840332, "learning_rate": 9.598394070413835e-07, "loss": 0.1546, "step": 3256 }, { "epoch": 2.0123533045089563, "grad_norm": 5.02143669128418, "learning_rate": 9.598147004323655e-07, "loss": 0.1658, "step": 3258 }, { "epoch": 2.0135886349598517, "grad_norm": 8.045797348022461, "learning_rate": 9.597899938233477e-07, "loss": 0.2281, "step": 3260 }, { "epoch": 2.0148239654107476, "grad_norm": 6.337091445922852, "learning_rate": 
9.5976528721433e-07, "loss": 0.1082, "step": 3262 }, { "epoch": 2.016059295861643, "grad_norm": 4.295886039733887, "learning_rate": 9.59740580605312e-07, "loss": 0.082, "step": 3264 }, { "epoch": 2.0172946263125384, "grad_norm": 2.227527618408203, "learning_rate": 9.59715873996294e-07, "loss": 0.0948, "step": 3266 }, { "epoch": 2.0185299567634343, "grad_norm": 7.509311199188232, "learning_rate": 9.59691167387276e-07, "loss": 0.1985, "step": 3268 }, { "epoch": 2.0197652872143297, "grad_norm": 6.9978251457214355, "learning_rate": 9.59666460778258e-07, "loss": 0.1206, "step": 3270 }, { "epoch": 2.0210006176652255, "grad_norm": 5.188405513763428, "learning_rate": 9.596417541692403e-07, "loss": 0.2168, "step": 3272 }, { "epoch": 2.022235948116121, "grad_norm": 9.68016242980957, "learning_rate": 9.596170475602223e-07, "loss": 0.2497, "step": 3274 }, { "epoch": 2.023471278567017, "grad_norm": 7.3739399909973145, "learning_rate": 9.595923409512045e-07, "loss": 0.0984, "step": 3276 }, { "epoch": 2.024706609017912, "grad_norm": 4.624042510986328, "learning_rate": 9.595676343421865e-07, "loss": 0.1818, "step": 3278 }, { "epoch": 2.025941939468808, "grad_norm": 7.299385070800781, "learning_rate": 9.595429277331685e-07, "loss": 0.2328, "step": 3280 }, { "epoch": 2.0271772699197035, "grad_norm": 15.161133766174316, "learning_rate": 9.595182211241507e-07, "loss": 0.2734, "step": 3282 }, { "epoch": 2.0284126003705993, "grad_norm": 7.058229446411133, "learning_rate": 9.594935145151327e-07, "loss": 0.108, "step": 3284 }, { "epoch": 2.0296479308214948, "grad_norm": 2.531327247619629, "learning_rate": 9.594688079061149e-07, "loss": 0.0639, "step": 3286 }, { "epoch": 2.03088326127239, "grad_norm": 6.260293483734131, "learning_rate": 9.594441012970969e-07, "loss": 0.1709, "step": 3288 }, { "epoch": 2.032118591723286, "grad_norm": 4.698343753814697, "learning_rate": 9.594193946880789e-07, "loss": 0.1279, "step": 3290 }, { "epoch": 2.0333539221741814, "grad_norm": 6.189927577972412, 
"learning_rate": 9.59394688079061e-07, "loss": 0.0871, "step": 3292 }, { "epoch": 2.0345892526250773, "grad_norm": 3.8811237812042236, "learning_rate": 9.593699814700433e-07, "loss": 0.088, "step": 3294 }, { "epoch": 2.0358245830759727, "grad_norm": 11.498834609985352, "learning_rate": 9.593452748610253e-07, "loss": 0.415, "step": 3296 }, { "epoch": 2.0370599135268685, "grad_norm": 5.938139915466309, "learning_rate": 9.593205682520073e-07, "loss": 0.1489, "step": 3298 }, { "epoch": 2.038295243977764, "grad_norm": 6.596236705780029, "learning_rate": 9.592958616429895e-07, "loss": 0.1126, "step": 3300 }, { "epoch": 2.03953057442866, "grad_norm": 7.644078254699707, "learning_rate": 9.592711550339715e-07, "loss": 0.2634, "step": 3302 }, { "epoch": 2.0407659048795552, "grad_norm": 6.954422473907471, "learning_rate": 9.592464484249537e-07, "loss": 0.0994, "step": 3304 }, { "epoch": 2.042001235330451, "grad_norm": 5.049615383148193, "learning_rate": 9.592217418159357e-07, "loss": 0.1566, "step": 3306 }, { "epoch": 2.0432365657813465, "grad_norm": 5.00437068939209, "learning_rate": 9.591970352069179e-07, "loss": 0.0867, "step": 3308 }, { "epoch": 2.044471896232242, "grad_norm": 5.374063968658447, "learning_rate": 9.591723285978999e-07, "loss": 0.3013, "step": 3310 }, { "epoch": 2.0457072266831378, "grad_norm": 5.3937177658081055, "learning_rate": 9.59147621988882e-07, "loss": 0.1879, "step": 3312 }, { "epoch": 2.046942557134033, "grad_norm": 6.151480197906494, "learning_rate": 9.59122915379864e-07, "loss": 0.1191, "step": 3314 }, { "epoch": 2.048177887584929, "grad_norm": 5.3284010887146, "learning_rate": 9.59098208770846e-07, "loss": 0.0784, "step": 3316 }, { "epoch": 2.0494132180358244, "grad_norm": 7.876260757446289, "learning_rate": 9.590735021618283e-07, "loss": 0.1342, "step": 3318 }, { "epoch": 2.0506485484867203, "grad_norm": 7.57796049118042, "learning_rate": 9.590487955528105e-07, "loss": 0.2304, "step": 3320 }, { "epoch": 2.0518838789376157, "grad_norm": 
5.456426620483398, "learning_rate": 9.590240889437925e-07, "loss": 0.1122, "step": 3322 }, { "epoch": 2.0531192093885116, "grad_norm": 3.6080846786499023, "learning_rate": 9.589993823347745e-07, "loss": 0.106, "step": 3324 }, { "epoch": 2.054354539839407, "grad_norm": 9.444742202758789, "learning_rate": 9.589746757257567e-07, "loss": 0.332, "step": 3326 }, { "epoch": 2.055589870290303, "grad_norm": 7.491119861602783, "learning_rate": 9.589499691167387e-07, "loss": 0.6666, "step": 3328 }, { "epoch": 2.0568252007411982, "grad_norm": 10.937352180480957, "learning_rate": 9.589252625077209e-07, "loss": 0.3095, "step": 3330 }, { "epoch": 2.058060531192094, "grad_norm": 4.39161491394043, "learning_rate": 9.589005558987028e-07, "loss": 0.1027, "step": 3332 }, { "epoch": 2.0592958616429895, "grad_norm": 7.8114519119262695, "learning_rate": 9.588758492896848e-07, "loss": 0.3542, "step": 3334 }, { "epoch": 2.060531192093885, "grad_norm": 8.368453025817871, "learning_rate": 9.58851142680667e-07, "loss": 0.2154, "step": 3336 }, { "epoch": 2.0617665225447808, "grad_norm": 4.0745649337768555, "learning_rate": 9.588264360716493e-07, "loss": 0.2825, "step": 3338 }, { "epoch": 2.063001852995676, "grad_norm": 0.7834110260009766, "learning_rate": 9.588017294626312e-07, "loss": 0.15, "step": 3340 }, { "epoch": 2.064237183446572, "grad_norm": 12.825756072998047, "learning_rate": 9.587770228536132e-07, "loss": 0.2941, "step": 3342 }, { "epoch": 2.0654725138974674, "grad_norm": 5.498414516448975, "learning_rate": 9.587523162445954e-07, "loss": 0.162, "step": 3344 }, { "epoch": 2.0667078443483633, "grad_norm": 8.897367477416992, "learning_rate": 9.587276096355774e-07, "loss": 0.0675, "step": 3346 }, { "epoch": 2.0679431747992587, "grad_norm": 6.407273292541504, "learning_rate": 9.587029030265596e-07, "loss": 0.1277, "step": 3348 }, { "epoch": 2.0691785052501546, "grad_norm": 7.066868305206299, "learning_rate": 9.586781964175416e-07, "loss": 0.1323, "step": 3350 }, { "epoch": 
2.07041383570105, "grad_norm": 5.836636066436768, "learning_rate": 9.586534898085238e-07, "loss": 0.2991, "step": 3352 }, { "epoch": 2.071649166151946, "grad_norm": 9.519028663635254, "learning_rate": 9.586287831995058e-07, "loss": 0.2633, "step": 3354 }, { "epoch": 2.0728844966028412, "grad_norm": 4.614165306091309, "learning_rate": 9.586040765904878e-07, "loss": 0.0826, "step": 3356 }, { "epoch": 2.074119827053737, "grad_norm": 7.997287273406982, "learning_rate": 9.5857936998147e-07, "loss": 0.1372, "step": 3358 }, { "epoch": 2.0753551575046325, "grad_norm": 6.54919958114624, "learning_rate": 9.58554663372452e-07, "loss": 0.1583, "step": 3360 }, { "epoch": 2.076590487955528, "grad_norm": 9.502483367919922, "learning_rate": 9.585299567634342e-07, "loss": 0.1831, "step": 3362 }, { "epoch": 2.0778258184064238, "grad_norm": 1.5178873538970947, "learning_rate": 9.585052501544162e-07, "loss": 0.1906, "step": 3364 }, { "epoch": 2.079061148857319, "grad_norm": 8.858867645263672, "learning_rate": 9.584805435453982e-07, "loss": 0.1467, "step": 3366 }, { "epoch": 2.080296479308215, "grad_norm": 6.535043239593506, "learning_rate": 9.584558369363804e-07, "loss": 0.1855, "step": 3368 }, { "epoch": 2.0815318097591105, "grad_norm": 6.166882038116455, "learning_rate": 9.584311303273626e-07, "loss": 0.1732, "step": 3370 }, { "epoch": 2.0827671402100063, "grad_norm": 6.970484256744385, "learning_rate": 9.584064237183446e-07, "loss": 0.241, "step": 3372 }, { "epoch": 2.0840024706609017, "grad_norm": 5.212179660797119, "learning_rate": 9.583817171093266e-07, "loss": 0.1205, "step": 3374 }, { "epoch": 2.0852378011117976, "grad_norm": 10.234179496765137, "learning_rate": 9.583570105003088e-07, "loss": 0.1742, "step": 3376 }, { "epoch": 2.086473131562693, "grad_norm": 7.176692485809326, "learning_rate": 9.583323038912908e-07, "loss": 0.1821, "step": 3378 }, { "epoch": 2.087708462013589, "grad_norm": 7.842673301696777, "learning_rate": 9.58307597282273e-07, "loss": 0.2664, "step": 3380 
}, { "epoch": 2.0889437924644843, "grad_norm": 6.903709888458252, "learning_rate": 9.58282890673255e-07, "loss": 0.1349, "step": 3382 }, { "epoch": 2.0901791229153797, "grad_norm": 8.73415470123291, "learning_rate": 9.58258184064237e-07, "loss": 0.1783, "step": 3384 }, { "epoch": 2.0914144533662755, "grad_norm": 7.1346821784973145, "learning_rate": 9.582334774552192e-07, "loss": 0.0713, "step": 3386 }, { "epoch": 2.092649783817171, "grad_norm": 3.5337440967559814, "learning_rate": 9.582087708462014e-07, "loss": 0.0631, "step": 3388 }, { "epoch": 2.093885114268067, "grad_norm": 11.719534873962402, "learning_rate": 9.581840642371834e-07, "loss": 0.6558, "step": 3390 }, { "epoch": 2.095120444718962, "grad_norm": 15.689053535461426, "learning_rate": 9.581593576281654e-07, "loss": 0.1903, "step": 3392 }, { "epoch": 2.096355775169858, "grad_norm": 5.047426223754883, "learning_rate": 9.581346510191476e-07, "loss": 0.1444, "step": 3394 }, { "epoch": 2.0975911056207535, "grad_norm": 6.189749240875244, "learning_rate": 9.581099444101298e-07, "loss": 0.0746, "step": 3396 }, { "epoch": 2.0988264360716493, "grad_norm": 7.880880832672119, "learning_rate": 9.580852378011118e-07, "loss": 0.5036, "step": 3398 }, { "epoch": 2.1000617665225447, "grad_norm": 3.9373908042907715, "learning_rate": 9.580605311920938e-07, "loss": 0.1008, "step": 3400 }, { "epoch": 2.1012970969734406, "grad_norm": 10.471332550048828, "learning_rate": 9.58035824583076e-07, "loss": 0.128, "step": 3402 }, { "epoch": 2.102532427424336, "grad_norm": 6.6472625732421875, "learning_rate": 9.58011117974058e-07, "loss": 0.1721, "step": 3404 }, { "epoch": 2.1037677578752314, "grad_norm": 2.205274820327759, "learning_rate": 9.579864113650402e-07, "loss": 0.0889, "step": 3406 }, { "epoch": 2.1050030883261273, "grad_norm": 7.811753749847412, "learning_rate": 9.579617047560222e-07, "loss": 0.1568, "step": 3408 }, { "epoch": 2.1062384187770227, "grad_norm": 4.82016134262085, "learning_rate": 9.579369981470042e-07, "loss": 
0.0889, "step": 3410 }, { "epoch": 2.1074737492279185, "grad_norm": 7.756528854370117, "learning_rate": 9.579122915379864e-07, "loss": 0.2698, "step": 3412 }, { "epoch": 2.108709079678814, "grad_norm": 7.721359729766846, "learning_rate": 9.578875849289684e-07, "loss": 0.1511, "step": 3414 }, { "epoch": 2.10994441012971, "grad_norm": 8.068668365478516, "learning_rate": 9.578628783199506e-07, "loss": 0.256, "step": 3416 }, { "epoch": 2.111179740580605, "grad_norm": 5.059365749359131, "learning_rate": 9.578381717109326e-07, "loss": 0.2009, "step": 3418 }, { "epoch": 2.112415071031501, "grad_norm": 4.069121360778809, "learning_rate": 9.578134651019148e-07, "loss": 0.2312, "step": 3420 }, { "epoch": 2.1136504014823965, "grad_norm": 5.541772842407227, "learning_rate": 9.577887584928968e-07, "loss": 0.0745, "step": 3422 }, { "epoch": 2.1148857319332923, "grad_norm": 8.758554458618164, "learning_rate": 9.577640518838788e-07, "loss": 0.0686, "step": 3424 }, { "epoch": 2.1161210623841877, "grad_norm": 11.184683799743652, "learning_rate": 9.57739345274861e-07, "loss": 0.0957, "step": 3426 }, { "epoch": 2.1173563928350836, "grad_norm": 6.80023717880249, "learning_rate": 9.577146386658432e-07, "loss": 0.1002, "step": 3428 }, { "epoch": 2.118591723285979, "grad_norm": 9.773594856262207, "learning_rate": 9.576899320568252e-07, "loss": 0.3429, "step": 3430 }, { "epoch": 2.1198270537368744, "grad_norm": 5.236018657684326, "learning_rate": 9.576652254478072e-07, "loss": 0.1506, "step": 3432 }, { "epoch": 2.1210623841877703, "grad_norm": 6.755019187927246, "learning_rate": 9.576405188387894e-07, "loss": 0.3623, "step": 3434 }, { "epoch": 2.1222977146386657, "grad_norm": 5.002331733703613, "learning_rate": 9.576158122297714e-07, "loss": 0.1803, "step": 3436 }, { "epoch": 2.1235330450895615, "grad_norm": 13.163592338562012, "learning_rate": 9.575911056207536e-07, "loss": 0.2842, "step": 3438 }, { "epoch": 2.124768375540457, "grad_norm": 6.011323928833008, "learning_rate": 
9.575663990117356e-07, "loss": 0.1021, "step": 3440 }, { "epoch": 2.126003705991353, "grad_norm": 8.493245124816895, "learning_rate": 9.575416924027176e-07, "loss": 0.2034, "step": 3442 }, { "epoch": 2.127239036442248, "grad_norm": 10.324592590332031, "learning_rate": 9.575169857936998e-07, "loss": 0.1863, "step": 3444 }, { "epoch": 2.128474366893144, "grad_norm": 4.196793556213379, "learning_rate": 9.57492279184682e-07, "loss": 0.2771, "step": 3446 }, { "epoch": 2.1297096973440395, "grad_norm": 2.676285982131958, "learning_rate": 9.57467572575664e-07, "loss": 0.0913, "step": 3448 }, { "epoch": 2.1309450277949353, "grad_norm": 5.902418613433838, "learning_rate": 9.57442865966646e-07, "loss": 0.1324, "step": 3450 }, { "epoch": 2.1321803582458307, "grad_norm": 9.569107055664062, "learning_rate": 9.574181593576282e-07, "loss": 0.3056, "step": 3452 }, { "epoch": 2.1334156886967266, "grad_norm": 3.747103214263916, "learning_rate": 9.573934527486102e-07, "loss": 0.1832, "step": 3454 }, { "epoch": 2.134651019147622, "grad_norm": 6.08473014831543, "learning_rate": 9.573687461395924e-07, "loss": 0.2199, "step": 3456 }, { "epoch": 2.1358863495985174, "grad_norm": 5.142740249633789, "learning_rate": 9.573440395305744e-07, "loss": 0.1664, "step": 3458 }, { "epoch": 2.1371216800494133, "grad_norm": 9.792118072509766, "learning_rate": 9.573193329215563e-07, "loss": 0.1974, "step": 3460 }, { "epoch": 2.1383570105003087, "grad_norm": 4.78532600402832, "learning_rate": 9.572946263125386e-07, "loss": 0.1018, "step": 3462 }, { "epoch": 2.1395923409512045, "grad_norm": 9.019325256347656, "learning_rate": 9.572699197035208e-07, "loss": 0.1667, "step": 3464 }, { "epoch": 2.1408276714021, "grad_norm": 5.921115398406982, "learning_rate": 9.572452130945027e-07, "loss": 0.0941, "step": 3466 }, { "epoch": 2.142063001852996, "grad_norm": 5.828497409820557, "learning_rate": 9.572205064854847e-07, "loss": 0.1918, "step": 3468 }, { "epoch": 2.143298332303891, "grad_norm": 11.955028533935547, 
"learning_rate": 9.57195799876467e-07, "loss": 0.4252, "step": 3470 }, { "epoch": 2.144533662754787, "grad_norm": 8.407090187072754, "learning_rate": 9.571710932674492e-07, "loss": 0.1546, "step": 3472 }, { "epoch": 2.1457689932056825, "grad_norm": 7.282629489898682, "learning_rate": 9.571463866584311e-07, "loss": 0.2318, "step": 3474 }, { "epoch": 2.1470043236565783, "grad_norm": 18.85344886779785, "learning_rate": 9.571216800494131e-07, "loss": 0.3146, "step": 3476 }, { "epoch": 2.1482396541074738, "grad_norm": 7.841081619262695, "learning_rate": 9.570969734403953e-07, "loss": 0.2504, "step": 3478 }, { "epoch": 2.149474984558369, "grad_norm": 2.5359416007995605, "learning_rate": 9.570722668313773e-07, "loss": 0.1532, "step": 3480 }, { "epoch": 2.150710315009265, "grad_norm": 10.394577026367188, "learning_rate": 9.570475602223595e-07, "loss": 0.2155, "step": 3482 }, { "epoch": 2.1519456454601604, "grad_norm": 9.885597229003906, "learning_rate": 9.570228536133415e-07, "loss": 0.2427, "step": 3484 }, { "epoch": 2.1531809759110563, "grad_norm": 7.818650722503662, "learning_rate": 9.569981470043235e-07, "loss": 0.1156, "step": 3486 }, { "epoch": 2.1544163063619517, "grad_norm": 6.966557502746582, "learning_rate": 9.569734403953057e-07, "loss": 0.2115, "step": 3488 }, { "epoch": 2.1556516368128475, "grad_norm": 8.153482437133789, "learning_rate": 9.569487337862877e-07, "loss": 0.1875, "step": 3490 }, { "epoch": 2.156886967263743, "grad_norm": 8.690757751464844, "learning_rate": 9.5692402717727e-07, "loss": 0.3058, "step": 3492 }, { "epoch": 2.158122297714639, "grad_norm": 7.058000564575195, "learning_rate": 9.56899320568252e-07, "loss": 0.157, "step": 3494 }, { "epoch": 2.1593576281655342, "grad_norm": 8.35284423828125, "learning_rate": 9.568746139592341e-07, "loss": 0.3036, "step": 3496 }, { "epoch": 2.16059295861643, "grad_norm": 5.160130023956299, "learning_rate": 9.568499073502161e-07, "loss": 0.1335, "step": 3498 }, { "epoch": 2.1618282890673255, "grad_norm": 
3.5157930850982666, "learning_rate": 9.568252007411981e-07, "loss": 0.0646, "step": 3500 }, { "epoch": 2.1618282890673255, "eval_cer": 0.05552862978605553, "eval_loss": 0.25218600034713745, "eval_runtime": 63.7558, "eval_samples_per_second": 12.877, "eval_steps_per_second": 1.616, "step": 3500 }, { "epoch": 2.163063619518221, "grad_norm": 6.975582122802734, "learning_rate": 9.568004941321803e-07, "loss": 0.1673, "step": 3502 }, { "epoch": 2.1642989499691168, "grad_norm": 9.23320484161377, "learning_rate": 9.567757875231623e-07, "loss": 0.271, "step": 3504 }, { "epoch": 2.165534280420012, "grad_norm": 2.6776204109191895, "learning_rate": 9.567510809141445e-07, "loss": 0.0433, "step": 3506 }, { "epoch": 2.166769610870908, "grad_norm": 3.342407464981079, "learning_rate": 9.567263743051265e-07, "loss": 0.1068, "step": 3508 }, { "epoch": 2.1680049413218034, "grad_norm": 1.4430954456329346, "learning_rate": 9.567016676961087e-07, "loss": 0.1445, "step": 3510 }, { "epoch": 2.1692402717726993, "grad_norm": 21.222612380981445, "learning_rate": 9.566769610870907e-07, "loss": 0.2789, "step": 3512 }, { "epoch": 2.1704756022235947, "grad_norm": 9.635834693908691, "learning_rate": 9.56652254478073e-07, "loss": 0.3227, "step": 3514 }, { "epoch": 2.1717109326744906, "grad_norm": 6.770580291748047, "learning_rate": 9.56627547869055e-07, "loss": 0.187, "step": 3516 }, { "epoch": 2.172946263125386, "grad_norm": 10.383820533752441, "learning_rate": 9.56602841260037e-07, "loss": 0.2203, "step": 3518 }, { "epoch": 2.174181593576282, "grad_norm": 5.551806449890137, "learning_rate": 9.565781346510191e-07, "loss": 0.2317, "step": 3520 }, { "epoch": 2.1754169240271772, "grad_norm": 5.682162284851074, "learning_rate": 9.565534280420013e-07, "loss": 0.1961, "step": 3522 }, { "epoch": 2.176652254478073, "grad_norm": 11.410911560058594, "learning_rate": 9.565287214329833e-07, "loss": 0.3074, "step": 3524 }, { "epoch": 2.1778875849289685, "grad_norm": 4.387307643890381, "learning_rate": 
9.565040148239653e-07, "loss": 0.0746, "step": 3526 }, { "epoch": 2.179122915379864, "grad_norm": 9.8275146484375, "learning_rate": 9.564793082149475e-07, "loss": 0.2432, "step": 3528 }, { "epoch": 2.1803582458307598, "grad_norm": 6.207986831665039, "learning_rate": 9.564546016059295e-07, "loss": 0.1518, "step": 3530 }, { "epoch": 2.181593576281655, "grad_norm": 12.782702445983887, "learning_rate": 9.564298949969117e-07, "loss": 0.0905, "step": 3532 }, { "epoch": 2.182828906732551, "grad_norm": 4.792867660522461, "learning_rate": 9.564051883878937e-07, "loss": 0.1387, "step": 3534 }, { "epoch": 2.1840642371834464, "grad_norm": 6.4039177894592285, "learning_rate": 9.563804817788757e-07, "loss": 0.1691, "step": 3536 }, { "epoch": 2.1852995676343423, "grad_norm": 10.42359733581543, "learning_rate": 9.56355775169858e-07, "loss": 0.4894, "step": 3538 }, { "epoch": 2.1865348980852377, "grad_norm": 20.60373306274414, "learning_rate": 9.5633106856084e-07, "loss": 0.1195, "step": 3540 }, { "epoch": 2.1877702285361336, "grad_norm": 6.780261516571045, "learning_rate": 9.56306361951822e-07, "loss": 0.1092, "step": 3542 }, { "epoch": 2.189005558987029, "grad_norm": 7.713423728942871, "learning_rate": 9.56281655342804e-07, "loss": 0.1565, "step": 3544 }, { "epoch": 2.190240889437925, "grad_norm": 11.635817527770996, "learning_rate": 9.562569487337863e-07, "loss": 0.2821, "step": 3546 }, { "epoch": 2.1914762198888202, "grad_norm": 8.521610260009766, "learning_rate": 9.562322421247683e-07, "loss": 0.3762, "step": 3548 }, { "epoch": 2.192711550339716, "grad_norm": 6.567923545837402, "learning_rate": 9.562075355157505e-07, "loss": 0.2428, "step": 3550 }, { "epoch": 2.1939468807906115, "grad_norm": 3.3307063579559326, "learning_rate": 9.561828289067325e-07, "loss": 0.0951, "step": 3552 }, { "epoch": 2.195182211241507, "grad_norm": 2.075441360473633, "learning_rate": 9.561581222977147e-07, "loss": 0.0746, "step": 3554 }, { "epoch": 2.1964175416924028, "grad_norm": 12.22225570678711, 
"learning_rate": 9.561334156886967e-07, "loss": 0.1656, "step": 3556 }, { "epoch": 2.197652872143298, "grad_norm": 5.1489129066467285, "learning_rate": 9.561087090796787e-07, "loss": 0.1185, "step": 3558 }, { "epoch": 2.198888202594194, "grad_norm": 4.30659818649292, "learning_rate": 9.560840024706609e-07, "loss": 0.0941, "step": 3560 }, { "epoch": 2.2001235330450895, "grad_norm": 7.605743885040283, "learning_rate": 9.560592958616429e-07, "loss": 0.2616, "step": 3562 }, { "epoch": 2.2013588634959853, "grad_norm": 9.99692440032959, "learning_rate": 9.56034589252625e-07, "loss": 0.3675, "step": 3564 }, { "epoch": 2.2025941939468807, "grad_norm": 5.656116962432861, "learning_rate": 9.56009882643607e-07, "loss": 0.1325, "step": 3566 }, { "epoch": 2.2038295243977766, "grad_norm": 6.527948379516602, "learning_rate": 9.55985176034589e-07, "loss": 0.1604, "step": 3568 }, { "epoch": 2.205064854848672, "grad_norm": 4.379159927368164, "learning_rate": 9.559604694255713e-07, "loss": 0.0916, "step": 3570 }, { "epoch": 2.206300185299568, "grad_norm": 5.951384544372559, "learning_rate": 9.559357628165535e-07, "loss": 0.0825, "step": 3572 }, { "epoch": 2.2075355157504633, "grad_norm": 5.5376362800598145, "learning_rate": 9.559110562075355e-07, "loss": 0.1526, "step": 3574 }, { "epoch": 2.2087708462013587, "grad_norm": 6.315401554107666, "learning_rate": 9.558863495985175e-07, "loss": 0.0804, "step": 3576 }, { "epoch": 2.2100061766522545, "grad_norm": 22.974590301513672, "learning_rate": 9.558616429894997e-07, "loss": 0.2694, "step": 3578 }, { "epoch": 2.21124150710315, "grad_norm": 4.472351551055908, "learning_rate": 9.558369363804817e-07, "loss": 0.5308, "step": 3580 }, { "epoch": 2.212476837554046, "grad_norm": 8.186083793640137, "learning_rate": 9.558122297714639e-07, "loss": 0.2994, "step": 3582 }, { "epoch": 2.213712168004941, "grad_norm": 4.629955291748047, "learning_rate": 9.557875231624459e-07, "loss": 0.0909, "step": 3584 }, { "epoch": 2.214947498455837, "grad_norm": 
4.958153247833252, "learning_rate": 9.55762816553428e-07, "loss": 0.0688, "step": 3586 }, { "epoch": 2.2161828289067325, "grad_norm": 8.825881958007812, "learning_rate": 9.5573810994441e-07, "loss": 0.2303, "step": 3588 }, { "epoch": 2.2174181593576283, "grad_norm": 6.924987316131592, "learning_rate": 9.557134033353923e-07, "loss": 0.1824, "step": 3590 }, { "epoch": 2.2186534898085237, "grad_norm": 5.005072116851807, "learning_rate": 9.556886967263743e-07, "loss": 0.1485, "step": 3592 }, { "epoch": 2.2198888202594196, "grad_norm": 7.166744232177734, "learning_rate": 9.556639901173562e-07, "loss": 0.1607, "step": 3594 }, { "epoch": 2.221124150710315, "grad_norm": 5.844821929931641, "learning_rate": 9.556392835083385e-07, "loss": 0.0538, "step": 3596 }, { "epoch": 2.2223594811612104, "grad_norm": 9.589033126831055, "learning_rate": 9.556145768993207e-07, "loss": 0.6961, "step": 3598 }, { "epoch": 2.2235948116121063, "grad_norm": 3.9977200031280518, "learning_rate": 9.555898702903026e-07, "loss": 0.0769, "step": 3600 }, { "epoch": 2.2248301420630017, "grad_norm": 3.9579625129699707, "learning_rate": 9.555651636812846e-07, "loss": 0.293, "step": 3602 }, { "epoch": 2.2260654725138975, "grad_norm": 6.283119201660156, "learning_rate": 9.555404570722668e-07, "loss": 0.1754, "step": 3604 }, { "epoch": 2.227300802964793, "grad_norm": 3.349116325378418, "learning_rate": 9.555157504632488e-07, "loss": 0.1036, "step": 3606 }, { "epoch": 2.228536133415689, "grad_norm": 7.26770544052124, "learning_rate": 9.55491043854231e-07, "loss": 0.2189, "step": 3608 }, { "epoch": 2.229771463866584, "grad_norm": 4.5853400230407715, "learning_rate": 9.55466337245213e-07, "loss": 0.1298, "step": 3610 }, { "epoch": 2.23100679431748, "grad_norm": 6.061213493347168, "learning_rate": 9.55441630636195e-07, "loss": 0.1188, "step": 3612 }, { "epoch": 2.2322421247683755, "grad_norm": 5.719973087310791, "learning_rate": 9.554169240271772e-07, "loss": 0.1747, "step": 3614 }, { "epoch": 
2.2334774552192713, "grad_norm": 6.698716640472412, "learning_rate": 9.553922174181594e-07, "loss": 0.0985, "step": 3616 }, { "epoch": 2.2347127856701667, "grad_norm": 6.049123764038086, "learning_rate": 9.553675108091414e-07, "loss": 0.0816, "step": 3618 }, { "epoch": 2.2359481161210626, "grad_norm": 3.9003171920776367, "learning_rate": 9.553428042001234e-07, "loss": 0.1063, "step": 3620 }, { "epoch": 2.237183446571958, "grad_norm": 6.264972686767578, "learning_rate": 9.553180975911056e-07, "loss": 0.194, "step": 3622 }, { "epoch": 2.2384187770228534, "grad_norm": 10.404424667358398, "learning_rate": 9.552933909820876e-07, "loss": 0.0991, "step": 3624 }, { "epoch": 2.2396541074737493, "grad_norm": 3.9255683422088623, "learning_rate": 9.552686843730698e-07, "loss": 0.0596, "step": 3626 }, { "epoch": 2.2408894379246447, "grad_norm": 2.485140800476074, "learning_rate": 9.552439777640518e-07, "loss": 0.1736, "step": 3628 }, { "epoch": 2.2421247683755405, "grad_norm": 4.42326545715332, "learning_rate": 9.55219271155034e-07, "loss": 0.2119, "step": 3630 }, { "epoch": 2.243360098826436, "grad_norm": 5.0218682289123535, "learning_rate": 9.55194564546016e-07, "loss": 0.1654, "step": 3632 }, { "epoch": 2.244595429277332, "grad_norm": 6.198904991149902, "learning_rate": 9.55169857936998e-07, "loss": 0.1155, "step": 3634 }, { "epoch": 2.245830759728227, "grad_norm": 6.18739128112793, "learning_rate": 9.551451513279802e-07, "loss": 0.0587, "step": 3636 }, { "epoch": 2.247066090179123, "grad_norm": 8.361427307128906, "learning_rate": 9.551204447189622e-07, "loss": 0.3413, "step": 3638 }, { "epoch": 2.2483014206300185, "grad_norm": 9.919855117797852, "learning_rate": 9.550957381099444e-07, "loss": 0.1173, "step": 3640 }, { "epoch": 2.2495367510809143, "grad_norm": 2.583134174346924, "learning_rate": 9.550710315009264e-07, "loss": 0.0458, "step": 3642 }, { "epoch": 2.2507720815318097, "grad_norm": 5.473998069763184, "learning_rate": 9.550463248919084e-07, "loss": 0.1648, "step": 
3644 }, { "epoch": 2.2520074119827056, "grad_norm": 10.036271095275879, "learning_rate": 9.550216182828906e-07, "loss": 0.1555, "step": 3646 }, { "epoch": 2.253242742433601, "grad_norm": 7.684455394744873, "learning_rate": 9.549969116738728e-07, "loss": 0.4086, "step": 3648 }, { "epoch": 2.2544780728844964, "grad_norm": 6.014695167541504, "learning_rate": 9.549722050648548e-07, "loss": 0.1272, "step": 3650 }, { "epoch": 2.2557134033353923, "grad_norm": 4.685242652893066, "learning_rate": 9.549474984558368e-07, "loss": 0.1368, "step": 3652 }, { "epoch": 2.2569487337862877, "grad_norm": 2.7646782398223877, "learning_rate": 9.54922791846819e-07, "loss": 0.1896, "step": 3654 }, { "epoch": 2.2581840642371835, "grad_norm": 5.321026802062988, "learning_rate": 9.54898085237801e-07, "loss": 0.0932, "step": 3656 }, { "epoch": 2.259419394688079, "grad_norm": 6.085170745849609, "learning_rate": 9.548733786287832e-07, "loss": 0.1226, "step": 3658 }, { "epoch": 2.260654725138975, "grad_norm": 9.636940002441406, "learning_rate": 9.548486720197652e-07, "loss": 0.1119, "step": 3660 }, { "epoch": 2.26189005558987, "grad_norm": 6.082615852355957, "learning_rate": 9.548239654107472e-07, "loss": 0.1665, "step": 3662 }, { "epoch": 2.263125386040766, "grad_norm": 7.991528034210205, "learning_rate": 9.547992588017294e-07, "loss": 0.133, "step": 3664 }, { "epoch": 2.2643607164916615, "grad_norm": 8.48002815246582, "learning_rate": 9.547745521927116e-07, "loss": 0.209, "step": 3666 }, { "epoch": 2.265596046942557, "grad_norm": 9.722286224365234, "learning_rate": 9.547498455836936e-07, "loss": 0.2787, "step": 3668 }, { "epoch": 2.2668313773934528, "grad_norm": 8.717191696166992, "learning_rate": 9.547251389746756e-07, "loss": 0.2313, "step": 3670 }, { "epoch": 2.2680667078443486, "grad_norm": 6.808514595031738, "learning_rate": 9.547004323656578e-07, "loss": 0.0982, "step": 3672 }, { "epoch": 2.269302038295244, "grad_norm": 3.2684834003448486, "learning_rate": 9.5467572575664e-07, "loss": 
0.045, "step": 3674 }, { "epoch": 2.2705373687461394, "grad_norm": 5.994133472442627, "learning_rate": 9.54651019147622e-07, "loss": 0.1111, "step": 3676 }, { "epoch": 2.2717726991970353, "grad_norm": 6.282434940338135, "learning_rate": 9.54626312538604e-07, "loss": 0.1846, "step": 3678 }, { "epoch": 2.2730080296479307, "grad_norm": 4.145854473114014, "learning_rate": 9.546016059295862e-07, "loss": 0.1704, "step": 3680 }, { "epoch": 2.2742433600988265, "grad_norm": 10.434826850891113, "learning_rate": 9.545768993205682e-07, "loss": 0.3539, "step": 3682 }, { "epoch": 2.275478690549722, "grad_norm": 5.327663898468018, "learning_rate": 9.545521927115504e-07, "loss": 0.1416, "step": 3684 }, { "epoch": 2.276714021000618, "grad_norm": 3.8038341999053955, "learning_rate": 9.545274861025324e-07, "loss": 0.1388, "step": 3686 }, { "epoch": 2.2779493514515132, "grad_norm": 5.33845853805542, "learning_rate": 9.545027794935144e-07, "loss": 0.1412, "step": 3688 }, { "epoch": 2.279184681902409, "grad_norm": 3.5018458366394043, "learning_rate": 9.544780728844966e-07, "loss": 0.1003, "step": 3690 }, { "epoch": 2.2804200123533045, "grad_norm": 4.569183349609375, "learning_rate": 9.544533662754786e-07, "loss": 0.0768, "step": 3692 }, { "epoch": 2.2816553428042, "grad_norm": 7.940682411193848, "learning_rate": 9.544286596664608e-07, "loss": 0.1832, "step": 3694 }, { "epoch": 2.2828906732550958, "grad_norm": 4.654417991638184, "learning_rate": 9.544039530574428e-07, "loss": 0.0927, "step": 3696 }, { "epoch": 2.284126003705991, "grad_norm": 7.326111316680908, "learning_rate": 9.54379246448425e-07, "loss": 0.1713, "step": 3698 }, { "epoch": 2.285361334156887, "grad_norm": 10.418829917907715, "learning_rate": 9.54354539839407e-07, "loss": 0.1198, "step": 3700 }, { "epoch": 2.2865966646077824, "grad_norm": 6.58595609664917, "learning_rate": 9.54329833230389e-07, "loss": 0.126, "step": 3702 }, { "epoch": 2.2878319950586783, "grad_norm": 7.397129058837891, "learning_rate": 
9.543051266213712e-07, "loss": 0.2661, "step": 3704 }, { "epoch": 2.2890673255095737, "grad_norm": 6.1796393394470215, "learning_rate": 9.542804200123534e-07, "loss": 0.1806, "step": 3706 }, { "epoch": 2.2903026559604696, "grad_norm": 4.962209701538086, "learning_rate": 9.542557134033354e-07, "loss": 0.1659, "step": 3708 }, { "epoch": 2.291537986411365, "grad_norm": 8.580303192138672, "learning_rate": 9.542310067943174e-07, "loss": 0.2842, "step": 3710 }, { "epoch": 2.292773316862261, "grad_norm": 5.785215377807617, "learning_rate": 9.542063001852996e-07, "loss": 0.0773, "step": 3712 }, { "epoch": 2.2940086473131562, "grad_norm": 8.221529960632324, "learning_rate": 9.541815935762816e-07, "loss": 0.2276, "step": 3714 }, { "epoch": 2.295243977764052, "grad_norm": 2.3973002433776855, "learning_rate": 9.541568869672638e-07, "loss": 0.052, "step": 3716 }, { "epoch": 2.2964793082149475, "grad_norm": 5.817103385925293, "learning_rate": 9.541321803582458e-07, "loss": 0.2501, "step": 3718 }, { "epoch": 2.297714638665843, "grad_norm": 6.00213098526001, "learning_rate": 9.541074737492278e-07, "loss": 0.0814, "step": 3720 }, { "epoch": 2.2989499691167388, "grad_norm": 4.891816139221191, "learning_rate": 9.5408276714021e-07, "loss": 0.0778, "step": 3722 }, { "epoch": 2.300185299567634, "grad_norm": 9.537749290466309, "learning_rate": 9.540580605311922e-07, "loss": 0.18, "step": 3724 }, { "epoch": 2.30142063001853, "grad_norm": 11.095427513122559, "learning_rate": 9.540333539221742e-07, "loss": 0.2845, "step": 3726 }, { "epoch": 2.3026559604694254, "grad_norm": 5.3335490226745605, "learning_rate": 9.540086473131561e-07, "loss": 0.2137, "step": 3728 }, { "epoch": 2.3038912909203213, "grad_norm": 6.516895771026611, "learning_rate": 9.539839407041384e-07, "loss": 0.2098, "step": 3730 }, { "epoch": 2.3051266213712167, "grad_norm": 5.4740071296691895, "learning_rate": 9.539592340951203e-07, "loss": 0.2881, "step": 3732 }, { "epoch": 2.3063619518221126, "grad_norm": 8.303621292114258, 
"learning_rate": 9.539345274861026e-07, "loss": 0.1376, "step": 3734 }, { "epoch": 2.307597282273008, "grad_norm": 9.870479583740234, "learning_rate": 9.539098208770845e-07, "loss": 0.142, "step": 3736 }, { "epoch": 2.308832612723904, "grad_norm": 5.984689235687256, "learning_rate": 9.538851142680665e-07, "loss": 0.0996, "step": 3738 }, { "epoch": 2.3100679431747992, "grad_norm": 4.042320728302002, "learning_rate": 9.538604076590487e-07, "loss": 0.1051, "step": 3740 }, { "epoch": 2.311303273625695, "grad_norm": 7.933021068572998, "learning_rate": 9.53835701050031e-07, "loss": 0.2325, "step": 3742 }, { "epoch": 2.3125386040765905, "grad_norm": 13.434671401977539, "learning_rate": 9.53810994441013e-07, "loss": 0.2887, "step": 3744 }, { "epoch": 2.313773934527486, "grad_norm": 3.207671642303467, "learning_rate": 9.53786287831995e-07, "loss": 0.0728, "step": 3746 }, { "epoch": 2.3150092649783818, "grad_norm": 5.902285099029541, "learning_rate": 9.537615812229771e-07, "loss": 0.075, "step": 3748 }, { "epoch": 2.316244595429277, "grad_norm": 10.490276336669922, "learning_rate": 9.537368746139593e-07, "loss": 0.3216, "step": 3750 }, { "epoch": 2.317479925880173, "grad_norm": 5.872098922729492, "learning_rate": 9.537121680049413e-07, "loss": 0.1344, "step": 3752 }, { "epoch": 2.3187152563310685, "grad_norm": 4.639268398284912, "learning_rate": 9.536874613959233e-07, "loss": 0.0914, "step": 3754 }, { "epoch": 2.3199505867819643, "grad_norm": 3.7453665733337402, "learning_rate": 9.536627547869055e-07, "loss": 0.0877, "step": 3756 }, { "epoch": 2.3211859172328597, "grad_norm": 3.911614179611206, "learning_rate": 9.536380481778875e-07, "loss": 0.2641, "step": 3758 }, { "epoch": 2.3224212476837556, "grad_norm": 0.9541102051734924, "learning_rate": 9.536133415688696e-07, "loss": 0.144, "step": 3760 }, { "epoch": 2.323656578134651, "grad_norm": 7.825087547302246, "learning_rate": 9.535886349598517e-07, "loss": 0.1522, "step": 3762 }, { "epoch": 2.3248919085855464, "grad_norm": 
2.109666585922241, "learning_rate": 9.535639283508337e-07, "loss": 0.1242, "step": 3764 }, { "epoch": 2.3261272390364423, "grad_norm": 4.002219200134277, "learning_rate": 9.535392217418159e-07, "loss": 0.1197, "step": 3766 }, { "epoch": 2.327362569487338, "grad_norm": 5.290385723114014, "learning_rate": 9.53514515132798e-07, "loss": 0.239, "step": 3768 }, { "epoch": 2.3285978999382335, "grad_norm": 7.236030578613281, "learning_rate": 9.5348980852378e-07, "loss": 0.1139, "step": 3770 }, { "epoch": 2.329833230389129, "grad_norm": 7.448349952697754, "learning_rate": 9.534651019147621e-07, "loss": 0.1072, "step": 3772 }, { "epoch": 2.331068560840025, "grad_norm": 8.57740592956543, "learning_rate": 9.534403953057443e-07, "loss": 0.1942, "step": 3774 }, { "epoch": 2.33230389129092, "grad_norm": 0.4678352177143097, "learning_rate": 9.534156886967263e-07, "loss": 0.1508, "step": 3776 }, { "epoch": 2.333539221741816, "grad_norm": 3.6598942279815674, "learning_rate": 9.533909820877084e-07, "loss": 0.061, "step": 3778 }, { "epoch": 2.3347745521927115, "grad_norm": 2.943610906600952, "learning_rate": 9.533662754786905e-07, "loss": 0.0334, "step": 3780 }, { "epoch": 2.3360098826436073, "grad_norm": 5.683017730712891, "learning_rate": 9.533415688696725e-07, "loss": 0.1197, "step": 3782 }, { "epoch": 2.3372452130945027, "grad_norm": 10.841135025024414, "learning_rate": 9.533168622606547e-07, "loss": 0.1356, "step": 3784 }, { "epoch": 2.3384805435453986, "grad_norm": 5.867833614349365, "learning_rate": 9.532921556516368e-07, "loss": 0.1131, "step": 3786 }, { "epoch": 2.339715873996294, "grad_norm": 7.845891952514648, "learning_rate": 9.532674490426189e-07, "loss": 0.2293, "step": 3788 }, { "epoch": 2.3409512044471894, "grad_norm": 1.9645183086395264, "learning_rate": 9.532427424336009e-07, "loss": 0.0923, "step": 3790 }, { "epoch": 2.3421865348980853, "grad_norm": 6.4359846115112305, "learning_rate": 9.53218035824583e-07, "loss": 0.1308, "step": 3792 }, { "epoch": 
2.3434218653489807, "grad_norm": 11.398540496826172, "learning_rate": 9.531933292155652e-07, "loss": 0.3587, "step": 3794 }, { "epoch": 2.3446571957998765, "grad_norm": 7.472131729125977, "learning_rate": 9.531686226065472e-07, "loss": 0.2665, "step": 3796 }, { "epoch": 2.345892526250772, "grad_norm": 9.334091186523438, "learning_rate": 9.531439159975293e-07, "loss": 0.35, "step": 3798 }, { "epoch": 2.347127856701668, "grad_norm": 7.1718668937683105, "learning_rate": 9.531192093885114e-07, "loss": 0.1594, "step": 3800 }, { "epoch": 2.348363187152563, "grad_norm": 8.676079750061035, "learning_rate": 9.530945027794934e-07, "loss": 0.1551, "step": 3802 }, { "epoch": 2.349598517603459, "grad_norm": 9.056697845458984, "learning_rate": 9.530697961704756e-07, "loss": 0.1267, "step": 3804 }, { "epoch": 2.3508338480543545, "grad_norm": 7.402749538421631, "learning_rate": 9.530450895614577e-07, "loss": 0.1347, "step": 3806 }, { "epoch": 2.3520691785052503, "grad_norm": 2.693100929260254, "learning_rate": 9.530203829524397e-07, "loss": 0.0898, "step": 3808 }, { "epoch": 2.3533045089561457, "grad_norm": 8.89450740814209, "learning_rate": 9.529956763434218e-07, "loss": 0.2209, "step": 3810 }, { "epoch": 2.3545398394070416, "grad_norm": 5.573294639587402, "learning_rate": 9.52970969734404e-07, "loss": 0.1419, "step": 3812 }, { "epoch": 2.355775169857937, "grad_norm": 10.316119194030762, "learning_rate": 9.52946263125386e-07, "loss": 0.2082, "step": 3814 }, { "epoch": 2.3570105003088324, "grad_norm": 14.548480033874512, "learning_rate": 9.529215565163681e-07, "loss": 0.4077, "step": 3816 }, { "epoch": 2.3582458307597283, "grad_norm": 1.6657311916351318, "learning_rate": 9.528968499073502e-07, "loss": 0.0906, "step": 3818 }, { "epoch": 2.3594811612106237, "grad_norm": 10.985822677612305, "learning_rate": 9.528721432983322e-07, "loss": 0.2725, "step": 3820 }, { "epoch": 2.3607164916615195, "grad_norm": 7.2303853034973145, "learning_rate": 9.528474366893144e-07, "loss": 0.0834, 
"step": 3822 }, { "epoch": 2.361951822112415, "grad_norm": 9.193991661071777, "learning_rate": 9.528227300802965e-07, "loss": 0.1525, "step": 3824 }, { "epoch": 2.363187152563311, "grad_norm": 9.35415267944336, "learning_rate": 9.527980234712786e-07, "loss": 0.2252, "step": 3826 }, { "epoch": 2.364422483014206, "grad_norm": 8.192651748657227, "learning_rate": 9.527733168622606e-07, "loss": 0.1401, "step": 3828 }, { "epoch": 2.365657813465102, "grad_norm": 5.4562811851501465, "learning_rate": 9.527486102532427e-07, "loss": 0.1182, "step": 3830 }, { "epoch": 2.3668931439159975, "grad_norm": 7.854249477386475, "learning_rate": 9.527239036442249e-07, "loss": 0.2395, "step": 3832 }, { "epoch": 2.3681284743668933, "grad_norm": 12.927903175354004, "learning_rate": 9.526991970352069e-07, "loss": 0.4697, "step": 3834 }, { "epoch": 2.3693638048177887, "grad_norm": 8.10112476348877, "learning_rate": 9.52674490426189e-07, "loss": 0.1534, "step": 3836 }, { "epoch": 2.3705991352686846, "grad_norm": 7.712443828582764, "learning_rate": 9.526497838171711e-07, "loss": 0.1354, "step": 3838 }, { "epoch": 2.37183446571958, "grad_norm": 5.303149700164795, "learning_rate": 9.526250772081531e-07, "loss": 0.3051, "step": 3840 }, { "epoch": 2.3730697961704754, "grad_norm": 5.470002174377441, "learning_rate": 9.526003705991353e-07, "loss": 0.2918, "step": 3842 }, { "epoch": 2.3743051266213713, "grad_norm": 9.508384704589844, "learning_rate": 9.525756639901174e-07, "loss": 0.089, "step": 3844 }, { "epoch": 2.3755404570722667, "grad_norm": 10.02641773223877, "learning_rate": 9.525509573810994e-07, "loss": 0.2682, "step": 3846 }, { "epoch": 2.3767757875231625, "grad_norm": 6.910661220550537, "learning_rate": 9.525262507720815e-07, "loss": 0.178, "step": 3848 }, { "epoch": 2.378011117974058, "grad_norm": 6.066755771636963, "learning_rate": 9.525015441630636e-07, "loss": 0.0704, "step": 3850 }, { "epoch": 2.379246448424954, "grad_norm": 9.829758644104004, "learning_rate": 9.524768375540457e-07, 
"loss": 0.1591, "step": 3852 }, { "epoch": 2.380481778875849, "grad_norm": 2.51155161857605, "learning_rate": 9.524521309450278e-07, "loss": 0.0614, "step": 3854 }, { "epoch": 2.381717109326745, "grad_norm": 4.153083801269531, "learning_rate": 9.524274243360099e-07, "loss": 0.1192, "step": 3856 }, { "epoch": 2.3829524397776405, "grad_norm": 4.179171562194824, "learning_rate": 9.524027177269918e-07, "loss": 0.1518, "step": 3858 }, { "epoch": 2.384187770228536, "grad_norm": 6.5246124267578125, "learning_rate": 9.52378011117974e-07, "loss": 0.1493, "step": 3860 }, { "epoch": 2.3854231006794318, "grad_norm": 6.626105785369873, "learning_rate": 9.523533045089562e-07, "loss": 0.0565, "step": 3862 }, { "epoch": 2.3866584311303276, "grad_norm": 14.364203453063965, "learning_rate": 9.523285978999381e-07, "loss": 0.4846, "step": 3864 }, { "epoch": 2.387893761581223, "grad_norm": 5.689853668212891, "learning_rate": 9.523038912909202e-07, "loss": 0.1606, "step": 3866 }, { "epoch": 2.3891290920321184, "grad_norm": 8.042296409606934, "learning_rate": 9.522791846819023e-07, "loss": 0.1173, "step": 3868 }, { "epoch": 2.3903644224830143, "grad_norm": 3.9178924560546875, "learning_rate": 9.522544780728845e-07, "loss": 0.1425, "step": 3870 }, { "epoch": 2.3915997529339097, "grad_norm": 6.12578010559082, "learning_rate": 9.522297714638665e-07, "loss": 0.2382, "step": 3872 }, { "epoch": 2.3928350833848056, "grad_norm": 4.467011451721191, "learning_rate": 9.522050648548486e-07, "loss": 0.0354, "step": 3874 }, { "epoch": 2.394070413835701, "grad_norm": 12.433756828308105, "learning_rate": 9.521803582458307e-07, "loss": 0.5009, "step": 3876 }, { "epoch": 2.395305744286597, "grad_norm": 9.745952606201172, "learning_rate": 9.521556516368127e-07, "loss": 0.2103, "step": 3878 }, { "epoch": 2.3965410747374922, "grad_norm": 6.655928134918213, "learning_rate": 9.521309450277949e-07, "loss": 0.3155, "step": 3880 }, { "epoch": 2.397776405188388, "grad_norm": 2.7288780212402344, "learning_rate": 
9.52106238418777e-07, "loss": 0.0525, "step": 3882 }, { "epoch": 2.3990117356392835, "grad_norm": 3.2606616020202637, "learning_rate": 9.52081531809759e-07, "loss": 0.0439, "step": 3884 }, { "epoch": 2.400247066090179, "grad_norm": 6.175978183746338, "learning_rate": 9.520568252007411e-07, "loss": 0.247, "step": 3886 }, { "epoch": 2.4014823965410748, "grad_norm": 3.1601083278656006, "learning_rate": 9.520321185917232e-07, "loss": 0.0608, "step": 3888 }, { "epoch": 2.40271772699197, "grad_norm": 12.997496604919434, "learning_rate": 9.520074119827053e-07, "loss": 0.3569, "step": 3890 }, { "epoch": 2.403953057442866, "grad_norm": 5.787109851837158, "learning_rate": 9.519827053736874e-07, "loss": 0.1897, "step": 3892 }, { "epoch": 2.4051883878937614, "grad_norm": 21.276777267456055, "learning_rate": 9.519579987646695e-07, "loss": 0.3682, "step": 3894 }, { "epoch": 2.4064237183446573, "grad_norm": 11.816195487976074, "learning_rate": 9.519332921556515e-07, "loss": 0.3021, "step": 3896 }, { "epoch": 2.4076590487955527, "grad_norm": 8.888312339782715, "learning_rate": 9.519085855466336e-07, "loss": 0.3456, "step": 3898 }, { "epoch": 2.4088943792464486, "grad_norm": 9.906089782714844, "learning_rate": 9.518838789376158e-07, "loss": 0.1798, "step": 3900 }, { "epoch": 2.410129709697344, "grad_norm": 17.369340896606445, "learning_rate": 9.518591723285978e-07, "loss": 0.2326, "step": 3902 }, { "epoch": 2.41136504014824, "grad_norm": 11.39885139465332, "learning_rate": 9.518344657195799e-07, "loss": 0.3123, "step": 3904 }, { "epoch": 2.4126003705991352, "grad_norm": 5.58793306350708, "learning_rate": 9.51809759110562e-07, "loss": 0.1099, "step": 3906 }, { "epoch": 2.413835701050031, "grad_norm": 7.296766757965088, "learning_rate": 9.517850525015442e-07, "loss": 0.1791, "step": 3908 }, { "epoch": 2.4150710315009265, "grad_norm": 5.956603050231934, "learning_rate": 9.517603458925262e-07, "loss": 0.1179, "step": 3910 }, { "epoch": 2.416306361951822, "grad_norm": 4.608818054199219, 
"learning_rate": 9.517356392835083e-07, "loss": 0.0548, "step": 3912 }, { "epoch": 2.4175416924027178, "grad_norm": 8.254487037658691, "learning_rate": 9.517109326744904e-07, "loss": 0.1748, "step": 3914 }, { "epoch": 2.418777022853613, "grad_norm": 6.881091594696045, "learning_rate": 9.516862260654724e-07, "loss": 0.1337, "step": 3916 }, { "epoch": 2.420012353304509, "grad_norm": 5.31564474105835, "learning_rate": 9.516615194564546e-07, "loss": 0.1688, "step": 3918 }, { "epoch": 2.4212476837554044, "grad_norm": 6.220884323120117, "learning_rate": 9.516368128474367e-07, "loss": 0.134, "step": 3920 }, { "epoch": 2.4224830142063003, "grad_norm": 7.00637674331665, "learning_rate": 9.516121062384187e-07, "loss": 0.1444, "step": 3922 }, { "epoch": 2.4237183446571957, "grad_norm": 1.81105637550354, "learning_rate": 9.515873996294008e-07, "loss": 0.1028, "step": 3924 }, { "epoch": 2.4249536751080916, "grad_norm": 12.004409790039062, "learning_rate": 9.515626930203829e-07, "loss": 0.3779, "step": 3926 }, { "epoch": 2.426189005558987, "grad_norm": 4.22098445892334, "learning_rate": 9.51537986411365e-07, "loss": 0.0818, "step": 3928 }, { "epoch": 2.427424336009883, "grad_norm": 5.8943376541137695, "learning_rate": 9.515132798023471e-07, "loss": 0.1474, "step": 3930 }, { "epoch": 2.4286596664607782, "grad_norm": 6.851485729217529, "learning_rate": 9.514885731933292e-07, "loss": 0.2479, "step": 3932 }, { "epoch": 2.429894996911674, "grad_norm": 8.956628799438477, "learning_rate": 9.514638665843112e-07, "loss": 0.2622, "step": 3934 }, { "epoch": 2.4311303273625695, "grad_norm": 9.68490219116211, "learning_rate": 9.514391599752933e-07, "loss": 0.4398, "step": 3936 }, { "epoch": 2.432365657813465, "grad_norm": 5.907257556915283, "learning_rate": 9.514144533662755e-07, "loss": 0.1412, "step": 3938 }, { "epoch": 2.4336009882643608, "grad_norm": 5.9960808753967285, "learning_rate": 9.513897467572575e-07, "loss": 0.0845, "step": 3940 }, { "epoch": 2.434836318715256, "grad_norm": 
11.464165687561035, "learning_rate": 9.513650401482396e-07, "loss": 0.3311, "step": 3942 }, { "epoch": 2.436071649166152, "grad_norm": 7.067351818084717, "learning_rate": 9.513403335392217e-07, "loss": 0.2168, "step": 3944 }, { "epoch": 2.4373069796170475, "grad_norm": 2.5815212726593018, "learning_rate": 9.513156269302039e-07, "loss": 0.1588, "step": 3946 }, { "epoch": 2.4385423100679433, "grad_norm": 7.363598346710205, "learning_rate": 9.512909203211859e-07, "loss": 0.1884, "step": 3948 }, { "epoch": 2.4397776405188387, "grad_norm": 5.47083854675293, "learning_rate": 9.51266213712168e-07, "loss": 0.0641, "step": 3950 }, { "epoch": 2.4410129709697346, "grad_norm": 8.44034194946289, "learning_rate": 9.512415071031501e-07, "loss": 0.2282, "step": 3952 }, { "epoch": 2.44224830142063, "grad_norm": 4.6630096435546875, "learning_rate": 9.512168004941321e-07, "loss": 0.0712, "step": 3954 }, { "epoch": 2.4434836318715254, "grad_norm": 4.905350208282471, "learning_rate": 9.511920938851143e-07, "loss": 0.1377, "step": 3956 }, { "epoch": 2.4447189623224213, "grad_norm": 9.021784782409668, "learning_rate": 9.511673872760964e-07, "loss": 0.2095, "step": 3958 }, { "epoch": 2.445954292773317, "grad_norm": 4.163048267364502, "learning_rate": 9.511426806670784e-07, "loss": 0.0419, "step": 3960 }, { "epoch": 2.4471896232242125, "grad_norm": 12.030973434448242, "learning_rate": 9.511179740580605e-07, "loss": 0.2967, "step": 3962 }, { "epoch": 2.448424953675108, "grad_norm": 7.119677543640137, "learning_rate": 9.510932674490426e-07, "loss": 0.1605, "step": 3964 }, { "epoch": 2.449660284126004, "grad_norm": 3.691103935241699, "learning_rate": 9.510685608400247e-07, "loss": 0.1146, "step": 3966 }, { "epoch": 2.450895614576899, "grad_norm": 8.175715446472168, "learning_rate": 9.510438542310068e-07, "loss": 0.1455, "step": 3968 }, { "epoch": 2.452130945027795, "grad_norm": 3.2379355430603027, "learning_rate": 9.510191476219889e-07, "loss": 0.123, "step": 3970 }, { "epoch": 
2.4533662754786905, "grad_norm": 2.8734652996063232, "learning_rate": 9.509944410129709e-07, "loss": 0.1188, "step": 3972 }, { "epoch": 2.4546016059295863, "grad_norm": 8.039328575134277, "learning_rate": 9.50969734403953e-07, "loss": 0.1222, "step": 3974 }, { "epoch": 2.4558369363804817, "grad_norm": 4.957016944885254, "learning_rate": 9.509450277949352e-07, "loss": 0.4045, "step": 3976 }, { "epoch": 2.4570722668313776, "grad_norm": 6.359691143035889, "learning_rate": 9.509203211859172e-07, "loss": 0.1179, "step": 3978 }, { "epoch": 2.458307597282273, "grad_norm": 4.508669853210449, "learning_rate": 9.508956145768993e-07, "loss": 0.0904, "step": 3980 }, { "epoch": 2.4595429277331684, "grad_norm": 13.624983787536621, "learning_rate": 9.508709079678814e-07, "loss": 0.1742, "step": 3982 }, { "epoch": 2.4607782581840643, "grad_norm": 5.929695129394531, "learning_rate": 9.508462013588634e-07, "loss": 0.3558, "step": 3984 }, { "epoch": 2.4620135886349597, "grad_norm": 12.800386428833008, "learning_rate": 9.508214947498456e-07, "loss": 0.3074, "step": 3986 }, { "epoch": 2.4632489190858555, "grad_norm": 9.588747024536133, "learning_rate": 9.507967881408277e-07, "loss": 0.1518, "step": 3988 }, { "epoch": 2.464484249536751, "grad_norm": 7.475319862365723, "learning_rate": 9.507720815318098e-07, "loss": 0.2044, "step": 3990 }, { "epoch": 2.465719579987647, "grad_norm": 5.431769847869873, "learning_rate": 9.507473749227917e-07, "loss": 0.0745, "step": 3992 }, { "epoch": 2.466954910438542, "grad_norm": 5.271090030670166, "learning_rate": 9.507226683137738e-07, "loss": 0.2419, "step": 3994 }, { "epoch": 2.468190240889438, "grad_norm": 7.005252361297607, "learning_rate": 9.506979617047561e-07, "loss": 0.0749, "step": 3996 }, { "epoch": 2.4694255713403335, "grad_norm": 6.365462303161621, "learning_rate": 9.50673255095738e-07, "loss": 0.1739, "step": 3998 }, { "epoch": 2.4706609017912293, "grad_norm": 6.568759441375732, "learning_rate": 9.506485484867201e-07, "loss": 0.1692, 
"step": 4000 }, { "epoch": 2.4706609017912293, "eval_cer": 0.05490549054905491, "eval_loss": 0.24874834716320038, "eval_runtime": 63.7556, "eval_samples_per_second": 12.877, "eval_steps_per_second": 1.616, "step": 4000 }, { "epoch": 2.4718962322421247, "grad_norm": 2.7376253604888916, "learning_rate": 9.506238418777022e-07, "loss": 0.1273, "step": 4002 }, { "epoch": 2.4731315626930206, "grad_norm": 12.004936218261719, "learning_rate": 9.505991352686843e-07, "loss": 0.1587, "step": 4004 }, { "epoch": 2.474366893143916, "grad_norm": 5.389779090881348, "learning_rate": 9.505744286596664e-07, "loss": 0.0894, "step": 4006 }, { "epoch": 2.4756022235948114, "grad_norm": 6.576326847076416, "learning_rate": 9.505497220506485e-07, "loss": 0.2924, "step": 4008 }, { "epoch": 2.4768375540457073, "grad_norm": 5.033486366271973, "learning_rate": 9.505250154416305e-07, "loss": 0.294, "step": 4010 }, { "epoch": 2.4780728844966027, "grad_norm": 1.704712152481079, "learning_rate": 9.505003088326126e-07, "loss": 0.0379, "step": 4012 }, { "epoch": 2.4793082149474985, "grad_norm": 6.810169219970703, "learning_rate": 9.504756022235948e-07, "loss": 0.1269, "step": 4014 }, { "epoch": 2.480543545398394, "grad_norm": 4.01441764831543, "learning_rate": 9.504508956145768e-07, "loss": 0.134, "step": 4016 }, { "epoch": 2.48177887584929, "grad_norm": 3.7855565547943115, "learning_rate": 9.504261890055589e-07, "loss": 0.0963, "step": 4018 }, { "epoch": 2.483014206300185, "grad_norm": 7.20186185836792, "learning_rate": 9.50401482396541e-07, "loss": 0.1794, "step": 4020 }, { "epoch": 2.484249536751081, "grad_norm": 2.8768186569213867, "learning_rate": 9.50376775787523e-07, "loss": 0.4491, "step": 4022 }, { "epoch": 2.4854848672019765, "grad_norm": 6.1670145988464355, "learning_rate": 9.503520691785052e-07, "loss": 0.1834, "step": 4024 }, { "epoch": 2.4867201976528723, "grad_norm": 5.842103004455566, "learning_rate": 9.503273625694873e-07, "loss": 0.195, "step": 4026 }, { "epoch": 2.4879555281037677, 
"grad_norm": 6.453327655792236, "learning_rate": 9.503026559604694e-07, "loss": 0.1607, "step": 4028 }, { "epoch": 2.4891908585546636, "grad_norm": 11.440865516662598, "learning_rate": 9.502779493514514e-07, "loss": 0.3798, "step": 4030 }, { "epoch": 2.490426189005559, "grad_norm": 4.983465194702148, "learning_rate": 9.502532427424335e-07, "loss": 0.1107, "step": 4032 }, { "epoch": 2.4916615194564544, "grad_norm": 7.25463342666626, "learning_rate": 9.502285361334157e-07, "loss": 0.1448, "step": 4034 }, { "epoch": 2.4928968499073503, "grad_norm": 11.871488571166992, "learning_rate": 9.502038295243977e-07, "loss": 0.3706, "step": 4036 }, { "epoch": 2.4941321803582457, "grad_norm": 9.56612491607666, "learning_rate": 9.501791229153798e-07, "loss": 0.5047, "step": 4038 }, { "epoch": 2.4953675108091415, "grad_norm": 5.26118278503418, "learning_rate": 9.501544163063619e-07, "loss": 0.1213, "step": 4040 }, { "epoch": 2.496602841260037, "grad_norm": 6.977703094482422, "learning_rate": 9.501297096973439e-07, "loss": 0.1616, "step": 4042 }, { "epoch": 2.497838171710933, "grad_norm": 5.946619987487793, "learning_rate": 9.501050030883261e-07, "loss": 0.0597, "step": 4044 }, { "epoch": 2.499073502161828, "grad_norm": 5.62060546875, "learning_rate": 9.500802964793082e-07, "loss": 0.1295, "step": 4046 }, { "epoch": 2.500308832612724, "grad_norm": 8.069540977478027, "learning_rate": 9.500555898702902e-07, "loss": 0.1142, "step": 4048 }, { "epoch": 2.5015441630636195, "grad_norm": 2.996459484100342, "learning_rate": 9.500308832612723e-07, "loss": 0.1871, "step": 4050 }, { "epoch": 2.502779493514515, "grad_norm": 15.975199699401855, "learning_rate": 9.500061766522545e-07, "loss": 0.3384, "step": 4052 }, { "epoch": 2.5040148239654108, "grad_norm": 8.22244644165039, "learning_rate": 9.499814700432365e-07, "loss": 0.1139, "step": 4054 }, { "epoch": 2.5052501544163066, "grad_norm": 8.032392501831055, "learning_rate": 9.499567634342186e-07, "loss": 0.2194, "step": 4056 }, { "epoch": 
2.506485484867202, "grad_norm": 4.692416667938232, "learning_rate": 9.499320568252007e-07, "loss": 0.1416, "step": 4058 }, { "epoch": 2.5077208153180974, "grad_norm": 4.797483921051025, "learning_rate": 9.499073502161827e-07, "loss": 0.105, "step": 4060 }, { "epoch": 2.5089561457689933, "grad_norm": 6.300050258636475, "learning_rate": 9.498826436071649e-07, "loss": 0.1727, "step": 4062 }, { "epoch": 2.5101914762198887, "grad_norm": 7.503382682800293, "learning_rate": 9.49857936998147e-07, "loss": 0.2453, "step": 4064 }, { "epoch": 2.5114268066707846, "grad_norm": 4.761933326721191, "learning_rate": 9.498332303891291e-07, "loss": 0.092, "step": 4066 }, { "epoch": 2.51266213712168, "grad_norm": 7.54844331741333, "learning_rate": 9.498085237801111e-07, "loss": 0.097, "step": 4068 }, { "epoch": 2.513897467572576, "grad_norm": 9.954164505004883, "learning_rate": 9.497838171710932e-07, "loss": 0.2309, "step": 4070 }, { "epoch": 2.5151327980234712, "grad_norm": 7.489900588989258, "learning_rate": 9.497591105620754e-07, "loss": 0.2968, "step": 4072 }, { "epoch": 2.516368128474367, "grad_norm": 6.724361419677734, "learning_rate": 9.497344039530574e-07, "loss": 0.1135, "step": 4074 }, { "epoch": 2.5176034589252625, "grad_norm": 3.873833179473877, "learning_rate": 9.497096973440395e-07, "loss": 0.0675, "step": 4076 }, { "epoch": 2.518838789376158, "grad_norm": 7.007181644439697, "learning_rate": 9.496849907350216e-07, "loss": 0.3676, "step": 4078 }, { "epoch": 2.5200741198270538, "grad_norm": 8.686723709106445, "learning_rate": 9.496602841260036e-07, "loss": 0.2445, "step": 4080 }, { "epoch": 2.5213094502779496, "grad_norm": 4.711117267608643, "learning_rate": 9.496355775169858e-07, "loss": 0.1935, "step": 4082 }, { "epoch": 2.522544780728845, "grad_norm": 9.745352745056152, "learning_rate": 9.496108709079679e-07, "loss": 0.1364, "step": 4084 }, { "epoch": 2.5237801111797404, "grad_norm": 8.859101295471191, "learning_rate": 9.495861642989499e-07, "loss": 0.1326, "step": 4086 
}, { "epoch": 2.5250154416306363, "grad_norm": 7.434624195098877, "learning_rate": 9.49561457689932e-07, "loss": 0.1893, "step": 4088 }, { "epoch": 2.5262507720815317, "grad_norm": 2.577780246734619, "learning_rate": 9.495367510809142e-07, "loss": 0.0997, "step": 4090 }, { "epoch": 2.5274861025324276, "grad_norm": 6.342944145202637, "learning_rate": 9.495120444718962e-07, "loss": 0.195, "step": 4092 }, { "epoch": 2.528721432983323, "grad_norm": 11.908976554870605, "learning_rate": 9.494873378628783e-07, "loss": 0.1685, "step": 4094 }, { "epoch": 2.5299567634342184, "grad_norm": 11.225905418395996, "learning_rate": 9.494626312538604e-07, "loss": 0.5389, "step": 4096 }, { "epoch": 2.5311920938851142, "grad_norm": 5.324714660644531, "learning_rate": 9.494379246448424e-07, "loss": 0.0908, "step": 4098 }, { "epoch": 2.53242742433601, "grad_norm": 5.568947792053223, "learning_rate": 9.494132180358246e-07, "loss": 0.066, "step": 4100 }, { "epoch": 2.5336627547869055, "grad_norm": 7.845712184906006, "learning_rate": 9.493885114268067e-07, "loss": 0.1335, "step": 4102 }, { "epoch": 2.534898085237801, "grad_norm": 5.00520133972168, "learning_rate": 9.493638048177887e-07, "loss": 0.0923, "step": 4104 }, { "epoch": 2.5361334156886968, "grad_norm": 6.518111228942871, "learning_rate": 9.493390982087708e-07, "loss": 0.1234, "step": 4106 }, { "epoch": 2.537368746139592, "grad_norm": 4.2761549949646, "learning_rate": 9.493143915997529e-07, "loss": 0.1018, "step": 4108 }, { "epoch": 2.538604076590488, "grad_norm": 4.913449764251709, "learning_rate": 9.492896849907351e-07, "loss": 0.3026, "step": 4110 }, { "epoch": 2.5398394070413834, "grad_norm": 2.4374375343322754, "learning_rate": 9.492649783817171e-07, "loss": 0.0815, "step": 4112 }, { "epoch": 2.5410747374922793, "grad_norm": 6.4227519035339355, "learning_rate": 9.492402717726992e-07, "loss": 0.0648, "step": 4114 }, { "epoch": 2.5423100679431747, "grad_norm": 6.226362228393555, "learning_rate": 9.492155651636813e-07, "loss": 
0.1503, "step": 4116 }, { "epoch": 2.5435453983940706, "grad_norm": 7.5432562828063965, "learning_rate": 9.491908585546633e-07, "loss": 0.0863, "step": 4118 }, { "epoch": 2.544780728844966, "grad_norm": 5.4423699378967285, "learning_rate": 9.491661519456455e-07, "loss": 0.4819, "step": 4120 }, { "epoch": 2.5460160592958614, "grad_norm": 5.562419891357422, "learning_rate": 9.491414453366276e-07, "loss": 0.0746, "step": 4122 }, { "epoch": 2.5472513897467572, "grad_norm": 7.462055206298828, "learning_rate": 9.491167387276096e-07, "loss": 0.1072, "step": 4124 }, { "epoch": 2.548486720197653, "grad_norm": 8.970100402832031, "learning_rate": 9.490920321185916e-07, "loss": 0.2716, "step": 4126 }, { "epoch": 2.5497220506485485, "grad_norm": 3.938837766647339, "learning_rate": 9.490673255095737e-07, "loss": 0.2104, "step": 4128 }, { "epoch": 2.550957381099444, "grad_norm": 11.849865913391113, "learning_rate": 9.490426189005558e-07, "loss": 0.2997, "step": 4130 }, { "epoch": 2.5521927115503398, "grad_norm": 6.4802117347717285, "learning_rate": 9.49017912291538e-07, "loss": 0.601, "step": 4132 }, { "epoch": 2.553428042001235, "grad_norm": 7.002749919891357, "learning_rate": 9.4899320568252e-07, "loss": 0.141, "step": 4134 }, { "epoch": 2.554663372452131, "grad_norm": 8.415003776550293, "learning_rate": 9.48968499073502e-07, "loss": 0.31, "step": 4136 }, { "epoch": 2.5558987029030265, "grad_norm": 10.70018482208252, "learning_rate": 9.489437924644842e-07, "loss": 0.192, "step": 4138 }, { "epoch": 2.5571340333539223, "grad_norm": 5.884407043457031, "learning_rate": 9.489190858554663e-07, "loss": 0.0933, "step": 4140 }, { "epoch": 2.5583693638048177, "grad_norm": 4.644347190856934, "learning_rate": 9.488943792464483e-07, "loss": 0.088, "step": 4142 }, { "epoch": 2.5596046942557136, "grad_norm": 6.577848434448242, "learning_rate": 9.488696726374304e-07, "loss": 0.1727, "step": 4144 }, { "epoch": 2.560840024706609, "grad_norm": 6.158406734466553, "learning_rate": 
9.488449660284125e-07, "loss": 0.146, "step": 4146 }, { "epoch": 2.5620753551575044, "grad_norm": 4.625839710235596, "learning_rate": 9.488202594193947e-07, "loss": 0.1157, "step": 4148 }, { "epoch": 2.5633106856084003, "grad_norm": 2.5360267162323, "learning_rate": 9.487955528103767e-07, "loss": 0.1298, "step": 4150 }, { "epoch": 2.564546016059296, "grad_norm": 9.057004928588867, "learning_rate": 9.487708462013588e-07, "loss": 0.112, "step": 4152 }, { "epoch": 2.5657813465101915, "grad_norm": 12.195537567138672, "learning_rate": 9.487461395923409e-07, "loss": 0.2722, "step": 4154 }, { "epoch": 2.567016676961087, "grad_norm": 8.709891319274902, "learning_rate": 9.487214329833229e-07, "loss": 0.1939, "step": 4156 }, { "epoch": 2.568252007411983, "grad_norm": 8.842302322387695, "learning_rate": 9.486967263743051e-07, "loss": 0.1514, "step": 4158 }, { "epoch": 2.569487337862878, "grad_norm": 4.97824239730835, "learning_rate": 9.486720197652872e-07, "loss": 0.0859, "step": 4160 }, { "epoch": 2.570722668313774, "grad_norm": 2.5129647254943848, "learning_rate": 9.486473131562692e-07, "loss": 0.0257, "step": 4162 }, { "epoch": 2.5719579987646695, "grad_norm": 6.1671366691589355, "learning_rate": 9.486226065472513e-07, "loss": 0.1099, "step": 4164 }, { "epoch": 2.5731933292155653, "grad_norm": 9.64902400970459, "learning_rate": 9.485978999382334e-07, "loss": 0.3344, "step": 4166 }, { "epoch": 2.5744286596664607, "grad_norm": 5.9542155265808105, "learning_rate": 9.485731933292155e-07, "loss": 0.2116, "step": 4168 }, { "epoch": 2.5756639901173566, "grad_norm": 8.691288948059082, "learning_rate": 9.485484867201976e-07, "loss": 0.2043, "step": 4170 }, { "epoch": 2.576899320568252, "grad_norm": 4.425544261932373, "learning_rate": 9.485237801111797e-07, "loss": 0.2296, "step": 4172 }, { "epoch": 2.5781346510191474, "grad_norm": 6.3132758140563965, "learning_rate": 9.484990735021617e-07, "loss": 0.0479, "step": 4174 }, { "epoch": 2.5793699814700433, "grad_norm": 
7.774844169616699, "learning_rate": 9.484743668931438e-07, "loss": 0.1705, "step": 4176 }, { "epoch": 2.580605311920939, "grad_norm": 13.547688484191895, "learning_rate": 9.48449660284126e-07, "loss": 0.2739, "step": 4178 }, { "epoch": 2.5818406423718345, "grad_norm": 6.376012802124023, "learning_rate": 9.48424953675108e-07, "loss": 0.0894, "step": 4180 }, { "epoch": 2.58307597282273, "grad_norm": 14.2280912399292, "learning_rate": 9.484002470660901e-07, "loss": 0.489, "step": 4182 }, { "epoch": 2.584311303273626, "grad_norm": 6.133642673492432, "learning_rate": 9.483755404570722e-07, "loss": 0.1403, "step": 4184 }, { "epoch": 2.585546633724521, "grad_norm": 6.445113182067871, "learning_rate": 9.483508338480544e-07, "loss": 0.1643, "step": 4186 }, { "epoch": 2.586781964175417, "grad_norm": 13.831384658813477, "learning_rate": 9.483261272390364e-07, "loss": 0.146, "step": 4188 }, { "epoch": 2.5880172946263125, "grad_norm": 7.696075439453125, "learning_rate": 9.483014206300185e-07, "loss": 0.2681, "step": 4190 }, { "epoch": 2.589252625077208, "grad_norm": 4.739177227020264, "learning_rate": 9.482767140210006e-07, "loss": 0.1196, "step": 4192 }, { "epoch": 2.5904879555281037, "grad_norm": 4.567897796630859, "learning_rate": 9.482520074119826e-07, "loss": 0.119, "step": 4194 }, { "epoch": 2.5917232859789996, "grad_norm": 6.097799777984619, "learning_rate": 9.482273008029648e-07, "loss": 0.0502, "step": 4196 }, { "epoch": 2.592958616429895, "grad_norm": 13.05683708190918, "learning_rate": 9.482025941939469e-07, "loss": 0.2413, "step": 4198 }, { "epoch": 2.5941939468807904, "grad_norm": 9.382558822631836, "learning_rate": 9.481778875849289e-07, "loss": 0.1801, "step": 4200 }, { "epoch": 2.5954292773316863, "grad_norm": 6.861790180206299, "learning_rate": 9.48153180975911e-07, "loss": 0.1559, "step": 4202 }, { "epoch": 2.5966646077825817, "grad_norm": 6.289473056793213, "learning_rate": 9.481284743668931e-07, "loss": 0.1347, "step": 4204 }, { "epoch": 2.5978999382334775, 
"grad_norm": 7.928835391998291, "learning_rate": 9.481037677578752e-07, "loss": 0.2631, "step": 4206 }, { "epoch": 2.599135268684373, "grad_norm": 6.545368194580078, "learning_rate": 9.480790611488573e-07, "loss": 0.1327, "step": 4208 }, { "epoch": 2.600370599135269, "grad_norm": 4.331226825714111, "learning_rate": 9.480543545398394e-07, "loss": 0.0601, "step": 4210 }, { "epoch": 2.601605929586164, "grad_norm": 5.758735179901123, "learning_rate": 9.480296479308214e-07, "loss": 0.0505, "step": 4212 }, { "epoch": 2.60284126003706, "grad_norm": 4.208133697509766, "learning_rate": 9.480049413218035e-07, "loss": 0.1043, "step": 4214 }, { "epoch": 2.6040765904879555, "grad_norm": 4.655519962310791, "learning_rate": 9.479802347127857e-07, "loss": 0.204, "step": 4216 }, { "epoch": 2.605311920938851, "grad_norm": 8.062469482421875, "learning_rate": 9.479555281037677e-07, "loss": 0.2681, "step": 4218 }, { "epoch": 2.6065472513897467, "grad_norm": 5.361331939697266, "learning_rate": 9.479308214947498e-07, "loss": 0.0656, "step": 4220 }, { "epoch": 2.6077825818406426, "grad_norm": 6.148868560791016, "learning_rate": 9.479061148857319e-07, "loss": 0.1053, "step": 4222 }, { "epoch": 2.609017912291538, "grad_norm": 14.697903633117676, "learning_rate": 9.478814082767139e-07, "loss": 0.456, "step": 4224 }, { "epoch": 2.6102532427424334, "grad_norm": 4.794983863830566, "learning_rate": 9.478567016676961e-07, "loss": 0.0753, "step": 4226 }, { "epoch": 2.6114885731933293, "grad_norm": 4.261406421661377, "learning_rate": 9.478319950586782e-07, "loss": 0.1172, "step": 4228 }, { "epoch": 2.6127239036442247, "grad_norm": 3.8489389419555664, "learning_rate": 9.478072884496603e-07, "loss": 0.0936, "step": 4230 }, { "epoch": 2.6139592340951205, "grad_norm": 8.11456298828125, "learning_rate": 9.477825818406423e-07, "loss": 0.2924, "step": 4232 }, { "epoch": 2.615194564546016, "grad_norm": 7.296055316925049, "learning_rate": 9.477578752316245e-07, "loss": 0.0976, "step": 4234 }, { "epoch": 
2.616429894996912, "grad_norm": 7.475132942199707, "learning_rate": 9.477331686226066e-07, "loss": 0.1014, "step": 4236 }, { "epoch": 2.617665225447807, "grad_norm": 2.6685702800750732, "learning_rate": 9.477084620135886e-07, "loss": 0.0598, "step": 4238 }, { "epoch": 2.618900555898703, "grad_norm": 10.886232376098633, "learning_rate": 9.476837554045707e-07, "loss": 0.4033, "step": 4240 }, { "epoch": 2.6201358863495985, "grad_norm": 4.441849231719971, "learning_rate": 9.476590487955528e-07, "loss": 0.1468, "step": 4242 }, { "epoch": 2.621371216800494, "grad_norm": 5.6187520027160645, "learning_rate": 9.476343421865349e-07, "loss": 0.0362, "step": 4244 }, { "epoch": 2.6226065472513898, "grad_norm": 5.3848876953125, "learning_rate": 9.47609635577517e-07, "loss": 0.1892, "step": 4246 }, { "epoch": 2.6238418777022856, "grad_norm": 4.513540267944336, "learning_rate": 9.475849289684991e-07, "loss": 0.1468, "step": 4248 }, { "epoch": 2.625077208153181, "grad_norm": 10.689798355102539, "learning_rate": 9.475602223594811e-07, "loss": 0.1514, "step": 4250 }, { "epoch": 2.6263125386040764, "grad_norm": 4.375190734863281, "learning_rate": 9.475355157504632e-07, "loss": 0.2211, "step": 4252 }, { "epoch": 2.6275478690549723, "grad_norm": 6.08987283706665, "learning_rate": 9.475108091414454e-07, "loss": 0.2337, "step": 4254 }, { "epoch": 2.6287831995058677, "grad_norm": 15.166552543640137, "learning_rate": 9.474861025324274e-07, "loss": 0.104, "step": 4256 }, { "epoch": 2.6300185299567636, "grad_norm": 4.530309200286865, "learning_rate": 9.474613959234095e-07, "loss": 0.3236, "step": 4258 }, { "epoch": 2.631253860407659, "grad_norm": 11.268346786499023, "learning_rate": 9.474366893143916e-07, "loss": 0.3451, "step": 4260 }, { "epoch": 2.632489190858555, "grad_norm": 3.8305320739746094, "learning_rate": 9.474119827053735e-07, "loss": 0.1907, "step": 4262 }, { "epoch": 2.6337245213094502, "grad_norm": 8.105924606323242, "learning_rate": 9.473872760963557e-07, "loss": 0.0943, 
"step": 4264 }, { "epoch": 2.634959851760346, "grad_norm": 3.364950656890869, "learning_rate": 9.473625694873378e-07, "loss": 0.1555, "step": 4266 }, { "epoch": 2.6361951822112415, "grad_norm": 5.421189785003662, "learning_rate": 9.473378628783199e-07, "loss": 0.105, "step": 4268 }, { "epoch": 2.637430512662137, "grad_norm": 8.261091232299805, "learning_rate": 9.473131562693019e-07, "loss": 0.3851, "step": 4270 }, { "epoch": 2.6386658431130328, "grad_norm": 5.618124961853027, "learning_rate": 9.472884496602841e-07, "loss": 0.1669, "step": 4272 }, { "epoch": 2.6399011735639286, "grad_norm": 5.052395343780518, "learning_rate": 9.472637430512662e-07, "loss": 0.105, "step": 4274 }, { "epoch": 2.641136504014824, "grad_norm": 5.637566566467285, "learning_rate": 9.472390364422482e-07, "loss": 0.1458, "step": 4276 }, { "epoch": 2.6423718344657194, "grad_norm": 4.423276424407959, "learning_rate": 9.472143298332303e-07, "loss": 0.064, "step": 4278 }, { "epoch": 2.6436071649166153, "grad_norm": 7.6652936935424805, "learning_rate": 9.471896232242124e-07, "loss": 0.2001, "step": 4280 }, { "epoch": 2.6448424953675107, "grad_norm": 8.667336463928223, "learning_rate": 9.471649166151945e-07, "loss": 0.2015, "step": 4282 }, { "epoch": 2.6460778258184066, "grad_norm": 3.6341652870178223, "learning_rate": 9.471402100061766e-07, "loss": 0.0466, "step": 4284 }, { "epoch": 2.647313156269302, "grad_norm": 3.500105619430542, "learning_rate": 9.471155033971587e-07, "loss": 0.0802, "step": 4286 }, { "epoch": 2.6485484867201974, "grad_norm": 7.924619674682617, "learning_rate": 9.470907967881407e-07, "loss": 0.2098, "step": 4288 }, { "epoch": 2.6497838171710932, "grad_norm": 5.037041664123535, "learning_rate": 9.470660901791228e-07, "loss": 0.2957, "step": 4290 }, { "epoch": 2.651019147621989, "grad_norm": 8.890530586242676, "learning_rate": 9.47041383570105e-07, "loss": 0.1975, "step": 4292 }, { "epoch": 2.6522544780728845, "grad_norm": 8.793524742126465, "learning_rate": 
9.47016676961087e-07, "loss": 0.1698, "step": 4294 }, { "epoch": 2.65348980852378, "grad_norm": 5.571446895599365, "learning_rate": 9.469919703520691e-07, "loss": 0.0629, "step": 4296 }, { "epoch": 2.6547251389746758, "grad_norm": 3.4834792613983154, "learning_rate": 9.469672637430512e-07, "loss": 0.0432, "step": 4298 }, { "epoch": 2.655960469425571, "grad_norm": 3.2590999603271484, "learning_rate": 9.469425571340332e-07, "loss": 0.1408, "step": 4300 }, { "epoch": 2.657195799876467, "grad_norm": 5.246474742889404, "learning_rate": 9.469178505250154e-07, "loss": 0.1776, "step": 4302 }, { "epoch": 2.6584311303273624, "grad_norm": 2.905986785888672, "learning_rate": 9.468931439159975e-07, "loss": 0.0779, "step": 4304 }, { "epoch": 2.6596664607782583, "grad_norm": 5.4546918869018555, "learning_rate": 9.468684373069796e-07, "loss": 0.1237, "step": 4306 }, { "epoch": 2.6609017912291537, "grad_norm": 8.350580215454102, "learning_rate": 9.468437306979616e-07, "loss": 0.1856, "step": 4308 }, { "epoch": 2.6621371216800496, "grad_norm": 7.287728309631348, "learning_rate": 9.468190240889437e-07, "loss": 0.1586, "step": 4310 }, { "epoch": 2.663372452130945, "grad_norm": 9.607575416564941, "learning_rate": 9.467943174799259e-07, "loss": 0.1901, "step": 4312 }, { "epoch": 2.6646077825818404, "grad_norm": 1.2869740724563599, "learning_rate": 9.467696108709079e-07, "loss": 0.0822, "step": 4314 }, { "epoch": 2.6658431130327362, "grad_norm": 8.343040466308594, "learning_rate": 9.4674490426189e-07, "loss": 0.34, "step": 4316 }, { "epoch": 2.667078443483632, "grad_norm": 7.177077770233154, "learning_rate": 9.467201976528721e-07, "loss": 0.1339, "step": 4318 }, { "epoch": 2.6683137739345275, "grad_norm": 5.153106689453125, "learning_rate": 9.466954910438542e-07, "loss": 0.0718, "step": 4320 }, { "epoch": 2.669549104385423, "grad_norm": 8.901847839355469, "learning_rate": 9.466707844348363e-07, "loss": 0.159, "step": 4322 }, { "epoch": 2.6707844348363188, "grad_norm": 10.352700233459473, 
"learning_rate": 9.466460778258184e-07, "loss": 0.0921, "step": 4324 }, { "epoch": 2.672019765287214, "grad_norm": 4.539913177490234, "learning_rate": 9.466213712168004e-07, "loss": 0.1284, "step": 4326 }, { "epoch": 2.67325509573811, "grad_norm": 2.8913686275482178, "learning_rate": 9.465966646077825e-07, "loss": 0.0682, "step": 4328 }, { "epoch": 2.6744904261890055, "grad_norm": 6.189278602600098, "learning_rate": 9.465719579987647e-07, "loss": 0.0793, "step": 4330 }, { "epoch": 2.6757257566399013, "grad_norm": 8.536238670349121, "learning_rate": 9.465472513897467e-07, "loss": 0.389, "step": 4332 }, { "epoch": 2.6769610870907967, "grad_norm": 5.830776691436768, "learning_rate": 9.465225447807288e-07, "loss": 0.1037, "step": 4334 }, { "epoch": 2.6781964175416926, "grad_norm": 10.89932632446289, "learning_rate": 9.464978381717109e-07, "loss": 0.3022, "step": 4336 }, { "epoch": 2.679431747992588, "grad_norm": 3.244485378265381, "learning_rate": 9.464731315626929e-07, "loss": 0.088, "step": 4338 }, { "epoch": 2.6806670784434834, "grad_norm": 2.6823008060455322, "learning_rate": 9.464484249536751e-07, "loss": 0.184, "step": 4340 }, { "epoch": 2.6819024088943793, "grad_norm": 5.629920482635498, "learning_rate": 9.464237183446572e-07, "loss": 0.1822, "step": 4342 }, { "epoch": 2.683137739345275, "grad_norm": 4.128172874450684, "learning_rate": 9.463990117356392e-07, "loss": 0.1341, "step": 4344 }, { "epoch": 2.6843730697961705, "grad_norm": 4.469693183898926, "learning_rate": 9.463743051266213e-07, "loss": 0.0876, "step": 4346 }, { "epoch": 2.685608400247066, "grad_norm": 16.020662307739258, "learning_rate": 9.463495985176034e-07, "loss": 0.5185, "step": 4348 }, { "epoch": 2.686843730697962, "grad_norm": 4.802816867828369, "learning_rate": 9.463248919085856e-07, "loss": 0.0652, "step": 4350 }, { "epoch": 2.688079061148857, "grad_norm": 5.94922399520874, "learning_rate": 9.463001852995676e-07, "loss": 0.0892, "step": 4352 }, { "epoch": 2.689314391599753, "grad_norm": 
9.052387237548828, "learning_rate": 9.462754786905497e-07, "loss": 0.137, "step": 4354 }, { "epoch": 2.6905497220506485, "grad_norm": 7.775435447692871, "learning_rate": 9.462507720815318e-07, "loss": 0.2859, "step": 4356 }, { "epoch": 2.6917850525015443, "grad_norm": 10.671358108520508, "learning_rate": 9.462260654725138e-07, "loss": 0.3726, "step": 4358 }, { "epoch": 2.6930203829524397, "grad_norm": 6.017147541046143, "learning_rate": 9.46201358863496e-07, "loss": 0.2079, "step": 4360 }, { "epoch": 2.6942557134033356, "grad_norm": 6.2945404052734375, "learning_rate": 9.461766522544781e-07, "loss": 0.2454, "step": 4362 }, { "epoch": 2.695491043854231, "grad_norm": 7.808111667633057, "learning_rate": 9.461519456454601e-07, "loss": 0.1929, "step": 4364 }, { "epoch": 2.6967263743051264, "grad_norm": 9.131196022033691, "learning_rate": 9.461272390364422e-07, "loss": 0.1733, "step": 4366 }, { "epoch": 2.6979617047560223, "grad_norm": 2.395477056503296, "learning_rate": 9.461025324274244e-07, "loss": 0.0297, "step": 4368 }, { "epoch": 2.699197035206918, "grad_norm": 2.839163064956665, "learning_rate": 9.460778258184064e-07, "loss": 0.0599, "step": 4370 }, { "epoch": 2.7004323656578135, "grad_norm": 6.131670951843262, "learning_rate": 9.460531192093885e-07, "loss": 0.1867, "step": 4372 }, { "epoch": 2.701667696108709, "grad_norm": 4.099411487579346, "learning_rate": 9.460284126003706e-07, "loss": 0.0948, "step": 4374 }, { "epoch": 2.702903026559605, "grad_norm": 5.2648797035217285, "learning_rate": 9.460037059913526e-07, "loss": 0.0675, "step": 4376 }, { "epoch": 2.7041383570105, "grad_norm": 5.02418851852417, "learning_rate": 9.459789993823348e-07, "loss": 0.1773, "step": 4378 }, { "epoch": 2.705373687461396, "grad_norm": 3.1020660400390625, "learning_rate": 9.459542927733169e-07, "loss": 0.1011, "step": 4380 }, { "epoch": 2.7066090179122915, "grad_norm": 11.438507080078125, "learning_rate": 9.459295861642989e-07, "loss": 0.2506, "step": 4382 }, { "epoch": 
2.707844348363187, "grad_norm": 11.21475601196289, "learning_rate": 9.45904879555281e-07, "loss": 0.1815, "step": 4384 }, { "epoch": 2.7090796788140827, "grad_norm": 12.856938362121582, "learning_rate": 9.458801729462631e-07, "loss": 0.3535, "step": 4386 }, { "epoch": 2.7103150092649786, "grad_norm": 14.351848602294922, "learning_rate": 9.458554663372453e-07, "loss": 0.4572, "step": 4388 }, { "epoch": 2.711550339715874, "grad_norm": 11.069694519042969, "learning_rate": 9.458307597282273e-07, "loss": 0.3256, "step": 4390 }, { "epoch": 2.7127856701667694, "grad_norm": 5.241029739379883, "learning_rate": 9.458060531192094e-07, "loss": 0.1431, "step": 4392 }, { "epoch": 2.7140210006176653, "grad_norm": 13.895182609558105, "learning_rate": 9.457813465101915e-07, "loss": 0.3399, "step": 4394 }, { "epoch": 2.7152563310685607, "grad_norm": 3.268735885620117, "learning_rate": 9.457566399011734e-07, "loss": 0.1293, "step": 4396 }, { "epoch": 2.7164916615194565, "grad_norm": 7.383371353149414, "learning_rate": 9.457319332921556e-07, "loss": 0.3575, "step": 4398 }, { "epoch": 2.717726991970352, "grad_norm": 11.854717254638672, "learning_rate": 9.457072266831377e-07, "loss": 0.1513, "step": 4400 }, { "epoch": 2.718962322421248, "grad_norm": 7.20127010345459, "learning_rate": 9.456825200741197e-07, "loss": 0.1016, "step": 4402 }, { "epoch": 2.720197652872143, "grad_norm": 20.02941131591797, "learning_rate": 9.456578134651018e-07, "loss": 0.3724, "step": 4404 }, { "epoch": 2.721432983323039, "grad_norm": 6.312193393707275, "learning_rate": 9.45633106856084e-07, "loss": 0.1287, "step": 4406 }, { "epoch": 2.7226683137739345, "grad_norm": 11.35637378692627, "learning_rate": 9.45608400247066e-07, "loss": 0.299, "step": 4408 }, { "epoch": 2.72390364422483, "grad_norm": 10.35221004486084, "learning_rate": 9.455836936380481e-07, "loss": 0.3138, "step": 4410 }, { "epoch": 2.7251389746757257, "grad_norm": 8.754813194274902, "learning_rate": 9.455589870290302e-07, "loss": 0.266, "step": 
4412 }, { "epoch": 2.7263743051266216, "grad_norm": 5.294969081878662, "learning_rate": 9.455342804200122e-07, "loss": 0.1301, "step": 4414 }, { "epoch": 2.727609635577517, "grad_norm": 3.387493371963501, "learning_rate": 9.455095738109944e-07, "loss": 0.0547, "step": 4416 }, { "epoch": 2.7288449660284124, "grad_norm": 4.005185604095459, "learning_rate": 9.454848672019765e-07, "loss": 0.188, "step": 4418 }, { "epoch": 2.7300802964793083, "grad_norm": 8.206558227539062, "learning_rate": 9.454601605929585e-07, "loss": 0.1339, "step": 4420 }, { "epoch": 2.7313156269302037, "grad_norm": 5.679891586303711, "learning_rate": 9.454354539839406e-07, "loss": 0.1284, "step": 4422 }, { "epoch": 2.7325509573810995, "grad_norm": 3.5869264602661133, "learning_rate": 9.454107473749227e-07, "loss": 0.0487, "step": 4424 }, { "epoch": 2.733786287831995, "grad_norm": 12.170257568359375, "learning_rate": 9.453860407659049e-07, "loss": 0.2249, "step": 4426 }, { "epoch": 2.735021618282891, "grad_norm": 10.494721412658691, "learning_rate": 9.453613341568869e-07, "loss": 0.1985, "step": 4428 }, { "epoch": 2.736256948733786, "grad_norm": 5.64244270324707, "learning_rate": 9.45336627547869e-07, "loss": 0.1928, "step": 4430 }, { "epoch": 2.737492279184682, "grad_norm": 5.92388916015625, "learning_rate": 9.453119209388511e-07, "loss": 0.1239, "step": 4432 }, { "epoch": 2.7387276096355775, "grad_norm": 2.977785110473633, "learning_rate": 9.452872143298331e-07, "loss": 0.018, "step": 4434 }, { "epoch": 2.739962940086473, "grad_norm": 8.01559066772461, "learning_rate": 9.452625077208153e-07, "loss": 0.341, "step": 4436 }, { "epoch": 2.7411982705373688, "grad_norm": 3.5326695442199707, "learning_rate": 9.452378011117974e-07, "loss": 0.1638, "step": 4438 }, { "epoch": 2.7424336009882646, "grad_norm": 2.338163375854492, "learning_rate": 9.452130945027794e-07, "loss": 0.1673, "step": 4440 }, { "epoch": 2.74366893143916, "grad_norm": 9.369385719299316, "learning_rate": 9.451883878937615e-07, "loss": 
0.2739, "step": 4442 }, { "epoch": 2.7449042618900554, "grad_norm": 11.544421195983887, "learning_rate": 9.451636812847436e-07, "loss": 0.1538, "step": 4444 }, { "epoch": 2.7461395923409513, "grad_norm": 2.063323497772217, "learning_rate": 9.451389746757257e-07, "loss": 0.018, "step": 4446 }, { "epoch": 2.7473749227918467, "grad_norm": 7.424622535705566, "learning_rate": 9.451142680667078e-07, "loss": 0.1435, "step": 4448 }, { "epoch": 2.7486102532427426, "grad_norm": 4.462342739105225, "learning_rate": 9.450895614576899e-07, "loss": 0.1021, "step": 4450 }, { "epoch": 2.749845583693638, "grad_norm": 5.305174827575684, "learning_rate": 9.450648548486719e-07, "loss": 0.1776, "step": 4452 }, { "epoch": 2.751080914144534, "grad_norm": 5.94201135635376, "learning_rate": 9.450401482396541e-07, "loss": 0.1538, "step": 4454 }, { "epoch": 2.7523162445954292, "grad_norm": 3.9345040321350098, "learning_rate": 9.450154416306362e-07, "loss": 0.1065, "step": 4456 }, { "epoch": 2.753551575046325, "grad_norm": 8.46268367767334, "learning_rate": 9.449907350216182e-07, "loss": 0.2538, "step": 4458 }, { "epoch": 2.7547869054972205, "grad_norm": 7.804061412811279, "learning_rate": 9.449660284126003e-07, "loss": 0.3342, "step": 4460 }, { "epoch": 2.756022235948116, "grad_norm": 4.14879846572876, "learning_rate": 9.449413218035824e-07, "loss": 0.0842, "step": 4462 }, { "epoch": 2.7572575663990118, "grad_norm": 4.1963324546813965, "learning_rate": 9.449166151945645e-07, "loss": 0.1069, "step": 4464 }, { "epoch": 2.7584928968499076, "grad_norm": 5.674936771392822, "learning_rate": 9.448919085855466e-07, "loss": 0.0928, "step": 4466 }, { "epoch": 2.759728227300803, "grad_norm": 8.766497611999512, "learning_rate": 9.448672019765287e-07, "loss": 0.2304, "step": 4468 }, { "epoch": 2.7609635577516984, "grad_norm": 6.842972755432129, "learning_rate": 9.448424953675108e-07, "loss": 0.7486, "step": 4470 }, { "epoch": 2.7621988882025943, "grad_norm": 35.067359924316406, "learning_rate": 
9.448177887584928e-07, "loss": 0.131, "step": 4472 }, { "epoch": 2.7634342186534897, "grad_norm": 8.56219482421875, "learning_rate": 9.44793082149475e-07, "loss": 0.2382, "step": 4474 }, { "epoch": 2.7646695491043856, "grad_norm": 7.584375858306885, "learning_rate": 9.447683755404571e-07, "loss": 0.3564, "step": 4476 }, { "epoch": 2.765904879555281, "grad_norm": 4.756228923797607, "learning_rate": 9.447436689314391e-07, "loss": 0.1784, "step": 4478 }, { "epoch": 2.7671402100061764, "grad_norm": 7.094266414642334, "learning_rate": 9.447189623224212e-07, "loss": 0.0972, "step": 4480 }, { "epoch": 2.7683755404570722, "grad_norm": 2.3056139945983887, "learning_rate": 9.446942557134033e-07, "loss": 0.0681, "step": 4482 }, { "epoch": 2.769610870907968, "grad_norm": 9.547074317932129, "learning_rate": 9.446695491043854e-07, "loss": 0.2483, "step": 4484 }, { "epoch": 2.7708462013588635, "grad_norm": 6.2246880531311035, "learning_rate": 9.446448424953675e-07, "loss": 0.2329, "step": 4486 }, { "epoch": 2.772081531809759, "grad_norm": 8.319201469421387, "learning_rate": 9.446201358863496e-07, "loss": 0.1253, "step": 4488 }, { "epoch": 2.7733168622606548, "grad_norm": 6.626651287078857, "learning_rate": 9.445954292773316e-07, "loss": 0.1084, "step": 4490 }, { "epoch": 2.77455219271155, "grad_norm": 10.793845176696777, "learning_rate": 9.445707226683137e-07, "loss": 0.2187, "step": 4492 }, { "epoch": 2.775787523162446, "grad_norm": 5.125445365905762, "learning_rate": 9.445460160592959e-07, "loss": 0.048, "step": 4494 }, { "epoch": 2.7770228536133414, "grad_norm": 4.628481864929199, "learning_rate": 9.445213094502779e-07, "loss": 0.1719, "step": 4496 }, { "epoch": 2.7782581840642373, "grad_norm": 4.507478713989258, "learning_rate": 9.4449660284126e-07, "loss": 0.1629, "step": 4498 }, { "epoch": 2.7794935145151327, "grad_norm": 10.159656524658203, "learning_rate": 9.444718962322421e-07, "loss": 0.3411, "step": 4500 }, { "epoch": 2.7794935145151327, "eval_cer": 
0.05795194904105795, "eval_loss": 0.24432991445064545, "eval_runtime": 65.2629, "eval_samples_per_second": 12.58, "eval_steps_per_second": 1.578, "step": 4500 }, { "epoch": 2.7807288449660286, "grad_norm": 3.5315303802490234, "learning_rate": 9.444471896232242e-07, "loss": 0.0639, "step": 4502 }, { "epoch": 2.781964175416924, "grad_norm": 4.874752998352051, "learning_rate": 9.444224830142063e-07, "loss": 0.1524, "step": 4504 }, { "epoch": 2.7831995058678194, "grad_norm": 8.411628723144531, "learning_rate": 9.443977764051884e-07, "loss": 0.3684, "step": 4506 }, { "epoch": 2.7844348363187152, "grad_norm": 11.44243335723877, "learning_rate": 9.443730697961705e-07, "loss": 0.144, "step": 4508 }, { "epoch": 2.785670166769611, "grad_norm": 4.23677921295166, "learning_rate": 9.443483631871525e-07, "loss": 0.0491, "step": 4510 }, { "epoch": 2.7869054972205065, "grad_norm": 14.108415603637695, "learning_rate": 9.443236565781347e-07, "loss": 0.3375, "step": 4512 }, { "epoch": 2.788140827671402, "grad_norm": 1.9302301406860352, "learning_rate": 9.442989499691168e-07, "loss": 0.0561, "step": 4514 }, { "epoch": 2.7893761581222978, "grad_norm": 5.8146443367004395, "learning_rate": 9.442742433600988e-07, "loss": 0.1187, "step": 4516 }, { "epoch": 2.790611488573193, "grad_norm": 8.56892204284668, "learning_rate": 9.442495367510809e-07, "loss": 0.2367, "step": 4518 }, { "epoch": 2.791846819024089, "grad_norm": 10.571788787841797, "learning_rate": 9.44224830142063e-07, "loss": 0.2713, "step": 4520 }, { "epoch": 2.7930821494749845, "grad_norm": 8.499664306640625, "learning_rate": 9.442001235330451e-07, "loss": 0.1503, "step": 4522 }, { "epoch": 2.7943174799258803, "grad_norm": 8.584565162658691, "learning_rate": 9.441754169240272e-07, "loss": 0.1974, "step": 4524 }, { "epoch": 2.7955528103767757, "grad_norm": 6.2315778732299805, "learning_rate": 9.441507103150093e-07, "loss": 0.093, "step": 4526 }, { "epoch": 2.7967881408276716, "grad_norm": 14.124565124511719, "learning_rate": 
9.441260037059912e-07, "loss": 0.5675, "step": 4528 }, { "epoch": 2.798023471278567, "grad_norm": 10.597208976745605, "learning_rate": 9.441012970969733e-07, "loss": 0.1992, "step": 4530 }, { "epoch": 2.7992588017294624, "grad_norm": 5.452177047729492, "learning_rate": 9.440765904879555e-07, "loss": 0.2131, "step": 4532 }, { "epoch": 2.8004941321803583, "grad_norm": 6.4009480476379395, "learning_rate": 9.440518838789375e-07, "loss": 0.0522, "step": 4534 }, { "epoch": 2.801729462631254, "grad_norm": 6.921955108642578, "learning_rate": 9.440271772699196e-07, "loss": 0.2964, "step": 4536 }, { "epoch": 2.8029647930821495, "grad_norm": 4.686029434204102, "learning_rate": 9.440024706609017e-07, "loss": 0.0423, "step": 4538 }, { "epoch": 2.804200123533045, "grad_norm": 1.9486443996429443, "learning_rate": 9.439777640518837e-07, "loss": 0.0771, "step": 4540 }, { "epoch": 2.805435453983941, "grad_norm": 2.2988569736480713, "learning_rate": 9.439530574428659e-07, "loss": 0.0217, "step": 4542 }, { "epoch": 2.806670784434836, "grad_norm": 9.531474113464355, "learning_rate": 9.43928350833848e-07, "loss": 0.2195, "step": 4544 }, { "epoch": 2.807906114885732, "grad_norm": 6.657232284545898, "learning_rate": 9.439036442248301e-07, "loss": 0.2216, "step": 4546 }, { "epoch": 2.8091414453366275, "grad_norm": 3.550626516342163, "learning_rate": 9.438789376158121e-07, "loss": 0.0504, "step": 4548 }, { "epoch": 2.8103767757875233, "grad_norm": 7.085525989532471, "learning_rate": 9.438542310067943e-07, "loss": 0.3002, "step": 4550 }, { "epoch": 2.8116121062384187, "grad_norm": 3.9700498580932617, "learning_rate": 9.438295243977764e-07, "loss": 0.0889, "step": 4552 }, { "epoch": 2.8128474366893146, "grad_norm": 8.040168762207031, "learning_rate": 9.438048177887584e-07, "loss": 0.2192, "step": 4554 }, { "epoch": 2.81408276714021, "grad_norm": 9.056206703186035, "learning_rate": 9.437801111797405e-07, "loss": 0.0805, "step": 4556 }, { "epoch": 2.8153180975911054, "grad_norm": 
2.935046434402466, "learning_rate": 9.437554045707226e-07, "loss": 0.1115, "step": 4558 }, { "epoch": 2.8165534280420013, "grad_norm": 6.651248455047607, "learning_rate": 9.437306979617047e-07, "loss": 0.0886, "step": 4560 }, { "epoch": 2.817788758492897, "grad_norm": 7.848507404327393, "learning_rate": 9.437059913526868e-07, "loss": 0.1697, "step": 4562 }, { "epoch": 2.8190240889437925, "grad_norm": 3.356825590133667, "learning_rate": 9.436812847436689e-07, "loss": 0.0899, "step": 4564 }, { "epoch": 2.820259419394688, "grad_norm": 7.650514602661133, "learning_rate": 9.436565781346509e-07, "loss": 0.1677, "step": 4566 }, { "epoch": 2.821494749845584, "grad_norm": 5.794844627380371, "learning_rate": 9.43631871525633e-07, "loss": 0.2046, "step": 4568 }, { "epoch": 2.822730080296479, "grad_norm": 5.939474582672119, "learning_rate": 9.436071649166152e-07, "loss": 0.0861, "step": 4570 }, { "epoch": 2.823965410747375, "grad_norm": 7.535871982574463, "learning_rate": 9.435824583075972e-07, "loss": 0.2388, "step": 4572 }, { "epoch": 2.8252007411982705, "grad_norm": 8.894636154174805, "learning_rate": 9.435577516985793e-07, "loss": 0.1356, "step": 4574 }, { "epoch": 2.826436071649166, "grad_norm": 4.777245044708252, "learning_rate": 9.435330450895614e-07, "loss": 0.1176, "step": 4576 }, { "epoch": 2.8276714021000617, "grad_norm": 1.799149513244629, "learning_rate": 9.435083384805434e-07, "loss": 0.1319, "step": 4578 }, { "epoch": 2.8289067325509576, "grad_norm": 8.459534645080566, "learning_rate": 9.434836318715256e-07, "loss": 0.2901, "step": 4580 }, { "epoch": 2.830142063001853, "grad_norm": 8.880565643310547, "learning_rate": 9.434589252625077e-07, "loss": 0.1756, "step": 4582 }, { "epoch": 2.8313773934527484, "grad_norm": 3.74516224861145, "learning_rate": 9.434342186534897e-07, "loss": 0.0739, "step": 4584 }, { "epoch": 2.8326127239036443, "grad_norm": 10.778867721557617, "learning_rate": 9.434095120444718e-07, "loss": 0.4133, "step": 4586 }, { "epoch": 
2.8338480543545397, "grad_norm": 6.023871898651123, "learning_rate": 9.43384805435454e-07, "loss": 0.1696, "step": 4588 }, { "epoch": 2.8350833848054355, "grad_norm": 8.614434242248535, "learning_rate": 9.433600988264361e-07, "loss": 0.2104, "step": 4590 }, { "epoch": 2.836318715256331, "grad_norm": 5.944378852844238, "learning_rate": 9.433353922174181e-07, "loss": 0.0916, "step": 4592 }, { "epoch": 2.837554045707227, "grad_norm": 1.2978465557098389, "learning_rate": 9.433106856084002e-07, "loss": 0.0861, "step": 4594 }, { "epoch": 2.838789376158122, "grad_norm": 7.689224720001221, "learning_rate": 9.432859789993823e-07, "loss": 0.1999, "step": 4596 }, { "epoch": 2.840024706609018, "grad_norm": 1.6910516023635864, "learning_rate": 9.432612723903644e-07, "loss": 0.0595, "step": 4598 }, { "epoch": 2.8412600370599135, "grad_norm": 7.453045845031738, "learning_rate": 9.432365657813465e-07, "loss": 0.0897, "step": 4600 }, { "epoch": 2.842495367510809, "grad_norm": 10.226162910461426, "learning_rate": 9.432118591723286e-07, "loss": 0.1908, "step": 4602 }, { "epoch": 2.8437306979617047, "grad_norm": 4.611224174499512, "learning_rate": 9.431871525633106e-07, "loss": 0.0682, "step": 4604 }, { "epoch": 2.8449660284126006, "grad_norm": 4.6477952003479, "learning_rate": 9.431624459542927e-07, "loss": 0.2552, "step": 4606 }, { "epoch": 2.846201358863496, "grad_norm": 6.2369842529296875, "learning_rate": 9.431377393452749e-07, "loss": 0.2118, "step": 4608 }, { "epoch": 2.8474366893143914, "grad_norm": 9.19079303741455, "learning_rate": 9.431130327362569e-07, "loss": 0.1328, "step": 4610 }, { "epoch": 2.8486720197652873, "grad_norm": 9.063045501708984, "learning_rate": 9.43088326127239e-07, "loss": 0.148, "step": 4612 }, { "epoch": 2.8499073502161827, "grad_norm": 17.72916603088379, "learning_rate": 9.430636195182211e-07, "loss": 0.081, "step": 4614 }, { "epoch": 2.8511426806670785, "grad_norm": 6.651655197143555, "learning_rate": 9.430389129092031e-07, "loss": 0.1323, "step": 
4616 }, { "epoch": 2.852378011117974, "grad_norm": 7.1694207191467285, "learning_rate": 9.430142063001853e-07, "loss": 0.1626, "step": 4618 }, { "epoch": 2.85361334156887, "grad_norm": 4.9725260734558105, "learning_rate": 9.429894996911674e-07, "loss": 0.0835, "step": 4620 }, { "epoch": 2.854848672019765, "grad_norm": 5.870615005493164, "learning_rate": 9.429647930821494e-07, "loss": 0.0665, "step": 4622 }, { "epoch": 2.856084002470661, "grad_norm": 8.006712913513184, "learning_rate": 9.429400864731315e-07, "loss": 0.2071, "step": 4624 }, { "epoch": 2.8573193329215565, "grad_norm": 6.5154619216918945, "learning_rate": 9.429153798641136e-07, "loss": 0.054, "step": 4626 }, { "epoch": 2.858554663372452, "grad_norm": 6.450688362121582, "learning_rate": 9.428906732550958e-07, "loss": 0.1541, "step": 4628 }, { "epoch": 2.8597899938233478, "grad_norm": 3.823272943496704, "learning_rate": 9.428659666460778e-07, "loss": 0.1677, "step": 4630 }, { "epoch": 2.8610253242742436, "grad_norm": 3.8116087913513184, "learning_rate": 9.428412600370599e-07, "loss": 0.0873, "step": 4632 }, { "epoch": 2.862260654725139, "grad_norm": 2.2550623416900635, "learning_rate": 9.42816553428042e-07, "loss": 0.2753, "step": 4634 }, { "epoch": 2.8634959851760344, "grad_norm": 7.159323692321777, "learning_rate": 9.427918468190241e-07, "loss": 0.1669, "step": 4636 }, { "epoch": 2.8647313156269303, "grad_norm": 8.139884948730469, "learning_rate": 9.427671402100062e-07, "loss": 0.1193, "step": 4638 }, { "epoch": 2.8659666460778257, "grad_norm": 3.557209014892578, "learning_rate": 9.427424336009883e-07, "loss": 0.0409, "step": 4640 }, { "epoch": 2.8672019765287216, "grad_norm": 4.737778663635254, "learning_rate": 9.427177269919703e-07, "loss": 0.0907, "step": 4642 }, { "epoch": 2.868437306979617, "grad_norm": 3.5263261795043945, "learning_rate": 9.426930203829524e-07, "loss": 0.0456, "step": 4644 }, { "epoch": 2.869672637430513, "grad_norm": 7.8748626708984375, "learning_rate": 9.426683137739346e-07, 
"loss": 0.2585, "step": 4646 }, { "epoch": 2.8709079678814082, "grad_norm": 4.636354446411133, "learning_rate": 9.426436071649166e-07, "loss": 0.1073, "step": 4648 }, { "epoch": 2.872143298332304, "grad_norm": 3.1065752506256104, "learning_rate": 9.426189005558987e-07, "loss": 0.1785, "step": 4650 }, { "epoch": 2.8733786287831995, "grad_norm": 6.061158180236816, "learning_rate": 9.425941939468808e-07, "loss": 0.0907, "step": 4652 }, { "epoch": 2.874613959234095, "grad_norm": 6.274352550506592, "learning_rate": 9.425694873378627e-07, "loss": 0.4353, "step": 4654 }, { "epoch": 2.8758492896849908, "grad_norm": 5.371829986572266, "learning_rate": 9.42544780728845e-07, "loss": 0.2156, "step": 4656 }, { "epoch": 2.8770846201358866, "grad_norm": 6.115880012512207, "learning_rate": 9.425200741198271e-07, "loss": 0.3904, "step": 4658 }, { "epoch": 2.878319950586782, "grad_norm": 6.846569061279297, "learning_rate": 9.42495367510809e-07, "loss": 0.1013, "step": 4660 }, { "epoch": 2.8795552810376774, "grad_norm": 7.036654949188232, "learning_rate": 9.424706609017911e-07, "loss": 0.1037, "step": 4662 }, { "epoch": 2.8807906114885733, "grad_norm": 5.050328254699707, "learning_rate": 9.424459542927732e-07, "loss": 0.1699, "step": 4664 }, { "epoch": 2.8820259419394687, "grad_norm": 6.9003496170043945, "learning_rate": 9.424212476837554e-07, "loss": 0.3589, "step": 4666 }, { "epoch": 2.8832612723903646, "grad_norm": 5.445608139038086, "learning_rate": 9.423965410747374e-07, "loss": 0.1018, "step": 4668 }, { "epoch": 2.88449660284126, "grad_norm": 11.291731834411621, "learning_rate": 9.423718344657195e-07, "loss": 0.1024, "step": 4670 }, { "epoch": 2.8857319332921554, "grad_norm": 3.3711154460906982, "learning_rate": 9.423471278567016e-07, "loss": 0.0515, "step": 4672 }, { "epoch": 2.8869672637430512, "grad_norm": 1.0841940641403198, "learning_rate": 9.423224212476836e-07, "loss": 0.0428, "step": 4674 }, { "epoch": 2.888202594193947, "grad_norm": 5.055369853973389, "learning_rate": 
9.422977146386658e-07, "loss": 0.1497, "step": 4676 }, { "epoch": 2.8894379246448425, "grad_norm": 3.3181488513946533, "learning_rate": 9.422730080296479e-07, "loss": 0.0709, "step": 4678 }, { "epoch": 2.890673255095738, "grad_norm": 5.795508861541748, "learning_rate": 9.422483014206299e-07, "loss": 0.1324, "step": 4680 }, { "epoch": 2.8919085855466338, "grad_norm": 11.782768249511719, "learning_rate": 9.42223594811612e-07, "loss": 0.248, "step": 4682 }, { "epoch": 2.893143915997529, "grad_norm": 9.508358001708984, "learning_rate": 9.421988882025942e-07, "loss": 0.4203, "step": 4684 }, { "epoch": 2.894379246448425, "grad_norm": 6.695348262786865, "learning_rate": 9.421741815935762e-07, "loss": 0.1548, "step": 4686 }, { "epoch": 2.8956145768993204, "grad_norm": 18.771854400634766, "learning_rate": 9.421494749845583e-07, "loss": 0.7319, "step": 4688 }, { "epoch": 2.8968499073502163, "grad_norm": 8.72005844116211, "learning_rate": 9.421247683755404e-07, "loss": 0.2814, "step": 4690 }, { "epoch": 2.8980852378011117, "grad_norm": 5.499704360961914, "learning_rate": 9.421000617665224e-07, "loss": 0.1211, "step": 4692 }, { "epoch": 2.8993205682520076, "grad_norm": 8.422891616821289, "learning_rate": 9.420753551575046e-07, "loss": 0.1118, "step": 4694 }, { "epoch": 2.900555898702903, "grad_norm": 3.7739527225494385, "learning_rate": 9.420506485484867e-07, "loss": 0.2005, "step": 4696 }, { "epoch": 2.9017912291537984, "grad_norm": 5.856757640838623, "learning_rate": 9.420259419394687e-07, "loss": 0.2633, "step": 4698 }, { "epoch": 2.9030265596046942, "grad_norm": 4.202661037445068, "learning_rate": 9.420012353304508e-07, "loss": 0.0623, "step": 4700 }, { "epoch": 2.90426189005559, "grad_norm": 4.857954502105713, "learning_rate": 9.419765287214329e-07, "loss": 0.0943, "step": 4702 }, { "epoch": 2.9054972205064855, "grad_norm": 6.193096160888672, "learning_rate": 9.41951822112415e-07, "loss": 0.162, "step": 4704 }, { "epoch": 2.906732550957381, "grad_norm": 7.238914966583252, 
"learning_rate": 9.419271155033971e-07, "loss": 0.155, "step": 4706 }, { "epoch": 2.9079678814082768, "grad_norm": 9.002262115478516, "learning_rate": 9.419024088943792e-07, "loss": 0.1932, "step": 4708 }, { "epoch": 2.909203211859172, "grad_norm": 6.94495153427124, "learning_rate": 9.418777022853613e-07, "loss": 0.1613, "step": 4710 }, { "epoch": 2.910438542310068, "grad_norm": 5.7140021324157715, "learning_rate": 9.418529956763433e-07, "loss": 0.2575, "step": 4712 }, { "epoch": 2.9116738727609635, "grad_norm": 4.70513916015625, "learning_rate": 9.418282890673255e-07, "loss": 0.3165, "step": 4714 }, { "epoch": 2.9129092032118593, "grad_norm": 7.488254547119141, "learning_rate": 9.418035824583076e-07, "loss": 0.1554, "step": 4716 }, { "epoch": 2.9141445336627547, "grad_norm": 8.696020126342773, "learning_rate": 9.417788758492896e-07, "loss": 0.3493, "step": 4718 }, { "epoch": 2.9153798641136506, "grad_norm": 12.219847679138184, "learning_rate": 9.417541692402717e-07, "loss": 0.2975, "step": 4720 }, { "epoch": 2.916615194564546, "grad_norm": 9.933042526245117, "learning_rate": 9.417294626312539e-07, "loss": 0.1349, "step": 4722 }, { "epoch": 2.9178505250154414, "grad_norm": 3.8928799629211426, "learning_rate": 9.417047560222359e-07, "loss": 0.1581, "step": 4724 }, { "epoch": 2.9190858554663373, "grad_norm": 7.55739164352417, "learning_rate": 9.41680049413218e-07, "loss": 0.1683, "step": 4726 }, { "epoch": 2.920321185917233, "grad_norm": 2.532310724258423, "learning_rate": 9.416553428042001e-07, "loss": 0.1297, "step": 4728 }, { "epoch": 2.9215565163681285, "grad_norm": 3.5088884830474854, "learning_rate": 9.416306361951821e-07, "loss": 0.1232, "step": 4730 }, { "epoch": 2.922791846819024, "grad_norm": 5.726474761962891, "learning_rate": 9.416059295861643e-07, "loss": 0.0551, "step": 4732 }, { "epoch": 2.92402717726992, "grad_norm": 7.5111188888549805, "learning_rate": 9.415812229771464e-07, "loss": 0.1015, "step": 4734 }, { "epoch": 2.925262507720815, "grad_norm": 
4.841724395751953, "learning_rate": 9.415565163681284e-07, "loss": 0.1162, "step": 4736 }, { "epoch": 2.926497838171711, "grad_norm": 11.44068717956543, "learning_rate": 9.415318097591105e-07, "loss": 0.322, "step": 4738 }, { "epoch": 2.9277331686226065, "grad_norm": 7.694859504699707, "learning_rate": 9.415071031500926e-07, "loss": 0.2084, "step": 4740 }, { "epoch": 2.9289684990735023, "grad_norm": 3.6374075412750244, "learning_rate": 9.414823965410747e-07, "loss": 0.0709, "step": 4742 }, { "epoch": 2.9302038295243977, "grad_norm": 7.864887714385986, "learning_rate": 9.414576899320568e-07, "loss": 0.1742, "step": 4744 }, { "epoch": 2.9314391599752936, "grad_norm": 8.344207763671875, "learning_rate": 9.414329833230389e-07, "loss": 0.1281, "step": 4746 }, { "epoch": 2.932674490426189, "grad_norm": 4.526418685913086, "learning_rate": 9.41408276714021e-07, "loss": 0.0807, "step": 4748 }, { "epoch": 2.9339098208770844, "grad_norm": 12.01827335357666, "learning_rate": 9.41383570105003e-07, "loss": 0.1775, "step": 4750 }, { "epoch": 2.9351451513279803, "grad_norm": 9.89918327331543, "learning_rate": 9.413588634959852e-07, "loss": 0.255, "step": 4752 }, { "epoch": 2.936380481778876, "grad_norm": 4.281338214874268, "learning_rate": 9.413341568869673e-07, "loss": 0.1687, "step": 4754 }, { "epoch": 2.9376158122297715, "grad_norm": 6.5143022537231445, "learning_rate": 9.413094502779493e-07, "loss": 0.1014, "step": 4756 }, { "epoch": 2.938851142680667, "grad_norm": 9.714712142944336, "learning_rate": 9.412847436689314e-07, "loss": 0.1825, "step": 4758 }, { "epoch": 2.940086473131563, "grad_norm": 7.310574054718018, "learning_rate": 9.412600370599135e-07, "loss": 0.1951, "step": 4760 }, { "epoch": 2.941321803582458, "grad_norm": 6.779116630554199, "learning_rate": 9.412353304508956e-07, "loss": 0.1745, "step": 4762 }, { "epoch": 2.942557134033354, "grad_norm": 7.492888450622559, "learning_rate": 9.412106238418777e-07, "loss": 0.2666, "step": 4764 }, { "epoch": 
2.9437924644842495, "grad_norm": 5.412779808044434, "learning_rate": 9.411859172328598e-07, "loss": 0.1605, "step": 4766 }, { "epoch": 2.945027794935145, "grad_norm": 9.771838188171387, "learning_rate": 9.411612106238418e-07, "loss": 0.2988, "step": 4768 }, { "epoch": 2.9462631253860407, "grad_norm": 4.988750457763672, "learning_rate": 9.41136504014824e-07, "loss": 0.3331, "step": 4770 }, { "epoch": 2.9474984558369366, "grad_norm": 7.7679853439331055, "learning_rate": 9.411117974058061e-07, "loss": 0.1079, "step": 4772 }, { "epoch": 2.948733786287832, "grad_norm": 11.181714057922363, "learning_rate": 9.410870907967881e-07, "loss": 0.1383, "step": 4774 }, { "epoch": 2.9499691167387274, "grad_norm": 1.5921131372451782, "learning_rate": 9.410623841877702e-07, "loss": 0.053, "step": 4776 }, { "epoch": 2.9512044471896233, "grad_norm": 6.9040632247924805, "learning_rate": 9.410376775787523e-07, "loss": 0.1594, "step": 4778 }, { "epoch": 2.9524397776405187, "grad_norm": 5.568849086761475, "learning_rate": 9.410129709697344e-07, "loss": 0.4053, "step": 4780 }, { "epoch": 2.9536751080914145, "grad_norm": 8.846184730529785, "learning_rate": 9.409882643607165e-07, "loss": 0.2839, "step": 4782 }, { "epoch": 2.95491043854231, "grad_norm": 13.04240894317627, "learning_rate": 9.409635577516986e-07, "loss": 0.356, "step": 4784 }, { "epoch": 2.956145768993206, "grad_norm": 13.558549880981445, "learning_rate": 9.409388511426807e-07, "loss": 0.2928, "step": 4786 }, { "epoch": 2.957381099444101, "grad_norm": 5.19019889831543, "learning_rate": 9.409141445336626e-07, "loss": 0.2308, "step": 4788 }, { "epoch": 2.958616429894997, "grad_norm": 17.705656051635742, "learning_rate": 9.408894379246449e-07, "loss": 0.3107, "step": 4790 }, { "epoch": 2.9598517603458925, "grad_norm": 10.255051612854004, "learning_rate": 9.40864731315627e-07, "loss": 0.2068, "step": 4792 }, { "epoch": 2.961087090796788, "grad_norm": 8.141858100891113, "learning_rate": 9.40840024706609e-07, "loss": 0.2267, "step": 
4794 }, { "epoch": 2.9623224212476837, "grad_norm": 7.883449554443359, "learning_rate": 9.40815318097591e-07, "loss": 0.2217, "step": 4796 }, { "epoch": 2.9635577516985796, "grad_norm": 9.38566780090332, "learning_rate": 9.407906114885731e-07, "loss": 0.1954, "step": 4798 }, { "epoch": 2.964793082149475, "grad_norm": 9.011748313903809, "learning_rate": 9.407659048795552e-07, "loss": 0.2083, "step": 4800 }, { "epoch": 2.9660284126003704, "grad_norm": 2.864114761352539, "learning_rate": 9.407411982705373e-07, "loss": 0.1142, "step": 4802 }, { "epoch": 2.9672637430512663, "grad_norm": 8.171381950378418, "learning_rate": 9.407164916615194e-07, "loss": 0.2177, "step": 4804 }, { "epoch": 2.9684990735021617, "grad_norm": 6.503474712371826, "learning_rate": 9.406917850525014e-07, "loss": 0.1811, "step": 4806 }, { "epoch": 2.9697344039530575, "grad_norm": 7.49078893661499, "learning_rate": 9.406670784434835e-07, "loss": 0.1455, "step": 4808 }, { "epoch": 2.970969734403953, "grad_norm": 3.1233134269714355, "learning_rate": 9.406423718344657e-07, "loss": 0.0836, "step": 4810 }, { "epoch": 2.972205064854849, "grad_norm": 3.2372078895568848, "learning_rate": 9.406176652254477e-07, "loss": 0.0717, "step": 4812 }, { "epoch": 2.973440395305744, "grad_norm": 3.843714475631714, "learning_rate": 9.405929586164298e-07, "loss": 0.0581, "step": 4814 }, { "epoch": 2.97467572575664, "grad_norm": 8.635309219360352, "learning_rate": 9.405682520074119e-07, "loss": 0.1956, "step": 4816 }, { "epoch": 2.9759110562075355, "grad_norm": 8.747565269470215, "learning_rate": 9.40543545398394e-07, "loss": 0.2117, "step": 4818 }, { "epoch": 2.977146386658431, "grad_norm": 6.8150506019592285, "learning_rate": 9.405188387893761e-07, "loss": 0.0892, "step": 4820 }, { "epoch": 2.9783817171093268, "grad_norm": 2.213101387023926, "learning_rate": 9.404941321803582e-07, "loss": 0.0734, "step": 4822 }, { "epoch": 2.9796170475602226, "grad_norm": 5.898355484008789, "learning_rate": 9.404694255713402e-07, 
"loss": 0.1251, "step": 4824 }, { "epoch": 2.980852378011118, "grad_norm": 3.8652265071868896, "learning_rate": 9.404447189623223e-07, "loss": 0.1033, "step": 4826 }, { "epoch": 2.9820877084620134, "grad_norm": 17.43421745300293, "learning_rate": 9.404200123533045e-07, "loss": 0.266, "step": 4828 }, { "epoch": 2.9833230389129093, "grad_norm": 4.537426471710205, "learning_rate": 9.403953057442866e-07, "loss": 0.0759, "step": 4830 }, { "epoch": 2.9845583693638047, "grad_norm": 9.206770896911621, "learning_rate": 9.403705991352686e-07, "loss": 0.2403, "step": 4832 }, { "epoch": 2.9857936998147006, "grad_norm": 6.148711204528809, "learning_rate": 9.403458925262507e-07, "loss": 0.2322, "step": 4834 }, { "epoch": 2.987029030265596, "grad_norm": 8.173531532287598, "learning_rate": 9.403211859172328e-07, "loss": 0.2636, "step": 4836 }, { "epoch": 2.988264360716492, "grad_norm": 12.05524730682373, "learning_rate": 9.402964793082149e-07, "loss": 0.2594, "step": 4838 }, { "epoch": 2.9894996911673872, "grad_norm": 3.60595965385437, "learning_rate": 9.40271772699197e-07, "loss": 0.1836, "step": 4840 }, { "epoch": 2.990735021618283, "grad_norm": 3.116018295288086, "learning_rate": 9.402470660901791e-07, "loss": 0.1255, "step": 4842 }, { "epoch": 2.9919703520691785, "grad_norm": 7.912368297576904, "learning_rate": 9.402223594811611e-07, "loss": 0.2825, "step": 4844 }, { "epoch": 2.993205682520074, "grad_norm": 7.833459854125977, "learning_rate": 9.401976528721432e-07, "loss": 0.2506, "step": 4846 }, { "epoch": 2.9944410129709698, "grad_norm": 8.845340728759766, "learning_rate": 9.401729462631254e-07, "loss": 0.1904, "step": 4848 }, { "epoch": 2.9956763434218656, "grad_norm": 6.138392925262451, "learning_rate": 9.401482396541074e-07, "loss": 0.1394, "step": 4850 }, { "epoch": 2.996911673872761, "grad_norm": 6.587239742279053, "learning_rate": 9.401235330450895e-07, "loss": 0.1326, "step": 4852 }, { "epoch": 2.9981470043236564, "grad_norm": 3.9278266429901123, "learning_rate": 
9.400988264360716e-07, "loss": 0.0452, "step": 4854 }, { "epoch": 2.9993823347745523, "grad_norm": 5.618165969848633, "learning_rate": 9.400741198270536e-07, "loss": 0.1613, "step": 4856 }, { "epoch": 3.0006176652254477, "grad_norm": 2.974703550338745, "learning_rate": 9.400494132180358e-07, "loss": 0.0313, "step": 4858 }, { "epoch": 3.0018529956763436, "grad_norm": 3.344507932662964, "learning_rate": 9.400247066090179e-07, "loss": 0.0404, "step": 4860 }, { "epoch": 3.003088326127239, "grad_norm": 5.732065677642822, "learning_rate": 9.399999999999999e-07, "loss": 0.1563, "step": 4862 }, { "epoch": 3.004323656578135, "grad_norm": 8.236889839172363, "learning_rate": 9.39975293390982e-07, "loss": 0.1427, "step": 4864 }, { "epoch": 3.0055589870290302, "grad_norm": 8.42541790008545, "learning_rate": 9.399505867819642e-07, "loss": 0.1636, "step": 4866 }, { "epoch": 3.006794317479926, "grad_norm": 1.5292948484420776, "learning_rate": 9.399258801729463e-07, "loss": 0.0391, "step": 4868 }, { "epoch": 3.0080296479308215, "grad_norm": 4.3554229736328125, "learning_rate": 9.399011735639283e-07, "loss": 0.128, "step": 4870 }, { "epoch": 3.009264978381717, "grad_norm": 6.711137771606445, "learning_rate": 9.398764669549104e-07, "loss": 0.3013, "step": 4872 }, { "epoch": 3.0105003088326128, "grad_norm": 6.04845666885376, "learning_rate": 9.398517603458925e-07, "loss": 0.0996, "step": 4874 }, { "epoch": 3.011735639283508, "grad_norm": 8.530277252197266, "learning_rate": 9.398270537368746e-07, "loss": 0.1477, "step": 4876 }, { "epoch": 3.012970969734404, "grad_norm": 5.449336528778076, "learning_rate": 9.398023471278567e-07, "loss": 0.3173, "step": 4878 }, { "epoch": 3.0142063001852994, "grad_norm": 9.063302993774414, "learning_rate": 9.397776405188388e-07, "loss": 0.2955, "step": 4880 }, { "epoch": 3.0154416306361953, "grad_norm": 11.091453552246094, "learning_rate": 9.397529339098208e-07, "loss": 0.1913, "step": 4882 }, { "epoch": 3.0166769610870907, "grad_norm": 
4.598958492279053, "learning_rate": 9.397282273008029e-07, "loss": 0.1222, "step": 4884 }, { "epoch": 3.0179122915379866, "grad_norm": 6.823906421661377, "learning_rate": 9.397035206917851e-07, "loss": 0.1787, "step": 4886 }, { "epoch": 3.019147621988882, "grad_norm": 2.0138702392578125, "learning_rate": 9.396788140827671e-07, "loss": 0.0695, "step": 4888 }, { "epoch": 3.020382952439778, "grad_norm": 4.0586981773376465, "learning_rate": 9.396541074737492e-07, "loss": 0.1377, "step": 4890 }, { "epoch": 3.0216182828906732, "grad_norm": 6.983353614807129, "learning_rate": 9.396294008647313e-07, "loss": 0.2145, "step": 4892 }, { "epoch": 3.0228536133415687, "grad_norm": 5.035432815551758, "learning_rate": 9.396046942557133e-07, "loss": 0.1263, "step": 4894 }, { "epoch": 3.0240889437924645, "grad_norm": 4.232234954833984, "learning_rate": 9.395799876466955e-07, "loss": 0.0521, "step": 4896 }, { "epoch": 3.02532427424336, "grad_norm": 1.6722995042800903, "learning_rate": 9.395552810376776e-07, "loss": 0.2089, "step": 4898 }, { "epoch": 3.026559604694256, "grad_norm": 10.495844841003418, "learning_rate": 9.395305744286596e-07, "loss": 0.4365, "step": 4900 }, { "epoch": 3.027794935145151, "grad_norm": 4.662744045257568, "learning_rate": 9.395058678196417e-07, "loss": 0.0474, "step": 4902 }, { "epoch": 3.029030265596047, "grad_norm": 13.561700820922852, "learning_rate": 9.394811612106239e-07, "loss": 0.4381, "step": 4904 }, { "epoch": 3.0302655960469425, "grad_norm": 4.759292125701904, "learning_rate": 9.39456454601606e-07, "loss": 0.1223, "step": 4906 }, { "epoch": 3.0315009264978383, "grad_norm": 6.975395679473877, "learning_rate": 9.39431747992588e-07, "loss": 0.1219, "step": 4908 }, { "epoch": 3.0327362569487337, "grad_norm": 4.798420429229736, "learning_rate": 9.394070413835701e-07, "loss": 0.1389, "step": 4910 }, { "epoch": 3.0339715873996296, "grad_norm": 8.945579528808594, "learning_rate": 9.393823347745522e-07, "loss": 0.1642, "step": 4912 }, { "epoch": 
3.035206917850525, "grad_norm": 6.467700004577637, "learning_rate": 9.393576281655343e-07, "loss": 0.1353, "step": 4914 }, { "epoch": 3.036442248301421, "grad_norm": 5.861745834350586, "learning_rate": 9.393329215565164e-07, "loss": 0.1231, "step": 4916 }, { "epoch": 3.0376775787523163, "grad_norm": 10.847447395324707, "learning_rate": 9.393082149474985e-07, "loss": 0.2468, "step": 4918 }, { "epoch": 3.0389129092032117, "grad_norm": 8.220146179199219, "learning_rate": 9.392835083384805e-07, "loss": 0.2123, "step": 4920 }, { "epoch": 3.0401482396541075, "grad_norm": 5.352616310119629, "learning_rate": 9.392588017294626e-07, "loss": 0.0843, "step": 4922 }, { "epoch": 3.041383570105003, "grad_norm": 3.489342451095581, "learning_rate": 9.392340951204448e-07, "loss": 0.0308, "step": 4924 }, { "epoch": 3.042618900555899, "grad_norm": 2.2542848587036133, "learning_rate": 9.392093885114267e-07, "loss": 0.075, "step": 4926 }, { "epoch": 3.043854231006794, "grad_norm": 5.173557281494141, "learning_rate": 9.391846819024088e-07, "loss": 0.1326, "step": 4928 }, { "epoch": 3.04508956145769, "grad_norm": 1.8530043363571167, "learning_rate": 9.391599752933909e-07, "loss": 0.0765, "step": 4930 }, { "epoch": 3.0463248919085855, "grad_norm": 6.3807573318481445, "learning_rate": 9.391352686843729e-07, "loss": 0.1613, "step": 4932 }, { "epoch": 3.0475602223594813, "grad_norm": 1.8877235651016235, "learning_rate": 9.391105620753551e-07, "loss": 0.119, "step": 4934 }, { "epoch": 3.0487955528103767, "grad_norm": 3.6588237285614014, "learning_rate": 9.390858554663372e-07, "loss": 0.2059, "step": 4936 }, { "epoch": 3.0500308832612726, "grad_norm": 11.838861465454102, "learning_rate": 9.390611488573192e-07, "loss": 0.27, "step": 4938 }, { "epoch": 3.051266213712168, "grad_norm": 7.518467903137207, "learning_rate": 9.390364422483013e-07, "loss": 0.16, "step": 4940 }, { "epoch": 3.0525015441630634, "grad_norm": 3.652761459350586, "learning_rate": 9.390117356392834e-07, "loss": 0.1241, "step": 
4942 }, { "epoch": 3.0537368746139593, "grad_norm": 9.425799369812012, "learning_rate": 9.389870290302655e-07, "loss": 0.1805, "step": 4944 }, { "epoch": 3.0549722050648547, "grad_norm": 6.616809368133545, "learning_rate": 9.389623224212476e-07, "loss": 0.2667, "step": 4946 }, { "epoch": 3.0562075355157505, "grad_norm": 5.414646625518799, "learning_rate": 9.389376158122297e-07, "loss": 0.115, "step": 4948 }, { "epoch": 3.057442865966646, "grad_norm": 6.486731052398682, "learning_rate": 9.389129092032118e-07, "loss": 0.1983, "step": 4950 }, { "epoch": 3.058678196417542, "grad_norm": 2.2418689727783203, "learning_rate": 9.388882025941939e-07, "loss": 0.0243, "step": 4952 }, { "epoch": 3.059913526868437, "grad_norm": 4.2606892585754395, "learning_rate": 9.38863495985176e-07, "loss": 0.0939, "step": 4954 }, { "epoch": 3.061148857319333, "grad_norm": 4.463229656219482, "learning_rate": 9.388387893761581e-07, "loss": 0.0807, "step": 4956 }, { "epoch": 3.0623841877702285, "grad_norm": 6.285942077636719, "learning_rate": 9.388140827671401e-07, "loss": 0.1022, "step": 4958 }, { "epoch": 3.0636195182211243, "grad_norm": 7.594030380249023, "learning_rate": 9.387893761581222e-07, "loss": 0.1742, "step": 4960 }, { "epoch": 3.0648548486720197, "grad_norm": 5.630216121673584, "learning_rate": 9.387646695491044e-07, "loss": 0.0657, "step": 4962 }, { "epoch": 3.0660901791229156, "grad_norm": 8.772477149963379, "learning_rate": 9.387399629400864e-07, "loss": 0.1279, "step": 4964 }, { "epoch": 3.067325509573811, "grad_norm": 6.1805925369262695, "learning_rate": 9.387152563310685e-07, "loss": 0.0365, "step": 4966 }, { "epoch": 3.0685608400247064, "grad_norm": 2.5731101036071777, "learning_rate": 9.386905497220506e-07, "loss": 0.2038, "step": 4968 }, { "epoch": 3.0697961704756023, "grad_norm": 4.19912052154541, "learning_rate": 9.386658431130326e-07, "loss": 0.0617, "step": 4970 }, { "epoch": 3.0710315009264977, "grad_norm": 4.764763832092285, "learning_rate": 9.386411365040148e-07, 
"loss": 0.185, "step": 4972 }, { "epoch": 3.0722668313773935, "grad_norm": 4.960380554199219, "learning_rate": 9.386164298949969e-07, "loss": 0.071, "step": 4974 }, { "epoch": 3.073502161828289, "grad_norm": 5.251114845275879, "learning_rate": 9.385917232859789e-07, "loss": 0.0671, "step": 4976 }, { "epoch": 3.074737492279185, "grad_norm": 1.8727303743362427, "learning_rate": 9.38567016676961e-07, "loss": 0.023, "step": 4978 }, { "epoch": 3.07597282273008, "grad_norm": 2.836474895477295, "learning_rate": 9.385423100679431e-07, "loss": 0.2113, "step": 4980 }, { "epoch": 3.077208153180976, "grad_norm": 9.153929710388184, "learning_rate": 9.385176034589252e-07, "loss": 0.1452, "step": 4982 }, { "epoch": 3.0784434836318715, "grad_norm": 12.045419692993164, "learning_rate": 9.384928968499073e-07, "loss": 0.3832, "step": 4984 }, { "epoch": 3.0796788140827673, "grad_norm": 7.369360446929932, "learning_rate": 9.384681902408894e-07, "loss": 0.2246, "step": 4986 }, { "epoch": 3.0809141445336627, "grad_norm": 1.251805067062378, "learning_rate": 9.384434836318715e-07, "loss": 0.194, "step": 4988 }, { "epoch": 3.082149474984558, "grad_norm": 3.58553409576416, "learning_rate": 9.384187770228535e-07, "loss": 0.0447, "step": 4990 }, { "epoch": 3.083384805435454, "grad_norm": 11.710257530212402, "learning_rate": 9.383940704138357e-07, "loss": 0.2454, "step": 4992 }, { "epoch": 3.0846201358863494, "grad_norm": 7.897751331329346, "learning_rate": 9.383693638048178e-07, "loss": 0.1775, "step": 4994 }, { "epoch": 3.0858554663372453, "grad_norm": 3.584780216217041, "learning_rate": 9.383446571957998e-07, "loss": 0.0285, "step": 4996 }, { "epoch": 3.0870907967881407, "grad_norm": 15.294472694396973, "learning_rate": 9.383199505867819e-07, "loss": 0.779, "step": 4998 }, { "epoch": 3.0883261272390365, "grad_norm": 4.985859394073486, "learning_rate": 9.382952439777641e-07, "loss": 0.0976, "step": 5000 }, { "epoch": 3.0883261272390365, "eval_cer": 0.052343695908052344, "eval_loss": 
0.23960939049720764, "eval_runtime": 64.1485, "eval_samples_per_second": 12.798, "eval_steps_per_second": 1.606, "step": 5000 }, { "epoch": 3.089561457689932, "grad_norm": 6.250057697296143, "learning_rate": 9.382705373687461e-07, "loss": 0.1509, "step": 5002 }, { "epoch": 3.090796788140828, "grad_norm": 5.445752143859863, "learning_rate": 9.382458307597282e-07, "loss": 0.1051, "step": 5004 }, { "epoch": 3.0920321185917232, "grad_norm": 3.33603835105896, "learning_rate": 9.382211241507103e-07, "loss": 0.0973, "step": 5006 }, { "epoch": 3.093267449042619, "grad_norm": 3.6812942028045654, "learning_rate": 9.381964175416923e-07, "loss": 0.0731, "step": 5008 }, { "epoch": 3.0945027794935145, "grad_norm": 3.2153143882751465, "learning_rate": 9.381717109326745e-07, "loss": 0.0375, "step": 5010 }, { "epoch": 3.0957381099444103, "grad_norm": 3.1247682571411133, "learning_rate": 9.381470043236566e-07, "loss": 0.0769, "step": 5012 }, { "epoch": 3.0969734403953058, "grad_norm": 5.071110248565674, "learning_rate": 9.381222977146386e-07, "loss": 0.0995, "step": 5014 }, { "epoch": 3.098208770846201, "grad_norm": 4.2940239906311035, "learning_rate": 9.380975911056207e-07, "loss": 0.1308, "step": 5016 }, { "epoch": 3.099444101297097, "grad_norm": 6.893390655517578, "learning_rate": 9.380728844966028e-07, "loss": 0.1801, "step": 5018 }, { "epoch": 3.1006794317479924, "grad_norm": 5.632668972015381, "learning_rate": 9.380481778875849e-07, "loss": 0.4014, "step": 5020 }, { "epoch": 3.1019147621988883, "grad_norm": 7.3163743019104, "learning_rate": 9.38023471278567e-07, "loss": 0.1713, "step": 5022 }, { "epoch": 3.1031500926497837, "grad_norm": 8.36023235321045, "learning_rate": 9.379987646695491e-07, "loss": 0.1914, "step": 5024 }, { "epoch": 3.1043854231006796, "grad_norm": 5.890882968902588, "learning_rate": 9.379740580605312e-07, "loss": 0.0772, "step": 5026 }, { "epoch": 3.105620753551575, "grad_norm": 4.130723476409912, "learning_rate": 9.379493514515132e-07, "loss": 0.0309, 
"step": 5028 }, { "epoch": 3.106856084002471, "grad_norm": 7.793521881103516, "learning_rate": 9.379246448424954e-07, "loss": 0.2614, "step": 5030 }, { "epoch": 3.1080914144533662, "grad_norm": 6.221404075622559, "learning_rate": 9.378999382334775e-07, "loss": 0.1148, "step": 5032 }, { "epoch": 3.109326744904262, "grad_norm": 4.117484092712402, "learning_rate": 9.378752316244595e-07, "loss": 0.0761, "step": 5034 }, { "epoch": 3.1105620753551575, "grad_norm": 3.474668025970459, "learning_rate": 9.378505250154416e-07, "loss": 0.1634, "step": 5036 }, { "epoch": 3.111797405806053, "grad_norm": 6.243168354034424, "learning_rate": 9.378258184064238e-07, "loss": 0.1646, "step": 5038 }, { "epoch": 3.1130327362569488, "grad_norm": 1.9761930704116821, "learning_rate": 9.378011117974058e-07, "loss": 0.03, "step": 5040 }, { "epoch": 3.114268066707844, "grad_norm": 8.713094711303711, "learning_rate": 9.377764051883879e-07, "loss": 0.1571, "step": 5042 }, { "epoch": 3.11550339715874, "grad_norm": 2.9503822326660156, "learning_rate": 9.3775169857937e-07, "loss": 0.1528, "step": 5044 }, { "epoch": 3.1167387276096354, "grad_norm": 2.302454948425293, "learning_rate": 9.37726991970352e-07, "loss": 0.0191, "step": 5046 }, { "epoch": 3.1179740580605313, "grad_norm": 4.280328273773193, "learning_rate": 9.377022853613342e-07, "loss": 0.1496, "step": 5048 }, { "epoch": 3.1192093885114267, "grad_norm": 7.770121097564697, "learning_rate": 9.376775787523163e-07, "loss": 0.0706, "step": 5050 }, { "epoch": 3.1204447189623226, "grad_norm": 2.713052272796631, "learning_rate": 9.376528721432983e-07, "loss": 0.0385, "step": 5052 }, { "epoch": 3.121680049413218, "grad_norm": 8.92708683013916, "learning_rate": 9.376281655342804e-07, "loss": 0.2374, "step": 5054 }, { "epoch": 3.122915379864114, "grad_norm": 4.704719543457031, "learning_rate": 9.376034589252625e-07, "loss": 0.1042, "step": 5056 }, { "epoch": 3.1241507103150092, "grad_norm": 2.959904432296753, "learning_rate": 9.375787523162445e-07, 
"loss": 0.2088, "step": 5058 }, { "epoch": 3.125386040765905, "grad_norm": 4.03812837600708, "learning_rate": 9.375540457072266e-07, "loss": 0.0562, "step": 5060 }, { "epoch": 3.1266213712168005, "grad_norm": 6.542498588562012, "learning_rate": 9.375293390982087e-07, "loss": 0.1511, "step": 5062 }, { "epoch": 3.127856701667696, "grad_norm": 7.222618579864502, "learning_rate": 9.375046324891907e-07, "loss": 0.2077, "step": 5064 }, { "epoch": 3.1290920321185918, "grad_norm": 4.490553379058838, "learning_rate": 9.374799258801728e-07, "loss": 0.0492, "step": 5066 }, { "epoch": 3.130327362569487, "grad_norm": 4.638528823852539, "learning_rate": 9.37455219271155e-07, "loss": 0.0878, "step": 5068 }, { "epoch": 3.131562693020383, "grad_norm": 4.349446773529053, "learning_rate": 9.374305126621371e-07, "loss": 0.2401, "step": 5070 }, { "epoch": 3.1327980234712784, "grad_norm": 8.762704849243164, "learning_rate": 9.374058060531191e-07, "loss": 0.1751, "step": 5072 }, { "epoch": 3.1340333539221743, "grad_norm": 5.47465705871582, "learning_rate": 9.373810994441012e-07, "loss": 0.1047, "step": 5074 }, { "epoch": 3.1352686843730697, "grad_norm": 6.834657192230225, "learning_rate": 9.373563928350833e-07, "loss": 0.1068, "step": 5076 }, { "epoch": 3.1365040148239656, "grad_norm": 5.099918842315674, "learning_rate": 9.373316862260654e-07, "loss": 0.0545, "step": 5078 }, { "epoch": 3.137739345274861, "grad_norm": 5.6116743087768555, "learning_rate": 9.373069796170475e-07, "loss": 0.1311, "step": 5080 }, { "epoch": 3.138974675725757, "grad_norm": 3.3837573528289795, "learning_rate": 9.372822730080296e-07, "loss": 0.0558, "step": 5082 }, { "epoch": 3.1402100061766522, "grad_norm": 4.932106018066406, "learning_rate": 9.372575663990116e-07, "loss": 0.0726, "step": 5084 }, { "epoch": 3.141445336627548, "grad_norm": 3.5555427074432373, "learning_rate": 9.372328597899938e-07, "loss": 0.0763, "step": 5086 }, { "epoch": 3.1426806670784435, "grad_norm": 8.485612869262695, "learning_rate": 
9.372081531809759e-07, "loss": 0.0604, "step": 5088 }, { "epoch": 3.143915997529339, "grad_norm": 5.796864032745361, "learning_rate": 9.371834465719579e-07, "loss": 0.1201, "step": 5090 }, { "epoch": 3.145151327980235, "grad_norm": 0.9834946990013123, "learning_rate": 9.3715873996294e-07, "loss": 0.066, "step": 5092 }, { "epoch": 3.14638665843113, "grad_norm": 4.153322696685791, "learning_rate": 9.371340333539221e-07, "loss": 0.4348, "step": 5094 }, { "epoch": 3.147621988882026, "grad_norm": 9.126821517944336, "learning_rate": 9.371093267449042e-07, "loss": 0.2857, "step": 5096 }, { "epoch": 3.1488573193329215, "grad_norm": 3.6629090309143066, "learning_rate": 9.370846201358863e-07, "loss": 0.0932, "step": 5098 }, { "epoch": 3.1500926497838173, "grad_norm": 7.227264881134033, "learning_rate": 9.370599135268684e-07, "loss": 0.1721, "step": 5100 }, { "epoch": 3.1513279802347127, "grad_norm": 7.083561420440674, "learning_rate": 9.370352069178504e-07, "loss": 0.2423, "step": 5102 }, { "epoch": 3.1525633106856086, "grad_norm": 5.765565395355225, "learning_rate": 9.370105003088325e-07, "loss": 0.1642, "step": 5104 }, { "epoch": 3.153798641136504, "grad_norm": 6.774807453155518, "learning_rate": 9.369857936998147e-07, "loss": 0.1167, "step": 5106 }, { "epoch": 3.1550339715873994, "grad_norm": 8.976481437683105, "learning_rate": 9.369610870907968e-07, "loss": 0.1708, "step": 5108 }, { "epoch": 3.1562693020382953, "grad_norm": 10.732537269592285, "learning_rate": 9.369363804817788e-07, "loss": 0.3431, "step": 5110 }, { "epoch": 3.1575046324891907, "grad_norm": 4.824680805206299, "learning_rate": 9.369116738727609e-07, "loss": 0.0912, "step": 5112 }, { "epoch": 3.1587399629400865, "grad_norm": 2.119701623916626, "learning_rate": 9.36886967263743e-07, "loss": 0.0245, "step": 5114 }, { "epoch": 3.159975293390982, "grad_norm": 9.499276161193848, "learning_rate": 9.368622606547251e-07, "loss": 0.2079, "step": 5116 }, { "epoch": 3.161210623841878, "grad_norm": 9.629617691040039, 
"learning_rate": 9.368375540457072e-07, "loss": 0.1552, "step": 5118 }, { "epoch": 3.162445954292773, "grad_norm": 6.019927501678467, "learning_rate": 9.368128474366893e-07, "loss": 0.2016, "step": 5120 }, { "epoch": 3.163681284743669, "grad_norm": 8.003661155700684, "learning_rate": 9.367881408276713e-07, "loss": 0.1529, "step": 5122 }, { "epoch": 3.1649166151945645, "grad_norm": 5.456172943115234, "learning_rate": 9.367634342186534e-07, "loss": 0.0597, "step": 5124 }, { "epoch": 3.1661519456454603, "grad_norm": 11.08430290222168, "learning_rate": 9.367387276096356e-07, "loss": 0.3265, "step": 5126 }, { "epoch": 3.1673872760963557, "grad_norm": 3.581467866897583, "learning_rate": 9.367140210006176e-07, "loss": 0.0953, "step": 5128 }, { "epoch": 3.1686226065472516, "grad_norm": 0.9934538006782532, "learning_rate": 9.366893143915997e-07, "loss": 0.0669, "step": 5130 }, { "epoch": 3.169857936998147, "grad_norm": 3.5304269790649414, "learning_rate": 9.366646077825818e-07, "loss": 0.1415, "step": 5132 }, { "epoch": 3.1710932674490424, "grad_norm": 8.354665756225586, "learning_rate": 9.366399011735638e-07, "loss": 0.2294, "step": 5134 }, { "epoch": 3.1723285978999383, "grad_norm": 6.39453649520874, "learning_rate": 9.36615194564546e-07, "loss": 0.4189, "step": 5136 }, { "epoch": 3.1735639283508337, "grad_norm": 5.42775297164917, "learning_rate": 9.365904879555281e-07, "loss": 0.0727, "step": 5138 }, { "epoch": 3.1747992588017295, "grad_norm": 3.710305690765381, "learning_rate": 9.365657813465101e-07, "loss": 0.1322, "step": 5140 }, { "epoch": 3.176034589252625, "grad_norm": 10.304737091064453, "learning_rate": 9.365410747374922e-07, "loss": 0.1833, "step": 5142 }, { "epoch": 3.177269919703521, "grad_norm": 6.0204033851623535, "learning_rate": 9.365163681284744e-07, "loss": 0.1167, "step": 5144 }, { "epoch": 3.178505250154416, "grad_norm": 5.794194221496582, "learning_rate": 9.364916615194565e-07, "loss": 0.1159, "step": 5146 }, { "epoch": 3.179740580605312, "grad_norm": 
3.1719870567321777, "learning_rate": 9.364669549104385e-07, "loss": 0.0354, "step": 5148 }, { "epoch": 3.1809759110562075, "grad_norm": 6.749721527099609, "learning_rate": 9.364422483014206e-07, "loss": 0.1047, "step": 5150 }, { "epoch": 3.1822112415071033, "grad_norm": 1.857270359992981, "learning_rate": 9.364175416924027e-07, "loss": 0.0131, "step": 5152 }, { "epoch": 3.1834465719579987, "grad_norm": 6.522891998291016, "learning_rate": 9.363928350833848e-07, "loss": 0.1654, "step": 5154 }, { "epoch": 3.1846819024088946, "grad_norm": 3.6759800910949707, "learning_rate": 9.363681284743669e-07, "loss": 0.0172, "step": 5156 }, { "epoch": 3.18591723285979, "grad_norm": 2.481370210647583, "learning_rate": 9.36343421865349e-07, "loss": 0.0463, "step": 5158 }, { "epoch": 3.1871525633106854, "grad_norm": 8.016493797302246, "learning_rate": 9.36318715256331e-07, "loss": 0.1418, "step": 5160 }, { "epoch": 3.1883878937615813, "grad_norm": 7.996901512145996, "learning_rate": 9.362940086473131e-07, "loss": 0.2233, "step": 5162 }, { "epoch": 3.1896232242124767, "grad_norm": 6.965433120727539, "learning_rate": 9.362693020382953e-07, "loss": 0.1781, "step": 5164 }, { "epoch": 3.1908585546633725, "grad_norm": 4.529210090637207, "learning_rate": 9.362445954292773e-07, "loss": 0.1601, "step": 5166 }, { "epoch": 3.192093885114268, "grad_norm": 11.969141960144043, "learning_rate": 9.362198888202594e-07, "loss": 0.0975, "step": 5168 }, { "epoch": 3.193329215565164, "grad_norm": 4.5952863693237305, "learning_rate": 9.361951822112415e-07, "loss": 0.0608, "step": 5170 }, { "epoch": 3.194564546016059, "grad_norm": 5.260905742645264, "learning_rate": 9.361704756022235e-07, "loss": 0.0579, "step": 5172 }, { "epoch": 3.195799876466955, "grad_norm": 15.209275245666504, "learning_rate": 9.361457689932057e-07, "loss": 0.5101, "step": 5174 }, { "epoch": 3.1970352069178505, "grad_norm": 3.746169328689575, "learning_rate": 9.361210623841878e-07, "loss": 0.0569, "step": 5176 }, { "epoch": 
3.1982705373687463, "grad_norm": 12.544998168945312, "learning_rate": 9.360963557751698e-07, "loss": 0.1767, "step": 5178 }, { "epoch": 3.1995058678196417, "grad_norm": 8.284343719482422, "learning_rate": 9.360716491661519e-07, "loss": 0.2447, "step": 5180 }, { "epoch": 3.2007411982705376, "grad_norm": 6.641915798187256, "learning_rate": 9.360469425571341e-07, "loss": 0.1339, "step": 5182 }, { "epoch": 3.201976528721433, "grad_norm": 2.4025285243988037, "learning_rate": 9.360222359481161e-07, "loss": 0.1325, "step": 5184 }, { "epoch": 3.2032118591723284, "grad_norm": 5.95137882232666, "learning_rate": 9.359975293390982e-07, "loss": 0.0624, "step": 5186 }, { "epoch": 3.2044471896232243, "grad_norm": 3.987536907196045, "learning_rate": 9.359728227300803e-07, "loss": 0.0546, "step": 5188 }, { "epoch": 3.2056825200741197, "grad_norm": 7.575462341308594, "learning_rate": 9.359481161210624e-07, "loss": 0.1175, "step": 5190 }, { "epoch": 3.2069178505250155, "grad_norm": 13.630064010620117, "learning_rate": 9.359234095120445e-07, "loss": 0.2578, "step": 5192 }, { "epoch": 3.208153180975911, "grad_norm": 6.343199253082275, "learning_rate": 9.358987029030265e-07, "loss": 0.1275, "step": 5194 }, { "epoch": 3.209388511426807, "grad_norm": 11.277961730957031, "learning_rate": 9.358739962940086e-07, "loss": 0.26, "step": 5196 }, { "epoch": 3.2106238418777022, "grad_norm": 11.654131889343262, "learning_rate": 9.358492896849906e-07, "loss": 0.4196, "step": 5198 }, { "epoch": 3.211859172328598, "grad_norm": 5.672300338745117, "learning_rate": 9.358245830759727e-07, "loss": 0.1254, "step": 5200 }, { "epoch": 3.2130945027794935, "grad_norm": 4.9428887367248535, "learning_rate": 9.357998764669549e-07, "loss": 0.1216, "step": 5202 }, { "epoch": 3.214329833230389, "grad_norm": 3.6892130374908447, "learning_rate": 9.357751698579369e-07, "loss": 0.0451, "step": 5204 }, { "epoch": 3.2155651636812848, "grad_norm": 4.747214317321777, "learning_rate": 9.35750463248919e-07, "loss": 0.0523, 
"step": 5206 }, { "epoch": 3.21680049413218, "grad_norm": 7.185458183288574, "learning_rate": 9.357257566399011e-07, "loss": 0.1115, "step": 5208 }, { "epoch": 3.218035824583076, "grad_norm": 6.3951191902160645, "learning_rate": 9.357010500308831e-07, "loss": 0.0912, "step": 5210 }, { "epoch": 3.2192711550339714, "grad_norm": 8.423969268798828, "learning_rate": 9.356763434218653e-07, "loss": 0.0645, "step": 5212 }, { "epoch": 3.2205064854848673, "grad_norm": 9.268211364746094, "learning_rate": 9.356516368128474e-07, "loss": 0.1842, "step": 5214 }, { "epoch": 3.2217418159357627, "grad_norm": 4.73678731918335, "learning_rate": 9.356269302038294e-07, "loss": 0.1679, "step": 5216 }, { "epoch": 3.2229771463866586, "grad_norm": 7.762099742889404, "learning_rate": 9.356022235948115e-07, "loss": 0.1704, "step": 5218 }, { "epoch": 3.224212476837554, "grad_norm": 3.9446613788604736, "learning_rate": 9.355775169857937e-07, "loss": 0.0684, "step": 5220 }, { "epoch": 3.22544780728845, "grad_norm": 4.314286231994629, "learning_rate": 9.355528103767757e-07, "loss": 0.0852, "step": 5222 }, { "epoch": 3.2266831377393452, "grad_norm": 5.9341535568237305, "learning_rate": 9.355281037677578e-07, "loss": 0.0808, "step": 5224 }, { "epoch": 3.227918468190241, "grad_norm": 11.0156888961792, "learning_rate": 9.355033971587399e-07, "loss": 0.1926, "step": 5226 }, { "epoch": 3.2291537986411365, "grad_norm": 6.242677688598633, "learning_rate": 9.35478690549722e-07, "loss": 0.1266, "step": 5228 }, { "epoch": 3.230389129092032, "grad_norm": 5.67989444732666, "learning_rate": 9.354539839407041e-07, "loss": 0.1669, "step": 5230 }, { "epoch": 3.2316244595429278, "grad_norm": 7.176552772521973, "learning_rate": 9.354292773316862e-07, "loss": 0.1083, "step": 5232 }, { "epoch": 3.232859789993823, "grad_norm": 4.326044082641602, "learning_rate": 9.354045707226683e-07, "loss": 0.2111, "step": 5234 }, { "epoch": 3.234095120444719, "grad_norm": 3.059438705444336, "learning_rate": 9.353798641136503e-07, 
"loss": 0.0764, "step": 5236 }, { "epoch": 3.2353304508956144, "grad_norm": 8.5943603515625, "learning_rate": 9.353551575046324e-07, "loss": 0.2124, "step": 5238 }, { "epoch": 3.2365657813465103, "grad_norm": 4.278831958770752, "learning_rate": 9.353304508956146e-07, "loss": 0.1203, "step": 5240 }, { "epoch": 3.2378011117974057, "grad_norm": 7.582533836364746, "learning_rate": 9.353057442865966e-07, "loss": 0.184, "step": 5242 }, { "epoch": 3.2390364422483016, "grad_norm": 5.505610942840576, "learning_rate": 9.352810376775787e-07, "loss": 0.0579, "step": 5244 }, { "epoch": 3.240271772699197, "grad_norm": 5.940659999847412, "learning_rate": 9.352563310685608e-07, "loss": 0.0713, "step": 5246 }, { "epoch": 3.241507103150093, "grad_norm": 5.812680244445801, "learning_rate": 9.352316244595428e-07, "loss": 0.1248, "step": 5248 }, { "epoch": 3.2427424336009882, "grad_norm": 5.486135959625244, "learning_rate": 9.35206917850525e-07, "loss": 0.204, "step": 5250 }, { "epoch": 3.243977764051884, "grad_norm": 4.127468109130859, "learning_rate": 9.351822112415071e-07, "loss": 0.0653, "step": 5252 }, { "epoch": 3.2452130945027795, "grad_norm": 8.25555419921875, "learning_rate": 9.351575046324891e-07, "loss": 0.1322, "step": 5254 }, { "epoch": 3.246448424953675, "grad_norm": 5.875545501708984, "learning_rate": 9.351327980234712e-07, "loss": 0.1876, "step": 5256 }, { "epoch": 3.2476837554045708, "grad_norm": 5.601260662078857, "learning_rate": 9.351080914144533e-07, "loss": 0.1289, "step": 5258 }, { "epoch": 3.248919085855466, "grad_norm": 4.563013553619385, "learning_rate": 9.350833848054354e-07, "loss": 0.0694, "step": 5260 }, { "epoch": 3.250154416306362, "grad_norm": 3.8842484951019287, "learning_rate": 9.350586781964175e-07, "loss": 0.1932, "step": 5262 }, { "epoch": 3.2513897467572574, "grad_norm": 4.8060526847839355, "learning_rate": 9.350339715873996e-07, "loss": 0.2302, "step": 5264 }, { "epoch": 3.2526250772081533, "grad_norm": 7.339760780334473, "learning_rate": 
9.350092649783817e-07, "loss": 0.3209, "step": 5266 }, { "epoch": 3.2538604076590487, "grad_norm": 7.508329391479492, "learning_rate": 9.349845583693638e-07, "loss": 0.191, "step": 5268 }, { "epoch": 3.2550957381099446, "grad_norm": 10.486861228942871, "learning_rate": 9.349598517603459e-07, "loss": 0.1794, "step": 5270 }, { "epoch": 3.25633106856084, "grad_norm": 0.4189794659614563, "learning_rate": 9.34935145151328e-07, "loss": 0.0239, "step": 5272 }, { "epoch": 3.2575663990117354, "grad_norm": 7.9062371253967285, "learning_rate": 9.3491043854231e-07, "loss": 0.2413, "step": 5274 }, { "epoch": 3.2588017294626312, "grad_norm": 0.9082076549530029, "learning_rate": 9.348857319332921e-07, "loss": 0.0615, "step": 5276 }, { "epoch": 3.260037059913527, "grad_norm": 4.7169060707092285, "learning_rate": 9.348610253242743e-07, "loss": 0.1329, "step": 5278 }, { "epoch": 3.2612723903644225, "grad_norm": 9.33991813659668, "learning_rate": 9.348363187152563e-07, "loss": 0.2584, "step": 5280 }, { "epoch": 3.262507720815318, "grad_norm": 7.667590141296387, "learning_rate": 9.348116121062384e-07, "loss": 0.1488, "step": 5282 }, { "epoch": 3.263743051266214, "grad_norm": 2.8844375610351562, "learning_rate": 9.347869054972205e-07, "loss": 0.0423, "step": 5284 }, { "epoch": 3.264978381717109, "grad_norm": 4.171808242797852, "learning_rate": 9.347621988882025e-07, "loss": 0.4396, "step": 5286 }, { "epoch": 3.266213712168005, "grad_norm": 10.462688446044922, "learning_rate": 9.347374922791847e-07, "loss": 0.2408, "step": 5288 }, { "epoch": 3.2674490426189005, "grad_norm": 6.146017551422119, "learning_rate": 9.347127856701668e-07, "loss": 0.1388, "step": 5290 }, { "epoch": 3.2686843730697963, "grad_norm": 7.081465244293213, "learning_rate": 9.346880790611488e-07, "loss": 0.1169, "step": 5292 }, { "epoch": 3.2699197035206917, "grad_norm": 14.853014945983887, "learning_rate": 9.346633724521309e-07, "loss": 0.4448, "step": 5294 }, { "epoch": 3.2711550339715876, "grad_norm": 
8.149724006652832, "learning_rate": 9.34638665843113e-07, "loss": 0.1458, "step": 5296 }, { "epoch": 3.272390364422483, "grad_norm": 4.035031318664551, "learning_rate": 9.346139592340951e-07, "loss": 0.1222, "step": 5298 }, { "epoch": 3.2736256948733784, "grad_norm": 4.422637939453125, "learning_rate": 9.345892526250772e-07, "loss": 0.2078, "step": 5300 }, { "epoch": 3.2748610253242743, "grad_norm": 6.55415678024292, "learning_rate": 9.345645460160593e-07, "loss": 0.1653, "step": 5302 }, { "epoch": 3.2760963557751697, "grad_norm": 6.2477545738220215, "learning_rate": 9.345398394070413e-07, "loss": 0.0956, "step": 5304 }, { "epoch": 3.2773316862260655, "grad_norm": 4.273682117462158, "learning_rate": 9.345151327980234e-07, "loss": 0.1721, "step": 5306 }, { "epoch": 3.278567016676961, "grad_norm": 7.35977840423584, "learning_rate": 9.344904261890056e-07, "loss": 0.172, "step": 5308 }, { "epoch": 3.279802347127857, "grad_norm": 7.210842609405518, "learning_rate": 9.344657195799877e-07, "loss": 0.1612, "step": 5310 }, { "epoch": 3.281037677578752, "grad_norm": 6.475824356079102, "learning_rate": 9.344410129709697e-07, "loss": 0.1379, "step": 5312 }, { "epoch": 3.282273008029648, "grad_norm": 2.485318183898926, "learning_rate": 9.344163063619518e-07, "loss": 0.043, "step": 5314 }, { "epoch": 3.2835083384805435, "grad_norm": 10.166560173034668, "learning_rate": 9.34391599752934e-07, "loss": 0.2373, "step": 5316 }, { "epoch": 3.2847436689314393, "grad_norm": 4.858229637145996, "learning_rate": 9.34366893143916e-07, "loss": 0.0436, "step": 5318 }, { "epoch": 3.2859789993823347, "grad_norm": 4.127087116241455, "learning_rate": 9.343421865348981e-07, "loss": 0.0678, "step": 5320 }, { "epoch": 3.2872143298332306, "grad_norm": 7.505861282348633, "learning_rate": 9.343174799258802e-07, "loss": 0.1923, "step": 5322 }, { "epoch": 3.288449660284126, "grad_norm": 4.293774127960205, "learning_rate": 9.342927733168621e-07, "loss": 0.0734, "step": 5324 }, { "epoch": 
3.2896849907350214, "grad_norm": 12.901810646057129, "learning_rate": 9.342680667078444e-07, "loss": 0.3578, "step": 5326 }, { "epoch": 3.2909203211859173, "grad_norm": 4.527834892272949, "learning_rate": 9.342433600988264e-07, "loss": 0.0618, "step": 5328 }, { "epoch": 3.2921556516368127, "grad_norm": 4.435755252838135, "learning_rate": 9.342186534898084e-07, "loss": 0.099, "step": 5330 }, { "epoch": 3.2933909820877085, "grad_norm": 13.011190414428711, "learning_rate": 9.341939468807905e-07, "loss": 0.1409, "step": 5332 }, { "epoch": 3.294626312538604, "grad_norm": 3.73850154876709, "learning_rate": 9.341692402717726e-07, "loss": 0.1466, "step": 5334 }, { "epoch": 3.2958616429895, "grad_norm": 6.4024200439453125, "learning_rate": 9.341445336627547e-07, "loss": 0.0692, "step": 5336 }, { "epoch": 3.297096973440395, "grad_norm": 5.320733547210693, "learning_rate": 9.341198270537368e-07, "loss": 0.0736, "step": 5338 }, { "epoch": 3.298332303891291, "grad_norm": 8.405721664428711, "learning_rate": 9.340951204447189e-07, "loss": 0.13, "step": 5340 }, { "epoch": 3.2995676343421865, "grad_norm": 6.031059741973877, "learning_rate": 9.340704138357009e-07, "loss": 0.0981, "step": 5342 }, { "epoch": 3.3008029647930823, "grad_norm": 2.951979637145996, "learning_rate": 9.34045707226683e-07, "loss": 0.0466, "step": 5344 }, { "epoch": 3.3020382952439777, "grad_norm": 10.56931209564209, "learning_rate": 9.340210006176652e-07, "loss": 0.1191, "step": 5346 }, { "epoch": 3.3032736256948736, "grad_norm": 2.701730728149414, "learning_rate": 9.339962940086473e-07, "loss": 0.1112, "step": 5348 }, { "epoch": 3.304508956145769, "grad_norm": 3.2345030307769775, "learning_rate": 9.339715873996293e-07, "loss": 0.2029, "step": 5350 }, { "epoch": 3.3057442865966644, "grad_norm": 1.6499876976013184, "learning_rate": 9.339468807906114e-07, "loss": 0.0298, "step": 5352 }, { "epoch": 3.3069796170475603, "grad_norm": 4.655429840087891, "learning_rate": 9.339221741815936e-07, "loss": 0.1372, "step": 
5354 }, { "epoch": 3.3082149474984557, "grad_norm": 5.882189750671387, "learning_rate": 9.338974675725756e-07, "loss": 0.1766, "step": 5356 }, { "epoch": 3.3094502779493515, "grad_norm": 3.1114797592163086, "learning_rate": 9.338727609635577e-07, "loss": 0.0526, "step": 5358 }, { "epoch": 3.310685608400247, "grad_norm": 6.890286922454834, "learning_rate": 9.338480543545398e-07, "loss": 0.0823, "step": 5360 }, { "epoch": 3.311920938851143, "grad_norm": 4.814736843109131, "learning_rate": 9.338233477455218e-07, "loss": 0.0781, "step": 5362 }, { "epoch": 3.313156269302038, "grad_norm": 4.138197898864746, "learning_rate": 9.33798641136504e-07, "loss": 0.0694, "step": 5364 }, { "epoch": 3.314391599752934, "grad_norm": 11.327193260192871, "learning_rate": 9.337739345274861e-07, "loss": 0.3386, "step": 5366 }, { "epoch": 3.3156269302038295, "grad_norm": 10.136038780212402, "learning_rate": 9.337492279184681e-07, "loss": 0.1735, "step": 5368 }, { "epoch": 3.316862260654725, "grad_norm": 4.485020637512207, "learning_rate": 9.337245213094502e-07, "loss": 0.0441, "step": 5370 }, { "epoch": 3.3180975911056207, "grad_norm": 23.31106185913086, "learning_rate": 9.336998147004323e-07, "loss": 0.1348, "step": 5372 }, { "epoch": 3.3193329215565166, "grad_norm": 3.295750379562378, "learning_rate": 9.336751080914144e-07, "loss": 0.0397, "step": 5374 }, { "epoch": 3.320568252007412, "grad_norm": 14.365428924560547, "learning_rate": 9.336504014823965e-07, "loss": 0.1468, "step": 5376 }, { "epoch": 3.3218035824583074, "grad_norm": 6.173242568969727, "learning_rate": 9.336256948733786e-07, "loss": 0.1392, "step": 5378 }, { "epoch": 3.3230389129092033, "grad_norm": 3.4240047931671143, "learning_rate": 9.336009882643606e-07, "loss": 0.0679, "step": 5380 }, { "epoch": 3.3242742433600987, "grad_norm": 5.921154975891113, "learning_rate": 9.335762816553427e-07, "loss": 0.083, "step": 5382 }, { "epoch": 3.3255095738109945, "grad_norm": 6.744754791259766, "learning_rate": 9.335515750463249e-07, 
"loss": 0.1039, "step": 5384 }, { "epoch": 3.32674490426189, "grad_norm": 6.126888751983643, "learning_rate": 9.33526868437307e-07, "loss": 0.0869, "step": 5386 }, { "epoch": 3.327980234712786, "grad_norm": 5.27272367477417, "learning_rate": 9.33502161828289e-07, "loss": 0.0855, "step": 5388 }, { "epoch": 3.3292155651636812, "grad_norm": 8.833009719848633, "learning_rate": 9.334774552192711e-07, "loss": 0.1577, "step": 5390 }, { "epoch": 3.330450895614577, "grad_norm": 5.855361461639404, "learning_rate": 9.334527486102532e-07, "loss": 0.1603, "step": 5392 }, { "epoch": 3.3316862260654725, "grad_norm": 10.856033325195312, "learning_rate": 9.334280420012353e-07, "loss": 0.0945, "step": 5394 }, { "epoch": 3.332921556516368, "grad_norm": 3.865380048751831, "learning_rate": 9.334033353922174e-07, "loss": 0.0725, "step": 5396 }, { "epoch": 3.3341568869672638, "grad_norm": 7.238272666931152, "learning_rate": 9.333786287831995e-07, "loss": 0.1574, "step": 5398 }, { "epoch": 3.335392217418159, "grad_norm": 5.679964065551758, "learning_rate": 9.333539221741815e-07, "loss": 0.0972, "step": 5400 }, { "epoch": 3.336627547869055, "grad_norm": 5.327393054962158, "learning_rate": 9.333292155651637e-07, "loss": 0.1075, "step": 5402 }, { "epoch": 3.3378628783199504, "grad_norm": 7.292872905731201, "learning_rate": 9.333045089561458e-07, "loss": 0.3267, "step": 5404 }, { "epoch": 3.3390982087708463, "grad_norm": 5.037456035614014, "learning_rate": 9.332798023471278e-07, "loss": 0.089, "step": 5406 }, { "epoch": 3.3403335392217417, "grad_norm": 4.565584659576416, "learning_rate": 9.332550957381099e-07, "loss": 0.1019, "step": 5408 }, { "epoch": 3.3415688696726376, "grad_norm": 6.578120708465576, "learning_rate": 9.33230389129092e-07, "loss": 0.143, "step": 5410 }, { "epoch": 3.342804200123533, "grad_norm": 3.831688165664673, "learning_rate": 9.332056825200741e-07, "loss": 0.0661, "step": 5412 }, { "epoch": 3.344039530574429, "grad_norm": 7.88670015335083, "learning_rate": 
9.331809759110562e-07, "loss": 0.4997, "step": 5414 }, { "epoch": 3.3452748610253242, "grad_norm": 4.24778938293457, "learning_rate": 9.331562693020383e-07, "loss": 0.0792, "step": 5416 }, { "epoch": 3.34651019147622, "grad_norm": 5.839548587799072, "learning_rate": 9.331315626930203e-07, "loss": 0.1309, "step": 5418 }, { "epoch": 3.3477455219271155, "grad_norm": 5.74635124206543, "learning_rate": 9.331068560840024e-07, "loss": 0.0884, "step": 5420 }, { "epoch": 3.348980852378011, "grad_norm": 4.802165508270264, "learning_rate": 9.330821494749846e-07, "loss": 0.0987, "step": 5422 }, { "epoch": 3.3502161828289068, "grad_norm": 5.123709201812744, "learning_rate": 9.330574428659666e-07, "loss": 0.0582, "step": 5424 }, { "epoch": 3.351451513279802, "grad_norm": 10.165739059448242, "learning_rate": 9.330327362569487e-07, "loss": 0.2071, "step": 5426 }, { "epoch": 3.352686843730698, "grad_norm": 8.773980140686035, "learning_rate": 9.330080296479308e-07, "loss": 0.1389, "step": 5428 }, { "epoch": 3.3539221741815934, "grad_norm": 5.529778003692627, "learning_rate": 9.329833230389129e-07, "loss": 0.1347, "step": 5430 }, { "epoch": 3.3551575046324893, "grad_norm": 8.58530330657959, "learning_rate": 9.32958616429895e-07, "loss": 0.6532, "step": 5432 }, { "epoch": 3.3563928350833847, "grad_norm": 7.877458572387695, "learning_rate": 9.329339098208771e-07, "loss": 0.1853, "step": 5434 }, { "epoch": 3.3576281655342806, "grad_norm": 5.164477348327637, "learning_rate": 9.329092032118592e-07, "loss": 0.1221, "step": 5436 }, { "epoch": 3.358863495985176, "grad_norm": 4.223608493804932, "learning_rate": 9.328844966028412e-07, "loss": 0.0654, "step": 5438 }, { "epoch": 3.360098826436072, "grad_norm": 4.03449010848999, "learning_rate": 9.328597899938233e-07, "loss": 0.1866, "step": 5440 }, { "epoch": 3.3613341568869672, "grad_norm": 3.1238813400268555, "learning_rate": 9.328350833848055e-07, "loss": 0.1116, "step": 5442 }, { "epoch": 3.362569487337863, "grad_norm": 7.836691856384277, 
"learning_rate": 9.328103767757875e-07, "loss": 0.0543, "step": 5444 }, { "epoch": 3.3638048177887585, "grad_norm": 2.9494245052337646, "learning_rate": 9.327856701667696e-07, "loss": 0.0275, "step": 5446 }, { "epoch": 3.365040148239654, "grad_norm": 4.899575233459473, "learning_rate": 9.327609635577517e-07, "loss": 0.0997, "step": 5448 }, { "epoch": 3.3662754786905498, "grad_norm": 2.566070795059204, "learning_rate": 9.327362569487336e-07, "loss": 0.1504, "step": 5450 }, { "epoch": 3.367510809141445, "grad_norm": 10.011204719543457, "learning_rate": 9.327115503397159e-07, "loss": 0.1282, "step": 5452 }, { "epoch": 3.368746139592341, "grad_norm": 2.6964914798736572, "learning_rate": 9.32686843730698e-07, "loss": 0.0304, "step": 5454 }, { "epoch": 3.3699814700432364, "grad_norm": 8.73211669921875, "learning_rate": 9.326621371216799e-07, "loss": 0.1167, "step": 5456 }, { "epoch": 3.3712168004941323, "grad_norm": 7.511831283569336, "learning_rate": 9.32637430512662e-07, "loss": 0.0892, "step": 5458 }, { "epoch": 3.3724521309450277, "grad_norm": 3.831012010574341, "learning_rate": 9.326127239036443e-07, "loss": 0.0898, "step": 5460 }, { "epoch": 3.3736874613959236, "grad_norm": 1.7199493646621704, "learning_rate": 9.325880172946262e-07, "loss": 0.0604, "step": 5462 }, { "epoch": 3.374922791846819, "grad_norm": 8.395380020141602, "learning_rate": 9.325633106856083e-07, "loss": 0.1294, "step": 5464 }, { "epoch": 3.3761581222977144, "grad_norm": 11.41085147857666, "learning_rate": 9.325386040765904e-07, "loss": 0.3, "step": 5466 }, { "epoch": 3.3773934527486102, "grad_norm": 3.2687764167785645, "learning_rate": 9.325138974675725e-07, "loss": 0.0916, "step": 5468 }, { "epoch": 3.378628783199506, "grad_norm": 10.38535213470459, "learning_rate": 9.324891908585546e-07, "loss": 0.2519, "step": 5470 }, { "epoch": 3.3798641136504015, "grad_norm": 7.004671573638916, "learning_rate": 9.324644842495367e-07, "loss": 0.1503, "step": 5472 }, { "epoch": 3.381099444101297, "grad_norm": 
7.7294745445251465, "learning_rate": 9.324397776405188e-07, "loss": 0.1152, "step": 5474 }, { "epoch": 3.382334774552193, "grad_norm": 7.487433910369873, "learning_rate": 9.324150710315008e-07, "loss": 0.1418, "step": 5476 }, { "epoch": 3.383570105003088, "grad_norm": 7.24869441986084, "learning_rate": 9.323903644224829e-07, "loss": 0.1099, "step": 5478 }, { "epoch": 3.384805435453984, "grad_norm": 5.5868401527404785, "learning_rate": 9.323656578134651e-07, "loss": 0.1087, "step": 5480 }, { "epoch": 3.3860407659048795, "grad_norm": 3.733945846557617, "learning_rate": 9.323409512044471e-07, "loss": 0.1253, "step": 5482 }, { "epoch": 3.3872760963557753, "grad_norm": 4.09145450592041, "learning_rate": 9.323162445954292e-07, "loss": 0.0744, "step": 5484 }, { "epoch": 3.3885114268066707, "grad_norm": 9.08012866973877, "learning_rate": 9.322915379864113e-07, "loss": 0.1484, "step": 5486 }, { "epoch": 3.3897467572575666, "grad_norm": 6.859593868255615, "learning_rate": 9.322668313773933e-07, "loss": 0.0793, "step": 5488 }, { "epoch": 3.390982087708462, "grad_norm": 12.818761825561523, "learning_rate": 9.322421247683755e-07, "loss": 0.3096, "step": 5490 }, { "epoch": 3.3922174181593574, "grad_norm": 10.49356460571289, "learning_rate": 9.322174181593576e-07, "loss": 0.2405, "step": 5492 }, { "epoch": 3.3934527486102533, "grad_norm": 11.93057918548584, "learning_rate": 9.321927115503396e-07, "loss": 0.1376, "step": 5494 }, { "epoch": 3.3946880790611487, "grad_norm": 10.04720401763916, "learning_rate": 9.321680049413217e-07, "loss": 0.1858, "step": 5496 }, { "epoch": 3.3959234095120445, "grad_norm": 8.79696273803711, "learning_rate": 9.321432983323039e-07, "loss": 0.1769, "step": 5498 }, { "epoch": 3.39715873996294, "grad_norm": 4.457633972167969, "learning_rate": 9.321185917232859e-07, "loss": 0.0847, "step": 5500 }, { "epoch": 3.39715873996294, "eval_cer": 0.05241293360105241, "eval_loss": 0.2411765307188034, "eval_runtime": 63.9603, "eval_samples_per_second": 12.836, 
"eval_steps_per_second": 1.61, "step": 5500 }, { "epoch": 3.398394070413836, "grad_norm": 2.0400807857513428, "learning_rate": 9.32093885114268e-07, "loss": 0.0524, "step": 5502 }, { "epoch": 3.399629400864731, "grad_norm": 4.713177680969238, "learning_rate": 9.320691785052501e-07, "loss": 0.0787, "step": 5504 }, { "epoch": 3.400864731315627, "grad_norm": 5.81998872756958, "learning_rate": 9.320444718962322e-07, "loss": 0.1243, "step": 5506 }, { "epoch": 3.4021000617665225, "grad_norm": 1.3650944232940674, "learning_rate": 9.320197652872143e-07, "loss": 0.0769, "step": 5508 }, { "epoch": 3.4033353922174183, "grad_norm": 5.7202653884887695, "learning_rate": 9.319950586781964e-07, "loss": 0.2616, "step": 5510 }, { "epoch": 3.4045707226683137, "grad_norm": 10.214456558227539, "learning_rate": 9.319703520691785e-07, "loss": 0.1207, "step": 5512 }, { "epoch": 3.4058060531192096, "grad_norm": 2.759676218032837, "learning_rate": 9.319456454601605e-07, "loss": 0.0891, "step": 5514 }, { "epoch": 3.407041383570105, "grad_norm": 2.8376424312591553, "learning_rate": 9.319209388511426e-07, "loss": 0.2278, "step": 5516 }, { "epoch": 3.4082767140210004, "grad_norm": 7.517940521240234, "learning_rate": 9.318962322421248e-07, "loss": 0.2306, "step": 5518 }, { "epoch": 3.4095120444718963, "grad_norm": 14.118997573852539, "learning_rate": 9.318715256331068e-07, "loss": 0.2199, "step": 5520 }, { "epoch": 3.4107473749227917, "grad_norm": 1.430908203125, "learning_rate": 9.318468190240889e-07, "loss": 0.0435, "step": 5522 }, { "epoch": 3.4119827053736875, "grad_norm": 3.698929786682129, "learning_rate": 9.31822112415071e-07, "loss": 0.1142, "step": 5524 }, { "epoch": 3.413218035824583, "grad_norm": 5.753276824951172, "learning_rate": 9.31797405806053e-07, "loss": 0.118, "step": 5526 }, { "epoch": 3.414453366275479, "grad_norm": 3.207190752029419, "learning_rate": 9.317726991970352e-07, "loss": 0.0356, "step": 5528 }, { "epoch": 3.415688696726374, "grad_norm": 5.204171657562256, 
"learning_rate": 9.317479925880173e-07, "loss": 0.1315, "step": 5530 }, { "epoch": 3.41692402717727, "grad_norm": 2.053248882293701, "learning_rate": 9.317232859789993e-07, "loss": 0.0423, "step": 5532 }, { "epoch": 3.4181593576281655, "grad_norm": 6.9229960441589355, "learning_rate": 9.316985793699814e-07, "loss": 0.1078, "step": 5534 }, { "epoch": 3.4193946880790613, "grad_norm": 9.389573097229004, "learning_rate": 9.316738727609636e-07, "loss": 0.1155, "step": 5536 }, { "epoch": 3.4206300185299567, "grad_norm": 6.118762969970703, "learning_rate": 9.316491661519456e-07, "loss": 0.1603, "step": 5538 }, { "epoch": 3.4218653489808526, "grad_norm": 1.5016924142837524, "learning_rate": 9.316244595429277e-07, "loss": 0.0582, "step": 5540 }, { "epoch": 3.423100679431748, "grad_norm": 5.31566858291626, "learning_rate": 9.315997529339098e-07, "loss": 0.1394, "step": 5542 }, { "epoch": 3.4243360098826434, "grad_norm": 10.062122344970703, "learning_rate": 9.315750463248918e-07, "loss": 0.2073, "step": 5544 }, { "epoch": 3.4255713403335393, "grad_norm": 5.511789321899414, "learning_rate": 9.31550339715874e-07, "loss": 0.2159, "step": 5546 }, { "epoch": 3.4268066707844347, "grad_norm": 6.877129077911377, "learning_rate": 9.315256331068561e-07, "loss": 0.1239, "step": 5548 }, { "epoch": 3.4280420012353305, "grad_norm": 7.405218601226807, "learning_rate": 9.315009264978382e-07, "loss": 0.2241, "step": 5550 }, { "epoch": 3.429277331686226, "grad_norm": 8.719903945922852, "learning_rate": 9.314762198888202e-07, "loss": 0.2777, "step": 5552 }, { "epoch": 3.430512662137122, "grad_norm": 5.311723232269287, "learning_rate": 9.314515132798023e-07, "loss": 0.2336, "step": 5554 }, { "epoch": 3.431747992588017, "grad_norm": 7.415318012237549, "learning_rate": 9.314268066707845e-07, "loss": 0.1787, "step": 5556 }, { "epoch": 3.432983323038913, "grad_norm": 2.2037854194641113, "learning_rate": 9.314021000617665e-07, "loss": 0.094, "step": 5558 }, { "epoch": 3.4342186534898085, "grad_norm": 
5.612064838409424, "learning_rate": 9.313773934527486e-07, "loss": 0.0668, "step": 5560 }, { "epoch": 3.435453983940704, "grad_norm": 9.441787719726562, "learning_rate": 9.313526868437307e-07, "loss": 0.1449, "step": 5562 }, { "epoch": 3.4366893143915997, "grad_norm": 9.504274368286133, "learning_rate": 9.313279802347127e-07, "loss": 0.1201, "step": 5564 }, { "epoch": 3.4379246448424956, "grad_norm": 4.68726921081543, "learning_rate": 9.313032736256949e-07, "loss": 0.0817, "step": 5566 }, { "epoch": 3.439159975293391, "grad_norm": 6.883860111236572, "learning_rate": 9.31278567016677e-07, "loss": 0.1016, "step": 5568 }, { "epoch": 3.4403953057442864, "grad_norm": 6.896549224853516, "learning_rate": 9.31253860407659e-07, "loss": 0.0979, "step": 5570 }, { "epoch": 3.4416306361951823, "grad_norm": 9.694355964660645, "learning_rate": 9.312291537986411e-07, "loss": 0.106, "step": 5572 }, { "epoch": 3.4428659666460777, "grad_norm": 4.014219284057617, "learning_rate": 9.312044471896232e-07, "loss": 0.0976, "step": 5574 }, { "epoch": 3.4441012970969735, "grad_norm": 2.6040380001068115, "learning_rate": 9.311797405806053e-07, "loss": 0.0606, "step": 5576 }, { "epoch": 3.445336627547869, "grad_norm": 2.282350778579712, "learning_rate": 9.311550339715874e-07, "loss": 0.0961, "step": 5578 }, { "epoch": 3.446571957998765, "grad_norm": 3.70759916305542, "learning_rate": 9.311303273625695e-07, "loss": 0.0868, "step": 5580 }, { "epoch": 3.4478072884496602, "grad_norm": 5.8223490715026855, "learning_rate": 9.311056207535515e-07, "loss": 0.1438, "step": 5582 }, { "epoch": 3.449042618900556, "grad_norm": 10.746100425720215, "learning_rate": 9.310809141445335e-07, "loss": 0.2751, "step": 5584 }, { "epoch": 3.4502779493514515, "grad_norm": 4.483519077301025, "learning_rate": 9.310562075355158e-07, "loss": 0.0845, "step": 5586 }, { "epoch": 3.451513279802347, "grad_norm": 5.0282697677612305, "learning_rate": 9.310315009264979e-07, "loss": 0.1087, "step": 5588 }, { "epoch": 
3.4527486102532428, "grad_norm": 9.536825180053711, "learning_rate": 9.310067943174798e-07, "loss": 0.2209, "step": 5590 }, { "epoch": 3.453983940704138, "grad_norm": 3.2194137573242188, "learning_rate": 9.309820877084619e-07, "loss": 0.0823, "step": 5592 }, { "epoch": 3.455219271155034, "grad_norm": 4.14432430267334, "learning_rate": 9.309573810994442e-07, "loss": 0.1114, "step": 5594 }, { "epoch": 3.4564546016059294, "grad_norm": 6.724538803100586, "learning_rate": 9.309326744904261e-07, "loss": 0.1014, "step": 5596 }, { "epoch": 3.4576899320568253, "grad_norm": 4.214767932891846, "learning_rate": 9.309079678814082e-07, "loss": 0.179, "step": 5598 }, { "epoch": 3.4589252625077207, "grad_norm": 5.808231353759766, "learning_rate": 9.308832612723903e-07, "loss": 0.0761, "step": 5600 }, { "epoch": 3.4601605929586166, "grad_norm": 5.38842248916626, "learning_rate": 9.308585546633723e-07, "loss": 0.1119, "step": 5602 }, { "epoch": 3.461395923409512, "grad_norm": 3.640423536300659, "learning_rate": 9.308338480543545e-07, "loss": 0.1596, "step": 5604 }, { "epoch": 3.462631253860408, "grad_norm": 3.9884212017059326, "learning_rate": 9.308091414453366e-07, "loss": 0.0527, "step": 5606 }, { "epoch": 3.4638665843113032, "grad_norm": 7.400572299957275, "learning_rate": 9.307844348363186e-07, "loss": 0.1664, "step": 5608 }, { "epoch": 3.465101914762199, "grad_norm": 9.243182182312012, "learning_rate": 9.307597282273007e-07, "loss": 0.2006, "step": 5610 }, { "epoch": 3.4663372452130945, "grad_norm": 7.151304721832275, "learning_rate": 9.307350216182828e-07, "loss": 0.2657, "step": 5612 }, { "epoch": 3.46757257566399, "grad_norm": 9.160399436950684, "learning_rate": 9.307103150092649e-07, "loss": 0.1912, "step": 5614 }, { "epoch": 3.4688079061148858, "grad_norm": 2.867583751678467, "learning_rate": 9.30685608400247e-07, "loss": 0.1251, "step": 5616 }, { "epoch": 3.470043236565781, "grad_norm": 5.970004558563232, "learning_rate": 9.306609017912291e-07, "loss": 0.1286, "step": 
5618 }, { "epoch": 3.471278567016677, "grad_norm": 10.741471290588379, "learning_rate": 9.306361951822111e-07, "loss": 0.2432, "step": 5620 }, { "epoch": 3.4725138974675724, "grad_norm": 2.336322069168091, "learning_rate": 9.306114885731932e-07, "loss": 0.0317, "step": 5622 }, { "epoch": 3.4737492279184683, "grad_norm": 3.2767422199249268, "learning_rate": 9.305867819641754e-07, "loss": 0.1339, "step": 5624 }, { "epoch": 3.4749845583693637, "grad_norm": 7.106132507324219, "learning_rate": 9.305620753551575e-07, "loss": 0.1659, "step": 5626 }, { "epoch": 3.4762198888202596, "grad_norm": 7.811439037322998, "learning_rate": 9.305373687461395e-07, "loss": 0.0959, "step": 5628 }, { "epoch": 3.477455219271155, "grad_norm": 6.107494831085205, "learning_rate": 9.305126621371216e-07, "loss": 0.042, "step": 5630 }, { "epoch": 3.478690549722051, "grad_norm": 3.9336421489715576, "learning_rate": 9.304879555281038e-07, "loss": 0.1161, "step": 5632 }, { "epoch": 3.4799258801729462, "grad_norm": 5.405012607574463, "learning_rate": 9.304632489190858e-07, "loss": 0.1115, "step": 5634 }, { "epoch": 3.481161210623842, "grad_norm": 7.15204381942749, "learning_rate": 9.304385423100679e-07, "loss": 0.0757, "step": 5636 }, { "epoch": 3.4823965410747375, "grad_norm": 5.862243175506592, "learning_rate": 9.3041383570105e-07, "loss": 0.0856, "step": 5638 }, { "epoch": 3.483631871525633, "grad_norm": 5.113637447357178, "learning_rate": 9.30389129092032e-07, "loss": 0.0776, "step": 5640 }, { "epoch": 3.4848672019765288, "grad_norm": 6.447036266326904, "learning_rate": 9.303644224830142e-07, "loss": 0.2577, "step": 5642 }, { "epoch": 3.486102532427424, "grad_norm": 1.9295741319656372, "learning_rate": 9.303397158739963e-07, "loss": 0.0633, "step": 5644 }, { "epoch": 3.48733786287832, "grad_norm": 4.771258354187012, "learning_rate": 9.303150092649783e-07, "loss": 0.0592, "step": 5646 }, { "epoch": 3.4885731933292154, "grad_norm": 13.975327491760254, "learning_rate": 9.302903026559604e-07, 
"loss": 0.1686, "step": 5648 }, { "epoch": 3.4898085237801113, "grad_norm": 2.0206658840179443, "learning_rate": 9.302655960469425e-07, "loss": 0.1319, "step": 5650 }, { "epoch": 3.4910438542310067, "grad_norm": 7.510827541351318, "learning_rate": 9.302408894379246e-07, "loss": 0.139, "step": 5652 }, { "epoch": 3.4922791846819026, "grad_norm": 3.540858030319214, "learning_rate": 9.302161828289067e-07, "loss": 0.0673, "step": 5654 }, { "epoch": 3.493514515132798, "grad_norm": 2.094536304473877, "learning_rate": 9.301914762198888e-07, "loss": 0.0362, "step": 5656 }, { "epoch": 3.4947498455836934, "grad_norm": 5.380131721496582, "learning_rate": 9.301667696108708e-07, "loss": 0.148, "step": 5658 }, { "epoch": 3.4959851760345892, "grad_norm": 7.135090351104736, "learning_rate": 9.301420630018529e-07, "loss": 0.0838, "step": 5660 }, { "epoch": 3.497220506485485, "grad_norm": 7.2599711418151855, "learning_rate": 9.301173563928351e-07, "loss": 0.091, "step": 5662 }, { "epoch": 3.4984558369363805, "grad_norm": 5.034377574920654, "learning_rate": 9.300926497838171e-07, "loss": 0.1127, "step": 5664 }, { "epoch": 3.499691167387276, "grad_norm": 4.322237014770508, "learning_rate": 9.300679431747992e-07, "loss": 0.0686, "step": 5666 }, { "epoch": 3.500926497838172, "grad_norm": 6.20134973526001, "learning_rate": 9.300432365657813e-07, "loss": 0.1113, "step": 5668 }, { "epoch": 3.502161828289067, "grad_norm": 7.4715986251831055, "learning_rate": 9.300185299567635e-07, "loss": 0.193, "step": 5670 }, { "epoch": 3.503397158739963, "grad_norm": 8.144533157348633, "learning_rate": 9.299938233477455e-07, "loss": 0.0927, "step": 5672 }, { "epoch": 3.5046324891908585, "grad_norm": 11.203166961669922, "learning_rate": 9.299691167387276e-07, "loss": 0.3153, "step": 5674 }, { "epoch": 3.5058678196417543, "grad_norm": 4.479475975036621, "learning_rate": 9.299444101297097e-07, "loss": 0.1443, "step": 5676 }, { "epoch": 3.5071031500926497, "grad_norm": 2.631784200668335, "learning_rate": 
9.299197035206917e-07, "loss": 0.0505, "step": 5678 }, { "epoch": 3.5083384805435456, "grad_norm": 7.066079139709473, "learning_rate": 9.298949969116739e-07, "loss": 0.1845, "step": 5680 }, { "epoch": 3.509573810994441, "grad_norm": 5.701436996459961, "learning_rate": 9.29870290302656e-07, "loss": 0.0811, "step": 5682 }, { "epoch": 3.5108091414453364, "grad_norm": 6.136870384216309, "learning_rate": 9.29845583693638e-07, "loss": 0.1053, "step": 5684 }, { "epoch": 3.5120444718962323, "grad_norm": 11.87230396270752, "learning_rate": 9.298208770846201e-07, "loss": 0.3336, "step": 5686 }, { "epoch": 3.513279802347128, "grad_norm": 3.013829231262207, "learning_rate": 9.297961704756022e-07, "loss": 0.0271, "step": 5688 }, { "epoch": 3.5145151327980235, "grad_norm": 10.304848670959473, "learning_rate": 9.297714638665843e-07, "loss": 0.137, "step": 5690 }, { "epoch": 3.515750463248919, "grad_norm": 5.434764862060547, "learning_rate": 9.297467572575664e-07, "loss": 0.1275, "step": 5692 }, { "epoch": 3.516985793699815, "grad_norm": 4.942391395568848, "learning_rate": 9.297220506485485e-07, "loss": 0.0816, "step": 5694 }, { "epoch": 3.51822112415071, "grad_norm": 6.3259124755859375, "learning_rate": 9.296973440395305e-07, "loss": 0.108, "step": 5696 }, { "epoch": 3.519456454601606, "grad_norm": 3.0425174236297607, "learning_rate": 9.296726374305126e-07, "loss": 0.1054, "step": 5698 }, { "epoch": 3.5206917850525015, "grad_norm": 6.083683013916016, "learning_rate": 9.296479308214948e-07, "loss": 0.1422, "step": 5700 }, { "epoch": 3.5219271155033973, "grad_norm": 7.862792491912842, "learning_rate": 9.296232242124768e-07, "loss": 0.1791, "step": 5702 }, { "epoch": 3.5231624459542927, "grad_norm": 3.3270044326782227, "learning_rate": 9.295985176034589e-07, "loss": 0.0391, "step": 5704 }, { "epoch": 3.5243977764051886, "grad_norm": 4.845223426818848, "learning_rate": 9.29573810994441e-07, "loss": 0.0517, "step": 5706 }, { "epoch": 3.525633106856084, "grad_norm": 5.137983322143555, 
"learning_rate": 9.295491043854231e-07, "loss": 0.1641, "step": 5708 }, { "epoch": 3.5268684373069794, "grad_norm": 4.856241226196289, "learning_rate": 9.295243977764052e-07, "loss": 0.2191, "step": 5710 }, { "epoch": 3.5281037677578753, "grad_norm": 10.4603853225708, "learning_rate": 9.294996911673873e-07, "loss": 0.1282, "step": 5712 }, { "epoch": 3.529339098208771, "grad_norm": 12.80218505859375, "learning_rate": 9.294749845583694e-07, "loss": 0.3674, "step": 5714 }, { "epoch": 3.5305744286596665, "grad_norm": 3.8871846199035645, "learning_rate": 9.294502779493514e-07, "loss": 0.118, "step": 5716 }, { "epoch": 3.531809759110562, "grad_norm": 6.443920135498047, "learning_rate": 9.294255713403335e-07, "loss": 0.1528, "step": 5718 }, { "epoch": 3.533045089561458, "grad_norm": 9.66309642791748, "learning_rate": 9.294008647313157e-07, "loss": 0.0784, "step": 5720 }, { "epoch": 3.534280420012353, "grad_norm": 4.727669715881348, "learning_rate": 9.293761581222976e-07, "loss": 0.0609, "step": 5722 }, { "epoch": 3.535515750463249, "grad_norm": 1.2842477560043335, "learning_rate": 9.293514515132797e-07, "loss": 0.2591, "step": 5724 }, { "epoch": 3.5367510809141445, "grad_norm": 6.794493198394775, "learning_rate": 9.293267449042618e-07, "loss": 0.1471, "step": 5726 }, { "epoch": 3.53798641136504, "grad_norm": 5.454641819000244, "learning_rate": 9.293020382952439e-07, "loss": 0.096, "step": 5728 }, { "epoch": 3.5392217418159357, "grad_norm": 7.717525959014893, "learning_rate": 9.29277331686226e-07, "loss": 0.1881, "step": 5730 }, { "epoch": 3.5404570722668316, "grad_norm": 5.542108535766602, "learning_rate": 9.292526250772081e-07, "loss": 0.1353, "step": 5732 }, { "epoch": 3.541692402717727, "grad_norm": 7.334532260894775, "learning_rate": 9.292279184681901e-07, "loss": 0.2186, "step": 5734 }, { "epoch": 3.5429277331686224, "grad_norm": 6.068449020385742, "learning_rate": 9.292032118591722e-07, "loss": 0.1035, "step": 5736 }, { "epoch": 3.5441630636195183, "grad_norm": 
3.2163314819335938, "learning_rate": 9.291785052501544e-07, "loss": 0.033, "step": 5738 }, { "epoch": 3.5453983940704137, "grad_norm": 8.891519546508789, "learning_rate": 9.291537986411364e-07, "loss": 0.2489, "step": 5740 }, { "epoch": 3.5466337245213095, "grad_norm": 2.1294522285461426, "learning_rate": 9.291290920321185e-07, "loss": 0.0973, "step": 5742 }, { "epoch": 3.547869054972205, "grad_norm": 3.849573850631714, "learning_rate": 9.291043854231006e-07, "loss": 0.0404, "step": 5744 }, { "epoch": 3.549104385423101, "grad_norm": 6.235145092010498, "learning_rate": 9.290796788140827e-07, "loss": 0.0755, "step": 5746 }, { "epoch": 3.550339715873996, "grad_norm": 1.9533294439315796, "learning_rate": 9.290549722050648e-07, "loss": 0.097, "step": 5748 }, { "epoch": 3.551575046324892, "grad_norm": 7.897111415863037, "learning_rate": 9.290302655960469e-07, "loss": 0.1124, "step": 5750 }, { "epoch": 3.5528103767757875, "grad_norm": 4.259381294250488, "learning_rate": 9.29005558987029e-07, "loss": 0.1926, "step": 5752 }, { "epoch": 3.554045707226683, "grad_norm": 5.677684307098389, "learning_rate": 9.28980852378011e-07, "loss": 0.2546, "step": 5754 }, { "epoch": 3.5552810376775787, "grad_norm": 5.470479965209961, "learning_rate": 9.289561457689931e-07, "loss": 0.0733, "step": 5756 }, { "epoch": 3.5565163681284746, "grad_norm": 11.663447380065918, "learning_rate": 9.289314391599753e-07, "loss": 0.272, "step": 5758 }, { "epoch": 3.55775169857937, "grad_norm": 4.678787708282471, "learning_rate": 9.289067325509573e-07, "loss": 0.272, "step": 5760 }, { "epoch": 3.5589870290302654, "grad_norm": 7.426303386688232, "learning_rate": 9.288820259419394e-07, "loss": 0.1584, "step": 5762 }, { "epoch": 3.5602223594811613, "grad_norm": 3.90014910697937, "learning_rate": 9.288573193329215e-07, "loss": 0.1211, "step": 5764 }, { "epoch": 3.5614576899320567, "grad_norm": 4.478926181793213, "learning_rate": 9.288326127239035e-07, "loss": 0.0781, "step": 5766 }, { "epoch": 
3.5626930203829525, "grad_norm": 5.8954291343688965, "learning_rate": 9.288079061148857e-07, "loss": 0.0438, "step": 5768 }, { "epoch": 3.563928350833848, "grad_norm": 2.8972201347351074, "learning_rate": 9.287831995058678e-07, "loss": 0.0347, "step": 5770 }, { "epoch": 3.565163681284744, "grad_norm": 9.359313011169434, "learning_rate": 9.287584928968498e-07, "loss": 0.1424, "step": 5772 }, { "epoch": 3.5663990117356392, "grad_norm": 5.866787433624268, "learning_rate": 9.287337862878319e-07, "loss": 0.0925, "step": 5774 }, { "epoch": 3.567634342186535, "grad_norm": 4.720247268676758, "learning_rate": 9.287090796788141e-07, "loss": 0.1915, "step": 5776 }, { "epoch": 3.5688696726374305, "grad_norm": 3.828629970550537, "learning_rate": 9.286843730697961e-07, "loss": 0.0799, "step": 5778 }, { "epoch": 3.570105003088326, "grad_norm": 1.4006502628326416, "learning_rate": 9.286596664607782e-07, "loss": 0.0421, "step": 5780 }, { "epoch": 3.5713403335392218, "grad_norm": 8.126641273498535, "learning_rate": 9.286349598517603e-07, "loss": 0.1401, "step": 5782 }, { "epoch": 3.5725756639901176, "grad_norm": 10.364194869995117, "learning_rate": 9.286102532427423e-07, "loss": 0.275, "step": 5784 }, { "epoch": 3.573810994441013, "grad_norm": 7.06198787689209, "learning_rate": 9.285855466337245e-07, "loss": 0.1171, "step": 5786 }, { "epoch": 3.5750463248919084, "grad_norm": 6.938035011291504, "learning_rate": 9.285608400247066e-07, "loss": 0.0917, "step": 5788 }, { "epoch": 3.5762816553428043, "grad_norm": 3.169788360595703, "learning_rate": 9.285361334156887e-07, "loss": 0.0426, "step": 5790 }, { "epoch": 3.5775169857936997, "grad_norm": 7.075786590576172, "learning_rate": 9.285114268066707e-07, "loss": 0.1758, "step": 5792 }, { "epoch": 3.5787523162445956, "grad_norm": 11.781095504760742, "learning_rate": 9.284867201976528e-07, "loss": 0.0834, "step": 5794 }, { "epoch": 3.579987646695491, "grad_norm": 14.461583137512207, "learning_rate": 9.28462013588635e-07, "loss": 0.3805, 
"step": 5796 }, { "epoch": 3.581222977146387, "grad_norm": 5.141334533691406, "learning_rate": 9.28437306979617e-07, "loss": 0.224, "step": 5798 }, { "epoch": 3.5824583075972822, "grad_norm": 7.975008964538574, "learning_rate": 9.284126003705991e-07, "loss": 0.0513, "step": 5800 }, { "epoch": 3.583693638048178, "grad_norm": 3.4325239658355713, "learning_rate": 9.283878937615812e-07, "loss": 0.1165, "step": 5802 }, { "epoch": 3.5849289684990735, "grad_norm": 4.9290242195129395, "learning_rate": 9.283631871525632e-07, "loss": 0.1555, "step": 5804 }, { "epoch": 3.586164298949969, "grad_norm": 3.395967960357666, "learning_rate": 9.283384805435454e-07, "loss": 0.0597, "step": 5806 }, { "epoch": 3.5873996294008648, "grad_norm": 7.5358805656433105, "learning_rate": 9.283137739345275e-07, "loss": 0.1716, "step": 5808 }, { "epoch": 3.5886349598517606, "grad_norm": 5.936628818511963, "learning_rate": 9.282890673255095e-07, "loss": 0.0735, "step": 5810 }, { "epoch": 3.589870290302656, "grad_norm": 5.63833475112915, "learning_rate": 9.282643607164916e-07, "loss": 0.0516, "step": 5812 }, { "epoch": 3.5911056207535514, "grad_norm": 3.41776704788208, "learning_rate": 9.282396541074738e-07, "loss": 0.1271, "step": 5814 }, { "epoch": 3.5923409512044473, "grad_norm": 3.987887144088745, "learning_rate": 9.282149474984558e-07, "loss": 0.0729, "step": 5816 }, { "epoch": 3.5935762816553427, "grad_norm": 11.841955184936523, "learning_rate": 9.281902408894379e-07, "loss": 0.1537, "step": 5818 }, { "epoch": 3.5948116121062386, "grad_norm": 8.456028938293457, "learning_rate": 9.2816553428042e-07, "loss": 0.2533, "step": 5820 }, { "epoch": 3.596046942557134, "grad_norm": 7.694912433624268, "learning_rate": 9.28140827671402e-07, "loss": 0.0823, "step": 5822 }, { "epoch": 3.5972822730080294, "grad_norm": 2.3006653785705566, "learning_rate": 9.281161210623842e-07, "loss": 0.0615, "step": 5824 }, { "epoch": 3.5985176034589252, "grad_norm": 2.770542621612549, "learning_rate": 
9.280914144533663e-07, "loss": 0.0327, "step": 5826 }, { "epoch": 3.599752933909821, "grad_norm": 8.859792709350586, "learning_rate": 9.280667078443484e-07, "loss": 0.1066, "step": 5828 }, { "epoch": 3.6009882643607165, "grad_norm": 3.5365922451019287, "learning_rate": 9.280420012353304e-07, "loss": 0.0726, "step": 5830 }, { "epoch": 3.602223594811612, "grad_norm": 1.0910652875900269, "learning_rate": 9.280172946263125e-07, "loss": 0.0406, "step": 5832 }, { "epoch": 3.6034589252625078, "grad_norm": 6.128666400909424, "learning_rate": 9.279925880172947e-07, "loss": 0.1359, "step": 5834 }, { "epoch": 3.604694255713403, "grad_norm": 3.22670578956604, "learning_rate": 9.279678814082767e-07, "loss": 0.0461, "step": 5836 }, { "epoch": 3.605929586164299, "grad_norm": 8.05734634399414, "learning_rate": 9.279431747992588e-07, "loss": 0.0668, "step": 5838 }, { "epoch": 3.6071649166151944, "grad_norm": 6.651103973388672, "learning_rate": 9.279184681902409e-07, "loss": 0.1435, "step": 5840 }, { "epoch": 3.6084002470660903, "grad_norm": 4.018473148345947, "learning_rate": 9.278937615812229e-07, "loss": 0.0869, "step": 5842 }, { "epoch": 3.6096355775169857, "grad_norm": 2.9919724464416504, "learning_rate": 9.278690549722051e-07, "loss": 0.0598, "step": 5844 }, { "epoch": 3.6108709079678816, "grad_norm": 4.853970527648926, "learning_rate": 9.278443483631872e-07, "loss": 0.1908, "step": 5846 }, { "epoch": 3.612106238418777, "grad_norm": 3.6671974658966064, "learning_rate": 9.278196417541692e-07, "loss": 0.0317, "step": 5848 }, { "epoch": 3.6133415688696724, "grad_norm": 6.768109321594238, "learning_rate": 9.277949351451513e-07, "loss": 0.1809, "step": 5850 }, { "epoch": 3.6145768993205682, "grad_norm": 11.358040809631348, "learning_rate": 9.277702285361334e-07, "loss": 0.1251, "step": 5852 }, { "epoch": 3.615812229771464, "grad_norm": 2.0870087146759033, "learning_rate": 9.277455219271154e-07, "loss": 0.0766, "step": 5854 }, { "epoch": 3.6170475602223595, "grad_norm": 
4.736161231994629, "learning_rate": 9.277208153180975e-07, "loss": 0.1132, "step": 5856 }, { "epoch": 3.618282890673255, "grad_norm": 5.044903755187988, "learning_rate": 9.276961087090796e-07, "loss": 0.0594, "step": 5858 }, { "epoch": 3.619518221124151, "grad_norm": 3.9958925247192383, "learning_rate": 9.276714021000616e-07, "loss": 0.1933, "step": 5860 }, { "epoch": 3.620753551575046, "grad_norm": 6.112231731414795, "learning_rate": 9.276466954910438e-07, "loss": 0.082, "step": 5862 }, { "epoch": 3.621988882025942, "grad_norm": 8.454690933227539, "learning_rate": 9.276219888820259e-07, "loss": 0.1344, "step": 5864 }, { "epoch": 3.6232242124768375, "grad_norm": 18.003637313842773, "learning_rate": 9.27597282273008e-07, "loss": 0.2145, "step": 5866 }, { "epoch": 3.6244595429277333, "grad_norm": 2.3891830444335938, "learning_rate": 9.2757257566399e-07, "loss": 0.0244, "step": 5868 }, { "epoch": 3.6256948733786287, "grad_norm": 8.383711814880371, "learning_rate": 9.275478690549721e-07, "loss": 0.1882, "step": 5870 }, { "epoch": 3.6269302038295246, "grad_norm": 7.811016082763672, "learning_rate": 9.275231624459543e-07, "loss": 0.114, "step": 5872 }, { "epoch": 3.62816553428042, "grad_norm": 5.232565879821777, "learning_rate": 9.274984558369363e-07, "loss": 0.0369, "step": 5874 }, { "epoch": 3.6294008647313154, "grad_norm": 4.598668575286865, "learning_rate": 9.274737492279184e-07, "loss": 0.1118, "step": 5876 }, { "epoch": 3.6306361951822113, "grad_norm": 3.541700601577759, "learning_rate": 9.274490426189005e-07, "loss": 0.1546, "step": 5878 }, { "epoch": 3.631871525633107, "grad_norm": 4.984029293060303, "learning_rate": 9.274243360098825e-07, "loss": 0.1022, "step": 5880 }, { "epoch": 3.6331068560840025, "grad_norm": 3.568045139312744, "learning_rate": 9.273996294008647e-07, "loss": 0.0637, "step": 5882 }, { "epoch": 3.634342186534898, "grad_norm": 6.37099027633667, "learning_rate": 9.273749227918468e-07, "loss": 0.076, "step": 5884 }, { "epoch": 3.635577516985794, 
"grad_norm": 2.3006350994110107, "learning_rate": 9.273502161828288e-07, "loss": 0.0775, "step": 5886 }, { "epoch": 3.636812847436689, "grad_norm": 6.063013076782227, "learning_rate": 9.273255095738109e-07, "loss": 0.2948, "step": 5888 }, { "epoch": 3.638048177887585, "grad_norm": 3.295900821685791, "learning_rate": 9.27300802964793e-07, "loss": 0.0625, "step": 5890 }, { "epoch": 3.6392835083384805, "grad_norm": 3.8882100582122803, "learning_rate": 9.272760963557751e-07, "loss": 0.0377, "step": 5892 }, { "epoch": 3.6405188387893763, "grad_norm": 2.5078883171081543, "learning_rate": 9.272513897467572e-07, "loss": 0.049, "step": 5894 }, { "epoch": 3.6417541692402717, "grad_norm": 4.619590759277344, "learning_rate": 9.272266831377393e-07, "loss": 0.1103, "step": 5896 }, { "epoch": 3.6429894996911676, "grad_norm": 5.287200450897217, "learning_rate": 9.272019765287213e-07, "loss": 0.0478, "step": 5898 }, { "epoch": 3.644224830142063, "grad_norm": 11.45142650604248, "learning_rate": 9.271772699197034e-07, "loss": 0.1623, "step": 5900 }, { "epoch": 3.6454601605929584, "grad_norm": 3.945894479751587, "learning_rate": 9.271525633106856e-07, "loss": 0.1673, "step": 5902 }, { "epoch": 3.6466954910438543, "grad_norm": 3.7838330268859863, "learning_rate": 9.271278567016676e-07, "loss": 0.0747, "step": 5904 }, { "epoch": 3.64793082149475, "grad_norm": 2.1622304916381836, "learning_rate": 9.271031500926497e-07, "loss": 0.0289, "step": 5906 }, { "epoch": 3.6491661519456455, "grad_norm": 9.298218727111816, "learning_rate": 9.270784434836318e-07, "loss": 0.2302, "step": 5908 }, { "epoch": 3.650401482396541, "grad_norm": 4.8782572746276855, "learning_rate": 9.27053736874614e-07, "loss": 0.0866, "step": 5910 }, { "epoch": 3.651636812847437, "grad_norm": 3.970357656478882, "learning_rate": 9.27029030265596e-07, "loss": 0.0487, "step": 5912 }, { "epoch": 3.652872143298332, "grad_norm": 4.274364471435547, "learning_rate": 9.270043236565781e-07, "loss": 0.1122, "step": 5914 }, { "epoch": 
3.654107473749228, "grad_norm": 3.8562111854553223, "learning_rate": 9.269796170475602e-07, "loss": 0.1176, "step": 5916 }, { "epoch": 3.6553428042001235, "grad_norm": 17.6290225982666, "learning_rate": 9.269549104385422e-07, "loss": 0.329, "step": 5918 }, { "epoch": 3.656578134651019, "grad_norm": 6.5719170570373535, "learning_rate": 9.269302038295244e-07, "loss": 0.1258, "step": 5920 }, { "epoch": 3.6578134651019147, "grad_norm": 9.148049354553223, "learning_rate": 9.269054972205065e-07, "loss": 0.2264, "step": 5922 }, { "epoch": 3.6590487955528106, "grad_norm": 3.986654281616211, "learning_rate": 9.268807906114885e-07, "loss": 0.1103, "step": 5924 }, { "epoch": 3.660284126003706, "grad_norm": 15.611662864685059, "learning_rate": 9.268560840024706e-07, "loss": 0.217, "step": 5926 }, { "epoch": 3.6615194564546014, "grad_norm": 3.4016919136047363, "learning_rate": 9.268313773934527e-07, "loss": 0.1021, "step": 5928 }, { "epoch": 3.6627547869054973, "grad_norm": 5.366927623748779, "learning_rate": 9.268066707844348e-07, "loss": 0.1089, "step": 5930 }, { "epoch": 3.6639901173563927, "grad_norm": 8.466314315795898, "learning_rate": 9.267819641754169e-07, "loss": 0.1076, "step": 5932 }, { "epoch": 3.6652254478072885, "grad_norm": 3.6685433387756348, "learning_rate": 9.26757257566399e-07, "loss": 0.0557, "step": 5934 }, { "epoch": 3.666460778258184, "grad_norm": 7.035249710083008, "learning_rate": 9.26732550957381e-07, "loss": 0.0697, "step": 5936 }, { "epoch": 3.66769610870908, "grad_norm": 6.656756401062012, "learning_rate": 9.267078443483631e-07, "loss": 0.1434, "step": 5938 }, { "epoch": 3.668931439159975, "grad_norm": 5.0675787925720215, "learning_rate": 9.266831377393453e-07, "loss": 0.065, "step": 5940 }, { "epoch": 3.670166769610871, "grad_norm": 11.526930809020996, "learning_rate": 9.266584311303273e-07, "loss": 0.2536, "step": 5942 }, { "epoch": 3.6714021000617665, "grad_norm": 8.63247299194336, "learning_rate": 9.266337245213094e-07, "loss": 0.1617, "step": 
5944 }, { "epoch": 3.672637430512662, "grad_norm": 15.678590774536133, "learning_rate": 9.266090179122915e-07, "loss": 0.2723, "step": 5946 }, { "epoch": 3.6738727609635577, "grad_norm": 8.121999740600586, "learning_rate": 9.265843113032737e-07, "loss": 0.1446, "step": 5948 }, { "epoch": 3.6751080914144536, "grad_norm": 6.848685264587402, "learning_rate": 9.265596046942557e-07, "loss": 0.2113, "step": 5950 }, { "epoch": 3.676343421865349, "grad_norm": 4.845231533050537, "learning_rate": 9.265348980852378e-07, "loss": 0.0526, "step": 5952 }, { "epoch": 3.6775787523162444, "grad_norm": 6.051812648773193, "learning_rate": 9.265101914762199e-07, "loss": 0.1702, "step": 5954 }, { "epoch": 3.6788140827671403, "grad_norm": 5.461794376373291, "learning_rate": 9.264854848672019e-07, "loss": 0.0885, "step": 5956 }, { "epoch": 3.6800494132180357, "grad_norm": 7.964094638824463, "learning_rate": 9.264607782581841e-07, "loss": 0.1404, "step": 5958 }, { "epoch": 3.6812847436689315, "grad_norm": 7.814087867736816, "learning_rate": 9.264360716491662e-07, "loss": 0.1657, "step": 5960 }, { "epoch": 3.682520074119827, "grad_norm": 6.887968063354492, "learning_rate": 9.264113650401482e-07, "loss": 0.1801, "step": 5962 }, { "epoch": 3.683755404570723, "grad_norm": 4.901174545288086, "learning_rate": 9.263866584311303e-07, "loss": 0.1917, "step": 5964 }, { "epoch": 3.6849907350216182, "grad_norm": 3.88356614112854, "learning_rate": 9.263619518221124e-07, "loss": 0.4532, "step": 5966 }, { "epoch": 3.686226065472514, "grad_norm": 4.4976935386657715, "learning_rate": 9.263372452130945e-07, "loss": 0.0553, "step": 5968 }, { "epoch": 3.6874613959234095, "grad_norm": 9.069510459899902, "learning_rate": 9.263125386040766e-07, "loss": 0.142, "step": 5970 }, { "epoch": 3.688696726374305, "grad_norm": 5.300302028656006, "learning_rate": 9.262878319950587e-07, "loss": 0.0834, "step": 5972 }, { "epoch": 3.6899320568252008, "grad_norm": 1.7811568975448608, "learning_rate": 9.262631253860407e-07, 
"loss": 0.1403, "step": 5974 }, { "epoch": 3.6911673872760966, "grad_norm": 4.331371784210205, "learning_rate": 9.262384187770228e-07, "loss": 0.0521, "step": 5976 }, { "epoch": 3.692402717726992, "grad_norm": 18.033824920654297, "learning_rate": 9.26213712168005e-07, "loss": 0.1072, "step": 5978 }, { "epoch": 3.6936380481778874, "grad_norm": 3.9273757934570312, "learning_rate": 9.26189005558987e-07, "loss": 0.0831, "step": 5980 }, { "epoch": 3.6948733786287833, "grad_norm": 7.443383693695068, "learning_rate": 9.26164298949969e-07, "loss": 0.1015, "step": 5982 }, { "epoch": 3.6961087090796787, "grad_norm": 9.213252067565918, "learning_rate": 9.261395923409512e-07, "loss": 0.1402, "step": 5984 }, { "epoch": 3.6973440395305746, "grad_norm": 4.3863959312438965, "learning_rate": 9.261148857319334e-07, "loss": 0.1798, "step": 5986 }, { "epoch": 3.69857936998147, "grad_norm": 13.311588287353516, "learning_rate": 9.260901791229154e-07, "loss": 0.3007, "step": 5988 }, { "epoch": 3.699814700432366, "grad_norm": 3.307270050048828, "learning_rate": 9.260654725138974e-07, "loss": 0.062, "step": 5990 }, { "epoch": 3.7010500308832612, "grad_norm": 9.425045013427734, "learning_rate": 9.260407659048795e-07, "loss": 0.1472, "step": 5992 }, { "epoch": 3.702285361334157, "grad_norm": 7.733996868133545, "learning_rate": 9.260160592958615e-07, "loss": 0.0961, "step": 5994 }, { "epoch": 3.7035206917850525, "grad_norm": 8.095602035522461, "learning_rate": 9.259913526868437e-07, "loss": 0.1419, "step": 5996 }, { "epoch": 3.704756022235948, "grad_norm": 14.408303260803223, "learning_rate": 9.259666460778258e-07, "loss": 0.2504, "step": 5998 }, { "epoch": 3.7059913526868438, "grad_norm": 2.3664329051971436, "learning_rate": 9.259419394688078e-07, "loss": 0.0285, "step": 6000 }, { "epoch": 3.7059913526868438, "eval_cer": 0.053174548224053174, "eval_loss": 0.24139411747455597, "eval_runtime": 64.6515, "eval_samples_per_second": 12.699, "eval_steps_per_second": 1.593, "step": 6000 }, { 
"epoch": 3.7072266831377396, "grad_norm": 2.2601194381713867, "learning_rate": 9.259172328597899e-07, "loss": 0.1589, "step": 6002 }, { "epoch": 3.708462013588635, "grad_norm": 5.4482316970825195, "learning_rate": 9.25892526250772e-07, "loss": 0.0888, "step": 6004 }, { "epoch": 3.7096973440395304, "grad_norm": 4.173800945281982, "learning_rate": 9.258678196417541e-07, "loss": 0.0536, "step": 6006 }, { "epoch": 3.7109326744904263, "grad_norm": 9.028238296508789, "learning_rate": 9.258431130327362e-07, "loss": 0.1931, "step": 6008 }, { "epoch": 3.7121680049413217, "grad_norm": 4.091108322143555, "learning_rate": 9.258184064237183e-07, "loss": 0.1099, "step": 6010 }, { "epoch": 3.7134033353922176, "grad_norm": 7.516265392303467, "learning_rate": 9.257936998147003e-07, "loss": 0.0926, "step": 6012 }, { "epoch": 3.714638665843113, "grad_norm": 23.029888153076172, "learning_rate": 9.257689932056824e-07, "loss": 0.0992, "step": 6014 }, { "epoch": 3.7158739962940084, "grad_norm": 9.805196762084961, "learning_rate": 9.257442865966646e-07, "loss": 0.331, "step": 6016 }, { "epoch": 3.7171093267449042, "grad_norm": 6.496755599975586, "learning_rate": 9.257195799876466e-07, "loss": 0.1319, "step": 6018 }, { "epoch": 3.7183446571958, "grad_norm": 5.022274494171143, "learning_rate": 9.256948733786287e-07, "loss": 0.0626, "step": 6020 }, { "epoch": 3.7195799876466955, "grad_norm": 8.965230941772461, "learning_rate": 9.256701667696108e-07, "loss": 0.1027, "step": 6022 }, { "epoch": 3.720815318097591, "grad_norm": 5.6646647453308105, "learning_rate": 9.256454601605928e-07, "loss": 0.1671, "step": 6024 }, { "epoch": 3.7220506485484868, "grad_norm": 5.593928337097168, "learning_rate": 9.25620753551575e-07, "loss": 0.2263, "step": 6026 }, { "epoch": 3.723285978999382, "grad_norm": 6.090520858764648, "learning_rate": 9.255960469425571e-07, "loss": 0.0974, "step": 6028 }, { "epoch": 3.724521309450278, "grad_norm": 5.348970890045166, "learning_rate": 9.255713403335392e-07, "loss": 0.0586, 
"step": 6030 }, { "epoch": 3.7257566399011735, "grad_norm": 14.21500015258789, "learning_rate": 9.255466337245212e-07, "loss": 0.1445, "step": 6032 }, { "epoch": 3.7269919703520693, "grad_norm": 3.1553640365600586, "learning_rate": 9.255219271155033e-07, "loss": 0.0785, "step": 6034 }, { "epoch": 3.7282273008029647, "grad_norm": 4.203160762786865, "learning_rate": 9.254972205064855e-07, "loss": 0.0671, "step": 6036 }, { "epoch": 3.7294626312538606, "grad_norm": 11.51084041595459, "learning_rate": 9.254725138974675e-07, "loss": 0.1152, "step": 6038 }, { "epoch": 3.730697961704756, "grad_norm": 16.801633834838867, "learning_rate": 9.254478072884496e-07, "loss": 0.1749, "step": 6040 }, { "epoch": 3.7319332921556514, "grad_norm": 6.497267723083496, "learning_rate": 9.254231006794317e-07, "loss": 0.0719, "step": 6042 }, { "epoch": 3.7331686226065472, "grad_norm": 8.630836486816406, "learning_rate": 9.253983940704138e-07, "loss": 0.339, "step": 6044 }, { "epoch": 3.734403953057443, "grad_norm": 6.813369274139404, "learning_rate": 9.253736874613959e-07, "loss": 0.1851, "step": 6046 }, { "epoch": 3.7356392835083385, "grad_norm": 1.8706026077270508, "learning_rate": 9.25348980852378e-07, "loss": 0.2115, "step": 6048 }, { "epoch": 3.736874613959234, "grad_norm": 0.2312498241662979, "learning_rate": 9.2532427424336e-07, "loss": 0.2538, "step": 6050 }, { "epoch": 3.73810994441013, "grad_norm": 5.596468448638916, "learning_rate": 9.252995676343421e-07, "loss": 0.1814, "step": 6052 }, { "epoch": 3.739345274861025, "grad_norm": 5.3115949630737305, "learning_rate": 9.252748610253243e-07, "loss": 0.13, "step": 6054 }, { "epoch": 3.740580605311921, "grad_norm": 2.623802423477173, "learning_rate": 9.252501544163063e-07, "loss": 0.0309, "step": 6056 }, { "epoch": 3.7418159357628165, "grad_norm": 8.898798942565918, "learning_rate": 9.252254478072884e-07, "loss": 0.1676, "step": 6058 }, { "epoch": 3.7430512662137123, "grad_norm": 6.964505195617676, "learning_rate": 
9.252007411982705e-07, "loss": 0.1064, "step": 6060 }, { "epoch": 3.7442865966646077, "grad_norm": 7.014443397521973, "learning_rate": 9.251760345892525e-07, "loss": 0.1854, "step": 6062 }, { "epoch": 3.7455219271155036, "grad_norm": 9.166293144226074, "learning_rate": 9.251513279802347e-07, "loss": 0.0507, "step": 6064 }, { "epoch": 3.746757257566399, "grad_norm": 5.054125785827637, "learning_rate": 9.251266213712168e-07, "loss": 0.095, "step": 6066 }, { "epoch": 3.7479925880172944, "grad_norm": 7.314349174499512, "learning_rate": 9.251019147621989e-07, "loss": 0.097, "step": 6068 }, { "epoch": 3.7492279184681903, "grad_norm": 26.79680824279785, "learning_rate": 9.250772081531809e-07, "loss": 0.0837, "step": 6070 }, { "epoch": 3.750463248919086, "grad_norm": 5.667154312133789, "learning_rate": 9.25052501544163e-07, "loss": 0.1269, "step": 6072 }, { "epoch": 3.7516985793699815, "grad_norm": 6.480410099029541, "learning_rate": 9.250277949351452e-07, "loss": 0.0998, "step": 6074 }, { "epoch": 3.752933909820877, "grad_norm": 11.041542053222656, "learning_rate": 9.250030883261272e-07, "loss": 0.2893, "step": 6076 }, { "epoch": 3.754169240271773, "grad_norm": 6.781369209289551, "learning_rate": 9.249783817171093e-07, "loss": 0.1329, "step": 6078 }, { "epoch": 3.755404570722668, "grad_norm": 6.558681488037109, "learning_rate": 9.249536751080914e-07, "loss": 0.2441, "step": 6080 }, { "epoch": 3.756639901173564, "grad_norm": 6.377396583557129, "learning_rate": 9.249289684990734e-07, "loss": 0.1898, "step": 6082 }, { "epoch": 3.7578752316244595, "grad_norm": 8.153913497924805, "learning_rate": 9.249042618900556e-07, "loss": 0.1164, "step": 6084 }, { "epoch": 3.7591105620753553, "grad_norm": 5.3544230461120605, "learning_rate": 9.248795552810377e-07, "loss": 0.2931, "step": 6086 }, { "epoch": 3.7603458925262507, "grad_norm": 4.14177131652832, "learning_rate": 9.248548486720197e-07, "loss": 0.0493, "step": 6088 }, { "epoch": 3.7615812229771466, "grad_norm": 5.419217109680176, 
"learning_rate": 9.248301420630018e-07, "loss": 0.0613, "step": 6090 }, { "epoch": 3.762816553428042, "grad_norm": 9.39295768737793, "learning_rate": 9.24805435453984e-07, "loss": 0.3472, "step": 6092 }, { "epoch": 3.7640518838789374, "grad_norm": 7.578539848327637, "learning_rate": 9.24780728844966e-07, "loss": 0.1662, "step": 6094 }, { "epoch": 3.7652872143298333, "grad_norm": 7.771241188049316, "learning_rate": 9.247560222359481e-07, "loss": 0.1612, "step": 6096 }, { "epoch": 3.766522544780729, "grad_norm": 5.40378999710083, "learning_rate": 9.247313156269302e-07, "loss": 0.0904, "step": 6098 }, { "epoch": 3.7677578752316245, "grad_norm": 10.72472095489502, "learning_rate": 9.247066090179122e-07, "loss": 0.2099, "step": 6100 }, { "epoch": 3.76899320568252, "grad_norm": 6.149609565734863, "learning_rate": 9.246819024088944e-07, "loss": 0.0948, "step": 6102 }, { "epoch": 3.770228536133416, "grad_norm": 4.916388034820557, "learning_rate": 9.246571957998765e-07, "loss": 0.0598, "step": 6104 }, { "epoch": 3.771463866584311, "grad_norm": 7.510563850402832, "learning_rate": 9.246324891908586e-07, "loss": 0.2432, "step": 6106 }, { "epoch": 3.772699197035207, "grad_norm": 5.048687934875488, "learning_rate": 9.246077825818406e-07, "loss": 0.0499, "step": 6108 }, { "epoch": 3.7739345274861025, "grad_norm": 2.6815812587738037, "learning_rate": 9.245830759728227e-07, "loss": 0.037, "step": 6110 }, { "epoch": 3.775169857936998, "grad_norm": 5.512421607971191, "learning_rate": 9.245583693638049e-07, "loss": 0.094, "step": 6112 }, { "epoch": 3.7764051883878937, "grad_norm": 7.683080673217773, "learning_rate": 9.245336627547869e-07, "loss": 0.1534, "step": 6114 }, { "epoch": 3.7776405188387896, "grad_norm": 5.131374359130859, "learning_rate": 9.24508956145769e-07, "loss": 0.0685, "step": 6116 }, { "epoch": 3.778875849289685, "grad_norm": 7.18672513961792, "learning_rate": 9.24484249536751e-07, "loss": 0.1628, "step": 6118 }, { "epoch": 3.7801111797405804, "grad_norm": 
5.174349784851074, "learning_rate": 9.24459542927733e-07, "loss": 0.1526, "step": 6120 }, { "epoch": 3.7813465101914763, "grad_norm": 8.095183372497559, "learning_rate": 9.244348363187153e-07, "loss": 0.2354, "step": 6122 }, { "epoch": 3.7825818406423717, "grad_norm": 6.940358638763428, "learning_rate": 9.244101297096973e-07, "loss": 0.1831, "step": 6124 }, { "epoch": 3.7838171710932675, "grad_norm": 7.185666084289551, "learning_rate": 9.243854231006793e-07, "loss": 0.2809, "step": 6126 }, { "epoch": 3.785052501544163, "grad_norm": 4.414030075073242, "learning_rate": 9.243607164916614e-07, "loss": 0.1502, "step": 6128 }, { "epoch": 3.786287831995059, "grad_norm": 5.3099236488342285, "learning_rate": 9.243360098826436e-07, "loss": 0.1223, "step": 6130 }, { "epoch": 3.787523162445954, "grad_norm": 4.4830780029296875, "learning_rate": 9.243113032736256e-07, "loss": 0.2045, "step": 6132 }, { "epoch": 3.78875849289685, "grad_norm": 13.908071517944336, "learning_rate": 9.242865966646077e-07, "loss": 0.6444, "step": 6134 }, { "epoch": 3.7899938233477455, "grad_norm": 2.966928243637085, "learning_rate": 9.242618900555898e-07, "loss": 0.0413, "step": 6136 }, { "epoch": 3.791229153798641, "grad_norm": 5.308862686157227, "learning_rate": 9.242371834465718e-07, "loss": 0.0403, "step": 6138 }, { "epoch": 3.7924644842495367, "grad_norm": 8.297928810119629, "learning_rate": 9.24212476837554e-07, "loss": 0.1248, "step": 6140 }, { "epoch": 3.7936998147004326, "grad_norm": 8.005627632141113, "learning_rate": 9.241877702285361e-07, "loss": 0.1371, "step": 6142 }, { "epoch": 3.794935145151328, "grad_norm": 4.125904083251953, "learning_rate": 9.241630636195181e-07, "loss": 0.1411, "step": 6144 }, { "epoch": 3.7961704756022234, "grad_norm": 12.198182106018066, "learning_rate": 9.241383570105002e-07, "loss": 0.4102, "step": 6146 }, { "epoch": 3.7974058060531193, "grad_norm": 9.271328926086426, "learning_rate": 9.241136504014823e-07, "loss": 0.1619, "step": 6148 }, { "epoch": 
3.7986411365040147, "grad_norm": 5.70451545715332, "learning_rate": 9.240889437924645e-07, "loss": 0.1905, "step": 6150 }, { "epoch": 3.7998764669549105, "grad_norm": 3.790344715118408, "learning_rate": 9.240642371834465e-07, "loss": 0.166, "step": 6152 }, { "epoch": 3.801111797405806, "grad_norm": 9.62696361541748, "learning_rate": 9.240395305744286e-07, "loss": 0.1381, "step": 6154 }, { "epoch": 3.802347127856702, "grad_norm": 16.009029388427734, "learning_rate": 9.240148239654107e-07, "loss": 0.1154, "step": 6156 }, { "epoch": 3.8035824583075972, "grad_norm": 7.1139116287231445, "learning_rate": 9.239901173563927e-07, "loss": 0.2142, "step": 6158 }, { "epoch": 3.804817788758493, "grad_norm": 5.687557220458984, "learning_rate": 9.239654107473749e-07, "loss": 0.1114, "step": 6160 }, { "epoch": 3.8060531192093885, "grad_norm": 4.157767295837402, "learning_rate": 9.23940704138357e-07, "loss": 0.052, "step": 6162 }, { "epoch": 3.807288449660284, "grad_norm": 6.326875686645508, "learning_rate": 9.23915997529339e-07, "loss": 0.114, "step": 6164 }, { "epoch": 3.8085237801111798, "grad_norm": 7.11458683013916, "learning_rate": 9.238912909203211e-07, "loss": 0.0965, "step": 6166 }, { "epoch": 3.8097591105620756, "grad_norm": 6.833465099334717, "learning_rate": 9.238665843113032e-07, "loss": 0.1239, "step": 6168 }, { "epoch": 3.810994441012971, "grad_norm": 3.924453020095825, "learning_rate": 9.238418777022853e-07, "loss": 0.0549, "step": 6170 }, { "epoch": 3.8122297714638664, "grad_norm": 7.49632453918457, "learning_rate": 9.238171710932674e-07, "loss": 0.4061, "step": 6172 }, { "epoch": 3.8134651019147623, "grad_norm": 8.509312629699707, "learning_rate": 9.237924644842495e-07, "loss": 0.2147, "step": 6174 }, { "epoch": 3.8147004323656577, "grad_norm": 2.4121086597442627, "learning_rate": 9.237677578752315e-07, "loss": 0.127, "step": 6176 }, { "epoch": 3.8159357628165536, "grad_norm": 9.53685188293457, "learning_rate": 9.237430512662137e-07, "loss": 0.2654, "step": 6178 
}, { "epoch": 3.817171093267449, "grad_norm": 4.623907566070557, "learning_rate": 9.237183446571958e-07, "loss": 0.0663, "step": 6180 }, { "epoch": 3.818406423718345, "grad_norm": 12.921734809875488, "learning_rate": 9.236936380481778e-07, "loss": 0.6814, "step": 6182 }, { "epoch": 3.8196417541692402, "grad_norm": 10.089010238647461, "learning_rate": 9.236689314391599e-07, "loss": 0.1685, "step": 6184 }, { "epoch": 3.820877084620136, "grad_norm": 1.3722541332244873, "learning_rate": 9.23644224830142e-07, "loss": 0.038, "step": 6186 }, { "epoch": 3.8221124150710315, "grad_norm": 4.376243591308594, "learning_rate": 9.236195182211242e-07, "loss": 0.0693, "step": 6188 }, { "epoch": 3.823347745521927, "grad_norm": 7.097993850708008, "learning_rate": 9.235948116121062e-07, "loss": 0.1902, "step": 6190 }, { "epoch": 3.8245830759728228, "grad_norm": 4.147150039672852, "learning_rate": 9.235701050030883e-07, "loss": 0.1455, "step": 6192 }, { "epoch": 3.8258184064237186, "grad_norm": 6.139621734619141, "learning_rate": 9.235453983940704e-07, "loss": 0.1395, "step": 6194 }, { "epoch": 3.827053736874614, "grad_norm": 3.8950538635253906, "learning_rate": 9.235206917850524e-07, "loss": 0.0739, "step": 6196 }, { "epoch": 3.8282890673255094, "grad_norm": 5.187674045562744, "learning_rate": 9.234959851760346e-07, "loss": 0.1039, "step": 6198 }, { "epoch": 3.8295243977764053, "grad_norm": 4.864297866821289, "learning_rate": 9.234712785670167e-07, "loss": 0.0848, "step": 6200 }, { "epoch": 3.8307597282273007, "grad_norm": 9.56394100189209, "learning_rate": 9.234465719579987e-07, "loss": 0.2466, "step": 6202 }, { "epoch": 3.8319950586781966, "grad_norm": 12.141324996948242, "learning_rate": 9.234218653489808e-07, "loss": 0.2297, "step": 6204 }, { "epoch": 3.833230389129092, "grad_norm": 4.058130264282227, "learning_rate": 9.233971587399629e-07, "loss": 0.0878, "step": 6206 }, { "epoch": 3.8344657195799874, "grad_norm": 3.4477691650390625, "learning_rate": 9.23372452130945e-07, "loss": 
0.1195, "step": 6208 }, { "epoch": 3.8357010500308832, "grad_norm": 6.4883270263671875, "learning_rate": 9.233477455219271e-07, "loss": 0.1172, "step": 6210 }, { "epoch": 3.836936380481779, "grad_norm": 9.245906829833984, "learning_rate": 9.233230389129092e-07, "loss": 0.2343, "step": 6212 }, { "epoch": 3.8381717109326745, "grad_norm": 4.945746421813965, "learning_rate": 9.232983323038912e-07, "loss": 0.0567, "step": 6214 }, { "epoch": 3.83940704138357, "grad_norm": 7.254429340362549, "learning_rate": 9.232736256948733e-07, "loss": 0.155, "step": 6216 }, { "epoch": 3.8406423718344658, "grad_norm": 5.111907005310059, "learning_rate": 9.232489190858555e-07, "loss": 0.1126, "step": 6218 }, { "epoch": 3.841877702285361, "grad_norm": 11.450480461120605, "learning_rate": 9.232242124768375e-07, "loss": 0.1804, "step": 6220 }, { "epoch": 3.843113032736257, "grad_norm": 5.585058689117432, "learning_rate": 9.231995058678196e-07, "loss": 0.1879, "step": 6222 }, { "epoch": 3.8443483631871525, "grad_norm": 9.342671394348145, "learning_rate": 9.231747992588017e-07, "loss": 0.0885, "step": 6224 }, { "epoch": 3.8455836936380483, "grad_norm": 10.553268432617188, "learning_rate": 9.231500926497839e-07, "loss": 0.1502, "step": 6226 }, { "epoch": 3.8468190240889437, "grad_norm": 5.474380016326904, "learning_rate": 9.231253860407659e-07, "loss": 0.0909, "step": 6228 }, { "epoch": 3.8480543545398396, "grad_norm": 7.584311008453369, "learning_rate": 9.23100679431748e-07, "loss": 0.1508, "step": 6230 }, { "epoch": 3.849289684990735, "grad_norm": 2.4429771900177, "learning_rate": 9.230759728227301e-07, "loss": 0.058, "step": 6232 }, { "epoch": 3.8505250154416304, "grad_norm": 3.6164896488189697, "learning_rate": 9.230512662137121e-07, "loss": 0.0814, "step": 6234 }, { "epoch": 3.8517603458925262, "grad_norm": 3.5526540279388428, "learning_rate": 9.230265596046943e-07, "loss": 0.1456, "step": 6236 }, { "epoch": 3.852995676343422, "grad_norm": 6.559079647064209, "learning_rate": 
9.230018529956764e-07, "loss": 0.0693, "step": 6238 }, { "epoch": 3.8542310067943175, "grad_norm": 7.515732288360596, "learning_rate": 9.229771463866584e-07, "loss": 0.1247, "step": 6240 }, { "epoch": 3.855466337245213, "grad_norm": 2.6499457359313965, "learning_rate": 9.229524397776405e-07, "loss": 0.0279, "step": 6242 }, { "epoch": 3.856701667696109, "grad_norm": 6.461893558502197, "learning_rate": 9.229277331686226e-07, "loss": 0.1241, "step": 6244 }, { "epoch": 3.857936998147004, "grad_norm": 5.865048885345459, "learning_rate": 9.229030265596047e-07, "loss": 0.1362, "step": 6246 }, { "epoch": 3.8591723285979, "grad_norm": 2.2597758769989014, "learning_rate": 9.228783199505868e-07, "loss": 0.3662, "step": 6248 }, { "epoch": 3.8604076590487955, "grad_norm": 3.533358573913574, "learning_rate": 9.228536133415689e-07, "loss": 0.0489, "step": 6250 }, { "epoch": 3.8616429894996913, "grad_norm": 7.5942769050598145, "learning_rate": 9.228289067325508e-07, "loss": 0.1291, "step": 6252 }, { "epoch": 3.8628783199505867, "grad_norm": 21.785274505615234, "learning_rate": 9.228042001235329e-07, "loss": 0.0861, "step": 6254 }, { "epoch": 3.8641136504014826, "grad_norm": 3.486288070678711, "learning_rate": 9.227794935145152e-07, "loss": 0.1104, "step": 6256 }, { "epoch": 3.865348980852378, "grad_norm": 0.5536136031150818, "learning_rate": 9.227547869054971e-07, "loss": 0.0768, "step": 6258 }, { "epoch": 3.8665843113032734, "grad_norm": 1.6125150918960571, "learning_rate": 9.227300802964792e-07, "loss": 0.2052, "step": 6260 }, { "epoch": 3.8678196417541693, "grad_norm": 1.8556950092315674, "learning_rate": 9.227053736874613e-07, "loss": 0.0606, "step": 6262 }, { "epoch": 3.869054972205065, "grad_norm": 6.632902145385742, "learning_rate": 9.226806670784433e-07, "loss": 0.1266, "step": 6264 }, { "epoch": 3.8702903026559605, "grad_norm": 4.103086471557617, "learning_rate": 9.226559604694255e-07, "loss": 0.2946, "step": 6266 }, { "epoch": 3.871525633106856, "grad_norm": 
6.2949538230896, "learning_rate": 9.226312538604076e-07, "loss": 0.1384, "step": 6268 }, { "epoch": 3.872760963557752, "grad_norm": 2.967031478881836, "learning_rate": 9.226065472513897e-07, "loss": 0.0308, "step": 6270 }, { "epoch": 3.873996294008647, "grad_norm": 5.040622711181641, "learning_rate": 9.225818406423717e-07, "loss": 0.1135, "step": 6272 }, { "epoch": 3.875231624459543, "grad_norm": 7.5838541984558105, "learning_rate": 9.225571340333539e-07, "loss": 0.2806, "step": 6274 }, { "epoch": 3.8764669549104385, "grad_norm": 7.2848005294799805, "learning_rate": 9.22532427424336e-07, "loss": 0.0394, "step": 6276 }, { "epoch": 3.8777022853613343, "grad_norm": 12.716438293457031, "learning_rate": 9.22507720815318e-07, "loss": 0.4494, "step": 6278 }, { "epoch": 3.8789376158122297, "grad_norm": 1.9535878896713257, "learning_rate": 9.224830142063001e-07, "loss": 0.0983, "step": 6280 }, { "epoch": 3.8801729462631256, "grad_norm": 6.251683712005615, "learning_rate": 9.224583075972822e-07, "loss": 0.0872, "step": 6282 }, { "epoch": 3.881408276714021, "grad_norm": 5.259305477142334, "learning_rate": 9.224336009882643e-07, "loss": 0.1055, "step": 6284 }, { "epoch": 3.8826436071649164, "grad_norm": 2.4833269119262695, "learning_rate": 9.224088943792464e-07, "loss": 0.0342, "step": 6286 }, { "epoch": 3.8838789376158123, "grad_norm": 4.747370719909668, "learning_rate": 9.223841877702285e-07, "loss": 0.0927, "step": 6288 }, { "epoch": 3.885114268066708, "grad_norm": 1.7249596118927002, "learning_rate": 9.223594811612105e-07, "loss": 0.0323, "step": 6290 }, { "epoch": 3.8863495985176035, "grad_norm": 6.051753520965576, "learning_rate": 9.223347745521926e-07, "loss": 0.1383, "step": 6292 }, { "epoch": 3.887584928968499, "grad_norm": 5.681085586547852, "learning_rate": 9.223100679431748e-07, "loss": 0.1631, "step": 6294 }, { "epoch": 3.888820259419395, "grad_norm": 4.3354010581970215, "learning_rate": 9.222853613341568e-07, "loss": 0.0356, "step": 6296 }, { "epoch": 
3.89005558987029, "grad_norm": 12.512460708618164, "learning_rate": 9.222606547251389e-07, "loss": 0.2522, "step": 6298 }, { "epoch": 3.891290920321186, "grad_norm": 2.6971521377563477, "learning_rate": 9.22235948116121e-07, "loss": 0.0967, "step": 6300 }, { "epoch": 3.8925262507720815, "grad_norm": 1.6997190713882446, "learning_rate": 9.22211241507103e-07, "loss": 0.0318, "step": 6302 }, { "epoch": 3.893761581222977, "grad_norm": 10.40310287475586, "learning_rate": 9.221865348980852e-07, "loss": 0.3286, "step": 6304 }, { "epoch": 3.8949969116738727, "grad_norm": 5.327568054199219, "learning_rate": 9.221618282890673e-07, "loss": 0.0631, "step": 6306 }, { "epoch": 3.8962322421247686, "grad_norm": 7.8724517822265625, "learning_rate": 9.221371216800494e-07, "loss": 0.16, "step": 6308 }, { "epoch": 3.897467572575664, "grad_norm": 4.397527694702148, "learning_rate": 9.221124150710314e-07, "loss": 0.0902, "step": 6310 }, { "epoch": 3.8987029030265594, "grad_norm": 7.140380382537842, "learning_rate": 9.220877084620136e-07, "loss": 0.1874, "step": 6312 }, { "epoch": 3.8999382334774553, "grad_norm": 4.326656341552734, "learning_rate": 9.220630018529957e-07, "loss": 0.0742, "step": 6314 }, { "epoch": 3.9011735639283507, "grad_norm": 3.853715181350708, "learning_rate": 9.220382952439777e-07, "loss": 0.0603, "step": 6316 }, { "epoch": 3.9024088943792465, "grad_norm": 5.965705871582031, "learning_rate": 9.220135886349598e-07, "loss": 0.1771, "step": 6318 }, { "epoch": 3.903644224830142, "grad_norm": 7.772800445556641, "learning_rate": 9.219888820259419e-07, "loss": 0.1002, "step": 6320 }, { "epoch": 3.904879555281038, "grad_norm": 7.226902961730957, "learning_rate": 9.21964175416924e-07, "loss": 0.1349, "step": 6322 }, { "epoch": 3.906114885731933, "grad_norm": 8.858543395996094, "learning_rate": 9.219394688079061e-07, "loss": 0.1317, "step": 6324 }, { "epoch": 3.907350216182829, "grad_norm": 6.471269130706787, "learning_rate": 9.219147621988882e-07, "loss": 0.0952, "step": 
6326 }, { "epoch": 3.9085855466337245, "grad_norm": 2.810595989227295, "learning_rate": 9.218900555898702e-07, "loss": 0.1273, "step": 6328 }, { "epoch": 3.90982087708462, "grad_norm": 5.293196201324463, "learning_rate": 9.218653489808523e-07, "loss": 0.0646, "step": 6330 }, { "epoch": 3.9110562075355157, "grad_norm": 6.762948036193848, "learning_rate": 9.218406423718345e-07, "loss": 0.1143, "step": 6332 }, { "epoch": 3.9122915379864116, "grad_norm": 5.055931568145752, "learning_rate": 9.218159357628165e-07, "loss": 0.2349, "step": 6334 }, { "epoch": 3.913526868437307, "grad_norm": 7.433049201965332, "learning_rate": 9.217912291537986e-07, "loss": 0.2127, "step": 6336 }, { "epoch": 3.9147621988882024, "grad_norm": 5.2378950119018555, "learning_rate": 9.217665225447807e-07, "loss": 0.1786, "step": 6338 }, { "epoch": 3.9159975293390983, "grad_norm": 6.634721279144287, "learning_rate": 9.217418159357627e-07, "loss": 0.105, "step": 6340 }, { "epoch": 3.9172328597899937, "grad_norm": 11.316915512084961, "learning_rate": 9.217171093267449e-07, "loss": 0.3334, "step": 6342 }, { "epoch": 3.9184681902408895, "grad_norm": 16.533214569091797, "learning_rate": 9.21692402717727e-07, "loss": 0.2723, "step": 6344 }, { "epoch": 3.919703520691785, "grad_norm": 5.828943252563477, "learning_rate": 9.216676961087091e-07, "loss": 0.1605, "step": 6346 }, { "epoch": 3.920938851142681, "grad_norm": 5.104292869567871, "learning_rate": 9.216429894996911e-07, "loss": 0.0608, "step": 6348 }, { "epoch": 3.9221741815935762, "grad_norm": 9.3994722366333, "learning_rate": 9.216182828906732e-07, "loss": 0.1509, "step": 6350 }, { "epoch": 3.923409512044472, "grad_norm": 4.095046520233154, "learning_rate": 9.215935762816554e-07, "loss": 0.0688, "step": 6352 }, { "epoch": 3.9246448424953675, "grad_norm": 4.436873912811279, "learning_rate": 9.215688696726374e-07, "loss": 0.1164, "step": 6354 }, { "epoch": 3.925880172946263, "grad_norm": 5.365219593048096, "learning_rate": 9.215441630636195e-07, 
"loss": 0.0714, "step": 6356 }, { "epoch": 3.9271155033971588, "grad_norm": 4.656512260437012, "learning_rate": 9.215194564546016e-07, "loss": 0.055, "step": 6358 }, { "epoch": 3.9283508338480546, "grad_norm": 4.094333171844482, "learning_rate": 9.214947498455837e-07, "loss": 0.0655, "step": 6360 }, { "epoch": 3.92958616429895, "grad_norm": 4.098690986633301, "learning_rate": 9.214700432365658e-07, "loss": 0.07, "step": 6362 }, { "epoch": 3.9308214947498454, "grad_norm": 6.782210350036621, "learning_rate": 9.214453366275479e-07, "loss": 0.1087, "step": 6364 }, { "epoch": 3.9320568252007413, "grad_norm": 4.305397987365723, "learning_rate": 9.214206300185299e-07, "loss": 0.0784, "step": 6366 }, { "epoch": 3.9332921556516367, "grad_norm": 4.292744159698486, "learning_rate": 9.21395923409512e-07, "loss": 0.0442, "step": 6368 }, { "epoch": 3.9345274861025326, "grad_norm": 22.536476135253906, "learning_rate": 9.213712168004942e-07, "loss": 0.5015, "step": 6370 }, { "epoch": 3.935762816553428, "grad_norm": 10.666199684143066, "learning_rate": 9.213465101914762e-07, "loss": 0.2228, "step": 6372 }, { "epoch": 3.936998147004324, "grad_norm": 8.17652416229248, "learning_rate": 9.213218035824583e-07, "loss": 0.1786, "step": 6374 }, { "epoch": 3.9382334774552192, "grad_norm": 8.01025104522705, "learning_rate": 9.212970969734404e-07, "loss": 0.1237, "step": 6376 }, { "epoch": 3.939468807906115, "grad_norm": 7.8157854080200195, "learning_rate": 9.212723903644224e-07, "loss": 0.1376, "step": 6378 }, { "epoch": 3.9407041383570105, "grad_norm": 4.219436168670654, "learning_rate": 9.212476837554046e-07, "loss": 0.2661, "step": 6380 }, { "epoch": 3.941939468807906, "grad_norm": 11.35641098022461, "learning_rate": 9.212229771463867e-07, "loss": 0.2238, "step": 6382 }, { "epoch": 3.9431747992588018, "grad_norm": 10.35657787322998, "learning_rate": 9.211982705373686e-07, "loss": 0.3311, "step": 6384 }, { "epoch": 3.9444101297096976, "grad_norm": 6.153178691864014, "learning_rate": 
9.211735639283507e-07, "loss": 0.2063, "step": 6386 }, { "epoch": 3.945645460160593, "grad_norm": 5.672163963317871, "learning_rate": 9.211488573193328e-07, "loss": 0.3109, "step": 6388 }, { "epoch": 3.9468807906114884, "grad_norm": 6.770474910736084, "learning_rate": 9.21124150710315e-07, "loss": 0.1442, "step": 6390 }, { "epoch": 3.9481161210623843, "grad_norm": 9.461878776550293, "learning_rate": 9.21099444101297e-07, "loss": 0.2307, "step": 6392 }, { "epoch": 3.9493514515132797, "grad_norm": 11.860806465148926, "learning_rate": 9.210747374922791e-07, "loss": 0.2665, "step": 6394 }, { "epoch": 3.9505867819641756, "grad_norm": 15.94314956665039, "learning_rate": 9.210500308832612e-07, "loss": 0.1412, "step": 6396 }, { "epoch": 3.951822112415071, "grad_norm": 4.760583400726318, "learning_rate": 9.210253242742432e-07, "loss": 0.1703, "step": 6398 }, { "epoch": 3.9530574428659664, "grad_norm": 5.318960189819336, "learning_rate": 9.210006176652254e-07, "loss": 0.1349, "step": 6400 }, { "epoch": 3.9542927733168622, "grad_norm": 9.459582328796387, "learning_rate": 9.209759110562075e-07, "loss": 0.1289, "step": 6402 }, { "epoch": 3.955528103767758, "grad_norm": 4.075008392333984, "learning_rate": 9.209512044471895e-07, "loss": 0.0401, "step": 6404 }, { "epoch": 3.9567634342186535, "grad_norm": 3.779829263687134, "learning_rate": 9.209264978381716e-07, "loss": 0.2292, "step": 6406 }, { "epoch": 3.957998764669549, "grad_norm": 7.944407939910889, "learning_rate": 9.209017912291538e-07, "loss": 0.1143, "step": 6408 }, { "epoch": 3.9592340951204448, "grad_norm": 4.118803977966309, "learning_rate": 9.208770846201358e-07, "loss": 0.1494, "step": 6410 }, { "epoch": 3.96046942557134, "grad_norm": 7.87486457824707, "learning_rate": 9.208523780111179e-07, "loss": 0.1497, "step": 6412 }, { "epoch": 3.961704756022236, "grad_norm": 8.449039459228516, "learning_rate": 9.208276714021e-07, "loss": 0.1322, "step": 6414 }, { "epoch": 3.9629400864731315, "grad_norm": 9.83460807800293, 
"learning_rate": 9.20802964793082e-07, "loss": 0.2749, "step": 6416 }, { "epoch": 3.9641754169240273, "grad_norm": 5.35679817199707, "learning_rate": 9.207782581840642e-07, "loss": 0.0375, "step": 6418 }, { "epoch": 3.9654107473749227, "grad_norm": 6.33883810043335, "learning_rate": 9.207535515750463e-07, "loss": 0.2054, "step": 6420 }, { "epoch": 3.9666460778258186, "grad_norm": 7.018109321594238, "learning_rate": 9.207288449660283e-07, "loss": 0.0768, "step": 6422 }, { "epoch": 3.967881408276714, "grad_norm": 8.886080741882324, "learning_rate": 9.207041383570104e-07, "loss": 0.1263, "step": 6424 }, { "epoch": 3.9691167387276094, "grad_norm": 5.140292167663574, "learning_rate": 9.206794317479925e-07, "loss": 0.0918, "step": 6426 }, { "epoch": 3.9703520691785052, "grad_norm": 8.336038589477539, "learning_rate": 9.206547251389747e-07, "loss": 0.373, "step": 6428 }, { "epoch": 3.971587399629401, "grad_norm": 7.607062339782715, "learning_rate": 9.206300185299567e-07, "loss": 0.2076, "step": 6430 }, { "epoch": 3.9728227300802965, "grad_norm": 0.8739811778068542, "learning_rate": 9.206053119209388e-07, "loss": 0.102, "step": 6432 }, { "epoch": 3.974058060531192, "grad_norm": 3.1105916500091553, "learning_rate": 9.205806053119209e-07, "loss": 0.0604, "step": 6434 }, { "epoch": 3.975293390982088, "grad_norm": 10.092368125915527, "learning_rate": 9.205558987029029e-07, "loss": 0.0675, "step": 6436 }, { "epoch": 3.976528721432983, "grad_norm": 3.0484089851379395, "learning_rate": 9.205311920938851e-07, "loss": 0.0321, "step": 6438 }, { "epoch": 3.977764051883879, "grad_norm": 8.305204391479492, "learning_rate": 9.205064854848672e-07, "loss": 0.2171, "step": 6440 }, { "epoch": 3.9789993823347745, "grad_norm": 7.8330817222595215, "learning_rate": 9.204817788758492e-07, "loss": 0.3811, "step": 6442 }, { "epoch": 3.9802347127856703, "grad_norm": 3.1947765350341797, "learning_rate": 9.204570722668313e-07, "loss": 0.0785, "step": 6444 }, { "epoch": 3.9814700432365657, 
"grad_norm": 5.351154804229736, "learning_rate": 9.204323656578135e-07, "loss": 0.1486, "step": 6446 }, { "epoch": 3.9827053736874616, "grad_norm": 7.385144233703613, "learning_rate": 9.204076590487955e-07, "loss": 0.1881, "step": 6448 }, { "epoch": 3.983940704138357, "grad_norm": 9.416825294494629, "learning_rate": 9.203829524397776e-07, "loss": 0.1607, "step": 6450 }, { "epoch": 3.9851760345892524, "grad_norm": 8.405505180358887, "learning_rate": 9.203582458307597e-07, "loss": 0.0745, "step": 6452 }, { "epoch": 3.9864113650401483, "grad_norm": 6.128489017486572, "learning_rate": 9.203335392217417e-07, "loss": 0.3808, "step": 6454 }, { "epoch": 3.987646695491044, "grad_norm": 7.792023658752441, "learning_rate": 9.203088326127239e-07, "loss": 0.0613, "step": 6456 }, { "epoch": 3.9888820259419395, "grad_norm": 4.728753089904785, "learning_rate": 9.20284126003706e-07, "loss": 0.1741, "step": 6458 }, { "epoch": 3.990117356392835, "grad_norm": 4.525623321533203, "learning_rate": 9.20259419394688e-07, "loss": 0.0595, "step": 6460 }, { "epoch": 3.991352686843731, "grad_norm": 6.990654468536377, "learning_rate": 9.202347127856701e-07, "loss": 0.2841, "step": 6462 }, { "epoch": 3.992588017294626, "grad_norm": 11.034262657165527, "learning_rate": 9.202100061766522e-07, "loss": 0.1913, "step": 6464 }, { "epoch": 3.993823347745522, "grad_norm": 7.857149600982666, "learning_rate": 9.201852995676344e-07, "loss": 0.1465, "step": 6466 }, { "epoch": 3.9950586781964175, "grad_norm": 6.5363359451293945, "learning_rate": 9.201605929586164e-07, "loss": 0.1929, "step": 6468 }, { "epoch": 3.9962940086473133, "grad_norm": 5.793256759643555, "learning_rate": 9.201358863495985e-07, "loss": 0.1209, "step": 6470 }, { "epoch": 3.9975293390982087, "grad_norm": 4.810014724731445, "learning_rate": 9.201111797405806e-07, "loss": 0.0766, "step": 6472 }, { "epoch": 3.9987646695491046, "grad_norm": 4.742744445800781, "learning_rate": 9.200864731315626e-07, "loss": 0.0923, "step": 6474 }, { "epoch": 
4.0, "grad_norm": 12.771225929260254, "learning_rate": 9.200617665225448e-07, "loss": 0.0633, "step": 6476 }, { "epoch": 4.001235330450895, "grad_norm": 4.6623640060424805, "learning_rate": 9.200370599135269e-07, "loss": 0.0601, "step": 6478 }, { "epoch": 4.002470660901791, "grad_norm": 3.639422655105591, "learning_rate": 9.200123533045089e-07, "loss": 0.1012, "step": 6480 }, { "epoch": 4.003705991352687, "grad_norm": 10.62014102935791, "learning_rate": 9.19987646695491e-07, "loss": 0.2812, "step": 6482 }, { "epoch": 4.0049413218035825, "grad_norm": 4.441190242767334, "learning_rate": 9.199629400864731e-07, "loss": 0.0472, "step": 6484 }, { "epoch": 4.006176652254478, "grad_norm": 0.376647412776947, "learning_rate": 9.199382334774552e-07, "loss": 0.033, "step": 6486 }, { "epoch": 4.007411982705373, "grad_norm": 5.94053316116333, "learning_rate": 9.199135268684373e-07, "loss": 0.1544, "step": 6488 }, { "epoch": 4.00864731315627, "grad_norm": 3.0041027069091797, "learning_rate": 9.198888202594194e-07, "loss": 0.0504, "step": 6490 }, { "epoch": 4.009882643607165, "grad_norm": 4.972756862640381, "learning_rate": 9.198641136504014e-07, "loss": 0.0576, "step": 6492 }, { "epoch": 4.0111179740580605, "grad_norm": 10.469466209411621, "learning_rate": 9.198394070413836e-07, "loss": 0.1345, "step": 6494 }, { "epoch": 4.012353304508956, "grad_norm": 3.4573564529418945, "learning_rate": 9.198147004323657e-07, "loss": 0.0627, "step": 6496 }, { "epoch": 4.013588634959852, "grad_norm": 3.1969504356384277, "learning_rate": 9.197899938233477e-07, "loss": 0.1368, "step": 6498 }, { "epoch": 4.014823965410748, "grad_norm": 7.360875606536865, "learning_rate": 9.197652872143298e-07, "loss": 0.0994, "step": 6500 }, { "epoch": 4.014823965410748, "eval_cer": 0.05358997438205359, "eval_loss": 0.23711024224758148, "eval_runtime": 64.4513, "eval_samples_per_second": 12.738, "eval_steps_per_second": 1.598, "step": 6500 }, { "epoch": 4.016059295861643, "grad_norm": 5.211010456085205, 
"learning_rate": 9.197405806053119e-07, "loss": 0.0795, "step": 6502 }, { "epoch": 4.017294626312538, "grad_norm": 4.138883113861084, "learning_rate": 9.19715873996294e-07, "loss": 0.0851, "step": 6504 }, { "epoch": 4.018529956763434, "grad_norm": 9.323567390441895, "learning_rate": 9.196911673872761e-07, "loss": 0.1038, "step": 6506 }, { "epoch": 4.01976528721433, "grad_norm": 5.299613952636719, "learning_rate": 9.196664607782582e-07, "loss": 0.0741, "step": 6508 }, { "epoch": 4.0210006176652255, "grad_norm": 3.3084733486175537, "learning_rate": 9.196417541692403e-07, "loss": 0.0647, "step": 6510 }, { "epoch": 4.022235948116121, "grad_norm": 6.684720516204834, "learning_rate": 9.196170475602223e-07, "loss": 0.0869, "step": 6512 }, { "epoch": 4.023471278567016, "grad_norm": 4.802043437957764, "learning_rate": 9.195923409512045e-07, "loss": 0.1542, "step": 6514 }, { "epoch": 4.024706609017913, "grad_norm": 8.781576156616211, "learning_rate": 9.195676343421866e-07, "loss": 0.1493, "step": 6516 }, { "epoch": 4.025941939468808, "grad_norm": 14.695884704589844, "learning_rate": 9.195429277331685e-07, "loss": 0.392, "step": 6518 }, { "epoch": 4.0271772699197035, "grad_norm": 2.4804606437683105, "learning_rate": 9.195182211241506e-07, "loss": 0.0222, "step": 6520 }, { "epoch": 4.028412600370599, "grad_norm": 8.205392837524414, "learning_rate": 9.194935145151327e-07, "loss": 0.1393, "step": 6522 }, { "epoch": 4.029647930821495, "grad_norm": 4.120705604553223, "learning_rate": 9.194688079061148e-07, "loss": 0.0359, "step": 6524 }, { "epoch": 4.030883261272391, "grad_norm": 4.484269618988037, "learning_rate": 9.194441012970969e-07, "loss": 0.2916, "step": 6526 }, { "epoch": 4.032118591723286, "grad_norm": 2.2557451725006104, "learning_rate": 9.19419394688079e-07, "loss": 0.0356, "step": 6528 }, { "epoch": 4.033353922174181, "grad_norm": 6.17080545425415, "learning_rate": 9.19394688079061e-07, "loss": 0.1002, "step": 6530 }, { "epoch": 4.034589252625077, "grad_norm": 
7.682319164276123, "learning_rate": 9.193699814700431e-07, "loss": 0.1549, "step": 6532 }, { "epoch": 4.035824583075973, "grad_norm": 2.96439266204834, "learning_rate": 9.193452748610253e-07, "loss": 0.0334, "step": 6534 }, { "epoch": 4.0370599135268685, "grad_norm": 7.402942657470703, "learning_rate": 9.193205682520073e-07, "loss": 0.0724, "step": 6536 }, { "epoch": 4.038295243977764, "grad_norm": 3.2320096492767334, "learning_rate": 9.192958616429894e-07, "loss": 0.1026, "step": 6538 }, { "epoch": 4.039530574428659, "grad_norm": 9.756072044372559, "learning_rate": 9.192711550339715e-07, "loss": 0.1589, "step": 6540 }, { "epoch": 4.040765904879556, "grad_norm": 8.088984489440918, "learning_rate": 9.192464484249536e-07, "loss": 0.1025, "step": 6542 }, { "epoch": 4.042001235330451, "grad_norm": 9.076614379882812, "learning_rate": 9.192217418159357e-07, "loss": 0.1269, "step": 6544 }, { "epoch": 4.0432365657813465, "grad_norm": 5.714547634124756, "learning_rate": 9.191970352069178e-07, "loss": 0.0746, "step": 6546 }, { "epoch": 4.044471896232242, "grad_norm": 2.1310229301452637, "learning_rate": 9.191723285978999e-07, "loss": 0.0331, "step": 6548 }, { "epoch": 4.045707226683137, "grad_norm": 3.276388645172119, "learning_rate": 9.191476219888819e-07, "loss": 0.0304, "step": 6550 }, { "epoch": 4.046942557134034, "grad_norm": 4.948581695556641, "learning_rate": 9.191229153798641e-07, "loss": 0.0524, "step": 6552 }, { "epoch": 4.048177887584929, "grad_norm": 2.1555869579315186, "learning_rate": 9.190982087708462e-07, "loss": 0.0827, "step": 6554 }, { "epoch": 4.049413218035824, "grad_norm": 4.27159309387207, "learning_rate": 9.190735021618282e-07, "loss": 0.0552, "step": 6556 }, { "epoch": 4.05064854848672, "grad_norm": 3.1461212635040283, "learning_rate": 9.190487955528103e-07, "loss": 0.13, "step": 6558 }, { "epoch": 4.051883878937616, "grad_norm": 13.252410888671875, "learning_rate": 9.190240889437924e-07, "loss": 0.2608, "step": 6560 }, { "epoch": 4.053119209388512, 
"grad_norm": 2.7096340656280518, "learning_rate": 9.189993823347745e-07, "loss": 0.1074, "step": 6562 }, { "epoch": 4.054354539839407, "grad_norm": 8.072587966918945, "learning_rate": 9.189746757257566e-07, "loss": 0.1458, "step": 6564 }, { "epoch": 4.055589870290302, "grad_norm": 5.536533832550049, "learning_rate": 9.189499691167387e-07, "loss": 0.0992, "step": 6566 }, { "epoch": 4.056825200741199, "grad_norm": 3.3487255573272705, "learning_rate": 9.189252625077207e-07, "loss": 0.0355, "step": 6568 }, { "epoch": 4.058060531192094, "grad_norm": 2.4754230976104736, "learning_rate": 9.189005558987028e-07, "loss": 0.0951, "step": 6570 }, { "epoch": 4.0592958616429895, "grad_norm": 5.462555408477783, "learning_rate": 9.18875849289685e-07, "loss": 0.1565, "step": 6572 }, { "epoch": 4.060531192093885, "grad_norm": 1.2830778360366821, "learning_rate": 9.18851142680667e-07, "loss": 0.0464, "step": 6574 }, { "epoch": 4.06176652254478, "grad_norm": 4.159124374389648, "learning_rate": 9.188264360716491e-07, "loss": 0.1026, "step": 6576 }, { "epoch": 4.063001852995677, "grad_norm": 3.752761125564575, "learning_rate": 9.188017294626312e-07, "loss": 0.0855, "step": 6578 }, { "epoch": 4.064237183446572, "grad_norm": 8.878849029541016, "learning_rate": 9.187770228536132e-07, "loss": 0.191, "step": 6580 }, { "epoch": 4.065472513897467, "grad_norm": 3.549513339996338, "learning_rate": 9.187523162445954e-07, "loss": 0.0439, "step": 6582 }, { "epoch": 4.066707844348363, "grad_norm": 6.513664722442627, "learning_rate": 9.187276096355775e-07, "loss": 0.2385, "step": 6584 }, { "epoch": 4.067943174799259, "grad_norm": 5.990884780883789, "learning_rate": 9.187029030265596e-07, "loss": 0.0673, "step": 6586 }, { "epoch": 4.069178505250155, "grad_norm": 0.4739679992198944, "learning_rate": 9.186781964175416e-07, "loss": 0.0092, "step": 6588 }, { "epoch": 4.07041383570105, "grad_norm": 2.792599678039551, "learning_rate": 9.186534898085238e-07, "loss": 0.0535, "step": 6590 }, { "epoch": 
4.071649166151945, "grad_norm": 4.451262474060059, "learning_rate": 9.186287831995059e-07, "loss": 0.096, "step": 6592 }, { "epoch": 4.072884496602842, "grad_norm": 8.085258483886719, "learning_rate": 9.186040765904879e-07, "loss": 0.1896, "step": 6594 }, { "epoch": 4.074119827053737, "grad_norm": 7.529166221618652, "learning_rate": 9.1857936998147e-07, "loss": 0.1416, "step": 6596 }, { "epoch": 4.0753551575046325, "grad_norm": 3.192466974258423, "learning_rate": 9.185546633724521e-07, "loss": 0.0709, "step": 6598 }, { "epoch": 4.076590487955528, "grad_norm": 2.7898623943328857, "learning_rate": 9.185299567634342e-07, "loss": 0.0999, "step": 6600 }, { "epoch": 4.077825818406423, "grad_norm": 4.931257247924805, "learning_rate": 9.185052501544163e-07, "loss": 0.042, "step": 6602 }, { "epoch": 4.07906114885732, "grad_norm": 8.155835151672363, "learning_rate": 9.184805435453984e-07, "loss": 0.0823, "step": 6604 }, { "epoch": 4.080296479308215, "grad_norm": 1.2691330909729004, "learning_rate": 9.184558369363804e-07, "loss": 0.0285, "step": 6606 }, { "epoch": 4.0815318097591105, "grad_norm": 6.12661600112915, "learning_rate": 9.184311303273625e-07, "loss": 0.1116, "step": 6608 }, { "epoch": 4.082767140210006, "grad_norm": 10.978297233581543, "learning_rate": 9.184064237183447e-07, "loss": 0.2389, "step": 6610 }, { "epoch": 4.084002470660902, "grad_norm": 5.821916580200195, "learning_rate": 9.183817171093267e-07, "loss": 0.0915, "step": 6612 }, { "epoch": 4.085237801111798, "grad_norm": 5.025089740753174, "learning_rate": 9.183570105003088e-07, "loss": 0.0572, "step": 6614 }, { "epoch": 4.086473131562693, "grad_norm": 4.669924736022949, "learning_rate": 9.183323038912909e-07, "loss": 0.1514, "step": 6616 }, { "epoch": 4.087708462013588, "grad_norm": 11.583474159240723, "learning_rate": 9.183075972822729e-07, "loss": 0.1133, "step": 6618 }, { "epoch": 4.088943792464484, "grad_norm": 6.195975303649902, "learning_rate": 9.182828906732551e-07, "loss": 0.1379, "step": 6620 }, 
{ "epoch": 4.09017912291538, "grad_norm": 9.91965103149414, "learning_rate": 9.182581840642372e-07, "loss": 0.2324, "step": 6622 }, { "epoch": 4.0914144533662755, "grad_norm": 2.7626307010650635, "learning_rate": 9.182334774552192e-07, "loss": 0.1428, "step": 6624 }, { "epoch": 4.092649783817171, "grad_norm": 4.5802903175354, "learning_rate": 9.182087708462013e-07, "loss": 0.0827, "step": 6626 }, { "epoch": 4.093885114268066, "grad_norm": 4.735642433166504, "learning_rate": 9.181840642371835e-07, "loss": 0.0649, "step": 6628 }, { "epoch": 4.095120444718963, "grad_norm": 6.165685653686523, "learning_rate": 9.181593576281656e-07, "loss": 0.2189, "step": 6630 }, { "epoch": 4.096355775169858, "grad_norm": 6.146791934967041, "learning_rate": 9.181346510191476e-07, "loss": 0.0921, "step": 6632 }, { "epoch": 4.0975911056207535, "grad_norm": 6.731791019439697, "learning_rate": 9.181099444101297e-07, "loss": 0.0555, "step": 6634 }, { "epoch": 4.098826436071649, "grad_norm": 4.253931522369385, "learning_rate": 9.180852378011118e-07, "loss": 0.0974, "step": 6636 }, { "epoch": 4.100061766522545, "grad_norm": 7.769657611846924, "learning_rate": 9.180605311920939e-07, "loss": 0.1237, "step": 6638 }, { "epoch": 4.101297096973441, "grad_norm": 1.9722294807434082, "learning_rate": 9.18035824583076e-07, "loss": 0.0323, "step": 6640 }, { "epoch": 4.102532427424336, "grad_norm": 5.47061824798584, "learning_rate": 9.180111179740581e-07, "loss": 0.2419, "step": 6642 }, { "epoch": 4.103767757875231, "grad_norm": 4.838558197021484, "learning_rate": 9.1798641136504e-07, "loss": 0.0522, "step": 6644 }, { "epoch": 4.105003088326127, "grad_norm": 9.555896759033203, "learning_rate": 9.179617047560222e-07, "loss": 0.1526, "step": 6646 }, { "epoch": 4.106238418777023, "grad_norm": 2.1596367359161377, "learning_rate": 9.179369981470044e-07, "loss": 0.0414, "step": 6648 }, { "epoch": 4.1074737492279185, "grad_norm": 7.719715118408203, "learning_rate": 9.179122915379864e-07, "loss": 0.1333, "step": 
6650 }, { "epoch": 4.108709079678814, "grad_norm": 4.407188892364502, "learning_rate": 9.178875849289684e-07, "loss": 0.1186, "step": 6652 }, { "epoch": 4.109944410129709, "grad_norm": 6.342453479766846, "learning_rate": 9.178628783199505e-07, "loss": 0.0938, "step": 6654 }, { "epoch": 4.111179740580606, "grad_norm": 3.595310688018799, "learning_rate": 9.178381717109325e-07, "loss": 0.0362, "step": 6656 }, { "epoch": 4.112415071031501, "grad_norm": 5.715254306793213, "learning_rate": 9.178134651019147e-07, "loss": 0.0738, "step": 6658 }, { "epoch": 4.1136504014823965, "grad_norm": 3.401876211166382, "learning_rate": 9.177887584928968e-07, "loss": 0.1056, "step": 6660 }, { "epoch": 4.114885731933292, "grad_norm": 3.1236214637756348, "learning_rate": 9.177640518838788e-07, "loss": 0.0892, "step": 6662 }, { "epoch": 4.116121062384188, "grad_norm": 3.369828224182129, "learning_rate": 9.177393452748609e-07, "loss": 0.1157, "step": 6664 }, { "epoch": 4.117356392835084, "grad_norm": 6.254116535186768, "learning_rate": 9.17714638665843e-07, "loss": 0.0793, "step": 6666 }, { "epoch": 4.118591723285979, "grad_norm": 10.379107475280762, "learning_rate": 9.176899320568252e-07, "loss": 0.1744, "step": 6668 }, { "epoch": 4.119827053736874, "grad_norm": 4.726716041564941, "learning_rate": 9.176652254478072e-07, "loss": 0.066, "step": 6670 }, { "epoch": 4.12106238418777, "grad_norm": 3.2081398963928223, "learning_rate": 9.176405188387893e-07, "loss": 0.0605, "step": 6672 }, { "epoch": 4.122297714638666, "grad_norm": 6.4414520263671875, "learning_rate": 9.176158122297714e-07, "loss": 0.1022, "step": 6674 }, { "epoch": 4.1235330450895615, "grad_norm": 5.3698344230651855, "learning_rate": 9.175911056207535e-07, "loss": 0.0844, "step": 6676 }, { "epoch": 4.124768375540457, "grad_norm": 5.943309783935547, "learning_rate": 9.175663990117356e-07, "loss": 0.0869, "step": 6678 }, { "epoch": 4.126003705991352, "grad_norm": 4.858236312866211, "learning_rate": 9.175416924027177e-07, "loss": 
0.0959, "step": 6680 }, { "epoch": 4.127239036442249, "grad_norm": 6.560520172119141, "learning_rate": 9.175169857936997e-07, "loss": 0.0963, "step": 6682 }, { "epoch": 4.128474366893144, "grad_norm": 8.50085163116455, "learning_rate": 9.174922791846818e-07, "loss": 0.088, "step": 6684 }, { "epoch": 4.1297096973440395, "grad_norm": 17.179927825927734, "learning_rate": 9.17467572575664e-07, "loss": 0.2875, "step": 6686 }, { "epoch": 4.130945027794935, "grad_norm": 4.994451522827148, "learning_rate": 9.17442865966646e-07, "loss": 0.1168, "step": 6688 }, { "epoch": 4.132180358245831, "grad_norm": 2.0937118530273438, "learning_rate": 9.174181593576281e-07, "loss": 0.3072, "step": 6690 }, { "epoch": 4.133415688696727, "grad_norm": 6.9706010818481445, "learning_rate": 9.173934527486102e-07, "loss": 0.1938, "step": 6692 }, { "epoch": 4.134651019147622, "grad_norm": 3.5316379070281982, "learning_rate": 9.173687461395922e-07, "loss": 0.1027, "step": 6694 }, { "epoch": 4.135886349598517, "grad_norm": 2.2965259552001953, "learning_rate": 9.173440395305744e-07, "loss": 0.0758, "step": 6696 }, { "epoch": 4.137121680049413, "grad_norm": 4.9543352127075195, "learning_rate": 9.173193329215565e-07, "loss": 0.1108, "step": 6698 }, { "epoch": 4.138357010500309, "grad_norm": 8.037967681884766, "learning_rate": 9.172946263125385e-07, "loss": 0.1803, "step": 6700 }, { "epoch": 4.1395923409512045, "grad_norm": 5.03834342956543, "learning_rate": 9.172699197035206e-07, "loss": 0.0759, "step": 6702 }, { "epoch": 4.1408276714021, "grad_norm": 9.338523864746094, "learning_rate": 9.172452130945027e-07, "loss": 0.1502, "step": 6704 }, { "epoch": 4.142063001852995, "grad_norm": 4.835606098175049, "learning_rate": 9.172205064854849e-07, "loss": 0.1108, "step": 6706 }, { "epoch": 4.143298332303892, "grad_norm": 12.465996742248535, "learning_rate": 9.171957998764669e-07, "loss": 0.2295, "step": 6708 }, { "epoch": 4.144533662754787, "grad_norm": 7.326292514801025, "learning_rate": 
9.17171093267449e-07, "loss": 0.1274, "step": 6710 }, { "epoch": 4.1457689932056825, "grad_norm": 5.435612678527832, "learning_rate": 9.171463866584311e-07, "loss": 0.0939, "step": 6712 }, { "epoch": 4.147004323656578, "grad_norm": 2.8460636138916016, "learning_rate": 9.171216800494131e-07, "loss": 0.1716, "step": 6714 }, { "epoch": 4.148239654107474, "grad_norm": 5.8933634757995605, "learning_rate": 9.170969734403953e-07, "loss": 0.1416, "step": 6716 }, { "epoch": 4.14947498455837, "grad_norm": 3.230404853820801, "learning_rate": 9.170722668313774e-07, "loss": 0.0421, "step": 6718 }, { "epoch": 4.150710315009265, "grad_norm": 6.771663665771484, "learning_rate": 9.170475602223594e-07, "loss": 0.1648, "step": 6720 }, { "epoch": 4.15194564546016, "grad_norm": 2.732349395751953, "learning_rate": 9.170228536133415e-07, "loss": 0.0239, "step": 6722 }, { "epoch": 4.153180975911056, "grad_norm": 8.953352928161621, "learning_rate": 9.169981470043237e-07, "loss": 0.1537, "step": 6724 }, { "epoch": 4.154416306361952, "grad_norm": 7.034547328948975, "learning_rate": 9.169734403953057e-07, "loss": 0.0745, "step": 6726 }, { "epoch": 4.1556516368128475, "grad_norm": 4.3527398109436035, "learning_rate": 9.169487337862878e-07, "loss": 0.0563, "step": 6728 }, { "epoch": 4.156886967263743, "grad_norm": 4.481092929840088, "learning_rate": 9.169240271772699e-07, "loss": 0.0483, "step": 6730 }, { "epoch": 4.158122297714638, "grad_norm": 3.32924747467041, "learning_rate": 9.168993205682519e-07, "loss": 0.0641, "step": 6732 }, { "epoch": 4.159357628165535, "grad_norm": 4.08587646484375, "learning_rate": 9.168746139592341e-07, "loss": 0.1754, "step": 6734 }, { "epoch": 4.16059295861643, "grad_norm": 3.6263885498046875, "learning_rate": 9.168499073502162e-07, "loss": 0.0701, "step": 6736 }, { "epoch": 4.1618282890673255, "grad_norm": 7.246702194213867, "learning_rate": 9.168252007411982e-07, "loss": 0.1356, "step": 6738 }, { "epoch": 4.163063619518221, "grad_norm": 4.334619522094727, 
"learning_rate": 9.168004941321803e-07, "loss": 0.1382, "step": 6740 }, { "epoch": 4.164298949969116, "grad_norm": 6.990710735321045, "learning_rate": 9.167757875231624e-07, "loss": 0.1554, "step": 6742 }, { "epoch": 4.165534280420013, "grad_norm": 4.6081953048706055, "learning_rate": 9.167510809141445e-07, "loss": 0.143, "step": 6744 }, { "epoch": 4.166769610870908, "grad_norm": 3.702495574951172, "learning_rate": 9.167263743051266e-07, "loss": 0.1323, "step": 6746 }, { "epoch": 4.168004941321803, "grad_norm": 6.420180797576904, "learning_rate": 9.167016676961087e-07, "loss": 0.071, "step": 6748 }, { "epoch": 4.169240271772699, "grad_norm": 3.7162039279937744, "learning_rate": 9.166769610870908e-07, "loss": 0.0808, "step": 6750 }, { "epoch": 4.170475602223595, "grad_norm": 16.583925247192383, "learning_rate": 9.166522544780728e-07, "loss": 0.245, "step": 6752 }, { "epoch": 4.171710932674491, "grad_norm": 7.245690822601318, "learning_rate": 9.16627547869055e-07, "loss": 0.1163, "step": 6754 }, { "epoch": 4.172946263125386, "grad_norm": 21.14237403869629, "learning_rate": 9.166028412600371e-07, "loss": 0.3657, "step": 6756 }, { "epoch": 4.174181593576281, "grad_norm": 7.423818588256836, "learning_rate": 9.165781346510191e-07, "loss": 0.1364, "step": 6758 }, { "epoch": 4.175416924027178, "grad_norm": 10.99629020690918, "learning_rate": 9.165534280420012e-07, "loss": 0.0797, "step": 6760 }, { "epoch": 4.176652254478073, "grad_norm": 5.306580066680908, "learning_rate": 9.165287214329834e-07, "loss": 0.1139, "step": 6762 }, { "epoch": 4.1778875849289685, "grad_norm": 3.1361498832702637, "learning_rate": 9.165040148239654e-07, "loss": 0.0348, "step": 6764 }, { "epoch": 4.179122915379864, "grad_norm": 6.699647426605225, "learning_rate": 9.164793082149475e-07, "loss": 0.0751, "step": 6766 }, { "epoch": 4.180358245830759, "grad_norm": 4.903552055358887, "learning_rate": 9.164546016059296e-07, "loss": 0.0611, "step": 6768 }, { "epoch": 4.181593576281656, "grad_norm": 
20.393844604492188, "learning_rate": 9.164298949969116e-07, "loss": 0.625, "step": 6770 }, { "epoch": 4.182828906732551, "grad_norm": 5.3843889236450195, "learning_rate": 9.164051883878938e-07, "loss": 0.0636, "step": 6772 }, { "epoch": 4.1840642371834464, "grad_norm": 5.530162334442139, "learning_rate": 9.163804817788759e-07, "loss": 0.2658, "step": 6774 }, { "epoch": 4.185299567634342, "grad_norm": 5.395704746246338, "learning_rate": 9.163557751698579e-07, "loss": 0.0314, "step": 6776 }, { "epoch": 4.186534898085238, "grad_norm": 1.5605146884918213, "learning_rate": 9.1633106856084e-07, "loss": 0.0133, "step": 6778 }, { "epoch": 4.187770228536134, "grad_norm": 5.0936408042907715, "learning_rate": 9.16306361951822e-07, "loss": 0.1352, "step": 6780 }, { "epoch": 4.189005558987029, "grad_norm": 6.292842388153076, "learning_rate": 9.162816553428042e-07, "loss": 0.0564, "step": 6782 }, { "epoch": 4.190240889437924, "grad_norm": 5.729795932769775, "learning_rate": 9.162569487337863e-07, "loss": 0.0582, "step": 6784 }, { "epoch": 4.191476219888821, "grad_norm": 3.7185535430908203, "learning_rate": 9.162322421247683e-07, "loss": 0.2752, "step": 6786 }, { "epoch": 4.192711550339716, "grad_norm": 3.7866897583007812, "learning_rate": 9.162075355157504e-07, "loss": 0.1241, "step": 6788 }, { "epoch": 4.1939468807906115, "grad_norm": 3.225928783416748, "learning_rate": 9.161828289067324e-07, "loss": 0.0341, "step": 6790 }, { "epoch": 4.195182211241507, "grad_norm": 6.677703380584717, "learning_rate": 9.161581222977146e-07, "loss": 0.1338, "step": 6792 }, { "epoch": 4.196417541692402, "grad_norm": 6.371048450469971, "learning_rate": 9.161334156886967e-07, "loss": 0.1348, "step": 6794 }, { "epoch": 4.197652872143299, "grad_norm": 6.814829349517822, "learning_rate": 9.161087090796787e-07, "loss": 0.0884, "step": 6796 }, { "epoch": 4.198888202594194, "grad_norm": 6.324218273162842, "learning_rate": 9.160840024706608e-07, "loss": 0.1053, "step": 6798 }, { "epoch": 
4.2001235330450895, "grad_norm": 8.46120548248291, "learning_rate": 9.160592958616429e-07, "loss": 0.2191, "step": 6800 }, { "epoch": 4.201358863495985, "grad_norm": 6.6311869621276855, "learning_rate": 9.16034589252625e-07, "loss": 0.0784, "step": 6802 }, { "epoch": 4.202594193946881, "grad_norm": 4.653926849365234, "learning_rate": 9.160098826436071e-07, "loss": 0.0392, "step": 6804 }, { "epoch": 4.203829524397777, "grad_norm": 6.542823791503906, "learning_rate": 9.159851760345892e-07, "loss": 0.1115, "step": 6806 }, { "epoch": 4.205064854848672, "grad_norm": 3.499319553375244, "learning_rate": 9.159604694255712e-07, "loss": 0.0328, "step": 6808 }, { "epoch": 4.206300185299567, "grad_norm": 6.024972438812256, "learning_rate": 9.159357628165534e-07, "loss": 0.1721, "step": 6810 }, { "epoch": 4.207535515750463, "grad_norm": 3.4050981998443604, "learning_rate": 9.159110562075355e-07, "loss": 0.0262, "step": 6812 }, { "epoch": 4.208770846201359, "grad_norm": 7.417334079742432, "learning_rate": 9.158863495985175e-07, "loss": 0.1953, "step": 6814 }, { "epoch": 4.2100061766522545, "grad_norm": 7.4818196296691895, "learning_rate": 9.158616429894996e-07, "loss": 0.1936, "step": 6816 }, { "epoch": 4.21124150710315, "grad_norm": 3.541612148284912, "learning_rate": 9.158369363804817e-07, "loss": 0.0867, "step": 6818 }, { "epoch": 4.212476837554045, "grad_norm": 4.989611625671387, "learning_rate": 9.158122297714638e-07, "loss": 0.0387, "step": 6820 }, { "epoch": 4.213712168004942, "grad_norm": 5.888008117675781, "learning_rate": 9.157875231624459e-07, "loss": 0.0797, "step": 6822 }, { "epoch": 4.214947498455837, "grad_norm": 4.660358428955078, "learning_rate": 9.15762816553428e-07, "loss": 0.1895, "step": 6824 }, { "epoch": 4.2161828289067325, "grad_norm": 4.699699878692627, "learning_rate": 9.157381099444101e-07, "loss": 0.0441, "step": 6826 }, { "epoch": 4.217418159357628, "grad_norm": 6.779531955718994, "learning_rate": 9.157134033353921e-07, "loss": 0.0884, "step": 6828 
}, { "epoch": 4.218653489808524, "grad_norm": 1.5614784955978394, "learning_rate": 9.156886967263743e-07, "loss": 0.0937, "step": 6830 }, { "epoch": 4.21988882025942, "grad_norm": 3.9747142791748047, "learning_rate": 9.156639901173564e-07, "loss": 0.1308, "step": 6832 }, { "epoch": 4.221124150710315, "grad_norm": 5.152132034301758, "learning_rate": 9.156392835083384e-07, "loss": 0.085, "step": 6834 }, { "epoch": 4.22235948116121, "grad_norm": 5.623650074005127, "learning_rate": 9.156145768993205e-07, "loss": 0.0595, "step": 6836 }, { "epoch": 4.223594811612106, "grad_norm": 3.3845908641815186, "learning_rate": 9.155898702903026e-07, "loss": 0.0749, "step": 6838 }, { "epoch": 4.224830142063002, "grad_norm": 13.711058616638184, "learning_rate": 9.155651636812847e-07, "loss": 0.2402, "step": 6840 }, { "epoch": 4.2260654725138975, "grad_norm": 6.198634624481201, "learning_rate": 9.155404570722668e-07, "loss": 0.0721, "step": 6842 }, { "epoch": 4.227300802964793, "grad_norm": 7.199183464050293, "learning_rate": 9.155157504632489e-07, "loss": 0.1184, "step": 6844 }, { "epoch": 4.228536133415688, "grad_norm": 7.560590744018555, "learning_rate": 9.154910438542309e-07, "loss": 0.0763, "step": 6846 }, { "epoch": 4.229771463866585, "grad_norm": 5.278291702270508, "learning_rate": 9.15466337245213e-07, "loss": 0.148, "step": 6848 }, { "epoch": 4.23100679431748, "grad_norm": 9.02103042602539, "learning_rate": 9.154416306361952e-07, "loss": 0.1152, "step": 6850 }, { "epoch": 4.2322421247683755, "grad_norm": 4.2919416427612305, "learning_rate": 9.154169240271772e-07, "loss": 0.0644, "step": 6852 }, { "epoch": 4.233477455219271, "grad_norm": 7.535450458526611, "learning_rate": 9.153922174181593e-07, "loss": 0.1209, "step": 6854 }, { "epoch": 4.234712785670167, "grad_norm": 6.073698043823242, "learning_rate": 9.153675108091414e-07, "loss": 0.1359, "step": 6856 }, { "epoch": 4.235948116121063, "grad_norm": 5.350350379943848, "learning_rate": 9.153428042001235e-07, "loss": 0.0637, 
"step": 6858 }, { "epoch": 4.237183446571958, "grad_norm": 5.638591766357422, "learning_rate": 9.153180975911056e-07, "loss": 0.0731, "step": 6860 }, { "epoch": 4.238418777022853, "grad_norm": 6.317381858825684, "learning_rate": 9.152933909820877e-07, "loss": 0.1073, "step": 6862 }, { "epoch": 4.239654107473749, "grad_norm": 5.388689994812012, "learning_rate": 9.152686843730697e-07, "loss": 0.0624, "step": 6864 }, { "epoch": 4.240889437924645, "grad_norm": 3.5920658111572266, "learning_rate": 9.152439777640518e-07, "loss": 0.025, "step": 6866 }, { "epoch": 4.2421247683755405, "grad_norm": 5.872882843017578, "learning_rate": 9.15219271155034e-07, "loss": 0.0548, "step": 6868 }, { "epoch": 4.243360098826436, "grad_norm": 1.4151225090026855, "learning_rate": 9.151945645460161e-07, "loss": 0.0391, "step": 6870 }, { "epoch": 4.244595429277331, "grad_norm": 9.522316932678223, "learning_rate": 9.151698579369981e-07, "loss": 0.0915, "step": 6872 }, { "epoch": 4.245830759728228, "grad_norm": 4.516439914703369, "learning_rate": 9.151451513279802e-07, "loss": 0.0633, "step": 6874 }, { "epoch": 4.247066090179123, "grad_norm": 2.798992156982422, "learning_rate": 9.151204447189623e-07, "loss": 0.0229, "step": 6876 }, { "epoch": 4.2483014206300185, "grad_norm": 3.721020460128784, "learning_rate": 9.150957381099444e-07, "loss": 0.0511, "step": 6878 }, { "epoch": 4.249536751080914, "grad_norm": 1.5441596508026123, "learning_rate": 9.150710315009265e-07, "loss": 0.0898, "step": 6880 }, { "epoch": 4.25077208153181, "grad_norm": 8.613372802734375, "learning_rate": 9.150463248919086e-07, "loss": 0.1405, "step": 6882 }, { "epoch": 4.252007411982706, "grad_norm": 15.092780113220215, "learning_rate": 9.150216182828906e-07, "loss": 0.0627, "step": 6884 }, { "epoch": 4.253242742433601, "grad_norm": 8.058306694030762, "learning_rate": 9.149969116738727e-07, "loss": 0.143, "step": 6886 }, { "epoch": 4.254478072884496, "grad_norm": 2.639694929122925, "learning_rate": 9.149722050648549e-07, 
"loss": 0.0388, "step": 6888 }, { "epoch": 4.255713403335392, "grad_norm": 4.05617618560791, "learning_rate": 9.149474984558369e-07, "loss": 0.0528, "step": 6890 }, { "epoch": 4.256948733786288, "grad_norm": 10.760944366455078, "learning_rate": 9.14922791846819e-07, "loss": 0.3524, "step": 6892 }, { "epoch": 4.2581840642371835, "grad_norm": 7.797626495361328, "learning_rate": 9.148980852378011e-07, "loss": 0.2006, "step": 6894 }, { "epoch": 4.259419394688079, "grad_norm": 3.1414196491241455, "learning_rate": 9.148733786287831e-07, "loss": 0.0709, "step": 6896 }, { "epoch": 4.260654725138974, "grad_norm": 6.917920112609863, "learning_rate": 9.148486720197653e-07, "loss": 0.1696, "step": 6898 }, { "epoch": 4.261890055589871, "grad_norm": 7.068999290466309, "learning_rate": 9.148239654107474e-07, "loss": 0.0721, "step": 6900 }, { "epoch": 4.263125386040766, "grad_norm": 5.507189750671387, "learning_rate": 9.147992588017294e-07, "loss": 0.0621, "step": 6902 }, { "epoch": 4.2643607164916615, "grad_norm": 6.07601261138916, "learning_rate": 9.147745521927115e-07, "loss": 0.071, "step": 6904 }, { "epoch": 4.265596046942557, "grad_norm": 5.666412353515625, "learning_rate": 9.147498455836937e-07, "loss": 0.0418, "step": 6906 }, { "epoch": 4.266831377393453, "grad_norm": 18.160823822021484, "learning_rate": 9.147251389746758e-07, "loss": 0.2642, "step": 6908 }, { "epoch": 4.268066707844349, "grad_norm": 3.824949264526367, "learning_rate": 9.147004323656578e-07, "loss": 0.4732, "step": 6910 }, { "epoch": 4.269302038295244, "grad_norm": 3.8784947395324707, "learning_rate": 9.146757257566399e-07, "loss": 0.1194, "step": 6912 }, { "epoch": 4.270537368746139, "grad_norm": 8.84994125366211, "learning_rate": 9.14651019147622e-07, "loss": 0.0789, "step": 6914 }, { "epoch": 4.271772699197035, "grad_norm": 7.2515339851379395, "learning_rate": 9.14626312538604e-07, "loss": 0.2308, "step": 6916 }, { "epoch": 4.273008029647931, "grad_norm": 6.233921051025391, "learning_rate": 
9.146016059295862e-07, "loss": 0.0736, "step": 6918 }, { "epoch": 4.2742433600988265, "grad_norm": 7.6823248863220215, "learning_rate": 9.145768993205682e-07, "loss": 0.0784, "step": 6920 }, { "epoch": 4.275478690549722, "grad_norm": 3.8461222648620605, "learning_rate": 9.145521927115502e-07, "loss": 0.1092, "step": 6922 }, { "epoch": 4.276714021000617, "grad_norm": 4.482606887817383, "learning_rate": 9.145274861025323e-07, "loss": 0.0415, "step": 6924 }, { "epoch": 4.277949351451514, "grad_norm": 3.0854098796844482, "learning_rate": 9.145027794935145e-07, "loss": 0.0765, "step": 6926 }, { "epoch": 4.279184681902409, "grad_norm": 8.577544212341309, "learning_rate": 9.144780728844965e-07, "loss": 0.112, "step": 6928 }, { "epoch": 4.2804200123533045, "grad_norm": 2.794713258743286, "learning_rate": 9.144533662754786e-07, "loss": 0.0622, "step": 6930 }, { "epoch": 4.2816553428042, "grad_norm": 4.636708736419678, "learning_rate": 9.144286596664607e-07, "loss": 0.076, "step": 6932 }, { "epoch": 4.282890673255096, "grad_norm": 2.240186929702759, "learning_rate": 9.144039530574427e-07, "loss": 0.0322, "step": 6934 }, { "epoch": 4.284126003705992, "grad_norm": 3.9186410903930664, "learning_rate": 9.143792464484249e-07, "loss": 0.1425, "step": 6936 }, { "epoch": 4.285361334156887, "grad_norm": 6.035135269165039, "learning_rate": 9.14354539839407e-07, "loss": 0.076, "step": 6938 }, { "epoch": 4.286596664607782, "grad_norm": 0.8558205366134644, "learning_rate": 9.14329833230389e-07, "loss": 0.0252, "step": 6940 }, { "epoch": 4.287831995058678, "grad_norm": 5.130290985107422, "learning_rate": 9.143051266213711e-07, "loss": 0.1007, "step": 6942 }, { "epoch": 4.289067325509574, "grad_norm": 3.9684150218963623, "learning_rate": 9.142804200123533e-07, "loss": 0.0954, "step": 6944 }, { "epoch": 4.29030265596047, "grad_norm": 4.45485782623291, "learning_rate": 9.142557134033354e-07, "loss": 0.0842, "step": 6946 }, { "epoch": 4.291537986411365, "grad_norm": 5.196887016296387, 
"learning_rate": 9.142310067943174e-07, "loss": 0.0538, "step": 6948 }, { "epoch": 4.29277331686226, "grad_norm": 1.9495034217834473, "learning_rate": 9.142063001852995e-07, "loss": 0.1061, "step": 6950 }, { "epoch": 4.294008647313157, "grad_norm": 5.618655681610107, "learning_rate": 9.141815935762816e-07, "loss": 0.1162, "step": 6952 }, { "epoch": 4.295243977764052, "grad_norm": 9.160022735595703, "learning_rate": 9.141568869672637e-07, "loss": 0.132, "step": 6954 }, { "epoch": 4.2964793082149475, "grad_norm": 4.383325576782227, "learning_rate": 9.141321803582458e-07, "loss": 0.1103, "step": 6956 }, { "epoch": 4.297714638665843, "grad_norm": 3.29338002204895, "learning_rate": 9.141074737492279e-07, "loss": 0.1097, "step": 6958 }, { "epoch": 4.298949969116738, "grad_norm": 6.5923752784729, "learning_rate": 9.140827671402099e-07, "loss": 0.0806, "step": 6960 }, { "epoch": 4.300185299567635, "grad_norm": 8.992118835449219, "learning_rate": 9.14058060531192e-07, "loss": 0.2884, "step": 6962 }, { "epoch": 4.30142063001853, "grad_norm": 7.382961750030518, "learning_rate": 9.140333539221742e-07, "loss": 0.1259, "step": 6964 }, { "epoch": 4.3026559604694254, "grad_norm": 5.340368270874023, "learning_rate": 9.140086473131562e-07, "loss": 0.0834, "step": 6966 }, { "epoch": 4.303891290920321, "grad_norm": 3.4125022888183594, "learning_rate": 9.139839407041383e-07, "loss": 0.0426, "step": 6968 }, { "epoch": 4.305126621371217, "grad_norm": 5.728832721710205, "learning_rate": 9.139592340951204e-07, "loss": 0.0679, "step": 6970 }, { "epoch": 4.306361951822113, "grad_norm": 1.915863037109375, "learning_rate": 9.139345274861024e-07, "loss": 0.0354, "step": 6972 }, { "epoch": 4.307597282273008, "grad_norm": 7.880068302154541, "learning_rate": 9.139098208770846e-07, "loss": 0.1097, "step": 6974 }, { "epoch": 4.308832612723903, "grad_norm": 5.703734874725342, "learning_rate": 9.138851142680667e-07, "loss": 0.2556, "step": 6976 }, { "epoch": 4.310067943174799, "grad_norm": 
6.503602027893066, "learning_rate": 9.138604076590487e-07, "loss": 0.2868, "step": 6978 }, { "epoch": 4.311303273625695, "grad_norm": 4.985523223876953, "learning_rate": 9.138357010500308e-07, "loss": 0.1155, "step": 6980 }, { "epoch": 4.3125386040765905, "grad_norm": 6.786588668823242, "learning_rate": 9.138109944410129e-07, "loss": 0.1438, "step": 6982 }, { "epoch": 4.313773934527486, "grad_norm": 2.922123908996582, "learning_rate": 9.13786287831995e-07, "loss": 0.0318, "step": 6984 }, { "epoch": 4.315009264978381, "grad_norm": 3.6398775577545166, "learning_rate": 9.137615812229771e-07, "loss": 0.0388, "step": 6986 }, { "epoch": 4.316244595429278, "grad_norm": 2.33793306350708, "learning_rate": 9.137368746139592e-07, "loss": 0.116, "step": 6988 }, { "epoch": 4.317479925880173, "grad_norm": 8.34350872039795, "learning_rate": 9.137121680049413e-07, "loss": 0.1531, "step": 6990 }, { "epoch": 4.3187152563310685, "grad_norm": 6.8687639236450195, "learning_rate": 9.136874613959234e-07, "loss": 0.0863, "step": 6992 }, { "epoch": 4.319950586781964, "grad_norm": 5.0151591300964355, "learning_rate": 9.136627547869055e-07, "loss": 0.0564, "step": 6994 }, { "epoch": 4.32118591723286, "grad_norm": 6.6380133628845215, "learning_rate": 9.136380481778876e-07, "loss": 0.2042, "step": 6996 }, { "epoch": 4.322421247683756, "grad_norm": 8.206398963928223, "learning_rate": 9.136133415688696e-07, "loss": 0.1464, "step": 6998 }, { "epoch": 4.323656578134651, "grad_norm": 4.372036933898926, "learning_rate": 9.135886349598517e-07, "loss": 0.1036, "step": 7000 }, { "epoch": 4.323656578134651, "eval_cer": 0.052343695908052344, "eval_loss": 0.2372334748506546, "eval_runtime": 64.0262, "eval_samples_per_second": 12.823, "eval_steps_per_second": 1.609, "step": 7000 }, { "epoch": 4.324891908585546, "grad_norm": 6.907306671142578, "learning_rate": 9.135639283508339e-07, "loss": 0.312, "step": 7002 }, { "epoch": 4.326127239036442, "grad_norm": 3.749375343322754, "learning_rate": 
9.135392217418159e-07, "loss": 0.2994, "step": 7004 }, { "epoch": 4.327362569487338, "grad_norm": 4.756535053253174, "learning_rate": 9.13514515132798e-07, "loss": 0.1292, "step": 7006 }, { "epoch": 4.3285978999382335, "grad_norm": 8.517313003540039, "learning_rate": 9.134898085237801e-07, "loss": 0.1396, "step": 7008 }, { "epoch": 4.329833230389129, "grad_norm": 4.113933086395264, "learning_rate": 9.134651019147621e-07, "loss": 0.0924, "step": 7010 }, { "epoch": 4.331068560840024, "grad_norm": 4.914371490478516, "learning_rate": 9.134403953057443e-07, "loss": 0.066, "step": 7012 }, { "epoch": 4.332303891290921, "grad_norm": 3.8675761222839355, "learning_rate": 9.134156886967264e-07, "loss": 0.1174, "step": 7014 }, { "epoch": 4.333539221741816, "grad_norm": 7.774860858917236, "learning_rate": 9.133909820877084e-07, "loss": 0.0965, "step": 7016 }, { "epoch": 4.3347745521927115, "grad_norm": 8.656599044799805, "learning_rate": 9.133662754786905e-07, "loss": 0.1536, "step": 7018 }, { "epoch": 4.336009882643607, "grad_norm": 8.008512496948242, "learning_rate": 9.133415688696726e-07, "loss": 0.1587, "step": 7020 }, { "epoch": 4.337245213094503, "grad_norm": 11.207775115966797, "learning_rate": 9.133168622606547e-07, "loss": 0.2158, "step": 7022 }, { "epoch": 4.338480543545399, "grad_norm": 2.7751028537750244, "learning_rate": 9.132921556516368e-07, "loss": 0.196, "step": 7024 }, { "epoch": 4.339715873996294, "grad_norm": 7.532310485839844, "learning_rate": 9.132674490426189e-07, "loss": 0.1312, "step": 7026 }, { "epoch": 4.340951204447189, "grad_norm": 9.178390502929688, "learning_rate": 9.13242742433601e-07, "loss": 0.1389, "step": 7028 }, { "epoch": 4.342186534898085, "grad_norm": 5.172399520874023, "learning_rate": 9.13218035824583e-07, "loss": 0.3137, "step": 7030 }, { "epoch": 4.343421865348981, "grad_norm": 9.481047630310059, "learning_rate": 9.131933292155652e-07, "loss": 0.1479, "step": 7032 }, { "epoch": 4.3446571957998765, "grad_norm": 4.019498825073242, 
"learning_rate": 9.131686226065473e-07, "loss": 0.1644, "step": 7034 }, { "epoch": 4.345892526250772, "grad_norm": 1.86590576171875, "learning_rate": 9.131439159975293e-07, "loss": 0.0553, "step": 7036 }, { "epoch": 4.347127856701667, "grad_norm": 5.0401763916015625, "learning_rate": 9.131192093885114e-07, "loss": 0.1046, "step": 7038 }, { "epoch": 4.348363187152564, "grad_norm": 5.232470512390137, "learning_rate": 9.130945027794936e-07, "loss": 0.3085, "step": 7040 }, { "epoch": 4.349598517603459, "grad_norm": 6.440539836883545, "learning_rate": 9.130697961704756e-07, "loss": 0.2083, "step": 7042 }, { "epoch": 4.3508338480543545, "grad_norm": 5.640471935272217, "learning_rate": 9.130450895614577e-07, "loss": 0.0645, "step": 7044 }, { "epoch": 4.35206917850525, "grad_norm": 6.2620062828063965, "learning_rate": 9.130203829524398e-07, "loss": 0.0555, "step": 7046 }, { "epoch": 4.353304508956146, "grad_norm": 6.823166847229004, "learning_rate": 9.129956763434217e-07, "loss": 0.0651, "step": 7048 }, { "epoch": 4.354539839407042, "grad_norm": 4.57837438583374, "learning_rate": 9.12970969734404e-07, "loss": 0.0954, "step": 7050 }, { "epoch": 4.355775169857937, "grad_norm": 4.566488742828369, "learning_rate": 9.12946263125386e-07, "loss": 0.1296, "step": 7052 }, { "epoch": 4.357010500308832, "grad_norm": 6.802925109863281, "learning_rate": 9.12921556516368e-07, "loss": 0.1623, "step": 7054 }, { "epoch": 4.358245830759728, "grad_norm": 8.346115112304688, "learning_rate": 9.128968499073501e-07, "loss": 0.2181, "step": 7056 }, { "epoch": 4.359481161210624, "grad_norm": 12.841869354248047, "learning_rate": 9.128721432983322e-07, "loss": 0.3602, "step": 7058 }, { "epoch": 4.3607164916615195, "grad_norm": 6.13035249710083, "learning_rate": 9.128474366893143e-07, "loss": 0.1651, "step": 7060 }, { "epoch": 4.361951822112415, "grad_norm": 5.073054790496826, "learning_rate": 9.128227300802964e-07, "loss": 0.1412, "step": 7062 }, { "epoch": 4.36318715256331, "grad_norm": 
1.661303162574768, "learning_rate": 9.127980234712785e-07, "loss": 0.0297, "step": 7064 }, { "epoch": 4.364422483014207, "grad_norm": 4.140926837921143, "learning_rate": 9.127733168622606e-07, "loss": 0.1218, "step": 7066 }, { "epoch": 4.365657813465102, "grad_norm": 8.012886047363281, "learning_rate": 9.127486102532426e-07, "loss": 0.1546, "step": 7068 }, { "epoch": 4.3668931439159975, "grad_norm": 3.610746383666992, "learning_rate": 9.127239036442248e-07, "loss": 0.1014, "step": 7070 }, { "epoch": 4.368128474366893, "grad_norm": 8.522265434265137, "learning_rate": 9.126991970352069e-07, "loss": 0.2214, "step": 7072 }, { "epoch": 4.369363804817789, "grad_norm": 4.179976940155029, "learning_rate": 9.126744904261889e-07, "loss": 0.0488, "step": 7074 }, { "epoch": 4.370599135268685, "grad_norm": 4.390453338623047, "learning_rate": 9.12649783817171e-07, "loss": 0.0323, "step": 7076 }, { "epoch": 4.37183446571958, "grad_norm": 2.675853729248047, "learning_rate": 9.126250772081532e-07, "loss": 0.0299, "step": 7078 }, { "epoch": 4.373069796170475, "grad_norm": 5.465578079223633, "learning_rate": 9.126003705991352e-07, "loss": 0.2927, "step": 7080 }, { "epoch": 4.374305126621371, "grad_norm": 7.241730690002441, "learning_rate": 9.125756639901173e-07, "loss": 0.0985, "step": 7082 }, { "epoch": 4.375540457072267, "grad_norm": 7.612433433532715, "learning_rate": 9.125509573810994e-07, "loss": 0.1032, "step": 7084 }, { "epoch": 4.3767757875231625, "grad_norm": 8.474584579467773, "learning_rate": 9.125262507720814e-07, "loss": 0.2236, "step": 7086 }, { "epoch": 4.378011117974058, "grad_norm": 6.660720348358154, "learning_rate": 9.125015441630636e-07, "loss": 0.0592, "step": 7088 }, { "epoch": 4.379246448424953, "grad_norm": 5.455678939819336, "learning_rate": 9.124768375540457e-07, "loss": 0.1873, "step": 7090 }, { "epoch": 4.38048177887585, "grad_norm": 3.7783939838409424, "learning_rate": 9.124521309450277e-07, "loss": 0.1504, "step": 7092 }, { "epoch": 4.381717109326745, 
"grad_norm": 2.8649463653564453, "learning_rate": 9.124274243360098e-07, "loss": 0.1101, "step": 7094 }, { "epoch": 4.3829524397776405, "grad_norm": 7.799289226531982, "learning_rate": 9.124027177269919e-07, "loss": 0.155, "step": 7096 }, { "epoch": 4.384187770228536, "grad_norm": 2.213665008544922, "learning_rate": 9.12378011117974e-07, "loss": 0.0278, "step": 7098 }, { "epoch": 4.385423100679432, "grad_norm": 4.935547351837158, "learning_rate": 9.123533045089561e-07, "loss": 0.0619, "step": 7100 }, { "epoch": 4.386658431130328, "grad_norm": 7.446197509765625, "learning_rate": 9.123285978999382e-07, "loss": 0.2856, "step": 7102 }, { "epoch": 4.387893761581223, "grad_norm": 8.897978782653809, "learning_rate": 9.123038912909202e-07, "loss": 0.1931, "step": 7104 }, { "epoch": 4.389129092032118, "grad_norm": 6.66575288772583, "learning_rate": 9.122791846819023e-07, "loss": 0.0687, "step": 7106 }, { "epoch": 4.390364422483014, "grad_norm": 5.863260269165039, "learning_rate": 9.122544780728845e-07, "loss": 0.0854, "step": 7108 }, { "epoch": 4.39159975293391, "grad_norm": 4.143052101135254, "learning_rate": 9.122297714638666e-07, "loss": 0.0656, "step": 7110 }, { "epoch": 4.3928350833848056, "grad_norm": 7.530967712402344, "learning_rate": 9.122050648548486e-07, "loss": 0.0953, "step": 7112 }, { "epoch": 4.394070413835701, "grad_norm": 5.324632167816162, "learning_rate": 9.121803582458307e-07, "loss": 0.0669, "step": 7114 }, { "epoch": 4.395305744286596, "grad_norm": 7.917862415313721, "learning_rate": 9.121556516368128e-07, "loss": 0.1326, "step": 7116 }, { "epoch": 4.396541074737493, "grad_norm": 7.483436584472656, "learning_rate": 9.121309450277949e-07, "loss": 0.1557, "step": 7118 }, { "epoch": 4.397776405188388, "grad_norm": 3.1769962310791016, "learning_rate": 9.12106238418777e-07, "loss": 0.0412, "step": 7120 }, { "epoch": 4.3990117356392835, "grad_norm": 11.183279037475586, "learning_rate": 9.120815318097591e-07, "loss": 0.2374, "step": 7122 }, { "epoch": 
4.400247066090179, "grad_norm": 6.5735063552856445, "learning_rate": 9.120568252007411e-07, "loss": 0.0712, "step": 7124 }, { "epoch": 4.401482396541075, "grad_norm": 2.1400792598724365, "learning_rate": 9.120321185917233e-07, "loss": 0.1537, "step": 7126 }, { "epoch": 4.402717726991971, "grad_norm": 7.200057029724121, "learning_rate": 9.120074119827054e-07, "loss": 0.1324, "step": 7128 }, { "epoch": 4.403953057442866, "grad_norm": 4.9059529304504395, "learning_rate": 9.119827053736874e-07, "loss": 0.0784, "step": 7130 }, { "epoch": 4.405188387893761, "grad_norm": 2.1410014629364014, "learning_rate": 9.119579987646695e-07, "loss": 0.1315, "step": 7132 }, { "epoch": 4.406423718344657, "grad_norm": 5.952201843261719, "learning_rate": 9.119332921556516e-07, "loss": 0.2644, "step": 7134 }, { "epoch": 4.407659048795553, "grad_norm": 1.2864041328430176, "learning_rate": 9.119085855466337e-07, "loss": 0.069, "step": 7136 }, { "epoch": 4.408894379246449, "grad_norm": 5.97597599029541, "learning_rate": 9.118838789376158e-07, "loss": 0.0957, "step": 7138 }, { "epoch": 4.410129709697344, "grad_norm": 4.555902004241943, "learning_rate": 9.118591723285979e-07, "loss": 0.0642, "step": 7140 }, { "epoch": 4.411365040148239, "grad_norm": 9.987727165222168, "learning_rate": 9.118344657195799e-07, "loss": 0.2364, "step": 7142 }, { "epoch": 4.412600370599136, "grad_norm": 6.5854973793029785, "learning_rate": 9.11809759110562e-07, "loss": 0.1174, "step": 7144 }, { "epoch": 4.413835701050031, "grad_norm": 5.863376140594482, "learning_rate": 9.117850525015442e-07, "loss": 0.0588, "step": 7146 }, { "epoch": 4.4150710315009265, "grad_norm": 3.57562518119812, "learning_rate": 9.117603458925263e-07, "loss": 0.1484, "step": 7148 }, { "epoch": 4.416306361951822, "grad_norm": 8.067484855651855, "learning_rate": 9.117356392835083e-07, "loss": 0.1483, "step": 7150 }, { "epoch": 4.417541692402717, "grad_norm": 3.8324227333068848, "learning_rate": 9.117109326744904e-07, "loss": 0.0425, "step": 7152 
}, { "epoch": 4.418777022853614, "grad_norm": 4.348913669586182, "learning_rate": 9.116862260654725e-07, "loss": 0.2531, "step": 7154 }, { "epoch": 4.420012353304509, "grad_norm": 1.8665988445281982, "learning_rate": 9.116615194564546e-07, "loss": 0.0245, "step": 7156 }, { "epoch": 4.4212476837554044, "grad_norm": 3.854729652404785, "learning_rate": 9.116368128474367e-07, "loss": 0.0598, "step": 7158 }, { "epoch": 4.4224830142063, "grad_norm": 6.502483367919922, "learning_rate": 9.116121062384188e-07, "loss": 0.1197, "step": 7160 }, { "epoch": 4.423718344657196, "grad_norm": 5.633001327514648, "learning_rate": 9.115873996294008e-07, "loss": 0.1103, "step": 7162 }, { "epoch": 4.424953675108092, "grad_norm": 3.1286251544952393, "learning_rate": 9.115626930203829e-07, "loss": 0.0781, "step": 7164 }, { "epoch": 4.426189005558987, "grad_norm": 6.744899272918701, "learning_rate": 9.115379864113651e-07, "loss": 0.1038, "step": 7166 }, { "epoch": 4.427424336009882, "grad_norm": 6.277617454528809, "learning_rate": 9.115132798023471e-07, "loss": 0.1262, "step": 7168 }, { "epoch": 4.428659666460778, "grad_norm": 4.146723747253418, "learning_rate": 9.114885731933292e-07, "loss": 0.0496, "step": 7170 }, { "epoch": 4.429894996911674, "grad_norm": 9.028107643127441, "learning_rate": 9.114638665843113e-07, "loss": 0.1742, "step": 7172 }, { "epoch": 4.4311303273625695, "grad_norm": 2.130958318710327, "learning_rate": 9.114391599752933e-07, "loss": 0.0367, "step": 7174 }, { "epoch": 4.432365657813465, "grad_norm": 6.551205158233643, "learning_rate": 9.114144533662755e-07, "loss": 0.245, "step": 7176 }, { "epoch": 4.43360098826436, "grad_norm": 6.178027153015137, "learning_rate": 9.113897467572576e-07, "loss": 0.0654, "step": 7178 }, { "epoch": 4.434836318715257, "grad_norm": 6.732550144195557, "learning_rate": 9.113650401482395e-07, "loss": 0.1107, "step": 7180 }, { "epoch": 4.436071649166152, "grad_norm": 5.408763885498047, "learning_rate": 9.113403335392216e-07, "loss": 0.0758, 
"step": 7182 }, { "epoch": 4.4373069796170475, "grad_norm": 13.371179580688477, "learning_rate": 9.113156269302039e-07, "loss": 0.3278, "step": 7184 }, { "epoch": 4.438542310067943, "grad_norm": 10.577507972717285, "learning_rate": 9.11290920321186e-07, "loss": 0.3278, "step": 7186 }, { "epoch": 4.439777640518839, "grad_norm": 5.627823352813721, "learning_rate": 9.112662137121679e-07, "loss": 0.0996, "step": 7188 }, { "epoch": 4.441012970969735, "grad_norm": 16.66104507446289, "learning_rate": 9.1124150710315e-07, "loss": 0.1277, "step": 7190 }, { "epoch": 4.44224830142063, "grad_norm": 7.11082124710083, "learning_rate": 9.112168004941321e-07, "loss": 0.1181, "step": 7192 }, { "epoch": 4.443483631871525, "grad_norm": 2.6414546966552734, "learning_rate": 9.111920938851142e-07, "loss": 0.0172, "step": 7194 }, { "epoch": 4.444718962322421, "grad_norm": 2.3965377807617188, "learning_rate": 9.111673872760963e-07, "loss": 0.0584, "step": 7196 }, { "epoch": 4.445954292773317, "grad_norm": 3.921543836593628, "learning_rate": 9.111426806670784e-07, "loss": 0.1188, "step": 7198 }, { "epoch": 4.4471896232242125, "grad_norm": 3.913640022277832, "learning_rate": 9.111179740580604e-07, "loss": 0.0918, "step": 7200 }, { "epoch": 4.448424953675108, "grad_norm": 7.150644779205322, "learning_rate": 9.110932674490425e-07, "loss": 0.0992, "step": 7202 }, { "epoch": 4.449660284126003, "grad_norm": 6.426137924194336, "learning_rate": 9.110685608400247e-07, "loss": 0.0794, "step": 7204 }, { "epoch": 4.4508956145769, "grad_norm": 6.796779632568359, "learning_rate": 9.110438542310067e-07, "loss": 0.2122, "step": 7206 }, { "epoch": 4.452130945027795, "grad_norm": 5.613876819610596, "learning_rate": 9.110191476219888e-07, "loss": 0.0648, "step": 7208 }, { "epoch": 4.4533662754786905, "grad_norm": 2.153139591217041, "learning_rate": 9.109944410129709e-07, "loss": 0.058, "step": 7210 }, { "epoch": 4.454601605929586, "grad_norm": 7.276166915893555, "learning_rate": 9.109697344039529e-07, 
"loss": 0.1073, "step": 7212 }, { "epoch": 4.455836936380482, "grad_norm": 11.65762996673584, "learning_rate": 9.109450277949351e-07, "loss": 0.2133, "step": 7214 }, { "epoch": 4.457072266831378, "grad_norm": 2.170079231262207, "learning_rate": 9.109203211859172e-07, "loss": 0.0524, "step": 7216 }, { "epoch": 4.458307597282273, "grad_norm": 1.9015003442764282, "learning_rate": 9.108956145768992e-07, "loss": 0.0403, "step": 7218 }, { "epoch": 4.459542927733168, "grad_norm": 8.20728874206543, "learning_rate": 9.108709079678813e-07, "loss": 0.1156, "step": 7220 }, { "epoch": 4.460778258184064, "grad_norm": 2.818824529647827, "learning_rate": 9.108462013588635e-07, "loss": 0.0332, "step": 7222 }, { "epoch": 4.46201358863496, "grad_norm": 4.279005527496338, "learning_rate": 9.108214947498455e-07, "loss": 0.0567, "step": 7224 }, { "epoch": 4.4632489190858555, "grad_norm": 4.91267728805542, "learning_rate": 9.107967881408276e-07, "loss": 0.0327, "step": 7226 }, { "epoch": 4.464484249536751, "grad_norm": 1.5154087543487549, "learning_rate": 9.107720815318097e-07, "loss": 0.0487, "step": 7228 }, { "epoch": 4.465719579987646, "grad_norm": 4.378084659576416, "learning_rate": 9.107473749227918e-07, "loss": 0.0597, "step": 7230 }, { "epoch": 4.466954910438543, "grad_norm": 7.137732028961182, "learning_rate": 9.107226683137739e-07, "loss": 0.1224, "step": 7232 }, { "epoch": 4.468190240889438, "grad_norm": 7.4434123039245605, "learning_rate": 9.10697961704756e-07, "loss": 0.124, "step": 7234 }, { "epoch": 4.4694255713403335, "grad_norm": 8.002681732177734, "learning_rate": 9.106732550957381e-07, "loss": 0.2699, "step": 7236 }, { "epoch": 4.470660901791229, "grad_norm": 1.6922019720077515, "learning_rate": 9.106485484867201e-07, "loss": 0.0607, "step": 7238 }, { "epoch": 4.471896232242125, "grad_norm": 4.821248531341553, "learning_rate": 9.106238418777022e-07, "loss": 0.0981, "step": 7240 }, { "epoch": 4.473131562693021, "grad_norm": 13.132823944091797, "learning_rate": 
9.105991352686844e-07, "loss": 0.1073, "step": 7242 }, { "epoch": 4.474366893143916, "grad_norm": 4.802004337310791, "learning_rate": 9.105744286596664e-07, "loss": 0.0707, "step": 7244 }, { "epoch": 4.475602223594811, "grad_norm": 6.735374927520752, "learning_rate": 9.105497220506485e-07, "loss": 0.0888, "step": 7246 }, { "epoch": 4.476837554045707, "grad_norm": 7.105311393737793, "learning_rate": 9.105250154416306e-07, "loss": 0.1007, "step": 7248 }, { "epoch": 4.478072884496603, "grad_norm": 3.9650983810424805, "learning_rate": 9.105003088326126e-07, "loss": 0.13, "step": 7250 }, { "epoch": 4.4793082149474985, "grad_norm": 4.906842231750488, "learning_rate": 9.104756022235948e-07, "loss": 0.2273, "step": 7252 }, { "epoch": 4.480543545398394, "grad_norm": 1.8732367753982544, "learning_rate": 9.104508956145769e-07, "loss": 0.0482, "step": 7254 }, { "epoch": 4.481778875849289, "grad_norm": 4.0947794914245605, "learning_rate": 9.104261890055589e-07, "loss": 0.0438, "step": 7256 }, { "epoch": 4.483014206300186, "grad_norm": 4.069326877593994, "learning_rate": 9.10401482396541e-07, "loss": 0.0653, "step": 7258 }, { "epoch": 4.484249536751081, "grad_norm": 2.4422757625579834, "learning_rate": 9.103767757875232e-07, "loss": 0.0401, "step": 7260 }, { "epoch": 4.4854848672019765, "grad_norm": 2.659245252609253, "learning_rate": 9.103520691785052e-07, "loss": 0.0278, "step": 7262 }, { "epoch": 4.486720197652872, "grad_norm": 6.955539226531982, "learning_rate": 9.103273625694873e-07, "loss": 0.1363, "step": 7264 }, { "epoch": 4.487955528103768, "grad_norm": 6.891416549682617, "learning_rate": 9.103026559604694e-07, "loss": 0.2286, "step": 7266 }, { "epoch": 4.489190858554664, "grad_norm": 8.358527183532715, "learning_rate": 9.102779493514515e-07, "loss": 0.2252, "step": 7268 }, { "epoch": 4.490426189005559, "grad_norm": 10.082792282104492, "learning_rate": 9.102532427424336e-07, "loss": 0.2649, "step": 7270 }, { "epoch": 4.491661519456454, "grad_norm": 6.367825031280518, 
"learning_rate": 9.102285361334157e-07, "loss": 0.0848, "step": 7272 }, { "epoch": 4.49289684990735, "grad_norm": 3.255016803741455, "learning_rate": 9.102038295243978e-07, "loss": 0.0524, "step": 7274 }, { "epoch": 4.494132180358246, "grad_norm": 9.38541030883789, "learning_rate": 9.101791229153798e-07, "loss": 0.0711, "step": 7276 }, { "epoch": 4.4953675108091415, "grad_norm": 4.675588130950928, "learning_rate": 9.101544163063619e-07, "loss": 0.0561, "step": 7278 }, { "epoch": 4.496602841260037, "grad_norm": 6.72861909866333, "learning_rate": 9.101297096973441e-07, "loss": 0.1491, "step": 7280 }, { "epoch": 4.497838171710932, "grad_norm": 8.132421493530273, "learning_rate": 9.101050030883261e-07, "loss": 0.1589, "step": 7282 }, { "epoch": 4.499073502161829, "grad_norm": 6.356222152709961, "learning_rate": 9.100802964793082e-07, "loss": 0.2009, "step": 7284 }, { "epoch": 4.500308832612724, "grad_norm": 2.6528754234313965, "learning_rate": 9.100555898702903e-07, "loss": 0.1017, "step": 7286 }, { "epoch": 4.5015441630636195, "grad_norm": 5.043853282928467, "learning_rate": 9.100308832612723e-07, "loss": 0.0424, "step": 7288 }, { "epoch": 4.502779493514515, "grad_norm": 4.300603866577148, "learning_rate": 9.100061766522545e-07, "loss": 0.1686, "step": 7290 }, { "epoch": 4.504014823965411, "grad_norm": 6.058945655822754, "learning_rate": 9.099814700432366e-07, "loss": 0.1332, "step": 7292 }, { "epoch": 4.505250154416307, "grad_norm": 5.782216548919678, "learning_rate": 9.099567634342186e-07, "loss": 0.0859, "step": 7294 }, { "epoch": 4.506485484867202, "grad_norm": 1.9811148643493652, "learning_rate": 9.099320568252007e-07, "loss": 0.0486, "step": 7296 }, { "epoch": 4.507720815318097, "grad_norm": 5.416058540344238, "learning_rate": 9.099073502161828e-07, "loss": 0.196, "step": 7298 }, { "epoch": 4.508956145768993, "grad_norm": 8.179959297180176, "learning_rate": 9.098826436071649e-07, "loss": 0.1179, "step": 7300 }, { "epoch": 4.510191476219889, "grad_norm": 
3.6890666484832764, "learning_rate": 9.09857936998147e-07, "loss": 0.0827, "step": 7302 }, { "epoch": 4.5114268066707846, "grad_norm": 5.240660190582275, "learning_rate": 9.098332303891291e-07, "loss": 0.0713, "step": 7304 }, { "epoch": 4.51266213712168, "grad_norm": 3.9610466957092285, "learning_rate": 9.098085237801112e-07, "loss": 0.097, "step": 7306 }, { "epoch": 4.513897467572575, "grad_norm": 12.074633598327637, "learning_rate": 9.097838171710932e-07, "loss": 0.3341, "step": 7308 }, { "epoch": 4.515132798023471, "grad_norm": 6.56176233291626, "learning_rate": 9.097591105620754e-07, "loss": 0.1164, "step": 7310 }, { "epoch": 4.516368128474367, "grad_norm": 3.5704152584075928, "learning_rate": 9.097344039530575e-07, "loss": 0.1487, "step": 7312 }, { "epoch": 4.5176034589252625, "grad_norm": 3.267418146133423, "learning_rate": 9.097096973440394e-07, "loss": 0.0988, "step": 7314 }, { "epoch": 4.518838789376158, "grad_norm": 5.098198413848877, "learning_rate": 9.096849907350215e-07, "loss": 0.1364, "step": 7316 }, { "epoch": 4.520074119827054, "grad_norm": 5.3653459548950195, "learning_rate": 9.096602841260038e-07, "loss": 0.099, "step": 7318 }, { "epoch": 4.52130945027795, "grad_norm": 2.7611982822418213, "learning_rate": 9.096355775169857e-07, "loss": 0.032, "step": 7320 }, { "epoch": 4.522544780728845, "grad_norm": 3.919140577316284, "learning_rate": 9.096108709079678e-07, "loss": 0.1298, "step": 7322 }, { "epoch": 4.52378011117974, "grad_norm": 2.369758367538452, "learning_rate": 9.095861642989499e-07, "loss": 0.0458, "step": 7324 }, { "epoch": 4.525015441630636, "grad_norm": 4.244865417480469, "learning_rate": 9.095614576899319e-07, "loss": 0.0477, "step": 7326 }, { "epoch": 4.526250772081532, "grad_norm": 21.059141159057617, "learning_rate": 9.095367510809141e-07, "loss": 0.059, "step": 7328 }, { "epoch": 4.527486102532428, "grad_norm": 4.442272186279297, "learning_rate": 9.095120444718962e-07, "loss": 0.1, "step": 7330 }, { "epoch": 4.528721432983323, 
"grad_norm": 4.967129707336426, "learning_rate": 9.094873378628782e-07, "loss": 0.0723, "step": 7332 }, { "epoch": 4.529956763434218, "grad_norm": 2.1198832988739014, "learning_rate": 9.094626312538603e-07, "loss": 0.0438, "step": 7334 }, { "epoch": 4.531192093885114, "grad_norm": 7.4472856521606445, "learning_rate": 9.094379246448424e-07, "loss": 0.1362, "step": 7336 }, { "epoch": 4.53242742433601, "grad_norm": 3.9882822036743164, "learning_rate": 9.094132180358245e-07, "loss": 0.101, "step": 7338 }, { "epoch": 4.5336627547869055, "grad_norm": 4.843862056732178, "learning_rate": 9.093885114268066e-07, "loss": 0.0767, "step": 7340 }, { "epoch": 4.534898085237801, "grad_norm": 7.350330829620361, "learning_rate": 9.093638048177887e-07, "loss": 0.3067, "step": 7342 }, { "epoch": 4.536133415688697, "grad_norm": 8.635293006896973, "learning_rate": 9.093390982087707e-07, "loss": 0.1275, "step": 7344 }, { "epoch": 4.537368746139593, "grad_norm": 4.426546573638916, "learning_rate": 9.093143915997528e-07, "loss": 0.0631, "step": 7346 }, { "epoch": 4.538604076590488, "grad_norm": 4.090522766113281, "learning_rate": 9.09289684990735e-07, "loss": 0.0518, "step": 7348 }, { "epoch": 4.5398394070413834, "grad_norm": 5.162106037139893, "learning_rate": 9.092649783817171e-07, "loss": 0.065, "step": 7350 }, { "epoch": 4.541074737492279, "grad_norm": 5.392283916473389, "learning_rate": 9.092402717726991e-07, "loss": 0.0966, "step": 7352 }, { "epoch": 4.542310067943175, "grad_norm": 7.753190517425537, "learning_rate": 9.092155651636812e-07, "loss": 0.1028, "step": 7354 }, { "epoch": 4.543545398394071, "grad_norm": 4.979510307312012, "learning_rate": 9.091908585546634e-07, "loss": 0.0876, "step": 7356 }, { "epoch": 4.544780728844966, "grad_norm": 3.9395508766174316, "learning_rate": 9.091661519456454e-07, "loss": 0.119, "step": 7358 }, { "epoch": 4.546016059295861, "grad_norm": 3.3066649436950684, "learning_rate": 9.091414453366275e-07, "loss": 0.0913, "step": 7360 }, { "epoch": 
4.547251389746757, "grad_norm": 10.937034606933594, "learning_rate": 9.091167387276096e-07, "loss": 0.231, "step": 7362 }, { "epoch": 4.548486720197653, "grad_norm": 5.6802592277526855, "learning_rate": 9.090920321185916e-07, "loss": 0.0545, "step": 7364 }, { "epoch": 4.5497220506485485, "grad_norm": 7.012713432312012, "learning_rate": 9.090673255095738e-07, "loss": 0.0733, "step": 7366 }, { "epoch": 4.550957381099444, "grad_norm": 7.179113864898682, "learning_rate": 9.090426189005559e-07, "loss": 0.3573, "step": 7368 }, { "epoch": 4.552192711550339, "grad_norm": 3.698814630508423, "learning_rate": 9.090179122915379e-07, "loss": 0.0336, "step": 7370 }, { "epoch": 4.553428042001236, "grad_norm": 8.49875545501709, "learning_rate": 9.0899320568252e-07, "loss": 0.1523, "step": 7372 }, { "epoch": 4.554663372452131, "grad_norm": 2.5895791053771973, "learning_rate": 9.089684990735021e-07, "loss": 0.0327, "step": 7374 }, { "epoch": 4.5558987029030265, "grad_norm": 8.12928295135498, "learning_rate": 9.089437924644842e-07, "loss": 0.1561, "step": 7376 }, { "epoch": 4.557134033353922, "grad_norm": 7.406088829040527, "learning_rate": 9.089190858554663e-07, "loss": 0.1424, "step": 7378 }, { "epoch": 4.558369363804818, "grad_norm": 5.410096645355225, "learning_rate": 9.088943792464484e-07, "loss": 0.1571, "step": 7380 }, { "epoch": 4.559604694255714, "grad_norm": 7.275083541870117, "learning_rate": 9.088696726374304e-07, "loss": 0.0658, "step": 7382 }, { "epoch": 4.560840024706609, "grad_norm": 4.714039325714111, "learning_rate": 9.088449660284125e-07, "loss": 0.0644, "step": 7384 }, { "epoch": 4.562075355157504, "grad_norm": 4.704112529754639, "learning_rate": 9.088202594193947e-07, "loss": 0.0792, "step": 7386 }, { "epoch": 4.5633106856084, "grad_norm": 3.7411539554595947, "learning_rate": 9.087955528103768e-07, "loss": 0.1512, "step": 7388 }, { "epoch": 4.564546016059296, "grad_norm": 7.1745452880859375, "learning_rate": 9.087708462013588e-07, "loss": 0.104, "step": 7390 }, { 
"epoch": 4.5657813465101915, "grad_norm": 4.7710981369018555, "learning_rate": 9.087461395923409e-07, "loss": 0.1627, "step": 7392 }, { "epoch": 4.567016676961087, "grad_norm": 2.211831569671631, "learning_rate": 9.087214329833231e-07, "loss": 0.0323, "step": 7394 }, { "epoch": 4.568252007411982, "grad_norm": 4.287656784057617, "learning_rate": 9.086967263743051e-07, "loss": 0.1226, "step": 7396 }, { "epoch": 4.569487337862879, "grad_norm": 4.8359456062316895, "learning_rate": 9.086720197652872e-07, "loss": 0.2625, "step": 7398 }, { "epoch": 4.570722668313774, "grad_norm": 8.280754089355469, "learning_rate": 9.086473131562693e-07, "loss": 0.0814, "step": 7400 }, { "epoch": 4.5719579987646695, "grad_norm": 7.87287712097168, "learning_rate": 9.086226065472513e-07, "loss": 0.1032, "step": 7402 }, { "epoch": 4.573193329215565, "grad_norm": 6.457242012023926, "learning_rate": 9.085978999382335e-07, "loss": 0.0836, "step": 7404 }, { "epoch": 4.574428659666461, "grad_norm": 2.7432312965393066, "learning_rate": 9.085731933292156e-07, "loss": 0.0835, "step": 7406 }, { "epoch": 4.575663990117357, "grad_norm": 12.150992393493652, "learning_rate": 9.085484867201976e-07, "loss": 0.2352, "step": 7408 }, { "epoch": 4.576899320568252, "grad_norm": 5.189281463623047, "learning_rate": 9.085237801111797e-07, "loss": 0.0731, "step": 7410 }, { "epoch": 4.578134651019147, "grad_norm": 6.450319290161133, "learning_rate": 9.084990735021618e-07, "loss": 0.0586, "step": 7412 }, { "epoch": 4.579369981470043, "grad_norm": 5.359395980834961, "learning_rate": 9.084743668931439e-07, "loss": 0.0559, "step": 7414 }, { "epoch": 4.580605311920939, "grad_norm": 7.718494415283203, "learning_rate": 9.08449660284126e-07, "loss": 0.0854, "step": 7416 }, { "epoch": 4.5818406423718345, "grad_norm": 5.401602745056152, "learning_rate": 9.084249536751081e-07, "loss": 0.0424, "step": 7418 }, { "epoch": 4.58307597282273, "grad_norm": 6.1513671875, "learning_rate": 9.084002470660901e-07, "loss": 0.0889, "step": 
7420 }, { "epoch": 4.584311303273625, "grad_norm": 4.250930309295654, "learning_rate": 9.083755404570722e-07, "loss": 0.0496, "step": 7422 }, { "epoch": 4.585546633724522, "grad_norm": 7.282406330108643, "learning_rate": 9.083508338480544e-07, "loss": 0.1271, "step": 7424 }, { "epoch": 4.586781964175417, "grad_norm": 8.058886528015137, "learning_rate": 9.083261272390365e-07, "loss": 0.1551, "step": 7426 }, { "epoch": 4.5880172946263125, "grad_norm": 6.002770900726318, "learning_rate": 9.083014206300185e-07, "loss": 0.1636, "step": 7428 }, { "epoch": 4.589252625077208, "grad_norm": 11.8301420211792, "learning_rate": 9.082767140210006e-07, "loss": 0.0939, "step": 7430 }, { "epoch": 4.590487955528104, "grad_norm": 4.672906875610352, "learning_rate": 9.082520074119827e-07, "loss": 0.0629, "step": 7432 }, { "epoch": 4.591723285979, "grad_norm": 2.969277858734131, "learning_rate": 9.082273008029648e-07, "loss": 0.1212, "step": 7434 }, { "epoch": 4.592958616429895, "grad_norm": 16.207244873046875, "learning_rate": 9.082025941939469e-07, "loss": 0.1633, "step": 7436 }, { "epoch": 4.59419394688079, "grad_norm": 6.337627410888672, "learning_rate": 9.08177887584929e-07, "loss": 0.0942, "step": 7438 }, { "epoch": 4.595429277331686, "grad_norm": 3.5349326133728027, "learning_rate": 9.08153180975911e-07, "loss": 0.0629, "step": 7440 }, { "epoch": 4.596664607782582, "grad_norm": 10.506754875183105, "learning_rate": 9.08128474366893e-07, "loss": 0.2854, "step": 7442 }, { "epoch": 4.5978999382334775, "grad_norm": 4.1508402824401855, "learning_rate": 9.081037677578753e-07, "loss": 0.0485, "step": 7444 }, { "epoch": 4.599135268684373, "grad_norm": 10.05119800567627, "learning_rate": 9.080790611488573e-07, "loss": 0.2627, "step": 7446 }, { "epoch": 4.600370599135268, "grad_norm": 6.731666564941406, "learning_rate": 9.080543545398393e-07, "loss": 0.0495, "step": 7448 }, { "epoch": 4.601605929586165, "grad_norm": 5.134609699249268, "learning_rate": 9.080296479308214e-07, "loss": 0.328, 
"step": 7450 }, { "epoch": 4.60284126003706, "grad_norm": 8.639701843261719, "learning_rate": 9.080049413218035e-07, "loss": 0.1742, "step": 7452 }, { "epoch": 4.6040765904879555, "grad_norm": 5.322540283203125, "learning_rate": 9.079802347127856e-07, "loss": 0.2068, "step": 7454 }, { "epoch": 4.605311920938851, "grad_norm": 8.139877319335938, "learning_rate": 9.079555281037677e-07, "loss": 0.1415, "step": 7456 }, { "epoch": 4.606547251389747, "grad_norm": 8.323644638061523, "learning_rate": 9.079308214947497e-07, "loss": 0.0643, "step": 7458 }, { "epoch": 4.607782581840643, "grad_norm": 2.7286264896392822, "learning_rate": 9.079061148857318e-07, "loss": 0.043, "step": 7460 }, { "epoch": 4.609017912291538, "grad_norm": 3.048794746398926, "learning_rate": 9.07881408276714e-07, "loss": 0.0813, "step": 7462 }, { "epoch": 4.610253242742433, "grad_norm": 4.003198623657227, "learning_rate": 9.07856701667696e-07, "loss": 0.2573, "step": 7464 }, { "epoch": 4.611488573193329, "grad_norm": 3.4025678634643555, "learning_rate": 9.078319950586781e-07, "loss": 0.1789, "step": 7466 }, { "epoch": 4.612723903644225, "grad_norm": 3.285443067550659, "learning_rate": 9.078072884496602e-07, "loss": 0.0663, "step": 7468 }, { "epoch": 4.6139592340951205, "grad_norm": 4.122405529022217, "learning_rate": 9.077825818406423e-07, "loss": 0.145, "step": 7470 }, { "epoch": 4.615194564546016, "grad_norm": 6.4459381103515625, "learning_rate": 9.077578752316244e-07, "loss": 0.12, "step": 7472 }, { "epoch": 4.616429894996911, "grad_norm": 1.3048933744430542, "learning_rate": 9.077331686226065e-07, "loss": 0.0623, "step": 7474 }, { "epoch": 4.617665225447808, "grad_norm": 22.225780487060547, "learning_rate": 9.077084620135886e-07, "loss": 0.2923, "step": 7476 }, { "epoch": 4.618900555898703, "grad_norm": 7.438167095184326, "learning_rate": 9.076837554045706e-07, "loss": 0.2358, "step": 7478 }, { "epoch": 4.6201358863495985, "grad_norm": 6.6582255363464355, "learning_rate": 9.076590487955527e-07, 
"loss": 0.0884, "step": 7480 }, { "epoch": 4.621371216800494, "grad_norm": 1.6254315376281738, "learning_rate": 9.076343421865349e-07, "loss": 0.0274, "step": 7482 }, { "epoch": 4.62260654725139, "grad_norm": 4.36107873916626, "learning_rate": 9.076096355775169e-07, "loss": 0.0315, "step": 7484 }, { "epoch": 4.623841877702286, "grad_norm": 7.64828634262085, "learning_rate": 9.07584928968499e-07, "loss": 0.1108, "step": 7486 }, { "epoch": 4.625077208153181, "grad_norm": 3.8055458068847656, "learning_rate": 9.075602223594811e-07, "loss": 0.0469, "step": 7488 }, { "epoch": 4.626312538604076, "grad_norm": 7.2914228439331055, "learning_rate": 9.075355157504631e-07, "loss": 0.0633, "step": 7490 }, { "epoch": 4.627547869054972, "grad_norm": 6.999173164367676, "learning_rate": 9.075108091414453e-07, "loss": 0.1236, "step": 7492 }, { "epoch": 4.628783199505868, "grad_norm": 4.295650005340576, "learning_rate": 9.074861025324274e-07, "loss": 0.0292, "step": 7494 }, { "epoch": 4.6300185299567636, "grad_norm": 3.5702731609344482, "learning_rate": 9.074613959234094e-07, "loss": 0.0216, "step": 7496 }, { "epoch": 4.631253860407659, "grad_norm": 3.748274326324463, "learning_rate": 9.074366893143915e-07, "loss": 0.053, "step": 7498 }, { "epoch": 4.632489190858554, "grad_norm": 2.3203744888305664, "learning_rate": 9.074119827053737e-07, "loss": 0.0364, "step": 7500 }, { "epoch": 4.632489190858554, "eval_cer": 0.05158208128505158, "eval_loss": 0.24159987270832062, "eval_runtime": 64.1456, "eval_samples_per_second": 12.799, "eval_steps_per_second": 1.606, "step": 7500 }, { "epoch": 4.63372452130945, "grad_norm": 8.809622764587402, "learning_rate": 9.073872760963557e-07, "loss": 0.2227, "step": 7502 }, { "epoch": 4.634959851760346, "grad_norm": 2.964982748031616, "learning_rate": 9.073625694873378e-07, "loss": 0.074, "step": 7504 }, { "epoch": 4.6361951822112415, "grad_norm": 8.479874610900879, "learning_rate": 9.073378628783199e-07, "loss": 0.0617, "step": 7506 }, { "epoch": 
4.637430512662137, "grad_norm": 18.51495361328125, "learning_rate": 9.07313156269302e-07, "loss": 0.0712, "step": 7508 }, { "epoch": 4.638665843113033, "grad_norm": 4.5891242027282715, "learning_rate": 9.072884496602841e-07, "loss": 0.055, "step": 7510 }, { "epoch": 4.639901173563929, "grad_norm": 6.564193248748779, "learning_rate": 9.072637430512662e-07, "loss": 0.0982, "step": 7512 }, { "epoch": 4.641136504014824, "grad_norm": 4.170833587646484, "learning_rate": 9.072390364422483e-07, "loss": 0.0337, "step": 7514 }, { "epoch": 4.642371834465719, "grad_norm": 2.092737913131714, "learning_rate": 9.072143298332303e-07, "loss": 0.03, "step": 7516 }, { "epoch": 4.643607164916615, "grad_norm": 6.539760589599609, "learning_rate": 9.071896232242124e-07, "loss": 0.0588, "step": 7518 }, { "epoch": 4.644842495367511, "grad_norm": 4.626224994659424, "learning_rate": 9.071649166151946e-07, "loss": 0.0834, "step": 7520 }, { "epoch": 4.646077825818407, "grad_norm": 3.969695806503296, "learning_rate": 9.071402100061766e-07, "loss": 0.0746, "step": 7522 }, { "epoch": 4.647313156269302, "grad_norm": 19.17711067199707, "learning_rate": 9.071155033971587e-07, "loss": 0.0898, "step": 7524 }, { "epoch": 4.648548486720197, "grad_norm": 1.3913137912750244, "learning_rate": 9.070907967881408e-07, "loss": 0.0351, "step": 7526 }, { "epoch": 4.649783817171093, "grad_norm": 6.474262237548828, "learning_rate": 9.070660901791228e-07, "loss": 0.1257, "step": 7528 }, { "epoch": 4.651019147621989, "grad_norm": 5.735784530639648, "learning_rate": 9.07041383570105e-07, "loss": 0.1506, "step": 7530 }, { "epoch": 4.6522544780728845, "grad_norm": 7.1956048011779785, "learning_rate": 9.070166769610871e-07, "loss": 0.0929, "step": 7532 }, { "epoch": 4.65348980852378, "grad_norm": 6.968749046325684, "learning_rate": 9.069919703520691e-07, "loss": 0.0869, "step": 7534 }, { "epoch": 4.654725138974676, "grad_norm": 4.9328742027282715, "learning_rate": 9.069672637430512e-07, "loss": 0.0625, "step": 7536 }, { 
"epoch": 4.655960469425572, "grad_norm": 4.313979148864746, "learning_rate": 9.069425571340334e-07, "loss": 0.0714, "step": 7538 }, { "epoch": 4.657195799876467, "grad_norm": 14.45595645904541, "learning_rate": 9.069178505250154e-07, "loss": 0.2374, "step": 7540 }, { "epoch": 4.6584311303273624, "grad_norm": 4.014899253845215, "learning_rate": 9.068931439159975e-07, "loss": 0.0428, "step": 7542 }, { "epoch": 4.659666460778258, "grad_norm": 4.846994400024414, "learning_rate": 9.068684373069796e-07, "loss": 0.0399, "step": 7544 }, { "epoch": 4.660901791229154, "grad_norm": 8.122374534606934, "learning_rate": 9.068437306979617e-07, "loss": 0.1196, "step": 7546 }, { "epoch": 4.66213712168005, "grad_norm": 6.668344497680664, "learning_rate": 9.068190240889438e-07, "loss": 0.1053, "step": 7548 }, { "epoch": 4.663372452130945, "grad_norm": 3.7458455562591553, "learning_rate": 9.067943174799259e-07, "loss": 0.0564, "step": 7550 }, { "epoch": 4.66460778258184, "grad_norm": 3.8083627223968506, "learning_rate": 9.06769610870908e-07, "loss": 0.0357, "step": 7552 }, { "epoch": 4.665843113032736, "grad_norm": 3.3858399391174316, "learning_rate": 9.0674490426189e-07, "loss": 0.0874, "step": 7554 }, { "epoch": 4.667078443483632, "grad_norm": 2.479045867919922, "learning_rate": 9.067201976528721e-07, "loss": 0.0165, "step": 7556 }, { "epoch": 4.6683137739345275, "grad_norm": 11.206296920776367, "learning_rate": 9.066954910438543e-07, "loss": 0.2782, "step": 7558 }, { "epoch": 4.669549104385423, "grad_norm": 3.161240577697754, "learning_rate": 9.066707844348363e-07, "loss": 0.0683, "step": 7560 }, { "epoch": 4.670784434836318, "grad_norm": 5.283199310302734, "learning_rate": 9.066460778258184e-07, "loss": 0.0866, "step": 7562 }, { "epoch": 4.672019765287215, "grad_norm": 13.662107467651367, "learning_rate": 9.066213712168005e-07, "loss": 0.6991, "step": 7564 }, { "epoch": 4.67325509573811, "grad_norm": 2.955472230911255, "learning_rate": 9.065966646077825e-07, "loss": 0.0525, 
"step": 7566 }, { "epoch": 4.6744904261890055, "grad_norm": 9.636919975280762, "learning_rate": 9.065719579987647e-07, "loss": 0.1486, "step": 7568 }, { "epoch": 4.675725756639901, "grad_norm": 5.122795104980469, "learning_rate": 9.065472513897468e-07, "loss": 0.1186, "step": 7570 }, { "epoch": 4.676961087090797, "grad_norm": 9.625365257263184, "learning_rate": 9.065225447807288e-07, "loss": 0.1498, "step": 7572 }, { "epoch": 4.678196417541693, "grad_norm": 4.5669474601745605, "learning_rate": 9.064978381717109e-07, "loss": 0.0577, "step": 7574 }, { "epoch": 4.679431747992588, "grad_norm": 3.429224967956543, "learning_rate": 9.064731315626931e-07, "loss": 0.2511, "step": 7576 }, { "epoch": 4.680667078443483, "grad_norm": 4.45626163482666, "learning_rate": 9.06448424953675e-07, "loss": 0.1015, "step": 7578 }, { "epoch": 4.681902408894379, "grad_norm": 2.3474502563476562, "learning_rate": 9.064237183446572e-07, "loss": 0.0518, "step": 7580 }, { "epoch": 4.683137739345275, "grad_norm": 6.491011619567871, "learning_rate": 9.063990117356392e-07, "loss": 0.0822, "step": 7582 }, { "epoch": 4.6843730697961705, "grad_norm": 6.609340190887451, "learning_rate": 9.063743051266212e-07, "loss": 0.1229, "step": 7584 }, { "epoch": 4.685608400247066, "grad_norm": 7.533172607421875, "learning_rate": 9.063495985176034e-07, "loss": 0.0728, "step": 7586 }, { "epoch": 4.686843730697961, "grad_norm": 4.3029046058654785, "learning_rate": 9.063248919085855e-07, "loss": 0.0713, "step": 7588 }, { "epoch": 4.688079061148858, "grad_norm": 1.6809531450271606, "learning_rate": 9.063001852995676e-07, "loss": 0.1374, "step": 7590 }, { "epoch": 4.689314391599753, "grad_norm": 3.7036385536193848, "learning_rate": 9.062754786905496e-07, "loss": 0.012, "step": 7592 }, { "epoch": 4.6905497220506485, "grad_norm": 5.610781192779541, "learning_rate": 9.062507720815317e-07, "loss": 0.0822, "step": 7594 }, { "epoch": 4.691785052501544, "grad_norm": 4.563458442687988, "learning_rate": 9.062260654725139e-07, 
"loss": 0.058, "step": 7596 }, { "epoch": 4.69302038295244, "grad_norm": 5.165139675140381, "learning_rate": 9.062013588634959e-07, "loss": 0.0593, "step": 7598 }, { "epoch": 4.694255713403336, "grad_norm": 14.13510513305664, "learning_rate": 9.06176652254478e-07, "loss": 0.1948, "step": 7600 }, { "epoch": 4.695491043854231, "grad_norm": 4.580004692077637, "learning_rate": 9.061519456454601e-07, "loss": 0.1156, "step": 7602 }, { "epoch": 4.696726374305126, "grad_norm": 2.818925142288208, "learning_rate": 9.061272390364421e-07, "loss": 0.0306, "step": 7604 }, { "epoch": 4.697961704756022, "grad_norm": 1.5514250993728638, "learning_rate": 9.061025324274243e-07, "loss": 0.035, "step": 7606 }, { "epoch": 4.699197035206918, "grad_norm": 3.2164618968963623, "learning_rate": 9.060778258184064e-07, "loss": 0.0391, "step": 7608 }, { "epoch": 4.7004323656578135, "grad_norm": 3.9473729133605957, "learning_rate": 9.060531192093884e-07, "loss": 0.081, "step": 7610 }, { "epoch": 4.701667696108709, "grad_norm": 2.3716824054718018, "learning_rate": 9.060284126003705e-07, "loss": 0.0262, "step": 7612 }, { "epoch": 4.702903026559604, "grad_norm": 0.8717756867408752, "learning_rate": 9.060037059913526e-07, "loss": 0.0435, "step": 7614 }, { "epoch": 4.704138357010501, "grad_norm": 3.2482664585113525, "learning_rate": 9.059789993823347e-07, "loss": 0.0422, "step": 7616 }, { "epoch": 4.705373687461396, "grad_norm": 3.7096731662750244, "learning_rate": 9.059542927733168e-07, "loss": 0.0838, "step": 7618 }, { "epoch": 4.7066090179122915, "grad_norm": 3.009963035583496, "learning_rate": 9.059295861642989e-07, "loss": 0.0222, "step": 7620 }, { "epoch": 4.707844348363187, "grad_norm": 4.681815147399902, "learning_rate": 9.059048795552809e-07, "loss": 0.0672, "step": 7622 }, { "epoch": 4.709079678814083, "grad_norm": 7.029771327972412, "learning_rate": 9.05880172946263e-07, "loss": 0.1363, "step": 7624 }, { "epoch": 4.710315009264979, "grad_norm": 2.4505209922790527, "learning_rate": 
9.058554663372452e-07, "loss": 0.1335, "step": 7626 }, { "epoch": 4.711550339715874, "grad_norm": 8.388994216918945, "learning_rate": 9.058307597282273e-07, "loss": 0.1346, "step": 7628 }, { "epoch": 4.712785670166769, "grad_norm": 4.451980113983154, "learning_rate": 9.058060531192093e-07, "loss": 0.1071, "step": 7630 }, { "epoch": 4.714021000617665, "grad_norm": 7.828093528747559, "learning_rate": 9.057813465101914e-07, "loss": 0.096, "step": 7632 }, { "epoch": 4.715256331068561, "grad_norm": 1.9119654893875122, "learning_rate": 9.057566399011736e-07, "loss": 0.2107, "step": 7634 }, { "epoch": 4.7164916615194565, "grad_norm": 2.4100522994995117, "learning_rate": 9.057319332921556e-07, "loss": 0.0657, "step": 7636 }, { "epoch": 4.717726991970352, "grad_norm": 4.047884464263916, "learning_rate": 9.057072266831377e-07, "loss": 0.362, "step": 7638 }, { "epoch": 4.718962322421247, "grad_norm": 9.272951126098633, "learning_rate": 9.056825200741198e-07, "loss": 0.1153, "step": 7640 }, { "epoch": 4.720197652872144, "grad_norm": 3.014000415802002, "learning_rate": 9.056578134651018e-07, "loss": 0.1437, "step": 7642 }, { "epoch": 4.721432983323039, "grad_norm": 2.447235345840454, "learning_rate": 9.05633106856084e-07, "loss": 0.0355, "step": 7644 }, { "epoch": 4.7226683137739345, "grad_norm": 0.45880037546157837, "learning_rate": 9.056084002470661e-07, "loss": 0.0202, "step": 7646 }, { "epoch": 4.72390364422483, "grad_norm": 7.02223539352417, "learning_rate": 9.055836936380481e-07, "loss": 0.1666, "step": 7648 }, { "epoch": 4.725138974675726, "grad_norm": 7.123632907867432, "learning_rate": 9.055589870290302e-07, "loss": 0.0765, "step": 7650 }, { "epoch": 4.726374305126622, "grad_norm": 4.26580286026001, "learning_rate": 9.055342804200123e-07, "loss": 0.3423, "step": 7652 }, { "epoch": 4.727609635577517, "grad_norm": 3.1265525817871094, "learning_rate": 9.055095738109944e-07, "loss": 0.0163, "step": 7654 }, { "epoch": 4.728844966028412, "grad_norm": 5.991120338439941, 
"learning_rate": 9.054848672019765e-07, "loss": 0.0679, "step": 7656 }, { "epoch": 4.730080296479308, "grad_norm": 9.435754776000977, "learning_rate": 9.054601605929586e-07, "loss": 0.1583, "step": 7658 }, { "epoch": 4.731315626930204, "grad_norm": 1.645023226737976, "learning_rate": 9.054354539839406e-07, "loss": 0.023, "step": 7660 }, { "epoch": 4.7325509573810995, "grad_norm": 4.877512454986572, "learning_rate": 9.054107473749227e-07, "loss": 0.0399, "step": 7662 }, { "epoch": 4.733786287831995, "grad_norm": 2.4953808784484863, "learning_rate": 9.053983940704138e-07, "loss": 0.0401, "step": 7664 }, { "epoch": 4.73502161828289, "grad_norm": 2.571674346923828, "learning_rate": 9.053736874613959e-07, "loss": 0.0868, "step": 7666 }, { "epoch": 4.736256948733787, "grad_norm": 7.873549938201904, "learning_rate": 9.05348980852378e-07, "loss": 0.0903, "step": 7668 }, { "epoch": 4.737492279184682, "grad_norm": 6.683528423309326, "learning_rate": 9.053242742433601e-07, "loss": 0.0666, "step": 7670 }, { "epoch": 4.7387276096355775, "grad_norm": 8.726422309875488, "learning_rate": 9.052995676343421e-07, "loss": 0.222, "step": 7672 }, { "epoch": 4.739962940086473, "grad_norm": 7.633820533752441, "learning_rate": 9.052748610253243e-07, "loss": 0.081, "step": 7674 }, { "epoch": 4.741198270537369, "grad_norm": 3.0170555114746094, "learning_rate": 9.052501544163064e-07, "loss": 0.0368, "step": 7676 }, { "epoch": 4.742433600988265, "grad_norm": 6.528805732727051, "learning_rate": 9.052254478072884e-07, "loss": 0.1093, "step": 7678 }, { "epoch": 4.74366893143916, "grad_norm": 6.9975810050964355, "learning_rate": 9.052007411982705e-07, "loss": 0.1077, "step": 7680 }, { "epoch": 4.744904261890055, "grad_norm": 7.881847381591797, "learning_rate": 9.051760345892526e-07, "loss": 0.2501, "step": 7682 }, { "epoch": 4.746139592340951, "grad_norm": 7.0307769775390625, "learning_rate": 9.051513279802347e-07, "loss": 0.0761, "step": 7684 }, { "epoch": 4.747374922791847, "grad_norm": 
4.258395671844482, "learning_rate": 9.051266213712168e-07, "loss": 0.0976, "step": 7686 }, { "epoch": 4.7486102532427426, "grad_norm": 5.731120586395264, "learning_rate": 9.051019147621989e-07, "loss": 0.2023, "step": 7688 }, { "epoch": 4.749845583693638, "grad_norm": 8.14689826965332, "learning_rate": 9.050772081531809e-07, "loss": 0.1504, "step": 7690 }, { "epoch": 4.751080914144533, "grad_norm": 3.013887882232666, "learning_rate": 9.05052501544163e-07, "loss": 0.0521, "step": 7692 }, { "epoch": 4.752316244595429, "grad_norm": 2.952094554901123, "learning_rate": 9.050277949351452e-07, "loss": 0.0721, "step": 7694 }, { "epoch": 4.753551575046325, "grad_norm": 3.108173131942749, "learning_rate": 9.050030883261272e-07, "loss": 0.195, "step": 7696 }, { "epoch": 4.7547869054972205, "grad_norm": 4.090005874633789, "learning_rate": 9.049783817171093e-07, "loss": 0.0623, "step": 7698 }, { "epoch": 4.756022235948116, "grad_norm": 5.3843159675598145, "learning_rate": 9.049536751080914e-07, "loss": 0.0741, "step": 7700 }, { "epoch": 4.757257566399012, "grad_norm": 7.5537309646606445, "learning_rate": 9.049289684990735e-07, "loss": 0.0597, "step": 7702 }, { "epoch": 4.758492896849908, "grad_norm": 2.992713212966919, "learning_rate": 9.049042618900556e-07, "loss": 0.0477, "step": 7704 }, { "epoch": 4.759728227300803, "grad_norm": 2.956991195678711, "learning_rate": 9.048795552810377e-07, "loss": 0.0226, "step": 7706 }, { "epoch": 4.760963557751698, "grad_norm": 1.9861388206481934, "learning_rate": 9.048548486720198e-07, "loss": 0.1063, "step": 7708 }, { "epoch": 4.762198888202594, "grad_norm": 3.121279716491699, "learning_rate": 9.048301420630018e-07, "loss": 0.4207, "step": 7710 }, { "epoch": 4.76343421865349, "grad_norm": 4.079547882080078, "learning_rate": 9.04805435453984e-07, "loss": 0.0554, "step": 7712 }, { "epoch": 4.764669549104386, "grad_norm": 3.638902425765991, "learning_rate": 9.047807288449661e-07, "loss": 0.2468, "step": 7714 }, { "epoch": 4.765904879555281, 
"grad_norm": 9.928899765014648, "learning_rate": 9.04756022235948e-07, "loss": 0.1487, "step": 7716 }, { "epoch": 4.767140210006176, "grad_norm": 3.4558894634246826, "learning_rate": 9.047313156269301e-07, "loss": 0.0777, "step": 7718 }, { "epoch": 4.768375540457072, "grad_norm": 5.880917549133301, "learning_rate": 9.047066090179122e-07, "loss": 0.1732, "step": 7720 }, { "epoch": 4.769610870907968, "grad_norm": 5.002291202545166, "learning_rate": 9.046819024088943e-07, "loss": 0.1106, "step": 7722 }, { "epoch": 4.7708462013588635, "grad_norm": 8.678997039794922, "learning_rate": 9.046571957998764e-07, "loss": 0.1593, "step": 7724 }, { "epoch": 4.772081531809759, "grad_norm": 3.276226043701172, "learning_rate": 9.046324891908585e-07, "loss": 0.0774, "step": 7726 }, { "epoch": 4.773316862260655, "grad_norm": 3.057553768157959, "learning_rate": 9.046077825818405e-07, "loss": 0.0367, "step": 7728 }, { "epoch": 4.774552192711551, "grad_norm": 4.629548072814941, "learning_rate": 9.045830759728226e-07, "loss": 0.0462, "step": 7730 }, { "epoch": 4.775787523162446, "grad_norm": 5.660343647003174, "learning_rate": 9.045583693638048e-07, "loss": 0.1742, "step": 7732 }, { "epoch": 4.7770228536133414, "grad_norm": 9.112336158752441, "learning_rate": 9.045336627547868e-07, "loss": 0.2646, "step": 7734 }, { "epoch": 4.778258184064237, "grad_norm": 9.385017395019531, "learning_rate": 9.045089561457689e-07, "loss": 0.1779, "step": 7736 }, { "epoch": 4.779493514515133, "grad_norm": 5.973051071166992, "learning_rate": 9.04484249536751e-07, "loss": 0.0604, "step": 7738 }, { "epoch": 4.780728844966029, "grad_norm": 3.460843563079834, "learning_rate": 9.04459542927733e-07, "loss": 0.0461, "step": 7740 }, { "epoch": 4.781964175416924, "grad_norm": 4.350451469421387, "learning_rate": 9.044348363187152e-07, "loss": 0.1162, "step": 7742 }, { "epoch": 4.783199505867819, "grad_norm": 8.926337242126465, "learning_rate": 9.044101297096973e-07, "loss": 0.0893, "step": 7744 }, { "epoch": 
4.784434836318715, "grad_norm": 3.549748182296753, "learning_rate": 9.043854231006794e-07, "loss": 0.1939, "step": 7746 }, { "epoch": 4.785670166769611, "grad_norm": 8.90920639038086, "learning_rate": 9.043607164916614e-07, "loss": 0.1565, "step": 7748 }, { "epoch": 4.7869054972205065, "grad_norm": 13.328091621398926, "learning_rate": 9.043360098826436e-07, "loss": 0.2447, "step": 7750 }, { "epoch": 4.788140827671402, "grad_norm": 3.4208474159240723, "learning_rate": 9.043113032736257e-07, "loss": 0.0248, "step": 7752 }, { "epoch": 4.789376158122297, "grad_norm": 5.2390618324279785, "learning_rate": 9.042865966646077e-07, "loss": 0.0631, "step": 7754 }, { "epoch": 4.790611488573194, "grad_norm": 2.8211541175842285, "learning_rate": 9.042618900555898e-07, "loss": 0.0404, "step": 7756 }, { "epoch": 4.791846819024089, "grad_norm": 2.6014013290405273, "learning_rate": 9.042371834465719e-07, "loss": 0.0705, "step": 7758 }, { "epoch": 4.7930821494749845, "grad_norm": 10.307024955749512, "learning_rate": 9.04212476837554e-07, "loss": 0.2946, "step": 7760 }, { "epoch": 4.79431747992588, "grad_norm": 9.78590202331543, "learning_rate": 9.041877702285361e-07, "loss": 0.2192, "step": 7762 }, { "epoch": 4.795552810376776, "grad_norm": 9.042428970336914, "learning_rate": 9.041630636195182e-07, "loss": 0.1855, "step": 7764 }, { "epoch": 4.796788140827672, "grad_norm": 4.970710754394531, "learning_rate": 9.041383570105002e-07, "loss": 0.1321, "step": 7766 }, { "epoch": 4.798023471278567, "grad_norm": 5.9123029708862305, "learning_rate": 9.041136504014823e-07, "loss": 0.0423, "step": 7768 }, { "epoch": 4.799258801729462, "grad_norm": 8.951457977294922, "learning_rate": 9.040889437924645e-07, "loss": 0.1336, "step": 7770 }, { "epoch": 4.800494132180358, "grad_norm": 11.713054656982422, "learning_rate": 9.040642371834465e-07, "loss": 0.1975, "step": 7772 }, { "epoch": 4.801729462631254, "grad_norm": 9.050058364868164, "learning_rate": 9.040395305744286e-07, "loss": 0.1155, "step": 
7774 }, { "epoch": 4.8029647930821495, "grad_norm": 2.3082211017608643, "learning_rate": 9.040148239654107e-07, "loss": 0.1064, "step": 7776 }, { "epoch": 4.804200123533045, "grad_norm": 5.431203365325928, "learning_rate": 9.039901173563927e-07, "loss": 0.0626, "step": 7778 }, { "epoch": 4.80543545398394, "grad_norm": 4.385817527770996, "learning_rate": 9.039654107473749e-07, "loss": 0.1018, "step": 7780 }, { "epoch": 4.806670784434837, "grad_norm": 4.78995943069458, "learning_rate": 9.03940704138357e-07, "loss": 0.4791, "step": 7782 }, { "epoch": 4.807906114885732, "grad_norm": 2.3688602447509766, "learning_rate": 9.03915997529339e-07, "loss": 0.0554, "step": 7784 }, { "epoch": 4.8091414453366275, "grad_norm": 6.077561855316162, "learning_rate": 9.038912909203211e-07, "loss": 0.0418, "step": 7786 }, { "epoch": 4.810376775787523, "grad_norm": 2.3526790142059326, "learning_rate": 9.038665843113033e-07, "loss": 0.1216, "step": 7788 }, { "epoch": 4.811612106238419, "grad_norm": 2.0656676292419434, "learning_rate": 9.038418777022854e-07, "loss": 0.1017, "step": 7790 }, { "epoch": 4.812847436689315, "grad_norm": 4.654843330383301, "learning_rate": 9.038171710932674e-07, "loss": 0.0581, "step": 7792 }, { "epoch": 4.81408276714021, "grad_norm": 6.606248378753662, "learning_rate": 9.037924644842495e-07, "loss": 0.1042, "step": 7794 }, { "epoch": 4.815318097591105, "grad_norm": 4.491543292999268, "learning_rate": 9.037677578752316e-07, "loss": 0.0724, "step": 7796 }, { "epoch": 4.816553428042001, "grad_norm": 4.6384501457214355, "learning_rate": 9.037430512662137e-07, "loss": 0.0556, "step": 7798 }, { "epoch": 4.817788758492897, "grad_norm": 4.49702262878418, "learning_rate": 9.037183446571958e-07, "loss": 0.0555, "step": 7800 }, { "epoch": 4.8190240889437925, "grad_norm": 10.390351295471191, "learning_rate": 9.036936380481779e-07, "loss": 0.2285, "step": 7802 }, { "epoch": 4.820259419394688, "grad_norm": 8.220772743225098, "learning_rate": 9.036689314391599e-07, "loss": 
0.1254, "step": 7804 }, { "epoch": 4.821494749845583, "grad_norm": 5.285287380218506, "learning_rate": 9.03644224830142e-07, "loss": 0.048, "step": 7806 }, { "epoch": 4.82273008029648, "grad_norm": 17.212278366088867, "learning_rate": 9.036195182211242e-07, "loss": 0.1321, "step": 7808 }, { "epoch": 4.823965410747375, "grad_norm": 4.926477909088135, "learning_rate": 9.035948116121062e-07, "loss": 0.2072, "step": 7810 }, { "epoch": 4.8252007411982705, "grad_norm": 5.3812971115112305, "learning_rate": 9.035701050030883e-07, "loss": 0.1107, "step": 7812 }, { "epoch": 4.826436071649166, "grad_norm": 2.7653000354766846, "learning_rate": 9.035453983940704e-07, "loss": 0.0873, "step": 7814 }, { "epoch": 4.827671402100062, "grad_norm": 9.149673461914062, "learning_rate": 9.035206917850524e-07, "loss": 0.1284, "step": 7816 }, { "epoch": 4.828906732550958, "grad_norm": 6.1425018310546875, "learning_rate": 9.034959851760346e-07, "loss": 0.1563, "step": 7818 }, { "epoch": 4.830142063001853, "grad_norm": 4.74194860458374, "learning_rate": 9.034712785670167e-07, "loss": 0.1379, "step": 7820 }, { "epoch": 4.831377393452748, "grad_norm": 5.983094215393066, "learning_rate": 9.034465719579987e-07, "loss": 0.1001, "step": 7822 }, { "epoch": 4.832612723903644, "grad_norm": 4.973236560821533, "learning_rate": 9.034218653489808e-07, "loss": 0.0835, "step": 7824 }, { "epoch": 4.83384805435454, "grad_norm": 4.591813564300537, "learning_rate": 9.033971587399629e-07, "loss": 0.202, "step": 7826 }, { "epoch": 4.8350833848054355, "grad_norm": 2.749110460281372, "learning_rate": 9.033724521309451e-07, "loss": 0.0663, "step": 7828 }, { "epoch": 4.836318715256331, "grad_norm": 5.374269962310791, "learning_rate": 9.033477455219271e-07, "loss": 0.1712, "step": 7830 }, { "epoch": 4.837554045707226, "grad_norm": 18.362207412719727, "learning_rate": 9.033230389129092e-07, "loss": 0.3359, "step": 7832 }, { "epoch": 4.838789376158123, "grad_norm": 6.490198135375977, "learning_rate": 
9.032983323038913e-07, "loss": 0.0736, "step": 7834 }, { "epoch": 4.840024706609018, "grad_norm": 7.6115522384643555, "learning_rate": 9.032736256948734e-07, "loss": 0.0898, "step": 7836 }, { "epoch": 4.8412600370599135, "grad_norm": 6.161343097686768, "learning_rate": 9.032489190858555e-07, "loss": 0.1339, "step": 7838 }, { "epoch": 4.842495367510809, "grad_norm": 6.221913814544678, "learning_rate": 9.032242124768376e-07, "loss": 0.0629, "step": 7840 }, { "epoch": 4.843730697961705, "grad_norm": 4.442539215087891, "learning_rate": 9.031995058678196e-07, "loss": 0.2888, "step": 7842 }, { "epoch": 4.844966028412601, "grad_norm": 3.9080708026885986, "learning_rate": 9.031747992588017e-07, "loss": 0.0962, "step": 7844 }, { "epoch": 4.846201358863496, "grad_norm": 4.139521598815918, "learning_rate": 9.031500926497839e-07, "loss": 0.0287, "step": 7846 }, { "epoch": 4.847436689314391, "grad_norm": 4.498985290527344, "learning_rate": 9.031253860407658e-07, "loss": 0.2235, "step": 7848 }, { "epoch": 4.848672019765287, "grad_norm": 6.304028511047363, "learning_rate": 9.03100679431748e-07, "loss": 0.128, "step": 7850 }, { "epoch": 4.849907350216183, "grad_norm": 3.3027281761169434, "learning_rate": 9.0307597282273e-07, "loss": 0.0823, "step": 7852 }, { "epoch": 4.8511426806670785, "grad_norm": 3.5530998706817627, "learning_rate": 9.03051266213712e-07, "loss": 0.0441, "step": 7854 }, { "epoch": 4.852378011117974, "grad_norm": 5.370185852050781, "learning_rate": 9.030265596046942e-07, "loss": 0.0959, "step": 7856 }, { "epoch": 4.853613341568869, "grad_norm": 5.808426856994629, "learning_rate": 9.030018529956763e-07, "loss": 0.0583, "step": 7858 }, { "epoch": 4.854848672019766, "grad_norm": 6.755468845367432, "learning_rate": 9.029771463866583e-07, "loss": 0.1072, "step": 7860 }, { "epoch": 4.856084002470661, "grad_norm": 3.1061694622039795, "learning_rate": 9.029524397776404e-07, "loss": 0.033, "step": 7862 }, { "epoch": 4.8573193329215565, "grad_norm": 10.767065048217773, 
"learning_rate": 9.029277331686225e-07, "loss": 0.3612, "step": 7864 }, { "epoch": 4.858554663372452, "grad_norm": 13.45327377319336, "learning_rate": 9.029030265596047e-07, "loss": 0.1067, "step": 7866 }, { "epoch": 4.859789993823348, "grad_norm": 3.242938756942749, "learning_rate": 9.028783199505867e-07, "loss": 0.0312, "step": 7868 }, { "epoch": 4.861025324274244, "grad_norm": 2.7386298179626465, "learning_rate": 9.028536133415688e-07, "loss": 0.1206, "step": 7870 }, { "epoch": 4.862260654725139, "grad_norm": 5.593037128448486, "learning_rate": 9.028289067325509e-07, "loss": 0.0437, "step": 7872 }, { "epoch": 4.863495985176034, "grad_norm": 2.2553508281707764, "learning_rate": 9.028042001235329e-07, "loss": 0.0914, "step": 7874 }, { "epoch": 4.86473131562693, "grad_norm": 4.458423137664795, "learning_rate": 9.027794935145151e-07, "loss": 0.0486, "step": 7876 }, { "epoch": 4.865966646077826, "grad_norm": 9.986715316772461, "learning_rate": 9.027547869054972e-07, "loss": 0.2437, "step": 7878 }, { "epoch": 4.8672019765287216, "grad_norm": 4.164139747619629, "learning_rate": 9.027300802964792e-07, "loss": 0.0936, "step": 7880 }, { "epoch": 4.868437306979617, "grad_norm": 3.343682289123535, "learning_rate": 9.027053736874613e-07, "loss": 0.0478, "step": 7882 }, { "epoch": 4.869672637430512, "grad_norm": 5.487553596496582, "learning_rate": 9.026806670784435e-07, "loss": 0.1668, "step": 7884 }, { "epoch": 4.870907967881408, "grad_norm": 10.645922660827637, "learning_rate": 9.026559604694255e-07, "loss": 0.1312, "step": 7886 }, { "epoch": 4.872143298332304, "grad_norm": 9.794191360473633, "learning_rate": 9.026312538604076e-07, "loss": 0.0437, "step": 7888 }, { "epoch": 4.8733786287831995, "grad_norm": 3.935554027557373, "learning_rate": 9.026065472513897e-07, "loss": 0.0705, "step": 7890 }, { "epoch": 4.874613959234095, "grad_norm": 4.814070224761963, "learning_rate": 9.025818406423717e-07, "loss": 0.0962, "step": 7892 }, { "epoch": 4.875849289684991, "grad_norm": 
6.84005880355835, "learning_rate": 9.025571340333539e-07, "loss": 0.1086, "step": 7894 }, { "epoch": 4.877084620135887, "grad_norm": 10.831822395324707, "learning_rate": 9.02532427424336e-07, "loss": 0.1286, "step": 7896 }, { "epoch": 4.878319950586782, "grad_norm": 7.497257232666016, "learning_rate": 9.02507720815318e-07, "loss": 0.103, "step": 7898 }, { "epoch": 4.879555281037677, "grad_norm": 4.37783145904541, "learning_rate": 9.024830142063001e-07, "loss": 0.0594, "step": 7900 }, { "epoch": 4.880790611488573, "grad_norm": 4.231611251831055, "learning_rate": 9.024583075972822e-07, "loss": 0.0736, "step": 7902 }, { "epoch": 4.882025941939469, "grad_norm": 3.5657835006713867, "learning_rate": 9.024336009882643e-07, "loss": 0.0505, "step": 7904 }, { "epoch": 4.883261272390365, "grad_norm": 5.623210430145264, "learning_rate": 9.024088943792464e-07, "loss": 0.0815, "step": 7906 }, { "epoch": 4.88449660284126, "grad_norm": 7.928351402282715, "learning_rate": 9.023841877702285e-07, "loss": 0.1058, "step": 7908 }, { "epoch": 4.885731933292155, "grad_norm": 4.996882915496826, "learning_rate": 9.023594811612106e-07, "loss": 0.0394, "step": 7910 }, { "epoch": 4.886967263743051, "grad_norm": 7.481711387634277, "learning_rate": 9.023347745521926e-07, "loss": 0.0938, "step": 7912 }, { "epoch": 4.888202594193947, "grad_norm": 5.518074989318848, "learning_rate": 9.023100679431748e-07, "loss": 0.1187, "step": 7914 }, { "epoch": 4.8894379246448425, "grad_norm": 10.541658401489258, "learning_rate": 9.022853613341569e-07, "loss": 0.1912, "step": 7916 }, { "epoch": 4.890673255095738, "grad_norm": 16.368053436279297, "learning_rate": 9.022606547251389e-07, "loss": 0.1621, "step": 7918 }, { "epoch": 4.891908585546634, "grad_norm": 11.109421730041504, "learning_rate": 9.02235948116121e-07, "loss": 0.1146, "step": 7920 }, { "epoch": 4.89314391599753, "grad_norm": 2.6519551277160645, "learning_rate": 9.022112415071032e-07, "loss": 0.024, "step": 7922 }, { "epoch": 4.894379246448425, 
"grad_norm": 7.280858516693115, "learning_rate": 9.021865348980852e-07, "loss": 0.0779, "step": 7924 }, { "epoch": 4.8956145768993204, "grad_norm": 12.06163215637207, "learning_rate": 9.021618282890673e-07, "loss": 0.2507, "step": 7926 }, { "epoch": 4.896849907350216, "grad_norm": 6.357548713684082, "learning_rate": 9.021371216800494e-07, "loss": 0.2879, "step": 7928 }, { "epoch": 4.898085237801112, "grad_norm": 12.624154090881348, "learning_rate": 9.021124150710314e-07, "loss": 0.2685, "step": 7930 }, { "epoch": 4.899320568252008, "grad_norm": 5.5955119132995605, "learning_rate": 9.020877084620136e-07, "loss": 0.1329, "step": 7932 }, { "epoch": 4.900555898702903, "grad_norm": 6.587497234344482, "learning_rate": 9.020630018529957e-07, "loss": 0.1009, "step": 7934 }, { "epoch": 4.901791229153798, "grad_norm": 2.077887773513794, "learning_rate": 9.020382952439777e-07, "loss": 0.105, "step": 7936 }, { "epoch": 4.903026559604694, "grad_norm": 7.464636325836182, "learning_rate": 9.020135886349598e-07, "loss": 0.1607, "step": 7938 }, { "epoch": 4.90426189005559, "grad_norm": 1.090586543083191, "learning_rate": 9.019888820259419e-07, "loss": 0.0431, "step": 7940 }, { "epoch": 4.9054972205064855, "grad_norm": 4.926720142364502, "learning_rate": 9.01964175416924e-07, "loss": 0.0342, "step": 7942 }, { "epoch": 4.906732550957381, "grad_norm": 5.083724021911621, "learning_rate": 9.019394688079061e-07, "loss": 0.1476, "step": 7944 }, { "epoch": 4.907967881408276, "grad_norm": 7.871881008148193, "learning_rate": 9.019147621988882e-07, "loss": 0.2218, "step": 7946 }, { "epoch": 4.909203211859173, "grad_norm": 8.980428695678711, "learning_rate": 9.018900555898703e-07, "loss": 0.2003, "step": 7948 }, { "epoch": 4.910438542310068, "grad_norm": 7.15848970413208, "learning_rate": 9.018653489808523e-07, "loss": 0.0492, "step": 7950 }, { "epoch": 4.9116738727609635, "grad_norm": 7.393805027008057, "learning_rate": 9.018406423718345e-07, "loss": 0.2265, "step": 7952 }, { "epoch": 
4.912909203211859, "grad_norm": 4.047158718109131, "learning_rate": 9.018159357628166e-07, "loss": 0.0443, "step": 7954 }, { "epoch": 4.914144533662755, "grad_norm": 7.021085262298584, "learning_rate": 9.017912291537986e-07, "loss": 0.081, "step": 7956 }, { "epoch": 4.915379864113651, "grad_norm": 1.613834261894226, "learning_rate": 9.017665225447807e-07, "loss": 0.0612, "step": 7958 }, { "epoch": 4.916615194564546, "grad_norm": 3.5722391605377197, "learning_rate": 9.017418159357628e-07, "loss": 0.077, "step": 7960 }, { "epoch": 4.917850525015441, "grad_norm": 8.686017036437988, "learning_rate": 9.017171093267449e-07, "loss": 0.1165, "step": 7962 }, { "epoch": 4.919085855466337, "grad_norm": 2.0339195728302, "learning_rate": 9.01692402717727e-07, "loss": 0.0815, "step": 7964 }, { "epoch": 4.920321185917233, "grad_norm": 10.292388916015625, "learning_rate": 9.016676961087091e-07, "loss": 0.2764, "step": 7966 }, { "epoch": 4.9215565163681285, "grad_norm": 7.148754119873047, "learning_rate": 9.016429894996911e-07, "loss": 0.1341, "step": 7968 }, { "epoch": 4.922791846819024, "grad_norm": 8.499019622802734, "learning_rate": 9.016182828906733e-07, "loss": 0.1311, "step": 7970 }, { "epoch": 4.924027177269919, "grad_norm": 6.216228485107422, "learning_rate": 9.015935762816554e-07, "loss": 0.0705, "step": 7972 }, { "epoch": 4.925262507720816, "grad_norm": 4.10164213180542, "learning_rate": 9.015688696726374e-07, "loss": 0.0599, "step": 7974 }, { "epoch": 4.926497838171711, "grad_norm": 6.066524982452393, "learning_rate": 9.015441630636195e-07, "loss": 0.1154, "step": 7976 }, { "epoch": 4.9277331686226065, "grad_norm": 6.671455383300781, "learning_rate": 9.015194564546016e-07, "loss": 0.2475, "step": 7978 }, { "epoch": 4.928968499073502, "grad_norm": 3.340322971343994, "learning_rate": 9.014947498455837e-07, "loss": 0.0831, "step": 7980 }, { "epoch": 4.930203829524398, "grad_norm": 5.936152458190918, "learning_rate": 9.014700432365657e-07, "loss": 0.0768, "step": 7982 }, { 
"epoch": 4.931439159975294, "grad_norm": 3.533153533935547, "learning_rate": 9.014453366275478e-07, "loss": 0.0472, "step": 7984 }, { "epoch": 4.932674490426189, "grad_norm": 4.704244613647461, "learning_rate": 9.0142063001853e-07, "loss": 0.0423, "step": 7986 }, { "epoch": 4.933909820877084, "grad_norm": 3.593474864959717, "learning_rate": 9.013959234095119e-07, "loss": 0.0989, "step": 7988 }, { "epoch": 4.93514515132798, "grad_norm": 10.242197036743164, "learning_rate": 9.013712168004941e-07, "loss": 0.2057, "step": 7990 }, { "epoch": 4.936380481778876, "grad_norm": 8.922701835632324, "learning_rate": 9.013465101914762e-07, "loss": 0.1379, "step": 7992 }, { "epoch": 4.9376158122297715, "grad_norm": 8.211941719055176, "learning_rate": 9.013218035824582e-07, "loss": 0.129, "step": 7994 }, { "epoch": 4.938851142680667, "grad_norm": 10.847566604614258, "learning_rate": 9.012970969734403e-07, "loss": 0.2831, "step": 7996 }, { "epoch": 4.940086473131562, "grad_norm": 5.839463710784912, "learning_rate": 9.012723903644224e-07, "loss": 0.0885, "step": 7998 }, { "epoch": 4.941321803582459, "grad_norm": 5.888739109039307, "learning_rate": 9.012476837554045e-07, "loss": 0.0833, "step": 8000 }, { "epoch": 4.941321803582459, "eval_cer": 0.05158208128505158, "eval_loss": 0.23343217372894287, "eval_runtime": 64.058, "eval_samples_per_second": 12.817, "eval_steps_per_second": 1.608, "step": 8000 } ], "logging_steps": 2, "max_steps": 80950, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.595510528084384e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }